Model: jackf857/qwen3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.45-s_star-0.4 Source: Original Platform
12705 lines
465 KiB
JSON
12705 lines
465 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.999244142101285,
|
|
"eval_steps": 100,
|
|
"global_step": 661,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0015117157974300832,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.002980858087539673,
|
|
"fcm_dpo/q_t": 0.5000747442245483,
|
|
"grad_norm": 17.89813995361328,
|
|
"learning_rate": 0.0,
|
|
"logits/chosen": 1.702779769897461,
|
|
"logits/rejected": 1.6965749263763428,
|
|
"logps/chosen": -80.20932006835938,
|
|
"logps/ref_chosen": -80.27740478515625,
|
|
"logps/ref_rejected": -83.5943374633789,
|
|
"logps/rejected": -83.52326965332031,
|
|
"loss": 1.387,
|
|
"margin_dpo/margin_mean": -0.0029816031455993652,
|
|
"margin_dpo/margin_std": 0.3835117816925049,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.0030234315948601664,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.029325395822525024,
|
|
"fcm_dpo/q_t": 0.4992692470550537,
|
|
"grad_norm": 21.475933074951172,
|
|
"learning_rate": 7.462686567164179e-09,
|
|
"logits/chosen": 1.7006168365478516,
|
|
"logits/rejected": 1.6698178052902222,
|
|
"logps/chosen": -74.51097869873047,
|
|
"logps/ref_chosen": -74.56095886230469,
|
|
"logps/ref_rejected": -83.53636169433594,
|
|
"logps/rejected": -83.51570892333984,
|
|
"loss": 1.3839,
|
|
"margin_dpo/margin_mean": 0.029325813055038452,
|
|
"margin_dpo/margin_std": 0.4646317958831787,
|
|
"step": 2
|
|
},
|
|
{
|
|
"epoch": 0.0045351473922902496,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.02612045407295227,
|
|
"fcm_dpo/q_t": 0.5006523132324219,
|
|
"grad_norm": 19.997602462768555,
|
|
"learning_rate": 1.4925373134328357e-08,
|
|
"logits/chosen": 1.626187801361084,
|
|
"logits/rejected": 1.5350779294967651,
|
|
"logps/chosen": -82.14555358886719,
|
|
"logps/ref_chosen": -82.1510009765625,
|
|
"logps/ref_rejected": -109.82986450195312,
|
|
"logps/rejected": -109.79829406738281,
|
|
"loss": 1.3894,
|
|
"margin_dpo/margin_mean": -0.026119887828826904,
|
|
"margin_dpo/margin_std": 0.42776572704315186,
|
|
"step": 3
|
|
},
|
|
{
|
|
"epoch": 0.006046863189720333,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.09968307614326477,
|
|
"fcm_dpo/q_t": 0.5024900436401367,
|
|
"grad_norm": 19.80933952331543,
|
|
"learning_rate": 2.2388059701492534e-08,
|
|
"logits/chosen": 1.7662708759307861,
|
|
"logits/rejected": 1.754350185394287,
|
|
"logps/chosen": -92.36078643798828,
|
|
"logps/ref_chosen": -92.37549591064453,
|
|
"logps/ref_rejected": -99.59553527832031,
|
|
"logps/rejected": -99.48114776611328,
|
|
"loss": 1.3967,
|
|
"margin_dpo/margin_mean": -0.09968316555023193,
|
|
"margin_dpo/margin_std": 0.4198913276195526,
|
|
"step": 4
|
|
},
|
|
{
|
|
"epoch": 0.007558578987150416,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.057308733463287354,
|
|
"fcm_dpo/q_t": 0.501429557800293,
|
|
"grad_norm": 19.01952362060547,
|
|
"learning_rate": 2.9850746268656714e-08,
|
|
"logits/chosen": 1.547282338142395,
|
|
"logits/rejected": 1.4952952861785889,
|
|
"logps/chosen": -78.90206909179688,
|
|
"logps/ref_chosen": -78.84872436523438,
|
|
"logps/ref_rejected": -97.88040161132812,
|
|
"logps/rejected": -97.87643432617188,
|
|
"loss": 1.3927,
|
|
"margin_dpo/margin_mean": -0.057308852672576904,
|
|
"margin_dpo/margin_std": 0.4990013837814331,
|
|
"step": 5
|
|
},
|
|
{
|
|
"epoch": 0.009070294784580499,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.06428655982017517,
|
|
"fcm_dpo/q_t": 0.49839404225349426,
|
|
"grad_norm": 18.07597541809082,
|
|
"learning_rate": 3.731343283582089e-08,
|
|
"logits/chosen": 1.5872879028320312,
|
|
"logits/rejected": 1.4798492193222046,
|
|
"logps/chosen": -68.31825256347656,
|
|
"logps/ref_chosen": -68.34607696533203,
|
|
"logps/ref_rejected": -99.24614715576172,
|
|
"logps/rejected": -99.28260803222656,
|
|
"loss": 1.3803,
|
|
"margin_dpo/margin_mean": 0.06428647041320801,
|
|
"margin_dpo/margin_std": 0.4039618670940399,
|
|
"step": 6
|
|
},
|
|
{
|
|
"epoch": 0.010582010582010581,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.08962023258209229,
|
|
"fcm_dpo/q_t": 0.5022399425506592,
|
|
"grad_norm": 17.446853637695312,
|
|
"learning_rate": 4.477611940298507e-08,
|
|
"logits/chosen": 1.458909273147583,
|
|
"logits/rejected": 1.3968511819839478,
|
|
"logps/chosen": -69.16606903076172,
|
|
"logps/ref_chosen": -69.11282348632812,
|
|
"logps/ref_rejected": -84.01641845703125,
|
|
"logps/rejected": -83.98003387451172,
|
|
"loss": 1.3958,
|
|
"margin_dpo/margin_mean": -0.08962073922157288,
|
|
"margin_dpo/margin_std": 0.4627250134944916,
|
|
"step": 7
|
|
},
|
|
{
|
|
"epoch": 0.012093726379440665,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.03348463773727417,
|
|
"fcm_dpo/q_t": 0.5008403062820435,
|
|
"grad_norm": 18.309078216552734,
|
|
"learning_rate": 5.223880597014925e-08,
|
|
"logits/chosen": 1.6482627391815186,
|
|
"logits/rejected": 1.6344785690307617,
|
|
"logps/chosen": -78.40745544433594,
|
|
"logps/ref_chosen": -78.3912353515625,
|
|
"logps/ref_rejected": -91.06254577636719,
|
|
"logps/rejected": -91.04528045654297,
|
|
"loss": 1.3902,
|
|
"margin_dpo/margin_mean": -0.03348389267921448,
|
|
"margin_dpo/margin_std": 0.4609626531600952,
|
|
"step": 8
|
|
},
|
|
{
|
|
"epoch": 0.013605442176870748,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.06733879446983337,
|
|
"fcm_dpo/q_t": 0.4983159303665161,
|
|
"grad_norm": 19.262584686279297,
|
|
"learning_rate": 5.970149253731343e-08,
|
|
"logits/chosen": 1.9055404663085938,
|
|
"logits/rejected": 1.7056143283843994,
|
|
"logps/chosen": -69.64701843261719,
|
|
"logps/ref_chosen": -69.67422485351562,
|
|
"logps/ref_rejected": -105.00473022460938,
|
|
"logps/rejected": -105.04486846923828,
|
|
"loss": 1.3801,
|
|
"margin_dpo/margin_mean": 0.06733927130699158,
|
|
"margin_dpo/margin_std": 0.44255518913269043,
|
|
"step": 9
|
|
},
|
|
{
|
|
"epoch": 0.015117157974300832,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.12282580137252808,
|
|
"fcm_dpo/q_t": 0.496931791305542,
|
|
"grad_norm": 18.874061584472656,
|
|
"learning_rate": 6.71641791044776e-08,
|
|
"logits/chosen": 1.5117344856262207,
|
|
"logits/rejected": 1.4378249645233154,
|
|
"logps/chosen": -79.62625122070312,
|
|
"logps/ref_chosen": -79.730712890625,
|
|
"logps/ref_rejected": -105.50645446777344,
|
|
"logps/rejected": -105.52481079101562,
|
|
"loss": 1.3745,
|
|
"margin_dpo/margin_mean": 0.12282583117485046,
|
|
"margin_dpo/margin_std": 0.4421079754829407,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.016628873771730914,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.0041959285736083984,
|
|
"fcm_dpo/q_t": 0.49989521503448486,
|
|
"grad_norm": 17.292160034179688,
|
|
"learning_rate": 7.462686567164178e-08,
|
|
"logits/chosen": 1.5478227138519287,
|
|
"logits/rejected": 1.5047085285186768,
|
|
"logps/chosen": -85.41871643066406,
|
|
"logps/ref_chosen": -85.41248321533203,
|
|
"logps/ref_rejected": -86.50241088867188,
|
|
"logps/rejected": -86.51283264160156,
|
|
"loss": 1.3863,
|
|
"margin_dpo/margin_mean": 0.004195868968963623,
|
|
"margin_dpo/margin_std": 0.41407087445259094,
|
|
"step": 11
|
|
},
|
|
{
|
|
"epoch": 0.018140589569160998,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.005808025598526001,
|
|
"fcm_dpo/q_t": 0.500144898891449,
|
|
"grad_norm": 17.388395309448242,
|
|
"learning_rate": 8.208955223880596e-08,
|
|
"logits/chosen": 1.7034389972686768,
|
|
"logits/rejected": 1.6622624397277832,
|
|
"logps/chosen": -81.38066101074219,
|
|
"logps/ref_chosen": -81.38086700439453,
|
|
"logps/ref_rejected": -89.88151550292969,
|
|
"logps/rejected": -89.87550354003906,
|
|
"loss": 1.3872,
|
|
"margin_dpo/margin_mean": -0.0058082640171051025,
|
|
"margin_dpo/margin_std": 0.35740458965301514,
|
|
"step": 12
|
|
},
|
|
{
|
|
"epoch": 0.019652305366591082,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.09430631995201111,
|
|
"fcm_dpo/q_t": 0.4976438283920288,
|
|
"grad_norm": 17.77792739868164,
|
|
"learning_rate": 8.955223880597014e-08,
|
|
"logits/chosen": 1.5007734298706055,
|
|
"logits/rejected": 1.3290517330169678,
|
|
"logps/chosen": -63.13867950439453,
|
|
"logps/ref_chosen": -63.17030715942383,
|
|
"logps/ref_rejected": -105.61166381835938,
|
|
"logps/rejected": -105.67434692382812,
|
|
"loss": 1.3772,
|
|
"margin_dpo/margin_mean": 0.09430572390556335,
|
|
"margin_dpo/margin_std": 0.35614386200904846,
|
|
"step": 13
|
|
},
|
|
{
|
|
"epoch": 0.021164021164021163,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.09425640106201172,
|
|
"fcm_dpo/q_t": 0.5023550987243652,
|
|
"grad_norm": 19.559247970581055,
|
|
"learning_rate": 9.701492537313432e-08,
|
|
"logits/chosen": 1.846583604812622,
|
|
"logits/rejected": 1.8075042963027954,
|
|
"logps/chosen": -80.72798156738281,
|
|
"logps/ref_chosen": -80.71014404296875,
|
|
"logps/ref_rejected": -89.86041259765625,
|
|
"logps/rejected": -89.78399658203125,
|
|
"loss": 1.396,
|
|
"margin_dpo/margin_mean": -0.0942547619342804,
|
|
"margin_dpo/margin_std": 0.3276791572570801,
|
|
"step": 14
|
|
},
|
|
{
|
|
"epoch": 0.022675736961451247,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.02120572328567505,
|
|
"fcm_dpo/q_t": 0.5005297660827637,
|
|
"grad_norm": 20.666595458984375,
|
|
"learning_rate": 1.044776119402985e-07,
|
|
"logits/chosen": 1.7599084377288818,
|
|
"logits/rejected": 1.6542625427246094,
|
|
"logps/chosen": -82.008056640625,
|
|
"logps/ref_chosen": -82.00294494628906,
|
|
"logps/ref_rejected": -106.43550109863281,
|
|
"logps/rejected": -106.41940307617188,
|
|
"loss": 1.3889,
|
|
"margin_dpo/margin_mean": -0.0212060809135437,
|
|
"margin_dpo/margin_std": 0.42040663957595825,
|
|
"step": 15
|
|
},
|
|
{
|
|
"epoch": 0.02418745275888133,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.055621325969696045,
|
|
"fcm_dpo/q_t": 0.5013903379440308,
|
|
"grad_norm": 17.204975128173828,
|
|
"learning_rate": 1.1194029850746268e-07,
|
|
"logits/chosen": 1.84678316116333,
|
|
"logits/rejected": 1.722154140472412,
|
|
"logps/chosen": -62.32442092895508,
|
|
"logps/ref_chosen": -62.308345794677734,
|
|
"logps/ref_rejected": -89.6508560180664,
|
|
"logps/rejected": -89.61131286621094,
|
|
"loss": 1.3922,
|
|
"margin_dpo/margin_mean": -0.05562084913253784,
|
|
"margin_dpo/margin_std": 0.3397616744041443,
|
|
"step": 16
|
|
},
|
|
{
|
|
"epoch": 0.025699168556311415,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.05070582032203674,
|
|
"fcm_dpo/q_t": 0.501266360282898,
|
|
"grad_norm": 18.484811782836914,
|
|
"learning_rate": 1.1940298507462686e-07,
|
|
"logits/chosen": 1.670881748199463,
|
|
"logits/rejected": 1.6353247165679932,
|
|
"logps/chosen": -85.19845581054688,
|
|
"logps/ref_chosen": -85.16903686523438,
|
|
"logps/ref_rejected": -102.57087707519531,
|
|
"logps/rejected": -102.54957580566406,
|
|
"loss": 1.3918,
|
|
"margin_dpo/margin_mean": -0.050705909729003906,
|
|
"margin_dpo/margin_std": 0.3820461928844452,
|
|
"step": 17
|
|
},
|
|
{
|
|
"epoch": 0.027210884353741496,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.10842156410217285,
|
|
"fcm_dpo/q_t": 0.4972917437553406,
|
|
"grad_norm": 17.016756057739258,
|
|
"learning_rate": 1.2686567164179106e-07,
|
|
"logits/chosen": 1.951217770576477,
|
|
"logits/rejected": 1.7988412380218506,
|
|
"logps/chosen": -63.13896560668945,
|
|
"logps/ref_chosen": -63.17793273925781,
|
|
"logps/ref_rejected": -86.06461334228516,
|
|
"logps/rejected": -86.13406372070312,
|
|
"loss": 1.3759,
|
|
"margin_dpo/margin_mean": 0.10842183232307434,
|
|
"margin_dpo/margin_std": 0.40306586027145386,
|
|
"step": 18
|
|
},
|
|
{
|
|
"epoch": 0.02872260015117158,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.02614566683769226,
|
|
"fcm_dpo/q_t": 0.500652551651001,
|
|
"grad_norm": 19.710174560546875,
|
|
"learning_rate": 1.343283582089552e-07,
|
|
"logits/chosen": 1.5407326221466064,
|
|
"logits/rejected": 1.539353609085083,
|
|
"logps/chosen": -85.8155746459961,
|
|
"logps/ref_chosen": -85.82405853271484,
|
|
"logps/ref_rejected": -100.07136535644531,
|
|
"logps/rejected": -100.0367431640625,
|
|
"loss": 1.3893,
|
|
"margin_dpo/margin_mean": -0.026145905256271362,
|
|
"margin_dpo/margin_std": 0.36016714572906494,
|
|
"step": 19
|
|
},
|
|
{
|
|
"epoch": 0.030234315948601664,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.03781422972679138,
|
|
"fcm_dpo/q_t": 0.5009453296661377,
|
|
"grad_norm": 18.178298950195312,
|
|
"learning_rate": 1.4179104477611938e-07,
|
|
"logits/chosen": 2.0051045417785645,
|
|
"logits/rejected": 1.9216687679290771,
|
|
"logps/chosen": -73.64117431640625,
|
|
"logps/ref_chosen": -73.58621215820312,
|
|
"logps/ref_rejected": -91.21690368652344,
|
|
"logps/rejected": -91.23403930664062,
|
|
"loss": 1.3905,
|
|
"margin_dpo/margin_mean": -0.03781440854072571,
|
|
"margin_dpo/margin_std": 0.39522528648376465,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.031746031746031744,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.07584941387176514,
|
|
"fcm_dpo/q_t": 0.49810492992401123,
|
|
"grad_norm": 18.019695281982422,
|
|
"learning_rate": 1.4925373134328355e-07,
|
|
"logits/chosen": 1.7651563882827759,
|
|
"logits/rejected": 1.6562526226043701,
|
|
"logps/chosen": -81.92829895019531,
|
|
"logps/ref_chosen": -81.97251892089844,
|
|
"logps/ref_rejected": -98.05976867675781,
|
|
"logps/rejected": -98.09140014648438,
|
|
"loss": 1.3791,
|
|
"margin_dpo/margin_mean": 0.07584935426712036,
|
|
"margin_dpo/margin_std": 0.3969747722148895,
|
|
"step": 21
|
|
},
|
|
{
|
|
"epoch": 0.03325774754346183,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.05920052528381348,
|
|
"fcm_dpo/q_t": 0.5014793872833252,
|
|
"grad_norm": 18.35191535949707,
|
|
"learning_rate": 1.5671641791044775e-07,
|
|
"logits/chosen": 1.5420267581939697,
|
|
"logits/rejected": 1.507912278175354,
|
|
"logps/chosen": -76.98601531982422,
|
|
"logps/ref_chosen": -76.99579620361328,
|
|
"logps/ref_rejected": -95.76089477539062,
|
|
"logps/rejected": -95.6919174194336,
|
|
"loss": 1.3926,
|
|
"margin_dpo/margin_mean": -0.0592007040977478,
|
|
"margin_dpo/margin_std": 0.40154093503952026,
|
|
"step": 22
|
|
},
|
|
{
|
|
"epoch": 0.03476946334089191,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.03171822428703308,
|
|
"fcm_dpo/q_t": 0.49920740723609924,
|
|
"grad_norm": 19.181137084960938,
|
|
"learning_rate": 1.6417910447761193e-07,
|
|
"logits/chosen": 1.9651353359222412,
|
|
"logits/rejected": 1.8704643249511719,
|
|
"logps/chosen": -84.74580383300781,
|
|
"logps/ref_chosen": -84.76856994628906,
|
|
"logps/ref_rejected": -107.28266906738281,
|
|
"logps/rejected": -107.29161834716797,
|
|
"loss": 1.3835,
|
|
"margin_dpo/margin_mean": 0.03171861171722412,
|
|
"margin_dpo/margin_std": 0.3716067969799042,
|
|
"step": 23
|
|
},
|
|
{
|
|
"epoch": 0.036281179138321996,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.03071492910385132,
|
|
"fcm_dpo/q_t": 0.49923175573349,
|
|
"grad_norm": 17.107723236083984,
|
|
"learning_rate": 1.716417910447761e-07,
|
|
"logits/chosen": 1.72803795337677,
|
|
"logits/rejected": 1.6687504053115845,
|
|
"logps/chosen": -69.82955932617188,
|
|
"logps/ref_chosen": -69.87112426757812,
|
|
"logps/ref_rejected": -84.02084350585938,
|
|
"logps/rejected": -84.00999450683594,
|
|
"loss": 1.3836,
|
|
"margin_dpo/margin_mean": 0.030714750289916992,
|
|
"margin_dpo/margin_std": 0.3912420868873596,
|
|
"step": 24
|
|
},
|
|
{
|
|
"epoch": 0.03779289493575208,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.02832847833633423,
|
|
"fcm_dpo/q_t": 0.49929308891296387,
|
|
"grad_norm": 19.49996566772461,
|
|
"learning_rate": 1.7910447761194027e-07,
|
|
"logits/chosen": 1.7510415315628052,
|
|
"logits/rejected": 1.6113269329071045,
|
|
"logps/chosen": -78.23873901367188,
|
|
"logps/ref_chosen": -78.22694396972656,
|
|
"logps/ref_rejected": -106.65234375,
|
|
"logps/rejected": -106.69246673583984,
|
|
"loss": 1.384,
|
|
"margin_dpo/margin_mean": 0.028328508138656616,
|
|
"margin_dpo/margin_std": 0.45287278294563293,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.039304610733182165,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.09531095623970032,
|
|
"fcm_dpo/q_t": 0.49761906266212463,
|
|
"grad_norm": 17.824203491210938,
|
|
"learning_rate": 1.8656716417910447e-07,
|
|
"logits/chosen": 1.8126094341278076,
|
|
"logits/rejected": 1.7892074584960938,
|
|
"logps/chosen": -74.5651626586914,
|
|
"logps/ref_chosen": -74.59750366210938,
|
|
"logps/ref_rejected": -93.57858276367188,
|
|
"logps/rejected": -93.64155578613281,
|
|
"loss": 1.3772,
|
|
"margin_dpo/margin_mean": 0.09531095623970032,
|
|
"margin_dpo/margin_std": 0.4165651202201843,
|
|
"step": 26
|
|
},
|
|
{
|
|
"epoch": 0.04081632653061224,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.008702605962753296,
|
|
"fcm_dpo/q_t": 0.4997824430465698,
|
|
"grad_norm": 18.663766860961914,
|
|
"learning_rate": 1.9402985074626865e-07,
|
|
"logits/chosen": 1.5955054759979248,
|
|
"logits/rejected": 1.5449596643447876,
|
|
"logps/chosen": -78.6395263671875,
|
|
"logps/ref_chosen": -78.64625549316406,
|
|
"logps/ref_rejected": -92.33645629882812,
|
|
"logps/rejected": -92.33843994140625,
|
|
"loss": 1.3857,
|
|
"margin_dpo/margin_mean": 0.00870358943939209,
|
|
"margin_dpo/margin_std": 0.33294573426246643,
|
|
"step": 27
|
|
},
|
|
{
|
|
"epoch": 0.042328042328042326,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.017391502857208252,
|
|
"fcm_dpo/q_t": 0.49956539273262024,
|
|
"grad_norm": 17.787866592407227,
|
|
"learning_rate": 2.0149253731343282e-07,
|
|
"logits/chosen": 1.8521151542663574,
|
|
"logits/rejected": 1.7968239784240723,
|
|
"logps/chosen": -76.90467071533203,
|
|
"logps/ref_chosen": -76.91271209716797,
|
|
"logps/ref_rejected": -88.48194885253906,
|
|
"logps/rejected": -88.49130249023438,
|
|
"loss": 1.3849,
|
|
"margin_dpo/margin_mean": 0.017391175031661987,
|
|
"margin_dpo/margin_std": 0.35291895270347595,
|
|
"step": 28
|
|
},
|
|
{
|
|
"epoch": 0.04383975812547241,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.04088890552520752,
|
|
"fcm_dpo/q_t": 0.4989783763885498,
|
|
"grad_norm": 21.243146896362305,
|
|
"learning_rate": 2.08955223880597e-07,
|
|
"logits/chosen": 1.9115407466888428,
|
|
"logits/rejected": 1.848691463470459,
|
|
"logps/chosen": -89.5405502319336,
|
|
"logps/ref_chosen": -89.62060546875,
|
|
"logps/ref_rejected": -100.57090759277344,
|
|
"logps/rejected": -100.53173828125,
|
|
"loss": 1.3828,
|
|
"margin_dpo/margin_mean": 0.040889084339141846,
|
|
"margin_dpo/margin_std": 0.48537182807922363,
|
|
"step": 29
|
|
},
|
|
{
|
|
"epoch": 0.045351473922902494,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.06612420082092285,
|
|
"fcm_dpo/q_t": 0.4983447790145874,
|
|
"grad_norm": 19.041486740112305,
|
|
"learning_rate": 2.1641791044776117e-07,
|
|
"logits/chosen": 1.7888214588165283,
|
|
"logits/rejected": 1.6230595111846924,
|
|
"logps/chosen": -68.77651977539062,
|
|
"logps/ref_chosen": -68.82381439208984,
|
|
"logps/ref_rejected": -104.7047119140625,
|
|
"logps/rejected": -104.72354125976562,
|
|
"loss": 1.3803,
|
|
"margin_dpo/margin_mean": 0.06612381339073181,
|
|
"margin_dpo/margin_std": 0.4717687964439392,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.04686318972033258,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.0026360750198364258,
|
|
"fcm_dpo/q_t": 0.4999334514141083,
|
|
"grad_norm": 20.6657772064209,
|
|
"learning_rate": 2.2388059701492537e-07,
|
|
"logits/chosen": 1.8177580833435059,
|
|
"logits/rejected": 1.69121515750885,
|
|
"logps/chosen": -86.07583618164062,
|
|
"logps/ref_chosen": -86.06916809082031,
|
|
"logps/ref_rejected": -116.66394805908203,
|
|
"logps/rejected": -116.67324829101562,
|
|
"loss": 1.3865,
|
|
"margin_dpo/margin_mean": 0.0026363134384155273,
|
|
"margin_dpo/margin_std": 0.4249485731124878,
|
|
"step": 31
|
|
},
|
|
{
|
|
"epoch": 0.04837490551776266,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.011246144771575928,
|
|
"fcm_dpo/q_t": 0.5002791285514832,
|
|
"grad_norm": 18.716094970703125,
|
|
"learning_rate": 2.3134328358208954e-07,
|
|
"logits/chosen": 1.7052407264709473,
|
|
"logits/rejected": 1.7523813247680664,
|
|
"logps/chosen": -87.57007598876953,
|
|
"logps/ref_chosen": -87.59808349609375,
|
|
"logps/ref_rejected": -100.26905822753906,
|
|
"logps/rejected": -100.22979736328125,
|
|
"loss": 1.3879,
|
|
"margin_dpo/margin_mean": -0.011246174573898315,
|
|
"margin_dpo/margin_std": 0.4132556915283203,
|
|
"step": 32
|
|
},
|
|
{
|
|
"epoch": 0.049886621315192746,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.009764552116394043,
|
|
"fcm_dpo/q_t": 0.4997554421424866,
|
|
"grad_norm": 19.76194190979004,
|
|
"learning_rate": 2.388059701492537e-07,
|
|
"logits/chosen": 1.4848226308822632,
|
|
"logits/rejected": 1.3876771926879883,
|
|
"logps/chosen": -83.2780990600586,
|
|
"logps/ref_chosen": -83.29850769042969,
|
|
"logps/ref_rejected": -94.60990142822266,
|
|
"logps/rejected": -94.59925842285156,
|
|
"loss": 1.3857,
|
|
"margin_dpo/margin_mean": 0.009763926267623901,
|
|
"margin_dpo/margin_std": 0.39006873965263367,
|
|
"step": 33
|
|
},
|
|
{
|
|
"epoch": 0.05139833711262283,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.010292023420333862,
|
|
"fcm_dpo/q_t": 0.4997427761554718,
|
|
"grad_norm": 17.98219108581543,
|
|
"learning_rate": 2.4626865671641786e-07,
|
|
"logits/chosen": 1.826859712600708,
|
|
"logits/rejected": 1.7421438694000244,
|
|
"logps/chosen": -70.12477111816406,
|
|
"logps/ref_chosen": -70.15069580078125,
|
|
"logps/ref_rejected": -84.4693832397461,
|
|
"logps/rejected": -84.45375061035156,
|
|
"loss": 1.3857,
|
|
"margin_dpo/margin_mean": 0.010291993618011475,
|
|
"margin_dpo/margin_std": 0.4043731391429901,
|
|
"step": 34
|
|
},
|
|
{
|
|
"epoch": 0.05291005291005291,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.08489355444908142,
|
|
"fcm_dpo/q_t": 0.4978798031806946,
|
|
"grad_norm": 18.01149559020996,
|
|
"learning_rate": 2.537313432835821e-07,
|
|
"logits/chosen": 1.5768963098526,
|
|
"logits/rejected": 1.5215035676956177,
|
|
"logps/chosen": -78.22184753417969,
|
|
"logps/ref_chosen": -78.25238037109375,
|
|
"logps/ref_rejected": -91.06356811523438,
|
|
"logps/rejected": -91.1179428100586,
|
|
"loss": 1.3782,
|
|
"margin_dpo/margin_mean": 0.08489382266998291,
|
|
"margin_dpo/margin_std": 0.3789323568344116,
|
|
"step": 35
|
|
},
|
|
{
|
|
"epoch": 0.05442176870748299,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.05526149272918701,
|
|
"fcm_dpo/q_t": 0.5013813972473145,
|
|
"grad_norm": 18.324071884155273,
|
|
"learning_rate": 2.611940298507462e-07,
|
|
"logits/chosen": 2.020540237426758,
|
|
"logits/rejected": 1.886946678161621,
|
|
"logps/chosen": -67.08876037597656,
|
|
"logps/ref_chosen": -67.06676483154297,
|
|
"logps/ref_rejected": -99.34661865234375,
|
|
"logps/rejected": -99.31333923339844,
|
|
"loss": 1.3922,
|
|
"margin_dpo/margin_mean": -0.05526161193847656,
|
|
"margin_dpo/margin_std": 0.39409780502319336,
|
|
"step": 36
|
|
},
|
|
{
|
|
"epoch": 0.055933484504913075,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.05224241316318512,
|
|
"fcm_dpo/q_t": 0.5013055205345154,
|
|
"grad_norm": 23.652420043945312,
|
|
"learning_rate": 2.686567164179104e-07,
|
|
"logits/chosen": 1.912178635597229,
|
|
"logits/rejected": 1.6196130514144897,
|
|
"logps/chosen": -75.90277099609375,
|
|
"logps/ref_chosen": -75.9269790649414,
|
|
"logps/ref_rejected": -130.34371948242188,
|
|
"logps/rejected": -130.26727294921875,
|
|
"loss": 1.3919,
|
|
"margin_dpo/margin_mean": -0.05224302411079407,
|
|
"margin_dpo/margin_std": 0.3591179847717285,
|
|
"step": 37
|
|
},
|
|
{
|
|
"epoch": 0.05744520030234316,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.0010804533958435059,
|
|
"fcm_dpo/q_t": 0.4999734163284302,
|
|
"grad_norm": 18.430307388305664,
|
|
"learning_rate": 2.761194029850746e-07,
|
|
"logits/chosen": 1.7531952857971191,
|
|
"logits/rejected": 1.7149598598480225,
|
|
"logps/chosen": -83.66578674316406,
|
|
"logps/ref_chosen": -83.65460205078125,
|
|
"logps/ref_rejected": -89.15221405029297,
|
|
"logps/rejected": -89.16448974609375,
|
|
"loss": 1.3867,
|
|
"margin_dpo/margin_mean": 0.0010806024074554443,
|
|
"margin_dpo/margin_std": 0.42552512884140015,
|
|
"step": 38
|
|
},
|
|
{
|
|
"epoch": 0.05895691609977324,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.01400206983089447,
|
|
"fcm_dpo/q_t": 0.49964994192123413,
|
|
"grad_norm": 18.991655349731445,
|
|
"learning_rate": 2.8358208955223876e-07,
|
|
"logits/chosen": 2.085275888442993,
|
|
"logits/rejected": 2.0355820655822754,
|
|
"logps/chosen": -76.13926696777344,
|
|
"logps/ref_chosen": -76.18706512451172,
|
|
"logps/ref_rejected": -94.39262390136719,
|
|
"logps/rejected": -94.35882568359375,
|
|
"loss": 1.3852,
|
|
"margin_dpo/margin_mean": 0.014002442359924316,
|
|
"margin_dpo/margin_std": 0.32028961181640625,
|
|
"step": 39
|
|
},
|
|
{
|
|
"epoch": 0.06046863189720333,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.07038983702659607,
|
|
"fcm_dpo/q_t": 0.49824073910713196,
|
|
"grad_norm": 17.965362548828125,
|
|
"learning_rate": 2.9104477611940296e-07,
|
|
"logits/chosen": 1.860640048980713,
|
|
"logits/rejected": 1.7459245920181274,
|
|
"logps/chosen": -77.41645050048828,
|
|
"logps/ref_chosen": -77.43476867675781,
|
|
"logps/ref_rejected": -98.58720397949219,
|
|
"logps/rejected": -98.63927459716797,
|
|
"loss": 1.3795,
|
|
"margin_dpo/margin_mean": 0.07039055228233337,
|
|
"margin_dpo/margin_std": 0.324634850025177,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.06198034769463341,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.08602339029312134,
|
|
"fcm_dpo/q_t": 0.49785032868385315,
|
|
"grad_norm": 18.264806747436523,
|
|
"learning_rate": 2.985074626865671e-07,
|
|
"logits/chosen": 1.8096897602081299,
|
|
"logits/rejected": 1.7466790676116943,
|
|
"logps/chosen": -86.822998046875,
|
|
"logps/ref_chosen": -86.87640380859375,
|
|
"logps/ref_rejected": -101.0856704711914,
|
|
"logps/rejected": -101.1182861328125,
|
|
"loss": 1.378,
|
|
"margin_dpo/margin_mean": 0.08602365851402283,
|
|
"margin_dpo/margin_std": 0.35287344455718994,
|
|
"step": 41
|
|
},
|
|
{
|
|
"epoch": 0.06349206349206349,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.07593846321105957,
|
|
"fcm_dpo/q_t": 0.49810317158699036,
|
|
"grad_norm": 18.090917587280273,
|
|
"learning_rate": 3.059701492537313e-07,
|
|
"logits/chosen": 1.737091064453125,
|
|
"logits/rejected": 1.6874079704284668,
|
|
"logps/chosen": -79.32017517089844,
|
|
"logps/ref_chosen": -79.35625457763672,
|
|
"logps/ref_rejected": -91.54881286621094,
|
|
"logps/rejected": -91.58865356445312,
|
|
"loss": 1.3791,
|
|
"margin_dpo/margin_mean": 0.07593908905982971,
|
|
"margin_dpo/margin_std": 0.4052484929561615,
|
|
"step": 42
|
|
},
|
|
{
|
|
"epoch": 0.06500377928949358,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.010380953550338745,
|
|
"fcm_dpo/q_t": 0.4997408986091614,
|
|
"grad_norm": 19.443647384643555,
|
|
"learning_rate": 3.134328358208955e-07,
|
|
"logits/chosen": 1.8869857788085938,
|
|
"logits/rejected": 1.7890424728393555,
|
|
"logps/chosen": -90.7708969116211,
|
|
"logps/ref_chosen": -90.81220245361328,
|
|
"logps/ref_rejected": -94.16316986083984,
|
|
"logps/rejected": -94.13224792480469,
|
|
"loss": 1.3857,
|
|
"margin_dpo/margin_mean": 0.010380983352661133,
|
|
"margin_dpo/margin_std": 0.424875408411026,
|
|
"step": 43
|
|
},
|
|
{
|
|
"epoch": 0.06651549508692366,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.007240593433380127,
|
|
"fcm_dpo/q_t": 0.5001809000968933,
|
|
"grad_norm": 19.059425354003906,
|
|
"learning_rate": 3.2089552238805965e-07,
|
|
"logits/chosen": 1.4611210823059082,
|
|
"logits/rejected": 1.3848025798797607,
|
|
"logps/chosen": -88.25433349609375,
|
|
"logps/ref_chosen": -88.27932739257812,
|
|
"logps/ref_rejected": -101.14324951171875,
|
|
"logps/rejected": -101.11101531982422,
|
|
"loss": 1.3875,
|
|
"margin_dpo/margin_mean": -0.007240712642669678,
|
|
"margin_dpo/margin_std": 0.4283526539802551,
|
|
"step": 44
|
|
},
|
|
{
|
|
"epoch": 0.06802721088435375,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.055614978075027466,
|
|
"fcm_dpo/q_t": 0.49861085414886475,
|
|
"grad_norm": 19.44415855407715,
|
|
"learning_rate": 3.2835820895522385e-07,
|
|
"logits/chosen": 1.6400771141052246,
|
|
"logits/rejected": 1.5230625867843628,
|
|
"logps/chosen": -78.38670349121094,
|
|
"logps/ref_chosen": -78.40264892578125,
|
|
"logps/ref_rejected": -109.39339447021484,
|
|
"logps/rejected": -109.43305969238281,
|
|
"loss": 1.3812,
|
|
"margin_dpo/margin_mean": 0.05561518669128418,
|
|
"margin_dpo/margin_std": 0.44193029403686523,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 0.06953892668178382,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.01664257049560547,
|
|
"fcm_dpo/q_t": 0.49958425760269165,
|
|
"grad_norm": 18.43988800048828,
|
|
"learning_rate": 3.3582089552238805e-07,
|
|
"logits/chosen": 1.6313531398773193,
|
|
"logits/rejected": 1.507079839706421,
|
|
"logps/chosen": -77.99624633789062,
|
|
"logps/ref_chosen": -78.08491516113281,
|
|
"logps/ref_rejected": -97.42544555664062,
|
|
"logps/rejected": -97.35342407226562,
|
|
"loss": 1.3849,
|
|
"margin_dpo/margin_mean": 0.01664254069328308,
|
|
"margin_dpo/margin_std": 0.34713345766067505,
|
|
"step": 46
|
|
},
|
|
{
|
|
"epoch": 0.0710506424792139,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.048463642597198486,
|
|
"fcm_dpo/q_t": 0.4987889528274536,
|
|
"grad_norm": 19.143239974975586,
|
|
"learning_rate": 3.432835820895522e-07,
|
|
"logits/chosen": 1.6348772048950195,
|
|
"logits/rejected": 1.5529307126998901,
|
|
"logps/chosen": -70.75807189941406,
|
|
"logps/ref_chosen": -70.78988647460938,
|
|
"logps/ref_rejected": -91.17266845703125,
|
|
"logps/rejected": -91.18931579589844,
|
|
"loss": 1.3817,
|
|
"margin_dpo/margin_mean": 0.0484640896320343,
|
|
"margin_dpo/margin_std": 0.3223074674606323,
|
|
"step": 47
|
|
},
|
|
{
|
|
"epoch": 0.07256235827664399,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.07357487082481384,
|
|
"fcm_dpo/q_t": 0.49816131591796875,
|
|
"grad_norm": 17.006410598754883,
|
|
"learning_rate": 3.507462686567164e-07,
|
|
"logits/chosen": 1.8455934524536133,
|
|
"logits/rejected": 1.7849183082580566,
|
|
"logps/chosen": -66.62384033203125,
|
|
"logps/ref_chosen": -66.67327880859375,
|
|
"logps/ref_rejected": -79.28543853759766,
|
|
"logps/rejected": -79.3095703125,
|
|
"loss": 1.3792,
|
|
"margin_dpo/margin_mean": 0.07357525825500488,
|
|
"margin_dpo/margin_std": 0.30543482303619385,
|
|
"step": 48
|
|
},
|
|
{
|
|
"epoch": 0.07407407407407407,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.005153179168701172,
|
|
"fcm_dpo/q_t": 0.5001289248466492,
|
|
"grad_norm": 17.522451400756836,
|
|
"learning_rate": 3.5820895522388055e-07,
|
|
"logits/chosen": 1.5053105354309082,
|
|
"logits/rejected": 1.4593782424926758,
|
|
"logps/chosen": -75.131103515625,
|
|
"logps/ref_chosen": -75.17504119873047,
|
|
"logps/ref_rejected": -80.5369873046875,
|
|
"logps/rejected": -80.48789978027344,
|
|
"loss": 1.3871,
|
|
"margin_dpo/margin_mean": -0.005153149366378784,
|
|
"margin_dpo/margin_std": 0.34840038418769836,
|
|
"step": 49
|
|
},
|
|
{
|
|
"epoch": 0.07558578987150416,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.008986026048660278,
|
|
"fcm_dpo/q_t": 0.49977606534957886,
|
|
"grad_norm": 17.941247940063477,
|
|
"learning_rate": 3.6567164179104475e-07,
|
|
"logits/chosen": 1.9036304950714111,
|
|
"logits/rejected": 1.8325103521347046,
|
|
"logps/chosen": -71.21244812011719,
|
|
"logps/ref_chosen": -71.2314224243164,
|
|
"logps/ref_rejected": -87.59088134765625,
|
|
"logps/rejected": -87.58090209960938,
|
|
"loss": 1.3857,
|
|
"margin_dpo/margin_mean": 0.008985787630081177,
|
|
"margin_dpo/margin_std": 0.34656456112861633,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.07709750566893424,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.026766493916511536,
|
|
"fcm_dpo/q_t": 0.5006682872772217,
|
|
"grad_norm": 18.8486385345459,
|
|
"learning_rate": 3.7313432835820895e-07,
|
|
"logits/chosen": 1.8772060871124268,
|
|
"logits/rejected": 1.823173999786377,
|
|
"logps/chosen": -78.64732360839844,
|
|
"logps/ref_chosen": -78.69171142578125,
|
|
"logps/ref_rejected": -100.78950500488281,
|
|
"logps/rejected": -100.71833801269531,
|
|
"loss": 1.3894,
|
|
"margin_dpo/margin_mean": -0.026766330003738403,
|
|
"margin_dpo/margin_std": 0.4199393391609192,
|
|
"step": 51
|
|
},
|
|
{
|
|
"epoch": 0.07860922146636433,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.02304530143737793,
|
|
"fcm_dpo/q_t": 0.4994242787361145,
|
|
"grad_norm": 20.32939910888672,
|
|
"learning_rate": 3.805970149253731e-07,
|
|
"logits/chosen": 1.8706003427505493,
|
|
"logits/rejected": 1.694793939590454,
|
|
"logps/chosen": -89.05101776123047,
|
|
"logps/ref_chosen": -89.09419250488281,
|
|
"logps/ref_rejected": -116.87469482421875,
|
|
"logps/rejected": -116.85455322265625,
|
|
"loss": 1.3845,
|
|
"margin_dpo/margin_mean": 0.023045867681503296,
|
|
"margin_dpo/margin_std": 0.4608180522918701,
|
|
"step": 52
|
|
},
|
|
{
|
|
"epoch": 0.0801209372637944,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.10077275335788727,
|
|
"fcm_dpo/q_t": 0.49748221039772034,
|
|
"grad_norm": 17.24545669555664,
|
|
"learning_rate": 3.880597014925373e-07,
|
|
"logits/chosen": 1.8220537900924683,
|
|
"logits/rejected": 1.7768255472183228,
|
|
"logps/chosen": -74.13320922851562,
|
|
"logps/ref_chosen": -74.21418762207031,
|
|
"logps/ref_rejected": -75.71168518066406,
|
|
"logps/rejected": -75.73147583007812,
|
|
"loss": 1.3768,
|
|
"margin_dpo/margin_mean": 0.1007724404335022,
|
|
"margin_dpo/margin_std": 0.4598434269428253,
|
|
"step": 53
|
|
},
|
|
{
|
|
"epoch": 0.08163265306122448,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.03820416331291199,
|
|
"fcm_dpo/q_t": 0.4990437626838684,
|
|
"grad_norm": 16.42124366760254,
|
|
"learning_rate": 3.9552238805970144e-07,
|
|
"logits/chosen": 1.7365084886550903,
|
|
"logits/rejected": 1.7177741527557373,
|
|
"logps/chosen": -65.5650863647461,
|
|
"logps/ref_chosen": -65.63475799560547,
|
|
"logps/ref_rejected": -76.4462890625,
|
|
"logps/rejected": -76.41482543945312,
|
|
"loss": 1.383,
|
|
"margin_dpo/margin_mean": 0.03820416331291199,
|
|
"margin_dpo/margin_std": 0.41922712326049805,
|
|
"step": 54
|
|
},
|
|
{
|
|
"epoch": 0.08314436885865457,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.20346182584762573,
|
|
"fcm_dpo/q_t": 0.49491745233535767,
|
|
"grad_norm": 20.51268768310547,
|
|
"learning_rate": 4.0298507462686564e-07,
|
|
"logits/chosen": 1.6703486442565918,
|
|
"logits/rejected": 1.4564030170440674,
|
|
"logps/chosen": -68.64900207519531,
|
|
"logps/ref_chosen": -68.7640380859375,
|
|
"logps/ref_rejected": -108.80074310302734,
|
|
"logps/rejected": -108.88917541503906,
|
|
"loss": 1.3666,
|
|
"margin_dpo/margin_mean": 0.20346179604530334,
|
|
"margin_dpo/margin_std": 0.45147860050201416,
|
|
"step": 55
|
|
},
|
|
{
|
|
"epoch": 0.08465608465608465,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.08395838737487793,
|
|
"fcm_dpo/q_t": 0.4979020357131958,
|
|
"grad_norm": 17.007356643676758,
|
|
"learning_rate": 4.1044776119402984e-07,
|
|
"logits/chosen": 1.8042173385620117,
|
|
"logits/rejected": 1.777266502380371,
|
|
"logps/chosen": -74.74409484863281,
|
|
"logps/ref_chosen": -74.7939453125,
|
|
"logps/ref_rejected": -81.83535766601562,
|
|
"logps/rejected": -81.86946105957031,
|
|
"loss": 1.3784,
|
|
"margin_dpo/margin_mean": 0.08395865559577942,
|
|
"margin_dpo/margin_std": 0.44492030143737793,
|
|
"step": 56
|
|
},
|
|
{
|
|
"epoch": 0.08616780045351474,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.251179039478302,
|
|
"fcm_dpo/q_t": 0.49372607469558716,
|
|
"grad_norm": 18.769027709960938,
|
|
"learning_rate": 4.17910447761194e-07,
|
|
"logits/chosen": 1.770645022392273,
|
|
"logits/rejected": 1.5965385437011719,
|
|
"logps/chosen": -74.42295837402344,
|
|
"logps/ref_chosen": -74.5794677734375,
|
|
"logps/ref_rejected": -105.61981964111328,
|
|
"logps/rejected": -105.71448516845703,
|
|
"loss": 1.362,
|
|
"margin_dpo/margin_mean": 0.25117942690849304,
|
|
"margin_dpo/margin_std": 0.5226833820343018,
|
|
"step": 57
|
|
},
|
|
{
|
|
"epoch": 0.08767951625094482,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.05911412835121155,
|
|
"fcm_dpo/q_t": 0.4985233545303345,
|
|
"grad_norm": 19.216299057006836,
|
|
"learning_rate": 4.253731343283582e-07,
|
|
"logits/chosen": 1.771589994430542,
|
|
"logits/rejected": 1.6873626708984375,
|
|
"logps/chosen": -92.22261047363281,
|
|
"logps/ref_chosen": -92.24464416503906,
|
|
"logps/ref_rejected": -103.18975830078125,
|
|
"logps/rejected": -103.22683715820312,
|
|
"loss": 1.3809,
|
|
"margin_dpo/margin_mean": 0.059114038944244385,
|
|
"margin_dpo/margin_std": 0.44918161630630493,
|
|
"step": 58
|
|
},
|
|
{
|
|
"epoch": 0.08919123204837491,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.1401573121547699,
|
|
"fcm_dpo/q_t": 0.4964984953403473,
|
|
"grad_norm": 16.799150466918945,
|
|
"learning_rate": 4.3283582089552234e-07,
|
|
"logits/chosen": 1.7912099361419678,
|
|
"logits/rejected": 1.5354535579681396,
|
|
"logps/chosen": -66.98262786865234,
|
|
"logps/ref_chosen": -67.12688446044922,
|
|
"logps/ref_rejected": -91.69569396972656,
|
|
"logps/rejected": -91.69160461425781,
|
|
"loss": 1.3729,
|
|
"margin_dpo/margin_mean": 0.14015719294548035,
|
|
"margin_dpo/margin_std": 0.4380917549133301,
|
|
"step": 59
|
|
},
|
|
{
|
|
"epoch": 0.09070294784580499,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.030887484550476074,
|
|
"fcm_dpo/q_t": 0.49922844767570496,
|
|
"grad_norm": 18.56014060974121,
|
|
"learning_rate": 4.4029850746268654e-07,
|
|
"logits/chosen": 1.6983575820922852,
|
|
"logits/rejected": 1.7200520038604736,
|
|
"logps/chosen": -79.62914276123047,
|
|
"logps/ref_chosen": -79.74327087402344,
|
|
"logps/ref_rejected": -77.89244079589844,
|
|
"logps/rejected": -77.8092041015625,
|
|
"loss": 1.3838,
|
|
"margin_dpo/margin_mean": 0.030887097120285034,
|
|
"margin_dpo/margin_std": 0.4888887405395508,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.09221466364323508,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.03356589376926422,
|
|
"fcm_dpo/q_t": 0.4991615414619446,
|
|
"grad_norm": 16.52377700805664,
|
|
"learning_rate": 4.4776119402985074e-07,
|
|
"logits/chosen": 1.9934542179107666,
|
|
"logits/rejected": 1.939957857131958,
|
|
"logps/chosen": -66.00021362304688,
|
|
"logps/ref_chosen": -66.08685302734375,
|
|
"logps/ref_rejected": -88.1458740234375,
|
|
"logps/rejected": -88.09280395507812,
|
|
"loss": 1.3835,
|
|
"margin_dpo/margin_mean": 0.03356604278087616,
|
|
"margin_dpo/margin_std": 0.47667667269706726,
|
|
"step": 61
|
|
},
|
|
{
|
|
"epoch": 0.09372637944066516,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.13860514760017395,
|
|
"fcm_dpo/q_t": 0.4965377449989319,
|
|
"grad_norm": 17.864152908325195,
|
|
"learning_rate": 4.552238805970149e-07,
|
|
"logits/chosen": 1.8296890258789062,
|
|
"logits/rejected": 1.7605338096618652,
|
|
"logps/chosen": -80.92373657226562,
|
|
"logps/ref_chosen": -81.0108871459961,
|
|
"logps/ref_rejected": -95.50444793701172,
|
|
"logps/rejected": -95.555908203125,
|
|
"loss": 1.3732,
|
|
"margin_dpo/margin_mean": 0.13860544562339783,
|
|
"margin_dpo/margin_std": 0.5384140014648438,
|
|
"step": 62
|
|
},
|
|
{
|
|
"epoch": 0.09523809523809523,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.19448095560073853,
|
|
"fcm_dpo/q_t": 0.4951436519622803,
|
|
"grad_norm": 19.299381256103516,
|
|
"learning_rate": 4.626865671641791e-07,
|
|
"logits/chosen": 2.365891456604004,
|
|
"logits/rejected": 2.2761406898498535,
|
|
"logps/chosen": -78.40909576416016,
|
|
"logps/ref_chosen": -78.57593536376953,
|
|
"logps/ref_rejected": -99.71000671386719,
|
|
"logps/rejected": -99.73765563964844,
|
|
"loss": 1.3678,
|
|
"margin_dpo/margin_mean": 0.19448035955429077,
|
|
"margin_dpo/margin_std": 0.5808597803115845,
|
|
"step": 63
|
|
},
|
|
{
|
|
"epoch": 0.09674981103552532,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.02114969491958618,
|
|
"fcm_dpo/q_t": 0.49946916103363037,
|
|
"grad_norm": 16.543537139892578,
|
|
"learning_rate": 4.701492537313433e-07,
|
|
"logits/chosen": 1.6816076040267944,
|
|
"logits/rejected": 1.6188148260116577,
|
|
"logps/chosen": -69.18231201171875,
|
|
"logps/ref_chosen": -69.24063110351562,
|
|
"logps/ref_rejected": -84.14842987060547,
|
|
"logps/rejected": -84.11125183105469,
|
|
"loss": 1.3849,
|
|
"margin_dpo/margin_mean": 0.021149873733520508,
|
|
"margin_dpo/margin_std": 0.5421885848045349,
|
|
"step": 64
|
|
},
|
|
{
|
|
"epoch": 0.0982615268329554,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.17138445377349854,
|
|
"fcm_dpo/q_t": 0.4957193434238434,
|
|
"grad_norm": 18.766584396362305,
|
|
"learning_rate": 4.776119402985074e-07,
|
|
"logits/chosen": 2.203514337539673,
|
|
"logits/rejected": 2.1390552520751953,
|
|
"logps/chosen": -83.96290588378906,
|
|
"logps/ref_chosen": -84.0351333618164,
|
|
"logps/ref_rejected": -96.42926788330078,
|
|
"logps/rejected": -96.52842712402344,
|
|
"loss": 1.3701,
|
|
"margin_dpo/margin_mean": 0.171383798122406,
|
|
"margin_dpo/margin_std": 0.5873081684112549,
|
|
"step": 65
|
|
},
|
|
{
|
|
"epoch": 0.09977324263038549,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.18604600429534912,
|
|
"fcm_dpo/q_t": 0.49535736441612244,
|
|
"grad_norm": 18.556272506713867,
|
|
"learning_rate": 4.850746268656717e-07,
|
|
"logits/chosen": 1.7415666580200195,
|
|
"logits/rejected": 1.6523596048355103,
|
|
"logps/chosen": -87.71971893310547,
|
|
"logps/ref_chosen": -87.79238891601562,
|
|
"logps/ref_rejected": -95.26547241210938,
|
|
"logps/rejected": -95.37884521484375,
|
|
"loss": 1.3688,
|
|
"margin_dpo/margin_mean": 0.18604576587677002,
|
|
"margin_dpo/margin_std": 0.6094644665718079,
|
|
"step": 66
|
|
},
|
|
{
|
|
"epoch": 0.10128495842781557,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.19521397352218628,
|
|
"fcm_dpo/q_t": 0.49512505531311035,
|
|
"grad_norm": 19.030086517333984,
|
|
"learning_rate": 4.925373134328357e-07,
|
|
"logits/chosen": 1.739748477935791,
|
|
"logits/rejected": 1.6200695037841797,
|
|
"logps/chosen": -77.81809997558594,
|
|
"logps/ref_chosen": -78.00114440917969,
|
|
"logps/ref_rejected": -96.03421020507812,
|
|
"logps/rejected": -96.04637908935547,
|
|
"loss": 1.3675,
|
|
"margin_dpo/margin_mean": 0.1952143907546997,
|
|
"margin_dpo/margin_std": 0.5094437003135681,
|
|
"step": 67
|
|
},
|
|
{
|
|
"epoch": 0.10279667422524566,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.05804261565208435,
|
|
"fcm_dpo/q_t": 0.4985469579696655,
|
|
"grad_norm": 19.809654235839844,
|
|
"learning_rate": 5e-07,
|
|
"logits/chosen": 1.5946201086044312,
|
|
"logits/rejected": 1.511031985282898,
|
|
"logps/chosen": -96.04458618164062,
|
|
"logps/ref_chosen": -96.04267883300781,
|
|
"logps/ref_rejected": -110.91169738769531,
|
|
"logps/rejected": -110.97163391113281,
|
|
"loss": 1.3815,
|
|
"margin_dpo/margin_mean": 0.05804276466369629,
|
|
"margin_dpo/margin_std": 0.5992398262023926,
|
|
"step": 68
|
|
},
|
|
{
|
|
"epoch": 0.10430839002267574,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.1814037561416626,
|
|
"fcm_dpo/q_t": 0.4954685568809509,
|
|
"grad_norm": 20.02865219116211,
|
|
"learning_rate": 4.999965034812934e-07,
|
|
"logits/chosen": 1.5734765529632568,
|
|
"logits/rejected": 1.4613232612609863,
|
|
"logps/chosen": -84.95499420166016,
|
|
"logps/ref_chosen": -85.11124420166016,
|
|
"logps/ref_rejected": -107.57357025146484,
|
|
"logps/rejected": -107.59872436523438,
|
|
"loss": 1.3692,
|
|
"margin_dpo/margin_mean": 0.18140414357185364,
|
|
"margin_dpo/margin_std": 0.6236604452133179,
|
|
"step": 69
|
|
},
|
|
{
|
|
"epoch": 0.10582010582010581,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.2574300467967987,
|
|
"fcm_dpo/q_t": 0.4935743510723114,
|
|
"grad_norm": 18.575429916381836,
|
|
"learning_rate": 4.999860140229787e-07,
|
|
"logits/chosen": 1.9775105714797974,
|
|
"logits/rejected": 1.9188661575317383,
|
|
"logps/chosen": -81.60387420654297,
|
|
"logps/ref_chosen": -81.87960815429688,
|
|
"logps/ref_rejected": -92.63243103027344,
|
|
"logps/rejected": -92.61412048339844,
|
|
"loss": 1.3619,
|
|
"margin_dpo/margin_mean": 0.2574295997619629,
|
|
"margin_dpo/margin_std": 0.6903476715087891,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.1073318216175359,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.1851758360862732,
|
|
"fcm_dpo/q_t": 0.4953737258911133,
|
|
"grad_norm": 17.575109481811523,
|
|
"learning_rate": 4.999685319184688e-07,
|
|
"logits/chosen": 1.6189801692962646,
|
|
"logits/rejected": 1.613362193107605,
|
|
"logps/chosen": -79.58382415771484,
|
|
"logps/ref_chosen": -79.74766540527344,
|
|
"logps/ref_rejected": -83.39110565185547,
|
|
"logps/rejected": -83.41243743896484,
|
|
"loss": 1.3688,
|
|
"margin_dpo/margin_mean": 0.18517541885375977,
|
|
"margin_dpo/margin_std": 0.6131936311721802,
|
|
"step": 71
|
|
},
|
|
{
|
|
"epoch": 0.10884353741496598,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.31794312596321106,
|
|
"fcm_dpo/q_t": 0.4920644462108612,
|
|
"grad_norm": 18.973140716552734,
|
|
"learning_rate": 4.999440576567755e-07,
|
|
"logits/chosen": 1.853528380393982,
|
|
"logits/rejected": 1.660147786140442,
|
|
"logps/chosen": -72.76451873779297,
|
|
"logps/ref_chosen": -73.04458618164062,
|
|
"logps/ref_rejected": -92.64720153808594,
|
|
"logps/rejected": -92.6850814819336,
|
|
"loss": 1.3561,
|
|
"margin_dpo/margin_mean": 0.3179430365562439,
|
|
"margin_dpo/margin_std": 0.7343186140060425,
|
|
"step": 72
|
|
},
|
|
{
|
|
"epoch": 0.11035525321239607,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.03165990114212036,
|
|
"fcm_dpo/q_t": 0.4992097020149231,
|
|
"grad_norm": 19.453399658203125,
|
|
"learning_rate": 4.999125919224965e-07,
|
|
"logits/chosen": 1.5204589366912842,
|
|
"logits/rejected": 1.4590003490447998,
|
|
"logps/chosen": -87.63735961914062,
|
|
"logps/ref_chosen": -87.71681213378906,
|
|
"logps/ref_rejected": -96.93572998046875,
|
|
"logps/rejected": -96.88794708251953,
|
|
"loss": 1.3846,
|
|
"margin_dpo/margin_mean": 0.031659454107284546,
|
|
"margin_dpo/margin_std": 0.734971821308136,
|
|
"step": 73
|
|
},
|
|
{
|
|
"epoch": 0.11186696900982615,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.31990575790405273,
|
|
"fcm_dpo/q_t": 0.4920119047164917,
|
|
"grad_norm": 17.90540313720703,
|
|
"learning_rate": 4.998741355957963e-07,
|
|
"logits/chosen": 1.754788875579834,
|
|
"logits/rejected": 1.5819981098175049,
|
|
"logps/chosen": -66.66778564453125,
|
|
"logps/ref_chosen": -67.07321166992188,
|
|
"logps/ref_rejected": -96.5340347290039,
|
|
"logps/rejected": -96.44851684570312,
|
|
"loss": 1.3557,
|
|
"margin_dpo/margin_mean": 0.31990575790405273,
|
|
"margin_dpo/margin_std": 0.6406757235527039,
|
|
"step": 74
|
|
},
|
|
{
|
|
"epoch": 0.11337868480725624,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.2673693597316742,
|
|
"fcm_dpo/q_t": 0.49332353472709656,
|
|
"grad_norm": 17.061166763305664,
|
|
"learning_rate": 4.998286897523808e-07,
|
|
"logits/chosen": 1.7332627773284912,
|
|
"logits/rejected": 1.5775887966156006,
|
|
"logps/chosen": -61.539031982421875,
|
|
"logps/ref_chosen": -61.80186462402344,
|
|
"logps/ref_rejected": -82.37368774414062,
|
|
"logps/rejected": -82.37822723388672,
|
|
"loss": 1.361,
|
|
"margin_dpo/margin_mean": 0.26736900210380554,
|
|
"margin_dpo/margin_std": 0.7122489213943481,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.11489040060468632,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.22301767766475677,
|
|
"fcm_dpo/q_t": 0.4944329559803009,
|
|
"grad_norm": 17.773778915405273,
|
|
"learning_rate": 4.997762556634679e-07,
|
|
"logits/chosen": 1.411060094833374,
|
|
"logits/rejected": 1.2945332527160645,
|
|
"logps/chosen": -69.60801696777344,
|
|
"logps/ref_chosen": -69.92233276367188,
|
|
"logps/ref_rejected": -97.08378601074219,
|
|
"logps/rejected": -96.99249267578125,
|
|
"loss": 1.3657,
|
|
"margin_dpo/margin_mean": 0.22301700711250305,
|
|
"margin_dpo/margin_std": 0.7884755730628967,
|
|
"step": 76
|
|
},
|
|
{
|
|
"epoch": 0.1164021164021164,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.39924293756484985,
|
|
"fcm_dpo/q_t": 0.49004054069519043,
|
|
"grad_norm": 18.30223846435547,
|
|
"learning_rate": 4.99716834795752e-07,
|
|
"logits/chosen": 1.6913138628005981,
|
|
"logits/rejected": 1.5843505859375,
|
|
"logps/chosen": -70.841552734375,
|
|
"logps/ref_chosen": -71.206298828125,
|
|
"logps/ref_rejected": -95.22071075439453,
|
|
"logps/rejected": -95.25521850585938,
|
|
"loss": 1.3483,
|
|
"margin_dpo/margin_mean": 0.3992432653903961,
|
|
"margin_dpo/margin_std": 0.720382809638977,
|
|
"step": 77
|
|
},
|
|
{
|
|
"epoch": 0.11791383219954649,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.33443766832351685,
|
|
"fcm_dpo/q_t": 0.4916543960571289,
|
|
"grad_norm": 17.89702606201172,
|
|
"learning_rate": 4.996504288113623e-07,
|
|
"logits/chosen": 1.7041699886322021,
|
|
"logits/rejected": 1.6885236501693726,
|
|
"logps/chosen": -83.9932861328125,
|
|
"logps/ref_chosen": -84.40055847167969,
|
|
"logps/ref_rejected": -95.41949462890625,
|
|
"logps/rejected": -95.34666442871094,
|
|
"loss": 1.3547,
|
|
"margin_dpo/margin_mean": 0.3344375789165497,
|
|
"margin_dpo/margin_std": 0.7655009031295776,
|
|
"step": 78
|
|
},
|
|
{
|
|
"epoch": 0.11942554799697656,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.4601489007472992,
|
|
"fcm_dpo/q_t": 0.48854029178619385,
|
|
"grad_norm": 19.30738639831543,
|
|
"learning_rate": 4.995770395678171e-07,
|
|
"logits/chosen": 1.777226209640503,
|
|
"logits/rejected": 1.5804474353790283,
|
|
"logps/chosen": -65.54251098632812,
|
|
"logps/ref_chosen": -65.93923950195312,
|
|
"logps/ref_rejected": -102.92240905761719,
|
|
"logps/rejected": -102.98582458496094,
|
|
"loss": 1.3432,
|
|
"margin_dpo/margin_mean": 0.46014824509620667,
|
|
"margin_dpo/margin_std": 0.9754971265792847,
|
|
"step": 79
|
|
},
|
|
{
|
|
"epoch": 0.12093726379440665,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.280136376619339,
|
|
"fcm_dpo/q_t": 0.4930172264575958,
|
|
"grad_norm": 17.493688583374023,
|
|
"learning_rate": 4.994966691179711e-07,
|
|
"logits/chosen": 1.8364324569702148,
|
|
"logits/rejected": 1.6422932147979736,
|
|
"logps/chosen": -78.30735778808594,
|
|
"logps/ref_chosen": -78.61624908447266,
|
|
"logps/ref_rejected": -99.9122314453125,
|
|
"logps/rejected": -99.88346862792969,
|
|
"loss": 1.3604,
|
|
"margin_dpo/margin_mean": 0.2801358699798584,
|
|
"margin_dpo/margin_std": 0.8769626617431641,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.12244897959183673,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.37103793025016785,
|
|
"fcm_dpo/q_t": 0.4907492399215698,
|
|
"grad_norm": 17.90415382385254,
|
|
"learning_rate": 4.994093197099587e-07,
|
|
"logits/chosen": 1.5202568769454956,
|
|
"logits/rejected": 1.4248197078704834,
|
|
"logps/chosen": -79.17378234863281,
|
|
"logps/ref_chosen": -79.49641418457031,
|
|
"logps/ref_rejected": -94.52413940429688,
|
|
"logps/rejected": -94.57255554199219,
|
|
"loss": 1.3515,
|
|
"margin_dpo/margin_mean": 0.3710388243198395,
|
|
"margin_dpo/margin_std": 0.8805793523788452,
|
|
"step": 81
|
|
},
|
|
{
|
|
"epoch": 0.12396069538926682,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.6147125959396362,
|
|
"fcm_dpo/q_t": 0.48470258712768555,
|
|
"grad_norm": 17.819875717163086,
|
|
"learning_rate": 4.993149937871306e-07,
|
|
"logits/chosen": 1.7468092441558838,
|
|
"logits/rejected": 1.5803828239440918,
|
|
"logps/chosen": -64.35386657714844,
|
|
"logps/ref_chosen": -64.97168731689453,
|
|
"logps/ref_rejected": -86.69085693359375,
|
|
"logps/rejected": -86.68775939941406,
|
|
"loss": 1.3285,
|
|
"margin_dpo/margin_mean": 0.6147127151489258,
|
|
"margin_dpo/margin_std": 1.030060887336731,
|
|
"step": 82
|
|
},
|
|
{
|
|
"epoch": 0.1254724111866969,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.5431385040283203,
|
|
"fcm_dpo/q_t": 0.4864582121372223,
|
|
"grad_norm": 18.520225524902344,
|
|
"learning_rate": 4.992136939879856e-07,
|
|
"logits/chosen": 1.7743589878082275,
|
|
"logits/rejected": 1.6339887380599976,
|
|
"logps/chosen": -72.32333374023438,
|
|
"logps/ref_chosen": -72.92498779296875,
|
|
"logps/ref_rejected": -92.27165222167969,
|
|
"logps/rejected": -92.213134765625,
|
|
"loss": 1.3347,
|
|
"margin_dpo/margin_mean": 0.5431385636329651,
|
|
"margin_dpo/margin_std": 0.8640463948249817,
|
|
"step": 83
|
|
},
|
|
{
|
|
"epoch": 0.12698412698412698,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.5130627155303955,
|
|
"fcm_dpo/q_t": 0.4872024953365326,
|
|
"grad_norm": 19.25313377380371,
|
|
"learning_rate": 4.991054231460969e-07,
|
|
"logits/chosen": 1.8109040260314941,
|
|
"logits/rejected": 1.6407761573791504,
|
|
"logps/chosen": -81.33572387695312,
|
|
"logps/ref_chosen": -81.79109191894531,
|
|
"logps/ref_rejected": -99.20896911621094,
|
|
"logps/rejected": -99.26666259765625,
|
|
"loss": 1.3379,
|
|
"margin_dpo/margin_mean": 0.5130621194839478,
|
|
"margin_dpo/margin_std": 0.9440656900405884,
|
|
"step": 84
|
|
},
|
|
{
|
|
"epoch": 0.12849584278155707,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.5673836469650269,
|
|
"fcm_dpo/q_t": 0.4858438968658447,
|
|
"grad_norm": 17.447330474853516,
|
|
"learning_rate": 4.989901842900325e-07,
|
|
"logits/chosen": 1.441693663597107,
|
|
"logits/rejected": 1.329040765762329,
|
|
"logps/chosen": -67.29985046386719,
|
|
"logps/ref_chosen": -67.94147491455078,
|
|
"logps/ref_rejected": -85.76875305175781,
|
|
"logps/rejected": -85.69450378417969,
|
|
"loss": 1.3324,
|
|
"margin_dpo/margin_mean": 0.5673837661743164,
|
|
"margin_dpo/margin_std": 0.907517671585083,
|
|
"step": 85
|
|
},
|
|
{
|
|
"epoch": 0.13000755857898716,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.34173232316970825,
|
|
"fcm_dpo/q_t": 0.4914761185646057,
|
|
"grad_norm": 17.49716567993164,
|
|
"learning_rate": 4.988679806432711e-07,
|
|
"logits/chosen": 1.9244778156280518,
|
|
"logits/rejected": 1.8607923984527588,
|
|
"logps/chosen": -78.9068603515625,
|
|
"logps/ref_chosen": -79.21485900878906,
|
|
"logps/ref_rejected": -88.69877624511719,
|
|
"logps/rejected": -88.73251342773438,
|
|
"loss": 1.3541,
|
|
"margin_dpo/margin_mean": 0.3417322337627411,
|
|
"margin_dpo/margin_std": 0.8293349742889404,
|
|
"step": 86
|
|
},
|
|
{
|
|
"epoch": 0.13151927437641722,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.7085888385772705,
|
|
"fcm_dpo/q_t": 0.48235026001930237,
|
|
"grad_norm": 18.823593139648438,
|
|
"learning_rate": 4.987388156241114e-07,
|
|
"logits/chosen": 1.5132489204406738,
|
|
"logits/rejected": 1.296330213546753,
|
|
"logps/chosen": -83.90985107421875,
|
|
"logps/ref_chosen": -84.45362854003906,
|
|
"logps/ref_rejected": -103.43824005126953,
|
|
"logps/rejected": -103.60305786132812,
|
|
"loss": 1.3201,
|
|
"margin_dpo/margin_mean": 0.7085880041122437,
|
|
"margin_dpo/margin_std": 1.1765565872192383,
|
|
"step": 87
|
|
},
|
|
{
|
|
"epoch": 0.1330309901738473,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.3276621699333191,
|
|
"fcm_dpo/q_t": 0.4918380081653595,
|
|
"grad_norm": 18.28816032409668,
|
|
"learning_rate": 4.986026928455767e-07,
|
|
"logits/chosen": 1.723911166191101,
|
|
"logits/rejected": 1.7007009983062744,
|
|
"logps/chosen": -80.91500091552734,
|
|
"logps/ref_chosen": -81.27230834960938,
|
|
"logps/ref_rejected": -89.51646423339844,
|
|
"logps/rejected": -89.48681640625,
|
|
"loss": 1.3574,
|
|
"margin_dpo/margin_mean": 0.3276621699333191,
|
|
"margin_dpo/margin_std": 1.1452457904815674,
|
|
"step": 88
|
|
},
|
|
{
|
|
"epoch": 0.1345427059712774,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.7864416837692261,
|
|
"fcm_dpo/q_t": 0.4804958701133728,
|
|
"grad_norm": 18.09223175048828,
|
|
"learning_rate": 4.984596161153135e-07,
|
|
"logits/chosen": 1.9219346046447754,
|
|
"logits/rejected": 1.647355079650879,
|
|
"logps/chosen": -57.445068359375,
|
|
"logps/ref_chosen": -58.142333984375,
|
|
"logps/ref_rejected": -102.53756713867188,
|
|
"logps/rejected": -102.62675476074219,
|
|
"loss": 1.3138,
|
|
"margin_dpo/margin_mean": 0.7864429354667664,
|
|
"margin_dpo/margin_std": 1.318110466003418,
|
|
"step": 89
|
|
},
|
|
{
|
|
"epoch": 0.1360544217687075,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.5627226233482361,
|
|
"fcm_dpo/q_t": 0.4859766960144043,
|
|
"grad_norm": 19.66481590270996,
|
|
"learning_rate": 4.983095894354857e-07,
|
|
"logits/chosen": 1.6534063816070557,
|
|
"logits/rejected": 1.4348580837249756,
|
|
"logps/chosen": -74.80726623535156,
|
|
"logps/ref_chosen": -75.26505279541016,
|
|
"logps/ref_rejected": -104.32842254638672,
|
|
"logps/rejected": -104.43335723876953,
|
|
"loss": 1.3341,
|
|
"margin_dpo/margin_mean": 0.5627224445343018,
|
|
"margin_dpo/margin_std": 1.1481568813323975,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.13756613756613756,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.5365311503410339,
|
|
"fcm_dpo/q_t": 0.48667216300964355,
|
|
"grad_norm": 17.257240295410156,
|
|
"learning_rate": 4.98152617002662e-07,
|
|
"logits/chosen": 1.9292263984680176,
|
|
"logits/rejected": 1.7573230266571045,
|
|
"logps/chosen": -68.76639556884766,
|
|
"logps/ref_chosen": -69.33901977539062,
|
|
"logps/ref_rejected": -90.31411743164062,
|
|
"logps/rejected": -90.27801513671875,
|
|
"loss": 1.338,
|
|
"margin_dpo/margin_mean": 0.5365312099456787,
|
|
"margin_dpo/margin_std": 1.3548262119293213,
|
|
"step": 91
|
|
},
|
|
{
|
|
"epoch": 0.13907785336356765,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.637989342212677,
|
|
"fcm_dpo/q_t": 0.4841547906398773,
|
|
"grad_norm": 20.7775936126709,
|
|
"learning_rate": 4.979887032076988e-07,
|
|
"logits/chosen": 1.601667881011963,
|
|
"logits/rejected": 1.4548040628433228,
|
|
"logps/chosen": -71.87997436523438,
|
|
"logps/ref_chosen": -72.4566650390625,
|
|
"logps/ref_rejected": -91.6706771850586,
|
|
"logps/rejected": -91.73197937011719,
|
|
"loss": 1.3276,
|
|
"margin_dpo/margin_mean": 0.6379896402359009,
|
|
"margin_dpo/margin_std": 1.2460086345672607,
|
|
"step": 92
|
|
},
|
|
{
|
|
"epoch": 0.14058956916099774,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.4430373013019562,
|
|
"fcm_dpo/q_t": 0.489046186208725,
|
|
"grad_norm": 16.134719848632812,
|
|
"learning_rate": 4.978178526356172e-07,
|
|
"logits/chosen": 1.73604416847229,
|
|
"logits/rejected": 1.6414337158203125,
|
|
"logps/chosen": -63.422340393066406,
|
|
"logps/ref_chosen": -64.08897399902344,
|
|
"logps/ref_rejected": -75.09095764160156,
|
|
"logps/rejected": -74.86735534667969,
|
|
"loss": 1.3489,
|
|
"margin_dpo/margin_mean": 0.4430374205112457,
|
|
"margin_dpo/margin_std": 1.593023419380188,
|
|
"step": 93
|
|
},
|
|
{
|
|
"epoch": 0.1421012849584278,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.0405027866363525,
|
|
"fcm_dpo/q_t": 0.4742341637611389,
|
|
"grad_norm": 37.81088638305664,
|
|
"learning_rate": 4.976400700654751e-07,
|
|
"logits/chosen": 2.0334482192993164,
|
|
"logits/rejected": 1.8472344875335693,
|
|
"logps/chosen": -78.81251525878906,
|
|
"logps/ref_chosen": -79.67372131347656,
|
|
"logps/ref_rejected": -94.64076232910156,
|
|
"logps/rejected": -94.82006072998047,
|
|
"loss": 1.292,
|
|
"margin_dpo/margin_mean": 1.040502667427063,
|
|
"margin_dpo/margin_std": 1.6742515563964844,
|
|
"step": 94
|
|
},
|
|
{
|
|
"epoch": 0.1436130007558579,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.6995920538902283,
|
|
"fcm_dpo/q_t": 0.48263174295425415,
|
|
"grad_norm": 18.631126403808594,
|
|
"learning_rate": 4.974553604702332e-07,
|
|
"logits/chosen": 1.4102903604507446,
|
|
"logits/rejected": 1.2439355850219727,
|
|
"logps/chosen": -78.23617553710938,
|
|
"logps/ref_chosen": -78.65760803222656,
|
|
"logps/ref_rejected": -109.4048080444336,
|
|
"logps/rejected": -109.68296813964844,
|
|
"loss": 1.3237,
|
|
"margin_dpo/margin_mean": 0.6995913982391357,
|
|
"margin_dpo/margin_std": 1.566126823425293,
|
|
"step": 95
|
|
},
|
|
{
|
|
"epoch": 0.14512471655328799,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.8266540169715881,
|
|
"fcm_dpo/q_t": 0.4794994592666626,
|
|
"grad_norm": 18.881208419799805,
|
|
"learning_rate": 4.972637290166157e-07,
|
|
"logits/chosen": 1.4069623947143555,
|
|
"logits/rejected": 1.2911567687988281,
|
|
"logps/chosen": -77.2410659790039,
|
|
"logps/ref_chosen": -77.708251953125,
|
|
"logps/ref_rejected": -104.36044311523438,
|
|
"logps/rejected": -104.71990203857422,
|
|
"loss": 1.3116,
|
|
"margin_dpo/margin_mean": 0.8266537189483643,
|
|
"margin_dpo/margin_std": 1.5478097200393677,
|
|
"step": 96
|
|
},
|
|
{
|
|
"epoch": 0.14663643235071808,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.3047708570957184,
|
|
"fcm_dpo/q_t": 0.4924178719520569,
|
|
"grad_norm": 19.201942443847656,
|
|
"learning_rate": 4.970651810649666e-07,
|
|
"logits/chosen": 1.6156105995178223,
|
|
"logits/rejected": 1.5133846998214722,
|
|
"logps/chosen": -84.23475646972656,
|
|
"logps/ref_chosen": -84.58917999267578,
|
|
"logps/ref_rejected": -99.25704956054688,
|
|
"logps/rejected": -99.2073974609375,
|
|
"loss": 1.3622,
|
|
"margin_dpo/margin_mean": 0.3047705590724945,
|
|
"margin_dpo/margin_std": 1.56210196018219,
|
|
"step": 97
|
|
},
|
|
{
|
|
"epoch": 0.14814814814814814,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.4234497845172882,
|
|
"fcm_dpo/q_t": 0.48944059014320374,
|
|
"grad_norm": 17.522523880004883,
|
|
"learning_rate": 4.968597221690985e-07,
|
|
"logits/chosen": 1.7427449226379395,
|
|
"logits/rejected": 1.6909432411193848,
|
|
"logps/chosen": -74.09392547607422,
|
|
"logps/ref_chosen": -74.42477416992188,
|
|
"logps/ref_rejected": -88.93840026855469,
|
|
"logps/rejected": -89.03099822998047,
|
|
"loss": 1.3494,
|
|
"margin_dpo/margin_mean": 0.42344966530799866,
|
|
"margin_dpo/margin_std": 1.4119412899017334,
|
|
"step": 98
|
|
},
|
|
{
|
|
"epoch": 0.14965986394557823,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.6521110534667969,
|
|
"fcm_dpo/q_t": 0.4839605689048767,
|
|
"grad_norm": 18.090377807617188,
|
|
"learning_rate": 4.966473580761389e-07,
|
|
"logits/chosen": 2.0218091011047363,
|
|
"logits/rejected": 1.9115238189697266,
|
|
"logps/chosen": -75.04991912841797,
|
|
"logps/ref_chosen": -75.59742736816406,
|
|
"logps/ref_rejected": -98.2310791015625,
|
|
"logps/rejected": -98.33567810058594,
|
|
"loss": 1.332,
|
|
"margin_dpo/margin_mean": 0.6521108150482178,
|
|
"margin_dpo/margin_std": 1.979665994644165,
|
|
"step": 99
|
|
},
|
|
{
|
|
"epoch": 0.15117157974300832,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.7829453349113464,
|
|
"fcm_dpo/q_t": 0.48082974553108215,
|
|
"grad_norm": 19.518943786621094,
|
|
"learning_rate": 4.964280947263676e-07,
|
|
"logits/chosen": 1.8196897506713867,
|
|
"logits/rejected": 1.7950403690338135,
|
|
"logps/chosen": -98.03388977050781,
|
|
"logps/ref_chosen": -98.55859375,
|
|
"logps/ref_rejected": -106.01295471191406,
|
|
"logps/rejected": -106.27117919921875,
|
|
"loss": 1.3228,
|
|
"margin_dpo/margin_mean": 0.7829455137252808,
|
|
"margin_dpo/margin_std": 2.2036726474761963,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.15117157974300832,
|
|
"eval_fcm_dpo/beta": 0.10000000894069672,
|
|
"eval_logits/chosen": 1.691135048866272,
|
|
"eval_logits/rejected": 1.577427864074707,
|
|
"eval_logps/chosen": -86.23526763916016,
|
|
"eval_logps/ref_chosen": -86.90177917480469,
|
|
"eval_logps/ref_rejected": -96.69639587402344,
|
|
"eval_logps/rejected": -96.92163848876953,
|
|
"eval_loss": 0.6546333432197571,
|
|
"eval_margin_dpo/margin_mean": 0.891772449016571,
|
|
"eval_margin_dpo/margin_std": 1.9760589599609375,
|
|
"eval_runtime": 42.284,
|
|
"eval_samples_per_second": 54.465,
|
|
"eval_steps_per_second": 1.703,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.15268329554043839,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.9559938907623291,
|
|
"fcm_dpo/q_t": 0.476234495639801,
|
|
"grad_norm": 16.3298282623291,
|
|
"learning_rate": 4.96201938253052e-07,
|
|
"logits/chosen": 1.4772682189941406,
|
|
"logits/rejected": 1.4215991497039795,
|
|
"logps/chosen": -68.6669921875,
|
|
"logps/ref_chosen": -69.45216369628906,
|
|
"logps/ref_rejected": -88.0458755493164,
|
|
"logps/rejected": -88.21668243408203,
|
|
"loss": 1.3001,
|
|
"margin_dpo/margin_mean": 0.9559941291809082,
|
|
"margin_dpo/margin_std": 1.6717371940612793,
|
|
"step": 101
|
|
},
|
|
{
|
|
"epoch": 0.15419501133786848,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.5958479642868042,
|
|
"fcm_dpo/q_t": 0.48534175753593445,
|
|
"grad_norm": 17.342573165893555,
|
|
"learning_rate": 4.959688949822748e-07,
|
|
"logits/chosen": 1.3332377672195435,
|
|
"logits/rejected": 1.264945387840271,
|
|
"logps/chosen": -79.86666870117188,
|
|
"logps/ref_chosen": -80.35308837890625,
|
|
"logps/ref_rejected": -90.61380004882812,
|
|
"logps/rejected": -90.72323608398438,
|
|
"loss": 1.3377,
|
|
"margin_dpo/margin_mean": 0.5958476662635803,
|
|
"margin_dpo/margin_std": 2.011353015899658,
|
|
"step": 102
|
|
},
|
|
{
|
|
"epoch": 0.15570672713529857,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.210947036743164,
|
|
"fcm_dpo/q_t": 0.47017818689346313,
|
|
"grad_norm": 17.412508010864258,
|
|
"learning_rate": 4.957289714327572e-07,
|
|
"logits/chosen": 1.8353683948516846,
|
|
"logits/rejected": 1.7695558071136475,
|
|
"logps/chosen": -78.50149536132812,
|
|
"logps/ref_chosen": -79.30392456054688,
|
|
"logps/ref_rejected": -93.745361328125,
|
|
"logps/rejected": -94.15388488769531,
|
|
"loss": 1.2799,
|
|
"margin_dpo/margin_mean": 1.210945963859558,
|
|
"margin_dpo/margin_std": 2.1122875213623047,
|
|
"step": 103
|
|
},
|
|
{
|
|
"epoch": 0.15721844293272866,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.138046145439148,
|
|
"fcm_dpo/q_t": 0.4720809757709503,
|
|
"grad_norm": 18.750715255737305,
|
|
"learning_rate": 4.954821743156767e-07,
|
|
"logits/chosen": 1.8510358333587646,
|
|
"logits/rejected": 1.610731601715088,
|
|
"logps/chosen": -73.64946746826172,
|
|
"logps/ref_chosen": -74.50674438476562,
|
|
"logps/ref_rejected": -116.09912872314453,
|
|
"logps/rejected": -116.37989807128906,
|
|
"loss": 1.2898,
|
|
"margin_dpo/margin_mean": 1.1380459070205688,
|
|
"margin_dpo/margin_std": 2.3527626991271973,
|
|
"step": 104
|
|
},
|
|
{
|
|
"epoch": 0.15873015873015872,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.992182731628418,
|
|
"fcm_dpo/q_t": 0.47577622532844543,
|
|
"grad_norm": 18.913448333740234,
|
|
"learning_rate": 4.952285105344791e-07,
|
|
"logits/chosen": 1.8412593603134155,
|
|
"logits/rejected": 1.6697262525558472,
|
|
"logps/chosen": -87.25275421142578,
|
|
"logps/ref_chosen": -87.76654815673828,
|
|
"logps/ref_rejected": -108.07927703857422,
|
|
"logps/rejected": -108.55766296386719,
|
|
"loss": 1.3093,
|
|
"margin_dpo/margin_mean": 0.9921829700469971,
|
|
"margin_dpo/margin_std": 2.8260951042175293,
|
|
"step": 105
|
|
},
|
|
{
|
|
"epoch": 0.1602418745275888,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.8636230230331421,
|
|
"fcm_dpo/q_t": 0.47864556312561035,
|
|
"grad_norm": 17.13074493408203,
|
|
"learning_rate": 4.949679871846857e-07,
|
|
"logits/chosen": 1.705589771270752,
|
|
"logits/rejected": 1.6496243476867676,
|
|
"logps/chosen": -75.5406494140625,
|
|
"logps/ref_chosen": -76.38548278808594,
|
|
"logps/ref_rejected": -81.63407897949219,
|
|
"logps/rejected": -81.65287017822266,
|
|
"loss": 1.3159,
|
|
"margin_dpo/margin_mean": 0.8636230826377869,
|
|
"margin_dpo/margin_std": 2.3666610717773438,
|
|
"step": 106
|
|
},
|
|
{
|
|
"epoch": 0.1617535903250189,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.6637716889381409,
|
|
"fcm_dpo/q_t": 0.4834796190261841,
|
|
"grad_norm": 19.10794448852539,
|
|
"learning_rate": 4.947006115536947e-07,
|
|
"logits/chosen": 1.299161434173584,
|
|
"logits/rejected": 1.2430386543273926,
|
|
"logps/chosen": -95.80593872070312,
|
|
"logps/ref_chosen": -96.14849853515625,
|
|
"logps/ref_rejected": -107.0481185913086,
|
|
"logps/rejected": -107.36932373046875,
|
|
"loss": 1.3336,
|
|
"margin_dpo/margin_mean": 0.6637719869613647,
|
|
"margin_dpo/margin_std": 2.2503273487091064,
|
|
"step": 107
|
|
},
|
|
{
|
|
"epoch": 0.16326530612244897,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.853187084197998,
|
|
"fcm_dpo/q_t": 0.47897130250930786,
|
|
"grad_norm": 17.166805267333984,
|
|
"learning_rate": 4.944263911205772e-07,
|
|
"logits/chosen": 1.4326887130737305,
|
|
"logits/rejected": 1.309265375137329,
|
|
"logps/chosen": -84.71664428710938,
|
|
"logps/ref_chosen": -85.39241027832031,
|
|
"logps/ref_rejected": -97.79592895507812,
|
|
"logps/rejected": -97.97334289550781,
|
|
"loss": 1.3162,
|
|
"margin_dpo/margin_mean": 0.8531871438026428,
|
|
"margin_dpo/margin_std": 2.3266098499298096,
|
|
"step": 108
|
|
},
|
|
{
|
|
"epoch": 0.16477702191987906,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.3385164737701416,
|
|
"fcm_dpo/q_t": 0.46750587224960327,
|
|
"grad_norm": 18.09962272644043,
|
|
"learning_rate": 4.941453335558681e-07,
|
|
"logits/chosen": 1.723799467086792,
|
|
"logits/rejected": 1.4775876998901367,
|
|
"logps/chosen": -78.03567504882812,
|
|
"logps/ref_chosen": -78.99874877929688,
|
|
"logps/ref_rejected": -100.79278564453125,
|
|
"logps/rejected": -101.16822814941406,
|
|
"loss": 1.2754,
|
|
"margin_dpo/margin_mean": 1.3385167121887207,
|
|
"margin_dpo/margin_std": 2.743279218673706,
|
|
"step": 109
|
|
},
|
|
{
|
|
"epoch": 0.16628873771730915,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.241614431142807,
|
|
"fcm_dpo/q_t": 0.4939076602458954,
|
|
"grad_norm": 20.46181297302246,
|
|
"learning_rate": 4.938574467213517e-07,
|
|
"logits/chosen": 1.4239675998687744,
|
|
"logits/rejected": 1.485081434249878,
|
|
"logps/chosen": -96.55082702636719,
|
|
"logps/ref_chosen": -96.95277404785156,
|
|
"logps/ref_rejected": -91.44450378417969,
|
|
"logps/rejected": -91.2841567993164,
|
|
"loss": 1.3772,
|
|
"margin_dpo/margin_mean": 0.2416144609451294,
|
|
"margin_dpo/margin_std": 2.4270529747009277,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.16780045351473924,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.9300182461738586,
|
|
"fcm_dpo/q_t": 0.47719940543174744,
|
|
"grad_norm": 16.58338737487793,
|
|
"learning_rate": 4.935627386698418e-07,
|
|
"logits/chosen": 1.696366548538208,
|
|
"logits/rejected": 1.5432548522949219,
|
|
"logps/chosen": -69.25906372070312,
|
|
"logps/ref_chosen": -70.01641845703125,
|
|
"logps/ref_rejected": -92.87696838378906,
|
|
"logps/rejected": -93.04963684082031,
|
|
"loss": 1.3109,
|
|
"margin_dpo/margin_mean": 0.9300180077552795,
|
|
"margin_dpo/margin_std": 2.4844956398010254,
|
|
"step": 111
|
|
},
|
|
{
|
|
"epoch": 0.1693121693121693,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.2315406799316406,
|
|
"fcm_dpo/q_t": 0.4697922468185425,
|
|
"grad_norm": 19.882429122924805,
|
|
"learning_rate": 4.932612176449559e-07,
|
|
"logits/chosen": 1.7238032817840576,
|
|
"logits/rejected": 1.5111950635910034,
|
|
"logps/chosen": -76.88069152832031,
|
|
"logps/ref_chosen": -77.80027770996094,
|
|
"logps/ref_rejected": -123.10624694824219,
|
|
"logps/rejected": -123.41820526123047,
|
|
"loss": 1.2845,
|
|
"margin_dpo/margin_mean": 1.2315394878387451,
|
|
"margin_dpo/margin_std": 2.6262001991271973,
|
|
"step": 112
|
|
},
|
|
{
|
|
"epoch": 0.1708238851095994,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.9178091287612915,
|
|
"fcm_dpo/q_t": 0.47745102643966675,
|
|
"grad_norm": 16.442792892456055,
|
|
"learning_rate": 4.929528920808854e-07,
|
|
"logits/chosen": 1.3930963277816772,
|
|
"logits/rejected": 1.3205270767211914,
|
|
"logps/chosen": -69.23207092285156,
|
|
"logps/ref_chosen": -70.54346466064453,
|
|
"logps/ref_rejected": -88.79286193847656,
|
|
"logps/rejected": -88.39927673339844,
|
|
"loss": 1.3135,
|
|
"margin_dpo/margin_mean": 0.9178098440170288,
|
|
"margin_dpo/margin_std": 2.57177472114563,
|
|
"step": 113
|
|
},
|
|
{
|
|
"epoch": 0.17233560090702948,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.3459113836288452,
|
|
"fcm_dpo/q_t": 0.4675772786140442,
|
|
"grad_norm": 17.996028900146484,
|
|
"learning_rate": 4.92637770602159e-07,
|
|
"logits/chosen": 1.716050386428833,
|
|
"logits/rejected": 1.5711562633514404,
|
|
"logps/chosen": -82.80506134033203,
|
|
"logps/ref_chosen": -83.9239501953125,
|
|
"logps/ref_rejected": -92.85765838623047,
|
|
"logps/rejected": -93.08468627929688,
|
|
"loss": 1.2804,
|
|
"margin_dpo/margin_mean": 1.3459113836288452,
|
|
"margin_dpo/margin_std": 3.139867067337036,
|
|
"step": 114
|
|
},
|
|
{
|
|
"epoch": 0.17384731670445955,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.2582768201828003,
|
|
"fcm_dpo/q_t": 0.4693111777305603,
|
|
"grad_norm": 16.850736618041992,
|
|
"learning_rate": 4.923158620234019e-07,
|
|
"logits/chosen": 1.575303316116333,
|
|
"logits/rejected": 1.4128024578094482,
|
|
"logps/chosen": -68.53926086425781,
|
|
"logps/ref_chosen": -69.82767486572266,
|
|
"logps/ref_rejected": -96.51564025878906,
|
|
"logps/rejected": -96.48550415039062,
|
|
"loss": 1.2811,
|
|
"margin_dpo/margin_mean": 1.2582770586013794,
|
|
"margin_dpo/margin_std": 2.586811065673828,
|
|
"step": 115
|
|
},
|
|
{
|
|
"epoch": 0.17535903250188964,
|
|
"fcm_dpo/beta": 0.1008991077542305,
|
|
"fcm_dpo/delta": 0.08911184966564178,
|
|
"fcm_dpo/margin": 1.6396100521087646,
|
|
"fcm_dpo/q_t": 0.4599873721599579,
|
|
"grad_norm": 18.267988204956055,
|
|
"learning_rate": 4.91987175349089e-07,
|
|
"logits/chosen": 1.6626043319702148,
|
|
"logits/rejected": 1.510635495185852,
|
|
"logps/chosen": -64.81758880615234,
|
|
"logps/ref_chosen": -66.19773864746094,
|
|
"logps/ref_rejected": -90.88304138183594,
|
|
"logps/rejected": -91.14250183105469,
|
|
"loss": 1.2453,
|
|
"margin_dpo/margin_mean": 1.6396100521087646,
|
|
"margin_dpo/margin_std": 2.6216936111450195,
|
|
"step": 116
|
|
},
|
|
{
|
|
"epoch": 0.17687074829931973,
|
|
"fcm_dpo/beta": 0.10179821401834488,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.585550308227539,
|
|
"fcm_dpo/q_t": 0.4604984521865845,
|
|
"grad_norm": 16.826187133789062,
|
|
"learning_rate": 4.916517197732933e-07,
|
|
"logits/chosen": 1.6091898679733276,
|
|
"logits/rejected": 1.5089998245239258,
|
|
"logps/chosen": -70.49270629882812,
|
|
"logps/ref_chosen": -72.15988159179688,
|
|
"logps/ref_rejected": -85.30296325683594,
|
|
"logps/rejected": -85.22134399414062,
|
|
"loss": 1.2524,
|
|
"margin_dpo/margin_mean": 1.58555006980896,
|
|
"margin_dpo/margin_std": 2.8378653526306152,
|
|
"step": 117
|
|
},
|
|
{
|
|
"epoch": 0.17838246409674982,
|
|
"fcm_dpo/beta": 0.10179821401834488,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.2571253776550293,
|
|
"fcm_dpo/q_t": 0.46861183643341064,
|
|
"grad_norm": 16.774629592895508,
|
|
"learning_rate": 4.913095046794281e-07,
|
|
"logits/chosen": 1.6488983631134033,
|
|
"logits/rejected": 1.5311334133148193,
|
|
"logps/chosen": -70.03057098388672,
|
|
"logps/ref_chosen": -71.47773742675781,
|
|
"logps/ref_rejected": -96.95051574707031,
|
|
"logps/rejected": -96.76048278808594,
|
|
"loss": 1.2838,
|
|
"margin_dpo/margin_mean": 1.2571251392364502,
|
|
"margin_dpo/margin_std": 2.8928143978118896,
|
|
"step": 118
|
|
},
|
|
{
|
|
"epoch": 0.17989417989417988,
|
|
"fcm_dpo/beta": 0.10179821401834488,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.358586311340332,
|
|
"fcm_dpo/q_t": 0.4668753743171692,
|
|
"grad_norm": 17.19606590270996,
|
|
"learning_rate": 4.909605396399855e-07,
|
|
"logits/chosen": 1.7519330978393555,
|
|
"logits/rejected": 1.6531188488006592,
|
|
"logps/chosen": -76.84124755859375,
|
|
"logps/ref_chosen": -78.2727279663086,
|
|
"logps/ref_rejected": -94.71317291259766,
|
|
"logps/rejected": -94.64027404785156,
|
|
"loss": 1.2855,
|
|
"margin_dpo/margin_mean": 1.3585854768753052,
|
|
"margin_dpo/margin_std": 3.5642504692077637,
|
|
"step": 119
|
|
},
|
|
{
|
|
"epoch": 0.18140589569160998,
|
|
"fcm_dpo/beta": 0.10353302955627441,
|
|
"fcm_dpo/delta": 0.08449066430330276,
|
|
"fcm_dpo/margin": 1.9712594747543335,
|
|
"fcm_dpo/q_t": 0.4509131908416748,
|
|
"grad_norm": 18.62177085876465,
|
|
"learning_rate": 4.906048344162676e-07,
|
|
"logits/chosen": 1.8366992473602295,
|
|
"logits/rejected": 1.6866886615753174,
|
|
"logps/chosen": -76.55362701416016,
|
|
"logps/ref_chosen": -78.43109130859375,
|
|
"logps/ref_rejected": -100.2771987915039,
|
|
"logps/rejected": -100.37100219726562,
|
|
"loss": 1.2151,
|
|
"margin_dpo/margin_mean": 1.9712594747543335,
|
|
"margin_dpo/margin_std": 2.9160304069519043,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.18291761148904007,
|
|
"fcm_dpo/beta": 0.10353302955627441,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.4688817262649536,
|
|
"fcm_dpo/q_t": 0.4635319113731384,
|
|
"grad_norm": 19.076129913330078,
|
|
"learning_rate": 4.902423989581143e-07,
|
|
"logits/chosen": 2.027730941772461,
|
|
"logits/rejected": 1.7624661922454834,
|
|
"logps/chosen": -72.38795471191406,
|
|
"logps/ref_chosen": -74.08768463134766,
|
|
"logps/ref_rejected": -118.6731948852539,
|
|
"logps/rejected": -118.44233703613281,
|
|
"loss": 1.268,
|
|
"margin_dpo/margin_mean": 1.4688820838928223,
|
|
"margin_dpo/margin_std": 3.261903762817383,
|
|
"step": 121
|
|
},
|
|
{
|
|
"epoch": 0.18442932728647016,
|
|
"fcm_dpo/beta": 0.10353302955627441,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.4933545589447021,
|
|
"fcm_dpo/q_t": 0.46344470977783203,
|
|
"grad_norm": 18.32591438293457,
|
|
"learning_rate": 4.898732434036243e-07,
|
|
"logits/chosen": 1.5426521301269531,
|
|
"logits/rejected": 1.419804573059082,
|
|
"logps/chosen": -77.63048553466797,
|
|
"logps/ref_chosen": -79.36762237548828,
|
|
"logps/ref_rejected": -92.42371368408203,
|
|
"logps/rejected": -92.179931640625,
|
|
"loss": 1.2719,
|
|
"margin_dpo/margin_mean": 1.493354082107544,
|
|
"margin_dpo/margin_std": 3.603337287902832,
|
|
"step": 122
|
|
},
|
|
{
|
|
"epoch": 0.18594104308390022,
|
|
"fcm_dpo/beta": 0.10353302955627441,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.630068063735962,
|
|
"fcm_dpo/q_t": 0.45924896001815796,
|
|
"grad_norm": 17.652915954589844,
|
|
"learning_rate": 4.894973780788722e-07,
|
|
"logits/chosen": 1.6213600635528564,
|
|
"logits/rejected": 1.5191831588745117,
|
|
"logps/chosen": -69.88966369628906,
|
|
"logps/ref_chosen": -71.91705322265625,
|
|
"logps/ref_rejected": -96.36418151855469,
|
|
"logps/rejected": -95.96685028076172,
|
|
"loss": 1.2503,
|
|
"margin_dpo/margin_mean": 1.630068063735962,
|
|
"margin_dpo/margin_std": 3.0862808227539062,
|
|
"step": 123
|
|
},
|
|
{
|
|
"epoch": 0.1874527588813303,
|
|
"fcm_dpo/beta": 0.10440807044506073,
|
|
"fcm_dpo/delta": 0.0838112086057663,
|
|
"fcm_dpo/margin": 1.9508870840072632,
|
|
"fcm_dpo/q_t": 0.4518246054649353,
|
|
"grad_norm": 18.525684356689453,
|
|
"learning_rate": 4.89114813497619e-07,
|
|
"logits/chosen": 1.9808456897735596,
|
|
"logits/rejected": 1.8465297222137451,
|
|
"logps/chosen": -69.44947052001953,
|
|
"logps/ref_chosen": -71.72529602050781,
|
|
"logps/ref_rejected": -111.17984771728516,
|
|
"logps/rejected": -110.85490417480469,
|
|
"loss": 1.2301,
|
|
"margin_dpo/margin_mean": 1.9508872032165527,
|
|
"margin_dpo/margin_std": 3.758726119995117,
|
|
"step": 124
|
|
},
|
|
{
|
|
"epoch": 0.1889644746787604,
|
|
"fcm_dpo/beta": 0.1063346192240715,
|
|
"fcm_dpo/delta": 0.04968992993235588,
|
|
"fcm_dpo/margin": 2.074455976486206,
|
|
"fcm_dpo/q_t": 0.44727903604507446,
|
|
"grad_norm": 19.388065338134766,
|
|
"learning_rate": 4.887255603610184e-07,
|
|
"logits/chosen": 1.8083943128585815,
|
|
"logits/rejected": 1.618847131729126,
|
|
"logps/chosen": -79.10482025146484,
|
|
"logps/ref_chosen": -81.55532836914062,
|
|
"logps/ref_rejected": -110.9144287109375,
|
|
"logps/rejected": -110.53838348388672,
|
|
"loss": 1.2137,
|
|
"margin_dpo/margin_mean": 2.074455499649048,
|
|
"margin_dpo/margin_std": 3.5274128913879395,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 0.19047619047619047,
|
|
"fcm_dpo/beta": 0.10724936425685883,
|
|
"fcm_dpo/delta": 0.08529400080442429,
|
|
"fcm_dpo/margin": 1.6670329570770264,
|
|
"fcm_dpo/q_t": 0.4588611423969269,
|
|
"grad_norm": 19.64308738708496,
|
|
"learning_rate": 4.883296295573176e-07,
|
|
"logits/chosen": 1.1320900917053223,
|
|
"logits/rejected": 1.1597490310668945,
|
|
"logps/chosen": -83.641357421875,
|
|
"logps/ref_chosen": -87.07349395751953,
|
|
"logps/ref_rejected": -85.05271911621094,
|
|
"logps/rejected": -83.28761291503906,
|
|
"loss": 1.2669,
|
|
"margin_dpo/margin_mean": 1.6670331954956055,
|
|
"margin_dpo/margin_std": 4.1900434494018555,
|
|
"step": 126
|
|
},
|
|
{
|
|
"epoch": 0.19198790627362056,
|
|
"fcm_dpo/beta": 0.10914330929517746,
|
|
"fcm_dpo/delta": 0.04506043344736099,
|
|
"fcm_dpo/margin": 2.2443766593933105,
|
|
"fcm_dpo/q_t": 0.44131654500961304,
|
|
"grad_norm": 18.411846160888672,
|
|
"learning_rate": 4.87927032161552e-07,
|
|
"logits/chosen": 1.8658032417297363,
|
|
"logits/rejected": 1.8027232885360718,
|
|
"logps/chosen": -77.10999298095703,
|
|
"logps/ref_chosen": -80.4578857421875,
|
|
"logps/ref_rejected": -90.50740051269531,
|
|
"logps/rejected": -89.40388488769531,
|
|
"loss": 1.1873,
|
|
"margin_dpo/margin_mean": 2.2443761825561523,
|
|
"margin_dpo/margin_std": 3.211836338043213,
|
|
"step": 127
|
|
},
|
|
{
|
|
"epoch": 0.19349962207105065,
|
|
"fcm_dpo/beta": 0.10914330929517746,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.470666527748108,
|
|
"fcm_dpo/q_t": 0.4621870517730713,
|
|
"grad_norm": 20.818655014038086,
|
|
"learning_rate": 4.875177794352363e-07,
|
|
"logits/chosen": 1.4957571029663086,
|
|
"logits/rejected": 1.3097018003463745,
|
|
"logps/chosen": -82.75028991699219,
|
|
"logps/ref_chosen": -85.77519226074219,
|
|
"logps/ref_rejected": -112.63516998291016,
|
|
"logps/rejected": -111.08094024658203,
|
|
"loss": 1.2966,
|
|
"margin_dpo/margin_mean": 1.4706671237945557,
|
|
"margin_dpo/margin_std": 4.722267150878906,
|
|
"step": 128
|
|
},
|
|
{
|
|
"epoch": 0.19501133786848074,
|
|
"fcm_dpo/beta": 0.10914330929517746,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.6215567588806152,
|
|
"fcm_dpo/q_t": 0.45845484733581543,
|
|
"grad_norm": 20.263038635253906,
|
|
"learning_rate": 4.871018828260491e-07,
|
|
"logits/chosen": 1.5004316568374634,
|
|
"logits/rejected": 1.5068594217300415,
|
|
"logps/chosen": -81.99288940429688,
|
|
"logps/ref_chosen": -84.94615173339844,
|
|
"logps/ref_rejected": -85.36473846435547,
|
|
"logps/rejected": -84.03303527832031,
|
|
"loss": 1.2797,
|
|
"margin_dpo/margin_mean": 1.6215569972991943,
|
|
"margin_dpo/margin_std": 4.648078918457031,
|
|
"step": 129
|
|
},
|
|
{
|
|
"epoch": 0.1965230536659108,
|
|
"fcm_dpo/beta": 0.11057177186012268,
|
|
"fcm_dpo/delta": 0.06501518934965134,
|
|
"fcm_dpo/margin": 2.0028374195098877,
|
|
"fcm_dpo/q_t": 0.44799530506134033,
|
|
"grad_norm": 20.011611938476562,
|
|
"learning_rate": 4.866793539675126e-07,
|
|
"logits/chosen": 1.4784972667694092,
|
|
"logits/rejected": 1.3632197380065918,
|
|
"logps/chosen": -75.56727600097656,
|
|
"logps/ref_chosen": -79.0184555053711,
|
|
"logps/ref_rejected": -97.63998413085938,
|
|
"logps/rejected": -96.19164276123047,
|
|
"loss": 1.2255,
|
|
"margin_dpo/margin_mean": 2.0028371810913086,
|
|
"margin_dpo/margin_std": 3.9271349906921387,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.1980347694633409,
|
|
"fcm_dpo/beta": 0.11228152364492416,
|
|
"fcm_dpo/delta": 0.11718001961708069,
|
|
"fcm_dpo/margin": 2.5501041412353516,
|
|
"fcm_dpo/q_t": 0.4346443712711334,
|
|
"grad_norm": 18.658187866210938,
|
|
"learning_rate": 4.86250204678667e-07,
|
|
"logits/chosen": 1.426423192024231,
|
|
"logits/rejected": 1.1902827024459839,
|
|
"logps/chosen": -64.61331939697266,
|
|
"logps/ref_chosen": -68.24565887451172,
|
|
"logps/ref_rejected": -97.99555969238281,
|
|
"logps/rejected": -96.91332244873047,
|
|
"loss": 1.1839,
|
|
"margin_dpo/margin_mean": 2.5501043796539307,
|
|
"margin_dpo/margin_std": 4.5746660232543945,
|
|
"step": 131
|
|
},
|
|
{
|
|
"epoch": 0.19954648526077098,
|
|
"fcm_dpo/beta": 0.11424092948436737,
|
|
"fcm_dpo/delta": 0.09166815131902695,
|
|
"fcm_dpo/margin": 1.7558990716934204,
|
|
"fcm_dpo/q_t": 0.45261579751968384,
|
|
"grad_norm": 19.862043380737305,
|
|
"learning_rate": 4.858144469637408e-07,
|
|
"logits/chosen": 1.655261516571045,
|
|
"logits/rejected": 1.5392924547195435,
|
|
"logps/chosen": -78.36050415039062,
|
|
"logps/ref_chosen": -82.06532287597656,
|
|
"logps/ref_rejected": -89.47691345214844,
|
|
"logps/rejected": -87.52798461914062,
|
|
"loss": 1.2439,
|
|
"margin_dpo/margin_mean": 1.7558988332748413,
|
|
"margin_dpo/margin_std": 3.9155571460723877,
|
|
"step": 132
|
|
},
|
|
{
|
|
"epoch": 0.20105820105820105,
|
|
"fcm_dpo/beta": 0.11807942390441895,
|
|
"fcm_dpo/delta": 0.16631799936294556,
|
|
"fcm_dpo/margin": 2.0136890411376953,
|
|
"fcm_dpo/q_t": 0.44534194469451904,
|
|
"grad_norm": 20.467275619506836,
|
|
"learning_rate": 4.853720930118138e-07,
|
|
"logits/chosen": 1.4204668998718262,
|
|
"logits/rejected": 1.4540302753448486,
|
|
"logps/chosen": -79.44143676757812,
|
|
"logps/ref_chosen": -83.70661163330078,
|
|
"logps/ref_rejected": -89.3868179321289,
|
|
"logps/rejected": -87.13533020019531,
|
|
"loss": 1.2249,
|
|
"margin_dpo/margin_mean": 2.0136892795562744,
|
|
"margin_dpo/margin_std": 4.2390546798706055,
|
|
"step": 133
|
|
},
|
|
{
|
|
"epoch": 0.20256991685563114,
|
|
"fcm_dpo/beta": 0.12024421989917755,
|
|
"fcm_dpo/delta": 0.0528922863304615,
|
|
"fcm_dpo/margin": 2.901494026184082,
|
|
"fcm_dpo/q_t": 0.4216766357421875,
|
|
"grad_norm": 18.669694900512695,
|
|
"learning_rate": 4.849231551964771e-07,
|
|
"logits/chosen": 1.9196783304214478,
|
|
"logits/rejected": 1.7756422758102417,
|
|
"logps/chosen": -66.67295837402344,
|
|
"logps/ref_chosen": -71.57601928710938,
|
|
"logps/ref_rejected": -92.34259033203125,
|
|
"logps/rejected": -90.3410415649414,
|
|
"loss": 1.1417,
|
|
"margin_dpo/margin_mean": 2.901494026184082,
|
|
"margin_dpo/margin_std": 4.747570991516113,
|
|
"step": 134
|
|
},
|
|
{
|
|
"epoch": 0.20408163265306123,
|
|
"fcm_dpo/beta": 0.12111984193325043,
|
|
"fcm_dpo/delta": 0.054889243096113205,
|
|
"fcm_dpo/margin": 2.007038116455078,
|
|
"fcm_dpo/q_t": 0.4430903196334839,
|
|
"grad_norm": 19.303375244140625,
|
|
"learning_rate": 4.844676460754862e-07,
|
|
"logits/chosen": 1.465507984161377,
|
|
"logits/rejected": 1.408734917640686,
|
|
"logps/chosen": -61.307891845703125,
|
|
"logps/ref_chosen": -66.39884948730469,
|
|
"logps/ref_rejected": -81.38636779785156,
|
|
"logps/rejected": -78.30242919921875,
|
|
"loss": 1.2202,
|
|
"margin_dpo/margin_mean": 2.007038116455078,
|
|
"margin_dpo/margin_std": 4.225521087646484,
|
|
"step": 135
|
|
},
|
|
{
|
|
"epoch": 0.20559334845049132,
|
|
"fcm_dpo/beta": 0.12096191942691803,
|
|
"fcm_dpo/delta": -0.0338929109275341,
|
|
"fcm_dpo/margin": 2.4650301933288574,
|
|
"fcm_dpo/q_t": 0.4351881146430969,
|
|
"grad_norm": 22.06288719177246,
|
|
"learning_rate": 4.840055783904106e-07,
|
|
"logits/chosen": 1.5917037725448608,
|
|
"logits/rejected": 1.3086708784103394,
|
|
"logps/chosen": -82.8524169921875,
|
|
"logps/ref_chosen": -86.75381469726562,
|
|
"logps/ref_rejected": -113.35548400878906,
|
|
"logps/rejected": -111.91911315917969,
|
|
"loss": 1.232,
|
|
"margin_dpo/margin_mean": 2.4650299549102783,
|
|
"margin_dpo/margin_std": 5.841936111450195,
|
|
"step": 136
|
|
},
|
|
{
|
|
"epoch": 0.20710506424792138,
|
|
"fcm_dpo/beta": 0.12060153484344482,
|
|
"fcm_dpo/delta": -0.029881920665502548,
|
|
"fcm_dpo/margin": 2.5589451789855957,
|
|
"fcm_dpo/q_t": 0.4295978248119354,
|
|
"grad_norm": 18.40294075012207,
|
|
"learning_rate": 4.835369650662767e-07,
|
|
"logits/chosen": 1.843554973602295,
|
|
"logits/rejected": 1.72433340549469,
|
|
"logps/chosen": -67.306396484375,
|
|
"logps/ref_chosen": -72.21119689941406,
|
|
"logps/ref_rejected": -88.30802917480469,
|
|
"logps/rejected": -85.96217346191406,
|
|
"loss": 1.1748,
|
|
"margin_dpo/margin_mean": 2.5589451789855957,
|
|
"margin_dpo/margin_std": 4.473442077636719,
|
|
"step": 137
|
|
},
|
|
{
|
|
"epoch": 0.20861678004535147,
|
|
"fcm_dpo/beta": 0.12128353118896484,
|
|
"fcm_dpo/delta": 0.08594699203968048,
|
|
"fcm_dpo/margin": 1.882028579711914,
|
|
"fcm_dpo/q_t": 0.45027002692222595,
|
|
"grad_norm": 20.4240779876709,
|
|
"learning_rate": 4.830618192112065e-07,
|
|
"logits/chosen": 1.2977592945098877,
|
|
"logits/rejected": 1.201406717300415,
|
|
"logps/chosen": -70.62525939941406,
|
|
"logps/ref_chosen": -74.54273223876953,
|
|
"logps/ref_rejected": -84.63615417480469,
|
|
"logps/rejected": -82.60069274902344,
|
|
"loss": 1.2605,
|
|
"margin_dpo/margin_mean": 1.8820289373397827,
|
|
"margin_dpo/margin_std": 5.117735385894775,
|
|
"step": 138
|
|
},
|
|
{
|
|
"epoch": 0.21012849584278157,
|
|
"fcm_dpo/beta": 0.12293344736099243,
|
|
"fcm_dpo/delta": 0.049421682953834534,
|
|
"fcm_dpo/margin": 1.9987208843231201,
|
|
"fcm_dpo/q_t": 0.4412422180175781,
|
|
"grad_norm": 24.668521881103516,
|
|
"learning_rate": 4.825801541160509e-07,
|
|
"logits/chosen": 1.370278000831604,
|
|
"logits/rejected": 1.3114217519760132,
|
|
"logps/chosen": -84.10897827148438,
|
|
"logps/ref_chosen": -87.63740539550781,
|
|
"logps/ref_rejected": -101.3896484375,
|
|
"logps/rejected": -99.85993957519531,
|
|
"loss": 1.2438,
|
|
"margin_dpo/margin_mean": 1.9987210035324097,
|
|
"margin_dpo/margin_std": 4.9212646484375,
|
|
"step": 139
|
|
},
|
|
{
|
|
"epoch": 0.21164021164021163,
|
|
"fcm_dpo/beta": 0.12314370274543762,
|
|
"fcm_dpo/delta": 0.0037242621183395386,
|
|
"fcm_dpo/margin": 3.2170228958129883,
|
|
"fcm_dpo/q_t": 0.4121108651161194,
|
|
"grad_norm": 24.398794174194336,
|
|
"learning_rate": 4.820919832540181e-07,
|
|
"logits/chosen": 1.258446216583252,
|
|
"logits/rejected": 1.1674163341522217,
|
|
"logps/chosen": -76.97390747070312,
|
|
"logps/ref_chosen": -81.32339477539062,
|
|
"logps/ref_rejected": -99.7275619506836,
|
|
"logps/rejected": -98.59510040283203,
|
|
"loss": 1.1392,
|
|
"margin_dpo/margin_mean": 3.2170233726501465,
|
|
"margin_dpo/margin_std": 5.567892074584961,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.21315192743764172,
|
|
"fcm_dpo/beta": 0.12346719205379486,
|
|
"fcm_dpo/delta": 0.01799967885017395,
|
|
"fcm_dpo/margin": 3.0974936485290527,
|
|
"fcm_dpo/q_t": 0.4152432382106781,
|
|
"grad_norm": 18.94412612915039,
|
|
"learning_rate": 4.815973202802966e-07,
|
|
"logits/chosen": 1.5887515544891357,
|
|
"logits/rejected": 1.4874029159545898,
|
|
"logps/chosen": -74.20223236083984,
|
|
"logps/ref_chosen": -78.08534240722656,
|
|
"logps/ref_rejected": -101.70516967773438,
|
|
"logps/rejected": -100.9195556640625,
|
|
"loss": 1.1532,
|
|
"margin_dpo/margin_mean": 3.0974936485290527,
|
|
"margin_dpo/margin_std": 5.584391117095947,
|
|
"step": 141
|
|
},
|
|
{
|
|
"epoch": 0.2146636432350718,
|
|
"fcm_dpo/beta": 0.12660640478134155,
|
|
"fcm_dpo/delta": 0.12214577943086624,
|
|
"fcm_dpo/margin": 2.2193617820739746,
|
|
"fcm_dpo/q_t": 0.4376070201396942,
|
|
"grad_norm": 20.792604446411133,
|
|
"learning_rate": 4.810961790316729e-07,
|
|
"logits/chosen": 1.5094895362854004,
|
|
"logits/rejected": 1.4517810344696045,
|
|
"logps/chosen": -79.07592010498047,
|
|
"logps/ref_chosen": -82.84616088867188,
|
|
"logps/ref_rejected": -95.14714050292969,
|
|
"logps/rejected": -93.59625244140625,
|
|
"loss": 1.2179,
|
|
"margin_dpo/margin_mean": 2.2193617820739746,
|
|
"margin_dpo/margin_std": 4.92032527923584,
|
|
"step": 142
|
|
},
|
|
{
|
|
"epoch": 0.2161753590325019,
|
|
"fcm_dpo/beta": 0.12763959169387817,
|
|
"fcm_dpo/delta": 0.03856246545910835,
|
|
"fcm_dpo/margin": 2.0553064346313477,
|
|
"fcm_dpo/q_t": 0.442947119474411,
|
|
"grad_norm": 29.368139266967773,
|
|
"learning_rate": 4.805885735261454e-07,
|
|
"logits/chosen": 1.50538170337677,
|
|
"logits/rejected": 1.4741663932800293,
|
|
"logps/chosen": -76.84846496582031,
|
|
"logps/ref_chosen": -80.29791259765625,
|
|
"logps/ref_rejected": -87.44291687011719,
|
|
"logps/rejected": -86.04878234863281,
|
|
"loss": 1.2779,
|
|
"margin_dpo/margin_mean": 2.055307388305664,
|
|
"margin_dpo/margin_std": 6.01151180267334,
|
|
"step": 143
|
|
},
|
|
{
|
|
"epoch": 0.21768707482993196,
|
|
"fcm_dpo/beta": 0.129078671336174,
|
|
"fcm_dpo/delta": 0.03681322559714317,
|
|
"fcm_dpo/margin": 1.5377318859100342,
|
|
"fcm_dpo/q_t": 0.4620184898376465,
|
|
"grad_norm": 30.103649139404297,
|
|
"learning_rate": 4.800745179625307e-07,
|
|
"logits/chosen": 1.5828511714935303,
|
|
"logits/rejected": 1.5261180400848389,
|
|
"logps/chosen": -76.47662353515625,
|
|
"logps/ref_chosen": -79.09429168701172,
|
|
"logps/ref_rejected": -92.42912292480469,
|
|
"logps/rejected": -91.34918212890625,
|
|
"loss": 1.3799,
|
|
"margin_dpo/margin_mean": 1.5377315282821655,
|
|
"margin_dpo/margin_std": 6.86276912689209,
|
|
"step": 144
|
|
},
|
|
{
|
|
"epoch": 0.21919879062736206,
|
|
"fcm_dpo/beta": 0.129638671875,
|
|
"fcm_dpo/delta": 0.03883426636457443,
|
|
"fcm_dpo/margin": 2.796900510787964,
|
|
"fcm_dpo/q_t": 0.41991177201271057,
|
|
"grad_norm": 27.09717559814453,
|
|
"learning_rate": 4.795540267200686e-07,
|
|
"logits/chosen": 1.4565389156341553,
|
|
"logits/rejected": 1.4920694828033447,
|
|
"logps/chosen": -94.13655090332031,
|
|
"logps/ref_chosen": -97.7087173461914,
|
|
"logps/ref_rejected": -97.63011169433594,
|
|
"logps/rejected": -96.85485076904297,
|
|
"loss": 1.2269,
|
|
"margin_dpo/margin_mean": 2.7969002723693848,
|
|
"margin_dpo/margin_std": 6.733441352844238,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 0.22071050642479215,
|
|
"fcm_dpo/beta": 0.13282790780067444,
|
|
"fcm_dpo/delta": 0.12798386812210083,
|
|
"fcm_dpo/margin": 2.072317123413086,
|
|
"fcm_dpo/q_t": 0.44036737084388733,
|
|
"grad_norm": 25.196712493896484,
|
|
"learning_rate": 4.790271143580173e-07,
|
|
"logits/chosen": 1.359689712524414,
|
|
"logits/rejected": 1.3533302545547485,
|
|
"logps/chosen": -72.72005462646484,
|
|
"logps/ref_chosen": -76.56294250488281,
|
|
"logps/ref_rejected": -83.78160095214844,
|
|
"logps/rejected": -82.01103210449219,
|
|
"loss": 1.261,
|
|
"margin_dpo/margin_mean": 2.072317361831665,
|
|
"margin_dpo/margin_std": 5.668929100036621,
|
|
"step": 146
|
|
},
|
|
{
|
|
"epoch": 0.2222222222222222,
|
|
"fcm_dpo/beta": 0.1359969526529312,
|
|
"fcm_dpo/delta": 0.132144033908844,
|
|
"fcm_dpo/margin": 1.9967001676559448,
|
|
"fcm_dpo/q_t": 0.44437921047210693,
|
|
"grad_norm": 28.042757034301758,
|
|
"learning_rate": 4.784937956152489e-07,
|
|
"logits/chosen": 1.5116376876831055,
|
|
"logits/rejected": 1.4199936389923096,
|
|
"logps/chosen": -80.23736572265625,
|
|
"logps/ref_chosen": -83.24113464355469,
|
|
"logps/ref_rejected": -97.50960540771484,
|
|
"logps/rejected": -96.50253295898438,
|
|
"loss": 1.285,
|
|
"margin_dpo/margin_mean": 1.9967000484466553,
|
|
"margin_dpo/margin_std": 5.979328155517578,
|
|
"step": 147
|
|
},
|
|
{
|
|
"epoch": 0.2237339380196523,
|
|
"fcm_dpo/beta": 0.13668569922447205,
|
|
"fcm_dpo/delta": -0.06743814051151276,
|
|
"fcm_dpo/margin": 3.392505407333374,
|
|
"fcm_dpo/q_t": 0.4045356810092926,
|
|
"grad_norm": 19.915895462036133,
|
|
"learning_rate": 4.779540854098347e-07,
|
|
"logits/chosen": 1.6008939743041992,
|
|
"logits/rejected": 1.3894622325897217,
|
|
"logps/chosen": -62.7821159362793,
|
|
"logps/ref_chosen": -66.36277770996094,
|
|
"logps/ref_rejected": -87.66487121582031,
|
|
"logps/rejected": -87.47671508789062,
|
|
"loss": 1.166,
|
|
"margin_dpo/margin_mean": 3.392505407333374,
|
|
"margin_dpo/margin_std": 6.748303413391113,
|
|
"step": 148
|
|
},
|
|
{
|
|
"epoch": 0.2252456538170824,
|
|
"fcm_dpo/beta": 0.13758441805839539,
|
|
"fcm_dpo/delta": 0.12122346460819244,
|
|
"fcm_dpo/margin": 2.0515592098236084,
|
|
"fcm_dpo/q_t": 0.43767067790031433,
|
|
"grad_norm": 24.13621711730957,
|
|
"learning_rate": 4.774079988386296e-07,
|
|
"logits/chosen": 1.406876802444458,
|
|
"logits/rejected": 1.2928733825683594,
|
|
"logps/chosen": -70.20016479492188,
|
|
"logps/ref_chosen": -72.0576171875,
|
|
"logps/ref_rejected": -83.94097900390625,
|
|
"logps/rejected": -84.13508605957031,
|
|
"loss": 1.2556,
|
|
"margin_dpo/margin_mean": 2.0515592098236084,
|
|
"margin_dpo/margin_std": 5.487130641937256,
|
|
"step": 149
|
|
},
|
|
{
|
|
"epoch": 0.22675736961451248,
|
|
"fcm_dpo/beta": 0.13595245778560638,
|
|
"fcm_dpo/delta": -0.11543023586273193,
|
|
"fcm_dpo/margin": 3.746461868286133,
|
|
"fcm_dpo/q_t": 0.39566880464553833,
|
|
"grad_norm": 24.9544734954834,
|
|
"learning_rate": 4.768555511768486e-07,
|
|
"logits/chosen": 1.7028197050094604,
|
|
"logits/rejected": 1.6237821578979492,
|
|
"logps/chosen": -83.20237731933594,
|
|
"logps/ref_chosen": -85.52684783935547,
|
|
"logps/ref_rejected": -108.37449645996094,
|
|
"logps/rejected": -109.79650115966797,
|
|
"loss": 1.1504,
|
|
"margin_dpo/margin_mean": 3.746461868286133,
|
|
"margin_dpo/margin_std": 7.140506267547607,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.22826908541194255,
|
|
"fcm_dpo/beta": 0.13261011242866516,
|
|
"fcm_dpo/delta": -0.1349739134311676,
|
|
"fcm_dpo/margin": 3.980025291442871,
|
|
"fcm_dpo/q_t": 0.38910791277885437,
|
|
"grad_norm": 20.386749267578125,
|
|
"learning_rate": 4.762967578776406e-07,
|
|
"logits/chosen": 1.5325286388397217,
|
|
"logits/rejected": 1.386871337890625,
|
|
"logps/chosen": -65.10581970214844,
|
|
"logps/ref_chosen": -69.160888671875,
|
|
"logps/ref_rejected": -91.42207336425781,
|
|
"logps/rejected": -91.34703063964844,
|
|
"loss": 1.0712,
|
|
"margin_dpo/margin_mean": 3.980025291442871,
|
|
"margin_dpo/margin_std": 6.253316879272461,
|
|
"step": 151
|
|
},
|
|
{
|
|
"epoch": 0.22978080120937264,
|
|
"fcm_dpo/beta": 0.13304655253887177,
|
|
"fcm_dpo/delta": 0.022187475115060806,
|
|
"fcm_dpo/margin": 2.838752508163452,
|
|
"fcm_dpo/q_t": 0.4203815460205078,
|
|
"grad_norm": 24.348512649536133,
|
|
"learning_rate": 4.757316345716553e-07,
|
|
"logits/chosen": 1.8199957609176636,
|
|
"logits/rejected": 1.6512593030929565,
|
|
"logps/chosen": -69.91305541992188,
|
|
"logps/ref_chosen": -72.48135375976562,
|
|
"logps/ref_rejected": -94.44818878173828,
|
|
"logps/rejected": -94.71864318847656,
|
|
"loss": 1.215,
|
|
"margin_dpo/margin_mean": 2.838752508163452,
|
|
"margin_dpo/margin_std": 6.552783489227295,
|
|
"step": 152
|
|
},
|
|
{
|
|
"epoch": 0.23129251700680273,
|
|
"fcm_dpo/beta": 0.13313767313957214,
|
|
"fcm_dpo/delta": -0.022764816880226135,
|
|
"fcm_dpo/margin": 3.1567814350128174,
|
|
"fcm_dpo/q_t": 0.4093555212020874,
|
|
"grad_norm": 22.365690231323242,
|
|
"learning_rate": 4.751601970666064e-07,
|
|
"logits/chosen": 1.4918171167373657,
|
|
"logits/rejected": 1.4286854267120361,
|
|
"logps/chosen": -87.7032470703125,
|
|
"logps/ref_chosen": -89.6655044555664,
|
|
"logps/ref_rejected": -90.67737579345703,
|
|
"logps/rejected": -91.87190246582031,
|
|
"loss": 1.1532,
|
|
"margin_dpo/margin_mean": 3.1567811965942383,
|
|
"margin_dpo/margin_std": 5.790462970733643,
|
|
"step": 153
|
|
},
|
|
{
|
|
"epoch": 0.2328042328042328,
|
|
"fcm_dpo/beta": 0.13354626297950745,
|
|
"fcm_dpo/delta": 0.10732235014438629,
|
|
"fcm_dpo/margin": 2.216388702392578,
|
|
"fcm_dpo/q_t": 0.4366697072982788,
|
|
"grad_norm": 25.555784225463867,
|
|
"learning_rate": 4.745824613468292e-07,
|
|
"logits/chosen": 1.8585283756256104,
|
|
"logits/rejected": 1.8039864301681519,
|
|
"logps/chosen": -73.84849548339844,
|
|
"logps/ref_chosen": -76.58096313476562,
|
|
"logps/ref_rejected": -78.18669891357422,
|
|
"logps/rejected": -77.67062377929688,
|
|
"loss": 1.2893,
|
|
"margin_dpo/margin_mean": 2.216388702392578,
|
|
"margin_dpo/margin_std": 6.572654724121094,
|
|
"step": 154
|
|
},
|
|
{
|
|
"epoch": 0.23431594860166288,
|
|
"fcm_dpo/beta": 0.1331787407398224,
|
|
"fcm_dpo/delta": -0.039101965725421906,
|
|
"fcm_dpo/margin": 3.2796497344970703,
|
|
"fcm_dpo/q_t": 0.4062679409980774,
|
|
"grad_norm": 27.18946075439453,
|
|
"learning_rate": 4.7399844357283393e-07,
|
|
"logits/chosen": 1.571397066116333,
|
|
"logits/rejected": 1.5497605800628662,
|
|
"logps/chosen": -80.94525146484375,
|
|
"logps/ref_chosen": -82.65617370605469,
|
|
"logps/ref_rejected": -95.52484130859375,
|
|
"logps/rejected": -97.09357452392578,
|
|
"loss": 1.1833,
|
|
"margin_dpo/margin_mean": 3.2796502113342285,
|
|
"margin_dpo/margin_std": 6.847566604614258,
|
|
"step": 155
|
|
},
|
|
{
|
|
"epoch": 0.23582766439909297,
|
|
"fcm_dpo/beta": 0.1313672512769699,
|
|
"fcm_dpo/delta": -0.10709728300571442,
|
|
"fcm_dpo/margin": 3.818819999694824,
|
|
"fcm_dpo/q_t": 0.39108410477638245,
|
|
"grad_norm": 24.178529739379883,
|
|
"learning_rate": 4.7340816008085305e-07,
|
|
"logits/chosen": 1.4785492420196533,
|
|
"logits/rejected": 1.3886404037475586,
|
|
"logps/chosen": -85.74685668945312,
|
|
"logps/ref_chosen": -87.66494750976562,
|
|
"logps/ref_rejected": -108.2437744140625,
|
|
"logps/rejected": -110.14449310302734,
|
|
"loss": 1.0881,
|
|
"margin_dpo/margin_mean": 3.818819522857666,
|
|
"margin_dpo/margin_std": 6.0646257400512695,
|
|
"step": 156
|
|
},
|
|
{
|
|
"epoch": 0.23733938019652306,
|
|
"fcm_dpo/beta": 0.12807613611221313,
|
|
"fcm_dpo/delta": -0.10187535732984543,
|
|
"fcm_dpo/margin": 2.6955342292785645,
|
|
"fcm_dpo/q_t": 0.42544877529144287,
|
|
"grad_norm": 20.409114837646484,
|
|
"learning_rate": 4.728116273823847e-07,
|
|
"logits/chosen": 1.3515766859054565,
|
|
"logits/rejected": 1.3558907508850098,
|
|
"logps/chosen": -67.93927001953125,
|
|
"logps/ref_chosen": -70.77095794677734,
|
|
"logps/ref_rejected": -78.78271484375,
|
|
"logps/rejected": -78.64656066894531,
|
|
"loss": 1.2191,
|
|
"margin_dpo/margin_mean": 2.6955342292785645,
|
|
"margin_dpo/margin_std": 6.043845176696777,
|
|
"step": 157
|
|
},
|
|
{
|
|
"epoch": 0.23885109599395313,
|
|
"fcm_dpo/beta": 0.1278986632823944,
|
|
"fcm_dpo/delta": -0.0014535868540406227,
|
|
"fcm_dpo/margin": 3.138162136077881,
|
|
"fcm_dpo/q_t": 0.4160592555999756,
|
|
"grad_norm": 23.344018936157227,
|
|
"learning_rate": 4.7220886216373085e-07,
|
|
"logits/chosen": 1.5847368240356445,
|
|
"logits/rejected": 1.5203303098678589,
|
|
"logps/chosen": -78.30911254882812,
|
|
"logps/ref_chosen": -81.21516418457031,
|
|
"logps/ref_rejected": -97.8381118774414,
|
|
"logps/rejected": -98.07023620605469,
|
|
"loss": 1.1989,
|
|
"margin_dpo/margin_mean": 3.138162136077881,
|
|
"margin_dpo/margin_std": 6.922576904296875,
|
|
"step": 158
|
|
},
|
|
{
|
|
"epoch": 0.24036281179138322,
|
|
"fcm_dpo/beta": 0.1267024725675583,
|
|
"fcm_dpo/delta": -0.06470802426338196,
|
|
"fcm_dpo/margin": 3.643596649169922,
|
|
"fcm_dpo/q_t": 0.39887940883636475,
|
|
"grad_norm": 20.22292709350586,
|
|
"learning_rate": 4.715998812855304e-07,
|
|
"logits/chosen": 1.5062403678894043,
|
|
"logits/rejected": 1.4325311183929443,
|
|
"logps/chosen": -69.44245910644531,
|
|
"logps/ref_chosen": -72.33412170410156,
|
|
"logps/ref_rejected": -89.49591064453125,
|
|
"logps/rejected": -90.24784851074219,
|
|
"loss": 1.123,
|
|
"margin_dpo/margin_mean": 3.6435976028442383,
|
|
"margin_dpo/margin_std": 6.381722450256348,
|
|
"step": 159
|
|
},
|
|
{
|
|
"epoch": 0.2418745275888133,
|
|
"fcm_dpo/beta": 0.12704569101333618,
|
|
"fcm_dpo/delta": 0.028272677212953568,
|
|
"fcm_dpo/margin": 2.9338912963867188,
|
|
"fcm_dpo/q_t": 0.4206444323062897,
|
|
"grad_norm": 20.26133155822754,
|
|
"learning_rate": 4.7098470178228755e-07,
|
|
"logits/chosen": 1.3379184007644653,
|
|
"logits/rejected": 1.1803221702575684,
|
|
"logps/chosen": -60.8831901550293,
|
|
"logps/ref_chosen": -63.26386260986328,
|
|
"logps/ref_rejected": -82.27867126464844,
|
|
"logps/rejected": -82.83187866210938,
|
|
"loss": 1.1803,
|
|
"margin_dpo/margin_mean": 2.933891773223877,
|
|
"margin_dpo/margin_std": 6.030610084533691,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.24338624338624337,
|
|
"fcm_dpo/beta": 0.12744706869125366,
|
|
"fcm_dpo/delta": -0.0032501202076673508,
|
|
"fcm_dpo/margin": 3.161600112915039,
|
|
"fcm_dpo/q_t": 0.4082825183868408,
|
|
"grad_norm": 19.699172973632812,
|
|
"learning_rate": 4.703633408618955e-07,
|
|
"logits/chosen": 1.5183749198913574,
|
|
"logits/rejected": 1.43423330783844,
|
|
"logps/chosen": -67.91627502441406,
|
|
"logps/ref_chosen": -70.69304656982422,
|
|
"logps/ref_rejected": -82.73606872558594,
|
|
"logps/rejected": -83.12089538574219,
|
|
"loss": 1.1607,
|
|
"margin_dpo/margin_mean": 3.161600112915039,
|
|
"margin_dpo/margin_std": 6.068305015563965,
|
|
"step": 161
|
|
},
|
|
{
|
|
"epoch": 0.24489795918367346,
|
|
"fcm_dpo/beta": 0.12381276488304138,
|
|
"fcm_dpo/delta": -0.1807851493358612,
|
|
"fcm_dpo/margin": 4.608911037445068,
|
|
"fcm_dpo/q_t": 0.37070053815841675,
|
|
"grad_norm": 21.9332275390625,
|
|
"learning_rate": 4.697358159051549e-07,
|
|
"logits/chosen": 1.7209584712982178,
|
|
"logits/rejected": 1.6164560317993164,
|
|
"logps/chosen": -86.98110961914062,
|
|
"logps/ref_chosen": -89.3046646118164,
|
|
"logps/ref_rejected": -114.05778503417969,
|
|
"logps/rejected": -116.34314727783203,
|
|
"loss": 1.0072,
|
|
"margin_dpo/margin_mean": 4.608911514282227,
|
|
"margin_dpo/margin_std": 5.768190383911133,
|
|
"step": 162
|
|
},
|
|
{
|
|
"epoch": 0.24640967498110355,
|
|
"fcm_dpo/beta": 0.12266433238983154,
|
|
"fcm_dpo/delta": -0.04616904258728027,
|
|
"fcm_dpo/margin": 3.6132428646087646,
|
|
"fcm_dpo/q_t": 0.40427160263061523,
|
|
"grad_norm": 20.070598602294922,
|
|
"learning_rate": 4.691021444652876e-07,
|
|
"logits/chosen": 1.5364042520523071,
|
|
"logits/rejected": 1.4361138343811035,
|
|
"logps/chosen": -65.273193359375,
|
|
"logps/ref_chosen": -68.61222076416016,
|
|
"logps/ref_rejected": -89.03155517578125,
|
|
"logps/rejected": -89.3057861328125,
|
|
"loss": 1.1298,
|
|
"margin_dpo/margin_mean": 3.6132428646087646,
|
|
"margin_dpo/margin_std": 6.241079807281494,
|
|
"step": 163
|
|
},
|
|
{
|
|
"epoch": 0.24792139077853365,
|
|
"fcm_dpo/beta": 0.1191563755273819,
|
|
"fcm_dpo/delta": -0.14316755533218384,
|
|
"fcm_dpo/margin": 4.495945930480957,
|
|
"fcm_dpo/q_t": 0.3841249942779541,
|
|
"grad_norm": 19.52849578857422,
|
|
"learning_rate": 4.6846234426744624e-07,
|
|
"logits/chosen": 1.4956767559051514,
|
|
"logits/rejected": 1.3199671506881714,
|
|
"logps/chosen": -70.1712646484375,
|
|
"logps/ref_chosen": -73.55902862548828,
|
|
"logps/ref_rejected": -94.16201782226562,
|
|
"logps/rejected": -95.27018737792969,
|
|
"loss": 1.0776,
|
|
"margin_dpo/margin_mean": 4.495945930480957,
|
|
"margin_dpo/margin_std": 6.860191345214844,
|
|
"step": 164
|
|
},
|
|
{
|
|
"epoch": 0.2494331065759637,
|
|
"fcm_dpo/beta": 0.11708627641201019,
|
|
"fcm_dpo/delta": -0.10044585913419724,
|
|
"fcm_dpo/margin": 4.228605270385742,
|
|
"fcm_dpo/q_t": 0.39115095138549805,
|
|
"grad_norm": 18.38561248779297,
|
|
"learning_rate": 4.678164332082175e-07,
|
|
"logits/chosen": 1.5222417116165161,
|
|
"logits/rejected": 1.3784247636795044,
|
|
"logps/chosen": -65.81343078613281,
|
|
"logps/ref_chosen": -68.67132568359375,
|
|
"logps/ref_rejected": -85.95689392089844,
|
|
"logps/rejected": -87.32759857177734,
|
|
"loss": 1.0836,
|
|
"margin_dpo/margin_mean": 4.2286057472229,
|
|
"margin_dpo/margin_std": 6.33597469329834,
|
|
"step": 165
|
|
},
|
|
{
|
|
"epoch": 0.2509448223733938,
|
|
"fcm_dpo/beta": 0.1159178614616394,
|
|
"fcm_dpo/delta": 0.04237668961286545,
|
|
"fcm_dpo/margin": 3.097219944000244,
|
|
"fcm_dpo/q_t": 0.42350584268569946,
|
|
"grad_norm": 21.20147705078125,
|
|
"learning_rate": 4.6716442935512214e-07,
|
|
"logits/chosen": 1.5135124921798706,
|
|
"logits/rejected": 1.2978200912475586,
|
|
"logps/chosen": -78.65182495117188,
|
|
"logps/ref_chosen": -80.89755249023438,
|
|
"logps/ref_rejected": -111.91075134277344,
|
|
"logps/rejected": -112.76225280761719,
|
|
"loss": 1.1855,
|
|
"margin_dpo/margin_mean": 3.097219467163086,
|
|
"margin_dpo/margin_std": 6.383951187133789,
|
|
"step": 166
|
|
},
|
|
{
|
|
"epoch": 0.25245653817082386,
|
|
"fcm_dpo/beta": 0.11525711417198181,
|
|
"fcm_dpo/delta": -0.03476811572909355,
|
|
"fcm_dpo/margin": 3.750250816345215,
|
|
"fcm_dpo/q_t": 0.4018850326538086,
|
|
"grad_norm": 18.29037094116211,
|
|
"learning_rate": 4.6650635094610966e-07,
|
|
"logits/chosen": 1.3140567541122437,
|
|
"logits/rejected": 1.2441596984863281,
|
|
"logps/chosen": -73.19277954101562,
|
|
"logps/ref_chosen": -76.73136138916016,
|
|
"logps/ref_rejected": -92.57389068603516,
|
|
"logps/rejected": -92.78555297851562,
|
|
"loss": 1.1115,
|
|
"margin_dpo/margin_mean": 3.750251293182373,
|
|
"margin_dpo/margin_std": 5.980134963989258,
|
|
"step": 167
|
|
},
|
|
{
|
|
"epoch": 0.25396825396825395,
|
|
"fcm_dpo/beta": 0.11641091853380203,
|
|
"fcm_dpo/delta": 0.05485351383686066,
|
|
"fcm_dpo/margin": 2.9811627864837646,
|
|
"fcm_dpo/q_t": 0.42268872261047363,
|
|
"grad_norm": 19.12883949279785,
|
|
"learning_rate": 4.6584221638904767e-07,
|
|
"logits/chosen": 1.411829948425293,
|
|
"logits/rejected": 1.2877988815307617,
|
|
"logps/chosen": -79.95015716552734,
|
|
"logps/ref_chosen": -82.63671112060547,
|
|
"logps/ref_rejected": -96.72691345214844,
|
|
"logps/rejected": -97.02153015136719,
|
|
"loss": 1.1565,
|
|
"margin_dpo/margin_mean": 2.981163501739502,
|
|
"margin_dpo/margin_std": 5.256748676300049,
|
|
"step": 168
|
|
},
|
|
{
|
|
"epoch": 0.25547996976568405,
|
|
"fcm_dpo/beta": 0.11612222343683243,
|
|
"fcm_dpo/delta": -0.07028884440660477,
|
|
"fcm_dpo/margin": 4.022111892700195,
|
|
"fcm_dpo/q_t": 0.39903855323791504,
|
|
"grad_norm": 20.48174476623535,
|
|
"learning_rate": 4.651720442612075e-07,
|
|
"logits/chosen": 1.6285090446472168,
|
|
"logits/rejected": 1.6098928451538086,
|
|
"logps/chosen": -74.69467163085938,
|
|
"logps/ref_chosen": -78.87673950195312,
|
|
"logps/ref_rejected": -94.18919372558594,
|
|
"logps/rejected": -94.02923583984375,
|
|
"loss": 1.1083,
|
|
"margin_dpo/margin_mean": 4.022111892700195,
|
|
"margin_dpo/margin_std": 6.698416709899902,
|
|
"step": 169
|
|
},
|
|
{
|
|
"epoch": 0.25699168556311414,
|
|
"fcm_dpo/beta": 0.1153804212808609,
|
|
"fcm_dpo/delta": -0.004839559551328421,
|
|
"fcm_dpo/margin": 2.7229254245758057,
|
|
"fcm_dpo/q_t": 0.4343235194683075,
|
|
"grad_norm": 20.735963821411133,
|
|
"learning_rate": 4.6449585330874425e-07,
|
|
"logits/chosen": 1.4833619594573975,
|
|
"logits/rejected": 1.5050253868103027,
|
|
"logps/chosen": -69.8090591430664,
|
|
"logps/ref_chosen": -73.35820007324219,
|
|
"logps/ref_rejected": -76.85077667236328,
|
|
"logps/rejected": -76.02455139160156,
|
|
"loss": 1.3037,
|
|
"margin_dpo/margin_mean": 2.7229256629943848,
|
|
"margin_dpo/margin_std": 8.234253883361816,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.2585034013605442,
|
|
"fcm_dpo/beta": 0.11291978508234024,
|
|
"fcm_dpo/delta": -0.09370312094688416,
|
|
"fcm_dpo/margin": 4.317270278930664,
|
|
"fcm_dpo/q_t": 0.3929103910923004,
|
|
"grad_norm": 18.698780059814453,
|
|
"learning_rate": 4.6381366244617224e-07,
|
|
"logits/chosen": 1.7400933504104614,
|
|
"logits/rejected": 1.5900516510009766,
|
|
"logps/chosen": -77.08047485351562,
|
|
"logps/ref_chosen": -80.4322738647461,
|
|
"logps/ref_rejected": -96.99999237060547,
|
|
"logps/rejected": -97.96546936035156,
|
|
"loss": 1.1195,
|
|
"margin_dpo/margin_mean": 4.3172712326049805,
|
|
"margin_dpo/margin_std": 7.4338483810424805,
|
|
"step": 171
|
|
},
|
|
{
|
|
"epoch": 0.2600151171579743,
|
|
"fcm_dpo/beta": 0.11300242692232132,
|
|
"fcm_dpo/delta": -0.033088989555835724,
|
|
"fcm_dpo/margin": 3.8189542293548584,
|
|
"fcm_dpo/q_t": 0.4061174988746643,
|
|
"grad_norm": 18.41037940979004,
|
|
"learning_rate": 4.631254907558365e-07,
|
|
"logits/chosen": 1.6603069305419922,
|
|
"logits/rejected": 1.5482029914855957,
|
|
"logps/chosen": -67.79273223876953,
|
|
"logps/ref_chosen": -70.45406341552734,
|
|
"logps/ref_rejected": -99.85603332519531,
|
|
"logps/rejected": -101.01365661621094,
|
|
"loss": 1.1392,
|
|
"margin_dpo/margin_mean": 3.8189544677734375,
|
|
"margin_dpo/margin_std": 6.91829252243042,
|
|
"step": 172
|
|
},
|
|
{
|
|
"epoch": 0.2615268329554044,
|
|
"fcm_dpo/beta": 0.10994724184274673,
|
|
"fcm_dpo/delta": -0.11195747554302216,
|
|
"fcm_dpo/margin": 3.7884159088134766,
|
|
"fcm_dpo/q_t": 0.41372668743133545,
|
|
"grad_norm": 21.74614715576172,
|
|
"learning_rate": 4.624313574873786e-07,
|
|
"logits/chosen": 1.496457815170288,
|
|
"logits/rejected": 1.2725635766983032,
|
|
"logps/chosen": -69.65293884277344,
|
|
"logps/ref_chosen": -72.15026092529297,
|
|
"logps/ref_rejected": -94.10212707519531,
|
|
"logps/rejected": -95.39321899414062,
|
|
"loss": 1.1928,
|
|
"margin_dpo/margin_mean": 3.7884161472320557,
|
|
"margin_dpo/margin_std": 7.9902544021606445,
|
|
"step": 173
|
|
},
|
|
{
|
|
"epoch": 0.26303854875283444,
|
|
"fcm_dpo/beta": 0.10757105052471161,
|
|
"fcm_dpo/delta": -0.1165534034371376,
|
|
"fcm_dpo/margin": 4.738400459289551,
|
|
"fcm_dpo/q_t": 0.3892877399921417,
|
|
"grad_norm": 18.56683349609375,
|
|
"learning_rate": 4.61731282057198e-07,
|
|
"logits/chosen": 1.6890777349472046,
|
|
"logits/rejected": 1.4859000444412231,
|
|
"logps/chosen": -73.22291564941406,
|
|
"logps/ref_chosen": -75.99629211425781,
|
|
"logps/ref_rejected": -106.2359619140625,
|
|
"logps/rejected": -108.20098876953125,
|
|
"loss": 1.0893,
|
|
"margin_dpo/margin_mean": 4.738400459289551,
|
|
"margin_dpo/margin_std": 7.599691390991211,
|
|
"step": 174
|
|
},
|
|
{
|
|
"epoch": 0.26455026455026454,
|
|
"fcm_dpo/beta": 0.10623383522033691,
|
|
"fcm_dpo/delta": -0.05162263289093971,
|
|
"fcm_dpo/margin": 4.22575569152832,
|
|
"fcm_dpo/q_t": 0.40296652913093567,
|
|
"grad_norm": 19.128814697265625,
|
|
"learning_rate": 4.6102528404790965e-07,
|
|
"logits/chosen": 1.7655320167541504,
|
|
"logits/rejected": 1.6901226043701172,
|
|
"logps/chosen": -81.8990249633789,
|
|
"logps/ref_chosen": -84.51177978515625,
|
|
"logps/ref_rejected": -104.46299743652344,
|
|
"logps/rejected": -106.07599639892578,
|
|
"loss": 1.1556,
|
|
"margin_dpo/margin_mean": 4.225754737854004,
|
|
"margin_dpo/margin_std": 8.150654792785645,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 0.2660619803476946,
|
|
"fcm_dpo/beta": 0.10651630163192749,
|
|
"fcm_dpo/delta": 0.009663296863436699,
|
|
"fcm_dpo/margin": 2.732745409011841,
|
|
"fcm_dpo/q_t": 0.4402855634689331,
|
|
"grad_norm": 21.021997451782227,
|
|
"learning_rate": 4.603133832077953e-07,
|
|
"logits/chosen": 1.352994441986084,
|
|
"logits/rejected": 1.2789125442504883,
|
|
"logps/chosen": -97.29902648925781,
|
|
"logps/ref_chosen": -98.2034912109375,
|
|
"logps/ref_rejected": -103.2023696899414,
|
|
"logps/rejected": -105.03064727783203,
|
|
"loss": 1.2824,
|
|
"margin_dpo/margin_mean": 2.732745409011841,
|
|
"margin_dpo/margin_std": 8.030749320983887,
|
|
"step": 176
|
|
},
|
|
{
|
|
"epoch": 0.2675736961451247,
|
|
"fcm_dpo/beta": 0.10103031247854233,
|
|
"fcm_dpo/delta": -0.35967448353767395,
|
|
"fcm_dpo/margin": 7.258904457092285,
|
|
"fcm_dpo/q_t": 0.3416978716850281,
|
|
"grad_norm": 20.529727935791016,
|
|
"learning_rate": 4.5959559945025183e-07,
|
|
"logits/chosen": 2.0382192134857178,
|
|
"logits/rejected": 1.7976688146591187,
|
|
"logps/chosen": -73.77230834960938,
|
|
"logps/ref_chosen": -78.029541015625,
|
|
"logps/ref_rejected": -112.57099914550781,
|
|
"logps/rejected": -115.57266235351562,
|
|
"loss": 0.9238,
|
|
"margin_dpo/margin_mean": 7.25890588760376,
|
|
"margin_dpo/margin_std": 8.024457931518555,
|
|
"step": 177
|
|
},
|
|
{
|
|
"epoch": 0.2690854119425548,
|
|
"fcm_dpo/beta": 0.09761218726634979,
|
|
"fcm_dpo/delta": -0.05611484497785568,
|
|
"fcm_dpo/margin": 4.6330156326293945,
|
|
"fcm_dpo/q_t": 0.3984643220901489,
|
|
"grad_norm": 16.481779098510742,
|
|
"learning_rate": 4.588719528532341e-07,
|
|
"logits/chosen": 1.4155144691467285,
|
|
"logits/rejected": 1.2903285026550293,
|
|
"logps/chosen": -76.18199157714844,
|
|
"logps/ref_chosen": -79.48869323730469,
|
|
"logps/ref_rejected": -96.62449645996094,
|
|
"logps/rejected": -97.9508056640625,
|
|
"loss": 1.0842,
|
|
"margin_dpo/margin_mean": 4.633016109466553,
|
|
"margin_dpo/margin_std": 6.5637617111206055,
|
|
"step": 178
|
|
},
|
|
{
|
|
"epoch": 0.2705971277399849,
|
|
"fcm_dpo/beta": 0.09835878014564514,
|
|
"fcm_dpo/delta": 0.011413969099521637,
|
|
"fcm_dpo/margin": 3.954383134841919,
|
|
"fcm_dpo/q_t": 0.4162752032279968,
|
|
"grad_norm": 17.731225967407227,
|
|
"learning_rate": 4.581424636586928e-07,
|
|
"logits/chosen": 1.7793190479278564,
|
|
"logits/rejected": 1.718389630317688,
|
|
"logps/chosen": -80.90934753417969,
|
|
"logps/ref_chosen": -84.5088119506836,
|
|
"logps/ref_rejected": -93.07945251464844,
|
|
"logps/rejected": -93.43437194824219,
|
|
"loss": 1.1847,
|
|
"margin_dpo/margin_mean": 3.954383134841919,
|
|
"margin_dpo/margin_std": 8.3207426071167,
|
|
"step": 179
|
|
},
|
|
{
|
|
"epoch": 0.272108843537415,
|
|
"fcm_dpo/beta": 0.09892720729112625,
|
|
"fcm_dpo/delta": 0.015113111585378647,
|
|
"fcm_dpo/margin": 3.8914146423339844,
|
|
"fcm_dpo/q_t": 0.4185601472854614,
|
|
"grad_norm": 16.331005096435547,
|
|
"learning_rate": 4.5740715227200897e-07,
|
|
"logits/chosen": 1.004435658454895,
|
|
"logits/rejected": 0.9405149221420288,
|
|
"logps/chosen": -70.37954711914062,
|
|
"logps/ref_chosen": -74.5645523071289,
|
|
"logps/ref_rejected": -81.02266693115234,
|
|
"logps/rejected": -80.72906494140625,
|
|
"loss": 1.1612,
|
|
"margin_dpo/margin_mean": 3.8914148807525635,
|
|
"margin_dpo/margin_std": 7.424806118011475,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.273620559334845,
|
|
"fcm_dpo/beta": 0.0971488356590271,
|
|
"fcm_dpo/delta": -0.08372343331575394,
|
|
"fcm_dpo/margin": 4.935826301574707,
|
|
"fcm_dpo/q_t": 0.39608198404312134,
|
|
"grad_norm": 19.247838973999023,
|
|
"learning_rate": 4.566660392614228e-07,
|
|
"logits/chosen": 1.4877427816390991,
|
|
"logits/rejected": 1.3681514263153076,
|
|
"logps/chosen": -73.86396789550781,
|
|
"logps/ref_chosen": -78.77166748046875,
|
|
"logps/ref_rejected": -98.29750061035156,
|
|
"logps/rejected": -98.32562255859375,
|
|
"loss": 1.0703,
|
|
"margin_dpo/margin_mean": 4.935826301574707,
|
|
"margin_dpo/margin_std": 7.191739082336426,
|
|
"step": 181
|
|
},
|
|
{
|
|
"epoch": 0.2751322751322751,
|
|
"fcm_dpo/beta": 0.09410342574119568,
|
|
"fcm_dpo/delta": -0.2044612020254135,
|
|
"fcm_dpo/margin": 6.296449661254883,
|
|
"fcm_dpo/q_t": 0.37253063917160034,
|
|
"grad_norm": 17.99175453186035,
|
|
"learning_rate": 4.5591914535745817e-07,
|
|
"logits/chosen": 2.003591537475586,
|
|
"logits/rejected": 1.784796953201294,
|
|
"logps/chosen": -72.50923156738281,
|
|
"logps/ref_chosen": -75.67765045166016,
|
|
"logps/ref_rejected": -107.47894287109375,
|
|
"logps/rejected": -110.60696411132812,
|
|
"loss": 1.0184,
|
|
"margin_dpo/margin_mean": 6.296449661254883,
|
|
"margin_dpo/margin_std": 8.511575698852539,
|
|
"step": 182
|
|
},
|
|
{
|
|
"epoch": 0.2766439909297052,
|
|
"fcm_dpo/beta": 0.09400541335344315,
|
|
"fcm_dpo/delta": 0.05271903797984123,
|
|
"fcm_dpo/margin": 2.27756667137146,
|
|
"fcm_dpo/q_t": 0.4522141218185425,
|
|
"grad_norm": 18.433788299560547,
|
|
"learning_rate": 4.551664914523433e-07,
|
|
"logits/chosen": 1.6212289333343506,
|
|
"logits/rejected": 1.5513460636138916,
|
|
"logps/chosen": -80.0240707397461,
|
|
"logps/ref_chosen": -79.99969482421875,
|
|
"logps/ref_rejected": -89.35220336914062,
|
|
"logps/rejected": -91.65414428710938,
|
|
"loss": 1.2929,
|
|
"margin_dpo/margin_mean": 2.277566909790039,
|
|
"margin_dpo/margin_std": 7.199431419372559,
|
|
"step": 183
|
|
},
|
|
{
|
|
"epoch": 0.2781557067271353,
|
|
"fcm_dpo/beta": 0.09329868853092194,
|
|
"fcm_dpo/delta": -0.021785538643598557,
|
|
"fcm_dpo/margin": 4.506135940551758,
|
|
"fcm_dpo/q_t": 0.4057055115699768,
|
|
"grad_norm": 14.286376953125,
|
|
"learning_rate": 4.544080985994258e-07,
|
|
"logits/chosen": 1.6556332111358643,
|
|
"logits/rejected": 1.5036529302597046,
|
|
"logps/chosen": -59.074684143066406,
|
|
"logps/ref_chosen": -62.133941650390625,
|
|
"logps/ref_rejected": -84.44404602050781,
|
|
"logps/rejected": -85.89093017578125,
|
|
"loss": 1.0965,
|
|
"margin_dpo/margin_mean": 4.506134986877441,
|
|
"margin_dpo/margin_std": 6.4260478019714355,
|
|
"step": 184
|
|
},
|
|
{
|
|
"epoch": 0.2796674225245654,
|
|
"fcm_dpo/beta": 0.09228262305259705,
|
|
"fcm_dpo/delta": -0.06251490116119385,
|
|
"fcm_dpo/margin": 4.972772598266602,
|
|
"fcm_dpo/q_t": 0.4024953842163086,
|
|
"grad_norm": 14.063694953918457,
|
|
"learning_rate": 4.5364398801258394e-07,
|
|
"logits/chosen": 1.575112223625183,
|
|
"logits/rejected": 1.4495570659637451,
|
|
"logps/chosen": -66.57908630371094,
|
|
"logps/ref_chosen": -67.93174743652344,
|
|
"logps/ref_rejected": -83.76744079589844,
|
|
"logps/rejected": -87.38755798339844,
|
|
"loss": 1.1587,
|
|
"margin_dpo/margin_mean": 4.97277307510376,
|
|
"margin_dpo/margin_std": 9.61676025390625,
|
|
"step": 185
|
|
},
|
|
{
|
|
"epoch": 0.2811791383219955,
|
|
"fcm_dpo/beta": 0.09120054543018341,
|
|
"fcm_dpo/delta": -0.07828307151794434,
|
|
"fcm_dpo/margin": 5.2014384269714355,
|
|
"fcm_dpo/q_t": 0.40300649404525757,
|
|
"grad_norm": 16.713613510131836,
|
|
"learning_rate": 4.5287418106563354e-07,
|
|
"logits/chosen": 1.347496509552002,
|
|
"logits/rejected": 1.2185564041137695,
|
|
"logps/chosen": -84.60653686523438,
|
|
"logps/ref_chosen": -86.22174072265625,
|
|
"logps/ref_rejected": -100.42019653320312,
|
|
"logps/rejected": -104.00642395019531,
|
|
"loss": 1.1461,
|
|
"margin_dpo/margin_mean": 5.201438903808594,
|
|
"margin_dpo/margin_std": 9.859893798828125,
|
|
"step": 186
|
|
},
|
|
{
|
|
"epoch": 0.28269085411942557,
|
|
"fcm_dpo/beta": 0.09041302651166916,
|
|
"fcm_dpo/delta": -0.028823696076869965,
|
|
"fcm_dpo/margin": 4.727651119232178,
|
|
"fcm_dpo/q_t": 0.4054437577724457,
|
|
"grad_norm": 17.96294593811035,
|
|
"learning_rate": 4.520986992917297e-07,
|
|
"logits/chosen": 1.6717660427093506,
|
|
"logits/rejected": 1.5524613857269287,
|
|
"logps/chosen": -93.56632995605469,
|
|
"logps/ref_chosen": -92.81202697753906,
|
|
"logps/ref_rejected": -117.28926086425781,
|
|
"logps/rejected": -122.77122497558594,
|
|
"loss": 1.1282,
|
|
"margin_dpo/margin_mean": 4.7276506423950195,
|
|
"margin_dpo/margin_std": 8.11026668548584,
|
|
"step": 187
|
|
},
|
|
{
|
|
"epoch": 0.2842025699168556,
|
|
"fcm_dpo/beta": 0.09080306440591812,
|
|
"fcm_dpo/delta": -0.020409882068634033,
|
|
"fcm_dpo/margin": 4.612438201904297,
|
|
"fcm_dpo/q_t": 0.4097273349761963,
|
|
"grad_norm": 16.726558685302734,
|
|
"learning_rate": 4.5131756438276466e-07,
|
|
"logits/chosen": 1.7009717226028442,
|
|
"logits/rejected": 1.5912370681762695,
|
|
"logps/chosen": -88.00601196289062,
|
|
"logps/ref_chosen": -87.85247802734375,
|
|
"logps/ref_rejected": -94.58252716064453,
|
|
"logps/rejected": -99.34849548339844,
|
|
"loss": 1.1286,
|
|
"margin_dpo/margin_mean": 4.612437725067139,
|
|
"margin_dpo/margin_std": 7.809675216674805,
|
|
"step": 188
|
|
},
|
|
{
|
|
"epoch": 0.2857142857142857,
|
|
"fcm_dpo/beta": 0.08787811547517776,
|
|
"fcm_dpo/delta": -0.1251910775899887,
|
|
"fcm_dpo/margin": 3.8941988945007324,
|
|
"fcm_dpo/q_t": 0.42143514752388,
|
|
"grad_norm": 19.706933975219727,
|
|
"learning_rate": 4.5053079818876096e-07,
|
|
"logits/chosen": 1.5907940864562988,
|
|
"logits/rejected": 1.5847728252410889,
|
|
"logps/chosen": -94.68544006347656,
|
|
"logps/ref_chosen": -95.00414276123047,
|
|
"logps/ref_rejected": -90.50090789794922,
|
|
"logps/rejected": -94.07640075683594,
|
|
"loss": 1.2243,
|
|
"margin_dpo/margin_mean": 3.8941988945007324,
|
|
"margin_dpo/margin_std": 8.557470321655273,
|
|
"step": 189
|
|
},
|
|
{
|
|
"epoch": 0.2872260015117158,
|
|
"fcm_dpo/beta": 0.08545570075511932,
|
|
"fcm_dpo/delta": -0.16769738495349884,
|
|
"fcm_dpo/margin": 6.525961399078369,
|
|
"fcm_dpo/q_t": 0.37398993968963623,
|
|
"grad_norm": 19.378028869628906,
|
|
"learning_rate": 4.4973842271726024e-07,
|
|
"logits/chosen": 1.5073564052581787,
|
|
"logits/rejected": 1.1648344993591309,
|
|
"logps/chosen": -69.2900390625,
|
|
"logps/ref_chosen": -70.79264831542969,
|
|
"logps/ref_rejected": -122.56155395507812,
|
|
"logps/rejected": -127.58491516113281,
|
|
"loss": 1.0211,
|
|
"margin_dpo/margin_mean": 6.525960922241211,
|
|
"margin_dpo/margin_std": 8.354592323303223,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.2887377173091459,
|
|
"fcm_dpo/beta": 0.08557368814945221,
|
|
"fcm_dpo/delta": 0.06597628444433212,
|
|
"fcm_dpo/margin": 3.9295034408569336,
|
|
"fcm_dpo/q_t": 0.4254741966724396,
|
|
"grad_norm": 17.88901138305664,
|
|
"learning_rate": 4.48940460132708e-07,
|
|
"logits/chosen": 1.7644071578979492,
|
|
"logits/rejected": 1.663835883140564,
|
|
"logps/chosen": -94.00773620605469,
|
|
"logps/ref_chosen": -92.15048217773438,
|
|
"logps/ref_rejected": -106.4153060913086,
|
|
"logps/rejected": -112.20206451416016,
|
|
"loss": 1.1986,
|
|
"margin_dpo/margin_mean": 3.9295034408569336,
|
|
"margin_dpo/margin_std": 8.408856391906738,
|
|
"step": 191
|
|
},
|
|
{
|
|
"epoch": 0.29024943310657597,
|
|
"fcm_dpo/beta": 0.08797520399093628,
|
|
"fcm_dpo/delta": 0.1374768316745758,
|
|
"fcm_dpo/margin": 3.0240793228149414,
|
|
"fcm_dpo/q_t": 0.43987464904785156,
|
|
"grad_norm": 13.99423885345459,
|
|
"learning_rate": 4.481369327558329e-07,
|
|
"logits/chosen": 1.7628107070922852,
|
|
"logits/rejected": 1.6950645446777344,
|
|
"logps/chosen": -70.67575073242188,
|
|
"logps/ref_chosen": -69.51527404785156,
|
|
"logps/ref_rejected": -80.15898132324219,
|
|
"logps/rejected": -84.34353637695312,
|
|
"loss": 1.2409,
|
|
"margin_dpo/margin_mean": 3.0240793228149414,
|
|
"margin_dpo/margin_std": 7.654026508331299,
|
|
"step": 192
|
|
},
|
|
{
|
|
"epoch": 0.29176114890400606,
|
|
"fcm_dpo/beta": 0.0865698754787445,
|
|
"fcm_dpo/delta": -0.14727595448493958,
|
|
"fcm_dpo/margin": 6.229554176330566,
|
|
"fcm_dpo/q_t": 0.38533905148506165,
|
|
"grad_norm": 14.288257598876953,
|
|
"learning_rate": 4.47327863063023e-07,
|
|
"logits/chosen": 1.495797038078308,
|
|
"logits/rejected": 1.507264256477356,
|
|
"logps/chosen": -72.861083984375,
|
|
"logps/ref_chosen": -73.43276977539062,
|
|
"logps/ref_rejected": -77.81238555908203,
|
|
"logps/rejected": -83.47024536132812,
|
|
"loss": 1.0529,
|
|
"margin_dpo/margin_mean": 6.229554176330566,
|
|
"margin_dpo/margin_std": 9.042031288146973,
|
|
"step": 193
|
|
},
|
|
{
|
|
"epoch": 0.29327286470143615,
|
|
"fcm_dpo/beta": 0.08675388991832733,
|
|
"fcm_dpo/delta": 0.04714217036962509,
|
|
"fcm_dpo/margin": 2.9702444076538086,
|
|
"fcm_dpo/q_t": 0.4436585009098053,
|
|
"grad_norm": 17.544597625732422,
|
|
"learning_rate": 4.4651327368569684e-07,
|
|
"logits/chosen": 1.4156489372253418,
|
|
"logits/rejected": 1.3410913944244385,
|
|
"logps/chosen": -78.87250518798828,
|
|
"logps/ref_chosen": -76.63236999511719,
|
|
"logps/ref_rejected": -85.67449188232422,
|
|
"logps/rejected": -90.88487243652344,
|
|
"loss": 1.3064,
|
|
"margin_dpo/margin_mean": 2.9702446460723877,
|
|
"margin_dpo/margin_std": 9.60417366027832,
|
|
"step": 194
|
|
},
|
|
{
|
|
"epoch": 0.2947845804988662,
|
|
"fcm_dpo/beta": 0.08641483634710312,
|
|
"fcm_dpo/delta": -0.002406056970357895,
|
|
"fcm_dpo/margin": 4.652583599090576,
|
|
"fcm_dpo/q_t": 0.4115176796913147,
|
|
"grad_norm": 16.650192260742188,
|
|
"learning_rate": 4.4569318740967043e-07,
|
|
"logits/chosen": 1.422691822052002,
|
|
"logits/rejected": 1.44659423828125,
|
|
"logps/chosen": -92.10873413085938,
|
|
"logps/ref_chosen": -89.43354797363281,
|
|
"logps/ref_rejected": -91.25908660888672,
|
|
"logps/rejected": -98.58685302734375,
|
|
"loss": 1.1326,
|
|
"margin_dpo/margin_mean": 4.652583122253418,
|
|
"margin_dpo/margin_std": 7.990112781524658,
|
|
"step": 195
|
|
},
|
|
{
|
|
"epoch": 0.2962962962962963,
|
|
"fcm_dpo/beta": 0.08771341294050217,
|
|
"fcm_dpo/delta": 0.04121620953083038,
|
|
"fcm_dpo/margin": 4.0985236167907715,
|
|
"fcm_dpo/q_t": 0.4218651056289673,
|
|
"grad_norm": 15.421774864196777,
|
|
"learning_rate": 4.448676271745197e-07,
|
|
"logits/chosen": 1.4758151769638062,
|
|
"logits/rejected": 1.361769199371338,
|
|
"logps/chosen": -76.75479888916016,
|
|
"logps/ref_chosen": -75.47528839111328,
|
|
"logps/ref_rejected": -99.37582397460938,
|
|
"logps/rejected": -104.75386047363281,
|
|
"loss": 1.1852,
|
|
"margin_dpo/margin_mean": 4.098524570465088,
|
|
"margin_dpo/margin_std": 8.5865478515625,
|
|
"step": 196
|
|
},
|
|
{
|
|
"epoch": 0.29780801209372637,
|
|
"fcm_dpo/beta": 0.0871208906173706,
|
|
"fcm_dpo/delta": -0.0760483369231224,
|
|
"fcm_dpo/margin": 5.416136264801025,
|
|
"fcm_dpo/q_t": 0.39775335788726807,
|
|
"grad_norm": 18.421781539916992,
|
|
"learning_rate": 4.440366160729392e-07,
|
|
"logits/chosen": 1.9163366556167603,
|
|
"logits/rejected": 1.760741114616394,
|
|
"logps/chosen": -67.76097106933594,
|
|
"logps/ref_chosen": -67.57392883300781,
|
|
"logps/ref_rejected": -89.97993469238281,
|
|
"logps/rejected": -95.58311462402344,
|
|
"loss": 1.1732,
|
|
"margin_dpo/margin_mean": 5.416136264801025,
|
|
"margin_dpo/margin_std": 10.692497253417969,
|
|
"step": 197
|
|
},
|
|
{
|
|
"epoch": 0.29931972789115646,
|
|
"fcm_dpo/beta": 0.08542082458734512,
|
|
"fcm_dpo/delta": -0.08968013525009155,
|
|
"fcm_dpo/margin": 5.680561542510986,
|
|
"fcm_dpo/q_t": 0.3916228413581848,
|
|
"grad_norm": 15.027694702148438,
|
|
"learning_rate": 4.432001773500957e-07,
|
|
"logits/chosen": 1.7204902172088623,
|
|
"logits/rejected": 1.608296513557434,
|
|
"logps/chosen": -78.00040435791016,
|
|
"logps/ref_chosen": -77.36013793945312,
|
|
"logps/ref_rejected": -90.55670166015625,
|
|
"logps/rejected": -96.87753295898438,
|
|
"loss": 1.0841,
|
|
"margin_dpo/margin_mean": 5.6805620193481445,
|
|
"margin_dpo/margin_std": 8.565601348876953,
|
|
"step": 198
|
|
},
|
|
{
|
|
"epoch": 0.30083144368858655,
|
|
"fcm_dpo/beta": 0.08444124460220337,
|
|
"fcm_dpo/delta": -0.018757252022624016,
|
|
"fcm_dpo/margin": 4.949807167053223,
|
|
"fcm_dpo/q_t": 0.4094560742378235,
|
|
"grad_norm": 16.759029388427734,
|
|
"learning_rate": 4.4235833440297856e-07,
|
|
"logits/chosen": 1.376772165298462,
|
|
"logits/rejected": 1.1592652797698975,
|
|
"logps/chosen": -74.80636596679688,
|
|
"logps/ref_chosen": -73.05004119873047,
|
|
"logps/ref_rejected": -95.21923065185547,
|
|
"logps/rejected": -101.92535400390625,
|
|
"loss": 1.194,
|
|
"margin_dpo/margin_mean": 4.949808120727539,
|
|
"margin_dpo/margin_std": 10.570771217346191,
|
|
"step": 199
|
|
},
|
|
{
|
|
"epoch": 0.30234315948601664,
|
|
"fcm_dpo/beta": 0.08244869858026505,
|
|
"fcm_dpo/delta": -0.09225839376449585,
|
|
"fcm_dpo/margin": 5.892239570617676,
|
|
"fcm_dpo/q_t": 0.3961694836616516,
|
|
"grad_norm": 15.525456428527832,
|
|
"learning_rate": 4.415111107797445e-07,
|
|
"logits/chosen": 1.6795740127563477,
|
|
"logits/rejected": 1.4383859634399414,
|
|
"logps/chosen": -73.5711441040039,
|
|
"logps/ref_chosen": -73.75833129882812,
|
|
"logps/ref_rejected": -105.00157165527344,
|
|
"logps/rejected": -110.70662689208984,
|
|
"loss": 1.1508,
|
|
"margin_dpo/margin_mean": 5.892240524291992,
|
|
"margin_dpo/margin_std": 11.194169998168945,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.30234315948601664,
|
|
"eval_fcm_dpo/beta": 0.0827222391963005,
|
|
"eval_logits/chosen": 1.640813946723938,
|
|
"eval_logits/rejected": 1.512371301651001,
|
|
"eval_logps/chosen": -86.83106994628906,
|
|
"eval_logps/ref_chosen": -86.90177917480469,
|
|
"eval_logps/ref_rejected": -96.69639587402344,
|
|
"eval_logps/rejected": -102.12706756591797,
|
|
"eval_loss": 0.5663687586784363,
|
|
"eval_margin_dpo/margin_mean": 5.50140905380249,
|
|
"eval_margin_dpo/margin_std": 9.768028259277344,
|
|
"eval_runtime": 42.2671,
|
|
"eval_samples_per_second": 54.487,
|
|
"eval_steps_per_second": 1.703,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.30385487528344673,
|
|
"fcm_dpo/beta": 0.08134329319000244,
|
|
"fcm_dpo/delta": -0.10130226612091064,
|
|
"fcm_dpo/margin": 6.098217964172363,
|
|
"fcm_dpo/q_t": 0.39153581857681274,
|
|
"grad_norm": 16.230323791503906,
|
|
"learning_rate": 4.4065853017905953e-07,
|
|
"logits/chosen": 1.7866268157958984,
|
|
"logits/rejected": 1.635867714881897,
|
|
"logps/chosen": -79.84793853759766,
|
|
"logps/ref_chosen": -79.4841079711914,
|
|
"logps/ref_rejected": -100.94435119628906,
|
|
"logps/rejected": -107.40640258789062,
|
|
"loss": 1.0776,
|
|
"margin_dpo/margin_mean": 6.098217964172363,
|
|
"margin_dpo/margin_std": 9.222325325012207,
|
|
"step": 201
|
|
},
|
|
{
|
|
"epoch": 0.30536659108087677,
|
|
"fcm_dpo/beta": 0.08060562610626221,
|
|
"fcm_dpo/delta": -0.08434365689754486,
|
|
"fcm_dpo/margin": 5.954107284545898,
|
|
"fcm_dpo/q_t": 0.395026832818985,
|
|
"grad_norm": 18.101303100585938,
|
|
"learning_rate": 4.3980061644943575e-07,
|
|
"logits/chosen": 1.5323119163513184,
|
|
"logits/rejected": 1.3420054912567139,
|
|
"logps/chosen": -65.55935668945312,
|
|
"logps/ref_chosen": -66.83952331542969,
|
|
"logps/ref_rejected": -93.05116271972656,
|
|
"logps/rejected": -97.72509765625,
|
|
"loss": 1.0971,
|
|
"margin_dpo/margin_mean": 5.954107284545898,
|
|
"margin_dpo/margin_std": 9.408431053161621,
|
|
"step": 202
|
|
},
|
|
{
|
|
"epoch": 0.30687830687830686,
|
|
"fcm_dpo/beta": 0.07931406050920486,
|
|
"fcm_dpo/delta": -0.04864976555109024,
|
|
"fcm_dpo/margin": 5.629288196563721,
|
|
"fcm_dpo/q_t": 0.4027126431465149,
|
|
"grad_norm": 15.56915283203125,
|
|
"learning_rate": 4.3893739358856455e-07,
|
|
"logits/chosen": 1.7592835426330566,
|
|
"logits/rejected": 1.4586081504821777,
|
|
"logps/chosen": -80.4451904296875,
|
|
"logps/ref_chosen": -80.32998657226562,
|
|
"logps/ref_rejected": -113.52803039550781,
|
|
"logps/rejected": -119.27251434326172,
|
|
"loss": 1.1022,
|
|
"margin_dpo/margin_mean": 5.6292877197265625,
|
|
"margin_dpo/margin_std": 8.988699913024902,
|
|
"step": 203
|
|
},
|
|
{
|
|
"epoch": 0.30839002267573695,
|
|
"fcm_dpo/beta": 0.07697254419326782,
|
|
"fcm_dpo/delta": -0.09150978177785873,
|
|
"fcm_dpo/margin": 6.280875205993652,
|
|
"fcm_dpo/q_t": 0.39389508962631226,
|
|
"grad_norm": 15.502331733703613,
|
|
"learning_rate": 4.380688857426449e-07,
|
|
"logits/chosen": 1.646850347518921,
|
|
"logits/rejected": 1.469982624053955,
|
|
"logps/chosen": -65.03179168701172,
|
|
"logps/ref_chosen": -66.68875885009766,
|
|
"logps/ref_rejected": -85.07585906982422,
|
|
"logps/rejected": -89.69976806640625,
|
|
"loss": 1.0762,
|
|
"margin_dpo/margin_mean": 6.280874252319336,
|
|
"margin_dpo/margin_std": 8.902972221374512,
|
|
"step": 204
|
|
},
|
|
{
|
|
"epoch": 0.30990173847316704,
|
|
"fcm_dpo/beta": 0.07790710031986237,
|
|
"fcm_dpo/delta": 0.00653502345085144,
|
|
"fcm_dpo/margin": 5.048940658569336,
|
|
"fcm_dpo/q_t": 0.41906195878982544,
|
|
"grad_norm": 16.413204193115234,
|
|
"learning_rate": 4.3719511720570814e-07,
|
|
"logits/chosen": 1.6623713970184326,
|
|
"logits/rejected": 1.5246727466583252,
|
|
"logps/chosen": -86.12635040283203,
|
|
"logps/ref_chosen": -86.51950073242188,
|
|
"logps/ref_rejected": -112.55376434326172,
|
|
"logps/rejected": -117.20954895019531,
|
|
"loss": 1.1914,
|
|
"margin_dpo/margin_mean": 5.048940658569336,
|
|
"margin_dpo/margin_std": 10.76982593536377,
|
|
"step": 205
|
|
},
|
|
{
|
|
"epoch": 0.31141345427059713,
|
|
"fcm_dpo/beta": 0.07731396704912186,
|
|
"fcm_dpo/delta": -0.03766930103302002,
|
|
"fcm_dpo/margin": 3.6308658123016357,
|
|
"fcm_dpo/q_t": 0.4388701319694519,
|
|
"grad_norm": 15.725306510925293,
|
|
"learning_rate": 4.363161124189387e-07,
|
|
"logits/chosen": 1.8234915733337402,
|
|
"logits/rejected": 1.7651959657669067,
|
|
"logps/chosen": -87.84285736083984,
|
|
"logps/ref_chosen": -88.68557739257812,
|
|
"logps/ref_rejected": -97.75945281982422,
|
|
"logps/rejected": -100.54759979248047,
|
|
"loss": 1.2596,
|
|
"margin_dpo/margin_mean": 3.6308655738830566,
|
|
"margin_dpo/margin_std": 9.579620361328125,
|
|
"step": 206
|
|
},
|
|
{
|
|
"epoch": 0.3129251700680272,
|
|
"fcm_dpo/beta": 0.07598337531089783,
|
|
"fcm_dpo/delta": -0.0768546462059021,
|
|
"fcm_dpo/margin": 6.224672317504883,
|
|
"fcm_dpo/q_t": 0.4005652666091919,
|
|
"grad_norm": 15.966635704040527,
|
|
"learning_rate": 4.3543189596998986e-07,
|
|
"logits/chosen": 1.6207809448242188,
|
|
"logits/rejected": 1.3825271129608154,
|
|
"logps/chosen": -85.65934753417969,
|
|
"logps/ref_chosen": -85.12134552001953,
|
|
"logps/ref_rejected": -103.34955596923828,
|
|
"logps/rejected": -110.11223602294922,
|
|
"loss": 1.0937,
|
|
"margin_dpo/margin_mean": 6.224671840667725,
|
|
"margin_dpo/margin_std": 10.07467269897461,
|
|
"step": 207
|
|
},
|
|
{
|
|
"epoch": 0.3144368858654573,
|
|
"fcm_dpo/beta": 0.07625681906938553,
|
|
"fcm_dpo/delta": 0.05362796038389206,
|
|
"fcm_dpo/margin": 3.379854917526245,
|
|
"fcm_dpo/q_t": 0.4436368942260742,
|
|
"grad_norm": 15.4779691696167,
|
|
"learning_rate": 4.3454249259229664e-07,
|
|
"logits/chosen": 1.4077962636947632,
|
|
"logits/rejected": 1.3891024589538574,
|
|
"logps/chosen": -76.3375473022461,
|
|
"logps/ref_chosen": -78.84121704101562,
|
|
"logps/ref_rejected": -89.82504272460938,
|
|
"logps/rejected": -90.70123291015625,
|
|
"loss": 1.2865,
|
|
"margin_dpo/margin_mean": 3.379855155944824,
|
|
"margin_dpo/margin_std": 10.172846794128418,
|
|
"step": 208
|
|
},
|
|
{
|
|
"epoch": 0.31594860166288735,
|
|
"fcm_dpo/beta": 0.074808269739151,
|
|
"fcm_dpo/delta": -0.16914093494415283,
|
|
"fcm_dpo/margin": 7.482547760009766,
|
|
"fcm_dpo/q_t": 0.3817582130432129,
|
|
"grad_norm": 14.531926155090332,
|
|
"learning_rate": 4.336479271643833e-07,
|
|
"logits/chosen": 1.150145411491394,
|
|
"logits/rejected": 1.0794119834899902,
|
|
"logps/chosen": -83.18844604492188,
|
|
"logps/ref_chosen": -85.98588562011719,
|
|
"logps/ref_rejected": -107.1638412475586,
|
|
"logps/rejected": -111.84893798828125,
|
|
"loss": 1.0456,
|
|
"margin_dpo/margin_mean": 7.482547760009766,
|
|
"margin_dpo/margin_std": 10.86307430267334,
|
|
"step": 209
|
|
},
|
|
{
|
|
"epoch": 0.31746031746031744,
|
|
"fcm_dpo/beta": 0.07286547869443893,
|
|
"fcm_dpo/delta": -0.15477873384952545,
|
|
"fcm_dpo/margin": 7.500222206115723,
|
|
"fcm_dpo/q_t": 0.37924668192863464,
|
|
"grad_norm": 15.154267311096191,
|
|
"learning_rate": 4.327482247091679e-07,
|
|
"logits/chosen": 1.9730315208435059,
|
|
"logits/rejected": 1.707854986190796,
|
|
"logps/chosen": -69.033203125,
|
|
"logps/ref_chosen": -71.75653076171875,
|
|
"logps/ref_rejected": -102.47966003417969,
|
|
"logps/rejected": -107.25654602050781,
|
|
"loss": 1.0331,
|
|
"margin_dpo/margin_mean": 7.5002217292785645,
|
|
"margin_dpo/margin_std": 9.832921981811523,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.31897203325774753,
|
|
"fcm_dpo/beta": 0.07181885093450546,
|
|
"fcm_dpo/delta": -0.03351927548646927,
|
|
"fcm_dpo/margin": 6.012731075286865,
|
|
"fcm_dpo/q_t": 0.40568870306015015,
|
|
"grad_norm": 13.956618309020996,
|
|
"learning_rate": 4.3184341039326217e-07,
|
|
"logits/chosen": 1.502807378768921,
|
|
"logits/rejected": 1.2806059122085571,
|
|
"logps/chosen": -67.71188354492188,
|
|
"logps/ref_chosen": -70.95170593261719,
|
|
"logps/ref_rejected": -108.51902770996094,
|
|
"logps/rejected": -111.29193115234375,
|
|
"loss": 1.1199,
|
|
"margin_dpo/margin_mean": 6.012731552124023,
|
|
"margin_dpo/margin_std": 10.054574966430664,
|
|
"step": 211
|
|
},
|
|
{
|
|
"epoch": 0.3204837490551776,
|
|
"fcm_dpo/beta": 0.06993983685970306,
|
|
"fcm_dpo/delta": -0.14037087559700012,
|
|
"fcm_dpo/margin": 7.620143890380859,
|
|
"fcm_dpo/q_t": 0.3832334280014038,
|
|
"grad_norm": 15.770182609558105,
|
|
"learning_rate": 4.309335095262675e-07,
|
|
"logits/chosen": 1.540845513343811,
|
|
"logits/rejected": 1.3966429233551025,
|
|
"logps/chosen": -70.31551361083984,
|
|
"logps/ref_chosen": -74.34010314941406,
|
|
"logps/ref_rejected": -97.58259582519531,
|
|
"logps/rejected": -101.17815399169922,
|
|
"loss": 1.0673,
|
|
"margin_dpo/margin_mean": 7.620143890380859,
|
|
"margin_dpo/margin_std": 11.508644104003906,
|
|
"step": 212
|
|
},
|
|
{
|
|
"epoch": 0.3219954648526077,
|
|
"fcm_dpo/beta": 0.06910738348960876,
|
|
"fcm_dpo/delta": -0.01402133610099554,
|
|
"fcm_dpo/margin": 5.980879306793213,
|
|
"fcm_dpo/q_t": 0.41256964206695557,
|
|
"grad_norm": 13.321818351745605,
|
|
"learning_rate": 4.3001854756006724e-07,
|
|
"logits/chosen": 1.6784493923187256,
|
|
"logits/rejected": 1.6832369565963745,
|
|
"logps/chosen": -75.61842346191406,
|
|
"logps/ref_chosen": -80.2526626586914,
|
|
"logps/ref_rejected": -94.76947021484375,
|
|
"logps/rejected": -96.11610412597656,
|
|
"loss": 1.1607,
|
|
"margin_dpo/margin_mean": 5.980880260467529,
|
|
"margin_dpo/margin_std": 11.716890335083008,
|
|
"step": 213
|
|
},
|
|
{
|
|
"epoch": 0.3235071806500378,
|
|
"fcm_dpo/beta": 0.06942355632781982,
|
|
"fcm_dpo/delta": 0.0006352830678224564,
|
|
"fcm_dpo/margin": 5.750314712524414,
|
|
"fcm_dpo/q_t": 0.4148133099079132,
|
|
"grad_norm": 22.03853416442871,
|
|
"learning_rate": 4.290985500881143e-07,
|
|
"logits/chosen": 1.4918372631072998,
|
|
"logits/rejected": 1.4288208484649658,
|
|
"logps/chosen": -73.66145324707031,
|
|
"logps/ref_chosen": -77.9675064086914,
|
|
"logps/ref_rejected": -84.0354232788086,
|
|
"logps/rejected": -85.47969055175781,
|
|
"loss": 1.1484,
|
|
"margin_dpo/margin_mean": 5.750315189361572,
|
|
"margin_dpo/margin_std": 10.650880813598633,
|
|
"step": 214
|
|
},
|
|
{
|
|
"epoch": 0.3250188964474679,
|
|
"fcm_dpo/beta": 0.06811805069446564,
|
|
"fcm_dpo/delta": -0.17968696355819702,
|
|
"fcm_dpo/margin": 8.354536056518555,
|
|
"fcm_dpo/q_t": 0.37755051255226135,
|
|
"grad_norm": 12.484495162963867,
|
|
"learning_rate": 4.281735428447157e-07,
|
|
"logits/chosen": 1.399054765701294,
|
|
"logits/rejected": 1.164167046546936,
|
|
"logps/chosen": -78.92556762695312,
|
|
"logps/ref_chosen": -81.2047348022461,
|
|
"logps/ref_rejected": -116.18414306640625,
|
|
"logps/rejected": -122.259521484375,
|
|
"loss": 1.0274,
|
|
"margin_dpo/margin_mean": 8.354536056518555,
|
|
"margin_dpo/margin_std": 10.667181015014648,
|
|
"step": 215
|
|
},
|
|
{
|
|
"epoch": 0.32653061224489793,
|
|
"fcm_dpo/beta": 0.06630684435367584,
|
|
"fcm_dpo/delta": -0.036182302981615067,
|
|
"fcm_dpo/margin": 6.55294132232666,
|
|
"fcm_dpo/q_t": 0.403317928314209,
|
|
"grad_norm": 14.035476684570312,
|
|
"learning_rate": 4.2724355170431247e-07,
|
|
"logits/chosen": 1.8719550371170044,
|
|
"logits/rejected": 1.6385178565979004,
|
|
"logps/chosen": -81.72055053710938,
|
|
"logps/ref_chosen": -83.57113647460938,
|
|
"logps/ref_rejected": -112.51902770996094,
|
|
"logps/rejected": -117.22138977050781,
|
|
"loss": 1.1132,
|
|
"margin_dpo/margin_mean": 6.552940368652344,
|
|
"margin_dpo/margin_std": 10.67953109741211,
|
|
"step": 216
|
|
},
|
|
{
|
|
"epoch": 0.328042328042328,
|
|
"fcm_dpo/beta": 0.0655495673418045,
|
|
"fcm_dpo/delta": -0.072674959897995,
|
|
"fcm_dpo/margin": 7.158425331115723,
|
|
"fcm_dpo/q_t": 0.40195178985595703,
|
|
"grad_norm": 12.58739185333252,
|
|
"learning_rate": 4.26308602680756e-07,
|
|
"logits/chosen": 1.6943777799606323,
|
|
"logits/rejected": 1.4496433734893799,
|
|
"logps/chosen": -76.8424301147461,
|
|
"logps/ref_chosen": -77.01390075683594,
|
|
"logps/ref_rejected": -105.28099822998047,
|
|
"logps/rejected": -112.26795196533203,
|
|
"loss": 1.1127,
|
|
"margin_dpo/margin_mean": 7.158425331115723,
|
|
"margin_dpo/margin_std": 12.355752944946289,
|
|
"step": 217
|
|
},
|
|
{
|
|
"epoch": 0.3295540438397581,
|
|
"fcm_dpo/beta": 0.06492602825164795,
|
|
"fcm_dpo/delta": -0.06278467178344727,
|
|
"fcm_dpo/margin": 4.715144634246826,
|
|
"fcm_dpo/q_t": 0.4337427616119385,
|
|
"grad_norm": 13.146940231323242,
|
|
"learning_rate": 4.253687219265803e-07,
|
|
"logits/chosen": 1.2216265201568604,
|
|
"logits/rejected": 1.214383602142334,
|
|
"logps/chosen": -92.66165924072266,
|
|
"logps/ref_chosen": -92.47299194335938,
|
|
"logps/ref_rejected": -92.80751037597656,
|
|
"logps/rejected": -97.71131896972656,
|
|
"loss": 1.2578,
|
|
"margin_dpo/margin_mean": 4.715145111083984,
|
|
"margin_dpo/margin_std": 12.045462608337402,
|
|
"step": 218
|
|
},
|
|
{
|
|
"epoch": 0.3310657596371882,
|
|
"fcm_dpo/beta": 0.06420271098613739,
|
|
"fcm_dpo/delta": -0.02842680737376213,
|
|
"fcm_dpo/margin": 6.653696060180664,
|
|
"fcm_dpo/q_t": 0.40601614117622375,
|
|
"grad_norm": 11.388204574584961,
|
|
"learning_rate": 4.2442393573227043e-07,
|
|
"logits/chosen": 1.2830934524536133,
|
|
"logits/rejected": 1.1858066320419312,
|
|
"logps/chosen": -76.35826110839844,
|
|
"logps/ref_chosen": -77.10382080078125,
|
|
"logps/ref_rejected": -92.3438949584961,
|
|
"logps/rejected": -98.25202941894531,
|
|
"loss": 1.1086,
|
|
"margin_dpo/margin_mean": 6.653696060180664,
|
|
"margin_dpo/margin_std": 10.547959327697754,
|
|
"step": 219
|
|
},
|
|
{
|
|
"epoch": 0.3325774754346183,
|
|
"fcm_dpo/beta": 0.06442397832870483,
|
|
"fcm_dpo/delta": 0.024653874337673187,
|
|
"fcm_dpo/margin": 5.8402934074401855,
|
|
"fcm_dpo/q_t": 0.41860347986221313,
|
|
"grad_norm": 12.554850578308105,
|
|
"learning_rate": 4.234742705255272e-07,
|
|
"logits/chosen": 1.8427441120147705,
|
|
"logits/rejected": 1.6540197134017944,
|
|
"logps/chosen": -60.972564697265625,
|
|
"logps/ref_chosen": -62.48021697998047,
|
|
"logps/ref_rejected": -86.93276977539062,
|
|
"logps/rejected": -91.26541137695312,
|
|
"loss": 1.1688,
|
|
"margin_dpo/margin_mean": 5.840293884277344,
|
|
"margin_dpo/margin_std": 11.364240646362305,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.3340891912320484,
|
|
"fcm_dpo/beta": 0.06460246443748474,
|
|
"fcm_dpo/delta": -0.025578338652849197,
|
|
"fcm_dpo/margin": 6.562741279602051,
|
|
"fcm_dpo/q_t": 0.41032537817955017,
|
|
"grad_norm": 14.2130126953125,
|
|
"learning_rate": 4.22519752870528e-07,
|
|
"logits/chosen": 1.5389156341552734,
|
|
"logits/rejected": 1.3322972059249878,
|
|
"logps/chosen": -77.62300109863281,
|
|
"logps/ref_chosen": -78.35491943359375,
|
|
"logps/ref_rejected": -108.17631530761719,
|
|
"logps/rejected": -114.00713348388672,
|
|
"loss": 1.1522,
|
|
"margin_dpo/margin_mean": 6.562740325927734,
|
|
"margin_dpo/margin_std": 12.393266677856445,
|
|
"step": 221
|
|
},
|
|
{
|
|
"epoch": 0.3356009070294785,
|
|
"fcm_dpo/beta": 0.06298163533210754,
|
|
"fcm_dpo/delta": -0.14094004034996033,
|
|
"fcm_dpo/margin": 8.472888946533203,
|
|
"fcm_dpo/q_t": 0.3822260797023773,
|
|
"grad_norm": 13.83692741394043,
|
|
"learning_rate": 4.2156040946718343e-07,
|
|
"logits/chosen": 1.9284300804138184,
|
|
"logits/rejected": 1.653240442276001,
|
|
"logps/chosen": -77.398193359375,
|
|
"logps/ref_chosen": -77.2734375,
|
|
"logps/ref_rejected": -126.41007995605469,
|
|
"logps/rejected": -135.00772094726562,
|
|
"loss": 1.0311,
|
|
"margin_dpo/margin_mean": 8.47288990020752,
|
|
"margin_dpo/margin_std": 10.996661186218262,
|
|
"step": 222
|
|
},
|
|
{
|
|
"epoch": 0.3371126228269085,
|
|
"fcm_dpo/beta": 0.06107974052429199,
|
|
"fcm_dpo/delta": -0.14540138840675354,
|
|
"fcm_dpo/margin": 8.803044319152832,
|
|
"fcm_dpo/q_t": 0.38139283657073975,
|
|
"grad_norm": 11.530991554260254,
|
|
"learning_rate": 4.2059626715039065e-07,
|
|
"logits/chosen": 1.380382776260376,
|
|
"logits/rejected": 1.242692232131958,
|
|
"logps/chosen": -79.19699096679688,
|
|
"logps/ref_chosen": -78.4210205078125,
|
|
"logps/ref_rejected": -101.38420867919922,
|
|
"logps/rejected": -110.96322631835938,
|
|
"loss": 1.036,
|
|
"margin_dpo/margin_mean": 8.803045272827148,
|
|
"margin_dpo/margin_std": 11.874062538146973,
|
|
"step": 223
|
|
},
|
|
{
|
|
"epoch": 0.3386243386243386,
|
|
"fcm_dpo/beta": 0.06099741533398628,
|
|
"fcm_dpo/delta": 0.058463674038648605,
|
|
"fcm_dpo/margin": 5.63276481628418,
|
|
"fcm_dpo/q_t": 0.4232059121131897,
|
|
"grad_norm": 15.79139232635498,
|
|
"learning_rate": 4.1962735288928304e-07,
|
|
"logits/chosen": 1.628061056137085,
|
|
"logits/rejected": 1.5595409870147705,
|
|
"logps/chosen": -81.38064575195312,
|
|
"logps/ref_chosen": -79.36337280273438,
|
|
"logps/ref_rejected": -89.99789428710938,
|
|
"logps/rejected": -97.64793395996094,
|
|
"loss": 1.1645,
|
|
"margin_dpo/margin_mean": 5.63276481628418,
|
|
"margin_dpo/margin_std": 10.35816764831543,
|
|
"step": 224
|
|
},
|
|
{
|
|
"epoch": 0.3401360544217687,
|
|
"fcm_dpo/beta": 0.05994480103254318,
|
|
"fcm_dpo/delta": -0.10221153497695923,
|
|
"fcm_dpo/margin": 8.266857147216797,
|
|
"fcm_dpo/q_t": 0.3963213264942169,
|
|
"grad_norm": 13.923603057861328,
|
|
"learning_rate": 4.186536937864752e-07,
|
|
"logits/chosen": 1.6372480392456055,
|
|
"logits/rejected": 1.35588800907135,
|
|
"logps/chosen": -91.33637237548828,
|
|
"logps/ref_chosen": -88.99606323242188,
|
|
"logps/ref_rejected": -127.55032348632812,
|
|
"logps/rejected": -138.15748596191406,
|
|
"loss": 1.121,
|
|
"margin_dpo/margin_mean": 8.266858100891113,
|
|
"margin_dpo/margin_std": 14.600061416625977,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 0.3416477702191988,
|
|
"fcm_dpo/beta": 0.059474602341651917,
|
|
"fcm_dpo/delta": -0.0711555927991867,
|
|
"fcm_dpo/margin": 7.866827964782715,
|
|
"fcm_dpo/q_t": 0.39824751019477844,
|
|
"grad_norm": 10.33656120300293,
|
|
"learning_rate": 4.176753170773052e-07,
|
|
"logits/chosen": 1.4881277084350586,
|
|
"logits/rejected": 1.3798588514328003,
|
|
"logps/chosen": -69.00434112548828,
|
|
"logps/ref_chosen": -68.68444061279297,
|
|
"logps/ref_rejected": -85.81898498535156,
|
|
"logps/rejected": -94.0057144165039,
|
|
"loss": 1.1187,
|
|
"margin_dpo/margin_mean": 7.866827011108398,
|
|
"margin_dpo/margin_std": 13.508081436157227,
|
|
"step": 226
|
|
},
|
|
{
|
|
"epoch": 0.3431594860166289,
|
|
"fcm_dpo/beta": 0.05821537971496582,
|
|
"fcm_dpo/delta": -0.07453415542840958,
|
|
"fcm_dpo/margin": 8.073860168457031,
|
|
"fcm_dpo/q_t": 0.4008241593837738,
|
|
"grad_norm": 11.849678993225098,
|
|
"learning_rate": 4.166922501290729e-07,
|
|
"logits/chosen": 1.849320411682129,
|
|
"logits/rejected": 1.7355461120605469,
|
|
"logps/chosen": -74.02799224853516,
|
|
"logps/ref_chosen": -72.52029418945312,
|
|
"logps/ref_rejected": -90.7720718383789,
|
|
"logps/rejected": -100.35362243652344,
|
|
"loss": 1.1423,
|
|
"margin_dpo/margin_mean": 8.073861122131348,
|
|
"margin_dpo/margin_std": 14.962955474853516,
|
|
"step": 227
|
|
},
|
|
{
|
|
"epoch": 0.34467120181405897,
|
|
"fcm_dpo/beta": 0.05832822620868683,
|
|
"fcm_dpo/delta": -0.017807405441999435,
|
|
"fcm_dpo/margin": 7.148486137390137,
|
|
"fcm_dpo/q_t": 0.410169780254364,
|
|
"grad_norm": 11.562895774841309,
|
|
"learning_rate": 4.1570452044027405e-07,
|
|
"logits/chosen": 1.5992982387542725,
|
|
"logits/rejected": 1.4247077703475952,
|
|
"logps/chosen": -73.79998779296875,
|
|
"logps/ref_chosen": -72.23167419433594,
|
|
"logps/ref_rejected": -95.45873260498047,
|
|
"logps/rejected": -104.175537109375,
|
|
"loss": 1.1584,
|
|
"margin_dpo/margin_mean": 7.148487567901611,
|
|
"margin_dpo/margin_std": 13.711039543151855,
|
|
"step": 228
|
|
},
|
|
{
|
|
"epoch": 0.34618291761148906,
|
|
"fcm_dpo/beta": 0.05742615461349487,
|
|
"fcm_dpo/delta": -0.07237652689218521,
|
|
"fcm_dpo/margin": 8.164986610412598,
|
|
"fcm_dpo/q_t": 0.3964681625366211,
|
|
"grad_norm": 11.010472297668457,
|
|
"learning_rate": 4.147121556398312e-07,
|
|
"logits/chosen": 1.90861177444458,
|
|
"logits/rejected": 1.6754601001739502,
|
|
"logps/chosen": -64.9649429321289,
|
|
"logps/ref_chosen": -66.88822174072266,
|
|
"logps/ref_rejected": -92.27890014648438,
|
|
"logps/rejected": -98.52061462402344,
|
|
"loss": 1.0953,
|
|
"margin_dpo/margin_mean": 8.164986610412598,
|
|
"margin_dpo/margin_std": 12.750991821289062,
|
|
"step": 229
|
|
},
|
|
{
|
|
"epoch": 0.3476946334089191,
|
|
"fcm_dpo/beta": 0.0579565167427063,
|
|
"fcm_dpo/delta": 0.03130243346095085,
|
|
"fcm_dpo/margin": 6.360637664794922,
|
|
"fcm_dpo/q_t": 0.4185139238834381,
|
|
"grad_norm": 13.974564552307129,
|
|
"learning_rate": 4.137151834863213e-07,
|
|
"logits/chosen": 1.3691972494125366,
|
|
"logits/rejected": 1.3622510433197021,
|
|
"logps/chosen": -76.63909912109375,
|
|
"logps/ref_chosen": -76.12332153320312,
|
|
"logps/ref_rejected": -78.19171905517578,
|
|
"logps/rejected": -85.06814575195312,
|
|
"loss": 1.194,
|
|
"margin_dpo/margin_mean": 6.360638618469238,
|
|
"margin_dpo/margin_std": 13.329750061035156,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.3492063492063492,
|
|
"fcm_dpo/beta": 0.056014273315668106,
|
|
"fcm_dpo/delta": -0.1440962851047516,
|
|
"fcm_dpo/margin": 9.541065216064453,
|
|
"fcm_dpo/q_t": 0.3836032450199127,
|
|
"grad_norm": 13.153295516967773,
|
|
"learning_rate": 4.1271363186719835e-07,
|
|
"logits/chosen": 1.2926297187805176,
|
|
"logits/rejected": 1.297825813293457,
|
|
"logps/chosen": -93.82791137695312,
|
|
"logps/ref_chosen": -92.45181274414062,
|
|
"logps/ref_rejected": -100.89735412597656,
|
|
"logps/rejected": -111.81452178955078,
|
|
"loss": 1.0597,
|
|
"margin_dpo/margin_mean": 9.541065216064453,
|
|
"margin_dpo/margin_std": 13.813907623291016,
|
|
"step": 231
|
|
},
|
|
{
|
|
"epoch": 0.3507180650037793,
|
|
"fcm_dpo/beta": 0.05636624991893768,
|
|
"fcm_dpo/delta": 0.038746319711208344,
|
|
"fcm_dpo/margin": 6.433258533477783,
|
|
"fcm_dpo/q_t": 0.42595604062080383,
|
|
"grad_norm": 52.55567932128906,
|
|
"learning_rate": 4.1170752879801436e-07,
|
|
"logits/chosen": 1.5060627460479736,
|
|
"logits/rejected": 1.4304571151733398,
|
|
"logps/chosen": -87.07777404785156,
|
|
"logps/ref_chosen": -86.75383758544922,
|
|
"logps/ref_rejected": -98.16909790039062,
|
|
"logps/rejected": -104.9262924194336,
|
|
"loss": 1.2126,
|
|
"margin_dpo/margin_mean": 6.433259010314941,
|
|
"margin_dpo/margin_std": 14.871390342712402,
|
|
"step": 232
|
|
},
|
|
{
|
|
"epoch": 0.35222978080120937,
|
|
"fcm_dpo/beta": 0.055456142872571945,
|
|
"fcm_dpo/delta": -0.08742889761924744,
|
|
"fcm_dpo/margin": 6.654336452484131,
|
|
"fcm_dpo/q_t": 0.4225269556045532,
|
|
"grad_norm": 10.734969139099121,
|
|
"learning_rate": 4.106969024216348e-07,
|
|
"logits/chosen": 1.5932424068450928,
|
|
"logits/rejected": 1.4768648147583008,
|
|
"logps/chosen": -72.19903564453125,
|
|
"logps/ref_chosen": -72.87556457519531,
|
|
"logps/ref_rejected": -85.22943115234375,
|
|
"logps/rejected": -91.20724487304688,
|
|
"loss": 1.1889,
|
|
"margin_dpo/margin_mean": 6.654335975646973,
|
|
"margin_dpo/margin_std": 13.669539451599121,
|
|
"step": 233
|
|
},
|
|
{
|
|
"epoch": 0.35374149659863946,
|
|
"fcm_dpo/beta": 0.05513335019350052,
|
|
"fcm_dpo/delta": -0.058548182249069214,
|
|
"fcm_dpo/margin": 6.796961784362793,
|
|
"fcm_dpo/q_t": 0.41953760385513306,
|
|
"grad_norm": 11.108580589294434,
|
|
"learning_rate": 4.09681781007452e-07,
|
|
"logits/chosen": 1.1654977798461914,
|
|
"logits/rejected": 1.123396635055542,
|
|
"logps/chosen": -68.56211853027344,
|
|
"logps/ref_chosen": -70.05477905273438,
|
|
"logps/ref_rejected": -68.7240982055664,
|
|
"logps/rejected": -74.02839660644531,
|
|
"loss": 1.1745,
|
|
"margin_dpo/margin_mean": 6.796961784362793,
|
|
"margin_dpo/margin_std": 13.235331535339355,
|
|
"step": 234
|
|
},
|
|
{
|
|
"epoch": 0.35525321239606955,
|
|
"fcm_dpo/beta": 0.05425561964511871,
|
|
"fcm_dpo/delta": -0.07811406254768372,
|
|
"fcm_dpo/margin": 8.745163917541504,
|
|
"fcm_dpo/q_t": 0.39355021715164185,
|
|
"grad_norm": 14.021645545959473,
|
|
"learning_rate": 4.08662192950594e-07,
|
|
"logits/chosen": 1.5307154655456543,
|
|
"logits/rejected": 1.5013929605484009,
|
|
"logps/chosen": -82.95243835449219,
|
|
"logps/ref_chosen": -85.86051940917969,
|
|
"logps/ref_rejected": -96.14663696289062,
|
|
"logps/rejected": -101.98373413085938,
|
|
"loss": 1.0673,
|
|
"margin_dpo/margin_mean": 8.745163917541504,
|
|
"margin_dpo/margin_std": 12.195037841796875,
|
|
"step": 235
|
|
},
|
|
{
|
|
"epoch": 0.35676492819349964,
|
|
"fcm_dpo/beta": 0.05361638218164444,
|
|
"fcm_dpo/delta": -0.029304729774594307,
|
|
"fcm_dpo/margin": 7.980991363525391,
|
|
"fcm_dpo/q_t": 0.40694406628608704,
|
|
"grad_norm": 11.097721099853516,
|
|
"learning_rate": 4.076381667711306e-07,
|
|
"logits/chosen": 1.3134417533874512,
|
|
"logits/rejected": 1.3008582592010498,
|
|
"logps/chosen": -89.44915771484375,
|
|
"logps/ref_chosen": -89.75252532958984,
|
|
"logps/ref_rejected": -99.28534698486328,
|
|
"logps/rejected": -106.96296691894531,
|
|
"loss": 1.168,
|
|
"margin_dpo/margin_mean": 7.980992317199707,
|
|
"margin_dpo/margin_std": 15.801910400390625,
|
|
"step": 236
|
|
},
|
|
{
|
|
"epoch": 0.35827664399092973,
|
|
"fcm_dpo/beta": 0.0544375479221344,
|
|
"fcm_dpo/delta": 0.109842948615551,
|
|
"fcm_dpo/margin": 5.391643524169922,
|
|
"fcm_dpo/q_t": 0.4364463984966278,
|
|
"grad_norm": 12.905435562133789,
|
|
"learning_rate": 4.066097311132753e-07,
|
|
"logits/chosen": 1.6906208992004395,
|
|
"logits/rejected": 1.6837265491485596,
|
|
"logps/chosen": -92.12542724609375,
|
|
"logps/ref_chosen": -92.59001922607422,
|
|
"logps/ref_rejected": -101.45584869384766,
|
|
"logps/rejected": -106.38289642333984,
|
|
"loss": 1.2223,
|
|
"margin_dpo/margin_mean": 5.391643524169922,
|
|
"margin_dpo/margin_std": 12.385063171386719,
|
|
"step": 237
|
|
},
|
|
{
|
|
"epoch": 0.35978835978835977,
|
|
"fcm_dpo/beta": 0.054263561964035034,
|
|
"fcm_dpo/delta": -0.04640674218535423,
|
|
"fcm_dpo/margin": 8.181036949157715,
|
|
"fcm_dpo/q_t": 0.4023571014404297,
|
|
"grad_norm": 10.64908218383789,
|
|
"learning_rate": 4.0557691474458414e-07,
|
|
"logits/chosen": 1.474362850189209,
|
|
"logits/rejected": 1.4516832828521729,
|
|
"logps/chosen": -80.7723388671875,
|
|
"logps/ref_chosen": -82.2470474243164,
|
|
"logps/ref_rejected": -92.59944152832031,
|
|
"logps/rejected": -99.30577850341797,
|
|
"loss": 1.1047,
|
|
"margin_dpo/margin_mean": 8.181037902832031,
|
|
"margin_dpo/margin_std": 12.993268013000488,
|
|
"step": 238
|
|
},
|
|
{
|
|
"epoch": 0.36130007558578986,
|
|
"fcm_dpo/beta": 0.05412614718079567,
|
|
"fcm_dpo/delta": -0.05782606452703476,
|
|
"fcm_dpo/margin": 8.406447410583496,
|
|
"fcm_dpo/q_t": 0.4042254686355591,
|
|
"grad_norm": 12.209634780883789,
|
|
"learning_rate": 4.045397465551513e-07,
|
|
"logits/chosen": 1.825083613395691,
|
|
"logits/rejected": 1.4843999147415161,
|
|
"logps/chosen": -75.3488998413086,
|
|
"logps/ref_chosen": -75.30878448486328,
|
|
"logps/ref_rejected": -131.2318115234375,
|
|
"logps/rejected": -139.67837524414062,
|
|
"loss": 1.1293,
|
|
"margin_dpo/margin_mean": 8.40644645690918,
|
|
"margin_dpo/margin_std": 14.963945388793945,
|
|
"step": 239
|
|
},
|
|
{
|
|
"epoch": 0.36281179138321995,
|
|
"fcm_dpo/beta": 0.05183475464582443,
|
|
"fcm_dpo/delta": -0.2632848024368286,
|
|
"fcm_dpo/margin": 12.479537010192871,
|
|
"fcm_dpo/q_t": 0.3565759062767029,
|
|
"grad_norm": 16.51203727722168,
|
|
"learning_rate": 4.0349825555680045e-07,
|
|
"logits/chosen": 1.3655710220336914,
|
|
"logits/rejected": 1.1419577598571777,
|
|
"logps/chosen": -68.95610809326172,
|
|
"logps/ref_chosen": -70.81785583496094,
|
|
"logps/ref_rejected": -98.53778076171875,
|
|
"logps/rejected": -109.15557861328125,
|
|
"loss": 0.9813,
|
|
"margin_dpo/margin_mean": 12.479536056518555,
|
|
"margin_dpo/margin_std": 15.227754592895508,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.36432350718065004,
|
|
"fcm_dpo/beta": 0.05159341171383858,
|
|
"fcm_dpo/delta": 0.07422710210084915,
|
|
"fcm_dpo/margin": 6.358600616455078,
|
|
"fcm_dpo/q_t": 0.4284597635269165,
|
|
"grad_norm": 16.396221160888672,
|
|
"learning_rate": 4.0245247088227377e-07,
|
|
"logits/chosen": 1.2168505191802979,
|
|
"logits/rejected": 1.1646525859832764,
|
|
"logps/chosen": -88.27828979492188,
|
|
"logps/ref_chosen": -88.60260772705078,
|
|
"logps/ref_rejected": -101.42214965820312,
|
|
"logps/rejected": -107.4564208984375,
|
|
"loss": 1.1956,
|
|
"margin_dpo/margin_mean": 6.35860013961792,
|
|
"margin_dpo/margin_std": 13.391414642333984,
|
|
"step": 241
|
|
},
|
|
{
|
|
"epoch": 0.36583522297808013,
|
|
"fcm_dpo/beta": 0.050503283739089966,
|
|
"fcm_dpo/delta": -0.1315668523311615,
|
|
"fcm_dpo/margin": 10.372234344482422,
|
|
"fcm_dpo/q_t": 0.3853452801704407,
|
|
"grad_norm": 13.189273834228516,
|
|
"learning_rate": 4.0140242178441665e-07,
|
|
"logits/chosen": 1.3361291885375977,
|
|
"logits/rejected": 1.249570608139038,
|
|
"logps/chosen": -75.59468841552734,
|
|
"logps/ref_chosen": -77.34110260009766,
|
|
"logps/ref_rejected": -84.76332092285156,
|
|
"logps/rejected": -93.38914489746094,
|
|
"loss": 1.0559,
|
|
"margin_dpo/margin_mean": 10.372234344482422,
|
|
"margin_dpo/margin_std": 14.9345703125,
|
|
"step": 242
|
|
},
|
|
{
|
|
"epoch": 0.3673469387755102,
|
|
"fcm_dpo/beta": 0.05032705143094063,
|
|
"fcm_dpo/delta": 0.002032870426774025,
|
|
"fcm_dpo/margin": 7.908749580383301,
|
|
"fcm_dpo/q_t": 0.41314005851745605,
|
|
"grad_norm": 13.510807991027832,
|
|
"learning_rate": 4.003481376353596e-07,
|
|
"logits/chosen": 1.4432936906814575,
|
|
"logits/rejected": 1.4497876167297363,
|
|
"logps/chosen": -95.52180480957031,
|
|
"logps/ref_chosen": -93.55897521972656,
|
|
"logps/ref_rejected": -89.33551025390625,
|
|
"logps/rejected": -99.20707702636719,
|
|
"loss": 1.1565,
|
|
"margin_dpo/margin_mean": 7.908749580383301,
|
|
"margin_dpo/margin_std": 14.790718078613281,
|
|
"step": 243
|
|
},
|
|
{
|
|
"epoch": 0.3688586545729403,
|
|
"fcm_dpo/beta": 0.0483069010078907,
|
|
"fcm_dpo/delta": -0.277275025844574,
|
|
"fcm_dpo/margin": 13.635225296020508,
|
|
"fcm_dpo/q_t": 0.3540143668651581,
|
|
"grad_norm": 9.192960739135742,
|
|
"learning_rate": 3.9928964792569654e-07,
|
|
"logits/chosen": 1.7651071548461914,
|
|
"logits/rejected": 1.618671178817749,
|
|
"logps/chosen": -69.32542419433594,
|
|
"logps/ref_chosen": -69.82603454589844,
|
|
"logps/ref_rejected": -92.4764175415039,
|
|
"logps/rejected": -105.61103057861328,
|
|
"loss": 0.9311,
|
|
"margin_dpo/margin_mean": 13.635225296020508,
|
|
"margin_dpo/margin_std": 14.008907318115234,
|
|
"step": 244
|
|
},
|
|
{
|
|
"epoch": 0.37037037037037035,
|
|
"fcm_dpo/beta": 0.04629525542259216,
|
|
"fcm_dpo/delta": -0.18837378919124603,
|
|
"fcm_dpo/margin": 12.46827220916748,
|
|
"fcm_dpo/q_t": 0.3724544048309326,
|
|
"grad_norm": 10.381243705749512,
|
|
"learning_rate": 3.982269822636601e-07,
|
|
"logits/chosen": 1.560800552368164,
|
|
"logits/rejected": 1.4880516529083252,
|
|
"logps/chosen": -89.27384948730469,
|
|
"logps/ref_chosen": -85.68216705322266,
|
|
"logps/ref_rejected": -93.8754653930664,
|
|
"logps/rejected": -109.9354248046875,
|
|
"loss": 1.0069,
|
|
"margin_dpo/margin_mean": 12.46827220916748,
|
|
"margin_dpo/margin_std": 15.679994583129883,
|
|
"step": 245
|
|
},
|
|
{
|
|
"epoch": 0.37188208616780044,
|
|
"fcm_dpo/beta": 0.045213352888822556,
|
|
"fcm_dpo/delta": -0.11620943248271942,
|
|
"fcm_dpo/margin": 11.289131164550781,
|
|
"fcm_dpo/q_t": 0.38982075452804565,
|
|
"grad_norm": 11.111617088317871,
|
|
"learning_rate": 3.971601703742932e-07,
|
|
"logits/chosen": 1.5159287452697754,
|
|
"logits/rejected": 1.3694238662719727,
|
|
"logps/chosen": -97.58953857421875,
|
|
"logps/ref_chosen": -90.05093383789062,
|
|
"logps/ref_rejected": -112.77645874023438,
|
|
"logps/rejected": -131.60418701171875,
|
|
"loss": 1.0648,
|
|
"margin_dpo/margin_mean": 11.289131164550781,
|
|
"margin_dpo/margin_std": 16.619611740112305,
|
|
"step": 246
|
|
},
|
|
{
|
|
"epoch": 0.37339380196523053,
|
|
"fcm_dpo/beta": 0.045018598437309265,
|
|
"fcm_dpo/delta": 0.03637855127453804,
|
|
"fcm_dpo/margin": 5.280495643615723,
|
|
"fcm_dpo/q_t": 0.44779178500175476,
|
|
"grad_norm": 14.453449249267578,
|
|
"learning_rate": 3.960892420986177e-07,
|
|
"logits/chosen": 1.7113162279129028,
|
|
"logits/rejected": 1.6601393222808838,
|
|
"logps/chosen": -112.38603973388672,
|
|
"logps/ref_chosen": -103.23979187011719,
|
|
"logps/ref_rejected": -105.26278686523438,
|
|
"logps/rejected": -119.68952941894531,
|
|
"loss": 1.2806,
|
|
"margin_dpo/margin_mean": 5.280494689941406,
|
|
"margin_dpo/margin_std": 15.77202033996582,
|
|
"step": 247
|
|
},
|
|
{
|
|
"epoch": 0.3749055177626606,
|
|
"fcm_dpo/beta": 0.045170750468969345,
|
|
"fcm_dpo/delta": -0.044324249029159546,
|
|
"fcm_dpo/margin": 9.78189468383789,
|
|
"fcm_dpo/q_t": 0.40565359592437744,
|
|
"grad_norm": 12.155468940734863,
|
|
"learning_rate": 3.9501422739279953e-07,
|
|
"logits/chosen": 1.5762226581573486,
|
|
"logits/rejected": 1.7077572345733643,
|
|
"logps/chosen": -95.51654052734375,
|
|
"logps/ref_chosen": -88.16007995605469,
|
|
"logps/ref_rejected": -75.11514282226562,
|
|
"logps/rejected": -92.25349426269531,
|
|
"loss": 1.1298,
|
|
"margin_dpo/margin_mean": 9.78189468383789,
|
|
"margin_dpo/margin_std": 17.028648376464844,
|
|
"step": 248
|
|
},
|
|
{
|
|
"epoch": 0.3764172335600907,
|
|
"fcm_dpo/beta": 0.04518796131014824,
|
|
"fcm_dpo/delta": 0.08948609232902527,
|
|
"fcm_dpo/margin": 3.5041308403015137,
|
|
"fcm_dpo/q_t": 0.46222302317619324,
|
|
"grad_norm": 11.555813789367676,
|
|
"learning_rate": 3.9393515632731094e-07,
|
|
"logits/chosen": 1.6086914539337158,
|
|
"logits/rejected": 1.6677453517913818,
|
|
"logps/chosen": -102.29704284667969,
|
|
"logps/ref_chosen": -91.01773071289062,
|
|
"logps/ref_rejected": -80.51113891601562,
|
|
"logps/rejected": -95.29458618164062,
|
|
"loss": 1.3803,
|
|
"margin_dpo/margin_mean": 3.5041308403015137,
|
|
"margin_dpo/margin_std": 17.459102630615234,
|
|
"step": 249
|
|
},
|
|
{
|
|
"epoch": 0.3779289493575208,
|
|
"fcm_dpo/beta": 0.04499515891075134,
|
|
"fcm_dpo/delta": -0.1342799812555313,
|
|
"fcm_dpo/margin": 11.716409683227539,
|
|
"fcm_dpo/q_t": 0.3838357627391815,
|
|
"grad_norm": 14.925415992736816,
|
|
"learning_rate": 3.9285205908608934e-07,
|
|
"logits/chosen": 1.6468373537063599,
|
|
"logits/rejected": 1.5891878604888916,
|
|
"logps/chosen": -91.09884643554688,
|
|
"logps/ref_chosen": -80.5888671875,
|
|
"logps/ref_rejected": -90.15093994140625,
|
|
"logps/rejected": -112.37733459472656,
|
|
"loss": 1.0709,
|
|
"margin_dpo/margin_mean": 11.716409683227539,
|
|
"margin_dpo/margin_std": 17.74170684814453,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.3794406651549509,
|
|
"fcm_dpo/beta": 0.04469338804483414,
|
|
"fcm_dpo/delta": 0.06509894132614136,
|
|
"fcm_dpo/margin": 7.542912483215332,
|
|
"fcm_dpo/q_t": 0.42707180976867676,
|
|
"grad_norm": 10.588440895080566,
|
|
"learning_rate": 3.9176496596569265e-07,
|
|
"logits/chosen": 1.8731662034988403,
|
|
"logits/rejected": 1.7417778968811035,
|
|
"logps/chosen": -93.5906982421875,
|
|
"logps/ref_chosen": -82.70405578613281,
|
|
"logps/ref_rejected": -98.94266510009766,
|
|
"logps/rejected": -117.37222290039062,
|
|
"loss": 1.195,
|
|
"margin_dpo/margin_mean": 7.542912483215332,
|
|
"margin_dpo/margin_std": 15.997113227844238,
|
|
"step": 251
|
|
},
|
|
{
|
|
"epoch": 0.38095238095238093,
|
|
"fcm_dpo/beta": 0.04474301263689995,
|
|
"fcm_dpo/delta": -0.04980228468775749,
|
|
"fcm_dpo/margin": 7.735275745391846,
|
|
"fcm_dpo/q_t": 0.42199695110321045,
|
|
"grad_norm": 10.110880851745605,
|
|
"learning_rate": 3.9067390737445254e-07,
|
|
"logits/chosen": 1.2721498012542725,
|
|
"logits/rejected": 1.1554539203643799,
|
|
"logps/chosen": -81.79570007324219,
|
|
"logps/ref_chosen": -73.10369110107422,
|
|
"logps/ref_rejected": -94.90235900878906,
|
|
"logps/rejected": -111.32964324951172,
|
|
"loss": 1.2083,
|
|
"margin_dpo/margin_mean": 7.735275745391846,
|
|
"margin_dpo/margin_std": 17.073593139648438,
|
|
"step": 252
|
|
},
|
|
{
|
|
"epoch": 0.382464096749811,
|
|
"fcm_dpo/beta": 0.044133760035037994,
|
|
"fcm_dpo/delta": -0.0435882993042469,
|
|
"fcm_dpo/margin": 6.770211696624756,
|
|
"fcm_dpo/q_t": 0.43785423040390015,
|
|
"grad_norm": 13.272875785827637,
|
|
"learning_rate": 3.8957891383162304e-07,
|
|
"logits/chosen": 1.7309565544128418,
|
|
"logits/rejected": 1.6086697578430176,
|
|
"logps/chosen": -79.59036254882812,
|
|
"logps/ref_chosen": -68.7789535522461,
|
|
"logps/ref_rejected": -75.98162078857422,
|
|
"logps/rejected": -93.563232421875,
|
|
"loss": 1.2324,
|
|
"margin_dpo/margin_mean": 6.770212173461914,
|
|
"margin_dpo/margin_std": 16.197599411010742,
|
|
"step": 253
|
|
},
|
|
{
|
|
"epoch": 0.3839758125472411,
|
|
"fcm_dpo/beta": 0.043851204216480255,
|
|
"fcm_dpo/delta": -0.030714038759469986,
|
|
"fcm_dpo/margin": 9.789338111877441,
|
|
"fcm_dpo/q_t": 0.40718260407447815,
|
|
"grad_norm": 12.212136268615723,
|
|
"learning_rate": 3.884800159665276e-07,
|
|
"logits/chosen": 1.1488699913024902,
|
|
"logits/rejected": 1.0198073387145996,
|
|
"logps/chosen": -92.68016815185547,
|
|
"logps/ref_chosen": -81.49362182617188,
|
|
"logps/ref_rejected": -101.43672943115234,
|
|
"logps/rejected": -122.4126205444336,
|
|
"loss": 1.1105,
|
|
"margin_dpo/margin_mean": 9.789339065551758,
|
|
"margin_dpo/margin_std": 15.794754028320312,
|
|
"step": 254
|
|
},
|
|
{
|
|
"epoch": 0.3854875283446712,
|
|
"fcm_dpo/beta": 0.04367159307003021,
|
|
"fcm_dpo/delta": 0.004656985402107239,
|
|
"fcm_dpo/margin": 9.045727729797363,
|
|
"fcm_dpo/q_t": 0.41058290004730225,
|
|
"grad_norm": 15.931432723999023,
|
|
"learning_rate": 3.873772445177015e-07,
|
|
"logits/chosen": 1.6107978820800781,
|
|
"logits/rejected": 1.5556628704071045,
|
|
"logps/chosen": -101.8770751953125,
|
|
"logps/ref_chosen": -90.46351623535156,
|
|
"logps/ref_rejected": -105.32445526123047,
|
|
"logps/rejected": -125.78373718261719,
|
|
"loss": 1.1632,
|
|
"margin_dpo/margin_mean": 9.045727729797363,
|
|
"margin_dpo/margin_std": 17.29879379272461,
|
|
"step": 255
|
|
},
|
|
{
|
|
"epoch": 0.3869992441421013,
|
|
"fcm_dpo/beta": 0.043372705578804016,
|
|
"fcm_dpo/delta": -0.06455010920763016,
|
|
"fcm_dpo/margin": 10.635354042053223,
|
|
"fcm_dpo/q_t": 0.401132732629776,
|
|
"grad_norm": 10.090229034423828,
|
|
"learning_rate": 3.862706303320329e-07,
|
|
"logits/chosen": 1.5428683757781982,
|
|
"logits/rejected": 1.3247222900390625,
|
|
"logps/chosen": -94.11489868164062,
|
|
"logps/ref_chosen": -81.56578063964844,
|
|
"logps/ref_rejected": -108.58460998535156,
|
|
"logps/rejected": -131.76907348632812,
|
|
"loss": 1.1149,
|
|
"margin_dpo/margin_mean": 10.635353088378906,
|
|
"margin_dpo/margin_std": 17.988861083984375,
|
|
"step": 256
|
|
},
|
|
{
|
|
"epoch": 0.3885109599395314,
|
|
"fcm_dpo/beta": 0.04340306669473648,
|
|
"fcm_dpo/delta": 0.01081980112940073,
|
|
"fcm_dpo/margin": 8.97639274597168,
|
|
"fcm_dpo/q_t": 0.41701555252075195,
|
|
"grad_norm": 14.49475383758545,
|
|
"learning_rate": 3.851602043638994e-07,
|
|
"logits/chosen": 1.6534913778305054,
|
|
"logits/rejected": 1.538425326347351,
|
|
"logps/chosen": -103.74288940429688,
|
|
"logps/ref_chosen": -89.57557678222656,
|
|
"logps/ref_rejected": -123.74462127685547,
|
|
"logps/rejected": -146.88832092285156,
|
|
"loss": 1.1878,
|
|
"margin_dpo/margin_mean": 8.97639274597168,
|
|
"margin_dpo/margin_std": 19.01647186279297,
|
|
"step": 257
|
|
},
|
|
{
|
|
"epoch": 0.3900226757369615,
|
|
"fcm_dpo/beta": 0.04306865483522415,
|
|
"fcm_dpo/delta": -0.06294693052768707,
|
|
"fcm_dpo/margin": 10.682682037353516,
|
|
"fcm_dpo/q_t": 0.3954845368862152,
|
|
"grad_norm": 15.105984687805176,
|
|
"learning_rate": 3.840459976743023e-07,
|
|
"logits/chosen": 1.445413589477539,
|
|
"logits/rejected": 1.294599175453186,
|
|
"logps/chosen": -91.37678527832031,
|
|
"logps/ref_chosen": -77.34173583984375,
|
|
"logps/ref_rejected": -99.5709228515625,
|
|
"logps/rejected": -124.28864288330078,
|
|
"loss": 1.0614,
|
|
"margin_dpo/margin_mean": 10.682682991027832,
|
|
"margin_dpo/margin_std": 13.988033294677734,
|
|
"step": 258
|
|
},
|
|
{
|
|
"epoch": 0.3915343915343915,
|
|
"fcm_dpo/beta": 0.041154563426971436,
|
|
"fcm_dpo/delta": -0.2557143568992615,
|
|
"fcm_dpo/margin": 15.510650634765625,
|
|
"fcm_dpo/q_t": 0.3617561161518097,
|
|
"grad_norm": 9.235639572143555,
|
|
"learning_rate": 3.8292804142999796e-07,
|
|
"logits/chosen": 1.6183881759643555,
|
|
"logits/rejected": 1.4107770919799805,
|
|
"logps/chosen": -90.75386047363281,
|
|
"logps/ref_chosen": -82.39556121826172,
|
|
"logps/ref_rejected": -113.73309326171875,
|
|
"logps/rejected": -137.60205078125,
|
|
"loss": 0.9892,
|
|
"margin_dpo/margin_mean": 15.510651588439941,
|
|
"margin_dpo/margin_std": 19.06281280517578,
|
|
"step": 259
|
|
},
|
|
{
|
|
"epoch": 0.3930461073318216,
|
|
"fcm_dpo/beta": 0.04060753434896469,
|
|
"fcm_dpo/delta": -0.0027508698403835297,
|
|
"fcm_dpo/margin": 9.910329818725586,
|
|
"fcm_dpo/q_t": 0.41329747438430786,
|
|
"grad_norm": 16.31837272644043,
|
|
"learning_rate": 3.818063669026256e-07,
|
|
"logits/chosen": 1.426304578781128,
|
|
"logits/rejected": 1.2057493925094604,
|
|
"logps/chosen": -78.43536376953125,
|
|
"logps/ref_chosen": -65.98947143554688,
|
|
"logps/ref_rejected": -94.59706115722656,
|
|
"logps/rejected": -116.95327758789062,
|
|
"loss": 1.2009,
|
|
"margin_dpo/margin_mean": 9.910329818725586,
|
|
"margin_dpo/margin_std": 21.698278427124023,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.3945578231292517,
|
|
"fcm_dpo/beta": 0.0410933792591095,
|
|
"fcm_dpo/delta": 0.07607575505971909,
|
|
"fcm_dpo/margin": 7.944557189941406,
|
|
"fcm_dpo/q_t": 0.4305505156517029,
|
|
"grad_norm": 12.40672779083252,
|
|
"learning_rate": 3.806810054678331e-07,
|
|
"logits/chosen": 1.302069902420044,
|
|
"logits/rejected": 1.3645875453948975,
|
|
"logps/chosen": -99.55830383300781,
|
|
"logps/ref_chosen": -88.87684631347656,
|
|
"logps/ref_rejected": -82.34838104248047,
|
|
"logps/rejected": -100.97440338134766,
|
|
"loss": 1.228,
|
|
"margin_dpo/margin_mean": 7.944557189941406,
|
|
"margin_dpo/margin_std": 18.96116828918457,
|
|
"step": 261
|
|
},
|
|
{
|
|
"epoch": 0.3960695389266818,
|
|
"fcm_dpo/beta": 0.04135086387395859,
|
|
"fcm_dpo/delta": -0.03751703351736069,
|
|
"fcm_dpo/margin": 10.531759262084961,
|
|
"fcm_dpo/q_t": 0.40397828817367554,
|
|
"grad_norm": 9.584962844848633,
|
|
"learning_rate": 3.7955198860439887e-07,
|
|
"logits/chosen": 1.743009090423584,
|
|
"logits/rejected": 1.5424811840057373,
|
|
"logps/chosen": -93.39926147460938,
|
|
"logps/ref_chosen": -85.81719970703125,
|
|
"logps/ref_rejected": -105.49027252197266,
|
|
"logps/rejected": -123.60408782958984,
|
|
"loss": 1.102,
|
|
"margin_dpo/margin_mean": 10.531759262084961,
|
|
"margin_dpo/margin_std": 16.00366973876953,
|
|
"step": 262
|
|
},
|
|
{
|
|
"epoch": 0.3975812547241119,
|
|
"fcm_dpo/beta": 0.040634773671627045,
|
|
"fcm_dpo/delta": -0.06253870576620102,
|
|
"fcm_dpo/margin": 11.31068229675293,
|
|
"fcm_dpo/q_t": 0.4001634120941162,
|
|
"grad_norm": 10.176654815673828,
|
|
"learning_rate": 3.784193478933516e-07,
|
|
"logits/chosen": 1.6961557865142822,
|
|
"logits/rejected": 1.3818693161010742,
|
|
"logps/chosen": -81.1441421508789,
|
|
"logps/ref_chosen": -73.61693572998047,
|
|
"logps/ref_rejected": -102.39161682128906,
|
|
"logps/rejected": -121.22950744628906,
|
|
"loss": 1.1071,
|
|
"margin_dpo/margin_mean": 11.31068229675293,
|
|
"margin_dpo/margin_std": 18.635833740234375,
|
|
"step": 263
|
|
},
|
|
{
|
|
"epoch": 0.39909297052154197,
|
|
"fcm_dpo/beta": 0.04004526510834694,
|
|
"fcm_dpo/delta": -0.07845936715602875,
|
|
"fcm_dpo/margin": 11.853878021240234,
|
|
"fcm_dpo/q_t": 0.39685431122779846,
|
|
"grad_norm": 9.623083114624023,
|
|
"learning_rate": 3.7728311501708674e-07,
|
|
"logits/chosen": 1.3879857063293457,
|
|
"logits/rejected": 1.2453668117523193,
|
|
"logps/chosen": -110.89766693115234,
|
|
"logps/ref_chosen": -101.57856750488281,
|
|
"logps/ref_rejected": -111.65735626220703,
|
|
"logps/rejected": -132.83033752441406,
|
|
"loss": 1.0743,
|
|
"margin_dpo/margin_mean": 11.853878021240234,
|
|
"margin_dpo/margin_std": 17.387245178222656,
|
|
"step": 264
|
|
},
|
|
{
|
|
"epoch": 0.40060468631897206,
|
|
"fcm_dpo/beta": 0.038992609828710556,
|
|
"fcm_dpo/delta": -0.1944998949766159,
|
|
"fcm_dpo/margin": 14.966062545776367,
|
|
"fcm_dpo/q_t": 0.3741992712020874,
|
|
"grad_norm": 9.02346420288086,
|
|
"learning_rate": 3.7614332175848027e-07,
|
|
"logits/chosen": 1.6460204124450684,
|
|
"logits/rejected": 1.5074365139007568,
|
|
"logps/chosen": -70.97564697265625,
|
|
"logps/ref_chosen": -65.76426696777344,
|
|
"logps/ref_rejected": -85.19627380371094,
|
|
"logps/rejected": -105.37371826171875,
|
|
"loss": 1.047,
|
|
"margin_dpo/margin_mean": 14.966060638427734,
|
|
"margin_dpo/margin_std": 21.565500259399414,
|
|
"step": 265
|
|
},
|
|
{
|
|
"epoch": 0.4021164021164021,
|
|
"fcm_dpo/beta": 0.03780415654182434,
|
|
"fcm_dpo/delta": -0.0783822163939476,
|
|
"fcm_dpo/margin": 12.542582511901855,
|
|
"fcm_dpo/q_t": 0.3959764838218689,
|
|
"grad_norm": 12.275056838989258,
|
|
"learning_rate": 3.75e-07,
|
|
"logits/chosen": 1.502774715423584,
|
|
"logits/rejected": 1.3121366500854492,
|
|
"logps/chosen": -79.05199432373047,
|
|
"logps/ref_chosen": -75.05682373046875,
|
|
"logps/ref_rejected": -97.52758026123047,
|
|
"logps/rejected": -114.06533813476562,
|
|
"loss": 1.0869,
|
|
"margin_dpo/margin_mean": 12.542583465576172,
|
|
"margin_dpo/margin_std": 19.404693603515625,
|
|
"step": 266
|
|
},
|
|
{
|
|
"epoch": 0.4036281179138322,
|
|
"fcm_dpo/beta": 0.038025856018066406,
|
|
"fcm_dpo/delta": -0.019957251846790314,
|
|
"fcm_dpo/margin": 10.98253059387207,
|
|
"fcm_dpo/q_t": 0.40621858835220337,
|
|
"grad_norm": 7.956915378570557,
|
|
"learning_rate": 3.738531817228131e-07,
|
|
"logits/chosen": 1.4474161863327026,
|
|
"logits/rejected": 1.3807919025421143,
|
|
"logps/chosen": -73.45161437988281,
|
|
"logps/ref_chosen": -71.13494110107422,
|
|
"logps/ref_rejected": -81.14566040039062,
|
|
"logps/rejected": -94.44486999511719,
|
|
"loss": 1.124,
|
|
"margin_dpo/margin_mean": 10.98253059387207,
|
|
"margin_dpo/margin_std": 17.64596176147461,
|
|
"step": 267
|
|
},
|
|
{
|
|
"epoch": 0.4051398337112623,
|
|
"fcm_dpo/beta": 0.037608757615089417,
|
|
"fcm_dpo/delta": -0.006927022244781256,
|
|
"fcm_dpo/margin": 7.760471820831299,
|
|
"fcm_dpo/q_t": 0.4383552074432373,
|
|
"grad_norm": 8.453049659729004,
|
|
"learning_rate": 3.7270289900589204e-07,
|
|
"logits/chosen": 1.5731756687164307,
|
|
"logits/rejected": 1.5207586288452148,
|
|
"logps/chosen": -83.85981750488281,
|
|
"logps/ref_chosen": -80.06082153320312,
|
|
"logps/ref_rejected": -87.43035888671875,
|
|
"logps/rejected": -98.98983001708984,
|
|
"loss": 1.2233,
|
|
"margin_dpo/margin_mean": 7.760472297668457,
|
|
"margin_dpo/margin_std": 18.012435913085938,
|
|
"step": 268
|
|
},
|
|
{
|
|
"epoch": 0.40665154950869237,
|
|
"fcm_dpo/beta": 0.03780782222747803,
|
|
"fcm_dpo/delta": -0.01912636309862137,
|
|
"fcm_dpo/margin": 11.029001235961914,
|
|
"fcm_dpo/q_t": 0.4066724181175232,
|
|
"grad_norm": 10.276360511779785,
|
|
"learning_rate": 3.7154918402511714e-07,
|
|
"logits/chosen": 1.6505131721496582,
|
|
"logits/rejected": 1.5838286876678467,
|
|
"logps/chosen": -89.04998016357422,
|
|
"logps/ref_chosen": -83.36944580078125,
|
|
"logps/ref_rejected": -100.66839599609375,
|
|
"logps/rejected": -117.37794494628906,
|
|
"loss": 1.1236,
|
|
"margin_dpo/margin_mean": 11.029001235961914,
|
|
"margin_dpo/margin_std": 17.85173797607422,
|
|
"step": 269
|
|
},
|
|
{
|
|
"epoch": 0.40816326530612246,
|
|
"fcm_dpo/beta": 0.03762676566839218,
|
|
"fcm_dpo/delta": 0.02844144031405449,
|
|
"fcm_dpo/margin": 9.90216064453125,
|
|
"fcm_dpo/q_t": 0.41595685482025146,
|
|
"grad_norm": 10.71731185913086,
|
|
"learning_rate": 3.7039206905237656e-07,
|
|
"logits/chosen": 1.6075557470321655,
|
|
"logits/rejected": 1.3942244052886963,
|
|
"logps/chosen": -90.50005340576172,
|
|
"logps/ref_chosen": -85.35945129394531,
|
|
"logps/ref_rejected": -104.47489929199219,
|
|
"logps/rejected": -119.51766967773438,
|
|
"loss": 1.1518,
|
|
"margin_dpo/margin_mean": 9.902159690856934,
|
|
"margin_dpo/margin_std": 17.91962242126465,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.40967498110355255,
|
|
"fcm_dpo/beta": 0.03780830278992653,
|
|
"fcm_dpo/delta": 0.020608875900506973,
|
|
"fcm_dpo/margin": 7.860053539276123,
|
|
"fcm_dpo/q_t": 0.4410387873649597,
|
|
"grad_norm": 10.222976684570312,
|
|
"learning_rate": 3.692315864546635e-07,
|
|
"logits/chosen": 1.4401030540466309,
|
|
"logits/rejected": 1.307854175567627,
|
|
"logps/chosen": -92.08882141113281,
|
|
"logps/ref_chosen": -86.01373291015625,
|
|
"logps/ref_rejected": -109.99561309814453,
|
|
"logps/rejected": -123.93075561523438,
|
|
"loss": 1.2552,
|
|
"margin_dpo/margin_mean": 7.860054016113281,
|
|
"margin_dpo/margin_std": 21.145322799682617,
|
|
"step": 271
|
|
},
|
|
{
|
|
"epoch": 0.41118669690098264,
|
|
"fcm_dpo/beta": 0.03660859167575836,
|
|
"fcm_dpo/delta": -0.21598142385482788,
|
|
"fcm_dpo/margin": 16.469074249267578,
|
|
"fcm_dpo/q_t": 0.3663064241409302,
|
|
"grad_norm": 11.490429878234863,
|
|
"learning_rate": 3.6806776869317067e-07,
|
|
"logits/chosen": 1.662217378616333,
|
|
"logits/rejected": 1.6814091205596924,
|
|
"logps/chosen": -88.33952331542969,
|
|
"logps/ref_chosen": -86.37013244628906,
|
|
"logps/ref_rejected": -85.74638366699219,
|
|
"logps/rejected": -104.1848373413086,
|
|
"loss": 0.9742,
|
|
"margin_dpo/margin_mean": 16.469074249267578,
|
|
"margin_dpo/margin_std": 18.660140991210938,
|
|
"step": 272
|
|
},
|
|
{
|
|
"epoch": 0.4126984126984127,
|
|
"fcm_dpo/beta": 0.03605865687131882,
|
|
"fcm_dpo/delta": -0.018457308411598206,
|
|
"fcm_dpo/margin": 11.581703186035156,
|
|
"fcm_dpo/q_t": 0.4078387916088104,
|
|
"grad_norm": 10.316041946411133,
|
|
"learning_rate": 3.669006483223828e-07,
|
|
"logits/chosen": 1.6665234565734863,
|
|
"logits/rejected": 1.5125619173049927,
|
|
"logps/chosen": -82.37163543701172,
|
|
"logps/ref_chosen": -75.51087951660156,
|
|
"logps/ref_rejected": -101.60345458984375,
|
|
"logps/rejected": -120.04591369628906,
|
|
"loss": 1.1738,
|
|
"margin_dpo/margin_mean": 11.581703186035156,
|
|
"margin_dpo/margin_std": 23.04998016357422,
|
|
"step": 273
|
|
},
|
|
{
|
|
"epoch": 0.41421012849584277,
|
|
"fcm_dpo/beta": 0.0356617271900177,
|
|
"fcm_dpo/delta": -0.07967354357242584,
|
|
"fcm_dpo/margin": 13.34488582611084,
|
|
"fcm_dpo/q_t": 0.3960227370262146,
|
|
"grad_norm": 7.991804599761963,
|
|
"learning_rate": 3.657302579891656e-07,
|
|
"logits/chosen": 1.3249926567077637,
|
|
"logits/rejected": 1.2660455703735352,
|
|
"logps/chosen": -85.18216705322266,
|
|
"logps/ref_chosen": -79.040283203125,
|
|
"logps/ref_rejected": -86.31329345703125,
|
|
"logps/rejected": -105.80005645751953,
|
|
"loss": 1.081,
|
|
"margin_dpo/margin_mean": 13.34488582611084,
|
|
"margin_dpo/margin_std": 19.965038299560547,
|
|
"step": 274
|
|
},
|
|
{
|
|
"epoch": 0.41572184429327286,
|
|
"fcm_dpo/beta": 0.03469148278236389,
|
|
"fcm_dpo/delta": -0.2013135701417923,
|
|
"fcm_dpo/margin": 17.002704620361328,
|
|
"fcm_dpo/q_t": 0.37113866209983826,
|
|
"grad_norm": 9.240900039672852,
|
|
"learning_rate": 3.645566304318526e-07,
|
|
"logits/chosen": 1.4302536249160767,
|
|
"logits/rejected": 1.2116038799285889,
|
|
"logps/chosen": -74.8931884765625,
|
|
"logps/ref_chosen": -71.82034301757812,
|
|
"logps/ref_rejected": -94.29946899414062,
|
|
"logps/rejected": -114.37501525878906,
|
|
"loss": 0.996,
|
|
"margin_dpo/margin_mean": 17.002704620361328,
|
|
"margin_dpo/margin_std": 20.238014221191406,
|
|
"step": 275
|
|
},
|
|
{
|
|
"epoch": 0.41723356009070295,
|
|
"fcm_dpo/beta": 0.03372751921415329,
|
|
"fcm_dpo/delta": -0.07244005799293518,
|
|
"fcm_dpo/margin": 13.906018257141113,
|
|
"fcm_dpo/q_t": 0.3970295786857605,
|
|
"grad_norm": 13.785542488098145,
|
|
"learning_rate": 3.633797984793294e-07,
|
|
"logits/chosen": 1.2479619979858398,
|
|
"logits/rejected": 1.1853089332580566,
|
|
"logps/chosen": -73.82440185546875,
|
|
"logps/ref_chosen": -69.54020690917969,
|
|
"logps/ref_rejected": -78.59674072265625,
|
|
"logps/rejected": -96.78695678710938,
|
|
"loss": 1.0941,
|
|
"margin_dpo/margin_mean": 13.906018257141113,
|
|
"margin_dpo/margin_std": 21.78231430053711,
|
|
"step": 276
|
|
},
|
|
{
|
|
"epoch": 0.41874527588813304,
|
|
"fcm_dpo/beta": 0.03437428921461105,
|
|
"fcm_dpo/delta": 0.14951635897159576,
|
|
"fcm_dpo/margin": 7.397580146789551,
|
|
"fcm_dpo/q_t": 0.4457745850086212,
|
|
"grad_norm": 10.012701034545898,
|
|
"learning_rate": 3.6219979505011555e-07,
|
|
"logits/chosen": 1.7152776718139648,
|
|
"logits/rejected": 1.794743537902832,
|
|
"logps/chosen": -104.70123291015625,
|
|
"logps/ref_chosen": -94.4896240234375,
|
|
"logps/ref_rejected": -85.45901489257812,
|
|
"logps/rejected": -103.06820678710938,
|
|
"loss": 1.281,
|
|
"margin_dpo/margin_mean": 7.397579669952393,
|
|
"margin_dpo/margin_std": 22.03266716003418,
|
|
"step": 277
|
|
},
|
|
{
|
|
"epoch": 0.42025699168556313,
|
|
"fcm_dpo/beta": 0.034356892108917236,
|
|
"fcm_dpo/delta": -0.08088916540145874,
|
|
"fcm_dpo/margin": 10.672147750854492,
|
|
"fcm_dpo/q_t": 0.4208662509918213,
|
|
"grad_norm": 11.455450057983398,
|
|
"learning_rate": 3.6101665315144353e-07,
|
|
"logits/chosen": 1.6356345415115356,
|
|
"logits/rejected": 1.4460411071777344,
|
|
"logps/chosen": -97.5278549194336,
|
|
"logps/ref_chosen": -87.42613220214844,
|
|
"logps/ref_rejected": -105.44854736328125,
|
|
"logps/rejected": -126.222412109375,
|
|
"loss": 1.1979,
|
|
"margin_dpo/margin_mean": 10.672147750854492,
|
|
"margin_dpo/margin_std": 22.455184936523438,
|
|
"step": 278
|
|
},
|
|
{
|
|
"epoch": 0.4217687074829932,
|
|
"fcm_dpo/beta": 0.03291717916727066,
|
|
"fcm_dpo/delta": -0.25346609950065613,
|
|
"fcm_dpo/margin": 19.376266479492188,
|
|
"fcm_dpo/q_t": 0.3584246337413788,
|
|
"grad_norm": 10.21809196472168,
|
|
"learning_rate": 3.5983040587833563e-07,
|
|
"logits/chosen": 1.4293241500854492,
|
|
"logits/rejected": 1.3572511672973633,
|
|
"logps/chosen": -71.79463195800781,
|
|
"logps/ref_chosen": -70.516845703125,
|
|
"logps/ref_rejected": -86.04249572753906,
|
|
"logps/rejected": -106.69654846191406,
|
|
"loss": 0.9563,
|
|
"margin_dpo/margin_mean": 19.376266479492188,
|
|
"margin_dpo/margin_std": 21.07408905029297,
|
|
"step": 279
|
|
},
|
|
{
|
|
"epoch": 0.42328042328042326,
|
|
"fcm_dpo/beta": 0.031365811824798584,
|
|
"fcm_dpo/delta": -0.20678894221782684,
|
|
"fcm_dpo/margin": 18.94228744506836,
|
|
"fcm_dpo/q_t": 0.36702173948287964,
|
|
"grad_norm": 10.397953987121582,
|
|
"learning_rate": 3.586410864126781e-07,
|
|
"logits/chosen": 1.5499365329742432,
|
|
"logits/rejected": 1.4086581468582153,
|
|
"logps/chosen": -82.18923950195312,
|
|
"logps/ref_chosen": -76.5021743774414,
|
|
"logps/ref_rejected": -94.2752685546875,
|
|
"logps/rejected": -118.90461730957031,
|
|
"loss": 0.9775,
|
|
"margin_dpo/margin_mean": 18.94228744506836,
|
|
"margin_dpo/margin_std": 21.44241714477539,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.42479213907785335,
|
|
"fcm_dpo/beta": 0.030857127159833908,
|
|
"fcm_dpo/delta": -0.04540977627038956,
|
|
"fcm_dpo/margin": 14.368467330932617,
|
|
"fcm_dpo/q_t": 0.4017066955566406,
|
|
"grad_norm": 8.291491508483887,
|
|
"learning_rate": 3.574487280222929e-07,
|
|
"logits/chosen": 1.611108422279358,
|
|
"logits/rejected": 1.6434032917022705,
|
|
"logps/chosen": -85.33029174804688,
|
|
"logps/ref_chosen": -77.50468444824219,
|
|
"logps/ref_rejected": -79.05717468261719,
|
|
"logps/rejected": -101.25123596191406,
|
|
"loss": 1.1022,
|
|
"margin_dpo/margin_mean": 14.368467330932617,
|
|
"margin_dpo/margin_std": 22.488018035888672,
|
|
"step": 281
|
|
},
|
|
{
|
|
"epoch": 0.42630385487528344,
|
|
"fcm_dpo/beta": 0.03047410398721695,
|
|
"fcm_dpo/delta": -0.10886166244745255,
|
|
"fcm_dpo/margin": 16.517818450927734,
|
|
"fcm_dpo/q_t": 0.3921484351158142,
|
|
"grad_norm": 9.714167594909668,
|
|
"learning_rate": 3.562533640600075e-07,
|
|
"logits/chosen": 1.3984555006027222,
|
|
"logits/rejected": 1.2696608304977417,
|
|
"logps/chosen": -91.43336486816406,
|
|
"logps/ref_chosen": -80.31298065185547,
|
|
"logps/ref_rejected": -83.72120666503906,
|
|
"logps/rejected": -111.35940551757812,
|
|
"loss": 1.0718,
|
|
"margin_dpo/margin_mean": 16.517820358276367,
|
|
"margin_dpo/margin_std": 24.46967887878418,
|
|
"step": 282
|
|
},
|
|
{
|
|
"epoch": 0.42781557067271353,
|
|
"fcm_dpo/beta": 0.03024178370833397,
|
|
"fcm_dpo/delta": -0.0018365830183029175,
|
|
"fcm_dpo/margin": 13.276296615600586,
|
|
"fcm_dpo/q_t": 0.41008254885673523,
|
|
"grad_norm": 11.770947456359863,
|
|
"learning_rate": 3.550550279627215e-07,
|
|
"logits/chosen": 1.4370884895324707,
|
|
"logits/rejected": 1.1001770496368408,
|
|
"logps/chosen": -96.20101928710938,
|
|
"logps/ref_chosen": -80.72602844238281,
|
|
"logps/ref_rejected": -115.68379211425781,
|
|
"logps/rejected": -144.43507385253906,
|
|
"loss": 1.15,
|
|
"margin_dpo/margin_mean": 13.276298522949219,
|
|
"margin_dpo/margin_std": 24.19223976135254,
|
|
"step": 283
|
|
},
|
|
{
|
|
"epoch": 0.4293272864701436,
|
|
"fcm_dpo/beta": 0.02937573380768299,
|
|
"fcm_dpo/delta": -0.15650880336761475,
|
|
"fcm_dpo/margin": 18.640047073364258,
|
|
"fcm_dpo/q_t": 0.3789505362510681,
|
|
"grad_norm": 8.9287748336792,
|
|
"learning_rate": 3.5385375325047163e-07,
|
|
"logits/chosen": 1.5543755292892456,
|
|
"logits/rejected": 1.3925883769989014,
|
|
"logps/chosen": -87.98505401611328,
|
|
"logps/ref_chosen": -77.5223388671875,
|
|
"logps/ref_rejected": -104.1847152709961,
|
|
"logps/rejected": -133.2874755859375,
|
|
"loss": 1.0198,
|
|
"margin_dpo/margin_mean": 18.640047073364258,
|
|
"margin_dpo/margin_std": 23.840347290039062,
|
|
"step": 284
|
|
},
|
|
{
|
|
"epoch": 0.4308390022675737,
|
|
"fcm_dpo/beta": 0.029584050178527832,
|
|
"fcm_dpo/delta": 0.06740415096282959,
|
|
"fcm_dpo/margin": 11.305548667907715,
|
|
"fcm_dpo/q_t": 0.42707347869873047,
|
|
"grad_norm": 11.452542304992676,
|
|
"learning_rate": 3.5264957352549375e-07,
|
|
"logits/chosen": 1.760129690170288,
|
|
"logits/rejected": 1.6619482040405273,
|
|
"logps/chosen": -107.24560546875,
|
|
"logps/ref_chosen": -85.79348754882812,
|
|
"logps/ref_rejected": -96.46463775634766,
|
|
"logps/rejected": -129.22230529785156,
|
|
"loss": 1.2082,
|
|
"margin_dpo/margin_mean": 11.305547714233398,
|
|
"margin_dpo/margin_std": 25.100751876831055,
|
|
"step": 285
|
|
},
|
|
{
|
|
"epoch": 0.4323507180650038,
|
|
"fcm_dpo/beta": 0.028857434168457985,
|
|
"fcm_dpo/delta": -0.15750229358673096,
|
|
"fcm_dpo/margin": 19.008071899414062,
|
|
"fcm_dpo/q_t": 0.3800804316997528,
|
|
"grad_norm": 8.827457427978516,
|
|
"learning_rate": 3.514425224712835e-07,
|
|
"logits/chosen": 1.429862141609192,
|
|
"logits/rejected": 1.1608591079711914,
|
|
"logps/chosen": -95.72119140625,
|
|
"logps/ref_chosen": -77.86268615722656,
|
|
"logps/ref_rejected": -110.77134704589844,
|
|
"logps/rejected": -147.637939453125,
|
|
"loss": 1.0384,
|
|
"margin_dpo/margin_mean": 19.008071899414062,
|
|
"margin_dpo/margin_std": 26.186504364013672,
|
|
"step": 286
|
|
},
|
|
{
|
|
"epoch": 0.43386243386243384,
|
|
"fcm_dpo/beta": 0.027673378586769104,
|
|
"fcm_dpo/delta": -0.29322800040245056,
|
|
"fcm_dpo/margin": 24.37060546875,
|
|
"fcm_dpo/q_t": 0.3525708317756653,
|
|
"grad_norm": 11.625649452209473,
|
|
"learning_rate": 3.502326338516534e-07,
|
|
"logits/chosen": 1.506529688835144,
|
|
"logits/rejected": 1.4740724563598633,
|
|
"logps/chosen": -74.51770782470703,
|
|
"logps/ref_chosen": -62.552825927734375,
|
|
"logps/ref_rejected": -77.7650146484375,
|
|
"logps/rejected": -114.10049438476562,
|
|
"loss": 0.9449,
|
|
"margin_dpo/margin_mean": 24.37060546875,
|
|
"margin_dpo/margin_std": 26.406269073486328,
|
|
"step": 287
|
|
},
|
|
{
|
|
"epoch": 0.43537414965986393,
|
|
"fcm_dpo/beta": 0.027158301323652267,
|
|
"fcm_dpo/delta": 0.03195127844810486,
|
|
"fcm_dpo/margin": 13.59581470489502,
|
|
"fcm_dpo/q_t": 0.41956979036331177,
|
|
"grad_norm": 10.045726776123047,
|
|
"learning_rate": 3.490199415097892e-07,
|
|
"logits/chosen": 1.1744755506515503,
|
|
"logits/rejected": 1.060915470123291,
|
|
"logps/chosen": -106.11993408203125,
|
|
"logps/ref_chosen": -83.74117279052734,
|
|
"logps/ref_rejected": -106.93913269042969,
|
|
"logps/rejected": -142.91371154785156,
|
|
"loss": 1.1573,
|
|
"margin_dpo/margin_mean": 13.595813751220703,
|
|
"margin_dpo/margin_std": 25.147212982177734,
|
|
"step": 288
|
|
},
|
|
{
|
|
"epoch": 0.436885865457294,
|
|
"fcm_dpo/beta": 0.026928097009658813,
|
|
"fcm_dpo/delta": -0.080781489610672,
|
|
"fcm_dpo/margin": 17.713703155517578,
|
|
"fcm_dpo/q_t": 0.3959569036960602,
|
|
"grad_norm": 8.819540023803711,
|
|
"learning_rate": 3.4780447936730247e-07,
|
|
"logits/chosen": 1.7476454973220825,
|
|
"logits/rejected": 1.671339750289917,
|
|
"logps/chosen": -94.48479461669922,
|
|
"logps/ref_chosen": -73.04204559326172,
|
|
"logps/ref_rejected": -88.07904052734375,
|
|
"logps/rejected": -127.23548889160156,
|
|
"loss": 1.0789,
|
|
"margin_dpo/margin_mean": 17.713703155517578,
|
|
"margin_dpo/margin_std": 26.483057022094727,
|
|
"step": 289
|
|
},
|
|
{
|
|
"epoch": 0.4383975812547241,
|
|
"fcm_dpo/beta": 0.02662849798798561,
|
|
"fcm_dpo/delta": 0.0020782388746738434,
|
|
"fcm_dpo/margin": 14.92031192779541,
|
|
"fcm_dpo/q_t": 0.4109000563621521,
|
|
"grad_norm": 9.255086898803711,
|
|
"learning_rate": 3.465862814232821e-07,
|
|
"logits/chosen": 1.6757187843322754,
|
|
"logits/rejected": 1.5849800109863281,
|
|
"logps/chosen": -106.42593383789062,
|
|
"logps/ref_chosen": -78.60614013671875,
|
|
"logps/ref_rejected": -108.50082397460938,
|
|
"logps/rejected": -151.24093627929688,
|
|
"loss": 1.1414,
|
|
"margin_dpo/margin_mean": 14.920310974121094,
|
|
"margin_dpo/margin_std": 25.820228576660156,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.4399092970521542,
|
|
"fcm_dpo/beta": 0.02671034447848797,
|
|
"fcm_dpo/delta": -0.08461640775203705,
|
|
"fcm_dpo/margin": 17.954689025878906,
|
|
"fcm_dpo/q_t": 0.39686498045921326,
|
|
"grad_norm": 10.577985763549805,
|
|
"learning_rate": 3.4536538175334343e-07,
|
|
"logits/chosen": 1.9808932542800903,
|
|
"logits/rejected": 1.711446762084961,
|
|
"logps/chosen": -90.13938903808594,
|
|
"logps/ref_chosen": -66.71226501464844,
|
|
"logps/ref_rejected": -96.14029693603516,
|
|
"logps/rejected": -137.5220947265625,
|
|
"loss": 1.1239,
|
|
"margin_dpo/margin_mean": 17.954689025878906,
|
|
"margin_dpo/margin_std": 30.840965270996094,
|
|
"step": 291
|
|
},
|
|
{
|
|
"epoch": 0.4414210128495843,
|
|
"fcm_dpo/beta": 0.026129107922315598,
|
|
"fcm_dpo/delta": -0.04495128244161606,
|
|
"fcm_dpo/margin": 16.948894500732422,
|
|
"fcm_dpo/q_t": 0.40278592705726624,
|
|
"grad_norm": 10.55582046508789,
|
|
"learning_rate": 3.4414181450867465e-07,
|
|
"logits/chosen": 1.4002811908721924,
|
|
"logits/rejected": 1.318420171737671,
|
|
"logps/chosen": -103.06094360351562,
|
|
"logps/ref_chosen": -80.3355484008789,
|
|
"logps/ref_rejected": -90.44906616210938,
|
|
"logps/rejected": -130.12335205078125,
|
|
"loss": 1.1202,
|
|
"margin_dpo/margin_mean": 16.948894500732422,
|
|
"margin_dpo/margin_std": 28.7812557220459,
|
|
"step": 292
|
|
},
|
|
{
|
|
"epoch": 0.4429327286470144,
|
|
"fcm_dpo/beta": 0.025567151606082916,
|
|
"fcm_dpo/delta": -0.1617434173822403,
|
|
"fcm_dpo/margin": 21.632625579833984,
|
|
"fcm_dpo/q_t": 0.3831241726875305,
|
|
"grad_norm": 11.195418357849121,
|
|
"learning_rate": 3.4291561391508185e-07,
|
|
"logits/chosen": 1.8973731994628906,
|
|
"logits/rejected": 1.7341361045837402,
|
|
"logps/chosen": -95.07200622558594,
|
|
"logps/ref_chosen": -71.69970703125,
|
|
"logps/ref_rejected": -102.13948059082031,
|
|
"logps/rejected": -147.1444091796875,
|
|
"loss": 1.0974,
|
|
"margin_dpo/margin_mean": 21.632625579833984,
|
|
"margin_dpo/margin_std": 36.16749572753906,
|
|
"step": 293
|
|
},
|
|
{
|
|
"epoch": 0.4444444444444444,
|
|
"fcm_dpo/beta": 0.024921298027038574,
|
|
"fcm_dpo/delta": -0.05894307792186737,
|
|
"fcm_dpo/margin": 18.27361488342285,
|
|
"fcm_dpo/q_t": 0.39900296926498413,
|
|
"grad_norm": 12.699747085571289,
|
|
"learning_rate": 3.4168681427203153e-07,
|
|
"logits/chosen": 1.6706271171569824,
|
|
"logits/rejected": 1.5605731010437012,
|
|
"logps/chosen": -93.49272155761719,
|
|
"logps/ref_chosen": -70.73458862304688,
|
|
"logps/ref_rejected": -86.68821716308594,
|
|
"logps/rejected": -127.719970703125,
|
|
"loss": 1.0928,
|
|
"margin_dpo/margin_mean": 18.273616790771484,
|
|
"margin_dpo/margin_std": 27.647445678710938,
|
|
"step": 294
|
|
},
|
|
{
|
|
"epoch": 0.4459561602418745,
|
|
"fcm_dpo/beta": 0.025058383122086525,
|
|
"fcm_dpo/delta": 0.03231852129101753,
|
|
"fcm_dpo/margin": 14.720477104187012,
|
|
"fcm_dpo/q_t": 0.4185822606086731,
|
|
"grad_norm": 13.354880332946777,
|
|
"learning_rate": 3.4045544995169125e-07,
|
|
"logits/chosen": 1.2770599126815796,
|
|
"logits/rejected": 1.0146013498306274,
|
|
"logps/chosen": -91.09881591796875,
|
|
"logps/ref_chosen": -66.42644500732422,
|
|
"logps/ref_rejected": -99.58766174316406,
|
|
"logps/rejected": -138.9805145263672,
|
|
"loss": 1.1745,
|
|
"margin_dpo/margin_mean": 14.720477104187012,
|
|
"margin_dpo/margin_std": 29.390125274658203,
|
|
"step": 295
|
|
},
|
|
{
|
|
"epoch": 0.4474678760393046,
|
|
"fcm_dpo/beta": 0.02432844787836075,
|
|
"fcm_dpo/delta": -0.17003798484802246,
|
|
"fcm_dpo/margin": 22.94456672668457,
|
|
"fcm_dpo/q_t": 0.37980806827545166,
|
|
"grad_norm": 9.647819519042969,
|
|
"learning_rate": 3.392215553979679e-07,
|
|
"logits/chosen": 1.331782579421997,
|
|
"logits/rejected": 1.188205599784851,
|
|
"logps/chosen": -109.94386291503906,
|
|
"logps/ref_chosen": -87.47459411621094,
|
|
"logps/ref_rejected": -103.96894836425781,
|
|
"logps/rejected": -149.38278198242188,
|
|
"loss": 1.0534,
|
|
"margin_dpo/margin_mean": 22.94456672668457,
|
|
"margin_dpo/margin_std": 33.13970947265625,
|
|
"step": 296
|
|
},
|
|
{
|
|
"epoch": 0.4489795918367347,
|
|
"fcm_dpo/beta": 0.023840460926294327,
|
|
"fcm_dpo/delta": -0.18414409458637238,
|
|
"fcm_dpo/margin": 24.06473159790039,
|
|
"fcm_dpo/q_t": 0.3722948133945465,
|
|
"grad_norm": 12.918481826782227,
|
|
"learning_rate": 3.3798516512554485e-07,
|
|
"logits/chosen": 1.4045250415802002,
|
|
"logits/rejected": 1.276968002319336,
|
|
"logps/chosen": -97.88027954101562,
|
|
"logps/ref_chosen": -73.46731567382812,
|
|
"logps/ref_rejected": -88.22674560546875,
|
|
"logps/rejected": -136.70443725585938,
|
|
"loss": 0.9989,
|
|
"margin_dpo/margin_mean": 24.06473159790039,
|
|
"margin_dpo/margin_std": 27.818138122558594,
|
|
"step": 297
|
|
},
|
|
{
|
|
"epoch": 0.4504913076341648,
|
|
"fcm_dpo/beta": 0.023241370916366577,
|
|
"fcm_dpo/delta": -0.06495536863803864,
|
|
"fcm_dpo/margin": 19.87840461730957,
|
|
"fcm_dpo/q_t": 0.4035850465297699,
|
|
"grad_norm": 12.550525665283203,
|
|
"learning_rate": 3.367463137189156e-07,
|
|
"logits/chosen": 1.7822816371917725,
|
|
"logits/rejected": 1.6975232362747192,
|
|
"logps/chosen": -94.45632934570312,
|
|
"logps/ref_chosen": -73.21676635742188,
|
|
"logps/ref_rejected": -84.9563217163086,
|
|
"logps/rejected": -126.07428741455078,
|
|
"loss": 1.14,
|
|
"margin_dpo/margin_mean": 19.878402709960938,
|
|
"margin_dpo/margin_std": 36.64874267578125,
|
|
"step": 298
|
|
},
|
|
{
|
|
"epoch": 0.4520030234315949,
|
|
"fcm_dpo/beta": 0.023189421743154526,
|
|
"fcm_dpo/delta": 0.02762114629149437,
|
|
"fcm_dpo/margin": 16.102848052978516,
|
|
"fcm_dpo/q_t": 0.42042386531829834,
|
|
"grad_norm": 9.376786231994629,
|
|
"learning_rate": 3.355050358314172e-07,
|
|
"logits/chosen": 1.2836685180664062,
|
|
"logits/rejected": 1.1858904361724854,
|
|
"logps/chosen": -100.31362915039062,
|
|
"logps/ref_chosen": -76.9534912109375,
|
|
"logps/ref_rejected": -87.53433227539062,
|
|
"logps/rejected": -126.997314453125,
|
|
"loss": 1.19,
|
|
"margin_dpo/margin_mean": 16.10284996032715,
|
|
"margin_dpo/margin_std": 34.105308532714844,
|
|
"step": 299
|
|
},
|
|
{
|
|
"epoch": 0.45351473922902497,
|
|
"fcm_dpo/beta": 0.02318103238940239,
|
|
"fcm_dpo/delta": -0.0181889571249485,
|
|
"fcm_dpo/margin": 18.007240295410156,
|
|
"fcm_dpo/q_t": 0.40901291370391846,
|
|
"grad_norm": 7.5748467445373535,
|
|
"learning_rate": 3.3426136618426043e-07,
|
|
"logits/chosen": 1.4052281379699707,
|
|
"logits/rejected": 1.2209758758544922,
|
|
"logps/chosen": -100.68650817871094,
|
|
"logps/ref_chosen": -78.36398315429688,
|
|
"logps/ref_rejected": -97.03912353515625,
|
|
"logps/rejected": -137.368896484375,
|
|
"loss": 1.1662,
|
|
"margin_dpo/margin_mean": 18.007240295410156,
|
|
"margin_dpo/margin_std": 35.32292175292969,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.45351473922902497,
|
|
"eval_fcm_dpo/beta": 0.02316315658390522,
|
|
"eval_logits/chosen": 1.3809725046157837,
|
|
"eval_logits/rejected": 1.253163456916809,
|
|
"eval_logps/chosen": -106.03602600097656,
|
|
"eval_logps/ref_chosen": -86.90177917480469,
|
|
"eval_logps/ref_rejected": -96.69639587402344,
|
|
"eval_logps/rejected": -135.76068115234375,
|
|
"eval_loss": 0.5600526928901672,
|
|
"eval_margin_dpo/margin_mean": 19.930051803588867,
|
|
"eval_margin_dpo/margin_std": 33.923973083496094,
|
|
"eval_runtime": 42.33,
|
|
"eval_samples_per_second": 54.406,
|
|
"eval_steps_per_second": 1.701,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.455026455026455,
|
|
"fcm_dpo/beta": 0.022925032302737236,
|
|
"fcm_dpo/delta": -0.031985800713300705,
|
|
"fcm_dpo/margin": 18.749425888061523,
|
|
"fcm_dpo/q_t": 0.4092825651168823,
|
|
"grad_norm": 7.92974328994751,
|
|
"learning_rate": 3.3301533956555885e-07,
|
|
"logits/chosen": 1.3435275554656982,
|
|
"logits/rejected": 1.2831223011016846,
|
|
"logps/chosen": -91.15614318847656,
|
|
"logps/ref_chosen": -70.6719741821289,
|
|
"logps/ref_rejected": -87.11650085449219,
|
|
"logps/rejected": -126.35010528564453,
|
|
"loss": 1.1464,
|
|
"margin_dpo/margin_mean": 18.749425888061523,
|
|
"margin_dpo/margin_std": 34.64903259277344,
|
|
"step": 301
|
|
},
|
|
{
|
|
"epoch": 0.4565381708238851,
|
|
"fcm_dpo/beta": 0.023184455931186676,
|
|
"fcm_dpo/delta": 0.0728965699672699,
|
|
"fcm_dpo/margin": 9.814539909362793,
|
|
"fcm_dpo/q_t": 0.4503589868545532,
|
|
"grad_norm": 9.430741310119629,
|
|
"learning_rate": 3.317669908293554e-07,
|
|
"logits/chosen": 1.1618998050689697,
|
|
"logits/rejected": 1.0537515878677368,
|
|
"logps/chosen": -111.93255615234375,
|
|
"logps/ref_chosen": -85.29096221923828,
|
|
"logps/ref_rejected": -106.22589874267578,
|
|
"logps/rejected": -142.68203735351562,
|
|
"loss": 1.2936,
|
|
"margin_dpo/margin_mean": 9.814538955688477,
|
|
"margin_dpo/margin_std": 31.3475341796875,
|
|
"step": 302
|
|
},
|
|
{
|
|
"epoch": 0.4580498866213152,
|
|
"fcm_dpo/beta": 0.022597171366214752,
|
|
"fcm_dpo/delta": -0.17091017961502075,
|
|
"fcm_dpo/margin": 24.739337921142578,
|
|
"fcm_dpo/q_t": 0.3804760277271271,
|
|
"grad_norm": 7.812971115112305,
|
|
"learning_rate": 3.3051635489464793e-07,
|
|
"logits/chosen": 1.7512423992156982,
|
|
"logits/rejected": 1.6072579622268677,
|
|
"logps/chosen": -104.96895599365234,
|
|
"logps/ref_chosen": -83.90059661865234,
|
|
"logps/ref_rejected": -104.7340087890625,
|
|
"logps/rejected": -150.54171752929688,
|
|
"loss": 1.0798,
|
|
"margin_dpo/margin_mean": 24.739337921142578,
|
|
"margin_dpo/margin_std": 39.02869415283203,
|
|
"step": 303
|
|
},
|
|
{
|
|
"epoch": 0.4595616024187453,
|
|
"fcm_dpo/beta": 0.022171439602971077,
|
|
"fcm_dpo/delta": -0.11509604007005692,
|
|
"fcm_dpo/margin": 22.964481353759766,
|
|
"fcm_dpo/q_t": 0.38752371072769165,
|
|
"grad_norm": 9.252838134765625,
|
|
"learning_rate": 3.292634667444117e-07,
|
|
"logits/chosen": 1.3123148679733276,
|
|
"logits/rejected": 1.203786015510559,
|
|
"logps/chosen": -93.95158386230469,
|
|
"logps/ref_chosen": -77.39997100830078,
|
|
"logps/ref_rejected": -94.21647644042969,
|
|
"logps/rejected": -133.73257446289062,
|
|
"loss": 1.0487,
|
|
"margin_dpo/margin_mean": 22.9644832611084,
|
|
"margin_dpo/margin_std": 31.3437442779541,
|
|
"step": 304
|
|
},
|
|
{
|
|
"epoch": 0.46107331821617537,
|
|
"fcm_dpo/beta": 0.02172023430466652,
|
|
"fcm_dpo/delta": -0.04386585205793381,
|
|
"fcm_dpo/margin": 20.260169982910156,
|
|
"fcm_dpo/q_t": 0.4058588147163391,
|
|
"grad_norm": 8.574239730834961,
|
|
"learning_rate": 3.280083614246217e-07,
|
|
"logits/chosen": 1.5030393600463867,
|
|
"logits/rejected": 1.5878260135650635,
|
|
"logps/chosen": -113.70249938964844,
|
|
"logps/ref_chosen": -90.90805053710938,
|
|
"logps/ref_rejected": -85.84992980957031,
|
|
"logps/rejected": -128.90455627441406,
|
|
"loss": 1.1498,
|
|
"margin_dpo/margin_mean": 20.260169982910156,
|
|
"margin_dpo/margin_std": 37.48213577270508,
|
|
"step": 305
|
|
},
|
|
{
|
|
"epoch": 0.46258503401360546,
|
|
"fcm_dpo/beta": 0.021479588001966476,
|
|
"fcm_dpo/delta": -0.06405539065599442,
|
|
"fcm_dpo/margin": 21.383968353271484,
|
|
"fcm_dpo/q_t": 0.39899182319641113,
|
|
"grad_norm": 10.03992748260498,
|
|
"learning_rate": 3.267510740432719e-07,
|
|
"logits/chosen": 1.418445348739624,
|
|
"logits/rejected": 1.1668379306793213,
|
|
"logps/chosen": -90.37960815429688,
|
|
"logps/ref_chosen": -71.7261962890625,
|
|
"logps/ref_rejected": -97.70491027832031,
|
|
"logps/rejected": -137.74229431152344,
|
|
"loss": 1.0879,
|
|
"margin_dpo/margin_mean": 21.383968353271484,
|
|
"margin_dpo/margin_std": 31.563940048217773,
|
|
"step": 306
|
|
},
|
|
{
|
|
"epoch": 0.46409674981103555,
|
|
"fcm_dpo/beta": 0.02162957563996315,
|
|
"fcm_dpo/delta": 0.02099861018359661,
|
|
"fcm_dpo/margin": 13.357643127441406,
|
|
"fcm_dpo/q_t": 0.4386698305606842,
|
|
"grad_norm": 12.694254875183105,
|
|
"learning_rate": 3.2549163976939285e-07,
|
|
"logits/chosen": 1.2605947256088257,
|
|
"logits/rejected": 1.1243689060211182,
|
|
"logps/chosen": -88.7791976928711,
|
|
"logps/ref_chosen": -74.38668823242188,
|
|
"logps/ref_rejected": -84.16001892089844,
|
|
"logps/rejected": -111.91016387939453,
|
|
"loss": 1.2375,
|
|
"margin_dpo/margin_mean": 13.357643127441406,
|
|
"margin_dpo/margin_std": 33.23987579345703,
|
|
"step": 307
|
|
},
|
|
{
|
|
"epoch": 0.4656084656084656,
|
|
"fcm_dpo/beta": 0.021374888718128204,
|
|
"fcm_dpo/delta": -0.07016818970441818,
|
|
"fcm_dpo/margin": 21.82101821899414,
|
|
"fcm_dpo/q_t": 0.3989279270172119,
|
|
"grad_norm": 7.945812225341797,
|
|
"learning_rate": 3.2423009383206874e-07,
|
|
"logits/chosen": 1.2982113361358643,
|
|
"logits/rejected": 1.3503541946411133,
|
|
"logps/chosen": -103.78570556640625,
|
|
"logps/ref_chosen": -87.50894165039062,
|
|
"logps/ref_rejected": -94.80848693847656,
|
|
"logps/rejected": -132.90626525878906,
|
|
"loss": 1.1067,
|
|
"margin_dpo/margin_mean": 21.821016311645508,
|
|
"margin_dpo/margin_std": 35.72882080078125,
|
|
"step": 308
|
|
},
|
|
{
|
|
"epoch": 0.4671201814058957,
|
|
"fcm_dpo/beta": 0.02099529653787613,
|
|
"fcm_dpo/delta": -0.06646728515625,
|
|
"fcm_dpo/margin": 21.988813400268555,
|
|
"fcm_dpo/q_t": 0.3982385993003845,
|
|
"grad_norm": 8.893543243408203,
|
|
"learning_rate": 3.229664715194511e-07,
|
|
"logits/chosen": 1.3217096328735352,
|
|
"logits/rejected": 1.2139837741851807,
|
|
"logps/chosen": -104.08708190917969,
|
|
"logps/ref_chosen": -82.15191650390625,
|
|
"logps/ref_rejected": -95.03496551513672,
|
|
"logps/rejected": -138.9589385986328,
|
|
"loss": 1.075,
|
|
"margin_dpo/margin_mean": 21.988813400268555,
|
|
"margin_dpo/margin_std": 30.44322967529297,
|
|
"step": 309
|
|
},
|
|
{
|
|
"epoch": 0.46863189720332576,
|
|
"fcm_dpo/beta": 0.02120891772210598,
|
|
"fcm_dpo/delta": 0.02794772759079933,
|
|
"fcm_dpo/margin": 10.018726348876953,
|
|
"fcm_dpo/q_t": 0.455955445766449,
|
|
"grad_norm": 11.014235496520996,
|
|
"learning_rate": 3.2170080817777257e-07,
|
|
"logits/chosen": 1.458807110786438,
|
|
"logits/rejected": 1.4317870140075684,
|
|
"logps/chosen": -115.71739196777344,
|
|
"logps/ref_chosen": -93.7555160522461,
|
|
"logps/ref_rejected": -96.93236541748047,
|
|
"logps/rejected": -128.9129638671875,
|
|
"loss": 1.3344,
|
|
"margin_dpo/margin_mean": 10.018726348876953,
|
|
"margin_dpo/margin_std": 37.276611328125,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.47014361300075586,
|
|
"fcm_dpo/beta": 0.02111116424202919,
|
|
"fcm_dpo/delta": 0.006699252873659134,
|
|
"fcm_dpo/margin": 18.61467742919922,
|
|
"fcm_dpo/q_t": 0.41649070382118225,
|
|
"grad_norm": 9.210880279541016,
|
|
"learning_rate": 3.204331392103574e-07,
|
|
"logits/chosen": 1.4640614986419678,
|
|
"logits/rejected": 1.1159043312072754,
|
|
"logps/chosen": -89.65441131591797,
|
|
"logps/ref_chosen": -76.20762634277344,
|
|
"logps/ref_rejected": -110.48141479492188,
|
|
"logps/rejected": -142.54287719726562,
|
|
"loss": 1.149,
|
|
"margin_dpo/margin_mean": 18.61467933654785,
|
|
"margin_dpo/margin_std": 34.18421936035156,
|
|
"step": 311
|
|
},
|
|
{
|
|
"epoch": 0.47165532879818595,
|
|
"fcm_dpo/beta": 0.02078743278980255,
|
|
"fcm_dpo/delta": -0.15529538691043854,
|
|
"fcm_dpo/margin": 26.313379287719727,
|
|
"fcm_dpo/q_t": 0.38429784774780273,
|
|
"grad_norm": 7.8850884437561035,
|
|
"learning_rate": 3.1916350007663176e-07,
|
|
"logits/chosen": 1.4752094745635986,
|
|
"logits/rejected": 1.304732084274292,
|
|
"logps/chosen": -89.44306182861328,
|
|
"logps/ref_chosen": -69.08878326416016,
|
|
"logps/ref_rejected": -91.84494018554688,
|
|
"logps/rejected": -138.51260375976562,
|
|
"loss": 1.0482,
|
|
"margin_dpo/margin_mean": 26.31338119506836,
|
|
"margin_dpo/margin_std": 37.86506271362305,
|
|
"step": 312
|
|
},
|
|
{
|
|
"epoch": 0.47316704459561604,
|
|
"fcm_dpo/beta": 0.020855270326137543,
|
|
"fcm_dpo/delta": 0.07202167809009552,
|
|
"fcm_dpo/margin": 15.828784942626953,
|
|
"fcm_dpo/q_t": 0.4334028959274292,
|
|
"grad_norm": 8.256758689880371,
|
|
"learning_rate": 3.178919262911314e-07,
|
|
"logits/chosen": 1.533963680267334,
|
|
"logits/rejected": 1.4955235719680786,
|
|
"logps/chosen": -92.3877944946289,
|
|
"logps/ref_chosen": -78.20826721191406,
|
|
"logps/ref_rejected": -86.90351867675781,
|
|
"logps/rejected": -116.9118423461914,
|
|
"loss": 1.2266,
|
|
"margin_dpo/margin_mean": 15.828783988952637,
|
|
"margin_dpo/margin_std": 38.44497299194336,
|
|
"step": 313
|
|
},
|
|
{
|
|
"epoch": 0.47467876039304613,
|
|
"fcm_dpo/beta": 0.02034802734851837,
|
|
"fcm_dpo/delta": -0.1469862312078476,
|
|
"fcm_dpo/margin": 26.44600486755371,
|
|
"fcm_dpo/q_t": 0.38667362928390503,
|
|
"grad_norm": 11.084165573120117,
|
|
"learning_rate": 3.166184534225087e-07,
|
|
"logits/chosen": 1.629220724105835,
|
|
"logits/rejected": 1.6717283725738525,
|
|
"logps/chosen": -105.98693084716797,
|
|
"logps/ref_chosen": -90.41890716552734,
|
|
"logps/ref_rejected": -84.33525848388672,
|
|
"logps/rejected": -126.34929656982422,
|
|
"loss": 1.0556,
|
|
"margin_dpo/margin_mean": 26.44600486755371,
|
|
"margin_dpo/margin_std": 38.6795654296875,
|
|
"step": 314
|
|
},
|
|
{
|
|
"epoch": 0.47619047619047616,
|
|
"fcm_dpo/beta": 0.020289920270442963,
|
|
"fcm_dpo/delta": -0.03734702616930008,
|
|
"fcm_dpo/margin": 21.451175689697266,
|
|
"fcm_dpo/q_t": 0.4053088426589966,
|
|
"grad_norm": 8.479659080505371,
|
|
"learning_rate": 3.1534311709253723e-07,
|
|
"logits/chosen": 1.165109395980835,
|
|
"logits/rejected": 1.0913877487182617,
|
|
"logps/chosen": -103.09860229492188,
|
|
"logps/ref_chosen": -87.32842254638672,
|
|
"logps/ref_rejected": -93.71661376953125,
|
|
"logps/rejected": -130.93795776367188,
|
|
"loss": 1.0996,
|
|
"margin_dpo/margin_mean": 21.451175689697266,
|
|
"margin_dpo/margin_std": 32.51549530029297,
|
|
"step": 315
|
|
},
|
|
{
|
|
"epoch": 0.47770219198790626,
|
|
"fcm_dpo/beta": 0.020012233406305313,
|
|
"fcm_dpo/delta": -0.13435091078281403,
|
|
"fcm_dpo/margin": 26.249847412109375,
|
|
"fcm_dpo/q_t": 0.38359588384628296,
|
|
"grad_norm": 9.866154670715332,
|
|
"learning_rate": 3.1406595297511564e-07,
|
|
"logits/chosen": 1.4812779426574707,
|
|
"logits/rejected": 1.1655080318450928,
|
|
"logps/chosen": -93.73405456542969,
|
|
"logps/ref_chosen": -73.898681640625,
|
|
"logps/ref_rejected": -115.42668151855469,
|
|
"logps/rejected": -161.5118865966797,
|
|
"loss": 1.0329,
|
|
"margin_dpo/margin_mean": 26.249849319458008,
|
|
"margin_dpo/margin_std": 30.74091339111328,
|
|
"step": 316
|
|
},
|
|
{
|
|
"epoch": 0.47921390778533635,
|
|
"fcm_dpo/beta": 0.019283965229988098,
|
|
"fcm_dpo/delta": -0.07981756329536438,
|
|
"fcm_dpo/margin": 24.6502628326416,
|
|
"fcm_dpo/q_t": 0.3955802917480469,
|
|
"grad_norm": 14.283778190612793,
|
|
"learning_rate": 3.1278699679526975e-07,
|
|
"logits/chosen": 1.5710258483886719,
|
|
"logits/rejected": 1.4736764430999756,
|
|
"logps/chosen": -92.30467224121094,
|
|
"logps/ref_chosen": -75.42947387695312,
|
|
"logps/ref_rejected": -90.60166931152344,
|
|
"logps/rejected": -132.12713623046875,
|
|
"loss": 1.1094,
|
|
"margin_dpo/margin_mean": 24.650264739990234,
|
|
"margin_dpo/margin_std": 41.600746154785156,
|
|
"step": 317
|
|
},
|
|
{
|
|
"epoch": 0.48072562358276644,
|
|
"fcm_dpo/beta": 0.019192587584257126,
|
|
"fcm_dpo/delta": -0.0218157060444355,
|
|
"fcm_dpo/margin": 21.925718307495117,
|
|
"fcm_dpo/q_t": 0.4134993255138397,
|
|
"grad_norm": 8.415660858154297,
|
|
"learning_rate": 3.1150628432815336e-07,
|
|
"logits/chosen": 1.4273430109024048,
|
|
"logits/rejected": 1.282718300819397,
|
|
"logps/chosen": -89.98394012451172,
|
|
"logps/ref_chosen": -70.38318634033203,
|
|
"logps/ref_rejected": -98.19901275634766,
|
|
"logps/rejected": -139.72549438476562,
|
|
"loss": 1.166,
|
|
"margin_dpo/margin_mean": 21.925718307495117,
|
|
"margin_dpo/margin_std": 43.43846893310547,
|
|
"step": 318
|
|
},
|
|
{
|
|
"epoch": 0.48223733938019653,
|
|
"fcm_dpo/beta": 0.018868155777454376,
|
|
"fcm_dpo/delta": -0.10744694620370865,
|
|
"fcm_dpo/margin": 26.597312927246094,
|
|
"fcm_dpo/q_t": 0.39157384634017944,
|
|
"grad_norm": 7.358407020568848,
|
|
"learning_rate": 3.1022385139804707e-07,
|
|
"logits/chosen": 1.4171019792556763,
|
|
"logits/rejected": 1.3558650016784668,
|
|
"logps/chosen": -106.76776885986328,
|
|
"logps/ref_chosen": -83.40225982666016,
|
|
"logps/ref_rejected": -95.40069580078125,
|
|
"logps/rejected": -145.36351013183594,
|
|
"loss": 1.0818,
|
|
"margin_dpo/margin_mean": 26.597312927246094,
|
|
"margin_dpo/margin_std": 41.52876281738281,
|
|
"step": 319
|
|
},
|
|
{
|
|
"epoch": 0.4837490551776266,
|
|
"fcm_dpo/beta": 0.018746916204690933,
|
|
"fcm_dpo/delta": -0.025277845561504364,
|
|
"fcm_dpo/margin": 14.949155807495117,
|
|
"fcm_dpo/q_t": 0.4393833875656128,
|
|
"grad_norm": 8.69232177734375,
|
|
"learning_rate": 3.0893973387735683e-07,
|
|
"logits/chosen": 1.186877727508545,
|
|
"logits/rejected": 1.0729196071624756,
|
|
"logps/chosen": -90.08442687988281,
|
|
"logps/ref_chosen": -68.70979309082031,
|
|
"logps/ref_rejected": -87.00540924072266,
|
|
"logps/rejected": -123.32920837402344,
|
|
"loss": 1.2802,
|
|
"margin_dpo/margin_mean": 14.949155807495117,
|
|
"margin_dpo/margin_std": 43.0057487487793,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.4852607709750567,
|
|
"fcm_dpo/beta": 0.018360208719968796,
|
|
"fcm_dpo/delta": -0.054547958076000214,
|
|
"fcm_dpo/margin": 24.483469009399414,
|
|
"fcm_dpo/q_t": 0.4054255485534668,
|
|
"grad_norm": 18.02200698852539,
|
|
"learning_rate": 3.0765396768561004e-07,
|
|
"logits/chosen": 1.4728999137878418,
|
|
"logits/rejected": 1.4148732423782349,
|
|
"logps/chosen": -88.64881896972656,
|
|
"logps/ref_chosen": -66.48135375976562,
|
|
"logps/ref_rejected": -71.84545135498047,
|
|
"logps/rejected": -118.49638366699219,
|
|
"loss": 1.1465,
|
|
"margin_dpo/margin_mean": 24.483470916748047,
|
|
"margin_dpo/margin_std": 44.83539581298828,
|
|
"step": 321
|
|
},
|
|
{
|
|
"epoch": 0.48677248677248675,
|
|
"fcm_dpo/beta": 0.017961975187063217,
|
|
"fcm_dpo/delta": -0.21638159453868866,
|
|
"fcm_dpo/margin": 33.61131286621094,
|
|
"fcm_dpo/q_t": 0.36547616124153137,
|
|
"grad_norm": 15.814164161682129,
|
|
"learning_rate": 3.063665887884511e-07,
|
|
"logits/chosen": 1.8528566360473633,
|
|
"logits/rejected": 1.6500463485717773,
|
|
"logps/chosen": -94.36614990234375,
|
|
"logps/ref_chosen": -65.94654846191406,
|
|
"logps/ref_rejected": -94.26603698730469,
|
|
"logps/rejected": -156.29696655273438,
|
|
"loss": 0.9901,
|
|
"margin_dpo/margin_mean": 33.61131286621094,
|
|
"margin_dpo/margin_std": 40.53282928466797,
|
|
"step": 322
|
|
},
|
|
{
|
|
"epoch": 0.48828420256991684,
|
|
"fcm_dpo/beta": 0.017774982377886772,
|
|
"fcm_dpo/delta": -0.011496573686599731,
|
|
"fcm_dpo/margin": 23.097457885742188,
|
|
"fcm_dpo/q_t": 0.4170437753200531,
|
|
"grad_norm": 13.843807220458984,
|
|
"learning_rate": 3.0507763319663517e-07,
|
|
"logits/chosen": 1.5236470699310303,
|
|
"logits/rejected": 1.389400601387024,
|
|
"logps/chosen": -119.60983276367188,
|
|
"logps/ref_chosen": -86.5498046875,
|
|
"logps/ref_rejected": -110.39498901367188,
|
|
"logps/rejected": -166.55245971679688,
|
|
"loss": 1.2227,
|
|
"margin_dpo/margin_mean": 23.097457885742188,
|
|
"margin_dpo/margin_std": 54.00891876220703,
|
|
"step": 323
|
|
},
|
|
{
|
|
"epoch": 0.4897959183673469,
|
|
"fcm_dpo/beta": 0.01737690530717373,
|
|
"fcm_dpo/delta": -0.08369007706642151,
|
|
"fcm_dpo/margin": 27.562347412109375,
|
|
"fcm_dpo/q_t": 0.39454299211502075,
|
|
"grad_norm": 15.58385944366455,
|
|
"learning_rate": 3.0378713696502097e-07,
|
|
"logits/chosen": 1.4405279159545898,
|
|
"logits/rejected": 1.3061044216156006,
|
|
"logps/chosen": -102.10205841064453,
|
|
"logps/ref_chosen": -74.44218444824219,
|
|
"logps/ref_rejected": -85.7646484375,
|
|
"logps/rejected": -140.98687744140625,
|
|
"loss": 1.1105,
|
|
"margin_dpo/margin_mean": 27.562347412109375,
|
|
"margin_dpo/margin_std": 46.17185592651367,
|
|
"step": 324
|
|
},
|
|
{
|
|
"epoch": 0.491307634164777,
|
|
"fcm_dpo/beta": 0.016980335116386414,
|
|
"fcm_dpo/delta": -0.09777640551328659,
|
|
"fcm_dpo/margin": 28.89820098876953,
|
|
"fcm_dpo/q_t": 0.394008994102478,
|
|
"grad_norm": 13.200928688049316,
|
|
"learning_rate": 3.0249513619156206e-07,
|
|
"logits/chosen": 1.6928132772445679,
|
|
"logits/rejected": 1.5178282260894775,
|
|
"logps/chosen": -123.48445892333984,
|
|
"logps/ref_chosen": -81.43812561035156,
|
|
"logps/ref_rejected": -97.04302978515625,
|
|
"logps/rejected": -167.987548828125,
|
|
"loss": 1.1019,
|
|
"margin_dpo/margin_mean": 28.898202896118164,
|
|
"margin_dpo/margin_std": 46.94188690185547,
|
|
"step": 325
|
|
},
|
|
{
|
|
"epoch": 0.4928193499622071,
|
|
"fcm_dpo/beta": 0.01719042658805847,
|
|
"fcm_dpo/delta": 0.0699775367975235,
|
|
"fcm_dpo/margin": 19.33672332763672,
|
|
"fcm_dpo/q_t": 0.42833542823791504,
|
|
"grad_norm": 8.703057289123535,
|
|
"learning_rate": 3.012016670162977e-07,
|
|
"logits/chosen": 1.2717374563217163,
|
|
"logits/rejected": 1.2912553548812866,
|
|
"logps/chosen": -141.61001586914062,
|
|
"logps/ref_chosen": -91.65318298339844,
|
|
"logps/ref_rejected": -90.64222717285156,
|
|
"logps/rejected": -159.935791015625,
|
|
"loss": 1.2212,
|
|
"margin_dpo/margin_mean": 19.33672332763672,
|
|
"margin_dpo/margin_std": 45.26724624633789,
|
|
"step": 326
|
|
},
|
|
{
|
|
"epoch": 0.4943310657596372,
|
|
"fcm_dpo/beta": 0.017470695078372955,
|
|
"fcm_dpo/delta": 0.037908885627985,
|
|
"fcm_dpo/margin": 20.763715744018555,
|
|
"fcm_dpo/q_t": 0.42454928159713745,
|
|
"grad_norm": 28.22562026977539,
|
|
"learning_rate": 2.99906765620341e-07,
|
|
"logits/chosen": 1.1321663856506348,
|
|
"logits/rejected": 1.0784250497817993,
|
|
"logps/chosen": -141.81544494628906,
|
|
"logps/ref_chosen": -89.97216796875,
|
|
"logps/ref_rejected": -97.54869079589844,
|
|
"logps/rejected": -170.15567016601562,
|
|
"loss": 1.2403,
|
|
"margin_dpo/margin_mean": 20.763715744018555,
|
|
"margin_dpo/margin_std": 51.22842025756836,
|
|
"step": 327
|
|
},
|
|
{
|
|
"epoch": 0.4958427815570673,
|
|
"fcm_dpo/beta": 0.01727999374270439,
|
|
"fcm_dpo/delta": -0.0346628800034523,
|
|
"fcm_dpo/margin": 25.054723739624023,
|
|
"fcm_dpo/q_t": 0.4068809151649475,
|
|
"grad_norm": 8.079117774963379,
|
|
"learning_rate": 2.9861046822486766e-07,
|
|
"logits/chosen": 1.3686432838439941,
|
|
"logits/rejected": 1.2675422430038452,
|
|
"logps/chosen": -128.63833618164062,
|
|
"logps/ref_chosen": -80.27335357666016,
|
|
"logps/ref_rejected": -99.04093170166016,
|
|
"logps/rejected": -172.46063232421875,
|
|
"loss": 1.1454,
|
|
"margin_dpo/margin_mean": 25.054725646972656,
|
|
"margin_dpo/margin_std": 45.77302551269531,
|
|
"step": 328
|
|
},
|
|
{
|
|
"epoch": 0.4973544973544973,
|
|
"fcm_dpo/beta": 0.016885770484805107,
|
|
"fcm_dpo/delta": -0.12400930374860764,
|
|
"fcm_dpo/margin": 30.581024169921875,
|
|
"fcm_dpo/q_t": 0.39069896936416626,
|
|
"grad_norm": 9.308547973632812,
|
|
"learning_rate": 2.9731281109010253e-07,
|
|
"logits/chosen": 1.4687421321868896,
|
|
"logits/rejected": 1.3343288898468018,
|
|
"logps/chosen": -128.83963012695312,
|
|
"logps/ref_chosen": -79.75892639160156,
|
|
"logps/ref_rejected": -102.06265258789062,
|
|
"logps/rejected": -181.72439575195312,
|
|
"loss": 1.0682,
|
|
"margin_dpo/margin_mean": 30.581024169921875,
|
|
"margin_dpo/margin_std": 45.86301040649414,
|
|
"step": 329
|
|
},
|
|
{
|
|
"epoch": 0.4988662131519274,
|
|
"fcm_dpo/beta": 0.016510095447301865,
|
|
"fcm_dpo/delta": -0.20744779706001282,
|
|
"fcm_dpo/margin": 36.0556526184082,
|
|
"fcm_dpo/q_t": 0.37239986658096313,
|
|
"grad_norm": 10.562420845031738,
|
|
"learning_rate": 2.9601383051430505e-07,
|
|
"logits/chosen": 1.605103850364685,
|
|
"logits/rejected": 1.4531974792480469,
|
|
"logps/chosen": -114.76742553710938,
|
|
"logps/ref_chosen": -70.55734252929688,
|
|
"logps/ref_rejected": -94.53077697753906,
|
|
"logps/rejected": -174.7965087890625,
|
|
"loss": 1.0623,
|
|
"margin_dpo/margin_mean": 36.05565643310547,
|
|
"margin_dpo/margin_std": 53.4388427734375,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.5003779289493575,
|
|
"fcm_dpo/beta": 0.015694059431552887,
|
|
"fcm_dpo/delta": -0.1913868486881256,
|
|
"fcm_dpo/margin": 36.94060516357422,
|
|
"fcm_dpo/q_t": 0.3752760589122772,
|
|
"grad_norm": 6.33163595199585,
|
|
"learning_rate": 2.947135628327544e-07,
|
|
"logits/chosen": 2.0764613151550293,
|
|
"logits/rejected": 2.02164888381958,
|
|
"logps/chosen": -129.00782775878906,
|
|
"logps/ref_chosen": -75.46063232421875,
|
|
"logps/ref_rejected": -84.78495788574219,
|
|
"logps/rejected": -175.27273559570312,
|
|
"loss": 1.0412,
|
|
"margin_dpo/margin_mean": 36.94060516357422,
|
|
"margin_dpo/margin_std": 53.34477996826172,
|
|
"step": 331
|
|
},
|
|
{
|
|
"epoch": 0.5018896447467877,
|
|
"fcm_dpo/beta": 0.015376402996480465,
|
|
"fcm_dpo/delta": -0.1497417539358139,
|
|
"fcm_dpo/margin": 35.20355224609375,
|
|
"fcm_dpo/q_t": 0.3825414180755615,
|
|
"grad_norm": 9.974367141723633,
|
|
"learning_rate": 2.934120444167326e-07,
|
|
"logits/chosen": 1.1533145904541016,
|
|
"logits/rejected": 1.0612804889678955,
|
|
"logps/chosen": -136.8844757080078,
|
|
"logps/ref_chosen": -84.32807922363281,
|
|
"logps/ref_rejected": -95.63302612304688,
|
|
"logps/rejected": -183.39297485351562,
|
|
"loss": 1.0329,
|
|
"margin_dpo/margin_mean": 35.20355224609375,
|
|
"margin_dpo/margin_std": 45.5770263671875,
|
|
"step": 332
|
|
},
|
|
{
|
|
"epoch": 0.5034013605442177,
|
|
"fcm_dpo/beta": 0.014777831733226776,
|
|
"fcm_dpo/delta": -0.17367474734783173,
|
|
"fcm_dpo/margin": 38.178375244140625,
|
|
"fcm_dpo/q_t": 0.37468940019607544,
|
|
"grad_norm": 8.577049255371094,
|
|
"learning_rate": 2.921093116725076e-07,
|
|
"logits/chosen": 1.7649075984954834,
|
|
"logits/rejected": 1.598151683807373,
|
|
"logps/chosen": -137.31753540039062,
|
|
"logps/ref_chosen": -78.2132339477539,
|
|
"logps/ref_rejected": -103.82716369628906,
|
|
"logps/rejected": -201.10983276367188,
|
|
"loss": 0.9973,
|
|
"margin_dpo/margin_mean": 38.178375244140625,
|
|
"margin_dpo/margin_std": 44.62627410888672,
|
|
"step": 333
|
|
},
|
|
{
|
|
"epoch": 0.5049130763416477,
|
|
"fcm_dpo/beta": 0.014649095013737679,
|
|
"fcm_dpo/delta": 0.020269624888896942,
|
|
"fcm_dpo/margin": 25.97400665283203,
|
|
"fcm_dpo/q_t": 0.4182738661766052,
|
|
"grad_norm": 9.703882217407227,
|
|
"learning_rate": 2.9080540104031484e-07,
|
|
"logits/chosen": 1.5542503595352173,
|
|
"logits/rejected": 1.3984897136688232,
|
|
"logps/chosen": -144.12342834472656,
|
|
"logps/ref_chosen": -85.0171127319336,
|
|
"logps/ref_rejected": -106.79039764404297,
|
|
"logps/rejected": -191.8707275390625,
|
|
"loss": 1.2143,
|
|
"margin_dpo/margin_mean": 25.9740047454834,
|
|
"margin_dpo/margin_std": 59.42298889160156,
|
|
"step": 334
|
|
},
|
|
{
|
|
"epoch": 0.5064247921390779,
|
|
"fcm_dpo/beta": 0.014591803774237633,
|
|
"fcm_dpo/delta": -0.04717499762773514,
|
|
"fcm_dpo/margin": 25.18863296508789,
|
|
"fcm_dpo/q_t": 0.42526859045028687,
|
|
"grad_norm": 14.016016960144043,
|
|
"learning_rate": 2.895003489933375e-07,
|
|
"logits/chosen": 1.5459372997283936,
|
|
"logits/rejected": 1.444729208946228,
|
|
"logps/chosen": -143.58782958984375,
|
|
"logps/ref_chosen": -78.56513214111328,
|
|
"logps/ref_rejected": -92.68515014648438,
|
|
"logps/rejected": -182.89646911621094,
|
|
"loss": 1.2477,
|
|
"margin_dpo/margin_mean": 25.188629150390625,
|
|
"margin_dpo/margin_std": 63.48894500732422,
|
|
"step": 335
|
|
},
|
|
{
|
|
"epoch": 0.5079365079365079,
|
|
"fcm_dpo/beta": 0.014425481669604778,
|
|
"fcm_dpo/delta": -0.014608364552259445,
|
|
"fcm_dpo/margin": 28.666763305664062,
|
|
"fcm_dpo/q_t": 0.40841758251190186,
|
|
"grad_norm": 15.602229118347168,
|
|
"learning_rate": 2.8819419203668675e-07,
|
|
"logits/chosen": 1.321707010269165,
|
|
"logits/rejected": 1.2782776355743408,
|
|
"logps/chosen": -163.07656860351562,
|
|
"logps/ref_chosen": -88.63243103027344,
|
|
"logps/ref_rejected": -107.89385986328125,
|
|
"logps/rejected": -211.0047607421875,
|
|
"loss": 1.1316,
|
|
"margin_dpo/margin_mean": 28.666763305664062,
|
|
"margin_dpo/margin_std": 49.40132141113281,
|
|
"step": 336
|
|
},
|
|
{
|
|
"epoch": 0.509448223733938,
|
|
"fcm_dpo/beta": 0.01457132212817669,
|
|
"fcm_dpo/delta": 0.03601706027984619,
|
|
"fcm_dpo/margin": 25.066566467285156,
|
|
"fcm_dpo/q_t": 0.4219823479652405,
|
|
"grad_norm": 11.307106971740723,
|
|
"learning_rate": 2.8688696670638053e-07,
|
|
"logits/chosen": 1.351456642150879,
|
|
"logits/rejected": 1.244319200515747,
|
|
"logps/chosen": -164.80145263671875,
|
|
"logps/ref_chosen": -93.25018310546875,
|
|
"logps/ref_rejected": -103.8592529296875,
|
|
"logps/rejected": -200.4770965576172,
|
|
"loss": 1.1949,
|
|
"margin_dpo/margin_mean": 25.066566467285156,
|
|
"margin_dpo/margin_std": 53.922340393066406,
|
|
"step": 337
|
|
},
|
|
{
|
|
"epoch": 0.5109599395313681,
|
|
"fcm_dpo/beta": 0.014669684693217278,
|
|
"fcm_dpo/delta": 0.039764419198036194,
|
|
"fcm_dpo/margin": 24.654888153076172,
|
|
"fcm_dpo/q_t": 0.42528173327445984,
|
|
"grad_norm": 9.578775405883789,
|
|
"learning_rate": 2.8557870956832133e-07,
|
|
"logits/chosen": 1.6919002532958984,
|
|
"logits/rejected": 1.6292130947113037,
|
|
"logps/chosen": -154.7214813232422,
|
|
"logps/ref_chosen": -81.79462432861328,
|
|
"logps/ref_rejected": -90.98942565917969,
|
|
"logps/rejected": -188.5711669921875,
|
|
"loss": 1.1853,
|
|
"margin_dpo/margin_mean": 24.654888153076172,
|
|
"margin_dpo/margin_std": 51.62914276123047,
|
|
"step": 338
|
|
},
|
|
{
|
|
"epoch": 0.5124716553287982,
|
|
"fcm_dpo/beta": 0.014321266673505306,
|
|
"fcm_dpo/delta": -0.1600484699010849,
|
|
"fcm_dpo/margin": 38.45646286010742,
|
|
"fcm_dpo/q_t": 0.3788467347621918,
|
|
"grad_norm": 8.566285133361816,
|
|
"learning_rate": 2.842694572172736e-07,
|
|
"logits/chosen": 1.7731349468231201,
|
|
"logits/rejected": 1.559950590133667,
|
|
"logps/chosen": -119.42196655273438,
|
|
"logps/ref_chosen": -61.80355453491211,
|
|
"logps/ref_rejected": -85.16979217529297,
|
|
"logps/rejected": -181.24465942382812,
|
|
"loss": 1.0265,
|
|
"margin_dpo/margin_mean": 38.45646667480469,
|
|
"margin_dpo/margin_std": 50.06471252441406,
|
|
"step": 339
|
|
},
|
|
{
|
|
"epoch": 0.5139833711262283,
|
|
"fcm_dpo/beta": 0.014187190681695938,
|
|
"fcm_dpo/delta": -0.06460127234458923,
|
|
"fcm_dpo/margin": 32.51493453979492,
|
|
"fcm_dpo/q_t": 0.40349990129470825,
|
|
"grad_norm": 11.992464065551758,
|
|
"learning_rate": 2.8295924627584004e-07,
|
|
"logits/chosen": 1.505007028579712,
|
|
"logits/rejected": 1.4854414463043213,
|
|
"logps/chosen": -140.9080810546875,
|
|
"logps/ref_chosen": -72.486083984375,
|
|
"logps/ref_rejected": -79.86129760742188,
|
|
"logps/rejected": -180.79823303222656,
|
|
"loss": 1.1434,
|
|
"margin_dpo/margin_mean": 32.51493453979492,
|
|
"margin_dpo/margin_std": 60.210792541503906,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.5154950869236583,
|
|
"fcm_dpo/beta": 0.013437741436064243,
|
|
"fcm_dpo/delta": -0.22484445571899414,
|
|
"fcm_dpo/margin": 37.80937576293945,
|
|
"fcm_dpo/q_t": 0.3881411552429199,
|
|
"grad_norm": 16.43123435974121,
|
|
"learning_rate": 2.816481133934373e-07,
|
|
"logits/chosen": 1.8760827779769897,
|
|
"logits/rejected": 1.7105937004089355,
|
|
"logps/chosen": -145.66722106933594,
|
|
"logps/ref_chosen": -77.36830139160156,
|
|
"logps/ref_rejected": -94.64933013916016,
|
|
"logps/rejected": -200.75762939453125,
|
|
"loss": 1.0886,
|
|
"margin_dpo/margin_mean": 37.80937576293945,
|
|
"margin_dpo/margin_std": 55.31840133666992,
|
|
"step": 341
|
|
},
|
|
{
|
|
"epoch": 0.5170068027210885,
|
|
"fcm_dpo/beta": 0.013311228714883327,
|
|
"fcm_dpo/delta": -0.059115879237651825,
|
|
"fcm_dpo/margin": 34.28473663330078,
|
|
"fcm_dpo/q_t": 0.40271270275115967,
|
|
"grad_norm": 7.672739028930664,
|
|
"learning_rate": 2.8033609524527046e-07,
|
|
"logits/chosen": 1.5947625637054443,
|
|
"logits/rejected": 1.496432900428772,
|
|
"logps/chosen": -133.24288940429688,
|
|
"logps/ref_chosen": -71.00831604003906,
|
|
"logps/ref_rejected": -84.22953796386719,
|
|
"logps/rejected": -180.74884033203125,
|
|
"loss": 1.1126,
|
|
"margin_dpo/margin_mean": 34.284732818603516,
|
|
"margin_dpo/margin_std": 57.40011978149414,
|
|
"step": 342
|
|
},
|
|
{
|
|
"epoch": 0.5185185185185185,
|
|
"fcm_dpo/beta": 0.01319624949246645,
|
|
"fcm_dpo/delta": -0.03155739977955818,
|
|
"fcm_dpo/margin": 21.631832122802734,
|
|
"fcm_dpo/q_t": 0.4371912479400635,
|
|
"grad_norm": 8.965015411376953,
|
|
"learning_rate": 2.7902322853130753e-07,
|
|
"logits/chosen": 1.3000271320343018,
|
|
"logits/rejected": 1.2619600296020508,
|
|
"logps/chosen": -152.49551391601562,
|
|
"logps/ref_chosen": -91.44624328613281,
|
|
"logps/ref_rejected": -99.06044006347656,
|
|
"logps/rejected": -181.74154663085938,
|
|
"loss": 1.245,
|
|
"margin_dpo/margin_mean": 21.631834030151367,
|
|
"margin_dpo/margin_std": 53.96006774902344,
|
|
"step": 343
|
|
},
|
|
{
|
|
"epoch": 0.5200302343159486,
|
|
"fcm_dpo/beta": 0.013017518445849419,
|
|
"fcm_dpo/delta": -0.04310518503189087,
|
|
"fcm_dpo/margin": 33.79201126098633,
|
|
"fcm_dpo/q_t": 0.4027559459209442,
|
|
"grad_norm": 8.789457321166992,
|
|
"learning_rate": 2.7770954997525274e-07,
|
|
"logits/chosen": 1.9475184679031372,
|
|
"logits/rejected": 1.7549506425857544,
|
|
"logps/chosen": -145.70272827148438,
|
|
"logps/ref_chosen": -73.43608093261719,
|
|
"logps/ref_rejected": -100.76569366455078,
|
|
"logps/rejected": -206.82437133789062,
|
|
"loss": 1.1134,
|
|
"margin_dpo/margin_mean": 33.79201126098633,
|
|
"margin_dpo/margin_std": 54.33518600463867,
|
|
"step": 344
|
|
},
|
|
{
|
|
"epoch": 0.5215419501133787,
|
|
"fcm_dpo/beta": 0.013107789680361748,
|
|
"fcm_dpo/delta": 0.009255402721464634,
|
|
"fcm_dpo/margin": 29.836973190307617,
|
|
"fcm_dpo/q_t": 0.4151036739349365,
|
|
"grad_norm": 8.21016788482666,
|
|
"learning_rate": 2.7639509632351927e-07,
|
|
"logits/chosen": 1.7126402854919434,
|
|
"logits/rejected": 1.5933332443237305,
|
|
"logps/chosen": -130.25466918945312,
|
|
"logps/ref_chosen": -75.79296875,
|
|
"logps/ref_rejected": -94.34156799316406,
|
|
"logps/rejected": -178.64024353027344,
|
|
"loss": 1.1678,
|
|
"margin_dpo/margin_mean": 29.83697509765625,
|
|
"margin_dpo/margin_std": 58.75901794433594,
|
|
"step": 345
|
|
},
|
|
{
|
|
"epoch": 0.5230536659108088,
|
|
"fcm_dpo/beta": 0.013089219108223915,
|
|
"fcm_dpo/delta": -0.04255472123622894,
|
|
"fcm_dpo/margin": 33.64637756347656,
|
|
"fcm_dpo/q_t": 0.4048069715499878,
|
|
"grad_norm": 9.588007926940918,
|
|
"learning_rate": 2.7507990434420123e-07,
|
|
"logits/chosen": 1.4854192733764648,
|
|
"logits/rejected": 1.303803563117981,
|
|
"logps/chosen": -130.6438446044922,
|
|
"logps/ref_chosen": -72.26289367675781,
|
|
"logps/ref_rejected": -106.36925506591797,
|
|
"logps/rejected": -198.39659118652344,
|
|
"loss": 1.1534,
|
|
"margin_dpo/margin_mean": 33.6463737487793,
|
|
"margin_dpo/margin_std": 63.119842529296875,
|
|
"step": 346
|
|
},
|
|
{
|
|
"epoch": 0.5245653817082389,
|
|
"fcm_dpo/beta": 0.013037774711847305,
|
|
"fcm_dpo/delta": 0.005431188270449638,
|
|
"fcm_dpo/margin": 30.270421981811523,
|
|
"fcm_dpo/q_t": 0.41649848222732544,
|
|
"grad_norm": 10.595651626586914,
|
|
"learning_rate": 2.737640108260456e-07,
|
|
"logits/chosen": 1.8637452125549316,
|
|
"logits/rejected": 1.750800609588623,
|
|
"logps/chosen": -138.73822021484375,
|
|
"logps/ref_chosen": -71.19871520996094,
|
|
"logps/ref_rejected": -91.543212890625,
|
|
"logps/rejected": -189.35311889648438,
|
|
"loss": 1.1634,
|
|
"margin_dpo/margin_mean": 30.270421981811523,
|
|
"margin_dpo/margin_std": 58.804443359375,
|
|
"step": 347
|
|
},
|
|
{
|
|
"epoch": 0.5260770975056689,
|
|
"fcm_dpo/beta": 0.012842783704400063,
|
|
"fcm_dpo/delta": -0.06106501445174217,
|
|
"fcm_dpo/margin": 35.64274597167969,
|
|
"fcm_dpo/q_t": 0.4025927186012268,
|
|
"grad_norm": 8.013747215270996,
|
|
"learning_rate": 2.724474525774229e-07,
|
|
"logits/chosen": 1.8773822784423828,
|
|
"logits/rejected": 1.792330265045166,
|
|
"logps/chosen": -130.76272583007812,
|
|
"logps/ref_chosen": -69.95603942871094,
|
|
"logps/ref_rejected": -83.64309692382812,
|
|
"logps/rejected": -180.092529296875,
|
|
"loss": 1.1057,
|
|
"margin_dpo/margin_mean": 35.64274215698242,
|
|
"margin_dpo/margin_std": 58.040496826171875,
|
|
"step": 348
|
|
},
|
|
{
|
|
"epoch": 0.527588813303099,
|
|
"fcm_dpo/beta": 0.012860096991062164,
|
|
"fcm_dpo/delta": -0.034538384526968,
|
|
"fcm_dpo/margin": 33.63842010498047,
|
|
"fcm_dpo/q_t": 0.4073810279369354,
|
|
"grad_norm": 8.36850643157959,
|
|
"learning_rate": 2.711302664252973e-07,
|
|
"logits/chosen": 1.7388920783996582,
|
|
"logits/rejected": 1.5637342929840088,
|
|
"logps/chosen": -130.26393127441406,
|
|
"logps/ref_chosen": -70.71857452392578,
|
|
"logps/ref_rejected": -99.93263244628906,
|
|
"logps/rejected": -193.11642456054688,
|
|
"loss": 1.1455,
|
|
"margin_dpo/margin_mean": 33.63842010498047,
|
|
"margin_dpo/margin_std": 61.37939453125,
|
|
"step": 349
|
|
},
|
|
{
|
|
"epoch": 0.5291005291005291,
|
|
"fcm_dpo/beta": 0.012324045412242413,
|
|
"fcm_dpo/delta": -0.20469069480895996,
|
|
"fcm_dpo/margin": 47.97602081298828,
|
|
"fcm_dpo/q_t": 0.3688136339187622,
|
|
"grad_norm": 8.611337661743164,
|
|
"learning_rate": 2.698124892141971e-07,
|
|
"logits/chosen": 1.7097694873809814,
|
|
"logits/rejected": 1.5507686138153076,
|
|
"logps/chosen": -139.9197540283203,
|
|
"logps/ref_chosen": -78.16873168945312,
|
|
"logps/ref_rejected": -104.84308624267578,
|
|
"logps/rejected": -214.57012939453125,
|
|
"loss": 0.9812,
|
|
"margin_dpo/margin_mean": 47.976016998291016,
|
|
"margin_dpo/margin_std": 54.40205383300781,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.5306122448979592,
|
|
"fcm_dpo/beta": 0.012206001207232475,
|
|
"fcm_dpo/delta": -0.014462406747043133,
|
|
"fcm_dpo/margin": 33.90068817138672,
|
|
"fcm_dpo/q_t": 0.40708887577056885,
|
|
"grad_norm": 9.900226593017578,
|
|
"learning_rate": 2.6849415780518357e-07,
|
|
"logits/chosen": 1.5461342334747314,
|
|
"logits/rejected": 1.3549315929412842,
|
|
"logps/chosen": -138.26388549804688,
|
|
"logps/ref_chosen": -71.79151916503906,
|
|
"logps/ref_rejected": -97.04634094238281,
|
|
"logps/rejected": -197.41940307617188,
|
|
"loss": 1.1573,
|
|
"margin_dpo/margin_mean": 33.90068817138672,
|
|
"margin_dpo/margin_std": 63.30729293823242,
|
|
"step": 351
|
|
},
|
|
{
|
|
"epoch": 0.5321239606953893,
|
|
"fcm_dpo/beta": 0.012208282947540283,
|
|
"fcm_dpo/delta": 0.02938525378704071,
|
|
"fcm_dpo/margin": 30.417400360107422,
|
|
"fcm_dpo/q_t": 0.4205033779144287,
|
|
"grad_norm": 8.994142532348633,
|
|
"learning_rate": 2.6717530907482024e-07,
|
|
"logits/chosen": 1.763619065284729,
|
|
"logits/rejected": 1.622790813446045,
|
|
"logps/chosen": -151.24343872070312,
|
|
"logps/ref_chosen": -80.86544799804688,
|
|
"logps/ref_rejected": -102.02129364013672,
|
|
"logps/rejected": -202.81666564941406,
|
|
"loss": 1.1736,
|
|
"margin_dpo/margin_mean": 30.417404174804688,
|
|
"margin_dpo/margin_std": 60.021575927734375,
|
|
"step": 352
|
|
},
|
|
{
|
|
"epoch": 0.5336356764928194,
|
|
"fcm_dpo/beta": 0.012203685939311981,
|
|
"fcm_dpo/delta": -0.062111545354127884,
|
|
"fcm_dpo/margin": 37.6354866027832,
|
|
"fcm_dpo/q_t": 0.39819955825805664,
|
|
"grad_norm": 8.61074447631836,
|
|
"learning_rate": 2.658559799141411e-07,
|
|
"logits/chosen": 1.6032516956329346,
|
|
"logits/rejected": 1.6155762672424316,
|
|
"logps/chosen": -138.62901306152344,
|
|
"logps/ref_chosen": -84.77235412597656,
|
|
"logps/ref_rejected": -86.77130889892578,
|
|
"logps/rejected": -178.26345825195312,
|
|
"loss": 1.1042,
|
|
"margin_dpo/margin_mean": 37.6354866027832,
|
|
"margin_dpo/margin_std": 60.413963317871094,
|
|
"step": 353
|
|
},
|
|
{
|
|
"epoch": 0.5351473922902494,
|
|
"fcm_dpo/beta": 0.01194113027304411,
|
|
"fcm_dpo/delta": -0.08657968789339066,
|
|
"fcm_dpo/margin": 40.35693359375,
|
|
"fcm_dpo/q_t": 0.39217013120651245,
|
|
"grad_norm": 7.400528907775879,
|
|
"learning_rate": 2.6453620722761895e-07,
|
|
"logits/chosen": 1.9535037279129028,
|
|
"logits/rejected": 1.5858149528503418,
|
|
"logps/chosen": -109.94905090332031,
|
|
"logps/ref_chosen": -54.33562088012695,
|
|
"logps/ref_rejected": -92.4120101928711,
|
|
"logps/rejected": -188.38235473632812,
|
|
"loss": 1.0929,
|
|
"margin_dpo/margin_mean": 40.35693359375,
|
|
"margin_dpo/margin_std": 63.149314880371094,
|
|
"step": 354
|
|
},
|
|
{
|
|
"epoch": 0.5366591080876795,
|
|
"fcm_dpo/beta": 0.011824669316411018,
|
|
"fcm_dpo/delta": -0.07782451808452606,
|
|
"fcm_dpo/margin": 40.10158920288086,
|
|
"fcm_dpo/q_t": 0.3945278525352478,
|
|
"grad_norm": 6.816810131072998,
|
|
"learning_rate": 2.632160279321328e-07,
|
|
"logits/chosen": 1.802927017211914,
|
|
"logits/rejected": 1.4767255783081055,
|
|
"logps/chosen": -124.52835845947266,
|
|
"logps/ref_chosen": -61.8388671875,
|
|
"logps/ref_rejected": -98.65571594238281,
|
|
"logps/rejected": -201.44680786132812,
|
|
"loss": 1.0844,
|
|
"margin_dpo/margin_mean": 40.10158920288086,
|
|
"margin_dpo/margin_std": 59.98944854736328,
|
|
"step": 355
|
|
},
|
|
{
|
|
"epoch": 0.5381708238851096,
|
|
"fcm_dpo/beta": 0.011599002406001091,
|
|
"fcm_dpo/delta": -0.050096526741981506,
|
|
"fcm_dpo/margin": 38.54914474487305,
|
|
"fcm_dpo/q_t": 0.4025239646434784,
|
|
"grad_norm": 9.990870475769043,
|
|
"learning_rate": 2.618954789559356e-07,
|
|
"logits/chosen": 1.7717761993408203,
|
|
"logits/rejected": 1.5829424858093262,
|
|
"logps/chosen": -113.30570983886719,
|
|
"logps/ref_chosen": -63.92546463012695,
|
|
"logps/ref_rejected": -89.682861328125,
|
|
"logps/rejected": -177.61224365234375,
|
|
"loss": 1.15,
|
|
"margin_dpo/margin_mean": 38.54914474487305,
|
|
"margin_dpo/margin_std": 72.21054077148438,
|
|
"step": 356
|
|
},
|
|
{
|
|
"epoch": 0.5396825396825397,
|
|
"fcm_dpo/beta": 0.011380214244127274,
|
|
"fcm_dpo/delta": -0.1073249951004982,
|
|
"fcm_dpo/margin": 33.18156051635742,
|
|
"fcm_dpo/q_t": 0.4151083827018738,
|
|
"grad_norm": 7.118601322174072,
|
|
"learning_rate": 2.6057459723762076e-07,
|
|
"logits/chosen": 1.7753883600234985,
|
|
"logits/rejected": 1.6539323329925537,
|
|
"logps/chosen": -143.6602783203125,
|
|
"logps/ref_chosen": -81.07588958740234,
|
|
"logps/ref_rejected": -85.06967163085938,
|
|
"logps/rejected": -180.83560180664062,
|
|
"loss": 1.1537,
|
|
"margin_dpo/margin_mean": 33.181556701660156,
|
|
"margin_dpo/margin_std": 57.43933868408203,
|
|
"step": 357
|
|
},
|
|
{
|
|
"epoch": 0.5411942554799698,
|
|
"fcm_dpo/beta": 0.011224227026104927,
|
|
"fcm_dpo/delta": -0.10705891996622086,
|
|
"fcm_dpo/margin": 44.70811462402344,
|
|
"fcm_dpo/q_t": 0.3878515660762787,
|
|
"grad_norm": 8.951573371887207,
|
|
"learning_rate": 2.5925341972508954e-07,
|
|
"logits/chosen": 1.4298584461212158,
|
|
"logits/rejected": 1.4541399478912354,
|
|
"logps/chosen": -139.0441436767578,
|
|
"logps/ref_chosen": -84.09109497070312,
|
|
"logps/ref_rejected": -85.07244873046875,
|
|
"logps/rejected": -184.73361206054688,
|
|
"loss": 1.0554,
|
|
"margin_dpo/margin_mean": 44.70811462402344,
|
|
"margin_dpo/margin_std": 61.42544937133789,
|
|
"step": 358
|
|
},
|
|
{
|
|
"epoch": 0.5427059712773998,
|
|
"fcm_dpo/beta": 0.011128641664981842,
|
|
"fcm_dpo/delta": -0.009427734650671482,
|
|
"fcm_dpo/margin": 25.97496795654297,
|
|
"fcm_dpo/q_t": 0.43627357482910156,
|
|
"grad_norm": 7.67764949798584,
|
|
"learning_rate": 2.579319833745169e-07,
|
|
"logits/chosen": 1.1958760023117065,
|
|
"logits/rejected": 1.1496171951293945,
|
|
"logps/chosen": -139.93809509277344,
|
|
"logps/ref_chosen": -80.7490234375,
|
|
"logps/ref_rejected": -94.92911529541016,
|
|
"logps/rejected": -180.09317016601562,
|
|
"loss": 1.2185,
|
|
"margin_dpo/margin_mean": 25.97496795654297,
|
|
"margin_dpo/margin_std": 57.3719596862793,
|
|
"step": 359
|
|
},
|
|
{
|
|
"epoch": 0.54421768707483,
|
|
"fcm_dpo/beta": 0.011084508150815964,
|
|
"fcm_dpo/delta": -0.0067586712539196014,
|
|
"fcm_dpo/margin": 36.662925720214844,
|
|
"fcm_dpo/q_t": 0.41188788414001465,
|
|
"grad_norm": 9.444145202636719,
|
|
"learning_rate": 2.5661032514931834e-07,
|
|
"logits/chosen": 1.6899638175964355,
|
|
"logits/rejected": 1.4466266632080078,
|
|
"logps/chosen": -139.62124633789062,
|
|
"logps/ref_chosen": -78.38681030273438,
|
|
"logps/ref_rejected": -109.68933868408203,
|
|
"logps/rejected": -207.58673095703125,
|
|
"loss": 1.1254,
|
|
"margin_dpo/margin_mean": 36.662925720214844,
|
|
"margin_dpo/margin_std": 61.32177734375,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.54572940287226,
|
|
"fcm_dpo/beta": 0.010927680879831314,
|
|
"fcm_dpo/delta": -0.11040109395980835,
|
|
"fcm_dpo/margin": 46.201229095458984,
|
|
"fcm_dpo/q_t": 0.3868357539176941,
|
|
"grad_norm": 10.33779525756836,
|
|
"learning_rate": 2.552884820191154e-07,
|
|
"logits/chosen": 1.3621007204055786,
|
|
"logits/rejected": 1.2309937477111816,
|
|
"logps/chosen": -130.75283813476562,
|
|
"logps/ref_chosen": -73.9055404663086,
|
|
"logps/ref_rejected": -89.8489990234375,
|
|
"logps/rejected": -192.89752197265625,
|
|
"loss": 1.0453,
|
|
"margin_dpo/margin_mean": 46.201229095458984,
|
|
"margin_dpo/margin_std": 61.137569427490234,
|
|
"step": 361
|
|
},
|
|
{
|
|
"epoch": 0.54724111866969,
|
|
"fcm_dpo/beta": 0.010783391073346138,
|
|
"fcm_dpo/delta": -0.07165468484163284,
|
|
"fcm_dpo/margin": 32.059444427490234,
|
|
"fcm_dpo/q_t": 0.4229075610637665,
|
|
"grad_norm": 7.126445293426514,
|
|
"learning_rate": 2.53966490958702e-07,
|
|
"logits/chosen": 1.6053223609924316,
|
|
"logits/rejected": 1.2717453241348267,
|
|
"logps/chosen": -150.54673767089844,
|
|
"logps/ref_chosen": -82.32565307617188,
|
|
"logps/ref_rejected": -123.14100646972656,
|
|
"logps/rejected": -223.42153930664062,
|
|
"loss": 1.1834,
|
|
"margin_dpo/margin_mean": 32.059444427490234,
|
|
"margin_dpo/margin_std": 62.51380920410156,
|
|
"step": 362
|
|
},
|
|
{
|
|
"epoch": 0.5487528344671202,
|
|
"fcm_dpo/beta": 0.010588840581476688,
|
|
"fcm_dpo/delta": -0.07053353637456894,
|
|
"fcm_dpo/margin": 44.12169647216797,
|
|
"fcm_dpo/q_t": 0.39325785636901855,
|
|
"grad_norm": 9.556055068969727,
|
|
"learning_rate": 2.526443889470099e-07,
|
|
"logits/chosen": 1.9458461999893188,
|
|
"logits/rejected": 1.5623431205749512,
|
|
"logps/chosen": -133.33316040039062,
|
|
"logps/ref_chosen": -66.05493927001953,
|
|
"logps/ref_rejected": -106.79598999023438,
|
|
"logps/rejected": -218.1959228515625,
|
|
"loss": 1.0552,
|
|
"margin_dpo/margin_mean": 44.12169647216797,
|
|
"margin_dpo/margin_std": 56.65771484375,
|
|
"step": 363
|
|
},
|
|
{
|
|
"epoch": 0.5502645502645502,
|
|
"fcm_dpo/beta": 0.010463178157806396,
|
|
"fcm_dpo/delta": -0.044942498207092285,
|
|
"fcm_dpo/margin": 42.31687545776367,
|
|
"fcm_dpo/q_t": 0.40370985865592957,
|
|
"grad_norm": 6.737151622772217,
|
|
"learning_rate": 2.513222129660744e-07,
|
|
"logits/chosen": 1.4676833152770996,
|
|
"logits/rejected": 1.313372254371643,
|
|
"logps/chosen": -137.15481567382812,
|
|
"logps/ref_chosen": -76.38365173339844,
|
|
"logps/ref_rejected": -100.22221374511719,
|
|
"logps/rejected": -203.31024169921875,
|
|
"loss": 1.126,
|
|
"margin_dpo/margin_mean": 42.31687927246094,
|
|
"margin_dpo/margin_std": 73.18367004394531,
|
|
"step": 364
|
|
},
|
|
{
|
|
"epoch": 0.5517762660619804,
|
|
"fcm_dpo/beta": 0.010423636063933372,
|
|
"fcm_dpo/delta": -0.021412841975688934,
|
|
"fcm_dpo/margin": 40.340911865234375,
|
|
"fcm_dpo/q_t": 0.403866708278656,
|
|
"grad_norm": 9.5303955078125,
|
|
"learning_rate": 2.5e-07,
|
|
"logits/chosen": 1.8161044120788574,
|
|
"logits/rejected": 1.81839120388031,
|
|
"logps/chosen": -129.7283172607422,
|
|
"logps/ref_chosen": -81.83399963378906,
|
|
"logps/ref_rejected": -89.06932830810547,
|
|
"logps/rejected": -177.3045654296875,
|
|
"loss": 1.0826,
|
|
"margin_dpo/margin_mean": 40.340911865234375,
|
|
"margin_dpo/margin_std": 52.74284744262695,
|
|
"step": 365
|
|
},
|
|
{
|
|
"epoch": 0.5532879818594104,
|
|
"fcm_dpo/beta": 0.010475091636180878,
|
|
"fcm_dpo/delta": 0.034233205020427704,
|
|
"fcm_dpo/margin": 35.035865783691406,
|
|
"fcm_dpo/q_t": 0.41999638080596924,
|
|
"grad_norm": 7.062388896942139,
|
|
"learning_rate": 2.486777870339255e-07,
|
|
"logits/chosen": 1.525472640991211,
|
|
"logits/rejected": 1.503098964691162,
|
|
"logps/chosen": -128.90716552734375,
|
|
"logps/ref_chosen": -72.03398895263672,
|
|
"logps/ref_rejected": -83.65354919433594,
|
|
"logps/rejected": -175.56259155273438,
|
|
"loss": 1.1736,
|
|
"margin_dpo/margin_mean": 35.035865783691406,
|
|
"margin_dpo/margin_std": 69.0810546875,
|
|
"step": 366
|
|
},
|
|
{
|
|
"epoch": 0.5547996976568406,
|
|
"fcm_dpo/beta": 0.010566307231783867,
|
|
"fcm_dpo/delta": 0.06605655699968338,
|
|
"fcm_dpo/margin": 31.818260192871094,
|
|
"fcm_dpo/q_t": 0.4226842224597931,
|
|
"grad_norm": 7.502590656280518,
|
|
"learning_rate": 2.4735561105299014e-07,
|
|
"logits/chosen": 1.4546136856079102,
|
|
"logits/rejected": 1.2036893367767334,
|
|
"logps/chosen": -138.4822540283203,
|
|
"logps/ref_chosen": -72.39828491210938,
|
|
"logps/ref_rejected": -95.58364868164062,
|
|
"logps/rejected": -193.4858856201172,
|
|
"loss": 1.1717,
|
|
"margin_dpo/margin_mean": 31.818260192871094,
|
|
"margin_dpo/margin_std": 59.4080810546875,
|
|
"step": 367
|
|
},
|
|
{
|
|
"epoch": 0.5563114134542706,
|
|
"fcm_dpo/beta": 0.010582388378679752,
|
|
"fcm_dpo/delta": -0.018604513257741928,
|
|
"fcm_dpo/margin": 39.4763298034668,
|
|
"fcm_dpo/q_t": 0.40766745805740356,
|
|
"grad_norm": 7.9668803215026855,
|
|
"learning_rate": 2.46033509041298e-07,
|
|
"logits/chosen": 1.2730112075805664,
|
|
"logits/rejected": 1.2791517972946167,
|
|
"logps/chosen": -163.7975311279297,
|
|
"logps/ref_chosen": -90.12812042236328,
|
|
"logps/ref_rejected": -91.6636962890625,
|
|
"logps/rejected": -204.8094482421875,
|
|
"loss": 1.1212,
|
|
"margin_dpo/margin_mean": 39.47633361816406,
|
|
"margin_dpo/margin_std": 65.0750732421875,
|
|
"step": 368
|
|
},
|
|
{
|
|
"epoch": 0.5578231292517006,
|
|
"fcm_dpo/beta": 0.010618243366479874,
|
|
"fcm_dpo/delta": 0.027943773195147514,
|
|
"fcm_dpo/margin": 26.496131896972656,
|
|
"fcm_dpo/q_t": 0.4377315640449524,
|
|
"grad_norm": 9.919585227966309,
|
|
"learning_rate": 2.447115179808846e-07,
|
|
"logits/chosen": 1.5244429111480713,
|
|
"logits/rejected": 1.434938669204712,
|
|
"logps/chosen": -138.58261108398438,
|
|
"logps/ref_chosen": -71.29417419433594,
|
|
"logps/ref_rejected": -99.03875732421875,
|
|
"logps/rejected": -192.82333374023438,
|
|
"loss": 1.2296,
|
|
"margin_dpo/margin_mean": 26.49612808227539,
|
|
"margin_dpo/margin_std": 62.91883850097656,
|
|
"step": 369
|
|
},
|
|
{
|
|
"epoch": 0.5593348450491308,
|
|
"fcm_dpo/beta": 0.010387836024165154,
|
|
"fcm_dpo/delta": -0.14537188410758972,
|
|
"fcm_dpo/margin": 51.689735412597656,
|
|
"fcm_dpo/q_t": 0.3806115984916687,
|
|
"grad_norm": 8.526147842407227,
|
|
"learning_rate": 2.4338967485068164e-07,
|
|
"logits/chosen": 1.9894087314605713,
|
|
"logits/rejected": 1.8496686220169067,
|
|
"logps/chosen": -132.82601928710938,
|
|
"logps/ref_chosen": -69.14627075195312,
|
|
"logps/ref_rejected": -93.58651733398438,
|
|
"logps/rejected": -208.95599365234375,
|
|
"loss": 1.0493,
|
|
"margin_dpo/margin_mean": 51.689735412597656,
|
|
"margin_dpo/margin_std": 72.51553344726562,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.5608465608465608,
|
|
"fcm_dpo/beta": 0.010324855335056782,
|
|
"fcm_dpo/delta": -0.04348411411046982,
|
|
"fcm_dpo/margin": 42.744075775146484,
|
|
"fcm_dpo/q_t": 0.40137621760368347,
|
|
"grad_norm": 15.825779914855957,
|
|
"learning_rate": 2.420680166254831e-07,
|
|
"logits/chosen": 2.0278491973876953,
|
|
"logits/rejected": 1.9837315082550049,
|
|
"logps/chosen": -133.10084533691406,
|
|
"logps/ref_chosen": -65.76728820800781,
|
|
"logps/ref_rejected": -79.9320068359375,
|
|
"logps/rejected": -190.0096435546875,
|
|
"loss": 1.115,
|
|
"margin_dpo/margin_mean": 42.74407958984375,
|
|
"margin_dpo/margin_std": 69.81803894042969,
|
|
"step": 371
|
|
},
|
|
{
|
|
"epoch": 0.562358276643991,
|
|
"fcm_dpo/beta": 0.010167205706238747,
|
|
"fcm_dpo/delta": -0.08461183309555054,
|
|
"fcm_dpo/margin": 34.201377868652344,
|
|
"fcm_dpo/q_t": 0.4236924350261688,
|
|
"grad_norm": 11.217028617858887,
|
|
"learning_rate": 2.4074658027491044e-07,
|
|
"logits/chosen": 1.6819504499435425,
|
|
"logits/rejected": 1.464050054550171,
|
|
"logps/chosen": -131.26275634765625,
|
|
"logps/ref_chosen": -69.97252655029297,
|
|
"logps/ref_rejected": -92.38316345214844,
|
|
"logps/rejected": -187.87478637695312,
|
|
"loss": 1.2092,
|
|
"margin_dpo/margin_mean": 34.201377868652344,
|
|
"margin_dpo/margin_std": 73.22529602050781,
|
|
"step": 372
|
|
},
|
|
{
|
|
"epoch": 0.563869992441421,
|
|
"fcm_dpo/beta": 0.01011381484568119,
|
|
"fcm_dpo/delta": 0.01528643537312746,
|
|
"fcm_dpo/margin": 38.09349822998047,
|
|
"fcm_dpo/q_t": 0.41596657037734985,
|
|
"grad_norm": 6.146734237670898,
|
|
"learning_rate": 2.394254027623792e-07,
|
|
"logits/chosen": 1.6164660453796387,
|
|
"logits/rejected": 1.3731865882873535,
|
|
"logps/chosen": -157.2783203125,
|
|
"logps/ref_chosen": -79.34700012207031,
|
|
"logps/ref_rejected": -95.69737243652344,
|
|
"logps/rejected": -211.72218322753906,
|
|
"loss": 1.1663,
|
|
"margin_dpo/margin_mean": 38.093502044677734,
|
|
"margin_dpo/margin_std": 73.8922119140625,
|
|
"step": 373
|
|
},
|
|
{
|
|
"epoch": 0.5653817082388511,
|
|
"fcm_dpo/beta": 0.0098224813118577,
|
|
"fcm_dpo/delta": -0.1968994438648224,
|
|
"fcm_dpo/margin": 59.59770965576172,
|
|
"fcm_dpo/q_t": 0.36753708124160767,
|
|
"grad_norm": 10.473079681396484,
|
|
"learning_rate": 2.381045210440644e-07,
|
|
"logits/chosen": 1.605952262878418,
|
|
"logits/rejected": 1.594057559967041,
|
|
"logps/chosen": -160.69447326660156,
|
|
"logps/ref_chosen": -93.45108032226562,
|
|
"logps/ref_rejected": -93.575927734375,
|
|
"logps/rejected": -220.41702270507812,
|
|
"loss": 0.9881,
|
|
"margin_dpo/margin_mean": 59.59770965576172,
|
|
"margin_dpo/margin_std": 68.90078735351562,
|
|
"step": 374
|
|
},
|
|
{
|
|
"epoch": 0.5668934240362812,
|
|
"fcm_dpo/beta": 0.009714547544717789,
|
|
"fcm_dpo/delta": 0.006051559001207352,
|
|
"fcm_dpo/margin": 40.572608947753906,
|
|
"fcm_dpo/q_t": 0.4105721712112427,
|
|
"grad_norm": 8.553228378295898,
|
|
"learning_rate": 2.3678397206786715e-07,
|
|
"logits/chosen": 1.466545581817627,
|
|
"logits/rejected": 1.3870232105255127,
|
|
"logps/chosen": -130.58074951171875,
|
|
"logps/ref_chosen": -77.37177276611328,
|
|
"logps/ref_rejected": -98.59054565429688,
|
|
"logps/rejected": -192.37213134765625,
|
|
"loss": 1.1341,
|
|
"margin_dpo/margin_mean": 40.572608947753906,
|
|
"margin_dpo/margin_std": 68.16059112548828,
|
|
"step": 375
|
|
},
|
|
{
|
|
"epoch": 0.5684051398337112,
|
|
"fcm_dpo/beta": 0.009592314250767231,
|
|
"fcm_dpo/delta": -0.0968877449631691,
|
|
"fcm_dpo/margin": 51.301605224609375,
|
|
"fcm_dpo/q_t": 0.39151814579963684,
|
|
"grad_norm": 6.652123928070068,
|
|
"learning_rate": 2.3546379277238103e-07,
|
|
"logits/chosen": 1.765944480895996,
|
|
"logits/rejected": 1.5501282215118408,
|
|
"logps/chosen": -136.59339904785156,
|
|
"logps/ref_chosen": -68.99790954589844,
|
|
"logps/ref_rejected": -90.37117004394531,
|
|
"logps/rejected": -209.2682647705078,
|
|
"loss": 1.0789,
|
|
"margin_dpo/margin_mean": 51.301605224609375,
|
|
"margin_dpo/margin_std": 77.24393463134766,
|
|
"step": 376
|
|
},
|
|
{
|
|
"epoch": 0.5699168556311414,
|
|
"fcm_dpo/beta": 0.009752610698342323,
|
|
"fcm_dpo/delta": 0.1289438158273697,
|
|
"fcm_dpo/margin": 28.12000846862793,
|
|
"fcm_dpo/q_t": 0.439037561416626,
|
|
"grad_norm": 8.08517074584961,
|
|
"learning_rate": 2.3414402008585886e-07,
|
|
"logits/chosen": 1.6637239456176758,
|
|
"logits/rejected": 1.628348469734192,
|
|
"logps/chosen": -143.10202026367188,
|
|
"logps/ref_chosen": -64.22705841064453,
|
|
"logps/ref_rejected": -73.10292053222656,
|
|
"logps/rejected": -180.09786987304688,
|
|
"loss": 1.2252,
|
|
"margin_dpo/margin_mean": 28.12000846862793,
|
|
"margin_dpo/margin_std": 64.73475646972656,
|
|
"step": 377
|
|
},
|
|
{
|
|
"epoch": 0.5714285714285714,
|
|
"fcm_dpo/beta": 0.009980445727705956,
|
|
"fcm_dpo/delta": 0.10566462576389313,
|
|
"fcm_dpo/margin": 29.759580612182617,
|
|
"fcm_dpo/q_t": 0.43272238969802856,
|
|
"grad_norm": 11.20825481414795,
|
|
"learning_rate": 2.3282469092517977e-07,
|
|
"logits/chosen": 1.4938591718673706,
|
|
"logits/rejected": 1.3901243209838867,
|
|
"logps/chosen": -152.7470703125,
|
|
"logps/ref_chosen": -76.90864562988281,
|
|
"logps/ref_rejected": -90.53460693359375,
|
|
"logps/rejected": -196.13259887695312,
|
|
"loss": 1.2127,
|
|
"margin_dpo/margin_mean": 29.759580612182617,
|
|
"margin_dpo/margin_std": 65.59465789794922,
|
|
"step": 378
|
|
},
|
|
{
|
|
"epoch": 0.5729402872260015,
|
|
"fcm_dpo/beta": 0.00992667581886053,
|
|
"fcm_dpo/delta": -0.06411861628293991,
|
|
"fcm_dpo/margin": 46.461585998535156,
|
|
"fcm_dpo/q_t": 0.3969395160675049,
|
|
"grad_norm": 10.400646209716797,
|
|
"learning_rate": 2.3150584219481643e-07,
|
|
"logits/chosen": 1.529576063156128,
|
|
"logits/rejected": 1.3459718227386475,
|
|
"logps/chosen": -159.71408081054688,
|
|
"logps/ref_chosen": -91.2371597290039,
|
|
"logps/ref_rejected": -120.1969985961914,
|
|
"logps/rejected": -235.135498046875,
|
|
"loss": 1.0854,
|
|
"margin_dpo/margin_mean": 46.461585998535156,
|
|
"margin_dpo/margin_std": 68.9682388305664,
|
|
"step": 379
|
|
},
|
|
{
|
|
"epoch": 0.5744520030234316,
|
|
"fcm_dpo/beta": 0.009684507735073566,
|
|
"fcm_dpo/delta": -0.12210749089717865,
|
|
"fcm_dpo/margin": 53.237144470214844,
|
|
"fcm_dpo/q_t": 0.38306939601898193,
|
|
"grad_norm": 6.27390718460083,
|
|
"learning_rate": 2.3018751078580283e-07,
|
|
"logits/chosen": 1.5858521461486816,
|
|
"logits/rejected": 1.4949119091033936,
|
|
"logps/chosen": -135.59014892578125,
|
|
"logps/ref_chosen": -77.78315734863281,
|
|
"logps/ref_rejected": -92.56083679199219,
|
|
"logps/rejected": -203.60496520996094,
|
|
"loss": 1.0686,
|
|
"margin_dpo/margin_mean": 53.237144470214844,
|
|
"margin_dpo/margin_std": 78.18292236328125,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.5759637188208617,
|
|
"fcm_dpo/beta": 0.009657653979957104,
|
|
"fcm_dpo/delta": 0.018920322880148888,
|
|
"fcm_dpo/margin": 24.543724060058594,
|
|
"fcm_dpo/q_t": 0.4460574686527252,
|
|
"grad_norm": 9.84516429901123,
|
|
"learning_rate": 2.288697335747027e-07,
|
|
"logits/chosen": 1.6163301467895508,
|
|
"logits/rejected": 1.5747904777526855,
|
|
"logps/chosen": -155.70779418945312,
|
|
"logps/ref_chosen": -75.28189086914062,
|
|
"logps/ref_rejected": -81.1995849609375,
|
|
"logps/rejected": -186.16920471191406,
|
|
"loss": 1.2755,
|
|
"margin_dpo/margin_mean": 24.543724060058594,
|
|
"margin_dpo/margin_std": 70.27555847167969,
|
|
"step": 381
|
|
},
|
|
{
|
|
"epoch": 0.5774754346182918,
|
|
"fcm_dpo/beta": 0.009639125317335129,
|
|
"fcm_dpo/delta": -0.03817974030971527,
|
|
"fcm_dpo/margin": 33.828857421875,
|
|
"fcm_dpo/q_t": 0.42681363224983215,
|
|
"grad_norm": 12.585222244262695,
|
|
"learning_rate": 2.2755254742257706e-07,
|
|
"logits/chosen": 1.5320096015930176,
|
|
"logits/rejected": 1.3989049196243286,
|
|
"logps/chosen": -159.08447265625,
|
|
"logps/ref_chosen": -78.74870300292969,
|
|
"logps/ref_rejected": -99.77484130859375,
|
|
"logps/rejected": -213.93946838378906,
|
|
"loss": 1.1735,
|
|
"margin_dpo/margin_mean": 33.828857421875,
|
|
"margin_dpo/margin_std": 61.3028450012207,
|
|
"step": 382
|
|
},
|
|
{
|
|
"epoch": 0.5789871504157218,
|
|
"fcm_dpo/beta": 0.009590652771294117,
|
|
"fcm_dpo/delta": -0.00014057103544473648,
|
|
"fcm_dpo/margin": 41.718406677246094,
|
|
"fcm_dpo/q_t": 0.41220152378082275,
|
|
"grad_norm": 6.8291192054748535,
|
|
"learning_rate": 2.2623598917395436e-07,
|
|
"logits/chosen": 1.277235984802246,
|
|
"logits/rejected": 1.402155876159668,
|
|
"logps/chosen": -168.30319213867188,
|
|
"logps/ref_chosen": -95.92772674560547,
|
|
"logps/ref_rejected": -92.13604736328125,
|
|
"logps/rejected": -206.22991943359375,
|
|
"loss": 1.1432,
|
|
"margin_dpo/margin_mean": 41.718406677246094,
|
|
"margin_dpo/margin_std": 74.20654296875,
|
|
"step": 383
|
|
},
|
|
{
|
|
"epoch": 0.5804988662131519,
|
|
"fcm_dpo/beta": 0.00961991772055626,
|
|
"fcm_dpo/delta": 0.013582942076027393,
|
|
"fcm_dpo/margin": 40.22323989868164,
|
|
"fcm_dpo/q_t": 0.41252365708351135,
|
|
"grad_norm": 8.104625701904297,
|
|
"learning_rate": 2.2492009565579875e-07,
|
|
"logits/chosen": 1.8011313676834106,
|
|
"logits/rejected": 1.7203788757324219,
|
|
"logps/chosen": -156.7716064453125,
|
|
"logps/ref_chosen": -80.208984375,
|
|
"logps/ref_rejected": -94.39380645751953,
|
|
"logps/rejected": -211.17965698242188,
|
|
"loss": 1.1359,
|
|
"margin_dpo/margin_mean": 40.22323989868164,
|
|
"margin_dpo/margin_std": 68.09140014648438,
|
|
"step": 384
|
|
},
|
|
{
|
|
"epoch": 0.582010582010582,
|
|
"fcm_dpo/beta": 0.009620234370231628,
|
|
"fcm_dpo/delta": -0.018791217356920242,
|
|
"fcm_dpo/margin": 43.446475982666016,
|
|
"fcm_dpo/q_t": 0.4067288041114807,
|
|
"grad_norm": 13.1287260055542,
|
|
"learning_rate": 2.2360490367648084e-07,
|
|
"logits/chosen": 1.3702280521392822,
|
|
"logits/rejected": 1.2520372867584229,
|
|
"logps/chosen": -159.83053588867188,
|
|
"logps/ref_chosen": -85.26632690429688,
|
|
"logps/ref_rejected": -102.1983413696289,
|
|
"logps/rejected": -220.20901489257812,
|
|
"loss": 1.1143,
|
|
"margin_dpo/margin_mean": 43.44647979736328,
|
|
"margin_dpo/margin_std": 69.08692932128906,
|
|
"step": 385
|
|
},
|
|
{
|
|
"epoch": 0.5835222978080121,
|
|
"fcm_dpo/beta": 0.009717528708279133,
|
|
"fcm_dpo/delta": 0.0649806335568428,
|
|
"fcm_dpo/margin": 21.092330932617188,
|
|
"fcm_dpo/q_t": 0.45253363251686096,
|
|
"grad_norm": 12.614625930786133,
|
|
"learning_rate": 2.2229045002474724e-07,
|
|
"logits/chosen": 1.76641845703125,
|
|
"logits/rejected": 1.5830888748168945,
|
|
"logps/chosen": -182.44427490234375,
|
|
"logps/ref_chosen": -93.19975280761719,
|
|
"logps/ref_rejected": -112.98831176757812,
|
|
"logps/rejected": -223.32516479492188,
|
|
"loss": 1.296,
|
|
"margin_dpo/margin_mean": 21.092330932617188,
|
|
"margin_dpo/margin_std": 68.20382690429688,
|
|
"step": 386
|
|
},
|
|
{
|
|
"epoch": 0.5850340136054422,
|
|
"fcm_dpo/beta": 0.00968782976269722,
|
|
"fcm_dpo/delta": -0.054171886295080185,
|
|
"fcm_dpo/margin": 46.604862213134766,
|
|
"fcm_dpo/q_t": 0.40007221698760986,
|
|
"grad_norm": 7.663426399230957,
|
|
"learning_rate": 2.209767714686924e-07,
|
|
"logits/chosen": 1.5394854545593262,
|
|
"logits/rejected": 1.3090683221817017,
|
|
"logps/chosen": -142.0372772216797,
|
|
"logps/ref_chosen": -66.32861328125,
|
|
"logps/ref_rejected": -100.56486511230469,
|
|
"logps/rejected": -222.87838745117188,
|
|
"loss": 1.0958,
|
|
"margin_dpo/margin_mean": 46.604862213134766,
|
|
"margin_dpo/margin_std": 70.9585189819336,
|
|
"step": 387
|
|
},
|
|
{
|
|
"epoch": 0.5865457294028723,
|
|
"fcm_dpo/beta": 0.009683318436145782,
|
|
"fcm_dpo/delta": 0.03653840348124504,
|
|
"fcm_dpo/margin": 26.106887817382812,
|
|
"fcm_dpo/q_t": 0.4437592029571533,
|
|
"grad_norm": 13.81177043914795,
|
|
"learning_rate": 2.1966390475472954e-07,
|
|
"logits/chosen": 1.5186080932617188,
|
|
"logits/rejected": 1.5088022947311401,
|
|
"logps/chosen": -176.24365234375,
|
|
"logps/ref_chosen": -92.95967864990234,
|
|
"logps/ref_rejected": -97.9437255859375,
|
|
"logps/rejected": -207.33456420898438,
|
|
"loss": 1.2804,
|
|
"margin_dpo/margin_mean": 26.106887817382812,
|
|
"margin_dpo/margin_std": 76.65711975097656,
|
|
"step": 388
|
|
},
|
|
{
|
|
"epoch": 0.5880574452003023,
|
|
"fcm_dpo/beta": 0.009643949568271637,
|
|
"fcm_dpo/delta": -0.03215987980365753,
|
|
"fcm_dpo/margin": 44.669090270996094,
|
|
"fcm_dpo/q_t": 0.40315672755241394,
|
|
"grad_norm": 9.2018404006958,
|
|
"learning_rate": 2.1835188660656265e-07,
|
|
"logits/chosen": 1.5738117694854736,
|
|
"logits/rejected": 1.4677269458770752,
|
|
"logps/chosen": -144.96087646484375,
|
|
"logps/ref_chosen": -76.89031982421875,
|
|
"logps/ref_rejected": -93.79212951660156,
|
|
"logps/rejected": -206.53176879882812,
|
|
"loss": 1.1067,
|
|
"margin_dpo/margin_mean": 44.669090270996094,
|
|
"margin_dpo/margin_std": 69.94828796386719,
|
|
"step": 389
|
|
},
|
|
{
|
|
"epoch": 0.5895691609977324,
|
|
"fcm_dpo/beta": 0.009584802202880383,
|
|
"fcm_dpo/delta": -0.008903810754418373,
|
|
"fcm_dpo/margin": 42.60907745361328,
|
|
"fcm_dpo/q_t": 0.40758416056632996,
|
|
"grad_norm": 7.528814792633057,
|
|
"learning_rate": 2.170407537241599e-07,
|
|
"logits/chosen": 1.746974229812622,
|
|
"logits/rejected": 1.6056780815124512,
|
|
"logps/chosen": -115.68708038330078,
|
|
"logps/ref_chosen": -61.058815002441406,
|
|
"logps/ref_rejected": -79.55152893066406,
|
|
"logps/rejected": -176.7888641357422,
|
|
"loss": 1.11,
|
|
"margin_dpo/margin_mean": 42.60907745361328,
|
|
"margin_dpo/margin_std": 65.27470397949219,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.5910808767951625,
|
|
"fcm_dpo/beta": 0.009371999651193619,
|
|
"fcm_dpo/delta": -0.15121221542358398,
|
|
"fcm_dpo/margin": 57.90020751953125,
|
|
"fcm_dpo/q_t": 0.3787153959274292,
|
|
"grad_norm": 7.222849369049072,
|
|
"learning_rate": 2.1573054278272636e-07,
|
|
"logits/chosen": 1.3327152729034424,
|
|
"logits/rejected": 1.2173633575439453,
|
|
"logps/chosen": -140.46063232421875,
|
|
"logps/ref_chosen": -78.60820770263672,
|
|
"logps/ref_rejected": -103.3367691040039,
|
|
"logps/rejected": -223.08938598632812,
|
|
"loss": 1.0448,
|
|
"margin_dpo/margin_mean": 57.900211334228516,
|
|
"margin_dpo/margin_std": 80.29869842529297,
|
|
"step": 391
|
|
},
|
|
{
|
|
"epoch": 0.5925925925925926,
|
|
"fcm_dpo/beta": 0.009207520633935928,
|
|
"fcm_dpo/delta": -0.12280896306037903,
|
|
"fcm_dpo/margin": 56.0882568359375,
|
|
"fcm_dpo/q_t": 0.3837066888809204,
|
|
"grad_norm": 8.607046127319336,
|
|
"learning_rate": 2.1442129043167873e-07,
|
|
"logits/chosen": 1.732191801071167,
|
|
"logits/rejected": 1.638712763786316,
|
|
"logps/chosen": -139.175537109375,
|
|
"logps/ref_chosen": -86.99468994140625,
|
|
"logps/ref_rejected": -112.73616027832031,
|
|
"logps/rejected": -221.00526428222656,
|
|
"loss": 1.0457,
|
|
"margin_dpo/margin_mean": 56.088260650634766,
|
|
"margin_dpo/margin_std": 74.01065063476562,
|
|
"step": 392
|
|
},
|
|
{
|
|
"epoch": 0.5941043083900227,
|
|
"fcm_dpo/beta": 0.009078966453671455,
|
|
"fcm_dpo/delta": 0.0228070467710495,
|
|
"fcm_dpo/margin": 41.60166931152344,
|
|
"fcm_dpo/q_t": 0.41527247428894043,
|
|
"grad_norm": 5.811827182769775,
|
|
"learning_rate": 2.131130332936195e-07,
|
|
"logits/chosen": 1.8474013805389404,
|
|
"logits/rejected": 1.7108526229858398,
|
|
"logps/chosen": -136.55609130859375,
|
|
"logps/ref_chosen": -71.26398468017578,
|
|
"logps/ref_rejected": -88.99722290039062,
|
|
"logps/rejected": -195.89100646972656,
|
|
"loss": 1.1339,
|
|
"margin_dpo/margin_mean": 41.60166931152344,
|
|
"margin_dpo/margin_std": 67.7447509765625,
|
|
"step": 393
|
|
},
|
|
{
|
|
"epoch": 0.5956160241874527,
|
|
"fcm_dpo/beta": 0.00911475345492363,
|
|
"fcm_dpo/delta": -0.02656731568276882,
|
|
"fcm_dpo/margin": 46.67322540283203,
|
|
"fcm_dpo/q_t": 0.4027498960494995,
|
|
"grad_norm": 7.995791435241699,
|
|
"learning_rate": 2.1180580796331323e-07,
|
|
"logits/chosen": 1.8606326580047607,
|
|
"logits/rejected": 1.759063720703125,
|
|
"logps/chosen": -133.31561279296875,
|
|
"logps/ref_chosen": -78.70564270019531,
|
|
"logps/ref_rejected": -87.01431274414062,
|
|
"logps/rejected": -188.29750061035156,
|
|
"loss": 1.0838,
|
|
"margin_dpo/margin_mean": 46.6732292175293,
|
|
"margin_dpo/margin_std": 63.45305633544922,
|
|
"step": 394
|
|
},
|
|
{
|
|
"epoch": 0.5971277399848829,
|
|
"fcm_dpo/beta": 0.009070505388081074,
|
|
"fcm_dpo/delta": -0.01608949899673462,
|
|
"fcm_dpo/margin": 32.70347213745117,
|
|
"fcm_dpo/q_t": 0.43361908197402954,
|
|
"grad_norm": 7.361412048339844,
|
|
"learning_rate": 2.104996510066625e-07,
|
|
"logits/chosen": 1.6011724472045898,
|
|
"logits/rejected": 1.3548979759216309,
|
|
"logps/chosen": -120.16991424560547,
|
|
"logps/ref_chosen": -65.30274963378906,
|
|
"logps/ref_rejected": -93.22492218017578,
|
|
"logps/rejected": -180.79556274414062,
|
|
"loss": 1.2047,
|
|
"margin_dpo/margin_mean": 32.70347213745117,
|
|
"margin_dpo/margin_std": 67.76264953613281,
|
|
"step": 395
|
|
},
|
|
{
|
|
"epoch": 0.5986394557823129,
|
|
"fcm_dpo/beta": 0.009096162393689156,
|
|
"fcm_dpo/delta": 0.06893099844455719,
|
|
"fcm_dpo/margin": 36.61708068847656,
|
|
"fcm_dpo/q_t": 0.42313504219055176,
|
|
"grad_norm": 8.793717384338379,
|
|
"learning_rate": 2.0919459895968517e-07,
|
|
"logits/chosen": 1.633918046951294,
|
|
"logits/rejected": 1.4138092994689941,
|
|
"logps/chosen": -121.96156311035156,
|
|
"logps/ref_chosen": -67.33502197265625,
|
|
"logps/ref_rejected": -98.8193359375,
|
|
"logps/rejected": -190.06295776367188,
|
|
"loss": 1.1485,
|
|
"margin_dpo/margin_mean": 36.61708068847656,
|
|
"margin_dpo/margin_std": 57.59046173095703,
|
|
"step": 396
|
|
},
|
|
{
|
|
"epoch": 0.600151171579743,
|
|
"fcm_dpo/beta": 0.009252132847905159,
|
|
"fcm_dpo/delta": 0.07621178776025772,
|
|
"fcm_dpo/margin": 24.135467529296875,
|
|
"fcm_dpo/q_t": 0.4461718797683716,
|
|
"grad_norm": 8.252684593200684,
|
|
"learning_rate": 2.078906883274924e-07,
|
|
"logits/chosen": 1.580981731414795,
|
|
"logits/rejected": 1.4645161628723145,
|
|
"logps/chosen": -145.71463012695312,
|
|
"logps/ref_chosen": -89.6042251586914,
|
|
"logps/ref_rejected": -104.9779052734375,
|
|
"logps/rejected": -185.22377014160156,
|
|
"loss": 1.2607,
|
|
"margin_dpo/margin_mean": 24.135465621948242,
|
|
"margin_dpo/margin_std": 64.16143798828125,
|
|
"step": 397
|
|
},
|
|
{
|
|
"epoch": 0.6016628873771731,
|
|
"fcm_dpo/beta": 0.009271949529647827,
|
|
"fcm_dpo/delta": -0.04590045288205147,
|
|
"fcm_dpo/margin": 47.874114990234375,
|
|
"fcm_dpo/q_t": 0.40098968148231506,
|
|
"grad_norm": 5.616165637969971,
|
|
"learning_rate": 2.065879555832674e-07,
|
|
"logits/chosen": 1.6307227611541748,
|
|
"logits/rejected": 1.4722830057144165,
|
|
"logps/chosen": -110.94779205322266,
|
|
"logps/ref_chosen": -66.43465423583984,
|
|
"logps/ref_rejected": -90.90376281738281,
|
|
"logps/rejected": -183.291015625,
|
|
"loss": 1.0816,
|
|
"margin_dpo/margin_mean": 47.874114990234375,
|
|
"margin_dpo/margin_std": 67.85723876953125,
|
|
"step": 398
|
|
},
|
|
{
|
|
"epoch": 0.6031746031746031,
|
|
"fcm_dpo/beta": 0.009118804708123207,
|
|
"fcm_dpo/delta": -0.05795277655124664,
|
|
"fcm_dpo/margin": 49.865936279296875,
|
|
"fcm_dpo/q_t": 0.39776691794395447,
|
|
"grad_norm": 14.43397045135498,
|
|
"learning_rate": 2.052864371672457e-07,
|
|
"logits/chosen": 1.5677590370178223,
|
|
"logits/rejected": 1.220775842666626,
|
|
"logps/chosen": -153.78182983398438,
|
|
"logps/ref_chosen": -87.22315979003906,
|
|
"logps/ref_rejected": -136.32411193847656,
|
|
"logps/rejected": -252.7487030029297,
|
|
"loss": 1.0823,
|
|
"margin_dpo/margin_mean": 49.865936279296875,
|
|
"margin_dpo/margin_std": 71.72140502929688,
|
|
"step": 399
|
|
},
|
|
{
|
|
"epoch": 0.6046863189720333,
|
|
"fcm_dpo/beta": 0.009189041331410408,
|
|
"fcm_dpo/delta": 0.031679678708314896,
|
|
"fcm_dpo/margin": 27.590740203857422,
|
|
"fcm_dpo/q_t": 0.44303256273269653,
|
|
"grad_norm": 11.517251014709473,
|
|
"learning_rate": 2.0398616948569493e-07,
|
|
"logits/chosen": 1.9134583473205566,
|
|
"logits/rejected": 1.700062870979309,
|
|
"logps/chosen": -163.27114868164062,
|
|
"logps/ref_chosen": -91.1212158203125,
|
|
"logps/ref_rejected": -108.19235229492188,
|
|
"logps/rejected": -207.93301391601562,
|
|
"loss": 1.2381,
|
|
"margin_dpo/margin_mean": 27.590740203857422,
|
|
"margin_dpo/margin_std": 66.34977722167969,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.6046863189720333,
|
|
"eval_fcm_dpo/beta": 0.009189040400087833,
|
|
"eval_logits/chosen": 1.7609648704528809,
|
|
"eval_logits/rejected": 1.6208163499832153,
|
|
"eval_logps/chosen": -144.77313232421875,
|
|
"eval_logps/ref_chosen": -86.90177917480469,
|
|
"eval_logps/ref_rejected": -96.69639587402344,
|
|
"eval_logps/rejected": -194.42681884765625,
|
|
"eval_loss": 0.5745885968208313,
|
|
"eval_margin_dpo/margin_mean": 39.85907745361328,
|
|
"eval_margin_dpo/margin_std": 68.78260040283203,
|
|
"eval_runtime": 42.2968,
|
|
"eval_samples_per_second": 54.449,
|
|
"eval_steps_per_second": 1.702,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.6061980347694633,
|
|
"fcm_dpo/beta": 0.009086466394364834,
|
|
"fcm_dpo/delta": -0.08671090006828308,
|
|
"fcm_dpo/margin": 53.11298370361328,
|
|
"fcm_dpo/q_t": 0.39035192131996155,
|
|
"grad_norm": 6.894371509552002,
|
|
"learning_rate": 2.0268718890989752e-07,
|
|
"logits/chosen": 1.9491140842437744,
|
|
"logits/rejected": 1.7437255382537842,
|
|
"logps/chosen": -116.19009399414062,
|
|
"logps/ref_chosen": -67.54151153564453,
|
|
"logps/ref_rejected": -98.06488800048828,
|
|
"logps/rejected": -199.82644653320312,
|
|
"loss": 1.0545,
|
|
"margin_dpo/margin_mean": 53.11298370361328,
|
|
"margin_dpo/margin_std": 69.13746643066406,
|
|
"step": 401
|
|
},
|
|
{
|
|
"epoch": 0.6077097505668935,
|
|
"fcm_dpo/beta": 0.008998140692710876,
|
|
"fcm_dpo/delta": -0.00021855533123016357,
|
|
"fcm_dpo/margin": 44.44822692871094,
|
|
"fcm_dpo/q_t": 0.40850377082824707,
|
|
"grad_norm": 15.925997734069824,
|
|
"learning_rate": 2.013895317751323e-07,
|
|
"logits/chosen": 1.445326328277588,
|
|
"logits/rejected": 1.4380943775177002,
|
|
"logps/chosen": -130.43907165527344,
|
|
"logps/ref_chosen": -77.44487762451172,
|
|
"logps/ref_rejected": -83.1333236694336,
|
|
"logps/rejected": -180.57574462890625,
|
|
"loss": 1.0932,
|
|
"margin_dpo/margin_mean": 44.44822692871094,
|
|
"margin_dpo/margin_std": 59.27261734008789,
|
|
"step": 402
|
|
},
|
|
{
|
|
"epoch": 0.6092214663643235,
|
|
"fcm_dpo/beta": 0.008991338312625885,
|
|
"fcm_dpo/delta": -0.061279378831386566,
|
|
"fcm_dpo/margin": 50.972496032714844,
|
|
"fcm_dpo/q_t": 0.39812153577804565,
|
|
"grad_norm": 6.25055456161499,
|
|
"learning_rate": 2.0009323437965898e-07,
|
|
"logits/chosen": 1.9196542501449585,
|
|
"logits/rejected": 1.7053799629211426,
|
|
"logps/chosen": -129.6885528564453,
|
|
"logps/ref_chosen": -68.8230972290039,
|
|
"logps/ref_rejected": -99.82356262207031,
|
|
"logps/rejected": -211.66152954101562,
|
|
"loss": 1.0887,
|
|
"margin_dpo/margin_mean": 50.972496032714844,
|
|
"margin_dpo/margin_std": 75.49934387207031,
|
|
"step": 403
|
|
},
|
|
{
|
|
"epoch": 0.6107331821617535,
|
|
"fcm_dpo/beta": 0.008772274479269981,
|
|
"fcm_dpo/delta": -0.05370837450027466,
|
|
"fcm_dpo/margin": 51.221717834472656,
|
|
"fcm_dpo/q_t": 0.3986782431602478,
|
|
"grad_norm": 6.705301761627197,
|
|
"learning_rate": 1.9879833298370237e-07,
|
|
"logits/chosen": 1.60830557346344,
|
|
"logits/rejected": 1.4156162738800049,
|
|
"logps/chosen": -134.65676879882812,
|
|
"logps/ref_chosen": -80.26783752441406,
|
|
"logps/ref_rejected": -111.60258483886719,
|
|
"logps/rejected": -217.2132110595703,
|
|
"loss": 1.0884,
|
|
"margin_dpo/margin_mean": 51.221717834472656,
|
|
"margin_dpo/margin_std": 72.74002075195312,
|
|
"step": 404
|
|
},
|
|
{
|
|
"epoch": 0.6122448979591837,
|
|
"fcm_dpo/beta": 0.008734223432838917,
|
|
"fcm_dpo/delta": -0.05189571902155876,
|
|
"fcm_dpo/margin": 34.009098052978516,
|
|
"fcm_dpo/q_t": 0.4333046078681946,
|
|
"grad_norm": 8.25037956237793,
|
|
"learning_rate": 1.975048638084379e-07,
|
|
"logits/chosen": 1.7744567394256592,
|
|
"logits/rejected": 1.6257699728012085,
|
|
"logps/chosen": -130.64573669433594,
|
|
"logps/ref_chosen": -68.31065368652344,
|
|
"logps/ref_rejected": -81.56044006347656,
|
|
"logps/rejected": -177.9046173095703,
|
|
"loss": 1.1885,
|
|
"margin_dpo/margin_mean": 34.009098052978516,
|
|
"margin_dpo/margin_std": 61.043663024902344,
|
|
"step": 405
|
|
},
|
|
{
|
|
"epoch": 0.6137566137566137,
|
|
"fcm_dpo/beta": 0.008658639155328274,
|
|
"fcm_dpo/delta": -0.0498778373003006,
|
|
"fcm_dpo/margin": 51.688690185546875,
|
|
"fcm_dpo/q_t": 0.3979532718658447,
|
|
"grad_norm": 7.853333950042725,
|
|
"learning_rate": 1.9621286303497914e-07,
|
|
"logits/chosen": 1.5959994792938232,
|
|
"logits/rejected": 1.2231396436691284,
|
|
"logps/chosen": -121.9815444946289,
|
|
"logps/ref_chosen": -64.86714935302734,
|
|
"logps/ref_rejected": -110.06051635742188,
|
|
"logps/rejected": -218.8636016845703,
|
|
"loss": 1.0798,
|
|
"margin_dpo/margin_mean": 51.688690185546875,
|
|
"margin_dpo/margin_std": 71.60235595703125,
|
|
"step": 406
|
|
},
|
|
{
|
|
"epoch": 0.6152683295540439,
|
|
"fcm_dpo/beta": 0.008745891973376274,
|
|
"fcm_dpo/delta": 0.054477281868457794,
|
|
"fcm_dpo/margin": 39.68103790283203,
|
|
"fcm_dpo/q_t": 0.42161452770233154,
|
|
"grad_norm": 6.449524402618408,
|
|
"learning_rate": 1.9492236680336483e-07,
|
|
"logits/chosen": 1.6160037517547607,
|
|
"logits/rejected": 1.3780653476715088,
|
|
"logps/chosen": -187.60446166992188,
|
|
"logps/ref_chosen": -102.01712799072266,
|
|
"logps/ref_rejected": -121.53548431396484,
|
|
"logps/rejected": -246.80386352539062,
|
|
"loss": 1.14,
|
|
"margin_dpo/margin_mean": 39.68103790283203,
|
|
"margin_dpo/margin_std": 62.7738037109375,
|
|
"step": 407
|
|
},
|
|
{
|
|
"epoch": 0.6167800453514739,
|
|
"fcm_dpo/beta": 0.008701864629983902,
|
|
"fcm_dpo/delta": -0.06911702454090118,
|
|
"fcm_dpo/margin": 53.51190948486328,
|
|
"fcm_dpo/q_t": 0.39388060569763184,
|
|
"grad_norm": 6.081846714019775,
|
|
"learning_rate": 1.9363341121154895e-07,
|
|
"logits/chosen": 1.58525812625885,
|
|
"logits/rejected": 1.3879069089889526,
|
|
"logps/chosen": -135.20721435546875,
|
|
"logps/ref_chosen": -72.77989959716797,
|
|
"logps/ref_rejected": -92.01815795898438,
|
|
"logps/rejected": -207.9573974609375,
|
|
"loss": 1.0482,
|
|
"margin_dpo/margin_mean": 53.51191329956055,
|
|
"margin_dpo/margin_std": 63.87721252441406,
|
|
"step": 408
|
|
},
|
|
{
|
|
"epoch": 0.618291761148904,
|
|
"fcm_dpo/beta": 0.00872439332306385,
|
|
"fcm_dpo/delta": 0.05888619273900986,
|
|
"fcm_dpo/margin": 28.853759765625,
|
|
"fcm_dpo/q_t": 0.4449183940887451,
|
|
"grad_norm": 7.567356109619141,
|
|
"learning_rate": 1.9234603231438994e-07,
|
|
"logits/chosen": 1.5403639078140259,
|
|
"logits/rejected": 1.5558340549468994,
|
|
"logps/chosen": -158.68392944335938,
|
|
"logps/ref_chosen": -77.7901611328125,
|
|
"logps/ref_rejected": -79.2997055053711,
|
|
"logps/rejected": -189.0472412109375,
|
|
"loss": 1.2434,
|
|
"margin_dpo/margin_mean": 28.853759765625,
|
|
"margin_dpo/margin_std": 71.60784149169922,
|
|
"step": 409
|
|
},
|
|
{
|
|
"epoch": 0.6198034769463341,
|
|
"fcm_dpo/beta": 0.00859205611050129,
|
|
"fcm_dpo/delta": -0.07421442866325378,
|
|
"fcm_dpo/margin": 54.710350036621094,
|
|
"fcm_dpo/q_t": 0.39280834794044495,
|
|
"grad_norm": 6.605342864990234,
|
|
"learning_rate": 1.9106026612264315e-07,
|
|
"logits/chosen": 1.5201159715652466,
|
|
"logits/rejected": 1.449505090713501,
|
|
"logps/chosen": -152.5746612548828,
|
|
"logps/ref_chosen": -80.35844421386719,
|
|
"logps/ref_rejected": -92.19056701660156,
|
|
"logps/rejected": -219.11712646484375,
|
|
"loss": 1.0505,
|
|
"margin_dpo/margin_mean": 54.710350036621094,
|
|
"margin_dpo/margin_std": 67.6512451171875,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.6213151927437641,
|
|
"fcm_dpo/beta": 0.008500929921865463,
|
|
"fcm_dpo/delta": -0.08138732612133026,
|
|
"fcm_dpo/margin": 56.17717361450195,
|
|
"fcm_dpo/q_t": 0.3915950059890747,
|
|
"grad_norm": 8.85523509979248,
|
|
"learning_rate": 1.8977614860195296e-07,
|
|
"logits/chosen": 1.5671194791793823,
|
|
"logits/rejected": 1.4427812099456787,
|
|
"logps/chosen": -139.56689453125,
|
|
"logps/ref_chosen": -70.72857666015625,
|
|
"logps/ref_rejected": -93.19204711914062,
|
|
"logps/rejected": -218.20755004882812,
|
|
"loss": 1.042,
|
|
"margin_dpo/margin_mean": 56.17717742919922,
|
|
"margin_dpo/margin_std": 68.0445556640625,
|
|
"step": 411
|
|
},
|
|
{
|
|
"epoch": 0.6228269085411943,
|
|
"fcm_dpo/beta": 0.008425667881965637,
|
|
"fcm_dpo/delta": 0.007511887699365616,
|
|
"fcm_dpo/margin": 46.5667724609375,
|
|
"fcm_dpo/q_t": 0.41029736399650574,
|
|
"grad_norm": 9.423548698425293,
|
|
"learning_rate": 1.8849371567184662e-07,
|
|
"logits/chosen": 1.6571754217147827,
|
|
"logits/rejected": 1.5339932441711426,
|
|
"logps/chosen": -159.95870971679688,
|
|
"logps/ref_chosen": -72.87568664550781,
|
|
"logps/ref_rejected": -88.21068572998047,
|
|
"logps/rejected": -221.8604736328125,
|
|
"loss": 1.1057,
|
|
"margin_dpo/margin_mean": 46.5667724609375,
|
|
"margin_dpo/margin_std": 65.58912658691406,
|
|
"step": 412
|
|
},
|
|
{
|
|
"epoch": 0.6243386243386243,
|
|
"fcm_dpo/beta": 0.008588114753365517,
|
|
"fcm_dpo/delta": 0.09323213249444962,
|
|
"fcm_dpo/margin": 36.05671691894531,
|
|
"fcm_dpo/q_t": 0.43061697483062744,
|
|
"grad_norm": 7.6309967041015625,
|
|
"learning_rate": 1.872130032047302e-07,
|
|
"logits/chosen": 1.1301251649856567,
|
|
"logits/rejected": 1.026894211769104,
|
|
"logps/chosen": -175.1704864501953,
|
|
"logps/ref_chosen": -84.70051574707031,
|
|
"logps/ref_rejected": -92.06742095947266,
|
|
"logps/rejected": -218.5941162109375,
|
|
"loss": 1.2021,
|
|
"margin_dpo/margin_mean": 36.05672073364258,
|
|
"margin_dpo/margin_std": 76.68885803222656,
|
|
"step": 413
|
|
},
|
|
{
|
|
"epoch": 0.6258503401360545,
|
|
"fcm_dpo/beta": 0.008647723123431206,
|
|
"fcm_dpo/delta": 0.007389996200799942,
|
|
"fcm_dpo/margin": 45.430633544921875,
|
|
"fcm_dpo/q_t": 0.41087737679481506,
|
|
"grad_norm": 7.641894817352295,
|
|
"learning_rate": 1.8593404702488436e-07,
|
|
"logits/chosen": 1.8904352188110352,
|
|
"logits/rejected": 1.7254257202148438,
|
|
"logps/chosen": -158.95175170898438,
|
|
"logps/ref_chosen": -70.97660827636719,
|
|
"logps/ref_rejected": -92.90523529052734,
|
|
"logps/rejected": -226.31101989746094,
|
|
"loss": 1.1192,
|
|
"margin_dpo/margin_mean": 45.430633544921875,
|
|
"margin_dpo/margin_std": 70.77737426757812,
|
|
"step": 414
|
|
},
|
|
{
|
|
"epoch": 0.6273620559334845,
|
|
"fcm_dpo/beta": 0.008703294210135937,
|
|
"fcm_dpo/delta": 0.05299728363752365,
|
|
"fcm_dpo/margin": 40.08605194091797,
|
|
"fcm_dpo/q_t": 0.42089951038360596,
|
|
"grad_norm": 9.653564453125,
|
|
"learning_rate": 1.846568829074628e-07,
|
|
"logits/chosen": 1.6395957469940186,
|
|
"logits/rejected": 1.5583677291870117,
|
|
"logps/chosen": -162.5501708984375,
|
|
"logps/ref_chosen": -71.7189712524414,
|
|
"logps/ref_rejected": -74.54219818115234,
|
|
"logps/rejected": -205.45945739746094,
|
|
"loss": 1.1706,
|
|
"margin_dpo/margin_mean": 40.086055755615234,
|
|
"margin_dpo/margin_std": 76.08244323730469,
|
|
"step": 415
|
|
},
|
|
{
|
|
"epoch": 0.6288737717309146,
|
|
"fcm_dpo/beta": 0.008706950582563877,
|
|
"fcm_dpo/delta": -0.03111879713833332,
|
|
"fcm_dpo/margin": 32.41926956176758,
|
|
"fcm_dpo/q_t": 0.436288446187973,
|
|
"grad_norm": 7.236203670501709,
|
|
"learning_rate": 1.8338154657749128e-07,
|
|
"logits/chosen": 1.453350305557251,
|
|
"logits/rejected": 1.3089187145233154,
|
|
"logps/chosen": -167.54823303222656,
|
|
"logps/ref_chosen": -72.88249206542969,
|
|
"logps/ref_rejected": -85.30693054199219,
|
|
"logps/rejected": -212.39193725585938,
|
|
"loss": 1.2269,
|
|
"margin_dpo/margin_mean": 32.419273376464844,
|
|
"margin_dpo/margin_std": 73.30819702148438,
|
|
"step": 416
|
|
},
|
|
{
|
|
"epoch": 0.6303854875283447,
|
|
"fcm_dpo/beta": 0.008633752353489399,
|
|
"fcm_dpo/delta": -0.05498684570193291,
|
|
"fcm_dpo/margin": 52.40943145751953,
|
|
"fcm_dpo/q_t": 0.3974682092666626,
|
|
"grad_norm": 7.425167083740234,
|
|
"learning_rate": 1.8210807370886849e-07,
|
|
"logits/chosen": 1.7691454887390137,
|
|
"logits/rejected": 1.5810437202453613,
|
|
"logps/chosen": -164.9538116455078,
|
|
"logps/ref_chosen": -72.49703216552734,
|
|
"logps/ref_rejected": -89.38966369628906,
|
|
"logps/rejected": -234.25587463378906,
|
|
"loss": 1.0932,
|
|
"margin_dpo/margin_mean": 52.40943145751953,
|
|
"margin_dpo/margin_std": 79.10160064697266,
|
|
"step": 417
|
|
},
|
|
{
|
|
"epoch": 0.6318972033257747,
|
|
"fcm_dpo/beta": 0.008647737093269825,
|
|
"fcm_dpo/delta": 0.03645090013742447,
|
|
"fcm_dpo/margin": 26.66490936279297,
|
|
"fcm_dpo/q_t": 0.44797760248184204,
|
|
"grad_norm": 8.460302352905273,
|
|
"learning_rate": 1.8083649992336825e-07,
|
|
"logits/chosen": 1.6079257726669312,
|
|
"logits/rejected": 1.5933177471160889,
|
|
"logps/chosen": -192.292724609375,
|
|
"logps/ref_chosen": -89.70926666259766,
|
|
"logps/ref_rejected": -90.98756408691406,
|
|
"logps/rejected": -220.23593139648438,
|
|
"loss": 1.2555,
|
|
"margin_dpo/margin_mean": 26.664907455444336,
|
|
"margin_dpo/margin_std": 68.90656280517578,
|
|
"step": 418
|
|
},
|
|
{
|
|
"epoch": 0.6334089191232048,
|
|
"fcm_dpo/beta": 0.008394574746489525,
|
|
"fcm_dpo/delta": -0.1813567876815796,
|
|
"fcm_dpo/margin": 67.94660186767578,
|
|
"fcm_dpo/q_t": 0.3701520562171936,
|
|
"grad_norm": 6.600857257843018,
|
|
"learning_rate": 1.7956686078964255e-07,
|
|
"logits/chosen": 1.4430088996887207,
|
|
"logits/rejected": 1.2721772193908691,
|
|
"logps/chosen": -146.96456909179688,
|
|
"logps/ref_chosen": -75.652099609375,
|
|
"logps/ref_rejected": -91.0013427734375,
|
|
"logps/rejected": -230.2604217529297,
|
|
"loss": 0.9805,
|
|
"margin_dpo/margin_mean": 67.94660949707031,
|
|
"margin_dpo/margin_std": 72.4099349975586,
|
|
"step": 419
|
|
},
|
|
{
|
|
"epoch": 0.6349206349206349,
|
|
"fcm_dpo/beta": 0.00842717569321394,
|
|
"fcm_dpo/delta": 0.052176523953676224,
|
|
"fcm_dpo/margin": 30.08838653564453,
|
|
"fcm_dpo/q_t": 0.4437430500984192,
|
|
"grad_norm": 6.708927631378174,
|
|
"learning_rate": 1.782991918222275e-07,
|
|
"logits/chosen": 1.5903661251068115,
|
|
"logits/rejected": 1.4636292457580566,
|
|
"logps/chosen": -171.9695587158203,
|
|
"logps/ref_chosen": -72.58027648925781,
|
|
"logps/ref_rejected": -79.90303802490234,
|
|
"logps/rejected": -209.38070678710938,
|
|
"loss": 1.2683,
|
|
"margin_dpo/margin_mean": 30.08838653564453,
|
|
"margin_dpo/margin_std": 84.14425659179688,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.636432350718065,
|
|
"fcm_dpo/beta": 0.008517824113368988,
|
|
"fcm_dpo/delta": 0.08660048246383667,
|
|
"fcm_dpo/margin": 37.125335693359375,
|
|
"fcm_dpo/q_t": 0.42935702204704285,
|
|
"grad_norm": 7.109913349151611,
|
|
"learning_rate": 1.7703352848054887e-07,
|
|
"logits/chosen": 1.5397415161132812,
|
|
"logits/rejected": 1.3150533437728882,
|
|
"logps/chosen": -173.07357788085938,
|
|
"logps/ref_chosen": -78.71546936035156,
|
|
"logps/ref_rejected": -90.82321166992188,
|
|
"logps/rejected": -222.30667114257812,
|
|
"loss": 1.2212,
|
|
"margin_dpo/margin_mean": 37.12533950805664,
|
|
"margin_dpo/margin_std": 85.766357421875,
|
|
"step": 421
|
|
},
|
|
{
|
|
"epoch": 0.6379440665154951,
|
|
"fcm_dpo/beta": 0.008670301176607609,
|
|
"fcm_dpo/delta": 0.06840848177671432,
|
|
"fcm_dpo/margin": 38.49900817871094,
|
|
"fcm_dpo/q_t": 0.42432913184165955,
|
|
"grad_norm": 9.032937049865723,
|
|
"learning_rate": 1.7576990616793137e-07,
|
|
"logits/chosen": 1.5093660354614258,
|
|
"logits/rejected": 1.4550291299819946,
|
|
"logps/chosen": -174.02197265625,
|
|
"logps/ref_chosen": -86.74519348144531,
|
|
"logps/ref_rejected": -94.02015686035156,
|
|
"logps/rejected": -219.79595947265625,
|
|
"loss": 1.1566,
|
|
"margin_dpo/margin_mean": 38.49900436401367,
|
|
"margin_dpo/margin_std": 65.20552825927734,
|
|
"step": 422
|
|
},
|
|
{
|
|
"epoch": 0.6394557823129252,
|
|
"fcm_dpo/beta": 0.008607940748333931,
|
|
"fcm_dpo/delta": -0.05018071457743645,
|
|
"fcm_dpo/margin": 52.0123405456543,
|
|
"fcm_dpo/q_t": 0.39915522933006287,
|
|
"grad_norm": 6.428966522216797,
|
|
"learning_rate": 1.745083602306071e-07,
|
|
"logits/chosen": 1.4873400926589966,
|
|
"logits/rejected": 1.2859231233596802,
|
|
"logps/chosen": -158.43319702148438,
|
|
"logps/ref_chosen": -72.02232360839844,
|
|
"logps/ref_rejected": -93.26976776123047,
|
|
"logps/rejected": -231.69296264648438,
|
|
"loss": 1.0761,
|
|
"margin_dpo/margin_mean": 52.0123405456543,
|
|
"margin_dpo/margin_std": 71.97749328613281,
|
|
"step": 423
|
|
},
|
|
{
|
|
"epoch": 0.6409674981103552,
|
|
"fcm_dpo/beta": 0.008534763008356094,
|
|
"fcm_dpo/delta": -0.04528824985027313,
|
|
"fcm_dpo/margin": 51.92509460449219,
|
|
"fcm_dpo/q_t": 0.3988720178604126,
|
|
"grad_norm": 10.321234703063965,
|
|
"learning_rate": 1.7324892595672804e-07,
|
|
"logits/chosen": 1.3067110776901245,
|
|
"logits/rejected": 1.2278244495391846,
|
|
"logps/chosen": -161.60940551757812,
|
|
"logps/ref_chosen": -68.22148132324219,
|
|
"logps/ref_rejected": -94.12411499023438,
|
|
"logps/rejected": -239.4371337890625,
|
|
"loss": 1.0754,
|
|
"margin_dpo/margin_mean": 51.92509460449219,
|
|
"margin_dpo/margin_std": 70.35984802246094,
|
|
"step": 424
|
|
},
|
|
{
|
|
"epoch": 0.6424792139077853,
|
|
"fcm_dpo/beta": 0.008516119793057442,
|
|
"fcm_dpo/delta": -0.034915171563625336,
|
|
"fcm_dpo/margin": 50.874088287353516,
|
|
"fcm_dpo/q_t": 0.4017060101032257,
|
|
"grad_norm": 6.930230140686035,
|
|
"learning_rate": 1.7199163857537824e-07,
|
|
"logits/chosen": 1.5249474048614502,
|
|
"logits/rejected": 1.4606943130493164,
|
|
"logps/chosen": -156.6333770751953,
|
|
"logps/ref_chosen": -75.90104675292969,
|
|
"logps/ref_rejected": -86.08673095703125,
|
|
"logps/rejected": -217.69314575195312,
|
|
"loss": 1.0782,
|
|
"margin_dpo/margin_mean": 50.87408447265625,
|
|
"margin_dpo/margin_std": 67.64917755126953,
|
|
"step": 425
|
|
},
|
|
{
|
|
"epoch": 0.6439909297052154,
|
|
"fcm_dpo/beta": 0.008540986105799675,
|
|
"fcm_dpo/delta": 0.08454061299562454,
|
|
"fcm_dpo/margin": 26.161170959472656,
|
|
"fcm_dpo/q_t": 0.45000118017196655,
|
|
"grad_norm": 8.317493438720703,
|
|
"learning_rate": 1.7073653325558828e-07,
|
|
"logits/chosen": 1.5708837509155273,
|
|
"logits/rejected": 1.5647852420806885,
|
|
"logps/chosen": -192.17919921875,
|
|
"logps/ref_chosen": -89.93118286132812,
|
|
"logps/ref_rejected": -91.04658508300781,
|
|
"logps/rejected": -219.45579528808594,
|
|
"loss": 1.2791,
|
|
"margin_dpo/margin_mean": 26.161174774169922,
|
|
"margin_dpo/margin_std": 78.16825866699219,
|
|
"step": 426
|
|
},
|
|
{
|
|
"epoch": 0.6455026455026455,
|
|
"fcm_dpo/beta": 0.008664184249937534,
|
|
"fcm_dpo/delta": 0.034002143889665604,
|
|
"fcm_dpo/margin": 42.38199996948242,
|
|
"fcm_dpo/q_t": 0.41778403520584106,
|
|
"grad_norm": 7.2056684494018555,
|
|
"learning_rate": 1.6948364510535218e-07,
|
|
"logits/chosen": 1.706808090209961,
|
|
"logits/rejected": 1.5242950916290283,
|
|
"logps/chosen": -175.19021606445312,
|
|
"logps/ref_chosen": -77.83393859863281,
|
|
"logps/ref_rejected": -98.69864654541016,
|
|
"logps/rejected": -238.43692016601562,
|
|
"loss": 1.1425,
|
|
"margin_dpo/margin_mean": 42.38199996948242,
|
|
"margin_dpo/margin_std": 72.0445327758789,
|
|
"step": 427
|
|
},
|
|
{
|
|
"epoch": 0.6470143613000756,
|
|
"fcm_dpo/beta": 0.00863520335406065,
|
|
"fcm_dpo/delta": -0.045657265931367874,
|
|
"fcm_dpo/margin": 51.373043060302734,
|
|
"fcm_dpo/q_t": 0.4012737274169922,
|
|
"grad_norm": 7.712218284606934,
|
|
"learning_rate": 1.6823300917064458e-07,
|
|
"logits/chosen": 1.3941190242767334,
|
|
"logits/rejected": 1.3461658954620361,
|
|
"logps/chosen": -178.5933837890625,
|
|
"logps/ref_chosen": -90.3450927734375,
|
|
"logps/ref_rejected": -100.24185180664062,
|
|
"logps/rejected": -239.86318969726562,
|
|
"loss": 1.0844,
|
|
"margin_dpo/margin_mean": 51.373046875,
|
|
"margin_dpo/margin_std": 73.79509735107422,
|
|
"step": 428
|
|
},
|
|
{
|
|
"epoch": 0.6485260770975056,
|
|
"fcm_dpo/beta": 0.008655520156025887,
|
|
"fcm_dpo/delta": 0.015263576060533524,
|
|
"fcm_dpo/margin": 44.45375442504883,
|
|
"fcm_dpo/q_t": 0.41261976957321167,
|
|
"grad_norm": 7.072147846221924,
|
|
"learning_rate": 1.669846604344412e-07,
|
|
"logits/chosen": 1.2022795677185059,
|
|
"logits/rejected": 1.2440556287765503,
|
|
"logps/chosen": -170.97415161132812,
|
|
"logps/ref_chosen": -78.24811553955078,
|
|
"logps/ref_rejected": -75.24495697021484,
|
|
"logps/rejected": -212.42477416992188,
|
|
"loss": 1.1794,
|
|
"margin_dpo/margin_mean": 44.453758239746094,
|
|
"margin_dpo/margin_std": 88.38716125488281,
|
|
"step": 429
|
|
},
|
|
{
|
|
"epoch": 0.6500377928949358,
|
|
"fcm_dpo/beta": 0.00848034955561161,
|
|
"fcm_dpo/delta": -0.09685202687978745,
|
|
"fcm_dpo/margin": 57.9986686706543,
|
|
"fcm_dpo/q_t": 0.3889318108558655,
|
|
"grad_norm": 6.905314922332764,
|
|
"learning_rate": 1.6573863381573954e-07,
|
|
"logits/chosen": 1.3700964450836182,
|
|
"logits/rejected": 1.4100453853607178,
|
|
"logps/chosen": -157.59194946289062,
|
|
"logps/ref_chosen": -76.08027648925781,
|
|
"logps/ref_rejected": -84.09554290771484,
|
|
"logps/rejected": -223.60589599609375,
|
|
"loss": 1.0623,
|
|
"margin_dpo/margin_mean": 57.99867248535156,
|
|
"margin_dpo/margin_std": 80.27232360839844,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.6515495086923658,
|
|
"fcm_dpo/beta": 0.008486424572765827,
|
|
"fcm_dpo/delta": 0.014092888683080673,
|
|
"fcm_dpo/margin": 45.52803039550781,
|
|
"fcm_dpo/q_t": 0.4137864112854004,
|
|
"grad_norm": 6.048389911651611,
|
|
"learning_rate": 1.6449496416858282e-07,
|
|
"logits/chosen": 1.37843656539917,
|
|
"logits/rejected": 1.2628583908081055,
|
|
"logps/chosen": -143.2403106689453,
|
|
"logps/ref_chosen": -66.88581085205078,
|
|
"logps/ref_rejected": -89.56040954589844,
|
|
"logps/rejected": -211.44293212890625,
|
|
"loss": 1.1442,
|
|
"margin_dpo/margin_mean": 45.52803039550781,
|
|
"margin_dpo/margin_std": 80.02131652832031,
|
|
"step": 431
|
|
},
|
|
{
|
|
"epoch": 0.6530612244897959,
|
|
"fcm_dpo/beta": 0.008482584729790688,
|
|
"fcm_dpo/delta": 0.0026997558306902647,
|
|
"fcm_dpo/margin": 46.84972381591797,
|
|
"fcm_dpo/q_t": 0.412258505821228,
|
|
"grad_norm": 6.3566412925720215,
|
|
"learning_rate": 1.632536862810844e-07,
|
|
"logits/chosen": 1.5703511238098145,
|
|
"logits/rejected": 1.4383772611618042,
|
|
"logps/chosen": -160.373291015625,
|
|
"logps/ref_chosen": -79.65066528320312,
|
|
"logps/ref_rejected": -103.92634582519531,
|
|
"logps/rejected": -231.4987030029297,
|
|
"loss": 1.155,
|
|
"margin_dpo/margin_mean": 46.84972381591797,
|
|
"margin_dpo/margin_std": 87.462646484375,
|
|
"step": 432
|
|
},
|
|
{
|
|
"epoch": 0.654572940287226,
|
|
"fcm_dpo/beta": 0.008361434563994408,
|
|
"fcm_dpo/delta": -0.14136160910129547,
|
|
"fcm_dpo/margin": 63.84764862060547,
|
|
"fcm_dpo/q_t": 0.38163742423057556,
|
|
"grad_norm": 6.963474750518799,
|
|
"learning_rate": 1.6201483487445515e-07,
|
|
"logits/chosen": 1.7979011535644531,
|
|
"logits/rejected": 1.780426263809204,
|
|
"logps/chosen": -154.82102966308594,
|
|
"logps/ref_chosen": -77.30774688720703,
|
|
"logps/ref_rejected": -81.65180206298828,
|
|
"logps/rejected": -223.0127410888672,
|
|
"loss": 1.057,
|
|
"margin_dpo/margin_mean": 63.84764862060547,
|
|
"margin_dpo/margin_std": 90.18070220947266,
|
|
"step": 433
|
|
},
|
|
{
|
|
"epoch": 0.656084656084656,
|
|
"fcm_dpo/beta": 0.008094357326626778,
|
|
"fcm_dpo/delta": -0.08986343443393707,
|
|
"fcm_dpo/margin": 59.861175537109375,
|
|
"fcm_dpo/q_t": 0.3910486698150635,
|
|
"grad_norm": 6.0232343673706055,
|
|
"learning_rate": 1.6077844460203204e-07,
|
|
"logits/chosen": 1.4988017082214355,
|
|
"logits/rejected": 1.337533950805664,
|
|
"logps/chosen": -124.65926361083984,
|
|
"logps/ref_chosen": -63.31850051879883,
|
|
"logps/ref_rejected": -89.15093994140625,
|
|
"logps/rejected": -210.35289001464844,
|
|
"loss": 1.0736,
|
|
"margin_dpo/margin_mean": 59.861175537109375,
|
|
"margin_dpo/margin_std": 86.07001495361328,
|
|
"step": 434
|
|
},
|
|
{
|
|
"epoch": 0.6575963718820862,
|
|
"fcm_dpo/beta": 0.008202778175473213,
|
|
"fcm_dpo/delta": 0.058817461133003235,
|
|
"fcm_dpo/margin": 41.77471923828125,
|
|
"fcm_dpo/q_t": 0.4230271577835083,
|
|
"grad_norm": 7.037782192230225,
|
|
"learning_rate": 1.5954455004830878e-07,
|
|
"logits/chosen": 1.81736421585083,
|
|
"logits/rejected": 1.734781265258789,
|
|
"logps/chosen": -157.01971435546875,
|
|
"logps/ref_chosen": -71.1719741821289,
|
|
"logps/ref_rejected": -86.42095184326172,
|
|
"logps/rejected": -214.0434112548828,
|
|
"loss": 1.176,
|
|
"margin_dpo/margin_mean": 41.77471923828125,
|
|
"margin_dpo/margin_std": 80.3061294555664,
|
|
"step": 435
|
|
},
|
|
{
|
|
"epoch": 0.6591080876795162,
|
|
"fcm_dpo/beta": 0.008254708722233772,
|
|
"fcm_dpo/delta": 0.020179305225610733,
|
|
"fcm_dpo/margin": 46.05265808105469,
|
|
"fcm_dpo/q_t": 0.41473814845085144,
|
|
"grad_norm": 7.05510139465332,
|
|
"learning_rate": 1.5831318572796847e-07,
|
|
"logits/chosen": 1.3741307258605957,
|
|
"logits/rejected": 1.2321834564208984,
|
|
"logps/chosen": -149.34500122070312,
|
|
"logps/ref_chosen": -74.45087432861328,
|
|
"logps/ref_rejected": -86.01708984375,
|
|
"logps/rejected": -206.9638671875,
|
|
"loss": 1.1532,
|
|
"margin_dpo/margin_mean": 46.05265808105469,
|
|
"margin_dpo/margin_std": 83.09732055664062,
|
|
"step": 436
|
|
},
|
|
{
|
|
"epoch": 0.6606198034769464,
|
|
"fcm_dpo/beta": 0.008225423283874989,
|
|
"fcm_dpo/delta": -0.0016473679570481181,
|
|
"fcm_dpo/margin": 33.15676498413086,
|
|
"fcm_dpo/q_t": 0.43936091661453247,
|
|
"grad_norm": 8.62120246887207,
|
|
"learning_rate": 1.5708438608491815e-07,
|
|
"logits/chosen": 1.6454570293426514,
|
|
"logits/rejected": 1.309108018875122,
|
|
"logps/chosen": -168.96128845214844,
|
|
"logps/ref_chosen": -72.38907623291016,
|
|
"logps/ref_rejected": -111.03279876708984,
|
|
"logps/rejected": -240.76177978515625,
|
|
"loss": 1.2447,
|
|
"margin_dpo/margin_mean": 33.15676498413086,
|
|
"margin_dpo/margin_std": 82.86212158203125,
|
|
"step": 437
|
|
},
|
|
{
|
|
"epoch": 0.6621315192743764,
|
|
"fcm_dpo/beta": 0.00817845668643713,
|
|
"fcm_dpo/delta": -0.08092987537384033,
|
|
"fcm_dpo/margin": 58.298133850097656,
|
|
"fcm_dpo/q_t": 0.39217159152030945,
|
|
"grad_norm": 7.8519816398620605,
|
|
"learning_rate": 1.558581854913253e-07,
|
|
"logits/chosen": 1.588195562362671,
|
|
"logits/rejected": 1.4413819313049316,
|
|
"logps/chosen": -132.78721618652344,
|
|
"logps/ref_chosen": -57.27682876586914,
|
|
"logps/ref_rejected": -83.07940673828125,
|
|
"logps/rejected": -216.887939453125,
|
|
"loss": 1.07,
|
|
"margin_dpo/margin_mean": 58.298133850097656,
|
|
"margin_dpo/margin_std": 79.39116668701172,
|
|
"step": 438
|
|
},
|
|
{
|
|
"epoch": 0.6636432350718064,
|
|
"fcm_dpo/beta": 0.00810486450791359,
|
|
"fcm_dpo/delta": -0.027155064046382904,
|
|
"fcm_dpo/margin": 52.506919860839844,
|
|
"fcm_dpo/q_t": 0.4055066704750061,
|
|
"grad_norm": 5.769200801849365,
|
|
"learning_rate": 1.5463461824665658e-07,
|
|
"logits/chosen": 1.5664254426956177,
|
|
"logits/rejected": 1.445422887802124,
|
|
"logps/chosen": -185.7174530029297,
|
|
"logps/ref_chosen": -98.35890197753906,
|
|
"logps/ref_rejected": -112.69817352294922,
|
|
"logps/rejected": -252.5636444091797,
|
|
"loss": 1.0962,
|
|
"margin_dpo/margin_mean": 52.506927490234375,
|
|
"margin_dpo/margin_std": 76.33085632324219,
|
|
"step": 439
|
|
},
|
|
{
|
|
"epoch": 0.6651549508692366,
|
|
"fcm_dpo/beta": 0.007925990968942642,
|
|
"fcm_dpo/delta": -0.11724238097667694,
|
|
"fcm_dpo/margin": 64.5205307006836,
|
|
"fcm_dpo/q_t": 0.38388943672180176,
|
|
"grad_norm": 9.103875160217285,
|
|
"learning_rate": 1.534137185767178e-07,
|
|
"logits/chosen": 1.3807024955749512,
|
|
"logits/rejected": 1.1057807207107544,
|
|
"logps/chosen": -127.14051818847656,
|
|
"logps/ref_chosen": -61.662452697753906,
|
|
"logps/ref_rejected": -86.81646728515625,
|
|
"logps/rejected": -216.8150634765625,
|
|
"loss": 1.0246,
|
|
"margin_dpo/margin_mean": 64.5205307006836,
|
|
"margin_dpo/margin_std": 76.79633331298828,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.6666666666666666,
|
|
"fcm_dpo/beta": 0.007821722887456417,
|
|
"fcm_dpo/delta": -0.01750565692782402,
|
|
"fcm_dpo/margin": 53.261146545410156,
|
|
"fcm_dpo/q_t": 0.40477800369262695,
|
|
"grad_norm": 11.901995658874512,
|
|
"learning_rate": 1.521955206326976e-07,
|
|
"logits/chosen": 1.5941314697265625,
|
|
"logits/rejected": 1.301910400390625,
|
|
"logps/chosen": -140.73521423339844,
|
|
"logps/ref_chosen": -74.33235168457031,
|
|
"logps/ref_rejected": -99.654541015625,
|
|
"logps/rejected": -219.3185577392578,
|
|
"loss": 1.0951,
|
|
"margin_dpo/margin_mean": 53.26115417480469,
|
|
"margin_dpo/margin_std": 74.70690155029297,
|
|
"step": 441
|
|
},
|
|
{
|
|
"epoch": 0.6681783824640968,
|
|
"fcm_dpo/beta": 0.007868031039834023,
|
|
"fcm_dpo/delta": 0.006100071594119072,
|
|
"fcm_dpo/margin": 50.065460205078125,
|
|
"fcm_dpo/q_t": 0.41083312034606934,
|
|
"grad_norm": 9.3712797164917,
|
|
"learning_rate": 1.5098005849021078e-07,
|
|
"logits/chosen": 1.5173122882843018,
|
|
"logits/rejected": 1.4266064167022705,
|
|
"logps/chosen": -173.49835205078125,
|
|
"logps/ref_chosen": -82.42591857910156,
|
|
"logps/ref_rejected": -106.71090698242188,
|
|
"logps/rejected": -247.8488006591797,
|
|
"loss": 1.1319,
|
|
"margin_dpo/margin_mean": 50.065460205078125,
|
|
"margin_dpo/margin_std": 83.03895568847656,
|
|
"step": 442
|
|
},
|
|
{
|
|
"epoch": 0.6696900982615268,
|
|
"fcm_dpo/beta": 0.007733378559350967,
|
|
"fcm_dpo/delta": -0.07331807166337967,
|
|
"fcm_dpo/margin": 60.69889450073242,
|
|
"fcm_dpo/q_t": 0.3952465057373047,
|
|
"grad_norm": 9.156957626342773,
|
|
"learning_rate": 1.4976736614834662e-07,
|
|
"logits/chosen": 1.7997548580169678,
|
|
"logits/rejected": 1.6043787002563477,
|
|
"logps/chosen": -152.74725341796875,
|
|
"logps/ref_chosen": -72.87019348144531,
|
|
"logps/ref_rejected": -94.48143005371094,
|
|
"logps/rejected": -235.05740356445312,
|
|
"loss": 1.0812,
|
|
"margin_dpo/margin_mean": 60.69889450073242,
|
|
"margin_dpo/margin_std": 89.32925415039062,
|
|
"step": 443
|
|
},
|
|
{
|
|
"epoch": 0.671201814058957,
|
|
"fcm_dpo/beta": 0.00782048236578703,
|
|
"fcm_dpo/delta": 0.0576334185898304,
|
|
"fcm_dpo/margin": 21.587419509887695,
|
|
"fcm_dpo/q_t": 0.46234452724456787,
|
|
"grad_norm": 10.07677936553955,
|
|
"learning_rate": 1.4855747752871654e-07,
|
|
"logits/chosen": 1.544628620147705,
|
|
"logits/rejected": 1.298480749130249,
|
|
"logps/chosen": -171.25473022460938,
|
|
"logps/ref_chosen": -74.650390625,
|
|
"logps/ref_rejected": -106.89204406738281,
|
|
"logps/rejected": -225.08380126953125,
|
|
"loss": 1.3148,
|
|
"margin_dpo/margin_mean": 21.587419509887695,
|
|
"margin_dpo/margin_std": 77.45458221435547,
|
|
"step": 444
|
|
},
|
|
{
|
|
"epoch": 0.672713529856387,
|
|
"fcm_dpo/beta": 0.007657904177904129,
|
|
"fcm_dpo/delta": -0.1426275670528412,
|
|
"fcm_dpo/margin": 69.86483764648438,
|
|
"fcm_dpo/q_t": 0.37939491868019104,
|
|
"grad_norm": 8.217625617980957,
|
|
"learning_rate": 1.473504264745062e-07,
|
|
"logits/chosen": 1.5077362060546875,
|
|
"logits/rejected": 1.4950257539749146,
|
|
"logps/chosen": -158.41409301757812,
|
|
"logps/ref_chosen": -76.26957702636719,
|
|
"logps/ref_rejected": -89.84994506835938,
|
|
"logps/rejected": -241.8592987060547,
|
|
"loss": 1.0212,
|
|
"margin_dpo/margin_mean": 69.86483001708984,
|
|
"margin_dpo/margin_std": 85.99278259277344,
|
|
"step": 445
|
|
},
|
|
{
|
|
"epoch": 0.674225245653817,
|
|
"fcm_dpo/beta": 0.007591135799884796,
|
|
"fcm_dpo/delta": -0.09901002794504166,
|
|
"fcm_dpo/margin": 64.8037338256836,
|
|
"fcm_dpo/q_t": 0.38721728324890137,
|
|
"grad_norm": 5.767418384552002,
|
|
"learning_rate": 1.461462467495284e-07,
|
|
"logits/chosen": 1.5965964794158936,
|
|
"logits/rejected": 1.43882417678833,
|
|
"logps/chosen": -137.1719207763672,
|
|
"logps/ref_chosen": -62.74647903442383,
|
|
"logps/ref_rejected": -86.395751953125,
|
|
"logps/rejected": -225.62493896484375,
|
|
"loss": 1.0341,
|
|
"margin_dpo/margin_mean": 64.80374145507812,
|
|
"margin_dpo/margin_std": 68.17994689941406,
|
|
"step": 446
|
|
},
|
|
{
|
|
"epoch": 0.6757369614512472,
|
|
"fcm_dpo/beta": 0.007440866902470589,
|
|
"fcm_dpo/delta": -0.015056482516229153,
|
|
"fcm_dpo/margin": 55.69664764404297,
|
|
"fcm_dpo/q_t": 0.4054851233959198,
|
|
"grad_norm": 9.256994247436523,
|
|
"learning_rate": 1.4494497203727843e-07,
|
|
"logits/chosen": 1.2242474555969238,
|
|
"logits/rejected": 0.9210813045501709,
|
|
"logps/chosen": -139.55810546875,
|
|
"logps/ref_chosen": -71.06666564941406,
|
|
"logps/ref_rejected": -103.57111358642578,
|
|
"logps/rejected": -227.7592010498047,
|
|
"loss": 1.1148,
|
|
"margin_dpo/margin_mean": 55.69664764404297,
|
|
"margin_dpo/margin_std": 87.89811706542969,
|
|
"step": 447
|
|
},
|
|
{
|
|
"epoch": 0.6772486772486772,
|
|
"fcm_dpo/beta": 0.007497244980186224,
|
|
"fcm_dpo/delta": 0.03677457943558693,
|
|
"fcm_dpo/margin": 48.574466705322266,
|
|
"fcm_dpo/q_t": 0.417564332485199,
|
|
"grad_norm": 7.66340446472168,
|
|
"learning_rate": 1.4374663593999256e-07,
|
|
"logits/chosen": 1.652498483657837,
|
|
"logits/rejected": 1.538625955581665,
|
|
"logps/chosen": -159.7042236328125,
|
|
"logps/ref_chosen": -73.400146484375,
|
|
"logps/ref_rejected": -96.34330749511719,
|
|
"logps/rejected": -231.22186279296875,
|
|
"loss": 1.1446,
|
|
"margin_dpo/margin_mean": 48.574466705322266,
|
|
"margin_dpo/margin_std": 81.99598693847656,
|
|
"step": 448
|
|
},
|
|
{
|
|
"epoch": 0.6787603930461074,
|
|
"fcm_dpo/beta": 0.007546110078692436,
|
|
"fcm_dpo/delta": 0.04134930670261383,
|
|
"fcm_dpo/margin": 28.31925392150879,
|
|
"fcm_dpo/q_t": 0.45164692401885986,
|
|
"grad_norm": 15.793785095214844,
|
|
"learning_rate": 1.4255127197770707e-07,
|
|
"logits/chosen": 0.9623701572418213,
|
|
"logits/rejected": 0.9727632403373718,
|
|
"logps/chosen": -188.20831298828125,
|
|
"logps/ref_chosen": -93.66099548339844,
|
|
"logps/ref_rejected": -102.53019714355469,
|
|
"logps/rejected": -225.39675903320312,
|
|
"loss": 1.2846,
|
|
"margin_dpo/margin_mean": 28.319255828857422,
|
|
"margin_dpo/margin_std": 84.99736022949219,
|
|
"step": 449
|
|
},
|
|
{
|
|
"epoch": 0.6802721088435374,
|
|
"fcm_dpo/beta": 0.007580921053886414,
|
|
"fcm_dpo/delta": 0.04489194229245186,
|
|
"fcm_dpo/margin": 47.054134368896484,
|
|
"fcm_dpo/q_t": 0.41971227526664734,
|
|
"grad_norm": 6.454341888427734,
|
|
"learning_rate": 1.4135891358732205e-07,
|
|
"logits/chosen": 1.755326509475708,
|
|
"logits/rejected": 1.4095392227172852,
|
|
"logps/chosen": -133.84730529785156,
|
|
"logps/ref_chosen": -62.52460479736328,
|
|
"logps/ref_rejected": -94.04986572265625,
|
|
"logps/rejected": -212.42669677734375,
|
|
"loss": 1.1614,
|
|
"margin_dpo/margin_mean": 47.054134368896484,
|
|
"margin_dpo/margin_std": 86.08123779296875,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.6817838246409675,
|
|
"fcm_dpo/beta": 0.007728881202638149,
|
|
"fcm_dpo/delta": 0.0906517505645752,
|
|
"fcm_dpo/margin": 40.36960983276367,
|
|
"fcm_dpo/q_t": 0.43043777346611023,
|
|
"grad_norm": 7.109012126922607,
|
|
"learning_rate": 1.4016959412166437e-07,
|
|
"logits/chosen": 1.3793981075286865,
|
|
"logits/rejected": 1.2791502475738525,
|
|
"logps/chosen": -155.21142578125,
|
|
"logps/ref_chosen": -79.14009094238281,
|
|
"logps/ref_rejected": -93.23919677734375,
|
|
"logps/rejected": -209.68016052246094,
|
|
"loss": 1.1896,
|
|
"margin_dpo/margin_mean": 40.36961364746094,
|
|
"margin_dpo/margin_std": 80.68801879882812,
|
|
"step": 451
|
|
},
|
|
{
|
|
"epoch": 0.6832955404383976,
|
|
"fcm_dpo/beta": 0.00771725969389081,
|
|
"fcm_dpo/delta": -0.02904406189918518,
|
|
"fcm_dpo/margin": 55.432777404785156,
|
|
"fcm_dpo/q_t": 0.40317466855049133,
|
|
"grad_norm": 7.301806926727295,
|
|
"learning_rate": 1.3898334684855645e-07,
|
|
"logits/chosen": 1.551164150238037,
|
|
"logits/rejected": 1.3258707523345947,
|
|
"logps/chosen": -147.094482421875,
|
|
"logps/ref_chosen": -70.38827514648438,
|
|
"logps/ref_rejected": -95.47691345214844,
|
|
"logps/rejected": -227.61587524414062,
|
|
"loss": 1.1117,
|
|
"margin_dpo/margin_mean": 55.432777404785156,
|
|
"margin_dpo/margin_std": 87.90678405761719,
|
|
"step": 452
|
|
},
|
|
{
|
|
"epoch": 0.6848072562358276,
|
|
"fcm_dpo/beta": 0.0077826473861932755,
|
|
"fcm_dpo/delta": 0.03363284468650818,
|
|
"fcm_dpo/margin": 47.162410736083984,
|
|
"fcm_dpo/q_t": 0.4179924726486206,
|
|
"grad_norm": 7.004802703857422,
|
|
"learning_rate": 1.3780020494988445e-07,
|
|
"logits/chosen": 1.5915346145629883,
|
|
"logits/rejected": 1.474679946899414,
|
|
"logps/chosen": -158.24111938476562,
|
|
"logps/ref_chosen": -79.9207763671875,
|
|
"logps/ref_rejected": -90.20779418945312,
|
|
"logps/rejected": -215.6905517578125,
|
|
"loss": 1.1627,
|
|
"margin_dpo/margin_mean": 47.162410736083984,
|
|
"margin_dpo/margin_std": 87.50480651855469,
|
|
"step": 453
|
|
},
|
|
{
|
|
"epoch": 0.6863189720332578,
|
|
"fcm_dpo/beta": 0.007651199586689472,
|
|
"fcm_dpo/delta": -0.10082674026489258,
|
|
"fcm_dpo/margin": 64.81204223632812,
|
|
"fcm_dpo/q_t": 0.38581979274749756,
|
|
"grad_norm": 7.975934028625488,
|
|
"learning_rate": 1.366202015206706e-07,
|
|
"logits/chosen": 1.653881311416626,
|
|
"logits/rejected": 1.5468456745147705,
|
|
"logps/chosen": -131.710693359375,
|
|
"logps/ref_chosen": -69.71887969970703,
|
|
"logps/ref_rejected": -82.86952209472656,
|
|
"logps/rejected": -209.67337036132812,
|
|
"loss": 1.0413,
|
|
"margin_dpo/margin_mean": 64.81204223632812,
|
|
"margin_dpo/margin_std": 79.79120635986328,
|
|
"step": 454
|
|
},
|
|
{
|
|
"epoch": 0.6878306878306878,
|
|
"fcm_dpo/beta": 0.007537747733294964,
|
|
"fcm_dpo/delta": -0.06928986310958862,
|
|
"fcm_dpo/margin": 61.83723449707031,
|
|
"fcm_dpo/q_t": 0.393891841173172,
|
|
"grad_norm": 5.723858833312988,
|
|
"learning_rate": 1.354433695681474e-07,
|
|
"logits/chosen": 1.210571527481079,
|
|
"logits/rejected": 1.1374115943908691,
|
|
"logps/chosen": -170.25729370117188,
|
|
"logps/ref_chosen": -89.51481628417969,
|
|
"logps/ref_rejected": -97.93235778808594,
|
|
"logps/rejected": -240.51206970214844,
|
|
"loss": 1.0549,
|
|
"margin_dpo/margin_mean": 61.83723449707031,
|
|
"margin_dpo/margin_std": 78.60615539550781,
|
|
"step": 455
|
|
},
|
|
{
|
|
"epoch": 0.6893424036281179,
|
|
"fcm_dpo/beta": 0.0074795568361878395,
|
|
"fcm_dpo/delta": 0.006430737674236298,
|
|
"fcm_dpo/margin": 52.61790466308594,
|
|
"fcm_dpo/q_t": 0.4104902744293213,
|
|
"grad_norm": 9.476668357849121,
|
|
"learning_rate": 1.3426974201083439e-07,
|
|
"logits/chosen": 1.48141610622406,
|
|
"logits/rejected": 1.3234397172927856,
|
|
"logps/chosen": -151.86122131347656,
|
|
"logps/ref_chosen": -74.60527038574219,
|
|
"logps/ref_rejected": -97.98377227783203,
|
|
"logps/rejected": -227.85763549804688,
|
|
"loss": 1.1121,
|
|
"margin_dpo/margin_mean": 52.61790466308594,
|
|
"margin_dpo/margin_std": 77.86461639404297,
|
|
"step": 456
|
|
},
|
|
{
|
|
"epoch": 0.690854119425548,
|
|
"fcm_dpo/beta": 0.007550334092229605,
|
|
"fcm_dpo/delta": 0.06581398844718933,
|
|
"fcm_dpo/margin": 44.53820037841797,
|
|
"fcm_dpo/q_t": 0.4231857359409332,
|
|
"grad_norm": 7.015773773193359,
|
|
"learning_rate": 1.3309935167761717e-07,
|
|
"logits/chosen": 1.758169174194336,
|
|
"logits/rejected": 1.5407148599624634,
|
|
"logps/chosen": -152.5609893798828,
|
|
"logps/ref_chosen": -63.927032470703125,
|
|
"logps/ref_rejected": -83.15243530273438,
|
|
"logps/rejected": -216.3245849609375,
|
|
"loss": 1.151,
|
|
"margin_dpo/margin_mean": 44.53820037841797,
|
|
"margin_dpo/margin_std": 72.46507263183594,
|
|
"step": 457
|
|
},
|
|
{
|
|
"epoch": 0.6923658352229781,
|
|
"fcm_dpo/beta": 0.0076367976143956184,
|
|
"fcm_dpo/delta": -0.017959222197532654,
|
|
"fcm_dpo/margin": 54.54873275756836,
|
|
"fcm_dpo/q_t": 0.4054056406021118,
|
|
"grad_norm": 8.418825149536133,
|
|
"learning_rate": 1.3193223130682936e-07,
|
|
"logits/chosen": 1.6931997537612915,
|
|
"logits/rejected": 1.3649234771728516,
|
|
"logps/chosen": -142.6044464111328,
|
|
"logps/ref_chosen": -67.68869018554688,
|
|
"logps/ref_rejected": -104.40899658203125,
|
|
"logps/rejected": -233.8734893798828,
|
|
"loss": 1.1064,
|
|
"margin_dpo/margin_mean": 54.548736572265625,
|
|
"margin_dpo/margin_std": 81.37754821777344,
|
|
"step": 458
|
|
},
|
|
{
|
|
"epoch": 0.6938775510204082,
|
|
"fcm_dpo/beta": 0.007586327847093344,
|
|
"fcm_dpo/delta": -0.08097509294748306,
|
|
"fcm_dpo/margin": 62.6253547668457,
|
|
"fcm_dpo/q_t": 0.3917158246040344,
|
|
"grad_norm": 6.951050758361816,
|
|
"learning_rate": 1.3076841354533658e-07,
|
|
"logits/chosen": 1.7440357208251953,
|
|
"logits/rejected": 1.6339316368103027,
|
|
"logps/chosen": -152.58859252929688,
|
|
"logps/ref_chosen": -83.82363891601562,
|
|
"logps/ref_rejected": -103.75938415527344,
|
|
"logps/rejected": -235.14968872070312,
|
|
"loss": 1.0538,
|
|
"margin_dpo/margin_mean": 62.6253547668457,
|
|
"margin_dpo/margin_std": 73.25015258789062,
|
|
"step": 459
|
|
},
|
|
{
|
|
"epoch": 0.6953892668178382,
|
|
"fcm_dpo/beta": 0.007416483946144581,
|
|
"fcm_dpo/delta": -0.0269007571041584,
|
|
"fcm_dpo/margin": 57.389373779296875,
|
|
"fcm_dpo/q_t": 0.4039474427700043,
|
|
"grad_norm": 7.818911075592041,
|
|
"learning_rate": 1.2960793094762345e-07,
|
|
"logits/chosen": 1.9773120880126953,
|
|
"logits/rejected": 1.5847508907318115,
|
|
"logps/chosen": -170.81787109375,
|
|
"logps/ref_chosen": -79.4836654663086,
|
|
"logps/ref_rejected": -112.31745910644531,
|
|
"logps/rejected": -261.041015625,
|
|
"loss": 1.0852,
|
|
"margin_dpo/margin_mean": 57.389373779296875,
|
|
"margin_dpo/margin_std": 78.99481964111328,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.6969009826152683,
|
|
"fcm_dpo/beta": 0.007295292802155018,
|
|
"fcm_dpo/delta": -0.07289466261863708,
|
|
"fcm_dpo/margin": 64.19013977050781,
|
|
"fcm_dpo/q_t": 0.39158502221107483,
|
|
"grad_norm": 7.286057949066162,
|
|
"learning_rate": 1.2845081597488286e-07,
|
|
"logits/chosen": 1.7993457317352295,
|
|
"logits/rejected": 1.565769910812378,
|
|
"logps/chosen": -129.4446563720703,
|
|
"logps/ref_chosen": -64.28482055664062,
|
|
"logps/ref_rejected": -93.73818969726562,
|
|
"logps/rejected": -223.08816528320312,
|
|
"loss": 1.0383,
|
|
"margin_dpo/margin_mean": 64.19013977050781,
|
|
"margin_dpo/margin_std": 71.33786010742188,
|
|
"step": 461
|
|
},
|
|
{
|
|
"epoch": 0.6984126984126984,
|
|
"fcm_dpo/beta": 0.0071984813548624516,
|
|
"fcm_dpo/delta": -0.08874664455652237,
|
|
"fcm_dpo/margin": 67.24839782714844,
|
|
"fcm_dpo/q_t": 0.3891562223434448,
|
|
"grad_norm": 9.859999656677246,
|
|
"learning_rate": 1.27297100994108e-07,
|
|
"logits/chosen": 1.5112301111221313,
|
|
"logits/rejected": 1.3689056634902954,
|
|
"logps/chosen": -158.24203491210938,
|
|
"logps/ref_chosen": -77.15335083007812,
|
|
"logps/ref_rejected": -91.12923431396484,
|
|
"logps/rejected": -239.46630859375,
|
|
"loss": 1.0579,
|
|
"margin_dpo/margin_mean": 67.24839782714844,
|
|
"margin_dpo/margin_std": 89.81712341308594,
|
|
"step": 462
|
|
},
|
|
{
|
|
"epoch": 0.6999244142101285,
|
|
"fcm_dpo/beta": 0.007196832448244095,
|
|
"fcm_dpo/delta": 0.008259914815425873,
|
|
"fcm_dpo/margin": 40.18242645263672,
|
|
"fcm_dpo/q_t": 0.43439820408821106,
|
|
"grad_norm": 11.131797790527344,
|
|
"learning_rate": 1.2614681827718695e-07,
|
|
"logits/chosen": 1.4589455127716064,
|
|
"logits/rejected": 1.4636832475662231,
|
|
"logps/chosen": -173.68218994140625,
|
|
"logps/ref_chosen": -87.58760070800781,
|
|
"logps/ref_rejected": -87.97022247314453,
|
|
"logps/rejected": -214.24722290039062,
|
|
"loss": 1.198,
|
|
"margin_dpo/margin_mean": 40.18242645263672,
|
|
"margin_dpo/margin_std": 79.24378967285156,
|
|
"step": 463
|
|
},
|
|
{
|
|
"epoch": 0.7014361300075586,
|
|
"fcm_dpo/beta": 0.007220131810754538,
|
|
"fcm_dpo/delta": -0.008086636662483215,
|
|
"fcm_dpo/margin": 56.42665100097656,
|
|
"fcm_dpo/q_t": 0.40838852524757385,
|
|
"grad_norm": 8.530649185180664,
|
|
"learning_rate": 1.2500000000000005e-07,
|
|
"logits/chosen": 1.1444811820983887,
|
|
"logits/rejected": 1.046148419380188,
|
|
"logps/chosen": -166.62535095214844,
|
|
"logps/ref_chosen": -75.83175659179688,
|
|
"logps/ref_rejected": -84.4811019897461,
|
|
"logps/rejected": -231.70135498046875,
|
|
"loss": 1.1332,
|
|
"margin_dpo/margin_mean": 56.42665100097656,
|
|
"margin_dpo/margin_std": 94.67403411865234,
|
|
"step": 464
|
|
},
|
|
{
|
|
"epoch": 0.7029478458049887,
|
|
"fcm_dpo/beta": 0.007176822982728481,
|
|
"fcm_dpo/delta": 0.00978757068514824,
|
|
"fcm_dpo/margin": 54.408992767333984,
|
|
"fcm_dpo/q_t": 0.411876916885376,
|
|
"grad_norm": 13.895620346069336,
|
|
"learning_rate": 1.238566782415197e-07,
|
|
"logits/chosen": 1.7335069179534912,
|
|
"logits/rejected": 1.5657398700714111,
|
|
"logps/chosen": -169.32749938964844,
|
|
"logps/ref_chosen": -77.057861328125,
|
|
"logps/ref_rejected": -102.75727844238281,
|
|
"logps/rejected": -249.4359130859375,
|
|
"loss": 1.1214,
|
|
"margin_dpo/margin_mean": 54.40899658203125,
|
|
"margin_dpo/margin_std": 85.08840942382812,
|
|
"step": 465
|
|
},
|
|
{
|
|
"epoch": 0.7044595616024187,
|
|
"fcm_dpo/beta": 0.0072374227456748486,
|
|
"fcm_dpo/delta": 0.026420170441269875,
|
|
"fcm_dpo/margin": 36.631690979003906,
|
|
"fcm_dpo/q_t": 0.43956026434898376,
|
|
"grad_norm": 11.89194393157959,
|
|
"learning_rate": 1.2271688498291334e-07,
|
|
"logits/chosen": 1.5139234066009521,
|
|
"logits/rejected": 1.562455415725708,
|
|
"logps/chosen": -190.8651885986328,
|
|
"logps/ref_chosen": -91.7751693725586,
|
|
"logps/ref_rejected": -90.2679443359375,
|
|
"logps/rejected": -225.98965454101562,
|
|
"loss": 1.2149,
|
|
"margin_dpo/margin_mean": 36.63169479370117,
|
|
"margin_dpo/margin_std": 77.32295989990234,
|
|
"step": 466
|
|
},
|
|
{
|
|
"epoch": 0.7059712773998488,
|
|
"fcm_dpo/beta": 0.007246088702231646,
|
|
"fcm_dpo/delta": -0.0002483353018760681,
|
|
"fcm_dpo/margin": 55.23102569580078,
|
|
"fcm_dpo/q_t": 0.4089811146259308,
|
|
"grad_norm": 7.205003261566162,
|
|
"learning_rate": 1.2158065210664848e-07,
|
|
"logits/chosen": 1.6517345905303955,
|
|
"logits/rejected": 1.2684385776519775,
|
|
"logps/chosen": -141.467041015625,
|
|
"logps/ref_chosen": -64.77557373046875,
|
|
"logps/ref_rejected": -102.58863830566406,
|
|
"logps/rejected": -234.51113891601562,
|
|
"loss": 1.0982,
|
|
"margin_dpo/margin_mean": 55.23102569580078,
|
|
"margin_dpo/margin_std": 77.0743408203125,
|
|
"step": 467
|
|
},
|
|
{
|
|
"epoch": 0.7074829931972789,
|
|
"fcm_dpo/beta": 0.007257150486111641,
|
|
"fcm_dpo/delta": -0.006373733282089233,
|
|
"fcm_dpo/margin": 55.92658233642578,
|
|
"fcm_dpo/q_t": 0.4085814356803894,
|
|
"grad_norm": 9.413773536682129,
|
|
"learning_rate": 1.204480113956011e-07,
|
|
"logits/chosen": 1.6140985488891602,
|
|
"logits/rejected": 1.6181747913360596,
|
|
"logps/chosen": -170.14039611816406,
|
|
"logps/ref_chosen": -82.22445678710938,
|
|
"logps/ref_rejected": -92.99041748046875,
|
|
"logps/rejected": -236.83291625976562,
|
|
"loss": 1.1214,
|
|
"margin_dpo/margin_mean": 55.92658233642578,
|
|
"margin_dpo/margin_std": 89.77207946777344,
|
|
"step": 468
|
|
},
|
|
{
|
|
"epoch": 0.708994708994709,
|
|
"fcm_dpo/beta": 0.007210570853203535,
|
|
"fcm_dpo/delta": 0.03220885246992111,
|
|
"fcm_dpo/margin": 51.060340881347656,
|
|
"fcm_dpo/q_t": 0.41571545600891113,
|
|
"grad_norm": 17.996444702148438,
|
|
"learning_rate": 1.1931899453216697e-07,
|
|
"logits/chosen": 1.7104898691177368,
|
|
"logits/rejected": 1.6818902492523193,
|
|
"logps/chosen": -161.86837768554688,
|
|
"logps/ref_chosen": -75.93031311035156,
|
|
"logps/ref_rejected": -92.26559448242188,
|
|
"logps/rejected": -229.2639923095703,
|
|
"loss": 1.1197,
|
|
"margin_dpo/margin_mean": 51.060340881347656,
|
|
"margin_dpo/margin_std": 71.57931518554688,
|
|
"step": 469
|
|
},
|
|
{
|
|
"epoch": 0.7105064247921391,
|
|
"fcm_dpo/beta": 0.0072533609345555305,
|
|
"fcm_dpo/delta": -0.014294777996838093,
|
|
"fcm_dpo/margin": 57.032936096191406,
|
|
"fcm_dpo/q_t": 0.40608060359954834,
|
|
"grad_norm": 6.177210330963135,
|
|
"learning_rate": 1.1819363309737438e-07,
|
|
"logits/chosen": 1.6594834327697754,
|
|
"logits/rejected": 1.4640038013458252,
|
|
"logps/chosen": -148.8472900390625,
|
|
"logps/ref_chosen": -65.86345672607422,
|
|
"logps/ref_rejected": -85.89832305908203,
|
|
"logps/rejected": -225.91510009765625,
|
|
"loss": 1.1063,
|
|
"margin_dpo/margin_mean": 57.032936096191406,
|
|
"margin_dpo/margin_std": 86.32967376708984,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.7120181405895691,
|
|
"fcm_dpo/beta": 0.007146833464503288,
|
|
"fcm_dpo/delta": -0.09884218871593475,
|
|
"fcm_dpo/margin": 69.11367797851562,
|
|
"fcm_dpo/q_t": 0.3864142894744873,
|
|
"grad_norm": 7.910089015960693,
|
|
"learning_rate": 1.1707195857000215e-07,
|
|
"logits/chosen": 1.5441298484802246,
|
|
"logits/rejected": 1.4003360271453857,
|
|
"logps/chosen": -151.22935485839844,
|
|
"logps/ref_chosen": -74.3460922241211,
|
|
"logps/ref_rejected": -93.43672943115234,
|
|
"logps/rejected": -239.4336700439453,
|
|
"loss": 1.0383,
|
|
"margin_dpo/margin_mean": 69.11367797851562,
|
|
"margin_dpo/margin_std": 84.86093139648438,
|
|
"step": 471
|
|
},
|
|
{
|
|
"epoch": 0.7135298563869993,
|
|
"fcm_dpo/beta": 0.007175215519964695,
|
|
"fcm_dpo/delta": 0.021523576229810715,
|
|
"fcm_dpo/margin": 52.75737380981445,
|
|
"fcm_dpo/q_t": 0.41504496335983276,
|
|
"grad_norm": 7.508319854736328,
|
|
"learning_rate": 1.1595400232569768e-07,
|
|
"logits/chosen": 1.6507457494735718,
|
|
"logits/rejected": 1.5339549779891968,
|
|
"logps/chosen": -153.89007568359375,
|
|
"logps/ref_chosen": -74.75674438476562,
|
|
"logps/ref_rejected": -95.18183135986328,
|
|
"logps/rejected": -227.07254028320312,
|
|
"loss": 1.1488,
|
|
"margin_dpo/margin_mean": 52.75737380981445,
|
|
"margin_dpo/margin_std": 92.27010345458984,
|
|
"step": 472
|
|
},
|
|
{
|
|
"epoch": 0.7150415721844293,
|
|
"fcm_dpo/beta": 0.00716636935248971,
|
|
"fcm_dpo/delta": 0.0196013655513525,
|
|
"fcm_dpo/margin": 53.183753967285156,
|
|
"fcm_dpo/q_t": 0.41339337825775146,
|
|
"grad_norm": 13.5819091796875,
|
|
"learning_rate": 1.1483979563610069e-07,
|
|
"logits/chosen": 1.4470967054367065,
|
|
"logits/rejected": 1.1765843629837036,
|
|
"logps/chosen": -144.18069458007812,
|
|
"logps/ref_chosen": -71.65933227539062,
|
|
"logps/ref_rejected": -109.99200439453125,
|
|
"logps/rejected": -235.69711303710938,
|
|
"loss": 1.1334,
|
|
"margin_dpo/margin_mean": 53.183753967285156,
|
|
"margin_dpo/margin_std": 87.66374206542969,
|
|
"step": 473
|
|
},
|
|
{
|
|
"epoch": 0.7165532879818595,
|
|
"fcm_dpo/beta": 0.0072361379861831665,
|
|
"fcm_dpo/delta": 0.07070795446634293,
|
|
"fcm_dpo/margin": 45.83869934082031,
|
|
"fcm_dpo/q_t": 0.42501258850097656,
|
|
"grad_norm": 10.908045768737793,
|
|
"learning_rate": 1.1372936966796709e-07,
|
|
"logits/chosen": 1.7690794467926025,
|
|
"logits/rejected": 1.540644884109497,
|
|
"logps/chosen": -154.1907958984375,
|
|
"logps/ref_chosen": -65.91990661621094,
|
|
"logps/ref_rejected": -89.09432983398438,
|
|
"logps/rejected": -223.20391845703125,
|
|
"loss": 1.1675,
|
|
"margin_dpo/margin_mean": 45.83869934082031,
|
|
"margin_dpo/margin_std": 83.50460815429688,
|
|
"step": 474
|
|
},
|
|
{
|
|
"epoch": 0.7180650037792895,
|
|
"fcm_dpo/beta": 0.007119235582649708,
|
|
"fcm_dpo/delta": -0.13208799064159393,
|
|
"fcm_dpo/margin": 73.72787475585938,
|
|
"fcm_dpo/q_t": 0.37819790840148926,
|
|
"grad_norm": 6.977424621582031,
|
|
"learning_rate": 1.126227554822985e-07,
|
|
"logits/chosen": 1.305135726928711,
|
|
"logits/rejected": 1.2358264923095703,
|
|
"logps/chosen": -163.15887451171875,
|
|
"logps/ref_chosen": -79.02459716796875,
|
|
"logps/ref_rejected": -107.33058166503906,
|
|
"logps/rejected": -265.1927185058594,
|
|
"loss": 0.9995,
|
|
"margin_dpo/margin_mean": 73.72787475585938,
|
|
"margin_dpo/margin_std": 77.44041442871094,
|
|
"step": 475
|
|
},
|
|
{
|
|
"epoch": 0.7195767195767195,
|
|
"fcm_dpo/beta": 0.0071355425752699375,
|
|
"fcm_dpo/delta": 0.04274769499897957,
|
|
"fcm_dpo/margin": 50.27285385131836,
|
|
"fcm_dpo/q_t": 0.4180905520915985,
|
|
"grad_norm": 6.6651201248168945,
|
|
"learning_rate": 1.1151998403347243e-07,
|
|
"logits/chosen": 1.3839824199676514,
|
|
"logits/rejected": 1.3616328239440918,
|
|
"logps/chosen": -186.7384033203125,
|
|
"logps/ref_chosen": -93.72602844238281,
|
|
"logps/ref_rejected": -94.390625,
|
|
"logps/rejected": -237.67584228515625,
|
|
"loss": 1.1399,
|
|
"margin_dpo/margin_mean": 50.272850036621094,
|
|
"margin_dpo/margin_std": 82.08091735839844,
|
|
"step": 476
|
|
},
|
|
{
|
|
"epoch": 0.7210884353741497,
|
|
"fcm_dpo/beta": 0.007278554607182741,
|
|
"fcm_dpo/delta": 0.12292856723070145,
|
|
"fcm_dpo/margin": 38.535186767578125,
|
|
"fcm_dpo/q_t": 0.43708232045173645,
|
|
"grad_norm": 7.889428615570068,
|
|
"learning_rate": 1.1042108616837692e-07,
|
|
"logits/chosen": 1.58128023147583,
|
|
"logits/rejected": 1.484431266784668,
|
|
"logps/chosen": -189.0590057373047,
|
|
"logps/ref_chosen": -76.51399993896484,
|
|
"logps/ref_rejected": -99.14356231689453,
|
|
"logps/rejected": -250.2237548828125,
|
|
"loss": 1.2394,
|
|
"margin_dpo/margin_mean": 38.535194396972656,
|
|
"margin_dpo/margin_std": 94.74784088134766,
|
|
"step": 477
|
|
},
|
|
{
|
|
"epoch": 0.7226001511715797,
|
|
"fcm_dpo/beta": 0.007414321415126324,
|
|
"fcm_dpo/delta": 0.09388057887554169,
|
|
"fcm_dpo/margin": 41.69120788574219,
|
|
"fcm_dpo/q_t": 0.43154817819595337,
|
|
"grad_norm": 9.232916831970215,
|
|
"learning_rate": 1.0932609262554746e-07,
|
|
"logits/chosen": 1.4031165838241577,
|
|
"logits/rejected": 1.3811558485031128,
|
|
"logps/chosen": -168.47891235351562,
|
|
"logps/ref_chosen": -77.95186614990234,
|
|
"logps/ref_rejected": -69.77754211425781,
|
|
"logps/rejected": -201.99581909179688,
|
|
"loss": 1.2187,
|
|
"margin_dpo/margin_mean": 41.69120788574219,
|
|
"margin_dpo/margin_std": 95.53366088867188,
|
|
"step": 478
|
|
},
|
|
{
|
|
"epoch": 0.7241118669690099,
|
|
"fcm_dpo/beta": 0.007548708468675613,
|
|
"fcm_dpo/delta": 0.08790592849254608,
|
|
"fcm_dpo/margin": 41.721351623535156,
|
|
"fcm_dpo/q_t": 0.4289689362049103,
|
|
"grad_norm": 9.109953880310059,
|
|
"learning_rate": 1.0823503403430734e-07,
|
|
"logits/chosen": 1.3494288921356201,
|
|
"logits/rejected": 1.2263519763946533,
|
|
"logps/chosen": -164.5086212158203,
|
|
"logps/ref_chosen": -76.56551361083984,
|
|
"logps/ref_rejected": -84.33758544921875,
|
|
"logps/rejected": -214.00204467773438,
|
|
"loss": 1.2174,
|
|
"margin_dpo/margin_mean": 41.721351623535156,
|
|
"margin_dpo/margin_std": 95.05142211914062,
|
|
"step": 479
|
|
},
|
|
{
|
|
"epoch": 0.7256235827664399,
|
|
"fcm_dpo/beta": 0.007575510535389185,
|
|
"fcm_dpo/delta": -0.04130330681800842,
|
|
"fcm_dpo/margin": 57.996116638183594,
|
|
"fcm_dpo/q_t": 0.4007851481437683,
|
|
"grad_norm": 11.577788352966309,
|
|
"learning_rate": 1.0714794091391072e-07,
|
|
"logits/chosen": 1.257918119430542,
|
|
"logits/rejected": 1.2538151741027832,
|
|
"logps/chosen": -165.85855102539062,
|
|
"logps/ref_chosen": -80.15884399414062,
|
|
"logps/ref_rejected": -84.88697814941406,
|
|
"logps/rejected": -228.58279418945312,
|
|
"loss": 1.1053,
|
|
"margin_dpo/margin_mean": 57.996116638183594,
|
|
"margin_dpo/margin_std": 90.42449951171875,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.72713529856387,
|
|
"fcm_dpo/beta": 0.007553757634013891,
|
|
"fcm_dpo/delta": 0.018801141530275345,
|
|
"fcm_dpo/margin": 50.558876037597656,
|
|
"fcm_dpo/q_t": 0.41458386182785034,
|
|
"grad_norm": 8.55683422088623,
|
|
"learning_rate": 1.0606484367268906e-07,
|
|
"logits/chosen": 1.2538020610809326,
|
|
"logits/rejected": 1.2463691234588623,
|
|
"logps/chosen": -175.15777587890625,
|
|
"logps/ref_chosen": -84.56254577636719,
|
|
"logps/ref_rejected": -90.06451416015625,
|
|
"logps/rejected": -231.2186279296875,
|
|
"loss": 1.1431,
|
|
"margin_dpo/margin_mean": 50.558876037597656,
|
|
"margin_dpo/margin_std": 88.14126586914062,
|
|
"step": 481
|
|
},
|
|
{
|
|
"epoch": 0.7286470143613001,
|
|
"fcm_dpo/beta": 0.007597423158586025,
|
|
"fcm_dpo/delta": 0.01751277782022953,
|
|
"fcm_dpo/margin": 50.41091537475586,
|
|
"fcm_dpo/q_t": 0.415740966796875,
|
|
"grad_norm": 6.092764377593994,
|
|
"learning_rate": 1.0498577260720048e-07,
|
|
"logits/chosen": 1.6070960760116577,
|
|
"logits/rejected": 1.2131727933883667,
|
|
"logps/chosen": -184.42752075195312,
|
|
"logps/ref_chosen": -78.88141632080078,
|
|
"logps/ref_rejected": -125.41990661621094,
|
|
"logps/rejected": -281.3769226074219,
|
|
"loss": 1.1738,
|
|
"margin_dpo/margin_mean": 50.41091537475586,
|
|
"margin_dpo/margin_std": 100.31005859375,
|
|
"step": 482
|
|
},
|
|
{
|
|
"epoch": 0.7301587301587301,
|
|
"fcm_dpo/beta": 0.007529154419898987,
|
|
"fcm_dpo/delta": -0.043737735599279404,
|
|
"fcm_dpo/margin": 58.672149658203125,
|
|
"fcm_dpo/q_t": 0.39993715286254883,
|
|
"grad_norm": 10.599023818969727,
|
|
"learning_rate": 1.0391075790138232e-07,
|
|
"logits/chosen": 1.8194961547851562,
|
|
"logits/rejected": 1.504716396331787,
|
|
"logps/chosen": -157.94146728515625,
|
|
"logps/ref_chosen": -72.690185546875,
|
|
"logps/ref_rejected": -98.37237548828125,
|
|
"logps/rejected": -242.29580688476562,
|
|
"loss": 1.0924,
|
|
"margin_dpo/margin_mean": 58.672149658203125,
|
|
"margin_dpo/margin_std": 87.27351379394531,
|
|
"step": 483
|
|
},
|
|
{
|
|
"epoch": 0.7316704459561603,
|
|
"fcm_dpo/beta": 0.007634430192410946,
|
|
"fcm_dpo/delta": 0.08491643518209457,
|
|
"fcm_dpo/margin": 41.58033752441406,
|
|
"fcm_dpo/q_t": 0.42818331718444824,
|
|
"grad_norm": 7.452567100524902,
|
|
"learning_rate": 1.0283982962570681e-07,
|
|
"logits/chosen": 1.5895819664001465,
|
|
"logits/rejected": 1.5464513301849365,
|
|
"logps/chosen": -158.65072631835938,
|
|
"logps/ref_chosen": -73.98435974121094,
|
|
"logps/ref_rejected": -89.99178314208984,
|
|
"logps/rejected": -216.23846435546875,
|
|
"loss": 1.1752,
|
|
"margin_dpo/margin_mean": 41.58033752441406,
|
|
"margin_dpo/margin_std": 76.23760986328125,
|
|
"step": 484
|
|
},
|
|
{
|
|
"epoch": 0.7331821617535903,
|
|
"fcm_dpo/beta": 0.0076524196192622185,
|
|
"fcm_dpo/delta": 0.046526044607162476,
|
|
"fcm_dpo/margin": 46.33942413330078,
|
|
"fcm_dpo/q_t": 0.4194195866584778,
|
|
"grad_norm": 7.961450099945068,
|
|
"learning_rate": 1.0177301773633992e-07,
|
|
"logits/chosen": 1.7761842012405396,
|
|
"logits/rejected": 1.681747555732727,
|
|
"logps/chosen": -166.32699584960938,
|
|
"logps/ref_chosen": -78.0927963256836,
|
|
"logps/ref_rejected": -89.14010620117188,
|
|
"logps/rejected": -223.7137451171875,
|
|
"loss": 1.1499,
|
|
"margin_dpo/margin_mean": 46.33942413330078,
|
|
"margin_dpo/margin_std": 77.6761474609375,
|
|
"step": 485
|
|
},
|
|
{
|
|
"epoch": 0.7346938775510204,
|
|
"fcm_dpo/beta": 0.007767094299197197,
|
|
"fcm_dpo/delta": 0.02391296997666359,
|
|
"fcm_dpo/margin": 48.51192092895508,
|
|
"fcm_dpo/q_t": 0.41680556535720825,
|
|
"grad_norm": 6.9542646408081055,
|
|
"learning_rate": 1.007103520743035e-07,
|
|
"logits/chosen": 1.6766993999481201,
|
|
"logits/rejected": 1.3588069677352905,
|
|
"logps/chosen": -170.6450653076172,
|
|
"logps/ref_chosen": -73.74685668945312,
|
|
"logps/ref_rejected": -107.752685546875,
|
|
"logps/rejected": -253.16281127929688,
|
|
"loss": 1.1663,
|
|
"margin_dpo/margin_mean": 48.51192092895508,
|
|
"margin_dpo/margin_std": 93.03460693359375,
|
|
"step": 486
|
|
},
|
|
{
|
|
"epoch": 0.7362055933484505,
|
|
"fcm_dpo/beta": 0.007746942341327667,
|
|
"fcm_dpo/delta": -0.003563001286238432,
|
|
"fcm_dpo/margin": 52.0731201171875,
|
|
"fcm_dpo/q_t": 0.41003572940826416,
|
|
"grad_norm": 7.556940078735352,
|
|
"learning_rate": 9.965186236464046e-08,
|
|
"logits/chosen": 1.468240737915039,
|
|
"logits/rejected": 1.282604455947876,
|
|
"logps/chosen": -176.71240234375,
|
|
"logps/ref_chosen": -79.57780456542969,
|
|
"logps/ref_rejected": -102.2916259765625,
|
|
"logps/rejected": -251.4993438720703,
|
|
"loss": 1.1183,
|
|
"margin_dpo/margin_mean": 52.0731201171875,
|
|
"margin_dpo/margin_std": 83.100341796875,
|
|
"step": 487
|
|
},
|
|
{
|
|
"epoch": 0.7377173091458806,
|
|
"fcm_dpo/beta": 0.007788753602653742,
|
|
"fcm_dpo/delta": 0.0015588030219078064,
|
|
"fcm_dpo/margin": 51.109127044677734,
|
|
"fcm_dpo/q_t": 0.4141284227371216,
|
|
"grad_norm": 20.479114532470703,
|
|
"learning_rate": 9.859757821558337e-08,
|
|
"logits/chosen": 1.504908561706543,
|
|
"logits/rejected": 1.3210365772247314,
|
|
"logps/chosen": -168.25746154785156,
|
|
"logps/ref_chosen": -80.62767791748047,
|
|
"logps/ref_rejected": -100.4541015625,
|
|
"logps/rejected": -239.19302368164062,
|
|
"loss": 1.1584,
|
|
"margin_dpo/margin_mean": 51.10912322998047,
|
|
"margin_dpo/margin_std": 96.0980453491211,
|
|
"step": 488
|
|
},
|
|
{
|
|
"epoch": 0.7392290249433107,
|
|
"fcm_dpo/beta": 0.0077533237636089325,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 24.302478790283203,
|
|
"fcm_dpo/q_t": 0.4580850899219513,
|
|
"grad_norm": 13.592652320861816,
|
|
"learning_rate": 9.754752911772615e-08,
|
|
"logits/chosen": 1.2843654155731201,
|
|
"logits/rejected": 1.170314073562622,
|
|
"logps/chosen": -185.48910522460938,
|
|
"logps/ref_chosen": -85.39521026611328,
|
|
"logps/ref_rejected": -101.97309875488281,
|
|
"logps/rejected": -226.36947631835938,
|
|
"loss": 1.2946,
|
|
"margin_dpo/margin_mean": 24.302478790283203,
|
|
"margin_dpo/margin_std": 78.64244079589844,
|
|
"step": 489
|
|
},
|
|
{
|
|
"epoch": 0.7407407407407407,
|
|
"fcm_dpo/beta": 0.007696731481701136,
|
|
"fcm_dpo/delta": -0.017715483903884888,
|
|
"fcm_dpo/margin": 54.10951614379883,
|
|
"fcm_dpo/q_t": 0.4072234034538269,
|
|
"grad_norm": 9.231451034545898,
|
|
"learning_rate": 9.650174444319956e-08,
|
|
"logits/chosen": 1.945656418800354,
|
|
"logits/rejected": 1.905264973640442,
|
|
"logps/chosen": -161.941650390625,
|
|
"logps/ref_chosen": -77.75590515136719,
|
|
"logps/ref_rejected": -88.98885345458984,
|
|
"logps/rejected": -227.2841033935547,
|
|
"loss": 1.1338,
|
|
"margin_dpo/margin_mean": 54.10951614379883,
|
|
"margin_dpo/margin_std": 93.16372680664062,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.7422524565381708,
|
|
"fcm_dpo/beta": 0.007663640193641186,
|
|
"fcm_dpo/delta": -0.013870414346456528,
|
|
"fcm_dpo/margin": 53.826045989990234,
|
|
"fcm_dpo/q_t": 0.40752020478248596,
|
|
"grad_norm": 8.06557559967041,
|
|
"learning_rate": 9.546025344484868e-08,
|
|
"logits/chosen": 1.3035305738449097,
|
|
"logits/rejected": 1.1545779705047607,
|
|
"logps/chosen": -157.3299560546875,
|
|
"logps/ref_chosen": -74.33360290527344,
|
|
"logps/ref_rejected": -91.4105224609375,
|
|
"logps/rejected": -228.23292541503906,
|
|
"loss": 1.1018,
|
|
"margin_dpo/margin_mean": 53.826053619384766,
|
|
"margin_dpo/margin_std": 78.12969970703125,
|
|
"step": 491
|
|
},
|
|
{
|
|
"epoch": 0.7437641723356009,
|
|
"fcm_dpo/beta": 0.00771558191627264,
|
|
"fcm_dpo/delta": 0.01436400692909956,
|
|
"fcm_dpo/margin": 36.33604431152344,
|
|
"fcm_dpo/q_t": 0.4382849335670471,
|
|
"grad_norm": 12.463248252868652,
|
|
"learning_rate": 9.442308525541589e-08,
|
|
"logits/chosen": 1.6404715776443481,
|
|
"logits/rejected": 1.3996877670288086,
|
|
"logps/chosen": -193.6622314453125,
|
|
"logps/ref_chosen": -85.14178466796875,
|
|
"logps/ref_rejected": -103.44204711914062,
|
|
"logps/rejected": -248.29855346679688,
|
|
"loss": 1.2292,
|
|
"margin_dpo/margin_mean": 36.33604431152344,
|
|
"margin_dpo/margin_std": 85.1468734741211,
|
|
"step": 492
|
|
},
|
|
{
|
|
"epoch": 0.745275888133031,
|
|
"fcm_dpo/beta": 0.007784358225762844,
|
|
"fcm_dpo/delta": 0.026980683207511902,
|
|
"fcm_dpo/margin": 48.0078010559082,
|
|
"fcm_dpo/q_t": 0.41681572794914246,
|
|
"grad_norm": 9.322619438171387,
|
|
"learning_rate": 9.339026888672468e-08,
|
|
"logits/chosen": 1.6487233638763428,
|
|
"logits/rejected": 1.4259631633758545,
|
|
"logps/chosen": -165.03854370117188,
|
|
"logps/ref_chosen": -75.81439208984375,
|
|
"logps/ref_rejected": -95.30766296386719,
|
|
"logps/rejected": -232.53961181640625,
|
|
"loss": 1.175,
|
|
"margin_dpo/margin_mean": 48.00779724121094,
|
|
"margin_dpo/margin_std": 94.79415130615234,
|
|
"step": 493
|
|
},
|
|
{
|
|
"epoch": 0.7467876039304611,
|
|
"fcm_dpo/beta": 0.007813823409378529,
|
|
"fcm_dpo/delta": 0.060058847069740295,
|
|
"fcm_dpo/margin": 43.76633071899414,
|
|
"fcm_dpo/q_t": 0.4248165488243103,
|
|
"grad_norm": 10.274767875671387,
|
|
"learning_rate": 9.236183322886945e-08,
|
|
"logits/chosen": 1.3294415473937988,
|
|
"logits/rejected": 1.2298643589019775,
|
|
"logps/chosen": -182.89959716796875,
|
|
"logps/ref_chosen": -93.83562469482422,
|
|
"logps/ref_rejected": -112.21142578125,
|
|
"logps/rejected": -245.04171752929688,
|
|
"loss": 1.1956,
|
|
"margin_dpo/margin_mean": 43.766326904296875,
|
|
"margin_dpo/margin_std": 92.66915893554688,
|
|
"step": 494
|
|
},
|
|
{
|
|
"epoch": 0.7482993197278912,
|
|
"fcm_dpo/beta": 0.007944008335471153,
|
|
"fcm_dpo/delta": 0.050161540508270264,
|
|
"fcm_dpo/margin": 44.22233200073242,
|
|
"fcm_dpo/q_t": 0.4226170778274536,
|
|
"grad_norm": 14.485008239746094,
|
|
"learning_rate": 9.133780704940594e-08,
|
|
"logits/chosen": 1.640815258026123,
|
|
"logits/rejected": 1.4514687061309814,
|
|
"logps/chosen": -155.20675659179688,
|
|
"logps/ref_chosen": -68.52467346191406,
|
|
"logps/ref_rejected": -89.65379333496094,
|
|
"logps/rejected": -220.55819702148438,
|
|
"loss": 1.1922,
|
|
"margin_dpo/margin_mean": 44.22233581542969,
|
|
"margin_dpo/margin_std": 91.24566650390625,
|
|
"step": 495
|
|
},
|
|
{
|
|
"epoch": 0.7498110355253212,
|
|
"fcm_dpo/beta": 0.007922867313027382,
|
|
"fcm_dpo/delta": -0.0013457629829645157,
|
|
"fcm_dpo/margin": 50.63785934448242,
|
|
"fcm_dpo/q_t": 0.41145336627960205,
|
|
"grad_norm": 7.610332489013672,
|
|
"learning_rate": 9.031821899254797e-08,
|
|
"logits/chosen": 1.7609965801239014,
|
|
"logits/rejected": 1.4800307750701904,
|
|
"logps/chosen": -164.26663208007812,
|
|
"logps/ref_chosen": -73.13618469238281,
|
|
"logps/ref_rejected": -111.50930786132812,
|
|
"logps/rejected": -253.27760314941406,
|
|
"loss": 1.1438,
|
|
"margin_dpo/margin_mean": 50.63785934448242,
|
|
"margin_dpo/margin_std": 89.98391723632812,
|
|
"step": 496
|
|
},
|
|
{
|
|
"epoch": 0.7513227513227513,
|
|
"fcm_dpo/beta": 0.007826825603842735,
|
|
"fcm_dpo/delta": -0.06445329636335373,
|
|
"fcm_dpo/margin": 58.869712829589844,
|
|
"fcm_dpo/q_t": 0.39559072256088257,
|
|
"grad_norm": 12.443891525268555,
|
|
"learning_rate": 8.930309757836516e-08,
|
|
"logits/chosen": 1.779658555984497,
|
|
"logits/rejected": 1.6461775302886963,
|
|
"logps/chosen": -184.48745727539062,
|
|
"logps/ref_chosen": -88.71475219726562,
|
|
"logps/ref_rejected": -105.74935913085938,
|
|
"logps/rejected": -260.39178466796875,
|
|
"loss": 1.0692,
|
|
"margin_dpo/margin_mean": 58.869712829589844,
|
|
"margin_dpo/margin_std": 79.18601989746094,
|
|
"step": 497
|
|
},
|
|
{
|
|
"epoch": 0.7528344671201814,
|
|
"fcm_dpo/beta": 0.007826920598745346,
|
|
"fcm_dpo/delta": -0.021468058228492737,
|
|
"fcm_dpo/margin": 53.729766845703125,
|
|
"fcm_dpo/q_t": 0.4065396189689636,
|
|
"grad_norm": 6.615968227386475,
|
|
"learning_rate": 8.829247120198563e-08,
|
|
"logits/chosen": 1.4853155612945557,
|
|
"logits/rejected": 1.4078007936477661,
|
|
"logps/chosen": -163.5498046875,
|
|
"logps/ref_chosen": -83.3353271484375,
|
|
"logps/ref_rejected": -89.34941864013672,
|
|
"logps/rejected": -223.29367065429688,
|
|
"loss": 1.1105,
|
|
"margin_dpo/margin_mean": 53.729766845703125,
|
|
"margin_dpo/margin_std": 84.90679931640625,
|
|
"step": 498
|
|
},
|
|
{
|
|
"epoch": 0.7543461829176115,
|
|
"fcm_dpo/beta": 0.007880712859332561,
|
|
"fcm_dpo/delta": 0.05969448387622833,
|
|
"fcm_dpo/margin": 43.433326721191406,
|
|
"fcm_dpo/q_t": 0.425603449344635,
|
|
"grad_norm": 10.157211303710938,
|
|
"learning_rate": 8.728636813280163e-08,
|
|
"logits/chosen": 1.4775807857513428,
|
|
"logits/rejected": 1.2861871719360352,
|
|
"logps/chosen": -172.53701782226562,
|
|
"logps/ref_chosen": -79.373779296875,
|
|
"logps/ref_rejected": -104.62533569335938,
|
|
"logps/rejected": -241.22189331054688,
|
|
"loss": 1.2216,
|
|
"margin_dpo/margin_mean": 43.433326721191406,
|
|
"margin_dpo/margin_std": 101.27740478515625,
|
|
"step": 499
|
|
},
|
|
{
|
|
"epoch": 0.7558578987150416,
|
|
"fcm_dpo/beta": 0.007957592606544495,
|
|
"fcm_dpo/delta": 0.006692651659250259,
|
|
"fcm_dpo/margin": 49.34884262084961,
|
|
"fcm_dpo/q_t": 0.41281697154045105,
|
|
"grad_norm": 8.680094718933105,
|
|
"learning_rate": 8.628481651367875e-08,
|
|
"logits/chosen": 1.2721607685089111,
|
|
"logits/rejected": 1.2470263242721558,
|
|
"logps/chosen": -177.0205078125,
|
|
"logps/ref_chosen": -85.953857421875,
|
|
"logps/ref_rejected": -90.40995788574219,
|
|
"logps/rejected": -230.82546997070312,
|
|
"loss": 1.1309,
|
|
"margin_dpo/margin_mean": 49.34884262084961,
|
|
"margin_dpo/margin_std": 80.7742919921875,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.7558578987150416,
|
|
"eval_fcm_dpo/beta": 0.007908526808023453,
|
|
"eval_logits/chosen": 1.5455379486083984,
|
|
"eval_logits/rejected": 1.4084484577178955,
|
|
"eval_logps/chosen": -169.92417907714844,
|
|
"eval_logps/ref_chosen": -86.90177917480469,
|
|
"eval_logps/ref_rejected": -96.69639587402344,
|
|
"eval_logps/rejected": -230.74034118652344,
|
|
"eval_loss": 0.5676109790802002,
|
|
"eval_margin_dpo/margin_mean": 51.021575927734375,
|
|
"eval_margin_dpo/margin_std": 86.49874114990234,
|
|
"eval_runtime": 42.2831,
|
|
"eval_samples_per_second": 54.466,
|
|
"eval_steps_per_second": 1.703,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.7573696145124716,
|
|
"fcm_dpo/beta": 0.008018089458346367,
|
|
"fcm_dpo/delta": 0.06569428741931915,
|
|
"fcm_dpo/margin": 41.89936065673828,
|
|
"fcm_dpo/q_t": 0.4245343804359436,
|
|
"grad_norm": 7.522352695465088,
|
|
"learning_rate": 8.528784436016878e-08,
|
|
"logits/chosen": 1.4151782989501953,
|
|
"logits/rejected": 1.424743413925171,
|
|
"logps/chosen": -168.38230895996094,
|
|
"logps/ref_chosen": -81.22268676757812,
|
|
"logps/ref_rejected": -86.97892761230469,
|
|
"logps/rejected": -216.03790283203125,
|
|
"loss": 1.1462,
|
|
"margin_dpo/margin_mean": 41.89936065673828,
|
|
"margin_dpo/margin_std": 66.22032165527344,
|
|
"step": 501
|
|
},
|
|
{
|
|
"epoch": 0.7588813303099018,
|
|
"fcm_dpo/beta": 0.008128372952342033,
|
|
"fcm_dpo/delta": 0.08888862282037735,
|
|
"fcm_dpo/margin": 38.60469436645508,
|
|
"fcm_dpo/q_t": 0.42894744873046875,
|
|
"grad_norm": 10.359253883361816,
|
|
"learning_rate": 8.4295479559726e-08,
|
|
"logits/chosen": 1.2615420818328857,
|
|
"logits/rejected": 1.1646403074264526,
|
|
"logps/chosen": -168.2417755126953,
|
|
"logps/ref_chosen": -83.1567611694336,
|
|
"logps/ref_rejected": -106.74440002441406,
|
|
"logps/rejected": -230.4341278076172,
|
|
"loss": 1.1725,
|
|
"margin_dpo/margin_mean": 38.604698181152344,
|
|
"margin_dpo/margin_std": 69.67572784423828,
|
|
"step": 502
|
|
},
|
|
{
|
|
"epoch": 0.7603930461073318,
|
|
"fcm_dpo/beta": 0.008050322532653809,
|
|
"fcm_dpo/delta": -0.08986638486385345,
|
|
"fcm_dpo/margin": 60.310672760009766,
|
|
"fcm_dpo/q_t": 0.3902174234390259,
|
|
"grad_norm": 7.535645484924316,
|
|
"learning_rate": 8.330774987092712e-08,
|
|
"logits/chosen": 1.522834062576294,
|
|
"logits/rejected": 1.5648481845855713,
|
|
"logps/chosen": -146.69717407226562,
|
|
"logps/ref_chosen": -68.51583862304688,
|
|
"logps/ref_rejected": -75.02178955078125,
|
|
"logps/rejected": -213.5137939453125,
|
|
"loss": 1.0616,
|
|
"margin_dpo/margin_mean": 60.31067657470703,
|
|
"margin_dpo/margin_std": 82.89733123779297,
|
|
"step": 503
|
|
},
|
|
{
|
|
"epoch": 0.7619047619047619,
|
|
"fcm_dpo/beta": 0.007913358509540558,
|
|
"fcm_dpo/delta": -0.12686683237552643,
|
|
"fcm_dpo/margin": 65.74114990234375,
|
|
"fcm_dpo/q_t": 0.3832342326641083,
|
|
"grad_norm": 7.837214469909668,
|
|
"learning_rate": 8.232468292269479e-08,
|
|
"logits/chosen": 1.6095514297485352,
|
|
"logits/rejected": 1.5772470235824585,
|
|
"logps/chosen": -164.8587646484375,
|
|
"logps/ref_chosen": -85.15829467773438,
|
|
"logps/ref_rejected": -96.16879272460938,
|
|
"logps/rejected": -241.6103973388672,
|
|
"loss": 1.0304,
|
|
"margin_dpo/margin_mean": 65.74114990234375,
|
|
"margin_dpo/margin_std": 80.77653503417969,
|
|
"step": 504
|
|
},
|
|
{
|
|
"epoch": 0.763416477702192,
|
|
"fcm_dpo/beta": 0.007828344590961933,
|
|
"fcm_dpo/delta": 0.011198666878044605,
|
|
"fcm_dpo/margin": 34.941402435302734,
|
|
"fcm_dpo/q_t": 0.4389588236808777,
|
|
"grad_norm": 18.425174713134766,
|
|
"learning_rate": 8.134630621352483e-08,
|
|
"logits/chosen": 1.3816214799880981,
|
|
"logits/rejected": 1.2274703979492188,
|
|
"logps/chosen": -167.62496948242188,
|
|
"logps/ref_chosen": -79.26185607910156,
|
|
"logps/ref_rejected": -96.34947967529297,
|
|
"logps/rejected": -219.65399169921875,
|
|
"loss": 1.2728,
|
|
"margin_dpo/margin_mean": 34.941402435302734,
|
|
"margin_dpo/margin_std": 98.05575561523438,
|
|
"step": 505
|
|
},
|
|
{
|
|
"epoch": 0.764928193499622,
|
|
"fcm_dpo/beta": 0.007884200662374496,
|
|
"fcm_dpo/delta": 0.059605300426483154,
|
|
"fcm_dpo/margin": 43.43827819824219,
|
|
"fcm_dpo/q_t": 0.42400920391082764,
|
|
"grad_norm": 8.992940902709961,
|
|
"learning_rate": 8.037264711071698e-08,
|
|
"logits/chosen": 1.498244047164917,
|
|
"logits/rejected": 1.4761946201324463,
|
|
"logps/chosen": -175.23486328125,
|
|
"logps/ref_chosen": -88.192626953125,
|
|
"logps/ref_rejected": -100.86880493164062,
|
|
"logps/rejected": -231.3493194580078,
|
|
"loss": 1.2296,
|
|
"margin_dpo/margin_mean": 43.43827819824219,
|
|
"margin_dpo/margin_std": 104.13943481445312,
|
|
"step": 506
|
|
},
|
|
{
|
|
"epoch": 0.7664399092970522,
|
|
"fcm_dpo/beta": 0.00795232504606247,
|
|
"fcm_dpo/delta": 0.02266060933470726,
|
|
"fcm_dpo/margin": 47.55640411376953,
|
|
"fcm_dpo/q_t": 0.41735711693763733,
|
|
"grad_norm": 7.954833507537842,
|
|
"learning_rate": 7.940373284960933e-08,
|
|
"logits/chosen": 1.1958003044128418,
|
|
"logits/rejected": 1.0543439388275146,
|
|
"logps/chosen": -181.73443603515625,
|
|
"logps/ref_chosen": -86.04632568359375,
|
|
"logps/ref_rejected": -111.44412994384766,
|
|
"logps/rejected": -254.68865966796875,
|
|
"loss": 1.1584,
|
|
"margin_dpo/margin_mean": 47.5564079284668,
|
|
"margin_dpo/margin_std": 88.17842102050781,
|
|
"step": 507
|
|
},
|
|
{
|
|
"epoch": 0.7679516250944822,
|
|
"fcm_dpo/beta": 0.007975887507200241,
|
|
"fcm_dpo/delta": 0.0014539193361997604,
|
|
"fcm_dpo/margin": 49.96595001220703,
|
|
"fcm_dpo/q_t": 0.4134010374546051,
|
|
"grad_norm": 9.42202091217041,
|
|
"learning_rate": 7.843959053281663e-08,
|
|
"logits/chosen": 1.2900254726409912,
|
|
"logits/rejected": 0.9407453536987305,
|
|
"logps/chosen": -164.1695556640625,
|
|
"logps/ref_chosen": -79.25038146972656,
|
|
"logps/ref_rejected": -118.49089813232422,
|
|
"logps/rejected": -253.37603759765625,
|
|
"loss": 1.1535,
|
|
"margin_dpo/margin_mean": 49.96595001220703,
|
|
"margin_dpo/margin_std": 93.14352416992188,
|
|
"step": 508
|
|
},
|
|
{
|
|
"epoch": 0.7694633408919124,
|
|
"fcm_dpo/beta": 0.007849051617085934,
|
|
"fcm_dpo/delta": -0.06434465199708939,
|
|
"fcm_dpo/margin": 58.69524383544922,
|
|
"fcm_dpo/q_t": 0.39460235834121704,
|
|
"grad_norm": 8.181747436523438,
|
|
"learning_rate": 7.748024712947204e-08,
|
|
"logits/chosen": 1.2476528882980347,
|
|
"logits/rejected": 1.179711103439331,
|
|
"logps/chosen": -157.0579376220703,
|
|
"logps/ref_chosen": -80.7039566040039,
|
|
"logps/ref_rejected": -90.50444793701172,
|
|
"logps/rejected": -225.55368041992188,
|
|
"loss": 1.0765,
|
|
"margin_dpo/margin_mean": 58.69524002075195,
|
|
"margin_dpo/margin_std": 82.17289733886719,
|
|
"step": 509
|
|
},
|
|
{
|
|
"epoch": 0.7709750566893424,
|
|
"fcm_dpo/beta": 0.0077163660898804665,
|
|
"fcm_dpo/delta": -0.0882585346698761,
|
|
"fcm_dpo/margin": 62.59996795654297,
|
|
"fcm_dpo/q_t": 0.39215773344039917,
|
|
"grad_norm": 7.753228187561035,
|
|
"learning_rate": 7.652572947447272e-08,
|
|
"logits/chosen": 1.672702670097351,
|
|
"logits/rejected": 1.4109150171279907,
|
|
"logps/chosen": -145.37037658691406,
|
|
"logps/ref_chosen": -67.64491271972656,
|
|
"logps/ref_rejected": -108.92274475097656,
|
|
"logps/rejected": -249.2481689453125,
|
|
"loss": 1.0962,
|
|
"margin_dpo/margin_mean": 62.59996795654297,
|
|
"margin_dpo/margin_std": 98.73306274414062,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 0.7724867724867724,
|
|
"fcm_dpo/beta": 0.00761351129040122,
|
|
"fcm_dpo/delta": -0.11252570152282715,
|
|
"fcm_dpo/margin": 66.58784484863281,
|
|
"fcm_dpo/q_t": 0.3877149224281311,
|
|
"grad_norm": 8.772534370422363,
|
|
"learning_rate": 7.557606426772961e-08,
|
|
"logits/chosen": 1.4028170108795166,
|
|
"logits/rejected": 1.2960667610168457,
|
|
"logps/chosen": -158.74371337890625,
|
|
"logps/ref_chosen": -75.66263580322266,
|
|
"logps/ref_rejected": -104.26296997070312,
|
|
"logps/rejected": -253.931884765625,
|
|
"loss": 1.0694,
|
|
"margin_dpo/margin_mean": 66.58784484863281,
|
|
"margin_dpo/margin_std": 97.47923278808594,
|
|
"step": 511
|
|
},
|
|
{
|
|
"epoch": 0.7739984882842026,
|
|
"fcm_dpo/beta": 0.00763990543782711,
|
|
"fcm_dpo/delta": 0.09289787709712982,
|
|
"fcm_dpo/margin": 40.58736038208008,
|
|
"fcm_dpo/q_t": 0.43213844299316406,
|
|
"grad_norm": 9.161316871643066,
|
|
"learning_rate": 7.463127807341966e-08,
|
|
"logits/chosen": 1.1914927959442139,
|
|
"logits/rejected": 1.1217591762542725,
|
|
"logps/chosen": -163.9389190673828,
|
|
"logps/ref_chosen": -79.31925964355469,
|
|
"logps/ref_rejected": -82.22052001953125,
|
|
"logps/rejected": -207.4275360107422,
|
|
"loss": 1.2055,
|
|
"margin_dpo/margin_mean": 40.58736038208008,
|
|
"margin_dpo/margin_std": 87.80073547363281,
|
|
"step": 512
|
|
},
|
|
{
|
|
"epoch": 0.7755102040816326,
|
|
"fcm_dpo/beta": 0.007657117675989866,
|
|
"fcm_dpo/delta": 0.0008791796863079071,
|
|
"fcm_dpo/margin": 52.076133728027344,
|
|
"fcm_dpo/q_t": 0.4106717109680176,
|
|
"grad_norm": 7.0931172370910645,
|
|
"learning_rate": 7.369139731924401e-08,
|
|
"logits/chosen": 1.6813411712646484,
|
|
"logits/rejected": 1.5264880657196045,
|
|
"logps/chosen": -152.16217041015625,
|
|
"logps/ref_chosen": -72.02534484863281,
|
|
"logps/ref_rejected": -86.56224060058594,
|
|
"logps/rejected": -218.77520751953125,
|
|
"loss": 1.1117,
|
|
"margin_dpo/margin_mean": 52.07612991333008,
|
|
"margin_dpo/margin_std": 78.48776245117188,
|
|
"step": 513
|
|
},
|
|
{
|
|
"epoch": 0.7770219198790628,
|
|
"fcm_dpo/beta": 0.007597388233989477,
|
|
"fcm_dpo/delta": -0.09755103290081024,
|
|
"fcm_dpo/margin": 64.87118530273438,
|
|
"fcm_dpo/q_t": 0.3874030113220215,
|
|
"grad_norm": 8.100028038024902,
|
|
"learning_rate": 7.275644829568747e-08,
|
|
"logits/chosen": 1.5403776168823242,
|
|
"logits/rejected": 1.4848439693450928,
|
|
"logps/chosen": -169.60714721679688,
|
|
"logps/ref_chosen": -84.94093322753906,
|
|
"logps/ref_rejected": -102.44367980957031,
|
|
"logps/rejected": -251.98109436035156,
|
|
"loss": 1.0548,
|
|
"margin_dpo/margin_mean": 64.87118530273438,
|
|
"margin_dpo/margin_std": 87.02154541015625,
|
|
"step": 514
|
|
},
|
|
{
|
|
"epoch": 0.7785336356764928,
|
|
"fcm_dpo/beta": 0.007553492672741413,
|
|
"fcm_dpo/delta": 0.01082646381109953,
|
|
"fcm_dpo/margin": 51.578346252441406,
|
|
"fcm_dpo/q_t": 0.41361895203590393,
|
|
"grad_norm": 13.685517311096191,
|
|
"learning_rate": 7.182645715528435e-08,
|
|
"logits/chosen": 1.8065004348754883,
|
|
"logits/rejected": 1.5909342765808105,
|
|
"logps/chosen": -162.07525634765625,
|
|
"logps/ref_chosen": -72.9662094116211,
|
|
"logps/ref_rejected": -102.53651428222656,
|
|
"logps/rejected": -243.22390747070312,
|
|
"loss": 1.1433,
|
|
"margin_dpo/margin_mean": 51.57834243774414,
|
|
"margin_dpo/margin_std": 90.6058349609375,
|
|
"step": 515
|
|
},
|
|
{
|
|
"epoch": 0.780045351473923,
|
|
"fcm_dpo/beta": 0.0076246303506195545,
|
|
"fcm_dpo/delta": 0.048703186213970184,
|
|
"fcm_dpo/margin": 46.287757873535156,
|
|
"fcm_dpo/q_t": 0.41831153631210327,
|
|
"grad_norm": 15.306377410888672,
|
|
"learning_rate": 7.090144991188568e-08,
|
|
"logits/chosen": 1.5644421577453613,
|
|
"logits/rejected": 1.3997728824615479,
|
|
"logps/chosen": -148.07150268554688,
|
|
"logps/ref_chosen": -76.63414001464844,
|
|
"logps/ref_rejected": -91.01750183105469,
|
|
"logps/rejected": -208.74264526367188,
|
|
"loss": 1.1623,
|
|
"margin_dpo/margin_mean": 46.28776168823242,
|
|
"margin_dpo/margin_std": 84.75020599365234,
|
|
"step": 516
|
|
},
|
|
{
|
|
"epoch": 0.781557067271353,
|
|
"fcm_dpo/beta": 0.007668118923902512,
|
|
"fcm_dpo/delta": 0.044410329312086105,
|
|
"fcm_dpo/margin": 31.92813491821289,
|
|
"fcm_dpo/q_t": 0.44710972905158997,
|
|
"grad_norm": 8.951191902160645,
|
|
"learning_rate": 6.998145243993284e-08,
|
|
"logits/chosen": 1.6496453285217285,
|
|
"logits/rejected": 1.6587225198745728,
|
|
"logps/chosen": -174.9427032470703,
|
|
"logps/ref_chosen": -77.06817626953125,
|
|
"logps/ref_rejected": -80.048583984375,
|
|
"logps/rejected": -209.8512420654297,
|
|
"loss": 1.2711,
|
|
"margin_dpo/margin_mean": 31.928133010864258,
|
|
"margin_dpo/margin_std": 91.1561279296875,
|
|
"step": 517
|
|
},
|
|
{
|
|
"epoch": 0.783068783068783,
|
|
"fcm_dpo/beta": 0.00768995750695467,
|
|
"fcm_dpo/delta": -0.0029140058904886246,
|
|
"fcm_dpo/margin": 52.375755310058594,
|
|
"fcm_dpo/q_t": 0.4096565544605255,
|
|
"grad_norm": 9.834497451782227,
|
|
"learning_rate": 6.906649047373245e-08,
|
|
"logits/chosen": 1.4923443794250488,
|
|
"logits/rejected": 1.3131781816482544,
|
|
"logps/chosen": -149.75588989257812,
|
|
"logps/ref_chosen": -78.69026184082031,
|
|
"logps/ref_rejected": -97.58124542236328,
|
|
"logps/rejected": -221.02264404296875,
|
|
"loss": 1.1372,
|
|
"margin_dpo/margin_mean": 52.375755310058594,
|
|
"margin_dpo/margin_std": 90.88835144042969,
|
|
"step": 518
|
|
},
|
|
{
|
|
"epoch": 0.7845804988662132,
|
|
"fcm_dpo/beta": 0.007714019622653723,
|
|
"fcm_dpo/delta": 0.02117512747645378,
|
|
"fcm_dpo/margin": 31.934837341308594,
|
|
"fcm_dpo/q_t": 0.4449980854988098,
|
|
"grad_norm": 8.429184913635254,
|
|
"learning_rate": 6.815658960673781e-08,
|
|
"logits/chosen": 1.5910195112228394,
|
|
"logits/rejected": 1.467621088027954,
|
|
"logps/chosen": -176.11871337890625,
|
|
"logps/ref_chosen": -78.35087585449219,
|
|
"logps/ref_rejected": -95.79212188720703,
|
|
"logps/rejected": -225.4947967529297,
|
|
"loss": 1.3203,
|
|
"margin_dpo/margin_mean": 31.934839248657227,
|
|
"margin_dpo/margin_std": 107.92922973632812,
|
|
"step": 519
|
|
},
|
|
{
|
|
"epoch": 0.7860922146636432,
|
|
"fcm_dpo/beta": 0.007716222666203976,
|
|
"fcm_dpo/delta": 0.03524667024612427,
|
|
"fcm_dpo/margin": 47.34004211425781,
|
|
"fcm_dpo/q_t": 0.41651666164398193,
|
|
"grad_norm": 8.534330368041992,
|
|
"learning_rate": 6.725177529083209e-08,
|
|
"logits/chosen": 1.6108531951904297,
|
|
"logits/rejected": 1.4225859642028809,
|
|
"logps/chosen": -163.86770629882812,
|
|
"logps/ref_chosen": -80.40513610839844,
|
|
"logps/ref_rejected": -93.02791595458984,
|
|
"logps/rejected": -223.83053588867188,
|
|
"loss": 1.1325,
|
|
"margin_dpo/margin_mean": 47.34003829956055,
|
|
"margin_dpo/margin_std": 72.44415283203125,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 0.7876039304610734,
|
|
"fcm_dpo/beta": 0.007807170040905476,
|
|
"fcm_dpo/delta": -0.018284276127815247,
|
|
"fcm_dpo/margin": 53.414710998535156,
|
|
"fcm_dpo/q_t": 0.4060171842575073,
|
|
"grad_norm": 8.058144569396973,
|
|
"learning_rate": 6.63520728356167e-08,
|
|
"logits/chosen": 1.2749577760696411,
|
|
"logits/rejected": 1.0321365594863892,
|
|
"logps/chosen": -174.77325439453125,
|
|
"logps/ref_chosen": -86.5218276977539,
|
|
"logps/ref_rejected": -109.20257568359375,
|
|
"logps/rejected": -250.86871337890625,
|
|
"loss": 1.1165,
|
|
"margin_dpo/margin_mean": 53.414710998535156,
|
|
"margin_dpo/margin_std": 84.53794860839844,
|
|
"step": 521
|
|
},
|
|
{
|
|
"epoch": 0.7891156462585034,
|
|
"fcm_dpo/beta": 0.007875899784266949,
|
|
"fcm_dpo/delta": 0.10553185641765594,
|
|
"fcm_dpo/margin": 37.79329299926758,
|
|
"fcm_dpo/q_t": 0.4356454014778137,
|
|
"grad_norm": 9.020870208740234,
|
|
"learning_rate": 6.545750740770336e-08,
|
|
"logits/chosen": 1.2026581764221191,
|
|
"logits/rejected": 1.2267944812774658,
|
|
"logps/chosen": -168.311767578125,
|
|
"logps/ref_chosen": -78.24254608154297,
|
|
"logps/ref_rejected": -85.23554992675781,
|
|
"logps/rejected": -213.09805297851562,
|
|
"loss": 1.2329,
|
|
"margin_dpo/margin_mean": 37.793296813964844,
|
|
"margin_dpo/margin_std": 92.2118911743164,
|
|
"step": 522
|
|
},
|
|
{
|
|
"epoch": 0.7906273620559335,
|
|
"fcm_dpo/beta": 0.007950318977236748,
|
|
"fcm_dpo/delta": 0.054845184087753296,
|
|
"fcm_dpo/margin": 43.630802154541016,
|
|
"fcm_dpo/q_t": 0.4217304587364197,
|
|
"grad_norm": 10.220833778381348,
|
|
"learning_rate": 6.456810403001012e-08,
|
|
"logits/chosen": 1.435563087463379,
|
|
"logits/rejected": 1.1382131576538086,
|
|
"logps/chosen": -179.95175170898438,
|
|
"logps/ref_chosen": -83.50096893310547,
|
|
"logps/ref_rejected": -117.45217895507812,
|
|
"logps/rejected": -257.53375244140625,
|
|
"loss": 1.1755,
|
|
"margin_dpo/margin_mean": 43.63079833984375,
|
|
"margin_dpo/margin_std": 83.78480529785156,
|
|
"step": 523
|
|
},
|
|
{
|
|
"epoch": 0.7921390778533636,
|
|
"fcm_dpo/beta": 0.008069904521107674,
|
|
"fcm_dpo/delta": 0.020586442202329636,
|
|
"fcm_dpo/margin": 47.05378723144531,
|
|
"fcm_dpo/q_t": 0.41369175910949707,
|
|
"grad_norm": 8.774334907531738,
|
|
"learning_rate": 6.368388758106134e-08,
|
|
"logits/chosen": 1.3084217309951782,
|
|
"logits/rejected": 1.2617387771606445,
|
|
"logps/chosen": -165.7652130126953,
|
|
"logps/ref_chosen": -93.22590637207031,
|
|
"logps/ref_rejected": -108.17863464355469,
|
|
"logps/rejected": -227.771728515625,
|
|
"loss": 1.1598,
|
|
"margin_dpo/margin_mean": 47.05378723144531,
|
|
"margin_dpo/margin_std": 87.1861343383789,
|
|
"step": 524
|
|
},
|
|
{
|
|
"epoch": 0.7936507936507936,
|
|
"fcm_dpo/beta": 0.008036179468035698,
|
|
"fcm_dpo/delta": -0.003930669277906418,
|
|
"fcm_dpo/margin": 30.131698608398438,
|
|
"fcm_dpo/q_t": 0.4459991455078125,
|
|
"grad_norm": 10.040858268737793,
|
|
"learning_rate": 6.280488279429185e-08,
|
|
"logits/chosen": 0.972198486328125,
|
|
"logits/rejected": 0.968452513217926,
|
|
"logps/chosen": -189.3087158203125,
|
|
"logps/ref_chosen": -94.08831787109375,
|
|
"logps/ref_rejected": -100.682373046875,
|
|
"logps/rejected": -226.03445434570312,
|
|
"loss": 1.2665,
|
|
"margin_dpo/margin_mean": 30.131698608398438,
|
|
"margin_dpo/margin_std": 81.79655456542969,
|
|
"step": 525
|
|
},
|
|
{
|
|
"epoch": 0.7951625094482238,
|
|
"fcm_dpo/beta": 0.007959752343595028,
|
|
"fcm_dpo/delta": -0.047779276967048645,
|
|
"fcm_dpo/margin": 39.6178092956543,
|
|
"fcm_dpo/q_t": 0.42807871103286743,
|
|
"grad_norm": 12.535789489746094,
|
|
"learning_rate": 6.193111425735515e-08,
|
|
"logits/chosen": 1.7268145084381104,
|
|
"logits/rejected": 1.5319491624832153,
|
|
"logps/chosen": -172.51361083984375,
|
|
"logps/ref_chosen": -77.78373718261719,
|
|
"logps/ref_rejected": -100.29583740234375,
|
|
"logps/rejected": -234.64352416992188,
|
|
"loss": 1.1984,
|
|
"margin_dpo/margin_mean": 39.61780548095703,
|
|
"margin_dpo/margin_std": 80.48246765136719,
|
|
"step": 526
|
|
},
|
|
{
|
|
"epoch": 0.7966742252456538,
|
|
"fcm_dpo/beta": 0.008134608156979084,
|
|
"fcm_dpo/delta": 0.12869685888290405,
|
|
"fcm_dpo/margin": 33.732025146484375,
|
|
"fcm_dpo/q_t": 0.4389396607875824,
|
|
"grad_norm": 12.428853988647461,
|
|
"learning_rate": 6.106260641143546e-08,
|
|
"logits/chosen": 1.9265590906143188,
|
|
"logits/rejected": 1.6606621742248535,
|
|
"logps/chosen": -182.11387634277344,
|
|
"logps/ref_chosen": -76.695068359375,
|
|
"logps/ref_rejected": -107.68281555175781,
|
|
"logps/rejected": -246.83364868164062,
|
|
"loss": 1.2233,
|
|
"margin_dpo/margin_mean": 33.732025146484375,
|
|
"margin_dpo/margin_std": 77.11158752441406,
|
|
"step": 527
|
|
},
|
|
{
|
|
"epoch": 0.7981859410430839,
|
|
"fcm_dpo/beta": 0.008256562054157257,
|
|
"fcm_dpo/delta": 0.07902608066797256,
|
|
"fcm_dpo/margin": 39.19126892089844,
|
|
"fcm_dpo/q_t": 0.42815977334976196,
|
|
"grad_norm": 13.860108375549316,
|
|
"learning_rate": 6.019938355056422e-08,
|
|
"logits/chosen": 1.4714868068695068,
|
|
"logits/rejected": 1.2811636924743652,
|
|
"logps/chosen": -157.07546997070312,
|
|
"logps/ref_chosen": -75.0361328125,
|
|
"logps/ref_rejected": -94.67579650878906,
|
|
"logps/rejected": -215.90640258789062,
|
|
"loss": 1.2167,
|
|
"margin_dpo/margin_mean": 39.19126892089844,
|
|
"margin_dpo/margin_std": 90.16360473632812,
|
|
"step": 528
|
|
},
|
|
{
|
|
"epoch": 0.799697656840514,
|
|
"fcm_dpo/beta": 0.007970629259943962,
|
|
"fcm_dpo/delta": -0.23854364454746246,
|
|
"fcm_dpo/margin": 78.05899810791016,
|
|
"fcm_dpo/q_t": 0.3612556457519531,
|
|
"grad_norm": 5.713718414306641,
|
|
"learning_rate": 5.934146982094049e-08,
|
|
"logits/chosen": 1.3859180212020874,
|
|
"logits/rejected": 1.2408912181854248,
|
|
"logps/chosen": -147.29428100585938,
|
|
"logps/ref_chosen": -72.84869384765625,
|
|
"logps/ref_rejected": -93.25855255126953,
|
|
"logps/rejected": -245.76315307617188,
|
|
"loss": 0.979,
|
|
"margin_dpo/margin_mean": 78.05899810791016,
|
|
"margin_dpo/margin_std": 91.38536071777344,
|
|
"step": 529
|
|
},
|
|
{
|
|
"epoch": 0.8012093726379441,
|
|
"fcm_dpo/beta": 0.007819782942533493,
|
|
"fcm_dpo/delta": -0.05217093229293823,
|
|
"fcm_dpo/margin": 57.46356964111328,
|
|
"fcm_dpo/q_t": 0.4001220464706421,
|
|
"grad_norm": 7.648416996002197,
|
|
"learning_rate": 5.848888922025552e-08,
|
|
"logits/chosen": 1.516282558441162,
|
|
"logits/rejected": 1.4072870016098022,
|
|
"logps/chosen": -155.42422485351562,
|
|
"logps/ref_chosen": -79.4971694946289,
|
|
"logps/ref_rejected": -93.59564208984375,
|
|
"logps/rejected": -226.98626708984375,
|
|
"loss": 1.0785,
|
|
"margin_dpo/margin_mean": 57.46356201171875,
|
|
"margin_dpo/margin_std": 80.2945327758789,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 0.8027210884353742,
|
|
"fcm_dpo/beta": 0.007811452262103558,
|
|
"fcm_dpo/delta": -0.022111359983682632,
|
|
"fcm_dpo/margin": 41.554649353027344,
|
|
"fcm_dpo/q_t": 0.42616134881973267,
|
|
"grad_norm": 7.592843532562256,
|
|
"learning_rate": 5.7641665597021435e-08,
|
|
"logits/chosen": 1.5034434795379639,
|
|
"logits/rejected": 1.340606689453125,
|
|
"logps/chosen": -156.27935791015625,
|
|
"logps/ref_chosen": -69.45396423339844,
|
|
"logps/ref_rejected": -96.30017852783203,
|
|
"logps/rejected": -224.68020629882812,
|
|
"loss": 1.1692,
|
|
"margin_dpo/margin_mean": 41.554649353027344,
|
|
"margin_dpo/margin_std": 73.87443542480469,
|
|
"step": 531
|
|
},
|
|
{
|
|
"epoch": 0.8042328042328042,
|
|
"fcm_dpo/beta": 0.007778765633702278,
|
|
"fcm_dpo/delta": 0.014322098344564438,
|
|
"fcm_dpo/margin": 49.61630630493164,
|
|
"fcm_dpo/q_t": 0.41433119773864746,
|
|
"grad_norm": 8.264182090759277,
|
|
"learning_rate": 5.679982264990424e-08,
|
|
"logits/chosen": 1.2624887228012085,
|
|
"logits/rejected": 1.1222734451293945,
|
|
"logps/chosen": -179.19329833984375,
|
|
"logps/ref_chosen": -76.52011108398438,
|
|
"logps/ref_rejected": -94.79593658447266,
|
|
"logps/rejected": -247.08544921875,
|
|
"loss": 1.1447,
|
|
"margin_dpo/margin_mean": 49.61630630493164,
|
|
"margin_dpo/margin_std": 86.81497192382812,
|
|
"step": 532
|
|
},
|
|
{
|
|
"epoch": 0.8057445200302343,
|
|
"fcm_dpo/beta": 0.007856892421841621,
|
|
"fcm_dpo/delta": 0.01767205074429512,
|
|
"fcm_dpo/margin": 48.72636032104492,
|
|
"fcm_dpo/q_t": 0.4150225520133972,
|
|
"grad_norm": 9.463761329650879,
|
|
"learning_rate": 5.596338392706076e-08,
|
|
"logits/chosen": 1.8482627868652344,
|
|
"logits/rejected": 1.6290550231933594,
|
|
"logps/chosen": -143.453369140625,
|
|
"logps/ref_chosen": -72.31800842285156,
|
|
"logps/ref_rejected": -89.26652526855469,
|
|
"logps/rejected": -209.12826538085938,
|
|
"loss": 1.1506,
|
|
"margin_dpo/margin_mean": 48.72636032104492,
|
|
"margin_dpo/margin_std": 87.32180786132812,
|
|
"step": 533
|
|
},
|
|
{
|
|
"epoch": 0.8072562358276644,
|
|
"fcm_dpo/beta": 0.007759833708405495,
|
|
"fcm_dpo/delta": -0.03004516288638115,
|
|
"fcm_dpo/margin": 55.12841033935547,
|
|
"fcm_dpo/q_t": 0.40312954783439636,
|
|
"grad_norm": 8.84256649017334,
|
|
"learning_rate": 5.513237282548033e-08,
|
|
"logits/chosen": 1.4910259246826172,
|
|
"logits/rejected": 1.4467054605484009,
|
|
"logps/chosen": -148.83465576171875,
|
|
"logps/ref_chosen": -77.87559509277344,
|
|
"logps/ref_rejected": -92.21171569824219,
|
|
"logps/rejected": -218.29917907714844,
|
|
"loss": 1.1012,
|
|
"margin_dpo/margin_mean": 55.12840270996094,
|
|
"margin_dpo/margin_std": 81.7762451171875,
|
|
"step": 534
|
|
},
|
|
{
|
|
"epoch": 0.8087679516250945,
|
|
"fcm_dpo/beta": 0.00789577979594469,
|
|
"fcm_dpo/delta": 0.0677485466003418,
|
|
"fcm_dpo/margin": 42.331016540527344,
|
|
"fcm_dpo/q_t": 0.42601895332336426,
|
|
"grad_norm": 7.4685587882995605,
|
|
"learning_rate": 5.430681259032957e-08,
|
|
"logits/chosen": 1.137467622756958,
|
|
"logits/rejected": 0.9851101636886597,
|
|
"logps/chosen": -168.10491943359375,
|
|
"logps/ref_chosen": -78.16358184814453,
|
|
"logps/ref_rejected": -97.78164672851562,
|
|
"logps/rejected": -230.0540008544922,
|
|
"loss": 1.1951,
|
|
"margin_dpo/margin_mean": 42.331016540527344,
|
|
"margin_dpo/margin_std": 88.60259246826172,
|
|
"step": 535
|
|
},
|
|
{
|
|
"epoch": 0.8102796674225246,
|
|
"fcm_dpo/beta": 0.0078198853880167,
|
|
"fcm_dpo/delta": -0.08114311099052429,
|
|
"fcm_dpo/margin": 61.04181671142578,
|
|
"fcm_dpo/q_t": 0.3922014832496643,
|
|
"grad_norm": 6.651211738586426,
|
|
"learning_rate": 5.3486726314303175e-08,
|
|
"logits/chosen": 1.4360030889511108,
|
|
"logits/rejected": 1.2131242752075195,
|
|
"logps/chosen": -145.51263427734375,
|
|
"logps/ref_chosen": -66.65623474121094,
|
|
"logps/ref_rejected": -89.49085998535156,
|
|
"logps/rejected": -229.38906860351562,
|
|
"loss": 1.0404,
|
|
"margin_dpo/margin_mean": 61.04182434082031,
|
|
"margin_dpo/margin_std": 73.56864166259766,
|
|
"step": 536
|
|
},
|
|
{
|
|
"epoch": 0.8117913832199547,
|
|
"fcm_dpo/beta": 0.007776736747473478,
|
|
"fcm_dpo/delta": 0.02339194528758526,
|
|
"fcm_dpo/margin": 48.52098846435547,
|
|
"fcm_dpo/q_t": 0.4167477488517761,
|
|
"grad_norm": 6.910667896270752,
|
|
"learning_rate": 5.267213693697695e-08,
|
|
"logits/chosen": 1.7525105476379395,
|
|
"logits/rejected": 1.4899015426635742,
|
|
"logps/chosen": -174.97885131835938,
|
|
"logps/ref_chosen": -74.99390411376953,
|
|
"logps/ref_rejected": -110.6627197265625,
|
|
"logps/rejected": -259.16864013671875,
|
|
"loss": 1.1461,
|
|
"margin_dpo/margin_mean": 48.52098846435547,
|
|
"margin_dpo/margin_std": 84.7010498046875,
|
|
"step": 537
|
|
},
|
|
{
|
|
"epoch": 0.8133030990173847,
|
|
"fcm_dpo/beta": 0.007822012528777122,
|
|
"fcm_dpo/delta": -0.037601351737976074,
|
|
"fcm_dpo/margin": 55.653079986572266,
|
|
"fcm_dpo/q_t": 0.4035933315753937,
|
|
"grad_norm": 6.632546424865723,
|
|
"learning_rate": 5.1863067244167144e-08,
|
|
"logits/chosen": 1.4656171798706055,
|
|
"logits/rejected": 1.3803694248199463,
|
|
"logps/chosen": -179.07177734375,
|
|
"logps/ref_chosen": -87.61151123046875,
|
|
"logps/ref_rejected": -98.1150131225586,
|
|
"logps/rejected": -245.22833251953125,
|
|
"loss": 1.1038,
|
|
"margin_dpo/margin_mean": 55.653079986572266,
|
|
"margin_dpo/margin_std": 84.92758178710938,
|
|
"step": 538
|
|
},
|
|
{
|
|
"epoch": 0.8148148148148148,
|
|
"fcm_dpo/beta": 0.007802900858223438,
|
|
"fcm_dpo/delta": 0.05218294635415077,
|
|
"fcm_dpo/margin": 44.81316375732422,
|
|
"fcm_dpo/q_t": 0.4222569763660431,
|
|
"grad_norm": 8.919053077697754,
|
|
"learning_rate": 5.105953986729195e-08,
|
|
"logits/chosen": 1.5954413414001465,
|
|
"logits/rejected": 1.256216049194336,
|
|
"logps/chosen": -169.81942749023438,
|
|
"logps/ref_chosen": -78.86482238769531,
|
|
"logps/ref_rejected": -100.84349822998047,
|
|
"logps/rejected": -236.61126708984375,
|
|
"loss": 1.1518,
|
|
"margin_dpo/margin_mean": 44.813167572021484,
|
|
"margin_dpo/margin_std": 77.50193786621094,
|
|
"step": 539
|
|
},
|
|
{
|
|
"epoch": 0.8163265306122449,
|
|
"fcm_dpo/beta": 0.007814684882760048,
|
|
"fcm_dpo/delta": -0.043207671493291855,
|
|
"fcm_dpo/margin": 56.455596923828125,
|
|
"fcm_dpo/q_t": 0.4015369117259979,
|
|
"grad_norm": 7.1346659660339355,
|
|
"learning_rate": 5.026157728273966e-08,
|
|
"logits/chosen": 1.431809425354004,
|
|
"logits/rejected": 1.1509695053100586,
|
|
"logps/chosen": -171.71527099609375,
|
|
"logps/ref_chosen": -83.66409301757812,
|
|
"logps/ref_rejected": -114.8860092163086,
|
|
"logps/rejected": -259.39276123046875,
|
|
"loss": 1.1013,
|
|
"margin_dpo/margin_mean": 56.45559310913086,
|
|
"margin_dpo/margin_std": 87.17050170898438,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 0.817838246409675,
|
|
"fcm_dpo/beta": 0.007632635533809662,
|
|
"fcm_dpo/delta": -0.09920863062143326,
|
|
"fcm_dpo/margin": 64.70645141601562,
|
|
"fcm_dpo/q_t": 0.38797062635421753,
|
|
"grad_norm": 6.952167510986328,
|
|
"learning_rate": 4.9469201811239035e-08,
|
|
"logits/chosen": 1.3315198421478271,
|
|
"logits/rejected": 1.3776156902313232,
|
|
"logps/chosen": -161.97076416015625,
|
|
"logps/ref_chosen": -83.12225341796875,
|
|
"logps/ref_rejected": -74.80526733398438,
|
|
"logps/rejected": -218.3602294921875,
|
|
"loss": 1.0381,
|
|
"margin_dpo/margin_mean": 64.70645141601562,
|
|
"margin_dpo/margin_std": 79.60298156738281,
|
|
"step": 541
|
|
},
|
|
{
|
|
"epoch": 0.8193499622071051,
|
|
"fcm_dpo/beta": 0.007459321990609169,
|
|
"fcm_dpo/delta": -0.1281038075685501,
|
|
"fcm_dpo/margin": 69.86752319335938,
|
|
"fcm_dpo/q_t": 0.38251811265945435,
|
|
"grad_norm": 6.959683895111084,
|
|
"learning_rate": 4.868243561723534e-08,
|
|
"logits/chosen": 1.6677457094192505,
|
|
"logits/rejected": 1.5448417663574219,
|
|
"logps/chosen": -135.40603637695312,
|
|
"logps/ref_chosen": -66.3132553100586,
|
|
"logps/ref_rejected": -83.24588012695312,
|
|
"logps/rejected": -222.2061767578125,
|
|
"loss": 1.033,
|
|
"margin_dpo/margin_mean": 69.86752319335938,
|
|
"margin_dpo/margin_std": 89.15379333496094,
|
|
"step": 542
|
|
},
|
|
{
|
|
"epoch": 0.8208616780045351,
|
|
"fcm_dpo/beta": 0.0074135130271315575,
|
|
"fcm_dpo/delta": -0.023007860407233238,
|
|
"fcm_dpo/margin": 56.92228317260742,
|
|
"fcm_dpo/q_t": 0.4051409065723419,
|
|
"grad_norm": 8.201580047607422,
|
|
"learning_rate": 4.790130070827028e-08,
|
|
"logits/chosen": 1.3210530281066895,
|
|
"logits/rejected": 1.05372953414917,
|
|
"logps/chosen": -150.39297485351562,
|
|
"logps/ref_chosen": -68.11429595947266,
|
|
"logps/ref_rejected": -94.62380981445312,
|
|
"logps/rejected": -233.8247833251953,
|
|
"loss": 1.1143,
|
|
"margin_dpo/margin_mean": 56.92228698730469,
|
|
"margin_dpo/margin_std": 91.03446960449219,
|
|
"step": 543
|
|
},
|
|
{
|
|
"epoch": 0.8223733938019653,
|
|
"fcm_dpo/beta": 0.0073410142213106155,
|
|
"fcm_dpo/delta": -0.03817659243941307,
|
|
"fcm_dpo/margin": 59.45600891113281,
|
|
"fcm_dpo/q_t": 0.4023340344429016,
|
|
"grad_norm": 7.93362283706665,
|
|
"learning_rate": 4.7125818934366454e-08,
|
|
"logits/chosen": 1.6760622262954712,
|
|
"logits/rejected": 1.4469034671783447,
|
|
"logps/chosen": -167.59967041015625,
|
|
"logps/ref_chosen": -81.187255859375,
|
|
"logps/ref_rejected": -105.84722900390625,
|
|
"logps/rejected": -251.71563720703125,
|
|
"loss": 1.0968,
|
|
"margin_dpo/margin_mean": 59.45600891113281,
|
|
"margin_dpo/margin_std": 89.44548034667969,
|
|
"step": 544
|
|
},
|
|
{
|
|
"epoch": 0.8238851095993953,
|
|
"fcm_dpo/beta": 0.007404550909996033,
|
|
"fcm_dpo/delta": 0.08629242330789566,
|
|
"fcm_dpo/margin": 42.74488067626953,
|
|
"fcm_dpo/q_t": 0.4291386604309082,
|
|
"grad_norm": 7.565622329711914,
|
|
"learning_rate": 4.635601198741607e-08,
|
|
"logits/chosen": 1.3802976608276367,
|
|
"logits/rejected": 1.1980688571929932,
|
|
"logps/chosen": -170.48873901367188,
|
|
"logps/ref_chosen": -78.81717681884766,
|
|
"logps/ref_rejected": -98.65876770019531,
|
|
"logps/rejected": -233.07521057128906,
|
|
"loss": 1.1938,
|
|
"margin_dpo/margin_mean": 42.74488067626953,
|
|
"margin_dpo/margin_std": 86.98738098144531,
|
|
"step": 545
|
|
},
|
|
{
|
|
"epoch": 0.8253968253968254,
|
|
"fcm_dpo/beta": 0.0075421445071697235,
|
|
"fcm_dpo/delta": 0.05457151308655739,
|
|
"fcm_dpo/margin": 46.011558532714844,
|
|
"fcm_dpo/q_t": 0.4226982593536377,
|
|
"grad_norm": 7.30482816696167,
|
|
"learning_rate": 4.559190140057428e-08,
|
|
"logits/chosen": 1.5349187850952148,
|
|
"logits/rejected": 1.5369720458984375,
|
|
"logps/chosen": -154.75668334960938,
|
|
"logps/ref_chosen": -74.2529296875,
|
|
"logps/ref_rejected": -80.32308959960938,
|
|
"logps/rejected": -206.83840942382812,
|
|
"loss": 1.1535,
|
|
"margin_dpo/margin_mean": 46.011558532714844,
|
|
"margin_dpo/margin_std": 78.93978881835938,
|
|
"step": 546
|
|
},
|
|
{
|
|
"epoch": 0.8269085411942555,
|
|
"fcm_dpo/beta": 0.007430747617036104,
|
|
"fcm_dpo/delta": -0.08842149376869202,
|
|
"fcm_dpo/margin": 65.12631225585938,
|
|
"fcm_dpo/q_t": 0.38987237215042114,
|
|
"grad_norm": 9.399640083312988,
|
|
"learning_rate": 4.483350854765672e-08,
|
|
"logits/chosen": 1.1584597826004028,
|
|
"logits/rejected": 0.9976880550384521,
|
|
"logps/chosen": -144.39535522460938,
|
|
"logps/ref_chosen": -69.9368896484375,
|
|
"logps/ref_rejected": -90.25672912597656,
|
|
"logps/rejected": -229.84152221679688,
|
|
"loss": 1.058,
|
|
"margin_dpo/margin_mean": 65.12631225585938,
|
|
"margin_dpo/margin_std": 86.3273696899414,
|
|
"step": 547
|
|
},
|
|
{
|
|
"epoch": 0.8284202569916855,
|
|
"fcm_dpo/beta": 0.007527903653681278,
|
|
"fcm_dpo/delta": 0.07334420830011368,
|
|
"fcm_dpo/margin": 43.605491638183594,
|
|
"fcm_dpo/q_t": 0.42615681886672974,
|
|
"grad_norm": 11.522804260253906,
|
|
"learning_rate": 4.4080854642541826e-08,
|
|
"logits/chosen": 1.3264061212539673,
|
|
"logits/rejected": 1.1371994018554688,
|
|
"logps/chosen": -176.6475067138672,
|
|
"logps/ref_chosen": -81.1605224609375,
|
|
"logps/ref_rejected": -99.7246322631836,
|
|
"logps/rejected": -238.81710815429688,
|
|
"loss": 1.1772,
|
|
"margin_dpo/margin_mean": 43.60548782348633,
|
|
"margin_dpo/margin_std": 82.16632080078125,
|
|
"step": 548
|
|
},
|
|
{
|
|
"epoch": 0.8299319727891157,
|
|
"fcm_dpo/beta": 0.007619916927069426,
|
|
"fcm_dpo/delta": 0.0813249796628952,
|
|
"fcm_dpo/margin": 42.149147033691406,
|
|
"fcm_dpo/q_t": 0.4282255172729492,
|
|
"grad_norm": 14.586856842041016,
|
|
"learning_rate": 4.333396073857723e-08,
|
|
"logits/chosen": 1.9198331832885742,
|
|
"logits/rejected": 1.7107844352722168,
|
|
"logps/chosen": -170.18838500976562,
|
|
"logps/ref_chosen": -80.49800872802734,
|
|
"logps/ref_rejected": -113.20750427246094,
|
|
"logps/rejected": -245.04702758789062,
|
|
"loss": 1.2076,
|
|
"margin_dpo/margin_mean": 42.149147033691406,
|
|
"margin_dpo/margin_std": 92.20016479492188,
|
|
"step": 549
|
|
},
|
|
{
|
|
"epoch": 0.8314436885865457,
|
|
"fcm_dpo/beta": 0.007620522286742926,
|
|
"fcm_dpo/delta": -0.01459870021790266,
|
|
"fcm_dpo/margin": 40.88159942626953,
|
|
"fcm_dpo/q_t": 0.42959415912628174,
|
|
"grad_norm": 11.19546127319336,
|
|
"learning_rate": 4.259284772799099e-08,
|
|
"logits/chosen": 1.6284606456756592,
|
|
"logits/rejected": 1.5507255792617798,
|
|
"logps/chosen": -167.57029724121094,
|
|
"logps/ref_chosen": -75.13760375976562,
|
|
"logps/ref_rejected": -79.04876708984375,
|
|
"logps/rejected": -212.36305236816406,
|
|
"loss": 1.2136,
|
|
"margin_dpo/margin_mean": 40.881595611572266,
|
|
"margin_dpo/margin_std": 90.40715789794922,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.8329554043839759,
|
|
"fcm_dpo/beta": 0.007725295145064592,
|
|
"fcm_dpo/delta": 0.06760243326425552,
|
|
"fcm_dpo/margin": 43.256893157958984,
|
|
"fcm_dpo/q_t": 0.42407017946243286,
|
|
"grad_norm": 13.680648803710938,
|
|
"learning_rate": 4.1857536341307176e-08,
|
|
"logits/chosen": 1.9498487710952759,
|
|
"logits/rejected": 1.7900187969207764,
|
|
"logps/chosen": -177.50527954101562,
|
|
"logps/ref_chosen": -85.4496078491211,
|
|
"logps/ref_rejected": -103.48530578613281,
|
|
"logps/rejected": -238.7978515625,
|
|
"loss": 1.1569,
|
|
"margin_dpo/margin_mean": 43.256893157958984,
|
|
"margin_dpo/margin_std": 74.33500671386719,
|
|
"step": 551
|
|
},
|
|
{
|
|
"epoch": 0.8344671201814059,
|
|
"fcm_dpo/beta": 0.007839495316147804,
|
|
"fcm_dpo/delta": 0.06313008815050125,
|
|
"fcm_dpo/margin": 43.128273010253906,
|
|
"fcm_dpo/q_t": 0.42163771390914917,
|
|
"grad_norm": 8.0436372756958,
|
|
"learning_rate": 4.112804714676593e-08,
|
|
"logits/chosen": 1.6295115947723389,
|
|
"logits/rejected": 1.4652934074401855,
|
|
"logps/chosen": -169.71463012695312,
|
|
"logps/ref_chosen": -82.01036071777344,
|
|
"logps/ref_rejected": -101.61884307861328,
|
|
"logps/rejected": -232.45138549804688,
|
|
"loss": 1.1854,
|
|
"margin_dpo/margin_mean": 43.128273010253906,
|
|
"margin_dpo/margin_std": 85.137451171875,
|
|
"step": 552
|
|
},
|
|
{
|
|
"epoch": 0.8359788359788359,
|
|
"fcm_dpo/beta": 0.007808818481862545,
|
|
"fcm_dpo/delta": -0.0008383337408304214,
|
|
"fcm_dpo/margin": 51.32048797607422,
|
|
"fcm_dpo/q_t": 0.41101810336112976,
|
|
"grad_norm": 7.586859703063965,
|
|
"learning_rate": 4.0404400549748144e-08,
|
|
"logits/chosen": 1.6329729557037354,
|
|
"logits/rejected": 1.3663992881774902,
|
|
"logps/chosen": -168.96517944335938,
|
|
"logps/ref_chosen": -73.81416320800781,
|
|
"logps/ref_rejected": -104.27050018310547,
|
|
"logps/rejected": -250.7420196533203,
|
|
"loss": 1.1658,
|
|
"margin_dpo/margin_mean": 51.32048797607422,
|
|
"margin_dpo/margin_std": 98.8353271484375,
|
|
"step": 553
|
|
},
|
|
{
|
|
"epoch": 0.8374905517762661,
|
|
"fcm_dpo/beta": 0.007749027572572231,
|
|
"fcm_dpo/delta": -0.03996382653713226,
|
|
"fcm_dpo/margin": 56.50690460205078,
|
|
"fcm_dpo/q_t": 0.4017537832260132,
|
|
"grad_norm": 8.198256492614746,
|
|
"learning_rate": 3.968661679220467e-08,
|
|
"logits/chosen": 1.1512682437896729,
|
|
"logits/rejected": 1.1083042621612549,
|
|
"logps/chosen": -169.98992919921875,
|
|
"logps/ref_chosen": -81.43980407714844,
|
|
"logps/ref_rejected": -89.32518005371094,
|
|
"logps/rejected": -234.3822021484375,
|
|
"loss": 1.105,
|
|
"margin_dpo/margin_mean": 56.50690460205078,
|
|
"margin_dpo/margin_std": 87.52005767822266,
|
|
"step": 554
|
|
},
|
|
{
|
|
"epoch": 0.8390022675736961,
|
|
"fcm_dpo/beta": 0.007797827012836933,
|
|
"fcm_dpo/delta": -0.008863821625709534,
|
|
"fcm_dpo/margin": 52.293487548828125,
|
|
"fcm_dpo/q_t": 0.4080876410007477,
|
|
"grad_norm": 7.238057613372803,
|
|
"learning_rate": 3.89747159520904e-08,
|
|
"logits/chosen": 1.528789758682251,
|
|
"logits/rejected": 1.4679286479949951,
|
|
"logps/chosen": -178.78054809570312,
|
|
"logps/ref_chosen": -81.66071319580078,
|
|
"logps/ref_rejected": -87.20857238769531,
|
|
"logps/rejected": -236.6219024658203,
|
|
"loss": 1.154,
|
|
"margin_dpo/margin_mean": 52.293487548828125,
|
|
"margin_dpo/margin_std": 95.19200134277344,
|
|
"step": 555
|
|
},
|
|
{
|
|
"epoch": 0.8405139833711263,
|
|
"fcm_dpo/beta": 0.00775865837931633,
|
|
"fcm_dpo/delta": 0.0233792494982481,
|
|
"fcm_dpo/margin": 48.65314483642578,
|
|
"fcm_dpo/q_t": 0.41721731424331665,
|
|
"grad_norm": 10.24924373626709,
|
|
"learning_rate": 3.826871794280192e-08,
|
|
"logits/chosen": 1.7879140377044678,
|
|
"logits/rejected": 1.6586774587631226,
|
|
"logps/chosen": -162.71484375,
|
|
"logps/ref_chosen": -66.02448272705078,
|
|
"logps/ref_rejected": -82.74746704101562,
|
|
"logps/rejected": -228.09097290039062,
|
|
"loss": 1.1462,
|
|
"margin_dpo/margin_mean": 48.653141021728516,
|
|
"margin_dpo/margin_std": 85.63908386230469,
|
|
"step": 556
|
|
},
|
|
{
|
|
"epoch": 0.8420256991685563,
|
|
"fcm_dpo/beta": 0.0076392171904444695,
|
|
"fcm_dpo/delta": -0.0801071897149086,
|
|
"fcm_dpo/margin": 62.17573928833008,
|
|
"fcm_dpo/q_t": 0.3935287594795227,
|
|
"grad_norm": 6.463259220123291,
|
|
"learning_rate": 3.756864251262143e-08,
|
|
"logits/chosen": 1.7464919090270996,
|
|
"logits/rejected": 1.4219131469726562,
|
|
"logps/chosen": -164.66787719726562,
|
|
"logps/ref_chosen": -73.08985900878906,
|
|
"logps/ref_rejected": -97.43034362792969,
|
|
"logps/rejected": -251.18409729003906,
|
|
"loss": 1.0599,
|
|
"margin_dpo/margin_mean": 62.17573928833008,
|
|
"margin_dpo/margin_std": 81.93167114257812,
|
|
"step": 557
|
|
},
|
|
{
|
|
"epoch": 0.8435374149659864,
|
|
"fcm_dpo/beta": 0.007571034599095583,
|
|
"fcm_dpo/delta": -0.04616940766572952,
|
|
"fcm_dpo/margin": 58.581756591796875,
|
|
"fcm_dpo/q_t": 0.39998599886894226,
|
|
"grad_norm": 6.480743408203125,
|
|
"learning_rate": 3.687450924416341e-08,
|
|
"logits/chosen": 1.7585268020629883,
|
|
"logits/rejected": 1.6162996292114258,
|
|
"logps/chosen": -171.26419067382812,
|
|
"logps/ref_chosen": -80.1357192993164,
|
|
"logps/ref_rejected": -106.65797424316406,
|
|
"logps/rejected": -256.3681945800781,
|
|
"loss": 1.0993,
|
|
"margin_dpo/margin_mean": 58.581756591796875,
|
|
"margin_dpo/margin_std": 88.86549377441406,
|
|
"step": 558
|
|
},
|
|
{
|
|
"epoch": 0.8450491307634165,
|
|
"fcm_dpo/beta": 0.007496877107769251,
|
|
"fcm_dpo/delta": -0.05996110662817955,
|
|
"fcm_dpo/margin": 49.02497863769531,
|
|
"fcm_dpo/q_t": 0.41891011595726013,
|
|
"grad_norm": 7.072001934051514,
|
|
"learning_rate": 3.6186337553827743e-08,
|
|
"logits/chosen": 1.4914745092391968,
|
|
"logits/rejected": 1.255955696105957,
|
|
"logps/chosen": -169.67742919921875,
|
|
"logps/ref_chosen": -79.42267608642578,
|
|
"logps/ref_rejected": -98.59402465820312,
|
|
"logps/rejected": -237.87374877929688,
|
|
"loss": 1.1862,
|
|
"margin_dpo/margin_mean": 49.02497863769531,
|
|
"margin_dpo/margin_std": 99.9202880859375,
|
|
"step": 559
|
|
},
|
|
{
|
|
"epoch": 0.8465608465608465,
|
|
"fcm_dpo/beta": 0.007487626746296883,
|
|
"fcm_dpo/delta": -0.02866279147565365,
|
|
"fcm_dpo/margin": 57.073089599609375,
|
|
"fcm_dpo/q_t": 0.40354275703430176,
|
|
"grad_norm": 12.783280372619629,
|
|
"learning_rate": 3.550414669125573e-08,
|
|
"logits/chosen": 1.37535560131073,
|
|
"logits/rejected": 1.2821707725524902,
|
|
"logps/chosen": -170.34442138671875,
|
|
"logps/ref_chosen": -77.49559020996094,
|
|
"logps/ref_rejected": -92.61347961425781,
|
|
"logps/rejected": -242.535400390625,
|
|
"loss": 1.0977,
|
|
"margin_dpo/margin_mean": 57.073089599609375,
|
|
"margin_dpo/margin_std": 84.51661682128906,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 0.8480725623582767,
|
|
"fcm_dpo/beta": 0.007413958199322224,
|
|
"fcm_dpo/delta": -0.018094100058078766,
|
|
"fcm_dpo/margin": 56.26551818847656,
|
|
"fcm_dpo/q_t": 0.40612316131591797,
|
|
"grad_norm": 5.807117462158203,
|
|
"learning_rate": 3.482795573879241e-08,
|
|
"logits/chosen": 1.424013376235962,
|
|
"logits/rejected": 1.3693034648895264,
|
|
"logps/chosen": -168.86117553710938,
|
|
"logps/ref_chosen": -79.20771789550781,
|
|
"logps/ref_rejected": -93.46514892578125,
|
|
"logps/rejected": -239.38412475585938,
|
|
"loss": 1.0969,
|
|
"margin_dpo/margin_mean": 56.26551818847656,
|
|
"margin_dpo/margin_std": 80.80874633789062,
|
|
"step": 561
|
|
},
|
|
{
|
|
"epoch": 0.8495842781557067,
|
|
"fcm_dpo/beta": 0.0073739904910326,
|
|
"fcm_dpo/delta": -0.012268077582120895,
|
|
"fcm_dpo/margin": 55.75514221191406,
|
|
"fcm_dpo/q_t": 0.40779781341552734,
|
|
"grad_norm": 7.490271091461182,
|
|
"learning_rate": 3.415778361095226e-08,
|
|
"logits/chosen": 1.7233574390411377,
|
|
"logits/rejected": 1.5865188837051392,
|
|
"logps/chosen": -188.96682739257812,
|
|
"logps/ref_chosen": -94.88652801513672,
|
|
"logps/ref_rejected": -109.33815002441406,
|
|
"logps/rejected": -259.173583984375,
|
|
"loss": 1.1011,
|
|
"margin_dpo/margin_mean": 55.7551383972168,
|
|
"margin_dpo/margin_std": 80.49266052246094,
|
|
"step": 562
|
|
},
|
|
{
|
|
"epoch": 0.8510959939531368,
|
|
"fcm_dpo/beta": 0.0073908064514398575,
|
|
"fcm_dpo/delta": -0.026350384578108788,
|
|
"fcm_dpo/margin": 57.53425216674805,
|
|
"fcm_dpo/q_t": 0.4067358374595642,
|
|
"grad_norm": 11.612875938415527,
|
|
"learning_rate": 3.349364905389032e-08,
|
|
"logits/chosen": 1.7460877895355225,
|
|
"logits/rejected": 1.5561182498931885,
|
|
"logps/chosen": -144.20834350585938,
|
|
"logps/ref_chosen": -65.90719604492188,
|
|
"logps/ref_rejected": -84.07121276855469,
|
|
"logps/rejected": -219.9066162109375,
|
|
"loss": 1.1289,
|
|
"margin_dpo/margin_mean": 57.53425216674805,
|
|
"margin_dpo/margin_std": 98.30337524414062,
|
|
"step": 563
|
|
},
|
|
{
|
|
"epoch": 0.8526077097505669,
|
|
"fcm_dpo/beta": 0.007390887942165136,
|
|
"fcm_dpo/delta": -0.003784339874982834,
|
|
"fcm_dpo/margin": 54.58319854736328,
|
|
"fcm_dpo/q_t": 0.40921688079833984,
|
|
"grad_norm": 7.469838619232178,
|
|
"learning_rate": 3.283557064487785e-08,
|
|
"logits/chosen": 1.5153069496154785,
|
|
"logits/rejected": 1.4310331344604492,
|
|
"logps/chosen": -151.17416381835938,
|
|
"logps/ref_chosen": -72.32071685791016,
|
|
"logps/ref_rejected": -88.05014038085938,
|
|
"logps/rejected": -221.48675537109375,
|
|
"loss": 1.1491,
|
|
"margin_dpo/margin_mean": 54.58319854736328,
|
|
"margin_dpo/margin_std": 98.81858825683594,
|
|
"step": 564
|
|
},
|
|
{
|
|
"epoch": 0.854119425547997,
|
|
"fcm_dpo/beta": 0.007420201785862446,
|
|
"fcm_dpo/delta": 0.0206848606467247,
|
|
"fcm_dpo/margin": 51.16043472290039,
|
|
"fcm_dpo/q_t": 0.41406577825546265,
|
|
"grad_norm": 8.247408866882324,
|
|
"learning_rate": 3.218356679178252e-08,
|
|
"logits/chosen": 1.2496565580368042,
|
|
"logits/rejected": 1.1596516370773315,
|
|
"logps/chosen": -176.40371704101562,
|
|
"logps/ref_chosen": -80.18453979492188,
|
|
"logps/ref_rejected": -99.55126953125,
|
|
"logps/rejected": -246.93087768554688,
|
|
"loss": 1.1314,
|
|
"margin_dpo/margin_mean": 51.16043472290039,
|
|
"margin_dpo/margin_std": 82.370361328125,
|
|
"step": 565
|
|
},
|
|
{
|
|
"epoch": 0.8556311413454271,
|
|
"fcm_dpo/beta": 0.007455028593540192,
|
|
"fcm_dpo/delta": 0.0031773000955581665,
|
|
"fcm_dpo/margin": 53.09089279174805,
|
|
"fcm_dpo/q_t": 0.4105803370475769,
|
|
"grad_norm": 8.502391815185547,
|
|
"learning_rate": 3.1537655732553764e-08,
|
|
"logits/chosen": 1.427902102470398,
|
|
"logits/rejected": 1.3613345623016357,
|
|
"logps/chosen": -168.61880493164062,
|
|
"logps/ref_chosen": -88.0877914428711,
|
|
"logps/ref_rejected": -87.7589111328125,
|
|
"logps/rejected": -221.3808135986328,
|
|
"loss": 1.1421,
|
|
"margin_dpo/margin_mean": 53.09088897705078,
|
|
"margin_dpo/margin_std": 90.66483306884766,
|
|
"step": 566
|
|
},
|
|
{
|
|
"epoch": 0.8571428571428571,
|
|
"fcm_dpo/beta": 0.007287529297173023,
|
|
"fcm_dpo/delta": -0.06993226706981659,
|
|
"fcm_dpo/margin": 63.92971420288086,
|
|
"fcm_dpo/q_t": 0.39457574486732483,
|
|
"grad_norm": 11.652644157409668,
|
|
"learning_rate": 3.089785553471233e-08,
|
|
"logits/chosen": 1.7592835426330566,
|
|
"logits/rejected": 1.4737862348556519,
|
|
"logps/chosen": -156.90435791015625,
|
|
"logps/ref_chosen": -69.93267822265625,
|
|
"logps/ref_rejected": -95.71786499023438,
|
|
"logps/rejected": -246.6192626953125,
|
|
"loss": 1.0628,
|
|
"margin_dpo/margin_mean": 63.92971420288086,
|
|
"margin_dpo/margin_std": 84.06944274902344,
|
|
"step": 567
|
|
},
|
|
{
|
|
"epoch": 0.8586545729402872,
|
|
"fcm_dpo/beta": 0.007296562194824219,
|
|
"fcm_dpo/delta": -0.05656176805496216,
|
|
"fcm_dpo/margin": 62.0971565246582,
|
|
"fcm_dpo/q_t": 0.3984963893890381,
|
|
"grad_norm": 7.199716091156006,
|
|
"learning_rate": 3.026418409484513e-08,
|
|
"logits/chosen": 1.3262768983840942,
|
|
"logits/rejected": 1.1121933460235596,
|
|
"logps/chosen": -143.99913024902344,
|
|
"logps/ref_chosen": -70.33343505859375,
|
|
"logps/ref_rejected": -108.86271667480469,
|
|
"logps/rejected": -244.62554931640625,
|
|
"loss": 1.0754,
|
|
"margin_dpo/margin_mean": 62.09714889526367,
|
|
"margin_dpo/margin_std": 80.93890380859375,
|
|
"step": 568
|
|
},
|
|
{
|
|
"epoch": 0.8601662887377173,
|
|
"fcm_dpo/beta": 0.007188756484538317,
|
|
"fcm_dpo/delta": -0.01805015653371811,
|
|
"fcm_dpo/margin": 41.79746627807617,
|
|
"fcm_dpo/q_t": 0.4310176372528076,
|
|
"grad_norm": 8.492779731750488,
|
|
"learning_rate": 2.963665913810451e-08,
|
|
"logits/chosen": 1.2412662506103516,
|
|
"logits/rejected": 1.2150421142578125,
|
|
"logps/chosen": -167.37234497070312,
|
|
"logps/ref_chosen": -80.85043334960938,
|
|
"logps/ref_rejected": -92.77810668945312,
|
|
"logps/rejected": -221.0974884033203,
|
|
"loss": 1.2138,
|
|
"margin_dpo/margin_mean": 41.797462463378906,
|
|
"margin_dpo/margin_std": 90.91089630126953,
|
|
"step": 569
|
|
},
|
|
{
|
|
"epoch": 0.8616780045351474,
|
|
"fcm_dpo/beta": 0.007139476947486401,
|
|
"fcm_dpo/delta": -0.08498235046863556,
|
|
"fcm_dpo/margin": 67.33297729492188,
|
|
"fcm_dpo/q_t": 0.3915640115737915,
|
|
"grad_norm": 7.1222615242004395,
|
|
"learning_rate": 2.9015298217712453e-08,
|
|
"logits/chosen": 1.2934246063232422,
|
|
"logits/rejected": 1.1038811206817627,
|
|
"logps/chosen": -145.34852600097656,
|
|
"logps/ref_chosen": -69.94769287109375,
|
|
"logps/ref_rejected": -97.37059020996094,
|
|
"logps/rejected": -240.10440063476562,
|
|
"loss": 1.0557,
|
|
"margin_dpo/margin_mean": 67.33297729492188,
|
|
"margin_dpo/margin_std": 86.74455261230469,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 0.8631897203325775,
|
|
"fcm_dpo/beta": 0.0072095394134521484,
|
|
"fcm_dpo/delta": 0.12205064296722412,
|
|
"fcm_dpo/margin": 38.994258880615234,
|
|
"fcm_dpo/q_t": 0.435294508934021,
|
|
"grad_norm": 24.326377868652344,
|
|
"learning_rate": 2.840011871446962e-08,
|
|
"logits/chosen": 1.3863130807876587,
|
|
"logits/rejected": 1.3189308643341064,
|
|
"logps/chosen": -157.417724609375,
|
|
"logps/ref_chosen": -72.28555297851562,
|
|
"logps/ref_rejected": -84.57748413085938,
|
|
"logps/rejected": -208.70391845703125,
|
|
"loss": 1.2304,
|
|
"margin_dpo/margin_mean": 38.994258880615234,
|
|
"margin_dpo/margin_std": 92.22761535644531,
|
|
"step": 571
|
|
},
|
|
{
|
|
"epoch": 0.8647014361300076,
|
|
"fcm_dpo/beta": 0.007278360426425934,
|
|
"fcm_dpo/delta": 0.041402481496334076,
|
|
"fcm_dpo/margin": 49.476531982421875,
|
|
"fcm_dpo/q_t": 0.41775819659233093,
|
|
"grad_norm": 6.691201686859131,
|
|
"learning_rate": 2.7791137836269158e-08,
|
|
"logits/chosen": 1.612381935119629,
|
|
"logits/rejected": 1.7202537059783936,
|
|
"logps/chosen": -181.08970642089844,
|
|
"logps/ref_chosen": -91.4906997680664,
|
|
"logps/ref_rejected": -80.44602966308594,
|
|
"logps/rejected": -219.52157592773438,
|
|
"loss": 1.1267,
|
|
"margin_dpo/margin_mean": 49.476531982421875,
|
|
"margin_dpo/margin_std": 74.04133605957031,
|
|
"step": 572
|
|
},
|
|
{
|
|
"epoch": 0.8662131519274376,
|
|
"fcm_dpo/beta": 0.007352625019848347,
|
|
"fcm_dpo/delta": 0.05982336774468422,
|
|
"fcm_dpo/margin": 46.549224853515625,
|
|
"fcm_dpo/q_t": 0.4252585470676422,
|
|
"grad_norm": 7.987117290496826,
|
|
"learning_rate": 2.718837261761528e-08,
|
|
"logits/chosen": 1.4434504508972168,
|
|
"logits/rejected": 1.321394443511963,
|
|
"logps/chosen": -180.029052734375,
|
|
"logps/ref_chosen": -87.54232788085938,
|
|
"logps/ref_rejected": -104.32984924316406,
|
|
"logps/rejected": -243.36578369140625,
|
|
"loss": 1.2021,
|
|
"margin_dpo/margin_mean": 46.549224853515625,
|
|
"margin_dpo/margin_std": 101.46734619140625,
|
|
"step": 573
|
|
},
|
|
{
|
|
"epoch": 0.8677248677248677,
|
|
"fcm_dpo/beta": 0.007343747653067112,
|
|
"fcm_dpo/delta": -0.07292570173740387,
|
|
"fcm_dpo/margin": 63.92116165161133,
|
|
"fcm_dpo/q_t": 0.3938440978527069,
|
|
"grad_norm": 6.1711297035217285,
|
|
"learning_rate": 2.659183991914696e-08,
|
|
"logits/chosen": 1.590470790863037,
|
|
"logits/rejected": 1.4732091426849365,
|
|
"logps/chosen": -159.10311889648438,
|
|
"logps/ref_chosen": -75.36632537841797,
|
|
"logps/ref_rejected": -103.27328491210938,
|
|
"logps/rejected": -250.93124389648438,
|
|
"loss": 1.0603,
|
|
"margin_dpo/margin_mean": 63.92115783691406,
|
|
"margin_dpo/margin_std": 83.52648162841797,
|
|
"step": 574
|
|
},
|
|
{
|
|
"epoch": 0.8692365835222978,
|
|
"fcm_dpo/beta": 0.007190365344285965,
|
|
"fcm_dpo/delta": -0.06424938142299652,
|
|
"fcm_dpo/margin": 43.404239654541016,
|
|
"fcm_dpo/q_t": 0.43036943674087524,
|
|
"grad_norm": 7.720411777496338,
|
|
"learning_rate": 2.600155642716606e-08,
|
|
"logits/chosen": 1.527137279510498,
|
|
"logits/rejected": 1.3130396604537964,
|
|
"logps/chosen": -173.9998779296875,
|
|
"logps/ref_chosen": -81.678466796875,
|
|
"logps/ref_rejected": -112.84233093261719,
|
|
"logps/rejected": -248.56800842285156,
|
|
"loss": 1.2034,
|
|
"margin_dpo/margin_mean": 43.404239654541016,
|
|
"margin_dpo/margin_std": 88.2564926147461,
|
|
"step": 575
|
|
},
|
|
{
|
|
"epoch": 0.8707482993197279,
|
|
"fcm_dpo/beta": 0.007088257931172848,
|
|
"fcm_dpo/delta": -0.07014288008213043,
|
|
"fcm_dpo/margin": 65.78793334960938,
|
|
"fcm_dpo/q_t": 0.3944045901298523,
|
|
"grad_norm": 9.954662322998047,
|
|
"learning_rate": 2.5417538653170754e-08,
|
|
"logits/chosen": 1.54152250289917,
|
|
"logits/rejected": 1.2846912145614624,
|
|
"logps/chosen": -145.27874755859375,
|
|
"logps/ref_chosen": -68.78944396972656,
|
|
"logps/ref_rejected": -102.79037475585938,
|
|
"logps/rejected": -245.06761169433594,
|
|
"loss": 1.0648,
|
|
"margin_dpo/margin_mean": 65.78793334960938,
|
|
"margin_dpo/margin_std": 87.97828674316406,
|
|
"step": 576
|
|
},
|
|
{
|
|
"epoch": 0.872260015117158,
|
|
"fcm_dpo/beta": 0.0072364904917776585,
|
|
"fcm_dpo/delta": 0.13765141367912292,
|
|
"fcm_dpo/margin": 36.76244354248047,
|
|
"fcm_dpo/q_t": 0.43941205739974976,
|
|
"grad_norm": 6.795727729797363,
|
|
"learning_rate": 2.4839802933393607e-08,
|
|
"logits/chosen": 1.2348496913909912,
|
|
"logits/rejected": 1.201481819152832,
|
|
"logps/chosen": -163.54574584960938,
|
|
"logps/ref_chosen": -79.84675598144531,
|
|
"logps/ref_rejected": -84.08309936523438,
|
|
"logps/rejected": -204.54454040527344,
|
|
"loss": 1.2285,
|
|
"margin_dpo/margin_mean": 36.76244354248047,
|
|
"margin_dpo/margin_std": 85.11286163330078,
|
|
"step": 577
|
|
},
|
|
{
|
|
"epoch": 0.873771730914588,
|
|
"fcm_dpo/beta": 0.00742256548255682,
|
|
"fcm_dpo/delta": 0.13279461860656738,
|
|
"fcm_dpo/margin": 36.504695892333984,
|
|
"fcm_dpo/q_t": 0.4408913850784302,
|
|
"grad_norm": 8.053617477416992,
|
|
"learning_rate": 2.4268365428344733e-08,
|
|
"logits/chosen": 1.703123688697815,
|
|
"logits/rejected": 1.610666036605835,
|
|
"logps/chosen": -160.46142578125,
|
|
"logps/ref_chosen": -74.91357421875,
|
|
"logps/ref_rejected": -83.64881896972656,
|
|
"logps/rejected": -205.7013702392578,
|
|
"loss": 1.215,
|
|
"margin_dpo/margin_mean": 36.504695892333984,
|
|
"margin_dpo/margin_std": 79.07931518554688,
|
|
"step": 578
|
|
},
|
|
{
|
|
"epoch": 0.8752834467120182,
|
|
"fcm_dpo/beta": 0.007423688657581806,
|
|
"fcm_dpo/delta": -0.02768631838262081,
|
|
"fcm_dpo/margin": 57.395477294921875,
|
|
"fcm_dpo/q_t": 0.40147894620895386,
|
|
"grad_norm": 6.850172519683838,
|
|
"learning_rate": 2.3703242122359357e-08,
|
|
"logits/chosen": 1.2945516109466553,
|
|
"logits/rejected": 1.2207485437393188,
|
|
"logps/chosen": -168.03199768066406,
|
|
"logps/ref_chosen": -75.51022338867188,
|
|
"logps/ref_rejected": -84.83192443847656,
|
|
"logps/rejected": -234.74917602539062,
|
|
"loss": 1.087,
|
|
"margin_dpo/margin_mean": 57.395477294921875,
|
|
"margin_dpo/margin_std": 79.23515319824219,
|
|
"step": 579
|
|
},
|
|
{
|
|
"epoch": 0.8767951625094482,
|
|
"fcm_dpo/beta": 0.007472585886716843,
|
|
"fcm_dpo/delta": 0.005576103925704956,
|
|
"fcm_dpo/margin": 52.784088134765625,
|
|
"fcm_dpo/q_t": 0.4105369448661804,
|
|
"grad_norm": 6.64032506942749,
|
|
"learning_rate": 2.3144448823151392e-08,
|
|
"logits/chosen": 1.4655518531799316,
|
|
"logits/rejected": 1.2879977226257324,
|
|
"logps/chosen": -152.16595458984375,
|
|
"logps/ref_chosen": -76.61564636230469,
|
|
"logps/ref_rejected": -97.09959411621094,
|
|
"logps/rejected": -225.43399047851562,
|
|
"loss": 1.1362,
|
|
"margin_dpo/margin_mean": 52.784088134765625,
|
|
"margin_dpo/margin_std": 89.77684020996094,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 0.8783068783068783,
|
|
"fcm_dpo/beta": 0.007484517525881529,
|
|
"fcm_dpo/delta": 0.05809904262423515,
|
|
"fcm_dpo/margin": 45.93260955810547,
|
|
"fcm_dpo/q_t": 0.4221350848674774,
|
|
"grad_norm": 7.525079727172852,
|
|
"learning_rate": 2.259200116137039e-08,
|
|
"logits/chosen": 1.4739665985107422,
|
|
"logits/rejected": 1.3425915241241455,
|
|
"logps/chosen": -169.97055053710938,
|
|
"logps/ref_chosen": -74.8531265258789,
|
|
"logps/ref_rejected": -101.5344009399414,
|
|
"logps/rejected": -242.58445739746094,
|
|
"loss": 1.1751,
|
|
"margin_dpo/margin_mean": 45.93260955810547,
|
|
"margin_dpo/margin_std": 87.29202270507812,
|
|
"step": 581
|
|
},
|
|
{
|
|
"epoch": 0.8798185941043084,
|
|
"fcm_dpo/beta": 0.007569895125925541,
|
|
"fcm_dpo/delta": 0.01635417342185974,
|
|
"fcm_dpo/margin": 50.751991271972656,
|
|
"fcm_dpo/q_t": 0.4134330749511719,
|
|
"grad_norm": 8.547701835632324,
|
|
"learning_rate": 2.204591459016525e-08,
|
|
"logits/chosen": 1.5489530563354492,
|
|
"logits/rejected": 1.6596481800079346,
|
|
"logps/chosen": -170.80535888671875,
|
|
"logps/ref_chosen": -81.07638549804688,
|
|
"logps/ref_rejected": -72.83570861816406,
|
|
"logps/rejected": -213.3166961669922,
|
|
"loss": 1.1526,
|
|
"margin_dpo/margin_mean": 50.751991271972656,
|
|
"margin_dpo/margin_std": 92.16818237304688,
|
|
"step": 582
|
|
},
|
|
{
|
|
"epoch": 0.8813303099017384,
|
|
"fcm_dpo/beta": 0.007639412768185139,
|
|
"fcm_dpo/delta": 0.06253516674041748,
|
|
"fcm_dpo/margin": 44.445091247558594,
|
|
"fcm_dpo/q_t": 0.4248436987400055,
|
|
"grad_norm": 9.364850044250488,
|
|
"learning_rate": 2.1506204384751064e-08,
|
|
"logits/chosen": 1.6426869630813599,
|
|
"logits/rejected": 1.3243718147277832,
|
|
"logps/chosen": -159.094482421875,
|
|
"logps/ref_chosen": -66.78465270996094,
|
|
"logps/ref_rejected": -106.45825958251953,
|
|
"logps/rejected": -243.21319580078125,
|
|
"loss": 1.2079,
|
|
"margin_dpo/margin_mean": 44.44509506225586,
|
|
"margin_dpo/margin_std": 98.5951919555664,
|
|
"step": 583
|
|
},
|
|
{
|
|
"epoch": 0.8828420256991686,
|
|
"fcm_dpo/beta": 0.007633395027369261,
|
|
"fcm_dpo/delta": 0.02281120792031288,
|
|
"fcm_dpo/margin": 49.43035888671875,
|
|
"fcm_dpo/q_t": 0.41661393642425537,
|
|
"grad_norm": 8.11761474609375,
|
|
"learning_rate": 2.09728856419826e-08,
|
|
"logits/chosen": 2.000200033187866,
|
|
"logits/rejected": 1.7143186330795288,
|
|
"logps/chosen": -134.8970184326172,
|
|
"logps/ref_chosen": -60.802913665771484,
|
|
"logps/ref_rejected": -99.45012664794922,
|
|
"logps/rejected": -222.97457885742188,
|
|
"loss": 1.17,
|
|
"margin_dpo/margin_mean": 49.43035888671875,
|
|
"margin_dpo/margin_std": 94.57049560546875,
|
|
"step": 584
|
|
},
|
|
{
|
|
"epoch": 0.8843537414965986,
|
|
"fcm_dpo/beta": 0.007718199864029884,
|
|
"fcm_dpo/delta": 0.03235515207052231,
|
|
"fcm_dpo/margin": 34.23884201049805,
|
|
"fcm_dpo/q_t": 0.440193235874176,
|
|
"grad_norm": 6.512060642242432,
|
|
"learning_rate": 2.044597327993153e-08,
|
|
"logits/chosen": 1.4816594123840332,
|
|
"logits/rejected": 1.3898038864135742,
|
|
"logps/chosen": -161.7969207763672,
|
|
"logps/ref_chosen": -75.92616271972656,
|
|
"logps/ref_rejected": -94.47601318359375,
|
|
"logps/rejected": -214.58563232421875,
|
|
"loss": 1.2388,
|
|
"margin_dpo/margin_mean": 34.23883819580078,
|
|
"margin_dpo/margin_std": 82.98875427246094,
|
|
"step": 585
|
|
},
|
|
{
|
|
"epoch": 0.8858654572940288,
|
|
"fcm_dpo/beta": 0.007636943832039833,
|
|
"fcm_dpo/delta": -0.08058130741119385,
|
|
"fcm_dpo/margin": 62.398765563964844,
|
|
"fcm_dpo/q_t": 0.3913051187992096,
|
|
"grad_norm": 9.164061546325684,
|
|
"learning_rate": 1.9925482037469187e-08,
|
|
"logits/chosen": 1.5348619222640991,
|
|
"logits/rejected": 1.399658441543579,
|
|
"logps/chosen": -147.88909912109375,
|
|
"logps/ref_chosen": -68.62062072753906,
|
|
"logps/ref_rejected": -81.98324584960938,
|
|
"logps/rejected": -223.65048217773438,
|
|
"loss": 1.0567,
|
|
"margin_dpo/margin_mean": 62.398765563964844,
|
|
"margin_dpo/margin_std": 82.34362030029297,
|
|
"step": 586
|
|
},
|
|
{
|
|
"epoch": 0.8873771730914588,
|
|
"fcm_dpo/beta": 0.007494157180190086,
|
|
"fcm_dpo/delta": -0.10239773988723755,
|
|
"fcm_dpo/margin": 66.33705139160156,
|
|
"fcm_dpo/q_t": 0.38791346549987793,
|
|
"grad_norm": 12.113510131835938,
|
|
"learning_rate": 1.9411426473854687e-08,
|
|
"logits/chosen": 1.3303208351135254,
|
|
"logits/rejected": 1.3321213722229004,
|
|
"logps/chosen": -153.1464385986328,
|
|
"logps/ref_chosen": -77.67031860351562,
|
|
"logps/ref_rejected": -79.35327911376953,
|
|
"logps/rejected": -221.1664581298828,
|
|
"loss": 1.0714,
|
|
"margin_dpo/margin_mean": 66.33705139160156,
|
|
"margin_dpo/margin_std": 96.68234252929688,
|
|
"step": 587
|
|
},
|
|
{
|
|
"epoch": 0.8888888888888888,
|
|
"fcm_dpo/beta": 0.007391571067273617,
|
|
"fcm_dpo/delta": -0.03514527529478073,
|
|
"fcm_dpo/margin": 58.586280822753906,
|
|
"fcm_dpo/q_t": 0.40127915143966675,
|
|
"grad_norm": 10.57682991027832,
|
|
"learning_rate": 1.890382096832699e-08,
|
|
"logits/chosen": 1.4161376953125,
|
|
"logits/rejected": 1.3039031028747559,
|
|
"logps/chosen": -164.74606323242188,
|
|
"logps/ref_chosen": -77.94320678710938,
|
|
"logps/ref_rejected": -98.41210174560547,
|
|
"logps/rejected": -243.8012237548828,
|
|
"loss": 1.0924,
|
|
"margin_dpo/margin_mean": 58.586280822753906,
|
|
"margin_dpo/margin_std": 84.51554870605469,
|
|
"step": 588
|
|
},
|
|
{
|
|
"epoch": 0.890400604686319,
|
|
"fcm_dpo/beta": 0.00733354315161705,
|
|
"fcm_dpo/delta": -0.0650940015912056,
|
|
"fcm_dpo/margin": 62.99828338623047,
|
|
"fcm_dpo/q_t": 0.3934873938560486,
|
|
"grad_norm": 10.318829536437988,
|
|
"learning_rate": 1.840267971970344e-08,
|
|
"logits/chosen": 1.322197437286377,
|
|
"logits/rejected": 1.2743580341339111,
|
|
"logps/chosen": -153.74691772460938,
|
|
"logps/ref_chosen": -75.18646240234375,
|
|
"logps/ref_rejected": -93.35910034179688,
|
|
"logps/rejected": -234.9178466796875,
|
|
"loss": 1.0548,
|
|
"margin_dpo/margin_mean": 62.99828338623047,
|
|
"margin_dpo/margin_std": 78.32982635498047,
|
|
"step": 589
|
|
},
|
|
{
|
|
"epoch": 0.891912320483749,
|
|
"fcm_dpo/beta": 0.007300875149667263,
|
|
"fcm_dpo/delta": -0.027903899550437927,
|
|
"fcm_dpo/margin": 58.443626403808594,
|
|
"fcm_dpo/q_t": 0.4029507637023926,
|
|
"grad_norm": 6.3697028160095215,
|
|
"learning_rate": 1.7908016745981856e-08,
|
|
"logits/chosen": 1.296320915222168,
|
|
"logits/rejected": 1.2430548667907715,
|
|
"logps/chosen": -173.72471618652344,
|
|
"logps/ref_chosen": -86.9908447265625,
|
|
"logps/ref_rejected": -100.61723327636719,
|
|
"logps/rejected": -245.7947235107422,
|
|
"loss": 1.097,
|
|
"margin_dpo/margin_mean": 58.443626403808594,
|
|
"margin_dpo/margin_std": 86.44003295898438,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 0.8934240362811792,
|
|
"fcm_dpo/beta": 0.007098148111253977,
|
|
"fcm_dpo/delta": -0.12376554310321808,
|
|
"fcm_dpo/margin": 57.82063674926758,
|
|
"fcm_dpo/q_t": 0.4067874252796173,
|
|
"grad_norm": 9.336226463317871,
|
|
"learning_rate": 1.7419845883949098e-08,
|
|
"logits/chosen": 1.7065235376358032,
|
|
"logits/rejected": 1.542229413986206,
|
|
"logps/chosen": -146.87442016601562,
|
|
"logps/ref_chosen": -74.85809326171875,
|
|
"logps/ref_rejected": -102.75840759277344,
|
|
"logps/rejected": -232.59536743164062,
|
|
"loss": 1.1219,
|
|
"margin_dpo/margin_mean": 57.82063293457031,
|
|
"margin_dpo/margin_std": 89.06134796142578,
|
|
"step": 591
|
|
},
|
|
{
|
|
"epoch": 0.8949357520786092,
|
|
"fcm_dpo/beta": 0.007119194604456425,
|
|
"fcm_dpo/delta": 0.0307568721473217,
|
|
"fcm_dpo/margin": 52.02532958984375,
|
|
"fcm_dpo/q_t": 0.4162050783634186,
|
|
"grad_norm": 6.295296669006348,
|
|
"learning_rate": 1.6938180788793556e-08,
|
|
"logits/chosen": 1.7110962867736816,
|
|
"logits/rejected": 1.4122859239578247,
|
|
"logps/chosen": -148.53353881835938,
|
|
"logps/ref_chosen": -67.90579223632812,
|
|
"logps/ref_rejected": -100.35234069824219,
|
|
"logps/rejected": -233.00543212890625,
|
|
"loss": 1.1321,
|
|
"margin_dpo/margin_mean": 52.02532958984375,
|
|
"margin_dpo/margin_std": 80.80274963378906,
|
|
"step": 592
|
|
},
|
|
{
|
|
"epoch": 0.8964474678760394,
|
|
"fcm_dpo/beta": 0.007164083421230316,
|
|
"fcm_dpo/delta": 0.02655250020325184,
|
|
"fcm_dpo/margin": 52.26756286621094,
|
|
"fcm_dpo/q_t": 0.4153831899166107,
|
|
"grad_norm": 10.239151954650879,
|
|
"learning_rate": 1.6463034933723336e-08,
|
|
"logits/chosen": 1.348867416381836,
|
|
"logits/rejected": 1.1232930421829224,
|
|
"logps/chosen": -131.43345642089844,
|
|
"logps/ref_chosen": -59.29489517211914,
|
|
"logps/ref_rejected": -85.31307983398438,
|
|
"logps/rejected": -209.71920776367188,
|
|
"loss": 1.1429,
|
|
"margin_dpo/margin_mean": 52.26756286621094,
|
|
"margin_dpo/margin_std": 89.36260986328125,
|
|
"step": 593
|
|
},
|
|
{
|
|
"epoch": 0.8979591836734694,
|
|
"fcm_dpo/beta": 0.007193025201559067,
|
|
"fcm_dpo/delta": 0.029274439439177513,
|
|
"fcm_dpo/margin": 51.68307876586914,
|
|
"fcm_dpo/q_t": 0.41542428731918335,
|
|
"grad_norm": 10.818907737731934,
|
|
"learning_rate": 1.5994421609589385e-08,
|
|
"logits/chosen": 1.2572638988494873,
|
|
"logits/rejected": 1.2137444019317627,
|
|
"logps/chosen": -168.90765380859375,
|
|
"logps/ref_chosen": -83.14643859863281,
|
|
"logps/ref_rejected": -88.201904296875,
|
|
"logps/rejected": -225.64620971679688,
|
|
"loss": 1.1315,
|
|
"margin_dpo/margin_mean": 51.68307876586914,
|
|
"margin_dpo/margin_std": 82.45545959472656,
|
|
"step": 594
|
|
},
|
|
{
|
|
"epoch": 0.8994708994708994,
|
|
"fcm_dpo/beta": 0.007129493169486523,
|
|
"fcm_dpo/delta": -0.09650059044361115,
|
|
"fcm_dpo/margin": 68.9874267578125,
|
|
"fcm_dpo/q_t": 0.38809216022491455,
|
|
"grad_norm": 10.010942459106445,
|
|
"learning_rate": 1.553235392451377e-08,
|
|
"logits/chosen": 1.7847068309783936,
|
|
"logits/rejected": 1.529867172241211,
|
|
"logps/chosen": -150.77587890625,
|
|
"logps/ref_chosen": -70.40016174316406,
|
|
"logps/ref_rejected": -103.95550537109375,
|
|
"logps/rejected": -253.31863403320312,
|
|
"loss": 1.0697,
|
|
"margin_dpo/margin_mean": 68.9874267578125,
|
|
"margin_dpo/margin_std": 98.64369201660156,
|
|
"step": 595
|
|
},
|
|
{
|
|
"epoch": 0.9009826152683296,
|
|
"fcm_dpo/beta": 0.007084083743393421,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 19.771663665771484,
|
|
"fcm_dpo/q_t": 0.469125896692276,
|
|
"grad_norm": 6.528999328613281,
|
|
"learning_rate": 1.507684480352292e-08,
|
|
"logits/chosen": 1.183467149734497,
|
|
"logits/rejected": 1.2314069271087646,
|
|
"logps/chosen": -183.73483276367188,
|
|
"logps/ref_chosen": -86.083740234375,
|
|
"logps/ref_rejected": -78.41991424560547,
|
|
"logps/rejected": -195.84266662597656,
|
|
"loss": 1.3362,
|
|
"margin_dpo/margin_mean": 19.771663665771484,
|
|
"margin_dpo/margin_std": 84.07432556152344,
|
|
"step": 596
|
|
},
|
|
{
|
|
"epoch": 0.9024943310657596,
|
|
"fcm_dpo/beta": 0.007151216268539429,
|
|
"fcm_dpo/delta": 0.05845186114311218,
|
|
"fcm_dpo/margin": 48.03459167480469,
|
|
"fcm_dpo/q_t": 0.42271360754966736,
|
|
"grad_norm": 7.565585613250732,
|
|
"learning_rate": 1.4627906988186111e-08,
|
|
"logits/chosen": 1.5501301288604736,
|
|
"logits/rejected": 1.5076608657836914,
|
|
"logps/chosen": -140.9503173828125,
|
|
"logps/ref_chosen": -67.8086166381836,
|
|
"logps/ref_rejected": -71.09245300292969,
|
|
"logps/rejected": -192.26873779296875,
|
|
"loss": 1.1646,
|
|
"margin_dpo/margin_mean": 48.03459167480469,
|
|
"margin_dpo/margin_std": 87.82294464111328,
|
|
"step": 597
|
|
},
|
|
{
|
|
"epoch": 0.9040060468631897,
|
|
"fcm_dpo/beta": 0.007216842845082283,
|
|
"fcm_dpo/delta": 0.06853266060352325,
|
|
"fcm_dpo/margin": 31.869421005249023,
|
|
"fcm_dpo/q_t": 0.44815605878829956,
|
|
"grad_norm": 6.673587322235107,
|
|
"learning_rate": 1.4185553036259095e-08,
|
|
"logits/chosen": 1.628659725189209,
|
|
"logits/rejected": 1.4073009490966797,
|
|
"logps/chosen": -178.23072814941406,
|
|
"logps/ref_chosen": -74.31095886230469,
|
|
"logps/ref_rejected": -98.08122253417969,
|
|
"logps/rejected": -233.87042236328125,
|
|
"loss": 1.2428,
|
|
"margin_dpo/margin_mean": 31.869421005249023,
|
|
"margin_dpo/margin_std": 77.04680633544922,
|
|
"step": 598
|
|
},
|
|
{
|
|
"epoch": 0.9055177626606198,
|
|
"fcm_dpo/beta": 0.0073530226945877075,
|
|
"fcm_dpo/delta": 0.07715515047311783,
|
|
"fcm_dpo/margin": 44.24391555786133,
|
|
"fcm_dpo/q_t": 0.4264746904373169,
|
|
"grad_norm": 9.405556678771973,
|
|
"learning_rate": 1.3749795321332885e-08,
|
|
"logits/chosen": 1.7087280750274658,
|
|
"logits/rejected": 1.6005630493164062,
|
|
"logps/chosen": -167.94850158691406,
|
|
"logps/ref_chosen": -74.21861267089844,
|
|
"logps/ref_rejected": -90.1492919921875,
|
|
"logps/rejected": -228.12310791015625,
|
|
"loss": 1.1816,
|
|
"margin_dpo/margin_mean": 44.24391555786133,
|
|
"margin_dpo/margin_std": 86.17495727539062,
|
|
"step": 599
|
|
},
|
|
{
|
|
"epoch": 0.9070294784580499,
|
|
"fcm_dpo/beta": 0.007373571861535311,
|
|
"fcm_dpo/delta": -0.007764637470245361,
|
|
"fcm_dpo/margin": 41.892059326171875,
|
|
"fcm_dpo/q_t": 0.4296872019767761,
|
|
"grad_norm": 7.145091533660889,
|
|
"learning_rate": 1.3320646032487393e-08,
|
|
"logits/chosen": 1.3918439149856567,
|
|
"logits/rejected": 1.212207317352295,
|
|
"logps/chosen": -168.78567504882812,
|
|
"logps/ref_chosen": -79.34190368652344,
|
|
"logps/ref_rejected": -97.0519790649414,
|
|
"logps/rejected": -228.3878173828125,
|
|
"loss": 1.183,
|
|
"margin_dpo/margin_mean": 41.89206314086914,
|
|
"margin_dpo/margin_std": 78.34820556640625,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.9070294784580499,
|
|
"eval_fcm_dpo/beta": 0.007367846090346575,
|
|
"eval_logits/chosen": 1.3822048902511597,
|
|
"eval_logits/rejected": 1.2497957944869995,
|
|
"eval_logps/chosen": -172.7694549560547,
|
|
"eval_logps/ref_chosen": -86.90177917480469,
|
|
"eval_logps/ref_rejected": -96.69639587402344,
|
|
"eval_logps/rejected": -234.10513305664062,
|
|
"eval_loss": 0.5698250532150269,
|
|
"eval_margin_dpo/margin_mean": 51.54107666015625,
|
|
"eval_margin_dpo/margin_std": 86.13619995117188,
|
|
"eval_runtime": 42.2834,
|
|
"eval_samples_per_second": 54.466,
|
|
"eval_steps_per_second": 1.703,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.90854119425548,
|
|
"fcm_dpo/beta": 0.007286690175533295,
|
|
"fcm_dpo/delta": -0.06402283906936646,
|
|
"fcm_dpo/margin": 63.2564697265625,
|
|
"fcm_dpo/q_t": 0.3962099254131317,
|
|
"grad_norm": 8.408620834350586,
|
|
"learning_rate": 1.2898117173950868e-08,
|
|
"logits/chosen": 1.2801477909088135,
|
|
"logits/rejected": 1.0866169929504395,
|
|
"logps/chosen": -146.13392639160156,
|
|
"logps/ref_chosen": -72.06497192382812,
|
|
"logps/ref_rejected": -97.60928344726562,
|
|
"logps/rejected": -234.93472290039062,
|
|
"loss": 1.0767,
|
|
"margin_dpo/margin_mean": 63.256465911865234,
|
|
"margin_dpo/margin_std": 89.92544555664062,
|
|
"step": 601
|
|
},
|
|
{
|
|
"epoch": 0.91005291005291,
|
|
"fcm_dpo/beta": 0.00729843694716692,
|
|
"fcm_dpo/delta": -0.006445102393627167,
|
|
"fcm_dpo/margin": 55.607749938964844,
|
|
"fcm_dpo/q_t": 0.40874192118644714,
|
|
"grad_norm": 9.119906425476074,
|
|
"learning_rate": 1.2482220564763667e-08,
|
|
"logits/chosen": 1.881563425064087,
|
|
"logits/rejected": 1.7863805294036865,
|
|
"logps/chosen": -146.75225830078125,
|
|
"logps/ref_chosen": -77.80416870117188,
|
|
"logps/ref_rejected": -89.05026245117188,
|
|
"logps/rejected": -213.60609436035156,
|
|
"loss": 1.1081,
|
|
"margin_dpo/margin_mean": 55.607749938964844,
|
|
"margin_dpo/margin_std": 82.802001953125,
|
|
"step": 602
|
|
},
|
|
{
|
|
"epoch": 0.9115646258503401,
|
|
"fcm_dpo/beta": 0.007275847718119621,
|
|
"fcm_dpo/delta": 0.03916401416063309,
|
|
"fcm_dpo/margin": 49.760406494140625,
|
|
"fcm_dpo/q_t": 0.41777119040489197,
|
|
"grad_norm": 6.798327445983887,
|
|
"learning_rate": 1.2072967838448051e-08,
|
|
"logits/chosen": 1.402630090713501,
|
|
"logits/rejected": 1.265625,
|
|
"logps/chosen": -154.68133544921875,
|
|
"logps/ref_chosen": -68.30155944824219,
|
|
"logps/ref_rejected": -90.542724609375,
|
|
"logps/rejected": -226.68292236328125,
|
|
"loss": 1.1616,
|
|
"margin_dpo/margin_mean": 49.760406494140625,
|
|
"margin_dpo/margin_std": 90.02642822265625,
|
|
"step": 603
|
|
},
|
|
{
|
|
"epoch": 0.9130763416477702,
|
|
"fcm_dpo/beta": 0.0073940567672252655,
|
|
"fcm_dpo/delta": 0.04941772669553757,
|
|
"fcm_dpo/margin": 47.617759704589844,
|
|
"fcm_dpo/q_t": 0.4212024509906769,
|
|
"grad_norm": 5.99643611907959,
|
|
"learning_rate": 1.1670370442682459e-08,
|
|
"logits/chosen": 1.3572053909301758,
|
|
"logits/rejected": 1.3480005264282227,
|
|
"logps/chosen": -162.58450317382812,
|
|
"logps/ref_chosen": -90.55952453613281,
|
|
"logps/ref_rejected": -84.6327133178711,
|
|
"logps/rejected": -204.27545166015625,
|
|
"loss": 1.1818,
|
|
"margin_dpo/margin_mean": 47.617759704589844,
|
|
"margin_dpo/margin_std": 95.67184448242188,
|
|
"step": 604
|
|
},
|
|
{
|
|
"epoch": 0.9145880574452003,
|
|
"fcm_dpo/beta": 0.007479371502995491,
|
|
"fcm_dpo/delta": 0.06939111649990082,
|
|
"fcm_dpo/margin": 44.499267578125,
|
|
"fcm_dpo/q_t": 0.4240415096282959,
|
|
"grad_norm": 10.035806655883789,
|
|
"learning_rate": 1.1274439638981532e-08,
|
|
"logits/chosen": 1.2821848392486572,
|
|
"logits/rejected": 1.1488748788833618,
|
|
"logps/chosen": -173.67697143554688,
|
|
"logps/ref_chosen": -80.26661682128906,
|
|
"logps/ref_rejected": -100.26485443115234,
|
|
"logps/rejected": -238.17446899414062,
|
|
"loss": 1.1637,
|
|
"margin_dpo/margin_mean": 44.499267578125,
|
|
"margin_dpo/margin_std": 79.42837524414062,
|
|
"step": 605
|
|
},
|
|
{
|
|
"epoch": 0.9160997732426304,
|
|
"fcm_dpo/beta": 0.007518238388001919,
|
|
"fcm_dpo/delta": -0.0017635505646467209,
|
|
"fcm_dpo/margin": 53.40687561035156,
|
|
"fcm_dpo/q_t": 0.4092721939086914,
|
|
"grad_norm": 9.322007179260254,
|
|
"learning_rate": 1.0885186502381016e-08,
|
|
"logits/chosen": 1.525209665298462,
|
|
"logits/rejected": 1.3565664291381836,
|
|
"logps/chosen": -146.97242736816406,
|
|
"logps/ref_chosen": -70.73554229736328,
|
|
"logps/ref_rejected": -95.9410400390625,
|
|
"logps/rejected": -225.58480834960938,
|
|
"loss": 1.106,
|
|
"margin_dpo/margin_mean": 53.406883239746094,
|
|
"margin_dpo/margin_std": 78.14736938476562,
|
|
"step": 606
|
|
},
|
|
{
|
|
"epoch": 0.9176114890400605,
|
|
"fcm_dpo/beta": 0.00732019217684865,
|
|
"fcm_dpo/delta": -0.11818597465753555,
|
|
"fcm_dpo/margin": 53.20615768432617,
|
|
"fcm_dpo/q_t": 0.4098881781101227,
|
|
"grad_norm": 9.862732887268066,
|
|
"learning_rate": 1.0502621921127774e-08,
|
|
"logits/chosen": 1.524349570274353,
|
|
"logits/rejected": 1.4376423358917236,
|
|
"logps/chosen": -171.41452026367188,
|
|
"logps/ref_chosen": -81.26203918457031,
|
|
"logps/ref_rejected": -92.71575927734375,
|
|
"logps/rejected": -236.07440185546875,
|
|
"loss": 1.1186,
|
|
"margin_dpo/margin_mean": 53.20615768432617,
|
|
"margin_dpo/margin_std": 75.97901916503906,
|
|
"step": 607
|
|
},
|
|
{
|
|
"epoch": 0.9191232048374905,
|
|
"fcm_dpo/beta": 0.007443271577358246,
|
|
"fcm_dpo/delta": 0.10113994777202606,
|
|
"fcm_dpo/margin": 40.53437042236328,
|
|
"fcm_dpo/q_t": 0.43184107542037964,
|
|
"grad_norm": 9.41500473022461,
|
|
"learning_rate": 1.0126756596375685e-08,
|
|
"logits/chosen": 1.4544808864593506,
|
|
"logits/rejected": 1.1771209239959717,
|
|
"logps/chosen": -175.67120361328125,
|
|
"logps/ref_chosen": -82.6530990600586,
|
|
"logps/ref_rejected": -110.64334106445312,
|
|
"logps/rejected": -244.19581604003906,
|
|
"loss": 1.1933,
|
|
"margin_dpo/margin_mean": 40.53437042236328,
|
|
"margin_dpo/margin_std": 81.21488952636719,
|
|
"step": 608
|
|
},
|
|
{
|
|
"epoch": 0.9206349206349206,
|
|
"fcm_dpo/beta": 0.007462367881089449,
|
|
"fcm_dpo/delta": -0.0018095960840582848,
|
|
"fcm_dpo/margin": 53.83350372314453,
|
|
"fcm_dpo/q_t": 0.40612369775772095,
|
|
"grad_norm": 8.645689010620117,
|
|
"learning_rate": 9.757601041885694e-09,
|
|
"logits/chosen": 1.7371234893798828,
|
|
"logits/rejected": 1.6018296480178833,
|
|
"logps/chosen": -146.37277221679688,
|
|
"logps/ref_chosen": -68.20232391357422,
|
|
"logps/ref_rejected": -81.90515899658203,
|
|
"logps/rejected": -213.90911865234375,
|
|
"loss": 1.0875,
|
|
"margin_dpo/margin_mean": 53.83350372314453,
|
|
"margin_dpo/margin_std": 68.90634155273438,
|
|
"step": 609
|
|
},
|
|
{
|
|
"epoch": 0.9221466364323507,
|
|
"fcm_dpo/beta": 0.007475072983652353,
|
|
"fcm_dpo/delta": 0.019946957007050514,
|
|
"fcm_dpo/margin": 50.93968963623047,
|
|
"fcm_dpo/q_t": 0.4158932864665985,
|
|
"grad_norm": 14.192397117614746,
|
|
"learning_rate": 9.395165583732379e-09,
|
|
"logits/chosen": 1.5707309246063232,
|
|
"logits/rejected": 1.5060572624206543,
|
|
"logps/chosen": -187.23873901367188,
|
|
"logps/ref_chosen": -99.01324462890625,
|
|
"logps/ref_rejected": -102.26054382324219,
|
|
"logps/rejected": -241.42572021484375,
|
|
"loss": 1.1659,
|
|
"margin_dpo/margin_mean": 50.93968963623047,
|
|
"margin_dpo/margin_std": 97.81775665283203,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 0.9236583522297808,
|
|
"fcm_dpo/beta": 0.007563972845673561,
|
|
"fcm_dpo/delta": 0.08229602128267288,
|
|
"fcm_dpo/margin": 42.35655212402344,
|
|
"fcm_dpo/q_t": 0.42811521887779236,
|
|
"grad_norm": 7.965384006500244,
|
|
"learning_rate": 9.03946036001449e-09,
|
|
"logits/chosen": 1.5573077201843262,
|
|
"logits/rejected": 1.4318535327911377,
|
|
"logps/chosen": -144.16986083984375,
|
|
"logps/ref_chosen": -66.36254119873047,
|
|
"logps/ref_rejected": -88.74557495117188,
|
|
"logps/rejected": -208.90943908691406,
|
|
"loss": 1.1632,
|
|
"margin_dpo/margin_mean": 42.35655212402344,
|
|
"margin_dpo/margin_std": 72.27166748046875,
|
|
"step": 611
|
|
},
|
|
{
|
|
"epoch": 0.9251700680272109,
|
|
"fcm_dpo/beta": 0.007552753668278456,
|
|
"fcm_dpo/delta": -0.07210865616798401,
|
|
"fcm_dpo/margin": 62.068572998046875,
|
|
"fcm_dpo/q_t": 0.3943250775337219,
|
|
"grad_norm": 6.521274089813232,
|
|
"learning_rate": 8.690495320571839e-09,
|
|
"logits/chosen": 1.2143969535827637,
|
|
"logits/rejected": 1.0262000560760498,
|
|
"logps/chosen": -170.40911865234375,
|
|
"logps/ref_chosen": -78.6339111328125,
|
|
"logps/ref_rejected": -108.34969329833984,
|
|
"logps/rejected": -262.1934814453125,
|
|
"loss": 1.0867,
|
|
"margin_dpo/margin_mean": 62.06857681274414,
|
|
"margin_dpo/margin_std": 93.37065887451172,
|
|
"step": 612
|
|
},
|
|
{
|
|
"epoch": 0.926681783824641,
|
|
"fcm_dpo/beta": 0.0074174664914608,
|
|
"fcm_dpo/delta": -0.1009131669998169,
|
|
"fcm_dpo/margin": 66.87256622314453,
|
|
"fcm_dpo/q_t": 0.387722373008728,
|
|
"grad_norm": 7.430573463439941,
|
|
"learning_rate": 8.348280226706722e-09,
|
|
"logits/chosen": 1.249620795249939,
|
|
"logits/rejected": 1.2631680965423584,
|
|
"logps/chosen": -143.5052032470703,
|
|
"logps/ref_chosen": -73.3539047241211,
|
|
"logps/ref_rejected": -76.91837310791016,
|
|
"logps/rejected": -213.94223022460938,
|
|
"loss": 1.0475,
|
|
"margin_dpo/margin_mean": 66.87255859375,
|
|
"margin_dpo/margin_std": 87.33309173583984,
|
|
"step": 613
|
|
},
|
|
{
|
|
"epoch": 0.9281934996220711,
|
|
"fcm_dpo/beta": 0.007404108997434378,
|
|
"fcm_dpo/delta": 0.002595767378807068,
|
|
"fcm_dpo/margin": 53.611568450927734,
|
|
"fcm_dpo/q_t": 0.40840423107147217,
|
|
"grad_norm": 8.825176239013672,
|
|
"learning_rate": 8.012824650910937e-09,
|
|
"logits/chosen": 1.506474494934082,
|
|
"logits/rejected": 1.480654001235962,
|
|
"logps/chosen": -165.64871215820312,
|
|
"logps/ref_chosen": -77.80007934570312,
|
|
"logps/ref_rejected": -89.05572509765625,
|
|
"logps/rejected": -230.51593017578125,
|
|
"loss": 1.1024,
|
|
"margin_dpo/margin_mean": 53.61156463623047,
|
|
"margin_dpo/margin_std": 74.10795593261719,
|
|
"step": 614
|
|
},
|
|
{
|
|
"epoch": 0.9297052154195011,
|
|
"fcm_dpo/beta": 0.007292012684047222,
|
|
"fcm_dpo/delta": -0.0258035846054554,
|
|
"fcm_dpo/margin": 58.11692428588867,
|
|
"fcm_dpo/q_t": 0.4042222499847412,
|
|
"grad_norm": 7.2233381271362305,
|
|
"learning_rate": 7.684137976598088e-09,
|
|
"logits/chosen": 1.276944637298584,
|
|
"logits/rejected": 1.1412029266357422,
|
|
"logps/chosen": -183.43215942382812,
|
|
"logps/ref_chosen": -90.06971740722656,
|
|
"logps/ref_rejected": -118.7764892578125,
|
|
"logps/rejected": -270.255859375,
|
|
"loss": 1.1177,
|
|
"margin_dpo/margin_mean": 58.116920471191406,
|
|
"margin_dpo/margin_std": 92.6316909790039,
|
|
"step": 615
|
|
},
|
|
{
|
|
"epoch": 0.9312169312169312,
|
|
"fcm_dpo/beta": 0.0073302434757351875,
|
|
"fcm_dpo/delta": 0.01808079145848751,
|
|
"fcm_dpo/margin": 52.18651580810547,
|
|
"fcm_dpo/q_t": 0.4146103262901306,
|
|
"grad_norm": 10.137645721435547,
|
|
"learning_rate": 7.36222939784098e-09,
|
|
"logits/chosen": 1.5233194828033447,
|
|
"logits/rejected": 1.325713872909546,
|
|
"logps/chosen": -160.01788330078125,
|
|
"logps/ref_chosen": -74.62954711914062,
|
|
"logps/ref_rejected": -93.655029296875,
|
|
"logps/rejected": -231.22987365722656,
|
|
"loss": 1.126,
|
|
"margin_dpo/margin_mean": 52.18651580810547,
|
|
"margin_dpo/margin_std": 82.57902526855469,
|
|
"step": 616
|
|
},
|
|
{
|
|
"epoch": 0.9327286470143613,
|
|
"fcm_dpo/beta": 0.007304855156689882,
|
|
"fcm_dpo/delta": -0.033911511301994324,
|
|
"fcm_dpo/margin": 44.39569854736328,
|
|
"fcm_dpo/q_t": 0.4244207739830017,
|
|
"grad_norm": 9.833024024963379,
|
|
"learning_rate": 7.047107919114586e-09,
|
|
"logits/chosen": 1.3680877685546875,
|
|
"logits/rejected": 1.2418947219848633,
|
|
"logps/chosen": -173.10882568359375,
|
|
"logps/ref_chosen": -75.98182678222656,
|
|
"logps/ref_rejected": -97.1640625,
|
|
"logps/rejected": -238.68673706054688,
|
|
"loss": 1.1656,
|
|
"margin_dpo/margin_mean": 44.39570236206055,
|
|
"margin_dpo/margin_std": 76.2586669921875,
|
|
"step": 617
|
|
},
|
|
{
|
|
"epoch": 0.9342403628117913,
|
|
"fcm_dpo/beta": 0.0073931096121668816,
|
|
"fcm_dpo/delta": 0.08461872488260269,
|
|
"fcm_dpo/margin": 43.02881622314453,
|
|
"fcm_dpo/q_t": 0.4286499619483948,
|
|
"grad_norm": 16.107576370239258,
|
|
"learning_rate": 6.738782355044048e-09,
|
|
"logits/chosen": 1.7296488285064697,
|
|
"logits/rejected": 1.44431471824646,
|
|
"logps/chosen": -155.86195373535156,
|
|
"logps/ref_chosen": -74.47208404541016,
|
|
"logps/ref_rejected": -107.09980773925781,
|
|
"logps/rejected": -231.51849365234375,
|
|
"loss": 1.1778,
|
|
"margin_dpo/margin_mean": 43.02881622314453,
|
|
"margin_dpo/margin_std": 81.21162414550781,
|
|
"step": 618
|
|
},
|
|
{
|
|
"epoch": 0.9357520786092215,
|
|
"fcm_dpo/beta": 0.007361262571066618,
|
|
"fcm_dpo/delta": -0.04189985245466232,
|
|
"fcm_dpo/margin": 59.74002456665039,
|
|
"fcm_dpo/q_t": 0.40103021264076233,
|
|
"grad_norm": 9.501195907592773,
|
|
"learning_rate": 6.437261330158206e-09,
|
|
"logits/chosen": 1.698243260383606,
|
|
"logits/rejected": 1.468477487564087,
|
|
"logps/chosen": -154.20974731445312,
|
|
"logps/ref_chosen": -70.84220886230469,
|
|
"logps/ref_rejected": -98.07801818847656,
|
|
"logps/rejected": -241.18557739257812,
|
|
"loss": 1.0963,
|
|
"margin_dpo/margin_mean": 59.740020751953125,
|
|
"margin_dpo/margin_std": 90.09310913085938,
|
|
"step": 619
|
|
},
|
|
{
|
|
"epoch": 0.9372637944066515,
|
|
"fcm_dpo/beta": 0.007336704060435295,
|
|
"fcm_dpo/delta": -0.042020995169878006,
|
|
"fcm_dpo/margin": 44.221435546875,
|
|
"fcm_dpo/q_t": 0.4262590706348419,
|
|
"grad_norm": 7.753275394439697,
|
|
"learning_rate": 6.142553278648238e-09,
|
|
"logits/chosen": 1.2661397457122803,
|
|
"logits/rejected": 1.2839014530181885,
|
|
"logps/chosen": -156.5693817138672,
|
|
"logps/ref_chosen": -76.93606567382812,
|
|
"logps/ref_rejected": -81.28453063964844,
|
|
"logps/rejected": -205.1392822265625,
|
|
"loss": 1.1765,
|
|
"margin_dpo/margin_mean": 44.221435546875,
|
|
"margin_dpo/margin_std": 80.71849060058594,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 0.9387755102040817,
|
|
"fcm_dpo/beta": 0.007401227951049805,
|
|
"fcm_dpo/delta": 0.08091681450605392,
|
|
"fcm_dpo/margin": 43.45226287841797,
|
|
"fcm_dpo/q_t": 0.42756223678588867,
|
|
"grad_norm": 7.846070766448975,
|
|
"learning_rate": 5.854666444131934e-09,
|
|
"logits/chosen": 1.8054823875427246,
|
|
"logits/rejected": 1.4816551208496094,
|
|
"logps/chosen": -157.64730834960938,
|
|
"logps/ref_chosen": -69.87464904785156,
|
|
"logps/ref_rejected": -105.61328887939453,
|
|
"logps/rejected": -236.83822631835938,
|
|
"loss": 1.1834,
|
|
"margin_dpo/margin_mean": 43.452266693115234,
|
|
"margin_dpo/margin_std": 85.07792663574219,
|
|
"step": 621
|
|
},
|
|
{
|
|
"epoch": 0.9402872260015117,
|
|
"fcm_dpo/beta": 0.007493887562304735,
|
|
"fcm_dpo/delta": 0.0487191379070282,
|
|
"fcm_dpo/margin": 47.08264923095703,
|
|
"fcm_dpo/q_t": 0.4192439317703247,
|
|
"grad_norm": 10.554768562316895,
|
|
"learning_rate": 5.573608879422875e-09,
|
|
"logits/chosen": 1.321624994277954,
|
|
"logits/rejected": 1.184661626815796,
|
|
"logps/chosen": -166.62425231933594,
|
|
"logps/ref_chosen": -78.9598388671875,
|
|
"logps/ref_rejected": -97.90648651123047,
|
|
"logps/rejected": -232.65354919433594,
|
|
"loss": 1.1468,
|
|
"margin_dpo/margin_mean": 47.08264923095703,
|
|
"margin_dpo/margin_std": 78.495361328125,
|
|
"step": 622
|
|
},
|
|
{
|
|
"epoch": 0.9417989417989417,
|
|
"fcm_dpo/beta": 0.007525959052145481,
|
|
"fcm_dpo/delta": 0.02474692091345787,
|
|
"fcm_dpo/margin": 49.984649658203125,
|
|
"fcm_dpo/q_t": 0.41398096084594727,
|
|
"grad_norm": 6.809837818145752,
|
|
"learning_rate": 5.299388446305342e-09,
|
|
"logits/chosen": 1.6094965934753418,
|
|
"logits/rejected": 1.4611616134643555,
|
|
"logps/chosen": -181.2794189453125,
|
|
"logps/ref_chosen": -83.22647094726562,
|
|
"logps/ref_rejected": -105.1362533569336,
|
|
"logps/rejected": -253.17385864257812,
|
|
"loss": 1.126,
|
|
"margin_dpo/margin_mean": 49.984649658203125,
|
|
"margin_dpo/margin_std": 77.9920654296875,
|
|
"step": 623
|
|
},
|
|
{
|
|
"epoch": 0.9433106575963719,
|
|
"fcm_dpo/beta": 0.007394228130578995,
|
|
"fcm_dpo/delta": -0.10044856369495392,
|
|
"fcm_dpo/margin": 66.91777038574219,
|
|
"fcm_dpo/q_t": 0.3872142434120178,
|
|
"grad_norm": 6.696838855743408,
|
|
"learning_rate": 5.03201281531429e-09,
|
|
"logits/chosen": 1.518819808959961,
|
|
"logits/rejected": 1.2560850381851196,
|
|
"logps/chosen": -140.40402221679688,
|
|
"logps/ref_chosen": -66.10560607910156,
|
|
"logps/ref_rejected": -91.66778564453125,
|
|
"logps/rejected": -232.88397216796875,
|
|
"loss": 1.0408,
|
|
"margin_dpo/margin_mean": 66.91777038574219,
|
|
"margin_dpo/margin_std": 82.78280639648438,
|
|
"step": 624
|
|
},
|
|
{
|
|
"epoch": 0.9448223733938019,
|
|
"fcm_dpo/beta": 0.007477354723960161,
|
|
"fcm_dpo/delta": 0.06206696480512619,
|
|
"fcm_dpo/margin": 32.61128616333008,
|
|
"fcm_dpo/q_t": 0.4454808235168457,
|
|
"grad_norm": 11.197325706481934,
|
|
"learning_rate": 4.7714894655209174e-09,
|
|
"logits/chosen": 1.6123887300491333,
|
|
"logits/rejected": 1.3841365575790405,
|
|
"logps/chosen": -161.52566528320312,
|
|
"logps/ref_chosen": -73.20295715332031,
|
|
"logps/ref_rejected": -105.31025695800781,
|
|
"logps/rejected": -226.2442626953125,
|
|
"loss": 1.2569,
|
|
"margin_dpo/margin_mean": 32.61128616333008,
|
|
"margin_dpo/margin_std": 85.51419067382812,
|
|
"step": 625
|
|
},
|
|
{
|
|
"epoch": 0.9463340891912321,
|
|
"fcm_dpo/beta": 0.007401737850159407,
|
|
"fcm_dpo/delta": -0.0417199544608593,
|
|
"fcm_dpo/margin": 59.367549896240234,
|
|
"fcm_dpo/q_t": 0.4038395881652832,
|
|
"grad_norm": 8.29497241973877,
|
|
"learning_rate": 4.517825684323323e-09,
|
|
"logits/chosen": 1.704930067062378,
|
|
"logits/rejected": 1.3351404666900635,
|
|
"logps/chosen": -140.19259643554688,
|
|
"logps/ref_chosen": -62.181278228759766,
|
|
"logps/ref_rejected": -108.17747497558594,
|
|
"logps/rejected": -245.55633544921875,
|
|
"loss": 1.1167,
|
|
"margin_dpo/margin_mean": 59.367549896240234,
|
|
"margin_dpo/margin_std": 97.91468048095703,
|
|
"step": 626
|
|
},
|
|
{
|
|
"epoch": 0.9478458049886621,
|
|
"fcm_dpo/beta": 0.007266830187290907,
|
|
"fcm_dpo/delta": -0.12542851269245148,
|
|
"fcm_dpo/margin": 71.37725830078125,
|
|
"fcm_dpo/q_t": 0.3819334805011749,
|
|
"grad_norm": 7.506706714630127,
|
|
"learning_rate": 4.271028567242818e-09,
|
|
"logits/chosen": 1.4920332431793213,
|
|
"logits/rejected": 1.1570463180541992,
|
|
"logps/chosen": -160.24082946777344,
|
|
"logps/ref_chosen": -77.72123718261719,
|
|
"logps/ref_rejected": -114.40547180175781,
|
|
"logps/rejected": -268.30230712890625,
|
|
"loss": 1.0246,
|
|
"margin_dpo/margin_mean": 71.37725830078125,
|
|
"margin_dpo/margin_std": 86.36345672607422,
|
|
"step": 627
|
|
},
|
|
{
|
|
"epoch": 0.9493575207860923,
|
|
"fcm_dpo/beta": 0.007203748449683189,
|
|
"fcm_dpo/delta": -0.09114761650562286,
|
|
"fcm_dpo/margin": 67.43045806884766,
|
|
"fcm_dpo/q_t": 0.38849127292633057,
|
|
"grad_norm": 7.820545673370361,
|
|
"learning_rate": 4.0311050177251895e-09,
|
|
"logits/chosen": 1.194248080253601,
|
|
"logits/rejected": 1.1566791534423828,
|
|
"logps/chosen": -151.1189422607422,
|
|
"logps/ref_chosen": -70.71195983886719,
|
|
"logps/ref_rejected": -93.85909271240234,
|
|
"logps/rejected": -241.696533203125,
|
|
"loss": 1.0798,
|
|
"margin_dpo/margin_mean": 67.43045806884766,
|
|
"margin_dpo/margin_std": 92.61256408691406,
|
|
"step": 628
|
|
},
|
|
{
|
|
"epoch": 0.9508692365835223,
|
|
"fcm_dpo/beta": 0.007152793928980827,
|
|
"fcm_dpo/delta": 0.03857763484120369,
|
|
"fcm_dpo/margin": 50.71005630493164,
|
|
"fcm_dpo/q_t": 0.4172513782978058,
|
|
"grad_norm": 8.010543823242188,
|
|
"learning_rate": 3.798061746947995e-09,
|
|
"logits/chosen": 1.3519011735916138,
|
|
"logits/rejected": 1.3062744140625,
|
|
"logps/chosen": -169.06600952148438,
|
|
"logps/ref_chosen": -88.66283416748047,
|
|
"logps/ref_rejected": -94.67845153808594,
|
|
"logps/rejected": -225.79168701171875,
|
|
"loss": 1.1216,
|
|
"margin_dpo/margin_mean": 50.71005630493164,
|
|
"margin_dpo/margin_std": 73.14350891113281,
|
|
"step": 629
|
|
},
|
|
{
|
|
"epoch": 0.9523809523809523,
|
|
"fcm_dpo/beta": 0.007133721373975277,
|
|
"fcm_dpo/delta": -0.020191561430692673,
|
|
"fcm_dpo/margin": 58.78317642211914,
|
|
"fcm_dpo/q_t": 0.40502220392227173,
|
|
"grad_norm": 6.478476524353027,
|
|
"learning_rate": 3.5719052736323806e-09,
|
|
"logits/chosen": 1.4322681427001953,
|
|
"logits/rejected": 1.270760416984558,
|
|
"logps/chosen": -153.3521728515625,
|
|
"logps/ref_chosen": -72.94979858398438,
|
|
"logps/ref_rejected": -92.7632827758789,
|
|
"logps/rejected": -231.9488525390625,
|
|
"loss": 1.0856,
|
|
"margin_dpo/margin_mean": 58.78317642211914,
|
|
"margin_dpo/margin_std": 79.745849609375,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 0.9538926681783825,
|
|
"fcm_dpo/beta": 0.0070120044983923435,
|
|
"fcm_dpo/delta": -0.06671465933322906,
|
|
"fcm_dpo/margin": 65.94947052001953,
|
|
"fcm_dpo/q_t": 0.39557164907455444,
|
|
"grad_norm": 8.37364673614502,
|
|
"learning_rate": 3.352641923861144e-09,
|
|
"logits/chosen": 1.8200433254241943,
|
|
"logits/rejected": 1.4821150302886963,
|
|
"logps/chosen": -156.98353576660156,
|
|
"logps/ref_chosen": -78.58656311035156,
|
|
"logps/ref_rejected": -115.38685607910156,
|
|
"logps/rejected": -259.7332763671875,
|
|
"loss": 1.0733,
|
|
"margin_dpo/margin_mean": 65.949462890625,
|
|
"margin_dpo/margin_std": 90.15409851074219,
|
|
"step": 631
|
|
},
|
|
{
|
|
"epoch": 0.9554043839758125,
|
|
"fcm_dpo/beta": 0.007028430234640837,
|
|
"fcm_dpo/delta": -0.010720066726207733,
|
|
"fcm_dpo/margin": 58.3730354309082,
|
|
"fcm_dpo/q_t": 0.40583336353302,
|
|
"grad_norm": 8.03717041015625,
|
|
"learning_rate": 3.140277830901428e-09,
|
|
"logits/chosen": 1.498687744140625,
|
|
"logits/rejected": 1.4070721864700317,
|
|
"logps/chosen": -154.97467041015625,
|
|
"logps/ref_chosen": -75.24861907958984,
|
|
"logps/ref_rejected": -82.98665618896484,
|
|
"logps/rejected": -221.08575439453125,
|
|
"loss": 1.099,
|
|
"margin_dpo/margin_mean": 58.37303924560547,
|
|
"margin_dpo/margin_std": 83.79042053222656,
|
|
"step": 632
|
|
},
|
|
{
|
|
"epoch": 0.9569160997732427,
|
|
"fcm_dpo/beta": 0.007006094790995121,
|
|
"fcm_dpo/delta": -0.03479746729135513,
|
|
"fcm_dpo/margin": 61.82661437988281,
|
|
"fcm_dpo/q_t": 0.40178823471069336,
|
|
"grad_norm": 12.49810791015625,
|
|
"learning_rate": 2.9348189350335007e-09,
|
|
"logits/chosen": 1.363844394683838,
|
|
"logits/rejected": 1.2224321365356445,
|
|
"logps/chosen": -131.18251037597656,
|
|
"logps/ref_chosen": -68.8402099609375,
|
|
"logps/ref_rejected": -84.64610290527344,
|
|
"logps/rejected": -208.81500244140625,
|
|
"loss": 1.0818,
|
|
"margin_dpo/margin_mean": 61.82661056518555,
|
|
"margin_dpo/margin_std": 84.4749755859375,
|
|
"step": 633
|
|
},
|
|
{
|
|
"epoch": 0.9584278155706727,
|
|
"fcm_dpo/beta": 0.006968853063881397,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 17.51637840270996,
|
|
"fcm_dpo/q_t": 0.4729679226875305,
|
|
"grad_norm": 10.013653755187988,
|
|
"learning_rate": 2.736270983384276e-09,
|
|
"logits/chosen": 1.642852783203125,
|
|
"logits/rejected": 1.6877248287200928,
|
|
"logps/chosen": -173.70953369140625,
|
|
"logps/ref_chosen": -77.0589599609375,
|
|
"logps/ref_rejected": -74.37579345703125,
|
|
"logps/rejected": -188.54275512695312,
|
|
"loss": 1.3598,
|
|
"margin_dpo/margin_mean": 17.516376495361328,
|
|
"margin_dpo/margin_std": 88.91429138183594,
|
|
"step": 634
|
|
},
|
|
{
|
|
"epoch": 0.9599395313681028,
|
|
"fcm_dpo/beta": 0.0071297320537269115,
|
|
"fcm_dpo/delta": 0.1415490359067917,
|
|
"fcm_dpo/margin": 36.72964859008789,
|
|
"fcm_dpo/q_t": 0.4411892592906952,
|
|
"grad_norm": 7.8840765953063965,
|
|
"learning_rate": 2.5446395297668287e-09,
|
|
"logits/chosen": 1.1347073316574097,
|
|
"logits/rejected": 0.9766046404838562,
|
|
"logps/chosen": -190.28985595703125,
|
|
"logps/ref_chosen": -85.60243225097656,
|
|
"logps/ref_rejected": -104.29497528076172,
|
|
"logps/rejected": -245.71206665039062,
|
|
"loss": 1.2396,
|
|
"margin_dpo/margin_mean": 36.72964859008789,
|
|
"margin_dpo/margin_std": 90.49435424804688,
|
|
"step": 635
|
|
},
|
|
{
|
|
"epoch": 0.9614512471655329,
|
|
"fcm_dpo/beta": 0.007117372006177902,
|
|
"fcm_dpo/delta": -0.05001338571310043,
|
|
"fcm_dpo/margin": 62.914146423339844,
|
|
"fcm_dpo/q_t": 0.39747586846351624,
|
|
"grad_norm": 7.932164669036865,
|
|
"learning_rate": 2.359929934524829e-09,
|
|
"logits/chosen": 1.6017391681671143,
|
|
"logits/rejected": 1.3030370473861694,
|
|
"logps/chosen": -145.52099609375,
|
|
"logps/ref_chosen": -68.72154235839844,
|
|
"logps/ref_rejected": -97.44863891601562,
|
|
"logps/rejected": -237.1622314453125,
|
|
"loss": 1.0621,
|
|
"margin_dpo/margin_mean": 62.914146423339844,
|
|
"margin_dpo/margin_std": 79.58000183105469,
|
|
"step": 636
|
|
},
|
|
{
|
|
"epoch": 0.9629629629629629,
|
|
"fcm_dpo/beta": 0.007132566533982754,
|
|
"fcm_dpo/delta": 0.01812063902616501,
|
|
"fcm_dpo/margin": 53.61853790283203,
|
|
"fcm_dpo/q_t": 0.41387444734573364,
|
|
"grad_norm": 6.419241428375244,
|
|
"learning_rate": 2.1821473643827137e-09,
|
|
"logits/chosen": 1.409173846244812,
|
|
"logits/rejected": 1.2437349557876587,
|
|
"logps/chosen": -193.94113159179688,
|
|
"logps/ref_chosen": -92.38919067382812,
|
|
"logps/ref_rejected": -103.70460510253906,
|
|
"logps/rejected": -258.8750915527344,
|
|
"loss": 1.1161,
|
|
"margin_dpo/margin_mean": 53.61853790283203,
|
|
"margin_dpo/margin_std": 80.25310516357422,
|
|
"step": 637
|
|
},
|
|
{
|
|
"epoch": 0.9644746787603931,
|
|
"fcm_dpo/beta": 0.007207995280623436,
|
|
"fcm_dpo/delta": 0.06300410628318787,
|
|
"fcm_dpo/margin": 47.013023376464844,
|
|
"fcm_dpo/q_t": 0.42337724566459656,
|
|
"grad_norm": 10.596990585327148,
|
|
"learning_rate": 2.0112967923011646e-09,
|
|
"logits/chosen": 1.3792505264282227,
|
|
"logits/rejected": 1.233081340789795,
|
|
"logps/chosen": -178.09756469726562,
|
|
"logps/ref_chosen": -83.36921691894531,
|
|
"logps/ref_rejected": -103.04508209228516,
|
|
"logps/rejected": -244.78646850585938,
|
|
"loss": 1.1589,
|
|
"margin_dpo/margin_mean": 47.01301956176758,
|
|
"margin_dpo/margin_std": 82.29072570800781,
|
|
"step": 638
|
|
},
|
|
{
|
|
"epoch": 0.9659863945578231,
|
|
"fcm_dpo/beta": 0.007193556986749172,
|
|
"fcm_dpo/delta": -0.041456226259469986,
|
|
"fcm_dpo/margin": 61.10279083251953,
|
|
"fcm_dpo/q_t": 0.40119677782058716,
|
|
"grad_norm": 6.433756351470947,
|
|
"learning_rate": 1.847382997337943e-09,
|
|
"logits/chosen": 1.2101722955703735,
|
|
"logits/rejected": 0.983842670917511,
|
|
"logps/chosen": -148.6090850830078,
|
|
"logps/ref_chosen": -70.45247650146484,
|
|
"logps/ref_rejected": -93.77748107910156,
|
|
"logps/rejected": -233.036865234375,
|
|
"loss": 1.0853,
|
|
"margin_dpo/margin_mean": 61.102783203125,
|
|
"margin_dpo/margin_std": 86.17239379882812,
|
|
"step": 639
|
|
},
|
|
{
|
|
"epoch": 0.9674981103552532,
|
|
"fcm_dpo/beta": 0.007202021777629852,
|
|
"fcm_dpo/delta": 0.06750090420246124,
|
|
"fcm_dpo/margin": 46.48023986816406,
|
|
"fcm_dpo/q_t": 0.4226430058479309,
|
|
"grad_norm": 7.745254039764404,
|
|
"learning_rate": 1.690410564514244e-09,
|
|
"logits/chosen": 1.6796314716339111,
|
|
"logits/rejected": 1.4730074405670166,
|
|
"logps/chosen": -152.3632354736328,
|
|
"logps/ref_chosen": -68.51570129394531,
|
|
"logps/ref_rejected": -92.35081481933594,
|
|
"logps/rejected": -222.6785888671875,
|
|
"loss": 1.1784,
|
|
"margin_dpo/margin_mean": 46.48023986816406,
|
|
"margin_dpo/margin_std": 89.2449951171875,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 0.9690098261526833,
|
|
"fcm_dpo/beta": 0.007284895051270723,
|
|
"fcm_dpo/delta": 0.058470651507377625,
|
|
"fcm_dpo/margin": 47.1434326171875,
|
|
"fcm_dpo/q_t": 0.4222804605960846,
|
|
"grad_norm": 8.309812545776367,
|
|
"learning_rate": 1.5403838846864692e-09,
|
|
"logits/chosen": 1.6926078796386719,
|
|
"logits/rejected": 1.6602896451950073,
|
|
"logps/chosen": -186.4867401123047,
|
|
"logps/ref_chosen": -92.35102844238281,
|
|
"logps/ref_rejected": -102.4269790649414,
|
|
"logps/rejected": -243.7061309814453,
|
|
"loss": 1.1561,
|
|
"margin_dpo/margin_mean": 47.14342498779297,
|
|
"margin_dpo/margin_std": 81.37835693359375,
|
|
"step": 641
|
|
},
|
|
{
|
|
"epoch": 0.9705215419501134,
|
|
"fcm_dpo/beta": 0.007320537231862545,
|
|
"fcm_dpo/delta": -0.011010588146746159,
|
|
"fcm_dpo/margin": 38.06249237060547,
|
|
"fcm_dpo/q_t": 0.4363037943840027,
|
|
"grad_norm": 8.437037467956543,
|
|
"learning_rate": 1.3973071544233218e-09,
|
|
"logits/chosen": 1.2161636352539062,
|
|
"logits/rejected": 1.2543673515319824,
|
|
"logps/chosen": -184.1721954345703,
|
|
"logps/ref_chosen": -88.39617919921875,
|
|
"logps/ref_rejected": -88.73035430908203,
|
|
"logps/rejected": -222.56884765625,
|
|
"loss": 1.2144,
|
|
"margin_dpo/margin_mean": 38.06249237060547,
|
|
"margin_dpo/margin_std": 80.98915100097656,
|
|
"step": 642
|
|
},
|
|
{
|
|
"epoch": 0.9720332577475435,
|
|
"fcm_dpo/beta": 0.007371140643954277,
|
|
"fcm_dpo/delta": 0.07876811921596527,
|
|
"fcm_dpo/margin": 43.91210174560547,
|
|
"fcm_dpo/q_t": 0.42799311876296997,
|
|
"grad_norm": 9.573797225952148,
|
|
"learning_rate": 1.261184375888541e-09,
|
|
"logits/chosen": 1.276777982711792,
|
|
"logits/rejected": 0.9570169448852539,
|
|
"logps/chosen": -174.10198974609375,
|
|
"logps/ref_chosen": -84.83087158203125,
|
|
"logps/ref_rejected": -105.31499481201172,
|
|
"logps/rejected": -238.4982147216797,
|
|
"loss": 1.2043,
|
|
"margin_dpo/margin_mean": 43.91210174560547,
|
|
"margin_dpo/margin_std": 94.48892974853516,
|
|
"step": 643
|
|
},
|
|
{
|
|
"epoch": 0.9735449735449735,
|
|
"fcm_dpo/beta": 0.0075573730282485485,
|
|
"fcm_dpo/delta": 0.09459188580513,
|
|
"fcm_dpo/margin": 40.763572692871094,
|
|
"fcm_dpo/q_t": 0.431037575006485,
|
|
"grad_norm": 8.969107627868652,
|
|
"learning_rate": 1.1320193567288527e-09,
|
|
"logits/chosen": 1.6555099487304688,
|
|
"logits/rejected": 1.5394855737686157,
|
|
"logps/chosen": -154.88946533203125,
|
|
"logps/ref_chosen": -65.11122131347656,
|
|
"logps/ref_rejected": -80.4027328491211,
|
|
"logps/rejected": -210.94454956054688,
|
|
"loss": 1.222,
|
|
"margin_dpo/margin_mean": 40.763572692871094,
|
|
"margin_dpo/margin_std": 94.83735656738281,
|
|
"step": 644
|
|
},
|
|
{
|
|
"epoch": 0.9750566893424036,
|
|
"fcm_dpo/beta": 0.007558923214673996,
|
|
"fcm_dpo/delta": -0.014460157603025436,
|
|
"fcm_dpo/margin": 54.74996566772461,
|
|
"fcm_dpo/q_t": 0.4060879647731781,
|
|
"grad_norm": 8.757953643798828,
|
|
"learning_rate": 1.0098157099674987e-09,
|
|
"logits/chosen": 1.608795404434204,
|
|
"logits/rejected": 1.5974090099334717,
|
|
"logps/chosen": -162.4353790283203,
|
|
"logps/ref_chosen": -76.93634033203125,
|
|
"logps/ref_rejected": -89.14311981201172,
|
|
"logps/rejected": -229.39212036132812,
|
|
"loss": 1.1055,
|
|
"margin_dpo/margin_mean": 54.749969482421875,
|
|
"margin_dpo/margin_std": 82.67251586914062,
|
|
"step": 645
|
|
},
|
|
{
|
|
"epoch": 0.9765684051398337,
|
|
"fcm_dpo/beta": 0.007531238719820976,
|
|
"fcm_dpo/delta": -0.0364893302321434,
|
|
"fcm_dpo/margin": 57.74522018432617,
|
|
"fcm_dpo/q_t": 0.4003961682319641,
|
|
"grad_norm": 8.327498435974121,
|
|
"learning_rate": 8.945768539031783e-10,
|
|
"logits/chosen": 1.7237350940704346,
|
|
"logits/rejected": 1.6151888370513916,
|
|
"logps/chosen": -174.32347106933594,
|
|
"logps/ref_chosen": -77.69122314453125,
|
|
"logps/ref_rejected": -98.14374542236328,
|
|
"logps/rejected": -252.52120971679688,
|
|
"loss": 1.0839,
|
|
"margin_dpo/margin_mean": 57.74522399902344,
|
|
"margin_dpo/margin_std": 80.33204650878906,
|
|
"step": 646
|
|
},
|
|
{
|
|
"epoch": 0.9780801209372638,
|
|
"fcm_dpo/beta": 0.007370360661298037,
|
|
"fcm_dpo/delta": -0.13900147378444672,
|
|
"fcm_dpo/margin": 72.1575698852539,
|
|
"fcm_dpo/q_t": 0.3773247301578522,
|
|
"grad_norm": 8.999638557434082,
|
|
"learning_rate": 7.863060120144316e-10,
|
|
"logits/chosen": 1.6755530834197998,
|
|
"logits/rejected": 1.4901223182678223,
|
|
"logps/chosen": -178.73919677734375,
|
|
"logps/ref_chosen": -83.79997253417969,
|
|
"logps/ref_rejected": -116.81965637207031,
|
|
"logps/rejected": -283.91644287109375,
|
|
"loss": 0.9998,
|
|
"margin_dpo/margin_mean": 72.1575698852539,
|
|
"margin_dpo/margin_std": 77.4002914428711,
|
|
"step": 647
|
|
},
|
|
{
|
|
"epoch": 0.9795918367346939,
|
|
"fcm_dpo/beta": 0.007339120376855135,
|
|
"fcm_dpo/delta": 0.05002519488334656,
|
|
"fcm_dpo/margin": 47.92961120605469,
|
|
"fcm_dpo/q_t": 0.4191440939903259,
|
|
"grad_norm": 6.385339736938477,
|
|
"learning_rate": 6.850062128694045e-10,
|
|
"logits/chosen": 1.3917063474655151,
|
|
"logits/rejected": 1.2107430696487427,
|
|
"logps/chosen": -182.20465087890625,
|
|
"logps/ref_chosen": -85.9629898071289,
|
|
"logps/ref_rejected": -101.36552429199219,
|
|
"logps/rejected": -245.53680419921875,
|
|
"loss": 1.1435,
|
|
"margin_dpo/margin_mean": 47.92961120605469,
|
|
"margin_dpo/margin_std": 76.83576965332031,
|
|
"step": 648
|
|
},
|
|
{
|
|
"epoch": 0.981103552532124,
|
|
"fcm_dpo/beta": 0.007368366699665785,
|
|
"fcm_dpo/delta": 0.004403694532811642,
|
|
"fcm_dpo/margin": 53.710758209228516,
|
|
"fcm_dpo/q_t": 0.4111127257347107,
|
|
"grad_norm": 9.27660083770752,
|
|
"learning_rate": 5.906802900412788e-10,
|
|
"logits/chosen": 1.7626551389694214,
|
|
"logits/rejected": 1.6102287769317627,
|
|
"logps/chosen": -153.03140258789062,
|
|
"logps/ref_chosen": -68.64892578125,
|
|
"logps/ref_rejected": -89.84898376464844,
|
|
"logps/rejected": -227.94223022460938,
|
|
"loss": 1.1353,
|
|
"margin_dpo/margin_mean": 53.710758209228516,
|
|
"margin_dpo/margin_std": 91.26134490966797,
|
|
"step": 649
|
|
},
|
|
{
|
|
"epoch": 0.982615268329554,
|
|
"fcm_dpo/beta": 0.007361288648098707,
|
|
"fcm_dpo/delta": -0.014930552802979946,
|
|
"fcm_dpo/margin": 56.28282165527344,
|
|
"fcm_dpo/q_t": 0.40710097551345825,
|
|
"grad_norm": 7.917013645172119,
|
|
"learning_rate": 5.033308820289184e-10,
|
|
"logits/chosen": 1.457055687904358,
|
|
"logits/rejected": 1.2520674467086792,
|
|
"logps/chosen": -147.81912231445312,
|
|
"logps/ref_chosen": -72.97265625,
|
|
"logps/ref_rejected": -93.04617309570312,
|
|
"logps/rejected": -224.17544555664062,
|
|
"loss": 1.1136,
|
|
"margin_dpo/margin_mean": 56.282814025878906,
|
|
"margin_dpo/margin_std": 87.57786560058594,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.9841269841269841,
|
|
"fcm_dpo/beta": 0.007448974065482616,
|
|
"fcm_dpo/delta": 0.06296257674694061,
|
|
"fcm_dpo/margin": 45.47364044189453,
|
|
"fcm_dpo/q_t": 0.4245682954788208,
|
|
"grad_norm": 14.473260879516602,
|
|
"learning_rate": 4.2296043218295606e-10,
|
|
"logits/chosen": 1.6711803674697876,
|
|
"logits/rejected": 1.4281296730041504,
|
|
"logps/chosen": -158.29653930664062,
|
|
"logps/ref_chosen": -71.05281066894531,
|
|
"logps/ref_rejected": -94.23469543457031,
|
|
"logps/rejected": -226.95205688476562,
|
|
"loss": 1.1638,
|
|
"margin_dpo/margin_mean": 45.47364044189453,
|
|
"margin_dpo/margin_std": 79.72322082519531,
|
|
"step": 651
|
|
},
|
|
{
|
|
"epoch": 0.9856386999244142,
|
|
"fcm_dpo/beta": 0.0074330344796180725,
|
|
"fcm_dpo/delta": -0.005656237713992596,
|
|
"fcm_dpo/margin": 54.540061950683594,
|
|
"fcm_dpo/q_t": 0.40991270542144775,
|
|
"grad_norm": 9.309069633483887,
|
|
"learning_rate": 3.4957118863768176e-10,
|
|
"logits/chosen": 1.4886457920074463,
|
|
"logits/rejected": 1.407999873161316,
|
|
"logps/chosen": -172.19561767578125,
|
|
"logps/ref_chosen": -80.06941223144531,
|
|
"logps/ref_rejected": -99.22327423095703,
|
|
"logps/rejected": -245.88955688476562,
|
|
"loss": 1.1204,
|
|
"margin_dpo/margin_mean": 54.540061950683594,
|
|
"margin_dpo/margin_std": 87.85244750976562,
|
|
"step": 652
|
|
},
|
|
{
|
|
"epoch": 0.9871504157218443,
|
|
"fcm_dpo/beta": 0.007382528856396675,
|
|
"fcm_dpo/delta": -0.08395875990390778,
|
|
"fcm_dpo/margin": 64.99937438964844,
|
|
"fcm_dpo/q_t": 0.3910355567932129,
|
|
"grad_norm": 10.658775329589844,
|
|
"learning_rate": 2.831652042480093e-10,
|
|
"logits/chosen": 1.7752074003219604,
|
|
"logits/rejected": 1.6271909475326538,
|
|
"logps/chosen": -158.2850341796875,
|
|
"logps/ref_chosen": -80.35701751708984,
|
|
"logps/ref_rejected": -92.1295394897461,
|
|
"logps/rejected": -235.05691528320312,
|
|
"loss": 1.0567,
|
|
"margin_dpo/margin_mean": 64.99937438964844,
|
|
"margin_dpo/margin_std": 84.79908752441406,
|
|
"step": 653
|
|
},
|
|
{
|
|
"epoch": 0.9886621315192744,
|
|
"fcm_dpo/beta": 0.00728489737957716,
|
|
"fcm_dpo/delta": -0.02065638080239296,
|
|
"fcm_dpo/margin": 43.905029296875,
|
|
"fcm_dpo/q_t": 0.4288891553878784,
|
|
"grad_norm": 9.484676361083984,
|
|
"learning_rate": 2.2374433653205016e-10,
|
|
"logits/chosen": 1.6096107959747314,
|
|
"logits/rejected": 1.3091274499893188,
|
|
"logps/chosen": -171.35845947265625,
|
|
"logps/ref_chosen": -78.06475830078125,
|
|
"logps/ref_rejected": -106.05763244628906,
|
|
"logps/rejected": -243.25636291503906,
|
|
"loss": 1.1841,
|
|
"margin_dpo/margin_mean": 43.905029296875,
|
|
"margin_dpo/margin_std": 84.46098327636719,
|
|
"step": 654
|
|
},
|
|
{
|
|
"epoch": 0.9901738473167044,
|
|
"fcm_dpo/beta": 0.007229020819067955,
|
|
"fcm_dpo/delta": -0.002755559980869293,
|
|
"fcm_dpo/margin": 55.5506591796875,
|
|
"fcm_dpo/q_t": 0.4089524447917938,
|
|
"grad_norm": 8.344823837280273,
|
|
"learning_rate": 1.7131024761923852e-10,
|
|
"logits/chosen": 1.263197660446167,
|
|
"logits/rejected": 0.9553810358047485,
|
|
"logps/chosen": -145.0635528564453,
|
|
"logps/ref_chosen": -67.03407287597656,
|
|
"logps/ref_rejected": -97.57197570800781,
|
|
"logps/rejected": -231.152099609375,
|
|
"loss": 1.0989,
|
|
"margin_dpo/margin_mean": 55.5506591796875,
|
|
"margin_dpo/margin_std": 75.53077697753906,
|
|
"step": 655
|
|
},
|
|
{
|
|
"epoch": 0.9916855631141346,
|
|
"fcm_dpo/beta": 0.007296122610569,
|
|
"fcm_dpo/delta": 0.006786551792174578,
|
|
"fcm_dpo/margin": 53.92597961425781,
|
|
"fcm_dpo/q_t": 0.4102938175201416,
|
|
"grad_norm": 5.894443035125732,
|
|
"learning_rate": 1.2586440420372934e-10,
|
|
"logits/chosen": 1.2040374279022217,
|
|
"logits/rejected": 1.0824060440063477,
|
|
"logps/chosen": -184.591796875,
|
|
"logps/ref_chosen": -89.31463623046875,
|
|
"logps/ref_rejected": -105.14315795898438,
|
|
"logps/rejected": -254.3463134765625,
|
|
"loss": 1.1228,
|
|
"margin_dpo/margin_mean": 53.92597961425781,
|
|
"margin_dpo/margin_std": 85.65472412109375,
|
|
"step": 656
|
|
},
|
|
{
|
|
"epoch": 0.9931972789115646,
|
|
"fcm_dpo/beta": 0.0071443431079387665,
|
|
"fcm_dpo/delta": -0.13093850016593933,
|
|
"fcm_dpo/margin": 73.339111328125,
|
|
"fcm_dpo/q_t": 0.38050127029418945,
|
|
"grad_norm": 7.297959327697754,
|
|
"learning_rate": 8.740807750345913e-11,
|
|
"logits/chosen": 1.6534512042999268,
|
|
"logits/rejected": 1.423713207244873,
|
|
"logps/chosen": -142.54873657226562,
|
|
"logps/ref_chosen": -64.89747619628906,
|
|
"logps/ref_rejected": -94.21998596191406,
|
|
"logps/rejected": -245.21035766601562,
|
|
"loss": 1.0233,
|
|
"margin_dpo/margin_mean": 73.339111328125,
|
|
"margin_dpo/margin_std": 88.55533599853516,
|
|
"step": 657
|
|
},
|
|
{
|
|
"epoch": 0.9947089947089947,
|
|
"fcm_dpo/beta": 0.007135612890124321,
|
|
"fcm_dpo/delta": -0.005358390510082245,
|
|
"fcm_dpo/margin": 56.69953155517578,
|
|
"fcm_dpo/q_t": 0.4094886779785156,
|
|
"grad_norm": 12.389694213867188,
|
|
"learning_rate": 5.594234322453539e-11,
|
|
"logits/chosen": 1.448069453239441,
|
|
"logits/rejected": 1.3515777587890625,
|
|
"logps/chosen": -162.52061462402344,
|
|
"logps/ref_chosen": -81.16606140136719,
|
|
"logps/ref_rejected": -97.72825622558594,
|
|
"logps/rejected": -235.7823486328125,
|
|
"loss": 1.1347,
|
|
"margin_dpo/margin_mean": 56.69953155517578,
|
|
"margin_dpo/margin_std": 96.20867919921875,
|
|
"step": 658
|
|
},
|
|
{
|
|
"epoch": 0.9962207105064248,
|
|
"fcm_dpo/beta": 0.007101975381374359,
|
|
"fcm_dpo/delta": 0.005114157218486071,
|
|
"fcm_dpo/margin": 29.31315803527832,
|
|
"fcm_dpo/q_t": 0.4529067277908325,
|
|
"grad_norm": 7.469188690185547,
|
|
"learning_rate": 3.146808153123293e-11,
|
|
"logits/chosen": 1.7898869514465332,
|
|
"logits/rejected": 1.5685615539550781,
|
|
"logps/chosen": -173.17181396484375,
|
|
"logps/ref_chosen": -74.42193603515625,
|
|
"logps/ref_rejected": -87.81561279296875,
|
|
"logps/rejected": -215.87863159179688,
|
|
"loss": 1.2881,
|
|
"margin_dpo/margin_mean": 29.313159942626953,
|
|
"margin_dpo/margin_std": 86.78317260742188,
|
|
"step": 659
|
|
},
|
|
{
|
|
"epoch": 0.9977324263038548,
|
|
"fcm_dpo/beta": 0.007041741628199816,
|
|
"fcm_dpo/delta": -0.042580414563417435,
|
|
"fcm_dpo/margin": 62.558837890625,
|
|
"fcm_dpo/q_t": 0.39910098910331726,
|
|
"grad_norm": 8.689871788024902,
|
|
"learning_rate": 1.3985977021235829e-11,
|
|
"logits/chosen": 1.7107985019683838,
|
|
"logits/rejected": 1.5310373306274414,
|
|
"logps/chosen": -157.3565216064453,
|
|
"logps/ref_chosen": -71.68511962890625,
|
|
"logps/ref_rejected": -98.01472473144531,
|
|
"logps/rejected": -246.24496459960938,
|
|
"loss": 1.0657,
|
|
"margin_dpo/margin_mean": 62.558837890625,
|
|
"margin_dpo/margin_std": 78.82179260253906,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 0.999244142101285,
|
|
"fcm_dpo/beta": 0.007080161012709141,
|
|
"fcm_dpo/delta": 0.05424977466464043,
|
|
"fcm_dpo/margin": 36.04890060424805,
|
|
"fcm_dpo/q_t": 0.4424479603767395,
|
|
"grad_norm": 7.011692523956299,
|
|
"learning_rate": 3.4965187065971735e-12,
|
|
"logits/chosen": 1.4969191551208496,
|
|
"logits/rejected": 1.2757140398025513,
|
|
"logps/chosen": -187.23583984375,
|
|
"logps/ref_chosen": -78.35111999511719,
|
|
"logps/ref_rejected": -99.47113037109375,
|
|
"logps/rejected": -244.40475463867188,
|
|
"loss": 1.251,
|
|
"margin_dpo/margin_mean": 36.04889678955078,
|
|
"margin_dpo/margin_std": 93.42096710205078,
|
|
"step": 661
|
|
},
|
|
{
|
|
"epoch": 0.999244142101285,
|
|
"step": 661,
|
|
"total_flos": 0.0,
|
|
"train_loss": 1.1795702667712444,
|
|
"train_runtime": 2124.2784,
|
|
"train_samples_per_second": 19.93,
|
|
"train_steps_per_second": 0.311
|
|
}
|
|
],
|
|
"logging_steps": 1,
|
|
"max_steps": 661,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 50,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": false,
|
|
"should_training_stop": false
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 8,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|