Files
qwen3-8b-base-new-dpo-hh-ha…/trainer_state.json
ModelHub XC 9268b0b929 初始化项目,由ModelHub XC社区提供模型
Model: jackf857/qwen3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.45-s_star-0.6
Source: Original Platform
2026-05-13 01:12:57 +08:00

12705 lines
464 KiB
JSON

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.999244142101285,
"eval_steps": 100,
"global_step": 661,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0015117157974300832,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.002980858087539673,
"fcm_dpo/q_t": 0.5000747442245483,
"grad_norm": 17.89801597595215,
"learning_rate": 0.0,
"logits/chosen": 1.702779769897461,
"logits/rejected": 1.6965749263763428,
"logps/chosen": -80.20932006835938,
"logps/ref_chosen": -80.27740478515625,
"logps/ref_rejected": -83.5943374633789,
"logps/rejected": -83.52326965332031,
"loss": 1.387,
"margin_dpo/margin_mean": -0.0029816031455993652,
"margin_dpo/margin_std": 0.3835117816925049,
"step": 1
},
{
"epoch": 0.0030234315948601664,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.029325395822525024,
"fcm_dpo/q_t": 0.4992692470550537,
"grad_norm": 21.36615562438965,
"learning_rate": 7.462686567164179e-09,
"logits/chosen": 1.7006168365478516,
"logits/rejected": 1.6698178052902222,
"logps/chosen": -74.51097869873047,
"logps/ref_chosen": -74.56095886230469,
"logps/ref_rejected": -83.53636169433594,
"logps/rejected": -83.51570892333984,
"loss": 1.3839,
"margin_dpo/margin_mean": 0.029325813055038452,
"margin_dpo/margin_std": 0.4646317958831787,
"step": 2
},
{
"epoch": 0.0045351473922902496,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.03095117211341858,
"fcm_dpo/q_t": 0.4992258846759796,
"grad_norm": 19.930883407592773,
"learning_rate": 1.4925373134328357e-08,
"logits/chosen": 1.6261851787567139,
"logits/rejected": 1.5350717306137085,
"logps/chosen": -82.15226745605469,
"logps/ref_chosen": -82.1510009765625,
"logps/ref_rejected": -109.82986450195312,
"logps/rejected": -109.86207580566406,
"loss": 1.3837,
"margin_dpo/margin_mean": 0.030951082706451416,
"margin_dpo/margin_std": 0.44513028860092163,
"step": 3
},
{
"epoch": 0.006046863189720333,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.042372316122055054,
"fcm_dpo/q_t": 0.4989404082298279,
"grad_norm": 19.7413272857666,
"learning_rate": 2.2388059701492534e-08,
"logits/chosen": 1.7652442455291748,
"logits/rejected": 1.7535886764526367,
"logps/chosen": -92.318603515625,
"logps/ref_chosen": -92.37549591064453,
"logps/ref_rejected": -99.59553527832031,
"logps/rejected": -99.58100891113281,
"loss": 1.3827,
"margin_dpo/margin_mean": 0.0423721969127655,
"margin_dpo/margin_std": 0.4652661681175232,
"step": 4
},
{
"epoch": 0.007558578987150416,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.004836410284042358,
"fcm_dpo/q_t": 0.5001212954521179,
"grad_norm": 18.83965492248535,
"learning_rate": 2.9850746268656714e-08,
"logits/chosen": 1.6294361352920532,
"logits/rejected": 1.5735852718353271,
"logps/chosen": -78.87178039550781,
"logps/ref_chosen": -78.84872436523438,
"logps/ref_rejected": -97.88040161132812,
"logps/rejected": -97.89862060546875,
"loss": 1.3871,
"margin_dpo/margin_mean": -0.0048364996910095215,
"margin_dpo/margin_std": 0.36210399866104126,
"step": 5
},
{
"epoch": 0.009070294784580499,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.095939040184021,
"fcm_dpo/q_t": 0.49760186672210693,
"grad_norm": 18.0408878326416,
"learning_rate": 3.731343283582089e-08,
"logits/chosen": 1.5870825052261353,
"logits/rejected": 1.4796451330184937,
"logps/chosen": -68.30978393554688,
"logps/ref_chosen": -68.34607696533203,
"logps/ref_rejected": -99.24614715576172,
"logps/rejected": -99.3057861328125,
"loss": 1.3769,
"margin_dpo/margin_mean": 0.0959392786026001,
"margin_dpo/margin_std": 0.29579028487205505,
"step": 6
},
{
"epoch": 0.010582010582010581,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.06473231315612793,
"fcm_dpo/q_t": 0.5016177892684937,
"grad_norm": 17.415424346923828,
"learning_rate": 4.477611940298507e-08,
"logits/chosen": 1.4590237140655518,
"logits/rejected": 1.3971405029296875,
"logps/chosen": -69.1452865600586,
"logps/ref_chosen": -69.11282348632812,
"logps/ref_rejected": -84.01641845703125,
"logps/rejected": -83.98414611816406,
"loss": 1.3931,
"margin_dpo/margin_mean": -0.0647326409816742,
"margin_dpo/margin_std": 0.3379696011543274,
"step": 7
},
{
"epoch": 0.012093726379440665,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.038746654987335205,
"fcm_dpo/q_t": 0.5009682774543762,
"grad_norm": 18.376161575317383,
"learning_rate": 5.223880597014925e-08,
"logits/chosen": 1.7957122325897217,
"logits/rejected": 1.7788466215133667,
"logps/chosen": -78.38008117675781,
"logps/ref_chosen": -78.3912353515625,
"logps/ref_rejected": -91.06254577636719,
"logps/rejected": -91.01263427734375,
"loss": 1.3906,
"margin_dpo/margin_mean": -0.038746029138565063,
"margin_dpo/margin_std": 0.38139432668685913,
"step": 8
},
{
"epoch": 0.013605442176870748,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.06898093223571777,
"fcm_dpo/q_t": 0.4982767105102539,
"grad_norm": 19.275188446044922,
"learning_rate": 5.970149253731343e-08,
"logits/chosen": 1.9897737503051758,
"logits/rejected": 1.7835183143615723,
"logps/chosen": -69.65217590332031,
"logps/ref_chosen": -69.67422485351562,
"logps/ref_rejected": -105.00473022460938,
"logps/rejected": -105.05166625976562,
"loss": 1.38,
"margin_dpo/margin_mean": 0.068980872631073,
"margin_dpo/margin_std": 0.4633823037147522,
"step": 9
},
{
"epoch": 0.015117157974300832,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.008043617010116577,
"fcm_dpo/q_t": 0.5002011060714722,
"grad_norm": 19.03217124938965,
"learning_rate": 6.71641791044776e-08,
"logits/chosen": 1.5980093479156494,
"logits/rejected": 1.5193543434143066,
"logps/chosen": -79.69847106933594,
"logps/ref_chosen": -79.730712890625,
"logps/ref_rejected": -105.50645446777344,
"logps/rejected": -105.4661636352539,
"loss": 1.3875,
"margin_dpo/margin_mean": -0.008043557405471802,
"margin_dpo/margin_std": 0.3877168893814087,
"step": 10
},
{
"epoch": 0.016628873771730914,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.010087519884109497,
"fcm_dpo/q_t": 0.5002526044845581,
"grad_norm": 17.490964889526367,
"learning_rate": 7.462686567164178e-08,
"logits/chosen": 1.785116195678711,
"logits/rejected": 1.7379379272460938,
"logps/chosen": -85.41242980957031,
"logps/ref_chosen": -85.41248321533203,
"logps/ref_rejected": -86.50241088867188,
"logps/rejected": -86.49227142333984,
"loss": 1.3877,
"margin_dpo/margin_mean": -0.010087013244628906,
"margin_dpo/margin_std": 0.40967226028442383,
"step": 11
},
{
"epoch": 0.018140589569160998,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.043316930532455444,
"fcm_dpo/q_t": 0.49891871213912964,
"grad_norm": 17.32830047607422,
"learning_rate": 8.208955223880596e-08,
"logits/chosen": 1.645331621170044,
"logits/rejected": 1.6056153774261475,
"logps/chosen": -81.3597183227539,
"logps/ref_chosen": -81.38086700439453,
"logps/ref_rejected": -89.88151550292969,
"logps/rejected": -89.9036865234375,
"loss": 1.3825,
"margin_dpo/margin_mean": 0.043317049741744995,
"margin_dpo/margin_std": 0.444502055644989,
"step": 12
},
{
"epoch": 0.019652305366591082,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.026911377906799316,
"fcm_dpo/q_t": 0.49932724237442017,
"grad_norm": 17.857009887695312,
"learning_rate": 8.955223880597014e-08,
"logits/chosen": 1.6704635620117188,
"logits/rejected": 1.481621503829956,
"logps/chosen": -63.172264099121094,
"logps/ref_chosen": -63.17030715942383,
"logps/ref_rejected": -105.61166381835938,
"logps/rejected": -105.64053344726562,
"loss": 1.3838,
"margin_dpo/margin_mean": 0.026911497116088867,
"margin_dpo/margin_std": 0.3080989122390747,
"step": 13
},
{
"epoch": 0.021164021164021163,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.017434000968933105,
"fcm_dpo/q_t": 0.5004353523254395,
"grad_norm": 20.941539764404297,
"learning_rate": 9.701492537313432e-08,
"logits/chosen": 1.6752700805664062,
"logits/rejected": 1.6424415111541748,
"logps/chosen": -80.72457122802734,
"logps/ref_chosen": -80.71014404296875,
"logps/ref_rejected": -89.86041259765625,
"logps/rejected": -89.85740661621094,
"loss": 1.3883,
"margin_dpo/margin_mean": -0.017433375120162964,
"margin_dpo/margin_std": 0.32124900817871094,
"step": 14
},
{
"epoch": 0.022675736961451247,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.0804678201675415,
"fcm_dpo/q_t": 0.5020108222961426,
"grad_norm": 19.288888931274414,
"learning_rate": 1.044776119402985e-07,
"logits/chosen": 1.4386959075927734,
"logits/rejected": 1.3539936542510986,
"logps/chosen": -82.036865234375,
"logps/ref_chosen": -82.00294494628906,
"logps/ref_rejected": -106.43550109863281,
"logps/rejected": -106.38895416259766,
"loss": 1.3947,
"margin_dpo/margin_mean": -0.08046802878379822,
"margin_dpo/margin_std": 0.336540549993515,
"step": 15
},
{
"epoch": 0.02418745275888133,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.025534391403198242,
"fcm_dpo/q_t": 0.5006370544433594,
"grad_norm": 17.247323989868164,
"learning_rate": 1.1194029850746268e-07,
"logits/chosen": 1.8795946836471558,
"logits/rejected": 1.7519097328186035,
"logps/chosen": -62.282501220703125,
"logps/ref_chosen": -62.308345794677734,
"logps/ref_rejected": -89.6508560180664,
"logps/rejected": -89.59947967529297,
"loss": 1.3893,
"margin_dpo/margin_mean": -0.025534451007843018,
"margin_dpo/margin_std": 0.41592419147491455,
"step": 16
},
{
"epoch": 0.025699168556311415,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.011744916439056396,
"fcm_dpo/q_t": 0.4997067451477051,
"grad_norm": 18.33580780029297,
"learning_rate": 1.1940298507462686e-07,
"logits/chosen": 1.513704538345337,
"logits/rejected": 1.4842028617858887,
"logps/chosen": -85.16311645507812,
"logps/ref_chosen": -85.16903686523438,
"logps/ref_rejected": -102.57087707519531,
"logps/rejected": -102.57669830322266,
"loss": 1.3855,
"margin_dpo/margin_mean": 0.011744409799575806,
"margin_dpo/margin_std": 0.3892754316329956,
"step": 17
},
{
"epoch": 0.027210884353741496,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.04118014872074127,
"fcm_dpo/q_t": 0.4989708662033081,
"grad_norm": 17.161205291748047,
"learning_rate": 1.2686567164179106e-07,
"logits/chosen": 1.929447889328003,
"logits/rejected": 1.7789592742919922,
"logps/chosen": -63.15791320800781,
"logps/ref_chosen": -63.17793273925781,
"logps/ref_rejected": -86.06461334228516,
"logps/rejected": -86.08576965332031,
"loss": 1.3825,
"margin_dpo/margin_mean": 0.04118022322654724,
"margin_dpo/margin_std": 0.33605462312698364,
"step": 18
},
{
"epoch": 0.02872260015117158,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.04288366436958313,
"fcm_dpo/q_t": 0.5010709762573242,
"grad_norm": 19.838333129882812,
"learning_rate": 1.343283582089552e-07,
"logits/chosen": 1.90325927734375,
"logits/rejected": 1.8959013223648071,
"logps/chosen": -85.86503601074219,
"logps/ref_chosen": -85.82405853271484,
"logps/ref_rejected": -100.07136535644531,
"logps/rejected": -100.06946563720703,
"loss": 1.391,
"margin_dpo/margin_mean": -0.04288366436958313,
"margin_dpo/margin_std": 0.4113919138908386,
"step": 19
},
{
"epoch": 0.030234315948601664,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.07760676741600037,
"fcm_dpo/q_t": 0.5019393563270569,
"grad_norm": 18.312501907348633,
"learning_rate": 1.4179104477611938e-07,
"logits/chosen": 2.0755791664123535,
"logits/rejected": 1.9887995719909668,
"logps/chosen": -73.6259994506836,
"logps/ref_chosen": -73.58621215820312,
"logps/ref_rejected": -91.21690368652344,
"logps/rejected": -91.17908477783203,
"loss": 1.3944,
"margin_dpo/margin_mean": -0.07760673761367798,
"margin_dpo/margin_std": 0.36418983340263367,
"step": 20
},
{
"epoch": 0.031746031746031744,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.03705587983131409,
"fcm_dpo/q_t": 0.4990747272968292,
"grad_norm": 18.1649112701416,
"learning_rate": 1.4925373134328355e-07,
"logits/chosen": 2.150765895843506,
"logits/rejected": 2.022829294204712,
"logps/chosen": -81.96690368652344,
"logps/ref_chosen": -81.97251892089844,
"logps/ref_rejected": -98.05976867675781,
"logps/rejected": -98.09120178222656,
"loss": 1.3831,
"margin_dpo/margin_mean": 0.03705599904060364,
"margin_dpo/margin_std": 0.43046677112579346,
"step": 21
},
{
"epoch": 0.03325774754346183,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.028704047203063965,
"fcm_dpo/q_t": 0.5007180571556091,
"grad_norm": 18.55045509338379,
"learning_rate": 1.5671641791044775e-07,
"logits/chosen": 1.544440746307373,
"logits/rejected": 1.5105493068695068,
"logps/chosen": -76.98062896728516,
"logps/ref_chosen": -76.99579620361328,
"logps/ref_rejected": -95.76089477539062,
"logps/rejected": -95.71702575683594,
"loss": 1.3897,
"margin_dpo/margin_mean": -0.028704792261123657,
"margin_dpo/margin_std": 0.4347040057182312,
"step": 22
},
{
"epoch": 0.03476946334089191,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.07605567574501038,
"fcm_dpo/q_t": 0.49809861183166504,
"grad_norm": 19.070331573486328,
"learning_rate": 1.6417910447761193e-07,
"logits/chosen": 1.923293113708496,
"logits/rejected": 1.831559658050537,
"logps/chosen": -84.68144989013672,
"logps/ref_chosen": -84.76856994628906,
"logps/ref_rejected": -107.28266906738281,
"logps/rejected": -107.2716064453125,
"loss": 1.3792,
"margin_dpo/margin_mean": 0.0760551393032074,
"margin_dpo/margin_std": 0.41792023181915283,
"step": 23
},
{
"epoch": 0.036281179138321996,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.03800934553146362,
"fcm_dpo/q_t": 0.5009497404098511,
"grad_norm": 17.18250846862793,
"learning_rate": 1.716417910447761e-07,
"logits/chosen": 1.7587859630584717,
"logits/rejected": 1.6988754272460938,
"logps/chosen": -69.89579010009766,
"logps/ref_chosen": -69.87112426757812,
"logps/ref_rejected": -84.02084350585938,
"logps/rejected": -84.00749969482422,
"loss": 1.3904,
"margin_dpo/margin_mean": -0.038009583950042725,
"margin_dpo/margin_std": 0.3587035536766052,
"step": 24
},
{
"epoch": 0.03779289493575208,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.04470279812812805,
"fcm_dpo/q_t": 0.4988824129104614,
"grad_norm": 19.49793815612793,
"learning_rate": 1.7910447761194027e-07,
"logits/chosen": 1.9868547916412354,
"logits/rejected": 1.8301403522491455,
"logps/chosen": -78.24287414550781,
"logps/ref_chosen": -78.22694396972656,
"logps/ref_rejected": -106.65234375,
"logps/rejected": -106.71296691894531,
"loss": 1.3822,
"margin_dpo/margin_mean": 0.04470303654670715,
"margin_dpo/margin_std": 0.39123424887657166,
"step": 25
},
{
"epoch": 0.039304610733182165,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.0833863914012909,
"fcm_dpo/q_t": 0.4979166090488434,
"grad_norm": 17.834970474243164,
"learning_rate": 1.8656716417910447e-07,
"logits/chosen": 1.9412182569503784,
"logits/rejected": 1.914105772972107,
"logps/chosen": -74.54658508300781,
"logps/ref_chosen": -74.59750366210938,
"logps/ref_rejected": -93.57858276367188,
"logps/rejected": -93.61105346679688,
"loss": 1.3783,
"margin_dpo/margin_mean": 0.08338648080825806,
"margin_dpo/margin_std": 0.37571650743484497,
"step": 26
},
{
"epoch": 0.04081632653061224,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.05425041913986206,
"fcm_dpo/q_t": 0.4986443519592285,
"grad_norm": 18.620058059692383,
"learning_rate": 1.9402985074626865e-07,
"logits/chosen": 1.8641291856765747,
"logits/rejected": 1.8036224842071533,
"logps/chosen": -78.64132690429688,
"logps/ref_chosen": -78.64625549316406,
"logps/ref_rejected": -92.33645629882812,
"logps/rejected": -92.38578796386719,
"loss": 1.3812,
"margin_dpo/margin_mean": 0.0542508065700531,
"margin_dpo/margin_std": 0.3697792887687683,
"step": 27
},
{
"epoch": 0.042328042328042326,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.007928252220153809,
"fcm_dpo/q_t": 0.5001976490020752,
"grad_norm": 17.704586029052734,
"learning_rate": 2.0149253731343282e-07,
"logits/chosen": 1.5783494710922241,
"logits/rejected": 1.5295078754425049,
"logps/chosen": -76.9276351928711,
"logps/ref_chosen": -76.91271209716797,
"logps/ref_rejected": -88.48194885253906,
"logps/rejected": -88.48894500732422,
"loss": 1.3875,
"margin_dpo/margin_mean": -0.007928639650344849,
"margin_dpo/margin_std": 0.393305242061615,
"step": 28
},
{
"epoch": 0.04383975812547241,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.05503666400909424,
"fcm_dpo/q_t": 0.4986268877983093,
"grad_norm": 21.232301712036133,
"learning_rate": 2.08955223880597e-07,
"logits/chosen": 2.018385648727417,
"logits/rejected": 1.9525585174560547,
"logps/chosen": -89.55824279785156,
"logps/ref_chosen": -89.62060546875,
"logps/ref_rejected": -100.57090759277344,
"logps/rejected": -100.56358337402344,
"loss": 1.3814,
"margin_dpo/margin_mean": 0.05503681302070618,
"margin_dpo/margin_std": 0.49973034858703613,
"step": 29
},
{
"epoch": 0.045351473922902494,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.0687638521194458,
"fcm_dpo/q_t": 0.4982798397541046,
"grad_norm": 18.995983123779297,
"learning_rate": 2.1641791044776117e-07,
"logits/chosen": 2.1269025802612305,
"logits/rejected": 1.9344401359558105,
"logps/chosen": -68.77285766601562,
"logps/ref_chosen": -68.82381439208984,
"logps/ref_rejected": -104.7047119140625,
"logps/rejected": -104.72251892089844,
"loss": 1.3799,
"margin_dpo/margin_mean": 0.06876346468925476,
"margin_dpo/margin_std": 0.44119542837142944,
"step": 30
},
{
"epoch": 0.04686318972033258,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.001088559627532959,
"fcm_dpo/q_t": 0.49997323751449585,
"grad_norm": 20.58745002746582,
"learning_rate": 2.2388059701492537e-07,
"logits/chosen": 1.7696319818496704,
"logits/rejected": 1.6470561027526855,
"logps/chosen": -86.03531646728516,
"logps/ref_chosen": -86.06916809082031,
"logps/ref_rejected": -116.66394805908203,
"logps/rejected": -116.63117980957031,
"loss": 1.3865,
"margin_dpo/margin_mean": 0.0010884404182434082,
"margin_dpo/margin_std": 0.36631911993026733,
"step": 31
},
{
"epoch": 0.04837490551776266,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.07467979192733765,
"fcm_dpo/q_t": 0.4981331527233124,
"grad_norm": 18.509641647338867,
"learning_rate": 2.3134328358208954e-07,
"logits/chosen": 1.4663035869598389,
"logits/rejected": 1.515918493270874,
"logps/chosen": -87.51797485351562,
"logps/ref_chosen": -87.59808349609375,
"logps/ref_rejected": -100.26905822753906,
"logps/rejected": -100.26361846923828,
"loss": 1.3793,
"margin_dpo/margin_mean": 0.07468008995056152,
"margin_dpo/margin_std": 0.4339354634284973,
"step": 32
},
{
"epoch": 0.049886621315192746,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.023064017295837402,
"fcm_dpo/q_t": 0.5005767941474915,
"grad_norm": 19.758060455322266,
"learning_rate": 2.388059701492537e-07,
"logits/chosen": 1.5679633617401123,
"logits/rejected": 1.4670143127441406,
"logps/chosen": -83.30245971679688,
"logps/ref_chosen": -83.29850769042969,
"logps/ref_rejected": -94.60990142822266,
"logps/rejected": -94.59078979492188,
"loss": 1.389,
"margin_dpo/margin_mean": -0.02306431531906128,
"margin_dpo/margin_std": 0.3838702440261841,
"step": 33
},
{
"epoch": 0.05139833711262283,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.058904439210891724,
"fcm_dpo/q_t": 0.49852806329727173,
"grad_norm": 18.01277732849121,
"learning_rate": 2.4626865671641786e-07,
"logits/chosen": 1.8714017868041992,
"logits/rejected": 1.7843908071517944,
"logps/chosen": -70.11520385742188,
"logps/ref_chosen": -70.15069580078125,
"logps/ref_rejected": -84.4693832397461,
"logps/rejected": -84.49279022216797,
"loss": 1.3807,
"margin_dpo/margin_mean": 0.058904558420181274,
"margin_dpo/margin_std": 0.35321176052093506,
"step": 34
},
{
"epoch": 0.05291005291005291,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.07404336333274841,
"fcm_dpo/q_t": 0.49815088510513306,
"grad_norm": 18.077713012695312,
"learning_rate": 2.537313432835821e-07,
"logits/chosen": 1.47958505153656,
"logits/rejected": 1.427431583404541,
"logps/chosen": -78.203857421875,
"logps/ref_chosen": -78.25238037109375,
"logps/ref_rejected": -91.06356811523438,
"logps/rejected": -91.08910369873047,
"loss": 1.3793,
"margin_dpo/margin_mean": 0.0740436315536499,
"margin_dpo/margin_std": 0.405529260635376,
"step": 35
},
{
"epoch": 0.05442176870748299,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.09882274270057678,
"fcm_dpo/q_t": 0.49753493070602417,
"grad_norm": 17.96520233154297,
"learning_rate": 2.611940298507462e-07,
"logits/chosen": 1.845771312713623,
"logits/rejected": 1.7256853580474854,
"logps/chosen": -67.06063079833984,
"logps/ref_chosen": -67.06676483154297,
"logps/ref_rejected": -99.34661865234375,
"logps/rejected": -99.4393081665039,
"loss": 1.3771,
"margin_dpo/margin_mean": 0.09882298111915588,
"margin_dpo/margin_std": 0.49245503544807434,
"step": 36
},
{
"epoch": 0.055933484504913075,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.0062815845012664795,
"fcm_dpo/q_t": 0.5001574754714966,
"grad_norm": 23.530776977539062,
"learning_rate": 2.686567164179104e-07,
"logits/chosen": 1.8602843284606934,
"logits/rejected": 1.5742213726043701,
"logps/chosen": -75.89591979980469,
"logps/ref_chosen": -75.9269790649414,
"logps/ref_rejected": -130.34371948242188,
"logps/rejected": -130.30636596679688,
"loss": 1.3874,
"margin_dpo/margin_mean": -0.00628247857093811,
"margin_dpo/margin_std": 0.4102315306663513,
"step": 37
},
{
"epoch": 0.05744520030234316,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.005452930927276611,
"fcm_dpo/q_t": 0.49986520409584045,
"grad_norm": 18.435871124267578,
"learning_rate": 2.761194029850746e-07,
"logits/chosen": 1.7420191764831543,
"logits/rejected": 1.7041373252868652,
"logps/chosen": -83.6760025024414,
"logps/ref_chosen": -83.65460205078125,
"logps/ref_rejected": -89.15221405029297,
"logps/rejected": -89.17906951904297,
"loss": 1.3862,
"margin_dpo/margin_mean": 0.005452901124954224,
"margin_dpo/margin_std": 0.3982018530368805,
"step": 38
},
{
"epoch": 0.05895691609977324,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.1193189024925232,
"fcm_dpo/q_t": 0.497018039226532,
"grad_norm": 18.950082778930664,
"learning_rate": 2.8358208955223876e-07,
"logits/chosen": 2.043585777282715,
"logits/rejected": 1.994788646697998,
"logps/chosen": -76.06095886230469,
"logps/ref_chosen": -76.18706512451172,
"logps/ref_rejected": -94.39262390136719,
"logps/rejected": -94.3858413696289,
"loss": 1.3747,
"margin_dpo/margin_mean": 0.11931854486465454,
"margin_dpo/margin_std": 0.3407592177391052,
"step": 39
},
{
"epoch": 0.06046863189720333,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.056918948888778687,
"fcm_dpo/q_t": 0.501421332359314,
"grad_norm": 18.159303665161133,
"learning_rate": 2.9104477611940296e-07,
"logits/chosen": 1.812699556350708,
"logits/rejected": 1.7002441883087158,
"logps/chosen": -77.47999572753906,
"logps/ref_chosen": -77.43476867675781,
"logps/ref_rejected": -98.58720397949219,
"logps/rejected": -98.57550811767578,
"loss": 1.3925,
"margin_dpo/margin_mean": -0.05691874027252197,
"margin_dpo/margin_std": 0.42336541414260864,
"step": 40
},
{
"epoch": 0.06198034769463341,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.08366265892982483,
"fcm_dpo/q_t": 0.4979090094566345,
"grad_norm": 18.28632354736328,
"learning_rate": 2.985074626865671e-07,
"logits/chosen": 1.813473105430603,
"logits/rejected": 1.750382423400879,
"logps/chosen": -86.79568481445312,
"logps/ref_chosen": -86.87640380859375,
"logps/ref_rejected": -101.0856704711914,
"logps/rejected": -101.08860778808594,
"loss": 1.3784,
"margin_dpo/margin_mean": 0.08366268873214722,
"margin_dpo/margin_std": 0.4136474132537842,
"step": 41
},
{
"epoch": 0.06349206349206349,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.01882028579711914,
"fcm_dpo/q_t": 0.49952933192253113,
"grad_norm": 18.222118377685547,
"learning_rate": 3.059701492537313e-07,
"logits/chosen": 1.686318278312683,
"logits/rejected": 1.6382153034210205,
"logps/chosen": -79.34196472167969,
"logps/ref_chosen": -79.35625457763672,
"logps/ref_rejected": -91.54881286621094,
"logps/rejected": -91.55332946777344,
"loss": 1.3848,
"margin_dpo/margin_mean": 0.018820196390151978,
"margin_dpo/margin_std": 0.4075120687484741,
"step": 42
},
{
"epoch": 0.06500377928949358,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.02868404984474182,
"fcm_dpo/q_t": 0.5007175207138062,
"grad_norm": 19.569217681884766,
"learning_rate": 3.134328358208955e-07,
"logits/chosen": 1.7295043468475342,
"logits/rejected": 1.6345380544662476,
"logps/chosen": -90.81892395019531,
"logps/ref_chosen": -90.81220245361328,
"logps/ref_rejected": -94.16316986083984,
"logps/rejected": -94.1412124633789,
"loss": 1.3898,
"margin_dpo/margin_mean": -0.02868404984474182,
"margin_dpo/margin_std": 0.4629897475242615,
"step": 43
},
{
"epoch": 0.06651549508692366,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.005178973078727722,
"fcm_dpo/q_t": 0.4998709261417389,
"grad_norm": 18.98065948486328,
"learning_rate": 3.2089552238805965e-07,
"logits/chosen": 1.3732523918151855,
"logits/rejected": 1.301151156425476,
"logps/chosen": -88.26017761230469,
"logps/ref_chosen": -88.27932739257812,
"logps/ref_rejected": -101.14324951171875,
"logps/rejected": -101.1292724609375,
"loss": 1.3863,
"margin_dpo/margin_mean": 0.005178704857826233,
"margin_dpo/margin_std": 0.4382961690425873,
"step": 44
},
{
"epoch": 0.06802721088435375,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.07508471608161926,
"fcm_dpo/q_t": 0.498124361038208,
"grad_norm": 19.43973731994629,
"learning_rate": 3.2835820895522385e-07,
"logits/chosen": 1.695054292678833,
"logits/rejected": 1.5746557712554932,
"logps/chosen": -78.40066528320312,
"logps/ref_chosen": -78.40264892578125,
"logps/ref_rejected": -109.39339447021484,
"logps/rejected": -109.46649169921875,
"loss": 1.3793,
"margin_dpo/margin_mean": 0.0750853419303894,
"margin_dpo/margin_std": 0.4212068021297455,
"step": 45
},
{
"epoch": 0.06953892668178382,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.10518896579742432,
"fcm_dpo/q_t": 0.4973721504211426,
"grad_norm": 18.316635131835938,
"learning_rate": 3.3582089552238805e-07,
"logits/chosen": 1.685295820236206,
"logits/rejected": 1.5578134059906006,
"logps/chosen": -77.9603271484375,
"logps/ref_chosen": -78.08491516113281,
"logps/ref_rejected": -97.42544555664062,
"logps/rejected": -97.40605163574219,
"loss": 1.3762,
"margin_dpo/margin_mean": 0.1051889955997467,
"margin_dpo/margin_std": 0.4159674048423767,
"step": 46
},
{
"epoch": 0.0710506424792139,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.023257285356521606,
"fcm_dpo/q_t": 0.49941885471343994,
"grad_norm": 19.098480224609375,
"learning_rate": 3.432835820895522e-07,
"logits/chosen": 1.5158767700195312,
"logits/rejected": 1.4411935806274414,
"logps/chosen": -70.76278686523438,
"logps/ref_chosen": -70.78988647460938,
"logps/ref_rejected": -91.17266845703125,
"logps/rejected": -91.1688232421875,
"loss": 1.3842,
"margin_dpo/margin_mean": 0.02325788140296936,
"margin_dpo/margin_std": 0.31453946232795715,
"step": 47
},
{
"epoch": 0.07256235827664399,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.015231698751449585,
"fcm_dpo/q_t": 0.4996192157268524,
"grad_norm": 17.00591278076172,
"learning_rate": 3.507462686567164e-07,
"logits/chosen": 1.9108043909072876,
"logits/rejected": 1.8479423522949219,
"logps/chosen": -66.61248779296875,
"logps/ref_chosen": -66.67327880859375,
"logps/ref_rejected": -79.28543853759766,
"logps/rejected": -79.23987579345703,
"loss": 1.3852,
"margin_dpo/margin_mean": 0.015231996774673462,
"margin_dpo/margin_std": 0.3935966491699219,
"step": 48
},
{
"epoch": 0.07407407407407407,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.001067519187927246,
"fcm_dpo/q_t": 0.5000255107879639,
"grad_norm": 17.59852409362793,
"learning_rate": 3.5820895522388055e-07,
"logits/chosen": 1.4201464653015137,
"logits/rejected": 1.3769769668579102,
"logps/chosen": -75.09858703613281,
"logps/ref_chosen": -75.17504119873047,
"logps/ref_rejected": -80.5369873046875,
"logps/rejected": -80.45946502685547,
"loss": 1.3868,
"margin_dpo/margin_mean": -0.0010673105716705322,
"margin_dpo/margin_std": 0.379297137260437,
"step": 49
},
{
"epoch": 0.07558578987150416,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.014360487461090088,
"fcm_dpo/q_t": 0.4996405839920044,
"grad_norm": 17.910799026489258,
"learning_rate": 3.6567164179104475e-07,
"logits/chosen": 1.8102669715881348,
"logits/rejected": 1.7434110641479492,
"logps/chosen": -71.19473266601562,
"logps/ref_chosen": -71.2314224243164,
"logps/ref_rejected": -87.59088134765625,
"logps/rejected": -87.56855773925781,
"loss": 1.3852,
"margin_dpo/margin_mean": 0.014360368251800537,
"margin_dpo/margin_std": 0.371703177690506,
"step": 50
},
{
"epoch": 0.07709750566893424,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.03938554227352142,
"fcm_dpo/q_t": 0.5009841322898865,
"grad_norm": 18.880245208740234,
"learning_rate": 3.7313432835820895e-07,
"logits/chosen": 1.744141936302185,
"logits/rejected": 1.693819284439087,
"logps/chosen": -78.70307922363281,
"logps/ref_chosen": -78.69171142578125,
"logps/ref_rejected": -100.78950500488281,
"logps/rejected": -100.76148986816406,
"loss": 1.3907,
"margin_dpo/margin_mean": -0.03938555717468262,
"margin_dpo/margin_std": 0.4431745409965515,
"step": 51
},
{
"epoch": 0.07860922146636433,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.06755271553993225,
"fcm_dpo/q_t": 0.4983121156692505,
"grad_norm": 20.320974349975586,
"learning_rate": 3.805970149253731e-07,
"logits/chosen": 1.745551347732544,
"logits/rejected": 1.5814502239227295,
"logps/chosen": -89.07058715820312,
"logps/ref_chosen": -89.09419250488281,
"logps/ref_rejected": -116.87469482421875,
"logps/rejected": -116.91864013671875,
"loss": 1.38,
"margin_dpo/margin_mean": 0.06755334138870239,
"margin_dpo/margin_std": 0.4279418885707855,
"step": 52
},
{
"epoch": 0.0801209372637944,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.08846598863601685,
"fcm_dpo/q_t": 0.4977889060974121,
"grad_norm": 17.33759307861328,
"learning_rate": 3.880597014925373e-07,
"logits/chosen": 1.6374804973602295,
"logits/rejected": 1.595580816268921,
"logps/chosen": -74.09617614746094,
"logps/ref_chosen": -74.21418762207031,
"logps/ref_rejected": -75.71168518066406,
"logps/rejected": -75.68213653564453,
"loss": 1.378,
"margin_dpo/margin_mean": 0.08846625685691833,
"margin_dpo/margin_std": 0.4409305453300476,
"step": 53
},
{
"epoch": 0.08163265306122448,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.028407424688339233,
"fcm_dpo/q_t": 0.4992886483669281,
"grad_norm": 16.426828384399414,
"learning_rate": 3.9552238805970144e-07,
"logits/chosen": 1.7313616275787354,
"logits/rejected": 1.7127957344055176,
"logps/chosen": -65.56224822998047,
"logps/ref_chosen": -65.63475799560547,
"logps/ref_rejected": -76.4462890625,
"logps/rejected": -76.40218353271484,
"loss": 1.384,
"margin_dpo/margin_mean": 0.028407543897628784,
"margin_dpo/margin_std": 0.46945488452911377,
"step": 54
},
{
"epoch": 0.08314436885865457,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.2457306683063507,
"fcm_dpo/q_t": 0.4938609004020691,
"grad_norm": 19.258989334106445,
"learning_rate": 4.0298507462686564e-07,
"logits/chosen": 1.6469519138336182,
"logits/rejected": 1.4373173713684082,
"logps/chosen": -68.6036376953125,
"logps/ref_chosen": -68.7640380859375,
"logps/ref_rejected": -108.80074310302734,
"logps/rejected": -108.88607788085938,
"loss": 1.3623,
"margin_dpo/margin_mean": 0.24573048949241638,
"margin_dpo/margin_std": 0.4226919412612915,
"step": 55
},
{
"epoch": 0.08465608465608465,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.05905681848526001,
"fcm_dpo/q_t": 0.49852341413497925,
"grad_norm": 16.984230041503906,
"learning_rate": 4.1044776119402984e-07,
"logits/chosen": 1.773057222366333,
"logits/rejected": 1.746607780456543,
"logps/chosen": -74.76235961914062,
"logps/ref_chosen": -74.7939453125,
"logps/ref_rejected": -81.83535766601562,
"logps/rejected": -81.86283111572266,
"loss": 1.3811,
"margin_dpo/margin_mean": 0.05905655026435852,
"margin_dpo/margin_std": 0.5325890779495239,
"step": 56
},
{
"epoch": 0.08616780045351474,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.17420035600662231,
"fcm_dpo/q_t": 0.4956481456756592,
"grad_norm": 18.959041595458984,
"learning_rate": 4.17910447761194e-07,
"logits/chosen": 1.9884854555130005,
"logits/rejected": 1.7981288433074951,
"logps/chosen": -74.45184326171875,
"logps/ref_chosen": -74.5794677734375,
"logps/ref_rejected": -105.61981964111328,
"logps/rejected": -105.6663818359375,
"loss": 1.3695,
"margin_dpo/margin_mean": 0.174201101064682,
"margin_dpo/margin_std": 0.47768181562423706,
"step": 57
},
{
"epoch": 0.08767951625094482,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.017644047737121582,
"fcm_dpo/q_t": 0.4995603561401367,
"grad_norm": 19.281293869018555,
"learning_rate": 4.253731343283582e-07,
"logits/chosen": 1.4138903617858887,
"logits/rejected": 1.3425607681274414,
"logps/chosen": -92.22441101074219,
"logps/ref_chosen": -92.24464416503906,
"logps/ref_rejected": -103.18975830078125,
"logps/rejected": -103.18716430664062,
"loss": 1.3852,
"margin_dpo/margin_mean": 0.017644047737121582,
"margin_dpo/margin_std": 0.48450881242752075,
"step": 58
},
{
"epoch": 0.08919123204837491,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.19744977355003357,
"fcm_dpo/q_t": 0.4950745701789856,
"grad_norm": 20.637189865112305,
"learning_rate": 4.3283582089552234e-07,
"logits/chosen": 1.76304292678833,
"logits/rejected": 1.509261131286621,
"logps/chosen": -66.98043060302734,
"logps/ref_chosen": -67.12688446044922,
"logps/ref_rejected": -91.69569396972656,
"logps/rejected": -91.7467041015625,
"loss": 1.3675,
"margin_dpo/margin_mean": 0.19744998216629028,
"margin_dpo/margin_std": 0.5356566905975342,
"step": 59
},
{
"epoch": 0.09070294784580499,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.13416770100593567,
"fcm_dpo/q_t": 0.49665212631225586,
"grad_norm": 18.396495819091797,
"learning_rate": 4.4029850746268654e-07,
"logits/chosen": 1.7832741737365723,
"logits/rejected": 1.8049073219299316,
"logps/chosen": -79.5711669921875,
"logps/ref_chosen": -79.74327087402344,
"logps/ref_rejected": -77.89244079589844,
"logps/rejected": -77.85449981689453,
"loss": 1.3736,
"margin_dpo/margin_mean": 0.1341674029827118,
"margin_dpo/margin_std": 0.5320160984992981,
"step": 60
},
{
"epoch": 0.09221466364323508,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.034498006105422974,
"fcm_dpo/q_t": 0.4991379380226135,
"grad_norm": 16.57811164855957,
"learning_rate": 4.4776119402985074e-07,
"logits/chosen": 1.688488245010376,
"logits/rejected": 1.6512866020202637,
"logps/chosen": -65.99887084960938,
"logps/ref_chosen": -66.08685302734375,
"logps/ref_rejected": -88.1458740234375,
"logps/rejected": -88.0923843383789,
"loss": 1.3834,
"margin_dpo/margin_mean": 0.0344984233379364,
"margin_dpo/margin_std": 0.48245492577552795,
"step": 61
},
{
"epoch": 0.09372637944066516,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.16568706929683685,
"fcm_dpo/q_t": 0.49585962295532227,
"grad_norm": 17.876060485839844,
"learning_rate": 4.552238805970149e-07,
"logits/chosen": 1.7909362316131592,
"logits/rejected": 1.7238061428070068,
"logps/chosen": -80.88427734375,
"logps/ref_chosen": -81.0108871459961,
"logps/ref_rejected": -95.50444793701172,
"logps/rejected": -95.54353332519531,
"loss": 1.3705,
"margin_dpo/margin_mean": 0.16568706929683685,
"margin_dpo/margin_std": 0.5354666113853455,
"step": 62
},
{
"epoch": 0.09523809523809523,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.19854192435741425,
"fcm_dpo/q_t": 0.49504005908966064,
"grad_norm": 19.247526168823242,
"learning_rate": 4.626865671641791e-07,
"logits/chosen": 2.170567750930786,
"logits/rejected": 2.088958263397217,
"logps/chosen": -78.36114501953125,
"logps/ref_chosen": -78.57593536376953,
"logps/ref_rejected": -99.71000671386719,
"logps/rejected": -99.69376373291016,
"loss": 1.3672,
"margin_dpo/margin_mean": 0.19854141771793365,
"margin_dpo/margin_std": 0.5002174377441406,
"step": 63
},
{
"epoch": 0.09674981103552532,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.017069846391677856,
"fcm_dpo/q_t": 0.4995724558830261,
"grad_norm": 16.591533660888672,
"learning_rate": 4.701492537313433e-07,
"logits/chosen": 1.7883801460266113,
"logits/rejected": 1.7215988636016846,
"logps/chosen": -69.16060638427734,
"logps/ref_chosen": -69.24063110351562,
"logps/ref_rejected": -84.14842987060547,
"logps/rejected": -84.0854721069336,
"loss": 1.3852,
"margin_dpo/margin_mean": 0.017070025205612183,
"margin_dpo/margin_std": 0.49829041957855225,
"step": 64
},
{
"epoch": 0.0982615268329554,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.11321347951889038,
"fcm_dpo/q_t": 0.49717018008232117,
"grad_norm": 18.86490821838379,
"learning_rate": 4.776119402985074e-07,
"logits/chosen": 1.8629745244979858,
"logits/rejected": 1.8083195686340332,
"logps/chosen": -83.99441528320312,
"logps/ref_chosen": -84.0351333618164,
"logps/ref_rejected": -96.42926788330078,
"logps/rejected": -96.50176239013672,
"loss": 1.3759,
"margin_dpo/margin_mean": 0.11321339011192322,
"margin_dpo/margin_std": 0.592049241065979,
"step": 65
},
{
"epoch": 0.09977324263038549,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.1881301999092102,
"fcm_dpo/q_t": 0.49529772996902466,
"grad_norm": 18.52004623413086,
"learning_rate": 4.850746268656717e-07,
"logits/chosen": 1.5754730701446533,
"logits/rejected": 1.4911394119262695,
"logps/chosen": -87.75141143798828,
"logps/ref_chosen": -87.79238891601562,
"logps/ref_rejected": -95.26547241210938,
"logps/rejected": -95.41261291503906,
"loss": 1.3687,
"margin_dpo/margin_mean": 0.18812981247901917,
"margin_dpo/margin_std": 0.6606887578964233,
"step": 66
},
{
"epoch": 0.10128495842781557,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.22150889039039612,
"fcm_dpo/q_t": 0.49446702003479004,
"grad_norm": 18.993770599365234,
"learning_rate": 4.925373134328357e-07,
"logits/chosen": 1.943489670753479,
"logits/rejected": 1.8127994537353516,
"logps/chosen": -77.81979370117188,
"logps/ref_chosen": -78.00114440917969,
"logps/ref_rejected": -96.03421020507812,
"logps/rejected": -96.07437133789062,
"loss": 1.3648,
"margin_dpo/margin_mean": 0.22150954604148865,
"margin_dpo/margin_std": 0.47556179761886597,
"step": 67
},
{
"epoch": 0.10279667422524566,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.13636493682861328,
"fcm_dpo/q_t": 0.4965946674346924,
"grad_norm": 19.818443298339844,
"learning_rate": 5e-07,
"logits/chosen": 1.6929965019226074,
"logits/rejected": 1.6059188842773438,
"logps/chosen": -96.03993225097656,
"logps/ref_chosen": -96.04267883300781,
"logps/ref_rejected": -110.91169738769531,
"logps/rejected": -111.04530334472656,
"loss": 1.3738,
"margin_dpo/margin_mean": 0.1363646388053894,
"margin_dpo/margin_std": 0.6684163212776184,
"step": 68
},
{
"epoch": 0.10430839002267574,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.22328773140907288,
"fcm_dpo/q_t": 0.49442198872566223,
"grad_norm": 19.92877960205078,
"learning_rate": 4.999965034812934e-07,
"logits/chosen": 1.787092924118042,
"logits/rejected": 1.6620742082595825,
"logps/chosen": -84.8952865600586,
"logps/ref_chosen": -85.11124420166016,
"logps/ref_rejected": -107.57357025146484,
"logps/rejected": -107.58089447021484,
"loss": 1.3648,
"margin_dpo/margin_mean": 0.22328829765319824,
"margin_dpo/margin_std": 0.5401943325996399,
"step": 69
},
{
"epoch": 0.10582010582010581,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.209281325340271,
"fcm_dpo/q_t": 0.49477431178092957,
"grad_norm": 18.5528621673584,
"learning_rate": 4.999860140229787e-07,
"logits/chosen": 1.7440345287322998,
"logits/rejected": 1.6920671463012695,
"logps/chosen": -81.64407348632812,
"logps/ref_chosen": -81.87960815429688,
"logps/ref_rejected": -92.63243103027344,
"logps/rejected": -92.60617065429688,
"loss": 1.3664,
"margin_dpo/margin_mean": 0.20928049087524414,
"margin_dpo/margin_std": 0.603878378868103,
"step": 70
},
{
"epoch": 0.1073318216175359,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.10643842816352844,
"fcm_dpo/q_t": 0.4973390996456146,
"grad_norm": 17.682138442993164,
"learning_rate": 4.999685319184688e-07,
"logits/chosen": 1.5762542486190796,
"logits/rejected": 1.5709012746810913,
"logps/chosen": -79.61023712158203,
"logps/ref_chosen": -79.74766540527344,
"logps/ref_rejected": -83.39110565185547,
"logps/rejected": -83.360107421875,
"loss": 1.3767,
"margin_dpo/margin_mean": 0.10643890500068665,
"margin_dpo/margin_std": 0.6256778836250305,
"step": 71
},
{
"epoch": 0.10884353741496598,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.32459521293640137,
"fcm_dpo/q_t": 0.4918937087059021,
"grad_norm": 19.00299835205078,
"learning_rate": 4.999440576567755e-07,
"logits/chosen": 1.7387597560882568,
"logits/rejected": 1.553479790687561,
"logps/chosen": -72.77051544189453,
"logps/ref_chosen": -73.04458618164062,
"logps/ref_rejected": -92.64720153808594,
"logps/rejected": -92.69772338867188,
"loss": 1.355,
"margin_dpo/margin_mean": 0.32459497451782227,
"margin_dpo/margin_std": 0.5936090350151062,
"step": 72
},
{
"epoch": 0.11035525321239607,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.05147072672843933,
"fcm_dpo/q_t": 0.4987148642539978,
"grad_norm": 19.377613067626953,
"learning_rate": 4.999125919224965e-07,
"logits/chosen": 1.6101853847503662,
"logits/rejected": 1.5460271835327148,
"logps/chosen": -87.6396255493164,
"logps/ref_chosen": -87.71681213378906,
"logps/ref_rejected": -96.93572998046875,
"logps/rejected": -96.9100112915039,
"loss": 1.3826,
"margin_dpo/margin_mean": 0.05147099494934082,
"margin_dpo/margin_std": 0.7500083446502686,
"step": 73
},
{
"epoch": 0.11186696900982615,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.28650206327438354,
"fcm_dpo/q_t": 0.4928475618362427,
"grad_norm": 18.037437438964844,
"learning_rate": 4.998741355957963e-07,
"logits/chosen": 1.8539071083068848,
"logits/rejected": 1.6711949110031128,
"logps/chosen": -66.66632843017578,
"logps/ref_chosen": -67.07321166992188,
"logps/ref_rejected": -96.5340347290039,
"logps/rejected": -96.41365051269531,
"loss": 1.3591,
"margin_dpo/margin_mean": 0.2865017056465149,
"margin_dpo/margin_std": 0.6675806045532227,
"step": 74
},
{
"epoch": 0.11337868480725624,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.30480456352233887,
"fcm_dpo/q_t": 0.4923963248729706,
"grad_norm": 16.95103645324707,
"learning_rate": 4.998286897523808e-07,
"logits/chosen": 1.6327571868896484,
"logits/rejected": 1.4845844507217407,
"logps/chosen": -61.54745864868164,
"logps/ref_chosen": -61.80186462402344,
"logps/ref_rejected": -82.37368774414062,
"logps/rejected": -82.42408752441406,
"loss": 1.3576,
"margin_dpo/margin_mean": 0.3048042356967926,
"margin_dpo/margin_std": 0.7787231802940369,
"step": 75
},
{
"epoch": 0.11489040060468632,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.2743793725967407,
"fcm_dpo/q_t": 0.4931487441062927,
"grad_norm": 17.764631271362305,
"learning_rate": 4.997762556634679e-07,
"logits/chosen": 1.6228429079055786,
"logits/rejected": 1.4914746284484863,
"logps/chosen": -69.57562255859375,
"logps/ref_chosen": -69.92233276367188,
"logps/ref_rejected": -97.08378601074219,
"logps/rejected": -97.01145935058594,
"loss": 1.3604,
"margin_dpo/margin_mean": 0.274379700422287,
"margin_dpo/margin_std": 0.7334781885147095,
"step": 76
},
{
"epoch": 0.1164021164021164,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.37875401973724365,
"fcm_dpo/q_t": 0.4905601739883423,
"grad_norm": 18.358583450317383,
"learning_rate": 4.99716834795752e-07,
"logits/chosen": 1.6846084594726562,
"logits/rejected": 1.5783634185791016,
"logps/chosen": -70.83340454101562,
"logps/ref_chosen": -71.206298828125,
"logps/ref_rejected": -95.22071075439453,
"logps/rejected": -95.22657775878906,
"loss": 1.3507,
"margin_dpo/margin_mean": 0.3787541389465332,
"margin_dpo/margin_std": 0.8486927151679993,
"step": 77
},
{
"epoch": 0.11791383219954649,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.30582955479621887,
"fcm_dpo/q_t": 0.4923703074455261,
"grad_norm": 17.85964584350586,
"learning_rate": 4.996504288113623e-07,
"logits/chosen": 1.8145931959152222,
"logits/rejected": 1.79677414894104,
"logps/chosen": -83.99678802490234,
"logps/ref_chosen": -84.40055847167969,
"logps/ref_rejected": -95.41949462890625,
"logps/rejected": -95.3215560913086,
"loss": 1.3576,
"margin_dpo/margin_mean": 0.3058291971683502,
"margin_dpo/margin_std": 0.7991141080856323,
"step": 78
},
{
"epoch": 0.11942554799697656,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.35301172733306885,
"fcm_dpo/q_t": 0.4912147521972656,
"grad_norm": 19.52484130859375,
"learning_rate": 4.995770395678171e-07,
"logits/chosen": 1.894843578338623,
"logits/rejected": 1.6881787776947021,
"logps/chosen": -65.58863830566406,
"logps/ref_chosen": -65.93923950195312,
"logps/ref_rejected": -102.92240905761719,
"logps/rejected": -102.9248046875,
"loss": 1.3539,
"margin_dpo/margin_mean": 0.35301104187965393,
"margin_dpo/margin_std": 1.007969856262207,
"step": 79
},
{
"epoch": 0.12093726379440665,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.2960975170135498,
"fcm_dpo/q_t": 0.49261194467544556,
"grad_norm": 17.473323822021484,
"learning_rate": 4.994966691179711e-07,
"logits/chosen": 1.82114839553833,
"logits/rejected": 1.629103660583496,
"logps/chosen": -78.35586547851562,
"logps/ref_chosen": -78.61624908447266,
"logps/ref_rejected": -99.9122314453125,
"logps/rejected": -99.94795227050781,
"loss": 1.3586,
"margin_dpo/margin_mean": 0.29609763622283936,
"margin_dpo/margin_std": 0.8148288726806641,
"step": 80
},
{
"epoch": 0.12244897959183673,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.347815603017807,
"fcm_dpo/q_t": 0.4913283884525299,
"grad_norm": 17.89147186279297,
"learning_rate": 4.994093197099587e-07,
"logits/chosen": 1.6513278484344482,
"logits/rejected": 1.5496854782104492,
"logps/chosen": -79.164306640625,
"logps/ref_chosen": -79.49641418457031,
"logps/ref_rejected": -94.52413940429688,
"logps/rejected": -94.53985595703125,
"loss": 1.3538,
"margin_dpo/margin_mean": 0.3478164076805115,
"margin_dpo/margin_std": 0.8724742531776428,
"step": 81
},
{
"epoch": 0.12396069538926682,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.6286215782165527,
"fcm_dpo/q_t": 0.4843388497829437,
"grad_norm": 17.7833309173584,
"learning_rate": 4.993149937871306e-07,
"logits/chosen": 1.5996932983398438,
"logits/rejected": 1.4459776878356934,
"logps/chosen": -64.317138671875,
"logps/ref_chosen": -64.97168731689453,
"logps/ref_rejected": -86.69085693359375,
"logps/rejected": -86.66493225097656,
"loss": 1.3266,
"margin_dpo/margin_mean": 0.6286218166351318,
"margin_dpo/margin_std": 0.9195500016212463,
"step": 82
},
{
"epoch": 0.1254724111866969,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.4474705457687378,
"fcm_dpo/q_t": 0.48884087800979614,
"grad_norm": 21.962217330932617,
"learning_rate": 4.992136939879856e-07,
"logits/chosen": 1.8637166023254395,
"logits/rejected": 1.7185570001602173,
"logps/chosen": -72.42298889160156,
"logps/ref_chosen": -72.92498779296875,
"logps/ref_rejected": -92.27165222167969,
"logps/rejected": -92.21711730957031,
"loss": 1.3442,
"margin_dpo/margin_mean": 0.44747063517570496,
"margin_dpo/margin_std": 0.9168833494186401,
"step": 83
},
{
"epoch": 0.12698412698412698,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.5169297456741333,
"fcm_dpo/q_t": 0.4871019721031189,
"grad_norm": 19.37822723388672,
"learning_rate": 4.991054231460969e-07,
"logits/chosen": 1.851825475692749,
"logits/rejected": 1.6797561645507812,
"logps/chosen": -81.32493591308594,
"logps/ref_chosen": -81.79109191894531,
"logps/ref_rejected": -99.20896911621094,
"logps/rejected": -99.2597427368164,
"loss": 1.3376,
"margin_dpo/margin_mean": 0.5169292688369751,
"margin_dpo/margin_std": 0.9634412527084351,
"step": 84
},
{
"epoch": 0.12849584278155707,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.49980345368385315,
"fcm_dpo/q_t": 0.48752886056900024,
"grad_norm": 17.480857849121094,
"learning_rate": 4.989901842900325e-07,
"logits/chosen": 1.607172966003418,
"logits/rejected": 1.4860568046569824,
"logps/chosen": -67.3022689819336,
"logps/ref_chosen": -67.94147491455078,
"logps/ref_rejected": -85.76875305175781,
"logps/rejected": -85.62934112548828,
"loss": 1.3389,
"margin_dpo/margin_mean": 0.4998033940792084,
"margin_dpo/margin_std": 0.8719754219055176,
"step": 85
},
{
"epoch": 0.13000755857898716,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.29450273513793945,
"fcm_dpo/q_t": 0.49265211820602417,
"grad_norm": 17.54376792907715,
"learning_rate": 4.988679806432711e-07,
"logits/chosen": 1.8459415435791016,
"logits/rejected": 1.7846070528030396,
"logps/chosen": -78.91737365722656,
"logps/ref_chosen": -79.21485900878906,
"logps/ref_rejected": -88.69877624511719,
"logps/rejected": -88.69578552246094,
"loss": 1.3588,
"margin_dpo/margin_mean": 0.2945028245449066,
"margin_dpo/margin_std": 0.837791919708252,
"step": 86
},
{
"epoch": 0.13151927437641722,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.7170735001564026,
"fcm_dpo/q_t": 0.4821361303329468,
"grad_norm": 18.833833694458008,
"learning_rate": 4.987388156241114e-07,
"logits/chosen": 1.451952338218689,
"logits/rejected": 1.2411874532699585,
"logps/chosen": -83.91081237792969,
"logps/ref_chosen": -84.45362854003906,
"logps/ref_rejected": -103.43824005126953,
"logps/rejected": -103.61250305175781,
"loss": 1.319,
"margin_dpo/margin_mean": 0.7170728445053101,
"margin_dpo/margin_std": 1.1152534484863281,
"step": 87
},
{
"epoch": 0.1330309901738473,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.33253175020217896,
"fcm_dpo/q_t": 0.49171459674835205,
"grad_norm": 18.181365966796875,
"learning_rate": 4.986026928455767e-07,
"logits/chosen": 2.0365257263183594,
"logits/rejected": 2.005300283432007,
"logps/chosen": -80.89089965820312,
"logps/ref_chosen": -81.27230834960938,
"logps/ref_rejected": -89.51646423339844,
"logps/rejected": -89.46759033203125,
"loss": 1.3566,
"margin_dpo/margin_mean": 0.33253180980682373,
"margin_dpo/margin_std": 1.0969430208206177,
"step": 88
},
{
"epoch": 0.1345427059712774,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.8061107397079468,
"fcm_dpo/q_t": 0.4799831807613373,
"grad_norm": 18.168310165405273,
"learning_rate": 4.984596161153135e-07,
"logits/chosen": 2.1221489906311035,
"logits/rejected": 1.827742099761963,
"logps/chosen": -57.4072265625,
"logps/ref_chosen": -58.142333984375,
"logps/ref_rejected": -102.53756713867188,
"logps/rejected": -102.60858154296875,
"loss": 1.3116,
"margin_dpo/margin_mean": 0.8061116933822632,
"margin_dpo/margin_std": 1.2829334735870361,
"step": 89
},
{
"epoch": 0.1360544217687075,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.6056348085403442,
"fcm_dpo/q_t": 0.48491472005844116,
"grad_norm": 19.63652992248535,
"learning_rate": 4.983095894354857e-07,
"logits/chosen": 1.6260521411895752,
"logits/rejected": 1.4100569486618042,
"logps/chosen": -74.74089050292969,
"logps/ref_chosen": -75.26505279541016,
"logps/ref_rejected": -104.32842254638672,
"logps/rejected": -104.40989685058594,
"loss": 1.3299,
"margin_dpo/margin_mean": 0.6056344509124756,
"margin_dpo/margin_std": 1.134067177772522,
"step": 90
},
{
"epoch": 0.13756613756613756,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.5181472301483154,
"fcm_dpo/q_t": 0.4871419072151184,
"grad_norm": 17.70273208618164,
"learning_rate": 4.98152617002662e-07,
"logits/chosen": 1.8195009231567383,
"logits/rejected": 1.654341220855713,
"logps/chosen": -68.80375671386719,
"logps/ref_chosen": -69.33901977539062,
"logps/ref_rejected": -90.31411743164062,
"logps/rejected": -90.29698944091797,
"loss": 1.34,
"margin_dpo/margin_mean": 0.5181469917297363,
"margin_dpo/margin_std": 1.3797237873077393,
"step": 91
},
{
"epoch": 0.13907785336356765,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.6811751127243042,
"fcm_dpo/q_t": 0.48308151960372925,
"grad_norm": 18.557336807250977,
"learning_rate": 4.979887032076988e-07,
"logits/chosen": 1.660226821899414,
"logits/rejected": 1.5099092721939087,
"logps/chosen": -71.76761627197266,
"logps/ref_chosen": -72.4566650390625,
"logps/ref_rejected": -91.6706771850586,
"logps/rejected": -91.66280364990234,
"loss": 1.3234,
"margin_dpo/margin_mean": 0.6811752319335938,
"margin_dpo/margin_std": 1.2715716361999512,
"step": 92
},
{
"epoch": 0.14058956916099774,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.465279757976532,
"fcm_dpo/q_t": 0.48848843574523926,
"grad_norm": 16.0939998626709,
"learning_rate": 4.978178526356172e-07,
"logits/chosen": 1.8248298168182373,
"logits/rejected": 1.7261273860931396,
"logps/chosen": -63.3609619140625,
"logps/ref_chosen": -64.08897399902344,
"logps/ref_rejected": -75.09095764160156,
"logps/rejected": -74.82823181152344,
"loss": 1.346,
"margin_dpo/margin_mean": 0.4652804434299469,
"margin_dpo/margin_std": 1.505643606185913,
"step": 93
},
{
"epoch": 0.1421012849584278,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.8975893259048462,
"fcm_dpo/q_t": 0.4777688980102539,
"grad_norm": 31.570436477661133,
"learning_rate": 4.976400700654751e-07,
"logits/chosen": 2.060239315032959,
"logits/rejected": 1.872870922088623,
"logps/chosen": -78.88949584960938,
"logps/ref_chosen": -79.67372131347656,
"logps/ref_rejected": -94.64076232910156,
"logps/rejected": -94.75411987304688,
"loss": 1.3053,
"margin_dpo/margin_mean": 0.8975897431373596,
"margin_dpo/margin_std": 1.6404364109039307,
"step": 94
},
{
"epoch": 0.1436130007558579,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.6297564506530762,
"fcm_dpo/q_t": 0.4843508005142212,
"grad_norm": 18.723411560058594,
"learning_rate": 4.974553604702332e-07,
"logits/chosen": 1.5617575645446777,
"logits/rejected": 1.383953332901001,
"logps/chosen": -78.22109985351562,
"logps/ref_chosen": -78.65760803222656,
"logps/ref_rejected": -109.4048080444336,
"logps/rejected": -109.5980453491211,
"loss": 1.3296,
"margin_dpo/margin_mean": 0.6297565698623657,
"margin_dpo/margin_std": 1.4608389139175415,
"step": 95
},
{
"epoch": 0.14512471655328799,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.7316545248031616,
"fcm_dpo/q_t": 0.48186880350112915,
"grad_norm": 19.00063705444336,
"learning_rate": 4.972637290166157e-07,
"logits/chosen": 1.5328270196914673,
"logits/rejected": 1.4091155529022217,
"logps/chosen": -77.26933288574219,
"logps/ref_chosen": -77.708251953125,
"logps/ref_rejected": -104.36044311523438,
"logps/rejected": -104.65316772460938,
"loss": 1.3209,
"margin_dpo/margin_mean": 0.7316542863845825,
"margin_dpo/margin_std": 1.5796196460723877,
"step": 96
},
{
"epoch": 0.14663643235071808,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.2733203172683716,
"fcm_dpo/q_t": 0.4931853413581848,
"grad_norm": 19.248638153076172,
"learning_rate": 4.970651810649666e-07,
"logits/chosen": 1.3479671478271484,
"logits/rejected": 1.257331371307373,
"logps/chosen": -84.284912109375,
"logps/ref_chosen": -84.58917999267578,
"logps/ref_rejected": -99.25704956054688,
"logps/rejected": -99.2260971069336,
"loss": 1.3655,
"margin_dpo/margin_mean": 0.2733200490474701,
"margin_dpo/margin_std": 1.5922892093658447,
"step": 97
},
{
"epoch": 0.14814814814814814,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.38651925325393677,
"fcm_dpo/q_t": 0.4903528094291687,
"grad_norm": 17.57636260986328,
"learning_rate": 4.968597221690985e-07,
"logits/chosen": 1.6413347721099854,
"logits/rejected": 1.5928442478179932,
"logps/chosen": -74.04196166992188,
"logps/ref_chosen": -74.42477416992188,
"logps/ref_rejected": -88.93840026855469,
"logps/rejected": -88.94210815429688,
"loss": 1.3525,
"margin_dpo/margin_mean": 0.3865186870098114,
"margin_dpo/margin_std": 1.329129934310913,
"step": 98
},
{
"epoch": 0.14965986394557823,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.757216215133667,
"fcm_dpo/q_t": 0.48140111565589905,
"grad_norm": 18.15253448486328,
"learning_rate": 4.966473580761389e-07,
"logits/chosen": 1.6560975313186646,
"logits/rejected": 1.5651164054870605,
"logps/chosen": -74.99360656738281,
"logps/ref_chosen": -75.59742736816406,
"logps/ref_rejected": -98.2310791015625,
"logps/rejected": -98.38446807861328,
"loss": 1.3225,
"margin_dpo/margin_mean": 0.7572157979011536,
"margin_dpo/margin_std": 2.042893886566162,
"step": 99
},
{
"epoch": 0.15117157974300832,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.7813898324966431,
"fcm_dpo/q_t": 0.48093181848526,
"grad_norm": 19.486074447631836,
"learning_rate": 4.964280947263676e-07,
"logits/chosen": 1.808748483657837,
"logits/rejected": 1.7841796875,
"logps/chosen": -98.01991271972656,
"logps/ref_chosen": -98.55859375,
"logps/ref_rejected": -106.01295471191406,
"logps/rejected": -106.25565338134766,
"loss": 1.3231,
"margin_dpo/margin_mean": 0.7813898324966431,
"margin_dpo/margin_std": 2.200671672821045,
"step": 100
},
{
"epoch": 0.15117157974300832,
"eval_fcm_dpo/beta": 0.10000000894069672,
"eval_logits/chosen": 1.6085329055786133,
"eval_logits/rejected": 1.4983155727386475,
"eval_logps/chosen": -86.20779418945312,
"eval_logps/ref_chosen": -86.90177917480469,
"eval_logps/ref_rejected": -96.69639587402344,
"eval_logps/rejected": -96.88575744628906,
"eval_loss": 0.6549195051193237,
"eval_margin_dpo/margin_mean": 0.8833596706390381,
"eval_margin_dpo/margin_std": 1.9511994123458862,
"eval_runtime": 42.2871,
"eval_samples_per_second": 54.461,
"eval_steps_per_second": 1.703,
"step": 100
},
{
"epoch": 0.15268329554043839,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.9666967391967773,
"fcm_dpo/q_t": 0.4759965240955353,
"grad_norm": 16.30520248413086,
"learning_rate": 4.96201938253052e-07,
"logits/chosen": 1.3939599990844727,
"logits/rejected": 1.3424584865570068,
"logps/chosen": -68.65782165527344,
"logps/ref_chosen": -69.45216369628906,
"logps/ref_rejected": -88.0458755493164,
"logps/rejected": -88.21821594238281,
"loss": 1.2998,
"margin_dpo/margin_mean": 0.966697096824646,
"margin_dpo/margin_std": 1.7669236660003662,
"step": 101
},
{
"epoch": 0.15419501133786848,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.6540460586547852,
"fcm_dpo/q_t": 0.48388558626174927,
"grad_norm": 17.389196395874023,
"learning_rate": 4.959688949822748e-07,
"logits/chosen": 1.5881282091140747,
"logits/rejected": 1.511448860168457,
"logps/chosen": -79.79618835449219,
"logps/ref_chosen": -80.35308837890625,
"logps/ref_rejected": -90.61380004882812,
"logps/rejected": -90.71095275878906,
"loss": 1.3314,
"margin_dpo/margin_mean": 0.6540460586547852,
"margin_dpo/margin_std": 1.9390764236450195,
"step": 102
},
{
"epoch": 0.15570672713529857,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.2188489437103271,
"fcm_dpo/q_t": 0.4699843227863312,
"grad_norm": 17.374025344848633,
"learning_rate": 4.957289714327572e-07,
"logits/chosen": 1.997759461402893,
"logits/rejected": 1.9253690242767334,
"logps/chosen": -78.43865966796875,
"logps/ref_chosen": -79.30392456054688,
"logps/ref_rejected": -93.745361328125,
"logps/rejected": -94.09895324707031,
"loss": 1.2791,
"margin_dpo/margin_mean": 1.2188482284545898,
"margin_dpo/margin_std": 2.111494302749634,
"step": 103
},
{
"epoch": 0.15721844293272866,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.1677416563034058,
"fcm_dpo/q_t": 0.47133898735046387,
"grad_norm": 18.580514907836914,
"learning_rate": 4.954821743156767e-07,
"logits/chosen": 1.8479002714157104,
"logits/rejected": 1.6079638004302979,
"logps/chosen": -73.59405517578125,
"logps/ref_chosen": -74.50674438476562,
"logps/ref_rejected": -116.09912872314453,
"logps/rejected": -116.35417938232422,
"loss": 1.2871,
"margin_dpo/margin_mean": 1.1677416563034058,
"margin_dpo/margin_std": 2.363769769668579,
"step": 104
},
{
"epoch": 0.15873015873015872,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.9023860692977905,
"fcm_dpo/q_t": 0.4778934121131897,
"grad_norm": 18.901168823242188,
"learning_rate": 4.952285105344791e-07,
"logits/chosen": 1.5916838645935059,
"logits/rejected": 1.4354064464569092,
"logps/chosen": -87.22508239746094,
"logps/ref_chosen": -87.76654815673828,
"logps/ref_rejected": -108.07927703857422,
"logps/rejected": -108.440185546875,
"loss": 1.3158,
"margin_dpo/margin_mean": 0.902385950088501,
"margin_dpo/margin_std": 2.6685338020324707,
"step": 105
},
{
"epoch": 0.1602418745275888,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.8738718628883362,
"fcm_dpo/q_t": 0.4783915877342224,
"grad_norm": 17.121471405029297,
"learning_rate": 4.949679871846857e-07,
"logits/chosen": 1.8275485038757324,
"logits/rejected": 1.7684516906738281,
"logps/chosen": -75.5244369506836,
"logps/ref_chosen": -76.38548278808594,
"logps/ref_rejected": -81.63407897949219,
"logps/rejected": -81.64691162109375,
"loss": 1.3155,
"margin_dpo/margin_mean": 0.8738718032836914,
"margin_dpo/margin_std": 2.4262328147888184,
"step": 106
},
{
"epoch": 0.1617535903250189,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.5380064249038696,
"fcm_dpo/q_t": 0.4865608215332031,
"grad_norm": 19.295825958251953,
"learning_rate": 4.947006115536947e-07,
"logits/chosen": 1.439363956451416,
"logits/rejected": 1.3768948316574097,
"logps/chosen": -95.8614273071289,
"logps/ref_chosen": -96.14849853515625,
"logps/ref_rejected": -107.0481185913086,
"logps/rejected": -107.29904174804688,
"loss": 1.3456,
"margin_dpo/margin_mean": 0.5380067825317383,
"margin_dpo/margin_std": 2.2211201190948486,
"step": 107
},
{
"epoch": 0.16326530612244897,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.9229694604873657,
"fcm_dpo/q_t": 0.4772723913192749,
"grad_norm": 17.066343307495117,
"learning_rate": 4.944263911205772e-07,
"logits/chosen": 1.3664920330047607,
"logits/rejected": 1.2473000288009644,
"logps/chosen": -84.6237564086914,
"logps/ref_chosen": -85.39241027832031,
"logps/ref_rejected": -97.79592895507812,
"logps/rejected": -97.95022583007812,
"loss": 1.3095,
"margin_dpo/margin_mean": 0.9229696989059448,
"margin_dpo/margin_std": 2.316845417022705,
"step": 108
},
{
"epoch": 0.16477702191987906,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.389096975326538,
"fcm_dpo/q_t": 0.46625447273254395,
"grad_norm": 17.88287925720215,
"learning_rate": 4.941453335558681e-07,
"logits/chosen": 1.3533546924591064,
"logits/rejected": 1.1422548294067383,
"logps/chosen": -78.01161193847656,
"logps/ref_chosen": -78.99874877929688,
"logps/ref_rejected": -100.79278564453125,
"logps/rejected": -101.19475555419922,
"loss": 1.2711,
"margin_dpo/margin_mean": 1.389096975326538,
"margin_dpo/margin_std": 2.766042709350586,
"step": 109
},
{
"epoch": 0.16628873771730915,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.20230808854103088,
"fcm_dpo/q_t": 0.4948790669441223,
"grad_norm": 20.587228775024414,
"learning_rate": 4.938574467213517e-07,
"logits/chosen": 1.3525140285491943,
"logits/rejected": 1.4121595621109009,
"logps/chosen": -96.599853515625,
"logps/ref_chosen": -96.95277404785156,
"logps/ref_rejected": -91.44450378417969,
"logps/rejected": -91.29388427734375,
"loss": 1.3813,
"margin_dpo/margin_mean": 0.20230792462825775,
"margin_dpo/margin_std": 2.4545016288757324,
"step": 110
},
{
"epoch": 0.16780045351473924,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.0242056846618652,
"fcm_dpo/q_t": 0.47484272718429565,
"grad_norm": 16.43102264404297,
"learning_rate": 4.935627386698418e-07,
"logits/chosen": 1.7560900449752808,
"logits/rejected": 1.6001261472702026,
"logps/chosen": -69.20565795898438,
"logps/ref_chosen": -70.01641845703125,
"logps/ref_rejected": -92.87696838378906,
"logps/rejected": -93.0904312133789,
"loss": 1.3021,
"margin_dpo/margin_mean": 1.024204969406128,
"margin_dpo/margin_std": 2.487879514694214,
"step": 111
},
{
"epoch": 0.1693121693121693,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.2949209213256836,
"fcm_dpo/q_t": 0.46816879510879517,
"grad_norm": 18.924081802368164,
"learning_rate": 4.932612176449559e-07,
"logits/chosen": 1.5642766952514648,
"logits/rejected": 1.3693039417266846,
"logps/chosen": -76.81999206542969,
"logps/ref_chosen": -77.80027770996094,
"logps/ref_rejected": -123.10624694824219,
"logps/rejected": -123.42089080810547,
"loss": 1.2778,
"margin_dpo/margin_mean": 1.2949196100234985,
"margin_dpo/margin_std": 2.5712859630584717,
"step": 112
},
{
"epoch": 0.1708238851095994,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.8377039432525635,
"fcm_dpo/q_t": 0.4793519079685211,
"grad_norm": 16.532751083374023,
"learning_rate": 4.929528920808854e-07,
"logits/chosen": 1.691911220550537,
"logits/rejected": 1.603704810142517,
"logps/chosen": -69.27433776855469,
"logps/ref_chosen": -70.54346466064453,
"logps/ref_rejected": -88.79286193847656,
"logps/rejected": -88.36143493652344,
"loss": 1.3198,
"margin_dpo/margin_mean": 0.8377047181129456,
"margin_dpo/margin_std": 2.449063777923584,
"step": 113
},
{
"epoch": 0.17233560090702948,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.3174316883087158,
"fcm_dpo/q_t": 0.4681586027145386,
"grad_norm": 18.068744659423828,
"learning_rate": 4.92637770602159e-07,
"logits/chosen": 1.864563226699829,
"logits/rejected": 1.7135505676269531,
"logps/chosen": -82.77174377441406,
"logps/ref_chosen": -83.9239501953125,
"logps/ref_rejected": -92.85765838623047,
"logps/rejected": -93.02288818359375,
"loss": 1.2816,
"margin_dpo/margin_mean": 1.317431926727295,
"margin_dpo/margin_std": 3.0355539321899414,
"step": 114
},
{
"epoch": 0.17384731670445955,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.194430947303772,
"fcm_dpo/q_t": 0.4708959460258484,
"grad_norm": 16.878889083862305,
"learning_rate": 4.923158620234019e-07,
"logits/chosen": 1.5815708637237549,
"logits/rejected": 1.4187781810760498,
"logps/chosen": -68.57202911376953,
"logps/ref_chosen": -69.82767486572266,
"logps/ref_rejected": -96.51564025878906,
"logps/rejected": -96.45442199707031,
"loss": 1.2872,
"margin_dpo/margin_mean": 1.1944315433502197,
"margin_dpo/margin_std": 2.593759536743164,
"step": 115
},
{
"epoch": 0.17535903250188964,
"fcm_dpo/beta": 0.10189038515090942,
"fcm_dpo/delta": 0.1855519711971283,
"fcm_dpo/margin": 1.6823737621307373,
"fcm_dpo/q_t": 0.45880037546157837,
"grad_norm": 18.34799575805664,
"learning_rate": 4.91987175349089e-07,
"logits/chosen": 1.7459111213684082,
"logits/rejected": 1.5880231857299805,
"logps/chosen": -64.80363464355469,
"logps/ref_chosen": -66.19773864746094,
"logps/ref_rejected": -90.88304138183594,
"logps/rejected": -91.17130279541016,
"loss": 1.238,
"margin_dpo/margin_mean": 1.682374119758606,
"margin_dpo/margin_std": 2.485858917236328,
"step": 116
},
{
"epoch": 0.17687074829931973,
"fcm_dpo/beta": 0.10378076136112213,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.591629981994629,
"fcm_dpo/q_t": 0.4595806896686554,
"grad_norm": 17.06990623474121,
"learning_rate": 4.916517197732933e-07,
"logits/chosen": 1.644856333732605,
"logits/rejected": 1.5434954166412354,
"logps/chosen": -70.47422790527344,
"logps/ref_chosen": -72.15988159179688,
"logps/ref_rejected": -85.30296325683594,
"logps/rejected": -85.20893859863281,
"loss": 1.2484,
"margin_dpo/margin_mean": 1.5916296243667603,
"margin_dpo/margin_std": 2.757472276687622,
"step": 117
},
{
"epoch": 0.17838246409674982,
"fcm_dpo/beta": 0.10378076136112213,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.3567464351654053,
"fcm_dpo/q_t": 0.4655323326587677,
"grad_norm": 16.98048973083496,
"learning_rate": 4.913095046794281e-07,
"logits/chosen": 1.9137165546417236,
"logits/rejected": 1.7827235460281372,
"logps/chosen": -70.03201293945312,
"logps/ref_chosen": -71.47773742675781,
"logps/ref_rejected": -96.95051574707031,
"logps/rejected": -96.8615493774414,
"loss": 1.2725,
"margin_dpo/margin_mean": 1.3567461967468262,
"margin_dpo/margin_std": 2.878359794616699,
"step": 118
},
{
"epoch": 0.17989417989417988,
"fcm_dpo/beta": 0.10378076136112213,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.293745517730713,
"fcm_dpo/q_t": 0.4677852988243103,
"grad_norm": 17.565595626831055,
"learning_rate": 4.909605396399855e-07,
"logits/chosen": 1.7382652759552002,
"logits/rejected": 1.6407535076141357,
"logps/chosen": -76.83675384521484,
"logps/ref_chosen": -78.2727279663086,
"logps/ref_rejected": -94.71317291259766,
"logps/rejected": -94.57093811035156,
"loss": 1.2879,
"margin_dpo/margin_mean": 1.2937450408935547,
"margin_dpo/margin_std": 3.416473150253296,
"step": 119
},
{
"epoch": 0.18140589569160998,
"fcm_dpo/beta": 0.1076236441731453,
"fcm_dpo/delta": 0.18179886043071747,
"fcm_dpo/margin": 1.9414758682250977,
"fcm_dpo/q_t": 0.45040810108184814,
"grad_norm": 19.297927856445312,
"learning_rate": 4.906048344162676e-07,
"logits/chosen": 1.7652626037597656,
"logits/rejected": 1.620312213897705,
"logps/chosen": -76.534423828125,
"logps/ref_chosen": -78.43109130859375,
"logps/ref_rejected": -100.2771987915039,
"logps/rejected": -100.32200622558594,
"loss": 1.2131,
"margin_dpo/margin_mean": 1.9414761066436768,
"margin_dpo/margin_std": 2.947596549987793,
"step": 120
},
{
"epoch": 0.18291761148904007,
"fcm_dpo/beta": 0.1076236441731453,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.3684052228927612,
"fcm_dpo/q_t": 0.46462342143058777,
"grad_norm": 20.10668182373047,
"learning_rate": 4.902423989581143e-07,
"logits/chosen": 2.0717074871063232,
"logits/rejected": 1.8019273281097412,
"logps/chosen": -72.44033813476562,
"logps/ref_chosen": -74.08768463134766,
"logps/ref_rejected": -118.6731948852539,
"logps/rejected": -118.39424896240234,
"loss": 1.2726,
"margin_dpo/margin_mean": 1.368406057357788,
"margin_dpo/margin_std": 3.1463675498962402,
"step": 121
},
{
"epoch": 0.18442932728647016,
"fcm_dpo/beta": 0.1076236441731453,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.446579933166504,
"fcm_dpo/q_t": 0.46327704191207886,
"grad_norm": 19.02294158935547,
"learning_rate": 4.898732434036243e-07,
"logits/chosen": 1.5263891220092773,
"logits/rejected": 1.4055243730545044,
"logps/chosen": -77.6707763671875,
"logps/ref_chosen": -79.36762237548828,
"logps/ref_rejected": -92.42371368408203,
"logps/rejected": -92.17345428466797,
"loss": 1.2719,
"margin_dpo/margin_mean": 1.4465796947479248,
"margin_dpo/margin_std": 3.5089354515075684,
"step": 122
},
{
"epoch": 0.18594104308390022,
"fcm_dpo/beta": 0.1076236441731453,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.629873514175415,
"fcm_dpo/q_t": 0.4577152729034424,
"grad_norm": 18.349647521972656,
"learning_rate": 4.894973780788722e-07,
"logits/chosen": 1.517094373703003,
"logits/rejected": 1.4203633069992065,
"logps/chosen": -69.94999694824219,
"logps/ref_chosen": -71.91705322265625,
"logps/ref_rejected": -96.36418151855469,
"logps/rejected": -96.02699279785156,
"loss": 1.2446,
"margin_dpo/margin_mean": 1.6298733949661255,
"margin_dpo/margin_std": 2.976022720336914,
"step": 123
},
{
"epoch": 0.1874527588813303,
"fcm_dpo/beta": 0.10963409394025803,
"fcm_dpo/delta": 0.18339866399765015,
"fcm_dpo/margin": 1.8729770183563232,
"fcm_dpo/q_t": 0.45209065079689026,
"grad_norm": 19.461355209350586,
"learning_rate": 4.89114813497619e-07,
"logits/chosen": 1.773781657218933,
"logits/rejected": 1.6544387340545654,
"logps/chosen": -69.50850677490234,
"logps/ref_chosen": -71.72529602050781,
"logps/ref_rejected": -111.17984771728516,
"logps/rejected": -110.83602905273438,
"loss": 1.2312,
"margin_dpo/margin_mean": 1.8729764223098755,
"margin_dpo/margin_std": 3.717836380004883,
"step": 124
},
{
"epoch": 0.1889644746787604,
"fcm_dpo/beta": 0.1148253008723259,
"fcm_dpo/delta": 0.14045879244804382,
"fcm_dpo/margin": 2.0888173580169678,
"fcm_dpo/q_t": 0.4435945749282837,
"grad_norm": 21.065311431884766,
"learning_rate": 4.887255603610184e-07,
"logits/chosen": 1.6337459087371826,
"logits/rejected": 1.4570084810256958,
"logps/chosen": -79.20709991455078,
"logps/ref_chosen": -81.55532836914062,
"logps/ref_rejected": -110.9144287109375,
"logps/rejected": -110.65502166748047,
"loss": 1.2003,
"margin_dpo/margin_mean": 2.0888171195983887,
"margin_dpo/margin_std": 3.4307498931884766,
"step": 125
},
{
"epoch": 0.19047619047619047,
"fcm_dpo/beta": 0.11694176495075226,
"fcm_dpo/delta": 0.1810038983821869,
"fcm_dpo/margin": 1.6035174131393433,
"fcm_dpo/q_t": 0.4577940106391907,
"grad_norm": 21.680734634399414,
"learning_rate": 4.883296295573176e-07,
"logits/chosen": 1.2153024673461914,
"logits/rejected": 1.241828441619873,
"logps/chosen": -83.78556823730469,
"logps/ref_chosen": -87.07349395751953,
"logps/ref_rejected": -85.05271911621094,
"logps/rejected": -83.36831665039062,
"loss": 1.2689,
"margin_dpo/margin_mean": 1.603518009185791,
"margin_dpo/margin_std": 4.2111663818359375,
"step": 126
},
{
"epoch": 0.19198790627362056,
"fcm_dpo/beta": 0.1221129521727562,
"fcm_dpo/delta": 0.12666912376880646,
"fcm_dpo/margin": 2.1889748573303223,
"fcm_dpo/q_t": 0.43728864192962646,
"grad_norm": 20.29819107055664,
"learning_rate": 4.87927032161552e-07,
"logits/chosen": 1.7409113645553589,
"logits/rejected": 1.6811532974243164,
"logps/chosen": -77.265625,
"logps/ref_chosen": -80.4578857421875,
"logps/ref_rejected": -90.50740051269531,
"logps/rejected": -89.50411987304688,
"loss": 1.175,
"margin_dpo/margin_mean": 2.1889748573303223,
"margin_dpo/margin_std": 3.178166627883911,
"step": 127
},
{
"epoch": 0.19349962207105065,
"fcm_dpo/beta": 0.12689261138439178,
"fcm_dpo/delta": 0.19197335839271545,
"fcm_dpo/margin": 1.4382350444793701,
"fcm_dpo/q_t": 0.45880699157714844,
"grad_norm": 23.865381240844727,
"learning_rate": 4.875177794352363e-07,
"logits/chosen": 1.8986896276474,
"logits/rejected": 1.6842751502990723,
"logps/chosen": -82.81161499023438,
"logps/ref_chosen": -85.77519226074219,
"logps/ref_rejected": -112.63516998291016,
"logps/rejected": -111.10983276367188,
"loss": 1.301,
"margin_dpo/margin_mean": 1.4382350444793701,
"margin_dpo/margin_std": 4.812017440795898,
"step": 128
},
{
"epoch": 0.19501133786848074,
"fcm_dpo/beta": 0.12921908497810364,
"fcm_dpo/delta": 0.18006029725074768,
"fcm_dpo/margin": 1.665739893913269,
"fcm_dpo/q_t": 0.45093223452568054,
"grad_norm": 24.163936614990234,
"learning_rate": 4.871018828260491e-07,
"logits/chosen": 1.3364216089248657,
"logits/rejected": 1.3428212404251099,
"logps/chosen": -82.05198669433594,
"logps/ref_chosen": -84.94615173339844,
"logps/ref_rejected": -85.36473846435547,
"logps/rejected": -84.13630676269531,
"loss": 1.2632,
"margin_dpo/margin_mean": 1.6657401323318481,
"margin_dpo/margin_std": 4.499945640563965,
"step": 129
},
{
"epoch": 0.1965230536659108,
"fcm_dpo/beta": 0.13545329868793488,
"fcm_dpo/delta": 0.1463683694601059,
"fcm_dpo/margin": 1.901643991470337,
"fcm_dpo/q_t": 0.4415082633495331,
"grad_norm": 24.466577529907227,
"learning_rate": 4.866793539675126e-07,
"logits/chosen": 1.6595840454101562,
"logits/rejected": 1.5338075160980225,
"logps/chosen": -75.72906494140625,
"logps/ref_chosen": -79.0184555053711,
"logps/ref_rejected": -97.63998413085938,
"logps/rejected": -96.25224304199219,
"loss": 1.2165,
"margin_dpo/margin_mean": 1.90164315700531,
"margin_dpo/margin_std": 3.910851001739502,
"step": 130
},
{
"epoch": 0.1980347694633409,
"fcm_dpo/beta": 0.14048483967781067,
"fcm_dpo/delta": 0.25885722041130066,
"fcm_dpo/margin": 2.4962222576141357,
"fcm_dpo/q_t": 0.4236597418785095,
"grad_norm": 22.44991111755371,
"learning_rate": 4.86250204678667e-07,
"logits/chosen": 1.7617708444595337,
"logits/rejected": 1.4943931102752686,
"logps/chosen": -64.65379333496094,
"logps/ref_chosen": -68.24565887451172,
"logps/ref_rejected": -97.99555969238281,
"logps/rejected": -96.89991760253906,
"loss": 1.1614,
"margin_dpo/margin_mean": 2.496222496032715,
"margin_dpo/margin_std": 4.536970138549805,
"step": 131
},
{
"epoch": 0.19954648526077098,
"fcm_dpo/beta": 0.15050306916236877,
"fcm_dpo/delta": 0.3449888527393341,
"fcm_dpo/margin": 1.749915599822998,
"fcm_dpo/q_t": 0.44061940908432007,
"grad_norm": 29.038654327392578,
"learning_rate": 4.858144469637408e-07,
"logits/chosen": 1.7392385005950928,
"logits/rejected": 1.6208550930023193,
"logps/chosen": -78.46183776855469,
"logps/ref_chosen": -82.06532287597656,
"logps/ref_rejected": -89.47691345214844,
"logps/rejected": -87.62332916259766,
"loss": 1.2182,
"margin_dpo/margin_mean": 1.749915599822998,
"margin_dpo/margin_std": 3.832667350769043,
"step": 132
},
{
"epoch": 0.20105820105820105,
"fcm_dpo/beta": 0.15955929458141327,
"fcm_dpo/delta": 0.2966747283935547,
"fcm_dpo/margin": 1.961458683013916,
"fcm_dpo/q_t": 0.43171608448028564,
"grad_norm": 27.75748634338379,
"learning_rate": 4.853720930118138e-07,
"logits/chosen": 1.3796292543411255,
"logits/rejected": 1.4140539169311523,
"logps/chosen": -79.47694396972656,
"logps/ref_chosen": -83.70661163330078,
"logps/ref_rejected": -89.3868179321289,
"logps/rejected": -87.11859893798828,
"loss": 1.2052,
"margin_dpo/margin_mean": 1.961458444595337,
"margin_dpo/margin_std": 4.18922233581543,
"step": 133
},
{
"epoch": 0.20256991685563114,
"fcm_dpo/beta": 0.1658254861831665,
"fcm_dpo/delta": 0.13663284480571747,
"fcm_dpo/margin": 2.83111834526062,
"fcm_dpo/q_t": 0.401967316865921,
"grad_norm": 24.919769287109375,
"learning_rate": 4.849231551964771e-07,
"logits/chosen": 1.6263504028320312,
"logits/rejected": 1.5011672973632812,
"logps/chosen": -66.6833267211914,
"logps/ref_chosen": -71.57601928710938,
"logps/ref_rejected": -92.34259033203125,
"logps/rejected": -90.2810287475586,
"loss": 1.1007,
"margin_dpo/margin_mean": 2.831118106842041,
"margin_dpo/margin_std": 4.706888198852539,
"step": 134
},
{
"epoch": 0.20408163265306123,
"fcm_dpo/beta": 0.17376971244812012,
"fcm_dpo/delta": 0.2560199201107025,
"fcm_dpo/margin": 2.0251028537750244,
"fcm_dpo/q_t": 0.42260998487472534,
"grad_norm": 26.780994415283203,
"learning_rate": 4.844676460754862e-07,
"logits/chosen": 1.4887380599975586,
"logits/rejected": 1.431467056274414,
"logps/chosen": -61.249507904052734,
"logps/ref_chosen": -66.39884948730469,
"logps/ref_rejected": -81.38636779785156,
"logps/rejected": -78.26211547851562,
"loss": 1.1841,
"margin_dpo/margin_mean": 2.025102376937866,
"margin_dpo/margin_std": 4.178084373474121,
"step": 135
},
{
"epoch": 0.20559334845049132,
"fcm_dpo/beta": 0.17436444759368896,
"fcm_dpo/delta": -0.030570298433303833,
"fcm_dpo/margin": 2.4477319717407227,
"fcm_dpo/q_t": 0.417044460773468,
"grad_norm": 32.673492431640625,
"learning_rate": 4.840055783904106e-07,
"logits/chosen": 1.5175824165344238,
"logits/rejected": 1.2436449527740479,
"logps/chosen": -82.66349029541016,
"logps/ref_chosen": -86.75381469726562,
"logps/ref_rejected": -113.35548400878906,
"logps/rejected": -111.71290588378906,
"loss": 1.2295,
"margin_dpo/margin_mean": 2.4477314949035645,
"margin_dpo/margin_std": 5.707584857940674,
"step": 136
},
{
"epoch": 0.20710506424792138,
"fcm_dpo/beta": 0.17389875650405884,
"fcm_dpo/delta": -0.026779502630233765,
"fcm_dpo/margin": 2.481207847595215,
"fcm_dpo/q_t": 0.4082632064819336,
"grad_norm": 24.82634162902832,
"learning_rate": 4.835369650662767e-07,
"logits/chosen": 1.7708425521850586,
"logits/rejected": 1.6555781364440918,
"logps/chosen": -67.07716369628906,
"logps/ref_chosen": -72.21119689941406,
"logps/ref_rejected": -88.30802917480469,
"logps/rejected": -85.65521240234375,
"loss": 1.1352,
"margin_dpo/margin_mean": 2.481208324432373,
"margin_dpo/margin_std": 4.302756309509277,
"step": 137
},
{
"epoch": 0.20861678004535147,
"fcm_dpo/beta": 0.1810002624988556,
"fcm_dpo/delta": 0.28295964002609253,
"fcm_dpo/margin": 1.8019649982452393,
"fcm_dpo/q_t": 0.43597790598869324,
"grad_norm": 30.83222770690918,
"learning_rate": 4.830618192112065e-07,
"logits/chosen": 1.7719461917877197,
"logits/rejected": 1.6590218544006348,
"logps/chosen": -70.21919250488281,
"logps/ref_chosen": -74.54273223876953,
"logps/ref_rejected": -84.63615417480469,
"logps/rejected": -82.11457824707031,
"loss": 1.2579,
"margin_dpo/margin_mean": 1.801964521408081,
"margin_dpo/margin_std": 4.899100303649902,
"step": 138
},
{
"epoch": 0.21012849584278157,
"fcm_dpo/beta": 0.1919894814491272,
"fcm_dpo/delta": 0.2702018916606903,
"fcm_dpo/margin": 1.7565574645996094,
"fcm_dpo/q_t": 0.4235602617263794,
"grad_norm": 40.13972473144531,
"learning_rate": 4.825801541160509e-07,
"logits/chosen": 1.2733126878738403,
"logits/rejected": 1.2188732624053955,
"logps/chosen": -83.63345336914062,
"logps/ref_chosen": -87.63740539550781,
"logps/ref_rejected": -101.3896484375,
"logps/rejected": -99.14225006103516,
"loss": 1.2606,
"margin_dpo/margin_mean": 1.7565574645996094,
"margin_dpo/margin_std": 4.6638336181640625,
"step": 139
},
{
"epoch": 0.21164021164021163,
"fcm_dpo/beta": 0.1938619166612625,
"fcm_dpo/delta": 0.03991154208779335,
"fcm_dpo/margin": 2.8989505767822266,
"fcm_dpo/q_t": 0.3879617750644684,
"grad_norm": 37.32284164428711,
"learning_rate": 4.820919832540181e-07,
"logits/chosen": 1.2418723106384277,
"logits/rejected": 1.1534655094146729,
"logps/chosen": -76.45626831054688,
"logps/ref_chosen": -81.32339477539062,
"logps/ref_rejected": -99.7275619506836,
"logps/rejected": -97.7593765258789,
"loss": 1.1224,
"margin_dpo/margin_mean": 2.898951292037964,
"margin_dpo/margin_std": 5.1982293128967285,
"step": 140
},
{
"epoch": 0.21315192743764172,
"fcm_dpo/beta": 0.19558203220367432,
"fcm_dpo/delta": 0.04644138365983963,
"fcm_dpo/margin": 2.841860771179199,
"fcm_dpo/q_t": 0.3873937726020813,
"grad_norm": 28.78200340270996,
"learning_rate": 4.815973202802966e-07,
"logits/chosen": 1.7557021379470825,
"logits/rejected": 1.647310495376587,
"logps/chosen": -73.34912109375,
"logps/ref_chosen": -78.08534240722656,
"logps/ref_rejected": -101.70516967773438,
"logps/rejected": -99.81080627441406,
"loss": 1.1138,
"margin_dpo/margin_mean": 2.8418610095977783,
"margin_dpo/margin_std": 4.951857089996338,
"step": 141
},
{
"epoch": 0.2146636432350718,
"fcm_dpo/beta": 0.2037084996700287,
"fcm_dpo/delta": 0.19900891184806824,
"fcm_dpo/margin": 2.004154682159424,
"fcm_dpo/q_t": 0.4186818599700928,
"grad_norm": 32.78255081176758,
"learning_rate": 4.810961790316729e-07,
"logits/chosen": 1.576524019241333,
"logits/rejected": 1.517528772354126,
"logps/chosen": -78.01957702636719,
"logps/ref_chosen": -82.84616088867188,
"logps/ref_rejected": -95.14714050292969,
"logps/rejected": -92.32470703125,
"loss": 1.2129,
"margin_dpo/margin_mean": 2.004155158996582,
"margin_dpo/margin_std": 4.6407389640808105,
"step": 142
},
{
"epoch": 0.2161753590325019,
"fcm_dpo/beta": 0.21314668655395508,
"fcm_dpo/delta": 0.2351382076740265,
"fcm_dpo/margin": 1.7482174634933472,
"fcm_dpo/q_t": 0.42905452847480774,
"grad_norm": 48.79874038696289,
"learning_rate": 4.805885735261454e-07,
"logits/chosen": 1.6225446462631226,
"logits/rejected": 1.5896368026733398,
"logps/chosen": -75.57222747802734,
"logps/ref_chosen": -80.29791259765625,
"logps/ref_rejected": -87.44291687011719,
"logps/rejected": -84.4654541015625,
"loss": 1.3224,
"margin_dpo/margin_mean": 1.7482179403305054,
"margin_dpo/margin_std": 5.3093485832214355,
"step": 143
},
{
"epoch": 0.21768707482993196,
"fcm_dpo/beta": 0.21803462505340576,
"fcm_dpo/delta": 0.0706484317779541,
"fcm_dpo/margin": 1.2398256063461304,
"fcm_dpo/q_t": 0.4590599238872528,
"grad_norm": 51.408302307128906,
"learning_rate": 4.800745179625307e-07,
"logits/chosen": 1.5925514698028564,
"logits/rejected": 1.5372166633605957,
"logps/chosen": -74.98297119140625,
"logps/ref_chosen": -79.09429168701172,
"logps/ref_rejected": -92.42912292480469,
"logps/rejected": -89.55763244628906,
"loss": 1.5033,
"margin_dpo/margin_mean": 1.2398253679275513,
"margin_dpo/margin_std": 6.016498565673828,
"step": 144
},
{
"epoch": 0.21919879062736206,
"fcm_dpo/beta": 0.21978802978992462,
"fcm_dpo/delta": 0.07088879495859146,
"fcm_dpo/margin": 2.42476224899292,
"fcm_dpo/q_t": 0.39249861240386963,
"grad_norm": 48.06195831298828,
"learning_rate": 4.795540267200686e-07,
"logits/chosen": 1.6124813556671143,
"logits/rejected": 1.647786259651184,
"logps/chosen": -92.46937561035156,
"logps/ref_chosen": -97.7087173461914,
"logps/ref_rejected": -97.63011169433594,
"logps/rejected": -94.81553649902344,
"loss": 1.2479,
"margin_dpo/margin_mean": 2.424762010574341,
"margin_dpo/margin_std": 5.748805522918701,
"step": 145
},
{
"epoch": 0.22071050642479215,
"fcm_dpo/beta": 0.22856765985488892,
"fcm_dpo/delta": 0.20782732963562012,
"fcm_dpo/margin": 1.7500625848770142,
"fcm_dpo/q_t": 0.4228348731994629,
"grad_norm": 43.26691818237305,
"learning_rate": 4.790271143580173e-07,
"logits/chosen": 1.291736125946045,
"logits/rejected": 1.2866215705871582,
"logps/chosen": -70.85255432128906,
"logps/ref_chosen": -76.56294250488281,
"logps/ref_rejected": -83.78160095214844,
"logps/rejected": -79.82127380371094,
"loss": 1.2569,
"margin_dpo/margin_mean": 1.7500635385513306,
"margin_dpo/margin_std": 4.598239898681641,
"step": 146
},
{
"epoch": 0.2222222222222222,
"fcm_dpo/beta": 0.2355855107307434,
"fcm_dpo/delta": 0.16424530744552612,
"fcm_dpo/margin": 1.8817954063415527,
"fcm_dpo/q_t": 0.4236310124397278,
"grad_norm": 49.79362106323242,
"learning_rate": 4.784937956152489e-07,
"logits/chosen": 1.5991694927215576,
"logits/rejected": 1.5054233074188232,
"logps/chosen": -78.0293960571289,
"logps/ref_chosen": -83.24113464355469,
"logps/ref_rejected": -97.50960540771484,
"logps/rejected": -94.1796646118164,
"loss": 1.2993,
"margin_dpo/margin_mean": 1.8817964792251587,
"margin_dpo/margin_std": 5.177776336669922,
"step": 147
},
{
"epoch": 0.2237339380196523,
"fcm_dpo/beta": 0.2397206574678421,
"fcm_dpo/delta": -0.03846623748540878,
"fcm_dpo/margin": 2.6436634063720703,
"fcm_dpo/q_t": 0.3887077569961548,
"grad_norm": 34.138458251953125,
"learning_rate": 4.779540854098347e-07,
"logits/chosen": 1.8034465312957764,
"logits/rejected": 1.581761360168457,
"logps/chosen": -60.71531677246094,
"logps/ref_chosen": -66.36277770996094,
"logps/ref_rejected": -87.66487121582031,
"logps/rejected": -84.66107940673828,
"loss": 1.1977,
"margin_dpo/margin_mean": 2.6436638832092285,
"margin_dpo/margin_std": 5.585522651672363,
"step": 148
},
{
"epoch": 0.2252456538170824,
"fcm_dpo/beta": 0.24301239848136902,
"fcm_dpo/delta": 0.18357330560684204,
"fcm_dpo/margin": 1.745511770248413,
"fcm_dpo/q_t": 0.41498392820358276,
"grad_norm": 41.716548919677734,
"learning_rate": 4.774079988386296e-07,
"logits/chosen": 1.7528355121612549,
"logits/rejected": 1.6265687942504883,
"logps/chosen": -67.40153503417969,
"logps/ref_chosen": -72.0576171875,
"logps/ref_rejected": -83.94097900390625,
"logps/rejected": -81.0303955078125,
"loss": 1.2486,
"margin_dpo/margin_mean": 1.745511770248413,
"margin_dpo/margin_std": 4.357009410858154,
"step": 149
},
{
"epoch": 0.22675736961451248,
"fcm_dpo/beta": 0.23915709555149078,
"fcm_dpo/delta": -0.14455409348011017,
"fcm_dpo/margin": 3.065699577331543,
"fcm_dpo/q_t": 0.36927932500839233,
"grad_norm": 46.32753372192383,
"learning_rate": 4.768555511768486e-07,
"logits/chosen": 1.7210315465927124,
"logits/rejected": 1.645007610321045,
"logps/chosen": -80.22085571289062,
"logps/ref_chosen": -85.52684783935547,
"logps/ref_rejected": -108.37449645996094,
"logps/rejected": -106.13421630859375,
"loss": 1.1408,
"margin_dpo/margin_mean": 3.065699338912964,
"margin_dpo/margin_std": 5.578556060791016,
"step": 150
},
{
"epoch": 0.22826908541194255,
"fcm_dpo/beta": 0.2331182360649109,
"fcm_dpo/delta": -0.14278751611709595,
"fcm_dpo/margin": 3.1410341262817383,
"fcm_dpo/q_t": 0.36318397521972656,
"grad_norm": 34.232784271240234,
"learning_rate": 4.762967578776406e-07,
"logits/chosen": 1.6338458061218262,
"logits/rejected": 1.4856846332550049,
"logps/chosen": -62.60700988769531,
"logps/ref_chosen": -69.160888671875,
"logps/ref_rejected": -91.42207336425781,
"logps/rejected": -88.00923156738281,
"loss": 1.0322,
"margin_dpo/margin_mean": 3.1410341262817383,
"margin_dpo/margin_std": 4.984264373779297,
"step": 151
},
{
"epoch": 0.22978080120937264,
"fcm_dpo/beta": 0.23434986174106598,
"fcm_dpo/delta": 0.013609714806079865,
"fcm_dpo/margin": 2.4968485832214355,
"fcm_dpo/q_t": 0.3925015330314636,
"grad_norm": 42.97077941894531,
"learning_rate": 4.757316345716553e-07,
"logits/chosen": 1.8712348937988281,
"logits/rejected": 1.704573631286621,
"logps/chosen": -66.96708679199219,
"logps/ref_chosen": -72.48135375976562,
"logps/ref_rejected": -94.44818878173828,
"logps/rejected": -91.4307632446289,
"loss": 1.178,
"margin_dpo/margin_mean": 2.4968485832214355,
"margin_dpo/margin_std": 5.199925422668457,
"step": 152
},
{
"epoch": 0.23129251700680273,
"fcm_dpo/beta": 0.23393282294273376,
"fcm_dpo/delta": -0.0029235482215881348,
"fcm_dpo/margin": 2.571483612060547,
"fcm_dpo/q_t": 0.38379669189453125,
"grad_norm": 37.91641616821289,
"learning_rate": 4.751601970666064e-07,
"logits/chosen": 1.5314666032791138,
"logits/rejected": 1.468802809715271,
"logps/chosen": -84.36506652832031,
"logps/ref_chosen": -89.6655044555664,
"logps/ref_rejected": -90.67737579345703,
"logps/rejected": -87.94842529296875,
"loss": 1.1238,
"margin_dpo/margin_mean": 2.5714833736419678,
"margin_dpo/margin_std": 4.653265953063965,
"step": 153
},
{
"epoch": 0.2328042328042328,
"fcm_dpo/beta": 0.24052271246910095,
"fcm_dpo/delta": 0.24382811784744263,
"fcm_dpo/margin": 1.518359899520874,
"fcm_dpo/q_t": 0.42862796783447266,
"grad_norm": 45.47004699707031,
"learning_rate": 4.745824613468292e-07,
"logits/chosen": 1.7060202360153198,
"logits/rejected": 1.652904987335205,
"logps/chosen": -70.89018249511719,
"logps/ref_chosen": -76.58096313476562,
"logps/ref_rejected": -78.18669891357422,
"logps/rejected": -74.0142822265625,
"loss": 1.3464,
"margin_dpo/margin_mean": 1.5183594226837158,
"margin_dpo/margin_std": 4.859274864196777,
"step": 154
},
{
"epoch": 0.23431594860166288,
"fcm_dpo/beta": 0.2394261211156845,
"fcm_dpo/delta": -0.0790242925286293,
"fcm_dpo/margin": 2.8094193935394287,
"fcm_dpo/q_t": 0.37184637784957886,
"grad_norm": 41.79259490966797,
"learning_rate": 4.7399844357283393e-07,
"logits/chosen": 1.7715044021606445,
"logits/rejected": 1.7470512390136719,
"logps/chosen": -76.92964172363281,
"logps/ref_chosen": -82.65617370605469,
"logps/ref_rejected": -95.52484130859375,
"logps/rejected": -92.60773468017578,
"loss": 1.1682,
"margin_dpo/margin_mean": 2.8094191551208496,
"margin_dpo/margin_std": 5.463638782501221,
"step": 155
},
{
"epoch": 0.23582766439909297,
"fcm_dpo/beta": 0.23629328608512878,
"fcm_dpo/delta": -0.07246021926403046,
"fcm_dpo/margin": 2.822887659072876,
"fcm_dpo/q_t": 0.36988958716392517,
"grad_norm": 41.03186798095703,
"learning_rate": 4.7340816008085305e-07,
"logits/chosen": 1.615354061126709,
"logits/rejected": 1.5233306884765625,
"logps/chosen": -82.16059875488281,
"logps/ref_chosen": -87.66494750976562,
"logps/ref_rejected": -108.2437744140625,
"logps/rejected": -105.56230163574219,
"loss": 1.0859,
"margin_dpo/margin_mean": 2.822887897491455,
"margin_dpo/margin_std": 4.765664100646973,
"step": 156
},
{
"epoch": 0.23733938019652306,
"fcm_dpo/beta": 0.23056308925151825,
"fcm_dpo/delta": -0.12440288811922073,
"fcm_dpo/margin": 2.047905445098877,
"fcm_dpo/q_t": 0.40755152702331543,
"grad_norm": 35.36820983886719,
"learning_rate": 4.728116273823847e-07,
"logits/chosen": 1.527277946472168,
"logits/rejected": 1.5307586193084717,
"logps/chosen": -64.32658386230469,
"logps/ref_chosen": -70.77095794677734,
"logps/ref_rejected": -78.78271484375,
"logps/rejected": -74.38624572753906,
"loss": 1.2108,
"margin_dpo/margin_mean": 2.047905445098877,
"margin_dpo/margin_std": 4.452766418457031,
"step": 157
},
{
"epoch": 0.23885109599395313,
"fcm_dpo/beta": 0.2327193319797516,
"fcm_dpo/delta": 0.0721401646733284,
"fcm_dpo/margin": 2.28495717048645,
"fcm_dpo/q_t": 0.39623498916625977,
"grad_norm": 38.024356842041016,
"learning_rate": 4.7220886216373085e-07,
"logits/chosen": 1.5624784231185913,
"logits/rejected": 1.5014305114746094,
"logps/chosen": -75.03966522216797,
"logps/ref_chosen": -81.21516418457031,
"logps/ref_rejected": -97.8381118774414,
"logps/rejected": -93.94757080078125,
"loss": 1.1784,
"margin_dpo/margin_mean": 2.284956693649292,
"margin_dpo/margin_std": 4.70979118347168,
"step": 158
},
{
"epoch": 0.24036281179138322,
"fcm_dpo/beta": 0.2319149523973465,
"fcm_dpo/delta": -0.03987874090671539,
"fcm_dpo/margin": 2.747131824493408,
"fcm_dpo/q_t": 0.37046635150909424,
"grad_norm": 34.66891098022461,
"learning_rate": 4.715998812855304e-07,
"logits/chosen": 1.7413641214370728,
"logits/rejected": 1.661763310432434,
"logps/chosen": -66.25703430175781,
"logps/ref_chosen": -72.33412170410156,
"logps/ref_rejected": -89.49591064453125,
"logps/rejected": -86.16595458984375,
"loss": 1.0663,
"margin_dpo/margin_mean": 2.7471323013305664,
"margin_dpo/margin_std": 4.491193771362305,
"step": 159
},
{
"epoch": 0.2418745275888133,
"fcm_dpo/beta": 0.23739804327487946,
"fcm_dpo/delta": 0.1521824300289154,
"fcm_dpo/margin": 1.9154564142227173,
"fcm_dpo/q_t": 0.4132426381111145,
"grad_norm": 36.02449417114258,
"learning_rate": 4.7098470178228755e-07,
"logits/chosen": 1.2908469438552856,
"logits/rejected": 1.1444838047027588,
"logps/chosen": -57.67263412475586,
"logps/ref_chosen": -63.26386260986328,
"logps/ref_rejected": -82.27867126464844,
"logps/rejected": -78.60289001464844,
"loss": 1.2065,
"margin_dpo/margin_mean": 1.915457010269165,
"margin_dpo/margin_std": 4.364532470703125,
"step": 160
},
{
"epoch": 0.24338624338624337,
"fcm_dpo/beta": 0.24455958604812622,
"fcm_dpo/delta": 0.12015791237354279,
"fcm_dpo/margin": 1.982551097869873,
"fcm_dpo/q_t": 0.39857205748558044,
"grad_norm": 39.60112762451172,
"learning_rate": 4.703633408618955e-07,
"logits/chosen": 1.6757632493972778,
"logits/rejected": 1.5887293815612793,
"logps/chosen": -64.71536254882812,
"logps/ref_chosen": -70.69304656982422,
"logps/ref_rejected": -82.73606872558594,
"logps/rejected": -78.74093627929688,
"loss": 1.1955,
"margin_dpo/margin_mean": 1.9825514554977417,
"margin_dpo/margin_std": 4.350788116455078,
"step": 161
},
{
"epoch": 0.24489795918367346,
"fcm_dpo/beta": 0.23786741495132446,
"fcm_dpo/delta": -0.20256876945495605,
"fcm_dpo/margin": 3.307166576385498,
"fcm_dpo/q_t": 0.33586543798446655,
"grad_norm": 38.64269256591797,
"learning_rate": 4.697358159051549e-07,
"logits/chosen": 1.6229711771011353,
"logits/rejected": 1.5281124114990234,
"logps/chosen": -83.81607055664062,
"logps/ref_chosen": -89.3046646118164,
"logps/ref_rejected": -114.05778503417969,
"logps/rejected": -111.87635803222656,
"loss": 0.9342,
"margin_dpo/margin_mean": 3.3071675300598145,
"margin_dpo/margin_std": 4.0596442222595215,
"step": 162
},
{
"epoch": 0.24640967498110355,
"fcm_dpo/beta": 0.23702430725097656,
"fcm_dpo/delta": -0.014801152050495148,
"fcm_dpo/margin": 2.5833659172058105,
"fcm_dpo/q_t": 0.3794647753238678,
"grad_norm": 36.25377655029297,
"learning_rate": 4.691021444652876e-07,
"logits/chosen": 1.65557861328125,
"logits/rejected": 1.5559160709381104,
"logps/chosen": -62.761451721191406,
"logps/ref_chosen": -68.61222076416016,
"logps/ref_rejected": -89.03155517578125,
"logps/rejected": -85.76416015625,
"loss": 1.0845,
"margin_dpo/margin_mean": 2.5833659172058105,
"margin_dpo/margin_std": 4.294673442840576,
"step": 163
},
{
"epoch": 0.24792139077853365,
"fcm_dpo/beta": 0.22922706604003906,
"fcm_dpo/delta": -0.16434648633003235,
"fcm_dpo/margin": 3.281121253967285,
"fcm_dpo/q_t": 0.3554421663284302,
"grad_norm": 35.74726486206055,
"learning_rate": 4.6846234426744624e-07,
"logits/chosen": 1.4175546169281006,
"logits/rejected": 1.2538635730743408,
"logps/chosen": -67.86227416992188,
"logps/ref_chosen": -73.55902862548828,
"logps/ref_rejected": -94.16201782226562,
"logps/rejected": -91.74636840820312,
"loss": 1.0476,
"margin_dpo/margin_mean": 3.281121253967285,
"margin_dpo/margin_std": 4.952568054199219,
"step": 164
},
{
"epoch": 0.2494331065759637,
"fcm_dpo/beta": 0.22541096806526184,
"fcm_dpo/delta": -0.11093769967556,
"fcm_dpo/margin": 3.116410970687866,
"fcm_dpo/q_t": 0.361217737197876,
"grad_norm": 34.11616134643555,
"learning_rate": 4.678164332082175e-07,
"logits/chosen": 1.893852710723877,
"logits/rejected": 1.7336184978485107,
"logps/chosen": -63.764854431152344,
"logps/ref_chosen": -68.67132568359375,
"logps/ref_rejected": -85.95689392089844,
"logps/rejected": -84.16683197021484,
"loss": 1.0469,
"margin_dpo/margin_mean": 3.116410732269287,
"margin_dpo/margin_std": 4.765564441680908,
"step": 165
},
{
"epoch": 0.2509448223733938,
"fcm_dpo/beta": 0.2261592447757721,
"fcm_dpo/delta": 0.14897161722183228,
"fcm_dpo/margin": 2.025606632232666,
"fcm_dpo/q_t": 0.4120250642299652,
"grad_norm": 40.28575134277344,
"learning_rate": 4.6716442935512214e-07,
"logits/chosen": 1.6033815145492554,
"logits/rejected": 1.3893630504608154,
"logps/chosen": -76.40534973144531,
"logps/ref_chosen": -80.89755249023438,
"logps/ref_rejected": -111.91075134277344,
"logps/rejected": -109.44416809082031,
"loss": 1.191,
"margin_dpo/margin_mean": 2.025606632232666,
"margin_dpo/margin_std": 4.367696762084961,
"step": 166
},
{
"epoch": 0.25245653817082386,
"fcm_dpo/beta": 0.22757622599601746,
"fcm_dpo/delta": 0.010319948196411133,
"fcm_dpo/margin": 2.5901713371276855,
"fcm_dpo/q_t": 0.37613973021507263,
"grad_norm": 33.78873062133789,
"learning_rate": 4.6650635094610966e-07,
"logits/chosen": 1.401757001876831,
"logits/rejected": 1.3311492204666138,
"logps/chosen": -71.38160705566406,
"logps/ref_chosen": -76.73136138916016,
"logps/ref_rejected": -92.57389068603516,
"logps/rejected": -89.8143081665039,
"loss": 1.0629,
"margin_dpo/margin_mean": 2.5901713371276855,
"margin_dpo/margin_std": 4.040890216827393,
"step": 167
},
{
"epoch": 0.25396825396825395,
"fcm_dpo/beta": 0.23258665204048157,
"fcm_dpo/delta": 0.11638811230659485,
"fcm_dpo/margin": 2.1044440269470215,
"fcm_dpo/q_t": 0.3990965485572815,
"grad_norm": 35.83817672729492,
"learning_rate": 4.6584221638904767e-07,
"logits/chosen": 1.6169400215148926,
"logits/rejected": 1.4897035360336304,
"logps/chosen": -78.1120376586914,
"logps/ref_chosen": -82.63671112060547,
"logps/ref_rejected": -96.72691345214844,
"logps/rejected": -94.30669403076172,
"loss": 1.1093,
"margin_dpo/margin_mean": 2.1044440269470215,
"margin_dpo/margin_std": 3.624438762664795,
"step": 168
},
{
"epoch": 0.25547996976568405,
"fcm_dpo/beta": 0.23403212428092957,
"fcm_dpo/delta": -0.05251135677099228,
"fcm_dpo/margin": 2.772446393966675,
"fcm_dpo/q_t": 0.3757784366607666,
"grad_norm": 39.18862533569336,
"learning_rate": 4.651720442612075e-07,
"logits/chosen": 1.5533463954925537,
"logits/rejected": 1.540328025817871,
"logps/chosen": -73.46492004394531,
"logps/ref_chosen": -78.87673950195312,
"logps/ref_rejected": -94.18919372558594,
"logps/rejected": -91.54983520507812,
"loss": 1.081,
"margin_dpo/margin_mean": 2.772446393966675,
"margin_dpo/margin_std": 4.667253017425537,
"step": 169
},
{
"epoch": 0.25699168556311414,
"fcm_dpo/beta": 0.23770752549171448,
"fcm_dpo/delta": 0.11538364738225937,
"fcm_dpo/margin": 2.0562214851379395,
"fcm_dpo/q_t": 0.4132143557071686,
"grad_norm": 42.443660736083984,
"learning_rate": 4.6449585330874425e-07,
"logits/chosen": 1.6440541744232178,
"logits/rejected": 1.6669882535934448,
"logps/chosen": -68.31556701660156,
"logps/ref_chosen": -73.35820007324219,
"logps/ref_rejected": -76.85077667236328,
"logps/rejected": -73.8643569946289,
"loss": 1.317,
"margin_dpo/margin_mean": 2.0562217235565186,
"margin_dpo/margin_std": 5.564802169799805,
"step": 170
},
{
"epoch": 0.2585034013605442,
"fcm_dpo/beta": 0.2314375638961792,
"fcm_dpo/delta": -0.12175580114126205,
"fcm_dpo/margin": 3.0714287757873535,
"fcm_dpo/q_t": 0.35926738381385803,
"grad_norm": 35.90308380126953,
"learning_rate": 4.6381366244617224e-07,
"logits/chosen": 1.6220589876174927,
"logits/rejected": 1.4832584857940674,
"logps/chosen": -75.58470153808594,
"logps/ref_chosen": -80.4322738647461,
"logps/ref_rejected": -96.99999237060547,
"logps/rejected": -95.2238540649414,
"loss": 1.0666,
"margin_dpo/margin_mean": 3.071429491043091,
"margin_dpo/margin_std": 4.930670738220215,
"step": 171
},
{
"epoch": 0.2600151171579743,
"fcm_dpo/beta": 0.23240095376968384,
"fcm_dpo/delta": -0.009122611954808235,
"fcm_dpo/margin": 2.6176974773406982,
"fcm_dpo/q_t": 0.38063037395477295,
"grad_norm": 36.17333984375,
"learning_rate": 4.631254907558365e-07,
"logits/chosen": 1.692131757736206,
"logits/rejected": 1.5869046449661255,
"logps/chosen": -66.05056762695312,
"logps/ref_chosen": -70.45406341552734,
"logps/ref_rejected": -99.85603332519531,
"logps/rejected": -98.07023620605469,
"loss": 1.1094,
"margin_dpo/margin_mean": 2.6176977157592773,
"margin_dpo/margin_std": 4.624697685241699,
"step": 172
},
{
"epoch": 0.2615268329554044,
"fcm_dpo/beta": 0.2278570830821991,
"fcm_dpo/delta": -0.012191221117973328,
"fcm_dpo/margin": 2.6660757064819336,
"fcm_dpo/q_t": 0.38813263177871704,
"grad_norm": 40.461055755615234,
"learning_rate": 4.624313574873786e-07,
"logits/chosen": 1.467531681060791,
"logits/rejected": 1.2549827098846436,
"logps/chosen": -67.7020034790039,
"logps/ref_chosen": -72.15026092529297,
"logps/ref_rejected": -94.10212707519531,
"logps/rejected": -92.3199462890625,
"loss": 1.1725,
"margin_dpo/margin_mean": 2.6660757064819336,
"margin_dpo/margin_std": 5.199055194854736,
"step": 173
},
{
"epoch": 0.26303854875283444,
"fcm_dpo/beta": 0.22771653532981873,
"fcm_dpo/delta": -0.06701561063528061,
"fcm_dpo/margin": 2.9072351455688477,
"fcm_dpo/q_t": 0.3727726936340332,
"grad_norm": 39.59587860107422,
"learning_rate": 4.61731282057198e-07,
"logits/chosen": 1.9616103172302246,
"logits/rejected": 1.7486342191696167,
"logps/chosen": -71.55302429199219,
"logps/ref_chosen": -75.99629211425781,
"logps/ref_rejected": -106.2359619140625,
"logps/rejected": -104.6999282836914,
"loss": 1.1068,
"margin_dpo/margin_mean": 2.9072351455688477,
"margin_dpo/margin_std": 5.125247955322266,
"step": 174
},
{
"epoch": 0.26455026455026454,
"fcm_dpo/beta": 0.22606691718101501,
"fcm_dpo/delta": -0.037291742861270905,
"fcm_dpo/margin": 2.8077917098999023,
"fcm_dpo/q_t": 0.37847793102264404,
"grad_norm": 39.049476623535156,
"learning_rate": 4.6102528404790965e-07,
"logits/chosen": 1.6430723667144775,
"logits/rejected": 1.579434871673584,
"logps/chosen": -80.18596649169922,
"logps/ref_chosen": -84.51177978515625,
"logps/ref_rejected": -104.46299743652344,
"logps/rejected": -102.94497680664062,
"loss": 1.1348,
"margin_dpo/margin_mean": 2.807791233062744,
"margin_dpo/margin_std": 5.241055488586426,
"step": 175
},
{
"epoch": 0.2660619803476946,
"fcm_dpo/beta": 0.2272205799818039,
"fcm_dpo/delta": 0.023860936984419823,
"fcm_dpo/margin": 1.8057135343551636,
"fcm_dpo/q_t": 0.4259718060493469,
"grad_norm": 44.9836540222168,
"learning_rate": 4.603133832077953e-07,
"logits/chosen": 1.9060659408569336,
"logits/rejected": 1.8190908432006836,
"logps/chosen": -94.92770385742188,
"logps/ref_chosen": -98.2034912109375,
"logps/ref_rejected": -103.2023696899414,
"logps/rejected": -101.7322998046875,
"loss": 1.3075,
"margin_dpo/margin_mean": 1.805713415145874,
"margin_dpo/margin_std": 5.15474796295166,
"step": 176
},
{
"epoch": 0.2675736961451247,
"fcm_dpo/beta": 0.2129383683204651,
"fcm_dpo/delta": -0.4364026188850403,
"fcm_dpo/margin": 4.662107467651367,
"fcm_dpo/q_t": 0.3069148361682892,
"grad_norm": 39.75569152832031,
"learning_rate": 4.5959559945025183e-07,
"logits/chosen": 2.090988874435425,
"logits/rejected": 1.858922004699707,
"logps/chosen": -72.60111999511719,
"logps/ref_chosen": -78.029541015625,
"logps/ref_rejected": -112.57099914550781,
"logps/rejected": -111.8046875,
"loss": 0.8663,
"margin_dpo/margin_mean": 4.662107467651367,
"margin_dpo/margin_std": 5.272144317626953,
"step": 177
},
{
"epoch": 0.2690854119425548,
"fcm_dpo/beta": 0.20561328530311584,
"fcm_dpo/delta": -0.012379378080368042,
"fcm_dpo/margin": 2.9629063606262207,
"fcm_dpo/q_t": 0.37169766426086426,
"grad_norm": 29.65110969543457,
"learning_rate": 4.588719528532341e-07,
"logits/chosen": 1.4764043092727661,
"logits/rejected": 1.3558849096298218,
"logps/chosen": -74.66604614257812,
"logps/ref_chosen": -79.48869323730469,
"logps/ref_rejected": -96.62449645996094,
"logps/rejected": -94.76475524902344,
"loss": 1.0248,
"margin_dpo/margin_mean": 2.9629063606262207,
"margin_dpo/margin_std": 3.9983906745910645,
"step": 178
},
{
"epoch": 0.2705971277399849,
"fcm_dpo/beta": 0.20993559062480927,
"fcm_dpo/delta": 0.06102978438138962,
"fcm_dpo/margin": 2.5824151039123535,
"fcm_dpo/q_t": 0.3936367630958557,
"grad_norm": 34.15560531616211,
"learning_rate": 4.581424636586928e-07,
"logits/chosen": 1.9131920337677002,
"logits/rejected": 1.8513381481170654,
"logps/chosen": -79.43586730957031,
"logps/ref_chosen": -84.5088119506836,
"logps/ref_rejected": -93.07945251464844,
"logps/rejected": -90.58891296386719,
"loss": 1.1693,
"margin_dpo/margin_mean": 2.5824155807495117,
"margin_dpo/margin_std": 5.296082019805908,
"step": 179
},
{
"epoch": 0.272108843537415,
"fcm_dpo/beta": 0.2143281102180481,
"fcm_dpo/delta": 0.08803573995828629,
"fcm_dpo/margin": 2.4041786193847656,
"fcm_dpo/q_t": 0.39963027834892273,
"grad_norm": 33.341609954833984,
"learning_rate": 4.5740715227200897e-07,
"logits/chosen": 1.260475754737854,
"logits/rejected": 1.1927244663238525,
"logps/chosen": -69.06391906738281,
"logps/ref_chosen": -74.5645523071289,
"logps/ref_rejected": -81.02266693115234,
"logps/rejected": -77.92620849609375,
"loss": 1.1261,
"margin_dpo/margin_mean": 2.4041786193847656,
"margin_dpo/margin_std": 4.394153118133545,
"step": 180
},
{
"epoch": 0.273620559334845,
"fcm_dpo/beta": 0.21226423978805542,
"fcm_dpo/delta": -0.049738913774490356,
"fcm_dpo/margin": 3.045619487762451,
"fcm_dpo/q_t": 0.3686140775680542,
"grad_norm": 33.64764404296875,
"learning_rate": 4.566660392614228e-07,
"logits/chosen": 1.569049596786499,
"logits/rejected": 1.4539225101470947,
"logps/chosen": -72.93116760253906,
"logps/ref_chosen": -78.77166748046875,
"logps/ref_rejected": -98.29750061035156,
"logps/rejected": -95.50262451171875,
"loss": 0.9918,
"margin_dpo/margin_mean": 3.0456197261810303,
"margin_dpo/margin_std": 4.077629089355469,
"step": 181
},
{
"epoch": 0.2751322751322751,
"fcm_dpo/beta": 0.20631521940231323,
"fcm_dpo/delta": -0.18685951828956604,
"fcm_dpo/margin": 3.744624614715576,
"fcm_dpo/q_t": 0.3533626198768616,
"grad_norm": 34.31681823730469,
"learning_rate": 4.5591914535745817e-07,
"logits/chosen": 1.5861796140670776,
"logits/rejected": 1.4125421047210693,
"logps/chosen": -70.77108764648438,
"logps/ref_chosen": -75.67765045166016,
"logps/ref_rejected": -107.47894287109375,
"logps/rejected": -106.31700134277344,
"loss": 0.9945,
"margin_dpo/margin_mean": 3.7446250915527344,
"margin_dpo/margin_std": 5.412623882293701,
"step": 182
},
{
"epoch": 0.2766439909297052,
"fcm_dpo/beta": 0.20775122940540314,
"fcm_dpo/delta": 0.08841533213853836,
"fcm_dpo/margin": 1.4735186100006104,
"fcm_dpo/q_t": 0.4364347457885742,
"grad_norm": 42.95817947387695,
"learning_rate": 4.551664914523433e-07,
"logits/chosen": 1.7009767293930054,
"logits/rejected": 1.6352108716964722,
"logps/chosen": -77.0204086303711,
"logps/ref_chosen": -79.99969482421875,
"logps/ref_rejected": -89.35220336914062,
"logps/rejected": -87.846435546875,
"loss": 1.2782,
"margin_dpo/margin_mean": 1.4735193252563477,
"margin_dpo/margin_std": 4.169427394866943,
"step": 183
},
{
"epoch": 0.2781557067271353,
"fcm_dpo/beta": 0.2090856432914734,
"fcm_dpo/delta": 0.06833438575267792,
"fcm_dpo/margin": 2.560145854949951,
"fcm_dpo/q_t": 0.3854142129421234,
"grad_norm": 28.153854370117188,
"learning_rate": 4.544080985994258e-07,
"logits/chosen": 1.8167436122894287,
"logits/rejected": 1.6645784378051758,
"logps/chosen": -57.20887756347656,
"logps/ref_chosen": -62.133941650390625,
"logps/ref_rejected": -84.44404602050781,
"logps/rejected": -82.07913208007812,
"loss": 1.0415,
"margin_dpo/margin_mean": 2.560145854949951,
"margin_dpo/margin_std": 3.473294734954834,
"step": 184
},
{
"epoch": 0.2796674225245654,
"fcm_dpo/beta": 0.20768845081329346,
"fcm_dpo/delta": -0.040163375437259674,
"fcm_dpo/margin": 3.062562942504883,
"fcm_dpo/q_t": 0.37965598702430725,
"grad_norm": 30.467384338378906,
"learning_rate": 4.5364398801258394e-07,
"logits/chosen": 1.7906408309936523,
"logits/rejected": 1.663421630859375,
"logps/chosen": -63.672325134277344,
"logps/ref_chosen": -67.93174743652344,
"logps/ref_rejected": -83.76744079589844,
"logps/rejected": -82.57058715820312,
"loss": 1.1426,
"margin_dpo/margin_mean": 3.062563419342041,
"margin_dpo/margin_std": 5.689934730529785,
"step": 185
},
{
"epoch": 0.2811791383219955,
"fcm_dpo/beta": 0.20655421912670135,
"fcm_dpo/delta": -0.05656307190656662,
"fcm_dpo/margin": 3.1591484546661377,
"fcm_dpo/q_t": 0.3779166340827942,
"grad_norm": 34.896888732910156,
"learning_rate": 4.5287418106563354e-07,
"logits/chosen": 1.5623699426651,
"logits/rejected": 1.4292669296264648,
"logps/chosen": -81.7421875,
"logps/ref_chosen": -86.22174072265625,
"logps/ref_rejected": -100.42019653320312,
"logps/rejected": -99.09979248046875,
"loss": 1.1253,
"margin_dpo/margin_mean": 3.159148693084717,
"margin_dpo/margin_std": 5.729801177978516,
"step": 186
},
{
"epoch": 0.28269085411942557,
"fcm_dpo/beta": 0.2069222331047058,
"fcm_dpo/delta": 0.03436320275068283,
"fcm_dpo/margin": 2.742424249649048,
"fcm_dpo/q_t": 0.3854818046092987,
"grad_norm": 39.628883361816406,
"learning_rate": 4.520986992917297e-07,
"logits/chosen": 1.7931580543518066,
"logits/rejected": 1.6749093532562256,
"logps/chosen": -89.49134826660156,
"logps/ref_chosen": -92.81202697753906,
"logps/ref_rejected": -117.28926086425781,
"logps/rejected": -116.71101379394531,
"loss": 1.1152,
"margin_dpo/margin_mean": 2.7424240112304688,
"margin_dpo/margin_std": 4.828680515289307,
"step": 187
},
{
"epoch": 0.2842025699168556,
"fcm_dpo/beta": 0.20844730734825134,
"fcm_dpo/delta": -0.04868890345096588,
"fcm_dpo/margin": 3.0909528732299805,
"fcm_dpo/q_t": 0.37605804204940796,
"grad_norm": 34.80514144897461,
"learning_rate": 4.5131756438276466e-07,
"logits/chosen": 1.670656681060791,
"logits/rejected": 1.5700781345367432,
"logps/chosen": -83.94563293457031,
"logps/ref_chosen": -87.85247802734375,
"logps/ref_rejected": -94.58252716064453,
"logps/rejected": -93.76663208007812,
"loss": 1.0477,
"margin_dpo/margin_mean": 3.0909528732299805,
"margin_dpo/margin_std": 4.717261791229248,
"step": 188
},
{
"epoch": 0.2857142857142857,
"fcm_dpo/beta": 0.20079335570335388,
"fcm_dpo/delta": -0.12760794162750244,
"fcm_dpo/margin": 2.3743178844451904,
"fcm_dpo/q_t": 0.39784711599349976,
"grad_norm": 41.234535217285156,
"learning_rate": 4.5053079818876096e-07,
"logits/chosen": 1.8904516696929932,
"logits/rejected": 1.885125994682312,
"logps/chosen": -90.63731384277344,
"logps/ref_chosen": -95.00414276123047,
"logps/ref_rejected": -90.50090789794922,
"logps/rejected": -88.50839233398438,
"loss": 1.2093,
"margin_dpo/margin_mean": 2.3743185997009277,
"margin_dpo/margin_std": 4.945883750915527,
"step": 189
},
{
"epoch": 0.2872260015117158,
"fcm_dpo/beta": 0.19793203473091125,
"fcm_dpo/delta": -0.07783995568752289,
"fcm_dpo/margin": 3.3963685035705566,
"fcm_dpo/q_t": 0.35779502987861633,
"grad_norm": 36.98722839355469,
"learning_rate": 4.4973842271726024e-07,
"logits/chosen": 1.5879793167114258,
"logits/rejected": 1.2592995166778564,
"logps/chosen": -65.76041412353516,
"logps/ref_chosen": -70.79264831542969,
"logps/ref_rejected": -122.56155395507812,
"logps/rejected": -120.9256820678711,
"loss": 1.0021,
"margin_dpo/margin_mean": 3.3963685035705566,
"margin_dpo/margin_std": 4.595223903656006,
"step": 190
},
{
"epoch": 0.2887377173091459,
"fcm_dpo/beta": 0.19986538589000702,
"fcm_dpo/delta": 0.0998755544424057,
"fcm_dpo/margin": 2.5279035568237305,
"fcm_dpo/q_t": 0.3947795629501343,
"grad_norm": 39.27770233154297,
"learning_rate": 4.48940460132708e-07,
"logits/chosen": 1.6662890911102295,
"logits/rejected": 1.5767499208450317,
"logps/chosen": -88.5599365234375,
"logps/ref_chosen": -92.15048217773438,
"logps/ref_rejected": -106.4153060913086,
"logps/rejected": -105.3526611328125,
"loss": 1.1413,
"margin_dpo/margin_mean": 2.5279035568237305,
"margin_dpo/margin_std": 4.721172332763672,
"step": 191
},
{
"epoch": 0.29024943310657597,
"fcm_dpo/beta": 0.20867319405078888,
"fcm_dpo/delta": 0.2078724503517151,
"fcm_dpo/margin": 1.9151049852371216,
"fcm_dpo/q_t": 0.41488662362098694,
"grad_norm": 29.34225082397461,
"learning_rate": 4.481369327558329e-07,
"logits/chosen": 1.9284831285476685,
"logits/rejected": 1.860489845275879,
"logps/chosen": -65.33647155761719,
"logps/ref_chosen": -69.51527404785156,
"logps/ref_rejected": -80.15898132324219,
"logps/rejected": -77.8952865600586,
"loss": 1.2003,
"margin_dpo/margin_mean": 1.915104866027832,
"margin_dpo/margin_std": 4.322020053863525,
"step": 192
},
{
"epoch": 0.29176114890400606,
"fcm_dpo/beta": 0.2067011594772339,
"fcm_dpo/delta": -0.10582581162452698,
"fcm_dpo/margin": 3.3781826496124268,
"fcm_dpo/q_t": 0.3606608211994171,
"grad_norm": 30.841073989868164,
"learning_rate": 4.47327863063023e-07,
"logits/chosen": 1.6029868125915527,
"logits/rejected": 1.6146012544631958,
"logps/chosen": -68.25808715820312,
"logps/ref_chosen": -73.43276977539062,
"logps/ref_rejected": -77.81238555908203,
"logps/rejected": -76.01588439941406,
"loss": 0.9987,
"margin_dpo/margin_mean": 3.378182888031006,
"margin_dpo/margin_std": 4.662139892578125,
"step": 193
},
{
"epoch": 0.29327286470143615,
"fcm_dpo/beta": 0.21076488494873047,
"fcm_dpo/delta": 0.19742809236049652,
"fcm_dpo/margin": 1.9506275653839111,
"fcm_dpo/q_t": 0.4201662838459015,
"grad_norm": 39.32268524169922,
"learning_rate": 4.4651327368569684e-07,
"logits/chosen": 1.6824967861175537,
"logits/rejected": 1.605659008026123,
"logps/chosen": -72.58547973632812,
"logps/ref_chosen": -76.63236999511719,
"logps/ref_rejected": -85.67449188232422,
"logps/rejected": -83.5782241821289,
"loss": 1.27,
"margin_dpo/margin_mean": 1.9506279230117798,
"margin_dpo/margin_std": 5.051035404205322,
"step": 194
},
{
"epoch": 0.2947845804988662,
"fcm_dpo/beta": 0.2124922275543213,
"fcm_dpo/delta": -0.01660079136490822,
"fcm_dpo/margin": 2.89408540725708,
"fcm_dpo/q_t": 0.3722650408744812,
"grad_norm": 33.66180419921875,
"learning_rate": 4.4569318740967043e-07,
"logits/chosen": 1.3596398830413818,
"logits/rejected": 1.3858997821807861,
"logps/chosen": -85.43913269042969,
"logps/ref_chosen": -89.43354797363281,
"logps/ref_rejected": -91.25908660888672,
"logps/rejected": -90.15875244140625,
"loss": 1.0395,
"margin_dpo/margin_mean": 2.8940858840942383,
"margin_dpo/margin_std": 4.30559778213501,
"step": 195
},
{
"epoch": 0.2962962962962963,
"fcm_dpo/beta": 0.21977734565734863,
"fcm_dpo/delta": 0.13098813593387604,
"fcm_dpo/margin": 2.147923469543457,
"fcm_dpo/q_t": 0.40691670775413513,
"grad_norm": 36.275367736816406,
"learning_rate": 4.448676271745197e-07,
"logits/chosen": 1.6476595401763916,
"logits/rejected": 1.5382771492004395,
"logps/chosen": -70.8466796875,
"logps/ref_chosen": -75.47528839111328,
"logps/ref_rejected": -99.37582397460938,
"logps/rejected": -96.8951416015625,
"loss": 1.2002,
"margin_dpo/margin_mean": 2.1479239463806152,
"margin_dpo/margin_std": 4.784689903259277,
"step": 196
},
{
"epoch": 0.29780801209372637,
"fcm_dpo/beta": 0.21895651519298553,
"fcm_dpo/delta": -0.07098434120416641,
"fcm_dpo/margin": 3.0373172760009766,
"fcm_dpo/q_t": 0.3720216751098633,
"grad_norm": 35.873863220214844,
"learning_rate": 4.440366160729392e-07,
"logits/chosen": 2.085923671722412,
"logits/rejected": 1.9330120086669922,
"logps/chosen": -62.252044677734375,
"logps/ref_chosen": -67.57392883300781,
"logps/ref_rejected": -89.97993469238281,
"logps/rejected": -87.69536590576172,
"loss": 1.1481,
"margin_dpo/margin_mean": 3.0373175144195557,
"margin_dpo/margin_std": 5.603744029998779,
"step": 197
},
{
"epoch": 0.29931972789115646,
"fcm_dpo/beta": 0.21519571542739868,
"fcm_dpo/delta": -0.08675536513328552,
"fcm_dpo/margin": 3.1599316596984863,
"fcm_dpo/q_t": 0.3605668544769287,
"grad_norm": 31.00609016418457,
"learning_rate": 4.432001773500957e-07,
"logits/chosen": 1.671442985534668,
"logits/rejected": 1.5708034038543701,
"logps/chosen": -72.02076721191406,
"logps/ref_chosen": -77.36013793945312,
"logps/ref_rejected": -90.55670166015625,
"logps/rejected": -88.37725830078125,
"loss": 1.0148,
"margin_dpo/margin_mean": 3.1599321365356445,
"margin_dpo/margin_std": 4.4009599685668945,
"step": 198
},
{
"epoch": 0.30083144368858655,
"fcm_dpo/beta": 0.21355471014976501,
"fcm_dpo/delta": 0.0302957221865654,
"fcm_dpo/margin": 2.675426483154297,
"fcm_dpo/q_t": 0.38793349266052246,
"grad_norm": 36.102745056152344,
"learning_rate": 4.4235833440297856e-07,
"logits/chosen": 1.812955379486084,
"logits/rejected": 1.5753262042999268,
"logps/chosen": -68.58213806152344,
"logps/ref_chosen": -73.05004119873047,
"logps/ref_rejected": -95.21923065185547,
"logps/rejected": -93.42674255371094,
"loss": 1.1725,
"margin_dpo/margin_mean": 2.6754274368286133,
"margin_dpo/margin_std": 5.355816841125488,
"step": 199
},
{
"epoch": 0.30234315948601664,
"fcm_dpo/beta": 0.20989277958869934,
"fcm_dpo/delta": -0.04891178011894226,
"fcm_dpo/margin": 3.0649447441101074,
"fcm_dpo/q_t": 0.37643855810165405,
"grad_norm": 35.946041107177734,
"learning_rate": 4.415111107797445e-07,
"logits/chosen": 1.7141060829162598,
"logits/rejected": 1.4877715110778809,
"logps/chosen": -68.55535888671875,
"logps/ref_chosen": -73.75833129882812,
"logps/ref_rejected": -105.00157165527344,
"logps/rejected": -102.86354064941406,
"loss": 1.1388,
"margin_dpo/margin_mean": 3.0649447441101074,
"margin_dpo/margin_std": 5.666423797607422,
"step": 200
},
{
"epoch": 0.30234315948601664,
"eval_fcm_dpo/beta": 0.21149146556854248,
"eval_logits/chosen": 1.6652804613113403,
"eval_logits/rejected": 1.545433521270752,
"eval_logps/chosen": -81.7110595703125,
"eval_logps/ref_chosen": -86.90177917480469,
"eval_logps/ref_rejected": -96.69639587402344,
"eval_logps/rejected": -94.4600830078125,
"eval_loss": 0.5426459908485413,
"eval_margin_dpo/margin_mean": 2.9544265270233154,
"eval_margin_dpo/margin_std": 4.917666912078857,
"eval_runtime": 42.298,
"eval_samples_per_second": 54.447,
"eval_steps_per_second": 1.702,
"step": 200
},
{
"epoch": 0.30385487528344673,
"fcm_dpo/beta": 0.20943114161491394,
"fcm_dpo/delta": -0.04685019701719284,
"fcm_dpo/margin": 3.072869300842285,
"fcm_dpo/q_t": 0.37180715799331665,
"grad_norm": 35.43907165527344,
"learning_rate": 4.4065853017905953e-07,
"logits/chosen": 1.7999753952026367,
"logits/rejected": 1.6574466228485107,
"logps/chosen": -74.76203918457031,
"logps/ref_chosen": -79.4841079711914,
"logps/ref_rejected": -100.94435119628906,
"logps/rejected": -99.29515838623047,
"loss": 1.0374,
"margin_dpo/margin_mean": 3.0728700160980225,
"margin_dpo/margin_std": 4.596537113189697,
"step": 201
},
{
"epoch": 0.30536659108087677,
"fcm_dpo/beta": 0.21286721527576447,
"fcm_dpo/delta": 0.031130120158195496,
"fcm_dpo/margin": 2.667759895324707,
"fcm_dpo/q_t": 0.38877129554748535,
"grad_norm": 36.30960464477539,
"learning_rate": 4.3980061644943575e-07,
"logits/chosen": 1.692112922668457,
"logits/rejected": 1.505289077758789,
"logps/chosen": -61.41923904418945,
"logps/ref_chosen": -66.83952331542969,
"logps/ref_rejected": -93.05116271972656,
"logps/rejected": -90.29863739013672,
"loss": 1.1288,
"margin_dpo/margin_mean": 2.667759656906128,
"margin_dpo/margin_std": 4.909058570861816,
"step": 202
},
{
"epoch": 0.30687830687830686,
"fcm_dpo/beta": 0.20987369120121002,
"fcm_dpo/delta": -0.04730100557208061,
"fcm_dpo/margin": 3.0696792602539062,
"fcm_dpo/q_t": 0.37272530794143677,
"grad_norm": 35.791664123535156,
"learning_rate": 4.3893739358856455e-07,
"logits/chosen": 1.5090558528900146,
"logits/rejected": 1.256967306137085,
"logps/chosen": -75.16111755371094,
"logps/ref_chosen": -80.32998657226562,
"logps/ref_rejected": -113.52803039550781,
"logps/rejected": -111.4288330078125,
"loss": 1.0387,
"margin_dpo/margin_mean": 3.069678783416748,
"margin_dpo/margin_std": 4.734375476837158,
"step": 203
},
{
"epoch": 0.30839002267573695,
"fcm_dpo/beta": 0.2047777771949768,
"fcm_dpo/delta": -0.04267580807209015,
"fcm_dpo/margin": 3.1055994033813477,
"fcm_dpo/q_t": 0.37084197998046875,
"grad_norm": 28.791095733642578,
"learning_rate": 4.380688857426449e-07,
"logits/chosen": 1.7406399250030518,
"logits/rejected": 1.5678801536560059,
"logps/chosen": -61.10273742675781,
"logps/ref_chosen": -66.68875885009766,
"logps/ref_rejected": -85.07585906982422,
"logps/rejected": -82.59544372558594,
"loss": 1.0277,
"margin_dpo/margin_mean": 3.1055989265441895,
"margin_dpo/margin_std": 4.362913131713867,
"step": 204
},
{
"epoch": 0.30990173847316704,
"fcm_dpo/beta": 0.20836231112480164,
"fcm_dpo/delta": 0.035259123891592026,
"fcm_dpo/margin": 2.719717502593994,
"fcm_dpo/q_t": 0.39517733454704285,
"grad_norm": 38.341949462890625,
"learning_rate": 4.3719511720570814e-07,
"logits/chosen": 1.7649842500686646,
"logits/rejected": 1.6390607357025146,
"logps/chosen": -81.71599578857422,
"logps/ref_chosen": -86.51950073242188,
"logps/ref_rejected": -112.55376434326172,
"logps/rejected": -110.469970703125,
"loss": 1.1568,
"margin_dpo/margin_mean": 2.719717502593994,
"margin_dpo/margin_std": 5.38202428817749,
"step": 205
},
{
"epoch": 0.31141345427059713,
"fcm_dpo/beta": 0.20770695805549622,
"fcm_dpo/delta": -0.03976750001311302,
"fcm_dpo/margin": 1.8963961601257324,
"fcm_dpo/q_t": 0.42589449882507324,
"grad_norm": 38.55003356933594,
"learning_rate": 4.363161124189387e-07,
"logits/chosen": 1.9415085315704346,
"logits/rejected": 1.884864091873169,
"logps/chosen": -83.91799926757812,
"logps/ref_chosen": -88.68557739257812,
"logps/ref_rejected": -97.75945281982422,
"logps/rejected": -94.88827514648438,
"loss": 1.2642,
"margin_dpo/margin_mean": 1.896395206451416,
"margin_dpo/margin_std": 4.907966613769531,
"step": 206
},
{
"epoch": 0.3129251700680272,
"fcm_dpo/beta": 0.20486952364444733,
"fcm_dpo/delta": -0.038567088544368744,
"fcm_dpo/margin": 3.1027143001556396,
"fcm_dpo/q_t": 0.37806349992752075,
"grad_norm": 34.48740768432617,
"learning_rate": 4.3543189596998986e-07,
"logits/chosen": 1.5088391304016113,
"logits/rejected": 1.2929071187973022,
"logps/chosen": -81.11598205566406,
"logps/ref_chosen": -85.12134552001953,
"logps/ref_rejected": -103.34955596923828,
"logps/rejected": -102.4468994140625,
"loss": 1.0748,
"margin_dpo/margin_mean": 3.1027140617370605,
"margin_dpo/margin_std": 5.350442886352539,
"step": 207
},
{
"epoch": 0.3144368858654573,
"fcm_dpo/beta": 0.21422894299030304,
"fcm_dpo/delta": 0.26585763692855835,
"fcm_dpo/margin": 1.5979573726654053,
"fcm_dpo/q_t": 0.4325675666332245,
"grad_norm": 38.53110122680664,
"learning_rate": 4.3454249259229664e-07,
"logits/chosen": 1.486476182937622,
"logits/rejected": 1.47090482711792,
"logps/chosen": -73.46044921875,
"logps/ref_chosen": -78.84121704101562,
"logps/ref_rejected": -89.82504272460938,
"logps/rejected": -86.04224395751953,
"loss": 1.3135,
"margin_dpo/margin_mean": 1.5979571342468262,
"margin_dpo/margin_std": 4.840599060058594,
"step": 208
},
{
"epoch": 0.31594860166288735,
"fcm_dpo/beta": 0.2104741930961609,
"fcm_dpo/delta": -0.22462649643421173,
"fcm_dpo/margin": 3.8344333171844482,
"fcm_dpo/q_t": 0.34945350885391235,
"grad_norm": 33.83891677856445,
"learning_rate": 4.336479271643833e-07,
"logits/chosen": 1.7296254634857178,
"logits/rejected": 1.6433987617492676,
"logps/chosen": -80.73587799072266,
"logps/ref_chosen": -85.98588562011719,
"logps/ref_rejected": -107.1638412475586,
"logps/rejected": -105.74826049804688,
"loss": 0.9947,
"margin_dpo/margin_mean": 3.834433078765869,
"margin_dpo/margin_std": 5.514036178588867,
"step": 209
},
{
"epoch": 0.31746031746031744,
"fcm_dpo/beta": 0.2026514708995819,
"fcm_dpo/delta": -0.18983915448188782,
"fcm_dpo/margin": 3.826395034790039,
"fcm_dpo/q_t": 0.34678876399993896,
"grad_norm": 29.725292205810547,
"learning_rate": 4.327482247091679e-07,
"logits/chosen": 1.8481976985931396,
"logits/rejected": 1.6151676177978516,
"logps/chosen": -66.71804809570312,
"logps/ref_chosen": -71.75653076171875,
"logps/ref_rejected": -102.47966003417969,
"logps/rejected": -101.26757049560547,
"loss": 0.9828,
"margin_dpo/margin_mean": 3.826395034790039,
"margin_dpo/margin_std": 5.108582496643066,
"step": 210
},
{
"epoch": 0.31897203325774753,
"fcm_dpo/beta": 0.2011139690876007,
"fcm_dpo/delta": 0.019685715436935425,
"fcm_dpo/margin": 2.8868653774261475,
"fcm_dpo/q_t": 0.38376328349113464,
"grad_norm": 33.415435791015625,
"learning_rate": 4.3184341039326217e-07,
"logits/chosen": 1.8024325370788574,
"logits/rejected": 1.5747759342193604,
"logps/chosen": -66.08028411865234,
"logps/ref_chosen": -70.95170593261719,
"logps/ref_rejected": -108.51902770996094,
"logps/rejected": -106.53446960449219,
"loss": 1.0831,
"margin_dpo/margin_mean": 2.8868651390075684,
"margin_dpo/margin_std": 4.80080509185791,
"step": 211
},
{
"epoch": 0.3204837490551776,
"fcm_dpo/beta": 0.1954108476638794,
"fcm_dpo/delta": -0.1490822583436966,
"fcm_dpo/margin": 3.775815486907959,
"fcm_dpo/q_t": 0.35547029972076416,
"grad_norm": 30.241546630859375,
"learning_rate": 4.309335095262675e-07,
"logits/chosen": 1.6221773624420166,
"logits/rejected": 1.4874701499938965,
"logps/chosen": -69.07429504394531,
"logps/ref_chosen": -74.34010314941406,
"logps/ref_rejected": -97.58259582519531,
"logps/rejected": -96.09260559082031,
"loss": 1.0283,
"margin_dpo/margin_mean": 3.7758164405822754,
"margin_dpo/margin_std": 5.671681880950928,
"step": 212
},
{
"epoch": 0.3219954648526077,
"fcm_dpo/beta": 0.193147212266922,
"fcm_dpo/delta": -0.013596100732684135,
"fcm_dpo/margin": 3.1711630821228027,
"fcm_dpo/q_t": 0.3848028779029846,
"grad_norm": 32.83842468261719,
"learning_rate": 4.3001854756006724e-07,
"logits/chosen": 1.5679874420166016,
"logits/rejected": 1.579345703125,
"logps/chosen": -74.82106018066406,
"logps/ref_chosen": -80.2526626586914,
"logps/ref_rejected": -94.76947021484375,
"logps/rejected": -92.509033203125,
"loss": 1.1348,
"margin_dpo/margin_mean": 3.171164035797119,
"margin_dpo/margin_std": 5.987710475921631,
"step": 213
},
{
"epoch": 0.3235071806500378,
"fcm_dpo/beta": 0.1943490356206894,
"fcm_dpo/delta": -0.005742315202951431,
"fcm_dpo/margin": 3.1131887435913086,
"fcm_dpo/q_t": 0.3851265609264374,
"grad_norm": 33.79700469970703,
"learning_rate": 4.290985500881143e-07,
"logits/chosen": 1.2866859436035156,
"logits/rejected": 1.23015296459198,
"logps/chosen": -72.68575286865234,
"logps/ref_chosen": -77.9675064086914,
"logps/ref_rejected": -84.0354232788086,
"logps/rejected": -81.86685943603516,
"loss": 1.0903,
"margin_dpo/margin_mean": 3.1131887435913086,
"margin_dpo/margin_std": 5.462100028991699,
"step": 214
},
{
"epoch": 0.3250188964474679,
"fcm_dpo/beta": 0.19066134095191956,
"fcm_dpo/delta": -0.197215735912323,
"fcm_dpo/margin": 4.093410968780518,
"fcm_dpo/q_t": 0.34744980931282043,
"grad_norm": 29.585453033447266,
"learning_rate": 4.281735428447157e-07,
"logits/chosen": 1.447283148765564,
"logits/rejected": 1.229423999786377,
"logps/chosen": -76.76492309570312,
"logps/ref_chosen": -81.2047348022461,
"logps/ref_rejected": -116.18414306640625,
"logps/rejected": -115.83775329589844,
"loss": 0.968,
"margin_dpo/margin_mean": 4.093410491943359,
"margin_dpo/margin_std": 5.116345405578613,
"step": 215
},
{
"epoch": 0.32653061224489793,
"fcm_dpo/beta": 0.18577197194099426,
"fcm_dpo/delta": -0.024033507332205772,
"fcm_dpo/margin": 3.35071063041687,
"fcm_dpo/q_t": 0.3735596537590027,
"grad_norm": 28.57142448425293,
"learning_rate": 4.2724355170431247e-07,
"logits/chosen": 1.929776668548584,
"logits/rejected": 1.7076435089111328,
"logps/chosen": -79.36236572265625,
"logps/ref_chosen": -83.57113647460938,
"logps/ref_rejected": -112.51902770996094,
"logps/rejected": -111.66097259521484,
"loss": 1.0691,
"margin_dpo/margin_mean": 3.35071063041687,
"margin_dpo/margin_std": 5.3623809814453125,
"step": 216
},
{
"epoch": 0.328042328042328,
"fcm_dpo/beta": 0.18335673213005066,
"fcm_dpo/delta": -0.05656471848487854,
"fcm_dpo/margin": 3.5603256225585938,
"fcm_dpo/q_t": 0.37649548053741455,
"grad_norm": 30.940200805664062,
"learning_rate": 4.26308602680756e-07,
"logits/chosen": 1.5567448139190674,
"logits/rejected": 1.3418993949890137,
"logps/chosen": -73.3405532836914,
"logps/ref_chosen": -77.01390075683594,
"logps/ref_rejected": -105.28099822998047,
"logps/rejected": -105.16797637939453,
"loss": 1.0697,
"margin_dpo/margin_mean": 3.5603256225585938,
"margin_dpo/margin_std": 5.963696479797363,
"step": 217
},
{
"epoch": 0.3295540438397581,
"fcm_dpo/beta": 0.18211647868156433,
"fcm_dpo/delta": -0.04763578251004219,
"fcm_dpo/margin": 2.4268999099731445,
"fcm_dpo/q_t": 0.4111635684967041,
"grad_norm": 32.71650695800781,
"learning_rate": 4.253687219265803e-07,
"logits/chosen": 1.4811632633209229,
"logits/rejected": 1.4756027460098267,
"logps/chosen": -88.89375305175781,
"logps/ref_chosen": -92.47299194335938,
"logps/ref_rejected": -92.80751037597656,
"logps/rejected": -91.65516662597656,
"loss": 1.2178,
"margin_dpo/margin_mean": 2.4268996715545654,
"margin_dpo/margin_std": 5.39958381652832,
"step": 218
},
{
"epoch": 0.3310657596371882,
"fcm_dpo/beta": 0.18171855807304382,
"fcm_dpo/delta": 0.020053904503583908,
"fcm_dpo/margin": 3.1978893280029297,
"fcm_dpo/q_t": 0.37985914945602417,
"grad_norm": 26.802236557006836,
"learning_rate": 4.2442393573227043e-07,
"logits/chosen": 1.4625592231750488,
"logits/rejected": 1.3691678047180176,
"logps/chosen": -72.9026107788086,
"logps/ref_chosen": -77.10382080078125,
"logps/ref_rejected": -92.3438949584961,
"logps/rejected": -91.340576171875,
"loss": 1.0694,
"margin_dpo/margin_mean": 3.1978893280029297,
"margin_dpo/margin_std": 5.0218329429626465,
"step": 219
},
{
"epoch": 0.3325774754346183,
"fcm_dpo/beta": 0.18513716757297516,
"fcm_dpo/delta": 0.10643023252487183,
"fcm_dpo/margin": 2.6936841011047363,
"fcm_dpo/q_t": 0.4018252193927765,
"grad_norm": 28.29593276977539,
"learning_rate": 4.234742705255272e-07,
"logits/chosen": 1.977401614189148,
"logits/rejected": 1.7988307476043701,
"logps/chosen": -57.971031188964844,
"logps/ref_chosen": -62.48021697998047,
"logps/ref_rejected": -86.93276977539062,
"logps/rejected": -85.11727905273438,
"loss": 1.1588,
"margin_dpo/margin_mean": 2.6936841011047363,
"margin_dpo/margin_std": 5.318660736083984,
"step": 220
},
{
"epoch": 0.3340891912320484,
"fcm_dpo/beta": 0.18827053904533386,
"fcm_dpo/delta": 0.02966354787349701,
"fcm_dpo/margin": 3.030022621154785,
"fcm_dpo/q_t": 0.3889802098274231,
"grad_norm": 30.431005477905273,
"learning_rate": 4.22519752870528e-07,
"logits/chosen": 1.6791030168533325,
"logits/rejected": 1.4852240085601807,
"logps/chosen": -73.93135070800781,
"logps/ref_chosen": -78.35491943359375,
"logps/ref_rejected": -108.17631530761719,
"logps/rejected": -106.78276062011719,
"loss": 1.1445,
"margin_dpo/margin_mean": 3.0300216674804688,
"margin_dpo/margin_std": 5.741293907165527,
"step": 221
},
{
"epoch": 0.3356009070294785,
"fcm_dpo/beta": 0.18445804715156555,
"fcm_dpo/delta": -0.12642233073711395,
"fcm_dpo/margin": 3.889547824859619,
"fcm_dpo/q_t": 0.35349398851394653,
"grad_norm": 31.85706901550293,
"learning_rate": 4.2156040946718343e-07,
"logits/chosen": 1.816218614578247,
"logits/rejected": 1.5741527080535889,
"logps/chosen": -72.87997436523438,
"logps/ref_chosen": -77.2734375,
"logps/ref_rejected": -126.41007995605469,
"logps/rejected": -125.90616607666016,
"loss": 0.9677,
"margin_dpo/margin_mean": 3.889547824859619,
"margin_dpo/margin_std": 4.93218994140625,
"step": 222
},
{
"epoch": 0.3371126228269085,
"fcm_dpo/beta": 0.17852336168289185,
"fcm_dpo/delta": -0.14979343116283417,
"fcm_dpo/margin": 4.138674736022949,
"fcm_dpo/q_t": 0.34866607189178467,
"grad_norm": 25.545076370239258,
"learning_rate": 4.2059626715039065e-07,
"logits/chosen": 1.600868582725525,
"logits/rejected": 1.468353271484375,
"logps/chosen": -73.9285888671875,
"logps/ref_chosen": -78.4210205078125,
"logps/ref_rejected": -101.38420867919922,
"logps/rejected": -101.03044128417969,
"loss": 0.9532,
"margin_dpo/margin_mean": 4.138675212860107,
"margin_dpo/margin_std": 5.194621562957764,
"step": 223
},
{
"epoch": 0.3386243386243386,
"fcm_dpo/beta": 0.17954044044017792,
"fcm_dpo/delta": 0.09770029783248901,
"fcm_dpo/margin": 2.82529878616333,
"fcm_dpo/q_t": 0.3936260938644409,
"grad_norm": 29.668907165527344,
"learning_rate": 4.1962735288928304e-07,
"logits/chosen": 1.5408947467803955,
"logits/rejected": 1.482640266418457,
"logps/chosen": -75.03809356689453,
"logps/ref_chosen": -79.36337280273438,
"logps/ref_rejected": -89.99789428710938,
"logps/rejected": -88.4979248046875,
"loss": 1.0829,
"margin_dpo/margin_mean": 2.825299024581909,
"margin_dpo/margin_std": 4.494820594787598,
"step": 224
},
{
"epoch": 0.3401360544217687,
"fcm_dpo/beta": 0.17720326781272888,
"fcm_dpo/delta": -0.056610286235809326,
"fcm_dpo/margin": 3.671389102935791,
"fcm_dpo/q_t": 0.3757237493991852,
"grad_norm": 35.15827178955078,
"learning_rate": 4.186536937864752e-07,
"logits/chosen": 1.5901364088058472,
"logits/rejected": 1.3371665477752686,
"logps/chosen": -85.0064926147461,
"logps/ref_chosen": -88.99606323242188,
"logps/ref_rejected": -127.55032348632812,
"logps/rejected": -127.23213195800781,
"loss": 1.0867,
"margin_dpo/margin_mean": 3.671389579772949,
"margin_dpo/margin_std": 6.1652045249938965,
"step": 225
},
{
"epoch": 0.3416477702191988,
"fcm_dpo/beta": 0.17827850580215454,
"fcm_dpo/delta": 0.0006794985383749008,
"fcm_dpo/margin": 3.361865520477295,
"fcm_dpo/q_t": 0.37807855010032654,
"grad_norm": 24.475175857543945,
"learning_rate": 4.176753170773052e-07,
"logits/chosen": 1.7302067279815674,
"logits/rejected": 1.627681851387024,
"logps/chosen": -63.755279541015625,
"logps/ref_chosen": -68.68444061279297,
"logps/ref_rejected": -85.81898498535156,
"logps/rejected": -84.2516860961914,
"loss": 1.0853,
"margin_dpo/margin_mean": 3.361865520477295,
"margin_dpo/margin_std": 5.56801700592041,
"step": 226
},
{
"epoch": 0.3431594860166289,
"fcm_dpo/beta": 0.17725418508052826,
"fcm_dpo/delta": -0.0032851658761501312,
"fcm_dpo/margin": 3.3981308937072754,
"fcm_dpo/q_t": 0.38432079553604126,
"grad_norm": 30.37522315979004,
"learning_rate": 4.166922501290729e-07,
"logits/chosen": 2.125030755996704,
"logits/rejected": 2.0093131065368652,
"logps/chosen": -68.0997314453125,
"logps/ref_chosen": -72.52029418945312,
"logps/ref_rejected": -90.7720718383789,
"logps/rejected": -89.7496337890625,
"loss": 1.1434,
"margin_dpo/margin_mean": 3.3981308937072754,
"margin_dpo/margin_std": 6.397700786590576,
"step": 227
},
{
"epoch": 0.34467120181405897,
"fcm_dpo/beta": 0.18030789494514465,
"fcm_dpo/delta": 0.051092106848955154,
"fcm_dpo/margin": 3.057624340057373,
"fcm_dpo/q_t": 0.38950926065444946,
"grad_norm": 27.90323829650879,
"learning_rate": 4.1570452044027405e-07,
"logits/chosen": 1.6655057668685913,
"logits/rejected": 1.5038576126098633,
"logps/chosen": -67.99946594238281,
"logps/ref_chosen": -72.23167419433594,
"logps/ref_rejected": -95.45873260498047,
"logps/rejected": -94.28414154052734,
"loss": 1.1318,
"margin_dpo/margin_mean": 3.0576257705688477,
"margin_dpo/margin_std": 5.6507768630981445,
"step": 228
},
{
"epoch": 0.34618291761148906,
"fcm_dpo/beta": 0.1783168613910675,
"fcm_dpo/delta": -0.05871783569455147,
"fcm_dpo/margin": 3.6718177795410156,
"fcm_dpo/q_t": 0.3637602925300598,
"grad_norm": 26.114145278930664,
"learning_rate": 4.147121556398312e-07,
"logits/chosen": 2.333543300628662,
"logits/rejected": 2.095942735671997,
"logps/chosen": -61.12850570678711,
"logps/ref_chosen": -66.88822174072266,
"logps/ref_rejected": -92.27890014648438,
"logps/rejected": -90.19100189208984,
"loss": 1.0181,
"margin_dpo/margin_mean": 3.6718177795410156,
"margin_dpo/margin_std": 5.082923889160156,
"step": 229
},
{
"epoch": 0.3476946334089191,
"fcm_dpo/beta": 0.18222709000110626,
"fcm_dpo/delta": 0.08974069356918335,
"fcm_dpo/margin": 2.8103251457214355,
"fcm_dpo/q_t": 0.39406752586364746,
"grad_norm": 32.12981033325195,
"learning_rate": 4.137151834863213e-07,
"logits/chosen": 1.4846326112747192,
"logits/rejected": 1.4764800071716309,
"logps/chosen": -71.39359283447266,
"logps/ref_chosen": -76.12332153320312,
"logps/ref_rejected": -78.19171905517578,
"logps/rejected": -76.27232360839844,
"loss": 1.1493,
"margin_dpo/margin_mean": 2.810325860977173,
"margin_dpo/margin_std": 5.281257629394531,
"step": 230
},
{
"epoch": 0.3492063492063492,
"fcm_dpo/beta": 0.17589285969734192,
"fcm_dpo/delta": -0.14354144036769867,
"fcm_dpo/margin": 4.152382850646973,
"fcm_dpo/q_t": 0.3555976152420044,
"grad_norm": 30.2910213470459,
"learning_rate": 4.1271363186719835e-07,
"logits/chosen": 1.2955679893493652,
"logits/rejected": 1.2993032932281494,
"logps/chosen": -88.24240112304688,
"logps/ref_chosen": -92.45181274414062,
"logps/ref_rejected": -100.89735412597656,
"logps/rejected": -100.84032440185547,
"loss": 1.0081,
"margin_dpo/margin_mean": 4.152383327484131,
"margin_dpo/margin_std": 5.8546953201293945,
"step": 231
},
{
"epoch": 0.3507180650037793,
"fcm_dpo/beta": 0.1787184178829193,
"fcm_dpo/delta": 0.10779862105846405,
"fcm_dpo/margin": 2.7846970558166504,
"fcm_dpo/q_t": 0.40889689326286316,
"grad_norm": 100.12737274169922,
"learning_rate": 4.1170752879801436e-07,
"logits/chosen": 1.4190423488616943,
"logits/rejected": 1.359769582748413,
"logps/chosen": -82.397705078125,
"logps/ref_chosen": -86.75383758544922,
"logps/ref_rejected": -98.16909790039062,
"logps/rejected": -96.59767150878906,
"loss": 1.2198,
"margin_dpo/margin_mean": 2.7846975326538086,
"margin_dpo/margin_std": 6.417873382568359,
"step": 232
},
{
"epoch": 0.35222978080120937,
"fcm_dpo/beta": 0.1750573068857193,
"fcm_dpo/delta": -0.13857866823673248,
"fcm_dpo/margin": 3.1586740016937256,
"fcm_dpo/q_t": 0.39157694578170776,
"grad_norm": 27.844707489013672,
"learning_rate": 4.106969024216348e-07,
"logits/chosen": 1.5601751804351807,
"logits/rejected": 1.4629740715026855,
"logps/chosen": -68.05751037597656,
"logps/ref_chosen": -72.87556457519531,
"logps/ref_rejected": -85.22943115234375,
"logps/rejected": -83.57006072998047,
"loss": 1.1361,
"margin_dpo/margin_mean": 3.1586740016937256,
"margin_dpo/margin_std": 5.672341823577881,
"step": 233
},
{
"epoch": 0.35374149659863946,
"fcm_dpo/beta": 0.17893540859222412,
"fcm_dpo/delta": 0.08269474655389786,
"fcm_dpo/margin": 2.900754928588867,
"fcm_dpo/q_t": 0.3984100818634033,
"grad_norm": 27.9986572265625,
"learning_rate": 4.09681781007452e-07,
"logits/chosen": 1.29150390625,
"logits/rejected": 1.2452890872955322,
"logps/chosen": -65.02719116210938,
"logps/ref_chosen": -70.05477905273438,
"logps/ref_rejected": -68.7240982055664,
"logps/rejected": -66.59725952148438,
"loss": 1.1605,
"margin_dpo/margin_mean": 2.900754928588867,
"margin_dpo/margin_std": 5.591994285583496,
"step": 234
},
{
"epoch": 0.35525321239606955,
"fcm_dpo/beta": 0.17522048950195312,
"fcm_dpo/delta": -0.09907099604606628,
"fcm_dpo/margin": 3.9499454498291016,
"fcm_dpo/q_t": 0.3530880808830261,
"grad_norm": 29.631473541259766,
"learning_rate": 4.08662192950594e-07,
"logits/chosen": 1.7008306980133057,
"logits/rejected": 1.671209454536438,
"logps/chosen": -79.98178100585938,
"logps/ref_chosen": -85.86051940917969,
"logps/ref_rejected": -96.14663696289062,
"logps/rejected": -94.21785736083984,
"loss": 0.9612,
"margin_dpo/margin_mean": 3.9499454498291016,
"margin_dpo/margin_std": 4.817038536071777,
"step": 235
},
{
"epoch": 0.35676492819349964,
"fcm_dpo/beta": 0.1744535267353058,
"fcm_dpo/delta": 0.011955919675529003,
"fcm_dpo/margin": 3.374511957168579,
"fcm_dpo/q_t": 0.3849853277206421,
"grad_norm": 30.669490814208984,
"learning_rate": 4.076381667711306e-07,
"logits/chosen": 1.6705328226089478,
"logits/rejected": 1.653887391090393,
"logps/chosen": -85.20165252685547,
"logps/ref_chosen": -89.75252532958984,
"logps/ref_rejected": -99.28534698486328,
"logps/rejected": -98.1089859008789,
"loss": 1.1536,
"margin_dpo/margin_mean": 3.3745126724243164,
"margin_dpo/margin_std": 6.521100997924805,
"step": 236
},
{
"epoch": 0.35827664399092973,
"fcm_dpo/beta": 0.1793350875377655,
"fcm_dpo/delta": 0.18251191079616547,
"fcm_dpo/margin": 2.3729090690612793,
"fcm_dpo/q_t": 0.41561537981033325,
"grad_norm": 34.04213333129883,
"learning_rate": 4.066097311132753e-07,
"logits/chosen": 1.594843864440918,
"logits/rejected": 1.5914448499679565,
"logps/chosen": -87.67994689941406,
"logps/ref_chosen": -92.59001922607422,
"logps/ref_rejected": -101.45584869384766,
"logps/rejected": -98.91868591308594,
"loss": 1.2014,
"margin_dpo/margin_mean": 2.3729095458984375,
"margin_dpo/margin_std": 5.1990275382995605,
"step": 237
},
{
"epoch": 0.35978835978835977,
"fcm_dpo/beta": 0.17949533462524414,
"fcm_dpo/delta": -0.03761757165193558,
"fcm_dpo/margin": 3.535294532775879,
"fcm_dpo/q_t": 0.37026742100715637,
"grad_norm": 28.87019920349121,
"learning_rate": 4.0557691474458414e-07,
"logits/chosen": 1.5214321613311768,
"logits/rejected": 1.505321741104126,
"logps/chosen": -76.80465698242188,
"logps/ref_chosen": -82.2470474243164,
"logps/ref_rejected": -92.59944152832031,
"logps/rejected": -90.69235229492188,
"loss": 1.059,
"margin_dpo/margin_mean": 3.535294532775879,
"margin_dpo/margin_std": 5.499220848083496,
"step": 238
},
{
"epoch": 0.36130007558578986,
"fcm_dpo/beta": 0.18097104132175446,
"fcm_dpo/delta": -0.004759851843118668,
"fcm_dpo/margin": 3.3366780281066895,
"fcm_dpo/q_t": 0.3874804973602295,
"grad_norm": 32.175743103027344,
"learning_rate": 4.045397465551513e-07,
"logits/chosen": 1.8428244590759277,
"logits/rejected": 1.5413553714752197,
"logps/chosen": -70.64430236816406,
"logps/ref_chosen": -75.30878448486328,
"logps/ref_rejected": -131.2318115234375,
"logps/rejected": -129.90402221679688,
"loss": 1.1283,
"margin_dpo/margin_mean": 3.336677074432373,
"margin_dpo/margin_std": 6.204185485839844,
"step": 239
},
{
"epoch": 0.36281179138321995,
"fcm_dpo/beta": 0.17442013323307037,
"fcm_dpo/delta": -0.2626558840274811,
"fcm_dpo/margin": 4.82325553894043,
"fcm_dpo/q_t": 0.3299955725669861,
"grad_norm": 29.32863998413086,
"learning_rate": 4.0349825555680045e-07,
"logits/chosen": 1.6912193298339844,
"logits/rejected": 1.4668397903442383,
"logps/chosen": -65.2503890991211,
"logps/ref_chosen": -70.81785583496094,
"logps/ref_rejected": -98.53778076171875,
"logps/rejected": -97.7935791015625,
"loss": 0.9305,
"margin_dpo/margin_mean": 4.82325553894043,
"margin_dpo/margin_std": 5.806119918823242,
"step": 240
},
{
"epoch": 0.36432350718065004,
"fcm_dpo/beta": 0.1754496693611145,
"fcm_dpo/delta": 0.17503008246421814,
"fcm_dpo/margin": 2.46356201171875,
"fcm_dpo/q_t": 0.4143379032611847,
"grad_norm": 33.23362350463867,
"learning_rate": 4.0245247088227377e-07,
"logits/chosen": 1.521575927734375,
"logits/rejected": 1.4731206893920898,
"logps/chosen": -83.33875274658203,
"logps/ref_chosen": -88.60260772705078,
"logps/ref_rejected": -101.42214965820312,
"logps/rejected": -98.62185668945312,
"loss": 1.1913,
"margin_dpo/margin_mean": 2.463561773300171,
"margin_dpo/margin_std": 5.311173915863037,
"step": 241
},
{
"epoch": 0.36583522297808013,
"fcm_dpo/beta": 0.17090430855751038,
"fcm_dpo/delta": -0.17562828958034515,
"fcm_dpo/margin": 4.446490287780762,
"fcm_dpo/q_t": 0.35152992606163025,
"grad_norm": 24.115978240966797,
"learning_rate": 4.0140242178441665e-07,
"logits/chosen": 1.3632407188415527,
"logits/rejected": 1.2794369459152222,
"logps/chosen": -71.06964111328125,
"logps/ref_chosen": -77.34110260009766,
"logps/ref_rejected": -84.76332092285156,
"logps/rejected": -82.93833923339844,
"loss": 0.9778,
"margin_dpo/margin_mean": 4.446490287780762,
"margin_dpo/margin_std": 6.0980224609375,
"step": 242
},
{
"epoch": 0.3673469387755102,
"fcm_dpo/beta": 0.17107948660850525,
"fcm_dpo/delta": 0.049044981598854065,
"fcm_dpo/margin": 3.235016345977783,
"fcm_dpo/q_t": 0.3897736668586731,
"grad_norm": 32.52568435668945,
"learning_rate": 4.003481376353596e-07,
"logits/chosen": 1.3836629390716553,
"logits/rejected": 1.3904385566711426,
"logps/chosen": -88.62066650390625,
"logps/ref_chosen": -93.55897521972656,
"logps/ref_rejected": -89.33551025390625,
"logps/rejected": -87.63221740722656,
"loss": 1.1273,
"margin_dpo/margin_mean": 3.235015869140625,
"margin_dpo/margin_std": 5.809148788452148,
"step": 243
},
{
"epoch": 0.3688586545729403,
"fcm_dpo/beta": 0.16387493908405304,
"fcm_dpo/delta": -0.33298128843307495,
"fcm_dpo/margin": 5.507326126098633,
"fcm_dpo/q_t": 0.31379491090774536,
"grad_norm": 22.067636489868164,
"learning_rate": 3.9928964792569654e-07,
"logits/chosen": 1.7233500480651855,
"logits/rejected": 1.5986864566802979,
"logps/chosen": -64.07341003417969,
"logps/ref_chosen": -69.82603454589844,
"logps/ref_rejected": -92.4764175415039,
"logps/rejected": -92.23110961914062,
"loss": 0.8217,
"margin_dpo/margin_mean": 5.507327079772949,
"margin_dpo/margin_std": 5.298586368560791,
"step": 244
},
{
"epoch": 0.37037037037037035,
"fcm_dpo/beta": 0.1566339135169983,
"fcm_dpo/delta": -0.18002939224243164,
"fcm_dpo/margin": 4.888873100280762,
"fcm_dpo/q_t": 0.34525537490844727,
"grad_norm": 27.70292854309082,
"learning_rate": 3.982269822636601e-07,
"logits/chosen": 1.7034671306610107,
"logits/rejected": 1.6361165046691895,
"logps/chosen": -81.09748077392578,
"logps/ref_chosen": -85.68216705322266,
"logps/ref_rejected": -93.8754653930664,
"logps/rejected": -94.17964935302734,
"loss": 0.9552,
"margin_dpo/margin_mean": 4.888873100280762,
"margin_dpo/margin_std": 6.235966682434082,
"step": 245
},
{
"epoch": 0.37188208616780044,
"fcm_dpo/beta": 0.1533387005329132,
"fcm_dpo/delta": -0.1032537966966629,
"fcm_dpo/margin": 4.539944171905518,
"fcm_dpo/q_t": 0.3663942813873291,
"grad_norm": 28.609737396240234,
"learning_rate": 3.971601703742932e-07,
"logits/chosen": 1.8069227933883667,
"logits/rejected": 1.666886568069458,
"logps/chosen": -86.67765808105469,
"logps/ref_chosen": -90.05093383789062,
"logps/ref_rejected": -112.77645874023438,
"logps/rejected": -113.94313049316406,
"loss": 1.034,
"margin_dpo/margin_mean": 4.539945602416992,
"margin_dpo/margin_std": 6.906374931335449,
"step": 246
},
{
"epoch": 0.37339380196523053,
"fcm_dpo/beta": 0.15819424390792847,
"fcm_dpo/delta": 0.22064730525016785,
"fcm_dpo/margin": 2.442718744277954,
"fcm_dpo/q_t": 0.42223188281059265,
"grad_norm": 33.092018127441406,
"learning_rate": 3.960892420986177e-07,
"logits/chosen": 1.6968116760253906,
"logits/rejected": 1.646969199180603,
"logps/chosen": -100.31169128417969,
"logps/ref_chosen": -103.23979187011719,
"logps/ref_rejected": -105.26278686523438,
"logps/rejected": -104.77740478515625,
"loss": 1.2204,
"margin_dpo/margin_mean": 2.442718505859375,
"margin_dpo/margin_std": 5.771693706512451,
"step": 247
},
{
"epoch": 0.3749055177626606,
"fcm_dpo/beta": 0.16033056378364563,
"fcm_dpo/delta": -0.012121915817260742,
"fcm_dpo/margin": 3.8047213554382324,
"fcm_dpo/q_t": 0.38406720757484436,
"grad_norm": 31.485231399536133,
"learning_rate": 3.9501422739279953e-07,
"logits/chosen": 1.509279489517212,
"logits/rejected": 1.620393991470337,
"logps/chosen": -84.99481201171875,
"logps/ref_chosen": -88.16007995605469,
"logps/ref_rejected": -75.11514282226562,
"logps/rejected": -75.75459289550781,
"loss": 1.111,
"margin_dpo/margin_mean": 3.804720878601074,
"margin_dpo/margin_std": 6.728028297424316,
"step": 248
},
{
"epoch": 0.3764172335600907,
"fcm_dpo/beta": 0.16030041873455048,
"fcm_dpo/delta": 0.09141341596841812,
"fcm_dpo/margin": 1.8059356212615967,
"fcm_dpo/q_t": 0.4365463852882385,
"grad_norm": 39.32426834106445,
"learning_rate": 3.9393515632731094e-07,
"logits/chosen": 1.5910958051681519,
"logits/rejected": 1.6389684677124023,
"logps/chosen": -89.02435302734375,
"logps/ref_chosen": -91.01773071289062,
"logps/ref_rejected": -80.51113891601562,
"logps/rejected": -80.32369995117188,
"loss": 1.3894,
"margin_dpo/margin_mean": 1.8059358596801758,
"margin_dpo/margin_std": 6.863600730895996,
"step": 249
},
{
"epoch": 0.3779289493575208,
"fcm_dpo/beta": 0.15786859393119812,
"fcm_dpo/delta": -0.2350025177001953,
"fcm_dpo/margin": 5.169163703918457,
"fcm_dpo/q_t": 0.33603614568710327,
"grad_norm": 26.052709579467773,
"learning_rate": 3.9285205908608934e-07,
"logits/chosen": 1.9258992671966553,
"logits/rejected": 1.8700491189956665,
"logps/chosen": -78.1419906616211,
"logps/ref_chosen": -80.5888671875,
"logps/ref_rejected": -90.15093994140625,
"logps/rejected": -92.87322998046875,
"loss": 0.9754,
"margin_dpo/margin_mean": 5.169164657592773,
"margin_dpo/margin_std": 7.048128128051758,
"step": 250
},
{
"epoch": 0.3794406651549509,
"fcm_dpo/beta": 0.15806275606155396,
"fcm_dpo/delta": 0.16336648166179657,
"fcm_dpo/margin": 2.808767318725586,
"fcm_dpo/q_t": 0.4145629405975342,
"grad_norm": 28.555532455444336,
"learning_rate": 3.9176496596569265e-07,
"logits/chosen": 1.852798581123352,
"logits/rejected": 1.7349350452423096,
"logps/chosen": -80.37120056152344,
"logps/ref_chosen": -82.70405578613281,
"logps/ref_rejected": -98.94266510009766,
"logps/rejected": -99.4185791015625,
"loss": 1.1979,
"margin_dpo/margin_mean": 2.8087668418884277,
"margin_dpo/margin_std": 6.254486560821533,
"step": 251
},
{
"epoch": 0.38095238095238093,
"fcm_dpo/beta": 0.16371968388557434,
"fcm_dpo/delta": 0.10678450763225555,
"fcm_dpo/margin": 3.0254108905792236,
"fcm_dpo/q_t": 0.39587944746017456,
"grad_norm": 26.863725662231445,
"learning_rate": 3.9067390737445254e-07,
"logits/chosen": 1.3596301078796387,
"logits/rejected": 1.255225658416748,
"logps/chosen": -70.09324645996094,
"logps/ref_chosen": -73.10369110107422,
"logps/ref_rejected": -94.90235900878906,
"logps/rejected": -94.91732788085938,
"loss": 1.1854,
"margin_dpo/margin_mean": 3.0254111289978027,
"margin_dpo/margin_std": 6.294610977172852,
"step": 252
},
{
"epoch": 0.382464096749811,
"fcm_dpo/beta": 0.16050489246845245,
"fcm_dpo/delta": -0.07427267730236053,
"fcm_dpo/margin": 2.915503978729248,
"fcm_dpo/q_t": 0.4101312458515167,
"grad_norm": 25.899141311645508,
"learning_rate": 3.8957891383162304e-07,
"logits/chosen": 1.8655691146850586,
"logits/rejected": 1.7491161823272705,
"logps/chosen": -66.2563705444336,
"logps/ref_chosen": -68.7789535522461,
"logps/ref_rejected": -75.98162078857422,
"logps/rejected": -76.37454223632812,
"loss": 1.1667,
"margin_dpo/margin_mean": 2.915503978729248,
"margin_dpo/margin_std": 5.7905473709106445,
"step": 253
},
{
"epoch": 0.3839758125472411,
"fcm_dpo/beta": 0.15999382734298706,
"fcm_dpo/delta": 0.009141262620687485,
"fcm_dpo/margin": 3.6941099166870117,
"fcm_dpo/q_t": 0.38594502210617065,
"grad_norm": 27.95285987854004,
"learning_rate": 3.884800159665276e-07,
"logits/chosen": 1.5770944356918335,
"logits/rejected": 1.4472434520721436,
"logps/chosen": -79.24081420898438,
"logps/ref_chosen": -81.49362182617188,
"logps/ref_rejected": -101.43672943115234,
"logps/rejected": -102.8780288696289,
"loss": 1.0868,
"margin_dpo/margin_mean": 3.69411039352417,
"margin_dpo/margin_std": 6.211980819702148,
"step": 254
},
{
"epoch": 0.3854875283446712,
"fcm_dpo/beta": 0.15869814157485962,
"fcm_dpo/delta": -0.039540208876132965,
"fcm_dpo/margin": 4.0058794021606445,
"fcm_dpo/q_t": 0.36750900745391846,
"grad_norm": 28.362796783447266,
"learning_rate": 3.873772445177015e-07,
"logits/chosen": 1.6912145614624023,
"logits/rejected": 1.6456090211868286,
"logps/chosen": -87.90896606445312,
"logps/ref_chosen": -90.46351623535156,
"logps/ref_rejected": -105.32445526123047,
"logps/rejected": -106.7757797241211,
"loss": 1.0774,
"margin_dpo/margin_mean": 4.0058794021606445,
"margin_dpo/margin_std": 6.450137138366699,
"step": 255
},
{
"epoch": 0.3869992441421013,
"fcm_dpo/beta": 0.15828856825828552,
"fcm_dpo/delta": -0.008787527680397034,
"fcm_dpo/margin": 3.8371100425720215,
"fcm_dpo/q_t": 0.38170868158340454,
"grad_norm": 29.053625106811523,
"learning_rate": 3.862706303320329e-07,
"logits/chosen": 1.3574330806732178,
"logits/rejected": 1.1794304847717285,
"logps/chosen": -79.84109497070312,
"logps/ref_chosen": -81.56578063964844,
"logps/ref_rejected": -108.58460998535156,
"logps/rejected": -110.69703674316406,
"loss": 1.096,
"margin_dpo/margin_mean": 3.8371100425720215,
"margin_dpo/margin_std": 6.507552146911621,
"step": 256
},
{
"epoch": 0.3885109599395314,
"fcm_dpo/beta": 0.15910214185714722,
"fcm_dpo/delta": 0.007843798026442528,
"fcm_dpo/margin": 3.7243387699127197,
"fcm_dpo/q_t": 0.3876420557498932,
"grad_norm": 34.42729187011719,
"learning_rate": 3.851602043638994e-07,
"logits/chosen": 1.7406425476074219,
"logits/rejected": 1.6441729068756104,
"logps/chosen": -88.56779479980469,
"logps/ref_chosen": -89.57557678222656,
"logps/ref_rejected": -123.74462127685547,
"logps/rejected": -126.461181640625,
"loss": 1.1465,
"margin_dpo/margin_mean": 3.724337577819824,
"margin_dpo/margin_std": 7.162275314331055,
"step": 257
},
{
"epoch": 0.3900226757369615,
"fcm_dpo/beta": 0.1584070920944214,
"fcm_dpo/delta": -0.07847022265195847,
"fcm_dpo/margin": 4.249208450317383,
"fcm_dpo/q_t": 0.35672086477279663,
"grad_norm": 26.64623260498047,
"learning_rate": 3.840459976743023e-07,
"logits/chosen": 1.6348203420639038,
"logits/rejected": 1.4954969882965088,
"logps/chosen": -76.1566162109375,
"logps/ref_chosen": -77.34173583984375,
"logps/ref_rejected": -99.5709228515625,
"logps/rejected": -102.63500213623047,
"loss": 0.9596,
"margin_dpo/margin_mean": 4.249208927154541,
"margin_dpo/margin_std": 5.020096778869629,
"step": 258
},
{
"epoch": 0.3915343915343915,
"fcm_dpo/beta": 0.14894232153892517,
"fcm_dpo/delta": -0.3066738247871399,
"fcm_dpo/margin": 5.885484218597412,
"fcm_dpo/q_t": 0.3259732127189636,
"grad_norm": 23.831703186035156,
"learning_rate": 3.8292804142999796e-07,
"logits/chosen": 1.3765077590942383,
"logits/rejected": 1.2210215330123901,
"logps/chosen": -79.07723999023438,
"logps/ref_chosen": -82.39556121826172,
"logps/ref_rejected": -113.73309326171875,
"logps/rejected": -116.30026245117188,
"loss": 0.9218,
"margin_dpo/margin_mean": 5.88548469543457,
"margin_dpo/margin_std": 6.962657928466797,
"step": 259
},
{
"epoch": 0.3930461073318216,
"fcm_dpo/beta": 0.14742638170719147,
"fcm_dpo/delta": 0.024370871484279633,
"fcm_dpo/margin": 3.9111266136169434,
"fcm_dpo/q_t": 0.3883991539478302,
"grad_norm": 30.209863662719727,
"learning_rate": 3.818063669026256e-07,
"logits/chosen": 1.6762707233428955,
"logits/rejected": 1.4572563171386719,
"logps/chosen": -64.32984924316406,
"logps/ref_chosen": -65.98947143554688,
"logps/ref_rejected": -94.59706115722656,
"logps/rejected": -96.84855651855469,
"loss": 1.1689,
"margin_dpo/margin_mean": 3.911125659942627,
"margin_dpo/margin_std": 7.731362342834473,
"step": 260
},
{
"epoch": 0.3945578231292517,
"fcm_dpo/beta": 0.15125837922096252,
"fcm_dpo/delta": 0.15498995780944824,
"fcm_dpo/margin": 2.9902381896972656,
"fcm_dpo/q_t": 0.41241246461868286,
"grad_norm": 33.53489303588867,
"learning_rate": 3.806810054678331e-07,
"logits/chosen": 1.4755849838256836,
"logits/rejected": 1.5271053314208984,
"logps/chosen": -86.73929595947266,
"logps/ref_chosen": -88.87684631347656,
"logps/ref_rejected": -82.34838104248047,
"logps/rejected": -83.2010726928711,
"loss": 1.2115,
"margin_dpo/margin_mean": 2.9902381896972656,
"margin_dpo/margin_std": 6.766839981079102,
"step": 261
},
{
"epoch": 0.3960695389266818,
"fcm_dpo/beta": 0.1537524312734604,
"fcm_dpo/delta": -0.01379317045211792,
"fcm_dpo/margin": 3.981868028640747,
"fcm_dpo/q_t": 0.3742169737815857,
"grad_norm": 25.462480545043945,
"learning_rate": 3.7955198860439887e-07,
"logits/chosen": 1.4629535675048828,
"logits/rejected": 1.3017804622650146,
"logps/chosen": -82.5713882446289,
"logps/ref_chosen": -85.81719970703125,
"logps/ref_rejected": -105.49027252197266,
"logps/rejected": -106.22633361816406,
"loss": 1.0262,
"margin_dpo/margin_mean": 3.981867790222168,
"margin_dpo/margin_std": 5.544529438018799,
"step": 262
},
{
"epoch": 0.3975812547241119,
"fcm_dpo/beta": 0.15217387676239014,
"fcm_dpo/delta": -0.018349166959524155,
"fcm_dpo/margin": 4.05593204498291,
"fcm_dpo/q_t": 0.37988507747650146,
"grad_norm": 27.196908950805664,
"learning_rate": 3.784193478933516e-07,
"logits/chosen": 1.6040055751800537,
"logits/rejected": 1.326271891593933,
"logps/chosen": -70.52960205078125,
"logps/ref_chosen": -73.61693572998047,
"logps/ref_rejected": -102.39161682128906,
"logps/rejected": -103.36022186279297,
"loss": 1.0854,
"margin_dpo/margin_mean": 4.055931568145752,
"margin_dpo/margin_std": 6.913456916809082,
"step": 263
},
{
"epoch": 0.39909297052154197,
"fcm_dpo/beta": 0.15037304162979126,
"fcm_dpo/delta": -0.07774695008993149,
"fcm_dpo/margin": 4.472409248352051,
"fcm_dpo/q_t": 0.3686443567276001,
"grad_norm": 26.572608947753906,
"learning_rate": 3.7728311501708674e-07,
"logits/chosen": 1.420727014541626,
"logits/rejected": 1.28560209274292,
"logps/chosen": -99.06858825683594,
"logps/ref_chosen": -101.57856750488281,
"logps/ref_rejected": -111.65735626220703,
"logps/rejected": -113.61978149414062,
"loss": 1.0112,
"margin_dpo/margin_mean": 4.472408294677734,
"margin_dpo/margin_std": 6.513485431671143,
"step": 264
},
{
"epoch": 0.40060468631897206,
"fcm_dpo/beta": 0.14593267440795898,
"fcm_dpo/delta": -0.22615569829940796,
"fcm_dpo/margin": 5.539245128631592,
"fcm_dpo/q_t": 0.3435715138912201,
"grad_norm": 22.93355369567871,
"learning_rate": 3.7614332175848027e-07,
"logits/chosen": 2.0376992225646973,
"logits/rejected": 1.8995461463928223,
"logps/chosen": -61.875823974609375,
"logps/ref_chosen": -65.76426696777344,
"logps/ref_rejected": -85.19627380371094,
"logps/rejected": -86.84707641601562,
"loss": 0.9984,
"margin_dpo/margin_mean": 5.53924560546875,
"margin_dpo/margin_std": 7.727536201477051,
"step": 265
},
{
"epoch": 0.4021164021164021,
"fcm_dpo/beta": 0.1404722034931183,
"fcm_dpo/delta": -0.09249762445688248,
"fcm_dpo/margin": 4.878218650817871,
"fcm_dpo/q_t": 0.3628734350204468,
"grad_norm": 24.192852020263672,
"learning_rate": 3.75e-07,
"logits/chosen": 1.521256446838379,
"logits/rejected": 1.3532524108886719,
"logps/chosen": -70.55802917480469,
"logps/ref_chosen": -75.05682373046875,
"logps/ref_rejected": -97.52758026123047,
"logps/rejected": -97.90699768066406,
"loss": 1.0127,
"margin_dpo/margin_mean": 4.878218650817871,
"margin_dpo/margin_std": 7.106654167175293,
"step": 266
},
{
"epoch": 0.4036281179138322,
"fcm_dpo/beta": 0.14268219470977783,
"fcm_dpo/delta": 0.010663837194442749,
"fcm_dpo/margin": 4.10806941986084,
"fcm_dpo/q_t": 0.3781440854072571,
"grad_norm": 21.69527816772461,
"learning_rate": 3.738531817228131e-07,
"logits/chosen": 1.7143603563308716,
"logits/rejected": 1.6460785865783691,
"logps/chosen": -66.24651336669922,
"logps/ref_chosen": -71.13494110107422,
"logps/ref_rejected": -81.14566040039062,
"logps/rejected": -80.36531066894531,
"loss": 1.0651,
"margin_dpo/margin_mean": 4.10806941986084,
"margin_dpo/margin_std": 6.094596862792969,
"step": 267
},
{
"epoch": 0.4051398337112623,
"fcm_dpo/beta": 0.1408797800540924,
"fcm_dpo/delta": 0.005832695867866278,
"fcm_dpo/margin": 2.7348809242248535,
"fcm_dpo/q_t": 0.4266659617424011,
"grad_norm": 24.759931564331055,
"learning_rate": 3.7270289900589204e-07,
"logits/chosen": 1.4918147325515747,
"logits/rejected": 1.4511568546295166,
"logps/chosen": -76.04736328125,
"logps/ref_chosen": -80.06082153320312,
"logps/ref_rejected": -87.43035888671875,
"logps/rejected": -86.15179443359375,
"loss": 1.2272,
"margin_dpo/margin_mean": 2.7348811626434326,
"margin_dpo/margin_std": 6.584110260009766,
"step": 268
},
{
"epoch": 0.40665154950869237,
"fcm_dpo/beta": 0.1431499421596527,
"fcm_dpo/delta": 0.009387940168380737,
"fcm_dpo/margin": 4.102551460266113,
"fcm_dpo/q_t": 0.3775489330291748,
"grad_norm": 24.901914596557617,
"learning_rate": 3.7154918402511714e-07,
"logits/chosen": 1.9551441669464111,
"logits/rejected": 1.888648271560669,
"logps/chosen": -79.94139099121094,
"logps/ref_chosen": -83.36944580078125,
"logps/ref_rejected": -100.66839599609375,
"logps/rejected": -101.3428955078125,
"loss": 1.0698,
"margin_dpo/margin_mean": 4.102551460266113,
"margin_dpo/margin_std": 6.241857051849365,
"step": 269
},
{
"epoch": 0.40816326530612246,
"fcm_dpo/beta": 0.14368438720703125,
"fcm_dpo/delta": 0.10129906237125397,
"fcm_dpo/margin": 3.5041966438293457,
"fcm_dpo/q_t": 0.3959196209907532,
"grad_norm": 26.79942512512207,
"learning_rate": 3.7039206905237656e-07,
"logits/chosen": 1.8443632125854492,
"logits/rejected": 1.643599510192871,
"logps/chosen": -81.50987243652344,
"logps/ref_chosen": -85.35945129394531,
"logps/ref_rejected": -104.47489929199219,
"logps/rejected": -104.1295166015625,
"loss": 1.1234,
"margin_dpo/margin_mean": 3.5041966438293457,
"margin_dpo/margin_std": 6.335942268371582,
"step": 270
},
{
"epoch": 0.40967498110355255,
"fcm_dpo/beta": 0.1482258141040802,
"fcm_dpo/delta": 0.21132034063339233,
"fcm_dpo/margin": 2.6820480823516846,
"fcm_dpo/q_t": 0.43086880445480347,
"grad_norm": 29.933801651000977,
"learning_rate": 3.692315864546635e-07,
"logits/chosen": 1.7451214790344238,
"logits/rejected": 1.6185526847839355,
"logps/chosen": -82.49104309082031,
"logps/ref_chosen": -86.01373291015625,
"logps/ref_rejected": -109.99561309814453,
"logps/rejected": -109.15498352050781,
"loss": 1.2796,
"margin_dpo/margin_mean": 2.6820485591888428,
"margin_dpo/margin_std": 7.379421710968018,
"step": 271
},
{
"epoch": 0.41118669690098264,
"fcm_dpo/beta": 0.1463513821363449,
"fcm_dpo/delta": -0.1709228903055191,
"fcm_dpo/margin": 5.177058219909668,
"fcm_dpo/q_t": 0.3447331190109253,
"grad_norm": 22.9885311126709,
"learning_rate": 3.6806776869317067e-07,
"logits/chosen": 1.8490626811981201,
"logits/rejected": 1.8614492416381836,
"logps/chosen": -81.42835998535156,
"logps/ref_chosen": -86.37013244628906,
"logps/ref_rejected": -85.74638366699219,
"logps/rejected": -85.98165893554688,
"loss": 0.9331,
"margin_dpo/margin_mean": 5.177058219909668,
"margin_dpo/margin_std": 6.183077812194824,
"step": 272
},
{
"epoch": 0.4126984126984127,
"fcm_dpo/beta": 0.14383597671985626,
"fcm_dpo/delta": -0.0414503775537014,
"fcm_dpo/margin": 4.438119888305664,
"fcm_dpo/q_t": 0.36971086263656616,
"grad_norm": 25.183574676513672,
"learning_rate": 3.669006483223828e-07,
"logits/chosen": 1.6745984554290771,
"logits/rejected": 1.5456163883209229,
"logps/chosen": -71.47685241699219,
"logps/ref_chosen": -75.51087951660156,
"logps/ref_rejected": -101.60345458984375,
"logps/rejected": -102.0075454711914,
"loss": 1.0868,
"margin_dpo/margin_mean": 4.438118934631348,
"margin_dpo/margin_std": 7.2522125244140625,
"step": 273
},
{
"epoch": 0.41421012849584277,
"fcm_dpo/beta": 0.14348173141479492,
"fcm_dpo/delta": -0.04204811155796051,
"fcm_dpo/margin": 4.4560394287109375,
"fcm_dpo/q_t": 0.3708673417568207,
"grad_norm": 22.110204696655273,
"learning_rate": 3.657302579891656e-07,
"logits/chosen": 1.7263871431350708,
"logits/rejected": 1.6659188270568848,
"logps/chosen": -74.85958862304688,
"logps/ref_chosen": -79.040283203125,
"logps/ref_rejected": -86.31329345703125,
"logps/rejected": -86.58863830566406,
"loss": 1.0231,
"margin_dpo/margin_mean": 4.4560394287109375,
"margin_dpo/margin_std": 6.352434158325195,
"step": 274
},
{
"epoch": 0.41572184429327286,
"fcm_dpo/beta": 0.1401127278804779,
"fcm_dpo/delta": -0.17330898344516754,
"fcm_dpo/margin": 5.427064895629883,
"fcm_dpo/q_t": 0.3469482362270355,
"grad_norm": 21.600595474243164,
"learning_rate": 3.645566304318526e-07,
"logits/chosen": 1.4458909034729004,
"logits/rejected": 1.246681809425354,
"logps/chosen": -66.42031860351562,
"logps/ref_chosen": -71.82034301757812,
"logps/ref_rejected": -94.29946899414062,
"logps/rejected": -94.32649993896484,
"loss": 0.9368,
"margin_dpo/margin_mean": 5.427064895629883,
"margin_dpo/margin_std": 6.502839088439941,
"step": 275
},
{
"epoch": 0.41723356009070295,
"fcm_dpo/beta": 0.13682496547698975,
"fcm_dpo/delta": -0.036286476999521255,
"fcm_dpo/margin": 4.631248950958252,
"fcm_dpo/q_t": 0.370879590511322,
"grad_norm": 24.544862747192383,
"learning_rate": 3.633797984793294e-07,
"logits/chosen": 1.3585480451583862,
"logits/rejected": 1.2991361618041992,
"logps/chosen": -64.3580093383789,
"logps/ref_chosen": -69.54020690917969,
"logps/ref_rejected": -78.59674072265625,
"logps/rejected": -78.04579162597656,
"loss": 1.0346,
"margin_dpo/margin_mean": 4.631248950958252,
"margin_dpo/margin_std": 6.801164150238037,
"step": 276
},
{
"epoch": 0.41874527588813304,
"fcm_dpo/beta": 0.1429087072610855,
"fcm_dpo/delta": 0.2666781544685364,
"fcm_dpo/margin": 2.3913447856903076,
"fcm_dpo/q_t": 0.4346947968006134,
"grad_norm": 27.025981903076172,
"learning_rate": 3.6219979505011555e-07,
"logits/chosen": 1.641129493713379,
"logits/rejected": 1.712536334991455,
"logps/chosen": -91.08503723144531,
"logps/ref_chosen": -94.4896240234375,
"logps/ref_rejected": -85.45901489257812,
"logps/rejected": -84.44577026367188,
"loss": 1.2882,
"margin_dpo/margin_mean": 2.391343593597412,
"margin_dpo/margin_std": 6.963379383087158,
"step": 277
},
{
"epoch": 0.42025699168556313,
"fcm_dpo/beta": 0.14845089614391327,
"fcm_dpo/delta": 0.09349919855594635,
"fcm_dpo/margin": 3.414287805557251,
"fcm_dpo/q_t": 0.3993656039237976,
"grad_norm": 29.298189163208008,
"learning_rate": 3.6101665315144353e-07,
"logits/chosen": 1.4798262119293213,
"logits/rejected": 1.3288986682891846,
"logps/chosen": -83.59101867675781,
"logps/ref_chosen": -87.42613220214844,
"logps/ref_rejected": -105.44854736328125,
"logps/rejected": -105.02772521972656,
"loss": 1.171,
"margin_dpo/margin_mean": 3.414287805557251,
"margin_dpo/margin_std": 6.732444763183594,
"step": 278
},
{
"epoch": 0.4217687074829932,
"fcm_dpo/beta": 0.14359894394874573,
"fcm_dpo/delta": -0.208085834980011,
"fcm_dpo/margin": 5.515895843505859,
"fcm_dpo/q_t": 0.3363301753997803,
"grad_norm": 22.904470443725586,
"learning_rate": 3.5983040587833563e-07,
"logits/chosen": 1.4755373001098633,
"logits/rejected": 1.4106316566467285,
"logps/chosen": -64.2370376586914,
"logps/ref_chosen": -70.516845703125,
"logps/ref_rejected": -86.04249572753906,
"logps/rejected": -85.27857971191406,
"loss": 0.9183,
"margin_dpo/margin_mean": 5.515895843505859,
"margin_dpo/margin_std": 6.264064788818359,
"step": 279
},
{
"epoch": 0.42328042328042326,
"fcm_dpo/beta": 0.13692662119865417,
"fcm_dpo/delta": -0.20576292276382446,
"fcm_dpo/margin": 5.765023231506348,
"fcm_dpo/q_t": 0.33472245931625366,
"grad_norm": 27.43783187866211,
"learning_rate": 3.586410864126781e-07,
"logits/chosen": 1.6306326389312744,
"logits/rejected": 1.507961392402649,
"logps/chosen": -70.88343048095703,
"logps/ref_chosen": -76.5021743774414,
"logps/ref_rejected": -94.2752685546875,
"logps/rejected": -94.42154693603516,
"loss": 0.8887,
"margin_dpo/margin_mean": 5.765023231506348,
"margin_dpo/margin_std": 6.170578479766846,
"step": 280
},
{
"epoch": 0.42479213907785335,
"fcm_dpo/beta": 0.13551339507102966,
"fcm_dpo/delta": 0.006325826048851013,
"fcm_dpo/margin": 4.383538246154785,
"fcm_dpo/q_t": 0.37640178203582764,
"grad_norm": 20.854692459106445,
"learning_rate": 3.574487280222929e-07,
"logits/chosen": 1.4427279233932495,
"logits/rejected": 1.467543125152588,
"logps/chosen": -72.43865203857422,
"logps/ref_chosen": -77.50468444824219,
"logps/ref_rejected": -79.05717468261719,
"logps/rejected": -78.37466430664062,
"loss": 1.0394,
"margin_dpo/margin_mean": 4.383538246154785,
"margin_dpo/margin_std": 6.361909866333008,
"step": 281
},
{
"epoch": 0.42630385487528344,
"fcm_dpo/beta": 0.1353539526462555,
"fcm_dpo/delta": -0.06713277101516724,
"fcm_dpo/margin": 4.891935348510742,
"fcm_dpo/q_t": 0.3712444305419922,
"grad_norm": 24.80596923828125,
"learning_rate": 3.562533640600075e-07,
"logits/chosen": 1.26842200756073,
"logits/rejected": 1.1664509773254395,
"logps/chosen": -75.95979309082031,
"logps/ref_chosen": -80.31298065185547,
"logps/ref_rejected": -83.72120666503906,
"logps/rejected": -84.25996398925781,
"loss": 1.032,
"margin_dpo/margin_mean": 4.891935348510742,
"margin_dpo/margin_std": 7.308884143829346,
"step": 282
},
{
"epoch": 0.42781557067271353,
"fcm_dpo/beta": 0.1342248022556305,
"fcm_dpo/delta": -5.543231964111328e-06,
"fcm_dpo/margin": 4.469372749328613,
"fcm_dpo/q_t": 0.3756641745567322,
"grad_norm": 24.568660736083984,
"learning_rate": 3.550550279627215e-07,
"logits/chosen": 1.5680984258651733,
"logits/rejected": 1.2567392587661743,
"logps/chosen": -77.70895385742188,
"logps/ref_chosen": -80.72602844238281,
"logps/ref_rejected": -115.68379211425781,
"logps/rejected": -117.13607788085938,
"loss": 1.0686,
"margin_dpo/margin_mean": 4.46937370300293,
"margin_dpo/margin_std": 7.047473907470703,
"step": 283
},
{
"epoch": 0.4293272864701436,
"fcm_dpo/beta": 0.13157621026039124,
"fcm_dpo/delta": -0.11532597243785858,
"fcm_dpo/margin": 5.374138355255127,
"fcm_dpo/q_t": 0.35533052682876587,
"grad_norm": 21.152599334716797,
"learning_rate": 3.5385375325047163e-07,
"logits/chosen": 1.6199060678482056,
"logits/rejected": 1.474593162536621,
"logps/chosen": -73.30044555664062,
"logps/ref_chosen": -77.5223388671875,
"logps/ref_rejected": -104.1847152709961,
"logps/rejected": -105.33695983886719,
"loss": 0.9637,
"margin_dpo/margin_mean": 5.374138832092285,
"margin_dpo/margin_std": 6.845863342285156,
"step": 284
},
{
"epoch": 0.4308390022675737,
"fcm_dpo/beta": 0.13484027981758118,
"fcm_dpo/delta": 0.1905381679534912,
"fcm_dpo/margin": 3.093123435974121,
"fcm_dpo/q_t": 0.4164922833442688,
"grad_norm": 28.799518585205078,
"learning_rate": 3.5264957352549375e-07,
"logits/chosen": 1.525662899017334,
"logits/rejected": 1.44883394241333,
"logps/chosen": -84.68502807617188,
"logps/ref_chosen": -85.79348754882812,
"logps/ref_rejected": -96.46463775634766,
"logps/rejected": -98.44929504394531,
"loss": 1.2172,
"margin_dpo/margin_mean": 3.0931224822998047,
"margin_dpo/margin_std": 7.149640083312988,
"step": 285
},
{
"epoch": 0.4323507180650038,
"fcm_dpo/beta": 0.1331361085176468,
"fcm_dpo/delta": -0.12348408997058868,
"fcm_dpo/margin": 5.364745140075684,
"fcm_dpo/q_t": 0.35463234782218933,
"grad_norm": 24.2138729095459,
"learning_rate": 3.514425224712835e-07,
"logits/chosen": 1.406418800354004,
"logits/rejected": 1.1762328147888184,
"logps/chosen": -75.9451904296875,
"logps/ref_chosen": -77.86268615722656,
"logps/ref_rejected": -110.77134704589844,
"logps/rejected": -114.21859741210938,
"loss": 0.9968,
"margin_dpo/margin_mean": 5.364744186401367,
"margin_dpo/margin_std": 7.451730728149414,
"step": 286
},
{
"epoch": 0.43386243386243384,
"fcm_dpo/beta": 0.12852045893669128,
"fcm_dpo/delta": -0.29661697149276733,
"fcm_dpo/margin": 6.779278755187988,
"fcm_dpo/q_t": 0.32936036586761475,
"grad_norm": 23.7078800201416,
"learning_rate": 3.502326338516534e-07,
"logits/chosen": 1.394890546798706,
"logits/rejected": 1.3782275915145874,
"logps/chosen": -59.10296630859375,
"logps/ref_chosen": -62.552825927734375,
"logps/ref_rejected": -77.7650146484375,
"logps/rejected": -81.09442901611328,
"loss": 0.9124,
"margin_dpo/margin_mean": 6.779278755187988,
"margin_dpo/margin_std": 7.917557239532471,
"step": 287
},
{
"epoch": 0.43537414965986393,
"fcm_dpo/beta": 0.1267382800579071,
"fcm_dpo/delta": 0.1060422956943512,
"fcm_dpo/margin": 3.940197229385376,
"fcm_dpo/q_t": 0.40103963017463684,
"grad_norm": 25.170812606811523,
"learning_rate": 3.490199415097892e-07,
"logits/chosen": 1.2241320610046387,
"logits/rejected": 1.1363778114318848,
"logps/chosen": -82.68171691894531,
"logps/ref_chosen": -83.74117279052734,
"logps/ref_rejected": -106.93913269042969,
"logps/rejected": -109.81986999511719,
"loss": 1.152,
"margin_dpo/margin_mean": 3.9401965141296387,
"margin_dpo/margin_std": 7.708506107330322,
"step": 288
},
{
"epoch": 0.436885865457294,
"fcm_dpo/beta": 0.1263837367296219,
"fcm_dpo/delta": -0.06516540050506592,
"fcm_dpo/margin": 5.2287468910217285,
"fcm_dpo/q_t": 0.36657577753067017,
"grad_norm": 18.498565673828125,
"learning_rate": 3.4780447936730247e-07,
"logits/chosen": 1.5653696060180664,
"logits/rejected": 1.5069622993469238,
"logps/chosen": -71.78709411621094,
"logps/ref_chosen": -73.04204559326172,
"logps/ref_rejected": -88.07904052734375,
"logps/rejected": -92.05284118652344,
"loss": 1.0073,
"margin_dpo/margin_mean": 5.22874641418457,
"margin_dpo/margin_std": 7.315879821777344,
"step": 289
},
{
"epoch": 0.4383975812547241,
"fcm_dpo/beta": 0.12540464103221893,
"fcm_dpo/delta": 0.0063613057136535645,
"fcm_dpo/margin": 4.731854438781738,
"fcm_dpo/q_t": 0.37520450353622437,
"grad_norm": 22.365402221679688,
"learning_rate": 3.465862814232821e-07,
"logits/chosen": 1.7237253189086914,
"logits/rejected": 1.6590449810028076,
"logps/chosen": -79.21012115478516,
"logps/ref_chosen": -78.60614013671875,
"logps/ref_rejected": -108.50082397460938,
"logps/rejected": -113.836669921875,
"loss": 1.0361,
"margin_dpo/margin_mean": 4.73185396194458,
"margin_dpo/margin_std": 6.734312057495117,
"step": 290
},
{
"epoch": 0.4399092970521542,
"fcm_dpo/beta": 0.12606967985630035,
"fcm_dpo/delta": -0.09047486633062363,
"fcm_dpo/margin": 5.4125800132751465,
"fcm_dpo/q_t": 0.3635619580745697,
"grad_norm": 20.724634170532227,
"learning_rate": 3.4536538175334343e-07,
"logits/chosen": 1.6329586505889893,
"logits/rejected": 1.4041390419006348,
"logps/chosen": -66.39132690429688,
"logps/ref_chosen": -66.71226501464844,
"logps/ref_rejected": -96.14029693603516,
"logps/rejected": -101.23193359375,
"loss": 1.0578,
"margin_dpo/margin_mean": 5.412579536437988,
"margin_dpo/margin_std": 8.23811149597168,
"step": 291
},
{
"epoch": 0.4414210128495843,
"fcm_dpo/beta": 0.12402984499931335,
"fcm_dpo/delta": 0.001040758565068245,
"fcm_dpo/margin": 4.829550266265869,
"fcm_dpo/q_t": 0.37668299674987793,
"grad_norm": 25.246936798095703,
"learning_rate": 3.4414181450867465e-07,
"logits/chosen": 1.69663667678833,
"logits/rejected": 1.6170578002929688,
"logps/chosen": -79.89686584472656,
"logps/ref_chosen": -80.3355484008789,
"logps/ref_rejected": -90.44906616210938,
"logps/rejected": -94.83992767333984,
"loss": 1.0653,
"margin_dpo/margin_mean": 4.829549789428711,
"margin_dpo/margin_std": 7.592476844787598,
"step": 292
},
{
"epoch": 0.4429327286470144,
"fcm_dpo/beta": 0.12098394334316254,
"fcm_dpo/delta": -0.18759030103683472,
"fcm_dpo/margin": 6.393030643463135,
"fcm_dpo/q_t": 0.35188886523246765,
"grad_norm": 20.60826873779297,
"learning_rate": 3.4291561391508185e-07,
"logits/chosen": 1.9168094396591187,
"logits/rejected": 1.776773452758789,
"logps/chosen": -71.5949478149414,
"logps/ref_chosen": -71.69970703125,
"logps/ref_rejected": -102.13948059082031,
"logps/rejected": -108.4277572631836,
"loss": 1.0365,
"margin_dpo/margin_mean": 6.393031120300293,
"margin_dpo/margin_std": 9.774253845214844,
"step": 293
},
{
"epoch": 0.4444444444444444,
"fcm_dpo/beta": 0.1188976839184761,
"fcm_dpo/delta": 0.024206943809986115,
"fcm_dpo/margin": 4.846450328826904,
"fcm_dpo/q_t": 0.3811735510826111,
"grad_norm": 19.384902954101562,
"learning_rate": 3.4168681427203153e-07,
"logits/chosen": 1.659212589263916,
"logits/rejected": 1.5612801313400269,
"logps/chosen": -70.84461975097656,
"logps/ref_chosen": -70.73458862304688,
"logps/ref_rejected": -86.68821716308594,
"logps/rejected": -91.64469146728516,
"loss": 1.0683,
"margin_dpo/margin_mean": 4.846449851989746,
"margin_dpo/margin_std": 7.481466293334961,
"step": 294
},
{
"epoch": 0.4459561602418745,
"fcm_dpo/beta": 0.12134327739477158,
"fcm_dpo/delta": 0.08148723840713501,
"fcm_dpo/margin": 4.308539390563965,
"fcm_dpo/q_t": 0.39348357915878296,
"grad_norm": 23.888975143432617,
"learning_rate": 3.4045544995169125e-07,
"logits/chosen": 1.4067692756652832,
"logits/rejected": 1.1668778657913208,
"logps/chosen": -66.92997741699219,
"logps/ref_chosen": -66.42644500732422,
"logps/ref_rejected": -99.58766174316406,
"logps/rejected": -104.39974212646484,
"loss": 1.1134,
"margin_dpo/margin_mean": 4.308538913726807,
"margin_dpo/margin_std": 7.631232261657715,
"step": 295
},
{
"epoch": 0.4474678760393046,
"fcm_dpo/beta": 0.11868500709533691,
"fcm_dpo/delta": -0.132828950881958,
"fcm_dpo/margin": 6.077790260314941,
"fcm_dpo/q_t": 0.3579130470752716,
"grad_norm": 22.89322280883789,
"learning_rate": 3.392215553979679e-07,
"logits/chosen": 1.6104109287261963,
"logits/rejected": 1.4648113250732422,
"logps/chosen": -87.55390930175781,
"logps/ref_chosen": -87.47459411621094,
"logps/ref_rejected": -103.96894836425781,
"logps/rejected": -110.12605285644531,
"loss": 0.9981,
"margin_dpo/margin_mean": 6.0777907371521,
"margin_dpo/margin_std": 8.397589683532715,
"step": 296
},
{
"epoch": 0.4489795918367347,
"fcm_dpo/beta": 0.11766720563173294,
"fcm_dpo/delta": -0.11798781156539917,
"fcm_dpo/margin": 6.026991844177246,
"fcm_dpo/q_t": 0.35117679834365845,
"grad_norm": 22.429386138916016,
"learning_rate": 3.3798516512554485e-07,
"logits/chosen": 1.5266656875610352,
"logits/rejected": 1.4038197994232178,
"logps/chosen": -74.09784698486328,
"logps/ref_chosen": -73.46731567382812,
"logps/ref_rejected": -88.22674560546875,
"logps/rejected": -94.8842544555664,
"loss": 0.9467,
"margin_dpo/margin_mean": 6.026991844177246,
"margin_dpo/margin_std": 6.926385402679443,
"step": 297
},
{
"epoch": 0.4504913076341648,
"fcm_dpo/beta": 0.11617027223110199,
"fcm_dpo/delta": -0.016583360731601715,
"fcm_dpo/margin": 5.296326160430908,
"fcm_dpo/q_t": 0.38224735856056213,
"grad_norm": 22.070199966430664,
"learning_rate": 3.367463137189156e-07,
"logits/chosen": 1.797358512878418,
"logits/rejected": 1.719627022743225,
"logps/chosen": -72.86018371582031,
"logps/ref_chosen": -73.21676635742188,
"logps/ref_rejected": -84.9563217163086,
"logps/rejected": -89.89605712890625,
"loss": 1.1139,
"margin_dpo/margin_mean": 5.29632568359375,
"margin_dpo/margin_std": 9.410942077636719,
"step": 298
},
{
"epoch": 0.4520030234315949,
"fcm_dpo/beta": 0.11756888777017593,
"fcm_dpo/delta": 0.12657645344734192,
"fcm_dpo/margin": 4.079702377319336,
"fcm_dpo/q_t": 0.4041763246059418,
"grad_norm": 22.431743621826172,
"learning_rate": 3.355050358314172e-07,
"logits/chosen": 1.2828481197357178,
"logits/rejected": 1.1960859298706055,
"logps/chosen": -77.17741394042969,
"logps/ref_chosen": -76.9534912109375,
"logps/ref_rejected": -87.53433227539062,
"logps/rejected": -91.83795166015625,
"loss": 1.1585,
"margin_dpo/margin_mean": 4.0797014236450195,
"margin_dpo/margin_std": 8.059319496154785,
"step": 299
},
{
"epoch": 0.45351473922902497,
"fcm_dpo/beta": 0.11919373273849487,
"fcm_dpo/delta": 0.0511748343706131,
"fcm_dpo/margin": 4.628298759460449,
"fcm_dpo/q_t": 0.38677746057510376,
"grad_norm": 21.598217010498047,
"learning_rate": 3.3426136618426043e-07,
"logits/chosen": 1.7995554208755493,
"logits/rejected": 1.6187442541122437,
"logps/chosen": -78.25460815429688,
"logps/ref_chosen": -78.36398315429688,
"logps/ref_rejected": -97.03912353515625,
"logps/rejected": -101.55804443359375,
"loss": 1.1386,
"margin_dpo/margin_mean": 4.628297805786133,
"margin_dpo/margin_std": 8.503219604492188,
"step": 300
},
{
"epoch": 0.45351473922902497,
"eval_fcm_dpo/beta": 0.11975711584091187,
"eval_logits/chosen": 1.5470160245895386,
"eval_logits/rejected": 1.4236763715744019,
"eval_logps/chosen": -85.9098129272461,
"eval_logps/ref_chosen": -86.90177917480469,
"eval_logps/ref_rejected": -96.69639587402344,
"eval_logps/rejected": -100.92195892333984,
"eval_loss": 0.5411239266395569,
"eval_margin_dpo/margin_mean": 5.2175445556640625,
"eval_margin_dpo/margin_std": 8.570341110229492,
"eval_runtime": 42.3487,
"eval_samples_per_second": 54.382,
"eval_steps_per_second": 1.7,
"step": 300
},
{
"epoch": 0.455026455026455,
"fcm_dpo/beta": 0.11965688318014145,
"fcm_dpo/delta": 0.026650425046682358,
"fcm_dpo/margin": 4.801657676696777,
"fcm_dpo/q_t": 0.39114803075790405,
"grad_norm": 20.64493179321289,
"learning_rate": 3.3301533956555885e-07,
"logits/chosen": 1.4913475513458252,
"logits/rejected": 1.432964563369751,
"logps/chosen": -69.89122772216797,
"logps/ref_chosen": -70.6719741821289,
"logps/ref_rejected": -87.11650085449219,
"logps/rejected": -91.13742065429688,
"loss": 1.1163,
"margin_dpo/margin_mean": 4.801657676696777,
"margin_dpo/margin_std": 8.655708312988281,
"step": 301
},
{
"epoch": 0.4565381708238851,
"fcm_dpo/beta": 0.12548677623271942,
"fcm_dpo/delta": 0.2517406940460205,
"fcm_dpo/margin": 2.8369710445404053,
"fcm_dpo/q_t": 0.4301077127456665,
"grad_norm": 26.383052825927734,
"learning_rate": 3.317669908293554e-07,
"logits/chosen": 1.3318819999694824,
"logits/rejected": 1.2350269556045532,
"logps/chosen": -85.69977569580078,
"logps/ref_chosen": -85.29096221923828,
"logps/ref_rejected": -106.22589874267578,
"logps/rejected": -109.4716796875,
"loss": 1.2691,
"margin_dpo/margin_mean": 2.836970806121826,
"margin_dpo/margin_std": 7.777761936187744,
"step": 302
},
{
"epoch": 0.4580498866213152,
"fcm_dpo/beta": 0.1212981790304184,
"fcm_dpo/delta": -0.23223206400871277,
"fcm_dpo/margin": 6.679323196411133,
"fcm_dpo/q_t": 0.3449594974517822,
"grad_norm": 20.195770263671875,
"learning_rate": 3.3051635489464793e-07,
"logits/chosen": 1.8518650531768799,
"logits/rejected": 1.7183852195739746,
"logps/chosen": -82.73690795898438,
"logps/ref_chosen": -83.90059661865234,
"logps/ref_rejected": -104.7340087890625,
"logps/rejected": -110.24964904785156,
"loss": 0.9852,
"margin_dpo/margin_mean": 6.679323673248291,
"margin_dpo/margin_std": 9.238541603088379,
"step": 303
},
{
"epoch": 0.4595616024187453,
"fcm_dpo/beta": 0.11956004798412323,
"fcm_dpo/delta": -0.08801032602787018,
"fcm_dpo/margin": 5.705035209655762,
"fcm_dpo/q_t": 0.3594469428062439,
"grad_norm": 20.22382164001465,
"learning_rate": 3.292634667444117e-07,
"logits/chosen": 1.3408575057983398,
"logits/rejected": 1.24342679977417,
"logps/chosen": -75.45277404785156,
"logps/ref_chosen": -77.39997100830078,
"logps/ref_rejected": -94.21647644042969,
"logps/rejected": -97.97433471679688,
"loss": 0.9937,
"margin_dpo/margin_mean": 5.705035209655762,
"margin_dpo/margin_std": 7.6752142906188965,
"step": 304
},
{
"epoch": 0.46107331821617537,
"fcm_dpo/beta": 0.11689537763595581,
"fcm_dpo/delta": -0.03063960373401642,
"fcm_dpo/margin": 5.354496002197266,
"fcm_dpo/q_t": 0.37809717655181885,
"grad_norm": 20.9038028717041,
"learning_rate": 3.280083614246217e-07,
"logits/chosen": 1.3459558486938477,
"logits/rejected": 1.4120479822158813,
"logps/chosen": -89.80319213867188,
"logps/ref_chosen": -90.90805053710938,
"logps/ref_rejected": -85.84992980957031,
"logps/rejected": -90.09957885742188,
"loss": 1.1074,
"margin_dpo/margin_mean": 5.354496479034424,
"margin_dpo/margin_std": 9.220006942749023,
"step": 305
},
{
"epoch": 0.46258503401360546,
"fcm_dpo/beta": 0.11684860289096832,
"fcm_dpo/delta": 0.002469673752784729,
"fcm_dpo/margin": 5.096833229064941,
"fcm_dpo/q_t": 0.38039129972457886,
"grad_norm": 21.22861099243164,
"learning_rate": 3.267510740432719e-07,
"logits/chosen": 1.4836664199829102,
"logits/rejected": 1.2493438720703125,
"logps/chosen": -69.71931457519531,
"logps/ref_chosen": -71.7261962890625,
"logps/ref_rejected": -97.70491027832031,
"logps/rejected": -100.79486083984375,
"loss": 1.0663,
"margin_dpo/margin_mean": 5.096833229064941,
"margin_dpo/margin_std": 7.983862400054932,
"step": 306
},
{
"epoch": 0.46409674981103555,
"fcm_dpo/beta": 0.12025651335716248,
"fcm_dpo/delta": 0.09597062319517136,
"fcm_dpo/margin": 4.226437091827393,
"fcm_dpo/q_t": 0.40421485900878906,
"grad_norm": 21.80240821838379,
"learning_rate": 3.2549163976939285e-07,
"logits/chosen": 1.6594510078430176,
"logits/rejected": 1.5168578624725342,
"logps/chosen": -70.91734313964844,
"logps/ref_chosen": -74.38668823242188,
"logps/ref_rejected": -84.16001892089844,
"logps/rejected": -84.91710662841797,
"loss": 1.1539,
"margin_dpo/margin_mean": 4.226436138153076,
"margin_dpo/margin_std": 8.34419059753418,
"step": 307
},
{
"epoch": 0.4656084656084656,
"fcm_dpo/beta": 0.11909815669059753,
"fcm_dpo/delta": -0.049813512712717056,
"fcm_dpo/margin": 5.426000595092773,
"fcm_dpo/q_t": 0.37172651290893555,
"grad_norm": 23.397077560424805,
"learning_rate": 3.2423009383206874e-07,
"logits/chosen": 1.3079359531402588,
"logits/rejected": 1.3617353439331055,
"logps/chosen": -84.75643920898438,
"logps/ref_chosen": -87.50894165039062,
"logps/ref_rejected": -94.80848693847656,
"logps/rejected": -97.48197937011719,
"loss": 1.0602,
"margin_dpo/margin_mean": 5.425999641418457,
"margin_dpo/margin_std": 8.557561874389648,
"step": 308
},
{
"epoch": 0.4671201814058957,
"fcm_dpo/beta": 0.11748093366622925,
"fcm_dpo/delta": -0.03162723034620285,
"fcm_dpo/margin": 5.341372966766357,
"fcm_dpo/q_t": 0.3704092800617218,
"grad_norm": 19.2188777923584,
"learning_rate": 3.229664715194511e-07,
"logits/chosen": 1.7406518459320068,
"logits/rejected": 1.6214611530303955,
"logps/chosen": -80.2752456665039,
"logps/ref_chosen": -82.15191650390625,
"logps/ref_rejected": -95.03496551513672,
"logps/rejected": -98.49966430664062,
"loss": 1.0017,
"margin_dpo/margin_mean": 5.341372489929199,
"margin_dpo/margin_std": 6.893287658691406,
"step": 309
},
{
"epoch": 0.46863189720332576,
"fcm_dpo/beta": 0.120293527841568,
"fcm_dpo/delta": 0.07783792167901993,
"fcm_dpo/margin": 2.3885998725891113,
"fcm_dpo/q_t": 0.44259071350097656,
"grad_norm": 27.145200729370117,
"learning_rate": 3.2170080817777257e-07,
"logits/chosen": 1.591238021850586,
"logits/rejected": 1.5714151859283447,
"logps/chosen": -92.16886901855469,
"logps/ref_chosen": -93.7555160522461,
"logps/ref_rejected": -96.93236541748047,
"logps/rejected": -97.73431396484375,
"loss": 1.3459,
"margin_dpo/margin_mean": 2.3886003494262695,
"margin_dpo/margin_std": 8.228015899658203,
"step": 310
},
{
"epoch": 0.47014361300075586,
"fcm_dpo/beta": 0.11992132663726807,
"fcm_dpo/delta": 0.025824643671512604,
"fcm_dpo/margin": 4.793860912322998,
"fcm_dpo/q_t": 0.3902488350868225,
"grad_norm": 20.445270538330078,
"learning_rate": 3.204331392103574e-07,
"logits/chosen": 1.4957547187805176,
"logits/rejected": 1.1831345558166504,
"logps/chosen": -71.67782592773438,
"logps/ref_chosen": -76.20762634277344,
"logps/ref_rejected": -110.48141479492188,
"logps/rejected": -110.74546813964844,
"loss": 1.0995,
"margin_dpo/margin_mean": 4.793861389160156,
"margin_dpo/margin_std": 8.486019134521484,
"step": 311
},
{
"epoch": 0.47165532879818595,
"fcm_dpo/beta": 0.11924172937870026,
"fcm_dpo/delta": -0.09825573861598969,
"fcm_dpo/margin": 5.799221038818359,
"fcm_dpo/q_t": 0.3667619228363037,
"grad_norm": 19.300731658935547,
"learning_rate": 3.1916350007663176e-07,
"logits/chosen": 1.4672105312347412,
"logits/rejected": 1.3198761940002441,
"logps/chosen": -66.4232406616211,
"logps/ref_chosen": -69.08878326416016,
"logps/ref_rejected": -91.84494018554688,
"logps/rejected": -94.97862243652344,
"loss": 1.0111,
"margin_dpo/margin_mean": 5.799221038818359,
"margin_dpo/margin_std": 8.42913818359375,
"step": 312
},
{
"epoch": 0.47316704459561604,
"fcm_dpo/beta": 0.12133464217185974,
"fcm_dpo/delta": 0.15221793949604034,
"fcm_dpo/margin": 3.7469735145568848,
"fcm_dpo/q_t": 0.4182596802711487,
"grad_norm": 21.947050094604492,
"learning_rate": 3.178919262911314e-07,
"logits/chosen": 1.6795837879180908,
"logits/rejected": 1.642377257347107,
"logps/chosen": -73.63683319091797,
"logps/ref_chosen": -78.20826721191406,
"logps/ref_rejected": -86.90351867675781,
"logps/rejected": -86.07906341552734,
"loss": 1.2413,
"margin_dpo/margin_mean": 3.7469730377197266,
"margin_dpo/margin_std": 9.230291366577148,
"step": 313
},
{
"epoch": 0.47467876039304613,
"fcm_dpo/beta": 0.12020012736320496,
"fcm_dpo/delta": -0.08286652714014053,
"fcm_dpo/margin": 5.628398418426514,
"fcm_dpo/q_t": 0.36906343698501587,
"grad_norm": 22.41501808166504,
"learning_rate": 3.166184534225087e-07,
"logits/chosen": 1.6936403512954712,
"logits/rejected": 1.713541030883789,
"logps/chosen": -85.90785217285156,
"logps/ref_chosen": -90.41890716552734,
"logps/ref_rejected": -84.33525848388672,
"logps/rejected": -85.45260620117188,
"loss": 1.0274,
"margin_dpo/margin_mean": 5.628398895263672,
"margin_dpo/margin_std": 8.3358793258667,
"step": 314
},
{
"epoch": 0.47619047619047616,
"fcm_dpo/beta": 0.12187933176755905,
"fcm_dpo/delta": 0.03306184709072113,
"fcm_dpo/margin": 4.652489185333252,
"fcm_dpo/q_t": 0.3853157162666321,
"grad_norm": 21.278676986694336,
"learning_rate": 3.1534311709253723e-07,
"logits/chosen": 1.3511652946472168,
"logits/rejected": 1.277963638305664,
"logps/chosen": -82.36071014404297,
"logps/ref_chosen": -87.32842254638672,
"logps/ref_rejected": -93.71661376953125,
"logps/rejected": -93.40138244628906,
"loss": 1.0678,
"margin_dpo/margin_mean": 4.652489185333252,
"margin_dpo/margin_std": 7.178579807281494,
"step": 315
},
{
"epoch": 0.47770219198790626,
"fcm_dpo/beta": 0.12188950181007385,
"fcm_dpo/delta": -0.06950892508029938,
"fcm_dpo/margin": 5.416971206665039,
"fcm_dpo/q_t": 0.3643546998500824,
"grad_norm": 25.714170455932617,
"learning_rate": 3.1406595297511564e-07,
"logits/chosen": 1.5012097358703613,
"logits/rejected": 1.2324557304382324,
"logps/chosen": -69.50252532958984,
"logps/ref_chosen": -73.898681640625,
"logps/ref_rejected": -115.42668151855469,
"logps/rejected": -116.4474868774414,
"loss": 0.9983,
"margin_dpo/margin_mean": 5.416971206665039,
"margin_dpo/margin_std": 6.694197654724121,
"step": 316
},
{
"epoch": 0.47921390778533635,
"fcm_dpo/beta": 0.1169140413403511,
"fcm_dpo/delta": -0.1020512804389,
"fcm_dpo/margin": 5.934911727905273,
"fcm_dpo/q_t": 0.3591901361942291,
"grad_norm": 22.108245849609375,
"learning_rate": 3.1278699679526975e-07,
"logits/chosen": 1.83012056350708,
"logits/rejected": 1.7280373573303223,
"logps/chosen": -69.74676513671875,
"logps/ref_chosen": -75.42947387695312,
"logps/ref_rejected": -90.60166931152344,
"logps/rejected": -90.8538818359375,
"loss": 1.0275,
"margin_dpo/margin_mean": 5.934911251068115,
"margin_dpo/margin_std": 8.861763000488281,
"step": 317
},
{
"epoch": 0.48072562358276644,
"fcm_dpo/beta": 0.11810323596000671,
"fcm_dpo/delta": 0.07940696179866791,
"fcm_dpo/margin": 4.443850517272949,
"fcm_dpo/q_t": 0.40077465772628784,
"grad_norm": 19.98630142211914,
"learning_rate": 3.1150628432815336e-07,
"logits/chosen": 1.6220097541809082,
"logits/rejected": 1.4846813678741455,
"logps/chosen": -65.35560607910156,
"logps/ref_chosen": -70.38318634033203,
"logps/ref_rejected": -98.19901275634766,
"logps/rejected": -97.61528015136719,
"loss": 1.1674,
"margin_dpo/margin_mean": 4.443850040435791,
"margin_dpo/margin_std": 8.962568283081055,
"step": 318
},
{
"epoch": 0.48223733938019653,
"fcm_dpo/beta": 0.11873992532491684,
"fcm_dpo/delta": -0.004999694414436817,
"fcm_dpo/margin": 5.09262228012085,
"fcm_dpo/q_t": 0.378478467464447,
"grad_norm": 20.11252212524414,
"learning_rate": 3.1022385139804707e-07,
"logits/chosen": 1.1534156799316406,
"logits/rejected": 1.1021003723144531,
"logps/chosen": -79.05325317382812,
"logps/ref_chosen": -83.40225982666016,
"logps/ref_rejected": -95.40069580078125,
"logps/rejected": -96.14430236816406,
"loss": 1.0636,
"margin_dpo/margin_mean": 5.092622756958008,
"margin_dpo/margin_std": 8.152623176574707,
"step": 319
},
{
"epoch": 0.4837490551776266,
"fcm_dpo/beta": 0.11889675259590149,
"fcm_dpo/delta": 0.01753305457532406,
"fcm_dpo/margin": 3.168153762817383,
"fcm_dpo/q_t": 0.42565175890922546,
"grad_norm": 21.558019638061523,
"learning_rate": 3.0893973387735683e-07,
"logits/chosen": 1.238257646560669,
"logits/rejected": 1.128818154335022,
"logps/chosen": -63.69639587402344,
"logps/ref_chosen": -68.70979309082031,
"logps/ref_rejected": -87.00540924072266,
"logps/rejected": -85.16017150878906,
"loss": 1.2676,
"margin_dpo/margin_mean": 3.168154239654541,
"margin_dpo/margin_std": 8.407440185546875,
"step": 320
},
{
"epoch": 0.4852607709750567,
"fcm_dpo/beta": 0.11838892847299576,
"fcm_dpo/delta": 0.032411910593509674,
"fcm_dpo/margin": 4.793027400970459,
"fcm_dpo/q_t": 0.3875201344490051,
"grad_norm": 22.37877655029297,
"learning_rate": 3.0765396768561004e-07,
"logits/chosen": 1.4487881660461426,
"logits/rejected": 1.3890159130096436,
"logps/chosen": -61.277713775634766,
"logps/ref_chosen": -66.48135375976562,
"logps/ref_rejected": -71.84545135498047,
"logps/rejected": -71.4348373413086,
"loss": 1.1063,
"margin_dpo/margin_mean": 4.793027877807617,
"margin_dpo/margin_std": 8.183032989501953,
"step": 321
},
{
"epoch": 0.48677248677248675,
"fcm_dpo/beta": 0.11761731654405594,
"fcm_dpo/delta": -0.16031649708747864,
"fcm_dpo/margin": 6.364631175994873,
"fcm_dpo/q_t": 0.3440389633178711,
"grad_norm": 22.891164779663086,
"learning_rate": 3.063665887884511e-07,
"logits/chosen": 1.7625864744186401,
"logits/rejected": 1.587303876876831,
"logps/chosen": -61.12324523925781,
"logps/ref_chosen": -65.94654846191406,
"logps/ref_rejected": -94.26603698730469,
"logps/rejected": -95.807373046875,
"loss": 0.9436,
"margin_dpo/margin_mean": 6.364631175994873,
"margin_dpo/margin_std": 7.739552974700928,
"step": 322
},
{
"epoch": 0.48828420256991684,
"fcm_dpo/beta": 0.11629685759544373,
"fcm_dpo/delta": -0.050307899713516235,
"fcm_dpo/margin": 5.553721904754639,
"fcm_dpo/q_t": 0.3829064667224884,
"grad_norm": 22.22823143005371,
"learning_rate": 3.0507763319663517e-07,
"logits/chosen": 1.3363523483276367,
"logits/rejected": 1.2372665405273438,
"logps/chosen": -82.47151184082031,
"logps/ref_chosen": -86.5498046875,
"logps/ref_rejected": -110.39498901367188,
"logps/rejected": -111.87040710449219,
"loss": 1.1249,
"margin_dpo/margin_mean": 5.553721904754639,
"margin_dpo/margin_std": 10.197261810302734,
"step": 323
},
{
"epoch": 0.4897959183673469,
"fcm_dpo/beta": 0.11416380107402802,
"fcm_dpo/delta": -0.02028251811861992,
"fcm_dpo/margin": 5.4193010330200195,
"fcm_dpo/q_t": 0.37376847863197327,
"grad_norm": 22.1555233001709,
"learning_rate": 3.0378713696502097e-07,
"logits/chosen": 1.5647233724594116,
"logits/rejected": 1.423370599746704,
"logps/chosen": -69.38572692871094,
"logps/ref_chosen": -74.44218444824219,
"logps/ref_rejected": -85.7646484375,
"logps/rejected": -86.12747955322266,
"loss": 1.0561,
"margin_dpo/margin_mean": 5.4193010330200195,
"margin_dpo/margin_std": 8.43246078491211,
"step": 324
},
{
"epoch": 0.491307634164777,
"fcm_dpo/beta": 0.11330123245716095,
"fcm_dpo/delta": 0.00421547144651413,
"fcm_dpo/margin": 5.240480422973633,
"fcm_dpo/q_t": 0.3807021379470825,
"grad_norm": 21.85565948486328,
"learning_rate": 3.0249513619156206e-07,
"logits/chosen": 1.697942852973938,
"logits/rejected": 1.5425655841827393,
"logps/chosen": -78.68364715576172,
"logps/ref_chosen": -81.43812561035156,
"logps/ref_rejected": -97.04302978515625,
"logps/rejected": -99.5290298461914,
"loss": 1.0905,
"margin_dpo/margin_mean": 5.240480422973633,
"margin_dpo/margin_std": 8.678589820861816,
"step": 325
},
{
"epoch": 0.4928193499622071,
"fcm_dpo/beta": 0.11744363605976105,
"fcm_dpo/delta": 0.18146520853042603,
"fcm_dpo/margin": 3.6321234703063965,
"fcm_dpo/q_t": 0.4141997694969177,
"grad_norm": 22.132080078125,
"learning_rate": 3.012016670162977e-07,
"logits/chosen": 1.546274185180664,
"logits/rejected": 1.554617166519165,
"logps/chosen": -90.23068237304688,
"logps/ref_chosen": -91.65318298339844,
"logps/ref_rejected": -90.64222717285156,
"logps/rejected": -92.85185241699219,
"loss": 1.2096,
"margin_dpo/margin_mean": 3.6321234703063965,
"margin_dpo/margin_std": 8.183576583862305,
"step": 326
},
{
"epoch": 0.4943310657596372,
"fcm_dpo/beta": 0.12188417464494705,
"fcm_dpo/delta": 0.1355394572019577,
"fcm_dpo/margin": 3.8523268699645996,
"fcm_dpo/q_t": 0.41473180055618286,
"grad_norm": 25.818225860595703,
"learning_rate": 2.99906765620341e-07,
"logits/chosen": 1.3597536087036133,
"logits/rejected": 1.2990000247955322,
"logps/chosen": -88.77840423583984,
"logps/ref_chosen": -89.97216796875,
"logps/ref_rejected": -97.54869079589844,
"logps/rejected": -100.20724487304688,
"loss": 1.2589,
"margin_dpo/margin_mean": 3.8523268699645996,
"margin_dpo/margin_std": 9.669429779052734,
"step": 327
},
{
"epoch": 0.4958427815570673,
"fcm_dpo/beta": 0.12286022305488586,
"fcm_dpo/delta": 0.0626559630036354,
"fcm_dpo/margin": 4.401291847229004,
"fcm_dpo/q_t": 0.39334431290626526,
"grad_norm": 21.354494094848633,
"learning_rate": 2.9861046822486766e-07,
"logits/chosen": 1.6216108798980713,
"logits/rejected": 1.524030327796936,
"logps/chosen": -78.1365966796875,
"logps/ref_chosen": -80.27335357666016,
"logps/ref_rejected": -99.04093170166016,
"logps/rejected": -101.30546569824219,
"loss": 1.1266,
"margin_dpo/margin_mean": 4.401291847229004,
"margin_dpo/margin_std": 8.014993667602539,
"step": 328
},
{
"epoch": 0.4973544973544973,
"fcm_dpo/beta": 0.12164503335952759,
"fcm_dpo/delta": -0.046737946569919586,
"fcm_dpo/margin": 5.275608539581299,
"fcm_dpo/q_t": 0.3809892535209656,
"grad_norm": 22.692781448364258,
"learning_rate": 2.9731281109010253e-07,
"logits/chosen": 1.4558098316192627,
"logits/rejected": 1.334930419921875,
"logps/chosen": -77.7724380493164,
"logps/ref_chosen": -79.75892639160156,
"logps/ref_rejected": -102.06265258789062,
"logps/rejected": -105.35179138183594,
"loss": 1.0789,
"margin_dpo/margin_mean": 5.275609016418457,
"margin_dpo/margin_std": 8.842366218566895,
"step": 329
},
{
"epoch": 0.4988662131519274,
"fcm_dpo/beta": 0.11930276453495026,
"fcm_dpo/delta": -0.23741337656974792,
"fcm_dpo/margin": 6.860002517700195,
"fcm_dpo/q_t": 0.3392508327960968,
"grad_norm": 18.562469482421875,
"learning_rate": 2.9601383051430505e-07,
"logits/chosen": 1.6714489459991455,
"logits/rejected": 1.5213907957077026,
"logps/chosen": -67.23228454589844,
"logps/ref_chosen": -70.55734252929688,
"logps/ref_rejected": -94.53077697753906,
"logps/rejected": -98.06571960449219,
"loss": 0.9976,
"margin_dpo/margin_mean": 6.8600029945373535,
"margin_dpo/margin_std": 9.460945129394531,
"step": 330
},
{
"epoch": 0.5003779289493575,
"fcm_dpo/beta": 0.1132928729057312,
"fcm_dpo/delta": -0.18013113737106323,
"fcm_dpo/margin": 6.756691932678223,
"fcm_dpo/q_t": 0.3501453101634979,
"grad_norm": 17.997892379760742,
"learning_rate": 2.947135628327544e-07,
"logits/chosen": 1.7973315715789795,
"logits/rejected": 1.7496538162231445,
"logps/chosen": -74.01251220703125,
"logps/ref_chosen": -75.46063232421875,
"logps/ref_rejected": -84.78495788574219,
"logps/rejected": -90.0935287475586,
"loss": 1.0156,
"margin_dpo/margin_mean": 6.756691932678223,
"margin_dpo/margin_std": 9.993051528930664,
"step": 331
},
{
"epoch": 0.5018896447467877,
"fcm_dpo/beta": 0.11203811317682266,
"fcm_dpo/delta": -0.11051306873559952,
"fcm_dpo/margin": 6.262184143066406,
"fcm_dpo/q_t": 0.3616057336330414,
"grad_norm": 22.966106414794922,
"learning_rate": 2.934120444167326e-07,
"logits/chosen": 1.3178709745407104,
"logits/rejected": 1.2265524864196777,
"logps/chosen": -82.64828491210938,
"logps/ref_chosen": -84.32807922363281,
"logps/ref_rejected": -95.63302612304688,
"logps/rejected": -100.21540832519531,
"loss": 0.9857,
"margin_dpo/margin_mean": 6.2621846199035645,
"margin_dpo/margin_std": 8.354652404785156,
"step": 332
},
{
"epoch": 0.5034013605442177,
"fcm_dpo/beta": 0.10700565576553345,
"fcm_dpo/delta": -0.20431974530220032,
"fcm_dpo/margin": 7.367546558380127,
"fcm_dpo/q_t": 0.33816075325012207,
"grad_norm": 16.382230758666992,
"learning_rate": 2.921093116725076e-07,
"logits/chosen": 1.4132413864135742,
"logits/rejected": 1.2920411825180054,
"logps/chosen": -77.47364044189453,
"logps/ref_chosen": -78.2132339477539,
"logps/ref_rejected": -103.82716369628906,
"logps/rejected": -110.45510864257812,
"loss": 0.8994,
"margin_dpo/margin_mean": 7.367546558380127,
"margin_dpo/margin_std": 8.159334182739258,
"step": 333
},
{
"epoch": 0.5049130763416477,
"fcm_dpo/beta": 0.10628392547369003,
"fcm_dpo/delta": 0.04136139154434204,
"fcm_dpo/margin": 5.278096675872803,
"fcm_dpo/q_t": 0.38895177841186523,
"grad_norm": 19.709138870239258,
"learning_rate": 2.9080540104031484e-07,
"logits/chosen": 1.8486359119415283,
"logits/rejected": 1.6987314224243164,
"logps/chosen": -83.99331665039062,
"logps/ref_chosen": -85.0171127319336,
"logps/ref_rejected": -106.79039764404297,
"logps/rejected": -111.04469299316406,
"loss": 1.1631,
"margin_dpo/margin_mean": 5.278097152709961,
"margin_dpo/margin_std": 10.453556060791016,
"step": 334
},
{
"epoch": 0.5064247921390779,
"fcm_dpo/beta": 0.10827849805355072,
"fcm_dpo/delta": 0.04513784497976303,
"fcm_dpo/margin": 5.131414413452148,
"fcm_dpo/q_t": 0.4016219973564148,
"grad_norm": 23.983985900878906,
"learning_rate": 2.895003489933375e-07,
"logits/chosen": 1.232424259185791,
"logits/rejected": 1.1511411666870117,
"logps/chosen": -79.13843536376953,
"logps/ref_chosen": -78.56513214111328,
"logps/ref_rejected": -92.68515014648438,
"logps/rejected": -98.38986206054688,
"loss": 1.2239,
"margin_dpo/margin_mean": 5.131413459777832,
"margin_dpo/margin_std": 11.419921875,
"step": 335
},
{
"epoch": 0.5079365079365079,
"fcm_dpo/beta": 0.10628928989171982,
"fcm_dpo/delta": -0.04637442156672478,
"fcm_dpo/margin": 6.045434474945068,
"fcm_dpo/q_t": 0.37050485610961914,
"grad_norm": 22.625778198242188,
"learning_rate": 2.8819419203668675e-07,
"logits/chosen": 1.4954988956451416,
"logits/rejected": 1.4547088146209717,
"logps/chosen": -90.63356018066406,
"logps/ref_chosen": -88.63243103027344,
"logps/ref_rejected": -107.89385986328125,
"logps/rejected": -115.94041442871094,
"loss": 1.0439,
"margin_dpo/margin_mean": 6.04543399810791,
"margin_dpo/margin_std": 9.264274597167969,
"step": 336
},
{
"epoch": 0.509448223733938,
"fcm_dpo/beta": 0.10824999213218689,
"fcm_dpo/delta": 0.1246347650885582,
"fcm_dpo/margin": 4.448444366455078,
"fcm_dpo/q_t": 0.40610629320144653,
"grad_norm": 19.88324737548828,
"learning_rate": 2.8688696670638053e-07,
"logits/chosen": 1.3406782150268555,
"logits/rejected": 1.236850619316101,
"logps/chosen": -94.76239013671875,
"logps/ref_chosen": -93.25018310546875,
"logps/ref_rejected": -103.8592529296875,
"logps/rejected": -109.81990814208984,
"loss": 1.1949,
"margin_dpo/margin_mean": 4.44844388961792,
"margin_dpo/margin_std": 9.610854148864746,
"step": 337
},
{
"epoch": 0.5109599395313681,
"fcm_dpo/beta": 0.11230061948299408,
"fcm_dpo/delta": 0.14871221780776978,
"fcm_dpo/margin": 4.068912506103516,
"fcm_dpo/q_t": 0.4091913402080536,
"grad_norm": 20.65287971496582,
"learning_rate": 2.8557870956832133e-07,
"logits/chosen": 1.6767020225524902,
"logits/rejected": 1.6137604713439941,
"logps/chosen": -84.22471618652344,
"logps/ref_chosen": -81.79462432861328,
"logps/ref_rejected": -90.98942565917969,
"logps/rejected": -97.48843383789062,
"loss": 1.1521,
"margin_dpo/margin_mean": 4.068912506103516,
"margin_dpo/margin_std": 7.917887210845947,
"step": 338
},
{
"epoch": 0.5124716553287982,
"fcm_dpo/beta": 0.11033567786216736,
"fcm_dpo/delta": -0.12316159904003143,
"fcm_dpo/margin": 6.471385955810547,
"fcm_dpo/q_t": 0.35316306352615356,
"grad_norm": 17.84832000732422,
"learning_rate": 2.842694572172736e-07,
"logits/chosen": 1.7615846395492554,
"logits/rejected": 1.5558931827545166,
"logps/chosen": -60.82051086425781,
"logps/ref_chosen": -61.80355453491211,
"logps/ref_rejected": -85.16979217529297,
"logps/rejected": -90.65812683105469,
"loss": 0.978,
"margin_dpo/margin_mean": 6.471385955810547,
"margin_dpo/margin_std": 8.390908241271973,
"step": 339
},
{
"epoch": 0.5139833711262283,
"fcm_dpo/beta": 0.11038414388895035,
"fcm_dpo/delta": -0.023266099393367767,
"fcm_dpo/margin": 5.6234588623046875,
"fcm_dpo/q_t": 0.3836401104927063,
"grad_norm": 21.793188095092773,
"learning_rate": 2.8295924627584004e-07,
"logits/chosen": 1.7331141233444214,
"logits/rejected": 1.7046101093292236,
"logps/chosen": -73.67045593261719,
"logps/ref_chosen": -72.486083984375,
"logps/ref_rejected": -79.86129760742188,
"logps/rejected": -86.66914367675781,
"loss": 1.1356,
"margin_dpo/margin_mean": 5.623458385467529,
"margin_dpo/margin_std": 10.358892440795898,
"step": 340
},
{
"epoch": 0.5154950869236583,
"fcm_dpo/beta": 0.10636292397975922,
"fcm_dpo/delta": -0.061252087354660034,
"fcm_dpo/margin": 6.104777812957764,
"fcm_dpo/q_t": 0.3701562285423279,
"grad_norm": 18.614044189453125,
"learning_rate": 2.816481133934373e-07,
"logits/chosen": 1.387300729751587,
"logits/rejected": 1.2599934339523315,
"logps/chosen": -78.27288818359375,
"logps/ref_chosen": -77.36830139160156,
"logps/ref_rejected": -94.64933013916016,
"logps/rejected": -101.65869903564453,
"loss": 1.0677,
"margin_dpo/margin_mean": 6.104778289794922,
"margin_dpo/margin_std": 9.133384704589844,
"step": 341
},
{
"epoch": 0.5170068027210885,
"fcm_dpo/beta": 0.10703742504119873,
"fcm_dpo/delta": -0.057476550340652466,
"fcm_dpo/margin": 6.106937885284424,
"fcm_dpo/q_t": 0.3717387914657593,
"grad_norm": 16.413331985473633,
"learning_rate": 2.8033609524527046e-07,
"logits/chosen": 1.6669539213180542,
"logits/rejected": 1.5651522874832153,
"logps/chosen": -70.8360366821289,
"logps/ref_chosen": -71.00831604003906,
"logps/ref_rejected": -84.22953796386719,
"logps/rejected": -90.16419982910156,
"loss": 1.0395,
"margin_dpo/margin_mean": 6.106938362121582,
"margin_dpo/margin_std": 9.189284324645996,
"step": 342
},
{
"epoch": 0.5185185185185185,
"fcm_dpo/beta": 0.10651153326034546,
"fcm_dpo/delta": -0.013351213186979294,
"fcm_dpo/margin": 3.356048345565796,
"fcm_dpo/q_t": 0.42331787943840027,
"grad_norm": 20.029727935791016,
"learning_rate": 2.7902322853130753e-07,
"logits/chosen": 1.2742222547531128,
"logits/rejected": 1.2351291179656982,
"logps/chosen": -91.23426055908203,
"logps/ref_chosen": -91.44624328613281,
"logps/ref_rejected": -99.06044006347656,
"logps/rejected": -102.20451354980469,
"loss": 1.2497,
"margin_dpo/margin_mean": 3.356048583984375,
"margin_dpo/margin_std": 8.23534870147705,
"step": 343
},
{
"epoch": 0.5200302343159486,
"fcm_dpo/beta": 0.10474497079849243,
"fcm_dpo/delta": -0.06812702864408493,
"fcm_dpo/margin": 6.323397159576416,
"fcm_dpo/q_t": 0.36448514461517334,
"grad_norm": 17.860971450805664,
"learning_rate": 2.7770954997525274e-07,
"logits/chosen": 1.657717227935791,
"logits/rejected": 1.498356580734253,
"logps/chosen": -74.89875793457031,
"logps/ref_chosen": -73.43608093261719,
"logps/ref_rejected": -100.76569366455078,
"logps/rejected": -108.55177307128906,
"loss": 1.0072,
"margin_dpo/margin_mean": 6.323396682739258,
"margin_dpo/margin_std": 8.665111541748047,
"step": 344
},
{
"epoch": 0.5215419501133787,
"fcm_dpo/beta": 0.10513152182102203,
"fcm_dpo/delta": 0.029769858345389366,
"fcm_dpo/margin": 5.438445568084717,
"fcm_dpo/q_t": 0.38533106446266174,
"grad_norm": 16.376707077026367,
"learning_rate": 2.7639509632351927e-07,
"logits/chosen": 1.825488805770874,
"logits/rejected": 1.71343994140625,
"logps/chosen": -73.80440521240234,
"logps/ref_chosen": -75.79296875,
"logps/ref_rejected": -94.34156799316406,
"logps/rejected": -97.79145812988281,
"loss": 1.1054,
"margin_dpo/margin_mean": 5.438445568084717,
"margin_dpo/margin_std": 9.440225601196289,
"step": 345
},
{
"epoch": 0.5230536659108088,
"fcm_dpo/beta": 0.10529479384422302,
"fcm_dpo/delta": -0.06721366941928864,
"fcm_dpo/margin": 6.291139125823975,
"fcm_dpo/q_t": 0.37077468633651733,
"grad_norm": 17.804290771484375,
"learning_rate": 2.7507990434420123e-07,
"logits/chosen": 1.8073234558105469,
"logits/rejected": 1.6271549463272095,
"logps/chosen": -70.38911437988281,
"logps/ref_chosen": -72.26289367675781,
"logps/ref_rejected": -106.36925506591797,
"logps/rejected": -110.78662109375,
"loss": 1.0943,
"margin_dpo/margin_mean": 6.291138648986816,
"margin_dpo/margin_std": 10.537273406982422,
"step": 346
},
{
"epoch": 0.5245653817082389,
"fcm_dpo/beta": 0.10580353438854218,
"fcm_dpo/delta": 0.07595658302307129,
"fcm_dpo/margin": 4.986888408660889,
"fcm_dpo/q_t": 0.3997945785522461,
"grad_norm": 19.979366302490234,
"learning_rate": 2.737640108260456e-07,
"logits/chosen": 1.9318368434906006,
"logits/rejected": 1.8235411643981934,
"logps/chosen": -70.96662902832031,
"logps/ref_chosen": -71.19871520996094,
"logps/ref_rejected": -91.543212890625,
"logps/rejected": -96.29800415039062,
"loss": 1.1564,
"margin_dpo/margin_mean": 4.986888408660889,
"margin_dpo/margin_std": 9.96225643157959,
"step": 347
},
{
"epoch": 0.5260770975056689,
"fcm_dpo/beta": 0.10394434630870819,
"fcm_dpo/delta": -0.10433568060398102,
"fcm_dpo/margin": 6.701659202575684,
"fcm_dpo/q_t": 0.3716875910758972,
"grad_norm": 16.589065551757812,
"learning_rate": 2.724474525774229e-07,
"logits/chosen": 1.8227729797363281,
"logits/rejected": 1.7435760498046875,
"logps/chosen": -68.37228393554688,
"logps/ref_chosen": -69.95603942871094,
"logps/ref_rejected": -83.64309692382812,
"logps/rejected": -88.76100158691406,
"loss": 1.0329,
"margin_dpo/margin_mean": 6.701659202575684,
"margin_dpo/margin_std": 10.39244270324707,
"step": 348
},
{
"epoch": 0.527588813303099,
"fcm_dpo/beta": 0.10336841642856598,
"fcm_dpo/delta": -0.08035076409578323,
"fcm_dpo/margin": 6.521647930145264,
"fcm_dpo/q_t": 0.36513036489486694,
"grad_norm": 17.546951293945312,
"learning_rate": 2.711302664252973e-07,
"logits/chosen": 1.61627197265625,
"logits/rejected": 1.469820261001587,
"logps/chosen": -68.54325866699219,
"logps/ref_chosen": -70.71857452392578,
"logps/ref_rejected": -99.93263244628906,
"logps/rejected": -104.27896118164062,
"loss": 1.0386,
"margin_dpo/margin_mean": 6.521646976470947,
"margin_dpo/margin_std": 9.721222877502441,
"step": 349
},
{
"epoch": 0.5291005291005291,
"fcm_dpo/beta": 0.09830678999423981,
"fcm_dpo/delta": -0.19679048657417297,
"fcm_dpo/margin": 7.92108678817749,
"fcm_dpo/q_t": 0.34431931376457214,
"grad_norm": 15.19959831237793,
"learning_rate": 2.698124892141971e-07,
"logits/chosen": 1.451259732246399,
"logits/rejected": 1.3303101062774658,
"logps/chosen": -76.7044906616211,
"logps/ref_chosen": -78.16873168945312,
"logps/ref_rejected": -104.84308624267578,
"logps/rejected": -111.29993438720703,
"loss": 0.9326,
"margin_dpo/margin_mean": 7.921087265014648,
"margin_dpo/margin_std": 9.488693237304688,
"step": 350
},
{
"epoch": 0.5306122448979592,
"fcm_dpo/beta": 0.09811578691005707,
"fcm_dpo/delta": 0.035826023668050766,
"fcm_dpo/margin": 5.767122268676758,
"fcm_dpo/q_t": 0.37812340259552,
"grad_norm": 16.31528091430664,
"learning_rate": 2.6849415780518357e-07,
"logits/chosen": 1.5375101566314697,
"logits/rejected": 1.3594462871551514,
"logps/chosen": -70.99044799804688,
"logps/ref_chosen": -71.79151916503906,
"logps/ref_rejected": -97.04634094238281,
"logps/rejected": -102.01239013671875,
"loss": 1.1137,
"margin_dpo/margin_mean": 5.767122268676758,
"margin_dpo/margin_std": 9.835864067077637,
"step": 351
},
{
"epoch": 0.5321239606953893,
"fcm_dpo/beta": 0.09900518506765366,
"fcm_dpo/delta": 0.06033958122134209,
"fcm_dpo/margin": 5.478187084197998,
"fcm_dpo/q_t": 0.39339566230773926,
"grad_norm": 18.448850631713867,
"learning_rate": 2.6717530907482024e-07,
"logits/chosen": 1.4493322372436523,
"logits/rejected": 1.3444267511367798,
"logps/chosen": -80.5949478149414,
"logps/ref_chosen": -80.86544799804688,
"logps/ref_rejected": -102.02129364013672,
"logps/rejected": -107.2289810180664,
"loss": 1.0991,
"margin_dpo/margin_mean": 5.478187084197998,
"margin_dpo/margin_std": 9.334031105041504,
"step": 352
},
{
"epoch": 0.5336356764928194,
"fcm_dpo/beta": 0.10044597089290619,
"fcm_dpo/delta": 0.007823506370186806,
"fcm_dpo/margin": 5.897891521453857,
"fcm_dpo/q_t": 0.3776635229587555,
"grad_norm": 17.52521514892578,
"learning_rate": 2.658559799141411e-07,
"logits/chosen": 1.4600498676300049,
"logits/rejected": 1.461470365524292,
"logps/chosen": -82.79106140136719,
"logps/ref_chosen": -84.77235412597656,
"logps/ref_rejected": -86.77130889892578,
"logps/rejected": -90.68791198730469,
"loss": 1.1012,
"margin_dpo/margin_mean": 5.897891998291016,
"margin_dpo/margin_std": 10.14565372467041,
"step": 353
},
{
"epoch": 0.5351473922902494,
"fcm_dpo/beta": 0.09875574707984924,
"fcm_dpo/delta": -0.056699298322200775,
"fcm_dpo/margin": 6.602656364440918,
"fcm_dpo/q_t": 0.36554601788520813,
"grad_norm": 16.898591995239258,
"learning_rate": 2.6453620722761895e-07,
"logits/chosen": 1.4595623016357422,
"logits/rejected": 1.1693522930145264,
"logps/chosen": -52.2856559753418,
"logps/ref_chosen": -54.33562088012695,
"logps/ref_rejected": -92.4120101928711,
"logps/rejected": -96.9646987915039,
"loss": 1.0463,
"margin_dpo/margin_mean": 6.602656364440918,
"margin_dpo/margin_std": 9.901817321777344,
"step": 354
},
{
"epoch": 0.5366591080876795,
"fcm_dpo/beta": 0.09814473986625671,
"fcm_dpo/delta": -0.07233710587024689,
"fcm_dpo/margin": 6.801861763000488,
"fcm_dpo/q_t": 0.3676859140396118,
"grad_norm": 16.300806045532227,
"learning_rate": 2.632160279321328e-07,
"logits/chosen": 1.657884955406189,
"logits/rejected": 1.3802344799041748,
"logps/chosen": -60.84781265258789,
"logps/ref_chosen": -61.8388671875,
"logps/ref_rejected": -98.65571594238281,
"logps/rejected": -104.46652221679688,
"loss": 1.0267,
"margin_dpo/margin_mean": 6.801861763000488,
"margin_dpo/margin_std": 9.98470687866211,
"step": 355
},
{
"epoch": 0.5381708238851096,
"fcm_dpo/beta": 0.09741011261940002,
"fcm_dpo/delta": 0.013626519590616226,
"fcm_dpo/margin": 6.024990081787109,
"fcm_dpo/q_t": 0.3817838132381439,
"grad_norm": 18.076440811157227,
"learning_rate": 2.618954789559356e-07,
"logits/chosen": 1.8296735286712646,
"logits/rejected": 1.6565245389938354,
"logps/chosen": -61.38618850708008,
"logps/ref_chosen": -63.92546463012695,
"logps/ref_rejected": -89.682861328125,
"logps/rejected": -93.16857147216797,
"loss": 1.1492,
"margin_dpo/margin_mean": 6.024989128112793,
"margin_dpo/margin_std": 11.450614929199219,
"step": 356
},
{
"epoch": 0.5396825396825397,
"fcm_dpo/beta": 0.097140371799469,
"fcm_dpo/delta": 0.04706104099750519,
"fcm_dpo/margin": 5.682187557220459,
"fcm_dpo/q_t": 0.38888034224510193,
"grad_norm": 17.230754852294922,
"learning_rate": 2.6057459723762076e-07,
"logits/chosen": 1.6313337087631226,
"logits/rejected": 1.510791301727295,
"logps/chosen": -80.17047882080078,
"logps/ref_chosen": -81.07588958740234,
"logps/ref_rejected": -85.06967163085938,
"logps/rejected": -89.846435546875,
"loss": 1.122,
"margin_dpo/margin_mean": 5.682187557220459,
"margin_dpo/margin_std": 9.793578147888184,
"step": 357
},
{
"epoch": 0.5411942554799698,
"fcm_dpo/beta": 0.09706053137779236,
"fcm_dpo/delta": -0.14514653384685516,
"fcm_dpo/margin": 7.569679260253906,
"fcm_dpo/q_t": 0.35118916630744934,
"grad_norm": 16.265806198120117,
"learning_rate": 2.5925341972508954e-07,
"logits/chosen": 1.5452799797058105,
"logits/rejected": 1.5542395114898682,
"logps/chosen": -82.51944732666016,
"logps/ref_chosen": -84.09109497070312,
"logps/ref_rejected": -85.07244873046875,
"logps/rejected": -91.07048034667969,
"loss": 0.9672,
"margin_dpo/margin_mean": 7.569679260253906,
"margin_dpo/margin_std": 9.713032722473145,
"step": 358
},
{
"epoch": 0.5427059712773998,
"fcm_dpo/beta": 0.09953833371400833,
"fcm_dpo/delta": 0.184108167886734,
"fcm_dpo/margin": 4.224143028259277,
"fcm_dpo/q_t": 0.4165111482143402,
"grad_norm": 21.359352111816406,
"learning_rate": 2.579319833745169e-07,
"logits/chosen": 1.290248155593872,
"logits/rejected": 1.2457648515701294,
"logps/chosen": -80.081298828125,
"logps/ref_chosen": -80.7490234375,
"logps/ref_rejected": -94.92911529541016,
"logps/rejected": -98.48553466796875,
"loss": 1.2256,
"margin_dpo/margin_mean": 4.224142551422119,
"margin_dpo/margin_std": 9.785512924194336,
"step": 359
},
{
"epoch": 0.54421768707483,
"fcm_dpo/beta": 0.09920628368854523,
"fcm_dpo/delta": -0.02150268852710724,
"fcm_dpo/margin": 6.251132965087891,
"fcm_dpo/q_t": 0.38255369663238525,
"grad_norm": 16.852054595947266,
"learning_rate": 2.5661032514931834e-07,
"logits/chosen": 1.3395185470581055,
"logits/rejected": 1.1418027877807617,
"logps/chosen": -78.16912078857422,
"logps/ref_chosen": -78.38681030273438,
"logps/ref_rejected": -109.68933868408203,
"logps/rejected": -115.72278594970703,
"loss": 1.0573,
"margin_dpo/margin_mean": 6.251132011413574,
"margin_dpo/margin_std": 10.023842811584473,
"step": 360
},
{
"epoch": 0.54572940287226,
"fcm_dpo/beta": 0.09724115580320358,
"fcm_dpo/delta": -0.11813297867774963,
"fcm_dpo/margin": 7.2958984375,
"fcm_dpo/q_t": 0.3593568801879883,
"grad_norm": 16.29405975341797,
"learning_rate": 2.552884820191154e-07,
"logits/chosen": 1.7258775234222412,
"logits/rejected": 1.5785211324691772,
"logps/chosen": -73.3158950805664,
"logps/ref_chosen": -73.9055404663086,
"logps/ref_rejected": -89.8489990234375,
"logps/rejected": -96.55525207519531,
"loss": 0.9985,
"margin_dpo/margin_mean": 7.2958984375,
"margin_dpo/margin_std": 10.358057022094727,
"step": 361
},
{
"epoch": 0.54724111866969,
"fcm_dpo/beta": 0.09884364902973175,
"fcm_dpo/delta": 0.03309012949466705,
"fcm_dpo/margin": 5.702146530151367,
"fcm_dpo/q_t": 0.38847479224205017,
"grad_norm": 18.959983825683594,
"learning_rate": 2.53966490958702e-07,
"logits/chosen": 1.7043962478637695,
"logits/rejected": 1.397092342376709,
"logps/chosen": -83.36479187011719,
"logps/ref_chosen": -82.32565307617188,
"logps/ref_rejected": -123.14100646972656,
"logps/rejected": -129.88229370117188,
"loss": 1.122,
"margin_dpo/margin_mean": 5.702146530151367,
"margin_dpo/margin_std": 10.04395866394043,
"step": 362
},
{
"epoch": 0.5487528344671202,
"fcm_dpo/beta": 0.09623068571090698,
"fcm_dpo/delta": -0.08842341601848602,
"fcm_dpo/margin": 7.091344833374023,
"fcm_dpo/q_t": 0.35770153999328613,
"grad_norm": 18.15400505065918,
"learning_rate": 2.526443889470099e-07,
"logits/chosen": 1.6108663082122803,
"logits/rejected": 1.2962677478790283,
"logps/chosen": -67.00276184082031,
"logps/ref_chosen": -66.05493927001953,
"logps/ref_rejected": -106.79598999023438,
"logps/rejected": -114.83515167236328,
"loss": 0.9693,
"margin_dpo/margin_mean": 7.091343879699707,
"margin_dpo/margin_std": 8.933826446533203,
"step": 363
},
{
"epoch": 0.5502645502645502,
"fcm_dpo/beta": 0.09286511689424515,
"fcm_dpo/delta": -0.17400389909744263,
"fcm_dpo/margin": 8.17833137512207,
"fcm_dpo/q_t": 0.3556019067764282,
"grad_norm": 14.506973266601562,
"learning_rate": 2.513222129660744e-07,
"logits/chosen": 1.6289961338043213,
"logits/rejected": 1.4720258712768555,
"logps/chosen": -75.12677001953125,
"logps/ref_chosen": -76.38365173339844,
"logps/ref_rejected": -100.22221374511719,
"logps/rejected": -107.14366149902344,
"loss": 1.0085,
"margin_dpo/margin_mean": 8.17833137512207,
"margin_dpo/margin_std": 11.904781341552734,
"step": 364
},
{
"epoch": 0.5517762660619804,
"fcm_dpo/beta": 0.09245184063911438,
"fcm_dpo/delta": -0.006460797972977161,
"fcm_dpo/margin": 6.555495262145996,
"fcm_dpo/q_t": 0.37094324827194214,
"grad_norm": 16.775344848632812,
"learning_rate": 2.5e-07,
"logits/chosen": 1.6948661804199219,
"logits/rejected": 1.6915894746780396,
"logps/chosen": -79.36397552490234,
"logps/ref_chosen": -81.83399963378906,
"logps/ref_rejected": -89.06932830810547,
"logps/rejected": -93.15480041503906,
"loss": 1.0072,
"margin_dpo/margin_mean": 6.555495262145996,
"margin_dpo/margin_std": 8.44549560546875,
"step": 365
},
{
"epoch": 0.5532879818594104,
"fcm_dpo/beta": 0.09339077025651932,
"fcm_dpo/delta": 0.05166340246796608,
"fcm_dpo/margin": 5.897998332977295,
"fcm_dpo/q_t": 0.3923712968826294,
"grad_norm": 17.448740005493164,
"learning_rate": 2.486777870339255e-07,
"logits/chosen": 1.3756999969482422,
"logits/rejected": 1.3472323417663574,
"logps/chosen": -70.53245544433594,
"logps/ref_chosen": -72.03398895263672,
"logps/ref_rejected": -83.65354919433594,
"logps/rejected": -88.05001068115234,
"loss": 1.1294,
"margin_dpo/margin_mean": 5.897997856140137,
"margin_dpo/margin_std": 10.893966674804688,
"step": 366
},
{
"epoch": 0.5547996976568406,
"fcm_dpo/beta": 0.09498877823352814,
"fcm_dpo/delta": 0.12418322265148163,
"fcm_dpo/margin": 5.073179721832275,
"fcm_dpo/q_t": 0.3966239094734192,
"grad_norm": 17.194896697998047,
"learning_rate": 2.4735561105299014e-07,
"logits/chosen": 1.2562966346740723,
"logits/rejected": 1.0315181016921997,
"logps/chosen": -72.96987915039062,
"logps/ref_chosen": -72.39828491210938,
"logps/ref_rejected": -95.58364868164062,
"logps/rejected": -101.22842407226562,
"loss": 1.1257,
"margin_dpo/margin_mean": 5.073179244995117,
"margin_dpo/margin_std": 9.012093544006348,
"step": 367
},
{
"epoch": 0.5563114134542706,
"fcm_dpo/beta": 0.09633514285087585,
"fcm_dpo/delta": 0.05619820952415466,
"fcm_dpo/margin": 5.677167892456055,
"fcm_dpo/q_t": 0.3904702365398407,
"grad_norm": 16.966304779052734,
"learning_rate": 2.46033509041298e-07,
"logits/chosen": 1.3356876373291016,
"logits/rejected": 1.3284211158752441,
"logps/chosen": -92.12042236328125,
"logps/ref_chosen": -90.12812042236328,
"logps/ref_rejected": -91.6636962890625,
"logps/rejected": -99.33316040039062,
"loss": 1.1177,
"margin_dpo/margin_mean": 5.677168369293213,
"margin_dpo/margin_std": 10.074119567871094,
"step": 368
},
{
"epoch": 0.5578231292517006,
"fcm_dpo/beta": 0.09858047962188721,
"fcm_dpo/delta": 0.10922784358263016,
"fcm_dpo/margin": 5.029770851135254,
"fcm_dpo/q_t": 0.40121111273765564,
"grad_norm": 20.732187271118164,
"learning_rate": 2.447115179808846e-07,
"logits/chosen": 1.6115643978118896,
"logits/rejected": 1.5338950157165527,
"logps/chosen": -71.41743469238281,
"logps/ref_chosen": -71.29417419433594,
"logps/ref_rejected": -99.03875732421875,
"logps/rejected": -104.19178771972656,
"loss": 1.1616,
"margin_dpo/margin_mean": 5.029770851135254,
"margin_dpo/margin_std": 10.044649124145508,
"step": 369
},
{
"epoch": 0.5593348450491308,
"fcm_dpo/beta": 0.09525743126869202,
"fcm_dpo/delta": -0.20872148871421814,
"fcm_dpo/margin": 8.294317245483398,
"fcm_dpo/q_t": 0.34550201892852783,
"grad_norm": 17.368907928466797,
"learning_rate": 2.4338967485068164e-07,
"logits/chosen": 1.8338669538497925,
"logits/rejected": 1.706188678741455,
"logps/chosen": -69.02984619140625,
"logps/ref_chosen": -69.14627075195312,
"logps/ref_rejected": -93.58651733398438,
"logps/rejected": -101.764404296875,
"loss": 1.0024,
"margin_dpo/margin_mean": 8.294317245483398,
"margin_dpo/margin_std": 11.767637252807617,
"step": 370
},
{
"epoch": 0.5608465608465608,
"fcm_dpo/beta": 0.0957072526216507,
"fcm_dpo/delta": -0.02265823632478714,
"fcm_dpo/margin": 6.463840484619141,
"fcm_dpo/q_t": 0.3787399232387543,
"grad_norm": 23.442960739135742,
"learning_rate": 2.420680166254831e-07,
"logits/chosen": 1.911960482597351,
"logits/rejected": 1.8716447353363037,
"logps/chosen": -66.00799560546875,
"logps/ref_chosen": -65.76728820800781,
"logps/ref_rejected": -79.9320068359375,
"logps/rejected": -86.63655853271484,
"loss": 1.1154,
"margin_dpo/margin_mean": 6.463840484619141,
"margin_dpo/margin_std": 11.249137878417969,
"step": 371
},
{
"epoch": 0.562358276643991,
"fcm_dpo/beta": 0.09300929307937622,
"fcm_dpo/delta": -0.15263891220092773,
"fcm_dpo/margin": 5.775787353515625,
"fcm_dpo/q_t": 0.3951214849948883,
"grad_norm": 16.215530395507812,
"learning_rate": 2.4074658027491044e-07,
"logits/chosen": 1.5042600631713867,
"logits/rejected": 1.3156919479370117,
"logps/chosen": -69.15006256103516,
"logps/ref_chosen": -69.97252655029297,
"logps/ref_rejected": -92.38316345214844,
"logps/rejected": -97.33649444580078,
"loss": 1.1919,
"margin_dpo/margin_mean": 5.775787353515625,
"margin_dpo/margin_std": 11.504581451416016,
"step": 372
},
{
"epoch": 0.563869992441421,
"fcm_dpo/beta": 0.09124951809644699,
"fcm_dpo/delta": -0.022103700786828995,
"fcm_dpo/margin": 6.802433013916016,
"fcm_dpo/q_t": 0.38105976581573486,
"grad_norm": 18.401546478271484,
"learning_rate": 2.394254027623792e-07,
"logits/chosen": 1.4205693006515503,
"logits/rejected": 1.211039423942566,
"logps/chosen": -81.23674011230469,
"logps/ref_chosen": -79.34700012207031,
"logps/ref_rejected": -95.69737243652344,
"logps/rejected": -104.3895492553711,
"loss": 1.0933,
"margin_dpo/margin_mean": 6.802433013916016,
"margin_dpo/margin_std": 11.771400451660156,
"step": 373
},
{
"epoch": 0.5653817082388511,
"fcm_dpo/beta": 0.08851991593837738,
"fcm_dpo/delta": -0.20605599880218506,
"fcm_dpo/margin": 8.923542022705078,
"fcm_dpo/q_t": 0.3382510542869568,
"grad_norm": 16.197402954101562,
"learning_rate": 2.381045210440644e-07,
"logits/chosen": 1.2998840808868408,
"logits/rejected": 1.2736904621124268,
"logps/chosen": -94.04849243164062,
"logps/ref_chosen": -93.45108032226562,
"logps/ref_rejected": -93.575927734375,
"logps/rejected": -103.09687805175781,
"loss": 0.9287,
"margin_dpo/margin_mean": 8.923542022705078,
"margin_dpo/margin_std": 10.628515243530273,
"step": 374
},
{
"epoch": 0.5668934240362812,
"fcm_dpo/beta": 0.08819793164730072,
"fcm_dpo/delta": 0.06437498331069946,
"fcm_dpo/margin": 6.112876892089844,
"fcm_dpo/q_t": 0.3876236081123352,
"grad_norm": 17.605037689208984,
"learning_rate": 2.3678397206786715e-07,
"logits/chosen": 1.652343988418579,
"logits/rejected": 1.5740084648132324,
"logps/chosen": -75.57273864746094,
"logps/ref_chosen": -77.37177276611328,
"logps/ref_rejected": -98.59054565429688,
"logps/rejected": -102.90438842773438,
"loss": 1.1058,
"margin_dpo/margin_mean": 6.112877368927002,
"margin_dpo/margin_std": 10.367046356201172,
"step": 375
},
{
"epoch": 0.5684051398337112,
"fcm_dpo/beta": 0.08656222373247147,
"fcm_dpo/delta": -0.13484880328178406,
"fcm_dpo/margin": 8.36793041229248,
"fcm_dpo/q_t": 0.35931164026260376,
"grad_norm": 13.579909324645996,
"learning_rate": 2.3546379277238103e-07,
"logits/chosen": 1.4965224266052246,
"logits/rejected": 1.3083943128585815,
"logps/chosen": -68.63471984863281,
"logps/ref_chosen": -68.99790954589844,
"logps/ref_rejected": -90.37117004394531,
"logps/rejected": -98.37591552734375,
"loss": 1.0212,
"margin_dpo/margin_mean": 8.367931365966797,
"margin_dpo/margin_std": 12.326183319091797,
"step": 376
},
{
"epoch": 0.5699168556311414,
"fcm_dpo/beta": 0.08805879950523376,
"fcm_dpo/delta": 0.11377710849046707,
"fcm_dpo/margin": 5.578058242797852,
"fcm_dpo/q_t": 0.39941370487213135,
"grad_norm": 15.439395904541016,
"learning_rate": 2.3414402008585886e-07,
"logits/chosen": 1.5486724376678467,
"logits/rejected": 1.5061874389648438,
"logps/chosen": -64.6747817993164,
"logps/ref_chosen": -64.22705841064453,
"logps/ref_rejected": -73.10292053222656,
"logps/rejected": -79.12869262695312,
"loss": 1.1203,
"margin_dpo/margin_mean": 5.578058242797852,
"margin_dpo/margin_std": 9.869219779968262,
"step": 377
},
{
"epoch": 0.5714285714285714,
"fcm_dpo/beta": 0.09075718373060226,
"fcm_dpo/delta": 0.1389361023902893,
"fcm_dpo/margin": 5.1299543380737305,
"fcm_dpo/q_t": 0.403687059879303,
"grad_norm": 17.890914916992188,
"learning_rate": 2.3282469092517977e-07,
"logits/chosen": 1.5328569412231445,
"logits/rejected": 1.4390175342559814,
"logps/chosen": -77.28669738769531,
"logps/ref_chosen": -76.90864562988281,
"logps/ref_rejected": -90.53460693359375,
"logps/rejected": -96.0426025390625,
"loss": 1.1606,
"margin_dpo/margin_mean": 5.129953384399414,
"margin_dpo/margin_std": 10.130236625671387,
"step": 378
},
{
"epoch": 0.5729402872260015,
"fcm_dpo/beta": 0.09023825079202652,
"fcm_dpo/delta": -0.05217514559626579,
"fcm_dpo/margin": 7.189953327178955,
"fcm_dpo/q_t": 0.36802151799201965,
"grad_norm": 20.274019241333008,
"learning_rate": 2.3150584219481643e-07,
"logits/chosen": 1.6543352603912354,
"logits/rejected": 1.4839537143707275,
"logps/chosen": -90.58717346191406,
"logps/ref_chosen": -91.2371597290039,
"logps/ref_rejected": -120.1969985961914,
"logps/rejected": -126.7369613647461,
"loss": 1.0244,
"margin_dpo/margin_mean": 7.189952850341797,
"margin_dpo/margin_std": 10.472833633422852,
"step": 379
},
{
"epoch": 0.5744520030234316,
"fcm_dpo/beta": 0.08770506083965302,
"fcm_dpo/delta": -0.15575076639652252,
"fcm_dpo/margin": 8.483596801757812,
"fcm_dpo/q_t": 0.34348416328430176,
"grad_norm": 14.336956024169922,
"learning_rate": 2.3018751078580283e-07,
"logits/chosen": 1.458883285522461,
"logits/rejected": 1.3730969429016113,
"logps/chosen": -75.30821228027344,
"logps/ref_chosen": -77.78315734863281,
"logps/ref_rejected": -92.56083679199219,
"logps/rejected": -98.56948852539062,
"loss": 0.996,
"margin_dpo/margin_mean": 8.483596801757812,
"margin_dpo/margin_std": 11.687616348266602,
"step": 380
},
{
"epoch": 0.5759637188208617,
"fcm_dpo/beta": 0.09002942591905594,
"fcm_dpo/delta": 0.15056607127189636,
"fcm_dpo/margin": 5.02277135848999,
"fcm_dpo/q_t": 0.4095456898212433,
"grad_norm": 17.64664077758789,
"learning_rate": 2.288697335747027e-07,
"logits/chosen": 1.3801250457763672,
"logits/rejected": 1.3382147550582886,
"logps/chosen": -75.73654174804688,
"logps/ref_chosen": -75.28189086914062,
"logps/ref_rejected": -81.1995849609375,
"logps/rejected": -86.67700958251953,
"loss": 1.1939,
"margin_dpo/margin_mean": 5.022771835327148,
"margin_dpo/margin_std": 10.672150611877441,
"step": 381
},
{
"epoch": 0.5774754346182918,
"fcm_dpo/beta": 0.09176512807607651,
"fcm_dpo/delta": 0.07397836446762085,
"fcm_dpo/margin": 5.737614631652832,
"fcm_dpo/q_t": 0.39556747674942017,
"grad_norm": 15.80502986907959,
"learning_rate": 2.2755254742257706e-07,
"logits/chosen": 1.4859418869018555,
"logits/rejected": 1.3694580793380737,
"logps/chosen": -79.2926025390625,
"logps/ref_chosen": -78.74870300292969,
"logps/ref_rejected": -99.77484130859375,
"logps/rejected": -106.05636596679688,
"loss": 1.0977,
"margin_dpo/margin_mean": 5.73761510848999,
"margin_dpo/margin_std": 9.539802551269531,
"step": 382
},
{
"epoch": 0.5789871504157218,
"fcm_dpo/beta": 0.09031803905963898,
"fcm_dpo/delta": -0.04464414715766907,
"fcm_dpo/margin": 7.104597568511963,
"fcm_dpo/q_t": 0.37682849168777466,
"grad_norm": 20.42765235900879,
"learning_rate": 2.2623598917395436e-07,
"logits/chosen": 1.2135952711105347,
"logits/rejected": 1.3143563270568848,
"logps/chosen": -95.2799301147461,
"logps/ref_chosen": -95.92772674560547,
"logps/ref_rejected": -92.13604736328125,
"logps/rejected": -98.59284973144531,
"loss": 1.079,
"margin_dpo/margin_mean": 7.104598045349121,
"margin_dpo/margin_std": 11.770940780639648,
"step": 383
},
{
"epoch": 0.5804988662131519,
"fcm_dpo/beta": 0.08988260477781296,
"fcm_dpo/delta": -0.024513855576515198,
"fcm_dpo/margin": 6.930983543395996,
"fcm_dpo/q_t": 0.37451279163360596,
"grad_norm": 16.834680557250977,
"learning_rate": 2.2492009565579875e-07,
"logits/chosen": 1.8998196125030518,
"logits/rejected": 1.8183977603912354,
"logps/chosen": -80.07945251464844,
"logps/ref_chosen": -80.208984375,
"logps/ref_rejected": -94.39380645751953,
"logps/rejected": -101.19525146484375,
"loss": 1.0493,
"margin_dpo/margin_mean": 6.930984020233154,
"margin_dpo/margin_std": 10.665237426757812,
"step": 384
},
{
"epoch": 0.582010582010582,
"fcm_dpo/beta": 0.08912694454193115,
"fcm_dpo/delta": -0.08408209681510925,
"fcm_dpo/margin": 7.610535621643066,
"fcm_dpo/q_t": 0.3625371754169464,
"grad_norm": 15.654504776000977,
"learning_rate": 2.2360490367648084e-07,
"logits/chosen": 1.2281148433685303,
"logits/rejected": 1.1218184232711792,
"logps/chosen": -85.64151000976562,
"logps/ref_chosen": -85.26632690429688,
"logps/ref_rejected": -102.1983413696289,
"logps/rejected": -110.1840591430664,
"loss": 0.9953,
"margin_dpo/margin_mean": 7.610535621643066,
"margin_dpo/margin_std": 10.202959060668945,
"step": 385
},
{
"epoch": 0.5835222978080121,
"fcm_dpo/beta": 0.0907883420586586,
"fcm_dpo/delta": 0.205733060836792,
"fcm_dpo/margin": 4.438615798950195,
"fcm_dpo/q_t": 0.41605353355407715,
"grad_norm": 19.5189208984375,
"learning_rate": 2.2229045002474724e-07,
"logits/chosen": 1.660266637802124,
"logits/rejected": 1.497814655303955,
"logps/chosen": -95.44474029541016,
"logps/ref_chosen": -93.19975280761719,
"logps/ref_rejected": -112.98831176757812,
"logps/rejected": -119.67190551757812,
"loss": 1.2382,
"margin_dpo/margin_mean": 4.4386162757873535,
"margin_dpo/margin_std": 10.870526313781738,
"step": 386
},
{
"epoch": 0.5850340136054422,
"fcm_dpo/beta": 0.09045910835266113,
"fcm_dpo/delta": -0.15201978385448456,
"fcm_dpo/margin": 8.19107437133789,
"fcm_dpo/q_t": 0.3515468239784241,
"grad_norm": 16.042306900024414,
"learning_rate": 2.209767714686924e-07,
"logits/chosen": 1.5781548023223877,
"logits/rejected": 1.3721168041229248,
"logps/chosen": -66.01132202148438,
"logps/ref_chosen": -66.32861328125,
"logps/ref_rejected": -100.56486511230469,
"logps/rejected": -108.43865203857422,
"loss": 0.9654,
"margin_dpo/margin_mean": 8.19107437133789,
"margin_dpo/margin_std": 10.515932083129883,
"step": 387
},
{
"epoch": 0.5865457294028723,
"fcm_dpo/beta": 0.09115570038557053,
"fcm_dpo/delta": 0.17942151427268982,
"fcm_dpo/margin": 4.701900482177734,
"fcm_dpo/q_t": 0.4133910834789276,
"grad_norm": 25.36449432373047,
"learning_rate": 2.1966390475472954e-07,
"logits/chosen": 1.275603175163269,
"logits/rejected": 1.2700649499893188,
"logps/chosen": -94.36763000488281,
"logps/ref_chosen": -92.95967864990234,
"logps/ref_rejected": -97.9437255859375,
"logps/rejected": -104.05357360839844,
"loss": 1.219,
"margin_dpo/margin_mean": 4.701900482177734,
"margin_dpo/margin_std": 10.651166915893555,
"step": 388
},
{
"epoch": 0.5880574452003023,
"fcm_dpo/beta": 0.09211251139640808,
"fcm_dpo/delta": -0.03098585084080696,
"fcm_dpo/margin": 6.8290510177612305,
"fcm_dpo/q_t": 0.3709871172904968,
"grad_norm": 15.271201133728027,
"learning_rate": 2.1835188660656265e-07,
"logits/chosen": 1.538404941558838,
"logits/rejected": 1.44305419921875,
"logps/chosen": -76.40469360351562,
"logps/ref_chosen": -76.89031982421875,
"logps/ref_rejected": -93.79212951660156,
"logps/rejected": -100.13555908203125,
"loss": 1.0445,
"margin_dpo/margin_mean": 6.829051971435547,
"margin_dpo/margin_std": 10.24068832397461,
"step": 389
},
{
"epoch": 0.5895691609977324,
"fcm_dpo/beta": 0.09146776795387268,
"fcm_dpo/delta": -0.032210350036621094,
"fcm_dpo/margin": 6.88944149017334,
"fcm_dpo/q_t": 0.37150269746780396,
"grad_norm": 33.26952362060547,
"learning_rate": 2.170407537241599e-07,
"logits/chosen": 1.6717298030853271,
"logits/rejected": 1.545634388923645,
"logps/chosen": -58.30984878540039,
"logps/ref_chosen": -61.058815002441406,
"logps/ref_rejected": -79.55152893066406,
"logps/rejected": -83.69200134277344,
"loss": 1.0366,
"margin_dpo/margin_mean": 6.889441967010498,
"margin_dpo/margin_std": 10.276315689086914,
"step": 390
},
{
"epoch": 0.5910808767951625,
"fcm_dpo/beta": 0.08865350484848022,
"fcm_dpo/delta": -0.2127716988325119,
"fcm_dpo/margin": 8.979472160339355,
"fcm_dpo/q_t": 0.34665796160697937,
"grad_norm": 14.729681015014648,
"learning_rate": 2.1573054278272636e-07,
"logits/chosen": 1.5793694257736206,
"logits/rejected": 1.4618054628372192,
"logps/chosen": -77.08914947509766,
"logps/ref_chosen": -78.60820770263672,
"logps/ref_rejected": -103.3367691040039,
"logps/rejected": -110.79718780517578,
"loss": 1.0036,
"margin_dpo/margin_mean": 8.979471206665039,
"margin_dpo/margin_std": 12.976020812988281,
"step": 391
},
{
"epoch": 0.5925925925925926,
"fcm_dpo/beta": 0.08490710705518723,
"fcm_dpo/delta": -0.2628289759159088,
"fcm_dpo/margin": 9.909902572631836,
"fcm_dpo/q_t": 0.33273494243621826,
"grad_norm": 13.956271171569824,
"learning_rate": 2.1442129043167873e-07,
"logits/chosen": 1.9578715562820435,
"logits/rejected": 1.8658185005187988,
"logps/chosen": -83.63600158691406,
"logps/ref_chosen": -86.99468994140625,
"logps/ref_rejected": -112.73616027832031,
"logps/rejected": -119.2873764038086,
"loss": 0.9194,
"margin_dpo/margin_mean": 9.90990161895752,
"margin_dpo/margin_std": 11.858428955078125,
"step": 392
},
{
"epoch": 0.5941043083900227,
"fcm_dpo/beta": 0.08286817371845245,
"fcm_dpo/delta": 0.029584839940071106,
"fcm_dpo/margin": 6.895089626312256,
"fcm_dpo/q_t": 0.3812785744667053,
"grad_norm": 14.21916675567627,
"learning_rate": 2.131130332936195e-07,
"logits/chosen": 1.4827511310577393,
"logits/rejected": 1.3603489398956299,
"logps/chosen": -70.90792083740234,
"logps/ref_chosen": -71.26398468017578,
"logps/ref_rejected": -88.99722290039062,
"logps/rejected": -95.5362548828125,
"loss": 1.0343,
"margin_dpo/margin_mean": 6.895089149475098,
"margin_dpo/margin_std": 9.561573028564453,
"step": 393
},
{
"epoch": 0.5956160241874527,
"fcm_dpo/beta": 0.08388794958591461,
"fcm_dpo/delta": 0.017447378486394882,
"fcm_dpo/margin": 6.955907344818115,
"fcm_dpo/q_t": 0.37385329604148865,
"grad_norm": 17.475746154785156,
"learning_rate": 2.1180580796331323e-07,
"logits/chosen": 1.7049648761749268,
"logits/rejected": 1.5989840030670166,
"logps/chosen": -76.82679748535156,
"logps/ref_chosen": -78.70564270019531,
"logps/ref_rejected": -87.01431274414062,
"logps/rejected": -92.09137725830078,
"loss": 1.0137,
"margin_dpo/margin_mean": 6.955907821655273,
"margin_dpo/margin_std": 9.090719223022461,
"step": 394
},
{
"epoch": 0.5971277399848829,
"fcm_dpo/beta": 0.08548291027545929,
"fcm_dpo/delta": 0.05162470042705536,
"fcm_dpo/margin": 6.411340236663818,
"fcm_dpo/q_t": 0.388310968875885,
"grad_norm": 15.08697509765625,
"learning_rate": 2.104996510066625e-07,
"logits/chosen": 1.6531175374984741,
"logits/rejected": 1.425588607788086,
"logps/chosen": -63.353946685791016,
"logps/ref_chosen": -65.30274963378906,
"logps/ref_rejected": -93.22492218017578,
"logps/rejected": -97.68746185302734,
"loss": 1.0842,
"margin_dpo/margin_mean": 6.41133975982666,
"margin_dpo/margin_std": 10.070281028747559,
"step": 395
},
{
"epoch": 0.5986394557823129,
"fcm_dpo/beta": 0.08400573581457138,
"fcm_dpo/delta": 0.023646876215934753,
"fcm_dpo/margin": 6.846286296844482,
"fcm_dpo/q_t": 0.372989296913147,
"grad_norm": 15.486177444458008,
"learning_rate": 2.0919459895968517e-07,
"logits/chosen": 1.5460519790649414,
"logits/rejected": 1.3561407327651978,
"logps/chosen": -65.57781982421875,
"logps/ref_chosen": -67.33502197265625,
"logps/ref_rejected": -98.8193359375,
"logps/rejected": -103.9084243774414,
"loss": 1.0156,
"margin_dpo/margin_mean": 6.846286296844482,
"margin_dpo/margin_std": 8.490182876586914,
"step": 396
},
{
"epoch": 0.600151171579743,
"fcm_dpo/beta": 0.08782678842544556,
"fcm_dpo/delta": 0.19592741131782532,
"fcm_dpo/margin": 4.687417984008789,
"fcm_dpo/q_t": 0.40758201479911804,
"grad_norm": 18.059585571289062,
"learning_rate": 2.078906883274924e-07,
"logits/chosen": 1.7242810726165771,
"logits/rejected": 1.6127660274505615,
"logps/chosen": -88.16705322265625,
"logps/ref_chosen": -89.6042251586914,
"logps/ref_rejected": -104.9779052734375,
"logps/rejected": -108.22815704345703,
"loss": 1.215,
"margin_dpo/margin_mean": 4.687417030334473,
"margin_dpo/margin_std": 10.656184196472168,
"step": 397
},
{
"epoch": 0.6016628873771731,
"fcm_dpo/beta": 0.08691433072090149,
"fcm_dpo/delta": -0.10858240723609924,
"fcm_dpo/margin": 8.061241149902344,
"fcm_dpo/q_t": 0.36168110370635986,
"grad_norm": 13.436088562011719,
"learning_rate": 2.065879555832674e-07,
"logits/chosen": 1.6729035377502441,
"logits/rejected": 1.5150872468948364,
"logps/chosen": -63.89373016357422,
"logps/ref_chosen": -66.43465423583984,
"logps/ref_rejected": -90.90376281738281,
"logps/rejected": -96.42407989501953,
"loss": 0.9976,
"margin_dpo/margin_mean": 8.061240196228027,
"margin_dpo/margin_std": 11.222570419311523,
"step": 398
},
{
"epoch": 0.6031746031746031,
"fcm_dpo/beta": 0.08543148636817932,
"fcm_dpo/delta": -0.05866962671279907,
"fcm_dpo/margin": 7.65556001663208,
"fcm_dpo/q_t": 0.36848974227905273,
"grad_norm": 16.3436222076416,
"learning_rate": 2.052864371672457e-07,
"logits/chosen": 1.4369208812713623,
"logits/rejected": 1.147277593612671,
"logps/chosen": -87.73809814453125,
"logps/ref_chosen": -87.22315979003906,
"logps/ref_rejected": -136.32411193847656,
"logps/rejected": -144.49459838867188,
"loss": 1.022,
"margin_dpo/margin_mean": 7.655560493469238,
"margin_dpo/margin_std": 11.066182136535645,
"step": 399
},
{
"epoch": 0.6046863189720333,
"fcm_dpo/beta": 0.08775937557220459,
"fcm_dpo/delta": 0.19517040252685547,
"fcm_dpo/margin": 4.70993709564209,
"fcm_dpo/q_t": 0.41906869411468506,
"grad_norm": 19.55310821533203,
"learning_rate": 2.0398616948569493e-07,
"logits/chosen": 1.7458031177520752,
"logits/rejected": 1.553027868270874,
"logps/chosen": -92.2566146850586,
"logps/ref_chosen": -91.1212158203125,
"logps/ref_rejected": -108.19235229492188,
"logps/rejected": -114.03768920898438,
"loss": 1.209,
"margin_dpo/margin_mean": 4.709938049316406,
"margin_dpo/margin_std": 10.708078384399414,
"step": 400
},
{
"epoch": 0.6046863189720333,
"eval_fcm_dpo/beta": 0.08902417868375778,
"eval_logits/chosen": 1.643101692199707,
"eval_logits/rejected": 1.5118398666381836,
"eval_logps/chosen": -85.37108612060547,
"eval_logps/ref_chosen": -86.90177917480469,
"eval_logps/ref_rejected": -96.69639587402344,
"eval_logps/rejected": -102.01863861083984,
"eval_loss": 0.5379721522331238,
"eval_margin_dpo/margin_mean": 6.852944850921631,
"eval_margin_dpo/margin_std": 11.100692749023438,
"eval_runtime": 42.277,
"eval_samples_per_second": 54.474,
"eval_steps_per_second": 1.703,
"step": 400
},
{
"epoch": 0.6061980347694633,
"fcm_dpo/beta": 0.08596399426460266,
"fcm_dpo/delta": -0.2335837483406067,
"fcm_dpo/margin": 9.474281311035156,
"fcm_dpo/q_t": 0.3320953845977783,
"grad_norm": 13.233601570129395,
"learning_rate": 2.0268718890989752e-07,
"logits/chosen": 1.7577645778656006,
"logits/rejected": 1.5829046964645386,
"logps/chosen": -64.07991027832031,
"logps/ref_chosen": -67.54151153564453,
"logps/ref_rejected": -98.06488800048828,
"logps/rejected": -104.07756042480469,
"loss": 0.9049,
"margin_dpo/margin_mean": 9.474281311035156,
"margin_dpo/margin_std": 10.675681114196777,
"step": 401
},
{
"epoch": 0.6077097505668935,
"fcm_dpo/beta": 0.08430798351764679,
"fcm_dpo/delta": -0.025104699656367302,
"fcm_dpo/margin": 7.391202926635742,
"fcm_dpo/q_t": 0.37311261892318726,
"grad_norm": 22.79948616027832,
"learning_rate": 2.013895317751323e-07,
"logits/chosen": 1.424646258354187,
"logits/rejected": 1.3981235027313232,
"logps/chosen": -74.81704711914062,
"logps/ref_chosen": -77.44487762451172,
"logps/ref_rejected": -83.1333236694336,
"logps/rejected": -87.89669799804688,
"loss": 1.0328,
"margin_dpo/margin_mean": 7.391202926635742,
"margin_dpo/margin_std": 10.843732833862305,
"step": 402
},
{
"epoch": 0.6092214663643235,
"fcm_dpo/beta": 0.08236850798130035,
"fcm_dpo/delta": -0.2032555490732193,
"fcm_dpo/margin": 9.562971115112305,
"fcm_dpo/q_t": 0.3458006978034973,
"grad_norm": 14.288395881652832,
"learning_rate": 2.0009323437965898e-07,
"logits/chosen": 1.732656478881836,
"logits/rejected": 1.5398776531219482,
"logps/chosen": -67.0589599609375,
"logps/ref_chosen": -68.8230972290039,
"logps/ref_rejected": -99.82356262207031,
"logps/rejected": -107.62240600585938,
"loss": 0.9865,
"margin_dpo/margin_mean": 9.562971115112305,
"margin_dpo/margin_std": 13.16672420501709,
"step": 403
},
{
"epoch": 0.6107331821617535,
"fcm_dpo/beta": 0.07909810543060303,
"fcm_dpo/delta": -0.09509045630693436,
"fcm_dpo/margin": 8.658452033996582,
"fcm_dpo/q_t": 0.36028921604156494,
"grad_norm": 14.61069393157959,
"learning_rate": 1.9879833298370237e-07,
"logits/chosen": 1.521719217300415,
"logits/rejected": 1.3546950817108154,
"logps/chosen": -77.83849334716797,
"logps/ref_chosen": -80.26783752441406,
"logps/ref_rejected": -111.60258483886719,
"logps/rejected": -117.8316879272461,
"loss": 0.9825,
"margin_dpo/margin_mean": 8.658452033996582,
"margin_dpo/margin_std": 10.987064361572266,
"step": 404
},
{
"epoch": 0.6122448979591837,
"fcm_dpo/beta": 0.0779149979352951,
"fcm_dpo/delta": -0.10941280424594879,
"fcm_dpo/margin": 6.381047248840332,
"fcm_dpo/q_t": 0.39759546518325806,
"grad_norm": 13.850225448608398,
"learning_rate": 1.975048638084379e-07,
"logits/chosen": 1.3373526334762573,
"logits/rejected": 1.2223901748657227,
"logps/chosen": -66.5140151977539,
"logps/ref_chosen": -68.31065368652344,
"logps/ref_rejected": -81.56044006347656,
"logps/rejected": -86.14483642578125,
"loss": 1.1024,
"margin_dpo/margin_mean": 6.381047248840332,
"margin_dpo/margin_std": 10.001663208007812,
"step": 405
},
{
"epoch": 0.6137566137566137,
"fcm_dpo/beta": 0.07638435065746307,
"fcm_dpo/delta": -0.11889545619487762,
"fcm_dpo/margin": 9.296536445617676,
"fcm_dpo/q_t": 0.3526379466056824,
"grad_norm": 13.350903511047363,
"learning_rate": 1.9621286303497914e-07,
"logits/chosen": 1.6963818073272705,
"logits/rejected": 1.3465378284454346,
"logps/chosen": -62.2177619934082,
"logps/ref_chosen": -64.86714935302734,
"logps/ref_rejected": -110.06051635742188,
"logps/rejected": -116.70765686035156,
"loss": 0.9653,
"margin_dpo/margin_mean": 9.296536445617676,
"margin_dpo/margin_std": 11.602973937988281,
"step": 406
},
{
"epoch": 0.6152683295540439,
"fcm_dpo/beta": 0.07776181399822235,
"fcm_dpo/delta": 0.11209922283887863,
"fcm_dpo/margin": 6.331610202789307,
"fcm_dpo/q_t": 0.3990859389305115,
"grad_norm": 18.031661987304688,
"learning_rate": 1.9492236680336483e-07,
"logits/chosen": 1.3154183626174927,
"logits/rejected": 1.1056073904037476,
"logps/chosen": -104.64439392089844,
"logps/ref_chosen": -102.01712799072266,
"logps/ref_rejected": -121.53548431396484,
"logps/rejected": -130.49435424804688,
"loss": 1.1031,
"margin_dpo/margin_mean": 6.331610679626465,
"margin_dpo/margin_std": 10.628255844116211,
"step": 407
},
{
"epoch": 0.6167800453514739,
"fcm_dpo/beta": 0.07716310024261475,
"fcm_dpo/delta": -0.11025048792362213,
"fcm_dpo/margin": 9.09909439086914,
"fcm_dpo/q_t": 0.35395896434783936,
"grad_norm": 12.185460090637207,
"learning_rate": 1.9363341121154895e-07,
"logits/chosen": 1.5756661891937256,
"logits/rejected": 1.3888509273529053,
"logps/chosen": -71.03925323486328,
"logps/ref_chosen": -72.77989959716797,
"logps/ref_rejected": -92.01815795898438,
"logps/rejected": -99.37660217285156,
"loss": 0.9483,
"margin_dpo/margin_mean": 9.09909439086914,
"margin_dpo/margin_std": 10.556812286376953,
"step": 408
},
{
"epoch": 0.618291761148904,
"fcm_dpo/beta": 0.07857310026884079,
"fcm_dpo/delta": 0.23402190208435059,
"fcm_dpo/margin": 4.776151657104492,
"fcm_dpo/q_t": 0.4284062385559082,
"grad_norm": 14.054645538330078,
"learning_rate": 1.9234603231438994e-07,
"logits/chosen": 1.5529547929763794,
"logits/rejected": 1.55470609664917,
"logps/chosen": -79.22383117675781,
"logps/ref_chosen": -77.7901611328125,
"logps/ref_rejected": -79.2997055053711,
"logps/rejected": -85.509521484375,
"loss": 1.2311,
"margin_dpo/margin_mean": 4.776151657104492,
"margin_dpo/margin_std": 11.638947486877441,
"step": 409
},
{
"epoch": 0.6198034769463341,
"fcm_dpo/beta": 0.07810753583908081,
"fcm_dpo/delta": -0.09311722218990326,
"fcm_dpo/margin": 8.771734237670898,
"fcm_dpo/q_t": 0.35872140526771545,
"grad_norm": 14.657498359680176,
"learning_rate": 1.9106026612264315e-07,
"logits/chosen": 1.351644515991211,
"logits/rejected": 1.2707946300506592,
"logps/chosen": -80.2009506225586,
"logps/ref_chosen": -80.35844421386719,
"logps/ref_rejected": -92.19056701660156,
"logps/rejected": -100.80480194091797,
"loss": 0.9973,
"margin_dpo/margin_mean": 8.771734237670898,
"margin_dpo/margin_std": 11.75565242767334,
"step": 410
},
{
"epoch": 0.6213151927437641,
"fcm_dpo/beta": 0.0773276686668396,
"fcm_dpo/delta": -0.09193338453769684,
"fcm_dpo/margin": 8.867551803588867,
"fcm_dpo/q_t": 0.36154991388320923,
"grad_norm": 22.89755630493164,
"learning_rate": 1.8977614860195296e-07,
"logits/chosen": 1.677168369293213,
"logits/rejected": 1.54649019241333,
"logps/chosen": -70.70728302001953,
"logps/ref_chosen": -70.72857666015625,
"logps/ref_rejected": -93.19204711914062,
"logps/rejected": -102.03831481933594,
"loss": 0.9948,
"margin_dpo/margin_mean": 8.867551803588867,
"margin_dpo/margin_std": 12.079263687133789,
"step": 411
},
{
"epoch": 0.6228269085411943,
"fcm_dpo/beta": 0.07649530470371246,
"fcm_dpo/delta": 0.018762707710266113,
"fcm_dpo/margin": 7.599701881408691,
"fcm_dpo/q_t": 0.381513774394989,
"grad_norm": 16.80459976196289,
"learning_rate": 1.8849371567184662e-07,
"logits/chosen": 1.7272131443023682,
"logits/rejected": 1.6005456447601318,
"logps/chosen": -74.96763610839844,
"logps/ref_chosen": -72.87568664550781,
"logps/ref_rejected": -88.21068572998047,
"logps/rejected": -97.90233612060547,
"loss": 1.0506,
"margin_dpo/margin_mean": 7.599700927734375,
"margin_dpo/margin_std": 11.350024223327637,
"step": 412
},
{
"epoch": 0.6243386243386243,
"fcm_dpo/beta": 0.07793605327606201,
"fcm_dpo/delta": 0.07105319201946259,
"fcm_dpo/margin": 6.835833549499512,
"fcm_dpo/q_t": 0.39478057622909546,
"grad_norm": 16.19317054748535,
"learning_rate": 1.872130032047302e-07,
"logits/chosen": 1.1888175010681152,
"logits/rejected": 1.079815149307251,
"logps/chosen": -87.0733642578125,
"logps/ref_chosen": -84.70051574707031,
"logps/ref_rejected": -92.06742095947266,
"logps/rejected": -101.27610778808594,
"loss": 1.1479,
"margin_dpo/margin_mean": 6.835833549499512,
"margin_dpo/margin_std": 13.243301391601562,
"step": 413
},
{
"epoch": 0.6258503401360545,
"fcm_dpo/beta": 0.07741403579711914,
"fcm_dpo/delta": -0.06040637195110321,
"fcm_dpo/margin": 8.477437019348145,
"fcm_dpo/q_t": 0.3683781325817108,
"grad_norm": 13.204545021057129,
"learning_rate": 1.8593404702488436e-07,
"logits/chosen": 1.5923712253570557,
"logits/rejected": 1.4576035737991333,
"logps/chosen": -73.57768249511719,
"logps/ref_chosen": -70.97660827636719,
"logps/ref_rejected": -92.90523529052734,
"logps/rejected": -103.9837417602539,
"loss": 1.0228,
"margin_dpo/margin_mean": 8.477436065673828,
"margin_dpo/margin_std": 12.279195785522461,
"step": 414
},
{
"epoch": 0.6273620559334845,
"fcm_dpo/beta": 0.07709582149982452,
"fcm_dpo/delta": -0.006522274576127529,
"fcm_dpo/margin": 7.860967636108398,
"fcm_dpo/q_t": 0.3789500594139099,
"grad_norm": 15.471349716186523,
"learning_rate": 1.846568829074628e-07,
"logits/chosen": 1.4484164714813232,
"logits/rejected": 1.361172080039978,
"logps/chosen": -74.24467468261719,
"logps/ref_chosen": -71.7189712524414,
"logps/ref_rejected": -74.54219818115234,
"logps/rejected": -84.92887878417969,
"loss": 1.1216,
"margin_dpo/margin_mean": 7.860968112945557,
"margin_dpo/margin_std": 14.171285629272461,
"step": 415
},
{
"epoch": 0.6288737717309146,
"fcm_dpo/beta": 0.0767393410205841,
"fcm_dpo/delta": -0.06185510754585266,
"fcm_dpo/margin": 5.816371440887451,
"fcm_dpo/q_t": 0.41204434633255005,
"grad_norm": 15.024466514587402,
"learning_rate": 1.8338154657749128e-07,
"logits/chosen": 1.430651307106018,
"logits/rejected": 1.2943285703659058,
"logps/chosen": -76.21292114257812,
"logps/ref_chosen": -72.88249206542969,
"logps/ref_rejected": -85.30693054199219,
"logps/rejected": -94.45372009277344,
"loss": 1.2034,
"margin_dpo/margin_mean": 5.816370964050293,
"margin_dpo/margin_std": 12.529787063598633,
"step": 416
},
{
"epoch": 0.6303854875283447,
"fcm_dpo/beta": 0.0747881531715393,
"fcm_dpo/delta": -0.18942071497440338,
"fcm_dpo/margin": 10.360841751098633,
"fcm_dpo/q_t": 0.3438907861709595,
"grad_norm": 13.735955238342285,
"learning_rate": 1.8210807370886849e-07,
"logits/chosen": 1.8037786483764648,
"logits/rejected": 1.6183760166168213,
"logps/chosen": -75.36857604980469,
"logps/ref_chosen": -72.49703216552734,
"logps/ref_rejected": -89.38966369628906,
"logps/rejected": -102.62205505371094,
"loss": 0.9679,
"margin_dpo/margin_mean": 10.360841751098633,
"margin_dpo/margin_std": 13.366008758544922,
"step": 417
},
{
"epoch": 0.6318972033257747,
"fcm_dpo/beta": 0.07399855554103851,
"fcm_dpo/delta": 0.03859926387667656,
"fcm_dpo/margin": 4.893869876861572,
"fcm_dpo/q_t": 0.42707228660583496,
"grad_norm": 16.023584365844727,
"learning_rate": 1.8083649992336825e-07,
"logits/chosen": 1.6475262641906738,
"logits/rejected": 1.6260570287704468,
"logps/chosen": -95.30635070800781,
"logps/ref_chosen": -89.70926666259766,
"logps/ref_rejected": -90.98756408691406,
"logps/rejected": -101.478515625,
"loss": 1.2382,
"margin_dpo/margin_mean": 4.893869400024414,
"margin_dpo/margin_std": 11.968032836914062,
"step": 418
},
{
"epoch": 0.6334089191232048,
"fcm_dpo/beta": 0.0715949535369873,
"fcm_dpo/delta": -0.1930314600467682,
"fcm_dpo/margin": 10.8507080078125,
"fcm_dpo/q_t": 0.3386869430541992,
"grad_norm": 12.689598083496094,
"learning_rate": 1.7956686078964255e-07,
"logits/chosen": 1.2596359252929688,
"logits/rejected": 1.0992696285247803,
"logps/chosen": -75.74531555175781,
"logps/ref_chosen": -75.652099609375,
"logps/ref_rejected": -91.0013427734375,
"logps/rejected": -101.94527435302734,
"loss": 0.9125,
"margin_dpo/margin_mean": 10.8507080078125,
"margin_dpo/margin_std": 12.108956336975098,
"step": 419
},
{
"epoch": 0.6349206349206349,
"fcm_dpo/beta": 0.07248981297016144,
"fcm_dpo/delta": 0.14235371351242065,
"fcm_dpo/margin": 6.4076995849609375,
"fcm_dpo/q_t": 0.4088389575481415,
"grad_norm": 14.2985258102417,
"learning_rate": 1.782991918222275e-07,
"logits/chosen": 1.4011223316192627,
"logits/rejected": 1.2798104286193848,
"logps/chosen": -77.19985961914062,
"logps/ref_chosen": -72.58027648925781,
"logps/ref_rejected": -79.90303802490234,
"logps/rejected": -90.93031311035156,
"loss": 1.2141,
"margin_dpo/margin_mean": 6.4076995849609375,
"margin_dpo/margin_std": 14.4735107421875,
"step": 420
},
{
"epoch": 0.636432350718065,
"fcm_dpo/beta": 0.0741676315665245,
"fcm_dpo/delta": 0.0948985144495964,
"fcm_dpo/margin": 6.876837253570557,
"fcm_dpo/q_t": 0.39845705032348633,
"grad_norm": 14.456829071044922,
"learning_rate": 1.7703352848054887e-07,
"logits/chosen": 1.5099852085113525,
"logits/rejected": 1.2740275859832764,
"logps/chosen": -82.48133087158203,
"logps/ref_chosen": -78.71546936035156,
"logps/ref_rejected": -90.82321166992188,
"logps/rejected": -101.46591186523438,
"loss": 1.2044,
"margin_dpo/margin_mean": 6.876836776733398,
"margin_dpo/margin_std": 14.906235694885254,
"step": 421
},
{
"epoch": 0.6379440665154951,
"fcm_dpo/beta": 0.0753115639090538,
"fcm_dpo/delta": 0.041089512407779694,
"fcm_dpo/margin": 7.448758125305176,
"fcm_dpo/q_t": 0.3887425363063812,
"grad_norm": 15.896506309509277,
"learning_rate": 1.7576990616793137e-07,
"logits/chosen": 1.6806402206420898,
"logits/rejected": 1.6230077743530273,
"logps/chosen": -89.55979919433594,
"logps/ref_chosen": -86.74519348144531,
"logps/ref_rejected": -94.02015686035156,
"logps/rejected": -104.28353881835938,
"loss": 1.0896,
"margin_dpo/margin_mean": 7.448757171630859,
"margin_dpo/margin_std": 12.532878875732422,
"step": 422
},
{
"epoch": 0.6394557823129252,
"fcm_dpo/beta": 0.07428386062383652,
"fcm_dpo/delta": -0.048137813806533813,
"fcm_dpo/margin": 8.670099258422852,
"fcm_dpo/q_t": 0.3703242540359497,
"grad_norm": 13.458715438842773,
"learning_rate": 1.745083602306071e-07,
"logits/chosen": 1.7160992622375488,
"logits/rejected": 1.5093460083007812,
"logps/chosen": -75.56707763671875,
"logps/ref_chosen": -72.02232360839844,
"logps/ref_rejected": -93.26976776123047,
"logps/rejected": -105.484619140625,
"loss": 1.0292,
"margin_dpo/margin_mean": 8.670099258422852,
"margin_dpo/margin_std": 12.686551094055176,
"step": 423
},
{
"epoch": 0.6409674981103552,
"fcm_dpo/beta": 0.07354743778705597,
"fcm_dpo/delta": -0.09381558746099472,
"fcm_dpo/margin": 9.346273422241211,
"fcm_dpo/q_t": 0.358426570892334,
"grad_norm": 14.17031192779541,
"learning_rate": 1.7324892595672804e-07,
"logits/chosen": 1.158247470855713,
"logits/rejected": 1.0918021202087402,
"logps/chosen": -72.14212799072266,
"logps/ref_chosen": -68.22148132324219,
"logps/ref_rejected": -94.12411499023438,
"logps/rejected": -107.39103698730469,
"loss": 0.9896,
"margin_dpo/margin_mean": 9.346274375915527,
"margin_dpo/margin_std": 12.390763282775879,
"step": 424
},
{
"epoch": 0.6424792139077853,
"fcm_dpo/beta": 0.07256484031677246,
"fcm_dpo/delta": -0.08450430631637573,
"fcm_dpo/margin": 9.353759765625,
"fcm_dpo/q_t": 0.3590080738067627,
"grad_norm": 13.15410041809082,
"learning_rate": 1.7199163857537824e-07,
"logits/chosen": 1.5787358283996582,
"logits/rejected": 1.5033378601074219,
"logps/chosen": -78.48164367675781,
"logps/ref_chosen": -75.90104675292969,
"logps/ref_rejected": -86.08673095703125,
"logps/rejected": -98.02108764648438,
"loss": 0.9676,
"margin_dpo/margin_mean": 9.353760719299316,
"margin_dpo/margin_std": 11.51694107055664,
"step": 425
},
{
"epoch": 0.6439909297052154,
"fcm_dpo/beta": 0.07429321110248566,
"fcm_dpo/delta": 0.23680397868156433,
"fcm_dpo/margin": 5.014549732208252,
"fcm_dpo/q_t": 0.43028032779693604,
"grad_norm": 19.14048957824707,
"learning_rate": 1.7073653325558828e-07,
"logits/chosen": 1.6519157886505127,
"logits/rejected": 1.637803316116333,
"logps/chosen": -95.85223388671875,
"logps/ref_chosen": -89.93118286132812,
"logps/ref_rejected": -91.04658508300781,
"logps/rejected": -101.98219299316406,
"loss": 1.2953,
"margin_dpo/margin_mean": 5.014549732208252,
"margin_dpo/margin_std": 14.537099838256836,
"step": 426
},
{
"epoch": 0.6455026455026455,
"fcm_dpo/beta": 0.07508181780576706,
"fcm_dpo/delta": -0.03407387062907219,
"fcm_dpo/margin": 8.416566848754883,
"fcm_dpo/q_t": 0.37357228994369507,
"grad_norm": 14.374457359313965,
"learning_rate": 1.6948364510535218e-07,
"logits/chosen": 1.7039687633514404,
"logits/rejected": 1.5296571254730225,
"logps/chosen": -82.45877838134766,
"logps/ref_chosen": -77.83393859863281,
"logps/ref_rejected": -98.69864654541016,
"logps/rejected": -111.74006652832031,
"loss": 1.0382,
"margin_dpo/margin_mean": 8.416566848754883,
"margin_dpo/margin_std": 12.733621597290039,
"step": 427
},
{
"epoch": 0.6470143613000756,
"fcm_dpo/beta": 0.07478933781385422,
"fcm_dpo/delta": -0.026344936341047287,
"fcm_dpo/margin": 8.352245330810547,
"fcm_dpo/q_t": 0.3767045736312866,
"grad_norm": 15.099655151367188,
"learning_rate": 1.6823300917064458e-07,
"logits/chosen": 1.3732352256774902,
"logits/rejected": 1.3193600177764893,
"logps/chosen": -95.37033081054688,
"logps/ref_chosen": -90.3450927734375,
"logps/ref_rejected": -100.24185180664062,
"logps/rejected": -113.61933135986328,
"loss": 1.0725,
"margin_dpo/margin_mean": 8.352245330810547,
"margin_dpo/margin_std": 13.533937454223633,
"step": 428
},
{
"epoch": 0.6485260770975056,
"fcm_dpo/beta": 0.07440754771232605,
"fcm_dpo/delta": -0.06462083011865616,
"fcm_dpo/margin": 8.864797592163086,
"fcm_dpo/q_t": 0.3679465055465698,
"grad_norm": 15.361631393432617,
"learning_rate": 1.669846604344412e-07,
"logits/chosen": 1.2320432662963867,
"logits/rejected": 1.2528884410858154,
"logps/chosen": -83.45864868164062,
"logps/ref_chosen": -78.24811553955078,
"logps/ref_rejected": -75.24495697021484,
"logps/rejected": -89.32029724121094,
"loss": 1.0975,
"margin_dpo/margin_mean": 8.864797592163086,
"margin_dpo/margin_std": 14.934419631958008,
"step": 429
},
{
"epoch": 0.6500377928949358,
"fcm_dpo/beta": 0.07186997681856155,
"fcm_dpo/delta": -0.13423848152160645,
"fcm_dpo/margin": 10.071898460388184,
"fcm_dpo/q_t": 0.3517456650733948,
"grad_norm": 13.017266273498535,
"learning_rate": 1.6573863381573954e-07,
"logits/chosen": 1.2743966579437256,
"logits/rejected": 1.2923517227172852,
"logps/chosen": -79.84930419921875,
"logps/ref_chosen": -76.08027648925781,
"logps/ref_rejected": -84.09554290771484,
"logps/rejected": -97.93647766113281,
"loss": 1.0004,
"margin_dpo/margin_mean": 10.0718994140625,
"margin_dpo/margin_std": 13.884342193603516,
"step": 430
},
{
"epoch": 0.6515495086923658,
"fcm_dpo/beta": 0.07156576216220856,
"fcm_dpo/delta": -0.024212071672081947,
"fcm_dpo/margin": 8.699821472167969,
"fcm_dpo/q_t": 0.3760807514190674,
"grad_norm": 13.218536376953125,
"learning_rate": 1.6449496416858282e-07,
"logits/chosen": 1.4648240804672241,
"logits/rejected": 1.3462178707122803,
"logps/chosen": -69.23052978515625,
"logps/ref_chosen": -66.88581085205078,
"logps/ref_rejected": -89.56040954589844,
"logps/rejected": -100.60494232177734,
"loss": 1.0911,
"margin_dpo/margin_mean": 8.699821472167969,
"margin_dpo/margin_std": 14.66958236694336,
"step": 431
},
{
"epoch": 0.6530612244897959,
"fcm_dpo/beta": 0.07108249515295029,
"fcm_dpo/delta": 0.0022036749869585037,
"fcm_dpo/margin": 8.409427642822266,
"fcm_dpo/q_t": 0.3838854730129242,
"grad_norm": 14.229828834533691,
"learning_rate": 1.632536862810844e-07,
"logits/chosen": 1.4838778972625732,
"logits/rejected": 1.3657793998718262,
"logps/chosen": -83.34407043457031,
"logps/ref_chosen": -79.65066528320312,
"logps/ref_rejected": -103.92634582519531,
"logps/rejected": -116.02919006347656,
"loss": 1.1148,
"margin_dpo/margin_mean": 8.409428596496582,
"margin_dpo/margin_std": 15.037290573120117,
"step": 432
},
{
"epoch": 0.654572940287226,
"fcm_dpo/beta": 0.0698608011007309,
"fcm_dpo/delta": -0.19808723032474518,
"fcm_dpo/margin": 11.203939437866211,
"fcm_dpo/q_t": 0.3509420156478882,
"grad_norm": 12.846253395080566,
"learning_rate": 1.6201483487445515e-07,
"logits/chosen": 1.7028049230575562,
"logits/rejected": 1.6700106859207153,
"logps/chosen": -80.68118286132812,
"logps/ref_chosen": -77.30774688720703,
"logps/ref_rejected": -81.65180206298828,
"logps/rejected": -96.22918701171875,
"loss": 1.0107,
"margin_dpo/margin_mean": 11.203940391540527,
"margin_dpo/margin_std": 16.126134872436523,
"step": 433
},
{
"epoch": 0.656084656084656,
"fcm_dpo/beta": 0.06696303188800812,
"fcm_dpo/delta": -0.09753292053937912,
"fcm_dpo/margin": 10.282703399658203,
"fcm_dpo/q_t": 0.36050257086753845,
"grad_norm": 11.524694442749023,
"learning_rate": 1.6077844460203204e-07,
"logits/chosen": 1.6322749853134155,
"logits/rejected": 1.4705924987792969,
"logps/chosen": -64.55253601074219,
"logps/ref_chosen": -63.31850051879883,
"logps/ref_rejected": -89.15093994140625,
"logps/rejected": -100.66767883300781,
"loss": 1.022,
"margin_dpo/margin_mean": 10.282703399658203,
"margin_dpo/margin_std": 14.64346694946289,
"step": 434
},
{
"epoch": 0.6575963718820862,
"fcm_dpo/beta": 0.0687536746263504,
"fcm_dpo/delta": 0.11254524439573288,
"fcm_dpo/margin": 7.150820732116699,
"fcm_dpo/q_t": 0.39818453788757324,
"grad_norm": 13.866617202758789,
"learning_rate": 1.5954455004830878e-07,
"logits/chosen": 1.7764880657196045,
"logits/rejected": 1.69718337059021,
"logps/chosen": -75.79131317138672,
"logps/ref_chosen": -71.1719741821289,
"logps/ref_rejected": -86.42095184326172,
"logps/rejected": -98.19110870361328,
"loss": 1.1273,
"margin_dpo/margin_mean": 7.150820732116699,
"margin_dpo/margin_std": 12.781055450439453,
"step": 435
},
{
"epoch": 0.6591080876795162,
"fcm_dpo/beta": 0.06901911646127701,
"fcm_dpo/delta": -0.016552124172449112,
"fcm_dpo/margin": 8.907886505126953,
"fcm_dpo/q_t": 0.3748345375061035,
"grad_norm": 12.772109985351562,
"learning_rate": 1.5831318572796847e-07,
"logits/chosen": 1.5760531425476074,
"logits/rejected": 1.4302666187286377,
"logps/chosen": -77.02301025390625,
"logps/ref_chosen": -74.45087432861328,
"logps/ref_rejected": -86.01708984375,
"logps/rejected": -97.49711608886719,
"loss": 1.0546,
"margin_dpo/margin_mean": 8.907885551452637,
"margin_dpo/margin_std": 13.655805587768555,
"step": 436
},
{
"epoch": 0.6606198034769464,
"fcm_dpo/beta": 0.06862768530845642,
"fcm_dpo/delta": 0.08339989930391312,
"fcm_dpo/margin": 7.569982528686523,
"fcm_dpo/q_t": 0.40054136514663696,
"grad_norm": 14.294130325317383,
"learning_rate": 1.5708438608491815e-07,
"logits/chosen": 1.5998663902282715,
"logits/rejected": 1.2967803478240967,
"logps/chosen": -78.01884460449219,
"logps/ref_chosen": -72.38907623291016,
"logps/ref_rejected": -111.03279876708984,
"logps/rejected": -124.2325439453125,
"loss": 1.1775,
"margin_dpo/margin_mean": 7.569982528686523,
"margin_dpo/margin_std": 15.420955657958984,
"step": 437
},
{
"epoch": 0.6621315192743764,
"fcm_dpo/beta": 0.0692143589258194,
"fcm_dpo/delta": -0.11934801936149597,
"fcm_dpo/margin": 10.254268646240234,
"fcm_dpo/q_t": 0.3581221103668213,
"grad_norm": 12.05996322631836,
"learning_rate": 1.558581854913253e-07,
"logits/chosen": 1.4713064432144165,
"logits/rejected": 1.3395293951034546,
"logps/chosen": -60.1005859375,
"logps/ref_chosen": -57.27682876586914,
"logps/ref_rejected": -83.07940673828125,
"logps/rejected": -96.15742492675781,
"loss": 1.0149,
"margin_dpo/margin_mean": 10.25426959991455,
"margin_dpo/margin_std": 14.174232482910156,
"step": 438
},
{
"epoch": 0.6636432350718064,
"fcm_dpo/beta": 0.0677412897348404,
"fcm_dpo/delta": -0.08142800629138947,
"fcm_dpo/margin": 9.969182968139648,
"fcm_dpo/q_t": 0.36517322063446045,
"grad_norm": 13.264842987060547,
"learning_rate": 1.5463461824665658e-07,
"logits/chosen": 1.4709299802780151,
"logits/rejected": 1.3555489778518677,
"logps/chosen": -101.94784545898438,
"logps/ref_chosen": -98.35890197753906,
"logps/ref_rejected": -112.69817352294922,
"logps/rejected": -126.25629425048828,
"loss": 1.0005,
"margin_dpo/margin_mean": 9.969182968139648,
"margin_dpo/margin_std": 13.567419052124023,
"step": 439
},
{
"epoch": 0.6651549508692366,
"fcm_dpo/beta": 0.0649486556649208,
"fcm_dpo/delta": -0.22526824474334717,
"fcm_dpo/margin": 12.433676719665527,
"fcm_dpo/q_t": 0.33364030718803406,
"grad_norm": 12.849279403686523,
"learning_rate": 1.534137185767178e-07,
"logits/chosen": 1.199397087097168,
"logits/rejected": 0.9434144496917725,
"logps/chosen": -62.47209930419922,
"logps/ref_chosen": -61.662452697753906,
"logps/ref_rejected": -86.81646728515625,
"logps/rejected": -100.0597915649414,
"loss": 0.892,
"margin_dpo/margin_mean": 12.433677673339844,
"margin_dpo/margin_std": 13.436738967895508,
"step": 440
},
{
"epoch": 0.6666666666666666,
"fcm_dpo/beta": 0.06319974362850189,
"fcm_dpo/delta": -0.0502944216132164,
"fcm_dpo/margin": 10.227783203125,
"fcm_dpo/q_t": 0.364610493183136,
"grad_norm": 13.985926628112793,
"learning_rate": 1.521955206326976e-07,
"logits/chosen": 1.3294029235839844,
"logits/rejected": 1.0824699401855469,
"logps/chosen": -75.17243957519531,
"logps/ref_chosen": -74.33235168457031,
"logps/ref_rejected": -99.654541015625,
"logps/rejected": -110.72241973876953,
"loss": 0.9857,
"margin_dpo/margin_mean": 10.227784156799316,
"margin_dpo/margin_std": 12.865455627441406,
"step": 441
},
{
"epoch": 0.6681783824640968,
"fcm_dpo/beta": 0.06394974142313004,
"fcm_dpo/delta": 0.053972695022821426,
"fcm_dpo/margin": 8.582521438598633,
"fcm_dpo/q_t": 0.38399142026901245,
"grad_norm": 12.952445983886719,
"learning_rate": 1.5098005849021078e-07,
"logits/chosen": 1.4604705572128296,
"logits/rejected": 1.3838759660720825,
"logps/chosen": -88.58601379394531,
"logps/ref_chosen": -82.42591857910156,
"logps/ref_rejected": -106.71090698242188,
"logps/rejected": -121.45352172851562,
"loss": 1.0601,
"margin_dpo/margin_mean": 8.582521438598633,
"margin_dpo/margin_std": 12.810592651367188,
"step": 442
},
{
"epoch": 0.6696900982615268,
"fcm_dpo/beta": 0.06243997812271118,
"fcm_dpo/delta": -0.13223691284656525,
"fcm_dpo/margin": 11.554607391357422,
"fcm_dpo/q_t": 0.3588990867137909,
"grad_norm": 11.408082008361816,
"learning_rate": 1.4976736614834662e-07,
"logits/chosen": 1.7185375690460205,
"logits/rejected": 1.536795735359192,
"logps/chosen": -76.07586669921875,
"logps/ref_chosen": -72.87019348144531,
"logps/ref_rejected": -94.48143005371094,
"logps/rejected": -109.24171447753906,
"loss": 1.008,
"margin_dpo/margin_mean": 11.554609298706055,
"margin_dpo/margin_std": 16.71849822998047,
"step": 443
},
{
"epoch": 0.671201814058957,
"fcm_dpo/beta": 0.06339798122644424,
"fcm_dpo/delta": 0.08339248597621918,
"fcm_dpo/margin": 4.655009746551514,
"fcm_dpo/q_t": 0.4389148950576782,
"grad_norm": 16.32600975036621,
"learning_rate": 1.4855747752871654e-07,
"logits/chosen": 1.699981689453125,
"logits/rejected": 1.4776818752288818,
"logps/chosen": -80.7209701538086,
"logps/ref_chosen": -74.650390625,
"logps/ref_rejected": -106.89204406738281,
"logps/rejected": -117.61763000488281,
"loss": 1.2909,
"margin_dpo/margin_mean": 4.655010223388672,
"margin_dpo/margin_std": 14.046520233154297,
"step": 444
},
{
"epoch": 0.672713529856387,
"fcm_dpo/beta": 0.06215813755989075,
"fcm_dpo/delta": -0.12267709523439407,
"fcm_dpo/margin": 11.481884002685547,
"fcm_dpo/q_t": 0.3535075783729553,
"grad_norm": 14.066444396972656,
"learning_rate": 1.473504264745062e-07,
"logits/chosen": 1.548810362815857,
"logits/rejected": 1.5138942003250122,
"logps/chosen": -81.1192855834961,
"logps/ref_chosen": -76.26957702636719,
"logps/ref_rejected": -89.84994506835938,
"logps/rejected": -106.1815414428711,
"loss": 0.9811,
"margin_dpo/margin_mean": 11.481884002685547,
"margin_dpo/margin_std": 15.008056640625,
"step": 445
},
{
"epoch": 0.674225245653817,
"fcm_dpo/beta": 0.061203934252262115,
"fcm_dpo/delta": -0.18258565664291382,
"fcm_dpo/margin": 12.521493911743164,
"fcm_dpo/q_t": 0.3369706869125366,
"grad_norm": 11.436417579650879,
"learning_rate": 1.461462467495284e-07,
"logits/chosen": 1.613840103149414,
"logits/rejected": 1.4555943012237549,
"logps/chosen": -64.69538879394531,
"logps/ref_chosen": -62.74647903442383,
"logps/ref_rejected": -86.395751953125,
"logps/rejected": -100.86616516113281,
"loss": 0.8931,
"margin_dpo/margin_mean": 12.521492958068848,
"margin_dpo/margin_std": 11.73055648803711,
"step": 446
},
{
"epoch": 0.6757369614512472,
"fcm_dpo/beta": 0.05934043973684311,
"fcm_dpo/delta": -0.04737187922000885,
"fcm_dpo/margin": 10.858245849609375,
"fcm_dpo/q_t": 0.36491459608078003,
"grad_norm": 15.227911949157715,
"learning_rate": 1.4494497203727843e-07,
"logits/chosen": 1.104290246963501,
"logits/rejected": 0.837737500667572,
"logps/chosen": -72.19987487792969,
"logps/ref_chosen": -71.06666564941406,
"logps/ref_rejected": -103.57111358642578,
"logps/rejected": -115.56256103515625,
"loss": 1.0167,
"margin_dpo/margin_mean": 10.858245849609375,
"margin_dpo/margin_std": 15.037179946899414,
"step": 447
},
{
"epoch": 0.6772486772486772,
"fcm_dpo/beta": 0.05986708775162697,
"fcm_dpo/delta": 0.056214213371276855,
"fcm_dpo/margin": 9.122482299804688,
"fcm_dpo/q_t": 0.3877629041671753,
"grad_norm": 11.285871505737305,
"learning_rate": 1.4374663593999256e-07,
"logits/chosen": 1.5320073366165161,
"logits/rejected": 1.4336310625076294,
"logps/chosen": -78.13660430908203,
"logps/ref_chosen": -73.400146484375,
"logps/ref_rejected": -96.34330749511719,
"logps/rejected": -110.20223999023438,
"loss": 1.0652,
"margin_dpo/margin_mean": 9.122482299804688,
"margin_dpo/margin_std": 13.94580078125,
"step": 448
},
{
"epoch": 0.6787603930461074,
"fcm_dpo/beta": 0.06041814386844635,
"fcm_dpo/delta": 0.055707208812236786,
"fcm_dpo/margin": 5.106272220611572,
"fcm_dpo/q_t": 0.4346545934677124,
"grad_norm": 17.820480346679688,
"learning_rate": 1.4255127197770707e-07,
"logits/chosen": 1.1485610008239746,
"logits/rejected": 1.1482468843460083,
"logps/chosen": -100.8125991821289,
"logps/ref_chosen": -93.66099548339844,
"logps/ref_rejected": -102.53019714355469,
"logps/rejected": -114.78807067871094,
"loss": 1.2859,
"margin_dpo/margin_mean": 5.1062726974487305,
"margin_dpo/margin_std": 14.768918991088867,
"step": 449
},
{
"epoch": 0.6802721088435374,
"fcm_dpo/beta": 0.06103084981441498,
"fcm_dpo/delta": 0.07125148177146912,
"fcm_dpo/margin": 8.726282119750977,
"fcm_dpo/q_t": 0.3890073895454407,
"grad_norm": 11.10236644744873,
"learning_rate": 1.4135891358732205e-07,
"logits/chosen": 1.480630874633789,
"logits/rejected": 1.1958320140838623,
"logps/chosen": -64.80349731445312,
"logps/ref_chosen": -62.52460479736328,
"logps/ref_rejected": -94.04986572265625,
"logps/rejected": -105.05503845214844,
"loss": 1.0907,
"margin_dpo/margin_mean": 8.726282119750977,
"margin_dpo/margin_std": 14.36546516418457,
"step": 450
},
{
"epoch": 0.6817838246409675,
"fcm_dpo/beta": 0.06261729449033737,
"fcm_dpo/delta": 0.13213106989860535,
"fcm_dpo/margin": 7.567141056060791,
"fcm_dpo/q_t": 0.40211862325668335,
"grad_norm": 12.100115776062012,
"learning_rate": 1.4016959412166437e-07,
"logits/chosen": 1.2561284303665161,
"logits/rejected": 1.1738208532333374,
"logps/chosen": -82.48243713378906,
"logps/ref_chosen": -79.14009094238281,
"logps/ref_rejected": -93.23919677734375,
"logps/rejected": -104.14868927001953,
"loss": 1.124,
"margin_dpo/margin_mean": 7.567141056060791,
"margin_dpo/margin_std": 13.501859664916992,
"step": 451
},
{
"epoch": 0.6832955404383976,
"fcm_dpo/beta": 0.0629560723900795,
"fcm_dpo/delta": -0.004285541363060474,
"fcm_dpo/margin": 9.594108581542969,
"fcm_dpo/q_t": 0.3788967430591583,
"grad_norm": 12.963932037353516,
"learning_rate": 1.3898334684855645e-07,
"logits/chosen": 1.3859320878982544,
"logits/rejected": 1.1935582160949707,
"logps/chosen": -74.30093383789062,
"logps/ref_chosen": -70.38827514648438,
"logps/ref_rejected": -95.47691345214844,
"logps/rejected": -108.98368835449219,
"loss": 1.0677,
"margin_dpo/margin_mean": 9.594108581542969,
"margin_dpo/margin_std": 15.307428359985352,
"step": 452
},
{
"epoch": 0.6848072562358276,
"fcm_dpo/beta": 0.06412823498249054,
"fcm_dpo/delta": 0.07911929488182068,
"fcm_dpo/margin": 8.15820026397705,
"fcm_dpo/q_t": 0.39394861459732056,
"grad_norm": 17.296875,
"learning_rate": 1.3780020494988445e-07,
"logits/chosen": 1.3142707347869873,
"logits/rejected": 1.205606460571289,
"logps/chosen": -84.48233032226562,
"logps/ref_chosen": -79.9207763671875,
"logps/ref_rejected": -90.20779418945312,
"logps/rejected": -102.92755126953125,
"loss": 1.149,
"margin_dpo/margin_mean": 8.15820026397705,
"margin_dpo/margin_std": 15.361320495605469,
"step": 453
},
{
"epoch": 0.6863189720332578,
"fcm_dpo/beta": 0.06284962594509125,
"fcm_dpo/delta": -0.10391123592853546,
"fcm_dpo/margin": 11.083335876464844,
"fcm_dpo/q_t": 0.3570789694786072,
"grad_norm": 11.321577072143555,
"learning_rate": 1.366202015206706e-07,
"logits/chosen": 1.2680672407150269,
"logits/rejected": 1.1710636615753174,
"logps/chosen": -70.97998046875,
"logps/ref_chosen": -69.71887969970703,
"logps/ref_rejected": -82.86952209472656,
"logps/rejected": -95.21395874023438,
"loss": 1.0145,
"margin_dpo/margin_mean": 11.083334922790527,
"margin_dpo/margin_std": 15.592472076416016,
"step": 454
},
{
"epoch": 0.6878306878306878,
"fcm_dpo/beta": 0.061194583773612976,
"fcm_dpo/delta": -0.140131413936615,
"fcm_dpo/margin": 11.924175262451172,
"fcm_dpo/q_t": 0.3493908643722534,
"grad_norm": 11.763124465942383,
"learning_rate": 1.354433695681474e-07,
"logits/chosen": 1.165942668914795,
"logits/rejected": 1.083987832069397,
"logps/chosen": -93.37379455566406,
"logps/ref_chosen": -89.51481628417969,
"logps/ref_rejected": -97.93235778808594,
"logps/rejected": -113.71551513671875,
"loss": 0.9404,
"margin_dpo/margin_mean": 11.924175262451172,
"margin_dpo/margin_std": 14.309093475341797,
"step": 455
},
{
"epoch": 0.6893424036281179,
"fcm_dpo/beta": 0.06031159684062004,
"fcm_dpo/delta": -0.015023987740278244,
"fcm_dpo/margin": 10.167566299438477,
"fcm_dpo/q_t": 0.3712005317211151,
"grad_norm": 11.464375495910645,
"learning_rate": 1.3426974201083439e-07,
"logits/chosen": 1.5450102090835571,
"logits/rejected": 1.3907766342163086,
"logps/chosen": -78.01705932617188,
"logps/ref_chosen": -74.60527038574219,
"logps/ref_rejected": -97.98377227783203,
"logps/rejected": -111.56312561035156,
"loss": 1.0104,
"margin_dpo/margin_mean": 10.167566299438477,
"margin_dpo/margin_std": 13.522197723388672,
"step": 456
},
{
"epoch": 0.690854119425548,
"fcm_dpo/beta": 0.06131181865930557,
"fcm_dpo/delta": 0.11020015925168991,
"fcm_dpo/margin": 8.077266693115234,
"fcm_dpo/q_t": 0.39555391669273376,
"grad_norm": 14.085563659667969,
"learning_rate": 1.3309935167761717e-07,
"logits/chosen": 1.6795952320098877,
"logits/rejected": 1.471513032913208,
"logps/chosen": -69.77462005615234,
"logps/ref_chosen": -63.927032470703125,
"logps/ref_rejected": -83.15243530273438,
"logps/rejected": -97.0772933959961,
"loss": 1.0796,
"margin_dpo/margin_mean": 8.07726764678955,
"margin_dpo/margin_std": 12.317647933959961,
"step": 457
},
{
"epoch": 0.6923658352229781,
"fcm_dpo/beta": 0.061296649277210236,
"fcm_dpo/delta": -0.0939876139163971,
"fcm_dpo/margin": 11.217958450317383,
"fcm_dpo/q_t": 0.3544745445251465,
"grad_norm": 13.609352111816406,
"learning_rate": 1.3193223130682936e-07,
"logits/chosen": 1.7661914825439453,
"logits/rejected": 1.458505392074585,
"logps/chosen": -69.31581115722656,
"logps/ref_chosen": -67.68869018554688,
"logps/ref_rejected": -104.40899658203125,
"logps/rejected": -117.25407409667969,
"loss": 0.9614,
"margin_dpo/margin_mean": 11.217958450317383,
"margin_dpo/margin_std": 13.57296371459961,
"step": 458
},
{
"epoch": 0.6938775510204082,
"fcm_dpo/beta": 0.06135866418480873,
"fcm_dpo/delta": -0.07701873779296875,
"fcm_dpo/margin": 10.853067398071289,
"fcm_dpo/q_t": 0.36258336901664734,
"grad_norm": 12.754447937011719,
"learning_rate": 1.3076841354533658e-07,
"logits/chosen": 1.7871050834655762,
"logits/rejected": 1.6686228513717651,
"logps/chosen": -85.60640716552734,
"logps/ref_chosen": -83.82363891601562,
"logps/ref_rejected": -103.75938415527344,
"logps/rejected": -116.39521789550781,
"loss": 1.0063,
"margin_dpo/margin_mean": 10.853067398071289,
"margin_dpo/margin_std": 13.418067932128906,
"step": 459
},
{
"epoch": 0.6953892668178382,
"fcm_dpo/beta": 0.05903376638889313,
"fcm_dpo/delta": -0.0766952782869339,
"fcm_dpo/margin": 11.3674955368042,
"fcm_dpo/q_t": 0.3633783757686615,
"grad_norm": 12.821139335632324,
"learning_rate": 1.2960793094762345e-07,
"logits/chosen": 1.5267561674118042,
"logits/rejected": 1.1998920440673828,
"logps/chosen": -84.30833435058594,
"logps/ref_chosen": -79.4836654663086,
"logps/ref_rejected": -112.31745910644531,
"logps/rejected": -128.50962829589844,
"loss": 0.9777,
"margin_dpo/margin_mean": 11.367496490478516,
"margin_dpo/margin_std": 14.649511337280273,
"step": 460
},
{
"epoch": 0.6969009826152683,
"fcm_dpo/beta": 0.05793311446905136,
"fcm_dpo/delta": -0.05363578349351883,
"fcm_dpo/margin": 11.177489280700684,
"fcm_dpo/q_t": 0.36283209919929504,
"grad_norm": 11.308926582336426,
"learning_rate": 1.2845081597488286e-07,
"logits/chosen": 1.801574468612671,
"logits/rejected": 1.589023232460022,
"logps/chosen": -65.58940124511719,
"logps/ref_chosen": -64.28482055664062,
"logps/ref_rejected": -93.73818969726562,
"logps/rejected": -106.22026062011719,
"loss": 0.978,
"margin_dpo/margin_mean": 11.177490234375,
"margin_dpo/margin_std": 13.440290451049805,
"step": 461
},
{
"epoch": 0.6984126984126984,
"fcm_dpo/beta": 0.05706631764769554,
"fcm_dpo/delta": -0.12463247776031494,
"fcm_dpo/margin": 12.52739143371582,
"fcm_dpo/q_t": 0.3522951006889343,
"grad_norm": 13.344515800476074,
"learning_rate": 1.27297100994108e-07,
"logits/chosen": 1.6235347986221313,
"logits/rejected": 1.4617130756378174,
"logps/chosen": -80.09829711914062,
"logps/ref_chosen": -77.15335083007812,
"logps/ref_rejected": -91.12923431396484,
"logps/rejected": -106.60157012939453,
"loss": 0.9886,
"margin_dpo/margin_mean": 12.52739143371582,
"margin_dpo/margin_std": 16.76566505432129,
"step": 462
},
{
"epoch": 0.6999244142101285,
"fcm_dpo/beta": 0.05774568021297455,
"fcm_dpo/delta": 0.1331457495689392,
"fcm_dpo/margin": 8.195122718811035,
"fcm_dpo/q_t": 0.4045426547527313,
"grad_norm": 15.691317558288574,
"learning_rate": 1.2614681827718695e-07,
"logits/chosen": 1.3940520286560059,
"logits/rejected": 1.3858566284179688,
"logps/chosen": -91.70310974121094,
"logps/ref_chosen": -87.58760070800781,
"logps/ref_rejected": -87.97022247314453,
"logps/rejected": -100.28085327148438,
"loss": 1.1291,
"margin_dpo/margin_mean": 8.195122718811035,
"margin_dpo/margin_std": 14.629854202270508,
"step": 463
},
{
"epoch": 0.7014361300075586,
"fcm_dpo/beta": 0.05825965106487274,
"fcm_dpo/delta": -0.06146547943353653,
"fcm_dpo/margin": 11.280075073242188,
"fcm_dpo/q_t": 0.37248778343200684,
"grad_norm": 12.037423133850098,
"learning_rate": 1.2500000000000005e-07,
"logits/chosen": 1.252054214477539,
"logits/rejected": 1.1450973749160767,
"logps/chosen": -80.35469818115234,
"logps/ref_chosen": -75.83175659179688,
"logps/ref_rejected": -84.4811019897461,
"logps/rejected": -100.28411865234375,
"loss": 1.0885,
"margin_dpo/margin_mean": 11.280075073242188,
"margin_dpo/margin_std": 18.626415252685547,
"step": 464
},
{
"epoch": 0.7029478458049887,
"fcm_dpo/beta": 0.057572394609451294,
"fcm_dpo/delta": -0.009513016790151596,
"fcm_dpo/margin": 10.575401306152344,
"fcm_dpo/q_t": 0.3781369924545288,
"grad_norm": 14.940975189208984,
"learning_rate": 1.238566782415197e-07,
"logits/chosen": 1.646533727645874,
"logits/rejected": 1.4950196743011475,
"logps/chosen": -82.08723449707031,
"logps/ref_chosen": -77.057861328125,
"logps/ref_rejected": -102.75727844238281,
"logps/rejected": -118.36204528808594,
"loss": 1.0649,
"margin_dpo/margin_mean": 10.575400352478027,
"margin_dpo/margin_std": 16.719135284423828,
"step": 465
},
{
"epoch": 0.7044595616024187,
"fcm_dpo/beta": 0.059257976710796356,
"fcm_dpo/delta": 0.2110356092453003,
"fcm_dpo/margin": 6.714020729064941,
"fcm_dpo/q_t": 0.41916608810424805,
"grad_norm": 20.281587600708008,
"learning_rate": 1.2271688498291334e-07,
"logits/chosen": 1.3608951568603516,
"logits/rejected": 1.3925187587738037,
"logps/chosen": -97.87811279296875,
"logps/ref_chosen": -91.7751693725586,
"logps/ref_rejected": -90.2679443359375,
"logps/rejected": -103.08491516113281,
"loss": 1.1829,
"margin_dpo/margin_mean": 6.714020252227783,
"margin_dpo/margin_std": 14.033638000488281,
"step": 466
},
{
"epoch": 0.7059712773998488,
"fcm_dpo/beta": 0.05979441851377487,
"fcm_dpo/delta": -0.03212842717766762,
"fcm_dpo/margin": 10.537097930908203,
"fcm_dpo/q_t": 0.370755672454834,
"grad_norm": 12.759522438049316,
"learning_rate": 1.2158065210664848e-07,
"logits/chosen": 1.4100677967071533,
"logits/rejected": 1.0926790237426758,
"logps/chosen": -67.85696411132812,
"logps/ref_chosen": -64.77557373046875,
"logps/ref_rejected": -102.58863830566406,
"logps/rejected": -116.20712280273438,
"loss": 1.0002,
"margin_dpo/margin_mean": 10.53709602355957,
"margin_dpo/margin_std": 14.119759559631348,
"step": 467
},
{
"epoch": 0.7074829931972789,
"fcm_dpo/beta": 0.05894845724105835,
"fcm_dpo/delta": -0.12605436146259308,
"fcm_dpo/margin": 12.165056228637695,
"fcm_dpo/q_t": 0.35496601462364197,
"grad_norm": 12.62433910369873,
"learning_rate": 1.204480113956011e-07,
"logits/chosen": 1.520838737487793,
"logits/rejected": 1.5226354598999023,
"logps/chosen": -86.23693084716797,
"logps/ref_chosen": -82.22445678710938,
"logps/ref_rejected": -92.99041748046875,
"logps/rejected": -109.1679458618164,
"loss": 0.9979,
"margin_dpo/margin_mean": 12.165056228637695,
"margin_dpo/margin_std": 16.692893981933594,
"step": 468
},
{
"epoch": 0.708994708994709,
"fcm_dpo/beta": 0.05790429934859276,
"fcm_dpo/delta": 0.029473505914211273,
"fcm_dpo/margin": 9.849138259887695,
"fcm_dpo/q_t": 0.38115769624710083,
"grad_norm": 12.907513618469238,
"learning_rate": 1.1931899453216697e-07,
"logits/chosen": 1.6713244915008545,
"logits/rejected": 1.6393948793411255,
"logps/chosen": -79.8751449584961,
"logps/ref_chosen": -75.93031311035156,
"logps/ref_rejected": -92.26559448242188,
"logps/rejected": -106.0595703125,
"loss": 1.0303,
"margin_dpo/margin_mean": 9.849139213562012,
"margin_dpo/margin_std": 13.105131149291992,
"step": 469
},
{
"epoch": 0.7105064247921391,
"fcm_dpo/beta": 0.05865296721458435,
"fcm_dpo/delta": 0.004474967252463102,
"fcm_dpo/margin": 10.157899856567383,
"fcm_dpo/q_t": 0.3750629723072052,
"grad_norm": 10.989476203918457,
"learning_rate": 1.1819363309737438e-07,
"logits/chosen": 1.3479657173156738,
"logits/rejected": 1.1786839962005615,
"logps/chosen": -70.01264953613281,
"logps/ref_chosen": -65.86345672607422,
"logps/ref_rejected": -85.89832305908203,
"logps/rejected": -100.2054214477539,
"loss": 1.0466,
"margin_dpo/margin_mean": 10.157899856567383,
"margin_dpo/margin_std": 15.077445983886719,
"step": 470
},
{
"epoch": 0.7120181405895691,
"fcm_dpo/beta": 0.057473134249448776,
"fcm_dpo/delta": -0.15603893995285034,
"fcm_dpo/margin": 12.95675277709961,
"fcm_dpo/q_t": 0.34615379571914673,
"grad_norm": 12.145216941833496,
"learning_rate": 1.1707195857000215e-07,
"logits/chosen": 1.4731967449188232,
"logits/rejected": 1.3386666774749756,
"logps/chosen": -76.57672119140625,
"logps/ref_chosen": -74.3460922241211,
"logps/ref_rejected": -93.43672943115234,
"logps/rejected": -108.62411499023438,
"loss": 0.9717,
"margin_dpo/margin_mean": 12.95675277709961,
"margin_dpo/margin_std": 16.800918579101562,
"step": 471
},
{
"epoch": 0.7135298563869993,
"fcm_dpo/beta": 0.056866731494665146,
"fcm_dpo/delta": -0.03586677089333534,
"fcm_dpo/margin": 11.134857177734375,
"fcm_dpo/q_t": 0.3769086003303528,
"grad_norm": 11.125615119934082,
"learning_rate": 1.1595400232569768e-07,
"logits/chosen": 1.6612334251403809,
"logits/rejected": 1.5427844524383545,
"logps/chosen": -76.58341217041016,
"logps/ref_chosen": -74.75674438476562,
"logps/ref_rejected": -95.18183135986328,
"logps/rejected": -108.14335632324219,
"loss": 1.0803,
"margin_dpo/margin_mean": 11.134857177734375,
"margin_dpo/margin_std": 18.514007568359375,
"step": 472
},
{
"epoch": 0.7150415721844293,
"fcm_dpo/beta": 0.056203171610832214,
"fcm_dpo/delta": -0.01779717206954956,
"fcm_dpo/margin": 10.971089363098145,
"fcm_dpo/q_t": 0.37407466769218445,
"grad_norm": 11.89809799194336,
"learning_rate": 1.1483979563610069e-07,
"logits/chosen": 1.95254647731781,
"logits/rejected": 1.671574592590332,
"logps/chosen": -71.95792388916016,
"logps/ref_chosen": -71.65933227539062,
"logps/ref_rejected": -109.99200439453125,
"logps/rejected": -121.26168060302734,
"loss": 1.0624,
"margin_dpo/margin_mean": 10.971089363098145,
"margin_dpo/margin_std": 17.36050796508789,
"step": 473
},
{
"epoch": 0.7165532879818595,
"fcm_dpo/beta": 0.05685240030288696,
"fcm_dpo/delta": 0.0948951244354248,
"fcm_dpo/margin": 8.970964431762695,
"fcm_dpo/q_t": 0.39646458625793457,
"grad_norm": 14.758679389953613,
"learning_rate": 1.1372936966796709e-07,
"logits/chosen": 1.6065518856048584,
"logits/rejected": 1.4003615379333496,
"logps/chosen": -70.70443725585938,
"logps/ref_chosen": -65.91990661621094,
"logps/ref_rejected": -89.09432983398438,
"logps/rejected": -102.84982299804688,
"loss": 1.1318,
"margin_dpo/margin_mean": 8.970963478088379,
"margin_dpo/margin_std": 16.53539276123047,
"step": 474
},
{
"epoch": 0.7180650037792895,
"fcm_dpo/beta": 0.055412329733371735,
"fcm_dpo/delta": -0.21177589893341064,
"fcm_dpo/margin": 14.334150314331055,
"fcm_dpo/q_t": 0.33283495903015137,
"grad_norm": 11.61395263671875,
"learning_rate": 1.126227554822985e-07,
"logits/chosen": 1.3431096076965332,
"logits/rejected": 1.2758687734603882,
"logps/chosen": -82.76185607910156,
"logps/ref_chosen": -79.02459716796875,
"logps/ref_rejected": -107.33058166503906,
"logps/rejected": -125.4019775390625,
"loss": 0.8963,
"margin_dpo/margin_mean": 14.334149360656738,
"margin_dpo/margin_std": 15.547351837158203,
"step": 475
},
{
"epoch": 0.7195767195767195,
"fcm_dpo/beta": 0.05529044568538666,
"fcm_dpo/delta": 0.057061512023210526,
"fcm_dpo/margin": 9.876468658447266,
"fcm_dpo/q_t": 0.3873744606971741,
"grad_norm": 11.611725807189941,
"learning_rate": 1.1151998403347243e-07,
"logits/chosen": 1.4797489643096924,
"logits/rejected": 1.4424426555633545,
"logps/chosen": -99.78776550292969,
"logps/ref_chosen": -93.72602844238281,
"logps/ref_rejected": -94.390625,
"logps/rejected": -110.32882690429688,
"loss": 1.0784,
"margin_dpo/margin_mean": 9.876466751098633,
"margin_dpo/margin_std": 15.821935653686523,
"step": 476
},
{
"epoch": 0.7210884353741497,
"fcm_dpo/beta": 0.056804411113262177,
"fcm_dpo/delta": 0.1499871164560318,
"fcm_dpo/margin": 8.044179916381836,
"fcm_dpo/q_t": 0.4083792567253113,
"grad_norm": 18.420042037963867,
"learning_rate": 1.1042108616837692e-07,
"logits/chosen": 1.642760992050171,
"logits/rejected": 1.558333158493042,
"logps/chosen": -85.10441589355469,
"logps/ref_chosen": -76.51399993896484,
"logps/ref_rejected": -99.14356231689453,
"logps/rejected": -115.77816009521484,
"loss": 1.2094,
"margin_dpo/margin_mean": 8.044179916381836,
"margin_dpo/margin_std": 17.958162307739258,
"step": 477
},
{
"epoch": 0.7226001511715797,
"fcm_dpo/beta": 0.057875052094459534,
"fcm_dpo/delta": 0.09590927511453629,
"fcm_dpo/margin": 8.795384407043457,
"fcm_dpo/q_t": 0.40439170598983765,
"grad_norm": 16.46158218383789,
"learning_rate": 1.0932609262554746e-07,
"logits/chosen": 1.2920639514923096,
"logits/rejected": 1.2550523281097412,
"logps/chosen": -82.43702697753906,
"logps/ref_chosen": -77.95186614990234,
"logps/ref_rejected": -69.77754211425781,
"logps/rejected": -83.05809020996094,
"loss": 1.2198,
"margin_dpo/margin_mean": 8.795384407043457,
"margin_dpo/margin_std": 19.819679260253906,
"step": 478
},
{
"epoch": 0.7241118669690099,
"fcm_dpo/beta": 0.059072867035865784,
"fcm_dpo/delta": 0.09962013363838196,
"fcm_dpo/margin": 8.55759048461914,
"fcm_dpo/q_t": 0.396778404712677,
"grad_norm": 17.041179656982422,
"learning_rate": 1.0823503403430734e-07,
"logits/chosen": 1.332829475402832,
"logits/rejected": 1.2122066020965576,
"logps/chosen": -80.6783447265625,
"logps/ref_chosen": -76.56551361083984,
"logps/ref_rejected": -84.33758544921875,
"logps/rejected": -97.00800323486328,
"loss": 1.1704,
"margin_dpo/margin_mean": 8.55759048461914,
"margin_dpo/margin_std": 17.415115356445312,
"step": 479
},
{
"epoch": 0.7256235827664399,
"fcm_dpo/beta": 0.05932193621993065,
"fcm_dpo/delta": -0.08332247287034988,
"fcm_dpo/margin": 11.412254333496094,
"fcm_dpo/q_t": 0.3587559759616852,
"grad_norm": 20.78253936767578,
"learning_rate": 1.0714794091391072e-07,
"logits/chosen": 1.3048365116119385,
"logits/rejected": 1.2908906936645508,
"logps/chosen": -82.61824798583984,
"logps/ref_chosen": -80.15884399414062,
"logps/ref_rejected": -84.88697814941406,
"logps/rejected": -98.75863647460938,
"loss": 1.0667,
"margin_dpo/margin_mean": 11.412253379821777,
"margin_dpo/margin_std": 17.866085052490234,
"step": 480
},
{
"epoch": 0.72713529856387,
"fcm_dpo/beta": 0.0585302859544754,
"fcm_dpo/delta": -0.008791010826826096,
"fcm_dpo/margin": 10.392139434814453,
"fcm_dpo/q_t": 0.38119131326675415,
"grad_norm": 12.047948837280273,
"learning_rate": 1.0606484367268906e-07,
"logits/chosen": 1.1564797163009644,
"logits/rejected": 1.1351284980773926,
"logps/chosen": -88.25604248046875,
"logps/ref_chosen": -84.56254577636719,
"logps/ref_rejected": -90.06451416015625,
"logps/rejected": -104.150146484375,
"loss": 1.0787,
"margin_dpo/margin_mean": 10.392141342163086,
"margin_dpo/margin_std": 17.54533576965332,
"step": 481
},
{
"epoch": 0.7286470143613001,
"fcm_dpo/beta": 0.058919116854667664,
"fcm_dpo/delta": 0.0236910879611969,
"fcm_dpo/margin": 9.797449111938477,
"fcm_dpo/q_t": 0.390002578496933,
"grad_norm": 17.564355850219727,
"learning_rate": 1.0498577260720048e-07,
"logits/chosen": 1.3875517845153809,
"logits/rejected": 1.0670008659362793,
"logps/chosen": -85.58686828613281,
"logps/ref_chosen": -78.88141632080078,
"logps/ref_rejected": -125.41990661621094,
"logps/rejected": -141.9228057861328,
"loss": 1.1476,
"margin_dpo/margin_mean": 9.79744815826416,
"margin_dpo/margin_std": 18.76854705810547,
"step": 482
},
{
"epoch": 0.7301587301587301,
"fcm_dpo/beta": 0.05753147974610329,
"fcm_dpo/delta": -0.13096870481967926,
"fcm_dpo/margin": 12.537253379821777,
"fcm_dpo/q_t": 0.3545553684234619,
"grad_norm": 13.328836441040039,
"learning_rate": 1.0391075790138232e-07,
"logits/chosen": 1.4349303245544434,
"logits/rejected": 1.1912516355514526,
"logps/chosen": -76.0250244140625,
"logps/ref_chosen": -72.690185546875,
"logps/ref_rejected": -98.37237548828125,
"logps/rejected": -114.24446868896484,
"loss": 1.0076,
"margin_dpo/margin_mean": 12.537254333496094,
"margin_dpo/margin_std": 17.870529174804688,
"step": 483
},
{
"epoch": 0.7316704459561603,
"fcm_dpo/beta": 0.05832766741514206,
"fcm_dpo/delta": 0.09759794175624847,
"fcm_dpo/margin": 8.684357643127441,
"fcm_dpo/q_t": 0.3952082395553589,
"grad_norm": 12.609947204589844,
"learning_rate": 1.0283982962570681e-07,
"logits/chosen": 1.8161753416061401,
"logits/rejected": 1.7682876586914062,
"logps/chosen": -77.07719421386719,
"logps/ref_chosen": -73.98435974121094,
"logps/ref_rejected": -89.99178314208984,
"logps/rejected": -101.76897430419922,
"loss": 1.0971,
"margin_dpo/margin_mean": 8.684356689453125,
"margin_dpo/margin_std": 14.368082046508789,
"step": 484
},
{
"epoch": 0.7331821617535903,
"fcm_dpo/beta": 0.05779796093702316,
"fcm_dpo/delta": 0.01702851802110672,
"fcm_dpo/margin": 10.05638599395752,
"fcm_dpo/q_t": 0.385127454996109,
"grad_norm": 13.867687225341797,
"learning_rate": 1.0177301773633992e-07,
"logits/chosen": 1.931138277053833,
"logits/rejected": 1.8297758102416992,
"logps/chosen": -81.68190002441406,
"logps/ref_chosen": -78.0927963256836,
"logps/ref_rejected": -89.14010620117188,
"logps/rejected": -102.78559875488281,
"loss": 1.0807,
"margin_dpo/margin_mean": 10.056386947631836,
"margin_dpo/margin_std": 16.07583999633789,
"step": 485
},
{
"epoch": 0.7346938775510204,
"fcm_dpo/beta": 0.05844534933567047,
"fcm_dpo/delta": -0.05448358133435249,
"fcm_dpo/margin": 11.130483627319336,
"fcm_dpo/q_t": 0.3756754696369171,
"grad_norm": 11.819550514221191,
"learning_rate": 1.007103520743035e-07,
"logits/chosen": 1.565119743347168,
"logits/rejected": 1.296295404434204,
"logps/chosen": -80.0963363647461,
"logps/ref_chosen": -73.74685668945312,
"logps/ref_rejected": -107.752685546875,
"logps/rejected": -125.23265838623047,
"loss": 1.0994,
"margin_dpo/margin_mean": 11.130483627319336,
"margin_dpo/margin_std": 19.282230377197266,
"step": 486
},
{
"epoch": 0.7362055933484505,
"fcm_dpo/beta": 0.0577419213950634,
"fcm_dpo/delta": -0.019458172842860222,
"fcm_dpo/margin": 10.707120895385742,
"fcm_dpo/q_t": 0.37412503361701965,
"grad_norm": 12.673027992248535,
"learning_rate": 9.965186236464046e-08,
"logits/chosen": 1.7286944389343262,
"logits/rejected": 1.5361676216125488,
"logps/chosen": -85.74382019042969,
"logps/ref_chosen": -79.57780456542969,
"logps/ref_rejected": -102.2916259765625,
"logps/rejected": -119.16477966308594,
"loss": 1.0409,
"margin_dpo/margin_mean": 10.707120895385742,
"margin_dpo/margin_std": 16.0146541595459,
"step": 487
},
{
"epoch": 0.7377173091458806,
"fcm_dpo/beta": 0.05774332210421562,
"fcm_dpo/delta": -0.0757601261138916,
"fcm_dpo/margin": 11.58226203918457,
"fcm_dpo/q_t": 0.37853580713272095,
"grad_norm": 19.57643699645996,
"learning_rate": 9.859757821558337e-08,
"logits/chosen": 1.3294143676757812,
"logits/rejected": 1.1671702861785889,
"logps/chosen": -84.84293365478516,
"logps/ref_chosen": -80.62767791748047,
"logps/ref_rejected": -100.4541015625,
"logps/rejected": -116.25162506103516,
"loss": 1.0968,
"margin_dpo/margin_mean": 11.582262992858887,
"margin_dpo/margin_std": 19.66269302368164,
"step": 488
},
{
"epoch": 0.7392290249433107,
"fcm_dpo/beta": 0.05942504480481148,
"fcm_dpo/delta": 0.30510973930358887,
"fcm_dpo/margin": 5.123412132263184,
"fcm_dpo/q_t": 0.44007402658462524,
"grad_norm": 15.113093376159668,
"learning_rate": 9.754752911772615e-08,
"logits/chosen": 1.3929004669189453,
"logits/rejected": 1.290812373161316,
"logps/chosen": -91.3660888671875,
"logps/ref_chosen": -85.39521026611328,
"logps/ref_rejected": -101.97309875488281,
"logps/rejected": -113.06739044189453,
"loss": 1.2833,
"margin_dpo/margin_mean": 5.1234130859375,
"margin_dpo/margin_std": 15.02302360534668,
"step": 489
},
{
"epoch": 0.7407407407407407,
"fcm_dpo/beta": 0.05948188900947571,
"fcm_dpo/delta": -0.07982932776212692,
"fcm_dpo/margin": 11.330092430114746,
"fcm_dpo/q_t": 0.374492347240448,
"grad_norm": 16.68990135192871,
"learning_rate": 9.650174444319956e-08,
"logits/chosen": 1.7335550785064697,
"logits/rejected": 1.6902096271514893,
"logps/chosen": -82.07659912109375,
"logps/ref_chosen": -77.75590515136719,
"logps/ref_rejected": -88.98885345458984,
"logps/rejected": -104.6396484375,
"loss": 1.1096,
"margin_dpo/margin_mean": 11.330092430114746,
"margin_dpo/margin_std": 19.99999237060547,
"step": 490
},
{
"epoch": 0.7422524565381708,
"fcm_dpo/beta": 0.05829251557588577,
"fcm_dpo/delta": -0.06507232040166855,
"fcm_dpo/margin": 11.291997909545898,
"fcm_dpo/q_t": 0.3683939576148987,
"grad_norm": 11.451486587524414,
"learning_rate": 9.546025344484868e-08,
"logits/chosen": 1.2975845336914062,
"logits/rejected": 1.1464436054229736,
"logps/chosen": -78.01144409179688,
"logps/ref_chosen": -74.33360290527344,
"logps/ref_rejected": -91.4105224609375,
"logps/rejected": -106.38035583496094,
"loss": 1.007,
"margin_dpo/margin_mean": 11.291997909545898,
"margin_dpo/margin_std": 15.383825302124023,
"step": 491
},
{
"epoch": 0.7437641723356009,
"fcm_dpo/beta": 0.058672577142715454,
"fcm_dpo/delta": -0.0013382361503317952,
"fcm_dpo/margin": 7.504390716552734,
"fcm_dpo/q_t": 0.4148620367050171,
"grad_norm": 16.56096076965332,
"learning_rate": 9.442308525541589e-08,
"logits/chosen": 1.4910600185394287,
"logits/rejected": 1.2763690948486328,
"logps/chosen": -94.51632690429688,
"logps/ref_chosen": -85.14178466796875,
"logps/ref_rejected": -103.44204711914062,
"logps/rejected": -120.32098388671875,
"loss": 1.2105,
"margin_dpo/margin_mean": 7.504390716552734,
"margin_dpo/margin_std": 16.77596664428711,
"step": 492
},
{
"epoch": 0.745275888133031,
"fcm_dpo/beta": 0.05836326628923416,
"fcm_dpo/delta": -0.06718596071004868,
"fcm_dpo/margin": 11.352909088134766,
"fcm_dpo/q_t": 0.36989927291870117,
"grad_norm": 11.82755184173584,
"learning_rate": 9.339026888672468e-08,
"logits/chosen": 1.6452562808990479,
"logits/rejected": 1.4447059631347656,
"logps/chosen": -79.38182067871094,
"logps/ref_chosen": -75.81439208984375,
"logps/ref_rejected": -95.30766296386719,
"logps/rejected": -110.22799682617188,
"loss": 1.0806,
"margin_dpo/margin_mean": 11.352909088134766,
"margin_dpo/margin_std": 18.79556655883789,
"step": 493
},
{
"epoch": 0.7467876039304611,
"fcm_dpo/beta": 0.058134227991104126,
"fcm_dpo/delta": 0.04909532144665718,
"fcm_dpo/margin": 9.522611618041992,
"fcm_dpo/q_t": 0.3936055302619934,
"grad_norm": 15.155073165893555,
"learning_rate": 9.236183322886945e-08,
"logits/chosen": 1.1880576610565186,
"logits/rejected": 1.1128486394882202,
"logps/chosen": -98.40640258789062,
"logps/ref_chosen": -93.83562469482422,
"logps/ref_rejected": -112.21142578125,
"logps/rejected": -126.30481719970703,
"loss": 1.1519,
"margin_dpo/margin_mean": 9.522610664367676,
"margin_dpo/margin_std": 18.529830932617188,
"step": 494
},
{
"epoch": 0.7482993197278912,
"fcm_dpo/beta": 0.059422802180051804,
"fcm_dpo/delta": 0.07689768075942993,
"fcm_dpo/margin": 8.856720924377441,
"fcm_dpo/q_t": 0.3950550854206085,
"grad_norm": 18.023229598999023,
"learning_rate": 9.133780704940594e-08,
"logits/chosen": 1.6511743068695068,
"logits/rejected": 1.4738554954528809,
"logps/chosen": -73.00120544433594,
"logps/ref_chosen": -68.52467346191406,
"logps/ref_rejected": -89.65379333496094,
"logps/rejected": -102.98704528808594,
"loss": 1.17,
"margin_dpo/margin_mean": 8.856720924377441,
"margin_dpo/margin_std": 17.582279205322266,
"step": 495
},
{
"epoch": 0.7498110355253212,
"fcm_dpo/beta": 0.05871668457984924,
"fcm_dpo/delta": -0.040396757423877716,
"fcm_dpo/margin": 10.85169506072998,
"fcm_dpo/q_t": 0.37551993131637573,
"grad_norm": 12.240704536437988,
"learning_rate": 9.031821899254797e-08,
"logits/chosen": 1.7110998630523682,
"logits/rejected": 1.4665889739990234,
"logps/chosen": -78.06710815429688,
"logps/ref_chosen": -73.13618469238281,
"logps/ref_rejected": -111.50930786132812,
"logps/rejected": -127.29193115234375,
"loss": 1.0757,
"margin_dpo/margin_mean": 10.851696014404297,
"margin_dpo/margin_std": 17.6138973236084,
"step": 496
},
{
"epoch": 0.7513227513227513,
"fcm_dpo/beta": 0.05747950077056885,
"fcm_dpo/delta": -0.11834258586168289,
"fcm_dpo/margin": 12.324544906616211,
"fcm_dpo/q_t": 0.3581116795539856,
"grad_norm": 14.844618797302246,
"learning_rate": 8.930309757836516e-08,
"logits/chosen": 1.3825771808624268,
"logits/rejected": 1.2625739574432373,
"logps/chosen": -95.60760498046875,
"logps/ref_chosen": -88.71475219726562,
"logps/ref_rejected": -105.74935913085938,
"logps/rejected": -124.96675109863281,
"loss": 0.9953,
"margin_dpo/margin_mean": 12.324544906616211,
"margin_dpo/margin_std": 16.996997833251953,
"step": 497
},
{
"epoch": 0.7528344671201814,
"fcm_dpo/beta": 0.05717446655035019,
"fcm_dpo/delta": -0.0596219003200531,
"fcm_dpo/margin": 11.468419075012207,
"fcm_dpo/q_t": 0.3738415241241455,
"grad_norm": 13.000709533691406,
"learning_rate": 8.829247120198563e-08,
"logits/chosen": 1.6439807415008545,
"logits/rejected": 1.5671942234039307,
"logps/chosen": -86.48239135742188,
"logps/ref_chosen": -83.3353271484375,
"logps/ref_rejected": -89.34941864013672,
"logps/rejected": -103.96488952636719,
"loss": 1.0519,
"margin_dpo/margin_mean": 11.468419075012207,
"margin_dpo/margin_std": 18.170747756958008,
"step": 498
},
{
"epoch": 0.7543461829176115,
"fcm_dpo/beta": 0.056997328996658325,
"fcm_dpo/delta": 0.01875562034547329,
"fcm_dpo/margin": 10.216790199279785,
"fcm_dpo/q_t": 0.38748350739479065,
"grad_norm": 14.796299934387207,
"learning_rate": 8.728636813280163e-08,
"logits/chosen": 1.5643539428710938,
"logits/rejected": 1.375131368637085,
"logps/chosen": -85.56379699707031,
"logps/ref_chosen": -79.373779296875,
"logps/ref_rejected": -104.62533569335938,
"logps/rejected": -121.03214263916016,
"loss": 1.1548,
"margin_dpo/margin_mean": 10.216791152954102,
"margin_dpo/margin_std": 19.614974975585938,
"step": 499
},
{
"epoch": 0.7558578987150416,
"fcm_dpo/beta": 0.05729161575436592,
"fcm_dpo/delta": -0.0038493499159812927,
"fcm_dpo/margin": 10.52761459350586,
"fcm_dpo/q_t": 0.37920159101486206,
"grad_norm": 14.683732032775879,
"learning_rate": 8.628481651367875e-08,
"logits/chosen": 1.3626763820648193,
"logits/rejected": 1.326643466949463,
"logps/chosen": -91.39049530029297,
"logps/ref_chosen": -85.953857421875,
"logps/ref_rejected": -90.40995788574219,
"logps/rejected": -106.37420654296875,
"loss": 1.0608,
"margin_dpo/margin_mean": 10.52761459350586,
"margin_dpo/margin_std": 16.482694625854492,
"step": 500
},
{
"epoch": 0.7558578987150416,
"eval_fcm_dpo/beta": 0.056991901248693466,
"eval_logits/chosen": 1.29319167137146,
"eval_logits/rejected": 1.1717369556427002,
"eval_logps/chosen": -90.84538269042969,
"eval_logps/ref_chosen": -86.90177917480469,
"eval_logps/ref_rejected": -96.69639587402344,
"eval_logps/rejected": -111.40538787841797,
"eval_loss": 0.5388143062591553,
"eval_margin_dpo/margin_mean": 10.765402793884277,
"eval_margin_dpo/margin_std": 17.406885147094727,
"eval_runtime": 42.3061,
"eval_samples_per_second": 54.437,
"eval_steps_per_second": 1.702,
"step": 500
},
{
"epoch": 0.7573696145124716,
"fcm_dpo/beta": 0.05800885707139969,
"fcm_dpo/delta": 0.07376056164503098,
"fcm_dpo/margin": 9.115702629089355,
"fcm_dpo/q_t": 0.394004762172699,
"grad_norm": 11.813311576843262,
"learning_rate": 8.528784436016878e-08,
"logits/chosen": 1.156029224395752,
"logits/rejected": 1.1506352424621582,
"logps/chosen": -85.94219970703125,
"logps/ref_chosen": -81.22268676757812,
"logps/ref_rejected": -86.97892761230469,
"logps/rejected": -100.81414794921875,
"loss": 1.0727,
"margin_dpo/margin_mean": 9.115702629089355,
"margin_dpo/margin_std": 14.19137954711914,
"step": 501
},
{
"epoch": 0.7588813303099018,
"fcm_dpo/beta": 0.05887910723686218,
"fcm_dpo/delta": 0.12389560043811798,
"fcm_dpo/margin": 8.188718795776367,
"fcm_dpo/q_t": 0.4010313153266907,
"grad_norm": 13.988794326782227,
"learning_rate": 8.4295479559726e-08,
"logits/chosen": 1.4468226432800293,
"logits/rejected": 1.3555638790130615,
"logps/chosen": -87.16419219970703,
"logps/ref_chosen": -83.1567611694336,
"logps/ref_rejected": -106.74440002441406,
"logps/rejected": -118.9405517578125,
"loss": 1.1022,
"margin_dpo/margin_mean": 8.188718795776367,
"margin_dpo/margin_std": 13.904130935668945,
"step": 502
},
{
"epoch": 0.7603930461073318,
"fcm_dpo/beta": 0.05860746279358864,
"fcm_dpo/delta": -0.07429009675979614,
"fcm_dpo/margin": 11.419576644897461,
"fcm_dpo/q_t": 0.36525100469589233,
"grad_norm": 12.156607627868652,
"learning_rate": 8.330774987092712e-08,
"logits/chosen": 1.5399150848388672,
"logits/rejected": 1.5529388189315796,
"logps/chosen": -70.98908996582031,
"logps/ref_chosen": -68.51583862304688,
"logps/ref_rejected": -75.02178955078125,
"logps/rejected": -88.91461944580078,
"loss": 1.0363,
"margin_dpo/margin_mean": 11.419578552246094,
"margin_dpo/margin_std": 17.146236419677734,
"step": 503
},
{
"epoch": 0.7619047619047619,
"fcm_dpo/beta": 0.05688774213194847,
"fcm_dpo/delta": -0.23274773359298706,
"fcm_dpo/margin": 14.314230918884277,
"fcm_dpo/q_t": 0.33439508080482483,
"grad_norm": 12.193303108215332,
"learning_rate": 8.232468292269479e-08,
"logits/chosen": 1.447981595993042,
"logits/rejected": 1.4175838232040405,
"logps/chosen": -88.09152221679688,
"logps/ref_chosen": -85.15829467773438,
"logps/ref_rejected": -96.16879272460938,
"logps/rejected": -113.41624450683594,
"loss": 0.8953,
"margin_dpo/margin_mean": 14.314229965209961,
"margin_dpo/margin_std": 15.71872615814209,
"step": 504
},
{
"epoch": 0.763416477702192,
"fcm_dpo/beta": 0.05636422336101532,
"fcm_dpo/delta": 0.1211187019944191,
"fcm_dpo/margin": 8.596763610839844,
"fcm_dpo/q_t": 0.4058091938495636,
"grad_norm": 21.137935638427734,
"learning_rate": 8.134630621352483e-08,
"logits/chosen": 1.5486080646514893,
"logits/rejected": 1.3943827152252197,
"logps/chosen": -83.37315368652344,
"logps/ref_chosen": -79.26185607910156,
"logps/ref_rejected": -96.34947967529297,
"logps/rejected": -109.05754089355469,
"loss": 1.2078,
"margin_dpo/margin_mean": 8.596763610839844,
"margin_dpo/margin_std": 18.844703674316406,
"step": 505
},
{
"epoch": 0.764928193499622,
"fcm_dpo/beta": 0.05752148851752281,
"fcm_dpo/delta": 0.08011193573474884,
"fcm_dpo/margin": 9.109159469604492,
"fcm_dpo/q_t": 0.39814865589141846,
"grad_norm": 18.01957893371582,
"learning_rate": 8.037264711071698e-08,
"logits/chosen": 1.4216543436050415,
"logits/rejected": 1.4008054733276367,
"logps/chosen": -93.21182250976562,
"logps/ref_chosen": -88.192626953125,
"logps/ref_rejected": -100.86880493164062,
"logps/rejected": -114.99716186523438,
"loss": 1.2276,
"margin_dpo/margin_mean": 9.109158515930176,
"margin_dpo/margin_std": 20.6380672454834,
"step": 506
},
{
"epoch": 0.7664399092970522,
"fcm_dpo/beta": 0.05810971558094025,
"fcm_dpo/delta": -0.009886268526315689,
"fcm_dpo/margin": 10.48335075378418,
"fcm_dpo/q_t": 0.38148459792137146,
"grad_norm": 13.946736335754395,
"learning_rate": 7.940373284960933e-08,
"logits/chosen": 1.231891393661499,
"logits/rejected": 1.1046533584594727,
"logps/chosen": -92.22346496582031,
"logps/ref_chosen": -86.04632568359375,
"logps/ref_rejected": -111.44412994384766,
"logps/rejected": -128.1046142578125,
"loss": 1.083,
"margin_dpo/margin_mean": 10.48335075378418,
"margin_dpo/margin_std": 17.50394058227539,
"step": 507
},
{
"epoch": 0.7679516250944822,
"fcm_dpo/beta": 0.05754335597157478,
"fcm_dpo/delta": -0.07930278778076172,
"fcm_dpo/margin": 11.710323333740234,
"fcm_dpo/q_t": 0.3726339340209961,
"grad_norm": 15.400092124938965,
"learning_rate": 7.843959053281663e-08,
"logits/chosen": 1.4893312454223633,
"logits/rejected": 1.1656887531280518,
"logps/chosen": -83.44557189941406,
"logps/ref_chosen": -79.25038146972656,
"logps/ref_rejected": -118.49089813232422,
"logps/rejected": -134.39639282226562,
"loss": 1.0495,
"margin_dpo/margin_mean": 11.710323333740234,
"margin_dpo/margin_std": 18.529254913330078,
"step": 508
},
{
"epoch": 0.7694633408919124,
"fcm_dpo/beta": 0.05650604888796806,
"fcm_dpo/delta": -0.0452471524477005,
"fcm_dpo/margin": 11.363935470581055,
"fcm_dpo/q_t": 0.36331725120544434,
"grad_norm": 12.700797080993652,
"learning_rate": 7.748024712947204e-08,
"logits/chosen": 1.4477837085723877,
"logits/rejected": 1.3602555990219116,
"logps/chosen": -83.59129333496094,
"logps/ref_chosen": -80.7039566040039,
"logps/ref_rejected": -90.50444793701172,
"logps/rejected": -104.75572967529297,
"loss": 1.0001,
"margin_dpo/margin_mean": 11.363937377929688,
"margin_dpo/margin_std": 14.95089054107666,
"step": 509
},
{
"epoch": 0.7709750566893424,
"fcm_dpo/beta": 0.055086106061935425,
"fcm_dpo/delta": -0.13460469245910645,
"fcm_dpo/margin": 13.135368347167969,
"fcm_dpo/q_t": 0.3555517792701721,
"grad_norm": 13.34060287475586,
"learning_rate": 7.652572947447272e-08,
"logits/chosen": 1.587594985961914,
"logits/rejected": 1.364477515220642,
"logps/chosen": -70.32972717285156,
"logps/ref_chosen": -67.64491271972656,
"logps/ref_rejected": -108.92274475097656,
"logps/rejected": -124.742919921875,
"loss": 1.0292,
"margin_dpo/margin_mean": 13.135368347167969,
"margin_dpo/margin_std": 19.449054718017578,
"step": 510
},
{
"epoch": 0.7724867724867724,
"fcm_dpo/beta": 0.053726132959127426,
"fcm_dpo/delta": -0.1980828195810318,
"fcm_dpo/margin": 14.573793411254883,
"fcm_dpo/q_t": 0.3462072014808655,
"grad_norm": 11.345755577087402,
"learning_rate": 7.557606426772961e-08,
"logits/chosen": 1.4740877151489258,
"logits/rejected": 1.3778789043426514,
"logps/chosen": -79.63981628417969,
"logps/ref_chosen": -75.66263580322266,
"logps/ref_rejected": -104.26296997070312,
"logps/rejected": -122.81393432617188,
"loss": 0.9632,
"margin_dpo/margin_mean": 14.573793411254883,
"margin_dpo/margin_std": 18.880844116210938,
"step": 511
},
{
"epoch": 0.7739984882842026,
"fcm_dpo/beta": 0.05385109782218933,
"fcm_dpo/delta": 0.14281541109085083,
"fcm_dpo/margin": 8.617152214050293,
"fcm_dpo/q_t": 0.41105854511260986,
"grad_norm": 14.020538330078125,
"learning_rate": 7.463127807341966e-08,
"logits/chosen": 1.1061210632324219,
"logits/rejected": 1.0223177671432495,
"logps/chosen": -83.80526733398438,
"logps/ref_chosen": -79.31925964355469,
"logps/ref_rejected": -82.22052001953125,
"logps/rejected": -95.32367706298828,
"loss": 1.1695,
"margin_dpo/margin_mean": 8.617152214050293,
"margin_dpo/margin_std": 17.66824722290039,
"step": 512
},
{
"epoch": 0.7755102040816326,
"fcm_dpo/beta": 0.05385831743478775,
"fcm_dpo/delta": -0.017212651669979095,
"fcm_dpo/margin": 11.417625427246094,
"fcm_dpo/q_t": 0.37617164850234985,
"grad_norm": 10.337393760681152,
"learning_rate": 7.369139731924401e-08,
"logits/chosen": 1.775747299194336,
"logits/rejected": 1.6193273067474365,
"logps/chosen": -74.82000732421875,
"logps/ref_chosen": -72.02534484863281,
"logps/ref_rejected": -86.56224060058594,
"logps/rejected": -100.77452850341797,
"loss": 1.0165,
"margin_dpo/margin_mean": 11.417625427246094,
"margin_dpo/margin_std": 15.95500373840332,
"step": 513
},
{
"epoch": 0.7770219198790628,
"fcm_dpo/beta": 0.05360942333936691,
"fcm_dpo/delta": -0.10015452653169632,
"fcm_dpo/margin": 12.932397842407227,
"fcm_dpo/q_t": 0.3562164604663849,
"grad_norm": 11.159914016723633,
"learning_rate": 7.275644829568747e-08,
"logits/chosen": 1.7210180759429932,
"logits/rejected": 1.6601059436798096,
"logps/chosen": -89.07673645019531,
"logps/ref_chosen": -84.94093322753906,
"logps/ref_rejected": -102.44367980957031,
"logps/rejected": -119.51187133789062,
"loss": 0.9902,
"margin_dpo/margin_mean": 12.93239688873291,
"margin_dpo/margin_std": 17.244815826416016,
"step": 514
},
{
"epoch": 0.7785336356764928,
"fcm_dpo/beta": 0.053353969007730484,
"fcm_dpo/delta": 0.014163432642817497,
"fcm_dpo/margin": 10.993968963623047,
"fcm_dpo/q_t": 0.38286080956459045,
"grad_norm": 15.05717658996582,
"learning_rate": 7.182645715528435e-08,
"logits/chosen": 1.679139256477356,
"logits/rejected": 1.4939582347869873,
"logps/chosen": -78.55538940429688,
"logps/ref_chosen": -72.9662094116211,
"logps/ref_rejected": -102.53651428222656,
"logps/rejected": -119.11965942382812,
"loss": 1.0871,
"margin_dpo/margin_mean": 10.99396800994873,
"margin_dpo/margin_std": 18.255279541015625,
"step": 515
},
{
"epoch": 0.780045351473923,
"fcm_dpo/beta": 0.05421724542975426,
"fcm_dpo/delta": 0.09501098096370697,
"fcm_dpo/margin": 9.394851684570312,
"fcm_dpo/q_t": 0.390326589345932,
"grad_norm": 14.797728538513184,
"learning_rate": 7.090144991188568e-08,
"logits/chosen": 1.6032508611679077,
"logits/rejected": 1.4421273469924927,
"logps/chosen": -78.98271179199219,
"logps/ref_chosen": -76.63414001464844,
"logps/ref_rejected": -91.01750183105469,
"logps/rejected": -102.76092529296875,
"loss": 1.1055,
"margin_dpo/margin_mean": 9.394851684570312,
"margin_dpo/margin_std": 15.970596313476562,
"step": 516
},
{
"epoch": 0.781557067271353,
"fcm_dpo/beta": 0.055460721254348755,
"fcm_dpo/delta": 0.11611491441726685,
"fcm_dpo/margin": 8.81535530090332,
"fcm_dpo/q_t": 0.4120446443557739,
"grad_norm": 13.8700590133667,
"learning_rate": 6.998145243993284e-08,
"logits/chosen": 1.6591780185699463,
"logits/rejected": 1.6576237678527832,
"logps/chosen": -83.76336669921875,
"logps/ref_chosen": -77.06817626953125,
"logps/ref_rejected": -80.048583984375,
"logps/rejected": -95.55912780761719,
"loss": 1.2023,
"margin_dpo/margin_mean": 8.81535530090332,
"margin_dpo/margin_std": 19.780887603759766,
"step": 517
},
{
"epoch": 0.783068783068783,
"fcm_dpo/beta": 0.056159548461437225,
"fcm_dpo/delta": 0.05302170664072037,
"fcm_dpo/margin": 9.787691116333008,
"fcm_dpo/q_t": 0.3945106863975525,
"grad_norm": 11.806503295898438,
"learning_rate": 6.906649047373245e-08,
"logits/chosen": 1.42006516456604,
"logits/rejected": 1.2588882446289062,
"logps/chosen": -80.86302185058594,
"logps/ref_chosen": -78.69026184082031,
"logps/ref_rejected": -97.58124542236328,
"logps/rejected": -109.54169464111328,
"loss": 1.1268,
"margin_dpo/margin_mean": 9.787691116333008,
"margin_dpo/margin_std": 18.07889175415039,
"step": 518
},
{
"epoch": 0.7845804988662132,
"fcm_dpo/beta": 0.05649217590689659,
"fcm_dpo/delta": 0.05481000617146492,
"fcm_dpo/margin": 6.627190589904785,
"fcm_dpo/q_t": 0.4286935031414032,
"grad_norm": 18.38283920288086,
"learning_rate": 6.815658960673781e-08,
"logits/chosen": 1.611324429512024,
"logits/rejected": 1.49735426902771,
"logps/chosen": -85.2575912475586,
"logps/ref_chosen": -78.35087585449219,
"logps/ref_rejected": -95.79212188720703,
"logps/rejected": -109.32601928710938,
"loss": 1.3611,
"margin_dpo/margin_mean": 6.627191066741943,
"margin_dpo/margin_std": 21.301122665405273,
"step": 519
},
{
"epoch": 0.7860922146636432,
"fcm_dpo/beta": 0.056376829743385315,
"fcm_dpo/delta": 0.022743336856365204,
"fcm_dpo/margin": 10.225279808044434,
"fcm_dpo/q_t": 0.3800208866596222,
"grad_norm": 14.909660339355469,
"learning_rate": 6.725177529083209e-08,
"logits/chosen": 1.4296002388000488,
"logits/rejected": 1.2566463947296143,
"logps/chosen": -83.81216430664062,
"logps/ref_chosen": -80.40513610839844,
"logps/ref_rejected": -93.02791595458984,
"logps/rejected": -106.66022491455078,
"loss": 1.0306,
"margin_dpo/margin_mean": 10.225279808044434,
"margin_dpo/margin_std": 13.84968376159668,
"step": 520
},
{
"epoch": 0.7876039304610734,
"fcm_dpo/beta": 0.057500891387462616,
"fcm_dpo/delta": 0.0005270391702651978,
"fcm_dpo/margin": 10.406567573547363,
"fcm_dpo/q_t": 0.3807342052459717,
"grad_norm": 14.164873123168945,
"learning_rate": 6.63520728356167e-08,
"logits/chosen": 1.4418920278549194,
"logits/rejected": 1.2171070575714111,
"logps/chosen": -91.44085693359375,
"logps/ref_chosen": -86.5218276977539,
"logps/ref_rejected": -109.20257568359375,
"logps/rejected": -124.5281753540039,
"loss": 1.0793,
"margin_dpo/margin_mean": 10.406567573547363,
"margin_dpo/margin_std": 17.031408309936523,
"step": 521
},
{
"epoch": 0.7891156462585034,
"fcm_dpo/beta": 0.058100029826164246,
"fcm_dpo/delta": 0.10878778994083405,
"fcm_dpo/margin": 8.54337215423584,
"fcm_dpo/q_t": 0.40556806325912476,
"grad_norm": 16.047704696655273,
"learning_rate": 6.545750740770336e-08,
"logits/chosen": 1.297495722770691,
"logits/rejected": 1.3115613460540771,
"logps/chosen": -83.30645751953125,
"logps/ref_chosen": -78.24254608154297,
"logps/ref_rejected": -85.23554992675781,
"logps/rejected": -98.84282684326172,
"loss": 1.2144,
"margin_dpo/margin_mean": 8.54337215423584,
"margin_dpo/margin_std": 19.285472869873047,
"step": 522
},
{
"epoch": 0.7906273620559335,
"fcm_dpo/beta": 0.05785483866930008,
"fcm_dpo/delta": -0.0011233240365982056,
"fcm_dpo/margin": 10.367927551269531,
"fcm_dpo/q_t": 0.37423598766326904,
"grad_norm": 13.733061790466309,
"learning_rate": 6.456810403001012e-08,
"logits/chosen": 1.4599554538726807,
"logits/rejected": 1.2018170356750488,
"logps/chosen": -89.55259704589844,
"logps/ref_chosen": -83.50096893310547,
"logps/ref_rejected": -117.45217895507812,
"logps/rejected": -133.87173461914062,
"loss": 1.0643,
"margin_dpo/margin_mean": 10.367927551269531,
"margin_dpo/margin_std": 16.020349502563477,
"step": 523
},
{
"epoch": 0.7921390778533636,
"fcm_dpo/beta": 0.058989353477954865,
"fcm_dpo/delta": 0.042257122695446014,
"fcm_dpo/margin": 9.482121467590332,
"fcm_dpo/q_t": 0.3825477957725525,
"grad_norm": 13.890347480773926,
"learning_rate": 6.368388758106134e-08,
"logits/chosen": 1.3521630764007568,
"logits/rejected": 1.3045108318328857,
"logps/chosen": -94.79153442382812,
"logps/ref_chosen": -93.22590637207031,
"logps/ref_rejected": -108.17863464355469,
"logps/rejected": -119.22637176513672,
"loss": 1.1023,
"margin_dpo/margin_mean": 9.482121467590332,
"margin_dpo/margin_std": 16.036991119384766,
"step": 524
},
{
"epoch": 0.7936507936507936,
"fcm_dpo/beta": 0.05859127268195152,
"fcm_dpo/delta": -0.018103765323758125,
"fcm_dpo/margin": 7.183684825897217,
"fcm_dpo/q_t": 0.41909414529800415,
"grad_norm": 13.745256423950195,
"learning_rate": 6.280488279429185e-08,
"logits/chosen": 1.0294437408447266,
"logits/rejected": 1.0196237564086914,
"logps/chosen": -99.94296264648438,
"logps/ref_chosen": -94.08831787109375,
"logps/ref_rejected": -100.682373046875,
"logps/rejected": -113.720703125,
"loss": 1.2164,
"margin_dpo/margin_mean": 7.183685302734375,
"margin_dpo/margin_std": 16.434297561645508,
"step": 525
},
{
"epoch": 0.7951625094482238,
"fcm_dpo/beta": 0.057315438985824585,
"fcm_dpo/delta": -0.11007855832576752,
"fcm_dpo/margin": 8.643760681152344,
"fcm_dpo/q_t": 0.39720046520233154,
"grad_norm": 14.708939552307129,
"learning_rate": 6.193111425735515e-08,
"logits/chosen": 1.6031224727630615,
"logits/rejected": 1.4323936700820923,
"logps/chosen": -83.46339416503906,
"logps/ref_chosen": -77.78373718261719,
"logps/ref_rejected": -100.29583740234375,
"logps/rejected": -114.6192626953125,
"loss": 1.1392,
"margin_dpo/margin_mean": 8.643760681152344,
"margin_dpo/margin_std": 15.403488159179688,
"step": 526
},
{
"epoch": 0.7966742252456538,
"fcm_dpo/beta": 0.05917923152446747,
"fcm_dpo/delta": 0.18667322397232056,
"fcm_dpo/margin": 7.098379611968994,
"fcm_dpo/q_t": 0.4146905541419983,
"grad_norm": 14.931519508361816,
"learning_rate": 6.106260641143546e-08,
"logits/chosen": 1.578808069229126,
"logits/rejected": 1.3636837005615234,
"logps/chosen": -85.0775146484375,
"logps/ref_chosen": -76.695068359375,
"logps/ref_rejected": -107.68281555175781,
"logps/rejected": -123.16364288330078,
"loss": 1.2101,
"margin_dpo/margin_mean": 7.098379135131836,
"margin_dpo/margin_std": 16.101825714111328,
"step": 527
},
{
"epoch": 0.7981859410430839,
"fcm_dpo/beta": 0.05997871980071068,
"fcm_dpo/delta": 0.08462747931480408,
"fcm_dpo/margin": 8.665306091308594,
"fcm_dpo/q_t": 0.39577969908714294,
"grad_norm": 15.675226211547852,
"learning_rate": 6.019938355056422e-08,
"logits/chosen": 1.3997437953948975,
"logits/rejected": 1.2392219305038452,
"logps/chosen": -79.13544464111328,
"logps/ref_chosen": -75.0361328125,
"logps/ref_rejected": -94.67579650878906,
"logps/rejected": -107.44041442871094,
"loss": 1.1857,
"margin_dpo/margin_mean": 8.665306091308594,
"margin_dpo/margin_std": 18.087797164916992,
"step": 528
},
{
"epoch": 0.799697656840514,
"fcm_dpo/beta": 0.05653582885861397,
"fcm_dpo/delta": -0.3923792243003845,
"fcm_dpo/margin": 16.796003341674805,
"fcm_dpo/q_t": 0.31350094079971313,
"grad_norm": 10.827249526977539,
"learning_rate": 5.934146982094049e-08,
"logits/chosen": 1.3487932682037354,
"logits/rejected": 1.2035433053970337,
"logps/chosen": -76.30598449707031,
"logps/ref_chosen": -72.84869384765625,
"logps/ref_rejected": -93.25855255126953,
"logps/rejected": -113.51184844970703,
"loss": 0.8556,
"margin_dpo/margin_mean": 16.796003341674805,
"margin_dpo/margin_std": 17.993106842041016,
"step": 529
},
{
"epoch": 0.8012093726379441,
"fcm_dpo/beta": 0.05551842600107193,
"fcm_dpo/delta": -0.025491856038570404,
"fcm_dpo/margin": 11.230892181396484,
"fcm_dpo/q_t": 0.3764331340789795,
"grad_norm": 12.22794246673584,
"learning_rate": 5.848888922025552e-08,
"logits/chosen": 1.6910094022750854,
"logits/rejected": 1.5774551630020142,
"logps/chosen": -83.728759765625,
"logps/ref_chosen": -79.4971694946289,
"logps/ref_rejected": -93.59564208984375,
"logps/rejected": -109.05812072753906,
"loss": 1.0428,
"margin_dpo/margin_mean": 11.230892181396484,
"margin_dpo/margin_std": 16.893659591674805,
"step": 530
},
{
"epoch": 0.8027210884353742,
"fcm_dpo/beta": 0.05706261843442917,
"fcm_dpo/delta": 0.08858685940504074,
"fcm_dpo/margin": 8.977895736694336,
"fcm_dpo/q_t": 0.39383938908576965,
"grad_norm": 13.592248916625977,
"learning_rate": 5.7641665597021435e-08,
"logits/chosen": 1.5204875469207764,
"logits/rejected": 1.377941370010376,
"logps/chosen": -74.43402099609375,
"logps/ref_chosen": -69.45396423339844,
"logps/ref_rejected": -96.30017852783203,
"logps/rejected": -110.25813293457031,
"loss": 1.1054,
"margin_dpo/margin_mean": 8.977895736694336,
"margin_dpo/margin_std": 14.877981185913086,
"step": 531
},
{
"epoch": 0.8042328042328042,
"fcm_dpo/beta": 0.05634995549917221,
"fcm_dpo/delta": -0.006979711353778839,
"fcm_dpo/margin": 10.757040023803711,
"fcm_dpo/q_t": 0.3826729357242584,
"grad_norm": 12.061864852905273,
"learning_rate": 5.679982264990424e-08,
"logits/chosen": 1.2939972877502441,
"logits/rejected": 1.1688940525054932,
"logps/chosen": -84.37901306152344,
"logps/ref_chosen": -76.52011108398438,
"logps/ref_rejected": -94.79593658447266,
"logps/rejected": -113.41188049316406,
"loss": 1.0853,
"margin_dpo/margin_mean": 10.757040023803711,
"margin_dpo/margin_std": 18.02541732788086,
"step": 532
},
{
"epoch": 0.8057445200302343,
"fcm_dpo/beta": 0.05673651024699211,
"fcm_dpo/delta": -0.011338796466588974,
"fcm_dpo/margin": 10.75674819946289,
"fcm_dpo/q_t": 0.3817342519760132,
"grad_norm": 18.434955596923828,
"learning_rate": 5.596338392706076e-08,
"logits/chosen": 1.5201002359390259,
"logits/rejected": 1.337731122970581,
"logps/chosen": -74.36298370361328,
"logps/ref_chosen": -72.31800842285156,
"logps/ref_rejected": -89.26652526855469,
"logps/rejected": -102.06825256347656,
"loss": 1.1072,
"margin_dpo/margin_mean": 10.75674819946289,
"margin_dpo/margin_std": 18.741741180419922,
"step": 533
},
{
"epoch": 0.8072562358276644,
"fcm_dpo/beta": 0.05574037879705429,
"fcm_dpo/delta": -0.009521931409835815,
"fcm_dpo/margin": 10.878827095031738,
"fcm_dpo/q_t": 0.375033438205719,
"grad_norm": 15.397263526916504,
"learning_rate": 5.513237282548033e-08,
"logits/chosen": 1.2019238471984863,
"logits/rejected": 1.1612894535064697,
"logps/chosen": -79.74507141113281,
"logps/ref_chosen": -77.87559509277344,
"logps/ref_rejected": -92.21171569824219,
"logps/rejected": -104.96002197265625,
"loss": 1.0573,
"margin_dpo/margin_mean": 10.878826141357422,
"margin_dpo/margin_std": 16.20137596130371,
"step": 534
},
{
"epoch": 0.8087679516250945,
"fcm_dpo/beta": 0.05694824457168579,
"fcm_dpo/delta": 0.05008779466152191,
"fcm_dpo/margin": 9.698480606079102,
"fcm_dpo/q_t": 0.3916192650794983,
"grad_norm": 11.60848331451416,
"learning_rate": 5.430681259032957e-08,
"logits/chosen": 1.196105718612671,
"logits/rejected": 1.0437562465667725,
"logps/chosen": -83.9339828491211,
"logps/ref_chosen": -78.16358184814453,
"logps/ref_rejected": -97.78164672851562,
"logps/rejected": -113.25051879882812,
"loss": 1.1577,
"margin_dpo/margin_mean": 9.698480606079102,
"margin_dpo/margin_std": 19.059343338012695,
"step": 535
},
{
"epoch": 0.8102796674225246,
"fcm_dpo/beta": 0.05582098662853241,
"fcm_dpo/delta": -0.13114593923091888,
"fcm_dpo/margin": 12.92877197265625,
"fcm_dpo/q_t": 0.352193146944046,
"grad_norm": 10.292197227478027,
"learning_rate": 5.3486726314303175e-08,
"logits/chosen": 1.3923474550247192,
"logits/rejected": 1.190511703491211,
"logps/chosen": -70.1307373046875,
"logps/ref_chosen": -66.65623474121094,
"logps/ref_rejected": -89.49085998535156,
"logps/rejected": -105.89413452148438,
"loss": 0.9384,
"margin_dpo/margin_mean": 12.92877197265625,
"margin_dpo/margin_std": 15.259733200073242,
"step": 536
},
{
"epoch": 0.8117913832199547,
"fcm_dpo/beta": 0.055668190121650696,
"fcm_dpo/delta": 0.051453668624162674,
"fcm_dpo/margin": 9.903670310974121,
"fcm_dpo/q_t": 0.3890642523765564,
"grad_norm": 11.76916790008545,
"learning_rate": 5.267213693697695e-08,
"logits/chosen": 1.5167008638381958,
"logits/rejected": 1.303575038909912,
"logps/chosen": -83.30008697509766,
"logps/ref_chosen": -74.99390411376953,
"logps/ref_rejected": -110.6627197265625,
"logps/rejected": -128.87257385253906,
"loss": 1.1049,
"margin_dpo/margin_mean": 9.903672218322754,
"margin_dpo/margin_std": 16.979801177978516,
"step": 537
},
{
"epoch": 0.8133030990173847,
"fcm_dpo/beta": 0.055662307888269424,
"fcm_dpo/delta": -0.09925831854343414,
"fcm_dpo/margin": 12.428467750549316,
"fcm_dpo/q_t": 0.3608172535896301,
"grad_norm": 12.546353340148926,
"learning_rate": 5.1863067244167144e-08,
"logits/chosen": 1.4921224117279053,
"logits/rejected": 1.4055488109588623,
"logps/chosen": -93.9822998046875,
"logps/ref_chosen": -87.61151123046875,
"logps/ref_rejected": -98.1150131225586,
"logps/rejected": -116.91426086425781,
"loss": 0.9884,
"margin_dpo/margin_mean": 12.428467750549316,
"margin_dpo/margin_std": 16.471281051635742,
"step": 538
},
{
"epoch": 0.8148148148148148,
"fcm_dpo/beta": 0.05533237010240555,
"fcm_dpo/delta": 0.06810373067855835,
"fcm_dpo/margin": 9.67943286895752,
"fcm_dpo/q_t": 0.3939323127269745,
"grad_norm": 13.805160522460938,
"learning_rate": 5.105953986729195e-08,
"logits/chosen": 1.6138584613800049,
"logits/rejected": 1.3060567378997803,
"logps/chosen": -84.86448669433594,
"logps/ref_chosen": -78.86482238769531,
"logps/ref_rejected": -100.84349822998047,
"logps/rejected": -116.5226058959961,
"loss": 1.0875,
"margin_dpo/margin_mean": 9.679433822631836,
"margin_dpo/margin_std": 15.986173629760742,
"step": 539
},
{
"epoch": 0.8163265306122449,
"fcm_dpo/beta": 0.0549115426838398,
"fcm_dpo/delta": -0.1168595626950264,
"fcm_dpo/margin": 12.906469345092773,
"fcm_dpo/q_t": 0.3611002564430237,
"grad_norm": 13.961292266845703,
"learning_rate": 5.026157728273966e-08,
"logits/chosen": 1.416818618774414,
"logits/rejected": 1.1737697124481201,
"logps/chosen": -89.22088623046875,
"logps/ref_chosen": -83.66409301757812,
"logps/ref_rejected": -114.8860092163086,
"logps/rejected": -133.34927368164062,
"loss": 1.0225,
"margin_dpo/margin_mean": 12.906469345092773,
"margin_dpo/margin_std": 18.96722412109375,
"step": 540
},
{
"epoch": 0.817838246409675,
"fcm_dpo/beta": 0.05322103202342987,
"fcm_dpo/delta": -0.11219315230846405,
"fcm_dpo/margin": 13.215749740600586,
"fcm_dpo/q_t": 0.3545529544353485,
"grad_norm": 15.04979133605957,
"learning_rate": 4.9469201811239035e-08,
"logits/chosen": 1.3256299495697021,
"logits/rejected": 1.3482017517089844,
"logps/chosen": -87.13902282714844,
"logps/ref_chosen": -83.12225341796875,
"logps/ref_rejected": -74.80526733398438,
"logps/rejected": -92.03779602050781,
"loss": 0.979,
"margin_dpo/margin_mean": 13.215751647949219,
"margin_dpo/margin_std": 17.005870819091797,
"step": 541
},
{
"epoch": 0.8193499622071051,
"fcm_dpo/beta": 0.05173543840646744,
"fcm_dpo/delta": -0.16066280007362366,
"fcm_dpo/margin": 14.455942153930664,
"fcm_dpo/q_t": 0.3514820337295532,
"grad_norm": 10.039311408996582,
"learning_rate": 4.868243561723534e-08,
"logits/chosen": 1.587374210357666,
"logits/rejected": 1.4626187086105347,
"logps/chosen": -67.82963562011719,
"logps/ref_chosen": -66.3132553100586,
"logps/ref_rejected": -83.24588012695312,
"logps/rejected": -99.21820831298828,
"loss": 0.9778,
"margin_dpo/margin_mean": 14.455942153930664,
"margin_dpo/margin_std": 19.253398895263672,
"step": 542
},
{
"epoch": 0.8208616780045351,
"fcm_dpo/beta": 0.051359452307224274,
"fcm_dpo/delta": -0.05219798535108566,
"fcm_dpo/margin": 12.62739086151123,
"fcm_dpo/q_t": 0.36705687642097473,
"grad_norm": 9.661619186401367,
"learning_rate": 4.790130070827028e-08,
"logits/chosen": 1.3386754989624023,
"logits/rejected": 1.0855376720428467,
"logps/chosen": -73.03467559814453,
"logps/ref_chosen": -68.11429595947266,
"logps/ref_rejected": -94.62380981445312,
"logps/rejected": -112.17157745361328,
"loss": 1.0345,
"margin_dpo/margin_mean": 12.627391815185547,
"margin_dpo/margin_std": 18.65164566040039,
"step": 543
},
{
"epoch": 0.8223733938019653,
"fcm_dpo/beta": 0.05012405291199684,
"fcm_dpo/delta": -0.10903792828321457,
"fcm_dpo/margin": 13.992612838745117,
"fcm_dpo/q_t": 0.3610704243183136,
"grad_norm": 11.929317474365234,
"learning_rate": 4.7125818934366454e-08,
"logits/chosen": 1.5628447532653809,
"logits/rejected": 1.3672630786895752,
"logps/chosen": -85.86703491210938,
"logps/ref_chosen": -81.187255859375,
"logps/ref_rejected": -105.84722900390625,
"logps/rejected": -124.51962280273438,
"loss": 1.0017,
"margin_dpo/margin_mean": 13.9926118850708,
"margin_dpo/margin_std": 19.482681274414062,
"step": 544
},
{
"epoch": 0.8238851095993953,
"fcm_dpo/beta": 0.05042444169521332,
"fcm_dpo/delta": 0.10349908471107483,
"fcm_dpo/margin": 9.951008796691895,
"fcm_dpo/q_t": 0.3982735872268677,
"grad_norm": 11.872567176818848,
"learning_rate": 4.635601198741607e-08,
"logits/chosen": 1.2976285219192505,
"logits/rejected": 1.1338021755218506,
"logps/chosen": -85.16439819335938,
"logps/ref_chosen": -78.81717681884766,
"logps/ref_rejected": -98.65876770019531,
"logps/rejected": -114.95700073242188,
"loss": 1.1213,
"margin_dpo/margin_mean": 9.951007843017578,
"margin_dpo/margin_std": 17.55803680419922,
"step": 545
},
{
"epoch": 0.8253968253968254,
"fcm_dpo/beta": 0.05110456794500351,
"fcm_dpo/delta": -0.006316348910331726,
"fcm_dpo/margin": 11.8477144241333,
"fcm_dpo/q_t": 0.3751433491706848,
"grad_norm": 11.023509979248047,
"learning_rate": 4.559190140057428e-08,
"logits/chosen": 1.6506929397583008,
"logits/rejected": 1.6415163278579712,
"logps/chosen": -77.45428466796875,
"logps/ref_chosen": -74.2529296875,
"logps/ref_rejected": -80.32308959960938,
"logps/rejected": -95.37216186523438,
"loss": 1.0302,
"margin_dpo/margin_mean": 11.847713470458984,
"margin_dpo/margin_std": 16.76009750366211,
"step": 546
},
{
"epoch": 0.8269085411942555,
"fcm_dpo/beta": 0.049887072294950485,
"fcm_dpo/delta": -0.10243887454271317,
"fcm_dpo/margin": 13.926226615905762,
"fcm_dpo/q_t": 0.3531789779663086,
"grad_norm": 10.132402420043945,
"learning_rate": 4.483350854765672e-08,
"logits/chosen": 1.390183448791504,
"logits/rejected": 1.2259633541107178,
"logps/chosen": -73.09548950195312,
"logps/ref_chosen": -69.9368896484375,
"logps/ref_rejected": -90.25672912597656,
"logps/rejected": -107.34156799316406,
"loss": 0.9679,
"margin_dpo/margin_mean": 13.926226615905762,
"margin_dpo/margin_std": 16.944210052490234,
"step": 547
},
{
"epoch": 0.8284202569916855,
"fcm_dpo/beta": 0.051217325031757355,
"fcm_dpo/delta": 0.1423822045326233,
"fcm_dpo/margin": 9.027074813842773,
"fcm_dpo/q_t": 0.4059806168079376,
"grad_norm": 12.69057559967041,
"learning_rate": 4.4080854642541826e-08,
"logits/chosen": 1.1699552536010742,
"logits/rejected": 1.0122103691101074,
"logps/chosen": -88.73080444335938,
"logps/ref_chosen": -81.1605224609375,
"logps/ref_rejected": -99.7246322631836,
"logps/rejected": -116.32199096679688,
"loss": 1.1604,
"margin_dpo/margin_mean": 9.027073860168457,
"margin_dpo/margin_std": 17.565166473388672,
"step": 548
},
{
"epoch": 0.8299319727891157,
"fcm_dpo/beta": 0.05247477814555168,
"fcm_dpo/delta": 0.15556570887565613,
"fcm_dpo/margin": 8.601249694824219,
"fcm_dpo/q_t": 0.40862107276916504,
"grad_norm": 21.05410385131836,
"learning_rate": 4.333396073857723e-08,
"logits/chosen": 1.810377597808838,
"logits/rejected": 1.6249217987060547,
"logps/chosen": -86.17178344726562,
"logps/ref_chosen": -80.49800872802734,
"logps/ref_rejected": -113.20750427246094,
"logps/rejected": -127.48252868652344,
"loss": 1.2102,
"margin_dpo/margin_mean": 8.601249694824219,
"margin_dpo/margin_std": 19.13092041015625,
"step": 549
},
{
"epoch": 0.8314436885865457,
"fcm_dpo/beta": 0.05345090851187706,
"fcm_dpo/delta": 0.12718510627746582,
"fcm_dpo/margin": 8.954895973205566,
"fcm_dpo/q_t": 0.4034798741340637,
"grad_norm": 17.55516242980957,
"learning_rate": 4.259284772799099e-08,
"logits/chosen": 1.5192465782165527,
"logits/rejected": 1.4428541660308838,
"logps/chosen": -81.42501068115234,
"logps/ref_chosen": -75.13760375976562,
"logps/ref_rejected": -79.04876708984375,
"logps/rejected": -94.29107666015625,
"loss": 1.1939,
"margin_dpo/margin_mean": 8.954895973205566,
"margin_dpo/margin_std": 18.97968101501465,
"step": 550
},
{
"epoch": 0.8329554043839759,
"fcm_dpo/beta": 0.054639048874378204,
"fcm_dpo/delta": 0.024431193247437477,
"fcm_dpo/margin": 10.552170753479004,
"fcm_dpo/q_t": 0.38189250230789185,
"grad_norm": 16.292776107788086,
"learning_rate": 4.1857536341307176e-08,
"logits/chosen": 1.483259916305542,
"logits/rejected": 1.3449373245239258,
"logps/chosen": -91.39802551269531,
"logps/ref_chosen": -85.4496078491211,
"logps/ref_rejected": -103.48530578613281,
"logps/rejected": -119.98588562011719,
"loss": 1.0539,
"margin_dpo/margin_mean": 10.552170753479004,
"margin_dpo/margin_std": 16.359243392944336,
"step": 551
},
{
"epoch": 0.8344671201814059,
"fcm_dpo/beta": 0.05562649294734001,
"fcm_dpo/delta": 0.07661741226911545,
"fcm_dpo/margin": 9.438526153564453,
"fcm_dpo/q_t": 0.38665229082107544,
"grad_norm": 14.911755561828613,
"learning_rate": 4.112804714676593e-08,
"logits/chosen": 1.4020166397094727,
"logits/rejected": 1.2649641036987305,
"logps/chosen": -87.27410888671875,
"logps/ref_chosen": -82.01036071777344,
"logps/ref_rejected": -101.61884307861328,
"logps/rejected": -116.32112121582031,
"loss": 1.1424,
"margin_dpo/margin_mean": 9.438526153564453,
"margin_dpo/margin_std": 17.219161987304688,
"step": 552
},
{
"epoch": 0.8359788359788359,
"fcm_dpo/beta": 0.055003680288791656,
"fcm_dpo/delta": -0.022091738879680634,
"fcm_dpo/margin": 11.281782150268555,
"fcm_dpo/q_t": 0.3798624873161316,
"grad_norm": 16.0929012298584,
"learning_rate": 4.0404400549748144e-08,
"logits/chosen": 1.6245696544647217,
"logits/rejected": 1.3851006031036377,
"logps/chosen": -82.01695251464844,
"logps/ref_chosen": -73.81416320800781,
"logps/ref_rejected": -104.27050018310547,
"logps/rejected": -123.75507354736328,
"loss": 1.1399,
"margin_dpo/margin_mean": 11.281782150268555,
"margin_dpo/margin_std": 20.727617263793945,
"step": 553
},
{
"epoch": 0.8374905517762661,
"fcm_dpo/beta": 0.05463992804288864,
"fcm_dpo/delta": -0.04029256850481033,
"fcm_dpo/margin": 11.668399810791016,
"fcm_dpo/q_t": 0.37226054072380066,
"grad_norm": 11.775946617126465,
"learning_rate": 3.968661679220467e-08,
"logits/chosen": 1.2836058139801025,
"logits/rejected": 1.2342619895935059,
"logps/chosen": -87.90589904785156,
"logps/ref_chosen": -81.43980407714844,
"logps/ref_rejected": -89.32518005371094,
"logps/rejected": -107.45967102050781,
"loss": 1.064,
"margin_dpo/margin_mean": 11.668397903442383,
"margin_dpo/margin_std": 18.33779525756836,
"step": 554
},
{
"epoch": 0.8390022675736961,
"fcm_dpo/beta": 0.05459080636501312,
"fcm_dpo/delta": -0.06235264241695404,
"fcm_dpo/margin": 12.04253101348877,
"fcm_dpo/q_t": 0.36375415325164795,
"grad_norm": 12.814702033996582,
"learning_rate": 3.89747159520904e-08,
"logits/chosen": 1.669064998626709,
"logits/rejected": 1.5991406440734863,
"logps/chosen": -89.34579467773438,
"logps/ref_chosen": -81.66071319580078,
"logps/ref_rejected": -87.20857238769531,
"logps/rejected": -106.93618774414062,
"loss": 1.0901,
"margin_dpo/margin_mean": 12.042530059814453,
"margin_dpo/margin_std": 19.42294692993164,
"step": 555
},
{
"epoch": 0.8405139833711263,
"fcm_dpo/beta": 0.05409371852874756,
"fcm_dpo/delta": 0.02651580236852169,
"fcm_dpo/margin": 10.629733085632324,
"fcm_dpo/q_t": 0.3883476257324219,
"grad_norm": 11.263772010803223,
"learning_rate": 3.826871794280192e-08,
"logits/chosen": 1.311571717262268,
"logits/rejected": 1.2102625370025635,
"logps/chosen": -74.93548583984375,
"logps/ref_chosen": -66.02448272705078,
"logps/ref_rejected": -82.74746704101562,
"logps/rejected": -102.28819274902344,
"loss": 1.1078,
"margin_dpo/margin_mean": 10.62973403930664,
"margin_dpo/margin_std": 18.774234771728516,
"step": 556
},
{
"epoch": 0.8420256991685563,
"fcm_dpo/beta": 0.05260467901825905,
"fcm_dpo/delta": -0.13225093483924866,
"fcm_dpo/margin": 13.666872024536133,
"fcm_dpo/q_t": 0.358351469039917,
"grad_norm": 11.609122276306152,
"learning_rate": 3.756864251262143e-08,
"logits/chosen": 1.4144532680511475,
"logits/rejected": 1.133040189743042,
"logps/chosen": -79.66389465332031,
"logps/ref_chosen": -73.08985900878906,
"logps/ref_rejected": -97.43034362792969,
"logps/rejected": -117.6712417602539,
"loss": 1.0001,
"margin_dpo/margin_mean": 13.66687297821045,
"margin_dpo/margin_std": 18.801319122314453,
"step": 557
},
{
"epoch": 0.8435374149659864,
"fcm_dpo/beta": 0.05203159525990486,
"fcm_dpo/delta": -0.06583023071289062,
"fcm_dpo/margin": 12.695112228393555,
"fcm_dpo/q_t": 0.3633221983909607,
"grad_norm": 13.396330833435059,
"learning_rate": 3.687450924416341e-08,
"logits/chosen": 1.5742992162704468,
"logits/rejected": 1.4637998342514038,
"logps/chosen": -87.541015625,
"logps/ref_chosen": -80.1357192993164,
"logps/ref_rejected": -106.65797424316406,
"logps/rejected": -126.75838470458984,
"loss": 1.0222,
"margin_dpo/margin_mean": 12.695113182067871,
"margin_dpo/margin_std": 17.957141876220703,
"step": 558
},
{
"epoch": 0.8450491307634165,
"fcm_dpo/beta": 0.0517839640378952,
"fcm_dpo/delta": 0.01864977926015854,
"fcm_dpo/margin": 11.21513557434082,
"fcm_dpo/q_t": 0.3890264630317688,
"grad_norm": 13.40358829498291,
"learning_rate": 3.6186337553827743e-08,
"logits/chosen": 1.5580424070358276,
"logits/rejected": 1.3317805528640747,
"logps/chosen": -85.84085845947266,
"logps/ref_chosen": -79.42267608642578,
"logps/ref_rejected": -98.59402465820312,
"logps/rejected": -116.22734069824219,
"loss": 1.158,
"margin_dpo/margin_mean": 11.21513557434082,
"margin_dpo/margin_std": 21.69605827331543,
"step": 559
},
{
"epoch": 0.8465608465608465,
"fcm_dpo/beta": 0.052260056138038635,
"fcm_dpo/delta": -0.06172545626759529,
"fcm_dpo/margin": 12.571533203125,
"fcm_dpo/q_t": 0.3696938157081604,
"grad_norm": 12.267929077148438,
"learning_rate": 3.550414669125573e-08,
"logits/chosen": 1.4044283628463745,
"logits/rejected": 1.3158057928085327,
"logps/chosen": -84.15168762207031,
"logps/ref_chosen": -77.49559020996094,
"logps/ref_rejected": -92.61347961425781,
"logps/rejected": -111.84111785888672,
"loss": 1.0404,
"margin_dpo/margin_mean": 12.571533203125,
"margin_dpo/margin_std": 18.772090911865234,
"step": 560
},
{
"epoch": 0.8480725623582767,
"fcm_dpo/beta": 0.05100402235984802,
"fcm_dpo/delta": -0.07462760806083679,
"fcm_dpo/margin": 13.123538970947266,
"fcm_dpo/q_t": 0.37038421630859375,
"grad_norm": 11.18996810913086,
"learning_rate": 3.482795573879241e-08,
"logits/chosen": 1.6638920307159424,
"logits/rejected": 1.6029834747314453,
"logps/chosen": -85.24051666259766,
"logps/ref_chosen": -79.20771789550781,
"logps/ref_rejected": -93.46514892578125,
"logps/rejected": -112.62149047851562,
"loss": 1.0184,
"margin_dpo/margin_mean": 13.123538970947266,
"margin_dpo/margin_std": 19.08633804321289,
"step": 561
},
{
"epoch": 0.8495842781557067,
"fcm_dpo/beta": 0.05021877959370613,
"fcm_dpo/delta": -0.04768141731619835,
"fcm_dpo/margin": 12.811036109924316,
"fcm_dpo/q_t": 0.3715214133262634,
"grad_norm": 11.333587646484375,
"learning_rate": 3.415778361095226e-08,
"logits/chosen": 1.6762216091156006,
"logits/rejected": 1.5491697788238525,
"logps/chosen": -102.22946166992188,
"logps/ref_chosen": -94.88652801513672,
"logps/ref_rejected": -109.33815002441406,
"logps/rejected": -129.4921112060547,
"loss": 1.0176,
"margin_dpo/margin_mean": 12.81103515625,
"margin_dpo/margin_std": 17.984500885009766,
"step": 562
},
{
"epoch": 0.8510959939531368,
"fcm_dpo/beta": 0.05006178095936775,
"fcm_dpo/delta": -0.048707108944654465,
"fcm_dpo/margin": 12.89486026763916,
"fcm_dpo/q_t": 0.3766898810863495,
"grad_norm": 11.322830200195312,
"learning_rate": 3.349364905389032e-08,
"logits/chosen": 1.6359024047851562,
"logits/rejected": 1.4605317115783691,
"logps/chosen": -69.72264099121094,
"logps/ref_chosen": -65.90719604492188,
"logps/ref_rejected": -84.07121276855469,
"logps/rejected": -100.78152465820312,
"loss": 1.0887,
"margin_dpo/margin_mean": 12.89486026763916,
"margin_dpo/margin_std": 21.711301803588867,
"step": 563
},
{
"epoch": 0.8526077097505669,
"fcm_dpo/beta": 0.0500749796628952,
"fcm_dpo/delta": -0.0054284874349832535,
"fcm_dpo/margin": 12.080354690551758,
"fcm_dpo/q_t": 0.3790516257286072,
"grad_norm": 16.2349910736084,
"learning_rate": 3.283557064487785e-08,
"logits/chosen": 1.315792202949524,
"logits/rejected": 1.2397656440734863,
"logps/chosen": -76.4324722290039,
"logps/ref_chosen": -72.32071685791016,
"logps/ref_rejected": -88.05014038085938,
"logps/rejected": -104.24224853515625,
"loss": 1.1096,
"margin_dpo/margin_mean": 12.080353736877441,
"margin_dpo/margin_std": 21.116573333740234,
"step": 564
},
{
"epoch": 0.854119425547997,
"fcm_dpo/beta": 0.050448790192604065,
"fcm_dpo/delta": 0.026010502129793167,
"fcm_dpo/margin": 11.385034561157227,
"fcm_dpo/q_t": 0.3829692006111145,
"grad_norm": 10.440814971923828,
"learning_rate": 3.218356679178252e-08,
"logits/chosen": 1.3742563724517822,
"logits/rejected": 1.2928344011306763,
"logps/chosen": -88.49894714355469,
"logps/ref_chosen": -80.18453979492188,
"logps/ref_rejected": -99.55126953125,
"logps/rejected": -119.25071716308594,
"loss": 1.074,
"margin_dpo/margin_mean": 11.385034561157227,
"margin_dpo/margin_std": 18.052478790283203,
"step": 565
},
{
"epoch": 0.8556311413454271,
"fcm_dpo/beta": 0.05103863775730133,
"fcm_dpo/delta": 0.020700603723526,
"fcm_dpo/margin": 11.292126655578613,
"fcm_dpo/q_t": 0.38311922550201416,
"grad_norm": 15.4530668258667,
"learning_rate": 3.1537655732553764e-08,
"logits/chosen": 1.4136245250701904,
"logits/rejected": 1.3437389135360718,
"logps/chosen": -93.4151611328125,
"logps/ref_chosen": -88.0877914428711,
"logps/ref_rejected": -87.7589111328125,
"logps/rejected": -104.37841033935547,
"loss": 1.1149,
"margin_dpo/margin_mean": 11.292126655578613,
"margin_dpo/margin_std": 19.296255111694336,
"step": 566
},
{
"epoch": 0.8571428571428571,
"fcm_dpo/beta": 0.049624793231487274,
"fcm_dpo/delta": -0.062444910407066345,
"fcm_dpo/margin": 13.247112274169922,
"fcm_dpo/q_t": 0.3635924160480499,
"grad_norm": 13.69182014465332,
"learning_rate": 3.089785553471233e-08,
"logits/chosen": 1.6392173767089844,
"logits/rejected": 1.3875160217285156,
"logps/chosen": -76.57147216796875,
"logps/ref_chosen": -69.93267822265625,
"logps/ref_rejected": -95.71786499023438,
"logps/rejected": -115.60376739501953,
"loss": 0.9874,
"margin_dpo/margin_mean": 13.247111320495605,
"margin_dpo/margin_std": 17.017322540283203,
"step": 567
},
{
"epoch": 0.8586545729402872,
"fcm_dpo/beta": 0.04972817003726959,
"fcm_dpo/delta": -0.09054756909608841,
"fcm_dpo/margin": 13.71946907043457,
"fcm_dpo/q_t": 0.36071109771728516,
"grad_norm": 10.639127731323242,
"learning_rate": 3.026418409484513e-08,
"logits/chosen": 1.6388273239135742,
"logits/rejected": 1.4276424646377563,
"logps/chosen": -73.75872802734375,
"logps/ref_chosen": -70.33343505859375,
"logps/ref_rejected": -108.86271667480469,
"logps/rejected": -126.00747680664062,
"loss": 0.9822,
"margin_dpo/margin_mean": 13.71946907043457,
"margin_dpo/margin_std": 16.7993221282959,
"step": 568
},
{
"epoch": 0.8601662887377173,
"fcm_dpo/beta": 0.04921431094408035,
"fcm_dpo/delta": 0.11886165291070938,
"fcm_dpo/margin": 9.874387741088867,
"fcm_dpo/q_t": 0.39863890409469604,
"grad_norm": 13.355634689331055,
"learning_rate": 2.963665913810451e-08,
"logits/chosen": 1.3453781604766846,
"logits/rejected": 1.3191735744476318,
"logps/chosen": -87.32330322265625,
"logps/ref_chosen": -80.85043334960938,
"logps/ref_rejected": -92.77810668945312,
"logps/rejected": -109.12537384033203,
"loss": 1.1497,
"margin_dpo/margin_mean": 9.874388694763184,
"margin_dpo/margin_std": 18.156291961669922,
"step": 569
},
{
"epoch": 0.8616780045351474,
"fcm_dpo/beta": 0.04896945506334305,
"fcm_dpo/delta": -0.2022635042667389,
"fcm_dpo/margin": 16.062522888183594,
"fcm_dpo/q_t": 0.33973759412765503,
"grad_norm": 10.115246772766113,
"learning_rate": 2.9015298217712453e-08,
"logits/chosen": 1.133542537689209,
"logits/rejected": 0.9755515456199646,
"logps/chosen": -72.37799835205078,
"logps/ref_chosen": -69.94769287109375,
"logps/ref_rejected": -97.37059020996094,
"logps/rejected": -115.86341857910156,
"loss": 0.9229,
"margin_dpo/margin_mean": 16.062524795532227,
"margin_dpo/margin_std": 18.405261993408203,
"step": 570
},
{
"epoch": 0.8631897203325775,
"fcm_dpo/beta": 0.04955677688121796,
"fcm_dpo/delta": 0.18852154910564423,
"fcm_dpo/margin": 8.448715209960938,
"fcm_dpo/q_t": 0.4095662236213684,
"grad_norm": 12.49162769317627,
"learning_rate": 2.840011871446962e-08,
"logits/chosen": 1.477982997894287,
"logits/rejected": 1.4091600179672241,
"logps/chosen": -77.71009826660156,
"logps/ref_chosen": -72.28555297851562,
"logps/ref_rejected": -84.57748413085938,
"logps/rejected": -98.45074462890625,
"loss": 1.198,
"margin_dpo/margin_mean": 8.448714256286621,
"margin_dpo/margin_std": 18.35816192626953,
"step": 571
},
{
"epoch": 0.8647014361300076,
"fcm_dpo/beta": 0.05024714022874832,
"fcm_dpo/delta": 0.058539681136608124,
"fcm_dpo/margin": 10.840170860290527,
"fcm_dpo/q_t": 0.3887644410133362,
"grad_norm": 11.713820457458496,
"learning_rate": 2.7791137836269158e-08,
"logits/chosen": 1.5529911518096924,
"logits/rejected": 1.6280450820922852,
"logps/chosen": -98.46131896972656,
"logps/ref_chosen": -91.4906997680664,
"logps/ref_rejected": -80.44602966308594,
"logps/rejected": -98.25682067871094,
"loss": 1.0653,
"margin_dpo/margin_mean": 10.840169906616211,
"margin_dpo/margin_std": 16.804597854614258,
"step": 572
},
{
"epoch": 0.8662131519274376,
"fcm_dpo/beta": 0.05019587278366089,
"fcm_dpo/delta": -0.019488001242280006,
"fcm_dpo/margin": 12.313529014587402,
"fcm_dpo/q_t": 0.3816547989845276,
"grad_norm": 12.991681098937988,
"learning_rate": 2.718837261761528e-08,
"logits/chosen": 1.6213641166687012,
"logits/rejected": 1.5047485828399658,
"logps/chosen": -94.56259155273438,
"logps/ref_chosen": -87.54232788085938,
"logps/ref_rejected": -104.32984924316406,
"logps/rejected": -123.66363525390625,
"loss": 1.1209,
"margin_dpo/margin_mean": 12.313529014587402,
"margin_dpo/margin_std": 22.380565643310547,
"step": 573
},
{
"epoch": 0.8677248677248677,
"fcm_dpo/beta": 0.049620434641838074,
"fcm_dpo/delta": -0.14074860513210297,
"fcm_dpo/margin": 14.71909236907959,
"fcm_dpo/q_t": 0.35213059186935425,
"grad_norm": 10.749646186828613,
"learning_rate": 2.659183991914696e-08,
"logits/chosen": 1.8166577816009521,
"logits/rejected": 1.7073123455047607,
"logps/chosen": -80.68084716796875,
"logps/ref_chosen": -75.36632537841797,
"logps/ref_rejected": -103.27328491210938,
"logps/rejected": -123.30690002441406,
"loss": 0.9694,
"margin_dpo/margin_mean": 14.719091415405273,
"margin_dpo/margin_std": 18.72347640991211,
"step": 574
},
{
"epoch": 0.8692365835222978,
"fcm_dpo/beta": 0.04792990908026695,
"fcm_dpo/delta": -0.09886258840560913,
"fcm_dpo/margin": 9.744050979614258,
"fcm_dpo/q_t": 0.40980133414268494,
"grad_norm": 11.675394058227539,
"learning_rate": 2.600155642716606e-08,
"logits/chosen": 1.63795804977417,
"logits/rejected": 1.4440220594406128,
"logps/chosen": -88.9930191040039,
"logps/ref_chosen": -81.678466796875,
"logps/ref_rejected": -112.84233093261719,
"logps/rejected": -129.90093994140625,
"loss": 1.1909,
"margin_dpo/margin_mean": 9.744050979614258,
"margin_dpo/margin_std": 20.141633987426758,
"step": 575
},
{
"epoch": 0.8707482993197279,
"fcm_dpo/beta": 0.04688930884003639,
"fcm_dpo/delta": -0.10789503902196884,
"fcm_dpo/margin": 14.908914566040039,
"fcm_dpo/q_t": 0.354342520236969,
"grad_norm": 10.642295837402344,
"learning_rate": 2.5417538653170754e-08,
"logits/chosen": 1.4131211042404175,
"logits/rejected": 1.199857234954834,
"logps/chosen": -72.4935302734375,
"logps/ref_chosen": -68.78944396972656,
"logps/ref_rejected": -102.79037475585938,
"logps/rejected": -121.40336608886719,
"loss": 0.9817,
"margin_dpo/margin_mean": 14.908914566040039,
"margin_dpo/margin_std": 19.143428802490234,
"step": 576
},
{
"epoch": 0.872260015117158,
"fcm_dpo/beta": 0.04831491410732269,
"fcm_dpo/delta": 0.2083924412727356,
"fcm_dpo/margin": 8.283341407775879,
"fcm_dpo/q_t": 0.4179866909980774,
"grad_norm": 11.02425479888916,
"learning_rate": 2.4839802933393607e-08,
"logits/chosen": 1.3915233612060547,
"logits/rejected": 1.3548476696014404,
"logps/chosen": -85.09516906738281,
"logps/ref_chosen": -79.84675598144531,
"logps/ref_rejected": -84.08309936523438,
"logps/rejected": -97.61485290527344,
"loss": 1.191,
"margin_dpo/margin_mean": 8.283340454101562,
"margin_dpo/margin_std": 17.960664749145508,
"step": 577
},
{
"epoch": 0.873771730914588,
"fcm_dpo/beta": 0.05029800906777382,
"fcm_dpo/delta": 0.20370450615882874,
"fcm_dpo/margin": 8.049797058105469,
"fcm_dpo/q_t": 0.42061176896095276,
"grad_norm": 14.181432723999023,
"learning_rate": 2.4268365428344733e-08,
"logits/chosen": 1.4182995557785034,
"logits/rejected": 1.3366800546646118,
"logps/chosen": -80.43700408935547,
"logps/ref_chosen": -74.91357421875,
"logps/ref_rejected": -83.64881896972656,
"logps/rejected": -97.2220458984375,
"loss": 1.185,
"margin_dpo/margin_mean": 8.049795150756836,
"margin_dpo/margin_std": 17.056869506835938,
"step": 578
},
{
"epoch": 0.8752834467120182,
"fcm_dpo/beta": 0.05020540952682495,
"fcm_dpo/delta": -0.054013222455978394,
"fcm_dpo/margin": 12.934350967407227,
"fcm_dpo/q_t": 0.36339443922042847,
"grad_norm": 10.921134948730469,
"learning_rate": 2.3703242122359357e-08,
"logits/chosen": 1.4313457012176514,
"logits/rejected": 1.3517735004425049,
"logps/chosen": -82.50849151611328,
"logps/ref_chosen": -75.51022338867188,
"logps/ref_rejected": -84.83192443847656,
"logps/rejected": -104.76454162597656,
"loss": 1.0188,
"margin_dpo/margin_mean": 12.93435001373291,
"margin_dpo/margin_std": 18.144775390625,
"step": 579
},
{
"epoch": 0.8767951625094482,
"fcm_dpo/beta": 0.051037803292274475,
"fcm_dpo/delta": 0.04716159403324127,
"fcm_dpo/margin": 10.865426063537598,
"fcm_dpo/q_t": 0.3839249610900879,
"grad_norm": 10.735759735107422,
"learning_rate": 2.3144448823151392e-08,
"logits/chosen": 1.422055721282959,
"logits/rejected": 1.2607250213623047,
"logps/chosen": -80.66156005859375,
"logps/ref_chosen": -76.61564636230469,
"logps/ref_rejected": -97.09959411621094,
"logps/rejected": -112.01094055175781,
"loss": 1.1127,
"margin_dpo/margin_mean": 10.86542797088623,
"margin_dpo/margin_std": 18.92938995361328,
"step": 580
},
{
"epoch": 0.8783068783068783,
"fcm_dpo/beta": 0.05055753141641617,
"fcm_dpo/delta": 0.0056791529059410095,
"fcm_dpo/margin": 11.744110107421875,
"fcm_dpo/q_t": 0.3782769739627838,
"grad_norm": 11.562898635864258,
"learning_rate": 2.259200116137039e-08,
"logits/chosen": 1.5065741539001465,
"logits/rejected": 1.3873448371887207,
"logps/chosen": -81.64364624023438,
"logps/ref_chosen": -74.8531265258789,
"logps/ref_rejected": -101.5344009399414,
"logps/rejected": -120.06903076171875,
"loss": 1.0871,
"margin_dpo/margin_mean": 11.744109153747559,
"margin_dpo/margin_std": 19.222896575927734,
"step": 581
},
{
"epoch": 0.8798185941043084,
"fcm_dpo/beta": 0.05103091150522232,
"fcm_dpo/delta": 0.0025793779641389847,
"fcm_dpo/margin": 11.70887279510498,
"fcm_dpo/q_t": 0.3785492777824402,
"grad_norm": 10.20443344116211,
"learning_rate": 2.204591459016525e-08,
"logits/chosen": 1.301880121231079,
"logits/rejected": 1.376091480255127,
"logps/chosen": -88.2724609375,
"logps/ref_chosen": -81.07638549804688,
"logps/ref_rejected": -72.83570861816406,
"logps/rejected": -91.74066162109375,
"loss": 1.0997,
"margin_dpo/margin_mean": 11.70887279510498,
"margin_dpo/margin_std": 20.00967788696289,
"step": 582
},
{
"epoch": 0.8813303099017384,
"fcm_dpo/beta": 0.051685880869627,
"fcm_dpo/delta": 0.09393204003572464,
"fcm_dpo/margin": 9.883415222167969,
"fcm_dpo/q_t": 0.4038504362106323,
"grad_norm": 16.68686294555664,
"learning_rate": 2.1506204384751064e-08,
"logits/chosen": 1.7437877655029297,
"logits/rejected": 1.4623810052871704,
"logps/chosen": -73.52954864501953,
"logps/ref_chosen": -66.78465270996094,
"logps/ref_rejected": -106.45825958251953,
"logps/rejected": -123.08657836914062,
"loss": 1.2033,
"margin_dpo/margin_mean": 9.883415222167969,
"margin_dpo/margin_std": 21.616724014282227,
"step": 583
},
{
"epoch": 0.8828420256991686,
"fcm_dpo/beta": 0.051659468561410904,
"fcm_dpo/delta": 0.028514884412288666,
"fcm_dpo/margin": 11.066621780395508,
"fcm_dpo/q_t": 0.3903641104698181,
"grad_norm": 17.737751007080078,
"learning_rate": 2.09728856419826e-08,
"logits/chosen": 1.8142191171646118,
"logits/rejected": 1.5797849893569946,
"logps/chosen": -64.03971862792969,
"logps/ref_chosen": -60.802913665771484,
"logps/ref_rejected": -99.45012664794922,
"logps/rejected": -113.75355529785156,
"loss": 1.1356,
"margin_dpo/margin_mean": 11.066622734069824,
"margin_dpo/margin_std": 20.347135543823242,
"step": 584
},
{
"epoch": 0.8843537414965986,
"fcm_dpo/beta": 0.05364014208316803,
"fcm_dpo/delta": 0.1607358157634735,
"fcm_dpo/margin": 8.310773849487305,
"fcm_dpo/q_t": 0.4064415395259857,
"grad_norm": 11.880135536193848,
"learning_rate": 2.044597327993153e-08,
"logits/chosen": 1.2061209678649902,
"logits/rejected": 1.139693260192871,
"logps/chosen": -81.80364990234375,
"logps/ref_chosen": -75.92616271972656,
"logps/ref_rejected": -94.47601318359375,
"logps/rejected": -108.66427612304688,
"loss": 1.1862,
"margin_dpo/margin_mean": 8.310773849487305,
"margin_dpo/margin_std": 17.42922592163086,
"step": 585
},
{
"epoch": 0.8858654572940288,
"fcm_dpo/beta": 0.052861057221889496,
"fcm_dpo/delta": -0.12748199701309204,
"fcm_dpo/margin": 13.586462020874023,
"fcm_dpo/q_t": 0.35138410329818726,
"grad_norm": 10.37414836883545,
"learning_rate": 1.9925482037469187e-08,
"logits/chosen": 1.7434275150299072,
"logits/rejected": 1.6040321588516235,
"logps/chosen": -72.82206726074219,
"logps/ref_chosen": -68.62062072753906,
"logps/ref_rejected": -81.98324584960938,
"logps/rejected": -99.77114868164062,
"loss": 0.9753,
"margin_dpo/margin_mean": 13.586462020874023,
"margin_dpo/margin_std": 17.776878356933594,
"step": 586
},
{
"epoch": 0.8873771730914588,
"fcm_dpo/beta": 0.05152256414294243,
"fcm_dpo/delta": -0.15027303993701935,
"fcm_dpo/margin": 14.35053825378418,
"fcm_dpo/q_t": 0.35401198267936707,
"grad_norm": 21.796733856201172,
"learning_rate": 1.9411426473854687e-08,
"logits/chosen": 1.2960412502288818,
"logits/rejected": 1.2788506746292114,
"logps/chosen": -81.04063415527344,
"logps/ref_chosen": -77.67031860351562,
"logps/ref_rejected": -79.35327911376953,
"logps/rejected": -97.07413482666016,
"loss": 1.0189,
"margin_dpo/margin_mean": 14.35053825378418,
"margin_dpo/margin_std": 20.870136260986328,
"step": 587
},
{
"epoch": 0.8888888888888888,
"fcm_dpo/beta": 0.05039939284324646,
"fcm_dpo/delta": -0.038049668073654175,
"fcm_dpo/margin": 12.592073440551758,
"fcm_dpo/q_t": 0.36368703842163086,
"grad_norm": 14.892370223999023,
"learning_rate": 1.890382096832699e-08,
"logits/chosen": 1.4794716835021973,
"logits/rejected": 1.3708586692810059,
"logps/chosen": -84.13540649414062,
"logps/ref_chosen": -77.94320678710938,
"logps/ref_rejected": -98.41210174560547,
"logps/rejected": -117.19638061523438,
"loss": 0.9984,
"margin_dpo/margin_mean": 12.592074394226074,
"margin_dpo/margin_std": 16.2170467376709,
"step": 588
},
{
"epoch": 0.890400604686319,
"fcm_dpo/beta": 0.04980698972940445,
"fcm_dpo/delta": -0.10331517457962036,
"fcm_dpo/margin": 13.974435806274414,
"fcm_dpo/q_t": 0.35320183634757996,
"grad_norm": 13.363643646240234,
"learning_rate": 1.840267971970344e-08,
"logits/chosen": 1.1975750923156738,
"logits/rejected": 1.1503894329071045,
"logps/chosen": -78.79930114746094,
"logps/ref_chosen": -75.18646240234375,
"logps/ref_rejected": -93.35910034179688,
"logps/rejected": -110.94638061523438,
"loss": 0.9705,
"margin_dpo/margin_mean": 13.974435806274414,
"margin_dpo/margin_std": 17.48256492614746,
"step": 589
},
{
"epoch": 0.891912320483749,
"fcm_dpo/beta": 0.049094684422016144,
"fcm_dpo/delta": -0.07261404395103455,
"fcm_dpo/margin": 13.602447509765625,
"fcm_dpo/q_t": 0.36111220717430115,
"grad_norm": 14.437777519226074,
"learning_rate": 1.7908016745981856e-08,
"logits/chosen": 1.493552565574646,
"logits/rejected": 1.4283008575439453,
"logps/chosen": -93.33152770996094,
"logps/ref_chosen": -86.9908447265625,
"logps/ref_rejected": -100.61723327636719,
"logps/rejected": -120.56035614013672,
"loss": 0.9957,
"margin_dpo/margin_mean": 13.602448463439941,
"margin_dpo/margin_std": 18.259305953979492,
"step": 590
},
{
"epoch": 0.8934240362811792,
"fcm_dpo/beta": 0.04784344136714935,
"fcm_dpo/delta": -0.04332631826400757,
"fcm_dpo/margin": 13.286850929260254,
"fcm_dpo/q_t": 0.3692328631877899,
"grad_norm": 11.933575630187988,
"learning_rate": 1.7419845883949098e-08,
"logits/chosen": 1.6455740928649902,
"logits/rejected": 1.4992456436157227,
"logps/chosen": -77.32672119140625,
"logps/ref_chosen": -74.85809326171875,
"logps/ref_rejected": -102.75840759277344,
"logps/rejected": -118.51390075683594,
"loss": 1.038,
"margin_dpo/margin_mean": 13.28685188293457,
"margin_dpo/margin_std": 18.549617767333984,
"step": 591
},
{
"epoch": 0.8949357520786092,
"fcm_dpo/beta": 0.04816969856619835,
"fcm_dpo/delta": -0.016897665336728096,
"fcm_dpo/margin": 12.779840469360352,
"fcm_dpo/q_t": 0.378559410572052,
"grad_norm": 10.442089080810547,
"learning_rate": 1.6938180788793556e-08,
"logits/chosen": 1.8930881023406982,
"logits/rejected": 1.6082756519317627,
"logps/chosen": -71.08365631103516,
"logps/ref_chosen": -67.90579223632812,
"logps/ref_rejected": -100.35234069824219,
"logps/rejected": -116.31005096435547,
"loss": 1.0329,
"margin_dpo/margin_mean": 12.779840469360352,
"margin_dpo/margin_std": 18.484506607055664,
"step": 592
},
{
"epoch": 0.8964474678760394,
"fcm_dpo/beta": 0.04836907237768173,
"fcm_dpo/delta": 0.006007889751344919,
"fcm_dpo/margin": 12.287393569946289,
"fcm_dpo/q_t": 0.38056206703186035,
"grad_norm": 12.300355911254883,
"learning_rate": 1.6463034933723336e-08,
"logits/chosen": 1.4134345054626465,
"logits/rejected": 1.197739839553833,
"logps/chosen": -62.10874938964844,
"logps/ref_chosen": -59.29489517211914,
"logps/ref_rejected": -85.31307983398438,
"logps/rejected": -100.4143295288086,
"loss": 1.0848,
"margin_dpo/margin_mean": 12.287393569946289,
"margin_dpo/margin_std": 20.32806396484375,
"step": 593
},
{
"epoch": 0.8979591836734694,
"fcm_dpo/beta": 0.048611119389534,
"fcm_dpo/delta": 0.06948675960302353,
"fcm_dpo/margin": 10.986978530883789,
"fcm_dpo/q_t": 0.3919852375984192,
"grad_norm": 12.52187442779541,
"learning_rate": 1.5994421609589385e-08,
"logits/chosen": 1.2517837285995483,
"logits/rejected": 1.1950948238372803,
"logps/chosen": -88.89640808105469,
"logps/ref_chosen": -83.14643859863281,
"logps/ref_rejected": -88.201904296875,
"logps/rejected": -104.93885803222656,
"loss": 1.0882,
"margin_dpo/margin_mean": 10.986978530883789,
"margin_dpo/margin_std": 18.014604568481445,
"step": 594
},
{
"epoch": 0.8994708994708994,
"fcm_dpo/beta": 0.048267461359500885,
"fcm_dpo/delta": -0.10970209538936615,
"fcm_dpo/margin": 14.54538345336914,
"fcm_dpo/q_t": 0.354385644197464,
"grad_norm": 11.967026710510254,
"learning_rate": 1.553235392451377e-08,
"logits/chosen": 1.6991221904754639,
"logits/rejected": 1.4775123596191406,
"logps/chosen": -75.35159301757812,
"logps/ref_chosen": -70.40016174316406,
"logps/ref_rejected": -103.95550537109375,
"logps/rejected": -123.45231628417969,
"loss": 1.0295,
"margin_dpo/margin_mean": 14.545382499694824,
"margin_dpo/margin_std": 21.330772399902344,
"step": 595
},
{
"epoch": 0.9009826152683296,
"fcm_dpo/beta": 0.04931151121854782,
"fcm_dpo/delta": 0.13946330547332764,
"fcm_dpo/margin": 5.445644378662109,
"fcm_dpo/q_t": 0.4492513835430145,
"grad_norm": 12.14819049835205,
"learning_rate": 1.507684480352292e-08,
"logits/chosen": 1.042555570602417,
"logits/rejected": 1.0750787258148193,
"logps/chosen": -93.83740997314453,
"logps/ref_chosen": -86.083740234375,
"logps/ref_rejected": -78.41991424560547,
"logps/rejected": -91.61923217773438,
"loss": 1.3056,
"margin_dpo/margin_mean": 5.445644378662109,
"margin_dpo/margin_std": 17.816463470458984,
"step": 596
},
{
"epoch": 0.9024943310657596,
"fcm_dpo/beta": 0.049357444047927856,
"fcm_dpo/delta": 0.009085144847631454,
"fcm_dpo/margin": 11.98304271697998,
"fcm_dpo/q_t": 0.37970802187919617,
"grad_norm": 10.093440055847168,
"learning_rate": 1.4627906988186111e-08,
"logits/chosen": 1.4211645126342773,
"logits/rejected": 1.3739463090896606,
"logps/chosen": -70.9151611328125,
"logps/ref_chosen": -67.8086166381836,
"logps/ref_rejected": -71.09245300292969,
"logps/rejected": -86.18203735351562,
"loss": 1.0678,
"margin_dpo/margin_mean": 11.98304271697998,
"margin_dpo/margin_std": 18.88620948791504,
"step": 597
},
{
"epoch": 0.9040060468631897,
"fcm_dpo/beta": 0.050865307450294495,
"fcm_dpo/delta": 0.16860562562942505,
"fcm_dpo/margin": 8.606800079345703,
"fcm_dpo/q_t": 0.41332411766052246,
"grad_norm": 11.829634666442871,
"learning_rate": 1.4185553036259095e-08,
"logits/chosen": 1.620859146118164,
"logits/rejected": 1.4285743236541748,
"logps/chosen": -83.40132141113281,
"logps/ref_chosen": -74.31095886230469,
"logps/ref_rejected": -98.08122253417969,
"logps/rejected": -115.77838897705078,
"loss": 1.1809,
"margin_dpo/margin_mean": 8.606800079345703,
"margin_dpo/margin_std": 18.199466705322266,
"step": 598
},
{
"epoch": 0.9055177626606198,
"fcm_dpo/beta": 0.05157572776079178,
"fcm_dpo/delta": 0.06592804938554764,
"fcm_dpo/margin": 10.424295425415039,
"fcm_dpo/q_t": 0.3934212625026703,
"grad_norm": 11.825658798217773,
"learning_rate": 1.3749795321332885e-08,
"logits/chosen": 1.8811097145080566,
"logits/rejected": 1.7760019302368164,
"logps/chosen": -81.8123779296875,
"logps/ref_chosen": -74.21861267089844,
"logps/ref_rejected": -90.1492919921875,
"logps/rejected": -108.1673583984375,
"loss": 1.1224,
"margin_dpo/margin_mean": 10.424295425415039,
"margin_dpo/margin_std": 18.91455078125,
"step": 599
},
{
"epoch": 0.9070294784580499,
"fcm_dpo/beta": 0.05311460793018341,
"fcm_dpo/delta": 0.114117830991745,
"fcm_dpo/margin": 9.208206176757812,
"fcm_dpo/q_t": 0.40185290575027466,
"grad_norm": 13.263036727905273,
"learning_rate": 1.3320646032487393e-08,
"logits/chosen": 1.6416901350021362,
"logits/rejected": 1.4666112661361694,
"logps/chosen": -86.00879669189453,
"logps/ref_chosen": -79.34190368652344,
"logps/ref_rejected": -97.0519790649414,
"logps/rejected": -112.92707824707031,
"loss": 1.1399,
"margin_dpo/margin_mean": 9.208206176757812,
"margin_dpo/margin_std": 17.026790618896484,
"step": 600
},
{
"epoch": 0.9070294784580499,
"eval_fcm_dpo/beta": 0.05296889320015907,
"eval_logits/chosen": 1.6311073303222656,
"eval_logits/rejected": 1.493275761604309,
"eval_logps/chosen": -92.30020141601562,
"eval_logps/ref_chosen": -86.90177917480469,
"eval_logps/ref_rejected": -96.69639587402344,
"eval_logps/rejected": -113.7957534790039,
"eval_loss": 0.5368251800537109,
"eval_margin_dpo/margin_mean": 11.700956344604492,
"eval_margin_dpo/margin_std": 18.786291122436523,
"eval_runtime": 42.3352,
"eval_samples_per_second": 54.399,
"eval_steps_per_second": 1.701,
"step": 600
},
{
"epoch": 0.90854119425548,
"fcm_dpo/beta": 0.051687560975551605,
"fcm_dpo/delta": -0.12882345914840698,
"fcm_dpo/margin": 13.898124694824219,
"fcm_dpo/q_t": 0.35476869344711304,
"grad_norm": 10.692002296447754,
"learning_rate": 1.2898117173950868e-08,
"logits/chosen": 1.5194027423858643,
"logits/rejected": 1.3310624361038208,
"logps/chosen": -74.42862701416016,
"logps/ref_chosen": -72.06497192382812,
"logps/ref_rejected": -97.60928344726562,
"logps/rejected": -113.87106323242188,
"loss": 0.996,
"margin_dpo/margin_mean": 13.898124694824219,
"margin_dpo/margin_std": 19.016834259033203,
"step": 601
},
{
"epoch": 0.91005291005291,
"fcm_dpo/beta": 0.05144128203392029,
"fcm_dpo/delta": -0.0647466778755188,
"fcm_dpo/margin": 12.83267593383789,
"fcm_dpo/q_t": 0.36555975675582886,
"grad_norm": 14.516810417175293,
"learning_rate": 1.2482220564763667e-08,
"logits/chosen": 1.3734362125396729,
"logits/rejected": 1.289400577545166,
"logps/chosen": -79.3190689086914,
"logps/ref_chosen": -77.80416870117188,
"logps/ref_rejected": -89.05026245117188,
"logps/rejected": -103.3978271484375,
"loss": 0.995,
"margin_dpo/margin_mean": 12.83267593383789,
"margin_dpo/margin_std": 16.826704025268555,
"step": 602
},
{
"epoch": 0.9115646258503401,
"fcm_dpo/beta": 0.050677426159381866,
"fcm_dpo/delta": 0.006852999329566956,
"fcm_dpo/margin": 11.696775436401367,
"fcm_dpo/q_t": 0.37813225388526917,
"grad_norm": 11.326213836669922,
"learning_rate": 1.2072967838448051e-08,
"logits/chosen": 1.1551480293273926,
"logits/rejected": 1.051234483718872,
"logps/chosen": -73.44308471679688,
"logps/ref_chosen": -68.30155944824219,
"logps/ref_rejected": -90.542724609375,
"logps/rejected": -107.38102722167969,
"loss": 1.0835,
"margin_dpo/margin_mean": 11.69677448272705,
"margin_dpo/margin_std": 19.035118103027344,
"step": 603
},
{
"epoch": 0.9130763416477702,
"fcm_dpo/beta": 0.05132821202278137,
"fcm_dpo/delta": 0.03209719434380531,
"fcm_dpo/margin": 11.098645210266113,
"fcm_dpo/q_t": 0.38882869482040405,
"grad_norm": 13.501641273498535,
"learning_rate": 1.1670370442682459e-08,
"logits/chosen": 1.3291363716125488,
"logits/rejected": 1.3053700923919678,
"logps/chosen": -93.28549194335938,
"logps/ref_chosen": -90.55952453613281,
"logps/ref_rejected": -84.6327133178711,
"logps/rejected": -98.45733642578125,
"loss": 1.1398,
"margin_dpo/margin_mean": 11.098645210266113,
"margin_dpo/margin_std": 21.025123596191406,
"step": 604
},
{
"epoch": 0.9145880574452003,
"fcm_dpo/beta": 0.05177273601293564,
"fcm_dpo/delta": 0.04624027758836746,
"fcm_dpo/margin": 10.744077682495117,
"fcm_dpo/q_t": 0.38820844888687134,
"grad_norm": 15.81956958770752,
"learning_rate": 1.1274439638981532e-08,
"logits/chosen": 1.6206059455871582,
"logits/rejected": 1.4927140474319458,
"logps/chosen": -87.21122741699219,
"logps/ref_chosen": -80.26661682128906,
"logps/ref_rejected": -100.26485443115234,
"logps/rejected": -117.95354461669922,
"loss": 1.0946,
"margin_dpo/margin_mean": 10.744077682495117,
"margin_dpo/margin_std": 18.287504196166992,
"step": 605
},
{
"epoch": 0.9160997732426304,
"fcm_dpo/beta": 0.05220865458250046,
"fcm_dpo/delta": 0.008291337639093399,
"fcm_dpo/margin": 11.328773498535156,
"fcm_dpo/q_t": 0.3812514841556549,
"grad_norm": 13.073740005493164,
"learning_rate": 1.0885186502381016e-08,
"logits/chosen": 1.5954047441482544,
"logits/rejected": 1.4421148300170898,
"logps/chosen": -74.8997802734375,
"logps/ref_chosen": -70.73554229736328,
"logps/ref_rejected": -95.9410400390625,
"logps/rejected": -111.43405151367188,
"loss": 1.0533,
"margin_dpo/margin_mean": 11.328773498535156,
"margin_dpo/margin_std": 17.17182159423828,
"step": 606
},
{
"epoch": 0.9176114890400605,
"fcm_dpo/beta": 0.0508500337600708,
"fcm_dpo/delta": -0.03433932363986969,
"fcm_dpo/margin": 12.327168464660645,
"fcm_dpo/q_t": 0.366793692111969,
"grad_norm": 13.337456703186035,
"learning_rate": 1.0502621921127774e-08,
"logits/chosen": 1.4727654457092285,
"logits/rejected": 1.3896727561950684,
"logps/chosen": -87.79872131347656,
"logps/ref_chosen": -81.26203918457031,
"logps/ref_rejected": -92.71575927734375,
"logps/rejected": -111.57960510253906,
"loss": 1.0111,
"margin_dpo/margin_mean": 12.327167510986328,
"margin_dpo/margin_std": 15.473169326782227,
"step": 607
},
{
"epoch": 0.9191232048374905,
"fcm_dpo/beta": 0.05308441445231438,
"fcm_dpo/delta": 0.17097234725952148,
"fcm_dpo/margin": 8.206452369689941,
"fcm_dpo/q_t": 0.41368526220321655,
"grad_norm": 15.105749130249023,
"learning_rate": 1.0126756596375685e-08,
"logits/chosen": 1.3038733005523682,
"logits/rejected": 1.0675292015075684,
"logps/chosen": -90.75921630859375,
"logps/ref_chosen": -82.6530990600586,
"logps/ref_rejected": -110.64334106445312,
"logps/rejected": -126.95590209960938,
"loss": 1.1895,
"margin_dpo/margin_mean": 8.206452369689941,
"margin_dpo/margin_std": 17.802762985229492,
"step": 608
},
{
"epoch": 0.9206349206349206,
"fcm_dpo/beta": 0.052457720041275024,
"fcm_dpo/delta": -0.07344165444374084,
"fcm_dpo/margin": 12.722570419311523,
"fcm_dpo/q_t": 0.35613733530044556,
"grad_norm": 10.500450134277344,
"learning_rate": 9.757601041885694e-09,
"logits/chosen": 1.6448678970336914,
"logits/rejected": 1.5165568590164185,
"logps/chosen": -71.36085510253906,
"logps/ref_chosen": -68.20232391357422,
"logps/ref_rejected": -81.90515899658203,
"logps/rejected": -97.7862548828125,
"loss": 0.9728,
"margin_dpo/margin_mean": 12.72257137298584,
"margin_dpo/margin_std": 15.513092041015625,
"step": 609
},
{
"epoch": 0.9221466364323507,
"fcm_dpo/beta": 0.05239449441432953,
"fcm_dpo/delta": -0.012467984110116959,
"fcm_dpo/margin": 11.673583030700684,
"fcm_dpo/q_t": 0.3851981461048126,
"grad_norm": 15.478520393371582,
"learning_rate": 9.395165583732379e-09,
"logits/chosen": 1.5082423686981201,
"logits/rejected": 1.4333908557891846,
"logps/chosen": -105.09656524658203,
"logps/ref_chosen": -99.01324462890625,
"logps/ref_rejected": -102.26054382324219,
"logps/rejected": -120.01744079589844,
"loss": 1.1231,
"margin_dpo/margin_mean": 11.673582077026367,
"margin_dpo/margin_std": 21.25330352783203,
"step": 610
},
{
"epoch": 0.9236583522297808,
"fcm_dpo/beta": 0.053039200603961945,
"fcm_dpo/delta": 0.1085679829120636,
"fcm_dpo/margin": 9.36767578125,
"fcm_dpo/q_t": 0.40129148960113525,
"grad_norm": 12.573719024658203,
"learning_rate": 9.03946036001449e-09,
"logits/chosen": 1.474776268005371,
"logits/rejected": 1.3687703609466553,
"logps/chosen": -70.27494812011719,
"logps/ref_chosen": -66.36254119873047,
"logps/ref_rejected": -88.74557495117188,
"logps/rejected": -102.0256576538086,
"loss": 1.1166,
"margin_dpo/margin_mean": 9.36767578125,
"margin_dpo/margin_std": 16.48019790649414,
"step": 611
},
{
"epoch": 0.9251700680272109,
"fcm_dpo/beta": 0.052704453468322754,
"fcm_dpo/delta": -0.12535026669502258,
"fcm_dpo/margin": 13.594877243041992,
"fcm_dpo/q_t": 0.35807400941848755,
"grad_norm": 10.761448860168457,
"learning_rate": 8.690495320571839e-09,
"logits/chosen": 1.2493185997009277,
"logits/rejected": 1.0813252925872803,
"logps/chosen": -85.430908203125,
"logps/ref_chosen": -78.6339111328125,
"logps/ref_rejected": -108.34969329833984,
"logps/rejected": -128.74156188964844,
"loss": 1.0352,
"margin_dpo/margin_mean": 13.594876289367676,
"margin_dpo/margin_std": 20.702537536621094,
"step": 612
},
{
"epoch": 0.926681783824641,
"fcm_dpo/beta": 0.05094806104898453,
"fcm_dpo/delta": -0.21549217402935028,
"fcm_dpo/margin": 15.678262710571289,
"fcm_dpo/q_t": 0.33935976028442383,
"grad_norm": 12.649880409240723,
"learning_rate": 8.348280226706722e-09,
"logits/chosen": 1.3885865211486816,
"logits/rejected": 1.382106900215149,
"logps/chosen": -75.3154296875,
"logps/ref_chosen": -73.3539047241211,
"logps/ref_rejected": -76.91837310791016,
"logps/rejected": -94.55816650390625,
"loss": 0.9547,
"margin_dpo/margin_mean": 15.678261756896973,
"margin_dpo/margin_std": 20.076194763183594,
"step": 613
},
{
"epoch": 0.9281934996220711,
"fcm_dpo/beta": 0.0507182851433754,
"fcm_dpo/delta": 0.047426991164684296,
"fcm_dpo/margin": 10.928446769714355,
"fcm_dpo/q_t": 0.3804364502429962,
"grad_norm": 15.334555625915527,
"learning_rate": 8.012824650910937e-09,
"logits/chosen": 1.514979600906372,
"logits/rejected": 1.4878126382827759,
"logps/chosen": -84.84788513183594,
"logps/ref_chosen": -77.80007934570312,
"logps/ref_rejected": -89.05572509765625,
"logps/rejected": -107.031982421875,
"loss": 1.0489,
"margin_dpo/margin_mean": 10.928446769714355,
"margin_dpo/margin_std": 15.56425666809082,
"step": 614
},
{
"epoch": 0.9297052154195011,
"fcm_dpo/beta": 0.049501433968544006,
"fcm_dpo/delta": -0.09533637017011642,
"fcm_dpo/margin": 13.88467788696289,
"fcm_dpo/q_t": 0.3619763255119324,
"grad_norm": 15.51099967956543,
"learning_rate": 7.684137976598088e-09,
"logits/chosen": 1.603013515472412,
"logits/rejected": 1.4670634269714355,
"logps/chosen": -97.00009155273438,
"logps/ref_chosen": -90.06971740722656,
"logps/ref_rejected": -118.7764892578125,
"logps/rejected": -139.591552734375,
"loss": 1.0328,
"margin_dpo/margin_mean": 13.884675979614258,
"margin_dpo/margin_std": 20.35204315185547,
"step": 615
},
{
"epoch": 0.9312169312169312,
"fcm_dpo/beta": 0.04965684935450554,
"fcm_dpo/delta": 0.042817123234272,
"fcm_dpo/margin": 11.263029098510742,
"fcm_dpo/q_t": 0.3879207968711853,
"grad_norm": 13.444324493408203,
"learning_rate": 7.36222939784098e-09,
"logits/chosen": 1.362182855606079,
"logits/rejected": 1.189748764038086,
"logps/chosen": -80.3229751586914,
"logps/ref_chosen": -74.62954711914062,
"logps/ref_rejected": -93.655029296875,
"logps/rejected": -110.61148071289062,
"loss": 1.0719,
"margin_dpo/margin_mean": 11.26302719116211,
"margin_dpo/margin_std": 17.761255264282227,
"step": 616
},
{
"epoch": 0.9327286470143613,
"fcm_dpo/beta": 0.050146251916885376,
"fcm_dpo/delta": 0.09086576849222183,
"fcm_dpo/margin": 10.21737289428711,
"fcm_dpo/q_t": 0.390741765499115,
"grad_norm": 13.885807991027832,
"learning_rate": 7.047107919114586e-09,
"logits/chosen": 1.496596336364746,
"logits/rejected": 1.3787349462509155,
"logps/chosen": -84.53483581542969,
"logps/ref_chosen": -75.98182678222656,
"logps/ref_rejected": -97.1640625,
"logps/rejected": -115.93444061279297,
"loss": 1.0971,
"margin_dpo/margin_mean": 10.21737289428711,
"margin_dpo/margin_std": 16.45693016052246,
"step": 617
},
{
"epoch": 0.9342403628117913,
"fcm_dpo/beta": 0.051238950341939926,
"fcm_dpo/delta": 0.07789164781570435,
"fcm_dpo/margin": 10.265620231628418,
"fcm_dpo/q_t": 0.3962671160697937,
"grad_norm": 16.189624786376953,
"learning_rate": 6.738782355044048e-09,
"logits/chosen": 1.3820104598999023,
"logits/rejected": 1.1591483354568481,
"logps/chosen": -79.09159851074219,
"logps/ref_chosen": -74.47208404541016,
"logps/ref_rejected": -107.09980773925781,
"logps/rejected": -121.98493957519531,
"loss": 1.1232,
"margin_dpo/margin_mean": 10.265620231628418,
"margin_dpo/margin_std": 18.69152069091797,
"step": 618
},
{
"epoch": 0.9357520786092215,
"fcm_dpo/beta": 0.05077732354402542,
"fcm_dpo/delta": -0.07294730842113495,
"fcm_dpo/margin": 13.123184204101562,
"fcm_dpo/q_t": 0.36624816060066223,
"grad_norm": 11.196142196655273,
"learning_rate": 6.437261330158206e-09,
"logits/chosen": 1.6958047151565552,
"logits/rejected": 1.4898502826690674,
"logps/chosen": -75.58355712890625,
"logps/ref_chosen": -70.84220886230469,
"logps/ref_rejected": -98.07801818847656,
"logps/rejected": -115.94255065917969,
"loss": 1.0293,
"margin_dpo/margin_mean": 13.123184204101562,
"margin_dpo/margin_std": 19.087276458740234,
"step": 619
},
{
"epoch": 0.9372637944066515,
"fcm_dpo/beta": 0.050601303577423096,
"fcm_dpo/delta": -0.07571752369403839,
"fcm_dpo/margin": 9.317473411560059,
"fcm_dpo/q_t": 0.40199047327041626,
"grad_norm": 13.557417869567871,
"learning_rate": 6.142553278648238e-09,
"logits/chosen": 1.5680897235870361,
"logits/rejected": 1.568605899810791,
"logps/chosen": -81.21891021728516,
"logps/ref_chosen": -76.93606567382812,
"logps/ref_rejected": -81.28453063964844,
"logps/rejected": -94.88484954833984,
"loss": 1.1566,
"margin_dpo/margin_mean": 9.317474365234375,
"margin_dpo/margin_std": 17.439876556396484,
"step": 620
},
{
"epoch": 0.9387755102040817,
"fcm_dpo/beta": 0.0511331781744957,
"fcm_dpo/delta": 0.08140092343091965,
"fcm_dpo/margin": 10.20268440246582,
"fcm_dpo/q_t": 0.39421504735946655,
"grad_norm": 12.2879638671875,
"learning_rate": 5.854666444131934e-09,
"logits/chosen": 1.767906904220581,
"logits/rejected": 1.4925261735916138,
"logps/chosen": -75.88006591796875,
"logps/ref_chosen": -69.87464904785156,
"logps/ref_rejected": -105.61328887939453,
"logps/rejected": -121.82138061523438,
"loss": 1.1127,
"margin_dpo/margin_mean": 10.20268440246582,
"margin_dpo/margin_std": 17.7759952545166,
"step": 621
},
{
"epoch": 0.9402872260015117,
"fcm_dpo/beta": 0.05143500864505768,
"fcm_dpo/delta": 0.030062519013881683,
"fcm_dpo/margin": 11.108509063720703,
"fcm_dpo/q_t": 0.379915326833725,
"grad_norm": 11.659144401550293,
"learning_rate": 5.573608879422875e-09,
"logits/chosen": 1.2655305862426758,
"logits/rejected": 1.1411113739013672,
"logps/chosen": -84.64167785644531,
"logps/ref_chosen": -78.9598388671875,
"logps/ref_rejected": -97.90648651123047,
"logps/rejected": -114.69683837890625,
"loss": 1.0483,
"margin_dpo/margin_mean": 11.108508110046387,
"margin_dpo/margin_std": 16.232669830322266,
"step": 622
},
{
"epoch": 0.9417989417989417,
"fcm_dpo/beta": 0.051177725195884705,
"fcm_dpo/delta": -0.018996700644493103,
"fcm_dpo/margin": 12.071805953979492,
"fcm_dpo/q_t": 0.3701121211051941,
"grad_norm": 15.908820152282715,
"learning_rate": 5.299388446305342e-09,
"logits/chosen": 1.5092880725860596,
"logits/rejected": 1.3792246580123901,
"logps/chosen": -91.16535186767578,
"logps/ref_chosen": -83.22647094726562,
"logps/ref_rejected": -105.1362533569336,
"logps/rejected": -125.14694213867188,
"loss": 1.0229,
"margin_dpo/margin_mean": 12.071805000305176,
"margin_dpo/margin_std": 16.85704803466797,
"step": 623
},
{
"epoch": 0.9433106575963719,
"fcm_dpo/beta": 0.05008304864168167,
"fcm_dpo/delta": -0.1030079573392868,
"fcm_dpo/margin": 13.869135856628418,
"fcm_dpo/q_t": 0.3560662567615509,
"grad_norm": 11.17168140411377,
"learning_rate": 5.03201281531429e-09,
"logits/chosen": 1.2747315168380737,
"logits/rejected": 1.0452553033828735,
"logps/chosen": -69.04547882080078,
"logps/ref_chosen": -66.10560607910156,
"logps/ref_rejected": -91.66778564453125,
"logps/rejected": -108.47679138183594,
"loss": 0.9783,
"margin_dpo/margin_mean": 13.869135856628418,
"margin_dpo/margin_std": 17.713294982910156,
"step": 624
},
{
"epoch": 0.9448223733938019,
"fcm_dpo/beta": 0.051767922937870026,
"fcm_dpo/delta": 0.2112494707107544,
"fcm_dpo/margin": 7.6672444343566895,
"fcm_dpo/q_t": 0.4205039143562317,
"grad_norm": 12.619704246520996,
"learning_rate": 4.7714894655209174e-09,
"logits/chosen": 1.896888256072998,
"logits/rejected": 1.684779405593872,
"logps/chosen": -79.9560317993164,
"logps/ref_chosen": -73.20295715332031,
"logps/ref_rejected": -105.31025695800781,
"logps/rejected": -119.7305908203125,
"loss": 1.2256,
"margin_dpo/margin_mean": 7.667244911193848,
"margin_dpo/margin_std": 18.133235931396484,
"step": 625
},
{
"epoch": 0.9463340891912321,
"fcm_dpo/beta": 0.05126585811376572,
"fcm_dpo/delta": -0.1028745248913765,
"fcm_dpo/margin": 13.556199073791504,
"fcm_dpo/q_t": 0.3712637424468994,
"grad_norm": 13.120343208312988,
"learning_rate": 4.517825684323323e-09,
"logits/chosen": 1.7848681211471558,
"logits/rejected": 1.4529061317443848,
"logps/chosen": -66.4439697265625,
"logps/ref_chosen": -62.181278228759766,
"logps/ref_rejected": -108.17747497558594,
"logps/rejected": -125.99636840820312,
"loss": 1.0751,
"margin_dpo/margin_mean": 13.556198120117188,
"margin_dpo/margin_std": 22.397666931152344,
"step": 626
},
{
"epoch": 0.9478458049886621,
"fcm_dpo/beta": 0.0497593954205513,
"fcm_dpo/delta": -0.19564473628997803,
"fcm_dpo/margin": 15.683549880981445,
"fcm_dpo/q_t": 0.34314611554145813,
"grad_norm": 11.137099266052246,
"learning_rate": 4.271028567242818e-09,
"logits/chosen": 1.3055121898651123,
"logits/rejected": 1.0202195644378662,
"logps/chosen": -82.22528076171875,
"logps/ref_chosen": -77.72123718261719,
"logps/ref_rejected": -114.40547180175781,
"logps/rejected": -134.59307861328125,
"loss": 0.9303,
"margin_dpo/margin_mean": 15.683549880981445,
"margin_dpo/margin_std": 18.70248794555664,
"step": 627
},
{
"epoch": 0.9493575207860923,
"fcm_dpo/beta": 0.04885813593864441,
"fcm_dpo/delta": -0.1272927224636078,
"fcm_dpo/margin": 14.678993225097656,
"fcm_dpo/q_t": 0.35350874066352844,
"grad_norm": 11.312077522277832,
"learning_rate": 4.0311050177251895e-09,
"logits/chosen": 1.472090482711792,
"logits/rejected": 1.4293111562728882,
"logps/chosen": -74.28559112548828,
"logps/ref_chosen": -70.71195983886719,
"logps/ref_rejected": -93.85909271240234,
"logps/rejected": -112.11170959472656,
"loss": 1.0458,
"margin_dpo/margin_mean": 14.678994178771973,
"margin_dpo/margin_std": 21.24105453491211,
"step": 628
},
{
"epoch": 0.9508692365835223,
"fcm_dpo/beta": 0.048182882368564606,
"fcm_dpo/delta": 0.01766796223819256,
"fcm_dpo/margin": 12.106483459472656,
"fcm_dpo/q_t": 0.37841320037841797,
"grad_norm": 13.544909477233887,
"learning_rate": 3.798061746947995e-09,
"logits/chosen": 1.484879970550537,
"logits/rejected": 1.4223473072052002,
"logps/chosen": -91.72695922851562,
"logps/ref_chosen": -88.66283416748047,
"logps/ref_rejected": -94.67845153808594,
"logps/rejected": -109.84906005859375,
"loss": 1.021,
"margin_dpo/margin_mean": 12.106481552124023,
"margin_dpo/margin_std": 16.435211181640625,
"step": 629
},
{
"epoch": 0.9523809523809523,
"fcm_dpo/beta": 0.04793520271778107,
"fcm_dpo/delta": -0.049655731767416,
"fcm_dpo/margin": 13.485906600952148,
"fcm_dpo/q_t": 0.36746057868003845,
"grad_norm": 9.105417251586914,
"learning_rate": 3.5719052736323806e-09,
"logits/chosen": 1.4485478401184082,
"logits/rejected": 1.287881851196289,
"logps/chosen": -76.72743225097656,
"logps/ref_chosen": -72.94979858398438,
"logps/ref_rejected": -92.7632827758789,
"logps/rejected": -110.02682495117188,
"loss": 0.9886,
"margin_dpo/margin_mean": 13.485905647277832,
"margin_dpo/margin_std": 17.394798278808594,
"step": 630
},
{
"epoch": 0.9538926681783825,
"fcm_dpo/beta": 0.04620499163866043,
"fcm_dpo/delta": -0.1376815140247345,
"fcm_dpo/margin": 15.663002967834473,
"fcm_dpo/q_t": 0.3513961434364319,
"grad_norm": 11.871007919311523,
"learning_rate": 3.352641923861144e-09,
"logits/chosen": 1.7959654331207275,
"logits/rejected": 1.492279052734375,
"logps/chosen": -82.16062927246094,
"logps/ref_chosen": -78.58656311035156,
"logps/ref_rejected": -115.38685607910156,
"logps/rejected": -134.62391662597656,
"loss": 0.9653,
"margin_dpo/margin_mean": 15.663003921508789,
"margin_dpo/margin_std": 19.171688079833984,
"step": 631
},
{
"epoch": 0.9554043839758125,
"fcm_dpo/beta": 0.046446263790130615,
"fcm_dpo/delta": 0.008756112307310104,
"fcm_dpo/margin": 12.740787506103516,
"fcm_dpo/q_t": 0.37455809116363525,
"grad_norm": 9.769354820251465,
"learning_rate": 3.140277830901428e-09,
"logits/chosen": 1.4969274997711182,
"logits/rejected": 1.402543067932129,
"logps/chosen": -79.57364654541016,
"logps/ref_chosen": -75.24861907958984,
"logps/ref_rejected": -82.98665618896484,
"logps/rejected": -100.05247497558594,
"loss": 1.0419,
"margin_dpo/margin_mean": 12.740787506103516,
"margin_dpo/margin_std": 18.489219665527344,
"step": 632
},
{
"epoch": 0.9569160997732427,
"fcm_dpo/beta": 0.046251967549324036,
"fcm_dpo/delta": -0.0757179856300354,
"fcm_dpo/margin": 14.493501663208008,
"fcm_dpo/q_t": 0.3659166693687439,
"grad_norm": 14.5718994140625,
"learning_rate": 2.9348189350335007e-09,
"logits/chosen": 1.6168040037155151,
"logits/rejected": 1.4710830450057983,
"logps/chosen": -69.67645263671875,
"logps/ref_chosen": -68.8402099609375,
"logps/ref_rejected": -84.64610290527344,
"logps/rejected": -99.97584533691406,
"loss": 1.0,
"margin_dpo/margin_mean": 14.493501663208008,
"margin_dpo/margin_std": 19.82632064819336,
"step": 633
},
{
"epoch": 0.9584278155706727,
"fcm_dpo/beta": 0.04652927815914154,
"fcm_dpo/delta": 0.1592503935098648,
"fcm_dpo/margin": 5.743697166442871,
"fcm_dpo/q_t": 0.446336030960083,
"grad_norm": 17.360897064208984,
"learning_rate": 2.736270983384276e-09,
"logits/chosen": 1.441842794418335,
"logits/rejected": 1.4726186990737915,
"logps/chosen": -83.4759750366211,
"logps/ref_chosen": -77.0589599609375,
"logps/ref_rejected": -74.37579345703125,
"logps/rejected": -86.5364990234375,
"loss": 1.2976,
"margin_dpo/margin_mean": 5.743697166442871,
"margin_dpo/margin_std": 18.205974578857422,
"step": 634
},
{
"epoch": 0.9599395313681028,
"fcm_dpo/beta": 0.04858339577913284,
"fcm_dpo/delta": 0.17846964299678802,
"fcm_dpo/margin": 8.830991744995117,
"fcm_dpo/q_t": 0.4160218834877014,
"grad_norm": 12.776594161987305,
"learning_rate": 2.5446395297668287e-09,
"logits/chosen": 1.352858066558838,
"logits/rejected": 1.2052037715911865,
"logps/chosen": -95.20944213867188,
"logps/ref_chosen": -85.60243225097656,
"logps/ref_rejected": -104.29497528076172,
"logps/rejected": -122.73298645019531,
"loss": 1.1964,
"margin_dpo/margin_mean": 8.830991744995117,
"margin_dpo/margin_std": 19.583810806274414,
"step": 635
},
{
"epoch": 0.9614512471655329,
"fcm_dpo/beta": 0.04776782542467117,
"fcm_dpo/delta": -0.1610623449087143,
"fcm_dpo/margin": 15.674917221069336,
"fcm_dpo/q_t": 0.3444763422012329,
"grad_norm": 10.230683326721191,
"learning_rate": 2.359929934524829e-09,
"logits/chosen": 1.4697428941726685,
"logits/rejected": 1.205780029296875,
"logps/chosen": -71.68122863769531,
"logps/ref_chosen": -68.72154235839844,
"logps/ref_rejected": -97.44863891601562,
"logps/rejected": -116.0832290649414,
"loss": 0.9226,
"margin_dpo/margin_mean": 15.674918174743652,
"margin_dpo/margin_std": 17.92241859436035,
"step": 636
},
{
"epoch": 0.9629629629629629,
"fcm_dpo/beta": 0.0476045086979866,
"fcm_dpo/delta": -0.006276901811361313,
"fcm_dpo/margin": 12.720074653625488,
"fcm_dpo/q_t": 0.37932315468788147,
"grad_norm": 10.013970375061035,
"learning_rate": 2.1821473643827137e-09,
"logits/chosen": 1.3179742097854614,
"logits/rejected": 1.1738412380218506,
"logps/chosen": -101.20001220703125,
"logps/ref_chosen": -92.38919067382812,
"logps/ref_rejected": -103.70460510253906,
"logps/rejected": -125.23550415039062,
"loss": 1.04,
"margin_dpo/margin_mean": 12.720074653625488,
"margin_dpo/margin_std": 19.057313919067383,
"step": 637
},
{
"epoch": 0.9644746787603931,
"fcm_dpo/beta": 0.04759259521961212,
"fcm_dpo/delta": 0.019558563828468323,
"fcm_dpo/margin": 12.21816349029541,
"fcm_dpo/q_t": 0.37911561131477356,
"grad_norm": 12.155881881713867,
"learning_rate": 2.0112967923011646e-09,
"logits/chosen": 1.4781807661056519,
"logits/rejected": 1.3293653726577759,
"logps/chosen": -89.75704956054688,
"logps/ref_chosen": -83.36921691894531,
"logps/ref_rejected": -103.04508209228516,
"logps/rejected": -121.65107727050781,
"loss": 1.0429,
"margin_dpo/margin_mean": 12.218162536621094,
"margin_dpo/margin_std": 17.843093872070312,
"step": 638
},
{
"epoch": 0.9659863945578231,
"fcm_dpo/beta": 0.047518063336610794,
"fcm_dpo/delta": -0.03895752504467964,
"fcm_dpo/margin": 13.388608932495117,
"fcm_dpo/q_t": 0.3698895573616028,
"grad_norm": 9.916508674621582,
"learning_rate": 1.847382997337943e-09,
"logits/chosen": 1.6210851669311523,
"logits/rejected": 1.3688468933105469,
"logps/chosen": -74.5676040649414,
"logps/ref_chosen": -70.45247650146484,
"logps/ref_rejected": -93.77748107910156,
"logps/rejected": -111.28121948242188,
"loss": 1.0065,
"margin_dpo/margin_mean": 13.38861083984375,
"margin_dpo/margin_std": 18.01410484313965,
"step": 639
},
{
"epoch": 0.9674981103552532,
"fcm_dpo/beta": 0.04807348549365997,
"fcm_dpo/delta": 0.15326353907585144,
"fcm_dpo/margin": 9.443033218383789,
"fcm_dpo/q_t": 0.4005971848964691,
"grad_norm": 11.905314445495605,
"learning_rate": 1.690410564514244e-09,
"logits/chosen": 1.5392241477966309,
"logits/rejected": 1.3597580194473267,
"logps/chosen": -74.13145446777344,
"logps/ref_chosen": -68.51570129394531,
"logps/ref_rejected": -92.35081481933594,
"logps/rejected": -107.40959930419922,
"loss": 1.1826,
"margin_dpo/margin_mean": 9.443033218383789,
"margin_dpo/margin_std": 19.444664001464844,
"step": 640
},
{
"epoch": 0.9690098261526833,
"fcm_dpo/beta": 0.04892860725522041,
"fcm_dpo/delta": 0.06555097550153732,
"fcm_dpo/margin": 10.993219375610352,
"fcm_dpo/q_t": 0.38847824931144714,
"grad_norm": 12.829493522644043,
"learning_rate": 1.5403838846864692e-09,
"logits/chosen": 1.3246686458587646,
"logits/rejected": 1.3006986379623413,
"logps/chosen": -99.61031341552734,
"logps/ref_chosen": -92.35102844238281,
"logps/ref_rejected": -102.4269790649414,
"logps/rejected": -120.67948150634766,
"loss": 1.0872,
"margin_dpo/margin_mean": 10.993219375610352,
"margin_dpo/margin_std": 18.027175903320312,
"step": 641
},
{
"epoch": 0.9705215419501134,
"fcm_dpo/beta": 0.049451105296611786,
"fcm_dpo/delta": 0.09589925408363342,
"fcm_dpo/margin": 10.26209545135498,
"fcm_dpo/q_t": 0.3960247039794922,
"grad_norm": 11.930103302001953,
"learning_rate": 1.3973071544233218e-09,
"logits/chosen": 1.1650804281234741,
"logits/rejected": 1.1836330890655518,
"logps/chosen": -95.06695556640625,
"logps/ref_chosen": -88.39617919921875,
"logps/ref_rejected": -88.73035430908203,
"logps/rejected": -105.66322326660156,
"loss": 1.1312,
"margin_dpo/margin_mean": 10.26209545135498,
"margin_dpo/margin_std": 18.21469497680664,
"step": 642
},
{
"epoch": 0.9720332577475435,
"fcm_dpo/beta": 0.05022279545664787,
"fcm_dpo/delta": 0.046514783054590225,
"fcm_dpo/margin": 11.055997848510742,
"fcm_dpo/q_t": 0.3896936774253845,
"grad_norm": 12.769640922546387,
"learning_rate": 1.261184375888541e-09,
"logits/chosen": 1.4847640991210938,
"logits/rejected": 1.178241491317749,
"logps/chosen": -90.30828857421875,
"logps/ref_chosen": -84.83087158203125,
"logps/ref_rejected": -105.31499481201172,
"logps/rejected": -121.84840393066406,
"loss": 1.1231,
"margin_dpo/margin_mean": 11.055997848510742,
"margin_dpo/margin_std": 19.96835708618164,
"step": 643
},
{
"epoch": 0.9735449735449735,
"fcm_dpo/beta": 0.05142327770590782,
"fcm_dpo/delta": 0.05963759124279022,
"fcm_dpo/margin": 10.557317733764648,
"fcm_dpo/q_t": 0.39105477929115295,
"grad_norm": 13.958258628845215,
"learning_rate": 1.1320193567288527e-09,
"logits/chosen": 1.6530699729919434,
"logits/rejected": 1.544208288192749,
"logps/chosen": -70.48993682861328,
"logps/ref_chosen": -65.11122131347656,
"logps/ref_rejected": -80.4027328491211,
"logps/rejected": -96.33876037597656,
"loss": 1.1411,
"margin_dpo/margin_mean": 10.557317733764648,
"margin_dpo/margin_std": 19.831335067749023,
"step": 644
},
{
"epoch": 0.9750566893424036,
"fcm_dpo/beta": 0.05053392052650452,
"fcm_dpo/delta": -0.08560548722743988,
"fcm_dpo/margin": 13.443717002868652,
"fcm_dpo/q_t": 0.36036476492881775,
"grad_norm": 13.477982521057129,
"learning_rate": 1.0098157099674987e-09,
"logits/chosen": 1.4453848600387573,
"logits/rejected": 1.4229815006256104,
"logps/chosen": -81.81217193603516,
"logps/ref_chosen": -76.93634033203125,
"logps/ref_rejected": -89.14311981201172,
"logps/rejected": -107.46266174316406,
"loss": 0.9803,
"margin_dpo/margin_mean": 13.443717956542969,
"margin_dpo/margin_std": 17.437297821044922,
"step": 645
},
{
"epoch": 0.9765684051398337,
"fcm_dpo/beta": 0.05015309527516365,
"fcm_dpo/delta": -0.047624535858631134,
"fcm_dpo/margin": 12.85222339630127,
"fcm_dpo/q_t": 0.3658458888530731,
"grad_norm": 10.155190467834473,
"learning_rate": 8.945768539031783e-10,
"logits/chosen": 1.6856896877288818,
"logits/rejected": 1.5893869400024414,
"logps/chosen": -86.48307800292969,
"logps/ref_chosen": -77.69122314453125,
"logps/ref_rejected": -98.14374542236328,
"logps/rejected": -119.78782653808594,
"loss": 1.017,
"margin_dpo/margin_mean": 12.85222339630127,
"margin_dpo/margin_std": 18.211654663085938,
"step": 646
},
{
"epoch": 0.9780801209372638,
"fcm_dpo/beta": 0.04868451505899429,
"fcm_dpo/delta": -0.19314002990722656,
"fcm_dpo/margin": 15.98859977722168,
"fcm_dpo/q_t": 0.33671772480010986,
"grad_norm": 12.119245529174805,
"learning_rate": 7.863060120144316e-10,
"logits/chosen": 1.5582287311553955,
"logits/rejected": 1.4055562019348145,
"logps/chosen": -91.17578125,
"logps/ref_chosen": -83.79997253417969,
"logps/ref_rejected": -116.81965637207031,
"logps/rejected": -140.18405151367188,
"loss": 0.8989,
"margin_dpo/margin_mean": 15.988598823547363,
"margin_dpo/margin_std": 17.51923942565918,
"step": 647
},
{
"epoch": 0.9795918367346939,
"fcm_dpo/beta": 0.04834875464439392,
"fcm_dpo/delta": 0.05289806053042412,
"fcm_dpo/margin": 11.375927925109863,
"fcm_dpo/q_t": 0.3829188346862793,
"grad_norm": 13.859929084777832,
"learning_rate": 6.850062128694045e-10,
"logits/chosen": 1.3009628057479858,
"logits/rejected": 1.134709119796753,
"logps/chosen": -94.1376953125,
"logps/ref_chosen": -85.9629898071289,
"logps/ref_rejected": -101.36552429199219,
"logps/rejected": -120.9161605834961,
"loss": 1.0614,
"margin_dpo/margin_mean": 11.37592887878418,
"margin_dpo/margin_std": 16.891958236694336,
"step": 648
},
{
"epoch": 0.981103552532124,
"fcm_dpo/beta": 0.048618800938129425,
"fcm_dpo/delta": 0.008311666548252106,
"fcm_dpo/margin": 12.179876327514648,
"fcm_dpo/q_t": 0.3789626955986023,
"grad_norm": 13.455903053283691,
"learning_rate": 5.906802900412788e-10,
"logits/chosen": 1.450695514678955,
"logits/rejected": 1.321246862411499,
"logps/chosen": -74.89276885986328,
"logps/ref_chosen": -68.64892578125,
"logps/ref_rejected": -89.84898376464844,
"logps/rejected": -108.272705078125,
"loss": 1.0928,
"margin_dpo/margin_mean": 12.179876327514648,
"margin_dpo/margin_std": 20.43763542175293,
"step": 649
},
{
"epoch": 0.982615268329554,
"fcm_dpo/beta": 0.04859776794910431,
"fcm_dpo/delta": -0.03493582457304001,
"fcm_dpo/margin": 13.017370223999023,
"fcm_dpo/q_t": 0.36858803033828735,
"grad_norm": 11.794471740722656,
"learning_rate": 5.033308820289184e-10,
"logits/chosen": 1.5901975631713867,
"logits/rejected": 1.3985137939453125,
"logps/chosen": -76.39089965820312,
"logps/ref_chosen": -72.97265625,
"logps/ref_rejected": -93.04617309570312,
"logps/rejected": -109.48178100585938,
"loss": 1.0177,
"margin_dpo/margin_mean": 13.017369270324707,
"margin_dpo/margin_std": 17.961902618408203,
"step": 650
},
{
"epoch": 0.9841269841269841,
"fcm_dpo/beta": 0.04906022548675537,
"fcm_dpo/delta": 0.046510156244039536,
"fcm_dpo/margin": 11.303712844848633,
"fcm_dpo/q_t": 0.38729214668273926,
"grad_norm": 15.220462799072266,
"learning_rate": 4.2296043218295606e-10,
"logits/chosen": 1.6060431003570557,
"logits/rejected": 1.3864119052886963,
"logps/chosen": -76.44781494140625,
"logps/ref_chosen": -71.05281066894531,
"logps/ref_rejected": -94.23469543457031,
"logps/rejected": -110.93341064453125,
"loss": 1.0596,
"margin_dpo/margin_mean": 11.303714752197266,
"margin_dpo/margin_std": 16.41510009765625,
"step": 651
},
{
"epoch": 0.9856386999244142,
"fcm_dpo/beta": 0.04880473017692566,
"fcm_dpo/delta": 0.0105612026527524,
"fcm_dpo/margin": 12.089744567871094,
"fcm_dpo/q_t": 0.3832206726074219,
"grad_norm": 15.424723625183105,
"learning_rate": 3.4957118863768176e-10,
"logits/chosen": 1.6924582719802856,
"logits/rejected": 1.6193931102752686,
"logps/chosen": -86.58667755126953,
"logps/ref_chosen": -80.06941223144531,
"logps/ref_rejected": -99.22327423095703,
"logps/rejected": -117.83027648925781,
"loss": 1.0705,
"margin_dpo/margin_mean": 12.089743614196777,
"margin_dpo/margin_std": 19.402515411376953,
"step": 652
},
{
"epoch": 0.9871504157218443,
"fcm_dpo/beta": 0.048587001860141754,
"fcm_dpo/delta": -0.0633477047085762,
"fcm_dpo/margin": 13.566983222961426,
"fcm_dpo/q_t": 0.36429864168167114,
"grad_norm": 11.197211265563965,
"learning_rate": 2.831652042480093e-10,
"logits/chosen": 1.611598014831543,
"logits/rejected": 1.483473300933838,
"logps/chosen": -84.53128051757812,
"logps/ref_chosen": -80.35701751708984,
"logps/ref_rejected": -92.1295394897461,
"logps/rejected": -109.87078857421875,
"loss": 1.0152,
"margin_dpo/margin_mean": 13.56698226928711,
"margin_dpo/margin_std": 19.007705688476562,
"step": 653
},
{
"epoch": 0.9886621315192744,
"fcm_dpo/beta": 0.04817197844386101,
"fcm_dpo/delta": 0.055898845195770264,
"fcm_dpo/margin": 11.323410034179688,
"fcm_dpo/q_t": 0.39379796385765076,
"grad_norm": 13.024750709533691,
"learning_rate": 2.2374433653205016e-10,
"logits/chosen": 1.4926725625991821,
"logits/rejected": 1.2302258014678955,
"logps/chosen": -84.48985290527344,
"logps/ref_chosen": -78.06475830078125,
"logps/ref_rejected": -106.05763244628906,
"logps/rejected": -123.80614471435547,
"loss": 1.104,
"margin_dpo/margin_mean": 11.323410034179688,
"margin_dpo/margin_std": 19.10687828063965,
"step": 654
},
{
"epoch": 0.9901738473167044,
"fcm_dpo/beta": 0.048346683382987976,
"fcm_dpo/delta": 0.001804165542125702,
"fcm_dpo/margin": 12.33364486694336,
"fcm_dpo/q_t": 0.3771480917930603,
"grad_norm": 9.984294891357422,
"learning_rate": 1.7131024761923852e-10,
"logits/chosen": 1.4067647457122803,
"logits/rejected": 1.1172595024108887,
"logps/chosen": -70.79195404052734,
"logps/ref_chosen": -67.03407287597656,
"logps/ref_rejected": -97.57197570800781,
"logps/rejected": -113.66349792480469,
"loss": 1.0162,
"margin_dpo/margin_mean": 12.33364486694336,
"margin_dpo/margin_std": 16.364456176757812,
"step": 655
},
{
"epoch": 0.9916855631141346,
"fcm_dpo/beta": 0.04848009720444679,
"fcm_dpo/delta": -0.0400017648935318,
"fcm_dpo/margin": 13.146528244018555,
"fcm_dpo/q_t": 0.368743360042572,
"grad_norm": 11.32723617553711,
"learning_rate": 1.2586440420372934e-10,
"logits/chosen": 1.2692241668701172,
"logits/rejected": 1.151146650314331,
"logps/chosen": -96.94473266601562,
"logps/ref_chosen": -89.31463623046875,
"logps/ref_rejected": -105.14315795898438,
"logps/rejected": -125.91978454589844,
"loss": 1.023,
"margin_dpo/margin_mean": 13.146528244018555,
"margin_dpo/margin_std": 18.80926513671875,
"step": 656
},
{
"epoch": 0.9931972789115646,
"fcm_dpo/beta": 0.04680928587913513,
"fcm_dpo/delta": -0.21155594289302826,
"fcm_dpo/margin": 16.960391998291016,
"fcm_dpo/q_t": 0.33587589859962463,
"grad_norm": 12.175826072692871,
"learning_rate": 8.740807750345913e-11,
"logits/chosen": 1.6932892799377441,
"logits/rejected": 1.4875112771987915,
"logps/chosen": -68.65042877197266,
"logps/ref_chosen": -64.89747619628906,
"logps/ref_rejected": -94.21998596191406,
"logps/rejected": -114.93333435058594,
"loss": 0.9204,
"margin_dpo/margin_mean": 16.960391998291016,
"margin_dpo/margin_std": 19.61644172668457,
"step": 657
},
{
"epoch": 0.9947089947089947,
"fcm_dpo/beta": 0.047107864171266556,
"fcm_dpo/delta": 0.04111632704734802,
"fcm_dpo/margin": 11.880049705505371,
"fcm_dpo/q_t": 0.38745826482772827,
"grad_norm": 12.966909408569336,
"learning_rate": 5.594234322453539e-11,
"logits/chosen": 1.4143362045288086,
"logits/rejected": 1.3188287019729614,
"logps/chosen": -86.38758087158203,
"logps/ref_chosen": -81.16606140136719,
"logps/ref_rejected": -97.72825622558594,
"logps/rejected": -114.82982635498047,
"loss": 1.1141,
"margin_dpo/margin_mean": 11.880049705505371,
"margin_dpo/margin_std": 20.876911163330078,
"step": 658
},
{
"epoch": 0.9962207105064248,
"fcm_dpo/beta": 0.04666716232895851,
"fcm_dpo/delta": -0.017684001475572586,
"fcm_dpo/margin": 8.309593200683594,
"fcm_dpo/q_t": 0.42115259170532227,
"grad_norm": 12.249117851257324,
"learning_rate": 3.146808153123293e-11,
"logits/chosen": 1.562005877494812,
"logits/rejected": 1.3696491718292236,
"logps/chosen": -82.52589416503906,
"logps/ref_chosen": -74.42193603515625,
"logps/ref_rejected": -87.81561279296875,
"logps/rejected": -104.22916412353516,
"loss": 1.2285,
"margin_dpo/margin_mean": 8.30959415435791,
"margin_dpo/margin_std": 19.117603302001953,
"step": 659
},
{
"epoch": 0.9977324263038548,
"fcm_dpo/beta": 0.045825421810150146,
"fcm_dpo/delta": -0.10102089494466782,
"fcm_dpo/margin": 15.1337308883667,
"fcm_dpo/q_t": 0.35269391536712646,
"grad_norm": 10.398505210876465,
"learning_rate": 1.3985977021235829e-11,
"logits/chosen": 1.5079681873321533,
"logits/rejected": 1.3555164337158203,
"logps/chosen": -77.58157348632812,
"logps/ref_chosen": -71.68511962890625,
"logps/ref_rejected": -98.01472473144531,
"logps/rejected": -119.04491424560547,
"loss": 0.9414,
"margin_dpo/margin_mean": 15.133729934692383,
"margin_dpo/margin_std": 17.095909118652344,
"step": 660
},
{
"epoch": 0.999244142101285,
"fcm_dpo/beta": 0.04709383845329285,
"fcm_dpo/delta": 0.17111794650554657,
"fcm_dpo/margin": 9.24586296081543,
"fcm_dpo/q_t": 0.4106820821762085,
"grad_norm": 10.930879592895508,
"learning_rate": 3.4965187065971735e-12,
"logits/chosen": 1.3035640716552734,
"logits/rejected": 1.1167458295822144,
"logps/chosen": -89.0513916015625,
"logps/ref_chosen": -78.35111999511719,
"logps/ref_rejected": -99.47113037109375,
"logps/rejected": -119.4172592163086,
"loss": 1.1821,
"margin_dpo/margin_mean": 9.24586296081543,
"margin_dpo/margin_std": 19.480510711669922,
"step": 661
},
{
"epoch": 0.999244142101285,
"step": 661,
"total_flos": 0.0,
"train_loss": 1.1374638212250148,
"train_runtime": 2122.2138,
"train_samples_per_second": 19.949,
"train_steps_per_second": 0.311
}
],
"logging_steps": 1,
"max_steps": 661,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}