{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.999244142101285, "eval_steps": 100, "global_step": 661, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0015117157974300832, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.002980858087539673, "fcm_dpo/q_t": 0.5000747442245483, "grad_norm": 17.89801597595215, "learning_rate": 0.0, "logits/chosen": 1.702779769897461, "logits/rejected": 1.6965749263763428, "logps/chosen": -80.20932006835938, "logps/ref_chosen": -80.27740478515625, "logps/ref_rejected": -83.5943374633789, "logps/rejected": -83.52326965332031, "loss": 1.387, "margin_dpo/margin_mean": -0.0029816031455993652, "margin_dpo/margin_std": 0.3835117816925049, "step": 1 }, { "epoch": 0.0030234315948601664, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.029325395822525024, "fcm_dpo/q_t": 0.4992692470550537, "grad_norm": 21.36615562438965, "learning_rate": 7.462686567164179e-09, "logits/chosen": 1.7006168365478516, "logits/rejected": 1.6698178052902222, "logps/chosen": -74.51097869873047, "logps/ref_chosen": -74.56095886230469, "logps/ref_rejected": -83.53636169433594, "logps/rejected": -83.51570892333984, "loss": 1.3839, "margin_dpo/margin_mean": 0.029325813055038452, "margin_dpo/margin_std": 0.4646317958831787, "step": 2 }, { "epoch": 0.0045351473922902496, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.03095117211341858, "fcm_dpo/q_t": 0.4992258846759796, "grad_norm": 19.930883407592773, "learning_rate": 1.4925373134328357e-08, "logits/chosen": 1.6261851787567139, "logits/rejected": 1.5350717306137085, "logps/chosen": -82.15226745605469, "logps/ref_chosen": -82.1510009765625, "logps/ref_rejected": -109.82986450195312, "logps/rejected": -109.86207580566406, "loss": 1.3837, "margin_dpo/margin_mean": 0.030951082706451416, "margin_dpo/margin_std": 0.44513028860092163, "step": 3 }, { "epoch": 0.006046863189720333, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.042372316122055054, "fcm_dpo/q_t": 0.4989404082298279, "grad_norm": 19.7413272857666, "learning_rate": 2.2388059701492534e-08, "logits/chosen": 1.7652442455291748, "logits/rejected": 1.7535886764526367, "logps/chosen": -92.318603515625, "logps/ref_chosen": -92.37549591064453, "logps/ref_rejected": -99.59553527832031, "logps/rejected": -99.58100891113281, "loss": 1.3827, "margin_dpo/margin_mean": 0.0423721969127655, "margin_dpo/margin_std": 0.4652661681175232, "step": 4 }, { "epoch": 0.007558578987150416, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.004836410284042358, "fcm_dpo/q_t": 0.5001212954521179, "grad_norm": 18.83965492248535, "learning_rate": 2.9850746268656714e-08, "logits/chosen": 1.6294361352920532, "logits/rejected": 1.5735852718353271, "logps/chosen": -78.87178039550781, "logps/ref_chosen": -78.84872436523438, "logps/ref_rejected": -97.88040161132812, "logps/rejected": -97.89862060546875, "loss": 1.3871, "margin_dpo/margin_mean": -0.0048364996910095215, "margin_dpo/margin_std": 0.36210399866104126, "step": 5 }, { "epoch": 0.009070294784580499, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.095939040184021, "fcm_dpo/q_t": 0.49760186672210693, "grad_norm": 18.0408878326416, "learning_rate": 3.731343283582089e-08, "logits/chosen": 1.5870825052261353, "logits/rejected": 1.4796451330184937, "logps/chosen": -68.30978393554688, "logps/ref_chosen": -68.34607696533203, "logps/ref_rejected": -99.24614715576172, "logps/rejected": -99.3057861328125, "loss": 1.3769, "margin_dpo/margin_mean": 0.0959392786026001, "margin_dpo/margin_std": 0.29579028487205505, "step": 6 }, { "epoch": 0.010582010582010581, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.06473231315612793, "fcm_dpo/q_t": 0.5016177892684937, "grad_norm": 17.415424346923828, "learning_rate": 4.477611940298507e-08, "logits/chosen": 1.4590237140655518, "logits/rejected": 1.3971405029296875, "logps/chosen": -69.1452865600586, "logps/ref_chosen": -69.11282348632812, "logps/ref_rejected": -84.01641845703125, "logps/rejected": -83.98414611816406, "loss": 1.3931, "margin_dpo/margin_mean": -0.0647326409816742, "margin_dpo/margin_std": 0.3379696011543274, "step": 7 }, { "epoch": 0.012093726379440665, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.038746654987335205, "fcm_dpo/q_t": 0.5009682774543762, "grad_norm": 18.376161575317383, "learning_rate": 5.223880597014925e-08, "logits/chosen": 1.7957122325897217, "logits/rejected": 1.7788466215133667, "logps/chosen": -78.38008117675781, "logps/ref_chosen": -78.3912353515625, "logps/ref_rejected": -91.06254577636719, "logps/rejected": -91.01263427734375, "loss": 1.3906, "margin_dpo/margin_mean": -0.038746029138565063, "margin_dpo/margin_std": 0.38139432668685913, "step": 8 }, { "epoch": 0.013605442176870748, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.06898093223571777, "fcm_dpo/q_t": 0.4982767105102539, "grad_norm": 19.275188446044922, "learning_rate": 5.970149253731343e-08, "logits/chosen": 1.9897737503051758, "logits/rejected": 1.7835183143615723, "logps/chosen": -69.65217590332031, "logps/ref_chosen": -69.67422485351562, "logps/ref_rejected": -105.00473022460938, "logps/rejected": -105.05166625976562, "loss": 1.38, "margin_dpo/margin_mean": 0.068980872631073, "margin_dpo/margin_std": 0.4633823037147522, "step": 9 }, { "epoch": 0.015117157974300832, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.008043617010116577, "fcm_dpo/q_t": 0.5002011060714722, "grad_norm": 19.03217124938965, "learning_rate": 6.71641791044776e-08, "logits/chosen": 1.5980093479156494, "logits/rejected": 1.5193543434143066, "logps/chosen": -79.69847106933594, "logps/ref_chosen": -79.730712890625, "logps/ref_rejected": -105.50645446777344, "logps/rejected": -105.4661636352539, "loss": 1.3875, "margin_dpo/margin_mean": -0.008043557405471802, "margin_dpo/margin_std": 0.3877168893814087, "step": 10 }, { "epoch": 0.016628873771730914, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.010087519884109497, "fcm_dpo/q_t": 0.5002526044845581, "grad_norm": 17.490964889526367, "learning_rate": 7.462686567164178e-08, "logits/chosen": 1.785116195678711, "logits/rejected": 1.7379379272460938, "logps/chosen": -85.41242980957031, "logps/ref_chosen": -85.41248321533203, "logps/ref_rejected": -86.50241088867188, "logps/rejected": -86.49227142333984, "loss": 1.3877, "margin_dpo/margin_mean": -0.010087013244628906, "margin_dpo/margin_std": 0.40967226028442383, "step": 11 }, { "epoch": 0.018140589569160998, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.043316930532455444, "fcm_dpo/q_t": 0.49891871213912964, "grad_norm": 17.32830047607422, "learning_rate": 8.208955223880596e-08, "logits/chosen": 1.645331621170044, "logits/rejected": 1.6056153774261475, "logps/chosen": -81.3597183227539, "logps/ref_chosen": -81.38086700439453, "logps/ref_rejected": -89.88151550292969, "logps/rejected": -89.9036865234375, "loss": 1.3825, "margin_dpo/margin_mean": 0.043317049741744995, "margin_dpo/margin_std": 0.444502055644989, "step": 12 }, { "epoch": 0.019652305366591082, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.026911377906799316, "fcm_dpo/q_t": 0.49932724237442017, "grad_norm": 17.857009887695312, "learning_rate": 8.955223880597014e-08, "logits/chosen": 1.6704635620117188, "logits/rejected": 1.481621503829956, "logps/chosen": -63.172264099121094, "logps/ref_chosen": -63.17030715942383, "logps/ref_rejected": -105.61166381835938, "logps/rejected": -105.64053344726562, "loss": 1.3838, "margin_dpo/margin_mean": 0.026911497116088867, "margin_dpo/margin_std": 0.3080989122390747, "step": 13 }, { "epoch": 0.021164021164021163, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.017434000968933105, "fcm_dpo/q_t": 0.5004353523254395, "grad_norm": 20.941539764404297, "learning_rate": 9.701492537313432e-08, "logits/chosen": 1.6752700805664062, "logits/rejected": 1.6424415111541748, "logps/chosen": -80.72457122802734, "logps/ref_chosen": -80.71014404296875, "logps/ref_rejected": -89.86041259765625, "logps/rejected": -89.85740661621094, "loss": 1.3883, "margin_dpo/margin_mean": -0.017433375120162964, "margin_dpo/margin_std": 0.32124900817871094, "step": 14 }, { "epoch": 0.022675736961451247, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.0804678201675415, "fcm_dpo/q_t": 0.5020108222961426, "grad_norm": 19.288888931274414, "learning_rate": 1.044776119402985e-07, "logits/chosen": 1.4386959075927734, "logits/rejected": 1.3539936542510986, "logps/chosen": -82.036865234375, "logps/ref_chosen": -82.00294494628906, "logps/ref_rejected": -106.43550109863281, "logps/rejected": -106.38895416259766, "loss": 1.3947, "margin_dpo/margin_mean": -0.08046802878379822, "margin_dpo/margin_std": 0.336540549993515, "step": 15 }, { "epoch": 0.02418745275888133, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.025534391403198242, "fcm_dpo/q_t": 0.5006370544433594, "grad_norm": 17.247323989868164, "learning_rate": 1.1194029850746268e-07, "logits/chosen": 1.8795946836471558, "logits/rejected": 1.7519097328186035, "logps/chosen": -62.282501220703125, "logps/ref_chosen": -62.308345794677734, "logps/ref_rejected": -89.6508560180664, "logps/rejected": -89.59947967529297, "loss": 1.3893, "margin_dpo/margin_mean": -0.025534451007843018, "margin_dpo/margin_std": 0.41592419147491455, "step": 16 }, { "epoch": 0.025699168556311415, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.011744916439056396, "fcm_dpo/q_t": 0.4997067451477051, "grad_norm": 18.33580780029297, "learning_rate": 1.1940298507462686e-07, "logits/chosen": 1.513704538345337, "logits/rejected": 1.4842028617858887, "logps/chosen": -85.16311645507812, "logps/ref_chosen": -85.16903686523438, "logps/ref_rejected": -102.57087707519531, "logps/rejected": -102.57669830322266, "loss": 1.3855, "margin_dpo/margin_mean": 0.011744409799575806, "margin_dpo/margin_std": 0.3892754316329956, "step": 17 }, { "epoch": 0.027210884353741496, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.04118014872074127, "fcm_dpo/q_t": 0.4989708662033081, "grad_norm": 17.161205291748047, "learning_rate": 1.2686567164179106e-07, "logits/chosen": 1.929447889328003, "logits/rejected": 1.7789592742919922, "logps/chosen": -63.15791320800781, "logps/ref_chosen": -63.17793273925781, "logps/ref_rejected": -86.06461334228516, "logps/rejected": -86.08576965332031, "loss": 1.3825, "margin_dpo/margin_mean": 0.04118022322654724, "margin_dpo/margin_std": 0.33605462312698364, "step": 18 }, { "epoch": 0.02872260015117158, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.04288366436958313, "fcm_dpo/q_t": 0.5010709762573242, "grad_norm": 19.838333129882812, "learning_rate": 1.343283582089552e-07, "logits/chosen": 1.90325927734375, "logits/rejected": 1.8959013223648071, "logps/chosen": -85.86503601074219, "logps/ref_chosen": -85.82405853271484, "logps/ref_rejected": -100.07136535644531, "logps/rejected": -100.06946563720703, "loss": 1.391, "margin_dpo/margin_mean": -0.04288366436958313, "margin_dpo/margin_std": 0.4113919138908386, "step": 19 }, { "epoch": 0.030234315948601664, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.07760676741600037, "fcm_dpo/q_t": 0.5019393563270569, "grad_norm": 18.312501907348633, "learning_rate": 1.4179104477611938e-07, "logits/chosen": 2.0755791664123535, "logits/rejected": 1.9887995719909668, "logps/chosen": -73.6259994506836, "logps/ref_chosen": -73.58621215820312, "logps/ref_rejected": -91.21690368652344, "logps/rejected": -91.17908477783203, "loss": 1.3944, "margin_dpo/margin_mean": -0.07760673761367798, "margin_dpo/margin_std": 0.36418983340263367, "step": 20 }, { "epoch": 0.031746031746031744, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.03705587983131409, "fcm_dpo/q_t": 0.4990747272968292, "grad_norm": 18.1649112701416, "learning_rate": 1.4925373134328355e-07, "logits/chosen": 2.150765895843506, "logits/rejected": 2.022829294204712, "logps/chosen": -81.96690368652344, "logps/ref_chosen": -81.97251892089844, "logps/ref_rejected": -98.05976867675781, "logps/rejected": -98.09120178222656, "loss": 1.3831, "margin_dpo/margin_mean": 0.03705599904060364, "margin_dpo/margin_std": 0.43046677112579346, "step": 21 }, { "epoch": 0.03325774754346183, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.028704047203063965, "fcm_dpo/q_t": 0.5007180571556091, "grad_norm": 18.55045509338379, "learning_rate": 1.5671641791044775e-07, "logits/chosen": 1.544440746307373, "logits/rejected": 1.5105493068695068, "logps/chosen": -76.98062896728516, "logps/ref_chosen": -76.99579620361328, "logps/ref_rejected": -95.76089477539062, "logps/rejected": -95.71702575683594, "loss": 1.3897, "margin_dpo/margin_mean": -0.028704792261123657, "margin_dpo/margin_std": 0.4347040057182312, "step": 22 }, { "epoch": 0.03476946334089191, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.07605567574501038, "fcm_dpo/q_t": 0.49809861183166504, "grad_norm": 19.070331573486328, "learning_rate": 1.6417910447761193e-07, "logits/chosen": 1.923293113708496, "logits/rejected": 1.831559658050537, "logps/chosen": -84.68144989013672, "logps/ref_chosen": -84.76856994628906, "logps/ref_rejected": -107.28266906738281, "logps/rejected": -107.2716064453125, "loss": 1.3792, "margin_dpo/margin_mean": 0.0760551393032074, "margin_dpo/margin_std": 0.41792023181915283, "step": 23 }, { "epoch": 0.036281179138321996, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.03800934553146362, "fcm_dpo/q_t": 0.5009497404098511, "grad_norm": 17.18250846862793, "learning_rate": 1.716417910447761e-07, "logits/chosen": 1.7587859630584717, "logits/rejected": 1.6988754272460938, "logps/chosen": -69.89579010009766, "logps/ref_chosen": -69.87112426757812, "logps/ref_rejected": -84.02084350585938, "logps/rejected": -84.00749969482422, "loss": 1.3904, "margin_dpo/margin_mean": -0.038009583950042725, "margin_dpo/margin_std": 0.3587035536766052, "step": 24 }, { "epoch": 0.03779289493575208, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.04470279812812805, "fcm_dpo/q_t": 0.4988824129104614, "grad_norm": 19.49793815612793, "learning_rate": 1.7910447761194027e-07, "logits/chosen": 1.9868547916412354, "logits/rejected": 1.8301403522491455, "logps/chosen": -78.24287414550781, "logps/ref_chosen": -78.22694396972656, "logps/ref_rejected": -106.65234375, "logps/rejected": -106.71296691894531, "loss": 1.3822, "margin_dpo/margin_mean": 0.04470303654670715, "margin_dpo/margin_std": 0.39123424887657166, "step": 25 }, { "epoch": 0.039304610733182165, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.0833863914012909, "fcm_dpo/q_t": 0.4979166090488434, "grad_norm": 17.834970474243164, "learning_rate": 1.8656716417910447e-07, "logits/chosen": 1.9412182569503784, "logits/rejected": 1.914105772972107, "logps/chosen": -74.54658508300781, "logps/ref_chosen": -74.59750366210938, "logps/ref_rejected": -93.57858276367188, "logps/rejected": -93.61105346679688, "loss": 1.3783, "margin_dpo/margin_mean": 0.08338648080825806, "margin_dpo/margin_std": 0.37571650743484497, "step": 26 }, { "epoch": 0.04081632653061224, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.05425041913986206, "fcm_dpo/q_t": 0.4986443519592285, "grad_norm": 18.620058059692383, "learning_rate": 1.9402985074626865e-07, "logits/chosen": 1.8641291856765747, "logits/rejected": 1.8036224842071533, "logps/chosen": -78.64132690429688, "logps/ref_chosen": -78.64625549316406, "logps/ref_rejected": -92.33645629882812, "logps/rejected": -92.38578796386719, "loss": 1.3812, "margin_dpo/margin_mean": 0.0542508065700531, "margin_dpo/margin_std": 0.3697792887687683, "step": 27 }, { "epoch": 0.042328042328042326, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.007928252220153809, "fcm_dpo/q_t": 0.5001976490020752, "grad_norm": 17.704586029052734, "learning_rate": 2.0149253731343282e-07, "logits/chosen": 1.5783494710922241, "logits/rejected": 1.5295078754425049, "logps/chosen": -76.9276351928711, "logps/ref_chosen": -76.91271209716797, "logps/ref_rejected": -88.48194885253906, "logps/rejected": -88.48894500732422, "loss": 1.3875, "margin_dpo/margin_mean": -0.007928639650344849, "margin_dpo/margin_std": 0.393305242061615, "step": 28 }, { "epoch": 0.04383975812547241, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.05503666400909424, "fcm_dpo/q_t": 0.4986268877983093, "grad_norm": 21.232301712036133, "learning_rate": 2.08955223880597e-07, "logits/chosen": 2.018385648727417, "logits/rejected": 1.9525585174560547, "logps/chosen": -89.55824279785156, "logps/ref_chosen": -89.62060546875, "logps/ref_rejected": -100.57090759277344, "logps/rejected": -100.56358337402344, "loss": 1.3814, "margin_dpo/margin_mean": 0.05503681302070618, "margin_dpo/margin_std": 0.49973034858703613, "step": 29 }, { "epoch": 0.045351473922902494, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.0687638521194458, "fcm_dpo/q_t": 0.4982798397541046, "grad_norm": 18.995983123779297, "learning_rate": 2.1641791044776117e-07, "logits/chosen": 2.1269025802612305, "logits/rejected": 1.9344401359558105, "logps/chosen": -68.77285766601562, "logps/ref_chosen": -68.82381439208984, "logps/ref_rejected": -104.7047119140625, "logps/rejected": -104.72251892089844, "loss": 1.3799, "margin_dpo/margin_mean": 0.06876346468925476, "margin_dpo/margin_std": 0.44119542837142944, "step": 30 }, { "epoch": 0.04686318972033258, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.001088559627532959, "fcm_dpo/q_t": 0.49997323751449585, "grad_norm": 20.58745002746582, "learning_rate": 2.2388059701492537e-07, "logits/chosen": 1.7696319818496704, "logits/rejected": 1.6470561027526855, "logps/chosen": -86.03531646728516, "logps/ref_chosen": -86.06916809082031, "logps/ref_rejected": -116.66394805908203, "logps/rejected": -116.63117980957031, "loss": 1.3865, "margin_dpo/margin_mean": 0.0010884404182434082, "margin_dpo/margin_std": 0.36631911993026733, "step": 31 }, { "epoch": 0.04837490551776266, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.07467979192733765, "fcm_dpo/q_t": 0.4981331527233124, "grad_norm": 18.509641647338867, "learning_rate": 2.3134328358208954e-07, "logits/chosen": 1.4663035869598389, "logits/rejected": 1.515918493270874, "logps/chosen": -87.51797485351562, "logps/ref_chosen": -87.59808349609375, "logps/ref_rejected": -100.26905822753906, "logps/rejected": -100.26361846923828, "loss": 1.3793, "margin_dpo/margin_mean": 0.07468008995056152, "margin_dpo/margin_std": 0.4339354634284973, "step": 32 }, { "epoch": 0.049886621315192746, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.023064017295837402, "fcm_dpo/q_t": 0.5005767941474915, "grad_norm": 19.758060455322266, "learning_rate": 2.388059701492537e-07, "logits/chosen": 1.5679633617401123, "logits/rejected": 1.4670143127441406, "logps/chosen": -83.30245971679688, "logps/ref_chosen": -83.29850769042969, "logps/ref_rejected": -94.60990142822266, "logps/rejected": -94.59078979492188, "loss": 1.389, "margin_dpo/margin_mean": -0.02306431531906128, "margin_dpo/margin_std": 0.3838702440261841, "step": 33 }, { "epoch": 0.05139833711262283, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.058904439210891724, "fcm_dpo/q_t": 0.49852806329727173, "grad_norm": 18.01277732849121, "learning_rate": 2.4626865671641786e-07, "logits/chosen": 1.8714017868041992, "logits/rejected": 1.7843908071517944, "logps/chosen": -70.11520385742188, "logps/ref_chosen": -70.15069580078125, "logps/ref_rejected": -84.4693832397461, "logps/rejected": -84.49279022216797, "loss": 1.3807, "margin_dpo/margin_mean": 0.058904558420181274, "margin_dpo/margin_std": 0.35321176052093506, "step": 34 }, { "epoch": 0.05291005291005291, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.07404336333274841, "fcm_dpo/q_t": 0.49815088510513306, "grad_norm": 18.077713012695312, "learning_rate": 2.537313432835821e-07, "logits/chosen": 1.47958505153656, "logits/rejected": 1.427431583404541, "logps/chosen": -78.203857421875, "logps/ref_chosen": -78.25238037109375, "logps/ref_rejected": -91.06356811523438, "logps/rejected": -91.08910369873047, "loss": 1.3793, "margin_dpo/margin_mean": 0.0740436315536499, "margin_dpo/margin_std": 0.405529260635376, "step": 35 }, { "epoch": 0.05442176870748299, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.09882274270057678, "fcm_dpo/q_t": 0.49753493070602417, "grad_norm": 17.96520233154297, "learning_rate": 2.611940298507462e-07, "logits/chosen": 1.845771312713623, "logits/rejected": 1.7256853580474854, "logps/chosen": -67.06063079833984, "logps/ref_chosen": -67.06676483154297, "logps/ref_rejected": -99.34661865234375, "logps/rejected": -99.4393081665039, "loss": 1.3771, "margin_dpo/margin_mean": 0.09882298111915588, "margin_dpo/margin_std": 0.49245503544807434, "step": 36 }, { "epoch": 0.055933484504913075, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.0062815845012664795, "fcm_dpo/q_t": 0.5001574754714966, "grad_norm": 23.530776977539062, "learning_rate": 2.686567164179104e-07, "logits/chosen": 1.8602843284606934, "logits/rejected": 1.5742213726043701, "logps/chosen": -75.89591979980469, "logps/ref_chosen": -75.9269790649414, "logps/ref_rejected": -130.34371948242188, "logps/rejected": -130.30636596679688, "loss": 1.3874, "margin_dpo/margin_mean": -0.00628247857093811, "margin_dpo/margin_std": 0.4102315306663513, "step": 37 }, { "epoch": 0.05744520030234316, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.005452930927276611, "fcm_dpo/q_t": 0.49986520409584045, "grad_norm": 18.435871124267578, "learning_rate": 2.761194029850746e-07, "logits/chosen": 1.7420191764831543, "logits/rejected": 1.7041373252868652, "logps/chosen": -83.6760025024414, "logps/ref_chosen": -83.65460205078125, "logps/ref_rejected": -89.15221405029297, "logps/rejected": -89.17906951904297, "loss": 1.3862, "margin_dpo/margin_mean": 0.005452901124954224, "margin_dpo/margin_std": 0.3982018530368805, "step": 38 }, { "epoch": 0.05895691609977324, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.1193189024925232, "fcm_dpo/q_t": 0.497018039226532, "grad_norm": 18.950082778930664, "learning_rate": 2.8358208955223876e-07, "logits/chosen": 2.043585777282715, "logits/rejected": 1.994788646697998, "logps/chosen": -76.06095886230469, "logps/ref_chosen": -76.18706512451172, "logps/ref_rejected": -94.39262390136719, "logps/rejected": -94.3858413696289, "loss": 1.3747, "margin_dpo/margin_mean": 0.11931854486465454, "margin_dpo/margin_std": 0.3407592177391052, "step": 39 }, { "epoch": 0.06046863189720333, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.056918948888778687, "fcm_dpo/q_t": 0.501421332359314, "grad_norm": 18.159303665161133, "learning_rate": 2.9104477611940296e-07, "logits/chosen": 1.812699556350708, "logits/rejected": 1.7002441883087158, "logps/chosen": -77.47999572753906, "logps/ref_chosen": -77.43476867675781, "logps/ref_rejected": -98.58720397949219, "logps/rejected": -98.57550811767578, "loss": 1.3925, "margin_dpo/margin_mean": -0.05691874027252197, "margin_dpo/margin_std": 0.42336541414260864, "step": 40 }, { "epoch": 0.06198034769463341, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.08366265892982483, "fcm_dpo/q_t": 0.4979090094566345, "grad_norm": 18.28632354736328, "learning_rate": 2.985074626865671e-07, "logits/chosen": 1.813473105430603, "logits/rejected": 1.750382423400879, "logps/chosen": -86.79568481445312, "logps/ref_chosen": -86.87640380859375, "logps/ref_rejected": -101.0856704711914, "logps/rejected": -101.08860778808594, "loss": 1.3784, "margin_dpo/margin_mean": 0.08366268873214722, "margin_dpo/margin_std": 0.4136474132537842, "step": 41 }, { "epoch": 0.06349206349206349, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.01882028579711914, "fcm_dpo/q_t": 0.49952933192253113, "grad_norm": 18.222118377685547, "learning_rate": 3.059701492537313e-07, "logits/chosen": 1.686318278312683, "logits/rejected": 1.6382153034210205, "logps/chosen": -79.34196472167969, "logps/ref_chosen": -79.35625457763672, "logps/ref_rejected": -91.54881286621094, "logps/rejected": -91.55332946777344, "loss": 1.3848, "margin_dpo/margin_mean": 0.018820196390151978, "margin_dpo/margin_std": 0.4075120687484741, "step": 42 }, { "epoch": 0.06500377928949358, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.02868404984474182, "fcm_dpo/q_t": 0.5007175207138062, "grad_norm": 19.569217681884766, "learning_rate": 3.134328358208955e-07, "logits/chosen": 1.7295043468475342, "logits/rejected": 1.6345380544662476, "logps/chosen": -90.81892395019531, "logps/ref_chosen": -90.81220245361328, "logps/ref_rejected": -94.16316986083984, "logps/rejected": -94.1412124633789, "loss": 1.3898, "margin_dpo/margin_mean": -0.02868404984474182, "margin_dpo/margin_std": 0.4629897475242615, "step": 43 }, { "epoch": 0.06651549508692366, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.005178973078727722, "fcm_dpo/q_t": 0.4998709261417389, "grad_norm": 18.98065948486328, "learning_rate": 3.2089552238805965e-07, "logits/chosen": 1.3732523918151855, "logits/rejected": 1.301151156425476, "logps/chosen": -88.26017761230469, "logps/ref_chosen": -88.27932739257812, "logps/ref_rejected": -101.14324951171875, "logps/rejected": -101.1292724609375, "loss": 1.3863, "margin_dpo/margin_mean": 0.005178704857826233, "margin_dpo/margin_std": 0.4382961690425873, "step": 44 }, { "epoch": 0.06802721088435375, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.07508471608161926, "fcm_dpo/q_t": 0.498124361038208, "grad_norm": 19.43973731994629, "learning_rate": 3.2835820895522385e-07, "logits/chosen": 1.695054292678833, "logits/rejected": 1.5746557712554932, "logps/chosen": -78.40066528320312, "logps/ref_chosen": -78.40264892578125, "logps/ref_rejected": -109.39339447021484, "logps/rejected": -109.46649169921875, "loss": 1.3793, "margin_dpo/margin_mean": 0.0750853419303894, "margin_dpo/margin_std": 0.4212068021297455, "step": 45 }, { "epoch": 0.06953892668178382, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.10518896579742432, "fcm_dpo/q_t": 0.4973721504211426, "grad_norm": 18.316635131835938, "learning_rate": 3.3582089552238805e-07, "logits/chosen": 1.685295820236206, "logits/rejected": 1.5578134059906006, "logps/chosen": -77.9603271484375, "logps/ref_chosen": -78.08491516113281, "logps/ref_rejected": -97.42544555664062, "logps/rejected": -97.40605163574219, "loss": 1.3762, "margin_dpo/margin_mean": 0.1051889955997467, "margin_dpo/margin_std": 0.4159674048423767, "step": 46 }, { "epoch": 0.0710506424792139, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.023257285356521606, "fcm_dpo/q_t": 0.49941885471343994, "grad_norm": 19.098480224609375, "learning_rate": 3.432835820895522e-07, "logits/chosen": 1.5158767700195312, "logits/rejected": 1.4411935806274414, "logps/chosen": -70.76278686523438, "logps/ref_chosen": -70.78988647460938, "logps/ref_rejected": -91.17266845703125, "logps/rejected": -91.1688232421875, "loss": 1.3842, "margin_dpo/margin_mean": 0.02325788140296936, "margin_dpo/margin_std": 0.31453946232795715, "step": 47 }, { "epoch": 0.07256235827664399, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.015231698751449585, "fcm_dpo/q_t": 0.4996192157268524, "grad_norm": 17.00591278076172, "learning_rate": 3.507462686567164e-07, "logits/chosen": 1.9108043909072876, "logits/rejected": 1.8479423522949219, "logps/chosen": -66.61248779296875, "logps/ref_chosen": -66.67327880859375, "logps/ref_rejected": -79.28543853759766, "logps/rejected": -79.23987579345703, "loss": 1.3852, "margin_dpo/margin_mean": 0.015231996774673462, "margin_dpo/margin_std": 0.3935966491699219, "step": 48 }, { "epoch": 0.07407407407407407, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.001067519187927246, "fcm_dpo/q_t": 0.5000255107879639, "grad_norm": 17.59852409362793, "learning_rate": 3.5820895522388055e-07, "logits/chosen": 1.4201464653015137, "logits/rejected": 1.3769769668579102, "logps/chosen": -75.09858703613281, "logps/ref_chosen": -75.17504119873047, "logps/ref_rejected": -80.5369873046875, "logps/rejected": -80.45946502685547, "loss": 1.3868, "margin_dpo/margin_mean": -0.0010673105716705322, "margin_dpo/margin_std": 0.379297137260437, "step": 49 }, { "epoch": 0.07558578987150416, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.014360487461090088, "fcm_dpo/q_t": 0.4996405839920044, "grad_norm": 17.910799026489258, "learning_rate": 3.6567164179104475e-07, "logits/chosen": 1.8102669715881348, "logits/rejected": 1.7434110641479492, "logps/chosen": -71.19473266601562, "logps/ref_chosen": -71.2314224243164, "logps/ref_rejected": -87.59088134765625, "logps/rejected": -87.56855773925781, "loss": 1.3852, "margin_dpo/margin_mean": 0.014360368251800537, "margin_dpo/margin_std": 0.371703177690506, "step": 50 }, { "epoch": 0.07709750566893424, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.03938554227352142, "fcm_dpo/q_t": 0.5009841322898865, "grad_norm": 18.880245208740234, "learning_rate": 3.7313432835820895e-07, "logits/chosen": 1.744141936302185, "logits/rejected": 1.693819284439087, "logps/chosen": -78.70307922363281, "logps/ref_chosen": -78.69171142578125, "logps/ref_rejected": -100.78950500488281, "logps/rejected": -100.76148986816406, "loss": 1.3907, "margin_dpo/margin_mean": -0.03938555717468262, "margin_dpo/margin_std": 0.4431745409965515, "step": 51 }, { "epoch": 0.07860922146636433, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.06755271553993225, "fcm_dpo/q_t": 0.4983121156692505, "grad_norm": 20.320974349975586, "learning_rate": 3.805970149253731e-07, "logits/chosen": 1.745551347732544, "logits/rejected": 1.5814502239227295, "logps/chosen": -89.07058715820312, "logps/ref_chosen": -89.09419250488281, "logps/ref_rejected": -116.87469482421875, "logps/rejected": -116.91864013671875, "loss": 1.38, "margin_dpo/margin_mean": 0.06755334138870239, "margin_dpo/margin_std": 0.4279418885707855, "step": 52 }, { "epoch": 0.0801209372637944, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.08846598863601685, "fcm_dpo/q_t": 0.4977889060974121, "grad_norm": 17.33759307861328, "learning_rate": 3.880597014925373e-07, "logits/chosen": 1.6374804973602295, "logits/rejected": 1.595580816268921, "logps/chosen": -74.09617614746094, "logps/ref_chosen": -74.21418762207031, "logps/ref_rejected": -75.71168518066406, "logps/rejected": -75.68213653564453, "loss": 1.378, "margin_dpo/margin_mean": 0.08846625685691833, "margin_dpo/margin_std": 0.4409305453300476, "step": 53 }, { "epoch": 0.08163265306122448, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.028407424688339233, "fcm_dpo/q_t": 0.4992886483669281, "grad_norm": 16.426828384399414, "learning_rate": 3.9552238805970144e-07, "logits/chosen": 1.7313616275787354, "logits/rejected": 1.7127957344055176, "logps/chosen": -65.56224822998047, "logps/ref_chosen": -65.63475799560547, "logps/ref_rejected": -76.4462890625, "logps/rejected": -76.40218353271484, "loss": 1.384, "margin_dpo/margin_mean": 0.028407543897628784, "margin_dpo/margin_std": 0.46945488452911377, "step": 54 }, { "epoch": 0.08314436885865457, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.2457306683063507, "fcm_dpo/q_t": 0.4938609004020691, "grad_norm": 19.258989334106445, "learning_rate": 4.0298507462686564e-07, "logits/chosen": 1.6469519138336182, "logits/rejected": 1.4373173713684082, "logps/chosen": -68.6036376953125, "logps/ref_chosen": -68.7640380859375, "logps/ref_rejected": -108.80074310302734, "logps/rejected": -108.88607788085938, "loss": 1.3623, "margin_dpo/margin_mean": 0.24573048949241638, "margin_dpo/margin_std": 0.4226919412612915, "step": 55 }, { "epoch": 0.08465608465608465, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.05905681848526001, "fcm_dpo/q_t": 0.49852341413497925, "grad_norm": 16.984230041503906, "learning_rate": 4.1044776119402984e-07, "logits/chosen": 1.773057222366333, "logits/rejected": 1.746607780456543, "logps/chosen": -74.76235961914062, "logps/ref_chosen": -74.7939453125, "logps/ref_rejected": -81.83535766601562, "logps/rejected": -81.86283111572266, "loss": 1.3811, "margin_dpo/margin_mean": 0.05905655026435852, "margin_dpo/margin_std": 0.5325890779495239, "step": 56 }, { "epoch": 0.08616780045351474, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.17420035600662231, "fcm_dpo/q_t": 0.4956481456756592, "grad_norm": 18.959041595458984, "learning_rate": 4.17910447761194e-07, "logits/chosen": 1.9884854555130005, "logits/rejected": 1.7981288433074951, "logps/chosen": -74.45184326171875, "logps/ref_chosen": -74.5794677734375, "logps/ref_rejected": -105.61981964111328, "logps/rejected": -105.6663818359375, "loss": 1.3695, "margin_dpo/margin_mean": 0.174201101064682, "margin_dpo/margin_std": 0.47768181562423706, "step": 57 }, { "epoch": 0.08767951625094482, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.017644047737121582, "fcm_dpo/q_t": 0.4995603561401367, "grad_norm": 19.281293869018555, "learning_rate": 4.253731343283582e-07, "logits/chosen": 1.4138903617858887, "logits/rejected": 1.3425607681274414, "logps/chosen": -92.22441101074219, "logps/ref_chosen": -92.24464416503906, "logps/ref_rejected": -103.18975830078125, "logps/rejected": -103.18716430664062, "loss": 1.3852, "margin_dpo/margin_mean": 0.017644047737121582, "margin_dpo/margin_std": 0.48450881242752075, "step": 58 }, { "epoch": 0.08919123204837491, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.19744977355003357, "fcm_dpo/q_t": 0.4950745701789856, "grad_norm": 20.637189865112305, "learning_rate": 4.3283582089552234e-07, "logits/chosen": 1.76304292678833, "logits/rejected": 1.509261131286621, "logps/chosen": -66.98043060302734, "logps/ref_chosen": -67.12688446044922, "logps/ref_rejected": -91.69569396972656, "logps/rejected": -91.7467041015625, "loss": 1.3675, "margin_dpo/margin_mean": 0.19744998216629028, "margin_dpo/margin_std": 0.5356566905975342, "step": 59 }, { "epoch": 0.09070294784580499, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.13416770100593567, "fcm_dpo/q_t": 0.49665212631225586, "grad_norm": 18.396495819091797, "learning_rate": 4.4029850746268654e-07, "logits/chosen": 1.7832741737365723, "logits/rejected": 1.8049073219299316, "logps/chosen": -79.5711669921875, "logps/ref_chosen": -79.74327087402344, "logps/ref_rejected": -77.89244079589844, "logps/rejected": -77.85449981689453, "loss": 1.3736, "margin_dpo/margin_mean": 0.1341674029827118, "margin_dpo/margin_std": 0.5320160984992981, "step": 60 }, { "epoch": 0.09221466364323508, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.034498006105422974, "fcm_dpo/q_t": 0.4991379380226135, "grad_norm": 16.57811164855957, "learning_rate": 4.4776119402985074e-07, "logits/chosen": 1.688488245010376, "logits/rejected": 1.6512866020202637, "logps/chosen": -65.99887084960938, "logps/ref_chosen": -66.08685302734375, "logps/ref_rejected": -88.1458740234375, "logps/rejected": -88.0923843383789, "loss": 1.3834, "margin_dpo/margin_mean": 0.0344984233379364, "margin_dpo/margin_std": 0.48245492577552795, "step": 61 }, { "epoch": 0.09372637944066516, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.16568706929683685, "fcm_dpo/q_t": 0.49585962295532227, "grad_norm": 17.876060485839844, "learning_rate": 4.552238805970149e-07, "logits/chosen": 1.7909362316131592, "logits/rejected": 1.7238061428070068, "logps/chosen": -80.88427734375, "logps/ref_chosen": -81.0108871459961, "logps/ref_rejected": -95.50444793701172, "logps/rejected": -95.54353332519531, "loss": 1.3705, "margin_dpo/margin_mean": 0.16568706929683685, "margin_dpo/margin_std": 0.5354666113853455, "step": 62 }, { "epoch": 0.09523809523809523, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.19854192435741425, "fcm_dpo/q_t": 0.49504005908966064, "grad_norm": 19.247526168823242, "learning_rate": 4.626865671641791e-07, "logits/chosen": 2.170567750930786, "logits/rejected": 2.088958263397217, "logps/chosen": -78.36114501953125, "logps/ref_chosen": -78.57593536376953, "logps/ref_rejected": -99.71000671386719, "logps/rejected": -99.69376373291016, "loss": 1.3672, "margin_dpo/margin_mean": 0.19854141771793365, "margin_dpo/margin_std": 0.5002174377441406, "step": 63 }, { "epoch": 0.09674981103552532, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.017069846391677856, "fcm_dpo/q_t": 0.4995724558830261, "grad_norm": 16.591533660888672, "learning_rate": 4.701492537313433e-07, "logits/chosen": 1.7883801460266113, "logits/rejected": 1.7215988636016846, "logps/chosen": -69.16060638427734, "logps/ref_chosen": -69.24063110351562, "logps/ref_rejected": -84.14842987060547, "logps/rejected": -84.0854721069336, "loss": 1.3852, "margin_dpo/margin_mean": 0.017070025205612183, "margin_dpo/margin_std": 0.49829041957855225, "step": 64 }, { "epoch": 0.0982615268329554, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.11321347951889038, "fcm_dpo/q_t": 0.49717018008232117, "grad_norm": 18.86490821838379, "learning_rate": 4.776119402985074e-07, "logits/chosen": 1.8629745244979858, "logits/rejected": 1.8083195686340332, "logps/chosen": -83.99441528320312, "logps/ref_chosen": -84.0351333618164, "logps/ref_rejected": -96.42926788330078, "logps/rejected": -96.50176239013672, "loss": 1.3759, "margin_dpo/margin_mean": 0.11321339011192322, "margin_dpo/margin_std": 0.592049241065979, "step": 65 }, { "epoch": 0.09977324263038549, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.1881301999092102, "fcm_dpo/q_t": 0.49529772996902466, "grad_norm": 18.52004623413086, "learning_rate": 4.850746268656717e-07, "logits/chosen": 1.5754730701446533, "logits/rejected": 1.4911394119262695, "logps/chosen": -87.75141143798828, "logps/ref_chosen": -87.79238891601562, "logps/ref_rejected": -95.26547241210938, "logps/rejected": -95.41261291503906, "loss": 1.3687, "margin_dpo/margin_mean": 0.18812981247901917, "margin_dpo/margin_std": 0.6606887578964233, "step": 66 }, { "epoch": 0.10128495842781557, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.22150889039039612, "fcm_dpo/q_t": 0.49446702003479004, "grad_norm": 18.993770599365234, "learning_rate": 4.925373134328357e-07, "logits/chosen": 1.943489670753479, "logits/rejected": 1.8127994537353516, "logps/chosen": -77.81979370117188, "logps/ref_chosen": -78.00114440917969, "logps/ref_rejected": -96.03421020507812, "logps/rejected": -96.07437133789062, "loss": 1.3648, "margin_dpo/margin_mean": 0.22150954604148865, "margin_dpo/margin_std": 0.47556179761886597, "step": 67 }, { "epoch": 0.10279667422524566, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.13636493682861328, "fcm_dpo/q_t": 0.4965946674346924, "grad_norm": 19.818443298339844, "learning_rate": 5e-07, "logits/chosen": 1.6929965019226074, "logits/rejected": 1.6059188842773438, "logps/chosen": -96.03993225097656, "logps/ref_chosen": -96.04267883300781, "logps/ref_rejected": -110.91169738769531, "logps/rejected": -111.04530334472656, "loss": 1.3738, "margin_dpo/margin_mean": 0.1363646388053894, "margin_dpo/margin_std": 0.6684163212776184, "step": 68 }, { "epoch": 0.10430839002267574, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.22328773140907288, "fcm_dpo/q_t": 0.49442198872566223, "grad_norm": 19.92877960205078, "learning_rate": 4.999965034812934e-07, "logits/chosen": 1.787092924118042, "logits/rejected": 1.6620742082595825, "logps/chosen": -84.8952865600586, "logps/ref_chosen": -85.11124420166016, "logps/ref_rejected": -107.57357025146484, "logps/rejected": -107.58089447021484, "loss": 1.3648, "margin_dpo/margin_mean": 0.22328829765319824, "margin_dpo/margin_std": 0.5401943325996399, "step": 69 }, { "epoch": 0.10582010582010581, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.209281325340271, "fcm_dpo/q_t": 0.49477431178092957, "grad_norm": 18.5528621673584, "learning_rate": 4.999860140229787e-07, "logits/chosen": 1.7440345287322998, "logits/rejected": 1.6920671463012695, "logps/chosen": -81.64407348632812, "logps/ref_chosen": -81.87960815429688, "logps/ref_rejected": -92.63243103027344, "logps/rejected": -92.60617065429688, "loss": 1.3664, "margin_dpo/margin_mean": 0.20928049087524414, "margin_dpo/margin_std": 0.603878378868103, "step": 70 }, { "epoch": 0.1073318216175359, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.10643842816352844, "fcm_dpo/q_t": 0.4973390996456146, "grad_norm": 17.682138442993164, "learning_rate": 4.999685319184688e-07, "logits/chosen": 1.5762542486190796, "logits/rejected": 1.5709012746810913, "logps/chosen": -79.61023712158203, "logps/ref_chosen": -79.74766540527344, "logps/ref_rejected": -83.39110565185547, "logps/rejected": -83.360107421875, "loss": 1.3767, "margin_dpo/margin_mean": 0.10643890500068665, "margin_dpo/margin_std": 0.6256778836250305, "step": 71 }, { "epoch": 0.10884353741496598, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.32459521293640137, "fcm_dpo/q_t": 0.4918937087059021, "grad_norm": 19.00299835205078, "learning_rate": 4.999440576567755e-07, "logits/chosen": 1.7387597560882568, "logits/rejected": 1.553479790687561, "logps/chosen": -72.77051544189453, "logps/ref_chosen": -73.04458618164062, "logps/ref_rejected": -92.64720153808594, "logps/rejected": -92.69772338867188, "loss": 1.355, "margin_dpo/margin_mean": 0.32459497451782227, "margin_dpo/margin_std": 0.5936090350151062, "step": 72 }, { "epoch": 0.11035525321239607, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.05147072672843933, "fcm_dpo/q_t": 0.4987148642539978, "grad_norm": 19.377613067626953, "learning_rate": 4.999125919224965e-07, "logits/chosen": 1.6101853847503662, "logits/rejected": 1.5460271835327148, "logps/chosen": -87.6396255493164, "logps/ref_chosen": -87.71681213378906, "logps/ref_rejected": -96.93572998046875, "logps/rejected": -96.9100112915039, "loss": 1.3826, "margin_dpo/margin_mean": 0.05147099494934082, "margin_dpo/margin_std": 0.7500083446502686, "step": 73 }, { "epoch": 0.11186696900982615, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.28650206327438354, "fcm_dpo/q_t": 0.4928475618362427, "grad_norm": 18.037437438964844, "learning_rate": 4.998741355957963e-07, "logits/chosen": 1.8539071083068848, "logits/rejected": 1.6711949110031128, "logps/chosen": -66.66632843017578, "logps/ref_chosen": -67.07321166992188, "logps/ref_rejected": -96.5340347290039, "logps/rejected": -96.41365051269531, "loss": 1.3591, "margin_dpo/margin_mean": 0.2865017056465149, "margin_dpo/margin_std": 0.6675806045532227, "step": 74 }, { "epoch": 0.11337868480725624, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.30480456352233887, "fcm_dpo/q_t": 0.4923963248729706, "grad_norm": 16.95103645324707, "learning_rate": 4.998286897523808e-07, "logits/chosen": 1.6327571868896484, "logits/rejected": 1.4845844507217407, "logps/chosen": -61.54745864868164, "logps/ref_chosen": -61.80186462402344, "logps/ref_rejected": -82.37368774414062, "logps/rejected": -82.42408752441406, "loss": 1.3576, "margin_dpo/margin_mean": 0.3048042356967926, "margin_dpo/margin_std": 0.7787231802940369, "step": 75 }, { "epoch": 0.11489040060468632, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.2743793725967407, "fcm_dpo/q_t": 0.4931487441062927, "grad_norm": 17.764631271362305, "learning_rate": 4.997762556634679e-07, "logits/chosen": 1.6228429079055786, "logits/rejected": 1.4914746284484863, "logps/chosen": -69.57562255859375, "logps/ref_chosen": -69.92233276367188, "logps/ref_rejected": -97.08378601074219, "logps/rejected": -97.01145935058594, "loss": 1.3604, "margin_dpo/margin_mean": 0.274379700422287, "margin_dpo/margin_std": 0.7334781885147095, "step": 76 }, { "epoch": 0.1164021164021164, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.37875401973724365, "fcm_dpo/q_t": 0.4905601739883423, "grad_norm": 18.358583450317383, "learning_rate": 4.99716834795752e-07, "logits/chosen": 1.6846084594726562, "logits/rejected": 1.5783634185791016, "logps/chosen": -70.83340454101562, "logps/ref_chosen": -71.206298828125, "logps/ref_rejected": -95.22071075439453, "logps/rejected": -95.22657775878906, "loss": 1.3507, "margin_dpo/margin_mean": 0.3787541389465332, "margin_dpo/margin_std": 0.8486927151679993, "step": 77 }, { "epoch": 0.11791383219954649, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.30582955479621887, "fcm_dpo/q_t": 0.4923703074455261, "grad_norm": 17.85964584350586, "learning_rate": 4.996504288113623e-07, "logits/chosen": 1.8145931959152222, "logits/rejected": 1.79677414894104, "logps/chosen": -83.99678802490234, "logps/ref_chosen": -84.40055847167969, "logps/ref_rejected": -95.41949462890625, "logps/rejected": -95.3215560913086, "loss": 1.3576, "margin_dpo/margin_mean": 0.3058291971683502, "margin_dpo/margin_std": 0.7991141080856323, "step": 78 }, { "epoch": 0.11942554799697656, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.35301172733306885, "fcm_dpo/q_t": 0.4912147521972656, "grad_norm": 19.52484130859375, "learning_rate": 4.995770395678171e-07, "logits/chosen": 1.894843578338623, "logits/rejected": 1.6881787776947021, "logps/chosen": -65.58863830566406, "logps/ref_chosen": -65.93923950195312, "logps/ref_rejected": -102.92240905761719, "logps/rejected": -102.9248046875, "loss": 1.3539, "margin_dpo/margin_mean": 0.35301104187965393, "margin_dpo/margin_std": 1.007969856262207, "step": 79 }, { "epoch": 0.12093726379440665, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.2960975170135498, "fcm_dpo/q_t": 0.49261194467544556, "grad_norm": 17.473323822021484, "learning_rate": 4.994966691179711e-07, "logits/chosen": 1.82114839553833, "logits/rejected": 1.629103660583496, "logps/chosen": -78.35586547851562, "logps/ref_chosen": -78.61624908447266, "logps/ref_rejected": -99.9122314453125, "logps/rejected": -99.94795227050781, "loss": 1.3586, "margin_dpo/margin_mean": 0.29609763622283936, "margin_dpo/margin_std": 0.8148288726806641, "step": 80 }, { "epoch": 0.12244897959183673, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.347815603017807, "fcm_dpo/q_t": 0.4913283884525299, "grad_norm": 17.89147186279297, "learning_rate": 4.994093197099587e-07, "logits/chosen": 1.6513278484344482, "logits/rejected": 1.5496854782104492, "logps/chosen": -79.164306640625, "logps/ref_chosen": -79.49641418457031, "logps/ref_rejected": -94.52413940429688, "logps/rejected": -94.53985595703125, "loss": 1.3538, "margin_dpo/margin_mean": 0.3478164076805115, "margin_dpo/margin_std": 0.8724742531776428, "step": 81 }, { "epoch": 0.12396069538926682, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.6286215782165527, "fcm_dpo/q_t": 0.4843388497829437, "grad_norm": 17.7833309173584, "learning_rate": 4.993149937871306e-07, "logits/chosen": 1.5996932983398438, "logits/rejected": 1.4459776878356934, "logps/chosen": -64.317138671875, "logps/ref_chosen": -64.97168731689453, "logps/ref_rejected": -86.69085693359375, "logps/rejected": -86.66493225097656, "loss": 1.3266, "margin_dpo/margin_mean": 0.6286218166351318, "margin_dpo/margin_std": 0.9195500016212463, "step": 82 }, { "epoch": 0.1254724111866969, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.4474705457687378, "fcm_dpo/q_t": 0.48884087800979614, "grad_norm": 21.962217330932617, "learning_rate": 4.992136939879856e-07, "logits/chosen": 1.8637166023254395, "logits/rejected": 1.7185570001602173, "logps/chosen": -72.42298889160156, "logps/ref_chosen": -72.92498779296875, "logps/ref_rejected": -92.27165222167969, "logps/rejected": -92.21711730957031, "loss": 1.3442, "margin_dpo/margin_mean": 0.44747063517570496, "margin_dpo/margin_std": 0.9168833494186401, "step": 83 }, { "epoch": 0.12698412698412698, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.5169297456741333, "fcm_dpo/q_t": 0.4871019721031189, "grad_norm": 19.37822723388672, "learning_rate": 4.991054231460969e-07, "logits/chosen": 1.851825475692749, "logits/rejected": 1.6797561645507812, "logps/chosen": -81.32493591308594, "logps/ref_chosen": -81.79109191894531, "logps/ref_rejected": -99.20896911621094, "logps/rejected": -99.2597427368164, "loss": 1.3376, "margin_dpo/margin_mean": 0.5169292688369751, "margin_dpo/margin_std": 0.9634412527084351, "step": 84 }, { "epoch": 0.12849584278155707, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.49980345368385315, "fcm_dpo/q_t": 0.48752886056900024, "grad_norm": 17.480857849121094, "learning_rate": 4.989901842900325e-07, "logits/chosen": 1.607172966003418, "logits/rejected": 1.4860568046569824, "logps/chosen": -67.3022689819336, "logps/ref_chosen": -67.94147491455078, "logps/ref_rejected": -85.76875305175781, "logps/rejected": -85.62934112548828, "loss": 1.3389, "margin_dpo/margin_mean": 0.4998033940792084, "margin_dpo/margin_std": 0.8719754219055176, "step": 85 }, { "epoch": 0.13000755857898716, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.29450273513793945, "fcm_dpo/q_t": 0.49265211820602417, "grad_norm": 17.54376792907715, "learning_rate": 4.988679806432711e-07, "logits/chosen": 1.8459415435791016, "logits/rejected": 1.7846070528030396, "logps/chosen": -78.91737365722656, "logps/ref_chosen": -79.21485900878906, "logps/ref_rejected": -88.69877624511719, "logps/rejected": -88.69578552246094, "loss": 1.3588, "margin_dpo/margin_mean": 0.2945028245449066, "margin_dpo/margin_std": 0.837791919708252, "step": 86 }, { "epoch": 0.13151927437641722, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.7170735001564026, "fcm_dpo/q_t": 0.4821361303329468, "grad_norm": 18.833833694458008, "learning_rate": 4.987388156241114e-07, "logits/chosen": 1.451952338218689, "logits/rejected": 1.2411874532699585, "logps/chosen": -83.91081237792969, "logps/ref_chosen": -84.45362854003906, "logps/ref_rejected": -103.43824005126953, "logps/rejected": -103.61250305175781, "loss": 1.319, "margin_dpo/margin_mean": 0.7170728445053101, "margin_dpo/margin_std": 1.1152534484863281, "step": 87 }, { "epoch": 0.1330309901738473, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.33253175020217896, "fcm_dpo/q_t": 0.49171459674835205, "grad_norm": 18.181365966796875, "learning_rate": 4.986026928455767e-07, "logits/chosen": 2.0365257263183594, "logits/rejected": 2.005300283432007, "logps/chosen": -80.89089965820312, "logps/ref_chosen": -81.27230834960938, "logps/ref_rejected": -89.51646423339844, "logps/rejected": -89.46759033203125, "loss": 1.3566, "margin_dpo/margin_mean": 0.33253180980682373, "margin_dpo/margin_std": 1.0969430208206177, "step": 88 }, { "epoch": 0.1345427059712774, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.8061107397079468, "fcm_dpo/q_t": 0.4799831807613373, "grad_norm": 18.168310165405273, "learning_rate": 4.984596161153135e-07, "logits/chosen": 2.1221489906311035, "logits/rejected": 1.827742099761963, "logps/chosen": -57.4072265625, "logps/ref_chosen": -58.142333984375, "logps/ref_rejected": -102.53756713867188, "logps/rejected": -102.60858154296875, "loss": 1.3116, "margin_dpo/margin_mean": 0.8061116933822632, "margin_dpo/margin_std": 1.2829334735870361, "step": 89 }, { "epoch": 0.1360544217687075, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.6056348085403442, "fcm_dpo/q_t": 0.48491472005844116, "grad_norm": 19.63652992248535, "learning_rate": 4.983095894354857e-07, "logits/chosen": 1.6260521411895752, "logits/rejected": 1.4100569486618042, "logps/chosen": -74.74089050292969, "logps/ref_chosen": -75.26505279541016, "logps/ref_rejected": -104.32842254638672, "logps/rejected": -104.40989685058594, "loss": 1.3299, "margin_dpo/margin_mean": 0.6056344509124756, "margin_dpo/margin_std": 1.134067177772522, "step": 90 }, { "epoch": 0.13756613756613756, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.5181472301483154, "fcm_dpo/q_t": 0.4871419072151184, "grad_norm": 17.70273208618164, "learning_rate": 4.98152617002662e-07, "logits/chosen": 1.8195009231567383, "logits/rejected": 1.654341220855713, "logps/chosen": -68.80375671386719, "logps/ref_chosen": -69.33901977539062, "logps/ref_rejected": -90.31411743164062, "logps/rejected": -90.29698944091797, "loss": 1.34, "margin_dpo/margin_mean": 0.5181469917297363, "margin_dpo/margin_std": 1.3797237873077393, "step": 91 }, { "epoch": 0.13907785336356765, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.6811751127243042, "fcm_dpo/q_t": 0.48308151960372925, "grad_norm": 18.557336807250977, "learning_rate": 4.979887032076988e-07, "logits/chosen": 1.660226821899414, "logits/rejected": 1.5099092721939087, "logps/chosen": -71.76761627197266, "logps/ref_chosen": -72.4566650390625, "logps/ref_rejected": -91.6706771850586, "logps/rejected": -91.66280364990234, "loss": 1.3234, "margin_dpo/margin_mean": 0.6811752319335938, "margin_dpo/margin_std": 1.2715716361999512, "step": 92 }, { "epoch": 0.14058956916099774, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.465279757976532, "fcm_dpo/q_t": 0.48848843574523926, "grad_norm": 16.0939998626709, "learning_rate": 4.978178526356172e-07, "logits/chosen": 1.8248298168182373, "logits/rejected": 1.7261273860931396, "logps/chosen": -63.3609619140625, "logps/ref_chosen": -64.08897399902344, "logps/ref_rejected": -75.09095764160156, "logps/rejected": -74.82823181152344, "loss": 1.346, "margin_dpo/margin_mean": 0.4652804434299469, "margin_dpo/margin_std": 1.505643606185913, "step": 93 }, { "epoch": 0.1421012849584278, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.8975893259048462, "fcm_dpo/q_t": 0.4777688980102539, "grad_norm": 31.570436477661133, "learning_rate": 4.976400700654751e-07, "logits/chosen": 2.060239315032959, "logits/rejected": 1.872870922088623, "logps/chosen": -78.88949584960938, "logps/ref_chosen": -79.67372131347656, "logps/ref_rejected": -94.64076232910156, "logps/rejected": -94.75411987304688, "loss": 1.3053, "margin_dpo/margin_mean": 0.8975897431373596, "margin_dpo/margin_std": 1.6404364109039307, "step": 94 }, { "epoch": 0.1436130007558579, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.6297564506530762, "fcm_dpo/q_t": 0.4843508005142212, "grad_norm": 18.723411560058594, "learning_rate": 4.974553604702332e-07, "logits/chosen": 1.5617575645446777, "logits/rejected": 1.383953332901001, "logps/chosen": -78.22109985351562, "logps/ref_chosen": -78.65760803222656, "logps/ref_rejected": -109.4048080444336, "logps/rejected": -109.5980453491211, "loss": 1.3296, "margin_dpo/margin_mean": 0.6297565698623657, "margin_dpo/margin_std": 1.4608389139175415, "step": 95 }, { "epoch": 0.14512471655328799, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.7316545248031616, "fcm_dpo/q_t": 0.48186880350112915, "grad_norm": 19.00063705444336, "learning_rate": 4.972637290166157e-07, "logits/chosen": 1.5328270196914673, "logits/rejected": 1.4091155529022217, "logps/chosen": -77.26933288574219, "logps/ref_chosen": -77.708251953125, "logps/ref_rejected": -104.36044311523438, "logps/rejected": -104.65316772460938, "loss": 1.3209, "margin_dpo/margin_mean": 0.7316542863845825, "margin_dpo/margin_std": 1.5796196460723877, "step": 96 }, { "epoch": 0.14663643235071808, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.2733203172683716, "fcm_dpo/q_t": 0.4931853413581848, "grad_norm": 19.248638153076172, "learning_rate": 4.970651810649666e-07, "logits/chosen": 1.3479671478271484, "logits/rejected": 1.257331371307373, "logps/chosen": -84.284912109375, "logps/ref_chosen": -84.58917999267578, "logps/ref_rejected": -99.25704956054688, "logps/rejected": -99.2260971069336, "loss": 1.3655, "margin_dpo/margin_mean": 0.2733200490474701, "margin_dpo/margin_std": 1.5922892093658447, "step": 97 }, { "epoch": 0.14814814814814814, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.38651925325393677, "fcm_dpo/q_t": 0.4903528094291687, "grad_norm": 17.57636260986328, "learning_rate": 4.968597221690985e-07, "logits/chosen": 1.6413347721099854, "logits/rejected": 1.5928442478179932, "logps/chosen": -74.04196166992188, "logps/ref_chosen": -74.42477416992188, "logps/ref_rejected": -88.93840026855469, "logps/rejected": -88.94210815429688, "loss": 1.3525, "margin_dpo/margin_mean": 0.3865186870098114, "margin_dpo/margin_std": 1.329129934310913, "step": 98 }, { "epoch": 0.14965986394557823, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.757216215133667, "fcm_dpo/q_t": 0.48140111565589905, "grad_norm": 18.15253448486328, "learning_rate": 4.966473580761389e-07, "logits/chosen": 1.6560975313186646, "logits/rejected": 1.5651164054870605, "logps/chosen": -74.99360656738281, "logps/ref_chosen": -75.59742736816406, "logps/ref_rejected": -98.2310791015625, "logps/rejected": -98.38446807861328, "loss": 1.3225, "margin_dpo/margin_mean": 0.7572157979011536, "margin_dpo/margin_std": 2.042893886566162, "step": 99 }, { "epoch": 0.15117157974300832, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.7813898324966431, "fcm_dpo/q_t": 0.48093181848526, "grad_norm": 19.486074447631836, "learning_rate": 4.964280947263676e-07, "logits/chosen": 1.808748483657837, "logits/rejected": 1.7841796875, "logps/chosen": -98.01991271972656, "logps/ref_chosen": -98.55859375, "logps/ref_rejected": -106.01295471191406, "logps/rejected": -106.25565338134766, "loss": 1.3231, "margin_dpo/margin_mean": 0.7813898324966431, "margin_dpo/margin_std": 2.200671672821045, "step": 100 }, { "epoch": 0.15117157974300832, "eval_fcm_dpo/beta": 0.10000000894069672, "eval_logits/chosen": 1.6085329055786133, "eval_logits/rejected": 1.4983155727386475, "eval_logps/chosen": -86.20779418945312, "eval_logps/ref_chosen": -86.90177917480469, "eval_logps/ref_rejected": -96.69639587402344, "eval_logps/rejected": -96.88575744628906, "eval_loss": 0.6549195051193237, "eval_margin_dpo/margin_mean": 0.8833596706390381, "eval_margin_dpo/margin_std": 1.9511994123458862, "eval_runtime": 42.2871, "eval_samples_per_second": 54.461, "eval_steps_per_second": 1.703, "step": 100 }, { "epoch": 0.15268329554043839, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.9666967391967773, "fcm_dpo/q_t": 0.4759965240955353, "grad_norm": 16.30520248413086, "learning_rate": 4.96201938253052e-07, "logits/chosen": 1.3939599990844727, "logits/rejected": 1.3424584865570068, "logps/chosen": -68.65782165527344, "logps/ref_chosen": -69.45216369628906, "logps/ref_rejected": -88.0458755493164, "logps/rejected": -88.21821594238281, "loss": 1.2998, "margin_dpo/margin_mean": 0.966697096824646, "margin_dpo/margin_std": 1.7669236660003662, "step": 101 }, { "epoch": 0.15419501133786848, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.6540460586547852, "fcm_dpo/q_t": 0.48388558626174927, "grad_norm": 17.389196395874023, "learning_rate": 4.959688949822748e-07, "logits/chosen": 1.5881282091140747, "logits/rejected": 1.511448860168457, "logps/chosen": -79.79618835449219, "logps/ref_chosen": -80.35308837890625, "logps/ref_rejected": -90.61380004882812, "logps/rejected": -90.71095275878906, "loss": 1.3314, "margin_dpo/margin_mean": 0.6540460586547852, "margin_dpo/margin_std": 1.9390764236450195, "step": 102 }, { "epoch": 0.15570672713529857, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.2188489437103271, "fcm_dpo/q_t": 0.4699843227863312, "grad_norm": 17.374025344848633, "learning_rate": 4.957289714327572e-07, "logits/chosen": 1.997759461402893, "logits/rejected": 1.9253690242767334, "logps/chosen": -78.43865966796875, "logps/ref_chosen": -79.30392456054688, "logps/ref_rejected": -93.745361328125, "logps/rejected": -94.09895324707031, "loss": 1.2791, "margin_dpo/margin_mean": 1.2188482284545898, "margin_dpo/margin_std": 2.111494302749634, "step": 103 }, { "epoch": 0.15721844293272866, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.1677416563034058, "fcm_dpo/q_t": 0.47133898735046387, "grad_norm": 18.580514907836914, "learning_rate": 4.954821743156767e-07, "logits/chosen": 1.8479002714157104, "logits/rejected": 1.6079638004302979, "logps/chosen": -73.59405517578125, "logps/ref_chosen": -74.50674438476562, "logps/ref_rejected": -116.09912872314453, "logps/rejected": -116.35417938232422, "loss": 1.2871, "margin_dpo/margin_mean": 1.1677416563034058, "margin_dpo/margin_std": 2.363769769668579, "step": 104 }, { "epoch": 0.15873015873015872, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.9023860692977905, "fcm_dpo/q_t": 0.4778934121131897, "grad_norm": 18.901168823242188, "learning_rate": 4.952285105344791e-07, "logits/chosen": 1.5916838645935059, "logits/rejected": 1.4354064464569092, "logps/chosen": -87.22508239746094, "logps/ref_chosen": -87.76654815673828, "logps/ref_rejected": -108.07927703857422, "logps/rejected": -108.440185546875, "loss": 1.3158, "margin_dpo/margin_mean": 0.902385950088501, "margin_dpo/margin_std": 2.6685338020324707, "step": 105 }, { "epoch": 0.1602418745275888, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.8738718628883362, "fcm_dpo/q_t": 0.4783915877342224, "grad_norm": 17.121471405029297, "learning_rate": 4.949679871846857e-07, "logits/chosen": 1.8275485038757324, "logits/rejected": 1.7684516906738281, "logps/chosen": -75.5244369506836, "logps/ref_chosen": -76.38548278808594, "logps/ref_rejected": -81.63407897949219, "logps/rejected": -81.64691162109375, "loss": 1.3155, "margin_dpo/margin_mean": 0.8738718032836914, "margin_dpo/margin_std": 2.4262328147888184, "step": 106 }, { "epoch": 0.1617535903250189, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.5380064249038696, "fcm_dpo/q_t": 0.4865608215332031, "grad_norm": 19.295825958251953, "learning_rate": 4.947006115536947e-07, "logits/chosen": 1.439363956451416, "logits/rejected": 1.3768948316574097, "logps/chosen": -95.8614273071289, "logps/ref_chosen": -96.14849853515625, "logps/ref_rejected": -107.0481185913086, "logps/rejected": -107.29904174804688, "loss": 1.3456, "margin_dpo/margin_mean": 0.5380067825317383, "margin_dpo/margin_std": 2.2211201190948486, "step": 107 }, { "epoch": 0.16326530612244897, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.9229694604873657, "fcm_dpo/q_t": 0.4772723913192749, "grad_norm": 17.066343307495117, "learning_rate": 4.944263911205772e-07, "logits/chosen": 1.3664920330047607, "logits/rejected": 1.2473000288009644, "logps/chosen": -84.6237564086914, "logps/ref_chosen": -85.39241027832031, "logps/ref_rejected": -97.79592895507812, "logps/rejected": -97.95022583007812, "loss": 1.3095, "margin_dpo/margin_mean": 0.9229696989059448, "margin_dpo/margin_std": 2.316845417022705, "step": 108 }, { "epoch": 0.16477702191987906, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.389096975326538, "fcm_dpo/q_t": 0.46625447273254395, "grad_norm": 17.88287925720215, "learning_rate": 4.941453335558681e-07, "logits/chosen": 1.3533546924591064, "logits/rejected": 1.1422548294067383, "logps/chosen": -78.01161193847656, "logps/ref_chosen": -78.99874877929688, "logps/ref_rejected": -100.79278564453125, "logps/rejected": -101.19475555419922, "loss": 1.2711, "margin_dpo/margin_mean": 1.389096975326538, "margin_dpo/margin_std": 2.766042709350586, "step": 109 }, { "epoch": 0.16628873771730915, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.20230808854103088, "fcm_dpo/q_t": 0.4948790669441223, "grad_norm": 20.587228775024414, "learning_rate": 4.938574467213517e-07, "logits/chosen": 1.3525140285491943, "logits/rejected": 1.4121595621109009, "logps/chosen": -96.599853515625, "logps/ref_chosen": -96.95277404785156, "logps/ref_rejected": -91.44450378417969, "logps/rejected": -91.29388427734375, "loss": 1.3813, "margin_dpo/margin_mean": 0.20230792462825775, "margin_dpo/margin_std": 2.4545016288757324, "step": 110 }, { "epoch": 0.16780045351473924, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.0242056846618652, "fcm_dpo/q_t": 0.47484272718429565, "grad_norm": 16.43102264404297, "learning_rate": 4.935627386698418e-07, "logits/chosen": 1.7560900449752808, "logits/rejected": 1.6001261472702026, "logps/chosen": -69.20565795898438, "logps/ref_chosen": -70.01641845703125, "logps/ref_rejected": -92.87696838378906, "logps/rejected": -93.0904312133789, "loss": 1.3021, "margin_dpo/margin_mean": 1.024204969406128, "margin_dpo/margin_std": 2.487879514694214, "step": 111 }, { "epoch": 0.1693121693121693, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.2949209213256836, "fcm_dpo/q_t": 0.46816879510879517, "grad_norm": 18.924081802368164, "learning_rate": 4.932612176449559e-07, "logits/chosen": 1.5642766952514648, "logits/rejected": 1.3693039417266846, "logps/chosen": -76.81999206542969, "logps/ref_chosen": -77.80027770996094, "logps/ref_rejected": -123.10624694824219, "logps/rejected": -123.42089080810547, "loss": 1.2778, "margin_dpo/margin_mean": 1.2949196100234985, "margin_dpo/margin_std": 2.5712859630584717, "step": 112 }, { "epoch": 0.1708238851095994, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.8377039432525635, "fcm_dpo/q_t": 0.4793519079685211, "grad_norm": 16.532751083374023, "learning_rate": 4.929528920808854e-07, "logits/chosen": 1.691911220550537, "logits/rejected": 1.603704810142517, "logps/chosen": -69.27433776855469, "logps/ref_chosen": -70.54346466064453, "logps/ref_rejected": -88.79286193847656, "logps/rejected": -88.36143493652344, "loss": 1.3198, "margin_dpo/margin_mean": 0.8377047181129456, "margin_dpo/margin_std": 2.449063777923584, "step": 113 }, { "epoch": 0.17233560090702948, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.3174316883087158, "fcm_dpo/q_t": 0.4681586027145386, "grad_norm": 18.068744659423828, "learning_rate": 4.92637770602159e-07, "logits/chosen": 1.864563226699829, "logits/rejected": 1.7135505676269531, "logps/chosen": -82.77174377441406, "logps/ref_chosen": -83.9239501953125, "logps/ref_rejected": -92.85765838623047, "logps/rejected": -93.02288818359375, "loss": 1.2816, "margin_dpo/margin_mean": 1.317431926727295, "margin_dpo/margin_std": 3.0355539321899414, "step": 114 }, { "epoch": 0.17384731670445955, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.194430947303772, "fcm_dpo/q_t": 0.4708959460258484, "grad_norm": 16.878889083862305, "learning_rate": 4.923158620234019e-07, "logits/chosen": 1.5815708637237549, "logits/rejected": 1.4187781810760498, "logps/chosen": -68.57202911376953, "logps/ref_chosen": -69.82767486572266, "logps/ref_rejected": -96.51564025878906, "logps/rejected": -96.45442199707031, "loss": 1.2872, "margin_dpo/margin_mean": 1.1944315433502197, "margin_dpo/margin_std": 2.593759536743164, "step": 115 }, { "epoch": 0.17535903250188964, "fcm_dpo/beta": 0.10189038515090942, "fcm_dpo/delta": 0.1855519711971283, "fcm_dpo/margin": 1.6823737621307373, "fcm_dpo/q_t": 0.45880037546157837, "grad_norm": 18.34799575805664, "learning_rate": 4.91987175349089e-07, "logits/chosen": 1.7459111213684082, "logits/rejected": 1.5880231857299805, "logps/chosen": -64.80363464355469, "logps/ref_chosen": -66.19773864746094, "logps/ref_rejected": -90.88304138183594, "logps/rejected": -91.17130279541016, "loss": 1.238, "margin_dpo/margin_mean": 1.682374119758606, "margin_dpo/margin_std": 2.485858917236328, "step": 116 }, { "epoch": 0.17687074829931973, "fcm_dpo/beta": 0.10378076136112213, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.591629981994629, "fcm_dpo/q_t": 0.4595806896686554, "grad_norm": 17.06990623474121, "learning_rate": 4.916517197732933e-07, "logits/chosen": 1.644856333732605, "logits/rejected": 1.5434954166412354, "logps/chosen": -70.47422790527344, "logps/ref_chosen": -72.15988159179688, "logps/ref_rejected": -85.30296325683594, "logps/rejected": -85.20893859863281, "loss": 1.2484, "margin_dpo/margin_mean": 1.5916296243667603, "margin_dpo/margin_std": 2.757472276687622, "step": 117 }, { "epoch": 0.17838246409674982, "fcm_dpo/beta": 0.10378076136112213, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.3567464351654053, "fcm_dpo/q_t": 0.4655323326587677, "grad_norm": 16.98048973083496, "learning_rate": 4.913095046794281e-07, "logits/chosen": 1.9137165546417236, "logits/rejected": 1.7827235460281372, "logps/chosen": -70.03201293945312, "logps/ref_chosen": -71.47773742675781, "logps/ref_rejected": -96.95051574707031, "logps/rejected": -96.8615493774414, "loss": 1.2725, "margin_dpo/margin_mean": 1.3567461967468262, "margin_dpo/margin_std": 2.878359794616699, "step": 118 }, { "epoch": 0.17989417989417988, "fcm_dpo/beta": 0.10378076136112213, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.293745517730713, "fcm_dpo/q_t": 0.4677852988243103, "grad_norm": 17.565595626831055, "learning_rate": 4.909605396399855e-07, "logits/chosen": 1.7382652759552002, "logits/rejected": 1.6407535076141357, "logps/chosen": -76.83675384521484, "logps/ref_chosen": -78.2727279663086, "logps/ref_rejected": -94.71317291259766, "logps/rejected": -94.57093811035156, "loss": 1.2879, "margin_dpo/margin_mean": 1.2937450408935547, "margin_dpo/margin_std": 3.416473150253296, "step": 119 }, { "epoch": 0.18140589569160998, "fcm_dpo/beta": 0.1076236441731453, "fcm_dpo/delta": 0.18179886043071747, "fcm_dpo/margin": 1.9414758682250977, "fcm_dpo/q_t": 0.45040810108184814, "grad_norm": 19.297927856445312, "learning_rate": 4.906048344162676e-07, "logits/chosen": 1.7652626037597656, "logits/rejected": 1.620312213897705, "logps/chosen": -76.534423828125, "logps/ref_chosen": -78.43109130859375, "logps/ref_rejected": -100.2771987915039, "logps/rejected": -100.32200622558594, "loss": 1.2131, "margin_dpo/margin_mean": 1.9414761066436768, "margin_dpo/margin_std": 2.947596549987793, "step": 120 }, { "epoch": 0.18291761148904007, "fcm_dpo/beta": 0.1076236441731453, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.3684052228927612, "fcm_dpo/q_t": 0.46462342143058777, "grad_norm": 20.10668182373047, "learning_rate": 4.902423989581143e-07, "logits/chosen": 2.0717074871063232, "logits/rejected": 1.8019273281097412, "logps/chosen": -72.44033813476562, "logps/ref_chosen": -74.08768463134766, "logps/ref_rejected": -118.6731948852539, "logps/rejected": -118.39424896240234, "loss": 1.2726, "margin_dpo/margin_mean": 1.368406057357788, "margin_dpo/margin_std": 3.1463675498962402, "step": 121 }, { "epoch": 0.18442932728647016, "fcm_dpo/beta": 0.1076236441731453, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.446579933166504, "fcm_dpo/q_t": 0.46327704191207886, "grad_norm": 19.02294158935547, "learning_rate": 4.898732434036243e-07, "logits/chosen": 1.5263891220092773, "logits/rejected": 1.4055243730545044, "logps/chosen": -77.6707763671875, "logps/ref_chosen": -79.36762237548828, "logps/ref_rejected": -92.42371368408203, "logps/rejected": -92.17345428466797, "loss": 1.2719, "margin_dpo/margin_mean": 1.4465796947479248, "margin_dpo/margin_std": 3.5089354515075684, "step": 122 }, { "epoch": 0.18594104308390022, "fcm_dpo/beta": 0.1076236441731453, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.629873514175415, "fcm_dpo/q_t": 0.4577152729034424, "grad_norm": 18.349647521972656, "learning_rate": 4.894973780788722e-07, "logits/chosen": 1.517094373703003, "logits/rejected": 1.4203633069992065, "logps/chosen": -69.94999694824219, "logps/ref_chosen": -71.91705322265625, "logps/ref_rejected": -96.36418151855469, "logps/rejected": -96.02699279785156, "loss": 1.2446, "margin_dpo/margin_mean": 1.6298733949661255, "margin_dpo/margin_std": 2.976022720336914, "step": 123 }, { "epoch": 0.1874527588813303, "fcm_dpo/beta": 0.10963409394025803, "fcm_dpo/delta": 0.18339866399765015, "fcm_dpo/margin": 1.8729770183563232, "fcm_dpo/q_t": 0.45209065079689026, "grad_norm": 19.461355209350586, "learning_rate": 4.89114813497619e-07, "logits/chosen": 1.773781657218933, "logits/rejected": 1.6544387340545654, "logps/chosen": -69.50850677490234, "logps/ref_chosen": -71.72529602050781, "logps/ref_rejected": -111.17984771728516, "logps/rejected": -110.83602905273438, "loss": 1.2312, "margin_dpo/margin_mean": 1.8729764223098755, "margin_dpo/margin_std": 3.717836380004883, "step": 124 }, { "epoch": 0.1889644746787604, "fcm_dpo/beta": 0.1148253008723259, "fcm_dpo/delta": 0.14045879244804382, "fcm_dpo/margin": 2.0888173580169678, "fcm_dpo/q_t": 0.4435945749282837, "grad_norm": 21.065311431884766, "learning_rate": 4.887255603610184e-07, "logits/chosen": 1.6337459087371826, "logits/rejected": 1.4570084810256958, "logps/chosen": -79.20709991455078, "logps/ref_chosen": -81.55532836914062, "logps/ref_rejected": -110.9144287109375, "logps/rejected": -110.65502166748047, "loss": 1.2003, "margin_dpo/margin_mean": 2.0888171195983887, "margin_dpo/margin_std": 3.4307498931884766, "step": 125 }, { "epoch": 0.19047619047619047, "fcm_dpo/beta": 0.11694176495075226, "fcm_dpo/delta": 0.1810038983821869, "fcm_dpo/margin": 1.6035174131393433, "fcm_dpo/q_t": 0.4577940106391907, "grad_norm": 21.680734634399414, "learning_rate": 4.883296295573176e-07, "logits/chosen": 1.2153024673461914, "logits/rejected": 1.241828441619873, "logps/chosen": -83.78556823730469, "logps/ref_chosen": -87.07349395751953, "logps/ref_rejected": -85.05271911621094, "logps/rejected": -83.36831665039062, "loss": 1.2689, "margin_dpo/margin_mean": 1.603518009185791, "margin_dpo/margin_std": 4.2111663818359375, "step": 126 }, { "epoch": 0.19198790627362056, "fcm_dpo/beta": 0.1221129521727562, "fcm_dpo/delta": 0.12666912376880646, "fcm_dpo/margin": 2.1889748573303223, "fcm_dpo/q_t": 0.43728864192962646, "grad_norm": 20.29819107055664, "learning_rate": 4.87927032161552e-07, "logits/chosen": 1.7409113645553589, "logits/rejected": 1.6811532974243164, "logps/chosen": -77.265625, "logps/ref_chosen": -80.4578857421875, "logps/ref_rejected": -90.50740051269531, "logps/rejected": -89.50411987304688, "loss": 1.175, "margin_dpo/margin_mean": 2.1889748573303223, "margin_dpo/margin_std": 3.178166627883911, "step": 127 }, { "epoch": 0.19349962207105065, "fcm_dpo/beta": 0.12689261138439178, "fcm_dpo/delta": 0.19197335839271545, "fcm_dpo/margin": 1.4382350444793701, "fcm_dpo/q_t": 0.45880699157714844, "grad_norm": 23.865381240844727, "learning_rate": 4.875177794352363e-07, "logits/chosen": 1.8986896276474, "logits/rejected": 1.6842751502990723, "logps/chosen": -82.81161499023438, "logps/ref_chosen": -85.77519226074219, "logps/ref_rejected": -112.63516998291016, "logps/rejected": -111.10983276367188, "loss": 1.301, "margin_dpo/margin_mean": 1.4382350444793701, "margin_dpo/margin_std": 4.812017440795898, "step": 128 }, { "epoch": 0.19501133786848074, "fcm_dpo/beta": 0.12921908497810364, "fcm_dpo/delta": 0.18006029725074768, "fcm_dpo/margin": 1.665739893913269, "fcm_dpo/q_t": 0.45093223452568054, "grad_norm": 24.163936614990234, "learning_rate": 4.871018828260491e-07, "logits/chosen": 1.3364216089248657, "logits/rejected": 1.3428212404251099, "logps/chosen": -82.05198669433594, "logps/ref_chosen": -84.94615173339844, "logps/ref_rejected": -85.36473846435547, "logps/rejected": -84.13630676269531, "loss": 1.2632, "margin_dpo/margin_mean": 1.6657401323318481, "margin_dpo/margin_std": 4.499945640563965, "step": 129 }, { "epoch": 0.1965230536659108, "fcm_dpo/beta": 0.13545329868793488, "fcm_dpo/delta": 0.1463683694601059, "fcm_dpo/margin": 1.901643991470337, "fcm_dpo/q_t": 0.4415082633495331, "grad_norm": 24.466577529907227, "learning_rate": 4.866793539675126e-07, "logits/chosen": 1.6595840454101562, "logits/rejected": 1.5338075160980225, "logps/chosen": -75.72906494140625, "logps/ref_chosen": -79.0184555053711, "logps/ref_rejected": -97.63998413085938, "logps/rejected": -96.25224304199219, "loss": 1.2165, "margin_dpo/margin_mean": 1.90164315700531, "margin_dpo/margin_std": 3.910851001739502, "step": 130 }, { "epoch": 0.1980347694633409, "fcm_dpo/beta": 0.14048483967781067, "fcm_dpo/delta": 0.25885722041130066, "fcm_dpo/margin": 2.4962222576141357, "fcm_dpo/q_t": 0.4236597418785095, "grad_norm": 22.44991111755371, "learning_rate": 4.86250204678667e-07, "logits/chosen": 1.7617708444595337, "logits/rejected": 1.4943931102752686, "logps/chosen": -64.65379333496094, "logps/ref_chosen": -68.24565887451172, "logps/ref_rejected": -97.99555969238281, "logps/rejected": -96.89991760253906, "loss": 1.1614, "margin_dpo/margin_mean": 2.496222496032715, "margin_dpo/margin_std": 4.536970138549805, "step": 131 }, { "epoch": 0.19954648526077098, "fcm_dpo/beta": 0.15050306916236877, "fcm_dpo/delta": 0.3449888527393341, "fcm_dpo/margin": 1.749915599822998, "fcm_dpo/q_t": 0.44061940908432007, "grad_norm": 29.038654327392578, "learning_rate": 4.858144469637408e-07, "logits/chosen": 1.7392385005950928, "logits/rejected": 1.6208550930023193, "logps/chosen": -78.46183776855469, "logps/ref_chosen": -82.06532287597656, "logps/ref_rejected": -89.47691345214844, "logps/rejected": -87.62332916259766, "loss": 1.2182, "margin_dpo/margin_mean": 1.749915599822998, "margin_dpo/margin_std": 3.832667350769043, "step": 132 }, { "epoch": 0.20105820105820105, "fcm_dpo/beta": 0.15955929458141327, "fcm_dpo/delta": 0.2966747283935547, "fcm_dpo/margin": 1.961458683013916, "fcm_dpo/q_t": 0.43171608448028564, "grad_norm": 27.75748634338379, "learning_rate": 4.853720930118138e-07, "logits/chosen": 1.3796292543411255, "logits/rejected": 1.4140539169311523, "logps/chosen": -79.47694396972656, "logps/ref_chosen": -83.70661163330078, "logps/ref_rejected": -89.3868179321289, "logps/rejected": -87.11859893798828, "loss": 1.2052, "margin_dpo/margin_mean": 1.961458444595337, "margin_dpo/margin_std": 4.18922233581543, "step": 133 }, { "epoch": 0.20256991685563114, "fcm_dpo/beta": 0.1658254861831665, "fcm_dpo/delta": 0.13663284480571747, "fcm_dpo/margin": 2.83111834526062, "fcm_dpo/q_t": 0.401967316865921, "grad_norm": 24.919769287109375, "learning_rate": 4.849231551964771e-07, "logits/chosen": 1.6263504028320312, "logits/rejected": 1.5011672973632812, "logps/chosen": -66.6833267211914, "logps/ref_chosen": -71.57601928710938, "logps/ref_rejected": -92.34259033203125, "logps/rejected": -90.2810287475586, "loss": 1.1007, "margin_dpo/margin_mean": 2.831118106842041, "margin_dpo/margin_std": 4.706888198852539, "step": 134 }, { "epoch": 0.20408163265306123, "fcm_dpo/beta": 0.17376971244812012, "fcm_dpo/delta": 0.2560199201107025, "fcm_dpo/margin": 2.0251028537750244, "fcm_dpo/q_t": 0.42260998487472534, "grad_norm": 26.780994415283203, "learning_rate": 4.844676460754862e-07, "logits/chosen": 1.4887380599975586, "logits/rejected": 1.431467056274414, "logps/chosen": -61.249507904052734, "logps/ref_chosen": -66.39884948730469, "logps/ref_rejected": -81.38636779785156, "logps/rejected": -78.26211547851562, "loss": 1.1841, "margin_dpo/margin_mean": 2.025102376937866, "margin_dpo/margin_std": 4.178084373474121, "step": 135 }, { "epoch": 0.20559334845049132, "fcm_dpo/beta": 0.17436444759368896, "fcm_dpo/delta": -0.030570298433303833, "fcm_dpo/margin": 2.4477319717407227, "fcm_dpo/q_t": 0.417044460773468, "grad_norm": 32.673492431640625, "learning_rate": 4.840055783904106e-07, "logits/chosen": 1.5175824165344238, "logits/rejected": 1.2436449527740479, "logps/chosen": -82.66349029541016, "logps/ref_chosen": -86.75381469726562, "logps/ref_rejected": -113.35548400878906, "logps/rejected": -111.71290588378906, "loss": 1.2295, "margin_dpo/margin_mean": 2.4477314949035645, "margin_dpo/margin_std": 5.707584857940674, "step": 136 }, { "epoch": 0.20710506424792138, "fcm_dpo/beta": 0.17389875650405884, "fcm_dpo/delta": -0.026779502630233765, "fcm_dpo/margin": 2.481207847595215, "fcm_dpo/q_t": 0.4082632064819336, "grad_norm": 24.82634162902832, "learning_rate": 4.835369650662767e-07, "logits/chosen": 1.7708425521850586, "logits/rejected": 1.6555781364440918, "logps/chosen": -67.07716369628906, "logps/ref_chosen": -72.21119689941406, "logps/ref_rejected": -88.30802917480469, "logps/rejected": -85.65521240234375, "loss": 1.1352, "margin_dpo/margin_mean": 2.481208324432373, "margin_dpo/margin_std": 4.302756309509277, "step": 137 }, { "epoch": 0.20861678004535147, "fcm_dpo/beta": 0.1810002624988556, "fcm_dpo/delta": 0.28295964002609253, "fcm_dpo/margin": 1.8019649982452393, "fcm_dpo/q_t": 0.43597790598869324, "grad_norm": 30.83222770690918, "learning_rate": 4.830618192112065e-07, "logits/chosen": 1.7719461917877197, "logits/rejected": 1.6590218544006348, "logps/chosen": -70.21919250488281, "logps/ref_chosen": -74.54273223876953, "logps/ref_rejected": -84.63615417480469, "logps/rejected": -82.11457824707031, "loss": 1.2579, "margin_dpo/margin_mean": 1.801964521408081, "margin_dpo/margin_std": 4.899100303649902, "step": 138 }, { "epoch": 0.21012849584278157, "fcm_dpo/beta": 0.1919894814491272, "fcm_dpo/delta": 0.2702018916606903, "fcm_dpo/margin": 1.7565574645996094, "fcm_dpo/q_t": 0.4235602617263794, "grad_norm": 40.13972473144531, "learning_rate": 4.825801541160509e-07, "logits/chosen": 1.2733126878738403, "logits/rejected": 1.2188732624053955, "logps/chosen": -83.63345336914062, "logps/ref_chosen": -87.63740539550781, "logps/ref_rejected": -101.3896484375, "logps/rejected": -99.14225006103516, "loss": 1.2606, "margin_dpo/margin_mean": 1.7565574645996094, "margin_dpo/margin_std": 4.6638336181640625, "step": 139 }, { "epoch": 0.21164021164021163, "fcm_dpo/beta": 0.1938619166612625, "fcm_dpo/delta": 0.03991154208779335, "fcm_dpo/margin": 2.8989505767822266, "fcm_dpo/q_t": 0.3879617750644684, "grad_norm": 37.32284164428711, "learning_rate": 4.820919832540181e-07, "logits/chosen": 1.2418723106384277, "logits/rejected": 1.1534655094146729, "logps/chosen": -76.45626831054688, "logps/ref_chosen": -81.32339477539062, "logps/ref_rejected": -99.7275619506836, "logps/rejected": -97.7593765258789, "loss": 1.1224, "margin_dpo/margin_mean": 2.898951292037964, "margin_dpo/margin_std": 5.1982293128967285, "step": 140 }, { "epoch": 0.21315192743764172, "fcm_dpo/beta": 0.19558203220367432, "fcm_dpo/delta": 0.04644138365983963, "fcm_dpo/margin": 2.841860771179199, "fcm_dpo/q_t": 0.3873937726020813, "grad_norm": 28.78200340270996, "learning_rate": 4.815973202802966e-07, "logits/chosen": 1.7557021379470825, "logits/rejected": 1.647310495376587, "logps/chosen": -73.34912109375, "logps/ref_chosen": -78.08534240722656, "logps/ref_rejected": -101.70516967773438, "logps/rejected": -99.81080627441406, "loss": 1.1138, "margin_dpo/margin_mean": 2.8418610095977783, "margin_dpo/margin_std": 4.951857089996338, "step": 141 }, { "epoch": 0.2146636432350718, "fcm_dpo/beta": 0.2037084996700287, "fcm_dpo/delta": 0.19900891184806824, "fcm_dpo/margin": 2.004154682159424, "fcm_dpo/q_t": 0.4186818599700928, "grad_norm": 32.78255081176758, "learning_rate": 4.810961790316729e-07, "logits/chosen": 1.576524019241333, "logits/rejected": 1.517528772354126, "logps/chosen": -78.01957702636719, "logps/ref_chosen": -82.84616088867188, "logps/ref_rejected": -95.14714050292969, "logps/rejected": -92.32470703125, "loss": 1.2129, "margin_dpo/margin_mean": 2.004155158996582, "margin_dpo/margin_std": 4.6407389640808105, "step": 142 }, { "epoch": 0.2161753590325019, "fcm_dpo/beta": 0.21314668655395508, "fcm_dpo/delta": 0.2351382076740265, "fcm_dpo/margin": 1.7482174634933472, "fcm_dpo/q_t": 0.42905452847480774, "grad_norm": 48.79874038696289, "learning_rate": 4.805885735261454e-07, "logits/chosen": 1.6225446462631226, "logits/rejected": 1.5896368026733398, "logps/chosen": -75.57222747802734, "logps/ref_chosen": -80.29791259765625, "logps/ref_rejected": -87.44291687011719, "logps/rejected": -84.4654541015625, "loss": 1.3224, "margin_dpo/margin_mean": 1.7482179403305054, "margin_dpo/margin_std": 5.3093485832214355, "step": 143 }, { "epoch": 0.21768707482993196, "fcm_dpo/beta": 0.21803462505340576, "fcm_dpo/delta": 0.0706484317779541, "fcm_dpo/margin": 1.2398256063461304, "fcm_dpo/q_t": 0.4590599238872528, "grad_norm": 51.408302307128906, "learning_rate": 4.800745179625307e-07, "logits/chosen": 1.5925514698028564, "logits/rejected": 1.5372166633605957, "logps/chosen": -74.98297119140625, "logps/ref_chosen": -79.09429168701172, "logps/ref_rejected": -92.42912292480469, "logps/rejected": -89.55763244628906, "loss": 1.5033, "margin_dpo/margin_mean": 1.2398253679275513, "margin_dpo/margin_std": 6.016498565673828, "step": 144 }, { "epoch": 0.21919879062736206, "fcm_dpo/beta": 0.21978802978992462, "fcm_dpo/delta": 0.07088879495859146, "fcm_dpo/margin": 2.42476224899292, "fcm_dpo/q_t": 0.39249861240386963, "grad_norm": 48.06195831298828, "learning_rate": 4.795540267200686e-07, "logits/chosen": 1.6124813556671143, "logits/rejected": 1.647786259651184, "logps/chosen": -92.46937561035156, "logps/ref_chosen": -97.7087173461914, "logps/ref_rejected": -97.63011169433594, "logps/rejected": -94.81553649902344, "loss": 1.2479, "margin_dpo/margin_mean": 2.424762010574341, "margin_dpo/margin_std": 5.748805522918701, "step": 145 }, { "epoch": 0.22071050642479215, "fcm_dpo/beta": 0.22856765985488892, "fcm_dpo/delta": 0.20782732963562012, "fcm_dpo/margin": 1.7500625848770142, "fcm_dpo/q_t": 0.4228348731994629, "grad_norm": 43.26691818237305, "learning_rate": 4.790271143580173e-07, "logits/chosen": 1.291736125946045, "logits/rejected": 1.2866215705871582, "logps/chosen": -70.85255432128906, "logps/ref_chosen": -76.56294250488281, "logps/ref_rejected": -83.78160095214844, "logps/rejected": -79.82127380371094, "loss": 1.2569, "margin_dpo/margin_mean": 1.7500635385513306, "margin_dpo/margin_std": 4.598239898681641, "step": 146 }, { "epoch": 0.2222222222222222, "fcm_dpo/beta": 0.2355855107307434, "fcm_dpo/delta": 0.16424530744552612, "fcm_dpo/margin": 1.8817954063415527, "fcm_dpo/q_t": 0.4236310124397278, "grad_norm": 49.79362106323242, "learning_rate": 4.784937956152489e-07, "logits/chosen": 1.5991694927215576, "logits/rejected": 1.5054233074188232, "logps/chosen": -78.0293960571289, "logps/ref_chosen": -83.24113464355469, "logps/ref_rejected": -97.50960540771484, "logps/rejected": -94.1796646118164, "loss": 1.2993, "margin_dpo/margin_mean": 1.8817964792251587, "margin_dpo/margin_std": 5.177776336669922, "step": 147 }, { "epoch": 0.2237339380196523, "fcm_dpo/beta": 0.2397206574678421, "fcm_dpo/delta": -0.03846623748540878, "fcm_dpo/margin": 2.6436634063720703, "fcm_dpo/q_t": 0.3887077569961548, "grad_norm": 34.138458251953125, "learning_rate": 4.779540854098347e-07, "logits/chosen": 1.8034465312957764, "logits/rejected": 1.581761360168457, "logps/chosen": -60.71531677246094, "logps/ref_chosen": -66.36277770996094, "logps/ref_rejected": -87.66487121582031, "logps/rejected": -84.66107940673828, "loss": 1.1977, "margin_dpo/margin_mean": 2.6436638832092285, "margin_dpo/margin_std": 5.585522651672363, "step": 148 }, { "epoch": 0.2252456538170824, "fcm_dpo/beta": 0.24301239848136902, "fcm_dpo/delta": 0.18357330560684204, "fcm_dpo/margin": 1.745511770248413, "fcm_dpo/q_t": 0.41498392820358276, "grad_norm": 41.716548919677734, "learning_rate": 4.774079988386296e-07, "logits/chosen": 1.7528355121612549, "logits/rejected": 1.6265687942504883, "logps/chosen": -67.40153503417969, "logps/ref_chosen": -72.0576171875, "logps/ref_rejected": -83.94097900390625, "logps/rejected": -81.0303955078125, "loss": 1.2486, "margin_dpo/margin_mean": 1.745511770248413, "margin_dpo/margin_std": 4.357009410858154, "step": 149 }, { "epoch": 0.22675736961451248, "fcm_dpo/beta": 0.23915709555149078, "fcm_dpo/delta": -0.14455409348011017, "fcm_dpo/margin": 3.065699577331543, "fcm_dpo/q_t": 0.36927932500839233, "grad_norm": 46.32753372192383, "learning_rate": 4.768555511768486e-07, "logits/chosen": 1.7210315465927124, "logits/rejected": 1.645007610321045, "logps/chosen": -80.22085571289062, "logps/ref_chosen": -85.52684783935547, "logps/ref_rejected": -108.37449645996094, "logps/rejected": -106.13421630859375, "loss": 1.1408, "margin_dpo/margin_mean": 3.065699338912964, "margin_dpo/margin_std": 5.578556060791016, "step": 150 }, { "epoch": 0.22826908541194255, "fcm_dpo/beta": 0.2331182360649109, "fcm_dpo/delta": -0.14278751611709595, "fcm_dpo/margin": 3.1410341262817383, "fcm_dpo/q_t": 0.36318397521972656, "grad_norm": 34.232784271240234, "learning_rate": 4.762967578776406e-07, "logits/chosen": 1.6338458061218262, "logits/rejected": 1.4856846332550049, "logps/chosen": -62.60700988769531, "logps/ref_chosen": -69.160888671875, "logps/ref_rejected": -91.42207336425781, "logps/rejected": -88.00923156738281, "loss": 1.0322, "margin_dpo/margin_mean": 3.1410341262817383, "margin_dpo/margin_std": 4.984264373779297, "step": 151 }, { "epoch": 0.22978080120937264, "fcm_dpo/beta": 0.23434986174106598, "fcm_dpo/delta": 0.013609714806079865, "fcm_dpo/margin": 2.4968485832214355, "fcm_dpo/q_t": 0.3925015330314636, "grad_norm": 42.97077941894531, "learning_rate": 4.757316345716553e-07, "logits/chosen": 1.8712348937988281, "logits/rejected": 1.704573631286621, "logps/chosen": -66.96708679199219, "logps/ref_chosen": -72.48135375976562, "logps/ref_rejected": -94.44818878173828, "logps/rejected": -91.4307632446289, "loss": 1.178, "margin_dpo/margin_mean": 2.4968485832214355, "margin_dpo/margin_std": 5.199925422668457, "step": 152 }, { "epoch": 0.23129251700680273, "fcm_dpo/beta": 0.23393282294273376, "fcm_dpo/delta": -0.0029235482215881348, "fcm_dpo/margin": 2.571483612060547, "fcm_dpo/q_t": 0.38379669189453125, "grad_norm": 37.91641616821289, "learning_rate": 4.751601970666064e-07, "logits/chosen": 1.5314666032791138, "logits/rejected": 1.468802809715271, "logps/chosen": -84.36506652832031, "logps/ref_chosen": -89.6655044555664, "logps/ref_rejected": -90.67737579345703, "logps/rejected": -87.94842529296875, "loss": 1.1238, "margin_dpo/margin_mean": 2.5714833736419678, "margin_dpo/margin_std": 4.653265953063965, "step": 153 }, { "epoch": 0.2328042328042328, "fcm_dpo/beta": 0.24052271246910095, "fcm_dpo/delta": 0.24382811784744263, "fcm_dpo/margin": 1.518359899520874, "fcm_dpo/q_t": 0.42862796783447266, "grad_norm": 45.47004699707031, "learning_rate": 4.745824613468292e-07, "logits/chosen": 1.7060202360153198, "logits/rejected": 1.652904987335205, "logps/chosen": -70.89018249511719, "logps/ref_chosen": -76.58096313476562, "logps/ref_rejected": -78.18669891357422, "logps/rejected": -74.0142822265625, "loss": 1.3464, "margin_dpo/margin_mean": 1.5183594226837158, "margin_dpo/margin_std": 4.859274864196777, "step": 154 }, { "epoch": 0.23431594860166288, "fcm_dpo/beta": 0.2394261211156845, "fcm_dpo/delta": -0.0790242925286293, "fcm_dpo/margin": 2.8094193935394287, "fcm_dpo/q_t": 0.37184637784957886, "grad_norm": 41.79259490966797, "learning_rate": 4.7399844357283393e-07, "logits/chosen": 1.7715044021606445, "logits/rejected": 1.7470512390136719, "logps/chosen": -76.92964172363281, "logps/ref_chosen": -82.65617370605469, "logps/ref_rejected": -95.52484130859375, "logps/rejected": -92.60773468017578, "loss": 1.1682, "margin_dpo/margin_mean": 2.8094191551208496, "margin_dpo/margin_std": 5.463638782501221, "step": 155 }, { "epoch": 0.23582766439909297, "fcm_dpo/beta": 0.23629328608512878, "fcm_dpo/delta": -0.07246021926403046, "fcm_dpo/margin": 2.822887659072876, "fcm_dpo/q_t": 0.36988958716392517, "grad_norm": 41.03186798095703, "learning_rate": 4.7340816008085305e-07, "logits/chosen": 1.615354061126709, "logits/rejected": 1.5233306884765625, "logps/chosen": -82.16059875488281, "logps/ref_chosen": -87.66494750976562, "logps/ref_rejected": -108.2437744140625, "logps/rejected": -105.56230163574219, "loss": 1.0859, "margin_dpo/margin_mean": 2.822887897491455, "margin_dpo/margin_std": 4.765664100646973, "step": 156 }, { "epoch": 0.23733938019652306, "fcm_dpo/beta": 0.23056308925151825, "fcm_dpo/delta": -0.12440288811922073, "fcm_dpo/margin": 2.047905445098877, "fcm_dpo/q_t": 0.40755152702331543, "grad_norm": 35.36820983886719, "learning_rate": 4.728116273823847e-07, "logits/chosen": 1.527277946472168, "logits/rejected": 1.5307586193084717, "logps/chosen": -64.32658386230469, "logps/ref_chosen": -70.77095794677734, "logps/ref_rejected": -78.78271484375, "logps/rejected": -74.38624572753906, "loss": 1.2108, "margin_dpo/margin_mean": 2.047905445098877, "margin_dpo/margin_std": 4.452766418457031, "step": 157 }, { "epoch": 0.23885109599395313, "fcm_dpo/beta": 0.2327193319797516, "fcm_dpo/delta": 0.0721401646733284, "fcm_dpo/margin": 2.28495717048645, "fcm_dpo/q_t": 0.39623498916625977, "grad_norm": 38.024356842041016, "learning_rate": 4.7220886216373085e-07, "logits/chosen": 1.5624784231185913, "logits/rejected": 1.5014305114746094, "logps/chosen": -75.03966522216797, "logps/ref_chosen": -81.21516418457031, "logps/ref_rejected": -97.8381118774414, "logps/rejected": -93.94757080078125, "loss": 1.1784, "margin_dpo/margin_mean": 2.284956693649292, "margin_dpo/margin_std": 4.70979118347168, "step": 158 }, { "epoch": 0.24036281179138322, "fcm_dpo/beta": 0.2319149523973465, "fcm_dpo/delta": -0.03987874090671539, "fcm_dpo/margin": 2.747131824493408, "fcm_dpo/q_t": 0.37046635150909424, "grad_norm": 34.66891098022461, "learning_rate": 4.715998812855304e-07, "logits/chosen": 1.7413641214370728, "logits/rejected": 1.661763310432434, "logps/chosen": -66.25703430175781, "logps/ref_chosen": -72.33412170410156, "logps/ref_rejected": -89.49591064453125, "logps/rejected": -86.16595458984375, "loss": 1.0663, "margin_dpo/margin_mean": 2.7471323013305664, "margin_dpo/margin_std": 4.491193771362305, "step": 159 }, { "epoch": 0.2418745275888133, "fcm_dpo/beta": 0.23739804327487946, "fcm_dpo/delta": 0.1521824300289154, "fcm_dpo/margin": 1.9154564142227173, "fcm_dpo/q_t": 0.4132426381111145, "grad_norm": 36.02449417114258, "learning_rate": 4.7098470178228755e-07, "logits/chosen": 1.2908469438552856, "logits/rejected": 1.1444838047027588, "logps/chosen": -57.67263412475586, "logps/ref_chosen": -63.26386260986328, "logps/ref_rejected": -82.27867126464844, "logps/rejected": -78.60289001464844, "loss": 1.2065, "margin_dpo/margin_mean": 1.915457010269165, "margin_dpo/margin_std": 4.364532470703125, "step": 160 }, { "epoch": 0.24338624338624337, "fcm_dpo/beta": 0.24455958604812622, "fcm_dpo/delta": 0.12015791237354279, "fcm_dpo/margin": 1.982551097869873, "fcm_dpo/q_t": 0.39857205748558044, "grad_norm": 39.60112762451172, "learning_rate": 4.703633408618955e-07, "logits/chosen": 1.6757632493972778, "logits/rejected": 1.5887293815612793, "logps/chosen": -64.71536254882812, "logps/ref_chosen": -70.69304656982422, "logps/ref_rejected": -82.73606872558594, "logps/rejected": -78.74093627929688, "loss": 1.1955, "margin_dpo/margin_mean": 1.9825514554977417, "margin_dpo/margin_std": 4.350788116455078, "step": 161 }, { "epoch": 0.24489795918367346, "fcm_dpo/beta": 0.23786741495132446, "fcm_dpo/delta": -0.20256876945495605, "fcm_dpo/margin": 3.307166576385498, "fcm_dpo/q_t": 0.33586543798446655, "grad_norm": 38.64269256591797, "learning_rate": 4.697358159051549e-07, "logits/chosen": 1.6229711771011353, "logits/rejected": 1.5281124114990234, "logps/chosen": -83.81607055664062, "logps/ref_chosen": -89.3046646118164, "logps/ref_rejected": -114.05778503417969, "logps/rejected": -111.87635803222656, "loss": 0.9342, "margin_dpo/margin_mean": 3.3071675300598145, "margin_dpo/margin_std": 4.0596442222595215, "step": 162 }, { "epoch": 0.24640967498110355, "fcm_dpo/beta": 0.23702430725097656, "fcm_dpo/delta": -0.014801152050495148, "fcm_dpo/margin": 2.5833659172058105, "fcm_dpo/q_t": 0.3794647753238678, "grad_norm": 36.25377655029297, "learning_rate": 4.691021444652876e-07, "logits/chosen": 1.65557861328125, "logits/rejected": 1.5559160709381104, "logps/chosen": -62.761451721191406, "logps/ref_chosen": -68.61222076416016, "logps/ref_rejected": -89.03155517578125, "logps/rejected": -85.76416015625, "loss": 1.0845, "margin_dpo/margin_mean": 2.5833659172058105, "margin_dpo/margin_std": 4.294673442840576, "step": 163 }, { "epoch": 0.24792139077853365, "fcm_dpo/beta": 0.22922706604003906, "fcm_dpo/delta": -0.16434648633003235, "fcm_dpo/margin": 3.281121253967285, "fcm_dpo/q_t": 0.3554421663284302, "grad_norm": 35.74726486206055, "learning_rate": 4.6846234426744624e-07, "logits/chosen": 1.4175546169281006, "logits/rejected": 1.2538635730743408, "logps/chosen": -67.86227416992188, "logps/ref_chosen": -73.55902862548828, "logps/ref_rejected": -94.16201782226562, "logps/rejected": -91.74636840820312, "loss": 1.0476, "margin_dpo/margin_mean": 3.281121253967285, "margin_dpo/margin_std": 4.952568054199219, "step": 164 }, { "epoch": 0.2494331065759637, "fcm_dpo/beta": 0.22541096806526184, "fcm_dpo/delta": -0.11093769967556, "fcm_dpo/margin": 3.116410970687866, "fcm_dpo/q_t": 0.361217737197876, "grad_norm": 34.11616134643555, "learning_rate": 4.678164332082175e-07, "logits/chosen": 1.893852710723877, "logits/rejected": 1.7336184978485107, "logps/chosen": -63.764854431152344, "logps/ref_chosen": -68.67132568359375, "logps/ref_rejected": -85.95689392089844, "logps/rejected": -84.16683197021484, "loss": 1.0469, "margin_dpo/margin_mean": 3.116410732269287, "margin_dpo/margin_std": 4.765564441680908, "step": 165 }, { "epoch": 0.2509448223733938, "fcm_dpo/beta": 0.2261592447757721, "fcm_dpo/delta": 0.14897161722183228, "fcm_dpo/margin": 2.025606632232666, "fcm_dpo/q_t": 0.4120250642299652, "grad_norm": 40.28575134277344, "learning_rate": 4.6716442935512214e-07, "logits/chosen": 1.6033815145492554, "logits/rejected": 1.3893630504608154, "logps/chosen": -76.40534973144531, "logps/ref_chosen": -80.89755249023438, "logps/ref_rejected": -111.91075134277344, "logps/rejected": -109.44416809082031, "loss": 1.191, "margin_dpo/margin_mean": 2.025606632232666, "margin_dpo/margin_std": 4.367696762084961, "step": 166 }, { "epoch": 0.25245653817082386, "fcm_dpo/beta": 0.22757622599601746, "fcm_dpo/delta": 0.010319948196411133, "fcm_dpo/margin": 2.5901713371276855, "fcm_dpo/q_t": 0.37613973021507263, "grad_norm": 33.78873062133789, "learning_rate": 4.6650635094610966e-07, "logits/chosen": 1.401757001876831, "logits/rejected": 1.3311492204666138, "logps/chosen": -71.38160705566406, "logps/ref_chosen": -76.73136138916016, "logps/ref_rejected": -92.57389068603516, "logps/rejected": -89.8143081665039, "loss": 1.0629, "margin_dpo/margin_mean": 2.5901713371276855, "margin_dpo/margin_std": 4.040890216827393, "step": 167 }, { "epoch": 0.25396825396825395, "fcm_dpo/beta": 0.23258665204048157, "fcm_dpo/delta": 0.11638811230659485, "fcm_dpo/margin": 2.1044440269470215, "fcm_dpo/q_t": 0.3990965485572815, "grad_norm": 35.83817672729492, "learning_rate": 4.6584221638904767e-07, "logits/chosen": 1.6169400215148926, "logits/rejected": 1.4897035360336304, "logps/chosen": -78.1120376586914, "logps/ref_chosen": -82.63671112060547, "logps/ref_rejected": -96.72691345214844, "logps/rejected": -94.30669403076172, "loss": 1.1093, "margin_dpo/margin_mean": 2.1044440269470215, "margin_dpo/margin_std": 3.624438762664795, "step": 168 }, { "epoch": 0.25547996976568405, "fcm_dpo/beta": 0.23403212428092957, "fcm_dpo/delta": -0.05251135677099228, "fcm_dpo/margin": 2.772446393966675, "fcm_dpo/q_t": 0.3757784366607666, "grad_norm": 39.18862533569336, "learning_rate": 4.651720442612075e-07, "logits/chosen": 1.5533463954925537, "logits/rejected": 1.540328025817871, "logps/chosen": -73.46492004394531, "logps/ref_chosen": -78.87673950195312, "logps/ref_rejected": -94.18919372558594, "logps/rejected": -91.54983520507812, "loss": 1.081, "margin_dpo/margin_mean": 2.772446393966675, "margin_dpo/margin_std": 4.667253017425537, "step": 169 }, { "epoch": 0.25699168556311414, "fcm_dpo/beta": 0.23770752549171448, "fcm_dpo/delta": 0.11538364738225937, "fcm_dpo/margin": 2.0562214851379395, "fcm_dpo/q_t": 0.4132143557071686, "grad_norm": 42.443660736083984, "learning_rate": 4.6449585330874425e-07, "logits/chosen": 1.6440541744232178, "logits/rejected": 1.6669882535934448, "logps/chosen": -68.31556701660156, "logps/ref_chosen": -73.35820007324219, "logps/ref_rejected": -76.85077667236328, "logps/rejected": -73.8643569946289, "loss": 1.317, "margin_dpo/margin_mean": 2.0562217235565186, "margin_dpo/margin_std": 5.564802169799805, "step": 170 }, { "epoch": 0.2585034013605442, "fcm_dpo/beta": 0.2314375638961792, "fcm_dpo/delta": -0.12175580114126205, "fcm_dpo/margin": 3.0714287757873535, "fcm_dpo/q_t": 0.35926738381385803, "grad_norm": 35.90308380126953, "learning_rate": 4.6381366244617224e-07, "logits/chosen": 1.6220589876174927, "logits/rejected": 1.4832584857940674, "logps/chosen": -75.58470153808594, "logps/ref_chosen": -80.4322738647461, "logps/ref_rejected": -96.99999237060547, "logps/rejected": -95.2238540649414, "loss": 1.0666, "margin_dpo/margin_mean": 3.071429491043091, "margin_dpo/margin_std": 4.930670738220215, "step": 171 }, { "epoch": 0.2600151171579743, "fcm_dpo/beta": 0.23240095376968384, "fcm_dpo/delta": -0.009122611954808235, "fcm_dpo/margin": 2.6176974773406982, "fcm_dpo/q_t": 0.38063037395477295, "grad_norm": 36.17333984375, "learning_rate": 4.631254907558365e-07, "logits/chosen": 1.692131757736206, "logits/rejected": 1.5869046449661255, "logps/chosen": -66.05056762695312, "logps/ref_chosen": -70.45406341552734, "logps/ref_rejected": -99.85603332519531, "logps/rejected": -98.07023620605469, "loss": 1.1094, "margin_dpo/margin_mean": 2.6176977157592773, "margin_dpo/margin_std": 4.624697685241699, "step": 172 }, { "epoch": 0.2615268329554044, "fcm_dpo/beta": 0.2278570830821991, "fcm_dpo/delta": -0.012191221117973328, "fcm_dpo/margin": 2.6660757064819336, "fcm_dpo/q_t": 0.38813263177871704, "grad_norm": 40.461055755615234, "learning_rate": 4.624313574873786e-07, "logits/chosen": 1.467531681060791, "logits/rejected": 1.2549827098846436, "logps/chosen": -67.7020034790039, "logps/ref_chosen": -72.15026092529297, "logps/ref_rejected": -94.10212707519531, "logps/rejected": -92.3199462890625, "loss": 1.1725, "margin_dpo/margin_mean": 2.6660757064819336, "margin_dpo/margin_std": 5.199055194854736, "step": 173 }, { "epoch": 0.26303854875283444, "fcm_dpo/beta": 0.22771653532981873, "fcm_dpo/delta": -0.06701561063528061, "fcm_dpo/margin": 2.9072351455688477, "fcm_dpo/q_t": 0.3727726936340332, "grad_norm": 39.59587860107422, "learning_rate": 4.61731282057198e-07, "logits/chosen": 1.9616103172302246, "logits/rejected": 1.7486342191696167, "logps/chosen": -71.55302429199219, "logps/ref_chosen": -75.99629211425781, "logps/ref_rejected": -106.2359619140625, "logps/rejected": -104.6999282836914, "loss": 1.1068, "margin_dpo/margin_mean": 2.9072351455688477, "margin_dpo/margin_std": 5.125247955322266, "step": 174 }, { "epoch": 0.26455026455026454, "fcm_dpo/beta": 0.22606691718101501, "fcm_dpo/delta": -0.037291742861270905, "fcm_dpo/margin": 2.8077917098999023, "fcm_dpo/q_t": 0.37847793102264404, "grad_norm": 39.049476623535156, "learning_rate": 4.6102528404790965e-07, "logits/chosen": 1.6430723667144775, "logits/rejected": 1.579434871673584, "logps/chosen": -80.18596649169922, "logps/ref_chosen": -84.51177978515625, "logps/ref_rejected": -104.46299743652344, "logps/rejected": -102.94497680664062, "loss": 1.1348, "margin_dpo/margin_mean": 2.807791233062744, "margin_dpo/margin_std": 5.241055488586426, "step": 175 }, { "epoch": 0.2660619803476946, "fcm_dpo/beta": 0.2272205799818039, "fcm_dpo/delta": 0.023860936984419823, "fcm_dpo/margin": 1.8057135343551636, "fcm_dpo/q_t": 0.4259718060493469, "grad_norm": 44.9836540222168, "learning_rate": 4.603133832077953e-07, "logits/chosen": 1.9060659408569336, "logits/rejected": 1.8190908432006836, "logps/chosen": -94.92770385742188, "logps/ref_chosen": -98.2034912109375, "logps/ref_rejected": -103.2023696899414, "logps/rejected": -101.7322998046875, "loss": 1.3075, "margin_dpo/margin_mean": 1.805713415145874, "margin_dpo/margin_std": 5.15474796295166, "step": 176 }, { "epoch": 0.2675736961451247, "fcm_dpo/beta": 0.2129383683204651, "fcm_dpo/delta": -0.4364026188850403, "fcm_dpo/margin": 4.662107467651367, "fcm_dpo/q_t": 0.3069148361682892, "grad_norm": 39.75569152832031, "learning_rate": 4.5959559945025183e-07, "logits/chosen": 2.090988874435425, "logits/rejected": 1.858922004699707, "logps/chosen": -72.60111999511719, "logps/ref_chosen": -78.029541015625, "logps/ref_rejected": -112.57099914550781, "logps/rejected": -111.8046875, "loss": 0.8663, "margin_dpo/margin_mean": 4.662107467651367, "margin_dpo/margin_std": 5.272144317626953, "step": 177 }, { "epoch": 0.2690854119425548, "fcm_dpo/beta": 0.20561328530311584, "fcm_dpo/delta": -0.012379378080368042, "fcm_dpo/margin": 2.9629063606262207, "fcm_dpo/q_t": 0.37169766426086426, "grad_norm": 29.65110969543457, "learning_rate": 4.588719528532341e-07, "logits/chosen": 1.4764043092727661, "logits/rejected": 1.3558849096298218, "logps/chosen": -74.66604614257812, "logps/ref_chosen": -79.48869323730469, "logps/ref_rejected": -96.62449645996094, "logps/rejected": -94.76475524902344, "loss": 1.0248, "margin_dpo/margin_mean": 2.9629063606262207, "margin_dpo/margin_std": 3.9983906745910645, "step": 178 }, { "epoch": 0.2705971277399849, "fcm_dpo/beta": 0.20993559062480927, "fcm_dpo/delta": 0.06102978438138962, "fcm_dpo/margin": 2.5824151039123535, "fcm_dpo/q_t": 0.3936367630958557, "grad_norm": 34.15560531616211, "learning_rate": 4.581424636586928e-07, "logits/chosen": 1.9131920337677002, "logits/rejected": 1.8513381481170654, "logps/chosen": -79.43586730957031, "logps/ref_chosen": -84.5088119506836, "logps/ref_rejected": -93.07945251464844, "logps/rejected": -90.58891296386719, "loss": 1.1693, "margin_dpo/margin_mean": 2.5824155807495117, "margin_dpo/margin_std": 5.296082019805908, "step": 179 }, { "epoch": 0.272108843537415, "fcm_dpo/beta": 0.2143281102180481, "fcm_dpo/delta": 0.08803573995828629, "fcm_dpo/margin": 2.4041786193847656, "fcm_dpo/q_t": 0.39963027834892273, "grad_norm": 33.341609954833984, "learning_rate": 4.5740715227200897e-07, "logits/chosen": 1.260475754737854, "logits/rejected": 1.1927244663238525, "logps/chosen": -69.06391906738281, "logps/ref_chosen": -74.5645523071289, "logps/ref_rejected": -81.02266693115234, "logps/rejected": -77.92620849609375, "loss": 1.1261, "margin_dpo/margin_mean": 2.4041786193847656, "margin_dpo/margin_std": 4.394153118133545, "step": 180 }, { "epoch": 0.273620559334845, "fcm_dpo/beta": 0.21226423978805542, "fcm_dpo/delta": -0.049738913774490356, "fcm_dpo/margin": 3.045619487762451, "fcm_dpo/q_t": 0.3686140775680542, "grad_norm": 33.64764404296875, "learning_rate": 4.566660392614228e-07, "logits/chosen": 1.569049596786499, "logits/rejected": 1.4539225101470947, "logps/chosen": -72.93116760253906, "logps/ref_chosen": -78.77166748046875, "logps/ref_rejected": -98.29750061035156, "logps/rejected": -95.50262451171875, "loss": 0.9918, "margin_dpo/margin_mean": 3.0456197261810303, "margin_dpo/margin_std": 4.077629089355469, "step": 181 }, { "epoch": 0.2751322751322751, "fcm_dpo/beta": 0.20631521940231323, "fcm_dpo/delta": -0.18685951828956604, "fcm_dpo/margin": 3.744624614715576, "fcm_dpo/q_t": 0.3533626198768616, "grad_norm": 34.31681823730469, "learning_rate": 4.5591914535745817e-07, "logits/chosen": 1.5861796140670776, "logits/rejected": 1.4125421047210693, "logps/chosen": -70.77108764648438, "logps/ref_chosen": -75.67765045166016, "logps/ref_rejected": -107.47894287109375, "logps/rejected": -106.31700134277344, "loss": 0.9945, "margin_dpo/margin_mean": 3.7446250915527344, "margin_dpo/margin_std": 5.412623882293701, "step": 182 }, { "epoch": 0.2766439909297052, "fcm_dpo/beta": 0.20775122940540314, "fcm_dpo/delta": 0.08841533213853836, "fcm_dpo/margin": 1.4735186100006104, "fcm_dpo/q_t": 0.4364347457885742, "grad_norm": 42.95817947387695, "learning_rate": 4.551664914523433e-07, "logits/chosen": 1.7009767293930054, "logits/rejected": 1.6352108716964722, "logps/chosen": -77.0204086303711, "logps/ref_chosen": -79.99969482421875, "logps/ref_rejected": -89.35220336914062, "logps/rejected": -87.846435546875, "loss": 1.2782, "margin_dpo/margin_mean": 1.4735193252563477, "margin_dpo/margin_std": 4.169427394866943, "step": 183 }, { "epoch": 0.2781557067271353, "fcm_dpo/beta": 0.2090856432914734, "fcm_dpo/delta": 0.06833438575267792, "fcm_dpo/margin": 2.560145854949951, "fcm_dpo/q_t": 0.3854142129421234, "grad_norm": 28.153854370117188, "learning_rate": 4.544080985994258e-07, "logits/chosen": 1.8167436122894287, "logits/rejected": 1.6645784378051758, "logps/chosen": -57.20887756347656, "logps/ref_chosen": -62.133941650390625, "logps/ref_rejected": -84.44404602050781, "logps/rejected": -82.07913208007812, "loss": 1.0415, "margin_dpo/margin_mean": 2.560145854949951, "margin_dpo/margin_std": 3.473294734954834, "step": 184 }, { "epoch": 0.2796674225245654, "fcm_dpo/beta": 0.20768845081329346, "fcm_dpo/delta": -0.040163375437259674, "fcm_dpo/margin": 3.062562942504883, "fcm_dpo/q_t": 0.37965598702430725, "grad_norm": 30.467384338378906, "learning_rate": 4.5364398801258394e-07, "logits/chosen": 1.7906408309936523, "logits/rejected": 1.663421630859375, "logps/chosen": -63.672325134277344, "logps/ref_chosen": -67.93174743652344, "logps/ref_rejected": -83.76744079589844, "logps/rejected": -82.57058715820312, "loss": 1.1426, "margin_dpo/margin_mean": 3.062563419342041, "margin_dpo/margin_std": 5.689934730529785, "step": 185 }, { "epoch": 0.2811791383219955, "fcm_dpo/beta": 0.20655421912670135, "fcm_dpo/delta": -0.05656307190656662, "fcm_dpo/margin": 3.1591484546661377, "fcm_dpo/q_t": 0.3779166340827942, "grad_norm": 34.896888732910156, "learning_rate": 4.5287418106563354e-07, "logits/chosen": 1.5623699426651, "logits/rejected": 1.4292669296264648, "logps/chosen": -81.7421875, "logps/ref_chosen": -86.22174072265625, "logps/ref_rejected": -100.42019653320312, "logps/rejected": -99.09979248046875, "loss": 1.1253, "margin_dpo/margin_mean": 3.159148693084717, "margin_dpo/margin_std": 5.729801177978516, "step": 186 }, { "epoch": 0.28269085411942557, "fcm_dpo/beta": 0.2069222331047058, "fcm_dpo/delta": 0.03436320275068283, "fcm_dpo/margin": 2.742424249649048, "fcm_dpo/q_t": 0.3854818046092987, "grad_norm": 39.628883361816406, "learning_rate": 4.520986992917297e-07, "logits/chosen": 1.7931580543518066, "logits/rejected": 1.6749093532562256, "logps/chosen": -89.49134826660156, "logps/ref_chosen": -92.81202697753906, "logps/ref_rejected": -117.28926086425781, "logps/rejected": -116.71101379394531, "loss": 1.1152, "margin_dpo/margin_mean": 2.7424240112304688, "margin_dpo/margin_std": 4.828680515289307, "step": 187 }, { "epoch": 0.2842025699168556, "fcm_dpo/beta": 0.20844730734825134, "fcm_dpo/delta": -0.04868890345096588, "fcm_dpo/margin": 3.0909528732299805, "fcm_dpo/q_t": 0.37605804204940796, "grad_norm": 34.80514144897461, "learning_rate": 4.5131756438276466e-07, "logits/chosen": 1.670656681060791, "logits/rejected": 1.5700781345367432, "logps/chosen": -83.94563293457031, "logps/ref_chosen": -87.85247802734375, "logps/ref_rejected": -94.58252716064453, "logps/rejected": -93.76663208007812, "loss": 1.0477, "margin_dpo/margin_mean": 3.0909528732299805, "margin_dpo/margin_std": 4.717261791229248, "step": 188 }, { "epoch": 0.2857142857142857, "fcm_dpo/beta": 0.20079335570335388, "fcm_dpo/delta": -0.12760794162750244, "fcm_dpo/margin": 2.3743178844451904, "fcm_dpo/q_t": 0.39784711599349976, "grad_norm": 41.234535217285156, "learning_rate": 4.5053079818876096e-07, "logits/chosen": 1.8904516696929932, "logits/rejected": 1.885125994682312, "logps/chosen": -90.63731384277344, "logps/ref_chosen": -95.00414276123047, "logps/ref_rejected": -90.50090789794922, "logps/rejected": -88.50839233398438, "loss": 1.2093, "margin_dpo/margin_mean": 2.3743185997009277, "margin_dpo/margin_std": 4.945883750915527, "step": 189 }, { "epoch": 0.2872260015117158, "fcm_dpo/beta": 0.19793203473091125, "fcm_dpo/delta": -0.07783995568752289, "fcm_dpo/margin": 3.3963685035705566, "fcm_dpo/q_t": 0.35779502987861633, "grad_norm": 36.98722839355469, "learning_rate": 4.4973842271726024e-07, "logits/chosen": 1.5879793167114258, "logits/rejected": 1.2592995166778564, "logps/chosen": -65.76041412353516, "logps/ref_chosen": -70.79264831542969, "logps/ref_rejected": -122.56155395507812, "logps/rejected": -120.9256820678711, "loss": 1.0021, "margin_dpo/margin_mean": 3.3963685035705566, "margin_dpo/margin_std": 4.595223903656006, "step": 190 }, { "epoch": 0.2887377173091459, "fcm_dpo/beta": 0.19986538589000702, "fcm_dpo/delta": 0.0998755544424057, "fcm_dpo/margin": 2.5279035568237305, "fcm_dpo/q_t": 0.3947795629501343, "grad_norm": 39.27770233154297, "learning_rate": 4.48940460132708e-07, "logits/chosen": 1.6662890911102295, "logits/rejected": 1.5767499208450317, "logps/chosen": -88.5599365234375, "logps/ref_chosen": -92.15048217773438, "logps/ref_rejected": -106.4153060913086, "logps/rejected": -105.3526611328125, "loss": 1.1413, "margin_dpo/margin_mean": 2.5279035568237305, "margin_dpo/margin_std": 4.721172332763672, "step": 191 }, { "epoch": 0.29024943310657597, "fcm_dpo/beta": 0.20867319405078888, "fcm_dpo/delta": 0.2078724503517151, "fcm_dpo/margin": 1.9151049852371216, "fcm_dpo/q_t": 0.41488662362098694, "grad_norm": 29.34225082397461, "learning_rate": 4.481369327558329e-07, "logits/chosen": 1.9284831285476685, "logits/rejected": 1.860489845275879, "logps/chosen": -65.33647155761719, "logps/ref_chosen": -69.51527404785156, "logps/ref_rejected": -80.15898132324219, "logps/rejected": -77.8952865600586, "loss": 1.2003, "margin_dpo/margin_mean": 1.915104866027832, "margin_dpo/margin_std": 4.322020053863525, "step": 192 }, { "epoch": 0.29176114890400606, "fcm_dpo/beta": 0.2067011594772339, "fcm_dpo/delta": -0.10582581162452698, "fcm_dpo/margin": 3.3781826496124268, "fcm_dpo/q_t": 0.3606608211994171, "grad_norm": 30.841073989868164, "learning_rate": 4.47327863063023e-07, "logits/chosen": 1.6029868125915527, "logits/rejected": 1.6146012544631958, "logps/chosen": -68.25808715820312, "logps/ref_chosen": -73.43276977539062, "logps/ref_rejected": -77.81238555908203, "logps/rejected": -76.01588439941406, "loss": 0.9987, "margin_dpo/margin_mean": 3.378182888031006, "margin_dpo/margin_std": 4.662139892578125, "step": 193 }, { "epoch": 0.29327286470143615, "fcm_dpo/beta": 0.21076488494873047, "fcm_dpo/delta": 0.19742809236049652, "fcm_dpo/margin": 1.9506275653839111, "fcm_dpo/q_t": 0.4201662838459015, "grad_norm": 39.32268524169922, "learning_rate": 4.4651327368569684e-07, "logits/chosen": 1.6824967861175537, "logits/rejected": 1.605659008026123, "logps/chosen": -72.58547973632812, "logps/ref_chosen": -76.63236999511719, "logps/ref_rejected": -85.67449188232422, "logps/rejected": -83.5782241821289, "loss": 1.27, "margin_dpo/margin_mean": 1.9506279230117798, "margin_dpo/margin_std": 5.051035404205322, "step": 194 }, { "epoch": 0.2947845804988662, "fcm_dpo/beta": 0.2124922275543213, "fcm_dpo/delta": -0.01660079136490822, "fcm_dpo/margin": 2.89408540725708, "fcm_dpo/q_t": 0.3722650408744812, "grad_norm": 33.66180419921875, "learning_rate": 4.4569318740967043e-07, "logits/chosen": 1.3596398830413818, "logits/rejected": 1.3858997821807861, "logps/chosen": -85.43913269042969, "logps/ref_chosen": -89.43354797363281, "logps/ref_rejected": -91.25908660888672, "logps/rejected": -90.15875244140625, "loss": 1.0395, "margin_dpo/margin_mean": 2.8940858840942383, "margin_dpo/margin_std": 4.30559778213501, "step": 195 }, { "epoch": 0.2962962962962963, "fcm_dpo/beta": 0.21977734565734863, "fcm_dpo/delta": 0.13098813593387604, "fcm_dpo/margin": 2.147923469543457, "fcm_dpo/q_t": 0.40691670775413513, "grad_norm": 36.275367736816406, "learning_rate": 4.448676271745197e-07, "logits/chosen": 1.6476595401763916, "logits/rejected": 1.5382771492004395, "logps/chosen": -70.8466796875, "logps/ref_chosen": -75.47528839111328, "logps/ref_rejected": -99.37582397460938, "logps/rejected": -96.8951416015625, "loss": 1.2002, "margin_dpo/margin_mean": 2.1479239463806152, "margin_dpo/margin_std": 4.784689903259277, "step": 196 }, { "epoch": 0.29780801209372637, "fcm_dpo/beta": 0.21895651519298553, "fcm_dpo/delta": -0.07098434120416641, "fcm_dpo/margin": 3.0373172760009766, "fcm_dpo/q_t": 0.3720216751098633, "grad_norm": 35.873863220214844, "learning_rate": 4.440366160729392e-07, "logits/chosen": 2.085923671722412, "logits/rejected": 1.9330120086669922, "logps/chosen": -62.252044677734375, "logps/ref_chosen": -67.57392883300781, "logps/ref_rejected": -89.97993469238281, "logps/rejected": -87.69536590576172, "loss": 1.1481, "margin_dpo/margin_mean": 3.0373175144195557, "margin_dpo/margin_std": 5.603744029998779, "step": 197 }, { "epoch": 0.29931972789115646, "fcm_dpo/beta": 0.21519571542739868, "fcm_dpo/delta": -0.08675536513328552, "fcm_dpo/margin": 3.1599316596984863, "fcm_dpo/q_t": 0.3605668544769287, "grad_norm": 31.00609016418457, "learning_rate": 4.432001773500957e-07, "logits/chosen": 1.671442985534668, "logits/rejected": 1.5708034038543701, "logps/chosen": -72.02076721191406, "logps/ref_chosen": -77.36013793945312, "logps/ref_rejected": -90.55670166015625, "logps/rejected": -88.37725830078125, "loss": 1.0148, "margin_dpo/margin_mean": 3.1599321365356445, "margin_dpo/margin_std": 4.4009599685668945, "step": 198 }, { "epoch": 0.30083144368858655, "fcm_dpo/beta": 0.21355471014976501, "fcm_dpo/delta": 0.0302957221865654, "fcm_dpo/margin": 2.675426483154297, "fcm_dpo/q_t": 0.38793349266052246, "grad_norm": 36.102745056152344, "learning_rate": 4.4235833440297856e-07, "logits/chosen": 1.812955379486084, "logits/rejected": 1.5753262042999268, "logps/chosen": -68.58213806152344, "logps/ref_chosen": -73.05004119873047, "logps/ref_rejected": -95.21923065185547, "logps/rejected": -93.42674255371094, "loss": 1.1725, "margin_dpo/margin_mean": 2.6754274368286133, "margin_dpo/margin_std": 5.355816841125488, "step": 199 }, { "epoch": 0.30234315948601664, "fcm_dpo/beta": 0.20989277958869934, "fcm_dpo/delta": -0.04891178011894226, "fcm_dpo/margin": 3.0649447441101074, "fcm_dpo/q_t": 0.37643855810165405, "grad_norm": 35.946041107177734, "learning_rate": 4.415111107797445e-07, "logits/chosen": 1.7141060829162598, "logits/rejected": 1.4877715110778809, "logps/chosen": -68.55535888671875, "logps/ref_chosen": -73.75833129882812, "logps/ref_rejected": -105.00157165527344, "logps/rejected": -102.86354064941406, "loss": 1.1388, "margin_dpo/margin_mean": 3.0649447441101074, "margin_dpo/margin_std": 5.666423797607422, "step": 200 }, { "epoch": 0.30234315948601664, "eval_fcm_dpo/beta": 0.21149146556854248, "eval_logits/chosen": 1.6652804613113403, "eval_logits/rejected": 1.545433521270752, "eval_logps/chosen": -81.7110595703125, "eval_logps/ref_chosen": -86.90177917480469, "eval_logps/ref_rejected": -96.69639587402344, "eval_logps/rejected": -94.4600830078125, "eval_loss": 0.5426459908485413, "eval_margin_dpo/margin_mean": 2.9544265270233154, "eval_margin_dpo/margin_std": 4.917666912078857, "eval_runtime": 42.298, "eval_samples_per_second": 54.447, "eval_steps_per_second": 1.702, "step": 200 }, { "epoch": 0.30385487528344673, "fcm_dpo/beta": 0.20943114161491394, "fcm_dpo/delta": -0.04685019701719284, "fcm_dpo/margin": 3.072869300842285, "fcm_dpo/q_t": 0.37180715799331665, "grad_norm": 35.43907165527344, "learning_rate": 4.4065853017905953e-07, "logits/chosen": 1.7999753952026367, "logits/rejected": 1.6574466228485107, "logps/chosen": -74.76203918457031, "logps/ref_chosen": -79.4841079711914, "logps/ref_rejected": -100.94435119628906, "logps/rejected": -99.29515838623047, "loss": 1.0374, "margin_dpo/margin_mean": 3.0728700160980225, "margin_dpo/margin_std": 4.596537113189697, "step": 201 }, { "epoch": 0.30536659108087677, "fcm_dpo/beta": 0.21286721527576447, "fcm_dpo/delta": 0.031130120158195496, "fcm_dpo/margin": 2.667759895324707, "fcm_dpo/q_t": 0.38877129554748535, "grad_norm": 36.30960464477539, "learning_rate": 4.3980061644943575e-07, "logits/chosen": 1.692112922668457, "logits/rejected": 1.505289077758789, "logps/chosen": -61.41923904418945, "logps/ref_chosen": -66.83952331542969, "logps/ref_rejected": -93.05116271972656, "logps/rejected": -90.29863739013672, "loss": 1.1288, "margin_dpo/margin_mean": 2.667759656906128, "margin_dpo/margin_std": 4.909058570861816, "step": 202 }, { "epoch": 0.30687830687830686, "fcm_dpo/beta": 0.20987369120121002, "fcm_dpo/delta": -0.04730100557208061, "fcm_dpo/margin": 3.0696792602539062, "fcm_dpo/q_t": 0.37272530794143677, "grad_norm": 35.791664123535156, "learning_rate": 4.3893739358856455e-07, "logits/chosen": 1.5090558528900146, "logits/rejected": 1.256967306137085, "logps/chosen": -75.16111755371094, "logps/ref_chosen": -80.32998657226562, "logps/ref_rejected": -113.52803039550781, "logps/rejected": -111.4288330078125, "loss": 1.0387, "margin_dpo/margin_mean": 3.069678783416748, "margin_dpo/margin_std": 4.734375476837158, "step": 203 }, { "epoch": 0.30839002267573695, "fcm_dpo/beta": 0.2047777771949768, "fcm_dpo/delta": -0.04267580807209015, "fcm_dpo/margin": 3.1055994033813477, "fcm_dpo/q_t": 0.37084197998046875, "grad_norm": 28.791095733642578, "learning_rate": 4.380688857426449e-07, "logits/chosen": 1.7406399250030518, "logits/rejected": 1.5678801536560059, "logps/chosen": -61.10273742675781, "logps/ref_chosen": -66.68875885009766, "logps/ref_rejected": -85.07585906982422, "logps/rejected": -82.59544372558594, "loss": 1.0277, "margin_dpo/margin_mean": 3.1055989265441895, "margin_dpo/margin_std": 4.362913131713867, "step": 204 }, { "epoch": 0.30990173847316704, "fcm_dpo/beta": 0.20836231112480164, "fcm_dpo/delta": 0.035259123891592026, "fcm_dpo/margin": 2.719717502593994, "fcm_dpo/q_t": 0.39517733454704285, "grad_norm": 38.341949462890625, "learning_rate": 4.3719511720570814e-07, "logits/chosen": 1.7649842500686646, "logits/rejected": 1.6390607357025146, "logps/chosen": -81.71599578857422, "logps/ref_chosen": -86.51950073242188, "logps/ref_rejected": -112.55376434326172, "logps/rejected": -110.469970703125, "loss": 1.1568, "margin_dpo/margin_mean": 2.719717502593994, "margin_dpo/margin_std": 5.38202428817749, "step": 205 }, { "epoch": 0.31141345427059713, "fcm_dpo/beta": 0.20770695805549622, "fcm_dpo/delta": -0.03976750001311302, "fcm_dpo/margin": 1.8963961601257324, "fcm_dpo/q_t": 0.42589449882507324, "grad_norm": 38.55003356933594, "learning_rate": 4.363161124189387e-07, "logits/chosen": 1.9415085315704346, "logits/rejected": 1.884864091873169, "logps/chosen": -83.91799926757812, "logps/ref_chosen": -88.68557739257812, "logps/ref_rejected": -97.75945281982422, "logps/rejected": -94.88827514648438, "loss": 1.2642, "margin_dpo/margin_mean": 1.896395206451416, "margin_dpo/margin_std": 4.907966613769531, "step": 206 }, { "epoch": 0.3129251700680272, "fcm_dpo/beta": 0.20486952364444733, "fcm_dpo/delta": -0.038567088544368744, "fcm_dpo/margin": 3.1027143001556396, "fcm_dpo/q_t": 0.37806349992752075, "grad_norm": 34.48740768432617, "learning_rate": 4.3543189596998986e-07, "logits/chosen": 1.5088391304016113, "logits/rejected": 1.2929071187973022, "logps/chosen": -81.11598205566406, "logps/ref_chosen": -85.12134552001953, "logps/ref_rejected": -103.34955596923828, "logps/rejected": -102.4468994140625, "loss": 1.0748, "margin_dpo/margin_mean": 3.1027140617370605, "margin_dpo/margin_std": 5.350442886352539, "step": 207 }, { "epoch": 0.3144368858654573, "fcm_dpo/beta": 0.21422894299030304, "fcm_dpo/delta": 0.26585763692855835, "fcm_dpo/margin": 1.5979573726654053, "fcm_dpo/q_t": 0.4325675666332245, "grad_norm": 38.53110122680664, "learning_rate": 4.3454249259229664e-07, "logits/chosen": 1.486476182937622, "logits/rejected": 1.47090482711792, "logps/chosen": -73.46044921875, "logps/ref_chosen": -78.84121704101562, "logps/ref_rejected": -89.82504272460938, "logps/rejected": -86.04224395751953, "loss": 1.3135, "margin_dpo/margin_mean": 1.5979571342468262, "margin_dpo/margin_std": 4.840599060058594, "step": 208 }, { "epoch": 0.31594860166288735, "fcm_dpo/beta": 0.2104741930961609, "fcm_dpo/delta": -0.22462649643421173, "fcm_dpo/margin": 3.8344333171844482, "fcm_dpo/q_t": 0.34945350885391235, "grad_norm": 33.83891677856445, "learning_rate": 4.336479271643833e-07, "logits/chosen": 1.7296254634857178, "logits/rejected": 1.6433987617492676, "logps/chosen": -80.73587799072266, "logps/ref_chosen": -85.98588562011719, "logps/ref_rejected": -107.1638412475586, "logps/rejected": -105.74826049804688, "loss": 0.9947, "margin_dpo/margin_mean": 3.834433078765869, "margin_dpo/margin_std": 5.514036178588867, "step": 209 }, { "epoch": 0.31746031746031744, "fcm_dpo/beta": 0.2026514708995819, "fcm_dpo/delta": -0.18983915448188782, "fcm_dpo/margin": 3.826395034790039, "fcm_dpo/q_t": 0.34678876399993896, "grad_norm": 29.725292205810547, "learning_rate": 4.327482247091679e-07, "logits/chosen": 1.8481976985931396, "logits/rejected": 1.6151676177978516, "logps/chosen": -66.71804809570312, "logps/ref_chosen": -71.75653076171875, "logps/ref_rejected": -102.47966003417969, "logps/rejected": -101.26757049560547, "loss": 0.9828, "margin_dpo/margin_mean": 3.826395034790039, "margin_dpo/margin_std": 5.108582496643066, "step": 210 }, { "epoch": 0.31897203325774753, "fcm_dpo/beta": 0.2011139690876007, "fcm_dpo/delta": 0.019685715436935425, "fcm_dpo/margin": 2.8868653774261475, "fcm_dpo/q_t": 0.38376328349113464, "grad_norm": 33.415435791015625, "learning_rate": 4.3184341039326217e-07, "logits/chosen": 1.8024325370788574, "logits/rejected": 1.5747759342193604, "logps/chosen": -66.08028411865234, "logps/ref_chosen": -70.95170593261719, "logps/ref_rejected": -108.51902770996094, "logps/rejected": -106.53446960449219, "loss": 1.0831, "margin_dpo/margin_mean": 2.8868651390075684, "margin_dpo/margin_std": 4.80080509185791, "step": 211 }, { "epoch": 0.3204837490551776, "fcm_dpo/beta": 0.1954108476638794, "fcm_dpo/delta": -0.1490822583436966, "fcm_dpo/margin": 3.775815486907959, "fcm_dpo/q_t": 0.35547029972076416, "grad_norm": 30.241546630859375, "learning_rate": 4.309335095262675e-07, "logits/chosen": 1.6221773624420166, "logits/rejected": 1.4874701499938965, "logps/chosen": -69.07429504394531, "logps/ref_chosen": -74.34010314941406, "logps/ref_rejected": -97.58259582519531, "logps/rejected": -96.09260559082031, "loss": 1.0283, "margin_dpo/margin_mean": 3.7758164405822754, "margin_dpo/margin_std": 5.671681880950928, "step": 212 }, { "epoch": 0.3219954648526077, "fcm_dpo/beta": 0.193147212266922, "fcm_dpo/delta": -0.013596100732684135, "fcm_dpo/margin": 3.1711630821228027, "fcm_dpo/q_t": 0.3848028779029846, "grad_norm": 32.83842468261719, "learning_rate": 4.3001854756006724e-07, "logits/chosen": 1.5679874420166016, "logits/rejected": 1.579345703125, "logps/chosen": -74.82106018066406, "logps/ref_chosen": -80.2526626586914, "logps/ref_rejected": -94.76947021484375, "logps/rejected": -92.509033203125, "loss": 1.1348, "margin_dpo/margin_mean": 3.171164035797119, "margin_dpo/margin_std": 5.987710475921631, "step": 213 }, { "epoch": 0.3235071806500378, "fcm_dpo/beta": 0.1943490356206894, "fcm_dpo/delta": -0.005742315202951431, "fcm_dpo/margin": 3.1131887435913086, "fcm_dpo/q_t": 0.3851265609264374, "grad_norm": 33.79700469970703, "learning_rate": 4.290985500881143e-07, "logits/chosen": 1.2866859436035156, "logits/rejected": 1.23015296459198, "logps/chosen": -72.68575286865234, "logps/ref_chosen": -77.9675064086914, "logps/ref_rejected": -84.0354232788086, "logps/rejected": -81.86685943603516, "loss": 1.0903, "margin_dpo/margin_mean": 3.1131887435913086, "margin_dpo/margin_std": 5.462100028991699, "step": 214 }, { "epoch": 0.3250188964474679, "fcm_dpo/beta": 0.19066134095191956, "fcm_dpo/delta": -0.197215735912323, "fcm_dpo/margin": 4.093410968780518, "fcm_dpo/q_t": 0.34744980931282043, "grad_norm": 29.585453033447266, "learning_rate": 4.281735428447157e-07, "logits/chosen": 1.447283148765564, "logits/rejected": 1.229423999786377, "logps/chosen": -76.76492309570312, "logps/ref_chosen": -81.2047348022461, "logps/ref_rejected": -116.18414306640625, "logps/rejected": -115.83775329589844, "loss": 0.968, "margin_dpo/margin_mean": 4.093410491943359, "margin_dpo/margin_std": 5.116345405578613, "step": 215 }, { "epoch": 0.32653061224489793, "fcm_dpo/beta": 0.18577197194099426, "fcm_dpo/delta": -0.024033507332205772, "fcm_dpo/margin": 3.35071063041687, "fcm_dpo/q_t": 0.3735596537590027, "grad_norm": 28.57142448425293, "learning_rate": 4.2724355170431247e-07, "logits/chosen": 1.929776668548584, "logits/rejected": 1.7076435089111328, "logps/chosen": -79.36236572265625, "logps/ref_chosen": -83.57113647460938, "logps/ref_rejected": -112.51902770996094, "logps/rejected": -111.66097259521484, "loss": 1.0691, "margin_dpo/margin_mean": 3.35071063041687, "margin_dpo/margin_std": 5.3623809814453125, "step": 216 }, { "epoch": 0.328042328042328, "fcm_dpo/beta": 0.18335673213005066, "fcm_dpo/delta": -0.05656471848487854, "fcm_dpo/margin": 3.5603256225585938, "fcm_dpo/q_t": 0.37649548053741455, "grad_norm": 30.940200805664062, "learning_rate": 4.26308602680756e-07, "logits/chosen": 1.5567448139190674, "logits/rejected": 1.3418993949890137, "logps/chosen": -73.3405532836914, "logps/ref_chosen": -77.01390075683594, "logps/ref_rejected": -105.28099822998047, "logps/rejected": -105.16797637939453, "loss": 1.0697, "margin_dpo/margin_mean": 3.5603256225585938, "margin_dpo/margin_std": 5.963696479797363, "step": 217 }, { "epoch": 0.3295540438397581, "fcm_dpo/beta": 0.18211647868156433, "fcm_dpo/delta": -0.04763578251004219, "fcm_dpo/margin": 2.4268999099731445, "fcm_dpo/q_t": 0.4111635684967041, "grad_norm": 32.71650695800781, "learning_rate": 4.253687219265803e-07, "logits/chosen": 1.4811632633209229, "logits/rejected": 1.4756027460098267, "logps/chosen": -88.89375305175781, "logps/ref_chosen": -92.47299194335938, "logps/ref_rejected": -92.80751037597656, "logps/rejected": -91.65516662597656, "loss": 1.2178, "margin_dpo/margin_mean": 2.4268996715545654, "margin_dpo/margin_std": 5.39958381652832, "step": 218 }, { "epoch": 0.3310657596371882, "fcm_dpo/beta": 0.18171855807304382, "fcm_dpo/delta": 0.020053904503583908, "fcm_dpo/margin": 3.1978893280029297, "fcm_dpo/q_t": 0.37985914945602417, "grad_norm": 26.802236557006836, "learning_rate": 4.2442393573227043e-07, "logits/chosen": 1.4625592231750488, "logits/rejected": 1.3691678047180176, "logps/chosen": -72.9026107788086, "logps/ref_chosen": -77.10382080078125, "logps/ref_rejected": -92.3438949584961, "logps/rejected": -91.340576171875, "loss": 1.0694, "margin_dpo/margin_mean": 3.1978893280029297, "margin_dpo/margin_std": 5.0218329429626465, "step": 219 }, { "epoch": 0.3325774754346183, "fcm_dpo/beta": 0.18513716757297516, "fcm_dpo/delta": 0.10643023252487183, "fcm_dpo/margin": 2.6936841011047363, "fcm_dpo/q_t": 0.4018252193927765, "grad_norm": 28.29593276977539, "learning_rate": 4.234742705255272e-07, "logits/chosen": 1.977401614189148, "logits/rejected": 1.7988307476043701, "logps/chosen": -57.971031188964844, "logps/ref_chosen": -62.48021697998047, "logps/ref_rejected": -86.93276977539062, "logps/rejected": -85.11727905273438, "loss": 1.1588, "margin_dpo/margin_mean": 2.6936841011047363, "margin_dpo/margin_std": 5.318660736083984, "step": 220 }, { "epoch": 0.3340891912320484, "fcm_dpo/beta": 0.18827053904533386, "fcm_dpo/delta": 0.02966354787349701, "fcm_dpo/margin": 3.030022621154785, "fcm_dpo/q_t": 0.3889802098274231, "grad_norm": 30.431005477905273, "learning_rate": 4.22519752870528e-07, "logits/chosen": 1.6791030168533325, "logits/rejected": 1.4852240085601807, "logps/chosen": -73.93135070800781, "logps/ref_chosen": -78.35491943359375, "logps/ref_rejected": -108.17631530761719, "logps/rejected": -106.78276062011719, "loss": 1.1445, "margin_dpo/margin_mean": 3.0300216674804688, "margin_dpo/margin_std": 5.741293907165527, "step": 221 }, { "epoch": 0.3356009070294785, "fcm_dpo/beta": 0.18445804715156555, "fcm_dpo/delta": -0.12642233073711395, "fcm_dpo/margin": 3.889547824859619, "fcm_dpo/q_t": 0.35349398851394653, "grad_norm": 31.85706901550293, "learning_rate": 4.2156040946718343e-07, "logits/chosen": 1.816218614578247, "logits/rejected": 1.5741527080535889, "logps/chosen": -72.87997436523438, "logps/ref_chosen": -77.2734375, "logps/ref_rejected": -126.41007995605469, "logps/rejected": -125.90616607666016, "loss": 0.9677, "margin_dpo/margin_mean": 3.889547824859619, "margin_dpo/margin_std": 4.93218994140625, "step": 222 }, { "epoch": 0.3371126228269085, "fcm_dpo/beta": 0.17852336168289185, "fcm_dpo/delta": -0.14979343116283417, "fcm_dpo/margin": 4.138674736022949, "fcm_dpo/q_t": 0.34866607189178467, "grad_norm": 25.545076370239258, "learning_rate": 4.2059626715039065e-07, "logits/chosen": 1.600868582725525, "logits/rejected": 1.468353271484375, "logps/chosen": -73.9285888671875, "logps/ref_chosen": -78.4210205078125, "logps/ref_rejected": -101.38420867919922, "logps/rejected": -101.03044128417969, "loss": 0.9532, "margin_dpo/margin_mean": 4.138675212860107, "margin_dpo/margin_std": 5.194621562957764, "step": 223 }, { "epoch": 0.3386243386243386, "fcm_dpo/beta": 0.17954044044017792, "fcm_dpo/delta": 0.09770029783248901, "fcm_dpo/margin": 2.82529878616333, "fcm_dpo/q_t": 0.3936260938644409, "grad_norm": 29.668907165527344, "learning_rate": 4.1962735288928304e-07, "logits/chosen": 1.5408947467803955, "logits/rejected": 1.482640266418457, "logps/chosen": -75.03809356689453, "logps/ref_chosen": -79.36337280273438, "logps/ref_rejected": -89.99789428710938, "logps/rejected": -88.4979248046875, "loss": 1.0829, "margin_dpo/margin_mean": 2.825299024581909, "margin_dpo/margin_std": 4.494820594787598, "step": 224 }, { "epoch": 0.3401360544217687, "fcm_dpo/beta": 0.17720326781272888, "fcm_dpo/delta": -0.056610286235809326, "fcm_dpo/margin": 3.671389102935791, "fcm_dpo/q_t": 0.3757237493991852, "grad_norm": 35.15827178955078, "learning_rate": 4.186536937864752e-07, "logits/chosen": 1.5901364088058472, "logits/rejected": 1.3371665477752686, "logps/chosen": -85.0064926147461, "logps/ref_chosen": -88.99606323242188, "logps/ref_rejected": -127.55032348632812, "logps/rejected": -127.23213195800781, "loss": 1.0867, "margin_dpo/margin_mean": 3.671389579772949, "margin_dpo/margin_std": 6.1652045249938965, "step": 225 }, { "epoch": 0.3416477702191988, "fcm_dpo/beta": 0.17827850580215454, "fcm_dpo/delta": 0.0006794985383749008, "fcm_dpo/margin": 3.361865520477295, "fcm_dpo/q_t": 0.37807855010032654, "grad_norm": 24.475175857543945, "learning_rate": 4.176753170773052e-07, "logits/chosen": 1.7302067279815674, "logits/rejected": 1.627681851387024, "logps/chosen": -63.755279541015625, "logps/ref_chosen": -68.68444061279297, "logps/ref_rejected": -85.81898498535156, "logps/rejected": -84.2516860961914, "loss": 1.0853, "margin_dpo/margin_mean": 3.361865520477295, "margin_dpo/margin_std": 5.56801700592041, "step": 226 }, { "epoch": 0.3431594860166289, "fcm_dpo/beta": 0.17725418508052826, "fcm_dpo/delta": -0.0032851658761501312, "fcm_dpo/margin": 3.3981308937072754, "fcm_dpo/q_t": 0.38432079553604126, "grad_norm": 30.37522315979004, "learning_rate": 4.166922501290729e-07, "logits/chosen": 2.125030755996704, "logits/rejected": 2.0093131065368652, "logps/chosen": -68.0997314453125, "logps/ref_chosen": -72.52029418945312, "logps/ref_rejected": -90.7720718383789, "logps/rejected": -89.7496337890625, "loss": 1.1434, "margin_dpo/margin_mean": 3.3981308937072754, "margin_dpo/margin_std": 6.397700786590576, "step": 227 }, { "epoch": 0.34467120181405897, "fcm_dpo/beta": 0.18030789494514465, "fcm_dpo/delta": 0.051092106848955154, "fcm_dpo/margin": 3.057624340057373, "fcm_dpo/q_t": 0.38950926065444946, "grad_norm": 27.90323829650879, "learning_rate": 4.1570452044027405e-07, "logits/chosen": 1.6655057668685913, "logits/rejected": 1.5038576126098633, "logps/chosen": -67.99946594238281, "logps/ref_chosen": -72.23167419433594, "logps/ref_rejected": -95.45873260498047, "logps/rejected": -94.28414154052734, "loss": 1.1318, "margin_dpo/margin_mean": 3.0576257705688477, "margin_dpo/margin_std": 5.6507768630981445, "step": 228 }, { "epoch": 0.34618291761148906, "fcm_dpo/beta": 0.1783168613910675, "fcm_dpo/delta": -0.05871783569455147, "fcm_dpo/margin": 3.6718177795410156, "fcm_dpo/q_t": 0.3637602925300598, "grad_norm": 26.114145278930664, "learning_rate": 4.147121556398312e-07, "logits/chosen": 2.333543300628662, "logits/rejected": 2.095942735671997, "logps/chosen": -61.12850570678711, "logps/ref_chosen": -66.88822174072266, "logps/ref_rejected": -92.27890014648438, "logps/rejected": -90.19100189208984, "loss": 1.0181, "margin_dpo/margin_mean": 3.6718177795410156, "margin_dpo/margin_std": 5.082923889160156, "step": 229 }, { "epoch": 0.3476946334089191, "fcm_dpo/beta": 0.18222709000110626, "fcm_dpo/delta": 0.08974069356918335, "fcm_dpo/margin": 2.8103251457214355, "fcm_dpo/q_t": 0.39406752586364746, "grad_norm": 32.12981033325195, "learning_rate": 4.137151834863213e-07, "logits/chosen": 1.4846326112747192, "logits/rejected": 1.4764800071716309, "logps/chosen": -71.39359283447266, "logps/ref_chosen": -76.12332153320312, "logps/ref_rejected": -78.19171905517578, "logps/rejected": -76.27232360839844, "loss": 1.1493, "margin_dpo/margin_mean": 2.810325860977173, "margin_dpo/margin_std": 5.281257629394531, "step": 230 }, { "epoch": 0.3492063492063492, "fcm_dpo/beta": 0.17589285969734192, "fcm_dpo/delta": -0.14354144036769867, "fcm_dpo/margin": 4.152382850646973, "fcm_dpo/q_t": 0.3555976152420044, "grad_norm": 30.2910213470459, "learning_rate": 4.1271363186719835e-07, "logits/chosen": 1.2955679893493652, "logits/rejected": 1.2993032932281494, "logps/chosen": -88.24240112304688, "logps/ref_chosen": -92.45181274414062, "logps/ref_rejected": -100.89735412597656, "logps/rejected": -100.84032440185547, "loss": 1.0081, "margin_dpo/margin_mean": 4.152383327484131, "margin_dpo/margin_std": 5.8546953201293945, "step": 231 }, { "epoch": 0.3507180650037793, "fcm_dpo/beta": 0.1787184178829193, "fcm_dpo/delta": 0.10779862105846405, "fcm_dpo/margin": 2.7846970558166504, "fcm_dpo/q_t": 0.40889689326286316, "grad_norm": 100.12737274169922, "learning_rate": 4.1170752879801436e-07, "logits/chosen": 1.4190423488616943, "logits/rejected": 1.359769582748413, "logps/chosen": -82.397705078125, "logps/ref_chosen": -86.75383758544922, "logps/ref_rejected": -98.16909790039062, "logps/rejected": -96.59767150878906, "loss": 1.2198, "margin_dpo/margin_mean": 2.7846975326538086, "margin_dpo/margin_std": 6.417873382568359, "step": 232 }, { "epoch": 0.35222978080120937, "fcm_dpo/beta": 0.1750573068857193, "fcm_dpo/delta": -0.13857866823673248, "fcm_dpo/margin": 3.1586740016937256, "fcm_dpo/q_t": 0.39157694578170776, "grad_norm": 27.844707489013672, "learning_rate": 4.106969024216348e-07, "logits/chosen": 1.5601751804351807, "logits/rejected": 1.4629740715026855, "logps/chosen": -68.05751037597656, "logps/ref_chosen": -72.87556457519531, "logps/ref_rejected": -85.22943115234375, "logps/rejected": -83.57006072998047, "loss": 1.1361, "margin_dpo/margin_mean": 3.1586740016937256, "margin_dpo/margin_std": 5.672341823577881, "step": 233 }, { "epoch": 0.35374149659863946, "fcm_dpo/beta": 0.17893540859222412, "fcm_dpo/delta": 0.08269474655389786, "fcm_dpo/margin": 2.900754928588867, "fcm_dpo/q_t": 0.3984100818634033, "grad_norm": 27.9986572265625, "learning_rate": 4.09681781007452e-07, "logits/chosen": 1.29150390625, "logits/rejected": 1.2452890872955322, "logps/chosen": -65.02719116210938, "logps/ref_chosen": -70.05477905273438, "logps/ref_rejected": -68.7240982055664, "logps/rejected": -66.59725952148438, "loss": 1.1605, "margin_dpo/margin_mean": 2.900754928588867, "margin_dpo/margin_std": 5.591994285583496, "step": 234 }, { "epoch": 0.35525321239606955, "fcm_dpo/beta": 0.17522048950195312, "fcm_dpo/delta": -0.09907099604606628, "fcm_dpo/margin": 3.9499454498291016, "fcm_dpo/q_t": 0.3530880808830261, "grad_norm": 29.631473541259766, "learning_rate": 4.08662192950594e-07, "logits/chosen": 1.7008306980133057, "logits/rejected": 1.671209454536438, "logps/chosen": -79.98178100585938, "logps/ref_chosen": -85.86051940917969, "logps/ref_rejected": -96.14663696289062, "logps/rejected": -94.21785736083984, "loss": 0.9612, "margin_dpo/margin_mean": 3.9499454498291016, "margin_dpo/margin_std": 4.817038536071777, "step": 235 }, { "epoch": 0.35676492819349964, "fcm_dpo/beta": 0.1744535267353058, "fcm_dpo/delta": 0.011955919675529003, "fcm_dpo/margin": 3.374511957168579, "fcm_dpo/q_t": 0.3849853277206421, "grad_norm": 30.669490814208984, "learning_rate": 4.076381667711306e-07, "logits/chosen": 1.6705328226089478, "logits/rejected": 1.653887391090393, "logps/chosen": -85.20165252685547, "logps/ref_chosen": -89.75252532958984, "logps/ref_rejected": -99.28534698486328, "logps/rejected": -98.1089859008789, "loss": 1.1536, "margin_dpo/margin_mean": 3.3745126724243164, "margin_dpo/margin_std": 6.521100997924805, "step": 236 }, { "epoch": 0.35827664399092973, "fcm_dpo/beta": 0.1793350875377655, "fcm_dpo/delta": 0.18251191079616547, "fcm_dpo/margin": 2.3729090690612793, "fcm_dpo/q_t": 0.41561537981033325, "grad_norm": 34.04213333129883, "learning_rate": 4.066097311132753e-07, "logits/chosen": 1.594843864440918, "logits/rejected": 1.5914448499679565, "logps/chosen": -87.67994689941406, "logps/ref_chosen": -92.59001922607422, "logps/ref_rejected": -101.45584869384766, "logps/rejected": -98.91868591308594, "loss": 1.2014, "margin_dpo/margin_mean": 2.3729095458984375, "margin_dpo/margin_std": 5.1990275382995605, "step": 237 }, { "epoch": 0.35978835978835977, "fcm_dpo/beta": 0.17949533462524414, "fcm_dpo/delta": -0.03761757165193558, "fcm_dpo/margin": 3.535294532775879, "fcm_dpo/q_t": 0.37026742100715637, "grad_norm": 28.87019920349121, "learning_rate": 4.0557691474458414e-07, "logits/chosen": 1.5214321613311768, "logits/rejected": 1.505321741104126, "logps/chosen": -76.80465698242188, "logps/ref_chosen": -82.2470474243164, "logps/ref_rejected": -92.59944152832031, "logps/rejected": -90.69235229492188, "loss": 1.059, "margin_dpo/margin_mean": 3.535294532775879, "margin_dpo/margin_std": 5.499220848083496, "step": 238 }, { "epoch": 0.36130007558578986, "fcm_dpo/beta": 0.18097104132175446, "fcm_dpo/delta": -0.004759851843118668, "fcm_dpo/margin": 3.3366780281066895, "fcm_dpo/q_t": 0.3874804973602295, "grad_norm": 32.175743103027344, "learning_rate": 4.045397465551513e-07, "logits/chosen": 1.8428244590759277, "logits/rejected": 1.5413553714752197, "logps/chosen": -70.64430236816406, "logps/ref_chosen": -75.30878448486328, "logps/ref_rejected": -131.2318115234375, "logps/rejected": -129.90402221679688, "loss": 1.1283, "margin_dpo/margin_mean": 3.336677074432373, "margin_dpo/margin_std": 6.204185485839844, "step": 239 }, { "epoch": 0.36281179138321995, "fcm_dpo/beta": 0.17442013323307037, "fcm_dpo/delta": -0.2626558840274811, "fcm_dpo/margin": 4.82325553894043, "fcm_dpo/q_t": 0.3299955725669861, "grad_norm": 29.32863998413086, "learning_rate": 4.0349825555680045e-07, "logits/chosen": 1.6912193298339844, "logits/rejected": 1.4668397903442383, "logps/chosen": -65.2503890991211, "logps/ref_chosen": -70.81785583496094, "logps/ref_rejected": -98.53778076171875, "logps/rejected": -97.7935791015625, "loss": 0.9305, "margin_dpo/margin_mean": 4.82325553894043, "margin_dpo/margin_std": 5.806119918823242, "step": 240 }, { "epoch": 0.36432350718065004, "fcm_dpo/beta": 0.1754496693611145, "fcm_dpo/delta": 0.17503008246421814, "fcm_dpo/margin": 2.46356201171875, "fcm_dpo/q_t": 0.4143379032611847, "grad_norm": 33.23362350463867, "learning_rate": 4.0245247088227377e-07, "logits/chosen": 1.521575927734375, "logits/rejected": 1.4731206893920898, "logps/chosen": -83.33875274658203, "logps/ref_chosen": -88.60260772705078, "logps/ref_rejected": -101.42214965820312, "logps/rejected": -98.62185668945312, "loss": 1.1913, "margin_dpo/margin_mean": 2.463561773300171, "margin_dpo/margin_std": 5.311173915863037, "step": 241 }, { "epoch": 0.36583522297808013, "fcm_dpo/beta": 0.17090430855751038, "fcm_dpo/delta": -0.17562828958034515, "fcm_dpo/margin": 4.446490287780762, "fcm_dpo/q_t": 0.35152992606163025, "grad_norm": 24.115978240966797, "learning_rate": 4.0140242178441665e-07, "logits/chosen": 1.3632407188415527, "logits/rejected": 1.2794369459152222, "logps/chosen": -71.06964111328125, "logps/ref_chosen": -77.34110260009766, "logps/ref_rejected": -84.76332092285156, "logps/rejected": -82.93833923339844, "loss": 0.9778, "margin_dpo/margin_mean": 4.446490287780762, "margin_dpo/margin_std": 6.0980224609375, "step": 242 }, { "epoch": 0.3673469387755102, "fcm_dpo/beta": 0.17107948660850525, "fcm_dpo/delta": 0.049044981598854065, "fcm_dpo/margin": 3.235016345977783, "fcm_dpo/q_t": 0.3897736668586731, "grad_norm": 32.52568435668945, "learning_rate": 4.003481376353596e-07, "logits/chosen": 1.3836629390716553, "logits/rejected": 1.3904385566711426, "logps/chosen": -88.62066650390625, "logps/ref_chosen": -93.55897521972656, "logps/ref_rejected": -89.33551025390625, "logps/rejected": -87.63221740722656, "loss": 1.1273, "margin_dpo/margin_mean": 3.235015869140625, "margin_dpo/margin_std": 5.809148788452148, "step": 243 }, { "epoch": 0.3688586545729403, "fcm_dpo/beta": 0.16387493908405304, "fcm_dpo/delta": -0.33298128843307495, "fcm_dpo/margin": 5.507326126098633, "fcm_dpo/q_t": 0.31379491090774536, "grad_norm": 22.067636489868164, "learning_rate": 3.9928964792569654e-07, "logits/chosen": 1.7233500480651855, "logits/rejected": 1.5986864566802979, "logps/chosen": -64.07341003417969, "logps/ref_chosen": -69.82603454589844, "logps/ref_rejected": -92.4764175415039, "logps/rejected": -92.23110961914062, "loss": 0.8217, "margin_dpo/margin_mean": 5.507327079772949, "margin_dpo/margin_std": 5.298586368560791, "step": 244 }, { "epoch": 0.37037037037037035, "fcm_dpo/beta": 0.1566339135169983, "fcm_dpo/delta": -0.18002939224243164, "fcm_dpo/margin": 4.888873100280762, "fcm_dpo/q_t": 0.34525537490844727, "grad_norm": 27.70292854309082, "learning_rate": 3.982269822636601e-07, "logits/chosen": 1.7034671306610107, "logits/rejected": 1.6361165046691895, "logps/chosen": -81.09748077392578, "logps/ref_chosen": -85.68216705322266, "logps/ref_rejected": -93.8754653930664, "logps/rejected": -94.17964935302734, "loss": 0.9552, "margin_dpo/margin_mean": 4.888873100280762, "margin_dpo/margin_std": 6.235966682434082, "step": 245 }, { "epoch": 0.37188208616780044, "fcm_dpo/beta": 0.1533387005329132, "fcm_dpo/delta": -0.1032537966966629, "fcm_dpo/margin": 4.539944171905518, "fcm_dpo/q_t": 0.3663942813873291, "grad_norm": 28.609737396240234, "learning_rate": 3.971601703742932e-07, "logits/chosen": 1.8069227933883667, "logits/rejected": 1.666886568069458, "logps/chosen": -86.67765808105469, "logps/ref_chosen": -90.05093383789062, "logps/ref_rejected": -112.77645874023438, "logps/rejected": -113.94313049316406, "loss": 1.034, "margin_dpo/margin_mean": 4.539945602416992, "margin_dpo/margin_std": 6.906374931335449, "step": 246 }, { "epoch": 0.37339380196523053, "fcm_dpo/beta": 0.15819424390792847, "fcm_dpo/delta": 0.22064730525016785, "fcm_dpo/margin": 2.442718744277954, "fcm_dpo/q_t": 0.42223188281059265, "grad_norm": 33.092018127441406, "learning_rate": 3.960892420986177e-07, "logits/chosen": 1.6968116760253906, "logits/rejected": 1.646969199180603, "logps/chosen": -100.31169128417969, "logps/ref_chosen": -103.23979187011719, "logps/ref_rejected": -105.26278686523438, "logps/rejected": -104.77740478515625, "loss": 1.2204, "margin_dpo/margin_mean": 2.442718505859375, "margin_dpo/margin_std": 5.771693706512451, "step": 247 }, { "epoch": 0.3749055177626606, "fcm_dpo/beta": 0.16033056378364563, "fcm_dpo/delta": -0.012121915817260742, "fcm_dpo/margin": 3.8047213554382324, "fcm_dpo/q_t": 0.38406720757484436, "grad_norm": 31.485231399536133, "learning_rate": 3.9501422739279953e-07, "logits/chosen": 1.509279489517212, "logits/rejected": 1.620393991470337, "logps/chosen": -84.99481201171875, "logps/ref_chosen": -88.16007995605469, "logps/ref_rejected": -75.11514282226562, "logps/rejected": -75.75459289550781, "loss": 1.111, "margin_dpo/margin_mean": 3.804720878601074, "margin_dpo/margin_std": 6.728028297424316, "step": 248 }, { "epoch": 0.3764172335600907, "fcm_dpo/beta": 0.16030041873455048, "fcm_dpo/delta": 0.09141341596841812, "fcm_dpo/margin": 1.8059356212615967, "fcm_dpo/q_t": 0.4365463852882385, "grad_norm": 39.32426834106445, "learning_rate": 3.9393515632731094e-07, "logits/chosen": 1.5910958051681519, "logits/rejected": 1.6389684677124023, "logps/chosen": -89.02435302734375, "logps/ref_chosen": -91.01773071289062, "logps/ref_rejected": -80.51113891601562, "logps/rejected": -80.32369995117188, "loss": 1.3894, "margin_dpo/margin_mean": 1.8059358596801758, "margin_dpo/margin_std": 6.863600730895996, "step": 249 }, { "epoch": 0.3779289493575208, "fcm_dpo/beta": 0.15786859393119812, "fcm_dpo/delta": -0.2350025177001953, "fcm_dpo/margin": 5.169163703918457, "fcm_dpo/q_t": 0.33603614568710327, "grad_norm": 26.052709579467773, "learning_rate": 3.9285205908608934e-07, "logits/chosen": 1.9258992671966553, "logits/rejected": 1.8700491189956665, "logps/chosen": -78.1419906616211, "logps/ref_chosen": -80.5888671875, "logps/ref_rejected": -90.15093994140625, "logps/rejected": -92.87322998046875, "loss": 0.9754, "margin_dpo/margin_mean": 5.169164657592773, "margin_dpo/margin_std": 7.048128128051758, "step": 250 }, { "epoch": 0.3794406651549509, "fcm_dpo/beta": 0.15806275606155396, "fcm_dpo/delta": 0.16336648166179657, "fcm_dpo/margin": 2.808767318725586, "fcm_dpo/q_t": 0.4145629405975342, "grad_norm": 28.555532455444336, "learning_rate": 3.9176496596569265e-07, "logits/chosen": 1.852798581123352, "logits/rejected": 1.7349350452423096, "logps/chosen": -80.37120056152344, "logps/ref_chosen": -82.70405578613281, "logps/ref_rejected": -98.94266510009766, "logps/rejected": -99.4185791015625, "loss": 1.1979, "margin_dpo/margin_mean": 2.8087668418884277, "margin_dpo/margin_std": 6.254486560821533, "step": 251 }, { "epoch": 0.38095238095238093, "fcm_dpo/beta": 0.16371968388557434, "fcm_dpo/delta": 0.10678450763225555, "fcm_dpo/margin": 3.0254108905792236, "fcm_dpo/q_t": 0.39587944746017456, "grad_norm": 26.863725662231445, "learning_rate": 3.9067390737445254e-07, "logits/chosen": 1.3596301078796387, "logits/rejected": 1.255225658416748, "logps/chosen": -70.09324645996094, "logps/ref_chosen": -73.10369110107422, "logps/ref_rejected": -94.90235900878906, "logps/rejected": -94.91732788085938, "loss": 1.1854, "margin_dpo/margin_mean": 3.0254111289978027, "margin_dpo/margin_std": 6.294610977172852, "step": 252 }, { "epoch": 0.382464096749811, "fcm_dpo/beta": 0.16050489246845245, "fcm_dpo/delta": -0.07427267730236053, "fcm_dpo/margin": 2.915503978729248, "fcm_dpo/q_t": 0.4101312458515167, "grad_norm": 25.899141311645508, "learning_rate": 3.8957891383162304e-07, "logits/chosen": 1.8655691146850586, "logits/rejected": 1.7491161823272705, "logps/chosen": -66.2563705444336, "logps/ref_chosen": -68.7789535522461, "logps/ref_rejected": -75.98162078857422, "logps/rejected": -76.37454223632812, "loss": 1.1667, "margin_dpo/margin_mean": 2.915503978729248, "margin_dpo/margin_std": 5.7905473709106445, "step": 253 }, { "epoch": 0.3839758125472411, "fcm_dpo/beta": 0.15999382734298706, "fcm_dpo/delta": 0.009141262620687485, "fcm_dpo/margin": 3.6941099166870117, "fcm_dpo/q_t": 0.38594502210617065, "grad_norm": 27.95285987854004, "learning_rate": 3.884800159665276e-07, "logits/chosen": 1.5770944356918335, "logits/rejected": 1.4472434520721436, "logps/chosen": -79.24081420898438, "logps/ref_chosen": -81.49362182617188, "logps/ref_rejected": -101.43672943115234, "logps/rejected": -102.8780288696289, "loss": 1.0868, "margin_dpo/margin_mean": 3.69411039352417, "margin_dpo/margin_std": 6.211980819702148, "step": 254 }, { "epoch": 0.3854875283446712, "fcm_dpo/beta": 0.15869814157485962, "fcm_dpo/delta": -0.039540208876132965, "fcm_dpo/margin": 4.0058794021606445, "fcm_dpo/q_t": 0.36750900745391846, "grad_norm": 28.362796783447266, "learning_rate": 3.873772445177015e-07, "logits/chosen": 1.6912145614624023, "logits/rejected": 1.6456090211868286, "logps/chosen": -87.90896606445312, "logps/ref_chosen": -90.46351623535156, "logps/ref_rejected": -105.32445526123047, "logps/rejected": -106.7757797241211, "loss": 1.0774, "margin_dpo/margin_mean": 4.0058794021606445, "margin_dpo/margin_std": 6.450137138366699, "step": 255 }, { "epoch": 0.3869992441421013, "fcm_dpo/beta": 0.15828856825828552, "fcm_dpo/delta": -0.008787527680397034, "fcm_dpo/margin": 3.8371100425720215, "fcm_dpo/q_t": 0.38170868158340454, "grad_norm": 29.053625106811523, "learning_rate": 3.862706303320329e-07, "logits/chosen": 1.3574330806732178, "logits/rejected": 1.1794304847717285, "logps/chosen": -79.84109497070312, "logps/ref_chosen": -81.56578063964844, "logps/ref_rejected": -108.58460998535156, "logps/rejected": -110.69703674316406, "loss": 1.096, "margin_dpo/margin_mean": 3.8371100425720215, "margin_dpo/margin_std": 6.507552146911621, "step": 256 }, { "epoch": 0.3885109599395314, "fcm_dpo/beta": 0.15910214185714722, "fcm_dpo/delta": 0.007843798026442528, "fcm_dpo/margin": 3.7243387699127197, "fcm_dpo/q_t": 0.3876420557498932, "grad_norm": 34.42729187011719, "learning_rate": 3.851602043638994e-07, "logits/chosen": 1.7406425476074219, "logits/rejected": 1.6441729068756104, "logps/chosen": -88.56779479980469, "logps/ref_chosen": -89.57557678222656, "logps/ref_rejected": -123.74462127685547, "logps/rejected": -126.461181640625, "loss": 1.1465, "margin_dpo/margin_mean": 3.724337577819824, "margin_dpo/margin_std": 7.162275314331055, "step": 257 }, { "epoch": 0.3900226757369615, "fcm_dpo/beta": 0.1584070920944214, "fcm_dpo/delta": -0.07847022265195847, "fcm_dpo/margin": 4.249208450317383, "fcm_dpo/q_t": 0.35672086477279663, "grad_norm": 26.64623260498047, "learning_rate": 3.840459976743023e-07, "logits/chosen": 1.6348203420639038, "logits/rejected": 1.4954969882965088, "logps/chosen": -76.1566162109375, "logps/ref_chosen": -77.34173583984375, "logps/ref_rejected": -99.5709228515625, "logps/rejected": -102.63500213623047, "loss": 0.9596, "margin_dpo/margin_mean": 4.249208927154541, "margin_dpo/margin_std": 5.020096778869629, "step": 258 }, { "epoch": 0.3915343915343915, "fcm_dpo/beta": 0.14894232153892517, "fcm_dpo/delta": -0.3066738247871399, "fcm_dpo/margin": 5.885484218597412, "fcm_dpo/q_t": 0.3259732127189636, "grad_norm": 23.831703186035156, "learning_rate": 3.8292804142999796e-07, "logits/chosen": 1.3765077590942383, "logits/rejected": 1.2210215330123901, "logps/chosen": -79.07723999023438, "logps/ref_chosen": -82.39556121826172, "logps/ref_rejected": -113.73309326171875, "logps/rejected": -116.30026245117188, "loss": 0.9218, "margin_dpo/margin_mean": 5.88548469543457, "margin_dpo/margin_std": 6.962657928466797, "step": 259 }, { "epoch": 0.3930461073318216, "fcm_dpo/beta": 0.14742638170719147, "fcm_dpo/delta": 0.024370871484279633, "fcm_dpo/margin": 3.9111266136169434, "fcm_dpo/q_t": 0.3883991539478302, "grad_norm": 30.209863662719727, "learning_rate": 3.818063669026256e-07, "logits/chosen": 1.6762707233428955, "logits/rejected": 1.4572563171386719, "logps/chosen": -64.32984924316406, "logps/ref_chosen": -65.98947143554688, "logps/ref_rejected": -94.59706115722656, "logps/rejected": -96.84855651855469, "loss": 1.1689, "margin_dpo/margin_mean": 3.911125659942627, "margin_dpo/margin_std": 7.731362342834473, "step": 260 }, { "epoch": 0.3945578231292517, "fcm_dpo/beta": 0.15125837922096252, "fcm_dpo/delta": 0.15498995780944824, "fcm_dpo/margin": 2.9902381896972656, "fcm_dpo/q_t": 0.41241246461868286, "grad_norm": 33.53489303588867, "learning_rate": 3.806810054678331e-07, "logits/chosen": 1.4755849838256836, "logits/rejected": 1.5271053314208984, "logps/chosen": -86.73929595947266, "logps/ref_chosen": -88.87684631347656, "logps/ref_rejected": -82.34838104248047, "logps/rejected": -83.2010726928711, "loss": 1.2115, "margin_dpo/margin_mean": 2.9902381896972656, "margin_dpo/margin_std": 6.766839981079102, "step": 261 }, { "epoch": 0.3960695389266818, "fcm_dpo/beta": 0.1537524312734604, "fcm_dpo/delta": -0.01379317045211792, "fcm_dpo/margin": 3.981868028640747, "fcm_dpo/q_t": 0.3742169737815857, "grad_norm": 25.462480545043945, "learning_rate": 3.7955198860439887e-07, "logits/chosen": 1.4629535675048828, "logits/rejected": 1.3017804622650146, "logps/chosen": -82.5713882446289, "logps/ref_chosen": -85.81719970703125, "logps/ref_rejected": -105.49027252197266, "logps/rejected": -106.22633361816406, "loss": 1.0262, "margin_dpo/margin_mean": 3.981867790222168, "margin_dpo/margin_std": 5.544529438018799, "step": 262 }, { "epoch": 0.3975812547241119, "fcm_dpo/beta": 0.15217387676239014, "fcm_dpo/delta": -0.018349166959524155, "fcm_dpo/margin": 4.05593204498291, "fcm_dpo/q_t": 0.37988507747650146, "grad_norm": 27.196908950805664, "learning_rate": 3.784193478933516e-07, "logits/chosen": 1.6040055751800537, "logits/rejected": 1.326271891593933, "logps/chosen": -70.52960205078125, "logps/ref_chosen": -73.61693572998047, "logps/ref_rejected": -102.39161682128906, "logps/rejected": -103.36022186279297, "loss": 1.0854, "margin_dpo/margin_mean": 4.055931568145752, "margin_dpo/margin_std": 6.913456916809082, "step": 263 }, { "epoch": 0.39909297052154197, "fcm_dpo/beta": 0.15037304162979126, "fcm_dpo/delta": -0.07774695008993149, "fcm_dpo/margin": 4.472409248352051, "fcm_dpo/q_t": 0.3686443567276001, "grad_norm": 26.572608947753906, "learning_rate": 3.7728311501708674e-07, "logits/chosen": 1.420727014541626, "logits/rejected": 1.28560209274292, "logps/chosen": -99.06858825683594, "logps/ref_chosen": -101.57856750488281, "logps/ref_rejected": -111.65735626220703, "logps/rejected": -113.61978149414062, "loss": 1.0112, "margin_dpo/margin_mean": 4.472408294677734, "margin_dpo/margin_std": 6.513485431671143, "step": 264 }, { "epoch": 0.40060468631897206, "fcm_dpo/beta": 0.14593267440795898, "fcm_dpo/delta": -0.22615569829940796, "fcm_dpo/margin": 5.539245128631592, "fcm_dpo/q_t": 0.3435715138912201, "grad_norm": 22.93355369567871, "learning_rate": 3.7614332175848027e-07, "logits/chosen": 2.0376992225646973, "logits/rejected": 1.8995461463928223, "logps/chosen": -61.875823974609375, "logps/ref_chosen": -65.76426696777344, "logps/ref_rejected": -85.19627380371094, "logps/rejected": -86.84707641601562, "loss": 0.9984, "margin_dpo/margin_mean": 5.53924560546875, "margin_dpo/margin_std": 7.727536201477051, "step": 265 }, { "epoch": 0.4021164021164021, "fcm_dpo/beta": 0.1404722034931183, "fcm_dpo/delta": -0.09249762445688248, "fcm_dpo/margin": 4.878218650817871, "fcm_dpo/q_t": 0.3628734350204468, "grad_norm": 24.192852020263672, "learning_rate": 3.75e-07, "logits/chosen": 1.521256446838379, "logits/rejected": 1.3532524108886719, "logps/chosen": -70.55802917480469, "logps/ref_chosen": -75.05682373046875, "logps/ref_rejected": -97.52758026123047, "logps/rejected": -97.90699768066406, "loss": 1.0127, "margin_dpo/margin_mean": 4.878218650817871, "margin_dpo/margin_std": 7.106654167175293, "step": 266 }, { "epoch": 0.4036281179138322, "fcm_dpo/beta": 0.14268219470977783, "fcm_dpo/delta": 0.010663837194442749, "fcm_dpo/margin": 4.10806941986084, "fcm_dpo/q_t": 0.3781440854072571, "grad_norm": 21.69527816772461, "learning_rate": 3.738531817228131e-07, "logits/chosen": 1.7143603563308716, "logits/rejected": 1.6460785865783691, "logps/chosen": -66.24651336669922, "logps/ref_chosen": -71.13494110107422, "logps/ref_rejected": -81.14566040039062, "logps/rejected": -80.36531066894531, "loss": 1.0651, "margin_dpo/margin_mean": 4.10806941986084, "margin_dpo/margin_std": 6.094596862792969, "step": 267 }, { "epoch": 0.4051398337112623, "fcm_dpo/beta": 0.1408797800540924, "fcm_dpo/delta": 0.005832695867866278, "fcm_dpo/margin": 2.7348809242248535, "fcm_dpo/q_t": 0.4266659617424011, "grad_norm": 24.759931564331055, "learning_rate": 3.7270289900589204e-07, "logits/chosen": 1.4918147325515747, "logits/rejected": 1.4511568546295166, "logps/chosen": -76.04736328125, "logps/ref_chosen": -80.06082153320312, "logps/ref_rejected": -87.43035888671875, "logps/rejected": -86.15179443359375, "loss": 1.2272, "margin_dpo/margin_mean": 2.7348811626434326, "margin_dpo/margin_std": 6.584110260009766, "step": 268 }, { "epoch": 0.40665154950869237, "fcm_dpo/beta": 0.1431499421596527, "fcm_dpo/delta": 0.009387940168380737, "fcm_dpo/margin": 4.102551460266113, "fcm_dpo/q_t": 0.3775489330291748, "grad_norm": 24.901914596557617, "learning_rate": 3.7154918402511714e-07, "logits/chosen": 1.9551441669464111, "logits/rejected": 1.888648271560669, "logps/chosen": -79.94139099121094, "logps/ref_chosen": -83.36944580078125, "logps/ref_rejected": -100.66839599609375, "logps/rejected": -101.3428955078125, "loss": 1.0698, "margin_dpo/margin_mean": 4.102551460266113, "margin_dpo/margin_std": 6.241857051849365, "step": 269 }, { "epoch": 0.40816326530612246, "fcm_dpo/beta": 0.14368438720703125, "fcm_dpo/delta": 0.10129906237125397, "fcm_dpo/margin": 3.5041966438293457, "fcm_dpo/q_t": 0.3959196209907532, "grad_norm": 26.79942512512207, "learning_rate": 3.7039206905237656e-07, "logits/chosen": 1.8443632125854492, "logits/rejected": 1.643599510192871, "logps/chosen": -81.50987243652344, "logps/ref_chosen": -85.35945129394531, "logps/ref_rejected": -104.47489929199219, "logps/rejected": -104.1295166015625, "loss": 1.1234, "margin_dpo/margin_mean": 3.5041966438293457, "margin_dpo/margin_std": 6.335942268371582, "step": 270 }, { "epoch": 0.40967498110355255, "fcm_dpo/beta": 0.1482258141040802, "fcm_dpo/delta": 0.21132034063339233, "fcm_dpo/margin": 2.6820480823516846, "fcm_dpo/q_t": 0.43086880445480347, "grad_norm": 29.933801651000977, "learning_rate": 3.692315864546635e-07, "logits/chosen": 1.7451214790344238, "logits/rejected": 1.6185526847839355, "logps/chosen": -82.49104309082031, "logps/ref_chosen": -86.01373291015625, "logps/ref_rejected": -109.99561309814453, "logps/rejected": -109.15498352050781, "loss": 1.2796, "margin_dpo/margin_mean": 2.6820485591888428, "margin_dpo/margin_std": 7.379421710968018, "step": 271 }, { "epoch": 0.41118669690098264, "fcm_dpo/beta": 0.1463513821363449, "fcm_dpo/delta": -0.1709228903055191, "fcm_dpo/margin": 5.177058219909668, "fcm_dpo/q_t": 0.3447331190109253, "grad_norm": 22.9885311126709, "learning_rate": 3.6806776869317067e-07, "logits/chosen": 1.8490626811981201, "logits/rejected": 1.8614492416381836, "logps/chosen": -81.42835998535156, "logps/ref_chosen": -86.37013244628906, "logps/ref_rejected": -85.74638366699219, "logps/rejected": -85.98165893554688, "loss": 0.9331, "margin_dpo/margin_mean": 5.177058219909668, "margin_dpo/margin_std": 6.183077812194824, "step": 272 }, { "epoch": 0.4126984126984127, "fcm_dpo/beta": 0.14383597671985626, "fcm_dpo/delta": -0.0414503775537014, "fcm_dpo/margin": 4.438119888305664, "fcm_dpo/q_t": 0.36971086263656616, "grad_norm": 25.183574676513672, "learning_rate": 3.669006483223828e-07, "logits/chosen": 1.6745984554290771, "logits/rejected": 1.5456163883209229, "logps/chosen": -71.47685241699219, "logps/ref_chosen": -75.51087951660156, "logps/ref_rejected": -101.60345458984375, "logps/rejected": -102.0075454711914, "loss": 1.0868, "margin_dpo/margin_mean": 4.438118934631348, "margin_dpo/margin_std": 7.2522125244140625, "step": 273 }, { "epoch": 0.41421012849584277, "fcm_dpo/beta": 0.14348173141479492, "fcm_dpo/delta": -0.04204811155796051, "fcm_dpo/margin": 4.4560394287109375, "fcm_dpo/q_t": 0.3708673417568207, "grad_norm": 22.110204696655273, "learning_rate": 3.657302579891656e-07, "logits/chosen": 1.7263871431350708, "logits/rejected": 1.6659188270568848, "logps/chosen": -74.85958862304688, "logps/ref_chosen": -79.040283203125, "logps/ref_rejected": -86.31329345703125, "logps/rejected": -86.58863830566406, "loss": 1.0231, "margin_dpo/margin_mean": 4.4560394287109375, "margin_dpo/margin_std": 6.352434158325195, "step": 274 }, { "epoch": 0.41572184429327286, "fcm_dpo/beta": 0.1401127278804779, "fcm_dpo/delta": -0.17330898344516754, "fcm_dpo/margin": 5.427064895629883, "fcm_dpo/q_t": 0.3469482362270355, "grad_norm": 21.600595474243164, "learning_rate": 3.645566304318526e-07, "logits/chosen": 1.4458909034729004, "logits/rejected": 1.246681809425354, "logps/chosen": -66.42031860351562, "logps/ref_chosen": -71.82034301757812, "logps/ref_rejected": -94.29946899414062, "logps/rejected": -94.32649993896484, "loss": 0.9368, "margin_dpo/margin_mean": 5.427064895629883, "margin_dpo/margin_std": 6.502839088439941, "step": 275 }, { "epoch": 0.41723356009070295, "fcm_dpo/beta": 0.13682496547698975, "fcm_dpo/delta": -0.036286476999521255, "fcm_dpo/margin": 4.631248950958252, "fcm_dpo/q_t": 0.370879590511322, "grad_norm": 24.544862747192383, "learning_rate": 3.633797984793294e-07, "logits/chosen": 1.3585480451583862, "logits/rejected": 1.2991361618041992, "logps/chosen": -64.3580093383789, "logps/ref_chosen": -69.54020690917969, "logps/ref_rejected": -78.59674072265625, "logps/rejected": -78.04579162597656, "loss": 1.0346, "margin_dpo/margin_mean": 4.631248950958252, "margin_dpo/margin_std": 6.801164150238037, "step": 276 }, { "epoch": 0.41874527588813304, "fcm_dpo/beta": 0.1429087072610855, "fcm_dpo/delta": 0.2666781544685364, "fcm_dpo/margin": 2.3913447856903076, "fcm_dpo/q_t": 0.4346947968006134, "grad_norm": 27.025981903076172, "learning_rate": 3.6219979505011555e-07, "logits/chosen": 1.641129493713379, "logits/rejected": 1.712536334991455, "logps/chosen": -91.08503723144531, "logps/ref_chosen": -94.4896240234375, "logps/ref_rejected": -85.45901489257812, "logps/rejected": -84.44577026367188, "loss": 1.2882, "margin_dpo/margin_mean": 2.391343593597412, "margin_dpo/margin_std": 6.963379383087158, "step": 277 }, { "epoch": 0.42025699168556313, "fcm_dpo/beta": 0.14845089614391327, "fcm_dpo/delta": 0.09349919855594635, "fcm_dpo/margin": 3.414287805557251, "fcm_dpo/q_t": 0.3993656039237976, "grad_norm": 29.298189163208008, "learning_rate": 3.6101665315144353e-07, "logits/chosen": 1.4798262119293213, "logits/rejected": 1.3288986682891846, "logps/chosen": -83.59101867675781, "logps/ref_chosen": -87.42613220214844, "logps/ref_rejected": -105.44854736328125, "logps/rejected": -105.02772521972656, "loss": 1.171, "margin_dpo/margin_mean": 3.414287805557251, "margin_dpo/margin_std": 6.732444763183594, "step": 278 }, { "epoch": 0.4217687074829932, "fcm_dpo/beta": 0.14359894394874573, "fcm_dpo/delta": -0.208085834980011, "fcm_dpo/margin": 5.515895843505859, "fcm_dpo/q_t": 0.3363301753997803, "grad_norm": 22.904470443725586, "learning_rate": 3.5983040587833563e-07, "logits/chosen": 1.4755373001098633, "logits/rejected": 1.4106316566467285, "logps/chosen": -64.2370376586914, "logps/ref_chosen": -70.516845703125, "logps/ref_rejected": -86.04249572753906, "logps/rejected": -85.27857971191406, "loss": 0.9183, "margin_dpo/margin_mean": 5.515895843505859, "margin_dpo/margin_std": 6.264064788818359, "step": 279 }, { "epoch": 0.42328042328042326, "fcm_dpo/beta": 0.13692662119865417, "fcm_dpo/delta": -0.20576292276382446, "fcm_dpo/margin": 5.765023231506348, "fcm_dpo/q_t": 0.33472245931625366, "grad_norm": 27.43783187866211, "learning_rate": 3.586410864126781e-07, "logits/chosen": 1.6306326389312744, "logits/rejected": 1.507961392402649, "logps/chosen": -70.88343048095703, "logps/ref_chosen": -76.5021743774414, "logps/ref_rejected": -94.2752685546875, "logps/rejected": -94.42154693603516, "loss": 0.8887, "margin_dpo/margin_mean": 5.765023231506348, "margin_dpo/margin_std": 6.170578479766846, "step": 280 }, { "epoch": 0.42479213907785335, "fcm_dpo/beta": 0.13551339507102966, "fcm_dpo/delta": 0.006325826048851013, "fcm_dpo/margin": 4.383538246154785, "fcm_dpo/q_t": 0.37640178203582764, "grad_norm": 20.854692459106445, "learning_rate": 3.574487280222929e-07, "logits/chosen": 1.4427279233932495, "logits/rejected": 1.467543125152588, "logps/chosen": -72.43865203857422, "logps/ref_chosen": -77.50468444824219, "logps/ref_rejected": -79.05717468261719, "logps/rejected": -78.37466430664062, "loss": 1.0394, "margin_dpo/margin_mean": 4.383538246154785, "margin_dpo/margin_std": 6.361909866333008, "step": 281 }, { "epoch": 0.42630385487528344, "fcm_dpo/beta": 0.1353539526462555, "fcm_dpo/delta": -0.06713277101516724, "fcm_dpo/margin": 4.891935348510742, "fcm_dpo/q_t": 0.3712444305419922, "grad_norm": 24.80596923828125, "learning_rate": 3.562533640600075e-07, "logits/chosen": 1.26842200756073, "logits/rejected": 1.1664509773254395, "logps/chosen": -75.95979309082031, "logps/ref_chosen": -80.31298065185547, "logps/ref_rejected": -83.72120666503906, "logps/rejected": -84.25996398925781, "loss": 1.032, "margin_dpo/margin_mean": 4.891935348510742, "margin_dpo/margin_std": 7.308884143829346, "step": 282 }, { "epoch": 0.42781557067271353, "fcm_dpo/beta": 0.1342248022556305, "fcm_dpo/delta": -5.543231964111328e-06, "fcm_dpo/margin": 4.469372749328613, "fcm_dpo/q_t": 0.3756641745567322, "grad_norm": 24.568660736083984, "learning_rate": 3.550550279627215e-07, "logits/chosen": 1.5680984258651733, "logits/rejected": 1.2567392587661743, "logps/chosen": -77.70895385742188, "logps/ref_chosen": -80.72602844238281, "logps/ref_rejected": -115.68379211425781, "logps/rejected": -117.13607788085938, "loss": 1.0686, "margin_dpo/margin_mean": 4.46937370300293, "margin_dpo/margin_std": 7.047473907470703, "step": 283 }, { "epoch": 0.4293272864701436, "fcm_dpo/beta": 0.13157621026039124, "fcm_dpo/delta": -0.11532597243785858, "fcm_dpo/margin": 5.374138355255127, "fcm_dpo/q_t": 0.35533052682876587, "grad_norm": 21.152599334716797, "learning_rate": 3.5385375325047163e-07, "logits/chosen": 1.6199060678482056, "logits/rejected": 1.474593162536621, "logps/chosen": -73.30044555664062, "logps/ref_chosen": -77.5223388671875, "logps/ref_rejected": -104.1847152709961, "logps/rejected": -105.33695983886719, "loss": 0.9637, "margin_dpo/margin_mean": 5.374138832092285, "margin_dpo/margin_std": 6.845863342285156, "step": 284 }, { "epoch": 0.4308390022675737, "fcm_dpo/beta": 0.13484027981758118, "fcm_dpo/delta": 0.1905381679534912, "fcm_dpo/margin": 3.093123435974121, "fcm_dpo/q_t": 0.4164922833442688, "grad_norm": 28.799518585205078, "learning_rate": 3.5264957352549375e-07, "logits/chosen": 1.525662899017334, "logits/rejected": 1.44883394241333, "logps/chosen": -84.68502807617188, "logps/ref_chosen": -85.79348754882812, "logps/ref_rejected": -96.46463775634766, "logps/rejected": -98.44929504394531, "loss": 1.2172, "margin_dpo/margin_mean": 3.0931224822998047, "margin_dpo/margin_std": 7.149640083312988, "step": 285 }, { "epoch": 0.4323507180650038, "fcm_dpo/beta": 0.1331361085176468, "fcm_dpo/delta": -0.12348408997058868, "fcm_dpo/margin": 5.364745140075684, "fcm_dpo/q_t": 0.35463234782218933, "grad_norm": 24.2138729095459, "learning_rate": 3.514425224712835e-07, "logits/chosen": 1.406418800354004, "logits/rejected": 1.1762328147888184, "logps/chosen": -75.9451904296875, "logps/ref_chosen": -77.86268615722656, "logps/ref_rejected": -110.77134704589844, "logps/rejected": -114.21859741210938, "loss": 0.9968, "margin_dpo/margin_mean": 5.364744186401367, "margin_dpo/margin_std": 7.451730728149414, "step": 286 }, { "epoch": 0.43386243386243384, "fcm_dpo/beta": 0.12852045893669128, "fcm_dpo/delta": -0.29661697149276733, "fcm_dpo/margin": 6.779278755187988, "fcm_dpo/q_t": 0.32936036586761475, "grad_norm": 23.7078800201416, "learning_rate": 3.502326338516534e-07, "logits/chosen": 1.394890546798706, "logits/rejected": 1.3782275915145874, "logps/chosen": -59.10296630859375, "logps/ref_chosen": -62.552825927734375, "logps/ref_rejected": -77.7650146484375, "logps/rejected": -81.09442901611328, "loss": 0.9124, "margin_dpo/margin_mean": 6.779278755187988, "margin_dpo/margin_std": 7.917557239532471, "step": 287 }, { "epoch": 0.43537414965986393, "fcm_dpo/beta": 0.1267382800579071, "fcm_dpo/delta": 0.1060422956943512, "fcm_dpo/margin": 3.940197229385376, "fcm_dpo/q_t": 0.40103963017463684, "grad_norm": 25.170812606811523, "learning_rate": 3.490199415097892e-07, "logits/chosen": 1.2241320610046387, "logits/rejected": 1.1363778114318848, "logps/chosen": -82.68171691894531, "logps/ref_chosen": -83.74117279052734, "logps/ref_rejected": -106.93913269042969, "logps/rejected": -109.81986999511719, "loss": 1.152, "margin_dpo/margin_mean": 3.9401965141296387, "margin_dpo/margin_std": 7.708506107330322, "step": 288 }, { "epoch": 0.436885865457294, "fcm_dpo/beta": 0.1263837367296219, "fcm_dpo/delta": -0.06516540050506592, "fcm_dpo/margin": 5.2287468910217285, "fcm_dpo/q_t": 0.36657577753067017, "grad_norm": 18.498565673828125, "learning_rate": 3.4780447936730247e-07, "logits/chosen": 1.5653696060180664, "logits/rejected": 1.5069622993469238, "logps/chosen": -71.78709411621094, "logps/ref_chosen": -73.04204559326172, "logps/ref_rejected": -88.07904052734375, "logps/rejected": -92.05284118652344, "loss": 1.0073, "margin_dpo/margin_mean": 5.22874641418457, "margin_dpo/margin_std": 7.315879821777344, "step": 289 }, { "epoch": 0.4383975812547241, "fcm_dpo/beta": 0.12540464103221893, "fcm_dpo/delta": 0.0063613057136535645, "fcm_dpo/margin": 4.731854438781738, "fcm_dpo/q_t": 0.37520450353622437, "grad_norm": 22.365402221679688, "learning_rate": 3.465862814232821e-07, "logits/chosen": 1.7237253189086914, "logits/rejected": 1.6590449810028076, "logps/chosen": -79.21012115478516, "logps/ref_chosen": -78.60614013671875, "logps/ref_rejected": -108.50082397460938, "logps/rejected": -113.836669921875, "loss": 1.0361, "margin_dpo/margin_mean": 4.73185396194458, "margin_dpo/margin_std": 6.734312057495117, "step": 290 }, { "epoch": 0.4399092970521542, "fcm_dpo/beta": 0.12606967985630035, "fcm_dpo/delta": -0.09047486633062363, "fcm_dpo/margin": 5.4125800132751465, "fcm_dpo/q_t": 0.3635619580745697, "grad_norm": 20.724634170532227, "learning_rate": 3.4536538175334343e-07, "logits/chosen": 1.6329586505889893, "logits/rejected": 1.4041390419006348, "logps/chosen": -66.39132690429688, "logps/ref_chosen": -66.71226501464844, "logps/ref_rejected": -96.14029693603516, "logps/rejected": -101.23193359375, "loss": 1.0578, "margin_dpo/margin_mean": 5.412579536437988, "margin_dpo/margin_std": 8.23811149597168, "step": 291 }, { "epoch": 0.4414210128495843, "fcm_dpo/beta": 0.12402984499931335, "fcm_dpo/delta": 0.001040758565068245, "fcm_dpo/margin": 4.829550266265869, "fcm_dpo/q_t": 0.37668299674987793, "grad_norm": 25.246936798095703, "learning_rate": 3.4414181450867465e-07, "logits/chosen": 1.69663667678833, "logits/rejected": 1.6170578002929688, "logps/chosen": -79.89686584472656, "logps/ref_chosen": -80.3355484008789, "logps/ref_rejected": -90.44906616210938, "logps/rejected": -94.83992767333984, "loss": 1.0653, "margin_dpo/margin_mean": 4.829549789428711, "margin_dpo/margin_std": 7.592476844787598, "step": 292 }, { "epoch": 0.4429327286470144, "fcm_dpo/beta": 0.12098394334316254, "fcm_dpo/delta": -0.18759030103683472, "fcm_dpo/margin": 6.393030643463135, "fcm_dpo/q_t": 0.35188886523246765, "grad_norm": 20.60826873779297, "learning_rate": 3.4291561391508185e-07, "logits/chosen": 1.9168094396591187, "logits/rejected": 1.776773452758789, "logps/chosen": -71.5949478149414, "logps/ref_chosen": -71.69970703125, "logps/ref_rejected": -102.13948059082031, "logps/rejected": -108.4277572631836, "loss": 1.0365, "margin_dpo/margin_mean": 6.393031120300293, "margin_dpo/margin_std": 9.774253845214844, "step": 293 }, { "epoch": 0.4444444444444444, "fcm_dpo/beta": 0.1188976839184761, "fcm_dpo/delta": 0.024206943809986115, "fcm_dpo/margin": 4.846450328826904, "fcm_dpo/q_t": 0.3811735510826111, "grad_norm": 19.384902954101562, "learning_rate": 3.4168681427203153e-07, "logits/chosen": 1.659212589263916, "logits/rejected": 1.5612801313400269, "logps/chosen": -70.84461975097656, "logps/ref_chosen": -70.73458862304688, "logps/ref_rejected": -86.68821716308594, "logps/rejected": -91.64469146728516, "loss": 1.0683, "margin_dpo/margin_mean": 4.846449851989746, "margin_dpo/margin_std": 7.481466293334961, "step": 294 }, { "epoch": 0.4459561602418745, "fcm_dpo/beta": 0.12134327739477158, "fcm_dpo/delta": 0.08148723840713501, "fcm_dpo/margin": 4.308539390563965, "fcm_dpo/q_t": 0.39348357915878296, "grad_norm": 23.888975143432617, "learning_rate": 3.4045544995169125e-07, "logits/chosen": 1.4067692756652832, "logits/rejected": 1.1668778657913208, "logps/chosen": -66.92997741699219, "logps/ref_chosen": -66.42644500732422, "logps/ref_rejected": -99.58766174316406, "logps/rejected": -104.39974212646484, "loss": 1.1134, "margin_dpo/margin_mean": 4.308538913726807, "margin_dpo/margin_std": 7.631232261657715, "step": 295 }, { "epoch": 0.4474678760393046, "fcm_dpo/beta": 0.11868500709533691, "fcm_dpo/delta": -0.132828950881958, "fcm_dpo/margin": 6.077790260314941, "fcm_dpo/q_t": 0.3579130470752716, "grad_norm": 22.89322280883789, "learning_rate": 3.392215553979679e-07, "logits/chosen": 1.6104109287261963, "logits/rejected": 1.4648113250732422, "logps/chosen": -87.55390930175781, "logps/ref_chosen": -87.47459411621094, "logps/ref_rejected": -103.96894836425781, "logps/rejected": -110.12605285644531, "loss": 0.9981, "margin_dpo/margin_mean": 6.0777907371521, "margin_dpo/margin_std": 8.397589683532715, "step": 296 }, { "epoch": 0.4489795918367347, "fcm_dpo/beta": 0.11766720563173294, "fcm_dpo/delta": -0.11798781156539917, "fcm_dpo/margin": 6.026991844177246, "fcm_dpo/q_t": 0.35117679834365845, "grad_norm": 22.429386138916016, "learning_rate": 3.3798516512554485e-07, "logits/chosen": 1.5266656875610352, "logits/rejected": 1.4038197994232178, "logps/chosen": -74.09784698486328, "logps/ref_chosen": -73.46731567382812, "logps/ref_rejected": -88.22674560546875, "logps/rejected": -94.8842544555664, "loss": 0.9467, "margin_dpo/margin_mean": 6.026991844177246, "margin_dpo/margin_std": 6.926385402679443, "step": 297 }, { "epoch": 0.4504913076341648, "fcm_dpo/beta": 0.11617027223110199, "fcm_dpo/delta": -0.016583360731601715, "fcm_dpo/margin": 5.296326160430908, "fcm_dpo/q_t": 0.38224735856056213, "grad_norm": 22.070199966430664, "learning_rate": 3.367463137189156e-07, "logits/chosen": 1.797358512878418, "logits/rejected": 1.719627022743225, "logps/chosen": -72.86018371582031, "logps/ref_chosen": -73.21676635742188, "logps/ref_rejected": -84.9563217163086, "logps/rejected": -89.89605712890625, "loss": 1.1139, "margin_dpo/margin_mean": 5.29632568359375, "margin_dpo/margin_std": 9.410942077636719, "step": 298 }, { "epoch": 0.4520030234315949, "fcm_dpo/beta": 0.11756888777017593, "fcm_dpo/delta": 0.12657645344734192, "fcm_dpo/margin": 4.079702377319336, "fcm_dpo/q_t": 0.4041763246059418, "grad_norm": 22.431743621826172, "learning_rate": 3.355050358314172e-07, "logits/chosen": 1.2828481197357178, "logits/rejected": 1.1960859298706055, "logps/chosen": -77.17741394042969, "logps/ref_chosen": -76.9534912109375, "logps/ref_rejected": -87.53433227539062, "logps/rejected": -91.83795166015625, "loss": 1.1585, "margin_dpo/margin_mean": 4.0797014236450195, "margin_dpo/margin_std": 8.059319496154785, "step": 299 }, { "epoch": 0.45351473922902497, "fcm_dpo/beta": 0.11919373273849487, "fcm_dpo/delta": 0.0511748343706131, "fcm_dpo/margin": 4.628298759460449, "fcm_dpo/q_t": 0.38677746057510376, "grad_norm": 21.598217010498047, "learning_rate": 3.3426136618426043e-07, "logits/chosen": 1.7995554208755493, "logits/rejected": 1.6187442541122437, "logps/chosen": -78.25460815429688, "logps/ref_chosen": -78.36398315429688, "logps/ref_rejected": -97.03912353515625, "logps/rejected": -101.55804443359375, "loss": 1.1386, "margin_dpo/margin_mean": 4.628297805786133, "margin_dpo/margin_std": 8.503219604492188, "step": 300 }, { "epoch": 0.45351473922902497, "eval_fcm_dpo/beta": 0.11975711584091187, "eval_logits/chosen": 1.5470160245895386, "eval_logits/rejected": 1.4236763715744019, "eval_logps/chosen": -85.9098129272461, "eval_logps/ref_chosen": -86.90177917480469, "eval_logps/ref_rejected": -96.69639587402344, "eval_logps/rejected": -100.92195892333984, "eval_loss": 0.5411239266395569, "eval_margin_dpo/margin_mean": 5.2175445556640625, "eval_margin_dpo/margin_std": 8.570341110229492, "eval_runtime": 42.3487, "eval_samples_per_second": 54.382, "eval_steps_per_second": 1.7, "step": 300 }, { "epoch": 0.455026455026455, "fcm_dpo/beta": 0.11965688318014145, "fcm_dpo/delta": 0.026650425046682358, "fcm_dpo/margin": 4.801657676696777, "fcm_dpo/q_t": 0.39114803075790405, "grad_norm": 20.64493179321289, "learning_rate": 3.3301533956555885e-07, "logits/chosen": 1.4913475513458252, "logits/rejected": 1.432964563369751, "logps/chosen": -69.89122772216797, "logps/ref_chosen": -70.6719741821289, "logps/ref_rejected": -87.11650085449219, "logps/rejected": -91.13742065429688, "loss": 1.1163, "margin_dpo/margin_mean": 4.801657676696777, "margin_dpo/margin_std": 8.655708312988281, "step": 301 }, { "epoch": 0.4565381708238851, "fcm_dpo/beta": 0.12548677623271942, "fcm_dpo/delta": 0.2517406940460205, "fcm_dpo/margin": 2.8369710445404053, "fcm_dpo/q_t": 0.4301077127456665, "grad_norm": 26.383052825927734, "learning_rate": 3.317669908293554e-07, "logits/chosen": 1.3318819999694824, "logits/rejected": 1.2350269556045532, "logps/chosen": -85.69977569580078, "logps/ref_chosen": -85.29096221923828, "logps/ref_rejected": -106.22589874267578, "logps/rejected": -109.4716796875, "loss": 1.2691, "margin_dpo/margin_mean": 2.836970806121826, "margin_dpo/margin_std": 7.777761936187744, "step": 302 }, { "epoch": 0.4580498866213152, "fcm_dpo/beta": 0.1212981790304184, "fcm_dpo/delta": -0.23223206400871277, "fcm_dpo/margin": 6.679323196411133, "fcm_dpo/q_t": 0.3449594974517822, "grad_norm": 20.195770263671875, "learning_rate": 3.3051635489464793e-07, "logits/chosen": 1.8518650531768799, "logits/rejected": 1.7183852195739746, "logps/chosen": -82.73690795898438, "logps/ref_chosen": -83.90059661865234, "logps/ref_rejected": -104.7340087890625, "logps/rejected": -110.24964904785156, "loss": 0.9852, "margin_dpo/margin_mean": 6.679323673248291, "margin_dpo/margin_std": 9.238541603088379, "step": 303 }, { "epoch": 0.4595616024187453, "fcm_dpo/beta": 0.11956004798412323, "fcm_dpo/delta": -0.08801032602787018, "fcm_dpo/margin": 5.705035209655762, "fcm_dpo/q_t": 0.3594469428062439, "grad_norm": 20.22382164001465, "learning_rate": 3.292634667444117e-07, "logits/chosen": 1.3408575057983398, "logits/rejected": 1.24342679977417, "logps/chosen": -75.45277404785156, "logps/ref_chosen": -77.39997100830078, "logps/ref_rejected": -94.21647644042969, "logps/rejected": -97.97433471679688, "loss": 0.9937, "margin_dpo/margin_mean": 5.705035209655762, "margin_dpo/margin_std": 7.6752142906188965, "step": 304 }, { "epoch": 0.46107331821617537, "fcm_dpo/beta": 0.11689537763595581, "fcm_dpo/delta": -0.03063960373401642, "fcm_dpo/margin": 5.354496002197266, "fcm_dpo/q_t": 0.37809717655181885, "grad_norm": 20.9038028717041, "learning_rate": 3.280083614246217e-07, "logits/chosen": 1.3459558486938477, "logits/rejected": 1.4120479822158813, "logps/chosen": -89.80319213867188, "logps/ref_chosen": -90.90805053710938, "logps/ref_rejected": -85.84992980957031, "logps/rejected": -90.09957885742188, "loss": 1.1074, "margin_dpo/margin_mean": 5.354496479034424, "margin_dpo/margin_std": 9.220006942749023, "step": 305 }, { "epoch": 0.46258503401360546, "fcm_dpo/beta": 0.11684860289096832, "fcm_dpo/delta": 0.002469673752784729, "fcm_dpo/margin": 5.096833229064941, "fcm_dpo/q_t": 0.38039129972457886, "grad_norm": 21.22861099243164, "learning_rate": 3.267510740432719e-07, "logits/chosen": 1.4836664199829102, "logits/rejected": 1.2493438720703125, "logps/chosen": -69.71931457519531, "logps/ref_chosen": -71.7261962890625, "logps/ref_rejected": -97.70491027832031, "logps/rejected": -100.79486083984375, "loss": 1.0663, "margin_dpo/margin_mean": 5.096833229064941, "margin_dpo/margin_std": 7.983862400054932, "step": 306 }, { "epoch": 0.46409674981103555, "fcm_dpo/beta": 0.12025651335716248, "fcm_dpo/delta": 0.09597062319517136, "fcm_dpo/margin": 4.226437091827393, "fcm_dpo/q_t": 0.40421485900878906, "grad_norm": 21.80240821838379, "learning_rate": 3.2549163976939285e-07, "logits/chosen": 1.6594510078430176, "logits/rejected": 1.5168578624725342, "logps/chosen": -70.91734313964844, "logps/ref_chosen": -74.38668823242188, "logps/ref_rejected": -84.16001892089844, "logps/rejected": -84.91710662841797, "loss": 1.1539, "margin_dpo/margin_mean": 4.226436138153076, "margin_dpo/margin_std": 8.34419059753418, "step": 307 }, { "epoch": 0.4656084656084656, "fcm_dpo/beta": 0.11909815669059753, "fcm_dpo/delta": -0.049813512712717056, "fcm_dpo/margin": 5.426000595092773, "fcm_dpo/q_t": 0.37172651290893555, "grad_norm": 23.397077560424805, "learning_rate": 3.2423009383206874e-07, "logits/chosen": 1.3079359531402588, "logits/rejected": 1.3617353439331055, "logps/chosen": -84.75643920898438, "logps/ref_chosen": -87.50894165039062, "logps/ref_rejected": -94.80848693847656, "logps/rejected": -97.48197937011719, "loss": 1.0602, "margin_dpo/margin_mean": 5.425999641418457, "margin_dpo/margin_std": 8.557561874389648, "step": 308 }, { "epoch": 0.4671201814058957, "fcm_dpo/beta": 0.11748093366622925, "fcm_dpo/delta": -0.03162723034620285, "fcm_dpo/margin": 5.341372966766357, "fcm_dpo/q_t": 0.3704092800617218, "grad_norm": 19.2188777923584, "learning_rate": 3.229664715194511e-07, "logits/chosen": 1.7406518459320068, "logits/rejected": 1.6214611530303955, "logps/chosen": -80.2752456665039, "logps/ref_chosen": -82.15191650390625, "logps/ref_rejected": -95.03496551513672, "logps/rejected": -98.49966430664062, "loss": 1.0017, "margin_dpo/margin_mean": 5.341372489929199, "margin_dpo/margin_std": 6.893287658691406, "step": 309 }, { "epoch": 0.46863189720332576, "fcm_dpo/beta": 0.120293527841568, "fcm_dpo/delta": 0.07783792167901993, "fcm_dpo/margin": 2.3885998725891113, "fcm_dpo/q_t": 0.44259071350097656, "grad_norm": 27.145200729370117, "learning_rate": 3.2170080817777257e-07, "logits/chosen": 1.591238021850586, "logits/rejected": 1.5714151859283447, "logps/chosen": -92.16886901855469, "logps/ref_chosen": -93.7555160522461, "logps/ref_rejected": -96.93236541748047, "logps/rejected": -97.73431396484375, "loss": 1.3459, "margin_dpo/margin_mean": 2.3886003494262695, "margin_dpo/margin_std": 8.228015899658203, "step": 310 }, { "epoch": 0.47014361300075586, "fcm_dpo/beta": 0.11992132663726807, "fcm_dpo/delta": 0.025824643671512604, "fcm_dpo/margin": 4.793860912322998, "fcm_dpo/q_t": 0.3902488350868225, "grad_norm": 20.445270538330078, "learning_rate": 3.204331392103574e-07, "logits/chosen": 1.4957547187805176, "logits/rejected": 1.1831345558166504, "logps/chosen": -71.67782592773438, "logps/ref_chosen": -76.20762634277344, "logps/ref_rejected": -110.48141479492188, "logps/rejected": -110.74546813964844, "loss": 1.0995, "margin_dpo/margin_mean": 4.793861389160156, "margin_dpo/margin_std": 8.486019134521484, "step": 311 }, { "epoch": 0.47165532879818595, "fcm_dpo/beta": 0.11924172937870026, "fcm_dpo/delta": -0.09825573861598969, "fcm_dpo/margin": 5.799221038818359, "fcm_dpo/q_t": 0.3667619228363037, "grad_norm": 19.300731658935547, "learning_rate": 3.1916350007663176e-07, "logits/chosen": 1.4672105312347412, "logits/rejected": 1.3198761940002441, "logps/chosen": -66.4232406616211, "logps/ref_chosen": -69.08878326416016, "logps/ref_rejected": -91.84494018554688, "logps/rejected": -94.97862243652344, "loss": 1.0111, "margin_dpo/margin_mean": 5.799221038818359, "margin_dpo/margin_std": 8.42913818359375, "step": 312 }, { "epoch": 0.47316704459561604, "fcm_dpo/beta": 0.12133464217185974, "fcm_dpo/delta": 0.15221793949604034, "fcm_dpo/margin": 3.7469735145568848, "fcm_dpo/q_t": 0.4182596802711487, "grad_norm": 21.947050094604492, "learning_rate": 3.178919262911314e-07, "logits/chosen": 1.6795837879180908, "logits/rejected": 1.642377257347107, "logps/chosen": -73.63683319091797, "logps/ref_chosen": -78.20826721191406, "logps/ref_rejected": -86.90351867675781, "logps/rejected": -86.07906341552734, "loss": 1.2413, "margin_dpo/margin_mean": 3.7469730377197266, "margin_dpo/margin_std": 9.230291366577148, "step": 313 }, { "epoch": 0.47467876039304613, "fcm_dpo/beta": 0.12020012736320496, "fcm_dpo/delta": -0.08286652714014053, "fcm_dpo/margin": 5.628398418426514, "fcm_dpo/q_t": 0.36906343698501587, "grad_norm": 22.41501808166504, "learning_rate": 3.166184534225087e-07, "logits/chosen": 1.6936403512954712, "logits/rejected": 1.713541030883789, "logps/chosen": -85.90785217285156, "logps/ref_chosen": -90.41890716552734, "logps/ref_rejected": -84.33525848388672, "logps/rejected": -85.45260620117188, "loss": 1.0274, "margin_dpo/margin_mean": 5.628398895263672, "margin_dpo/margin_std": 8.3358793258667, "step": 314 }, { "epoch": 0.47619047619047616, "fcm_dpo/beta": 0.12187933176755905, "fcm_dpo/delta": 0.03306184709072113, "fcm_dpo/margin": 4.652489185333252, "fcm_dpo/q_t": 0.3853157162666321, "grad_norm": 21.278676986694336, "learning_rate": 3.1534311709253723e-07, "logits/chosen": 1.3511652946472168, "logits/rejected": 1.277963638305664, "logps/chosen": -82.36071014404297, "logps/ref_chosen": -87.32842254638672, "logps/ref_rejected": -93.71661376953125, "logps/rejected": -93.40138244628906, "loss": 1.0678, "margin_dpo/margin_mean": 4.652489185333252, "margin_dpo/margin_std": 7.178579807281494, "step": 315 }, { "epoch": 0.47770219198790626, "fcm_dpo/beta": 0.12188950181007385, "fcm_dpo/delta": -0.06950892508029938, "fcm_dpo/margin": 5.416971206665039, "fcm_dpo/q_t": 0.3643546998500824, "grad_norm": 25.714170455932617, "learning_rate": 3.1406595297511564e-07, "logits/chosen": 1.5012097358703613, "logits/rejected": 1.2324557304382324, "logps/chosen": -69.50252532958984, "logps/ref_chosen": -73.898681640625, "logps/ref_rejected": -115.42668151855469, "logps/rejected": -116.4474868774414, "loss": 0.9983, "margin_dpo/margin_mean": 5.416971206665039, "margin_dpo/margin_std": 6.694197654724121, "step": 316 }, { "epoch": 0.47921390778533635, "fcm_dpo/beta": 0.1169140413403511, "fcm_dpo/delta": -0.1020512804389, "fcm_dpo/margin": 5.934911727905273, "fcm_dpo/q_t": 0.3591901361942291, "grad_norm": 22.108245849609375, "learning_rate": 3.1278699679526975e-07, "logits/chosen": 1.83012056350708, "logits/rejected": 1.7280373573303223, "logps/chosen": -69.74676513671875, "logps/ref_chosen": -75.42947387695312, "logps/ref_rejected": -90.60166931152344, "logps/rejected": -90.8538818359375, "loss": 1.0275, "margin_dpo/margin_mean": 5.934911251068115, "margin_dpo/margin_std": 8.861763000488281, "step": 317 }, { "epoch": 0.48072562358276644, "fcm_dpo/beta": 0.11810323596000671, "fcm_dpo/delta": 0.07940696179866791, "fcm_dpo/margin": 4.443850517272949, "fcm_dpo/q_t": 0.40077465772628784, "grad_norm": 19.98630142211914, "learning_rate": 3.1150628432815336e-07, "logits/chosen": 1.6220097541809082, "logits/rejected": 1.4846813678741455, "logps/chosen": -65.35560607910156, "logps/ref_chosen": -70.38318634033203, "logps/ref_rejected": -98.19901275634766, "logps/rejected": -97.61528015136719, "loss": 1.1674, "margin_dpo/margin_mean": 4.443850040435791, "margin_dpo/margin_std": 8.962568283081055, "step": 318 }, { "epoch": 0.48223733938019653, "fcm_dpo/beta": 0.11873992532491684, "fcm_dpo/delta": -0.004999694414436817, "fcm_dpo/margin": 5.09262228012085, "fcm_dpo/q_t": 0.378478467464447, "grad_norm": 20.11252212524414, "learning_rate": 3.1022385139804707e-07, "logits/chosen": 1.1534156799316406, "logits/rejected": 1.1021003723144531, "logps/chosen": -79.05325317382812, "logps/ref_chosen": -83.40225982666016, "logps/ref_rejected": -95.40069580078125, "logps/rejected": -96.14430236816406, "loss": 1.0636, "margin_dpo/margin_mean": 5.092622756958008, "margin_dpo/margin_std": 8.152623176574707, "step": 319 }, { "epoch": 0.4837490551776266, "fcm_dpo/beta": 0.11889675259590149, "fcm_dpo/delta": 0.01753305457532406, "fcm_dpo/margin": 3.168153762817383, "fcm_dpo/q_t": 0.42565175890922546, "grad_norm": 21.558019638061523, "learning_rate": 3.0893973387735683e-07, "logits/chosen": 1.238257646560669, "logits/rejected": 1.128818154335022, "logps/chosen": -63.69639587402344, "logps/ref_chosen": -68.70979309082031, "logps/ref_rejected": -87.00540924072266, "logps/rejected": -85.16017150878906, "loss": 1.2676, "margin_dpo/margin_mean": 3.168154239654541, "margin_dpo/margin_std": 8.407440185546875, "step": 320 }, { "epoch": 0.4852607709750567, "fcm_dpo/beta": 0.11838892847299576, "fcm_dpo/delta": 0.032411910593509674, "fcm_dpo/margin": 4.793027400970459, "fcm_dpo/q_t": 0.3875201344490051, "grad_norm": 22.37877655029297, "learning_rate": 3.0765396768561004e-07, "logits/chosen": 1.4487881660461426, "logits/rejected": 1.3890159130096436, "logps/chosen": -61.277713775634766, "logps/ref_chosen": -66.48135375976562, "logps/ref_rejected": -71.84545135498047, "logps/rejected": -71.4348373413086, "loss": 1.1063, "margin_dpo/margin_mean": 4.793027877807617, "margin_dpo/margin_std": 8.183032989501953, "step": 321 }, { "epoch": 0.48677248677248675, "fcm_dpo/beta": 0.11761731654405594, "fcm_dpo/delta": -0.16031649708747864, "fcm_dpo/margin": 6.364631175994873, "fcm_dpo/q_t": 0.3440389633178711, "grad_norm": 22.891164779663086, "learning_rate": 3.063665887884511e-07, "logits/chosen": 1.7625864744186401, "logits/rejected": 1.587303876876831, "logps/chosen": -61.12324523925781, "logps/ref_chosen": -65.94654846191406, "logps/ref_rejected": -94.26603698730469, "logps/rejected": -95.807373046875, "loss": 0.9436, "margin_dpo/margin_mean": 6.364631175994873, "margin_dpo/margin_std": 7.739552974700928, "step": 322 }, { "epoch": 0.48828420256991684, "fcm_dpo/beta": 0.11629685759544373, "fcm_dpo/delta": -0.050307899713516235, "fcm_dpo/margin": 5.553721904754639, "fcm_dpo/q_t": 0.3829064667224884, "grad_norm": 22.22823143005371, "learning_rate": 3.0507763319663517e-07, "logits/chosen": 1.3363523483276367, "logits/rejected": 1.2372665405273438, "logps/chosen": -82.47151184082031, "logps/ref_chosen": -86.5498046875, "logps/ref_rejected": -110.39498901367188, "logps/rejected": -111.87040710449219, "loss": 1.1249, "margin_dpo/margin_mean": 5.553721904754639, "margin_dpo/margin_std": 10.197261810302734, "step": 323 }, { "epoch": 0.4897959183673469, "fcm_dpo/beta": 0.11416380107402802, "fcm_dpo/delta": -0.02028251811861992, "fcm_dpo/margin": 5.4193010330200195, "fcm_dpo/q_t": 0.37376847863197327, "grad_norm": 22.1555233001709, "learning_rate": 3.0378713696502097e-07, "logits/chosen": 1.5647233724594116, "logits/rejected": 1.423370599746704, "logps/chosen": -69.38572692871094, "logps/ref_chosen": -74.44218444824219, "logps/ref_rejected": -85.7646484375, "logps/rejected": -86.12747955322266, "loss": 1.0561, "margin_dpo/margin_mean": 5.4193010330200195, "margin_dpo/margin_std": 8.43246078491211, "step": 324 }, { "epoch": 0.491307634164777, "fcm_dpo/beta": 0.11330123245716095, "fcm_dpo/delta": 0.00421547144651413, "fcm_dpo/margin": 5.240480422973633, "fcm_dpo/q_t": 0.3807021379470825, "grad_norm": 21.85565948486328, "learning_rate": 3.0249513619156206e-07, "logits/chosen": 1.697942852973938, "logits/rejected": 1.5425655841827393, "logps/chosen": -78.68364715576172, "logps/ref_chosen": -81.43812561035156, "logps/ref_rejected": -97.04302978515625, "logps/rejected": -99.5290298461914, "loss": 1.0905, "margin_dpo/margin_mean": 5.240480422973633, "margin_dpo/margin_std": 8.678589820861816, "step": 325 }, { "epoch": 0.4928193499622071, "fcm_dpo/beta": 0.11744363605976105, "fcm_dpo/delta": 0.18146520853042603, "fcm_dpo/margin": 3.6321234703063965, "fcm_dpo/q_t": 0.4141997694969177, "grad_norm": 22.132080078125, "learning_rate": 3.012016670162977e-07, "logits/chosen": 1.546274185180664, "logits/rejected": 1.554617166519165, "logps/chosen": -90.23068237304688, "logps/ref_chosen": -91.65318298339844, "logps/ref_rejected": -90.64222717285156, "logps/rejected": -92.85185241699219, "loss": 1.2096, "margin_dpo/margin_mean": 3.6321234703063965, "margin_dpo/margin_std": 8.183576583862305, "step": 326 }, { "epoch": 0.4943310657596372, "fcm_dpo/beta": 0.12188417464494705, "fcm_dpo/delta": 0.1355394572019577, "fcm_dpo/margin": 3.8523268699645996, "fcm_dpo/q_t": 0.41473180055618286, "grad_norm": 25.818225860595703, "learning_rate": 2.99906765620341e-07, "logits/chosen": 1.3597536087036133, "logits/rejected": 1.2990000247955322, "logps/chosen": -88.77840423583984, "logps/ref_chosen": -89.97216796875, "logps/ref_rejected": -97.54869079589844, "logps/rejected": -100.20724487304688, "loss": 1.2589, "margin_dpo/margin_mean": 3.8523268699645996, "margin_dpo/margin_std": 9.669429779052734, "step": 327 }, { "epoch": 0.4958427815570673, "fcm_dpo/beta": 0.12286022305488586, "fcm_dpo/delta": 0.0626559630036354, "fcm_dpo/margin": 4.401291847229004, "fcm_dpo/q_t": 0.39334431290626526, "grad_norm": 21.354494094848633, "learning_rate": 2.9861046822486766e-07, "logits/chosen": 1.6216108798980713, "logits/rejected": 1.524030327796936, "logps/chosen": -78.1365966796875, "logps/ref_chosen": -80.27335357666016, "logps/ref_rejected": -99.04093170166016, "logps/rejected": -101.30546569824219, "loss": 1.1266, "margin_dpo/margin_mean": 4.401291847229004, "margin_dpo/margin_std": 8.014993667602539, "step": 328 }, { "epoch": 0.4973544973544973, "fcm_dpo/beta": 0.12164503335952759, "fcm_dpo/delta": -0.046737946569919586, "fcm_dpo/margin": 5.275608539581299, "fcm_dpo/q_t": 0.3809892535209656, "grad_norm": 22.692781448364258, "learning_rate": 2.9731281109010253e-07, "logits/chosen": 1.4558098316192627, "logits/rejected": 1.334930419921875, "logps/chosen": -77.7724380493164, "logps/ref_chosen": -79.75892639160156, "logps/ref_rejected": -102.06265258789062, "logps/rejected": -105.35179138183594, "loss": 1.0789, "margin_dpo/margin_mean": 5.275609016418457, "margin_dpo/margin_std": 8.842366218566895, "step": 329 }, { "epoch": 0.4988662131519274, "fcm_dpo/beta": 0.11930276453495026, "fcm_dpo/delta": -0.23741337656974792, "fcm_dpo/margin": 6.860002517700195, "fcm_dpo/q_t": 0.3392508327960968, "grad_norm": 18.562469482421875, "learning_rate": 2.9601383051430505e-07, "logits/chosen": 1.6714489459991455, "logits/rejected": 1.5213907957077026, "logps/chosen": -67.23228454589844, "logps/ref_chosen": -70.55734252929688, "logps/ref_rejected": -94.53077697753906, "logps/rejected": -98.06571960449219, "loss": 0.9976, "margin_dpo/margin_mean": 6.8600029945373535, "margin_dpo/margin_std": 9.460945129394531, "step": 330 }, { "epoch": 0.5003779289493575, "fcm_dpo/beta": 0.1132928729057312, "fcm_dpo/delta": -0.18013113737106323, "fcm_dpo/margin": 6.756691932678223, "fcm_dpo/q_t": 0.3501453101634979, "grad_norm": 17.997892379760742, "learning_rate": 2.947135628327544e-07, "logits/chosen": 1.7973315715789795, "logits/rejected": 1.7496538162231445, "logps/chosen": -74.01251220703125, "logps/ref_chosen": -75.46063232421875, "logps/ref_rejected": -84.78495788574219, "logps/rejected": -90.0935287475586, "loss": 1.0156, "margin_dpo/margin_mean": 6.756691932678223, "margin_dpo/margin_std": 9.993051528930664, "step": 331 }, { "epoch": 0.5018896447467877, "fcm_dpo/beta": 0.11203811317682266, "fcm_dpo/delta": -0.11051306873559952, "fcm_dpo/margin": 6.262184143066406, "fcm_dpo/q_t": 0.3616057336330414, "grad_norm": 22.966106414794922, "learning_rate": 2.934120444167326e-07, "logits/chosen": 1.3178709745407104, "logits/rejected": 1.2265524864196777, "logps/chosen": -82.64828491210938, "logps/ref_chosen": -84.32807922363281, "logps/ref_rejected": -95.63302612304688, "logps/rejected": -100.21540832519531, "loss": 0.9857, "margin_dpo/margin_mean": 6.2621846199035645, "margin_dpo/margin_std": 8.354652404785156, "step": 332 }, { "epoch": 0.5034013605442177, "fcm_dpo/beta": 0.10700565576553345, "fcm_dpo/delta": -0.20431974530220032, "fcm_dpo/margin": 7.367546558380127, "fcm_dpo/q_t": 0.33816075325012207, "grad_norm": 16.382230758666992, "learning_rate": 2.921093116725076e-07, "logits/chosen": 1.4132413864135742, "logits/rejected": 1.2920411825180054, "logps/chosen": -77.47364044189453, "logps/ref_chosen": -78.2132339477539, "logps/ref_rejected": -103.82716369628906, "logps/rejected": -110.45510864257812, "loss": 0.8994, "margin_dpo/margin_mean": 7.367546558380127, "margin_dpo/margin_std": 8.159334182739258, "step": 333 }, { "epoch": 0.5049130763416477, "fcm_dpo/beta": 0.10628392547369003, "fcm_dpo/delta": 0.04136139154434204, "fcm_dpo/margin": 5.278096675872803, "fcm_dpo/q_t": 0.38895177841186523, "grad_norm": 19.709138870239258, "learning_rate": 2.9080540104031484e-07, "logits/chosen": 1.8486359119415283, "logits/rejected": 1.6987314224243164, "logps/chosen": -83.99331665039062, "logps/ref_chosen": -85.0171127319336, "logps/ref_rejected": -106.79039764404297, "logps/rejected": -111.04469299316406, "loss": 1.1631, "margin_dpo/margin_mean": 5.278097152709961, "margin_dpo/margin_std": 10.453556060791016, "step": 334 }, { "epoch": 0.5064247921390779, "fcm_dpo/beta": 0.10827849805355072, "fcm_dpo/delta": 0.04513784497976303, "fcm_dpo/margin": 5.131414413452148, "fcm_dpo/q_t": 0.4016219973564148, "grad_norm": 23.983985900878906, "learning_rate": 2.895003489933375e-07, "logits/chosen": 1.232424259185791, "logits/rejected": 1.1511411666870117, "logps/chosen": -79.13843536376953, "logps/ref_chosen": -78.56513214111328, "logps/ref_rejected": -92.68515014648438, "logps/rejected": -98.38986206054688, "loss": 1.2239, "margin_dpo/margin_mean": 5.131413459777832, "margin_dpo/margin_std": 11.419921875, "step": 335 }, { "epoch": 0.5079365079365079, "fcm_dpo/beta": 0.10628928989171982, "fcm_dpo/delta": -0.04637442156672478, "fcm_dpo/margin": 6.045434474945068, "fcm_dpo/q_t": 0.37050485610961914, "grad_norm": 22.625778198242188, "learning_rate": 2.8819419203668675e-07, "logits/chosen": 1.4954988956451416, "logits/rejected": 1.4547088146209717, "logps/chosen": -90.63356018066406, "logps/ref_chosen": -88.63243103027344, "logps/ref_rejected": -107.89385986328125, "logps/rejected": -115.94041442871094, "loss": 1.0439, "margin_dpo/margin_mean": 6.04543399810791, "margin_dpo/margin_std": 9.264274597167969, "step": 336 }, { "epoch": 0.509448223733938, "fcm_dpo/beta": 0.10824999213218689, "fcm_dpo/delta": 0.1246347650885582, "fcm_dpo/margin": 4.448444366455078, "fcm_dpo/q_t": 0.40610629320144653, "grad_norm": 19.88324737548828, "learning_rate": 2.8688696670638053e-07, "logits/chosen": 1.3406782150268555, "logits/rejected": 1.236850619316101, "logps/chosen": -94.76239013671875, "logps/ref_chosen": -93.25018310546875, "logps/ref_rejected": -103.8592529296875, "logps/rejected": -109.81990814208984, "loss": 1.1949, "margin_dpo/margin_mean": 4.44844388961792, "margin_dpo/margin_std": 9.610854148864746, "step": 337 }, { "epoch": 0.5109599395313681, "fcm_dpo/beta": 0.11230061948299408, "fcm_dpo/delta": 0.14871221780776978, "fcm_dpo/margin": 4.068912506103516, "fcm_dpo/q_t": 0.4091913402080536, "grad_norm": 20.65287971496582, "learning_rate": 2.8557870956832133e-07, "logits/chosen": 1.6767020225524902, "logits/rejected": 1.6137604713439941, "logps/chosen": -84.22471618652344, "logps/ref_chosen": -81.79462432861328, "logps/ref_rejected": -90.98942565917969, "logps/rejected": -97.48843383789062, "loss": 1.1521, "margin_dpo/margin_mean": 4.068912506103516, "margin_dpo/margin_std": 7.917887210845947, "step": 338 }, { "epoch": 0.5124716553287982, "fcm_dpo/beta": 0.11033567786216736, "fcm_dpo/delta": -0.12316159904003143, "fcm_dpo/margin": 6.471385955810547, "fcm_dpo/q_t": 0.35316306352615356, "grad_norm": 17.84832000732422, "learning_rate": 2.842694572172736e-07, "logits/chosen": 1.7615846395492554, "logits/rejected": 1.5558931827545166, "logps/chosen": -60.82051086425781, "logps/ref_chosen": -61.80355453491211, "logps/ref_rejected": -85.16979217529297, "logps/rejected": -90.65812683105469, "loss": 0.978, "margin_dpo/margin_mean": 6.471385955810547, "margin_dpo/margin_std": 8.390908241271973, "step": 339 }, { "epoch": 0.5139833711262283, "fcm_dpo/beta": 0.11038414388895035, "fcm_dpo/delta": -0.023266099393367767, "fcm_dpo/margin": 5.6234588623046875, "fcm_dpo/q_t": 0.3836401104927063, "grad_norm": 21.793188095092773, "learning_rate": 2.8295924627584004e-07, "logits/chosen": 1.7331141233444214, "logits/rejected": 1.7046101093292236, "logps/chosen": -73.67045593261719, "logps/ref_chosen": -72.486083984375, "logps/ref_rejected": -79.86129760742188, "logps/rejected": -86.66914367675781, "loss": 1.1356, "margin_dpo/margin_mean": 5.623458385467529, "margin_dpo/margin_std": 10.358892440795898, "step": 340 }, { "epoch": 0.5154950869236583, "fcm_dpo/beta": 0.10636292397975922, "fcm_dpo/delta": -0.061252087354660034, "fcm_dpo/margin": 6.104777812957764, "fcm_dpo/q_t": 0.3701562285423279, "grad_norm": 18.614044189453125, "learning_rate": 2.816481133934373e-07, "logits/chosen": 1.387300729751587, "logits/rejected": 1.2599934339523315, "logps/chosen": -78.27288818359375, "logps/ref_chosen": -77.36830139160156, "logps/ref_rejected": -94.64933013916016, "logps/rejected": -101.65869903564453, "loss": 1.0677, "margin_dpo/margin_mean": 6.104778289794922, "margin_dpo/margin_std": 9.133384704589844, "step": 341 }, { "epoch": 0.5170068027210885, "fcm_dpo/beta": 0.10703742504119873, "fcm_dpo/delta": -0.057476550340652466, "fcm_dpo/margin": 6.106937885284424, "fcm_dpo/q_t": 0.3717387914657593, "grad_norm": 16.413331985473633, "learning_rate": 2.8033609524527046e-07, "logits/chosen": 1.6669539213180542, "logits/rejected": 1.5651522874832153, "logps/chosen": -70.8360366821289, "logps/ref_chosen": -71.00831604003906, "logps/ref_rejected": -84.22953796386719, "logps/rejected": -90.16419982910156, "loss": 1.0395, "margin_dpo/margin_mean": 6.106938362121582, "margin_dpo/margin_std": 9.189284324645996, "step": 342 }, { "epoch": 0.5185185185185185, "fcm_dpo/beta": 0.10651153326034546, "fcm_dpo/delta": -0.013351213186979294, "fcm_dpo/margin": 3.356048345565796, "fcm_dpo/q_t": 0.42331787943840027, "grad_norm": 20.029727935791016, "learning_rate": 2.7902322853130753e-07, "logits/chosen": 1.2742222547531128, "logits/rejected": 1.2351291179656982, "logps/chosen": -91.23426055908203, "logps/ref_chosen": -91.44624328613281, "logps/ref_rejected": -99.06044006347656, "logps/rejected": -102.20451354980469, "loss": 1.2497, "margin_dpo/margin_mean": 3.356048583984375, "margin_dpo/margin_std": 8.23534870147705, "step": 343 }, { "epoch": 0.5200302343159486, "fcm_dpo/beta": 0.10474497079849243, "fcm_dpo/delta": -0.06812702864408493, "fcm_dpo/margin": 6.323397159576416, "fcm_dpo/q_t": 0.36448514461517334, "grad_norm": 17.860971450805664, "learning_rate": 2.7770954997525274e-07, "logits/chosen": 1.657717227935791, "logits/rejected": 1.498356580734253, "logps/chosen": -74.89875793457031, "logps/ref_chosen": -73.43608093261719, "logps/ref_rejected": -100.76569366455078, "logps/rejected": -108.55177307128906, "loss": 1.0072, "margin_dpo/margin_mean": 6.323396682739258, "margin_dpo/margin_std": 8.665111541748047, "step": 344 }, { "epoch": 0.5215419501133787, "fcm_dpo/beta": 0.10513152182102203, "fcm_dpo/delta": 0.029769858345389366, "fcm_dpo/margin": 5.438445568084717, "fcm_dpo/q_t": 0.38533106446266174, "grad_norm": 16.376707077026367, "learning_rate": 2.7639509632351927e-07, "logits/chosen": 1.825488805770874, "logits/rejected": 1.71343994140625, "logps/chosen": -73.80440521240234, "logps/ref_chosen": -75.79296875, "logps/ref_rejected": -94.34156799316406, "logps/rejected": -97.79145812988281, "loss": 1.1054, "margin_dpo/margin_mean": 5.438445568084717, "margin_dpo/margin_std": 9.440225601196289, "step": 345 }, { "epoch": 0.5230536659108088, "fcm_dpo/beta": 0.10529479384422302, "fcm_dpo/delta": -0.06721366941928864, "fcm_dpo/margin": 6.291139125823975, "fcm_dpo/q_t": 0.37077468633651733, "grad_norm": 17.804290771484375, "learning_rate": 2.7507990434420123e-07, "logits/chosen": 1.8073234558105469, "logits/rejected": 1.6271549463272095, "logps/chosen": -70.38911437988281, "logps/ref_chosen": -72.26289367675781, "logps/ref_rejected": -106.36925506591797, "logps/rejected": -110.78662109375, "loss": 1.0943, "margin_dpo/margin_mean": 6.291138648986816, "margin_dpo/margin_std": 10.537273406982422, "step": 346 }, { "epoch": 0.5245653817082389, "fcm_dpo/beta": 0.10580353438854218, "fcm_dpo/delta": 0.07595658302307129, "fcm_dpo/margin": 4.986888408660889, "fcm_dpo/q_t": 0.3997945785522461, "grad_norm": 19.979366302490234, "learning_rate": 2.737640108260456e-07, "logits/chosen": 1.9318368434906006, "logits/rejected": 1.8235411643981934, "logps/chosen": -70.96662902832031, "logps/ref_chosen": -71.19871520996094, "logps/ref_rejected": -91.543212890625, "logps/rejected": -96.29800415039062, "loss": 1.1564, "margin_dpo/margin_mean": 4.986888408660889, "margin_dpo/margin_std": 9.96225643157959, "step": 347 }, { "epoch": 0.5260770975056689, "fcm_dpo/beta": 0.10394434630870819, "fcm_dpo/delta": -0.10433568060398102, "fcm_dpo/margin": 6.701659202575684, "fcm_dpo/q_t": 0.3716875910758972, "grad_norm": 16.589065551757812, "learning_rate": 2.724474525774229e-07, "logits/chosen": 1.8227729797363281, "logits/rejected": 1.7435760498046875, "logps/chosen": -68.37228393554688, "logps/ref_chosen": -69.95603942871094, "logps/ref_rejected": -83.64309692382812, "logps/rejected": -88.76100158691406, "loss": 1.0329, "margin_dpo/margin_mean": 6.701659202575684, "margin_dpo/margin_std": 10.39244270324707, "step": 348 }, { "epoch": 0.527588813303099, "fcm_dpo/beta": 0.10336841642856598, "fcm_dpo/delta": -0.08035076409578323, "fcm_dpo/margin": 6.521647930145264, "fcm_dpo/q_t": 0.36513036489486694, "grad_norm": 17.546951293945312, "learning_rate": 2.711302664252973e-07, "logits/chosen": 1.61627197265625, "logits/rejected": 1.469820261001587, "logps/chosen": -68.54325866699219, "logps/ref_chosen": -70.71857452392578, "logps/ref_rejected": -99.93263244628906, "logps/rejected": -104.27896118164062, "loss": 1.0386, "margin_dpo/margin_mean": 6.521646976470947, "margin_dpo/margin_std": 9.721222877502441, "step": 349 }, { "epoch": 0.5291005291005291, "fcm_dpo/beta": 0.09830678999423981, "fcm_dpo/delta": -0.19679048657417297, "fcm_dpo/margin": 7.92108678817749, "fcm_dpo/q_t": 0.34431931376457214, "grad_norm": 15.19959831237793, "learning_rate": 2.698124892141971e-07, "logits/chosen": 1.451259732246399, "logits/rejected": 1.3303101062774658, "logps/chosen": -76.7044906616211, "logps/ref_chosen": -78.16873168945312, "logps/ref_rejected": -104.84308624267578, "logps/rejected": -111.29993438720703, "loss": 0.9326, "margin_dpo/margin_mean": 7.921087265014648, "margin_dpo/margin_std": 9.488693237304688, "step": 350 }, { "epoch": 0.5306122448979592, "fcm_dpo/beta": 0.09811578691005707, "fcm_dpo/delta": 0.035826023668050766, "fcm_dpo/margin": 5.767122268676758, "fcm_dpo/q_t": 0.37812340259552, "grad_norm": 16.31528091430664, "learning_rate": 2.6849415780518357e-07, "logits/chosen": 1.5375101566314697, "logits/rejected": 1.3594462871551514, "logps/chosen": -70.99044799804688, "logps/ref_chosen": -71.79151916503906, "logps/ref_rejected": -97.04634094238281, "logps/rejected": -102.01239013671875, "loss": 1.1137, "margin_dpo/margin_mean": 5.767122268676758, "margin_dpo/margin_std": 9.835864067077637, "step": 351 }, { "epoch": 0.5321239606953893, "fcm_dpo/beta": 0.09900518506765366, "fcm_dpo/delta": 0.06033958122134209, "fcm_dpo/margin": 5.478187084197998, "fcm_dpo/q_t": 0.39339566230773926, "grad_norm": 18.448850631713867, "learning_rate": 2.6717530907482024e-07, "logits/chosen": 1.4493322372436523, "logits/rejected": 1.3444267511367798, "logps/chosen": -80.5949478149414, "logps/ref_chosen": -80.86544799804688, "logps/ref_rejected": -102.02129364013672, "logps/rejected": -107.2289810180664, "loss": 1.0991, "margin_dpo/margin_mean": 5.478187084197998, "margin_dpo/margin_std": 9.334031105041504, "step": 352 }, { "epoch": 0.5336356764928194, "fcm_dpo/beta": 0.10044597089290619, "fcm_dpo/delta": 0.007823506370186806, "fcm_dpo/margin": 5.897891521453857, "fcm_dpo/q_t": 0.3776635229587555, "grad_norm": 17.52521514892578, "learning_rate": 2.658559799141411e-07, "logits/chosen": 1.4600498676300049, "logits/rejected": 1.461470365524292, "logps/chosen": -82.79106140136719, "logps/ref_chosen": -84.77235412597656, "logps/ref_rejected": -86.77130889892578, "logps/rejected": -90.68791198730469, "loss": 1.1012, "margin_dpo/margin_mean": 5.897891998291016, "margin_dpo/margin_std": 10.14565372467041, "step": 353 }, { "epoch": 0.5351473922902494, "fcm_dpo/beta": 0.09875574707984924, "fcm_dpo/delta": -0.056699298322200775, "fcm_dpo/margin": 6.602656364440918, "fcm_dpo/q_t": 0.36554601788520813, "grad_norm": 16.898591995239258, "learning_rate": 2.6453620722761895e-07, "logits/chosen": 1.4595623016357422, "logits/rejected": 1.1693522930145264, "logps/chosen": -52.2856559753418, "logps/ref_chosen": -54.33562088012695, "logps/ref_rejected": -92.4120101928711, "logps/rejected": -96.9646987915039, "loss": 1.0463, "margin_dpo/margin_mean": 6.602656364440918, "margin_dpo/margin_std": 9.901817321777344, "step": 354 }, { "epoch": 0.5366591080876795, "fcm_dpo/beta": 0.09814473986625671, "fcm_dpo/delta": -0.07233710587024689, "fcm_dpo/margin": 6.801861763000488, "fcm_dpo/q_t": 0.3676859140396118, "grad_norm": 16.300806045532227, "learning_rate": 2.632160279321328e-07, "logits/chosen": 1.657884955406189, "logits/rejected": 1.3802344799041748, "logps/chosen": -60.84781265258789, "logps/ref_chosen": -61.8388671875, "logps/ref_rejected": -98.65571594238281, "logps/rejected": -104.46652221679688, "loss": 1.0267, "margin_dpo/margin_mean": 6.801861763000488, "margin_dpo/margin_std": 9.98470687866211, "step": 355 }, { "epoch": 0.5381708238851096, "fcm_dpo/beta": 0.09741011261940002, "fcm_dpo/delta": 0.013626519590616226, "fcm_dpo/margin": 6.024990081787109, "fcm_dpo/q_t": 0.3817838132381439, "grad_norm": 18.076440811157227, "learning_rate": 2.618954789559356e-07, "logits/chosen": 1.8296735286712646, "logits/rejected": 1.6565245389938354, "logps/chosen": -61.38618850708008, "logps/ref_chosen": -63.92546463012695, "logps/ref_rejected": -89.682861328125, "logps/rejected": -93.16857147216797, "loss": 1.1492, "margin_dpo/margin_mean": 6.024989128112793, "margin_dpo/margin_std": 11.450614929199219, "step": 356 }, { "epoch": 0.5396825396825397, "fcm_dpo/beta": 0.097140371799469, "fcm_dpo/delta": 0.04706104099750519, "fcm_dpo/margin": 5.682187557220459, "fcm_dpo/q_t": 0.38888034224510193, "grad_norm": 17.230754852294922, "learning_rate": 2.6057459723762076e-07, "logits/chosen": 1.6313337087631226, "logits/rejected": 1.510791301727295, "logps/chosen": -80.17047882080078, "logps/ref_chosen": -81.07588958740234, "logps/ref_rejected": -85.06967163085938, "logps/rejected": -89.846435546875, "loss": 1.122, "margin_dpo/margin_mean": 5.682187557220459, "margin_dpo/margin_std": 9.793578147888184, "step": 357 }, { "epoch": 0.5411942554799698, "fcm_dpo/beta": 0.09706053137779236, "fcm_dpo/delta": -0.14514653384685516, "fcm_dpo/margin": 7.569679260253906, "fcm_dpo/q_t": 0.35118916630744934, "grad_norm": 16.265806198120117, "learning_rate": 2.5925341972508954e-07, "logits/chosen": 1.5452799797058105, "logits/rejected": 1.5542395114898682, "logps/chosen": -82.51944732666016, "logps/ref_chosen": -84.09109497070312, "logps/ref_rejected": -85.07244873046875, "logps/rejected": -91.07048034667969, "loss": 0.9672, "margin_dpo/margin_mean": 7.569679260253906, "margin_dpo/margin_std": 9.713032722473145, "step": 358 }, { "epoch": 0.5427059712773998, "fcm_dpo/beta": 0.09953833371400833, "fcm_dpo/delta": 0.184108167886734, "fcm_dpo/margin": 4.224143028259277, "fcm_dpo/q_t": 0.4165111482143402, "grad_norm": 21.359352111816406, "learning_rate": 2.579319833745169e-07, "logits/chosen": 1.290248155593872, "logits/rejected": 1.2457648515701294, "logps/chosen": -80.081298828125, "logps/ref_chosen": -80.7490234375, "logps/ref_rejected": -94.92911529541016, "logps/rejected": -98.48553466796875, "loss": 1.2256, "margin_dpo/margin_mean": 4.224142551422119, "margin_dpo/margin_std": 9.785512924194336, "step": 359 }, { "epoch": 0.54421768707483, "fcm_dpo/beta": 0.09920628368854523, "fcm_dpo/delta": -0.02150268852710724, "fcm_dpo/margin": 6.251132965087891, "fcm_dpo/q_t": 0.38255369663238525, "grad_norm": 16.852054595947266, "learning_rate": 2.5661032514931834e-07, "logits/chosen": 1.3395185470581055, "logits/rejected": 1.1418027877807617, "logps/chosen": -78.16912078857422, "logps/ref_chosen": -78.38681030273438, "logps/ref_rejected": -109.68933868408203, "logps/rejected": -115.72278594970703, "loss": 1.0573, "margin_dpo/margin_mean": 6.251132011413574, "margin_dpo/margin_std": 10.023842811584473, "step": 360 }, { "epoch": 0.54572940287226, "fcm_dpo/beta": 0.09724115580320358, "fcm_dpo/delta": -0.11813297867774963, "fcm_dpo/margin": 7.2958984375, "fcm_dpo/q_t": 0.3593568801879883, "grad_norm": 16.29405975341797, "learning_rate": 2.552884820191154e-07, "logits/chosen": 1.7258775234222412, "logits/rejected": 1.5785211324691772, "logps/chosen": -73.3158950805664, "logps/ref_chosen": -73.9055404663086, "logps/ref_rejected": -89.8489990234375, "logps/rejected": -96.55525207519531, "loss": 0.9985, "margin_dpo/margin_mean": 7.2958984375, "margin_dpo/margin_std": 10.358057022094727, "step": 361 }, { "epoch": 0.54724111866969, "fcm_dpo/beta": 0.09884364902973175, "fcm_dpo/delta": 0.03309012949466705, "fcm_dpo/margin": 5.702146530151367, "fcm_dpo/q_t": 0.38847479224205017, "grad_norm": 18.959983825683594, "learning_rate": 2.53966490958702e-07, "logits/chosen": 1.7043962478637695, "logits/rejected": 1.397092342376709, "logps/chosen": -83.36479187011719, "logps/ref_chosen": -82.32565307617188, "logps/ref_rejected": -123.14100646972656, "logps/rejected": -129.88229370117188, "loss": 1.122, "margin_dpo/margin_mean": 5.702146530151367, "margin_dpo/margin_std": 10.04395866394043, "step": 362 }, { "epoch": 0.5487528344671202, "fcm_dpo/beta": 0.09623068571090698, "fcm_dpo/delta": -0.08842341601848602, "fcm_dpo/margin": 7.091344833374023, "fcm_dpo/q_t": 0.35770153999328613, "grad_norm": 18.15400505065918, "learning_rate": 2.526443889470099e-07, "logits/chosen": 1.6108663082122803, "logits/rejected": 1.2962677478790283, "logps/chosen": -67.00276184082031, "logps/ref_chosen": -66.05493927001953, "logps/ref_rejected": -106.79598999023438, "logps/rejected": -114.83515167236328, "loss": 0.9693, "margin_dpo/margin_mean": 7.091343879699707, "margin_dpo/margin_std": 8.933826446533203, "step": 363 }, { "epoch": 0.5502645502645502, "fcm_dpo/beta": 0.09286511689424515, "fcm_dpo/delta": -0.17400389909744263, "fcm_dpo/margin": 8.17833137512207, "fcm_dpo/q_t": 0.3556019067764282, "grad_norm": 14.506973266601562, "learning_rate": 2.513222129660744e-07, "logits/chosen": 1.6289961338043213, "logits/rejected": 1.4720258712768555, "logps/chosen": -75.12677001953125, "logps/ref_chosen": -76.38365173339844, "logps/ref_rejected": -100.22221374511719, "logps/rejected": -107.14366149902344, "loss": 1.0085, "margin_dpo/margin_mean": 8.17833137512207, "margin_dpo/margin_std": 11.904781341552734, "step": 364 }, { "epoch": 0.5517762660619804, "fcm_dpo/beta": 0.09245184063911438, "fcm_dpo/delta": -0.006460797972977161, "fcm_dpo/margin": 6.555495262145996, "fcm_dpo/q_t": 0.37094324827194214, "grad_norm": 16.775344848632812, "learning_rate": 2.5e-07, "logits/chosen": 1.6948661804199219, "logits/rejected": 1.6915894746780396, "logps/chosen": -79.36397552490234, "logps/ref_chosen": -81.83399963378906, "logps/ref_rejected": -89.06932830810547, "logps/rejected": -93.15480041503906, "loss": 1.0072, "margin_dpo/margin_mean": 6.555495262145996, "margin_dpo/margin_std": 8.44549560546875, "step": 365 }, { "epoch": 0.5532879818594104, "fcm_dpo/beta": 0.09339077025651932, "fcm_dpo/delta": 0.05166340246796608, "fcm_dpo/margin": 5.897998332977295, "fcm_dpo/q_t": 0.3923712968826294, "grad_norm": 17.448740005493164, "learning_rate": 2.486777870339255e-07, "logits/chosen": 1.3756999969482422, "logits/rejected": 1.3472323417663574, "logps/chosen": -70.53245544433594, "logps/ref_chosen": -72.03398895263672, "logps/ref_rejected": -83.65354919433594, "logps/rejected": -88.05001068115234, "loss": 1.1294, "margin_dpo/margin_mean": 5.897997856140137, "margin_dpo/margin_std": 10.893966674804688, "step": 366 }, { "epoch": 0.5547996976568406, "fcm_dpo/beta": 0.09498877823352814, "fcm_dpo/delta": 0.12418322265148163, "fcm_dpo/margin": 5.073179721832275, "fcm_dpo/q_t": 0.3966239094734192, "grad_norm": 17.194896697998047, "learning_rate": 2.4735561105299014e-07, "logits/chosen": 1.2562966346740723, "logits/rejected": 1.0315181016921997, "logps/chosen": -72.96987915039062, "logps/ref_chosen": -72.39828491210938, "logps/ref_rejected": -95.58364868164062, "logps/rejected": -101.22842407226562, "loss": 1.1257, "margin_dpo/margin_mean": 5.073179244995117, "margin_dpo/margin_std": 9.012093544006348, "step": 367 }, { "epoch": 0.5563114134542706, "fcm_dpo/beta": 0.09633514285087585, "fcm_dpo/delta": 0.05619820952415466, "fcm_dpo/margin": 5.677167892456055, "fcm_dpo/q_t": 0.3904702365398407, "grad_norm": 16.966304779052734, "learning_rate": 2.46033509041298e-07, "logits/chosen": 1.3356876373291016, "logits/rejected": 1.3284211158752441, "logps/chosen": -92.12042236328125, "logps/ref_chosen": -90.12812042236328, "logps/ref_rejected": -91.6636962890625, "logps/rejected": -99.33316040039062, "loss": 1.1177, "margin_dpo/margin_mean": 5.677168369293213, "margin_dpo/margin_std": 10.074119567871094, "step": 368 }, { "epoch": 0.5578231292517006, "fcm_dpo/beta": 0.09858047962188721, "fcm_dpo/delta": 0.10922784358263016, "fcm_dpo/margin": 5.029770851135254, "fcm_dpo/q_t": 0.40121111273765564, "grad_norm": 20.732187271118164, "learning_rate": 2.447115179808846e-07, "logits/chosen": 1.6115643978118896, "logits/rejected": 1.5338950157165527, "logps/chosen": -71.41743469238281, "logps/ref_chosen": -71.29417419433594, "logps/ref_rejected": -99.03875732421875, "logps/rejected": -104.19178771972656, "loss": 1.1616, "margin_dpo/margin_mean": 5.029770851135254, "margin_dpo/margin_std": 10.044649124145508, "step": 369 }, { "epoch": 0.5593348450491308, "fcm_dpo/beta": 0.09525743126869202, "fcm_dpo/delta": -0.20872148871421814, "fcm_dpo/margin": 8.294317245483398, "fcm_dpo/q_t": 0.34550201892852783, "grad_norm": 17.368907928466797, "learning_rate": 2.4338967485068164e-07, "logits/chosen": 1.8338669538497925, "logits/rejected": 1.706188678741455, "logps/chosen": -69.02984619140625, "logps/ref_chosen": -69.14627075195312, "logps/ref_rejected": -93.58651733398438, "logps/rejected": -101.764404296875, "loss": 1.0024, "margin_dpo/margin_mean": 8.294317245483398, "margin_dpo/margin_std": 11.767637252807617, "step": 370 }, { "epoch": 0.5608465608465608, "fcm_dpo/beta": 0.0957072526216507, "fcm_dpo/delta": -0.02265823632478714, "fcm_dpo/margin": 6.463840484619141, "fcm_dpo/q_t": 0.3787399232387543, "grad_norm": 23.442960739135742, "learning_rate": 2.420680166254831e-07, "logits/chosen": 1.911960482597351, "logits/rejected": 1.8716447353363037, "logps/chosen": -66.00799560546875, "logps/ref_chosen": -65.76728820800781, "logps/ref_rejected": -79.9320068359375, "logps/rejected": -86.63655853271484, "loss": 1.1154, "margin_dpo/margin_mean": 6.463840484619141, "margin_dpo/margin_std": 11.249137878417969, "step": 371 }, { "epoch": 0.562358276643991, "fcm_dpo/beta": 0.09300929307937622, "fcm_dpo/delta": -0.15263891220092773, "fcm_dpo/margin": 5.775787353515625, "fcm_dpo/q_t": 0.3951214849948883, "grad_norm": 16.215530395507812, "learning_rate": 2.4074658027491044e-07, "logits/chosen": 1.5042600631713867, "logits/rejected": 1.3156919479370117, "logps/chosen": -69.15006256103516, "logps/ref_chosen": -69.97252655029297, "logps/ref_rejected": -92.38316345214844, "logps/rejected": -97.33649444580078, "loss": 1.1919, "margin_dpo/margin_mean": 5.775787353515625, "margin_dpo/margin_std": 11.504581451416016, "step": 372 }, { "epoch": 0.563869992441421, "fcm_dpo/beta": 0.09124951809644699, "fcm_dpo/delta": -0.022103700786828995, "fcm_dpo/margin": 6.802433013916016, "fcm_dpo/q_t": 0.38105976581573486, "grad_norm": 18.401546478271484, "learning_rate": 2.394254027623792e-07, "logits/chosen": 1.4205693006515503, "logits/rejected": 1.211039423942566, "logps/chosen": -81.23674011230469, "logps/ref_chosen": -79.34700012207031, "logps/ref_rejected": -95.69737243652344, "logps/rejected": -104.3895492553711, "loss": 1.0933, "margin_dpo/margin_mean": 6.802433013916016, "margin_dpo/margin_std": 11.771400451660156, "step": 373 }, { "epoch": 0.5653817082388511, "fcm_dpo/beta": 0.08851991593837738, "fcm_dpo/delta": -0.20605599880218506, "fcm_dpo/margin": 8.923542022705078, "fcm_dpo/q_t": 0.3382510542869568, "grad_norm": 16.197402954101562, "learning_rate": 2.381045210440644e-07, "logits/chosen": 1.2998840808868408, "logits/rejected": 1.2736904621124268, "logps/chosen": -94.04849243164062, "logps/ref_chosen": -93.45108032226562, "logps/ref_rejected": -93.575927734375, "logps/rejected": -103.09687805175781, "loss": 0.9287, "margin_dpo/margin_mean": 8.923542022705078, "margin_dpo/margin_std": 10.628515243530273, "step": 374 }, { "epoch": 0.5668934240362812, "fcm_dpo/beta": 0.08819793164730072, "fcm_dpo/delta": 0.06437498331069946, "fcm_dpo/margin": 6.112876892089844, "fcm_dpo/q_t": 0.3876236081123352, "grad_norm": 17.605037689208984, "learning_rate": 2.3678397206786715e-07, "logits/chosen": 1.652343988418579, "logits/rejected": 1.5740084648132324, "logps/chosen": -75.57273864746094, "logps/ref_chosen": -77.37177276611328, "logps/ref_rejected": -98.59054565429688, "logps/rejected": -102.90438842773438, "loss": 1.1058, "margin_dpo/margin_mean": 6.112877368927002, "margin_dpo/margin_std": 10.367046356201172, "step": 375 }, { "epoch": 0.5684051398337112, "fcm_dpo/beta": 0.08656222373247147, "fcm_dpo/delta": -0.13484880328178406, "fcm_dpo/margin": 8.36793041229248, "fcm_dpo/q_t": 0.35931164026260376, "grad_norm": 13.579909324645996, "learning_rate": 2.3546379277238103e-07, "logits/chosen": 1.4965224266052246, "logits/rejected": 1.3083943128585815, "logps/chosen": -68.63471984863281, "logps/ref_chosen": -68.99790954589844, "logps/ref_rejected": -90.37117004394531, "logps/rejected": -98.37591552734375, "loss": 1.0212, "margin_dpo/margin_mean": 8.367931365966797, "margin_dpo/margin_std": 12.326183319091797, "step": 376 }, { "epoch": 0.5699168556311414, "fcm_dpo/beta": 0.08805879950523376, "fcm_dpo/delta": 0.11377710849046707, "fcm_dpo/margin": 5.578058242797852, "fcm_dpo/q_t": 0.39941370487213135, "grad_norm": 15.439395904541016, "learning_rate": 2.3414402008585886e-07, "logits/chosen": 1.5486724376678467, "logits/rejected": 1.5061874389648438, "logps/chosen": -64.6747817993164, "logps/ref_chosen": -64.22705841064453, "logps/ref_rejected": -73.10292053222656, "logps/rejected": -79.12869262695312, "loss": 1.1203, "margin_dpo/margin_mean": 5.578058242797852, "margin_dpo/margin_std": 9.869219779968262, "step": 377 }, { "epoch": 0.5714285714285714, "fcm_dpo/beta": 0.09075718373060226, "fcm_dpo/delta": 0.1389361023902893, "fcm_dpo/margin": 5.1299543380737305, "fcm_dpo/q_t": 0.403687059879303, "grad_norm": 17.890914916992188, "learning_rate": 2.3282469092517977e-07, "logits/chosen": 1.5328569412231445, "logits/rejected": 1.4390175342559814, "logps/chosen": -77.28669738769531, "logps/ref_chosen": -76.90864562988281, "logps/ref_rejected": -90.53460693359375, "logps/rejected": -96.0426025390625, "loss": 1.1606, "margin_dpo/margin_mean": 5.129953384399414, "margin_dpo/margin_std": 10.130236625671387, "step": 378 }, { "epoch": 0.5729402872260015, "fcm_dpo/beta": 0.09023825079202652, "fcm_dpo/delta": -0.05217514559626579, "fcm_dpo/margin": 7.189953327178955, "fcm_dpo/q_t": 0.36802151799201965, "grad_norm": 20.274019241333008, "learning_rate": 2.3150584219481643e-07, "logits/chosen": 1.6543352603912354, "logits/rejected": 1.4839537143707275, "logps/chosen": -90.58717346191406, "logps/ref_chosen": -91.2371597290039, "logps/ref_rejected": -120.1969985961914, "logps/rejected": -126.7369613647461, "loss": 1.0244, "margin_dpo/margin_mean": 7.189952850341797, "margin_dpo/margin_std": 10.472833633422852, "step": 379 }, { "epoch": 0.5744520030234316, "fcm_dpo/beta": 0.08770506083965302, "fcm_dpo/delta": -0.15575076639652252, "fcm_dpo/margin": 8.483596801757812, "fcm_dpo/q_t": 0.34348416328430176, "grad_norm": 14.336956024169922, "learning_rate": 2.3018751078580283e-07, "logits/chosen": 1.458883285522461, "logits/rejected": 1.3730969429016113, "logps/chosen": -75.30821228027344, "logps/ref_chosen": -77.78315734863281, "logps/ref_rejected": -92.56083679199219, "logps/rejected": -98.56948852539062, "loss": 0.996, "margin_dpo/margin_mean": 8.483596801757812, "margin_dpo/margin_std": 11.687616348266602, "step": 380 }, { "epoch": 0.5759637188208617, "fcm_dpo/beta": 0.09002942591905594, "fcm_dpo/delta": 0.15056607127189636, "fcm_dpo/margin": 5.02277135848999, "fcm_dpo/q_t": 0.4095456898212433, "grad_norm": 17.64664077758789, "learning_rate": 2.288697335747027e-07, "logits/chosen": 1.3801250457763672, "logits/rejected": 1.3382147550582886, "logps/chosen": -75.73654174804688, "logps/ref_chosen": -75.28189086914062, "logps/ref_rejected": -81.1995849609375, "logps/rejected": -86.67700958251953, "loss": 1.1939, "margin_dpo/margin_mean": 5.022771835327148, "margin_dpo/margin_std": 10.672150611877441, "step": 381 }, { "epoch": 0.5774754346182918, "fcm_dpo/beta": 0.09176512807607651, "fcm_dpo/delta": 0.07397836446762085, "fcm_dpo/margin": 5.737614631652832, "fcm_dpo/q_t": 0.39556747674942017, "grad_norm": 15.80502986907959, "learning_rate": 2.2755254742257706e-07, "logits/chosen": 1.4859418869018555, "logits/rejected": 1.3694580793380737, "logps/chosen": -79.2926025390625, "logps/ref_chosen": -78.74870300292969, "logps/ref_rejected": -99.77484130859375, "logps/rejected": -106.05636596679688, "loss": 1.0977, "margin_dpo/margin_mean": 5.73761510848999, "margin_dpo/margin_std": 9.539802551269531, "step": 382 }, { "epoch": 0.5789871504157218, "fcm_dpo/beta": 0.09031803905963898, "fcm_dpo/delta": -0.04464414715766907, "fcm_dpo/margin": 7.104597568511963, "fcm_dpo/q_t": 0.37682849168777466, "grad_norm": 20.42765235900879, "learning_rate": 2.2623598917395436e-07, "logits/chosen": 1.2135952711105347, "logits/rejected": 1.3143563270568848, "logps/chosen": -95.2799301147461, "logps/ref_chosen": -95.92772674560547, "logps/ref_rejected": -92.13604736328125, "logps/rejected": -98.59284973144531, "loss": 1.079, "margin_dpo/margin_mean": 7.104598045349121, "margin_dpo/margin_std": 11.770940780639648, "step": 383 }, { "epoch": 0.5804988662131519, "fcm_dpo/beta": 0.08988260477781296, "fcm_dpo/delta": -0.024513855576515198, "fcm_dpo/margin": 6.930983543395996, "fcm_dpo/q_t": 0.37451279163360596, "grad_norm": 16.834680557250977, "learning_rate": 2.2492009565579875e-07, "logits/chosen": 1.8998196125030518, "logits/rejected": 1.8183977603912354, "logps/chosen": -80.07945251464844, "logps/ref_chosen": -80.208984375, "logps/ref_rejected": -94.39380645751953, "logps/rejected": -101.19525146484375, "loss": 1.0493, "margin_dpo/margin_mean": 6.930984020233154, "margin_dpo/margin_std": 10.665237426757812, "step": 384 }, { "epoch": 0.582010582010582, "fcm_dpo/beta": 0.08912694454193115, "fcm_dpo/delta": -0.08408209681510925, "fcm_dpo/margin": 7.610535621643066, "fcm_dpo/q_t": 0.3625371754169464, "grad_norm": 15.654504776000977, "learning_rate": 2.2360490367648084e-07, "logits/chosen": 1.2281148433685303, "logits/rejected": 1.1218184232711792, "logps/chosen": -85.64151000976562, "logps/ref_chosen": -85.26632690429688, "logps/ref_rejected": -102.1983413696289, "logps/rejected": -110.1840591430664, "loss": 0.9953, "margin_dpo/margin_mean": 7.610535621643066, "margin_dpo/margin_std": 10.202959060668945, "step": 385 }, { "epoch": 0.5835222978080121, "fcm_dpo/beta": 0.0907883420586586, "fcm_dpo/delta": 0.205733060836792, "fcm_dpo/margin": 4.438615798950195, "fcm_dpo/q_t": 0.41605353355407715, "grad_norm": 19.5189208984375, "learning_rate": 2.2229045002474724e-07, "logits/chosen": 1.660266637802124, "logits/rejected": 1.497814655303955, "logps/chosen": -95.44474029541016, "logps/ref_chosen": -93.19975280761719, "logps/ref_rejected": -112.98831176757812, "logps/rejected": -119.67190551757812, "loss": 1.2382, "margin_dpo/margin_mean": 4.4386162757873535, "margin_dpo/margin_std": 10.870526313781738, "step": 386 }, { "epoch": 0.5850340136054422, "fcm_dpo/beta": 0.09045910835266113, "fcm_dpo/delta": -0.15201978385448456, "fcm_dpo/margin": 8.19107437133789, "fcm_dpo/q_t": 0.3515468239784241, "grad_norm": 16.042306900024414, "learning_rate": 2.209767714686924e-07, "logits/chosen": 1.5781548023223877, "logits/rejected": 1.3721168041229248, "logps/chosen": -66.01132202148438, "logps/ref_chosen": -66.32861328125, "logps/ref_rejected": -100.56486511230469, "logps/rejected": -108.43865203857422, "loss": 0.9654, "margin_dpo/margin_mean": 8.19107437133789, "margin_dpo/margin_std": 10.515932083129883, "step": 387 }, { "epoch": 0.5865457294028723, "fcm_dpo/beta": 0.09115570038557053, "fcm_dpo/delta": 0.17942151427268982, "fcm_dpo/margin": 4.701900482177734, "fcm_dpo/q_t": 0.4133910834789276, "grad_norm": 25.36449432373047, "learning_rate": 2.1966390475472954e-07, "logits/chosen": 1.275603175163269, "logits/rejected": 1.2700649499893188, "logps/chosen": -94.36763000488281, "logps/ref_chosen": -92.95967864990234, "logps/ref_rejected": -97.9437255859375, "logps/rejected": -104.05357360839844, "loss": 1.219, "margin_dpo/margin_mean": 4.701900482177734, "margin_dpo/margin_std": 10.651166915893555, "step": 388 }, { "epoch": 0.5880574452003023, "fcm_dpo/beta": 0.09211251139640808, "fcm_dpo/delta": -0.03098585084080696, "fcm_dpo/margin": 6.8290510177612305, "fcm_dpo/q_t": 0.3709871172904968, "grad_norm": 15.271201133728027, "learning_rate": 2.1835188660656265e-07, "logits/chosen": 1.538404941558838, "logits/rejected": 1.44305419921875, "logps/chosen": -76.40469360351562, "logps/ref_chosen": -76.89031982421875, "logps/ref_rejected": -93.79212951660156, "logps/rejected": -100.13555908203125, "loss": 1.0445, "margin_dpo/margin_mean": 6.829051971435547, "margin_dpo/margin_std": 10.24068832397461, "step": 389 }, { "epoch": 0.5895691609977324, "fcm_dpo/beta": 0.09146776795387268, "fcm_dpo/delta": -0.032210350036621094, "fcm_dpo/margin": 6.88944149017334, "fcm_dpo/q_t": 0.37150269746780396, "grad_norm": 33.26952362060547, "learning_rate": 2.170407537241599e-07, "logits/chosen": 1.6717298030853271, "logits/rejected": 1.545634388923645, "logps/chosen": -58.30984878540039, "logps/ref_chosen": -61.058815002441406, "logps/ref_rejected": -79.55152893066406, "logps/rejected": -83.69200134277344, "loss": 1.0366, "margin_dpo/margin_mean": 6.889441967010498, "margin_dpo/margin_std": 10.276315689086914, "step": 390 }, { "epoch": 0.5910808767951625, "fcm_dpo/beta": 0.08865350484848022, "fcm_dpo/delta": -0.2127716988325119, "fcm_dpo/margin": 8.979472160339355, "fcm_dpo/q_t": 0.34665796160697937, "grad_norm": 14.729681015014648, "learning_rate": 2.1573054278272636e-07, "logits/chosen": 1.5793694257736206, "logits/rejected": 1.4618054628372192, "logps/chosen": -77.08914947509766, "logps/ref_chosen": -78.60820770263672, "logps/ref_rejected": -103.3367691040039, "logps/rejected": -110.79718780517578, "loss": 1.0036, "margin_dpo/margin_mean": 8.979471206665039, "margin_dpo/margin_std": 12.976020812988281, "step": 391 }, { "epoch": 0.5925925925925926, "fcm_dpo/beta": 0.08490710705518723, "fcm_dpo/delta": -0.2628289759159088, "fcm_dpo/margin": 9.909902572631836, "fcm_dpo/q_t": 0.33273494243621826, "grad_norm": 13.956271171569824, "learning_rate": 2.1442129043167873e-07, "logits/chosen": 1.9578715562820435, "logits/rejected": 1.8658185005187988, "logps/chosen": -83.63600158691406, "logps/ref_chosen": -86.99468994140625, "logps/ref_rejected": -112.73616027832031, "logps/rejected": -119.2873764038086, "loss": 0.9194, "margin_dpo/margin_mean": 9.90990161895752, "margin_dpo/margin_std": 11.858428955078125, "step": 392 }, { "epoch": 0.5941043083900227, "fcm_dpo/beta": 0.08286817371845245, "fcm_dpo/delta": 0.029584839940071106, "fcm_dpo/margin": 6.895089626312256, "fcm_dpo/q_t": 0.3812785744667053, "grad_norm": 14.21916675567627, "learning_rate": 2.131130332936195e-07, "logits/chosen": 1.4827511310577393, "logits/rejected": 1.3603489398956299, "logps/chosen": -70.90792083740234, "logps/ref_chosen": -71.26398468017578, "logps/ref_rejected": -88.99722290039062, "logps/rejected": -95.5362548828125, "loss": 1.0343, "margin_dpo/margin_mean": 6.895089149475098, "margin_dpo/margin_std": 9.561573028564453, "step": 393 }, { "epoch": 0.5956160241874527, "fcm_dpo/beta": 0.08388794958591461, "fcm_dpo/delta": 0.017447378486394882, "fcm_dpo/margin": 6.955907344818115, "fcm_dpo/q_t": 0.37385329604148865, "grad_norm": 17.475746154785156, "learning_rate": 2.1180580796331323e-07, "logits/chosen": 1.7049648761749268, "logits/rejected": 1.5989840030670166, "logps/chosen": -76.82679748535156, "logps/ref_chosen": -78.70564270019531, "logps/ref_rejected": -87.01431274414062, "logps/rejected": -92.09137725830078, "loss": 1.0137, "margin_dpo/margin_mean": 6.955907821655273, "margin_dpo/margin_std": 9.090719223022461, "step": 394 }, { "epoch": 0.5971277399848829, "fcm_dpo/beta": 0.08548291027545929, "fcm_dpo/delta": 0.05162470042705536, "fcm_dpo/margin": 6.411340236663818, "fcm_dpo/q_t": 0.388310968875885, "grad_norm": 15.08697509765625, "learning_rate": 2.104996510066625e-07, "logits/chosen": 1.6531175374984741, "logits/rejected": 1.425588607788086, "logps/chosen": -63.353946685791016, "logps/ref_chosen": -65.30274963378906, "logps/ref_rejected": -93.22492218017578, "logps/rejected": -97.68746185302734, "loss": 1.0842, "margin_dpo/margin_mean": 6.41133975982666, "margin_dpo/margin_std": 10.070281028747559, "step": 395 }, { "epoch": 0.5986394557823129, "fcm_dpo/beta": 0.08400573581457138, "fcm_dpo/delta": 0.023646876215934753, "fcm_dpo/margin": 6.846286296844482, "fcm_dpo/q_t": 0.372989296913147, "grad_norm": 15.486177444458008, "learning_rate": 2.0919459895968517e-07, "logits/chosen": 1.5460519790649414, "logits/rejected": 1.3561407327651978, "logps/chosen": -65.57781982421875, "logps/ref_chosen": -67.33502197265625, "logps/ref_rejected": -98.8193359375, "logps/rejected": -103.9084243774414, "loss": 1.0156, "margin_dpo/margin_mean": 6.846286296844482, "margin_dpo/margin_std": 8.490182876586914, "step": 396 }, { "epoch": 0.600151171579743, "fcm_dpo/beta": 0.08782678842544556, "fcm_dpo/delta": 0.19592741131782532, "fcm_dpo/margin": 4.687417984008789, "fcm_dpo/q_t": 0.40758201479911804, "grad_norm": 18.059585571289062, "learning_rate": 2.078906883274924e-07, "logits/chosen": 1.7242810726165771, "logits/rejected": 1.6127660274505615, "logps/chosen": -88.16705322265625, "logps/ref_chosen": -89.6042251586914, "logps/ref_rejected": -104.9779052734375, "logps/rejected": -108.22815704345703, "loss": 1.215, "margin_dpo/margin_mean": 4.687417030334473, "margin_dpo/margin_std": 10.656184196472168, "step": 397 }, { "epoch": 0.6016628873771731, "fcm_dpo/beta": 0.08691433072090149, "fcm_dpo/delta": -0.10858240723609924, "fcm_dpo/margin": 8.061241149902344, "fcm_dpo/q_t": 0.36168110370635986, "grad_norm": 13.436088562011719, "learning_rate": 2.065879555832674e-07, "logits/chosen": 1.6729035377502441, "logits/rejected": 1.5150872468948364, "logps/chosen": -63.89373016357422, "logps/ref_chosen": -66.43465423583984, "logps/ref_rejected": -90.90376281738281, "logps/rejected": -96.42407989501953, "loss": 0.9976, "margin_dpo/margin_mean": 8.061240196228027, "margin_dpo/margin_std": 11.222570419311523, "step": 398 }, { "epoch": 0.6031746031746031, "fcm_dpo/beta": 0.08543148636817932, "fcm_dpo/delta": -0.05866962671279907, "fcm_dpo/margin": 7.65556001663208, "fcm_dpo/q_t": 0.36848974227905273, "grad_norm": 16.3436222076416, "learning_rate": 2.052864371672457e-07, "logits/chosen": 1.4369208812713623, "logits/rejected": 1.147277593612671, "logps/chosen": -87.73809814453125, "logps/ref_chosen": -87.22315979003906, "logps/ref_rejected": -136.32411193847656, "logps/rejected": -144.49459838867188, "loss": 1.022, "margin_dpo/margin_mean": 7.655560493469238, "margin_dpo/margin_std": 11.066182136535645, "step": 399 }, { "epoch": 0.6046863189720333, "fcm_dpo/beta": 0.08775937557220459, "fcm_dpo/delta": 0.19517040252685547, "fcm_dpo/margin": 4.70993709564209, "fcm_dpo/q_t": 0.41906869411468506, "grad_norm": 19.55310821533203, "learning_rate": 2.0398616948569493e-07, "logits/chosen": 1.7458031177520752, "logits/rejected": 1.553027868270874, "logps/chosen": -92.2566146850586, "logps/ref_chosen": -91.1212158203125, "logps/ref_rejected": -108.19235229492188, "logps/rejected": -114.03768920898438, "loss": 1.209, "margin_dpo/margin_mean": 4.709938049316406, "margin_dpo/margin_std": 10.708078384399414, "step": 400 }, { "epoch": 0.6046863189720333, "eval_fcm_dpo/beta": 0.08902417868375778, "eval_logits/chosen": 1.643101692199707, "eval_logits/rejected": 1.5118398666381836, "eval_logps/chosen": -85.37108612060547, "eval_logps/ref_chosen": -86.90177917480469, "eval_logps/ref_rejected": -96.69639587402344, "eval_logps/rejected": -102.01863861083984, "eval_loss": 0.5379721522331238, "eval_margin_dpo/margin_mean": 6.852944850921631, "eval_margin_dpo/margin_std": 11.100692749023438, "eval_runtime": 42.277, "eval_samples_per_second": 54.474, "eval_steps_per_second": 1.703, "step": 400 }, { "epoch": 0.6061980347694633, "fcm_dpo/beta": 0.08596399426460266, "fcm_dpo/delta": -0.2335837483406067, "fcm_dpo/margin": 9.474281311035156, "fcm_dpo/q_t": 0.3320953845977783, "grad_norm": 13.233601570129395, "learning_rate": 2.0268718890989752e-07, "logits/chosen": 1.7577645778656006, "logits/rejected": 1.5829046964645386, "logps/chosen": -64.07991027832031, "logps/ref_chosen": -67.54151153564453, "logps/ref_rejected": -98.06488800048828, "logps/rejected": -104.07756042480469, "loss": 0.9049, "margin_dpo/margin_mean": 9.474281311035156, "margin_dpo/margin_std": 10.675681114196777, "step": 401 }, { "epoch": 0.6077097505668935, "fcm_dpo/beta": 0.08430798351764679, "fcm_dpo/delta": -0.025104699656367302, "fcm_dpo/margin": 7.391202926635742, "fcm_dpo/q_t": 0.37311261892318726, "grad_norm": 22.79948616027832, "learning_rate": 2.013895317751323e-07, "logits/chosen": 1.424646258354187, "logits/rejected": 1.3981235027313232, "logps/chosen": -74.81704711914062, "logps/ref_chosen": -77.44487762451172, "logps/ref_rejected": -83.1333236694336, "logps/rejected": -87.89669799804688, "loss": 1.0328, "margin_dpo/margin_mean": 7.391202926635742, "margin_dpo/margin_std": 10.843732833862305, "step": 402 }, { "epoch": 0.6092214663643235, "fcm_dpo/beta": 0.08236850798130035, "fcm_dpo/delta": -0.2032555490732193, "fcm_dpo/margin": 9.562971115112305, "fcm_dpo/q_t": 0.3458006978034973, "grad_norm": 14.288395881652832, "learning_rate": 2.0009323437965898e-07, "logits/chosen": 1.732656478881836, "logits/rejected": 1.5398776531219482, "logps/chosen": -67.0589599609375, "logps/ref_chosen": -68.8230972290039, "logps/ref_rejected": -99.82356262207031, "logps/rejected": -107.62240600585938, "loss": 0.9865, "margin_dpo/margin_mean": 9.562971115112305, "margin_dpo/margin_std": 13.16672420501709, "step": 403 }, { "epoch": 0.6107331821617535, "fcm_dpo/beta": 0.07909810543060303, "fcm_dpo/delta": -0.09509045630693436, "fcm_dpo/margin": 8.658452033996582, "fcm_dpo/q_t": 0.36028921604156494, "grad_norm": 14.61069393157959, "learning_rate": 1.9879833298370237e-07, "logits/chosen": 1.521719217300415, "logits/rejected": 1.3546950817108154, "logps/chosen": -77.83849334716797, "logps/ref_chosen": -80.26783752441406, "logps/ref_rejected": -111.60258483886719, "logps/rejected": -117.8316879272461, "loss": 0.9825, "margin_dpo/margin_mean": 8.658452033996582, "margin_dpo/margin_std": 10.987064361572266, "step": 404 }, { "epoch": 0.6122448979591837, "fcm_dpo/beta": 0.0779149979352951, "fcm_dpo/delta": -0.10941280424594879, "fcm_dpo/margin": 6.381047248840332, "fcm_dpo/q_t": 0.39759546518325806, "grad_norm": 13.850225448608398, "learning_rate": 1.975048638084379e-07, "logits/chosen": 1.3373526334762573, "logits/rejected": 1.2223901748657227, "logps/chosen": -66.5140151977539, "logps/ref_chosen": -68.31065368652344, "logps/ref_rejected": -81.56044006347656, "logps/rejected": -86.14483642578125, "loss": 1.1024, "margin_dpo/margin_mean": 6.381047248840332, "margin_dpo/margin_std": 10.001663208007812, "step": 405 }, { "epoch": 0.6137566137566137, "fcm_dpo/beta": 0.07638435065746307, "fcm_dpo/delta": -0.11889545619487762, "fcm_dpo/margin": 9.296536445617676, "fcm_dpo/q_t": 0.3526379466056824, "grad_norm": 13.350903511047363, "learning_rate": 1.9621286303497914e-07, "logits/chosen": 1.6963818073272705, "logits/rejected": 1.3465378284454346, "logps/chosen": -62.2177619934082, "logps/ref_chosen": -64.86714935302734, "logps/ref_rejected": -110.06051635742188, "logps/rejected": -116.70765686035156, "loss": 0.9653, "margin_dpo/margin_mean": 9.296536445617676, "margin_dpo/margin_std": 11.602973937988281, "step": 406 }, { "epoch": 0.6152683295540439, "fcm_dpo/beta": 0.07776181399822235, "fcm_dpo/delta": 0.11209922283887863, "fcm_dpo/margin": 6.331610202789307, "fcm_dpo/q_t": 0.3990859389305115, "grad_norm": 18.031661987304688, "learning_rate": 1.9492236680336483e-07, "logits/chosen": 1.3154183626174927, "logits/rejected": 1.1056073904037476, "logps/chosen": -104.64439392089844, "logps/ref_chosen": -102.01712799072266, "logps/ref_rejected": -121.53548431396484, "logps/rejected": -130.49435424804688, "loss": 1.1031, "margin_dpo/margin_mean": 6.331610679626465, "margin_dpo/margin_std": 10.628255844116211, "step": 407 }, { "epoch": 0.6167800453514739, "fcm_dpo/beta": 0.07716310024261475, "fcm_dpo/delta": -0.11025048792362213, "fcm_dpo/margin": 9.09909439086914, "fcm_dpo/q_t": 0.35395896434783936, "grad_norm": 12.185460090637207, "learning_rate": 1.9363341121154895e-07, "logits/chosen": 1.5756661891937256, "logits/rejected": 1.3888509273529053, "logps/chosen": -71.03925323486328, "logps/ref_chosen": -72.77989959716797, "logps/ref_rejected": -92.01815795898438, "logps/rejected": -99.37660217285156, "loss": 0.9483, "margin_dpo/margin_mean": 9.09909439086914, "margin_dpo/margin_std": 10.556812286376953, "step": 408 }, { "epoch": 0.618291761148904, "fcm_dpo/beta": 0.07857310026884079, "fcm_dpo/delta": 0.23402190208435059, "fcm_dpo/margin": 4.776151657104492, "fcm_dpo/q_t": 0.4284062385559082, "grad_norm": 14.054645538330078, "learning_rate": 1.9234603231438994e-07, "logits/chosen": 1.5529547929763794, "logits/rejected": 1.55470609664917, "logps/chosen": -79.22383117675781, "logps/ref_chosen": -77.7901611328125, "logps/ref_rejected": -79.2997055053711, "logps/rejected": -85.509521484375, "loss": 1.2311, "margin_dpo/margin_mean": 4.776151657104492, "margin_dpo/margin_std": 11.638947486877441, "step": 409 }, { "epoch": 0.6198034769463341, "fcm_dpo/beta": 0.07810753583908081, "fcm_dpo/delta": -0.09311722218990326, "fcm_dpo/margin": 8.771734237670898, "fcm_dpo/q_t": 0.35872140526771545, "grad_norm": 14.657498359680176, "learning_rate": 1.9106026612264315e-07, "logits/chosen": 1.351644515991211, "logits/rejected": 1.2707946300506592, "logps/chosen": -80.2009506225586, "logps/ref_chosen": -80.35844421386719, "logps/ref_rejected": -92.19056701660156, "logps/rejected": -100.80480194091797, "loss": 0.9973, "margin_dpo/margin_mean": 8.771734237670898, "margin_dpo/margin_std": 11.75565242767334, "step": 410 }, { "epoch": 0.6213151927437641, "fcm_dpo/beta": 0.0773276686668396, "fcm_dpo/delta": -0.09193338453769684, "fcm_dpo/margin": 8.867551803588867, "fcm_dpo/q_t": 0.36154991388320923, "grad_norm": 22.89755630493164, "learning_rate": 1.8977614860195296e-07, "logits/chosen": 1.677168369293213, "logits/rejected": 1.54649019241333, "logps/chosen": -70.70728302001953, "logps/ref_chosen": -70.72857666015625, "logps/ref_rejected": -93.19204711914062, "logps/rejected": -102.03831481933594, "loss": 0.9948, "margin_dpo/margin_mean": 8.867551803588867, "margin_dpo/margin_std": 12.079263687133789, "step": 411 }, { "epoch": 0.6228269085411943, "fcm_dpo/beta": 0.07649530470371246, "fcm_dpo/delta": 0.018762707710266113, "fcm_dpo/margin": 7.599701881408691, "fcm_dpo/q_t": 0.381513774394989, "grad_norm": 16.80459976196289, "learning_rate": 1.8849371567184662e-07, "logits/chosen": 1.7272131443023682, "logits/rejected": 1.6005456447601318, "logps/chosen": -74.96763610839844, "logps/ref_chosen": -72.87568664550781, "logps/ref_rejected": -88.21068572998047, "logps/rejected": -97.90233612060547, "loss": 1.0506, "margin_dpo/margin_mean": 7.599700927734375, "margin_dpo/margin_std": 11.350024223327637, "step": 412 }, { "epoch": 0.6243386243386243, "fcm_dpo/beta": 0.07793605327606201, "fcm_dpo/delta": 0.07105319201946259, "fcm_dpo/margin": 6.835833549499512, "fcm_dpo/q_t": 0.39478057622909546, "grad_norm": 16.19317054748535, "learning_rate": 1.872130032047302e-07, "logits/chosen": 1.1888175010681152, "logits/rejected": 1.079815149307251, "logps/chosen": -87.0733642578125, "logps/ref_chosen": -84.70051574707031, "logps/ref_rejected": -92.06742095947266, "logps/rejected": -101.27610778808594, "loss": 1.1479, "margin_dpo/margin_mean": 6.835833549499512, "margin_dpo/margin_std": 13.243301391601562, "step": 413 }, { "epoch": 0.6258503401360545, "fcm_dpo/beta": 0.07741403579711914, "fcm_dpo/delta": -0.06040637195110321, "fcm_dpo/margin": 8.477437019348145, "fcm_dpo/q_t": 0.3683781325817108, "grad_norm": 13.204545021057129, "learning_rate": 1.8593404702488436e-07, "logits/chosen": 1.5923712253570557, "logits/rejected": 1.4576035737991333, "logps/chosen": -73.57768249511719, "logps/ref_chosen": -70.97660827636719, "logps/ref_rejected": -92.90523529052734, "logps/rejected": -103.9837417602539, "loss": 1.0228, "margin_dpo/margin_mean": 8.477436065673828, "margin_dpo/margin_std": 12.279195785522461, "step": 414 }, { "epoch": 0.6273620559334845, "fcm_dpo/beta": 0.07709582149982452, "fcm_dpo/delta": -0.006522274576127529, "fcm_dpo/margin": 7.860967636108398, "fcm_dpo/q_t": 0.3789500594139099, "grad_norm": 15.471349716186523, "learning_rate": 1.846568829074628e-07, "logits/chosen": 1.4484164714813232, "logits/rejected": 1.361172080039978, "logps/chosen": -74.24467468261719, "logps/ref_chosen": -71.7189712524414, "logps/ref_rejected": -74.54219818115234, "logps/rejected": -84.92887878417969, "loss": 1.1216, "margin_dpo/margin_mean": 7.860968112945557, "margin_dpo/margin_std": 14.171285629272461, "step": 415 }, { "epoch": 0.6288737717309146, "fcm_dpo/beta": 0.0767393410205841, "fcm_dpo/delta": -0.06185510754585266, "fcm_dpo/margin": 5.816371440887451, "fcm_dpo/q_t": 0.41204434633255005, "grad_norm": 15.024466514587402, "learning_rate": 1.8338154657749128e-07, "logits/chosen": 1.430651307106018, "logits/rejected": 1.2943285703659058, "logps/chosen": -76.21292114257812, "logps/ref_chosen": -72.88249206542969, "logps/ref_rejected": -85.30693054199219, "logps/rejected": -94.45372009277344, "loss": 1.2034, "margin_dpo/margin_mean": 5.816370964050293, "margin_dpo/margin_std": 12.529787063598633, "step": 416 }, { "epoch": 0.6303854875283447, "fcm_dpo/beta": 0.0747881531715393, "fcm_dpo/delta": -0.18942071497440338, "fcm_dpo/margin": 10.360841751098633, "fcm_dpo/q_t": 0.3438907861709595, "grad_norm": 13.735955238342285, "learning_rate": 1.8210807370886849e-07, "logits/chosen": 1.8037786483764648, "logits/rejected": 1.6183760166168213, "logps/chosen": -75.36857604980469, "logps/ref_chosen": -72.49703216552734, "logps/ref_rejected": -89.38966369628906, "logps/rejected": -102.62205505371094, "loss": 0.9679, "margin_dpo/margin_mean": 10.360841751098633, "margin_dpo/margin_std": 13.366008758544922, "step": 417 }, { "epoch": 0.6318972033257747, "fcm_dpo/beta": 0.07399855554103851, "fcm_dpo/delta": 0.03859926387667656, "fcm_dpo/margin": 4.893869876861572, "fcm_dpo/q_t": 0.42707228660583496, "grad_norm": 16.023584365844727, "learning_rate": 1.8083649992336825e-07, "logits/chosen": 1.6475262641906738, "logits/rejected": 1.6260570287704468, "logps/chosen": -95.30635070800781, "logps/ref_chosen": -89.70926666259766, "logps/ref_rejected": -90.98756408691406, "logps/rejected": -101.478515625, "loss": 1.2382, "margin_dpo/margin_mean": 4.893869400024414, "margin_dpo/margin_std": 11.968032836914062, "step": 418 }, { "epoch": 0.6334089191232048, "fcm_dpo/beta": 0.0715949535369873, "fcm_dpo/delta": -0.1930314600467682, "fcm_dpo/margin": 10.8507080078125, "fcm_dpo/q_t": 0.3386869430541992, "grad_norm": 12.689598083496094, "learning_rate": 1.7956686078964255e-07, "logits/chosen": 1.2596359252929688, "logits/rejected": 1.0992696285247803, "logps/chosen": -75.74531555175781, "logps/ref_chosen": -75.652099609375, "logps/ref_rejected": -91.0013427734375, "logps/rejected": -101.94527435302734, "loss": 0.9125, "margin_dpo/margin_mean": 10.8507080078125, "margin_dpo/margin_std": 12.108956336975098, "step": 419 }, { "epoch": 0.6349206349206349, "fcm_dpo/beta": 0.07248981297016144, "fcm_dpo/delta": 0.14235371351242065, "fcm_dpo/margin": 6.4076995849609375, "fcm_dpo/q_t": 0.4088389575481415, "grad_norm": 14.2985258102417, "learning_rate": 1.782991918222275e-07, "logits/chosen": 1.4011223316192627, "logits/rejected": 1.2798104286193848, "logps/chosen": -77.19985961914062, "logps/ref_chosen": -72.58027648925781, "logps/ref_rejected": -79.90303802490234, "logps/rejected": -90.93031311035156, "loss": 1.2141, "margin_dpo/margin_mean": 6.4076995849609375, "margin_dpo/margin_std": 14.4735107421875, "step": 420 }, { "epoch": 0.636432350718065, "fcm_dpo/beta": 0.0741676315665245, "fcm_dpo/delta": 0.0948985144495964, "fcm_dpo/margin": 6.876837253570557, "fcm_dpo/q_t": 0.39845705032348633, "grad_norm": 14.456829071044922, "learning_rate": 1.7703352848054887e-07, "logits/chosen": 1.5099852085113525, "logits/rejected": 1.2740275859832764, "logps/chosen": -82.48133087158203, "logps/ref_chosen": -78.71546936035156, "logps/ref_rejected": -90.82321166992188, "logps/rejected": -101.46591186523438, "loss": 1.2044, "margin_dpo/margin_mean": 6.876836776733398, "margin_dpo/margin_std": 14.906235694885254, "step": 421 }, { "epoch": 0.6379440665154951, "fcm_dpo/beta": 0.0753115639090538, "fcm_dpo/delta": 0.041089512407779694, "fcm_dpo/margin": 7.448758125305176, "fcm_dpo/q_t": 0.3887425363063812, "grad_norm": 15.896506309509277, "learning_rate": 1.7576990616793137e-07, "logits/chosen": 1.6806402206420898, "logits/rejected": 1.6230077743530273, "logps/chosen": -89.55979919433594, "logps/ref_chosen": -86.74519348144531, "logps/ref_rejected": -94.02015686035156, "logps/rejected": -104.28353881835938, "loss": 1.0896, "margin_dpo/margin_mean": 7.448757171630859, "margin_dpo/margin_std": 12.532878875732422, "step": 422 }, { "epoch": 0.6394557823129252, "fcm_dpo/beta": 0.07428386062383652, "fcm_dpo/delta": -0.048137813806533813, "fcm_dpo/margin": 8.670099258422852, "fcm_dpo/q_t": 0.3703242540359497, "grad_norm": 13.458715438842773, "learning_rate": 1.745083602306071e-07, "logits/chosen": 1.7160992622375488, "logits/rejected": 1.5093460083007812, "logps/chosen": -75.56707763671875, "logps/ref_chosen": -72.02232360839844, "logps/ref_rejected": -93.26976776123047, "logps/rejected": -105.484619140625, "loss": 1.0292, "margin_dpo/margin_mean": 8.670099258422852, "margin_dpo/margin_std": 12.686551094055176, "step": 423 }, { "epoch": 0.6409674981103552, "fcm_dpo/beta": 0.07354743778705597, "fcm_dpo/delta": -0.09381558746099472, "fcm_dpo/margin": 9.346273422241211, "fcm_dpo/q_t": 0.358426570892334, "grad_norm": 14.17031192779541, "learning_rate": 1.7324892595672804e-07, "logits/chosen": 1.158247470855713, "logits/rejected": 1.0918021202087402, "logps/chosen": -72.14212799072266, "logps/ref_chosen": -68.22148132324219, "logps/ref_rejected": -94.12411499023438, "logps/rejected": -107.39103698730469, "loss": 0.9896, "margin_dpo/margin_mean": 9.346274375915527, "margin_dpo/margin_std": 12.390763282775879, "step": 424 }, { "epoch": 0.6424792139077853, "fcm_dpo/beta": 0.07256484031677246, "fcm_dpo/delta": -0.08450430631637573, "fcm_dpo/margin": 9.353759765625, "fcm_dpo/q_t": 0.3590080738067627, "grad_norm": 13.15410041809082, "learning_rate": 1.7199163857537824e-07, "logits/chosen": 1.5787358283996582, "logits/rejected": 1.5033378601074219, "logps/chosen": -78.48164367675781, "logps/ref_chosen": -75.90104675292969, "logps/ref_rejected": -86.08673095703125, "logps/rejected": -98.02108764648438, "loss": 0.9676, "margin_dpo/margin_mean": 9.353760719299316, "margin_dpo/margin_std": 11.51694107055664, "step": 425 }, { "epoch": 0.6439909297052154, "fcm_dpo/beta": 0.07429321110248566, "fcm_dpo/delta": 0.23680397868156433, "fcm_dpo/margin": 5.014549732208252, "fcm_dpo/q_t": 0.43028032779693604, "grad_norm": 19.14048957824707, "learning_rate": 1.7073653325558828e-07, "logits/chosen": 1.6519157886505127, "logits/rejected": 1.637803316116333, "logps/chosen": -95.85223388671875, "logps/ref_chosen": -89.93118286132812, "logps/ref_rejected": -91.04658508300781, "logps/rejected": -101.98219299316406, "loss": 1.2953, "margin_dpo/margin_mean": 5.014549732208252, "margin_dpo/margin_std": 14.537099838256836, "step": 426 }, { "epoch": 0.6455026455026455, "fcm_dpo/beta": 0.07508181780576706, "fcm_dpo/delta": -0.03407387062907219, "fcm_dpo/margin": 8.416566848754883, "fcm_dpo/q_t": 0.37357228994369507, "grad_norm": 14.374457359313965, "learning_rate": 1.6948364510535218e-07, "logits/chosen": 1.7039687633514404, "logits/rejected": 1.5296571254730225, "logps/chosen": -82.45877838134766, "logps/ref_chosen": -77.83393859863281, "logps/ref_rejected": -98.69864654541016, "logps/rejected": -111.74006652832031, "loss": 1.0382, "margin_dpo/margin_mean": 8.416566848754883, "margin_dpo/margin_std": 12.733621597290039, "step": 427 }, { "epoch": 0.6470143613000756, "fcm_dpo/beta": 0.07478933781385422, "fcm_dpo/delta": -0.026344936341047287, "fcm_dpo/margin": 8.352245330810547, "fcm_dpo/q_t": 0.3767045736312866, "grad_norm": 15.099655151367188, "learning_rate": 1.6823300917064458e-07, "logits/chosen": 1.3732352256774902, "logits/rejected": 1.3193600177764893, "logps/chosen": -95.37033081054688, "logps/ref_chosen": -90.3450927734375, "logps/ref_rejected": -100.24185180664062, "logps/rejected": -113.61933135986328, "loss": 1.0725, "margin_dpo/margin_mean": 8.352245330810547, "margin_dpo/margin_std": 13.533937454223633, "step": 428 }, { "epoch": 0.6485260770975056, "fcm_dpo/beta": 0.07440754771232605, "fcm_dpo/delta": -0.06462083011865616, "fcm_dpo/margin": 8.864797592163086, "fcm_dpo/q_t": 0.3679465055465698, "grad_norm": 15.361631393432617, "learning_rate": 1.669846604344412e-07, "logits/chosen": 1.2320432662963867, "logits/rejected": 1.2528884410858154, "logps/chosen": -83.45864868164062, "logps/ref_chosen": -78.24811553955078, "logps/ref_rejected": -75.24495697021484, "logps/rejected": -89.32029724121094, "loss": 1.0975, "margin_dpo/margin_mean": 8.864797592163086, "margin_dpo/margin_std": 14.934419631958008, "step": 429 }, { "epoch": 0.6500377928949358, "fcm_dpo/beta": 0.07186997681856155, "fcm_dpo/delta": -0.13423848152160645, "fcm_dpo/margin": 10.071898460388184, "fcm_dpo/q_t": 0.3517456650733948, "grad_norm": 13.017266273498535, "learning_rate": 1.6573863381573954e-07, "logits/chosen": 1.2743966579437256, "logits/rejected": 1.2923517227172852, "logps/chosen": -79.84930419921875, "logps/ref_chosen": -76.08027648925781, "logps/ref_rejected": -84.09554290771484, "logps/rejected": -97.93647766113281, "loss": 1.0004, "margin_dpo/margin_mean": 10.0718994140625, "margin_dpo/margin_std": 13.884342193603516, "step": 430 }, { "epoch": 0.6515495086923658, "fcm_dpo/beta": 0.07156576216220856, "fcm_dpo/delta": -0.024212071672081947, "fcm_dpo/margin": 8.699821472167969, "fcm_dpo/q_t": 0.3760807514190674, "grad_norm": 13.218536376953125, "learning_rate": 1.6449496416858282e-07, "logits/chosen": 1.4648240804672241, "logits/rejected": 1.3462178707122803, "logps/chosen": -69.23052978515625, "logps/ref_chosen": -66.88581085205078, "logps/ref_rejected": -89.56040954589844, "logps/rejected": -100.60494232177734, "loss": 1.0911, "margin_dpo/margin_mean": 8.699821472167969, "margin_dpo/margin_std": 14.66958236694336, "step": 431 }, { "epoch": 0.6530612244897959, "fcm_dpo/beta": 0.07108249515295029, "fcm_dpo/delta": 0.0022036749869585037, "fcm_dpo/margin": 8.409427642822266, "fcm_dpo/q_t": 0.3838854730129242, "grad_norm": 14.229828834533691, "learning_rate": 1.632536862810844e-07, "logits/chosen": 1.4838778972625732, "logits/rejected": 1.3657793998718262, "logps/chosen": -83.34407043457031, "logps/ref_chosen": -79.65066528320312, "logps/ref_rejected": -103.92634582519531, "logps/rejected": -116.02919006347656, "loss": 1.1148, "margin_dpo/margin_mean": 8.409428596496582, "margin_dpo/margin_std": 15.037290573120117, "step": 432 }, { "epoch": 0.654572940287226, "fcm_dpo/beta": 0.0698608011007309, "fcm_dpo/delta": -0.19808723032474518, "fcm_dpo/margin": 11.203939437866211, "fcm_dpo/q_t": 0.3509420156478882, "grad_norm": 12.846253395080566, "learning_rate": 1.6201483487445515e-07, "logits/chosen": 1.7028049230575562, "logits/rejected": 1.6700106859207153, "logps/chosen": -80.68118286132812, "logps/ref_chosen": -77.30774688720703, "logps/ref_rejected": -81.65180206298828, "logps/rejected": -96.22918701171875, "loss": 1.0107, "margin_dpo/margin_mean": 11.203940391540527, "margin_dpo/margin_std": 16.126134872436523, "step": 433 }, { "epoch": 0.656084656084656, "fcm_dpo/beta": 0.06696303188800812, "fcm_dpo/delta": -0.09753292053937912, "fcm_dpo/margin": 10.282703399658203, "fcm_dpo/q_t": 0.36050257086753845, "grad_norm": 11.524694442749023, "learning_rate": 1.6077844460203204e-07, "logits/chosen": 1.6322749853134155, "logits/rejected": 1.4705924987792969, "logps/chosen": -64.55253601074219, "logps/ref_chosen": -63.31850051879883, "logps/ref_rejected": -89.15093994140625, "logps/rejected": -100.66767883300781, "loss": 1.022, "margin_dpo/margin_mean": 10.282703399658203, "margin_dpo/margin_std": 14.64346694946289, "step": 434 }, { "epoch": 0.6575963718820862, "fcm_dpo/beta": 0.0687536746263504, "fcm_dpo/delta": 0.11254524439573288, "fcm_dpo/margin": 7.150820732116699, "fcm_dpo/q_t": 0.39818453788757324, "grad_norm": 13.866617202758789, "learning_rate": 1.5954455004830878e-07, "logits/chosen": 1.7764880657196045, "logits/rejected": 1.69718337059021, "logps/chosen": -75.79131317138672, "logps/ref_chosen": -71.1719741821289, "logps/ref_rejected": -86.42095184326172, "logps/rejected": -98.19110870361328, "loss": 1.1273, "margin_dpo/margin_mean": 7.150820732116699, "margin_dpo/margin_std": 12.781055450439453, "step": 435 }, { "epoch": 0.6591080876795162, "fcm_dpo/beta": 0.06901911646127701, "fcm_dpo/delta": -0.016552124172449112, "fcm_dpo/margin": 8.907886505126953, "fcm_dpo/q_t": 0.3748345375061035, "grad_norm": 12.772109985351562, "learning_rate": 1.5831318572796847e-07, "logits/chosen": 1.5760531425476074, "logits/rejected": 1.4302666187286377, "logps/chosen": -77.02301025390625, "logps/ref_chosen": -74.45087432861328, "logps/ref_rejected": -86.01708984375, "logps/rejected": -97.49711608886719, "loss": 1.0546, "margin_dpo/margin_mean": 8.907885551452637, "margin_dpo/margin_std": 13.655805587768555, "step": 436 }, { "epoch": 0.6606198034769464, "fcm_dpo/beta": 0.06862768530845642, "fcm_dpo/delta": 0.08339989930391312, "fcm_dpo/margin": 7.569982528686523, "fcm_dpo/q_t": 0.40054136514663696, "grad_norm": 14.294130325317383, "learning_rate": 1.5708438608491815e-07, "logits/chosen": 1.5998663902282715, "logits/rejected": 1.2967803478240967, "logps/chosen": -78.01884460449219, "logps/ref_chosen": -72.38907623291016, "logps/ref_rejected": -111.03279876708984, "logps/rejected": -124.2325439453125, "loss": 1.1775, "margin_dpo/margin_mean": 7.569982528686523, "margin_dpo/margin_std": 15.420955657958984, "step": 437 }, { "epoch": 0.6621315192743764, "fcm_dpo/beta": 0.0692143589258194, "fcm_dpo/delta": -0.11934801936149597, "fcm_dpo/margin": 10.254268646240234, "fcm_dpo/q_t": 0.3581221103668213, "grad_norm": 12.05996322631836, "learning_rate": 1.558581854913253e-07, "logits/chosen": 1.4713064432144165, "logits/rejected": 1.3395293951034546, "logps/chosen": -60.1005859375, "logps/ref_chosen": -57.27682876586914, "logps/ref_rejected": -83.07940673828125, "logps/rejected": -96.15742492675781, "loss": 1.0149, "margin_dpo/margin_mean": 10.25426959991455, "margin_dpo/margin_std": 14.174232482910156, "step": 438 }, { "epoch": 0.6636432350718064, "fcm_dpo/beta": 0.0677412897348404, "fcm_dpo/delta": -0.08142800629138947, "fcm_dpo/margin": 9.969182968139648, "fcm_dpo/q_t": 0.36517322063446045, "grad_norm": 13.264842987060547, "learning_rate": 1.5463461824665658e-07, "logits/chosen": 1.4709299802780151, "logits/rejected": 1.3555489778518677, "logps/chosen": -101.94784545898438, "logps/ref_chosen": -98.35890197753906, "logps/ref_rejected": -112.69817352294922, "logps/rejected": -126.25629425048828, "loss": 1.0005, "margin_dpo/margin_mean": 9.969182968139648, "margin_dpo/margin_std": 13.567419052124023, "step": 439 }, { "epoch": 0.6651549508692366, "fcm_dpo/beta": 0.0649486556649208, "fcm_dpo/delta": -0.22526824474334717, "fcm_dpo/margin": 12.433676719665527, "fcm_dpo/q_t": 0.33364030718803406, "grad_norm": 12.849279403686523, "learning_rate": 1.534137185767178e-07, "logits/chosen": 1.199397087097168, "logits/rejected": 0.9434144496917725, "logps/chosen": -62.47209930419922, "logps/ref_chosen": -61.662452697753906, "logps/ref_rejected": -86.81646728515625, "logps/rejected": -100.0597915649414, "loss": 0.892, "margin_dpo/margin_mean": 12.433677673339844, "margin_dpo/margin_std": 13.436738967895508, "step": 440 }, { "epoch": 0.6666666666666666, "fcm_dpo/beta": 0.06319974362850189, "fcm_dpo/delta": -0.0502944216132164, "fcm_dpo/margin": 10.227783203125, "fcm_dpo/q_t": 0.364610493183136, "grad_norm": 13.985926628112793, "learning_rate": 1.521955206326976e-07, "logits/chosen": 1.3294029235839844, "logits/rejected": 1.0824699401855469, "logps/chosen": -75.17243957519531, "logps/ref_chosen": -74.33235168457031, "logps/ref_rejected": -99.654541015625, "logps/rejected": -110.72241973876953, "loss": 0.9857, "margin_dpo/margin_mean": 10.227784156799316, "margin_dpo/margin_std": 12.865455627441406, "step": 441 }, { "epoch": 0.6681783824640968, "fcm_dpo/beta": 0.06394974142313004, "fcm_dpo/delta": 0.053972695022821426, "fcm_dpo/margin": 8.582521438598633, "fcm_dpo/q_t": 0.38399142026901245, "grad_norm": 12.952445983886719, "learning_rate": 1.5098005849021078e-07, "logits/chosen": 1.4604705572128296, "logits/rejected": 1.3838759660720825, "logps/chosen": -88.58601379394531, "logps/ref_chosen": -82.42591857910156, "logps/ref_rejected": -106.71090698242188, "logps/rejected": -121.45352172851562, "loss": 1.0601, "margin_dpo/margin_mean": 8.582521438598633, "margin_dpo/margin_std": 12.810592651367188, "step": 442 }, { "epoch": 0.6696900982615268, "fcm_dpo/beta": 0.06243997812271118, "fcm_dpo/delta": -0.13223691284656525, "fcm_dpo/margin": 11.554607391357422, "fcm_dpo/q_t": 0.3588990867137909, "grad_norm": 11.408082008361816, "learning_rate": 1.4976736614834662e-07, "logits/chosen": 1.7185375690460205, "logits/rejected": 1.536795735359192, "logps/chosen": -76.07586669921875, "logps/ref_chosen": -72.87019348144531, "logps/ref_rejected": -94.48143005371094, "logps/rejected": -109.24171447753906, "loss": 1.008, "margin_dpo/margin_mean": 11.554609298706055, "margin_dpo/margin_std": 16.71849822998047, "step": 443 }, { "epoch": 0.671201814058957, "fcm_dpo/beta": 0.06339798122644424, "fcm_dpo/delta": 0.08339248597621918, "fcm_dpo/margin": 4.655009746551514, "fcm_dpo/q_t": 0.4389148950576782, "grad_norm": 16.32600975036621, "learning_rate": 1.4855747752871654e-07, "logits/chosen": 1.699981689453125, "logits/rejected": 1.4776818752288818, "logps/chosen": -80.7209701538086, "logps/ref_chosen": -74.650390625, "logps/ref_rejected": -106.89204406738281, "logps/rejected": -117.61763000488281, "loss": 1.2909, "margin_dpo/margin_mean": 4.655010223388672, "margin_dpo/margin_std": 14.046520233154297, "step": 444 }, { "epoch": 0.672713529856387, "fcm_dpo/beta": 0.06215813755989075, "fcm_dpo/delta": -0.12267709523439407, "fcm_dpo/margin": 11.481884002685547, "fcm_dpo/q_t": 0.3535075783729553, "grad_norm": 14.066444396972656, "learning_rate": 1.473504264745062e-07, "logits/chosen": 1.548810362815857, "logits/rejected": 1.5138942003250122, "logps/chosen": -81.1192855834961, "logps/ref_chosen": -76.26957702636719, "logps/ref_rejected": -89.84994506835938, "logps/rejected": -106.1815414428711, "loss": 0.9811, "margin_dpo/margin_mean": 11.481884002685547, "margin_dpo/margin_std": 15.008056640625, "step": 445 }, { "epoch": 0.674225245653817, "fcm_dpo/beta": 0.061203934252262115, "fcm_dpo/delta": -0.18258565664291382, "fcm_dpo/margin": 12.521493911743164, "fcm_dpo/q_t": 0.3369706869125366, "grad_norm": 11.436417579650879, "learning_rate": 1.461462467495284e-07, "logits/chosen": 1.613840103149414, "logits/rejected": 1.4555943012237549, "logps/chosen": -64.69538879394531, "logps/ref_chosen": -62.74647903442383, "logps/ref_rejected": -86.395751953125, "logps/rejected": -100.86616516113281, "loss": 0.8931, "margin_dpo/margin_mean": 12.521492958068848, "margin_dpo/margin_std": 11.73055648803711, "step": 446 }, { "epoch": 0.6757369614512472, "fcm_dpo/beta": 0.05934043973684311, "fcm_dpo/delta": -0.04737187922000885, "fcm_dpo/margin": 10.858245849609375, "fcm_dpo/q_t": 0.36491459608078003, "grad_norm": 15.227911949157715, "learning_rate": 1.4494497203727843e-07, "logits/chosen": 1.104290246963501, "logits/rejected": 0.837737500667572, "logps/chosen": -72.19987487792969, "logps/ref_chosen": -71.06666564941406, "logps/ref_rejected": -103.57111358642578, "logps/rejected": -115.56256103515625, "loss": 1.0167, "margin_dpo/margin_mean": 10.858245849609375, "margin_dpo/margin_std": 15.037179946899414, "step": 447 }, { "epoch": 0.6772486772486772, "fcm_dpo/beta": 0.05986708775162697, "fcm_dpo/delta": 0.056214213371276855, "fcm_dpo/margin": 9.122482299804688, "fcm_dpo/q_t": 0.3877629041671753, "grad_norm": 11.285871505737305, "learning_rate": 1.4374663593999256e-07, "logits/chosen": 1.5320073366165161, "logits/rejected": 1.4336310625076294, "logps/chosen": -78.13660430908203, "logps/ref_chosen": -73.400146484375, "logps/ref_rejected": -96.34330749511719, "logps/rejected": -110.20223999023438, "loss": 1.0652, "margin_dpo/margin_mean": 9.122482299804688, "margin_dpo/margin_std": 13.94580078125, "step": 448 }, { "epoch": 0.6787603930461074, "fcm_dpo/beta": 0.06041814386844635, "fcm_dpo/delta": 0.055707208812236786, "fcm_dpo/margin": 5.106272220611572, "fcm_dpo/q_t": 0.4346545934677124, "grad_norm": 17.820480346679688, "learning_rate": 1.4255127197770707e-07, "logits/chosen": 1.1485610008239746, "logits/rejected": 1.1482468843460083, "logps/chosen": -100.8125991821289, "logps/ref_chosen": -93.66099548339844, "logps/ref_rejected": -102.53019714355469, "logps/rejected": -114.78807067871094, "loss": 1.2859, "margin_dpo/margin_mean": 5.1062726974487305, "margin_dpo/margin_std": 14.768918991088867, "step": 449 }, { "epoch": 0.6802721088435374, "fcm_dpo/beta": 0.06103084981441498, "fcm_dpo/delta": 0.07125148177146912, "fcm_dpo/margin": 8.726282119750977, "fcm_dpo/q_t": 0.3890073895454407, "grad_norm": 11.10236644744873, "learning_rate": 1.4135891358732205e-07, "logits/chosen": 1.480630874633789, "logits/rejected": 1.1958320140838623, "logps/chosen": -64.80349731445312, "logps/ref_chosen": -62.52460479736328, "logps/ref_rejected": -94.04986572265625, "logps/rejected": -105.05503845214844, "loss": 1.0907, "margin_dpo/margin_mean": 8.726282119750977, "margin_dpo/margin_std": 14.36546516418457, "step": 450 }, { "epoch": 0.6817838246409675, "fcm_dpo/beta": 0.06261729449033737, "fcm_dpo/delta": 0.13213106989860535, "fcm_dpo/margin": 7.567141056060791, "fcm_dpo/q_t": 0.40211862325668335, "grad_norm": 12.100115776062012, "learning_rate": 1.4016959412166437e-07, "logits/chosen": 1.2561284303665161, "logits/rejected": 1.1738208532333374, "logps/chosen": -82.48243713378906, "logps/ref_chosen": -79.14009094238281, "logps/ref_rejected": -93.23919677734375, "logps/rejected": -104.14868927001953, "loss": 1.124, "margin_dpo/margin_mean": 7.567141056060791, "margin_dpo/margin_std": 13.501859664916992, "step": 451 }, { "epoch": 0.6832955404383976, "fcm_dpo/beta": 0.0629560723900795, "fcm_dpo/delta": -0.004285541363060474, "fcm_dpo/margin": 9.594108581542969, "fcm_dpo/q_t": 0.3788967430591583, "grad_norm": 12.963932037353516, "learning_rate": 1.3898334684855645e-07, "logits/chosen": 1.3859320878982544, "logits/rejected": 1.1935582160949707, "logps/chosen": -74.30093383789062, "logps/ref_chosen": -70.38827514648438, "logps/ref_rejected": -95.47691345214844, "logps/rejected": -108.98368835449219, "loss": 1.0677, "margin_dpo/margin_mean": 9.594108581542969, "margin_dpo/margin_std": 15.307428359985352, "step": 452 }, { "epoch": 0.6848072562358276, "fcm_dpo/beta": 0.06412823498249054, "fcm_dpo/delta": 0.07911929488182068, "fcm_dpo/margin": 8.15820026397705, "fcm_dpo/q_t": 0.39394861459732056, "grad_norm": 17.296875, "learning_rate": 1.3780020494988445e-07, "logits/chosen": 1.3142707347869873, "logits/rejected": 1.205606460571289, "logps/chosen": -84.48233032226562, "logps/ref_chosen": -79.9207763671875, "logps/ref_rejected": -90.20779418945312, "logps/rejected": -102.92755126953125, "loss": 1.149, "margin_dpo/margin_mean": 8.15820026397705, "margin_dpo/margin_std": 15.361320495605469, "step": 453 }, { "epoch": 0.6863189720332578, "fcm_dpo/beta": 0.06284962594509125, "fcm_dpo/delta": -0.10391123592853546, "fcm_dpo/margin": 11.083335876464844, "fcm_dpo/q_t": 0.3570789694786072, "grad_norm": 11.321577072143555, "learning_rate": 1.366202015206706e-07, "logits/chosen": 1.2680672407150269, "logits/rejected": 1.1710636615753174, "logps/chosen": -70.97998046875, "logps/ref_chosen": -69.71887969970703, "logps/ref_rejected": -82.86952209472656, "logps/rejected": -95.21395874023438, "loss": 1.0145, "margin_dpo/margin_mean": 11.083334922790527, "margin_dpo/margin_std": 15.592472076416016, "step": 454 }, { "epoch": 0.6878306878306878, "fcm_dpo/beta": 0.061194583773612976, "fcm_dpo/delta": -0.140131413936615, "fcm_dpo/margin": 11.924175262451172, "fcm_dpo/q_t": 0.3493908643722534, "grad_norm": 11.763124465942383, "learning_rate": 1.354433695681474e-07, "logits/chosen": 1.165942668914795, "logits/rejected": 1.083987832069397, "logps/chosen": -93.37379455566406, "logps/ref_chosen": -89.51481628417969, "logps/ref_rejected": -97.93235778808594, "logps/rejected": -113.71551513671875, "loss": 0.9404, "margin_dpo/margin_mean": 11.924175262451172, "margin_dpo/margin_std": 14.309093475341797, "step": 455 }, { "epoch": 0.6893424036281179, "fcm_dpo/beta": 0.06031159684062004, "fcm_dpo/delta": -0.015023987740278244, "fcm_dpo/margin": 10.167566299438477, "fcm_dpo/q_t": 0.3712005317211151, "grad_norm": 11.464375495910645, "learning_rate": 1.3426974201083439e-07, "logits/chosen": 1.5450102090835571, "logits/rejected": 1.3907766342163086, "logps/chosen": -78.01705932617188, "logps/ref_chosen": -74.60527038574219, "logps/ref_rejected": -97.98377227783203, "logps/rejected": -111.56312561035156, "loss": 1.0104, "margin_dpo/margin_mean": 10.167566299438477, "margin_dpo/margin_std": 13.522197723388672, "step": 456 }, { "epoch": 0.690854119425548, "fcm_dpo/beta": 0.06131181865930557, "fcm_dpo/delta": 0.11020015925168991, "fcm_dpo/margin": 8.077266693115234, "fcm_dpo/q_t": 0.39555391669273376, "grad_norm": 14.085563659667969, "learning_rate": 1.3309935167761717e-07, "logits/chosen": 1.6795952320098877, "logits/rejected": 1.471513032913208, "logps/chosen": -69.77462005615234, "logps/ref_chosen": -63.927032470703125, "logps/ref_rejected": -83.15243530273438, "logps/rejected": -97.0772933959961, "loss": 1.0796, "margin_dpo/margin_mean": 8.07726764678955, "margin_dpo/margin_std": 12.317647933959961, "step": 457 }, { "epoch": 0.6923658352229781, "fcm_dpo/beta": 0.061296649277210236, "fcm_dpo/delta": -0.0939876139163971, "fcm_dpo/margin": 11.217958450317383, "fcm_dpo/q_t": 0.3544745445251465, "grad_norm": 13.609352111816406, "learning_rate": 1.3193223130682936e-07, "logits/chosen": 1.7661914825439453, "logits/rejected": 1.458505392074585, "logps/chosen": -69.31581115722656, "logps/ref_chosen": -67.68869018554688, "logps/ref_rejected": -104.40899658203125, "logps/rejected": -117.25407409667969, "loss": 0.9614, "margin_dpo/margin_mean": 11.217958450317383, "margin_dpo/margin_std": 13.57296371459961, "step": 458 }, { "epoch": 0.6938775510204082, "fcm_dpo/beta": 0.06135866418480873, "fcm_dpo/delta": -0.07701873779296875, "fcm_dpo/margin": 10.853067398071289, "fcm_dpo/q_t": 0.36258336901664734, "grad_norm": 12.754447937011719, "learning_rate": 1.3076841354533658e-07, "logits/chosen": 1.7871050834655762, "logits/rejected": 1.6686228513717651, "logps/chosen": -85.60640716552734, "logps/ref_chosen": -83.82363891601562, "logps/ref_rejected": -103.75938415527344, "logps/rejected": -116.39521789550781, "loss": 1.0063, "margin_dpo/margin_mean": 10.853067398071289, "margin_dpo/margin_std": 13.418067932128906, "step": 459 }, { "epoch": 0.6953892668178382, "fcm_dpo/beta": 0.05903376638889313, "fcm_dpo/delta": -0.0766952782869339, "fcm_dpo/margin": 11.3674955368042, "fcm_dpo/q_t": 0.3633783757686615, "grad_norm": 12.821139335632324, "learning_rate": 1.2960793094762345e-07, "logits/chosen": 1.5267561674118042, "logits/rejected": 1.1998920440673828, "logps/chosen": -84.30833435058594, "logps/ref_chosen": -79.4836654663086, "logps/ref_rejected": -112.31745910644531, "logps/rejected": -128.50962829589844, "loss": 0.9777, "margin_dpo/margin_mean": 11.367496490478516, "margin_dpo/margin_std": 14.649511337280273, "step": 460 }, { "epoch": 0.6969009826152683, "fcm_dpo/beta": 0.05793311446905136, "fcm_dpo/delta": -0.05363578349351883, "fcm_dpo/margin": 11.177489280700684, "fcm_dpo/q_t": 0.36283209919929504, "grad_norm": 11.308926582336426, "learning_rate": 1.2845081597488286e-07, "logits/chosen": 1.801574468612671, "logits/rejected": 1.589023232460022, "logps/chosen": -65.58940124511719, "logps/ref_chosen": -64.28482055664062, "logps/ref_rejected": -93.73818969726562, "logps/rejected": -106.22026062011719, "loss": 0.978, "margin_dpo/margin_mean": 11.177490234375, "margin_dpo/margin_std": 13.440290451049805, "step": 461 }, { "epoch": 0.6984126984126984, "fcm_dpo/beta": 0.05706631764769554, "fcm_dpo/delta": -0.12463247776031494, "fcm_dpo/margin": 12.52739143371582, "fcm_dpo/q_t": 0.3522951006889343, "grad_norm": 13.344515800476074, "learning_rate": 1.27297100994108e-07, "logits/chosen": 1.6235347986221313, "logits/rejected": 1.4617130756378174, "logps/chosen": -80.09829711914062, "logps/ref_chosen": -77.15335083007812, "logps/ref_rejected": -91.12923431396484, "logps/rejected": -106.60157012939453, "loss": 0.9886, "margin_dpo/margin_mean": 12.52739143371582, "margin_dpo/margin_std": 16.76566505432129, "step": 462 }, { "epoch": 0.6999244142101285, "fcm_dpo/beta": 0.05774568021297455, "fcm_dpo/delta": 0.1331457495689392, "fcm_dpo/margin": 8.195122718811035, "fcm_dpo/q_t": 0.4045426547527313, "grad_norm": 15.691317558288574, "learning_rate": 1.2614681827718695e-07, "logits/chosen": 1.3940520286560059, "logits/rejected": 1.3858566284179688, "logps/chosen": -91.70310974121094, "logps/ref_chosen": -87.58760070800781, "logps/ref_rejected": -87.97022247314453, "logps/rejected": -100.28085327148438, "loss": 1.1291, "margin_dpo/margin_mean": 8.195122718811035, "margin_dpo/margin_std": 14.629854202270508, "step": 463 }, { "epoch": 0.7014361300075586, "fcm_dpo/beta": 0.05825965106487274, "fcm_dpo/delta": -0.06146547943353653, "fcm_dpo/margin": 11.280075073242188, "fcm_dpo/q_t": 0.37248778343200684, "grad_norm": 12.037423133850098, "learning_rate": 1.2500000000000005e-07, "logits/chosen": 1.252054214477539, "logits/rejected": 1.1450973749160767, "logps/chosen": -80.35469818115234, "logps/ref_chosen": -75.83175659179688, "logps/ref_rejected": -84.4811019897461, "logps/rejected": -100.28411865234375, "loss": 1.0885, "margin_dpo/margin_mean": 11.280075073242188, "margin_dpo/margin_std": 18.626415252685547, "step": 464 }, { "epoch": 0.7029478458049887, "fcm_dpo/beta": 0.057572394609451294, "fcm_dpo/delta": -0.009513016790151596, "fcm_dpo/margin": 10.575401306152344, "fcm_dpo/q_t": 0.3781369924545288, "grad_norm": 14.940975189208984, "learning_rate": 1.238566782415197e-07, "logits/chosen": 1.646533727645874, "logits/rejected": 1.4950196743011475, "logps/chosen": -82.08723449707031, "logps/ref_chosen": -77.057861328125, "logps/ref_rejected": -102.75727844238281, "logps/rejected": -118.36204528808594, "loss": 1.0649, "margin_dpo/margin_mean": 10.575400352478027, "margin_dpo/margin_std": 16.719135284423828, "step": 465 }, { "epoch": 0.7044595616024187, "fcm_dpo/beta": 0.059257976710796356, "fcm_dpo/delta": 0.2110356092453003, "fcm_dpo/margin": 6.714020729064941, "fcm_dpo/q_t": 0.41916608810424805, "grad_norm": 20.281587600708008, "learning_rate": 1.2271688498291334e-07, "logits/chosen": 1.3608951568603516, "logits/rejected": 1.3925187587738037, "logps/chosen": -97.87811279296875, "logps/ref_chosen": -91.7751693725586, "logps/ref_rejected": -90.2679443359375, "logps/rejected": -103.08491516113281, "loss": 1.1829, "margin_dpo/margin_mean": 6.714020252227783, "margin_dpo/margin_std": 14.033638000488281, "step": 466 }, { "epoch": 0.7059712773998488, "fcm_dpo/beta": 0.05979441851377487, "fcm_dpo/delta": -0.03212842717766762, "fcm_dpo/margin": 10.537097930908203, "fcm_dpo/q_t": 0.370755672454834, "grad_norm": 12.759522438049316, "learning_rate": 1.2158065210664848e-07, "logits/chosen": 1.4100677967071533, "logits/rejected": 1.0926790237426758, "logps/chosen": -67.85696411132812, "logps/ref_chosen": -64.77557373046875, "logps/ref_rejected": -102.58863830566406, "logps/rejected": -116.20712280273438, "loss": 1.0002, "margin_dpo/margin_mean": 10.53709602355957, "margin_dpo/margin_std": 14.119759559631348, "step": 467 }, { "epoch": 0.7074829931972789, "fcm_dpo/beta": 0.05894845724105835, "fcm_dpo/delta": -0.12605436146259308, "fcm_dpo/margin": 12.165056228637695, "fcm_dpo/q_t": 0.35496601462364197, "grad_norm": 12.62433910369873, "learning_rate": 1.204480113956011e-07, "logits/chosen": 1.520838737487793, "logits/rejected": 1.5226354598999023, "logps/chosen": -86.23693084716797, "logps/ref_chosen": -82.22445678710938, "logps/ref_rejected": -92.99041748046875, "logps/rejected": -109.1679458618164, "loss": 0.9979, "margin_dpo/margin_mean": 12.165056228637695, "margin_dpo/margin_std": 16.692893981933594, "step": 468 }, { "epoch": 0.708994708994709, "fcm_dpo/beta": 0.05790429934859276, "fcm_dpo/delta": 0.029473505914211273, "fcm_dpo/margin": 9.849138259887695, "fcm_dpo/q_t": 0.38115769624710083, "grad_norm": 12.907513618469238, "learning_rate": 1.1931899453216697e-07, "logits/chosen": 1.6713244915008545, "logits/rejected": 1.6393948793411255, "logps/chosen": -79.8751449584961, "logps/ref_chosen": -75.93031311035156, "logps/ref_rejected": -92.26559448242188, "logps/rejected": -106.0595703125, "loss": 1.0303, "margin_dpo/margin_mean": 9.849139213562012, "margin_dpo/margin_std": 13.105131149291992, "step": 469 }, { "epoch": 0.7105064247921391, "fcm_dpo/beta": 0.05865296721458435, "fcm_dpo/delta": 0.004474967252463102, "fcm_dpo/margin": 10.157899856567383, "fcm_dpo/q_t": 0.3750629723072052, "grad_norm": 10.989476203918457, "learning_rate": 1.1819363309737438e-07, "logits/chosen": 1.3479657173156738, "logits/rejected": 1.1786839962005615, "logps/chosen": -70.01264953613281, "logps/ref_chosen": -65.86345672607422, "logps/ref_rejected": -85.89832305908203, "logps/rejected": -100.2054214477539, "loss": 1.0466, "margin_dpo/margin_mean": 10.157899856567383, "margin_dpo/margin_std": 15.077445983886719, "step": 470 }, { "epoch": 0.7120181405895691, "fcm_dpo/beta": 0.057473134249448776, "fcm_dpo/delta": -0.15603893995285034, "fcm_dpo/margin": 12.95675277709961, "fcm_dpo/q_t": 0.34615379571914673, "grad_norm": 12.145216941833496, "learning_rate": 1.1707195857000215e-07, "logits/chosen": 1.4731967449188232, "logits/rejected": 1.3386666774749756, "logps/chosen": -76.57672119140625, "logps/ref_chosen": -74.3460922241211, "logps/ref_rejected": -93.43672943115234, "logps/rejected": -108.62411499023438, "loss": 0.9717, "margin_dpo/margin_mean": 12.95675277709961, "margin_dpo/margin_std": 16.800918579101562, "step": 471 }, { "epoch": 0.7135298563869993, "fcm_dpo/beta": 0.056866731494665146, "fcm_dpo/delta": -0.03586677089333534, "fcm_dpo/margin": 11.134857177734375, "fcm_dpo/q_t": 0.3769086003303528, "grad_norm": 11.125615119934082, "learning_rate": 1.1595400232569768e-07, "logits/chosen": 1.6612334251403809, "logits/rejected": 1.5427844524383545, "logps/chosen": -76.58341217041016, "logps/ref_chosen": -74.75674438476562, "logps/ref_rejected": -95.18183135986328, "logps/rejected": -108.14335632324219, "loss": 1.0803, "margin_dpo/margin_mean": 11.134857177734375, "margin_dpo/margin_std": 18.514007568359375, "step": 472 }, { "epoch": 0.7150415721844293, "fcm_dpo/beta": 0.056203171610832214, "fcm_dpo/delta": -0.01779717206954956, "fcm_dpo/margin": 10.971089363098145, "fcm_dpo/q_t": 0.37407466769218445, "grad_norm": 11.89809799194336, "learning_rate": 1.1483979563610069e-07, "logits/chosen": 1.95254647731781, "logits/rejected": 1.671574592590332, "logps/chosen": -71.95792388916016, "logps/ref_chosen": -71.65933227539062, "logps/ref_rejected": -109.99200439453125, "logps/rejected": -121.26168060302734, "loss": 1.0624, "margin_dpo/margin_mean": 10.971089363098145, "margin_dpo/margin_std": 17.36050796508789, "step": 473 }, { "epoch": 0.7165532879818595, "fcm_dpo/beta": 0.05685240030288696, "fcm_dpo/delta": 0.0948951244354248, "fcm_dpo/margin": 8.970964431762695, "fcm_dpo/q_t": 0.39646458625793457, "grad_norm": 14.758679389953613, "learning_rate": 1.1372936966796709e-07, "logits/chosen": 1.6065518856048584, "logits/rejected": 1.4003615379333496, "logps/chosen": -70.70443725585938, "logps/ref_chosen": -65.91990661621094, "logps/ref_rejected": -89.09432983398438, "logps/rejected": -102.84982299804688, "loss": 1.1318, "margin_dpo/margin_mean": 8.970963478088379, "margin_dpo/margin_std": 16.53539276123047, "step": 474 }, { "epoch": 0.7180650037792895, "fcm_dpo/beta": 0.055412329733371735, "fcm_dpo/delta": -0.21177589893341064, "fcm_dpo/margin": 14.334150314331055, "fcm_dpo/q_t": 0.33283495903015137, "grad_norm": 11.61395263671875, "learning_rate": 1.126227554822985e-07, "logits/chosen": 1.3431096076965332, "logits/rejected": 1.2758687734603882, "logps/chosen": -82.76185607910156, "logps/ref_chosen": -79.02459716796875, "logps/ref_rejected": -107.33058166503906, "logps/rejected": -125.4019775390625, "loss": 0.8963, "margin_dpo/margin_mean": 14.334149360656738, "margin_dpo/margin_std": 15.547351837158203, "step": 475 }, { "epoch": 0.7195767195767195, "fcm_dpo/beta": 0.05529044568538666, "fcm_dpo/delta": 0.057061512023210526, "fcm_dpo/margin": 9.876468658447266, "fcm_dpo/q_t": 0.3873744606971741, "grad_norm": 11.611725807189941, "learning_rate": 1.1151998403347243e-07, "logits/chosen": 1.4797489643096924, "logits/rejected": 1.4424426555633545, "logps/chosen": -99.78776550292969, "logps/ref_chosen": -93.72602844238281, "logps/ref_rejected": -94.390625, "logps/rejected": -110.32882690429688, "loss": 1.0784, "margin_dpo/margin_mean": 9.876466751098633, "margin_dpo/margin_std": 15.821935653686523, "step": 476 }, { "epoch": 0.7210884353741497, "fcm_dpo/beta": 0.056804411113262177, "fcm_dpo/delta": 0.1499871164560318, "fcm_dpo/margin": 8.044179916381836, "fcm_dpo/q_t": 0.4083792567253113, "grad_norm": 18.420042037963867, "learning_rate": 1.1042108616837692e-07, "logits/chosen": 1.642760992050171, "logits/rejected": 1.558333158493042, "logps/chosen": -85.10441589355469, "logps/ref_chosen": -76.51399993896484, "logps/ref_rejected": -99.14356231689453, "logps/rejected": -115.77816009521484, "loss": 1.2094, "margin_dpo/margin_mean": 8.044179916381836, "margin_dpo/margin_std": 17.958162307739258, "step": 477 }, { "epoch": 0.7226001511715797, "fcm_dpo/beta": 0.057875052094459534, "fcm_dpo/delta": 0.09590927511453629, "fcm_dpo/margin": 8.795384407043457, "fcm_dpo/q_t": 0.40439170598983765, "grad_norm": 16.46158218383789, "learning_rate": 1.0932609262554746e-07, "logits/chosen": 1.2920639514923096, "logits/rejected": 1.2550523281097412, "logps/chosen": -82.43702697753906, "logps/ref_chosen": -77.95186614990234, "logps/ref_rejected": -69.77754211425781, "logps/rejected": -83.05809020996094, "loss": 1.2198, "margin_dpo/margin_mean": 8.795384407043457, "margin_dpo/margin_std": 19.819679260253906, "step": 478 }, { "epoch": 0.7241118669690099, "fcm_dpo/beta": 0.059072867035865784, "fcm_dpo/delta": 0.09962013363838196, "fcm_dpo/margin": 8.55759048461914, "fcm_dpo/q_t": 0.396778404712677, "grad_norm": 17.041179656982422, "learning_rate": 1.0823503403430734e-07, "logits/chosen": 1.332829475402832, "logits/rejected": 1.2122066020965576, "logps/chosen": -80.6783447265625, "logps/ref_chosen": -76.56551361083984, "logps/ref_rejected": -84.33758544921875, "logps/rejected": -97.00800323486328, "loss": 1.1704, "margin_dpo/margin_mean": 8.55759048461914, "margin_dpo/margin_std": 17.415115356445312, "step": 479 }, { "epoch": 0.7256235827664399, "fcm_dpo/beta": 0.05932193621993065, "fcm_dpo/delta": -0.08332247287034988, "fcm_dpo/margin": 11.412254333496094, "fcm_dpo/q_t": 0.3587559759616852, "grad_norm": 20.78253936767578, "learning_rate": 1.0714794091391072e-07, "logits/chosen": 1.3048365116119385, "logits/rejected": 1.2908906936645508, "logps/chosen": -82.61824798583984, "logps/ref_chosen": -80.15884399414062, "logps/ref_rejected": -84.88697814941406, "logps/rejected": -98.75863647460938, "loss": 1.0667, "margin_dpo/margin_mean": 11.412253379821777, "margin_dpo/margin_std": 17.866085052490234, "step": 480 }, { "epoch": 0.72713529856387, "fcm_dpo/beta": 0.0585302859544754, "fcm_dpo/delta": -0.008791010826826096, "fcm_dpo/margin": 10.392139434814453, "fcm_dpo/q_t": 0.38119131326675415, "grad_norm": 12.047948837280273, "learning_rate": 1.0606484367268906e-07, "logits/chosen": 1.1564797163009644, "logits/rejected": 1.1351284980773926, "logps/chosen": -88.25604248046875, "logps/ref_chosen": -84.56254577636719, "logps/ref_rejected": -90.06451416015625, "logps/rejected": -104.150146484375, "loss": 1.0787, "margin_dpo/margin_mean": 10.392141342163086, "margin_dpo/margin_std": 17.54533576965332, "step": 481 }, { "epoch": 0.7286470143613001, "fcm_dpo/beta": 0.058919116854667664, "fcm_dpo/delta": 0.0236910879611969, "fcm_dpo/margin": 9.797449111938477, "fcm_dpo/q_t": 0.390002578496933, "grad_norm": 17.564355850219727, "learning_rate": 1.0498577260720048e-07, "logits/chosen": 1.3875517845153809, "logits/rejected": 1.0670008659362793, "logps/chosen": -85.58686828613281, "logps/ref_chosen": -78.88141632080078, "logps/ref_rejected": -125.41990661621094, "logps/rejected": -141.9228057861328, "loss": 1.1476, "margin_dpo/margin_mean": 9.79744815826416, "margin_dpo/margin_std": 18.76854705810547, "step": 482 }, { "epoch": 0.7301587301587301, "fcm_dpo/beta": 0.05753147974610329, "fcm_dpo/delta": -0.13096870481967926, "fcm_dpo/margin": 12.537253379821777, "fcm_dpo/q_t": 0.3545553684234619, "grad_norm": 13.328836441040039, "learning_rate": 1.0391075790138232e-07, "logits/chosen": 1.4349303245544434, "logits/rejected": 1.1912516355514526, "logps/chosen": -76.0250244140625, "logps/ref_chosen": -72.690185546875, "logps/ref_rejected": -98.37237548828125, "logps/rejected": -114.24446868896484, "loss": 1.0076, "margin_dpo/margin_mean": 12.537254333496094, "margin_dpo/margin_std": 17.870529174804688, "step": 483 }, { "epoch": 0.7316704459561603, "fcm_dpo/beta": 0.05832766741514206, "fcm_dpo/delta": 0.09759794175624847, "fcm_dpo/margin": 8.684357643127441, "fcm_dpo/q_t": 0.3952082395553589, "grad_norm": 12.609947204589844, "learning_rate": 1.0283982962570681e-07, "logits/chosen": 1.8161753416061401, "logits/rejected": 1.7682876586914062, "logps/chosen": -77.07719421386719, "logps/ref_chosen": -73.98435974121094, "logps/ref_rejected": -89.99178314208984, "logps/rejected": -101.76897430419922, "loss": 1.0971, "margin_dpo/margin_mean": 8.684356689453125, "margin_dpo/margin_std": 14.368082046508789, "step": 484 }, { "epoch": 0.7331821617535903, "fcm_dpo/beta": 0.05779796093702316, "fcm_dpo/delta": 0.01702851802110672, "fcm_dpo/margin": 10.05638599395752, "fcm_dpo/q_t": 0.385127454996109, "grad_norm": 13.867687225341797, "learning_rate": 1.0177301773633992e-07, "logits/chosen": 1.931138277053833, "logits/rejected": 1.8297758102416992, "logps/chosen": -81.68190002441406, "logps/ref_chosen": -78.0927963256836, "logps/ref_rejected": -89.14010620117188, "logps/rejected": -102.78559875488281, "loss": 1.0807, "margin_dpo/margin_mean": 10.056386947631836, "margin_dpo/margin_std": 16.07583999633789, "step": 485 }, { "epoch": 0.7346938775510204, "fcm_dpo/beta": 0.05844534933567047, "fcm_dpo/delta": -0.05448358133435249, "fcm_dpo/margin": 11.130483627319336, "fcm_dpo/q_t": 0.3756754696369171, "grad_norm": 11.819550514221191, "learning_rate": 1.007103520743035e-07, "logits/chosen": 1.565119743347168, "logits/rejected": 1.296295404434204, "logps/chosen": -80.0963363647461, "logps/ref_chosen": -73.74685668945312, "logps/ref_rejected": -107.752685546875, "logps/rejected": -125.23265838623047, "loss": 1.0994, "margin_dpo/margin_mean": 11.130483627319336, "margin_dpo/margin_std": 19.282230377197266, "step": 486 }, { "epoch": 0.7362055933484505, "fcm_dpo/beta": 0.0577419213950634, "fcm_dpo/delta": -0.019458172842860222, "fcm_dpo/margin": 10.707120895385742, "fcm_dpo/q_t": 0.37412503361701965, "grad_norm": 12.673027992248535, "learning_rate": 9.965186236464046e-08, "logits/chosen": 1.7286944389343262, "logits/rejected": 1.5361676216125488, "logps/chosen": -85.74382019042969, "logps/ref_chosen": -79.57780456542969, "logps/ref_rejected": -102.2916259765625, "logps/rejected": -119.16477966308594, "loss": 1.0409, "margin_dpo/margin_mean": 10.707120895385742, "margin_dpo/margin_std": 16.0146541595459, "step": 487 }, { "epoch": 0.7377173091458806, "fcm_dpo/beta": 0.05774332210421562, "fcm_dpo/delta": -0.0757601261138916, "fcm_dpo/margin": 11.58226203918457, "fcm_dpo/q_t": 0.37853580713272095, "grad_norm": 19.57643699645996, "learning_rate": 9.859757821558337e-08, "logits/chosen": 1.3294143676757812, "logits/rejected": 1.1671702861785889, "logps/chosen": -84.84293365478516, "logps/ref_chosen": -80.62767791748047, "logps/ref_rejected": -100.4541015625, "logps/rejected": -116.25162506103516, "loss": 1.0968, "margin_dpo/margin_mean": 11.582262992858887, "margin_dpo/margin_std": 19.66269302368164, "step": 488 }, { "epoch": 0.7392290249433107, "fcm_dpo/beta": 0.05942504480481148, "fcm_dpo/delta": 0.30510973930358887, "fcm_dpo/margin": 5.123412132263184, "fcm_dpo/q_t": 0.44007402658462524, "grad_norm": 15.113093376159668, "learning_rate": 9.754752911772615e-08, "logits/chosen": 1.3929004669189453, "logits/rejected": 1.290812373161316, "logps/chosen": -91.3660888671875, "logps/ref_chosen": -85.39521026611328, "logps/ref_rejected": -101.97309875488281, "logps/rejected": -113.06739044189453, "loss": 1.2833, "margin_dpo/margin_mean": 5.1234130859375, "margin_dpo/margin_std": 15.02302360534668, "step": 489 }, { "epoch": 0.7407407407407407, "fcm_dpo/beta": 0.05948188900947571, "fcm_dpo/delta": -0.07982932776212692, "fcm_dpo/margin": 11.330092430114746, "fcm_dpo/q_t": 0.374492347240448, "grad_norm": 16.68990135192871, "learning_rate": 9.650174444319956e-08, "logits/chosen": 1.7335550785064697, "logits/rejected": 1.6902096271514893, "logps/chosen": -82.07659912109375, "logps/ref_chosen": -77.75590515136719, "logps/ref_rejected": -88.98885345458984, "logps/rejected": -104.6396484375, "loss": 1.1096, "margin_dpo/margin_mean": 11.330092430114746, "margin_dpo/margin_std": 19.99999237060547, "step": 490 }, { "epoch": 0.7422524565381708, "fcm_dpo/beta": 0.05829251557588577, "fcm_dpo/delta": -0.06507232040166855, "fcm_dpo/margin": 11.291997909545898, "fcm_dpo/q_t": 0.3683939576148987, "grad_norm": 11.451486587524414, "learning_rate": 9.546025344484868e-08, "logits/chosen": 1.2975845336914062, "logits/rejected": 1.1464436054229736, "logps/chosen": -78.01144409179688, "logps/ref_chosen": -74.33360290527344, "logps/ref_rejected": -91.4105224609375, "logps/rejected": -106.38035583496094, "loss": 1.007, "margin_dpo/margin_mean": 11.291997909545898, "margin_dpo/margin_std": 15.383825302124023, "step": 491 }, { "epoch": 0.7437641723356009, "fcm_dpo/beta": 0.058672577142715454, "fcm_dpo/delta": -0.0013382361503317952, "fcm_dpo/margin": 7.504390716552734, "fcm_dpo/q_t": 0.4148620367050171, "grad_norm": 16.56096076965332, "learning_rate": 9.442308525541589e-08, "logits/chosen": 1.4910600185394287, "logits/rejected": 1.2763690948486328, "logps/chosen": -94.51632690429688, "logps/ref_chosen": -85.14178466796875, "logps/ref_rejected": -103.44204711914062, "logps/rejected": -120.32098388671875, "loss": 1.2105, "margin_dpo/margin_mean": 7.504390716552734, "margin_dpo/margin_std": 16.77596664428711, "step": 492 }, { "epoch": 0.745275888133031, "fcm_dpo/beta": 0.05836326628923416, "fcm_dpo/delta": -0.06718596071004868, "fcm_dpo/margin": 11.352909088134766, "fcm_dpo/q_t": 0.36989927291870117, "grad_norm": 11.82755184173584, "learning_rate": 9.339026888672468e-08, "logits/chosen": 1.6452562808990479, "logits/rejected": 1.4447059631347656, "logps/chosen": -79.38182067871094, "logps/ref_chosen": -75.81439208984375, "logps/ref_rejected": -95.30766296386719, "logps/rejected": -110.22799682617188, "loss": 1.0806, "margin_dpo/margin_mean": 11.352909088134766, "margin_dpo/margin_std": 18.79556655883789, "step": 493 }, { "epoch": 0.7467876039304611, "fcm_dpo/beta": 0.058134227991104126, "fcm_dpo/delta": 0.04909532144665718, "fcm_dpo/margin": 9.522611618041992, "fcm_dpo/q_t": 0.3936055302619934, "grad_norm": 15.155073165893555, "learning_rate": 9.236183322886945e-08, "logits/chosen": 1.1880576610565186, "logits/rejected": 1.1128486394882202, "logps/chosen": -98.40640258789062, "logps/ref_chosen": -93.83562469482422, "logps/ref_rejected": -112.21142578125, "logps/rejected": -126.30481719970703, "loss": 1.1519, "margin_dpo/margin_mean": 9.522610664367676, "margin_dpo/margin_std": 18.529830932617188, "step": 494 }, { "epoch": 0.7482993197278912, "fcm_dpo/beta": 0.059422802180051804, "fcm_dpo/delta": 0.07689768075942993, "fcm_dpo/margin": 8.856720924377441, "fcm_dpo/q_t": 0.3950550854206085, "grad_norm": 18.023229598999023, "learning_rate": 9.133780704940594e-08, "logits/chosen": 1.6511743068695068, "logits/rejected": 1.4738554954528809, "logps/chosen": -73.00120544433594, "logps/ref_chosen": -68.52467346191406, "logps/ref_rejected": -89.65379333496094, "logps/rejected": -102.98704528808594, "loss": 1.17, "margin_dpo/margin_mean": 8.856720924377441, "margin_dpo/margin_std": 17.582279205322266, "step": 495 }, { "epoch": 0.7498110355253212, "fcm_dpo/beta": 0.05871668457984924, "fcm_dpo/delta": -0.040396757423877716, "fcm_dpo/margin": 10.85169506072998, "fcm_dpo/q_t": 0.37551993131637573, "grad_norm": 12.240704536437988, "learning_rate": 9.031821899254797e-08, "logits/chosen": 1.7110998630523682, "logits/rejected": 1.4665889739990234, "logps/chosen": -78.06710815429688, "logps/ref_chosen": -73.13618469238281, "logps/ref_rejected": -111.50930786132812, "logps/rejected": -127.29193115234375, "loss": 1.0757, "margin_dpo/margin_mean": 10.851696014404297, "margin_dpo/margin_std": 17.6138973236084, "step": 496 }, { "epoch": 0.7513227513227513, "fcm_dpo/beta": 0.05747950077056885, "fcm_dpo/delta": -0.11834258586168289, "fcm_dpo/margin": 12.324544906616211, "fcm_dpo/q_t": 0.3581116795539856, "grad_norm": 14.844618797302246, "learning_rate": 8.930309757836516e-08, "logits/chosen": 1.3825771808624268, "logits/rejected": 1.2625739574432373, "logps/chosen": -95.60760498046875, "logps/ref_chosen": -88.71475219726562, "logps/ref_rejected": -105.74935913085938, "logps/rejected": -124.96675109863281, "loss": 0.9953, "margin_dpo/margin_mean": 12.324544906616211, "margin_dpo/margin_std": 16.996997833251953, "step": 497 }, { "epoch": 0.7528344671201814, "fcm_dpo/beta": 0.05717446655035019, "fcm_dpo/delta": -0.0596219003200531, "fcm_dpo/margin": 11.468419075012207, "fcm_dpo/q_t": 0.3738415241241455, "grad_norm": 13.000709533691406, "learning_rate": 8.829247120198563e-08, "logits/chosen": 1.6439807415008545, "logits/rejected": 1.5671942234039307, "logps/chosen": -86.48239135742188, "logps/ref_chosen": -83.3353271484375, "logps/ref_rejected": -89.34941864013672, "logps/rejected": -103.96488952636719, "loss": 1.0519, "margin_dpo/margin_mean": 11.468419075012207, "margin_dpo/margin_std": 18.170747756958008, "step": 498 }, { "epoch": 0.7543461829176115, "fcm_dpo/beta": 0.056997328996658325, "fcm_dpo/delta": 0.01875562034547329, "fcm_dpo/margin": 10.216790199279785, "fcm_dpo/q_t": 0.38748350739479065, "grad_norm": 14.796299934387207, "learning_rate": 8.728636813280163e-08, "logits/chosen": 1.5643539428710938, "logits/rejected": 1.375131368637085, "logps/chosen": -85.56379699707031, "logps/ref_chosen": -79.373779296875, "logps/ref_rejected": -104.62533569335938, "logps/rejected": -121.03214263916016, "loss": 1.1548, "margin_dpo/margin_mean": 10.216791152954102, "margin_dpo/margin_std": 19.614974975585938, "step": 499 }, { "epoch": 0.7558578987150416, "fcm_dpo/beta": 0.05729161575436592, "fcm_dpo/delta": -0.0038493499159812927, "fcm_dpo/margin": 10.52761459350586, "fcm_dpo/q_t": 0.37920159101486206, "grad_norm": 14.683732032775879, "learning_rate": 8.628481651367875e-08, "logits/chosen": 1.3626763820648193, "logits/rejected": 1.326643466949463, "logps/chosen": -91.39049530029297, "logps/ref_chosen": -85.953857421875, "logps/ref_rejected": -90.40995788574219, "logps/rejected": -106.37420654296875, "loss": 1.0608, "margin_dpo/margin_mean": 10.52761459350586, "margin_dpo/margin_std": 16.482694625854492, "step": 500 }, { "epoch": 0.7558578987150416, "eval_fcm_dpo/beta": 0.056991901248693466, "eval_logits/chosen": 1.29319167137146, "eval_logits/rejected": 1.1717369556427002, "eval_logps/chosen": -90.84538269042969, "eval_logps/ref_chosen": -86.90177917480469, "eval_logps/ref_rejected": -96.69639587402344, "eval_logps/rejected": -111.40538787841797, "eval_loss": 0.5388143062591553, "eval_margin_dpo/margin_mean": 10.765402793884277, "eval_margin_dpo/margin_std": 17.406885147094727, "eval_runtime": 42.3061, "eval_samples_per_second": 54.437, "eval_steps_per_second": 1.702, "step": 500 }, { "epoch": 0.7573696145124716, "fcm_dpo/beta": 0.05800885707139969, "fcm_dpo/delta": 0.07376056164503098, "fcm_dpo/margin": 9.115702629089355, "fcm_dpo/q_t": 0.394004762172699, "grad_norm": 11.813311576843262, "learning_rate": 8.528784436016878e-08, "logits/chosen": 1.156029224395752, "logits/rejected": 1.1506352424621582, "logps/chosen": -85.94219970703125, "logps/ref_chosen": -81.22268676757812, "logps/ref_rejected": -86.97892761230469, "logps/rejected": -100.81414794921875, "loss": 1.0727, "margin_dpo/margin_mean": 9.115702629089355, "margin_dpo/margin_std": 14.19137954711914, "step": 501 }, { "epoch": 0.7588813303099018, "fcm_dpo/beta": 0.05887910723686218, "fcm_dpo/delta": 0.12389560043811798, "fcm_dpo/margin": 8.188718795776367, "fcm_dpo/q_t": 0.4010313153266907, "grad_norm": 13.988794326782227, "learning_rate": 8.4295479559726e-08, "logits/chosen": 1.4468226432800293, "logits/rejected": 1.3555638790130615, "logps/chosen": -87.16419219970703, "logps/ref_chosen": -83.1567611694336, "logps/ref_rejected": -106.74440002441406, "logps/rejected": -118.9405517578125, "loss": 1.1022, "margin_dpo/margin_mean": 8.188718795776367, "margin_dpo/margin_std": 13.904130935668945, "step": 502 }, { "epoch": 0.7603930461073318, "fcm_dpo/beta": 0.05860746279358864, "fcm_dpo/delta": -0.07429009675979614, "fcm_dpo/margin": 11.419576644897461, "fcm_dpo/q_t": 0.36525100469589233, "grad_norm": 12.156607627868652, "learning_rate": 8.330774987092712e-08, "logits/chosen": 1.5399150848388672, "logits/rejected": 1.5529388189315796, "logps/chosen": -70.98908996582031, "logps/ref_chosen": -68.51583862304688, "logps/ref_rejected": -75.02178955078125, "logps/rejected": -88.91461944580078, "loss": 1.0363, "margin_dpo/margin_mean": 11.419578552246094, "margin_dpo/margin_std": 17.146236419677734, "step": 503 }, { "epoch": 0.7619047619047619, "fcm_dpo/beta": 0.05688774213194847, "fcm_dpo/delta": -0.23274773359298706, "fcm_dpo/margin": 14.314230918884277, "fcm_dpo/q_t": 0.33439508080482483, "grad_norm": 12.193303108215332, "learning_rate": 8.232468292269479e-08, "logits/chosen": 1.447981595993042, "logits/rejected": 1.4175838232040405, "logps/chosen": -88.09152221679688, "logps/ref_chosen": -85.15829467773438, "logps/ref_rejected": -96.16879272460938, "logps/rejected": -113.41624450683594, "loss": 0.8953, "margin_dpo/margin_mean": 14.314229965209961, "margin_dpo/margin_std": 15.71872615814209, "step": 504 }, { "epoch": 0.763416477702192, "fcm_dpo/beta": 0.05636422336101532, "fcm_dpo/delta": 0.1211187019944191, "fcm_dpo/margin": 8.596763610839844, "fcm_dpo/q_t": 0.4058091938495636, "grad_norm": 21.137935638427734, "learning_rate": 8.134630621352483e-08, "logits/chosen": 1.5486080646514893, "logits/rejected": 1.3943827152252197, "logps/chosen": -83.37315368652344, "logps/ref_chosen": -79.26185607910156, "logps/ref_rejected": -96.34947967529297, "logps/rejected": -109.05754089355469, "loss": 1.2078, "margin_dpo/margin_mean": 8.596763610839844, "margin_dpo/margin_std": 18.844703674316406, "step": 505 }, { "epoch": 0.764928193499622, "fcm_dpo/beta": 0.05752148851752281, "fcm_dpo/delta": 0.08011193573474884, "fcm_dpo/margin": 9.109159469604492, "fcm_dpo/q_t": 0.39814865589141846, "grad_norm": 18.01957893371582, "learning_rate": 8.037264711071698e-08, "logits/chosen": 1.4216543436050415, "logits/rejected": 1.4008054733276367, "logps/chosen": -93.21182250976562, "logps/ref_chosen": -88.192626953125, "logps/ref_rejected": -100.86880493164062, "logps/rejected": -114.99716186523438, "loss": 1.2276, "margin_dpo/margin_mean": 9.109158515930176, "margin_dpo/margin_std": 20.6380672454834, "step": 506 }, { "epoch": 0.7664399092970522, "fcm_dpo/beta": 0.05810971558094025, "fcm_dpo/delta": -0.009886268526315689, "fcm_dpo/margin": 10.48335075378418, "fcm_dpo/q_t": 0.38148459792137146, "grad_norm": 13.946736335754395, "learning_rate": 7.940373284960933e-08, "logits/chosen": 1.231891393661499, "logits/rejected": 1.1046533584594727, "logps/chosen": -92.22346496582031, "logps/ref_chosen": -86.04632568359375, "logps/ref_rejected": -111.44412994384766, "logps/rejected": -128.1046142578125, "loss": 1.083, "margin_dpo/margin_mean": 10.48335075378418, "margin_dpo/margin_std": 17.50394058227539, "step": 507 }, { "epoch": 0.7679516250944822, "fcm_dpo/beta": 0.05754335597157478, "fcm_dpo/delta": -0.07930278778076172, "fcm_dpo/margin": 11.710323333740234, "fcm_dpo/q_t": 0.3726339340209961, "grad_norm": 15.400092124938965, "learning_rate": 7.843959053281663e-08, "logits/chosen": 1.4893312454223633, "logits/rejected": 1.1656887531280518, "logps/chosen": -83.44557189941406, "logps/ref_chosen": -79.25038146972656, "logps/ref_rejected": -118.49089813232422, "logps/rejected": -134.39639282226562, "loss": 1.0495, "margin_dpo/margin_mean": 11.710323333740234, "margin_dpo/margin_std": 18.529254913330078, "step": 508 }, { "epoch": 0.7694633408919124, "fcm_dpo/beta": 0.05650604888796806, "fcm_dpo/delta": -0.0452471524477005, "fcm_dpo/margin": 11.363935470581055, "fcm_dpo/q_t": 0.36331725120544434, "grad_norm": 12.700797080993652, "learning_rate": 7.748024712947204e-08, "logits/chosen": 1.4477837085723877, "logits/rejected": 1.3602555990219116, "logps/chosen": -83.59129333496094, "logps/ref_chosen": -80.7039566040039, "logps/ref_rejected": -90.50444793701172, "logps/rejected": -104.75572967529297, "loss": 1.0001, "margin_dpo/margin_mean": 11.363937377929688, "margin_dpo/margin_std": 14.95089054107666, "step": 509 }, { "epoch": 0.7709750566893424, "fcm_dpo/beta": 0.055086106061935425, "fcm_dpo/delta": -0.13460469245910645, "fcm_dpo/margin": 13.135368347167969, "fcm_dpo/q_t": 0.3555517792701721, "grad_norm": 13.34060287475586, "learning_rate": 7.652572947447272e-08, "logits/chosen": 1.587594985961914, "logits/rejected": 1.364477515220642, "logps/chosen": -70.32972717285156, "logps/ref_chosen": -67.64491271972656, "logps/ref_rejected": -108.92274475097656, "logps/rejected": -124.742919921875, "loss": 1.0292, "margin_dpo/margin_mean": 13.135368347167969, "margin_dpo/margin_std": 19.449054718017578, "step": 510 }, { "epoch": 0.7724867724867724, "fcm_dpo/beta": 0.053726132959127426, "fcm_dpo/delta": -0.1980828195810318, "fcm_dpo/margin": 14.573793411254883, "fcm_dpo/q_t": 0.3462072014808655, "grad_norm": 11.345755577087402, "learning_rate": 7.557606426772961e-08, "logits/chosen": 1.4740877151489258, "logits/rejected": 1.3778789043426514, "logps/chosen": -79.63981628417969, "logps/ref_chosen": -75.66263580322266, "logps/ref_rejected": -104.26296997070312, "logps/rejected": -122.81393432617188, "loss": 0.9632, "margin_dpo/margin_mean": 14.573793411254883, "margin_dpo/margin_std": 18.880844116210938, "step": 511 }, { "epoch": 0.7739984882842026, "fcm_dpo/beta": 0.05385109782218933, "fcm_dpo/delta": 0.14281541109085083, "fcm_dpo/margin": 8.617152214050293, "fcm_dpo/q_t": 0.41105854511260986, "grad_norm": 14.020538330078125, "learning_rate": 7.463127807341966e-08, "logits/chosen": 1.1061210632324219, "logits/rejected": 1.0223177671432495, "logps/chosen": -83.80526733398438, "logps/ref_chosen": -79.31925964355469, "logps/ref_rejected": -82.22052001953125, "logps/rejected": -95.32367706298828, "loss": 1.1695, "margin_dpo/margin_mean": 8.617152214050293, "margin_dpo/margin_std": 17.66824722290039, "step": 512 }, { "epoch": 0.7755102040816326, "fcm_dpo/beta": 0.05385831743478775, "fcm_dpo/delta": -0.017212651669979095, "fcm_dpo/margin": 11.417625427246094, "fcm_dpo/q_t": 0.37617164850234985, "grad_norm": 10.337393760681152, "learning_rate": 7.369139731924401e-08, "logits/chosen": 1.775747299194336, "logits/rejected": 1.6193273067474365, "logps/chosen": -74.82000732421875, "logps/ref_chosen": -72.02534484863281, "logps/ref_rejected": -86.56224060058594, "logps/rejected": -100.77452850341797, "loss": 1.0165, "margin_dpo/margin_mean": 11.417625427246094, "margin_dpo/margin_std": 15.95500373840332, "step": 513 }, { "epoch": 0.7770219198790628, "fcm_dpo/beta": 0.05360942333936691, "fcm_dpo/delta": -0.10015452653169632, "fcm_dpo/margin": 12.932397842407227, "fcm_dpo/q_t": 0.3562164604663849, "grad_norm": 11.159914016723633, "learning_rate": 7.275644829568747e-08, "logits/chosen": 1.7210180759429932, "logits/rejected": 1.6601059436798096, "logps/chosen": -89.07673645019531, "logps/ref_chosen": -84.94093322753906, "logps/ref_rejected": -102.44367980957031, "logps/rejected": -119.51187133789062, "loss": 0.9902, "margin_dpo/margin_mean": 12.93239688873291, "margin_dpo/margin_std": 17.244815826416016, "step": 514 }, { "epoch": 0.7785336356764928, "fcm_dpo/beta": 0.053353969007730484, "fcm_dpo/delta": 0.014163432642817497, "fcm_dpo/margin": 10.993968963623047, "fcm_dpo/q_t": 0.38286080956459045, "grad_norm": 15.05717658996582, "learning_rate": 7.182645715528435e-08, "logits/chosen": 1.679139256477356, "logits/rejected": 1.4939582347869873, "logps/chosen": -78.55538940429688, "logps/ref_chosen": -72.9662094116211, "logps/ref_rejected": -102.53651428222656, "logps/rejected": -119.11965942382812, "loss": 1.0871, "margin_dpo/margin_mean": 10.99396800994873, "margin_dpo/margin_std": 18.255279541015625, "step": 515 }, { "epoch": 0.780045351473923, "fcm_dpo/beta": 0.05421724542975426, "fcm_dpo/delta": 0.09501098096370697, "fcm_dpo/margin": 9.394851684570312, "fcm_dpo/q_t": 0.390326589345932, "grad_norm": 14.797728538513184, "learning_rate": 7.090144991188568e-08, "logits/chosen": 1.6032508611679077, "logits/rejected": 1.4421273469924927, "logps/chosen": -78.98271179199219, "logps/ref_chosen": -76.63414001464844, "logps/ref_rejected": -91.01750183105469, "logps/rejected": -102.76092529296875, "loss": 1.1055, "margin_dpo/margin_mean": 9.394851684570312, "margin_dpo/margin_std": 15.970596313476562, "step": 516 }, { "epoch": 0.781557067271353, "fcm_dpo/beta": 0.055460721254348755, "fcm_dpo/delta": 0.11611491441726685, "fcm_dpo/margin": 8.81535530090332, "fcm_dpo/q_t": 0.4120446443557739, "grad_norm": 13.8700590133667, "learning_rate": 6.998145243993284e-08, "logits/chosen": 1.6591780185699463, "logits/rejected": 1.6576237678527832, "logps/chosen": -83.76336669921875, "logps/ref_chosen": -77.06817626953125, "logps/ref_rejected": -80.048583984375, "logps/rejected": -95.55912780761719, "loss": 1.2023, "margin_dpo/margin_mean": 8.81535530090332, "margin_dpo/margin_std": 19.780887603759766, "step": 517 }, { "epoch": 0.783068783068783, "fcm_dpo/beta": 0.056159548461437225, "fcm_dpo/delta": 0.05302170664072037, "fcm_dpo/margin": 9.787691116333008, "fcm_dpo/q_t": 0.3945106863975525, "grad_norm": 11.806503295898438, "learning_rate": 6.906649047373245e-08, "logits/chosen": 1.42006516456604, "logits/rejected": 1.2588882446289062, "logps/chosen": -80.86302185058594, "logps/ref_chosen": -78.69026184082031, "logps/ref_rejected": -97.58124542236328, "logps/rejected": -109.54169464111328, "loss": 1.1268, "margin_dpo/margin_mean": 9.787691116333008, "margin_dpo/margin_std": 18.07889175415039, "step": 518 }, { "epoch": 0.7845804988662132, "fcm_dpo/beta": 0.05649217590689659, "fcm_dpo/delta": 0.05481000617146492, "fcm_dpo/margin": 6.627190589904785, "fcm_dpo/q_t": 0.4286935031414032, "grad_norm": 18.38283920288086, "learning_rate": 6.815658960673781e-08, "logits/chosen": 1.611324429512024, "logits/rejected": 1.49735426902771, "logps/chosen": -85.2575912475586, "logps/ref_chosen": -78.35087585449219, "logps/ref_rejected": -95.79212188720703, "logps/rejected": -109.32601928710938, "loss": 1.3611, "margin_dpo/margin_mean": 6.627191066741943, "margin_dpo/margin_std": 21.301122665405273, "step": 519 }, { "epoch": 0.7860922146636432, "fcm_dpo/beta": 0.056376829743385315, "fcm_dpo/delta": 0.022743336856365204, "fcm_dpo/margin": 10.225279808044434, "fcm_dpo/q_t": 0.3800208866596222, "grad_norm": 14.909660339355469, "learning_rate": 6.725177529083209e-08, "logits/chosen": 1.4296002388000488, "logits/rejected": 1.2566463947296143, "logps/chosen": -83.81216430664062, "logps/ref_chosen": -80.40513610839844, "logps/ref_rejected": -93.02791595458984, "logps/rejected": -106.66022491455078, "loss": 1.0306, "margin_dpo/margin_mean": 10.225279808044434, "margin_dpo/margin_std": 13.84968376159668, "step": 520 }, { "epoch": 0.7876039304610734, "fcm_dpo/beta": 0.057500891387462616, "fcm_dpo/delta": 0.0005270391702651978, "fcm_dpo/margin": 10.406567573547363, "fcm_dpo/q_t": 0.3807342052459717, "grad_norm": 14.164873123168945, "learning_rate": 6.63520728356167e-08, "logits/chosen": 1.4418920278549194, "logits/rejected": 1.2171070575714111, "logps/chosen": -91.44085693359375, "logps/ref_chosen": -86.5218276977539, "logps/ref_rejected": -109.20257568359375, "logps/rejected": -124.5281753540039, "loss": 1.0793, "margin_dpo/margin_mean": 10.406567573547363, "margin_dpo/margin_std": 17.031408309936523, "step": 521 }, { "epoch": 0.7891156462585034, "fcm_dpo/beta": 0.058100029826164246, "fcm_dpo/delta": 0.10878778994083405, "fcm_dpo/margin": 8.54337215423584, "fcm_dpo/q_t": 0.40556806325912476, "grad_norm": 16.047704696655273, "learning_rate": 6.545750740770336e-08, "logits/chosen": 1.297495722770691, "logits/rejected": 1.3115613460540771, "logps/chosen": -83.30645751953125, "logps/ref_chosen": -78.24254608154297, "logps/ref_rejected": -85.23554992675781, "logps/rejected": -98.84282684326172, "loss": 1.2144, "margin_dpo/margin_mean": 8.54337215423584, "margin_dpo/margin_std": 19.285472869873047, "step": 522 }, { "epoch": 0.7906273620559335, "fcm_dpo/beta": 0.05785483866930008, "fcm_dpo/delta": -0.0011233240365982056, "fcm_dpo/margin": 10.367927551269531, "fcm_dpo/q_t": 0.37423598766326904, "grad_norm": 13.733061790466309, "learning_rate": 6.456810403001012e-08, "logits/chosen": 1.4599554538726807, "logits/rejected": 1.2018170356750488, "logps/chosen": -89.55259704589844, "logps/ref_chosen": -83.50096893310547, "logps/ref_rejected": -117.45217895507812, "logps/rejected": -133.87173461914062, "loss": 1.0643, "margin_dpo/margin_mean": 10.367927551269531, "margin_dpo/margin_std": 16.020349502563477, "step": 523 }, { "epoch": 0.7921390778533636, "fcm_dpo/beta": 0.058989353477954865, "fcm_dpo/delta": 0.042257122695446014, "fcm_dpo/margin": 9.482121467590332, "fcm_dpo/q_t": 0.3825477957725525, "grad_norm": 13.890347480773926, "learning_rate": 6.368388758106134e-08, "logits/chosen": 1.3521630764007568, "logits/rejected": 1.3045108318328857, "logps/chosen": -94.79153442382812, "logps/ref_chosen": -93.22590637207031, "logps/ref_rejected": -108.17863464355469, "logps/rejected": -119.22637176513672, "loss": 1.1023, "margin_dpo/margin_mean": 9.482121467590332, "margin_dpo/margin_std": 16.036991119384766, "step": 524 }, { "epoch": 0.7936507936507936, "fcm_dpo/beta": 0.05859127268195152, "fcm_dpo/delta": -0.018103765323758125, "fcm_dpo/margin": 7.183684825897217, "fcm_dpo/q_t": 0.41909414529800415, "grad_norm": 13.745256423950195, "learning_rate": 6.280488279429185e-08, "logits/chosen": 1.0294437408447266, "logits/rejected": 1.0196237564086914, "logps/chosen": -99.94296264648438, "logps/ref_chosen": -94.08831787109375, "logps/ref_rejected": -100.682373046875, "logps/rejected": -113.720703125, "loss": 1.2164, "margin_dpo/margin_mean": 7.183685302734375, "margin_dpo/margin_std": 16.434297561645508, "step": 525 }, { "epoch": 0.7951625094482238, "fcm_dpo/beta": 0.057315438985824585, "fcm_dpo/delta": -0.11007855832576752, "fcm_dpo/margin": 8.643760681152344, "fcm_dpo/q_t": 0.39720046520233154, "grad_norm": 14.708939552307129, "learning_rate": 6.193111425735515e-08, "logits/chosen": 1.6031224727630615, "logits/rejected": 1.4323936700820923, "logps/chosen": -83.46339416503906, "logps/ref_chosen": -77.78373718261719, "logps/ref_rejected": -100.29583740234375, "logps/rejected": -114.6192626953125, "loss": 1.1392, "margin_dpo/margin_mean": 8.643760681152344, "margin_dpo/margin_std": 15.403488159179688, "step": 526 }, { "epoch": 0.7966742252456538, "fcm_dpo/beta": 0.05917923152446747, "fcm_dpo/delta": 0.18667322397232056, "fcm_dpo/margin": 7.098379611968994, "fcm_dpo/q_t": 0.4146905541419983, "grad_norm": 14.931519508361816, "learning_rate": 6.106260641143546e-08, "logits/chosen": 1.578808069229126, "logits/rejected": 1.3636837005615234, "logps/chosen": -85.0775146484375, "logps/ref_chosen": -76.695068359375, "logps/ref_rejected": -107.68281555175781, "logps/rejected": -123.16364288330078, "loss": 1.2101, "margin_dpo/margin_mean": 7.098379135131836, "margin_dpo/margin_std": 16.101825714111328, "step": 527 }, { "epoch": 0.7981859410430839, "fcm_dpo/beta": 0.05997871980071068, "fcm_dpo/delta": 0.08462747931480408, "fcm_dpo/margin": 8.665306091308594, "fcm_dpo/q_t": 0.39577969908714294, "grad_norm": 15.675226211547852, "learning_rate": 6.019938355056422e-08, "logits/chosen": 1.3997437953948975, "logits/rejected": 1.2392219305038452, "logps/chosen": -79.13544464111328, "logps/ref_chosen": -75.0361328125, "logps/ref_rejected": -94.67579650878906, "logps/rejected": -107.44041442871094, "loss": 1.1857, "margin_dpo/margin_mean": 8.665306091308594, "margin_dpo/margin_std": 18.087797164916992, "step": 528 }, { "epoch": 0.799697656840514, "fcm_dpo/beta": 0.05653582885861397, "fcm_dpo/delta": -0.3923792243003845, "fcm_dpo/margin": 16.796003341674805, "fcm_dpo/q_t": 0.31350094079971313, "grad_norm": 10.827249526977539, "learning_rate": 5.934146982094049e-08, "logits/chosen": 1.3487932682037354, "logits/rejected": 1.2035433053970337, "logps/chosen": -76.30598449707031, "logps/ref_chosen": -72.84869384765625, "logps/ref_rejected": -93.25855255126953, "logps/rejected": -113.51184844970703, "loss": 0.8556, "margin_dpo/margin_mean": 16.796003341674805, "margin_dpo/margin_std": 17.993106842041016, "step": 529 }, { "epoch": 0.8012093726379441, "fcm_dpo/beta": 0.05551842600107193, "fcm_dpo/delta": -0.025491856038570404, "fcm_dpo/margin": 11.230892181396484, "fcm_dpo/q_t": 0.3764331340789795, "grad_norm": 12.22794246673584, "learning_rate": 5.848888922025552e-08, "logits/chosen": 1.6910094022750854, "logits/rejected": 1.5774551630020142, "logps/chosen": -83.728759765625, "logps/ref_chosen": -79.4971694946289, "logps/ref_rejected": -93.59564208984375, "logps/rejected": -109.05812072753906, "loss": 1.0428, "margin_dpo/margin_mean": 11.230892181396484, "margin_dpo/margin_std": 16.893659591674805, "step": 530 }, { "epoch": 0.8027210884353742, "fcm_dpo/beta": 0.05706261843442917, "fcm_dpo/delta": 0.08858685940504074, "fcm_dpo/margin": 8.977895736694336, "fcm_dpo/q_t": 0.39383938908576965, "grad_norm": 13.592248916625977, "learning_rate": 5.7641665597021435e-08, "logits/chosen": 1.5204875469207764, "logits/rejected": 1.377941370010376, "logps/chosen": -74.43402099609375, "logps/ref_chosen": -69.45396423339844, "logps/ref_rejected": -96.30017852783203, "logps/rejected": -110.25813293457031, "loss": 1.1054, "margin_dpo/margin_mean": 8.977895736694336, "margin_dpo/margin_std": 14.877981185913086, "step": 531 }, { "epoch": 0.8042328042328042, "fcm_dpo/beta": 0.05634995549917221, "fcm_dpo/delta": -0.006979711353778839, "fcm_dpo/margin": 10.757040023803711, "fcm_dpo/q_t": 0.3826729357242584, "grad_norm": 12.061864852905273, "learning_rate": 5.679982264990424e-08, "logits/chosen": 1.2939972877502441, "logits/rejected": 1.1688940525054932, "logps/chosen": -84.37901306152344, "logps/ref_chosen": -76.52011108398438, "logps/ref_rejected": -94.79593658447266, "logps/rejected": -113.41188049316406, "loss": 1.0853, "margin_dpo/margin_mean": 10.757040023803711, "margin_dpo/margin_std": 18.02541732788086, "step": 532 }, { "epoch": 0.8057445200302343, "fcm_dpo/beta": 0.05673651024699211, "fcm_dpo/delta": -0.011338796466588974, "fcm_dpo/margin": 10.75674819946289, "fcm_dpo/q_t": 0.3817342519760132, "grad_norm": 18.434955596923828, "learning_rate": 5.596338392706076e-08, "logits/chosen": 1.5201002359390259, "logits/rejected": 1.337731122970581, "logps/chosen": -74.36298370361328, "logps/ref_chosen": -72.31800842285156, "logps/ref_rejected": -89.26652526855469, "logps/rejected": -102.06825256347656, "loss": 1.1072, "margin_dpo/margin_mean": 10.75674819946289, "margin_dpo/margin_std": 18.741741180419922, "step": 533 }, { "epoch": 0.8072562358276644, "fcm_dpo/beta": 0.05574037879705429, "fcm_dpo/delta": -0.009521931409835815, "fcm_dpo/margin": 10.878827095031738, "fcm_dpo/q_t": 0.375033438205719, "grad_norm": 15.397263526916504, "learning_rate": 5.513237282548033e-08, "logits/chosen": 1.2019238471984863, "logits/rejected": 1.1612894535064697, "logps/chosen": -79.74507141113281, "logps/ref_chosen": -77.87559509277344, "logps/ref_rejected": -92.21171569824219, "logps/rejected": -104.96002197265625, "loss": 1.0573, "margin_dpo/margin_mean": 10.878826141357422, "margin_dpo/margin_std": 16.20137596130371, "step": 534 }, { "epoch": 0.8087679516250945, "fcm_dpo/beta": 0.05694824457168579, "fcm_dpo/delta": 0.05008779466152191, "fcm_dpo/margin": 9.698480606079102, "fcm_dpo/q_t": 0.3916192650794983, "grad_norm": 11.60848331451416, "learning_rate": 5.430681259032957e-08, "logits/chosen": 1.196105718612671, "logits/rejected": 1.0437562465667725, "logps/chosen": -83.9339828491211, "logps/ref_chosen": -78.16358184814453, "logps/ref_rejected": -97.78164672851562, "logps/rejected": -113.25051879882812, "loss": 1.1577, "margin_dpo/margin_mean": 9.698480606079102, "margin_dpo/margin_std": 19.059343338012695, "step": 535 }, { "epoch": 0.8102796674225246, "fcm_dpo/beta": 0.05582098662853241, "fcm_dpo/delta": -0.13114593923091888, "fcm_dpo/margin": 12.92877197265625, "fcm_dpo/q_t": 0.352193146944046, "grad_norm": 10.292197227478027, "learning_rate": 5.3486726314303175e-08, "logits/chosen": 1.3923474550247192, "logits/rejected": 1.190511703491211, "logps/chosen": -70.1307373046875, "logps/ref_chosen": -66.65623474121094, "logps/ref_rejected": -89.49085998535156, "logps/rejected": -105.89413452148438, "loss": 0.9384, "margin_dpo/margin_mean": 12.92877197265625, "margin_dpo/margin_std": 15.259733200073242, "step": 536 }, { "epoch": 0.8117913832199547, "fcm_dpo/beta": 0.055668190121650696, "fcm_dpo/delta": 0.051453668624162674, "fcm_dpo/margin": 9.903670310974121, "fcm_dpo/q_t": 0.3890642523765564, "grad_norm": 11.76916790008545, "learning_rate": 5.267213693697695e-08, "logits/chosen": 1.5167008638381958, "logits/rejected": 1.303575038909912, "logps/chosen": -83.30008697509766, "logps/ref_chosen": -74.99390411376953, "logps/ref_rejected": -110.6627197265625, "logps/rejected": -128.87257385253906, "loss": 1.1049, "margin_dpo/margin_mean": 9.903672218322754, "margin_dpo/margin_std": 16.979801177978516, "step": 537 }, { "epoch": 0.8133030990173847, "fcm_dpo/beta": 0.055662307888269424, "fcm_dpo/delta": -0.09925831854343414, "fcm_dpo/margin": 12.428467750549316, "fcm_dpo/q_t": 0.3608172535896301, "grad_norm": 12.546353340148926, "learning_rate": 5.1863067244167144e-08, "logits/chosen": 1.4921224117279053, "logits/rejected": 1.4055488109588623, "logps/chosen": -93.9822998046875, "logps/ref_chosen": -87.61151123046875, "logps/ref_rejected": -98.1150131225586, "logps/rejected": -116.91426086425781, "loss": 0.9884, "margin_dpo/margin_mean": 12.428467750549316, "margin_dpo/margin_std": 16.471281051635742, "step": 538 }, { "epoch": 0.8148148148148148, "fcm_dpo/beta": 0.05533237010240555, "fcm_dpo/delta": 0.06810373067855835, "fcm_dpo/margin": 9.67943286895752, "fcm_dpo/q_t": 0.3939323127269745, "grad_norm": 13.805160522460938, "learning_rate": 5.105953986729195e-08, "logits/chosen": 1.6138584613800049, "logits/rejected": 1.3060567378997803, "logps/chosen": -84.86448669433594, "logps/ref_chosen": -78.86482238769531, "logps/ref_rejected": -100.84349822998047, "logps/rejected": -116.5226058959961, "loss": 1.0875, "margin_dpo/margin_mean": 9.679433822631836, "margin_dpo/margin_std": 15.986173629760742, "step": 539 }, { "epoch": 0.8163265306122449, "fcm_dpo/beta": 0.0549115426838398, "fcm_dpo/delta": -0.1168595626950264, "fcm_dpo/margin": 12.906469345092773, "fcm_dpo/q_t": 0.3611002564430237, "grad_norm": 13.961292266845703, "learning_rate": 5.026157728273966e-08, "logits/chosen": 1.416818618774414, "logits/rejected": 1.1737697124481201, "logps/chosen": -89.22088623046875, "logps/ref_chosen": -83.66409301757812, "logps/ref_rejected": -114.8860092163086, "logps/rejected": -133.34927368164062, "loss": 1.0225, "margin_dpo/margin_mean": 12.906469345092773, "margin_dpo/margin_std": 18.96722412109375, "step": 540 }, { "epoch": 0.817838246409675, "fcm_dpo/beta": 0.05322103202342987, "fcm_dpo/delta": -0.11219315230846405, "fcm_dpo/margin": 13.215749740600586, "fcm_dpo/q_t": 0.3545529544353485, "grad_norm": 15.04979133605957, "learning_rate": 4.9469201811239035e-08, "logits/chosen": 1.3256299495697021, "logits/rejected": 1.3482017517089844, "logps/chosen": -87.13902282714844, "logps/ref_chosen": -83.12225341796875, "logps/ref_rejected": -74.80526733398438, "logps/rejected": -92.03779602050781, "loss": 0.979, "margin_dpo/margin_mean": 13.215751647949219, "margin_dpo/margin_std": 17.005870819091797, "step": 541 }, { "epoch": 0.8193499622071051, "fcm_dpo/beta": 0.05173543840646744, "fcm_dpo/delta": -0.16066280007362366, "fcm_dpo/margin": 14.455942153930664, "fcm_dpo/q_t": 0.3514820337295532, "grad_norm": 10.039311408996582, "learning_rate": 4.868243561723534e-08, "logits/chosen": 1.587374210357666, "logits/rejected": 1.4626187086105347, "logps/chosen": -67.82963562011719, "logps/ref_chosen": -66.3132553100586, "logps/ref_rejected": -83.24588012695312, "logps/rejected": -99.21820831298828, "loss": 0.9778, "margin_dpo/margin_mean": 14.455942153930664, "margin_dpo/margin_std": 19.253398895263672, "step": 542 }, { "epoch": 0.8208616780045351, "fcm_dpo/beta": 0.051359452307224274, "fcm_dpo/delta": -0.05219798535108566, "fcm_dpo/margin": 12.62739086151123, "fcm_dpo/q_t": 0.36705687642097473, "grad_norm": 9.661619186401367, "learning_rate": 4.790130070827028e-08, "logits/chosen": 1.3386754989624023, "logits/rejected": 1.0855376720428467, "logps/chosen": -73.03467559814453, "logps/ref_chosen": -68.11429595947266, "logps/ref_rejected": -94.62380981445312, "logps/rejected": -112.17157745361328, "loss": 1.0345, "margin_dpo/margin_mean": 12.627391815185547, "margin_dpo/margin_std": 18.65164566040039, "step": 543 }, { "epoch": 0.8223733938019653, "fcm_dpo/beta": 0.05012405291199684, "fcm_dpo/delta": -0.10903792828321457, "fcm_dpo/margin": 13.992612838745117, "fcm_dpo/q_t": 0.3610704243183136, "grad_norm": 11.929317474365234, "learning_rate": 4.7125818934366454e-08, "logits/chosen": 1.5628447532653809, "logits/rejected": 1.3672630786895752, "logps/chosen": -85.86703491210938, "logps/ref_chosen": -81.187255859375, "logps/ref_rejected": -105.84722900390625, "logps/rejected": -124.51962280273438, "loss": 1.0017, "margin_dpo/margin_mean": 13.9926118850708, "margin_dpo/margin_std": 19.482681274414062, "step": 544 }, { "epoch": 0.8238851095993953, "fcm_dpo/beta": 0.05042444169521332, "fcm_dpo/delta": 0.10349908471107483, "fcm_dpo/margin": 9.951008796691895, "fcm_dpo/q_t": 0.3982735872268677, "grad_norm": 11.872567176818848, "learning_rate": 4.635601198741607e-08, "logits/chosen": 1.2976285219192505, "logits/rejected": 1.1338021755218506, "logps/chosen": -85.16439819335938, "logps/ref_chosen": -78.81717681884766, "logps/ref_rejected": -98.65876770019531, "logps/rejected": -114.95700073242188, "loss": 1.1213, "margin_dpo/margin_mean": 9.951007843017578, "margin_dpo/margin_std": 17.55803680419922, "step": 545 }, { "epoch": 0.8253968253968254, "fcm_dpo/beta": 0.05110456794500351, "fcm_dpo/delta": -0.006316348910331726, "fcm_dpo/margin": 11.8477144241333, "fcm_dpo/q_t": 0.3751433491706848, "grad_norm": 11.023509979248047, "learning_rate": 4.559190140057428e-08, "logits/chosen": 1.6506929397583008, "logits/rejected": 1.6415163278579712, "logps/chosen": -77.45428466796875, "logps/ref_chosen": -74.2529296875, "logps/ref_rejected": -80.32308959960938, "logps/rejected": -95.37216186523438, "loss": 1.0302, "margin_dpo/margin_mean": 11.847713470458984, "margin_dpo/margin_std": 16.76009750366211, "step": 546 }, { "epoch": 0.8269085411942555, "fcm_dpo/beta": 0.049887072294950485, "fcm_dpo/delta": -0.10243887454271317, "fcm_dpo/margin": 13.926226615905762, "fcm_dpo/q_t": 0.3531789779663086, "grad_norm": 10.132402420043945, "learning_rate": 4.483350854765672e-08, "logits/chosen": 1.390183448791504, "logits/rejected": 1.2259633541107178, "logps/chosen": -73.09548950195312, "logps/ref_chosen": -69.9368896484375, "logps/ref_rejected": -90.25672912597656, "logps/rejected": -107.34156799316406, "loss": 0.9679, "margin_dpo/margin_mean": 13.926226615905762, "margin_dpo/margin_std": 16.944210052490234, "step": 547 }, { "epoch": 0.8284202569916855, "fcm_dpo/beta": 0.051217325031757355, "fcm_dpo/delta": 0.1423822045326233, "fcm_dpo/margin": 9.027074813842773, "fcm_dpo/q_t": 0.4059806168079376, "grad_norm": 12.69057559967041, "learning_rate": 4.4080854642541826e-08, "logits/chosen": 1.1699552536010742, "logits/rejected": 1.0122103691101074, "logps/chosen": -88.73080444335938, "logps/ref_chosen": -81.1605224609375, "logps/ref_rejected": -99.7246322631836, "logps/rejected": -116.32199096679688, "loss": 1.1604, "margin_dpo/margin_mean": 9.027073860168457, "margin_dpo/margin_std": 17.565166473388672, "step": 548 }, { "epoch": 0.8299319727891157, "fcm_dpo/beta": 0.05247477814555168, "fcm_dpo/delta": 0.15556570887565613, "fcm_dpo/margin": 8.601249694824219, "fcm_dpo/q_t": 0.40862107276916504, "grad_norm": 21.05410385131836, "learning_rate": 4.333396073857723e-08, "logits/chosen": 1.810377597808838, "logits/rejected": 1.6249217987060547, "logps/chosen": -86.17178344726562, "logps/ref_chosen": -80.49800872802734, "logps/ref_rejected": -113.20750427246094, "logps/rejected": -127.48252868652344, "loss": 1.2102, "margin_dpo/margin_mean": 8.601249694824219, "margin_dpo/margin_std": 19.13092041015625, "step": 549 }, { "epoch": 0.8314436885865457, "fcm_dpo/beta": 0.05345090851187706, "fcm_dpo/delta": 0.12718510627746582, "fcm_dpo/margin": 8.954895973205566, "fcm_dpo/q_t": 0.4034798741340637, "grad_norm": 17.55516242980957, "learning_rate": 4.259284772799099e-08, "logits/chosen": 1.5192465782165527, "logits/rejected": 1.4428541660308838, "logps/chosen": -81.42501068115234, "logps/ref_chosen": -75.13760375976562, "logps/ref_rejected": -79.04876708984375, "logps/rejected": -94.29107666015625, "loss": 1.1939, "margin_dpo/margin_mean": 8.954895973205566, "margin_dpo/margin_std": 18.97968101501465, "step": 550 }, { "epoch": 0.8329554043839759, "fcm_dpo/beta": 0.054639048874378204, "fcm_dpo/delta": 0.024431193247437477, "fcm_dpo/margin": 10.552170753479004, "fcm_dpo/q_t": 0.38189250230789185, "grad_norm": 16.292776107788086, "learning_rate": 4.1857536341307176e-08, "logits/chosen": 1.483259916305542, "logits/rejected": 1.3449373245239258, "logps/chosen": -91.39802551269531, "logps/ref_chosen": -85.4496078491211, "logps/ref_rejected": -103.48530578613281, "logps/rejected": -119.98588562011719, "loss": 1.0539, "margin_dpo/margin_mean": 10.552170753479004, "margin_dpo/margin_std": 16.359243392944336, "step": 551 }, { "epoch": 0.8344671201814059, "fcm_dpo/beta": 0.05562649294734001, "fcm_dpo/delta": 0.07661741226911545, "fcm_dpo/margin": 9.438526153564453, "fcm_dpo/q_t": 0.38665229082107544, "grad_norm": 14.911755561828613, "learning_rate": 4.112804714676593e-08, "logits/chosen": 1.4020166397094727, "logits/rejected": 1.2649641036987305, "logps/chosen": -87.27410888671875, "logps/ref_chosen": -82.01036071777344, "logps/ref_rejected": -101.61884307861328, "logps/rejected": -116.32112121582031, "loss": 1.1424, "margin_dpo/margin_mean": 9.438526153564453, "margin_dpo/margin_std": 17.219161987304688, "step": 552 }, { "epoch": 0.8359788359788359, "fcm_dpo/beta": 0.055003680288791656, "fcm_dpo/delta": -0.022091738879680634, "fcm_dpo/margin": 11.281782150268555, "fcm_dpo/q_t": 0.3798624873161316, "grad_norm": 16.0929012298584, "learning_rate": 4.0404400549748144e-08, "logits/chosen": 1.6245696544647217, "logits/rejected": 1.3851006031036377, "logps/chosen": -82.01695251464844, "logps/ref_chosen": -73.81416320800781, "logps/ref_rejected": -104.27050018310547, "logps/rejected": -123.75507354736328, "loss": 1.1399, "margin_dpo/margin_mean": 11.281782150268555, "margin_dpo/margin_std": 20.727617263793945, "step": 553 }, { "epoch": 0.8374905517762661, "fcm_dpo/beta": 0.05463992804288864, "fcm_dpo/delta": -0.04029256850481033, "fcm_dpo/margin": 11.668399810791016, "fcm_dpo/q_t": 0.37226054072380066, "grad_norm": 11.775946617126465, "learning_rate": 3.968661679220467e-08, "logits/chosen": 1.2836058139801025, "logits/rejected": 1.2342619895935059, "logps/chosen": -87.90589904785156, "logps/ref_chosen": -81.43980407714844, "logps/ref_rejected": -89.32518005371094, "logps/rejected": -107.45967102050781, "loss": 1.064, "margin_dpo/margin_mean": 11.668397903442383, "margin_dpo/margin_std": 18.33779525756836, "step": 554 }, { "epoch": 0.8390022675736961, "fcm_dpo/beta": 0.05459080636501312, "fcm_dpo/delta": -0.06235264241695404, "fcm_dpo/margin": 12.04253101348877, "fcm_dpo/q_t": 0.36375415325164795, "grad_norm": 12.814702033996582, "learning_rate": 3.89747159520904e-08, "logits/chosen": 1.669064998626709, "logits/rejected": 1.5991406440734863, "logps/chosen": -89.34579467773438, "logps/ref_chosen": -81.66071319580078, "logps/ref_rejected": -87.20857238769531, "logps/rejected": -106.93618774414062, "loss": 1.0901, "margin_dpo/margin_mean": 12.042530059814453, "margin_dpo/margin_std": 19.42294692993164, "step": 555 }, { "epoch": 0.8405139833711263, "fcm_dpo/beta": 0.05409371852874756, "fcm_dpo/delta": 0.02651580236852169, "fcm_dpo/margin": 10.629733085632324, "fcm_dpo/q_t": 0.3883476257324219, "grad_norm": 11.263772010803223, "learning_rate": 3.826871794280192e-08, "logits/chosen": 1.311571717262268, "logits/rejected": 1.2102625370025635, "logps/chosen": -74.93548583984375, "logps/ref_chosen": -66.02448272705078, "logps/ref_rejected": -82.74746704101562, "logps/rejected": -102.28819274902344, "loss": 1.1078, "margin_dpo/margin_mean": 10.62973403930664, "margin_dpo/margin_std": 18.774234771728516, "step": 556 }, { "epoch": 0.8420256991685563, "fcm_dpo/beta": 0.05260467901825905, "fcm_dpo/delta": -0.13225093483924866, "fcm_dpo/margin": 13.666872024536133, "fcm_dpo/q_t": 0.358351469039917, "grad_norm": 11.609122276306152, "learning_rate": 3.756864251262143e-08, "logits/chosen": 1.4144532680511475, "logits/rejected": 1.133040189743042, "logps/chosen": -79.66389465332031, "logps/ref_chosen": -73.08985900878906, "logps/ref_rejected": -97.43034362792969, "logps/rejected": -117.6712417602539, "loss": 1.0001, "margin_dpo/margin_mean": 13.66687297821045, "margin_dpo/margin_std": 18.801319122314453, "step": 557 }, { "epoch": 0.8435374149659864, "fcm_dpo/beta": 0.05203159525990486, "fcm_dpo/delta": -0.06583023071289062, "fcm_dpo/margin": 12.695112228393555, "fcm_dpo/q_t": 0.3633221983909607, "grad_norm": 13.396330833435059, "learning_rate": 3.687450924416341e-08, "logits/chosen": 1.5742992162704468, "logits/rejected": 1.4637998342514038, "logps/chosen": -87.541015625, "logps/ref_chosen": -80.1357192993164, "logps/ref_rejected": -106.65797424316406, "logps/rejected": -126.75838470458984, "loss": 1.0222, "margin_dpo/margin_mean": 12.695113182067871, "margin_dpo/margin_std": 17.957141876220703, "step": 558 }, { "epoch": 0.8450491307634165, "fcm_dpo/beta": 0.0517839640378952, "fcm_dpo/delta": 0.01864977926015854, "fcm_dpo/margin": 11.21513557434082, "fcm_dpo/q_t": 0.3890264630317688, "grad_norm": 13.40358829498291, "learning_rate": 3.6186337553827743e-08, "logits/chosen": 1.5580424070358276, "logits/rejected": 1.3317805528640747, "logps/chosen": -85.84085845947266, "logps/ref_chosen": -79.42267608642578, "logps/ref_rejected": -98.59402465820312, "logps/rejected": -116.22734069824219, "loss": 1.158, "margin_dpo/margin_mean": 11.21513557434082, "margin_dpo/margin_std": 21.69605827331543, "step": 559 }, { "epoch": 0.8465608465608465, "fcm_dpo/beta": 0.052260056138038635, "fcm_dpo/delta": -0.06172545626759529, "fcm_dpo/margin": 12.571533203125, "fcm_dpo/q_t": 0.3696938157081604, "grad_norm": 12.267929077148438, "learning_rate": 3.550414669125573e-08, "logits/chosen": 1.4044283628463745, "logits/rejected": 1.3158057928085327, "logps/chosen": -84.15168762207031, "logps/ref_chosen": -77.49559020996094, "logps/ref_rejected": -92.61347961425781, "logps/rejected": -111.84111785888672, "loss": 1.0404, "margin_dpo/margin_mean": 12.571533203125, "margin_dpo/margin_std": 18.772090911865234, "step": 560 }, { "epoch": 0.8480725623582767, "fcm_dpo/beta": 0.05100402235984802, "fcm_dpo/delta": -0.07462760806083679, "fcm_dpo/margin": 13.123538970947266, "fcm_dpo/q_t": 0.37038421630859375, "grad_norm": 11.18996810913086, "learning_rate": 3.482795573879241e-08, "logits/chosen": 1.6638920307159424, "logits/rejected": 1.6029834747314453, "logps/chosen": -85.24051666259766, "logps/ref_chosen": -79.20771789550781, "logps/ref_rejected": -93.46514892578125, "logps/rejected": -112.62149047851562, "loss": 1.0184, "margin_dpo/margin_mean": 13.123538970947266, "margin_dpo/margin_std": 19.08633804321289, "step": 561 }, { "epoch": 0.8495842781557067, "fcm_dpo/beta": 0.05021877959370613, "fcm_dpo/delta": -0.04768141731619835, "fcm_dpo/margin": 12.811036109924316, "fcm_dpo/q_t": 0.3715214133262634, "grad_norm": 11.333587646484375, "learning_rate": 3.415778361095226e-08, "logits/chosen": 1.6762216091156006, "logits/rejected": 1.5491697788238525, "logps/chosen": -102.22946166992188, "logps/ref_chosen": -94.88652801513672, "logps/ref_rejected": -109.33815002441406, "logps/rejected": -129.4921112060547, "loss": 1.0176, "margin_dpo/margin_mean": 12.81103515625, "margin_dpo/margin_std": 17.984500885009766, "step": 562 }, { "epoch": 0.8510959939531368, "fcm_dpo/beta": 0.05006178095936775, "fcm_dpo/delta": -0.048707108944654465, "fcm_dpo/margin": 12.89486026763916, "fcm_dpo/q_t": 0.3766898810863495, "grad_norm": 11.322830200195312, "learning_rate": 3.349364905389032e-08, "logits/chosen": 1.6359024047851562, "logits/rejected": 1.4605317115783691, "logps/chosen": -69.72264099121094, "logps/ref_chosen": -65.90719604492188, "logps/ref_rejected": -84.07121276855469, "logps/rejected": -100.78152465820312, "loss": 1.0887, "margin_dpo/margin_mean": 12.89486026763916, "margin_dpo/margin_std": 21.711301803588867, "step": 563 }, { "epoch": 0.8526077097505669, "fcm_dpo/beta": 0.0500749796628952, "fcm_dpo/delta": -0.0054284874349832535, "fcm_dpo/margin": 12.080354690551758, "fcm_dpo/q_t": 0.3790516257286072, "grad_norm": 16.2349910736084, "learning_rate": 3.283557064487785e-08, "logits/chosen": 1.315792202949524, "logits/rejected": 1.2397656440734863, "logps/chosen": -76.4324722290039, "logps/ref_chosen": -72.32071685791016, "logps/ref_rejected": -88.05014038085938, "logps/rejected": -104.24224853515625, "loss": 1.1096, "margin_dpo/margin_mean": 12.080353736877441, "margin_dpo/margin_std": 21.116573333740234, "step": 564 }, { "epoch": 0.854119425547997, "fcm_dpo/beta": 0.050448790192604065, "fcm_dpo/delta": 0.026010502129793167, "fcm_dpo/margin": 11.385034561157227, "fcm_dpo/q_t": 0.3829692006111145, "grad_norm": 10.440814971923828, "learning_rate": 3.218356679178252e-08, "logits/chosen": 1.3742563724517822, "logits/rejected": 1.2928344011306763, "logps/chosen": -88.49894714355469, "logps/ref_chosen": -80.18453979492188, "logps/ref_rejected": -99.55126953125, "logps/rejected": -119.25071716308594, "loss": 1.074, "margin_dpo/margin_mean": 11.385034561157227, "margin_dpo/margin_std": 18.052478790283203, "step": 565 }, { "epoch": 0.8556311413454271, "fcm_dpo/beta": 0.05103863775730133, "fcm_dpo/delta": 0.020700603723526, "fcm_dpo/margin": 11.292126655578613, "fcm_dpo/q_t": 0.38311922550201416, "grad_norm": 15.4530668258667, "learning_rate": 3.1537655732553764e-08, "logits/chosen": 1.4136245250701904, "logits/rejected": 1.3437389135360718, "logps/chosen": -93.4151611328125, "logps/ref_chosen": -88.0877914428711, "logps/ref_rejected": -87.7589111328125, "logps/rejected": -104.37841033935547, "loss": 1.1149, "margin_dpo/margin_mean": 11.292126655578613, "margin_dpo/margin_std": 19.296255111694336, "step": 566 }, { "epoch": 0.8571428571428571, "fcm_dpo/beta": 0.049624793231487274, "fcm_dpo/delta": -0.062444910407066345, "fcm_dpo/margin": 13.247112274169922, "fcm_dpo/q_t": 0.3635924160480499, "grad_norm": 13.69182014465332, "learning_rate": 3.089785553471233e-08, "logits/chosen": 1.6392173767089844, "logits/rejected": 1.3875160217285156, "logps/chosen": -76.57147216796875, "logps/ref_chosen": -69.93267822265625, "logps/ref_rejected": -95.71786499023438, "logps/rejected": -115.60376739501953, "loss": 0.9874, "margin_dpo/margin_mean": 13.247111320495605, "margin_dpo/margin_std": 17.017322540283203, "step": 567 }, { "epoch": 0.8586545729402872, "fcm_dpo/beta": 0.04972817003726959, "fcm_dpo/delta": -0.09054756909608841, "fcm_dpo/margin": 13.71946907043457, "fcm_dpo/q_t": 0.36071109771728516, "grad_norm": 10.639127731323242, "learning_rate": 3.026418409484513e-08, "logits/chosen": 1.6388273239135742, "logits/rejected": 1.4276424646377563, "logps/chosen": -73.75872802734375, "logps/ref_chosen": -70.33343505859375, "logps/ref_rejected": -108.86271667480469, "logps/rejected": -126.00747680664062, "loss": 0.9822, "margin_dpo/margin_mean": 13.71946907043457, "margin_dpo/margin_std": 16.7993221282959, "step": 568 }, { "epoch": 0.8601662887377173, "fcm_dpo/beta": 0.04921431094408035, "fcm_dpo/delta": 0.11886165291070938, "fcm_dpo/margin": 9.874387741088867, "fcm_dpo/q_t": 0.39863890409469604, "grad_norm": 13.355634689331055, "learning_rate": 2.963665913810451e-08, "logits/chosen": 1.3453781604766846, "logits/rejected": 1.3191735744476318, "logps/chosen": -87.32330322265625, "logps/ref_chosen": -80.85043334960938, "logps/ref_rejected": -92.77810668945312, "logps/rejected": -109.12537384033203, "loss": 1.1497, "margin_dpo/margin_mean": 9.874388694763184, "margin_dpo/margin_std": 18.156291961669922, "step": 569 }, { "epoch": 0.8616780045351474, "fcm_dpo/beta": 0.04896945506334305, "fcm_dpo/delta": -0.2022635042667389, "fcm_dpo/margin": 16.062522888183594, "fcm_dpo/q_t": 0.33973759412765503, "grad_norm": 10.115246772766113, "learning_rate": 2.9015298217712453e-08, "logits/chosen": 1.133542537689209, "logits/rejected": 0.9755515456199646, "logps/chosen": -72.37799835205078, "logps/ref_chosen": -69.94769287109375, "logps/ref_rejected": -97.37059020996094, "logps/rejected": -115.86341857910156, "loss": 0.9229, "margin_dpo/margin_mean": 16.062524795532227, "margin_dpo/margin_std": 18.405261993408203, "step": 570 }, { "epoch": 0.8631897203325775, "fcm_dpo/beta": 0.04955677688121796, "fcm_dpo/delta": 0.18852154910564423, "fcm_dpo/margin": 8.448715209960938, "fcm_dpo/q_t": 0.4095662236213684, "grad_norm": 12.49162769317627, "learning_rate": 2.840011871446962e-08, "logits/chosen": 1.477982997894287, "logits/rejected": 1.4091600179672241, "logps/chosen": -77.71009826660156, "logps/ref_chosen": -72.28555297851562, "logps/ref_rejected": -84.57748413085938, "logps/rejected": -98.45074462890625, "loss": 1.198, "margin_dpo/margin_mean": 8.448714256286621, "margin_dpo/margin_std": 18.35816192626953, "step": 571 }, { "epoch": 0.8647014361300076, "fcm_dpo/beta": 0.05024714022874832, "fcm_dpo/delta": 0.058539681136608124, "fcm_dpo/margin": 10.840170860290527, "fcm_dpo/q_t": 0.3887644410133362, "grad_norm": 11.713820457458496, "learning_rate": 2.7791137836269158e-08, "logits/chosen": 1.5529911518096924, "logits/rejected": 1.6280450820922852, "logps/chosen": -98.46131896972656, "logps/ref_chosen": -91.4906997680664, "logps/ref_rejected": -80.44602966308594, "logps/rejected": -98.25682067871094, "loss": 1.0653, "margin_dpo/margin_mean": 10.840169906616211, "margin_dpo/margin_std": 16.804597854614258, "step": 572 }, { "epoch": 0.8662131519274376, "fcm_dpo/beta": 0.05019587278366089, "fcm_dpo/delta": -0.019488001242280006, "fcm_dpo/margin": 12.313529014587402, "fcm_dpo/q_t": 0.3816547989845276, "grad_norm": 12.991681098937988, "learning_rate": 2.718837261761528e-08, "logits/chosen": 1.6213641166687012, "logits/rejected": 1.5047485828399658, "logps/chosen": -94.56259155273438, "logps/ref_chosen": -87.54232788085938, "logps/ref_rejected": -104.32984924316406, "logps/rejected": -123.66363525390625, "loss": 1.1209, "margin_dpo/margin_mean": 12.313529014587402, "margin_dpo/margin_std": 22.380565643310547, "step": 573 }, { "epoch": 0.8677248677248677, "fcm_dpo/beta": 0.049620434641838074, "fcm_dpo/delta": -0.14074860513210297, "fcm_dpo/margin": 14.71909236907959, "fcm_dpo/q_t": 0.35213059186935425, "grad_norm": 10.749646186828613, "learning_rate": 2.659183991914696e-08, "logits/chosen": 1.8166577816009521, "logits/rejected": 1.7073123455047607, "logps/chosen": -80.68084716796875, "logps/ref_chosen": -75.36632537841797, "logps/ref_rejected": -103.27328491210938, "logps/rejected": -123.30690002441406, "loss": 0.9694, "margin_dpo/margin_mean": 14.719091415405273, "margin_dpo/margin_std": 18.72347640991211, "step": 574 }, { "epoch": 0.8692365835222978, "fcm_dpo/beta": 0.04792990908026695, "fcm_dpo/delta": -0.09886258840560913, "fcm_dpo/margin": 9.744050979614258, "fcm_dpo/q_t": 0.40980133414268494, "grad_norm": 11.675394058227539, "learning_rate": 2.600155642716606e-08, "logits/chosen": 1.63795804977417, "logits/rejected": 1.4440220594406128, "logps/chosen": -88.9930191040039, "logps/ref_chosen": -81.678466796875, "logps/ref_rejected": -112.84233093261719, "logps/rejected": -129.90093994140625, "loss": 1.1909, "margin_dpo/margin_mean": 9.744050979614258, "margin_dpo/margin_std": 20.141633987426758, "step": 575 }, { "epoch": 0.8707482993197279, "fcm_dpo/beta": 0.04688930884003639, "fcm_dpo/delta": -0.10789503902196884, "fcm_dpo/margin": 14.908914566040039, "fcm_dpo/q_t": 0.354342520236969, "grad_norm": 10.642295837402344, "learning_rate": 2.5417538653170754e-08, "logits/chosen": 1.4131211042404175, "logits/rejected": 1.199857234954834, "logps/chosen": -72.4935302734375, "logps/ref_chosen": -68.78944396972656, "logps/ref_rejected": -102.79037475585938, "logps/rejected": -121.40336608886719, "loss": 0.9817, "margin_dpo/margin_mean": 14.908914566040039, "margin_dpo/margin_std": 19.143428802490234, "step": 576 }, { "epoch": 0.872260015117158, "fcm_dpo/beta": 0.04831491410732269, "fcm_dpo/delta": 0.2083924412727356, "fcm_dpo/margin": 8.283341407775879, "fcm_dpo/q_t": 0.4179866909980774, "grad_norm": 11.02425479888916, "learning_rate": 2.4839802933393607e-08, "logits/chosen": 1.3915233612060547, "logits/rejected": 1.3548476696014404, "logps/chosen": -85.09516906738281, "logps/ref_chosen": -79.84675598144531, "logps/ref_rejected": -84.08309936523438, "logps/rejected": -97.61485290527344, "loss": 1.191, "margin_dpo/margin_mean": 8.283340454101562, "margin_dpo/margin_std": 17.960664749145508, "step": 577 }, { "epoch": 0.873771730914588, "fcm_dpo/beta": 0.05029800906777382, "fcm_dpo/delta": 0.20370450615882874, "fcm_dpo/margin": 8.049797058105469, "fcm_dpo/q_t": 0.42061176896095276, "grad_norm": 14.181432723999023, "learning_rate": 2.4268365428344733e-08, "logits/chosen": 1.4182995557785034, "logits/rejected": 1.3366800546646118, "logps/chosen": -80.43700408935547, "logps/ref_chosen": -74.91357421875, "logps/ref_rejected": -83.64881896972656, "logps/rejected": -97.2220458984375, "loss": 1.185, "margin_dpo/margin_mean": 8.049795150756836, "margin_dpo/margin_std": 17.056869506835938, "step": 578 }, { "epoch": 0.8752834467120182, "fcm_dpo/beta": 0.05020540952682495, "fcm_dpo/delta": -0.054013222455978394, "fcm_dpo/margin": 12.934350967407227, "fcm_dpo/q_t": 0.36339443922042847, "grad_norm": 10.921134948730469, "learning_rate": 2.3703242122359357e-08, "logits/chosen": 1.4313457012176514, "logits/rejected": 1.3517735004425049, "logps/chosen": -82.50849151611328, "logps/ref_chosen": -75.51022338867188, "logps/ref_rejected": -84.83192443847656, "logps/rejected": -104.76454162597656, "loss": 1.0188, "margin_dpo/margin_mean": 12.93435001373291, "margin_dpo/margin_std": 18.144775390625, "step": 579 }, { "epoch": 0.8767951625094482, "fcm_dpo/beta": 0.051037803292274475, "fcm_dpo/delta": 0.04716159403324127, "fcm_dpo/margin": 10.865426063537598, "fcm_dpo/q_t": 0.3839249610900879, "grad_norm": 10.735759735107422, "learning_rate": 2.3144448823151392e-08, "logits/chosen": 1.422055721282959, "logits/rejected": 1.2607250213623047, "logps/chosen": -80.66156005859375, "logps/ref_chosen": -76.61564636230469, "logps/ref_rejected": -97.09959411621094, "logps/rejected": -112.01094055175781, "loss": 1.1127, "margin_dpo/margin_mean": 10.86542797088623, "margin_dpo/margin_std": 18.92938995361328, "step": 580 }, { "epoch": 0.8783068783068783, "fcm_dpo/beta": 0.05055753141641617, "fcm_dpo/delta": 0.0056791529059410095, "fcm_dpo/margin": 11.744110107421875, "fcm_dpo/q_t": 0.3782769739627838, "grad_norm": 11.562898635864258, "learning_rate": 2.259200116137039e-08, "logits/chosen": 1.5065741539001465, "logits/rejected": 1.3873448371887207, "logps/chosen": -81.64364624023438, "logps/ref_chosen": -74.8531265258789, "logps/ref_rejected": -101.5344009399414, "logps/rejected": -120.06903076171875, "loss": 1.0871, "margin_dpo/margin_mean": 11.744109153747559, "margin_dpo/margin_std": 19.222896575927734, "step": 581 }, { "epoch": 0.8798185941043084, "fcm_dpo/beta": 0.05103091150522232, "fcm_dpo/delta": 0.0025793779641389847, "fcm_dpo/margin": 11.70887279510498, "fcm_dpo/q_t": 0.3785492777824402, "grad_norm": 10.20443344116211, "learning_rate": 2.204591459016525e-08, "logits/chosen": 1.301880121231079, "logits/rejected": 1.376091480255127, "logps/chosen": -88.2724609375, "logps/ref_chosen": -81.07638549804688, "logps/ref_rejected": -72.83570861816406, "logps/rejected": -91.74066162109375, "loss": 1.0997, "margin_dpo/margin_mean": 11.70887279510498, "margin_dpo/margin_std": 20.00967788696289, "step": 582 }, { "epoch": 0.8813303099017384, "fcm_dpo/beta": 0.051685880869627, "fcm_dpo/delta": 0.09393204003572464, "fcm_dpo/margin": 9.883415222167969, "fcm_dpo/q_t": 0.4038504362106323, "grad_norm": 16.68686294555664, "learning_rate": 2.1506204384751064e-08, "logits/chosen": 1.7437877655029297, "logits/rejected": 1.4623810052871704, "logps/chosen": -73.52954864501953, "logps/ref_chosen": -66.78465270996094, "logps/ref_rejected": -106.45825958251953, "logps/rejected": -123.08657836914062, "loss": 1.2033, "margin_dpo/margin_mean": 9.883415222167969, "margin_dpo/margin_std": 21.616724014282227, "step": 583 }, { "epoch": 0.8828420256991686, "fcm_dpo/beta": 0.051659468561410904, "fcm_dpo/delta": 0.028514884412288666, "fcm_dpo/margin": 11.066621780395508, "fcm_dpo/q_t": 0.3903641104698181, "grad_norm": 17.737751007080078, "learning_rate": 2.09728856419826e-08, "logits/chosen": 1.8142191171646118, "logits/rejected": 1.5797849893569946, "logps/chosen": -64.03971862792969, "logps/ref_chosen": -60.802913665771484, "logps/ref_rejected": -99.45012664794922, "logps/rejected": -113.75355529785156, "loss": 1.1356, "margin_dpo/margin_mean": 11.066622734069824, "margin_dpo/margin_std": 20.347135543823242, "step": 584 }, { "epoch": 0.8843537414965986, "fcm_dpo/beta": 0.05364014208316803, "fcm_dpo/delta": 0.1607358157634735, "fcm_dpo/margin": 8.310773849487305, "fcm_dpo/q_t": 0.4064415395259857, "grad_norm": 11.880135536193848, "learning_rate": 2.044597327993153e-08, "logits/chosen": 1.2061209678649902, "logits/rejected": 1.139693260192871, "logps/chosen": -81.80364990234375, "logps/ref_chosen": -75.92616271972656, "logps/ref_rejected": -94.47601318359375, "logps/rejected": -108.66427612304688, "loss": 1.1862, "margin_dpo/margin_mean": 8.310773849487305, "margin_dpo/margin_std": 17.42922592163086, "step": 585 }, { "epoch": 0.8858654572940288, "fcm_dpo/beta": 0.052861057221889496, "fcm_dpo/delta": -0.12748199701309204, "fcm_dpo/margin": 13.586462020874023, "fcm_dpo/q_t": 0.35138410329818726, "grad_norm": 10.37414836883545, "learning_rate": 1.9925482037469187e-08, "logits/chosen": 1.7434275150299072, "logits/rejected": 1.6040321588516235, "logps/chosen": -72.82206726074219, "logps/ref_chosen": -68.62062072753906, "logps/ref_rejected": -81.98324584960938, "logps/rejected": -99.77114868164062, "loss": 0.9753, "margin_dpo/margin_mean": 13.586462020874023, "margin_dpo/margin_std": 17.776878356933594, "step": 586 }, { "epoch": 0.8873771730914588, "fcm_dpo/beta": 0.05152256414294243, "fcm_dpo/delta": -0.15027303993701935, "fcm_dpo/margin": 14.35053825378418, "fcm_dpo/q_t": 0.35401198267936707, "grad_norm": 21.796733856201172, "learning_rate": 1.9411426473854687e-08, "logits/chosen": 1.2960412502288818, "logits/rejected": 1.2788506746292114, "logps/chosen": -81.04063415527344, "logps/ref_chosen": -77.67031860351562, "logps/ref_rejected": -79.35327911376953, "logps/rejected": -97.07413482666016, "loss": 1.0189, "margin_dpo/margin_mean": 14.35053825378418, "margin_dpo/margin_std": 20.870136260986328, "step": 587 }, { "epoch": 0.8888888888888888, "fcm_dpo/beta": 0.05039939284324646, "fcm_dpo/delta": -0.038049668073654175, "fcm_dpo/margin": 12.592073440551758, "fcm_dpo/q_t": 0.36368703842163086, "grad_norm": 14.892370223999023, "learning_rate": 1.890382096832699e-08, "logits/chosen": 1.4794716835021973, "logits/rejected": 1.3708586692810059, "logps/chosen": -84.13540649414062, "logps/ref_chosen": -77.94320678710938, "logps/ref_rejected": -98.41210174560547, "logps/rejected": -117.19638061523438, "loss": 0.9984, "margin_dpo/margin_mean": 12.592074394226074, "margin_dpo/margin_std": 16.2170467376709, "step": 588 }, { "epoch": 0.890400604686319, "fcm_dpo/beta": 0.04980698972940445, "fcm_dpo/delta": -0.10331517457962036, "fcm_dpo/margin": 13.974435806274414, "fcm_dpo/q_t": 0.35320183634757996, "grad_norm": 13.363643646240234, "learning_rate": 1.840267971970344e-08, "logits/chosen": 1.1975750923156738, "logits/rejected": 1.1503894329071045, "logps/chosen": -78.79930114746094, "logps/ref_chosen": -75.18646240234375, "logps/ref_rejected": -93.35910034179688, "logps/rejected": -110.94638061523438, "loss": 0.9705, "margin_dpo/margin_mean": 13.974435806274414, "margin_dpo/margin_std": 17.48256492614746, "step": 589 }, { "epoch": 0.891912320483749, "fcm_dpo/beta": 0.049094684422016144, "fcm_dpo/delta": -0.07261404395103455, "fcm_dpo/margin": 13.602447509765625, "fcm_dpo/q_t": 0.36111220717430115, "grad_norm": 14.437777519226074, "learning_rate": 1.7908016745981856e-08, "logits/chosen": 1.493552565574646, "logits/rejected": 1.4283008575439453, "logps/chosen": -93.33152770996094, "logps/ref_chosen": -86.9908447265625, "logps/ref_rejected": -100.61723327636719, "logps/rejected": -120.56035614013672, "loss": 0.9957, "margin_dpo/margin_mean": 13.602448463439941, "margin_dpo/margin_std": 18.259305953979492, "step": 590 }, { "epoch": 0.8934240362811792, "fcm_dpo/beta": 0.04784344136714935, "fcm_dpo/delta": -0.04332631826400757, "fcm_dpo/margin": 13.286850929260254, "fcm_dpo/q_t": 0.3692328631877899, "grad_norm": 11.933575630187988, "learning_rate": 1.7419845883949098e-08, "logits/chosen": 1.6455740928649902, "logits/rejected": 1.4992456436157227, "logps/chosen": -77.32672119140625, "logps/ref_chosen": -74.85809326171875, "logps/ref_rejected": -102.75840759277344, "logps/rejected": -118.51390075683594, "loss": 1.038, "margin_dpo/margin_mean": 13.28685188293457, "margin_dpo/margin_std": 18.549617767333984, "step": 591 }, { "epoch": 0.8949357520786092, "fcm_dpo/beta": 0.04816969856619835, "fcm_dpo/delta": -0.016897665336728096, "fcm_dpo/margin": 12.779840469360352, "fcm_dpo/q_t": 0.378559410572052, "grad_norm": 10.442089080810547, "learning_rate": 1.6938180788793556e-08, "logits/chosen": 1.8930881023406982, "logits/rejected": 1.6082756519317627, "logps/chosen": -71.08365631103516, "logps/ref_chosen": -67.90579223632812, "logps/ref_rejected": -100.35234069824219, "logps/rejected": -116.31005096435547, "loss": 1.0329, "margin_dpo/margin_mean": 12.779840469360352, "margin_dpo/margin_std": 18.484506607055664, "step": 592 }, { "epoch": 0.8964474678760394, "fcm_dpo/beta": 0.04836907237768173, "fcm_dpo/delta": 0.006007889751344919, "fcm_dpo/margin": 12.287393569946289, "fcm_dpo/q_t": 0.38056206703186035, "grad_norm": 12.300355911254883, "learning_rate": 1.6463034933723336e-08, "logits/chosen": 1.4134345054626465, "logits/rejected": 1.197739839553833, "logps/chosen": -62.10874938964844, "logps/ref_chosen": -59.29489517211914, "logps/ref_rejected": -85.31307983398438, "logps/rejected": -100.4143295288086, "loss": 1.0848, "margin_dpo/margin_mean": 12.287393569946289, "margin_dpo/margin_std": 20.32806396484375, "step": 593 }, { "epoch": 0.8979591836734694, "fcm_dpo/beta": 0.048611119389534, "fcm_dpo/delta": 0.06948675960302353, "fcm_dpo/margin": 10.986978530883789, "fcm_dpo/q_t": 0.3919852375984192, "grad_norm": 12.52187442779541, "learning_rate": 1.5994421609589385e-08, "logits/chosen": 1.2517837285995483, "logits/rejected": 1.1950948238372803, "logps/chosen": -88.89640808105469, "logps/ref_chosen": -83.14643859863281, "logps/ref_rejected": -88.201904296875, "logps/rejected": -104.93885803222656, "loss": 1.0882, "margin_dpo/margin_mean": 10.986978530883789, "margin_dpo/margin_std": 18.014604568481445, "step": 594 }, { "epoch": 0.8994708994708994, "fcm_dpo/beta": 0.048267461359500885, "fcm_dpo/delta": -0.10970209538936615, "fcm_dpo/margin": 14.54538345336914, "fcm_dpo/q_t": 0.354385644197464, "grad_norm": 11.967026710510254, "learning_rate": 1.553235392451377e-08, "logits/chosen": 1.6991221904754639, "logits/rejected": 1.4775123596191406, "logps/chosen": -75.35159301757812, "logps/ref_chosen": -70.40016174316406, "logps/ref_rejected": -103.95550537109375, "logps/rejected": -123.45231628417969, "loss": 1.0295, "margin_dpo/margin_mean": 14.545382499694824, "margin_dpo/margin_std": 21.330772399902344, "step": 595 }, { "epoch": 0.9009826152683296, "fcm_dpo/beta": 0.04931151121854782, "fcm_dpo/delta": 0.13946330547332764, "fcm_dpo/margin": 5.445644378662109, "fcm_dpo/q_t": 0.4492513835430145, "grad_norm": 12.14819049835205, "learning_rate": 1.507684480352292e-08, "logits/chosen": 1.042555570602417, "logits/rejected": 1.0750787258148193, "logps/chosen": -93.83740997314453, "logps/ref_chosen": -86.083740234375, "logps/ref_rejected": -78.41991424560547, "logps/rejected": -91.61923217773438, "loss": 1.3056, "margin_dpo/margin_mean": 5.445644378662109, "margin_dpo/margin_std": 17.816463470458984, "step": 596 }, { "epoch": 0.9024943310657596, "fcm_dpo/beta": 0.049357444047927856, "fcm_dpo/delta": 0.009085144847631454, "fcm_dpo/margin": 11.98304271697998, "fcm_dpo/q_t": 0.37970802187919617, "grad_norm": 10.093440055847168, "learning_rate": 1.4627906988186111e-08, "logits/chosen": 1.4211645126342773, "logits/rejected": 1.3739463090896606, "logps/chosen": -70.9151611328125, "logps/ref_chosen": -67.8086166381836, "logps/ref_rejected": -71.09245300292969, "logps/rejected": -86.18203735351562, "loss": 1.0678, "margin_dpo/margin_mean": 11.98304271697998, "margin_dpo/margin_std": 18.88620948791504, "step": 597 }, { "epoch": 0.9040060468631897, "fcm_dpo/beta": 0.050865307450294495, "fcm_dpo/delta": 0.16860562562942505, "fcm_dpo/margin": 8.606800079345703, "fcm_dpo/q_t": 0.41332411766052246, "grad_norm": 11.829634666442871, "learning_rate": 1.4185553036259095e-08, "logits/chosen": 1.620859146118164, "logits/rejected": 1.4285743236541748, "logps/chosen": -83.40132141113281, "logps/ref_chosen": -74.31095886230469, "logps/ref_rejected": -98.08122253417969, "logps/rejected": -115.77838897705078, "loss": 1.1809, "margin_dpo/margin_mean": 8.606800079345703, "margin_dpo/margin_std": 18.199466705322266, "step": 598 }, { "epoch": 0.9055177626606198, "fcm_dpo/beta": 0.05157572776079178, "fcm_dpo/delta": 0.06592804938554764, "fcm_dpo/margin": 10.424295425415039, "fcm_dpo/q_t": 0.3934212625026703, "grad_norm": 11.825658798217773, "learning_rate": 1.3749795321332885e-08, "logits/chosen": 1.8811097145080566, "logits/rejected": 1.7760019302368164, "logps/chosen": -81.8123779296875, "logps/ref_chosen": -74.21861267089844, "logps/ref_rejected": -90.1492919921875, "logps/rejected": -108.1673583984375, "loss": 1.1224, "margin_dpo/margin_mean": 10.424295425415039, "margin_dpo/margin_std": 18.91455078125, "step": 599 }, { "epoch": 0.9070294784580499, "fcm_dpo/beta": 0.05311460793018341, "fcm_dpo/delta": 0.114117830991745, "fcm_dpo/margin": 9.208206176757812, "fcm_dpo/q_t": 0.40185290575027466, "grad_norm": 13.263036727905273, "learning_rate": 1.3320646032487393e-08, "logits/chosen": 1.6416901350021362, "logits/rejected": 1.4666112661361694, "logps/chosen": -86.00879669189453, "logps/ref_chosen": -79.34190368652344, "logps/ref_rejected": -97.0519790649414, "logps/rejected": -112.92707824707031, "loss": 1.1399, "margin_dpo/margin_mean": 9.208206176757812, "margin_dpo/margin_std": 17.026790618896484, "step": 600 }, { "epoch": 0.9070294784580499, "eval_fcm_dpo/beta": 0.05296889320015907, "eval_logits/chosen": 1.6311073303222656, "eval_logits/rejected": 1.493275761604309, "eval_logps/chosen": -92.30020141601562, "eval_logps/ref_chosen": -86.90177917480469, "eval_logps/ref_rejected": -96.69639587402344, "eval_logps/rejected": -113.7957534790039, "eval_loss": 0.5368251800537109, "eval_margin_dpo/margin_mean": 11.700956344604492, "eval_margin_dpo/margin_std": 18.786291122436523, "eval_runtime": 42.3352, "eval_samples_per_second": 54.399, "eval_steps_per_second": 1.701, "step": 600 }, { "epoch": 0.90854119425548, "fcm_dpo/beta": 0.051687560975551605, "fcm_dpo/delta": -0.12882345914840698, "fcm_dpo/margin": 13.898124694824219, "fcm_dpo/q_t": 0.35476869344711304, "grad_norm": 10.692002296447754, "learning_rate": 1.2898117173950868e-08, "logits/chosen": 1.5194027423858643, "logits/rejected": 1.3310624361038208, "logps/chosen": -74.42862701416016, "logps/ref_chosen": -72.06497192382812, "logps/ref_rejected": -97.60928344726562, "logps/rejected": -113.87106323242188, "loss": 0.996, "margin_dpo/margin_mean": 13.898124694824219, "margin_dpo/margin_std": 19.016834259033203, "step": 601 }, { "epoch": 0.91005291005291, "fcm_dpo/beta": 0.05144128203392029, "fcm_dpo/delta": -0.0647466778755188, "fcm_dpo/margin": 12.83267593383789, "fcm_dpo/q_t": 0.36555975675582886, "grad_norm": 14.516810417175293, "learning_rate": 1.2482220564763667e-08, "logits/chosen": 1.3734362125396729, "logits/rejected": 1.289400577545166, "logps/chosen": -79.3190689086914, "logps/ref_chosen": -77.80416870117188, "logps/ref_rejected": -89.05026245117188, "logps/rejected": -103.3978271484375, "loss": 0.995, "margin_dpo/margin_mean": 12.83267593383789, "margin_dpo/margin_std": 16.826704025268555, "step": 602 }, { "epoch": 0.9115646258503401, "fcm_dpo/beta": 0.050677426159381866, "fcm_dpo/delta": 0.006852999329566956, "fcm_dpo/margin": 11.696775436401367, "fcm_dpo/q_t": 0.37813225388526917, "grad_norm": 11.326213836669922, "learning_rate": 1.2072967838448051e-08, "logits/chosen": 1.1551480293273926, "logits/rejected": 1.051234483718872, "logps/chosen": -73.44308471679688, "logps/ref_chosen": -68.30155944824219, "logps/ref_rejected": -90.542724609375, "logps/rejected": -107.38102722167969, "loss": 1.0835, "margin_dpo/margin_mean": 11.69677448272705, "margin_dpo/margin_std": 19.035118103027344, "step": 603 }, { "epoch": 0.9130763416477702, "fcm_dpo/beta": 0.05132821202278137, "fcm_dpo/delta": 0.03209719434380531, "fcm_dpo/margin": 11.098645210266113, "fcm_dpo/q_t": 0.38882869482040405, "grad_norm": 13.501641273498535, "learning_rate": 1.1670370442682459e-08, "logits/chosen": 1.3291363716125488, "logits/rejected": 1.3053700923919678, "logps/chosen": -93.28549194335938, "logps/ref_chosen": -90.55952453613281, "logps/ref_rejected": -84.6327133178711, "logps/rejected": -98.45733642578125, "loss": 1.1398, "margin_dpo/margin_mean": 11.098645210266113, "margin_dpo/margin_std": 21.025123596191406, "step": 604 }, { "epoch": 0.9145880574452003, "fcm_dpo/beta": 0.05177273601293564, "fcm_dpo/delta": 0.04624027758836746, "fcm_dpo/margin": 10.744077682495117, "fcm_dpo/q_t": 0.38820844888687134, "grad_norm": 15.81956958770752, "learning_rate": 1.1274439638981532e-08, "logits/chosen": 1.6206059455871582, "logits/rejected": 1.4927140474319458, "logps/chosen": -87.21122741699219, "logps/ref_chosen": -80.26661682128906, "logps/ref_rejected": -100.26485443115234, "logps/rejected": -117.95354461669922, "loss": 1.0946, "margin_dpo/margin_mean": 10.744077682495117, "margin_dpo/margin_std": 18.287504196166992, "step": 605 }, { "epoch": 0.9160997732426304, "fcm_dpo/beta": 0.05220865458250046, "fcm_dpo/delta": 0.008291337639093399, "fcm_dpo/margin": 11.328773498535156, "fcm_dpo/q_t": 0.3812514841556549, "grad_norm": 13.073740005493164, "learning_rate": 1.0885186502381016e-08, "logits/chosen": 1.5954047441482544, "logits/rejected": 1.4421148300170898, "logps/chosen": -74.8997802734375, "logps/ref_chosen": -70.73554229736328, "logps/ref_rejected": -95.9410400390625, "logps/rejected": -111.43405151367188, "loss": 1.0533, "margin_dpo/margin_mean": 11.328773498535156, "margin_dpo/margin_std": 17.17182159423828, "step": 606 }, { "epoch": 0.9176114890400605, "fcm_dpo/beta": 0.0508500337600708, "fcm_dpo/delta": -0.03433932363986969, "fcm_dpo/margin": 12.327168464660645, "fcm_dpo/q_t": 0.366793692111969, "grad_norm": 13.337456703186035, "learning_rate": 1.0502621921127774e-08, "logits/chosen": 1.4727654457092285, "logits/rejected": 1.3896727561950684, "logps/chosen": -87.79872131347656, "logps/ref_chosen": -81.26203918457031, "logps/ref_rejected": -92.71575927734375, "logps/rejected": -111.57960510253906, "loss": 1.0111, "margin_dpo/margin_mean": 12.327167510986328, "margin_dpo/margin_std": 15.473169326782227, "step": 607 }, { "epoch": 0.9191232048374905, "fcm_dpo/beta": 0.05308441445231438, "fcm_dpo/delta": 0.17097234725952148, "fcm_dpo/margin": 8.206452369689941, "fcm_dpo/q_t": 0.41368526220321655, "grad_norm": 15.105749130249023, "learning_rate": 1.0126756596375685e-08, "logits/chosen": 1.3038733005523682, "logits/rejected": 1.0675292015075684, "logps/chosen": -90.75921630859375, "logps/ref_chosen": -82.6530990600586, "logps/ref_rejected": -110.64334106445312, "logps/rejected": -126.95590209960938, "loss": 1.1895, "margin_dpo/margin_mean": 8.206452369689941, "margin_dpo/margin_std": 17.802762985229492, "step": 608 }, { "epoch": 0.9206349206349206, "fcm_dpo/beta": 0.052457720041275024, "fcm_dpo/delta": -0.07344165444374084, "fcm_dpo/margin": 12.722570419311523, "fcm_dpo/q_t": 0.35613733530044556, "grad_norm": 10.500450134277344, "learning_rate": 9.757601041885694e-09, "logits/chosen": 1.6448678970336914, "logits/rejected": 1.5165568590164185, "logps/chosen": -71.36085510253906, "logps/ref_chosen": -68.20232391357422, "logps/ref_rejected": -81.90515899658203, "logps/rejected": -97.7862548828125, "loss": 0.9728, "margin_dpo/margin_mean": 12.72257137298584, "margin_dpo/margin_std": 15.513092041015625, "step": 609 }, { "epoch": 0.9221466364323507, "fcm_dpo/beta": 0.05239449441432953, "fcm_dpo/delta": -0.012467984110116959, "fcm_dpo/margin": 11.673583030700684, "fcm_dpo/q_t": 0.3851981461048126, "grad_norm": 15.478520393371582, "learning_rate": 9.395165583732379e-09, "logits/chosen": 1.5082423686981201, "logits/rejected": 1.4333908557891846, "logps/chosen": -105.09656524658203, "logps/ref_chosen": -99.01324462890625, "logps/ref_rejected": -102.26054382324219, "logps/rejected": -120.01744079589844, "loss": 1.1231, "margin_dpo/margin_mean": 11.673582077026367, "margin_dpo/margin_std": 21.25330352783203, "step": 610 }, { "epoch": 0.9236583522297808, "fcm_dpo/beta": 0.053039200603961945, "fcm_dpo/delta": 0.1085679829120636, "fcm_dpo/margin": 9.36767578125, "fcm_dpo/q_t": 0.40129148960113525, "grad_norm": 12.573719024658203, "learning_rate": 9.03946036001449e-09, "logits/chosen": 1.474776268005371, "logits/rejected": 1.3687703609466553, "logps/chosen": -70.27494812011719, "logps/ref_chosen": -66.36254119873047, "logps/ref_rejected": -88.74557495117188, "logps/rejected": -102.0256576538086, "loss": 1.1166, "margin_dpo/margin_mean": 9.36767578125, "margin_dpo/margin_std": 16.48019790649414, "step": 611 }, { "epoch": 0.9251700680272109, "fcm_dpo/beta": 0.052704453468322754, "fcm_dpo/delta": -0.12535026669502258, "fcm_dpo/margin": 13.594877243041992, "fcm_dpo/q_t": 0.35807400941848755, "grad_norm": 10.761448860168457, "learning_rate": 8.690495320571839e-09, "logits/chosen": 1.2493185997009277, "logits/rejected": 1.0813252925872803, "logps/chosen": -85.430908203125, "logps/ref_chosen": -78.6339111328125, "logps/ref_rejected": -108.34969329833984, "logps/rejected": -128.74156188964844, "loss": 1.0352, "margin_dpo/margin_mean": 13.594876289367676, "margin_dpo/margin_std": 20.702537536621094, "step": 612 }, { "epoch": 0.926681783824641, "fcm_dpo/beta": 0.05094806104898453, "fcm_dpo/delta": -0.21549217402935028, "fcm_dpo/margin": 15.678262710571289, "fcm_dpo/q_t": 0.33935976028442383, "grad_norm": 12.649880409240723, "learning_rate": 8.348280226706722e-09, "logits/chosen": 1.3885865211486816, "logits/rejected": 1.382106900215149, "logps/chosen": -75.3154296875, "logps/ref_chosen": -73.3539047241211, "logps/ref_rejected": -76.91837310791016, "logps/rejected": -94.55816650390625, "loss": 0.9547, "margin_dpo/margin_mean": 15.678261756896973, "margin_dpo/margin_std": 20.076194763183594, "step": 613 }, { "epoch": 0.9281934996220711, "fcm_dpo/beta": 0.0507182851433754, "fcm_dpo/delta": 0.047426991164684296, "fcm_dpo/margin": 10.928446769714355, "fcm_dpo/q_t": 0.3804364502429962, "grad_norm": 15.334555625915527, "learning_rate": 8.012824650910937e-09, "logits/chosen": 1.514979600906372, "logits/rejected": 1.4878126382827759, "logps/chosen": -84.84788513183594, "logps/ref_chosen": -77.80007934570312, "logps/ref_rejected": -89.05572509765625, "logps/rejected": -107.031982421875, "loss": 1.0489, "margin_dpo/margin_mean": 10.928446769714355, "margin_dpo/margin_std": 15.56425666809082, "step": 614 }, { "epoch": 0.9297052154195011, "fcm_dpo/beta": 0.049501433968544006, "fcm_dpo/delta": -0.09533637017011642, "fcm_dpo/margin": 13.88467788696289, "fcm_dpo/q_t": 0.3619763255119324, "grad_norm": 15.51099967956543, "learning_rate": 7.684137976598088e-09, "logits/chosen": 1.603013515472412, "logits/rejected": 1.4670634269714355, "logps/chosen": -97.00009155273438, "logps/ref_chosen": -90.06971740722656, "logps/ref_rejected": -118.7764892578125, "logps/rejected": -139.591552734375, "loss": 1.0328, "margin_dpo/margin_mean": 13.884675979614258, "margin_dpo/margin_std": 20.35204315185547, "step": 615 }, { "epoch": 0.9312169312169312, "fcm_dpo/beta": 0.04965684935450554, "fcm_dpo/delta": 0.042817123234272, "fcm_dpo/margin": 11.263029098510742, "fcm_dpo/q_t": 0.3879207968711853, "grad_norm": 13.444324493408203, "learning_rate": 7.36222939784098e-09, "logits/chosen": 1.362182855606079, "logits/rejected": 1.189748764038086, "logps/chosen": -80.3229751586914, "logps/ref_chosen": -74.62954711914062, "logps/ref_rejected": -93.655029296875, "logps/rejected": -110.61148071289062, "loss": 1.0719, "margin_dpo/margin_mean": 11.26302719116211, "margin_dpo/margin_std": 17.761255264282227, "step": 616 }, { "epoch": 0.9327286470143613, "fcm_dpo/beta": 0.050146251916885376, "fcm_dpo/delta": 0.09086576849222183, "fcm_dpo/margin": 10.21737289428711, "fcm_dpo/q_t": 0.390741765499115, "grad_norm": 13.885807991027832, "learning_rate": 7.047107919114586e-09, "logits/chosen": 1.496596336364746, "logits/rejected": 1.3787349462509155, "logps/chosen": -84.53483581542969, "logps/ref_chosen": -75.98182678222656, "logps/ref_rejected": -97.1640625, "logps/rejected": -115.93444061279297, "loss": 1.0971, "margin_dpo/margin_mean": 10.21737289428711, "margin_dpo/margin_std": 16.45693016052246, "step": 617 }, { "epoch": 0.9342403628117913, "fcm_dpo/beta": 0.051238950341939926, "fcm_dpo/delta": 0.07789164781570435, "fcm_dpo/margin": 10.265620231628418, "fcm_dpo/q_t": 0.3962671160697937, "grad_norm": 16.189624786376953, "learning_rate": 6.738782355044048e-09, "logits/chosen": 1.3820104598999023, "logits/rejected": 1.1591483354568481, "logps/chosen": -79.09159851074219, "logps/ref_chosen": -74.47208404541016, "logps/ref_rejected": -107.09980773925781, "logps/rejected": -121.98493957519531, "loss": 1.1232, "margin_dpo/margin_mean": 10.265620231628418, "margin_dpo/margin_std": 18.69152069091797, "step": 618 }, { "epoch": 0.9357520786092215, "fcm_dpo/beta": 0.05077732354402542, "fcm_dpo/delta": -0.07294730842113495, "fcm_dpo/margin": 13.123184204101562, "fcm_dpo/q_t": 0.36624816060066223, "grad_norm": 11.196142196655273, "learning_rate": 6.437261330158206e-09, "logits/chosen": 1.6958047151565552, "logits/rejected": 1.4898502826690674, "logps/chosen": -75.58355712890625, "logps/ref_chosen": -70.84220886230469, "logps/ref_rejected": -98.07801818847656, "logps/rejected": -115.94255065917969, "loss": 1.0293, "margin_dpo/margin_mean": 13.123184204101562, "margin_dpo/margin_std": 19.087276458740234, "step": 619 }, { "epoch": 0.9372637944066515, "fcm_dpo/beta": 0.050601303577423096, "fcm_dpo/delta": -0.07571752369403839, "fcm_dpo/margin": 9.317473411560059, "fcm_dpo/q_t": 0.40199047327041626, "grad_norm": 13.557417869567871, "learning_rate": 6.142553278648238e-09, "logits/chosen": 1.5680897235870361, "logits/rejected": 1.568605899810791, "logps/chosen": -81.21891021728516, "logps/ref_chosen": -76.93606567382812, "logps/ref_rejected": -81.28453063964844, "logps/rejected": -94.88484954833984, "loss": 1.1566, "margin_dpo/margin_mean": 9.317474365234375, "margin_dpo/margin_std": 17.439876556396484, "step": 620 }, { "epoch": 0.9387755102040817, "fcm_dpo/beta": 0.0511331781744957, "fcm_dpo/delta": 0.08140092343091965, "fcm_dpo/margin": 10.20268440246582, "fcm_dpo/q_t": 0.39421504735946655, "grad_norm": 12.2879638671875, "learning_rate": 5.854666444131934e-09, "logits/chosen": 1.767906904220581, "logits/rejected": 1.4925261735916138, "logps/chosen": -75.88006591796875, "logps/ref_chosen": -69.87464904785156, "logps/ref_rejected": -105.61328887939453, "logps/rejected": -121.82138061523438, "loss": 1.1127, "margin_dpo/margin_mean": 10.20268440246582, "margin_dpo/margin_std": 17.7759952545166, "step": 621 }, { "epoch": 0.9402872260015117, "fcm_dpo/beta": 0.05143500864505768, "fcm_dpo/delta": 0.030062519013881683, "fcm_dpo/margin": 11.108509063720703, "fcm_dpo/q_t": 0.379915326833725, "grad_norm": 11.659144401550293, "learning_rate": 5.573608879422875e-09, "logits/chosen": 1.2655305862426758, "logits/rejected": 1.1411113739013672, "logps/chosen": -84.64167785644531, "logps/ref_chosen": -78.9598388671875, "logps/ref_rejected": -97.90648651123047, "logps/rejected": -114.69683837890625, "loss": 1.0483, "margin_dpo/margin_mean": 11.108508110046387, "margin_dpo/margin_std": 16.232669830322266, "step": 622 }, { "epoch": 0.9417989417989417, "fcm_dpo/beta": 0.051177725195884705, "fcm_dpo/delta": -0.018996700644493103, "fcm_dpo/margin": 12.071805953979492, "fcm_dpo/q_t": 0.3701121211051941, "grad_norm": 15.908820152282715, "learning_rate": 5.299388446305342e-09, "logits/chosen": 1.5092880725860596, "logits/rejected": 1.3792246580123901, "logps/chosen": -91.16535186767578, "logps/ref_chosen": -83.22647094726562, "logps/ref_rejected": -105.1362533569336, "logps/rejected": -125.14694213867188, "loss": 1.0229, "margin_dpo/margin_mean": 12.071805000305176, "margin_dpo/margin_std": 16.85704803466797, "step": 623 }, { "epoch": 0.9433106575963719, "fcm_dpo/beta": 0.05008304864168167, "fcm_dpo/delta": -0.1030079573392868, "fcm_dpo/margin": 13.869135856628418, "fcm_dpo/q_t": 0.3560662567615509, "grad_norm": 11.17168140411377, "learning_rate": 5.03201281531429e-09, "logits/chosen": 1.2747315168380737, "logits/rejected": 1.0452553033828735, "logps/chosen": -69.04547882080078, "logps/ref_chosen": -66.10560607910156, "logps/ref_rejected": -91.66778564453125, "logps/rejected": -108.47679138183594, "loss": 0.9783, "margin_dpo/margin_mean": 13.869135856628418, "margin_dpo/margin_std": 17.713294982910156, "step": 624 }, { "epoch": 0.9448223733938019, "fcm_dpo/beta": 0.051767922937870026, "fcm_dpo/delta": 0.2112494707107544, "fcm_dpo/margin": 7.6672444343566895, "fcm_dpo/q_t": 0.4205039143562317, "grad_norm": 12.619704246520996, "learning_rate": 4.7714894655209174e-09, "logits/chosen": 1.896888256072998, "logits/rejected": 1.684779405593872, "logps/chosen": -79.9560317993164, "logps/ref_chosen": -73.20295715332031, "logps/ref_rejected": -105.31025695800781, "logps/rejected": -119.7305908203125, "loss": 1.2256, "margin_dpo/margin_mean": 7.667244911193848, "margin_dpo/margin_std": 18.133235931396484, "step": 625 }, { "epoch": 0.9463340891912321, "fcm_dpo/beta": 0.05126585811376572, "fcm_dpo/delta": -0.1028745248913765, "fcm_dpo/margin": 13.556199073791504, "fcm_dpo/q_t": 0.3712637424468994, "grad_norm": 13.120343208312988, "learning_rate": 4.517825684323323e-09, "logits/chosen": 1.7848681211471558, "logits/rejected": 1.4529061317443848, "logps/chosen": -66.4439697265625, "logps/ref_chosen": -62.181278228759766, "logps/ref_rejected": -108.17747497558594, "logps/rejected": -125.99636840820312, "loss": 1.0751, "margin_dpo/margin_mean": 13.556198120117188, "margin_dpo/margin_std": 22.397666931152344, "step": 626 }, { "epoch": 0.9478458049886621, "fcm_dpo/beta": 0.0497593954205513, "fcm_dpo/delta": -0.19564473628997803, "fcm_dpo/margin": 15.683549880981445, "fcm_dpo/q_t": 0.34314611554145813, "grad_norm": 11.137099266052246, "learning_rate": 4.271028567242818e-09, "logits/chosen": 1.3055121898651123, "logits/rejected": 1.0202195644378662, "logps/chosen": -82.22528076171875, "logps/ref_chosen": -77.72123718261719, "logps/ref_rejected": -114.40547180175781, "logps/rejected": -134.59307861328125, "loss": 0.9303, "margin_dpo/margin_mean": 15.683549880981445, "margin_dpo/margin_std": 18.70248794555664, "step": 627 }, { "epoch": 0.9493575207860923, "fcm_dpo/beta": 0.04885813593864441, "fcm_dpo/delta": -0.1272927224636078, "fcm_dpo/margin": 14.678993225097656, "fcm_dpo/q_t": 0.35350874066352844, "grad_norm": 11.312077522277832, "learning_rate": 4.0311050177251895e-09, "logits/chosen": 1.472090482711792, "logits/rejected": 1.4293111562728882, "logps/chosen": -74.28559112548828, "logps/ref_chosen": -70.71195983886719, "logps/ref_rejected": -93.85909271240234, "logps/rejected": -112.11170959472656, "loss": 1.0458, "margin_dpo/margin_mean": 14.678994178771973, "margin_dpo/margin_std": 21.24105453491211, "step": 628 }, { "epoch": 0.9508692365835223, "fcm_dpo/beta": 0.048182882368564606, "fcm_dpo/delta": 0.01766796223819256, "fcm_dpo/margin": 12.106483459472656, "fcm_dpo/q_t": 0.37841320037841797, "grad_norm": 13.544909477233887, "learning_rate": 3.798061746947995e-09, "logits/chosen": 1.484879970550537, "logits/rejected": 1.4223473072052002, "logps/chosen": -91.72695922851562, "logps/ref_chosen": -88.66283416748047, "logps/ref_rejected": -94.67845153808594, "logps/rejected": -109.84906005859375, "loss": 1.021, "margin_dpo/margin_mean": 12.106481552124023, "margin_dpo/margin_std": 16.435211181640625, "step": 629 }, { "epoch": 0.9523809523809523, "fcm_dpo/beta": 0.04793520271778107, "fcm_dpo/delta": -0.049655731767416, "fcm_dpo/margin": 13.485906600952148, "fcm_dpo/q_t": 0.36746057868003845, "grad_norm": 9.105417251586914, "learning_rate": 3.5719052736323806e-09, "logits/chosen": 1.4485478401184082, "logits/rejected": 1.287881851196289, "logps/chosen": -76.72743225097656, "logps/ref_chosen": -72.94979858398438, "logps/ref_rejected": -92.7632827758789, "logps/rejected": -110.02682495117188, "loss": 0.9886, "margin_dpo/margin_mean": 13.485905647277832, "margin_dpo/margin_std": 17.394798278808594, "step": 630 }, { "epoch": 0.9538926681783825, "fcm_dpo/beta": 0.04620499163866043, "fcm_dpo/delta": -0.1376815140247345, "fcm_dpo/margin": 15.663002967834473, "fcm_dpo/q_t": 0.3513961434364319, "grad_norm": 11.871007919311523, "learning_rate": 3.352641923861144e-09, "logits/chosen": 1.7959654331207275, "logits/rejected": 1.492279052734375, "logps/chosen": -82.16062927246094, "logps/ref_chosen": -78.58656311035156, "logps/ref_rejected": -115.38685607910156, "logps/rejected": -134.62391662597656, "loss": 0.9653, "margin_dpo/margin_mean": 15.663003921508789, "margin_dpo/margin_std": 19.171688079833984, "step": 631 }, { "epoch": 0.9554043839758125, "fcm_dpo/beta": 0.046446263790130615, "fcm_dpo/delta": 0.008756112307310104, "fcm_dpo/margin": 12.740787506103516, "fcm_dpo/q_t": 0.37455809116363525, "grad_norm": 9.769354820251465, "learning_rate": 3.140277830901428e-09, "logits/chosen": 1.4969274997711182, "logits/rejected": 1.402543067932129, "logps/chosen": -79.57364654541016, "logps/ref_chosen": -75.24861907958984, "logps/ref_rejected": -82.98665618896484, "logps/rejected": -100.05247497558594, "loss": 1.0419, "margin_dpo/margin_mean": 12.740787506103516, "margin_dpo/margin_std": 18.489219665527344, "step": 632 }, { "epoch": 0.9569160997732427, "fcm_dpo/beta": 0.046251967549324036, "fcm_dpo/delta": -0.0757179856300354, "fcm_dpo/margin": 14.493501663208008, "fcm_dpo/q_t": 0.3659166693687439, "grad_norm": 14.5718994140625, "learning_rate": 2.9348189350335007e-09, "logits/chosen": 1.6168040037155151, "logits/rejected": 1.4710830450057983, "logps/chosen": -69.67645263671875, "logps/ref_chosen": -68.8402099609375, "logps/ref_rejected": -84.64610290527344, "logps/rejected": -99.97584533691406, "loss": 1.0, "margin_dpo/margin_mean": 14.493501663208008, "margin_dpo/margin_std": 19.82632064819336, "step": 633 }, { "epoch": 0.9584278155706727, "fcm_dpo/beta": 0.04652927815914154, "fcm_dpo/delta": 0.1592503935098648, "fcm_dpo/margin": 5.743697166442871, "fcm_dpo/q_t": 0.446336030960083, "grad_norm": 17.360897064208984, "learning_rate": 2.736270983384276e-09, "logits/chosen": 1.441842794418335, "logits/rejected": 1.4726186990737915, "logps/chosen": -83.4759750366211, "logps/ref_chosen": -77.0589599609375, "logps/ref_rejected": -74.37579345703125, "logps/rejected": -86.5364990234375, "loss": 1.2976, "margin_dpo/margin_mean": 5.743697166442871, "margin_dpo/margin_std": 18.205974578857422, "step": 634 }, { "epoch": 0.9599395313681028, "fcm_dpo/beta": 0.04858339577913284, "fcm_dpo/delta": 0.17846964299678802, "fcm_dpo/margin": 8.830991744995117, "fcm_dpo/q_t": 0.4160218834877014, "grad_norm": 12.776594161987305, "learning_rate": 2.5446395297668287e-09, "logits/chosen": 1.352858066558838, "logits/rejected": 1.2052037715911865, "logps/chosen": -95.20944213867188, "logps/ref_chosen": -85.60243225097656, "logps/ref_rejected": -104.29497528076172, "logps/rejected": -122.73298645019531, "loss": 1.1964, "margin_dpo/margin_mean": 8.830991744995117, "margin_dpo/margin_std": 19.583810806274414, "step": 635 }, { "epoch": 0.9614512471655329, "fcm_dpo/beta": 0.04776782542467117, "fcm_dpo/delta": -0.1610623449087143, "fcm_dpo/margin": 15.674917221069336, "fcm_dpo/q_t": 0.3444763422012329, "grad_norm": 10.230683326721191, "learning_rate": 2.359929934524829e-09, "logits/chosen": 1.4697428941726685, "logits/rejected": 1.205780029296875, "logps/chosen": -71.68122863769531, "logps/ref_chosen": -68.72154235839844, "logps/ref_rejected": -97.44863891601562, "logps/rejected": -116.0832290649414, "loss": 0.9226, "margin_dpo/margin_mean": 15.674918174743652, "margin_dpo/margin_std": 17.92241859436035, "step": 636 }, { "epoch": 0.9629629629629629, "fcm_dpo/beta": 0.0476045086979866, "fcm_dpo/delta": -0.006276901811361313, "fcm_dpo/margin": 12.720074653625488, "fcm_dpo/q_t": 0.37932315468788147, "grad_norm": 10.013970375061035, "learning_rate": 2.1821473643827137e-09, "logits/chosen": 1.3179742097854614, "logits/rejected": 1.1738412380218506, "logps/chosen": -101.20001220703125, "logps/ref_chosen": -92.38919067382812, "logps/ref_rejected": -103.70460510253906, "logps/rejected": -125.23550415039062, "loss": 1.04, "margin_dpo/margin_mean": 12.720074653625488, "margin_dpo/margin_std": 19.057313919067383, "step": 637 }, { "epoch": 0.9644746787603931, "fcm_dpo/beta": 0.04759259521961212, "fcm_dpo/delta": 0.019558563828468323, "fcm_dpo/margin": 12.21816349029541, "fcm_dpo/q_t": 0.37911561131477356, "grad_norm": 12.155881881713867, "learning_rate": 2.0112967923011646e-09, "logits/chosen": 1.4781807661056519, "logits/rejected": 1.3293653726577759, "logps/chosen": -89.75704956054688, "logps/ref_chosen": -83.36921691894531, "logps/ref_rejected": -103.04508209228516, "logps/rejected": -121.65107727050781, "loss": 1.0429, "margin_dpo/margin_mean": 12.218162536621094, "margin_dpo/margin_std": 17.843093872070312, "step": 638 }, { "epoch": 0.9659863945578231, "fcm_dpo/beta": 0.047518063336610794, "fcm_dpo/delta": -0.03895752504467964, "fcm_dpo/margin": 13.388608932495117, "fcm_dpo/q_t": 0.3698895573616028, "grad_norm": 9.916508674621582, "learning_rate": 1.847382997337943e-09, "logits/chosen": 1.6210851669311523, "logits/rejected": 1.3688468933105469, "logps/chosen": -74.5676040649414, "logps/ref_chosen": -70.45247650146484, "logps/ref_rejected": -93.77748107910156, "logps/rejected": -111.28121948242188, "loss": 1.0065, "margin_dpo/margin_mean": 13.38861083984375, "margin_dpo/margin_std": 18.01410484313965, "step": 639 }, { "epoch": 0.9674981103552532, "fcm_dpo/beta": 0.04807348549365997, "fcm_dpo/delta": 0.15326353907585144, "fcm_dpo/margin": 9.443033218383789, "fcm_dpo/q_t": 0.4005971848964691, "grad_norm": 11.905314445495605, "learning_rate": 1.690410564514244e-09, "logits/chosen": 1.5392241477966309, "logits/rejected": 1.3597580194473267, "logps/chosen": -74.13145446777344, "logps/ref_chosen": -68.51570129394531, "logps/ref_rejected": -92.35081481933594, "logps/rejected": -107.40959930419922, "loss": 1.1826, "margin_dpo/margin_mean": 9.443033218383789, "margin_dpo/margin_std": 19.444664001464844, "step": 640 }, { "epoch": 0.9690098261526833, "fcm_dpo/beta": 0.04892860725522041, "fcm_dpo/delta": 0.06555097550153732, "fcm_dpo/margin": 10.993219375610352, "fcm_dpo/q_t": 0.38847824931144714, "grad_norm": 12.829493522644043, "learning_rate": 1.5403838846864692e-09, "logits/chosen": 1.3246686458587646, "logits/rejected": 1.3006986379623413, "logps/chosen": -99.61031341552734, "logps/ref_chosen": -92.35102844238281, "logps/ref_rejected": -102.4269790649414, "logps/rejected": -120.67948150634766, "loss": 1.0872, "margin_dpo/margin_mean": 10.993219375610352, "margin_dpo/margin_std": 18.027175903320312, "step": 641 }, { "epoch": 0.9705215419501134, "fcm_dpo/beta": 0.049451105296611786, "fcm_dpo/delta": 0.09589925408363342, "fcm_dpo/margin": 10.26209545135498, "fcm_dpo/q_t": 0.3960247039794922, "grad_norm": 11.930103302001953, "learning_rate": 1.3973071544233218e-09, "logits/chosen": 1.1650804281234741, "logits/rejected": 1.1836330890655518, "logps/chosen": -95.06695556640625, "logps/ref_chosen": -88.39617919921875, "logps/ref_rejected": -88.73035430908203, "logps/rejected": -105.66322326660156, "loss": 1.1312, "margin_dpo/margin_mean": 10.26209545135498, "margin_dpo/margin_std": 18.21469497680664, "step": 642 }, { "epoch": 0.9720332577475435, "fcm_dpo/beta": 0.05022279545664787, "fcm_dpo/delta": 0.046514783054590225, "fcm_dpo/margin": 11.055997848510742, "fcm_dpo/q_t": 0.3896936774253845, "grad_norm": 12.769640922546387, "learning_rate": 1.261184375888541e-09, "logits/chosen": 1.4847640991210938, "logits/rejected": 1.178241491317749, "logps/chosen": -90.30828857421875, "logps/ref_chosen": -84.83087158203125, "logps/ref_rejected": -105.31499481201172, "logps/rejected": -121.84840393066406, "loss": 1.1231, "margin_dpo/margin_mean": 11.055997848510742, "margin_dpo/margin_std": 19.96835708618164, "step": 643 }, { "epoch": 0.9735449735449735, "fcm_dpo/beta": 0.05142327770590782, "fcm_dpo/delta": 0.05963759124279022, "fcm_dpo/margin": 10.557317733764648, "fcm_dpo/q_t": 0.39105477929115295, "grad_norm": 13.958258628845215, "learning_rate": 1.1320193567288527e-09, "logits/chosen": 1.6530699729919434, "logits/rejected": 1.544208288192749, "logps/chosen": -70.48993682861328, "logps/ref_chosen": -65.11122131347656, "logps/ref_rejected": -80.4027328491211, "logps/rejected": -96.33876037597656, "loss": 1.1411, "margin_dpo/margin_mean": 10.557317733764648, "margin_dpo/margin_std": 19.831335067749023, "step": 644 }, { "epoch": 0.9750566893424036, "fcm_dpo/beta": 0.05053392052650452, "fcm_dpo/delta": -0.08560548722743988, "fcm_dpo/margin": 13.443717002868652, "fcm_dpo/q_t": 0.36036476492881775, "grad_norm": 13.477982521057129, "learning_rate": 1.0098157099674987e-09, "logits/chosen": 1.4453848600387573, "logits/rejected": 1.4229815006256104, "logps/chosen": -81.81217193603516, "logps/ref_chosen": -76.93634033203125, "logps/ref_rejected": -89.14311981201172, "logps/rejected": -107.46266174316406, "loss": 0.9803, "margin_dpo/margin_mean": 13.443717956542969, "margin_dpo/margin_std": 17.437297821044922, "step": 645 }, { "epoch": 0.9765684051398337, "fcm_dpo/beta": 0.05015309527516365, "fcm_dpo/delta": -0.047624535858631134, "fcm_dpo/margin": 12.85222339630127, "fcm_dpo/q_t": 0.3658458888530731, "grad_norm": 10.155190467834473, "learning_rate": 8.945768539031783e-10, "logits/chosen": 1.6856896877288818, "logits/rejected": 1.5893869400024414, "logps/chosen": -86.48307800292969, "logps/ref_chosen": -77.69122314453125, "logps/ref_rejected": -98.14374542236328, "logps/rejected": -119.78782653808594, "loss": 1.017, "margin_dpo/margin_mean": 12.85222339630127, "margin_dpo/margin_std": 18.211654663085938, "step": 646 }, { "epoch": 0.9780801209372638, "fcm_dpo/beta": 0.04868451505899429, "fcm_dpo/delta": -0.19314002990722656, "fcm_dpo/margin": 15.98859977722168, "fcm_dpo/q_t": 0.33671772480010986, "grad_norm": 12.119245529174805, "learning_rate": 7.863060120144316e-10, "logits/chosen": 1.5582287311553955, "logits/rejected": 1.4055562019348145, "logps/chosen": -91.17578125, "logps/ref_chosen": -83.79997253417969, "logps/ref_rejected": -116.81965637207031, "logps/rejected": -140.18405151367188, "loss": 0.8989, "margin_dpo/margin_mean": 15.988598823547363, "margin_dpo/margin_std": 17.51923942565918, "step": 647 }, { "epoch": 0.9795918367346939, "fcm_dpo/beta": 0.04834875464439392, "fcm_dpo/delta": 0.05289806053042412, "fcm_dpo/margin": 11.375927925109863, "fcm_dpo/q_t": 0.3829188346862793, "grad_norm": 13.859929084777832, "learning_rate": 6.850062128694045e-10, "logits/chosen": 1.3009628057479858, "logits/rejected": 1.134709119796753, "logps/chosen": -94.1376953125, "logps/ref_chosen": -85.9629898071289, "logps/ref_rejected": -101.36552429199219, "logps/rejected": -120.9161605834961, "loss": 1.0614, "margin_dpo/margin_mean": 11.37592887878418, "margin_dpo/margin_std": 16.891958236694336, "step": 648 }, { "epoch": 0.981103552532124, "fcm_dpo/beta": 0.048618800938129425, "fcm_dpo/delta": 0.008311666548252106, "fcm_dpo/margin": 12.179876327514648, "fcm_dpo/q_t": 0.3789626955986023, "grad_norm": 13.455903053283691, "learning_rate": 5.906802900412788e-10, "logits/chosen": 1.450695514678955, "logits/rejected": 1.321246862411499, "logps/chosen": -74.89276885986328, "logps/ref_chosen": -68.64892578125, "logps/ref_rejected": -89.84898376464844, "logps/rejected": -108.272705078125, "loss": 1.0928, "margin_dpo/margin_mean": 12.179876327514648, "margin_dpo/margin_std": 20.43763542175293, "step": 649 }, { "epoch": 0.982615268329554, "fcm_dpo/beta": 0.04859776794910431, "fcm_dpo/delta": -0.03493582457304001, "fcm_dpo/margin": 13.017370223999023, "fcm_dpo/q_t": 0.36858803033828735, "grad_norm": 11.794471740722656, "learning_rate": 5.033308820289184e-10, "logits/chosen": 1.5901975631713867, "logits/rejected": 1.3985137939453125, "logps/chosen": -76.39089965820312, "logps/ref_chosen": -72.97265625, "logps/ref_rejected": -93.04617309570312, "logps/rejected": -109.48178100585938, "loss": 1.0177, "margin_dpo/margin_mean": 13.017369270324707, "margin_dpo/margin_std": 17.961902618408203, "step": 650 }, { "epoch": 0.9841269841269841, "fcm_dpo/beta": 0.04906022548675537, "fcm_dpo/delta": 0.046510156244039536, "fcm_dpo/margin": 11.303712844848633, "fcm_dpo/q_t": 0.38729214668273926, "grad_norm": 15.220462799072266, "learning_rate": 4.2296043218295606e-10, "logits/chosen": 1.6060431003570557, "logits/rejected": 1.3864119052886963, "logps/chosen": -76.44781494140625, "logps/ref_chosen": -71.05281066894531, "logps/ref_rejected": -94.23469543457031, "logps/rejected": -110.93341064453125, "loss": 1.0596, "margin_dpo/margin_mean": 11.303714752197266, "margin_dpo/margin_std": 16.41510009765625, "step": 651 }, { "epoch": 0.9856386999244142, "fcm_dpo/beta": 0.04880473017692566, "fcm_dpo/delta": 0.0105612026527524, "fcm_dpo/margin": 12.089744567871094, "fcm_dpo/q_t": 0.3832206726074219, "grad_norm": 15.424723625183105, "learning_rate": 3.4957118863768176e-10, "logits/chosen": 1.6924582719802856, "logits/rejected": 1.6193931102752686, "logps/chosen": -86.58667755126953, "logps/ref_chosen": -80.06941223144531, "logps/ref_rejected": -99.22327423095703, "logps/rejected": -117.83027648925781, "loss": 1.0705, "margin_dpo/margin_mean": 12.089743614196777, "margin_dpo/margin_std": 19.402515411376953, "step": 652 }, { "epoch": 0.9871504157218443, "fcm_dpo/beta": 0.048587001860141754, "fcm_dpo/delta": -0.0633477047085762, "fcm_dpo/margin": 13.566983222961426, "fcm_dpo/q_t": 0.36429864168167114, "grad_norm": 11.197211265563965, "learning_rate": 2.831652042480093e-10, "logits/chosen": 1.611598014831543, "logits/rejected": 1.483473300933838, "logps/chosen": -84.53128051757812, "logps/ref_chosen": -80.35701751708984, "logps/ref_rejected": -92.1295394897461, "logps/rejected": -109.87078857421875, "loss": 1.0152, "margin_dpo/margin_mean": 13.56698226928711, "margin_dpo/margin_std": 19.007705688476562, "step": 653 }, { "epoch": 0.9886621315192744, "fcm_dpo/beta": 0.04817197844386101, "fcm_dpo/delta": 0.055898845195770264, "fcm_dpo/margin": 11.323410034179688, "fcm_dpo/q_t": 0.39379796385765076, "grad_norm": 13.024750709533691, "learning_rate": 2.2374433653205016e-10, "logits/chosen": 1.4926725625991821, "logits/rejected": 1.2302258014678955, "logps/chosen": -84.48985290527344, "logps/ref_chosen": -78.06475830078125, "logps/ref_rejected": -106.05763244628906, "logps/rejected": -123.80614471435547, "loss": 1.104, "margin_dpo/margin_mean": 11.323410034179688, "margin_dpo/margin_std": 19.10687828063965, "step": 654 }, { "epoch": 0.9901738473167044, "fcm_dpo/beta": 0.048346683382987976, "fcm_dpo/delta": 0.001804165542125702, "fcm_dpo/margin": 12.33364486694336, "fcm_dpo/q_t": 0.3771480917930603, "grad_norm": 9.984294891357422, "learning_rate": 1.7131024761923852e-10, "logits/chosen": 1.4067647457122803, "logits/rejected": 1.1172595024108887, "logps/chosen": -70.79195404052734, "logps/ref_chosen": -67.03407287597656, "logps/ref_rejected": -97.57197570800781, "logps/rejected": -113.66349792480469, "loss": 1.0162, "margin_dpo/margin_mean": 12.33364486694336, "margin_dpo/margin_std": 16.364456176757812, "step": 655 }, { "epoch": 0.9916855631141346, "fcm_dpo/beta": 0.04848009720444679, "fcm_dpo/delta": -0.0400017648935318, "fcm_dpo/margin": 13.146528244018555, "fcm_dpo/q_t": 0.368743360042572, "grad_norm": 11.32723617553711, "learning_rate": 1.2586440420372934e-10, "logits/chosen": 1.2692241668701172, "logits/rejected": 1.151146650314331, "logps/chosen": -96.94473266601562, "logps/ref_chosen": -89.31463623046875, "logps/ref_rejected": -105.14315795898438, "logps/rejected": -125.91978454589844, "loss": 1.023, "margin_dpo/margin_mean": 13.146528244018555, "margin_dpo/margin_std": 18.80926513671875, "step": 656 }, { "epoch": 0.9931972789115646, "fcm_dpo/beta": 0.04680928587913513, "fcm_dpo/delta": -0.21155594289302826, "fcm_dpo/margin": 16.960391998291016, "fcm_dpo/q_t": 0.33587589859962463, "grad_norm": 12.175826072692871, "learning_rate": 8.740807750345913e-11, "logits/chosen": 1.6932892799377441, "logits/rejected": 1.4875112771987915, "logps/chosen": -68.65042877197266, "logps/ref_chosen": -64.89747619628906, "logps/ref_rejected": -94.21998596191406, "logps/rejected": -114.93333435058594, "loss": 0.9204, "margin_dpo/margin_mean": 16.960391998291016, "margin_dpo/margin_std": 19.61644172668457, "step": 657 }, { "epoch": 0.9947089947089947, "fcm_dpo/beta": 0.047107864171266556, "fcm_dpo/delta": 0.04111632704734802, "fcm_dpo/margin": 11.880049705505371, "fcm_dpo/q_t": 0.38745826482772827, "grad_norm": 12.966909408569336, "learning_rate": 5.594234322453539e-11, "logits/chosen": 1.4143362045288086, "logits/rejected": 1.3188287019729614, "logps/chosen": -86.38758087158203, "logps/ref_chosen": -81.16606140136719, "logps/ref_rejected": -97.72825622558594, "logps/rejected": -114.82982635498047, "loss": 1.1141, "margin_dpo/margin_mean": 11.880049705505371, "margin_dpo/margin_std": 20.876911163330078, "step": 658 }, { "epoch": 0.9962207105064248, "fcm_dpo/beta": 0.04666716232895851, "fcm_dpo/delta": -0.017684001475572586, "fcm_dpo/margin": 8.309593200683594, "fcm_dpo/q_t": 0.42115259170532227, "grad_norm": 12.249117851257324, "learning_rate": 3.146808153123293e-11, "logits/chosen": 1.562005877494812, "logits/rejected": 1.3696491718292236, "logps/chosen": -82.52589416503906, "logps/ref_chosen": -74.42193603515625, "logps/ref_rejected": -87.81561279296875, "logps/rejected": -104.22916412353516, "loss": 1.2285, "margin_dpo/margin_mean": 8.30959415435791, "margin_dpo/margin_std": 19.117603302001953, "step": 659 }, { "epoch": 0.9977324263038548, "fcm_dpo/beta": 0.045825421810150146, "fcm_dpo/delta": -0.10102089494466782, "fcm_dpo/margin": 15.1337308883667, "fcm_dpo/q_t": 0.35269391536712646, "grad_norm": 10.398505210876465, "learning_rate": 1.3985977021235829e-11, "logits/chosen": 1.5079681873321533, "logits/rejected": 1.3555164337158203, "logps/chosen": -77.58157348632812, "logps/ref_chosen": -71.68511962890625, "logps/ref_rejected": -98.01472473144531, "logps/rejected": -119.04491424560547, "loss": 0.9414, "margin_dpo/margin_mean": 15.133729934692383, "margin_dpo/margin_std": 17.095909118652344, "step": 660 }, { "epoch": 0.999244142101285, "fcm_dpo/beta": 0.04709383845329285, "fcm_dpo/delta": 0.17111794650554657, "fcm_dpo/margin": 9.24586296081543, "fcm_dpo/q_t": 0.4106820821762085, "grad_norm": 10.930879592895508, "learning_rate": 3.4965187065971735e-12, "logits/chosen": 1.3035640716552734, "logits/rejected": 1.1167458295822144, "logps/chosen": -89.0513916015625, "logps/ref_chosen": -78.35111999511719, "logps/ref_rejected": -99.47113037109375, "logps/rejected": -119.4172592163086, "loss": 1.1821, "margin_dpo/margin_mean": 9.24586296081543, "margin_dpo/margin_std": 19.480510711669922, "step": 661 }, { "epoch": 0.999244142101285, "step": 661, "total_flos": 0.0, "train_loss": 1.1374638212250148, "train_runtime": 2122.2138, "train_samples_per_second": 19.949, "train_steps_per_second": 0.311 } ], "logging_steps": 1, "max_steps": 661, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }