{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.999244142101285, "eval_steps": 100, "global_step": 661, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0015117157974300832, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.002980858087539673, "fcm_dpo/q_t": 0.5000747442245483, "grad_norm": 17.898353576660156, "learning_rate": 0.0, "logits/chosen": 1.702779769897461, "logits/rejected": 1.6965749263763428, "logps/chosen": -80.20932006835938, "logps/ref_chosen": -80.27740478515625, "logps/ref_rejected": -83.5943374633789, "logps/rejected": -83.52326965332031, "loss": 1.387, "margin_dpo/margin_mean": -0.0029816031455993652, "margin_dpo/margin_std": 0.3835117816925049, "step": 1 }, { "epoch": 0.0030234315948601664, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.029325395822525024, "fcm_dpo/q_t": 0.4992692470550537, "grad_norm": 21.484628677368164, "learning_rate": 7.462686567164179e-09, "logits/chosen": 1.7006168365478516, "logits/rejected": 1.6698178052902222, "logps/chosen": -74.51097869873047, "logps/ref_chosen": -74.56095886230469, "logps/ref_rejected": -83.53636169433594, "logps/rejected": -83.51570892333984, "loss": 1.3839, "margin_dpo/margin_mean": 0.029325813055038452, "margin_dpo/margin_std": 0.4646317958831787, "step": 2 }, { "epoch": 0.0045351473922902496, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.003523111343383789, "fcm_dpo/q_t": 0.5000885128974915, "grad_norm": 19.935203552246094, "learning_rate": 1.4925373134328357e-08, "logits/chosen": 1.6261146068572998, "logits/rejected": 1.535043716430664, "logps/chosen": -82.14225006103516, "logps/ref_chosen": -82.1510009765625, "logps/ref_rejected": -109.82986450195312, "logps/rejected": -109.81758117675781, "loss": 1.3871, "margin_dpo/margin_mean": -0.0035227537155151367, "margin_dpo/margin_std": 0.4260812997817993, "step": 3 }, { "epoch": 0.006046863189720333, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.006996512413024902, "fcm_dpo/q_t": 0.5001745223999023, "grad_norm": 19.782325744628906, "learning_rate": 2.2388059701492534e-08, "logits/chosen": 1.766474723815918, "logits/rejected": 1.7546875476837158, "logps/chosen": -92.36776733398438, "logps/ref_chosen": -92.37549591064453, "logps/ref_rejected": -99.59553527832031, "logps/rejected": -99.580810546875, "loss": 1.3875, "margin_dpo/margin_mean": -0.00699692964553833, "margin_dpo/margin_std": 0.4406859278678894, "step": 4 }, { "epoch": 0.007558578987150416, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.03886502981185913, "fcm_dpo/q_t": 0.500970721244812, "grad_norm": 18.87920570373535, "learning_rate": 2.9850746268656714e-08, "logits/chosen": 1.5482947826385498, "logits/rejected": 1.4964426755905151, "logps/chosen": -78.91131591796875, "logps/ref_chosen": -78.84872436523438, "logps/ref_rejected": -97.88040161132812, "logps/rejected": -97.90412902832031, "loss": 1.3906, "margin_dpo/margin_mean": -0.03886544704437256, "margin_dpo/margin_std": 0.4082863926887512, "step": 5 }, { "epoch": 0.009070294784580499, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.07824429869651794, "fcm_dpo/q_t": 0.4980442523956299, "grad_norm": 18.059982299804688, "learning_rate": 3.731343283582089e-08, "logits/chosen": 1.5881304740905762, "logits/rejected": 1.4806277751922607, "logps/chosen": -68.29608917236328, "logps/ref_chosen": -68.34607696533203, "logps/ref_rejected": -99.24614715576172, "logps/rejected": -99.27439880371094, "loss": 1.3789, "margin_dpo/margin_mean": 0.07824432849884033, "margin_dpo/margin_std": 0.4144379794597626, "step": 6 }, { "epoch": 0.010582010582010581, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.07302752137184143, "fcm_dpo/q_t": 0.5018250942230225, "grad_norm": 17.438974380493164, "learning_rate": 4.477611940298507e-08, "logits/chosen": 1.4593021869659424, "logits/rejected": 1.3967918157577515, "logps/chosen": -69.18865966796875, "logps/ref_chosen": -69.11282348632812, "logps/ref_rejected": -84.01641845703125, "logps/rejected": -84.01922607421875, "loss": 1.3939, "margin_dpo/margin_mean": -0.0730276107788086, "margin_dpo/margin_std": 0.36465680599212646, "step": 7 }, { "epoch": 0.012093726379440665, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.0639738142490387, "fcm_dpo/q_t": 0.5015987157821655, "grad_norm": 18.393983840942383, "learning_rate": 5.223880597014925e-08, "logits/chosen": 1.647634506225586, "logits/rejected": 1.6342148780822754, "logps/chosen": -78.386474609375, "logps/ref_chosen": -78.3912353515625, "logps/ref_rejected": -91.06254577636719, "logps/rejected": -90.99380493164062, "loss": 1.3931, "margin_dpo/margin_mean": -0.06397378444671631, "margin_dpo/margin_std": 0.3821854591369629, "step": 8 }, { "epoch": 0.013605442176870748, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.03132617473602295, "fcm_dpo/q_t": 0.4992170035839081, "grad_norm": 19.418487548828125, "learning_rate": 5.970149253731343e-08, "logits/chosen": 2.1118154525756836, "logits/rejected": 1.8952994346618652, "logps/chosen": -69.66719818115234, "logps/ref_chosen": -69.67422485351562, "logps/ref_rejected": -105.00473022460938, "logps/rejected": -105.0290298461914, "loss": 1.3836, "margin_dpo/margin_mean": 0.03132587671279907, "margin_dpo/margin_std": 0.3932754695415497, "step": 9 }, { "epoch": 0.015117157974300832, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.031807154417037964, "fcm_dpo/q_t": 0.4992050230503082, "grad_norm": 19.023218154907227, "learning_rate": 6.71641791044776e-08, "logits/chosen": 1.7425557374954224, "logits/rejected": 1.6554481983184814, "logps/chosen": -79.6943359375, "logps/ref_chosen": -79.730712890625, "logps/ref_rejected": -105.50645446777344, "logps/rejected": -105.50188446044922, "loss": 1.3836, "margin_dpo/margin_mean": 0.031807392835617065, "margin_dpo/margin_std": 0.41680943965911865, "step": 10 }, { "epoch": 0.016628873771730914, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.00566980242729187, "fcm_dpo/q_t": 0.4998597502708435, "grad_norm": 17.280485153198242, "learning_rate": 7.462686567164178e-08, "logits/chosen": 1.610607385635376, "logits/rejected": 1.566218614578247, "logps/chosen": -85.4349365234375, "logps/ref_chosen": -85.41248321533203, "logps/ref_rejected": -86.50241088867188, "logps/rejected": -86.53053283691406, "loss": 1.3862, "margin_dpo/margin_mean": 0.005669832229614258, "margin_dpo/margin_std": 0.40662485361099243, "step": 11 }, { "epoch": 0.018140589569160998, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.045232102274894714, "fcm_dpo/q_t": 0.498870313167572, "grad_norm": 17.30267906188965, "learning_rate": 8.208955223880596e-08, "logits/chosen": 1.5188937187194824, "logits/rejected": 1.4821337461471558, "logps/chosen": -81.39826965332031, "logps/ref_chosen": -81.38086700439453, "logps/ref_rejected": -89.88151550292969, "logps/rejected": -89.94414520263672, "loss": 1.3821, "margin_dpo/margin_mean": 0.045232415199279785, "margin_dpo/margin_std": 0.33078908920288086, "step": 12 }, { "epoch": 0.019652305366591082, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.0010968297719955444, "fcm_dpo/q_t": 0.49997279047966003, "grad_norm": 17.860565185546875, "learning_rate": 8.955223880597014e-08, "logits/chosen": 1.5909333229064941, "logits/rejected": 1.4107434749603271, "logps/chosen": -63.136024475097656, "logps/ref_chosen": -63.17030715942383, "logps/ref_rejected": -105.61166381835938, "logps/rejected": -105.57847595214844, "loss": 1.3864, "margin_dpo/margin_mean": 0.0010965168476104736, "margin_dpo/margin_std": 0.3132143020629883, "step": 13 }, { "epoch": 0.021164021164021163, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.025872915983200073, "fcm_dpo/q_t": 0.500646710395813, "grad_norm": 19.512710571289062, "learning_rate": 9.701492537313432e-08, "logits/chosen": 1.6522200107574463, "logits/rejected": 1.6186612844467163, "logps/chosen": -80.70231628417969, "logps/ref_chosen": -80.71014404296875, "logps/ref_rejected": -89.86041259765625, "logps/rejected": -89.82671356201172, "loss": 1.3891, "margin_dpo/margin_mean": -0.025872111320495605, "margin_dpo/margin_std": 0.296722948551178, "step": 14 }, { "epoch": 0.022675736961451247, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.07544693350791931, "fcm_dpo/q_t": 0.5018854141235352, "grad_norm": 20.62965965270996, "learning_rate": 1.044776119402985e-07, "logits/chosen": 1.3531144857406616, "logits/rejected": 1.2740521430969238, "logps/chosen": -82.05131530761719, "logps/ref_chosen": -82.00294494628906, "logps/ref_rejected": -106.43550109863281, "logps/rejected": -106.40841674804688, "loss": 1.3941, "margin_dpo/margin_mean": -0.07544746994972229, "margin_dpo/margin_std": 0.3172140121459961, "step": 15 }, { "epoch": 0.02418745275888133, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.014335334300994873, "fcm_dpo/q_t": 0.49964210391044617, "grad_norm": 17.199363708496094, "learning_rate": 1.1194029850746268e-07, "logits/chosen": 1.7679736614227295, "logits/rejected": 1.6486386060714722, "logps/chosen": -62.32301330566406, "logps/ref_chosen": -62.308345794677734, "logps/ref_rejected": -89.6508560180664, "logps/rejected": -89.67985534667969, "loss": 1.3851, "margin_dpo/margin_mean": 0.01433536410331726, "margin_dpo/margin_std": 0.3363468050956726, "step": 16 }, { "epoch": 0.025699168556311415, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.0437452495098114, "fcm_dpo/q_t": 0.501093327999115, "grad_norm": 18.453763961791992, "learning_rate": 1.1940298507462686e-07, "logits/chosen": 1.6343588829040527, "logits/rejected": 1.5999202728271484, "logps/chosen": -85.20316314697266, "logps/ref_chosen": -85.16903686523438, "logps/ref_rejected": -102.57087707519531, "logps/rejected": -102.56124877929688, "loss": 1.391, "margin_dpo/margin_mean": -0.04374605417251587, "margin_dpo/margin_std": 0.3627570867538452, "step": 17 }, { "epoch": 0.027210884353741496, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.13690681755542755, "fcm_dpo/q_t": 0.496579110622406, "grad_norm": 17.045387268066406, "learning_rate": 1.2686567164179106e-07, "logits/chosen": 1.6750521659851074, "logits/rejected": 1.5454905033111572, "logps/chosen": -63.13652801513672, "logps/ref_chosen": -63.17793273925781, "logps/ref_rejected": -86.06461334228516, "logps/rejected": -86.16011047363281, "loss": 1.373, "margin_dpo/margin_mean": 0.13690713047981262, "margin_dpo/margin_std": 0.3616068363189697, "step": 18 }, { "epoch": 0.02872260015117158, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.039892733097076416, "fcm_dpo/q_t": 0.50099778175354, "grad_norm": 19.78537940979004, "learning_rate": 1.343283582089552e-07, "logits/chosen": 1.9965240955352783, "logits/rejected": 1.98760986328125, "logps/chosen": -85.86576080322266, "logps/ref_chosen": -85.82405853271484, "logps/ref_rejected": -100.07136535644531, "logps/rejected": -100.07318115234375, "loss": 1.3907, "margin_dpo/margin_mean": -0.03989291191101074, "margin_dpo/margin_std": 0.39928844571113586, "step": 19 }, { "epoch": 0.030234315948601664, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.046687304973602295, "fcm_dpo/q_t": 0.5011659860610962, "grad_norm": 18.191457748413086, "learning_rate": 1.4179104477611938e-07, "logits/chosen": 1.9364959001541138, "logits/rejected": 1.8560017347335815, "logps/chosen": -73.64295959472656, "logps/ref_chosen": -73.58621215820312, "logps/ref_rejected": -91.21690368652344, "logps/rejected": -91.22695922851562, "loss": 1.3913, "margin_dpo/margin_mean": -0.046687573194503784, "margin_dpo/margin_std": 0.36182230710983276, "step": 20 }, { "epoch": 0.031746031746031744, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.08522829413414001, "fcm_dpo/q_t": 0.4978693723678589, "grad_norm": 18.132219314575195, "learning_rate": 1.4925373134328355e-07, "logits/chosen": 1.9595677852630615, "logits/rejected": 1.8412469625473022, "logps/chosen": -81.89152526855469, "logps/ref_chosen": -81.97251892089844, "logps/ref_rejected": -98.05976867675781, "logps/rejected": -98.06401062011719, "loss": 1.3783, "margin_dpo/margin_mean": 0.08522748947143555, "margin_dpo/margin_std": 0.4540433883666992, "step": 21 }, { "epoch": 0.03325774754346183, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.024463504552841187, "fcm_dpo/q_t": 0.49939069151878357, "grad_norm": 18.27927017211914, "learning_rate": 1.5671641791044775e-07, "logits/chosen": 1.7110607624053955, "logits/rejected": 1.6684741973876953, "logps/chosen": -76.95679473876953, "logps/ref_chosen": -76.99579620361328, "logps/ref_rejected": -95.76089477539062, "logps/rejected": -95.74636840820312, "loss": 1.3844, "margin_dpo/margin_mean": 0.024462968111038208, "margin_dpo/margin_std": 0.4479817748069763, "step": 22 }, { "epoch": 0.03476946334089191, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.0017972886562347412, "fcm_dpo/q_t": 0.5000447630882263, "grad_norm": 19.142587661743164, "learning_rate": 1.6417910447761193e-07, "logits/chosen": 2.1720407009124756, "logits/rejected": 2.068018913269043, "logps/chosen": -84.75054168701172, "logps/ref_chosen": -84.76856994628906, "logps/ref_rejected": -107.28266906738281, "logps/rejected": -107.2628402709961, "loss": 1.3869, "margin_dpo/margin_mean": -0.0017971396446228027, "margin_dpo/margin_std": 0.3923270106315613, "step": 23 }, { "epoch": 0.036281179138321996, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.053469717502593994, "fcm_dpo/q_t": 0.49866342544555664, "grad_norm": 17.15791893005371, "learning_rate": 1.716417910447761e-07, "logits/chosen": 1.7385156154632568, "logits/rejected": 1.6794748306274414, "logps/chosen": -69.82743835449219, "logps/ref_chosen": -69.87112426757812, "logps/ref_rejected": -84.02084350585938, "logps/rejected": -84.03063201904297, "loss": 1.3813, "margin_dpo/margin_mean": 0.053469330072402954, "margin_dpo/margin_std": 0.37825846672058105, "step": 24 }, { "epoch": 0.03779289493575208, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.009903967380523682, "fcm_dpo/q_t": 0.4997522532939911, "grad_norm": 19.575454711914062, "learning_rate": 1.7910447761194027e-07, "logits/chosen": 2.062734603881836, "logits/rejected": 1.9006829261779785, "logps/chosen": -78.26441192626953, "logps/ref_chosen": -78.22694396972656, "logps/ref_rejected": -106.65234375, "logps/rejected": -106.69970703125, "loss": 1.3857, "margin_dpo/margin_mean": 0.009904235601425171, "margin_dpo/margin_std": 0.4116858243942261, "step": 25 }, { "epoch": 0.039304610733182165, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.07625684142112732, "fcm_dpo/q_t": 0.4980948865413666, "grad_norm": 17.888681411743164, "learning_rate": 1.8656716417910447e-07, "logits/chosen": 2.0140490531921387, "logits/rejected": 1.9844526052474976, "logps/chosen": -74.57620239257812, "logps/ref_chosen": -74.59750366210938, "logps/ref_rejected": -93.57858276367188, "logps/rejected": -93.63352966308594, "loss": 1.3791, "margin_dpo/margin_mean": 0.07625627517700195, "margin_dpo/margin_std": 0.4107317328453064, "step": 26 }, { "epoch": 0.04081632653061224, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.09230369329452515, "fcm_dpo/q_t": 0.4976937770843506, "grad_norm": 18.47425079345703, "learning_rate": 1.9402985074626865e-07, "logits/chosen": 1.786578893661499, "logits/rejected": 1.7295043468475342, "logps/chosen": -78.6236801147461, "logps/ref_chosen": -78.64625549316406, "logps/ref_rejected": -92.33645629882812, "logps/rejected": -92.40618896484375, "loss": 1.3774, "margin_dpo/margin_mean": 0.09230378270149231, "margin_dpo/margin_std": 0.32660043239593506, "step": 27 }, { "epoch": 0.042328042328042326, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.06432390213012695, "fcm_dpo/q_t": 0.5016065835952759, "grad_norm": 18.158647537231445, "learning_rate": 2.0149253731343282e-07, "logits/chosen": 1.6356275081634521, "logits/rejected": 1.585396409034729, "logps/chosen": -76.95513153076172, "logps/ref_chosen": -76.91271209716797, "logps/ref_rejected": -88.48194885253906, "logps/rejected": -88.46004486083984, "loss": 1.3931, "margin_dpo/margin_mean": -0.06432461738586426, "margin_dpo/margin_std": 0.35822808742523193, "step": 28 }, { "epoch": 0.04383975812547241, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.002993851900100708, "fcm_dpo/q_t": 0.49992460012435913, "grad_norm": 21.283586502075195, "learning_rate": 2.08955223880597e-07, "logits/chosen": 1.913273811340332, "logits/rejected": 1.850356936454773, "logps/chosen": -89.58018493652344, "logps/ref_chosen": -89.62060546875, "logps/ref_rejected": -100.57090759277344, "logps/rejected": -100.53347778320312, "loss": 1.3864, "margin_dpo/margin_mean": 0.002994030714035034, "margin_dpo/margin_std": 0.4085092842578888, "step": 29 }, { "epoch": 0.045351473922902494, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.029388487339019775, "fcm_dpo/q_t": 0.4992656707763672, "grad_norm": 18.88498306274414, "learning_rate": 2.1641791044776117e-07, "logits/chosen": 1.961219072341919, "logits/rejected": 1.7812705039978027, "logps/chosen": -68.8134536743164, "logps/ref_chosen": -68.82381439208984, "logps/ref_rejected": -104.7047119140625, "logps/rejected": -104.72373962402344, "loss": 1.3838, "margin_dpo/margin_mean": 0.029387563467025757, "margin_dpo/margin_std": 0.44134002923965454, "step": 30 }, { "epoch": 0.04686318972033258, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.04079049825668335, "fcm_dpo/q_t": 0.5010193586349487, "grad_norm": 21.307546615600586, "learning_rate": 2.2388059701492537e-07, "logits/chosen": 1.6620802879333496, "logits/rejected": 1.5458331108093262, "logps/chosen": -86.08538818359375, "logps/ref_chosen": -86.06916809082031, "logps/ref_rejected": -116.66394805908203, "logps/rejected": -116.63937377929688, "loss": 1.3908, "margin_dpo/margin_mean": -0.04079073667526245, "margin_dpo/margin_std": 0.4178071618080139, "step": 31 }, { "epoch": 0.04837490551776266, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.04694744944572449, "fcm_dpo/q_t": 0.4988267421722412, "grad_norm": 18.534250259399414, "learning_rate": 2.3134328358208954e-07, "logits/chosen": 1.7644070386886597, "logits/rejected": 1.810452938079834, "logps/chosen": -87.56648254394531, "logps/ref_chosen": -87.59808349609375, "logps/ref_rejected": -100.26905822753906, "logps/rejected": -100.28439331054688, "loss": 1.3821, "margin_dpo/margin_mean": 0.04694738984107971, "margin_dpo/margin_std": 0.4298707842826843, "step": 32 }, { "epoch": 0.049886621315192746, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.006792932748794556, "fcm_dpo/q_t": 0.4998300075531006, "grad_norm": 19.715877532958984, "learning_rate": 2.388059701492537e-07, "logits/chosen": 1.1110432147979736, "logits/rejected": 1.0335161685943604, "logps/chosen": -83.32142639160156, "logps/ref_chosen": -83.29850769042969, "logps/ref_rejected": -94.60990142822266, "logps/rejected": -94.63961791992188, "loss": 1.386, "margin_dpo/margin_mean": 0.006792932748794556, "margin_dpo/margin_std": 0.4145265221595764, "step": 33 }, { "epoch": 0.05139833711262283, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.016542896628379822, "fcm_dpo/q_t": 0.5004134178161621, "grad_norm": 18.04196548461914, "learning_rate": 2.4626865671641786e-07, "logits/chosen": 2.0257890224456787, "logits/rejected": 1.9314777851104736, "logps/chosen": -70.14801788330078, "logps/ref_chosen": -70.15069580078125, "logps/ref_rejected": -84.4693832397461, "logps/rejected": -84.45014953613281, "loss": 1.3883, "margin_dpo/margin_mean": -0.016543224453926086, "margin_dpo/margin_std": 0.35318687558174133, "step": 34 }, { "epoch": 0.05291005291005291, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.14254692196846008, "fcm_dpo/q_t": 0.4964386820793152, "grad_norm": 17.937305450439453, "learning_rate": 2.537313432835821e-07, "logits/chosen": 1.6588869094848633, "logits/rejected": 1.600965142250061, "logps/chosen": -78.1865005493164, "logps/ref_chosen": -78.25238037109375, "logps/ref_rejected": -91.06356811523438, "logps/rejected": -91.14024353027344, "loss": 1.3725, "margin_dpo/margin_mean": 0.14254716038703918, "margin_dpo/margin_std": 0.4224141240119934, "step": 35 }, { "epoch": 0.05442176870748299, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.004450559616088867, "fcm_dpo/q_t": 0.5001120567321777, "grad_norm": 18.370346069335938, "learning_rate": 2.611940298507462e-07, "logits/chosen": 1.8511872291564941, "logits/rejected": 1.7305129766464233, "logps/chosen": -67.08213806152344, "logps/ref_chosen": -67.06676483154297, "logps/ref_rejected": -99.34661865234375, "logps/rejected": -99.35753631591797, "loss": 1.387, "margin_dpo/margin_mean": -0.004450619220733643, "margin_dpo/margin_std": 0.3298412561416626, "step": 36 }, { "epoch": 0.055933484504913075, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.03487496078014374, "fcm_dpo/q_t": 0.500869870185852, "grad_norm": 23.71449851989746, "learning_rate": 2.686567164179104e-07, "logits/chosen": 1.983677864074707, "logits/rejected": 1.6817830801010132, "logps/chosen": -75.88996124267578, "logps/ref_chosen": -75.9269790649414, "logps/ref_rejected": -130.34371948242188, "logps/rejected": -130.27182006835938, "loss": 1.3903, "margin_dpo/margin_mean": -0.03487536311149597, "margin_dpo/margin_std": 0.440315306186676, "step": 37 }, { "epoch": 0.05744520030234316, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.008413180708885193, "fcm_dpo/q_t": 0.5002103447914124, "grad_norm": 18.42804718017578, "learning_rate": 2.761194029850746e-07, "logits/chosen": 1.5153725147247314, "logits/rejected": 1.4806842803955078, "logps/chosen": -83.677978515625, "logps/ref_chosen": -83.65460205078125, "logps/ref_rejected": -89.15221405029297, "logps/rejected": -89.16717529296875, "loss": 1.3877, "margin_dpo/margin_mean": -0.008413195610046387, "margin_dpo/margin_std": 0.4714970588684082, "step": 38 }, { "epoch": 0.05895691609977324, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.06950554251670837, "fcm_dpo/q_t": 0.4982631206512451, "grad_norm": 19.082138061523438, "learning_rate": 2.8358208955223876e-07, "logits/chosen": 1.8353855609893799, "logits/rejected": 1.793013572692871, "logps/chosen": -76.090087890625, "logps/ref_chosen": -76.18706512451172, "logps/ref_rejected": -94.39262390136719, "logps/rejected": -94.36514282226562, "loss": 1.3797, "margin_dpo/margin_mean": 0.06950537860393524, "margin_dpo/margin_std": 0.35339751839637756, "step": 39 }, { "epoch": 0.06046863189720333, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.007292389869689941, "fcm_dpo/q_t": 0.5001822710037231, "grad_norm": 18.086139678955078, "learning_rate": 2.9104477611940296e-07, "logits/chosen": 1.9194378852844238, "logits/rejected": 1.8017828464508057, "logps/chosen": -77.4515609741211, "logps/ref_chosen": -77.43476867675781, "logps/ref_rejected": -98.58720397949219, "logps/rejected": -98.5967025756836, "loss": 1.3874, "margin_dpo/margin_mean": -0.00729215145111084, "margin_dpo/margin_std": 0.35634469985961914, "step": 40 }, { "epoch": 0.06198034769463341, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.09080618619918823, "fcm_dpo/q_t": 0.4977305829524994, "grad_norm": 18.268693923950195, "learning_rate": 2.985074626865671e-07, "logits/chosen": 1.5746355056762695, "logits/rejected": 1.5183868408203125, "logps/chosen": -86.79508972167969, "logps/ref_chosen": -86.87640380859375, "logps/ref_rejected": -101.0856704711914, "logps/rejected": -101.09515380859375, "loss": 1.3777, "margin_dpo/margin_mean": 0.09080681204795837, "margin_dpo/margin_std": 0.4117854833602905, "step": 41 }, { "epoch": 0.06349206349206349, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.038176313042640686, "fcm_dpo/q_t": 0.49904680252075195, "grad_norm": 18.118419647216797, "learning_rate": 3.059701492537313e-07, "logits/chosen": 1.7109103202819824, "logits/rejected": 1.6621713638305664, "logps/chosen": -79.33087158203125, "logps/ref_chosen": -79.35625457763672, "logps/ref_rejected": -91.54881286621094, "logps/rejected": -91.56159973144531, "loss": 1.3829, "margin_dpo/margin_mean": 0.03817671537399292, "margin_dpo/margin_std": 0.4199693202972412, "step": 42 }, { "epoch": 0.06500377928949358, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.049236446619033813, "fcm_dpo/q_t": 0.5012304782867432, "grad_norm": 19.544836044311523, "learning_rate": 3.134328358208955e-07, "logits/chosen": 1.7505764961242676, "logits/rejected": 1.6548776626586914, "logps/chosen": -90.8389892578125, "logps/ref_chosen": -90.81220245361328, "logps/ref_rejected": -94.16316986083984, "logps/rejected": -94.1407241821289, "loss": 1.3918, "margin_dpo/margin_mean": -0.0492367148399353, "margin_dpo/margin_std": 0.46574994921684265, "step": 43 }, { "epoch": 0.06651549508692366, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.14597035944461823, "fcm_dpo/q_t": 0.4963557720184326, "grad_norm": 18.76498794555664, "learning_rate": 3.2089552238805965e-07, "logits/chosen": 1.420806884765625, "logits/rejected": 1.3459522724151611, "logps/chosen": -88.20413208007812, "logps/ref_chosen": -88.27932739257812, "logps/ref_rejected": -101.14324951171875, "logps/rejected": -101.21401977539062, "loss": 1.3724, "margin_dpo/margin_mean": 0.14597000181674957, "margin_dpo/margin_std": 0.48067185282707214, "step": 44 }, { "epoch": 0.06802721088435375, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.08005937933921814, "fcm_dpo/q_t": 0.49800071120262146, "grad_norm": 19.359880447387695, "learning_rate": 3.2835820895522385e-07, "logits/chosen": 1.6777125597000122, "logits/rejected": 1.5594508647918701, "logps/chosen": -78.37330627441406, "logps/ref_chosen": -78.40264892578125, "logps/ref_rejected": -109.39339447021484, "logps/rejected": -109.4441146850586, "loss": 1.3788, "margin_dpo/margin_mean": 0.08005967736244202, "margin_dpo/margin_std": 0.43401455879211426, "step": 45 }, { "epoch": 0.06953892668178382, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.09679755568504333, "fcm_dpo/q_t": 0.497580885887146, "grad_norm": 18.390920639038086, "learning_rate": 3.3582089552238805e-07, "logits/chosen": 1.4970589876174927, "logits/rejected": 1.38013756275177, "logps/chosen": -77.99214172363281, "logps/ref_chosen": -78.08491516113281, "logps/ref_rejected": -97.42544555664062, "logps/rejected": -97.42948150634766, "loss": 1.3769, "margin_dpo/margin_mean": 0.09679737687110901, "margin_dpo/margin_std": 0.3261662721633911, "step": 46 }, { "epoch": 0.0710506424792139, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.010644227266311646, "fcm_dpo/q_t": 0.499734491109848, "grad_norm": 19.139677047729492, "learning_rate": 3.432835820895522e-07, "logits/chosen": 1.5834190845489502, "logits/rejected": 1.5048835277557373, "logps/chosen": -70.7773208618164, "logps/ref_chosen": -70.78988647460938, "logps/ref_rejected": -91.17266845703125, "logps/rejected": -91.17074584960938, "loss": 1.3856, "margin_dpo/margin_mean": 0.010644763708114624, "margin_dpo/margin_std": 0.3661983013153076, "step": 47 }, { "epoch": 0.07256235827664399, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.08006027340888977, "fcm_dpo/q_t": 0.4979988932609558, "grad_norm": 17.098857879638672, "learning_rate": 3.507462686567164e-07, "logits/chosen": 1.9507906436920166, "logits/rejected": 1.8869541883468628, "logps/chosen": -66.59869384765625, "logps/ref_chosen": -66.67327880859375, "logps/ref_rejected": -79.28543853759766, "logps/rejected": -79.29090881347656, "loss": 1.3786, "margin_dpo/margin_mean": 0.08006066083908081, "margin_dpo/margin_std": 0.3583065867424011, "step": 48 }, { "epoch": 0.07407407407407407, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.04423801600933075, "fcm_dpo/q_t": 0.49889448285102844, "grad_norm": 17.491674423217773, "learning_rate": 3.5820895522388055e-07, "logits/chosen": 1.35086190700531, "logits/rejected": 1.3094708919525146, "logps/chosen": -75.12703704833984, "logps/ref_chosen": -75.17504119873047, "logps/ref_rejected": -80.5369873046875, "logps/rejected": -80.5332260131836, "loss": 1.3822, "margin_dpo/margin_mean": 0.04423774778842926, "margin_dpo/margin_std": 0.36896711587905884, "step": 49 }, { "epoch": 0.07558578987150416, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.03308814764022827, "fcm_dpo/q_t": 0.5008265972137451, "grad_norm": 17.986473083496094, "learning_rate": 3.6567164179104475e-07, "logits/chosen": 1.7103403806686401, "logits/rejected": 1.6472971439361572, "logps/chosen": -71.20738220214844, "logps/ref_chosen": -71.2314224243164, "logps/ref_rejected": -87.59088134765625, "logps/rejected": -87.53375244140625, "loss": 1.3899, "margin_dpo/margin_mean": -0.03308817744255066, "margin_dpo/margin_std": 0.33914172649383545, "step": 50 }, { "epoch": 0.07709750566893424, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.07528911530971527, "fcm_dpo/q_t": 0.5018813610076904, "grad_norm": 18.881925582885742, "learning_rate": 3.7313432835820895e-07, "logits/chosen": 1.7805354595184326, "logits/rejected": 1.7294878959655762, "logps/chosen": -78.7321548461914, "logps/ref_chosen": -78.69171142578125, "logps/ref_rejected": -100.78950500488281, "logps/rejected": -100.75465393066406, "loss": 1.3941, "margin_dpo/margin_mean": -0.07528868317604065, "margin_dpo/margin_std": 0.34039679169654846, "step": 51 }, { "epoch": 0.07860922146636433, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.036653727293014526, "fcm_dpo/q_t": 0.4990845322608948, "grad_norm": 20.350269317626953, "learning_rate": 3.805970149253731e-07, "logits/chosen": 1.852992296218872, "logits/rejected": 1.678769588470459, "logps/chosen": -89.09419250488281, "logps/ref_chosen": -89.09419250488281, "logps/ref_rejected": -116.87469482421875, "logps/rejected": -116.91134643554688, "loss": 1.3831, "margin_dpo/margin_mean": 0.03665390610694885, "margin_dpo/margin_std": 0.44389206171035767, "step": 52 }, { "epoch": 0.0801209372637944, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.07223968207836151, "fcm_dpo/q_t": 0.498193621635437, "grad_norm": 17.331947326660156, "learning_rate": 3.880597014925373e-07, "logits/chosen": 1.4440419673919678, "logits/rejected": 1.405485987663269, "logps/chosen": -74.11995697021484, "logps/ref_chosen": -74.21418762207031, "logps/ref_rejected": -75.71168518066406, "logps/rejected": -75.68968200683594, "loss": 1.3796, "margin_dpo/margin_mean": 0.07224002480506897, "margin_dpo/margin_std": 0.4341847598552704, "step": 53 }, { "epoch": 0.08163265306122448, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.0331706702709198, "fcm_dpo/q_t": 0.49917101860046387, "grad_norm": 16.379093170166016, "learning_rate": 3.9552238805970144e-07, "logits/chosen": 1.6438074111938477, "logits/rejected": 1.6267582178115845, "logps/chosen": -65.57046508789062, "logps/ref_chosen": -65.63475799560547, "logps/ref_rejected": -76.4462890625, "logps/rejected": -76.41516876220703, "loss": 1.3834, "margin_dpo/margin_mean": 0.03317078948020935, "margin_dpo/margin_std": 0.4223175644874573, "step": 54 }, { "epoch": 0.08314436885865457, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.19592681527137756, "fcm_dpo/q_t": 0.49510544538497925, "grad_norm": 19.32520294189453, "learning_rate": 4.0298507462686564e-07, "logits/chosen": 1.6696865558624268, "logits/rejected": 1.4560625553131104, "logps/chosen": -68.66702270507812, "logps/ref_chosen": -68.7640380859375, "logps/ref_rejected": -108.80074310302734, "logps/rejected": -108.899658203125, "loss": 1.3673, "margin_dpo/margin_mean": 0.1959269940853119, "margin_dpo/margin_std": 0.4270592927932739, "step": 55 }, { "epoch": 0.08465608465608465, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.09722763299942017, "fcm_dpo/q_t": 0.49757248163223267, "grad_norm": 16.941253662109375, "learning_rate": 4.1044776119402984e-07, "logits/chosen": 1.7606887817382812, "logits/rejected": 1.7339457273483276, "logps/chosen": -74.73141479492188, "logps/ref_chosen": -74.7939453125, "logps/ref_rejected": -81.83535766601562, "logps/rejected": -81.87005615234375, "loss": 1.3771, "margin_dpo/margin_mean": 0.09722745418548584, "margin_dpo/margin_std": 0.46504199504852295, "step": 56 }, { "epoch": 0.08616780045351474, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.18711082637310028, "fcm_dpo/q_t": 0.49532490968704224, "grad_norm": 18.977163314819336, "learning_rate": 4.17910447761194e-07, "logits/chosen": 1.6965608596801758, "logits/rejected": 1.528353214263916, "logps/chosen": -74.48811340332031, "logps/ref_chosen": -74.5794677734375, "logps/ref_rejected": -105.61981964111328, "logps/rejected": -105.715576171875, "loss": 1.3684, "margin_dpo/margin_mean": 0.18711179494857788, "margin_dpo/margin_std": 0.5311607122421265, "step": 57 }, { "epoch": 0.08767951625094482, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.040792107582092285, "fcm_dpo/q_t": 0.49898386001586914, "grad_norm": 19.24970054626465, "learning_rate": 4.253731343283582e-07, "logits/chosen": 1.5790772438049316, "logits/rejected": 1.50129234790802, "logps/chosen": -92.24163055419922, "logps/ref_chosen": -92.24464416503906, "logps/ref_rejected": -103.18975830078125, "logps/rejected": -103.22753143310547, "loss": 1.383, "margin_dpo/margin_mean": 0.040792256593704224, "margin_dpo/margin_std": 0.5367269515991211, "step": 58 }, { "epoch": 0.08919123204837491, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.12307733297348022, "fcm_dpo/q_t": 0.4969256520271301, "grad_norm": 16.87409782409668, "learning_rate": 4.3283582089552234e-07, "logits/chosen": 1.8218092918395996, "logits/rejected": 1.5633070468902588, "logps/chosen": -67.01643371582031, "logps/ref_chosen": -67.12688446044922, "logps/ref_rejected": -91.69569396972656, "logps/rejected": -91.70832824707031, "loss": 1.3747, "margin_dpo/margin_mean": 0.12307757139205933, "margin_dpo/margin_std": 0.4987383782863617, "step": 59 }, { "epoch": 0.09070294784580499, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.026643604040145874, "fcm_dpo/q_t": 0.4993351995944977, "grad_norm": 18.580293655395508, "learning_rate": 4.4029850746268654e-07, "logits/chosen": 1.6838085651397705, "logits/rejected": 1.705696940422058, "logps/chosen": -79.6468505859375, "logps/ref_chosen": -79.74327087402344, "logps/ref_rejected": -77.89244079589844, "logps/rejected": -77.82266235351562, "loss": 1.3845, "margin_dpo/margin_mean": 0.02664312720298767, "margin_dpo/margin_std": 0.5903670787811279, "step": 60 }, { "epoch": 0.09221466364323508, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.057510122656822205, "fcm_dpo/q_t": 0.49856314063072205, "grad_norm": 16.548521041870117, "learning_rate": 4.4776119402985074e-07, "logits/chosen": 1.6789517402648926, "logits/rejected": 1.6414930820465088, "logps/chosen": -65.96928405761719, "logps/ref_chosen": -66.08685302734375, "logps/ref_rejected": -88.1458740234375, "logps/rejected": -88.0858154296875, "loss": 1.3813, "margin_dpo/margin_mean": 0.05751065909862518, "margin_dpo/margin_std": 0.5422056913375854, "step": 61 }, { "epoch": 0.09372637944066516, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.08958558738231659, "fcm_dpo/q_t": 0.49775969982147217, "grad_norm": 18.009490966796875, "learning_rate": 4.552238805970149e-07, "logits/chosen": 2.040876865386963, "logits/rejected": 1.9636160135269165, "logps/chosen": -80.92247009277344, "logps/ref_chosen": -81.0108871459961, "logps/ref_rejected": -95.50444793701172, "logps/rejected": -95.50562286376953, "loss": 1.378, "margin_dpo/margin_mean": 0.08958582580089569, "margin_dpo/margin_std": 0.5040308833122253, "step": 62 }, { "epoch": 0.09523809523809523, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.16455836594104767, "fcm_dpo/q_t": 0.49589046835899353, "grad_norm": 19.401832580566406, "learning_rate": 4.626865671641791e-07, "logits/chosen": 2.1442782878875732, "logits/rejected": 2.062937021255493, "logps/chosen": -78.39234924316406, "logps/ref_chosen": -78.57593536376953, "logps/ref_rejected": -99.71000671386719, "logps/rejected": -99.69097900390625, "loss": 1.3706, "margin_dpo/margin_mean": 0.16455818712711334, "margin_dpo/margin_std": 0.5183212757110596, "step": 63 }, { "epoch": 0.09674981103552532, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.019572198390960693, "fcm_dpo/q_t": 0.4995095133781433, "grad_norm": 16.533632278442383, "learning_rate": 4.701492537313433e-07, "logits/chosen": 1.6818785667419434, "logits/rejected": 1.6190211772918701, "logps/chosen": -69.181396484375, "logps/ref_chosen": -69.24063110351562, "logps/ref_rejected": -84.14842987060547, "logps/rejected": -84.1087646484375, "loss": 1.3849, "margin_dpo/margin_mean": 0.019572317600250244, "margin_dpo/margin_std": 0.45630577206611633, "step": 64 }, { "epoch": 0.0982615268329554, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.09427054226398468, "fcm_dpo/q_t": 0.4976460933685303, "grad_norm": 18.83917236328125, "learning_rate": 4.776119402985074e-07, "logits/chosen": 1.6213884353637695, "logits/rejected": 1.5737123489379883, "logps/chosen": -83.97723388671875, "logps/ref_chosen": -84.0351333618164, "logps/ref_rejected": -96.42926788330078, "logps/rejected": -96.46564483642578, "loss": 1.3777, "margin_dpo/margin_mean": 0.09427036345005035, "margin_dpo/margin_std": 0.5586059093475342, "step": 65 }, { "epoch": 0.09977324263038549, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.17929774522781372, "fcm_dpo/q_t": 0.49552321434020996, "grad_norm": 18.57543182373047, "learning_rate": 4.850746268656717e-07, "logits/chosen": 1.5354599952697754, "logits/rejected": 1.4523582458496094, "logps/chosen": -87.74684143066406, "logps/ref_chosen": -87.79238891601562, "logps/ref_rejected": -95.26547241210938, "logps/rejected": -95.39921569824219, "loss": 1.3693, "margin_dpo/margin_mean": 0.17929738759994507, "margin_dpo/margin_std": 0.5706717371940613, "step": 66 }, { "epoch": 0.10128495842781557, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.16508522629737854, "fcm_dpo/q_t": 0.4958776831626892, "grad_norm": 19.095687866210938, "learning_rate": 4.925373134328357e-07, "logits/chosen": 1.5590996742248535, "logits/rejected": 1.4503483772277832, "logps/chosen": -77.86262512207031, "logps/ref_chosen": -78.00114440917969, "logps/ref_rejected": -96.03421020507812, "logps/rejected": -96.06077575683594, "loss": 1.3704, "margin_dpo/margin_mean": 0.16508588194847107, "margin_dpo/margin_std": 0.4702576994895935, "step": 67 }, { "epoch": 0.10279667422524566, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.09334829449653625, "fcm_dpo/q_t": 0.4976690411567688, "grad_norm": 19.808475494384766, "learning_rate": 5e-07, "logits/chosen": 1.5709608793258667, "logits/rejected": 1.4888055324554443, "logps/chosen": -96.04895782470703, "logps/ref_chosen": -96.04267883300781, "logps/ref_rejected": -110.91169738769531, "logps/rejected": -111.01132202148438, "loss": 1.3779, "margin_dpo/margin_mean": 0.09334835410118103, "margin_dpo/margin_std": 0.6071600914001465, "step": 68 }, { "epoch": 0.10430839002267574, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.22511988878250122, "fcm_dpo/q_t": 0.4943769872188568, "grad_norm": 19.941877365112305, "learning_rate": 4.999965034812934e-07, "logits/chosen": 1.3999309539794922, "logits/rejected": 1.2983310222625732, "logps/chosen": -84.92649841308594, "logps/ref_chosen": -85.11124420166016, "logps/ref_rejected": -107.57357025146484, "logps/rejected": -107.61393737792969, "loss": 1.3647, "margin_dpo/margin_mean": 0.2251199185848236, "margin_dpo/margin_std": 0.542759895324707, "step": 69 }, { "epoch": 0.10582010582010581, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.1886361539363861, "fcm_dpo/q_t": 0.4952925741672516, "grad_norm": 18.396041870117188, "learning_rate": 4.999860140229787e-07, "logits/chosen": 1.8381619453430176, "logits/rejected": 1.7834123373031616, "logps/chosen": -81.65312194824219, "logps/ref_chosen": -81.87960815429688, "logps/ref_rejected": -92.63243103027344, "logps/rejected": -92.59457397460938, "loss": 1.3687, "margin_dpo/margin_mean": 0.1886359453201294, "margin_dpo/margin_std": 0.6872633695602417, "step": 70 }, { "epoch": 0.1073318216175359, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.1606692373752594, "fcm_dpo/q_t": 0.49598369002342224, "grad_norm": 17.651378631591797, "learning_rate": 4.999685319184688e-07, "logits/chosen": 1.5981061458587646, "logits/rejected": 1.5926434993743896, "logps/chosen": -79.60566711425781, "logps/ref_chosen": -79.74766540527344, "logps/ref_rejected": -83.39110565185547, "logps/rejected": -83.4097671508789, "loss": 1.3712, "margin_dpo/margin_mean": 0.16066959500312805, "margin_dpo/margin_std": 0.60643470287323, "step": 71 }, { "epoch": 0.10884353741496598, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.23458097875118256, "fcm_dpo/q_t": 0.4941413402557373, "grad_norm": 19.199542999267578, "learning_rate": 4.999440576567755e-07, "logits/chosen": 1.6563818454742432, "logits/rejected": 1.4758176803588867, "logps/chosen": -72.76530456542969, "logps/ref_chosen": -73.04458618164062, "logps/ref_rejected": -92.64720153808594, "logps/rejected": -92.60250854492188, "loss": 1.3639, "margin_dpo/margin_mean": 0.23458027839660645, "margin_dpo/margin_std": 0.6084821820259094, "step": 72 }, { "epoch": 0.11035525321239607, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.018752366304397583, "fcm_dpo/q_t": 0.49953436851501465, "grad_norm": 19.4569091796875, "learning_rate": 4.999125919224965e-07, "logits/chosen": 1.4742746353149414, "logits/rejected": 1.4132012128829956, "logps/chosen": -87.65899658203125, "logps/ref_chosen": -87.71681213378906, "logps/ref_rejected": -96.93572998046875, "logps/rejected": -96.89665985107422, "loss": 1.3858, "margin_dpo/margin_mean": 0.018752455711364746, "margin_dpo/margin_std": 0.7262225151062012, "step": 73 }, { "epoch": 0.11186696900982615, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.27362507581710815, "fcm_dpo/q_t": 0.4931687116622925, "grad_norm": 18.025976181030273, "learning_rate": 4.998741355957963e-07, "logits/chosen": 1.7035603523254395, "logits/rejected": 1.5352582931518555, "logps/chosen": -66.72885131835938, "logps/ref_chosen": -67.07321166992188, "logps/ref_rejected": -96.5340347290039, "logps/rejected": -96.46330261230469, "loss": 1.3603, "margin_dpo/margin_mean": 0.27362462878227234, "margin_dpo/margin_std": 0.6678668260574341, "step": 74 }, { "epoch": 0.11337868480725624, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.263182669878006, "fcm_dpo/q_t": 0.4934360384941101, "grad_norm": 16.99013900756836, "learning_rate": 4.998286897523808e-07, "logits/chosen": 1.5969147682189941, "logits/rejected": 1.4522219896316528, "logps/chosen": -61.555511474609375, "logps/ref_chosen": -61.80186462402344, "logps/ref_rejected": -82.37368774414062, "logps/rejected": -82.39051818847656, "loss": 1.3616, "margin_dpo/margin_mean": 0.26318252086639404, "margin_dpo/margin_std": 0.751126766204834, "step": 75 }, { "epoch": 0.11489040060468632, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.21271714568138123, "fcm_dpo/q_t": 0.4946902394294739, "grad_norm": 24.02982521057129, "learning_rate": 4.997762556634679e-07, "logits/chosen": 1.528346300125122, "logits/rejected": 1.4039630889892578, "logps/chosen": -69.59974670410156, "logps/ref_chosen": -69.92233276367188, "logps/ref_rejected": -97.08378601074219, "logps/rejected": -96.97392272949219, "loss": 1.3667, "margin_dpo/margin_mean": 0.21271675825119019, "margin_dpo/margin_std": 0.7994598150253296, "step": 76 }, { "epoch": 0.1164021164021164, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.4026382267475128, "fcm_dpo/q_t": 0.4899574816226959, "grad_norm": 18.348121643066406, "learning_rate": 4.99716834795752e-07, "logits/chosen": 1.432910680770874, "logits/rejected": 1.342555284500122, "logps/chosen": -70.83218383789062, "logps/ref_chosen": -71.206298828125, "logps/ref_rejected": -95.22071075439453, "logps/rejected": -95.24923706054688, "loss": 1.348, "margin_dpo/margin_mean": 0.40263840556144714, "margin_dpo/margin_std": 0.729675829410553, "step": 77 }, { "epoch": 0.11791383219954649, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.37810221314430237, "fcm_dpo/q_t": 0.4905719757080078, "grad_norm": 17.756649017333984, "learning_rate": 4.996504288113623e-07, "logits/chosen": 1.6024014949798584, "logits/rejected": 1.5885231494903564, "logps/chosen": -83.94834899902344, "logps/ref_chosen": -84.40055847167969, "logps/ref_rejected": -95.41949462890625, "logps/rejected": -95.34538269042969, "loss": 1.3506, "margin_dpo/margin_mean": 0.37810176610946655, "margin_dpo/margin_std": 0.819503903388977, "step": 78 }, { "epoch": 0.11942554799697656, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.39458632469177246, "fcm_dpo/q_t": 0.4901793599128723, "grad_norm": 19.339645385742188, "learning_rate": 4.995770395678171e-07, "logits/chosen": 1.9015599489212036, "logits/rejected": 1.694523572921753, "logps/chosen": -65.59817504882812, "logps/ref_chosen": -65.93923950195312, "logps/ref_rejected": -102.92240905761719, "logps/rejected": -102.97592163085938, "loss": 1.3497, "margin_dpo/margin_mean": 0.39458605647087097, "margin_dpo/margin_std": 0.9920768737792969, "step": 79 }, { "epoch": 0.12093726379440665, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.31022706627845764, "fcm_dpo/q_t": 0.4922669529914856, "grad_norm": 17.57803726196289, "learning_rate": 4.994966691179711e-07, "logits/chosen": 1.6756740808486938, "logits/rejected": 1.4945653676986694, "logps/chosen": -78.33760070800781, "logps/ref_chosen": -78.61624908447266, "logps/ref_rejected": -99.9122314453125, "logps/rejected": -99.94380950927734, "loss": 1.3576, "margin_dpo/margin_mean": 0.3102267384529114, "margin_dpo/margin_std": 0.9209951758384705, "step": 80 }, { "epoch": 0.12244897959183673, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.4144776165485382, "fcm_dpo/q_t": 0.48966920375823975, "grad_norm": 17.837419509887695, "learning_rate": 4.994093197099587e-07, "logits/chosen": 1.5351800918579102, "logits/rejected": 1.438685655593872, "logps/chosen": -79.15130615234375, "logps/ref_chosen": -79.49641418457031, "logps/ref_rejected": -94.52413940429688, "logps/rejected": -94.593505859375, "loss": 1.3474, "margin_dpo/margin_mean": 0.4144783914089203, "margin_dpo/margin_std": 0.8849209547042847, "step": 81 }, { "epoch": 0.12396069538926682, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.5314297676086426, "fcm_dpo/q_t": 0.486750990152359, "grad_norm": 17.93260383605957, "learning_rate": 4.993149937871306e-07, "logits/chosen": 2.1229565143585205, "logits/rejected": 1.9273746013641357, "logps/chosen": -64.33843231201172, "logps/ref_chosen": -64.97168731689453, "logps/ref_rejected": -86.69085693359375, "logps/rejected": -86.58903503417969, "loss": 1.3359, "margin_dpo/margin_mean": 0.531429648399353, "margin_dpo/margin_std": 0.8953331112861633, "step": 82 }, { "epoch": 0.1254724111866969, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.4476925730705261, "fcm_dpo/q_t": 0.4888409674167633, "grad_norm": 18.511592864990234, "learning_rate": 4.992136939879856e-07, "logits/chosen": 1.7243876457214355, "logits/rejected": 1.5867960453033447, "logps/chosen": -72.40562438964844, "logps/ref_chosen": -72.92498779296875, "logps/ref_rejected": -92.27165222167969, "logps/rejected": -92.19998168945312, "loss": 1.3441, "margin_dpo/margin_mean": 0.44769296050071716, "margin_dpo/margin_std": 0.8938767910003662, "step": 83 }, { "epoch": 0.12698412698412698, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.45546847581863403, "fcm_dpo/q_t": 0.48863863945007324, "grad_norm": 19.406330108642578, "learning_rate": 4.991054231460969e-07, "logits/chosen": 1.9599827527999878, "logits/rejected": 1.7785536050796509, "logps/chosen": -81.36518859863281, "logps/ref_chosen": -81.79109191894531, "logps/ref_rejected": -99.20896911621094, "logps/rejected": -99.23854064941406, "loss": 1.344, "margin_dpo/margin_mean": 0.45546823740005493, "margin_dpo/margin_std": 1.0371967554092407, "step": 84 }, { "epoch": 0.12849584278155707, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.48611921072006226, "fcm_dpo/q_t": 0.4878706634044647, "grad_norm": 17.551944732666016, "learning_rate": 4.989901842900325e-07, "logits/chosen": 1.5689201354980469, "logits/rejected": 1.4499050378799438, "logps/chosen": -67.32066345214844, "logps/ref_chosen": -67.94147491455078, "logps/ref_rejected": -85.76875305175781, "logps/rejected": -85.63406372070312, "loss": 1.3406, "margin_dpo/margin_mean": 0.4861195683479309, "margin_dpo/margin_std": 0.9498151540756226, "step": 85 }, { "epoch": 0.13000755857898716, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.3929840326309204, "fcm_dpo/q_t": 0.4902006983757019, "grad_norm": 17.350364685058594, "learning_rate": 4.988679806432711e-07, "logits/chosen": 1.9112555980682373, "logits/rejected": 1.8482015132904053, "logps/chosen": -78.8712158203125, "logps/ref_chosen": -79.21485900878906, "logps/ref_rejected": -88.69877624511719, "logps/rejected": -88.74812316894531, "loss": 1.3492, "margin_dpo/margin_mean": 0.392984002828598, "margin_dpo/margin_std": 0.840862512588501, "step": 86 }, { "epoch": 0.13151927437641722, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.7411639094352722, "fcm_dpo/q_t": 0.4815508723258972, "grad_norm": 18.811569213867188, "learning_rate": 4.987388156241114e-07, "logits/chosen": 1.541797161102295, "logits/rejected": 1.3224825859069824, "logps/chosen": -83.91616821289062, "logps/ref_chosen": -84.45362854003906, "logps/ref_rejected": -103.43824005126953, "logps/rejected": -103.6419448852539, "loss": 1.3171, "margin_dpo/margin_mean": 0.7411632537841797, "margin_dpo/margin_std": 1.1999635696411133, "step": 87 }, { "epoch": 0.1330309901738473, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.36682209372520447, "fcm_dpo/q_t": 0.4908638894557953, "grad_norm": 18.169139862060547, "learning_rate": 4.986026928455767e-07, "logits/chosen": 1.8407284021377563, "logits/rejected": 1.814268708229065, "logps/chosen": -80.88067626953125, "logps/ref_chosen": -81.27230834960938, "logps/ref_rejected": -89.51646423339844, "logps/rejected": -89.49165344238281, "loss": 1.3531, "margin_dpo/margin_mean": 0.3668217658996582, "margin_dpo/margin_std": 1.042789101600647, "step": 88 }, { "epoch": 0.1345427059712774, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.7424743175506592, "fcm_dpo/q_t": 0.4815485179424286, "grad_norm": 18.281597137451172, "learning_rate": 4.984596161153135e-07, "logits/chosen": 2.108861207962036, "logits/rejected": 1.8158016204833984, "logps/chosen": -57.46028137207031, "logps/ref_chosen": -58.142333984375, "logps/ref_rejected": -102.53756713867188, "logps/rejected": -102.59799194335938, "loss": 1.3176, "margin_dpo/margin_mean": 0.7424756288528442, "margin_dpo/margin_std": 1.2756874561309814, "step": 89 }, { "epoch": 0.1360544217687075, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.5880427360534668, "fcm_dpo/q_t": 0.4853513538837433, "grad_norm": 19.718008041381836, "learning_rate": 4.983095894354857e-07, "logits/chosen": 1.734527587890625, "logits/rejected": 1.509333610534668, "logps/chosen": -74.75138854980469, "logps/ref_chosen": -75.26505279541016, "logps/ref_rejected": -104.32842254638672, "logps/rejected": -104.40279388427734, "loss": 1.3319, "margin_dpo/margin_mean": 0.588042676448822, "margin_dpo/margin_std": 1.1916567087173462, "step": 90 }, { "epoch": 0.13756613756613756, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.5270799398422241, "fcm_dpo/q_t": 0.4869080185890198, "grad_norm": 19.484155654907227, "learning_rate": 4.98152617002662e-07, "logits/chosen": 2.009382486343384, "logits/rejected": 1.8327490091323853, "logps/chosen": -68.78492736816406, "logps/ref_chosen": -69.33901977539062, "logps/ref_rejected": -90.31411743164062, "logps/rejected": -90.28709411621094, "loss": 1.3388, "margin_dpo/margin_mean": 0.5270801782608032, "margin_dpo/margin_std": 1.3382683992385864, "step": 91 }, { "epoch": 0.13907785336356765, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.6017959713935852, "fcm_dpo/q_t": 0.4850356876850128, "grad_norm": 18.92400360107422, "learning_rate": 4.979887032076988e-07, "logits/chosen": 1.8455489873886108, "logits/rejected": 1.6811567544937134, "logps/chosen": -71.865478515625, "logps/ref_chosen": -72.4566650390625, "logps/ref_rejected": -91.6706771850586, "logps/rejected": -91.68128204345703, "loss": 1.3306, "margin_dpo/margin_mean": 0.6017957925796509, "margin_dpo/margin_std": 1.1899182796478271, "step": 92 }, { "epoch": 0.14058956916099774, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.4494805932044983, "fcm_dpo/q_t": 0.48889386653900146, "grad_norm": 16.139991760253906, "learning_rate": 4.978178526356172e-07, "logits/chosen": 1.5873305797576904, "logits/rejected": 1.5008435249328613, "logps/chosen": -63.4215202331543, "logps/ref_chosen": -64.08897399902344, "logps/ref_rejected": -75.09095764160156, "logps/rejected": -74.87298583984375, "loss": 1.3479, "margin_dpo/margin_mean": 0.4494805335998535, "margin_dpo/margin_std": 1.543592929840088, "step": 93 }, { "epoch": 0.1421012849584278, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.0324513912200928, "fcm_dpo/q_t": 0.474477618932724, "grad_norm": 20.35590171813965, "learning_rate": 4.976400700654751e-07, "logits/chosen": 1.6213706731796265, "logits/rejected": 1.4608677625656128, "logps/chosen": -78.84420776367188, "logps/ref_chosen": -79.67372131347656, "logps/ref_rejected": -94.64076232910156, "logps/rejected": -94.84370422363281, "loss": 1.2933, "margin_dpo/margin_mean": 1.0324519872665405, "margin_dpo/margin_std": 1.7397394180297852, "step": 94 }, { "epoch": 0.1436130007558579, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.6408168077468872, "fcm_dpo/q_t": 0.4840792715549469, "grad_norm": 18.675636291503906, "learning_rate": 4.974553604702332e-07, "logits/chosen": 1.4493110179901123, "logits/rejected": 1.2806124687194824, "logps/chosen": -78.254638671875, "logps/ref_chosen": -78.65760803222656, "logps/ref_rejected": -109.4048080444336, "logps/rejected": -109.64266204833984, "loss": 1.3286, "margin_dpo/margin_mean": 0.6408175230026245, "margin_dpo/margin_std": 1.4486722946166992, "step": 95 }, { "epoch": 0.14512471655328799, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.812680721282959, "fcm_dpo/q_t": 0.47984737157821655, "grad_norm": 18.943740844726562, "learning_rate": 4.972637290166157e-07, "logits/chosen": 1.8304685354232788, "logits/rejected": 1.6877751350402832, "logps/chosen": -77.20866394042969, "logps/ref_chosen": -77.708251953125, "logps/ref_rejected": -104.36044311523438, "logps/rejected": -104.67352294921875, "loss": 1.3126, "margin_dpo/margin_mean": 0.8126805424690247, "margin_dpo/margin_std": 1.518812656402588, "step": 96 }, { "epoch": 0.14663643235071808, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.3321409225463867, "fcm_dpo/q_t": 0.4917249083518982, "grad_norm": 19.33458709716797, "learning_rate": 4.970651810649666e-07, "logits/chosen": 1.5826592445373535, "logits/rejected": 1.481719732284546, "logps/chosen": -84.24017333984375, "logps/ref_chosen": -84.58917999267578, "logps/ref_rejected": -99.25704956054688, "logps/rejected": -99.24018096923828, "loss": 1.3605, "margin_dpo/margin_mean": 0.3321412205696106, "margin_dpo/margin_std": 1.6863982677459717, "step": 97 }, { "epoch": 0.14814814814814814, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.4240918755531311, "fcm_dpo/q_t": 0.4894029498100281, "grad_norm": 17.551227569580078, "learning_rate": 4.968597221690985e-07, "logits/chosen": 1.6251521110534668, "logits/rejected": 1.5772819519042969, "logps/chosen": -74.0291519165039, "logps/ref_chosen": -74.42477416992188, "logps/ref_rejected": -88.93840026855469, "logps/rejected": -88.96687316894531, "loss": 1.3493, "margin_dpo/margin_mean": 0.42409175634384155, "margin_dpo/margin_std": 1.4047505855560303, "step": 98 }, { "epoch": 0.14965986394557823, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.6838282346725464, "fcm_dpo/q_t": 0.4832128882408142, "grad_norm": 17.991985321044922, "learning_rate": 4.966473580761389e-07, "logits/chosen": 1.8413865566253662, "logits/rejected": 1.7409846782684326, "logps/chosen": -75.00747680664062, "logps/ref_chosen": -75.59742736816406, "logps/ref_rejected": -98.2310791015625, "logps/rejected": -98.324951171875, "loss": 1.3295, "margin_dpo/margin_mean": 0.6838279962539673, "margin_dpo/margin_std": 2.0383212566375732, "step": 99 }, { "epoch": 0.15117157974300832, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.7622036337852478, "fcm_dpo/q_t": 0.4814043343067169, "grad_norm": 19.61545181274414, "learning_rate": 4.964280947263676e-07, "logits/chosen": 1.954576849937439, "logits/rejected": 1.9274578094482422, "logps/chosen": -98.07032775878906, "logps/ref_chosen": -98.55859375, "logps/ref_rejected": -106.01295471191406, "logps/rejected": -106.2868881225586, "loss": 1.325, "margin_dpo/margin_mean": 0.7622038125991821, "margin_dpo/margin_std": 2.1858882904052734, "step": 100 }, { "epoch": 0.15117157974300832, "eval_fcm_dpo/beta": 0.10000000894069672, "eval_logits/chosen": 1.6834615468978882, "eval_logits/rejected": 1.5697993040084839, "eval_logps/chosen": -86.1994857788086, "eval_logps/ref_chosen": -86.90177917480469, "eval_logps/ref_rejected": -96.69639587402344, "eval_logps/rejected": -96.89765167236328, "eval_loss": 0.6541018486022949, "eval_margin_dpo/margin_mean": 0.9035704731941223, "eval_margin_dpo/margin_std": 1.9803118705749512, "eval_runtime": 42.2787, "eval_samples_per_second": 54.472, "eval_steps_per_second": 1.703, "step": 100 }, { "epoch": 0.15268329554043839, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.9653818607330322, "fcm_dpo/q_t": 0.4760565757751465, "grad_norm": 16.312408447265625, "learning_rate": 4.96201938253052e-07, "logits/chosen": 1.4704093933105469, "logits/rejected": 1.4150559902191162, "logps/chosen": -68.62451171875, "logps/ref_chosen": -69.45216369628906, "logps/ref_rejected": -88.0458755493164, "logps/rejected": -88.18360137939453, "loss": 1.3003, "margin_dpo/margin_mean": 0.9653820991516113, "margin_dpo/margin_std": 1.8038573265075684, "step": 101 }, { "epoch": 0.15419501133786848, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.6511964797973633, "fcm_dpo/q_t": 0.48394304513931274, "grad_norm": 17.326839447021484, "learning_rate": 4.959688949822748e-07, "logits/chosen": 1.6613342761993408, "logits/rejected": 1.582979679107666, "logps/chosen": -79.78057861328125, "logps/ref_chosen": -80.35308837890625, "logps/ref_rejected": -90.61380004882812, "logps/rejected": -90.69248962402344, "loss": 1.3318, "margin_dpo/margin_mean": 0.651196300983429, "margin_dpo/margin_std": 1.949533462524414, "step": 102 }, { "epoch": 0.15570672713529857, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.2242300510406494, "fcm_dpo/q_t": 0.4698329269886017, "grad_norm": 17.35326385498047, "learning_rate": 4.957289714327572e-07, "logits/chosen": 1.6548218727111816, "logits/rejected": 1.5961244106292725, "logps/chosen": -78.47612762451172, "logps/ref_chosen": -79.30392456054688, "logps/ref_rejected": -93.745361328125, "logps/rejected": -94.14179992675781, "loss": 1.2785, "margin_dpo/margin_mean": 1.224229097366333, "margin_dpo/margin_std": 2.101567029953003, "step": 103 }, { "epoch": 0.15721844293272866, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.1804628372192383, "fcm_dpo/q_t": 0.4710468351840973, "grad_norm": 18.6791934967041, "learning_rate": 4.954821743156767e-07, "logits/chosen": 1.7360849380493164, "logits/rejected": 1.5076425075531006, "logps/chosen": -73.59494018554688, "logps/ref_chosen": -74.50674438476562, "logps/ref_rejected": -116.09912872314453, "logps/rejected": -116.3677978515625, "loss": 1.2851, "margin_dpo/margin_mean": 1.18046236038208, "margin_dpo/margin_std": 2.282648801803589, "step": 104 }, { "epoch": 0.15873015873015872, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.9783838987350464, "fcm_dpo/q_t": 0.4760921001434326, "grad_norm": 18.822858810424805, "learning_rate": 4.952285105344791e-07, "logits/chosen": 1.7781691551208496, "logits/rejected": 1.611711025238037, "logps/chosen": -87.190673828125, "logps/ref_chosen": -87.76654815673828, "logps/ref_rejected": -108.07927703857422, "logps/rejected": -108.48179626464844, "loss": 1.3109, "margin_dpo/margin_mean": 0.9783839583396912, "margin_dpo/margin_std": 2.846864700317383, "step": 105 }, { "epoch": 0.1602418745275888, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.9594533443450928, "fcm_dpo/q_t": 0.4763692617416382, "grad_norm": 16.99517250061035, "learning_rate": 4.949679871846857e-07, "logits/chosen": 1.7635796070098877, "logits/rejected": 1.7058625221252441, "logps/chosen": -75.43994140625, "logps/ref_chosen": -76.38548278808594, "logps/ref_rejected": -81.63407897949219, "logps/rejected": -81.64799499511719, "loss": 1.3078, "margin_dpo/margin_mean": 0.9594534039497375, "margin_dpo/margin_std": 2.473605155944824, "step": 106 }, { "epoch": 0.1617535903250189, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.620085597038269, "fcm_dpo/q_t": 0.4845724403858185, "grad_norm": 19.18521499633789, "learning_rate": 4.947006115536947e-07, "logits/chosen": 1.33339262008667, "logits/rejected": 1.2754939794540405, "logps/chosen": -95.81202697753906, "logps/ref_chosen": -96.14849853515625, "logps/ref_rejected": -107.0481185913086, "logps/rejected": -107.33172607421875, "loss": 1.3381, "margin_dpo/margin_mean": 0.6200859546661377, "margin_dpo/margin_std": 2.2718071937561035, "step": 107 }, { "epoch": 0.16326530612244897, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.8618567585945129, "fcm_dpo/q_t": 0.4787394404411316, "grad_norm": 17.177370071411133, "learning_rate": 4.944263911205772e-07, "logits/chosen": 1.4922263622283936, "logits/rejected": 1.3662118911743164, "logps/chosen": -84.60310363769531, "logps/ref_chosen": -85.39241027832031, "logps/ref_rejected": -97.79592895507812, "logps/rejected": -97.86846923828125, "loss": 1.3151, "margin_dpo/margin_mean": 0.8618567585945129, "margin_dpo/margin_std": 2.300678253173828, "step": 108 }, { "epoch": 0.16477702191987906, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.3147889375686646, "fcm_dpo/q_t": 0.46812164783477783, "grad_norm": 18.14927101135254, "learning_rate": 4.941453335558681e-07, "logits/chosen": 1.3905439376831055, "logits/rejected": 1.174678087234497, "logps/chosen": -77.99679565429688, "logps/ref_chosen": -78.99874877929688, "logps/ref_rejected": -100.79278564453125, "logps/rejected": -101.10562133789062, "loss": 1.2781, "margin_dpo/margin_mean": 1.3147889375686646, "margin_dpo/margin_std": 2.780221939086914, "step": 109 }, { "epoch": 0.16628873771730915, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.2178063839673996, "fcm_dpo/q_t": 0.49449440836906433, "grad_norm": 20.665647506713867, "learning_rate": 4.938574467213517e-07, "logits/chosen": 1.4688796997070312, "logits/rejected": 1.5312542915344238, "logps/chosen": -96.5684814453125, "logps/ref_chosen": -96.95277404785156, "logps/ref_rejected": -91.44450378417969, "logps/rejected": -91.27799987792969, "loss": 1.381, "margin_dpo/margin_mean": 0.2178059071302414, "margin_dpo/margin_std": 2.5462493896484375, "step": 110 }, { "epoch": 0.16780045351473924, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.0503623485565186, "fcm_dpo/q_t": 0.4742129445075989, "grad_norm": 16.398740768432617, "learning_rate": 4.935627386698418e-07, "logits/chosen": 1.9407978057861328, "logits/rejected": 1.774618148803711, "logps/chosen": -69.15205383300781, "logps/ref_chosen": -70.01641845703125, "logps/ref_rejected": -92.87696838378906, "logps/rejected": -93.06297302246094, "loss": 1.3, "margin_dpo/margin_mean": 1.0503621101379395, "margin_dpo/margin_std": 2.530200958251953, "step": 111 }, { "epoch": 0.1693121693121693, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.3121293783187866, "fcm_dpo/q_t": 0.46776682138442993, "grad_norm": 19.021568298339844, "learning_rate": 4.932612176449559e-07, "logits/chosen": 1.7154479026794434, "logits/rejected": 1.5044281482696533, "logps/chosen": -76.7746810913086, "logps/ref_chosen": -77.80027770996094, "logps/ref_rejected": -123.10624694824219, "logps/rejected": -123.39279174804688, "loss": 1.276, "margin_dpo/margin_mean": 1.3121283054351807, "margin_dpo/margin_std": 2.5639796257019043, "step": 112 }, { "epoch": 0.1708238851095994, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.8528228402137756, "fcm_dpo/q_t": 0.47901567816734314, "grad_norm": 16.568147659301758, "learning_rate": 4.929528920808854e-07, "logits/chosen": 1.7319364547729492, "logits/rejected": 1.641928791999817, "logps/chosen": -69.28436279296875, "logps/ref_chosen": -70.54346466064453, "logps/ref_rejected": -88.79286193847656, "logps/rejected": -88.38658142089844, "loss": 1.3191, "margin_dpo/margin_mean": 0.8528228998184204, "margin_dpo/margin_std": 2.5240535736083984, "step": 113 }, { "epoch": 0.17233560090702948, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.332871913909912, "fcm_dpo/q_t": 0.46783509850502014, "grad_norm": 21.800825119018555, "learning_rate": 4.92637770602159e-07, "logits/chosen": 1.7463035583496094, "logits/rejected": 1.6003742218017578, "logps/chosen": -82.77552032470703, "logps/ref_chosen": -83.9239501953125, "logps/ref_rejected": -92.85765838623047, "logps/rejected": -93.04209899902344, "loss": 1.282, "margin_dpo/margin_mean": 1.3328726291656494, "margin_dpo/margin_std": 3.1606192588806152, "step": 114 }, { "epoch": 0.17384731670445955, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.1995645761489868, "fcm_dpo/q_t": 0.4707218408584595, "grad_norm": 23.079219818115234, "learning_rate": 4.923158620234019e-07, "logits/chosen": 1.6879757642745972, "logits/rejected": 1.517378568649292, "logps/chosen": -68.5137939453125, "logps/ref_chosen": -69.82767486572266, "logps/ref_rejected": -96.51564025878906, "logps/rejected": -96.40132141113281, "loss": 1.287, "margin_dpo/margin_mean": 1.199564814567566, "margin_dpo/margin_std": 2.603288173675537, "step": 115 }, { "epoch": 0.17535903250188964, "fcm_dpo/beta": 0.10327555239200592, "fcm_dpo/delta": 0.3172721266746521, "fcm_dpo/margin": 1.6490867137908936, "fcm_dpo/q_t": 0.45969825983047485, "grad_norm": 18.574960708618164, "learning_rate": 4.91987175349089e-07, "logits/chosen": 1.7174615859985352, "logits/rejected": 1.561848521232605, "logps/chosen": -64.7716064453125, "logps/ref_chosen": -66.19773864746094, "logps/ref_rejected": -90.88304138183594, "logps/rejected": -91.10599517822266, "loss": 1.2404, "margin_dpo/margin_mean": 1.6490864753723145, "margin_dpo/margin_std": 2.610063314437866, "step": 116 }, { "epoch": 0.17687074829931973, "fcm_dpo/beta": 0.11360542476177216, "fcm_dpo/delta": 0.32053306698799133, "fcm_dpo/margin": 1.6154546737670898, "fcm_dpo/q_t": 0.45687851309776306, "grad_norm": 18.478755950927734, "learning_rate": 4.916517197732933e-07, "logits/chosen": 1.6380093097686768, "logits/rejected": 1.536433458328247, "logps/chosen": -70.42033386230469, "logps/ref_chosen": -72.15988159179688, "logps/ref_rejected": -85.30296325683594, "logps/rejected": -85.17887115478516, "loss": 1.2359, "margin_dpo/margin_mean": 1.6154546737670898, "margin_dpo/margin_std": 2.7676258087158203, "step": 117 }, { "epoch": 0.17838246409674982, "fcm_dpo/beta": 0.11360542476177216, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.179227590560913, "fcm_dpo/q_t": 0.4674232006072998, "grad_norm": 18.794208526611328, "learning_rate": 4.913095046794281e-07, "logits/chosen": 1.7830784320831299, "logits/rejected": 1.6586743593215942, "logps/chosen": -70.03721618652344, "logps/ref_chosen": -71.47773742675781, "logps/ref_rejected": -96.95051574707031, "logps/rejected": -96.6892318725586, "loss": 1.2843, "margin_dpo/margin_mean": 1.1792272329330444, "margin_dpo/margin_std": 2.940877676010132, "step": 118 }, { "epoch": 0.17989417989417988, "fcm_dpo/beta": 0.11360542476177216, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.357081413269043, "fcm_dpo/q_t": 0.46345484256744385, "grad_norm": 19.133548736572266, "learning_rate": 4.909605396399855e-07, "logits/chosen": 1.7609422206878662, "logits/rejected": 1.6618965864181519, "logps/chosen": -76.75531005859375, "logps/ref_chosen": -78.2727279663086, "logps/ref_rejected": -94.71317291259766, "logps/rejected": -94.5528335571289, "loss": 1.2794, "margin_dpo/margin_mean": 1.3570810556411743, "margin_dpo/margin_std": 3.6027512550354004, "step": 119 }, { "epoch": 0.18140589569160998, "fcm_dpo/beta": 0.12034539878368378, "fcm_dpo/delta": 0.28817349672317505, "fcm_dpo/margin": 2.0527327060699463, "fcm_dpo/q_t": 0.4423573911190033, "grad_norm": 21.03508758544922, "learning_rate": 4.906048344162676e-07, "logits/chosen": 2.0089728832244873, "logits/rejected": 1.8490796089172363, "logps/chosen": -76.51966094970703, "logps/ref_chosen": -78.43109130859375, "logps/ref_rejected": -100.2771987915039, "logps/rejected": -100.41851043701172, "loss": 1.1849, "margin_dpo/margin_mean": 2.052732467651367, "margin_dpo/margin_std": 2.924802303314209, "step": 120 }, { "epoch": 0.18291761148904007, "fcm_dpo/beta": 0.12824112176895142, "fcm_dpo/delta": 0.317731648683548, "fcm_dpo/margin": 1.4499316215515137, "fcm_dpo/q_t": 0.4577900767326355, "grad_norm": 22.99397850036621, "learning_rate": 4.902423989581143e-07, "logits/chosen": 2.067244052886963, "logits/rejected": 1.7988061904907227, "logps/chosen": -72.40137481689453, "logps/ref_chosen": -74.08768463134766, "logps/ref_rejected": -118.6731948852539, "logps/rejected": -118.43681335449219, "loss": 1.25, "margin_dpo/margin_mean": 1.4499316215515137, "margin_dpo/margin_std": 3.1986875534057617, "step": 121 }, { "epoch": 0.18442932728647016, "fcm_dpo/beta": 0.13225537538528442, "fcm_dpo/delta": 0.30361536145210266, "fcm_dpo/margin": 1.4434542655944824, "fcm_dpo/q_t": 0.45735594630241394, "grad_norm": 22.976837158203125, "learning_rate": 4.898732434036243e-07, "logits/chosen": 1.6426172256469727, "logits/rejected": 1.5161330699920654, "logps/chosen": -77.5340576171875, "logps/ref_chosen": -79.36762237548828, "logps/ref_rejected": -92.42371368408203, "logps/rejected": -92.03360748291016, "loss": 1.2557, "margin_dpo/margin_mean": 1.4434537887573242, "margin_dpo/margin_std": 3.4955062866210938, "step": 122 }, { "epoch": 0.18594104308390022, "fcm_dpo/beta": 0.140414759516716, "fcm_dpo/delta": 0.29529285430908203, "fcm_dpo/margin": 1.6619194746017456, "fcm_dpo/q_t": 0.4464726448059082, "grad_norm": 22.928123474121094, "learning_rate": 4.894973780788722e-07, "logits/chosen": 1.538325309753418, "logits/rejected": 1.4405975341796875, "logps/chosen": -69.86561584472656, "logps/ref_chosen": -71.91705322265625, "logps/ref_rejected": -96.36418151855469, "logps/rejected": -95.97465515136719, "loss": 1.2102, "margin_dpo/margin_mean": 1.661919355392456, "margin_dpo/margin_std": 3.0403366088867188, "step": 123 }, { "epoch": 0.1874527588813303, "fcm_dpo/beta": 0.15786468982696533, "fcm_dpo/delta": 0.5678054690361023, "fcm_dpo/margin": 1.8830089569091797, "fcm_dpo/q_t": 0.43587011098861694, "grad_norm": 27.404041290283203, "learning_rate": 4.89114813497619e-07, "logits/chosen": 1.656646966934204, "logits/rejected": 1.5460621118545532, "logps/chosen": -69.46039581298828, "logps/ref_chosen": -71.72529602050781, "logps/ref_rejected": -111.17984771728516, "logps/rejected": -110.79795837402344, "loss": 1.1936, "margin_dpo/margin_mean": 1.883009433746338, "margin_dpo/margin_std": 3.7926838397979736, "step": 124 }, { "epoch": 0.1889644746787604, "fcm_dpo/beta": 0.1739986538887024, "fcm_dpo/delta": 0.509579062461853, "fcm_dpo/margin": 2.0727288722991943, "fcm_dpo/q_t": 0.42179858684539795, "grad_norm": 29.48147201538086, "learning_rate": 4.887255603610184e-07, "logits/chosen": 1.688849687576294, "logits/rejected": 1.507893443107605, "logps/chosen": -79.08648681640625, "logps/ref_chosen": -81.55532836914062, "logps/ref_rejected": -110.9144287109375, "logps/rejected": -110.51831817626953, "loss": 1.1477, "margin_dpo/margin_mean": 2.072727680206299, "margin_dpo/margin_std": 3.4490890502929688, "step": 125 }, { "epoch": 0.19047619047619047, "fcm_dpo/beta": 0.1837829053401947, "fcm_dpo/delta": 0.24289314448833466, "fcm_dpo/margin": 1.6221380233764648, "fcm_dpo/q_t": 0.4397786855697632, "grad_norm": 33.398712158203125, "learning_rate": 4.883296295573176e-07, "logits/chosen": 1.1400885581970215, "logits/rejected": 1.1677029132843018, "logps/chosen": -83.61918640136719, "logps/ref_chosen": -87.07349395751953, "logps/ref_rejected": -85.05271911621094, "logps/rejected": -83.22055053710938, "loss": 1.2483, "margin_dpo/margin_mean": 1.6221377849578857, "margin_dpo/margin_std": 4.112092018127441, "step": 126 }, { "epoch": 0.19198790627362056, "fcm_dpo/beta": 0.19954904913902283, "fcm_dpo/delta": 0.4273369312286377, "fcm_dpo/margin": 2.2214713096618652, "fcm_dpo/q_t": 0.4040955603122711, "grad_norm": 31.264402389526367, "learning_rate": 4.87927032161552e-07, "logits/chosen": 1.6167099475860596, "logits/rejected": 1.5610288381576538, "logps/chosen": -77.12249755859375, "logps/ref_chosen": -80.4578857421875, "logps/ref_rejected": -90.50740051269531, "logps/rejected": -89.39348602294922, "loss": 1.0822, "margin_dpo/margin_mean": 2.221470832824707, "margin_dpo/margin_std": 3.047856330871582, "step": 127 }, { "epoch": 0.19349962207105065, "fcm_dpo/beta": 0.21528372168540955, "fcm_dpo/delta": 0.24369965493679047, "fcm_dpo/margin": 1.4183709621429443, "fcm_dpo/q_t": 0.43787145614624023, "grad_norm": 39.298763275146484, "learning_rate": 4.875177794352363e-07, "logits/chosen": 1.5626955032348633, "logits/rejected": 1.3739066123962402, "logps/chosen": -82.65255737304688, "logps/ref_chosen": -85.77519226074219, "logps/ref_rejected": -112.63516998291016, "logps/rejected": -110.930908203125, "loss": 1.3259, "margin_dpo/margin_mean": 1.4183712005615234, "margin_dpo/margin_std": 4.649229526519775, "step": 128 }, { "epoch": 0.19501133786848074, "fcm_dpo/beta": 0.2325761914253235, "fcm_dpo/delta": 0.497514545917511, "fcm_dpo/margin": 1.5880060195922852, "fcm_dpo/q_t": 0.4272322654724121, "grad_norm": 48.150299072265625, "learning_rate": 4.871018828260491e-07, "logits/chosen": 1.585038423538208, "logits/rejected": 1.5908199548721313, "logps/chosen": -81.96442413330078, "logps/ref_chosen": -84.94615173339844, "logps/ref_rejected": -85.36473846435547, "logps/rejected": -83.97102355957031, "loss": 1.2814, "margin_dpo/margin_mean": 1.5880064964294434, "margin_dpo/margin_std": 4.411214828491211, "step": 129 }, { "epoch": 0.1965230536659108, "fcm_dpo/beta": 0.25136274099349976, "fcm_dpo/delta": 0.4040859341621399, "fcm_dpo/margin": 1.8546559810638428, "fcm_dpo/q_t": 0.40786296129226685, "grad_norm": 44.689945220947266, "learning_rate": 4.866793539675126e-07, "logits/chosen": 1.576695442199707, "logits/rejected": 1.4571049213409424, "logps/chosen": -75.59423828125, "logps/ref_chosen": -79.0184555053711, "logps/ref_rejected": -97.63998413085938, "logps/rejected": -96.07042694091797, "loss": 1.1687, "margin_dpo/margin_mean": 1.8546559810638428, "margin_dpo/margin_std": 3.65340518951416, "step": 130 }, { "epoch": 0.1980347694633409, "fcm_dpo/beta": 0.26572394371032715, "fcm_dpo/delta": 0.2274240255355835, "fcm_dpo/margin": 2.399470090866089, "fcm_dpo/q_t": 0.3838863968849182, "grad_norm": 40.63927459716797, "learning_rate": 4.86250204678667e-07, "logits/chosen": 1.4264767169952393, "logits/rejected": 1.1921199560165405, "logps/chosen": -64.54088592529297, "logps/ref_chosen": -68.24565887451172, "logps/ref_rejected": -97.99555969238281, "logps/rejected": -96.69026184082031, "loss": 1.1124, "margin_dpo/margin_mean": 2.399470329284668, "margin_dpo/margin_std": 4.310845851898193, "step": 131 }, { "epoch": 0.19954648526077098, "fcm_dpo/beta": 0.28637751936912537, "fcm_dpo/delta": 0.3753895163536072, "fcm_dpo/margin": 1.716357707977295, "fcm_dpo/q_t": 0.40129733085632324, "grad_norm": 48.86708450317383, "learning_rate": 4.858144469637408e-07, "logits/chosen": 1.8425252437591553, "logits/rejected": 1.721367359161377, "logps/chosen": -78.26350402832031, "logps/ref_chosen": -82.06532287597656, "logps/ref_rejected": -89.47691345214844, "logps/rejected": -87.39144134521484, "loss": 1.187, "margin_dpo/margin_mean": 1.716357707977295, "margin_dpo/margin_std": 3.696367025375366, "step": 132 }, { "epoch": 0.20105820105820105, "fcm_dpo/beta": 0.3037889003753662, "fcm_dpo/delta": 0.30762773752212524, "fcm_dpo/margin": 1.8439881801605225, "fcm_dpo/q_t": 0.39642536640167236, "grad_norm": 55.50751876831055, "learning_rate": 4.853720930118138e-07, "logits/chosen": 1.4474172592163086, "logits/rejected": 1.4812402725219727, "logps/chosen": -79.32565307617188, "logps/ref_chosen": -83.70661163330078, "logps/ref_rejected": -89.3868179321289, "logps/rejected": -86.84983825683594, "loss": 1.1999, "margin_dpo/margin_mean": 1.8439884185791016, "margin_dpo/margin_std": 3.8783974647521973, "step": 133 }, { "epoch": 0.20256991685563114, "fcm_dpo/beta": 0.31286799907684326, "fcm_dpo/delta": 0.05367041379213333, "fcm_dpo/margin": 2.5576937198638916, "fcm_dpo/q_t": 0.3593463897705078, "grad_norm": 47.953609466552734, "learning_rate": 4.849231551964771e-07, "logits/chosen": 1.8493995666503906, "logits/rejected": 1.7130919694900513, "logps/chosen": -66.60760498046875, "logps/ref_chosen": -71.57601928710938, "logps/ref_rejected": -92.34259033203125, "logps/rejected": -89.931884765625, "loss": 1.0617, "margin_dpo/margin_mean": 2.5576934814453125, "margin_dpo/margin_std": 4.291810989379883, "step": 134 }, { "epoch": 0.20408163265306123, "fcm_dpo/beta": 0.32738637924194336, "fcm_dpo/delta": 0.2678135931491852, "fcm_dpo/margin": 1.8198938369750977, "fcm_dpo/q_t": 0.38489198684692383, "grad_norm": 49.348323822021484, "learning_rate": 4.844676460754862e-07, "logits/chosen": 1.499782919883728, "logits/rejected": 1.4428374767303467, "logps/chosen": -61.08345031738281, "logps/ref_chosen": -66.39884948730469, "logps/ref_rejected": -81.38636779785156, "logps/rejected": -77.89085388183594, "loss": 1.1684, "margin_dpo/margin_mean": 1.8198933601379395, "margin_dpo/margin_std": 3.714056968688965, "step": 135 }, { "epoch": 0.20559334845049132, "fcm_dpo/beta": 0.32900992035865784, "fcm_dpo/delta": 0.10048435628414154, "fcm_dpo/margin": 2.2877230644226074, "fcm_dpo/q_t": 0.3859502673149109, "grad_norm": 65.70194244384766, "learning_rate": 4.840055783904106e-07, "logits/chosen": 1.6483759880065918, "logits/rejected": 1.3668988943099976, "logps/chosen": -82.15748596191406, "logps/ref_chosen": -86.75381469726562, "logps/ref_rejected": -113.35548400878906, "logps/rejected": -111.04689025878906, "loss": 1.2694, "margin_dpo/margin_mean": 2.2877230644226074, "margin_dpo/margin_std": 4.9393510818481445, "step": 136 }, { "epoch": 0.20710506424792138, "fcm_dpo/beta": 0.3471960425376892, "fcm_dpo/delta": 0.0843411386013031, "fcm_dpo/margin": 2.200601816177368, "fcm_dpo/q_t": 0.36530107259750366, "grad_norm": 49.037841796875, "learning_rate": 4.835369650662767e-07, "logits/chosen": 1.7660987377166748, "logits/rejected": 1.6533045768737793, "logps/chosen": -66.63494110107422, "logps/ref_chosen": -72.21119689941406, "logps/ref_rejected": -88.30802917480469, "logps/rejected": -84.93238067626953, "loss": 1.0984, "margin_dpo/margin_mean": 2.2006025314331055, "margin_dpo/margin_std": 3.669823169708252, "step": 137 }, { "epoch": 0.20861678004535147, "fcm_dpo/beta": 0.3590894043445587, "fcm_dpo/delta": 0.30840471386909485, "fcm_dpo/margin": 1.5543081760406494, "fcm_dpo/q_t": 0.4112345576286316, "grad_norm": 63.31071472167969, "learning_rate": 4.830618192112065e-07, "logits/chosen": 1.435781717300415, "logits/rejected": 1.3367691040039062, "logps/chosen": -69.54479217529297, "logps/ref_chosen": -74.54273223876953, "logps/ref_rejected": -84.63615417480469, "logps/rejected": -81.19251251220703, "loss": 1.3065, "margin_dpo/margin_mean": 1.5543079376220703, "margin_dpo/margin_std": 4.103353977203369, "step": 138 }, { "epoch": 0.21012849584278157, "fcm_dpo/beta": 0.3809944987297058, "fcm_dpo/delta": 0.2578265070915222, "fcm_dpo/margin": 1.588025689125061, "fcm_dpo/q_t": 0.38131409883499146, "grad_norm": 84.61750030517578, "learning_rate": 4.825801541160509e-07, "logits/chosen": 1.6165993213653564, "logits/rejected": 1.5509192943572998, "logps/chosen": -82.63727569580078, "logps/ref_chosen": -87.63740539550781, "logps/ref_rejected": -101.3896484375, "logps/rejected": -97.97754669189453, "loss": 1.2994, "margin_dpo/margin_mean": 1.5880258083343506, "margin_dpo/margin_std": 3.8282291889190674, "step": 139 }, { "epoch": 0.21164021164021163, "fcm_dpo/beta": 0.376004695892334, "fcm_dpo/delta": -0.06728397309780121, "fcm_dpo/margin": 2.417325019836426, "fcm_dpo/q_t": 0.35431066155433655, "grad_norm": 72.31173706054688, "learning_rate": 4.820919832540181e-07, "logits/chosen": 1.5636279582977295, "logits/rejected": 1.4595727920532227, "logps/chosen": -75.60233306884766, "logps/ref_chosen": -81.32339477539062, "logps/ref_rejected": -99.7275619506836, "logps/rejected": -96.423828125, "loss": 1.1302, "margin_dpo/margin_mean": 2.417325973510742, "margin_dpo/margin_std": 4.188722133636475, "step": 140 }, { "epoch": 0.21315192743764172, "fcm_dpo/beta": 0.3740063011646271, "fcm_dpo/delta": -0.018594570457935333, "fcm_dpo/margin": 2.3140902519226074, "fcm_dpo/q_t": 0.35507285594940186, "grad_norm": 57.69200134277344, "learning_rate": 4.815973202802966e-07, "logits/chosen": 1.8846083879470825, "logits/rejected": 1.7731068134307861, "logps/chosen": -72.21094512939453, "logps/ref_chosen": -78.08534240722656, "logps/ref_rejected": -101.70516967773438, "logps/rejected": -98.14485931396484, "loss": 1.0983, "margin_dpo/margin_mean": 2.3140902519226074, "margin_dpo/margin_std": 3.8522796630859375, "step": 141 }, { "epoch": 0.2146636432350718, "fcm_dpo/beta": 0.39571413397789, "fcm_dpo/delta": 0.27962976694107056, "fcm_dpo/margin": 1.4749341011047363, "fcm_dpo/q_t": 0.4017455577850342, "grad_norm": 64.10457611083984, "learning_rate": 4.810961790316729e-07, "logits/chosen": 1.6766084432601929, "logits/rejected": 1.6162614822387695, "logps/chosen": -76.84469604492188, "logps/ref_chosen": -82.84616088867188, "logps/ref_rejected": -95.14714050292969, "logps/rejected": -90.62060546875, "loss": 1.3081, "margin_dpo/margin_mean": 1.4749343395233154, "margin_dpo/margin_std": 3.789971351623535, "step": 142 }, { "epoch": 0.2161753590325019, "fcm_dpo/beta": 0.41883236169815063, "fcm_dpo/delta": 0.32501593232154846, "fcm_dpo/margin": 1.2943665981292725, "fcm_dpo/q_t": 0.4153968393802643, "grad_norm": 101.07804870605469, "learning_rate": 4.805885735261454e-07, "logits/chosen": 1.5617542266845703, "logits/rejected": 1.5315983295440674, "logps/chosen": -74.02728271484375, "logps/ref_chosen": -80.29791259765625, "logps/ref_rejected": -87.44291687011719, "logps/rejected": -82.46665954589844, "loss": 1.4461, "margin_dpo/margin_mean": 1.2943671941757202, "margin_dpo/margin_std": 4.0470781326293945, "step": 143 }, { "epoch": 0.21768707482993196, "fcm_dpo/beta": 0.4348004460334778, "fcm_dpo/delta": 0.11130322515964508, "fcm_dpo/margin": 0.8273683190345764, "fcm_dpo/q_t": 0.46184492111206055, "grad_norm": 111.18672943115234, "learning_rate": 4.800745179625307e-07, "logits/chosen": 1.8222707509994507, "logits/rejected": 1.7600898742675781, "logps/chosen": -73.16317749023438, "logps/ref_chosen": -79.09429168701172, "logps/ref_rejected": -92.42912292480469, "logps/rejected": -87.32537841796875, "loss": 1.7283, "margin_dpo/margin_mean": 0.8273676633834839, "margin_dpo/margin_std": 4.292266368865967, "step": 144 }, { "epoch": 0.21919879062736206, "fcm_dpo/beta": 0.4399738609790802, "fcm_dpo/delta": 0.10660990327596664, "fcm_dpo/margin": 1.7080283164978027, "fcm_dpo/q_t": 0.36994537711143494, "grad_norm": 100.66554260253906, "learning_rate": 4.795540267200686e-07, "logits/chosen": 1.6031596660614014, "logits/rejected": 1.6387650966644287, "logps/chosen": -90.66630554199219, "logps/ref_chosen": -97.7087173461914, "logps/ref_rejected": -97.63011169433594, "logps/rejected": -92.29573059082031, "loss": 1.3454, "margin_dpo/margin_mean": 1.7080283164978027, "margin_dpo/margin_std": 4.123508453369141, "step": 145 }, { "epoch": 0.22071050642479215, "fcm_dpo/beta": 0.46773919463157654, "fcm_dpo/delta": 0.29919642210006714, "fcm_dpo/margin": 1.2071739435195923, "fcm_dpo/q_t": 0.40284016728401184, "grad_norm": 89.68445587158203, "learning_rate": 4.790271143580173e-07, "logits/chosen": 1.3380377292633057, "logits/rejected": 1.3341362476348877, "logps/chosen": -69.06131744384766, "logps/ref_chosen": -76.56294250488281, "logps/ref_rejected": -83.78160095214844, "logps/rejected": -77.48715209960938, "loss": 1.3059, "margin_dpo/margin_mean": 1.2071746587753296, "margin_dpo/margin_std": 3.1625607013702393, "step": 146 }, { "epoch": 0.2222222222222222, "fcm_dpo/beta": 0.4885963797569275, "fcm_dpo/delta": 0.22966217994689941, "fcm_dpo/margin": 1.298787236213684, "fcm_dpo/q_t": 0.4034077525138855, "grad_norm": 102.2337417602539, "learning_rate": 4.784937956152489e-07, "logits/chosen": 1.6136701107025146, "logits/rejected": 1.5254911184310913, "logps/chosen": -76.01017761230469, "logps/ref_chosen": -83.24113464355469, "logps/ref_rejected": -97.50960540771484, "logps/rejected": -91.57743835449219, "loss": 1.3886, "margin_dpo/margin_mean": 1.298788070678711, "margin_dpo/margin_std": 3.5394697189331055, "step": 147 }, { "epoch": 0.2237339380196523, "fcm_dpo/beta": 0.4971775710582733, "fcm_dpo/delta": -0.038856156170368195, "fcm_dpo/margin": 1.7757008075714111, "fcm_dpo/q_t": 0.3661366105079651, "grad_norm": 69.0617446899414, "learning_rate": 4.779540854098347e-07, "logits/chosen": 1.8557909727096558, "logits/rejected": 1.6362807750701904, "logps/chosen": -58.756004333496094, "logps/ref_chosen": -66.36277770996094, "logps/ref_rejected": -87.66487121582031, "logps/rejected": -81.83380126953125, "loss": 1.24, "margin_dpo/margin_mean": 1.7757010459899902, "margin_dpo/margin_std": 3.699665069580078, "step": 148 }, { "epoch": 0.2252456538170824, "fcm_dpo/beta": 0.5092729330062866, "fcm_dpo/delta": 0.26832953095436096, "fcm_dpo/margin": 1.1741929054260254, "fcm_dpo/q_t": 0.39438316226005554, "grad_norm": 84.74821472167969, "learning_rate": 4.774079988386296e-07, "logits/chosen": 1.424088716506958, "logits/rejected": 1.3155875205993652, "logps/chosen": -64.8631591796875, "logps/ref_chosen": -72.0576171875, "logps/ref_rejected": -83.94097900390625, "logps/rejected": -77.92071533203125, "loss": 1.2921, "margin_dpo/margin_mean": 1.1741926670074463, "margin_dpo/margin_std": 2.8617429733276367, "step": 149 }, { "epoch": 0.22675736961451248, "fcm_dpo/beta": 0.511961042881012, "fcm_dpo/delta": -0.03353915736079216, "fcm_dpo/margin": 1.7189321517944336, "fcm_dpo/q_t": 0.36745959520339966, "grad_norm": 103.64623260498047, "learning_rate": 4.768555511768486e-07, "logits/chosen": 1.6222187280654907, "logits/rejected": 1.558018445968628, "logps/chosen": -77.80455017089844, "logps/ref_chosen": -85.52684783935547, "logps/ref_rejected": -108.37449645996094, "logps/rejected": -102.37114715576172, "loss": 1.2533, "margin_dpo/margin_mean": 1.7189325094223022, "margin_dpo/margin_std": 3.4795143604278564, "step": 150 }, { "epoch": 0.22826908541194255, "fcm_dpo/beta": 0.4981721043586731, "fcm_dpo/delta": -0.1791907250881195, "fcm_dpo/margin": 2.0277981758117676, "fcm_dpo/q_t": 0.332998663187027, "grad_norm": 69.01924896240234, "learning_rate": 4.762967578776406e-07, "logits/chosen": 1.5413520336151123, "logits/rejected": 1.4086077213287354, "logps/chosen": -60.528953552246094, "logps/ref_chosen": -69.160888671875, "logps/ref_rejected": -91.42207336425781, "logps/rejected": -84.81794738769531, "loss": 1.0109, "margin_dpo/margin_mean": 2.0277981758117676, "margin_dpo/margin_std": 3.165318489074707, "step": 151 }, { "epoch": 0.22978080120937264, "fcm_dpo/beta": 0.5071883201599121, "fcm_dpo/delta": 0.05071830749511719, "fcm_dpo/margin": 1.5727683305740356, "fcm_dpo/q_t": 0.3731786906719208, "grad_norm": 83.03250122070312, "learning_rate": 4.757316345716553e-07, "logits/chosen": 2.0041542053222656, "logits/rejected": 1.8374791145324707, "logps/chosen": -64.47390747070312, "logps/ref_chosen": -72.48135375976562, "logps/ref_rejected": -94.44818878173828, "logps/rejected": -88.01351165771484, "loss": 1.1366, "margin_dpo/margin_mean": 1.5727685689926147, "margin_dpo/margin_std": 3.0940496921539307, "step": 152 }, { "epoch": 0.23129251700680273, "fcm_dpo/beta": 0.5022574067115784, "fcm_dpo/delta": -0.014510933309793472, "fcm_dpo/margin": 1.7176454067230225, "fcm_dpo/q_t": 0.3635936677455902, "grad_norm": 77.17406463623047, "learning_rate": 4.751601970666064e-07, "logits/chosen": 1.5174614191055298, "logits/rejected": 1.4577882289886475, "logps/chosen": -81.51570129394531, "logps/ref_chosen": -89.6655044555664, "logps/ref_rejected": -90.67737579345703, "logps/rejected": -84.24522399902344, "loss": 1.0794, "margin_dpo/margin_mean": 1.7176458835601807, "margin_dpo/margin_std": 2.961656332015991, "step": 153 }, { "epoch": 0.2328042328042328, "fcm_dpo/beta": 0.5269556045532227, "fcm_dpo/delta": 0.3518673777580261, "fcm_dpo/margin": 0.978224515914917, "fcm_dpo/q_t": 0.4100213944911957, "grad_norm": 96.40692138671875, "learning_rate": 4.745824613468292e-07, "logits/chosen": 1.7381703853607178, "logits/rejected": 1.6854023933410645, "logps/chosen": -68.4336929321289, "logps/ref_chosen": -76.58096313476562, "logps/ref_rejected": -78.18669891357422, "logps/rejected": -71.01765441894531, "loss": 1.3664, "margin_dpo/margin_mean": 0.9782246351242065, "margin_dpo/margin_std": 2.7987735271453857, "step": 154 }, { "epoch": 0.23431594860166288, "fcm_dpo/beta": 0.5390084981918335, "fcm_dpo/delta": 0.08307254314422607, "fcm_dpo/margin": 1.4344533681869507, "fcm_dpo/q_t": 0.3690027892589569, "grad_norm": 95.5949478149414, "learning_rate": 4.7399844357283393e-07, "logits/chosen": 1.6641685962677002, "logits/rejected": 1.6441385746002197, "logps/chosen": -74.2553482055664, "logps/ref_chosen": -82.65617370605469, "logps/ref_rejected": -95.52484130859375, "logps/rejected": -88.5584716796875, "loss": 1.2341, "margin_dpo/margin_mean": 1.4344533681869507, "margin_dpo/margin_std": 2.9911344051361084, "step": 155 }, { "epoch": 0.23582766439909297, "fcm_dpo/beta": 0.5347750186920166, "fcm_dpo/delta": -0.04582615941762924, "fcm_dpo/margin": 1.6623612642288208, "fcm_dpo/q_t": 0.3499874472618103, "grad_norm": 91.02811431884766, "learning_rate": 4.7340816008085305e-07, "logits/chosen": 1.663049340248108, "logits/rejected": 1.5744928121566772, "logps/chosen": -79.29481506347656, "logps/ref_chosen": -87.66494750976562, "logps/ref_rejected": -108.2437744140625, "logps/rejected": -101.53599548339844, "loss": 1.0597, "margin_dpo/margin_mean": 1.6623611450195312, "margin_dpo/margin_std": 2.6712918281555176, "step": 156 }, { "epoch": 0.23733938019652306, "fcm_dpo/beta": 0.5272543430328369, "fcm_dpo/delta": -0.11355408281087875, "fcm_dpo/margin": 1.0453565120697021, "fcm_dpo/q_t": 0.41212987899780273, "grad_norm": 83.02538299560547, "learning_rate": 4.728116273823847e-07, "logits/chosen": 1.5571646690368652, "logits/rejected": 1.562293529510498, "logps/chosen": -61.72172927856445, "logps/ref_chosen": -70.77095794677734, "logps/ref_rejected": -78.78271484375, "logps/rejected": -70.77883911132812, "loss": 1.2828, "margin_dpo/margin_mean": 1.0453565120697021, "margin_dpo/margin_std": 2.5130724906921387, "step": 157 }, { "epoch": 0.23885109599395313, "fcm_dpo/beta": 0.5388778448104858, "fcm_dpo/delta": 0.19029124081134796, "fcm_dpo/margin": 1.2486048936843872, "fcm_dpo/q_t": 0.3905091881752014, "grad_norm": 89.57019805908203, "learning_rate": 4.7220886216373085e-07, "logits/chosen": 1.6847059726715088, "logits/rejected": 1.6245031356811523, "logps/chosen": -72.43364715576172, "logps/ref_chosen": -81.21516418457031, "logps/ref_rejected": -97.8381118774414, "logps/rejected": -90.30520629882812, "loss": 1.2569, "margin_dpo/margin_mean": 1.24860417842865, "margin_dpo/margin_std": 2.810882091522217, "step": 158 }, { "epoch": 0.24036281179138322, "fcm_dpo/beta": 0.5474465489387512, "fcm_dpo/delta": 0.02943047508597374, "fcm_dpo/margin": 1.5027416944503784, "fcm_dpo/q_t": 0.35206806659698486, "grad_norm": 77.63984680175781, "learning_rate": 4.715998812855304e-07, "logits/chosen": 1.7496761083602905, "logits/rejected": 1.6762511730194092, "logps/chosen": -63.55103302001953, "logps/ref_chosen": -72.33412170410156, "logps/ref_rejected": -89.49591064453125, "logps/rejected": -82.21556091308594, "loss": 1.0543, "margin_dpo/margin_mean": 1.5027427673339844, "margin_dpo/margin_std": 2.681060791015625, "step": 159 }, { "epoch": 0.2418745275888133, "fcm_dpo/beta": 0.5775221586227417, "fcm_dpo/delta": 0.32942667603492737, "fcm_dpo/margin": 0.9330779314041138, "fcm_dpo/q_t": 0.3996606469154358, "grad_norm": 84.65573120117188, "learning_rate": 4.7098470178228755e-07, "logits/chosen": 1.4114882946014404, "logits/rejected": 1.2656062841415405, "logps/chosen": -54.79229736328125, "logps/ref_chosen": -63.26386260986328, "logps/ref_rejected": -82.27867126464844, "logps/rejected": -74.74017333984375, "loss": 1.2682, "margin_dpo/margin_mean": 0.9330783486366272, "margin_dpo/margin_std": 2.315305233001709, "step": 160 }, { "epoch": 0.24338624338624337, "fcm_dpo/beta": 0.6198341846466064, "fcm_dpo/delta": 0.29712557792663574, "fcm_dpo/margin": 0.9137060642242432, "fcm_dpo/q_t": 0.3977496027946472, "grad_norm": 97.38703918457031, "learning_rate": 4.703633408618955e-07, "logits/chosen": 1.8469001054763794, "logits/rejected": 1.7587244510650635, "logps/chosen": -61.889503479003906, "logps/ref_chosen": -70.69304656982422, "logps/ref_rejected": -82.73606872558594, "logps/rejected": -74.84622955322266, "loss": 1.3213, "margin_dpo/margin_mean": 0.9137062430381775, "margin_dpo/margin_std": 2.3940858840942383, "step": 161 }, { "epoch": 0.24489795918367346, "fcm_dpo/beta": 0.6205596327781677, "fcm_dpo/delta": -0.1046181172132492, "fcm_dpo/margin": 1.5212860107421875, "fcm_dpo/q_t": 0.3311256170272827, "grad_norm": 94.5212173461914, "learning_rate": 4.697358159051549e-07, "logits/chosen": 1.73167085647583, "logits/rejected": 1.638787031173706, "logps/chosen": -80.61572265625, "logps/ref_chosen": -89.3046646118164, "logps/ref_rejected": -114.05778503417969, "logps/rejected": -106.89012908935547, "loss": 0.9664, "margin_dpo/margin_mean": 1.5212857723236084, "margin_dpo/margin_std": 2.2130351066589355, "step": 162 }, { "epoch": 0.24640967498110355, "fcm_dpo/beta": 0.6169939041137695, "fcm_dpo/delta": 0.020175732672214508, "fcm_dpo/margin": 1.3464248180389404, "fcm_dpo/q_t": 0.3558712303638458, "grad_norm": 86.37958526611328, "learning_rate": 4.691021444652876e-07, "logits/chosen": 1.7835041284561157, "logits/rejected": 1.6861741542816162, "logps/chosen": -59.539772033691406, "logps/ref_chosen": -68.61222076416016, "logps/ref_rejected": -89.03155517578125, "logps/rejected": -81.3055419921875, "loss": 1.05, "margin_dpo/margin_mean": 1.3464242219924927, "margin_dpo/margin_std": 2.1600122451782227, "step": 163 }, { "epoch": 0.24792139077853365, "fcm_dpo/beta": 0.6004210710525513, "fcm_dpo/delta": -0.14600637555122375, "fcm_dpo/margin": 1.6347507238388062, "fcm_dpo/q_t": 0.33791494369506836, "grad_norm": 91.74009704589844, "learning_rate": 4.6846234426744624e-07, "logits/chosen": 1.3953123092651367, "logits/rejected": 1.2430917024612427, "logps/chosen": -64.46735382080078, "logps/ref_chosen": -73.55902862548828, "logps/ref_rejected": -94.16201782226562, "logps/rejected": -86.705078125, "loss": 1.1363, "margin_dpo/margin_mean": 1.6347506046295166, "margin_dpo/margin_std": 2.7181103229522705, "step": 164 }, { "epoch": 0.2494331065759637, "fcm_dpo/beta": 0.5928429365158081, "fcm_dpo/delta": -0.08283931761980057, "fcm_dpo/margin": 1.5601141452789307, "fcm_dpo/q_t": 0.3488255739212036, "grad_norm": 81.30715942382812, "learning_rate": 4.678164332082175e-07, "logits/chosen": 1.9156373739242554, "logits/rejected": 1.759425401687622, "logps/chosen": -59.70091247558594, "logps/ref_chosen": -68.67132568359375, "logps/ref_rejected": -85.95689392089844, "logps/rejected": -78.54659271240234, "loss": 0.9951, "margin_dpo/margin_mean": 1.5601141452789307, "margin_dpo/margin_std": 2.4698634147644043, "step": 165 }, { "epoch": 0.2509448223733938, "fcm_dpo/beta": 0.6078928112983704, "fcm_dpo/delta": 0.17859025299549103, "fcm_dpo/margin": 0.6258453130722046, "fcm_dpo/q_t": 0.4346492290496826, "grad_norm": 114.55907440185547, "learning_rate": 4.6716442935512214e-07, "logits/chosen": 1.5327785015106201, "logits/rejected": 1.3427965641021729, "logps/chosen": -72.38050079345703, "logps/ref_chosen": -80.89755249023438, "logps/ref_rejected": -111.91075134277344, "logps/rejected": -104.01954650878906, "loss": 1.3994, "margin_dpo/margin_mean": 0.6258450746536255, "margin_dpo/margin_std": 2.1673123836517334, "step": 166 }, { "epoch": 0.25245653817082386, "fcm_dpo/beta": 0.6160829663276672, "fcm_dpo/delta": 0.1423880010843277, "fcm_dpo/margin": 1.1649137735366821, "fcm_dpo/q_t": 0.36513078212738037, "grad_norm": 88.68524932861328, "learning_rate": 4.6650635094610966e-07, "logits/chosen": 1.1840271949768066, "logits/rejected": 1.1251733303070068, "logps/chosen": -67.54503631591797, "logps/ref_chosen": -76.73136138916016, "logps/ref_rejected": -92.57389068603516, "logps/rejected": -84.55247497558594, "loss": 1.0499, "margin_dpo/margin_mean": 1.1649138927459717, "margin_dpo/margin_std": 1.8795359134674072, "step": 167 }, { "epoch": 0.25396825396825395, "fcm_dpo/beta": 0.6469905376434326, "fcm_dpo/delta": 0.22754508256912231, "fcm_dpo/margin": 0.9844677448272705, "fcm_dpo/q_t": 0.380914568901062, "grad_norm": 100.6047592163086, "learning_rate": 4.6584221638904767e-07, "logits/chosen": 1.5291261672973633, "logits/rejected": 1.4169994592666626, "logps/chosen": -74.01771545410156, "logps/ref_chosen": -82.63671112060547, "logps/ref_rejected": -96.72691345214844, "logps/rejected": -89.09239196777344, "loss": 1.1236, "margin_dpo/margin_mean": 0.9844681024551392, "margin_dpo/margin_std": 1.8431049585342407, "step": 168 }, { "epoch": 0.25547996976568405, "fcm_dpo/beta": 0.6636664867401123, "fcm_dpo/delta": 0.0465950183570385, "fcm_dpo/margin": 1.2143943309783936, "fcm_dpo/q_t": 0.372569739818573, "grad_norm": 111.19847106933594, "learning_rate": 4.651720442612075e-07, "logits/chosen": 1.8221468925476074, "logits/rejected": 1.8048608303070068, "logps/chosen": -69.81880950927734, "logps/ref_chosen": -78.87673950195312, "logps/ref_rejected": -94.18919372558594, "logps/rejected": -86.34565734863281, "loss": 1.126, "margin_dpo/margin_mean": 1.2143940925598145, "margin_dpo/margin_std": 2.2234106063842773, "step": 169 }, { "epoch": 0.25699168556311414, "fcm_dpo/beta": 0.6836942434310913, "fcm_dpo/delta": 0.17387819290161133, "fcm_dpo/margin": 1.0023448467254639, "fcm_dpo/q_t": 0.385806679725647, "grad_norm": 104.86164093017578, "learning_rate": 4.6449585330874425e-07, "logits/chosen": 1.6126837730407715, "logits/rejected": 1.6387670040130615, "logps/chosen": -64.28702545166016, "logps/ref_chosen": -73.35820007324219, "logps/ref_rejected": -76.85077667236328, "logps/rejected": -68.78193664550781, "loss": 1.2824, "margin_dpo/margin_mean": 1.0023449659347534, "margin_dpo/margin_std": 2.2842979431152344, "step": 170 }, { "epoch": 0.2585034013605442, "fcm_dpo/beta": 0.6622629165649414, "fcm_dpo/delta": -0.1891041398048401, "fcm_dpo/margin": 1.5381622314453125, "fcm_dpo/q_t": 0.3209994435310364, "grad_norm": 93.2053451538086, "learning_rate": 4.6381366244617224e-07, "logits/chosen": 1.8994969129562378, "logits/rejected": 1.752985954284668, "logps/chosen": -71.6744384765625, "logps/ref_chosen": -80.4322738647461, "logps/ref_rejected": -96.99999237060547, "logps/rejected": -89.78031921386719, "loss": 1.0006, "margin_dpo/margin_mean": 1.5381627082824707, "margin_dpo/margin_std": 2.266875743865967, "step": 171 }, { "epoch": 0.2600151171579743, "fcm_dpo/beta": 0.6701521873474121, "fcm_dpo/delta": 0.06850168108940125, "fcm_dpo/margin": 1.172062635421753, "fcm_dpo/q_t": 0.3691914975643158, "grad_norm": 105.6916275024414, "learning_rate": 4.631254907558365e-07, "logits/chosen": 1.7724663019180298, "logits/rejected": 1.6741005182266235, "logps/chosen": -61.7747802734375, "logps/ref_chosen": -70.45406341552734, "logps/ref_rejected": -99.85603332519531, "logps/rejected": -92.34881591796875, "loss": 1.169, "margin_dpo/margin_mean": 1.1720627546310425, "margin_dpo/margin_std": 2.2597615718841553, "step": 172 }, { "epoch": 0.2615268329554044, "fcm_dpo/beta": 0.6584789752960205, "fcm_dpo/delta": 0.04426664113998413, "fcm_dpo/margin": 1.2176408767700195, "fcm_dpo/q_t": 0.37218981981277466, "grad_norm": 106.22607421875, "learning_rate": 4.624313574873786e-07, "logits/chosen": 1.7763798236846924, "logits/rejected": 1.5477124452590942, "logps/chosen": -63.321632385253906, "logps/ref_chosen": -72.15026092529297, "logps/ref_rejected": -94.10212707519531, "logps/rejected": -86.49114990234375, "loss": 1.1903, "margin_dpo/margin_mean": 1.2176411151885986, "margin_dpo/margin_std": 2.281262159347534, "step": 173 }, { "epoch": 0.26303854875283444, "fcm_dpo/beta": 0.674136757850647, "fcm_dpo/delta": 0.03188333287835121, "fcm_dpo/margin": 1.2171550989151, "fcm_dpo/q_t": 0.3766389787197113, "grad_norm": 116.89978790283203, "learning_rate": 4.61731282057198e-07, "logits/chosen": 1.798130750656128, "logits/rejected": 1.6105579137802124, "logps/chosen": -67.50865936279297, "logps/ref_chosen": -75.99629211425781, "logps/ref_rejected": -106.2359619140625, "logps/rejected": -98.96548461914062, "loss": 1.2473, "margin_dpo/margin_mean": 1.2171552181243896, "margin_dpo/margin_std": 2.535811424255371, "step": 174 }, { "epoch": 0.26455026455026454, "fcm_dpo/beta": 0.6743377447128296, "fcm_dpo/delta": -0.04016567021608353, "fcm_dpo/margin": 1.3147974014282227, "fcm_dpo/q_t": 0.3643365800380707, "grad_norm": 117.52193450927734, "learning_rate": 4.6102528404790965e-07, "logits/chosen": 1.733808159828186, "logits/rejected": 1.673501968383789, "logps/chosen": -75.97535705566406, "logps/ref_chosen": -84.51177978515625, "logps/ref_rejected": -104.46299743652344, "logps/rejected": -97.24137115478516, "loss": 1.2056, "margin_dpo/margin_mean": 1.3147969245910645, "margin_dpo/margin_std": 2.6120011806488037, "step": 175 }, { "epoch": 0.2660619803476946, "fcm_dpo/beta": 0.665306031703949, "fcm_dpo/delta": -0.054544560611248016, "fcm_dpo/margin": 0.9671778678894043, "fcm_dpo/q_t": 0.404443621635437, "grad_norm": 131.4767608642578, "learning_rate": 4.603133832077953e-07, "logits/chosen": 1.8389785289764404, "logits/rejected": 1.755552053451538, "logps/chosen": -90.02412414550781, "logps/ref_chosen": -98.2034912109375, "logps/ref_rejected": -103.2023696899414, "logps/rejected": -95.99018859863281, "loss": 1.3566, "margin_dpo/margin_mean": 0.9671777486801147, "margin_dpo/margin_std": 2.518389940261841, "step": 176 }, { "epoch": 0.2675736961451247, "fcm_dpo/beta": 0.6090140342712402, "fcm_dpo/delta": -0.5656509399414062, "fcm_dpo/margin": 2.1946027278900146, "fcm_dpo/q_t": 0.2761075496673584, "grad_norm": 115.04895782470703, "learning_rate": 4.5959559945025183e-07, "logits/chosen": 1.9416172504425049, "logits/rejected": 1.738627314567566, "logps/chosen": -69.18362426757812, "logps/ref_chosen": -78.029541015625, "logps/ref_rejected": -112.57099914550781, "logps/rejected": -105.91969299316406, "loss": 0.8653, "margin_dpo/margin_mean": 2.19460391998291, "margin_dpo/margin_std": 2.5792174339294434, "step": 177 }, { "epoch": 0.2690854119425548, "fcm_dpo/beta": 0.5866925716400146, "fcm_dpo/delta": -0.008861862123012543, "fcm_dpo/margin": 1.458240032196045, "fcm_dpo/q_t": 0.3417511284351349, "grad_norm": 75.9957275390625, "learning_rate": 4.588719528532341e-07, "logits/chosen": 1.3384625911712646, "logits/rejected": 1.2324693202972412, "logps/chosen": -70.76741027832031, "logps/ref_chosen": -79.48869323730469, "logps/ref_rejected": -96.62449645996094, "logps/rejected": -89.3614501953125, "loss": 0.9873, "margin_dpo/margin_mean": 1.458240270614624, "margin_dpo/margin_std": 2.050736904144287, "step": 178 }, { "epoch": 0.2705971277399849, "fcm_dpo/beta": 0.6052649617195129, "fcm_dpo/delta": 0.13288062810897827, "fcm_dpo/margin": 1.2006174325942993, "fcm_dpo/q_t": 0.3806132972240448, "grad_norm": 95.76106262207031, "learning_rate": 4.581424636586928e-07, "logits/chosen": 1.5645568370819092, "logits/rejected": 1.5139702558517456, "logps/chosen": -75.68244934082031, "logps/ref_chosen": -84.5088119506836, "logps/ref_rejected": -93.07945251464844, "logps/rejected": -85.45369720458984, "loss": 1.209, "margin_dpo/margin_mean": 1.2006173133850098, "margin_dpo/margin_std": 2.5477375984191895, "step": 179 }, { "epoch": 0.272108843537415, "fcm_dpo/beta": 0.6288666725158691, "fcm_dpo/delta": 0.18441423773765564, "fcm_dpo/margin": 1.0763027667999268, "fcm_dpo/q_t": 0.38645124435424805, "grad_norm": 93.25940704345703, "learning_rate": 4.5740715227200897e-07, "logits/chosen": 1.276105284690857, "logits/rejected": 1.212243914604187, "logps/chosen": -65.58257293701172, "logps/ref_chosen": -74.5645523071289, "logps/ref_rejected": -81.02266693115234, "logps/rejected": -73.11698913574219, "loss": 1.1389, "margin_dpo/margin_mean": 1.0763027667999268, "margin_dpo/margin_std": 2.0827672481536865, "step": 180 }, { "epoch": 0.273620559334845, "fcm_dpo/beta": 0.6410256624221802, "fcm_dpo/delta": 0.06620515137910843, "fcm_dpo/margin": 1.2298375368118286, "fcm_dpo/q_t": 0.36326679587364197, "grad_norm": 93.19351196289062, "learning_rate": 4.566660392614228e-07, "logits/chosen": 1.4095215797424316, "logits/rejected": 1.3104197978973389, "logps/chosen": -69.90322875976562, "logps/ref_chosen": -78.77166748046875, "logps/ref_rejected": -98.29750061035156, "logps/rejected": -90.65890502929688, "loss": 1.0257, "margin_dpo/margin_mean": 1.2298375368118286, "margin_dpo/margin_std": 1.9370605945587158, "step": 181 }, { "epoch": 0.2751322751322751, "fcm_dpo/beta": 0.6139971613883972, "fcm_dpo/delta": -0.28054773807525635, "fcm_dpo/margin": 1.7906737327575684, "fcm_dpo/q_t": 0.32836031913757324, "grad_norm": 92.22091674804688, "learning_rate": 4.5591914535745817e-07, "logits/chosen": 1.8836357593536377, "logits/rejected": 1.7016699314117432, "logps/chosen": -66.96229553222656, "logps/ref_chosen": -75.67765045166016, "logps/ref_rejected": -107.47894287109375, "logps/rejected": -100.55426025390625, "loss": 0.9697, "margin_dpo/margin_mean": 1.7906737327575684, "margin_dpo/margin_std": 2.7334959506988525, "step": 182 }, { "epoch": 0.2766439909297052, "fcm_dpo/beta": 0.6289302706718445, "fcm_dpo/delta": 0.19042739272117615, "fcm_dpo/margin": 0.4961353540420532, "fcm_dpo/q_t": 0.4469048082828522, "grad_norm": 121.42922973632812, "learning_rate": 4.551664914523433e-07, "logits/chosen": 1.6563621759414673, "logits/rejected": 1.5986146926879883, "logps/chosen": -72.16058349609375, "logps/ref_chosen": -79.99969482421875, "logps/ref_rejected": -89.35220336914062, "logps/rejected": -82.00922393798828, "loss": 1.453, "margin_dpo/margin_mean": 0.4961353540420532, "margin_dpo/margin_std": 2.0346364974975586, "step": 183 }, { "epoch": 0.2781557067271353, "fcm_dpo/beta": 0.6417108178138733, "fcm_dpo/delta": 0.15971535444259644, "fcm_dpo/margin": 1.0936038494110107, "fcm_dpo/q_t": 0.36856314539909363, "grad_norm": 82.0419921875, "learning_rate": 4.544080985994258e-07, "logits/chosen": 2.0075020790100098, "logits/rejected": 1.8546810150146484, "logps/chosen": -53.30805206298828, "logps/ref_chosen": -62.133941650390625, "logps/ref_rejected": -84.44404602050781, "logps/rejected": -76.71176147460938, "loss": 1.0538, "margin_dpo/margin_mean": 1.0936038494110107, "margin_dpo/margin_std": 1.735282301902771, "step": 184 }, { "epoch": 0.2796674225245654, "fcm_dpo/beta": 0.6385919451713562, "fcm_dpo/delta": -0.0348266065120697, "fcm_dpo/margin": 1.3761906623840332, "fcm_dpo/q_t": 0.3491626977920532, "grad_norm": 88.21831512451172, "learning_rate": 4.5364398801258394e-07, "logits/chosen": 1.896065592765808, "logits/rejected": 1.775359869003296, "logps/chosen": -59.2283821105957, "logps/ref_chosen": -67.93174743652344, "logps/ref_rejected": -83.76744079589844, "logps/rejected": -76.44026184082031, "loss": 1.1267, "margin_dpo/margin_mean": 1.3761909008026123, "margin_dpo/margin_std": 2.3595733642578125, "step": 185 }, { "epoch": 0.2811791383219955, "fcm_dpo/beta": 0.6363253593444824, "fcm_dpo/delta": -0.0157480388879776, "fcm_dpo/margin": 1.3545132875442505, "fcm_dpo/q_t": 0.3593972623348236, "grad_norm": 102.75836181640625, "learning_rate": 4.5287418106563354e-07, "logits/chosen": 1.4891412258148193, "logits/rejected": 1.3690303564071655, "logps/chosen": -77.59123229980469, "logps/ref_chosen": -86.22174072265625, "logps/ref_rejected": -100.42019653320312, "logps/rejected": -93.14421081542969, "loss": 1.1483, "margin_dpo/margin_mean": 1.3545129299163818, "margin_dpo/margin_std": 2.477220058441162, "step": 186 }, { "epoch": 0.28269085411942557, "fcm_dpo/beta": 0.6449699401855469, "fcm_dpo/delta": 0.02151723951101303, "fcm_dpo/margin": 1.2872929573059082, "fcm_dpo/q_t": 0.3625199496746063, "grad_norm": 116.49866485595703, "learning_rate": 4.520986992917297e-07, "logits/chosen": 1.7953424453735352, "logits/rejected": 1.6858184337615967, "logps/chosen": -84.72871398925781, "logps/ref_chosen": -92.81202697753906, "logps/ref_rejected": -117.28926086425781, "logps/rejected": -110.49324035644531, "loss": 1.1079, "margin_dpo/margin_mean": 1.2872931957244873, "margin_dpo/margin_std": 2.2549121379852295, "step": 187 }, { "epoch": 0.2842025699168556, "fcm_dpo/beta": 0.6408007740974426, "fcm_dpo/delta": -0.14380419254302979, "fcm_dpo/margin": 1.5262317657470703, "fcm_dpo/q_t": 0.34180963039398193, "grad_norm": 95.1890869140625, "learning_rate": 4.5131756438276466e-07, "logits/chosen": 1.8667633533477783, "logits/rejected": 1.7648987770080566, "logps/chosen": -79.36767578125, "logps/ref_chosen": -87.85247802734375, "logps/ref_rejected": -94.58252716064453, "logps/rejected": -87.62394714355469, "loss": 0.9844, "margin_dpo/margin_mean": 1.5262320041656494, "margin_dpo/margin_std": 2.5105552673339844, "step": 188 }, { "epoch": 0.2857142857142857, "fcm_dpo/beta": 0.6031299829483032, "fcm_dpo/delta": -0.19629237055778503, "fcm_dpo/margin": 1.142228364944458, "fcm_dpo/q_t": 0.3792150020599365, "grad_norm": 118.79749298095703, "learning_rate": 4.5053079818876096e-07, "logits/chosen": 1.6882644891738892, "logits/rejected": 1.6798309087753296, "logps/chosen": -86.59615325927734, "logps/ref_chosen": -95.00414276123047, "logps/ref_rejected": -90.50090789794922, "logps/rejected": -83.23514556884766, "loss": 1.2151, "margin_dpo/margin_mean": 1.1422284841537476, "margin_dpo/margin_std": 2.214970588684082, "step": 189 }, { "epoch": 0.2872260015117158, "fcm_dpo/beta": 0.5990357398986816, "fcm_dpo/delta": -0.0020574182271957397, "fcm_dpo/margin": 1.4207065105438232, "fcm_dpo/q_t": 0.3320964574813843, "grad_norm": 89.09442138671875, "learning_rate": 4.4973842271726024e-07, "logits/chosen": 1.8460028171539307, "logits/rejected": 1.50532865524292, "logps/chosen": -62.2581787109375, "logps/ref_chosen": -70.79264831542969, "logps/ref_rejected": -122.56155395507812, "logps/rejected": -115.44779968261719, "loss": 0.9537, "margin_dpo/margin_mean": 1.4207061529159546, "margin_dpo/margin_std": 1.7832438945770264, "step": 190 }, { "epoch": 0.2887377173091459, "fcm_dpo/beta": 0.6087629795074463, "fcm_dpo/delta": 0.08328632265329361, "fcm_dpo/margin": 1.270153284072876, "fcm_dpo/q_t": 0.3589320778846741, "grad_norm": 111.36334991455078, "learning_rate": 4.48940460132708e-07, "logits/chosen": 1.5673696994781494, "logits/rejected": 1.4889506101608276, "logps/chosen": -84.18894958496094, "logps/ref_chosen": -92.15048217773438, "logps/ref_rejected": -106.4153060913086, "logps/rejected": -99.72392272949219, "loss": 1.0703, "margin_dpo/margin_mean": 1.2701534032821655, "margin_dpo/margin_std": 2.1253159046173096, "step": 191 }, { "epoch": 0.29024943310657597, "fcm_dpo/beta": 0.6490231156349182, "fcm_dpo/delta": 0.3352760076522827, "fcm_dpo/margin": 0.8153266906738281, "fcm_dpo/q_t": 0.40081295371055603, "grad_norm": 86.10425567626953, "learning_rate": 4.481369327558329e-07, "logits/chosen": 1.7814728021621704, "logits/rejected": 1.7212591171264648, "logps/chosen": -61.26172637939453, "logps/ref_chosen": -69.51527404785156, "logps/ref_rejected": -80.15898132324219, "logps/rejected": -72.72076416015625, "loss": 1.2172, "margin_dpo/margin_mean": 0.8153265714645386, "margin_dpo/margin_std": 1.8970856666564941, "step": 192 }, { "epoch": 0.29176114890400606, "fcm_dpo/beta": 0.6535841226577759, "fcm_dpo/delta": -0.020041286945343018, "fcm_dpo/margin": 1.3285222053527832, "fcm_dpo/q_t": 0.3438006043434143, "grad_norm": 87.5340805053711, "learning_rate": 4.47327863063023e-07, "logits/chosen": 1.6366324424743652, "logits/rejected": 1.6494388580322266, "logps/chosen": -64.92332458496094, "logps/ref_chosen": -73.43276977539062, "logps/ref_rejected": -77.81238555908203, "logps/rejected": -70.63145446777344, "loss": 0.9885, "margin_dpo/margin_mean": 1.3285223245620728, "margin_dpo/margin_std": 1.8894892930984497, "step": 193 }, { "epoch": 0.29327286470143615, "fcm_dpo/beta": 0.6577266454696655, "fcm_dpo/delta": 0.12170780450105667, "fcm_dpo/margin": 1.1193406581878662, "fcm_dpo/q_t": 0.3734307885169983, "grad_norm": 89.93775177001953, "learning_rate": 4.4651327368569684e-07, "logits/chosen": 1.660496711730957, "logits/rejected": 1.588250994682312, "logps/chosen": -68.1932144165039, "logps/ref_chosen": -76.63236999511719, "logps/ref_rejected": -85.67449188232422, "logps/rejected": -78.35467529296875, "loss": 1.14, "margin_dpo/margin_mean": 1.119341254234314, "margin_dpo/margin_std": 2.060373306274414, "step": 194 }, { "epoch": 0.2947845804988662, "fcm_dpo/beta": 0.6720187067985535, "fcm_dpo/delta": 0.09920601546764374, "fcm_dpo/margin": 1.1267728805541992, "fcm_dpo/q_t": 0.35634636878967285, "grad_norm": 107.51087188720703, "learning_rate": 4.4569318740967043e-07, "logits/chosen": 1.5862284898757935, "logits/rejected": 1.6139237880706787, "logps/chosen": -81.3615493774414, "logps/ref_chosen": -89.43354797363281, "logps/ref_rejected": -91.25908660888672, "logps/rejected": -84.31385803222656, "loss": 1.1097, "margin_dpo/margin_mean": 1.126772403717041, "margin_dpo/margin_std": 1.9978525638580322, "step": 195 }, { "epoch": 0.2962962962962963, "fcm_dpo/beta": 0.726308286190033, "fcm_dpo/delta": 0.357510507106781, "fcm_dpo/margin": 0.6964332461357117, "fcm_dpo/q_t": 0.4117005467414856, "grad_norm": 120.37006378173828, "learning_rate": 4.448676271745197e-07, "logits/chosen": 1.6787617206573486, "logits/rejected": 1.5786409378051758, "logps/chosen": -67.36844635009766, "logps/ref_chosen": -75.47528839111328, "logps/ref_rejected": -99.37582397460938, "logps/rejected": -91.96542358398438, "loss": 1.4766, "margin_dpo/margin_mean": 0.6964335441589355, "margin_dpo/margin_std": 2.46044921875, "step": 196 }, { "epoch": 0.29780801209372637, "fcm_dpo/beta": 0.736380398273468, "fcm_dpo/delta": -0.10233466327190399, "fcm_dpo/margin": 1.2729198932647705, "fcm_dpo/q_t": 0.3577408790588379, "grad_norm": 125.2252426147461, "learning_rate": 4.440366160729392e-07, "logits/chosen": 1.6729737520217896, "logits/rejected": 1.55661940574646, "logps/chosen": -59.2158088684082, "logps/ref_chosen": -67.57392883300781, "logps/ref_rejected": -89.97993469238281, "logps/rejected": -82.89472961425781, "loss": 1.3448, "margin_dpo/margin_mean": 1.2729198932647705, "margin_dpo/margin_std": 2.7377114295959473, "step": 197 }, { "epoch": 0.29931972789115646, "fcm_dpo/beta": 0.7087437510490417, "fcm_dpo/delta": -0.21124190092086792, "fcm_dpo/margin": 1.4629135131835938, "fcm_dpo/q_t": 0.31773099303245544, "grad_norm": 93.81770324707031, "learning_rate": 4.432001773500957e-07, "logits/chosen": 1.6051056385040283, "logits/rejected": 1.5143699645996094, "logps/chosen": -68.83720397949219, "logps/ref_chosen": -77.36013793945312, "logps/ref_rejected": -90.55670166015625, "logps/rejected": -83.49667358398438, "loss": 0.9198, "margin_dpo/margin_mean": 1.462914228439331, "margin_dpo/margin_std": 1.8346251249313354, "step": 198 }, { "epoch": 0.30083144368858655, "fcm_dpo/beta": 0.7020251750946045, "fcm_dpo/delta": 0.13893431425094604, "fcm_dpo/margin": 1.0273115634918213, "fcm_dpo/q_t": 0.3853752613067627, "grad_norm": 124.17066192626953, "learning_rate": 4.4235833440297856e-07, "logits/chosen": 1.7206742763519287, "logits/rejected": 1.4951976537704468, "logps/chosen": -65.01742553710938, "logps/ref_chosen": -73.05004119873047, "logps/ref_rejected": -95.21923065185547, "logps/rejected": -88.21392059326172, "loss": 1.3002, "margin_dpo/margin_mean": 1.0273126363754272, "margin_dpo/margin_std": 2.409886598587036, "step": 199 }, { "epoch": 0.30234315948601664, "fcm_dpo/beta": 0.6905279755592346, "fcm_dpo/delta": -0.07728119194507599, "fcm_dpo/margin": 1.3263179063796997, "fcm_dpo/q_t": 0.34874215722084045, "grad_norm": 118.3066177368164, "learning_rate": 4.415111107797445e-07, "logits/chosen": 1.7629958391189575, "logits/rejected": 1.5483076572418213, "logps/chosen": -65.42704010009766, "logps/ref_chosen": -73.75833129882812, "logps/ref_rejected": -105.00157165527344, "logps/rejected": -97.99659729003906, "loss": 1.1881, "margin_dpo/margin_mean": 1.3263182640075684, "margin_dpo/margin_std": 2.497131824493408, "step": 200 }, { "epoch": 0.30234315948601664, "eval_fcm_dpo/beta": 0.6969584226608276, "eval_logits/chosen": 1.8626530170440674, "eval_logits/rejected": 1.7409390211105347, "eval_logps/chosen": -78.61611938476562, "eval_logps/ref_chosen": -86.90177917480469, "eval_logps/ref_rejected": -96.69639587402344, "eval_logps/rejected": -89.6644287109375, "eval_loss": 0.5625263452529907, "eval_margin_dpo/margin_mean": 1.2537044286727905, "eval_margin_dpo/margin_std": 2.2270843982696533, "eval_runtime": 42.2527, "eval_samples_per_second": 54.505, "eval_steps_per_second": 1.704, "step": 200 }, { "epoch": 0.30385487528344673, "fcm_dpo/beta": 0.6900404691696167, "fcm_dpo/delta": -0.05886637419462204, "fcm_dpo/margin": 1.3092676401138306, "fcm_dpo/q_t": 0.34856978058815, "grad_norm": 118.63353729248047, "learning_rate": 4.4065853017905953e-07, "logits/chosen": 1.997534155845642, "logits/rejected": 1.8514721393585205, "logps/chosen": -71.470458984375, "logps/ref_chosen": -79.4841079711914, "logps/ref_rejected": -100.94435119628906, "logps/rejected": -94.2399673461914, "loss": 1.0364, "margin_dpo/margin_mean": 1.3092677593231201, "margin_dpo/margin_std": 2.063784122467041, "step": 201 }, { "epoch": 0.30536659108087677, "fcm_dpo/beta": 0.7053598165512085, "fcm_dpo/delta": 0.023715481162071228, "fcm_dpo/margin": 1.1626203060150146, "fcm_dpo/q_t": 0.358773797750473, "grad_norm": 105.97468566894531, "learning_rate": 4.3980061644943575e-07, "logits/chosen": 1.4268461465835571, "logits/rejected": 1.2712658643722534, "logps/chosen": -58.487152099609375, "logps/ref_chosen": -66.83952331542969, "logps/ref_rejected": -93.05116271972656, "logps/rejected": -85.86141204833984, "loss": 1.1519, "margin_dpo/margin_mean": 1.1626203060150146, "margin_dpo/margin_std": 2.15989351272583, "step": 202 }, { "epoch": 0.30687830687830686, "fcm_dpo/beta": 0.6896209120750427, "fcm_dpo/delta": -0.03713443875312805, "fcm_dpo/margin": 1.2816414833068848, "fcm_dpo/q_t": 0.3703385293483734, "grad_norm": 114.06891632080078, "learning_rate": 4.3893739358856455e-07, "logits/chosen": 1.7179381847381592, "logits/rejected": 1.4678292274475098, "logps/chosen": -72.00523376464844, "logps/ref_chosen": -80.32998657226562, "logps/ref_rejected": -113.52803039550781, "logps/rejected": -106.48490905761719, "loss": 1.1194, "margin_dpo/margin_mean": 1.2816411256790161, "margin_dpo/margin_std": 2.475268840789795, "step": 203 }, { "epoch": 0.30839002267573695, "fcm_dpo/beta": 0.6696390509605408, "fcm_dpo/delta": -0.03372015058994293, "fcm_dpo/margin": 1.3020836114883423, "fcm_dpo/q_t": 0.34320205450057983, "grad_norm": 91.20820617675781, "learning_rate": 4.380688857426449e-07, "logits/chosen": 1.6615278720855713, "logits/rejected": 1.502671241760254, "logps/chosen": -58.270694732666016, "logps/ref_chosen": -66.68875885009766, "logps/ref_rejected": -85.07585906982422, "logps/rejected": -77.95987701416016, "loss": 1.0448, "margin_dpo/margin_mean": 1.302083134651184, "margin_dpo/margin_std": 1.9596548080444336, "step": 204 }, { "epoch": 0.30990173847316704, "fcm_dpo/beta": 0.6933684945106506, "fcm_dpo/delta": 0.11131396889686584, "fcm_dpo/margin": 1.0774143934249878, "fcm_dpo/q_t": 0.3806332051753998, "grad_norm": 119.31201934814453, "learning_rate": 4.3719511720570814e-07, "logits/chosen": 1.9803879261016846, "logits/rejected": 1.8551146984100342, "logps/chosen": -78.51766204833984, "logps/ref_chosen": -86.51950073242188, "logps/ref_rejected": -112.55376434326172, "logps/rejected": -105.62934112548828, "loss": 1.2119, "margin_dpo/margin_mean": 1.0774142742156982, "margin_dpo/margin_std": 2.233020067214966, "step": 205 }, { "epoch": 0.31141345427059713, "fcm_dpo/beta": 0.688675045967102, "fcm_dpo/delta": -0.13103625178337097, "fcm_dpo/margin": 0.9063512086868286, "fcm_dpo/q_t": 0.42519694566726685, "grad_norm": 138.00732421875, "learning_rate": 4.363161124189387e-07, "logits/chosen": 2.0987722873687744, "logits/rejected": 2.042625665664673, "logps/chosen": -80.69242095947266, "logps/ref_chosen": -88.68557739257812, "logps/ref_rejected": -97.75945281982422, "logps/rejected": -90.67264556884766, "loss": 1.4227, "margin_dpo/margin_mean": 0.9063505530357361, "margin_dpo/margin_std": 2.7547712326049805, "step": 206 }, { "epoch": 0.3129251700680272, "fcm_dpo/beta": 0.6832928657531738, "fcm_dpo/delta": 0.0868806540966034, "fcm_dpo/margin": 1.1258772611618042, "fcm_dpo/q_t": 0.3708181083202362, "grad_norm": 113.93563842773438, "learning_rate": 4.3543189596998986e-07, "logits/chosen": 1.40671968460083, "logits/rejected": 1.2092251777648926, "logps/chosen": -77.26618957519531, "logps/ref_chosen": -85.12134552001953, "logps/ref_rejected": -103.34955596923828, "logps/rejected": -96.62027740478516, "loss": 1.1403, "margin_dpo/margin_mean": 1.1258769035339355, "margin_dpo/margin_std": 2.300265073776245, "step": 207 }, { "epoch": 0.3144368858654573, "fcm_dpo/beta": 0.7332829833030701, "fcm_dpo/delta": 0.37100207805633545, "fcm_dpo/margin": 0.6764590740203857, "fcm_dpo/q_t": 0.42520248889923096, "grad_norm": 134.1845245361328, "learning_rate": 4.3454249259229664e-07, "logits/chosen": 1.4971542358398438, "logits/rejected": 1.4829304218292236, "logps/chosen": -70.43374633789062, "logps/ref_chosen": -78.84121704101562, "logps/ref_rejected": -89.82504272460938, "logps/rejected": -82.09403991699219, "loss": 1.4158, "margin_dpo/margin_mean": 0.6764594316482544, "margin_dpo/margin_std": 2.1062352657318115, "step": 208 }, { "epoch": 0.31594860166288735, "fcm_dpo/beta": 0.7302354574203491, "fcm_dpo/delta": -0.23841455578804016, "fcm_dpo/margin": 1.4544109106063843, "fcm_dpo/q_t": 0.33910322189331055, "grad_norm": 112.4314193725586, "learning_rate": 4.336479271643833e-07, "logits/chosen": 1.6004903316497803, "logits/rejected": 1.5286908149719238, "logps/chosen": -77.70037841796875, "logps/ref_chosen": -85.98588562011719, "logps/ref_rejected": -107.1638412475586, "logps/rejected": -100.33273315429688, "loss": 1.0837, "margin_dpo/margin_mean": 1.4544110298156738, "margin_dpo/margin_std": 2.3539958000183105, "step": 209 }, { "epoch": 0.31746031746031744, "fcm_dpo/beta": 0.6787157654762268, "fcm_dpo/delta": -0.3772972822189331, "fcm_dpo/margin": 1.7446019649505615, "fcm_dpo/q_t": 0.3218177258968353, "grad_norm": 100.42723083496094, "learning_rate": 4.327482247091679e-07, "logits/chosen": 1.6652277708053589, "logits/rejected": 1.4687567949295044, "logps/chosen": -63.37964630126953, "logps/ref_chosen": -71.75653076171875, "logps/ref_rejected": -102.47966003417969, "logps/rejected": -95.84736633300781, "loss": 1.0083, "margin_dpo/margin_mean": 1.7446017265319824, "margin_dpo/margin_std": 2.5480265617370605, "step": 210 }, { "epoch": 0.31897203325774753, "fcm_dpo/beta": 0.6792653799057007, "fcm_dpo/delta": 0.14284920692443848, "fcm_dpo/margin": 1.0514057874679565, "fcm_dpo/q_t": 0.3711149990558624, "grad_norm": 112.68789672851562, "learning_rate": 4.3184341039326217e-07, "logits/chosen": 1.7981252670288086, "logits/rejected": 1.5915374755859375, "logps/chosen": -62.572628021240234, "logps/ref_chosen": -70.95170593261719, "logps/ref_rejected": -108.51902770996094, "logps/rejected": -101.19136047363281, "loss": 1.1428, "margin_dpo/margin_mean": 1.0514049530029297, "margin_dpo/margin_std": 2.041271209716797, "step": 211 }, { "epoch": 0.3204837490551776, "fcm_dpo/beta": 0.660285472869873, "fcm_dpo/delta": -0.15960374474525452, "fcm_dpo/margin": 1.5043416023254395, "fcm_dpo/q_t": 0.32801175117492676, "grad_norm": 99.5758056640625, "learning_rate": 4.309335095262675e-07, "logits/chosen": 1.6569452285766602, "logits/rejected": 1.533679723739624, "logps/chosen": -65.77145385742188, "logps/ref_chosen": -74.34010314941406, "logps/ref_rejected": -97.58259582519531, "logps/rejected": -90.5182876586914, "loss": 0.9946, "margin_dpo/margin_mean": 1.5043418407440186, "margin_dpo/margin_std": 2.18064022064209, "step": 212 }, { "epoch": 0.3219954648526077, "fcm_dpo/beta": 0.6624789237976074, "fcm_dpo/delta": 0.05524115264415741, "fcm_dpo/margin": 1.2062104940414429, "fcm_dpo/q_t": 0.3639560341835022, "grad_norm": 110.04399108886719, "learning_rate": 4.3001854756006724e-07, "logits/chosen": 1.7112276554107666, "logits/rejected": 1.722957968711853, "logps/chosen": -71.81007385253906, "logps/ref_chosen": -80.2526626586914, "logps/ref_rejected": -94.76947021484375, "logps/rejected": -87.5330810546875, "loss": 1.1933, "margin_dpo/margin_mean": 1.2062103748321533, "margin_dpo/margin_std": 2.391385555267334, "step": 213 }, { "epoch": 0.3235071806500378, "fcm_dpo/beta": 0.6735548973083496, "fcm_dpo/delta": 0.07444822043180466, "fcm_dpo/margin": 1.1592483520507812, "fcm_dpo/q_t": 0.3672788441181183, "grad_norm": 105.3218765258789, "learning_rate": 4.290985500881143e-07, "logits/chosen": 1.4524340629577637, "logits/rejected": 1.394425392150879, "logps/chosen": -69.37520599365234, "logps/ref_chosen": -77.9675064086914, "logps/ref_rejected": -84.0354232788086, "logps/rejected": -76.60237121582031, "loss": 1.0677, "margin_dpo/margin_mean": 1.1592485904693604, "margin_dpo/margin_std": 2.0362842082977295, "step": 214 }, { "epoch": 0.3250188964474679, "fcm_dpo/beta": 0.6623063087463379, "fcm_dpo/delta": -0.1859441101551056, "fcm_dpo/margin": 1.535156011581421, "fcm_dpo/q_t": 0.3248975872993469, "grad_norm": 98.75223541259766, "learning_rate": 4.281735428447157e-07, "logits/chosen": 1.5025854110717773, "logits/rejected": 1.2978618144989014, "logps/chosen": -72.98377990722656, "logps/ref_chosen": -81.2047348022461, "logps/ref_rejected": -116.18414306640625, "logps/rejected": -109.49835205078125, "loss": 0.9512, "margin_dpo/margin_mean": 1.53515625, "margin_dpo/margin_std": 2.0457441806793213, "step": 215 }, { "epoch": 0.32653061224489793, "fcm_dpo/beta": 0.6392388343811035, "fcm_dpo/delta": -0.187744140625, "fcm_dpo/margin": 1.5922799110412598, "fcm_dpo/q_t": 0.34065473079681396, "grad_norm": 88.64974975585938, "learning_rate": 4.2724355170431247e-07, "logits/chosen": 2.229978084564209, "logits/rejected": 1.9987678527832031, "logps/chosen": -75.49063873291016, "logps/ref_chosen": -83.57113647460938, "logps/ref_rejected": -112.51902770996094, "logps/rejected": -106.03080749511719, "loss": 1.0216, "margin_dpo/margin_mean": 1.5922796726226807, "margin_dpo/margin_std": 2.530792713165283, "step": 216 }, { "epoch": 0.328042328042328, "fcm_dpo/beta": 0.618314266204834, "fcm_dpo/delta": -0.05883919447660446, "fcm_dpo/margin": 1.4609345197677612, "fcm_dpo/q_t": 0.36006930470466614, "grad_norm": 106.62305450439453, "learning_rate": 4.26308602680756e-07, "logits/chosen": 1.7996132373809814, "logits/rejected": 1.5775671005249023, "logps/chosen": -69.00894165039062, "logps/ref_chosen": -77.01390075683594, "logps/ref_rejected": -105.28099822998047, "logps/rejected": -98.73696899414062, "loss": 1.0933, "margin_dpo/margin_mean": 1.460935115814209, "margin_dpo/margin_std": 2.555617332458496, "step": 217 }, { "epoch": 0.3295540438397581, "fcm_dpo/beta": 0.6417362093925476, "fcm_dpo/delta": 0.20055294036865234, "fcm_dpo/margin": 1.028103232383728, "fcm_dpo/q_t": 0.3811229467391968, "grad_norm": 99.2293472290039, "learning_rate": 4.253687219265803e-07, "logits/chosen": 1.507783055305481, "logits/rejected": 1.506219744682312, "logps/chosen": -84.46420288085938, "logps/ref_chosen": -92.47299194335938, "logps/ref_rejected": -92.80751037597656, "logps/rejected": -85.82682800292969, "loss": 1.1278, "margin_dpo/margin_mean": 1.0281034708023071, "margin_dpo/margin_std": 1.8990416526794434, "step": 218 }, { "epoch": 0.3310657596371882, "fcm_dpo/beta": 0.6548258066177368, "fcm_dpo/delta": 0.08293704688549042, "fcm_dpo/margin": 1.1794757843017578, "fcm_dpo/q_t": 0.3652268648147583, "grad_norm": 90.89139556884766, "learning_rate": 4.2442393573227043e-07, "logits/chosen": 1.5002977848052979, "logits/rejected": 1.4157586097717285, "logps/chosen": -68.95957946777344, "logps/ref_chosen": -77.10382080078125, "logps/ref_rejected": -92.3438949584961, "logps/rejected": -85.37913513183594, "loss": 1.1377, "margin_dpo/margin_mean": 1.1794754266738892, "margin_dpo/margin_std": 2.164668083190918, "step": 219 }, { "epoch": 0.3325774754346183, "fcm_dpo/beta": 0.6796347498893738, "fcm_dpo/delta": 0.1952458620071411, "fcm_dpo/margin": 0.9770975708961487, "fcm_dpo/q_t": 0.39049315452575684, "grad_norm": 91.66635131835938, "learning_rate": 4.234742705255272e-07, "logits/chosen": 2.009399890899658, "logits/rejected": 1.8451021909713745, "logps/chosen": -54.451942443847656, "logps/ref_chosen": -62.48021697998047, "logps/ref_rejected": -86.93276977539062, "logps/rejected": -79.881591796875, "loss": 1.2308, "margin_dpo/margin_mean": 0.9770973920822144, "margin_dpo/margin_std": 2.1490917205810547, "step": 220 }, { "epoch": 0.3340891912320484, "fcm_dpo/beta": 0.6868494153022766, "fcm_dpo/delta": 0.022192861884832382, "fcm_dpo/margin": 1.206336259841919, "fcm_dpo/q_t": 0.36380764842033386, "grad_norm": 109.6985092163086, "learning_rate": 4.22519752870528e-07, "logits/chosen": 1.804550051689148, "logits/rejected": 1.6219594478607178, "logps/chosen": -70.11363220214844, "logps/ref_chosen": -78.35491943359375, "logps/ref_rejected": -108.17631530761719, "logps/rejected": -101.141357421875, "loss": 1.1374, "margin_dpo/margin_mean": 1.2063356637954712, "margin_dpo/margin_std": 2.2121567726135254, "step": 221 }, { "epoch": 0.3356009070294785, "fcm_dpo/beta": 0.6856993436813354, "fcm_dpo/delta": -0.011833667755126953, "fcm_dpo/margin": 1.254509687423706, "fcm_dpo/q_t": 0.3495626151561737, "grad_norm": 115.02777862548828, "learning_rate": 4.2156040946718343e-07, "logits/chosen": 2.041644334793091, "logits/rejected": 1.7993850708007812, "logps/chosen": -69.54031372070312, "logps/ref_chosen": -77.2734375, "logps/ref_rejected": -126.41007995605469, "logps/rejected": -119.93147277832031, "loss": 1.0305, "margin_dpo/margin_mean": 1.2545100450515747, "margin_dpo/margin_std": 1.9318989515304565, "step": 222 }, { "epoch": 0.3371126228269085, "fcm_dpo/beta": 0.6608577966690063, "fcm_dpo/delta": -0.19959528744220734, "fcm_dpo/margin": 1.5568513870239258, "fcm_dpo/q_t": 0.31522971391677856, "grad_norm": 93.16920471191406, "learning_rate": 4.2059626715039065e-07, "logits/chosen": 1.7795042991638184, "logits/rejected": 1.6519936323165894, "logps/chosen": -70.11776733398438, "logps/ref_chosen": -78.4210205078125, "logps/ref_rejected": -101.38420867919922, "logps/rejected": -94.63780975341797, "loss": 0.8706, "margin_dpo/margin_mean": 1.5568515062332153, "margin_dpo/margin_std": 1.8883434534072876, "step": 223 }, { "epoch": 0.3386243386243386, "fcm_dpo/beta": 0.6652363538742065, "fcm_dpo/delta": 0.10752552002668381, "fcm_dpo/margin": 1.128204107284546, "fcm_dpo/q_t": 0.362488716840744, "grad_norm": 107.10758972167969, "learning_rate": 4.1962735288928304e-07, "logits/chosen": 2.056093454360962, "logits/rejected": 1.9930920600891113, "logps/chosen": -71.17828369140625, "logps/ref_chosen": -79.36337280273438, "logps/ref_rejected": -89.99789428710938, "logps/rejected": -82.94100952148438, "loss": 1.0344, "margin_dpo/margin_mean": 1.128204584121704, "margin_dpo/margin_std": 1.7746167182922363, "step": 224 }, { "epoch": 0.3401360544217687, "fcm_dpo/beta": 0.662153422832489, "fcm_dpo/delta": 0.008250989019870758, "fcm_dpo/margin": 1.2687159776687622, "fcm_dpo/q_t": 0.35935088992118835, "grad_norm": 137.93043518066406, "learning_rate": 4.186536937864752e-07, "logits/chosen": 1.6968061923980713, "logits/rejected": 1.4539750814437866, "logps/chosen": -81.02485656738281, "logps/ref_chosen": -88.99606323242188, "logps/ref_rejected": -127.55032348632812, "logps/rejected": -120.84783935546875, "loss": 1.1876, "margin_dpo/margin_mean": 1.2687162160873413, "margin_dpo/margin_std": 2.3828163146972656, "step": 225 }, { "epoch": 0.3416477702191988, "fcm_dpo/beta": 0.6740131378173828, "fcm_dpo/delta": 0.04441865161061287, "fcm_dpo/margin": 1.2005562782287598, "fcm_dpo/q_t": 0.3622625172138214, "grad_norm": 88.77747344970703, "learning_rate": 4.176753170773052e-07, "logits/chosen": 1.8422725200653076, "logits/rejected": 1.749298095703125, "logps/chosen": -60.43705368041992, "logps/ref_chosen": -68.68444061279297, "logps/ref_rejected": -85.81898498535156, "logps/rejected": -78.77215576171875, "loss": 1.1248, "margin_dpo/margin_mean": 1.2005561590194702, "margin_dpo/margin_std": 2.140181541442871, "step": 226 }, { "epoch": 0.3431594860166289, "fcm_dpo/beta": 0.6707133054733276, "fcm_dpo/delta": 0.00018239766359329224, "fcm_dpo/margin": 1.2653456926345825, "fcm_dpo/q_t": 0.35821062326431274, "grad_norm": 108.87771606445312, "learning_rate": 4.166922501290729e-07, "logits/chosen": 1.8412506580352783, "logits/rejected": 1.7449533939361572, "logps/chosen": -64.33416748046875, "logps/ref_chosen": -72.52029418945312, "logps/ref_rejected": -90.7720718383789, "logps/rejected": -83.85128784179688, "loss": 1.1363, "margin_dpo/margin_mean": 1.265345811843872, "margin_dpo/margin_std": 2.2258872985839844, "step": 227 }, { "epoch": 0.34467120181405897, "fcm_dpo/beta": 0.6848806142807007, "fcm_dpo/delta": 0.057742100208997726, "fcm_dpo/margin": 1.162534475326538, "fcm_dpo/q_t": 0.36188948154449463, "grad_norm": 101.69074249267578, "learning_rate": 4.1570452044027405e-07, "logits/chosen": 1.739363431930542, "logits/rejected": 1.585823893547058, "logps/chosen": -64.13141632080078, "logps/ref_chosen": -72.23167419433594, "logps/ref_rejected": -95.45873260498047, "logps/rejected": -88.52101135253906, "loss": 1.1563, "margin_dpo/margin_mean": 1.1625350713729858, "margin_dpo/margin_std": 2.186185598373413, "step": 228 }, { "epoch": 0.34618291761148906, "fcm_dpo/beta": 0.6733275651931763, "fcm_dpo/delta": -0.10372693836688995, "fcm_dpo/margin": 1.4018828868865967, "fcm_dpo/q_t": 0.3341498374938965, "grad_norm": 94.96331787109375, "learning_rate": 4.147121556398312e-07, "logits/chosen": 1.7647275924682617, "logits/rejected": 1.5876259803771973, "logps/chosen": -58.482269287109375, "logps/ref_chosen": -66.88822174072266, "logps/ref_rejected": -92.27890014648438, "logps/rejected": -85.27483367919922, "loss": 1.0192, "margin_dpo/margin_mean": 1.4018831253051758, "margin_dpo/margin_std": 2.1348557472229004, "step": 229 }, { "epoch": 0.3476946334089191, "fcm_dpo/beta": 0.6939650774002075, "fcm_dpo/delta": 0.12067630141973495, "fcm_dpo/margin": 1.052196741104126, "fcm_dpo/q_t": 0.3544687330722809, "grad_norm": 114.52188110351562, "learning_rate": 4.137151834863213e-07, "logits/chosen": 1.8524203300476074, "logits/rejected": 1.8436353206634521, "logps/chosen": -67.94467163085938, "logps/ref_chosen": -76.12332153320312, "logps/ref_rejected": -78.19171905517578, "logps/rejected": -71.06526947021484, "loss": 1.1746, "margin_dpo/margin_mean": 1.0521972179412842, "margin_dpo/margin_std": 2.023137092590332, "step": 230 }, { "epoch": 0.3492063492063492, "fcm_dpo/beta": 0.6795934438705444, "fcm_dpo/delta": -0.047244250774383545, "fcm_dpo/margin": 1.3137178421020508, "fcm_dpo/q_t": 0.3336409330368042, "grad_norm": 117.06100463867188, "learning_rate": 4.1271363186719835e-07, "logits/chosen": 1.5964305400848389, "logits/rejected": 1.5918617248535156, "logps/chosen": -84.60713195800781, "logps/ref_chosen": -92.45181274414062, "logps/ref_rejected": -100.89735412597656, "logps/rejected": -94.36639404296875, "loss": 1.0741, "margin_dpo/margin_mean": 1.3137177228927612, "margin_dpo/margin_std": 2.144465923309326, "step": 231 }, { "epoch": 0.3507180650037793, "fcm_dpo/beta": 0.7037328481674194, "fcm_dpo/delta": 0.2652207016944885, "fcm_dpo/margin": 0.8546900749206543, "fcm_dpo/q_t": 0.4035346508026123, "grad_norm": 234.2224884033203, "learning_rate": 4.1170752879801436e-07, "logits/chosen": 1.4689631462097168, "logits/rejected": 1.4187445640563965, "logps/chosen": -78.85874938964844, "logps/ref_chosen": -86.75383758544922, "logps/ref_rejected": -98.16909790039062, "logps/rejected": -91.12869262695312, "loss": 1.4459, "margin_dpo/margin_mean": 0.854690432548523, "margin_dpo/margin_std": 2.5719518661499023, "step": 232 }, { "epoch": 0.35222978080120937, "fcm_dpo/beta": 0.6840606331825256, "fcm_dpo/delta": -0.23278965055942535, "fcm_dpo/margin": 1.099263310432434, "fcm_dpo/q_t": 0.37205249071121216, "grad_norm": 110.37529754638672, "learning_rate": 4.106969024216348e-07, "logits/chosen": 1.5777666568756104, "logits/rejected": 1.4945855140686035, "logps/chosen": -64.928955078125, "logps/ref_chosen": -72.87556457519531, "logps/ref_rejected": -85.22943115234375, "logps/rejected": -78.38209533691406, "loss": 1.197, "margin_dpo/margin_mean": 1.0992629528045654, "margin_dpo/margin_std": 2.0313029289245605, "step": 233 }, { "epoch": 0.35374149659863946, "fcm_dpo/beta": 0.701070249080658, "fcm_dpo/delta": 0.08123414218425751, "fcm_dpo/margin": 1.099867820739746, "fcm_dpo/q_t": 0.36371809244155884, "grad_norm": 95.63811492919922, "learning_rate": 4.09681781007452e-07, "logits/chosen": 1.4570426940917969, "logits/rejected": 1.408102035522461, "logps/chosen": -61.800174713134766, "logps/ref_chosen": -70.05477905273438, "logps/ref_rejected": -68.7240982055664, "logps/rejected": -61.569358825683594, "loss": 1.1211, "margin_dpo/margin_mean": 1.099867820739746, "margin_dpo/margin_std": 1.8799715042114258, "step": 234 }, { "epoch": 0.35525321239606955, "fcm_dpo/beta": 0.6795494556427002, "fcm_dpo/delta": -0.1432761698961258, "fcm_dpo/margin": 1.440608024597168, "fcm_dpo/q_t": 0.316908061504364, "grad_norm": 107.87815856933594, "learning_rate": 4.08662192950594e-07, "logits/chosen": 1.609418511390686, "logits/rejected": 1.5853018760681152, "logps/chosen": -77.45685577392578, "logps/ref_chosen": -85.86051940917969, "logps/ref_rejected": -96.14663696289062, "logps/rejected": -89.18359375, "loss": 0.9073, "margin_dpo/margin_mean": 1.4406075477600098, "margin_dpo/margin_std": 1.7774099111557007, "step": 235 }, { "epoch": 0.35676492819349964, "fcm_dpo/beta": 0.6827093362808228, "fcm_dpo/delta": 0.08276916295289993, "fcm_dpo/margin": 1.133293628692627, "fcm_dpo/q_t": 0.36910420656204224, "grad_norm": 112.19277954101562, "learning_rate": 4.076381667711306e-07, "logits/chosen": 1.8993335962295532, "logits/rejected": 1.8813176155090332, "logps/chosen": -81.87387084960938, "logps/ref_chosen": -89.75252532958984, "logps/ref_rejected": -99.28534698486328, "logps/rejected": -92.53997802734375, "loss": 1.1588, "margin_dpo/margin_mean": 1.133293867111206, "margin_dpo/margin_std": 2.1710267066955566, "step": 236 }, { "epoch": 0.35827664399092973, "fcm_dpo/beta": 0.7253872156143188, "fcm_dpo/delta": 0.3865929841995239, "fcm_dpo/margin": 0.6659508943557739, "fcm_dpo/q_t": 0.415424644947052, "grad_norm": 137.00572204589844, "learning_rate": 4.066097311132753e-07, "logits/chosen": 1.6266775131225586, "logits/rejected": 1.6260457038879395, "logps/chosen": -84.624267578125, "logps/ref_chosen": -92.59001922607422, "logps/ref_rejected": -101.45584869384766, "logps/rejected": -94.15604400634766, "loss": 1.3267, "margin_dpo/margin_mean": 0.6659514904022217, "margin_dpo/margin_std": 1.8473809957504272, "step": 237 }, { "epoch": 0.35978835978835977, "fcm_dpo/beta": 0.7275218963623047, "fcm_dpo/delta": -0.08721604943275452, "fcm_dpo/margin": 1.275065302848816, "fcm_dpo/q_t": 0.3357663154602051, "grad_norm": 118.53727722167969, "learning_rate": 4.0557691474458414e-07, "logits/chosen": 1.514404058456421, "logits/rejected": 1.5060009956359863, "logps/chosen": -73.95974731445312, "logps/ref_chosen": -82.2470474243164, "logps/ref_rejected": -92.59944152832031, "logps/rejected": -85.58721923828125, "loss": 1.0485, "margin_dpo/margin_mean": 1.2750656604766846, "margin_dpo/margin_std": 1.9951952695846558, "step": 238 }, { "epoch": 0.36130007558578986, "fcm_dpo/beta": 0.7322613596916199, "fcm_dpo/delta": -0.01715192198753357, "fcm_dpo/margin": 1.181195616722107, "fcm_dpo/q_t": 0.35318899154663086, "grad_norm": 118.10260009765625, "learning_rate": 4.045397465551513e-07, "logits/chosen": 2.0101892948150635, "logits/rejected": 1.724487543106079, "logps/chosen": -67.31920623779297, "logps/ref_chosen": -75.30878448486328, "logps/ref_rejected": -131.2318115234375, "logps/rejected": -124.42342376708984, "loss": 1.087, "margin_dpo/margin_mean": 1.1811952590942383, "margin_dpo/margin_std": 1.9706592559814453, "step": 239 }, { "epoch": 0.36281179138321995, "fcm_dpo/beta": 0.714773416519165, "fcm_dpo/delta": -0.23679864406585693, "fcm_dpo/margin": 1.4823143482208252, "fcm_dpo/q_t": 0.31994450092315674, "grad_norm": 102.3418197631836, "learning_rate": 4.0349825555680045e-07, "logits/chosen": 1.5584038496017456, "logits/rejected": 1.364628553390503, "logps/chosen": -62.70786666870117, "logps/ref_chosen": -70.81785583496094, "logps/ref_rejected": -98.53778076171875, "logps/rejected": -91.91011047363281, "loss": 0.9887, "margin_dpo/margin_mean": 1.4823133945465088, "margin_dpo/margin_std": 2.0326457023620605, "step": 240 }, { "epoch": 0.36432350718065004, "fcm_dpo/beta": 0.7306882739067078, "fcm_dpo/delta": 0.3302416205406189, "fcm_dpo/margin": 0.7337081432342529, "fcm_dpo/q_t": 0.41419199109077454, "grad_norm": 125.35552978515625, "learning_rate": 4.0245247088227377e-07, "logits/chosen": 1.4376188516616821, "logits/rejected": 1.402127981185913, "logps/chosen": -80.48194885253906, "logps/ref_chosen": -88.60260772705078, "logps/ref_rejected": -101.42214965820312, "logps/rejected": -94.03518676757812, "loss": 1.2775, "margin_dpo/margin_mean": 0.7337080240249634, "margin_dpo/margin_std": 1.8403754234313965, "step": 241 }, { "epoch": 0.36583522297808013, "fcm_dpo/beta": 0.6982107162475586, "fcm_dpo/delta": -0.33571815490722656, "fcm_dpo/margin": 1.6368021965026855, "fcm_dpo/q_t": 0.30604660511016846, "grad_norm": 85.99467468261719, "learning_rate": 4.0140242178441665e-07, "logits/chosen": 1.5356554985046387, "logits/rejected": 1.451372742652893, "logps/chosen": -68.53840637207031, "logps/ref_chosen": -77.34110260009766, "logps/ref_rejected": -84.76332092285156, "logps/rejected": -77.59742736816406, "loss": 0.8799, "margin_dpo/margin_mean": 1.636801838874817, "margin_dpo/margin_std": 2.0876946449279785, "step": 242 }, { "epoch": 0.3673469387755102, "fcm_dpo/beta": 0.6918625831604004, "fcm_dpo/delta": 0.05498592555522919, "fcm_dpo/margin": 1.153847336769104, "fcm_dpo/q_t": 0.3613772988319397, "grad_norm": 119.00396728515625, "learning_rate": 4.003481376353596e-07, "logits/chosen": 1.7110121250152588, "logits/rejected": 1.7173479795455933, "logps/chosen": -85.46659088134766, "logps/ref_chosen": -93.55897521972656, "logps/ref_rejected": -89.33551025390625, "logps/rejected": -82.39696502685547, "loss": 1.0625, "margin_dpo/margin_mean": 1.1538474559783936, "margin_dpo/margin_std": 1.8430607318878174, "step": 243 }, { "epoch": 0.3688586545729403, "fcm_dpo/beta": 0.665020227432251, "fcm_dpo/delta": -0.3315753936767578, "fcm_dpo/margin": 1.7183572053909302, "fcm_dpo/q_t": 0.27718719840049744, "grad_norm": 68.53643798828125, "learning_rate": 3.9928964792569654e-07, "logits/chosen": 1.6703214645385742, "logits/rejected": 1.5628724098205566, "logps/chosen": -61.40729904174805, "logps/ref_chosen": -69.82603454589844, "logps/ref_rejected": -92.4764175415039, "logps/rejected": -85.77603149414062, "loss": 0.7236, "margin_dpo/margin_mean": 1.7183579206466675, "margin_dpo/margin_std": 1.5396392345428467, "step": 244 }, { "epoch": 0.37037037037037035, "fcm_dpo/beta": 0.6507720947265625, "fcm_dpo/delta": -0.028134608641266823, "fcm_dpo/margin": 1.34529709815979, "fcm_dpo/q_t": 0.34099605679512024, "grad_norm": 118.12181854248047, "learning_rate": 3.982269822636601e-07, "logits/chosen": 1.7217857837677002, "logits/rejected": 1.6632800102233887, "logps/chosen": -77.65235900878906, "logps/ref_chosen": -85.68216705322266, "logps/ref_rejected": -93.8754653930664, "logps/rejected": -87.19094848632812, "loss": 1.0423, "margin_dpo/margin_mean": 1.3452973365783691, "margin_dpo/margin_std": 2.0852465629577637, "step": 245 }, { "epoch": 0.37188208616780044, "fcm_dpo/beta": 0.6409244537353516, "fcm_dpo/delta": -0.06382192671298981, "fcm_dpo/margin": 1.4147106409072876, "fcm_dpo/q_t": 0.3571142554283142, "grad_norm": 108.5011978149414, "learning_rate": 3.971601703742932e-07, "logits/chosen": 1.762617826461792, "logits/rejected": 1.6464555263519287, "logps/chosen": -82.2135238647461, "logps/ref_chosen": -90.05093383789062, "logps/ref_rejected": -112.77645874023438, "logps/rejected": -106.353759765625, "loss": 1.0714, "margin_dpo/margin_mean": 1.4147106409072876, "margin_dpo/margin_std": 2.4063146114349365, "step": 246 }, { "epoch": 0.37339380196523053, "fcm_dpo/beta": 0.6898531913757324, "fcm_dpo/delta": 0.45279592275619507, "fcm_dpo/margin": 0.6013703346252441, "fcm_dpo/q_t": 0.4185771942138672, "grad_norm": 155.9046630859375, "learning_rate": 3.960892420986177e-07, "logits/chosen": 1.8204069137573242, "logits/rejected": 1.7708896398544312, "logps/chosen": -95.5366439819336, "logps/ref_chosen": -103.23979187011719, "logps/ref_rejected": -105.26278686523438, "logps/rejected": -98.1610107421875, "loss": 1.3927, "margin_dpo/margin_mean": 0.6013697385787964, "margin_dpo/margin_std": 2.031219244003296, "step": 247 }, { "epoch": 0.3749055177626606, "fcm_dpo/beta": 0.7205560207366943, "fcm_dpo/delta": 0.1306939274072647, "fcm_dpo/margin": 1.0105092525482178, "fcm_dpo/q_t": 0.3771660327911377, "grad_norm": 125.46910095214844, "learning_rate": 3.9501422739279953e-07, "logits/chosen": 1.6252491474151611, "logits/rejected": 1.7246313095092773, "logps/chosen": -80.30120849609375, "logps/ref_chosen": -88.16007995605469, "logps/ref_rejected": -75.11514282226562, "logps/rejected": -68.26677703857422, "loss": 1.224, "margin_dpo/margin_mean": 1.0105094909667969, "margin_dpo/margin_std": 2.206491470336914, "step": 248 }, { "epoch": 0.3764172335600907, "fcm_dpo/beta": 0.7585580348968506, "fcm_dpo/delta": 0.26705750823020935, "fcm_dpo/margin": 0.7847131490707397, "fcm_dpo/q_t": 0.3981843888759613, "grad_norm": 278.2481994628906, "learning_rate": 3.9393515632731094e-07, "logits/chosen": 1.854946255683899, "logits/rejected": 1.8992762565612793, "logps/chosen": -83.19174194335938, "logps/ref_chosen": -91.01773071289062, "logps/ref_rejected": -80.51113891601562, "logps/rejected": -73.46986389160156, "loss": 1.427, "margin_dpo/margin_mean": 0.7847132682800293, "margin_dpo/margin_std": 2.277224063873291, "step": 249 }, { "epoch": 0.3779289493575208, "fcm_dpo/beta": 0.7253923416137695, "fcm_dpo/delta": -0.4017148017883301, "fcm_dpo/margin": 1.6611292362213135, "fcm_dpo/q_t": 0.2990487813949585, "grad_norm": 98.78463745117188, "learning_rate": 3.9285205908608934e-07, "logits/chosen": 2.0098440647125244, "logits/rejected": 1.9583611488342285, "logps/chosen": -72.56451416015625, "logps/ref_chosen": -80.5888671875, "logps/ref_rejected": -90.15093994140625, "logps/rejected": -83.7877197265625, "loss": 0.9505, "margin_dpo/margin_mean": 1.6611298322677612, "margin_dpo/margin_std": 2.270531177520752, "step": 250 }, { "epoch": 0.3794406651549509, "fcm_dpo/beta": 0.731696605682373, "fcm_dpo/delta": 0.27014490962028503, "fcm_dpo/margin": 0.8148388862609863, "fcm_dpo/q_t": 0.39993759989738464, "grad_norm": 125.67576599121094, "learning_rate": 3.9176496596569265e-07, "logits/chosen": 2.2030210494995117, "logits/rejected": 2.085310459136963, "logps/chosen": -74.93942260742188, "logps/ref_chosen": -82.70405578613281, "logps/ref_rejected": -98.94266510009766, "logps/rejected": -91.99287414550781, "loss": 1.3519, "margin_dpo/margin_mean": 0.8148387670516968, "margin_dpo/margin_std": 2.184969186782837, "step": 251 }, { "epoch": 0.38095238095238093, "fcm_dpo/beta": 0.7701340913772583, "fcm_dpo/delta": 0.16608819365501404, "fcm_dpo/margin": 0.8971800804138184, "fcm_dpo/q_t": 0.37846630811691284, "grad_norm": 123.63345336914062, "learning_rate": 3.9067390737445254e-07, "logits/chosen": 1.8227040767669678, "logits/rejected": 1.7111635208129883, "logps/chosen": -65.16363525390625, "logps/ref_chosen": -73.10369110107422, "logps/ref_rejected": -94.90235900878906, "logps/rejected": -87.85948181152344, "loss": 1.2088, "margin_dpo/margin_mean": 0.8971810936927795, "margin_dpo/margin_std": 1.9166100025177002, "step": 252 }, { "epoch": 0.382464096749811, "fcm_dpo/beta": 0.7829554080963135, "fcm_dpo/delta": 0.22496165335178375, "fcm_dpo/margin": 0.8155179619789124, "fcm_dpo/q_t": 0.3876587152481079, "grad_norm": 109.97724151611328, "learning_rate": 3.8957891383162304e-07, "logits/chosen": 2.1262078285217285, "logits/rejected": 2.010744094848633, "logps/chosen": -60.83722686767578, "logps/ref_chosen": -68.7789535522461, "logps/ref_rejected": -75.98162078857422, "logps/rejected": -68.85540771484375, "loss": 1.1644, "margin_dpo/margin_mean": 0.8155180215835571, "margin_dpo/margin_std": 1.587095022201538, "step": 253 }, { "epoch": 0.3839758125472411, "fcm_dpo/beta": 0.7989780902862549, "fcm_dpo/delta": -0.00629083439707756, "fcm_dpo/margin": 1.070601463317871, "fcm_dpo/q_t": 0.35906800627708435, "grad_norm": 130.26258850097656, "learning_rate": 3.884800159665276e-07, "logits/chosen": 1.5529742240905762, "logits/rejected": 1.442957878112793, "logps/chosen": -73.70938873291016, "logps/ref_chosen": -81.49362182617188, "logps/ref_rejected": -101.43672943115234, "logps/rejected": -94.72309875488281, "loss": 1.1745, "margin_dpo/margin_mean": 1.0706019401550293, "margin_dpo/margin_std": 2.0320396423339844, "step": 254 }, { "epoch": 0.3854875283446712, "fcm_dpo/beta": 0.7874916791915894, "fcm_dpo/delta": -0.00892484188079834, "fcm_dpo/margin": 1.082383394241333, "fcm_dpo/q_t": 0.351526141166687, "grad_norm": 126.74822235107422, "learning_rate": 3.873772445177015e-07, "logits/chosen": 1.9369912147521973, "logits/rejected": 1.8948047161102295, "logps/chosen": -82.60552215576172, "logps/ref_chosen": -90.46351623535156, "logps/ref_rejected": -105.32445526123047, "logps/rejected": -98.54884338378906, "loss": 1.0957, "margin_dpo/margin_mean": 1.0823832750320435, "margin_dpo/margin_std": 1.7699964046478271, "step": 255 }, { "epoch": 0.3869992441421013, "fcm_dpo/beta": 0.7961438894271851, "fcm_dpo/delta": -0.011890236288309097, "fcm_dpo/margin": 1.0808762311935425, "fcm_dpo/q_t": 0.3597353994846344, "grad_norm": 130.22622680664062, "learning_rate": 3.862706303320329e-07, "logits/chosen": 1.7444734573364258, "logits/rejected": 1.5635294914245605, "logps/chosen": -73.82586669921875, "logps/ref_chosen": -81.56578063964844, "logps/ref_rejected": -108.58460998535156, "logps/rejected": -101.92556762695312, "loss": 1.1265, "margin_dpo/margin_mean": 1.0808756351470947, "margin_dpo/margin_std": 1.953932523727417, "step": 256 }, { "epoch": 0.3885109599395314, "fcm_dpo/beta": 0.7655524015426636, "fcm_dpo/delta": -0.21847796440124512, "fcm_dpo/margin": 1.3607196807861328, "fcm_dpo/q_t": 0.34392106533050537, "grad_norm": 142.00601196289062, "learning_rate": 3.851602043638994e-07, "logits/chosen": 1.687360167503357, "logits/rejected": 1.6134676933288574, "logps/chosen": -81.99454498291016, "logps/ref_chosen": -89.57557678222656, "logps/ref_rejected": -123.74462127685547, "logps/rejected": -117.52430725097656, "loss": 1.1313, "margin_dpo/margin_mean": 1.360719919204712, "margin_dpo/margin_std": 2.331404209136963, "step": 257 }, { "epoch": 0.3900226757369615, "fcm_dpo/beta": 0.7688815593719482, "fcm_dpo/delta": -0.041162073612213135, "fcm_dpo/margin": 1.1515392065048218, "fcm_dpo/q_t": 0.3347882628440857, "grad_norm": 122.0123062133789, "learning_rate": 3.840459976743023e-07, "logits/chosen": 1.8406729698181152, "logits/rejected": 1.712424635887146, "logps/chosen": -69.8455581665039, "logps/ref_chosen": -77.34173583984375, "logps/ref_rejected": -99.5709228515625, "logps/rejected": -93.22627258300781, "loss": 0.9454, "margin_dpo/margin_mean": 1.1515395641326904, "margin_dpo/margin_std": 1.4946105480194092, "step": 258 }, { "epoch": 0.3915343915343915, "fcm_dpo/beta": 0.7144708037376404, "fcm_dpo/delta": -0.3142710328102112, "fcm_dpo/margin": 1.5702617168426514, "fcm_dpo/q_t": 0.30293339490890503, "grad_norm": 104.40306854248047, "learning_rate": 3.8292804142999796e-07, "logits/chosen": 1.6947746276855469, "logits/rejected": 1.5361230373382568, "logps/chosen": -74.45024108886719, "logps/ref_chosen": -82.39556121826172, "logps/ref_rejected": -113.73309326171875, "logps/rejected": -107.3580322265625, "loss": 0.9803, "margin_dpo/margin_mean": 1.5702614784240723, "margin_dpo/margin_std": 2.119558334350586, "step": 259 }, { "epoch": 0.3930461073318216, "fcm_dpo/beta": 0.70084547996521, "fcm_dpo/delta": -0.02580847591161728, "fcm_dpo/margin": 1.2419445514678955, "fcm_dpo/q_t": 0.3515279293060303, "grad_norm": 115.76726531982422, "learning_rate": 3.818063669026256e-07, "logits/chosen": 1.5052006244659424, "logits/rejected": 1.3275035619735718, "logps/chosen": -58.38023376464844, "logps/ref_chosen": -65.98947143554688, "logps/ref_rejected": -94.59706115722656, "logps/rejected": -88.22976684570312, "loss": 1.0882, "margin_dpo/margin_mean": 1.241944432258606, "margin_dpo/margin_std": 2.076496124267578, "step": 260 }, { "epoch": 0.3945578231292517, "fcm_dpo/beta": 0.7224990129470825, "fcm_dpo/delta": 0.16039912402629852, "fcm_dpo/margin": 0.9704261422157288, "fcm_dpo/q_t": 0.3779695928096771, "grad_norm": 124.87211608886719, "learning_rate": 3.806810054678331e-07, "logits/chosen": 1.6255950927734375, "logits/rejected": 1.6686420440673828, "logps/chosen": -81.06610870361328, "logps/ref_chosen": -88.87684631347656, "logps/ref_rejected": -82.34838104248047, "logps/rejected": -75.50807189941406, "loss": 1.1139, "margin_dpo/margin_mean": 0.9704260230064392, "margin_dpo/margin_std": 1.7810626029968262, "step": 261 }, { "epoch": 0.3960695389266818, "fcm_dpo/beta": 0.7318278551101685, "fcm_dpo/delta": -0.09685448557138443, "fcm_dpo/margin": 1.2777411937713623, "fcm_dpo/q_t": 0.33473628759384155, "grad_norm": 107.7884521484375, "learning_rate": 3.7955198860439887e-07, "logits/chosen": 2.0021393299102783, "logits/rejected": 1.834110975265503, "logps/chosen": -78.01080322265625, "logps/ref_chosen": -85.81719970703125, "logps/ref_rejected": -105.49027252197266, "logps/rejected": -98.96160888671875, "loss": 0.9994, "margin_dpo/margin_mean": 1.277741551399231, "margin_dpo/margin_std": 1.8485413789749146, "step": 262 }, { "epoch": 0.3975812547241119, "fcm_dpo/beta": 0.7293317317962646, "fcm_dpo/delta": 0.11338646709918976, "fcm_dpo/margin": 1.0216336250305176, "fcm_dpo/q_t": 0.37511640787124634, "grad_norm": 124.12923431396484, "learning_rate": 3.784193478933516e-07, "logits/chosen": 1.688468098640442, "logits/rejected": 1.4333298206329346, "logps/chosen": -65.73736572265625, "logps/ref_chosen": -73.61693572998047, "logps/ref_rejected": -102.39161682128906, "logps/rejected": -95.53368377685547, "loss": 1.1517, "margin_dpo/margin_mean": 1.0216336250305176, "margin_dpo/margin_std": 1.9469211101531982, "step": 263 }, { "epoch": 0.39909297052154197, "fcm_dpo/beta": 0.7261339426040649, "fcm_dpo/delta": -0.09379763156175613, "fcm_dpo/margin": 1.287791132926941, "fcm_dpo/q_t": 0.3396008610725403, "grad_norm": 115.59385681152344, "learning_rate": 3.7728311501708674e-07, "logits/chosen": 1.5454028844833374, "logits/rejected": 1.414917230606079, "logps/chosen": -93.84513854980469, "logps/ref_chosen": -101.57856750488281, "logps/ref_rejected": -111.65735626220703, "logps/rejected": -105.21172332763672, "loss": 0.9606, "margin_dpo/margin_mean": 1.2877914905548096, "margin_dpo/margin_std": 1.829077959060669, "step": 264 }, { "epoch": 0.40060468631897206, "fcm_dpo/beta": 0.693960428237915, "fcm_dpo/delta": -0.3431906998157501, "fcm_dpo/margin": 1.6635128259658813, "fcm_dpo/q_t": 0.31836625933647156, "grad_norm": 101.43350982666016, "learning_rate": 3.7614332175848027e-07, "logits/chosen": 1.7897309064865112, "logits/rejected": 1.6861083507537842, "logps/chosen": -57.975059509277344, "logps/ref_chosen": -65.76426696777344, "logps/ref_rejected": -85.19627380371094, "logps/rejected": -79.0705795288086, "loss": 0.9912, "margin_dpo/margin_mean": 1.6635124683380127, "margin_dpo/margin_std": 2.3382043838500977, "step": 265 }, { "epoch": 0.4021164021164021, "fcm_dpo/beta": 0.6654868125915527, "fcm_dpo/delta": -0.08284502476453781, "fcm_dpo/margin": 1.3903380632400513, "fcm_dpo/q_t": 0.3421247601509094, "grad_norm": 109.36337280273438, "learning_rate": 3.75e-07, "logits/chosen": 1.7660210132598877, "logits/rejected": 1.606216549873352, "logps/chosen": -67.14865112304688, "logps/ref_chosen": -75.05682373046875, "logps/ref_rejected": -97.52758026123047, "logps/rejected": -91.00975036621094, "loss": 1.0966, "margin_dpo/margin_mean": 1.3903379440307617, "margin_dpo/margin_std": 2.380831718444824, "step": 266 }, { "epoch": 0.4036281179138322, "fcm_dpo/beta": 0.6768280267715454, "fcm_dpo/delta": 0.007773265242576599, "fcm_dpo/margin": 1.2320008277893066, "fcm_dpo/q_t": 0.34474682807922363, "grad_norm": 93.51432800292969, "learning_rate": 3.738531817228131e-07, "logits/chosen": 1.7341187000274658, "logits/rejected": 1.6710472106933594, "logps/chosen": -63.215660095214844, "logps/ref_chosen": -71.13494110107422, "logps/ref_rejected": -81.14566040039062, "logps/rejected": -74.45838928222656, "loss": 1.0138, "margin_dpo/margin_mean": 1.2320003509521484, "margin_dpo/margin_std": 1.68747878074646, "step": 267 }, { "epoch": 0.4051398337112623, "fcm_dpo/beta": 0.6612510085105896, "fcm_dpo/delta": -0.019557401537895203, "fcm_dpo/margin": 0.8605862855911255, "fcm_dpo/q_t": 0.40874695777893066, "grad_norm": 110.64611053466797, "learning_rate": 3.7270289900589204e-07, "logits/chosen": 1.6089922189712524, "logits/rejected": 1.5754320621490479, "logps/chosen": -72.44566345214844, "logps/ref_chosen": -80.06082153320312, "logps/ref_rejected": -87.43035888671875, "logps/rejected": -80.67579650878906, "loss": 1.2458, "margin_dpo/margin_mean": 0.860586404800415, "margin_dpo/margin_std": 2.068237066268921, "step": 268 }, { "epoch": 0.40665154950869237, "fcm_dpo/beta": 0.6837191581726074, "fcm_dpo/delta": 0.07626542448997498, "fcm_dpo/margin": 1.1217018365859985, "fcm_dpo/q_t": 0.3589403033256531, "grad_norm": 116.86734008789062, "learning_rate": 3.7154918402511714e-07, "logits/chosen": 2.1879143714904785, "logits/rejected": 2.1236653327941895, "logps/chosen": -75.88362121582031, "logps/ref_chosen": -83.36944580078125, "logps/ref_rejected": -100.66839599609375, "logps/rejected": -94.30427551269531, "loss": 1.0928, "margin_dpo/margin_mean": 1.121701955795288, "margin_dpo/margin_std": 1.7563908100128174, "step": 269 }, { "epoch": 0.40816326530612246, "fcm_dpo/beta": 0.6945409774780273, "fcm_dpo/delta": 0.22258900105953217, "fcm_dpo/margin": 0.9228448867797852, "fcm_dpo/q_t": 0.38596194982528687, "grad_norm": 128.61280822753906, "learning_rate": 3.7039206905237656e-07, "logits/chosen": 1.8595054149627686, "logits/rejected": 1.6868481636047363, "logps/chosen": -77.72801208496094, "logps/ref_chosen": -85.35945129394531, "logps/ref_rejected": -104.47489929199219, "logps/rejected": -97.76631164550781, "loss": 1.1717, "margin_dpo/margin_mean": 0.9228445291519165, "margin_dpo/margin_std": 1.8657047748565674, "step": 270 }, { "epoch": 0.40967498110355255, "fcm_dpo/beta": 0.7304049134254456, "fcm_dpo/delta": 0.2556050419807434, "fcm_dpo/margin": 0.833530068397522, "fcm_dpo/q_t": 0.41634997725486755, "grad_norm": 134.74444580078125, "learning_rate": 3.692315864546635e-07, "logits/chosen": 1.6439170837402344, "logits/rejected": 1.5446662902832031, "logps/chosen": -78.65917205810547, "logps/ref_chosen": -86.01373291015625, "logps/ref_rejected": -109.99561309814453, "logps/rejected": -103.47459411621094, "loss": 1.3205, "margin_dpo/margin_mean": 0.8335303068161011, "margin_dpo/margin_std": 2.265486717224121, "step": 271 }, { "epoch": 0.41118669690098264, "fcm_dpo/beta": 0.7071244716644287, "fcm_dpo/delta": -0.19628196954727173, "fcm_dpo/margin": 1.4435720443725586, "fcm_dpo/q_t": 0.311567485332489, "grad_norm": 101.71968841552734, "learning_rate": 3.6806776869317067e-07, "logits/chosen": 1.8980367183685303, "logits/rejected": 1.907454252243042, "logps/chosen": -78.45289611816406, "logps/ref_chosen": -86.37013244628906, "logps/ref_rejected": -85.74638366699219, "logps/rejected": -79.27271270751953, "loss": 0.9784, "margin_dpo/margin_mean": 1.4435718059539795, "margin_dpo/margin_std": 2.2550201416015625, "step": 272 }, { "epoch": 0.4126984126984127, "fcm_dpo/beta": 0.6779999136924744, "fcm_dpo/delta": -0.2062806934118271, "fcm_dpo/margin": 1.5170402526855469, "fcm_dpo/q_t": 0.31864964962005615, "grad_norm": 97.80946350097656, "learning_rate": 3.669006483223828e-07, "logits/chosen": 2.017239809036255, "logits/rejected": 1.89370858669281, "logps/chosen": -67.67605590820312, "logps/ref_chosen": -75.51087951660156, "logps/ref_rejected": -101.60345458984375, "logps/rejected": -95.28567504882812, "loss": 0.9779, "margin_dpo/margin_mean": 1.5170400142669678, "margin_dpo/margin_std": 2.1456356048583984, "step": 273 }, { "epoch": 0.41421012849584277, "fcm_dpo/beta": 0.6887916326522827, "fcm_dpo/delta": 0.07680375128984451, "fcm_dpo/margin": 1.1310944557189941, "fcm_dpo/q_t": 0.3631494641304016, "grad_norm": 110.92794036865234, "learning_rate": 3.657302579891656e-07, "logits/chosen": 1.6533360481262207, "logits/rejected": 1.6071021556854248, "logps/chosen": -71.356201171875, "logps/ref_chosen": -79.040283203125, "logps/ref_rejected": -86.31329345703125, "logps/rejected": -79.76029968261719, "loss": 1.0728, "margin_dpo/margin_mean": 1.1310945749282837, "margin_dpo/margin_std": 1.8437747955322266, "step": 274 }, { "epoch": 0.41572184429327286, "fcm_dpo/beta": 0.6772985458374023, "fcm_dpo/delta": -0.1687670648097992, "fcm_dpo/margin": 1.4796142578125, "fcm_dpo/q_t": 0.32771944999694824, "grad_norm": 96.30763244628906, "learning_rate": 3.645566304318526e-07, "logits/chosen": 1.5781314373016357, "logits/rejected": 1.3936455249786377, "logps/chosen": -63.84782409667969, "logps/ref_chosen": -71.82034301757812, "logps/ref_rejected": -94.29946899414062, "logps/rejected": -87.80656433105469, "loss": 0.9312, "margin_dpo/margin_mean": 1.4796141386032104, "margin_dpo/margin_std": 1.9710824489593506, "step": 275 }, { "epoch": 0.41723356009070295, "fcm_dpo/beta": 0.6629969477653503, "fcm_dpo/delta": 0.0017523542046546936, "fcm_dpo/margin": 1.2779521942138672, "fcm_dpo/q_t": 0.35035306215286255, "grad_norm": 104.47041320800781, "learning_rate": 3.633797984793294e-07, "logits/chosen": 1.33461594581604, "logits/rejected": 1.284557580947876, "logps/chosen": -61.51626205444336, "logps/ref_chosen": -69.54020690917969, "logps/ref_rejected": -78.59674072265625, "logps/rejected": -71.85074615478516, "loss": 1.0768, "margin_dpo/margin_mean": 1.277951717376709, "margin_dpo/margin_std": 2.062228202819824, "step": 276 }, { "epoch": 0.41874527588813304, "fcm_dpo/beta": 0.7123348116874695, "fcm_dpo/delta": 0.4012848138809204, "fcm_dpo/margin": 0.6534094214439392, "fcm_dpo/q_t": 0.4210251569747925, "grad_norm": 130.19923400878906, "learning_rate": 3.6219979505011555e-07, "logits/chosen": 1.9294466972351074, "logits/rejected": 1.9969127178192139, "logps/chosen": -87.2117919921875, "logps/ref_chosen": -94.4896240234375, "logps/ref_rejected": -85.45901489257812, "logps/rejected": -78.8345947265625, "loss": 1.4113, "margin_dpo/margin_mean": 0.6534090638160706, "margin_dpo/margin_std": 2.0752005577087402, "step": 277 }, { "epoch": 0.42025699168556313, "fcm_dpo/beta": 0.7564055323600769, "fcm_dpo/delta": 0.16849283874034882, "fcm_dpo/margin": 0.9032930135726929, "fcm_dpo/q_t": 0.3842105269432068, "grad_norm": 154.10960388183594, "learning_rate": 3.6101665315144353e-07, "logits/chosen": 1.4928083419799805, "logits/rejected": 1.3751368522644043, "logps/chosen": -79.97828674316406, "logps/ref_chosen": -87.42613220214844, "logps/ref_rejected": -105.44854736328125, "logps/rejected": -98.90399932861328, "loss": 1.2411, "margin_dpo/margin_mean": 0.9032933712005615, "margin_dpo/margin_std": 1.8733234405517578, "step": 278 }, { "epoch": 0.4217687074829932, "fcm_dpo/beta": 0.7383002042770386, "fcm_dpo/delta": -0.1532890498638153, "fcm_dpo/margin": 1.3381755352020264, "fcm_dpo/q_t": 0.31081023812294006, "grad_norm": 95.42361450195312, "learning_rate": 3.5983040587833563e-07, "logits/chosen": 1.6094672679901123, "logits/rejected": 1.5519022941589355, "logps/chosen": -62.492191314697266, "logps/ref_chosen": -70.516845703125, "logps/ref_rejected": -86.04249572753906, "logps/rejected": -79.35601806640625, "loss": 0.922, "margin_dpo/margin_mean": 1.3381754159927368, "margin_dpo/margin_std": 1.6217188835144043, "step": 279 }, { "epoch": 0.42328042328042326, "fcm_dpo/beta": 0.6995072364807129, "fcm_dpo/delta": -0.2576003074645996, "fcm_dpo/margin": 1.5437543392181396, "fcm_dpo/q_t": 0.2961677312850952, "grad_norm": 82.45001983642578, "learning_rate": 3.586410864126781e-07, "logits/chosen": 1.6309643983840942, "logits/rejected": 1.534651517868042, "logps/chosen": -68.54518127441406, "logps/ref_chosen": -76.5021743774414, "logps/ref_rejected": -94.2752685546875, "logps/rejected": -87.86203002929688, "loss": 0.792, "margin_dpo/margin_mean": 1.5437543392181396, "margin_dpo/margin_std": 1.59554123878479, "step": 280 }, { "epoch": 0.42479213907785335, "fcm_dpo/beta": 0.6891137361526489, "fcm_dpo/delta": -0.01607239432632923, "fcm_dpo/margin": 1.2547452449798584, "fcm_dpo/q_t": 0.34183090925216675, "grad_norm": 86.30965423583984, "learning_rate": 3.574487280222929e-07, "logits/chosen": 1.7331175804138184, "logits/rejected": 1.7544758319854736, "logps/chosen": -69.59275817871094, "logps/ref_chosen": -77.50468444824219, "logps/ref_rejected": -79.05717468261719, "logps/rejected": -72.39997863769531, "loss": 0.9887, "margin_dpo/margin_mean": 1.2547452449798584, "margin_dpo/margin_std": 1.7732079029083252, "step": 281 }, { "epoch": 0.42630385487528344, "fcm_dpo/beta": 0.6955768465995789, "fcm_dpo/delta": -0.03829964995384216, "fcm_dpo/margin": 1.2650123834609985, "fcm_dpo/q_t": 0.35389894247055054, "grad_norm": 114.90829467773438, "learning_rate": 3.562533640600075e-07, "logits/chosen": 1.4182642698287964, "logits/rejected": 1.3314048051834106, "logps/chosen": -72.59228515625, "logps/ref_chosen": -80.31298065185547, "logps/ref_rejected": -83.72120666503906, "logps/rejected": -77.26553344726562, "loss": 1.0497, "margin_dpo/margin_mean": 1.2650126218795776, "margin_dpo/margin_std": 2.0284345149993896, "step": 282 }, { "epoch": 0.42781557067271353, "fcm_dpo/beta": 0.6725457906723022, "fcm_dpo/delta": -0.064914271235466, "fcm_dpo/margin": 1.3505455255508423, "fcm_dpo/q_t": 0.35023432970046997, "grad_norm": 110.76354217529297, "learning_rate": 3.550550279627215e-07, "logits/chosen": 1.724194884300232, "logits/rejected": 1.4388937950134277, "logps/chosen": -73.36625671386719, "logps/ref_chosen": -80.72602844238281, "logps/ref_rejected": -115.68379211425781, "logps/rejected": -109.67456817626953, "loss": 1.0482, "margin_dpo/margin_mean": 1.3505456447601318, "margin_dpo/margin_std": 2.190821647644043, "step": 283 }, { "epoch": 0.4293272864701436, "fcm_dpo/beta": 0.6819826364517212, "fcm_dpo/delta": 0.028838299214839935, "fcm_dpo/margin": 1.2053046226501465, "fcm_dpo/q_t": 0.3472345471382141, "grad_norm": 118.19925689697266, "learning_rate": 3.5385375325047163e-07, "logits/chosen": 1.6477620601654053, "logits/rejected": 1.527233600616455, "logps/chosen": -69.83183288574219, "logps/ref_chosen": -77.5223388671875, "logps/ref_rejected": -104.1847152709961, "logps/rejected": -97.69950866699219, "loss": 1.0878, "margin_dpo/margin_mean": 1.2053046226501465, "margin_dpo/margin_std": 1.9999240636825562, "step": 284 }, { "epoch": 0.4308390022675737, "fcm_dpo/beta": 0.7069652080535889, "fcm_dpo/delta": 0.25054311752319336, "fcm_dpo/margin": 0.866946816444397, "fcm_dpo/q_t": 0.3933573365211487, "grad_norm": 136.90176391601562, "learning_rate": 3.5264957352549375e-07, "logits/chosen": 1.8853328227996826, "logits/rejected": 1.8092838525772095, "logps/chosen": -78.52452087402344, "logps/ref_chosen": -85.79348754882812, "logps/ref_rejected": -96.46463775634766, "logps/rejected": -90.06261444091797, "loss": 1.2171, "margin_dpo/margin_mean": 0.8669461011886597, "margin_dpo/margin_std": 1.8605599403381348, "step": 285 }, { "epoch": 0.4323507180650038, "fcm_dpo/beta": 0.6897353529930115, "fcm_dpo/delta": -0.19799461960792542, "fcm_dpo/margin": 1.48897123336792, "fcm_dpo/q_t": 0.3047965168952942, "grad_norm": 103.36872863769531, "learning_rate": 3.514425224712835e-07, "logits/chosen": 1.4609103202819824, "logits/rejected": 1.2690503597259521, "logps/chosen": -70.50439453125, "logps/ref_chosen": -77.86268615722656, "logps/ref_rejected": -110.77134704589844, "logps/rejected": -104.90202331542969, "loss": 0.9198, "margin_dpo/margin_mean": 1.4889705181121826, "margin_dpo/margin_std": 1.9027836322784424, "step": 286 }, { "epoch": 0.43386243386243384, "fcm_dpo/beta": 0.6805661916732788, "fcm_dpo/delta": -0.16245746612548828, "fcm_dpo/margin": 1.4589457511901855, "fcm_dpo/q_t": 0.32934582233428955, "grad_norm": 129.42222595214844, "learning_rate": 3.502326338516534e-07, "logits/chosen": 1.5750706195831299, "logits/rejected": 1.5692813396453857, "logps/chosen": -54.83763885498047, "logps/ref_chosen": -62.552825927734375, "logps/ref_rejected": -77.7650146484375, "logps/rejected": -71.5087661743164, "loss": 1.0502, "margin_dpo/margin_mean": 1.4589459896087646, "margin_dpo/margin_std": 2.2289929389953613, "step": 287 }, { "epoch": 0.43537414965986393, "fcm_dpo/beta": 0.6700143218040466, "fcm_dpo/delta": 0.08774229884147644, "fcm_dpo/margin": 1.147871732711792, "fcm_dpo/q_t": 0.367706835269928, "grad_norm": 119.22252655029297, "learning_rate": 3.490199415097892e-07, "logits/chosen": 1.2430505752563477, "logits/rejected": 1.1871659755706787, "logps/chosen": -76.31163024902344, "logps/ref_chosen": -83.74117279052734, "logps/ref_rejected": -106.93913269042969, "logps/rejected": -100.65745544433594, "loss": 1.1886, "margin_dpo/margin_mean": 1.1478712558746338, "margin_dpo/margin_std": 2.267000675201416, "step": 288 }, { "epoch": 0.436885865457294, "fcm_dpo/beta": 0.6890686750411987, "fcm_dpo/delta": 0.14361050724983215, "fcm_dpo/margin": 1.0400171279907227, "fcm_dpo/q_t": 0.37594807147979736, "grad_norm": 107.06436157226562, "learning_rate": 3.4780447936730247e-07, "logits/chosen": 2.0020227432250977, "logits/rejected": 1.9443631172180176, "logps/chosen": -65.77583312988281, "logps/ref_chosen": -73.04204559326172, "logps/ref_rejected": -88.07904052734375, "logps/rejected": -81.85284423828125, "loss": 1.0901, "margin_dpo/margin_mean": 1.0400168895721436, "margin_dpo/margin_std": 1.791898250579834, "step": 289 }, { "epoch": 0.4383975812547241, "fcm_dpo/beta": 0.6793229579925537, "fcm_dpo/delta": -0.10350015759468079, "fcm_dpo/margin": 1.387347936630249, "fcm_dpo/q_t": 0.3246391713619232, "grad_norm": 89.62129211425781, "learning_rate": 3.465862814232821e-07, "logits/chosen": 1.905479073524475, "logits/rejected": 1.8604974746704102, "logps/chosen": -71.6802978515625, "logps/ref_chosen": -78.60614013671875, "logps/ref_rejected": -108.50082397460938, "logps/rejected": -102.96233367919922, "loss": 0.9096, "margin_dpo/margin_mean": 1.3873467445373535, "margin_dpo/margin_std": 1.714540958404541, "step": 290 }, { "epoch": 0.4399092970521542, "fcm_dpo/beta": 0.6729252338409424, "fcm_dpo/delta": -0.13280794024467468, "fcm_dpo/margin": 1.4405841827392578, "fcm_dpo/q_t": 0.33513006567955017, "grad_norm": 92.02787017822266, "learning_rate": 3.4536538175334343e-07, "logits/chosen": 1.7266473770141602, "logits/rejected": 1.5194730758666992, "logps/chosen": -59.418121337890625, "logps/ref_chosen": -66.71226501464844, "logps/ref_rejected": -96.14029693603516, "logps/rejected": -90.28672790527344, "loss": 1.0444, "margin_dpo/margin_mean": 1.4405841827392578, "margin_dpo/margin_std": 2.1988980770111084, "step": 291 }, { "epoch": 0.4414210128495843, "fcm_dpo/beta": 0.6711477041244507, "fcm_dpo/delta": 0.08469577133655548, "fcm_dpo/margin": 1.1498453617095947, "fcm_dpo/q_t": 0.37209683656692505, "grad_norm": 128.0401611328125, "learning_rate": 3.4414181450867465e-07, "logits/chosen": 1.6419053077697754, "logits/rejected": 1.5817103385925293, "logps/chosen": -72.85503387451172, "logps/ref_chosen": -80.3355484008789, "logps/ref_rejected": -90.44906616210938, "logps/rejected": -84.1183853149414, "loss": 1.1254, "margin_dpo/margin_mean": 1.1498451232910156, "margin_dpo/margin_std": 2.085261583328247, "step": 292 }, { "epoch": 0.4429327286470144, "fcm_dpo/beta": 0.6578654050827026, "fcm_dpo/delta": -0.17292258143424988, "fcm_dpo/margin": 1.5288608074188232, "fcm_dpo/q_t": 0.3235200047492981, "grad_norm": 98.03875732421875, "learning_rate": 3.4291561391508185e-07, "logits/chosen": 1.8567837476730347, "logits/rejected": 1.749990701675415, "logps/chosen": -64.44340515136719, "logps/ref_chosen": -71.69970703125, "logps/ref_rejected": -102.13948059082031, "logps/rejected": -96.41204833984375, "loss": 1.0279, "margin_dpo/margin_mean": 1.5288608074188232, "margin_dpo/margin_std": 2.306509017944336, "step": 293 }, { "epoch": 0.4444444444444444, "fcm_dpo/beta": 0.6519556045532227, "fcm_dpo/delta": 0.06831908226013184, "fcm_dpo/margin": 1.2061865329742432, "fcm_dpo/q_t": 0.3630805015563965, "grad_norm": 106.47350311279297, "learning_rate": 3.4168681427203153e-07, "logits/chosen": 1.8985698223114014, "logits/rejected": 1.8086557388305664, "logps/chosen": -63.361061096191406, "logps/ref_chosen": -70.73458862304688, "logps/ref_rejected": -86.68821716308594, "logps/rejected": -80.5208740234375, "loss": 1.0708, "margin_dpo/margin_mean": 1.206186056137085, "margin_dpo/margin_std": 2.0081372261047363, "step": 294 }, { "epoch": 0.4459561602418745, "fcm_dpo/beta": 0.6680145263671875, "fcm_dpo/delta": 0.033205777406692505, "fcm_dpo/margin": 1.2245148420333862, "fcm_dpo/q_t": 0.36217230558395386, "grad_norm": 95.26288604736328, "learning_rate": 3.4045544995169125e-07, "logits/chosen": 1.6314678192138672, "logits/rejected": 1.4151748418807983, "logps/chosen": -59.078224182128906, "logps/ref_chosen": -66.42644500732422, "logps/ref_rejected": -99.58766174316406, "logps/rejected": -93.46395874023438, "loss": 1.073, "margin_dpo/margin_mean": 1.2245147228240967, "margin_dpo/margin_std": 2.132988929748535, "step": 295 }, { "epoch": 0.4474678760393046, "fcm_dpo/beta": 0.6513910293579102, "fcm_dpo/delta": -0.0948001965880394, "fcm_dpo/margin": 1.4355227947235107, "fcm_dpo/q_t": 0.34222400188446045, "grad_norm": 112.76392364501953, "learning_rate": 3.392215553979679e-07, "logits/chosen": 1.5478891134262085, "logits/rejected": 1.4268944263458252, "logps/chosen": -80.12300872802734, "logps/ref_chosen": -87.47459411621094, "logps/ref_rejected": -103.96894836425781, "logps/rejected": -98.05288696289062, "loss": 1.0099, "margin_dpo/margin_mean": 1.4355229139328003, "margin_dpo/margin_std": 2.1124844551086426, "step": 296 }, { "epoch": 0.4489795918367347, "fcm_dpo/beta": 0.6308517456054688, "fcm_dpo/delta": -0.2799900472164154, "fcm_dpo/margin": 1.7435264587402344, "fcm_dpo/q_t": 0.2898668050765991, "grad_norm": 77.33614349365234, "learning_rate": 3.3798516512554485e-07, "logits/chosen": 1.6062670946121216, "logits/rejected": 1.498281717300415, "logps/chosen": -66.13429260253906, "logps/ref_chosen": -73.46731567382812, "logps/ref_rejected": -88.22674560546875, "logps/rejected": -82.63723754882812, "loss": 0.7987, "margin_dpo/margin_mean": 1.7435266971588135, "margin_dpo/margin_std": 1.7232532501220703, "step": 297 }, { "epoch": 0.4504913076341648, "fcm_dpo/beta": 0.6258028745651245, "fcm_dpo/delta": 0.04764336347579956, "fcm_dpo/margin": 1.2851078510284424, "fcm_dpo/q_t": 0.35264813899993896, "grad_norm": 84.5466079711914, "learning_rate": 3.367463137189156e-07, "logits/chosen": 2.073479175567627, "logits/rejected": 1.9958412647247314, "logps/chosen": -65.99359130859375, "logps/ref_chosen": -73.21676635742188, "logps/ref_rejected": -84.9563217163086, "logps/rejected": -79.01826477050781, "loss": 1.0548, "margin_dpo/margin_mean": 1.2851074934005737, "margin_dpo/margin_std": 2.02394437789917, "step": 298 }, { "epoch": 0.4520030234315949, "fcm_dpo/beta": 0.6498622894287109, "fcm_dpo/delta": 0.2780328392982483, "fcm_dpo/margin": 0.9038135409355164, "fcm_dpo/q_t": 0.39124494791030884, "grad_norm": 99.46502685546875, "learning_rate": 3.355050358314172e-07, "logits/chosen": 1.5133554935455322, "logits/rejected": 1.4368096590042114, "logps/chosen": -69.8514633178711, "logps/ref_chosen": -76.9534912109375, "logps/ref_rejected": -87.53433227539062, "logps/rejected": -81.33611297607422, "loss": 1.1256, "margin_dpo/margin_mean": 0.9038130044937134, "margin_dpo/margin_std": 1.717150330543518, "step": 299 }, { "epoch": 0.45351473922902497, "fcm_dpo/beta": 0.6565319299697876, "fcm_dpo/delta": 0.0751301571726799, "fcm_dpo/margin": 1.1859982013702393, "fcm_dpo/q_t": 0.3662998080253601, "grad_norm": 105.666015625, "learning_rate": 3.3426136618426043e-07, "logits/chosen": 1.7022504806518555, "logits/rejected": 1.5639350414276123, "logps/chosen": -71.36064147949219, "logps/ref_chosen": -78.36398315429688, "logps/ref_rejected": -97.03912353515625, "logps/rejected": -91.22178649902344, "loss": 1.1394, "margin_dpo/margin_mean": 1.1859978437423706, "margin_dpo/margin_std": 2.2330620288848877, "step": 300 }, { "epoch": 0.45351473922902497, "eval_fcm_dpo/beta": 0.6672008633613586, "eval_logits/chosen": 1.875981330871582, "eval_logits/rejected": 1.7587411403656006, "eval_logps/chosen": -79.80497741699219, "eval_logps/ref_chosen": -86.90177917480469, "eval_logps/ref_rejected": -96.69639587402344, "eval_logps/rejected": -90.79612731933594, "eval_loss": 0.5554325580596924, "eval_margin_dpo/margin_mean": 1.1965415477752686, "eval_margin_dpo/margin_std": 2.101234197616577, "eval_runtime": 42.2923, "eval_samples_per_second": 54.454, "eval_steps_per_second": 1.702, "step": 300 }, { "epoch": 0.455026455026455, "fcm_dpo/beta": 0.6764302253723145, "fcm_dpo/delta": 0.14282676577568054, "fcm_dpo/margin": 1.0604530572891235, "fcm_dpo/q_t": 0.3812572956085205, "grad_norm": 111.0069351196289, "learning_rate": 3.3301533956555885e-07, "logits/chosen": 1.8055815696716309, "logits/rejected": 1.753129482269287, "logps/chosen": -63.5869255065918, "logps/ref_chosen": -70.6719741821289, "logps/ref_rejected": -87.11650085449219, "logps/rejected": -81.09191131591797, "loss": 1.1743, "margin_dpo/margin_mean": 1.060452938079834, "margin_dpo/margin_std": 2.0663914680480957, "step": 301 }, { "epoch": 0.4565381708238851, "fcm_dpo/beta": 0.7274559140205383, "fcm_dpo/delta": 0.33254778385162354, "fcm_dpo/margin": 0.7300405502319336, "fcm_dpo/q_t": 0.40241554379463196, "grad_norm": 123.19378662109375, "learning_rate": 3.317669908293554e-07, "logits/chosen": 1.402217149734497, "logits/rejected": 1.3271420001983643, "logps/chosen": -78.32188415527344, "logps/ref_chosen": -85.29096221923828, "logps/ref_rejected": -106.22589874267578, "logps/rejected": -99.98685455322266, "loss": 1.2018, "margin_dpo/margin_mean": 0.7300394773483276, "margin_dpo/margin_std": 1.5843493938446045, "step": 302 }, { "epoch": 0.4580498866213152, "fcm_dpo/beta": 0.6903226375579834, "fcm_dpo/delta": -0.33917659521102905, "fcm_dpo/margin": 1.6581306457519531, "fcm_dpo/q_t": 0.30730390548706055, "grad_norm": 94.3570556640625, "learning_rate": 3.3051635489464793e-07, "logits/chosen": 1.7029955387115479, "logits/rejected": 1.6008670330047607, "logps/chosen": -76.84014129638672, "logps/ref_chosen": -83.90059661865234, "logps/ref_rejected": -104.7340087890625, "logps/rejected": -99.33168029785156, "loss": 0.8927, "margin_dpo/margin_mean": 1.6581302881240845, "margin_dpo/margin_std": 2.1296894550323486, "step": 303 }, { "epoch": 0.4595616024187453, "fcm_dpo/beta": 0.6939245462417603, "fcm_dpo/delta": 0.02791178971529007, "fcm_dpo/margin": 1.1859949827194214, "fcm_dpo/q_t": 0.3410758972167969, "grad_norm": 110.60444641113281, "learning_rate": 3.292634667444117e-07, "logits/chosen": 1.6923638582229614, "logits/rejected": 1.5971921682357788, "logps/chosen": -70.20957946777344, "logps/ref_chosen": -77.39997100830078, "logps/ref_rejected": -94.21647644042969, "logps/rejected": -88.21208190917969, "loss": 0.9704, "margin_dpo/margin_mean": 1.1859947443008423, "margin_dpo/margin_std": 1.6047704219818115, "step": 304 }, { "epoch": 0.46107331821617537, "fcm_dpo/beta": 0.6839466094970703, "fcm_dpo/delta": 0.019364356994628906, "fcm_dpo/margin": 1.2137377262115479, "fcm_dpo/q_t": 0.3603229522705078, "grad_norm": 109.2071762084961, "learning_rate": 3.280083614246217e-07, "logits/chosen": 1.5578687191009521, "logits/rejected": 1.6177153587341309, "logps/chosen": -83.97364807128906, "logps/ref_chosen": -90.90805053710938, "logps/ref_rejected": -85.84992980957031, "logps/rejected": -80.1292724609375, "loss": 1.1384, "margin_dpo/margin_mean": 1.213738203048706, "margin_dpo/margin_std": 2.1745810508728027, "step": 305 }, { "epoch": 0.46258503401360546, "fcm_dpo/beta": 0.6984622478485107, "fcm_dpo/delta": 0.1099298745393753, "fcm_dpo/margin": 1.0709459781646729, "fcm_dpo/q_t": 0.37685760855674744, "grad_norm": 103.61567687988281, "learning_rate": 3.267510740432719e-07, "logits/chosen": 1.6553785800933838, "logits/rejected": 1.441781997680664, "logps/chosen": -64.66431427001953, "logps/ref_chosen": -71.7261962890625, "logps/ref_rejected": -97.70491027832031, "logps/rejected": -91.71397399902344, "loss": 1.103, "margin_dpo/margin_mean": 1.0709459781646729, "margin_dpo/margin_std": 2.0044429302215576, "step": 306 }, { "epoch": 0.46409674981103555, "fcm_dpo/beta": 0.725088357925415, "fcm_dpo/delta": 0.142677903175354, "fcm_dpo/margin": 0.9886835813522339, "fcm_dpo/q_t": 0.383963018655777, "grad_norm": 117.87000274658203, "learning_rate": 3.2549163976939285e-07, "logits/chosen": 1.6312376260757446, "logits/rejected": 1.5077619552612305, "logps/chosen": -67.0914306640625, "logps/ref_chosen": -74.38668823242188, "logps/ref_rejected": -84.16001892089844, "logps/rejected": -77.85345458984375, "loss": 1.2788, "margin_dpo/margin_mean": 0.9886834621429443, "margin_dpo/margin_std": 2.2475128173828125, "step": 307 }, { "epoch": 0.4656084656084656, "fcm_dpo/beta": 0.7155085802078247, "fcm_dpo/delta": -0.11654899269342422, "fcm_dpo/margin": 1.3352723121643066, "fcm_dpo/q_t": 0.3403710424900055, "grad_norm": 109.76605987548828, "learning_rate": 3.2423009383206874e-07, "logits/chosen": 1.6301313638687134, "logits/rejected": 1.6842124462127686, "logps/chosen": -80.45036315917969, "logps/ref_chosen": -87.50894165039062, "logps/ref_rejected": -94.80848693847656, "logps/rejected": -89.08517456054688, "loss": 1.0804, "margin_dpo/margin_mean": 1.3352723121643066, "margin_dpo/margin_std": 2.199124813079834, "step": 308 }, { "epoch": 0.4671201814058957, "fcm_dpo/beta": 0.6909149885177612, "fcm_dpo/delta": -0.06711931526660919, "fcm_dpo/margin": 1.3067526817321777, "fcm_dpo/q_t": 0.3539488911628723, "grad_norm": 115.49034881591797, "learning_rate": 3.229664715194511e-07, "logits/chosen": 2.0674304962158203, "logits/rejected": 1.9489227533340454, "logps/chosen": -75.26066589355469, "logps/ref_chosen": -82.15191650390625, "logps/ref_rejected": -95.03496551513672, "logps/rejected": -89.45046997070312, "loss": 1.0461, "margin_dpo/margin_mean": 1.3067526817321777, "margin_dpo/margin_std": 2.162260055541992, "step": 309 }, { "epoch": 0.46863189720332576, "fcm_dpo/beta": 0.7321085929870605, "fcm_dpo/delta": 0.2117757946252823, "fcm_dpo/margin": 0.48994821310043335, "fcm_dpo/q_t": 0.4401339292526245, "grad_norm": 151.37319946289062, "learning_rate": 3.2170080817777257e-07, "logits/chosen": 1.7694876194000244, "logits/rejected": 1.7599756717681885, "logps/chosen": -87.14959716796875, "logps/ref_chosen": -93.7555160522461, "logps/ref_rejected": -96.93236541748047, "logps/rejected": -90.81639099121094, "loss": 1.5129, "margin_dpo/margin_mean": 0.4899486005306244, "margin_dpo/margin_std": 2.0309510231018066, "step": 310 }, { "epoch": 0.47014361300075586, "fcm_dpo/beta": 0.7175389528274536, "fcm_dpo/delta": -0.07844534516334534, "fcm_dpo/margin": 1.2810978889465332, "fcm_dpo/q_t": 0.35001736879348755, "grad_norm": 114.28585052490234, "learning_rate": 3.204331392103574e-07, "logits/chosen": 1.8003596067428589, "logits/rejected": 1.5091215372085571, "logps/chosen": -68.82054138183594, "logps/ref_chosen": -76.20762634277344, "logps/ref_rejected": -110.48141479492188, "logps/rejected": -104.37541961669922, "loss": 1.0974, "margin_dpo/margin_mean": 1.281097650527954, "margin_dpo/margin_std": 2.2102816104888916, "step": 311 }, { "epoch": 0.47165532879818595, "fcm_dpo/beta": 0.6969722509384155, "fcm_dpo/delta": -0.2547772526741028, "fcm_dpo/margin": 1.5465623140335083, "fcm_dpo/q_t": 0.32185274362564087, "grad_norm": 94.9644546508789, "learning_rate": 3.1916350007663176e-07, "logits/chosen": 1.5592865943908691, "logits/rejected": 1.4448232650756836, "logps/chosen": -62.12285614013672, "logps/ref_chosen": -69.08878326416016, "logps/ref_rejected": -91.84494018554688, "logps/rejected": -86.42558288574219, "loss": 0.9426, "margin_dpo/margin_mean": 1.546562910079956, "margin_dpo/margin_std": 2.1454343795776367, "step": 312 }, { "epoch": 0.47316704459561604, "fcm_dpo/beta": 0.7049161195755005, "fcm_dpo/delta": 0.18395394086837769, "fcm_dpo/margin": 0.9621329307556152, "fcm_dpo/q_t": 0.3967057466506958, "grad_norm": 119.09286499023438, "learning_rate": 3.178919262911314e-07, "logits/chosen": 1.6497703790664673, "logits/rejected": 1.6208115816116333, "logps/chosen": -70.82321166992188, "logps/ref_chosen": -78.20826721191406, "logps/ref_rejected": -86.90351867675781, "logps/rejected": -80.48060607910156, "loss": 1.2842, "margin_dpo/margin_mean": 0.9621328711509705, "margin_dpo/margin_std": 2.283693313598633, "step": 313 }, { "epoch": 0.47467876039304613, "fcm_dpo/beta": 0.7063366174697876, "fcm_dpo/delta": 0.015392206609249115, "fcm_dpo/margin": 1.1817160844802856, "fcm_dpo/q_t": 0.3654022812843323, "grad_norm": 121.3820571899414, "learning_rate": 3.166184534225087e-07, "logits/chosen": 1.6584588289260864, "logits/rejected": 1.6634647846221924, "logps/chosen": -83.33063507080078, "logps/ref_chosen": -90.41890716552734, "logps/ref_rejected": -84.33525848388672, "logps/rejected": -78.4287109375, "loss": 1.1208, "margin_dpo/margin_mean": 1.181715965270996, "margin_dpo/margin_std": 2.112767219543457, "step": 314 }, { "epoch": 0.47619047619047616, "fcm_dpo/beta": 0.7153505682945251, "fcm_dpo/delta": -0.07875702530145645, "fcm_dpo/margin": 1.2836734056472778, "fcm_dpo/q_t": 0.33902478218078613, "grad_norm": 103.74569702148438, "learning_rate": 3.1534311709253723e-07, "logits/chosen": 1.672802448272705, "logits/rejected": 1.6053755283355713, "logps/chosen": -80.0546646118164, "logps/ref_chosen": -87.32842254638672, "logps/ref_rejected": -93.71661376953125, "logps/rejected": -87.72652435302734, "loss": 1.001, "margin_dpo/margin_mean": 1.2836732864379883, "margin_dpo/margin_std": 1.82138192653656, "step": 315 }, { "epoch": 0.47770219198790626, "fcm_dpo/beta": 0.7006301879882812, "fcm_dpo/delta": -0.14333555102348328, "fcm_dpo/margin": 1.3912466764450073, "fcm_dpo/q_t": 0.32626068592071533, "grad_norm": 95.67511749267578, "learning_rate": 3.1406595297511564e-07, "logits/chosen": 1.7230030298233032, "logits/rejected": 1.4919373989105225, "logps/chosen": -66.60594177246094, "logps/ref_chosen": -73.898681640625, "logps/ref_rejected": -115.42668151855469, "logps/rejected": -109.52519226074219, "loss": 0.9455, "margin_dpo/margin_mean": 1.391247034072876, "margin_dpo/margin_std": 1.7532538175582886, "step": 316 }, { "epoch": 0.47921390778533635, "fcm_dpo/beta": 0.6516163349151611, "fcm_dpo/delta": -0.26026636362075806, "fcm_dpo/margin": 1.6568280458450317, "fcm_dpo/q_t": 0.3177596926689148, "grad_norm": 97.86814880371094, "learning_rate": 3.1278699679526975e-07, "logits/chosen": 1.7669193744659424, "logits/rejected": 1.682518720626831, "logps/chosen": -68.1146240234375, "logps/ref_chosen": -75.42947387695312, "logps/ref_rejected": -90.60166931152344, "logps/rejected": -84.94364929199219, "loss": 0.9728, "margin_dpo/margin_mean": 1.6568281650543213, "margin_dpo/margin_std": 2.361431837081909, "step": 317 }, { "epoch": 0.48072562358276644, "fcm_dpo/beta": 0.6571391820907593, "fcm_dpo/delta": 0.16736406087875366, "fcm_dpo/margin": 1.055985689163208, "fcm_dpo/q_t": 0.37599390745162964, "grad_norm": 92.90299224853516, "learning_rate": 3.1150628432815336e-07, "logits/chosen": 1.9868645668029785, "logits/rejected": 1.8562074899673462, "logps/chosen": -63.284942626953125, "logps/ref_chosen": -70.38318634033203, "logps/ref_rejected": -98.19901275634766, "logps/rejected": -92.15675354003906, "loss": 1.2687, "margin_dpo/margin_mean": 1.055985450744629, "margin_dpo/margin_std": 2.3484559059143066, "step": 318 }, { "epoch": 0.48223733938019653, "fcm_dpo/beta": 0.6785935163497925, "fcm_dpo/delta": 0.06372390687465668, "fcm_dpo/margin": 1.1651678085327148, "fcm_dpo/q_t": 0.3581188917160034, "grad_norm": 106.19795227050781, "learning_rate": 3.1022385139804707e-07, "logits/chosen": 1.6127734184265137, "logits/rejected": 1.5631192922592163, "logps/chosen": -76.34298706054688, "logps/ref_chosen": -83.40225982666016, "logps/ref_rejected": -95.40069580078125, "logps/rejected": -89.50658416748047, "loss": 1.1087, "margin_dpo/margin_mean": 1.1651681661605835, "margin_dpo/margin_std": 2.0549566745758057, "step": 319 }, { "epoch": 0.4837490551776266, "fcm_dpo/beta": 0.7149513363838196, "fcm_dpo/delta": 0.28710103034973145, "fcm_dpo/margin": 0.8032557964324951, "fcm_dpo/q_t": 0.42163750529289246, "grad_norm": 119.99693298339844, "learning_rate": 3.0893973387735683e-07, "logits/chosen": 1.4589346647262573, "logits/rejected": 1.3581949472427368, "logps/chosen": -61.47698211669922, "logps/ref_chosen": -68.70979309082031, "logps/ref_rejected": -87.00540924072266, "logps/rejected": -80.57585144042969, "loss": 1.3991, "margin_dpo/margin_mean": 0.8032557964324951, "margin_dpo/margin_std": 2.3650741577148438, "step": 320 }, { "epoch": 0.4852607709750567, "fcm_dpo/beta": 0.7074248194694519, "fcm_dpo/delta": -0.008499190211296082, "fcm_dpo/margin": 1.2074357271194458, "fcm_dpo/q_t": 0.34586799144744873, "grad_norm": 95.66856384277344, "learning_rate": 3.0765396768561004e-07, "logits/chosen": 1.56925630569458, "logits/rejected": 1.516863226890564, "logps/chosen": -59.29505157470703, "logps/ref_chosen": -66.48135375976562, "logps/ref_rejected": -71.84545135498047, "logps/rejected": -65.8665771484375, "loss": 1.0626, "margin_dpo/margin_mean": 1.2074360847473145, "margin_dpo/margin_std": 1.8859655857086182, "step": 321 }, { "epoch": 0.48677248677248675, "fcm_dpo/beta": 0.7035623788833618, "fcm_dpo/delta": -0.19782285392284393, "fcm_dpo/margin": 1.4601200819015503, "fcm_dpo/q_t": 0.3138246536254883, "grad_norm": 95.54669189453125, "learning_rate": 3.063665887884511e-07, "logits/chosen": 1.760352373123169, "logits/rejected": 1.6214886903762817, "logps/chosen": -58.70317077636719, "logps/ref_chosen": -65.94654846191406, "logps/ref_rejected": -94.26603698730469, "logps/rejected": -88.4827880859375, "loss": 0.9047, "margin_dpo/margin_mean": 1.4601197242736816, "margin_dpo/margin_std": 1.8270645141601562, "step": 322 }, { "epoch": 0.48828420256991684, "fcm_dpo/beta": 0.6910836696624756, "fcm_dpo/delta": -0.08377201855182648, "fcm_dpo/margin": 1.335155963897705, "fcm_dpo/q_t": 0.352658212184906, "grad_norm": 112.16151428222656, "learning_rate": 3.0507763319663517e-07, "logits/chosen": 1.7015190124511719, "logits/rejected": 1.6088063716888428, "logps/chosen": -79.46102142333984, "logps/ref_chosen": -86.5498046875, "logps/ref_rejected": -110.39498901367188, "logps/rejected": -104.641357421875, "loss": 1.0917, "margin_dpo/margin_mean": 1.3351564407348633, "margin_dpo/margin_std": 2.2610673904418945, "step": 323 }, { "epoch": 0.4897959183673469, "fcm_dpo/beta": 0.6660194396972656, "fcm_dpo/delta": -0.08605515211820602, "fcm_dpo/margin": 1.39243745803833, "fcm_dpo/q_t": 0.32699155807495117, "grad_norm": 88.87966918945312, "learning_rate": 3.0378713696502097e-07, "logits/chosen": 1.7513608932495117, "logits/rejected": 1.61769700050354, "logps/chosen": -66.96436309814453, "logps/ref_chosen": -74.44218444824219, "logps/ref_rejected": -85.7646484375, "logps/rejected": -79.67926025390625, "loss": 0.9591, "margin_dpo/margin_mean": 1.3924373388290405, "margin_dpo/margin_std": 1.9160056114196777, "step": 324 }, { "epoch": 0.491307634164777, "fcm_dpo/beta": 0.6700199842453003, "fcm_dpo/delta": 0.12378650903701782, "fcm_dpo/margin": 1.0951839685440063, "fcm_dpo/q_t": 0.3673388957977295, "grad_norm": 106.75263214111328, "learning_rate": 3.0249513619156206e-07, "logits/chosen": 1.737715482711792, "logits/rejected": 1.617845058441162, "logps/chosen": -74.48870849609375, "logps/ref_chosen": -81.43812561035156, "logps/ref_rejected": -97.04302978515625, "logps/rejected": -91.18878936767578, "loss": 1.0978, "margin_dpo/margin_mean": 1.0951833724975586, "margin_dpo/margin_std": 1.8893883228302002, "step": 325 }, { "epoch": 0.4928193499622071, "fcm_dpo/beta": 0.7143986225128174, "fcm_dpo/delta": 0.3045271635055542, "fcm_dpo/margin": 0.7874218225479126, "fcm_dpo/q_t": 0.3986685872077942, "grad_norm": 119.16207885742188, "learning_rate": 3.012016670162977e-07, "logits/chosen": 1.532388687133789, "logits/rejected": 1.5331604480743408, "logps/chosen": -85.00900268554688, "logps/ref_chosen": -91.65318298339844, "logps/ref_rejected": -90.64222717285156, "logps/rejected": -84.78547668457031, "loss": 1.2558, "margin_dpo/margin_mean": 0.787421464920044, "margin_dpo/margin_std": 1.8421351909637451, "step": 326 }, { "epoch": 0.4943310657596372, "fcm_dpo/beta": 0.7667665481567383, "fcm_dpo/delta": 0.30395039916038513, "fcm_dpo/margin": 0.7277549505233765, "fcm_dpo/q_t": 0.4066656231880188, "grad_norm": 140.64083862304688, "learning_rate": 2.99906765620341e-07, "logits/chosen": 1.3383753299713135, "logits/rejected": 1.2867255210876465, "logps/chosen": -83.17657470703125, "logps/ref_chosen": -89.97216796875, "logps/ref_rejected": -97.54869079589844, "logps/rejected": -91.48085021972656, "loss": 1.3205, "margin_dpo/margin_mean": 0.7277547717094421, "margin_dpo/margin_std": 1.9155395030975342, "step": 327 }, { "epoch": 0.4958427815570673, "fcm_dpo/beta": 0.7802586555480957, "fcm_dpo/delta": 0.15494143962860107, "fcm_dpo/margin": 0.9038246870040894, "fcm_dpo/q_t": 0.3814663887023926, "grad_norm": 132.4653778076172, "learning_rate": 2.9861046822486766e-07, "logits/chosen": 1.6037625074386597, "logits/rejected": 1.5314295291900635, "logps/chosen": -73.24417877197266, "logps/ref_chosen": -80.27335357666016, "logps/ref_rejected": -99.04093170166016, "logps/rejected": -92.91558837890625, "loss": 1.2057, "margin_dpo/margin_mean": 0.9038242697715759, "margin_dpo/margin_std": 1.8299709558486938, "step": 328 }, { "epoch": 0.4973544973544973, "fcm_dpo/beta": 0.7895931005477905, "fcm_dpo/delta": 0.01884014904499054, "fcm_dpo/margin": 1.0524828433990479, "fcm_dpo/q_t": 0.38687556982040405, "grad_norm": 145.9662322998047, "learning_rate": 2.9731281109010253e-07, "logits/chosen": 1.860405683517456, "logits/rejected": 1.7466496229171753, "logps/chosen": -72.85738372802734, "logps/ref_chosen": -79.75892639160156, "logps/ref_rejected": -102.06265258789062, "logps/rejected": -96.21360778808594, "loss": 1.2091, "margin_dpo/margin_mean": 1.0524829626083374, "margin_dpo/margin_std": 2.168503522872925, "step": 329 }, { "epoch": 0.4988662131519274, "fcm_dpo/beta": 0.7690603137016296, "fcm_dpo/delta": -0.29933562874794006, "fcm_dpo/margin": 1.4522162675857544, "fcm_dpo/q_t": 0.3026127517223358, "grad_norm": 94.12672424316406, "learning_rate": 2.9601383051430505e-07, "logits/chosen": 1.6887285709381104, "logits/rejected": 1.5656009912490845, "logps/chosen": -63.10783767700195, "logps/ref_chosen": -70.55734252929688, "logps/ref_rejected": -94.53077697753906, "logps/rejected": -88.53349304199219, "loss": 0.9435, "margin_dpo/margin_mean": 1.4522165060043335, "margin_dpo/margin_std": 1.8758301734924316, "step": 330 }, { "epoch": 0.5003779289493575, "fcm_dpo/beta": 0.722969651222229, "fcm_dpo/delta": -0.2056279480457306, "fcm_dpo/margin": 1.4265342950820923, "fcm_dpo/q_t": 0.326630175113678, "grad_norm": 110.82780456542969, "learning_rate": 2.947135628327544e-07, "logits/chosen": 1.7697137594223022, "logits/rejected": 1.7361394166946411, "logps/chosen": -68.56468963623047, "logps/ref_chosen": -75.46063232421875, "logps/ref_rejected": -84.78495788574219, "logps/rejected": -79.31553649902344, "loss": 1.0925, "margin_dpo/margin_mean": 1.4265344142913818, "margin_dpo/margin_std": 2.3631396293640137, "step": 331 }, { "epoch": 0.5018896447467877, "fcm_dpo/beta": 0.7259687185287476, "fcm_dpo/delta": -0.03762027621269226, "fcm_dpo/margin": 1.2150681018829346, "fcm_dpo/q_t": 0.3482212424278259, "grad_norm": 115.17147064208984, "learning_rate": 2.934120444167326e-07, "logits/chosen": 1.4272816181182861, "logits/rejected": 1.3553186655044556, "logps/chosen": -77.3675537109375, "logps/ref_chosen": -84.32807922363281, "logps/ref_rejected": -95.63302612304688, "logps/rejected": -89.88756561279297, "loss": 1.0542, "margin_dpo/margin_mean": 1.2150681018829346, "margin_dpo/margin_std": 1.9501895904541016, "step": 332 }, { "epoch": 0.5034013605442177, "fcm_dpo/beta": 0.674473226070404, "fcm_dpo/delta": -0.3686579167842865, "fcm_dpo/margin": 1.738995909690857, "fcm_dpo/q_t": 0.2906306982040405, "grad_norm": 80.1180648803711, "learning_rate": 2.921093116725076e-07, "logits/chosen": 1.7883816957473755, "logits/rejected": 1.6795051097869873, "logps/chosen": -71.42243194580078, "logps/ref_chosen": -78.2132339477539, "logps/ref_rejected": -103.82716369628906, "logps/rejected": -98.77536010742188, "loss": 0.7898, "margin_dpo/margin_mean": 1.7389962673187256, "margin_dpo/margin_std": 1.895709753036499, "step": 333 }, { "epoch": 0.5049130763416477, "fcm_dpo/beta": 0.6675664186477661, "fcm_dpo/delta": 0.06639145314693451, "fcm_dpo/margin": 1.1811097860336304, "fcm_dpo/q_t": 0.3664048910140991, "grad_norm": 105.71363830566406, "learning_rate": 2.9080540104031484e-07, "logits/chosen": 1.6449368000030518, "logits/rejected": 1.5412189960479736, "logps/chosen": -78.09677124023438, "logps/ref_chosen": -85.0171127319336, "logps/ref_rejected": -106.79039764404297, "logps/rejected": -101.05116271972656, "loss": 1.1462, "margin_dpo/margin_mean": 1.1811105012893677, "margin_dpo/margin_std": 2.216235876083374, "step": 334 }, { "epoch": 0.5064247921390779, "fcm_dpo/beta": 0.6799655556678772, "fcm_dpo/delta": 0.005240932106971741, "fcm_dpo/margin": 1.2411878108978271, "fcm_dpo/q_t": 0.3700757622718811, "grad_norm": 103.19578552246094, "learning_rate": 2.895003489933375e-07, "logits/chosen": 1.7492305040359497, "logits/rejected": 1.6704251766204834, "logps/chosen": -71.7774429321289, "logps/ref_chosen": -78.56513214111328, "logps/ref_rejected": -92.68515014648438, "logps/rejected": -87.13864135742188, "loss": 1.166, "margin_dpo/margin_mean": 1.241187572479248, "margin_dpo/margin_std": 2.3274662494659424, "step": 335 }, { "epoch": 0.5079365079365079, "fcm_dpo/beta": 0.6593726873397827, "fcm_dpo/delta": -0.09310643374919891, "fcm_dpo/margin": 1.4134182929992676, "fcm_dpo/q_t": 0.3384147882461548, "grad_norm": 100.4131088256836, "learning_rate": 2.8819419203668675e-07, "logits/chosen": 1.7871836423873901, "logits/rejected": 1.7552770376205444, "logps/chosen": -82.16726684570312, "logps/ref_chosen": -88.63243103027344, "logps/ref_rejected": -107.89385986328125, "logps/rejected": -102.84210968017578, "loss": 0.9968, "margin_dpo/margin_mean": 1.4134178161621094, "margin_dpo/margin_std": 2.0912837982177734, "step": 336 }, { "epoch": 0.509448223733938, "fcm_dpo/beta": 0.6767586469650269, "fcm_dpo/delta": 0.2001107931137085, "fcm_dpo/margin": 0.9802356958389282, "fcm_dpo/q_t": 0.3863935172557831, "grad_norm": 113.31838989257812, "learning_rate": 2.8688696670638053e-07, "logits/chosen": 1.8030924797058105, "logits/rejected": 1.699881911277771, "logps/chosen": -86.61439514160156, "logps/ref_chosen": -93.25018310546875, "logps/ref_rejected": -103.8592529296875, "logps/rejected": -98.20370483398438, "loss": 1.2108, "margin_dpo/margin_mean": 0.98023521900177, "margin_dpo/margin_std": 2.0510101318359375, "step": 337 }, { "epoch": 0.5109599395313681, "fcm_dpo/beta": 0.7058612108230591, "fcm_dpo/delta": 0.07689429819583893, "fcm_dpo/margin": 1.0984101295471191, "fcm_dpo/q_t": 0.3622785210609436, "grad_norm": 104.60888671875, "learning_rate": 2.8557870956832133e-07, "logits/chosen": 1.6752837896347046, "logits/rejected": 1.6237120628356934, "logps/chosen": -75.27117156982422, "logps/ref_chosen": -81.79462432861328, "logps/ref_rejected": -90.98942565917969, "logps/rejected": -85.56438446044922, "loss": 1.041, "margin_dpo/margin_mean": 1.0984106063842773, "margin_dpo/margin_std": 1.7123408317565918, "step": 338 }, { "epoch": 0.5124716553287982, "fcm_dpo/beta": 0.6881164312362671, "fcm_dpo/delta": -0.08667253702878952, "fcm_dpo/margin": 1.348482370376587, "fcm_dpo/q_t": 0.322110652923584, "grad_norm": 89.46073913574219, "learning_rate": 2.842694572172736e-07, "logits/chosen": 1.827816128730774, "logits/rejected": 1.6548161506652832, "logps/chosen": -54.872840881347656, "logps/ref_chosen": -61.80355453491211, "logps/ref_rejected": -85.16979217529297, "logps/rejected": -79.58755493164062, "loss": 0.9793, "margin_dpo/margin_mean": 1.3484827280044556, "margin_dpo/margin_std": 1.903770923614502, "step": 339 }, { "epoch": 0.5139833711262283, "fcm_dpo/beta": 0.703204870223999, "fcm_dpo/delta": 0.03393559157848358, "fcm_dpo/margin": 1.1560697555541992, "fcm_dpo/q_t": 0.3668256103992462, "grad_norm": 101.28387451171875, "learning_rate": 2.8295924627584004e-07, "logits/chosen": 1.4966051578521729, "logits/rejected": 1.4827940464019775, "logps/chosen": -66.0333251953125, "logps/ref_chosen": -72.486083984375, "logps/ref_rejected": -79.86129760742188, "logps/rejected": -74.56460571289062, "loss": 1.1705, "margin_dpo/margin_mean": 1.1560699939727783, "margin_dpo/margin_std": 2.1286869049072266, "step": 340 }, { "epoch": 0.5154950869236583, "fcm_dpo/beta": 0.677609920501709, "fcm_dpo/delta": -0.01477833092212677, "fcm_dpo/margin": 1.2626094818115234, "fcm_dpo/q_t": 0.3455454111099243, "grad_norm": 104.65491485595703, "learning_rate": 2.816481133934373e-07, "logits/chosen": 1.6853771209716797, "logits/rejected": 1.5724800825119019, "logps/chosen": -70.659912109375, "logps/ref_chosen": -77.36830139160156, "logps/ref_rejected": -94.64933013916016, "logps/rejected": -89.20354461669922, "loss": 1.0474, "margin_dpo/margin_mean": 1.2626094818115234, "margin_dpo/margin_std": 1.8441420793533325, "step": 341 }, { "epoch": 0.5170068027210885, "fcm_dpo/beta": 0.6741093397140503, "fcm_dpo/delta": -0.15685820579528809, "fcm_dpo/margin": 1.4702677726745605, "fcm_dpo/q_t": 0.3300013840198517, "grad_norm": 86.32221984863281, "learning_rate": 2.8033609524527046e-07, "logits/chosen": 1.7303967475891113, "logits/rejected": 1.6494003534317017, "logps/chosen": -64.28861999511719, "logps/ref_chosen": -71.00831604003906, "logps/ref_rejected": -84.22953796386719, "logps/rejected": -78.98011779785156, "loss": 0.9393, "margin_dpo/margin_mean": 1.4702682495117188, "margin_dpo/margin_std": 1.9783341884613037, "step": 342 }, { "epoch": 0.5185185185185185, "fcm_dpo/beta": 0.6742951273918152, "fcm_dpo/delta": 0.03692948818206787, "fcm_dpo/margin": 0.7514773607254028, "fcm_dpo/q_t": 0.4021931290626526, "grad_norm": 112.97252655029297, "learning_rate": 2.7902322853130753e-07, "logits/chosen": 1.315962553024292, "logits/rejected": 1.2896361351013184, "logps/chosen": -84.8480224609375, "logps/ref_chosen": -91.44624328613281, "logps/ref_rejected": -99.06044006347656, "logps/rejected": -93.21369934082031, "loss": 1.2966, "margin_dpo/margin_mean": 0.7514776587486267, "margin_dpo/margin_std": 1.8802409172058105, "step": 343 }, { "epoch": 0.5200302343159486, "fcm_dpo/beta": 0.6519577503204346, "fcm_dpo/delta": -0.22193169593811035, "fcm_dpo/margin": 1.6081267595291138, "fcm_dpo/q_t": 0.3195720613002777, "grad_norm": 91.64571380615234, "learning_rate": 2.7770954997525274e-07, "logits/chosen": 1.6671485900878906, "logits/rejected": 1.5444729328155518, "logps/chosen": -66.93972778320312, "logps/ref_chosen": -73.43608093261719, "logps/ref_rejected": -100.76569366455078, "logps/rejected": -95.8774642944336, "loss": 0.9195, "margin_dpo/margin_mean": 1.608127474784851, "margin_dpo/margin_std": 2.2376999855041504, "step": 344 }, { "epoch": 0.5215419501133787, "fcm_dpo/beta": 0.6448332667350769, "fcm_dpo/delta": 0.06164184957742691, "fcm_dpo/margin": 1.2283003330230713, "fcm_dpo/q_t": 0.35730862617492676, "grad_norm": 89.81734466552734, "learning_rate": 2.7639509632351927e-07, "logits/chosen": 2.0488429069519043, "logits/rejected": 1.9527053833007812, "logps/chosen": -68.85000610351562, "logps/ref_chosen": -75.79296875, "logps/ref_rejected": -94.34156799316406, "logps/rejected": -88.62691497802734, "loss": 1.0656, "margin_dpo/margin_mean": 1.2283005714416504, "margin_dpo/margin_std": 1.9429552555084229, "step": 345 }, { "epoch": 0.5230536659108088, "fcm_dpo/beta": 0.6434746980667114, "fcm_dpo/delta": -0.126961350440979, "fcm_dpo/margin": 1.4994654655456543, "fcm_dpo/q_t": 0.3333126902580261, "grad_norm": 92.5584487915039, "learning_rate": 2.7507990434420123e-07, "logits/chosen": 1.7417409420013428, "logits/rejected": 1.6066086292266846, "logps/chosen": -65.25099182128906, "logps/ref_chosen": -72.26289367675781, "logps/ref_rejected": -106.36925506591797, "logps/rejected": -100.85682678222656, "loss": 1.0344, "margin_dpo/margin_mean": 1.499464750289917, "margin_dpo/margin_std": 2.23475980758667, "step": 346 }, { "epoch": 0.5245653817082389, "fcm_dpo/beta": 0.6435239911079407, "fcm_dpo/delta": 0.08430784940719604, "fcm_dpo/margin": 1.2000800371170044, "fcm_dpo/q_t": 0.37312763929367065, "grad_norm": 102.34098815917969, "learning_rate": 2.737640108260456e-07, "logits/chosen": 1.687086582183838, "logits/rejected": 1.6137919425964355, "logps/chosen": -64.65863037109375, "logps/ref_chosen": -71.19871520996094, "logps/ref_rejected": -91.543212890625, "logps/rejected": -86.20320892333984, "loss": 1.181, "margin_dpo/margin_mean": 1.200080394744873, "margin_dpo/margin_std": 2.4104766845703125, "step": 347 }, { "epoch": 0.5260770975056689, "fcm_dpo/beta": 0.6327615976333618, "fcm_dpo/delta": -0.1322903335094452, "fcm_dpo/margin": 1.5311083793640137, "fcm_dpo/q_t": 0.3531337380409241, "grad_norm": 89.33076477050781, "learning_rate": 2.724474525774229e-07, "logits/chosen": 2.2387795448303223, "logits/rejected": 2.1621313095092773, "logps/chosen": -63.21007537841797, "logps/ref_chosen": -69.95603942871094, "logps/ref_rejected": -83.64309692382812, "logps/rejected": -78.42823791503906, "loss": 1.0263, "margin_dpo/margin_mean": 1.5311079025268555, "margin_dpo/margin_std": 2.4763777256011963, "step": 348 }, { "epoch": 0.527588813303099, "fcm_dpo/beta": 0.638213038444519, "fcm_dpo/delta": -0.043823257088661194, "fcm_dpo/margin": 1.385543704032898, "fcm_dpo/q_t": 0.34594935178756714, "grad_norm": 93.91361999511719, "learning_rate": 2.711302664252973e-07, "logits/chosen": 1.8765864372253418, "logits/rejected": 1.7512259483337402, "logps/chosen": -63.903682708740234, "logps/ref_chosen": -70.71857452392578, "logps/ref_rejected": -99.93263244628906, "logps/rejected": -94.50328826904297, "loss": 1.0296, "margin_dpo/margin_mean": 1.3855433464050293, "margin_dpo/margin_std": 2.1671652793884277, "step": 349 }, { "epoch": 0.5291005291005291, "fcm_dpo/beta": 0.5975005030632019, "fcm_dpo/delta": -0.23265165090560913, "fcm_dpo/margin": 1.7634344100952148, "fcm_dpo/q_t": 0.31782710552215576, "grad_norm": 78.32647705078125, "learning_rate": 2.698124892141971e-07, "logits/chosen": 1.5105071067810059, "logits/rejected": 1.427919626235962, "logps/chosen": -71.44319152832031, "logps/ref_chosen": -78.16873168945312, "logps/ref_rejected": -104.84308624267578, "logps/rejected": -99.8809814453125, "loss": 0.8778, "margin_dpo/margin_mean": 1.7634345293045044, "margin_dpo/margin_std": 2.1569459438323975, "step": 350 }, { "epoch": 0.5306122448979592, "fcm_dpo/beta": 0.5988894104957581, "fcm_dpo/delta": 0.056822769343853, "fcm_dpo/margin": 1.3316857814788818, "fcm_dpo/q_t": 0.34192731976509094, "grad_norm": 91.16613006591797, "learning_rate": 2.6849415780518357e-07, "logits/chosen": 1.873887300491333, "logits/rejected": 1.7134041786193848, "logps/chosen": -65.16175842285156, "logps/ref_chosen": -71.79151916503906, "logps/ref_rejected": -97.04634094238281, "logps/rejected": -91.74826049804688, "loss": 1.0744, "margin_dpo/margin_mean": 1.3316857814788818, "margin_dpo/margin_std": 2.146547317504883, "step": 351 }, { "epoch": 0.5321239606953893, "fcm_dpo/beta": 0.622305691242218, "fcm_dpo/delta": 0.2838347256183624, "fcm_dpo/margin": 0.9371163845062256, "fcm_dpo/q_t": 0.3857288956642151, "grad_norm": 102.97727966308594, "learning_rate": 2.6717530907482024e-07, "logits/chosen": 1.7103533744812012, "logits/rejected": 1.6334961652755737, "logps/chosen": -74.54743194580078, "logps/ref_chosen": -80.86544799804688, "logps/ref_rejected": -102.02129364013672, "logps/rejected": -96.64038848876953, "loss": 1.1919, "margin_dpo/margin_mean": 0.9371156096458435, "margin_dpo/margin_std": 1.9518498182296753, "step": 352 }, { "epoch": 0.5336356764928194, "fcm_dpo/beta": 0.6423227787017822, "fcm_dpo/delta": 0.03207054361701012, "fcm_dpo/margin": 1.2774255275726318, "fcm_dpo/q_t": 0.34687069058418274, "grad_norm": 98.20146179199219, "learning_rate": 2.658559799141411e-07, "logits/chosen": 1.8999056816101074, "logits/rejected": 1.9004359245300293, "logps/chosen": -78.00933837890625, "logps/ref_chosen": -84.77235412597656, "logps/ref_rejected": -86.77130889892578, "logps/rejected": -81.28572082519531, "loss": 1.0978, "margin_dpo/margin_mean": 1.2774256467819214, "margin_dpo/margin_std": 2.1611897945404053, "step": 353 }, { "epoch": 0.5351473922902494, "fcm_dpo/beta": 0.6360931396484375, "fcm_dpo/delta": -0.04566780477762222, "fcm_dpo/margin": 1.401000738143921, "fcm_dpo/q_t": 0.34180209040641785, "grad_norm": 89.1875228881836, "learning_rate": 2.6453620722761895e-07, "logits/chosen": 1.8383228778839111, "logits/rejected": 1.5652598142623901, "logps/chosen": -47.40654754638672, "logps/ref_chosen": -54.33562088012695, "logps/ref_rejected": -92.4120101928711, "logps/rejected": -86.88394165039062, "loss": 1.0269, "margin_dpo/margin_mean": 1.4010006189346313, "margin_dpo/margin_std": 2.115865707397461, "step": 354 }, { "epoch": 0.5366591080876795, "fcm_dpo/beta": 0.6303632259368896, "fcm_dpo/delta": -0.04144010692834854, "fcm_dpo/margin": 1.4075102806091309, "fcm_dpo/q_t": 0.3529096841812134, "grad_norm": 99.93326568603516, "learning_rate": 2.632160279321328e-07, "logits/chosen": 1.9949486255645752, "logits/rejected": 1.7414538860321045, "logps/chosen": -55.229888916015625, "logps/ref_chosen": -61.8388671875, "logps/ref_rejected": -98.65571594238281, "logps/rejected": -93.45425415039062, "loss": 1.0339, "margin_dpo/margin_mean": 1.4075103998184204, "margin_dpo/margin_std": 2.2196106910705566, "step": 355 }, { "epoch": 0.5381708238851096, "fcm_dpo/beta": 0.6423078179359436, "fcm_dpo/delta": 0.17999966442584991, "fcm_dpo/margin": 1.061737060546875, "fcm_dpo/q_t": 0.383014053106308, "grad_norm": 106.97945404052734, "learning_rate": 2.618954789559356e-07, "logits/chosen": 1.916182279586792, "logits/rejected": 1.7761037349700928, "logps/chosen": -57.06999206542969, "logps/ref_chosen": -63.92546463012695, "logps/ref_rejected": -89.682861328125, "logps/rejected": -83.88912963867188, "loss": 1.3227, "margin_dpo/margin_mean": 1.0617367029190063, "margin_dpo/margin_std": 2.4881436824798584, "step": 356 }, { "epoch": 0.5396825396825397, "fcm_dpo/beta": 0.6452049016952515, "fcm_dpo/delta": 0.00560779869556427, "fcm_dpo/margin": 1.3021241426467896, "fcm_dpo/q_t": 0.35388654470443726, "grad_norm": 103.93507385253906, "learning_rate": 2.6057459723762076e-07, "logits/chosen": 1.7489900588989258, "logits/rejected": 1.6417170763015747, "logps/chosen": -74.31562805175781, "logps/ref_chosen": -81.07588958740234, "logps/ref_rejected": -85.06967163085938, "logps/rejected": -79.61152648925781, "loss": 1.0675, "margin_dpo/margin_mean": 1.3021240234375, "margin_dpo/margin_std": 2.0928993225097656, "step": 357 }, { "epoch": 0.5411942554799698, "fcm_dpo/beta": 0.6459277868270874, "fcm_dpo/delta": -0.15767623484134674, "fcm_dpo/margin": 1.535652756690979, "fcm_dpo/q_t": 0.323412150144577, "grad_norm": 88.0017318725586, "learning_rate": 2.5925341972508954e-07, "logits/chosen": 1.7714424133300781, "logits/rejected": 1.7848539352416992, "logps/chosen": -77.4293441772461, "logps/ref_chosen": -84.09109497070312, "logps/ref_rejected": -85.07244873046875, "logps/rejected": -79.94635772705078, "loss": 0.9144, "margin_dpo/margin_mean": 1.5356526374816895, "margin_dpo/margin_std": 1.961099624633789, "step": 358 }, { "epoch": 0.5427059712773998, "fcm_dpo/beta": 0.676872968673706, "fcm_dpo/delta": 0.3618844151496887, "fcm_dpo/margin": 0.7404053211212158, "fcm_dpo/q_t": 0.4080279767513275, "grad_norm": 138.78829956054688, "learning_rate": 2.579319833745169e-07, "logits/chosen": 1.537717342376709, "logits/rejected": 1.5032204389572144, "logps/chosen": -74.22945404052734, "logps/ref_chosen": -80.7490234375, "logps/ref_rejected": -94.92911529541016, "logps/rejected": -89.14994812011719, "loss": 1.4657, "margin_dpo/margin_mean": 0.7404047250747681, "margin_dpo/margin_std": 2.4702301025390625, "step": 359 }, { "epoch": 0.54421768707483, "fcm_dpo/beta": 0.6807535886764526, "fcm_dpo/delta": 0.005708474665880203, "fcm_dpo/margin": 1.2405047416687012, "fcm_dpo/q_t": 0.36353182792663574, "grad_norm": 104.39427947998047, "learning_rate": 2.5661032514931834e-07, "logits/chosen": 1.6437644958496094, "logits/rejected": 1.4671106338500977, "logps/chosen": -71.83963012695312, "logps/ref_chosen": -78.38681030273438, "logps/ref_rejected": -109.68933868408203, "logps/rejected": -104.3826675415039, "loss": 1.0574, "margin_dpo/margin_mean": 1.2405047416687012, "margin_dpo/margin_std": 2.0635769367218018, "step": 360 }, { "epoch": 0.54572940287226, "fcm_dpo/beta": 0.6654163599014282, "fcm_dpo/delta": -0.14612089097499847, "fcm_dpo/margin": 1.4734528064727783, "fcm_dpo/q_t": 0.33461007475852966, "grad_norm": 99.80976867675781, "learning_rate": 2.552884820191154e-07, "logits/chosen": 1.853990077972412, "logits/rejected": 1.7232751846313477, "logps/chosen": -67.30521392822266, "logps/ref_chosen": -73.9055404663086, "logps/ref_rejected": -89.8489990234375, "logps/rejected": -84.72212219238281, "loss": 0.9743, "margin_dpo/margin_mean": 1.4734525680541992, "margin_dpo/margin_std": 2.1431329250335693, "step": 361 }, { "epoch": 0.54724111866969, "fcm_dpo/beta": 0.6757440567016602, "fcm_dpo/delta": -0.07346963882446289, "fcm_dpo/margin": 1.3376474380493164, "fcm_dpo/q_t": 0.3617614507675171, "grad_norm": 116.218017578125, "learning_rate": 2.53966490958702e-07, "logits/chosen": 1.9418387413024902, "logits/rejected": 1.6752915382385254, "logps/chosen": -75.97872924804688, "logps/ref_chosen": -82.32565307617188, "logps/ref_rejected": -123.14100646972656, "logps/rejected": -118.13172912597656, "loss": 1.1054, "margin_dpo/margin_mean": 1.337647557258606, "margin_dpo/margin_std": 2.2377192974090576, "step": 362 }, { "epoch": 0.5487528344671202, "fcm_dpo/beta": 0.6594882011413574, "fcm_dpo/delta": 0.006209194660186768, "fcm_dpo/margin": 1.2788195610046387, "fcm_dpo/q_t": 0.3492434322834015, "grad_norm": 92.99041748046875, "learning_rate": 2.526443889470099e-07, "logits/chosen": 2.0503106117248535, "logits/rejected": 1.757457971572876, "logps/chosen": -59.85978698730469, "logps/ref_chosen": -66.05493927001953, "logps/ref_rejected": -106.79598999023438, "logps/rejected": -101.87965393066406, "loss": 0.9826, "margin_dpo/margin_mean": 1.2788193225860596, "margin_dpo/margin_std": 1.83968186378479, "step": 363 }, { "epoch": 0.5502645502645502, "fcm_dpo/beta": 0.624494194984436, "fcm_dpo/delta": -0.2905758023262024, "fcm_dpo/margin": 1.7719086408615112, "fcm_dpo/q_t": 0.32380062341690063, "grad_norm": 83.77694702148438, "learning_rate": 2.513222129660744e-07, "logits/chosen": 1.5185627937316895, "logits/rejected": 1.4087271690368652, "logps/chosen": -69.42403411865234, "logps/ref_chosen": -76.38365173339844, "logps/ref_rejected": -100.22221374511719, "logps/rejected": -95.03450012207031, "loss": 0.9612, "margin_dpo/margin_mean": 1.7719082832336426, "margin_dpo/margin_std": 2.5487070083618164, "step": 364 }, { "epoch": 0.5517762660619804, "fcm_dpo/beta": 0.6193841695785522, "fcm_dpo/delta": -0.022001437842845917, "fcm_dpo/margin": 1.4040987491607666, "fcm_dpo/q_t": 0.3279988467693329, "grad_norm": 77.93173217773438, "learning_rate": 2.5e-07, "logits/chosen": 1.806230902671814, "logits/rejected": 1.8085339069366455, "logps/chosen": -74.8114013671875, "logps/ref_chosen": -81.83399963378906, "logps/ref_rejected": -89.06932830810547, "logps/rejected": -83.45082092285156, "loss": 0.8875, "margin_dpo/margin_mean": 1.4040985107421875, "margin_dpo/margin_std": 1.5749378204345703, "step": 365 }, { "epoch": 0.5532879818594104, "fcm_dpo/beta": 0.6239637732505798, "fcm_dpo/delta": 0.04806492105126381, "fcm_dpo/margin": 1.2896121740341187, "fcm_dpo/q_t": 0.3578612804412842, "grad_norm": 85.66990661621094, "learning_rate": 2.486777870339255e-07, "logits/chosen": 1.577677607536316, "logits/rejected": 1.554199457168579, "logps/chosen": -65.12374877929688, "logps/ref_chosen": -72.03398895263672, "logps/ref_rejected": -83.65354919433594, "logps/rejected": -78.03291320800781, "loss": 1.0881, "margin_dpo/margin_mean": 1.2896113395690918, "margin_dpo/margin_std": 2.166577100753784, "step": 366 }, { "epoch": 0.5547996976568406, "fcm_dpo/beta": 0.6451700329780579, "fcm_dpo/delta": 0.27520129084587097, "fcm_dpo/margin": 0.9171974062919617, "fcm_dpo/q_t": 0.38411933183670044, "grad_norm": 103.54834747314453, "learning_rate": 2.4735561105299014e-07, "logits/chosen": 1.33754301071167, "logits/rejected": 1.1491222381591797, "logps/chosen": -66.0779037475586, "logps/ref_chosen": -72.39828491210938, "logps/ref_rejected": -95.58364868164062, "logps/rejected": -90.18046569824219, "loss": 1.1462, "margin_dpo/margin_mean": 0.917197585105896, "margin_dpo/margin_std": 1.7585558891296387, "step": 367 }, { "epoch": 0.5563114134542706, "fcm_dpo/beta": 0.666650652885437, "fcm_dpo/delta": 0.1275821030139923, "fcm_dpo/margin": 1.097920536994934, "fcm_dpo/q_t": 0.3611149191856384, "grad_norm": 99.10115814208984, "learning_rate": 2.46033509041298e-07, "logits/chosen": 1.3809869289398193, "logits/rejected": 1.3760086297988892, "logps/chosen": -83.81150817871094, "logps/ref_chosen": -90.12812042236328, "logps/ref_rejected": -91.6636962890625, "logps/rejected": -86.44500732421875, "loss": 1.0882, "margin_dpo/margin_mean": 1.0979206562042236, "margin_dpo/margin_std": 1.7993700504302979, "step": 368 }, { "epoch": 0.5578231292517006, "fcm_dpo/beta": 0.6904096007347107, "fcm_dpo/delta": 0.1361449509859085, "fcm_dpo/margin": 1.0467851161956787, "fcm_dpo/q_t": 0.3757448196411133, "grad_norm": 100.53764343261719, "learning_rate": 2.447115179808846e-07, "logits/chosen": 1.497382402420044, "logits/rejected": 1.4508342742919922, "logps/chosen": -64.85260009765625, "logps/ref_chosen": -71.29417419433594, "logps/ref_rejected": -99.03875732421875, "logps/rejected": -93.64397430419922, "loss": 1.2263, "margin_dpo/margin_mean": 1.0467851161956787, "margin_dpo/margin_std": 2.2027394771575928, "step": 369 }, { "epoch": 0.5593348450491308, "fcm_dpo/beta": 0.6650384664535522, "fcm_dpo/delta": -0.23074662685394287, "fcm_dpo/margin": 1.5856378078460693, "fcm_dpo/q_t": 0.3312636613845825, "grad_norm": 104.1839599609375, "learning_rate": 2.4338967485068164e-07, "logits/chosen": 1.8746541738510132, "logits/rejected": 1.7757220268249512, "logps/chosen": -62.68914031982422, "logps/ref_chosen": -69.14627075195312, "logps/ref_rejected": -93.58651733398438, "logps/rejected": -88.71501159667969, "loss": 1.0246, "margin_dpo/margin_mean": 1.5856380462646484, "margin_dpo/margin_std": 2.374504566192627, "step": 370 }, { "epoch": 0.5608465608465608, "fcm_dpo/beta": 0.6482232809066772, "fcm_dpo/delta": -0.19568899273872375, "fcm_dpo/margin": 1.1544885635375977, "fcm_dpo/q_t": 0.38237959146499634, "grad_norm": 106.52591705322266, "learning_rate": 2.420680166254831e-07, "logits/chosen": 1.9493294954299927, "logits/rejected": 1.9273741245269775, "logps/chosen": -59.593040466308594, "logps/ref_chosen": -65.76728820800781, "logps/ref_rejected": -79.9320068359375, "logps/rejected": -74.91224670410156, "loss": 1.264, "margin_dpo/margin_mean": 1.1544888019561768, "margin_dpo/margin_std": 2.445178508758545, "step": 371 }, { "epoch": 0.562358276643991, "fcm_dpo/beta": 0.6568230390548706, "fcm_dpo/delta": 0.09663718938827515, "fcm_dpo/margin": 1.1454503536224365, "fcm_dpo/q_t": 0.38104021549224854, "grad_norm": 106.87626647949219, "learning_rate": 2.4074658027491044e-07, "logits/chosen": 1.9118516445159912, "logits/rejected": 1.7353770732879639, "logps/chosen": -63.38538360595703, "logps/ref_chosen": -69.97252655029297, "logps/ref_rejected": -92.38316345214844, "logps/rejected": -86.94148254394531, "loss": 1.2561, "margin_dpo/margin_mean": 1.1454503536224365, "margin_dpo/margin_std": 2.4081778526306152, "step": 372 }, { "epoch": 0.563869992441421, "fcm_dpo/beta": 0.6400260925292969, "fcm_dpo/delta": -0.09241245687007904, "fcm_dpo/margin": 1.4590966701507568, "fcm_dpo/q_t": 0.34233933687210083, "grad_norm": 100.00511932373047, "learning_rate": 2.394254027623792e-07, "logits/chosen": 1.9285614490509033, "logits/rejected": 1.7315351963043213, "logps/chosen": -73.30908203125, "logps/ref_chosen": -79.34700012207031, "logps/ref_rejected": -95.69737243652344, "logps/rejected": -91.1185531616211, "loss": 1.0186, "margin_dpo/margin_mean": 1.4590967893600464, "margin_dpo/margin_std": 2.2739272117614746, "step": 373 }, { "epoch": 0.5653817082388511, "fcm_dpo/beta": 0.6214843988418579, "fcm_dpo/delta": -0.17304188013076782, "fcm_dpo/margin": 1.6185259819030762, "fcm_dpo/q_t": 0.31608933210372925, "grad_norm": 96.7579116821289, "learning_rate": 2.381045210440644e-07, "logits/chosen": 1.5188226699829102, "logits/rejected": 1.4934524297714233, "logps/chosen": -87.31782531738281, "logps/ref_chosen": -93.45108032226562, "logps/ref_rejected": -93.575927734375, "logps/rejected": -89.06118774414062, "loss": 0.9094, "margin_dpo/margin_mean": 1.6185256242752075, "margin_dpo/margin_std": 1.9877994060516357, "step": 374 }, { "epoch": 0.5668934240362812, "fcm_dpo/beta": 0.6275245547294617, "fcm_dpo/delta": 0.1873345673084259, "fcm_dpo/margin": 1.0766301155090332, "fcm_dpo/q_t": 0.3831120431423187, "grad_norm": 96.12359619140625, "learning_rate": 2.3678397206786715e-07, "logits/chosen": 1.9656658172607422, "logits/rejected": 1.900733232498169, "logps/chosen": -70.84635925292969, "logps/ref_chosen": -77.37177276611328, "logps/ref_rejected": -98.59054565429688, "logps/rejected": -93.14176177978516, "loss": 1.161, "margin_dpo/margin_mean": 1.0766297578811646, "margin_dpo/margin_std": 2.177039623260498, "step": 375 }, { "epoch": 0.5684051398337112, "fcm_dpo/beta": 0.6014668941497803, "fcm_dpo/delta": -0.30763763189315796, "fcm_dpo/margin": 1.8556671142578125, "fcm_dpo/q_t": 0.3150825798511505, "grad_norm": 77.44905090332031, "learning_rate": 2.3546379277238103e-07, "logits/chosen": 2.103670597076416, "logits/rejected": 1.9150159358978271, "logps/chosen": -62.69090270996094, "logps/ref_chosen": -68.99790954589844, "logps/ref_rejected": -90.37117004394531, "logps/rejected": -85.91983032226562, "loss": 0.9165, "margin_dpo/margin_mean": 1.8556674718856812, "margin_dpo/margin_std": 2.43137264251709, "step": 376 }, { "epoch": 0.5699168556311414, "fcm_dpo/beta": 0.6137855052947998, "fcm_dpo/delta": 0.129922553896904, "fcm_dpo/margin": 1.187425136566162, "fcm_dpo/q_t": 0.3663738965988159, "grad_norm": 80.50379943847656, "learning_rate": 2.3414402008585886e-07, "logits/chosen": 1.9742162227630615, "logits/rejected": 1.9277293682098389, "logps/chosen": -58.0892333984375, "logps/ref_chosen": -64.22705841064453, "logps/ref_rejected": -73.10292053222656, "logps/rejected": -68.15251159667969, "loss": 1.1213, "margin_dpo/margin_mean": 1.187424898147583, "margin_dpo/margin_std": 2.1330418586730957, "step": 377 }, { "epoch": 0.5714285714285714, "fcm_dpo/beta": 0.6374775767326355, "fcm_dpo/delta": 0.13885389268398285, "fcm_dpo/margin": 1.1220755577087402, "fcm_dpo/q_t": 0.38182294368743896, "grad_norm": 104.7765884399414, "learning_rate": 2.3282469092517977e-07, "logits/chosen": 1.991929292678833, "logits/rejected": 1.9022181034088135, "logps/chosen": -70.78045654296875, "logps/ref_chosen": -76.90864562988281, "logps/ref_rejected": -90.53460693359375, "logps/rejected": -85.52848815917969, "loss": 1.1762, "margin_dpo/margin_mean": 1.1220749616622925, "margin_dpo/margin_std": 2.2846970558166504, "step": 378 }, { "epoch": 0.5729402872260015, "fcm_dpo/beta": 0.6344826817512512, "fcm_dpo/delta": -0.007661148905754089, "fcm_dpo/margin": 1.3503775596618652, "fcm_dpo/q_t": 0.3466755151748657, "grad_norm": 118.14442443847656, "learning_rate": 2.3150584219481643e-07, "logits/chosen": 2.016268730163574, "logits/rejected": 1.8680901527404785, "logps/chosen": -85.14906311035156, "logps/ref_chosen": -91.2371597290039, "logps/ref_rejected": -120.1969985961914, "logps/rejected": -115.45927429199219, "loss": 1.0249, "margin_dpo/margin_mean": 1.3503767251968384, "margin_dpo/margin_std": 2.0785036087036133, "step": 379 }, { "epoch": 0.5744520030234316, "fcm_dpo/beta": 0.6113913059234619, "fcm_dpo/delta": -0.24338015913963318, "fcm_dpo/margin": 1.7465507984161377, "fcm_dpo/q_t": 0.3018234372138977, "grad_norm": 84.41504669189453, "learning_rate": 2.3018751078580283e-07, "logits/chosen": 1.567660927772522, "logits/rejected": 1.5065686702728271, "logps/chosen": -71.13554382324219, "logps/ref_chosen": -77.78315734863281, "logps/ref_rejected": -92.56083679199219, "logps/rejected": -87.65977478027344, "loss": 0.965, "margin_dpo/margin_mean": 1.7465509176254272, "margin_dpo/margin_std": 2.397176504135132, "step": 380 }, { "epoch": 0.5759637188208617, "fcm_dpo/beta": 0.6341466903686523, "fcm_dpo/delta": 0.20742377638816833, "fcm_dpo/margin": 1.0140228271484375, "fcm_dpo/q_t": 0.38262784481048584, "grad_norm": 104.05586242675781, "learning_rate": 2.288697335747027e-07, "logits/chosen": 1.4323937892913818, "logits/rejected": 1.3942238092422485, "logps/chosen": -69.36187744140625, "logps/ref_chosen": -75.28189086914062, "logps/ref_rejected": -81.1995849609375, "logps/rejected": -76.29359436035156, "loss": 1.2221, "margin_dpo/margin_mean": 1.0140235424041748, "margin_dpo/margin_std": 2.141716480255127, "step": 381 }, { "epoch": 0.5774754346182918, "fcm_dpo/beta": 0.6523127555847168, "fcm_dpo/delta": 0.13858559727668762, "fcm_dpo/margin": 1.0922915935516357, "fcm_dpo/q_t": 0.38505297899246216, "grad_norm": 98.1589584350586, "learning_rate": 2.2755254742257706e-07, "logits/chosen": 1.8270576000213623, "logits/rejected": 1.7259621620178223, "logps/chosen": -72.89222717285156, "logps/ref_chosen": -78.74870300292969, "logps/ref_rejected": -99.77484130859375, "logps/rejected": -95.01066589355469, "loss": 1.1694, "margin_dpo/margin_mean": 1.0922926664352417, "margin_dpo/margin_std": 2.1799964904785156, "step": 382 }, { "epoch": 0.5789871504157218, "fcm_dpo/beta": 0.634296178817749, "fcm_dpo/delta": -0.08330284804105759, "fcm_dpo/margin": 1.458072543144226, "fcm_dpo/q_t": 0.3481723964214325, "grad_norm": 116.07969665527344, "learning_rate": 2.2623598917395436e-07, "logits/chosen": 1.3643097877502441, "logits/rejected": 1.4560441970825195, "logps/chosen": -89.80696105957031, "logps/ref_chosen": -95.92772674560547, "logps/ref_rejected": -92.13604736328125, "logps/rejected": -87.47335815429688, "loss": 1.0809, "margin_dpo/margin_mean": 1.4580726623535156, "margin_dpo/margin_std": 2.3793869018554688, "step": 383 }, { "epoch": 0.5804988662131519, "fcm_dpo/beta": 0.637192964553833, "fcm_dpo/delta": 0.027054572477936745, "fcm_dpo/margin": 1.29500412940979, "fcm_dpo/q_t": 0.34487384557724, "grad_norm": 106.3746566772461, "learning_rate": 2.2492009565579875e-07, "logits/chosen": 2.0678038597106934, "logits/rejected": 1.9981340169906616, "logps/chosen": -74.25601959228516, "logps/ref_chosen": -80.208984375, "logps/ref_rejected": -94.39380645751953, "logps/rejected": -89.73583984375, "loss": 1.0629, "margin_dpo/margin_mean": 1.29500412940979, "margin_dpo/margin_std": 2.108096122741699, "step": 384 }, { "epoch": 0.582010582010582, "fcm_dpo/beta": 0.6329531669616699, "fcm_dpo/delta": -0.12863296270370483, "fcm_dpo/margin": 1.5248796939849854, "fcm_dpo/q_t": 0.3282264471054077, "grad_norm": 97.46591186523438, "learning_rate": 2.2360490367648084e-07, "logits/chosen": 1.816765308380127, "logits/rejected": 1.7105906009674072, "logps/chosen": -79.43816375732422, "logps/ref_chosen": -85.26632690429688, "logps/ref_rejected": -102.1983413696289, "logps/rejected": -97.89505767822266, "loss": 0.9315, "margin_dpo/margin_mean": 1.524879813194275, "margin_dpo/margin_std": 1.9778671264648438, "step": 385 }, { "epoch": 0.5835222978080121, "fcm_dpo/beta": 0.6364574432373047, "fcm_dpo/delta": 0.14686298370361328, "fcm_dpo/margin": 1.1207685470581055, "fcm_dpo/q_t": 0.37132441997528076, "grad_norm": 108.806396484375, "learning_rate": 2.2229045002474724e-07, "logits/chosen": 1.6947441101074219, "logits/rejected": 1.5647040605545044, "logps/chosen": -87.65086364746094, "logps/ref_chosen": -93.19975280761719, "logps/ref_rejected": -112.98831176757812, "logps/rejected": -108.5601806640625, "loss": 1.1979, "margin_dpo/margin_mean": 1.1207683086395264, "margin_dpo/margin_std": 2.267601490020752, "step": 386 }, { "epoch": 0.5850340136054422, "fcm_dpo/beta": 0.6233910918235779, "fcm_dpo/delta": -0.21825829148292542, "fcm_dpo/margin": 1.677814245223999, "fcm_dpo/q_t": 0.3103215992450714, "grad_norm": 85.16645812988281, "learning_rate": 2.209767714686924e-07, "logits/chosen": 1.881676435470581, "logits/rejected": 1.70719575881958, "logps/chosen": -60.38775634765625, "logps/ref_chosen": -66.32861328125, "logps/ref_rejected": -100.56486511230469, "logps/rejected": -96.30183410644531, "loss": 0.8773, "margin_dpo/margin_mean": 1.6778154373168945, "margin_dpo/margin_std": 1.9726049900054932, "step": 387 }, { "epoch": 0.5865457294028723, "fcm_dpo/beta": 0.6180027723312378, "fcm_dpo/delta": 0.12911082804203033, "fcm_dpo/margin": 1.1801085472106934, "fcm_dpo/q_t": 0.3718109726905823, "grad_norm": 95.51586151123047, "learning_rate": 2.1966390475472954e-07, "logits/chosen": 1.846620798110962, "logits/rejected": 1.8400707244873047, "logps/chosen": -87.35832214355469, "logps/ref_chosen": -92.95967864990234, "logps/ref_rejected": -97.9437255859375, "logps/rejected": -93.52247619628906, "loss": 1.1316, "margin_dpo/margin_mean": 1.1801083087921143, "margin_dpo/margin_std": 2.105041027069092, "step": 388 }, { "epoch": 0.5880574452003023, "fcm_dpo/beta": 0.6405338048934937, "fcm_dpo/delta": 0.08333232998847961, "fcm_dpo/margin": 1.2051608562469482, "fcm_dpo/q_t": 0.3516117334365845, "grad_norm": 103.3174057006836, "learning_rate": 2.1835188660656265e-07, "logits/chosen": 1.9260954856872559, "logits/rejected": 1.8456639051437378, "logps/chosen": -71.16734313964844, "logps/ref_chosen": -76.89031982421875, "logps/ref_rejected": -93.79212951660156, "logps/rejected": -89.27430725097656, "loss": 1.1927, "margin_dpo/margin_mean": 1.205160140991211, "margin_dpo/margin_std": 2.2965409755706787, "step": 389 }, { "epoch": 0.5895691609977324, "fcm_dpo/beta": 0.645778238773346, "fcm_dpo/delta": 0.037246476858854294, "fcm_dpo/margin": 1.2624857425689697, "fcm_dpo/q_t": 0.3491223454475403, "grad_norm": 86.41116333007812, "learning_rate": 2.170407537241599e-07, "logits/chosen": 2.1113734245300293, "logits/rejected": 2.000617027282715, "logps/chosen": -54.835906982421875, "logps/ref_chosen": -61.058815002441406, "logps/ref_rejected": -79.55152893066406, "logps/rejected": -74.59110260009766, "loss": 1.0129, "margin_dpo/margin_mean": 1.2624856233596802, "margin_dpo/margin_std": 1.911329984664917, "step": 390 }, { "epoch": 0.5910808767951625, "fcm_dpo/beta": 0.6336863040924072, "fcm_dpo/delta": -0.15196503698825836, "fcm_dpo/margin": 1.5577452182769775, "fcm_dpo/q_t": 0.34712958335876465, "grad_norm": 101.08207702636719, "learning_rate": 2.1573054278272636e-07, "logits/chosen": 1.7356573343276978, "logits/rejected": 1.647139310836792, "logps/chosen": -72.69304656982422, "logps/ref_chosen": -78.60820770263672, "logps/ref_rejected": -103.3367691040039, "logps/rejected": -98.97935485839844, "loss": 1.1506, "margin_dpo/margin_mean": 1.5577449798583984, "margin_dpo/margin_std": 2.82521390914917, "step": 391 }, { "epoch": 0.5925925925925926, "fcm_dpo/beta": 0.600347638130188, "fcm_dpo/delta": -0.32079529762268066, "fcm_dpo/margin": 1.891295313835144, "fcm_dpo/q_t": 0.3097449541091919, "grad_norm": 92.39323425292969, "learning_rate": 2.1442129043167873e-07, "logits/chosen": 2.090341806411743, "logits/rejected": 2.024127721786499, "logps/chosen": -80.92561340332031, "logps/ref_chosen": -86.99468994140625, "logps/ref_rejected": -112.73616027832031, "logps/rejected": -108.55838012695312, "loss": 0.9083, "margin_dpo/margin_mean": 1.8912949562072754, "margin_dpo/margin_std": 2.522023916244507, "step": 392 }, { "epoch": 0.5941043083900227, "fcm_dpo/beta": 0.5811384916305542, "fcm_dpo/delta": 0.017529495060443878, "fcm_dpo/margin": 1.431158423423767, "fcm_dpo/q_t": 0.35059964656829834, "grad_norm": 86.41921997070312, "learning_rate": 2.131130332936195e-07, "logits/chosen": 1.6732672452926636, "logits/rejected": 1.5687006711959839, "logps/chosen": -65.68231964111328, "logps/ref_chosen": -71.26398468017578, "logps/ref_rejected": -88.99722290039062, "logps/rejected": -84.84672546386719, "loss": 0.9829, "margin_dpo/margin_mean": 1.4311583042144775, "margin_dpo/margin_std": 1.997040033340454, "step": 393 }, { "epoch": 0.5956160241874527, "fcm_dpo/beta": 0.5913628339767456, "fcm_dpo/delta": 0.04651292413473129, "fcm_dpo/margin": 1.3650890588760376, "fcm_dpo/q_t": 0.34188681840896606, "grad_norm": 94.80079650878906, "learning_rate": 2.1180580796331323e-07, "logits/chosen": 2.1540815830230713, "logits/rejected": 2.0499520301818848, "logps/chosen": -72.911865234375, "logps/ref_chosen": -78.70564270019531, "logps/ref_rejected": -87.01431274414062, "logps/rejected": -82.58561706542969, "loss": 0.9604, "margin_dpo/margin_mean": 1.3650896549224854, "margin_dpo/margin_std": 1.7961560487747192, "step": 394 }, { "epoch": 0.5971277399848829, "fcm_dpo/beta": 0.5942326784133911, "fcm_dpo/delta": -0.09017691761255264, "fcm_dpo/margin": 1.5636615753173828, "fcm_dpo/q_t": 0.33331990242004395, "grad_norm": 74.20651245117188, "learning_rate": 2.104996510066625e-07, "logits/chosen": 1.5473686456680298, "logits/rejected": 1.3798058032989502, "logps/chosen": -59.18064498901367, "logps/ref_chosen": -65.30274963378906, "logps/ref_rejected": -93.22492218017578, "logps/rejected": -88.6664810180664, "loss": 0.9713, "margin_dpo/margin_mean": 1.563661813735962, "margin_dpo/margin_std": 2.150235891342163, "step": 395 }, { "epoch": 0.5986394557823129, "fcm_dpo/beta": 0.5756855607032776, "fcm_dpo/delta": 0.042295172810554504, "fcm_dpo/margin": 1.3979148864746094, "fcm_dpo/q_t": 0.3453894853591919, "grad_norm": 85.51688385009766, "learning_rate": 2.0919459895968517e-07, "logits/chosen": 1.5687694549560547, "logits/rejected": 1.4305109977722168, "logps/chosen": -61.45856475830078, "logps/ref_chosen": -67.33502197265625, "logps/ref_rejected": -98.8193359375, "logps/rejected": -94.3407974243164, "loss": 1.0108, "margin_dpo/margin_mean": 1.397914171218872, "margin_dpo/margin_std": 1.9760756492614746, "step": 396 }, { "epoch": 0.600151171579743, "fcm_dpo/beta": 0.6103275418281555, "fcm_dpo/delta": 0.24452278017997742, "fcm_dpo/margin": 1.0165547132492065, "fcm_dpo/q_t": 0.37763512134552, "grad_norm": 118.63185119628906, "learning_rate": 2.078906883274924e-07, "logits/chosen": 1.754250168800354, "logits/rejected": 1.6709903478622437, "logps/chosen": -83.72361755371094, "logps/ref_chosen": -89.6042251586914, "logps/ref_rejected": -104.9779052734375, "logps/rejected": -100.11386108398438, "loss": 1.2895, "margin_dpo/margin_mean": 1.0165547132492065, "margin_dpo/margin_std": 2.3554749488830566, "step": 397 }, { "epoch": 0.6016628873771731, "fcm_dpo/beta": 0.599021315574646, "fcm_dpo/delta": -0.14220577478408813, "fcm_dpo/margin": 1.62839937210083, "fcm_dpo/q_t": 0.32787105441093445, "grad_norm": 79.75584411621094, "learning_rate": 2.065879555832674e-07, "logits/chosen": 1.8086758852005005, "logits/rejected": 1.6731503009796143, "logps/chosen": -60.36062240600586, "logps/ref_chosen": -66.43465423583984, "logps/ref_rejected": -90.90376281738281, "logps/rejected": -86.4581298828125, "loss": 0.9496, "margin_dpo/margin_mean": 1.6283986568450928, "margin_dpo/margin_std": 2.1961557865142822, "step": 398 }, { "epoch": 0.6031746031746031, "fcm_dpo/beta": 0.599816083908081, "fcm_dpo/delta": 0.027060478925704956, "fcm_dpo/margin": 1.375216007232666, "fcm_dpo/q_t": 0.35677099227905273, "grad_norm": 100.44293975830078, "learning_rate": 2.052864371672457e-07, "logits/chosen": 1.86820650100708, "logits/rejected": 1.6108736991882324, "logps/chosen": -81.86981964111328, "logps/ref_chosen": -87.22315979003906, "logps/ref_rejected": -136.32411193847656, "logps/rejected": -132.34597778320312, "loss": 1.0919, "margin_dpo/margin_mean": 1.3752162456512451, "margin_dpo/margin_std": 2.36777663230896, "step": 399 }, { "epoch": 0.6046863189720333, "fcm_dpo/beta": 0.6271291971206665, "fcm_dpo/delta": 0.27985769510269165, "fcm_dpo/margin": 0.9363595247268677, "fcm_dpo/q_t": 0.39870405197143555, "grad_norm": 111.8140869140625, "learning_rate": 2.0398616948569493e-07, "logits/chosen": 2.029981851577759, "logits/rejected": 1.8537551164627075, "logps/chosen": -85.73567199707031, "logps/ref_chosen": -91.1212158203125, "logps/ref_rejected": -108.19235229492188, "logps/rejected": -103.7431640625, "loss": 1.1945, "margin_dpo/margin_mean": 0.9363600015640259, "margin_dpo/margin_std": 1.9859216213226318, "step": 400 }, { "epoch": 0.6046863189720333, "eval_fcm_dpo/beta": 0.6381731033325195, "eval_logits/chosen": 1.927345633506775, "eval_logits/rejected": 1.811177372932434, "eval_logps/chosen": -81.09713745117188, "eval_logps/ref_chosen": -86.90177917480469, "eval_logps/ref_rejected": -96.69639587402344, "eval_logps/rejected": -92.2294692993164, "eval_loss": 0.5504243969917297, "eval_margin_dpo/margin_mean": 1.337730884552002, "eval_margin_dpo/margin_std": 2.288548707962036, "eval_runtime": 42.2981, "eval_samples_per_second": 54.447, "eval_steps_per_second": 1.702, "step": 400 }, { "epoch": 0.6061980347694633, "fcm_dpo/beta": 0.6117605566978455, "fcm_dpo/delta": -0.28475117683410645, "fcm_dpo/margin": 1.8033357858657837, "fcm_dpo/q_t": 0.30270397663116455, "grad_norm": 74.6077651977539, "learning_rate": 2.0268718890989752e-07, "logits/chosen": 1.9857186079025269, "logits/rejected": 1.8438901901245117, "logps/chosen": -61.3988037109375, "logps/ref_chosen": -67.54151153564453, "logps/ref_rejected": -98.06488800048828, "logps/rejected": -93.72550964355469, "loss": 0.8707, "margin_dpo/margin_mean": 1.803335428237915, "margin_dpo/margin_std": 2.163586378097534, "step": 401 }, { "epoch": 0.6077097505668935, "fcm_dpo/beta": 0.5940630435943604, "fcm_dpo/delta": -0.0942755714058876, "fcm_dpo/margin": 1.5745465755462646, "fcm_dpo/q_t": 0.33752086758613586, "grad_norm": 84.7685317993164, "learning_rate": 2.013895317751323e-07, "logits/chosen": 1.5190571546554565, "logits/rejected": 1.489319086074829, "logps/chosen": -71.37261962890625, "logps/ref_chosen": -77.44487762451172, "logps/ref_rejected": -83.1333236694336, "logps/rejected": -78.63561248779297, "loss": 0.9789, "margin_dpo/margin_mean": 1.5745457410812378, "margin_dpo/margin_std": 2.261594533920288, "step": 402 }, { "epoch": 0.6092214663643235, "fcm_dpo/beta": 0.5662052035331726, "fcm_dpo/delta": -0.3262058198451996, "fcm_dpo/margin": 2.0131633281707764, "fcm_dpo/q_t": 0.3089754581451416, "grad_norm": 75.9161148071289, "learning_rate": 2.0009323437965898e-07, "logits/chosen": 2.0743629932403564, "logits/rejected": 1.8951172828674316, "logps/chosen": -62.74315643310547, "logps/ref_chosen": -68.8230972290039, "logps/ref_rejected": -99.82356262207031, "logps/rejected": -95.75678253173828, "loss": 0.9734, "margin_dpo/margin_mean": 2.013162851333618, "margin_dpo/margin_std": 2.808767795562744, "step": 403 }, { "epoch": 0.6107331821617535, "fcm_dpo/beta": 0.5434421300888062, "fcm_dpo/delta": -0.018316656351089478, "fcm_dpo/margin": 1.5841295719146729, "fcm_dpo/q_t": 0.3353651165962219, "grad_norm": 85.1689224243164, "learning_rate": 1.9879833298370237e-07, "logits/chosen": 1.5624295473098755, "logits/rejected": 1.443617582321167, "logps/chosen": -74.19883728027344, "logps/ref_chosen": -80.26783752441406, "logps/ref_rejected": -111.60258483886719, "logps/rejected": -107.11771392822266, "loss": 0.957, "margin_dpo/margin_mean": 1.5841295719146729, "margin_dpo/margin_std": 2.007789134979248, "step": 404 }, { "epoch": 0.6122448979591837, "fcm_dpo/beta": 0.5555033683776855, "fcm_dpo/delta": 0.15998849272727966, "fcm_dpo/margin": 1.2557036876678467, "fcm_dpo/q_t": 0.37533363699913025, "grad_norm": 75.46453094482422, "learning_rate": 1.975048638084379e-07, "logits/chosen": 1.792256236076355, "logits/rejected": 1.6907178163528442, "logps/chosen": -62.33729934692383, "logps/ref_chosen": -68.31065368652344, "logps/ref_rejected": -81.56044006347656, "logps/rejected": -76.84278106689453, "loss": 1.0891, "margin_dpo/margin_mean": 1.2557039260864258, "margin_dpo/margin_std": 2.046755790710449, "step": 405 }, { "epoch": 0.6137566137566137, "fcm_dpo/beta": 0.567691445350647, "fcm_dpo/delta": -0.02151249535381794, "fcm_dpo/margin": 1.5318082571029663, "fcm_dpo/q_t": 0.3437275290489197, "grad_norm": 85.55471801757812, "learning_rate": 1.9621286303497914e-07, "logits/chosen": 1.7501626014709473, "logits/rejected": 1.4838974475860596, "logps/chosen": -58.76299285888672, "logps/ref_chosen": -64.86714935302734, "logps/ref_rejected": -110.06051635742188, "logps/rejected": -105.48816680908203, "loss": 0.9992, "margin_dpo/margin_mean": 1.531808853149414, "margin_dpo/margin_std": 2.274860382080078, "step": 406 }, { "epoch": 0.6152683295540439, "fcm_dpo/beta": 0.5869194269180298, "fcm_dpo/delta": 0.14187076687812805, "fcm_dpo/margin": 1.2160757780075073, "fcm_dpo/q_t": 0.3661472797393799, "grad_norm": 120.85926055908203, "learning_rate": 1.9492236680336483e-07, "logits/chosen": 1.4653615951538086, "logits/rejected": 1.2881108522415161, "logps/chosen": -96.7445068359375, "logps/ref_chosen": -102.01712799072266, "logps/ref_rejected": -121.53548431396484, "logps/rejected": -117.47894287109375, "loss": 1.057, "margin_dpo/margin_mean": 1.2160767316818237, "margin_dpo/margin_std": 1.9763216972351074, "step": 407 }, { "epoch": 0.6167800453514739, "fcm_dpo/beta": 0.5652141571044922, "fcm_dpo/delta": -0.28196677565574646, "fcm_dpo/margin": 1.94913649559021, "fcm_dpo/q_t": 0.2972300052642822, "grad_norm": 65.2387466430664, "learning_rate": 1.9363341121154895e-07, "logits/chosen": 1.9260717630386353, "logits/rejected": 1.76041579246521, "logps/chosen": -66.63929748535156, "logps/ref_chosen": -72.77989959716797, "logps/ref_rejected": -92.01815795898438, "logps/rejected": -87.82669830322266, "loss": 0.8207, "margin_dpo/margin_mean": 1.949136734008789, "margin_dpo/margin_std": 2.074869155883789, "step": 408 }, { "epoch": 0.618291761148904, "fcm_dpo/beta": 0.5699707865715027, "fcm_dpo/delta": 0.2298847734928131, "fcm_dpo/margin": 1.1142563819885254, "fcm_dpo/q_t": 0.3908570408821106, "grad_norm": 83.33867645263672, "learning_rate": 1.9234603231438994e-07, "logits/chosen": 1.7916343212127686, "logits/rejected": 1.7911202907562256, "logps/chosen": -72.04692077636719, "logps/ref_chosen": -77.7901611328125, "logps/ref_rejected": -79.2997055053711, "logps/rejected": -74.67072296142578, "loss": 1.1559, "margin_dpo/margin_mean": 1.1142561435699463, "margin_dpo/margin_std": 2.188920259475708, "step": 409 }, { "epoch": 0.6198034769463341, "fcm_dpo/beta": 0.576758861541748, "fcm_dpo/delta": 0.06280102580785751, "fcm_dpo/margin": 1.3701996803283691, "fcm_dpo/q_t": 0.35670357942581177, "grad_norm": 94.09947967529297, "learning_rate": 1.9106026612264315e-07, "logits/chosen": 1.590678095817566, "logits/rejected": 1.514933705329895, "logps/chosen": -74.49111938476562, "logps/ref_chosen": -80.35844421386719, "logps/ref_rejected": -92.19056701660156, "logps/rejected": -87.69343566894531, "loss": 1.0963, "margin_dpo/margin_mean": 1.3701996803283691, "margin_dpo/margin_std": 2.2878055572509766, "step": 410 }, { "epoch": 0.6213151927437641, "fcm_dpo/beta": 0.5804015398025513, "fcm_dpo/delta": -0.0807589739561081, "fcm_dpo/margin": 1.5908392667770386, "fcm_dpo/q_t": 0.3370782732963562, "grad_norm": 97.5421371459961, "learning_rate": 1.8977614860195296e-07, "logits/chosen": 1.5710428953170776, "logits/rejected": 1.4811426401138306, "logps/chosen": -65.04608917236328, "logps/ref_chosen": -70.72857666015625, "logps/ref_rejected": -93.19204711914062, "logps/rejected": -89.10040283203125, "loss": 1.0582, "margin_dpo/margin_mean": 1.5908393859863281, "margin_dpo/margin_std": 2.534249782562256, "step": 411 }, { "epoch": 0.6228269085411943, "fcm_dpo/beta": 0.5793402194976807, "fcm_dpo/delta": 0.09534160792827606, "fcm_dpo/margin": 1.3141306638717651, "fcm_dpo/q_t": 0.36529815196990967, "grad_norm": 94.25563049316406, "learning_rate": 1.8849371567184662e-07, "logits/chosen": 1.7595200538635254, "logits/rejected": 1.668968915939331, "logps/chosen": -67.35857391357422, "logps/ref_chosen": -72.87568664550781, "logps/ref_rejected": -88.21068572998047, "logps/rejected": -84.00770568847656, "loss": 1.0512, "margin_dpo/margin_mean": 1.3141303062438965, "margin_dpo/margin_std": 2.1212642192840576, "step": 412 }, { "epoch": 0.6243386243386243, "fcm_dpo/beta": 0.587976336479187, "fcm_dpo/delta": 0.01775454543530941, "fcm_dpo/margin": 1.4179542064666748, "fcm_dpo/q_t": 0.3521287739276886, "grad_norm": 90.87653350830078, "learning_rate": 1.872130032047302e-07, "logits/chosen": 1.2563691139221191, "logits/rejected": 1.169920802116394, "logps/chosen": -79.08854675292969, "logps/ref_chosen": -84.70051574707031, "logps/ref_rejected": -92.06742095947266, "logps/rejected": -87.8734130859375, "loss": 1.063, "margin_dpo/margin_mean": 1.4179542064666748, "margin_dpo/margin_std": 2.338256359100342, "step": 413 }, { "epoch": 0.6258503401360545, "fcm_dpo/beta": 0.5731327533721924, "fcm_dpo/delta": -0.11875976622104645, "fcm_dpo/margin": 1.6653555631637573, "fcm_dpo/q_t": 0.32663214206695557, "grad_norm": 78.96257781982422, "learning_rate": 1.8593404702488436e-07, "logits/chosen": 1.8244965076446533, "logits/rejected": 1.717824935913086, "logps/chosen": -65.36703491210938, "logps/ref_chosen": -70.97660827636719, "logps/ref_rejected": -92.90523529052734, "logps/rejected": -88.96101379394531, "loss": 0.9509, "margin_dpo/margin_mean": 1.6653554439544678, "margin_dpo/margin_std": 2.187335252761841, "step": 414 }, { "epoch": 0.6273620559334845, "fcm_dpo/beta": 0.5758957862854004, "fcm_dpo/delta": 0.007150387391448021, "fcm_dpo/margin": 1.4645922183990479, "fcm_dpo/q_t": 0.3530283570289612, "grad_norm": 84.59825134277344, "learning_rate": 1.846568829074628e-07, "logits/chosen": 1.6455614566802979, "logits/rejected": 1.566821575164795, "logps/chosen": -66.1544189453125, "logps/ref_chosen": -71.7189712524414, "logps/ref_rejected": -74.54219818115234, "logps/rejected": -70.44224548339844, "loss": 1.1014, "margin_dpo/margin_mean": 1.4645925760269165, "margin_dpo/margin_std": 2.4894518852233887, "step": 415 }, { "epoch": 0.6288737717309146, "fcm_dpo/beta": 0.5732775926589966, "fcm_dpo/delta": -0.049004100263118744, "fcm_dpo/margin": 1.0549395084381104, "fcm_dpo/q_t": 0.3943997621536255, "grad_norm": 120.06104278564453, "learning_rate": 1.8338154657749128e-07, "logits/chosen": 1.7915780544281006, "logits/rejected": 1.6690804958343506, "logps/chosen": -67.31856536865234, "logps/ref_chosen": -72.88249206542969, "logps/ref_rejected": -85.30693054199219, "logps/rejected": -80.79793548583984, "loss": 1.2072, "margin_dpo/margin_mean": 1.0549399852752686, "margin_dpo/margin_std": 2.200162410736084, "step": 416 }, { "epoch": 0.6303854875283447, "fcm_dpo/beta": 0.5584331750869751, "fcm_dpo/delta": -0.2724847197532654, "fcm_dpo/margin": 1.9535236358642578, "fcm_dpo/q_t": 0.3041985034942627, "grad_norm": 77.82061004638672, "learning_rate": 1.8210807370886849e-07, "logits/chosen": 1.860659122467041, "logits/rejected": 1.7060253620147705, "logps/chosen": -66.84624481201172, "logps/ref_chosen": -72.49703216552734, "logps/ref_rejected": -89.38966369628906, "logps/rejected": -85.69239807128906, "loss": 0.8895, "margin_dpo/margin_mean": 1.9535231590270996, "margin_dpo/margin_std": 2.344741106033325, "step": 417 }, { "epoch": 0.6318972033257747, "fcm_dpo/beta": 0.5670143961906433, "fcm_dpo/delta": 0.3630419969558716, "fcm_dpo/margin": 0.8936295509338379, "fcm_dpo/q_t": 0.4141634702682495, "grad_norm": 104.79427337646484, "learning_rate": 1.8083649992336825e-07, "logits/chosen": 1.8443524837493896, "logits/rejected": 1.8251988887786865, "logps/chosen": -84.56147766113281, "logps/ref_chosen": -89.70926666259766, "logps/ref_rejected": -90.98756408691406, "logps/rejected": -86.73341369628906, "loss": 1.243, "margin_dpo/margin_mean": 0.8936293125152588, "margin_dpo/margin_std": 2.1351263523101807, "step": 418 }, { "epoch": 0.6334089191232048, "fcm_dpo/beta": 0.5673000812530518, "fcm_dpo/delta": -0.13866420090198517, "fcm_dpo/margin": 1.717864751815796, "fcm_dpo/q_t": 0.31805652379989624, "grad_norm": 73.9402847290039, "learning_rate": 1.7956686078964255e-07, "logits/chosen": 1.4547133445739746, "logits/rejected": 1.3302205801010132, "logps/chosen": -69.59623718261719, "logps/ref_chosen": -75.652099609375, "logps/ref_rejected": -91.0013427734375, "logps/rejected": -86.6633529663086, "loss": 0.8888, "margin_dpo/margin_mean": 1.717864751815796, "margin_dpo/margin_std": 2.056272029876709, "step": 419 }, { "epoch": 0.6349206349206349, "fcm_dpo/beta": 0.570350706577301, "fcm_dpo/delta": 0.046944353729486465, "fcm_dpo/margin": 1.4142495393753052, "fcm_dpo/q_t": 0.3754135072231293, "grad_norm": 88.30995178222656, "learning_rate": 1.782991918222275e-07, "logits/chosen": 1.5207473039627075, "logits/rejected": 1.4120867252349854, "logps/chosen": -67.18255615234375, "logps/ref_chosen": -72.58027648925781, "logps/ref_rejected": -79.90303802490234, "logps/rejected": -75.91956329345703, "loss": 1.1411, "margin_dpo/margin_mean": 1.414249300956726, "margin_dpo/margin_std": 2.669312000274658, "step": 420 }, { "epoch": 0.636432350718065, "fcm_dpo/beta": 0.5930507779121399, "fcm_dpo/delta": 0.228386789560318, "fcm_dpo/margin": 1.0698721408843994, "fcm_dpo/q_t": 0.3951471447944641, "grad_norm": 98.86088562011719, "learning_rate": 1.7703352848054887e-07, "logits/chosen": 1.5929409265518188, "logits/rejected": 1.374795913696289, "logps/chosen": -73.23966979980469, "logps/ref_chosen": -78.71546936035156, "logps/ref_rejected": -90.82321166992188, "logps/rejected": -86.41729736328125, "loss": 1.3057, "margin_dpo/margin_mean": 1.0698716640472412, "margin_dpo/margin_std": 2.6327433586120605, "step": 421 }, { "epoch": 0.6379440665154951, "fcm_dpo/beta": 0.5989946126937866, "fcm_dpo/delta": -0.007140956819057465, "fcm_dpo/margin": 1.429376244544983, "fcm_dpo/q_t": 0.3605658710002899, "grad_norm": 107.47647094726562, "learning_rate": 1.7576990616793137e-07, "logits/chosen": 1.7196696996688843, "logits/rejected": 1.6793361902236938, "logps/chosen": -80.95000457763672, "logps/ref_chosen": -86.74519348144531, "logps/ref_rejected": -94.02015686035156, "logps/rejected": -89.65435028076172, "loss": 1.1255, "margin_dpo/margin_mean": 1.4293758869171143, "margin_dpo/margin_std": 2.6032376289367676, "step": 422 }, { "epoch": 0.6394557823129252, "fcm_dpo/beta": 0.5808249711990356, "fcm_dpo/delta": -0.1427406370639801, "fcm_dpo/margin": 1.683246374130249, "fcm_dpo/q_t": 0.3259110450744629, "grad_norm": 79.15151977539062, "learning_rate": 1.745083602306071e-07, "logits/chosen": 1.875573992729187, "logits/rejected": 1.7012746334075928, "logps/chosen": -66.27205657958984, "logps/ref_chosen": -72.02232360839844, "logps/ref_rejected": -93.26976776123047, "logps/rejected": -89.2027587890625, "loss": 0.922, "margin_dpo/margin_mean": 1.6832462549209595, "margin_dpo/margin_std": 2.1930503845214844, "step": 423 }, { "epoch": 0.6409674981103552, "fcm_dpo/beta": 0.572269856929779, "fcm_dpo/delta": -0.10694173723459244, "fcm_dpo/margin": 1.6546802520751953, "fcm_dpo/q_t": 0.32539302110671997, "grad_norm": 88.41535949707031, "learning_rate": 1.7324892595672804e-07, "logits/chosen": 1.6597294807434082, "logits/rejected": 1.6074860095977783, "logps/chosen": -62.50016784667969, "logps/ref_chosen": -68.22148132324219, "logps/ref_rejected": -94.12411499023438, "logps/rejected": -90.05748748779297, "loss": 0.9463, "margin_dpo/margin_mean": 1.6546807289123535, "margin_dpo/margin_std": 2.1842234134674072, "step": 424 }, { "epoch": 0.6424792139077853, "fcm_dpo/beta": 0.5608553886413574, "fcm_dpo/delta": -0.1236172616481781, "fcm_dpo/margin": 1.7144718170166016, "fcm_dpo/q_t": 0.3162755072116852, "grad_norm": 73.8619384765625, "learning_rate": 1.7199163857537824e-07, "logits/chosen": 1.8607254028320312, "logits/rejected": 1.7949458360671997, "logps/chosen": -70.13557434082031, "logps/ref_chosen": -75.90104675292969, "logps/ref_rejected": -86.08673095703125, "logps/rejected": -82.03573608398438, "loss": 0.9, "margin_dpo/margin_mean": 1.7144721746444702, "margin_dpo/margin_std": 2.046931743621826, "step": 425 }, { "epoch": 0.6439909297052154, "fcm_dpo/beta": 0.5839896202087402, "fcm_dpo/delta": 0.410540372133255, "fcm_dpo/margin": 0.7879926562309265, "fcm_dpo/q_t": 0.4247323274612427, "grad_norm": 108.58043670654297, "learning_rate": 1.7073653325558828e-07, "logits/chosen": 1.8453181982040405, "logits/rejected": 1.8402283191680908, "logps/chosen": -84.62348937988281, "logps/ref_chosen": -89.93118286132812, "logps/ref_rejected": -91.04658508300781, "logps/rejected": -86.52688598632812, "loss": 1.4298, "margin_dpo/margin_mean": 0.787992000579834, "margin_dpo/margin_std": 2.581155300140381, "step": 426 }, { "epoch": 0.6455026455026455, "fcm_dpo/beta": 0.600010871887207, "fcm_dpo/delta": -0.029273666441440582, "fcm_dpo/margin": 1.4609463214874268, "fcm_dpo/q_t": 0.3494294285774231, "grad_norm": 93.7463607788086, "learning_rate": 1.6948364510535218e-07, "logits/chosen": 2.0117008686065674, "logits/rejected": 1.8596858978271484, "logps/chosen": -72.434814453125, "logps/ref_chosen": -77.83393859863281, "logps/ref_rejected": -98.69864654541016, "logps/rejected": -94.76046752929688, "loss": 1.023, "margin_dpo/margin_mean": 1.4609463214874268, "margin_dpo/margin_std": 2.3029470443725586, "step": 427 }, { "epoch": 0.6470143613000756, "fcm_dpo/beta": 0.5907303094863892, "fcm_dpo/delta": -0.07789819687604904, "fcm_dpo/margin": 1.5586960315704346, "fcm_dpo/q_t": 0.35224780440330505, "grad_norm": 110.9155502319336, "learning_rate": 1.6823300917064458e-07, "logits/chosen": 1.7061760425567627, "logits/rejected": 1.6600472927093506, "logps/chosen": -84.93183898925781, "logps/ref_chosen": -90.3450927734375, "logps/ref_rejected": -100.24185180664062, "logps/rejected": -96.38729858398438, "loss": 1.0751, "margin_dpo/margin_mean": 1.5586960315704346, "margin_dpo/margin_std": 2.5471014976501465, "step": 428 }, { "epoch": 0.6485260770975056, "fcm_dpo/beta": 0.5782663226127625, "fcm_dpo/delta": -0.12282080948352814, "fcm_dpo/margin": 1.6621170043945312, "fcm_dpo/q_t": 0.32879000902175903, "grad_norm": 88.22840118408203, "learning_rate": 1.669846604344412e-07, "logits/chosen": 1.6005504131317139, "logits/rejected": 1.6149730682373047, "logps/chosen": -72.61544799804688, "logps/ref_chosen": -78.24811553955078, "logps/ref_rejected": -75.24495697021484, "logps/rejected": -71.27439880371094, "loss": 1.0433, "margin_dpo/margin_mean": 1.6621167659759521, "margin_dpo/margin_std": 2.5759923458099365, "step": 429 }, { "epoch": 0.6500377928949358, "fcm_dpo/beta": 0.5593866109848022, "fcm_dpo/delta": -0.13159701228141785, "fcm_dpo/margin": 1.7297087907791138, "fcm_dpo/q_t": 0.31945735216140747, "grad_norm": 73.9142837524414, "learning_rate": 1.6573863381573954e-07, "logits/chosen": 1.5620594024658203, "logits/rejected": 1.5809931755065918, "logps/chosen": -70.3321533203125, "logps/ref_chosen": -76.08027648925781, "logps/ref_rejected": -84.09554290771484, "logps/rejected": -80.0771255493164, "loss": 0.9161, "margin_dpo/margin_mean": 1.7297089099884033, "margin_dpo/margin_std": 2.1894445419311523, "step": 430 }, { "epoch": 0.6515495086923658, "fcm_dpo/beta": 0.560405969619751, "fcm_dpo/delta": 0.0203985795378685, "fcm_dpo/margin": 1.4833824634552002, "fcm_dpo/q_t": 0.34995564818382263, "grad_norm": 82.28486633300781, "learning_rate": 1.6449496416858282e-07, "logits/chosen": 1.4608607292175293, "logits/rejected": 1.3733350038528442, "logps/chosen": -61.16302490234375, "logps/ref_chosen": -66.88581085205078, "logps/ref_rejected": -89.56040954589844, "logps/rejected": -85.32099914550781, "loss": 1.0695, "margin_dpo/margin_mean": 1.483382225036621, "margin_dpo/margin_std": 2.421769142150879, "step": 431 }, { "epoch": 0.6530612244897959, "fcm_dpo/beta": 0.5456819534301758, "fcm_dpo/delta": -0.1097467765212059, "fcm_dpo/margin": 1.7330988645553589, "fcm_dpo/q_t": 0.3340963125228882, "grad_norm": 82.00106048583984, "learning_rate": 1.632536862810844e-07, "logits/chosen": 1.8679120540618896, "logits/rejected": 1.7690483331680298, "logps/chosen": -73.94156646728516, "logps/ref_chosen": -79.65066528320312, "logps/ref_rejected": -103.92634582519531, "logps/rejected": -99.95034790039062, "loss": 0.9746, "margin_dpo/margin_mean": 1.7330987453460693, "margin_dpo/margin_std": 2.4346117973327637, "step": 432 }, { "epoch": 0.654572940287226, "fcm_dpo/beta": 0.5389462113380432, "fcm_dpo/delta": -0.1568116843700409, "fcm_dpo/margin": 1.8397012948989868, "fcm_dpo/q_t": 0.3335553705692291, "grad_norm": 70.97694396972656, "learning_rate": 1.6201483487445515e-07, "logits/chosen": 2.0188956260681152, "logits/rejected": 1.9827733039855957, "logps/chosen": -71.76106262207031, "logps/ref_chosen": -77.30774688720703, "logps/ref_rejected": -81.65180206298828, "logps/rejected": -77.94482421875, "loss": 1.0222, "margin_dpo/margin_mean": 1.8397008180618286, "margin_dpo/margin_std": 2.890751361846924, "step": 433 }, { "epoch": 0.656084656084656, "fcm_dpo/beta": 0.5215494632720947, "fcm_dpo/delta": -0.03774160146713257, "fcm_dpo/margin": 1.6875739097595215, "fcm_dpo/q_t": 0.33912625908851624, "grad_norm": 71.490966796875, "learning_rate": 1.6077844460203204e-07, "logits/chosen": 1.885265827178955, "logits/rejected": 1.756982445716858, "logps/chosen": -57.4691047668457, "logps/ref_chosen": -63.31850051879883, "logps/ref_rejected": -89.15093994140625, "logps/rejected": -84.98912048339844, "loss": 1.0276, "margin_dpo/margin_mean": 1.6875743865966797, "margin_dpo/margin_std": 2.495243549346924, "step": 434 }, { "epoch": 0.6575963718820862, "fcm_dpo/beta": 0.5468607544898987, "fcm_dpo/delta": 0.1618640273809433, "fcm_dpo/margin": 1.271209478378296, "fcm_dpo/q_t": 0.3671290874481201, "grad_norm": 78.38941192626953, "learning_rate": 1.5954455004830878e-07, "logits/chosen": 2.278850555419922, "logits/rejected": 2.2084856033325195, "logps/chosen": -65.71345520019531, "logps/ref_chosen": -71.1719741821289, "logps/ref_rejected": -86.42095184326172, "logps/rejected": -82.233642578125, "loss": 1.0575, "margin_dpo/margin_mean": 1.271209955215454, "margin_dpo/margin_std": 1.9971004724502563, "step": 435 }, { "epoch": 0.6591080876795162, "fcm_dpo/beta": 0.5605753064155579, "fcm_dpo/delta": 0.1037134975194931, "fcm_dpo/margin": 1.3377704620361328, "fcm_dpo/q_t": 0.35941988229751587, "grad_norm": 81.34796905517578, "learning_rate": 1.5831318572796847e-07, "logits/chosen": 1.5238187313079834, "logits/rejected": 1.4155125617980957, "logps/chosen": -68.72625732421875, "logps/ref_chosen": -74.45087432861328, "logps/ref_rejected": -86.01708984375, "logps/rejected": -81.63023376464844, "loss": 1.0929, "margin_dpo/margin_mean": 1.3377702236175537, "margin_dpo/margin_std": 2.242361545562744, "step": 436 }, { "epoch": 0.6606198034769464, "fcm_dpo/beta": 0.544946014881134, "fcm_dpo/delta": -0.01443202793598175, "fcm_dpo/margin": 1.570465087890625, "fcm_dpo/q_t": 0.3623065948486328, "grad_norm": 87.133544921875, "learning_rate": 1.5708438608491815e-07, "logits/chosen": 1.8089007139205933, "logits/rejected": 1.5546307563781738, "logps/chosen": -66.98934936523438, "logps/ref_chosen": -72.38907623291016, "logps/ref_rejected": -111.03279876708984, "logps/rejected": -107.20354461669922, "loss": 1.115, "margin_dpo/margin_mean": 1.5704649686813354, "margin_dpo/margin_std": 2.739298105239868, "step": 437 }, { "epoch": 0.6621315192743764, "fcm_dpo/beta": 0.5507192015647888, "fcm_dpo/delta": -0.16311952471733093, "fcm_dpo/margin": 1.8056895732879639, "fcm_dpo/q_t": 0.3250262141227722, "grad_norm": 66.01669311523438, "learning_rate": 1.558581854913253e-07, "logits/chosen": 1.564880609512329, "logits/rejected": 1.4786386489868164, "logps/chosen": -51.56304168701172, "logps/ref_chosen": -57.27682876586914, "logps/ref_rejected": -83.07940673828125, "logps/rejected": -79.17129516601562, "loss": 0.9502, "margin_dpo/margin_mean": 1.8056901693344116, "margin_dpo/margin_std": 2.3092832565307617, "step": 438 }, { "epoch": 0.6636432350718064, "fcm_dpo/beta": 0.5346947908401489, "fcm_dpo/delta": -0.10351482778787613, "fcm_dpo/margin": 1.763001799583435, "fcm_dpo/q_t": 0.32669079303741455, "grad_norm": 84.674072265625, "learning_rate": 1.5463461824665658e-07, "logits/chosen": 1.6811615228652954, "logits/rejected": 1.588653564453125, "logps/chosen": -92.66934967041016, "logps/ref_chosen": -98.35890197753906, "logps/ref_rejected": -112.69817352294922, "logps/rejected": -108.77161407470703, "loss": 0.9337, "margin_dpo/margin_mean": 1.7630020380020142, "margin_dpo/margin_std": 2.2514312267303467, "step": 439 }, { "epoch": 0.6651549508692366, "fcm_dpo/beta": 0.5049252510070801, "fcm_dpo/delta": -0.28788667917251587, "fcm_dpo/margin": 2.1909289360046387, "fcm_dpo/q_t": 0.2954619824886322, "grad_norm": 64.66181945800781, "learning_rate": 1.534137185767178e-07, "logits/chosen": 1.518364667892456, "logits/rejected": 1.2891108989715576, "logps/chosen": -55.59807205200195, "logps/ref_chosen": -61.662452697753906, "logps/ref_rejected": -86.81646728515625, "logps/rejected": -82.9430160522461, "loss": 0.8081, "margin_dpo/margin_mean": 2.1909286975860596, "margin_dpo/margin_std": 2.2650413513183594, "step": 440 }, { "epoch": 0.6666666666666666, "fcm_dpo/beta": 0.5017116069793701, "fcm_dpo/delta": 0.10611478239297867, "fcm_dpo/margin": 1.4984135627746582, "fcm_dpo/q_t": 0.35643941164016724, "grad_norm": 76.86129760742188, "learning_rate": 1.521955206326976e-07, "logits/chosen": 1.6022846698760986, "logits/rejected": 1.3970829248428345, "logps/chosen": -68.37437438964844, "logps/ref_chosen": -74.33235168457031, "logps/ref_rejected": -99.654541015625, "logps/rejected": -95.19499206542969, "loss": 1.0114, "margin_dpo/margin_mean": 1.4984139204025269, "margin_dpo/margin_std": 2.176797389984131, "step": 441 }, { "epoch": 0.6681783824640968, "fcm_dpo/beta": 0.5159778594970703, "fcm_dpo/delta": 0.11021871864795685, "fcm_dpo/margin": 1.4496480226516724, "fcm_dpo/q_t": 0.35975363850593567, "grad_norm": 85.56470489501953, "learning_rate": 1.5098005849021078e-07, "logits/chosen": 1.7873557806015015, "logits/rejected": 1.7346773147583008, "logps/chosen": -77.30924987792969, "logps/ref_chosen": -82.42591857910156, "logps/ref_rejected": -106.71090698242188, "logps/rejected": -103.04387664794922, "loss": 1.0487, "margin_dpo/margin_mean": 1.4496479034423828, "margin_dpo/margin_std": 2.3134610652923584, "step": 442 }, { "epoch": 0.6696900982615268, "fcm_dpo/beta": 0.5047956109046936, "fcm_dpo/delta": -0.1580228954553604, "fcm_dpo/margin": 1.9641375541687012, "fcm_dpo/q_t": 0.3337998390197754, "grad_norm": 69.60123443603516, "learning_rate": 1.4976736614834662e-07, "logits/chosen": 2.1173884868621826, "logits/rejected": 1.9571037292480469, "logps/chosen": -67.29206848144531, "logps/ref_chosen": -72.87019348144531, "logps/ref_rejected": -94.48143005371094, "logps/rejected": -90.86744689941406, "loss": 0.9343, "margin_dpo/margin_mean": 1.9641380310058594, "margin_dpo/margin_std": 2.791342258453369, "step": 443 }, { "epoch": 0.671201814058957, "fcm_dpo/beta": 0.5266600847244263, "fcm_dpo/delta": 0.35409846901893616, "fcm_dpo/margin": 0.9787815809249878, "fcm_dpo/q_t": 0.4062029719352722, "grad_norm": 88.73951721191406, "learning_rate": 1.4855747752871654e-07, "logits/chosen": 1.7228628396987915, "logits/rejected": 1.5662447214126587, "logps/chosen": -69.3934097290039, "logps/ref_chosen": -74.650390625, "logps/ref_rejected": -106.89204406738281, "logps/rejected": -102.61383819580078, "loss": 1.248, "margin_dpo/margin_mean": 0.9787817001342773, "margin_dpo/margin_std": 2.3466291427612305, "step": 444 }, { "epoch": 0.672713529856387, "fcm_dpo/beta": 0.5298882126808167, "fcm_dpo/delta": -0.08345725387334824, "fcm_dpo/margin": 1.7450919151306152, "fcm_dpo/q_t": 0.3345106840133667, "grad_norm": 86.28073120117188, "learning_rate": 1.473504264745062e-07, "logits/chosen": 1.8607685565948486, "logits/rejected": 1.8312535285949707, "logps/chosen": -71.2159423828125, "logps/ref_chosen": -76.26957702636719, "logps/ref_rejected": -89.84994506835938, "logps/rejected": -86.5414047241211, "loss": 1.0261, "margin_dpo/margin_mean": 1.7450923919677734, "margin_dpo/margin_std": 2.598353862762451, "step": 445 }, { "epoch": 0.674225245653817, "fcm_dpo/beta": 0.5256571769714355, "fcm_dpo/delta": -0.2002544403076172, "fcm_dpo/margin": 1.9515938758850098, "fcm_dpo/q_t": 0.3043816387653351, "grad_norm": 57.9477424621582, "learning_rate": 1.461462467495284e-07, "logits/chosen": 1.8240482807159424, "logits/rejected": 1.6946065425872803, "logps/chosen": -57.154510498046875, "logps/ref_chosen": -62.74647903442383, "logps/ref_rejected": -86.395751953125, "logps/rejected": -82.75537872314453, "loss": 0.84, "margin_dpo/margin_mean": 1.9515937566757202, "margin_dpo/margin_std": 1.970376968383789, "step": 446 }, { "epoch": 0.6757369614512472, "fcm_dpo/beta": 0.5028195977210999, "fcm_dpo/delta": -0.09459690004587173, "fcm_dpo/margin": 1.8610410690307617, "fcm_dpo/q_t": 0.32163363695144653, "grad_norm": 68.67730712890625, "learning_rate": 1.4494497203727843e-07, "logits/chosen": 1.4977566003799438, "logits/rejected": 1.265622615814209, "logps/chosen": -65.15301513671875, "logps/ref_chosen": -71.06666564941406, "logps/ref_rejected": -103.57111358642578, "logps/rejected": -99.51850891113281, "loss": 0.9279, "margin_dpo/margin_mean": 1.8610403537750244, "margin_dpo/margin_std": 2.3173511028289795, "step": 447 }, { "epoch": 0.6772486772486772, "fcm_dpo/beta": 0.5102126598358154, "fcm_dpo/delta": 0.08946660906076431, "fcm_dpo/margin": 1.501331090927124, "fcm_dpo/q_t": 0.36332303285598755, "grad_norm": 72.64019775390625, "learning_rate": 1.4374663593999256e-07, "logits/chosen": 1.6274654865264893, "logits/rejected": 1.5620205402374268, "logps/chosen": -68.0380859375, "logps/ref_chosen": -73.400146484375, "logps/ref_rejected": -96.34330749511719, "logps/rejected": -92.48257446289062, "loss": 1.0835, "margin_dpo/margin_mean": 1.501330852508545, "margin_dpo/margin_std": 2.5533509254455566, "step": 448 }, { "epoch": 0.6787603930461074, "fcm_dpo/beta": 0.5148006677627563, "fcm_dpo/delta": 0.05458805337548256, "fcm_dpo/margin": 0.9342146515846252, "fcm_dpo/q_t": 0.4083643853664398, "grad_norm": 91.0919189453125, "learning_rate": 1.4255127197770707e-07, "logits/chosen": 1.5526158809661865, "logits/rejected": 1.547848105430603, "logps/chosen": -88.7008285522461, "logps/ref_chosen": -93.66099548339844, "logps/ref_rejected": -102.53019714355469, "logps/rejected": -98.5042495727539, "loss": 1.2504, "margin_dpo/margin_mean": 0.9342143535614014, "margin_dpo/margin_std": 2.240139961242676, "step": 449 }, { "epoch": 0.6802721088435374, "fcm_dpo/beta": 0.5282891988754272, "fcm_dpo/delta": 0.14706285297870636, "fcm_dpo/margin": 1.348769187927246, "fcm_dpo/q_t": 0.36513522267341614, "grad_norm": 70.94473266601562, "learning_rate": 1.4135891358732205e-07, "logits/chosen": 1.606363296508789, "logits/rejected": 1.3925542831420898, "logps/chosen": -56.854942321777344, "logps/ref_chosen": -62.52460479736328, "logps/ref_rejected": -94.04986572265625, "logps/rejected": -89.72897338867188, "loss": 1.0421, "margin_dpo/margin_mean": 1.348769187927246, "margin_dpo/margin_std": 2.1072897911071777, "step": 450 }, { "epoch": 0.6817838246409675, "fcm_dpo/beta": 0.5469115376472473, "fcm_dpo/delta": 0.17872020602226257, "fcm_dpo/margin": 1.247553825378418, "fcm_dpo/q_t": 0.3751906156539917, "grad_norm": 88.25247955322266, "learning_rate": 1.4016959412166437e-07, "logits/chosen": 1.7541656494140625, "logits/rejected": 1.6891770362854004, "logps/chosen": -73.66560363769531, "logps/ref_chosen": -79.14009094238281, "logps/ref_rejected": -93.23919677734375, "logps/rejected": -89.01226043701172, "loss": 1.1194, "margin_dpo/margin_mean": 1.247553825378418, "margin_dpo/margin_std": 2.247030258178711, "step": 451 }, { "epoch": 0.6832955404383976, "fcm_dpo/beta": 0.5521150827407837, "fcm_dpo/delta": 0.04130814969539642, "fcm_dpo/margin": 1.4708223342895508, "fcm_dpo/q_t": 0.35841596126556396, "grad_norm": 84.68819427490234, "learning_rate": 1.3898334684855645e-07, "logits/chosen": 1.6760461330413818, "logits/rejected": 1.521647572517395, "logps/chosen": -65.01879119873047, "logps/ref_chosen": -70.38827514648438, "logps/ref_rejected": -95.47691345214844, "logps/rejected": -91.57825469970703, "loss": 1.0956, "margin_dpo/margin_mean": 1.4708220958709717, "margin_dpo/margin_std": 2.534980535507202, "step": 452 }, { "epoch": 0.6848072562358276, "fcm_dpo/beta": 0.5732132196426392, "fcm_dpo/delta": 0.1644117832183838, "fcm_dpo/margin": 1.2101879119873047, "fcm_dpo/q_t": 0.3788529336452484, "grad_norm": 90.92646026611328, "learning_rate": 1.3780020494988445e-07, "logits/chosen": 1.7101848125457764, "logits/rejected": 1.611031413078308, "logps/chosen": -74.53843688964844, "logps/ref_chosen": -79.9207763671875, "logps/ref_rejected": -90.20779418945312, "logps/rejected": -86.03564453125, "loss": 1.1578, "margin_dpo/margin_mean": 1.2101881504058838, "margin_dpo/margin_std": 2.333146095275879, "step": 453 }, { "epoch": 0.6863189720332578, "fcm_dpo/beta": 0.5651696920394897, "fcm_dpo/delta": -0.05718090757727623, "fcm_dpo/margin": 1.5949244499206543, "fcm_dpo/q_t": 0.34462159872055054, "grad_norm": 76.58309936523438, "learning_rate": 1.366202015206706e-07, "logits/chosen": 1.6377469301223755, "logits/rejected": 1.551138997077942, "logps/chosen": -63.92818069458008, "logps/ref_chosen": -69.71887969970703, "logps/ref_rejected": -82.86952209472656, "logps/rejected": -78.67375183105469, "loss": 1.0832, "margin_dpo/margin_mean": 1.5949238538742065, "margin_dpo/margin_std": 2.597616672515869, "step": 454 }, { "epoch": 0.6878306878306878, "fcm_dpo/beta": 0.5443323850631714, "fcm_dpo/delta": -0.21190257370471954, "fcm_dpo/margin": 1.9055442810058594, "fcm_dpo/q_t": 0.31488168239593506, "grad_norm": 82.54447174072266, "learning_rate": 1.354433695681474e-07, "logits/chosen": 1.338344931602478, "logits/rejected": 1.2727608680725098, "logps/chosen": -84.03068542480469, "logps/ref_chosen": -89.51481628417969, "logps/ref_rejected": -97.93235778808594, "logps/rejected": -94.35377502441406, "loss": 0.9113, "margin_dpo/margin_mean": 1.9055445194244385, "margin_dpo/margin_std": 2.519261121749878, "step": 455 }, { "epoch": 0.6893424036281179, "fcm_dpo/beta": 0.539644181728363, "fcm_dpo/delta": 0.01813328266143799, "fcm_dpo/margin": 1.5422388315200806, "fcm_dpo/q_t": 0.34008723497390747, "grad_norm": 80.02922058105469, "learning_rate": 1.3426974201083439e-07, "logits/chosen": 1.7968485355377197, "logits/rejected": 1.6787824630737305, "logps/chosen": -69.20973205566406, "logps/ref_chosen": -74.60527038574219, "logps/ref_rejected": -97.98377227783203, "logps/rejected": -94.13047790527344, "loss": 0.962, "margin_dpo/margin_mean": 1.5422389507293701, "margin_dpo/margin_std": 2.0328426361083984, "step": 456 }, { "epoch": 0.690854119425548, "fcm_dpo/beta": 0.5443383455276489, "fcm_dpo/delta": 0.07683409005403519, "fcm_dpo/margin": 1.4279565811157227, "fcm_dpo/q_t": 0.3532698154449463, "grad_norm": 71.82817077636719, "learning_rate": 1.3309935167761717e-07, "logits/chosen": 1.6997053623199463, "logits/rejected": 1.5273003578186035, "logps/chosen": -58.893951416015625, "logps/ref_chosen": -63.927032470703125, "logps/ref_rejected": -83.15243530273438, "logps/rejected": -79.54731750488281, "loss": 0.9919, "margin_dpo/margin_mean": 1.4279568195343018, "margin_dpo/margin_std": 1.9557170867919922, "step": 457 }, { "epoch": 0.6923658352229781, "fcm_dpo/beta": 0.5384401082992554, "fcm_dpo/delta": -0.12141396105289459, "fcm_dpo/margin": 1.77810800075531, "fcm_dpo/q_t": 0.3204982280731201, "grad_norm": 71.2940444946289, "learning_rate": 1.3193223130682936e-07, "logits/chosen": 1.7315986156463623, "logits/rejected": 1.5027096271514893, "logps/chosen": -62.00345993041992, "logps/ref_chosen": -67.68869018554688, "logps/ref_rejected": -104.40899658203125, "logps/rejected": -100.50187683105469, "loss": 0.9192, "margin_dpo/margin_mean": 1.77810800075531, "margin_dpo/margin_std": 2.1941637992858887, "step": 458 }, { "epoch": 0.6938775510204082, "fcm_dpo/beta": 0.5484590530395508, "fcm_dpo/delta": -0.1108066737651825, "fcm_dpo/margin": 1.703685998916626, "fcm_dpo/q_t": 0.33259010314941406, "grad_norm": 87.72430419921875, "learning_rate": 1.3076841354533658e-07, "logits/chosen": 1.8845622539520264, "logits/rejected": 1.785888433456421, "logps/chosen": -78.22889709472656, "logps/ref_chosen": -83.82363891601562, "logps/ref_rejected": -103.75938415527344, "logps/rejected": -99.86831665039062, "loss": 0.9903, "margin_dpo/margin_mean": 1.7036855220794678, "margin_dpo/margin_std": 2.135005474090576, "step": 459 }, { "epoch": 0.6953892668178382, "fcm_dpo/beta": 0.5242894887924194, "fcm_dpo/delta": -0.02234676666557789, "fcm_dpo/margin": 1.6595816612243652, "fcm_dpo/q_t": 0.35441914200782776, "grad_norm": 88.8072280883789, "learning_rate": 1.2960793094762345e-07, "logits/chosen": 1.696329951286316, "logits/rejected": 1.4185569286346436, "logps/chosen": -74.41940307617188, "logps/ref_chosen": -79.4836654663086, "logps/ref_rejected": -112.31745910644531, "logps/rejected": -108.91278076171875, "loss": 1.0182, "margin_dpo/margin_mean": 1.6595821380615234, "margin_dpo/margin_std": 2.5861330032348633, "step": 460 }, { "epoch": 0.6969009826152683, "fcm_dpo/beta": 0.5151861906051636, "fcm_dpo/delta": -0.010441526770591736, "fcm_dpo/margin": 1.6554614305496216, "fcm_dpo/q_t": 0.3441765606403351, "grad_norm": 74.9832534790039, "learning_rate": 1.2845081597488286e-07, "logits/chosen": 2.076706647872925, "logits/rejected": 1.9077098369598389, "logps/chosen": -58.701873779296875, "logps/ref_chosen": -64.28482055664062, "logps/ref_rejected": -93.73818969726562, "logps/rejected": -89.81069946289062, "loss": 1.0235, "margin_dpo/margin_mean": 1.655461311340332, "margin_dpo/margin_std": 2.43689227104187, "step": 461 }, { "epoch": 0.6984126984126984, "fcm_dpo/beta": 0.5152299404144287, "fcm_dpo/delta": -0.07396998256444931, "fcm_dpo/margin": 1.7776132822036743, "fcm_dpo/q_t": 0.3375456929206848, "grad_norm": 76.16434478759766, "learning_rate": 1.27297100994108e-07, "logits/chosen": 1.8015167713165283, "logits/rejected": 1.6600192785263062, "logps/chosen": -71.72679901123047, "logps/ref_chosen": -77.15335083007812, "logps/ref_rejected": -91.12923431396484, "logps/rejected": -87.48030090332031, "loss": 0.9983, "margin_dpo/margin_mean": 1.7776132822036743, "margin_dpo/margin_std": 2.624232769012451, "step": 462 }, { "epoch": 0.6999244142101285, "fcm_dpo/beta": 0.5137749910354614, "fcm_dpo/delta": 0.03142701834440231, "fcm_dpo/margin": 1.5948131084442139, "fcm_dpo/q_t": 0.3569672405719757, "grad_norm": 85.6458511352539, "learning_rate": 1.2614681827718695e-07, "logits/chosen": 1.7675786018371582, "logits/rejected": 1.7552111148834229, "logps/chosen": -82.32722473144531, "logps/ref_chosen": -87.58760070800781, "logps/ref_rejected": -87.97022247314453, "logps/rejected": -84.30465698242188, "loss": 1.0237, "margin_dpo/margin_mean": 1.594813585281372, "margin_dpo/margin_std": 2.4094183444976807, "step": 463 }, { "epoch": 0.7014361300075586, "fcm_dpo/beta": 0.5252482891082764, "fcm_dpo/delta": -0.01944570243358612, "fcm_dpo/margin": 1.6464765071868896, "fcm_dpo/q_t": 0.35834911465644836, "grad_norm": 83.27401733398438, "learning_rate": 1.2500000000000005e-07, "logits/chosen": 1.5235177278518677, "logits/rejected": 1.4369018077850342, "logps/chosen": -70.7602310180664, "logps/ref_chosen": -75.83175659179688, "logps/ref_rejected": -84.4811019897461, "logps/rejected": -81.05604553222656, "loss": 1.1129, "margin_dpo/margin_mean": 1.6464769840240479, "margin_dpo/margin_std": 2.8207643032073975, "step": 464 }, { "epoch": 0.7029478458049887, "fcm_dpo/beta": 0.5253424644470215, "fcm_dpo/delta": 0.08905892819166183, "fcm_dpo/margin": 1.4612863063812256, "fcm_dpo/q_t": 0.3681473135948181, "grad_norm": 88.10529327392578, "learning_rate": 1.238566782415197e-07, "logits/chosen": 1.891004204750061, "logits/rejected": 1.7700917720794678, "logps/chosen": -71.99024963378906, "logps/ref_chosen": -77.057861328125, "logps/ref_rejected": -102.75727844238281, "logps/rejected": -99.15094757080078, "loss": 1.0896, "margin_dpo/margin_mean": 1.461285948753357, "margin_dpo/margin_std": 2.4996771812438965, "step": 465 }, { "epoch": 0.7044595616024187, "fcm_dpo/beta": 0.5524252653121948, "fcm_dpo/delta": 0.3179141581058502, "fcm_dpo/margin": 0.9951980710029602, "fcm_dpo/q_t": 0.398731529712677, "grad_norm": 108.6539535522461, "learning_rate": 1.2271688498291334e-07, "logits/chosen": 1.800580620765686, "logits/rejected": 1.824805736541748, "logps/chosen": -86.94599914550781, "logps/ref_chosen": -91.7751693725586, "logps/ref_rejected": -90.2679443359375, "logps/rejected": -86.43397521972656, "loss": 1.1914, "margin_dpo/margin_mean": 0.9951978921890259, "margin_dpo/margin_std": 2.0894367694854736, "step": 466 }, { "epoch": 0.7059712773998488, "fcm_dpo/beta": 0.5498294234275818, "fcm_dpo/delta": -0.11944600939750671, "fcm_dpo/margin": 1.7410266399383545, "fcm_dpo/q_t": 0.3283127546310425, "grad_norm": 68.84928131103516, "learning_rate": 1.2158065210664848e-07, "logits/chosen": 1.9185365438461304, "logits/rejected": 1.6363033056259155, "logps/chosen": -59.54905319213867, "logps/ref_chosen": -64.77557373046875, "logps/ref_rejected": -102.58863830566406, "logps/rejected": -99.10314178466797, "loss": 0.9153, "margin_dpo/margin_mean": 1.741027593612671, "margin_dpo/margin_std": 2.2309818267822266, "step": 467 }, { "epoch": 0.7074829931972789, "fcm_dpo/beta": 0.5375959277153015, "fcm_dpo/delta": -0.19219672679901123, "fcm_dpo/margin": 1.9023988246917725, "fcm_dpo/q_t": 0.31552615761756897, "grad_norm": 79.92118072509766, "learning_rate": 1.204480113956011e-07, "logits/chosen": 1.9349050521850586, "logits/rejected": 1.9412510395050049, "logps/chosen": -77.01482391357422, "logps/ref_chosen": -82.22445678710938, "logps/ref_rejected": -92.99041748046875, "logps/rejected": -89.68318176269531, "loss": 0.8997, "margin_dpo/margin_mean": 1.9023983478546143, "margin_dpo/margin_std": 2.337515354156494, "step": 468 }, { "epoch": 0.708994708994709, "fcm_dpo/beta": 0.5282810926437378, "fcm_dpo/delta": 0.1127682775259018, "fcm_dpo/margin": 1.4049665927886963, "fcm_dpo/q_t": 0.3656489849090576, "grad_norm": 82.08759307861328, "learning_rate": 1.1931899453216697e-07, "logits/chosen": 1.9689496755599976, "logits/rejected": 1.948734998703003, "logps/chosen": -70.78817749023438, "logps/ref_chosen": -75.93031311035156, "logps/ref_rejected": -92.26559448242188, "logps/rejected": -88.52842712402344, "loss": 1.0365, "margin_dpo/margin_mean": 1.4049668312072754, "margin_dpo/margin_std": 2.103820562362671, "step": 469 }, { "epoch": 0.7105064247921391, "fcm_dpo/beta": 0.5462840795516968, "fcm_dpo/delta": 0.07731571793556213, "fcm_dpo/margin": 1.425428032875061, "fcm_dpo/q_t": 0.3547430634498596, "grad_norm": 66.45403289794922, "learning_rate": 1.1819363309737438e-07, "logits/chosen": 1.4768280982971191, "logits/rejected": 1.3471271991729736, "logps/chosen": -60.886566162109375, "logps/ref_chosen": -65.86345672607422, "logps/ref_rejected": -85.89832305908203, "logps/rejected": -82.34687042236328, "loss": 1.0604, "margin_dpo/margin_mean": 1.4254283905029297, "margin_dpo/margin_std": 2.2816340923309326, "step": 470 }, { "epoch": 0.7120181405895691, "fcm_dpo/beta": 0.53891521692276, "fcm_dpo/delta": -0.16177864372730255, "fcm_dpo/margin": 1.847838044166565, "fcm_dpo/q_t": 0.32141464948654175, "grad_norm": 71.49030303955078, "learning_rate": 1.1707195857000215e-07, "logits/chosen": 1.760124921798706, "logits/rejected": 1.6479649543762207, "logps/chosen": -68.9094467163086, "logps/ref_chosen": -74.3460922241211, "logps/ref_rejected": -93.43672943115234, "logps/rejected": -89.84793090820312, "loss": 0.9497, "margin_dpo/margin_mean": 1.8478378057479858, "margin_dpo/margin_std": 2.4519433975219727, "step": 471 }, { "epoch": 0.7135298563869993, "fcm_dpo/beta": 0.5203032493591309, "fcm_dpo/delta": -0.18647995591163635, "fcm_dpo/margin": 1.9560319185256958, "fcm_dpo/q_t": 0.33989793062210083, "grad_norm": 79.76250457763672, "learning_rate": 1.1595400232569768e-07, "logits/chosen": 2.001483201980591, "logits/rejected": 1.8946865797042847, "logps/chosen": -69.1002426147461, "logps/ref_chosen": -74.75674438476562, "logps/ref_rejected": -95.18183135986328, "logps/rejected": -91.48135375976562, "loss": 1.0331, "margin_dpo/margin_mean": 1.9560320377349854, "margin_dpo/margin_std": 3.0834403038024902, "step": 472 }, { "epoch": 0.7150415721844293, "fcm_dpo/beta": 0.5078408718109131, "fcm_dpo/delta": -0.03109436295926571, "fcm_dpo/margin": 1.7292225360870361, "fcm_dpo/q_t": 0.34417200088500977, "grad_norm": 69.86459350585938, "learning_rate": 1.1483979563610069e-07, "logits/chosen": 1.8151731491088867, "logits/rejected": 1.6192166805267334, "logps/chosen": -65.9654312133789, "logps/ref_chosen": -71.65933227539062, "logps/ref_rejected": -109.99200439453125, "logps/rejected": -106.02731323242188, "loss": 1.0484, "margin_dpo/margin_mean": 1.729222059249878, "margin_dpo/margin_std": 2.8078012466430664, "step": 473 }, { "epoch": 0.7165532879818595, "fcm_dpo/beta": 0.5117301940917969, "fcm_dpo/delta": 0.06498853117227554, "fcm_dpo/margin": 1.5440913438796997, "fcm_dpo/q_t": 0.35763585567474365, "grad_norm": 70.37702941894531, "learning_rate": 1.1372936966796709e-07, "logits/chosen": 2.007793426513672, "logits/rejected": 1.824831485748291, "logps/chosen": -61.00871276855469, "logps/ref_chosen": -65.91990661621094, "logps/ref_rejected": -89.09432983398438, "logps/rejected": -85.72722625732422, "loss": 1.0608, "margin_dpo/margin_mean": 1.5440911054611206, "margin_dpo/margin_std": 2.4802653789520264, "step": 474 }, { "epoch": 0.7180650037792895, "fcm_dpo/beta": 0.4982389807701111, "fcm_dpo/delta": -0.17701946198940277, "fcm_dpo/margin": 2.021491765975952, "fcm_dpo/q_t": 0.3174302577972412, "grad_norm": 67.81841278076172, "learning_rate": 1.126227554822985e-07, "logits/chosen": 1.543381690979004, "logits/rejected": 1.5033472776412964, "logps/chosen": -74.09674835205078, "logps/ref_chosen": -79.02459716796875, "logps/ref_rejected": -107.33058166503906, "logps/rejected": -104.42422485351562, "loss": 0.9131, "margin_dpo/margin_mean": 2.021491050720215, "margin_dpo/margin_std": 2.585183620452881, "step": 475 }, { "epoch": 0.7195767195767195, "fcm_dpo/beta": 0.5055756568908691, "fcm_dpo/delta": 0.14022918045520782, "fcm_dpo/margin": 1.424285888671875, "fcm_dpo/q_t": 0.367862343788147, "grad_norm": 89.28751373291016, "learning_rate": 1.1151998403347243e-07, "logits/chosen": 1.6474313735961914, "logits/rejected": 1.612290859222412, "logps/chosen": -88.98843383789062, "logps/ref_chosen": -93.72602844238281, "logps/ref_rejected": -94.390625, "logps/rejected": -91.07731628417969, "loss": 1.0766, "margin_dpo/margin_mean": 1.4242854118347168, "margin_dpo/margin_std": 2.3705697059631348, "step": 476 }, { "epoch": 0.7210884353741497, "fcm_dpo/beta": 0.5270170569419861, "fcm_dpo/delta": 0.21027953922748566, "fcm_dpo/margin": 1.2401819229125977, "fcm_dpo/q_t": 0.3814718723297119, "grad_norm": 92.97918701171875, "learning_rate": 1.1042108616837692e-07, "logits/chosen": 2.0096664428710938, "logits/rejected": 1.9469687938690186, "logps/chosen": -71.95021057128906, "logps/ref_chosen": -76.51399993896484, "logps/ref_rejected": -99.14356231689453, "logps/rejected": -95.81996154785156, "loss": 1.2076, "margin_dpo/margin_mean": 1.240182638168335, "margin_dpo/margin_std": 2.572378158569336, "step": 477 }, { "epoch": 0.7226001511715797, "fcm_dpo/beta": 0.5363619327545166, "fcm_dpo/delta": 0.07953216135501862, "fcm_dpo/margin": 1.4475769996643066, "fcm_dpo/q_t": 0.37544333934783936, "grad_norm": 82.0169677734375, "learning_rate": 1.0932609262554746e-07, "logits/chosen": 1.8754984140396118, "logits/rejected": 1.8362398147583008, "logps/chosen": -73.00425720214844, "logps/ref_chosen": -77.95186614990234, "logps/ref_rejected": -69.77754211425781, "logps/rejected": -66.27751159667969, "loss": 1.2311, "margin_dpo/margin_mean": 1.4475772380828857, "margin_dpo/margin_std": 3.051959991455078, "step": 478 }, { "epoch": 0.7241118669690099, "fcm_dpo/beta": 0.5497398376464844, "fcm_dpo/delta": 0.15630201995372772, "fcm_dpo/margin": 1.2815978527069092, "fcm_dpo/q_t": 0.3775695264339447, "grad_norm": 88.32197570800781, "learning_rate": 1.0823503403430734e-07, "logits/chosen": 1.392703652381897, "logits/rejected": 1.299776315689087, "logps/chosen": -71.73004150390625, "logps/ref_chosen": -76.56551361083984, "logps/ref_rejected": -84.33758544921875, "logps/rejected": -80.7837142944336, "loss": 1.2199, "margin_dpo/margin_mean": 1.2815985679626465, "margin_dpo/margin_std": 2.66666841506958, "step": 479 }, { "epoch": 0.7256235827664399, "fcm_dpo/beta": 0.5562114715576172, "fcm_dpo/delta": -0.12454620003700256, "fcm_dpo/margin": 1.72718346118927, "fcm_dpo/q_t": 0.32873159646987915, "grad_norm": 84.74827575683594, "learning_rate": 1.0714794091391072e-07, "logits/chosen": 1.7120177745819092, "logits/rejected": 1.7063748836517334, "logps/chosen": -74.99505615234375, "logps/ref_chosen": -80.15884399414062, "logps/ref_rejected": -84.88697814941406, "logps/rejected": -81.45037841796875, "loss": 1.0862, "margin_dpo/margin_mean": 1.7271829843521118, "margin_dpo/margin_std": 2.7646212577819824, "step": 480 }, { "epoch": 0.72713529856387, "fcm_dpo/beta": 0.5491403341293335, "fcm_dpo/delta": 0.01012316346168518, "fcm_dpo/margin": 1.5297075510025024, "fcm_dpo/q_t": 0.3679242432117462, "grad_norm": 86.62976837158203, "learning_rate": 1.0606484367268906e-07, "logits/chosen": 1.5805827379226685, "logits/rejected": 1.5588011741638184, "logps/chosen": -79.52948760986328, "logps/ref_chosen": -84.56254577636719, "logps/ref_rejected": -90.06451416015625, "logps/rejected": -86.56116485595703, "loss": 1.1362, "margin_dpo/margin_mean": 1.5297071933746338, "margin_dpo/margin_std": 2.896221399307251, "step": 481 }, { "epoch": 0.7286470143613001, "fcm_dpo/beta": 0.5414802432060242, "fcm_dpo/delta": -0.08684663474559784, "fcm_dpo/margin": 1.715275526046753, "fcm_dpo/q_t": 0.3422348201274872, "grad_norm": 91.90193939208984, "learning_rate": 1.0498577260720048e-07, "logits/chosen": 1.6824023723602295, "logits/rejected": 1.4254873991012573, "logps/chosen": -74.20223999023438, "logps/ref_chosen": -78.88141632080078, "logps/ref_rejected": -125.41990661621094, "logps/rejected": -122.45600128173828, "loss": 1.0844, "margin_dpo/margin_mean": 1.7152750492095947, "margin_dpo/margin_std": 2.783423900604248, "step": 482 }, { "epoch": 0.7301587301587301, "fcm_dpo/beta": 0.5211665630340576, "fcm_dpo/delta": -0.18208253383636475, "fcm_dpo/margin": 1.9437726736068726, "fcm_dpo/q_t": 0.3126748204231262, "grad_norm": 71.99545288085938, "learning_rate": 1.0391075790138232e-07, "logits/chosen": 1.7354581356048584, "logits/rejected": 1.5467109680175781, "logps/chosen": -67.4843521118164, "logps/ref_chosen": -72.690185546875, "logps/ref_rejected": -98.37237548828125, "logps/rejected": -95.11031341552734, "loss": 0.8906, "margin_dpo/margin_mean": 1.9437729120254517, "margin_dpo/margin_std": 2.387289524078369, "step": 483 }, { "epoch": 0.7316704459561603, "fcm_dpo/beta": 0.5343146920204163, "fcm_dpo/delta": 0.16501466929912567, "fcm_dpo/margin": 1.2994334697723389, "fcm_dpo/q_t": 0.36794763803482056, "grad_norm": 87.20106506347656, "learning_rate": 1.0283982962570681e-07, "logits/chosen": 1.7466881275177002, "logits/rejected": 1.7247745990753174, "logps/chosen": -68.83059692382812, "logps/ref_chosen": -73.98435974121094, "logps/ref_rejected": -89.99178314208984, "logps/rejected": -86.137451171875, "loss": 1.0497, "margin_dpo/margin_mean": 1.299433946609497, "margin_dpo/margin_std": 2.0427210330963135, "step": 484 }, { "epoch": 0.7331821617535903, "fcm_dpo/beta": 0.5269999504089355, "fcm_dpo/delta": -0.00029647350311279297, "fcm_dpo/margin": 1.6050857305526733, "fcm_dpo/q_t": 0.3547622263431549, "grad_norm": 83.7960205078125, "learning_rate": 1.0177301773633992e-07, "logits/chosen": 1.9939520359039307, "logits/rejected": 1.9123587608337402, "logps/chosen": -72.948486328125, "logps/ref_chosen": -78.0927963256836, "logps/ref_rejected": -89.14010620117188, "logps/rejected": -85.60088348388672, "loss": 1.0126, "margin_dpo/margin_mean": 1.605086326599121, "margin_dpo/margin_std": 2.3685264587402344, "step": 485 }, { "epoch": 0.7346938775510204, "fcm_dpo/beta": 0.5255827903747559, "fcm_dpo/delta": -0.17571258544921875, "fcm_dpo/margin": 1.917954444885254, "fcm_dpo/q_t": 0.3390832543373108, "grad_norm": 74.30439758300781, "learning_rate": 1.007103520743035e-07, "logits/chosen": 1.6516636610031128, "logits/rejected": 1.4582644701004028, "logps/chosen": -69.16941833496094, "logps/ref_chosen": -73.74685668945312, "logps/ref_rejected": -107.752685546875, "logps/rejected": -105.09320068359375, "loss": 1.0416, "margin_dpo/margin_mean": 1.917954921722412, "margin_dpo/margin_std": 3.016083240509033, "step": 486 }, { "epoch": 0.7362055933484505, "fcm_dpo/beta": 0.5252140760421753, "fcm_dpo/delta": 0.09578922390937805, "fcm_dpo/margin": 1.4492162466049194, "fcm_dpo/q_t": 0.3582766056060791, "grad_norm": 81.06444549560547, "learning_rate": 9.965186236464046e-08, "logits/chosen": 1.869274377822876, "logits/rejected": 1.7187423706054688, "logps/chosen": -74.88436889648438, "logps/ref_chosen": -79.57780456542969, "logps/ref_rejected": -102.2916259765625, "logps/rejected": -99.04740905761719, "loss": 1.058, "margin_dpo/margin_mean": 1.4492161273956299, "margin_dpo/margin_std": 2.371695041656494, "step": 487 }, { "epoch": 0.7377173091458806, "fcm_dpo/beta": 0.5243767499923706, "fcm_dpo/delta": -0.1707746684551239, "fcm_dpo/margin": 1.904654622077942, "fcm_dpo/q_t": 0.3486997187137604, "grad_norm": 81.35235595703125, "learning_rate": 9.859757821558337e-08, "logits/chosen": 1.7296903133392334, "logits/rejected": 1.5843555927276611, "logps/chosen": -75.58885955810547, "logps/ref_chosen": -80.62767791748047, "logps/ref_rejected": -100.4541015625, "logps/rejected": -97.3199462890625, "loss": 1.0677, "margin_dpo/margin_mean": 1.9046547412872314, "margin_dpo/margin_std": 3.0004210472106934, "step": 488 }, { "epoch": 0.7392290249433107, "fcm_dpo/beta": 0.5423703789710999, "fcm_dpo/delta": 0.43078815937042236, "fcm_dpo/margin": 0.8081073760986328, "fcm_dpo/q_t": 0.4194217324256897, "grad_norm": 92.3609390258789, "learning_rate": 9.754752911772615e-08, "logits/chosen": 1.6849393844604492, "logits/rejected": 1.6025831699371338, "logps/chosen": -80.61341857910156, "logps/ref_chosen": -85.39521026611328, "logps/ref_rejected": -101.97309875488281, "logps/rejected": -97.9994125366211, "loss": 1.2433, "margin_dpo/margin_mean": 0.8081076741218567, "margin_dpo/margin_std": 1.996825933456421, "step": 489 }, { "epoch": 0.7407407407407407, "fcm_dpo/beta": 0.5469303131103516, "fcm_dpo/delta": -0.05695779621601105, "fcm_dpo/margin": 1.6473114490509033, "fcm_dpo/q_t": 0.3572388291358948, "grad_norm": 90.47389221191406, "learning_rate": 9.650174444319956e-08, "logits/chosen": 2.277765989303589, "logits/rejected": 2.227505683898926, "logps/chosen": -72.671875, "logps/ref_chosen": -77.75590515136719, "logps/ref_rejected": -88.98885345458984, "logps/rejected": -85.55213928222656, "loss": 1.1484, "margin_dpo/margin_mean": 1.6473113298416138, "margin_dpo/margin_std": 2.9487578868865967, "step": 490 }, { "epoch": 0.7422524565381708, "fcm_dpo/beta": 0.5318253040313721, "fcm_dpo/delta": -0.10165757685899734, "fcm_dpo/margin": 1.7601425647735596, "fcm_dpo/q_t": 0.33336108922958374, "grad_norm": 78.02272033691406, "learning_rate": 9.546025344484868e-08, "logits/chosen": 1.6132152080535889, "logits/rejected": 1.4740610122680664, "logps/chosen": -69.28722381591797, "logps/ref_chosen": -74.33360290527344, "logps/ref_rejected": -91.4105224609375, "logps/rejected": -88.12428283691406, "loss": 0.9392, "margin_dpo/margin_mean": 1.7601426839828491, "margin_dpo/margin_std": 2.289900302886963, "step": 491 }, { "epoch": 0.7437641723356009, "fcm_dpo/beta": 0.5352230072021484, "fcm_dpo/delta": -0.038240764290094376, "fcm_dpo/margin": 1.1772292852401733, "fcm_dpo/q_t": 0.39304035902023315, "grad_norm": 98.42277526855469, "learning_rate": 9.442308525541589e-08, "logits/chosen": 1.5843994617462158, "logits/rejected": 1.4141943454742432, "logps/chosen": -80.89947509765625, "logps/ref_chosen": -85.14178466796875, "logps/ref_rejected": -103.44204711914062, "logps/rejected": -100.37696838378906, "loss": 1.264, "margin_dpo/margin_mean": 1.177229642868042, "margin_dpo/margin_std": 2.6634840965270996, "step": 492 }, { "epoch": 0.745275888133031, "fcm_dpo/beta": 0.5297720432281494, "fcm_dpo/delta": -0.07003698498010635, "fcm_dpo/margin": 1.7245081663131714, "fcm_dpo/q_t": 0.3485276997089386, "grad_norm": 82.64747619628906, "learning_rate": 9.339026888672468e-08, "logits/chosen": 2.035825490951538, "logits/rejected": 1.8640735149383545, "logps/chosen": -70.65989685058594, "logps/ref_chosen": -75.81439208984375, "logps/ref_rejected": -95.30766296386719, "logps/rejected": -91.87767028808594, "loss": 1.042, "margin_dpo/margin_mean": 1.7245078086853027, "margin_dpo/margin_std": 2.7354183197021484, "step": 493 }, { "epoch": 0.7467876039304611, "fcm_dpo/beta": 0.5313657522201538, "fcm_dpo/delta": 0.0808698982000351, "fcm_dpo/margin": 1.459385633468628, "fcm_dpo/q_t": 0.3662447929382324, "grad_norm": 103.77287292480469, "learning_rate": 9.236183322886945e-08, "logits/chosen": 1.5601379871368408, "logits/rejected": 1.5064457654953003, "logps/chosen": -88.89629364013672, "logps/ref_chosen": -93.83562469482422, "logps/ref_rejected": -112.21142578125, "logps/rejected": -108.73147583007812, "loss": 1.1484, "margin_dpo/margin_mean": 1.459385633468628, "margin_dpo/margin_std": 2.74635648727417, "step": 494 }, { "epoch": 0.7482993197278912, "fcm_dpo/beta": 0.5466286540031433, "fcm_dpo/delta": 0.10114302486181259, "fcm_dpo/margin": 1.3804593086242676, "fcm_dpo/q_t": 0.36845171451568604, "grad_norm": 77.80597686767578, "learning_rate": 9.133780704940594e-08, "logits/chosen": 1.9278578758239746, "logits/rejected": 1.7768769264221191, "logps/chosen": -63.57561492919922, "logps/ref_chosen": -68.52467346191406, "logps/ref_rejected": -89.65379333496094, "logps/rejected": -86.08518981933594, "loss": 1.1418, "margin_dpo/margin_mean": 1.3804597854614258, "margin_dpo/margin_std": 2.606942653656006, "step": 495 }, { "epoch": 0.7498110355253212, "fcm_dpo/beta": 0.5298216938972473, "fcm_dpo/delta": -0.11912409216165543, "fcm_dpo/margin": 1.800965428352356, "fcm_dpo/q_t": 0.34737032651901245, "grad_norm": 77.42396545410156, "learning_rate": 9.031821899254797e-08, "logits/chosen": 1.789339542388916, "logits/rejected": 1.6037667989730835, "logps/chosen": -68.10794830322266, "logps/ref_chosen": -73.13618469238281, "logps/ref_rejected": -111.50930786132812, "logps/rejected": -108.28204345703125, "loss": 1.0363, "margin_dpo/margin_mean": 1.8009655475616455, "margin_dpo/margin_std": 2.837244987487793, "step": 496 }, { "epoch": 0.7513227513227513, "fcm_dpo/beta": 0.51277756690979, "fcm_dpo/delta": -0.21961292624473572, "fcm_dpo/margin": 2.0381200313568115, "fcm_dpo/q_t": 0.32603585720062256, "grad_norm": 73.66954040527344, "learning_rate": 8.930309757836516e-08, "logits/chosen": 2.010392427444458, "logits/rejected": 1.8880023956298828, "logps/chosen": -84.01908874511719, "logps/ref_chosen": -88.71475219726562, "logps/ref_rejected": -105.74935913085938, "logps/rejected": -103.09181213378906, "loss": 0.9522, "margin_dpo/margin_mean": 2.0381202697753906, "margin_dpo/margin_std": 2.9248361587524414, "step": 497 }, { "epoch": 0.7528344671201814, "fcm_dpo/beta": 0.5155347585678101, "fcm_dpo/delta": 0.06701716780662537, "fcm_dpo/margin": 1.5286895036697388, "fcm_dpo/q_t": 0.3693251311779022, "grad_norm": 89.92591857910156, "learning_rate": 8.829247120198563e-08, "logits/chosen": 1.8527765274047852, "logits/rejected": 1.798376441001892, "logps/chosen": -78.18376922607422, "logps/ref_chosen": -83.3353271484375, "logps/ref_rejected": -89.34941864013672, "logps/rejected": -85.72654724121094, "loss": 1.0684, "margin_dpo/margin_mean": 1.5286893844604492, "margin_dpo/margin_std": 2.6188063621520996, "step": 498 }, { "epoch": 0.7543461829176115, "fcm_dpo/beta": 0.5106238722801208, "fcm_dpo/delta": -0.10215874761343002, "fcm_dpo/margin": 1.8459928035736084, "fcm_dpo/q_t": 0.34619784355163574, "grad_norm": 78.80824279785156, "learning_rate": 8.728636813280163e-08, "logits/chosen": 1.7513785362243652, "logits/rejected": 1.5904102325439453, "logps/chosen": -74.64079284667969, "logps/ref_chosen": -79.373779296875, "logps/ref_rejected": -104.62533569335938, "logps/rejected": -101.73834228515625, "loss": 1.0767, "margin_dpo/margin_mean": 1.8459930419921875, "margin_dpo/margin_std": 2.95662522315979, "step": 499 }, { "epoch": 0.7558578987150416, "fcm_dpo/beta": 0.5103375911712646, "fcm_dpo/delta": 0.05852968245744705, "fcm_dpo/margin": 1.5600131750106812, "fcm_dpo/q_t": 0.3537482023239136, "grad_norm": 77.21739959716797, "learning_rate": 8.628481651367875e-08, "logits/chosen": 1.3777530193328857, "logits/rejected": 1.3496954441070557, "logps/chosen": -81.1361083984375, "logps/ref_chosen": -85.953857421875, "logps/ref_rejected": -90.40995788574219, "logps/rejected": -87.1522216796875, "loss": 1.0818, "margin_dpo/margin_mean": 1.5600130558013916, "margin_dpo/margin_std": 2.59567928314209, "step": 500 }, { "epoch": 0.7558578987150416, "eval_fcm_dpo/beta": 0.5119248032569885, "eval_logits/chosen": 1.8607048988342285, "eval_logits/rejected": 1.7467539310455322, "eval_logps/chosen": -81.96269989013672, "eval_logps/ref_chosen": -86.90177917480469, "eval_logps/ref_rejected": -96.69639587402344, "eval_logps/rejected": -93.33091735839844, "eval_loss": 0.5445140600204468, "eval_margin_dpo/margin_mean": 1.573616623878479, "eval_margin_dpo/margin_std": 2.6447198390960693, "eval_runtime": 42.2639, "eval_samples_per_second": 54.491, "eval_steps_per_second": 1.704, "step": 500 }, { "epoch": 0.7573696145124716, "fcm_dpo/beta": 0.5188782215118408, "fcm_dpo/delta": 0.004477545619010925, "fcm_dpo/margin": 1.6241436004638672, "fcm_dpo/q_t": 0.3532504439353943, "grad_norm": 71.30140686035156, "learning_rate": 8.528784436016878e-08, "logits/chosen": 1.705322027206421, "logits/rejected": 1.6886937618255615, "logps/chosen": -76.24929809570312, "logps/ref_chosen": -81.22268676757812, "logps/ref_rejected": -86.97892761230469, "logps/rejected": -83.62968444824219, "loss": 0.9835, "margin_dpo/margin_mean": 1.6241438388824463, "margin_dpo/margin_std": 2.3686084747314453, "step": 501 }, { "epoch": 0.7588813303099018, "fcm_dpo/beta": 0.5274479985237122, "fcm_dpo/delta": 0.22003662586212158, "fcm_dpo/margin": 1.222203016281128, "fcm_dpo/q_t": 0.3778621554374695, "grad_norm": 86.91670227050781, "learning_rate": 8.4295479559726e-08, "logits/chosen": 1.8385815620422363, "logits/rejected": 1.7697994709014893, "logps/chosen": -78.10995483398438, "logps/ref_chosen": -83.1567611694336, "logps/ref_rejected": -106.74440002441406, "logps/rejected": -102.91980743408203, "loss": 1.0534, "margin_dpo/margin_mean": 1.2222027778625488, "margin_dpo/margin_std": 1.9343492984771729, "step": 502 }, { "epoch": 0.7603930461073318, "fcm_dpo/beta": 0.5325933694839478, "fcm_dpo/delta": -0.007229819893836975, "fcm_dpo/margin": 1.6076710224151611, "fcm_dpo/q_t": 0.34382903575897217, "grad_norm": 78.07044982910156, "learning_rate": 8.330774987092712e-08, "logits/chosen": 1.5651829242706299, "logits/rejected": 1.5705194473266602, "logps/chosen": -63.34757995605469, "logps/ref_chosen": -68.51583862304688, "logps/ref_rejected": -75.02178955078125, "logps/rejected": -71.46119689941406, "loss": 1.0491, "margin_dpo/margin_mean": 1.6076714992523193, "margin_dpo/margin_std": 2.520142078399658, "step": 503 }, { "epoch": 0.7619047619047619, "fcm_dpo/beta": 0.5068508386611938, "fcm_dpo/delta": -0.38821882009506226, "fcm_dpo/margin": 2.3534343242645264, "fcm_dpo/q_t": 0.2800072133541107, "grad_norm": 62.00618362426758, "learning_rate": 8.232468292269479e-08, "logits/chosen": 1.7697081565856934, "logits/rejected": 1.7505295276641846, "logps/chosen": -80.1093521118164, "logps/ref_chosen": -85.15829467773438, "logps/ref_rejected": -96.16879272460938, "logps/rejected": -93.47328186035156, "loss": 0.7507, "margin_dpo/margin_mean": 2.3534343242645264, "margin_dpo/margin_std": 2.318760395050049, "step": 504 }, { "epoch": 0.763416477702192, "fcm_dpo/beta": 0.5089028477668762, "fcm_dpo/delta": 0.22177591919898987, "fcm_dpo/margin": 1.2635679244995117, "fcm_dpo/q_t": 0.39394375681877136, "grad_norm": 92.9467544555664, "learning_rate": 8.134630621352483e-08, "logits/chosen": 1.7371578216552734, "logits/rejected": 1.60854172706604, "logps/chosen": -74.40583801269531, "logps/ref_chosen": -79.26185607910156, "logps/ref_rejected": -96.34947967529297, "logps/rejected": -92.75701904296875, "loss": 1.2414, "margin_dpo/margin_mean": 1.2635676860809326, "margin_dpo/margin_std": 2.7791409492492676, "step": 505 }, { "epoch": 0.764928193499622, "fcm_dpo/beta": 0.5233539342880249, "fcm_dpo/delta": 0.15490612387657166, "fcm_dpo/margin": 1.3474644422531128, "fcm_dpo/q_t": 0.38606366515159607, "grad_norm": 109.93718719482422, "learning_rate": 8.037264711071698e-08, "logits/chosen": 1.879191279411316, "logits/rejected": 1.866205096244812, "logps/chosen": -83.36624908447266, "logps/ref_chosen": -88.192626953125, "logps/ref_rejected": -100.86880493164062, "logps/rejected": -97.38990020751953, "loss": 1.3173, "margin_dpo/margin_mean": 1.347464919090271, "margin_dpo/margin_std": 3.231222629547119, "step": 506 }, { "epoch": 0.7664399092970522, "fcm_dpo/beta": 0.5418146252632141, "fcm_dpo/delta": 0.09071876108646393, "fcm_dpo/margin": 1.4136903285980225, "fcm_dpo/q_t": 0.36703741550445557, "grad_norm": 96.97240447998047, "learning_rate": 7.940373284960933e-08, "logits/chosen": 1.6270873546600342, "logits/rejected": 1.5263984203338623, "logps/chosen": -81.430908203125, "logps/ref_chosen": -86.04632568359375, "logps/ref_rejected": -111.44412994384766, "logps/rejected": -108.24239349365234, "loss": 1.1249, "margin_dpo/margin_mean": 1.4136903285980225, "margin_dpo/margin_std": 2.5658071041107178, "step": 507 }, { "epoch": 0.7679516250944822, "fcm_dpo/beta": 0.5383831262588501, "fcm_dpo/delta": -0.1335231363773346, "fcm_dpo/margin": 1.8011484146118164, "fcm_dpo/q_t": 0.3420735001564026, "grad_norm": 74.95172882080078, "learning_rate": 7.843959053281663e-08, "logits/chosen": 1.6859643459320068, "logits/rejected": 1.438178539276123, "logps/chosen": -74.3707504272461, "logps/ref_chosen": -79.25038146972656, "logps/ref_rejected": -118.49089813232422, "logps/rejected": -115.41241455078125, "loss": 0.9989, "margin_dpo/margin_mean": 1.8011486530303955, "margin_dpo/margin_std": 2.758845806121826, "step": 508 }, { "epoch": 0.7694633408919124, "fcm_dpo/beta": 0.5251543521881104, "fcm_dpo/delta": -0.097801074385643, "fcm_dpo/margin": 1.786637783050537, "fcm_dpo/q_t": 0.3379754424095154, "grad_norm": 73.10486602783203, "learning_rate": 7.748024712947204e-08, "logits/chosen": 1.5579968690872192, "logits/rejected": 1.4831342697143555, "logps/chosen": -75.50135803222656, "logps/ref_chosen": -80.7039566040039, "logps/ref_rejected": -90.50444793701172, "logps/rejected": -87.08848571777344, "loss": 0.9619, "margin_dpo/margin_mean": 1.7866381406784058, "margin_dpo/margin_std": 2.4936304092407227, "step": 509 }, { "epoch": 0.7709750566893424, "fcm_dpo/beta": 0.5186738967895508, "fcm_dpo/delta": 0.03997340798377991, "fcm_dpo/margin": 1.5671298503875732, "fcm_dpo/q_t": 0.35538774728775024, "grad_norm": 91.2940673828125, "learning_rate": 7.652572947447272e-08, "logits/chosen": 1.920919418334961, "logits/rejected": 1.748721957206726, "logps/chosen": -62.64276885986328, "logps/ref_chosen": -67.64491271972656, "logps/ref_rejected": -108.92274475097656, "logps/rejected": -105.48773193359375, "loss": 1.1015, "margin_dpo/margin_mean": 1.5671300888061523, "margin_dpo/margin_std": 2.6930947303771973, "step": 510 }, { "epoch": 0.7724867724867724, "fcm_dpo/beta": 0.5019285678863525, "fcm_dpo/delta": -0.2909063696861267, "fcm_dpo/margin": 2.2096104621887207, "fcm_dpo/q_t": 0.3058483600616455, "grad_norm": 66.41272735595703, "learning_rate": 7.557606426772961e-08, "logits/chosen": 1.8397446870803833, "logits/rejected": 1.7726900577545166, "logps/chosen": -70.74977111816406, "logps/ref_chosen": -75.66263580322266, "logps/ref_rejected": -104.26296997070312, "logps/rejected": -101.5597152709961, "loss": 0.8574, "margin_dpo/margin_mean": 2.209610939025879, "margin_dpo/margin_std": 2.602205991744995, "step": 511 }, { "epoch": 0.7739984882842026, "fcm_dpo/beta": 0.4980090856552124, "fcm_dpo/delta": 0.08836930245161057, "fcm_dpo/margin": 1.5430858135223389, "fcm_dpo/q_t": 0.36593347787857056, "grad_norm": 79.47098541259766, "learning_rate": 7.463127807341966e-08, "logits/chosen": 1.4316006898880005, "logits/rejected": 1.3491363525390625, "logps/chosen": -74.39913940429688, "logps/ref_chosen": -79.31925964355469, "logps/ref_rejected": -82.22052001953125, "logps/rejected": -78.8434829711914, "loss": 1.036, "margin_dpo/margin_mean": 1.543086051940918, "margin_dpo/margin_std": 2.4387998580932617, "step": 512 }, { "epoch": 0.7755102040816326, "fcm_dpo/beta": 0.485543429851532, "fcm_dpo/delta": -0.11648038774728775, "fcm_dpo/margin": 1.9538068771362305, "fcm_dpo/q_t": 0.3228296935558319, "grad_norm": 57.1087760925293, "learning_rate": 7.369139731924401e-08, "logits/chosen": 2.3472414016723633, "logits/rejected": 2.1964426040649414, "logps/chosen": -66.7118148803711, "logps/ref_chosen": -72.02534484863281, "logps/ref_rejected": -86.56224060058594, "logps/rejected": -83.2025146484375, "loss": 0.8578, "margin_dpo/margin_mean": 1.9538071155548096, "margin_dpo/margin_std": 2.2184200286865234, "step": 513 }, { "epoch": 0.7770219198790628, "fcm_dpo/beta": 0.4895557463169098, "fcm_dpo/delta": -0.029002681374549866, "fcm_dpo/margin": 1.7902264595031738, "fcm_dpo/q_t": 0.34171241521835327, "grad_norm": 71.4445571899414, "learning_rate": 7.275644829568747e-08, "logits/chosen": 1.7180615663528442, "logits/rejected": 1.6842756271362305, "logps/chosen": -80.10594177246094, "logps/ref_chosen": -84.94093322753906, "logps/ref_rejected": -102.44367980957031, "logps/rejected": -99.39891052246094, "loss": 0.9853, "margin_dpo/margin_mean": 1.7902262210845947, "margin_dpo/margin_std": 2.5876619815826416, "step": 514 }, { "epoch": 0.7785336356764928, "fcm_dpo/beta": 0.4983447194099426, "fcm_dpo/delta": 0.10146874189376831, "fcm_dpo/margin": 1.51442551612854, "fcm_dpo/q_t": 0.3634001910686493, "grad_norm": 77.20426177978516, "learning_rate": 7.182645715528435e-08, "logits/chosen": 1.7342910766601562, "logits/rejected": 1.601979374885559, "logps/chosen": -68.45315551757812, "logps/ref_chosen": -72.9662094116211, "logps/ref_rejected": -102.53651428222656, "logps/rejected": -99.53788757324219, "loss": 1.0669, "margin_dpo/margin_mean": 1.514425277709961, "margin_dpo/margin_std": 2.46881103515625, "step": 515 }, { "epoch": 0.780045351473923, "fcm_dpo/beta": 0.511983335018158, "fcm_dpo/delta": 0.16067957878112793, "fcm_dpo/margin": 1.3661940097808838, "fcm_dpo/q_t": 0.37095892429351807, "grad_norm": 82.00129699707031, "learning_rate": 7.090144991188568e-08, "logits/chosen": 1.8110804557800293, "logits/rejected": 1.678971529006958, "logps/chosen": -71.64615631103516, "logps/ref_chosen": -76.63414001464844, "logps/ref_rejected": -91.01750183105469, "logps/rejected": -87.39571380615234, "loss": 1.1056, "margin_dpo/margin_mean": 1.366194486618042, "margin_dpo/margin_std": 2.439441204071045, "step": 516 }, { "epoch": 0.781557067271353, "fcm_dpo/beta": 0.5124858617782593, "fcm_dpo/delta": -0.02756066992878914, "fcm_dpo/margin": 1.7076544761657715, "fcm_dpo/q_t": 0.3710241913795471, "grad_norm": 76.47826385498047, "learning_rate": 6.998145243993284e-08, "logits/chosen": 1.9861897230148315, "logits/rejected": 1.9750926494598389, "logps/chosen": -72.4866714477539, "logps/ref_chosen": -77.06817626953125, "logps/ref_rejected": -80.048583984375, "logps/rejected": -77.17472839355469, "loss": 1.1053, "margin_dpo/margin_mean": 1.7076544761657715, "margin_dpo/margin_std": 3.1321678161621094, "step": 517 }, { "epoch": 0.783068783068783, "fcm_dpo/beta": 0.526599109172821, "fcm_dpo/delta": 0.13619472086429596, "fcm_dpo/margin": 1.3683173656463623, "fcm_dpo/q_t": 0.3868216872215271, "grad_norm": 79.98131561279297, "learning_rate": 6.906649047373245e-08, "logits/chosen": 1.6740235090255737, "logits/rejected": 1.5478146076202393, "logps/chosen": -73.65196228027344, "logps/ref_chosen": -78.69026184082031, "logps/ref_rejected": -97.58124542236328, "logps/rejected": -93.9112548828125, "loss": 1.191, "margin_dpo/margin_mean": 1.3683173656463623, "margin_dpo/margin_std": 2.781571388244629, "step": 518 }, { "epoch": 0.7845804988662132, "fcm_dpo/beta": 0.5283833742141724, "fcm_dpo/delta": 0.06050370633602142, "fcm_dpo/margin": 0.9821269512176514, "fcm_dpo/q_t": 0.4151061177253723, "grad_norm": 106.65807342529297, "learning_rate": 6.815658960673781e-08, "logits/chosen": 2.014273166656494, "logits/rejected": 1.9104008674621582, "logps/chosen": -73.96952819824219, "logps/ref_chosen": -78.35087585449219, "logps/ref_rejected": -95.79212188720703, "logps/rejected": -92.39290618896484, "loss": 1.4895, "margin_dpo/margin_mean": 0.9821275472640991, "margin_dpo/margin_std": 3.163806915283203, "step": 519 }, { "epoch": 0.7860922146636432, "fcm_dpo/beta": 0.5247069597244263, "fcm_dpo/delta": 0.002389952540397644, "fcm_dpo/margin": 1.6092243194580078, "fcm_dpo/q_t": 0.345553994178772, "grad_norm": 80.86093139648438, "learning_rate": 6.725177529083209e-08, "logits/chosen": 1.7643589973449707, "logits/rejected": 1.607407808303833, "logps/chosen": -75.44636535644531, "logps/ref_chosen": -80.40513610839844, "logps/ref_rejected": -93.02791595458984, "logps/rejected": -89.67837524414062, "loss": 0.9769, "margin_dpo/margin_mean": 1.6092244386672974, "margin_dpo/margin_std": 2.20902681350708, "step": 520 }, { "epoch": 0.7876039304610734, "fcm_dpo/beta": 0.5398433208465576, "fcm_dpo/delta": 0.026781171560287476, "fcm_dpo/margin": 1.5242526531219482, "fcm_dpo/q_t": 0.3553396463394165, "grad_norm": 88.6592025756836, "learning_rate": 6.63520728356167e-08, "logits/chosen": 1.613523006439209, "logits/rejected": 1.4470891952514648, "logps/chosen": -81.82543182373047, "logps/ref_chosen": -86.5218276977539, "logps/ref_rejected": -109.20257568359375, "logps/rejected": -106.03044128417969, "loss": 1.0274, "margin_dpo/margin_mean": 1.5242516994476318, "margin_dpo/margin_std": 2.3445534706115723, "step": 521 }, { "epoch": 0.7891156462585034, "fcm_dpo/beta": 0.5545932650566101, "fcm_dpo/delta": 0.17674265801906586, "fcm_dpo/margin": 1.2288824319839478, "fcm_dpo/q_t": 0.3987041711807251, "grad_norm": 93.76902770996094, "learning_rate": 6.545750740770336e-08, "logits/chosen": 1.6111791133880615, "logits/rejected": 1.6222134828567505, "logps/chosen": -73.34974670410156, "logps/ref_chosen": -78.24254608154297, "logps/ref_rejected": -85.23554992675781, "logps/rejected": -81.57162475585938, "loss": 1.3201, "margin_dpo/margin_mean": 1.2288823127746582, "margin_dpo/margin_std": 2.988895893096924, "step": 522 }, { "epoch": 0.7906273620559335, "fcm_dpo/beta": 0.5467442870140076, "fcm_dpo/delta": -0.03936249762773514, "fcm_dpo/margin": 1.6179664134979248, "fcm_dpo/q_t": 0.33749115467071533, "grad_norm": 95.7950439453125, "learning_rate": 6.456810403001012e-08, "logits/chosen": 2.068002223968506, "logits/rejected": 1.8295702934265137, "logps/chosen": -78.77156066894531, "logps/ref_chosen": -83.50096893310547, "logps/ref_rejected": -117.45217895507812, "logps/rejected": -114.34073638916016, "loss": 1.0423, "margin_dpo/margin_mean": 1.6179664134979248, "margin_dpo/margin_std": 2.464113712310791, "step": 523 }, { "epoch": 0.7921390778533636, "fcm_dpo/beta": 0.5638613104820251, "fcm_dpo/delta": 0.1269315630197525, "fcm_dpo/margin": 1.2957689762115479, "fcm_dpo/q_t": 0.36413639783859253, "grad_norm": 103.0126953125, "learning_rate": 6.368388758106134e-08, "logits/chosen": 1.544802188873291, "logits/rejected": 1.5172946453094482, "logps/chosen": -87.98350524902344, "logps/ref_chosen": -93.22590637207031, "logps/ref_rejected": -108.17863464355469, "logps/rejected": -104.23199462890625, "loss": 1.123, "margin_dpo/margin_mean": 1.2957689762115479, "margin_dpo/margin_std": 2.3095810413360596, "step": 524 }, { "epoch": 0.7936507936507936, "fcm_dpo/beta": 0.5804117918014526, "fcm_dpo/delta": 0.23566076159477234, "fcm_dpo/margin": 1.0851058959960938, "fcm_dpo/q_t": 0.40458396077156067, "grad_norm": 105.79865264892578, "learning_rate": 6.280488279429185e-08, "logits/chosen": 1.4469276666641235, "logits/rejected": 1.4357231855392456, "logps/chosen": -89.34066772460938, "logps/ref_chosen": -94.08831787109375, "logps/ref_rejected": -100.682373046875, "logps/rejected": -97.01982116699219, "loss": 1.3254, "margin_dpo/margin_mean": 1.085106372833252, "margin_dpo/margin_std": 2.825222969055176, "step": 525 }, { "epoch": 0.7951625094482238, "fcm_dpo/beta": 0.5839090347290039, "fcm_dpo/delta": 0.07317899167537689, "fcm_dpo/margin": 1.3251323699951172, "fcm_dpo/q_t": 0.36826279759407043, "grad_norm": 86.31532287597656, "learning_rate": 6.193111425735515e-08, "logits/chosen": 1.864978551864624, "logits/rejected": 1.7237721681594849, "logps/chosen": -73.06343841552734, "logps/ref_chosen": -77.78373718261719, "logps/ref_rejected": -100.29583740234375, "logps/rejected": -96.90068054199219, "loss": 1.1024, "margin_dpo/margin_mean": 1.3251322507858276, "margin_dpo/margin_std": 2.194312810897827, "step": 526 }, { "epoch": 0.7966742252456538, "fcm_dpo/beta": 0.627768874168396, "fcm_dpo/delta": 0.26169681549072266, "fcm_dpo/margin": 0.957957923412323, "fcm_dpo/q_t": 0.3964363932609558, "grad_norm": 133.1971435546875, "learning_rate": 6.106260641143546e-08, "logits/chosen": 2.207190990447998, "logits/rejected": 2.003704786300659, "logps/chosen": -72.29412841796875, "logps/ref_chosen": -76.695068359375, "logps/ref_rejected": -107.68281555175781, "logps/rejected": -104.23982238769531, "loss": 1.3457, "margin_dpo/margin_mean": 0.9579578638076782, "margin_dpo/margin_std": 2.4768576622009277, "step": 527 }, { "epoch": 0.7981859410430839, "fcm_dpo/beta": 0.6325068473815918, "fcm_dpo/delta": 0.1113191694021225, "fcm_dpo/margin": 1.176019310951233, "fcm_dpo/q_t": 0.37600845098495483, "grad_norm": 96.25592803955078, "learning_rate": 6.019938355056422e-08, "logits/chosen": 1.7700958251953125, "logits/rejected": 1.6433663368225098, "logps/chosen": -70.225341796875, "logps/ref_chosen": -75.0361328125, "logps/ref_rejected": -94.67579650878906, "logps/rejected": -91.04102325439453, "loss": 1.3686, "margin_dpo/margin_mean": 1.1760194301605225, "margin_dpo/margin_std": 2.789802074432373, "step": 528 }, { "epoch": 0.799697656840514, "fcm_dpo/beta": 0.5813958644866943, "fcm_dpo/delta": -0.5735065340995789, "fcm_dpo/margin": 2.284200668334961, "fcm_dpo/q_t": 0.28090566396713257, "grad_norm": 72.14741516113281, "learning_rate": 5.934146982094049e-08, "logits/chosen": 1.7006018161773682, "logits/rejected": 1.5770983695983887, "logps/chosen": -67.83549499511719, "logps/ref_chosen": -72.84869384765625, "logps/ref_rejected": -93.25855255126953, "logps/rejected": -90.52955627441406, "loss": 0.7807, "margin_dpo/margin_mean": 2.284201145172119, "margin_dpo/margin_std": 2.4762625694274902, "step": 529 }, { "epoch": 0.8012093726379441, "fcm_dpo/beta": 0.5802004337310791, "fcm_dpo/delta": 0.04107225686311722, "fcm_dpo/margin": 1.399862289428711, "fcm_dpo/q_t": 0.36792880296707153, "grad_norm": 95.07054138183594, "learning_rate": 5.848888922025552e-08, "logits/chosen": 1.9237902164459229, "logits/rejected": 1.8377227783203125, "logps/chosen": -74.80259704589844, "logps/ref_chosen": -79.4971694946289, "logps/ref_rejected": -93.59564208984375, "logps/rejected": -90.3009262084961, "loss": 1.1697, "margin_dpo/margin_mean": 1.3998620510101318, "margin_dpo/margin_std": 2.7202823162078857, "step": 530 }, { "epoch": 0.8027210884353742, "fcm_dpo/beta": 0.6038322448730469, "fcm_dpo/delta": 0.12020966410636902, "fcm_dpo/margin": 1.2052103281021118, "fcm_dpo/q_t": 0.36855727434158325, "grad_norm": 97.9964599609375, "learning_rate": 5.7641665597021435e-08, "logits/chosen": 1.6562883853912354, "logits/rejected": 1.561152458190918, "logps/chosen": -64.65231323242188, "logps/ref_chosen": -69.45396423339844, "logps/ref_rejected": -96.30017852783203, "logps/rejected": -92.70374298095703, "loss": 1.1081, "margin_dpo/margin_mean": 1.2052104473114014, "margin_dpo/margin_std": 2.0545098781585693, "step": 531 }, { "epoch": 0.8042328042328042, "fcm_dpo/beta": 0.5897899866104126, "fcm_dpo/delta": -0.0036588534712791443, "fcm_dpo/margin": 1.4446355104446411, "fcm_dpo/q_t": 0.35917529463768005, "grad_norm": 97.351806640625, "learning_rate": 5.679982264990424e-08, "logits/chosen": 1.3556745052337646, "logits/rejected": 1.2717360258102417, "logps/chosen": -72.01132202148438, "logps/ref_chosen": -76.52011108398438, "logps/ref_rejected": -94.79593658447266, "logps/rejected": -91.73179626464844, "loss": 1.0801, "margin_dpo/margin_mean": 1.4446359872817993, "margin_dpo/margin_std": 2.4376420974731445, "step": 532 }, { "epoch": 0.8057445200302343, "fcm_dpo/beta": 0.5956892967224121, "fcm_dpo/delta": -0.05127408355474472, "fcm_dpo/margin": 1.5031523704528809, "fcm_dpo/q_t": 0.35674595832824707, "grad_norm": 99.48766326904297, "learning_rate": 5.596338392706076e-08, "logits/chosen": 2.067509412765503, "logits/rejected": 1.904585838317871, "logps/chosen": -67.12313842773438, "logps/ref_chosen": -72.31800842285156, "logps/ref_rejected": -89.26652526855469, "logps/rejected": -85.57481384277344, "loss": 1.1153, "margin_dpo/margin_mean": 1.5031521320343018, "margin_dpo/margin_std": 2.5594966411590576, "step": 533 }, { "epoch": 0.8072562358276644, "fcm_dpo/beta": 0.5751690864562988, "fcm_dpo/delta": -0.005829840898513794, "fcm_dpo/margin": 1.4704824686050415, "fcm_dpo/q_t": 0.34591174125671387, "grad_norm": 92.95126342773438, "learning_rate": 5.513237282548033e-08, "logits/chosen": 1.5100412368774414, "logits/rejected": 1.4817965030670166, "logps/chosen": -72.63851928710938, "logps/ref_chosen": -77.87559509277344, "logps/ref_rejected": -92.21171569824219, "logps/rejected": -88.44511413574219, "loss": 1.1029, "margin_dpo/margin_mean": 1.4704830646514893, "margin_dpo/margin_std": 2.3595573902130127, "step": 534 }, { "epoch": 0.8087679516250945, "fcm_dpo/beta": 0.5866076946258545, "fcm_dpo/delta": -0.038706980645656586, "fcm_dpo/margin": 1.5089240074157715, "fcm_dpo/q_t": 0.35734421014785767, "grad_norm": 90.3484115600586, "learning_rate": 5.430681259032957e-08, "logits/chosen": 1.4031028747558594, "logits/rejected": 1.2734661102294922, "logps/chosen": -73.26467895507812, "logps/ref_chosen": -78.16358184814453, "logps/ref_rejected": -97.78164672851562, "logps/rejected": -94.39166259765625, "loss": 1.1293, "margin_dpo/margin_mean": 1.5089242458343506, "margin_dpo/margin_std": 2.7176802158355713, "step": 535 }, { "epoch": 0.8102796674225246, "fcm_dpo/beta": 0.5691394805908203, "fcm_dpo/delta": -0.13241420686244965, "fcm_dpo/margin": 1.7017556428909302, "fcm_dpo/q_t": 0.3234456777572632, "grad_norm": 75.58480834960938, "learning_rate": 5.3486726314303175e-08, "logits/chosen": 1.7192052602767944, "logits/rejected": 1.5519870519638062, "logps/chosen": -61.61140823364258, "logps/ref_chosen": -66.65623474121094, "logps/ref_rejected": -89.49085998535156, "logps/rejected": -86.14779663085938, "loss": 0.9111, "margin_dpo/margin_mean": 1.7017555236816406, "margin_dpo/margin_std": 2.1126914024353027, "step": 536 }, { "epoch": 0.8117913832199547, "fcm_dpo/beta": 0.563645601272583, "fcm_dpo/delta": -0.042136892676353455, "fcm_dpo/margin": 1.5759865045547485, "fcm_dpo/q_t": 0.346375048160553, "grad_norm": 74.55496215820312, "learning_rate": 5.267213693697695e-08, "logits/chosen": 1.7074635028839111, "logits/rejected": 1.5484142303466797, "logps/chosen": -70.44686126708984, "logps/ref_chosen": -74.99390411376953, "logps/ref_rejected": -110.6627197265625, "logps/rejected": -107.69166564941406, "loss": 1.0427, "margin_dpo/margin_mean": 1.5759867429733276, "margin_dpo/margin_std": 2.441058397293091, "step": 537 }, { "epoch": 0.8133030990173847, "fcm_dpo/beta": 0.5549603700637817, "fcm_dpo/delta": -0.16347691416740417, "fcm_dpo/margin": 1.796407699584961, "fcm_dpo/q_t": 0.32237929105758667, "grad_norm": 88.10001373291016, "learning_rate": 5.1863067244167144e-08, "logits/chosen": 1.4939422607421875, "logits/rejected": 1.4379546642303467, "logps/chosen": -82.83648681640625, "logps/ref_chosen": -87.61151123046875, "logps/ref_rejected": -98.1150131225586, "logps/rejected": -95.13639068603516, "loss": 0.9309, "margin_dpo/margin_mean": 1.79640793800354, "margin_dpo/margin_std": 2.3823800086975098, "step": 538 }, { "epoch": 0.8148148148148148, "fcm_dpo/beta": 0.5492007732391357, "fcm_dpo/delta": 0.0604596883058548, "fcm_dpo/margin": 1.4464352130889893, "fcm_dpo/q_t": 0.3573772609233856, "grad_norm": 86.81356811523438, "learning_rate": 5.105953986729195e-08, "logits/chosen": 1.5843884944915771, "logits/rejected": 1.3659675121307373, "logps/chosen": -74.08658599853516, "logps/ref_chosen": -78.86482238769531, "logps/ref_rejected": -100.84349822998047, "logps/rejected": -97.5116958618164, "loss": 1.0038, "margin_dpo/margin_mean": 1.446435809135437, "margin_dpo/margin_std": 2.100681781768799, "step": 539 }, { "epoch": 0.8163265306122449, "fcm_dpo/beta": 0.5411156415939331, "fcm_dpo/delta": -0.14517144858837128, "fcm_dpo/margin": 1.8131245374679565, "fcm_dpo/q_t": 0.322839617729187, "grad_norm": 92.86735534667969, "learning_rate": 5.026157728273966e-08, "logits/chosen": 1.8254791498184204, "logits/rejected": 1.632624626159668, "logps/chosen": -78.81214141845703, "logps/ref_chosen": -83.66409301757812, "logps/ref_rejected": -114.8860092163086, "logps/rejected": -111.84718322753906, "loss": 1.0435, "margin_dpo/margin_mean": 1.813124656677246, "margin_dpo/margin_std": 2.7530481815338135, "step": 540 }, { "epoch": 0.817838246409675, "fcm_dpo/beta": 0.5307464003562927, "fcm_dpo/delta": -0.04240588843822479, "fcm_dpo/margin": 1.6736271381378174, "fcm_dpo/q_t": 0.3363361060619354, "grad_norm": 88.91351318359375, "learning_rate": 4.9469201811239035e-08, "logits/chosen": 1.5942778587341309, "logits/rejected": 1.6136151552200317, "logps/chosen": -78.06910705566406, "logps/ref_chosen": -83.12225341796875, "logps/ref_rejected": -74.80526733398438, "logps/rejected": -71.42574310302734, "loss": 1.0115, "margin_dpo/margin_mean": 1.6736273765563965, "margin_dpo/margin_std": 2.4181206226348877, "step": 541 }, { "epoch": 0.8193499622071051, "fcm_dpo/beta": 0.5184097290039062, "fcm_dpo/delta": -0.11768058687448502, "fcm_dpo/margin": 1.8414095640182495, "fcm_dpo/q_t": 0.3354414105415344, "grad_norm": 68.62602233886719, "learning_rate": 4.868243561723534e-08, "logits/chosen": 1.882128119468689, "logits/rejected": 1.7743515968322754, "logps/chosen": -60.99085235595703, "logps/ref_chosen": -66.3132553100586, "logps/ref_rejected": -83.24588012695312, "logps/rejected": -79.764892578125, "loss": 0.9949, "margin_dpo/margin_mean": 1.8414098024368286, "margin_dpo/margin_std": 2.648585557937622, "step": 542 }, { "epoch": 0.8208616780045351, "fcm_dpo/beta": 0.5239220857620239, "fcm_dpo/delta": 0.005960091948509216, "fcm_dpo/margin": 1.6092872619628906, "fcm_dpo/q_t": 0.3501929044723511, "grad_norm": 77.4872817993164, "learning_rate": 4.790130070827028e-08, "logits/chosen": 1.893631100654602, "logits/rejected": 1.6607489585876465, "logps/chosen": -63.158504486083984, "logps/ref_chosen": -68.11429595947266, "logps/ref_rejected": -94.62380981445312, "logps/rejected": -91.27730560302734, "loss": 1.0308, "margin_dpo/margin_mean": 1.6092875003814697, "margin_dpo/margin_std": 2.471250057220459, "step": 543 }, { "epoch": 0.8223733938019653, "fcm_dpo/beta": 0.49690988659858704, "fcm_dpo/delta": -0.31122803688049316, "fcm_dpo/margin": 2.2667787075042725, "fcm_dpo/q_t": 0.3194485902786255, "grad_norm": 66.967529296875, "learning_rate": 4.7125818934366454e-08, "logits/chosen": 1.8328646421432495, "logits/rejected": 1.6826552152633667, "logps/chosen": -76.01614379882812, "logps/ref_chosen": -81.187255859375, "logps/ref_rejected": -105.84722900390625, "logps/rejected": -102.9428939819336, "loss": 0.9294, "margin_dpo/margin_mean": 2.2667789459228516, "margin_dpo/margin_std": 3.020232677459717, "step": 544 }, { "epoch": 0.8238851095993953, "fcm_dpo/beta": 0.4952470362186432, "fcm_dpo/delta": 0.13346421718597412, "fcm_dpo/margin": 1.4665462970733643, "fcm_dpo/q_t": 0.37403714656829834, "grad_norm": 81.59075927734375, "learning_rate": 4.635601198741607e-08, "logits/chosen": 1.6682891845703125, "logits/rejected": 1.5271377563476562, "logps/chosen": -74.03522491455078, "logps/ref_chosen": -78.81717681884766, "logps/ref_rejected": -98.65876770019531, "logps/rejected": -95.3433609008789, "loss": 1.0963, "margin_dpo/margin_mean": 1.4665460586547852, "margin_dpo/margin_std": 2.5844061374664307, "step": 545 }, { "epoch": 0.8253968253968254, "fcm_dpo/beta": 0.5028541088104248, "fcm_dpo/delta": -0.008033901453018188, "fcm_dpo/margin": 1.704458236694336, "fcm_dpo/q_t": 0.3371015191078186, "grad_norm": 72.56092834472656, "learning_rate": 4.559190140057428e-08, "logits/chosen": 1.9643224477767944, "logits/rejected": 1.9538801908493042, "logps/chosen": -68.92066955566406, "logps/ref_chosen": -74.2529296875, "logps/ref_rejected": -80.32308959960938, "logps/rejected": -76.6952896118164, "loss": 0.9537, "margin_dpo/margin_mean": 1.704458236694336, "margin_dpo/margin_std": 2.263866901397705, "step": 546 }, { "epoch": 0.8269085411942555, "fcm_dpo/beta": 0.49540919065475464, "fcm_dpo/delta": -0.06381530314683914, "fcm_dpo/margin": 1.8326653242111206, "fcm_dpo/q_t": 0.32726162672042847, "grad_norm": 64.46299743652344, "learning_rate": 4.483350854765672e-08, "logits/chosen": 1.5813419818878174, "logits/rejected": 1.4572780132293701, "logps/chosen": -64.79362487792969, "logps/ref_chosen": -69.9368896484375, "logps/ref_rejected": -90.25672912597656, "logps/rejected": -86.94613647460938, "loss": 0.9145, "margin_dpo/margin_mean": 1.8326648473739624, "margin_dpo/margin_std": 2.2874417304992676, "step": 547 }, { "epoch": 0.8284202569916855, "fcm_dpo/beta": 0.5134807825088501, "fcm_dpo/delta": 0.1882130205631256, "fcm_dpo/margin": 1.306201696395874, "fcm_dpo/q_t": 0.3803349733352661, "grad_norm": 88.97979736328125, "learning_rate": 4.4080854642541826e-08, "logits/chosen": 1.4894611835479736, "logits/rejected": 1.3644602298736572, "logps/chosen": -76.54277801513672, "logps/ref_chosen": -81.1605224609375, "logps/ref_rejected": -99.7246322631836, "logps/rejected": -96.4130859375, "loss": 1.1116, "margin_dpo/margin_mean": 1.306201457977295, "margin_dpo/margin_std": 2.309016704559326, "step": 548 }, { "epoch": 0.8299319727891157, "fcm_dpo/beta": 0.5256662964820862, "fcm_dpo/delta": 0.1741892248392105, "fcm_dpo/margin": 1.3089189529418945, "fcm_dpo/q_t": 0.3829908072948456, "grad_norm": 93.77330780029297, "learning_rate": 4.333396073857723e-08, "logits/chosen": 2.2063562870025635, "logits/rejected": 2.0459249019622803, "logps/chosen": -75.51959991455078, "logps/ref_chosen": -80.49800872802734, "logps/ref_rejected": -113.20750427246094, "logps/rejected": -109.53801727294922, "loss": 1.2299, "margin_dpo/margin_mean": 1.3089196681976318, "margin_dpo/margin_std": 2.8981709480285645, "step": 549 }, { "epoch": 0.8314436885865457, "fcm_dpo/beta": 0.5375339984893799, "fcm_dpo/delta": 0.15981407463550568, "fcm_dpo/margin": 1.30253267288208, "fcm_dpo/q_t": 0.37385594844818115, "grad_norm": 77.2861328125, "learning_rate": 4.259284772799099e-08, "logits/chosen": 1.5224547386169434, "logits/rejected": 1.4622929096221924, "logps/chosen": -70.20506286621094, "logps/ref_chosen": -75.13760375976562, "logps/ref_rejected": -79.04876708984375, "logps/rejected": -75.41875457763672, "loss": 1.1472, "margin_dpo/margin_mean": 1.30253267288208, "margin_dpo/margin_std": 2.410634994506836, "step": 550 }, { "epoch": 0.8329554043839759, "fcm_dpo/beta": 0.5626946091651917, "fcm_dpo/delta": 0.08639901876449585, "fcm_dpo/margin": 1.362003207206726, "fcm_dpo/q_t": 0.35773536562919617, "grad_norm": 93.26870727539062, "learning_rate": 4.1857536341307176e-08, "logits/chosen": 1.9698476791381836, "logits/rejected": 1.842726230621338, "logps/chosen": -80.63394927978516, "logps/ref_chosen": -85.4496078491211, "logps/ref_rejected": -103.48530578613281, "logps/rejected": -100.03164672851562, "loss": 1.0683, "margin_dpo/margin_mean": 1.3620030879974365, "margin_dpo/margin_std": 2.265345573425293, "step": 551 }, { "epoch": 0.8344671201814059, "fcm_dpo/beta": 0.5713049173355103, "fcm_dpo/delta": 0.01876942813396454, "fcm_dpo/margin": 1.443709135055542, "fcm_dpo/q_t": 0.3453782796859741, "grad_norm": 87.71670532226562, "learning_rate": 4.112804714676593e-08, "logits/chosen": 1.630192518234253, "logits/rejected": 1.5265223979949951, "logps/chosen": -76.890380859375, "logps/ref_chosen": -82.01036071777344, "logps/ref_rejected": -101.61884307861328, "logps/rejected": -97.94257354736328, "loss": 1.0856, "margin_dpo/margin_mean": 1.4437094926834106, "margin_dpo/margin_std": 2.2685065269470215, "step": 552 }, { "epoch": 0.8359788359788359, "fcm_dpo/beta": 0.5514621138572693, "fcm_dpo/delta": -0.06148216500878334, "fcm_dpo/margin": 1.640283465385437, "fcm_dpo/q_t": 0.3572534918785095, "grad_norm": 92.91000366210938, "learning_rate": 4.0404400549748144e-08, "logits/chosen": 1.8313771486282349, "logits/rejected": 1.6508121490478516, "logps/chosen": -69.30296325683594, "logps/ref_chosen": -73.81416320800781, "logps/ref_rejected": -104.27050018310547, "logps/rejected": -101.39958953857422, "loss": 1.1162, "margin_dpo/margin_mean": 1.6402831077575684, "margin_dpo/margin_std": 2.8277735710144043, "step": 553 }, { "epoch": 0.8374905517762661, "fcm_dpo/beta": 0.5503508448600769, "fcm_dpo/delta": -0.00613846629858017, "fcm_dpo/margin": 1.5537428855895996, "fcm_dpo/q_t": 0.3472326695919037, "grad_norm": 82.89059448242188, "learning_rate": 3.968661679220467e-08, "logits/chosen": 1.3481284379959106, "logits/rejected": 1.3186860084533691, "logps/chosen": -76.67272186279297, "logps/ref_chosen": -81.43980407714844, "logps/ref_rejected": -89.32518005371094, "logps/rejected": -86.11184692382812, "loss": 1.0697, "margin_dpo/margin_mean": 1.55374276638031, "margin_dpo/margin_std": 2.5019257068634033, "step": 554 }, { "epoch": 0.8390022675736961, "fcm_dpo/beta": 0.5594339370727539, "fcm_dpo/delta": -0.010179772973060608, "fcm_dpo/margin": 1.5303804874420166, "fcm_dpo/q_t": 0.3368152379989624, "grad_norm": 85.1632308959961, "learning_rate": 3.89747159520904e-08, "logits/chosen": 1.6793913841247559, "logits/rejected": 1.6269229650497437, "logps/chosen": -76.93789672851562, "logps/ref_chosen": -81.66071319580078, "logps/ref_rejected": -87.20857238769531, "logps/rejected": -84.01614379882812, "loss": 1.0866, "margin_dpo/margin_mean": 1.5303804874420166, "margin_dpo/margin_std": 2.4207754135131836, "step": 555 }, { "epoch": 0.8405139833711263, "fcm_dpo/beta": 0.5653465986251831, "fcm_dpo/delta": 0.14113594591617584, "fcm_dpo/margin": 1.2706456184387207, "fcm_dpo/q_t": 0.38003456592559814, "grad_norm": 89.62040710449219, "learning_rate": 3.826871794280192e-08, "logits/chosen": 1.7433538436889648, "logits/rejected": 1.6561615467071533, "logps/chosen": -61.646141052246094, "logps/ref_chosen": -66.02448272705078, "logps/ref_rejected": -82.74746704101562, "logps/rejected": -79.6397705078125, "loss": 1.2086, "margin_dpo/margin_mean": 1.2706454992294312, "margin_dpo/margin_std": 2.578793525695801, "step": 556 }, { "epoch": 0.8420256991685563, "fcm_dpo/beta": 0.5589163303375244, "fcm_dpo/delta": -0.01051899790763855, "fcm_dpo/margin": 1.53108811378479, "fcm_dpo/q_t": 0.3562447428703308, "grad_norm": 100.4636459350586, "learning_rate": 3.756864251262143e-08, "logits/chosen": 1.7672959566116333, "logits/rejected": 1.5255687236785889, "logps/chosen": -68.58070373535156, "logps/ref_chosen": -73.08985900878906, "logps/ref_rejected": -97.43034362792969, "logps/rejected": -94.45227813720703, "loss": 1.1511, "margin_dpo/margin_mean": 1.5310877561569214, "margin_dpo/margin_std": 2.7770981788635254, "step": 557 }, { "epoch": 0.8435374149659864, "fcm_dpo/beta": 0.5567211508750916, "fcm_dpo/delta": -0.06094827130436897, "fcm_dpo/margin": 1.623206615447998, "fcm_dpo/q_t": 0.33581194281578064, "grad_norm": 91.5750732421875, "learning_rate": 3.687450924416341e-08, "logits/chosen": 1.9485085010528564, "logits/rejected": 1.8674168586730957, "logps/chosen": -75.47303771972656, "logps/ref_chosen": -80.1357192993164, "logps/ref_rejected": -106.65797424316406, "logps/rejected": -103.61849975585938, "loss": 1.0374, "margin_dpo/margin_mean": 1.6232068538665771, "margin_dpo/margin_std": 2.4886608123779297, "step": 558 }, { "epoch": 0.8450491307634165, "fcm_dpo/beta": 0.5594509840011597, "fcm_dpo/delta": 0.08693183213472366, "fcm_dpo/margin": 1.3717814683914185, "fcm_dpo/q_t": 0.37605616450309753, "grad_norm": 99.626953125, "learning_rate": 3.6186337553827743e-08, "logits/chosen": 1.522691011428833, "logits/rejected": 1.3501641750335693, "logps/chosen": -74.56044006347656, "logps/ref_chosen": -79.42267608642578, "logps/ref_rejected": -98.59402465820312, "logps/rejected": -95.10356140136719, "loss": 1.2814, "margin_dpo/margin_mean": 1.371781349182129, "margin_dpo/margin_std": 2.9634933471679688, "step": 559 }, { "epoch": 0.8465608465608465, "fcm_dpo/beta": 0.5630248785018921, "fcm_dpo/delta": -0.13770724833011627, "fcm_dpo/margin": 1.7297303676605225, "fcm_dpo/q_t": 0.3417326807975769, "grad_norm": 83.30538177490234, "learning_rate": 3.550414669125573e-08, "logits/chosen": 1.4917066097259521, "logits/rejected": 1.4372332096099854, "logps/chosen": -72.65574645996094, "logps/ref_chosen": -77.49559020996094, "logps/ref_rejected": -92.61347961425781, "logps/rejected": -89.50337219238281, "loss": 1.0605, "margin_dpo/margin_mean": 1.7297307252883911, "margin_dpo/margin_std": 2.701158046722412, "step": 560 }, { "epoch": 0.8480725623582767, "fcm_dpo/beta": 0.5409590005874634, "fcm_dpo/delta": -0.1466280072927475, "fcm_dpo/margin": 1.8150770664215088, "fcm_dpo/q_t": 0.349008172750473, "grad_norm": 78.01751708984375, "learning_rate": 3.482795573879241e-08, "logits/chosen": 1.7881979942321777, "logits/rejected": 1.7428940534591675, "logps/chosen": -74.37679290771484, "logps/ref_chosen": -79.20771789550781, "logps/ref_rejected": -93.46514892578125, "logps/rejected": -90.44929504394531, "loss": 1.0416, "margin_dpo/margin_mean": 1.8150770664215088, "margin_dpo/margin_std": 2.915646553039551, "step": 561 }, { "epoch": 0.8495842781557067, "fcm_dpo/beta": 0.5316770076751709, "fcm_dpo/delta": -0.06161924824118614, "fcm_dpo/margin": 1.7036185264587402, "fcm_dpo/q_t": 0.3406429886817932, "grad_norm": 77.7786865234375, "learning_rate": 3.415778361095226e-08, "logits/chosen": 1.9883166551589966, "logits/rejected": 1.8916831016540527, "logps/chosen": -90.15193176269531, "logps/ref_chosen": -94.88652801513672, "logps/ref_rejected": -109.33815002441406, "logps/rejected": -106.30716705322266, "loss": 1.0043, "margin_dpo/margin_mean": 1.7036182880401611, "margin_dpo/margin_std": 2.4467196464538574, "step": 562 }, { "epoch": 0.8510959939531368, "fcm_dpo/beta": 0.5239354372024536, "fcm_dpo/delta": -0.08977065980434418, "fcm_dpo/margin": 1.7775870561599731, "fcm_dpo/q_t": 0.34091219305992126, "grad_norm": 75.40060424804688, "learning_rate": 3.349364905389032e-08, "logits/chosen": 1.6115353107452393, "logits/rejected": 1.4812626838684082, "logps/chosen": -60.68045425415039, "logps/ref_chosen": -65.90719604492188, "logps/ref_rejected": -84.07121276855469, "logps/rejected": -80.62205505371094, "loss": 1.0734, "margin_dpo/margin_mean": 1.7775870561599731, "margin_dpo/margin_std": 2.815526247024536, "step": 563 }, { "epoch": 0.8526077097505669, "fcm_dpo/beta": 0.5255653858184814, "fcm_dpo/delta": 0.024834435433149338, "fcm_dpo/margin": 1.5733851194381714, "fcm_dpo/q_t": 0.35912322998046875, "grad_norm": 86.95561218261719, "learning_rate": 3.283557064487785e-08, "logits/chosen": 1.7449924945831299, "logits/rejected": 1.6827529668807983, "logps/chosen": -67.22463989257812, "logps/ref_chosen": -72.32071685791016, "logps/ref_rejected": -88.05014038085938, "logps/rejected": -84.52745056152344, "loss": 1.1226, "margin_dpo/margin_mean": 1.5733850002288818, "margin_dpo/margin_std": 2.7897634506225586, "step": 564 }, { "epoch": 0.854119425547997, "fcm_dpo/beta": 0.528499960899353, "fcm_dpo/delta": 0.007532309740781784, "fcm_dpo/margin": 1.5936261415481567, "fcm_dpo/q_t": 0.34886032342910767, "grad_norm": 86.20529174804688, "learning_rate": 3.218356679178252e-08, "logits/chosen": 2.053807258605957, "logits/rejected": 1.9824556112289429, "logps/chosen": -75.60353088378906, "logps/ref_chosen": -80.18453979492188, "logps/ref_rejected": -99.55126953125, "logps/rejected": -96.56388854980469, "loss": 1.0384, "margin_dpo/margin_mean": 1.5936262607574463, "margin_dpo/margin_std": 2.4494309425354004, "step": 565 }, { "epoch": 0.8556311413454271, "fcm_dpo/beta": 0.5436303615570068, "fcm_dpo/delta": 0.09894217550754547, "fcm_dpo/margin": 1.3781930208206177, "fcm_dpo/q_t": 0.3776889443397522, "grad_norm": 96.017578125, "learning_rate": 3.1537655732553764e-08, "logits/chosen": 1.9359018802642822, "logits/rejected": 1.872054100036621, "logps/chosen": -83.3899154663086, "logps/ref_chosen": -88.0877914428711, "logps/ref_rejected": -87.7589111328125, "logps/rejected": -84.43922424316406, "loss": 1.2347, "margin_dpo/margin_mean": 1.3781930208206177, "margin_dpo/margin_std": 2.8941593170166016, "step": 566 }, { "epoch": 0.8571428571428571, "fcm_dpo/beta": 0.530328631401062, "fcm_dpo/delta": -0.04391162469983101, "fcm_dpo/margin": 1.6774555444717407, "fcm_dpo/q_t": 0.33421647548675537, "grad_norm": 78.94988250732422, "learning_rate": 3.089785553471233e-08, "logits/chosen": 1.9912657737731934, "logits/rejected": 1.7774717807769775, "logps/chosen": -65.12935638427734, "logps/ref_chosen": -69.93267822265625, "logps/ref_rejected": -95.71786499023438, "logps/rejected": -92.59199523925781, "loss": 0.9518, "margin_dpo/margin_mean": 1.6774554252624512, "margin_dpo/margin_std": 2.2273342609405518, "step": 567 }, { "epoch": 0.8586545729402872, "fcm_dpo/beta": 0.5215575695037842, "fcm_dpo/delta": -0.2450091391801834, "fcm_dpo/margin": 2.045576572418213, "fcm_dpo/q_t": 0.3052712678909302, "grad_norm": 66.90857696533203, "learning_rate": 3.026418409484513e-08, "logits/chosen": 1.7025278806686401, "logits/rejected": 1.5524958372116089, "logps/chosen": -65.14755249023438, "logps/ref_chosen": -70.33343505859375, "logps/ref_rejected": -108.86271667480469, "logps/rejected": -105.722412109375, "loss": 0.8475, "margin_dpo/margin_mean": 2.045576572418213, "margin_dpo/margin_std": 2.2456419467926025, "step": 568 }, { "epoch": 0.8601662887377173, "fcm_dpo/beta": 0.5055083632469177, "fcm_dpo/delta": 0.0923711508512497, "fcm_dpo/margin": 1.5074416399002075, "fcm_dpo/q_t": 0.35584020614624023, "grad_norm": 73.7436752319336, "learning_rate": 2.963665913810451e-08, "logits/chosen": 1.5560593605041504, "logits/rejected": 1.5503093004226685, "logps/chosen": -76.06471252441406, "logps/ref_chosen": -80.85043334960938, "logps/ref_rejected": -92.77810668945312, "logps/rejected": -89.49983215332031, "loss": 1.0816, "margin_dpo/margin_mean": 1.5074411630630493, "margin_dpo/margin_std": 2.4058260917663574, "step": 569 }, { "epoch": 0.8616780045351474, "fcm_dpo/beta": 0.5042208433151245, "fcm_dpo/delta": -0.2551186978816986, "fcm_dpo/margin": 2.1350512504577637, "fcm_dpo/q_t": 0.3052746653556824, "grad_norm": 66.28195190429688, "learning_rate": 2.9015298217712453e-08, "logits/chosen": 1.4883897304534912, "logits/rejected": 1.3627548217773438, "logps/chosen": -64.59855651855469, "logps/ref_chosen": -69.94769287109375, "logps/ref_rejected": -97.37059020996094, "logps/rejected": -94.15650939941406, "loss": 0.8679, "margin_dpo/margin_mean": 2.1350512504577637, "margin_dpo/margin_std": 2.4027867317199707, "step": 570 }, { "epoch": 0.8631897203325775, "fcm_dpo/beta": 0.5050674080848694, "fcm_dpo/delta": 0.22430172562599182, "fcm_dpo/margin": 1.2679533958435059, "fcm_dpo/q_t": 0.3824647068977356, "grad_norm": 78.28890991210938, "learning_rate": 2.840011871446962e-08, "logits/chosen": 1.7317427396774292, "logits/rejected": 1.6764953136444092, "logps/chosen": -67.2318115234375, "logps/ref_chosen": -72.28555297851562, "logps/ref_rejected": -84.57748413085938, "logps/rejected": -80.79170227050781, "loss": 1.1642, "margin_dpo/margin_mean": 1.2679531574249268, "margin_dpo/margin_std": 2.5002126693725586, "step": 571 }, { "epoch": 0.8647014361300076, "fcm_dpo/beta": 0.5231929421424866, "fcm_dpo/delta": 0.15702974796295166, "fcm_dpo/margin": 1.3459784984588623, "fcm_dpo/q_t": 0.37420764565467834, "grad_norm": 91.95054626464844, "learning_rate": 2.7791137836269158e-08, "logits/chosen": 1.8538180589675903, "logits/rejected": 1.9062275886535645, "logps/chosen": -86.89059448242188, "logps/ref_chosen": -91.4906997680664, "logps/ref_rejected": -80.44602966308594, "logps/rejected": -77.19190216064453, "loss": 1.0802, "margin_dpo/margin_mean": 1.3459784984588623, "margin_dpo/margin_std": 2.314822196960449, "step": 572 }, { "epoch": 0.8662131519274376, "fcm_dpo/beta": 0.5159151554107666, "fcm_dpo/delta": -0.12967026233673096, "fcm_dpo/margin": 1.872837781906128, "fcm_dpo/q_t": 0.3495451807975769, "grad_norm": 82.7747802734375, "learning_rate": 2.718837261761528e-08, "logits/chosen": 1.7055302858352661, "logits/rejected": 1.623398780822754, "logps/chosen": -82.83917236328125, "logps/ref_chosen": -87.54232788085938, "logps/ref_rejected": -104.32984924316406, "logps/rejected": -101.49952697753906, "loss": 1.0981, "margin_dpo/margin_mean": 1.8728383779525757, "margin_dpo/margin_std": 3.2378549575805664, "step": 573 }, { "epoch": 0.8677248677248677, "fcm_dpo/beta": 0.5023356080055237, "fcm_dpo/delta": -0.21158885955810547, "fcm_dpo/margin": 2.0703470706939697, "fcm_dpo/q_t": 0.31453508138656616, "grad_norm": 68.75479888916016, "learning_rate": 2.659183991914696e-08, "logits/chosen": 2.0316388607025146, "logits/rejected": 1.9578845500946045, "logps/chosen": -70.46785736083984, "logps/ref_chosen": -75.36632537841797, "logps/ref_rejected": -103.27328491210938, "logps/rejected": -100.4451675415039, "loss": 0.8919, "margin_dpo/margin_mean": 2.0703463554382324, "margin_dpo/margin_std": 2.517101764678955, "step": 574 }, { "epoch": 0.8692365835222978, "fcm_dpo/beta": 0.4883292615413666, "fcm_dpo/delta": -0.051660750061273575, "fcm_dpo/margin": 1.2661917209625244, "fcm_dpo/q_t": 0.3973788917064667, "grad_norm": 85.65643310546875, "learning_rate": 2.600155642716606e-08, "logits/chosen": 1.819608211517334, "logits/rejected": 1.6805698871612549, "logps/chosen": -76.97919464111328, "logps/ref_chosen": -81.678466796875, "logps/ref_rejected": -112.84233093261719, "logps/rejected": -109.40924835205078, "loss": 1.2291, "margin_dpo/margin_mean": 1.266192078590393, "margin_dpo/margin_std": 2.7936654090881348, "step": 575 }, { "epoch": 0.8707482993197279, "fcm_dpo/beta": 0.46955758333206177, "fcm_dpo/delta": -0.15395161509513855, "fcm_dpo/margin": 2.08994722366333, "fcm_dpo/q_t": 0.3229488730430603, "grad_norm": 68.0239486694336, "learning_rate": 2.5417538653170754e-08, "logits/chosen": 2.0921566486358643, "logits/rejected": 1.8919597864151, "logps/chosen": -63.571937561035156, "logps/ref_chosen": -68.78944396972656, "logps/ref_rejected": -102.79037475585938, "logps/rejected": -99.66281127929688, "loss": 0.9436, "margin_dpo/margin_mean": 2.089946746826172, "margin_dpo/margin_std": 2.686835765838623, "step": 576 }, { "epoch": 0.872260015117158, "fcm_dpo/beta": 0.4906901717185974, "fcm_dpo/delta": 0.2550051510334015, "fcm_dpo/margin": 1.2452499866485596, "fcm_dpo/q_t": 0.39396393299102783, "grad_norm": 75.48487091064453, "learning_rate": 2.4839802933393607e-08, "logits/chosen": 1.5746073722839355, "logits/rejected": 1.5519602298736572, "logps/chosen": -74.94413757324219, "logps/ref_chosen": -79.84675598144531, "logps/ref_rejected": -84.08309936523438, "logps/rejected": -80.42573547363281, "loss": 1.1763, "margin_dpo/margin_mean": 1.2452495098114014, "margin_dpo/margin_std": 2.5598554611206055, "step": 577 }, { "epoch": 0.873771730914588, "fcm_dpo/beta": 0.5132976770401001, "fcm_dpo/delta": 0.22512920200824738, "fcm_dpo/margin": 1.2464478015899658, "fcm_dpo/q_t": 0.3836420774459839, "grad_norm": 75.34688568115234, "learning_rate": 2.4268365428344733e-08, "logits/chosen": 1.906113862991333, "logits/rejected": 1.8371565341949463, "logps/chosen": -69.94638061523438, "logps/ref_chosen": -74.91357421875, "logps/ref_rejected": -83.64881896972656, "logps/rejected": -79.92807006835938, "loss": 1.1037, "margin_dpo/margin_mean": 1.2464478015899658, "margin_dpo/margin_std": 2.216444492340088, "step": 578 }, { "epoch": 0.8752834467120182, "fcm_dpo/beta": 0.5136229395866394, "fcm_dpo/delta": -0.053459469228982925, "fcm_dpo/margin": 1.747258186340332, "fcm_dpo/q_t": 0.3330836296081543, "grad_norm": 84.27195739746094, "learning_rate": 2.3703242122359357e-08, "logits/chosen": 1.5232186317443848, "logits/rejected": 1.4639792442321777, "logps/chosen": -70.65562438964844, "logps/ref_chosen": -75.51022338867188, "logps/ref_rejected": -84.83192443847656, "logps/rejected": -81.72457885742188, "loss": 1.0588, "margin_dpo/margin_mean": 1.7472577095031738, "margin_dpo/margin_std": 2.7469921112060547, "step": 579 }, { "epoch": 0.8767951625094482, "fcm_dpo/beta": 0.5151132941246033, "fcm_dpo/delta": -0.06574690341949463, "fcm_dpo/margin": 1.7644736766815186, "fcm_dpo/q_t": 0.33593645691871643, "grad_norm": 64.35208892822266, "learning_rate": 2.3144448823151392e-08, "logits/chosen": 1.6839550733566284, "logits/rejected": 1.5548913478851318, "logps/chosen": -71.26051330566406, "logps/ref_chosen": -76.61564636230469, "logps/ref_rejected": -97.09959411621094, "logps/rejected": -93.50894165039062, "loss": 1.0099, "margin_dpo/margin_mean": 1.7644741535186768, "margin_dpo/margin_std": 2.5570900440216064, "step": 580 }, { "epoch": 0.8783068783068783, "fcm_dpo/beta": 0.505305290222168, "fcm_dpo/delta": 0.011192187666893005, "fcm_dpo/margin": 1.6590213775634766, "fcm_dpo/q_t": 0.35038408637046814, "grad_norm": 80.1307601928711, "learning_rate": 2.259200116137039e-08, "logits/chosen": 1.7930132150650024, "logits/rejected": 1.6991674900054932, "logps/chosen": -70.05377197265625, "logps/ref_chosen": -74.8531265258789, "logps/ref_rejected": -101.5344009399414, "logps/rejected": -98.3940658569336, "loss": 1.0409, "margin_dpo/margin_mean": 1.6590216159820557, "margin_dpo/margin_std": 2.571774959564209, "step": 581 }, { "epoch": 0.8798185941043084, "fcm_dpo/beta": 0.5117372274398804, "fcm_dpo/delta": 0.04091556370258331, "fcm_dpo/margin": 1.5874507427215576, "fcm_dpo/q_t": 0.35794246196746826, "grad_norm": 78.47570037841797, "learning_rate": 2.204591459016525e-08, "logits/chosen": 1.5402312278747559, "logits/rejected": 1.5932610034942627, "logps/chosen": -76.41178894042969, "logps/ref_chosen": -81.07638549804688, "logps/ref_rejected": -72.83570861816406, "logps/rejected": -69.75856018066406, "loss": 1.0968, "margin_dpo/margin_mean": 1.5874508619308472, "margin_dpo/margin_std": 2.7462587356567383, "step": 582 }, { "epoch": 0.8813303099017384, "fcm_dpo/beta": 0.5294514298439026, "fcm_dpo/delta": 0.18893937766551971, "fcm_dpo/margin": 1.2722506523132324, "fcm_dpo/q_t": 0.393571674823761, "grad_norm": 94.36971282958984, "learning_rate": 2.1506204384751064e-08, "logits/chosen": 1.9707103967666626, "logits/rejected": 1.7553215026855469, "logps/chosen": -61.93994903564453, "logps/ref_chosen": -66.78465270996094, "logps/ref_rejected": -106.45825958251953, "logps/rejected": -102.88580322265625, "loss": 1.2479, "margin_dpo/margin_mean": 1.2722513675689697, "margin_dpo/margin_std": 2.820009708404541, "step": 583 }, { "epoch": 0.8828420256991686, "fcm_dpo/beta": 0.534497857093811, "fcm_dpo/delta": 0.05336347967386246, "fcm_dpo/margin": 1.497223138809204, "fcm_dpo/q_t": 0.3743545413017273, "grad_norm": 78.58843231201172, "learning_rate": 2.09728856419826e-08, "logits/chosen": 1.9033856391906738, "logits/rejected": 1.7374662160873413, "logps/chosen": -55.71133041381836, "logps/ref_chosen": -60.802913665771484, "logps/ref_rejected": -99.45012664794922, "logps/rejected": -95.85576629638672, "loss": 1.1555, "margin_dpo/margin_mean": 1.4972236156463623, "margin_dpo/margin_std": 2.8621163368225098, "step": 584 }, { "epoch": 0.8843537414965986, "fcm_dpo/beta": 0.5567061305046082, "fcm_dpo/delta": 0.22613489627838135, "fcm_dpo/margin": 1.1473885774612427, "fcm_dpo/q_t": 0.3761305809020996, "grad_norm": 84.38145446777344, "learning_rate": 2.044597327993153e-08, "logits/chosen": 1.534332513809204, "logits/rejected": 1.4831292629241943, "logps/chosen": -71.04457092285156, "logps/ref_chosen": -75.92616271972656, "logps/ref_rejected": -94.47601318359375, "logps/rejected": -90.74182891845703, "loss": 1.1613, "margin_dpo/margin_mean": 1.1473884582519531, "margin_dpo/margin_std": 2.2100892066955566, "step": 585 }, { "epoch": 0.8858654572940288, "fcm_dpo/beta": 0.552130937576294, "fcm_dpo/delta": -0.13436739146709442, "fcm_dpo/margin": 1.7582968473434448, "fcm_dpo/q_t": 0.3284263610839844, "grad_norm": 361.6727600097656, "learning_rate": 1.9925482037469187e-08, "logits/chosen": 1.6084355115890503, "logits/rejected": 1.513660192489624, "logps/chosen": -63.508636474609375, "logps/ref_chosen": -68.62062072753906, "logps/ref_rejected": -81.98324584960938, "logps/rejected": -78.62955474853516, "loss": 0.9626, "margin_dpo/margin_mean": 1.7582967281341553, "margin_dpo/margin_std": 2.437290906906128, "step": 586 }, { "epoch": 0.8873771730914588, "fcm_dpo/beta": 0.5419132709503174, "fcm_dpo/delta": -0.14227743446826935, "fcm_dpo/margin": 1.8054625988006592, "fcm_dpo/q_t": 0.33169883489608765, "grad_norm": 92.720947265625, "learning_rate": 1.9411426473854687e-08, "logits/chosen": 1.8379313945770264, "logits/rejected": 1.8169505596160889, "logps/chosen": -72.4877700805664, "logps/ref_chosen": -77.67031860351562, "logps/ref_rejected": -79.35327911376953, "logps/rejected": -75.9761962890625, "loss": 1.0136, "margin_dpo/margin_mean": 1.8054628372192383, "margin_dpo/margin_std": 2.6331286430358887, "step": 587 }, { "epoch": 0.8888888888888888, "fcm_dpo/beta": 0.5268039703369141, "fcm_dpo/delta": -0.051131971180438995, "fcm_dpo/margin": 1.699599027633667, "fcm_dpo/q_t": 0.32383257150650024, "grad_norm": 76.1657485961914, "learning_rate": 1.890382096832699e-08, "logits/chosen": 1.8298721313476562, "logits/rejected": 1.7479240894317627, "logps/chosen": -73.06620788574219, "logps/ref_chosen": -77.94320678710938, "logps/ref_rejected": -98.41210174560547, "logps/rejected": -95.23469543457031, "loss": 0.9219, "margin_dpo/margin_mean": 1.699598789215088, "margin_dpo/margin_std": 2.1158838272094727, "step": 588 }, { "epoch": 0.890400604686319, "fcm_dpo/beta": 0.5218112468719482, "fcm_dpo/delta": -0.0979146659374237, "fcm_dpo/margin": 1.7990820407867432, "fcm_dpo/q_t": 0.32140815258026123, "grad_norm": 90.76171112060547, "learning_rate": 1.840267971970344e-08, "logits/chosen": 1.8079521656036377, "logits/rejected": 1.7648406028747559, "logps/chosen": -70.08314514160156, "logps/ref_chosen": -75.18646240234375, "logps/ref_rejected": -93.35910034179688, "logps/rejected": -90.05486297607422, "loss": 0.9824, "margin_dpo/margin_mean": 1.799081563949585, "margin_dpo/margin_std": 2.512829303741455, "step": 589 }, { "epoch": 0.891912320483749, "fcm_dpo/beta": 0.5154800415039062, "fcm_dpo/delta": -0.06581678241491318, "fcm_dpo/margin": 1.764974594116211, "fcm_dpo/q_t": 0.3345610499382019, "grad_norm": 78.90821075439453, "learning_rate": 1.7908016745981856e-08, "logits/chosen": 1.7630910873413086, "logits/rejected": 1.7047700881958008, "logps/chosen": -82.21047973632812, "logps/ref_chosen": -86.9908447265625, "logps/ref_rejected": -100.61723327636719, "logps/rejected": -97.60183715820312, "loss": 0.9671, "margin_dpo/margin_mean": 1.7649750709533691, "margin_dpo/margin_std": 2.4630494117736816, "step": 590 }, { "epoch": 0.8934240362811792, "fcm_dpo/beta": 0.49655839800834656, "fcm_dpo/delta": -0.0999019667506218, "fcm_dpo/margin": 1.8817025423049927, "fcm_dpo/q_t": 0.32333502173423767, "grad_norm": 71.44941711425781, "learning_rate": 1.7419845883949098e-08, "logits/chosen": 1.6200222969055176, "logits/rejected": 1.5246787071228027, "logps/chosen": -69.49836730957031, "logps/ref_chosen": -74.85809326171875, "logps/ref_rejected": -102.75840759277344, "logps/rejected": -99.28038024902344, "loss": 0.9459, "margin_dpo/margin_mean": 1.8817024230957031, "margin_dpo/margin_std": 2.3763575553894043, "step": 591 }, { "epoch": 0.8949357520786092, "fcm_dpo/beta": 0.4994744658470154, "fcm_dpo/delta": -0.023017864674329758, "fcm_dpo/margin": 1.743746042251587, "fcm_dpo/q_t": 0.3495708703994751, "grad_norm": 70.25218963623047, "learning_rate": 1.6938180788793556e-08, "logits/chosen": 1.5799570083618164, "logits/rejected": 1.410193681716919, "logps/chosen": -62.72476577758789, "logps/ref_chosen": -67.90579223632812, "logps/ref_rejected": -100.35234069824219, "logps/rejected": -96.91506958007812, "loss": 1.0063, "margin_dpo/margin_mean": 1.7437461614608765, "margin_dpo/margin_std": 2.631432056427002, "step": 592 }, { "epoch": 0.8964474678760394, "fcm_dpo/beta": 0.5060767531394958, "fcm_dpo/delta": 0.08997678756713867, "fcm_dpo/margin": 1.5154685974121094, "fcm_dpo/q_t": 0.36925971508026123, "grad_norm": 77.16971588134766, "learning_rate": 1.6463034933723336e-08, "logits/chosen": 1.7553977966308594, "logits/rejected": 1.5694162845611572, "logps/chosen": -54.12544250488281, "logps/ref_chosen": -59.29489517211914, "logps/ref_rejected": -85.31307983398438, "logps/rejected": -81.65910339355469, "loss": 1.1547, "margin_dpo/margin_mean": 1.5154688358306885, "margin_dpo/margin_std": 2.83261775970459, "step": 593 }, { "epoch": 0.8979591836734694, "fcm_dpo/beta": 0.5137929916381836, "fcm_dpo/delta": 0.13700157403945923, "fcm_dpo/margin": 1.4055750370025635, "fcm_dpo/q_t": 0.37197840213775635, "grad_norm": 74.884765625, "learning_rate": 1.5994421609589385e-08, "logits/chosen": 1.6110825538635254, "logits/rejected": 1.5490773916244507, "logps/chosen": -78.18447875976562, "logps/ref_chosen": -83.14643859863281, "logps/ref_rejected": -88.201904296875, "logps/rejected": -84.64552307128906, "loss": 1.0794, "margin_dpo/margin_mean": 1.4055747985839844, "margin_dpo/margin_std": 2.3255198001861572, "step": 594 }, { "epoch": 0.8994708994708994, "fcm_dpo/beta": 0.5185683965682983, "fcm_dpo/delta": -0.08683624863624573, "fcm_dpo/margin": 1.790905237197876, "fcm_dpo/q_t": 0.334526926279068, "grad_norm": 88.72738647460938, "learning_rate": 1.553235392451377e-08, "logits/chosen": 2.002946615219116, "logits/rejected": 1.8320910930633545, "logps/chosen": -65.44224548339844, "logps/ref_chosen": -70.40016174316406, "logps/ref_rejected": -103.95550537109375, "logps/rejected": -100.78850555419922, "loss": 1.111, "margin_dpo/margin_mean": 1.7909047603607178, "margin_dpo/margin_std": 3.0485243797302246, "step": 595 }, { "epoch": 0.9009826152683296, "fcm_dpo/beta": 0.5402435064315796, "fcm_dpo/delta": 0.34699779748916626, "fcm_dpo/margin": 0.9653339982032776, "fcm_dpo/q_t": 0.42396220564842224, "grad_norm": 86.49633026123047, "learning_rate": 1.507684480352292e-08, "logits/chosen": 1.4556769132614136, "logits/rejected": 1.4827091693878174, "logps/chosen": -81.25218963623047, "logps/ref_chosen": -86.083740234375, "logps/ref_rejected": -78.41991424560547, "logps/rejected": -74.55369567871094, "loss": 1.2796, "margin_dpo/margin_mean": 0.965334415435791, "margin_dpo/margin_std": 2.5436482429504395, "step": 596 }, { "epoch": 0.9024943310657596, "fcm_dpo/beta": 0.5435788631439209, "fcm_dpo/delta": -0.0711166262626648, "fcm_dpo/margin": 1.682100534439087, "fcm_dpo/q_t": 0.3363920748233795, "grad_norm": 70.85747528076172, "learning_rate": 1.4627906988186111e-08, "logits/chosen": 1.7241406440734863, "logits/rejected": 1.6851555109024048, "logps/chosen": -62.60491943359375, "logps/ref_chosen": -67.8086166381836, "logps/ref_rejected": -71.09245300292969, "logps/rejected": -67.57085418701172, "loss": 0.9945, "margin_dpo/margin_mean": 1.6821008920669556, "margin_dpo/margin_std": 2.4386579990386963, "step": 597 }, { "epoch": 0.9040060468631897, "fcm_dpo/beta": 0.5551949739456177, "fcm_dpo/delta": 0.10281100124120712, "fcm_dpo/margin": 1.3566747903823853, "fcm_dpo/q_t": 0.3706873059272766, "grad_norm": 86.94221496582031, "learning_rate": 1.4185553036259095e-08, "logits/chosen": 1.848606824874878, "logits/rejected": 1.704300880432129, "logps/chosen": -69.71743774414062, "logps/ref_chosen": -74.31095886230469, "logps/ref_rejected": -98.08122253417969, "logps/rejected": -94.84437561035156, "loss": 1.1446, "margin_dpo/margin_mean": 1.3566749095916748, "margin_dpo/margin_std": 2.557732343673706, "step": 598 }, { "epoch": 0.9055177626606198, "fcm_dpo/beta": 0.555027961730957, "fcm_dpo/delta": 0.007491939701139927, "fcm_dpo/margin": 1.5190777778625488, "fcm_dpo/q_t": 0.3532414436340332, "grad_norm": 84.7350082397461, "learning_rate": 1.3749795321332885e-08, "logits/chosen": 1.6712158918380737, "logits/rejected": 1.6012566089630127, "logps/chosen": -69.50666809082031, "logps/ref_chosen": -74.21861267089844, "logps/ref_rejected": -90.1492919921875, "logps/rejected": -86.9564208984375, "loss": 1.0617, "margin_dpo/margin_mean": 1.5190777778625488, "margin_dpo/margin_std": 2.5156443119049072, "step": 599 }, { "epoch": 0.9070294784580499, "fcm_dpo/beta": 0.5707370042800903, "fcm_dpo/delta": 0.1365622580051422, "fcm_dpo/margin": 1.2634763717651367, "fcm_dpo/q_t": 0.3676835894584656, "grad_norm": 82.2341079711914, "learning_rate": 1.3320646032487393e-08, "logits/chosen": 1.9393136501312256, "logits/rejected": 1.793006181716919, "logps/chosen": -74.63346099853516, "logps/ref_chosen": -79.34190368652344, "logps/ref_rejected": -97.0519790649414, "logps/rejected": -93.60700988769531, "loss": 1.1017, "margin_dpo/margin_mean": 1.2634763717651367, "margin_dpo/margin_std": 2.209320068359375, "step": 600 }, { "epoch": 0.9070294784580499, "eval_fcm_dpo/beta": 0.5703136920928955, "eval_logits/chosen": 1.713449239730835, "eval_logits/rejected": 1.6065305471420288, "eval_logps/chosen": -82.01586151123047, "eval_logps/ref_chosen": -86.90177917480469, "eval_logps/ref_rejected": -96.69639587402344, "eval_logps/rejected": -93.35726928710938, "eval_loss": 0.5468015074729919, "eval_margin_dpo/margin_mean": 1.546818733215332, "eval_margin_dpo/margin_std": 2.597526788711548, "eval_runtime": 42.2813, "eval_samples_per_second": 54.469, "eval_steps_per_second": 1.703, "step": 600 }, { "epoch": 0.90854119425548, "fcm_dpo/beta": 0.5453665256500244, "fcm_dpo/delta": -0.22268065810203552, "fcm_dpo/margin": 1.9149441719055176, "fcm_dpo/q_t": 0.3091183304786682, "grad_norm": 69.08990478515625, "learning_rate": 1.2898117173950868e-08, "logits/chosen": 1.5800609588623047, "logits/rejected": 1.451780080795288, "logps/chosen": -66.61608123779297, "logps/ref_chosen": -72.06497192382812, "logps/ref_rejected": -97.60928344726562, "logps/rejected": -94.0753402709961, "loss": 0.8982, "margin_dpo/margin_mean": 1.9149435758590698, "margin_dpo/margin_std": 2.311171531677246, "step": 601 }, { "epoch": 0.91005291005291, "fcm_dpo/beta": 0.5353580713272095, "fcm_dpo/delta": -0.16216537356376648, "fcm_dpo/margin": 1.8609248399734497, "fcm_dpo/q_t": 0.3207598924636841, "grad_norm": 69.55952453613281, "learning_rate": 1.2482220564763667e-08, "logits/chosen": 1.7727607488632202, "logits/rejected": 1.6953372955322266, "logps/chosen": -72.2029800415039, "logps/ref_chosen": -77.80416870117188, "logps/ref_rejected": -89.05026245117188, "logps/rejected": -85.30999755859375, "loss": 0.9179, "margin_dpo/margin_mean": 1.8609247207641602, "margin_dpo/margin_std": 2.3164451122283936, "step": 602 }, { "epoch": 0.9115646258503401, "fcm_dpo/beta": 0.5281209945678711, "fcm_dpo/delta": 0.02333132177591324, "fcm_dpo/margin": 1.5686895847320557, "fcm_dpo/q_t": 0.34824633598327637, "grad_norm": 69.28165435791016, "learning_rate": 1.2072967838448051e-08, "logits/chosen": 1.318486213684082, "logits/rejected": 1.2564785480499268, "logps/chosen": -63.34666442871094, "logps/ref_chosen": -68.30155944824219, "logps/ref_rejected": -90.542724609375, "logps/rejected": -87.15652465820312, "loss": 1.0188, "margin_dpo/margin_mean": 1.5686898231506348, "margin_dpo/margin_std": 2.38751220703125, "step": 603 }, { "epoch": 0.9130763416477702, "fcm_dpo/beta": 0.5328730344772339, "fcm_dpo/delta": 0.055717065930366516, "fcm_dpo/margin": 1.4987510442733765, "fcm_dpo/q_t": 0.3579341769218445, "grad_norm": 76.82192993164062, "learning_rate": 1.1670370442682459e-08, "logits/chosen": 1.7707507610321045, "logits/rejected": 1.7423770427703857, "logps/chosen": -85.27188110351562, "logps/ref_chosen": -90.55952453613281, "logps/ref_rejected": -84.6327133178711, "logps/rejected": -80.84382629394531, "loss": 1.1278, "margin_dpo/margin_mean": 1.4987506866455078, "margin_dpo/margin_std": 2.708588123321533, "step": 604 }, { "epoch": 0.9145880574452003, "fcm_dpo/beta": 0.5490473508834839, "fcm_dpo/delta": 0.14943906664848328, "fcm_dpo/margin": 1.2950233221054077, "fcm_dpo/q_t": 0.3697357177734375, "grad_norm": 135.77040100097656, "learning_rate": 1.1274439638981532e-08, "logits/chosen": 1.8290135860443115, "logits/rejected": 1.7359825372695923, "logps/chosen": -75.59449768066406, "logps/ref_chosen": -80.26661682128906, "logps/ref_rejected": -100.26485443115234, "logps/rejected": -96.88775634765625, "loss": 1.1026, "margin_dpo/margin_mean": 1.295022964477539, "margin_dpo/margin_std": 2.2529892921447754, "step": 605 }, { "epoch": 0.9160997732426304, "fcm_dpo/beta": 0.5741589069366455, "fcm_dpo/delta": 0.18281231820583344, "fcm_dpo/margin": 1.1763060092926025, "fcm_dpo/q_t": 0.385870099067688, "grad_norm": 90.35079956054688, "learning_rate": 1.0885186502381016e-08, "logits/chosen": 1.9423694610595703, "logits/rejected": 1.8221031427383423, "logps/chosen": -65.81397247314453, "logps/ref_chosen": -70.73554229736328, "logps/ref_rejected": -95.9410400390625, "logps/rejected": -92.19577026367188, "loss": 1.1632, "margin_dpo/margin_mean": 1.1763060092926025, "margin_dpo/margin_std": 2.2583560943603516, "step": 606 }, { "epoch": 0.9176114890400605, "fcm_dpo/beta": 0.564205527305603, "fcm_dpo/delta": -0.014900833368301392, "fcm_dpo/margin": 1.5237514972686768, "fcm_dpo/q_t": 0.34097421169281006, "grad_norm": 91.87316131591797, "learning_rate": 1.0502621921127774e-08, "logits/chosen": 1.543046474456787, "logits/rejected": 1.481963038444519, "logps/chosen": -76.32693481445312, "logps/ref_chosen": -81.26203918457031, "logps/ref_rejected": -92.71575927734375, "logps/rejected": -89.30439758300781, "loss": 1.0182, "margin_dpo/margin_mean": 1.5237513780593872, "margin_dpo/margin_std": 2.2329049110412598, "step": 607 }, { "epoch": 0.9191232048374905, "fcm_dpo/beta": 0.580283522605896, "fcm_dpo/delta": 0.084572434425354, "fcm_dpo/margin": 1.3297598361968994, "fcm_dpo/q_t": 0.3788830041885376, "grad_norm": 99.93155670166016, "learning_rate": 1.0126756596375685e-08, "logits/chosen": 1.7759461402893066, "logits/rejected": 1.5685594081878662, "logps/chosen": -78.00862121582031, "logps/ref_chosen": -82.6530990600586, "logps/ref_rejected": -110.64334106445312, "logps/rejected": -107.32861328125, "loss": 1.148, "margin_dpo/margin_mean": 1.3297593593597412, "margin_dpo/margin_std": 2.623138904571533, "step": 608 }, { "epoch": 0.9206349206349206, "fcm_dpo/beta": 0.5636272430419922, "fcm_dpo/delta": -0.10882381349802017, "fcm_dpo/margin": 1.6727293729782104, "fcm_dpo/q_t": 0.3256949484348297, "grad_norm": 68.13907623291016, "learning_rate": 9.757601041885694e-09, "logits/chosen": 2.040724039077759, "logits/rejected": 1.932198405265808, "logps/chosen": -62.85011291503906, "logps/ref_chosen": -68.20232391357422, "logps/ref_rejected": -81.90515899658203, "logps/rejected": -78.22567749023438, "loss": 0.9724, "margin_dpo/margin_mean": 1.6727293729782104, "margin_dpo/margin_std": 2.256746292114258, "step": 609 }, { "epoch": 0.9221466364323507, "fcm_dpo/beta": 0.5605812668800354, "fcm_dpo/delta": -0.08716221898794174, "fcm_dpo/margin": 1.6569585800170898, "fcm_dpo/q_t": 0.348406046628952, "grad_norm": 108.67435455322266, "learning_rate": 9.395165583732379e-09, "logits/chosen": 1.8048813343048096, "logits/rejected": 1.7305305004119873, "logps/chosen": -93.95142364501953, "logps/ref_chosen": -99.01324462890625, "logps/ref_rejected": -102.26054382324219, "logps/rejected": -98.85566711425781, "loss": 1.101, "margin_dpo/margin_mean": 1.656959056854248, "margin_dpo/margin_std": 2.800694704055786, "step": 610 }, { "epoch": 0.9236583522297808, "fcm_dpo/beta": 0.5618192553520203, "fcm_dpo/delta": 0.09524710476398468, "fcm_dpo/margin": 1.3548386096954346, "fcm_dpo/q_t": 0.3647194802761078, "grad_norm": 79.89419555664062, "learning_rate": 9.03946036001449e-09, "logits/chosen": 1.958254098892212, "logits/rejected": 1.8665025234222412, "logps/chosen": -61.16302490234375, "logps/ref_chosen": -66.36254119873047, "logps/ref_rejected": -88.74557495117188, "logps/rejected": -84.90089416503906, "loss": 1.1055, "margin_dpo/margin_mean": 1.3548383712768555, "margin_dpo/margin_std": 2.3446433544158936, "step": 611 }, { "epoch": 0.9251700680272109, "fcm_dpo/beta": 0.5615559816360474, "fcm_dpo/delta": -0.13578204810619354, "fcm_dpo/margin": 1.7321219444274902, "fcm_dpo/q_t": 0.3264394700527191, "grad_norm": 77.33811950683594, "learning_rate": 8.690495320571839e-09, "logits/chosen": 1.6673080921173096, "logits/rejected": 1.5355725288391113, "logps/chosen": -73.73802185058594, "logps/ref_chosen": -78.6339111328125, "logps/ref_rejected": -108.34969329833984, "logps/rejected": -105.18592834472656, "loss": 1.0014, "margin_dpo/margin_mean": 1.7321220636367798, "margin_dpo/margin_std": 2.5373339653015137, "step": 612 }, { "epoch": 0.926681783824641, "fcm_dpo/beta": 0.5403913259506226, "fcm_dpo/delta": -0.22849300503730774, "fcm_dpo/margin": 1.9520108699798584, "fcm_dpo/q_t": 0.31533053517341614, "grad_norm": 84.33403778076172, "learning_rate": 8.348280226706722e-09, "logits/chosen": 1.7047350406646729, "logits/rejected": 1.6978881359100342, "logps/chosen": -68.17232513427734, "logps/ref_chosen": -73.3539047241211, "logps/ref_rejected": -76.91837310791016, "logps/rejected": -73.68879699707031, "loss": 0.9682, "margin_dpo/margin_mean": 1.9520103931427002, "margin_dpo/margin_std": 2.7025699615478516, "step": 613 }, { "epoch": 0.9281934996220711, "fcm_dpo/beta": 0.5397230386734009, "fcm_dpo/delta": 0.055488236248493195, "fcm_dpo/margin": 1.4754180908203125, "fcm_dpo/q_t": 0.3497070074081421, "grad_norm": 78.92454528808594, "learning_rate": 8.012824650910937e-09, "logits/chosen": 1.951556921005249, "logits/rejected": 1.9331920146942139, "logps/chosen": -73.11341857910156, "logps/ref_chosen": -77.80007934570312, "logps/ref_rejected": -89.05572509765625, "logps/rejected": -85.84449005126953, "loss": 1.008, "margin_dpo/margin_mean": 1.475417971611023, "margin_dpo/margin_std": 2.1356678009033203, "step": 614 }, { "epoch": 0.9297052154195011, "fcm_dpo/beta": 0.5249578952789307, "fcm_dpo/delta": -0.0973423644900322, "fcm_dpo/margin": 1.7850191593170166, "fcm_dpo/q_t": 0.3419983386993408, "grad_norm": 94.88264465332031, "learning_rate": 7.684137976598088e-09, "logits/chosen": 1.7977044582366943, "logits/rejected": 1.711987018585205, "logps/chosen": -85.27580261230469, "logps/ref_chosen": -90.06971740722656, "logps/ref_rejected": -118.7764892578125, "logps/rejected": -115.76759338378906, "loss": 1.0349, "margin_dpo/margin_mean": 1.785017967224121, "margin_dpo/margin_std": 2.7509849071502686, "step": 615 }, { "epoch": 0.9312169312169312, "fcm_dpo/beta": 0.5319191813468933, "fcm_dpo/delta": 0.14790388941764832, "fcm_dpo/margin": 1.3388638496398926, "fcm_dpo/q_t": 0.3731805682182312, "grad_norm": 75.28299713134766, "learning_rate": 7.36222939784098e-09, "logits/chosen": 1.7395646572113037, "logits/rejected": 1.5876948833465576, "logps/chosen": -69.80252838134766, "logps/ref_chosen": -74.62954711914062, "logps/ref_rejected": -93.655029296875, "logps/rejected": -90.1668701171875, "loss": 1.1017, "margin_dpo/margin_mean": 1.3388640880584717, "margin_dpo/margin_std": 2.321733236312866, "step": 616 }, { "epoch": 0.9327286470143613, "fcm_dpo/beta": 0.5505938529968262, "fcm_dpo/delta": 0.20431050658226013, "fcm_dpo/margin": 1.1955896615982056, "fcm_dpo/q_t": 0.3828273117542267, "grad_norm": 90.21337890625, "learning_rate": 7.047107919114586e-09, "logits/chosen": 1.8987562656402588, "logits/rejected": 1.8017901182174683, "logps/chosen": -71.38261413574219, "logps/ref_chosen": -75.98182678222656, "logps/ref_rejected": -97.1640625, "logps/rejected": -93.76043701171875, "loss": 1.1641, "margin_dpo/margin_mean": 1.1955888271331787, "margin_dpo/margin_std": 2.333718776702881, "step": 617 }, { "epoch": 0.9342403628117913, "fcm_dpo/beta": 0.5518548488616943, "fcm_dpo/delta": -0.06153812259435654, "fcm_dpo/margin": 1.6353678703308105, "fcm_dpo/q_t": 0.3478153347969055, "grad_norm": 88.34520721435547, "learning_rate": 6.738782355044048e-09, "logits/chosen": 1.519637107849121, "logits/rejected": 1.3564965724945068, "logps/chosen": -69.31785583496094, "logps/ref_chosen": -74.47208404541016, "logps/ref_rejected": -107.09980773925781, "logps/rejected": -103.5809555053711, "loss": 1.0393, "margin_dpo/margin_mean": 1.6353681087493896, "margin_dpo/margin_std": 2.6017632484436035, "step": 618 }, { "epoch": 0.9357520786092215, "fcm_dpo/beta": 0.5464029312133789, "fcm_dpo/delta": -0.044458553194999695, "fcm_dpo/margin": 1.623572826385498, "fcm_dpo/q_t": 0.3364899456501007, "grad_norm": 84.14434814453125, "learning_rate": 6.437261330158206e-09, "logits/chosen": 1.9933700561523438, "logits/rejected": 1.82912278175354, "logps/chosen": -65.78286743164062, "logps/ref_chosen": -70.84220886230469, "logps/ref_rejected": -98.07801818847656, "logps/rejected": -94.64225006103516, "loss": 0.9841, "margin_dpo/margin_mean": 1.6235718727111816, "margin_dpo/margin_std": 2.256570339202881, "step": 619 }, { "epoch": 0.9372637944066515, "fcm_dpo/beta": 0.5447486042976379, "fcm_dpo/delta": -0.13042320311069489, "fcm_dpo/margin": 1.1954697370529175, "fcm_dpo/q_t": 0.3794524669647217, "grad_norm": 106.2513198852539, "learning_rate": 6.142553278648238e-09, "logits/chosen": 1.6421568393707275, "logits/rejected": 1.6440951824188232, "logps/chosen": -71.89732360839844, "logps/ref_chosen": -76.93606567382812, "logps/ref_rejected": -81.28453063964844, "logps/rejected": -77.44125366210938, "loss": 1.2403, "margin_dpo/margin_mean": 1.1954690217971802, "margin_dpo/margin_std": 2.530783176422119, "step": 620 }, { "epoch": 0.9387755102040817, "fcm_dpo/beta": 0.5438255667686462, "fcm_dpo/delta": -0.037219464778900146, "fcm_dpo/margin": 1.616603136062622, "fcm_dpo/q_t": 0.36209145188331604, "grad_norm": 80.9903335571289, "learning_rate": 5.854666444131934e-09, "logits/chosen": 1.8847250938415527, "logits/rejected": 1.6826179027557373, "logps/chosen": -64.96229553222656, "logps/ref_chosen": -69.87464904785156, "logps/ref_rejected": -105.61328887939453, "logps/rejected": -102.31753540039062, "loss": 1.134, "margin_dpo/margin_mean": 1.616602897644043, "margin_dpo/margin_std": 2.893141269683838, "step": 621 }, { "epoch": 0.9402872260015117, "fcm_dpo/beta": 0.5321084260940552, "fcm_dpo/delta": -0.040384843945503235, "fcm_dpo/margin": 1.6664624214172363, "fcm_dpo/q_t": 0.3371589183807373, "grad_norm": 73.74857330322266, "learning_rate": 5.573608879422875e-09, "logits/chosen": 1.545579433441162, "logits/rejected": 1.4461474418640137, "logps/chosen": -73.89500427246094, "logps/ref_chosen": -78.9598388671875, "logps/ref_rejected": -97.90648651123047, "logps/rejected": -94.50811767578125, "loss": 0.9599, "margin_dpo/margin_mean": 1.6664619445800781, "margin_dpo/margin_std": 2.2627735137939453, "step": 622 }, { "epoch": 0.9417989417989417, "fcm_dpo/beta": 0.5304499864578247, "fcm_dpo/delta": -0.01973407343029976, "fcm_dpo/margin": 1.63567316532135, "fcm_dpo/q_t": 0.34609299898147583, "grad_norm": 77.88653564453125, "learning_rate": 5.299388446305342e-09, "logits/chosen": 1.8903741836547852, "logits/rejected": 1.7817987203598022, "logps/chosen": -78.66651916503906, "logps/ref_chosen": -83.22647094726562, "logps/ref_rejected": -105.1362533569336, "logps/rejected": -102.21197509765625, "loss": 1.0052, "margin_dpo/margin_mean": 1.6356725692749023, "margin_dpo/margin_std": 2.4788856506347656, "step": 623 }, { "epoch": 0.9433106575963719, "fcm_dpo/beta": 0.5237078070640564, "fcm_dpo/delta": -0.0306610856205225, "fcm_dpo/margin": 1.6761407852172852, "fcm_dpo/q_t": 0.3443066477775574, "grad_norm": 80.54615783691406, "learning_rate": 5.03201281531429e-09, "logits/chosen": 1.5723378658294678, "logits/rejected": 1.3790596723556519, "logps/chosen": -60.83345031738281, "logps/ref_chosen": -66.10560607910156, "logps/ref_rejected": -91.66778564453125, "logps/rejected": -88.07176208496094, "loss": 1.0358, "margin_dpo/margin_mean": 1.6761407852172852, "margin_dpo/margin_std": 2.5644569396972656, "step": 624 }, { "epoch": 0.9448223733938019, "fcm_dpo/beta": 0.5492905378341675, "fcm_dpo/delta": 0.27562734484672546, "fcm_dpo/margin": 1.0710108280181885, "fcm_dpo/q_t": 0.3919370174407959, "grad_norm": 82.9334945678711, "learning_rate": 4.7714894655209174e-09, "logits/chosen": 1.9585500955581665, "logits/rejected": 1.806025743484497, "logps/chosen": -68.38748168945312, "logps/ref_chosen": -73.20295715332031, "logps/ref_rejected": -105.31025695800781, "logps/rejected": -101.5657958984375, "loss": 1.2049, "margin_dpo/margin_mean": 1.0710105895996094, "margin_dpo/margin_std": 2.273772716522217, "step": 625 }, { "epoch": 0.9463340891912321, "fcm_dpo/beta": 0.5458341836929321, "fcm_dpo/delta": -0.1310361623764038, "fcm_dpo/margin": 1.7742984294891357, "fcm_dpo/q_t": 0.3442504405975342, "grad_norm": 82.92687225341797, "learning_rate": 4.517825684323323e-09, "logits/chosen": 1.956076979637146, "logits/rejected": 1.7062575817108154, "logps/chosen": -57.01714324951172, "logps/ref_chosen": -62.181278228759766, "logps/ref_rejected": -108.17747497558594, "logps/rejected": -104.78764343261719, "loss": 1.0583, "margin_dpo/margin_mean": 1.7742981910705566, "margin_dpo/margin_std": 2.8389077186584473, "step": 626 }, { "epoch": 0.9478458049886621, "fcm_dpo/beta": 0.5245725512504578, "fcm_dpo/delta": -0.16692830622196198, "fcm_dpo/margin": 1.9054150581359863, "fcm_dpo/q_t": 0.3315790891647339, "grad_norm": 84.73611450195312, "learning_rate": 4.271028567242818e-09, "logits/chosen": 1.9087677001953125, "logits/rejected": 1.6444659233093262, "logps/chosen": -72.71656799316406, "logps/ref_chosen": -77.72123718261719, "logps/ref_rejected": -114.40547180175781, "logps/rejected": -111.30622100830078, "loss": 0.926, "margin_dpo/margin_mean": 1.9054151773452759, "margin_dpo/margin_std": 2.5662083625793457, "step": 627 }, { "epoch": 0.9493575207860923, "fcm_dpo/beta": 0.5171005129814148, "fcm_dpo/delta": -0.15388283133506775, "fcm_dpo/margin": 1.9088798761367798, "fcm_dpo/q_t": 0.32542526721954346, "grad_norm": 82.72279357910156, "learning_rate": 4.0311050177251895e-09, "logits/chosen": 1.9638164043426514, "logits/rejected": 1.9315705299377441, "logps/chosen": -65.36286926269531, "logps/ref_chosen": -70.71195983886719, "logps/ref_rejected": -93.85909271240234, "logps/rejected": -90.41887664794922, "loss": 1.0614, "margin_dpo/margin_mean": 1.9088804721832275, "margin_dpo/margin_std": 2.9062271118164062, "step": 628 }, { "epoch": 0.9508692365835223, "fcm_dpo/beta": 0.508413553237915, "fcm_dpo/delta": -0.01258166879415512, "fcm_dpo/margin": 1.6929104328155518, "fcm_dpo/q_t": 0.34355059266090393, "grad_norm": 75.12218475341797, "learning_rate": 3.798061746947995e-09, "logits/chosen": 1.9230015277862549, "logits/rejected": 1.86118745803833, "logps/chosen": -83.1578369140625, "logps/ref_chosen": -88.66283416748047, "logps/ref_rejected": -94.67845153808594, "logps/rejected": -90.86636352539062, "loss": 0.9851, "margin_dpo/margin_mean": 1.692910075187683, "margin_dpo/margin_std": 2.403411865234375, "step": 629 }, { "epoch": 0.9523809523809523, "fcm_dpo/beta": 0.49728578329086304, "fcm_dpo/delta": -0.13409729301929474, "fcm_dpo/margin": 1.9530057907104492, "fcm_dpo/q_t": 0.32737135887145996, "grad_norm": 62.6065559387207, "learning_rate": 3.5719052736323806e-09, "logits/chosen": 1.497613787651062, "logits/rejected": 1.3795734643936157, "logps/chosen": -67.59822845458984, "logps/ref_chosen": -72.94979858398438, "logps/ref_rejected": -92.7632827758789, "logps/rejected": -89.36473083496094, "loss": 0.9097, "margin_dpo/margin_mean": 1.953005313873291, "margin_dpo/margin_std": 2.455533742904663, "step": 630 }, { "epoch": 0.9538926681783825, "fcm_dpo/beta": 0.4697021245956421, "fcm_dpo/delta": -0.1924063116312027, "fcm_dpo/margin": 2.1605029106140137, "fcm_dpo/q_t": 0.3222210109233856, "grad_norm": 64.52576446533203, "learning_rate": 3.352641923861144e-09, "logits/chosen": 1.821082353591919, "logits/rejected": 1.5979212522506714, "logps/chosen": -73.60709381103516, "logps/ref_chosen": -78.58656311035156, "logps/ref_rejected": -115.38685607910156, "logps/rejected": -112.56788635253906, "loss": 0.9299, "margin_dpo/margin_mean": 2.160503387451172, "margin_dpo/margin_std": 2.728344440460205, "step": 631 }, { "epoch": 0.9554043839758125, "fcm_dpo/beta": 0.4686143100261688, "fcm_dpo/delta": -0.03186669945716858, "fcm_dpo/margin": 1.8749642372131348, "fcm_dpo/q_t": 0.328419029712677, "grad_norm": 65.45675659179688, "learning_rate": 3.140277830901428e-09, "logits/chosen": 1.9738693237304688, "logits/rejected": 1.889809250831604, "logps/chosen": -70.00298309326172, "logps/ref_chosen": -75.24861907958984, "logps/ref_rejected": -82.98665618896484, "logps/rejected": -79.6159896850586, "loss": 0.9563, "margin_dpo/margin_mean": 1.8749642372131348, "margin_dpo/margin_std": 2.4336998462677, "step": 632 }, { "epoch": 0.9569160997732427, "fcm_dpo/beta": 0.46875280141830444, "fcm_dpo/delta": -0.08242163062095642, "fcm_dpo/margin": 1.9704583883285522, "fcm_dpo/q_t": 0.34461504220962524, "grad_norm": 64.5451889038086, "learning_rate": 2.9348189350335007e-09, "logits/chosen": 1.4927654266357422, "logits/rejected": 1.4023582935333252, "logps/chosen": -63.2589111328125, "logps/ref_chosen": -68.8402099609375, "logps/ref_rejected": -84.64610290527344, "logps/rejected": -81.0352554321289, "loss": 1.0005, "margin_dpo/margin_mean": 1.9704585075378418, "margin_dpo/margin_std": 2.9617648124694824, "step": 633 }, { "epoch": 0.9584278155706727, "fcm_dpo/beta": 0.49346354603767395, "fcm_dpo/delta": 0.4376094341278076, "fcm_dpo/margin": 0.873611569404602, "fcm_dpo/q_t": 0.43060654401779175, "grad_norm": 88.31072998046875, "learning_rate": 2.736270983384276e-09, "logits/chosen": 1.7620604038238525, "logits/rejected": 1.7831530570983887, "logps/chosen": -72.12706756591797, "logps/ref_chosen": -77.0589599609375, "logps/ref_rejected": -74.37579345703125, "logps/rejected": -70.31752014160156, "loss": 1.2866, "margin_dpo/margin_mean": 0.8736119270324707, "margin_dpo/margin_std": 2.4669814109802246, "step": 634 }, { "epoch": 0.9599395313681028, "fcm_dpo/beta": 0.523192822933197, "fcm_dpo/delta": 0.22281187772750854, "fcm_dpo/margin": 1.2240848541259766, "fcm_dpo/q_t": 0.3974232077598572, "grad_norm": 89.01469421386719, "learning_rate": 2.5446395297668287e-09, "logits/chosen": 1.5564526319503784, "logits/rejected": 1.446262001991272, "logps/chosen": -81.07947540283203, "logps/ref_chosen": -85.60243225097656, "logps/ref_rejected": -104.29497528076172, "logps/rejected": -100.99610900878906, "loss": 1.1869, "margin_dpo/margin_mean": 1.2240850925445557, "margin_dpo/margin_std": 2.678163766860962, "step": 635 }, { "epoch": 0.9614512471655329, "fcm_dpo/beta": 0.5146247148513794, "fcm_dpo/delta": -0.1677190065383911, "fcm_dpo/margin": 1.945070743560791, "fcm_dpo/q_t": 0.31559455394744873, "grad_norm": 67.87616729736328, "learning_rate": 2.359929934524829e-09, "logits/chosen": 1.7444641590118408, "logits/rejected": 1.53926682472229, "logps/chosen": -63.456119537353516, "logps/ref_chosen": -68.72154235839844, "logps/ref_rejected": -97.44863891601562, "logps/rejected": -94.12828063964844, "loss": 0.8636, "margin_dpo/margin_mean": 1.9450714588165283, "margin_dpo/margin_std": 2.2955727577209473, "step": 636 }, { "epoch": 0.9629629629629629, "fcm_dpo/beta": 0.5062402486801147, "fcm_dpo/delta": -0.03895487263798714, "fcm_dpo/margin": 1.7489988803863525, "fcm_dpo/q_t": 0.3463430404663086, "grad_norm": 75.6401596069336, "learning_rate": 2.1821473643827137e-09, "logits/chosen": 1.8284952640533447, "logits/rejected": 1.7029112577438354, "logps/chosen": -87.67061614990234, "logps/ref_chosen": -92.38919067382812, "logps/ref_rejected": -103.70460510253906, "logps/rejected": -100.73503875732422, "loss": 0.9843, "margin_dpo/margin_mean": 1.748998761177063, "margin_dpo/margin_std": 2.5600695610046387, "step": 637 }, { "epoch": 0.9644746787603931, "fcm_dpo/beta": 0.49277734756469727, "fcm_dpo/delta": -0.14965899288654327, "fcm_dpo/margin": 1.9968385696411133, "fcm_dpo/q_t": 0.31863152980804443, "grad_norm": 71.01853942871094, "learning_rate": 2.0112967923011646e-09, "logits/chosen": 1.5376474857330322, "logits/rejected": 1.4166994094848633, "logps/chosen": -78.29296875, "logps/ref_chosen": -83.36921691894531, "logps/ref_rejected": -103.04508209228516, "logps/rejected": -99.96566772460938, "loss": 0.9037, "margin_dpo/margin_mean": 1.9968383312225342, "margin_dpo/margin_std": 2.4473977088928223, "step": 638 }, { "epoch": 0.9659863945578231, "fcm_dpo/beta": 0.5017915964126587, "fcm_dpo/delta": 0.15579509735107422, "fcm_dpo/margin": 1.405687928199768, "fcm_dpo/q_t": 0.3671724200248718, "grad_norm": 75.0201644897461, "learning_rate": 1.847382997337943e-09, "logits/chosen": 1.6120787858963013, "logits/rejected": 1.4171923398971558, "logps/chosen": -65.34456634521484, "logps/ref_chosen": -70.45247650146484, "logps/ref_rejected": -93.77748107910156, "logps/rejected": -90.07525634765625, "loss": 1.0572, "margin_dpo/margin_mean": 1.4056884050369263, "margin_dpo/margin_std": 2.241938591003418, "step": 639 }, { "epoch": 0.9674981103552532, "fcm_dpo/beta": 0.5281813144683838, "fcm_dpo/delta": 0.26455286145210266, "fcm_dpo/margin": 1.137129306793213, "fcm_dpo/q_t": 0.3849751949310303, "grad_norm": 80.01968383789062, "learning_rate": 1.690410564514244e-09, "logits/chosen": 1.9318921566009521, "logits/rejected": 1.7825345993041992, "logps/chosen": -63.573158264160156, "logps/ref_chosen": -68.51570129394531, "logps/ref_rejected": -92.35081481933594, "logps/rejected": -88.54540252685547, "loss": 1.221, "margin_dpo/margin_mean": 1.1371290683746338, "margin_dpo/margin_std": 2.42228364944458, "step": 640 }, { "epoch": 0.9690098261526833, "fcm_dpo/beta": 0.5420957803726196, "fcm_dpo/delta": 0.11781658977270126, "fcm_dpo/margin": 1.3670419454574585, "fcm_dpo/q_t": 0.3666565716266632, "grad_norm": 112.83721160888672, "learning_rate": 1.5403838846864692e-09, "logits/chosen": 1.751739501953125, "logits/rejected": 1.7309410572052002, "logps/chosen": -87.5758056640625, "logps/ref_chosen": -92.35102844238281, "logps/ref_rejected": -102.4269790649414, "logps/rejected": -99.018798828125, "loss": 1.0715, "margin_dpo/margin_mean": 1.3670417070388794, "margin_dpo/margin_std": 2.2877607345581055, "step": 641 }, { "epoch": 0.9705215419501134, "fcm_dpo/beta": 0.5414741039276123, "fcm_dpo/delta": 0.020895883440971375, "fcm_dpo/margin": 1.5288487672805786, "fcm_dpo/q_t": 0.3546501100063324, "grad_norm": 91.0616683959961, "learning_rate": 1.3973071544233218e-09, "logits/chosen": 1.7637138366699219, "logits/rejected": 1.767164945602417, "logps/chosen": -83.28409576416016, "logps/ref_chosen": -88.39617919921875, "logps/ref_rejected": -88.73035430908203, "logps/rejected": -85.14712524414062, "loss": 1.0806, "margin_dpo/margin_mean": 1.5288479328155518, "margin_dpo/margin_std": 2.504338264465332, "step": 642 }, { "epoch": 0.9720332577475435, "fcm_dpo/beta": 0.5468122363090515, "fcm_dpo/delta": 0.0597110316157341, "fcm_dpo/margin": 1.4491500854492188, "fcm_dpo/q_t": 0.3618500232696533, "grad_norm": 105.30182647705078, "learning_rate": 1.261184375888541e-09, "logits/chosen": 1.5826127529144287, "logits/rejected": 1.3515585660934448, "logps/chosen": -79.6438217163086, "logps/ref_chosen": -84.83087158203125, "logps/ref_rejected": -105.31499481201172, "logps/rejected": -101.57708740234375, "loss": 1.1106, "margin_dpo/margin_mean": 1.4491502046585083, "margin_dpo/margin_std": 2.5398941040039062, "step": 643 }, { "epoch": 0.9735449735449735, "fcm_dpo/beta": 0.5619449615478516, "fcm_dpo/delta": 0.0273895226418972, "fcm_dpo/margin": 1.465846300125122, "fcm_dpo/q_t": 0.351859986782074, "grad_norm": 76.38080596923828, "learning_rate": 1.1320193567288527e-09, "logits/chosen": 1.936923861503601, "logits/rejected": 1.8447864055633545, "logps/chosen": -60.06108856201172, "logps/ref_chosen": -65.11122131347656, "logps/ref_rejected": -80.4027328491211, "logps/rejected": -76.81845092773438, "loss": 1.0602, "margin_dpo/margin_mean": 1.4658467769622803, "margin_dpo/margin_std": 2.313875675201416, "step": 644 }, { "epoch": 0.9750566893424036, "fcm_dpo/beta": 0.5310744047164917, "fcm_dpo/delta": -0.26899102330207825, "fcm_dpo/margin": 2.0425100326538086, "fcm_dpo/q_t": 0.3078554570674896, "grad_norm": 77.42607879638672, "learning_rate": 1.0098157099674987e-09, "logits/chosen": 1.8113110065460205, "logits/rejected": 1.7860450744628906, "logps/chosen": -71.65998840332031, "logps/ref_chosen": -76.93634033203125, "logps/ref_rejected": -89.14311981201172, "logps/rejected": -85.9092788696289, "loss": 0.8496, "margin_dpo/margin_mean": 2.0425095558166504, "margin_dpo/margin_std": 2.3543620109558105, "step": 645 }, { "epoch": 0.9765684051398337, "fcm_dpo/beta": 0.526309609413147, "fcm_dpo/delta": -0.043940138071775436, "fcm_dpo/margin": 1.6911022663116455, "fcm_dpo/q_t": 0.33978480100631714, "grad_norm": 81.16790008544922, "learning_rate": 8.945768539031783e-10, "logits/chosen": 1.8826818466186523, "logits/rejected": 1.8223220109939575, "logps/chosen": -73.10558319091797, "logps/ref_chosen": -77.69122314453125, "logps/ref_rejected": -98.14374542236328, "logps/rejected": -95.24920654296875, "loss": 1.0431, "margin_dpo/margin_mean": 1.691102147102356, "margin_dpo/margin_std": 2.6934101581573486, "step": 646 }, { "epoch": 0.9780801209372638, "fcm_dpo/beta": 0.5049861669540405, "fcm_dpo/delta": -0.2970149517059326, "fcm_dpo/margin": 2.2074761390686035, "fcm_dpo/q_t": 0.30400532484054565, "grad_norm": 72.28752899169922, "learning_rate": 7.863060120144316e-10, "logits/chosen": 1.770219326019287, "logits/rejected": 1.6623930931091309, "logps/chosen": -78.85108947753906, "logps/ref_chosen": -83.79997253417969, "logps/ref_rejected": -116.81965637207031, "logps/rejected": -114.0782470703125, "loss": 0.8414, "margin_dpo/margin_mean": 2.2074756622314453, "margin_dpo/margin_std": 2.673130989074707, "step": 647 }, { "epoch": 0.9795918367346939, "fcm_dpo/beta": 0.4951819181442261, "fcm_dpo/delta": 0.001391381025314331, "fcm_dpo/margin": 1.7139298915863037, "fcm_dpo/q_t": 0.3343745470046997, "grad_norm": 75.23444366455078, "learning_rate": 6.850062128694045e-10, "logits/chosen": 1.7372221946716309, "logits/rejected": 1.5879833698272705, "logps/chosen": -81.30325317382812, "logps/ref_chosen": -85.9629898071289, "logps/ref_rejected": -101.36552429199219, "logps/rejected": -98.41972351074219, "loss": 0.9327, "margin_dpo/margin_mean": 1.713930606842041, "margin_dpo/margin_std": 2.1953155994415283, "step": 648 }, { "epoch": 0.981103552532124, "fcm_dpo/beta": 0.4961739182472229, "fcm_dpo/delta": 0.011451632715761662, "fcm_dpo/margin": 1.6918838024139404, "fcm_dpo/q_t": 0.34832143783569336, "grad_norm": 75.87056732177734, "learning_rate": 5.906802900412788e-10, "logits/chosen": 1.8142719268798828, "logits/rejected": 1.7031172513961792, "logps/chosen": -63.719032287597656, "logps/ref_chosen": -68.64892578125, "logps/ref_rejected": -89.84898376464844, "logps/rejected": -86.61097717285156, "loss": 1.0822, "margin_dpo/margin_mean": 1.6918833255767822, "margin_dpo/margin_std": 2.7808849811553955, "step": 649 }, { "epoch": 0.982615268329554, "fcm_dpo/beta": 0.49525290727615356, "fcm_dpo/delta": -0.05411721393465996, "fcm_dpo/margin": 1.8146793842315674, "fcm_dpo/q_t": 0.32556748390197754, "grad_norm": 70.36974334716797, "learning_rate": 5.033308820289184e-10, "logits/chosen": 2.1288902759552, "logits/rejected": 1.9611902236938477, "logps/chosen": -67.82957458496094, "logps/ref_chosen": -72.97265625, "logps/ref_rejected": -93.04617309570312, "logps/rejected": -89.7177734375, "loss": 0.9211, "margin_dpo/margin_mean": 1.8146789073944092, "margin_dpo/margin_std": 2.226635694503784, "step": 650 }, { "epoch": 0.9841269841269841, "fcm_dpo/beta": 0.5010315179824829, "fcm_dpo/delta": 0.09419667720794678, "fcm_dpo/margin": 1.5195380449295044, "fcm_dpo/q_t": 0.3595953583717346, "grad_norm": 71.67884826660156, "learning_rate": 4.2296043218295606e-10, "logits/chosen": 1.8622299432754517, "logits/rejected": 1.676352858543396, "logps/chosen": -65.97269439697266, "logps/ref_chosen": -71.05281066894531, "logps/ref_rejected": -94.23469543457031, "logps/rejected": -90.67411804199219, "loss": 1.0445, "margin_dpo/margin_mean": 1.5195378065109253, "margin_dpo/margin_std": 2.327319622039795, "step": 651 }, { "epoch": 0.9856386999244142, "fcm_dpo/beta": 0.5064749121665955, "fcm_dpo/delta": 0.09547622501850128, "fcm_dpo/margin": 1.5042533874511719, "fcm_dpo/q_t": 0.3629649877548218, "grad_norm": 83.33802795410156, "learning_rate": 3.4957118863768176e-10, "logits/chosen": 1.598526954650879, "logits/rejected": 1.5740216970443726, "logps/chosen": -75.08648681640625, "logps/ref_chosen": -80.06941223144531, "logps/ref_rejected": -99.22327423095703, "logps/rejected": -95.74461364746094, "loss": 1.1239, "margin_dpo/margin_mean": 1.5042537450790405, "margin_dpo/margin_std": 2.696470260620117, "step": 652 }, { "epoch": 0.9871504157218443, "fcm_dpo/beta": 0.5104721784591675, "fcm_dpo/delta": 0.024954237043857574, "fcm_dpo/margin": 1.6202447414398193, "fcm_dpo/q_t": 0.3395509421825409, "grad_norm": 75.06707763671875, "learning_rate": 2.831652042480093e-10, "logits/chosen": 1.9889471530914307, "logits/rejected": 1.885004997253418, "logps/chosen": -75.12460327148438, "logps/ref_chosen": -80.35701751708984, "logps/ref_rejected": -92.1295394897461, "logps/rejected": -88.51736450195312, "loss": 0.9896, "margin_dpo/margin_mean": 1.6202449798583984, "margin_dpo/margin_std": 2.2250888347625732, "step": 653 }, { "epoch": 0.9886621315192744, "fcm_dpo/beta": 0.5066887140274048, "fcm_dpo/delta": 0.04392646253108978, "fcm_dpo/margin": 1.5880036354064941, "fcm_dpo/q_t": 0.362444132566452, "grad_norm": 82.49794006347656, "learning_rate": 2.2374433653205016e-10, "logits/chosen": 1.6642907857894897, "logits/rejected": 1.4560799598693848, "logps/chosen": -73.12020874023438, "logps/ref_chosen": -78.06475830078125, "logps/ref_rejected": -106.05763244628906, "logps/rejected": -102.70108032226562, "loss": 1.0555, "margin_dpo/margin_mean": 1.588003158569336, "margin_dpo/margin_std": 2.5010390281677246, "step": 654 }, { "epoch": 0.9901738473167044, "fcm_dpo/beta": 0.5086958408355713, "fcm_dpo/delta": -0.013962805271148682, "fcm_dpo/margin": 1.6889777183532715, "fcm_dpo/q_t": 0.3477616310119629, "grad_norm": 68.81745147705078, "learning_rate": 1.7131024761923852e-10, "logits/chosen": 1.5516088008880615, "logits/rejected": 1.3263245820999146, "logps/chosen": -61.7364501953125, "logps/ref_chosen": -67.03407287597656, "logps/ref_rejected": -97.57197570800781, "logps/rejected": -93.96332550048828, "loss": 0.9777, "margin_dpo/margin_mean": 1.6889780759811401, "margin_dpo/margin_std": 2.4260029792785645, "step": 655 }, { "epoch": 0.9916855631141346, "fcm_dpo/beta": 0.507418155670166, "fcm_dpo/delta": -0.06031068414449692, "fcm_dpo/margin": 1.7811647653579712, "fcm_dpo/q_t": 0.3425629734992981, "grad_norm": 117.67913055419922, "learning_rate": 1.2586440420372934e-10, "logits/chosen": 1.5796771049499512, "logits/rejected": 1.482344388961792, "logps/chosen": -84.55900573730469, "logps/ref_chosen": -89.31463623046875, "logps/ref_rejected": -105.14315795898438, "logps/rejected": -102.168701171875, "loss": 1.0279, "margin_dpo/margin_mean": 1.7811646461486816, "margin_dpo/margin_std": 2.7895307540893555, "step": 656 }, { "epoch": 0.9931972789115646, "fcm_dpo/beta": 0.4933139383792877, "fcm_dpo/delta": -0.18509797751903534, "fcm_dpo/margin": 2.0582315921783447, "fcm_dpo/q_t": 0.3128923177719116, "grad_norm": 69.68338012695312, "learning_rate": 8.740807750345913e-11, "logits/chosen": 1.9346446990966797, "logits/rejected": 1.777329921722412, "logps/chosen": -59.56598663330078, "logps/ref_chosen": -64.89747619628906, "logps/ref_rejected": -94.21998596191406, "logps/rejected": -90.94673156738281, "loss": 0.8776, "margin_dpo/margin_mean": 2.058232545852661, "margin_dpo/margin_std": 2.4367268085479736, "step": 657 }, { "epoch": 0.9947089947089947, "fcm_dpo/beta": 0.5024198293685913, "fcm_dpo/delta": 0.06533484160900116, "fcm_dpo/margin": 1.5623362064361572, "fcm_dpo/q_t": 0.36561936140060425, "grad_norm": 81.33644104003906, "learning_rate": 5.594234322453539e-11, "logits/chosen": 1.726067304611206, "logits/rejected": 1.6502690315246582, "logps/chosen": -76.22871398925781, "logps/ref_chosen": -81.16606140136719, "logps/ref_rejected": -97.72825622558594, "logps/rejected": -94.35324096679688, "loss": 1.1139, "margin_dpo/margin_mean": 1.5623366832733154, "margin_dpo/margin_std": 2.743770122528076, "step": 658 }, { "epoch": 0.9962207105064248, "fcm_dpo/beta": 0.5083540678024292, "fcm_dpo/delta": 0.24330151081085205, "fcm_dpo/margin": 1.2222051620483398, "fcm_dpo/q_t": 0.3878973424434662, "grad_norm": 78.94991302490234, "learning_rate": 3.146808153123293e-11, "logits/chosen": 1.8459105491638184, "logits/rejected": 1.6787996292114258, "logps/chosen": -69.77076721191406, "logps/ref_chosen": -74.42193603515625, "logps/ref_rejected": -87.81561279296875, "logps/rejected": -84.38665008544922, "loss": 1.2148, "margin_dpo/margin_mean": 1.222205400466919, "margin_dpo/margin_std": 2.527038097381592, "step": 659 }, { "epoch": 0.9977324263038548, "fcm_dpo/beta": 0.5075035095214844, "fcm_dpo/delta": -0.19134506583213806, "fcm_dpo/margin": 2.0135555267333984, "fcm_dpo/q_t": 0.3037455677986145, "grad_norm": 70.64783477783203, "learning_rate": 1.3985977021235829e-11, "logits/chosen": 1.8135673999786377, "logits/rejected": 1.689598560333252, "logps/chosen": -66.68452453613281, "logps/ref_chosen": -71.68511962890625, "logps/ref_rejected": -98.01472473144531, "logps/rejected": -95.0276870727539, "loss": 0.8403, "margin_dpo/margin_mean": 2.0135550498962402, "margin_dpo/margin_std": 2.1681275367736816, "step": 660 }, { "epoch": 0.999244142101285, "fcm_dpo/beta": 0.5117430686950684, "fcm_dpo/delta": 0.07624995708465576, "fcm_dpo/margin": 1.5202569961547852, "fcm_dpo/q_t": 0.3583983778953552, "grad_norm": 78.20012664794922, "learning_rate": 3.4965187065971735e-12, "logits/chosen": 1.612052083015442, "logits/rejected": 1.4620475769042969, "logps/chosen": -73.8533935546875, "logps/ref_chosen": -78.35111999511719, "logps/ref_rejected": -99.47113037109375, "logps/rejected": -96.49366760253906, "loss": 1.103, "margin_dpo/margin_mean": 1.5202577114105225, "margin_dpo/margin_std": 2.647984027862549, "step": 661 }, { "epoch": 0.999244142101285, "step": 661, "total_flos": 0.0, "train_loss": 1.1363916052632181, "train_runtime": 2119.1053, "train_samples_per_second": 19.978, "train_steps_per_second": 0.312 } ], "logging_steps": 1, "max_steps": 661, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }