{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.999244142101285, "eval_steps": 200, "global_step": 661, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0015117157974300832, "fcm_dpo/beta": 0.009999999776482582, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.0036174654960632324, "fcm_dpo/q_t": 0.5000090599060059, "grad_norm": 2.850151538848877, "learning_rate": 0.0, "logits/chosen": 0.12559199333190918, "logits/rejected": 0.11513248085975647, "logps/chosen": -65.17359924316406, "logps/ref_chosen": -65.2028579711914, "logps/ref_rejected": -64.80973052978516, "logps/rejected": -64.77685546875, "loss": 1.3863, "margin_dpo/margin_mean": -0.0036170482635498047, "margin_dpo/margin_std": 0.2552323341369629, "step": 1 }, { "epoch": 0.007558578987150416, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.015368208289146423, "fcm_dpo/q_t": 0.4999615550041199, "grad_norm": 2.9636518955230713, "learning_rate": 2.9850746268656714e-08, "logits/chosen": 0.08381284773349762, "logits/rejected": 0.056591667234897614, "logps/chosen": -65.78416442871094, "logps/ref_chosen": -65.79120635986328, "logps/ref_rejected": -79.74447631835938, "logps/rejected": -79.75279998779297, "loss": 1.3861, "margin_dpo/margin_mean": 0.015368461608886719, "margin_dpo/margin_std": 0.30196240544319153, "step": 5 }, { "epoch": 0.015117157974300832, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.017599213868379593, "fcm_dpo/q_t": 0.500044047832489, "grad_norm": 2.9695703983306885, "learning_rate": 6.71641791044776e-08, "logits/chosen": 0.10358067601919174, "logits/rejected": 0.06364428251981735, "logps/chosen": -57.39263153076172, "logps/ref_chosen": -57.38689041137695, "logps/ref_rejected": -80.92173767089844, "logps/rejected": -80.90988159179688, "loss": 1.3865, "margin_dpo/margin_mean": -0.01759929582476616, "margin_dpo/margin_std": 0.3119713366031647, "step": 10 }, { "epoch": 0.022675736961451247, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.009314382448792458, "fcm_dpo/q_t": 0.4999767243862152, "grad_norm": 3.320962905883789, "learning_rate": 1.044776119402985e-07, "logits/chosen": 0.07827206701040268, "logits/rejected": 0.04801332950592041, "logps/chosen": -61.75555419921875, "logps/ref_chosen": -61.75988006591797, "logps/ref_rejected": -80.3942642211914, "logps/rejected": -80.39925384521484, "loss": 1.3862, "margin_dpo/margin_mean": 0.00931442342698574, "margin_dpo/margin_std": 0.3290034830570221, "step": 15 }, { "epoch": 0.030234315948601664, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.007567483000457287, "fcm_dpo/q_t": 0.4999810755252838, "grad_norm": 2.972181797027588, "learning_rate": 1.4179104477611938e-07, "logits/chosen": 0.08886369317770004, "logits/rejected": 0.06055384874343872, "logps/chosen": -56.6132698059082, "logps/ref_chosen": -56.6275749206543, "logps/ref_rejected": -78.54231262207031, "logps/rejected": -78.53557586669922, "loss": 1.3862, "margin_dpo/margin_mean": 0.007567489054054022, "margin_dpo/margin_std": 0.30747541785240173, "step": 20 }, { "epoch": 0.03779289493575208, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.021364277228713036, "fcm_dpo/q_t": 0.49994659423828125, "grad_norm": 2.9778642654418945, "learning_rate": 1.7910447761194027e-07, "logits/chosen": 0.10495474189519882, "logits/rejected": 0.07259530574083328, "logps/chosen": -61.929527282714844, "logps/ref_chosen": -61.922279357910156, "logps/ref_rejected": -83.95155334472656, "logps/rejected": -83.98015594482422, "loss": 1.3861, "margin_dpo/margin_mean": 0.02136421762406826, "margin_dpo/margin_std": 0.2991010844707489, "step": 25 }, { "epoch": 0.045351473922902494, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.004342102911323309, "fcm_dpo/q_t": 0.4999891221523285, "grad_norm": 3.0755813121795654, "learning_rate": 2.1641791044776117e-07, "logits/chosen": 0.09887897223234177, "logits/rejected": 0.07384434342384338, "logps/chosen": -61.931427001953125, "logps/ref_chosen": -61.90684127807617, "logps/ref_rejected": -79.56486511230469, "logps/rejected": -79.59378814697266, "loss": 1.3863, "margin_dpo/margin_mean": 0.004342180676758289, "margin_dpo/margin_std": 0.31154924631118774, "step": 30 }, { "epoch": 0.05291005291005291, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.04645770788192749, "fcm_dpo/q_t": 0.49988383054733276, "grad_norm": 2.7926185131073, "learning_rate": 2.537313432835821e-07, "logits/chosen": 0.056805629283189774, "logits/rejected": 0.03099716268479824, "logps/chosen": -64.05213165283203, "logps/ref_chosen": -64.01432800292969, "logps/ref_rejected": -81.33033752441406, "logps/rejected": -81.41459655761719, "loss": 1.3858, "margin_dpo/margin_mean": 0.04645807296037674, "margin_dpo/margin_std": 0.3312261402606964, "step": 35 }, { "epoch": 0.06046863189720333, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.012222861871123314, "fcm_dpo/q_t": 0.4999694228172302, "grad_norm": 3.1424221992492676, "learning_rate": 2.9104477611940296e-07, "logits/chosen": 0.10701529681682587, "logits/rejected": 0.061525583267211914, "logps/chosen": -60.28235626220703, "logps/ref_chosen": -60.1998176574707, "logps/ref_rejected": -85.63372039794922, "logps/rejected": -85.72847747802734, "loss": 1.3862, "margin_dpo/margin_mean": 0.012222832068800926, "margin_dpo/margin_std": 0.3903924524784088, "step": 40 }, { "epoch": 0.06802721088435375, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.049396924674510956, "fcm_dpo/q_t": 0.4998764991760254, "grad_norm": 3.186418056488037, "learning_rate": 3.2835820895522385e-07, "logits/chosen": 0.06446581333875656, "logits/rejected": 0.03697461634874344, "logps/chosen": -66.853515625, "logps/ref_chosen": -66.71932220458984, "logps/ref_rejected": -84.73368835449219, "logps/rejected": -84.91728210449219, "loss": 1.3858, "margin_dpo/margin_mean": 0.049397267401218414, "margin_dpo/margin_std": 0.4028749465942383, "step": 45 }, { "epoch": 0.07558578987150416, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.11318810284137726, "fcm_dpo/q_t": 0.4997170567512512, "grad_norm": 2.841273069381714, "learning_rate": 3.6567164179104475e-07, "logits/chosen": 0.07657527923583984, "logits/rejected": 0.04275421425700188, "logps/chosen": -56.79387283325195, "logps/ref_chosen": -56.59545135498047, "logps/ref_rejected": -71.17185974121094, "logps/rejected": -71.48346710205078, "loss": 1.3852, "margin_dpo/margin_mean": 0.11318818479776382, "margin_dpo/margin_std": 0.4424575865268707, "step": 50 }, { "epoch": 0.08314436885865457, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.19668380916118622, "fcm_dpo/q_t": 0.4995082914829254, "grad_norm": 3.2169511318206787, "learning_rate": 4.0298507462686564e-07, "logits/chosen": 0.06426317989826202, "logits/rejected": 0.026701394468545914, "logps/chosen": -58.7703971862793, "logps/ref_chosen": -58.43064498901367, "logps/ref_rejected": -81.11677551269531, "logps/rejected": -81.6532211303711, "loss": 1.3843, "margin_dpo/margin_mean": 0.19668370485305786, "margin_dpo/margin_std": 0.5449806451797485, "step": 55 }, { "epoch": 0.09070294784580499, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.19983884692192078, "fcm_dpo/q_t": 0.49950042366981506, "grad_norm": 2.920549154281616, "learning_rate": 4.4029850746268654e-07, "logits/chosen": 0.10816339403390884, "logits/rejected": 0.07374849915504456, "logps/chosen": -61.71905517578125, "logps/ref_chosen": -61.1767463684082, "logps/ref_rejected": -75.71009063720703, "logps/rejected": -76.45222473144531, "loss": 1.3843, "margin_dpo/margin_mean": 0.19983868300914764, "margin_dpo/margin_std": 0.7893710136413574, "step": 60 }, { "epoch": 0.0982615268329554, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.29686489701271057, "fcm_dpo/q_t": 0.4992578625679016, "grad_norm": 3.1904947757720947, "learning_rate": 4.776119402985074e-07, "logits/chosen": 0.1440560221672058, "logits/rejected": 0.11248280853033066, "logps/chosen": -61.19800567626953, "logps/ref_chosen": -60.42144012451172, "logps/ref_rejected": -77.3677749633789, "logps/rejected": -78.44120025634766, "loss": 1.3834, "margin_dpo/margin_mean": 0.29686498641967773, "margin_dpo/margin_std": 1.026890754699707, "step": 65 }, { "epoch": 0.10582010582010581, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.3897508978843689, "fcm_dpo/q_t": 0.4990256726741791, "grad_norm": 2.9328501224517822, "learning_rate": 4.999860140229787e-07, "logits/chosen": 0.10854315757751465, "logits/rejected": 0.07236559689044952, "logps/chosen": -69.1954574584961, "logps/ref_chosen": -68.04537200927734, "logps/ref_rejected": -83.14714050292969, "logps/rejected": -84.68696594238281, "loss": 1.3824, "margin_dpo/margin_mean": 0.3897508978843689, "margin_dpo/margin_std": 1.3252379894256592, "step": 70 }, { "epoch": 0.11337868480725624, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.5443285703659058, "fcm_dpo/q_t": 0.4986393451690674, "grad_norm": 2.8421285152435303, "learning_rate": 4.998286897523808e-07, "logits/chosen": 0.11565772444009781, "logits/rejected": 0.07986008375883102, "logps/chosen": -58.929412841796875, "logps/ref_chosen": -57.3649787902832, "logps/ref_rejected": -73.14057159423828, "logps/rejected": -75.24932861328125, "loss": 1.3809, "margin_dpo/margin_mean": 0.5443285703659058, "margin_dpo/margin_std": 1.6223704814910889, "step": 75 }, { "epoch": 0.12093726379440665, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.8009698987007141, "fcm_dpo/q_t": 0.4979979991912842, "grad_norm": 2.954160690307617, "learning_rate": 4.994966691179711e-07, "logits/chosen": 0.15183034539222717, "logits/rejected": 0.10617075115442276, "logps/chosen": -60.83113479614258, "logps/ref_chosen": -58.77534103393555, "logps/ref_rejected": -79.07672119140625, "logps/rejected": -81.93347930908203, "loss": 1.3784, "margin_dpo/margin_mean": 0.8009698987007141, "margin_dpo/margin_std": 2.1611573696136475, "step": 80 }, { "epoch": 0.12849584278155707, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.2792611122131348, "fcm_dpo/q_t": 0.49680256843566895, "grad_norm": 3.1464085578918457, "learning_rate": 4.989901842900325e-07, "logits/chosen": 0.15984781086444855, "logits/rejected": 0.11359409987926483, "logps/chosen": -60.38011932373047, "logps/ref_chosen": -57.70839309692383, "logps/ref_rejected": -76.26394653320312, "logps/rejected": -80.21492767333984, "loss": 1.3737, "margin_dpo/margin_mean": 1.2792608737945557, "margin_dpo/margin_std": 2.5403237342834473, "step": 85 }, { "epoch": 0.1360544217687075, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.5232597589492798, "fcm_dpo/q_t": 0.4961939752101898, "grad_norm": 3.333669662475586, "learning_rate": 4.983095894354857e-07, "logits/chosen": 0.2166169136762619, "logits/rejected": 0.1685468852519989, "logps/chosen": -62.62725067138672, "logps/ref_chosen": -58.71812057495117, "logps/ref_rejected": -82.2930908203125, "logps/rejected": -87.7254867553711, "loss": 1.3715, "margin_dpo/margin_mean": 1.5232598781585693, "margin_dpo/margin_std": 3.794527053833008, "step": 90 }, { "epoch": 0.1436130007558579, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 2.0687878131866455, "fcm_dpo/q_t": 0.49483543634414673, "grad_norm": 3.5051140785217285, "learning_rate": 4.974553604702332e-07, "logits/chosen": 0.22696343064308167, "logits/rejected": 0.1834408938884735, "logps/chosen": -59.80878829956055, "logps/ref_chosen": -54.887908935546875, "logps/ref_rejected": -76.79985046386719, "logps/rejected": -83.78950500488281, "loss": 1.3665, "margin_dpo/margin_mean": 2.0687873363494873, "margin_dpo/margin_std": 5.531675338745117, "step": 95 }, { "epoch": 0.15117157974300832, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.6540107727050781, "fcm_dpo/q_t": 0.49587664008140564, "grad_norm": 3.7864036560058594, "learning_rate": 4.964280947263676e-07, "logits/chosen": 0.2681678235530853, "logits/rejected": 0.23597940802574158, "logps/chosen": -72.77100372314453, "logps/ref_chosen": -65.1898422241211, "logps/ref_rejected": -83.39742279052734, "logps/rejected": -92.63258361816406, "loss": 1.3719, "margin_dpo/margin_mean": 1.6540113687515259, "margin_dpo/margin_std": 8.77057933807373, "step": 100 }, { "epoch": 0.15873015873015872, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 2.923435688018799, "fcm_dpo/q_t": 0.49274763464927673, "grad_norm": 4.0385565757751465, "learning_rate": 4.952285105344791e-07, "logits/chosen": 0.3018716275691986, "logits/rejected": 0.25165122747421265, "logps/chosen": -72.08647155761719, "logps/ref_chosen": -63.611778259277344, "logps/ref_rejected": -81.8642578125, "logps/rejected": -93.26237487792969, "loss": 1.3604, "margin_dpo/margin_mean": 2.9234354496002197, "margin_dpo/margin_std": 10.657812118530273, "step": 105 }, { "epoch": 0.16628873771730915, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 2.3822619915008545, "fcm_dpo/q_t": 0.4940711557865143, "grad_norm": 4.880163669586182, "learning_rate": 4.938574467213517e-07, "logits/chosen": 0.27982911467552185, "logits/rejected": 0.25695186853408813, "logps/chosen": -82.95537567138672, "logps/ref_chosen": -70.61798858642578, "logps/ref_rejected": -80.55892181396484, "logps/rejected": -95.27857971191406, "loss": 1.3683, "margin_dpo/margin_mean": 2.3822619915008545, "margin_dpo/margin_std": 14.694234848022461, "step": 110 }, { "epoch": 0.17384731670445955, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 4.24505615234375, "fcm_dpo/q_t": 0.4895564913749695, "grad_norm": 3.4979965686798096, "learning_rate": 4.923158620234019e-07, "logits/chosen": 0.3670777380466461, "logits/rejected": 0.31523239612579346, "logps/chosen": -73.29847717285156, "logps/ref_chosen": -60.36003494262695, "logps/ref_rejected": -83.49537658691406, "logps/rejected": -100.67887878417969, "loss": 1.3508, "margin_dpo/margin_mean": 4.24505615234375, "margin_dpo/margin_std": 15.931065559387207, "step": 115 }, { "epoch": 0.18140589569160998, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 5.727739334106445, "fcm_dpo/q_t": 0.48592695593833923, "grad_norm": 4.6423659324646, "learning_rate": 4.906048344162676e-07, "logits/chosen": 0.40479379892349243, "logits/rejected": 0.3574323058128357, "logps/chosen": -72.22782897949219, "logps/ref_chosen": -57.185150146484375, "logps/ref_rejected": -76.90118408203125, "logps/rejected": -97.67161560058594, "loss": 1.3385, "margin_dpo/margin_mean": 5.727739334106445, "margin_dpo/margin_std": 17.80091667175293, "step": 120 }, { "epoch": 0.1889644746787604, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 8.763603210449219, "fcm_dpo/q_t": 0.47878074645996094, "grad_norm": 4.912358283996582, "learning_rate": 4.887255603610184e-07, "logits/chosen": 0.46570539474487305, "logits/rejected": 0.412194162607193, "logps/chosen": -80.9414291381836, "logps/ref_chosen": -60.63164138793945, "logps/ref_rejected": -87.692138671875, "logps/rejected": -116.76551818847656, "loss": 1.3224, "margin_dpo/margin_mean": 8.763603210449219, "margin_dpo/margin_std": 29.222675323486328, "step": 125 }, { "epoch": 0.1965230536659108, "fcm_dpo/beta": 0.009999998845160007, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 7.171716213226318, "fcm_dpo/q_t": 0.48291224241256714, "grad_norm": 5.604545593261719, "learning_rate": 4.866793539675126e-07, "logits/chosen": 0.42708373069763184, "logits/rejected": 0.3993222713470459, "logps/chosen": -91.39217376708984, "logps/ref_chosen": -65.96144104003906, "logps/ref_rejected": -77.07868194580078, "logps/rejected": -109.6811294555664, "loss": 1.3402, "margin_dpo/margin_mean": 7.171716213226318, "margin_dpo/margin_std": 29.880590438842773, "step": 130 }, { "epoch": 0.20408163265306123, "fcm_dpo/beta": 0.010768004693090916, "fcm_dpo/delta": 0.07399419695138931, "fcm_dpo/margin": 10.805874824523926, "fcm_dpo/q_t": 0.47332343459129333, "grad_norm": 4.438642978668213, "learning_rate": 4.844676460754862e-07, "logits/chosen": 0.5009486079216003, "logits/rejected": 0.4642602801322937, "logps/chosen": -85.66596221923828, "logps/ref_chosen": -58.002349853515625, "logps/ref_rejected": -74.80711364746094, "logps/rejected": -113.27659606933594, "loss": 1.3098, "margin_dpo/margin_mean": 10.80587387084961, "margin_dpo/margin_std": 34.74369430541992, "step": 135 }, { "epoch": 0.21164021164021163, "fcm_dpo/beta": 0.011533305048942566, "fcm_dpo/delta": 0.06865964084863663, "fcm_dpo/margin": 11.856359481811523, "fcm_dpo/q_t": 0.4694371223449707, "grad_norm": 7.658777713775635, "learning_rate": 4.820919832540181e-07, "logits/chosen": 0.4951881468296051, "logits/rejected": 0.4550108015537262, "logps/chosen": -100.08610534667969, "logps/ref_chosen": -64.05648803710938, "logps/ref_rejected": -80.10523986816406, "logps/rejected": -127.9912109375, "loss": 1.3239, "margin_dpo/margin_mean": 11.856356620788574, "margin_dpo/margin_std": 46.47296142578125, "step": 140 }, { "epoch": 0.21919879062736206, "fcm_dpo/beta": 0.01241993997246027, "fcm_dpo/delta": 0.07406426966190338, "fcm_dpo/margin": 8.729610443115234, "fcm_dpo/q_t": 0.47626978158950806, "grad_norm": 16.21937370300293, "learning_rate": 4.795540267200686e-07, "logits/chosen": 0.514064610004425, "logits/rejected": 0.49313417077064514, "logps/chosen": -103.54437255859375, "logps/ref_chosen": -65.76856994628906, "logps/ref_rejected": -81.22962951660156, "logps/rejected": -127.73504638671875, "loss": 1.3539, "margin_dpo/margin_mean": 8.729610443115234, "margin_dpo/margin_std": 44.03262710571289, "step": 145 }, { "epoch": 0.22675736961451248, "fcm_dpo/beta": 0.012905704788863659, "fcm_dpo/delta": 0.07531466335058212, "fcm_dpo/margin": 10.41810417175293, "fcm_dpo/q_t": 0.4676801264286041, "grad_norm": 5.872846603393555, "learning_rate": 4.768555511768486e-07, "logits/chosen": 0.5009235143661499, "logits/rejected": 0.4602130353450775, "logps/chosen": -94.07633972167969, "logps/ref_chosen": -60.346473693847656, "logps/ref_rejected": -75.12642669677734, "logps/rejected": -119.2743911743164, "loss": 1.3045, "margin_dpo/margin_mean": 10.418103218078613, "margin_dpo/margin_std": 34.75267791748047, "step": 150 }, { "epoch": 0.23431594860166288, "fcm_dpo/beta": 0.017845138907432556, "fcm_dpo/delta": 0.4511590898036957, "fcm_dpo/margin": 12.379961013793945, "fcm_dpo/q_t": 0.45373255014419556, "grad_norm": 13.452332496643066, "learning_rate": 4.7399844357283393e-07, "logits/chosen": 0.5150389671325684, "logits/rejected": 0.482626348733902, "logps/chosen": -93.2920913696289, "logps/ref_chosen": -61.901710510253906, "logps/ref_rejected": -75.51579284667969, "logps/rejected": -119.28614807128906, "loss": 1.2681, "margin_dpo/margin_mean": 12.379961013793945, "margin_dpo/margin_std": 33.46470642089844, "step": 155 }, { "epoch": 0.2418745275888133, "fcm_dpo/beta": 0.028629502281546593, "fcm_dpo/delta": 0.5467379093170166, "fcm_dpo/margin": 13.615964889526367, "fcm_dpo/q_t": 0.4253949522972107, "grad_norm": 14.785847663879395, "learning_rate": 4.7098470178228755e-07, "logits/chosen": 0.48933330178260803, "logits/rejected": 0.4520339369773865, "logps/chosen": -91.06707763671875, "logps/ref_chosen": -59.82744598388672, "logps/ref_rejected": -76.28009033203125, "logps/rejected": -121.13565826416016, "loss": 1.1965, "margin_dpo/margin_mean": 13.615964889526367, "margin_dpo/margin_std": 29.862279891967773, "step": 160 }, { "epoch": 0.2494331065759637, "fcm_dpo/beta": 0.04379943758249283, "fcm_dpo/delta": 0.27622583508491516, "fcm_dpo/margin": 16.814184188842773, "fcm_dpo/q_t": 0.36278295516967773, "grad_norm": 19.85223960876465, "learning_rate": 4.678164332082175e-07, "logits/chosen": 0.5087782144546509, "logits/rejected": 0.4584970474243164, "logps/chosen": -86.49288940429688, "logps/ref_chosen": -56.396278381347656, "logps/ref_rejected": -77.31051635742188, "logps/rejected": -124.2213134765625, "loss": 1.0917, "margin_dpo/margin_mean": 16.814186096191406, "margin_dpo/margin_std": 28.670608520507812, "step": 165 }, { "epoch": 0.25699168556311414, "fcm_dpo/beta": 0.053840864449739456, "fcm_dpo/delta": 0.203588604927063, "fcm_dpo/margin": 15.119009017944336, "fcm_dpo/q_t": 0.36196133494377136, "grad_norm": 30.58293342590332, "learning_rate": 4.6449585330874425e-07, "logits/chosen": 0.46817341446876526, "logits/rejected": 0.4275393486022949, "logps/chosen": -88.77765655517578, "logps/ref_chosen": -62.323707580566406, "logps/ref_rejected": -78.42765808105469, "logps/rejected": -120.00062561035156, "loss": 1.1343, "margin_dpo/margin_mean": 15.119009017944336, "margin_dpo/margin_std": 27.05625343322754, "step": 170 }, { "epoch": 0.26455026455026454, "fcm_dpo/beta": 0.05749092251062393, "fcm_dpo/delta": -0.05166977643966675, "fcm_dpo/margin": 18.14907455444336, "fcm_dpo/q_t": 0.33645352721214294, "grad_norm": 27.735427856445312, "learning_rate": 4.6102528404790965e-07, "logits/chosen": 0.49779874086380005, "logits/rejected": 0.4384356141090393, "logps/chosen": -84.31999206542969, "logps/ref_chosen": -60.14301681518555, "logps/ref_rejected": -82.65170288085938, "logps/rejected": -124.97774505615234, "loss": 1.0789, "margin_dpo/margin_mean": 18.14907455444336, "margin_dpo/margin_std": 29.03778648376465, "step": 175 }, { "epoch": 0.272108843537415, "fcm_dpo/beta": 0.05699415132403374, "fcm_dpo/delta": 0.09521742165088654, "fcm_dpo/margin": 16.0145206451416, "fcm_dpo/q_t": 0.34686630964279175, "grad_norm": 28.401058197021484, "learning_rate": 4.5740715227200897e-07, "logits/chosen": 0.4524223208427429, "logits/rejected": 0.4112408757209778, "logps/chosen": -86.755859375, "logps/ref_chosen": -65.02766418457031, "logps/ref_rejected": -80.62745666503906, "logps/rejected": -118.37019348144531, "loss": 1.0718, "margin_dpo/margin_mean": 16.0145206451416, "margin_dpo/margin_std": 25.418670654296875, "step": 180 }, { "epoch": 0.2796674225245654, "fcm_dpo/beta": 0.06562753766775131, "fcm_dpo/delta": 0.15315786004066467, "fcm_dpo/margin": 13.123418807983398, "fcm_dpo/q_t": 0.3597589433193207, "grad_norm": 31.76349449157715, "learning_rate": 4.5364398801258394e-07, "logits/chosen": 0.4652015268802643, "logits/rejected": 0.4133850932121277, "logps/chosen": -74.6918716430664, "logps/ref_chosen": -57.59275436401367, "logps/ref_rejected": -77.97161865234375, "logps/rejected": -108.19415283203125, "loss": 1.1242, "margin_dpo/margin_mean": 13.123418807983398, "margin_dpo/margin_std": 22.702762603759766, "step": 185 }, { "epoch": 0.2872260015117158, "fcm_dpo/beta": 0.07316488027572632, "fcm_dpo/delta": 0.03896424174308777, "fcm_dpo/margin": 13.159120559692383, "fcm_dpo/q_t": 0.3480309545993805, "grad_norm": 35.800045013427734, "learning_rate": 4.4973842271726024e-07, "logits/chosen": 0.4268653392791748, "logits/rejected": 0.3715541362762451, "logps/chosen": -83.3046875, "logps/ref_chosen": -69.40254974365234, "logps/ref_rejected": -87.62089538574219, "logps/rejected": -114.6821517944336, "loss": 1.0661, "margin_dpo/margin_mean": 13.159120559692383, "margin_dpo/margin_std": 21.489017486572266, "step": 190 }, { "epoch": 0.2947845804988662, "fcm_dpo/beta": 0.08043137937784195, "fcm_dpo/delta": 0.10039126873016357, "fcm_dpo/margin": 11.223628997802734, "fcm_dpo/q_t": 0.35878369212150574, "grad_norm": 38.323482513427734, "learning_rate": 4.4569318740967043e-07, "logits/chosen": 0.4170468747615814, "logits/rejected": 0.39506852626800537, "logps/chosen": -78.15182495117188, "logps/ref_chosen": -63.38518524169922, "logps/ref_rejected": -72.65580749511719, "logps/rejected": -98.64608001708984, "loss": 1.1468, "margin_dpo/margin_mean": 11.223628044128418, "margin_dpo/margin_std": 20.275413513183594, "step": 195 }, { "epoch": 0.30234315948601664, "fcm_dpo/beta": 0.08364946395158768, "fcm_dpo/delta": -0.03751251846551895, "fcm_dpo/margin": 12.287755966186523, "fcm_dpo/q_t": 0.33327925205230713, "grad_norm": 29.11920738220215, "learning_rate": 4.415111107797445e-07, "logits/chosen": 0.44890865683555603, "logits/rejected": 0.39030200242996216, "logps/chosen": -70.07976531982422, "logps/ref_chosen": -57.999359130859375, "logps/ref_rejected": -79.5167007446289, "logps/rejected": -103.8848648071289, "loss": 1.0502, "margin_dpo/margin_mean": 12.28775691986084, "margin_dpo/margin_std": 18.79964828491211, "step": 200 }, { "epoch": 0.30234315948601664, "eval_fcm_dpo/beta": 0.09359671175479889, "eval_fcm_dpo/delta": 0.018532773479819298, "eval_fcm_dpo/margin": 10.367452621459961, "eval_fcm_dpo/q_t": 0.35254982113838196, "eval_logits/chosen": 0.4369470477104187, "eval_logits/rejected": 0.38747942447662354, "eval_logps/chosen": -87.90721893310547, "eval_logps/ref_chosen": -75.86933135986328, "eval_logps/ref_rejected": -80.85771942138672, "eval_logps/rejected": -103.26305389404297, "eval_loss": 0.571725070476532, "eval_margin_dpo/margin_mean": 10.367453575134277, "eval_margin_dpo/margin_std": 18.42043113708496, "eval_runtime": 38.6614, "eval_samples_per_second": 59.568, "eval_steps_per_second": 1.862, "step": 200 }, { "epoch": 0.30990173847316704, "fcm_dpo/beta": 0.09407475590705872, "fcm_dpo/delta": 0.060309164226055145, "fcm_dpo/margin": 9.996593475341797, "fcm_dpo/q_t": 0.35484787821769714, "grad_norm": 45.5085563659668, "learning_rate": 4.3719511720570814e-07, "logits/chosen": 0.4219132363796234, "logits/rejected": 0.35513609647750854, "logps/chosen": -71.04405975341797, "logps/ref_chosen": -58.64111328125, "logps/ref_rejected": -84.33369445800781, "logps/rejected": -106.73323822021484, "loss": 1.1443, "margin_dpo/margin_mean": 9.99659252166748, "margin_dpo/margin_std": 18.014066696166992, "step": 205 }, { "epoch": 0.31746031746031744, "fcm_dpo/beta": 0.09743638336658478, "fcm_dpo/delta": -0.04126477986574173, "fcm_dpo/margin": 10.603793144226074, "fcm_dpo/q_t": 0.34100794792175293, "grad_norm": 29.75200080871582, "learning_rate": 4.327482247091679e-07, "logits/chosen": 0.39647018909454346, "logits/rejected": 0.3432803452014923, "logps/chosen": -78.04428100585938, "logps/ref_chosen": -65.22540283203125, "logps/ref_rejected": -84.33940887451172, "logps/rejected": -107.76206970214844, "loss": 1.0715, "margin_dpo/margin_mean": 10.60379409790039, "margin_dpo/margin_std": 16.901790618896484, "step": 210 }, { "epoch": 0.3250188964474679, "fcm_dpo/beta": 0.09210662543773651, "fcm_dpo/delta": -0.050839781761169434, "fcm_dpo/margin": 11.34312629699707, "fcm_dpo/q_t": 0.3336792588233948, "grad_norm": 30.288963317871094, "learning_rate": 4.281735428447157e-07, "logits/chosen": 0.37580037117004395, "logits/rejected": 0.31153884530067444, "logps/chosen": -72.8878173828125, "logps/ref_chosen": -61.34074020385742, "logps/ref_rejected": -85.00725555419922, "logps/rejected": -107.89747619628906, "loss": 1.0263, "margin_dpo/margin_mean": 11.343125343322754, "margin_dpo/margin_std": 17.116981506347656, "step": 215 }, { "epoch": 0.3325774754346183, "fcm_dpo/beta": 0.08995531499385834, "fcm_dpo/delta": 0.10806653648614883, "fcm_dpo/margin": 10.049107551574707, "fcm_dpo/q_t": 0.34747129678726196, "grad_norm": 29.705045700073242, "learning_rate": 4.234742705255272e-07, "logits/chosen": 0.3524443507194519, "logits/rejected": 0.2914368212223053, "logps/chosen": -74.65157318115234, "logps/ref_chosen": -62.409584045410156, "logps/ref_rejected": -81.9083023071289, "logps/rejected": -104.19940185546875, "loss": 1.0618, "margin_dpo/margin_mean": 10.049107551574707, "margin_dpo/margin_std": 15.988082885742188, "step": 220 }, { "epoch": 0.3401360544217687, "fcm_dpo/beta": 0.0927683562040329, "fcm_dpo/delta": -0.03574846684932709, "fcm_dpo/margin": 11.09398078918457, "fcm_dpo/q_t": 0.3298317790031433, "grad_norm": 42.090301513671875, "learning_rate": 4.186536937864752e-07, "logits/chosen": 0.41360267996788025, "logits/rejected": 0.34270113706588745, "logps/chosen": -75.1723403930664, "logps/ref_chosen": -63.19435501098633, "logps/ref_rejected": -94.3624038696289, "logps/rejected": -117.43436431884766, "loss": 0.9653, "margin_dpo/margin_mean": 11.09398078918457, "margin_dpo/margin_std": 15.557014465332031, "step": 225 }, { "epoch": 0.3476946334089191, "fcm_dpo/beta": 0.0995025485754013, "fcm_dpo/delta": 0.0801922157406807, "fcm_dpo/margin": 9.299490928649902, "fcm_dpo/q_t": 0.3476495146751404, "grad_norm": 41.26875305175781, "learning_rate": 4.137151834863213e-07, "logits/chosen": 0.4226433336734772, "logits/rejected": 0.37348127365112305, "logps/chosen": -67.02788543701172, "logps/ref_chosen": -55.014076232910156, "logps/ref_rejected": -72.50662994384766, "logps/rejected": -93.8199234008789, "loss": 1.0886, "margin_dpo/margin_mean": 9.299490928649902, "margin_dpo/margin_std": 15.479217529296875, "step": 230 }, { "epoch": 0.35525321239606955, "fcm_dpo/beta": 0.10012258589267731, "fcm_dpo/delta": -0.0002490147890057415, "fcm_dpo/margin": 9.948552131652832, "fcm_dpo/q_t": 0.3335891366004944, "grad_norm": 31.21560287475586, "learning_rate": 4.08662192950594e-07, "logits/chosen": 0.35273051261901855, "logits/rejected": 0.3253236711025238, "logps/chosen": -77.28237915039062, "logps/ref_chosen": -64.1020278930664, "logps/ref_rejected": -73.81226348876953, "logps/rejected": -96.94117736816406, "loss": 1.0153, "margin_dpo/margin_mean": 9.948553085327148, "margin_dpo/margin_std": 14.477907180786133, "step": 235 }, { "epoch": 0.36281179138321995, "fcm_dpo/beta": 0.0998903140425682, "fcm_dpo/delta": -0.08263019472360611, "fcm_dpo/margin": 10.745096206665039, "fcm_dpo/q_t": 0.3202618360519409, "grad_norm": 25.250896453857422, "learning_rate": 4.0349825555680045e-07, "logits/chosen": 0.384564071893692, "logits/rejected": 0.33145636320114136, "logps/chosen": -81.09648132324219, "logps/ref_chosen": -66.39305877685547, "logps/ref_rejected": -88.76033020019531, "logps/rejected": -114.2088394165039, "loss": 0.9897, "margin_dpo/margin_mean": 10.745096206665039, "margin_dpo/margin_std": 14.994283676147461, "step": 240 }, { "epoch": 0.37037037037037035, "fcm_dpo/beta": 0.09539251029491425, "fcm_dpo/delta": -0.0672103613615036, "fcm_dpo/margin": 11.129631996154785, "fcm_dpo/q_t": 0.31643834710121155, "grad_norm": 51.30845642089844, "learning_rate": 3.982269822636601e-07, "logits/chosen": 0.3606599271297455, "logits/rejected": 0.3225025534629822, "logps/chosen": -81.35462951660156, "logps/ref_chosen": -67.98930358886719, "logps/ref_rejected": -77.23219299316406, "logps/rejected": -101.7271499633789, "loss": 0.9158, "margin_dpo/margin_mean": 11.129631042480469, "margin_dpo/margin_std": 13.993242263793945, "step": 245 }, { "epoch": 0.3779289493575208, "fcm_dpo/beta": 0.0966949611902237, "fcm_dpo/delta": 0.11792643368244171, "fcm_dpo/margin": 9.158844947814941, "fcm_dpo/q_t": 0.3497825860977173, "grad_norm": 29.191015243530273, "learning_rate": 3.9285205908608934e-07, "logits/chosen": 0.387145459651947, "logits/rejected": 0.3767244815826416, "logps/chosen": -89.218994140625, "logps/ref_chosen": -72.36497497558594, "logps/ref_rejected": -77.82171630859375, "logps/rejected": -103.8345947265625, "loss": 1.1125, "margin_dpo/margin_mean": 9.158845901489258, "margin_dpo/margin_std": 15.31347370147705, "step": 250 }, { "epoch": 0.3854875283446712, "fcm_dpo/beta": 0.10761729627847672, "fcm_dpo/delta": 0.08714894950389862, "fcm_dpo/margin": 8.48575496673584, "fcm_dpo/q_t": 0.35001808404922485, "grad_norm": 32.06684112548828, "learning_rate": 3.873772445177015e-07, "logits/chosen": 0.3626454174518585, "logits/rejected": 0.3155224919319153, "logps/chosen": -79.03126525878906, "logps/ref_chosen": -63.40877151489258, "logps/ref_rejected": -79.03904724121094, "logps/rejected": -103.14729309082031, "loss": 1.1116, "margin_dpo/margin_mean": 8.48575496673584, "margin_dpo/margin_std": 14.401884078979492, "step": 255 }, { "epoch": 0.3930461073318216, "fcm_dpo/beta": 0.09919991344213486, "fcm_dpo/delta": -0.1404997706413269, "fcm_dpo/margin": 11.3255033493042, "fcm_dpo/q_t": 0.31073272228240967, "grad_norm": 30.121217727661133, "learning_rate": 3.818063669026256e-07, "logits/chosen": 0.3378009796142578, "logits/rejected": 0.2627086341381073, "logps/chosen": -79.64289855957031, "logps/ref_chosen": -63.3157844543457, "logps/ref_rejected": -93.57626342773438, "logps/rejected": -121.2288818359375, "loss": 0.9261, "margin_dpo/margin_mean": 11.3255033493042, "margin_dpo/margin_std": 14.669309616088867, "step": 260 }, { "epoch": 0.40060468631897206, "fcm_dpo/beta": 0.10028767585754395, "fcm_dpo/delta": 0.0695745199918747, "fcm_dpo/margin": 9.310823440551758, "fcm_dpo/q_t": 0.34190893173217773, "grad_norm": 39.34469985961914, "learning_rate": 3.7614332175848027e-07, "logits/chosen": 0.3269875645637512, "logits/rejected": 0.27384883165359497, "logps/chosen": -82.93329620361328, "logps/ref_chosen": -66.82787322998047, "logps/ref_rejected": -79.1831283569336, "logps/rejected": -104.599365234375, "loss": 1.0288, "margin_dpo/margin_mean": 9.310823440551758, "margin_dpo/margin_std": 14.060315132141113, "step": 265 }, { "epoch": 0.40816326530612246, "fcm_dpo/beta": 0.10525654256343842, "fcm_dpo/delta": 0.06899507343769073, "fcm_dpo/margin": 8.870966911315918, "fcm_dpo/q_t": 0.33700481057167053, "grad_norm": 34.62828063964844, "learning_rate": 3.7039206905237656e-07, "logits/chosen": 0.35178321599960327, "logits/rejected": 0.3009414076805115, "logps/chosen": -78.264404296875, "logps/ref_chosen": -63.54209518432617, "logps/ref_rejected": -78.09616088867188, "logps/rejected": -101.68943786621094, "loss": 1.0151, "margin_dpo/margin_mean": 8.870966911315918, "margin_dpo/margin_std": 13.135915756225586, "step": 270 }, { "epoch": 0.41572184429327286, "fcm_dpo/beta": 0.10544770956039429, "fcm_dpo/delta": -0.05175945162773132, "fcm_dpo/margin": 9.915693283081055, "fcm_dpo/q_t": 0.3259262442588806, "grad_norm": 30.281387329101562, "learning_rate": 3.645566304318526e-07, "logits/chosen": 0.3330889642238617, "logits/rejected": 0.28538888692855835, "logps/chosen": -77.23836517333984, "logps/ref_chosen": -63.090972900390625, "logps/ref_rejected": -79.1383056640625, "logps/rejected": -103.2013931274414, "loss": 0.9663, "margin_dpo/margin_mean": 9.915693283081055, "margin_dpo/margin_std": 13.829241752624512, "step": 275 }, { "epoch": 0.42328042328042326, "fcm_dpo/beta": 0.10395065695047379, "fcm_dpo/delta": -0.05435022711753845, "fcm_dpo/margin": 10.08836555480957, "fcm_dpo/q_t": 0.31206685304641724, "grad_norm": 23.842212677001953, "learning_rate": 3.586410864126781e-07, "logits/chosen": 0.31397441029548645, "logits/rejected": 0.28041377663612366, "logps/chosen": -75.13356018066406, "logps/ref_chosen": -61.85026168823242, "logps/ref_rejected": -73.87454986572266, "logps/rejected": -97.2462158203125, "loss": 0.9042, "margin_dpo/margin_mean": 10.088364601135254, "margin_dpo/margin_std": 12.210702896118164, "step": 280 }, { "epoch": 0.4308390022675737, "fcm_dpo/beta": 0.09839525073766708, "fcm_dpo/delta": 0.06181678920984268, "fcm_dpo/margin": 9.582967758178711, "fcm_dpo/q_t": 0.33559128642082214, "grad_norm": 35.4563102722168, "learning_rate": 3.5264957352549375e-07, "logits/chosen": 0.33106130361557007, "logits/rejected": 0.2812042832374573, "logps/chosen": -78.70259094238281, "logps/ref_chosen": -64.2256851196289, "logps/ref_rejected": -80.54659271240234, "logps/rejected": -104.6064682006836, "loss": 0.9747, "margin_dpo/margin_mean": 9.582967758178711, "margin_dpo/margin_std": 13.323356628417969, "step": 285 }, { "epoch": 0.4383975812547241, "fcm_dpo/beta": 0.0960320457816124, "fcm_dpo/delta": -0.08064164221286774, "fcm_dpo/margin": 11.12451171875, "fcm_dpo/q_t": 0.31567567586898804, "grad_norm": 36.847713470458984, "learning_rate": 3.465862814232821e-07, "logits/chosen": 0.3282826542854309, "logits/rejected": 0.26554709672927856, "logps/chosen": -74.4558334350586, "logps/ref_chosen": -58.45670700073242, "logps/ref_rejected": -80.57959747314453, "logps/rejected": -107.7032241821289, "loss": 0.9158, "margin_dpo/margin_mean": 11.12451171875, "margin_dpo/margin_std": 14.189311027526855, "step": 290 }, { "epoch": 0.4459561602418745, "fcm_dpo/beta": 0.09265764057636261, "fcm_dpo/delta": -0.019339444115757942, "fcm_dpo/margin": 10.953125, "fcm_dpo/q_t": 0.325559139251709, "grad_norm": 31.604331970214844, "learning_rate": 3.4045544995169125e-07, "logits/chosen": 0.3938923478126526, "logits/rejected": 0.32047176361083984, "logps/chosen": -73.53590393066406, "logps/ref_chosen": -56.701622009277344, "logps/ref_rejected": -79.15914916992188, "logps/rejected": -106.9465560913086, "loss": 0.9464, "margin_dpo/margin_mean": 10.953125, "margin_dpo/margin_std": 14.6715726852417, "step": 295 }, { "epoch": 0.45351473922902497, "fcm_dpo/beta": 0.09225670993328094, "fcm_dpo/delta": 0.019766664132475853, "fcm_dpo/margin": 10.636110305786133, "fcm_dpo/q_t": 0.32959383726119995, "grad_norm": 29.963603973388672, "learning_rate": 3.3426136618426043e-07, "logits/chosen": 0.3454452157020569, "logits/rejected": 0.28967177867889404, "logps/chosen": -81.18212127685547, "logps/ref_chosen": -62.49296951293945, "logps/ref_rejected": -76.37828063964844, "logps/rejected": -105.70356750488281, "loss": 0.9604, "margin_dpo/margin_mean": 10.636110305786133, "margin_dpo/margin_std": 14.378878593444824, "step": 300 }, { "epoch": 0.46107331821617537, "fcm_dpo/beta": 0.09693561494350433, "fcm_dpo/delta": -0.0007272452348843217, "fcm_dpo/margin": 10.295035362243652, "fcm_dpo/q_t": 0.33259207010269165, "grad_norm": 41.86263656616211, "learning_rate": 3.280083614246217e-07, "logits/chosen": 0.32713833451271057, "logits/rejected": 0.2873557209968567, "logps/chosen": -83.19766235351562, "logps/ref_chosen": -63.961265563964844, "logps/ref_rejected": -79.19660949707031, "logps/rejected": -108.72804260253906, "loss": 1.012, "margin_dpo/margin_mean": 10.295036315917969, "margin_dpo/margin_std": 14.964961051940918, "step": 305 }, { "epoch": 0.46863189720332576, "fcm_dpo/beta": 0.10219204425811768, "fcm_dpo/delta": 0.1630358248949051, "fcm_dpo/margin": 8.336259841918945, "fcm_dpo/q_t": 0.3623664081096649, "grad_norm": 43.633724212646484, "learning_rate": 3.2170080817777257e-07, "logits/chosen": 0.3746958076953888, "logits/rejected": 0.3229959309101105, "logps/chosen": -84.5007553100586, "logps/ref_chosen": -65.43470764160156, "logps/ref_rejected": -76.08763885498047, "logps/rejected": -103.48995208740234, "loss": 1.1116, "margin_dpo/margin_mean": 8.336259841918945, "margin_dpo/margin_std": 14.339553833007812, "step": 310 }, { "epoch": 0.47619047619047616, "fcm_dpo/beta": 0.10657189041376114, "fcm_dpo/delta": -0.09334631264209747, "fcm_dpo/margin": 10.140634536743164, "fcm_dpo/q_t": 0.3195830285549164, "grad_norm": 29.57124900817871, "learning_rate": 3.1534311709253723e-07, "logits/chosen": 0.3460700511932373, "logits/rejected": 0.288535475730896, "logps/chosen": -79.8516845703125, "logps/ref_chosen": -62.9846305847168, "logps/ref_rejected": -75.53777313232422, "logps/rejected": -102.54544830322266, "loss": 0.9646, "margin_dpo/margin_mean": 10.14063549041748, "margin_dpo/margin_std": 13.977231979370117, "step": 315 }, { "epoch": 0.4837490551776266, "fcm_dpo/beta": 0.09681382775306702, "fcm_dpo/delta": -0.04583514854311943, "fcm_dpo/margin": 10.692605018615723, "fcm_dpo/q_t": 0.3291170001029968, "grad_norm": 32.62282943725586, "learning_rate": 3.0893973387735683e-07, "logits/chosen": 0.3217319846153259, "logits/rejected": 0.2573382556438446, "logps/chosen": -72.19245147705078, "logps/ref_chosen": -56.67329788208008, "logps/ref_rejected": -81.22078704833984, "logps/rejected": -107.43255615234375, "loss": 1.0017, "margin_dpo/margin_mean": 10.692605018615723, "margin_dpo/margin_std": 15.321540832519531, "step": 320 }, { "epoch": 0.491307634164777, "fcm_dpo/beta": 0.09586743265390396, "fcm_dpo/delta": -0.005144490860402584, "fcm_dpo/margin": 10.444814682006836, "fcm_dpo/q_t": 0.32588261365890503, "grad_norm": 29.304916381835938, "learning_rate": 3.0249513619156206e-07, "logits/chosen": 0.336866557598114, "logits/rejected": 0.273507684469223, "logps/chosen": -74.34947967529297, "logps/ref_chosen": -58.42055130004883, "logps/ref_rejected": -74.77824401855469, "logps/rejected": -101.1519775390625, "loss": 0.9637, "margin_dpo/margin_mean": 10.444815635681152, "margin_dpo/margin_std": 14.225895881652832, "step": 325 }, { "epoch": 0.4988662131519274, "fcm_dpo/beta": 0.10534314811229706, "fcm_dpo/delta": 0.07023780792951584, "fcm_dpo/margin": 8.831705093383789, "fcm_dpo/q_t": 0.34769195318222046, "grad_norm": 31.103214263916016, "learning_rate": 2.9601383051430505e-07, "logits/chosen": 0.2966001629829407, "logits/rejected": 0.2543550431728363, "logps/chosen": -83.2847900390625, "logps/ref_chosen": -66.16510772705078, "logps/ref_rejected": -79.58935546875, "logps/rejected": -105.5407485961914, "loss": 1.0797, "margin_dpo/margin_mean": 8.831704139709473, "margin_dpo/margin_std": 14.508381843566895, "step": 330 }, { "epoch": 0.5064247921390779, "fcm_dpo/beta": 0.09514714032411575, "fcm_dpo/delta": -0.08864019811153412, "fcm_dpo/margin": 11.286481857299805, "fcm_dpo/q_t": 0.3190566897392273, "grad_norm": 30.666662216186523, "learning_rate": 2.895003489933375e-07, "logits/chosen": 0.34321731328964233, "logits/rejected": 0.29262733459472656, "logps/chosen": -81.09008026123047, "logps/ref_chosen": -64.61544799804688, "logps/ref_rejected": -81.56526947021484, "logps/rejected": -109.3263931274414, "loss": 0.9389, "margin_dpo/margin_mean": 11.286481857299805, "margin_dpo/margin_std": 15.018880844116211, "step": 335 }, { "epoch": 0.5139833711262283, "fcm_dpo/beta": 0.09981605410575867, "fcm_dpo/delta": 0.05951204150915146, "fcm_dpo/margin": 9.45673656463623, "fcm_dpo/q_t": 0.34890830516815186, "grad_norm": 35.18781661987305, "learning_rate": 2.8295924627584004e-07, "logits/chosen": 0.3079679608345032, "logits/rejected": 0.2665550112724304, "logps/chosen": -81.59588623046875, "logps/ref_chosen": -62.10752487182617, "logps/ref_rejected": -77.66670227050781, "logps/rejected": -106.6117935180664, "loss": 1.0579, "margin_dpo/margin_mean": 9.45673656463623, "margin_dpo/margin_std": 15.178037643432617, "step": 340 }, { "epoch": 0.5215419501133787, "fcm_dpo/beta": 0.09543491154909134, "fcm_dpo/delta": -0.054286111146211624, "fcm_dpo/margin": 10.175302505493164, "fcm_dpo/q_t": 0.33326101303100586, "grad_norm": 26.658878326416016, "learning_rate": 2.7639509632351927e-07, "logits/chosen": 0.340119332075119, "logits/rejected": 0.2935033440589905, "logps/chosen": -79.18875885009766, "logps/ref_chosen": -61.37943649291992, "logps/ref_rejected": -79.8868637084961, "logps/rejected": -107.87149810791016, "loss": 0.9934, "margin_dpo/margin_mean": 10.175302505493164, "margin_dpo/margin_std": 14.267759323120117, "step": 345 }, { "epoch": 0.5291005291005291, "fcm_dpo/beta": 0.09232033044099808, "fcm_dpo/delta": -0.09336394816637039, "fcm_dpo/margin": 11.742635726928711, "fcm_dpo/q_t": 0.321283221244812, "grad_norm": 28.552518844604492, "learning_rate": 2.698124892141971e-07, "logits/chosen": 0.40391048789024353, "logits/rejected": 0.3338189125061035, "logps/chosen": -72.78422546386719, "logps/ref_chosen": -56.05344772338867, "logps/ref_rejected": -81.98738861083984, "logps/rejected": -110.4608154296875, "loss": 0.933, "margin_dpo/margin_mean": 11.742635726928711, "margin_dpo/margin_std": 15.581090927124023, "step": 350 }, { "epoch": 0.5366591080876795, "fcm_dpo/beta": 0.08842920511960983, "fcm_dpo/delta": 0.005702398717403412, "fcm_dpo/margin": 11.23391056060791, "fcm_dpo/q_t": 0.3230994641780853, "grad_norm": 30.30412483215332, "learning_rate": 2.632160279321328e-07, "logits/chosen": 0.3609849512577057, "logits/rejected": 0.2793940007686615, "logps/chosen": -72.1435317993164, "logps/ref_chosen": -56.14973831176758, "logps/ref_rejected": -78.04826354980469, "logps/rejected": -105.2759780883789, "loss": 0.963, "margin_dpo/margin_mean": 11.23391056060791, "margin_dpo/margin_std": 15.269304275512695, "step": 355 }, { "epoch": 0.54421768707483, "fcm_dpo/beta": 0.09247281402349472, "fcm_dpo/delta": 0.09105464816093445, "fcm_dpo/margin": 9.872905731201172, "fcm_dpo/q_t": 0.34462517499923706, "grad_norm": 31.00304412841797, "learning_rate": 2.5661032514931834e-07, "logits/chosen": 0.3180425763130188, "logits/rejected": 0.27117234468460083, "logps/chosen": -78.58186340332031, "logps/ref_chosen": -61.611045837402344, "logps/ref_rejected": -76.07168579101562, "logps/rejected": -102.9154052734375, "loss": 1.0624, "margin_dpo/margin_mean": 9.872904777526855, "margin_dpo/margin_std": 15.538159370422363, "step": 360 }, { "epoch": 0.5517762660619804, "fcm_dpo/beta": 0.08917222917079926, "fcm_dpo/delta": -0.11267988383769989, "fcm_dpo/margin": 12.325703620910645, "fcm_dpo/q_t": 0.3082793056964874, "grad_norm": 23.29572296142578, "learning_rate": 2.5e-07, "logits/chosen": 0.35711944103240967, "logits/rejected": 0.2748965919017792, "logps/chosen": -76.3038558959961, "logps/ref_chosen": -59.96733474731445, "logps/ref_rejected": -85.49105834960938, "logps/rejected": -114.15328216552734, "loss": 0.8889, "margin_dpo/margin_mean": 12.325704574584961, "margin_dpo/margin_std": 15.190678596496582, "step": 365 }, { "epoch": 0.5593348450491308, "fcm_dpo/beta": 0.09262686967849731, "fcm_dpo/delta": 0.08950239419937134, "fcm_dpo/margin": 9.877817153930664, "fcm_dpo/q_t": 0.3416265845298767, "grad_norm": 27.85451889038086, "learning_rate": 2.4338967485068164e-07, "logits/chosen": 0.29736343026161194, "logits/rejected": 0.24403652548789978, "logps/chosen": -76.03514862060547, "logps/ref_chosen": -60.001609802246094, "logps/ref_rejected": -76.47229766845703, "logps/rejected": -102.38365173339844, "loss": 1.0434, "margin_dpo/margin_mean": 9.877817153930664, "margin_dpo/margin_std": 15.134869575500488, "step": 370 }, { "epoch": 0.5668934240362812, "fcm_dpo/beta": 0.09520609676837921, "fcm_dpo/delta": 0.03861137107014656, "fcm_dpo/margin": 10.090354919433594, "fcm_dpo/q_t": 0.3396856188774109, "grad_norm": 35.437652587890625, "learning_rate": 2.3678397206786715e-07, "logits/chosen": 0.3540731370449066, "logits/rejected": 0.29804927110671997, "logps/chosen": -77.1456527709961, "logps/ref_chosen": -59.98427200317383, "logps/ref_rejected": -75.23977661132812, "logps/rejected": -102.49151611328125, "loss": 1.0658, "margin_dpo/margin_mean": 10.090354919433594, "margin_dpo/margin_std": 15.728398323059082, "step": 375 }, { "epoch": 0.5744520030234316, "fcm_dpo/beta": 0.09189613163471222, "fcm_dpo/delta": -0.08170835673809052, "fcm_dpo/margin": 11.631962776184082, "fcm_dpo/q_t": 0.319501131772995, "grad_norm": 27.832942962646484, "learning_rate": 2.3018751078580283e-07, "logits/chosen": 0.3677051365375519, "logits/rejected": 0.31347864866256714, "logps/chosen": -77.08321380615234, "logps/ref_chosen": -60.21544647216797, "logps/ref_rejected": -77.54380798339844, "logps/rejected": -106.04354095458984, "loss": 0.9542, "margin_dpo/margin_mean": 11.631962776184082, "margin_dpo/margin_std": 15.64165210723877, "step": 380 }, { "epoch": 0.582010582010582, "fcm_dpo/beta": 0.09286109358072281, "fcm_dpo/delta": 0.020266292616724968, "fcm_dpo/margin": 9.64010238647461, "fcm_dpo/q_t": 0.34380003809928894, "grad_norm": 29.564525604248047, "learning_rate": 2.2360490367648084e-07, "logits/chosen": 0.31580013036727905, "logits/rejected": 0.2819364070892334, "logps/chosen": -85.32447814941406, "logps/ref_chosen": -67.37496185302734, "logps/ref_rejected": -77.77253723144531, "logps/rejected": -105.36214447021484, "loss": 1.0145, "margin_dpo/margin_mean": 9.64010238647461, "margin_dpo/margin_std": 14.208511352539062, "step": 385 }, { "epoch": 0.5895691609977324, "fcm_dpo/beta": 0.09398090094327927, "fcm_dpo/delta": 0.04170190542936325, "fcm_dpo/margin": 10.224244117736816, "fcm_dpo/q_t": 0.3312895596027374, "grad_norm": 25.84682273864746, "learning_rate": 2.170407537241599e-07, "logits/chosen": 0.3518233597278595, "logits/rejected": 0.29235878586769104, "logps/chosen": -79.87808990478516, "logps/ref_chosen": -62.08070755004883, "logps/ref_rejected": -80.65849304199219, "logps/rejected": -108.68013763427734, "loss": 0.9694, "margin_dpo/margin_mean": 10.224244117736816, "margin_dpo/margin_std": 14.191637992858887, "step": 390 }, { "epoch": 0.5971277399848829, "fcm_dpo/beta": 0.08911158889532089, "fcm_dpo/delta": -0.06056561321020126, "fcm_dpo/margin": 11.786225318908691, "fcm_dpo/q_t": 0.31978386640548706, "grad_norm": 27.2460994720459, "learning_rate": 2.104996510066625e-07, "logits/chosen": 0.3753899037837982, "logits/rejected": 0.3135729134082794, "logps/chosen": -76.43754577636719, "logps/ref_chosen": -59.841339111328125, "logps/ref_rejected": -81.67756652832031, "logps/rejected": -110.05999755859375, "loss": 0.9292, "margin_dpo/margin_mean": 11.786226272583008, "margin_dpo/margin_std": 15.320358276367188, "step": 395 }, { "epoch": 0.6046863189720333, "fcm_dpo/beta": 0.0909147709608078, "fcm_dpo/delta": 0.033920951187610626, "fcm_dpo/margin": 10.581128120422363, "fcm_dpo/q_t": 0.3356344997882843, "grad_norm": 40.5637321472168, "learning_rate": 2.0398616948569493e-07, "logits/chosen": 0.32412275671958923, "logits/rejected": 0.23972614109516144, "logps/chosen": -80.27471923828125, "logps/ref_chosen": -61.95880889892578, "logps/ref_rejected": -89.60023498535156, "logps/rejected": -118.49725341796875, "loss": 1.0126, "margin_dpo/margin_mean": 10.58112907409668, "margin_dpo/margin_std": 15.470416069030762, "step": 400 }, { "epoch": 0.6046863189720333, "eval_fcm_dpo/beta": 0.10331619530916214, "eval_fcm_dpo/delta": 0.009760010987520218, "eval_fcm_dpo/margin": 9.474839210510254, "eval_fcm_dpo/q_t": 0.3416881263256073, "eval_logits/chosen": 0.35637208819389343, "eval_logits/rejected": 0.3007829487323761, "eval_logps/chosen": -92.93755340576172, "eval_logps/ref_chosen": -75.86933135986328, "eval_logps/ref_rejected": -80.85771942138672, "eval_logps/rejected": -107.40077209472656, "eval_loss": 0.5364252328872681, "eval_margin_dpo/margin_mean": 9.474839210510254, "eval_margin_dpo/margin_std": 15.286213874816895, "eval_runtime": 38.7022, "eval_samples_per_second": 59.506, "eval_steps_per_second": 1.86, "step": 400 }, { "epoch": 0.6122448979591837, "fcm_dpo/beta": 0.08817870914936066, "fcm_dpo/delta": -0.1327141523361206, "fcm_dpo/margin": 12.578492164611816, "fcm_dpo/q_t": 0.3107960820198059, "grad_norm": 23.5877742767334, "learning_rate": 1.975048638084379e-07, "logits/chosen": 0.37417587637901306, "logits/rejected": 0.2990309000015259, "logps/chosen": -73.52447509765625, "logps/ref_chosen": -57.03437423706055, "logps/ref_rejected": -78.54074096679688, "logps/rejected": -107.60932922363281, "loss": 0.8956, "margin_dpo/margin_mean": 12.5784912109375, "margin_dpo/margin_std": 15.335866928100586, "step": 405 }, { "epoch": 0.6198034769463341, "fcm_dpo/beta": 0.08702994883060455, "fcm_dpo/delta": 0.04099176451563835, "fcm_dpo/margin": 11.039661407470703, "fcm_dpo/q_t": 0.3316665291786194, "grad_norm": 29.719276428222656, "learning_rate": 1.9106026612264315e-07, "logits/chosen": 0.3420962393283844, "logits/rejected": 0.27093860507011414, "logps/chosen": -82.31895446777344, "logps/ref_chosen": -65.09486389160156, "logps/ref_rejected": -82.60694885253906, "logps/rejected": -110.87071228027344, "loss": 0.9591, "margin_dpo/margin_mean": 11.039661407470703, "margin_dpo/margin_std": 14.888700485229492, "step": 410 }, { "epoch": 0.6273620559334845, "fcm_dpo/beta": 0.08784516900777817, "fcm_dpo/delta": -0.04525933414697647, "fcm_dpo/margin": 11.834297180175781, "fcm_dpo/q_t": 0.3292234539985657, "grad_norm": 30.517234802246094, "learning_rate": 1.846568829074628e-07, "logits/chosen": 0.3091197609901428, "logits/rejected": 0.25511085987091064, "logps/chosen": -78.46774291992188, "logps/ref_chosen": -58.7742805480957, "logps/ref_rejected": -72.8920669555664, "logps/rejected": -104.4198226928711, "loss": 0.9871, "margin_dpo/margin_mean": 11.834297180175781, "margin_dpo/margin_std": 17.207059860229492, "step": 415 }, { "epoch": 0.6349206349206349, "fcm_dpo/beta": 0.09040302783250809, "fcm_dpo/delta": 0.08599478006362915, "fcm_dpo/margin": 10.107150077819824, "fcm_dpo/q_t": 0.34641337394714355, "grad_norm": 37.267417907714844, "learning_rate": 1.782991918222275e-07, "logits/chosen": 0.3381853699684143, "logits/rejected": 0.29058149456977844, "logps/chosen": -80.76959991455078, "logps/ref_chosen": -59.88574981689453, "logps/ref_rejected": -70.21773529052734, "logps/rejected": -101.208740234375, "loss": 1.1005, "margin_dpo/margin_mean": 10.107150077819824, "margin_dpo/margin_std": 16.638538360595703, "step": 420 }, { "epoch": 0.6424792139077853, "fcm_dpo/beta": 0.09201686084270477, "fcm_dpo/delta": -0.06255164742469788, "fcm_dpo/margin": 11.48328685760498, "fcm_dpo/q_t": 0.3294609785079956, "grad_norm": 26.42628288269043, "learning_rate": 1.7199163857537824e-07, "logits/chosen": 0.3337697982788086, "logits/rejected": 0.28397053480148315, "logps/chosen": -78.5306167602539, "logps/ref_chosen": -59.304222106933594, "logps/ref_rejected": -75.0927963256836, "logps/rejected": -105.80247497558594, "loss": 1.0021, "margin_dpo/margin_mean": 11.48328685760498, "margin_dpo/margin_std": 16.985111236572266, "step": 425 }, { "epoch": 0.6500377928949358, "fcm_dpo/beta": 0.09108453243970871, "fcm_dpo/delta": -0.03131581097841263, "fcm_dpo/margin": 11.279854774475098, "fcm_dpo/q_t": 0.3330654799938202, "grad_norm": 25.44510269165039, "learning_rate": 1.6573863381573954e-07, "logits/chosen": 0.30293092131614685, "logits/rejected": 0.2799461781978607, "logps/chosen": -84.71260833740234, "logps/ref_chosen": -63.816734313964844, "logps/ref_rejected": -75.532470703125, "logps/rejected": -107.70819091796875, "loss": 1.0274, "margin_dpo/margin_mean": 11.279854774475098, "margin_dpo/margin_std": 17.151844024658203, "step": 430 }, { "epoch": 0.6575963718820862, "fcm_dpo/beta": 0.08440228551626205, "fcm_dpo/delta": -0.03266172856092453, "fcm_dpo/margin": 12.173011779785156, "fcm_dpo/q_t": 0.3264302611351013, "grad_norm": 28.003742218017578, "learning_rate": 1.5954455004830878e-07, "logits/chosen": 0.40501174330711365, "logits/rejected": 0.358463317155838, "logps/chosen": -75.50459289550781, "logps/ref_chosen": -56.96874237060547, "logps/ref_rejected": -75.08180236816406, "logps/rejected": -105.7906723022461, "loss": 0.9586, "margin_dpo/margin_mean": 12.173012733459473, "margin_dpo/margin_std": 16.6791934967041, "step": 435 }, { "epoch": 0.6651549508692366, "fcm_dpo/beta": 0.0831587016582489, "fcm_dpo/delta": -0.03108084760606289, "fcm_dpo/margin": 12.317110061645508, "fcm_dpo/q_t": 0.32641124725341797, "grad_norm": 28.61335563659668, "learning_rate": 1.534137185767178e-07, "logits/chosen": 0.33481085300445557, "logits/rejected": 0.25145426392555237, "logps/chosen": -75.74763488769531, "logps/ref_chosen": -56.746910095214844, "logps/ref_rejected": -77.73384857177734, "logps/rejected": -109.05167388916016, "loss": 0.9722, "margin_dpo/margin_mean": 12.317110061645508, "margin_dpo/margin_std": 16.91001319885254, "step": 440 }, { "epoch": 0.672713529856387, "fcm_dpo/beta": 0.07718690484762192, "fcm_dpo/delta": -0.042038463056087494, "fcm_dpo/margin": 12.584096908569336, "fcm_dpo/q_t": 0.3266654312610626, "grad_norm": 29.00673484802246, "learning_rate": 1.473504264745062e-07, "logits/chosen": 0.35013240575790405, "logits/rejected": 0.2813698351383209, "logps/chosen": -81.3695068359375, "logps/ref_chosen": -61.107688903808594, "logps/ref_rejected": -83.23820495605469, "logps/rejected": -116.08412170410156, "loss": 0.946, "margin_dpo/margin_mean": 12.58409595489502, "margin_dpo/margin_std": 16.468950271606445, "step": 445 }, { "epoch": 0.6802721088435374, "fcm_dpo/beta": 0.07511289417743683, "fcm_dpo/delta": 0.060914408415555954, "fcm_dpo/margin": 12.590319633483887, "fcm_dpo/q_t": 0.3290197253227234, "grad_norm": 28.1600284576416, "learning_rate": 1.4135891358732205e-07, "logits/chosen": 0.348872572183609, "logits/rejected": 0.2725016176700592, "logps/chosen": -77.0075454711914, "logps/ref_chosen": -56.97221755981445, "logps/ref_rejected": -80.6880874633789, "logps/rejected": -113.31373596191406, "loss": 0.9376, "margin_dpo/margin_mean": 12.590319633483887, "margin_dpo/margin_std": 15.93859577178955, "step": 450 }, { "epoch": 0.6878306878306878, "fcm_dpo/beta": 0.0825229063630104, "fcm_dpo/delta": -0.027547325938940048, "fcm_dpo/margin": 12.420158386230469, "fcm_dpo/q_t": 0.32720088958740234, "grad_norm": 26.225448608398438, "learning_rate": 1.354433695681474e-07, "logits/chosen": 0.2992292046546936, "logits/rejected": 0.2455030232667923, "logps/chosen": -82.07698822021484, "logps/ref_chosen": -61.983673095703125, "logps/ref_rejected": -74.9884033203125, "logps/rejected": -107.50187683105469, "loss": 0.9822, "margin_dpo/margin_mean": 12.420158386230469, "margin_dpo/margin_std": 17.609575271606445, "step": 455 }, { "epoch": 0.6953892668178382, "fcm_dpo/beta": 0.07965027540922165, "fcm_dpo/delta": -0.08106034994125366, "fcm_dpo/margin": 13.459956169128418, "fcm_dpo/q_t": 0.3144467771053314, "grad_norm": 26.587221145629883, "learning_rate": 1.2960793094762345e-07, "logits/chosen": 0.3679925501346588, "logits/rejected": 0.280788391828537, "logps/chosen": -78.30216979980469, "logps/ref_chosen": -57.59019088745117, "logps/ref_rejected": -84.5114517211914, "logps/rejected": -118.68338775634766, "loss": 0.9095, "margin_dpo/margin_mean": 13.459956169128418, "margin_dpo/margin_std": 16.770097732543945, "step": 460 }, { "epoch": 0.7029478458049887, "fcm_dpo/beta": 0.07221703231334686, "fcm_dpo/delta": 0.014492052607238293, "fcm_dpo/margin": 13.642268180847168, "fcm_dpo/q_t": 0.3231440782546997, "grad_norm": 27.278940200805664, "learning_rate": 1.238566782415197e-07, "logits/chosen": 0.37134069204330444, "logits/rejected": 0.31564953923225403, "logps/chosen": -80.76337432861328, "logps/ref_chosen": -59.79584503173828, "logps/ref_rejected": -75.25082397460938, "logps/rejected": -109.86061096191406, "loss": 0.9232, "margin_dpo/margin_mean": 13.642268180847168, "margin_dpo/margin_std": 17.10280418395996, "step": 465 }, { "epoch": 0.7105064247921391, "fcm_dpo/beta": 0.07616017013788223, "fcm_dpo/delta": -0.016742905601859093, "fcm_dpo/margin": 13.275070190429688, "fcm_dpo/q_t": 0.32321128249168396, "grad_norm": 22.729766845703125, "learning_rate": 1.1819363309737438e-07, "logits/chosen": 0.37919512391090393, "logits/rejected": 0.32598358392715454, "logps/chosen": -80.35087585449219, "logps/ref_chosen": -59.0323486328125, "logps/ref_rejected": -74.96698760986328, "logps/rejected": -109.56058502197266, "loss": 0.9218, "margin_dpo/margin_mean": 13.275070190429688, "margin_dpo/margin_std": 16.799335479736328, "step": 470 }, { "epoch": 0.7180650037792895, "fcm_dpo/beta": 0.07094570249319077, "fcm_dpo/delta": -0.080512635409832, "fcm_dpo/margin": 15.083788871765137, "fcm_dpo/q_t": 0.3183867931365967, "grad_norm": 20.770301818847656, "learning_rate": 1.126227554822985e-07, "logits/chosen": 0.3839421570301056, "logits/rejected": 0.3089445233345032, "logps/chosen": -77.02687072753906, "logps/ref_chosen": -56.396690368652344, "logps/ref_rejected": -81.70674133300781, "logps/rejected": -117.42071533203125, "loss": 0.9389, "margin_dpo/margin_mean": 15.083788871765137, "margin_dpo/margin_std": 19.728008270263672, "step": 475 }, { "epoch": 0.7256235827664399, "fcm_dpo/beta": 0.07331489771604538, "fcm_dpo/delta": 0.10081305354833603, "fcm_dpo/margin": 12.341837882995605, "fcm_dpo/q_t": 0.3461955189704895, "grad_norm": 30.526918411254883, "learning_rate": 1.0714794091391072e-07, "logits/chosen": 0.3099084198474884, "logits/rejected": 0.2813830077648163, "logps/chosen": -88.34848022460938, "logps/ref_chosen": -64.63165283203125, "logps/ref_rejected": -70.14222717285156, "logps/rejected": -106.20088958740234, "loss": 1.0757, "margin_dpo/margin_mean": 12.341837882995605, "margin_dpo/margin_std": 19.969202041625977, "step": 480 }, { "epoch": 0.7331821617535903, "fcm_dpo/beta": 0.07241444289684296, "fcm_dpo/delta": -0.03449578955769539, "fcm_dpo/margin": 14.190716743469238, "fcm_dpo/q_t": 0.3262421786785126, "grad_norm": 23.741697311401367, "learning_rate": 1.0177301773633992e-07, "logits/chosen": 0.3603590130805969, "logits/rejected": 0.29149702191352844, "logps/chosen": -82.0468521118164, "logps/ref_chosen": -59.954673767089844, "logps/ref_rejected": -80.82916259765625, "logps/rejected": -117.11204528808594, "loss": 0.9667, "margin_dpo/margin_mean": 14.190716743469238, "margin_dpo/margin_std": 19.688003540039062, "step": 485 }, { "epoch": 0.7407407407407407, "fcm_dpo/beta": 0.07517864555120468, "fcm_dpo/delta": 0.08142177760601044, "fcm_dpo/margin": 12.304274559020996, "fcm_dpo/q_t": 0.34900832176208496, "grad_norm": 34.26311492919922, "learning_rate": 9.650174444319956e-08, "logits/chosen": 0.3991266191005707, "logits/rejected": 0.3289317190647125, "logps/chosen": -86.1991195678711, "logps/ref_chosen": -62.238365173339844, "logps/ref_rejected": -81.98704528808594, "logps/rejected": -118.2520751953125, "loss": 1.0675, "margin_dpo/margin_mean": 12.304274559020996, "margin_dpo/margin_std": 19.93437385559082, "step": 490 }, { "epoch": 0.7482993197278912, "fcm_dpo/beta": 0.07940609008073807, "fcm_dpo/delta": 0.03455258160829544, "fcm_dpo/margin": 12.161388397216797, "fcm_dpo/q_t": 0.340284138917923, "grad_norm": 31.470069885253906, "learning_rate": 9.133780704940594e-08, "logits/chosen": 0.3151213526725769, "logits/rejected": 0.24373404681682587, "logps/chosen": -83.99868774414062, "logps/ref_chosen": -60.60944747924805, "logps/ref_rejected": -81.48342895507812, "logps/rejected": -117.0340576171875, "loss": 1.0687, "margin_dpo/margin_mean": 12.161388397216797, "margin_dpo/margin_std": 19.294418334960938, "step": 495 }, { "epoch": 0.7558578987150416, "fcm_dpo/beta": 0.07518203556537628, "fcm_dpo/delta": -0.09908589720726013, "fcm_dpo/margin": 14.436444282531738, "fcm_dpo/q_t": 0.3233835697174072, "grad_norm": 30.398324966430664, "learning_rate": 8.628481651367875e-08, "logits/chosen": 0.36071914434432983, "logits/rejected": 0.3034532070159912, "logps/chosen": -90.01958465576172, "logps/ref_chosen": -67.44170379638672, "logps/ref_rejected": -85.10578155517578, "logps/rejected": -122.12010192871094, "loss": 0.9682, "margin_dpo/margin_mean": 14.436445236206055, "margin_dpo/margin_std": 20.496013641357422, "step": 500 }, { "epoch": 0.763416477702192, "fcm_dpo/beta": 0.07424916326999664, "fcm_dpo/delta": -0.0052908300422132015, "fcm_dpo/margin": 13.501623153686523, "fcm_dpo/q_t": 0.3237389028072357, "grad_norm": 30.474390029907227, "learning_rate": 8.134630621352483e-08, "logits/chosen": 0.35755619406700134, "logits/rejected": 0.32606256008148193, "logps/chosen": -84.3893814086914, "logps/ref_chosen": -63.399513244628906, "logps/ref_rejected": -75.75922393798828, "logps/rejected": -110.250732421875, "loss": 0.9268, "margin_dpo/margin_mean": 13.501623153686523, "margin_dpo/margin_std": 17.42774200439453, "step": 505 }, { "epoch": 0.7709750566893424, "fcm_dpo/beta": 0.07383919507265091, "fcm_dpo/delta": -0.002689933869987726, "fcm_dpo/margin": 13.561166763305664, "fcm_dpo/q_t": 0.3301324248313904, "grad_norm": 25.812915802001953, "learning_rate": 7.652572947447272e-08, "logits/chosen": 0.35465937852859497, "logits/rejected": 0.28478819131851196, "logps/chosen": -87.05335998535156, "logps/ref_chosen": -65.54673767089844, "logps/ref_rejected": -88.05908203125, "logps/rejected": -123.12687683105469, "loss": 0.9774, "margin_dpo/margin_mean": 13.56116771697998, "margin_dpo/margin_std": 19.210861206054688, "step": 510 }, { "epoch": 0.7785336356764928, "fcm_dpo/beta": 0.0702199786901474, "fcm_dpo/delta": -0.04104772210121155, "fcm_dpo/margin": 14.736480712890625, "fcm_dpo/q_t": 0.3220558762550354, "grad_norm": 25.192188262939453, "learning_rate": 7.182645715528435e-08, "logits/chosen": 0.37450742721557617, "logits/rejected": 0.3201286792755127, "logps/chosen": -81.3960952758789, "logps/ref_chosen": -58.967079162597656, "logps/ref_rejected": -79.77230834960938, "logps/rejected": -116.93778991699219, "loss": 0.9213, "margin_dpo/margin_mean": 14.736480712890625, "margin_dpo/margin_std": 19.175683975219727, "step": 515 }, { "epoch": 0.7860922146636432, "fcm_dpo/beta": 0.07708217203617096, "fcm_dpo/delta": 0.17160889506340027, "fcm_dpo/margin": 10.951704025268555, "fcm_dpo/q_t": 0.35802939534187317, "grad_norm": 30.653854370117188, "learning_rate": 6.725177529083209e-08, "logits/chosen": 0.39081019163131714, "logits/rejected": 0.34083622694015503, "logps/chosen": -85.04821014404297, "logps/ref_chosen": -62.04914474487305, "logps/ref_rejected": -73.25074768066406, "logps/rejected": -107.20152282714844, "loss": 1.0965, "margin_dpo/margin_mean": 10.951704025268555, "margin_dpo/margin_std": 18.67728042602539, "step": 520 }, { "epoch": 0.7936507936507936, "fcm_dpo/beta": 0.08038587868213654, "fcm_dpo/delta": 0.0009529069066047668, "fcm_dpo/margin": 12.417234420776367, "fcm_dpo/q_t": 0.32943472266197205, "grad_norm": 32.0303955078125, "learning_rate": 6.280488279429185e-08, "logits/chosen": 0.2926352620124817, "logits/rejected": 0.23847930133342743, "logps/chosen": -90.89793395996094, "logps/ref_chosen": -68.93287658691406, "logps/ref_rejected": -86.20756530761719, "logps/rejected": -120.58984375, "loss": 1.0199, "margin_dpo/margin_mean": 12.417234420776367, "margin_dpo/margin_std": 18.533092498779297, "step": 525 }, { "epoch": 0.8012093726379441, "fcm_dpo/beta": 0.08636742830276489, "fcm_dpo/delta": -0.015138429589569569, "fcm_dpo/margin": 11.636919021606445, "fcm_dpo/q_t": 0.33553168177604675, "grad_norm": 28.005414962768555, "learning_rate": 5.848888922025552e-08, "logits/chosen": 0.33514514565467834, "logits/rejected": 0.2650687098503113, "logps/chosen": -82.46696472167969, "logps/ref_chosen": -59.8493537902832, "logps/ref_rejected": -80.61486053466797, "logps/rejected": -114.86936950683594, "loss": 1.0585, "margin_dpo/margin_mean": 11.636918067932129, "margin_dpo/margin_std": 17.36086082458496, "step": 530 }, { "epoch": 0.8087679516250945, "fcm_dpo/beta": 0.08257714658975601, "fcm_dpo/delta": -0.002869441406801343, "fcm_dpo/margin": 12.110450744628906, "fcm_dpo/q_t": 0.3353736996650696, "grad_norm": 31.17659568786621, "learning_rate": 5.430681259032957e-08, "logits/chosen": 0.3293718099594116, "logits/rejected": 0.26540592312812805, "logps/chosen": -80.54605865478516, "logps/ref_chosen": -58.72953414916992, "logps/ref_rejected": -78.62208557128906, "logps/rejected": -112.54905700683594, "loss": 1.0464, "margin_dpo/margin_mean": 12.110448837280273, "margin_dpo/margin_std": 18.811302185058594, "step": 535 }, { "epoch": 0.8163265306122449, "fcm_dpo/beta": 0.07747219502925873, "fcm_dpo/delta": -0.11059974133968353, "fcm_dpo/margin": 14.1632661819458, "fcm_dpo/q_t": 0.31101471185684204, "grad_norm": 27.576501846313477, "learning_rate": 5.026157728273966e-08, "logits/chosen": 0.3622625470161438, "logits/rejected": 0.2790268659591675, "logps/chosen": -83.3790054321289, "logps/ref_chosen": -61.27280807495117, "logps/ref_rejected": -86.4178237915039, "logps/rejected": -122.6872787475586, "loss": 0.8863, "margin_dpo/margin_mean": 14.1632661819458, "margin_dpo/margin_std": 17.347553253173828, "step": 540 }, { "epoch": 0.8238851095993953, "fcm_dpo/beta": 0.07073845714330673, "fcm_dpo/delta": -0.046964578330516815, "fcm_dpo/margin": 14.707046508789062, "fcm_dpo/q_t": 0.3177848756313324, "grad_norm": 25.9344539642334, "learning_rate": 4.635601198741607e-08, "logits/chosen": 0.3474125266075134, "logits/rejected": 0.2886578142642975, "logps/chosen": -77.79493713378906, "logps/ref_chosen": -57.53668975830078, "logps/ref_rejected": -73.76582336425781, "logps/rejected": -108.73112487792969, "loss": 0.9144, "margin_dpo/margin_mean": 14.707046508789062, "margin_dpo/margin_std": 18.784482955932617, "step": 545 }, { "epoch": 0.8314436885865457, "fcm_dpo/beta": 0.07421617209911346, "fcm_dpo/delta": 0.14916327595710754, "fcm_dpo/margin": 11.67965030670166, "fcm_dpo/q_t": 0.34690457582473755, "grad_norm": 27.697330474853516, "learning_rate": 4.259284772799099e-08, "logits/chosen": 0.3449970781803131, "logits/rejected": 0.28996026515960693, "logps/chosen": -81.71879577636719, "logps/ref_chosen": -60.406890869140625, "logps/ref_rejected": -76.10121154785156, "logps/rejected": -109.0927734375, "loss": 1.0218, "margin_dpo/margin_mean": 11.679651260375977, "margin_dpo/margin_std": 17.418115615844727, "step": 550 }, { "epoch": 0.8390022675736961, "fcm_dpo/beta": 0.08086894452571869, "fcm_dpo/delta": -0.021382993087172508, "fcm_dpo/margin": 12.578062057495117, "fcm_dpo/q_t": 0.3289267420768738, "grad_norm": 31.26999855041504, "learning_rate": 3.89747159520904e-08, "logits/chosen": 0.32917284965515137, "logits/rejected": 0.2717982232570648, "logps/chosen": -88.04100799560547, "logps/ref_chosen": -65.4435806274414, "logps/ref_rejected": -80.65763092041016, "logps/rejected": -115.8331298828125, "loss": 0.9988, "margin_dpo/margin_mean": 12.578062057495117, "margin_dpo/margin_std": 18.475194931030273, "step": 555 }, { "epoch": 0.8465608465608465, "fcm_dpo/beta": 0.07681386172771454, "fcm_dpo/delta": -0.0602848045527935, "fcm_dpo/margin": 13.695526123046875, "fcm_dpo/q_t": 0.33021193742752075, "grad_norm": 26.484222412109375, "learning_rate": 3.550414669125573e-08, "logits/chosen": 0.3839051425457001, "logits/rejected": 0.3160412907600403, "logps/chosen": -82.10123443603516, "logps/ref_chosen": -59.31481170654297, "logps/ref_rejected": -79.35322570800781, "logps/rejected": -115.83515930175781, "loss": 0.9693, "margin_dpo/margin_mean": 13.695526123046875, "margin_dpo/margin_std": 19.27083969116211, "step": 560 }, { "epoch": 0.854119425547997, "fcm_dpo/beta": 0.07272513210773468, "fcm_dpo/delta": -0.029256444424390793, "fcm_dpo/margin": 14.087471008300781, "fcm_dpo/q_t": 0.3242108225822449, "grad_norm": 28.179359436035156, "learning_rate": 3.218356679178252e-08, "logits/chosen": 0.36016514897346497, "logits/rejected": 0.3109044134616852, "logps/chosen": -82.92088317871094, "logps/ref_chosen": -61.065895080566406, "logps/ref_rejected": -79.14593505859375, "logps/rejected": -115.0884017944336, "loss": 0.9432, "margin_dpo/margin_mean": 14.087471008300781, "margin_dpo/margin_std": 18.77931022644043, "step": 565 }, { "epoch": 0.8616780045351474, "fcm_dpo/beta": 0.07263718545436859, "fcm_dpo/delta": -0.05456575006246567, "fcm_dpo/margin": 14.389799118041992, "fcm_dpo/q_t": 0.3169875741004944, "grad_norm": 21.003314971923828, "learning_rate": 2.9015298217712453e-08, "logits/chosen": 0.33407479524612427, "logits/rejected": 0.26317259669303894, "logps/chosen": -80.6412124633789, "logps/ref_chosen": -58.91632843017578, "logps/ref_rejected": -78.48197937011719, "logps/rejected": -114.59666442871094, "loss": 0.9191, "margin_dpo/margin_mean": 14.389799118041992, "margin_dpo/margin_std": 17.922327041625977, "step": 570 }, { "epoch": 0.8692365835222978, "fcm_dpo/beta": 0.07354731857776642, "fcm_dpo/delta": 0.09992051124572754, "fcm_dpo/margin": 12.301939010620117, "fcm_dpo/q_t": 0.3429938554763794, "grad_norm": 29.671510696411133, "learning_rate": 2.600155642716606e-08, "logits/chosen": 0.3663448691368103, "logits/rejected": 0.3218410015106201, "logps/chosen": -87.20105743408203, "logps/ref_chosen": -64.36775970458984, "logps/ref_rejected": -80.37776184082031, "logps/rejected": -115.51298522949219, "loss": 1.016, "margin_dpo/margin_mean": 12.301939010620117, "margin_dpo/margin_std": 18.205175399780273, "step": 575 }, { "epoch": 0.8767951625094482, "fcm_dpo/beta": 0.07715228199958801, "fcm_dpo/delta": 0.006949651055037975, "fcm_dpo/margin": 12.843107223510742, "fcm_dpo/q_t": 0.33547455072402954, "grad_norm": 29.658864974975586, "learning_rate": 2.3144448823151392e-08, "logits/chosen": 0.3541300892829895, "logits/rejected": 0.2988041043281555, "logps/chosen": -79.77593994140625, "logps/ref_chosen": -58.415260314941406, "logps/ref_rejected": -74.52140045166016, "logps/rejected": -108.72517395019531, "loss": 0.9975, "margin_dpo/margin_mean": 12.843107223510742, "margin_dpo/margin_std": 18.566728591918945, "step": 580 }, { "epoch": 0.8843537414965986, "fcm_dpo/beta": 0.07894281297922134, "fcm_dpo/delta": 0.0601823627948761, "fcm_dpo/margin": 11.971592903137207, "fcm_dpo/q_t": 0.33943796157836914, "grad_norm": 27.738752365112305, "learning_rate": 2.044597327993153e-08, "logits/chosen": 0.39197593927383423, "logits/rejected": 0.3263949751853943, "logps/chosen": -78.97578430175781, "logps/ref_chosen": -56.64149856567383, "logps/ref_rejected": -77.79124450683594, "logps/rejected": -112.09712219238281, "loss": 1.0413, "margin_dpo/margin_mean": 11.971592903137207, "margin_dpo/margin_std": 18.696794509887695, "step": 585 }, { "epoch": 0.891912320483749, "fcm_dpo/beta": 0.07663112133741379, "fcm_dpo/delta": -0.1261170357465744, "fcm_dpo/margin": 14.529006958007812, "fcm_dpo/q_t": 0.3086654543876648, "grad_norm": 25.352630615234375, "learning_rate": 1.7908016745981856e-08, "logits/chosen": 0.35477882623672485, "logits/rejected": 0.31469103693962097, "logps/chosen": -82.77430725097656, "logps/ref_chosen": -61.251670837402344, "logps/ref_rejected": -75.03556823730469, "logps/rejected": -111.08720397949219, "loss": 0.9034, "margin_dpo/margin_mean": 14.529006958007812, "margin_dpo/margin_std": 18.56852149963379, "step": 590 }, { "epoch": 0.8994708994708994, "fcm_dpo/beta": 0.07121709734201431, "fcm_dpo/delta": -0.01630423031747341, "fcm_dpo/margin": 14.214367866516113, "fcm_dpo/q_t": 0.32772403955459595, "grad_norm": 24.538618087768555, "learning_rate": 1.553235392451377e-08, "logits/chosen": 0.36895376443862915, "logits/rejected": 0.2859138250350952, "logps/chosen": -75.7615737915039, "logps/ref_chosen": -55.449249267578125, "logps/ref_rejected": -78.81550598144531, "logps/rejected": -113.34220123291016, "loss": 0.9717, "margin_dpo/margin_mean": 14.214367866516113, "margin_dpo/margin_std": 19.63026237487793, "step": 595 }, { "epoch": 0.9070294784580499, "fcm_dpo/beta": 0.07918272912502289, "fcm_dpo/delta": 0.15003207325935364, "fcm_dpo/margin": 10.823195457458496, "fcm_dpo/q_t": 0.3627161383628845, "grad_norm": 29.42555046081543, "learning_rate": 1.3320646032487393e-08, "logits/chosen": 0.36016735434532166, "logits/rejected": 0.3182498812675476, "logps/chosen": -81.17481231689453, "logps/ref_chosen": -58.89445877075195, "logps/ref_rejected": -71.14781951904297, "logps/rejected": -104.25135803222656, "loss": 1.0944, "margin_dpo/margin_mean": 10.82319450378418, "margin_dpo/margin_std": 18.560047149658203, "step": 600 }, { "epoch": 0.9070294784580499, "eval_fcm_dpo/beta": 0.08356332033872604, "eval_fcm_dpo/delta": -0.005035701673477888, "eval_fcm_dpo/margin": 11.875618934631348, "eval_fcm_dpo/q_t": 0.3380221426486969, "eval_logits/chosen": 0.3597419559955597, "eval_logits/rejected": 0.304570734500885, "eval_logps/chosen": -96.24739074707031, "eval_logps/ref_chosen": -75.86933135986328, "eval_logps/ref_rejected": -80.85771942138672, "eval_logps/rejected": -113.11141204833984, "eval_loss": 0.5214306712150574, "eval_margin_dpo/margin_mean": 11.875618934631348, "eval_margin_dpo/margin_std": 18.387540817260742, "eval_runtime": 38.6993, "eval_samples_per_second": 59.51, "eval_steps_per_second": 1.86, "step": 600 }, { "epoch": 0.9145880574452003, "fcm_dpo/beta": 0.07584916055202484, "fcm_dpo/delta": -0.017186608165502548, "fcm_dpo/margin": 13.365765571594238, "fcm_dpo/q_t": 0.32789379358291626, "grad_norm": 28.76905059814453, "learning_rate": 1.1274439638981532e-08, "logits/chosen": 0.35118022561073303, "logits/rejected": 0.299066424369812, "logps/chosen": -79.40129089355469, "logps/ref_chosen": -60.206268310546875, "logps/ref_rejected": -76.11177825927734, "logps/rejected": -108.6725845336914, "loss": 0.963, "margin_dpo/margin_mean": 13.365765571594238, "margin_dpo/margin_std": 18.610855102539062, "step": 605 }, { "epoch": 0.9221466364323507, "fcm_dpo/beta": 0.07705016434192657, "fcm_dpo/delta": -0.0388740599155426, "fcm_dpo/margin": 13.390531539916992, "fcm_dpo/q_t": 0.3253692388534546, "grad_norm": 31.618640899658203, "learning_rate": 9.395165583732379e-09, "logits/chosen": 0.3527846336364746, "logits/rejected": 0.30443698167800903, "logps/chosen": -82.28140258789062, "logps/ref_chosen": -61.04254150390625, "logps/ref_rejected": -82.46031188964844, "logps/rejected": -117.0897216796875, "loss": 0.9402, "margin_dpo/margin_mean": 13.390533447265625, "margin_dpo/margin_std": 17.899368286132812, "step": 610 }, { "epoch": 0.9297052154195011, "fcm_dpo/beta": 0.0739460289478302, "fcm_dpo/delta": -0.05259857699275017, "fcm_dpo/margin": 14.14459228515625, "fcm_dpo/q_t": 0.317154198884964, "grad_norm": 28.697656631469727, "learning_rate": 7.684137976598088e-09, "logits/chosen": 0.33570918440818787, "logits/rejected": 0.2927141785621643, "logps/chosen": -81.54222106933594, "logps/ref_chosen": -60.49250411987305, "logps/ref_rejected": -81.13261413574219, "logps/rejected": -116.32688903808594, "loss": 0.927, "margin_dpo/margin_mean": 14.14459228515625, "margin_dpo/margin_std": 18.297395706176758, "step": 615 }, { "epoch": 0.9372637944066515, "fcm_dpo/beta": 0.07480698078870773, "fcm_dpo/delta": 0.08452598005533218, "fcm_dpo/margin": 12.292991638183594, "fcm_dpo/q_t": 0.3419121503829956, "grad_norm": 33.187259674072266, "learning_rate": 6.142553278648238e-09, "logits/chosen": 0.37463945150375366, "logits/rejected": 0.3073200583457947, "logps/chosen": -79.49095153808594, "logps/ref_chosen": -58.75004959106445, "logps/ref_rejected": -79.14283752441406, "logps/rejected": -112.1767349243164, "loss": 0.9754, "margin_dpo/margin_mean": 12.292991638183594, "margin_dpo/margin_std": 17.254846572875977, "step": 620 }, { "epoch": 0.9448223733938019, "fcm_dpo/beta": 0.07812217622995377, "fcm_dpo/delta": 0.025508109480142593, "fcm_dpo/margin": 12.489466667175293, "fcm_dpo/q_t": 0.33177176117897034, "grad_norm": 28.660175323486328, "learning_rate": 4.7714894655209174e-09, "logits/chosen": 0.3732627034187317, "logits/rejected": 0.28870144486427307, "logps/chosen": -79.76417541503906, "logps/ref_chosen": -57.77447509765625, "logps/ref_rejected": -83.365966796875, "logps/rejected": -117.8451156616211, "loss": 0.9557, "margin_dpo/margin_mean": 12.489466667175293, "margin_dpo/margin_std": 16.900440216064453, "step": 625 }, { "epoch": 0.9523809523809523, "fcm_dpo/beta": 0.07247981429100037, "fcm_dpo/delta": -0.11990991979837418, "fcm_dpo/margin": 15.216099739074707, "fcm_dpo/q_t": 0.3133440315723419, "grad_norm": 21.737163543701172, "learning_rate": 3.5719052736323806e-09, "logits/chosen": 0.36857935786247253, "logits/rejected": 0.2945020794868469, "logps/chosen": -78.60719299316406, "logps/ref_chosen": -58.47271728515625, "logps/ref_rejected": -84.48008728027344, "logps/rejected": -119.83065032958984, "loss": 0.9124, "margin_dpo/margin_mean": 15.216100692749023, "margin_dpo/margin_std": 19.21467399597168, "step": 630 }, { "epoch": 0.9599395313681028, "fcm_dpo/beta": 0.07038284093141556, "fcm_dpo/delta": 0.0925895944237709, "fcm_dpo/margin": 13.050129890441895, "fcm_dpo/q_t": 0.34775209426879883, "grad_norm": 32.16118621826172, "learning_rate": 2.5446395297668287e-09, "logits/chosen": 0.3829967677593231, "logits/rejected": 0.3268979787826538, "logps/chosen": -80.91036987304688, "logps/ref_chosen": -60.0723991394043, "logps/ref_rejected": -75.8419189453125, "logps/rejected": -109.73001861572266, "loss": 1.0213, "margin_dpo/margin_mean": 13.050129890441895, "margin_dpo/margin_std": 19.307937622070312, "step": 635 }, { "epoch": 0.9674981103552532, "fcm_dpo/beta": 0.07497520744800568, "fcm_dpo/delta": -0.0180673748254776, "fcm_dpo/margin": 13.5342435836792, "fcm_dpo/q_t": 0.327609658241272, "grad_norm": 28.1774959564209, "learning_rate": 1.690410564514244e-09, "logits/chosen": 0.3562234938144684, "logits/rejected": 0.27130261063575745, "logps/chosen": -82.12296295166016, "logps/ref_chosen": -59.24292755126953, "logps/ref_rejected": -81.03025817871094, "logps/rejected": -117.44453430175781, "loss": 0.9614, "margin_dpo/margin_mean": 13.5342435836792, "margin_dpo/margin_std": 19.009403228759766, "step": 640 }, { "epoch": 0.9750566893424036, "fcm_dpo/beta": 0.07942639291286469, "fcm_dpo/delta": 0.06387078016996384, "fcm_dpo/margin": 11.842493057250977, "fcm_dpo/q_t": 0.3415600657463074, "grad_norm": 27.630109786987305, "learning_rate": 1.0098157099674987e-09, "logits/chosen": 0.33102938532829285, "logits/rejected": 0.293028324842453, "logps/chosen": -86.07749938964844, "logps/ref_chosen": -63.97548294067383, "logps/ref_rejected": -74.65735626220703, "logps/rejected": -108.60185241699219, "loss": 1.0304, "margin_dpo/margin_mean": 11.842493057250977, "margin_dpo/margin_std": 18.239856719970703, "step": 645 }, { "epoch": 0.982615268329554, "fcm_dpo/beta": 0.07533489167690277, "fcm_dpo/delta": -0.09645902365446091, "fcm_dpo/margin": 14.389871597290039, "fcm_dpo/q_t": 0.3166733682155609, "grad_norm": 29.344585418701172, "learning_rate": 5.033308820289184e-10, "logits/chosen": 0.38797593116760254, "logits/rejected": 0.3146594166755676, "logps/chosen": -82.97822570800781, "logps/ref_chosen": -60.51557159423828, "logps/ref_rejected": -85.11001586914062, "logps/rejected": -121.96253967285156, "loss": 0.9395, "margin_dpo/margin_mean": 14.389869689941406, "margin_dpo/margin_std": 19.199068069458008, "step": 650 }, { "epoch": 0.9901738473167044, "fcm_dpo/beta": 0.07521242648363113, "fcm_dpo/delta": 0.03413959592580795, "fcm_dpo/margin": 12.832531929016113, "fcm_dpo/q_t": 0.3351586163043976, "grad_norm": 22.92190170288086, "learning_rate": 1.7131024761923852e-10, "logits/chosen": 0.37830454111099243, "logits/rejected": 0.2979954779148102, "logps/chosen": -80.21737670898438, "logps/ref_chosen": -59.14573287963867, "logps/ref_rejected": -80.98335266113281, "logps/rejected": -114.88752746582031, "loss": 0.9554, "margin_dpo/margin_mean": 12.83253002166748, "margin_dpo/margin_std": 17.429914474487305, "step": 655 }, { "epoch": 0.9977324263038548, "fcm_dpo/beta": 0.07314083725214005, "fcm_dpo/delta": -0.027313020080327988, "fcm_dpo/margin": 13.969868659973145, "fcm_dpo/q_t": 0.3238561749458313, "grad_norm": 25.15906524658203, "learning_rate": 1.3985977021235829e-11, "logits/chosen": 0.43907564878463745, "logits/rejected": 0.36568042635917664, "logps/chosen": -82.41399383544922, "logps/ref_chosen": -60.18262481689453, "logps/ref_rejected": -80.55596160888672, "logps/rejected": -116.7572250366211, "loss": 0.9519, "margin_dpo/margin_mean": 13.969868659973145, "margin_dpo/margin_std": 18.85131072998047, "step": 660 }, { "epoch": 0.999244142101285, "step": 661, "total_flos": 0.0, "train_loss": 1.0854419020769637, "train_runtime": 1766.4629, "train_samples_per_second": 23.967, "train_steps_per_second": 0.374 } ], "logging_steps": 5, "max_steps": 661, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }