{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.999244142101285, "eval_steps": 200, "global_step": 661, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0015117157974300832, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.0013532638549804688, "fcm_dpo/q_t": 0.5000336766242981, "grad_norm": 28.21888542175293, "learning_rate": 0.0, "logits/chosen": 0.13337239623069763, "logits/rejected": 0.12492948770523071, "logps/chosen": -64.5841293334961, "logps/ref_chosen": -64.61280822753906, "logps/ref_rejected": -64.17195129394531, "logps/rejected": -64.14192199707031, "loss": 1.3866, "margin_dpo/margin_mean": -0.0013527870178222656, "margin_dpo/margin_std": 0.2561596930027008, "step": 1 }, { "epoch": 0.007558578987150416, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.006315112113952637, "fcm_dpo/q_t": 0.5001578330993652, "grad_norm": 29.724584579467773, "learning_rate": 2.9850746268656714e-08, "logits/chosen": 0.09723952412605286, "logits/rejected": 0.06879077106714249, "logps/chosen": -65.34913635253906, "logps/ref_chosen": -65.34695434570312, "logps/ref_rejected": -79.315673828125, "logps/rejected": -79.31153869628906, "loss": 1.3872, "margin_dpo/margin_mean": -0.006315216422080994, "margin_dpo/margin_std": 0.2993292212486267, "step": 5 }, { "epoch": 0.015117157974300832, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.013524067588150501, "fcm_dpo/q_t": 0.500338077545166, "grad_norm": 29.753292083740234, "learning_rate": 6.71641791044776e-08, "logits/chosen": 0.10936982929706573, "logits/rejected": 0.07020524889230728, "logps/chosen": -56.681053161621094, "logps/ref_chosen": -56.65692901611328, "logps/ref_rejected": -80.12786865234375, "logps/rejected": -80.13846588134766, "loss": 1.3879, "margin_dpo/margin_mean": -0.013524264097213745, "margin_dpo/margin_std": 0.3152056336402893, "step": 10 }, { "epoch": 0.022675736961451247, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.01606299914419651, "fcm_dpo/q_t": 0.49959880113601685, "grad_norm": 33.080528259277344, "learning_rate": 1.044776119402985e-07, "logits/chosen": 0.07683371752500534, "logits/rejected": 0.04736438766121864, "logps/chosen": -60.102806091308594, "logps/ref_chosen": -60.09392166137695, "logps/ref_rejected": -78.99056243896484, "logps/rejected": -79.01551818847656, "loss": 1.385, "margin_dpo/margin_mean": 0.01606297492980957, "margin_dpo/margin_std": 0.31872445344924927, "step": 15 }, { "epoch": 0.030234315948601664, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.045558154582977295, "fcm_dpo/q_t": 0.498861163854599, "grad_norm": 29.158737182617188, "learning_rate": 1.4179104477611938e-07, "logits/chosen": 0.0912957563996315, "logits/rejected": 0.06395339965820312, "logps/chosen": -55.449317932128906, "logps/ref_chosen": -55.464561462402344, "logps/ref_rejected": -77.40013122558594, "logps/rejected": -77.43043518066406, "loss": 1.382, "margin_dpo/margin_mean": 0.04555808752775192, "margin_dpo/margin_std": 0.29819026589393616, "step": 20 }, { "epoch": 0.03779289493575208, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.004711696412414312, "fcm_dpo/q_t": 0.5001178979873657, "grad_norm": 29.72311782836914, "learning_rate": 1.7910447761194027e-07, "logits/chosen": 0.10293181240558624, "logits/rejected": 0.07127834111452103, "logps/chosen": -60.726539611816406, "logps/ref_chosen": -60.711814880371094, "logps/ref_rejected": -82.71756744384766, "logps/rejected": -82.72756958007812, "loss": 1.387, "margin_dpo/margin_mean": -0.004711783025413752, "margin_dpo/margin_std": 0.29338979721069336, "step": 25 }, { "epoch": 0.045351473922902494, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.011212587356567383, "fcm_dpo/q_t": 0.4997197091579437, "grad_norm": 30.495267868041992, "learning_rate": 2.1641791044776117e-07, "logits/chosen": 0.11331719160079956, "logits/rejected": 0.088210329413414, "logps/chosen": -60.893218994140625, "logps/ref_chosen": -60.880210876464844, "logps/ref_rejected": -78.44148254394531, "logps/rejected": -78.4656982421875, "loss": 1.3854, "margin_dpo/margin_mean": 0.011212664656341076, "margin_dpo/margin_std": 0.283229798078537, "step": 30 }, { "epoch": 0.05291005291005291, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.033431313931941986, "fcm_dpo/q_t": 0.4991644322872162, "grad_norm": 27.916645050048828, "learning_rate": 2.537313432835821e-07, "logits/chosen": 0.06567516177892685, "logits/rejected": 0.03955943509936333, "logps/chosen": -62.278358459472656, "logps/ref_chosen": -62.248138427734375, "logps/ref_rejected": -79.56475830078125, "logps/rejected": -79.62838745117188, "loss": 1.3832, "margin_dpo/margin_mean": 0.03343154489994049, "margin_dpo/margin_std": 0.3196953535079956, "step": 35 }, { "epoch": 0.06046863189720333, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.026270773261785507, "fcm_dpo/q_t": 0.49934354424476624, "grad_norm": 31.407669067382812, "learning_rate": 2.9104477611940296e-07, "logits/chosen": 0.1118750348687172, "logits/rejected": 0.06694410741329193, "logps/chosen": -58.9628791809082, "logps/ref_chosen": -58.87812423706055, "logps/ref_rejected": -84.22982025146484, "logps/rejected": -84.34083557128906, "loss": 1.384, "margin_dpo/margin_mean": 0.026270756497979164, "margin_dpo/margin_std": 0.32897505164146423, "step": 40 }, { "epoch": 0.06802721088435375, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.06596313416957855, "fcm_dpo/q_t": 0.49835172295570374, "grad_norm": 31.758451461791992, "learning_rate": 3.2835820895522385e-07, "logits/chosen": 0.07056122273206711, "logits/rejected": 0.043311070650815964, "logps/chosen": -66.0146713256836, "logps/ref_chosen": -65.88298034667969, "logps/ref_rejected": -83.87881469726562, "logps/rejected": -84.07647705078125, "loss": 1.3801, "margin_dpo/margin_mean": 0.0659632533788681, "margin_dpo/margin_std": 0.4093398153781891, "step": 45 }, { "epoch": 0.07558578987150416, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.11442438513040543, "fcm_dpo/q_t": 0.4971412718296051, "grad_norm": 27.961788177490234, "learning_rate": 3.6567164179104475e-07, "logits/chosen": 0.0864916443824768, "logits/rejected": 0.052734147757291794, "logps/chosen": -55.3635139465332, "logps/ref_chosen": -55.172386169433594, "logps/ref_rejected": -69.63300323486328, "logps/rejected": -69.93855285644531, "loss": 1.3753, "margin_dpo/margin_mean": 0.11442458629608154, "margin_dpo/margin_std": 0.4219285845756531, "step": 50 }, { "epoch": 0.08314436885865457, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.20081201195716858, "fcm_dpo/q_t": 0.4949868321418762, "grad_norm": 31.33340835571289, "learning_rate": 4.0298507462686564e-07, "logits/chosen": 0.07780580222606659, "logits/rejected": 0.04163379222154617, "logps/chosen": -57.531211853027344, "logps/ref_chosen": -57.193580627441406, "logps/ref_rejected": -79.69940948486328, "logps/rejected": -80.23786926269531, "loss": 1.3672, "margin_dpo/margin_mean": 0.20081210136413574, "margin_dpo/margin_std": 0.5645917057991028, "step": 55 }, { "epoch": 0.09070294784580499, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.21879427134990692, "fcm_dpo/q_t": 0.4945460259914398, "grad_norm": 29.641948699951172, "learning_rate": 4.4029850746268654e-07, "logits/chosen": 0.1170019656419754, "logits/rejected": 0.0827331617474556, "logps/chosen": -60.59779739379883, "logps/ref_chosen": -60.068870544433594, "logps/ref_rejected": -74.41178894042969, "logps/rejected": -75.15950012207031, "loss": 1.3662, "margin_dpo/margin_mean": 0.2187943458557129, "margin_dpo/margin_std": 0.7729828357696533, "step": 60 }, { "epoch": 0.0982615268329554, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.30554288625717163, "fcm_dpo/q_t": 0.492379754781723, "grad_norm": 30.804401397705078, "learning_rate": 4.776119402985074e-07, "logits/chosen": 0.14683708548545837, "logits/rejected": 0.11651895940303802, "logps/chosen": -58.885589599609375, "logps/ref_chosen": -58.1558952331543, "logps/ref_rejected": -76.06512451171875, "logps/rejected": -77.1003646850586, "loss": 1.3584, "margin_dpo/margin_mean": 0.30554264783859253, "margin_dpo/margin_std": 0.9574313163757324, "step": 65 }, { "epoch": 0.10582010582010581, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.4061034321784973, "fcm_dpo/q_t": 0.4898872971534729, "grad_norm": 28.625547409057617, "learning_rate": 4.999860140229787e-07, "logits/chosen": 0.10340998321771622, "logits/rejected": 0.0681125819683075, "logps/chosen": -68.41093444824219, "logps/ref_chosen": -67.35506439208984, "logps/ref_rejected": -82.24962615966797, "logps/rejected": -83.71160125732422, "loss": 1.3494, "margin_dpo/margin_mean": 0.40610337257385254, "margin_dpo/margin_std": 1.122269868850708, "step": 70 }, { "epoch": 0.11337868480725624, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.519697368144989, "fcm_dpo/q_t": 0.48709648847579956, "grad_norm": 26.1798095703125, "learning_rate": 4.998286897523808e-07, "logits/chosen": 0.11257852613925934, "logits/rejected": 0.07697894424200058, "logps/chosen": -58.29564666748047, "logps/ref_chosen": -56.86763381958008, "logps/ref_rejected": -72.56938934326172, "logps/rejected": -74.51709747314453, "loss": 1.3404, "margin_dpo/margin_mean": 0.5196975469589233, "margin_dpo/margin_std": 1.4106642007827759, "step": 75 }, { "epoch": 0.12093726379440665, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.6465596556663513, "fcm_dpo/q_t": 0.48402470350265503, "grad_norm": 28.485530853271484, "learning_rate": 4.994966691179711e-07, "logits/chosen": 0.14081288874149323, "logits/rejected": 0.09565093368291855, "logps/chosen": -59.448753356933594, "logps/ref_chosen": -57.687095642089844, "logps/ref_rejected": -78.06813049316406, "logps/rejected": -80.4763412475586, "loss": 1.3302, "margin_dpo/margin_mean": 0.6465598344802856, "margin_dpo/margin_std": 1.6814677715301514, "step": 80 }, { "epoch": 0.12849584278155707, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.0534989833831787, "fcm_dpo/q_t": 0.4739624559879303, "grad_norm": 26.71432876586914, "learning_rate": 4.989901842900325e-07, "logits/chosen": 0.16136503219604492, "logits/rejected": 0.11435899883508682, "logps/chosen": -59.10918426513672, "logps/ref_chosen": -56.96040725708008, "logps/ref_rejected": -75.22166442871094, "logps/rejected": -78.42394256591797, "loss": 1.2921, "margin_dpo/margin_mean": 1.0534991025924683, "margin_dpo/margin_std": 1.7913897037506104, "step": 85 }, { "epoch": 0.1360544217687075, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.108295202255249, "fcm_dpo/q_t": 0.47283005714416504, "grad_norm": 29.2835693359375, "learning_rate": 4.983095894354857e-07, "logits/chosen": 0.21447758376598358, "logits/rejected": 0.1654311865568161, "logps/chosen": -60.33354949951172, "logps/ref_chosen": -57.41730499267578, "logps/ref_rejected": -80.87986755371094, "logps/rejected": -84.9044189453125, "loss": 1.2951, "margin_dpo/margin_mean": 1.1082954406738281, "margin_dpo/margin_std": 2.5071663856506348, "step": 90 }, { "epoch": 0.1436130007558579, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.4385011196136475, "fcm_dpo/q_t": 0.4653662145137787, "grad_norm": 29.20956039428711, "learning_rate": 4.974553604702332e-07, "logits/chosen": 0.2017609179019928, "logits/rejected": 0.15785647928714752, "logps/chosen": -57.5848274230957, "logps/ref_chosen": -54.08087158203125, "logps/ref_rejected": -76.15860748291016, "logps/rejected": -81.1010513305664, "loss": 1.2755, "margin_dpo/margin_mean": 1.4385008811950684, "margin_dpo/margin_std": 3.3299403190612793, "step": 95 }, { "epoch": 0.15117157974300832, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.2202621698379517, "fcm_dpo/q_t": 0.47157055139541626, "grad_norm": 33.23603057861328, "learning_rate": 4.964280947263676e-07, "logits/chosen": 0.21698299050331116, "logits/rejected": 0.184483140707016, "logps/chosen": -68.64390563964844, "logps/ref_chosen": -63.875038146972656, "logps/ref_rejected": -82.077880859375, "logps/rejected": -88.06700134277344, "loss": 1.3233, "margin_dpo/margin_mean": 1.220262050628662, "margin_dpo/margin_std": 4.634930610656738, "step": 100 }, { "epoch": 0.15873015873015872, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.7332004308700562, "fcm_dpo/q_t": 0.46054258942604065, "grad_norm": 30.745962142944336, "learning_rate": 4.952285105344791e-07, "logits/chosen": 0.2272680103778839, "logits/rejected": 0.17643623054027557, "logps/chosen": -67.46278381347656, "logps/ref_chosen": -62.572479248046875, "logps/ref_rejected": -80.93415069580078, "logps/rejected": -87.5576400756836, "loss": 1.2811, "margin_dpo/margin_mean": 1.7332004308700562, "margin_dpo/margin_std": 4.913107872009277, "step": 105 }, { "epoch": 0.16628873771730915, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.5242812633514404, "fcm_dpo/q_t": 0.46426910161972046, "grad_norm": 44.22966003417969, "learning_rate": 4.938574467213517e-07, "logits/chosen": 0.19085553288459778, "logits/rejected": 0.16806969046592712, "logps/chosen": -74.31179809570312, "logps/ref_chosen": -68.67534637451172, "logps/ref_rejected": -78.82028198242188, "logps/rejected": -85.98100280761719, "loss": 1.3145, "margin_dpo/margin_mean": 1.5242810249328613, "margin_dpo/margin_std": 5.536143779754639, "step": 110 }, { "epoch": 0.17384731670445955, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 2.013551712036133, "fcm_dpo/q_t": 0.452955424785614, "grad_norm": 28.415409088134766, "learning_rate": 4.923158620234019e-07, "logits/chosen": 0.23339705169200897, "logits/rejected": 0.18038101494312286, "logps/chosen": -64.06039428710938, "logps/ref_chosen": -58.65370559692383, "logps/ref_rejected": -81.89688873291016, "logps/rejected": -89.31713104248047, "loss": 1.2498, "margin_dpo/margin_mean": 2.013551712036133, "margin_dpo/margin_std": 4.735473155975342, "step": 115 }, { "epoch": 0.18140589569160998, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 2.4133291244506836, "fcm_dpo/q_t": 0.44279351830482483, "grad_norm": 28.7838191986084, "learning_rate": 4.906048344162676e-07, "logits/chosen": 0.24171380698680878, "logits/rejected": 0.19227424263954163, "logps/chosen": -61.56781768798828, "logps/ref_chosen": -56.16423797607422, "logps/ref_rejected": -75.87689971923828, "logps/rejected": -83.69380187988281, "loss": 1.2069, "margin_dpo/margin_mean": 2.4133291244506836, "margin_dpo/margin_std": 4.3148322105407715, "step": 120 }, { "epoch": 0.1889644746787604, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 3.0360968112945557, "fcm_dpo/q_t": 0.4307987093925476, "grad_norm": 29.158105850219727, "learning_rate": 4.887255603610184e-07, "logits/chosen": 0.2693176865577698, "logits/rejected": 0.21195952594280243, "logps/chosen": -66.05271911621094, "logps/ref_chosen": -59.744285583496094, "logps/ref_rejected": -86.77314758300781, "logps/rejected": -96.11767578125, "loss": 1.1645, "margin_dpo/margin_mean": 3.0360968112945557, "margin_dpo/margin_std": 5.009610652923584, "step": 125 }, { "epoch": 0.1965230536659108, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 2.8338823318481445, "fcm_dpo/q_t": 0.43448346853256226, "grad_norm": 30.431678771972656, "learning_rate": 4.866793539675126e-07, "logits/chosen": 0.20911665260791779, "logits/rejected": 0.17860978841781616, "logps/chosen": -71.53947448730469, "logps/ref_chosen": -64.15296936035156, "logps/ref_rejected": -75.17271423339844, "logps/rejected": -85.3930892944336, "loss": 1.1921, "margin_dpo/margin_mean": 2.8338828086853027, "margin_dpo/margin_std": 5.266488075256348, "step": 130 }, { "epoch": 0.20408163265306123, "fcm_dpo/beta": 0.10191638767719269, "fcm_dpo/delta": 0.031610384583473206, "fcm_dpo/margin": 3.762406826019287, "fcm_dpo/q_t": 0.4133889079093933, "grad_norm": 26.239471435546875, "learning_rate": 4.844676460754862e-07, "logits/chosen": 0.25873705744743347, "logits/rejected": 0.21873533725738525, "logps/chosen": -65.23578643798828, "logps/ref_chosen": -57.006690979003906, "logps/ref_rejected": -73.71768188476562, "logps/rejected": -85.70919036865234, "loss": 1.1276, "margin_dpo/margin_mean": 3.76240611076355, "margin_dpo/margin_std": 5.971688270568848, "step": 135 }, { "epoch": 0.21164021164021163, "fcm_dpo/beta": 0.10494687408208847, "fcm_dpo/delta": 0.025396008044481277, "fcm_dpo/margin": 4.405590534210205, "fcm_dpo/q_t": 0.3999941945075989, "grad_norm": 33.757999420166016, "learning_rate": 4.820919832540181e-07, "logits/chosen": 0.24625614285469055, "logits/rejected": 0.2025199830532074, "logps/chosen": -74.15595245361328, "logps/ref_chosen": -63.36246871948242, "logps/ref_rejected": -79.62621307373047, "logps/rejected": -94.8252944946289, "loss": 1.1183, "margin_dpo/margin_mean": 4.405590534210205, "margin_dpo/margin_std": 7.449770927429199, "step": 140 }, { "epoch": 0.21919879062736206, "fcm_dpo/beta": 0.10641102492809296, "fcm_dpo/delta": 0.022974295541644096, "fcm_dpo/margin": 4.693943977355957, "fcm_dpo/q_t": 0.395569384098053, "grad_norm": 38.00341033935547, "learning_rate": 4.795540267200686e-07, "logits/chosen": 0.2727429270744324, "logits/rejected": 0.2512747049331665, "logps/chosen": -77.23705291748047, "logps/ref_chosen": -65.01470184326172, "logps/ref_rejected": -80.49073791503906, "logps/rejected": -97.40702819824219, "loss": 1.1399, "margin_dpo/margin_mean": 4.693943977355957, "margin_dpo/margin_std": 8.747485160827637, "step": 145 }, { "epoch": 0.22675736961451248, "fcm_dpo/beta": 0.10816685855388641, "fcm_dpo/delta": -0.04916912317276001, "fcm_dpo/margin": 5.7198591232299805, "fcm_dpo/q_t": 0.3732493221759796, "grad_norm": 28.534120559692383, "learning_rate": 4.768555511768486e-07, "logits/chosen": 0.28103944659233093, "logits/rejected": 0.2354610413312912, "logps/chosen": -71.84245300292969, "logps/ref_chosen": -59.19135284423828, "logps/ref_rejected": -74.0339126586914, "logps/rejected": -92.40486145019531, "loss": 1.0549, "margin_dpo/margin_mean": 5.7198591232299805, "margin_dpo/margin_std": 8.658864974975586, "step": 150 }, { "epoch": 0.23431594860166288, "fcm_dpo/beta": 0.09626957774162292, "fcm_dpo/delta": -0.08038349449634552, "fcm_dpo/margin": 6.609867095947266, "fcm_dpo/q_t": 0.36982256174087524, "grad_norm": 29.370683670043945, "learning_rate": 4.7399844357283393e-07, "logits/chosen": 0.29301148653030396, "logits/rejected": 0.25923386216163635, "logps/chosen": -73.53243255615234, "logps/ref_chosen": -60.93949508666992, "logps/ref_rejected": -74.51151275634766, "logps/rejected": -93.71430969238281, "loss": 1.0523, "margin_dpo/margin_mean": 6.609866142272949, "margin_dpo/margin_std": 10.054447174072266, "step": 155 }, { "epoch": 0.2418745275888133, "fcm_dpo/beta": 0.08975216001272202, "fcm_dpo/delta": -0.07737629860639572, "fcm_dpo/margin": 7.146323204040527, "fcm_dpo/q_t": 0.36957624554634094, "grad_norm": 26.117460250854492, "learning_rate": 4.7098470178228755e-07, "logits/chosen": 0.2661024034023285, "logits/rejected": 0.22873857617378235, "logps/chosen": -72.47210693359375, "logps/ref_chosen": -58.763816833496094, "logps/ref_rejected": -74.94743347167969, "logps/rejected": -95.80204010009766, "loss": 1.0421, "margin_dpo/margin_mean": 7.146323204040527, "margin_dpo/margin_std": 10.654400825500488, "step": 160 }, { "epoch": 0.2494331065759637, "fcm_dpo/beta": 0.08210185170173645, "fcm_dpo/delta": -0.13484172523021698, "fcm_dpo/margin": 8.718481063842773, "fcm_dpo/q_t": 0.3526487350463867, "grad_norm": 23.741703033447266, "learning_rate": 4.678164332082175e-07, "logits/chosen": 0.3242879807949066, "logits/rejected": 0.2718299329280853, "logps/chosen": -70.94097900390625, "logps/ref_chosen": -55.70417022705078, "logps/ref_rejected": -76.59439849853516, "logps/rejected": -100.54969024658203, "loss": 0.9977, "margin_dpo/margin_mean": 8.718481063842773, "margin_dpo/margin_std": 11.619240760803223, "step": 165 }, { "epoch": 0.25699168556311414, "fcm_dpo/beta": 0.07538954168558121, "fcm_dpo/delta": -0.042643819004297256, "fcm_dpo/margin": 8.284780502319336, "fcm_dpo/q_t": 0.3727918565273285, "grad_norm": 21.66844367980957, "learning_rate": 4.6449585330874425e-07, "logits/chosen": 0.2913161516189575, "logits/rejected": 0.2505802512168884, "logps/chosen": -75.16352081298828, "logps/ref_chosen": -61.169105529785156, "logps/ref_rejected": -77.21674346923828, "logps/rejected": -99.49595642089844, "loss": 1.0435, "margin_dpo/margin_mean": 8.284780502319336, "margin_dpo/margin_std": 12.27697467803955, "step": 170 }, { "epoch": 0.26455026455026454, "fcm_dpo/beta": 0.06802255660295486, "fcm_dpo/delta": -0.13938404619693756, "fcm_dpo/margin": 9.97547721862793, "fcm_dpo/q_t": 0.3644518256187439, "grad_norm": 21.26988983154297, "learning_rate": 4.6102528404790965e-07, "logits/chosen": 0.34859341382980347, "logits/rejected": 0.2882172167301178, "logps/chosen": -73.75447082519531, "logps/ref_chosen": -59.24176788330078, "logps/ref_rejected": -81.80384826660156, "logps/rejected": -106.2920150756836, "loss": 1.0468, "margin_dpo/margin_mean": 9.97547721862793, "margin_dpo/margin_std": 15.061019897460938, "step": 175 }, { "epoch": 0.272108843537415, "fcm_dpo/beta": 0.060754068195819855, "fcm_dpo/delta": -0.07431790977716446, "fcm_dpo/margin": 9.864774703979492, "fcm_dpo/q_t": 0.37669187784194946, "grad_norm": 20.148046493530273, "learning_rate": 4.5740715227200897e-07, "logits/chosen": 0.3143027424812317, "logits/rejected": 0.27188676595687866, "logps/chosen": -77.92072296142578, "logps/ref_chosen": -63.24883270263672, "logps/ref_rejected": -79.00736236572266, "logps/rejected": -103.54402923583984, "loss": 1.0681, "margin_dpo/margin_mean": 9.864773750305176, "margin_dpo/margin_std": 15.329916000366211, "step": 180 }, { "epoch": 0.2796674225245654, "fcm_dpo/beta": 0.05635453015565872, "fcm_dpo/delta": -0.08090370148420334, "fcm_dpo/margin": 10.306377410888672, "fcm_dpo/q_t": 0.37912043929100037, "grad_norm": 18.77512550354004, "learning_rate": 4.5364398801258394e-07, "logits/chosen": 0.36532798409461975, "logits/rejected": 0.31204718351364136, "logps/chosen": -70.10028076171875, "logps/ref_chosen": -56.390625, "logps/ref_rejected": -76.81001281738281, "logps/rejected": -100.82603454589844, "loss": 1.0654, "margin_dpo/margin_mean": 10.306377410888672, "margin_dpo/margin_std": 15.676936149597168, "step": 185 }, { "epoch": 0.2872260015117158, "fcm_dpo/beta": 0.053449880331754684, "fcm_dpo/delta": -0.07286106050014496, "fcm_dpo/margin": 12.053500175476074, "fcm_dpo/q_t": 0.36934491991996765, "grad_norm": 25.36161231994629, "learning_rate": 4.4973842271726024e-07, "logits/chosen": 0.35614460706710815, "logits/rejected": 0.30015695095062256, "logps/chosen": -81.80848693847656, "logps/ref_chosen": -68.25389099121094, "logps/ref_rejected": -86.461181640625, "logps/rejected": -112.06925964355469, "loss": 1.0366, "margin_dpo/margin_mean": 12.05350112915039, "margin_dpo/margin_std": 17.739303588867188, "step": 190 }, { "epoch": 0.2947845804988662, "fcm_dpo/beta": 0.05007879063487053, "fcm_dpo/delta": -0.04965587332844734, "fcm_dpo/margin": 11.500941276550293, "fcm_dpo/q_t": 0.3801359236240387, "grad_norm": 21.779829025268555, "learning_rate": 4.4569318740967043e-07, "logits/chosen": 0.38067522644996643, "logits/rejected": 0.35913893580436707, "logps/chosen": -79.82146453857422, "logps/ref_chosen": -62.1484260559082, "logps/ref_rejected": -71.33458709716797, "logps/rejected": -100.50856018066406, "loss": 1.0743, "margin_dpo/margin_mean": 11.50094223022461, "margin_dpo/margin_std": 17.975563049316406, "step": 195 }, { "epoch": 0.30234315948601664, "fcm_dpo/beta": 0.04691356047987938, "fcm_dpo/delta": -0.08742973953485489, "fcm_dpo/margin": 13.574827194213867, "fcm_dpo/q_t": 0.36781007051467896, "grad_norm": 18.177783966064453, "learning_rate": 4.415111107797445e-07, "logits/chosen": 0.4625890851020813, "logits/rejected": 0.39986932277679443, "logps/chosen": -75.27424621582031, "logps/ref_chosen": -56.950096130371094, "logps/ref_rejected": -78.66989135742188, "logps/rejected": -110.5688705444336, "loss": 1.0505, "margin_dpo/margin_mean": 13.574827194213867, "margin_dpo/margin_std": 20.146961212158203, "step": 200 }, { "epoch": 0.30234315948601664, "eval_fcm_dpo/beta": 0.0445309579372406, "eval_logits/chosen": 0.43528974056243896, "eval_logits/rejected": 0.3850432336330414, "eval_logps/chosen": -94.9354248046875, "eval_logps/ref_chosen": -74.85946655273438, "eval_logps/ref_rejected": -79.54898834228516, "eval_logps/rejected": -112.46160888671875, "eval_loss": 0.5490387678146362, "eval_margin_dpo/margin_mean": 12.836673736572266, "eval_margin_dpo/margin_std": 21.53274154663086, "eval_runtime": 39.0577, "eval_samples_per_second": 58.964, "eval_steps_per_second": 1.843, "step": 200 }, { "epoch": 0.30990173847316704, "fcm_dpo/beta": 0.043045125901699066, "fcm_dpo/delta": -0.05937931686639786, "fcm_dpo/margin": 14.329627990722656, "fcm_dpo/q_t": 0.3726270794868469, "grad_norm": 24.482276916503906, "learning_rate": 4.3719511720570814e-07, "logits/chosen": 0.4526013731956482, "logits/rejected": 0.3818688988685608, "logps/chosen": -80.59419250488281, "logps/ref_chosen": -57.99428176879883, "logps/ref_rejected": -83.5367431640625, "logps/rejected": -120.4662857055664, "loss": 1.0519, "margin_dpo/margin_mean": 14.329629898071289, "margin_dpo/margin_std": 21.601360321044922, "step": 205 }, { "epoch": 0.31746031746031744, "fcm_dpo/beta": 0.04131709039211273, "fcm_dpo/delta": -0.05793388932943344, "fcm_dpo/margin": 13.949444770812988, "fcm_dpo/q_t": 0.3844899535179138, "grad_norm": 19.35005760192871, "learning_rate": 4.327482247091679e-07, "logits/chosen": 0.4452959895133972, "logits/rejected": 0.39058345556259155, "logps/chosen": -90.13018798828125, "logps/ref_chosen": -63.77195358276367, "logps/ref_rejected": -82.56491088867188, "logps/rejected": -122.87260437011719, "loss": 1.1108, "margin_dpo/margin_mean": 13.949444770812988, "margin_dpo/margin_std": 24.073421478271484, "step": 210 }, { "epoch": 0.3250188964474679, "fcm_dpo/beta": 0.03761471435427666, "fcm_dpo/delta": -0.12060566991567612, "fcm_dpo/margin": 18.96577262878418, "fcm_dpo/q_t": 0.35724228620529175, "grad_norm": 18.37898826599121, "learning_rate": 4.281735428447157e-07, "logits/chosen": 0.4427056908607483, "logits/rejected": 0.37563619017601013, "logps/chosen": -84.36368560791016, "logps/ref_chosen": -60.27800750732422, "logps/ref_rejected": -83.91607666015625, "logps/rejected": -126.96754455566406, "loss": 1.002, "margin_dpo/margin_mean": 18.965770721435547, "margin_dpo/margin_std": 26.138744354248047, "step": 215 }, { "epoch": 0.3325774754346183, "fcm_dpo/beta": 0.033143509179353714, "fcm_dpo/delta": -0.07818768918514252, "fcm_dpo/margin": 18.13027000427246, "fcm_dpo/q_t": 0.37564554810523987, "grad_norm": 17.499849319458008, "learning_rate": 4.234742705255272e-07, "logits/chosen": 0.4456612169742584, "logits/rejected": 0.38407641649246216, "logps/chosen": -88.01606750488281, "logps/ref_chosen": -60.88572311401367, "logps/ref_rejected": -80.1805191040039, "logps/rejected": -125.44112396240234, "loss": 1.0594, "margin_dpo/margin_mean": 18.13027000427246, "margin_dpo/margin_std": 27.581802368164062, "step": 220 }, { "epoch": 0.3401360544217687, "fcm_dpo/beta": 0.030816316604614258, "fcm_dpo/delta": -0.08487220108509064, "fcm_dpo/margin": 20.55803871154785, "fcm_dpo/q_t": 0.37105461955070496, "grad_norm": 17.931140899658203, "learning_rate": 4.186536937864752e-07, "logits/chosen": 0.511905312538147, "logits/rejected": 0.43826180696487427, "logps/chosen": -89.73455047607422, "logps/ref_chosen": -61.02507781982422, "logps/ref_rejected": -91.92439270019531, "logps/rejected": -141.19189453125, "loss": 1.0315, "margin_dpo/margin_mean": 20.55803680419922, "margin_dpo/margin_std": 29.531564712524414, "step": 225 }, { "epoch": 0.3476946334089191, "fcm_dpo/beta": 0.030043313279747963, "fcm_dpo/delta": -0.015888774767518044, "fcm_dpo/margin": 19.927255630493164, "fcm_dpo/q_t": 0.3782123029232025, "grad_norm": 17.704288482666016, "learning_rate": 4.137151834863213e-07, "logits/chosen": 0.567510724067688, "logits/rejected": 0.5179559588432312, "logps/chosen": -85.0999984741211, "logps/ref_chosen": -54.49797821044922, "logps/ref_rejected": -71.96363830566406, "logps/rejected": -122.492919921875, "loss": 1.0889, "margin_dpo/margin_mean": 19.927255630493164, "margin_dpo/margin_std": 33.03584671020508, "step": 230 }, { "epoch": 0.35525321239606955, "fcm_dpo/beta": 0.02700546756386757, "fcm_dpo/delta": -0.13411830365657806, "fcm_dpo/margin": 23.45638656616211, "fcm_dpo/q_t": 0.36994558572769165, "grad_norm": 16.464786529541016, "learning_rate": 4.08662192950594e-07, "logits/chosen": 0.5007590055465698, "logits/rejected": 0.4768039286136627, "logps/chosen": -99.23008728027344, "logps/ref_chosen": -63.250282287597656, "logps/ref_rejected": -73.09049987792969, "logps/rejected": -132.5266876220703, "loss": 1.0474, "margin_dpo/margin_mean": 23.456384658813477, "margin_dpo/margin_std": 33.97542190551758, "step": 235 }, { "epoch": 0.36281179138321995, "fcm_dpo/beta": 0.024938663467764854, "fcm_dpo/delta": -0.08994299918413162, "fcm_dpo/margin": 27.00982666015625, "fcm_dpo/q_t": 0.3612818121910095, "grad_norm": 14.642565727233887, "learning_rate": 4.0349825555680045e-07, "logits/chosen": 0.561758279800415, "logits/rejected": 0.5071486234664917, "logps/chosen": -108.31246185302734, "logps/ref_chosen": -65.26150512695312, "logps/ref_rejected": -87.60311126708984, "logps/rejected": -157.66390991210938, "loss": 1.0195, "margin_dpo/margin_mean": 27.00982666015625, "margin_dpo/margin_std": 37.65082550048828, "step": 240 }, { "epoch": 0.37037037037037035, "fcm_dpo/beta": 0.02278234250843525, "fcm_dpo/delta": -0.10555760562419891, "fcm_dpo/margin": 30.133275985717773, "fcm_dpo/q_t": 0.3576427102088928, "grad_norm": 15.980971336364746, "learning_rate": 3.982269822636601e-07, "logits/chosen": 0.5394655466079712, "logits/rejected": 0.5064207911491394, "logps/chosen": -108.2628173828125, "logps/ref_chosen": -65.73170471191406, "logps/ref_rejected": -75.19642639160156, "logps/rejected": -147.86080932617188, "loss": 0.9861, "margin_dpo/margin_mean": 30.133275985717773, "margin_dpo/margin_std": 38.397438049316406, "step": 245 }, { "epoch": 0.3779289493575208, "fcm_dpo/beta": 0.0209406279027462, "fcm_dpo/delta": -0.029252177104353905, "fcm_dpo/margin": 25.17742347717285, "fcm_dpo/q_t": 0.39082691073417664, "grad_norm": 18.229862213134766, "learning_rate": 3.9285205908608934e-07, "logits/chosen": 0.599532961845398, "logits/rejected": 0.5946930050849915, "logps/chosen": -126.43806457519531, "logps/ref_chosen": -70.71224212646484, "logps/ref_rejected": -76.12723541259766, "logps/rejected": -157.03048706054688, "loss": 1.1321, "margin_dpo/margin_mean": 25.17742347717285, "margin_dpo/margin_std": 44.761314392089844, "step": 250 }, { "epoch": 0.3854875283446712, "fcm_dpo/beta": 0.02031904086470604, "fcm_dpo/delta": -0.02309424616396427, "fcm_dpo/margin": 24.082748413085938, "fcm_dpo/q_t": 0.3976757824420929, "grad_norm": 18.887149810791016, "learning_rate": 3.873772445177015e-07, "logits/chosen": 0.609887957572937, "logits/rejected": 0.5647954940795898, "logps/chosen": -117.80338287353516, "logps/ref_chosen": -61.767662048339844, "logps/ref_rejected": -77.38813018798828, "logps/rejected": -157.50662231445312, "loss": 1.1377, "margin_dpo/margin_mean": 24.082748413085938, "margin_dpo/margin_std": 43.56954574584961, "step": 255 }, { "epoch": 0.3930461073318216, "fcm_dpo/beta": 0.019356101751327515, "fcm_dpo/delta": -0.06188065558671951, "fcm_dpo/margin": 33.2767448425293, "fcm_dpo/q_t": 0.3668089807033539, "grad_norm": 19.52748680114746, "learning_rate": 3.818063669026256e-07, "logits/chosen": 0.585278332233429, "logits/rejected": 0.5075653195381165, "logps/chosen": -119.7823486328125, "logps/ref_chosen": -61.57584762573242, "logps/ref_rejected": -91.87513732910156, "logps/rejected": -183.35836791992188, "loss": 1.0274, "margin_dpo/margin_mean": 33.2767448425293, "margin_dpo/margin_std": 47.10149002075195, "step": 260 }, { "epoch": 0.40060468631897206, "fcm_dpo/beta": 0.019167505204677582, "fcm_dpo/delta": 0.02147207036614418, "fcm_dpo/margin": 28.888378143310547, "fcm_dpo/q_t": 0.38470879197120667, "grad_norm": 16.250465393066406, "learning_rate": 3.7614332175848027e-07, "logits/chosen": 0.5632354021072388, "logits/rejected": 0.5125952959060669, "logps/chosen": -119.10379791259766, "logps/ref_chosen": -65.75422668457031, "logps/ref_rejected": -77.9569320678711, "logps/rejected": -160.19488525390625, "loss": 1.082, "margin_dpo/margin_mean": 28.888378143310547, "margin_dpo/margin_std": 46.08098602294922, "step": 265 }, { "epoch": 0.40816326530612246, "fcm_dpo/beta": 0.018922636285424232, "fcm_dpo/delta": -0.03335314989089966, "fcm_dpo/margin": 29.144763946533203, "fcm_dpo/q_t": 0.3839413523674011, "grad_norm": 16.50005531311035, "learning_rate": 3.7039206905237656e-07, "logits/chosen": 0.6007962226867676, "logits/rejected": 0.5515360236167908, "logps/chosen": -110.72042083740234, "logps/ref_chosen": -62.27649688720703, "logps/ref_rejected": -76.56950378417969, "logps/rejected": -154.158203125, "loss": 1.0747, "margin_dpo/margin_mean": 29.144763946533203, "margin_dpo/margin_std": 44.99258804321289, "step": 270 }, { "epoch": 0.41572184429327286, "fcm_dpo/beta": 0.018402384594082832, "fcm_dpo/delta": -0.021868888288736343, "fcm_dpo/margin": 32.19723129272461, "fcm_dpo/q_t": 0.37771254777908325, "grad_norm": 16.371837615966797, "learning_rate": 3.645566304318526e-07, "logits/chosen": 0.5913391709327698, "logits/rejected": 0.5466698408126831, "logps/chosen": -113.94535827636719, "logps/ref_chosen": -61.854393005371094, "logps/ref_rejected": -77.22246551513672, "logps/rejected": -161.51065063476562, "loss": 1.0643, "margin_dpo/margin_mean": 32.197235107421875, "margin_dpo/margin_std": 49.30824279785156, "step": 275 }, { "epoch": 0.42328042328042326, "fcm_dpo/beta": 0.01777859404683113, "fcm_dpo/delta": -0.08700723201036453, "fcm_dpo/margin": 34.64583206176758, "fcm_dpo/q_t": 0.3692876696586609, "grad_norm": 15.50888729095459, "learning_rate": 3.586410864126781e-07, "logits/chosen": 0.593945324420929, "logits/rejected": 0.5639765858650208, "logps/chosen": -114.51588439941406, "logps/ref_chosen": -61.29896926879883, "logps/ref_rejected": -73.35762023925781, "logps/rejected": -161.22035217285156, "loss": 1.0139, "margin_dpo/margin_mean": 34.64583206176758, "margin_dpo/margin_std": 45.06488800048828, "step": 280 }, { "epoch": 0.4308390022675737, "fcm_dpo/beta": 0.016293346881866455, "fcm_dpo/delta": -0.030563678592443466, "fcm_dpo/margin": 35.089866638183594, "fcm_dpo/q_t": 0.37944841384887695, "grad_norm": 18.91819190979004, "learning_rate": 3.5264957352549375e-07, "logits/chosen": 0.6056411862373352, "logits/rejected": 0.5613064765930176, "logps/chosen": -125.51373291015625, "logps/ref_chosen": -63.435462951660156, "logps/ref_rejected": -79.73661804199219, "logps/rejected": -176.90478515625, "loss": 1.0488, "margin_dpo/margin_mean": 35.089866638183594, "margin_dpo/margin_std": 50.59145736694336, "step": 285 }, { "epoch": 0.4383975812547241, "fcm_dpo/beta": 0.015350925736129284, "fcm_dpo/delta": -0.06803837418556213, "fcm_dpo/margin": 40.154258728027344, "fcm_dpo/q_t": 0.36974334716796875, "grad_norm": 18.484561920166016, "learning_rate": 3.465862814232821e-07, "logits/chosen": 0.6393694877624512, "logits/rejected": 0.5767431855201721, "logps/chosen": -128.746337890625, "logps/ref_chosen": -57.696876525878906, "logps/ref_rejected": -79.78132629394531, "logps/rejected": -190.9850616455078, "loss": 1.0121, "margin_dpo/margin_mean": 40.154258728027344, "margin_dpo/margin_std": 53.58546829223633, "step": 290 }, { "epoch": 0.4459561602418745, "fcm_dpo/beta": 0.014825056307017803, "fcm_dpo/delta": -0.02245759218931198, "fcm_dpo/margin": 38.19078063964844, "fcm_dpo/q_t": 0.3817628026008606, "grad_norm": 19.01740074157715, "learning_rate": 3.4045544995169125e-07, "logits/chosen": 0.7111937999725342, "logits/rejected": 0.6355310678482056, "logps/chosen": -135.02879333496094, "logps/ref_chosen": -55.430633544921875, "logps/ref_rejected": -78.1390151977539, "logps/rejected": -195.927978515625, "loss": 1.0571, "margin_dpo/margin_mean": 38.19078826904297, "margin_dpo/margin_std": 56.477027893066406, "step": 295 }, { "epoch": 0.45351473922902497, "fcm_dpo/beta": 0.015004401095211506, "fcm_dpo/delta": 0.03381625562906265, "fcm_dpo/margin": 34.913299560546875, "fcm_dpo/q_t": 0.39039894938468933, "grad_norm": 16.89678955078125, "learning_rate": 3.3426136618426043e-07, "logits/chosen": 0.6468077898025513, "logits/rejected": 0.5938016176223755, "logps/chosen": -149.64816284179688, "logps/ref_chosen": -61.207069396972656, "logps/ref_rejected": -75.23294067382812, "logps/rejected": -198.58737182617188, "loss": 1.0995, "margin_dpo/margin_mean": 34.913299560546875, "margin_dpo/margin_std": 57.363426208496094, "step": 300 }, { "epoch": 0.46107331821617537, "fcm_dpo/beta": 0.015164652839303017, "fcm_dpo/delta": -0.027595514431595802, "fcm_dpo/margin": 34.962440490722656, "fcm_dpo/q_t": 0.38818344473838806, "grad_norm": 17.029956817626953, "learning_rate": 3.280083614246217e-07, "logits/chosen": 0.6154987812042236, "logits/rejected": 0.578453540802002, "logps/chosen": -147.16146850585938, "logps/ref_chosen": -63.06663131713867, "logps/ref_rejected": -78.45845031738281, "logps/rejected": -197.51571655273438, "loss": 1.0998, "margin_dpo/margin_mean": 34.96243667602539, "margin_dpo/margin_std": 57.51310348510742, "step": 305 }, { "epoch": 0.46863189720332576, "fcm_dpo/beta": 0.014774846844375134, "fcm_dpo/delta": -0.008402171544730663, "fcm_dpo/margin": 27.70456886291504, "fcm_dpo/q_t": 0.41280922293663025, "grad_norm": 15.905935287475586, "learning_rate": 3.2170080817777257e-07, "logits/chosen": 0.6693840622901917, "logits/rejected": 0.619363009929657, "logps/chosen": -142.225341796875, "logps/ref_chosen": -63.60908889770508, "logps/ref_rejected": -74.06394958496094, "logps/rejected": -180.38479614257812, "loss": 1.1676, "margin_dpo/margin_mean": 27.70456886291504, "margin_dpo/margin_std": 53.79878616333008, "step": 310 }, { "epoch": 0.47619047619047616, "fcm_dpo/beta": 0.014566788449883461, "fcm_dpo/delta": -0.02135154977440834, "fcm_dpo/margin": 35.78001403808594, "fcm_dpo/q_t": 0.38799604773521423, "grad_norm": 13.580395698547363, "learning_rate": 3.1534311709253723e-07, "logits/chosen": 0.6421744227409363, "logits/rejected": 0.5867301225662231, "logps/chosen": -130.10659790039062, "logps/ref_chosen": -62.31493377685547, "logps/ref_rejected": -75.07472229003906, "logps/rejected": -178.64637756347656, "loss": 1.0606, "margin_dpo/margin_mean": 35.78001403808594, "margin_dpo/margin_std": 51.48203659057617, "step": 315 }, { "epoch": 0.4837490551776266, "fcm_dpo/beta": 0.013901944272220135, "fcm_dpo/delta": -0.061062753200531006, "fcm_dpo/margin": 40.63452911376953, "fcm_dpo/q_t": 0.3800038993358612, "grad_norm": 15.753009796142578, "learning_rate": 3.0893973387735683e-07, "logits/chosen": 0.587223470211029, "logits/rejected": 0.524192214012146, "logps/chosen": -118.84355163574219, "logps/ref_chosen": -55.336036682128906, "logps/ref_rejected": -80.05536651611328, "logps/rejected": -184.19741821289062, "loss": 1.0452, "margin_dpo/margin_mean": 40.63452911376953, "margin_dpo/margin_std": 56.74776077270508, "step": 320 }, { "epoch": 0.491307634164777, "fcm_dpo/beta": 0.013363140635192394, "fcm_dpo/delta": -0.023248041048645973, "fcm_dpo/margin": 41.32105255126953, "fcm_dpo/q_t": 0.3810966908931732, "grad_norm": 14.63464069366455, "learning_rate": 3.0249513619156206e-07, "logits/chosen": 0.6304353475570679, "logits/rejected": 0.5674418210983276, "logps/chosen": -130.5197296142578, "logps/ref_chosen": -57.90629959106445, "logps/ref_rejected": -74.2243881225586, "logps/rejected": -188.1588592529297, "loss": 1.0513, "margin_dpo/margin_mean": 41.32105255126953, "margin_dpo/margin_std": 59.10980987548828, "step": 325 }, { "epoch": 0.4988662131519274, "fcm_dpo/beta": 0.013443930074572563, "fcm_dpo/delta": 0.021733686327934265, "fcm_dpo/margin": 36.37574005126953, "fcm_dpo/q_t": 0.39693742990493774, "grad_norm": 15.07494831085205, "learning_rate": 2.9601383051430505e-07, "logits/chosen": 0.5850898623466492, "logits/rejected": 0.5469895005226135, "logps/chosen": -149.595458984375, "logps/ref_chosen": -65.17555236816406, "logps/ref_rejected": -78.53681182861328, "logps/rejected": -199.3324432373047, "loss": 1.1175, "margin_dpo/margin_mean": 36.37574005126953, "margin_dpo/margin_std": 62.23854446411133, "step": 330 }, { "epoch": 0.5064247921390779, "fcm_dpo/beta": 0.01288739126175642, "fcm_dpo/delta": -0.0680394172668457, "fcm_dpo/margin": 46.44968795776367, "fcm_dpo/q_t": 0.37343794107437134, "grad_norm": 19.42026710510254, "learning_rate": 2.895003489933375e-07, "logits/chosen": 0.6479321122169495, "logits/rejected": 0.600612998008728, "logps/chosen": -147.13876342773438, "logps/ref_chosen": -62.62797927856445, "logps/ref_rejected": -79.9095458984375, "logps/rejected": -210.8699951171875, "loss": 1.0295, "margin_dpo/margin_mean": 46.44968795776367, "margin_dpo/margin_std": 64.50286865234375, "step": 335 }, { "epoch": 0.5139833711262283, "fcm_dpo/beta": 0.012590940110385418, "fcm_dpo/delta": 0.003505054162815213, "fcm_dpo/margin": 38.454471588134766, "fcm_dpo/q_t": 0.39839255809783936, "grad_norm": 15.376999855041504, "learning_rate": 2.8295924627584004e-07, "logits/chosen": 0.6036761999130249, "logits/rejected": 0.5653026103973389, "logps/chosen": -155.42881774902344, "logps/ref_chosen": -61.1064567565918, "logps/ref_rejected": -76.71846008300781, "logps/rejected": -209.4952850341797, "loss": 1.1186, "margin_dpo/margin_mean": 38.454471588134766, "margin_dpo/margin_std": 66.48394012451172, "step": 340 }, { "epoch": 0.5215419501133787, "fcm_dpo/beta": 0.012363018468022346, "fcm_dpo/delta": -0.01725325547158718, "fcm_dpo/margin": 44.01114273071289, "fcm_dpo/q_t": 0.3849506974220276, "grad_norm": 15.500265121459961, "learning_rate": 2.7639509632351927e-07, "logits/chosen": 0.6400725841522217, "logits/rejected": 0.5962103009223938, "logps/chosen": -147.3646240234375, "logps/ref_chosen": -60.12370681762695, "logps/ref_rejected": -78.58574676513672, "logps/rejected": -209.83779907226562, "loss": 1.0666, "margin_dpo/margin_mean": 44.01114273071289, "margin_dpo/margin_std": 65.67613220214844, "step": 345 }, { "epoch": 0.5291005291005291, "fcm_dpo/beta": 0.012403665110468864, "fcm_dpo/delta": -0.03641422092914581, "fcm_dpo/margin": 49.11076354980469, "fcm_dpo/q_t": 0.3730853796005249, "grad_norm": 18.272642135620117, "learning_rate": 2.698124892141971e-07, "logits/chosen": 0.6958315372467041, "logits/rejected": 0.6265360116958618, "logps/chosen": -138.1358642578125, "logps/ref_chosen": -55.104461669921875, "logps/ref_rejected": -80.63292694091797, "logps/rejected": -212.77511596679688, "loss": 1.0253, "margin_dpo/margin_mean": 49.11076354980469, "margin_dpo/margin_std": 67.76351165771484, "step": 350 }, { "epoch": 0.5366591080876795, "fcm_dpo/beta": 0.012127144262194633, "fcm_dpo/delta": -0.010115819983184338, "fcm_dpo/margin": 50.23556137084961, "fcm_dpo/q_t": 0.37128472328186035, "grad_norm": 17.643238067626953, "learning_rate": 2.632160279321328e-07, "logits/chosen": 0.6702345609664917, "logits/rejected": 0.5912803411483765, "logps/chosen": -134.65940856933594, "logps/ref_chosen": -54.87224197387695, "logps/ref_rejected": -77.01316833496094, "logps/rejected": -207.03591918945312, "loss": 1.0338, "margin_dpo/margin_mean": 50.23556137084961, "margin_dpo/margin_std": 71.0433578491211, "step": 355 }, { "epoch": 0.54421768707483, "fcm_dpo/beta": 0.01166975311934948, "fcm_dpo/delta": -0.01579994522035122, "fcm_dpo/margin": 41.836509704589844, "fcm_dpo/q_t": 0.3966527581214905, "grad_norm": 15.070870399475098, "learning_rate": 2.5661032514931834e-07, "logits/chosen": 0.6302360892295837, "logits/rejected": 0.5863192677497864, "logps/chosen": -152.76345825195312, "logps/ref_chosen": -60.75285720825195, "logps/ref_rejected": -75.21507263183594, "logps/rejected": -209.0622100830078, "loss": 1.1188, "margin_dpo/margin_mean": 41.83650588989258, "margin_dpo/margin_std": 70.53252410888672, "step": 360 }, { "epoch": 0.5517762660619804, "fcm_dpo/beta": 0.01143195666372776, "fcm_dpo/delta": -0.052489638328552246, "fcm_dpo/margin": 56.774986267089844, "fcm_dpo/q_t": 0.3644375205039978, "grad_norm": 12.386019706726074, "learning_rate": 2.5e-07, "logits/chosen": 0.6857043504714966, "logits/rejected": 0.6063531637191772, "logps/chosen": -149.87673950195312, "logps/ref_chosen": -58.56513595581055, "logps/ref_rejected": -84.06403350830078, "logps/rejected": -232.150634765625, "loss": 0.9963, "margin_dpo/margin_mean": 56.774986267089844, "margin_dpo/margin_std": 74.50080871582031, "step": 365 }, { "epoch": 0.5593348450491308, "fcm_dpo/beta": 0.011315222829580307, "fcm_dpo/delta": 0.02159869484603405, "fcm_dpo/margin": 45.84794616699219, "fcm_dpo/q_t": 0.3889442980289459, "grad_norm": 15.18197250366211, "learning_rate": 2.4338967485068164e-07, "logits/chosen": 0.6318791508674622, "logits/rejected": 0.5831542611122131, "logps/chosen": -152.20651245117188, "logps/ref_chosen": -59.443138122558594, "logps/ref_rejected": -75.80937194824219, "logps/rejected": -214.42068481445312, "loss": 1.0915, "margin_dpo/margin_mean": 45.84794616699219, "margin_dpo/margin_std": 73.65330505371094, "step": 370 }, { "epoch": 0.5668934240362812, "fcm_dpo/beta": 0.011248277500271797, "fcm_dpo/delta": -0.027419675141572952, "fcm_dpo/margin": 46.06201171875, "fcm_dpo/q_t": 0.3906846046447754, "grad_norm": 16.094900131225586, "learning_rate": 2.3678397206786715e-07, "logits/chosen": 0.6881932616233826, "logits/rejected": 0.6348354816436768, "logps/chosen": -155.08950805664062, "logps/ref_chosen": -58.59185028076172, "logps/ref_rejected": -73.7529525756836, "logps/rejected": -216.31265258789062, "loss": 1.1143, "margin_dpo/margin_mean": 46.062007904052734, "margin_dpo/margin_std": 78.36566162109375, "step": 375 }, { "epoch": 0.5744520030234316, "fcm_dpo/beta": 0.010973427444696426, "fcm_dpo/delta": -0.023532114923000336, "fcm_dpo/margin": 51.64561080932617, "fcm_dpo/q_t": 0.3803391456604004, "grad_norm": 13.79555892944336, "learning_rate": 2.3018751078580283e-07, "logits/chosen": 0.7098981142044067, "logits/rejected": 0.6563004851341248, "logps/chosen": -153.05264282226562, "logps/ref_chosen": -58.93424606323242, "logps/ref_rejected": -76.27055358886719, "logps/rejected": -222.03457641601562, "loss": 1.0563, "margin_dpo/margin_mean": 51.64561080932617, "margin_dpo/margin_std": 76.09913635253906, "step": 380 }, { "epoch": 0.582010582010582, "fcm_dpo/beta": 0.010893596336245537, "fcm_dpo/delta": 0.006062631495296955, "fcm_dpo/margin": 44.309181213378906, "fcm_dpo/q_t": 0.39645999670028687, "grad_norm": 13.824441909790039, "learning_rate": 2.2360490367648084e-07, "logits/chosen": 0.6328981518745422, "logits/rejected": 0.606531023979187, "logps/chosen": -165.55270385742188, "logps/ref_chosen": -66.42684173583984, "logps/ref_rejected": -76.96304321289062, "logps/rejected": -220.3980712890625, "loss": 1.1059, "margin_dpo/margin_mean": 44.30918502807617, "margin_dpo/margin_std": 72.50572204589844, "step": 385 }, { "epoch": 0.5895691609977324, "fcm_dpo/beta": 0.011033998802304268, "fcm_dpo/delta": 0.017967429012060165, "fcm_dpo/margin": 46.52573013305664, "fcm_dpo/q_t": 0.39048102498054504, "grad_norm": 13.157139778137207, "learning_rate": 2.170407537241599e-07, "logits/chosen": 0.6664471626281738, "logits/rejected": 0.6082719564437866, "logps/chosen": -158.45370483398438, "logps/ref_chosen": -60.984214782714844, "logps/ref_rejected": -79.54056549072266, "logps/rejected": -223.53579711914062, "loss": 1.0801, "margin_dpo/margin_mean": 46.52573013305664, "margin_dpo/margin_std": 71.85535430908203, "step": 390 }, { "epoch": 0.5971277399848829, "fcm_dpo/beta": 0.010882696136832237, "fcm_dpo/delta": -0.01834883727133274, "fcm_dpo/margin": 53.6688117980957, "fcm_dpo/q_t": 0.37560978531837463, "grad_norm": 17.336837768554688, "learning_rate": 2.104996510066625e-07, "logits/chosen": 0.6918023824691772, "logits/rejected": 0.6310297846794128, "logps/chosen": -150.5190887451172, "logps/ref_chosen": -58.30937957763672, "logps/ref_rejected": -80.09587097167969, "logps/rejected": -225.9744110107422, "loss": 1.0295, "margin_dpo/margin_mean": 53.6688117980957, "margin_dpo/margin_std": 73.11488342285156, "step": 395 }, { "epoch": 0.6046863189720333, "fcm_dpo/beta": 0.010488608852028847, "fcm_dpo/delta": -0.05746689438819885, "fcm_dpo/margin": 49.87424850463867, "fcm_dpo/q_t": 0.38892143964767456, "grad_norm": 19.94718360900879, "learning_rate": 2.0398616948569493e-07, "logits/chosen": 0.6400595307350159, "logits/rejected": 0.5548309087753296, "logps/chosen": -163.4067840576172, "logps/ref_chosen": -61.39867401123047, "logps/ref_rejected": -89.0177993774414, "logps/rejected": -240.90017700195312, "loss": 1.0738, "margin_dpo/margin_mean": 49.87424850463867, "margin_dpo/margin_std": 74.35093688964844, "step": 400 }, { "epoch": 0.6046863189720333, "eval_fcm_dpo/beta": 0.010275867767632008, "eval_logits/chosen": 0.6246050596237183, "eval_logits/rejected": 0.5774987936019897, "eval_logps/chosen": -176.5742950439453, "eval_logps/ref_chosen": -74.85946655273438, "eval_logps/ref_rejected": -79.54898834228516, "eval_logps/rejected": -226.17611694335938, "eval_loss": 0.5613793134689331, "eval_margin_dpo/margin_mean": 44.91230392456055, "eval_margin_dpo/margin_std": 77.27910614013672, "eval_runtime": 39.1906, "eval_samples_per_second": 58.764, "eval_steps_per_second": 1.837, "step": 400 }, { "epoch": 0.6122448979591837, "fcm_dpo/beta": 0.010087807662785053, "fcm_dpo/delta": -0.038665831089019775, "fcm_dpo/margin": 55.62682342529297, "fcm_dpo/q_t": 0.3804628252983093, "grad_norm": 13.084096908569336, "learning_rate": 1.975048638084379e-07, "logits/chosen": 0.6958111524581909, "logits/rejected": 0.6256667971611023, "logps/chosen": -153.22628784179688, "logps/ref_chosen": -55.953521728515625, "logps/ref_rejected": -77.67539978027344, "logps/rejected": -230.57498168945312, "loss": 1.0364, "margin_dpo/margin_mean": 55.62682342529297, "margin_dpo/margin_std": 75.48118591308594, "step": 405 }, { "epoch": 0.6198034769463341, "fcm_dpo/beta": 0.009773796424269676, "fcm_dpo/delta": -0.03128813952207565, "fcm_dpo/margin": 55.161216735839844, "fcm_dpo/q_t": 0.38234299421310425, "grad_norm": 12.041950225830078, "learning_rate": 1.9106026612264315e-07, "logits/chosen": 0.6618553996086121, "logits/rejected": 0.5954070687294006, "logps/chosen": -169.55258178710938, "logps/ref_chosen": -63.40419387817383, "logps/ref_rejected": -80.85710144042969, "logps/rejected": -242.1666717529297, "loss": 1.0439, "margin_dpo/margin_mean": 55.161216735839844, "margin_dpo/margin_std": 74.25384521484375, "step": 410 }, { "epoch": 0.6273620559334845, "fcm_dpo/beta": 0.009798675775527954, "fcm_dpo/delta": 0.039564795792102814, "fcm_dpo/margin": 52.46839141845703, "fcm_dpo/q_t": 0.38860344886779785, "grad_norm": 13.28818416595459, "learning_rate": 1.846568829074628e-07, "logits/chosen": 0.6661458015441895, "logits/rejected": 0.6173139214515686, "logps/chosen": -174.634765625, "logps/ref_chosen": -57.6942024230957, "logps/ref_rejected": -71.74036407470703, "logps/rejected": -241.14932250976562, "loss": 1.0677, "margin_dpo/margin_mean": 52.46839141845703, "margin_dpo/margin_std": 77.23680114746094, "step": 415 }, { "epoch": 0.6349206349206349, "fcm_dpo/beta": 0.009716962464153767, "fcm_dpo/delta": -0.0343676395714283, "fcm_dpo/margin": 47.64646911621094, "fcm_dpo/q_t": 0.4010258615016937, "grad_norm": 15.641878128051758, "learning_rate": 1.782991918222275e-07, "logits/chosen": 0.6854730248451233, "logits/rejected": 0.6435775756835938, "logps/chosen": -182.29537963867188, "logps/ref_chosen": -59.169517517089844, "logps/ref_rejected": -69.47721099853516, "logps/rejected": -240.24954223632812, "loss": 1.1303, "margin_dpo/margin_mean": 47.64646911621094, "margin_dpo/margin_std": 82.67323303222656, "step": 420 }, { "epoch": 0.6424792139077853, "fcm_dpo/beta": 0.009624272584915161, "fcm_dpo/delta": 0.003170407610014081, "fcm_dpo/margin": 58.16600799560547, "fcm_dpo/q_t": 0.3807678818702698, "grad_norm": 14.270646095275879, "learning_rate": 1.7199163857537824e-07, "logits/chosen": 0.6730635166168213, "logits/rejected": 0.6302940845489502, "logps/chosen": -172.57290649414062, "logps/ref_chosen": -58.09320831298828, "logps/ref_rejected": -73.98226165771484, "logps/rejected": -246.6279754638672, "loss": 1.0478, "margin_dpo/margin_mean": 58.16600799560547, "margin_dpo/margin_std": 82.8730239868164, "step": 425 }, { "epoch": 0.6500377928949358, "fcm_dpo/beta": 0.009675036184489727, "fcm_dpo/delta": -0.009549234993755817, "fcm_dpo/margin": 51.29081344604492, "fcm_dpo/q_t": 0.39365965127944946, "grad_norm": 14.406279563903809, "learning_rate": 1.6573863381573954e-07, "logits/chosen": 0.6383100748062134, "logits/rejected": 0.6207207441329956, "logps/chosen": -191.18653869628906, "logps/ref_chosen": -62.7039909362793, "logps/ref_rejected": -74.52284240722656, "logps/rejected": -254.29623413085938, "loss": 1.112, "margin_dpo/margin_mean": 51.29081344604492, "margin_dpo/margin_std": 86.48429870605469, "step": 430 }, { "epoch": 0.6575963718820862, "fcm_dpo/beta": 0.009738308377563953, "fcm_dpo/delta": 0.005447807256132364, "fcm_dpo/margin": 59.08086013793945, "fcm_dpo/q_t": 0.3763192296028137, "grad_norm": 16.807050704956055, "learning_rate": 1.5954455004830878e-07, "logits/chosen": 0.7440453171730042, "logits/rejected": 0.7008776664733887, "logps/chosen": -169.50389099121094, "logps/ref_chosen": -56.12516403198242, "logps/ref_rejected": -74.36073303222656, "logps/rejected": -246.8203125, "loss": 1.0417, "margin_dpo/margin_mean": 59.08086013793945, "margin_dpo/margin_std": 83.0219955444336, "step": 435 }, { "epoch": 0.6651549508692366, "fcm_dpo/beta": 0.009510443545877934, "fcm_dpo/delta": -0.04184270650148392, "fcm_dpo/margin": 58.687843322753906, "fcm_dpo/q_t": 0.3826465308666229, "grad_norm": 22.634798049926758, "learning_rate": 1.534137185767178e-07, "logits/chosen": 0.6856757998466492, "logits/rejected": 0.6076371073722839, "logps/chosen": -172.70729064941406, "logps/ref_chosen": -55.67548751831055, "logps/ref_rejected": -76.62055206298828, "logps/rejected": -252.34017944335938, "loss": 1.0688, "margin_dpo/margin_mean": 58.687843322753906, "margin_dpo/margin_std": 89.2109146118164, "step": 440 }, { "epoch": 0.672713529856387, "fcm_dpo/beta": 0.008997871540486813, "fcm_dpo/delta": -0.03983866050839424, "fcm_dpo/margin": 64.14662170410156, "fcm_dpo/q_t": 0.3751087486743927, "grad_norm": 14.766826629638672, "learning_rate": 1.473504264745062e-07, "logits/chosen": 0.6673511266708374, "logits/rejected": 0.6046266555786133, "logps/chosen": -180.44454956054688, "logps/ref_chosen": -59.903411865234375, "logps/ref_rejected": -82.02873229980469, "logps/rejected": -266.71649169921875, "loss": 1.023, "margin_dpo/margin_mean": 64.14662170410156, "margin_dpo/margin_std": 84.01716613769531, "step": 445 }, { "epoch": 0.6802721088435374, "fcm_dpo/beta": 0.008571968413889408, "fcm_dpo/delta": -0.029057633131742477, "fcm_dpo/margin": 60.95649337768555, "fcm_dpo/q_t": 0.3861503601074219, "grad_norm": 15.082186698913574, "learning_rate": 1.4135891358732205e-07, "logits/chosen": 0.670871376991272, "logits/rejected": 0.6003819704055786, "logps/chosen": -176.04681396484375, "logps/ref_chosen": -55.83526611328125, "logps/ref_rejected": -79.63658142089844, "logps/rejected": -260.80462646484375, "loss": 1.0575, "margin_dpo/margin_mean": 60.95649337768555, "margin_dpo/margin_std": 84.30404663085938, "step": 450 }, { "epoch": 0.6878306878306878, "fcm_dpo/beta": 0.008977680467069149, "fcm_dpo/delta": 0.06819285452365875, "fcm_dpo/margin": 57.031578063964844, "fcm_dpo/q_t": 0.39055323600769043, "grad_norm": 14.447818756103516, "learning_rate": 1.354433695681474e-07, "logits/chosen": 0.6231824159622192, "logits/rejected": 0.5777461528778076, "logps/chosen": -178.04124450683594, "logps/ref_chosen": -60.59226608276367, "logps/ref_rejected": -73.37936401367188, "logps/rejected": -247.8599395751953, "loss": 1.0795, "margin_dpo/margin_mean": 57.031578063964844, "margin_dpo/margin_std": 88.28900146484375, "step": 455 }, { "epoch": 0.6953892668178382, "fcm_dpo/beta": 0.009271183051168919, "fcm_dpo/delta": -0.02087727189064026, "fcm_dpo/margin": 65.31262969970703, "fcm_dpo/q_t": 0.3707457184791565, "grad_norm": 13.688095092773438, "learning_rate": 1.2960793094762345e-07, "logits/chosen": 0.6910965442657471, "logits/rejected": 0.6071202158927917, "logps/chosen": -175.61166381835938, "logps/ref_chosen": -56.21283721923828, "logps/ref_rejected": -83.02075958251953, "logps/rejected": -267.73223876953125, "loss": 1.0119, "margin_dpo/margin_mean": 65.31262969970703, "margin_dpo/margin_std": 85.2598648071289, "step": 460 }, { "epoch": 0.7029478458049887, "fcm_dpo/beta": 0.008703077211976051, "fcm_dpo/delta": -0.03589098900556564, "fcm_dpo/margin": 65.30760192871094, "fcm_dpo/q_t": 0.37661120295524597, "grad_norm": 15.768505096435547, "learning_rate": 1.238566782415197e-07, "logits/chosen": 0.7279990315437317, "logits/rejected": 0.6809935569763184, "logps/chosen": -180.1580352783203, "logps/ref_chosen": -59.0674934387207, "logps/ref_rejected": -74.53498840332031, "logps/rejected": -260.93310546875, "loss": 1.0318, "margin_dpo/margin_mean": 65.30760192871094, "margin_dpo/margin_std": 86.46934509277344, "step": 465 }, { "epoch": 0.7105064247921391, "fcm_dpo/beta": 0.008754456415772438, "fcm_dpo/delta": 0.00041560232057236135, "fcm_dpo/margin": 60.178565979003906, "fcm_dpo/q_t": 0.3857038617134094, "grad_norm": 13.696944236755371, "learning_rate": 1.1819363309737438e-07, "logits/chosen": 0.7032414674758911, "logits/rejected": 0.6553713083267212, "logps/chosen": -180.98046875, "logps/ref_chosen": -58.3397331237793, "logps/ref_rejected": -74.33660125732422, "logps/rejected": -257.1558837890625, "loss": 1.0557, "margin_dpo/margin_mean": 60.178558349609375, "margin_dpo/margin_std": 85.16178131103516, "step": 470 }, { "epoch": 0.7180650037792895, "fcm_dpo/beta": 0.008638769388198853, "fcm_dpo/delta": -0.022516410797834396, "fcm_dpo/margin": 68.27113342285156, "fcm_dpo/q_t": 0.3759641647338867, "grad_norm": 10.956338882446289, "learning_rate": 1.126227554822985e-07, "logits/chosen": 0.7186409831047058, "logits/rejected": 0.6497506499290466, "logps/chosen": -168.41812133789062, "logps/ref_chosen": -54.60407638549805, "logps/ref_rejected": -79.94635009765625, "logps/rejected": -262.03155517578125, "loss": 1.0416, "margin_dpo/margin_mean": 68.27113342285156, "margin_dpo/margin_std": 97.92758178710938, "step": 475 }, { "epoch": 0.7256235827664399, "fcm_dpo/beta": 0.00875013880431652, "fcm_dpo/delta": 0.02565288171172142, "fcm_dpo/margin": 54.354835510253906, "fcm_dpo/q_t": 0.39852726459503174, "grad_norm": 17.79652976989746, "learning_rate": 1.0714794091391072e-07, "logits/chosen": 0.6550045013427734, "logits/rejected": 0.6366511583328247, "logps/chosen": -192.35037231445312, "logps/ref_chosen": -63.0672492980957, "logps/ref_rejected": -68.59602355957031, "logps/rejected": -252.2339324951172, "loss": 1.1297, "margin_dpo/margin_mean": 54.354835510253906, "margin_dpo/margin_std": 96.1163101196289, "step": 480 }, { "epoch": 0.7331821617535903, "fcm_dpo/beta": 0.008799830451607704, "fcm_dpo/delta": 0.002256347332149744, "fcm_dpo/margin": 59.921966552734375, "fcm_dpo/q_t": 0.3874397277832031, "grad_norm": 14.530784606933594, "learning_rate": 1.0177301773633992e-07, "logits/chosen": 0.7001765370368958, "logits/rejected": 0.6357440948486328, "logps/chosen": -185.18301391601562, "logps/ref_chosen": -58.75799560546875, "logps/ref_rejected": -79.72233581542969, "logps/rejected": -266.0693054199219, "loss": 1.0675, "margin_dpo/margin_mean": 59.921974182128906, "margin_dpo/margin_std": 89.0008316040039, "step": 485 }, { "epoch": 0.7407407407407407, "fcm_dpo/beta": 0.00887683592736721, "fcm_dpo/delta": 0.020326469093561172, "fcm_dpo/margin": 53.544097900390625, "fcm_dpo/q_t": 0.4002537131309509, "grad_norm": 14.441047668457031, "learning_rate": 9.650174444319956e-08, "logits/chosen": 0.7496536374092102, "logits/rejected": 0.6850180625915527, "logps/chosen": -195.7850799560547, "logps/ref_chosen": -61.394195556640625, "logps/ref_rejected": -81.1914291381836, "logps/rejected": -269.12640380859375, "loss": 1.1317, "margin_dpo/margin_mean": 53.544097900390625, "margin_dpo/margin_std": 96.80680847167969, "step": 490 }, { "epoch": 0.7482993197278912, "fcm_dpo/beta": 0.008899571374058723, "fcm_dpo/delta": -0.002458928618580103, "fcm_dpo/margin": 53.95166778564453, "fcm_dpo/q_t": 0.39891940355300903, "grad_norm": 18.731355667114258, "learning_rate": 9.133780704940594e-08, "logits/chosen": 0.6664924621582031, "logits/rejected": 0.600567102432251, "logps/chosen": -189.1266632080078, "logps/ref_chosen": -59.85382843017578, "logps/ref_rejected": -80.63748931884766, "logps/rejected": -263.86199951171875, "loss": 1.1266, "margin_dpo/margin_mean": 53.95166778564453, "margin_dpo/margin_std": 95.11207580566406, "step": 495 }, { "epoch": 0.7558578987150416, "fcm_dpo/beta": 0.008871853351593018, "fcm_dpo/delta": 0.011157815344631672, "fcm_dpo/margin": 64.1400375366211, "fcm_dpo/q_t": 0.3798808455467224, "grad_norm": 18.928569793701172, "learning_rate": 8.628481651367875e-08, "logits/chosen": 0.6870494484901428, "logits/rejected": 0.6374934911727905, "logps/chosen": -194.50869750976562, "logps/ref_chosen": -66.17753601074219, "logps/ref_rejected": -83.75955200195312, "logps/rejected": -276.23077392578125, "loss": 1.0621, "margin_dpo/margin_mean": 64.1400375366211, "margin_dpo/margin_std": 97.7823257446289, "step": 500 }, { "epoch": 0.763416477702192, "fcm_dpo/beta": 0.009083734825253487, "fcm_dpo/delta": -0.0031442195177078247, "fcm_dpo/margin": 59.1200065612793, "fcm_dpo/q_t": 0.3837282359600067, "grad_norm": 16.126550674438477, "learning_rate": 8.134630621352483e-08, "logits/chosen": 0.6756311655044556, "logits/rejected": 0.655124843120575, "logps/chosen": -185.13583374023438, "logps/ref_chosen": -62.11005401611328, "logps/ref_rejected": -74.64705657958984, "logps/rejected": -256.7928466796875, "loss": 1.0546, "margin_dpo/margin_mean": 59.1200065612793, "margin_dpo/margin_std": 83.1792984008789, "step": 505 }, { "epoch": 0.7709750566893424, "fcm_dpo/beta": 0.008973537012934685, "fcm_dpo/delta": -0.01379710715264082, "fcm_dpo/margin": 61.247772216796875, "fcm_dpo/q_t": 0.38576018810272217, "grad_norm": 15.6004056930542, "learning_rate": 7.652572947447272e-08, "logits/chosen": 0.6822357177734375, "logits/rejected": 0.6185404658317566, "logps/chosen": -192.7943572998047, "logps/ref_chosen": -64.42265319824219, "logps/ref_rejected": -87.00096130371094, "logps/rejected": -276.62042236328125, "loss": 1.0879, "margin_dpo/margin_mean": 61.247772216796875, "margin_dpo/margin_std": 99.09281158447266, "step": 510 }, { "epoch": 0.7785336356764928, "fcm_dpo/beta": 0.008877582848072052, "fcm_dpo/delta": 0.016705047339200974, "fcm_dpo/margin": 63.62353515625, "fcm_dpo/q_t": 0.37913957238197327, "grad_norm": 17.3339900970459, "learning_rate": 7.182645715528435e-08, "logits/chosen": 0.7022300958633423, "logits/rejected": 0.6536498665809631, "logps/chosen": -183.25079345703125, "logps/ref_chosen": -58.284393310546875, "logps/ref_rejected": -79.09356689453125, "logps/rejected": -267.6835021972656, "loss": 1.0458, "margin_dpo/margin_mean": 63.62353515625, "margin_dpo/margin_std": 90.51985168457031, "step": 515 }, { "epoch": 0.7860922146636432, "fcm_dpo/beta": 0.009218396618962288, "fcm_dpo/delta": 0.03311960771679878, "fcm_dpo/margin": 47.081138610839844, "fcm_dpo/q_t": 0.40798163414001465, "grad_norm": 17.232772827148438, "learning_rate": 6.725177529083209e-08, "logits/chosen": 0.7172441482543945, "logits/rejected": 0.6776331067085266, "logps/chosen": -190.6273956298828, "logps/ref_chosen": -61.03638458251953, "logps/ref_rejected": -72.15824890136719, "logps/rejected": -248.83041381835938, "loss": 1.1669, "margin_dpo/margin_mean": 47.081138610839844, "margin_dpo/margin_std": 92.7890396118164, "step": 520 }, { "epoch": 0.7936507936507936, "fcm_dpo/beta": 0.009218786843121052, "fcm_dpo/delta": -0.009822583757340908, "fcm_dpo/margin": 54.7793083190918, "fcm_dpo/q_t": 0.39144212007522583, "grad_norm": 19.94845199584961, "learning_rate": 6.280488279429185e-08, "logits/chosen": 0.6219805479049683, "logits/rejected": 0.5724949240684509, "logps/chosen": -194.5631103515625, "logps/ref_chosen": -68.02732849121094, "logps/ref_rejected": -85.41429901123047, "logps/rejected": -266.7294006347656, "loss": 1.1074, "margin_dpo/margin_mean": 54.77930450439453, "margin_dpo/margin_std": 92.09957122802734, "step": 525 }, { "epoch": 0.8012093726379441, "fcm_dpo/beta": 0.009130226448178291, "fcm_dpo/delta": -0.02196129783987999, "fcm_dpo/margin": 50.85799026489258, "fcm_dpo/q_t": 0.40008097887039185, "grad_norm": 18.58346939086914, "learning_rate": 5.848888922025552e-08, "logits/chosen": 0.6770039796829224, "logits/rejected": 0.6091686487197876, "logps/chosen": -189.2750701904297, "logps/ref_chosen": -58.67436599731445, "logps/ref_rejected": -79.38807678222656, "logps/rejected": -260.8467712402344, "loss": 1.1307, "margin_dpo/margin_mean": 50.857994079589844, "margin_dpo/margin_std": 87.7162857055664, "step": 530 }, { "epoch": 0.8087679516250945, "fcm_dpo/beta": 0.0091576362028718, "fcm_dpo/delta": 0.029367243871092796, "fcm_dpo/margin": 56.45143508911133, "fcm_dpo/q_t": 0.39041271805763245, "grad_norm": 18.556217193603516, "learning_rate": 5.430681259032957e-08, "logits/chosen": 0.6739664077758789, "logits/rejected": 0.6132084727287292, "logps/chosen": -183.68429565429688, "logps/ref_chosen": -57.640098571777344, "logps/ref_rejected": -77.25399780273438, "logps/rejected": -259.7496643066406, "loss": 1.0926, "margin_dpo/margin_mean": 56.451438903808594, "margin_dpo/margin_std": 91.31838989257812, "step": 535 }, { "epoch": 0.8163265306122449, "fcm_dpo/beta": 0.009130528196692467, "fcm_dpo/delta": -0.028515305370092392, "fcm_dpo/margin": 64.78633117675781, "fcm_dpo/q_t": 0.3742666244506836, "grad_norm": 15.093501091003418, "learning_rate": 5.026157728273966e-08, "logits/chosen": 0.7153981924057007, "logits/rejected": 0.6350281238555908, "logps/chosen": -190.83726501464844, "logps/ref_chosen": -60.17341995239258, "logps/ref_rejected": -85.50316619873047, "logps/rejected": -280.9533386230469, "loss": 1.0162, "margin_dpo/margin_mean": 64.78633117675781, "margin_dpo/margin_std": 85.54998779296875, "step": 540 }, { "epoch": 0.8238851095993953, "fcm_dpo/beta": 0.008908280171453953, "fcm_dpo/delta": -0.029253508895635605, "fcm_dpo/margin": 66.00330352783203, "fcm_dpo/q_t": 0.37347084283828735, "grad_norm": 16.25635528564453, "learning_rate": 4.635601198741607e-08, "logits/chosen": 0.7022188305854797, "logits/rejected": 0.6508811116218567, "logps/chosen": -179.33633422851562, "logps/ref_chosen": -56.985809326171875, "logps/ref_rejected": -73.21353912353516, "logps/rejected": -261.56732177734375, "loss": 1.029, "margin_dpo/margin_mean": 66.00330352783203, "margin_dpo/margin_std": 89.81163787841797, "step": 545 }, { "epoch": 0.8314436885865457, "fcm_dpo/beta": 0.008911579847335815, "fcm_dpo/delta": 0.03279992565512657, "fcm_dpo/margin": 52.2221794128418, "fcm_dpo/q_t": 0.3997356593608856, "grad_norm": 16.76923179626465, "learning_rate": 4.259284772799099e-08, "logits/chosen": 0.6925860643386841, "logits/rejected": 0.6450766324996948, "logps/chosen": -189.15829467773438, "logps/ref_chosen": -59.600929260253906, "logps/ref_rejected": -75.24870300292969, "logps/rejected": -257.02825927734375, "loss": 1.1275, "margin_dpo/margin_mean": 52.2221794128418, "margin_dpo/margin_std": 90.92387390136719, "step": 550 }, { "epoch": 0.8390022675736961, "fcm_dpo/beta": 0.009163258597254753, "fcm_dpo/delta": 0.0038489706348627806, "fcm_dpo/margin": 56.18461990356445, "fcm_dpo/q_t": 0.38901767134666443, "grad_norm": 16.93880271911621, "learning_rate": 3.89747159520904e-08, "logits/chosen": 0.6867181062698364, "logits/rejected": 0.637639045715332, "logps/chosen": -197.40147399902344, "logps/ref_chosen": -63.578895568847656, "logps/ref_rejected": -78.87867736816406, "logps/rejected": -268.8858947753906, "loss": 1.0913, "margin_dpo/margin_mean": 56.18461990356445, "margin_dpo/margin_std": 90.01570129394531, "step": 555 }, { "epoch": 0.8465608465608465, "fcm_dpo/beta": 0.008957808837294579, "fcm_dpo/delta": -0.0419851616024971, "fcm_dpo/margin": 64.90444946289062, "fcm_dpo/q_t": 0.3777204155921936, "grad_norm": 16.868953704833984, "learning_rate": 3.550414669125573e-08, "logits/chosen": 0.7213538289070129, "logits/rejected": 0.6617583632469177, "logps/chosen": -192.423828125, "logps/ref_chosen": -58.651512145996094, "logps/ref_rejected": -78.67181396484375, "logps/rejected": -277.34857177734375, "loss": 1.0363, "margin_dpo/margin_mean": 64.90444946289062, "margin_dpo/margin_std": 90.19843292236328, "step": 560 }, { "epoch": 0.854119425547997, "fcm_dpo/beta": 0.008735055103898048, "fcm_dpo/delta": -0.009981656447052956, "fcm_dpo/margin": 67.18910217285156, "fcm_dpo/q_t": 0.3747270703315735, "grad_norm": 14.736648559570312, "learning_rate": 3.218356679178252e-08, "logits/chosen": 0.7032049298286438, "logits/rejected": 0.6600871682167053, "logps/chosen": -187.43948364257812, "logps/ref_chosen": -60.3114128112793, "logps/ref_rejected": -78.25270080566406, "logps/rejected": -272.56982421875, "loss": 1.028, "margin_dpo/margin_mean": 67.18910217285156, "margin_dpo/margin_std": 91.42976379394531, "step": 565 }, { "epoch": 0.8616780045351474, "fcm_dpo/beta": 0.008505801670253277, "fcm_dpo/delta": -0.04424377158284187, "fcm_dpo/margin": 65.48918914794922, "fcm_dpo/q_t": 0.3805517554283142, "grad_norm": 12.924057006835938, "learning_rate": 2.9015298217712453e-08, "logits/chosen": 0.6734435558319092, "logits/rejected": 0.6112924814224243, "logps/chosen": -184.1428680419922, "logps/ref_chosen": -57.752410888671875, "logps/ref_rejected": -76.99858093261719, "logps/rejected": -268.8782043457031, "loss": 1.049, "margin_dpo/margin_mean": 65.48918914794922, "margin_dpo/margin_std": 91.4268798828125, "step": 570 }, { "epoch": 0.8692365835222978, "fcm_dpo/beta": 0.008431388065218925, "fcm_dpo/delta": 0.01765955612063408, "fcm_dpo/margin": 56.814170837402344, "fcm_dpo/q_t": 0.39621537923812866, "grad_norm": 14.525823593139648, "learning_rate": 2.600155642716606e-08, "logits/chosen": 0.701653242111206, "logits/rejected": 0.66460120677948, "logps/chosen": -196.06419372558594, "logps/ref_chosen": -63.61958694458008, "logps/ref_rejected": -79.51353454589844, "logps/rejected": -268.7723388671875, "loss": 1.101, "margin_dpo/margin_mean": 56.814170837402344, "margin_dpo/margin_std": 90.25949096679688, "step": 575 }, { "epoch": 0.8767951625094482, "fcm_dpo/beta": 0.008416803553700447, "fcm_dpo/delta": 0.0003098793386016041, "fcm_dpo/margin": 62.08296585083008, "fcm_dpo/q_t": 0.38741156458854675, "grad_norm": 15.001502990722656, "learning_rate": 2.3144448823151392e-08, "logits/chosen": 0.6825278997421265, "logits/rejected": 0.6386707425117493, "logps/chosen": -182.51686096191406, "logps/ref_chosen": -57.3541145324707, "logps/ref_rejected": -73.14434051513672, "logps/rejected": -260.3900451660156, "loss": 1.0717, "margin_dpo/margin_mean": 62.08296585083008, "margin_dpo/margin_std": 92.49629211425781, "step": 580 }, { "epoch": 0.8843537414965986, "fcm_dpo/beta": 0.008522504940629005, "fcm_dpo/delta": 0.025987576693296432, "fcm_dpo/margin": 57.36644744873047, "fcm_dpo/q_t": 0.39568692445755005, "grad_norm": 14.261137962341309, "learning_rate": 2.044597327993153e-08, "logits/chosen": 0.7333514094352722, "logits/rejected": 0.6711838245391846, "logps/chosen": -186.538330078125, "logps/ref_chosen": -56.0127067565918, "logps/ref_rejected": -77.16522216796875, "logps/rejected": -265.05731201171875, "loss": 1.1076, "margin_dpo/margin_mean": 57.36644744873047, "margin_dpo/margin_std": 95.25784301757812, "step": 585 }, { "epoch": 0.891912320483749, "fcm_dpo/beta": 0.008789935149252415, "fcm_dpo/delta": 0.02721945382654667, "fcm_dpo/margin": 63.61796188354492, "fcm_dpo/q_t": 0.3782525360584259, "grad_norm": 15.488899230957031, "learning_rate": 1.7908016745981856e-08, "logits/chosen": 0.717170774936676, "logits/rejected": 0.6842165589332581, "logps/chosen": -190.9798126220703, "logps/ref_chosen": -60.5894660949707, "logps/ref_rejected": -74.34771728515625, "logps/rejected": -268.35601806640625, "loss": 1.0465, "margin_dpo/margin_mean": 63.61796188354492, "margin_dpo/margin_std": 90.69212341308594, "step": 590 }, { "epoch": 0.8994708994708994, "fcm_dpo/beta": 0.008735476061701775, "fcm_dpo/delta": -0.010337557643651962, "fcm_dpo/margin": 67.07391357421875, "fcm_dpo/q_t": 0.3761887550354004, "grad_norm": 14.772221565246582, "learning_rate": 1.553235392451377e-08, "logits/chosen": 0.7367411255836487, "logits/rejected": 0.6589676141738892, "logps/chosen": -176.36756896972656, "logps/ref_chosen": -54.77838897705078, "logps/ref_rejected": -78.102783203125, "logps/rejected": -266.765869140625, "loss": 1.0433, "margin_dpo/margin_mean": 67.07390594482422, "margin_dpo/margin_std": 95.8719482421875, "step": 595 }, { "epoch": 0.9070294784580499, "fcm_dpo/beta": 0.008924348279833794, "fcm_dpo/delta": 0.013378431089222431, "fcm_dpo/margin": 45.238609313964844, "fcm_dpo/q_t": 0.41461238265037537, "grad_norm": 18.25551986694336, "learning_rate": 1.3320646032487393e-08, "logits/chosen": 0.7176660299301147, "logits/rejected": 0.6818990111351013, "logps/chosen": -193.6077880859375, "logps/ref_chosen": -58.45500564575195, "logps/ref_rejected": -70.7367172241211, "logps/rejected": -251.1281280517578, "loss": 1.1653, "margin_dpo/margin_mean": 45.238609313964844, "margin_dpo/margin_std": 87.94874572753906, "step": 600 }, { "epoch": 0.9070294784580499, "eval_fcm_dpo/beta": 0.008908301591873169, "eval_logits/chosen": 0.7206099629402161, "eval_logits/rejected": 0.6751406192779541, "eval_logps/chosen": -205.21878051757812, "eval_logps/ref_chosen": -74.85946655273438, "eval_logps/ref_rejected": -79.54898834228516, "eval_logps/rejected": -265.1909484863281, "eval_loss": 0.5591413378715515, "eval_margin_dpo/margin_mean": 55.28264236450195, "eval_margin_dpo/margin_std": 95.356689453125, "eval_runtime": 39.0896, "eval_samples_per_second": 58.916, "eval_steps_per_second": 1.842, "step": 600 }, { "epoch": 0.9145880574452003, "fcm_dpo/beta": 0.00893603079020977, "fcm_dpo/delta": 0.01746518909931183, "fcm_dpo/margin": 63.60172653198242, "fcm_dpo/q_t": 0.37951910495758057, "grad_norm": 16.666015625, "learning_rate": 1.1274439638981532e-08, "logits/chosen": 0.7014644742012024, "logits/rejected": 0.6582333445549011, "logps/chosen": -179.68258666992188, "logps/ref_chosen": -59.87483596801758, "logps/ref_rejected": -75.75318908691406, "logps/rejected": -259.16265869140625, "loss": 1.0544, "margin_dpo/margin_mean": 63.60172653198242, "margin_dpo/margin_std": 93.95574951171875, "step": 605 }, { "epoch": 0.9221466364323507, "fcm_dpo/beta": 0.008915998041629791, "fcm_dpo/delta": -0.01860709860920906, "fcm_dpo/margin": 66.0380859375, "fcm_dpo/q_t": 0.37500935792922974, "grad_norm": 14.762502670288086, "learning_rate": 9.395165583732379e-09, "logits/chosen": 0.706168532371521, "logits/rejected": 0.6614619493484497, "logps/chosen": -186.66229248046875, "logps/ref_chosen": -60.35883712768555, "logps/ref_rejected": -81.3543930053711, "logps/rejected": -273.6959228515625, "loss": 1.0278, "margin_dpo/margin_mean": 66.03807830810547, "margin_dpo/margin_std": 89.66718292236328, "step": 610 }, { "epoch": 0.9297052154195011, "fcm_dpo/beta": 0.008840186521410942, "fcm_dpo/delta": -0.026694372296333313, "fcm_dpo/margin": 66.6244125366211, "fcm_dpo/q_t": 0.3726271688938141, "grad_norm": 15.342317581176758, "learning_rate": 7.684137976598088e-09, "logits/chosen": 0.6743055582046509, "logits/rejected": 0.6356549859046936, "logps/chosen": -185.5474395751953, "logps/ref_chosen": -59.17219161987305, "logps/ref_rejected": -79.92167663574219, "logps/rejected": -272.9213562011719, "loss": 1.0255, "margin_dpo/margin_mean": 66.62440490722656, "margin_dpo/margin_std": 89.74223327636719, "step": 615 }, { "epoch": 0.9372637944066515, "fcm_dpo/beta": 0.00864451751112938, "fcm_dpo/delta": -0.011452676728367805, "fcm_dpo/margin": 58.722190856933594, "fcm_dpo/q_t": 0.39084118604660034, "grad_norm": 17.209096908569336, "learning_rate": 6.142553278648238e-09, "logits/chosen": 0.7176477313041687, "logits/rejected": 0.6551352739334106, "logps/chosen": -185.00552368164062, "logps/ref_chosen": -58.052696228027344, "logps/ref_rejected": -78.37252807617188, "logps/rejected": -264.04754638671875, "loss": 1.0716, "margin_dpo/margin_mean": 58.722190856933594, "margin_dpo/margin_std": 86.17276763916016, "step": 620 }, { "epoch": 0.9448223733938019, "fcm_dpo/beta": 0.008548585698008537, "fcm_dpo/delta": -0.0001246035099029541, "fcm_dpo/margin": 60.6451530456543, "fcm_dpo/q_t": 0.3894796669483185, "grad_norm": 14.61771011352539, "learning_rate": 4.7714894655209174e-09, "logits/chosen": 0.708177924156189, "logits/rejected": 0.6277607679367065, "logps/chosen": -186.0558624267578, "logps/ref_chosen": -56.957862854003906, "logps/ref_rejected": -82.68255615234375, "logps/rejected": -272.4256896972656, "loss": 1.0748, "margin_dpo/margin_mean": 60.6451530456543, "margin_dpo/margin_std": 92.42916870117188, "step": 625 }, { "epoch": 0.9523809523809523, "fcm_dpo/beta": 0.008414940908551216, "fcm_dpo/delta": -0.02683025598526001, "fcm_dpo/margin": 70.0490951538086, "fcm_dpo/q_t": 0.3737347424030304, "grad_norm": 11.857268333435059, "learning_rate": 3.5719052736323806e-09, "logits/chosen": 0.7122366428375244, "logits/rejected": 0.6418401598930359, "logps/chosen": -179.7769012451172, "logps/ref_chosen": -56.71510696411133, "logps/ref_rejected": -82.94544219970703, "logps/rejected": -276.05633544921875, "loss": 1.0285, "margin_dpo/margin_mean": 70.0490951538086, "margin_dpo/margin_std": 94.82304382324219, "step": 630 }, { "epoch": 0.9599395313681028, "fcm_dpo/beta": 0.008062823675572872, "fcm_dpo/delta": -0.03275930508971214, "fcm_dpo/margin": 59.115440368652344, "fcm_dpo/q_t": 0.3975747227668762, "grad_norm": 13.964070320129395, "learning_rate": 2.5446395297668287e-09, "logits/chosen": 0.6974132061004639, "logits/rejected": 0.6502237319946289, "logps/chosen": -186.69309997558594, "logps/ref_chosen": -59.33793258666992, "logps/ref_rejected": -75.01703643798828, "logps/rejected": -261.4876403808594, "loss": 1.1043, "margin_dpo/margin_mean": 59.115440368652344, "margin_dpo/margin_std": 93.75061798095703, "step": 635 }, { "epoch": 0.9674981103552532, "fcm_dpo/beta": 0.008327251300215721, "fcm_dpo/delta": 0.06021968647837639, "fcm_dpo/margin": 62.01350784301758, "fcm_dpo/q_t": 0.38918977975845337, "grad_norm": 14.674144744873047, "learning_rate": 1.690410564514244e-09, "logits/chosen": 0.6967864036560059, "logits/rejected": 0.6229372024536133, "logps/chosen": -191.25723266601562, "logps/ref_chosen": -58.1605339050293, "logps/ref_rejected": -79.85365295410156, "logps/rejected": -274.9638671875, "loss": 1.0702, "margin_dpo/margin_mean": 62.01350784301758, "margin_dpo/margin_std": 92.87417602539062, "step": 640 }, { "epoch": 0.9750566893424036, "fcm_dpo/beta": 0.008840223774313927, "fcm_dpo/delta": 0.05628042295575142, "fcm_dpo/margin": 54.37908935546875, "fcm_dpo/q_t": 0.3959693908691406, "grad_norm": 16.510818481445312, "learning_rate": 1.0098157099674987e-09, "logits/chosen": 0.6872873306274414, "logits/rejected": 0.6607547998428345, "logps/chosen": -193.90921020507812, "logps/ref_chosen": -63.45180130004883, "logps/ref_rejected": -74.18285369873047, "logps/rejected": -259.01934814453125, "loss": 1.1021, "margin_dpo/margin_mean": 54.37909698486328, "margin_dpo/margin_std": 87.93758392333984, "step": 645 }, { "epoch": 0.982615268329554, "fcm_dpo/beta": 0.009017017669975758, "fcm_dpo/delta": -0.01029270887374878, "fcm_dpo/margin": 63.77238845825195, "fcm_dpo/q_t": 0.3787182569503784, "grad_norm": 15.225814819335938, "learning_rate": 5.033308820289184e-10, "logits/chosen": 0.7203370332717896, "logits/rejected": 0.6512852311134338, "logps/chosen": -195.11532592773438, "logps/ref_chosen": -59.75496292114258, "logps/ref_rejected": -84.31481170654297, "logps/rejected": -283.4475402832031, "loss": 1.0611, "margin_dpo/margin_mean": 63.77238082885742, "margin_dpo/margin_std": 96.05448913574219, "step": 650 }, { "epoch": 0.9901738473167044, "fcm_dpo/beta": 0.008963796310126781, "fcm_dpo/delta": -0.022582078352570534, "fcm_dpo/margin": 60.55501174926758, "fcm_dpo/q_t": 0.3851621747016907, "grad_norm": 13.021146774291992, "learning_rate": 1.7131024761923852e-10, "logits/chosen": 0.7035177946090698, "logits/rejected": 0.6318515539169312, "logps/chosen": -185.91603088378906, "logps/ref_chosen": -57.817848205566406, "logps/ref_rejected": -79.81755065917969, "logps/rejected": -268.4707336425781, "loss": 1.0654, "margin_dpo/margin_mean": 60.55500411987305, "margin_dpo/margin_std": 88.59367370605469, "step": 655 }, { "epoch": 0.9977324263038548, "fcm_dpo/beta": 0.008747505024075508, "fcm_dpo/delta": -0.01453787088394165, "fcm_dpo/margin": 64.36149597167969, "fcm_dpo/q_t": 0.3801228702068329, "grad_norm": 15.389237403869629, "learning_rate": 1.3985977021235829e-11, "logits/chosen": 0.7586897611618042, "logits/rejected": 0.6943166255950928, "logps/chosen": -190.80844116210938, "logps/ref_chosen": -59.12651443481445, "logps/ref_rejected": -79.42085266113281, "logps/rejected": -275.46429443359375, "loss": 1.0594, "margin_dpo/margin_mean": 64.36149597167969, "margin_dpo/margin_std": 96.14913940429688, "step": 660 }, { "epoch": 0.999244142101285, "step": 661, "total_flos": 0.0, "train_loss": 1.120150500454809, "train_runtime": 1811.4729, "train_samples_per_second": 23.371, "train_steps_per_second": 0.365 } ], "logging_steps": 5, "max_steps": 661, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }