{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 200, "global_step": 681, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0014684287812041115, "fcm_dpo/beta": 0.10000000149011612, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.02287006378173828, "fcm_dpo/q_t": 0.5005706548690796, "grad_norm": 83.50728607177734, "learning_rate": 0.0, "logits/chosen": -0.4974287748336792, "logits/rejected": -0.43299180269241333, "logps/chosen": -50.1435661315918, "logps/ref_chosen": -50.14883804321289, "logps/ref_rejected": -74.1280517578125, "logps/rejected": -74.09991455078125, "loss": 1.389, "margin_dpo/margin_mean": -0.02287048101425171, "margin_dpo/margin_std": 0.41920793056488037, "step": 1 }, { "epoch": 0.007342143906020558, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": -0.03240281343460083, "fcm_dpo/q_t": 0.5008102059364319, "grad_norm": 90.14773559570312, "learning_rate": 2.898550724637681e-08, "logits/chosen": -0.4901035726070404, "logits/rejected": -0.4534408450126648, "logps/chosen": -56.07246017456055, "logps/ref_chosen": -56.05734634399414, "logps/ref_rejected": -78.69325256347656, "logps/rejected": -78.67597198486328, "loss": 1.3899, "margin_dpo/margin_mean": -0.03240284323692322, "margin_dpo/margin_std": 0.3555586636066437, "step": 5 }, { "epoch": 0.014684287812041116, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.027925759553909302, "fcm_dpo/q_t": 0.4993022382259369, "grad_norm": 70.48045349121094, "learning_rate": 6.521739130434782e-08, "logits/chosen": -0.5015245079994202, "logits/rejected": -0.4629823565483093, "logps/chosen": -59.527122497558594, "logps/ref_chosen": -59.54457473754883, "logps/ref_rejected": -91.17041778564453, "logps/rejected": -91.18089294433594, "loss": 1.3839, "margin_dpo/margin_mean": 0.027925794944167137, "margin_dpo/margin_std": 0.37033817172050476, "step": 10 }, { "epoch": 0.022026431718061675, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.005324178840965033, "fcm_dpo/q_t": 0.49986687302589417, "grad_norm": 64.33786010742188, "learning_rate": 1.0144927536231885e-07, "logits/chosen": -0.4974799156188965, "logits/rejected": -0.46847113966941833, "logps/chosen": -58.83959197998047, "logps/ref_chosen": -58.83195877075195, "logps/ref_rejected": -92.93949890136719, "logps/rejected": -92.95245361328125, "loss": 1.3861, "margin_dpo/margin_mean": 0.005324071738868952, "margin_dpo/margin_std": 0.36571556329727173, "step": 15 }, { "epoch": 0.02936857562408223, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.05579507350921631, "fcm_dpo/q_t": 0.49860554933547974, "grad_norm": 73.8245620727539, "learning_rate": 1.3768115942028986e-07, "logits/chosen": -0.5040138959884644, "logits/rejected": -0.45514219999313354, "logps/chosen": -59.63999557495117, "logps/ref_chosen": -59.6396598815918, "logps/ref_rejected": -82.76026916503906, "logps/rejected": -82.81639862060547, "loss": 1.381, "margin_dpo/margin_mean": 0.055795006453990936, "margin_dpo/margin_std": 0.33391329646110535, "step": 20 }, { "epoch": 0.03671071953010279, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.20841345191001892, "fcm_dpo/q_t": 0.49479326605796814, "grad_norm": 73.5445785522461, "learning_rate": 1.7391304347826085e-07, "logits/chosen": -0.5032899975776672, "logits/rejected": -0.4763486981391907, "logps/chosen": -53.173057556152344, "logps/ref_chosen": -53.205284118652344, "logps/ref_rejected": -88.99608612060547, "logps/rejected": -89.17227172851562, "loss": 1.366, "margin_dpo/margin_mean": 0.20841336250305176, "margin_dpo/margin_std": 0.4185457229614258, "step": 25 }, { "epoch": 0.04405286343612335, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.4891234338283539, "fcm_dpo/q_t": 0.48778820037841797, "grad_norm": 87.73991394042969, "learning_rate": 2.1014492753623187e-07, "logits/chosen": -0.5239602327346802, "logits/rejected": -0.48419055342674255, "logps/chosen": -53.45922088623047, "logps/ref_chosen": -53.5526008605957, "logps/ref_rejected": -97.87371826171875, "logps/rejected": -98.26947021484375, "loss": 1.3389, "margin_dpo/margin_mean": 0.4891238212585449, "margin_dpo/margin_std": 0.5750466585159302, "step": 30 }, { "epoch": 0.0513950073421439, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 0.7763983607292175, "fcm_dpo/q_t": 0.48066458106040955, "grad_norm": 82.94285583496094, "learning_rate": 2.463768115942029e-07, "logits/chosen": -0.4989829957485199, "logits/rejected": -0.4650956094264984, "logps/chosen": -56.198211669921875, "logps/ref_chosen": -56.3298454284668, "logps/ref_rejected": -91.76858520507812, "logps/rejected": -92.41334533691406, "loss": 1.3122, "margin_dpo/margin_mean": 0.776398241519928, "margin_dpo/margin_std": 0.8276771306991577, "step": 35 }, { "epoch": 0.05873715124816446, "fcm_dpo/beta": 0.10000000894069672, "fcm_dpo/delta": 0.0, "fcm_dpo/margin": 1.359745979309082, "fcm_dpo/q_t": 0.46632710099220276, "grad_norm": 60.41274642944336, "learning_rate": 2.8260869565217386e-07, "logits/chosen": -0.5347701907157898, "logits/rejected": -0.4986083507537842, "logps/chosen": -54.27339553833008, "logps/ref_chosen": -54.38492965698242, "logps/ref_rejected": -82.94353485107422, "logps/rejected": -84.19175720214844, "loss": 1.2606, "margin_dpo/margin_mean": 1.359745979309082, "margin_dpo/margin_std": 1.4517606496810913, "step": 40 }, { "epoch": 0.06607929515418502, "fcm_dpo/beta": 0.1127050369977951, "fcm_dpo/delta": 0.25362294912338257, "fcm_dpo/margin": 2.3980860710144043, "fcm_dpo/q_t": 0.4357197880744934, "grad_norm": 74.45612335205078, "learning_rate": 3.188405797101449e-07, "logits/chosen": -0.5095103979110718, "logits/rejected": -0.48132508993148804, "logps/chosen": -54.6392822265625, "logps/ref_chosen": -54.862335205078125, "logps/ref_rejected": -98.0264663696289, "logps/rejected": -100.20148468017578, "loss": 1.1498, "margin_dpo/margin_mean": 2.398085832595825, "margin_dpo/margin_std": 2.2269370555877686, "step": 45 }, { "epoch": 0.07342143906020558, "fcm_dpo/beta": 0.141450434923172, "fcm_dpo/delta": 0.12390259653329849, "fcm_dpo/margin": 3.386523485183716, "fcm_dpo/q_t": 0.3908053934574127, "grad_norm": 79.67459869384766, "learning_rate": 3.5507246376811595e-07, "logits/chosen": -0.5551148653030396, "logits/rejected": -0.5035051107406616, "logps/chosen": -58.14664840698242, "logps/ref_chosen": -58.304595947265625, "logps/ref_rejected": -91.69480895996094, "logps/rejected": -94.92338562011719, "loss": 1.0187, "margin_dpo/margin_mean": 3.386524200439453, "margin_dpo/margin_std": 3.3679816722869873, "step": 50 }, { "epoch": 0.08076358296622614, "fcm_dpo/beta": 0.135177880525589, "fcm_dpo/delta": -0.16671812534332275, "fcm_dpo/margin": 5.600610256195068, "fcm_dpo/q_t": 0.3425524830818176, "grad_norm": 62.811153411865234, "learning_rate": 3.9130434782608694e-07, "logits/chosen": -0.6010715961456299, "logits/rejected": -0.5568638443946838, "logps/chosen": -56.37145233154297, "logps/ref_chosen": -56.06591796875, "logps/ref_rejected": -85.69367980957031, "logps/rejected": -91.59982299804688, "loss": 0.8962, "margin_dpo/margin_mean": 5.600610256195068, "margin_dpo/margin_std": 5.793082237243652, "step": 55 }, { "epoch": 0.0881057268722467, "fcm_dpo/beta": 0.11089271306991577, "fcm_dpo/delta": -0.1925317347049713, "fcm_dpo/margin": 7.025670528411865, "fcm_dpo/q_t": 0.33725228905677795, "grad_norm": 67.2679214477539, "learning_rate": 4.2753623188405794e-07, "logits/chosen": -0.6061812043190002, "logits/rejected": -0.5570945739746094, "logps/chosen": -61.9241828918457, "logps/ref_chosen": -60.6871337890625, "logps/ref_rejected": -89.72715759277344, "logps/rejected": -97.98988342285156, "loss": 0.8969, "margin_dpo/margin_mean": 7.025670528411865, "margin_dpo/margin_std": 7.496710777282715, "step": 60 }, { "epoch": 0.09544787077826726, "fcm_dpo/beta": 0.09299755096435547, "fcm_dpo/delta": -0.18385855853557587, "fcm_dpo/margin": 8.298527717590332, "fcm_dpo/q_t": 0.3424831032752991, "grad_norm": 48.89730453491211, "learning_rate": 4.63768115942029e-07, "logits/chosen": -0.6179511547088623, "logits/rejected": -0.5864478945732117, "logps/chosen": -63.573402404785156, "logps/ref_chosen": -61.75325393676758, "logps/ref_rejected": -93.30108642578125, "logps/rejected": -103.41975402832031, "loss": 0.923, "margin_dpo/margin_mean": 8.298527717590332, "margin_dpo/margin_std": 9.724918365478516, "step": 65 }, { "epoch": 0.1027900146842878, "fcm_dpo/beta": 0.07844052463769913, "fcm_dpo/delta": -0.1582036018371582, "fcm_dpo/margin": 9.539754867553711, "fcm_dpo/q_t": 0.3441976308822632, "grad_norm": 47.65688705444336, "learning_rate": 5e-07, "logits/chosen": -0.6304086446762085, "logits/rejected": -0.5917232632637024, "logps/chosen": -62.56956100463867, "logps/ref_chosen": -59.548004150390625, "logps/ref_rejected": -84.01609802246094, "logps/rejected": -96.57740783691406, "loss": 0.9041, "margin_dpo/margin_mean": 9.539755821228027, "margin_dpo/margin_std": 10.295551300048828, "step": 70 }, { "epoch": 0.11013215859030837, "fcm_dpo/beta": 0.06165589019656181, "fcm_dpo/delta": -0.32924890518188477, "fcm_dpo/margin": 14.760737419128418, "fcm_dpo/q_t": 0.3237493336200714, "grad_norm": 36.49312973022461, "learning_rate": 4.999176576834721e-07, "logits/chosen": -0.6605738997459412, "logits/rejected": -0.6328510642051697, "logps/chosen": -65.28561401367188, "logps/ref_chosen": -59.86931228637695, "logps/ref_rejected": -98.05613708496094, "logps/rejected": -118.2331771850586, "loss": 0.873, "margin_dpo/margin_mean": 14.760736465454102, "margin_dpo/margin_std": 17.107942581176758, "step": 75 }, { "epoch": 0.11747430249632893, "fcm_dpo/beta": 0.04629804939031601, "fcm_dpo/delta": -0.19796454906463623, "fcm_dpo/margin": 16.89699935913086, "fcm_dpo/q_t": 0.341538667678833, "grad_norm": 35.74776077270508, "learning_rate": 4.996706849759452e-07, "logits/chosen": -0.6835442781448364, "logits/rejected": -0.6468649506568909, "logps/chosen": -63.93366622924805, "logps/ref_chosen": -56.18925857543945, "logps/ref_rejected": -86.42393493652344, "logps/rejected": -111.06534576416016, "loss": 0.9203, "margin_dpo/margin_mean": 16.896997451782227, "margin_dpo/margin_std": 19.718297958374023, "step": 80 }, { "epoch": 0.12481644640234948, "fcm_dpo/beta": 0.0381317213177681, "fcm_dpo/delta": -0.16901178658008575, "fcm_dpo/margin": 19.726295471191406, "fcm_dpo/q_t": 0.34707337617874146, "grad_norm": 34.31068420410156, "learning_rate": 4.992592445678582e-07, "logits/chosen": -0.6622103452682495, "logits/rejected": -0.6311969757080078, "logps/chosen": -70.46139526367188, "logps/ref_chosen": -60.018287658691406, "logps/ref_rejected": -98.01185607910156, "logps/rejected": -128.18124389648438, "loss": 0.9443, "margin_dpo/margin_mean": 19.726295471191406, "margin_dpo/margin_std": 24.040042877197266, "step": 85 }, { "epoch": 0.13215859030837004, "fcm_dpo/beta": 0.03405915945768356, "fcm_dpo/delta": -0.11596596240997314, "fcm_dpo/margin": 20.768291473388672, "fcm_dpo/q_t": 0.3624621331691742, "grad_norm": 35.00300216674805, "learning_rate": 4.986836074908615e-07, "logits/chosen": -0.7018736600875854, "logits/rejected": -0.6867517232894897, "logps/chosen": -73.39559173583984, "logps/ref_chosen": -59.8709831237793, "logps/ref_rejected": -96.78519439697266, "logps/rejected": -131.07809448242188, "loss": 1.0061, "margin_dpo/margin_mean": 20.768291473388672, "margin_dpo/margin_std": 29.607013702392578, "step": 90 }, { "epoch": 0.1395007342143906, "fcm_dpo/beta": 0.030608216300606728, "fcm_dpo/delta": -0.07700999826192856, "fcm_dpo/margin": 21.932090759277344, "fcm_dpo/q_t": 0.36035576462745667, "grad_norm": 27.68400764465332, "learning_rate": 4.979441529392784e-07, "logits/chosen": -0.708720326423645, "logits/rejected": -0.6767187714576721, "logps/chosen": -69.35963439941406, "logps/ref_chosen": -55.94385528564453, "logps/ref_rejected": -83.6790542602539, "logps/rejected": -119.02693939208984, "loss": 0.9664, "margin_dpo/margin_mean": 21.932090759277344, "margin_dpo/margin_std": 26.880752563476562, "step": 95 }, { "epoch": 0.14684287812041116, "fcm_dpo/beta": 0.028173187747597694, "fcm_dpo/delta": -0.068596251308918, "fcm_dpo/margin": 23.49247169494629, "fcm_dpo/q_t": 0.36101511120796204, "grad_norm": 30.916765213012695, "learning_rate": 4.970413680203148e-07, "logits/chosen": -0.6772828698158264, "logits/rejected": -0.648100733757019, "logps/chosen": -71.47965240478516, "logps/ref_chosen": -57.05888748168945, "logps/ref_rejected": -86.11727142333984, "logps/rejected": -124.03050231933594, "loss": 0.9722, "margin_dpo/margin_mean": 23.49247169494629, "margin_dpo/margin_std": 28.96224594116211, "step": 100 }, { "epoch": 0.15418502202643172, "fcm_dpo/beta": 0.027121257036924362, "fcm_dpo/delta": -0.13553811609745026, "fcm_dpo/margin": 26.961578369140625, "fcm_dpo/q_t": 0.35333341360092163, "grad_norm": 26.486059188842773, "learning_rate": 4.959758474331832e-07, "logits/chosen": -0.6960592269897461, "logits/rejected": -0.6600139141082764, "logps/chosen": -76.32167053222656, "logps/ref_chosen": -59.20774459838867, "logps/ref_rejected": -86.49754333496094, "logps/rejected": -130.57305908203125, "loss": 0.9567, "margin_dpo/margin_mean": 26.961578369140625, "margin_dpo/margin_std": 32.831111907958984, "step": 105 }, { "epoch": 0.16152716593245228, "fcm_dpo/beta": 0.02301758900284767, "fcm_dpo/delta": -0.11018934100866318, "fcm_dpo/margin": 30.556344985961914, "fcm_dpo/q_t": 0.3538368046283722, "grad_norm": 24.114713668823242, "learning_rate": 4.947482930773511e-07, "logits/chosen": -0.6646202206611633, "logits/rejected": -0.6281755566596985, "logps/chosen": -78.81887817382812, "logps/ref_chosen": -60.437957763671875, "logps/ref_rejected": -90.83917999267578, "logps/rejected": -139.77645874023438, "loss": 0.9511, "margin_dpo/margin_mean": 30.556344985961914, "margin_dpo/margin_std": 35.99966812133789, "step": 110 }, { "epoch": 0.16886930983847284, "fcm_dpo/beta": 0.021153923124074936, "fcm_dpo/delta": -0.041334737092256546, "fcm_dpo/margin": 30.124019622802734, "fcm_dpo/q_t": 0.3688841462135315, "grad_norm": 40.84029769897461, "learning_rate": 4.933595135901732e-07, "logits/chosen": -0.6649340391159058, "logits/rejected": -0.6294328570365906, "logps/chosen": -84.20191955566406, "logps/ref_chosen": -61.7908821105957, "logps/ref_rejected": -85.36943054199219, "logps/rejected": -137.90447998046875, "loss": 0.9992, "margin_dpo/margin_mean": 30.124013900756836, "margin_dpo/margin_std": 39.94293212890625, "step": 115 }, { "epoch": 0.1762114537444934, "fcm_dpo/beta": 0.02078414149582386, "fcm_dpo/delta": -0.037118665874004364, "fcm_dpo/margin": 30.540584564208984, "fcm_dpo/q_t": 0.36713889241218567, "grad_norm": 26.78792381286621, "learning_rate": 4.918104238142103e-07, "logits/chosen": -0.6711692214012146, "logits/rejected": -0.645135760307312, "logps/chosen": -91.19302368164062, "logps/ref_chosen": -65.3261489868164, "logps/ref_rejected": -86.75518798828125, "logps/rejected": -143.1626434326172, "loss": 0.9818, "margin_dpo/margin_mean": 30.540584564208984, "margin_dpo/margin_std": 38.079750061035156, "step": 120 }, { "epoch": 0.18355359765051396, "fcm_dpo/beta": 0.017505459487438202, "fcm_dpo/delta": -0.22550848126411438, "fcm_dpo/margin": 46.04296112060547, "fcm_dpo/q_t": 0.3401046693325043, "grad_norm": 23.552217483520508, "learning_rate": 4.90102044194588e-07, "logits/chosen": -0.6151807904243469, "logits/rejected": -0.6104758381843567, "logps/chosen": -87.12136840820312, "logps/ref_chosen": -58.323204040527344, "logps/ref_rejected": -101.2106704711914, "logps/rejected": -176.0518035888672, "loss": 0.9204, "margin_dpo/margin_mean": 46.0429573059082, "margin_dpo/margin_std": 55.54075241088867, "step": 125 }, { "epoch": 0.19089574155653452, "fcm_dpo/beta": 0.01598326489329338, "fcm_dpo/delta": -0.04410712048411369, "fcm_dpo/margin": 40.082298278808594, "fcm_dpo/q_t": 0.3680208623409271, "grad_norm": 22.16413116455078, "learning_rate": 4.882355001067891e-07, "logits/chosen": -0.5932961106300354, "logits/rejected": -0.5749183893203735, "logps/chosen": -86.93000793457031, "logps/ref_chosen": -56.38518524169922, "logps/ref_rejected": -86.15767669677734, "logps/rejected": -156.78482055664062, "loss": 1.0035, "margin_dpo/margin_mean": 40.082298278808594, "margin_dpo/margin_std": 53.219139099121094, "step": 130 }, { "epoch": 0.19823788546255505, "fcm_dpo/beta": 0.014573054388165474, "fcm_dpo/delta": -0.09526528418064117, "fcm_dpo/margin": 47.24794387817383, "fcm_dpo/q_t": 0.3577379286289215, "grad_norm": 25.677669525146484, "learning_rate": 4.862120211153265e-07, "logits/chosen": -0.5761778950691223, "logits/rejected": -0.5731192827224731, "logps/chosen": -86.5953140258789, "logps/ref_chosen": -54.59065628051758, "logps/ref_rejected": -95.26080322265625, "logps/rejected": -174.51339721679688, "loss": 0.9545, "margin_dpo/margin_mean": 47.24794387817383, "margin_dpo/margin_std": 57.28125762939453, "step": 135 }, { "epoch": 0.2055800293685756, "fcm_dpo/beta": 0.013362633995711803, "fcm_dpo/delta": -0.08761467784643173, "fcm_dpo/margin": 51.02484893798828, "fcm_dpo/q_t": 0.3625403940677643, "grad_norm": 25.558738708496094, "learning_rate": 4.840329401637809e-07, "logits/chosen": -0.5525860786437988, "logits/rejected": -0.545661449432373, "logps/chosen": -96.27259826660156, "logps/ref_chosen": -56.04347610473633, "logps/ref_rejected": -93.27880859375, "logps/rejected": -184.53277587890625, "loss": 0.9755, "margin_dpo/margin_mean": 51.02485275268555, "margin_dpo/margin_std": 65.68046569824219, "step": 140 }, { "epoch": 0.21292217327459617, "fcm_dpo/beta": 0.012635116465389729, "fcm_dpo/delta": -0.008492978289723396, "fcm_dpo/margin": 48.067604064941406, "fcm_dpo/q_t": 0.3737943470478058, "grad_norm": 29.300233840942383, "learning_rate": 4.816996926967401e-07, "logits/chosen": -0.5054234862327576, "logits/rejected": -0.4867471754550934, "logps/chosen": -107.9009017944336, "logps/ref_chosen": -61.4414176940918, "logps/ref_rejected": -86.32813262939453, "logps/rejected": -180.85520935058594, "loss": 1.0202, "margin_dpo/margin_mean": 48.067596435546875, "margin_dpo/margin_std": 66.08811950683594, "step": 145 }, { "epoch": 0.22026431718061673, "fcm_dpo/beta": 0.012664164416491985, "fcm_dpo/delta": -0.01627928391098976, "fcm_dpo/margin": 48.467201232910156, "fcm_dpo/q_t": 0.3732047379016876, "grad_norm": 25.043779373168945, "learning_rate": 4.792138157142157e-07, "logits/chosen": -0.5404887199401855, "logits/rejected": -0.5210872888565063, "logps/chosen": -104.0806884765625, "logps/ref_chosen": -57.70451736450195, "logps/ref_rejected": -87.76991271972656, "logps/rejected": -182.61329650878906, "loss": 1.0113, "margin_dpo/margin_mean": 48.467201232910156, "margin_dpo/margin_std": 64.91874694824219, "step": 150 }, { "epoch": 0.2276064610866373, "fcm_dpo/beta": 0.011945498175919056, "fcm_dpo/delta": -0.06720416247844696, "fcm_dpo/margin": 55.43426513671875, "fcm_dpo/q_t": 0.3624417185783386, "grad_norm": 23.727567672729492, "learning_rate": 4.7657694675916247e-07, "logits/chosen": -0.581199586391449, "logits/rejected": -0.5655697584152222, "logps/chosen": -105.16175842285156, "logps/ref_chosen": -62.08925247192383, "logps/ref_rejected": -94.79930114746094, "logps/rejected": -193.30606079101562, "loss": 0.9764, "margin_dpo/margin_mean": 55.43426513671875, "margin_dpo/margin_std": 69.9148178100586, "step": 155 }, { "epoch": 0.23494860499265785, "fcm_dpo/beta": 0.011539025232195854, "fcm_dpo/delta": -0.045818835496902466, "fcm_dpo/margin": 55.6801643371582, "fcm_dpo/q_t": 0.3720964789390564, "grad_norm": 25.801401138305664, "learning_rate": 4.737908228387656e-07, "logits/chosen": -0.5244706869125366, "logits/rejected": -0.5115067362785339, "logps/chosen": -124.75065612792969, "logps/ref_chosen": -67.15288543701172, "logps/ref_rejected": -96.92537689208984, "logps/rejected": -210.2032928466797, "loss": 1.0372, "margin_dpo/margin_mean": 55.6801643371582, "margin_dpo/margin_std": 83.104736328125, "step": 160 }, { "epoch": 0.2422907488986784, "fcm_dpo/beta": 0.010906776413321495, "fcm_dpo/delta": -0.010979633778333664, "fcm_dpo/margin": 55.84454345703125, "fcm_dpo/q_t": 0.3736818730831146, "grad_norm": 37.94820022583008, "learning_rate": 4.708572792802069e-07, "logits/chosen": -0.5201188325881958, "logits/rejected": -0.49569135904312134, "logps/chosen": -110.22456359863281, "logps/ref_chosen": -57.40401077270508, "logps/ref_rejected": -80.31498718261719, "logps/rejected": -188.9801025390625, "loss": 1.0098, "margin_dpo/margin_mean": 55.84454345703125, "margin_dpo/margin_std": 74.67647552490234, "step": 165 }, { "epoch": 0.24963289280469897, "fcm_dpo/beta": 0.010051427409052849, "fcm_dpo/delta": -0.10779444873332977, "fcm_dpo/margin": 69.25593566894531, "fcm_dpo/q_t": 0.3577578365802765, "grad_norm": 23.627363204956055, "learning_rate": 4.6777824852166437e-07, "logits/chosen": -0.45740675926208496, "logits/rejected": -0.4491025507450104, "logps/chosen": -106.43888854980469, "logps/ref_chosen": -52.029144287109375, "logps/ref_rejected": -85.73944091796875, "logps/rejected": -209.40512084960938, "loss": 0.9592, "margin_dpo/margin_mean": 69.25593566894531, "margin_dpo/margin_std": 85.97371673583984, "step": 170 }, { "epoch": 0.25697503671071953, "fcm_dpo/beta": 0.009930510073900223, "fcm_dpo/delta": -0.047995198518037796, "fcm_dpo/margin": 65.00736236572266, "fcm_dpo/q_t": 0.3673258423805237, "grad_norm": 29.522018432617188, "learning_rate": 4.645557588393406e-07, "logits/chosen": -0.45035696029663086, "logits/rejected": -0.4322957396507263, "logps/chosen": -128.42086791992188, "logps/ref_chosen": -62.996971130371094, "logps/ref_rejected": -92.98394012451172, "logps/rejected": -223.41519165039062, "loss": 0.9915, "margin_dpo/margin_mean": 65.00736236572266, "margin_dpo/margin_std": 84.73751831054688, "step": 175 }, { "epoch": 0.2643171806167401, "fcm_dpo/beta": 0.008855604566633701, "fcm_dpo/delta": -0.11197604238986969, "fcm_dpo/margin": 79.53601837158203, "fcm_dpo/q_t": 0.35542401671409607, "grad_norm": 23.635892868041992, "learning_rate": 4.611919330113591e-07, "logits/chosen": -0.38669413328170776, "logits/rejected": -0.3846648335456848, "logps/chosen": -127.61091613769531, "logps/ref_chosen": -57.0670280456543, "logps/ref_rejected": -97.1115493774414, "logps/rejected": -247.19143676757812, "loss": 0.9528, "margin_dpo/margin_mean": 79.53601837158203, "margin_dpo/margin_std": 97.05994415283203, "step": 180 }, { "epoch": 0.27165932452276065, "fcm_dpo/beta": 0.008529609069228172, "fcm_dpo/delta": 0.05834978073835373, "fcm_dpo/margin": 58.99933624267578, "fcm_dpo/q_t": 0.39326274394989014, "grad_norm": 26.241926193237305, "learning_rate": 4.5768898691940836e-07, "logits/chosen": -0.421181857585907, "logits/rejected": -0.40174850821495056, "logps/chosen": -120.03946685791016, "logps/ref_chosen": -54.840736389160156, "logps/ref_rejected": -75.51002502441406, "logps/rejected": -199.70809936523438, "loss": 1.0713, "margin_dpo/margin_mean": 58.99933624267578, "margin_dpo/margin_std": 85.73370361328125, "step": 185 }, { "epoch": 0.2790014684287812, "fcm_dpo/beta": 0.008668321184813976, "fcm_dpo/delta": -0.054877202957868576, "fcm_dpo/margin": 75.03819274902344, "fcm_dpo/q_t": 0.3645266592502594, "grad_norm": 28.541696548461914, "learning_rate": 4.5404922808905543e-07, "logits/chosen": -0.41162386536598206, "logits/rejected": -0.39615827798843384, "logps/chosen": -127.11979675292969, "logps/ref_chosen": -57.72148895263672, "logps/ref_rejected": -86.85997009277344, "logps/rejected": -231.29647827148438, "loss": 0.9793, "margin_dpo/margin_mean": 75.03819274902344, "margin_dpo/margin_std": 94.09630584716797, "step": 190 }, { "epoch": 0.28634361233480177, "fcm_dpo/beta": 0.008089645765721798, "fcm_dpo/delta": -0.03434378653764725, "fcm_dpo/margin": 77.82075500488281, "fcm_dpo/q_t": 0.3677811920642853, "grad_norm": 28.007156372070312, "learning_rate": 4.5027505416968985e-07, "logits/chosen": -0.3651648759841919, "logits/rejected": -0.35718274116516113, "logps/chosen": -140.86399841308594, "logps/ref_chosen": -58.26164627075195, "logps/ref_rejected": -89.46485900878906, "logps/rejected": -249.8879852294922, "loss": 0.9959, "margin_dpo/margin_mean": 77.82075500488281, "margin_dpo/margin_std": 99.57084655761719, "step": 195 }, { "epoch": 0.2936857562408223, "fcm_dpo/beta": 0.007911969907581806, "fcm_dpo/delta": -0.04977406933903694, "fcm_dpo/margin": 81.63540649414062, "fcm_dpo/q_t": 0.365100622177124, "grad_norm": 28.69991111755371, "learning_rate": 4.4636895135509966e-07, "logits/chosen": -0.3688076138496399, "logits/rejected": -0.3557121157646179, "logps/chosen": -130.88851928710938, "logps/ref_chosen": -55.71953201293945, "logps/ref_rejected": -83.15235137939453, "logps/rejected": -239.95675659179688, "loss": 0.9767, "margin_dpo/margin_mean": 81.63540649414062, "margin_dpo/margin_std": 101.0685806274414, "step": 200 }, { "epoch": 0.2936857562408223, "eval_fcm_dpo/beta": 0.010551726445555687, "eval_fcm_dpo/delta": 0.055143389850854874, "eval_fcm_dpo/margin": 47.49774932861328, "eval_fcm_dpo/q_t": 0.4060860276222229, "eval_logits/chosen": -0.40839019417762756, "eval_logits/rejected": -0.3899528980255127, "eval_logps/chosen": -179.96328735351562, "eval_logps/ref_chosen": -79.05104064941406, "eval_logps/ref_rejected": -86.79793548583984, "eval_logps/rejected": -235.20794677734375, "eval_loss": 0.5984740257263184, "eval_margin_dpo/margin_mean": 47.49774932861328, "eval_margin_dpo/margin_std": 101.77977752685547, "eval_runtime": 39.7643, "eval_samples_per_second": 58.822, "eval_steps_per_second": 1.861, "step": 200 }, { "epoch": 0.3010279001468429, "fcm_dpo/beta": 0.010471022687852383, "fcm_dpo/delta": -0.17127129435539246, "fcm_dpo/margin": 72.22938537597656, "fcm_dpo/q_t": 0.3502196669578552, "grad_norm": 38.45856857299805, "learning_rate": 4.4233349274571974e-07, "logits/chosen": -0.40718260407447815, "logits/rejected": -0.3891496956348419, "logps/chosen": -145.1938934326172, "logps/ref_chosen": -65.13258361816406, "logps/ref_rejected": -92.10203552246094, "logps/rejected": -244.39273071289062, "loss": 0.9696, "margin_dpo/margin_mean": 72.22938537597656, "margin_dpo/margin_std": 95.00855255126953, "step": 205 }, { "epoch": 0.30837004405286345, "fcm_dpo/beta": 0.008621977642178535, "fcm_dpo/delta": -0.17882244288921356, "fcm_dpo/margin": 88.25994873046875, "fcm_dpo/q_t": 0.3427308201789856, "grad_norm": 25.199186325073242, "learning_rate": 4.381713366536311e-07, "logits/chosen": -0.3917924165725708, "logits/rejected": -0.3768480718135834, "logps/chosen": -135.15476989746094, "logps/ref_chosen": -54.52837371826172, "logps/ref_rejected": -87.06227111816406, "logps/rejected": -255.94863891601562, "loss": 0.9227, "margin_dpo/margin_mean": 88.25994110107422, "margin_dpo/margin_std": 101.74530029296875, "step": 210 }, { "epoch": 0.315712187958884, "fcm_dpo/beta": 0.0077339522540569305, "fcm_dpo/delta": -0.09488168358802795, "fcm_dpo/margin": 88.91537475585938, "fcm_dpo/q_t": 0.35661423206329346, "grad_norm": 28.223777770996094, "learning_rate": 4.3388522485142885e-07, "logits/chosen": -0.4153861403465271, "logits/rejected": -0.39465969800949097, "logps/chosen": -147.28273010253906, "logps/ref_chosen": -59.905250549316406, "logps/ref_rejected": -90.25511932373047, "logps/rejected": -266.5479736328125, "loss": 0.9549, "margin_dpo/margin_mean": 88.91536712646484, "margin_dpo/margin_std": 104.00807189941406, "step": 215 }, { "epoch": 0.32305433186490456, "fcm_dpo/beta": 0.007219684775918722, "fcm_dpo/delta": -0.007601064629852772, "fcm_dpo/margin": 83.97865295410156, "fcm_dpo/q_t": 0.3702937960624695, "grad_norm": 29.1616268157959, "learning_rate": 4.2947798076611047e-07, "logits/chosen": -0.4109880030155182, "logits/rejected": -0.3932788074016571, "logps/chosen": -151.2802734375, "logps/ref_chosen": -57.68498611450195, "logps/ref_rejected": -87.72792053222656, "logps/rejected": -265.3018798828125, "loss": 0.993, "margin_dpo/margin_mean": 83.97865295410156, "margin_dpo/margin_std": 102.7934799194336, "step": 220 }, { "epoch": 0.3303964757709251, "fcm_dpo/beta": 0.006763989571481943, "fcm_dpo/delta": -0.13978341221809387, "fcm_dpo/margin": 107.536376953125, "fcm_dpo/q_t": 0.3473281264305115, "grad_norm": 33.25477981567383, "learning_rate": 4.249525076191759e-07, "logits/chosen": -0.3368683457374573, "logits/rejected": -0.3260774612426758, "logps/chosen": -171.83956909179688, "logps/ref_chosen": -54.47245407104492, "logps/ref_rejected": -93.26266479492188, "logps/rejected": -318.1661071777344, "loss": 0.9375, "margin_dpo/margin_mean": 107.536376953125, "margin_dpo/margin_std": 123.95018005371094, "step": 225 }, { "epoch": 0.3377386196769457, "fcm_dpo/beta": 0.006237152963876724, "fcm_dpo/delta": -0.06372438371181488, "fcm_dpo/margin": 105.6369400024414, "fcm_dpo/q_t": 0.35965046286582947, "grad_norm": 26.257822036743164, "learning_rate": 4.203117865141635e-07, "logits/chosen": -0.37301507592201233, "logits/rejected": -0.3503766357898712, "logps/chosen": -172.18060302734375, "logps/ref_chosen": -58.7701301574707, "logps/ref_rejected": -87.85963439941406, "logps/rejected": -306.90704345703125, "loss": 0.9652, "margin_dpo/margin_mean": 105.6369400024414, "margin_dpo/margin_std": 124.76959228515625, "step": 230 }, { "epoch": 0.34508076358296624, "fcm_dpo/beta": 0.006058714352548122, "fcm_dpo/delta": 0.011923698708415031, "fcm_dpo/margin": 97.03569030761719, "fcm_dpo/q_t": 0.37649449706077576, "grad_norm": 38.68661880493164, "learning_rate": 4.1555887447288255e-07, "logits/chosen": -0.38763222098350525, "logits/rejected": -0.37434476613998413, "logps/chosen": -174.42852783203125, "logps/ref_chosen": -59.0481071472168, "logps/ref_rejected": -91.19654846191406, "logps/rejected": -303.6126403808594, "loss": 1.0075, "margin_dpo/margin_mean": 97.03569030761719, "margin_dpo/margin_std": 123.6645278930664, "step": 235 }, { "epoch": 0.3524229074889868, "fcm_dpo/beta": 0.005926494486629963, "fcm_dpo/delta": -0.02454141527414322, "fcm_dpo/margin": 104.9325942993164, "fcm_dpo/q_t": 0.3663932681083679, "grad_norm": 38.213321685791016, "learning_rate": 4.106969024216348e-07, "logits/chosen": -0.3597189784049988, "logits/rejected": -0.35432037711143494, "logps/chosen": -182.47573852539062, "logps/ref_chosen": -55.238983154296875, "logps/ref_rejected": -91.08428955078125, "logps/rejected": -323.2536926269531, "loss": 0.9776, "margin_dpo/margin_mean": 104.9325942993164, "margin_dpo/margin_std": 123.68228912353516, "step": 240 }, { "epoch": 0.35976505139500736, "fcm_dpo/beta": 0.005522926338016987, "fcm_dpo/delta": -0.09819710999727249, "fcm_dpo/margin": 124.70765686035156, "fcm_dpo/q_t": 0.35423046350479126, "grad_norm": 34.15117645263672, "learning_rate": 4.057290731287531e-07, "logits/chosen": -0.37256139516830444, "logits/rejected": -0.34320348501205444, "logps/chosen": -202.49417114257812, "logps/ref_chosen": -65.08844757080078, "logps/ref_rejected": -86.05777740478516, "logps/rejected": -348.1711730957031, "loss": 0.9381, "margin_dpo/margin_mean": 124.7076416015625, "margin_dpo/margin_std": 138.202880859375, "step": 245 }, { "epoch": 0.3671071953010279, "fcm_dpo/beta": 0.005308011546730995, "fcm_dpo/delta": -0.02011699415743351, "fcm_dpo/margin": 116.39664459228516, "fcm_dpo/q_t": 0.3652518391609192, "grad_norm": 38.33260726928711, "learning_rate": 4.006586590948141e-07, "logits/chosen": -0.38754525780677795, "logits/rejected": -0.36416977643966675, "logps/chosen": -204.6479034423828, "logps/ref_chosen": -59.08491897583008, "logps/ref_rejected": -87.36727142333984, "logps/rejected": -349.326904296875, "loss": 0.9688, "margin_dpo/margin_mean": 116.39664459228516, "margin_dpo/margin_std": 128.171875, "step": 250 }, { "epoch": 0.3744493392070485, "fcm_dpo/beta": 0.005321727134287357, "fcm_dpo/delta": 0.008542664349079132, "fcm_dpo/margin": 111.1045150756836, "fcm_dpo/q_t": 0.3720734715461731, "grad_norm": 34.73932647705078, "learning_rate": 3.954890003969163e-07, "logits/chosen": -0.37525609135627747, "logits/rejected": -0.3513938784599304, "logps/chosen": -217.4912567138672, "logps/ref_chosen": -61.85979461669922, "logps/ref_rejected": -88.32804107666016, "logps/rejected": -355.06402587890625, "loss": 1.0064, "margin_dpo/margin_mean": 111.1045150756836, "margin_dpo/margin_std": 139.63497924804688, "step": 255 }, { "epoch": 0.38179148311306904, "fcm_dpo/beta": 0.005232472904026508, "fcm_dpo/delta": -0.022610364481806755, "fcm_dpo/margin": 118.58265686035156, "fcm_dpo/q_t": 0.36637741327285767, "grad_norm": 43.125038146972656, "learning_rate": 3.9022350248844246e-07, "logits/chosen": -0.36137908697128296, "logits/rejected": -0.3586873412132263, "logps/chosen": -199.85968017578125, "logps/ref_chosen": -52.843467712402344, "logps/ref_rejected": -90.4744873046875, "logps/rejected": -356.0733642578125, "loss": 0.9784, "margin_dpo/margin_mean": 118.58265686035156, "margin_dpo/margin_std": 140.423095703125, "step": 260 }, { "epoch": 0.3891336270190896, "fcm_dpo/beta": 0.005059250630438328, "fcm_dpo/delta": -0.050201721489429474, "fcm_dpo/margin": 127.75601959228516, "fcm_dpo/q_t": 0.3604031205177307, "grad_norm": 59.875736236572266, "learning_rate": 3.848656339557562e-07, "logits/chosen": -0.3211337924003601, "logits/rejected": -0.3021458685398102, "logps/chosen": -235.56326293945312, "logps/ref_chosen": -59.35320281982422, "logps/ref_rejected": -90.73350524902344, "logps/rejected": -394.6995849609375, "loss": 0.961, "margin_dpo/margin_mean": 127.75602722167969, "margin_dpo/margin_std": 145.37832641601562, "step": 265 }, { "epoch": 0.3964757709251101, "fcm_dpo/beta": 0.004921893123537302, "fcm_dpo/delta": -0.016059506684541702, "fcm_dpo/margin": 124.87815856933594, "fcm_dpo/q_t": 0.36820322275161743, "grad_norm": 33.326416015625, "learning_rate": 3.794189242333106e-07, "logits/chosen": -0.3540351688861847, "logits/rejected": -0.33318471908569336, "logps/chosen": -250.6675567626953, "logps/ref_chosen": -66.30875396728516, "logps/ref_rejected": -95.4130630493164, "logps/rejected": -404.6500244140625, "loss": 0.9914, "margin_dpo/margin_mean": 124.87815856933594, "margin_dpo/margin_std": 155.1671600341797, "step": 270 }, { "epoch": 0.40381791483113066, "fcm_dpo/beta": 0.004636920988559723, "fcm_dpo/delta": -0.06920859962701797, "fcm_dpo/margin": 143.0980224609375, "fcm_dpo/q_t": 0.35574159026145935, "grad_norm": 35.31757736206055, "learning_rate": 3.738869612786737e-07, "logits/chosen": -0.29508358240127563, "logits/rejected": -0.28438499569892883, "logps/chosen": -223.6616973876953, "logps/ref_chosen": -54.69990921020508, "logps/ref_rejected": -92.23838806152344, "logps/rejected": -404.2982177734375, "loss": 0.9386, "margin_dpo/margin_mean": 143.0980224609375, "margin_dpo/margin_std": 151.79656982421875, "step": 275 }, { "epoch": 0.4111600587371512, "fcm_dpo/beta": 0.004416828043758869, "fcm_dpo/delta": -0.06373202055692673, "fcm_dpo/margin": 149.3416290283203, "fcm_dpo/q_t": 0.35707592964172363, "grad_norm": 52.24574279785156, "learning_rate": 3.6827338920900253e-07, "logits/chosen": -0.2858836352825165, "logits/rejected": -0.2778168320655823, "logps/chosen": -238.4672088623047, "logps/ref_chosen": -54.64586639404297, "logps/ref_rejected": -88.19416809082031, "logps/rejected": -421.35711669921875, "loss": 0.9399, "margin_dpo/margin_mean": 149.3416290283203, "margin_dpo/margin_std": 158.734619140625, "step": 280 }, { "epoch": 0.4185022026431718, "fcm_dpo/beta": 0.004238150082528591, "fcm_dpo/delta": -0.031216781586408615, "fcm_dpo/margin": 148.39759826660156, "fcm_dpo/q_t": 0.3608683943748474, "grad_norm": 36.15656661987305, "learning_rate": 3.625819059005228e-07, "logits/chosen": -0.3247283399105072, "logits/rejected": -0.3061348497867584, "logps/chosen": -271.26629638671875, "logps/ref_chosen": -63.02496337890625, "logps/ref_rejected": -93.16323852539062, "logps/rejected": -449.8021545410156, "loss": 0.9501, "margin_dpo/margin_mean": 148.39761352539062, "margin_dpo/margin_std": 154.8343048095703, "step": 285 }, { "epoch": 0.42584434654919234, "fcm_dpo/beta": 0.004017127677798271, "fcm_dpo/delta": -0.07293753325939178, "fcm_dpo/margin": 166.34750366210938, "fcm_dpo/q_t": 0.35577893257141113, "grad_norm": 28.974525451660156, "learning_rate": 3.568162605525952e-07, "logits/chosen": -0.28096064925193787, "logits/rejected": -0.26907119154930115, "logps/chosen": -278.18634033203125, "logps/ref_chosen": -58.37105178833008, "logps/ref_rejected": -91.59428405761719, "logps/rejected": -477.75714111328125, "loss": 0.9404, "margin_dpo/margin_mean": 166.34750366210938, "margin_dpo/margin_std": 178.529296875, "step": 290 }, { "epoch": 0.4331864904552129, "fcm_dpo/beta": 0.0038156050723046064, "fcm_dpo/delta": -0.006700708530843258, "fcm_dpo/margin": 158.75344848632812, "fcm_dpo/q_t": 0.3686682879924774, "grad_norm": 48.21137619018555, "learning_rate": 3.509802512179737e-07, "logits/chosen": -0.30817854404449463, "logits/rejected": -0.2976076900959015, "logps/chosen": -266.36083984375, "logps/ref_chosen": -55.113426208496094, "logps/ref_rejected": -85.29283905029297, "logps/rejected": -455.293701171875, "loss": 0.9886, "margin_dpo/margin_mean": 158.75344848632812, "margin_dpo/margin_std": 190.89013671875, "step": 295 }, { "epoch": 0.44052863436123346, "fcm_dpo/beta": 0.003914177417755127, "fcm_dpo/delta": -0.005944193806499243, "fcm_dpo/margin": 154.51087951660156, "fcm_dpo/q_t": 0.37021997570991516, "grad_norm": 48.85098648071289, "learning_rate": 3.4507772230088147e-07, "logits/chosen": -0.3540688157081604, "logits/rejected": -0.35069912672042847, "logps/chosen": -299.68817138671875, "logps/ref_chosen": -59.46582794189453, "logps/ref_rejected": -97.03690338134766, "logps/rejected": -491.77008056640625, "loss": 0.9968, "margin_dpo/margin_mean": 154.51087951660156, "margin_dpo/margin_std": 190.1680145263672, "step": 300 }, { "epoch": 0.447870778267254, "fcm_dpo/beta": 0.0037945318035781384, "fcm_dpo/delta": 0.018312707543373108, "fcm_dpo/margin": 153.49285888671875, "fcm_dpo/q_t": 0.3729243278503418, "grad_norm": 30.252052307128906, "learning_rate": 3.391125620245535e-07, "logits/chosen": -0.38246288895606995, "logits/rejected": -0.36992448568344116, "logps/chosen": -264.3889465332031, "logps/ref_chosen": -62.78144454956055, "logps/ref_rejected": -91.95039367675781, "logps/rejected": -447.05078125, "loss": 0.9864, "margin_dpo/margin_mean": 153.49285888671875, "margin_dpo/margin_std": 174.68467712402344, "step": 305 }, { "epoch": 0.4552129221732746, "fcm_dpo/beta": 0.0039411550387740135, "fcm_dpo/delta": 0.013458488509058952, "fcm_dpo/margin": 148.8785400390625, "fcm_dpo/q_t": 0.3692263960838318, "grad_norm": 32.29579544067383, "learning_rate": 3.3308869986991487e-07, "logits/chosen": -0.3574088215827942, "logits/rejected": -0.33746927976608276, "logps/chosen": -252.6819610595703, "logps/ref_chosen": -61.359039306640625, "logps/ref_rejected": -82.75496673583984, "logps/rejected": -422.9564514160156, "loss": 0.9708, "margin_dpo/margin_mean": 148.87855529785156, "margin_dpo/margin_std": 156.66542053222656, "step": 310 }, { "epoch": 0.46255506607929514, "fcm_dpo/beta": 0.0038400962948799133, "fcm_dpo/delta": -0.01564035564661026, "fcm_dpo/margin": 159.8279571533203, "fcm_dpo/q_t": 0.3695194125175476, "grad_norm": 48.496238708496094, "learning_rate": 3.270101039870797e-07, "logits/chosen": -0.30073413252830505, "logits/rejected": -0.29121869802474976, "logps/chosen": -291.8459167480469, "logps/ref_chosen": -51.77602005004883, "logps/ref_rejected": -84.58292388916016, "logps/rejected": -484.48077392578125, "loss": 0.9925, "margin_dpo/margin_mean": 159.8279571533203, "margin_dpo/margin_std": 199.2019805908203, "step": 315 }, { "epoch": 0.4698972099853157, "fcm_dpo/beta": 0.0036372647155076265, "fcm_dpo/delta": -0.10146065801382065, "fcm_dpo/margin": 190.74334716796875, "fcm_dpo/q_t": 0.3486129343509674, "grad_norm": 38.94823455810547, "learning_rate": 3.208807785813777e-07, "logits/chosen": -0.3392784595489502, "logits/rejected": -0.3321293592453003, "logps/chosen": -283.16229248046875, "logps/ref_chosen": -56.777862548828125, "logps/ref_rejected": -99.26368713378906, "logps/rejected": -516.3914184570312, "loss": 0.9121, "margin_dpo/margin_mean": 190.74334716796875, "margin_dpo/margin_std": 189.1331787109375, "step": 320 }, { "epoch": 0.47723935389133626, "fcm_dpo/beta": 0.003477086080238223, "fcm_dpo/delta": 0.012917397543787956, "fcm_dpo/margin": 168.87332153320312, "fcm_dpo/q_t": 0.3705959916114807, "grad_norm": 46.6136474609375, "learning_rate": 3.147047612756302e-07, "logits/chosen": -0.3095089793205261, "logits/rejected": -0.29444074630737305, "logps/chosen": -309.4768981933594, "logps/ref_chosen": -58.28468704223633, "logps/ref_rejected": -83.80326843261719, "logps/rejected": -503.86883544921875, "loss": 0.9858, "margin_dpo/margin_mean": 168.87332153320312, "margin_dpo/margin_std": 188.2744140625, "step": 325 }, { "epoch": 0.4845814977973568, "fcm_dpo/beta": 0.0035190985072404146, "fcm_dpo/delta": -0.028615426272153854, "fcm_dpo/margin": 178.0848388671875, "fcm_dpo/q_t": 0.3618434965610504, "grad_norm": 33.46125030517578, "learning_rate": 3.084861204504122e-07, "logits/chosen": -0.3480074405670166, "logits/rejected": -0.3351406455039978, "logps/chosen": -294.0130615234375, "logps/ref_chosen": -62.75822067260742, "logps/ref_rejected": -94.04203033447266, "logps/rejected": -503.3817443847656, "loss": 0.9518, "margin_dpo/margin_mean": 178.0848388671875, "margin_dpo/margin_std": 184.51199340820312, "step": 330 }, { "epoch": 0.4919236417033774, "fcm_dpo/beta": 0.003449521493166685, "fcm_dpo/delta": 0.025535067543387413, "fcm_dpo/margin": 166.8475799560547, "fcm_dpo/q_t": 0.374795138835907, "grad_norm": 38.16062927246094, "learning_rate": 3.022289525640531e-07, "logits/chosen": -0.35363438725471497, "logits/rejected": -0.3413279056549072, "logps/chosen": -310.823486328125, "logps/ref_chosen": -58.59650421142578, "logps/ref_rejected": -88.69586944580078, "logps/rejected": -507.77044677734375, "loss": 0.9994, "margin_dpo/margin_mean": 166.8475799560547, "margin_dpo/margin_std": 196.09652709960938, "step": 335 }, { "epoch": 0.49926578560939794, "fcm_dpo/beta": 0.00335273751989007, "fcm_dpo/delta": -0.06508271396160126, "fcm_dpo/margin": 196.6968536376953, "fcm_dpo/q_t": 0.3557378649711609, "grad_norm": 33.71820831298828, "learning_rate": 2.959373794541426e-07, "logits/chosen": -0.3746200203895569, "logits/rejected": -0.3606112599372864, "logps/chosen": -308.72821044921875, "logps/ref_chosen": -58.18162155151367, "logps/ref_rejected": -94.44358825683594, "logps/rejected": -541.68701171875, "loss": 0.9382, "margin_dpo/margin_mean": 196.6968536376953, "margin_dpo/margin_std": 204.6141357421875, "step": 340 }, { "epoch": 0.5066079295154186, "fcm_dpo/beta": 0.003242532955482602, "fcm_dpo/delta": -0.03586641699075699, "fcm_dpo/margin": 195.18536376953125, "fcm_dpo/q_t": 0.36007246375083923, "grad_norm": 31.20911979675293, "learning_rate": 2.896155456223163e-07, "logits/chosen": -0.4067712724208832, "logits/rejected": -0.3969467282295227, "logps/chosen": -296.33489990234375, "logps/ref_chosen": -57.9904899597168, "logps/ref_rejected": -99.11092376708984, "logps/rejected": -532.6407470703125, "loss": 0.9402, "margin_dpo/margin_mean": 195.18536376953125, "margin_dpo/margin_std": 193.4260711669922, "step": 345 }, { "epoch": 0.5139500734214391, "fcm_dpo/beta": 0.0032154968939721584, "fcm_dpo/delta": 0.021350596100091934, "fcm_dpo/margin": 180.19900512695312, "fcm_dpo/q_t": 0.3731505274772644, "grad_norm": 44.616703033447266, "learning_rate": 2.8326761550411346e-07, "logits/chosen": -0.38301217555999756, "logits/rejected": -0.37859946489334106, "logps/chosen": -335.93255615234375, "logps/ref_chosen": -58.29923629760742, "logps/ref_rejected": -89.25711822509766, "logps/rejected": -547.0894775390625, "loss": 0.9973, "margin_dpo/margin_mean": 180.19900512695312, "margin_dpo/margin_std": 214.97982788085938, "step": 350 }, { "epoch": 0.5212922173274597, "fcm_dpo/beta": 0.0032494659535586834, "fcm_dpo/delta": -0.003944025840610266, "fcm_dpo/margin": 185.5841522216797, "fcm_dpo/q_t": 0.3689090609550476, "grad_norm": 71.0094223022461, "learning_rate": 2.7689777072570284e-07, "logits/chosen": -0.4027875065803528, "logits/rejected": -0.3871976435184479, "logps/chosen": -334.85272216796875, "logps/ref_chosen": -60.788482666015625, "logps/ref_rejected": -85.94129943847656, "logps/rejected": -545.5897216796875, "loss": 0.9792, "margin_dpo/margin_mean": 185.5841522216797, "margin_dpo/margin_std": 211.45913696289062, "step": 355 }, { "epoch": 0.5286343612334802, "fcm_dpo/beta": 0.0032737895380705595, "fcm_dpo/delta": 0.027207667008042336, "fcm_dpo/margin": 163.24708557128906, "fcm_dpo/q_t": 0.3836524188518524, "grad_norm": 41.69169616699219, "learning_rate": 2.7051020734928443e-07, "logits/chosen": -0.37818944454193115, "logits/rejected": -0.3610234558582306, "logps/chosen": -335.9218444824219, "logps/ref_chosen": -57.6871337890625, "logps/ref_rejected": -80.62527465820312, "logps/rejected": -522.1071166992188, "loss": 1.0286, "margin_dpo/margin_mean": 163.24708557128906, "margin_dpo/margin_std": 207.1353759765625, "step": 360 }, { "epoch": 0.5359765051395007, "fcm_dpo/beta": 0.0032265144400298595, "fcm_dpo/delta": -0.0519678071141243, "fcm_dpo/margin": 200.8280029296875, "fcm_dpo/q_t": 0.35923275351524353, "grad_norm": 34.91660690307617, "learning_rate": 2.641091331089811e-07, "logits/chosen": -0.33706134557724, "logits/rejected": -0.3363405764102936, "logps/chosen": -300.1896667480469, "logps/ref_chosen": -51.490867614746094, "logps/ref_rejected": -91.02871704101562, "logps/rejected": -540.5555419921875, "loss": 0.9461, "margin_dpo/margin_mean": 200.82803344726562, "margin_dpo/margin_std": 215.3280487060547, "step": 365 }, { "epoch": 0.5433186490455213, "fcm_dpo/beta": 0.0032107695005834103, "fcm_dpo/delta": 0.05830075219273567, "fcm_dpo/margin": 169.78909301757812, "fcm_dpo/q_t": 0.37912872433662415, "grad_norm": 50.44911193847656, "learning_rate": 2.5769876463904263e-07, "logits/chosen": -0.3650849461555481, "logits/rejected": -0.35905131697654724, "logps/chosen": -303.07366943359375, "logps/ref_chosen": -58.113502502441406, "logps/ref_rejected": -89.43451690673828, "logps/rejected": -504.18377685546875, "loss": 1.0101, "margin_dpo/margin_mean": 169.78909301757812, "margin_dpo/margin_std": 197.232666015625, "step": 370 }, { "epoch": 0.5506607929515418, "fcm_dpo/beta": 0.003249173518270254, "fcm_dpo/delta": -0.03621614724397659, "fcm_dpo/margin": 194.74484252929688, "fcm_dpo/q_t": 0.3639177083969116, "grad_norm": 40.54216003417969, "learning_rate": 2.512833246961859e-07, "logits/chosen": -0.41527968645095825, "logits/rejected": -0.3979854881763458, "logps/chosen": -325.7327880859375, "logps/ref_chosen": -65.23600769042969, "logps/ref_rejected": -89.24995422363281, "logps/rejected": -544.4915771484375, "loss": 0.9763, "margin_dpo/margin_mean": 194.74484252929688, "margin_dpo/margin_std": 230.34060668945312, "step": 375 }, { "epoch": 0.5580029368575624, "fcm_dpo/beta": 0.003087093820795417, "fcm_dpo/delta": -0.08724673092365265, "fcm_dpo/margin": 220.7941131591797, "fcm_dpo/q_t": 0.3510977625846863, "grad_norm": 30.83326530456543, "learning_rate": 2.4486703937790243e-07, "logits/chosen": -0.35653212666511536, "logits/rejected": -0.36338263750076294, "logps/chosen": -309.36334228515625, "logps/ref_chosen": -53.33893966674805, "logps/ref_rejected": -102.15375518798828, "logps/rejected": -578.9722900390625, "loss": 0.9291, "margin_dpo/margin_mean": 220.79409790039062, "margin_dpo/margin_std": 231.2888641357422, "step": 380 }, { "epoch": 0.5653450807635829, "fcm_dpo/beta": 0.0030052317306399345, "fcm_dpo/delta": 0.018545908853411674, "fcm_dpo/margin": 193.73928833007812, "fcm_dpo/q_t": 0.37150126695632935, "grad_norm": 33.003395080566406, "learning_rate": 2.3845413533856514e-07, "logits/chosen": -0.3649640679359436, "logits/rejected": -0.3546612858772278, "logps/chosen": -344.1658630371094, "logps/ref_chosen": -58.36262130737305, "logps/ref_rejected": -89.44685363769531, "logps/rejected": -568.9893798828125, "loss": 0.9882, "margin_dpo/margin_mean": 193.73927307128906, "margin_dpo/margin_std": 221.2782440185547, "step": 385 }, { "epoch": 0.5726872246696035, "fcm_dpo/beta": 0.0029720370657742023, "fcm_dpo/delta": -0.022272076457738876, "fcm_dpo/margin": 208.7709503173828, "fcm_dpo/q_t": 0.36599045991897583, "grad_norm": 34.66581726074219, "learning_rate": 2.320488370051681e-07, "logits/chosen": -0.3872172236442566, "logits/rejected": -0.3775530159473419, "logps/chosen": -367.145263671875, "logps/ref_chosen": -56.380653381347656, "logps/ref_rejected": -90.47447204589844, "logps/rejected": -610.010009765625, "loss": 0.9741, "margin_dpo/margin_mean": 208.7709503173828, "margin_dpo/margin_std": 244.04592895507812, "step": 390 }, { "epoch": 0.580029368575624, "fcm_dpo/beta": 0.0030248172115534544, "fcm_dpo/delta": 0.0180402509868145, "fcm_dpo/margin": 192.4487762451172, "fcm_dpo/q_t": 0.37626224756240845, "grad_norm": 38.42062759399414, "learning_rate": 2.2565536379453404e-07, "logits/chosen": -0.40114063024520874, "logits/rejected": -0.39468201994895935, "logps/chosen": -349.0114440917969, "logps/ref_chosen": -55.95304489135742, "logps/ref_rejected": -87.13162994384766, "logps/rejected": -572.6387939453125, "loss": 1.0072, "margin_dpo/margin_mean": 192.4487762451172, "margin_dpo/margin_std": 244.62527465820312, "step": 395 }, { "epoch": 0.5873715124816447, "fcm_dpo/beta": 0.0028987762052565813, "fcm_dpo/delta": -0.08543933928012848, "fcm_dpo/margin": 234.6150665283203, "fcm_dpo/q_t": 0.3521673381328583, "grad_norm": 28.816965103149414, "learning_rate": 2.192779273338215e-07, "logits/chosen": -0.4242188036441803, "logits/rejected": -0.40835875272750854, "logps/chosen": -368.6856994628906, "logps/ref_chosen": -64.59160614013672, "logps/ref_rejected": -96.700927734375, "logps/rejected": -635.4100341796875, "loss": 0.9245, "margin_dpo/margin_mean": 234.6150665283203, "margin_dpo/margin_std": 239.69271850585938, "step": 400 }, { "epoch": 0.5873715124816447, "eval_fcm_dpo/beta": 0.003817289602011442, "eval_fcm_dpo/delta": 0.06625650823116302, "eval_fcm_dpo/margin": 134.67041015625, "eval_fcm_dpo/q_t": 0.39730218052864075, "eval_logits/chosen": -0.43868309259414673, "eval_logits/rejected": -0.4222582280635834, "eval_logps/chosen": -454.1283264160156, "eval_logps/ref_chosen": -79.05104064941406, "eval_logps/ref_rejected": -86.79793548583984, "eval_logps/rejected": -596.5455932617188, "eval_loss": 0.5729401111602783, "eval_margin_dpo/margin_mean": 134.67041015625, "eval_margin_dpo/margin_std": 251.6155242919922, "eval_runtime": 39.7615, "eval_samples_per_second": 58.826, "eval_steps_per_second": 1.861, "step": 400 }, { "epoch": 0.5947136563876652, "fcm_dpo/beta": 0.003784316824749112, "fcm_dpo/delta": -0.2645108699798584, "fcm_dpo/margin": 221.6338348388672, "fcm_dpo/q_t": 0.3333556056022644, "grad_norm": 62.11888885498047, "learning_rate": 2.129207286861638e-07, "logits/chosen": -0.40359169244766235, "logits/rejected": -0.39246273040771484, "logps/chosen": -352.8330078125, "logps/ref_chosen": -53.61777877807617, "logps/ref_rejected": -81.28938293457031, "logps/rejected": -602.1383666992188, "loss": 0.9244, "margin_dpo/margin_mean": 221.6338348388672, "margin_dpo/margin_std": 262.4577941894531, "step": 405 }, { "epoch": 0.6020558002936858, "fcm_dpo/beta": 0.0033690209966152906, "fcm_dpo/delta": -0.02751757577061653, "fcm_dpo/margin": 185.41323852539062, "fcm_dpo/q_t": 0.3654642701148987, "grad_norm": 36.665096282958984, "learning_rate": 2.065879555832674e-07, "logits/chosen": -0.4079107642173767, "logits/rejected": -0.3933170437812805, "logps/chosen": -350.5545654296875, "logps/ref_chosen": -58.9287223815918, "logps/ref_rejected": -85.55818176269531, "logps/rejected": -562.5972290039062, "loss": 0.9824, "margin_dpo/margin_mean": 185.41323852539062, "margin_dpo/margin_std": 221.5907440185547, "step": 410 }, { "epoch": 0.6093979441997063, "fcm_dpo/beta": 0.003266123589128256, "fcm_dpo/delta": -0.03337870165705681, "fcm_dpo/margin": 192.88467407226562, "fcm_dpo/q_t": 0.3626883924007416, "grad_norm": 39.306392669677734, "learning_rate": 2.002837796667909e-07, "logits/chosen": -0.382286936044693, "logits/rejected": -0.3848063051700592, "logps/chosen": -352.2253112792969, "logps/ref_chosen": -58.45662307739258, "logps/ref_rejected": -93.67063903808594, "logps/rejected": -580.3240356445312, "loss": 0.9633, "margin_dpo/margin_mean": 192.88467407226562, "margin_dpo/margin_std": 213.9280548095703, "step": 415 }, { "epoch": 0.6167400881057269, "fcm_dpo/beta": 0.003045933786779642, "fcm_dpo/delta": -0.07158443331718445, "fcm_dpo/margin": 218.24789428710938, "fcm_dpo/q_t": 0.3554866909980774, "grad_norm": 52.39727020263672, "learning_rate": 1.9401235374032425e-07, "logits/chosen": -0.4089987874031067, "logits/rejected": -0.3841163218021393, "logps/chosen": -361.6821594238281, "logps/ref_chosen": -64.2349853515625, "logps/ref_rejected": -84.86299133300781, "logps/rejected": -600.5580444335938, "loss": 0.9405, "margin_dpo/margin_mean": 218.24789428710938, "margin_dpo/margin_std": 232.1405487060547, "step": 420 }, { "epoch": 0.6240822320117474, "fcm_dpo/beta": 0.0030709414277225733, "fcm_dpo/delta": 0.03329915553331375, "fcm_dpo/margin": 185.03384399414062, "fcm_dpo/q_t": 0.37453344464302063, "grad_norm": 36.52717590332031, "learning_rate": 1.8777780903377732e-07, "logits/chosen": -0.41324177384376526, "logits/rejected": -0.39911994338035583, "logps/chosen": -333.1438293457031, "logps/ref_chosen": -56.054161071777344, "logps/ref_rejected": -85.17119598388672, "logps/rejected": -547.294677734375, "loss": 1.0006, "margin_dpo/margin_mean": 185.03384399414062, "margin_dpo/margin_std": 220.0364990234375, "step": 425 }, { "epoch": 0.631424375917768, "fcm_dpo/beta": 0.003164885099977255, "fcm_dpo/delta": 0.03732316568493843, "fcm_dpo/margin": 178.30136108398438, "fcm_dpo/q_t": 0.37670689821243286, "grad_norm": 40.842708587646484, "learning_rate": 1.8158425248197928e-07, "logits/chosen": -0.4269745945930481, "logits/rejected": -0.40301522612571716, "logps/chosen": -347.55450439453125, "logps/ref_chosen": -69.24568176269531, "logps/ref_rejected": -91.8664321899414, "logps/rejected": -548.4766235351562, "loss": 1.0109, "margin_dpo/margin_mean": 178.30136108398438, "margin_dpo/margin_std": 218.34555053710938, "step": 430 }, { "epoch": 0.6387665198237885, "fcm_dpo/beta": 0.003134062048047781, "fcm_dpo/delta": -0.024000858888030052, "fcm_dpo/margin": 198.20034790039062, "fcm_dpo/q_t": 0.3648958206176758, "grad_norm": 33.455562591552734, "learning_rate": 1.7543576401928218e-07, "logits/chosen": -0.38360780477523804, "logits/rejected": -0.3695995807647705, "logps/chosen": -344.5923767089844, "logps/ref_chosen": -60.03449630737305, "logps/ref_rejected": -90.6872329711914, "logps/rejected": -573.4454345703125, "loss": 0.9696, "margin_dpo/margin_mean": 198.2003631591797, "margin_dpo/margin_std": 223.5625457763672, "step": 435 }, { "epoch": 0.6461086637298091, "fcm_dpo/beta": 0.0031414516270160675, "fcm_dpo/delta": 0.008422891609370708, "fcm_dpo/margin": 188.22352600097656, "fcm_dpo/q_t": 0.3708716332912445, "grad_norm": 39.6731071472168, "learning_rate": 1.6933639389195134e-07, "logits/chosen": -0.4058264195919037, "logits/rejected": -0.38181501626968384, "logps/chosen": -336.13507080078125, "logps/ref_chosen": -65.50349426269531, "logps/ref_rejected": -85.66627502441406, "logps/rejected": -544.5213623046875, "loss": 0.9793, "margin_dpo/margin_mean": 188.22352600097656, "margin_dpo/margin_std": 209.7587127685547, "step": 440 }, { "epoch": 0.6534508076358296, "fcm_dpo/beta": 0.00310450023971498, "fcm_dpo/delta": -0.04807734861969948, "fcm_dpo/margin": 207.53652954101562, "fcm_dpo/q_t": 0.36341750621795654, "grad_norm": 31.921981811523438, "learning_rate": 1.6329015999011182e-07, "logits/chosen": -0.3922547996044159, "logits/rejected": -0.37669098377227783, "logps/chosen": -361.6647033691406, "logps/ref_chosen": -60.72443389892578, "logps/ref_rejected": -89.9255142211914, "logps/rejected": -598.40234375, "loss": 0.9762, "margin_dpo/margin_mean": 207.53652954101562, "margin_dpo/margin_std": 250.76455688476562, "step": 445 }, { "epoch": 0.6607929515418502, "fcm_dpo/beta": 0.002883550710976124, "fcm_dpo/delta": -0.08991138637065887, "fcm_dpo/margin": 237.09793090820312, "fcm_dpo/q_t": 0.35191330313682556, "grad_norm": 38.20564270019531, "learning_rate": 1.573010452010098e-07, "logits/chosen": -0.378944993019104, "logits/rejected": -0.3763580918312073, "logps/chosen": -349.90106201171875, "logps/ref_chosen": -59.96248245239258, "logps/ref_rejected": -98.71509552001953, "logps/rejected": -625.751708984375, "loss": 0.924, "margin_dpo/margin_mean": 237.09793090820312, "margin_dpo/margin_std": 247.1533660888672, "step": 450 }, { "epoch": 0.6681350954478708, "fcm_dpo/beta": 0.002764140721410513, "fcm_dpo/delta": -0.03560318797826767, "fcm_dpo/margin": 229.11276245117188, "fcm_dpo/q_t": 0.362155020236969, "grad_norm": 35.098609924316406, "learning_rate": 1.5137299478533064e-07, "logits/chosen": -0.3881308138370514, "logits/rejected": -0.3764522671699524, "logps/chosen": -346.89105224609375, "logps/ref_chosen": -54.48131561279297, "logps/ref_rejected": -90.6321029663086, "logps/rejected": -612.1546630859375, "loss": 0.9596, "margin_dpo/margin_mean": 229.11276245117188, "margin_dpo/margin_std": 254.59921264648438, "step": 455 }, { "epoch": 0.6754772393538914, "fcm_dpo/beta": 0.0025613114703446627, "fcm_dpo/delta": -0.05846347659826279, "fcm_dpo/margin": 255.3362274169922, "fcm_dpo/q_t": 0.3553471565246582, "grad_norm": 46.92758560180664, "learning_rate": 1.4550991377830423e-07, "logits/chosen": -0.4343814253807068, "logits/rejected": -0.4337409436702728, "logps/chosen": -382.7040100097656, "logps/ref_chosen": -52.97611618041992, "logps/ref_rejected": -95.65971374511719, "logps/rejected": -680.7238159179688, "loss": 0.9276, "margin_dpo/margin_mean": 255.3362579345703, "margin_dpo/margin_std": 251.95751953125, "step": 460 }, { "epoch": 0.6828193832599119, "fcm_dpo/beta": 0.0025722947902977467, "fcm_dpo/delta": -0.020649045705795288, "fcm_dpo/margin": 240.62417602539062, "fcm_dpo/q_t": 0.36578595638275146, "grad_norm": 36.07568359375, "learning_rate": 1.3971566441730714e-07, "logits/chosen": -0.46626290678977966, "logits/rejected": -0.47067561745643616, "logps/chosen": -415.87188720703125, "logps/ref_chosen": -58.2827033996582, "logps/ref_rejected": -94.15567779541016, "logps/rejected": -692.3690185546875, "loss": 0.9691, "margin_dpo/margin_mean": 240.62417602539062, "margin_dpo/margin_std": 267.12054443359375, "step": 465 }, { "epoch": 0.6901615271659325, "fcm_dpo/beta": 0.0024534829426556826, "fcm_dpo/delta": -0.02453800104558468, "fcm_dpo/margin": 253.6896209716797, "fcm_dpo/q_t": 0.36288315057754517, "grad_norm": 28.073619842529297, "learning_rate": 1.339940635976592e-07, "logits/chosen": -0.5113226175308228, "logits/rejected": -0.49717050790786743, "logps/chosen": -398.2627868652344, "logps/ref_chosen": -62.69774627685547, "logps/ref_rejected": -96.33873748779297, "logps/rejected": -685.5933837890625, "loss": 0.9596, "margin_dpo/margin_mean": 253.6896209716797, "margin_dpo/margin_std": 270.79937744140625, "step": 470 }, { "epoch": 0.697503671071953, "fcm_dpo/beta": 0.002478546230122447, "fcm_dpo/delta": 0.038639336824417114, "fcm_dpo/margin": 227.10226440429688, "fcm_dpo/q_t": 0.3776451051235199, "grad_norm": 29.91801643371582, "learning_rate": 1.2834888035828596e-07, "logits/chosen": -0.5382334589958191, "logits/rejected": -0.5220173597335815, "logps/chosen": -366.8069763183594, "logps/ref_chosen": -61.12194061279297, "logps/ref_rejected": -92.42192077636719, "logps/rejected": -625.2091674804688, "loss": 1.0046, "margin_dpo/margin_mean": 227.10226440429688, "margin_dpo/margin_std": 276.1416320800781, "step": 475 }, { "epoch": 0.7048458149779736, "fcm_dpo/beta": 0.0025619766674935818, "fcm_dpo/delta": 0.04407358169555664, "fcm_dpo/margin": 217.63076782226562, "fcm_dpo/q_t": 0.37629732489585876, "grad_norm": 55.00226974487305, "learning_rate": 1.227838333989088e-07, "logits/chosen": -0.5085734128952026, "logits/rejected": -0.49212780594825745, "logps/chosen": -347.72064208984375, "logps/ref_chosen": -53.550628662109375, "logps/ref_rejected": -82.65167999267578, "logps/rejected": -594.4524536132812, "loss": 1.001, "margin_dpo/margin_mean": 217.63070678710938, "margin_dpo/margin_std": 248.6515350341797, "step": 480 }, { "epoch": 0.7121879588839941, "fcm_dpo/beta": 0.0026028165593743324, "fcm_dpo/delta": -0.024844055995345116, "fcm_dpo/margin": 239.23391723632812, "fcm_dpo/q_t": 0.3650123178958893, "grad_norm": 38.28968811035156, "learning_rate": 1.1730258863039347e-07, "logits/chosen": -0.5096001029014587, "logits/rejected": -0.5005736351013184, "logps/chosen": -372.1683654785156, "logps/ref_chosen": -60.76704788208008, "logps/ref_rejected": -92.70649719238281, "logps/rejected": -643.341796875, "loss": 0.9685, "margin_dpo/margin_mean": 239.23391723632812, "margin_dpo/margin_std": 269.9613037109375, "step": 485 }, { "epoch": 0.7195301027900147, "fcm_dpo/beta": 0.002558878855779767, "fcm_dpo/delta": 0.009935403242707253, "fcm_dpo/margin": 230.77432250976562, "fcm_dpo/q_t": 0.3699950575828552, "grad_norm": 33.77944564819336, "learning_rate": 1.1190875675987355e-07, "logits/chosen": -0.5016877055168152, "logits/rejected": -0.4948623776435852, "logps/chosen": -340.79034423828125, "logps/ref_chosen": -53.78142547607422, "logps/ref_rejected": -91.67438507080078, "logps/rejected": -609.4576416015625, "loss": 0.977, "margin_dpo/margin_mean": 230.77432250976562, "margin_dpo/margin_std": 254.27835083007812, "step": 490 }, { "epoch": 0.7268722466960352, "fcm_dpo/beta": 0.002750278217718005, "fcm_dpo/delta": 0.05883105471730232, "fcm_dpo/margin": 196.99557495117188, "fcm_dpo/q_t": 0.38119006156921387, "grad_norm": 49.648441314697266, "learning_rate": 1.0660589091223854e-07, "logits/chosen": -0.501973032951355, "logits/rejected": -0.49266910552978516, "logps/chosen": -345.7253112792969, "logps/ref_chosen": -58.9004020690918, "logps/ref_rejected": -82.15424346923828, "logps/rejected": -565.9747314453125, "loss": 1.021, "margin_dpo/margin_mean": 196.99557495117188, "margin_dpo/margin_std": 244.4354248046875, "step": 495 }, { "epoch": 0.7342143906020558, "fcm_dpo/beta": 0.002816407708451152, "fcm_dpo/delta": 0.02024873159825802, "fcm_dpo/margin": 205.9034423828125, "fcm_dpo/q_t": 0.37294405698776245, "grad_norm": 48.712711334228516, "learning_rate": 1.0139748428955333e-07, "logits/chosen": -0.5191727876663208, "logits/rejected": -0.5140315294265747, "logps/chosen": -387.0251159667969, "logps/ref_chosen": -62.13483810424805, "logps/ref_rejected": -87.91773223876953, "logps/rejected": -618.7114868164062, "loss": 0.9971, "margin_dpo/margin_mean": 205.90341186523438, "margin_dpo/margin_std": 241.8444366455078, "step": 500 }, { "epoch": 0.7415565345080763, "fcm_dpo/beta": 0.002710042055696249, "fcm_dpo/delta": -0.02353382483124733, "fcm_dpo/margin": 229.17935180664062, "fcm_dpo/q_t": 0.36337199807167053, "grad_norm": 56.58249282836914, "learning_rate": 9.628696786995188e-08, "logits/chosen": -0.5033208131790161, "logits/rejected": -0.48864540457725525, "logps/chosen": -374.4068298339844, "logps/ref_chosen": -62.631813049316406, "logps/ref_rejected": -87.59168243408203, "logps/rejected": -628.546142578125, "loss": 0.9598, "margin_dpo/margin_mean": 229.17935180664062, "margin_dpo/margin_std": 242.96011352539062, "step": 505 }, { "epoch": 0.748898678414097, "fcm_dpo/beta": 0.0027464856393635273, "fcm_dpo/delta": 0.013351870700716972, "fcm_dpo/margin": 213.7869873046875, "fcm_dpo/q_t": 0.37243741750717163, "grad_norm": 31.705190658569336, "learning_rate": 9.127770814751932e-08, "logits/chosen": -0.47533559799194336, "logits/rejected": -0.4699474275112152, "logps/chosen": -376.91949462890625, "logps/ref_chosen": -60.552574157714844, "logps/ref_rejected": -91.0874252319336, "logps/rejected": -621.2413330078125, "loss": 0.9892, "margin_dpo/margin_mean": 213.7869873046875, "margin_dpo/margin_std": 251.62619018554688, "step": 510 }, { "epoch": 0.7562408223201175, "fcm_dpo/beta": 0.0027470688801258802, "fcm_dpo/delta": 0.029249707236886024, "fcm_dpo/margin": 208.3932647705078, "fcm_dpo/q_t": 0.37528282403945923, "grad_norm": 40.00129318237305, "learning_rate": 8.637300491465272e-08, "logits/chosen": -0.4838427007198334, "logits/rejected": -0.4740561544895172, "logps/chosen": -362.11346435546875, "logps/ref_chosen": -60.9382438659668, "logps/ref_rejected": -83.36767578125, "logps/rejected": -592.9361572265625, "loss": 1.0079, "margin_dpo/margin_mean": 208.3932647705078, "margin_dpo/margin_std": 256.8492736816406, "step": 515 }, { "epoch": 0.7635829662261381, "fcm_dpo/beta": 0.002783264499157667, "fcm_dpo/delta": -0.007280466612428427, "fcm_dpo/margin": 217.810302734375, "fcm_dpo/q_t": 0.3678578734397888, "grad_norm": 32.118003845214844, "learning_rate": 8.15760890883607e-08, "logits/chosen": -0.48051896691322327, "logits/rejected": -0.4790908396244049, "logps/chosen": -364.2611999511719, "logps/ref_chosen": -65.47642517089844, "logps/ref_rejected": -98.10872650146484, "logps/rejected": -614.7037963867188, "loss": 0.972, "margin_dpo/margin_mean": 217.810302734375, "margin_dpo/margin_std": 243.54904174804688, "step": 520 }, { "epoch": 0.7709251101321586, "fcm_dpo/beta": 0.002879193751141429, "fcm_dpo/delta": 0.00723473122343421, "fcm_dpo/margin": 205.75973510742188, "fcm_dpo/q_t": 0.368082195520401, "grad_norm": 43.291744232177734, "learning_rate": 7.689012058193384e-08, "logits/chosen": -0.49591636657714844, "logits/rejected": -0.4880523085594177, "logps/chosen": -354.49664306640625, "logps/ref_chosen": -59.072021484375, "logps/ref_rejected": -82.8821792602539, "logps/rejected": -584.0665283203125, "loss": 0.9739, "margin_dpo/margin_mean": 205.7597198486328, "margin_dpo/margin_std": 222.150390625, "step": 525 }, { "epoch": 0.7782672540381792, "fcm_dpo/beta": 0.002823830349370837, "fcm_dpo/delta": -0.012813677079975605, "fcm_dpo/margin": 216.4855499267578, "fcm_dpo/q_t": 0.36813658475875854, "grad_norm": 35.54737091064453, "learning_rate": 7.231818622338822e-08, "logits/chosen": -0.5179253816604614, "logits/rejected": -0.5039080381393433, "logps/chosen": -397.35479736328125, "logps/ref_chosen": -61.11234664916992, "logps/ref_rejected": -87.01112365722656, "logps/rejected": -639.7390747070312, "loss": 0.985, "margin_dpo/margin_mean": 216.4855499267578, "margin_dpo/margin_std": 256.6390380859375, "step": 530 }, { "epoch": 0.7856093979441997, "fcm_dpo/beta": 0.0028174181934446096, "fcm_dpo/delta": -0.0029197714757174253, "fcm_dpo/margin": 213.47830200195312, "fcm_dpo/q_t": 0.3706435561180115, "grad_norm": 48.269222259521484, "learning_rate": 6.786329772205246e-08, "logits/chosen": -0.5222650766372681, "logits/rejected": -0.5029199719429016, "logps/chosen": -414.7535705566406, "logps/ref_chosen": -60.96736526489258, "logps/ref_rejected": -81.727294921875, "logps/rejected": -648.9918212890625, "loss": 0.9981, "margin_dpo/margin_mean": 213.4783172607422, "margin_dpo/margin_std": 261.0979919433594, "step": 535 }, { "epoch": 0.7929515418502202, "fcm_dpo/beta": 0.002668160479515791, "fcm_dpo/delta": -0.07808558642864227, "fcm_dpo/margin": 251.76071166992188, "fcm_dpo/q_t": 0.35538631677627563, "grad_norm": 36.01322937011719, "learning_rate": 6.352838968463919e-08, "logits/chosen": -0.5208684206008911, "logits/rejected": -0.5084649324417114, "logps/chosen": -376.98809814453125, "logps/ref_chosen": -58.64385986328125, "logps/ref_rejected": -90.40965270996094, "logps/rejected": -660.5145874023438, "loss": 0.9397, "margin_dpo/margin_mean": 251.76071166992188, "margin_dpo/margin_std": 272.35394287109375, "step": 540 }, { "epoch": 0.8002936857562408, "fcm_dpo/beta": 0.002645547967404127, "fcm_dpo/delta": 0.01414306741207838, "fcm_dpo/margin": 221.38949584960938, "fcm_dpo/q_t": 0.3734440207481384, "grad_norm": 34.583797454833984, "learning_rate": 5.9316317682106294e-08, "logits/chosen": -0.5391398668289185, "logits/rejected": -0.5384151339530945, "logps/chosen": -415.88970947265625, "logps/ref_chosen": -64.73474884033203, "logps/ref_rejected": -100.44208526611328, "logps/rejected": -672.986572265625, "loss": 0.9969, "margin_dpo/margin_mean": 221.38949584960938, "margin_dpo/margin_std": 267.8731384277344, "step": 545 }, { "epoch": 0.8076358296622613, "fcm_dpo/beta": 0.002687679138034582, "fcm_dpo/delta": 0.049287859350442886, "fcm_dpo/margin": 205.9238739013672, "fcm_dpo/q_t": 0.37848031520843506, "grad_norm": 35.05332565307617, "learning_rate": 5.5229856368582376e-08, "logits/chosen": -0.5319250226020813, "logits/rejected": -0.523718535900116, "logps/chosen": -401.68536376953125, "logps/ref_chosen": -59.13951873779297, "logps/ref_rejected": -87.15635681152344, "logps/rejected": -635.6260986328125, "loss": 1.009, "margin_dpo/margin_mean": 205.9238739013672, "margin_dpo/margin_std": 250.6122283935547, "step": 550 }, { "epoch": 0.8149779735682819, "fcm_dpo/beta": 0.002561022061854601, "fcm_dpo/delta": -0.10589297860860825, "fcm_dpo/margin": 271.74420166015625, "fcm_dpo/q_t": 0.3499818444252014, "grad_norm": 34.610145568847656, "learning_rate": 5.127169765359515e-08, "logits/chosen": -0.5212115049362183, "logits/rejected": -0.5161057114601135, "logps/chosen": -394.1856384277344, "logps/ref_chosen": -62.1995849609375, "logps/ref_rejected": -102.51883697509766, "logps/rejected": -706.2490844726562, "loss": 0.9221, "margin_dpo/margin_mean": 271.74420166015625, "margin_dpo/margin_std": 282.8084411621094, "step": 555 }, { "epoch": 0.8223201174743024, "fcm_dpo/beta": 0.0024848259054124355, "fcm_dpo/delta": 0.015132618136703968, "fcm_dpo/margin": 235.41403198242188, "fcm_dpo/q_t": 0.3717556595802307, "grad_norm": 34.969547271728516, "learning_rate": 4.7444448928806615e-08, "logits/chosen": -0.5141924619674683, "logits/rejected": -0.5002972483634949, "logps/chosen": -410.2688903808594, "logps/ref_chosen": -61.541908264160156, "logps/ref_rejected": -97.37491607666016, "logps/rejected": -681.5159912109375, "loss": 0.983, "margin_dpo/margin_mean": 235.41403198242188, "margin_dpo/margin_std": 259.69488525390625, "step": 560 }, { "epoch": 0.8296622613803231, "fcm_dpo/beta": 0.0025754589587450027, "fcm_dpo/delta": 0.02127731405198574, "fcm_dpo/margin": 224.775634765625, "fcm_dpo/q_t": 0.37311750650405884, "grad_norm": 35.28373718261719, "learning_rate": 4.375063135042445e-08, "logits/chosen": -0.48246508836746216, "logits/rejected": -0.4706944525241852, "logps/chosen": -427.0626525878906, "logps/ref_chosen": -62.85475540161133, "logps/ref_rejected": -93.8392105102539, "logps/rejected": -682.8226318359375, "loss": 0.9922, "margin_dpo/margin_mean": 224.775634765625, "margin_dpo/margin_std": 259.5643005371094, "step": 565 }, { "epoch": 0.8370044052863436, "fcm_dpo/beta": 0.0025527041871100664, "fcm_dpo/delta": -0.007842612452805042, "fcm_dpo/margin": 237.66757202148438, "fcm_dpo/q_t": 0.3671664595603943, "grad_norm": 32.344730377197266, "learning_rate": 4.019267817841834e-08, "logits/chosen": -0.49153321981430054, "logits/rejected": -0.4876040816307068, "logps/chosen": -414.8893127441406, "logps/ref_chosen": -57.98622512817383, "logps/ref_rejected": -87.91555786132812, "logps/rejected": -682.4862060546875, "loss": 0.9792, "margin_dpo/margin_mean": 237.66757202148438, "margin_dpo/margin_std": 269.2391052246094, "step": 570 }, { "epoch": 0.8443465491923642, "fcm_dpo/beta": 0.002516696462407708, "fcm_dpo/delta": -0.01592089980840683, "fcm_dpo/margin": 243.9696044921875, "fcm_dpo/q_t": 0.36822399497032166, "grad_norm": 46.91700744628906, "learning_rate": 3.677293317363864e-08, "logits/chosen": -0.4880523085594177, "logits/rejected": -0.4857984483242035, "logps/chosen": -424.9076232910156, "logps/ref_chosen": -55.194114685058594, "logps/ref_rejected": -89.68229675292969, "logps/rejected": -703.3653564453125, "loss": 0.9947, "margin_dpo/margin_mean": 243.9696044921875, "margin_dpo/margin_std": 302.0916748046875, "step": 575 }, { "epoch": 0.8516886930983847, "fcm_dpo/beta": 0.0025378705468028784, "fcm_dpo/delta": 0.005721461959183216, "fcm_dpo/margin": 233.96383666992188, "fcm_dpo/q_t": 0.36850807070732117, "grad_norm": 26.867847442626953, "learning_rate": 3.349364905389032e-08, "logits/chosen": -0.49183306097984314, "logits/rejected": -0.4879623055458069, "logps/chosen": -380.11614990234375, "logps/ref_chosen": -54.605796813964844, "logps/ref_rejected": -88.9614486694336, "logps/rejected": -648.4356689453125, "loss": 0.9794, "margin_dpo/margin_mean": 233.9638214111328, "margin_dpo/margin_std": 259.25726318359375, "step": 580 }, { "epoch": 0.8590308370044053, "fcm_dpo/beta": 0.002550755860283971, "fcm_dpo/delta": 0.00011723488569259644, "fcm_dpo/margin": 234.6815185546875, "fcm_dpo/q_t": 0.36916983127593994, "grad_norm": 38.553314208984375, "learning_rate": 3.035698600998121e-08, "logits/chosen": -0.52159583568573, "logits/rejected": -0.5172958970069885, "logps/chosen": -393.91827392578125, "logps/ref_chosen": -59.03770065307617, "logps/ref_rejected": -94.4029541015625, "logps/rejected": -663.9650268554688, "loss": 0.9843, "margin_dpo/margin_mean": 234.6815185546875, "margin_dpo/margin_std": 265.8410339355469, "step": 585 }, { "epoch": 0.8663729809104258, "fcm_dpo/beta": 0.0025425164494663477, "fcm_dpo/delta": -0.02153742127120495, "fcm_dpo/margin": 243.96188354492188, "fcm_dpo/q_t": 0.36495503783226013, "grad_norm": 31.69843101501465, "learning_rate": 2.736501028272095e-08, "logits/chosen": -0.500575602054596, "logits/rejected": -0.5034629702568054, "logps/chosen": -374.412353515625, "logps/ref_chosen": -53.5163688659668, "logps/ref_rejected": -99.90290832519531, "logps/rejected": -664.7607421875, "loss": 0.9663, "margin_dpo/margin_mean": 243.96188354492188, "margin_dpo/margin_std": 272.521728515625, "step": 590 }, { "epoch": 0.8737151248164464, "fcm_dpo/beta": 0.002531964797526598, "fcm_dpo/delta": 0.040782030671834946, "fcm_dpo/margin": 221.4866485595703, "fcm_dpo/q_t": 0.37717491388320923, "grad_norm": 27.417510986328125, "learning_rate": 2.451969280180849e-08, "logits/chosen": -0.4844892621040344, "logits/rejected": -0.463656485080719, "logps/chosen": -368.6888427734375, "logps/ref_chosen": -51.44538497924805, "logps/ref_rejected": -77.43083190917969, "logps/rejected": -616.1609497070312, "loss": 1.0148, "margin_dpo/margin_mean": 221.4866485595703, "margin_dpo/margin_std": 274.03509521484375, "step": 595 }, { "epoch": 0.8810572687224669, "fcm_dpo/beta": 0.002594124060124159, "fcm_dpo/delta": 0.02012869343161583, "fcm_dpo/margin": 223.6268310546875, "fcm_dpo/q_t": 0.3721489906311035, "grad_norm": 38.589534759521484, "learning_rate": 2.1822907887504932e-08, "logits/chosen": -0.4984382688999176, "logits/rejected": -0.4893369674682617, "logps/chosen": -383.90936279296875, "logps/ref_chosen": -57.161705017089844, "logps/ref_rejected": -87.73274230957031, "logps/rejected": -638.107177734375, "loss": 0.9964, "margin_dpo/margin_mean": 223.6268310546875, "margin_dpo/margin_std": 261.2717590332031, "step": 600 }, { "epoch": 0.8810572687224669, "eval_fcm_dpo/beta": 0.003444387810304761, "eval_fcm_dpo/delta": 0.05230085551738739, "eval_fcm_dpo/margin": 156.23291015625, "eval_fcm_dpo/q_t": 0.39142969250679016, "eval_logits/chosen": -0.5275665521621704, "eval_logits/rejected": -0.5133021473884583, "eval_logps/chosen": -459.30078125, "eval_logps/ref_chosen": -79.05104064941406, "eval_logps/ref_rejected": -86.79793548583984, "eval_logps/rejected": -623.2805786132812, "eval_loss": 0.55719393491745, "eval_margin_dpo/margin_mean": 156.23291015625, "eval_margin_dpo/margin_std": 271.4756774902344, "eval_runtime": 39.7472, "eval_samples_per_second": 58.847, "eval_steps_per_second": 1.862, "step": 600 }, { "epoch": 0.8883994126284875, "fcm_dpo/beta": 0.0032743208575993776, "fcm_dpo/delta": -0.2591201961040497, "fcm_dpo/margin": 255.2403106689453, "fcm_dpo/q_t": 0.32824546098709106, "grad_norm": 34.93064498901367, "learning_rate": 1.9276432015946446e-08, "logits/chosen": -0.5040395855903625, "logits/rejected": -0.4974172115325928, "logps/chosen": -369.64263916015625, "logps/ref_chosen": -58.169830322265625, "logps/ref_rejected": -95.36891174316406, "logps/rejected": -662.0820922851562, "loss": 0.8815, "margin_dpo/margin_mean": 255.2403106689453, "margin_dpo/margin_std": 277.72467041015625, "step": 605 }, { "epoch": 0.895741556534508, "fcm_dpo/beta": 0.0028547747060656548, "fcm_dpo/delta": -0.07051874697208405, "fcm_dpo/margin": 233.07736206054688, "fcm_dpo/q_t": 0.3554316461086273, "grad_norm": 36.279544830322266, "learning_rate": 1.6881942648911074e-08, "logits/chosen": -0.5211396813392639, "logits/rejected": -0.5063233971595764, "logps/chosen": -370.6011047363281, "logps/ref_chosen": -58.97087860107422, "logps/ref_rejected": -89.0286865234375, "logps/rejected": -633.7362670898438, "loss": 0.9375, "margin_dpo/margin_mean": 233.07736206054688, "margin_dpo/margin_std": 247.29598999023438, "step": 610 }, { "epoch": 0.9030837004405287, "fcm_dpo/beta": 0.0026367397513240576, "fcm_dpo/delta": -0.06413199007511139, "fcm_dpo/margin": 249.89028930664062, "fcm_dpo/q_t": 0.3541339635848999, "grad_norm": 35.62370300292969, "learning_rate": 1.4641017128809801e-08, "logits/chosen": -0.4915865361690521, "logits/rejected": -0.5005960464477539, "logps/chosen": -372.3035583496094, "logps/ref_chosen": -58.081878662109375, "logps/ref_rejected": -95.92155456542969, "logps/rejected": -660.0335083007812, "loss": 0.9241, "margin_dpo/margin_mean": 249.89022827148438, "margin_dpo/margin_std": 243.20175170898438, "step": 615 }, { "epoch": 0.9104258443465492, "fcm_dpo/beta": 0.002739082556217909, "fcm_dpo/delta": 0.0633806362748146, "fcm_dpo/margin": 196.49574279785156, "fcm_dpo/q_t": 0.3842490315437317, "grad_norm": 41.11799240112305, "learning_rate": 1.2555131639630567e-08, "logits/chosen": -0.4931033253669739, "logits/rejected": -0.477125346660614, "logps/chosen": -404.1942138671875, "logps/ref_chosen": -62.203094482421875, "logps/ref_rejected": -80.53683471679688, "logps/rejected": -619.023681640625, "loss": 1.0346, "margin_dpo/margin_mean": 196.49574279785156, "margin_dpo/margin_std": 260.45947265625, "step": 620 }, { "epoch": 0.9177679882525698, "fcm_dpo/beta": 0.0026761619374156, "fcm_dpo/delta": -0.042652104049921036, "fcm_dpo/margin": 238.6814727783203, "fcm_dpo/q_t": 0.3606039881706238, "grad_norm": 37.73185348510742, "learning_rate": 1.0625660234518913e-08, "logits/chosen": -0.49364280700683594, "logits/rejected": -0.48199111223220825, "logps/chosen": -384.24359130859375, "logps/ref_chosen": -61.727455139160156, "logps/ref_rejected": -88.4387435913086, "logps/rejected": -649.6363525390625, "loss": 0.9522, "margin_dpo/margin_mean": 238.68148803710938, "margin_dpo/margin_std": 257.99114990234375, "step": 625 }, { "epoch": 0.9251101321585903, "fcm_dpo/beta": 0.002640167949721217, "fcm_dpo/delta": 0.01016196422278881, "fcm_dpo/margin": 223.45654296875, "fcm_dpo/q_t": 0.3729027211666107, "grad_norm": 40.86648178100586, "learning_rate": 8.85387393063622e-09, "logits/chosen": -0.4925254285335541, "logits/rejected": -0.48594093322753906, "logps/chosen": -378.8038024902344, "logps/ref_chosen": -61.30865478515625, "logps/ref_rejected": -96.54997253417969, "logps/rejected": -637.5016479492188, "loss": 0.9872, "margin_dpo/margin_mean": 223.45654296875, "margin_dpo/margin_std": 258.1217956542969, "step": 630 }, { "epoch": 0.9324522760646109, "fcm_dpo/beta": 0.0027915460523217916, "fcm_dpo/delta": 0.07615941017866135, "fcm_dpo/margin": 189.08535766601562, "fcm_dpo/q_t": 0.38567864894866943, "grad_norm": 37.90126419067383, "learning_rate": 7.240939871891699e-09, "logits/chosen": -0.49808454513549805, "logits/rejected": -0.4813234806060791, "logps/chosen": -401.56622314453125, "logps/ref_chosen": -63.7315673828125, "logps/ref_rejected": -89.66435241699219, "logps/rejected": -616.5842895507812, "loss": 1.0422, "margin_dpo/margin_mean": 189.08535766601562, "margin_dpo/margin_std": 255.0254364013672, "step": 635 }, { "epoch": 0.9397944199706314, "fcm_dpo/beta": 0.002841885667294264, "fcm_dpo/delta": -0.047639258205890656, "fcm_dpo/margin": 226.51620483398438, "fcm_dpo/q_t": 0.35998988151550293, "grad_norm": 34.88581085205078, "learning_rate": 5.7879205600998296e-09, "logits/chosen": -0.4770120084285736, "logits/rejected": -0.46177831292152405, "logps/chosen": -379.24822998046875, "logps/ref_chosen": -59.17915725708008, "logps/ref_rejected": -88.51210021972656, "logps/rejected": -635.0973510742188, "loss": 0.9492, "margin_dpo/margin_mean": 226.51620483398438, "margin_dpo/margin_std": 240.34689331054688, "step": 640 }, { "epoch": 0.947136563876652, "fcm_dpo/beta": 0.0027807278092950583, "fcm_dpo/delta": 0.045675117522478104, "fcm_dpo/margin": 200.19139099121094, "fcm_dpo/q_t": 0.3824850618839264, "grad_norm": 49.77201461791992, "learning_rate": 4.495773155069299e-09, "logits/chosen": -0.5024424195289612, "logits/rejected": -0.4973903298377991, "logps/chosen": -392.8785400390625, "logps/ref_chosen": -59.50596237182617, "logps/ref_rejected": -93.92404174804688, "logps/rejected": -627.488037109375, "loss": 1.0362, "margin_dpo/margin_mean": 200.19139099121094, "margin_dpo/margin_std": 274.2567138671875, "step": 645 }, { "epoch": 0.9544787077826725, "fcm_dpo/beta": 0.0028442046605050564, "fcm_dpo/delta": -0.03724042326211929, "fcm_dpo/margin": 222.9950408935547, "fcm_dpo/q_t": 0.3662676215171814, "grad_norm": 32.262725830078125, "learning_rate": 3.3653488440851253e-09, "logits/chosen": -0.4864223003387451, "logits/rejected": -0.4813409745693207, "logps/chosen": -373.74139404296875, "logps/ref_chosen": -57.774566650390625, "logps/ref_rejected": -89.61600494384766, "logps/rejected": -628.5778198242188, "loss": 0.983, "margin_dpo/margin_mean": 222.9950408935547, "margin_dpo/margin_std": 272.8062438964844, "step": 650 }, { "epoch": 0.9618208516886931, "fcm_dpo/beta": 0.002694058697670698, "fcm_dpo/delta": -0.033371347934007645, "fcm_dpo/margin": 234.06857299804688, "fcm_dpo/q_t": 0.36482754349708557, "grad_norm": 33.339229583740234, "learning_rate": 2.397392281198729e-09, "logits/chosen": -0.490518182516098, "logits/rejected": -0.4940160810947418, "logps/chosen": -366.3544006347656, "logps/ref_chosen": -55.68403244018555, "logps/ref_rejected": -102.4081802368164, "logps/rejected": -647.1470947265625, "loss": 0.9655, "margin_dpo/margin_mean": 234.06857299804688, "margin_dpo/margin_std": 270.1910705566406, "step": 655 }, { "epoch": 0.9691629955947136, "fcm_dpo/beta": 0.002519825007766485, "fcm_dpo/delta": -0.08081191033124924, "fcm_dpo/margin": 267.1984558105469, "fcm_dpo/q_t": 0.3513564467430115, "grad_norm": 35.18833541870117, "learning_rate": 1.592541096695571e-09, "logits/chosen": -0.4812515676021576, "logits/rejected": -0.4766325354576111, "logps/chosen": -370.0708312988281, "logps/ref_chosen": -59.19981002807617, "logps/ref_rejected": -94.19200134277344, "logps/rejected": -672.2614135742188, "loss": 0.9108, "margin_dpo/margin_mean": 267.1984558105469, "margin_dpo/margin_std": 252.93112182617188, "step": 660 }, { "epoch": 0.9765051395007343, "fcm_dpo/beta": 0.0024876741226762533, "fcm_dpo/delta": -0.004373815376311541, "fcm_dpo/margin": 242.69509887695312, "fcm_dpo/q_t": 0.36792677640914917, "grad_norm": 32.33696746826172, "learning_rate": 9.513254770636137e-10, "logits/chosen": -0.49785465002059937, "logits/rejected": -0.4872364103794098, "logps/chosen": -370.6224365234375, "logps/ref_chosen": -61.2533073425293, "logps/ref_rejected": -95.86351013183594, "logps/rejected": -647.927734375, "loss": 0.9751, "margin_dpo/margin_mean": 242.69509887695312, "margin_dpo/margin_std": 275.34063720703125, "step": 665 }, { "epoch": 0.9838472834067548, "fcm_dpo/beta": 0.0024808943271636963, "fcm_dpo/delta": -0.016260143369436264, "fcm_dpo/margin": 247.93661499023438, "fcm_dpo/q_t": 0.36324819922447205, "grad_norm": 26.02719497680664, "learning_rate": 4.741678157389739e-10, "logits/chosen": -0.48116713762283325, "logits/rejected": -0.46722927689552307, "logps/chosen": -372.7713928222656, "logps/ref_chosen": -62.95263671875, "logps/ref_rejected": -92.4662094116211, "logps/rejected": -650.2215576171875, "loss": 0.9554, "margin_dpo/margin_mean": 247.93661499023438, "margin_dpo/margin_std": 256.9874572753906, "step": 670 }, { "epoch": 0.9911894273127754, "fcm_dpo/beta": 0.002432363573461771, "fcm_dpo/delta": -0.023618485778570175, "fcm_dpo/margin": 255.810302734375, "fcm_dpo/q_t": 0.361946165561676, "grad_norm": 27.06682777404785, "learning_rate": 1.6138243485910863e-10, "logits/chosen": -0.47107481956481934, "logits/rejected": -0.46272093057632446, "logps/chosen": -361.4900817871094, "logps/ref_chosen": -48.5856819152832, "logps/ref_rejected": -81.27871704101562, "logps/rejected": -649.9933471679688, "loss": 0.947, "margin_dpo/margin_mean": 255.81027221679688, "margin_dpo/margin_std": 257.8631286621094, "step": 675 }, { "epoch": 0.9985315712187959, "fcm_dpo/beta": 0.0023933127522468567, "fcm_dpo/delta": 0.015318195335566998, "fcm_dpo/margin": 244.6057586669922, "fcm_dpo/q_t": 0.36930760741233826, "grad_norm": 29.81550407409668, "learning_rate": 1.31753782067201e-11, "logits/chosen": -0.5069360733032227, "logits/rejected": -0.4939002990722656, "logps/chosen": -374.1042175292969, "logps/ref_chosen": -60.25421905517578, "logps/ref_rejected": -87.23457336425781, "logps/rejected": -645.6903076171875, "loss": 0.9708, "margin_dpo/margin_mean": 244.6057586669922, "margin_dpo/margin_std": 257.46160888671875, "step": 680 }, { "epoch": 1.0, "step": 681, "total_flos": 0.0, "train_loss": 0.9952153347312266, "train_runtime": 1856.6581, "train_samples_per_second": 23.482, "train_steps_per_second": 0.367 } ], "logging_steps": 5, "max_steps": 681, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }