Files
llama-3-8b-base-new-dpo-har…/trainer_state.json
ModelHub XC e7a907fbc6 初始化项目,由ModelHub XC社区提供模型
Model: jackf857/llama-3-8b-base-new-dpo-harmless-s_star0.6-q_t0.4
Source: Original Platform
2026-06-12 13:30:30 +08:00

2622 lines
96 KiB
JSON

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.999244142101285,
"eval_steps": 200,
"global_step": 661,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0015117157974300832,
"fcm_dpo/beta": 0.10000000149011612,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.0013532638549804688,
"fcm_dpo/q_t": 0.5000336766242981,
"grad_norm": 28.21888542175293,
"learning_rate": 0.0,
"logits/chosen": 0.13337239623069763,
"logits/rejected": 0.12492948770523071,
"logps/chosen": -64.5841293334961,
"logps/ref_chosen": -64.61280822753906,
"logps/ref_rejected": -64.17195129394531,
"logps/rejected": -64.14192199707031,
"loss": 1.3866,
"margin_dpo/margin_mean": -0.0013527870178222656,
"margin_dpo/margin_std": 0.2561596930027008,
"step": 1
},
{
"epoch": 0.007558578987150416,
"fcm_dpo/beta": 0.10000000894069672,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.006315112113952637,
"fcm_dpo/q_t": 0.5001578330993652,
"grad_norm": 29.724584579467773,
"learning_rate": 2.9850746268656714e-08,
"logits/chosen": 0.09723952412605286,
"logits/rejected": 0.06879077106714249,
"logps/chosen": -65.34913635253906,
"logps/ref_chosen": -65.34695434570312,
"logps/ref_rejected": -79.315673828125,
"logps/rejected": -79.31153869628906,
"loss": 1.3872,
"margin_dpo/margin_mean": -0.006315216422080994,
"margin_dpo/margin_std": 0.2993292212486267,
"step": 5
},
{
"epoch": 0.015117157974300832,
"fcm_dpo/beta": 0.10000000894069672,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.013524067588150501,
"fcm_dpo/q_t": 0.500338077545166,
"grad_norm": 29.753292083740234,
"learning_rate": 6.71641791044776e-08,
"logits/chosen": 0.10936982929706573,
"logits/rejected": 0.07020524889230728,
"logps/chosen": -56.681053161621094,
"logps/ref_chosen": -56.65692901611328,
"logps/ref_rejected": -80.12786865234375,
"logps/rejected": -80.13846588134766,
"loss": 1.3879,
"margin_dpo/margin_mean": -0.013524264097213745,
"margin_dpo/margin_std": 0.3152056336402893,
"step": 10
},
{
"epoch": 0.022675736961451247,
"fcm_dpo/beta": 0.10000000894069672,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.01606299914419651,
"fcm_dpo/q_t": 0.49959880113601685,
"grad_norm": 33.080528259277344,
"learning_rate": 1.044776119402985e-07,
"logits/chosen": 0.07683371752500534,
"logits/rejected": 0.04736438766121864,
"logps/chosen": -60.102806091308594,
"logps/ref_chosen": -60.09392166137695,
"logps/ref_rejected": -78.99056243896484,
"logps/rejected": -79.01551818847656,
"loss": 1.385,
"margin_dpo/margin_mean": 0.01606297492980957,
"margin_dpo/margin_std": 0.31872445344924927,
"step": 15
},
{
"epoch": 0.030234315948601664,
"fcm_dpo/beta": 0.10000000894069672,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.045558154582977295,
"fcm_dpo/q_t": 0.498861163854599,
"grad_norm": 29.158737182617188,
"learning_rate": 1.4179104477611938e-07,
"logits/chosen": 0.0912957563996315,
"logits/rejected": 0.06395339965820312,
"logps/chosen": -55.449317932128906,
"logps/ref_chosen": -55.464561462402344,
"logps/ref_rejected": -77.40013122558594,
"logps/rejected": -77.43043518066406,
"loss": 1.382,
"margin_dpo/margin_mean": 0.04555808752775192,
"margin_dpo/margin_std": 0.29819026589393616,
"step": 20
},
{
"epoch": 0.03779289493575208,
"fcm_dpo/beta": 0.10000000894069672,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.004711696412414312,
"fcm_dpo/q_t": 0.5001178979873657,
"grad_norm": 29.72311782836914,
"learning_rate": 1.7910447761194027e-07,
"logits/chosen": 0.10293181240558624,
"logits/rejected": 0.07127834111452103,
"logps/chosen": -60.726539611816406,
"logps/ref_chosen": -60.711814880371094,
"logps/ref_rejected": -82.71756744384766,
"logps/rejected": -82.72756958007812,
"loss": 1.387,
"margin_dpo/margin_mean": -0.004711783025413752,
"margin_dpo/margin_std": 0.29338979721069336,
"step": 25
},
{
"epoch": 0.045351473922902494,
"fcm_dpo/beta": 0.10000000894069672,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.011212587356567383,
"fcm_dpo/q_t": 0.4997197091579437,
"grad_norm": 30.495267868041992,
"learning_rate": 2.1641791044776117e-07,
"logits/chosen": 0.11331719160079956,
"logits/rejected": 0.088210329413414,
"logps/chosen": -60.893218994140625,
"logps/ref_chosen": -60.880210876464844,
"logps/ref_rejected": -78.44148254394531,
"logps/rejected": -78.4656982421875,
"loss": 1.3854,
"margin_dpo/margin_mean": 0.011212664656341076,
"margin_dpo/margin_std": 0.283229798078537,
"step": 30
},
{
"epoch": 0.05291005291005291,
"fcm_dpo/beta": 0.10000000894069672,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.033431313931941986,
"fcm_dpo/q_t": 0.4991644322872162,
"grad_norm": 27.916645050048828,
"learning_rate": 2.537313432835821e-07,
"logits/chosen": 0.06567516177892685,
"logits/rejected": 0.03955943509936333,
"logps/chosen": -62.278358459472656,
"logps/ref_chosen": -62.248138427734375,
"logps/ref_rejected": -79.56475830078125,
"logps/rejected": -79.62838745117188,
"loss": 1.3832,
"margin_dpo/margin_mean": 0.03343154489994049,
"margin_dpo/margin_std": 0.3196953535079956,
"step": 35
},
{
"epoch": 0.06046863189720333,
"fcm_dpo/beta": 0.10000000894069672,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.026270773261785507,
"fcm_dpo/q_t": 0.49934354424476624,
"grad_norm": 31.407669067382812,
"learning_rate": 2.9104477611940296e-07,
"logits/chosen": 0.1118750348687172,
"logits/rejected": 0.06694410741329193,
"logps/chosen": -58.9628791809082,
"logps/ref_chosen": -58.87812423706055,
"logps/ref_rejected": -84.22982025146484,
"logps/rejected": -84.34083557128906,
"loss": 1.384,
"margin_dpo/margin_mean": 0.026270756497979164,
"margin_dpo/margin_std": 0.32897505164146423,
"step": 40
},
{
"epoch": 0.06802721088435375,
"fcm_dpo/beta": 0.10000000894069672,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.06596313416957855,
"fcm_dpo/q_t": 0.49835172295570374,
"grad_norm": 31.758451461791992,
"learning_rate": 3.2835820895522385e-07,
"logits/chosen": 0.07056122273206711,
"logits/rejected": 0.043311070650815964,
"logps/chosen": -66.0146713256836,
"logps/ref_chosen": -65.88298034667969,
"logps/ref_rejected": -83.87881469726562,
"logps/rejected": -84.07647705078125,
"loss": 1.3801,
"margin_dpo/margin_mean": 0.0659632533788681,
"margin_dpo/margin_std": 0.4093398153781891,
"step": 45
},
{
"epoch": 0.07558578987150416,
"fcm_dpo/beta": 0.10000000894069672,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.11442438513040543,
"fcm_dpo/q_t": 0.4971412718296051,
"grad_norm": 27.961788177490234,
"learning_rate": 3.6567164179104475e-07,
"logits/chosen": 0.0864916443824768,
"logits/rejected": 0.052734147757291794,
"logps/chosen": -55.3635139465332,
"logps/ref_chosen": -55.172386169433594,
"logps/ref_rejected": -69.63300323486328,
"logps/rejected": -69.93855285644531,
"loss": 1.3753,
"margin_dpo/margin_mean": 0.11442458629608154,
"margin_dpo/margin_std": 0.4219285845756531,
"step": 50
},
{
"epoch": 0.08314436885865457,
"fcm_dpo/beta": 0.10000000894069672,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.20081201195716858,
"fcm_dpo/q_t": 0.4949868321418762,
"grad_norm": 31.33340835571289,
"learning_rate": 4.0298507462686564e-07,
"logits/chosen": 0.07780580222606659,
"logits/rejected": 0.04163379222154617,
"logps/chosen": -57.531211853027344,
"logps/ref_chosen": -57.193580627441406,
"logps/ref_rejected": -79.69940948486328,
"logps/rejected": -80.23786926269531,
"loss": 1.3672,
"margin_dpo/margin_mean": 0.20081210136413574,
"margin_dpo/margin_std": 0.5645917057991028,
"step": 55
},
{
"epoch": 0.09070294784580499,
"fcm_dpo/beta": 0.10000000894069672,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.21879427134990692,
"fcm_dpo/q_t": 0.4945460259914398,
"grad_norm": 29.641948699951172,
"learning_rate": 4.4029850746268654e-07,
"logits/chosen": 0.1170019656419754,
"logits/rejected": 0.0827331617474556,
"logps/chosen": -60.59779739379883,
"logps/ref_chosen": -60.068870544433594,
"logps/ref_rejected": -74.41178894042969,
"logps/rejected": -75.15950012207031,
"loss": 1.3662,
"margin_dpo/margin_mean": 0.2187943458557129,
"margin_dpo/margin_std": 0.7729828357696533,
"step": 60
},
{
"epoch": 0.0982615268329554,
"fcm_dpo/beta": 0.10000000894069672,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.30554288625717163,
"fcm_dpo/q_t": 0.492379754781723,
"grad_norm": 30.804401397705078,
"learning_rate": 4.776119402985074e-07,
"logits/chosen": 0.14683708548545837,
"logits/rejected": 0.11651895940303802,
"logps/chosen": -58.885589599609375,
"logps/ref_chosen": -58.1558952331543,
"logps/ref_rejected": -76.06512451171875,
"logps/rejected": -77.1003646850586,
"loss": 1.3584,
"margin_dpo/margin_mean": 0.30554264783859253,
"margin_dpo/margin_std": 0.9574313163757324,
"step": 65
},
{
"epoch": 0.10582010582010581,
"fcm_dpo/beta": 0.10000000894069672,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.4061034321784973,
"fcm_dpo/q_t": 0.4898872971534729,
"grad_norm": 28.625547409057617,
"learning_rate": 4.999860140229787e-07,
"logits/chosen": 0.10340998321771622,
"logits/rejected": 0.0681125819683075,
"logps/chosen": -68.41093444824219,
"logps/ref_chosen": -67.35506439208984,
"logps/ref_rejected": -82.24962615966797,
"logps/rejected": -83.71160125732422,
"loss": 1.3494,
"margin_dpo/margin_mean": 0.40610337257385254,
"margin_dpo/margin_std": 1.122269868850708,
"step": 70
},
{
"epoch": 0.11337868480725624,
"fcm_dpo/beta": 0.10000000894069672,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.519697368144989,
"fcm_dpo/q_t": 0.48709648847579956,
"grad_norm": 26.1798095703125,
"learning_rate": 4.998286897523808e-07,
"logits/chosen": 0.11257852613925934,
"logits/rejected": 0.07697894424200058,
"logps/chosen": -58.29564666748047,
"logps/ref_chosen": -56.86763381958008,
"logps/ref_rejected": -72.56938934326172,
"logps/rejected": -74.51709747314453,
"loss": 1.3404,
"margin_dpo/margin_mean": 0.5196975469589233,
"margin_dpo/margin_std": 1.4106642007827759,
"step": 75
},
{
"epoch": 0.12093726379440665,
"fcm_dpo/beta": 0.10000000894069672,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.6465596556663513,
"fcm_dpo/q_t": 0.48402470350265503,
"grad_norm": 28.485530853271484,
"learning_rate": 4.994966691179711e-07,
"logits/chosen": 0.14081288874149323,
"logits/rejected": 0.09565093368291855,
"logps/chosen": -59.448753356933594,
"logps/ref_chosen": -57.687095642089844,
"logps/ref_rejected": -78.06813049316406,
"logps/rejected": -80.4763412475586,
"loss": 1.3302,
"margin_dpo/margin_mean": 0.6465598344802856,
"margin_dpo/margin_std": 1.6814677715301514,
"step": 80
},
{
"epoch": 0.12849584278155707,
"fcm_dpo/beta": 0.10000000894069672,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.0534989833831787,
"fcm_dpo/q_t": 0.4739624559879303,
"grad_norm": 26.71432876586914,
"learning_rate": 4.989901842900325e-07,
"logits/chosen": 0.16136503219604492,
"logits/rejected": 0.11435899883508682,
"logps/chosen": -59.10918426513672,
"logps/ref_chosen": -56.96040725708008,
"logps/ref_rejected": -75.22166442871094,
"logps/rejected": -78.42394256591797,
"loss": 1.2921,
"margin_dpo/margin_mean": 1.0534991025924683,
"margin_dpo/margin_std": 1.7913897037506104,
"step": 85
},
{
"epoch": 0.1360544217687075,
"fcm_dpo/beta": 0.10000000894069672,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.108295202255249,
"fcm_dpo/q_t": 0.47283005714416504,
"grad_norm": 29.2835693359375,
"learning_rate": 4.983095894354857e-07,
"logits/chosen": 0.21447758376598358,
"logits/rejected": 0.1654311865568161,
"logps/chosen": -60.33354949951172,
"logps/ref_chosen": -57.41730499267578,
"logps/ref_rejected": -80.87986755371094,
"logps/rejected": -84.9044189453125,
"loss": 1.2951,
"margin_dpo/margin_mean": 1.1082954406738281,
"margin_dpo/margin_std": 2.5071663856506348,
"step": 90
},
{
"epoch": 0.1436130007558579,
"fcm_dpo/beta": 0.10000000894069672,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.4385011196136475,
"fcm_dpo/q_t": 0.4653662145137787,
"grad_norm": 29.20956039428711,
"learning_rate": 4.974553604702332e-07,
"logits/chosen": 0.2017609179019928,
"logits/rejected": 0.15785647928714752,
"logps/chosen": -57.5848274230957,
"logps/ref_chosen": -54.08087158203125,
"logps/ref_rejected": -76.15860748291016,
"logps/rejected": -81.1010513305664,
"loss": 1.2755,
"margin_dpo/margin_mean": 1.4385008811950684,
"margin_dpo/margin_std": 3.3299403190612793,
"step": 95
},
{
"epoch": 0.15117157974300832,
"fcm_dpo/beta": 0.10000000894069672,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.2202621698379517,
"fcm_dpo/q_t": 0.47157055139541626,
"grad_norm": 33.23603057861328,
"learning_rate": 4.964280947263676e-07,
"logits/chosen": 0.21698299050331116,
"logits/rejected": 0.184483140707016,
"logps/chosen": -68.64390563964844,
"logps/ref_chosen": -63.875038146972656,
"logps/ref_rejected": -82.077880859375,
"logps/rejected": -88.06700134277344,
"loss": 1.3233,
"margin_dpo/margin_mean": 1.220262050628662,
"margin_dpo/margin_std": 4.634930610656738,
"step": 100
},
{
"epoch": 0.15873015873015872,
"fcm_dpo/beta": 0.10000000894069672,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.7332004308700562,
"fcm_dpo/q_t": 0.46054258942604065,
"grad_norm": 30.745962142944336,
"learning_rate": 4.952285105344791e-07,
"logits/chosen": 0.2272680103778839,
"logits/rejected": 0.17643623054027557,
"logps/chosen": -67.46278381347656,
"logps/ref_chosen": -62.572479248046875,
"logps/ref_rejected": -80.93415069580078,
"logps/rejected": -87.5576400756836,
"loss": 1.2811,
"margin_dpo/margin_mean": 1.7332004308700562,
"margin_dpo/margin_std": 4.913107872009277,
"step": 105
},
{
"epoch": 0.16628873771730915,
"fcm_dpo/beta": 0.10000000894069672,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.5242812633514404,
"fcm_dpo/q_t": 0.46426910161972046,
"grad_norm": 44.22966003417969,
"learning_rate": 4.938574467213517e-07,
"logits/chosen": 0.19085553288459778,
"logits/rejected": 0.16806969046592712,
"logps/chosen": -74.31179809570312,
"logps/ref_chosen": -68.67534637451172,
"logps/ref_rejected": -78.82028198242188,
"logps/rejected": -85.98100280761719,
"loss": 1.3145,
"margin_dpo/margin_mean": 1.5242810249328613,
"margin_dpo/margin_std": 5.536143779754639,
"step": 110
},
{
"epoch": 0.17384731670445955,
"fcm_dpo/beta": 0.10000000894069672,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 2.013551712036133,
"fcm_dpo/q_t": 0.452955424785614,
"grad_norm": 28.415409088134766,
"learning_rate": 4.923158620234019e-07,
"logits/chosen": 0.23339705169200897,
"logits/rejected": 0.18038101494312286,
"logps/chosen": -64.06039428710938,
"logps/ref_chosen": -58.65370559692383,
"logps/ref_rejected": -81.89688873291016,
"logps/rejected": -89.31713104248047,
"loss": 1.2498,
"margin_dpo/margin_mean": 2.013551712036133,
"margin_dpo/margin_std": 4.735473155975342,
"step": 115
},
{
"epoch": 0.18140589569160998,
"fcm_dpo/beta": 0.10000000894069672,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 2.4133291244506836,
"fcm_dpo/q_t": 0.44279351830482483,
"grad_norm": 28.7838191986084,
"learning_rate": 4.906048344162676e-07,
"logits/chosen": 0.24171380698680878,
"logits/rejected": 0.19227424263954163,
"logps/chosen": -61.56781768798828,
"logps/ref_chosen": -56.16423797607422,
"logps/ref_rejected": -75.87689971923828,
"logps/rejected": -83.69380187988281,
"loss": 1.2069,
"margin_dpo/margin_mean": 2.4133291244506836,
"margin_dpo/margin_std": 4.3148322105407715,
"step": 120
},
{
"epoch": 0.1889644746787604,
"fcm_dpo/beta": 0.10000000894069672,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 3.0360968112945557,
"fcm_dpo/q_t": 0.4307987093925476,
"grad_norm": 29.158105850219727,
"learning_rate": 4.887255603610184e-07,
"logits/chosen": 0.2693176865577698,
"logits/rejected": 0.21195952594280243,
"logps/chosen": -66.05271911621094,
"logps/ref_chosen": -59.744285583496094,
"logps/ref_rejected": -86.77314758300781,
"logps/rejected": -96.11767578125,
"loss": 1.1645,
"margin_dpo/margin_mean": 3.0360968112945557,
"margin_dpo/margin_std": 5.009610652923584,
"step": 125
},
{
"epoch": 0.1965230536659108,
"fcm_dpo/beta": 0.10000000894069672,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 2.8338823318481445,
"fcm_dpo/q_t": 0.43448346853256226,
"grad_norm": 30.431678771972656,
"learning_rate": 4.866793539675126e-07,
"logits/chosen": 0.20911665260791779,
"logits/rejected": 0.17860978841781616,
"logps/chosen": -71.53947448730469,
"logps/ref_chosen": -64.15296936035156,
"logps/ref_rejected": -75.17271423339844,
"logps/rejected": -85.3930892944336,
"loss": 1.1921,
"margin_dpo/margin_mean": 2.8338828086853027,
"margin_dpo/margin_std": 5.266488075256348,
"step": 130
},
{
"epoch": 0.20408163265306123,
"fcm_dpo/beta": 0.10191638767719269,
"fcm_dpo/delta": 0.031610384583473206,
"fcm_dpo/margin": 3.762406826019287,
"fcm_dpo/q_t": 0.4133889079093933,
"grad_norm": 26.239471435546875,
"learning_rate": 4.844676460754862e-07,
"logits/chosen": 0.25873705744743347,
"logits/rejected": 0.21873533725738525,
"logps/chosen": -65.23578643798828,
"logps/ref_chosen": -57.006690979003906,
"logps/ref_rejected": -73.71768188476562,
"logps/rejected": -85.70919036865234,
"loss": 1.1276,
"margin_dpo/margin_mean": 3.76240611076355,
"margin_dpo/margin_std": 5.971688270568848,
"step": 135
},
{
"epoch": 0.21164021164021163,
"fcm_dpo/beta": 0.10494687408208847,
"fcm_dpo/delta": 0.025396008044481277,
"fcm_dpo/margin": 4.405590534210205,
"fcm_dpo/q_t": 0.3999941945075989,
"grad_norm": 33.757999420166016,
"learning_rate": 4.820919832540181e-07,
"logits/chosen": 0.24625614285469055,
"logits/rejected": 0.2025199830532074,
"logps/chosen": -74.15595245361328,
"logps/ref_chosen": -63.36246871948242,
"logps/ref_rejected": -79.62621307373047,
"logps/rejected": -94.8252944946289,
"loss": 1.1183,
"margin_dpo/margin_mean": 4.405590534210205,
"margin_dpo/margin_std": 7.449770927429199,
"step": 140
},
{
"epoch": 0.21919879062736206,
"fcm_dpo/beta": 0.10641102492809296,
"fcm_dpo/delta": 0.022974295541644096,
"fcm_dpo/margin": 4.693943977355957,
"fcm_dpo/q_t": 0.395569384098053,
"grad_norm": 38.00341033935547,
"learning_rate": 4.795540267200686e-07,
"logits/chosen": 0.2727429270744324,
"logits/rejected": 0.2512747049331665,
"logps/chosen": -77.23705291748047,
"logps/ref_chosen": -65.01470184326172,
"logps/ref_rejected": -80.49073791503906,
"logps/rejected": -97.40702819824219,
"loss": 1.1399,
"margin_dpo/margin_mean": 4.693943977355957,
"margin_dpo/margin_std": 8.747485160827637,
"step": 145
},
{
"epoch": 0.22675736961451248,
"fcm_dpo/beta": 0.10816685855388641,
"fcm_dpo/delta": -0.04916912317276001,
"fcm_dpo/margin": 5.7198591232299805,
"fcm_dpo/q_t": 0.3732493221759796,
"grad_norm": 28.534120559692383,
"learning_rate": 4.768555511768486e-07,
"logits/chosen": 0.28103944659233093,
"logits/rejected": 0.2354610413312912,
"logps/chosen": -71.84245300292969,
"logps/ref_chosen": -59.19135284423828,
"logps/ref_rejected": -74.0339126586914,
"logps/rejected": -92.40486145019531,
"loss": 1.0549,
"margin_dpo/margin_mean": 5.7198591232299805,
"margin_dpo/margin_std": 8.658864974975586,
"step": 150
},
{
"epoch": 0.23431594860166288,
"fcm_dpo/beta": 0.09626957774162292,
"fcm_dpo/delta": -0.08038349449634552,
"fcm_dpo/margin": 6.609867095947266,
"fcm_dpo/q_t": 0.36982256174087524,
"grad_norm": 29.370683670043945,
"learning_rate": 4.7399844357283393e-07,
"logits/chosen": 0.29301148653030396,
"logits/rejected": 0.25923386216163635,
"logps/chosen": -73.53243255615234,
"logps/ref_chosen": -60.93949508666992,
"logps/ref_rejected": -74.51151275634766,
"logps/rejected": -93.71430969238281,
"loss": 1.0523,
"margin_dpo/margin_mean": 6.609866142272949,
"margin_dpo/margin_std": 10.054447174072266,
"step": 155
},
{
"epoch": 0.2418745275888133,
"fcm_dpo/beta": 0.08975216001272202,
"fcm_dpo/delta": -0.07737629860639572,
"fcm_dpo/margin": 7.146323204040527,
"fcm_dpo/q_t": 0.36957624554634094,
"grad_norm": 26.117460250854492,
"learning_rate": 4.7098470178228755e-07,
"logits/chosen": 0.2661024034023285,
"logits/rejected": 0.22873857617378235,
"logps/chosen": -72.47210693359375,
"logps/ref_chosen": -58.763816833496094,
"logps/ref_rejected": -74.94743347167969,
"logps/rejected": -95.80204010009766,
"loss": 1.0421,
"margin_dpo/margin_mean": 7.146323204040527,
"margin_dpo/margin_std": 10.654400825500488,
"step": 160
},
{
"epoch": 0.2494331065759637,
"fcm_dpo/beta": 0.08210185170173645,
"fcm_dpo/delta": -0.13484172523021698,
"fcm_dpo/margin": 8.718481063842773,
"fcm_dpo/q_t": 0.3526487350463867,
"grad_norm": 23.741703033447266,
"learning_rate": 4.678164332082175e-07,
"logits/chosen": 0.3242879807949066,
"logits/rejected": 0.2718299329280853,
"logps/chosen": -70.94097900390625,
"logps/ref_chosen": -55.70417022705078,
"logps/ref_rejected": -76.59439849853516,
"logps/rejected": -100.54969024658203,
"loss": 0.9977,
"margin_dpo/margin_mean": 8.718481063842773,
"margin_dpo/margin_std": 11.619240760803223,
"step": 165
},
{
"epoch": 0.25699168556311414,
"fcm_dpo/beta": 0.07538954168558121,
"fcm_dpo/delta": -0.042643819004297256,
"fcm_dpo/margin": 8.284780502319336,
"fcm_dpo/q_t": 0.3727918565273285,
"grad_norm": 21.66844367980957,
"learning_rate": 4.6449585330874425e-07,
"logits/chosen": 0.2913161516189575,
"logits/rejected": 0.2505802512168884,
"logps/chosen": -75.16352081298828,
"logps/ref_chosen": -61.169105529785156,
"logps/ref_rejected": -77.21674346923828,
"logps/rejected": -99.49595642089844,
"loss": 1.0435,
"margin_dpo/margin_mean": 8.284780502319336,
"margin_dpo/margin_std": 12.27697467803955,
"step": 170
},
{
"epoch": 0.26455026455026454,
"fcm_dpo/beta": 0.06802255660295486,
"fcm_dpo/delta": -0.13938404619693756,
"fcm_dpo/margin": 9.97547721862793,
"fcm_dpo/q_t": 0.3644518256187439,
"grad_norm": 21.26988983154297,
"learning_rate": 4.6102528404790965e-07,
"logits/chosen": 0.34859341382980347,
"logits/rejected": 0.2882172167301178,
"logps/chosen": -73.75447082519531,
"logps/ref_chosen": -59.24176788330078,
"logps/ref_rejected": -81.80384826660156,
"logps/rejected": -106.2920150756836,
"loss": 1.0468,
"margin_dpo/margin_mean": 9.97547721862793,
"margin_dpo/margin_std": 15.061019897460938,
"step": 175
},
{
"epoch": 0.272108843537415,
"fcm_dpo/beta": 0.060754068195819855,
"fcm_dpo/delta": -0.07431790977716446,
"fcm_dpo/margin": 9.864774703979492,
"fcm_dpo/q_t": 0.37669187784194946,
"grad_norm": 20.148046493530273,
"learning_rate": 4.5740715227200897e-07,
"logits/chosen": 0.3143027424812317,
"logits/rejected": 0.27188676595687866,
"logps/chosen": -77.92072296142578,
"logps/ref_chosen": -63.24883270263672,
"logps/ref_rejected": -79.00736236572266,
"logps/rejected": -103.54402923583984,
"loss": 1.0681,
"margin_dpo/margin_mean": 9.864773750305176,
"margin_dpo/margin_std": 15.329916000366211,
"step": 180
},
{
"epoch": 0.2796674225245654,
"fcm_dpo/beta": 0.05635453015565872,
"fcm_dpo/delta": -0.08090370148420334,
"fcm_dpo/margin": 10.306377410888672,
"fcm_dpo/q_t": 0.37912043929100037,
"grad_norm": 18.77512550354004,
"learning_rate": 4.5364398801258394e-07,
"logits/chosen": 0.36532798409461975,
"logits/rejected": 0.31204718351364136,
"logps/chosen": -70.10028076171875,
"logps/ref_chosen": -56.390625,
"logps/ref_rejected": -76.81001281738281,
"logps/rejected": -100.82603454589844,
"loss": 1.0654,
"margin_dpo/margin_mean": 10.306377410888672,
"margin_dpo/margin_std": 15.676936149597168,
"step": 185
},
{
"epoch": 0.2872260015117158,
"fcm_dpo/beta": 0.053449880331754684,
"fcm_dpo/delta": -0.07286106050014496,
"fcm_dpo/margin": 12.053500175476074,
"fcm_dpo/q_t": 0.36934491991996765,
"grad_norm": 25.36161231994629,
"learning_rate": 4.4973842271726024e-07,
"logits/chosen": 0.35614460706710815,
"logits/rejected": 0.30015695095062256,
"logps/chosen": -81.80848693847656,
"logps/ref_chosen": -68.25389099121094,
"logps/ref_rejected": -86.461181640625,
"logps/rejected": -112.06925964355469,
"loss": 1.0366,
"margin_dpo/margin_mean": 12.05350112915039,
"margin_dpo/margin_std": 17.739303588867188,
"step": 190
},
{
"epoch": 0.2947845804988662,
"fcm_dpo/beta": 0.05007879063487053,
"fcm_dpo/delta": -0.04965587332844734,
"fcm_dpo/margin": 11.500941276550293,
"fcm_dpo/q_t": 0.3801359236240387,
"grad_norm": 21.779829025268555,
"learning_rate": 4.4569318740967043e-07,
"logits/chosen": 0.38067522644996643,
"logits/rejected": 0.35913893580436707,
"logps/chosen": -79.82146453857422,
"logps/ref_chosen": -62.1484260559082,
"logps/ref_rejected": -71.33458709716797,
"logps/rejected": -100.50856018066406,
"loss": 1.0743,
"margin_dpo/margin_mean": 11.50094223022461,
"margin_dpo/margin_std": 17.975563049316406,
"step": 195
},
{
"epoch": 0.30234315948601664,
"fcm_dpo/beta": 0.04691356047987938,
"fcm_dpo/delta": -0.08742973953485489,
"fcm_dpo/margin": 13.574827194213867,
"fcm_dpo/q_t": 0.36781007051467896,
"grad_norm": 18.177783966064453,
"learning_rate": 4.415111107797445e-07,
"logits/chosen": 0.4625890851020813,
"logits/rejected": 0.39986932277679443,
"logps/chosen": -75.27424621582031,
"logps/ref_chosen": -56.950096130371094,
"logps/ref_rejected": -78.66989135742188,
"logps/rejected": -110.5688705444336,
"loss": 1.0505,
"margin_dpo/margin_mean": 13.574827194213867,
"margin_dpo/margin_std": 20.146961212158203,
"step": 200
},
{
"epoch": 0.30234315948601664,
"eval_fcm_dpo/beta": 0.0445309579372406,
"eval_logits/chosen": 0.43528974056243896,
"eval_logits/rejected": 0.3850432336330414,
"eval_logps/chosen": -94.9354248046875,
"eval_logps/ref_chosen": -74.85946655273438,
"eval_logps/ref_rejected": -79.54898834228516,
"eval_logps/rejected": -112.46160888671875,
"eval_loss": 0.5490387678146362,
"eval_margin_dpo/margin_mean": 12.836673736572266,
"eval_margin_dpo/margin_std": 21.53274154663086,
"eval_runtime": 39.0577,
"eval_samples_per_second": 58.964,
"eval_steps_per_second": 1.843,
"step": 200
},
{
"epoch": 0.30990173847316704,
"fcm_dpo/beta": 0.043045125901699066,
"fcm_dpo/delta": -0.05937931686639786,
"fcm_dpo/margin": 14.329627990722656,
"fcm_dpo/q_t": 0.3726270794868469,
"grad_norm": 24.482276916503906,
"learning_rate": 4.3719511720570814e-07,
"logits/chosen": 0.4526013731956482,
"logits/rejected": 0.3818688988685608,
"logps/chosen": -80.59419250488281,
"logps/ref_chosen": -57.99428176879883,
"logps/ref_rejected": -83.5367431640625,
"logps/rejected": -120.4662857055664,
"loss": 1.0519,
"margin_dpo/margin_mean": 14.329629898071289,
"margin_dpo/margin_std": 21.601360321044922,
"step": 205
},
{
"epoch": 0.31746031746031744,
"fcm_dpo/beta": 0.04131709039211273,
"fcm_dpo/delta": -0.05793388932943344,
"fcm_dpo/margin": 13.949444770812988,
"fcm_dpo/q_t": 0.3844899535179138,
"grad_norm": 19.35005760192871,
"learning_rate": 4.327482247091679e-07,
"logits/chosen": 0.4452959895133972,
"logits/rejected": 0.39058345556259155,
"logps/chosen": -90.13018798828125,
"logps/ref_chosen": -63.77195358276367,
"logps/ref_rejected": -82.56491088867188,
"logps/rejected": -122.87260437011719,
"loss": 1.1108,
"margin_dpo/margin_mean": 13.949444770812988,
"margin_dpo/margin_std": 24.073421478271484,
"step": 210
},
{
"epoch": 0.3250188964474679,
"fcm_dpo/beta": 0.03761471435427666,
"fcm_dpo/delta": -0.12060566991567612,
"fcm_dpo/margin": 18.96577262878418,
"fcm_dpo/q_t": 0.35724228620529175,
"grad_norm": 18.37898826599121,
"learning_rate": 4.281735428447157e-07,
"logits/chosen": 0.4427056908607483,
"logits/rejected": 0.37563619017601013,
"logps/chosen": -84.36368560791016,
"logps/ref_chosen": -60.27800750732422,
"logps/ref_rejected": -83.91607666015625,
"logps/rejected": -126.96754455566406,
"loss": 1.002,
"margin_dpo/margin_mean": 18.965770721435547,
"margin_dpo/margin_std": 26.138744354248047,
"step": 215
},
{
"epoch": 0.3325774754346183,
"fcm_dpo/beta": 0.033143509179353714,
"fcm_dpo/delta": -0.07818768918514252,
"fcm_dpo/margin": 18.13027000427246,
"fcm_dpo/q_t": 0.37564554810523987,
"grad_norm": 17.499849319458008,
"learning_rate": 4.234742705255272e-07,
"logits/chosen": 0.4456612169742584,
"logits/rejected": 0.38407641649246216,
"logps/chosen": -88.01606750488281,
"logps/ref_chosen": -60.88572311401367,
"logps/ref_rejected": -80.1805191040039,
"logps/rejected": -125.44112396240234,
"loss": 1.0594,
"margin_dpo/margin_mean": 18.13027000427246,
"margin_dpo/margin_std": 27.581802368164062,
"step": 220
},
{
"epoch": 0.3401360544217687,
"fcm_dpo/beta": 0.030816316604614258,
"fcm_dpo/delta": -0.08487220108509064,
"fcm_dpo/margin": 20.55803871154785,
"fcm_dpo/q_t": 0.37105461955070496,
"grad_norm": 17.931140899658203,
"learning_rate": 4.186536937864752e-07,
"logits/chosen": 0.511905312538147,
"logits/rejected": 0.43826180696487427,
"logps/chosen": -89.73455047607422,
"logps/ref_chosen": -61.02507781982422,
"logps/ref_rejected": -91.92439270019531,
"logps/rejected": -141.19189453125,
"loss": 1.0315,
"margin_dpo/margin_mean": 20.55803680419922,
"margin_dpo/margin_std": 29.531564712524414,
"step": 225
},
{
"epoch": 0.3476946334089191,
"fcm_dpo/beta": 0.030043313279747963,
"fcm_dpo/delta": -0.015888774767518044,
"fcm_dpo/margin": 19.927255630493164,
"fcm_dpo/q_t": 0.3782123029232025,
"grad_norm": 17.704288482666016,
"learning_rate": 4.137151834863213e-07,
"logits/chosen": 0.567510724067688,
"logits/rejected": 0.5179559588432312,
"logps/chosen": -85.0999984741211,
"logps/ref_chosen": -54.49797821044922,
"logps/ref_rejected": -71.96363830566406,
"logps/rejected": -122.492919921875,
"loss": 1.0889,
"margin_dpo/margin_mean": 19.927255630493164,
"margin_dpo/margin_std": 33.03584671020508,
"step": 230
},
{
"epoch": 0.35525321239606955,
"fcm_dpo/beta": 0.02700546756386757,
"fcm_dpo/delta": -0.13411830365657806,
"fcm_dpo/margin": 23.45638656616211,
"fcm_dpo/q_t": 0.36994558572769165,
"grad_norm": 16.464786529541016,
"learning_rate": 4.08662192950594e-07,
"logits/chosen": 0.5007590055465698,
"logits/rejected": 0.4768039286136627,
"logps/chosen": -99.23008728027344,
"logps/ref_chosen": -63.250282287597656,
"logps/ref_rejected": -73.09049987792969,
"logps/rejected": -132.5266876220703,
"loss": 1.0474,
"margin_dpo/margin_mean": 23.456384658813477,
"margin_dpo/margin_std": 33.97542190551758,
"step": 235
},
{
"epoch": 0.36281179138321995,
"fcm_dpo/beta": 0.024938663467764854,
"fcm_dpo/delta": -0.08994299918413162,
"fcm_dpo/margin": 27.00982666015625,
"fcm_dpo/q_t": 0.3612818121910095,
"grad_norm": 14.642565727233887,
"learning_rate": 4.0349825555680045e-07,
"logits/chosen": 0.561758279800415,
"logits/rejected": 0.5071486234664917,
"logps/chosen": -108.31246185302734,
"logps/ref_chosen": -65.26150512695312,
"logps/ref_rejected": -87.60311126708984,
"logps/rejected": -157.66390991210938,
"loss": 1.0195,
"margin_dpo/margin_mean": 27.00982666015625,
"margin_dpo/margin_std": 37.65082550048828,
"step": 240
},
{
"epoch": 0.37037037037037035,
"fcm_dpo/beta": 0.02278234250843525,
"fcm_dpo/delta": -0.10555760562419891,
"fcm_dpo/margin": 30.133275985717773,
"fcm_dpo/q_t": 0.3576427102088928,
"grad_norm": 15.980971336364746,
"learning_rate": 3.982269822636601e-07,
"logits/chosen": 0.5394655466079712,
"logits/rejected": 0.5064207911491394,
"logps/chosen": -108.2628173828125,
"logps/ref_chosen": -65.73170471191406,
"logps/ref_rejected": -75.19642639160156,
"logps/rejected": -147.86080932617188,
"loss": 0.9861,
"margin_dpo/margin_mean": 30.133275985717773,
"margin_dpo/margin_std": 38.397438049316406,
"step": 245
},
{
"epoch": 0.3779289493575208,
"fcm_dpo/beta": 0.0209406279027462,
"fcm_dpo/delta": -0.029252177104353905,
"fcm_dpo/margin": 25.17742347717285,
"fcm_dpo/q_t": 0.39082691073417664,
"grad_norm": 18.229862213134766,
"learning_rate": 3.9285205908608934e-07,
"logits/chosen": 0.599532961845398,
"logits/rejected": 0.5946930050849915,
"logps/chosen": -126.43806457519531,
"logps/ref_chosen": -70.71224212646484,
"logps/ref_rejected": -76.12723541259766,
"logps/rejected": -157.03048706054688,
"loss": 1.1321,
"margin_dpo/margin_mean": 25.17742347717285,
"margin_dpo/margin_std": 44.761314392089844,
"step": 250
},
{
"epoch": 0.3854875283446712,
"fcm_dpo/beta": 0.02031904086470604,
"fcm_dpo/delta": -0.02309424616396427,
"fcm_dpo/margin": 24.082748413085938,
"fcm_dpo/q_t": 0.3976757824420929,
"grad_norm": 18.887149810791016,
"learning_rate": 3.873772445177015e-07,
"logits/chosen": 0.609887957572937,
"logits/rejected": 0.5647954940795898,
"logps/chosen": -117.80338287353516,
"logps/ref_chosen": -61.767662048339844,
"logps/ref_rejected": -77.38813018798828,
"logps/rejected": -157.50662231445312,
"loss": 1.1377,
"margin_dpo/margin_mean": 24.082748413085938,
"margin_dpo/margin_std": 43.56954574584961,
"step": 255
},
{
"epoch": 0.3930461073318216,
"fcm_dpo/beta": 0.019356101751327515,
"fcm_dpo/delta": -0.06188065558671951,
"fcm_dpo/margin": 33.2767448425293,
"fcm_dpo/q_t": 0.3668089807033539,
"grad_norm": 19.52748680114746,
"learning_rate": 3.818063669026256e-07,
"logits/chosen": 0.585278332233429,
"logits/rejected": 0.5075653195381165,
"logps/chosen": -119.7823486328125,
"logps/ref_chosen": -61.57584762573242,
"logps/ref_rejected": -91.87513732910156,
"logps/rejected": -183.35836791992188,
"loss": 1.0274,
"margin_dpo/margin_mean": 33.2767448425293,
"margin_dpo/margin_std": 47.10149002075195,
"step": 260
},
{
"epoch": 0.40060468631897206,
"fcm_dpo/beta": 0.019167505204677582,
"fcm_dpo/delta": 0.02147207036614418,
"fcm_dpo/margin": 28.888378143310547,
"fcm_dpo/q_t": 0.38470879197120667,
"grad_norm": 16.250465393066406,
"learning_rate": 3.7614332175848027e-07,
"logits/chosen": 0.5632354021072388,
"logits/rejected": 0.5125952959060669,
"logps/chosen": -119.10379791259766,
"logps/ref_chosen": -65.75422668457031,
"logps/ref_rejected": -77.9569320678711,
"logps/rejected": -160.19488525390625,
"loss": 1.082,
"margin_dpo/margin_mean": 28.888378143310547,
"margin_dpo/margin_std": 46.08098602294922,
"step": 265
},
{
"epoch": 0.40816326530612246,
"fcm_dpo/beta": 0.018922636285424232,
"fcm_dpo/delta": -0.03335314989089966,
"fcm_dpo/margin": 29.144763946533203,
"fcm_dpo/q_t": 0.3839413523674011,
"grad_norm": 16.50005531311035,
"learning_rate": 3.7039206905237656e-07,
"logits/chosen": 0.6007962226867676,
"logits/rejected": 0.5515360236167908,
"logps/chosen": -110.72042083740234,
"logps/ref_chosen": -62.27649688720703,
"logps/ref_rejected": -76.56950378417969,
"logps/rejected": -154.158203125,
"loss": 1.0747,
"margin_dpo/margin_mean": 29.144763946533203,
"margin_dpo/margin_std": 44.99258804321289,
"step": 270
},
{
"epoch": 0.41572184429327286,
"fcm_dpo/beta": 0.018402384594082832,
"fcm_dpo/delta": -0.021868888288736343,
"fcm_dpo/margin": 32.19723129272461,
"fcm_dpo/q_t": 0.37771254777908325,
"grad_norm": 16.371837615966797,
"learning_rate": 3.645566304318526e-07,
"logits/chosen": 0.5913391709327698,
"logits/rejected": 0.5466698408126831,
"logps/chosen": -113.94535827636719,
"logps/ref_chosen": -61.854393005371094,
"logps/ref_rejected": -77.22246551513672,
"logps/rejected": -161.51065063476562,
"loss": 1.0643,
"margin_dpo/margin_mean": 32.197235107421875,
"margin_dpo/margin_std": 49.30824279785156,
"step": 275
},
{
"epoch": 0.42328042328042326,
"fcm_dpo/beta": 0.01777859404683113,
"fcm_dpo/delta": -0.08700723201036453,
"fcm_dpo/margin": 34.64583206176758,
"fcm_dpo/q_t": 0.3692876696586609,
"grad_norm": 15.50888729095459,
"learning_rate": 3.586410864126781e-07,
"logits/chosen": 0.593945324420929,
"logits/rejected": 0.5639765858650208,
"logps/chosen": -114.51588439941406,
"logps/ref_chosen": -61.29896926879883,
"logps/ref_rejected": -73.35762023925781,
"logps/rejected": -161.22035217285156,
"loss": 1.0139,
"margin_dpo/margin_mean": 34.64583206176758,
"margin_dpo/margin_std": 45.06488800048828,
"step": 280
},
{
"epoch": 0.4308390022675737,
"fcm_dpo/beta": 0.016293346881866455,
"fcm_dpo/delta": -0.030563678592443466,
"fcm_dpo/margin": 35.089866638183594,
"fcm_dpo/q_t": 0.37944841384887695,
"grad_norm": 18.91819190979004,
"learning_rate": 3.5264957352549375e-07,
"logits/chosen": 0.6056411862373352,
"logits/rejected": 0.5613064765930176,
"logps/chosen": -125.51373291015625,
"logps/ref_chosen": -63.435462951660156,
"logps/ref_rejected": -79.73661804199219,
"logps/rejected": -176.90478515625,
"loss": 1.0488,
"margin_dpo/margin_mean": 35.089866638183594,
"margin_dpo/margin_std": 50.59145736694336,
"step": 285
},
{
"epoch": 0.4383975812547241,
"fcm_dpo/beta": 0.015350925736129284,
"fcm_dpo/delta": -0.06803837418556213,
"fcm_dpo/margin": 40.154258728027344,
"fcm_dpo/q_t": 0.36974334716796875,
"grad_norm": 18.484561920166016,
"learning_rate": 3.465862814232821e-07,
"logits/chosen": 0.6393694877624512,
"logits/rejected": 0.5767431855201721,
"logps/chosen": -128.746337890625,
"logps/ref_chosen": -57.696876525878906,
"logps/ref_rejected": -79.78132629394531,
"logps/rejected": -190.9850616455078,
"loss": 1.0121,
"margin_dpo/margin_mean": 40.154258728027344,
"margin_dpo/margin_std": 53.58546829223633,
"step": 290
},
{
"epoch": 0.4459561602418745,
"fcm_dpo/beta": 0.014825056307017803,
"fcm_dpo/delta": -0.02245759218931198,
"fcm_dpo/margin": 38.19078063964844,
"fcm_dpo/q_t": 0.3817628026008606,
"grad_norm": 19.01740074157715,
"learning_rate": 3.4045544995169125e-07,
"logits/chosen": 0.7111937999725342,
"logits/rejected": 0.6355310678482056,
"logps/chosen": -135.02879333496094,
"logps/ref_chosen": -55.430633544921875,
"logps/ref_rejected": -78.1390151977539,
"logps/rejected": -195.927978515625,
"loss": 1.0571,
"margin_dpo/margin_mean": 38.19078826904297,
"margin_dpo/margin_std": 56.477027893066406,
"step": 295
},
{
"epoch": 0.45351473922902497,
"fcm_dpo/beta": 0.015004401095211506,
"fcm_dpo/delta": 0.03381625562906265,
"fcm_dpo/margin": 34.913299560546875,
"fcm_dpo/q_t": 0.39039894938468933,
"grad_norm": 16.89678955078125,
"learning_rate": 3.3426136618426043e-07,
"logits/chosen": 0.6468077898025513,
"logits/rejected": 0.5938016176223755,
"logps/chosen": -149.64816284179688,
"logps/ref_chosen": -61.207069396972656,
"logps/ref_rejected": -75.23294067382812,
"logps/rejected": -198.58737182617188,
"loss": 1.0995,
"margin_dpo/margin_mean": 34.913299560546875,
"margin_dpo/margin_std": 57.363426208496094,
"step": 300
},
{
"epoch": 0.46107331821617537,
"fcm_dpo/beta": 0.015164652839303017,
"fcm_dpo/delta": -0.027595514431595802,
"fcm_dpo/margin": 34.962440490722656,
"fcm_dpo/q_t": 0.38818344473838806,
"grad_norm": 17.029956817626953,
"learning_rate": 3.280083614246217e-07,
"logits/chosen": 0.6154987812042236,
"logits/rejected": 0.578453540802002,
"logps/chosen": -147.16146850585938,
"logps/ref_chosen": -63.06663131713867,
"logps/ref_rejected": -78.45845031738281,
"logps/rejected": -197.51571655273438,
"loss": 1.0998,
"margin_dpo/margin_mean": 34.96243667602539,
"margin_dpo/margin_std": 57.51310348510742,
"step": 305
},
{
"epoch": 0.46863189720332576,
"fcm_dpo/beta": 0.014774846844375134,
"fcm_dpo/delta": -0.008402171544730663,
"fcm_dpo/margin": 27.70456886291504,
"fcm_dpo/q_t": 0.41280922293663025,
"grad_norm": 15.905935287475586,
"learning_rate": 3.2170080817777257e-07,
"logits/chosen": 0.6693840622901917,
"logits/rejected": 0.619363009929657,
"logps/chosen": -142.225341796875,
"logps/ref_chosen": -63.60908889770508,
"logps/ref_rejected": -74.06394958496094,
"logps/rejected": -180.38479614257812,
"loss": 1.1676,
"margin_dpo/margin_mean": 27.70456886291504,
"margin_dpo/margin_std": 53.79878616333008,
"step": 310
},
{
"epoch": 0.47619047619047616,
"fcm_dpo/beta": 0.014566788449883461,
"fcm_dpo/delta": -0.02135154977440834,
"fcm_dpo/margin": 35.78001403808594,
"fcm_dpo/q_t": 0.38799604773521423,
"grad_norm": 13.580395698547363,
"learning_rate": 3.1534311709253723e-07,
"logits/chosen": 0.6421744227409363,
"logits/rejected": 0.5867301225662231,
"logps/chosen": -130.10659790039062,
"logps/ref_chosen": -62.31493377685547,
"logps/ref_rejected": -75.07472229003906,
"logps/rejected": -178.64637756347656,
"loss": 1.0606,
"margin_dpo/margin_mean": 35.78001403808594,
"margin_dpo/margin_std": 51.48203659057617,
"step": 315
},
{
"epoch": 0.4837490551776266,
"fcm_dpo/beta": 0.013901944272220135,
"fcm_dpo/delta": -0.061062753200531006,
"fcm_dpo/margin": 40.63452911376953,
"fcm_dpo/q_t": 0.3800038993358612,
"grad_norm": 15.753009796142578,
"learning_rate": 3.0893973387735683e-07,
"logits/chosen": 0.587223470211029,
"logits/rejected": 0.524192214012146,
"logps/chosen": -118.84355163574219,
"logps/ref_chosen": -55.336036682128906,
"logps/ref_rejected": -80.05536651611328,
"logps/rejected": -184.19741821289062,
"loss": 1.0452,
"margin_dpo/margin_mean": 40.63452911376953,
"margin_dpo/margin_std": 56.74776077270508,
"step": 320
},
{
"epoch": 0.491307634164777,
"fcm_dpo/beta": 0.013363140635192394,
"fcm_dpo/delta": -0.023248041048645973,
"fcm_dpo/margin": 41.32105255126953,
"fcm_dpo/q_t": 0.3810966908931732,
"grad_norm": 14.63464069366455,
"learning_rate": 3.0249513619156206e-07,
"logits/chosen": 0.6304353475570679,
"logits/rejected": 0.5674418210983276,
"logps/chosen": -130.5197296142578,
"logps/ref_chosen": -57.90629959106445,
"logps/ref_rejected": -74.2243881225586,
"logps/rejected": -188.1588592529297,
"loss": 1.0513,
"margin_dpo/margin_mean": 41.32105255126953,
"margin_dpo/margin_std": 59.10980987548828,
"step": 325
},
{
"epoch": 0.4988662131519274,
"fcm_dpo/beta": 0.013443930074572563,
"fcm_dpo/delta": 0.021733686327934265,
"fcm_dpo/margin": 36.37574005126953,
"fcm_dpo/q_t": 0.39693742990493774,
"grad_norm": 15.07494831085205,
"learning_rate": 2.9601383051430505e-07,
"logits/chosen": 0.5850898623466492,
"logits/rejected": 0.5469895005226135,
"logps/chosen": -149.595458984375,
"logps/ref_chosen": -65.17555236816406,
"logps/ref_rejected": -78.53681182861328,
"logps/rejected": -199.3324432373047,
"loss": 1.1175,
"margin_dpo/margin_mean": 36.37574005126953,
"margin_dpo/margin_std": 62.23854446411133,
"step": 330
},
{
"epoch": 0.5064247921390779,
"fcm_dpo/beta": 0.01288739126175642,
"fcm_dpo/delta": -0.0680394172668457,
"fcm_dpo/margin": 46.44968795776367,
"fcm_dpo/q_t": 0.37343794107437134,
"grad_norm": 19.42026710510254,
"learning_rate": 2.895003489933375e-07,
"logits/chosen": 0.6479321122169495,
"logits/rejected": 0.600612998008728,
"logps/chosen": -147.13876342773438,
"logps/ref_chosen": -62.62797927856445,
"logps/ref_rejected": -79.9095458984375,
"logps/rejected": -210.8699951171875,
"loss": 1.0295,
"margin_dpo/margin_mean": 46.44968795776367,
"margin_dpo/margin_std": 64.50286865234375,
"step": 335
},
{
"epoch": 0.5139833711262283,
"fcm_dpo/beta": 0.012590940110385418,
"fcm_dpo/delta": 0.003505054162815213,
"fcm_dpo/margin": 38.454471588134766,
"fcm_dpo/q_t": 0.39839255809783936,
"grad_norm": 15.376999855041504,
"learning_rate": 2.8295924627584004e-07,
"logits/chosen": 0.6036761999130249,
"logits/rejected": 0.5653026103973389,
"logps/chosen": -155.42881774902344,
"logps/ref_chosen": -61.1064567565918,
"logps/ref_rejected": -76.71846008300781,
"logps/rejected": -209.4952850341797,
"loss": 1.1186,
"margin_dpo/margin_mean": 38.454471588134766,
"margin_dpo/margin_std": 66.48394012451172,
"step": 340
},
{
"epoch": 0.5215419501133787,
"fcm_dpo/beta": 0.012363018468022346,
"fcm_dpo/delta": -0.01725325547158718,
"fcm_dpo/margin": 44.01114273071289,
"fcm_dpo/q_t": 0.3849506974220276,
"grad_norm": 15.500265121459961,
"learning_rate": 2.7639509632351927e-07,
"logits/chosen": 0.6400725841522217,
"logits/rejected": 0.5962103009223938,
"logps/chosen": -147.3646240234375,
"logps/ref_chosen": -60.12370681762695,
"logps/ref_rejected": -78.58574676513672,
"logps/rejected": -209.83779907226562,
"loss": 1.0666,
"margin_dpo/margin_mean": 44.01114273071289,
"margin_dpo/margin_std": 65.67613220214844,
"step": 345
},
{
"epoch": 0.5291005291005291,
"fcm_dpo/beta": 0.012403665110468864,
"fcm_dpo/delta": -0.03641422092914581,
"fcm_dpo/margin": 49.11076354980469,
"fcm_dpo/q_t": 0.3730853796005249,
"grad_norm": 18.272642135620117,
"learning_rate": 2.698124892141971e-07,
"logits/chosen": 0.6958315372467041,
"logits/rejected": 0.6265360116958618,
"logps/chosen": -138.1358642578125,
"logps/ref_chosen": -55.104461669921875,
"logps/ref_rejected": -80.63292694091797,
"logps/rejected": -212.77511596679688,
"loss": 1.0253,
"margin_dpo/margin_mean": 49.11076354980469,
"margin_dpo/margin_std": 67.76351165771484,
"step": 350
},
{
"epoch": 0.5366591080876795,
"fcm_dpo/beta": 0.012127144262194633,
"fcm_dpo/delta": -0.010115819983184338,
"fcm_dpo/margin": 50.23556137084961,
"fcm_dpo/q_t": 0.37128472328186035,
"grad_norm": 17.643238067626953,
"learning_rate": 2.632160279321328e-07,
"logits/chosen": 0.6702345609664917,
"logits/rejected": 0.5912803411483765,
"logps/chosen": -134.65940856933594,
"logps/ref_chosen": -54.87224197387695,
"logps/ref_rejected": -77.01316833496094,
"logps/rejected": -207.03591918945312,
"loss": 1.0338,
"margin_dpo/margin_mean": 50.23556137084961,
"margin_dpo/margin_std": 71.0433578491211,
"step": 355
},
{
"epoch": 0.54421768707483,
"fcm_dpo/beta": 0.01166975311934948,
"fcm_dpo/delta": -0.01579994522035122,
"fcm_dpo/margin": 41.836509704589844,
"fcm_dpo/q_t": 0.3966527581214905,
"grad_norm": 15.070870399475098,
"learning_rate": 2.5661032514931834e-07,
"logits/chosen": 0.6302360892295837,
"logits/rejected": 0.5863192677497864,
"logps/chosen": -152.76345825195312,
"logps/ref_chosen": -60.75285720825195,
"logps/ref_rejected": -75.21507263183594,
"logps/rejected": -209.0622100830078,
"loss": 1.1188,
"margin_dpo/margin_mean": 41.83650588989258,
"margin_dpo/margin_std": 70.53252410888672,
"step": 360
},
{
"epoch": 0.5517762660619804,
"fcm_dpo/beta": 0.01143195666372776,
"fcm_dpo/delta": -0.052489638328552246,
"fcm_dpo/margin": 56.774986267089844,
"fcm_dpo/q_t": 0.3644375205039978,
"grad_norm": 12.386019706726074,
"learning_rate": 2.5e-07,
"logits/chosen": 0.6857043504714966,
"logits/rejected": 0.6063531637191772,
"logps/chosen": -149.87673950195312,
"logps/ref_chosen": -58.56513595581055,
"logps/ref_rejected": -84.06403350830078,
"logps/rejected": -232.150634765625,
"loss": 0.9963,
"margin_dpo/margin_mean": 56.774986267089844,
"margin_dpo/margin_std": 74.50080871582031,
"step": 365
},
{
"epoch": 0.5593348450491308,
"fcm_dpo/beta": 0.011315222829580307,
"fcm_dpo/delta": 0.02159869484603405,
"fcm_dpo/margin": 45.84794616699219,
"fcm_dpo/q_t": 0.3889442980289459,
"grad_norm": 15.18197250366211,
"learning_rate": 2.4338967485068164e-07,
"logits/chosen": 0.6318791508674622,
"logits/rejected": 0.5831542611122131,
"logps/chosen": -152.20651245117188,
"logps/ref_chosen": -59.443138122558594,
"logps/ref_rejected": -75.80937194824219,
"logps/rejected": -214.42068481445312,
"loss": 1.0915,
"margin_dpo/margin_mean": 45.84794616699219,
"margin_dpo/margin_std": 73.65330505371094,
"step": 370
},
{
"epoch": 0.5668934240362812,
"fcm_dpo/beta": 0.011248277500271797,
"fcm_dpo/delta": -0.027419675141572952,
"fcm_dpo/margin": 46.06201171875,
"fcm_dpo/q_t": 0.3906846046447754,
"grad_norm": 16.094900131225586,
"learning_rate": 2.3678397206786715e-07,
"logits/chosen": 0.6881932616233826,
"logits/rejected": 0.6348354816436768,
"logps/chosen": -155.08950805664062,
"logps/ref_chosen": -58.59185028076172,
"logps/ref_rejected": -73.7529525756836,
"logps/rejected": -216.31265258789062,
"loss": 1.1143,
"margin_dpo/margin_mean": 46.062007904052734,
"margin_dpo/margin_std": 78.36566162109375,
"step": 375
},
{
"epoch": 0.5744520030234316,
"fcm_dpo/beta": 0.010973427444696426,
"fcm_dpo/delta": -0.023532114923000336,
"fcm_dpo/margin": 51.64561080932617,
"fcm_dpo/q_t": 0.3803391456604004,
"grad_norm": 13.79555892944336,
"learning_rate": 2.3018751078580283e-07,
"logits/chosen": 0.7098981142044067,
"logits/rejected": 0.6563004851341248,
"logps/chosen": -153.05264282226562,
"logps/ref_chosen": -58.93424606323242,
"logps/ref_rejected": -76.27055358886719,
"logps/rejected": -222.03457641601562,
"loss": 1.0563,
"margin_dpo/margin_mean": 51.64561080932617,
"margin_dpo/margin_std": 76.09913635253906,
"step": 380
},
{
"epoch": 0.582010582010582,
"fcm_dpo/beta": 0.010893596336245537,
"fcm_dpo/delta": 0.006062631495296955,
"fcm_dpo/margin": 44.309181213378906,
"fcm_dpo/q_t": 0.39645999670028687,
"grad_norm": 13.824441909790039,
"learning_rate": 2.2360490367648084e-07,
"logits/chosen": 0.6328981518745422,
"logits/rejected": 0.606531023979187,
"logps/chosen": -165.55270385742188,
"logps/ref_chosen": -66.42684173583984,
"logps/ref_rejected": -76.96304321289062,
"logps/rejected": -220.3980712890625,
"loss": 1.1059,
"margin_dpo/margin_mean": 44.30918502807617,
"margin_dpo/margin_std": 72.50572204589844,
"step": 385
},
{
"epoch": 0.5895691609977324,
"fcm_dpo/beta": 0.011033998802304268,
"fcm_dpo/delta": 0.017967429012060165,
"fcm_dpo/margin": 46.52573013305664,
"fcm_dpo/q_t": 0.39048102498054504,
"grad_norm": 13.157139778137207,
"learning_rate": 2.170407537241599e-07,
"logits/chosen": 0.6664471626281738,
"logits/rejected": 0.6082719564437866,
"logps/chosen": -158.45370483398438,
"logps/ref_chosen": -60.984214782714844,
"logps/ref_rejected": -79.54056549072266,
"logps/rejected": -223.53579711914062,
"loss": 1.0801,
"margin_dpo/margin_mean": 46.52573013305664,
"margin_dpo/margin_std": 71.85535430908203,
"step": 390
},
{
"epoch": 0.5971277399848829,
"fcm_dpo/beta": 0.010882696136832237,
"fcm_dpo/delta": -0.01834883727133274,
"fcm_dpo/margin": 53.6688117980957,
"fcm_dpo/q_t": 0.37560978531837463,
"grad_norm": 17.336837768554688,
"learning_rate": 2.104996510066625e-07,
"logits/chosen": 0.6918023824691772,
"logits/rejected": 0.6310297846794128,
"logps/chosen": -150.5190887451172,
"logps/ref_chosen": -58.30937957763672,
"logps/ref_rejected": -80.09587097167969,
"logps/rejected": -225.9744110107422,
"loss": 1.0295,
"margin_dpo/margin_mean": 53.6688117980957,
"margin_dpo/margin_std": 73.11488342285156,
"step": 395
},
{
"epoch": 0.6046863189720333,
"fcm_dpo/beta": 0.010488608852028847,
"fcm_dpo/delta": -0.05746689438819885,
"fcm_dpo/margin": 49.87424850463867,
"fcm_dpo/q_t": 0.38892143964767456,
"grad_norm": 19.94718360900879,
"learning_rate": 2.0398616948569493e-07,
"logits/chosen": 0.6400595307350159,
"logits/rejected": 0.5548309087753296,
"logps/chosen": -163.4067840576172,
"logps/ref_chosen": -61.39867401123047,
"logps/ref_rejected": -89.0177993774414,
"logps/rejected": -240.90017700195312,
"loss": 1.0738,
"margin_dpo/margin_mean": 49.87424850463867,
"margin_dpo/margin_std": 74.35093688964844,
"step": 400
},
{
"epoch": 0.6046863189720333,
"eval_fcm_dpo/beta": 0.010275867767632008,
"eval_logits/chosen": 0.6246050596237183,
"eval_logits/rejected": 0.5774987936019897,
"eval_logps/chosen": -176.5742950439453,
"eval_logps/ref_chosen": -74.85946655273438,
"eval_logps/ref_rejected": -79.54898834228516,
"eval_logps/rejected": -226.17611694335938,
"eval_loss": 0.5613793134689331,
"eval_margin_dpo/margin_mean": 44.91230392456055,
"eval_margin_dpo/margin_std": 77.27910614013672,
"eval_runtime": 39.1906,
"eval_samples_per_second": 58.764,
"eval_steps_per_second": 1.837,
"step": 400
},
{
"epoch": 0.6122448979591837,
"fcm_dpo/beta": 0.010087807662785053,
"fcm_dpo/delta": -0.038665831089019775,
"fcm_dpo/margin": 55.62682342529297,
"fcm_dpo/q_t": 0.3804628252983093,
"grad_norm": 13.084096908569336,
"learning_rate": 1.975048638084379e-07,
"logits/chosen": 0.6958111524581909,
"logits/rejected": 0.6256667971611023,
"logps/chosen": -153.22628784179688,
"logps/ref_chosen": -55.953521728515625,
"logps/ref_rejected": -77.67539978027344,
"logps/rejected": -230.57498168945312,
"loss": 1.0364,
"margin_dpo/margin_mean": 55.62682342529297,
"margin_dpo/margin_std": 75.48118591308594,
"step": 405
},
{
"epoch": 0.6198034769463341,
"fcm_dpo/beta": 0.009773796424269676,
"fcm_dpo/delta": -0.03128813952207565,
"fcm_dpo/margin": 55.161216735839844,
"fcm_dpo/q_t": 0.38234299421310425,
"grad_norm": 12.041950225830078,
"learning_rate": 1.9106026612264315e-07,
"logits/chosen": 0.6618553996086121,
"logits/rejected": 0.5954070687294006,
"logps/chosen": -169.55258178710938,
"logps/ref_chosen": -63.40419387817383,
"logps/ref_rejected": -80.85710144042969,
"logps/rejected": -242.1666717529297,
"loss": 1.0439,
"margin_dpo/margin_mean": 55.161216735839844,
"margin_dpo/margin_std": 74.25384521484375,
"step": 410
},
{
"epoch": 0.6273620559334845,
"fcm_dpo/beta": 0.009798675775527954,
"fcm_dpo/delta": 0.039564795792102814,
"fcm_dpo/margin": 52.46839141845703,
"fcm_dpo/q_t": 0.38860344886779785,
"grad_norm": 13.28818416595459,
"learning_rate": 1.846568829074628e-07,
"logits/chosen": 0.6661458015441895,
"logits/rejected": 0.6173139214515686,
"logps/chosen": -174.634765625,
"logps/ref_chosen": -57.6942024230957,
"logps/ref_rejected": -71.74036407470703,
"logps/rejected": -241.14932250976562,
"loss": 1.0677,
"margin_dpo/margin_mean": 52.46839141845703,
"margin_dpo/margin_std": 77.23680114746094,
"step": 415
},
{
"epoch": 0.6349206349206349,
"fcm_dpo/beta": 0.009716962464153767,
"fcm_dpo/delta": -0.0343676395714283,
"fcm_dpo/margin": 47.64646911621094,
"fcm_dpo/q_t": 0.4010258615016937,
"grad_norm": 15.641878128051758,
"learning_rate": 1.782991918222275e-07,
"logits/chosen": 0.6854730248451233,
"logits/rejected": 0.6435775756835938,
"logps/chosen": -182.29537963867188,
"logps/ref_chosen": -59.169517517089844,
"logps/ref_rejected": -69.47721099853516,
"logps/rejected": -240.24954223632812,
"loss": 1.1303,
"margin_dpo/margin_mean": 47.64646911621094,
"margin_dpo/margin_std": 82.67323303222656,
"step": 420
},
{
"epoch": 0.6424792139077853,
"fcm_dpo/beta": 0.009624272584915161,
"fcm_dpo/delta": 0.003170407610014081,
"fcm_dpo/margin": 58.16600799560547,
"fcm_dpo/q_t": 0.3807678818702698,
"grad_norm": 14.270646095275879,
"learning_rate": 1.7199163857537824e-07,
"logits/chosen": 0.6730635166168213,
"logits/rejected": 0.6302940845489502,
"logps/chosen": -172.57290649414062,
"logps/ref_chosen": -58.09320831298828,
"logps/ref_rejected": -73.98226165771484,
"logps/rejected": -246.6279754638672,
"loss": 1.0478,
"margin_dpo/margin_mean": 58.16600799560547,
"margin_dpo/margin_std": 82.8730239868164,
"step": 425
},
{
"epoch": 0.6500377928949358,
"fcm_dpo/beta": 0.009675036184489727,
"fcm_dpo/delta": -0.009549234993755817,
"fcm_dpo/margin": 51.29081344604492,
"fcm_dpo/q_t": 0.39365965127944946,
"grad_norm": 14.406279563903809,
"learning_rate": 1.6573863381573954e-07,
"logits/chosen": 0.6383100748062134,
"logits/rejected": 0.6207207441329956,
"logps/chosen": -191.18653869628906,
"logps/ref_chosen": -62.7039909362793,
"logps/ref_rejected": -74.52284240722656,
"logps/rejected": -254.29623413085938,
"loss": 1.112,
"margin_dpo/margin_mean": 51.29081344604492,
"margin_dpo/margin_std": 86.48429870605469,
"step": 430
},
{
"epoch": 0.6575963718820862,
"fcm_dpo/beta": 0.009738308377563953,
"fcm_dpo/delta": 0.005447807256132364,
"fcm_dpo/margin": 59.08086013793945,
"fcm_dpo/q_t": 0.3763192296028137,
"grad_norm": 16.807050704956055,
"learning_rate": 1.5954455004830878e-07,
"logits/chosen": 0.7440453171730042,
"logits/rejected": 0.7008776664733887,
"logps/chosen": -169.50389099121094,
"logps/ref_chosen": -56.12516403198242,
"logps/ref_rejected": -74.36073303222656,
"logps/rejected": -246.8203125,
"loss": 1.0417,
"margin_dpo/margin_mean": 59.08086013793945,
"margin_dpo/margin_std": 83.0219955444336,
"step": 435
},
{
"epoch": 0.6651549508692366,
"fcm_dpo/beta": 0.009510443545877934,
"fcm_dpo/delta": -0.04184270650148392,
"fcm_dpo/margin": 58.687843322753906,
"fcm_dpo/q_t": 0.3826465308666229,
"grad_norm": 22.634798049926758,
"learning_rate": 1.534137185767178e-07,
"logits/chosen": 0.6856757998466492,
"logits/rejected": 0.6076371073722839,
"logps/chosen": -172.70729064941406,
"logps/ref_chosen": -55.67548751831055,
"logps/ref_rejected": -76.62055206298828,
"logps/rejected": -252.34017944335938,
"loss": 1.0688,
"margin_dpo/margin_mean": 58.687843322753906,
"margin_dpo/margin_std": 89.2109146118164,
"step": 440
},
{
"epoch": 0.672713529856387,
"fcm_dpo/beta": 0.008997871540486813,
"fcm_dpo/delta": -0.03983866050839424,
"fcm_dpo/margin": 64.14662170410156,
"fcm_dpo/q_t": 0.3751087486743927,
"grad_norm": 14.766826629638672,
"learning_rate": 1.473504264745062e-07,
"logits/chosen": 0.6673511266708374,
"logits/rejected": 0.6046266555786133,
"logps/chosen": -180.44454956054688,
"logps/ref_chosen": -59.903411865234375,
"logps/ref_rejected": -82.02873229980469,
"logps/rejected": -266.71649169921875,
"loss": 1.023,
"margin_dpo/margin_mean": 64.14662170410156,
"margin_dpo/margin_std": 84.01716613769531,
"step": 445
},
{
"epoch": 0.6802721088435374,
"fcm_dpo/beta": 0.008571968413889408,
"fcm_dpo/delta": -0.029057633131742477,
"fcm_dpo/margin": 60.95649337768555,
"fcm_dpo/q_t": 0.3861503601074219,
"grad_norm": 15.082186698913574,
"learning_rate": 1.4135891358732205e-07,
"logits/chosen": 0.670871376991272,
"logits/rejected": 0.6003819704055786,
"logps/chosen": -176.04681396484375,
"logps/ref_chosen": -55.83526611328125,
"logps/ref_rejected": -79.63658142089844,
"logps/rejected": -260.80462646484375,
"loss": 1.0575,
"margin_dpo/margin_mean": 60.95649337768555,
"margin_dpo/margin_std": 84.30404663085938,
"step": 450
},
{
"epoch": 0.6878306878306878,
"fcm_dpo/beta": 0.008977680467069149,
"fcm_dpo/delta": 0.06819285452365875,
"fcm_dpo/margin": 57.031578063964844,
"fcm_dpo/q_t": 0.39055323600769043,
"grad_norm": 14.447818756103516,
"learning_rate": 1.354433695681474e-07,
"logits/chosen": 0.6231824159622192,
"logits/rejected": 0.5777461528778076,
"logps/chosen": -178.04124450683594,
"logps/ref_chosen": -60.59226608276367,
"logps/ref_rejected": -73.37936401367188,
"logps/rejected": -247.8599395751953,
"loss": 1.0795,
"margin_dpo/margin_mean": 57.031578063964844,
"margin_dpo/margin_std": 88.28900146484375,
"step": 455
},
{
"epoch": 0.6953892668178382,
"fcm_dpo/beta": 0.009271183051168919,
"fcm_dpo/delta": -0.02087727189064026,
"fcm_dpo/margin": 65.31262969970703,
"fcm_dpo/q_t": 0.3707457184791565,
"grad_norm": 13.688095092773438,
"learning_rate": 1.2960793094762345e-07,
"logits/chosen": 0.6910965442657471,
"logits/rejected": 0.6071202158927917,
"logps/chosen": -175.61166381835938,
"logps/ref_chosen": -56.21283721923828,
"logps/ref_rejected": -83.02075958251953,
"logps/rejected": -267.73223876953125,
"loss": 1.0119,
"margin_dpo/margin_mean": 65.31262969970703,
"margin_dpo/margin_std": 85.2598648071289,
"step": 460
},
{
"epoch": 0.7029478458049887,
"fcm_dpo/beta": 0.008703077211976051,
"fcm_dpo/delta": -0.03589098900556564,
"fcm_dpo/margin": 65.30760192871094,
"fcm_dpo/q_t": 0.37661120295524597,
"grad_norm": 15.768505096435547,
"learning_rate": 1.238566782415197e-07,
"logits/chosen": 0.7279990315437317,
"logits/rejected": 0.6809935569763184,
"logps/chosen": -180.1580352783203,
"logps/ref_chosen": -59.0674934387207,
"logps/ref_rejected": -74.53498840332031,
"logps/rejected": -260.93310546875,
"loss": 1.0318,
"margin_dpo/margin_mean": 65.30760192871094,
"margin_dpo/margin_std": 86.46934509277344,
"step": 465
},
{
"epoch": 0.7105064247921391,
"fcm_dpo/beta": 0.008754456415772438,
"fcm_dpo/delta": 0.00041560232057236135,
"fcm_dpo/margin": 60.178565979003906,
"fcm_dpo/q_t": 0.3857038617134094,
"grad_norm": 13.696944236755371,
"learning_rate": 1.1819363309737438e-07,
"logits/chosen": 0.7032414674758911,
"logits/rejected": 0.6553713083267212,
"logps/chosen": -180.98046875,
"logps/ref_chosen": -58.3397331237793,
"logps/ref_rejected": -74.33660125732422,
"logps/rejected": -257.1558837890625,
"loss": 1.0557,
"margin_dpo/margin_mean": 60.178558349609375,
"margin_dpo/margin_std": 85.16178131103516,
"step": 470
},
{
"epoch": 0.7180650037792895,
"fcm_dpo/beta": 0.008638769388198853,
"fcm_dpo/delta": -0.022516410797834396,
"fcm_dpo/margin": 68.27113342285156,
"fcm_dpo/q_t": 0.3759641647338867,
"grad_norm": 10.956338882446289,
"learning_rate": 1.126227554822985e-07,
"logits/chosen": 0.7186409831047058,
"logits/rejected": 0.6497506499290466,
"logps/chosen": -168.41812133789062,
"logps/ref_chosen": -54.60407638549805,
"logps/ref_rejected": -79.94635009765625,
"logps/rejected": -262.03155517578125,
"loss": 1.0416,
"margin_dpo/margin_mean": 68.27113342285156,
"margin_dpo/margin_std": 97.92758178710938,
"step": 475
},
{
"epoch": 0.7256235827664399,
"fcm_dpo/beta": 0.00875013880431652,
"fcm_dpo/delta": 0.02565288171172142,
"fcm_dpo/margin": 54.354835510253906,
"fcm_dpo/q_t": 0.39852726459503174,
"grad_norm": 17.79652976989746,
"learning_rate": 1.0714794091391072e-07,
"logits/chosen": 0.6550045013427734,
"logits/rejected": 0.6366511583328247,
"logps/chosen": -192.35037231445312,
"logps/ref_chosen": -63.0672492980957,
"logps/ref_rejected": -68.59602355957031,
"logps/rejected": -252.2339324951172,
"loss": 1.1297,
"margin_dpo/margin_mean": 54.354835510253906,
"margin_dpo/margin_std": 96.1163101196289,
"step": 480
},
{
"epoch": 0.7331821617535903,
"fcm_dpo/beta": 0.008799830451607704,
"fcm_dpo/delta": 0.002256347332149744,
"fcm_dpo/margin": 59.921966552734375,
"fcm_dpo/q_t": 0.3874397277832031,
"grad_norm": 14.530784606933594,
"learning_rate": 1.0177301773633992e-07,
"logits/chosen": 0.7001765370368958,
"logits/rejected": 0.6357440948486328,
"logps/chosen": -185.18301391601562,
"logps/ref_chosen": -58.75799560546875,
"logps/ref_rejected": -79.72233581542969,
"logps/rejected": -266.0693054199219,
"loss": 1.0675,
"margin_dpo/margin_mean": 59.921974182128906,
"margin_dpo/margin_std": 89.0008316040039,
"step": 485
},
{
"epoch": 0.7407407407407407,
"fcm_dpo/beta": 0.00887683592736721,
"fcm_dpo/delta": 0.020326469093561172,
"fcm_dpo/margin": 53.544097900390625,
"fcm_dpo/q_t": 0.4002537131309509,
"grad_norm": 14.441047668457031,
"learning_rate": 9.650174444319956e-08,
"logits/chosen": 0.7496536374092102,
"logits/rejected": 0.6850180625915527,
"logps/chosen": -195.7850799560547,
"logps/ref_chosen": -61.394195556640625,
"logps/ref_rejected": -81.1914291381836,
"logps/rejected": -269.12640380859375,
"loss": 1.1317,
"margin_dpo/margin_mean": 53.544097900390625,
"margin_dpo/margin_std": 96.80680847167969,
"step": 490
},
{
"epoch": 0.7482993197278912,
"fcm_dpo/beta": 0.008899571374058723,
"fcm_dpo/delta": -0.002458928618580103,
"fcm_dpo/margin": 53.95166778564453,
"fcm_dpo/q_t": 0.39891940355300903,
"grad_norm": 18.731355667114258,
"learning_rate": 9.133780704940594e-08,
"logits/chosen": 0.6664924621582031,
"logits/rejected": 0.600567102432251,
"logps/chosen": -189.1266632080078,
"logps/ref_chosen": -59.85382843017578,
"logps/ref_rejected": -80.63748931884766,
"logps/rejected": -263.86199951171875,
"loss": 1.1266,
"margin_dpo/margin_mean": 53.95166778564453,
"margin_dpo/margin_std": 95.11207580566406,
"step": 495
},
{
"epoch": 0.7558578987150416,
"fcm_dpo/beta": 0.008871853351593018,
"fcm_dpo/delta": 0.011157815344631672,
"fcm_dpo/margin": 64.1400375366211,
"fcm_dpo/q_t": 0.3798808455467224,
"grad_norm": 18.928569793701172,
"learning_rate": 8.628481651367875e-08,
"logits/chosen": 0.6870494484901428,
"logits/rejected": 0.6374934911727905,
"logps/chosen": -194.50869750976562,
"logps/ref_chosen": -66.17753601074219,
"logps/ref_rejected": -83.75955200195312,
"logps/rejected": -276.23077392578125,
"loss": 1.0621,
"margin_dpo/margin_mean": 64.1400375366211,
"margin_dpo/margin_std": 97.7823257446289,
"step": 500
},
{
"epoch": 0.763416477702192,
"fcm_dpo/beta": 0.009083734825253487,
"fcm_dpo/delta": -0.0031442195177078247,
"fcm_dpo/margin": 59.1200065612793,
"fcm_dpo/q_t": 0.3837282359600067,
"grad_norm": 16.126550674438477,
"learning_rate": 8.134630621352483e-08,
"logits/chosen": 0.6756311655044556,
"logits/rejected": 0.655124843120575,
"logps/chosen": -185.13583374023438,
"logps/ref_chosen": -62.11005401611328,
"logps/ref_rejected": -74.64705657958984,
"logps/rejected": -256.7928466796875,
"loss": 1.0546,
"margin_dpo/margin_mean": 59.1200065612793,
"margin_dpo/margin_std": 83.1792984008789,
"step": 505
},
{
"epoch": 0.7709750566893424,
"fcm_dpo/beta": 0.008973537012934685,
"fcm_dpo/delta": -0.01379710715264082,
"fcm_dpo/margin": 61.247772216796875,
"fcm_dpo/q_t": 0.38576018810272217,
"grad_norm": 15.6004056930542,
"learning_rate": 7.652572947447272e-08,
"logits/chosen": 0.6822357177734375,
"logits/rejected": 0.6185404658317566,
"logps/chosen": -192.7943572998047,
"logps/ref_chosen": -64.42265319824219,
"logps/ref_rejected": -87.00096130371094,
"logps/rejected": -276.62042236328125,
"loss": 1.0879,
"margin_dpo/margin_mean": 61.247772216796875,
"margin_dpo/margin_std": 99.09281158447266,
"step": 510
},
{
"epoch": 0.7785336356764928,
"fcm_dpo/beta": 0.008877582848072052,
"fcm_dpo/delta": 0.016705047339200974,
"fcm_dpo/margin": 63.62353515625,
"fcm_dpo/q_t": 0.37913957238197327,
"grad_norm": 17.3339900970459,
"learning_rate": 7.182645715528435e-08,
"logits/chosen": 0.7022300958633423,
"logits/rejected": 0.6536498665809631,
"logps/chosen": -183.25079345703125,
"logps/ref_chosen": -58.284393310546875,
"logps/ref_rejected": -79.09356689453125,
"logps/rejected": -267.6835021972656,
"loss": 1.0458,
"margin_dpo/margin_mean": 63.62353515625,
"margin_dpo/margin_std": 90.51985168457031,
"step": 515
},
{
"epoch": 0.7860922146636432,
"fcm_dpo/beta": 0.009218396618962288,
"fcm_dpo/delta": 0.03311960771679878,
"fcm_dpo/margin": 47.081138610839844,
"fcm_dpo/q_t": 0.40798163414001465,
"grad_norm": 17.232772827148438,
"learning_rate": 6.725177529083209e-08,
"logits/chosen": 0.7172441482543945,
"logits/rejected": 0.6776331067085266,
"logps/chosen": -190.6273956298828,
"logps/ref_chosen": -61.03638458251953,
"logps/ref_rejected": -72.15824890136719,
"logps/rejected": -248.83041381835938,
"loss": 1.1669,
"margin_dpo/margin_mean": 47.081138610839844,
"margin_dpo/margin_std": 92.7890396118164,
"step": 520
},
{
"epoch": 0.7936507936507936,
"fcm_dpo/beta": 0.009218786843121052,
"fcm_dpo/delta": -0.009822583757340908,
"fcm_dpo/margin": 54.7793083190918,
"fcm_dpo/q_t": 0.39144212007522583,
"grad_norm": 19.94845199584961,
"learning_rate": 6.280488279429185e-08,
"logits/chosen": 0.6219805479049683,
"logits/rejected": 0.5724949240684509,
"logps/chosen": -194.5631103515625,
"logps/ref_chosen": -68.02732849121094,
"logps/ref_rejected": -85.41429901123047,
"logps/rejected": -266.7294006347656,
"loss": 1.1074,
"margin_dpo/margin_mean": 54.77930450439453,
"margin_dpo/margin_std": 92.09957122802734,
"step": 525
},
{
"epoch": 0.8012093726379441,
"fcm_dpo/beta": 0.009130226448178291,
"fcm_dpo/delta": -0.02196129783987999,
"fcm_dpo/margin": 50.85799026489258,
"fcm_dpo/q_t": 0.40008097887039185,
"grad_norm": 18.58346939086914,
"learning_rate": 5.848888922025552e-08,
"logits/chosen": 0.6770039796829224,
"logits/rejected": 0.6091686487197876,
"logps/chosen": -189.2750701904297,
"logps/ref_chosen": -58.67436599731445,
"logps/ref_rejected": -79.38807678222656,
"logps/rejected": -260.8467712402344,
"loss": 1.1307,
"margin_dpo/margin_mean": 50.857994079589844,
"margin_dpo/margin_std": 87.7162857055664,
"step": 530
},
{
"epoch": 0.8087679516250945,
"fcm_dpo/beta": 0.0091576362028718,
"fcm_dpo/delta": 0.029367243871092796,
"fcm_dpo/margin": 56.45143508911133,
"fcm_dpo/q_t": 0.39041271805763245,
"grad_norm": 18.556217193603516,
"learning_rate": 5.430681259032957e-08,
"logits/chosen": 0.6739664077758789,
"logits/rejected": 0.6132084727287292,
"logps/chosen": -183.68429565429688,
"logps/ref_chosen": -57.640098571777344,
"logps/ref_rejected": -77.25399780273438,
"logps/rejected": -259.7496643066406,
"loss": 1.0926,
"margin_dpo/margin_mean": 56.451438903808594,
"margin_dpo/margin_std": 91.31838989257812,
"step": 535
},
{
"epoch": 0.8163265306122449,
"fcm_dpo/beta": 0.009130528196692467,
"fcm_dpo/delta": -0.028515305370092392,
"fcm_dpo/margin": 64.78633117675781,
"fcm_dpo/q_t": 0.3742666244506836,
"grad_norm": 15.093501091003418,
"learning_rate": 5.026157728273966e-08,
"logits/chosen": 0.7153981924057007,
"logits/rejected": 0.6350281238555908,
"logps/chosen": -190.83726501464844,
"logps/ref_chosen": -60.17341995239258,
"logps/ref_rejected": -85.50316619873047,
"logps/rejected": -280.9533386230469,
"loss": 1.0162,
"margin_dpo/margin_mean": 64.78633117675781,
"margin_dpo/margin_std": 85.54998779296875,
"step": 540
},
{
"epoch": 0.8238851095993953,
"fcm_dpo/beta": 0.008908280171453953,
"fcm_dpo/delta": -0.029253508895635605,
"fcm_dpo/margin": 66.00330352783203,
"fcm_dpo/q_t": 0.37347084283828735,
"grad_norm": 16.25635528564453,
"learning_rate": 4.635601198741607e-08,
"logits/chosen": 0.7022188305854797,
"logits/rejected": 0.6508811116218567,
"logps/chosen": -179.33633422851562,
"logps/ref_chosen": -56.985809326171875,
"logps/ref_rejected": -73.21353912353516,
"logps/rejected": -261.56732177734375,
"loss": 1.029,
"margin_dpo/margin_mean": 66.00330352783203,
"margin_dpo/margin_std": 89.81163787841797,
"step": 545
},
{
"epoch": 0.8314436885865457,
"fcm_dpo/beta": 0.008911579847335815,
"fcm_dpo/delta": 0.03279992565512657,
"fcm_dpo/margin": 52.2221794128418,
"fcm_dpo/q_t": 0.3997356593608856,
"grad_norm": 16.76923179626465,
"learning_rate": 4.259284772799099e-08,
"logits/chosen": 0.6925860643386841,
"logits/rejected": 0.6450766324996948,
"logps/chosen": -189.15829467773438,
"logps/ref_chosen": -59.600929260253906,
"logps/ref_rejected": -75.24870300292969,
"logps/rejected": -257.02825927734375,
"loss": 1.1275,
"margin_dpo/margin_mean": 52.2221794128418,
"margin_dpo/margin_std": 90.92387390136719,
"step": 550
},
{
"epoch": 0.8390022675736961,
"fcm_dpo/beta": 0.009163258597254753,
"fcm_dpo/delta": 0.0038489706348627806,
"fcm_dpo/margin": 56.18461990356445,
"fcm_dpo/q_t": 0.38901767134666443,
"grad_norm": 16.93880271911621,
"learning_rate": 3.89747159520904e-08,
"logits/chosen": 0.6867181062698364,
"logits/rejected": 0.637639045715332,
"logps/chosen": -197.40147399902344,
"logps/ref_chosen": -63.578895568847656,
"logps/ref_rejected": -78.87867736816406,
"logps/rejected": -268.8858947753906,
"loss": 1.0913,
"margin_dpo/margin_mean": 56.18461990356445,
"margin_dpo/margin_std": 90.01570129394531,
"step": 555
},
{
"epoch": 0.8465608465608465,
"fcm_dpo/beta": 0.008957808837294579,
"fcm_dpo/delta": -0.0419851616024971,
"fcm_dpo/margin": 64.90444946289062,
"fcm_dpo/q_t": 0.3777204155921936,
"grad_norm": 16.868953704833984,
"learning_rate": 3.550414669125573e-08,
"logits/chosen": 0.7213538289070129,
"logits/rejected": 0.6617583632469177,
"logps/chosen": -192.423828125,
"logps/ref_chosen": -58.651512145996094,
"logps/ref_rejected": -78.67181396484375,
"logps/rejected": -277.34857177734375,
"loss": 1.0363,
"margin_dpo/margin_mean": 64.90444946289062,
"margin_dpo/margin_std": 90.19843292236328,
"step": 560
},
{
"epoch": 0.854119425547997,
"fcm_dpo/beta": 0.008735055103898048,
"fcm_dpo/delta": -0.009981656447052956,
"fcm_dpo/margin": 67.18910217285156,
"fcm_dpo/q_t": 0.3747270703315735,
"grad_norm": 14.736648559570312,
"learning_rate": 3.218356679178252e-08,
"logits/chosen": 0.7032049298286438,
"logits/rejected": 0.6600871682167053,
"logps/chosen": -187.43948364257812,
"logps/ref_chosen": -60.3114128112793,
"logps/ref_rejected": -78.25270080566406,
"logps/rejected": -272.56982421875,
"loss": 1.028,
"margin_dpo/margin_mean": 67.18910217285156,
"margin_dpo/margin_std": 91.42976379394531,
"step": 565
},
{
"epoch": 0.8616780045351474,
"fcm_dpo/beta": 0.008505801670253277,
"fcm_dpo/delta": -0.04424377158284187,
"fcm_dpo/margin": 65.48918914794922,
"fcm_dpo/q_t": 0.3805517554283142,
"grad_norm": 12.924057006835938,
"learning_rate": 2.9015298217712453e-08,
"logits/chosen": 0.6734435558319092,
"logits/rejected": 0.6112924814224243,
"logps/chosen": -184.1428680419922,
"logps/ref_chosen": -57.752410888671875,
"logps/ref_rejected": -76.99858093261719,
"logps/rejected": -268.8782043457031,
"loss": 1.049,
"margin_dpo/margin_mean": 65.48918914794922,
"margin_dpo/margin_std": 91.4268798828125,
"step": 570
},
{
"epoch": 0.8692365835222978,
"fcm_dpo/beta": 0.008431388065218925,
"fcm_dpo/delta": 0.01765955612063408,
"fcm_dpo/margin": 56.814170837402344,
"fcm_dpo/q_t": 0.39621537923812866,
"grad_norm": 14.525823593139648,
"learning_rate": 2.600155642716606e-08,
"logits/chosen": 0.701653242111206,
"logits/rejected": 0.66460120677948,
"logps/chosen": -196.06419372558594,
"logps/ref_chosen": -63.61958694458008,
"logps/ref_rejected": -79.51353454589844,
"logps/rejected": -268.7723388671875,
"loss": 1.101,
"margin_dpo/margin_mean": 56.814170837402344,
"margin_dpo/margin_std": 90.25949096679688,
"step": 575
},
{
"epoch": 0.8767951625094482,
"fcm_dpo/beta": 0.008416803553700447,
"fcm_dpo/delta": 0.0003098793386016041,
"fcm_dpo/margin": 62.08296585083008,
"fcm_dpo/q_t": 0.38741156458854675,
"grad_norm": 15.001502990722656,
"learning_rate": 2.3144448823151392e-08,
"logits/chosen": 0.6825278997421265,
"logits/rejected": 0.6386707425117493,
"logps/chosen": -182.51686096191406,
"logps/ref_chosen": -57.3541145324707,
"logps/ref_rejected": -73.14434051513672,
"logps/rejected": -260.3900451660156,
"loss": 1.0717,
"margin_dpo/margin_mean": 62.08296585083008,
"margin_dpo/margin_std": 92.49629211425781,
"step": 580
},
{
"epoch": 0.8843537414965986,
"fcm_dpo/beta": 0.008522504940629005,
"fcm_dpo/delta": 0.025987576693296432,
"fcm_dpo/margin": 57.36644744873047,
"fcm_dpo/q_t": 0.39568692445755005,
"grad_norm": 14.261137962341309,
"learning_rate": 2.044597327993153e-08,
"logits/chosen": 0.7333514094352722,
"logits/rejected": 0.6711838245391846,
"logps/chosen": -186.538330078125,
"logps/ref_chosen": -56.0127067565918,
"logps/ref_rejected": -77.16522216796875,
"logps/rejected": -265.05731201171875,
"loss": 1.1076,
"margin_dpo/margin_mean": 57.36644744873047,
"margin_dpo/margin_std": 95.25784301757812,
"step": 585
},
{
"epoch": 0.891912320483749,
"fcm_dpo/beta": 0.008789935149252415,
"fcm_dpo/delta": 0.02721945382654667,
"fcm_dpo/margin": 63.61796188354492,
"fcm_dpo/q_t": 0.3782525360584259,
"grad_norm": 15.488899230957031,
"learning_rate": 1.7908016745981856e-08,
"logits/chosen": 0.717170774936676,
"logits/rejected": 0.6842165589332581,
"logps/chosen": -190.9798126220703,
"logps/ref_chosen": -60.5894660949707,
"logps/ref_rejected": -74.34771728515625,
"logps/rejected": -268.35601806640625,
"loss": 1.0465,
"margin_dpo/margin_mean": 63.61796188354492,
"margin_dpo/margin_std": 90.69212341308594,
"step": 590
},
{
"epoch": 0.8994708994708994,
"fcm_dpo/beta": 0.008735476061701775,
"fcm_dpo/delta": -0.010337557643651962,
"fcm_dpo/margin": 67.07391357421875,
"fcm_dpo/q_t": 0.3761887550354004,
"grad_norm": 14.772221565246582,
"learning_rate": 1.553235392451377e-08,
"logits/chosen": 0.7367411255836487,
"logits/rejected": 0.6589676141738892,
"logps/chosen": -176.36756896972656,
"logps/ref_chosen": -54.77838897705078,
"logps/ref_rejected": -78.102783203125,
"logps/rejected": -266.765869140625,
"loss": 1.0433,
"margin_dpo/margin_mean": 67.07390594482422,
"margin_dpo/margin_std": 95.8719482421875,
"step": 595
},
{
"epoch": 0.9070294784580499,
"fcm_dpo/beta": 0.008924348279833794,
"fcm_dpo/delta": 0.013378431089222431,
"fcm_dpo/margin": 45.238609313964844,
"fcm_dpo/q_t": 0.41461238265037537,
"grad_norm": 18.25551986694336,
"learning_rate": 1.3320646032487393e-08,
"logits/chosen": 0.7176660299301147,
"logits/rejected": 0.6818990111351013,
"logps/chosen": -193.6077880859375,
"logps/ref_chosen": -58.45500564575195,
"logps/ref_rejected": -70.7367172241211,
"logps/rejected": -251.1281280517578,
"loss": 1.1653,
"margin_dpo/margin_mean": 45.238609313964844,
"margin_dpo/margin_std": 87.94874572753906,
"step": 600
},
{
"epoch": 0.9070294784580499,
"eval_fcm_dpo/beta": 0.008908301591873169,
"eval_logits/chosen": 0.7206099629402161,
"eval_logits/rejected": 0.6751406192779541,
"eval_logps/chosen": -205.21878051757812,
"eval_logps/ref_chosen": -74.85946655273438,
"eval_logps/ref_rejected": -79.54898834228516,
"eval_logps/rejected": -265.1909484863281,
"eval_loss": 0.5591413378715515,
"eval_margin_dpo/margin_mean": 55.28264236450195,
"eval_margin_dpo/margin_std": 95.356689453125,
"eval_runtime": 39.0896,
"eval_samples_per_second": 58.916,
"eval_steps_per_second": 1.842,
"step": 600
},
{
"epoch": 0.9145880574452003,
"fcm_dpo/beta": 0.00893603079020977,
"fcm_dpo/delta": 0.01746518909931183,
"fcm_dpo/margin": 63.60172653198242,
"fcm_dpo/q_t": 0.37951910495758057,
"grad_norm": 16.666015625,
"learning_rate": 1.1274439638981532e-08,
"logits/chosen": 0.7014644742012024,
"logits/rejected": 0.6582333445549011,
"logps/chosen": -179.68258666992188,
"logps/ref_chosen": -59.87483596801758,
"logps/ref_rejected": -75.75318908691406,
"logps/rejected": -259.16265869140625,
"loss": 1.0544,
"margin_dpo/margin_mean": 63.60172653198242,
"margin_dpo/margin_std": 93.95574951171875,
"step": 605
},
{
"epoch": 0.9221466364323507,
"fcm_dpo/beta": 0.008915998041629791,
"fcm_dpo/delta": -0.01860709860920906,
"fcm_dpo/margin": 66.0380859375,
"fcm_dpo/q_t": 0.37500935792922974,
"grad_norm": 14.762502670288086,
"learning_rate": 9.395165583732379e-09,
"logits/chosen": 0.706168532371521,
"logits/rejected": 0.6614619493484497,
"logps/chosen": -186.66229248046875,
"logps/ref_chosen": -60.35883712768555,
"logps/ref_rejected": -81.3543930053711,
"logps/rejected": -273.6959228515625,
"loss": 1.0278,
"margin_dpo/margin_mean": 66.03807830810547,
"margin_dpo/margin_std": 89.66718292236328,
"step": 610
},
{
"epoch": 0.9297052154195011,
"fcm_dpo/beta": 0.008840186521410942,
"fcm_dpo/delta": -0.026694372296333313,
"fcm_dpo/margin": 66.6244125366211,
"fcm_dpo/q_t": 0.3726271688938141,
"grad_norm": 15.342317581176758,
"learning_rate": 7.684137976598088e-09,
"logits/chosen": 0.6743055582046509,
"logits/rejected": 0.6356549859046936,
"logps/chosen": -185.5474395751953,
"logps/ref_chosen": -59.17219161987305,
"logps/ref_rejected": -79.92167663574219,
"logps/rejected": -272.9213562011719,
"loss": 1.0255,
"margin_dpo/margin_mean": 66.62440490722656,
"margin_dpo/margin_std": 89.74223327636719,
"step": 615
},
{
"epoch": 0.9372637944066515,
"fcm_dpo/beta": 0.00864451751112938,
"fcm_dpo/delta": -0.011452676728367805,
"fcm_dpo/margin": 58.722190856933594,
"fcm_dpo/q_t": 0.39084118604660034,
"grad_norm": 17.209096908569336,
"learning_rate": 6.142553278648238e-09,
"logits/chosen": 0.7176477313041687,
"logits/rejected": 0.6551352739334106,
"logps/chosen": -185.00552368164062,
"logps/ref_chosen": -58.052696228027344,
"logps/ref_rejected": -78.37252807617188,
"logps/rejected": -264.04754638671875,
"loss": 1.0716,
"margin_dpo/margin_mean": 58.722190856933594,
"margin_dpo/margin_std": 86.17276763916016,
"step": 620
},
{
"epoch": 0.9448223733938019,
"fcm_dpo/beta": 0.008548585698008537,
"fcm_dpo/delta": -0.0001246035099029541,
"fcm_dpo/margin": 60.6451530456543,
"fcm_dpo/q_t": 0.3894796669483185,
"grad_norm": 14.61771011352539,
"learning_rate": 4.7714894655209174e-09,
"logits/chosen": 0.708177924156189,
"logits/rejected": 0.6277607679367065,
"logps/chosen": -186.0558624267578,
"logps/ref_chosen": -56.957862854003906,
"logps/ref_rejected": -82.68255615234375,
"logps/rejected": -272.4256896972656,
"loss": 1.0748,
"margin_dpo/margin_mean": 60.6451530456543,
"margin_dpo/margin_std": 92.42916870117188,
"step": 625
},
{
"epoch": 0.9523809523809523,
"fcm_dpo/beta": 0.008414940908551216,
"fcm_dpo/delta": -0.02683025598526001,
"fcm_dpo/margin": 70.0490951538086,
"fcm_dpo/q_t": 0.3737347424030304,
"grad_norm": 11.857268333435059,
"learning_rate": 3.5719052736323806e-09,
"logits/chosen": 0.7122366428375244,
"logits/rejected": 0.6418401598930359,
"logps/chosen": -179.7769012451172,
"logps/ref_chosen": -56.71510696411133,
"logps/ref_rejected": -82.94544219970703,
"logps/rejected": -276.05633544921875,
"loss": 1.0285,
"margin_dpo/margin_mean": 70.0490951538086,
"margin_dpo/margin_std": 94.82304382324219,
"step": 630
},
{
"epoch": 0.9599395313681028,
"fcm_dpo/beta": 0.008062823675572872,
"fcm_dpo/delta": -0.03275930508971214,
"fcm_dpo/margin": 59.115440368652344,
"fcm_dpo/q_t": 0.3975747227668762,
"grad_norm": 13.964070320129395,
"learning_rate": 2.5446395297668287e-09,
"logits/chosen": 0.6974132061004639,
"logits/rejected": 0.6502237319946289,
"logps/chosen": -186.69309997558594,
"logps/ref_chosen": -59.33793258666992,
"logps/ref_rejected": -75.01703643798828,
"logps/rejected": -261.4876403808594,
"loss": 1.1043,
"margin_dpo/margin_mean": 59.115440368652344,
"margin_dpo/margin_std": 93.75061798095703,
"step": 635
},
{
"epoch": 0.9674981103552532,
"fcm_dpo/beta": 0.008327251300215721,
"fcm_dpo/delta": 0.06021968647837639,
"fcm_dpo/margin": 62.01350784301758,
"fcm_dpo/q_t": 0.38918977975845337,
"grad_norm": 14.674144744873047,
"learning_rate": 1.690410564514244e-09,
"logits/chosen": 0.6967864036560059,
"logits/rejected": 0.6229372024536133,
"logps/chosen": -191.25723266601562,
"logps/ref_chosen": -58.1605339050293,
"logps/ref_rejected": -79.85365295410156,
"logps/rejected": -274.9638671875,
"loss": 1.0702,
"margin_dpo/margin_mean": 62.01350784301758,
"margin_dpo/margin_std": 92.87417602539062,
"step": 640
},
{
"epoch": 0.9750566893424036,
"fcm_dpo/beta": 0.008840223774313927,
"fcm_dpo/delta": 0.05628042295575142,
"fcm_dpo/margin": 54.37908935546875,
"fcm_dpo/q_t": 0.3959693908691406,
"grad_norm": 16.510818481445312,
"learning_rate": 1.0098157099674987e-09,
"logits/chosen": 0.6872873306274414,
"logits/rejected": 0.6607547998428345,
"logps/chosen": -193.90921020507812,
"logps/ref_chosen": -63.45180130004883,
"logps/ref_rejected": -74.18285369873047,
"logps/rejected": -259.01934814453125,
"loss": 1.1021,
"margin_dpo/margin_mean": 54.37909698486328,
"margin_dpo/margin_std": 87.93758392333984,
"step": 645
},
{
"epoch": 0.982615268329554,
"fcm_dpo/beta": 0.009017017669975758,
"fcm_dpo/delta": -0.01029270887374878,
"fcm_dpo/margin": 63.77238845825195,
"fcm_dpo/q_t": 0.3787182569503784,
"grad_norm": 15.225814819335938,
"learning_rate": 5.033308820289184e-10,
"logits/chosen": 0.7203370332717896,
"logits/rejected": 0.6512852311134338,
"logps/chosen": -195.11532592773438,
"logps/ref_chosen": -59.75496292114258,
"logps/ref_rejected": -84.31481170654297,
"logps/rejected": -283.4475402832031,
"loss": 1.0611,
"margin_dpo/margin_mean": 63.77238082885742,
"margin_dpo/margin_std": 96.05448913574219,
"step": 650
},
{
"epoch": 0.9901738473167044,
"fcm_dpo/beta": 0.008963796310126781,
"fcm_dpo/delta": -0.022582078352570534,
"fcm_dpo/margin": 60.55501174926758,
"fcm_dpo/q_t": 0.3851621747016907,
"grad_norm": 13.021146774291992,
"learning_rate": 1.7131024761923852e-10,
"logits/chosen": 0.7035177946090698,
"logits/rejected": 0.6318515539169312,
"logps/chosen": -185.91603088378906,
"logps/ref_chosen": -57.817848205566406,
"logps/ref_rejected": -79.81755065917969,
"logps/rejected": -268.4707336425781,
"loss": 1.0654,
"margin_dpo/margin_mean": 60.55500411987305,
"margin_dpo/margin_std": 88.59367370605469,
"step": 655
},
{
"epoch": 0.9977324263038548,
"fcm_dpo/beta": 0.008747505024075508,
"fcm_dpo/delta": -0.01453787088394165,
"fcm_dpo/margin": 64.36149597167969,
"fcm_dpo/q_t": 0.3801228702068329,
"grad_norm": 15.389237403869629,
"learning_rate": 1.3985977021235829e-11,
"logits/chosen": 0.7586897611618042,
"logits/rejected": 0.6943166255950928,
"logps/chosen": -190.80844116210938,
"logps/ref_chosen": -59.12651443481445,
"logps/ref_rejected": -79.42085266113281,
"logps/rejected": -275.46429443359375,
"loss": 1.0594,
"margin_dpo/margin_mean": 64.36149597167969,
"margin_dpo/margin_std": 96.14913940429688,
"step": 660
},
{
"epoch": 0.999244142101285,
"step": 661,
"total_flos": 0.0,
"train_loss": 1.120150500454809,
"train_runtime": 1811.4729,
"train_samples_per_second": 23.371,
"train_steps_per_second": 0.365
}
],
"logging_steps": 5,
"max_steps": 661,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}