Model: jackf857/llama-3-8b-base-new-dpo-hh-helpful-s_star0.6-4xh200-batch-64-20260421-214335-rerun Source: Original Platform
2707 lines
99 KiB
JSON
2707 lines
99 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 1.0,
|
|
"eval_steps": 200,
|
|
"global_step": 681,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0014684287812041115,
|
|
"fcm_dpo/beta": 0.10000000149011612,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.02287006378173828,
|
|
"fcm_dpo/q_t": 0.5005706548690796,
|
|
"grad_norm": 83.50728607177734,
|
|
"learning_rate": 0.0,
|
|
"logits/chosen": -0.4974287748336792,
|
|
"logits/rejected": -0.43299180269241333,
|
|
"logps/chosen": -50.1435661315918,
|
|
"logps/ref_chosen": -50.14883804321289,
|
|
"logps/ref_rejected": -74.1280517578125,
|
|
"logps/rejected": -74.09991455078125,
|
|
"loss": 1.389,
|
|
"margin_dpo/margin_mean": -0.02287048101425171,
|
|
"margin_dpo/margin_std": 0.41920793056488037,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.007342143906020558,
|
|
"fcm_dpo/beta": 0.10000000894069672,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.03240281343460083,
|
|
"fcm_dpo/q_t": 0.5008102059364319,
|
|
"grad_norm": 90.14773559570312,
|
|
"learning_rate": 2.898550724637681e-08,
|
|
"logits/chosen": -0.4901035726070404,
|
|
"logits/rejected": -0.4534408450126648,
|
|
"logps/chosen": -56.07246017456055,
|
|
"logps/ref_chosen": -56.05734634399414,
|
|
"logps/ref_rejected": -78.69325256347656,
|
|
"logps/rejected": -78.67597198486328,
|
|
"loss": 1.3899,
|
|
"margin_dpo/margin_mean": -0.03240284323692322,
|
|
"margin_dpo/margin_std": 0.3555586636066437,
|
|
"step": 5
|
|
},
|
|
{
|
|
"epoch": 0.014684287812041116,
|
|
"fcm_dpo/beta": 0.10000000894069672,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.027925759553909302,
|
|
"fcm_dpo/q_t": 0.4993022382259369,
|
|
"grad_norm": 70.48045349121094,
|
|
"learning_rate": 6.521739130434782e-08,
|
|
"logits/chosen": -0.5015245079994202,
|
|
"logits/rejected": -0.4629823565483093,
|
|
"logps/chosen": -59.527122497558594,
|
|
"logps/ref_chosen": -59.54457473754883,
|
|
"logps/ref_rejected": -91.17041778564453,
|
|
"logps/rejected": -91.18089294433594,
|
|
"loss": 1.3839,
|
|
"margin_dpo/margin_mean": 0.027925794944167137,
|
|
"margin_dpo/margin_std": 0.37033817172050476,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.022026431718061675,
|
|
"fcm_dpo/beta": 0.10000000894069672,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.005324178840965033,
|
|
"fcm_dpo/q_t": 0.49986687302589417,
|
|
"grad_norm": 64.33786010742188,
|
|
"learning_rate": 1.0144927536231885e-07,
|
|
"logits/chosen": -0.4974799156188965,
|
|
"logits/rejected": -0.46847113966941833,
|
|
"logps/chosen": -58.83959197998047,
|
|
"logps/ref_chosen": -58.83195877075195,
|
|
"logps/ref_rejected": -92.93949890136719,
|
|
"logps/rejected": -92.95245361328125,
|
|
"loss": 1.3861,
|
|
"margin_dpo/margin_mean": 0.005324071738868952,
|
|
"margin_dpo/margin_std": 0.36571556329727173,
|
|
"step": 15
|
|
},
|
|
{
|
|
"epoch": 0.02936857562408223,
|
|
"fcm_dpo/beta": 0.10000000894069672,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.05579507350921631,
|
|
"fcm_dpo/q_t": 0.49860554933547974,
|
|
"grad_norm": 73.8245620727539,
|
|
"learning_rate": 1.3768115942028986e-07,
|
|
"logits/chosen": -0.5040138959884644,
|
|
"logits/rejected": -0.45514219999313354,
|
|
"logps/chosen": -59.63999557495117,
|
|
"logps/ref_chosen": -59.6396598815918,
|
|
"logps/ref_rejected": -82.76026916503906,
|
|
"logps/rejected": -82.81639862060547,
|
|
"loss": 1.381,
|
|
"margin_dpo/margin_mean": 0.055795006453990936,
|
|
"margin_dpo/margin_std": 0.33391329646110535,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.03671071953010279,
|
|
"fcm_dpo/beta": 0.10000000894069672,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.20841345191001892,
|
|
"fcm_dpo/q_t": 0.49479326605796814,
|
|
"grad_norm": 73.5445785522461,
|
|
"learning_rate": 1.7391304347826085e-07,
|
|
"logits/chosen": -0.5032899975776672,
|
|
"logits/rejected": -0.4763486981391907,
|
|
"logps/chosen": -53.173057556152344,
|
|
"logps/ref_chosen": -53.205284118652344,
|
|
"logps/ref_rejected": -88.99608612060547,
|
|
"logps/rejected": -89.17227172851562,
|
|
"loss": 1.366,
|
|
"margin_dpo/margin_mean": 0.20841336250305176,
|
|
"margin_dpo/margin_std": 0.4185457229614258,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.04405286343612335,
|
|
"fcm_dpo/beta": 0.10000000894069672,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.4891234338283539,
|
|
"fcm_dpo/q_t": 0.48778820037841797,
|
|
"grad_norm": 87.73991394042969,
|
|
"learning_rate": 2.1014492753623187e-07,
|
|
"logits/chosen": -0.5239602327346802,
|
|
"logits/rejected": -0.48419055342674255,
|
|
"logps/chosen": -53.45922088623047,
|
|
"logps/ref_chosen": -53.5526008605957,
|
|
"logps/ref_rejected": -97.87371826171875,
|
|
"logps/rejected": -98.26947021484375,
|
|
"loss": 1.3389,
|
|
"margin_dpo/margin_mean": 0.4891238212585449,
|
|
"margin_dpo/margin_std": 0.5750466585159302,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.0513950073421439,
|
|
"fcm_dpo/beta": 0.10000000894069672,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.7763983607292175,
|
|
"fcm_dpo/q_t": 0.48066458106040955,
|
|
"grad_norm": 82.94285583496094,
|
|
"learning_rate": 2.463768115942029e-07,
|
|
"logits/chosen": -0.4989829957485199,
|
|
"logits/rejected": -0.4650956094264984,
|
|
"logps/chosen": -56.198211669921875,
|
|
"logps/ref_chosen": -56.3298454284668,
|
|
"logps/ref_rejected": -91.76858520507812,
|
|
"logps/rejected": -92.41334533691406,
|
|
"loss": 1.3122,
|
|
"margin_dpo/margin_mean": 0.776398241519928,
|
|
"margin_dpo/margin_std": 0.8276771306991577,
|
|
"step": 35
|
|
},
|
|
{
|
|
"epoch": 0.05873715124816446,
|
|
"fcm_dpo/beta": 0.10000000894069672,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.359745979309082,
|
|
"fcm_dpo/q_t": 0.46632710099220276,
|
|
"grad_norm": 60.41274642944336,
|
|
"learning_rate": 2.8260869565217386e-07,
|
|
"logits/chosen": -0.5347701907157898,
|
|
"logits/rejected": -0.4986083507537842,
|
|
"logps/chosen": -54.27339553833008,
|
|
"logps/ref_chosen": -54.38492965698242,
|
|
"logps/ref_rejected": -82.94353485107422,
|
|
"logps/rejected": -84.19175720214844,
|
|
"loss": 1.2606,
|
|
"margin_dpo/margin_mean": 1.359745979309082,
|
|
"margin_dpo/margin_std": 1.4517606496810913,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.06607929515418502,
|
|
"fcm_dpo/beta": 0.1127050369977951,
|
|
"fcm_dpo/delta": 0.25362294912338257,
|
|
"fcm_dpo/margin": 2.3980860710144043,
|
|
"fcm_dpo/q_t": 0.4357197880744934,
|
|
"grad_norm": 74.45612335205078,
|
|
"learning_rate": 3.188405797101449e-07,
|
|
"logits/chosen": -0.5095103979110718,
|
|
"logits/rejected": -0.48132508993148804,
|
|
"logps/chosen": -54.6392822265625,
|
|
"logps/ref_chosen": -54.862335205078125,
|
|
"logps/ref_rejected": -98.0264663696289,
|
|
"logps/rejected": -100.20148468017578,
|
|
"loss": 1.1498,
|
|
"margin_dpo/margin_mean": 2.398085832595825,
|
|
"margin_dpo/margin_std": 2.2269370555877686,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 0.07342143906020558,
|
|
"fcm_dpo/beta": 0.141450434923172,
|
|
"fcm_dpo/delta": 0.12390259653329849,
|
|
"fcm_dpo/margin": 3.386523485183716,
|
|
"fcm_dpo/q_t": 0.3908053934574127,
|
|
"grad_norm": 79.67459869384766,
|
|
"learning_rate": 3.5507246376811595e-07,
|
|
"logits/chosen": -0.5551148653030396,
|
|
"logits/rejected": -0.5035051107406616,
|
|
"logps/chosen": -58.14664840698242,
|
|
"logps/ref_chosen": -58.304595947265625,
|
|
"logps/ref_rejected": -91.69480895996094,
|
|
"logps/rejected": -94.92338562011719,
|
|
"loss": 1.0187,
|
|
"margin_dpo/margin_mean": 3.386524200439453,
|
|
"margin_dpo/margin_std": 3.3679816722869873,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.08076358296622614,
|
|
"fcm_dpo/beta": 0.135177880525589,
|
|
"fcm_dpo/delta": -0.16671812534332275,
|
|
"fcm_dpo/margin": 5.600610256195068,
|
|
"fcm_dpo/q_t": 0.3425524830818176,
|
|
"grad_norm": 62.811153411865234,
|
|
"learning_rate": 3.9130434782608694e-07,
|
|
"logits/chosen": -0.6010715961456299,
|
|
"logits/rejected": -0.5568638443946838,
|
|
"logps/chosen": -56.37145233154297,
|
|
"logps/ref_chosen": -56.06591796875,
|
|
"logps/ref_rejected": -85.69367980957031,
|
|
"logps/rejected": -91.59982299804688,
|
|
"loss": 0.8962,
|
|
"margin_dpo/margin_mean": 5.600610256195068,
|
|
"margin_dpo/margin_std": 5.793082237243652,
|
|
"step": 55
|
|
},
|
|
{
|
|
"epoch": 0.0881057268722467,
|
|
"fcm_dpo/beta": 0.11089271306991577,
|
|
"fcm_dpo/delta": -0.1925317347049713,
|
|
"fcm_dpo/margin": 7.025670528411865,
|
|
"fcm_dpo/q_t": 0.33725228905677795,
|
|
"grad_norm": 67.2679214477539,
|
|
"learning_rate": 4.2753623188405794e-07,
|
|
"logits/chosen": -0.6061812043190002,
|
|
"logits/rejected": -0.5570945739746094,
|
|
"logps/chosen": -61.9241828918457,
|
|
"logps/ref_chosen": -60.6871337890625,
|
|
"logps/ref_rejected": -89.72715759277344,
|
|
"logps/rejected": -97.98988342285156,
|
|
"loss": 0.8969,
|
|
"margin_dpo/margin_mean": 7.025670528411865,
|
|
"margin_dpo/margin_std": 7.496710777282715,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.09544787077826726,
|
|
"fcm_dpo/beta": 0.09299755096435547,
|
|
"fcm_dpo/delta": -0.18385855853557587,
|
|
"fcm_dpo/margin": 8.298527717590332,
|
|
"fcm_dpo/q_t": 0.3424831032752991,
|
|
"grad_norm": 48.89730453491211,
|
|
"learning_rate": 4.63768115942029e-07,
|
|
"logits/chosen": -0.6179511547088623,
|
|
"logits/rejected": -0.5864478945732117,
|
|
"logps/chosen": -63.573402404785156,
|
|
"logps/ref_chosen": -61.75325393676758,
|
|
"logps/ref_rejected": -93.30108642578125,
|
|
"logps/rejected": -103.41975402832031,
|
|
"loss": 0.923,
|
|
"margin_dpo/margin_mean": 8.298527717590332,
|
|
"margin_dpo/margin_std": 9.724918365478516,
|
|
"step": 65
|
|
},
|
|
{
|
|
"epoch": 0.1027900146842878,
|
|
"fcm_dpo/beta": 0.07844052463769913,
|
|
"fcm_dpo/delta": -0.1582036018371582,
|
|
"fcm_dpo/margin": 9.539754867553711,
|
|
"fcm_dpo/q_t": 0.3441976308822632,
|
|
"grad_norm": 47.65688705444336,
|
|
"learning_rate": 5e-07,
|
|
"logits/chosen": -0.6304086446762085,
|
|
"logits/rejected": -0.5917232632637024,
|
|
"logps/chosen": -62.56956100463867,
|
|
"logps/ref_chosen": -59.548004150390625,
|
|
"logps/ref_rejected": -84.01609802246094,
|
|
"logps/rejected": -96.57740783691406,
|
|
"loss": 0.9041,
|
|
"margin_dpo/margin_mean": 9.539755821228027,
|
|
"margin_dpo/margin_std": 10.295551300048828,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.11013215859030837,
|
|
"fcm_dpo/beta": 0.06165589019656181,
|
|
"fcm_dpo/delta": -0.32924890518188477,
|
|
"fcm_dpo/margin": 14.760737419128418,
|
|
"fcm_dpo/q_t": 0.3237493336200714,
|
|
"grad_norm": 36.49312973022461,
|
|
"learning_rate": 4.999176576834721e-07,
|
|
"logits/chosen": -0.6605738997459412,
|
|
"logits/rejected": -0.6328510642051697,
|
|
"logps/chosen": -65.28561401367188,
|
|
"logps/ref_chosen": -59.86931228637695,
|
|
"logps/ref_rejected": -98.05613708496094,
|
|
"logps/rejected": -118.2331771850586,
|
|
"loss": 0.873,
|
|
"margin_dpo/margin_mean": 14.760736465454102,
|
|
"margin_dpo/margin_std": 17.107942581176758,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.11747430249632893,
|
|
"fcm_dpo/beta": 0.04629804939031601,
|
|
"fcm_dpo/delta": -0.19796454906463623,
|
|
"fcm_dpo/margin": 16.89699935913086,
|
|
"fcm_dpo/q_t": 0.341538667678833,
|
|
"grad_norm": 35.74776077270508,
|
|
"learning_rate": 4.996706849759452e-07,
|
|
"logits/chosen": -0.6835442781448364,
|
|
"logits/rejected": -0.6468649506568909,
|
|
"logps/chosen": -63.93366622924805,
|
|
"logps/ref_chosen": -56.18925857543945,
|
|
"logps/ref_rejected": -86.42393493652344,
|
|
"logps/rejected": -111.06534576416016,
|
|
"loss": 0.9203,
|
|
"margin_dpo/margin_mean": 16.896997451782227,
|
|
"margin_dpo/margin_std": 19.718297958374023,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.12481644640234948,
|
|
"fcm_dpo/beta": 0.0381317213177681,
|
|
"fcm_dpo/delta": -0.16901178658008575,
|
|
"fcm_dpo/margin": 19.726295471191406,
|
|
"fcm_dpo/q_t": 0.34707337617874146,
|
|
"grad_norm": 34.31068420410156,
|
|
"learning_rate": 4.992592445678582e-07,
|
|
"logits/chosen": -0.6622103452682495,
|
|
"logits/rejected": -0.6311969757080078,
|
|
"logps/chosen": -70.46139526367188,
|
|
"logps/ref_chosen": -60.018287658691406,
|
|
"logps/ref_rejected": -98.01185607910156,
|
|
"logps/rejected": -128.18124389648438,
|
|
"loss": 0.9443,
|
|
"margin_dpo/margin_mean": 19.726295471191406,
|
|
"margin_dpo/margin_std": 24.040042877197266,
|
|
"step": 85
|
|
},
|
|
{
|
|
"epoch": 0.13215859030837004,
|
|
"fcm_dpo/beta": 0.03405915945768356,
|
|
"fcm_dpo/delta": -0.11596596240997314,
|
|
"fcm_dpo/margin": 20.768291473388672,
|
|
"fcm_dpo/q_t": 0.3624621331691742,
|
|
"grad_norm": 35.00300216674805,
|
|
"learning_rate": 4.986836074908615e-07,
|
|
"logits/chosen": -0.7018736600875854,
|
|
"logits/rejected": -0.6867517232894897,
|
|
"logps/chosen": -73.39559173583984,
|
|
"logps/ref_chosen": -59.8709831237793,
|
|
"logps/ref_rejected": -96.78519439697266,
|
|
"logps/rejected": -131.07809448242188,
|
|
"loss": 1.0061,
|
|
"margin_dpo/margin_mean": 20.768291473388672,
|
|
"margin_dpo/margin_std": 29.607013702392578,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.1395007342143906,
|
|
"fcm_dpo/beta": 0.030608216300606728,
|
|
"fcm_dpo/delta": -0.07700999826192856,
|
|
"fcm_dpo/margin": 21.932090759277344,
|
|
"fcm_dpo/q_t": 0.36035576462745667,
|
|
"grad_norm": 27.68400764465332,
|
|
"learning_rate": 4.979441529392784e-07,
|
|
"logits/chosen": -0.708720326423645,
|
|
"logits/rejected": -0.6767187714576721,
|
|
"logps/chosen": -69.35963439941406,
|
|
"logps/ref_chosen": -55.94385528564453,
|
|
"logps/ref_rejected": -83.6790542602539,
|
|
"logps/rejected": -119.02693939208984,
|
|
"loss": 0.9664,
|
|
"margin_dpo/margin_mean": 21.932090759277344,
|
|
"margin_dpo/margin_std": 26.880752563476562,
|
|
"step": 95
|
|
},
|
|
{
|
|
"epoch": 0.14684287812041116,
|
|
"fcm_dpo/beta": 0.028173187747597694,
|
|
"fcm_dpo/delta": -0.068596251308918,
|
|
"fcm_dpo/margin": 23.49247169494629,
|
|
"fcm_dpo/q_t": 0.36101511120796204,
|
|
"grad_norm": 30.916765213012695,
|
|
"learning_rate": 4.970413680203148e-07,
|
|
"logits/chosen": -0.6772828698158264,
|
|
"logits/rejected": -0.648100733757019,
|
|
"logps/chosen": -71.47965240478516,
|
|
"logps/ref_chosen": -57.05888748168945,
|
|
"logps/ref_rejected": -86.11727142333984,
|
|
"logps/rejected": -124.03050231933594,
|
|
"loss": 0.9722,
|
|
"margin_dpo/margin_mean": 23.49247169494629,
|
|
"margin_dpo/margin_std": 28.96224594116211,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.15418502202643172,
|
|
"fcm_dpo/beta": 0.027121257036924362,
|
|
"fcm_dpo/delta": -0.13553811609745026,
|
|
"fcm_dpo/margin": 26.961578369140625,
|
|
"fcm_dpo/q_t": 0.35333341360092163,
|
|
"grad_norm": 26.486059188842773,
|
|
"learning_rate": 4.959758474331832e-07,
|
|
"logits/chosen": -0.6960592269897461,
|
|
"logits/rejected": -0.6600139141082764,
|
|
"logps/chosen": -76.32167053222656,
|
|
"logps/ref_chosen": -59.20774459838867,
|
|
"logps/ref_rejected": -86.49754333496094,
|
|
"logps/rejected": -130.57305908203125,
|
|
"loss": 0.9567,
|
|
"margin_dpo/margin_mean": 26.961578369140625,
|
|
"margin_dpo/margin_std": 32.831111907958984,
|
|
"step": 105
|
|
},
|
|
{
|
|
"epoch": 0.16152716593245228,
|
|
"fcm_dpo/beta": 0.02301758900284767,
|
|
"fcm_dpo/delta": -0.11018934100866318,
|
|
"fcm_dpo/margin": 30.556344985961914,
|
|
"fcm_dpo/q_t": 0.3538368046283722,
|
|
"grad_norm": 24.114713668823242,
|
|
"learning_rate": 4.947482930773511e-07,
|
|
"logits/chosen": -0.6646202206611633,
|
|
"logits/rejected": -0.6281755566596985,
|
|
"logps/chosen": -78.81887817382812,
|
|
"logps/ref_chosen": -60.437957763671875,
|
|
"logps/ref_rejected": -90.83917999267578,
|
|
"logps/rejected": -139.77645874023438,
|
|
"loss": 0.9511,
|
|
"margin_dpo/margin_mean": 30.556344985961914,
|
|
"margin_dpo/margin_std": 35.99966812133789,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.16886930983847284,
|
|
"fcm_dpo/beta": 0.021153923124074936,
|
|
"fcm_dpo/delta": -0.041334737092256546,
|
|
"fcm_dpo/margin": 30.124019622802734,
|
|
"fcm_dpo/q_t": 0.3688841462135315,
|
|
"grad_norm": 40.84029769897461,
|
|
"learning_rate": 4.933595135901732e-07,
|
|
"logits/chosen": -0.6649340391159058,
|
|
"logits/rejected": -0.6294328570365906,
|
|
"logps/chosen": -84.20191955566406,
|
|
"logps/ref_chosen": -61.7908821105957,
|
|
"logps/ref_rejected": -85.36943054199219,
|
|
"logps/rejected": -137.90447998046875,
|
|
"loss": 0.9992,
|
|
"margin_dpo/margin_mean": 30.124013900756836,
|
|
"margin_dpo/margin_std": 39.94293212890625,
|
|
"step": 115
|
|
},
|
|
{
|
|
"epoch": 0.1762114537444934,
|
|
"fcm_dpo/beta": 0.02078414149582386,
|
|
"fcm_dpo/delta": -0.037118665874004364,
|
|
"fcm_dpo/margin": 30.540584564208984,
|
|
"fcm_dpo/q_t": 0.36713889241218567,
|
|
"grad_norm": 26.78792381286621,
|
|
"learning_rate": 4.918104238142103e-07,
|
|
"logits/chosen": -0.6711692214012146,
|
|
"logits/rejected": -0.645135760307312,
|
|
"logps/chosen": -91.19302368164062,
|
|
"logps/ref_chosen": -65.3261489868164,
|
|
"logps/ref_rejected": -86.75518798828125,
|
|
"logps/rejected": -143.1626434326172,
|
|
"loss": 0.9818,
|
|
"margin_dpo/margin_mean": 30.540584564208984,
|
|
"margin_dpo/margin_std": 38.079750061035156,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.18355359765051396,
|
|
"fcm_dpo/beta": 0.017505459487438202,
|
|
"fcm_dpo/delta": -0.22550848126411438,
|
|
"fcm_dpo/margin": 46.04296112060547,
|
|
"fcm_dpo/q_t": 0.3401046693325043,
|
|
"grad_norm": 23.552217483520508,
|
|
"learning_rate": 4.90102044194588e-07,
|
|
"logits/chosen": -0.6151807904243469,
|
|
"logits/rejected": -0.6104758381843567,
|
|
"logps/chosen": -87.12136840820312,
|
|
"logps/ref_chosen": -58.323204040527344,
|
|
"logps/ref_rejected": -101.2106704711914,
|
|
"logps/rejected": -176.0518035888672,
|
|
"loss": 0.9204,
|
|
"margin_dpo/margin_mean": 46.0429573059082,
|
|
"margin_dpo/margin_std": 55.54075241088867,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 0.19089574155653452,
|
|
"fcm_dpo/beta": 0.01598326489329338,
|
|
"fcm_dpo/delta": -0.04410712048411369,
|
|
"fcm_dpo/margin": 40.082298278808594,
|
|
"fcm_dpo/q_t": 0.3680208623409271,
|
|
"grad_norm": 22.16413116455078,
|
|
"learning_rate": 4.882355001067891e-07,
|
|
"logits/chosen": -0.5932961106300354,
|
|
"logits/rejected": -0.5749183893203735,
|
|
"logps/chosen": -86.93000793457031,
|
|
"logps/ref_chosen": -56.38518524169922,
|
|
"logps/ref_rejected": -86.15767669677734,
|
|
"logps/rejected": -156.78482055664062,
|
|
"loss": 1.0035,
|
|
"margin_dpo/margin_mean": 40.082298278808594,
|
|
"margin_dpo/margin_std": 53.219139099121094,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.19823788546255505,
|
|
"fcm_dpo/beta": 0.014573054388165474,
|
|
"fcm_dpo/delta": -0.09526528418064117,
|
|
"fcm_dpo/margin": 47.24794387817383,
|
|
"fcm_dpo/q_t": 0.3577379286289215,
|
|
"grad_norm": 25.677669525146484,
|
|
"learning_rate": 4.862120211153265e-07,
|
|
"logits/chosen": -0.5761778950691223,
|
|
"logits/rejected": -0.5731192827224731,
|
|
"logps/chosen": -86.5953140258789,
|
|
"logps/ref_chosen": -54.59065628051758,
|
|
"logps/ref_rejected": -95.26080322265625,
|
|
"logps/rejected": -174.51339721679688,
|
|
"loss": 0.9545,
|
|
"margin_dpo/margin_mean": 47.24794387817383,
|
|
"margin_dpo/margin_std": 57.28125762939453,
|
|
"step": 135
|
|
},
|
|
{
|
|
"epoch": 0.2055800293685756,
|
|
"fcm_dpo/beta": 0.013362633995711803,
|
|
"fcm_dpo/delta": -0.08761467784643173,
|
|
"fcm_dpo/margin": 51.02484893798828,
|
|
"fcm_dpo/q_t": 0.3625403940677643,
|
|
"grad_norm": 25.558738708496094,
|
|
"learning_rate": 4.840329401637809e-07,
|
|
"logits/chosen": -0.5525860786437988,
|
|
"logits/rejected": -0.545661449432373,
|
|
"logps/chosen": -96.27259826660156,
|
|
"logps/ref_chosen": -56.04347610473633,
|
|
"logps/ref_rejected": -93.27880859375,
|
|
"logps/rejected": -184.53277587890625,
|
|
"loss": 0.9755,
|
|
"margin_dpo/margin_mean": 51.02485275268555,
|
|
"margin_dpo/margin_std": 65.68046569824219,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.21292217327459617,
|
|
"fcm_dpo/beta": 0.012635116465389729,
|
|
"fcm_dpo/delta": -0.008492978289723396,
|
|
"fcm_dpo/margin": 48.067604064941406,
|
|
"fcm_dpo/q_t": 0.3737943470478058,
|
|
"grad_norm": 29.300233840942383,
|
|
"learning_rate": 4.816996926967401e-07,
|
|
"logits/chosen": -0.5054234862327576,
|
|
"logits/rejected": -0.4867471754550934,
|
|
"logps/chosen": -107.9009017944336,
|
|
"logps/ref_chosen": -61.4414176940918,
|
|
"logps/ref_rejected": -86.32813262939453,
|
|
"logps/rejected": -180.85520935058594,
|
|
"loss": 1.0202,
|
|
"margin_dpo/margin_mean": 48.067596435546875,
|
|
"margin_dpo/margin_std": 66.08811950683594,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 0.22026431718061673,
|
|
"fcm_dpo/beta": 0.012664164416491985,
|
|
"fcm_dpo/delta": -0.01627928391098976,
|
|
"fcm_dpo/margin": 48.467201232910156,
|
|
"fcm_dpo/q_t": 0.3732047379016876,
|
|
"grad_norm": 25.043779373168945,
|
|
"learning_rate": 4.792138157142157e-07,
|
|
"logits/chosen": -0.5404887199401855,
|
|
"logits/rejected": -0.5210872888565063,
|
|
"logps/chosen": -104.0806884765625,
|
|
"logps/ref_chosen": -57.70451736450195,
|
|
"logps/ref_rejected": -87.76991271972656,
|
|
"logps/rejected": -182.61329650878906,
|
|
"loss": 1.0113,
|
|
"margin_dpo/margin_mean": 48.467201232910156,
|
|
"margin_dpo/margin_std": 64.91874694824219,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.2276064610866373,
|
|
"fcm_dpo/beta": 0.011945498175919056,
|
|
"fcm_dpo/delta": -0.06720416247844696,
|
|
"fcm_dpo/margin": 55.43426513671875,
|
|
"fcm_dpo/q_t": 0.3624417185783386,
|
|
"grad_norm": 23.727567672729492,
|
|
"learning_rate": 4.7657694675916247e-07,
|
|
"logits/chosen": -0.581199586391449,
|
|
"logits/rejected": -0.5655697584152222,
|
|
"logps/chosen": -105.16175842285156,
|
|
"logps/ref_chosen": -62.08925247192383,
|
|
"logps/ref_rejected": -94.79930114746094,
|
|
"logps/rejected": -193.30606079101562,
|
|
"loss": 0.9764,
|
|
"margin_dpo/margin_mean": 55.43426513671875,
|
|
"margin_dpo/margin_std": 69.9148178100586,
|
|
"step": 155
|
|
},
|
|
{
|
|
"epoch": 0.23494860499265785,
|
|
"fcm_dpo/beta": 0.011539025232195854,
|
|
"fcm_dpo/delta": -0.045818835496902466,
|
|
"fcm_dpo/margin": 55.6801643371582,
|
|
"fcm_dpo/q_t": 0.3720964789390564,
|
|
"grad_norm": 25.801401138305664,
|
|
"learning_rate": 4.737908228387656e-07,
|
|
"logits/chosen": -0.5244706869125366,
|
|
"logits/rejected": -0.5115067362785339,
|
|
"logps/chosen": -124.75065612792969,
|
|
"logps/ref_chosen": -67.15288543701172,
|
|
"logps/ref_rejected": -96.92537689208984,
|
|
"logps/rejected": -210.2032928466797,
|
|
"loss": 1.0372,
|
|
"margin_dpo/margin_mean": 55.6801643371582,
|
|
"margin_dpo/margin_std": 83.104736328125,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.2422907488986784,
|
|
"fcm_dpo/beta": 0.010906776413321495,
|
|
"fcm_dpo/delta": -0.010979633778333664,
|
|
"fcm_dpo/margin": 55.84454345703125,
|
|
"fcm_dpo/q_t": 0.3736818730831146,
|
|
"grad_norm": 37.94820022583008,
|
|
"learning_rate": 4.708572792802069e-07,
|
|
"logits/chosen": -0.5201188325881958,
|
|
"logits/rejected": -0.49569135904312134,
|
|
"logps/chosen": -110.22456359863281,
|
|
"logps/ref_chosen": -57.40401077270508,
|
|
"logps/ref_rejected": -80.31498718261719,
|
|
"logps/rejected": -188.9801025390625,
|
|
"loss": 1.0098,
|
|
"margin_dpo/margin_mean": 55.84454345703125,
|
|
"margin_dpo/margin_std": 74.67647552490234,
|
|
"step": 165
|
|
},
|
|
{
|
|
"epoch": 0.24963289280469897,
|
|
"fcm_dpo/beta": 0.010051427409052849,
|
|
"fcm_dpo/delta": -0.10779444873332977,
|
|
"fcm_dpo/margin": 69.25593566894531,
|
|
"fcm_dpo/q_t": 0.3577578365802765,
|
|
"grad_norm": 23.627363204956055,
|
|
"learning_rate": 4.6777824852166437e-07,
|
|
"logits/chosen": -0.45740675926208496,
|
|
"logits/rejected": -0.4491025507450104,
|
|
"logps/chosen": -106.43888854980469,
|
|
"logps/ref_chosen": -52.029144287109375,
|
|
"logps/ref_rejected": -85.73944091796875,
|
|
"logps/rejected": -209.40512084960938,
|
|
"loss": 0.9592,
|
|
"margin_dpo/margin_mean": 69.25593566894531,
|
|
"margin_dpo/margin_std": 85.97371673583984,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.25697503671071953,
|
|
"fcm_dpo/beta": 0.009930510073900223,
|
|
"fcm_dpo/delta": -0.047995198518037796,
|
|
"fcm_dpo/margin": 65.00736236572266,
|
|
"fcm_dpo/q_t": 0.3673258423805237,
|
|
"grad_norm": 29.522018432617188,
|
|
"learning_rate": 4.645557588393406e-07,
|
|
"logits/chosen": -0.45035696029663086,
|
|
"logits/rejected": -0.4322957396507263,
|
|
"logps/chosen": -128.42086791992188,
|
|
"logps/ref_chosen": -62.996971130371094,
|
|
"logps/ref_rejected": -92.98394012451172,
|
|
"logps/rejected": -223.41519165039062,
|
|
"loss": 0.9915,
|
|
"margin_dpo/margin_mean": 65.00736236572266,
|
|
"margin_dpo/margin_std": 84.73751831054688,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 0.2643171806167401,
|
|
"fcm_dpo/beta": 0.008855604566633701,
|
|
"fcm_dpo/delta": -0.11197604238986969,
|
|
"fcm_dpo/margin": 79.53601837158203,
|
|
"fcm_dpo/q_t": 0.35542401671409607,
|
|
"grad_norm": 23.635892868041992,
|
|
"learning_rate": 4.611919330113591e-07,
|
|
"logits/chosen": -0.38669413328170776,
|
|
"logits/rejected": -0.3846648335456848,
|
|
"logps/chosen": -127.61091613769531,
|
|
"logps/ref_chosen": -57.0670280456543,
|
|
"logps/ref_rejected": -97.1115493774414,
|
|
"logps/rejected": -247.19143676757812,
|
|
"loss": 0.9528,
|
|
"margin_dpo/margin_mean": 79.53601837158203,
|
|
"margin_dpo/margin_std": 97.05994415283203,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.27165932452276065,
|
|
"fcm_dpo/beta": 0.008529609069228172,
|
|
"fcm_dpo/delta": 0.05834978073835373,
|
|
"fcm_dpo/margin": 58.99933624267578,
|
|
"fcm_dpo/q_t": 0.39326274394989014,
|
|
"grad_norm": 26.241926193237305,
|
|
"learning_rate": 4.5768898691940836e-07,
|
|
"logits/chosen": -0.421181857585907,
|
|
"logits/rejected": -0.40174850821495056,
|
|
"logps/chosen": -120.03946685791016,
|
|
"logps/ref_chosen": -54.840736389160156,
|
|
"logps/ref_rejected": -75.51002502441406,
|
|
"logps/rejected": -199.70809936523438,
|
|
"loss": 1.0713,
|
|
"margin_dpo/margin_mean": 58.99933624267578,
|
|
"margin_dpo/margin_std": 85.73370361328125,
|
|
"step": 185
|
|
},
|
|
{
|
|
"epoch": 0.2790014684287812,
|
|
"fcm_dpo/beta": 0.008668321184813976,
|
|
"fcm_dpo/delta": -0.054877202957868576,
|
|
"fcm_dpo/margin": 75.03819274902344,
|
|
"fcm_dpo/q_t": 0.3645266592502594,
|
|
"grad_norm": 28.541696548461914,
|
|
"learning_rate": 4.5404922808905543e-07,
|
|
"logits/chosen": -0.41162386536598206,
|
|
"logits/rejected": -0.39615827798843384,
|
|
"logps/chosen": -127.11979675292969,
|
|
"logps/ref_chosen": -57.72148895263672,
|
|
"logps/ref_rejected": -86.85997009277344,
|
|
"logps/rejected": -231.29647827148438,
|
|
"loss": 0.9793,
|
|
"margin_dpo/margin_mean": 75.03819274902344,
|
|
"margin_dpo/margin_std": 94.09630584716797,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.28634361233480177,
|
|
"fcm_dpo/beta": 0.008089645765721798,
|
|
"fcm_dpo/delta": -0.03434378653764725,
|
|
"fcm_dpo/margin": 77.82075500488281,
|
|
"fcm_dpo/q_t": 0.3677811920642853,
|
|
"grad_norm": 28.007156372070312,
|
|
"learning_rate": 4.5027505416968985e-07,
|
|
"logits/chosen": -0.3651648759841919,
|
|
"logits/rejected": -0.35718274116516113,
|
|
"logps/chosen": -140.86399841308594,
|
|
"logps/ref_chosen": -58.26164627075195,
|
|
"logps/ref_rejected": -89.46485900878906,
|
|
"logps/rejected": -249.8879852294922,
|
|
"loss": 0.9959,
|
|
"margin_dpo/margin_mean": 77.82075500488281,
|
|
"margin_dpo/margin_std": 99.57084655761719,
|
|
"step": 195
|
|
},
|
|
{
|
|
"epoch": 0.2936857562408223,
|
|
"fcm_dpo/beta": 0.007911969907581806,
|
|
"fcm_dpo/delta": -0.04977406933903694,
|
|
"fcm_dpo/margin": 81.63540649414062,
|
|
"fcm_dpo/q_t": 0.365100622177124,
|
|
"grad_norm": 28.69991111755371,
|
|
"learning_rate": 4.4636895135509966e-07,
|
|
"logits/chosen": -0.3688076138496399,
|
|
"logits/rejected": -0.3557121157646179,
|
|
"logps/chosen": -130.88851928710938,
|
|
"logps/ref_chosen": -55.71953201293945,
|
|
"logps/ref_rejected": -83.15235137939453,
|
|
"logps/rejected": -239.95675659179688,
|
|
"loss": 0.9767,
|
|
"margin_dpo/margin_mean": 81.63540649414062,
|
|
"margin_dpo/margin_std": 101.0685806274414,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.2936857562408223,
|
|
"eval_fcm_dpo/beta": 0.010551726445555687,
|
|
"eval_fcm_dpo/delta": 0.055143389850854874,
|
|
"eval_fcm_dpo/margin": 47.49774932861328,
|
|
"eval_fcm_dpo/q_t": 0.4060860276222229,
|
|
"eval_logits/chosen": -0.40839019417762756,
|
|
"eval_logits/rejected": -0.3899528980255127,
|
|
"eval_logps/chosen": -179.96328735351562,
|
|
"eval_logps/ref_chosen": -79.05104064941406,
|
|
"eval_logps/ref_rejected": -86.79793548583984,
|
|
"eval_logps/rejected": -235.20794677734375,
|
|
"eval_loss": 0.5984740257263184,
|
|
"eval_margin_dpo/margin_mean": 47.49774932861328,
|
|
"eval_margin_dpo/margin_std": 101.77977752685547,
|
|
"eval_runtime": 39.7643,
|
|
"eval_samples_per_second": 58.822,
|
|
"eval_steps_per_second": 1.861,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.3010279001468429,
|
|
"fcm_dpo/beta": 0.010471022687852383,
|
|
"fcm_dpo/delta": -0.17127129435539246,
|
|
"fcm_dpo/margin": 72.22938537597656,
|
|
"fcm_dpo/q_t": 0.3502196669578552,
|
|
"grad_norm": 38.45856857299805,
|
|
"learning_rate": 4.4233349274571974e-07,
|
|
"logits/chosen": -0.40718260407447815,
|
|
"logits/rejected": -0.3891496956348419,
|
|
"logps/chosen": -145.1938934326172,
|
|
"logps/ref_chosen": -65.13258361816406,
|
|
"logps/ref_rejected": -92.10203552246094,
|
|
"logps/rejected": -244.39273071289062,
|
|
"loss": 0.9696,
|
|
"margin_dpo/margin_mean": 72.22938537597656,
|
|
"margin_dpo/margin_std": 95.00855255126953,
|
|
"step": 205
|
|
},
|
|
{
|
|
"epoch": 0.30837004405286345,
|
|
"fcm_dpo/beta": 0.008621977642178535,
|
|
"fcm_dpo/delta": -0.17882244288921356,
|
|
"fcm_dpo/margin": 88.25994873046875,
|
|
"fcm_dpo/q_t": 0.3427308201789856,
|
|
"grad_norm": 25.199186325073242,
|
|
"learning_rate": 4.381713366536311e-07,
|
|
"logits/chosen": -0.3917924165725708,
|
|
"logits/rejected": -0.3768480718135834,
|
|
"logps/chosen": -135.15476989746094,
|
|
"logps/ref_chosen": -54.52837371826172,
|
|
"logps/ref_rejected": -87.06227111816406,
|
|
"logps/rejected": -255.94863891601562,
|
|
"loss": 0.9227,
|
|
"margin_dpo/margin_mean": 88.25994110107422,
|
|
"margin_dpo/margin_std": 101.74530029296875,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.315712187958884,
|
|
"fcm_dpo/beta": 0.0077339522540569305,
|
|
"fcm_dpo/delta": -0.09488168358802795,
|
|
"fcm_dpo/margin": 88.91537475585938,
|
|
"fcm_dpo/q_t": 0.35661423206329346,
|
|
"grad_norm": 28.223777770996094,
|
|
"learning_rate": 4.3388522485142885e-07,
|
|
"logits/chosen": -0.4153861403465271,
|
|
"logits/rejected": -0.39465969800949097,
|
|
"logps/chosen": -147.28273010253906,
|
|
"logps/ref_chosen": -59.905250549316406,
|
|
"logps/ref_rejected": -90.25511932373047,
|
|
"logps/rejected": -266.5479736328125,
|
|
"loss": 0.9549,
|
|
"margin_dpo/margin_mean": 88.91536712646484,
|
|
"margin_dpo/margin_std": 104.00807189941406,
|
|
"step": 215
|
|
},
|
|
{
|
|
"epoch": 0.32305433186490456,
|
|
"fcm_dpo/beta": 0.007219684775918722,
|
|
"fcm_dpo/delta": -0.007601064629852772,
|
|
"fcm_dpo/margin": 83.97865295410156,
|
|
"fcm_dpo/q_t": 0.3702937960624695,
|
|
"grad_norm": 29.1616268157959,
|
|
"learning_rate": 4.2947798076611047e-07,
|
|
"logits/chosen": -0.4109880030155182,
|
|
"logits/rejected": -0.3932788074016571,
|
|
"logps/chosen": -151.2802734375,
|
|
"logps/ref_chosen": -57.68498611450195,
|
|
"logps/ref_rejected": -87.72792053222656,
|
|
"logps/rejected": -265.3018798828125,
|
|
"loss": 0.993,
|
|
"margin_dpo/margin_mean": 83.97865295410156,
|
|
"margin_dpo/margin_std": 102.7934799194336,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.3303964757709251,
|
|
"fcm_dpo/beta": 0.006763989571481943,
|
|
"fcm_dpo/delta": -0.13978341221809387,
|
|
"fcm_dpo/margin": 107.536376953125,
|
|
"fcm_dpo/q_t": 0.3473281264305115,
|
|
"grad_norm": 33.25477981567383,
|
|
"learning_rate": 4.249525076191759e-07,
|
|
"logits/chosen": -0.3368683457374573,
|
|
"logits/rejected": -0.3260774612426758,
|
|
"logps/chosen": -171.83956909179688,
|
|
"logps/ref_chosen": -54.47245407104492,
|
|
"logps/ref_rejected": -93.26266479492188,
|
|
"logps/rejected": -318.1661071777344,
|
|
"loss": 0.9375,
|
|
"margin_dpo/margin_mean": 107.536376953125,
|
|
"margin_dpo/margin_std": 123.95018005371094,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 0.3377386196769457,
|
|
"fcm_dpo/beta": 0.006237152963876724,
|
|
"fcm_dpo/delta": -0.06372438371181488,
|
|
"fcm_dpo/margin": 105.6369400024414,
|
|
"fcm_dpo/q_t": 0.35965046286582947,
|
|
"grad_norm": 26.257822036743164,
|
|
"learning_rate": 4.203117865141635e-07,
|
|
"logits/chosen": -0.37301507592201233,
|
|
"logits/rejected": -0.3503766357898712,
|
|
"logps/chosen": -172.18060302734375,
|
|
"logps/ref_chosen": -58.7701301574707,
|
|
"logps/ref_rejected": -87.85963439941406,
|
|
"logps/rejected": -306.90704345703125,
|
|
"loss": 0.9652,
|
|
"margin_dpo/margin_mean": 105.6369400024414,
|
|
"margin_dpo/margin_std": 124.76959228515625,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.34508076358296624,
|
|
"fcm_dpo/beta": 0.006058714352548122,
|
|
"fcm_dpo/delta": 0.011923698708415031,
|
|
"fcm_dpo/margin": 97.03569030761719,
|
|
"fcm_dpo/q_t": 0.37649449706077576,
|
|
"grad_norm": 38.68661880493164,
|
|
"learning_rate": 4.1555887447288255e-07,
|
|
"logits/chosen": -0.38763222098350525,
|
|
"logits/rejected": -0.37434476613998413,
|
|
"logps/chosen": -174.42852783203125,
|
|
"logps/ref_chosen": -59.0481071472168,
|
|
"logps/ref_rejected": -91.19654846191406,
|
|
"logps/rejected": -303.6126403808594,
|
|
"loss": 1.0075,
|
|
"margin_dpo/margin_mean": 97.03569030761719,
|
|
"margin_dpo/margin_std": 123.6645278930664,
|
|
"step": 235
|
|
},
|
|
{
|
|
"epoch": 0.3524229074889868,
|
|
"fcm_dpo/beta": 0.005926494486629963,
|
|
"fcm_dpo/delta": -0.02454141527414322,
|
|
"fcm_dpo/margin": 104.9325942993164,
|
|
"fcm_dpo/q_t": 0.3663932681083679,
|
|
"grad_norm": 38.213321685791016,
|
|
"learning_rate": 4.106969024216348e-07,
|
|
"logits/chosen": -0.3597189784049988,
|
|
"logits/rejected": -0.35432037711143494,
|
|
"logps/chosen": -182.47573852539062,
|
|
"logps/ref_chosen": -55.238983154296875,
|
|
"logps/ref_rejected": -91.08428955078125,
|
|
"logps/rejected": -323.2536926269531,
|
|
"loss": 0.9776,
|
|
"margin_dpo/margin_mean": 104.9325942993164,
|
|
"margin_dpo/margin_std": 123.68228912353516,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.35976505139500736,
|
|
"fcm_dpo/beta": 0.005522926338016987,
|
|
"fcm_dpo/delta": -0.09819710999727249,
|
|
"fcm_dpo/margin": 124.70765686035156,
|
|
"fcm_dpo/q_t": 0.35423046350479126,
|
|
"grad_norm": 34.15117645263672,
|
|
"learning_rate": 4.057290731287531e-07,
|
|
"logits/chosen": -0.37256139516830444,
|
|
"logits/rejected": -0.34320348501205444,
|
|
"logps/chosen": -202.49417114257812,
|
|
"logps/ref_chosen": -65.08844757080078,
|
|
"logps/ref_rejected": -86.05777740478516,
|
|
"logps/rejected": -348.1711730957031,
|
|
"loss": 0.9381,
|
|
"margin_dpo/margin_mean": 124.7076416015625,
|
|
"margin_dpo/margin_std": 138.202880859375,
|
|
"step": 245
|
|
},
|
|
{
|
|
"epoch": 0.3671071953010279,
|
|
"fcm_dpo/beta": 0.005308011546730995,
|
|
"fcm_dpo/delta": -0.02011699415743351,
|
|
"fcm_dpo/margin": 116.39664459228516,
|
|
"fcm_dpo/q_t": 0.3652518391609192,
|
|
"grad_norm": 38.33260726928711,
|
|
"learning_rate": 4.006586590948141e-07,
|
|
"logits/chosen": -0.38754525780677795,
|
|
"logits/rejected": -0.36416977643966675,
|
|
"logps/chosen": -204.6479034423828,
|
|
"logps/ref_chosen": -59.08491897583008,
|
|
"logps/ref_rejected": -87.36727142333984,
|
|
"logps/rejected": -349.326904296875,
|
|
"loss": 0.9688,
|
|
"margin_dpo/margin_mean": 116.39664459228516,
|
|
"margin_dpo/margin_std": 128.171875,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.3744493392070485,
|
|
"fcm_dpo/beta": 0.005321727134287357,
|
|
"fcm_dpo/delta": 0.008542664349079132,
|
|
"fcm_dpo/margin": 111.1045150756836,
|
|
"fcm_dpo/q_t": 0.3720734715461731,
|
|
"grad_norm": 34.73932647705078,
|
|
"learning_rate": 3.954890003969163e-07,
|
|
"logits/chosen": -0.37525609135627747,
|
|
"logits/rejected": -0.3513938784599304,
|
|
"logps/chosen": -217.4912567138672,
|
|
"logps/ref_chosen": -61.85979461669922,
|
|
"logps/ref_rejected": -88.32804107666016,
|
|
"logps/rejected": -355.06402587890625,
|
|
"loss": 1.0064,
|
|
"margin_dpo/margin_mean": 111.1045150756836,
|
|
"margin_dpo/margin_std": 139.63497924804688,
|
|
"step": 255
|
|
},
|
|
{
|
|
"epoch": 0.38179148311306904,
|
|
"fcm_dpo/beta": 0.005232472904026508,
|
|
"fcm_dpo/delta": -0.022610364481806755,
|
|
"fcm_dpo/margin": 118.58265686035156,
|
|
"fcm_dpo/q_t": 0.36637741327285767,
|
|
"grad_norm": 43.125038146972656,
|
|
"learning_rate": 3.9022350248844246e-07,
|
|
"logits/chosen": -0.36137908697128296,
|
|
"logits/rejected": -0.3586873412132263,
|
|
"logps/chosen": -199.85968017578125,
|
|
"logps/ref_chosen": -52.843467712402344,
|
|
"logps/ref_rejected": -90.4744873046875,
|
|
"logps/rejected": -356.0733642578125,
|
|
"loss": 0.9784,
|
|
"margin_dpo/margin_mean": 118.58265686035156,
|
|
"margin_dpo/margin_std": 140.423095703125,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.3891336270190896,
|
|
"fcm_dpo/beta": 0.005059250630438328,
|
|
"fcm_dpo/delta": -0.050201721489429474,
|
|
"fcm_dpo/margin": 127.75601959228516,
|
|
"fcm_dpo/q_t": 0.3604031205177307,
|
|
"grad_norm": 59.875736236572266,
|
|
"learning_rate": 3.848656339557562e-07,
|
|
"logits/chosen": -0.3211337924003601,
|
|
"logits/rejected": -0.3021458685398102,
|
|
"logps/chosen": -235.56326293945312,
|
|
"logps/ref_chosen": -59.35320281982422,
|
|
"logps/ref_rejected": -90.73350524902344,
|
|
"logps/rejected": -394.6995849609375,
|
|
"loss": 0.961,
|
|
"margin_dpo/margin_mean": 127.75602722167969,
|
|
"margin_dpo/margin_std": 145.37832641601562,
|
|
"step": 265
|
|
},
|
|
{
|
|
"epoch": 0.3964757709251101,
|
|
"fcm_dpo/beta": 0.004921893123537302,
|
|
"fcm_dpo/delta": -0.016059506684541702,
|
|
"fcm_dpo/margin": 124.87815856933594,
|
|
"fcm_dpo/q_t": 0.36820322275161743,
|
|
"grad_norm": 33.326416015625,
|
|
"learning_rate": 3.794189242333106e-07,
|
|
"logits/chosen": -0.3540351688861847,
|
|
"logits/rejected": -0.33318471908569336,
|
|
"logps/chosen": -250.6675567626953,
|
|
"logps/ref_chosen": -66.30875396728516,
|
|
"logps/ref_rejected": -95.4130630493164,
|
|
"logps/rejected": -404.6500244140625,
|
|
"loss": 0.9914,
|
|
"margin_dpo/margin_mean": 124.87815856933594,
|
|
"margin_dpo/margin_std": 155.1671600341797,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.40381791483113066,
|
|
"fcm_dpo/beta": 0.004636920988559723,
|
|
"fcm_dpo/delta": -0.06920859962701797,
|
|
"fcm_dpo/margin": 143.0980224609375,
|
|
"fcm_dpo/q_t": 0.35574159026145935,
|
|
"grad_norm": 35.31757736206055,
|
|
"learning_rate": 3.738869612786737e-07,
|
|
"logits/chosen": -0.29508358240127563,
|
|
"logits/rejected": -0.28438499569892883,
|
|
"logps/chosen": -223.6616973876953,
|
|
"logps/ref_chosen": -54.69990921020508,
|
|
"logps/ref_rejected": -92.23838806152344,
|
|
"logps/rejected": -404.2982177734375,
|
|
"loss": 0.9386,
|
|
"margin_dpo/margin_mean": 143.0980224609375,
|
|
"margin_dpo/margin_std": 151.79656982421875,
|
|
"step": 275
|
|
},
|
|
{
|
|
"epoch": 0.4111600587371512,
|
|
"fcm_dpo/beta": 0.004416828043758869,
|
|
"fcm_dpo/delta": -0.06373202055692673,
|
|
"fcm_dpo/margin": 149.3416290283203,
|
|
"fcm_dpo/q_t": 0.35707592964172363,
|
|
"grad_norm": 52.24574279785156,
|
|
"learning_rate": 3.6827338920900253e-07,
|
|
"logits/chosen": -0.2858836352825165,
|
|
"logits/rejected": -0.2778168320655823,
|
|
"logps/chosen": -238.4672088623047,
|
|
"logps/ref_chosen": -54.64586639404297,
|
|
"logps/ref_rejected": -88.19416809082031,
|
|
"logps/rejected": -421.35711669921875,
|
|
"loss": 0.9399,
|
|
"margin_dpo/margin_mean": 149.3416290283203,
|
|
"margin_dpo/margin_std": 158.734619140625,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.4185022026431718,
|
|
"fcm_dpo/beta": 0.004238150082528591,
|
|
"fcm_dpo/delta": -0.031216781586408615,
|
|
"fcm_dpo/margin": 148.39759826660156,
|
|
"fcm_dpo/q_t": 0.3608683943748474,
|
|
"grad_norm": 36.15656661987305,
|
|
"learning_rate": 3.625819059005228e-07,
|
|
"logits/chosen": -0.3247283399105072,
|
|
"logits/rejected": -0.3061348497867584,
|
|
"logps/chosen": -271.26629638671875,
|
|
"logps/ref_chosen": -63.02496337890625,
|
|
"logps/ref_rejected": -93.16323852539062,
|
|
"logps/rejected": -449.8021545410156,
|
|
"loss": 0.9501,
|
|
"margin_dpo/margin_mean": 148.39761352539062,
|
|
"margin_dpo/margin_std": 154.8343048095703,
|
|
"step": 285
|
|
},
|
|
{
|
|
"epoch": 0.42584434654919234,
|
|
"fcm_dpo/beta": 0.004017127677798271,
|
|
"fcm_dpo/delta": -0.07293753325939178,
|
|
"fcm_dpo/margin": 166.34750366210938,
|
|
"fcm_dpo/q_t": 0.35577893257141113,
|
|
"grad_norm": 28.974525451660156,
|
|
"learning_rate": 3.568162605525952e-07,
|
|
"logits/chosen": -0.28096064925193787,
|
|
"logits/rejected": -0.26907119154930115,
|
|
"logps/chosen": -278.18634033203125,
|
|
"logps/ref_chosen": -58.37105178833008,
|
|
"logps/ref_rejected": -91.59428405761719,
|
|
"logps/rejected": -477.75714111328125,
|
|
"loss": 0.9404,
|
|
"margin_dpo/margin_mean": 166.34750366210938,
|
|
"margin_dpo/margin_std": 178.529296875,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.4331864904552129,
|
|
"fcm_dpo/beta": 0.0038156050723046064,
|
|
"fcm_dpo/delta": -0.006700708530843258,
|
|
"fcm_dpo/margin": 158.75344848632812,
|
|
"fcm_dpo/q_t": 0.3686682879924774,
|
|
"grad_norm": 48.21137619018555,
|
|
"learning_rate": 3.509802512179737e-07,
|
|
"logits/chosen": -0.30817854404449463,
|
|
"logits/rejected": -0.2976076900959015,
|
|
"logps/chosen": -266.36083984375,
|
|
"logps/ref_chosen": -55.113426208496094,
|
|
"logps/ref_rejected": -85.29283905029297,
|
|
"logps/rejected": -455.293701171875,
|
|
"loss": 0.9886,
|
|
"margin_dpo/margin_mean": 158.75344848632812,
|
|
"margin_dpo/margin_std": 190.89013671875,
|
|
"step": 295
|
|
},
|
|
{
|
|
"epoch": 0.44052863436123346,
|
|
"fcm_dpo/beta": 0.003914177417755127,
|
|
"fcm_dpo/delta": -0.005944193806499243,
|
|
"fcm_dpo/margin": 154.51087951660156,
|
|
"fcm_dpo/q_t": 0.37021997570991516,
|
|
"grad_norm": 48.85098648071289,
|
|
"learning_rate": 3.4507772230088147e-07,
|
|
"logits/chosen": -0.3540688157081604,
|
|
"logits/rejected": -0.35069912672042847,
|
|
"logps/chosen": -299.68817138671875,
|
|
"logps/ref_chosen": -59.46582794189453,
|
|
"logps/ref_rejected": -97.03690338134766,
|
|
"logps/rejected": -491.77008056640625,
|
|
"loss": 0.9968,
|
|
"margin_dpo/margin_mean": 154.51087951660156,
|
|
"margin_dpo/margin_std": 190.1680145263672,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.447870778267254,
|
|
"fcm_dpo/beta": 0.0037945318035781384,
|
|
"fcm_dpo/delta": 0.018312707543373108,
|
|
"fcm_dpo/margin": 153.49285888671875,
|
|
"fcm_dpo/q_t": 0.3729243278503418,
|
|
"grad_norm": 30.252052307128906,
|
|
"learning_rate": 3.391125620245535e-07,
|
|
"logits/chosen": -0.38246288895606995,
|
|
"logits/rejected": -0.36992448568344116,
|
|
"logps/chosen": -264.3889465332031,
|
|
"logps/ref_chosen": -62.78144454956055,
|
|
"logps/ref_rejected": -91.95039367675781,
|
|
"logps/rejected": -447.05078125,
|
|
"loss": 0.9864,
|
|
"margin_dpo/margin_mean": 153.49285888671875,
|
|
"margin_dpo/margin_std": 174.68467712402344,
|
|
"step": 305
|
|
},
|
|
{
|
|
"epoch": 0.4552129221732746,
|
|
"fcm_dpo/beta": 0.0039411550387740135,
|
|
"fcm_dpo/delta": 0.013458488509058952,
|
|
"fcm_dpo/margin": 148.8785400390625,
|
|
"fcm_dpo/q_t": 0.3692263960838318,
|
|
"grad_norm": 32.29579544067383,
|
|
"learning_rate": 3.3308869986991487e-07,
|
|
"logits/chosen": -0.3574088215827942,
|
|
"logits/rejected": -0.33746927976608276,
|
|
"logps/chosen": -252.6819610595703,
|
|
"logps/ref_chosen": -61.359039306640625,
|
|
"logps/ref_rejected": -82.75496673583984,
|
|
"logps/rejected": -422.9564514160156,
|
|
"loss": 0.9708,
|
|
"margin_dpo/margin_mean": 148.87855529785156,
|
|
"margin_dpo/margin_std": 156.66542053222656,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.46255506607929514,
|
|
"fcm_dpo/beta": 0.0038400962948799133,
|
|
"fcm_dpo/delta": -0.01564035564661026,
|
|
"fcm_dpo/margin": 159.8279571533203,
|
|
"fcm_dpo/q_t": 0.3695194125175476,
|
|
"grad_norm": 48.496238708496094,
|
|
"learning_rate": 3.270101039870797e-07,
|
|
"logits/chosen": -0.30073413252830505,
|
|
"logits/rejected": -0.29121869802474976,
|
|
"logps/chosen": -291.8459167480469,
|
|
"logps/ref_chosen": -51.77602005004883,
|
|
"logps/ref_rejected": -84.58292388916016,
|
|
"logps/rejected": -484.48077392578125,
|
|
"loss": 0.9925,
|
|
"margin_dpo/margin_mean": 159.8279571533203,
|
|
"margin_dpo/margin_std": 199.2019805908203,
|
|
"step": 315
|
|
},
|
|
{
|
|
"epoch": 0.4698972099853157,
|
|
"fcm_dpo/beta": 0.0036372647155076265,
|
|
"fcm_dpo/delta": -0.10146065801382065,
|
|
"fcm_dpo/margin": 190.74334716796875,
|
|
"fcm_dpo/q_t": 0.3486129343509674,
|
|
"grad_norm": 38.94823455810547,
|
|
"learning_rate": 3.208807785813777e-07,
|
|
"logits/chosen": -0.3392784595489502,
|
|
"logits/rejected": -0.3321293592453003,
|
|
"logps/chosen": -283.16229248046875,
|
|
"logps/ref_chosen": -56.777862548828125,
|
|
"logps/ref_rejected": -99.26368713378906,
|
|
"logps/rejected": -516.3914184570312,
|
|
"loss": 0.9121,
|
|
"margin_dpo/margin_mean": 190.74334716796875,
|
|
"margin_dpo/margin_std": 189.1331787109375,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.47723935389133626,
|
|
"fcm_dpo/beta": 0.003477086080238223,
|
|
"fcm_dpo/delta": 0.012917397543787956,
|
|
"fcm_dpo/margin": 168.87332153320312,
|
|
"fcm_dpo/q_t": 0.3705959916114807,
|
|
"grad_norm": 46.6136474609375,
|
|
"learning_rate": 3.147047612756302e-07,
|
|
"logits/chosen": -0.3095089793205261,
|
|
"logits/rejected": -0.29444074630737305,
|
|
"logps/chosen": -309.4768981933594,
|
|
"logps/ref_chosen": -58.28468704223633,
|
|
"logps/ref_rejected": -83.80326843261719,
|
|
"logps/rejected": -503.86883544921875,
|
|
"loss": 0.9858,
|
|
"margin_dpo/margin_mean": 168.87332153320312,
|
|
"margin_dpo/margin_std": 188.2744140625,
|
|
"step": 325
|
|
},
|
|
{
|
|
"epoch": 0.4845814977973568,
|
|
"fcm_dpo/beta": 0.0035190985072404146,
|
|
"fcm_dpo/delta": -0.028615426272153854,
|
|
"fcm_dpo/margin": 178.0848388671875,
|
|
"fcm_dpo/q_t": 0.3618434965610504,
|
|
"grad_norm": 33.46125030517578,
|
|
"learning_rate": 3.084861204504122e-07,
|
|
"logits/chosen": -0.3480074405670166,
|
|
"logits/rejected": -0.3351406455039978,
|
|
"logps/chosen": -294.0130615234375,
|
|
"logps/ref_chosen": -62.75822067260742,
|
|
"logps/ref_rejected": -94.04203033447266,
|
|
"logps/rejected": -503.3817443847656,
|
|
"loss": 0.9518,
|
|
"margin_dpo/margin_mean": 178.0848388671875,
|
|
"margin_dpo/margin_std": 184.51199340820312,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.4919236417033774,
|
|
"fcm_dpo/beta": 0.003449521493166685,
|
|
"fcm_dpo/delta": 0.025535067543387413,
|
|
"fcm_dpo/margin": 166.8475799560547,
|
|
"fcm_dpo/q_t": 0.374795138835907,
|
|
"grad_norm": 38.16062927246094,
|
|
"learning_rate": 3.022289525640531e-07,
|
|
"logits/chosen": -0.35363438725471497,
|
|
"logits/rejected": -0.3413279056549072,
|
|
"logps/chosen": -310.823486328125,
|
|
"logps/ref_chosen": -58.59650421142578,
|
|
"logps/ref_rejected": -88.69586944580078,
|
|
"logps/rejected": -507.77044677734375,
|
|
"loss": 0.9994,
|
|
"margin_dpo/margin_mean": 166.8475799560547,
|
|
"margin_dpo/margin_std": 196.09652709960938,
|
|
"step": 335
|
|
},
|
|
{
|
|
"epoch": 0.49926578560939794,
|
|
"fcm_dpo/beta": 0.00335273751989007,
|
|
"fcm_dpo/delta": -0.06508271396160126,
|
|
"fcm_dpo/margin": 196.6968536376953,
|
|
"fcm_dpo/q_t": 0.3557378649711609,
|
|
"grad_norm": 33.71820831298828,
|
|
"learning_rate": 2.959373794541426e-07,
|
|
"logits/chosen": -0.3746200203895569,
|
|
"logits/rejected": -0.3606112599372864,
|
|
"logps/chosen": -308.72821044921875,
|
|
"logps/ref_chosen": -58.18162155151367,
|
|
"logps/ref_rejected": -94.44358825683594,
|
|
"logps/rejected": -541.68701171875,
|
|
"loss": 0.9382,
|
|
"margin_dpo/margin_mean": 196.6968536376953,
|
|
"margin_dpo/margin_std": 204.6141357421875,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.5066079295154186,
|
|
"fcm_dpo/beta": 0.003242532955482602,
|
|
"fcm_dpo/delta": -0.03586641699075699,
|
|
"fcm_dpo/margin": 195.18536376953125,
|
|
"fcm_dpo/q_t": 0.36007246375083923,
|
|
"grad_norm": 31.20911979675293,
|
|
"learning_rate": 2.896155456223163e-07,
|
|
"logits/chosen": -0.4067712724208832,
|
|
"logits/rejected": -0.3969467282295227,
|
|
"logps/chosen": -296.33489990234375,
|
|
"logps/ref_chosen": -57.9904899597168,
|
|
"logps/ref_rejected": -99.11092376708984,
|
|
"logps/rejected": -532.6407470703125,
|
|
"loss": 0.9402,
|
|
"margin_dpo/margin_mean": 195.18536376953125,
|
|
"margin_dpo/margin_std": 193.4260711669922,
|
|
"step": 345
|
|
},
|
|
{
|
|
"epoch": 0.5139500734214391,
|
|
"fcm_dpo/beta": 0.0032154968939721584,
|
|
"fcm_dpo/delta": 0.021350596100091934,
|
|
"fcm_dpo/margin": 180.19900512695312,
|
|
"fcm_dpo/q_t": 0.3731505274772644,
|
|
"grad_norm": 44.616703033447266,
|
|
"learning_rate": 2.8326761550411346e-07,
|
|
"logits/chosen": -0.38301217555999756,
|
|
"logits/rejected": -0.37859946489334106,
|
|
"logps/chosen": -335.93255615234375,
|
|
"logps/ref_chosen": -58.29923629760742,
|
|
"logps/ref_rejected": -89.25711822509766,
|
|
"logps/rejected": -547.0894775390625,
|
|
"loss": 0.9973,
|
|
"margin_dpo/margin_mean": 180.19900512695312,
|
|
"margin_dpo/margin_std": 214.97982788085938,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.5212922173274597,
|
|
"fcm_dpo/beta": 0.0032494659535586834,
|
|
"fcm_dpo/delta": -0.003944025840610266,
|
|
"fcm_dpo/margin": 185.5841522216797,
|
|
"fcm_dpo/q_t": 0.3689090609550476,
|
|
"grad_norm": 71.0094223022461,
|
|
"learning_rate": 2.7689777072570284e-07,
|
|
"logits/chosen": -0.4027875065803528,
|
|
"logits/rejected": -0.3871976435184479,
|
|
"logps/chosen": -334.85272216796875,
|
|
"logps/ref_chosen": -60.788482666015625,
|
|
"logps/ref_rejected": -85.94129943847656,
|
|
"logps/rejected": -545.5897216796875,
|
|
"loss": 0.9792,
|
|
"margin_dpo/margin_mean": 185.5841522216797,
|
|
"margin_dpo/margin_std": 211.45913696289062,
|
|
"step": 355
|
|
},
|
|
{
|
|
"epoch": 0.5286343612334802,
|
|
"fcm_dpo/beta": 0.0032737895380705595,
|
|
"fcm_dpo/delta": 0.027207667008042336,
|
|
"fcm_dpo/margin": 163.24708557128906,
|
|
"fcm_dpo/q_t": 0.3836524188518524,
|
|
"grad_norm": 41.69169616699219,
|
|
"learning_rate": 2.7051020734928443e-07,
|
|
"logits/chosen": -0.37818944454193115,
|
|
"logits/rejected": -0.3610234558582306,
|
|
"logps/chosen": -335.9218444824219,
|
|
"logps/ref_chosen": -57.6871337890625,
|
|
"logps/ref_rejected": -80.62527465820312,
|
|
"logps/rejected": -522.1071166992188,
|
|
"loss": 1.0286,
|
|
"margin_dpo/margin_mean": 163.24708557128906,
|
|
"margin_dpo/margin_std": 207.1353759765625,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.5359765051395007,
|
|
"fcm_dpo/beta": 0.0032265144400298595,
|
|
"fcm_dpo/delta": -0.0519678071141243,
|
|
"fcm_dpo/margin": 200.8280029296875,
|
|
"fcm_dpo/q_t": 0.35923275351524353,
|
|
"grad_norm": 34.91660690307617,
|
|
"learning_rate": 2.641091331089811e-07,
|
|
"logits/chosen": -0.33706134557724,
|
|
"logits/rejected": -0.3363405764102936,
|
|
"logps/chosen": -300.1896667480469,
|
|
"logps/ref_chosen": -51.490867614746094,
|
|
"logps/ref_rejected": -91.02871704101562,
|
|
"logps/rejected": -540.5555419921875,
|
|
"loss": 0.9461,
|
|
"margin_dpo/margin_mean": 200.82803344726562,
|
|
"margin_dpo/margin_std": 215.3280487060547,
|
|
"step": 365
|
|
},
|
|
{
|
|
"epoch": 0.5433186490455213,
|
|
"fcm_dpo/beta": 0.0032107695005834103,
|
|
"fcm_dpo/delta": 0.05830075219273567,
|
|
"fcm_dpo/margin": 169.78909301757812,
|
|
"fcm_dpo/q_t": 0.37912872433662415,
|
|
"grad_norm": 50.44911193847656,
|
|
"learning_rate": 2.5769876463904263e-07,
|
|
"logits/chosen": -0.3650849461555481,
|
|
"logits/rejected": -0.35905131697654724,
|
|
"logps/chosen": -303.07366943359375,
|
|
"logps/ref_chosen": -58.113502502441406,
|
|
"logps/ref_rejected": -89.43451690673828,
|
|
"logps/rejected": -504.18377685546875,
|
|
"loss": 1.0101,
|
|
"margin_dpo/margin_mean": 169.78909301757812,
|
|
"margin_dpo/margin_std": 197.232666015625,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.5506607929515418,
|
|
"fcm_dpo/beta": 0.003249173518270254,
|
|
"fcm_dpo/delta": -0.03621614724397659,
|
|
"fcm_dpo/margin": 194.74484252929688,
|
|
"fcm_dpo/q_t": 0.3639177083969116,
|
|
"grad_norm": 40.54216003417969,
|
|
"learning_rate": 2.512833246961859e-07,
|
|
"logits/chosen": -0.41527968645095825,
|
|
"logits/rejected": -0.3979854881763458,
|
|
"logps/chosen": -325.7327880859375,
|
|
"logps/ref_chosen": -65.23600769042969,
|
|
"logps/ref_rejected": -89.24995422363281,
|
|
"logps/rejected": -544.4915771484375,
|
|
"loss": 0.9763,
|
|
"margin_dpo/margin_mean": 194.74484252929688,
|
|
"margin_dpo/margin_std": 230.34060668945312,
|
|
"step": 375
|
|
},
|
|
{
|
|
"epoch": 0.5580029368575624,
|
|
"fcm_dpo/beta": 0.003087093820795417,
|
|
"fcm_dpo/delta": -0.08724673092365265,
|
|
"fcm_dpo/margin": 220.7941131591797,
|
|
"fcm_dpo/q_t": 0.3510977625846863,
|
|
"grad_norm": 30.83326530456543,
|
|
"learning_rate": 2.4486703937790243e-07,
|
|
"logits/chosen": -0.35653212666511536,
|
|
"logits/rejected": -0.36338263750076294,
|
|
"logps/chosen": -309.36334228515625,
|
|
"logps/ref_chosen": -53.33893966674805,
|
|
"logps/ref_rejected": -102.15375518798828,
|
|
"logps/rejected": -578.9722900390625,
|
|
"loss": 0.9291,
|
|
"margin_dpo/margin_mean": 220.79409790039062,
|
|
"margin_dpo/margin_std": 231.2888641357422,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.5653450807635829,
|
|
"fcm_dpo/beta": 0.0030052317306399345,
|
|
"fcm_dpo/delta": 0.018545908853411674,
|
|
"fcm_dpo/margin": 193.73928833007812,
|
|
"fcm_dpo/q_t": 0.37150126695632935,
|
|
"grad_norm": 33.003395080566406,
|
|
"learning_rate": 2.3845413533856514e-07,
|
|
"logits/chosen": -0.3649640679359436,
|
|
"logits/rejected": -0.3546612858772278,
|
|
"logps/chosen": -344.1658630371094,
|
|
"logps/ref_chosen": -58.36262130737305,
|
|
"logps/ref_rejected": -89.44685363769531,
|
|
"logps/rejected": -568.9893798828125,
|
|
"loss": 0.9882,
|
|
"margin_dpo/margin_mean": 193.73927307128906,
|
|
"margin_dpo/margin_std": 221.2782440185547,
|
|
"step": 385
|
|
},
|
|
{
|
|
"epoch": 0.5726872246696035,
|
|
"fcm_dpo/beta": 0.0029720370657742023,
|
|
"fcm_dpo/delta": -0.022272076457738876,
|
|
"fcm_dpo/margin": 208.7709503173828,
|
|
"fcm_dpo/q_t": 0.36599045991897583,
|
|
"grad_norm": 34.66581726074219,
|
|
"learning_rate": 2.320488370051681e-07,
|
|
"logits/chosen": -0.3872172236442566,
|
|
"logits/rejected": -0.3775530159473419,
|
|
"logps/chosen": -367.145263671875,
|
|
"logps/ref_chosen": -56.380653381347656,
|
|
"logps/ref_rejected": -90.47447204589844,
|
|
"logps/rejected": -610.010009765625,
|
|
"loss": 0.9741,
|
|
"margin_dpo/margin_mean": 208.7709503173828,
|
|
"margin_dpo/margin_std": 244.04592895507812,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.580029368575624,
|
|
"fcm_dpo/beta": 0.0030248172115534544,
|
|
"fcm_dpo/delta": 0.0180402509868145,
|
|
"fcm_dpo/margin": 192.4487762451172,
|
|
"fcm_dpo/q_t": 0.37626224756240845,
|
|
"grad_norm": 38.42062759399414,
|
|
"learning_rate": 2.2565536379453404e-07,
|
|
"logits/chosen": -0.40114063024520874,
|
|
"logits/rejected": -0.39468201994895935,
|
|
"logps/chosen": -349.0114440917969,
|
|
"logps/ref_chosen": -55.95304489135742,
|
|
"logps/ref_rejected": -87.13162994384766,
|
|
"logps/rejected": -572.6387939453125,
|
|
"loss": 1.0072,
|
|
"margin_dpo/margin_mean": 192.4487762451172,
|
|
"margin_dpo/margin_std": 244.62527465820312,
|
|
"step": 395
|
|
},
|
|
{
|
|
"epoch": 0.5873715124816447,
|
|
"fcm_dpo/beta": 0.0028987762052565813,
|
|
"fcm_dpo/delta": -0.08543933928012848,
|
|
"fcm_dpo/margin": 234.6150665283203,
|
|
"fcm_dpo/q_t": 0.3521673381328583,
|
|
"grad_norm": 28.816965103149414,
|
|
"learning_rate": 2.192779273338215e-07,
|
|
"logits/chosen": -0.4242188036441803,
|
|
"logits/rejected": -0.40835875272750854,
|
|
"logps/chosen": -368.6856994628906,
|
|
"logps/ref_chosen": -64.59160614013672,
|
|
"logps/ref_rejected": -96.700927734375,
|
|
"logps/rejected": -635.4100341796875,
|
|
"loss": 0.9245,
|
|
"margin_dpo/margin_mean": 234.6150665283203,
|
|
"margin_dpo/margin_std": 239.69271850585938,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.5873715124816447,
|
|
"eval_fcm_dpo/beta": 0.003817289602011442,
|
|
"eval_fcm_dpo/delta": 0.06625650823116302,
|
|
"eval_fcm_dpo/margin": 134.67041015625,
|
|
"eval_fcm_dpo/q_t": 0.39730218052864075,
|
|
"eval_logits/chosen": -0.43868309259414673,
|
|
"eval_logits/rejected": -0.4222582280635834,
|
|
"eval_logps/chosen": -454.1283264160156,
|
|
"eval_logps/ref_chosen": -79.05104064941406,
|
|
"eval_logps/ref_rejected": -86.79793548583984,
|
|
"eval_logps/rejected": -596.5455932617188,
|
|
"eval_loss": 0.5729401111602783,
|
|
"eval_margin_dpo/margin_mean": 134.67041015625,
|
|
"eval_margin_dpo/margin_std": 251.6155242919922,
|
|
"eval_runtime": 39.7615,
|
|
"eval_samples_per_second": 58.826,
|
|
"eval_steps_per_second": 1.861,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.5947136563876652,
|
|
"fcm_dpo/beta": 0.003784316824749112,
|
|
"fcm_dpo/delta": -0.2645108699798584,
|
|
"fcm_dpo/margin": 221.6338348388672,
|
|
"fcm_dpo/q_t": 0.3333556056022644,
|
|
"grad_norm": 62.11888885498047,
|
|
"learning_rate": 2.129207286861638e-07,
|
|
"logits/chosen": -0.40359169244766235,
|
|
"logits/rejected": -0.39246273040771484,
|
|
"logps/chosen": -352.8330078125,
|
|
"logps/ref_chosen": -53.61777877807617,
|
|
"logps/ref_rejected": -81.28938293457031,
|
|
"logps/rejected": -602.1383666992188,
|
|
"loss": 0.9244,
|
|
"margin_dpo/margin_mean": 221.6338348388672,
|
|
"margin_dpo/margin_std": 262.4577941894531,
|
|
"step": 405
|
|
},
|
|
{
|
|
"epoch": 0.6020558002936858,
|
|
"fcm_dpo/beta": 0.0033690209966152906,
|
|
"fcm_dpo/delta": -0.02751757577061653,
|
|
"fcm_dpo/margin": 185.41323852539062,
|
|
"fcm_dpo/q_t": 0.3654642701148987,
|
|
"grad_norm": 36.665096282958984,
|
|
"learning_rate": 2.065879555832674e-07,
|
|
"logits/chosen": -0.4079107642173767,
|
|
"logits/rejected": -0.3933170437812805,
|
|
"logps/chosen": -350.5545654296875,
|
|
"logps/ref_chosen": -58.9287223815918,
|
|
"logps/ref_rejected": -85.55818176269531,
|
|
"logps/rejected": -562.5972290039062,
|
|
"loss": 0.9824,
|
|
"margin_dpo/margin_mean": 185.41323852539062,
|
|
"margin_dpo/margin_std": 221.5907440185547,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.6093979441997063,
|
|
"fcm_dpo/beta": 0.003266123589128256,
|
|
"fcm_dpo/delta": -0.03337870165705681,
|
|
"fcm_dpo/margin": 192.88467407226562,
|
|
"fcm_dpo/q_t": 0.3626883924007416,
|
|
"grad_norm": 39.306392669677734,
|
|
"learning_rate": 2.002837796667909e-07,
|
|
"logits/chosen": -0.382286936044693,
|
|
"logits/rejected": -0.3848063051700592,
|
|
"logps/chosen": -352.2253112792969,
|
|
"logps/ref_chosen": -58.45662307739258,
|
|
"logps/ref_rejected": -93.67063903808594,
|
|
"logps/rejected": -580.3240356445312,
|
|
"loss": 0.9633,
|
|
"margin_dpo/margin_mean": 192.88467407226562,
|
|
"margin_dpo/margin_std": 213.9280548095703,
|
|
"step": 415
|
|
},
|
|
{
|
|
"epoch": 0.6167400881057269,
|
|
"fcm_dpo/beta": 0.003045933786779642,
|
|
"fcm_dpo/delta": -0.07158443331718445,
|
|
"fcm_dpo/margin": 218.24789428710938,
|
|
"fcm_dpo/q_t": 0.3554866909980774,
|
|
"grad_norm": 52.39727020263672,
|
|
"learning_rate": 1.9401235374032425e-07,
|
|
"logits/chosen": -0.4089987874031067,
|
|
"logits/rejected": -0.3841163218021393,
|
|
"logps/chosen": -361.6821594238281,
|
|
"logps/ref_chosen": -64.2349853515625,
|
|
"logps/ref_rejected": -84.86299133300781,
|
|
"logps/rejected": -600.5580444335938,
|
|
"loss": 0.9405,
|
|
"margin_dpo/margin_mean": 218.24789428710938,
|
|
"margin_dpo/margin_std": 232.1405487060547,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.6240822320117474,
|
|
"fcm_dpo/beta": 0.0030709414277225733,
|
|
"fcm_dpo/delta": 0.03329915553331375,
|
|
"fcm_dpo/margin": 185.03384399414062,
|
|
"fcm_dpo/q_t": 0.37453344464302063,
|
|
"grad_norm": 36.52717590332031,
|
|
"learning_rate": 1.8777780903377732e-07,
|
|
"logits/chosen": -0.41324177384376526,
|
|
"logits/rejected": -0.39911994338035583,
|
|
"logps/chosen": -333.1438293457031,
|
|
"logps/ref_chosen": -56.054161071777344,
|
|
"logps/ref_rejected": -85.17119598388672,
|
|
"logps/rejected": -547.294677734375,
|
|
"loss": 1.0006,
|
|
"margin_dpo/margin_mean": 185.03384399414062,
|
|
"margin_dpo/margin_std": 220.0364990234375,
|
|
"step": 425
|
|
},
|
|
{
|
|
"epoch": 0.631424375917768,
|
|
"fcm_dpo/beta": 0.003164885099977255,
|
|
"fcm_dpo/delta": 0.03732316568493843,
|
|
"fcm_dpo/margin": 178.30136108398438,
|
|
"fcm_dpo/q_t": 0.37670689821243286,
|
|
"grad_norm": 40.842708587646484,
|
|
"learning_rate": 1.8158425248197928e-07,
|
|
"logits/chosen": -0.4269745945930481,
|
|
"logits/rejected": -0.40301522612571716,
|
|
"logps/chosen": -347.55450439453125,
|
|
"logps/ref_chosen": -69.24568176269531,
|
|
"logps/ref_rejected": -91.8664321899414,
|
|
"logps/rejected": -548.4766235351562,
|
|
"loss": 1.0109,
|
|
"margin_dpo/margin_mean": 178.30136108398438,
|
|
"margin_dpo/margin_std": 218.34555053710938,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.6387665198237885,
|
|
"fcm_dpo/beta": 0.003134062048047781,
|
|
"fcm_dpo/delta": -0.024000858888030052,
|
|
"fcm_dpo/margin": 198.20034790039062,
|
|
"fcm_dpo/q_t": 0.3648958206176758,
|
|
"grad_norm": 33.455562591552734,
|
|
"learning_rate": 1.7543576401928218e-07,
|
|
"logits/chosen": -0.38360780477523804,
|
|
"logits/rejected": -0.3695995807647705,
|
|
"logps/chosen": -344.5923767089844,
|
|
"logps/ref_chosen": -60.03449630737305,
|
|
"logps/ref_rejected": -90.6872329711914,
|
|
"logps/rejected": -573.4454345703125,
|
|
"loss": 0.9696,
|
|
"margin_dpo/margin_mean": 198.2003631591797,
|
|
"margin_dpo/margin_std": 223.5625457763672,
|
|
"step": 435
|
|
},
|
|
{
|
|
"epoch": 0.6461086637298091,
|
|
"fcm_dpo/beta": 0.0031414516270160675,
|
|
"fcm_dpo/delta": 0.008422891609370708,
|
|
"fcm_dpo/margin": 188.22352600097656,
|
|
"fcm_dpo/q_t": 0.3708716332912445,
|
|
"grad_norm": 39.6731071472168,
|
|
"learning_rate": 1.6933639389195134e-07,
|
|
"logits/chosen": -0.4058264195919037,
|
|
"logits/rejected": -0.38181501626968384,
|
|
"logps/chosen": -336.13507080078125,
|
|
"logps/ref_chosen": -65.50349426269531,
|
|
"logps/ref_rejected": -85.66627502441406,
|
|
"logps/rejected": -544.5213623046875,
|
|
"loss": 0.9793,
|
|
"margin_dpo/margin_mean": 188.22352600097656,
|
|
"margin_dpo/margin_std": 209.7587127685547,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.6534508076358296,
|
|
"fcm_dpo/beta": 0.00310450023971498,
|
|
"fcm_dpo/delta": -0.04807734861969948,
|
|
"fcm_dpo/margin": 207.53652954101562,
|
|
"fcm_dpo/q_t": 0.36341750621795654,
|
|
"grad_norm": 31.921981811523438,
|
|
"learning_rate": 1.6329015999011182e-07,
|
|
"logits/chosen": -0.3922547996044159,
|
|
"logits/rejected": -0.37669098377227783,
|
|
"logps/chosen": -361.6647033691406,
|
|
"logps/ref_chosen": -60.72443389892578,
|
|
"logps/ref_rejected": -89.9255142211914,
|
|
"logps/rejected": -598.40234375,
|
|
"loss": 0.9762,
|
|
"margin_dpo/margin_mean": 207.53652954101562,
|
|
"margin_dpo/margin_std": 250.76455688476562,
|
|
"step": 445
|
|
},
|
|
{
|
|
"epoch": 0.6607929515418502,
|
|
"fcm_dpo/beta": 0.002883550710976124,
|
|
"fcm_dpo/delta": -0.08991138637065887,
|
|
"fcm_dpo/margin": 237.09793090820312,
|
|
"fcm_dpo/q_t": 0.35191330313682556,
|
|
"grad_norm": 38.20564270019531,
|
|
"learning_rate": 1.573010452010098e-07,
|
|
"logits/chosen": -0.378944993019104,
|
|
"logits/rejected": -0.3763580918312073,
|
|
"logps/chosen": -349.90106201171875,
|
|
"logps/ref_chosen": -59.96248245239258,
|
|
"logps/ref_rejected": -98.71509552001953,
|
|
"logps/rejected": -625.751708984375,
|
|
"loss": 0.924,
|
|
"margin_dpo/margin_mean": 237.09793090820312,
|
|
"margin_dpo/margin_std": 247.1533660888672,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.6681350954478708,
|
|
"fcm_dpo/beta": 0.002764140721410513,
|
|
"fcm_dpo/delta": -0.03560318797826767,
|
|
"fcm_dpo/margin": 229.11276245117188,
|
|
"fcm_dpo/q_t": 0.362155020236969,
|
|
"grad_norm": 35.098609924316406,
|
|
"learning_rate": 1.5137299478533064e-07,
|
|
"logits/chosen": -0.3881308138370514,
|
|
"logits/rejected": -0.3764522671699524,
|
|
"logps/chosen": -346.89105224609375,
|
|
"logps/ref_chosen": -54.48131561279297,
|
|
"logps/ref_rejected": -90.6321029663086,
|
|
"logps/rejected": -612.1546630859375,
|
|
"loss": 0.9596,
|
|
"margin_dpo/margin_mean": 229.11276245117188,
|
|
"margin_dpo/margin_std": 254.59921264648438,
|
|
"step": 455
|
|
},
|
|
{
|
|
"epoch": 0.6754772393538914,
|
|
"fcm_dpo/beta": 0.0025613114703446627,
|
|
"fcm_dpo/delta": -0.05846347659826279,
|
|
"fcm_dpo/margin": 255.3362274169922,
|
|
"fcm_dpo/q_t": 0.3553471565246582,
|
|
"grad_norm": 46.92758560180664,
|
|
"learning_rate": 1.4550991377830423e-07,
|
|
"logits/chosen": -0.4343814253807068,
|
|
"logits/rejected": -0.4337409436702728,
|
|
"logps/chosen": -382.7040100097656,
|
|
"logps/ref_chosen": -52.97611618041992,
|
|
"logps/ref_rejected": -95.65971374511719,
|
|
"logps/rejected": -680.7238159179688,
|
|
"loss": 0.9276,
|
|
"margin_dpo/margin_mean": 255.3362579345703,
|
|
"margin_dpo/margin_std": 251.95751953125,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.6828193832599119,
|
|
"fcm_dpo/beta": 0.0025722947902977467,
|
|
"fcm_dpo/delta": -0.020649045705795288,
|
|
"fcm_dpo/margin": 240.62417602539062,
|
|
"fcm_dpo/q_t": 0.36578595638275146,
|
|
"grad_norm": 36.07568359375,
|
|
"learning_rate": 1.3971566441730714e-07,
|
|
"logits/chosen": -0.46626290678977966,
|
|
"logits/rejected": -0.47067561745643616,
|
|
"logps/chosen": -415.87188720703125,
|
|
"logps/ref_chosen": -58.2827033996582,
|
|
"logps/ref_rejected": -94.15567779541016,
|
|
"logps/rejected": -692.3690185546875,
|
|
"loss": 0.9691,
|
|
"margin_dpo/margin_mean": 240.62417602539062,
|
|
"margin_dpo/margin_std": 267.12054443359375,
|
|
"step": 465
|
|
},
|
|
{
|
|
"epoch": 0.6901615271659325,
|
|
"fcm_dpo/beta": 0.0024534829426556826,
|
|
"fcm_dpo/delta": -0.02453800104558468,
|
|
"fcm_dpo/margin": 253.6896209716797,
|
|
"fcm_dpo/q_t": 0.36288315057754517,
|
|
"grad_norm": 28.073619842529297,
|
|
"learning_rate": 1.339940635976592e-07,
|
|
"logits/chosen": -0.5113226175308228,
|
|
"logits/rejected": -0.49717050790786743,
|
|
"logps/chosen": -398.2627868652344,
|
|
"logps/ref_chosen": -62.69774627685547,
|
|
"logps/ref_rejected": -96.33873748779297,
|
|
"logps/rejected": -685.5933837890625,
|
|
"loss": 0.9596,
|
|
"margin_dpo/margin_mean": 253.6896209716797,
|
|
"margin_dpo/margin_std": 270.79937744140625,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.697503671071953,
|
|
"fcm_dpo/beta": 0.002478546230122447,
|
|
"fcm_dpo/delta": 0.038639336824417114,
|
|
"fcm_dpo/margin": 227.10226440429688,
|
|
"fcm_dpo/q_t": 0.3776451051235199,
|
|
"grad_norm": 29.91801643371582,
|
|
"learning_rate": 1.2834888035828596e-07,
|
|
"logits/chosen": -0.5382334589958191,
|
|
"logits/rejected": -0.5220173597335815,
|
|
"logps/chosen": -366.8069763183594,
|
|
"logps/ref_chosen": -61.12194061279297,
|
|
"logps/ref_rejected": -92.42192077636719,
|
|
"logps/rejected": -625.2091674804688,
|
|
"loss": 1.0046,
|
|
"margin_dpo/margin_mean": 227.10226440429688,
|
|
"margin_dpo/margin_std": 276.1416320800781,
|
|
"step": 475
|
|
},
|
|
{
|
|
"epoch": 0.7048458149779736,
|
|
"fcm_dpo/beta": 0.0025619766674935818,
|
|
"fcm_dpo/delta": 0.04407358169555664,
|
|
"fcm_dpo/margin": 217.63076782226562,
|
|
"fcm_dpo/q_t": 0.37629732489585876,
|
|
"grad_norm": 55.00226974487305,
|
|
"learning_rate": 1.227838333989088e-07,
|
|
"logits/chosen": -0.5085734128952026,
|
|
"logits/rejected": -0.49212780594825745,
|
|
"logps/chosen": -347.72064208984375,
|
|
"logps/ref_chosen": -53.550628662109375,
|
|
"logps/ref_rejected": -82.65167999267578,
|
|
"logps/rejected": -594.4524536132812,
|
|
"loss": 1.001,
|
|
"margin_dpo/margin_mean": 217.63070678710938,
|
|
"margin_dpo/margin_std": 248.6515350341797,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.7121879588839941,
|
|
"fcm_dpo/beta": 0.0026028165593743324,
|
|
"fcm_dpo/delta": -0.024844055995345116,
|
|
"fcm_dpo/margin": 239.23391723632812,
|
|
"fcm_dpo/q_t": 0.3650123178958893,
|
|
"grad_norm": 38.28968811035156,
|
|
"learning_rate": 1.1730258863039347e-07,
|
|
"logits/chosen": -0.5096001029014587,
|
|
"logits/rejected": -0.5005736351013184,
|
|
"logps/chosen": -372.1683654785156,
|
|
"logps/ref_chosen": -60.76704788208008,
|
|
"logps/ref_rejected": -92.70649719238281,
|
|
"logps/rejected": -643.341796875,
|
|
"loss": 0.9685,
|
|
"margin_dpo/margin_mean": 239.23391723632812,
|
|
"margin_dpo/margin_std": 269.9613037109375,
|
|
"step": 485
|
|
},
|
|
{
|
|
"epoch": 0.7195301027900147,
|
|
"fcm_dpo/beta": 0.002558878855779767,
|
|
"fcm_dpo/delta": 0.009935403242707253,
|
|
"fcm_dpo/margin": 230.77432250976562,
|
|
"fcm_dpo/q_t": 0.3699950575828552,
|
|
"grad_norm": 33.77944564819336,
|
|
"learning_rate": 1.1190875675987355e-07,
|
|
"logits/chosen": -0.5016877055168152,
|
|
"logits/rejected": -0.4948623776435852,
|
|
"logps/chosen": -340.79034423828125,
|
|
"logps/ref_chosen": -53.78142547607422,
|
|
"logps/ref_rejected": -91.67438507080078,
|
|
"logps/rejected": -609.4576416015625,
|
|
"loss": 0.977,
|
|
"margin_dpo/margin_mean": 230.77432250976562,
|
|
"margin_dpo/margin_std": 254.27835083007812,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.7268722466960352,
|
|
"fcm_dpo/beta": 0.002750278217718005,
|
|
"fcm_dpo/delta": 0.05883105471730232,
|
|
"fcm_dpo/margin": 196.99557495117188,
|
|
"fcm_dpo/q_t": 0.38119006156921387,
|
|
"grad_norm": 49.648441314697266,
|
|
"learning_rate": 1.0660589091223854e-07,
|
|
"logits/chosen": -0.501973032951355,
|
|
"logits/rejected": -0.49266910552978516,
|
|
"logps/chosen": -345.7253112792969,
|
|
"logps/ref_chosen": -58.9004020690918,
|
|
"logps/ref_rejected": -82.15424346923828,
|
|
"logps/rejected": -565.9747314453125,
|
|
"loss": 1.021,
|
|
"margin_dpo/margin_mean": 196.99557495117188,
|
|
"margin_dpo/margin_std": 244.4354248046875,
|
|
"step": 495
|
|
},
|
|
{
|
|
"epoch": 0.7342143906020558,
|
|
"fcm_dpo/beta": 0.002816407708451152,
|
|
"fcm_dpo/delta": 0.02024873159825802,
|
|
"fcm_dpo/margin": 205.9034423828125,
|
|
"fcm_dpo/q_t": 0.37294405698776245,
|
|
"grad_norm": 48.712711334228516,
|
|
"learning_rate": 1.0139748428955333e-07,
|
|
"logits/chosen": -0.5191727876663208,
|
|
"logits/rejected": -0.5140315294265747,
|
|
"logps/chosen": -387.0251159667969,
|
|
"logps/ref_chosen": -62.13483810424805,
|
|
"logps/ref_rejected": -87.91773223876953,
|
|
"logps/rejected": -618.7114868164062,
|
|
"loss": 0.9971,
|
|
"margin_dpo/margin_mean": 205.90341186523438,
|
|
"margin_dpo/margin_std": 241.8444366455078,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.7415565345080763,
|
|
"fcm_dpo/beta": 0.002710042055696249,
|
|
"fcm_dpo/delta": -0.02353382483124733,
|
|
"fcm_dpo/margin": 229.17935180664062,
|
|
"fcm_dpo/q_t": 0.36337199807167053,
|
|
"grad_norm": 56.58249282836914,
|
|
"learning_rate": 9.628696786995188e-08,
|
|
"logits/chosen": -0.5033208131790161,
|
|
"logits/rejected": -0.48864540457725525,
|
|
"logps/chosen": -374.4068298339844,
|
|
"logps/ref_chosen": -62.631813049316406,
|
|
"logps/ref_rejected": -87.59168243408203,
|
|
"logps/rejected": -628.546142578125,
|
|
"loss": 0.9598,
|
|
"margin_dpo/margin_mean": 229.17935180664062,
|
|
"margin_dpo/margin_std": 242.96011352539062,
|
|
"step": 505
|
|
},
|
|
{
|
|
"epoch": 0.748898678414097,
|
|
"fcm_dpo/beta": 0.0027464856393635273,
|
|
"fcm_dpo/delta": 0.013351870700716972,
|
|
"fcm_dpo/margin": 213.7869873046875,
|
|
"fcm_dpo/q_t": 0.37243741750717163,
|
|
"grad_norm": 31.705190658569336,
|
|
"learning_rate": 9.127770814751932e-08,
|
|
"logits/chosen": -0.47533559799194336,
|
|
"logits/rejected": -0.4699474275112152,
|
|
"logps/chosen": -376.91949462890625,
|
|
"logps/ref_chosen": -60.552574157714844,
|
|
"logps/ref_rejected": -91.0874252319336,
|
|
"logps/rejected": -621.2413330078125,
|
|
"loss": 0.9892,
|
|
"margin_dpo/margin_mean": 213.7869873046875,
|
|
"margin_dpo/margin_std": 251.62619018554688,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 0.7562408223201175,
|
|
"fcm_dpo/beta": 0.0027470688801258802,
|
|
"fcm_dpo/delta": 0.029249707236886024,
|
|
"fcm_dpo/margin": 208.3932647705078,
|
|
"fcm_dpo/q_t": 0.37528282403945923,
|
|
"grad_norm": 40.00129318237305,
|
|
"learning_rate": 8.637300491465272e-08,
|
|
"logits/chosen": -0.4838427007198334,
|
|
"logits/rejected": -0.4740561544895172,
|
|
"logps/chosen": -362.11346435546875,
|
|
"logps/ref_chosen": -60.9382438659668,
|
|
"logps/ref_rejected": -83.36767578125,
|
|
"logps/rejected": -592.9361572265625,
|
|
"loss": 1.0079,
|
|
"margin_dpo/margin_mean": 208.3932647705078,
|
|
"margin_dpo/margin_std": 256.8492736816406,
|
|
"step": 515
|
|
},
|
|
{
|
|
"epoch": 0.7635829662261381,
|
|
"fcm_dpo/beta": 0.002783264499157667,
|
|
"fcm_dpo/delta": -0.007280466612428427,
|
|
"fcm_dpo/margin": 217.810302734375,
|
|
"fcm_dpo/q_t": 0.3678578734397888,
|
|
"grad_norm": 32.118003845214844,
|
|
"learning_rate": 8.15760890883607e-08,
|
|
"logits/chosen": -0.48051896691322327,
|
|
"logits/rejected": -0.4790908396244049,
|
|
"logps/chosen": -364.2611999511719,
|
|
"logps/ref_chosen": -65.47642517089844,
|
|
"logps/ref_rejected": -98.10872650146484,
|
|
"logps/rejected": -614.7037963867188,
|
|
"loss": 0.972,
|
|
"margin_dpo/margin_mean": 217.810302734375,
|
|
"margin_dpo/margin_std": 243.54904174804688,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 0.7709251101321586,
|
|
"fcm_dpo/beta": 0.002879193751141429,
|
|
"fcm_dpo/delta": 0.00723473122343421,
|
|
"fcm_dpo/margin": 205.75973510742188,
|
|
"fcm_dpo/q_t": 0.368082195520401,
|
|
"grad_norm": 43.291744232177734,
|
|
"learning_rate": 7.689012058193384e-08,
|
|
"logits/chosen": -0.49591636657714844,
|
|
"logits/rejected": -0.4880523085594177,
|
|
"logps/chosen": -354.49664306640625,
|
|
"logps/ref_chosen": -59.072021484375,
|
|
"logps/ref_rejected": -82.8821792602539,
|
|
"logps/rejected": -584.0665283203125,
|
|
"loss": 0.9739,
|
|
"margin_dpo/margin_mean": 205.7597198486328,
|
|
"margin_dpo/margin_std": 222.150390625,
|
|
"step": 525
|
|
},
|
|
{
|
|
"epoch": 0.7782672540381792,
|
|
"fcm_dpo/beta": 0.002823830349370837,
|
|
"fcm_dpo/delta": -0.012813677079975605,
|
|
"fcm_dpo/margin": 216.4855499267578,
|
|
"fcm_dpo/q_t": 0.36813658475875854,
|
|
"grad_norm": 35.54737091064453,
|
|
"learning_rate": 7.231818622338822e-08,
|
|
"logits/chosen": -0.5179253816604614,
|
|
"logits/rejected": -0.5039080381393433,
|
|
"logps/chosen": -397.35479736328125,
|
|
"logps/ref_chosen": -61.11234664916992,
|
|
"logps/ref_rejected": -87.01112365722656,
|
|
"logps/rejected": -639.7390747070312,
|
|
"loss": 0.985,
|
|
"margin_dpo/margin_mean": 216.4855499267578,
|
|
"margin_dpo/margin_std": 256.6390380859375,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 0.7856093979441997,
|
|
"fcm_dpo/beta": 0.0028174181934446096,
|
|
"fcm_dpo/delta": -0.0029197714757174253,
|
|
"fcm_dpo/margin": 213.47830200195312,
|
|
"fcm_dpo/q_t": 0.3706435561180115,
|
|
"grad_norm": 48.269222259521484,
|
|
"learning_rate": 6.786329772205246e-08,
|
|
"logits/chosen": -0.5222650766372681,
|
|
"logits/rejected": -0.5029199719429016,
|
|
"logps/chosen": -414.7535705566406,
|
|
"logps/ref_chosen": -60.96736526489258,
|
|
"logps/ref_rejected": -81.727294921875,
|
|
"logps/rejected": -648.9918212890625,
|
|
"loss": 0.9981,
|
|
"margin_dpo/margin_mean": 213.4783172607422,
|
|
"margin_dpo/margin_std": 261.0979919433594,
|
|
"step": 535
|
|
},
|
|
{
|
|
"epoch": 0.7929515418502202,
|
|
"fcm_dpo/beta": 0.002668160479515791,
|
|
"fcm_dpo/delta": -0.07808558642864227,
|
|
"fcm_dpo/margin": 251.76071166992188,
|
|
"fcm_dpo/q_t": 0.35538631677627563,
|
|
"grad_norm": 36.01322937011719,
|
|
"learning_rate": 6.352838968463919e-08,
|
|
"logits/chosen": -0.5208684206008911,
|
|
"logits/rejected": -0.5084649324417114,
|
|
"logps/chosen": -376.98809814453125,
|
|
"logps/ref_chosen": -58.64385986328125,
|
|
"logps/ref_rejected": -90.40965270996094,
|
|
"logps/rejected": -660.5145874023438,
|
|
"loss": 0.9397,
|
|
"margin_dpo/margin_mean": 251.76071166992188,
|
|
"margin_dpo/margin_std": 272.35394287109375,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 0.8002936857562408,
|
|
"fcm_dpo/beta": 0.002645547967404127,
|
|
"fcm_dpo/delta": 0.01414306741207838,
|
|
"fcm_dpo/margin": 221.38949584960938,
|
|
"fcm_dpo/q_t": 0.3734440207481384,
|
|
"grad_norm": 34.583797454833984,
|
|
"learning_rate": 5.9316317682106294e-08,
|
|
"logits/chosen": -0.5391398668289185,
|
|
"logits/rejected": -0.5384151339530945,
|
|
"logps/chosen": -415.88970947265625,
|
|
"logps/ref_chosen": -64.73474884033203,
|
|
"logps/ref_rejected": -100.44208526611328,
|
|
"logps/rejected": -672.986572265625,
|
|
"loss": 0.9969,
|
|
"margin_dpo/margin_mean": 221.38949584960938,
|
|
"margin_dpo/margin_std": 267.8731384277344,
|
|
"step": 545
|
|
},
|
|
{
|
|
"epoch": 0.8076358296622613,
|
|
"fcm_dpo/beta": 0.002687679138034582,
|
|
"fcm_dpo/delta": 0.049287859350442886,
|
|
"fcm_dpo/margin": 205.9238739013672,
|
|
"fcm_dpo/q_t": 0.37848031520843506,
|
|
"grad_norm": 35.05332565307617,
|
|
"learning_rate": 5.5229856368582376e-08,
|
|
"logits/chosen": -0.5319250226020813,
|
|
"logits/rejected": -0.523718535900116,
|
|
"logps/chosen": -401.68536376953125,
|
|
"logps/ref_chosen": -59.13951873779297,
|
|
"logps/ref_rejected": -87.15635681152344,
|
|
"logps/rejected": -635.6260986328125,
|
|
"loss": 1.009,
|
|
"margin_dpo/margin_mean": 205.9238739013672,
|
|
"margin_dpo/margin_std": 250.6122283935547,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.8149779735682819,
|
|
"fcm_dpo/beta": 0.002561022061854601,
|
|
"fcm_dpo/delta": -0.10589297860860825,
|
|
"fcm_dpo/margin": 271.74420166015625,
|
|
"fcm_dpo/q_t": 0.3499818444252014,
|
|
"grad_norm": 34.610145568847656,
|
|
"learning_rate": 5.127169765359515e-08,
|
|
"logits/chosen": -0.5212115049362183,
|
|
"logits/rejected": -0.5161057114601135,
|
|
"logps/chosen": -394.1856384277344,
|
|
"logps/ref_chosen": -62.1995849609375,
|
|
"logps/ref_rejected": -102.51883697509766,
|
|
"logps/rejected": -706.2490844726562,
|
|
"loss": 0.9221,
|
|
"margin_dpo/margin_mean": 271.74420166015625,
|
|
"margin_dpo/margin_std": 282.8084411621094,
|
|
"step": 555
|
|
},
|
|
{
|
|
"epoch": 0.8223201174743024,
|
|
"fcm_dpo/beta": 0.0024848259054124355,
|
|
"fcm_dpo/delta": 0.015132618136703968,
|
|
"fcm_dpo/margin": 235.41403198242188,
|
|
"fcm_dpo/q_t": 0.3717556595802307,
|
|
"grad_norm": 34.969547271728516,
|
|
"learning_rate": 4.7444448928806615e-08,
|
|
"logits/chosen": -0.5141924619674683,
|
|
"logits/rejected": -0.5002972483634949,
|
|
"logps/chosen": -410.2688903808594,
|
|
"logps/ref_chosen": -61.541908264160156,
|
|
"logps/ref_rejected": -97.37491607666016,
|
|
"logps/rejected": -681.5159912109375,
|
|
"loss": 0.983,
|
|
"margin_dpo/margin_mean": 235.41403198242188,
|
|
"margin_dpo/margin_std": 259.69488525390625,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 0.8296622613803231,
|
|
"fcm_dpo/beta": 0.0025754589587450027,
|
|
"fcm_dpo/delta": 0.02127731405198574,
|
|
"fcm_dpo/margin": 224.775634765625,
|
|
"fcm_dpo/q_t": 0.37311750650405884,
|
|
"grad_norm": 35.28373718261719,
|
|
"learning_rate": 4.375063135042445e-08,
|
|
"logits/chosen": -0.48246508836746216,
|
|
"logits/rejected": -0.4706944525241852,
|
|
"logps/chosen": -427.0626525878906,
|
|
"logps/ref_chosen": -62.85475540161133,
|
|
"logps/ref_rejected": -93.8392105102539,
|
|
"logps/rejected": -682.8226318359375,
|
|
"loss": 0.9922,
|
|
"margin_dpo/margin_mean": 224.775634765625,
|
|
"margin_dpo/margin_std": 259.5643005371094,
|
|
"step": 565
|
|
},
|
|
{
|
|
"epoch": 0.8370044052863436,
|
|
"fcm_dpo/beta": 0.0025527041871100664,
|
|
"fcm_dpo/delta": -0.007842612452805042,
|
|
"fcm_dpo/margin": 237.66757202148438,
|
|
"fcm_dpo/q_t": 0.3671664595603943,
|
|
"grad_norm": 32.344730377197266,
|
|
"learning_rate": 4.019267817841834e-08,
|
|
"logits/chosen": -0.49153321981430054,
|
|
"logits/rejected": -0.4876040816307068,
|
|
"logps/chosen": -414.8893127441406,
|
|
"logps/ref_chosen": -57.98622512817383,
|
|
"logps/ref_rejected": -87.91555786132812,
|
|
"logps/rejected": -682.4862060546875,
|
|
"loss": 0.9792,
|
|
"margin_dpo/margin_mean": 237.66757202148438,
|
|
"margin_dpo/margin_std": 269.2391052246094,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 0.8443465491923642,
|
|
"fcm_dpo/beta": 0.002516696462407708,
|
|
"fcm_dpo/delta": -0.01592089980840683,
|
|
"fcm_dpo/margin": 243.9696044921875,
|
|
"fcm_dpo/q_t": 0.36822399497032166,
|
|
"grad_norm": 46.91700744628906,
|
|
"learning_rate": 3.677293317363864e-08,
|
|
"logits/chosen": -0.4880523085594177,
|
|
"logits/rejected": -0.4857984483242035,
|
|
"logps/chosen": -424.9076232910156,
|
|
"logps/ref_chosen": -55.194114685058594,
|
|
"logps/ref_rejected": -89.68229675292969,
|
|
"logps/rejected": -703.3653564453125,
|
|
"loss": 0.9947,
|
|
"margin_dpo/margin_mean": 243.9696044921875,
|
|
"margin_dpo/margin_std": 302.0916748046875,
|
|
"step": 575
|
|
},
|
|
{
|
|
"epoch": 0.8516886930983847,
|
|
"fcm_dpo/beta": 0.0025378705468028784,
|
|
"fcm_dpo/delta": 0.005721461959183216,
|
|
"fcm_dpo/margin": 233.96383666992188,
|
|
"fcm_dpo/q_t": 0.36850807070732117,
|
|
"grad_norm": 26.867847442626953,
|
|
"learning_rate": 3.349364905389032e-08,
|
|
"logits/chosen": -0.49183306097984314,
|
|
"logits/rejected": -0.4879623055458069,
|
|
"logps/chosen": -380.11614990234375,
|
|
"logps/ref_chosen": -54.605796813964844,
|
|
"logps/ref_rejected": -88.9614486694336,
|
|
"logps/rejected": -648.4356689453125,
|
|
"loss": 0.9794,
|
|
"margin_dpo/margin_mean": 233.9638214111328,
|
|
"margin_dpo/margin_std": 259.25726318359375,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 0.8590308370044053,
|
|
"fcm_dpo/beta": 0.002550755860283971,
|
|
"fcm_dpo/delta": 0.00011723488569259644,
|
|
"fcm_dpo/margin": 234.6815185546875,
|
|
"fcm_dpo/q_t": 0.36916983127593994,
|
|
"grad_norm": 38.553314208984375,
|
|
"learning_rate": 3.035698600998121e-08,
|
|
"logits/chosen": -0.52159583568573,
|
|
"logits/rejected": -0.5172958970069885,
|
|
"logps/chosen": -393.91827392578125,
|
|
"logps/ref_chosen": -59.03770065307617,
|
|
"logps/ref_rejected": -94.4029541015625,
|
|
"logps/rejected": -663.9650268554688,
|
|
"loss": 0.9843,
|
|
"margin_dpo/margin_mean": 234.6815185546875,
|
|
"margin_dpo/margin_std": 265.8410339355469,
|
|
"step": 585
|
|
},
|
|
{
|
|
"epoch": 0.8663729809104258,
|
|
"fcm_dpo/beta": 0.0025425164494663477,
|
|
"fcm_dpo/delta": -0.02153742127120495,
|
|
"fcm_dpo/margin": 243.96188354492188,
|
|
"fcm_dpo/q_t": 0.36495503783226013,
|
|
"grad_norm": 31.69843101501465,
|
|
"learning_rate": 2.736501028272095e-08,
|
|
"logits/chosen": -0.500575602054596,
|
|
"logits/rejected": -0.5034629702568054,
|
|
"logps/chosen": -374.412353515625,
|
|
"logps/ref_chosen": -53.5163688659668,
|
|
"logps/ref_rejected": -99.90290832519531,
|
|
"logps/rejected": -664.7607421875,
|
|
"loss": 0.9663,
|
|
"margin_dpo/margin_mean": 243.96188354492188,
|
|
"margin_dpo/margin_std": 272.521728515625,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 0.8737151248164464,
|
|
"fcm_dpo/beta": 0.002531964797526598,
|
|
"fcm_dpo/delta": 0.040782030671834946,
|
|
"fcm_dpo/margin": 221.4866485595703,
|
|
"fcm_dpo/q_t": 0.37717491388320923,
|
|
"grad_norm": 27.417510986328125,
|
|
"learning_rate": 2.451969280180849e-08,
|
|
"logits/chosen": -0.4844892621040344,
|
|
"logits/rejected": -0.463656485080719,
|
|
"logps/chosen": -368.6888427734375,
|
|
"logps/ref_chosen": -51.44538497924805,
|
|
"logps/ref_rejected": -77.43083190917969,
|
|
"logps/rejected": -616.1609497070312,
|
|
"loss": 1.0148,
|
|
"margin_dpo/margin_mean": 221.4866485595703,
|
|
"margin_dpo/margin_std": 274.03509521484375,
|
|
"step": 595
|
|
},
|
|
{
|
|
"epoch": 0.8810572687224669,
|
|
"fcm_dpo/beta": 0.002594124060124159,
|
|
"fcm_dpo/delta": 0.02012869343161583,
|
|
"fcm_dpo/margin": 223.6268310546875,
|
|
"fcm_dpo/q_t": 0.3721489906311035,
|
|
"grad_norm": 38.589534759521484,
|
|
"learning_rate": 2.1822907887504932e-08,
|
|
"logits/chosen": -0.4984382688999176,
|
|
"logits/rejected": -0.4893369674682617,
|
|
"logps/chosen": -383.90936279296875,
|
|
"logps/ref_chosen": -57.161705017089844,
|
|
"logps/ref_rejected": -87.73274230957031,
|
|
"logps/rejected": -638.107177734375,
|
|
"loss": 0.9964,
|
|
"margin_dpo/margin_mean": 223.6268310546875,
|
|
"margin_dpo/margin_std": 261.2717590332031,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.8810572687224669,
|
|
"eval_fcm_dpo/beta": 0.003444387810304761,
|
|
"eval_fcm_dpo/delta": 0.05230085551738739,
|
|
"eval_fcm_dpo/margin": 156.23291015625,
|
|
"eval_fcm_dpo/q_t": 0.39142969250679016,
|
|
"eval_logits/chosen": -0.5275665521621704,
|
|
"eval_logits/rejected": -0.5133021473884583,
|
|
"eval_logps/chosen": -459.30078125,
|
|
"eval_logps/ref_chosen": -79.05104064941406,
|
|
"eval_logps/ref_rejected": -86.79793548583984,
|
|
"eval_logps/rejected": -623.2805786132812,
|
|
"eval_loss": 0.55719393491745,
|
|
"eval_margin_dpo/margin_mean": 156.23291015625,
|
|
"eval_margin_dpo/margin_std": 271.4756774902344,
|
|
"eval_runtime": 39.7472,
|
|
"eval_samples_per_second": 58.847,
|
|
"eval_steps_per_second": 1.862,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.8883994126284875,
|
|
"fcm_dpo/beta": 0.0032743208575993776,
|
|
"fcm_dpo/delta": -0.2591201961040497,
|
|
"fcm_dpo/margin": 255.2403106689453,
|
|
"fcm_dpo/q_t": 0.32824546098709106,
|
|
"grad_norm": 34.93064498901367,
|
|
"learning_rate": 1.9276432015946446e-08,
|
|
"logits/chosen": -0.5040395855903625,
|
|
"logits/rejected": -0.4974172115325928,
|
|
"logps/chosen": -369.64263916015625,
|
|
"logps/ref_chosen": -58.169830322265625,
|
|
"logps/ref_rejected": -95.36891174316406,
|
|
"logps/rejected": -662.0820922851562,
|
|
"loss": 0.8815,
|
|
"margin_dpo/margin_mean": 255.2403106689453,
|
|
"margin_dpo/margin_std": 277.72467041015625,
|
|
"step": 605
|
|
},
|
|
{
|
|
"epoch": 0.895741556534508,
|
|
"fcm_dpo/beta": 0.0028547747060656548,
|
|
"fcm_dpo/delta": -0.07051874697208405,
|
|
"fcm_dpo/margin": 233.07736206054688,
|
|
"fcm_dpo/q_t": 0.3554316461086273,
|
|
"grad_norm": 36.279544830322266,
|
|
"learning_rate": 1.6881942648911074e-08,
|
|
"logits/chosen": -0.5211396813392639,
|
|
"logits/rejected": -0.5063233971595764,
|
|
"logps/chosen": -370.6011047363281,
|
|
"logps/ref_chosen": -58.97087860107422,
|
|
"logps/ref_rejected": -89.0286865234375,
|
|
"logps/rejected": -633.7362670898438,
|
|
"loss": 0.9375,
|
|
"margin_dpo/margin_mean": 233.07736206054688,
|
|
"margin_dpo/margin_std": 247.29598999023438,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 0.9030837004405287,
|
|
"fcm_dpo/beta": 0.0026367397513240576,
|
|
"fcm_dpo/delta": -0.06413199007511139,
|
|
"fcm_dpo/margin": 249.89028930664062,
|
|
"fcm_dpo/q_t": 0.3541339635848999,
|
|
"grad_norm": 35.62370300292969,
|
|
"learning_rate": 1.4641017128809801e-08,
|
|
"logits/chosen": -0.4915865361690521,
|
|
"logits/rejected": -0.5005960464477539,
|
|
"logps/chosen": -372.3035583496094,
|
|
"logps/ref_chosen": -58.081878662109375,
|
|
"logps/ref_rejected": -95.92155456542969,
|
|
"logps/rejected": -660.0335083007812,
|
|
"loss": 0.9241,
|
|
"margin_dpo/margin_mean": 249.89022827148438,
|
|
"margin_dpo/margin_std": 243.20175170898438,
|
|
"step": 615
|
|
},
|
|
{
|
|
"epoch": 0.9104258443465492,
|
|
"fcm_dpo/beta": 0.002739082556217909,
|
|
"fcm_dpo/delta": 0.0633806362748146,
|
|
"fcm_dpo/margin": 196.49574279785156,
|
|
"fcm_dpo/q_t": 0.3842490315437317,
|
|
"grad_norm": 41.11799240112305,
|
|
"learning_rate": 1.2555131639630567e-08,
|
|
"logits/chosen": -0.4931033253669739,
|
|
"logits/rejected": -0.477125346660614,
|
|
"logps/chosen": -404.1942138671875,
|
|
"logps/ref_chosen": -62.203094482421875,
|
|
"logps/ref_rejected": -80.53683471679688,
|
|
"logps/rejected": -619.023681640625,
|
|
"loss": 1.0346,
|
|
"margin_dpo/margin_mean": 196.49574279785156,
|
|
"margin_dpo/margin_std": 260.45947265625,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 0.9177679882525698,
|
|
"fcm_dpo/beta": 0.0026761619374156,
|
|
"fcm_dpo/delta": -0.042652104049921036,
|
|
"fcm_dpo/margin": 238.6814727783203,
|
|
"fcm_dpo/q_t": 0.3606039881706238,
|
|
"grad_norm": 37.73185348510742,
|
|
"learning_rate": 1.0625660234518913e-08,
|
|
"logits/chosen": -0.49364280700683594,
|
|
"logits/rejected": -0.48199111223220825,
|
|
"logps/chosen": -384.24359130859375,
|
|
"logps/ref_chosen": -61.727455139160156,
|
|
"logps/ref_rejected": -88.4387435913086,
|
|
"logps/rejected": -649.6363525390625,
|
|
"loss": 0.9522,
|
|
"margin_dpo/margin_mean": 238.68148803710938,
|
|
"margin_dpo/margin_std": 257.99114990234375,
|
|
"step": 625
|
|
},
|
|
{
|
|
"epoch": 0.9251101321585903,
|
|
"fcm_dpo/beta": 0.002640167949721217,
|
|
"fcm_dpo/delta": 0.01016196422278881,
|
|
"fcm_dpo/margin": 223.45654296875,
|
|
"fcm_dpo/q_t": 0.3729027211666107,
|
|
"grad_norm": 40.86648178100586,
|
|
"learning_rate": 8.85387393063622e-09,
|
|
"logits/chosen": -0.4925254285335541,
|
|
"logits/rejected": -0.48594093322753906,
|
|
"logps/chosen": -378.8038024902344,
|
|
"logps/ref_chosen": -61.30865478515625,
|
|
"logps/ref_rejected": -96.54997253417969,
|
|
"logps/rejected": -637.5016479492188,
|
|
"loss": 0.9872,
|
|
"margin_dpo/margin_mean": 223.45654296875,
|
|
"margin_dpo/margin_std": 258.1217956542969,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 0.9324522760646109,
|
|
"fcm_dpo/beta": 0.0027915460523217916,
|
|
"fcm_dpo/delta": 0.07615941017866135,
|
|
"fcm_dpo/margin": 189.08535766601562,
|
|
"fcm_dpo/q_t": 0.38567864894866943,
|
|
"grad_norm": 37.90126419067383,
|
|
"learning_rate": 7.240939871891699e-09,
|
|
"logits/chosen": -0.49808454513549805,
|
|
"logits/rejected": -0.4813234806060791,
|
|
"logps/chosen": -401.56622314453125,
|
|
"logps/ref_chosen": -63.7315673828125,
|
|
"logps/ref_rejected": -89.66435241699219,
|
|
"logps/rejected": -616.5842895507812,
|
|
"loss": 1.0422,
|
|
"margin_dpo/margin_mean": 189.08535766601562,
|
|
"margin_dpo/margin_std": 255.0254364013672,
|
|
"step": 635
|
|
},
|
|
{
|
|
"epoch": 0.9397944199706314,
|
|
"fcm_dpo/beta": 0.002841885667294264,
|
|
"fcm_dpo/delta": -0.047639258205890656,
|
|
"fcm_dpo/margin": 226.51620483398438,
|
|
"fcm_dpo/q_t": 0.35998988151550293,
|
|
"grad_norm": 34.88581085205078,
|
|
"learning_rate": 5.7879205600998296e-09,
|
|
"logits/chosen": -0.4770120084285736,
|
|
"logits/rejected": -0.46177831292152405,
|
|
"logps/chosen": -379.24822998046875,
|
|
"logps/ref_chosen": -59.17915725708008,
|
|
"logps/ref_rejected": -88.51210021972656,
|
|
"logps/rejected": -635.0973510742188,
|
|
"loss": 0.9492,
|
|
"margin_dpo/margin_mean": 226.51620483398438,
|
|
"margin_dpo/margin_std": 240.34689331054688,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 0.947136563876652,
|
|
"fcm_dpo/beta": 0.0027807278092950583,
|
|
"fcm_dpo/delta": 0.045675117522478104,
|
|
"fcm_dpo/margin": 200.19139099121094,
|
|
"fcm_dpo/q_t": 0.3824850618839264,
|
|
"grad_norm": 49.77201461791992,
|
|
"learning_rate": 4.495773155069299e-09,
|
|
"logits/chosen": -0.5024424195289612,
|
|
"logits/rejected": -0.4973903298377991,
|
|
"logps/chosen": -392.8785400390625,
|
|
"logps/ref_chosen": -59.50596237182617,
|
|
"logps/ref_rejected": -93.92404174804688,
|
|
"logps/rejected": -627.488037109375,
|
|
"loss": 1.0362,
|
|
"margin_dpo/margin_mean": 200.19139099121094,
|
|
"margin_dpo/margin_std": 274.2567138671875,
|
|
"step": 645
|
|
},
|
|
{
|
|
"epoch": 0.9544787077826725,
|
|
"fcm_dpo/beta": 0.0028442046605050564,
|
|
"fcm_dpo/delta": -0.03724042326211929,
|
|
"fcm_dpo/margin": 222.9950408935547,
|
|
"fcm_dpo/q_t": 0.3662676215171814,
|
|
"grad_norm": 32.262725830078125,
|
|
"learning_rate": 3.3653488440851253e-09,
|
|
"logits/chosen": -0.4864223003387451,
|
|
"logits/rejected": -0.4813409745693207,
|
|
"logps/chosen": -373.74139404296875,
|
|
"logps/ref_chosen": -57.774566650390625,
|
|
"logps/ref_rejected": -89.61600494384766,
|
|
"logps/rejected": -628.5778198242188,
|
|
"loss": 0.983,
|
|
"margin_dpo/margin_mean": 222.9950408935547,
|
|
"margin_dpo/margin_std": 272.8062438964844,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.9618208516886931,
|
|
"fcm_dpo/beta": 0.002694058697670698,
|
|
"fcm_dpo/delta": -0.033371347934007645,
|
|
"fcm_dpo/margin": 234.06857299804688,
|
|
"fcm_dpo/q_t": 0.36482754349708557,
|
|
"grad_norm": 33.339229583740234,
|
|
"learning_rate": 2.397392281198729e-09,
|
|
"logits/chosen": -0.490518182516098,
|
|
"logits/rejected": -0.4940160810947418,
|
|
"logps/chosen": -366.3544006347656,
|
|
"logps/ref_chosen": -55.68403244018555,
|
|
"logps/ref_rejected": -102.4081802368164,
|
|
"logps/rejected": -647.1470947265625,
|
|
"loss": 0.9655,
|
|
"margin_dpo/margin_mean": 234.06857299804688,
|
|
"margin_dpo/margin_std": 270.1910705566406,
|
|
"step": 655
|
|
},
|
|
{
|
|
"epoch": 0.9691629955947136,
|
|
"fcm_dpo/beta": 0.002519825007766485,
|
|
"fcm_dpo/delta": -0.08081191033124924,
|
|
"fcm_dpo/margin": 267.1984558105469,
|
|
"fcm_dpo/q_t": 0.3513564467430115,
|
|
"grad_norm": 35.18833541870117,
|
|
"learning_rate": 1.592541096695571e-09,
|
|
"logits/chosen": -0.4812515676021576,
|
|
"logits/rejected": -0.4766325354576111,
|
|
"logps/chosen": -370.0708312988281,
|
|
"logps/ref_chosen": -59.19981002807617,
|
|
"logps/ref_rejected": -94.19200134277344,
|
|
"logps/rejected": -672.2614135742188,
|
|
"loss": 0.9108,
|
|
"margin_dpo/margin_mean": 267.1984558105469,
|
|
"margin_dpo/margin_std": 252.93112182617188,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 0.9765051395007343,
|
|
"fcm_dpo/beta": 0.0024876741226762533,
|
|
"fcm_dpo/delta": -0.004373815376311541,
|
|
"fcm_dpo/margin": 242.69509887695312,
|
|
"fcm_dpo/q_t": 0.36792677640914917,
|
|
"grad_norm": 32.33696746826172,
|
|
"learning_rate": 9.513254770636137e-10,
|
|
"logits/chosen": -0.49785465002059937,
|
|
"logits/rejected": -0.4872364103794098,
|
|
"logps/chosen": -370.6224365234375,
|
|
"logps/ref_chosen": -61.2533073425293,
|
|
"logps/ref_rejected": -95.86351013183594,
|
|
"logps/rejected": -647.927734375,
|
|
"loss": 0.9751,
|
|
"margin_dpo/margin_mean": 242.69509887695312,
|
|
"margin_dpo/margin_std": 275.34063720703125,
|
|
"step": 665
|
|
},
|
|
{
|
|
"epoch": 0.9838472834067548,
|
|
"fcm_dpo/beta": 0.0024808943271636963,
|
|
"fcm_dpo/delta": -0.016260143369436264,
|
|
"fcm_dpo/margin": 247.93661499023438,
|
|
"fcm_dpo/q_t": 0.36324819922447205,
|
|
"grad_norm": 26.02719497680664,
|
|
"learning_rate": 4.741678157389739e-10,
|
|
"logits/chosen": -0.48116713762283325,
|
|
"logits/rejected": -0.46722927689552307,
|
|
"logps/chosen": -372.7713928222656,
|
|
"logps/ref_chosen": -62.95263671875,
|
|
"logps/ref_rejected": -92.4662094116211,
|
|
"logps/rejected": -650.2215576171875,
|
|
"loss": 0.9554,
|
|
"margin_dpo/margin_mean": 247.93661499023438,
|
|
"margin_dpo/margin_std": 256.9874572753906,
|
|
"step": 670
|
|
},
|
|
{
|
|
"epoch": 0.9911894273127754,
|
|
"fcm_dpo/beta": 0.002432363573461771,
|
|
"fcm_dpo/delta": -0.023618485778570175,
|
|
"fcm_dpo/margin": 255.810302734375,
|
|
"fcm_dpo/q_t": 0.361946165561676,
|
|
"grad_norm": 27.06682777404785,
|
|
"learning_rate": 1.6138243485910863e-10,
|
|
"logits/chosen": -0.47107481956481934,
|
|
"logits/rejected": -0.46272093057632446,
|
|
"logps/chosen": -361.4900817871094,
|
|
"logps/ref_chosen": -48.5856819152832,
|
|
"logps/ref_rejected": -81.27871704101562,
|
|
"logps/rejected": -649.9933471679688,
|
|
"loss": 0.947,
|
|
"margin_dpo/margin_mean": 255.81027221679688,
|
|
"margin_dpo/margin_std": 257.8631286621094,
|
|
"step": 675
|
|
},
|
|
{
|
|
"epoch": 0.9985315712187959,
|
|
"fcm_dpo/beta": 0.0023933127522468567,
|
|
"fcm_dpo/delta": 0.015318195335566998,
|
|
"fcm_dpo/margin": 244.6057586669922,
|
|
"fcm_dpo/q_t": 0.36930760741233826,
|
|
"grad_norm": 29.81550407409668,
|
|
"learning_rate": 1.31753782067201e-11,
|
|
"logits/chosen": -0.5069360733032227,
|
|
"logits/rejected": -0.4939002990722656,
|
|
"logps/chosen": -374.1042175292969,
|
|
"logps/ref_chosen": -60.25421905517578,
|
|
"logps/ref_rejected": -87.23457336425781,
|
|
"logps/rejected": -645.6903076171875,
|
|
"loss": 0.9708,
|
|
"margin_dpo/margin_mean": 244.6057586669922,
|
|
"margin_dpo/margin_std": 257.46160888671875,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"step": 681,
|
|
"total_flos": 0.0,
|
|
"train_loss": 0.9952153347312266,
|
|
"train_runtime": 1856.6581,
|
|
"train_samples_per_second": 23.482,
|
|
"train_steps_per_second": 0.367
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 681,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 50,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": false,
|
|
"should_training_stop": false
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 8,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|