Files
llama-3-8b-base-margin-dpo-…/trainer_state.json
ModelHub XC d238cc8b5c 初始化项目,由ModelHub XC社区提供模型
Model: jackf857/llama-3-8b-base-margin-dpo-hh-4xh100
Source: Original Platform
2026-04-24 16:00:00 +08:00

1049 lines
36 KiB
JSON

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9977324263038548,
"eval_steps": 500,
"global_step": 330,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0030234315948601664,
"grad_norm": 81.58016967773438,
"learning_rate": 0.0,
"logits/chosen": 0.23480726778507233,
"logits/rejected": 0.21675193309783936,
"logps/chosen": -61.14366912841797,
"logps/ref_chosen": -61.123748779296875,
"logps/ref_rejected": -71.40178680419922,
"logps/rejected": -71.51543426513672,
"loss": 5.5395,
"margin_dpo/margin_mean": 0.09371411800384521,
"margin_dpo/margin_std": 0.3346460461616516,
"step": 1
},
{
"epoch": 0.015117157974300832,
"grad_norm": 90.00988006591797,
"learning_rate": 6.060606060606061e-08,
"logits/chosen": 0.16730807721614838,
"logits/rejected": 0.12714773416519165,
"logps/chosen": -70.84980773925781,
"logps/ref_chosen": -70.824462890625,
"logps/ref_rejected": -94.2794418334961,
"logps/rejected": -94.26243591308594,
"loss": 5.5601,
"margin_dpo/margin_mean": -0.042349204421043396,
"margin_dpo/margin_std": 0.2587328553199768,
"step": 5
},
{
"epoch": 0.030234315948601664,
"grad_norm": 90.63359832763672,
"learning_rate": 1.3636363636363635e-07,
"logits/chosen": 0.17656652629375458,
"logits/rejected": 0.14827647805213928,
"logps/chosen": -67.78923034667969,
"logps/ref_chosen": -67.75859832763672,
"logps/ref_rejected": -93.46939849853516,
"logps/rejected": -93.48274993896484,
"loss": 5.5391,
"margin_dpo/margin_mean": -0.01728438213467598,
"margin_dpo/margin_std": 0.2816511392593384,
"step": 10
},
{
"epoch": 0.045351473922902494,
"grad_norm": 98.18887329101562,
"learning_rate": 2.121212121212121e-07,
"logits/chosen": 0.20216646790504456,
"logits/rejected": 0.14840646088123322,
"logps/chosen": -68.50216674804688,
"logps/ref_chosen": -68.47833251953125,
"logps/ref_rejected": -95.5621109008789,
"logps/rejected": -95.56497955322266,
"loss": 5.5485,
"margin_dpo/margin_mean": -0.020975470542907715,
"margin_dpo/margin_std": 0.28279370069503784,
"step": 15
},
{
"epoch": 0.06046863189720333,
"grad_norm": 123.30350494384766,
"learning_rate": 2.878787878787879e-07,
"logits/chosen": 0.173910990357399,
"logits/rejected": 0.16142013669013977,
"logps/chosen": -74.85772705078125,
"logps/ref_chosen": -74.8140869140625,
"logps/ref_rejected": -87.42041015625,
"logps/rejected": -87.47251892089844,
"loss": 5.5388,
"margin_dpo/margin_mean": 0.008459245786070824,
"margin_dpo/margin_std": 0.24091216921806335,
"step": 20
},
{
"epoch": 0.07558578987150416,
"grad_norm": 88.44825744628906,
"learning_rate": 3.636363636363636e-07,
"logits/chosen": 0.16035175323486328,
"logits/rejected": 0.13576187193393707,
"logps/chosen": -67.19379425048828,
"logps/ref_chosen": -67.13701629638672,
"logps/ref_rejected": -75.0597152709961,
"logps/rejected": -75.19458770751953,
"loss": 5.5267,
"margin_dpo/margin_mean": 0.07809482514858246,
"margin_dpo/margin_std": 0.2976154685020447,
"step": 25
},
{
"epoch": 0.09070294784580499,
"grad_norm": 84.55319213867188,
"learning_rate": 4.3939393939393937e-07,
"logits/chosen": 0.17548812925815582,
"logits/rejected": 0.1398809403181076,
"logps/chosen": -63.702415466308594,
"logps/ref_chosen": -63.52486038208008,
"logps/ref_rejected": -87.17579650878906,
"logps/rejected": -87.42718505859375,
"loss": 5.5257,
"margin_dpo/margin_mean": 0.07383458316326141,
"margin_dpo/margin_std": 0.33446845412254333,
"step": 30
},
{
"epoch": 0.10582010582010581,
"grad_norm": 113.05147552490234,
"learning_rate": 4.999860140229787e-07,
"logits/chosen": 0.1826407015323639,
"logits/rejected": 0.14065364003181458,
"logps/chosen": -69.69740295410156,
"logps/ref_chosen": -69.33306884765625,
"logps/ref_rejected": -90.44664764404297,
"logps/rejected": -91.03494262695312,
"loss": 5.4862,
"margin_dpo/margin_mean": 0.2239576280117035,
"margin_dpo/margin_std": 0.5184648633003235,
"step": 35
},
{
"epoch": 0.12093726379440665,
"grad_norm": 88.12389373779297,
"learning_rate": 4.994966691179711e-07,
"logits/chosen": 0.19844599068164825,
"logits/rejected": 0.16636203229427338,
"logps/chosen": -62.56508255004883,
"logps/ref_chosen": -61.858070373535156,
"logps/ref_rejected": -80.20703887939453,
"logps/rejected": -81.1908950805664,
"loss": 5.4345,
"margin_dpo/margin_mean": 0.27684053778648376,
"margin_dpo/margin_std": 0.6676496267318726,
"step": 40
},
{
"epoch": 0.1360544217687075,
"grad_norm": 95.42424011230469,
"learning_rate": 4.983095894354857e-07,
"logits/chosen": 0.21447840332984924,
"logits/rejected": 0.17494550347328186,
"logps/chosen": -62.428611755371094,
"logps/ref_chosen": -61.316184997558594,
"logps/ref_rejected": -82.8878402709961,
"logps/rejected": -84.5842056274414,
"loss": 5.3562,
"margin_dpo/margin_mean": 0.5839391946792603,
"margin_dpo/margin_std": 0.9301155805587769,
"step": 45
},
{
"epoch": 0.15117157974300832,
"grad_norm": 101.37110900878906,
"learning_rate": 4.964280947263676e-07,
"logits/chosen": 0.2370089590549469,
"logits/rejected": 0.20617277920246124,
"logps/chosen": -72.69735717773438,
"logps/ref_chosen": -70.95834350585938,
"logps/ref_rejected": -90.73750305175781,
"logps/rejected": -93.19664001464844,
"loss": 5.3659,
"margin_dpo/margin_mean": 0.7201217412948608,
"margin_dpo/margin_std": 1.4574058055877686,
"step": 50
},
{
"epoch": 0.16628873771730915,
"grad_norm": 94.82052612304688,
"learning_rate": 4.938574467213517e-07,
"logits/chosen": 0.23567061126232147,
"logits/rejected": 0.2179073542356491,
"logps/chosen": -70.44107818603516,
"logps/ref_chosen": -67.76860046386719,
"logps/ref_rejected": -79.32777404785156,
"logps/rejected": -82.77432250976562,
"loss": 5.3203,
"margin_dpo/margin_mean": 0.7740915417671204,
"margin_dpo/margin_std": 1.9899765253067017,
"step": 55
},
{
"epoch": 0.18140589569160998,
"grad_norm": 87.9364242553711,
"learning_rate": 4.906048344162676e-07,
"logits/chosen": 0.29000595211982727,
"logits/rejected": 0.2418111115694046,
"logps/chosen": -70.21463012695312,
"logps/ref_chosen": -66.68450164794922,
"logps/ref_rejected": -94.57969665527344,
"logps/rejected": -99.15840148925781,
"loss": 5.2483,
"margin_dpo/margin_mean": 1.0485769510269165,
"margin_dpo/margin_std": 2.1871743202209473,
"step": 60
},
{
"epoch": 0.1965230536659108,
"grad_norm": 88.09169006347656,
"learning_rate": 4.866793539675126e-07,
"logits/chosen": 0.29900461435317993,
"logits/rejected": 0.2591327428817749,
"logps/chosen": -68.1446304321289,
"logps/ref_chosen": -63.981719970703125,
"logps/ref_rejected": -93.4894790649414,
"logps/rejected": -99.29120635986328,
"loss": 5.2102,
"margin_dpo/margin_mean": 1.6388165950775146,
"margin_dpo/margin_std": 3.181628704071045,
"step": 65
},
{
"epoch": 0.21164021164021163,
"grad_norm": 115.32525634765625,
"learning_rate": 4.820919832540181e-07,
"logits/chosen": 0.2913913130760193,
"logits/rejected": 0.2636095881462097,
"logps/chosen": -72.38068389892578,
"logps/ref_chosen": -67.56693267822266,
"logps/ref_rejected": -82.09600067138672,
"logps/rejected": -88.61162567138672,
"loss": 5.1673,
"margin_dpo/margin_mean": 1.7018649578094482,
"margin_dpo/margin_std": 3.521580219268799,
"step": 70
},
{
"epoch": 0.22675736961451248,
"grad_norm": 81.07879638671875,
"learning_rate": 4.768555511768486e-07,
"logits/chosen": 0.3089480400085449,
"logits/rejected": 0.2643401324748993,
"logps/chosen": -68.97869873046875,
"logps/ref_chosen": -63.85206985473633,
"logps/ref_rejected": -95.73777770996094,
"logps/rejected": -102.76531982421875,
"loss": 5.2271,
"margin_dpo/margin_mean": 1.900897741317749,
"margin_dpo/margin_std": 3.6623237133026123,
"step": 75
},
{
"epoch": 0.2418745275888133,
"grad_norm": 75.76371002197266,
"learning_rate": 4.7098470178228755e-07,
"logits/chosen": 0.26771390438079834,
"logits/rejected": 0.25195786356925964,
"logps/chosen": -71.69243621826172,
"logps/ref_chosen": -66.58103942871094,
"logps/ref_rejected": -79.17964172363281,
"logps/rejected": -85.94296264648438,
"loss": 5.048,
"margin_dpo/margin_mean": 1.6519315242767334,
"margin_dpo/margin_std": 3.1414542198181152,
"step": 80
},
{
"epoch": 0.25699168556311414,
"grad_norm": 93.39420318603516,
"learning_rate": 4.6449585330874425e-07,
"logits/chosen": 0.3026628792285919,
"logits/rejected": 0.2860621213912964,
"logps/chosen": -72.08036041259766,
"logps/ref_chosen": -66.16217041015625,
"logps/ref_rejected": -74.4969711303711,
"logps/rejected": -81.99044036865234,
"loss": 4.9839,
"margin_dpo/margin_mean": 1.5752999782562256,
"margin_dpo/margin_std": 3.1635475158691406,
"step": 85
},
{
"epoch": 0.272108843537415,
"grad_norm": 87.9010009765625,
"learning_rate": 4.5740715227200897e-07,
"logits/chosen": 0.3158201277256012,
"logits/rejected": 0.27566593885421753,
"logps/chosen": -70.36933135986328,
"logps/ref_chosen": -63.90424728393555,
"logps/ref_rejected": -84.99391174316406,
"logps/rejected": -94.2701644897461,
"loss": 4.8563,
"margin_dpo/margin_mean": 2.811182737350464,
"margin_dpo/margin_std": 3.8479418754577637,
"step": 90
},
{
"epoch": 0.2872260015117158,
"grad_norm": 87.73194885253906,
"learning_rate": 4.4973842271726024e-07,
"logits/chosen": 0.30465131998062134,
"logits/rejected": 0.28732430934906006,
"logps/chosen": -77.35557556152344,
"logps/ref_chosen": -69.69574737548828,
"logps/ref_rejected": -86.1413803100586,
"logps/rejected": -96.3828125,
"loss": 4.8249,
"margin_dpo/margin_mean": 2.5815961360931396,
"margin_dpo/margin_std": 4.658370494842529,
"step": 95
},
{
"epoch": 0.30234315948601664,
"grad_norm": 125.7916030883789,
"learning_rate": 4.415111107797445e-07,
"logits/chosen": 0.3128359615802765,
"logits/rejected": 0.2995418310165405,
"logps/chosen": -75.94481658935547,
"logps/ref_chosen": -67.1341323852539,
"logps/ref_rejected": -76.26823425292969,
"logps/rejected": -87.71977996826172,
"loss": 4.7625,
"margin_dpo/margin_mean": 2.640864133834839,
"margin_dpo/margin_std": 4.733086585998535,
"step": 100
},
{
"epoch": 0.31746031746031744,
"grad_norm": 86.5322036743164,
"learning_rate": 4.327482247091679e-07,
"logits/chosen": 0.32607540488243103,
"logits/rejected": 0.257303923368454,
"logps/chosen": -79.38796997070312,
"logps/ref_chosen": -68.39376068115234,
"logps/ref_rejected": -109.96052551269531,
"logps/rejected": -124.94000244140625,
"loss": 4.6261,
"margin_dpo/margin_mean": 3.985279083251953,
"margin_dpo/margin_std": 5.20723819732666,
"step": 105
},
{
"epoch": 0.3325774754346183,
"grad_norm": 149.49224853515625,
"learning_rate": 4.234742705255272e-07,
"logits/chosen": 0.3160625696182251,
"logits/rejected": 0.2818891406059265,
"logps/chosen": -81.50082397460938,
"logps/ref_chosen": -69.2634048461914,
"logps/ref_rejected": -86.07990264892578,
"logps/rejected": -102.79105377197266,
"loss": 4.5827,
"margin_dpo/margin_mean": 4.473735332489014,
"margin_dpo/margin_std": 6.190529823303223,
"step": 110
},
{
"epoch": 0.3476946334089191,
"grad_norm": 90.54156494140625,
"learning_rate": 4.137151834863213e-07,
"logits/chosen": 0.3247663080692291,
"logits/rejected": 0.2914278209209442,
"logps/chosen": -69.39289855957031,
"logps/ref_chosen": -56.60735321044922,
"logps/ref_rejected": -84.99754333496094,
"logps/rejected": -102.23129272460938,
"loss": 4.4996,
"margin_dpo/margin_mean": 4.448202133178711,
"margin_dpo/margin_std": 6.064272880554199,
"step": 115
},
{
"epoch": 0.36281179138321995,
"grad_norm": 94.05331420898438,
"learning_rate": 4.0349825555680045e-07,
"logits/chosen": 0.3232622742652893,
"logits/rejected": 0.292279452085495,
"logps/chosen": -87.42243957519531,
"logps/ref_chosen": -73.8878173828125,
"logps/ref_rejected": -85.6266098022461,
"logps/rejected": -103.971435546875,
"loss": 4.5135,
"margin_dpo/margin_mean": 4.81019401550293,
"margin_dpo/margin_std": 7.502870082855225,
"step": 120
},
{
"epoch": 0.3779289493575208,
"grad_norm": 142.06993103027344,
"learning_rate": 3.9285205908608934e-07,
"logits/chosen": 0.3267795145511627,
"logits/rejected": 0.31954893469810486,
"logps/chosen": -79.02787017822266,
"logps/ref_chosen": -67.94248962402344,
"logps/ref_rejected": -77.52119445800781,
"logps/rejected": -94.49152374267578,
"loss": 4.4618,
"margin_dpo/margin_mean": 5.884944915771484,
"margin_dpo/margin_std": 7.9281463623046875,
"step": 125
},
{
"epoch": 0.3930461073318216,
"grad_norm": 106.90230560302734,
"learning_rate": 3.818063669026256e-07,
"logits/chosen": 0.3250274062156677,
"logits/rejected": 0.3016881048679352,
"logps/chosen": -74.74659729003906,
"logps/ref_chosen": -64.16435241699219,
"logps/ref_rejected": -80.56021881103516,
"logps/rejected": -96.44326782226562,
"loss": 4.3914,
"margin_dpo/margin_mean": 5.300816535949707,
"margin_dpo/margin_std": 8.750526428222656,
"step": 130
},
{
"epoch": 0.40816326530612246,
"grad_norm": 98.56690216064453,
"learning_rate": 3.7039206905237656e-07,
"logits/chosen": 0.35155966877937317,
"logits/rejected": 0.3246091604232788,
"logps/chosen": -74.25929260253906,
"logps/ref_chosen": -64.49832916259766,
"logps/ref_rejected": -79.48457336425781,
"logps/rejected": -95.14707946777344,
"loss": 4.387,
"margin_dpo/margin_mean": 5.901560306549072,
"margin_dpo/margin_std": 8.794408798217773,
"step": 135
},
{
"epoch": 0.42328042328042326,
"grad_norm": 90.99429321289062,
"learning_rate": 3.586410864126781e-07,
"logits/chosen": 0.3063794672489166,
"logits/rejected": 0.2815917134284973,
"logps/chosen": -79.69918823242188,
"logps/ref_chosen": -69.09117889404297,
"logps/ref_rejected": -82.47627258300781,
"logps/rejected": -99.3084487915039,
"loss": 4.2096,
"margin_dpo/margin_mean": 6.2241644859313965,
"margin_dpo/margin_std": 9.110525131225586,
"step": 140
},
{
"epoch": 0.4383975812547241,
"grad_norm": 95.44803619384766,
"learning_rate": 3.465862814232821e-07,
"logits/chosen": 0.3120853900909424,
"logits/rejected": 0.29045969247817993,
"logps/chosen": -73.12217712402344,
"logps/ref_chosen": -63.355613708496094,
"logps/ref_rejected": -80.33558654785156,
"logps/rejected": -97.30397033691406,
"loss": 4.152,
"margin_dpo/margin_mean": 7.2018280029296875,
"margin_dpo/margin_std": 9.665465354919434,
"step": 145
},
{
"epoch": 0.45351473922902497,
"grad_norm": 105.1060562133789,
"learning_rate": 3.3426136618426043e-07,
"logits/chosen": 0.3459467589855194,
"logits/rejected": 0.3136577606201172,
"logps/chosen": -82.36650085449219,
"logps/ref_chosen": -68.49040985107422,
"logps/ref_rejected": -86.91236877441406,
"logps/rejected": -107.78336334228516,
"loss": 4.1192,
"margin_dpo/margin_mean": 6.994899749755859,
"margin_dpo/margin_std": 10.663551330566406,
"step": 150
},
{
"epoch": 0.46863189720332576,
"grad_norm": 108.24873352050781,
"learning_rate": 3.2170080817777257e-07,
"logits/chosen": 0.38198521733283997,
"logits/rejected": 0.33321067690849304,
"logps/chosen": -67.14759826660156,
"logps/ref_chosen": -52.3751220703125,
"logps/ref_rejected": -86.2134017944336,
"logps/rejected": -108.62422180175781,
"loss": 4.551,
"margin_dpo/margin_mean": 7.638341426849365,
"margin_dpo/margin_std": 8.237049102783203,
"step": 155
},
{
"epoch": 0.4837490551776266,
"grad_norm": 304.49713134765625,
"learning_rate": 3.0893973387735683e-07,
"logits/chosen": 0.35799717903137207,
"logits/rejected": 0.3205992877483368,
"logps/chosen": -74.4216537475586,
"logps/ref_chosen": -60.02544403076172,
"logps/ref_rejected": -82.36589050292969,
"logps/rejected": -104.4999771118164,
"loss": 4.3222,
"margin_dpo/margin_mean": 7.7378830909729,
"margin_dpo/margin_std": 10.117125511169434,
"step": 160
},
{
"epoch": 0.4988662131519274,
"grad_norm": 134.80690002441406,
"learning_rate": 2.9601383051430505e-07,
"logits/chosen": 0.3366810381412506,
"logits/rejected": 0.30750972032546997,
"logps/chosen": -75.96559143066406,
"logps/ref_chosen": -62.833656311035156,
"logps/ref_rejected": -78.71475982666016,
"logps/rejected": -99.648193359375,
"loss": 4.2507,
"margin_dpo/margin_mean": 7.801492214202881,
"margin_dpo/margin_std": 8.849695205688477,
"step": 165
},
{
"epoch": 0.5139833711262283,
"grad_norm": 82.21798706054688,
"learning_rate": 2.8295924627584004e-07,
"logits/chosen": 0.3062411844730377,
"logits/rejected": 0.26870957016944885,
"logps/chosen": -79.08585357666016,
"logps/ref_chosen": -64.85685729980469,
"logps/ref_rejected": -86.95464324951172,
"logps/rejected": -108.34187316894531,
"loss": 4.3014,
"margin_dpo/margin_mean": 7.158215522766113,
"margin_dpo/margin_std": 9.102995872497559,
"step": 170
},
{
"epoch": 0.5291005291005291,
"grad_norm": 96.37153625488281,
"learning_rate": 2.698124892141971e-07,
"logits/chosen": 0.3409620225429535,
"logits/rejected": 0.31324949860572815,
"logps/chosen": -82.41763305664062,
"logps/ref_chosen": -68.58393859863281,
"logps/ref_rejected": -87.6428451538086,
"logps/rejected": -108.9012680053711,
"loss": 4.2173,
"margin_dpo/margin_mean": 7.424722194671631,
"margin_dpo/margin_std": 9.777175903320312,
"step": 175
},
{
"epoch": 0.54421768707483,
"grad_norm": 96.39199829101562,
"learning_rate": 2.5661032514931834e-07,
"logits/chosen": 0.33950427174568176,
"logits/rejected": 0.2805086672306061,
"logps/chosen": -78.31340026855469,
"logps/ref_chosen": -64.23633575439453,
"logps/ref_rejected": -93.94469451904297,
"logps/rejected": -116.1845703125,
"loss": 4.2657,
"margin_dpo/margin_mean": 8.162816047668457,
"margin_dpo/margin_std": 11.162989616394043,
"step": 180
},
{
"epoch": 0.5593348450491308,
"grad_norm": 182.84828186035156,
"learning_rate": 2.4338967485068164e-07,
"logits/chosen": 0.32963842153549194,
"logits/rejected": 0.2764475643634796,
"logps/chosen": -69.95085144042969,
"logps/ref_chosen": -58.443382263183594,
"logps/ref_rejected": -90.00855255126953,
"logps/rejected": -109.18995666503906,
"loss": 4.1988,
"margin_dpo/margin_mean": 7.6739397048950195,
"margin_dpo/margin_std": 9.626836776733398,
"step": 185
},
{
"epoch": 0.5744520030234316,
"grad_norm": 91.07161712646484,
"learning_rate": 2.3018751078580283e-07,
"logits/chosen": 0.3513370752334595,
"logits/rejected": 0.31120267510414124,
"logps/chosen": -69.91144561767578,
"logps/ref_chosen": -59.20270919799805,
"logps/ref_rejected": -80.10565185546875,
"logps/rejected": -98.33421325683594,
"loss": 4.3202,
"margin_dpo/margin_mean": 7.519822597503662,
"margin_dpo/margin_std": 10.744632720947266,
"step": 190
},
{
"epoch": 0.5895691609977324,
"grad_norm": 102.23469543457031,
"learning_rate": 2.170407537241599e-07,
"logits/chosen": 0.3388921618461609,
"logits/rejected": 0.31254586577415466,
"logps/chosen": -83.7293701171875,
"logps/ref_chosen": -71.23518371582031,
"logps/ref_rejected": -87.15229797363281,
"logps/rejected": -105.95997619628906,
"loss": 4.2702,
"margin_dpo/margin_mean": 6.3134870529174805,
"margin_dpo/margin_std": 9.368511199951172,
"step": 195
},
{
"epoch": 0.6046863189720333,
"grad_norm": 137.64295959472656,
"learning_rate": 2.0398616948569493e-07,
"logits/chosen": 0.3222205638885498,
"logits/rejected": 0.2856692671775818,
"logps/chosen": -87.09626770019531,
"logps/ref_chosen": -75.97196960449219,
"logps/ref_rejected": -96.42564392089844,
"logps/rejected": -115.5928955078125,
"loss": 4.146,
"margin_dpo/margin_mean": 8.04294204711914,
"margin_dpo/margin_std": 9.636971473693848,
"step": 200
},
{
"epoch": 0.6198034769463341,
"grad_norm": 85.71459197998047,
"learning_rate": 1.9106026612264315e-07,
"logits/chosen": 0.3130974769592285,
"logits/rejected": 0.2715243697166443,
"logps/chosen": -72.00007629394531,
"logps/ref_chosen": -62.02211380004883,
"logps/ref_rejected": -76.29920196533203,
"logps/rejected": -93.18082427978516,
"loss": 3.9771,
"margin_dpo/margin_mean": 6.903660774230957,
"margin_dpo/margin_std": 9.337553024291992,
"step": 205
},
{
"epoch": 0.6349206349206349,
"grad_norm": 196.58677673339844,
"learning_rate": 1.782991918222275e-07,
"logits/chosen": 0.296235591173172,
"logits/rejected": 0.2868461012840271,
"logps/chosen": -72.46338653564453,
"logps/ref_chosen": -60.48310470581055,
"logps/ref_rejected": -72.61172485351562,
"logps/rejected": -90.38494873046875,
"loss": 4.3204,
"margin_dpo/margin_mean": 5.792932987213135,
"margin_dpo/margin_std": 10.098730087280273,
"step": 210
},
{
"epoch": 0.6500377928949358,
"grad_norm": 165.42698669433594,
"learning_rate": 1.6573863381573954e-07,
"logits/chosen": 0.31669360399246216,
"logits/rejected": 0.2738528847694397,
"logps/chosen": -74.34906005859375,
"logps/ref_chosen": -63.61262130737305,
"logps/ref_rejected": -81.14451599121094,
"logps/rejected": -99.35204315185547,
"loss": 4.317,
"margin_dpo/margin_mean": 7.471091270446777,
"margin_dpo/margin_std": 9.406660079956055,
"step": 215
},
{
"epoch": 0.6651549508692366,
"grad_norm": 126.4444580078125,
"learning_rate": 1.534137185767178e-07,
"logits/chosen": 0.3267589211463928,
"logits/rejected": 0.29430729150772095,
"logps/chosen": -70.58761596679688,
"logps/ref_chosen": -59.307090759277344,
"logps/ref_rejected": -79.07124328613281,
"logps/rejected": -98.34722900390625,
"loss": 4.2117,
"margin_dpo/margin_mean": 7.995469570159912,
"margin_dpo/margin_std": 10.366331100463867,
"step": 220
},
{
"epoch": 0.6802721088435374,
"grad_norm": 199.91612243652344,
"learning_rate": 1.4135891358732205e-07,
"logits/chosen": 0.32855069637298584,
"logits/rejected": 0.2847681939601898,
"logps/chosen": -69.90028381347656,
"logps/ref_chosen": -57.278472900390625,
"logps/ref_rejected": -80.68949890136719,
"logps/rejected": -101.09757995605469,
"loss": 4.1752,
"margin_dpo/margin_mean": 7.786253452301025,
"margin_dpo/margin_std": 8.894170761108398,
"step": 225
},
{
"epoch": 0.6953892668178382,
"grad_norm": 112.55675506591797,
"learning_rate": 1.2960793094762345e-07,
"logits/chosen": 0.31637102365493774,
"logits/rejected": 0.2705737352371216,
"logps/chosen": -67.14997863769531,
"logps/ref_chosen": -55.35211181640625,
"logps/ref_rejected": -74.94232177734375,
"logps/rejected": -93.53108215332031,
"loss": 4.0339,
"margin_dpo/margin_mean": 6.790894508361816,
"margin_dpo/margin_std": 8.641973495483398,
"step": 230
},
{
"epoch": 0.7105064247921391,
"grad_norm": 112.31910705566406,
"learning_rate": 1.1819363309737438e-07,
"logits/chosen": 0.308750182390213,
"logits/rejected": 0.281294584274292,
"logps/chosen": -85.90498352050781,
"logps/ref_chosen": -72.5401611328125,
"logps/ref_rejected": -87.4328842163086,
"logps/rejected": -108.58726501464844,
"loss": 4.0637,
"margin_dpo/margin_mean": 7.789558410644531,
"margin_dpo/margin_std": 8.866066932678223,
"step": 235
},
{
"epoch": 0.7256235827664399,
"grad_norm": 185.29803466796875,
"learning_rate": 1.0714794091391072e-07,
"logits/chosen": 0.26519325375556946,
"logits/rejected": 0.22336944937705994,
"logps/chosen": -76.12914276123047,
"logps/ref_chosen": -63.8089714050293,
"logps/ref_rejected": -82.06550598144531,
"logps/rejected": -101.86104583740234,
"loss": 4.3151,
"margin_dpo/margin_mean": 7.475350856781006,
"margin_dpo/margin_std": 10.75648021697998,
"step": 240
},
{
"epoch": 0.7407407407407407,
"grad_norm": 149.37997436523438,
"learning_rate": 9.650174444319956e-08,
"logits/chosen": 0.30272477865219116,
"logits/rejected": 0.254997581243515,
"logps/chosen": -84.19644165039062,
"logps/ref_chosen": -70.94512939453125,
"logps/ref_rejected": -93.30872344970703,
"logps/rejected": -113.64608001708984,
"loss": 4.3435,
"margin_dpo/margin_mean": 7.086047172546387,
"margin_dpo/margin_std": 9.911200523376465,
"step": 245
},
{
"epoch": 0.7558578987150416,
"grad_norm": 140.13421630859375,
"learning_rate": 8.628481651367875e-08,
"logits/chosen": 0.3051221966743469,
"logits/rejected": 0.2856391370296478,
"logps/chosen": -80.27454376220703,
"logps/ref_chosen": -69.08379364013672,
"logps/ref_rejected": -80.87445831298828,
"logps/rejected": -99.18595886230469,
"loss": 4.3199,
"margin_dpo/margin_mean": 7.1207475662231445,
"margin_dpo/margin_std": 9.214082717895508,
"step": 250
},
{
"epoch": 0.7709750566893424,
"grad_norm": 304.12945556640625,
"learning_rate": 7.652572947447272e-08,
"logits/chosen": 0.32066917419433594,
"logits/rejected": 0.28739050030708313,
"logps/chosen": -71.73036193847656,
"logps/ref_chosen": -60.4463005065918,
"logps/ref_rejected": -81.14623260498047,
"logps/rejected": -100.41820526123047,
"loss": 4.1994,
"margin_dpo/margin_mean": 7.987916469573975,
"margin_dpo/margin_std": 9.643037796020508,
"step": 255
},
{
"epoch": 0.7860922146636432,
"grad_norm": 105.7574691772461,
"learning_rate": 6.725177529083209e-08,
"logits/chosen": 0.3291199207305908,
"logits/rejected": 0.28542113304138184,
"logps/chosen": -69.7227554321289,
"logps/ref_chosen": -56.893035888671875,
"logps/ref_rejected": -75.53782653808594,
"logps/rejected": -94.80742645263672,
"loss": 4.2701,
"margin_dpo/margin_mean": 6.439896583557129,
"margin_dpo/margin_std": 9.173990249633789,
"step": 260
},
{
"epoch": 0.8012093726379441,
"grad_norm": 233.89466857910156,
"learning_rate": 5.848888922025552e-08,
"logits/chosen": 0.30791908502578735,
"logits/rejected": 0.2965497076511383,
"logps/chosen": -85.73365783691406,
"logps/ref_chosen": -73.87647247314453,
"logps/ref_rejected": -87.94293212890625,
"logps/rejected": -107.0521469116211,
"loss": 4.3947,
"margin_dpo/margin_mean": 7.252041816711426,
"margin_dpo/margin_std": 9.087135314941406,
"step": 265
},
{
"epoch": 0.8163265306122449,
"grad_norm": 147.993896484375,
"learning_rate": 5.026157728273966e-08,
"logits/chosen": 0.34359079599380493,
"logits/rejected": 0.30148428678512573,
"logps/chosen": -74.4441146850586,
"logps/ref_chosen": -62.79448318481445,
"logps/ref_rejected": -83.41739654541016,
"logps/rejected": -102.64530944824219,
"loss": 4.1986,
"margin_dpo/margin_mean": 7.578291416168213,
"margin_dpo/margin_std": 8.663103103637695,
"step": 270
},
{
"epoch": 0.8314436885865457,
"grad_norm": 155.3345489501953,
"learning_rate": 4.259284772799099e-08,
"logits/chosen": 0.3387976288795471,
"logits/rejected": 0.31123754382133484,
"logps/chosen": -84.95855712890625,
"logps/ref_chosen": -72.84954833984375,
"logps/ref_rejected": -83.61592102050781,
"logps/rejected": -103.26789855957031,
"loss": 4.087,
"margin_dpo/margin_mean": 7.54297399520874,
"margin_dpo/margin_std": 8.927976608276367,
"step": 275
},
{
"epoch": 0.8465608465608465,
"grad_norm": 287.4856872558594,
"learning_rate": 3.550414669125573e-08,
"logits/chosen": 0.31973937153816223,
"logits/rejected": 0.2855887711048126,
"logps/chosen": -82.02851867675781,
"logps/ref_chosen": -69.28929138183594,
"logps/ref_rejected": -91.50147247314453,
"logps/rejected": -111.91573333740234,
"loss": 4.1572,
"margin_dpo/margin_mean": 7.6750168800354,
"margin_dpo/margin_std": 9.739129066467285,
"step": 280
},
{
"epoch": 0.8616780045351474,
"grad_norm": 226.05104064941406,
"learning_rate": 2.9015298217712453e-08,
"logits/chosen": 0.3073372542858124,
"logits/rejected": 0.26051101088523865,
"logps/chosen": -69.77713012695312,
"logps/ref_chosen": -59.243553161621094,
"logps/ref_rejected": -84.64031219482422,
"logps/rejected": -103.9446792602539,
"loss": 3.9796,
"margin_dpo/margin_mean": 8.770793914794922,
"margin_dpo/margin_std": 9.0573148727417,
"step": 285
},
{
"epoch": 0.8767951625094482,
"grad_norm": 85.13925170898438,
"learning_rate": 2.3144448823151392e-08,
"logits/chosen": 0.33762457966804504,
"logits/rejected": 0.3229420781135559,
"logps/chosen": -85.04512023925781,
"logps/ref_chosen": -72.36077117919922,
"logps/ref_rejected": -79.59184265136719,
"logps/rejected": -98.2451171875,
"loss": 4.3675,
"margin_dpo/margin_mean": 5.968916416168213,
"margin_dpo/margin_std": 8.47230339050293,
"step": 290
},
{
"epoch": 0.891912320483749,
"grad_norm": 309.4778747558594,
"learning_rate": 1.7908016745981856e-08,
"logits/chosen": 0.34688106179237366,
"logits/rejected": 0.31043171882629395,
"logps/chosen": -72.95063781738281,
"logps/ref_chosen": -60.1508674621582,
"logps/ref_rejected": -82.7198486328125,
"logps/rejected": -102.7656478881836,
"loss": 4.2038,
"margin_dpo/margin_mean": 7.2460432052612305,
"margin_dpo/margin_std": 9.161026000976562,
"step": 295
},
{
"epoch": 0.9070294784580499,
"grad_norm": 99.49198913574219,
"learning_rate": 1.3320646032487393e-08,
"logits/chosen": 0.32416361570358276,
"logits/rejected": 0.27511733770370483,
"logps/chosen": -63.5518913269043,
"logps/ref_chosen": -52.43305587768555,
"logps/ref_rejected": -72.8746566772461,
"logps/rejected": -90.66789245605469,
"loss": 4.4178,
"margin_dpo/margin_mean": 6.674383640289307,
"margin_dpo/margin_std": 8.800198554992676,
"step": 300
},
{
"epoch": 0.9221466364323507,
"grad_norm": 233.15171813964844,
"learning_rate": 9.395165583732379e-09,
"logits/chosen": 0.3059995770454407,
"logits/rejected": 0.29525426030158997,
"logps/chosen": -76.87275695800781,
"logps/ref_chosen": -65.71403503417969,
"logps/ref_rejected": -77.94856262207031,
"logps/rejected": -96.7153549194336,
"loss": 4.1559,
"margin_dpo/margin_mean": 7.6080756187438965,
"margin_dpo/margin_std": 9.828929901123047,
"step": 305
},
{
"epoch": 0.9372637944066515,
"grad_norm": 172.29246520996094,
"learning_rate": 6.142553278648238e-09,
"logits/chosen": 0.30058151483535767,
"logits/rejected": 0.2866828441619873,
"logps/chosen": -79.814208984375,
"logps/ref_chosen": -68.97371673583984,
"logps/ref_rejected": -81.00027465820312,
"logps/rejected": -100.09336853027344,
"loss": 4.1113,
"margin_dpo/margin_mean": 8.252619743347168,
"margin_dpo/margin_std": 8.724787712097168,
"step": 310
},
{
"epoch": 0.9523809523809523,
"grad_norm": 125.14547729492188,
"learning_rate": 3.5719052736323806e-09,
"logits/chosen": 0.35351628065109253,
"logits/rejected": 0.31507110595703125,
"logps/chosen": -72.15638732910156,
"logps/ref_chosen": -61.5161247253418,
"logps/ref_rejected": -87.0755844116211,
"logps/rejected": -105.89952087402344,
"loss": 4.0851,
"margin_dpo/margin_mean": 8.183670997619629,
"margin_dpo/margin_std": 9.665485382080078,
"step": 315
},
{
"epoch": 0.9674981103552532,
"grad_norm": 307.77130126953125,
"learning_rate": 1.690410564514244e-09,
"logits/chosen": 0.33188897371292114,
"logits/rejected": 0.296653687953949,
"logps/chosen": -84.79729461669922,
"logps/ref_chosen": -72.22561645507812,
"logps/ref_rejected": -89.82965087890625,
"logps/rejected": -109.34381103515625,
"loss": 4.1523,
"margin_dpo/margin_mean": 6.942486763000488,
"margin_dpo/margin_std": 9.199056625366211,
"step": 320
},
{
"epoch": 0.982615268329554,
"grad_norm": 99.29190826416016,
"learning_rate": 5.033308820289184e-10,
"logits/chosen": 0.30717378854751587,
"logits/rejected": 0.271228551864624,
"logps/chosen": -76.24370574951172,
"logps/ref_chosen": -63.10752487182617,
"logps/ref_rejected": -82.31756591796875,
"logps/rejected": -102.01248168945312,
"loss": 4.143,
"margin_dpo/margin_mean": 6.558733940124512,
"margin_dpo/margin_std": 9.522359848022461,
"step": 325
},
{
"epoch": 0.9977324263038548,
"grad_norm": 134.73052978515625,
"learning_rate": 1.3985977021235829e-11,
"logits/chosen": 0.33866098523139954,
"logits/rejected": 0.29104405641555786,
"logps/chosen": -79.09715270996094,
"logps/ref_chosen": -67.11092376708984,
"logps/ref_rejected": -92.10676574707031,
"logps/rejected": -112.00486755371094,
"loss": 4.2478,
"margin_dpo/margin_mean": 7.911886692047119,
"margin_dpo/margin_std": 10.329122543334961,
"step": 330
},
{
"epoch": 0.9977324263038548,
"step": 330,
"total_flos": 0.0,
"train_loss": 4.569111451235685,
"train_runtime": 1865.9225,
"train_samples_per_second": 22.689,
"train_steps_per_second": 0.177
}
],
"logging_steps": 5,
"max_steps": 330,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}