Files
qwen3-8b-base-orpo-ultrafee…/trainer_state.json
ModelHub XC 801d57b5a2 初始化项目,由ModelHub XC社区提供模型
Model: jackf857/qwen3-8b-base-orpo-ultrafeedback-4xh200-batch-128
Source: Original Platform
2026-05-31 14:11:53 +08:00

946 lines
34 KiB
JSON

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9989528795811519,
"eval_steps": 200,
"global_step": 477,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0020942408376963353,
"grad_norm": 37.46305465698242,
"learning_rate": 0.0,
"log_odds_chosen": 0.35378384590148926,
"log_odds_ratio": -0.6519296765327454,
"logits/chosen": 2.203179359436035,
"logits/rejected": 2.035616397857666,
"logps/chosen": -1.1535288095474243,
"logps/rejected": -1.4391145706176758,
"loss": 10.2211,
"nll_loss": 1.4494060277938843,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.011535286903381348,
"rewards/margins": 0.002855856902897358,
"rewards/rejected": -0.014391143806278706,
"step": 1
},
{
"epoch": 0.020942408376963352,
"grad_norm": 37.87759780883789,
"learning_rate": 9.375e-08,
"log_odds_chosen": 0.30660638213157654,
"log_odds_ratio": -0.662986159324646,
"logits/chosen": 1.9456572532653809,
"logits/rejected": 1.8670408725738525,
"logps/chosen": -1.1083024740219116,
"logps/rejected": -1.3244930505752563,
"loss": 10.1264,
"nll_loss": 1.2528527975082397,
"rewards/accuracies": 0.6076388955116272,
"rewards/chosen": -0.011083023622632027,
"rewards/margins": 0.002161906799301505,
"rewards/rejected": -0.013244930654764175,
"step": 10
},
{
"epoch": 0.041884816753926704,
"grad_norm": 40.62479782104492,
"learning_rate": 1.9791666666666664e-07,
"log_odds_chosen": 0.26383697986602783,
"log_odds_ratio": -0.6774462461471558,
"logits/chosen": 1.8936617374420166,
"logits/rejected": 1.8155641555786133,
"logps/chosen": -1.129002332687378,
"logps/rejected": -1.3111597299575806,
"loss": 9.8951,
"nll_loss": 1.2187750339508057,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.011290023103356361,
"rewards/margins": 0.001821571378968656,
"rewards/rejected": -0.013111594133079052,
"step": 20
},
{
"epoch": 0.06282722513089005,
"grad_norm": 36.72233963012695,
"learning_rate": 3.020833333333333e-07,
"log_odds_chosen": 0.15129676461219788,
"log_odds_ratio": -0.7099167704582214,
"logits/chosen": 1.9489176273345947,
"logits/rejected": 1.9070332050323486,
"logps/chosen": -1.0984728336334229,
"logps/rejected": -1.2049810886383057,
"loss": 10.154,
"nll_loss": 1.2440111637115479,
"rewards/accuracies": 0.578125,
"rewards/chosen": -0.010984729044139385,
"rewards/margins": 0.0010650831973180175,
"rewards/rejected": -0.012049810960888863,
"step": 30
},
{
"epoch": 0.08376963350785341,
"grad_norm": 31.673852920532227,
"learning_rate": 4.0625e-07,
"log_odds_chosen": 0.2637297511100769,
"log_odds_ratio": -0.6847748160362244,
"logits/chosen": 1.778116226196289,
"logits/rejected": 1.79119873046875,
"logps/chosen": -1.0361906290054321,
"logps/rejected": -1.2129265069961548,
"loss": 9.5835,
"nll_loss": 1.1749727725982666,
"rewards/accuracies": 0.590624988079071,
"rewards/chosen": -0.010361905209720135,
"rewards/margins": 0.0017673596739768982,
"rewards/rejected": -0.012129265815019608,
"step": 40
},
{
"epoch": 0.10471204188481675,
"grad_norm": 16.47711753845215,
"learning_rate": 4.999932966293553e-07,
"log_odds_chosen": 0.3281434178352356,
"log_odds_ratio": -0.6789900064468384,
"logits/chosen": 1.996852159500122,
"logits/rejected": 2.0322041511535645,
"logps/chosen": -0.9071202278137207,
"logps/rejected": -1.1231104135513306,
"loss": 9.6245,
"nll_loss": 1.200535535812378,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.009071202017366886,
"rewards/margins": 0.0021599007304757833,
"rewards/rejected": -0.011231102980673313,
"step": 50
},
{
"epoch": 0.1256544502617801,
"grad_norm": 17.272981643676758,
"learning_rate": 4.991893270335525e-07,
"log_odds_chosen": 0.2221679985523224,
"log_odds_ratio": -0.7138159275054932,
"logits/chosen": 1.8936437368392944,
"logits/rejected": 1.8823055028915405,
"logps/chosen": -0.9890943765640259,
"logps/rejected": -1.1324011087417603,
"loss": 9.5413,
"nll_loss": 1.162929892539978,
"rewards/accuracies": 0.5406249761581421,
"rewards/chosen": -0.009890943765640259,
"rewards/margins": 0.0014330670237541199,
"rewards/rejected": -0.011324010789394379,
"step": 60
},
{
"epoch": 0.14659685863874344,
"grad_norm": 12.330283164978027,
"learning_rate": 4.970496218214204e-07,
"log_odds_chosen": 0.27448800206184387,
"log_odds_ratio": -0.697861909866333,
"logits/chosen": 2.0014004707336426,
"logits/rejected": 2.046846866607666,
"logps/chosen": -0.961329460144043,
"logps/rejected": -1.1528202295303345,
"loss": 9.2376,
"nll_loss": 1.1857097148895264,
"rewards/accuracies": 0.578125,
"rewards/chosen": -0.009613295085728168,
"rewards/margins": 0.0019149081781506538,
"rewards/rejected": -0.011528202332556248,
"step": 70
},
{
"epoch": 0.16753926701570682,
"grad_norm": 11.02938175201416,
"learning_rate": 4.935856505068998e-07,
"log_odds_chosen": 0.31524786353111267,
"log_odds_ratio": -0.6606825590133667,
"logits/chosen": 1.8413927555084229,
"logits/rejected": 1.8493198156356812,
"logps/chosen": -0.9049292802810669,
"logps/rejected": -1.0926640033721924,
"loss": 8.9813,
"nll_loss": 1.1445410251617432,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -0.009049292653799057,
"rewards/margins": 0.0018773479387164116,
"rewards/rejected": -0.010926639661192894,
"step": 80
},
{
"epoch": 0.18848167539267016,
"grad_norm": 9.860730171203613,
"learning_rate": 4.8881598109976e-07,
"log_odds_chosen": 0.3749118447303772,
"log_odds_ratio": -0.6490113139152527,
"logits/chosen": 1.7958896160125732,
"logits/rejected": 1.7599939107894897,
"logps/chosen": -0.8722783923149109,
"logps/rejected": -1.1078213453292847,
"loss": 9.2508,
"nll_loss": 1.0980162620544434,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -0.008722783997654915,
"rewards/margins": 0.0023554288782179356,
"rewards/rejected": -0.011078213341534138,
"step": 90
},
{
"epoch": 0.2094240837696335,
"grad_norm": 10.169231414794922,
"learning_rate": 4.827661805750437e-07,
"log_odds_chosen": 0.32181161642074585,
"log_odds_ratio": -0.6511259078979492,
"logits/chosen": 1.7957969903945923,
"logits/rejected": 1.779897689819336,
"logps/chosen": -0.8846995234489441,
"logps/rejected": -1.0705549716949463,
"loss": 9.0284,
"nll_loss": 1.08339524269104,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -0.008846994489431381,
"rewards/margins": 0.0018585551297292113,
"rewards/rejected": -0.010705549269914627,
"step": 100
},
{
"epoch": 0.23036649214659685,
"grad_norm": 11.416997909545898,
"learning_rate": 4.75468677825789e-07,
"log_odds_chosen": 0.4410727918148041,
"log_odds_ratio": -0.6154537796974182,
"logits/chosen": 1.8986194133758545,
"logits/rejected": 1.9162569046020508,
"logps/chosen": -0.8505121469497681,
"logps/rejected": -1.1303095817565918,
"loss": 9.0025,
"nll_loss": 1.1048234701156616,
"rewards/accuracies": 0.659375011920929,
"rewards/chosen": -0.008505119942128658,
"rewards/margins": 0.002797975903376937,
"rewards/rejected": -0.011303097009658813,
"step": 110
},
{
"epoch": 0.2513089005235602,
"grad_norm": 8.807473182678223,
"learning_rate": 4.669625898336438e-07,
"log_odds_chosen": 0.2529251277446747,
"log_odds_ratio": -0.6907952427864075,
"logits/chosen": 1.961059808731079,
"logits/rejected": 1.9387576580047607,
"logps/chosen": -0.9018535614013672,
"logps/rejected": -1.0625946521759033,
"loss": 8.9547,
"nll_loss": 1.0763670206069946,
"rewards/accuracies": 0.5531250238418579,
"rewards/chosen": -0.009018534794449806,
"rewards/margins": 0.0016074117738753557,
"rewards/rejected": -0.010625948198139668,
"step": 120
},
{
"epoch": 0.27225130890052357,
"grad_norm": 8.736103057861328,
"learning_rate": 4.5729351198915705e-07,
"log_odds_chosen": 0.34425991773605347,
"log_odds_ratio": -0.6504599452018738,
"logits/chosen": 1.858128309249878,
"logits/rejected": 1.95168936252594,
"logps/chosen": -0.8997282981872559,
"logps/rejected": -1.0928587913513184,
"loss": 9.0819,
"nll_loss": 1.0748308897018433,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -0.00899728387594223,
"rewards/margins": 0.0019313046941533685,
"rewards/rejected": -0.01092858798801899,
"step": 130
},
{
"epoch": 0.2931937172774869,
"grad_norm": 8.833135604858398,
"learning_rate": 4.4651327368569684e-07,
"log_odds_chosen": 0.334553062915802,
"log_odds_ratio": -0.6699846982955933,
"logits/chosen": 1.8222471475601196,
"logits/rejected": 1.8125450611114502,
"logps/chosen": -0.899248480796814,
"logps/rejected": -1.1075996160507202,
"loss": 9.0727,
"nll_loss": 1.161645531654358,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.008992486633360386,
"rewards/margins": 0.002083510160446167,
"rewards/rejected": -0.011075995862483978,
"step": 140
},
{
"epoch": 0.31413612565445026,
"grad_norm": 8.58157730102539,
"learning_rate": 4.346796604970912e-07,
"log_odds_chosen": 0.3596678674221039,
"log_odds_ratio": -0.6549097299575806,
"logits/chosen": 2.0077967643737793,
"logits/rejected": 1.9518957138061523,
"logps/chosen": -0.8897331357002258,
"logps/rejected": -1.1077500581741333,
"loss": 9.0157,
"nll_loss": 1.1182167530059814,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.008897329680621624,
"rewards/margins": 0.0021801693364977837,
"rewards/rejected": -0.011077499017119408,
"step": 150
},
{
"epoch": 0.33507853403141363,
"grad_norm": 7.509969711303711,
"learning_rate": 4.218561044282098e-07,
"log_odds_chosen": 0.37523385882377625,
"log_odds_ratio": -0.639797031879425,
"logits/chosen": 1.9479191303253174,
"logits/rejected": 1.9333696365356445,
"logps/chosen": -0.8889120221138,
"logps/rejected": -1.131272554397583,
"loss": 9.0784,
"nll_loss": 1.1669073104858398,
"rewards/accuracies": 0.6343749761581421,
"rewards/chosen": -0.008889119140803814,
"rewards/margins": 0.002423606114462018,
"rewards/rejected": -0.011312725953757763,
"step": 160
},
{
"epoch": 0.35602094240837695,
"grad_norm": 10.229013442993164,
"learning_rate": 4.081113438988443e-07,
"log_odds_chosen": 0.25382956862449646,
"log_odds_ratio": -0.6958078145980835,
"logits/chosen": 1.9296739101409912,
"logits/rejected": 1.8618618249893188,
"logps/chosen": -0.870949923992157,
"logps/rejected": -1.0173413753509521,
"loss": 8.9875,
"nll_loss": 1.104835867881775,
"rewards/accuracies": 0.5843750238418579,
"rewards/chosen": -0.008709498681128025,
"rewards/margins": 0.0014639139408245683,
"rewards/rejected": -0.010173412971198559,
"step": 170
},
{
"epoch": 0.3769633507853403,
"grad_norm": 9.28216552734375,
"learning_rate": 3.935190552834828e-07,
"log_odds_chosen": 0.28805920481681824,
"log_odds_ratio": -0.6893592476844788,
"logits/chosen": 1.9231271743774414,
"logits/rejected": 1.826939344406128,
"logps/chosen": -0.8867882490158081,
"logps/rejected": -1.0307583808898926,
"loss": 8.7846,
"nll_loss": 1.1256717443466187,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.008867883123457432,
"rewards/margins": 0.0014397003687918186,
"rewards/rejected": -0.010307582095265388,
"step": 180
},
{
"epoch": 0.39790575916230364,
"grad_norm": 8.50275993347168,
"learning_rate": 3.781574579820464e-07,
"log_odds_chosen": 0.38345667719841003,
"log_odds_ratio": -0.6357052326202393,
"logits/chosen": 1.7336671352386475,
"logits/rejected": 1.7102609872817993,
"logps/chosen": -0.8580729365348816,
"logps/rejected": -1.0796293020248413,
"loss": 8.8459,
"nll_loss": 1.041303038597107,
"rewards/accuracies": 0.643750011920929,
"rewards/chosen": -0.008580728434026241,
"rewards/margins": 0.0022155637852847576,
"rewards/rejected": -0.010796292684972286,
"step": 190
},
{
"epoch": 0.418848167539267,
"grad_norm": 8.013388633728027,
"learning_rate": 3.621088951385353e-07,
"log_odds_chosen": 0.31622734665870667,
"log_odds_ratio": -0.6587765216827393,
"logits/chosen": 1.788631796836853,
"logits/rejected": 1.7873141765594482,
"logps/chosen": -0.8683417439460754,
"logps/rejected": -1.0568631887435913,
"loss": 8.6911,
"nll_loss": 1.0708550214767456,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.008683416061103344,
"rewards/margins": 0.0018852159846574068,
"rewards/rejected": -0.010568631812930107,
"step": 200
},
{
"epoch": 0.418848167539267,
"eval_log_odds_chosen": 0.3057795763015747,
"eval_log_odds_ratio": -0.6610966324806213,
"eval_logits/chosen": 2.0427238941192627,
"eval_logits/rejected": 2.0572261810302734,
"eval_logps/chosen": -0.8642156720161438,
"eval_logps/rejected": -1.0644797086715698,
"eval_loss": 1.0935468673706055,
"eval_nll_loss": 1.1233118772506714,
"eval_rewards/accuracies": 0.6100000143051147,
"eval_rewards/chosen": -0.008642155677080154,
"eval_rewards/margins": 0.0020026403944939375,
"eval_rewards/rejected": -0.010644798167049885,
"eval_runtime": 46.7517,
"eval_samples_per_second": 42.779,
"eval_steps_per_second": 5.347,
"step": 200
},
{
"epoch": 0.4397905759162304,
"grad_norm": 7.279272556304932,
"learning_rate": 3.454593922550693e-07,
"log_odds_chosen": 0.301203191280365,
"log_odds_ratio": -0.6769061088562012,
"logits/chosen": 1.896989107131958,
"logits/rejected": 1.8812087774276733,
"logps/chosen": -0.8712860345840454,
"logps/rejected": -1.0698888301849365,
"loss": 9.0687,
"nll_loss": 1.1046525239944458,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -0.008712859824299812,
"rewards/margins": 0.0019860276952385902,
"rewards/rejected": -0.010698886588215828,
"step": 210
},
{
"epoch": 0.4607329842931937,
"grad_norm": 8.950860023498535,
"learning_rate": 3.2829819606729477e-07,
"log_odds_chosen": 0.2927771508693695,
"log_odds_ratio": -0.6683081984519958,
"logits/chosen": 1.983677625656128,
"logits/rejected": 2.009464740753174,
"logps/chosen": -0.9059684872627258,
"logps/rejected": -1.0988754034042358,
"loss": 8.9995,
"nll_loss": 1.1874160766601562,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.009059684351086617,
"rewards/margins": 0.0019290696363896132,
"rewards/rejected": -0.010988753288984299,
"step": 220
},
{
"epoch": 0.4816753926701571,
"grad_norm": 12.437826156616211,
"learning_rate": 3.1071729615293424e-07,
"log_odds_chosen": 0.3832097351551056,
"log_odds_ratio": -0.6394175291061401,
"logits/chosen": 1.6963777542114258,
"logits/rejected": 1.7382042407989502,
"logps/chosen": -0.878866970539093,
"logps/rejected": -1.1088063716888428,
"loss": 8.6532,
"nll_loss": 1.0316081047058105,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -0.008788668550550938,
"rewards/margins": 0.002299393992871046,
"rewards/rejected": -0.011088063009083271,
"step": 230
},
{
"epoch": 0.5026178010471204,
"grad_norm": 8.457469940185547,
"learning_rate": 2.9281093183781403e-07,
"log_odds_chosen": 0.31373724341392517,
"log_odds_ratio": -0.6869611144065857,
"logits/chosen": 1.7616941928863525,
"logits/rejected": 1.7711395025253296,
"logps/chosen": -0.8636420965194702,
"logps/rejected": -1.0460337400436401,
"loss": 8.8157,
"nll_loss": 1.0462042093276978,
"rewards/accuracies": 0.6343749761581421,
"rewards/chosen": -0.008636420592665672,
"rewards/margins": 0.0018239166820421815,
"rewards/rejected": -0.010460336692631245,
"step": 240
},
{
"epoch": 0.5235602094240838,
"grad_norm": 7.4062604904174805,
"learning_rate": 2.7467508704251135e-07,
"log_odds_chosen": 0.3899185359477997,
"log_odds_ratio": -0.6580259203910828,
"logits/chosen": 1.8792108297348022,
"logits/rejected": 1.7834640741348267,
"logps/chosen": -0.8666375279426575,
"logps/rejected": -1.1335757970809937,
"loss": 8.8203,
"nll_loss": 1.1227291822433472,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.008666375651955605,
"rewards/margins": 0.002669382141903043,
"rewards/rejected": -0.011335758492350578,
"step": 250
},
{
"epoch": 0.5445026178010471,
"grad_norm": 8.197929382324219,
"learning_rate": 2.5640697577740815e-07,
"log_odds_chosen": 0.3152967393398285,
"log_odds_ratio": -0.6758849620819092,
"logits/chosen": 1.7824039459228516,
"logits/rejected": 1.766455054283142,
"logps/chosen": -0.8502113223075867,
"logps/rejected": -1.0350358486175537,
"loss": 8.805,
"nll_loss": 1.0837422609329224,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.008502112701535225,
"rewards/margins": 0.001848246669396758,
"rewards/rejected": -0.010350359603762627,
"step": 260
},
{
"epoch": 0.5654450261780105,
"grad_norm": 7.904941558837891,
"learning_rate": 2.381045210440644e-07,
"log_odds_chosen": 0.28941792249679565,
"log_odds_ratio": -0.6771480441093445,
"logits/chosen": 1.8082011938095093,
"logits/rejected": 1.841059923171997,
"logps/chosen": -0.8608342409133911,
"logps/rejected": -1.0518665313720703,
"loss": 8.5612,
"nll_loss": 1.0589611530303955,
"rewards/accuracies": 0.596875011920929,
"rewards/chosen": -0.008608341217041016,
"rewards/margins": 0.0019103230442851782,
"rewards/rejected": -0.010518666356801987,
"step": 270
},
{
"epoch": 0.5863874345549738,
"grad_norm": 9.047080039978027,
"learning_rate": 2.1986582993616925e-07,
"log_odds_chosen": 0.35996752977371216,
"log_odds_ratio": -0.651909589767456,
"logits/chosen": 1.741289496421814,
"logits/rejected": 1.7088711261749268,
"logps/chosen": -0.8389939069747925,
"logps/rejected": -1.060254693031311,
"loss": 8.6897,
"nll_loss": 1.0688748359680176,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.008389937691390514,
"rewards/margins": 0.002212608465924859,
"rewards/rejected": -0.010602546855807304,
"step": 280
},
{
"epoch": 0.6073298429319371,
"grad_norm": 7.390078067779541,
"learning_rate": 2.0178866775369774e-07,
"log_odds_chosen": 0.31485018134117126,
"log_odds_ratio": -0.6769185066223145,
"logits/chosen": 1.8932464122772217,
"logits/rejected": 1.8560025691986084,
"logps/chosen": -0.8659391403198242,
"logps/rejected": -1.0502351522445679,
"loss": 8.7301,
"nll_loss": 1.1085679531097412,
"rewards/accuracies": 0.5718749761581421,
"rewards/chosen": -0.008659390732645988,
"rewards/margins": 0.0018429612973704934,
"rewards/rejected": -0.010502351447939873,
"step": 290
},
{
"epoch": 0.6282722513089005,
"grad_norm": 7.00534200668335,
"learning_rate": 1.839699339491937e-07,
"log_odds_chosen": 0.2003917694091797,
"log_odds_ratio": -0.7062225937843323,
"logits/chosen": 1.7795374393463135,
"logits/rejected": 1.7968852519989014,
"logps/chosen": -0.9032294154167175,
"logps/rejected": -1.0224246978759766,
"loss": 8.5708,
"nll_loss": 1.07076096534729,
"rewards/accuracies": 0.559374988079071,
"rewards/chosen": -0.00903229508548975,
"rewards/margins": 0.0011919522657990456,
"rewards/rejected": -0.010224247351288795,
"step": 300
},
{
"epoch": 0.6492146596858639,
"grad_norm": 8.29725170135498,
"learning_rate": 1.6650514271527465e-07,
"log_odds_chosen": 0.33252888917922974,
"log_odds_ratio": -0.6427666544914246,
"logits/chosen": 1.7817466259002686,
"logits/rejected": 1.805783987045288,
"logps/chosen": -0.8416460752487183,
"logps/rejected": -1.0354901552200317,
"loss": 8.5523,
"nll_loss": 1.023045301437378,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.008416460826992989,
"rewards/margins": 0.0019384392071515322,
"rewards/rejected": -0.010354900732636452,
"step": 310
},
{
"epoch": 0.6701570680628273,
"grad_norm": 7.233508110046387,
"learning_rate": 1.4948791099758052e-07,
"log_odds_chosen": 0.302054762840271,
"log_odds_ratio": -0.6652564406394958,
"logits/chosen": 1.8713328838348389,
"logits/rejected": 1.906873106956482,
"logps/chosen": -0.857520580291748,
"logps/rejected": -1.0245308876037598,
"loss": 8.7328,
"nll_loss": 1.081656813621521,
"rewards/accuracies": 0.609375,
"rewards/chosen": -0.008575205691158772,
"rewards/margins": 0.0016701031709089875,
"rewards/rejected": -0.010245309211313725,
"step": 320
},
{
"epoch": 0.6910994764397905,
"grad_norm": 7.668047904968262,
"learning_rate": 1.3300945667758012e-07,
"log_odds_chosen": 0.3296203017234802,
"log_odds_ratio": -0.6710628867149353,
"logits/chosen": 1.8118549585342407,
"logits/rejected": 1.7958993911743164,
"logps/chosen": -0.8951608538627625,
"logps/rejected": -1.0847995281219482,
"loss": 8.9004,
"nll_loss": 1.1010843515396118,
"rewards/accuracies": 0.590624988079071,
"rewards/chosen": -0.008951608091592789,
"rewards/margins": 0.0018963876646012068,
"rewards/rejected": -0.010847995989024639,
"step": 330
},
{
"epoch": 0.7120418848167539,
"grad_norm": 8.6635160446167,
"learning_rate": 1.1715810961514072e-07,
"log_odds_chosen": 0.3200518488883972,
"log_odds_ratio": -0.6835609078407288,
"logits/chosen": 1.8284895420074463,
"logits/rejected": 1.8095057010650635,
"logps/chosen": -0.8995221853256226,
"logps/rejected": -1.0893176794052124,
"loss": 8.7465,
"nll_loss": 1.092341661453247,
"rewards/accuracies": 0.590624988079071,
"rewards/chosen": -0.008995221927762032,
"rewards/margins": 0.0018979553133249283,
"rewards/rejected": -0.01089317724108696,
"step": 340
},
{
"epoch": 0.7329842931937173,
"grad_norm": 8.274894714355469,
"learning_rate": 1.0201883817182949e-07,
"log_odds_chosen": 0.3764176368713379,
"log_odds_ratio": -0.6291422843933105,
"logits/chosen": 1.890041708946228,
"logits/rejected": 1.9048725366592407,
"logps/chosen": -0.8920204043388367,
"logps/rejected": -1.1221367120742798,
"loss": 8.845,
"nll_loss": 1.1252596378326416,
"rewards/accuracies": 0.640625,
"rewards/chosen": -0.00892020296305418,
"rewards/margins": 0.00230116187594831,
"rewards/rejected": -0.011221365071833134,
"step": 350
},
{
"epoch": 0.7539267015706806,
"grad_norm": 8.172623634338379,
"learning_rate": 8.76727937529367e-08,
"log_odds_chosen": 0.3156259059906006,
"log_odds_ratio": -0.6558908224105835,
"logits/chosen": 1.8350282907485962,
"logits/rejected": 1.8624794483184814,
"logps/chosen": -0.8711791038513184,
"logps/rejected": -1.0731276273727417,
"loss": 8.8469,
"nll_loss": 1.1009576320648193,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.008711791597306728,
"rewards/margins": 0.002019485691562295,
"rewards/rejected": -0.010731276124715805,
"step": 360
},
{
"epoch": 0.774869109947644,
"grad_norm": 9.326078414916992,
"learning_rate": 7.419687580962222e-08,
"log_odds_chosen": 0.3530608117580414,
"log_odds_ratio": -0.6621404886245728,
"logits/chosen": 1.9310886859893799,
"logits/rejected": 1.8785558938980103,
"logps/chosen": -0.9094289541244507,
"logps/rejected": -1.133821725845337,
"loss": 8.7173,
"nll_loss": 1.1301552057266235,
"rewards/accuracies": 0.5687500238418579,
"rewards/chosen": -0.00909428857266903,
"rewards/margins": 0.0022439290769398212,
"rewards/rejected": -0.011338217183947563,
"step": 370
},
{
"epoch": 0.7958115183246073,
"grad_norm": 9.022133827209473,
"learning_rate": 6.166331963291519e-08,
"log_odds_chosen": 0.23490826785564423,
"log_odds_ratio": -0.6955921053886414,
"logits/chosen": 1.9562733173370361,
"logits/rejected": 1.8897705078125,
"logps/chosen": -0.8423633575439453,
"logps/rejected": -0.994970977306366,
"loss": 8.9164,
"nll_loss": 1.1008055210113525,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.008423633873462677,
"rewards/margins": 0.0015260763466358185,
"rewards/rejected": -0.009949709288775921,
"step": 380
},
{
"epoch": 0.8167539267015707,
"grad_norm": 7.358635425567627,
"learning_rate": 5.013930914912476e-08,
"log_odds_chosen": 0.27963709831237793,
"log_odds_ratio": -0.6734436750411987,
"logits/chosen": 1.956067681312561,
"logits/rejected": 1.9839175939559937,
"logps/chosen": -0.8422489166259766,
"logps/rejected": -1.0293291807174683,
"loss": 8.6461,
"nll_loss": 1.0561668872833252,
"rewards/accuracies": 0.578125,
"rewards/chosen": -0.008422489278018475,
"rewards/margins": 0.0018708031857386231,
"rewards/rejected": -0.010293291881680489,
"step": 390
},
{
"epoch": 0.837696335078534,
"grad_norm": 7.309168338775635,
"learning_rate": 3.968661679220467e-08,
"log_odds_chosen": 0.18631207942962646,
"log_odds_ratio": -0.7142513990402222,
"logits/chosen": 1.8434158563613892,
"logits/rejected": 1.8182004690170288,
"logps/chosen": -0.8909440040588379,
"logps/rejected": -1.0001533031463623,
"loss": 8.6763,
"nll_loss": 1.1097790002822876,
"rewards/accuracies": 0.5406249761581421,
"rewards/chosen": -0.008909439668059349,
"rewards/margins": 0.00109209178481251,
"rewards/rejected": -0.01000153087079525,
"step": 400
},
{
"epoch": 0.837696335078534,
"eval_log_odds_chosen": 0.3030896484851837,
"eval_log_odds_ratio": -0.6629699468612671,
"eval_logits/chosen": 2.1568901538848877,
"eval_logits/rejected": 2.191537857055664,
"eval_logps/chosen": -0.8522689342498779,
"eval_logps/rejected": -1.0496091842651367,
"eval_loss": 1.078381061553955,
"eval_nll_loss": 1.1099687814712524,
"eval_rewards/accuracies": 0.6060000061988831,
"eval_rewards/chosen": -0.008522690273821354,
"eval_rewards/margins": 0.0019734008237719536,
"eval_rewards/rejected": -0.010496090166270733,
"eval_runtime": 46.7843,
"eval_samples_per_second": 42.749,
"eval_steps_per_second": 5.344,
"step": 400
},
{
"epoch": 0.8586387434554974,
"grad_norm": 8.156121253967285,
"learning_rate": 3.036127238347164e-08,
"log_odds_chosen": 0.31570926308631897,
"log_odds_ratio": -0.6613708734512329,
"logits/chosen": 1.936348557472229,
"logits/rejected": 1.903748869895935,
"logps/chosen": -0.8457509875297546,
"logps/rejected": -1.0446395874023438,
"loss": 8.7334,
"nll_loss": 1.0835765600204468,
"rewards/accuracies": 0.609375,
"rewards/chosen": -0.00845750980079174,
"rewards/margins": 0.001988885225728154,
"rewards/rejected": -0.010446394793689251,
"step": 410
},
{
"epoch": 0.8795811518324608,
"grad_norm": 8.324801445007324,
"learning_rate": 2.2213262793589482e-08,
"log_odds_chosen": 0.29435402154922485,
"log_odds_ratio": -0.6616442799568176,
"logits/chosen": 1.8585201501846313,
"logits/rejected": 1.8436048030853271,
"logps/chosen": -0.8651920557022095,
"logps/rejected": -1.0383055210113525,
"loss": 8.6628,
"nll_loss": 1.0541940927505493,
"rewards/accuracies": 0.596875011920929,
"rewards/chosen": -0.008651919662952423,
"rewards/margins": 0.0017311364645138383,
"rewards/rejected": -0.010383055545389652,
"step": 420
},
{
"epoch": 0.900523560209424,
"grad_norm": 8.076932907104492,
"learning_rate": 1.5286263996730026e-08,
"log_odds_chosen": 0.48232072591781616,
"log_odds_ratio": -0.6256499290466309,
"logits/chosen": 1.8976824283599854,
"logits/rejected": 1.8619095087051392,
"logps/chosen": -0.8179370760917664,
"logps/rejected": -1.1194853782653809,
"loss": 8.6524,
"nll_loss": 1.1020301580429077,
"rewards/accuracies": 0.609375,
"rewards/chosen": -0.008179371245205402,
"rewards/margins": 0.003015482099726796,
"rewards/rejected": -0.011194853112101555,
"step": 430
},
{
"epoch": 0.9214659685863874,
"grad_norm": 8.184592247009277,
"learning_rate": 9.617406953185136e-09,
"log_odds_chosen": 0.3493059575557709,
"log_odds_ratio": -0.646949052810669,
"logits/chosen": 1.9365053176879883,
"logits/rejected": 1.9345598220825195,
"logps/chosen": -0.8272320032119751,
"logps/rejected": -1.0139485597610474,
"loss": 8.7643,
"nll_loss": 1.0985225439071655,
"rewards/accuracies": 0.628125011920929,
"rewards/chosen": -0.008272320032119751,
"rewards/margins": 0.0018671646248549223,
"rewards/rejected": -0.010139484889805317,
"step": 440
},
{
"epoch": 0.9424083769633508,
"grad_norm": 8.757542610168457,
"learning_rate": 5.2370785753763356e-09,
"log_odds_chosen": 0.2785571217536926,
"log_odds_ratio": -0.6923194527626038,
"logits/chosen": 1.9607532024383545,
"logits/rejected": 1.9347816705703735,
"logps/chosen": -0.8750311732292175,
"logps/rejected": -1.0376728773117065,
"loss": 8.7208,
"nll_loss": 1.1206778287887573,
"rewards/accuracies": 0.6156250238418579,
"rewards/chosen": -0.0087503120303154,
"rewards/margins": 0.0016264161095023155,
"rewards/rejected": -0.010376728139817715,
"step": 450
},
{
"epoch": 0.9633507853403142,
"grad_norm": 7.935389995574951,
"learning_rate": 2.168758844148272e-09,
"log_odds_chosen": 0.33909493684768677,
"log_odds_ratio": -0.654638409614563,
"logits/chosen": 2.066657304763794,
"logits/rejected": 2.039240598678589,
"logps/chosen": -0.8697013854980469,
"logps/rejected": -1.0564416646957397,
"loss": 8.7399,
"nll_loss": 1.149594783782959,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.00869701337069273,
"rewards/margins": 0.0018674019956961274,
"rewards/rejected": -0.010564416646957397,
"step": 460
},
{
"epoch": 0.9842931937172775,
"grad_norm": 7.827225685119629,
"learning_rate": 4.288949484559934e-10,
"log_odds_chosen": 0.38105446100234985,
"log_odds_ratio": -0.6435109376907349,
"logits/chosen": 1.9544579982757568,
"logits/rejected": 1.9189164638519287,
"logps/chosen": -0.8262852430343628,
"logps/rejected": -1.0520846843719482,
"loss": 8.7101,
"nll_loss": 1.105509638786316,
"rewards/accuracies": 0.6031249761581421,
"rewards/chosen": -0.00826285220682621,
"rewards/margins": 0.002257994608953595,
"rewards/rejected": -0.010520846582949162,
"step": 470
},
{
"epoch": 0.9989528795811519,
"step": 477,
"total_flos": 0.0,
"train_loss": 8.957356926780077,
"train_runtime": 5488.1377,
"train_samples_per_second": 11.139,
"train_steps_per_second": 0.087
}
],
"logging_steps": 10,
"max_steps": 477,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}