{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9989528795811519, "eval_steps": 200, "global_step": 477, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0020942408376963353, "grad_norm": 37.46305465698242, "learning_rate": 0.0, "log_odds_chosen": 0.35378384590148926, "log_odds_ratio": -0.6519296765327454, "logits/chosen": 2.203179359436035, "logits/rejected": 2.035616397857666, "logps/chosen": -1.1535288095474243, "logps/rejected": -1.4391145706176758, "loss": 10.2211, "nll_loss": 1.4494060277938843, "rewards/accuracies": 0.6875, "rewards/chosen": -0.011535286903381348, "rewards/margins": 0.002855856902897358, "rewards/rejected": -0.014391143806278706, "step": 1 }, { "epoch": 0.020942408376963352, "grad_norm": 37.87759780883789, "learning_rate": 9.375e-08, "log_odds_chosen": 0.30660638213157654, "log_odds_ratio": -0.662986159324646, "logits/chosen": 1.9456572532653809, "logits/rejected": 1.8670408725738525, "logps/chosen": -1.1083024740219116, "logps/rejected": -1.3244930505752563, "loss": 10.1264, "nll_loss": 1.2528527975082397, "rewards/accuracies": 0.6076388955116272, "rewards/chosen": -0.011083023622632027, "rewards/margins": 0.002161906799301505, "rewards/rejected": -0.013244930654764175, "step": 10 }, { "epoch": 0.041884816753926704, "grad_norm": 40.62479782104492, "learning_rate": 1.9791666666666664e-07, "log_odds_chosen": 0.26383697986602783, "log_odds_ratio": -0.6774462461471558, "logits/chosen": 1.8936617374420166, "logits/rejected": 1.8155641555786133, "logps/chosen": -1.129002332687378, "logps/rejected": -1.3111597299575806, "loss": 9.8951, "nll_loss": 1.2187750339508057, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.011290023103356361, "rewards/margins": 0.001821571378968656, "rewards/rejected": -0.013111594133079052, "step": 20 }, { "epoch": 0.06282722513089005, "grad_norm": 36.72233963012695, "learning_rate": 3.020833333333333e-07, "log_odds_chosen": 0.15129676461219788, "log_odds_ratio": -0.7099167704582214, "logits/chosen": 1.9489176273345947, "logits/rejected": 1.9070332050323486, "logps/chosen": -1.0984728336334229, "logps/rejected": -1.2049810886383057, "loss": 10.154, "nll_loss": 1.2440111637115479, "rewards/accuracies": 0.578125, "rewards/chosen": -0.010984729044139385, "rewards/margins": 0.0010650831973180175, "rewards/rejected": -0.012049810960888863, "step": 30 }, { "epoch": 0.08376963350785341, "grad_norm": 31.673852920532227, "learning_rate": 4.0625e-07, "log_odds_chosen": 0.2637297511100769, "log_odds_ratio": -0.6847748160362244, "logits/chosen": 1.778116226196289, "logits/rejected": 1.79119873046875, "logps/chosen": -1.0361906290054321, "logps/rejected": -1.2129265069961548, "loss": 9.5835, "nll_loss": 1.1749727725982666, "rewards/accuracies": 0.590624988079071, "rewards/chosen": -0.010361905209720135, "rewards/margins": 0.0017673596739768982, "rewards/rejected": -0.012129265815019608, "step": 40 }, { "epoch": 0.10471204188481675, "grad_norm": 16.47711753845215, "learning_rate": 4.999932966293553e-07, "log_odds_chosen": 0.3281434178352356, "log_odds_ratio": -0.6789900064468384, "logits/chosen": 1.996852159500122, "logits/rejected": 2.0322041511535645, "logps/chosen": -0.9071202278137207, "logps/rejected": -1.1231104135513306, "loss": 9.6245, "nll_loss": 1.200535535812378, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.009071202017366886, "rewards/margins": 0.0021599007304757833, "rewards/rejected": -0.011231102980673313, "step": 50 }, { "epoch": 0.1256544502617801, "grad_norm": 17.272981643676758, "learning_rate": 4.991893270335525e-07, "log_odds_chosen": 0.2221679985523224, "log_odds_ratio": -0.7138159275054932, "logits/chosen": 1.8936437368392944, "logits/rejected": 1.8823055028915405, "logps/chosen": -0.9890943765640259, "logps/rejected": -1.1324011087417603, "loss": 9.5413, "nll_loss": 1.162929892539978, "rewards/accuracies": 0.5406249761581421, "rewards/chosen": -0.009890943765640259, "rewards/margins": 0.0014330670237541199, "rewards/rejected": -0.011324010789394379, "step": 60 }, { "epoch": 0.14659685863874344, "grad_norm": 12.330283164978027, "learning_rate": 4.970496218214204e-07, "log_odds_chosen": 0.27448800206184387, "log_odds_ratio": -0.697861909866333, "logits/chosen": 2.0014004707336426, "logits/rejected": 2.046846866607666, "logps/chosen": -0.961329460144043, "logps/rejected": -1.1528202295303345, "loss": 9.2376, "nll_loss": 1.1857097148895264, "rewards/accuracies": 0.578125, "rewards/chosen": -0.009613295085728168, "rewards/margins": 0.0019149081781506538, "rewards/rejected": -0.011528202332556248, "step": 70 }, { "epoch": 0.16753926701570682, "grad_norm": 11.02938175201416, "learning_rate": 4.935856505068998e-07, "log_odds_chosen": 0.31524786353111267, "log_odds_ratio": -0.6606825590133667, "logits/chosen": 1.8413927555084229, "logits/rejected": 1.8493198156356812, "logps/chosen": -0.9049292802810669, "logps/rejected": -1.0926640033721924, "loss": 8.9813, "nll_loss": 1.1445410251617432, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.009049292653799057, "rewards/margins": 0.0018773479387164116, "rewards/rejected": -0.010926639661192894, "step": 80 }, { "epoch": 0.18848167539267016, "grad_norm": 9.860730171203613, "learning_rate": 4.8881598109976e-07, "log_odds_chosen": 0.3749118447303772, "log_odds_ratio": -0.6490113139152527, "logits/chosen": 1.7958896160125732, "logits/rejected": 1.7599939107894897, "logps/chosen": -0.8722783923149109, "logps/rejected": -1.1078213453292847, "loss": 9.2508, "nll_loss": 1.0980162620544434, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.008722783997654915, "rewards/margins": 0.0023554288782179356, "rewards/rejected": -0.011078213341534138, "step": 90 }, { "epoch": 0.2094240837696335, "grad_norm": 10.169231414794922, "learning_rate": 4.827661805750437e-07, "log_odds_chosen": 0.32181161642074585, "log_odds_ratio": -0.6511259078979492, "logits/chosen": 1.7957969903945923, "logits/rejected": 1.779897689819336, "logps/chosen": -0.8846995234489441, "logps/rejected": -1.0705549716949463, "loss": 9.0284, "nll_loss": 1.08339524269104, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.008846994489431381, "rewards/margins": 0.0018585551297292113, "rewards/rejected": -0.010705549269914627, "step": 100 }, { "epoch": 0.23036649214659685, "grad_norm": 11.416997909545898, "learning_rate": 4.75468677825789e-07, "log_odds_chosen": 0.4410727918148041, "log_odds_ratio": -0.6154537796974182, "logits/chosen": 1.8986194133758545, "logits/rejected": 1.9162569046020508, "logps/chosen": -0.8505121469497681, "logps/rejected": -1.1303095817565918, "loss": 9.0025, "nll_loss": 1.1048234701156616, "rewards/accuracies": 0.659375011920929, "rewards/chosen": -0.008505119942128658, "rewards/margins": 0.002797975903376937, "rewards/rejected": -0.011303097009658813, "step": 110 }, { "epoch": 0.2513089005235602, "grad_norm": 8.807473182678223, "learning_rate": 4.669625898336438e-07, "log_odds_chosen": 0.2529251277446747, "log_odds_ratio": -0.6907952427864075, "logits/chosen": 1.961059808731079, "logits/rejected": 1.9387576580047607, "logps/chosen": -0.9018535614013672, "logps/rejected": -1.0625946521759033, "loss": 8.9547, "nll_loss": 1.0763670206069946, "rewards/accuracies": 0.5531250238418579, "rewards/chosen": -0.009018534794449806, "rewards/margins": 0.0016074117738753557, "rewards/rejected": -0.010625948198139668, "step": 120 }, { "epoch": 0.27225130890052357, "grad_norm": 8.736103057861328, "learning_rate": 4.5729351198915705e-07, "log_odds_chosen": 0.34425991773605347, "log_odds_ratio": -0.6504599452018738, "logits/chosen": 1.858128309249878, "logits/rejected": 1.95168936252594, "logps/chosen": -0.8997282981872559, "logps/rejected": -1.0928587913513184, "loss": 9.0819, "nll_loss": 1.0748308897018433, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.00899728387594223, "rewards/margins": 0.0019313046941533685, "rewards/rejected": -0.01092858798801899, "step": 130 }, { "epoch": 0.2931937172774869, "grad_norm": 8.833135604858398, "learning_rate": 4.4651327368569684e-07, "log_odds_chosen": 0.334553062915802, "log_odds_ratio": -0.6699846982955933, "logits/chosen": 1.8222471475601196, "logits/rejected": 1.8125450611114502, "logps/chosen": -0.899248480796814, "logps/rejected": -1.1075996160507202, "loss": 9.0727, "nll_loss": 1.161645531654358, "rewards/accuracies": 0.59375, "rewards/chosen": -0.008992486633360386, "rewards/margins": 0.002083510160446167, "rewards/rejected": -0.011075995862483978, "step": 140 }, { "epoch": 0.31413612565445026, "grad_norm": 8.58157730102539, "learning_rate": 4.346796604970912e-07, "log_odds_chosen": 0.3596678674221039, "log_odds_ratio": -0.6549097299575806, "logits/chosen": 2.0077967643737793, "logits/rejected": 1.9518957138061523, "logps/chosen": -0.8897331357002258, "logps/rejected": -1.1077500581741333, "loss": 9.0157, "nll_loss": 1.1182167530059814, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.008897329680621624, "rewards/margins": 0.0021801693364977837, "rewards/rejected": -0.011077499017119408, "step": 150 }, { "epoch": 0.33507853403141363, "grad_norm": 7.509969711303711, "learning_rate": 4.218561044282098e-07, "log_odds_chosen": 0.37523385882377625, "log_odds_ratio": -0.639797031879425, "logits/chosen": 1.9479191303253174, "logits/rejected": 1.9333696365356445, "logps/chosen": -0.8889120221138, "logps/rejected": -1.131272554397583, "loss": 9.0784, "nll_loss": 1.1669073104858398, "rewards/accuracies": 0.6343749761581421, "rewards/chosen": -0.008889119140803814, "rewards/margins": 0.002423606114462018, "rewards/rejected": -0.011312725953757763, "step": 160 }, { "epoch": 0.35602094240837695, "grad_norm": 10.229013442993164, "learning_rate": 4.081113438988443e-07, "log_odds_chosen": 0.25382956862449646, "log_odds_ratio": -0.6958078145980835, "logits/chosen": 1.9296739101409912, "logits/rejected": 1.8618618249893188, "logps/chosen": -0.870949923992157, "logps/rejected": -1.0173413753509521, "loss": 8.9875, "nll_loss": 1.104835867881775, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": -0.008709498681128025, "rewards/margins": 0.0014639139408245683, "rewards/rejected": -0.010173412971198559, "step": 170 }, { "epoch": 0.3769633507853403, "grad_norm": 9.28216552734375, "learning_rate": 3.935190552834828e-07, "log_odds_chosen": 0.28805920481681824, "log_odds_ratio": -0.6893592476844788, "logits/chosen": 1.9231271743774414, "logits/rejected": 1.826939344406128, "logps/chosen": -0.8867882490158081, "logps/rejected": -1.0307583808898926, "loss": 8.7846, "nll_loss": 1.1256717443466187, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.008867883123457432, "rewards/margins": 0.0014397003687918186, "rewards/rejected": -0.010307582095265388, "step": 180 }, { "epoch": 0.39790575916230364, "grad_norm": 8.50275993347168, "learning_rate": 3.781574579820464e-07, "log_odds_chosen": 0.38345667719841003, "log_odds_ratio": -0.6357052326202393, "logits/chosen": 1.7336671352386475, "logits/rejected": 1.7102609872817993, "logps/chosen": -0.8580729365348816, "logps/rejected": -1.0796293020248413, "loss": 8.8459, "nll_loss": 1.041303038597107, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.008580728434026241, "rewards/margins": 0.0022155637852847576, "rewards/rejected": -0.010796292684972286, "step": 190 }, { "epoch": 0.418848167539267, "grad_norm": 8.013388633728027, "learning_rate": 3.621088951385353e-07, "log_odds_chosen": 0.31622734665870667, "log_odds_ratio": -0.6587765216827393, "logits/chosen": 1.788631796836853, "logits/rejected": 1.7873141765594482, "logps/chosen": -0.8683417439460754, "logps/rejected": -1.0568631887435913, "loss": 8.6911, "nll_loss": 1.0708550214767456, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.008683416061103344, "rewards/margins": 0.0018852159846574068, "rewards/rejected": -0.010568631812930107, "step": 200 }, { "epoch": 0.418848167539267, "eval_log_odds_chosen": 0.3057795763015747, "eval_log_odds_ratio": -0.6610966324806213, "eval_logits/chosen": 2.0427238941192627, "eval_logits/rejected": 2.0572261810302734, "eval_logps/chosen": -0.8642156720161438, "eval_logps/rejected": -1.0644797086715698, "eval_loss": 1.0935468673706055, "eval_nll_loss": 1.1233118772506714, "eval_rewards/accuracies": 0.6100000143051147, "eval_rewards/chosen": -0.008642155677080154, "eval_rewards/margins": 0.0020026403944939375, "eval_rewards/rejected": -0.010644798167049885, "eval_runtime": 46.7517, "eval_samples_per_second": 42.779, "eval_steps_per_second": 5.347, "step": 200 }, { "epoch": 0.4397905759162304, "grad_norm": 7.279272556304932, "learning_rate": 3.454593922550693e-07, "log_odds_chosen": 0.301203191280365, "log_odds_ratio": -0.6769061088562012, "logits/chosen": 1.896989107131958, "logits/rejected": 1.8812087774276733, "logps/chosen": -0.8712860345840454, "logps/rejected": -1.0698888301849365, "loss": 9.0687, "nll_loss": 1.1046525239944458, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.008712859824299812, "rewards/margins": 0.0019860276952385902, "rewards/rejected": -0.010698886588215828, "step": 210 }, { "epoch": 0.4607329842931937, "grad_norm": 8.950860023498535, "learning_rate": 3.2829819606729477e-07, "log_odds_chosen": 0.2927771508693695, "log_odds_ratio": -0.6683081984519958, "logits/chosen": 1.983677625656128, "logits/rejected": 2.009464740753174, "logps/chosen": -0.9059684872627258, "logps/rejected": -1.0988754034042358, "loss": 8.9995, "nll_loss": 1.1874160766601562, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.009059684351086617, "rewards/margins": 0.0019290696363896132, "rewards/rejected": -0.010988753288984299, "step": 220 }, { "epoch": 0.4816753926701571, "grad_norm": 12.437826156616211, "learning_rate": 3.1071729615293424e-07, "log_odds_chosen": 0.3832097351551056, "log_odds_ratio": -0.6394175291061401, "logits/chosen": 1.6963777542114258, "logits/rejected": 1.7382042407989502, "logps/chosen": -0.878866970539093, "logps/rejected": -1.1088063716888428, "loss": 8.6532, "nll_loss": 1.0316081047058105, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.008788668550550938, "rewards/margins": 0.002299393992871046, "rewards/rejected": -0.011088063009083271, "step": 230 }, { "epoch": 0.5026178010471204, "grad_norm": 8.457469940185547, "learning_rate": 2.9281093183781403e-07, "log_odds_chosen": 0.31373724341392517, "log_odds_ratio": -0.6869611144065857, "logits/chosen": 1.7616941928863525, "logits/rejected": 1.7711395025253296, "logps/chosen": -0.8636420965194702, "logps/rejected": -1.0460337400436401, "loss": 8.8157, "nll_loss": 1.0462042093276978, "rewards/accuracies": 0.6343749761581421, "rewards/chosen": -0.008636420592665672, "rewards/margins": 0.0018239166820421815, "rewards/rejected": -0.010460336692631245, "step": 240 }, { "epoch": 0.5235602094240838, "grad_norm": 7.4062604904174805, "learning_rate": 2.7467508704251135e-07, "log_odds_chosen": 0.3899185359477997, "log_odds_ratio": -0.6580259203910828, "logits/chosen": 1.8792108297348022, "logits/rejected": 1.7834640741348267, "logps/chosen": -0.8666375279426575, "logps/rejected": -1.1335757970809937, "loss": 8.8203, "nll_loss": 1.1227291822433472, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.008666375651955605, "rewards/margins": 0.002669382141903043, "rewards/rejected": -0.011335758492350578, "step": 250 }, { "epoch": 0.5445026178010471, "grad_norm": 8.197929382324219, "learning_rate": 2.5640697577740815e-07, "log_odds_chosen": 0.3152967393398285, "log_odds_ratio": -0.6758849620819092, "logits/chosen": 1.7824039459228516, "logits/rejected": 1.766455054283142, "logps/chosen": -0.8502113223075867, "logps/rejected": -1.0350358486175537, "loss": 8.805, "nll_loss": 1.0837422609329224, "rewards/accuracies": 0.59375, "rewards/chosen": -0.008502112701535225, "rewards/margins": 0.001848246669396758, "rewards/rejected": -0.010350359603762627, "step": 260 }, { "epoch": 0.5654450261780105, "grad_norm": 7.904941558837891, "learning_rate": 2.381045210440644e-07, "log_odds_chosen": 0.28941792249679565, "log_odds_ratio": -0.6771480441093445, "logits/chosen": 1.8082011938095093, "logits/rejected": 1.841059923171997, "logps/chosen": -0.8608342409133911, "logps/rejected": -1.0518665313720703, "loss": 8.5612, "nll_loss": 1.0589611530303955, "rewards/accuracies": 0.596875011920929, "rewards/chosen": -0.008608341217041016, "rewards/margins": 0.0019103230442851782, "rewards/rejected": -0.010518666356801987, "step": 270 }, { "epoch": 0.5863874345549738, "grad_norm": 9.047080039978027, "learning_rate": 2.1986582993616925e-07, "log_odds_chosen": 0.35996752977371216, "log_odds_ratio": -0.651909589767456, "logits/chosen": 1.741289496421814, "logits/rejected": 1.7088711261749268, "logps/chosen": -0.8389939069747925, "logps/rejected": -1.060254693031311, "loss": 8.6897, "nll_loss": 1.0688748359680176, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.008389937691390514, "rewards/margins": 0.002212608465924859, "rewards/rejected": -0.010602546855807304, "step": 280 }, { "epoch": 0.6073298429319371, "grad_norm": 7.390078067779541, "learning_rate": 2.0178866775369774e-07, "log_odds_chosen": 0.31485018134117126, "log_odds_ratio": -0.6769185066223145, "logits/chosen": 1.8932464122772217, "logits/rejected": 1.8560025691986084, "logps/chosen": -0.8659391403198242, "logps/rejected": -1.0502351522445679, "loss": 8.7301, "nll_loss": 1.1085679531097412, "rewards/accuracies": 0.5718749761581421, "rewards/chosen": -0.008659390732645988, "rewards/margins": 0.0018429612973704934, "rewards/rejected": -0.010502351447939873, "step": 290 }, { "epoch": 0.6282722513089005, "grad_norm": 7.00534200668335, "learning_rate": 1.839699339491937e-07, "log_odds_chosen": 0.2003917694091797, "log_odds_ratio": -0.7062225937843323, "logits/chosen": 1.7795374393463135, "logits/rejected": 1.7968852519989014, "logps/chosen": -0.9032294154167175, "logps/rejected": -1.0224246978759766, "loss": 8.5708, "nll_loss": 1.07076096534729, "rewards/accuracies": 0.559374988079071, "rewards/chosen": -0.00903229508548975, "rewards/margins": 0.0011919522657990456, "rewards/rejected": -0.010224247351288795, "step": 300 }, { "epoch": 0.6492146596858639, "grad_norm": 8.29725170135498, "learning_rate": 1.6650514271527465e-07, "log_odds_chosen": 0.33252888917922974, "log_odds_ratio": -0.6427666544914246, "logits/chosen": 1.7817466259002686, "logits/rejected": 1.805783987045288, "logps/chosen": -0.8416460752487183, "logps/rejected": -1.0354901552200317, "loss": 8.5523, "nll_loss": 1.023045301437378, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.008416460826992989, "rewards/margins": 0.0019384392071515322, "rewards/rejected": -0.010354900732636452, "step": 310 }, { "epoch": 0.6701570680628273, "grad_norm": 7.233508110046387, "learning_rate": 1.4948791099758052e-07, "log_odds_chosen": 0.302054762840271, "log_odds_ratio": -0.6652564406394958, "logits/chosen": 1.8713328838348389, "logits/rejected": 1.906873106956482, "logps/chosen": -0.857520580291748, "logps/rejected": -1.0245308876037598, "loss": 8.7328, "nll_loss": 1.081656813621521, "rewards/accuracies": 0.609375, "rewards/chosen": -0.008575205691158772, "rewards/margins": 0.0016701031709089875, "rewards/rejected": -0.010245309211313725, "step": 320 }, { "epoch": 0.6910994764397905, "grad_norm": 7.668047904968262, "learning_rate": 1.3300945667758012e-07, "log_odds_chosen": 0.3296203017234802, "log_odds_ratio": -0.6710628867149353, "logits/chosen": 1.8118549585342407, "logits/rejected": 1.7958993911743164, "logps/chosen": -0.8951608538627625, "logps/rejected": -1.0847995281219482, "loss": 8.9004, "nll_loss": 1.1010843515396118, "rewards/accuracies": 0.590624988079071, "rewards/chosen": -0.008951608091592789, "rewards/margins": 0.0018963876646012068, "rewards/rejected": -0.010847995989024639, "step": 330 }, { "epoch": 0.7120418848167539, "grad_norm": 8.6635160446167, "learning_rate": 1.1715810961514072e-07, "log_odds_chosen": 0.3200518488883972, "log_odds_ratio": -0.6835609078407288, "logits/chosen": 1.8284895420074463, "logits/rejected": 1.8095057010650635, "logps/chosen": -0.8995221853256226, "logps/rejected": -1.0893176794052124, "loss": 8.7465, "nll_loss": 1.092341661453247, "rewards/accuracies": 0.590624988079071, "rewards/chosen": -0.008995221927762032, "rewards/margins": 0.0018979553133249283, "rewards/rejected": -0.01089317724108696, "step": 340 }, { "epoch": 0.7329842931937173, "grad_norm": 8.274894714355469, "learning_rate": 1.0201883817182949e-07, "log_odds_chosen": 0.3764176368713379, "log_odds_ratio": -0.6291422843933105, "logits/chosen": 1.890041708946228, "logits/rejected": 1.9048725366592407, "logps/chosen": -0.8920204043388367, "logps/rejected": -1.1221367120742798, "loss": 8.845, "nll_loss": 1.1252596378326416, "rewards/accuracies": 0.640625, "rewards/chosen": -0.00892020296305418, "rewards/margins": 0.00230116187594831, "rewards/rejected": -0.011221365071833134, "step": 350 }, { "epoch": 0.7539267015706806, "grad_norm": 8.172623634338379, "learning_rate": 8.76727937529367e-08, "log_odds_chosen": 0.3156259059906006, "log_odds_ratio": -0.6558908224105835, "logits/chosen": 1.8350282907485962, "logits/rejected": 1.8624794483184814, "logps/chosen": -0.8711791038513184, "logps/rejected": -1.0731276273727417, "loss": 8.8469, "nll_loss": 1.1009576320648193, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.008711791597306728, "rewards/margins": 0.002019485691562295, "rewards/rejected": -0.010731276124715805, "step": 360 }, { "epoch": 0.774869109947644, "grad_norm": 9.326078414916992, "learning_rate": 7.419687580962222e-08, "log_odds_chosen": 0.3530608117580414, "log_odds_ratio": -0.6621404886245728, "logits/chosen": 1.9310886859893799, "logits/rejected": 1.8785558938980103, "logps/chosen": -0.9094289541244507, "logps/rejected": -1.133821725845337, "loss": 8.7173, "nll_loss": 1.1301552057266235, "rewards/accuracies": 0.5687500238418579, "rewards/chosen": -0.00909428857266903, "rewards/margins": 0.0022439290769398212, "rewards/rejected": -0.011338217183947563, "step": 370 }, { "epoch": 0.7958115183246073, "grad_norm": 9.022133827209473, "learning_rate": 6.166331963291519e-08, "log_odds_chosen": 0.23490826785564423, "log_odds_ratio": -0.6955921053886414, "logits/chosen": 1.9562733173370361, "logits/rejected": 1.8897705078125, "logps/chosen": -0.8423633575439453, "logps/rejected": -0.994970977306366, "loss": 8.9164, "nll_loss": 1.1008055210113525, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.008423633873462677, "rewards/margins": 0.0015260763466358185, "rewards/rejected": -0.009949709288775921, "step": 380 }, { "epoch": 0.8167539267015707, "grad_norm": 7.358635425567627, "learning_rate": 5.013930914912476e-08, "log_odds_chosen": 0.27963709831237793, "log_odds_ratio": -0.6734436750411987, "logits/chosen": 1.956067681312561, "logits/rejected": 1.9839175939559937, "logps/chosen": -0.8422489166259766, "logps/rejected": -1.0293291807174683, "loss": 8.6461, "nll_loss": 1.0561668872833252, "rewards/accuracies": 0.578125, "rewards/chosen": -0.008422489278018475, "rewards/margins": 0.0018708031857386231, "rewards/rejected": -0.010293291881680489, "step": 390 }, { "epoch": 0.837696335078534, "grad_norm": 7.309168338775635, "learning_rate": 3.968661679220467e-08, "log_odds_chosen": 0.18631207942962646, "log_odds_ratio": -0.7142513990402222, "logits/chosen": 1.8434158563613892, "logits/rejected": 1.8182004690170288, "logps/chosen": -0.8909440040588379, "logps/rejected": -1.0001533031463623, "loss": 8.6763, "nll_loss": 1.1097790002822876, "rewards/accuracies": 0.5406249761581421, "rewards/chosen": -0.008909439668059349, "rewards/margins": 0.00109209178481251, "rewards/rejected": -0.01000153087079525, "step": 400 }, { "epoch": 0.837696335078534, "eval_log_odds_chosen": 0.3030896484851837, "eval_log_odds_ratio": -0.6629699468612671, "eval_logits/chosen": 2.1568901538848877, "eval_logits/rejected": 2.191537857055664, "eval_logps/chosen": -0.8522689342498779, "eval_logps/rejected": -1.0496091842651367, "eval_loss": 1.078381061553955, "eval_nll_loss": 1.1099687814712524, "eval_rewards/accuracies": 0.6060000061988831, "eval_rewards/chosen": -0.008522690273821354, "eval_rewards/margins": 0.0019734008237719536, "eval_rewards/rejected": -0.010496090166270733, "eval_runtime": 46.7843, "eval_samples_per_second": 42.749, "eval_steps_per_second": 5.344, "step": 400 }, { "epoch": 0.8586387434554974, "grad_norm": 8.156121253967285, "learning_rate": 3.036127238347164e-08, "log_odds_chosen": 0.31570926308631897, "log_odds_ratio": -0.6613708734512329, "logits/chosen": 1.936348557472229, "logits/rejected": 1.903748869895935, "logps/chosen": -0.8457509875297546, "logps/rejected": -1.0446395874023438, "loss": 8.7334, "nll_loss": 1.0835765600204468, "rewards/accuracies": 0.609375, "rewards/chosen": -0.00845750980079174, "rewards/margins": 0.001988885225728154, "rewards/rejected": -0.010446394793689251, "step": 410 }, { "epoch": 0.8795811518324608, "grad_norm": 8.324801445007324, "learning_rate": 2.2213262793589482e-08, "log_odds_chosen": 0.29435402154922485, "log_odds_ratio": -0.6616442799568176, "logits/chosen": 1.8585201501846313, "logits/rejected": 1.8436048030853271, "logps/chosen": -0.8651920557022095, "logps/rejected": -1.0383055210113525, "loss": 8.6628, "nll_loss": 1.0541940927505493, "rewards/accuracies": 0.596875011920929, "rewards/chosen": -0.008651919662952423, "rewards/margins": 0.0017311364645138383, "rewards/rejected": -0.010383055545389652, "step": 420 }, { "epoch": 0.900523560209424, "grad_norm": 8.076932907104492, "learning_rate": 1.5286263996730026e-08, "log_odds_chosen": 0.48232072591781616, "log_odds_ratio": -0.6256499290466309, "logits/chosen": 1.8976824283599854, "logits/rejected": 1.8619095087051392, "logps/chosen": -0.8179370760917664, "logps/rejected": -1.1194853782653809, "loss": 8.6524, "nll_loss": 1.1020301580429077, "rewards/accuracies": 0.609375, "rewards/chosen": -0.008179371245205402, "rewards/margins": 0.003015482099726796, "rewards/rejected": -0.011194853112101555, "step": 430 }, { "epoch": 0.9214659685863874, "grad_norm": 8.184592247009277, "learning_rate": 9.617406953185136e-09, "log_odds_chosen": 0.3493059575557709, "log_odds_ratio": -0.646949052810669, "logits/chosen": 1.9365053176879883, "logits/rejected": 1.9345598220825195, "logps/chosen": -0.8272320032119751, "logps/rejected": -1.0139485597610474, "loss": 8.7643, "nll_loss": 1.0985225439071655, "rewards/accuracies": 0.628125011920929, "rewards/chosen": -0.008272320032119751, "rewards/margins": 0.0018671646248549223, "rewards/rejected": -0.010139484889805317, "step": 440 }, { "epoch": 0.9424083769633508, "grad_norm": 8.757542610168457, "learning_rate": 5.2370785753763356e-09, "log_odds_chosen": 0.2785571217536926, "log_odds_ratio": -0.6923194527626038, "logits/chosen": 1.9607532024383545, "logits/rejected": 1.9347816705703735, "logps/chosen": -0.8750311732292175, "logps/rejected": -1.0376728773117065, "loss": 8.7208, "nll_loss": 1.1206778287887573, "rewards/accuracies": 0.6156250238418579, "rewards/chosen": -0.0087503120303154, "rewards/margins": 0.0016264161095023155, "rewards/rejected": -0.010376728139817715, "step": 450 }, { "epoch": 0.9633507853403142, "grad_norm": 7.935389995574951, "learning_rate": 2.168758844148272e-09, "log_odds_chosen": 0.33909493684768677, "log_odds_ratio": -0.654638409614563, "logits/chosen": 2.066657304763794, "logits/rejected": 2.039240598678589, "logps/chosen": -0.8697013854980469, "logps/rejected": -1.0564416646957397, "loss": 8.7399, "nll_loss": 1.149594783782959, "rewards/accuracies": 0.59375, "rewards/chosen": -0.00869701337069273, "rewards/margins": 0.0018674019956961274, "rewards/rejected": -0.010564416646957397, "step": 460 }, { "epoch": 0.9842931937172775, "grad_norm": 7.827225685119629, "learning_rate": 4.288949484559934e-10, "log_odds_chosen": 0.38105446100234985, "log_odds_ratio": -0.6435109376907349, "logits/chosen": 1.9544579982757568, "logits/rejected": 1.9189164638519287, "logps/chosen": -0.8262852430343628, "logps/rejected": -1.0520846843719482, "loss": 8.7101, "nll_loss": 1.105509638786316, "rewards/accuracies": 0.6031249761581421, "rewards/chosen": -0.00826285220682621, "rewards/margins": 0.002257994608953595, "rewards/rejected": -0.010520846582949162, "step": 470 }, { "epoch": 0.9989528795811519, "step": 477, "total_flos": 0.0, "train_loss": 8.957356926780077, "train_runtime": 5488.1377, "train_samples_per_second": 11.139, "train_steps_per_second": 0.087 } ], "logging_steps": 10, "max_steps": 477, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }