{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9989528795811519, "eval_steps": 200, "global_step": 477, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0020942408376963353, "grad_norm": 70.76434326171875, "learning_rate": 0.0, "logits/chosen": -0.5995081663131714, "logits/rejected": -0.6144353747367859, "logps/chosen": -267.5271911621094, "logps/rejected": -204.23904418945312, "loss": 19.467, "nll_loss": 1.0874961614608765, "rewards/accuracies": 0.46875, "rewards/chosen": -2.675271511077881, "rewards/margins": -0.6328814029693604, "rewards/rejected": -2.0423905849456787, "step": 1 }, { "epoch": 0.020942408376963352, "grad_norm": 66.30955505371094, "learning_rate": 9.375e-08, "logits/chosen": -0.6324494481086731, "logits/rejected": -0.6370331645011902, "logps/chosen": -296.5570068359375, "logps/rejected": -258.9596252441406, "loss": 18.6053, "nll_loss": 1.038037657737732, "rewards/accuracies": 0.4826388955116272, "rewards/chosen": -2.9655702114105225, "rewards/margins": -0.37597405910491943, "rewards/rejected": -2.5895962715148926, "step": 10 }, { "epoch": 0.041884816753926704, "grad_norm": 70.56298828125, "learning_rate": 1.9791666666666664e-07, "logits/chosen": -0.5974550247192383, "logits/rejected": -0.6282276511192322, "logps/chosen": -297.4111022949219, "logps/rejected": -256.52508544921875, "loss": 18.3633, "nll_loss": 1.0587749481201172, "rewards/accuracies": 0.45625001192092896, "rewards/chosen": -2.9741110801696777, "rewards/margins": -0.4088606834411621, "rewards/rejected": -2.5652506351470947, "step": 20 }, { "epoch": 0.06282722513089005, "grad_norm": 58.68699645996094, "learning_rate": 3.020833333333333e-07, "logits/chosen": -0.6186209321022034, "logits/rejected": -0.6106249094009399, "logps/chosen": -274.9568786621094, "logps/rejected": -245.85302734375, "loss": 18.1303, "nll_loss": 1.0308423042297363, "rewards/accuracies": 0.47187501192092896, "rewards/chosen": -2.749568462371826, "rewards/margins": -0.2910384237766266, "rewards/rejected": -2.4585299491882324, "step": 30 }, { "epoch": 0.08376963350785341, "grad_norm": 51.15740966796875, "learning_rate": 4.0625e-07, "logits/chosen": -0.6243024468421936, "logits/rejected": -0.6493061780929565, "logps/chosen": -273.6658630371094, "logps/rejected": -256.23516845703125, "loss": 18.0446, "nll_loss": 0.984009861946106, "rewards/accuracies": 0.4937500059604645, "rewards/chosen": -2.7366585731506348, "rewards/margins": -0.17430701851844788, "rewards/rejected": -2.562351703643799, "step": 40 }, { "epoch": 0.10471204188481675, "grad_norm": 44.492061614990234, "learning_rate": 4.999932966293553e-07, "logits/chosen": -0.6370277404785156, "logits/rejected": -0.677150547504425, "logps/chosen": -267.9862976074219, "logps/rejected": -263.2257385253906, "loss": 17.4079, "nll_loss": 0.9246982336044312, "rewards/accuracies": 0.4375, "rewards/chosen": -2.679863452911377, "rewards/margins": -0.04760568588972092, "rewards/rejected": -2.6322576999664307, "step": 50 }, { "epoch": 0.1256544502617801, "grad_norm": 46.6144905090332, "learning_rate": 4.991893270335525e-07, "logits/chosen": -0.645717203617096, "logits/rejected": -0.6591695547103882, "logps/chosen": -267.19000244140625, "logps/rejected": -248.19009399414062, "loss": 17.5598, "nll_loss": 0.9888619184494019, "rewards/accuracies": 0.46875, "rewards/chosen": -2.6719002723693848, "rewards/margins": -0.18999925255775452, "rewards/rejected": -2.481900930404663, "step": 60 }, { "epoch": 0.14659685863874344, "grad_norm": 47.23382568359375, "learning_rate": 4.970496218214204e-07, "logits/chosen": -0.6946207284927368, "logits/rejected": -0.6999990344047546, "logps/chosen": -272.85723876953125, "logps/rejected": -265.056640625, "loss": 17.1179, "nll_loss": 1.0355949401855469, "rewards/accuracies": 0.48124998807907104, "rewards/chosen": -2.728572368621826, "rewards/margins": -0.07800592482089996, "rewards/rejected": -2.650566577911377, "step": 70 }, { "epoch": 0.16753926701570682, "grad_norm": 44.47047805786133, "learning_rate": 4.935856505068998e-07, "logits/chosen": -0.6580323576927185, "logits/rejected": -0.6536714434623718, "logps/chosen": -264.0460510253906, "logps/rejected": -245.66616821289062, "loss": 17.0495, "nll_loss": 0.9749844670295715, "rewards/accuracies": 0.4437499940395355, "rewards/chosen": -2.64046049118042, "rewards/margins": -0.18379904329776764, "rewards/rejected": -2.4566617012023926, "step": 80 }, { "epoch": 0.18848167539267016, "grad_norm": 43.76174545288086, "learning_rate": 4.8881598109976e-07, "logits/chosen": -0.6580432057380676, "logits/rejected": -0.6635259389877319, "logps/chosen": -260.81396484375, "logps/rejected": -237.932373046875, "loss": 17.2031, "nll_loss": 0.9465915560722351, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -2.608139753341675, "rewards/margins": -0.22881582379341125, "rewards/rejected": -2.379323959350586, "step": 90 }, { "epoch": 0.2094240837696335, "grad_norm": 52.968414306640625, "learning_rate": 4.827661805750437e-07, "logits/chosen": -0.6285615563392639, "logits/rejected": -0.6467532515525818, "logps/chosen": -256.7583923339844, "logps/rejected": -259.3887634277344, "loss": 17.1236, "nll_loss": 0.9392696619033813, "rewards/accuracies": 0.5625, "rewards/chosen": -2.5675835609436035, "rewards/margins": 0.026304006576538086, "rewards/rejected": -2.5938878059387207, "step": 100 }, { "epoch": 0.23036649214659685, "grad_norm": 43.268592834472656, "learning_rate": 4.75468677825789e-07, "logits/chosen": -0.6423302888870239, "logits/rejected": -0.6667316555976868, "logps/chosen": -255.0517120361328, "logps/rejected": -249.2233428955078, "loss": 16.8436, "nll_loss": 0.8955658078193665, "rewards/accuracies": 0.484375, "rewards/chosen": -2.5505168437957764, "rewards/margins": -0.058283496648073196, "rewards/rejected": -2.4922332763671875, "step": 110 }, { "epoch": 0.2513089005235602, "grad_norm": 44.98443603515625, "learning_rate": 4.669625898336438e-07, "logits/chosen": -0.6440318822860718, "logits/rejected": -0.6476173400878906, "logps/chosen": -251.17318725585938, "logps/rejected": -258.1605529785156, "loss": 17.2149, "nll_loss": 0.9454558491706848, "rewards/accuracies": 0.46875, "rewards/chosen": -2.5117321014404297, "rewards/margins": 0.06987308710813522, "rewards/rejected": -2.5816054344177246, "step": 120 }, { "epoch": 0.27225130890052357, "grad_norm": 45.67206573486328, "learning_rate": 4.5729351198915705e-07, "logits/chosen": -0.6568298935890198, "logits/rejected": -0.6463169455528259, "logps/chosen": -256.1776428222656, "logps/rejected": -261.1674499511719, "loss": 17.4786, "nll_loss": 0.9527324438095093, "rewards/accuracies": 0.53125, "rewards/chosen": -2.5617761611938477, "rewards/margins": 0.049898095428943634, "rewards/rejected": -2.6116745471954346, "step": 130 }, { "epoch": 0.2931937172774869, "grad_norm": 46.12214279174805, "learning_rate": 4.4651327368569684e-07, "logits/chosen": -0.6722804307937622, "logits/rejected": -0.6767354011535645, "logps/chosen": -256.47052001953125, "logps/rejected": -241.89675903320312, "loss": 16.9861, "nll_loss": 0.9813588857650757, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -2.5647053718566895, "rewards/margins": -0.14573803544044495, "rewards/rejected": -2.4189672470092773, "step": 140 }, { "epoch": 0.31413612565445026, "grad_norm": 41.50273132324219, "learning_rate": 4.346796604970912e-07, "logits/chosen": -0.6918385624885559, "logits/rejected": -0.6956163644790649, "logps/chosen": -277.5316162109375, "logps/rejected": -241.47592163085938, "loss": 17.2163, "nll_loss": 0.9886034727096558, "rewards/accuracies": 0.40312498807907104, "rewards/chosen": -2.7753162384033203, "rewards/margins": -0.36055710911750793, "rewards/rejected": -2.4147589206695557, "step": 150 }, { "epoch": 0.33507853403141363, "grad_norm": 43.583335876464844, "learning_rate": 4.218561044282098e-07, "logits/chosen": -0.6789825558662415, "logits/rejected": -0.6846081614494324, "logps/chosen": -260.73504638671875, "logps/rejected": -242.71273803710938, "loss": 17.1598, "nll_loss": 0.9698236584663391, "rewards/accuracies": 0.4906249940395355, "rewards/chosen": -2.6073505878448486, "rewards/margins": -0.1802230179309845, "rewards/rejected": -2.4271275997161865, "step": 160 }, { "epoch": 0.35602094240837695, "grad_norm": 46.515716552734375, "learning_rate": 4.081113438988443e-07, "logits/chosen": -0.660742461681366, "logits/rejected": -0.6625587940216064, "logps/chosen": -270.3114013671875, "logps/rejected": -229.07958984375, "loss": 17.5632, "nll_loss": 0.9593551754951477, "rewards/accuracies": 0.46875, "rewards/chosen": -2.7031142711639404, "rewards/margins": -0.4123184084892273, "rewards/rejected": -2.2907958030700684, "step": 170 }, { "epoch": 0.3769633507853403, "grad_norm": 44.22664260864258, "learning_rate": 3.935190552834828e-07, "logits/chosen": -0.6569573879241943, "logits/rejected": -0.672781229019165, "logps/chosen": -266.2065734863281, "logps/rejected": -238.51296997070312, "loss": 16.9221, "nll_loss": 0.986883282661438, "rewards/accuracies": 0.46875, "rewards/chosen": -2.6620657444000244, "rewards/margins": -0.27693620324134827, "rewards/rejected": -2.385129451751709, "step": 180 }, { "epoch": 0.39790575916230364, "grad_norm": 43.466007232666016, "learning_rate": 3.781574579820464e-07, "logits/chosen": -0.6655328273773193, "logits/rejected": -0.683971107006073, "logps/chosen": -261.59442138671875, "logps/rejected": -244.1781768798828, "loss": 17.1813, "nll_loss": 0.9573402404785156, "rewards/accuracies": 0.5, "rewards/chosen": -2.6159441471099854, "rewards/margins": -0.17416232824325562, "rewards/rejected": -2.441781520843506, "step": 190 }, { "epoch": 0.418848167539267, "grad_norm": 46.04378128051758, "learning_rate": 3.621088951385353e-07, "logits/chosen": -0.6769067645072937, "logits/rejected": -0.6942776441574097, "logps/chosen": -235.3308563232422, "logps/rejected": -231.94094848632812, "loss": 17.0014, "nll_loss": 0.9534599184989929, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -2.353308916091919, "rewards/margins": -0.03389930725097656, "rewards/rejected": -2.3194096088409424, "step": 200 }, { "epoch": 0.418848167539267, "eval_logits/chosen": -0.6766718626022339, "eval_logits/rejected": -0.6742084622383118, "eval_logps/chosen": -270.51043701171875, "eval_logps/rejected": -255.9008331298828, "eval_loss": 2.0830676555633545, "eval_nll_loss": 0.9401236176490784, "eval_rewards/accuracies": 0.5019999742507935, "eval_rewards/chosen": -2.7051045894622803, "eval_rewards/margins": -0.1460963785648346, "eval_rewards/rejected": -2.5590081214904785, "eval_runtime": 40.7467, "eval_samples_per_second": 49.084, "eval_steps_per_second": 3.068, "step": 200 }, { "epoch": 0.4397905759162304, "grad_norm": 43.57101058959961, "learning_rate": 3.454593922550693e-07, "logits/chosen": -0.6506687998771667, "logits/rejected": -0.6705228090286255, "logps/chosen": -269.6361389160156, "logps/rejected": -255.491943359375, "loss": 17.0614, "nll_loss": 0.9551501274108887, "rewards/accuracies": 0.4593749940395355, "rewards/chosen": -2.696361541748047, "rewards/margins": -0.14144185185432434, "rewards/rejected": -2.554919481277466, "step": 210 }, { "epoch": 0.4607329842931937, "grad_norm": 48.80193328857422, "learning_rate": 3.2829819606729477e-07, "logits/chosen": -0.6907462477684021, "logits/rejected": -0.6869294047355652, "logps/chosen": -264.3199157714844, "logps/rejected": -263.21435546875, "loss": 17.3722, "nll_loss": 1.016068696975708, "rewards/accuracies": 0.5093749761581421, "rewards/chosen": -2.6431994438171387, "rewards/margins": -0.01105602364987135, "rewards/rejected": -2.632143020629883, "step": 220 }, { "epoch": 0.4816753926701571, "grad_norm": 49.72327423095703, "learning_rate": 3.1071729615293424e-07, "logits/chosen": -0.6712943315505981, "logits/rejected": -0.6651626825332642, "logps/chosen": -258.03948974609375, "logps/rejected": -247.8812713623047, "loss": 16.7045, "nll_loss": 0.9863218069076538, "rewards/accuracies": 0.5, "rewards/chosen": -2.5803945064544678, "rewards/margins": -0.10158199071884155, "rewards/rejected": -2.4788126945495605, "step": 230 }, { "epoch": 0.5026178010471204, "grad_norm": 47.963985443115234, "learning_rate": 2.9281093183781403e-07, "logits/chosen": -0.6394098997116089, "logits/rejected": -0.6638405323028564, "logps/chosen": -253.6232452392578, "logps/rejected": -248.09030151367188, "loss": 17.0898, "nll_loss": 0.9433158040046692, "rewards/accuracies": 0.46562498807907104, "rewards/chosen": -2.5362324714660645, "rewards/margins": -0.05532960966229439, "rewards/rejected": -2.480902910232544, "step": 240 }, { "epoch": 0.5235602094240838, "grad_norm": 47.86073303222656, "learning_rate": 2.7467508704251135e-07, "logits/chosen": -0.6551315784454346, "logits/rejected": -0.6670433282852173, "logps/chosen": -270.5923156738281, "logps/rejected": -261.06988525390625, "loss": 17.1281, "nll_loss": 0.9717443585395813, "rewards/accuracies": 0.48124998807907104, "rewards/chosen": -2.705922842025757, "rewards/margins": -0.09522420912981033, "rewards/rejected": -2.610698938369751, "step": 250 }, { "epoch": 0.5445026178010471, "grad_norm": 47.56459045410156, "learning_rate": 2.5640697577740815e-07, "logits/chosen": -0.6732845306396484, "logits/rejected": -0.6699023842811584, "logps/chosen": -270.72198486328125, "logps/rejected": -260.89947509765625, "loss": 17.1471, "nll_loss": 0.9890007972717285, "rewards/accuracies": 0.4781250059604645, "rewards/chosen": -2.7072196006774902, "rewards/margins": -0.09822509437799454, "rewards/rejected": -2.608994245529175, "step": 260 }, { "epoch": 0.5654450261780105, "grad_norm": 48.21664810180664, "learning_rate": 2.381045210440644e-07, "logits/chosen": -0.6522115468978882, "logits/rejected": -0.6585103273391724, "logps/chosen": -263.1189880371094, "logps/rejected": -262.07476806640625, "loss": 16.5867, "nll_loss": 0.9758440852165222, "rewards/accuracies": 0.515625, "rewards/chosen": -2.6311895847320557, "rewards/margins": -0.010442105121910572, "rewards/rejected": -2.6207478046417236, "step": 270 }, { "epoch": 0.5863874345549738, "grad_norm": 48.17446517944336, "learning_rate": 2.1986582993616925e-07, "logits/chosen": -0.6916307210922241, "logits/rejected": -0.6949232816696167, "logps/chosen": -283.92138671875, "logps/rejected": -252.03695678710938, "loss": 16.3066, "nll_loss": 0.9775869250297546, "rewards/accuracies": 0.4906249940395355, "rewards/chosen": -2.8392136096954346, "rewards/margins": -0.31884413957595825, "rewards/rejected": -2.5203697681427, "step": 280 }, { "epoch": 0.6073298429319371, "grad_norm": 50.038490295410156, "learning_rate": 2.0178866775369774e-07, "logits/chosen": -0.6787586212158203, "logits/rejected": -0.6551983952522278, "logps/chosen": -288.01885986328125, "logps/rejected": -289.47210693359375, "loss": 17.0343, "nll_loss": 0.9661861658096313, "rewards/accuracies": 0.5062500238418579, "rewards/chosen": -2.880188465118408, "rewards/margins": 0.014532634988427162, "rewards/rejected": -2.8947207927703857, "step": 290 }, { "epoch": 0.6282722513089005, "grad_norm": 51.18766784667969, "learning_rate": 1.839699339491937e-07, "logits/chosen": -0.6984601616859436, "logits/rejected": -0.6806785464286804, "logps/chosen": -260.677001953125, "logps/rejected": -259.806640625, "loss": 16.2516, "nll_loss": 1.0030128955841064, "rewards/accuracies": 0.5625, "rewards/chosen": -2.6067698001861572, "rewards/margins": -0.008703561499714851, "rewards/rejected": -2.5980663299560547, "step": 300 }, { "epoch": 0.6492146596858639, "grad_norm": 52.35702896118164, "learning_rate": 1.6650514271527465e-07, "logits/chosen": -0.6617860198020935, "logits/rejected": -0.6756520867347717, "logps/chosen": -277.81988525390625, "logps/rejected": -275.82440185546875, "loss": 16.6359, "nll_loss": 0.9435958862304688, "rewards/accuracies": 0.48124998807907104, "rewards/chosen": -2.778198719024658, "rewards/margins": -0.01995471678674221, "rewards/rejected": -2.7582437992095947, "step": 310 }, { "epoch": 0.6701570680628273, "grad_norm": 54.676971435546875, "learning_rate": 1.4948791099758052e-07, "logits/chosen": -0.6700472235679626, "logits/rejected": -0.6831429600715637, "logps/chosen": -268.69464111328125, "logps/rejected": -261.7899169921875, "loss": 17.0965, "nll_loss": 0.9783571362495422, "rewards/accuracies": 0.534375011920929, "rewards/chosen": -2.686946392059326, "rewards/margins": -0.06904693692922592, "rewards/rejected": -2.6178994178771973, "step": 320 }, { "epoch": 0.6910994764397905, "grad_norm": 53.3182487487793, "learning_rate": 1.3300945667758012e-07, "logits/chosen": -0.6883733868598938, "logits/rejected": -0.6983016133308411, "logps/chosen": -267.2320861816406, "logps/rejected": -268.79241943359375, "loss": 16.5789, "nll_loss": 0.9990630149841309, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -2.672320604324341, "rewards/margins": 0.015603733249008656, "rewards/rejected": -2.68792462348938, "step": 330 }, { "epoch": 0.7120418848167539, "grad_norm": 60.07493591308594, "learning_rate": 1.1715810961514072e-07, "logits/chosen": -0.6815968155860901, "logits/rejected": -0.684557318687439, "logps/chosen": -242.76406860351562, "logps/rejected": -240.29312133789062, "loss": 16.7382, "nll_loss": 0.9848185777664185, "rewards/accuracies": 0.4937500059604645, "rewards/chosen": -2.427640676498413, "rewards/margins": -0.024709587916731834, "rewards/rejected": -2.402930736541748, "step": 340 }, { "epoch": 0.7329842931937173, "grad_norm": 65.03939056396484, "learning_rate": 1.0201883817182949e-07, "logits/chosen": -0.7134624719619751, "logits/rejected": -0.7202504277229309, "logps/chosen": -281.94329833984375, "logps/rejected": -274.9791564941406, "loss": 16.9451, "nll_loss": 1.0326627492904663, "rewards/accuracies": 0.46562498807907104, "rewards/chosen": -2.8194327354431152, "rewards/margins": -0.06964141130447388, "rewards/rejected": -2.7497916221618652, "step": 350 }, { "epoch": 0.7539267015706806, "grad_norm": 61.326541900634766, "learning_rate": 8.76727937529367e-08, "logits/chosen": -0.6863014101982117, "logits/rejected": -0.6968516111373901, "logps/chosen": -264.82586669921875, "logps/rejected": -264.02423095703125, "loss": 16.3796, "nll_loss": 0.9723429679870605, "rewards/accuracies": 0.5218750238418579, "rewards/chosen": -2.648258686065674, "rewards/margins": -0.008016402833163738, "rewards/rejected": -2.640242338180542, "step": 360 }, { "epoch": 0.774869109947644, "grad_norm": 63.97214889526367, "learning_rate": 7.419687580962222e-08, "logits/chosen": -0.7269243001937866, "logits/rejected": -0.7250298261642456, "logps/chosen": -285.830078125, "logps/rejected": -268.97650146484375, "loss": 16.7584, "nll_loss": 1.045341968536377, "rewards/accuracies": 0.5093749761581421, "rewards/chosen": -2.858299970626831, "rewards/margins": -0.16853561997413635, "rewards/rejected": -2.6897647380828857, "step": 370 }, { "epoch": 0.7958115183246073, "grad_norm": 69.49169921875, "learning_rate": 6.166331963291519e-08, "logits/chosen": -0.6884605884552002, "logits/rejected": -0.6921494603157043, "logps/chosen": -285.6980895996094, "logps/rejected": -267.3122863769531, "loss": 17.1889, "nll_loss": 0.9817717671394348, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -2.856981039047241, "rewards/margins": -0.18385852873325348, "rewards/rejected": -2.6731224060058594, "step": 380 }, { "epoch": 0.8167539267015707, "grad_norm": 51.80916213989258, "learning_rate": 5.013930914912476e-08, "logits/chosen": -0.7183871269226074, "logits/rejected": -0.7136878967285156, "logps/chosen": -279.07049560546875, "logps/rejected": -281.7729797363281, "loss": 15.9599, "nll_loss": 0.9877471923828125, "rewards/accuracies": 0.5062500238418579, "rewards/chosen": -2.7907042503356934, "rewards/margins": 0.027025192975997925, "rewards/rejected": -2.8177294731140137, "step": 390 }, { "epoch": 0.837696335078534, "grad_norm": 51.51334762573242, "learning_rate": 3.968661679220467e-08, "logits/chosen": -0.6999576091766357, "logits/rejected": -0.709846019744873, "logps/chosen": -280.9571228027344, "logps/rejected": -258.52105712890625, "loss": 16.5359, "nll_loss": 1.0165809392929077, "rewards/accuracies": 0.46875, "rewards/chosen": -2.8095710277557373, "rewards/margins": -0.2243608683347702, "rewards/rejected": -2.585210084915161, "step": 400 }, { "epoch": 0.837696335078534, "eval_logits/chosen": -0.7198893427848816, "eval_logits/rejected": -0.717597246170044, "eval_logps/chosen": -272.6577453613281, "eval_logps/rejected": -266.8027038574219, "eval_loss": 2.0329947471618652, "eval_nll_loss": 0.9492927193641663, "eval_rewards/accuracies": 0.515999972820282, "eval_rewards/chosen": -2.7265775203704834, "eval_rewards/margins": -0.05855049937963486, "eval_rewards/rejected": -2.668027400970459, "eval_runtime": 40.6786, "eval_samples_per_second": 49.166, "eval_steps_per_second": 3.073, "step": 400 }, { "epoch": 0.8586387434554974, "grad_norm": 68.03474426269531, "learning_rate": 3.036127238347164e-08, "logits/chosen": -0.6865943670272827, "logits/rejected": -0.6730343699455261, "logps/chosen": -275.6883239746094, "logps/rejected": -273.2828674316406, "loss": 16.7848, "nll_loss": 0.9662915468215942, "rewards/accuracies": 0.5, "rewards/chosen": -2.756883144378662, "rewards/margins": -0.024054694920778275, "rewards/rejected": -2.7328288555145264, "step": 410 }, { "epoch": 0.8795811518324608, "grad_norm": 61.58699417114258, "learning_rate": 2.2213262793589482e-08, "logits/chosen": -0.6929936408996582, "logits/rejected": -0.6882384419441223, "logps/chosen": -292.38946533203125, "logps/rejected": -280.07513427734375, "loss": 16.3599, "nll_loss": 0.9645106196403503, "rewards/accuracies": 0.5, "rewards/chosen": -2.9238946437835693, "rewards/margins": -0.12314357608556747, "rewards/rejected": -2.800751209259033, "step": 420 }, { "epoch": 0.900523560209424, "grad_norm": 49.81018829345703, "learning_rate": 1.5286263996730026e-08, "logits/chosen": -0.689557671546936, "logits/rejected": -0.7012760639190674, "logps/chosen": -279.8668518066406, "logps/rejected": -269.4853210449219, "loss": 16.75, "nll_loss": 0.9698156118392944, "rewards/accuracies": 0.48124998807907104, "rewards/chosen": -2.7986679077148438, "rewards/margins": -0.10381509363651276, "rewards/rejected": -2.6948530673980713, "step": 430 }, { "epoch": 0.9214659685863874, "grad_norm": 58.9363899230957, "learning_rate": 9.617406953185136e-09, "logits/chosen": -0.6873979568481445, "logits/rejected": -0.7001478672027588, "logps/chosen": -281.0133361816406, "logps/rejected": -289.45672607421875, "loss": 16.5338, "nll_loss": 0.9704917073249817, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -2.810133457183838, "rewards/margins": 0.08443373441696167, "rewards/rejected": -2.8945672512054443, "step": 440 }, { "epoch": 0.9424083769633508, "grad_norm": 54.53861999511719, "learning_rate": 5.2370785753763356e-09, "logits/chosen": -0.7115738987922668, "logits/rejected": -0.7190148830413818, "logps/chosen": -279.23431396484375, "logps/rejected": -262.49847412109375, "loss": 16.6766, "nll_loss": 0.9821802377700806, "rewards/accuracies": 0.515625, "rewards/chosen": -2.7923433780670166, "rewards/margins": -0.16735850274562836, "rewards/rejected": -2.6249847412109375, "step": 450 }, { "epoch": 0.9633507853403142, "grad_norm": 63.032047271728516, "learning_rate": 2.168758844148272e-09, "logits/chosen": -0.7155815958976746, "logits/rejected": -0.7291616797447205, "logps/chosen": -297.2403869628906, "logps/rejected": -281.8386535644531, "loss": 16.78, "nll_loss": 1.0376783609390259, "rewards/accuracies": 0.49687498807907104, "rewards/chosen": -2.9724037647247314, "rewards/margins": -0.15401716530323029, "rewards/rejected": -2.8183865547180176, "step": 460 }, { "epoch": 0.9842931937172775, "grad_norm": 56.330345153808594, "learning_rate": 4.288949484559934e-10, "logits/chosen": -0.6912795305252075, "logits/rejected": -0.6842938661575317, "logps/chosen": -260.95965576171875, "logps/rejected": -258.5793762207031, "loss": 17.0646, "nll_loss": 0.9770845174789429, "rewards/accuracies": 0.528124988079071, "rewards/chosen": -2.6095967292785645, "rewards/margins": -0.02380307950079441, "rewards/rejected": -2.5857937335968018, "step": 470 }, { "epoch": 0.9989528795811519, "step": 477, "total_flos": 0.0, "train_loss": 17.025300079921507, "train_runtime": 4689.5603, "train_samples_per_second": 13.036, "train_steps_per_second": 0.102 } ], "logging_steps": 10, "max_steps": 477, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }