779 lines
26 KiB
JSON
779 lines
26 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.9984301412872841,
|
|
"eval_steps": 500,
|
|
"global_step": 477,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0020931449502878076,
|
|
"grad_norm": 4.5917708857834985,
|
|
"learning_rate": 1.0416666666666666e-08,
|
|
"logits/chosen": -0.8526347279548645,
|
|
"logits/rejected": -0.7768423557281494,
|
|
"logps/chosen": -363.13519287109375,
|
|
"logps/rejected": -364.9631042480469,
|
|
"loss": 0.6931,
|
|
"rewards/accuracies": 0.0,
|
|
"rewards/chosen": 0.0,
|
|
"rewards/margins": 0.0,
|
|
"rewards/rejected": 0.0,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.020931449502878074,
|
|
"grad_norm": 4.404674449441554,
|
|
"learning_rate": 1.0416666666666667e-07,
|
|
"logits/chosen": -0.7482305765151978,
|
|
"logits/rejected": -0.7081854343414307,
|
|
"logps/chosen": -311.2024841308594,
|
|
"logps/rejected": -284.1365966796875,
|
|
"loss": 0.6931,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": 2.1014602680224925e-05,
|
|
"rewards/margins": 8.458160300506279e-05,
|
|
"rewards/rejected": -6.356705853249878e-05,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.04186289900575615,
|
|
"grad_norm": 4.2921416180433765,
|
|
"learning_rate": 2.0833333333333333e-07,
|
|
"logits/chosen": -0.7403894066810608,
|
|
"logits/rejected": -0.6793709993362427,
|
|
"logps/chosen": -324.6893005371094,
|
|
"logps/rejected": -290.2327575683594,
|
|
"loss": 0.693,
|
|
"rewards/accuracies": 0.4749999940395355,
|
|
"rewards/chosen": 0.0014148516347631812,
|
|
"rewards/margins": 0.0002438486844766885,
|
|
"rewards/rejected": 0.0011710028629750013,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.06279434850863422,
|
|
"grad_norm": 4.218970991450984,
|
|
"learning_rate": 3.1249999999999997e-07,
|
|
"logits/chosen": -0.7561457753181458,
|
|
"logits/rejected": -0.7098526954650879,
|
|
"logps/chosen": -295.118408203125,
|
|
"logps/rejected": -255.83407592773438,
|
|
"loss": 0.6917,
|
|
"rewards/accuracies": 0.53125,
|
|
"rewards/chosen": 0.008535891771316528,
|
|
"rewards/margins": 0.0028298485558480024,
|
|
"rewards/rejected": 0.005706042982637882,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.0837257980115123,
|
|
"grad_norm": 3.614368719783126,
|
|
"learning_rate": 4.1666666666666667e-07,
|
|
"logits/chosen": -0.7299980521202087,
|
|
"logits/rejected": -0.6768942475318909,
|
|
"logps/chosen": -267.0862121582031,
|
|
"logps/rejected": -267.53863525390625,
|
|
"loss": 0.6885,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": 0.020161841064691544,
|
|
"rewards/margins": 0.008152564987540245,
|
|
"rewards/rejected": 0.012009273283183575,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.10465724751439037,
|
|
"grad_norm": 3.6040620075273546,
|
|
"learning_rate": 4.999731868769026e-07,
|
|
"logits/chosen": -0.7151128053665161,
|
|
"logits/rejected": -0.6647322177886963,
|
|
"logps/chosen": -296.5942077636719,
|
|
"logps/rejected": -277.5081787109375,
|
|
"loss": 0.6833,
|
|
"rewards/accuracies": 0.6187499761581421,
|
|
"rewards/chosen": 0.0398605577647686,
|
|
"rewards/margins": 0.02836265228688717,
|
|
"rewards/rejected": 0.011497899889945984,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.12558869701726844,
|
|
"grad_norm": 3.5922190973220163,
|
|
"learning_rate": 4.990353313429303e-07,
|
|
"logits/chosen": -0.7289865016937256,
|
|
"logits/rejected": -0.6785635352134705,
|
|
"logps/chosen": -262.1878967285156,
|
|
"logps/rejected": -253.5371856689453,
|
|
"loss": 0.6783,
|
|
"rewards/accuracies": 0.6812499761581421,
|
|
"rewards/chosen": 0.0495939627289772,
|
|
"rewards/margins": 0.042832277715206146,
|
|
"rewards/rejected": 0.0067616915330290794,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.14652014652014653,
|
|
"grad_norm": 3.6371057840364927,
|
|
"learning_rate": 4.967625656594781e-07,
|
|
"logits/chosen": -0.6846636533737183,
|
|
"logits/rejected": -0.6486319303512573,
|
|
"logps/chosen": -304.8815002441406,
|
|
"logps/rejected": -293.3005065917969,
|
|
"loss": 0.6683,
|
|
"rewards/accuracies": 0.606249988079071,
|
|
"rewards/chosen": 0.014333389699459076,
|
|
"rewards/margins": 0.050421230494976044,
|
|
"rewards/rejected": -0.036087844520807266,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.1674515960230246,
|
|
"grad_norm": 4.641698206330642,
|
|
"learning_rate": 4.93167072587771e-07,
|
|
"logits/chosen": -0.7803142070770264,
|
|
"logits/rejected": -0.6576212048530579,
|
|
"logps/chosen": -338.9702453613281,
|
|
"logps/rejected": -270.46124267578125,
|
|
"loss": 0.6684,
|
|
"rewards/accuracies": 0.643750011920929,
|
|
"rewards/chosen": -0.041751302778720856,
|
|
"rewards/margins": 0.06293781846761703,
|
|
"rewards/rejected": -0.10468912124633789,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.18838304552590268,
|
|
"grad_norm": 5.809449166665223,
|
|
"learning_rate": 4.882681251368548e-07,
|
|
"logits/chosen": -0.730857253074646,
|
|
"logits/rejected": -0.6790161728858948,
|
|
"logps/chosen": -270.4856872558594,
|
|
"logps/rejected": -280.201171875,
|
|
"loss": 0.6533,
|
|
"rewards/accuracies": 0.65625,
|
|
"rewards/chosen": -0.05735975503921509,
|
|
"rewards/margins": 0.0867304801940918,
|
|
"rewards/rejected": -0.14409023523330688,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.20931449502878074,
|
|
"grad_norm": 6.629060471777615,
|
|
"learning_rate": 4.820919832540181e-07,
|
|
"logits/chosen": -0.819484531879425,
|
|
"logits/rejected": -0.7444473505020142,
|
|
"logps/chosen": -320.7297058105469,
|
|
"logps/rejected": -315.49786376953125,
|
|
"loss": 0.6452,
|
|
"rewards/accuracies": 0.6499999761581421,
|
|
"rewards/chosen": -0.106062151491642,
|
|
"rewards/margins": 0.15797743201255798,
|
|
"rewards/rejected": -0.2640395760536194,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.2302459445316588,
|
|
"grad_norm": 8.885427777088127,
|
|
"learning_rate": 4.7467175306295647e-07,
|
|
"logits/chosen": -0.7485495805740356,
|
|
"logits/rejected": -0.6900595426559448,
|
|
"logps/chosen": -313.8240966796875,
|
|
"logps/rejected": -310.7196960449219,
|
|
"loss": 0.6442,
|
|
"rewards/accuracies": 0.6875,
|
|
"rewards/chosen": -0.1286977082490921,
|
|
"rewards/margins": 0.1550484299659729,
|
|
"rewards/rejected": -0.2837461233139038,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.25117739403453687,
|
|
"grad_norm": 10.779857080720818,
|
|
"learning_rate": 4.6604720940421207e-07,
|
|
"logits/chosen": -0.6856316328048706,
|
|
"logits/rejected": -0.6849483251571655,
|
|
"logps/chosen": -303.8964538574219,
|
|
"logps/rejected": -321.4309387207031,
|
|
"loss": 0.6199,
|
|
"rewards/accuracies": 0.706250011920929,
|
|
"rewards/chosen": -0.2916944622993469,
|
|
"rewards/margins": 0.21943287551403046,
|
|
"rewards/rejected": -0.5111273527145386,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.272108843537415,
|
|
"grad_norm": 11.982309184139016,
|
|
"learning_rate": 4.5626458262912735e-07,
|
|
"logits/chosen": -0.6801525950431824,
|
|
"logits/rejected": -0.6393054723739624,
|
|
"logps/chosen": -319.20672607421875,
|
|
"logps/rejected": -333.1614685058594,
|
|
"loss": 0.609,
|
|
"rewards/accuracies": 0.6625000238418579,
|
|
"rewards/chosen": -0.27636662125587463,
|
|
"rewards/margins": 0.2492021769285202,
|
|
"rewards/rejected": -0.5255688428878784,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.29304029304029305,
|
|
"grad_norm": 19.151178395769573,
|
|
"learning_rate": 4.453763107901675e-07,
|
|
"logits/chosen": -0.7184507846832275,
|
|
"logits/rejected": -0.6374621987342834,
|
|
"logps/chosen": -356.2397766113281,
|
|
"logps/rejected": -337.32354736328125,
|
|
"loss": 0.6109,
|
|
"rewards/accuracies": 0.65625,
|
|
"rewards/chosen": -0.3082619607448578,
|
|
"rewards/margins": 0.31162941455841064,
|
|
"rewards/rejected": -0.6198914051055908,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.3139717425431711,
|
|
"grad_norm": 13.824909994267095,
|
|
"learning_rate": 4.3344075855595097e-07,
|
|
"logits/chosen": -0.6901696920394897,
|
|
"logits/rejected": -0.6279430389404297,
|
|
"logps/chosen": -353.95184326171875,
|
|
"logps/rejected": -346.9781494140625,
|
|
"loss": 0.6132,
|
|
"rewards/accuracies": 0.675000011920929,
|
|
"rewards/chosen": -0.6322077512741089,
|
|
"rewards/margins": 0.2923499643802643,
|
|
"rewards/rejected": -0.9245575666427612,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.3349031920460492,
|
|
"grad_norm": 13.009971235384292,
|
|
"learning_rate": 4.2052190435769554e-07,
|
|
"logits/chosen": -0.7091597318649292,
|
|
"logits/rejected": -0.6455188989639282,
|
|
"logps/chosen": -340.92657470703125,
|
|
"logps/rejected": -347.0225830078125,
|
|
"loss": 0.603,
|
|
"rewards/accuracies": 0.706250011920929,
|
|
"rewards/chosen": -0.5523598790168762,
|
|
"rewards/margins": 0.33642885088920593,
|
|
"rewards/rejected": -0.8887887001037598,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.35583464154892724,
|
|
"grad_norm": 13.829339790258013,
|
|
"learning_rate": 4.0668899744407567e-07,
|
|
"logits/chosen": -0.6223039627075195,
|
|
"logits/rejected": -0.5784906148910522,
|
|
"logps/chosen": -351.1839904785156,
|
|
"logps/rejected": -358.60479736328125,
|
|
"loss": 0.5953,
|
|
"rewards/accuracies": 0.6625000238418579,
|
|
"rewards/chosen": -0.8050142526626587,
|
|
"rewards/margins": 0.3386740982532501,
|
|
"rewards/rejected": -1.143688440322876,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.37676609105180536,
|
|
"grad_norm": 20.849086588528724,
|
|
"learning_rate": 3.920161866827889e-07,
|
|
"logits/chosen": -0.6424199342727661,
|
|
"logits/rejected": -0.5930343270301819,
|
|
"logps/chosen": -358.6197204589844,
|
|
"logps/rejected": -367.137451171875,
|
|
"loss": 0.5849,
|
|
"rewards/accuracies": 0.6625000238418579,
|
|
"rewards/chosen": -0.8172851800918579,
|
|
"rewards/margins": 0.3052961528301239,
|
|
"rewards/rejected": -1.1225812435150146,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.3976975405546834,
|
|
"grad_norm": 17.753231429350524,
|
|
"learning_rate": 3.765821230985757e-07,
|
|
"logits/chosen": -0.6292937994003296,
|
|
"logits/rejected": -0.615179717540741,
|
|
"logps/chosen": -343.19952392578125,
|
|
"logps/rejected": -375.33929443359375,
|
|
"loss": 0.5817,
|
|
"rewards/accuracies": 0.6875,
|
|
"rewards/chosen": -0.68468177318573,
|
|
"rewards/margins": 0.34494417905807495,
|
|
"rewards/rejected": -1.0296258926391602,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.4186289900575615,
|
|
"grad_norm": 28.151101969379706,
|
|
"learning_rate": 3.604695382782159e-07,
|
|
"logits/chosen": -0.5903419256210327,
|
|
"logits/rejected": -0.5930633544921875,
|
|
"logps/chosen": -360.65179443359375,
|
|
"logps/rejected": -412.225830078125,
|
|
"loss": 0.5821,
|
|
"rewards/accuracies": 0.706250011920929,
|
|
"rewards/chosen": -1.0000778436660767,
|
|
"rewards/margins": 0.33244088292121887,
|
|
"rewards/rejected": -1.3325188159942627,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.43956043956043955,
|
|
"grad_norm": 17.118814416105273,
|
|
"learning_rate": 3.4376480090239047e-07,
|
|
"logits/chosen": -0.6688283085823059,
|
|
"logits/rejected": -0.5644041895866394,
|
|
"logps/chosen": -433.589599609375,
|
|
"logps/rejected": -432.74993896484375,
|
|
"loss": 0.5853,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -1.17396080493927,
|
|
"rewards/margins": 0.4916624426841736,
|
|
"rewards/rejected": -1.6656233072280884,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.4604918890633176,
|
|
"grad_norm": 23.06196255102623,
|
|
"learning_rate": 3.265574537815398e-07,
|
|
"logits/chosen": -0.5818850994110107,
|
|
"logits/rejected": -0.5700303316116333,
|
|
"logps/chosen": -351.98638916015625,
|
|
"logps/rejected": -410.9193420410156,
|
|
"loss": 0.5704,
|
|
"rewards/accuracies": 0.6937500238418579,
|
|
"rewards/chosen": -1.0643993616104126,
|
|
"rewards/margins": 0.5418257713317871,
|
|
"rewards/rejected": -1.6062252521514893,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.48142333856619574,
|
|
"grad_norm": 21.04733996523729,
|
|
"learning_rate": 3.0893973387735683e-07,
|
|
"logits/chosen": -0.6803761720657349,
|
|
"logits/rejected": -0.6139528751373291,
|
|
"logps/chosen": -355.9638671875,
|
|
"logps/rejected": -392.5525207519531,
|
|
"loss": 0.5753,
|
|
"rewards/accuracies": 0.7437499761581421,
|
|
"rewards/chosen": -0.8678590059280396,
|
|
"rewards/margins": 0.6038464307785034,
|
|
"rewards/rejected": -1.4717055559158325,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.5023547880690737,
|
|
"grad_norm": 18.458075241435953,
|
|
"learning_rate": 2.910060778827554e-07,
|
|
"logits/chosen": -0.6669884920120239,
|
|
"logits/rejected": -0.5953234434127808,
|
|
"logps/chosen": -363.3609313964844,
|
|
"logps/rejected": -395.0166320800781,
|
|
"loss": 0.5445,
|
|
"rewards/accuracies": 0.7124999761581421,
|
|
"rewards/chosen": -0.6804043054580688,
|
|
"rewards/margins": 0.501660168170929,
|
|
"rewards/rejected": -1.1820645332336426,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.5232862375719518,
|
|
"grad_norm": 24.001388696637466,
|
|
"learning_rate": 2.7285261601056697e-07,
|
|
"logits/chosen": -0.6821622252464294,
|
|
"logits/rejected": -0.5736308693885803,
|
|
"logps/chosen": -392.59375,
|
|
"logps/rejected": -420.7662658691406,
|
|
"loss": 0.5592,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -1.037414789199829,
|
|
"rewards/margins": 0.6777431964874268,
|
|
"rewards/rejected": -1.7151581048965454,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.54421768707483,
|
|
"grad_norm": 20.80336944317341,
|
|
"learning_rate": 2.5457665670441937e-07,
|
|
"logits/chosen": -0.6666806936264038,
|
|
"logits/rejected": -0.6545027494430542,
|
|
"logps/chosen": -379.15423583984375,
|
|
"logps/rejected": -414.10302734375,
|
|
"loss": 0.549,
|
|
"rewards/accuracies": 0.668749988079071,
|
|
"rewards/chosen": -1.1116763353347778,
|
|
"rewards/margins": 0.48681873083114624,
|
|
"rewards/rejected": -1.5984950065612793,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.565149136577708,
|
|
"grad_norm": 18.999186941850777,
|
|
"learning_rate": 2.3627616503391812e-07,
|
|
"logits/chosen": -0.6578361988067627,
|
|
"logits/rejected": -0.6166576147079468,
|
|
"logps/chosen": -415.3619079589844,
|
|
"logps/rejected": -453.8072814941406,
|
|
"loss": 0.5596,
|
|
"rewards/accuracies": 0.78125,
|
|
"rewards/chosen": -1.0673983097076416,
|
|
"rewards/margins": 0.6960801482200623,
|
|
"rewards/rejected": -1.7634785175323486,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.5860805860805861,
|
|
"grad_norm": 25.011257430836675,
|
|
"learning_rate": 2.1804923757009882e-07,
|
|
"logits/chosen": -0.5656932592391968,
|
|
"logits/rejected": -0.5232654809951782,
|
|
"logps/chosen": -409.4795837402344,
|
|
"logps/rejected": -441.3401794433594,
|
|
"loss": 0.5636,
|
|
"rewards/accuracies": 0.6875,
|
|
"rewards/chosen": -1.4023996591567993,
|
|
"rewards/margins": 0.5914163589477539,
|
|
"rewards/rejected": -1.9938161373138428,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.6070120355834642,
|
|
"grad_norm": 16.649025738470474,
|
|
"learning_rate": 1.9999357655598891e-07,
|
|
"logits/chosen": -0.617931067943573,
|
|
"logits/rejected": -0.5761314034461975,
|
|
"logps/chosen": -406.4532775878906,
|
|
"logps/rejected": -464.67822265625,
|
|
"loss": 0.5591,
|
|
"rewards/accuracies": 0.71875,
|
|
"rewards/chosen": -1.4842880964279175,
|
|
"rewards/margins": 0.5774600505828857,
|
|
"rewards/rejected": -2.0617482662200928,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.6279434850863422,
|
|
"grad_norm": 16.730004932577106,
|
|
"learning_rate": 1.8220596619089573e-07,
|
|
"logits/chosen": -0.6562352180480957,
|
|
"logits/rejected": -0.5790780186653137,
|
|
"logps/chosen": -451.573486328125,
|
|
"logps/rejected": -457.2960510253906,
|
|
"loss": 0.5394,
|
|
"rewards/accuracies": 0.668749988079071,
|
|
"rewards/chosen": -1.1794800758361816,
|
|
"rewards/margins": 0.5321540832519531,
|
|
"rewards/rejected": -1.7116340398788452,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.6488749345892203,
|
|
"grad_norm": 20.824128290150536,
|
|
"learning_rate": 1.647817538357072e-07,
|
|
"logits/chosen": -0.6351410150527954,
|
|
"logits/rejected": -0.5707007050514221,
|
|
"logps/chosen": -443.58978271484375,
|
|
"logps/rejected": -464.4762268066406,
|
|
"loss": 0.5379,
|
|
"rewards/accuracies": 0.737500011920929,
|
|
"rewards/chosen": -1.2887599468231201,
|
|
"rewards/margins": 0.7534160614013672,
|
|
"rewards/rejected": -2.042175769805908,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.6698063840920984,
|
|
"grad_norm": 21.443811248225554,
|
|
"learning_rate": 1.478143389201113e-07,
|
|
"logits/chosen": -0.652029275894165,
|
|
"logits/rejected": -0.5766469240188599,
|
|
"logps/chosen": -430.428466796875,
|
|
"logps/rejected": -456.797607421875,
|
|
"loss": 0.5387,
|
|
"rewards/accuracies": 0.7124999761581421,
|
|
"rewards/chosen": -1.6468877792358398,
|
|
"rewards/margins": 0.5889655351638794,
|
|
"rewards/rejected": -2.235853433609009,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.6907378335949764,
|
|
"grad_norm": 18.054876727682743,
|
|
"learning_rate": 1.3139467229135998e-07,
|
|
"logits/chosen": -0.6749883890151978,
|
|
"logits/rejected": -0.6647608876228333,
|
|
"logps/chosen": -418.3038635253906,
|
|
"logps/rejected": -472.7947692871094,
|
|
"loss": 0.5367,
|
|
"rewards/accuracies": 0.7124999761581421,
|
|
"rewards/chosen": -1.3353520631790161,
|
|
"rewards/margins": 0.6326448917388916,
|
|
"rewards/rejected": -1.9679968357086182,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.7116692830978545,
|
|
"grad_norm": 26.52212132673544,
|
|
"learning_rate": 1.1561076868822755e-07,
|
|
"logits/chosen": -0.6122914552688599,
|
|
"logits/rejected": -0.5800005793571472,
|
|
"logps/chosen": -433.57635498046875,
|
|
"logps/rejected": -465.54669189453125,
|
|
"loss": 0.5428,
|
|
"rewards/accuracies": 0.7124999761581421,
|
|
"rewards/chosen": -1.294762134552002,
|
|
"rewards/margins": 0.587591290473938,
|
|
"rewards/rejected": -1.8823535442352295,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.7326007326007326,
|
|
"grad_norm": 19.655279922879483,
|
|
"learning_rate": 1.0054723495346482e-07,
|
|
"logits/chosen": -0.6827625036239624,
|
|
"logits/rejected": -0.6138468980789185,
|
|
"logps/chosen": -397.7406921386719,
|
|
"logps/rejected": -434.5318298339844,
|
|
"loss": 0.5167,
|
|
"rewards/accuracies": 0.7124999761581421,
|
|
"rewards/chosen": -1.2331862449645996,
|
|
"rewards/margins": 0.6665691137313843,
|
|
"rewards/rejected": -1.8997554779052734,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.7535321821036107,
|
|
"grad_norm": 19.235838308842027,
|
|
"learning_rate": 8.628481651367875e-08,
|
|
"logits/chosen": -0.6631180047988892,
|
|
"logits/rejected": -0.5895651578903198,
|
|
"logps/chosen": -461.24176025390625,
|
|
"logps/rejected": -491.12176513671875,
|
|
"loss": 0.5577,
|
|
"rewards/accuracies": 0.699999988079071,
|
|
"rewards/chosen": -1.4328138828277588,
|
|
"rewards/margins": 0.6876929402351379,
|
|
"rewards/rejected": -2.120506763458252,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.7744636316064888,
|
|
"grad_norm": 19.962323367834856,
|
|
"learning_rate": 7.289996455765748e-08,
|
|
"logits/chosen": -0.6887942552566528,
|
|
"logits/rejected": -0.6230372190475464,
|
|
"logps/chosen": -414.96270751953125,
|
|
"logps/rejected": -449.39874267578125,
|
|
"loss": 0.5411,
|
|
"rewards/accuracies": 0.706250011920929,
|
|
"rewards/chosen": -1.4094620943069458,
|
|
"rewards/margins": 0.6524969935417175,
|
|
"rewards/rejected": -2.0619590282440186,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.7953950811093669,
|
|
"grad_norm": 17.262620635801852,
|
|
"learning_rate": 6.046442623320145e-08,
|
|
"logits/chosen": -0.6020098924636841,
|
|
"logits/rejected": -0.6000246405601501,
|
|
"logps/chosen": -398.9418029785156,
|
|
"logps/rejected": -501.1026916503906,
|
|
"loss": 0.5387,
|
|
"rewards/accuracies": 0.7437499761581421,
|
|
"rewards/chosen": -1.440071940422058,
|
|
"rewards/margins": 0.8659427762031555,
|
|
"rewards/rejected": -2.3060147762298584,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.8163265306122449,
|
|
"grad_norm": 23.268294087473826,
|
|
"learning_rate": 4.904486005914027e-08,
|
|
"logits/chosen": -0.7086952328681946,
|
|
"logits/rejected": -0.6402121782302856,
|
|
"logps/chosen": -475.9898376464844,
|
|
"logps/rejected": -513.5247802734375,
|
|
"loss": 0.5207,
|
|
"rewards/accuracies": 0.768750011920929,
|
|
"rewards/chosen": -1.3465341329574585,
|
|
"rewards/margins": 0.7135976552963257,
|
|
"rewards/rejected": -2.060131788253784,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.837257980115123,
|
|
"grad_norm": 20.66365114424933,
|
|
"learning_rate": 3.8702478614051345e-08,
|
|
"logits/chosen": -0.641961932182312,
|
|
"logits/rejected": -0.5901409983634949,
|
|
"logps/chosen": -395.93157958984375,
|
|
"logps/rejected": -442.00244140625,
|
|
"loss": 0.5353,
|
|
"rewards/accuracies": 0.71875,
|
|
"rewards/chosen": -1.2355709075927734,
|
|
"rewards/margins": 0.6957671642303467,
|
|
"rewards/rejected": -1.9313379526138306,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.858189429618001,
|
|
"grad_norm": 18.94839439294029,
|
|
"learning_rate": 2.9492720416985e-08,
|
|
"logits/chosen": -0.7429651021957397,
|
|
"logits/rejected": -0.6672912836074829,
|
|
"logps/chosen": -440.3075256347656,
|
|
"logps/rejected": -467.0520935058594,
|
|
"loss": 0.5506,
|
|
"rewards/accuracies": 0.731249988079071,
|
|
"rewards/chosen": -1.2028748989105225,
|
|
"rewards/margins": 0.7443949580192566,
|
|
"rewards/rejected": -1.9472697973251343,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.8791208791208791,
|
|
"grad_norm": 19.442404647939284,
|
|
"learning_rate": 2.1464952759020856e-08,
|
|
"logits/chosen": -0.597920298576355,
|
|
"logits/rejected": -0.586058497428894,
|
|
"logps/chosen": -406.3937072753906,
|
|
"logps/rejected": -479.6614685058594,
|
|
"loss": 0.5384,
|
|
"rewards/accuracies": 0.6875,
|
|
"rewards/chosen": -1.3360610008239746,
|
|
"rewards/margins": 0.6579158902168274,
|
|
"rewards/rejected": -1.9939768314361572,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.9000523286237572,
|
|
"grad_norm": 19.433771609383193,
|
|
"learning_rate": 1.4662207078575684e-08,
|
|
"logits/chosen": -0.6416221857070923,
|
|
"logits/rejected": -0.5748856663703918,
|
|
"logps/chosen": -432.43292236328125,
|
|
"logps/rejected": -473.4640197753906,
|
|
"loss": 0.5268,
|
|
"rewards/accuracies": 0.6937500238418579,
|
|
"rewards/chosen": -1.289609670639038,
|
|
"rewards/margins": 0.6828420162200928,
|
|
"rewards/rejected": -1.9724515676498413,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.9209837781266352,
|
|
"grad_norm": 25.563733256044383,
|
|
"learning_rate": 9.12094829893642e-09,
|
|
"logits/chosen": -0.7145225405693054,
|
|
"logits/rejected": -0.6558529138565063,
|
|
"logps/chosen": -401.7915344238281,
|
|
"logps/rejected": -416.718994140625,
|
|
"loss": 0.5331,
|
|
"rewards/accuracies": 0.699999988079071,
|
|
"rewards/chosen": -1.2713123559951782,
|
|
"rewards/margins": 0.5642818212509155,
|
|
"rewards/rejected": -1.8355941772460938,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.9419152276295133,
|
|
"grad_norm": 18.949969564615213,
|
|
"learning_rate": 4.8708793644441086e-09,
|
|
"logits/chosen": -0.5803197622299194,
|
|
"logits/rejected": -0.5434113144874573,
|
|
"logps/chosen": -410.92730712890625,
|
|
"logps/rejected": -468.50323486328125,
|
|
"loss": 0.534,
|
|
"rewards/accuracies": 0.7124999761581421,
|
|
"rewards/chosen": -1.2884615659713745,
|
|
"rewards/margins": 0.7078900337219238,
|
|
"rewards/rejected": -1.9963515996932983,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.9628466771323915,
|
|
"grad_norm": 22.7412601308732,
|
|
"learning_rate": 1.9347820230782295e-09,
|
|
"logits/chosen": -0.6422279477119446,
|
|
"logits/rejected": -0.5603567361831665,
|
|
"logps/chosen": -407.45465087890625,
|
|
"logps/rejected": -431.9442443847656,
|
|
"loss": 0.5408,
|
|
"rewards/accuracies": 0.7437499761581421,
|
|
"rewards/chosen": -1.2375915050506592,
|
|
"rewards/margins": 0.686954140663147,
|
|
"rewards/rejected": -1.9245456457138062,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.9837781266352695,
|
|
"grad_norm": 17.505664359316036,
|
|
"learning_rate": 3.2839470889836627e-10,
|
|
"logits/chosen": -0.6509039998054504,
|
|
"logits/rejected": -0.6031205654144287,
|
|
"logps/chosen": -423.6900939941406,
|
|
"logps/rejected": -475.7867736816406,
|
|
"loss": 0.5134,
|
|
"rewards/accuracies": 0.71875,
|
|
"rewards/chosen": -1.1757431030273438,
|
|
"rewards/margins": 0.6970219612121582,
|
|
"rewards/rejected": -1.8727651834487915,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.9984301412872841,
|
|
"eval_logits/chosen": -0.5977884531021118,
|
|
"eval_logits/rejected": -0.5622259378433228,
|
|
"eval_logps/chosen": -401.6270751953125,
|
|
"eval_logps/rejected": -481.71246337890625,
|
|
"eval_loss": 0.5371974110603333,
|
|
"eval_rewards/accuracies": 0.765625,
|
|
"eval_rewards/chosen": -1.1911048889160156,
|
|
"eval_rewards/margins": 0.8110275268554688,
|
|
"eval_rewards/rejected": -2.0021324157714844,
|
|
"eval_runtime": 196.6339,
|
|
"eval_samples_per_second": 10.171,
|
|
"eval_steps_per_second": 0.163,
|
|
"step": 477
|
|
},
|
|
{
|
|
"epoch": 0.9984301412872841,
|
|
"step": 477,
|
|
"total_flos": 0.0,
|
|
"train_loss": 0.5847830807387954,
|
|
"train_runtime": 56722.3251,
|
|
"train_samples_per_second": 1.078,
|
|
"train_steps_per_second": 0.008
|
|
}
|
|
],
|
|
"logging_steps": 10,
|
|
"max_steps": 477,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 4,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|