1128 lines
44 KiB
JSON
1128 lines
44 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.26171159382360637,
|
|
"eval_steps": 500,
|
|
"global_step": 500,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0005234231876472127,
|
|
"grad_norm": 132.6717987060547,
|
|
"learning_rate": 0.0,
|
|
"logits/chosen": -0.40118408203125,
|
|
"logits/rejected": -0.41802978515625,
|
|
"logps/chosen": -297.609375,
|
|
"logps/rejected": -247.84375,
|
|
"logps/weighted_chosen": -4.7568359375,
|
|
"logps/weighted_rejected": -3.47998046875,
|
|
"loss": 0.6914,
|
|
"rewards/accuracies": 0.0,
|
|
"rewards/chosen": 0.0,
|
|
"rewards/margins": 0.0,
|
|
"rewards/rejected": 0.0,
|
|
"rewards/weighted_accuracies": 0.0,
|
|
"rewards/weighted_chosen": 0.0,
|
|
"rewards/weighted_margins": 0.0,
|
|
"rewards/weighted_rejected": 0.0,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.005234231876472127,
|
|
"grad_norm": 226.00839233398438,
|
|
"learning_rate": 4.6875e-08,
|
|
"logits/chosen": -0.3175845742225647,
|
|
"logits/rejected": -0.3532341718673706,
|
|
"logps/chosen": -275.5841979980469,
|
|
"logps/rejected": -255.84548950195312,
|
|
"logps/weighted_chosen": -2.651665687561035,
|
|
"logps/weighted_rejected": -2.88427734375,
|
|
"loss": 0.6921,
|
|
"rewards/accuracies": 0.25,
|
|
"rewards/chosen": -0.0731336772441864,
|
|
"rewards/margins": -0.0670572891831398,
|
|
"rewards/rejected": -0.006076388992369175,
|
|
"rewards/weighted_accuracies": 0.2951388955116272,
|
|
"rewards/weighted_chosen": -0.0015835232334211469,
|
|
"rewards/weighted_margins": -0.0009324815473519266,
|
|
"rewards/weighted_rejected": -0.0006510416860692203,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.010468463752944255,
|
|
"grad_norm": 14.726158142089844,
|
|
"learning_rate": 9.895833333333332e-08,
|
|
"logits/chosen": -0.29781341552734375,
|
|
"logits/rejected": -0.3136836886405945,
|
|
"logps/chosen": -294.4296875,
|
|
"logps/rejected": -272.58984375,
|
|
"logps/weighted_chosen": -2.458728075027466,
|
|
"logps/weighted_rejected": -2.455883741378784,
|
|
"loss": 0.6924,
|
|
"rewards/accuracies": 0.30000001192092896,
|
|
"rewards/chosen": -0.14013671875,
|
|
"rewards/margins": -0.13369140028953552,
|
|
"rewards/rejected": -0.0064453124068677425,
|
|
"rewards/weighted_accuracies": 0.34062498807907104,
|
|
"rewards/weighted_chosen": -0.0013603210682049394,
|
|
"rewards/weighted_margins": -0.0010925292735919356,
|
|
"rewards/weighted_rejected": -0.0002677917364053428,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.015702695629416383,
|
|
"grad_norm": 77.6125259399414,
|
|
"learning_rate": 1.5104166666666664e-07,
|
|
"logits/chosen": -0.2918853759765625,
|
|
"logits/rejected": -0.3377639651298523,
|
|
"logps/chosen": -298.05859375,
|
|
"logps/rejected": -268.0132751464844,
|
|
"logps/weighted_chosen": -2.4350829124450684,
|
|
"logps/weighted_rejected": -2.7343993186950684,
|
|
"loss": 0.6926,
|
|
"rewards/accuracies": 0.24062499403953552,
|
|
"rewards/chosen": -0.06621094048023224,
|
|
"rewards/margins": -0.1640625,
|
|
"rewards/rejected": 0.09785155951976776,
|
|
"rewards/weighted_accuracies": 0.3187499940395355,
|
|
"rewards/weighted_chosen": 0.0014068603049963713,
|
|
"rewards/weighted_margins": -0.0015777588123455644,
|
|
"rewards/weighted_rejected": 0.0029846192337572575,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.02093692750588851,
|
|
"grad_norm": 30.666196823120117,
|
|
"learning_rate": 2.03125e-07,
|
|
"logits/chosen": -0.30072021484375,
|
|
"logits/rejected": -0.3433845639228821,
|
|
"logps/chosen": -278.68829345703125,
|
|
"logps/rejected": -253.90780639648438,
|
|
"logps/weighted_chosen": -2.506396532058716,
|
|
"logps/weighted_rejected": -2.8416504859924316,
|
|
"loss": 0.6908,
|
|
"rewards/accuracies": 0.3062500059604645,
|
|
"rewards/chosen": 0.063232421875,
|
|
"rewards/margins": 0.04838867112994194,
|
|
"rewards/rejected": 0.014843749813735485,
|
|
"rewards/weighted_accuracies": 0.40312498807907104,
|
|
"rewards/weighted_chosen": 0.0042968750931322575,
|
|
"rewards/weighted_margins": 0.0019538879860192537,
|
|
"rewards/weighted_rejected": 0.0023429871071130037,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.02617115938236064,
|
|
"grad_norm": 18.60569953918457,
|
|
"learning_rate": 2.552083333333333e-07,
|
|
"logits/chosen": -0.2819870114326477,
|
|
"logits/rejected": -0.32059136033058167,
|
|
"logps/chosen": -280.31951904296875,
|
|
"logps/rejected": -267.4359436035156,
|
|
"logps/weighted_chosen": -2.4267334938049316,
|
|
"logps/weighted_rejected": -2.529711961746216,
|
|
"loss": 0.6891,
|
|
"rewards/accuracies": 0.3187499940395355,
|
|
"rewards/chosen": -0.03535156324505806,
|
|
"rewards/margins": -0.13984374701976776,
|
|
"rewards/rejected": 0.1044921875,
|
|
"rewards/weighted_accuracies": 0.3968749940395355,
|
|
"rewards/weighted_chosen": 0.0039031982887536287,
|
|
"rewards/weighted_margins": 0.005755615420639515,
|
|
"rewards/weighted_rejected": -0.0018524170154705644,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.031405391258832765,
|
|
"grad_norm": 38.21036911010742,
|
|
"learning_rate": 3.0729166666666665e-07,
|
|
"logits/chosen": -0.31453245878219604,
|
|
"logits/rejected": -0.30809077620506287,
|
|
"logps/chosen": -277.66015625,
|
|
"logps/rejected": -261.7445373535156,
|
|
"logps/weighted_chosen": -2.8622069358825684,
|
|
"logps/weighted_rejected": -2.7553467750549316,
|
|
"loss": 0.6894,
|
|
"rewards/accuracies": 0.35624998807907104,
|
|
"rewards/chosen": 0.04150390625,
|
|
"rewards/margins": 0.08027343451976776,
|
|
"rewards/rejected": -0.03876953199505806,
|
|
"rewards/weighted_accuracies": 0.4312500059604645,
|
|
"rewards/weighted_chosen": 0.0006561279296875,
|
|
"rewards/weighted_margins": 0.006243896670639515,
|
|
"rewards/weighted_rejected": -0.005587768740952015,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.036639623135304895,
|
|
"grad_norm": 69.19047546386719,
|
|
"learning_rate": 3.59375e-07,
|
|
"logits/chosen": -0.3177490234375,
|
|
"logits/rejected": -0.3246749937534332,
|
|
"logps/chosen": -289.76251220703125,
|
|
"logps/rejected": -244.92578125,
|
|
"logps/weighted_chosen": -2.3438963890075684,
|
|
"logps/weighted_rejected": -2.7010498046875,
|
|
"loss": 0.6841,
|
|
"rewards/accuracies": 0.49687498807907104,
|
|
"rewards/chosen": 0.29765623807907104,
|
|
"rewards/margins": 0.4546875059604645,
|
|
"rewards/rejected": -0.15703125298023224,
|
|
"rewards/weighted_accuracies": 0.5406249761581421,
|
|
"rewards/weighted_chosen": 0.01530532818287611,
|
|
"rewards/weighted_margins": 0.01918792724609375,
|
|
"rewards/weighted_rejected": -0.0038825988303869963,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.04187385501177702,
|
|
"grad_norm": 51.98476791381836,
|
|
"learning_rate": 4.114583333333333e-07,
|
|
"logits/chosen": -0.2850998044013977,
|
|
"logits/rejected": -0.30662041902542114,
|
|
"logps/chosen": -289.234375,
|
|
"logps/rejected": -270.375,
|
|
"logps/weighted_chosen": -2.5325684547424316,
|
|
"logps/weighted_rejected": -2.796435594558716,
|
|
"loss": 0.6747,
|
|
"rewards/accuracies": 0.5062500238418579,
|
|
"rewards/chosen": 0.512499988079071,
|
|
"rewards/margins": 0.6001952886581421,
|
|
"rewards/rejected": -0.08769531548023224,
|
|
"rewards/weighted_accuracies": 0.5562499761581421,
|
|
"rewards/weighted_chosen": 0.036380767822265625,
|
|
"rewards/weighted_margins": 0.04396667331457138,
|
|
"rewards/weighted_rejected": -0.007586670108139515,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.04710808688824915,
|
|
"grad_norm": 30.52783203125,
|
|
"learning_rate": 4.6354166666666664e-07,
|
|
"logits/chosen": -0.3142959475517273,
|
|
"logits/rejected": -0.3075408935546875,
|
|
"logps/chosen": -280.11407470703125,
|
|
"logps/rejected": -257.95233154296875,
|
|
"logps/weighted_chosen": -2.719482421875,
|
|
"logps/weighted_rejected": -2.88037109375,
|
|
"loss": 0.6687,
|
|
"rewards/accuracies": 0.5062500238418579,
|
|
"rewards/chosen": 0.5205078125,
|
|
"rewards/margins": 0.737109363079071,
|
|
"rewards/rejected": -0.21660156548023224,
|
|
"rewards/weighted_accuracies": 0.621874988079071,
|
|
"rewards/weighted_chosen": 0.06780395656824112,
|
|
"rewards/weighted_margins": 0.07340697944164276,
|
|
"rewards/weighted_rejected": -0.0056396485306322575,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.05234231876472128,
|
|
"grad_norm": 69.397705078125,
|
|
"learning_rate": 5.156249999999999e-07,
|
|
"logits/chosen": -0.28213196992874146,
|
|
"logits/rejected": -0.3543289303779602,
|
|
"logps/chosen": -290.71875,
|
|
"logps/rejected": -286.73126220703125,
|
|
"logps/weighted_chosen": -2.2228636741638184,
|
|
"logps/weighted_rejected": -2.8367552757263184,
|
|
"loss": 0.6848,
|
|
"rewards/accuracies": 0.5718749761581421,
|
|
"rewards/chosen": 0.24521484971046448,
|
|
"rewards/margins": 1.0690429210662842,
|
|
"rewards/rejected": -0.8238281011581421,
|
|
"rewards/weighted_accuracies": 0.5843750238418579,
|
|
"rewards/weighted_chosen": 0.05242309719324112,
|
|
"rewards/weighted_margins": 0.05032653734087944,
|
|
"rewards/weighted_rejected": 0.0021240233909338713,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.05757655064119341,
|
|
"grad_norm": 36.600040435791016,
|
|
"learning_rate": 5.677083333333333e-07,
|
|
"logits/chosen": -0.33063429594039917,
|
|
"logits/rejected": -0.319937139749527,
|
|
"logps/chosen": -296.82501220703125,
|
|
"logps/rejected": -262.2984313964844,
|
|
"logps/weighted_chosen": -2.8468017578125,
|
|
"logps/weighted_rejected": -2.9306397438049316,
|
|
"loss": 0.6773,
|
|
"rewards/accuracies": 0.578125,
|
|
"rewards/chosen": -0.474609375,
|
|
"rewards/margins": 1.053613305091858,
|
|
"rewards/rejected": -1.528222680091858,
|
|
"rewards/weighted_accuracies": 0.534375011920929,
|
|
"rewards/weighted_chosen": 0.013439941219985485,
|
|
"rewards/weighted_margins": 0.05541381984949112,
|
|
"rewards/weighted_rejected": -0.04198913648724556,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.06281078251766553,
|
|
"grad_norm": 57.109580993652344,
|
|
"learning_rate": 6.197916666666666e-07,
|
|
"logits/chosen": -0.33633461594581604,
|
|
"logits/rejected": -0.36155110597610474,
|
|
"logps/chosen": -295.3687438964844,
|
|
"logps/rejected": -256.1953125,
|
|
"logps/weighted_chosen": -2.161865234375,
|
|
"logps/weighted_rejected": -2.4251465797424316,
|
|
"loss": 0.6791,
|
|
"rewards/accuracies": 0.5718749761581421,
|
|
"rewards/chosen": -0.72900390625,
|
|
"rewards/margins": 1.641210913658142,
|
|
"rewards/rejected": -2.3702149391174316,
|
|
"rewards/weighted_accuracies": 0.5562499761581421,
|
|
"rewards/weighted_chosen": 0.007176590152084827,
|
|
"rewards/weighted_margins": 0.05286560207605362,
|
|
"rewards/weighted_rejected": -0.04570160061120987,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.06804501439413765,
|
|
"grad_norm": 39.176841735839844,
|
|
"learning_rate": 6.718749999999999e-07,
|
|
"logits/chosen": -0.29625242948532104,
|
|
"logits/rejected": -0.2914108335971832,
|
|
"logps/chosen": -306.6781311035156,
|
|
"logps/rejected": -280.15936279296875,
|
|
"logps/weighted_chosen": -2.188079833984375,
|
|
"logps/weighted_rejected": -2.5787596702575684,
|
|
"loss": 0.6659,
|
|
"rewards/accuracies": 0.612500011920929,
|
|
"rewards/chosen": -0.644238293170929,
|
|
"rewards/margins": 1.972265601158142,
|
|
"rewards/rejected": -2.616406202316284,
|
|
"rewards/weighted_accuracies": 0.606249988079071,
|
|
"rewards/weighted_chosen": 0.01349639892578125,
|
|
"rewards/weighted_margins": 0.0841522216796875,
|
|
"rewards/weighted_rejected": -0.07064209133386612,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.07327924627060979,
|
|
"grad_norm": 52.14993667602539,
|
|
"learning_rate": 7.239583333333333e-07,
|
|
"logits/chosen": -0.3304199278354645,
|
|
"logits/rejected": -0.3464847505092621,
|
|
"logps/chosen": -301.4390563964844,
|
|
"logps/rejected": -277.9515686035156,
|
|
"logps/weighted_chosen": -2.554003953933716,
|
|
"logps/weighted_rejected": -2.881591796875,
|
|
"loss": 0.6581,
|
|
"rewards/accuracies": 0.6343749761581421,
|
|
"rewards/chosen": -2.746875047683716,
|
|
"rewards/margins": 2.744921922683716,
|
|
"rewards/rejected": -5.491991996765137,
|
|
"rewards/weighted_accuracies": 0.6312500238418579,
|
|
"rewards/weighted_chosen": -0.02762756310403347,
|
|
"rewards/weighted_margins": 0.11510010063648224,
|
|
"rewards/weighted_rejected": -0.14276733994483948,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.07851347814708191,
|
|
"grad_norm": 22.611814498901367,
|
|
"learning_rate": 7.760416666666666e-07,
|
|
"logits/chosen": -0.2870376706123352,
|
|
"logits/rejected": -0.2975311279296875,
|
|
"logps/chosen": -287.859375,
|
|
"logps/rejected": -257.54296875,
|
|
"logps/weighted_chosen": -3.089892625808716,
|
|
"logps/weighted_rejected": -3.1946043968200684,
|
|
"loss": 0.6544,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -3.7095704078674316,
|
|
"rewards/margins": 2.942578077316284,
|
|
"rewards/rejected": -6.652148246765137,
|
|
"rewards/weighted_accuracies": 0.6312500238418579,
|
|
"rewards/weighted_chosen": -0.005145263858139515,
|
|
"rewards/weighted_margins": 0.16416625678539276,
|
|
"rewards/weighted_rejected": -0.16951599717140198,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.08374771002355404,
|
|
"grad_norm": 15.511767387390137,
|
|
"learning_rate": 8.28125e-07,
|
|
"logits/chosen": -0.3232177793979645,
|
|
"logits/rejected": -0.3726806640625,
|
|
"logps/chosen": -308.91796875,
|
|
"logps/rejected": -282.15704345703125,
|
|
"logps/weighted_chosen": -2.5903563499450684,
|
|
"logps/weighted_rejected": -2.742602586746216,
|
|
"loss": 0.6211,
|
|
"rewards/accuracies": 0.6625000238418579,
|
|
"rewards/chosen": -4.956835746765137,
|
|
"rewards/margins": 3.9961915016174316,
|
|
"rewards/rejected": -8.953222274780273,
|
|
"rewards/weighted_accuracies": 0.6499999761581421,
|
|
"rewards/weighted_chosen": -0.002410888671875,
|
|
"rewards/weighted_margins": 0.23797607421875,
|
|
"rewards/weighted_rejected": -0.24028320610523224,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.08898194190002617,
|
|
"grad_norm": 167.33956909179688,
|
|
"learning_rate": 8.802083333333333e-07,
|
|
"logits/chosen": -0.36021536588668823,
|
|
"logits/rejected": -0.3597045838832855,
|
|
"logps/chosen": -311.03045654296875,
|
|
"logps/rejected": -270.46875,
|
|
"logps/weighted_chosen": -2.8318848609924316,
|
|
"logps/weighted_rejected": -3.139453172683716,
|
|
"loss": 0.6949,
|
|
"rewards/accuracies": 0.609375,
|
|
"rewards/chosen": -8.0087890625,
|
|
"rewards/margins": 4.345898628234863,
|
|
"rewards/rejected": -12.354199409484863,
|
|
"rewards/weighted_accuracies": 0.640625,
|
|
"rewards/weighted_chosen": -0.01859130896627903,
|
|
"rewards/weighted_margins": 0.20853272080421448,
|
|
"rewards/weighted_rejected": -0.22731323540210724,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.0942161737764983,
|
|
"grad_norm": 64.57138061523438,
|
|
"learning_rate": 9.322916666666666e-07,
|
|
"logits/chosen": -0.33618468046188354,
|
|
"logits/rejected": -0.3534431457519531,
|
|
"logps/chosen": -284.2171936035156,
|
|
"logps/rejected": -272.12969970703125,
|
|
"logps/weighted_chosen": -2.694580078125,
|
|
"logps/weighted_rejected": -3.225878953933716,
|
|
"loss": 0.6814,
|
|
"rewards/accuracies": 0.6343749761581421,
|
|
"rewards/chosen": -10.43701171875,
|
|
"rewards/margins": 5.353320121765137,
|
|
"rewards/rejected": -15.7919921875,
|
|
"rewards/weighted_accuracies": 0.6187499761581421,
|
|
"rewards/weighted_chosen": -0.08297424018383026,
|
|
"rewards/weighted_margins": 0.26459962129592896,
|
|
"rewards/weighted_rejected": -0.347381591796875,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.09945040565297043,
|
|
"grad_norm": 49.0852165222168,
|
|
"learning_rate": 9.84375e-07,
|
|
"logits/chosen": -0.354086309671402,
|
|
"logits/rejected": -0.38891831040382385,
|
|
"logps/chosen": -319.17498779296875,
|
|
"logps/rejected": -283.31561279296875,
|
|
"logps/weighted_chosen": -2.5078492164611816,
|
|
"logps/weighted_rejected": -3.016357421875,
|
|
"loss": 0.6496,
|
|
"rewards/accuracies": 0.6187499761581421,
|
|
"rewards/chosen": -13.561426162719727,
|
|
"rewards/margins": 5.937890529632568,
|
|
"rewards/rejected": -19.498828887939453,
|
|
"rewards/weighted_accuracies": 0.6343749761581421,
|
|
"rewards/weighted_chosen": -0.16942748427391052,
|
|
"rewards/weighted_margins": 0.24410399794578552,
|
|
"rewards/weighted_rejected": -0.41356199979782104,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.10468463752944256,
|
|
"grad_norm": 53.46296691894531,
|
|
"learning_rate": 9.99959085414323e-07,
|
|
"logits/chosen": -0.37868577241897583,
|
|
"logits/rejected": -0.4114578366279602,
|
|
"logps/chosen": -324.7124938964844,
|
|
"logps/rejected": -279.72967529296875,
|
|
"logps/weighted_chosen": -2.8757567405700684,
|
|
"logps/weighted_rejected": -3.3623046875,
|
|
"loss": 0.639,
|
|
"rewards/accuracies": 0.609375,
|
|
"rewards/chosen": -15.428125381469727,
|
|
"rewards/margins": 6.552148342132568,
|
|
"rewards/rejected": -21.975000381469727,
|
|
"rewards/weighted_accuracies": 0.6343749761581421,
|
|
"rewards/weighted_chosen": -0.18135985732078552,
|
|
"rewards/weighted_margins": 0.29008787870407104,
|
|
"rewards/weighted_rejected": -0.471527099609375,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.10991886940591468,
|
|
"grad_norm": 24.815481185913086,
|
|
"learning_rate": 9.997587035630105e-07,
|
|
"logits/chosen": -0.3853309750556946,
|
|
"logits/rejected": -0.4257049560546875,
|
|
"logps/chosen": -302.82891845703125,
|
|
"logps/rejected": -308.671875,
|
|
"logps/weighted_chosen": -2.632519483566284,
|
|
"logps/weighted_rejected": -3.3669190406799316,
|
|
"loss": 0.6558,
|
|
"rewards/accuracies": 0.65625,
|
|
"rewards/chosen": -15.814453125,
|
|
"rewards/margins": 8.331445693969727,
|
|
"rewards/rejected": -24.146093368530273,
|
|
"rewards/weighted_accuracies": 0.659375011920929,
|
|
"rewards/weighted_chosen": -0.225901797413826,
|
|
"rewards/weighted_margins": 0.23236694931983948,
|
|
"rewards/weighted_rejected": -0.45829468965530396,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.11515310128238682,
|
|
"grad_norm": 24.175745010375977,
|
|
"learning_rate": 9.99391406364405e-07,
|
|
"logits/chosen": -0.37365952134132385,
|
|
"logits/rejected": -0.3758789002895355,
|
|
"logps/chosen": -309.34686279296875,
|
|
"logps/rejected": -293.98126220703125,
|
|
"logps/weighted_chosen": -3.002514600753784,
|
|
"logps/weighted_rejected": -3.453906297683716,
|
|
"loss": 0.6732,
|
|
"rewards/accuracies": 0.6312500238418579,
|
|
"rewards/chosen": -17.203418731689453,
|
|
"rewards/margins": 7.933203220367432,
|
|
"rewards/rejected": -25.137109756469727,
|
|
"rewards/weighted_accuracies": 0.6156250238418579,
|
|
"rewards/weighted_chosen": -0.259225457906723,
|
|
"rewards/weighted_margins": 0.29540252685546875,
|
|
"rewards/weighted_rejected": -0.5546798706054688,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.12038733315885894,
|
|
"grad_norm": 85.15988159179688,
|
|
"learning_rate": 9.988573164927884e-07,
|
|
"logits/chosen": -0.3097473084926605,
|
|
"logits/rejected": -0.3477935791015625,
|
|
"logps/chosen": -286.5078125,
|
|
"logps/rejected": -281.8453063964844,
|
|
"logps/weighted_chosen": -2.66943359375,
|
|
"logps/weighted_rejected": -3.1229491233825684,
|
|
"loss": 0.6646,
|
|
"rewards/accuracies": 0.6656249761581421,
|
|
"rewards/chosen": -17.690723419189453,
|
|
"rewards/margins": 12.424609184265137,
|
|
"rewards/rejected": -30.110157012939453,
|
|
"rewards/weighted_accuracies": 0.65625,
|
|
"rewards/weighted_chosen": -0.2716217041015625,
|
|
"rewards/weighted_margins": 0.32661741971969604,
|
|
"rewards/weighted_rejected": -0.5983597040176392,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.12562156503533106,
|
|
"grad_norm": 26.17377471923828,
|
|
"learning_rate": 9.98156612329838e-07,
|
|
"logits/chosen": -0.39516907930374146,
|
|
"logits/rejected": -0.44511109590530396,
|
|
"logps/chosen": -286.74884033203125,
|
|
"logps/rejected": -318.22735595703125,
|
|
"logps/weighted_chosen": -2.6696534156799316,
|
|
"logps/weighted_rejected": -3.4151854515075684,
|
|
"loss": 0.643,
|
|
"rewards/accuracies": 0.6312500238418579,
|
|
"rewards/chosen": -21.990428924560547,
|
|
"rewards/margins": 14.028905868530273,
|
|
"rewards/rejected": -36.013282775878906,
|
|
"rewards/weighted_accuracies": 0.6625000238418579,
|
|
"rewards/weighted_chosen": -0.2329559326171875,
|
|
"rewards/weighted_margins": 0.3950134217739105,
|
|
"rewards/weighted_rejected": -0.6281493902206421,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.13085579691180318,
|
|
"grad_norm": 56.73057174682617,
|
|
"learning_rate": 9.97289527905053e-07,
|
|
"logits/chosen": -0.40631332993507385,
|
|
"logits/rejected": -0.4203124940395355,
|
|
"logps/chosen": -290.1703186035156,
|
|
"logps/rejected": -291.6328125,
|
|
"logps/weighted_chosen": -3.051513671875,
|
|
"logps/weighted_rejected": -3.3163819313049316,
|
|
"loss": 0.677,
|
|
"rewards/accuracies": 0.606249988079071,
|
|
"rewards/chosen": -25.742870330810547,
|
|
"rewards/margins": 9.973828315734863,
|
|
"rewards/rejected": -35.72148513793945,
|
|
"rewards/weighted_accuracies": 0.6187499761581421,
|
|
"rewards/weighted_chosen": -0.2856689393520355,
|
|
"rewards/weighted_margins": 0.253326416015625,
|
|
"rewards/weighted_rejected": -0.5388733148574829,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.1360900287882753,
|
|
"grad_norm": 17.766258239746094,
|
|
"learning_rate": 9.962563528175875e-07,
|
|
"logits/chosen": -0.3611465394496918,
|
|
"logits/rejected": -0.39628905057907104,
|
|
"logps/chosen": -324.36639404296875,
|
|
"logps/rejected": -297.765625,
|
|
"logps/weighted_chosen": -2.652392625808716,
|
|
"logps/weighted_rejected": -3.535571336746216,
|
|
"loss": 0.6414,
|
|
"rewards/accuracies": 0.578125,
|
|
"rewards/chosen": -25.621288299560547,
|
|
"rewards/margins": 11.306055068969727,
|
|
"rewards/rejected": -36.93359375,
|
|
"rewards/weighted_accuracies": 0.637499988079071,
|
|
"rewards/weighted_chosen": -0.2533508241176605,
|
|
"rewards/weighted_margins": 0.2956604063510895,
|
|
"rewards/weighted_rejected": -0.5490142703056335,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.14132426066474746,
|
|
"grad_norm": 17.552453994750977,
|
|
"learning_rate": 9.950574321395277e-07,
|
|
"logits/chosen": -0.41735154390335083,
|
|
"logits/rejected": -0.441476434469223,
|
|
"logps/chosen": -314.5093688964844,
|
|
"logps/rejected": -295.7093811035156,
|
|
"logps/weighted_chosen": -2.864941358566284,
|
|
"logps/weighted_rejected": -3.25732421875,
|
|
"loss": 0.661,
|
|
"rewards/accuracies": 0.628125011920929,
|
|
"rewards/chosen": -25.1123046875,
|
|
"rewards/margins": 7.519726753234863,
|
|
"rewards/rejected": -32.62890625,
|
|
"rewards/weighted_accuracies": 0.609375,
|
|
"rewards/weighted_chosen": -0.3035888671875,
|
|
"rewards/weighted_margins": 0.2833190858364105,
|
|
"rewards/weighted_rejected": -0.5868393182754517,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.14655849254121958,
|
|
"grad_norm": 47.66518020629883,
|
|
"learning_rate": 9.936931663006413e-07,
|
|
"logits/chosen": -0.4760284423828125,
|
|
"logits/rejected": -0.46795654296875,
|
|
"logps/chosen": -323.48126220703125,
|
|
"logps/rejected": -313.2875061035156,
|
|
"logps/weighted_chosen": -2.794970750808716,
|
|
"logps/weighted_rejected": -3.3581910133361816,
|
|
"loss": 0.6169,
|
|
"rewards/accuracies": 0.690625011920929,
|
|
"rewards/chosen": -20.707616806030273,
|
|
"rewards/margins": 13.166601181030273,
|
|
"rewards/rejected": -33.86640548706055,
|
|
"rewards/weighted_accuracies": 0.7093750238418579,
|
|
"rewards/weighted_chosen": -0.10174255073070526,
|
|
"rewards/weighted_margins": 0.34544676542282104,
|
|
"rewards/weighted_rejected": -0.447021484375,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.1517927244176917,
|
|
"grad_norm": 32.503883361816406,
|
|
"learning_rate": 9.921640109546357e-07,
|
|
"logits/chosen": -0.44742050766944885,
|
|
"logits/rejected": -0.5166229009628296,
|
|
"logps/chosen": -292.1796875,
|
|
"logps/rejected": -289.6234436035156,
|
|
"logps/weighted_chosen": -2.7469239234924316,
|
|
"logps/weighted_rejected": -3.9541258811950684,
|
|
"loss": 0.6249,
|
|
"rewards/accuracies": 0.628125011920929,
|
|
"rewards/chosen": -25.293359756469727,
|
|
"rewards/margins": 12.698633193969727,
|
|
"rewards/rejected": -37.994529724121094,
|
|
"rewards/weighted_accuracies": 0.628125011920929,
|
|
"rewards/weighted_chosen": -0.15215758979320526,
|
|
"rewards/weighted_margins": 0.4393859803676605,
|
|
"rewards/weighted_rejected": -0.5915588140487671,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.15702695629416383,
|
|
"grad_norm": 17.32170867919922,
|
|
"learning_rate": 9.90470476826975e-07,
|
|
"logits/chosen": -0.5146636962890625,
|
|
"logits/rejected": -0.515917956829071,
|
|
"logps/chosen": -302.3570251464844,
|
|
"logps/rejected": -313.68438720703125,
|
|
"logps/weighted_chosen": -2.6830201148986816,
|
|
"logps/weighted_rejected": -3.202099561691284,
|
|
"loss": 0.6526,
|
|
"rewards/accuracies": 0.653124988079071,
|
|
"rewards/chosen": -32.978126525878906,
|
|
"rewards/margins": 13.435937881469727,
|
|
"rewards/rejected": -46.408203125,
|
|
"rewards/weighted_accuracies": 0.621874988079071,
|
|
"rewards/weighted_chosen": -0.23505249619483948,
|
|
"rewards/weighted_margins": 0.33623045682907104,
|
|
"rewards/weighted_rejected": -0.5710296630859375,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.16226118817063595,
|
|
"grad_norm": 25.855854034423828,
|
|
"learning_rate": 9.886131295443002e-07,
|
|
"logits/chosen": -0.6332122683525085,
|
|
"logits/rejected": -0.6879852414131165,
|
|
"logps/chosen": -315.02264404296875,
|
|
"logps/rejected": -296.54998779296875,
|
|
"logps/weighted_chosen": -2.8891844749450684,
|
|
"logps/weighted_rejected": -3.3497071266174316,
|
|
"loss": 0.6099,
|
|
"rewards/accuracies": 0.653124988079071,
|
|
"rewards/chosen": -33.83808517456055,
|
|
"rewards/margins": 12.542577743530273,
|
|
"rewards/rejected": -46.39081954956055,
|
|
"rewards/weighted_accuracies": 0.668749988079071,
|
|
"rewards/weighted_chosen": -0.20775146782398224,
|
|
"rewards/weighted_margins": 0.507794201374054,
|
|
"rewards/weighted_rejected": -0.715728759765625,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.16749542004710807,
|
|
"grad_norm": 19.11484718322754,
|
|
"learning_rate": 9.865925894455166e-07,
|
|
"logits/chosen": -0.730267345905304,
|
|
"logits/rejected": -0.746167004108429,
|
|
"logps/chosen": -338.2242126464844,
|
|
"logps/rejected": -307.18280029296875,
|
|
"logps/weighted_chosen": -2.9883790016174316,
|
|
"logps/weighted_rejected": -3.5892090797424316,
|
|
"loss": 0.6942,
|
|
"rewards/accuracies": 0.6312500238418579,
|
|
"rewards/chosen": -38.233009338378906,
|
|
"rewards/margins": 11.564062118530273,
|
|
"rewards/rejected": -49.80937576293945,
|
|
"rewards/weighted_accuracies": 0.612500011920929,
|
|
"rewards/weighted_chosen": -0.3507080078125,
|
|
"rewards/weighted_margins": 0.3366760313510895,
|
|
"rewards/weighted_rejected": -0.6871337890625,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.17272965192358022,
|
|
"grad_norm": 57.19697570800781,
|
|
"learning_rate": 9.84409531374603e-07,
|
|
"logits/chosen": -0.6843910217285156,
|
|
"logits/rejected": -0.6659576296806335,
|
|
"logps/chosen": -345.46875,
|
|
"logps/rejected": -316.2515563964844,
|
|
"logps/weighted_chosen": -3.05517578125,
|
|
"logps/weighted_rejected": -3.5519776344299316,
|
|
"loss": 0.6569,
|
|
"rewards/accuracies": 0.668749988079071,
|
|
"rewards/chosen": -40.732032775878906,
|
|
"rewards/margins": 12.651952743530273,
|
|
"rewards/rejected": -53.38984298706055,
|
|
"rewards/weighted_accuracies": 0.6468750238418579,
|
|
"rewards/weighted_chosen": -0.3262878358364105,
|
|
"rewards/weighted_margins": 0.346893310546875,
|
|
"rewards/weighted_rejected": -0.6730865240097046,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.17796388380005235,
|
|
"grad_norm": 52.49288558959961,
|
|
"learning_rate": 9.820646844552219e-07,
|
|
"logits/chosen": -0.6993133425712585,
|
|
"logits/rejected": -0.7529846429824829,
|
|
"logps/chosen": -313.59295654296875,
|
|
"logps/rejected": -322.1499938964844,
|
|
"logps/weighted_chosen": -3.0488524436950684,
|
|
"logps/weighted_rejected": -3.440136671066284,
|
|
"loss": 0.6287,
|
|
"rewards/accuracies": 0.6812499761581421,
|
|
"rewards/chosen": -37.06660079956055,
|
|
"rewards/margins": 19.494531631469727,
|
|
"rewards/rejected": -56.556640625,
|
|
"rewards/weighted_accuracies": 0.6968749761581421,
|
|
"rewards/weighted_chosen": -0.30719298124313354,
|
|
"rewards/weighted_margins": 0.448944091796875,
|
|
"rewards/weighted_rejected": -0.755999743938446,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.18319811567652447,
|
|
"grad_norm": 15.657389640808105,
|
|
"learning_rate": 9.795588318471964e-07,
|
|
"logits/chosen": -0.7813507318496704,
|
|
"logits/rejected": -0.7874206304550171,
|
|
"logps/chosen": -299.80157470703125,
|
|
"logps/rejected": -331.4375,
|
|
"logps/weighted_chosen": -2.84619140625,
|
|
"logps/weighted_rejected": -3.315380811691284,
|
|
"loss": 0.6405,
|
|
"rewards/accuracies": 0.6312500238418579,
|
|
"rewards/chosen": -40.32304763793945,
|
|
"rewards/margins": 14.830663681030273,
|
|
"rewards/rejected": -55.15625,
|
|
"rewards/weighted_accuracies": 0.6468750238418579,
|
|
"rewards/weighted_chosen": -0.315826416015625,
|
|
"rewards/weighted_margins": 0.386627197265625,
|
|
"rewards/weighted_rejected": -0.702471911907196,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.1884323475529966,
|
|
"grad_norm": 16.19976806640625,
|
|
"learning_rate": 9.768928104849415e-07,
|
|
"logits/chosen": -0.801177978515625,
|
|
"logits/rejected": -0.799664318561554,
|
|
"logps/chosen": -323.5171813964844,
|
|
"logps/rejected": -305.046875,
|
|
"logps/weighted_chosen": -3.1164307594299316,
|
|
"logps/weighted_rejected": -3.3475098609924316,
|
|
"loss": 0.6865,
|
|
"rewards/accuracies": 0.621874988079071,
|
|
"rewards/chosen": -40.823829650878906,
|
|
"rewards/margins": 15.389843940734863,
|
|
"rewards/rejected": -56.216407775878906,
|
|
"rewards/weighted_accuracies": 0.659375011920929,
|
|
"rewards/weighted_chosen": -0.31828004121780396,
|
|
"rewards/weighted_margins": 0.3831420838832855,
|
|
"rewards/weighted_rejected": -0.7014526128768921,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.19366657942946872,
|
|
"grad_norm": 89.87427520751953,
|
|
"learning_rate": 9.740675107979355e-07,
|
|
"logits/chosen": -0.7640800476074219,
|
|
"logits/rejected": -0.7867538332939148,
|
|
"logps/chosen": -361.13751220703125,
|
|
"logps/rejected": -334.97967529296875,
|
|
"logps/weighted_chosen": -2.5084471702575684,
|
|
"logps/weighted_rejected": -3.4689698219299316,
|
|
"loss": 0.6531,
|
|
"rewards/accuracies": 0.6031249761581421,
|
|
"rewards/chosen": -46.098045349121094,
|
|
"rewards/margins": 14.188085556030273,
|
|
"rewards/rejected": -60.26640701293945,
|
|
"rewards/weighted_accuracies": 0.671875,
|
|
"rewards/weighted_chosen": -0.36387938261032104,
|
|
"rewards/weighted_margins": 0.3567260801792145,
|
|
"rewards/weighted_rejected": -0.720538318157196,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.19890081130594087,
|
|
"grad_norm": 22.484216690063477,
|
|
"learning_rate": 9.71083876413323e-07,
|
|
"logits/chosen": -0.7209137082099915,
|
|
"logits/rejected": -0.7318176031112671,
|
|
"logps/chosen": -353.6031188964844,
|
|
"logps/rejected": -339.16485595703125,
|
|
"logps/weighted_chosen": -2.70361328125,
|
|
"logps/weighted_rejected": -3.5843749046325684,
|
|
"loss": 0.6589,
|
|
"rewards/accuracies": 0.6156250238418579,
|
|
"rewards/chosen": -51.237892150878906,
|
|
"rewards/margins": 18.424999237060547,
|
|
"rewards/rejected": -69.64530944824219,
|
|
"rewards/weighted_accuracies": 0.6312500238418579,
|
|
"rewards/weighted_chosen": -0.4475158751010895,
|
|
"rewards/weighted_margins": 0.3267761170864105,
|
|
"rewards/weighted_rejected": -0.7747405767440796,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.204135043182413,
|
|
"grad_norm": 21.885372161865234,
|
|
"learning_rate": 9.67942903840751e-07,
|
|
"logits/chosen": -0.7708206176757812,
|
|
"logits/rejected": -0.8207153081893921,
|
|
"logps/chosen": -355.18438720703125,
|
|
"logps/rejected": -350.47186279296875,
|
|
"logps/weighted_chosen": -2.8836669921875,
|
|
"logps/weighted_rejected": -3.5904297828674316,
|
|
"loss": 0.6028,
|
|
"rewards/accuracies": 0.675000011920929,
|
|
"rewards/chosen": -50.973045349121094,
|
|
"rewards/margins": 25.190038681030273,
|
|
"rewards/rejected": -76.1617202758789,
|
|
"rewards/weighted_accuracies": 0.7093750238418579,
|
|
"rewards/weighted_chosen": -0.39097899198532104,
|
|
"rewards/weighted_margins": 0.4941650331020355,
|
|
"rewards/weighted_rejected": -0.884967029094696,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.2093692750588851,
|
|
"grad_norm": 26.357742309570312,
|
|
"learning_rate": 9.646456421395447e-07,
|
|
"logits/chosen": -0.805267333984375,
|
|
"logits/rejected": -0.8178039789199829,
|
|
"logps/chosen": -377.52813720703125,
|
|
"logps/rejected": -392.0296936035156,
|
|
"logps/weighted_chosen": -2.7947998046875,
|
|
"logps/weighted_rejected": -3.697582960128784,
|
|
"loss": 0.6296,
|
|
"rewards/accuracies": 0.6656249761581421,
|
|
"rewards/chosen": -59.2109375,
|
|
"rewards/margins": 29.121875762939453,
|
|
"rewards/rejected": -88.32890319824219,
|
|
"rewards/weighted_accuracies": 0.653124988079071,
|
|
"rewards/weighted_chosen": -0.41761475801467896,
|
|
"rewards/weighted_margins": 0.38171082735061646,
|
|
"rewards/weighted_rejected": -0.7994705438613892,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.21460350693535724,
|
|
"grad_norm": 21.382999420166016,
|
|
"learning_rate": 9.611931925683266e-07,
|
|
"logits/chosen": -0.7703964114189148,
|
|
"logits/rejected": -0.808850109577179,
|
|
"logps/chosen": -367.3140563964844,
|
|
"logps/rejected": -348.0687561035156,
|
|
"logps/weighted_chosen": -2.711962938308716,
|
|
"logps/weighted_rejected": -3.4615721702575684,
|
|
"loss": 0.5758,
|
|
"rewards/accuracies": 0.668749988079071,
|
|
"rewards/chosen": -61.02734375,
|
|
"rewards/margins": 23.316797256469727,
|
|
"rewards/rejected": -84.34687805175781,
|
|
"rewards/weighted_accuracies": 0.6937500238418579,
|
|
"rewards/weighted_chosen": -0.40519410371780396,
|
|
"rewards/weighted_margins": 0.521441638469696,
|
|
"rewards/weighted_rejected": -0.9261535406112671,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.21983773881182936,
|
|
"grad_norm": 23.030996322631836,
|
|
"learning_rate": 9.575867082172085e-07,
|
|
"logits/chosen": -0.7789466977119446,
|
|
"logits/rejected": -0.8260132074356079,
|
|
"logps/chosen": -372.22344970703125,
|
|
"logps/rejected": -367.0171813964844,
|
|
"logps/weighted_chosen": -3.114550828933716,
|
|
"logps/weighted_rejected": -3.364208936691284,
|
|
"loss": 0.6211,
|
|
"rewards/accuracies": 0.6656249761581421,
|
|
"rewards/chosen": -67.16816711425781,
|
|
"rewards/margins": 29.731639862060547,
|
|
"rewards/rejected": -96.90156555175781,
|
|
"rewards/weighted_accuracies": 0.659375011920929,
|
|
"rewards/weighted_chosen": -0.5122925043106079,
|
|
"rewards/weighted_margins": 0.5218566656112671,
|
|
"rewards/weighted_rejected": -1.0339782238006592,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.22507197068830148,
|
|
"grad_norm": 16.442333221435547,
|
|
"learning_rate": 9.538273936226673e-07,
|
|
"logits/chosen": -0.830523669719696,
|
|
"logits/rejected": -0.8667358160018921,
|
|
"logps/chosen": -328.4546813964844,
|
|
"logps/rejected": -347.9593811035156,
|
|
"logps/weighted_chosen": -3.373584032058716,
|
|
"logps/weighted_rejected": -3.832958936691284,
|
|
"loss": 0.6425,
|
|
"rewards/accuracies": 0.621874988079071,
|
|
"rewards/chosen": -62.920310974121094,
|
|
"rewards/margins": 20.668750762939453,
|
|
"rewards/rejected": -83.5894546508789,
|
|
"rewards/weighted_accuracies": 0.612500011920929,
|
|
"rewards/weighted_chosen": -0.459890753030777,
|
|
"rewards/weighted_margins": 0.39284056425094604,
|
|
"rewards/weighted_rejected": -0.8525451421737671,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.23030620256477363,
|
|
"grad_norm": 21.955875396728516,
|
|
"learning_rate": 9.499165043652391e-07,
|
|
"logits/chosen": -0.8598114252090454,
|
|
"logits/rejected": -0.868182361125946,
|
|
"logps/chosen": -358.21563720703125,
|
|
"logps/rejected": -356.26251220703125,
|
|
"logps/weighted_chosen": -3.4171142578125,
|
|
"logps/weighted_rejected": -3.6997313499450684,
|
|
"loss": 0.624,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -66.96504211425781,
|
|
"rewards/margins": 22.563282012939453,
|
|
"rewards/rejected": -89.5503921508789,
|
|
"rewards/weighted_accuracies": 0.6625000238418579,
|
|
"rewards/weighted_chosen": -0.5841079950332642,
|
|
"rewards/weighted_margins": 0.434326171875,
|
|
"rewards/weighted_rejected": -1.018707275390625,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.23554043444124576,
|
|
"grad_norm": 75.56902313232422,
|
|
"learning_rate": 9.458553466501665e-07,
|
|
"logits/chosen": -0.9330536127090454,
|
|
"logits/rejected": -0.9642333984375,
|
|
"logps/chosen": -352.6187438964844,
|
|
"logps/rejected": -336.0218811035156,
|
|
"logps/weighted_chosen": -3.4129395484924316,
|
|
"logps/weighted_rejected": -3.74462890625,
|
|
"loss": 0.6566,
|
|
"rewards/accuracies": 0.659375011920929,
|
|
"rewards/chosen": -66.412109375,
|
|
"rewards/margins": 25.757617950439453,
|
|
"rewards/rejected": -92.181640625,
|
|
"rewards/weighted_accuracies": 0.6875,
|
|
"rewards/weighted_chosen": -0.689007580280304,
|
|
"rewards/weighted_margins": 0.4539245665073395,
|
|
"rewards/weighted_rejected": -1.143212914466858,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.24077466631771788,
|
|
"grad_norm": 19.516427993774414,
|
|
"learning_rate": 9.416452768711366e-07,
|
|
"logits/chosen": -0.945111095905304,
|
|
"logits/rejected": -0.9787231683731079,
|
|
"logps/chosen": -369.3671875,
|
|
"logps/rejected": -358.9624938964844,
|
|
"logps/weighted_chosen": -3.1959471702575684,
|
|
"logps/weighted_rejected": -3.948193311691284,
|
|
"loss": 0.6392,
|
|
"rewards/accuracies": 0.621874988079071,
|
|
"rewards/chosen": -77.17265319824219,
|
|
"rewards/margins": 21.916015625,
|
|
"rewards/rejected": -99.0796890258789,
|
|
"rewards/weighted_accuracies": 0.668749988079071,
|
|
"rewards/weighted_chosen": -0.67822265625,
|
|
"rewards/weighted_margins": 0.532788097858429,
|
|
"rewards/weighted_rejected": -1.2112305164337158,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.24600889819419,
|
|
"grad_norm": 19.182979583740234,
|
|
"learning_rate": 9.372877011572557e-07,
|
|
"logits/chosen": -0.9224609136581421,
|
|
"logits/rejected": -0.9388214349746704,
|
|
"logps/chosen": -391.6937561035156,
|
|
"logps/rejected": -377.0625,
|
|
"logps/weighted_chosen": -3.224560499191284,
|
|
"logps/weighted_rejected": -3.783252000808716,
|
|
"loss": 0.6162,
|
|
"rewards/accuracies": 0.621874988079071,
|
|
"rewards/chosen": -80.95976257324219,
|
|
"rewards/margins": 21.617578506469727,
|
|
"rewards/rejected": -102.59883117675781,
|
|
"rewards/weighted_accuracies": 0.6781250238418579,
|
|
"rewards/weighted_chosen": -0.684155285358429,
|
|
"rewards/weighted_margins": 0.5555480718612671,
|
|
"rewards/weighted_rejected": -1.23956298828125,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.2512431300706621,
|
|
"grad_norm": 31.75469970703125,
|
|
"learning_rate": 9.327840749034141e-07,
|
|
"logits/chosen": -0.969561755657196,
|
|
"logits/rejected": -0.998791515827179,
|
|
"logps/chosen": -362.1859436035156,
|
|
"logps/rejected": -385.29998779296875,
|
|
"logps/weighted_chosen": -3.0771241188049316,
|
|
"logps/weighted_rejected": -4.388257026672363,
|
|
"loss": 0.6296,
|
|
"rewards/accuracies": 0.6812499761581421,
|
|
"rewards/chosen": -75.1123046875,
|
|
"rewards/margins": 33.66425704956055,
|
|
"rewards/rejected": -108.75,
|
|
"rewards/weighted_accuracies": 0.6781250238418579,
|
|
"rewards/weighted_chosen": -0.606555163860321,
|
|
"rewards/weighted_margins": 0.612231433391571,
|
|
"rewards/weighted_rejected": -1.2182190418243408,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.2564773619471343,
|
|
"grad_norm": 37.024818420410156,
|
|
"learning_rate": 9.281359022841965e-07,
|
|
"logits/chosen": -0.846588134765625,
|
|
"logits/rejected": -0.859790027141571,
|
|
"logps/chosen": -352.46405029296875,
|
|
"logps/rejected": -355.24688720703125,
|
|
"logps/weighted_chosen": -3.219531297683716,
|
|
"logps/weighted_rejected": -4.648681640625,
|
|
"loss": 0.5897,
|
|
"rewards/accuracies": 0.640625,
|
|
"rewards/chosen": -77.3949203491211,
|
|
"rewards/margins": 32.93046951293945,
|
|
"rewards/rejected": -110.32890319824219,
|
|
"rewards/weighted_accuracies": 0.703125,
|
|
"rewards/weighted_chosen": -0.7288268804550171,
|
|
"rewards/weighted_margins": 0.6741577386856079,
|
|
"rewards/weighted_rejected": -1.403161644935608,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.26171159382360637,
|
|
"grad_norm": 17.740766525268555,
|
|
"learning_rate": 9.233447357514989e-07,
|
|
"logits/chosen": -0.8205505609512329,
|
|
"logits/rejected": -0.863543689250946,
|
|
"logps/chosen": -375.52032470703125,
|
|
"logps/rejected": -378.3500061035156,
|
|
"logps/weighted_chosen": -3.53125,
|
|
"logps/weighted_rejected": -4.106689453125,
|
|
"loss": 0.6305,
|
|
"rewards/accuracies": 0.6499999761581421,
|
|
"rewards/chosen": -77.591796875,
|
|
"rewards/margins": 30.978906631469727,
|
|
"rewards/rejected": -108.54609680175781,
|
|
"rewards/weighted_accuracies": 0.668749988079071,
|
|
"rewards/weighted_chosen": -0.6407378911972046,
|
|
"rewards/weighted_margins": 0.6662231683731079,
|
|
"rewards/weighted_rejected": -1.30645751953125,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.26171159382360637,
|
|
"eval_logits/chosen": -0.9400458931922913,
|
|
"eval_logits/rejected": -0.955981433391571,
|
|
"eval_logps/chosen": -371.72900390625,
|
|
"eval_logps/rejected": -379.6419982910156,
|
|
"eval_logps/weighted_chosen": -3.214712381362915,
|
|
"eval_logps/weighted_rejected": -4.0158867835998535,
|
|
"eval_loss": 0.6316163539886475,
|
|
"eval_rewards/accuracies": 0.6349999904632568,
|
|
"eval_rewards/chosen": -82.98784637451172,
|
|
"eval_rewards/margins": 28.939437866210938,
|
|
"eval_rewards/rejected": -111.93875122070312,
|
|
"eval_rewards/weighted_accuracies": 0.6725000143051147,
|
|
"eval_rewards/weighted_chosen": -0.6669993996620178,
|
|
"eval_rewards/weighted_margins": 0.5506796836853027,
|
|
"eval_rewards/weighted_rejected": -1.2176789045333862,
|
|
"eval_runtime": 1162.5522,
|
|
"eval_samples_per_second": 1.72,
|
|
"eval_steps_per_second": 0.43,
|
|
"step": 500
|
|
}
|
|
],
|
|
"logging_steps": 10,
|
|
"max_steps": 1911,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": false
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|