4133 lines
162 KiB
JSON
4133 lines
162 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 1.0,
|
|
"eval_steps": 500,
|
|
"global_step": 1911,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0005234231876472127,
|
|
"grad_norm": 132.6717987060547,
|
|
"learning_rate": 0.0,
|
|
"logits/chosen": -0.40118408203125,
|
|
"logits/rejected": -0.41802978515625,
|
|
"logps/chosen": -297.609375,
|
|
"logps/rejected": -247.84375,
|
|
"logps/weighted_chosen": -4.7568359375,
|
|
"logps/weighted_rejected": -3.47998046875,
|
|
"loss": 0.6914,
|
|
"rewards/accuracies": 0.0,
|
|
"rewards/chosen": 0.0,
|
|
"rewards/margins": 0.0,
|
|
"rewards/rejected": 0.0,
|
|
"rewards/weighted_accuracies": 0.0,
|
|
"rewards/weighted_chosen": 0.0,
|
|
"rewards/weighted_margins": 0.0,
|
|
"rewards/weighted_rejected": 0.0,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.005234231876472127,
|
|
"grad_norm": 226.00839233398438,
|
|
"learning_rate": 4.6875e-08,
|
|
"logits/chosen": -0.3175845742225647,
|
|
"logits/rejected": -0.3532341718673706,
|
|
"logps/chosen": -275.5841979980469,
|
|
"logps/rejected": -255.84548950195312,
|
|
"logps/weighted_chosen": -2.651665687561035,
|
|
"logps/weighted_rejected": -2.88427734375,
|
|
"loss": 0.6921,
|
|
"rewards/accuracies": 0.25,
|
|
"rewards/chosen": -0.0731336772441864,
|
|
"rewards/margins": -0.0670572891831398,
|
|
"rewards/rejected": -0.006076388992369175,
|
|
"rewards/weighted_accuracies": 0.2951388955116272,
|
|
"rewards/weighted_chosen": -0.0015835232334211469,
|
|
"rewards/weighted_margins": -0.0009324815473519266,
|
|
"rewards/weighted_rejected": -0.0006510416860692203,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.010468463752944255,
|
|
"grad_norm": 14.726158142089844,
|
|
"learning_rate": 9.895833333333332e-08,
|
|
"logits/chosen": -0.29781341552734375,
|
|
"logits/rejected": -0.3136836886405945,
|
|
"logps/chosen": -294.4296875,
|
|
"logps/rejected": -272.58984375,
|
|
"logps/weighted_chosen": -2.458728075027466,
|
|
"logps/weighted_rejected": -2.455883741378784,
|
|
"loss": 0.6924,
|
|
"rewards/accuracies": 0.30000001192092896,
|
|
"rewards/chosen": -0.14013671875,
|
|
"rewards/margins": -0.13369140028953552,
|
|
"rewards/rejected": -0.0064453124068677425,
|
|
"rewards/weighted_accuracies": 0.34062498807907104,
|
|
"rewards/weighted_chosen": -0.0013603210682049394,
|
|
"rewards/weighted_margins": -0.0010925292735919356,
|
|
"rewards/weighted_rejected": -0.0002677917364053428,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.015702695629416383,
|
|
"grad_norm": 77.6125259399414,
|
|
"learning_rate": 1.5104166666666664e-07,
|
|
"logits/chosen": -0.2918853759765625,
|
|
"logits/rejected": -0.3377639651298523,
|
|
"logps/chosen": -298.05859375,
|
|
"logps/rejected": -268.0132751464844,
|
|
"logps/weighted_chosen": -2.4350829124450684,
|
|
"logps/weighted_rejected": -2.7343993186950684,
|
|
"loss": 0.6926,
|
|
"rewards/accuracies": 0.24062499403953552,
|
|
"rewards/chosen": -0.06621094048023224,
|
|
"rewards/margins": -0.1640625,
|
|
"rewards/rejected": 0.09785155951976776,
|
|
"rewards/weighted_accuracies": 0.3187499940395355,
|
|
"rewards/weighted_chosen": 0.0014068603049963713,
|
|
"rewards/weighted_margins": -0.0015777588123455644,
|
|
"rewards/weighted_rejected": 0.0029846192337572575,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.02093692750588851,
|
|
"grad_norm": 30.666196823120117,
|
|
"learning_rate": 2.03125e-07,
|
|
"logits/chosen": -0.30072021484375,
|
|
"logits/rejected": -0.3433845639228821,
|
|
"logps/chosen": -278.68829345703125,
|
|
"logps/rejected": -253.90780639648438,
|
|
"logps/weighted_chosen": -2.506396532058716,
|
|
"logps/weighted_rejected": -2.8416504859924316,
|
|
"loss": 0.6908,
|
|
"rewards/accuracies": 0.3062500059604645,
|
|
"rewards/chosen": 0.063232421875,
|
|
"rewards/margins": 0.04838867112994194,
|
|
"rewards/rejected": 0.014843749813735485,
|
|
"rewards/weighted_accuracies": 0.40312498807907104,
|
|
"rewards/weighted_chosen": 0.0042968750931322575,
|
|
"rewards/weighted_margins": 0.0019538879860192537,
|
|
"rewards/weighted_rejected": 0.0023429871071130037,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.02617115938236064,
|
|
"grad_norm": 18.60569953918457,
|
|
"learning_rate": 2.552083333333333e-07,
|
|
"logits/chosen": -0.2819870114326477,
|
|
"logits/rejected": -0.32059136033058167,
|
|
"logps/chosen": -280.31951904296875,
|
|
"logps/rejected": -267.4359436035156,
|
|
"logps/weighted_chosen": -2.4267334938049316,
|
|
"logps/weighted_rejected": -2.529711961746216,
|
|
"loss": 0.6891,
|
|
"rewards/accuracies": 0.3187499940395355,
|
|
"rewards/chosen": -0.03535156324505806,
|
|
"rewards/margins": -0.13984374701976776,
|
|
"rewards/rejected": 0.1044921875,
|
|
"rewards/weighted_accuracies": 0.3968749940395355,
|
|
"rewards/weighted_chosen": 0.0039031982887536287,
|
|
"rewards/weighted_margins": 0.005755615420639515,
|
|
"rewards/weighted_rejected": -0.0018524170154705644,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.031405391258832765,
|
|
"grad_norm": 38.21036911010742,
|
|
"learning_rate": 3.0729166666666665e-07,
|
|
"logits/chosen": -0.31453245878219604,
|
|
"logits/rejected": -0.30809077620506287,
|
|
"logps/chosen": -277.66015625,
|
|
"logps/rejected": -261.7445373535156,
|
|
"logps/weighted_chosen": -2.8622069358825684,
|
|
"logps/weighted_rejected": -2.7553467750549316,
|
|
"loss": 0.6894,
|
|
"rewards/accuracies": 0.35624998807907104,
|
|
"rewards/chosen": 0.04150390625,
|
|
"rewards/margins": 0.08027343451976776,
|
|
"rewards/rejected": -0.03876953199505806,
|
|
"rewards/weighted_accuracies": 0.4312500059604645,
|
|
"rewards/weighted_chosen": 0.0006561279296875,
|
|
"rewards/weighted_margins": 0.006243896670639515,
|
|
"rewards/weighted_rejected": -0.005587768740952015,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.036639623135304895,
|
|
"grad_norm": 69.19047546386719,
|
|
"learning_rate": 3.59375e-07,
|
|
"logits/chosen": -0.3177490234375,
|
|
"logits/rejected": -0.3246749937534332,
|
|
"logps/chosen": -289.76251220703125,
|
|
"logps/rejected": -244.92578125,
|
|
"logps/weighted_chosen": -2.3438963890075684,
|
|
"logps/weighted_rejected": -2.7010498046875,
|
|
"loss": 0.6841,
|
|
"rewards/accuracies": 0.49687498807907104,
|
|
"rewards/chosen": 0.29765623807907104,
|
|
"rewards/margins": 0.4546875059604645,
|
|
"rewards/rejected": -0.15703125298023224,
|
|
"rewards/weighted_accuracies": 0.5406249761581421,
|
|
"rewards/weighted_chosen": 0.01530532818287611,
|
|
"rewards/weighted_margins": 0.01918792724609375,
|
|
"rewards/weighted_rejected": -0.0038825988303869963,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.04187385501177702,
|
|
"grad_norm": 51.98476791381836,
|
|
"learning_rate": 4.114583333333333e-07,
|
|
"logits/chosen": -0.2850998044013977,
|
|
"logits/rejected": -0.30662041902542114,
|
|
"logps/chosen": -289.234375,
|
|
"logps/rejected": -270.375,
|
|
"logps/weighted_chosen": -2.5325684547424316,
|
|
"logps/weighted_rejected": -2.796435594558716,
|
|
"loss": 0.6747,
|
|
"rewards/accuracies": 0.5062500238418579,
|
|
"rewards/chosen": 0.512499988079071,
|
|
"rewards/margins": 0.6001952886581421,
|
|
"rewards/rejected": -0.08769531548023224,
|
|
"rewards/weighted_accuracies": 0.5562499761581421,
|
|
"rewards/weighted_chosen": 0.036380767822265625,
|
|
"rewards/weighted_margins": 0.04396667331457138,
|
|
"rewards/weighted_rejected": -0.007586670108139515,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.04710808688824915,
|
|
"grad_norm": 30.52783203125,
|
|
"learning_rate": 4.6354166666666664e-07,
|
|
"logits/chosen": -0.3142959475517273,
|
|
"logits/rejected": -0.3075408935546875,
|
|
"logps/chosen": -280.11407470703125,
|
|
"logps/rejected": -257.95233154296875,
|
|
"logps/weighted_chosen": -2.719482421875,
|
|
"logps/weighted_rejected": -2.88037109375,
|
|
"loss": 0.6687,
|
|
"rewards/accuracies": 0.5062500238418579,
|
|
"rewards/chosen": 0.5205078125,
|
|
"rewards/margins": 0.737109363079071,
|
|
"rewards/rejected": -0.21660156548023224,
|
|
"rewards/weighted_accuracies": 0.621874988079071,
|
|
"rewards/weighted_chosen": 0.06780395656824112,
|
|
"rewards/weighted_margins": 0.07340697944164276,
|
|
"rewards/weighted_rejected": -0.0056396485306322575,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.05234231876472128,
|
|
"grad_norm": 69.397705078125,
|
|
"learning_rate": 5.156249999999999e-07,
|
|
"logits/chosen": -0.28213196992874146,
|
|
"logits/rejected": -0.3543289303779602,
|
|
"logps/chosen": -290.71875,
|
|
"logps/rejected": -286.73126220703125,
|
|
"logps/weighted_chosen": -2.2228636741638184,
|
|
"logps/weighted_rejected": -2.8367552757263184,
|
|
"loss": 0.6848,
|
|
"rewards/accuracies": 0.5718749761581421,
|
|
"rewards/chosen": 0.24521484971046448,
|
|
"rewards/margins": 1.0690429210662842,
|
|
"rewards/rejected": -0.8238281011581421,
|
|
"rewards/weighted_accuracies": 0.5843750238418579,
|
|
"rewards/weighted_chosen": 0.05242309719324112,
|
|
"rewards/weighted_margins": 0.05032653734087944,
|
|
"rewards/weighted_rejected": 0.0021240233909338713,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.05757655064119341,
|
|
"grad_norm": 36.600040435791016,
|
|
"learning_rate": 5.677083333333333e-07,
|
|
"logits/chosen": -0.33063429594039917,
|
|
"logits/rejected": -0.319937139749527,
|
|
"logps/chosen": -296.82501220703125,
|
|
"logps/rejected": -262.2984313964844,
|
|
"logps/weighted_chosen": -2.8468017578125,
|
|
"logps/weighted_rejected": -2.9306397438049316,
|
|
"loss": 0.6773,
|
|
"rewards/accuracies": 0.578125,
|
|
"rewards/chosen": -0.474609375,
|
|
"rewards/margins": 1.053613305091858,
|
|
"rewards/rejected": -1.528222680091858,
|
|
"rewards/weighted_accuracies": 0.534375011920929,
|
|
"rewards/weighted_chosen": 0.013439941219985485,
|
|
"rewards/weighted_margins": 0.05541381984949112,
|
|
"rewards/weighted_rejected": -0.04198913648724556,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.06281078251766553,
|
|
"grad_norm": 57.109580993652344,
|
|
"learning_rate": 6.197916666666666e-07,
|
|
"logits/chosen": -0.33633461594581604,
|
|
"logits/rejected": -0.36155110597610474,
|
|
"logps/chosen": -295.3687438964844,
|
|
"logps/rejected": -256.1953125,
|
|
"logps/weighted_chosen": -2.161865234375,
|
|
"logps/weighted_rejected": -2.4251465797424316,
|
|
"loss": 0.6791,
|
|
"rewards/accuracies": 0.5718749761581421,
|
|
"rewards/chosen": -0.72900390625,
|
|
"rewards/margins": 1.641210913658142,
|
|
"rewards/rejected": -2.3702149391174316,
|
|
"rewards/weighted_accuracies": 0.5562499761581421,
|
|
"rewards/weighted_chosen": 0.007176590152084827,
|
|
"rewards/weighted_margins": 0.05286560207605362,
|
|
"rewards/weighted_rejected": -0.04570160061120987,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.06804501439413765,
|
|
"grad_norm": 39.176841735839844,
|
|
"learning_rate": 6.718749999999999e-07,
|
|
"logits/chosen": -0.29625242948532104,
|
|
"logits/rejected": -0.2914108335971832,
|
|
"logps/chosen": -306.6781311035156,
|
|
"logps/rejected": -280.15936279296875,
|
|
"logps/weighted_chosen": -2.188079833984375,
|
|
"logps/weighted_rejected": -2.5787596702575684,
|
|
"loss": 0.6659,
|
|
"rewards/accuracies": 0.612500011920929,
|
|
"rewards/chosen": -0.644238293170929,
|
|
"rewards/margins": 1.972265601158142,
|
|
"rewards/rejected": -2.616406202316284,
|
|
"rewards/weighted_accuracies": 0.606249988079071,
|
|
"rewards/weighted_chosen": 0.01349639892578125,
|
|
"rewards/weighted_margins": 0.0841522216796875,
|
|
"rewards/weighted_rejected": -0.07064209133386612,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.07327924627060979,
|
|
"grad_norm": 52.14993667602539,
|
|
"learning_rate": 7.239583333333333e-07,
|
|
"logits/chosen": -0.3304199278354645,
|
|
"logits/rejected": -0.3464847505092621,
|
|
"logps/chosen": -301.4390563964844,
|
|
"logps/rejected": -277.9515686035156,
|
|
"logps/weighted_chosen": -2.554003953933716,
|
|
"logps/weighted_rejected": -2.881591796875,
|
|
"loss": 0.6581,
|
|
"rewards/accuracies": 0.6343749761581421,
|
|
"rewards/chosen": -2.746875047683716,
|
|
"rewards/margins": 2.744921922683716,
|
|
"rewards/rejected": -5.491991996765137,
|
|
"rewards/weighted_accuracies": 0.6312500238418579,
|
|
"rewards/weighted_chosen": -0.02762756310403347,
|
|
"rewards/weighted_margins": 0.11510010063648224,
|
|
"rewards/weighted_rejected": -0.14276733994483948,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.07851347814708191,
|
|
"grad_norm": 22.611814498901367,
|
|
"learning_rate": 7.760416666666666e-07,
|
|
"logits/chosen": -0.2870376706123352,
|
|
"logits/rejected": -0.2975311279296875,
|
|
"logps/chosen": -287.859375,
|
|
"logps/rejected": -257.54296875,
|
|
"logps/weighted_chosen": -3.089892625808716,
|
|
"logps/weighted_rejected": -3.1946043968200684,
|
|
"loss": 0.6544,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -3.7095704078674316,
|
|
"rewards/margins": 2.942578077316284,
|
|
"rewards/rejected": -6.652148246765137,
|
|
"rewards/weighted_accuracies": 0.6312500238418579,
|
|
"rewards/weighted_chosen": -0.005145263858139515,
|
|
"rewards/weighted_margins": 0.16416625678539276,
|
|
"rewards/weighted_rejected": -0.16951599717140198,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.08374771002355404,
|
|
"grad_norm": 15.511767387390137,
|
|
"learning_rate": 8.28125e-07,
|
|
"logits/chosen": -0.3232177793979645,
|
|
"logits/rejected": -0.3726806640625,
|
|
"logps/chosen": -308.91796875,
|
|
"logps/rejected": -282.15704345703125,
|
|
"logps/weighted_chosen": -2.5903563499450684,
|
|
"logps/weighted_rejected": -2.742602586746216,
|
|
"loss": 0.6211,
|
|
"rewards/accuracies": 0.6625000238418579,
|
|
"rewards/chosen": -4.956835746765137,
|
|
"rewards/margins": 3.9961915016174316,
|
|
"rewards/rejected": -8.953222274780273,
|
|
"rewards/weighted_accuracies": 0.6499999761581421,
|
|
"rewards/weighted_chosen": -0.002410888671875,
|
|
"rewards/weighted_margins": 0.23797607421875,
|
|
"rewards/weighted_rejected": -0.24028320610523224,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.08898194190002617,
|
|
"grad_norm": 167.33956909179688,
|
|
"learning_rate": 8.802083333333333e-07,
|
|
"logits/chosen": -0.36021536588668823,
|
|
"logits/rejected": -0.3597045838832855,
|
|
"logps/chosen": -311.03045654296875,
|
|
"logps/rejected": -270.46875,
|
|
"logps/weighted_chosen": -2.8318848609924316,
|
|
"logps/weighted_rejected": -3.139453172683716,
|
|
"loss": 0.6949,
|
|
"rewards/accuracies": 0.609375,
|
|
"rewards/chosen": -8.0087890625,
|
|
"rewards/margins": 4.345898628234863,
|
|
"rewards/rejected": -12.354199409484863,
|
|
"rewards/weighted_accuracies": 0.640625,
|
|
"rewards/weighted_chosen": -0.01859130896627903,
|
|
"rewards/weighted_margins": 0.20853272080421448,
|
|
"rewards/weighted_rejected": -0.22731323540210724,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.0942161737764983,
|
|
"grad_norm": 64.57138061523438,
|
|
"learning_rate": 9.322916666666666e-07,
|
|
"logits/chosen": -0.33618468046188354,
|
|
"logits/rejected": -0.3534431457519531,
|
|
"logps/chosen": -284.2171936035156,
|
|
"logps/rejected": -272.12969970703125,
|
|
"logps/weighted_chosen": -2.694580078125,
|
|
"logps/weighted_rejected": -3.225878953933716,
|
|
"loss": 0.6814,
|
|
"rewards/accuracies": 0.6343749761581421,
|
|
"rewards/chosen": -10.43701171875,
|
|
"rewards/margins": 5.353320121765137,
|
|
"rewards/rejected": -15.7919921875,
|
|
"rewards/weighted_accuracies": 0.6187499761581421,
|
|
"rewards/weighted_chosen": -0.08297424018383026,
|
|
"rewards/weighted_margins": 0.26459962129592896,
|
|
"rewards/weighted_rejected": -0.347381591796875,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.09945040565297043,
|
|
"grad_norm": 49.0852165222168,
|
|
"learning_rate": 9.84375e-07,
|
|
"logits/chosen": -0.354086309671402,
|
|
"logits/rejected": -0.38891831040382385,
|
|
"logps/chosen": -319.17498779296875,
|
|
"logps/rejected": -283.31561279296875,
|
|
"logps/weighted_chosen": -2.5078492164611816,
|
|
"logps/weighted_rejected": -3.016357421875,
|
|
"loss": 0.6496,
|
|
"rewards/accuracies": 0.6187499761581421,
|
|
"rewards/chosen": -13.561426162719727,
|
|
"rewards/margins": 5.937890529632568,
|
|
"rewards/rejected": -19.498828887939453,
|
|
"rewards/weighted_accuracies": 0.6343749761581421,
|
|
"rewards/weighted_chosen": -0.16942748427391052,
|
|
"rewards/weighted_margins": 0.24410399794578552,
|
|
"rewards/weighted_rejected": -0.41356199979782104,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.10468463752944256,
|
|
"grad_norm": 53.46296691894531,
|
|
"learning_rate": 9.99959085414323e-07,
|
|
"logits/chosen": -0.37868577241897583,
|
|
"logits/rejected": -0.4114578366279602,
|
|
"logps/chosen": -324.7124938964844,
|
|
"logps/rejected": -279.72967529296875,
|
|
"logps/weighted_chosen": -2.8757567405700684,
|
|
"logps/weighted_rejected": -3.3623046875,
|
|
"loss": 0.639,
|
|
"rewards/accuracies": 0.609375,
|
|
"rewards/chosen": -15.428125381469727,
|
|
"rewards/margins": 6.552148342132568,
|
|
"rewards/rejected": -21.975000381469727,
|
|
"rewards/weighted_accuracies": 0.6343749761581421,
|
|
"rewards/weighted_chosen": -0.18135985732078552,
|
|
"rewards/weighted_margins": 0.29008787870407104,
|
|
"rewards/weighted_rejected": -0.471527099609375,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.10991886940591468,
|
|
"grad_norm": 24.815481185913086,
|
|
"learning_rate": 9.997587035630105e-07,
|
|
"logits/chosen": -0.3853309750556946,
|
|
"logits/rejected": -0.4257049560546875,
|
|
"logps/chosen": -302.82891845703125,
|
|
"logps/rejected": -308.671875,
|
|
"logps/weighted_chosen": -2.632519483566284,
|
|
"logps/weighted_rejected": -3.3669190406799316,
|
|
"loss": 0.6558,
|
|
"rewards/accuracies": 0.65625,
|
|
"rewards/chosen": -15.814453125,
|
|
"rewards/margins": 8.331445693969727,
|
|
"rewards/rejected": -24.146093368530273,
|
|
"rewards/weighted_accuracies": 0.659375011920929,
|
|
"rewards/weighted_chosen": -0.225901797413826,
|
|
"rewards/weighted_margins": 0.23236694931983948,
|
|
"rewards/weighted_rejected": -0.45829468965530396,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.11515310128238682,
|
|
"grad_norm": 24.175745010375977,
|
|
"learning_rate": 9.99391406364405e-07,
|
|
"logits/chosen": -0.37365952134132385,
|
|
"logits/rejected": -0.3758789002895355,
|
|
"logps/chosen": -309.34686279296875,
|
|
"logps/rejected": -293.98126220703125,
|
|
"logps/weighted_chosen": -3.002514600753784,
|
|
"logps/weighted_rejected": -3.453906297683716,
|
|
"loss": 0.6732,
|
|
"rewards/accuracies": 0.6312500238418579,
|
|
"rewards/chosen": -17.203418731689453,
|
|
"rewards/margins": 7.933203220367432,
|
|
"rewards/rejected": -25.137109756469727,
|
|
"rewards/weighted_accuracies": 0.6156250238418579,
|
|
"rewards/weighted_chosen": -0.259225457906723,
|
|
"rewards/weighted_margins": 0.29540252685546875,
|
|
"rewards/weighted_rejected": -0.5546798706054688,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.12038733315885894,
|
|
"grad_norm": 85.15988159179688,
|
|
"learning_rate": 9.988573164927884e-07,
|
|
"logits/chosen": -0.3097473084926605,
|
|
"logits/rejected": -0.3477935791015625,
|
|
"logps/chosen": -286.5078125,
|
|
"logps/rejected": -281.8453063964844,
|
|
"logps/weighted_chosen": -2.66943359375,
|
|
"logps/weighted_rejected": -3.1229491233825684,
|
|
"loss": 0.6646,
|
|
"rewards/accuracies": 0.6656249761581421,
|
|
"rewards/chosen": -17.690723419189453,
|
|
"rewards/margins": 12.424609184265137,
|
|
"rewards/rejected": -30.110157012939453,
|
|
"rewards/weighted_accuracies": 0.65625,
|
|
"rewards/weighted_chosen": -0.2716217041015625,
|
|
"rewards/weighted_margins": 0.32661741971969604,
|
|
"rewards/weighted_rejected": -0.5983597040176392,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.12562156503533106,
|
|
"grad_norm": 26.17377471923828,
|
|
"learning_rate": 9.98156612329838e-07,
|
|
"logits/chosen": -0.39516907930374146,
|
|
"logits/rejected": -0.44511109590530396,
|
|
"logps/chosen": -286.74884033203125,
|
|
"logps/rejected": -318.22735595703125,
|
|
"logps/weighted_chosen": -2.6696534156799316,
|
|
"logps/weighted_rejected": -3.4151854515075684,
|
|
"loss": 0.643,
|
|
"rewards/accuracies": 0.6312500238418579,
|
|
"rewards/chosen": -21.990428924560547,
|
|
"rewards/margins": 14.028905868530273,
|
|
"rewards/rejected": -36.013282775878906,
|
|
"rewards/weighted_accuracies": 0.6625000238418579,
|
|
"rewards/weighted_chosen": -0.2329559326171875,
|
|
"rewards/weighted_margins": 0.3950134217739105,
|
|
"rewards/weighted_rejected": -0.6281493902206421,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.13085579691180318,
|
|
"grad_norm": 56.73057174682617,
|
|
"learning_rate": 9.97289527905053e-07,
|
|
"logits/chosen": -0.40631332993507385,
|
|
"logits/rejected": -0.4203124940395355,
|
|
"logps/chosen": -290.1703186035156,
|
|
"logps/rejected": -291.6328125,
|
|
"logps/weighted_chosen": -3.051513671875,
|
|
"logps/weighted_rejected": -3.3163819313049316,
|
|
"loss": 0.677,
|
|
"rewards/accuracies": 0.606249988079071,
|
|
"rewards/chosen": -25.742870330810547,
|
|
"rewards/margins": 9.973828315734863,
|
|
"rewards/rejected": -35.72148513793945,
|
|
"rewards/weighted_accuracies": 0.6187499761581421,
|
|
"rewards/weighted_chosen": -0.2856689393520355,
|
|
"rewards/weighted_margins": 0.253326416015625,
|
|
"rewards/weighted_rejected": -0.5388733148574829,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.1360900287882753,
|
|
"grad_norm": 17.766258239746094,
|
|
"learning_rate": 9.962563528175875e-07,
|
|
"logits/chosen": -0.3611465394496918,
|
|
"logits/rejected": -0.39628905057907104,
|
|
"logps/chosen": -324.36639404296875,
|
|
"logps/rejected": -297.765625,
|
|
"logps/weighted_chosen": -2.652392625808716,
|
|
"logps/weighted_rejected": -3.535571336746216,
|
|
"loss": 0.6414,
|
|
"rewards/accuracies": 0.578125,
|
|
"rewards/chosen": -25.621288299560547,
|
|
"rewards/margins": 11.306055068969727,
|
|
"rewards/rejected": -36.93359375,
|
|
"rewards/weighted_accuracies": 0.637499988079071,
|
|
"rewards/weighted_chosen": -0.2533508241176605,
|
|
"rewards/weighted_margins": 0.2956604063510895,
|
|
"rewards/weighted_rejected": -0.5490142703056335,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.14132426066474746,
|
|
"grad_norm": 17.552453994750977,
|
|
"learning_rate": 9.950574321395277e-07,
|
|
"logits/chosen": -0.41735154390335083,
|
|
"logits/rejected": -0.441476434469223,
|
|
"logps/chosen": -314.5093688964844,
|
|
"logps/rejected": -295.7093811035156,
|
|
"logps/weighted_chosen": -2.864941358566284,
|
|
"logps/weighted_rejected": -3.25732421875,
|
|
"loss": 0.661,
|
|
"rewards/accuracies": 0.628125011920929,
|
|
"rewards/chosen": -25.1123046875,
|
|
"rewards/margins": 7.519726753234863,
|
|
"rewards/rejected": -32.62890625,
|
|
"rewards/weighted_accuracies": 0.609375,
|
|
"rewards/weighted_chosen": -0.3035888671875,
|
|
"rewards/weighted_margins": 0.2833190858364105,
|
|
"rewards/weighted_rejected": -0.5868393182754517,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.14655849254121958,
|
|
"grad_norm": 47.66518020629883,
|
|
"learning_rate": 9.936931663006413e-07,
|
|
"logits/chosen": -0.4760284423828125,
|
|
"logits/rejected": -0.46795654296875,
|
|
"logps/chosen": -323.48126220703125,
|
|
"logps/rejected": -313.2875061035156,
|
|
"logps/weighted_chosen": -2.794970750808716,
|
|
"logps/weighted_rejected": -3.3581910133361816,
|
|
"loss": 0.6169,
|
|
"rewards/accuracies": 0.690625011920929,
|
|
"rewards/chosen": -20.707616806030273,
|
|
"rewards/margins": 13.166601181030273,
|
|
"rewards/rejected": -33.86640548706055,
|
|
"rewards/weighted_accuracies": 0.7093750238418579,
|
|
"rewards/weighted_chosen": -0.10174255073070526,
|
|
"rewards/weighted_margins": 0.34544676542282104,
|
|
"rewards/weighted_rejected": -0.447021484375,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.1517927244176917,
|
|
"grad_norm": 32.503883361816406,
|
|
"learning_rate": 9.921640109546357e-07,
|
|
"logits/chosen": -0.44742050766944885,
|
|
"logits/rejected": -0.5166229009628296,
|
|
"logps/chosen": -292.1796875,
|
|
"logps/rejected": -289.6234436035156,
|
|
"logps/weighted_chosen": -2.7469239234924316,
|
|
"logps/weighted_rejected": -3.9541258811950684,
|
|
"loss": 0.6249,
|
|
"rewards/accuracies": 0.628125011920929,
|
|
"rewards/chosen": -25.293359756469727,
|
|
"rewards/margins": 12.698633193969727,
|
|
"rewards/rejected": -37.994529724121094,
|
|
"rewards/weighted_accuracies": 0.628125011920929,
|
|
"rewards/weighted_chosen": -0.15215758979320526,
|
|
"rewards/weighted_margins": 0.4393859803676605,
|
|
"rewards/weighted_rejected": -0.5915588140487671,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.15702695629416383,
|
|
"grad_norm": 17.32170867919922,
|
|
"learning_rate": 9.90470476826975e-07,
|
|
"logits/chosen": -0.5146636962890625,
|
|
"logits/rejected": -0.515917956829071,
|
|
"logps/chosen": -302.3570251464844,
|
|
"logps/rejected": -313.68438720703125,
|
|
"logps/weighted_chosen": -2.6830201148986816,
|
|
"logps/weighted_rejected": -3.202099561691284,
|
|
"loss": 0.6526,
|
|
"rewards/accuracies": 0.653124988079071,
|
|
"rewards/chosen": -32.978126525878906,
|
|
"rewards/margins": 13.435937881469727,
|
|
"rewards/rejected": -46.408203125,
|
|
"rewards/weighted_accuracies": 0.621874988079071,
|
|
"rewards/weighted_chosen": -0.23505249619483948,
|
|
"rewards/weighted_margins": 0.33623045682907104,
|
|
"rewards/weighted_rejected": -0.5710296630859375,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.16226118817063595,
|
|
"grad_norm": 25.855854034423828,
|
|
"learning_rate": 9.886131295443002e-07,
|
|
"logits/chosen": -0.6332122683525085,
|
|
"logits/rejected": -0.6879852414131165,
|
|
"logps/chosen": -315.02264404296875,
|
|
"logps/rejected": -296.54998779296875,
|
|
"logps/weighted_chosen": -2.8891844749450684,
|
|
"logps/weighted_rejected": -3.3497071266174316,
|
|
"loss": 0.6099,
|
|
"rewards/accuracies": 0.653124988079071,
|
|
"rewards/chosen": -33.83808517456055,
|
|
"rewards/margins": 12.542577743530273,
|
|
"rewards/rejected": -46.39081954956055,
|
|
"rewards/weighted_accuracies": 0.668749988079071,
|
|
"rewards/weighted_chosen": -0.20775146782398224,
|
|
"rewards/weighted_margins": 0.507794201374054,
|
|
"rewards/weighted_rejected": -0.715728759765625,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.16749542004710807,
|
|
"grad_norm": 19.11484718322754,
|
|
"learning_rate": 9.865925894455166e-07,
|
|
"logits/chosen": -0.730267345905304,
|
|
"logits/rejected": -0.746167004108429,
|
|
"logps/chosen": -338.2242126464844,
|
|
"logps/rejected": -307.18280029296875,
|
|
"logps/weighted_chosen": -2.9883790016174316,
|
|
"logps/weighted_rejected": -3.5892090797424316,
|
|
"loss": 0.6942,
|
|
"rewards/accuracies": 0.6312500238418579,
|
|
"rewards/chosen": -38.233009338378906,
|
|
"rewards/margins": 11.564062118530273,
|
|
"rewards/rejected": -49.80937576293945,
|
|
"rewards/weighted_accuracies": 0.612500011920929,
|
|
"rewards/weighted_chosen": -0.3507080078125,
|
|
"rewards/weighted_margins": 0.3366760313510895,
|
|
"rewards/weighted_rejected": -0.6871337890625,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.17272965192358022,
|
|
"grad_norm": 57.19697570800781,
|
|
"learning_rate": 9.84409531374603e-07,
|
|
"logits/chosen": -0.6843910217285156,
|
|
"logits/rejected": -0.6659576296806335,
|
|
"logps/chosen": -345.46875,
|
|
"logps/rejected": -316.2515563964844,
|
|
"logps/weighted_chosen": -3.05517578125,
|
|
"logps/weighted_rejected": -3.5519776344299316,
|
|
"loss": 0.6569,
|
|
"rewards/accuracies": 0.668749988079071,
|
|
"rewards/chosen": -40.732032775878906,
|
|
"rewards/margins": 12.651952743530273,
|
|
"rewards/rejected": -53.38984298706055,
|
|
"rewards/weighted_accuracies": 0.6468750238418579,
|
|
"rewards/weighted_chosen": -0.3262878358364105,
|
|
"rewards/weighted_margins": 0.346893310546875,
|
|
"rewards/weighted_rejected": -0.6730865240097046,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.17796388380005235,
|
|
"grad_norm": 52.49288558959961,
|
|
"learning_rate": 9.820646844552219e-07,
|
|
"logits/chosen": -0.6993133425712585,
|
|
"logits/rejected": -0.7529846429824829,
|
|
"logps/chosen": -313.59295654296875,
|
|
"logps/rejected": -322.1499938964844,
|
|
"logps/weighted_chosen": -3.0488524436950684,
|
|
"logps/weighted_rejected": -3.440136671066284,
|
|
"loss": 0.6287,
|
|
"rewards/accuracies": 0.6812499761581421,
|
|
"rewards/chosen": -37.06660079956055,
|
|
"rewards/margins": 19.494531631469727,
|
|
"rewards/rejected": -56.556640625,
|
|
"rewards/weighted_accuracies": 0.6968749761581421,
|
|
"rewards/weighted_chosen": -0.30719298124313354,
|
|
"rewards/weighted_margins": 0.448944091796875,
|
|
"rewards/weighted_rejected": -0.755999743938446,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.18319811567652447,
|
|
"grad_norm": 15.657389640808105,
|
|
"learning_rate": 9.795588318471964e-07,
|
|
"logits/chosen": -0.7813507318496704,
|
|
"logits/rejected": -0.7874206304550171,
|
|
"logps/chosen": -299.80157470703125,
|
|
"logps/rejected": -331.4375,
|
|
"logps/weighted_chosen": -2.84619140625,
|
|
"logps/weighted_rejected": -3.315380811691284,
|
|
"loss": 0.6405,
|
|
"rewards/accuracies": 0.6312500238418579,
|
|
"rewards/chosen": -40.32304763793945,
|
|
"rewards/margins": 14.830663681030273,
|
|
"rewards/rejected": -55.15625,
|
|
"rewards/weighted_accuracies": 0.6468750238418579,
|
|
"rewards/weighted_chosen": -0.315826416015625,
|
|
"rewards/weighted_margins": 0.386627197265625,
|
|
"rewards/weighted_rejected": -0.702471911907196,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.1884323475529966,
|
|
"grad_norm": 16.19976806640625,
|
|
"learning_rate": 9.768928104849415e-07,
|
|
"logits/chosen": -0.801177978515625,
|
|
"logits/rejected": -0.799664318561554,
|
|
"logps/chosen": -323.5171813964844,
|
|
"logps/rejected": -305.046875,
|
|
"logps/weighted_chosen": -3.1164307594299316,
|
|
"logps/weighted_rejected": -3.3475098609924316,
|
|
"loss": 0.6865,
|
|
"rewards/accuracies": 0.621874988079071,
|
|
"rewards/chosen": -40.823829650878906,
|
|
"rewards/margins": 15.389843940734863,
|
|
"rewards/rejected": -56.216407775878906,
|
|
"rewards/weighted_accuracies": 0.659375011920929,
|
|
"rewards/weighted_chosen": -0.31828004121780396,
|
|
"rewards/weighted_margins": 0.3831420838832855,
|
|
"rewards/weighted_rejected": -0.7014526128768921,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.19366657942946872,
|
|
"grad_norm": 89.87427520751953,
|
|
"learning_rate": 9.740675107979355e-07,
|
|
"logits/chosen": -0.7640800476074219,
|
|
"logits/rejected": -0.7867538332939148,
|
|
"logps/chosen": -361.13751220703125,
|
|
"logps/rejected": -334.97967529296875,
|
|
"logps/weighted_chosen": -2.5084471702575684,
|
|
"logps/weighted_rejected": -3.4689698219299316,
|
|
"loss": 0.6531,
|
|
"rewards/accuracies": 0.6031249761581421,
|
|
"rewards/chosen": -46.098045349121094,
|
|
"rewards/margins": 14.188085556030273,
|
|
"rewards/rejected": -60.26640701293945,
|
|
"rewards/weighted_accuracies": 0.671875,
|
|
"rewards/weighted_chosen": -0.36387938261032104,
|
|
"rewards/weighted_margins": 0.3567260801792145,
|
|
"rewards/weighted_rejected": -0.720538318157196,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.19890081130594087,
|
|
"grad_norm": 22.484216690063477,
|
|
"learning_rate": 9.71083876413323e-07,
|
|
"logits/chosen": -0.7209137082099915,
|
|
"logits/rejected": -0.7318176031112671,
|
|
"logps/chosen": -353.6031188964844,
|
|
"logps/rejected": -339.16485595703125,
|
|
"logps/weighted_chosen": -2.70361328125,
|
|
"logps/weighted_rejected": -3.5843749046325684,
|
|
"loss": 0.6589,
|
|
"rewards/accuracies": 0.6156250238418579,
|
|
"rewards/chosen": -51.237892150878906,
|
|
"rewards/margins": 18.424999237060547,
|
|
"rewards/rejected": -69.64530944824219,
|
|
"rewards/weighted_accuracies": 0.6312500238418579,
|
|
"rewards/weighted_chosen": -0.4475158751010895,
|
|
"rewards/weighted_margins": 0.3267761170864105,
|
|
"rewards/weighted_rejected": -0.7747405767440796,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.204135043182413,
|
|
"grad_norm": 21.885372161865234,
|
|
"learning_rate": 9.67942903840751e-07,
|
|
"logits/chosen": -0.7708206176757812,
|
|
"logits/rejected": -0.8207153081893921,
|
|
"logps/chosen": -355.18438720703125,
|
|
"logps/rejected": -350.47186279296875,
|
|
"logps/weighted_chosen": -2.8836669921875,
|
|
"logps/weighted_rejected": -3.5904297828674316,
|
|
"loss": 0.6028,
|
|
"rewards/accuracies": 0.675000011920929,
|
|
"rewards/chosen": -50.973045349121094,
|
|
"rewards/margins": 25.190038681030273,
|
|
"rewards/rejected": -76.1617202758789,
|
|
"rewards/weighted_accuracies": 0.7093750238418579,
|
|
"rewards/weighted_chosen": -0.39097899198532104,
|
|
"rewards/weighted_margins": 0.4941650331020355,
|
|
"rewards/weighted_rejected": -0.884967029094696,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.2093692750588851,
|
|
"grad_norm": 26.357742309570312,
|
|
"learning_rate": 9.646456421395447e-07,
|
|
"logits/chosen": -0.805267333984375,
|
|
"logits/rejected": -0.8178039789199829,
|
|
"logps/chosen": -377.52813720703125,
|
|
"logps/rejected": -392.0296936035156,
|
|
"logps/weighted_chosen": -2.7947998046875,
|
|
"logps/weighted_rejected": -3.697582960128784,
|
|
"loss": 0.6296,
|
|
"rewards/accuracies": 0.6656249761581421,
|
|
"rewards/chosen": -59.2109375,
|
|
"rewards/margins": 29.121875762939453,
|
|
"rewards/rejected": -88.32890319824219,
|
|
"rewards/weighted_accuracies": 0.653124988079071,
|
|
"rewards/weighted_chosen": -0.41761475801467896,
|
|
"rewards/weighted_margins": 0.38171082735061646,
|
|
"rewards/weighted_rejected": -0.7994705438613892,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.21460350693535724,
|
|
"grad_norm": 21.382999420166016,
|
|
"learning_rate": 9.611931925683266e-07,
|
|
"logits/chosen": -0.7703964114189148,
|
|
"logits/rejected": -0.808850109577179,
|
|
"logps/chosen": -367.3140563964844,
|
|
"logps/rejected": -348.0687561035156,
|
|
"logps/weighted_chosen": -2.711962938308716,
|
|
"logps/weighted_rejected": -3.4615721702575684,
|
|
"loss": 0.5758,
|
|
"rewards/accuracies": 0.668749988079071,
|
|
"rewards/chosen": -61.02734375,
|
|
"rewards/margins": 23.316797256469727,
|
|
"rewards/rejected": -84.34687805175781,
|
|
"rewards/weighted_accuracies": 0.6937500238418579,
|
|
"rewards/weighted_chosen": -0.40519410371780396,
|
|
"rewards/weighted_margins": 0.521441638469696,
|
|
"rewards/weighted_rejected": -0.9261535406112671,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.21983773881182936,
|
|
"grad_norm": 23.030996322631836,
|
|
"learning_rate": 9.575867082172085e-07,
|
|
"logits/chosen": -0.7789466977119446,
|
|
"logits/rejected": -0.8260132074356079,
|
|
"logps/chosen": -372.22344970703125,
|
|
"logps/rejected": -367.0171813964844,
|
|
"logps/weighted_chosen": -3.114550828933716,
|
|
"logps/weighted_rejected": -3.364208936691284,
|
|
"loss": 0.6211,
|
|
"rewards/accuracies": 0.6656249761581421,
|
|
"rewards/chosen": -67.16816711425781,
|
|
"rewards/margins": 29.731639862060547,
|
|
"rewards/rejected": -96.90156555175781,
|
|
"rewards/weighted_accuracies": 0.659375011920929,
|
|
"rewards/weighted_chosen": -0.5122925043106079,
|
|
"rewards/weighted_margins": 0.5218566656112671,
|
|
"rewards/weighted_rejected": -1.0339782238006592,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.22507197068830148,
|
|
"grad_norm": 16.442333221435547,
|
|
"learning_rate": 9.538273936226673e-07,
|
|
"logits/chosen": -0.830523669719696,
|
|
"logits/rejected": -0.8667358160018921,
|
|
"logps/chosen": -328.4546813964844,
|
|
"logps/rejected": -347.9593811035156,
|
|
"logps/weighted_chosen": -3.373584032058716,
|
|
"logps/weighted_rejected": -3.832958936691284,
|
|
"loss": 0.6425,
|
|
"rewards/accuracies": 0.621874988079071,
|
|
"rewards/chosen": -62.920310974121094,
|
|
"rewards/margins": 20.668750762939453,
|
|
"rewards/rejected": -83.5894546508789,
|
|
"rewards/weighted_accuracies": 0.612500011920929,
|
|
"rewards/weighted_chosen": -0.459890753030777,
|
|
"rewards/weighted_margins": 0.39284056425094604,
|
|
"rewards/weighted_rejected": -0.8525451421737671,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.23030620256477363,
|
|
"grad_norm": 21.955875396728516,
|
|
"learning_rate": 9.499165043652391e-07,
|
|
"logits/chosen": -0.8598114252090454,
|
|
"logits/rejected": -0.868182361125946,
|
|
"logps/chosen": -358.21563720703125,
|
|
"logps/rejected": -356.26251220703125,
|
|
"logps/weighted_chosen": -3.4171142578125,
|
|
"logps/weighted_rejected": -3.6997313499450684,
|
|
"loss": 0.624,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -66.96504211425781,
|
|
"rewards/margins": 22.563282012939453,
|
|
"rewards/rejected": -89.5503921508789,
|
|
"rewards/weighted_accuracies": 0.6625000238418579,
|
|
"rewards/weighted_chosen": -0.5841079950332642,
|
|
"rewards/weighted_margins": 0.434326171875,
|
|
"rewards/weighted_rejected": -1.018707275390625,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.23554043444124576,
|
|
"grad_norm": 75.56902313232422,
|
|
"learning_rate": 9.458553466501665e-07,
|
|
"logits/chosen": -0.9330536127090454,
|
|
"logits/rejected": -0.9642333984375,
|
|
"logps/chosen": -352.6187438964844,
|
|
"logps/rejected": -336.0218811035156,
|
|
"logps/weighted_chosen": -3.4129395484924316,
|
|
"logps/weighted_rejected": -3.74462890625,
|
|
"loss": 0.6566,
|
|
"rewards/accuracies": 0.659375011920929,
|
|
"rewards/chosen": -66.412109375,
|
|
"rewards/margins": 25.757617950439453,
|
|
"rewards/rejected": -92.181640625,
|
|
"rewards/weighted_accuracies": 0.6875,
|
|
"rewards/weighted_chosen": -0.689007580280304,
|
|
"rewards/weighted_margins": 0.4539245665073395,
|
|
"rewards/weighted_rejected": -1.143212914466858,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.24077466631771788,
|
|
"grad_norm": 19.516427993774414,
|
|
"learning_rate": 9.416452768711366e-07,
|
|
"logits/chosen": -0.945111095905304,
|
|
"logits/rejected": -0.9787231683731079,
|
|
"logps/chosen": -369.3671875,
|
|
"logps/rejected": -358.9624938964844,
|
|
"logps/weighted_chosen": -3.1959471702575684,
|
|
"logps/weighted_rejected": -3.948193311691284,
|
|
"loss": 0.6392,
|
|
"rewards/accuracies": 0.621874988079071,
|
|
"rewards/chosen": -77.17265319824219,
|
|
"rewards/margins": 21.916015625,
|
|
"rewards/rejected": -99.0796890258789,
|
|
"rewards/weighted_accuracies": 0.668749988079071,
|
|
"rewards/weighted_chosen": -0.67822265625,
|
|
"rewards/weighted_margins": 0.532788097858429,
|
|
"rewards/weighted_rejected": -1.2112305164337158,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.24600889819419,
|
|
"grad_norm": 19.182979583740234,
|
|
"learning_rate": 9.372877011572557e-07,
|
|
"logits/chosen": -0.9224609136581421,
|
|
"logits/rejected": -0.9388214349746704,
|
|
"logps/chosen": -391.6937561035156,
|
|
"logps/rejected": -377.0625,
|
|
"logps/weighted_chosen": -3.224560499191284,
|
|
"logps/weighted_rejected": -3.783252000808716,
|
|
"loss": 0.6162,
|
|
"rewards/accuracies": 0.621874988079071,
|
|
"rewards/chosen": -80.95976257324219,
|
|
"rewards/margins": 21.617578506469727,
|
|
"rewards/rejected": -102.59883117675781,
|
|
"rewards/weighted_accuracies": 0.6781250238418579,
|
|
"rewards/weighted_chosen": -0.684155285358429,
|
|
"rewards/weighted_margins": 0.5555480718612671,
|
|
"rewards/weighted_rejected": -1.23956298828125,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.2512431300706621,
|
|
"grad_norm": 31.75469970703125,
|
|
"learning_rate": 9.327840749034141e-07,
|
|
"logits/chosen": -0.969561755657196,
|
|
"logits/rejected": -0.998791515827179,
|
|
"logps/chosen": -362.1859436035156,
|
|
"logps/rejected": -385.29998779296875,
|
|
"logps/weighted_chosen": -3.0771241188049316,
|
|
"logps/weighted_rejected": -4.388257026672363,
|
|
"loss": 0.6296,
|
|
"rewards/accuracies": 0.6812499761581421,
|
|
"rewards/chosen": -75.1123046875,
|
|
"rewards/margins": 33.66425704956055,
|
|
"rewards/rejected": -108.75,
|
|
"rewards/weighted_accuracies": 0.6781250238418579,
|
|
"rewards/weighted_chosen": -0.606555163860321,
|
|
"rewards/weighted_margins": 0.612231433391571,
|
|
"rewards/weighted_rejected": -1.2182190418243408,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.2564773619471343,
|
|
"grad_norm": 37.024818420410156,
|
|
"learning_rate": 9.281359022841965e-07,
|
|
"logits/chosen": -0.846588134765625,
|
|
"logits/rejected": -0.859790027141571,
|
|
"logps/chosen": -352.46405029296875,
|
|
"logps/rejected": -355.24688720703125,
|
|
"logps/weighted_chosen": -3.219531297683716,
|
|
"logps/weighted_rejected": -4.648681640625,
|
|
"loss": 0.5897,
|
|
"rewards/accuracies": 0.640625,
|
|
"rewards/chosen": -77.3949203491211,
|
|
"rewards/margins": 32.93046951293945,
|
|
"rewards/rejected": -110.32890319824219,
|
|
"rewards/weighted_accuracies": 0.703125,
|
|
"rewards/weighted_chosen": -0.7288268804550171,
|
|
"rewards/weighted_margins": 0.6741577386856079,
|
|
"rewards/weighted_rejected": -1.403161644935608,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.26171159382360637,
|
|
"grad_norm": 17.740766525268555,
|
|
"learning_rate": 9.233447357514989e-07,
|
|
"logits/chosen": -0.8205505609512329,
|
|
"logits/rejected": -0.863543689250946,
|
|
"logps/chosen": -375.52032470703125,
|
|
"logps/rejected": -378.3500061035156,
|
|
"logps/weighted_chosen": -3.53125,
|
|
"logps/weighted_rejected": -4.106689453125,
|
|
"loss": 0.6305,
|
|
"rewards/accuracies": 0.6499999761581421,
|
|
"rewards/chosen": -77.591796875,
|
|
"rewards/margins": 30.978906631469727,
|
|
"rewards/rejected": -108.54609680175781,
|
|
"rewards/weighted_accuracies": 0.668749988079071,
|
|
"rewards/weighted_chosen": -0.6407378911972046,
|
|
"rewards/weighted_margins": 0.6662231683731079,
|
|
"rewards/weighted_rejected": -1.30645751953125,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.26171159382360637,
|
|
"eval_logits/chosen": -0.9400458931922913,
|
|
"eval_logits/rejected": -0.955981433391571,
|
|
"eval_logps/chosen": -371.72900390625,
|
|
"eval_logps/rejected": -379.6419982910156,
|
|
"eval_logps/weighted_chosen": -3.214712381362915,
|
|
"eval_logps/weighted_rejected": -4.0158867835998535,
|
|
"eval_loss": 0.6316163539886475,
|
|
"eval_rewards/accuracies": 0.6349999904632568,
|
|
"eval_rewards/chosen": -82.98784637451172,
|
|
"eval_rewards/margins": 28.939437866210938,
|
|
"eval_rewards/rejected": -111.93875122070312,
|
|
"eval_rewards/weighted_accuracies": 0.6725000143051147,
|
|
"eval_rewards/weighted_chosen": -0.6669993996620178,
|
|
"eval_rewards/weighted_margins": 0.5506796836853027,
|
|
"eval_rewards/weighted_rejected": -1.2176789045333862,
|
|
"eval_runtime": 1162.5522,
|
|
"eval_samples_per_second": 1.72,
|
|
"eval_steps_per_second": 0.43,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.2669458257000785,
|
|
"grad_norm": 59.14344787597656,
|
|
"learning_rate": 9.184121755160232e-07,
|
|
"logits/chosen": -0.9093383550643921,
|
|
"logits/rejected": -0.9390915036201477,
|
|
"logps/chosen": -378.3890686035156,
|
|
"logps/rejected": -401.62811279296875,
|
|
"logps/weighted_chosen": -3.4715576171875,
|
|
"logps/weighted_rejected": -4.080712795257568,
|
|
"loss": 0.6505,
|
|
"rewards/accuracies": 0.659375011920929,
|
|
"rewards/chosen": -77.12968444824219,
|
|
"rewards/margins": 35.939842224121094,
|
|
"rewards/rejected": -113.0718765258789,
|
|
"rewards/weighted_accuracies": 0.6968749761581421,
|
|
"rewards/weighted_chosen": -0.6525024175643921,
|
|
"rewards/weighted_margins": 0.52886962890625,
|
|
"rewards/weighted_rejected": -1.1813843250274658,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 0.2721800575765506,
|
|
"grad_norm": 33.64823913574219,
|
|
"learning_rate": 9.133398690128193e-07,
|
|
"logits/chosen": -0.942626953125,
|
|
"logits/rejected": -0.965716540813446,
|
|
"logps/chosen": -400.1890563964844,
|
|
"logps/rejected": -403.63592529296875,
|
|
"logps/weighted_chosen": -3.2196044921875,
|
|
"logps/weighted_rejected": -4.1656494140625,
|
|
"loss": 0.6062,
|
|
"rewards/accuracies": 0.6625000238418579,
|
|
"rewards/chosen": -78.64530944824219,
|
|
"rewards/margins": 44.30859375,
|
|
"rewards/rejected": -122.9917984008789,
|
|
"rewards/weighted_accuracies": 0.7406250238418579,
|
|
"rewards/weighted_chosen": -0.48695677518844604,
|
|
"rewards/weighted_margins": 0.6421569585800171,
|
|
"rewards/weighted_rejected": -1.129034399986267,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 0.27741428945302277,
|
|
"grad_norm": 36.97419738769531,
|
|
"learning_rate": 9.081295103510554e-07,
|
|
"logits/chosen": -0.9060531854629517,
|
|
"logits/rejected": -0.954547107219696,
|
|
"logps/chosen": -377.2640686035156,
|
|
"logps/rejected": -402.17498779296875,
|
|
"logps/weighted_chosen": -2.8717284202575684,
|
|
"logps/weighted_rejected": -4.353662014007568,
|
|
"loss": 0.5138,
|
|
"rewards/accuracies": 0.653124988079071,
|
|
"rewards/chosen": -93.2210922241211,
|
|
"rewards/margins": 39.227149963378906,
|
|
"rewards/rejected": -132.44140625,
|
|
"rewards/weighted_accuracies": 0.7250000238418579,
|
|
"rewards/weighted_chosen": -0.5675109624862671,
|
|
"rewards/weighted_margins": 0.93292236328125,
|
|
"rewards/weighted_rejected": -1.501245141029358,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 0.2826485213294949,
|
|
"grad_norm": 23.371498107910156,
|
|
"learning_rate": 9.027828397481989e-07,
|
|
"logits/chosen": -0.8855453729629517,
|
|
"logits/rejected": -0.911120593547821,
|
|
"logps/chosen": -357.30078125,
|
|
"logps/rejected": -385.8531188964844,
|
|
"logps/weighted_chosen": -3.6192626953125,
|
|
"logps/weighted_rejected": -4.450634956359863,
|
|
"loss": 0.6284,
|
|
"rewards/accuracies": 0.628125011920929,
|
|
"rewards/chosen": -96.7691421508789,
|
|
"rewards/margins": 33.722267150878906,
|
|
"rewards/rejected": -130.5457000732422,
|
|
"rewards/weighted_accuracies": 0.643750011920929,
|
|
"rewards/weighted_chosen": -0.788897693157196,
|
|
"rewards/weighted_margins": 0.5829833745956421,
|
|
"rewards/weighted_rejected": -1.3713562488555908,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 0.287882753205967,
|
|
"grad_norm": 21.5579833984375,
|
|
"learning_rate": 8.973016429487988e-07,
|
|
"logits/chosen": -0.9223998785018921,
|
|
"logits/rejected": -0.9320526123046875,
|
|
"logps/chosen": -376.43438720703125,
|
|
"logps/rejected": -393.18438720703125,
|
|
"logps/weighted_chosen": -3.503002882003784,
|
|
"logps/weighted_rejected": -3.9817872047424316,
|
|
"loss": 0.6093,
|
|
"rewards/accuracies": 0.6499999761581421,
|
|
"rewards/chosen": -94.5347671508789,
|
|
"rewards/margins": 44.30546951293945,
|
|
"rewards/rejected": -138.84414672851562,
|
|
"rewards/weighted_accuracies": 0.6968749761581421,
|
|
"rewards/weighted_chosen": -0.7487853765487671,
|
|
"rewards/weighted_margins": 0.62567138671875,
|
|
"rewards/weighted_rejected": -1.3746826648712158,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.29311698508243916,
|
|
"grad_norm": 55.23554992675781,
|
|
"learning_rate": 8.916877506280601e-07,
|
|
"logits/chosen": -0.969989001750946,
|
|
"logits/rejected": -0.9665802121162415,
|
|
"logps/chosen": -383.078125,
|
|
"logps/rejected": -397.75,
|
|
"logps/weighted_chosen": -3.5821290016174316,
|
|
"logps/weighted_rejected": -4.008593559265137,
|
|
"loss": 0.6282,
|
|
"rewards/accuracies": 0.6499999761581421,
|
|
"rewards/chosen": -102.19023132324219,
|
|
"rewards/margins": 41.46562576293945,
|
|
"rewards/rejected": -143.642578125,
|
|
"rewards/weighted_accuracies": 0.6781250238418579,
|
|
"rewards/weighted_chosen": -0.7835754156112671,
|
|
"rewards/weighted_margins": 0.644238293170929,
|
|
"rewards/weighted_rejected": -1.427978515625,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 0.29835121695891126,
|
|
"grad_norm": 18.939470291137695,
|
|
"learning_rate": 8.85943037780415e-07,
|
|
"logits/chosen": -1.0394058227539062,
|
|
"logits/rejected": -1.042639136314392,
|
|
"logps/chosen": -384.078125,
|
|
"logps/rejected": -366.9984436035156,
|
|
"logps/weighted_chosen": -3.3567872047424316,
|
|
"logps/weighted_rejected": -3.9019775390625,
|
|
"loss": 0.5908,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -97.7894515991211,
|
|
"rewards/margins": 27.4404296875,
|
|
"rewards/rejected": -125.2328109741211,
|
|
"rewards/weighted_accuracies": 0.703125,
|
|
"rewards/weighted_chosen": -0.7109375,
|
|
"rewards/weighted_margins": 0.566577136516571,
|
|
"rewards/weighted_rejected": -1.2770659923553467,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 0.3035854488353834,
|
|
"grad_norm": 54.97072982788086,
|
|
"learning_rate": 8.800694230932884e-07,
|
|
"logits/chosen": -0.958050549030304,
|
|
"logits/rejected": -0.9729766845703125,
|
|
"logps/chosen": -381.4140625,
|
|
"logps/rejected": -385.4296875,
|
|
"logps/weighted_chosen": -2.9930176734924316,
|
|
"logps/weighted_rejected": -3.6538329124450684,
|
|
"loss": 0.6283,
|
|
"rewards/accuracies": 0.606249988079071,
|
|
"rewards/chosen": -91.8843765258789,
|
|
"rewards/margins": 28.43359375,
|
|
"rewards/rejected": -120.30078125,
|
|
"rewards/weighted_accuracies": 0.659375011920929,
|
|
"rewards/weighted_chosen": -0.5365799069404602,
|
|
"rewards/weighted_margins": 0.4689392149448395,
|
|
"rewards/weighted_rejected": -1.005883812904358,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 0.30881968071185556,
|
|
"grad_norm": 187.63499450683594,
|
|
"learning_rate": 8.740688683062723e-07,
|
|
"logits/chosen": -1.0116729736328125,
|
|
"logits/rejected": -1.0273834466934204,
|
|
"logps/chosen": -427.6859436035156,
|
|
"logps/rejected": -400.0640563964844,
|
|
"logps/weighted_chosen": -2.997143507003784,
|
|
"logps/weighted_rejected": -3.7496094703674316,
|
|
"loss": 0.6363,
|
|
"rewards/accuracies": 0.578125,
|
|
"rewards/chosen": -98.6539077758789,
|
|
"rewards/margins": 25.619726181030273,
|
|
"rewards/rejected": -124.2671890258789,
|
|
"rewards/weighted_accuracies": 0.675000011920929,
|
|
"rewards/weighted_chosen": -0.539746105670929,
|
|
"rewards/weighted_margins": 0.48472291231155396,
|
|
"rewards/weighted_rejected": -1.023950219154358,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 0.31405391258832765,
|
|
"grad_norm": 17.906530380249023,
|
|
"learning_rate": 8.679433775559215e-07,
|
|
"logits/chosen": -0.991503894329071,
|
|
"logits/rejected": -1.0299193859100342,
|
|
"logps/chosen": -428.88592529296875,
|
|
"logps/rejected": -425.1625061035156,
|
|
"logps/weighted_chosen": -2.877368211746216,
|
|
"logps/weighted_rejected": -3.985302686691284,
|
|
"loss": 0.5932,
|
|
"rewards/accuracies": 0.628125011920929,
|
|
"rewards/chosen": -104.1353530883789,
|
|
"rewards/margins": 35.59746170043945,
|
|
"rewards/rejected": -139.7765655517578,
|
|
"rewards/weighted_accuracies": 0.659375011920929,
|
|
"rewards/weighted_chosen": -0.6103607416152954,
|
|
"rewards/weighted_margins": 0.5564330816268921,
|
|
"rewards/weighted_rejected": -1.166479468345642,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.3192881444647998,
|
|
"grad_norm": 33.20716094970703,
|
|
"learning_rate": 8.616949967063871e-07,
|
|
"logits/chosen": -0.9755920171737671,
|
|
"logits/rejected": -1.0198791027069092,
|
|
"logps/chosen": -370.45001220703125,
|
|
"logps/rejected": -400.4906311035156,
|
|
"logps/weighted_chosen": -3.3270506858825684,
|
|
"logps/weighted_rejected": -3.9315428733825684,
|
|
"loss": 0.6822,
|
|
"rewards/accuracies": 0.65625,
|
|
"rewards/chosen": -102.12422180175781,
|
|
"rewards/margins": 37.60078048706055,
|
|
"rewards/rejected": -139.7734375,
|
|
"rewards/weighted_accuracies": 0.640625,
|
|
"rewards/weighted_chosen": -0.7267090082168579,
|
|
"rewards/weighted_margins": 0.43719482421875,
|
|
"rewards/weighted_rejected": -1.163793921470642,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 0.3245223763412719,
|
|
"grad_norm": 13.562949180603027,
|
|
"learning_rate": 8.553258126661154e-07,
|
|
"logits/chosen": -1.00177001953125,
|
|
"logits/rejected": -1.010014295578003,
|
|
"logps/chosen": -390.65234375,
|
|
"logps/rejected": -402.65155029296875,
|
|
"logps/weighted_chosen": -3.4473876953125,
|
|
"logps/weighted_rejected": -4.26806640625,
|
|
"loss": 0.6605,
|
|
"rewards/accuracies": 0.6187499761581421,
|
|
"rewards/chosen": -110.80390930175781,
|
|
"rewards/margins": 35.68242263793945,
|
|
"rewards/rejected": -146.5031280517578,
|
|
"rewards/weighted_accuracies": 0.6937500238418579,
|
|
"rewards/weighted_chosen": -0.7691070437431335,
|
|
"rewards/weighted_margins": 0.534271240234375,
|
|
"rewards/weighted_rejected": -1.3035888671875,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 0.32975660821774405,
|
|
"grad_norm": 29.253734588623047,
|
|
"learning_rate": 8.488379526908368e-07,
|
|
"logits/chosen": -0.978869616985321,
|
|
"logits/rejected": -0.9867492914199829,
|
|
"logps/chosen": -406.9437561035156,
|
|
"logps/rejected": -431.9906311035156,
|
|
"logps/weighted_chosen": -3.346386671066284,
|
|
"logps/weighted_rejected": -4.080664157867432,
|
|
"loss": 0.5991,
|
|
"rewards/accuracies": 0.65625,
|
|
"rewards/chosen": -116.3648452758789,
|
|
"rewards/margins": 46.68730545043945,
|
|
"rewards/rejected": -163.06405639648438,
|
|
"rewards/weighted_accuracies": 0.706250011920929,
|
|
"rewards/weighted_chosen": -0.757556140422821,
|
|
"rewards/weighted_margins": 0.597582995891571,
|
|
"rewards/weighted_rejected": -1.3551514148712158,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 0.33499084009421615,
|
|
"grad_norm": 28.59862518310547,
|
|
"learning_rate": 8.422335836730802e-07,
|
|
"logits/chosen": -0.983142077922821,
|
|
"logits/rejected": -0.9791107177734375,
|
|
"logps/chosen": -378.4984436035156,
|
|
"logps/rejected": -432.71563720703125,
|
|
"logps/weighted_chosen": -3.094311475753784,
|
|
"logps/weighted_rejected": -3.8768067359924316,
|
|
"loss": 0.6061,
|
|
"rewards/accuracies": 0.643750011920929,
|
|
"rewards/chosen": -104.8597640991211,
|
|
"rewards/margins": 50.2001953125,
|
|
"rewards/rejected": -155.0695343017578,
|
|
"rewards/weighted_accuracies": 0.699999988079071,
|
|
"rewards/weighted_chosen": -0.6736419796943665,
|
|
"rewards/weighted_margins": 0.5768188238143921,
|
|
"rewards/weighted_rejected": -1.250451683998108,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 0.3402250719706883,
|
|
"grad_norm": 20.690876007080078,
|
|
"learning_rate": 8.355149114184485e-07,
|
|
"logits/chosen": -1.030615210533142,
|
|
"logits/rejected": -1.0146636962890625,
|
|
"logps/chosen": -416.39373779296875,
|
|
"logps/rejected": -445.0625,
|
|
"logps/weighted_chosen": -3.255859375,
|
|
"logps/weighted_rejected": -3.76806640625,
|
|
"loss": 0.6048,
|
|
"rewards/accuracies": 0.6468750238418579,
|
|
"rewards/chosen": -108.90547180175781,
|
|
"rewards/margins": 53.869140625,
|
|
"rewards/rejected": -162.74258422851562,
|
|
"rewards/weighted_accuracies": 0.6781250238418579,
|
|
"rewards/weighted_chosen": -0.700115978717804,
|
|
"rewards/weighted_margins": 0.5948852300643921,
|
|
"rewards/weighted_rejected": -1.2950623035430908,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.34545930384716045,
|
|
"grad_norm": 33.73557662963867,
|
|
"learning_rate": 8.286841799088963e-07,
|
|
"logits/chosen": -1.0694351196289062,
|
|
"logits/rejected": -1.0623047351837158,
|
|
"logps/chosen": -395.19219970703125,
|
|
"logps/rejected": -412.45001220703125,
|
|
"logps/weighted_chosen": -2.8808836936950684,
|
|
"logps/weighted_rejected": -3.636523485183716,
|
|
"loss": 0.6207,
|
|
"rewards/accuracies": 0.612500011920929,
|
|
"rewards/chosen": -111.27070617675781,
|
|
"rewards/margins": 38.04765701293945,
|
|
"rewards/rejected": -149.3312530517578,
|
|
"rewards/weighted_accuracies": 0.6875,
|
|
"rewards/weighted_chosen": -0.649340808391571,
|
|
"rewards/weighted_margins": 0.5112365484237671,
|
|
"rewards/weighted_rejected": -1.1605103015899658,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 0.35069353572363254,
|
|
"grad_norm": 18.913761138916016,
|
|
"learning_rate": 8.217436705532599e-07,
|
|
"logits/chosen": -1.0736572742462158,
|
|
"logits/rejected": -1.090576171875,
|
|
"logps/chosen": -431.78125,
|
|
"logps/rejected": -421.4312438964844,
|
|
"logps/weighted_chosen": -3.0259766578674316,
|
|
"logps/weighted_rejected": -3.81298828125,
|
|
"loss": 0.6004,
|
|
"rewards/accuracies": 0.640625,
|
|
"rewards/chosen": -129.1144561767578,
|
|
"rewards/margins": 28.649608612060547,
|
|
"rewards/rejected": -157.74844360351562,
|
|
"rewards/weighted_accuracies": 0.684374988079071,
|
|
"rewards/weighted_chosen": -0.6614929437637329,
|
|
"rewards/weighted_margins": 0.636364758014679,
|
|
"rewards/weighted_rejected": -1.2976195812225342,
|
|
"step": 670
|
|
},
|
|
{
|
|
"epoch": 0.3559277676001047,
|
|
"grad_norm": 22.967056274414062,
|
|
"learning_rate": 8.14695701425284e-07,
|
|
"logits/chosen": -1.051629662513733,
|
|
"logits/rejected": -1.0860717296600342,
|
|
"logps/chosen": -432.98126220703125,
|
|
"logps/rejected": -426.5953063964844,
|
|
"logps/weighted_chosen": -3.1195311546325684,
|
|
"logps/weighted_rejected": -3.96337890625,
|
|
"loss": 0.5827,
|
|
"rewards/accuracies": 0.581250011920929,
|
|
"rewards/chosen": -127.5308609008789,
|
|
"rewards/margins": 36.908592224121094,
|
|
"rewards/rejected": -164.46484375,
|
|
"rewards/weighted_accuracies": 0.6875,
|
|
"rewards/weighted_chosen": -0.686236560344696,
|
|
"rewards/weighted_margins": 0.6052306890487671,
|
|
"rewards/weighted_rejected": -1.2908813953399658,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 0.3611619994765768,
|
|
"grad_norm": 15.409049034118652,
|
|
"learning_rate": 8.075426264894046e-07,
|
|
"logits/chosen": -1.006170630455017,
|
|
"logits/rejected": -1.0271179676055908,
|
|
"logps/chosen": -434.3828125,
|
|
"logps/rejected": -452.90625,
|
|
"logps/weighted_chosen": -3.187304735183716,
|
|
"logps/weighted_rejected": -4.466699123382568,
|
|
"loss": 0.5501,
|
|
"rewards/accuracies": 0.675000011920929,
|
|
"rewards/chosen": -128.9619140625,
|
|
"rewards/margins": 46.51679611206055,
|
|
"rewards/rejected": -175.45468139648438,
|
|
"rewards/weighted_accuracies": 0.737500011920929,
|
|
"rewards/weighted_chosen": -0.6887573003768921,
|
|
"rewards/weighted_margins": 0.76141357421875,
|
|
"rewards/weighted_rejected": -1.4498474597930908,
|
|
"step": 690
|
|
},
|
|
{
|
|
"epoch": 0.36639623135304894,
|
|
"grad_norm": 23.13039207458496,
|
|
"learning_rate": 8.002868348145435e-07,
|
|
"logits/chosen": -0.9920509457588196,
|
|
"logits/rejected": -0.996777355670929,
|
|
"logps/chosen": -439.54998779296875,
|
|
"logps/rejected": -444.30780029296875,
|
|
"logps/weighted_chosen": -3.31005859375,
|
|
"logps/weighted_rejected": -3.691967725753784,
|
|
"loss": 0.6102,
|
|
"rewards/accuracies": 0.628125011920929,
|
|
"rewards/chosen": -139.38632202148438,
|
|
"rewards/margins": 36.769142150878906,
|
|
"rewards/rejected": -176.1867218017578,
|
|
"rewards/weighted_accuracies": 0.7093750238418579,
|
|
"rewards/weighted_chosen": -0.8051727414131165,
|
|
"rewards/weighted_margins": 0.5639587640762329,
|
|
"rewards/weighted_rejected": -1.3695800304412842,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 0.3716304632295211,
|
|
"grad_norm": 38.38508224487305,
|
|
"learning_rate": 7.92930749776179e-07,
|
|
"logits/chosen": -1.0172607898712158,
|
|
"logits/rejected": -1.042083740234375,
|
|
"logps/chosen": -422.10546875,
|
|
"logps/rejected": -444.6812438964844,
|
|
"logps/weighted_chosen": -3.483569383621216,
|
|
"logps/weighted_rejected": -4.216650485992432,
|
|
"loss": 0.6316,
|
|
"rewards/accuracies": 0.621874988079071,
|
|
"rewards/chosen": -150.42578125,
|
|
"rewards/margins": 33.904685974121094,
|
|
"rewards/rejected": -184.33438110351562,
|
|
"rewards/weighted_accuracies": 0.659375011920929,
|
|
"rewards/weighted_chosen": -0.72515869140625,
|
|
"rewards/weighted_margins": 0.590161144733429,
|
|
"rewards/weighted_rejected": -1.3153502941131592,
|
|
"step": 710
|
|
},
|
|
{
|
|
"epoch": 0.3768646951059932,
|
|
"grad_norm": 20.4711971282959,
|
|
"learning_rate": 7.854768282469582e-07,
|
|
"logits/chosen": -1.0770995616912842,
|
|
"logits/rejected": -1.114935278892517,
|
|
"logps/chosen": -401.88592529296875,
|
|
"logps/rejected": -458.3374938964844,
|
|
"logps/weighted_chosen": -3.099194288253784,
|
|
"logps/weighted_rejected": -3.990966796875,
|
|
"loss": 0.5978,
|
|
"rewards/accuracies": 0.653124988079071,
|
|
"rewards/chosen": -124.43046569824219,
|
|
"rewards/margins": 56.217384338378906,
|
|
"rewards/rejected": -180.6015625,
|
|
"rewards/weighted_accuracies": 0.7093750238418579,
|
|
"rewards/weighted_chosen": -0.6303802728652954,
|
|
"rewards/weighted_margins": 0.679125964641571,
|
|
"rewards/weighted_rejected": -1.3104156255722046,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 0.38209892698246534,
|
|
"grad_norm": 15.2982177734375,
|
|
"learning_rate": 7.779275597761215e-07,
|
|
"logits/chosen": -1.0406615734100342,
|
|
"logits/rejected": -1.097131371498108,
|
|
"logps/chosen": -414.5625,
|
|
"logps/rejected": -462.5453186035156,
|
|
"logps/weighted_chosen": -3.3689942359924316,
|
|
"logps/weighted_rejected": -4.090185642242432,
|
|
"loss": 0.5689,
|
|
"rewards/accuracies": 0.6781250238418579,
|
|
"rewards/chosen": -138.33358764648438,
|
|
"rewards/margins": 55.842185974121094,
|
|
"rewards/rejected": -194.16171264648438,
|
|
"rewards/weighted_accuracies": 0.734375,
|
|
"rewards/weighted_chosen": -0.6730102300643921,
|
|
"rewards/weighted_margins": 0.699902355670929,
|
|
"rewards/weighted_rejected": -1.373620629310608,
|
|
"step": 730
|
|
},
|
|
{
|
|
"epoch": 0.38733315885893743,
|
|
"grad_norm": 37.01581954956055,
|
|
"learning_rate": 7.702854657580126e-07,
|
|
"logits/chosen": -1.1022522449493408,
|
|
"logits/rejected": -1.1134154796600342,
|
|
"logps/chosen": -459.6156311035156,
|
|
"logps/rejected": -462.4125061035156,
|
|
"logps/weighted_chosen": -3.382946729660034,
|
|
"logps/weighted_rejected": -4.401709079742432,
|
|
"loss": 0.6148,
|
|
"rewards/accuracies": 0.653124988079071,
|
|
"rewards/chosen": -164.8722686767578,
|
|
"rewards/margins": 43.28515625,
|
|
"rewards/rejected": -208.21133422851562,
|
|
"rewards/weighted_accuracies": 0.668749988079071,
|
|
"rewards/weighted_chosen": -0.8088958859443665,
|
|
"rewards/weighted_margins": 0.698986828327179,
|
|
"rewards/weighted_rejected": -1.5074951648712158,
|
|
"step": 740
|
|
},
|
|
{
|
|
"epoch": 0.3925673907354096,
|
|
"grad_norm": 26.924480438232422,
|
|
"learning_rate": 7.625530985899547e-07,
|
|
"logits/chosen": -1.0611861944198608,
|
|
"logits/rejected": -1.075714111328125,
|
|
"logps/chosen": -430.6312561035156,
|
|
"logps/rejected": -461.9828186035156,
|
|
"logps/weighted_chosen": -3.210217237472534,
|
|
"logps/weighted_rejected": -4.483691215515137,
|
|
"loss": 0.5815,
|
|
"rewards/accuracies": 0.5874999761581421,
|
|
"rewards/chosen": -163.78555297851562,
|
|
"rewards/margins": 53.288673400878906,
|
|
"rewards/rejected": -216.96054077148438,
|
|
"rewards/weighted_accuracies": 0.675000011920929,
|
|
"rewards/weighted_chosen": -0.876666247844696,
|
|
"rewards/weighted_margins": 0.7388671636581421,
|
|
"rewards/weighted_rejected": -1.6160767078399658,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 0.39780162261188173,
|
|
"grad_norm": 18.056201934814453,
|
|
"learning_rate": 7.547330408197694e-07,
|
|
"logits/chosen": -1.0437713861465454,
|
|
"logits/rejected": -1.075250267982483,
|
|
"logps/chosen": -460.1875,
|
|
"logps/rejected": -450.8109436035156,
|
|
"logps/weighted_chosen": -3.229296922683716,
|
|
"logps/weighted_rejected": -4.191064357757568,
|
|
"loss": 0.6146,
|
|
"rewards/accuracies": 0.6000000238418579,
|
|
"rewards/chosen": -164.56640625,
|
|
"rewards/margins": 29.973241806030273,
|
|
"rewards/rejected": -194.6171875,
|
|
"rewards/weighted_accuracies": 0.668749988079071,
|
|
"rewards/weighted_chosen": -0.7951415777206421,
|
|
"rewards/weighted_margins": 0.599578857421875,
|
|
"rewards/weighted_rejected": -1.395105004310608,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 0.40303585448835383,
|
|
"grad_norm": 16.393836975097656,
|
|
"learning_rate": 7.468279042832271e-07,
|
|
"logits/chosen": -1.0454833507537842,
|
|
"logits/rejected": -1.0705687999725342,
|
|
"logps/chosen": -416.57421875,
|
|
"logps/rejected": -483.1390686035156,
|
|
"logps/weighted_chosen": -3.134448289871216,
|
|
"logps/weighted_rejected": -3.8697752952575684,
|
|
"loss": 0.6132,
|
|
"rewards/accuracies": 0.6468750238418579,
|
|
"rewards/chosen": -136.8953094482422,
|
|
"rewards/margins": 56.886329650878906,
|
|
"rewards/rejected": -193.72305297851562,
|
|
"rewards/weighted_accuracies": 0.6625000238418579,
|
|
"rewards/weighted_chosen": -0.7508605718612671,
|
|
"rewards/weighted_margins": 0.5269104242324829,
|
|
"rewards/weighted_rejected": -1.277490258216858,
|
|
"step": 770
|
|
},
|
|
{
|
|
"epoch": 0.408270086364826,
|
|
"grad_norm": 16.254776000976562,
|
|
"learning_rate": 7.388403292317154e-07,
|
|
"logits/chosen": -1.0213134288787842,
|
|
"logits/rejected": -1.072851538658142,
|
|
"logps/chosen": -445.00469970703125,
|
|
"logps/rejected": -451.1468811035156,
|
|
"logps/weighted_chosen": -3.063525438308716,
|
|
"logps/weighted_rejected": -3.897021532058716,
|
|
"loss": 0.6134,
|
|
"rewards/accuracies": 0.609375,
|
|
"rewards/chosen": -144.0207061767578,
|
|
"rewards/margins": 41.165626525878906,
|
|
"rewards/rejected": -185.15585327148438,
|
|
"rewards/weighted_accuracies": 0.71875,
|
|
"rewards/weighted_chosen": -0.752685546875,
|
|
"rewards/weighted_margins": 0.5895751714706421,
|
|
"rewards/weighted_rejected": -1.341894507408142,
|
|
"step": 780
|
|
},
|
|
{
|
|
"epoch": 0.4135043182412981,
|
|
"grad_norm": 19.472450256347656,
|
|
"learning_rate": 7.307729834504154e-07,
|
|
"logits/chosen": -1.052435278892517,
|
|
"logits/rejected": -1.10076904296875,
|
|
"logps/chosen": -457.2640686035156,
|
|
"logps/rejected": -488.16876220703125,
|
|
"logps/weighted_chosen": -3.143115282058716,
|
|
"logps/weighted_rejected": -4.254638671875,
|
|
"loss": 0.6137,
|
|
"rewards/accuracies": 0.5843750238418579,
|
|
"rewards/chosen": -175.60116577148438,
|
|
"rewards/margins": 46.61640548706055,
|
|
"rewards/rejected": -222.21328735351562,
|
|
"rewards/weighted_accuracies": 0.6812499761581421,
|
|
"rewards/weighted_chosen": -0.878021240234375,
|
|
"rewards/weighted_margins": 0.650500476360321,
|
|
"rewards/weighted_rejected": -1.5286986827850342,
|
|
"step": 790
|
|
},
|
|
{
|
|
"epoch": 0.4187385501177702,
|
|
"grad_norm": 24.911523818969727,
|
|
"learning_rate": 7.226285613672847e-07,
|
|
"logits/chosen": -1.0021483898162842,
|
|
"logits/rejected": -1.031951904296875,
|
|
"logps/chosen": -466.4765625,
|
|
"logps/rejected": -547.578125,
|
|
"logps/weighted_chosen": -3.3023438453674316,
|
|
"logps/weighted_rejected": -4.483691215515137,
|
|
"loss": 0.6142,
|
|
"rewards/accuracies": 0.640625,
|
|
"rewards/chosen": -189.1164093017578,
|
|
"rewards/margins": 79.8050765991211,
|
|
"rewards/rejected": -268.85467529296875,
|
|
"rewards/weighted_accuracies": 0.706250011920929,
|
|
"rewards/weighted_chosen": -0.978710949420929,
|
|
"rewards/weighted_margins": 0.6761840581893921,
|
|
"rewards/weighted_rejected": -1.6549804210662842,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 0.4239727819942423,
|
|
"grad_norm": 16.060869216918945,
|
|
"learning_rate": 7.144097831531398e-07,
|
|
"logits/chosen": -0.978619396686554,
|
|
"logits/rejected": -1.0038635730743408,
|
|
"logps/chosen": -456.95623779296875,
|
|
"logps/rejected": -505.2093811035156,
|
|
"logps/weighted_chosen": -3.171826124191284,
|
|
"logps/weighted_rejected": -4.205761909484863,
|
|
"loss": 0.5645,
|
|
"rewards/accuracies": 0.628125011920929,
|
|
"rewards/chosen": -186.97421264648438,
|
|
"rewards/margins": 54.0078125,
|
|
"rewards/rejected": -240.85311889648438,
|
|
"rewards/weighted_accuracies": 0.71875,
|
|
"rewards/weighted_chosen": -0.788342297077179,
|
|
"rewards/weighted_margins": 0.7865844964981079,
|
|
"rewards/weighted_rejected": -1.574896216392517,
|
|
"step": 810
|
|
},
|
|
{
|
|
"epoch": 0.42920701387071447,
|
|
"grad_norm": 23.292619705200195,
|
|
"learning_rate": 7.061193938131396e-07,
|
|
"logits/chosen": -0.9266418218612671,
|
|
"logits/rejected": -0.9759277105331421,
|
|
"logps/chosen": -492.421875,
|
|
"logps/rejected": -497.2515563964844,
|
|
"logps/weighted_chosen": -3.4576172828674316,
|
|
"logps/weighted_rejected": -4.116650581359863,
|
|
"loss": 0.5666,
|
|
"rewards/accuracies": 0.578125,
|
|
"rewards/chosen": -192.64022827148438,
|
|
"rewards/margins": 39.222267150878906,
|
|
"rewards/rejected": -231.8562469482422,
|
|
"rewards/weighted_accuracies": 0.6937500238418579,
|
|
"rewards/weighted_chosen": -0.797924816608429,
|
|
"rewards/weighted_margins": 0.775280773639679,
|
|
"rewards/weighted_rejected": -1.573089599609375,
|
|
"step": 820
|
|
},
|
|
{
|
|
"epoch": 0.4344412457471866,
|
|
"grad_norm": 23.763275146484375,
|
|
"learning_rate": 6.977601622699789e-07,
|
|
"logits/chosen": -0.9908691644668579,
|
|
"logits/rejected": -1.057653784751892,
|
|
"logps/chosen": -459.80157470703125,
|
|
"logps/rejected": -544.8687744140625,
|
|
"logps/weighted_chosen": -3.350512742996216,
|
|
"logps/weighted_rejected": -4.385839939117432,
|
|
"loss": 0.5077,
|
|
"rewards/accuracies": 0.6781250238418579,
|
|
"rewards/chosen": -175.025390625,
|
|
"rewards/margins": 90.8042984008789,
|
|
"rewards/rejected": -265.86798095703125,
|
|
"rewards/weighted_accuracies": 0.7281249761581421,
|
|
"rewards/weighted_chosen": -0.699688732624054,
|
|
"rewards/weighted_margins": 0.9197998046875,
|
|
"rewards/weighted_rejected": -1.61993408203125,
|
|
"step": 830
|
|
},
|
|
{
|
|
"epoch": 0.4396754776236587,
|
|
"grad_norm": 29.07372283935547,
|
|
"learning_rate": 6.893348804390882e-07,
|
|
"logits/chosen": -1.094964623451233,
|
|
"logits/rejected": -1.1045074462890625,
|
|
"logps/chosen": -521.2327880859375,
|
|
"logps/rejected": -545.9468994140625,
|
|
"logps/weighted_chosen": -3.6615967750549316,
|
|
"logps/weighted_rejected": -4.320361137390137,
|
|
"loss": 0.5747,
|
|
"rewards/accuracies": 0.5843750238418579,
|
|
"rewards/chosen": -225.86563110351562,
|
|
"rewards/margins": 60.388671875,
|
|
"rewards/rejected": -286.2398376464844,
|
|
"rewards/weighted_accuracies": 0.715624988079071,
|
|
"rewards/weighted_chosen": -0.912017822265625,
|
|
"rewards/weighted_margins": 0.748242199420929,
|
|
"rewards/weighted_rejected": -1.660058617591858,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 0.44490970950013087,
|
|
"grad_norm": 23.878381729125977,
|
|
"learning_rate": 6.808463622961578e-07,
|
|
"logits/chosen": -1.0891234874725342,
|
|
"logits/rejected": -1.1309936046600342,
|
|
"logps/chosen": -545.3046875,
|
|
"logps/rejected": -615.7484130859375,
|
|
"logps/weighted_chosen": -3.533984422683716,
|
|
"logps/weighted_rejected": -4.504004001617432,
|
|
"loss": 0.5472,
|
|
"rewards/accuracies": 0.6000000238418579,
|
|
"rewards/chosen": -252.8015594482422,
|
|
"rewards/margins": 81.0531234741211,
|
|
"rewards/rejected": -333.59295654296875,
|
|
"rewards/weighted_accuracies": 0.715624988079071,
|
|
"rewards/weighted_chosen": -0.967456042766571,
|
|
"rewards/weighted_margins": 0.86865234375,
|
|
"rewards/weighted_rejected": -1.835351586341858,
|
|
"step": 850
|
|
},
|
|
{
|
|
"epoch": 0.45014394137660296,
|
|
"grad_norm": 101.535888671875,
|
|
"learning_rate": 6.722974429372925e-07,
|
|
"logits/chosen": -1.0688354969024658,
|
|
"logits/rejected": -1.1046874523162842,
|
|
"logps/chosen": -578.9781494140625,
|
|
"logps/rejected": -604.3499755859375,
|
|
"logps/weighted_chosen": -3.37939453125,
|
|
"logps/weighted_rejected": -5.040380954742432,
|
|
"loss": 0.5051,
|
|
"rewards/accuracies": 0.606249988079071,
|
|
"rewards/chosen": -275.2124938964844,
|
|
"rewards/margins": 66.9710922241211,
|
|
"rewards/rejected": -342.20001220703125,
|
|
"rewards/weighted_accuracies": 0.7749999761581421,
|
|
"rewards/weighted_chosen": -1.086279273033142,
|
|
"rewards/weighted_margins": 1.05950927734375,
|
|
"rewards/weighted_rejected": -2.1461181640625,
|
|
"step": 860
|
|
},
|
|
{
|
|
"epoch": 0.4553781732530751,
|
|
"grad_norm": 66.56680297851562,
|
|
"learning_rate": 6.636909776321128e-07,
|
|
"logits/chosen": -1.1214478015899658,
|
|
"logits/rejected": -1.11016845703125,
|
|
"logps/chosen": -493.3843688964844,
|
|
"logps/rejected": -590.8125,
|
|
"logps/weighted_chosen": -3.670654296875,
|
|
"logps/weighted_rejected": -4.723730564117432,
|
|
"loss": 0.5107,
|
|
"rewards/accuracies": 0.612500011920929,
|
|
"rewards/chosen": -231.6789093017578,
|
|
"rewards/margins": 94.42109680175781,
|
|
"rewards/rejected": -326.2250061035156,
|
|
"rewards/weighted_accuracies": 0.746874988079071,
|
|
"rewards/weighted_chosen": -1.0464904308319092,
|
|
"rewards/weighted_margins": 0.9440551996231079,
|
|
"rewards/weighted_rejected": -1.99102783203125,
|
|
"step": 870
|
|
},
|
|
{
|
|
"epoch": 0.46061240512954726,
|
|
"grad_norm": 23.494997024536133,
|
|
"learning_rate": 6.550298408701174e-07,
|
|
"logits/chosen": -1.094885230064392,
|
|
"logits/rejected": -1.1415894031524658,
|
|
"logps/chosen": -534.1663818359375,
|
|
"logps/rejected": -621.9547119140625,
|
|
"logps/weighted_chosen": -3.794140577316284,
|
|
"logps/weighted_rejected": -5.148291110992432,
|
|
"loss": 0.5174,
|
|
"rewards/accuracies": 0.637499988079071,
|
|
"rewards/chosen": -247.2595672607422,
|
|
"rewards/margins": 92.109375,
|
|
"rewards/rejected": -339.3968811035156,
|
|
"rewards/weighted_accuracies": 0.753125011920929,
|
|
"rewards/weighted_chosen": -0.9449707269668579,
|
|
"rewards/weighted_margins": 0.938586413860321,
|
|
"rewards/weighted_rejected": -1.8829224109649658,
|
|
"step": 880
|
|
},
|
|
{
|
|
"epoch": 0.46584663700601936,
|
|
"grad_norm": 27.359371185302734,
|
|
"learning_rate": 6.463169254006276e-07,
|
|
"logits/chosen": -1.1160705089569092,
|
|
"logits/rejected": -1.157629370689392,
|
|
"logps/chosen": -538.1109619140625,
|
|
"logps/rejected": -562.7062377929688,
|
|
"logps/weighted_chosen": -3.73779296875,
|
|
"logps/weighted_rejected": -4.817724704742432,
|
|
"loss": 0.5154,
|
|
"rewards/accuracies": 0.628125011920929,
|
|
"rewards/chosen": -256.6499938964844,
|
|
"rewards/margins": 59.66523361206055,
|
|
"rewards/rejected": -316.37225341796875,
|
|
"rewards/weighted_accuracies": 0.71875,
|
|
"rewards/weighted_chosen": -1.019891381263733,
|
|
"rewards/weighted_margins": 1.008874535560608,
|
|
"rewards/weighted_rejected": -2.028430223464966,
|
|
"step": 890
|
|
},
|
|
{
|
|
"epoch": 0.4710808688824915,
|
|
"grad_norm": 20.988025665283203,
|
|
"learning_rate": 6.375551412666326e-07,
|
|
"logits/chosen": -1.0879943370819092,
|
|
"logits/rejected": -1.1199951171875,
|
|
"logps/chosen": -514.9187622070312,
|
|
"logps/rejected": -545.7906494140625,
|
|
"logps/weighted_chosen": -3.4129395484924316,
|
|
"logps/weighted_rejected": -4.592138767242432,
|
|
"loss": 0.6113,
|
|
"rewards/accuracies": 0.5843750238418579,
|
|
"rewards/chosen": -240.97891235351562,
|
|
"rewards/margins": 48.454689025878906,
|
|
"rewards/rejected": -289.4117126464844,
|
|
"rewards/weighted_accuracies": 0.703125,
|
|
"rewards/weighted_chosen": -1.1000854969024658,
|
|
"rewards/weighted_margins": 0.7867187261581421,
|
|
"rewards/weighted_rejected": -1.887121558189392,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 0.4763151007589636,
|
|
"grad_norm": 29.907148361206055,
|
|
"learning_rate": 6.287474148328583e-07,
|
|
"logits/chosen": -1.0193603038787842,
|
|
"logits/rejected": -0.9993133544921875,
|
|
"logps/chosen": -474.3374938964844,
|
|
"logps/rejected": -501.23748779296875,
|
|
"logps/weighted_chosen": -3.7416014671325684,
|
|
"logps/weighted_rejected": -5.238329887390137,
|
|
"loss": 0.5727,
|
|
"rewards/accuracies": 0.612500011920929,
|
|
"rewards/chosen": -203.36563110351562,
|
|
"rewards/margins": 50.95781326293945,
|
|
"rewards/rejected": -254.2734375,
|
|
"rewards/weighted_accuracies": 0.6968749761581421,
|
|
"rewards/weighted_chosen": -1.043182373046875,
|
|
"rewards/weighted_margins": 0.8107665777206421,
|
|
"rewards/weighted_rejected": -1.8539307117462158,
|
|
"step": 910
|
|
},
|
|
{
|
|
"epoch": 0.48154933263543576,
|
|
"grad_norm": 36.32797622680664,
|
|
"learning_rate": 6.198966878083857e-07,
|
|
"logits/chosen": -1.0350799560546875,
|
|
"logits/rejected": -1.0523681640625,
|
|
"logps/chosen": -488.9765625,
|
|
"logps/rejected": -553.484375,
|
|
"logps/weighted_chosen": -3.7232666015625,
|
|
"logps/weighted_rejected": -4.598974704742432,
|
|
"loss": 0.5581,
|
|
"rewards/accuracies": 0.6312500238418579,
|
|
"rewards/chosen": -217.74844360351562,
|
|
"rewards/margins": 68.5667953491211,
|
|
"rewards/rejected": -286.3515625,
|
|
"rewards/weighted_accuracies": 0.734375,
|
|
"rewards/weighted_chosen": -1.01214599609375,
|
|
"rewards/weighted_margins": 0.775787353515625,
|
|
"rewards/weighted_rejected": -1.787988305091858,
|
|
"step": 920
|
|
},
|
|
{
|
|
"epoch": 0.48678356451190785,
|
|
"grad_norm": 40.16273880004883,
|
|
"learning_rate": 6.110059162641439e-07,
|
|
"logits/chosen": -1.0597412586212158,
|
|
"logits/rejected": -1.0781066417694092,
|
|
"logps/chosen": -513.3226318359375,
|
|
"logps/rejected": -559.4593505859375,
|
|
"logps/weighted_chosen": -3.147705078125,
|
|
"logps/weighted_rejected": -4.214404106140137,
|
|
"loss": 0.5481,
|
|
"rewards/accuracies": 0.6312500238418579,
|
|
"rewards/chosen": -231.1085968017578,
|
|
"rewards/margins": 61.108985900878906,
|
|
"rewards/rejected": -292.21954345703125,
|
|
"rewards/weighted_accuracies": 0.721875011920929,
|
|
"rewards/weighted_chosen": -0.8689819574356079,
|
|
"rewards/weighted_margins": 0.7522827386856079,
|
|
"rewards/weighted_rejected": -1.6212646961212158,
|
|
"step": 930
|
|
},
|
|
{
|
|
"epoch": 0.49201779638838,
|
|
"grad_norm": 29.65454864501953,
|
|
"learning_rate": 6.020780696456059e-07,
|
|
"logits/chosen": -1.072198510169983,
|
|
"logits/rejected": -1.104650855064392,
|
|
"logps/chosen": -511.18280029296875,
|
|
"logps/rejected": -601.0250244140625,
|
|
"logps/weighted_chosen": -3.1954102516174316,
|
|
"logps/weighted_rejected": -4.517724514007568,
|
|
"loss": 0.5407,
|
|
"rewards/accuracies": 0.643750011920929,
|
|
"rewards/chosen": -240.52108764648438,
|
|
"rewards/margins": 93.419921875,
|
|
"rewards/rejected": -333.953125,
|
|
"rewards/weighted_accuracies": 0.731249988079071,
|
|
"rewards/weighted_chosen": -1.0927855968475342,
|
|
"rewards/weighted_margins": 0.8496459722518921,
|
|
"rewards/weighted_rejected": -1.942968726158142,
|
|
"step": 940
|
|
},
|
|
{
|
|
"epoch": 0.49725202826485215,
|
|
"grad_norm": 210.50332641601562,
|
|
"learning_rate": 5.931161297810185e-07,
|
|
"logits/chosen": -1.132635474205017,
|
|
"logits/rejected": -1.1451904773712158,
|
|
"logps/chosen": -574.6031494140625,
|
|
"logps/rejected": -629.4656372070312,
|
|
"logps/weighted_chosen": -4.126172065734863,
|
|
"logps/weighted_rejected": -5.016747951507568,
|
|
"loss": 0.5998,
|
|
"rewards/accuracies": 0.574999988079071,
|
|
"rewards/chosen": -301.52264404296875,
|
|
"rewards/margins": 66.5218734741211,
|
|
"rewards/rejected": -368.080078125,
|
|
"rewards/weighted_accuracies": 0.6656249761581421,
|
|
"rewards/weighted_chosen": -1.307519555091858,
|
|
"rewards/weighted_margins": 0.74249267578125,
|
|
"rewards/weighted_rejected": -2.0491180419921875,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 0.5024862601413242,
|
|
"grad_norm": 113.20726013183594,
|
|
"learning_rate": 5.841230898854959e-07,
|
|
"logits/chosen": -1.070550560951233,
|
|
"logits/rejected": -1.0872802734375,
|
|
"logps/chosen": -652.0281372070312,
|
|
"logps/rejected": -711.1765747070312,
|
|
"logps/weighted_chosen": -3.9981932640075684,
|
|
"logps/weighted_rejected": -5.147070407867432,
|
|
"loss": 0.6329,
|
|
"rewards/accuracies": 0.612500011920929,
|
|
"rewards/chosen": -351.828125,
|
|
"rewards/margins": 97.4195327758789,
|
|
"rewards/rejected": -449.2261657714844,
|
|
"rewards/weighted_accuracies": 0.684374988079071,
|
|
"rewards/weighted_chosen": -1.564361572265625,
|
|
"rewards/weighted_margins": 0.872241199016571,
|
|
"rewards/weighted_rejected": -2.436718702316284,
|
|
"step": 960
|
|
},
|
|
{
|
|
"epoch": 0.5077204920177963,
|
|
"grad_norm": 22.200820922851562,
|
|
"learning_rate": 5.751019535613102e-07,
|
|
"logits/chosen": -0.987274169921875,
|
|
"logits/rejected": -1.0052611827850342,
|
|
"logps/chosen": -531.3624877929688,
|
|
"logps/rejected": -610.1218872070312,
|
|
"logps/weighted_chosen": -3.8460450172424316,
|
|
"logps/weighted_rejected": -5.256982326507568,
|
|
"loss": 0.5675,
|
|
"rewards/accuracies": 0.65625,
|
|
"rewards/chosen": -271.6910095214844,
|
|
"rewards/margins": 91.53633117675781,
|
|
"rewards/rejected": -363.22344970703125,
|
|
"rewards/weighted_accuracies": 0.690625011920929,
|
|
"rewards/weighted_chosen": -1.357843041419983,
|
|
"rewards/weighted_margins": 1.01031494140625,
|
|
"rewards/weighted_rejected": -2.367443799972534,
|
|
"step": 970
|
|
},
|
|
{
|
|
"epoch": 0.5129547238942685,
|
|
"grad_norm": 17.362323760986328,
|
|
"learning_rate": 5.660557337947117e-07,
|
|
"logits/chosen": -0.9707549810409546,
|
|
"logits/rejected": -0.983325183391571,
|
|
"logps/chosen": -549.1953125,
|
|
"logps/rejected": -586.7874755859375,
|
|
"logps/weighted_chosen": -3.279223680496216,
|
|
"logps/weighted_rejected": -4.507519721984863,
|
|
"loss": 0.5466,
|
|
"rewards/accuracies": 0.6156250238418579,
|
|
"rewards/chosen": -254.6687469482422,
|
|
"rewards/margins": 74.56758117675781,
|
|
"rewards/rejected": -329.2632751464844,
|
|
"rewards/weighted_accuracies": 0.699999988079071,
|
|
"rewards/weighted_chosen": -1.0866820812225342,
|
|
"rewards/weighted_margins": 0.8655151128768921,
|
|
"rewards/weighted_rejected": -1.9528076648712158,
|
|
"step": 980
|
|
},
|
|
{
|
|
"epoch": 0.5181889557707406,
|
|
"grad_norm": 39.01738739013672,
|
|
"learning_rate": 5.569874519496174e-07,
|
|
"logits/chosen": -0.963134765625,
|
|
"logits/rejected": -1.01763916015625,
|
|
"logps/chosen": -488.7406311035156,
|
|
"logps/rejected": -554.3687744140625,
|
|
"logps/weighted_chosen": -3.665576219558716,
|
|
"logps/weighted_rejected": -4.876318454742432,
|
|
"loss": 0.5929,
|
|
"rewards/accuracies": 0.590624988079071,
|
|
"rewards/chosen": -213.4460906982422,
|
|
"rewards/margins": 73.8890609741211,
|
|
"rewards/rejected": -287.3500061035156,
|
|
"rewards/weighted_accuracies": 0.690625011920929,
|
|
"rewards/weighted_chosen": -0.959338366985321,
|
|
"rewards/weighted_margins": 0.7813965082168579,
|
|
"rewards/weighted_rejected": -1.740045189857483,
|
|
"step": 990
|
|
},
|
|
{
|
|
"epoch": 0.5234231876472127,
|
|
"grad_norm": 33.2608642578125,
|
|
"learning_rate": 5.47900136758499e-07,
|
|
"logits/chosen": -0.9298340082168579,
|
|
"logits/rejected": -0.989898681640625,
|
|
"logps/chosen": -527.16015625,
|
|
"logps/rejected": -566.2453002929688,
|
|
"logps/weighted_chosen": -3.71044921875,
|
|
"logps/weighted_rejected": -4.887304782867432,
|
|
"loss": 0.5395,
|
|
"rewards/accuracies": 0.590624988079071,
|
|
"rewards/chosen": -259.56170654296875,
|
|
"rewards/margins": 64.66015625,
|
|
"rewards/rejected": -324.302734375,
|
|
"rewards/weighted_accuracies": 0.7093750238418579,
|
|
"rewards/weighted_chosen": -1.119836449623108,
|
|
"rewards/weighted_margins": 0.944445788860321,
|
|
"rewards/weighted_rejected": -2.063854932785034,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 0.5234231876472127,
|
|
"eval_logits/chosen": -1.0472733974456787,
|
|
"eval_logits/rejected": -1.0595996379852295,
|
|
"eval_logps/chosen": -573.6119995117188,
|
|
"eval_logps/rejected": -629.1840209960938,
|
|
"eval_logps/weighted_chosen": -3.815713405609131,
|
|
"eval_logps/weighted_rejected": -4.930161476135254,
|
|
"eval_loss": 0.5728335976600647,
|
|
"eval_rewards/accuracies": 0.5989999771118164,
|
|
"eval_rewards/chosen": -284.8971252441406,
|
|
"eval_rewards/margins": 76.59700012207031,
|
|
"eval_rewards/rejected": -361.5224914550781,
|
|
"eval_rewards/weighted_accuracies": 0.7070000171661377,
|
|
"eval_rewards/weighted_chosen": -1.2679998874664307,
|
|
"eval_rewards/weighted_margins": 0.8639541268348694,
|
|
"eval_rewards/weighted_rejected": -2.1319541931152344,
|
|
"eval_runtime": 1366.4223,
|
|
"eval_samples_per_second": 1.464,
|
|
"eval_steps_per_second": 0.366,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 0.528657419523685,
|
|
"grad_norm": 30.763290405273438,
|
|
"learning_rate": 5.387968233108113e-07,
|
|
"logits/chosen": -0.9412124752998352,
|
|
"logits/rejected": -0.9331512451171875,
|
|
"logps/chosen": -583.2468872070312,
|
|
"logps/rejected": -633.9749755859375,
|
|
"logps/weighted_chosen": -4.168408393859863,
|
|
"logps/weighted_rejected": -5.263281345367432,
|
|
"loss": 0.5622,
|
|
"rewards/accuracies": 0.574999988079071,
|
|
"rewards/chosen": -298.86407470703125,
|
|
"rewards/margins": 80.30000305175781,
|
|
"rewards/rejected": -379.27032470703125,
|
|
"rewards/weighted_accuracies": 0.734375,
|
|
"rewards/weighted_chosen": -1.356286644935608,
|
|
"rewards/weighted_margins": 0.906384289264679,
|
|
"rewards/weighted_rejected": -2.262927293777466,
|
|
"step": 1010
|
|
},
|
|
{
|
|
"epoch": 0.533891651400157,
|
|
"grad_norm": 18.650068283081055,
|
|
"learning_rate": 5.296805520392962e-07,
|
|
"logits/chosen": -1.010156273841858,
|
|
"logits/rejected": -1.0419880151748657,
|
|
"logps/chosen": -606.2078247070312,
|
|
"logps/rejected": -638.6656494140625,
|
|
"logps/weighted_chosen": -3.4315428733825684,
|
|
"logps/weighted_rejected": -4.852490425109863,
|
|
"loss": 0.6285,
|
|
"rewards/accuracies": 0.581250011920929,
|
|
"rewards/chosen": -293.66796875,
|
|
"rewards/margins": 62.296485900878906,
|
|
"rewards/rejected": -355.8941345214844,
|
|
"rewards/weighted_accuracies": 0.659375011920929,
|
|
"rewards/weighted_chosen": -1.4029419422149658,
|
|
"rewards/weighted_margins": 0.724365234375,
|
|
"rewards/weighted_rejected": -2.1269164085388184,
|
|
"step": 1020
|
|
},
|
|
{
|
|
"epoch": 0.5391258832766291,
|
|
"grad_norm": 28.17354393005371,
|
|
"learning_rate": 5.205543677045049e-07,
|
|
"logits/chosen": -0.9372314214706421,
|
|
"logits/rejected": -0.974993884563446,
|
|
"logps/chosen": -493.52655029296875,
|
|
"logps/rejected": -522.546875,
|
|
"logps/weighted_chosen": -3.700146436691284,
|
|
"logps/weighted_rejected": -4.827466011047363,
|
|
"loss": 0.5313,
|
|
"rewards/accuracies": 0.596875011920929,
|
|
"rewards/chosen": -219.2234344482422,
|
|
"rewards/margins": 58.09453201293945,
|
|
"rewards/rejected": -277.33203125,
|
|
"rewards/weighted_accuracies": 0.7593749761581421,
|
|
"rewards/weighted_chosen": -1.0529053211212158,
|
|
"rewards/weighted_margins": 0.86126708984375,
|
|
"rewards/weighted_rejected": -1.9149185419082642,
|
|
"step": 1030
|
|
},
|
|
{
|
|
"epoch": 0.5443601151531012,
|
|
"grad_norm": 40.029666900634766,
|
|
"learning_rate": 5.114213183778697e-07,
|
|
"logits/chosen": -1.017327904701233,
|
|
"logits/rejected": -1.0485351085662842,
|
|
"logps/chosen": -514.8624877929688,
|
|
"logps/rejected": -569.7859497070312,
|
|
"logps/weighted_chosen": -4.155713081359863,
|
|
"logps/weighted_rejected": -4.990136623382568,
|
|
"loss": 0.5441,
|
|
"rewards/accuracies": 0.609375,
|
|
"rewards/chosen": -234.3312530517578,
|
|
"rewards/margins": 78.56758117675781,
|
|
"rewards/rejected": -312.8812561035156,
|
|
"rewards/weighted_accuracies": 0.715624988079071,
|
|
"rewards/weighted_chosen": -1.097131371498108,
|
|
"rewards/weighted_margins": 0.8939269781112671,
|
|
"rewards/weighted_rejected": -1.9906127452850342,
|
|
"step": 1040
|
|
},
|
|
{
|
|
"epoch": 0.5495943470295734,
|
|
"grad_norm": 37.25775146484375,
|
|
"learning_rate": 5.022844544236754e-07,
|
|
"logits/chosen": -0.9515380859375,
|
|
"logits/rejected": -0.961352527141571,
|
|
"logps/chosen": -573.2562255859375,
|
|
"logps/rejected": -641.7312622070312,
|
|
"logps/weighted_chosen": -4.061865329742432,
|
|
"logps/weighted_rejected": -5.167675971984863,
|
|
"loss": 0.5774,
|
|
"rewards/accuracies": 0.6031249761581421,
|
|
"rewards/chosen": -293.25311279296875,
|
|
"rewards/margins": 93.615234375,
|
|
"rewards/rejected": -386.7679748535156,
|
|
"rewards/weighted_accuracies": 0.715624988079071,
|
|
"rewards/weighted_chosen": -1.2718932628631592,
|
|
"rewards/weighted_margins": 0.8445068597793579,
|
|
"rewards/weighted_rejected": -2.1155028343200684,
|
|
"step": 1050
|
|
},
|
|
{
|
|
"epoch": 0.5548285789060455,
|
|
"grad_norm": 26.58415985107422,
|
|
"learning_rate": 4.931468274802608e-07,
|
|
"logits/chosen": -0.9689911007881165,
|
|
"logits/rejected": -0.9828445315361023,
|
|
"logps/chosen": -585.3031005859375,
|
|
"logps/rejected": -649.8265380859375,
|
|
"logps/weighted_chosen": -3.440234422683716,
|
|
"logps/weighted_rejected": -4.7862548828125,
|
|
"loss": 0.5493,
|
|
"rewards/accuracies": 0.6031249761581421,
|
|
"rewards/chosen": -303.04412841796875,
|
|
"rewards/margins": 85.91679382324219,
|
|
"rewards/rejected": -388.83087158203125,
|
|
"rewards/weighted_accuracies": 0.7281249761581421,
|
|
"rewards/weighted_chosen": -1.2489440441131592,
|
|
"rewards/weighted_margins": 0.8478637933731079,
|
|
"rewards/weighted_rejected": -2.095629930496216,
|
|
"step": 1060
|
|
},
|
|
{
|
|
"epoch": 0.5600628107825176,
|
|
"grad_norm": 25.14666175842285,
|
|
"learning_rate": 4.840114894407974e-07,
|
|
"logits/chosen": -0.9988906979560852,
|
|
"logits/rejected": -1.0262877941131592,
|
|
"logps/chosen": -564.2750244140625,
|
|
"logps/rejected": -604.7640380859375,
|
|
"logps/weighted_chosen": -3.8533082008361816,
|
|
"logps/weighted_rejected": -4.584790229797363,
|
|
"loss": 0.5612,
|
|
"rewards/accuracies": 0.6031249761581421,
|
|
"rewards/chosen": -287.8531188964844,
|
|
"rewards/margins": 65.25312805175781,
|
|
"rewards/rejected": -353.18670654296875,
|
|
"rewards/weighted_accuracies": 0.6968749761581421,
|
|
"rewards/weighted_chosen": -1.2451751232147217,
|
|
"rewards/weighted_margins": 0.882397472858429,
|
|
"rewards/weighted_rejected": -2.1273193359375,
|
|
"step": 1070
|
|
},
|
|
{
|
|
"epoch": 0.5652970426589898,
|
|
"grad_norm": 34.41138458251953,
|
|
"learning_rate": 4.748814914339811e-07,
|
|
"logits/chosen": -0.9615001678466797,
|
|
"logits/rejected": -0.990710437297821,
|
|
"logps/chosen": -606.1984252929688,
|
|
"logps/rejected": -647.2062377929688,
|
|
"logps/weighted_chosen": -3.8893065452575684,
|
|
"logps/weighted_rejected": -4.611474514007568,
|
|
"loss": 0.5687,
|
|
"rewards/accuracies": 0.6000000238418579,
|
|
"rewards/chosen": -321.95098876953125,
|
|
"rewards/margins": 69.9859390258789,
|
|
"rewards/rejected": -391.935546875,
|
|
"rewards/weighted_accuracies": 0.6937500238418579,
|
|
"rewards/weighted_chosen": -1.2852051258087158,
|
|
"rewards/weighted_margins": 0.868273913860321,
|
|
"rewards/weighted_rejected": -2.153552293777466,
|
|
"step": 1080
|
|
},
|
|
{
|
|
"epoch": 0.5705312745354619,
|
|
"grad_norm": 20.902027130126953,
|
|
"learning_rate": 4.657598828049801e-07,
|
|
"logits/chosen": -1.0034713745117188,
|
|
"logits/rejected": -1.0612213611602783,
|
|
"logps/chosen": -613.8250122070312,
|
|
"logps/rejected": -699.54296875,
|
|
"logps/weighted_chosen": -3.7084593772888184,
|
|
"logps/weighted_rejected": -4.5335693359375,
|
|
"loss": 0.537,
|
|
"rewards/accuracies": 0.6000000238418579,
|
|
"rewards/chosen": -323.48028564453125,
|
|
"rewards/margins": 95.4664077758789,
|
|
"rewards/rejected": -418.94842529296875,
|
|
"rewards/weighted_accuracies": 0.7124999761581421,
|
|
"rewards/weighted_chosen": -1.2582886219024658,
|
|
"rewards/weighted_margins": 0.9008544683456421,
|
|
"rewards/weighted_rejected": -2.15960693359375,
|
|
"step": 1090
|
|
},
|
|
{
|
|
"epoch": 0.575765506411934,
|
|
"grad_norm": 40.39773178100586,
|
|
"learning_rate": 4.566497100969792e-07,
|
|
"logits/chosen": -0.9749755859375,
|
|
"logits/rejected": -0.9959548711776733,
|
|
"logps/chosen": -720.859375,
|
|
"logps/rejected": -785.3062744140625,
|
|
"logps/weighted_chosen": -4.180810451507568,
|
|
"logps/weighted_rejected": -5.250244140625,
|
|
"loss": 0.5644,
|
|
"rewards/accuracies": 0.621874988079071,
|
|
"rewards/chosen": -398.22674560546875,
|
|
"rewards/margins": 88.7249984741211,
|
|
"rewards/rejected": -486.95782470703125,
|
|
"rewards/weighted_accuracies": 0.778124988079071,
|
|
"rewards/weighted_chosen": -1.482934594154358,
|
|
"rewards/weighted_margins": 0.999176025390625,
|
|
"rewards/weighted_rejected": -2.483081102371216,
|
|
"step": 1100
|
|
},
|
|
{
|
|
"epoch": 0.5809997382884062,
|
|
"grad_norm": 31.201040267944336,
|
|
"learning_rate": 4.475540160336576e-07,
|
|
"logits/chosen": -0.992321789264679,
|
|
"logits/rejected": -1.0310242176055908,
|
|
"logps/chosen": -624.4796752929688,
|
|
"logps/rejected": -671.109375,
|
|
"logps/weighted_chosen": -4.22021484375,
|
|
"logps/weighted_rejected": -5.428515434265137,
|
|
"loss": 0.5427,
|
|
"rewards/accuracies": 0.609375,
|
|
"rewards/chosen": -326.1617126464844,
|
|
"rewards/margins": 86.7378921508789,
|
|
"rewards/rejected": -412.96405029296875,
|
|
"rewards/weighted_accuracies": 0.715624988079071,
|
|
"rewards/weighted_chosen": -1.3425171375274658,
|
|
"rewards/weighted_margins": 1.0147826671600342,
|
|
"rewards/weighted_rejected": -2.3572998046875,
|
|
"step": 1110
|
|
},
|
|
{
|
|
"epoch": 0.5862339701648783,
|
|
"grad_norm": 47.25414276123047,
|
|
"learning_rate": 4.3847583850294565e-07,
|
|
"logits/chosen": -0.9623962640762329,
|
|
"logits/rejected": -0.9765838384628296,
|
|
"logps/chosen": -671.4148559570312,
|
|
"logps/rejected": -715.609375,
|
|
"logps/weighted_chosen": -4.4444580078125,
|
|
"logps/weighted_rejected": -5.09375,
|
|
"loss": 0.5772,
|
|
"rewards/accuracies": 0.6499999761581421,
|
|
"rewards/chosen": -380.1949157714844,
|
|
"rewards/margins": 75.4898452758789,
|
|
"rewards/rejected": -455.4906311035156,
|
|
"rewards/weighted_accuracies": 0.71875,
|
|
"rewards/weighted_chosen": -1.4877197742462158,
|
|
"rewards/weighted_margins": 0.929028332233429,
|
|
"rewards/weighted_rejected": -2.4179930686950684,
|
|
"step": 1120
|
|
},
|
|
{
|
|
"epoch": 0.5914682020413504,
|
|
"grad_norm": 30.387371063232422,
|
|
"learning_rate": 4.294182095423934e-07,
|
|
"logits/chosen": -0.939868152141571,
|
|
"logits/rejected": -0.9976135492324829,
|
|
"logps/chosen": -623.6375122070312,
|
|
"logps/rejected": -687.7578125,
|
|
"logps/weighted_chosen": -3.7587890625,
|
|
"logps/weighted_rejected": -4.962597846984863,
|
|
"loss": 0.5553,
|
|
"rewards/accuracies": 0.5874999761581421,
|
|
"rewards/chosen": -338.00079345703125,
|
|
"rewards/margins": 88.7320327758789,
|
|
"rewards/rejected": -426.6976623535156,
|
|
"rewards/weighted_accuracies": 0.703125,
|
|
"rewards/weighted_chosen": -1.33770751953125,
|
|
"rewards/weighted_margins": 0.9156738519668579,
|
|
"rewards/weighted_rejected": -2.252673387527466,
|
|
"step": 1130
|
|
},
|
|
{
|
|
"epoch": 0.5967024339178225,
|
|
"grad_norm": 15.42784309387207,
|
|
"learning_rate": 4.20384154326496e-07,
|
|
"logits/chosen": -0.9435394406318665,
|
|
"logits/rejected": -0.9906860589981079,
|
|
"logps/chosen": -516.5921630859375,
|
|
"logps/rejected": -537.7835693359375,
|
|
"logps/weighted_chosen": -3.5862059593200684,
|
|
"logps/weighted_rejected": -4.702197074890137,
|
|
"loss": 0.5867,
|
|
"rewards/accuracies": 0.6031249761581421,
|
|
"rewards/chosen": -255.84805297851562,
|
|
"rewards/margins": 46.25468826293945,
|
|
"rewards/rejected": -302.1802673339844,
|
|
"rewards/weighted_accuracies": 0.6968749761581421,
|
|
"rewards/weighted_chosen": -1.0679458379745483,
|
|
"rewards/weighted_margins": 0.756103515625,
|
|
"rewards/weighted_rejected": -1.824121117591858,
|
|
"step": 1140
|
|
},
|
|
{
|
|
"epoch": 0.6019366657942947,
|
|
"grad_norm": 27.973642349243164,
|
|
"learning_rate": 4.1137669015630863e-07,
|
|
"logits/chosen": -0.9399688839912415,
|
|
"logits/rejected": -0.9874938726425171,
|
|
"logps/chosen": -538.5843505859375,
|
|
"logps/rejected": -606.5062255859375,
|
|
"logps/weighted_chosen": -3.3528809547424316,
|
|
"logps/weighted_rejected": -4.451220512390137,
|
|
"loss": 0.5538,
|
|
"rewards/accuracies": 0.6656249761581421,
|
|
"rewards/chosen": -242.4329071044922,
|
|
"rewards/margins": 80.2808609008789,
|
|
"rewards/rejected": -322.6656188964844,
|
|
"rewards/weighted_accuracies": 0.71875,
|
|
"rewards/weighted_chosen": -1.035125732421875,
|
|
"rewards/weighted_margins": 0.787548840045929,
|
|
"rewards/weighted_rejected": -1.822839379310608,
|
|
"step": 1150
|
|
},
|
|
{
|
|
"epoch": 0.6071708976707668,
|
|
"grad_norm": 16.80686378479004,
|
|
"learning_rate": 4.023988254516943e-07,
|
|
"logits/chosen": -0.9526001214981079,
|
|
"logits/rejected": -1.002233862876892,
|
|
"logps/chosen": -565.6570434570312,
|
|
"logps/rejected": -599.0374755859375,
|
|
"logps/weighted_chosen": -3.925537109375,
|
|
"logps/weighted_rejected": -4.567919731140137,
|
|
"loss": 0.4945,
|
|
"rewards/accuracies": 0.6312500238418579,
|
|
"rewards/chosen": -264.87188720703125,
|
|
"rewards/margins": 67.05058288574219,
|
|
"rewards/rejected": -331.87774658203125,
|
|
"rewards/weighted_accuracies": 0.734375,
|
|
"rewards/weighted_chosen": -0.928997814655304,
|
|
"rewards/weighted_margins": 0.8846801519393921,
|
|
"rewards/weighted_rejected": -1.813256859779358,
|
|
"step": 1160
|
|
},
|
|
{
|
|
"epoch": 0.6124051295472389,
|
|
"grad_norm": 26.579771041870117,
|
|
"learning_rate": 3.9345355874653366e-07,
|
|
"logits/chosen": -0.964202880859375,
|
|
"logits/rejected": -0.984423816204071,
|
|
"logps/chosen": -594.9468994140625,
|
|
"logps/rejected": -598.2398681640625,
|
|
"logps/weighted_chosen": -3.7232666015625,
|
|
"logps/weighted_rejected": -4.643334865570068,
|
|
"loss": 0.6137,
|
|
"rewards/accuracies": 0.578125,
|
|
"rewards/chosen": -297.5835876464844,
|
|
"rewards/margins": 50.535545349121094,
|
|
"rewards/rejected": -348.0869140625,
|
|
"rewards/weighted_accuracies": 0.6812499761581421,
|
|
"rewards/weighted_chosen": -1.2086670398712158,
|
|
"rewards/weighted_margins": 0.742016613483429,
|
|
"rewards/weighted_rejected": -1.9512207508087158,
|
|
"step": 1170
|
|
},
|
|
{
|
|
"epoch": 0.6176393614237111,
|
|
"grad_norm": 30.812177658081055,
|
|
"learning_rate": 3.8454387768724157e-07,
|
|
"logits/chosen": -1.005767822265625,
|
|
"logits/rejected": -1.008856177330017,
|
|
"logps/chosen": -506.8109436035156,
|
|
"logps/rejected": -517.0929565429688,
|
|
"logps/weighted_chosen": -3.8594727516174316,
|
|
"logps/weighted_rejected": -4.857763767242432,
|
|
"loss": 0.5536,
|
|
"rewards/accuracies": 0.559374988079071,
|
|
"rewards/chosen": -233.98828125,
|
|
"rewards/margins": 52.480857849121094,
|
|
"rewards/rejected": -286.2953186035156,
|
|
"rewards/weighted_accuracies": 0.6937500238418579,
|
|
"rewards/weighted_chosen": -1.0302002429962158,
|
|
"rewards/weighted_margins": 0.875408947467804,
|
|
"rewards/weighted_rejected": -1.905310034751892,
|
|
"step": 1180
|
|
},
|
|
{
|
|
"epoch": 0.6228735933001832,
|
|
"grad_norm": 21.219039916992188,
|
|
"learning_rate": 3.7567275803491525e-07,
|
|
"logits/chosen": -1.0049774646759033,
|
|
"logits/rejected": -1.033941626548767,
|
|
"logps/chosen": -568.109375,
|
|
"logps/rejected": -577.7015380859375,
|
|
"logps/weighted_chosen": -3.424023389816284,
|
|
"logps/weighted_rejected": -4.737890720367432,
|
|
"loss": 0.5183,
|
|
"rewards/accuracies": 0.590624988079071,
|
|
"rewards/chosen": -265.75665283203125,
|
|
"rewards/margins": 58.340232849121094,
|
|
"rewards/rejected": -324.1148376464844,
|
|
"rewards/weighted_accuracies": 0.737500011920929,
|
|
"rewards/weighted_chosen": -0.9457031488418579,
|
|
"rewards/weighted_margins": 0.9256957769393921,
|
|
"rewards/weighted_rejected": -1.871618628501892,
|
|
"step": 1190
|
|
},
|
|
{
|
|
"epoch": 0.6281078251766553,
|
|
"grad_norm": 24.416122436523438,
|
|
"learning_rate": 3.66843162671456e-07,
|
|
"logits/chosen": -0.992877185344696,
|
|
"logits/rejected": -1.0040404796600342,
|
|
"logps/chosen": -542.8703002929688,
|
|
"logps/rejected": -650.1124877929688,
|
|
"logps/weighted_chosen": -4.203027248382568,
|
|
"logps/weighted_rejected": -4.642626762390137,
|
|
"loss": 0.6415,
|
|
"rewards/accuracies": 0.5874999761581421,
|
|
"rewards/chosen": -268.76171875,
|
|
"rewards/margins": 109.0687484741211,
|
|
"rewards/rejected": -377.92889404296875,
|
|
"rewards/weighted_accuracies": 0.640625,
|
|
"rewards/weighted_chosen": -1.240747094154358,
|
|
"rewards/weighted_margins": 0.7083190679550171,
|
|
"rewards/weighted_rejected": -1.9493834972381592,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.6333420570531274,
|
|
"grad_norm": 30.961528778076172,
|
|
"learning_rate": 3.5805804060998924e-07,
|
|
"logits/chosen": -0.988171398639679,
|
|
"logits/rejected": -1.0112731456756592,
|
|
"logps/chosen": -576.0609130859375,
|
|
"logps/rejected": -664.0671997070312,
|
|
"logps/weighted_chosen": -3.3628907203674316,
|
|
"logps/weighted_rejected": -4.699365139007568,
|
|
"loss": 0.5317,
|
|
"rewards/accuracies": 0.6812499761581421,
|
|
"rewards/chosen": -275.33905029296875,
|
|
"rewards/margins": 118.02656555175781,
|
|
"rewards/rejected": -393.25311279296875,
|
|
"rewards/weighted_accuracies": 0.75,
|
|
"rewards/weighted_chosen": -1.125707983970642,
|
|
"rewards/weighted_margins": 0.971728503704071,
|
|
"rewards/weighted_rejected": -2.0982666015625,
|
|
"step": 1210
|
|
},
|
|
{
|
|
"epoch": 0.6385762889295996,
|
|
"grad_norm": 41.25253677368164,
|
|
"learning_rate": 3.493203260099197e-07,
|
|
"logits/chosen": -0.99005126953125,
|
|
"logits/rejected": -1.052459716796875,
|
|
"logps/chosen": -616.2062377929688,
|
|
"logps/rejected": -658.71875,
|
|
"logps/weighted_chosen": -3.477587938308716,
|
|
"logps/weighted_rejected": -4.777050971984863,
|
|
"loss": 0.565,
|
|
"rewards/accuracies": 0.578125,
|
|
"rewards/chosen": -328.61328125,
|
|
"rewards/margins": 57.30156326293945,
|
|
"rewards/rejected": -385.90899658203125,
|
|
"rewards/weighted_accuracies": 0.7124999761581421,
|
|
"rewards/weighted_chosen": -1.273229956626892,
|
|
"rewards/weighted_margins": 0.817840576171875,
|
|
"rewards/weighted_rejected": -2.0914306640625,
|
|
"step": 1220
|
|
},
|
|
{
|
|
"epoch": 0.6438105208060717,
|
|
"grad_norm": 44.952823638916016,
|
|
"learning_rate": 3.4063293719694407e-07,
|
|
"logits/chosen": -0.9899932742118835,
|
|
"logits/rejected": -1.038726806640625,
|
|
"logps/chosen": -558.2008056640625,
|
|
"logps/rejected": -622.3187255859375,
|
|
"logps/weighted_chosen": -3.8162598609924316,
|
|
"logps/weighted_rejected": -4.878759860992432,
|
|
"loss": 0.5962,
|
|
"rewards/accuracies": 0.628125011920929,
|
|
"rewards/chosen": -281.5263671875,
|
|
"rewards/margins": 83.4378890991211,
|
|
"rewards/rejected": -365.0884704589844,
|
|
"rewards/weighted_accuracies": 0.6968749761581421,
|
|
"rewards/weighted_chosen": -1.238305687904358,
|
|
"rewards/weighted_margins": 0.806610107421875,
|
|
"rewards/weighted_rejected": -2.045300245285034,
|
|
"step": 1230
|
|
},
|
|
{
|
|
"epoch": 0.6490447526825438,
|
|
"grad_norm": 17.675373077392578,
|
|
"learning_rate": 3.319987756883559e-07,
|
|
"logits/chosen": -1.0293700695037842,
|
|
"logits/rejected": -1.05963134765625,
|
|
"logps/chosen": -563.71875,
|
|
"logps/rejected": -646.96875,
|
|
"logps/weighted_chosen": -3.506591796875,
|
|
"logps/weighted_rejected": -4.776757717132568,
|
|
"loss": 0.5093,
|
|
"rewards/accuracies": 0.6468750238418579,
|
|
"rewards/chosen": -286.01483154296875,
|
|
"rewards/margins": 91.8824234008789,
|
|
"rewards/rejected": -377.9115295410156,
|
|
"rewards/weighted_accuracies": 0.75,
|
|
"rewards/weighted_chosen": -1.059393286705017,
|
|
"rewards/weighted_margins": 1.0341796875,
|
|
"rewards/weighted_rejected": -2.09356689453125,
|
|
"step": 1240
|
|
},
|
|
{
|
|
"epoch": 0.654278984559016,
|
|
"grad_norm": 30.464399337768555,
|
|
"learning_rate": 3.234207252239607e-07,
|
|
"logits/chosen": -1.0212494134902954,
|
|
"logits/rejected": -1.053070068359375,
|
|
"logps/chosen": -624.4281005859375,
|
|
"logps/rejected": -653.6124877929688,
|
|
"logps/weighted_chosen": -4.078759670257568,
|
|
"logps/weighted_rejected": -4.910693168640137,
|
|
"loss": 0.5845,
|
|
"rewards/accuracies": 0.6000000238418579,
|
|
"rewards/chosen": -331.91796875,
|
|
"rewards/margins": 68.6617202758789,
|
|
"rewards/rejected": -400.6312561035156,
|
|
"rewards/weighted_accuracies": 0.6937500238418579,
|
|
"rewards/weighted_chosen": -1.3435547351837158,
|
|
"rewards/weighted_margins": 0.7761596441268921,
|
|
"rewards/weighted_rejected": -2.1199707984924316,
|
|
"step": 1250
|
|
},
|
|
{
|
|
"epoch": 0.6595132164354881,
|
|
"grad_norm": 22.67099380493164,
|
|
"learning_rate": 3.1490165080293175e-07,
|
|
"logits/chosen": -1.032771348953247,
|
|
"logits/rejected": -1.0862915515899658,
|
|
"logps/chosen": -546.2867431640625,
|
|
"logps/rejected": -636.9609375,
|
|
"logps/weighted_chosen": -3.594482421875,
|
|
"logps/weighted_rejected": -4.584668159484863,
|
|
"loss": 0.5515,
|
|
"rewards/accuracies": 0.606249988079071,
|
|
"rewards/chosen": -278.97149658203125,
|
|
"rewards/margins": 94.80000305175781,
|
|
"rewards/rejected": -373.8335876464844,
|
|
"rewards/weighted_accuracies": 0.71875,
|
|
"rewards/weighted_chosen": -1.1841598749160767,
|
|
"rewards/weighted_margins": 0.892077624797821,
|
|
"rewards/weighted_rejected": -2.0762572288513184,
|
|
"step": 1260
|
|
},
|
|
{
|
|
"epoch": 0.6647474483119602,
|
|
"grad_norm": 18.550798416137695,
|
|
"learning_rate": 3.06444397726922e-07,
|
|
"logits/chosen": -1.000738501548767,
|
|
"logits/rejected": -1.0697616338729858,
|
|
"logps/chosen": -617.4578247070312,
|
|
"logps/rejected": -667.5968627929688,
|
|
"logps/weighted_chosen": -3.587646484375,
|
|
"logps/weighted_rejected": -5.254980564117432,
|
|
"loss": 0.5184,
|
|
"rewards/accuracies": 0.628125011920929,
|
|
"rewards/chosen": -321.51385498046875,
|
|
"rewards/margins": 83.83515930175781,
|
|
"rewards/rejected": -405.3265686035156,
|
|
"rewards/weighted_accuracies": 0.715624988079071,
|
|
"rewards/weighted_chosen": -1.1221191883087158,
|
|
"rewards/weighted_margins": 1.162841796875,
|
|
"rewards/weighted_rejected": -2.2856812477111816,
|
|
"step": 1270
|
|
},
|
|
{
|
|
"epoch": 0.6699816801884323,
|
|
"grad_norm": 22.748411178588867,
|
|
"learning_rate": 2.980517906497586e-07,
|
|
"logits/chosen": -1.0525604486465454,
|
|
"logits/rejected": -1.106359839439392,
|
|
"logps/chosen": -608.9031372070312,
|
|
"logps/rejected": -702.9249877929688,
|
|
"logps/weighted_chosen": -3.8238282203674316,
|
|
"logps/weighted_rejected": -5.245898246765137,
|
|
"loss": 0.5269,
|
|
"rewards/accuracies": 0.6187499761581421,
|
|
"rewards/chosen": -316.0335998535156,
|
|
"rewards/margins": 114.6685562133789,
|
|
"rewards/rejected": -430.79608154296875,
|
|
"rewards/weighted_accuracies": 0.7406250238418579,
|
|
"rewards/weighted_chosen": -1.237799048423767,
|
|
"rewards/weighted_margins": 0.95733642578125,
|
|
"rewards/weighted_rejected": -2.19537353515625,
|
|
"step": 1280
|
|
},
|
|
{
|
|
"epoch": 0.6752159120649045,
|
|
"grad_norm": 25.510196685791016,
|
|
"learning_rate": 2.89726632634029e-07,
|
|
"logits/chosen": -1.055084228515625,
|
|
"logits/rejected": -1.07818603515625,
|
|
"logps/chosen": -630.3406372070312,
|
|
"logps/rejected": -696.7406005859375,
|
|
"logps/weighted_chosen": -3.6622557640075684,
|
|
"logps/weighted_rejected": -4.793408393859863,
|
|
"loss": 0.5721,
|
|
"rewards/accuracies": 0.59375,
|
|
"rewards/chosen": -356.0171813964844,
|
|
"rewards/margins": 85.5914077758789,
|
|
"rewards/rejected": -441.40625,
|
|
"rewards/weighted_accuracies": 0.7093750238418579,
|
|
"rewards/weighted_chosen": -1.368402123451233,
|
|
"rewards/weighted_margins": 0.8181518316268921,
|
|
"rewards/weighted_rejected": -2.1871337890625,
|
|
"step": 1290
|
|
},
|
|
{
|
|
"epoch": 0.6804501439413766,
|
|
"grad_norm": 3403.25146484375,
|
|
"learning_rate": 2.814717042148827e-07,
|
|
"logits/chosen": -1.0733153820037842,
|
|
"logits/rejected": -1.1039886474609375,
|
|
"logps/chosen": -579.2171630859375,
|
|
"logps/rejected": -664.2179565429688,
|
|
"logps/weighted_chosen": -4.351758003234863,
|
|
"logps/weighted_rejected": -5.040478706359863,
|
|
"loss": 0.5988,
|
|
"rewards/accuracies": 0.5718749761581421,
|
|
"rewards/chosen": -305.47967529296875,
|
|
"rewards/margins": 97.9154281616211,
|
|
"rewards/rejected": -403.4019470214844,
|
|
"rewards/weighted_accuracies": 0.6937500238418579,
|
|
"rewards/weighted_chosen": -1.3027832508087158,
|
|
"rewards/weighted_margins": 0.817980945110321,
|
|
"rewards/weighted_rejected": -2.1208739280700684,
|
|
"step": 1300
|
|
},
|
|
{
|
|
"epoch": 0.6856843758178487,
|
|
"grad_norm": 17.00541877746582,
|
|
"learning_rate": 2.7328976247135416e-07,
|
|
"logits/chosen": -1.098138451576233,
|
|
"logits/rejected": -1.1229279041290283,
|
|
"logps/chosen": -568.8577880859375,
|
|
"logps/rejected": -613.6781005859375,
|
|
"logps/weighted_chosen": -3.7420411109924316,
|
|
"logps/weighted_rejected": -4.709765434265137,
|
|
"loss": 0.6077,
|
|
"rewards/accuracies": 0.606249988079071,
|
|
"rewards/chosen": -279.4331970214844,
|
|
"rewards/margins": 82.03047180175781,
|
|
"rewards/rejected": -361.5140686035156,
|
|
"rewards/weighted_accuracies": 0.684374988079071,
|
|
"rewards/weighted_chosen": -1.1976807117462158,
|
|
"rewards/weighted_margins": 0.7601562738418579,
|
|
"rewards/weighted_rejected": -1.9588134288787842,
|
|
"step": 1310
|
|
},
|
|
{
|
|
"epoch": 0.6909186076943209,
|
|
"grad_norm": 19.76185417175293,
|
|
"learning_rate": 2.651835401055217e-07,
|
|
"logits/chosen": -1.06744384765625,
|
|
"logits/rejected": -1.0995299816131592,
|
|
"logps/chosen": -551.1812744140625,
|
|
"logps/rejected": -621.4281005859375,
|
|
"logps/weighted_chosen": -3.578369140625,
|
|
"logps/weighted_rejected": -4.4482421875,
|
|
"loss": 0.6022,
|
|
"rewards/accuracies": 0.6156250238418579,
|
|
"rewards/chosen": -273.7007751464844,
|
|
"rewards/margins": 82.255859375,
|
|
"rewards/rejected": -355.9320373535156,
|
|
"rewards/weighted_accuracies": 0.715624988079071,
|
|
"rewards/weighted_chosen": -1.109167456626892,
|
|
"rewards/weighted_margins": 0.736828625202179,
|
|
"rewards/weighted_rejected": -1.8462402820587158,
|
|
"step": 1320
|
|
},
|
|
{
|
|
"epoch": 0.696152839570793,
|
|
"grad_norm": 38.951107025146484,
|
|
"learning_rate": 2.571557445298055e-07,
|
|
"logits/chosen": -1.060308814048767,
|
|
"logits/rejected": -1.1077148914337158,
|
|
"logps/chosen": -517.7781372070312,
|
|
"logps/rejected": -573.0328369140625,
|
|
"logps/weighted_chosen": -3.490673780441284,
|
|
"logps/weighted_rejected": -4.511181831359863,
|
|
"loss": 0.5684,
|
|
"rewards/accuracies": 0.6343749761581421,
|
|
"rewards/chosen": -239.5070343017578,
|
|
"rewards/margins": 69.423828125,
|
|
"rewards/rejected": -308.96209716796875,
|
|
"rewards/weighted_accuracies": 0.690625011920929,
|
|
"rewards/weighted_chosen": -0.9827636480331421,
|
|
"rewards/weighted_margins": 0.767047107219696,
|
|
"rewards/weighted_rejected": -1.749639868736267,
|
|
"step": 1330
|
|
},
|
|
{
|
|
"epoch": 0.7013870714472651,
|
|
"grad_norm": 24.909101486206055,
|
|
"learning_rate": 2.49209056962716e-07,
|
|
"logits/chosen": -1.0824463367462158,
|
|
"logits/rejected": -1.101318359375,
|
|
"logps/chosen": -593.8343505859375,
|
|
"logps/rejected": -619.0203247070312,
|
|
"logps/weighted_chosen": -3.7747559547424316,
|
|
"logps/weighted_rejected": -4.873144626617432,
|
|
"loss": 0.5711,
|
|
"rewards/accuracies": 0.59375,
|
|
"rewards/chosen": -279.1812438964844,
|
|
"rewards/margins": 72.595703125,
|
|
"rewards/rejected": -351.7855529785156,
|
|
"rewards/weighted_accuracies": 0.675000011920929,
|
|
"rewards/weighted_chosen": -1.0869140625,
|
|
"rewards/weighted_margins": 0.7144775390625,
|
|
"rewards/weighted_rejected": -1.8008911609649658,
|
|
"step": 1340
|
|
},
|
|
{
|
|
"epoch": 0.7066213033237373,
|
|
"grad_norm": 24.351770401000977,
|
|
"learning_rate": 2.41346131533347e-07,
|
|
"logits/chosen": -1.13226318359375,
|
|
"logits/rejected": -1.141271948814392,
|
|
"logps/chosen": -624.3922119140625,
|
|
"logps/rejected": -672.2625122070312,
|
|
"logps/weighted_chosen": -3.3324952125549316,
|
|
"logps/weighted_rejected": -4.617163181304932,
|
|
"loss": 0.5572,
|
|
"rewards/accuracies": 0.609375,
|
|
"rewards/chosen": -317.93731689453125,
|
|
"rewards/margins": 79.771484375,
|
|
"rewards/rejected": -397.5132751464844,
|
|
"rewards/weighted_accuracies": 0.721875011920929,
|
|
"rewards/weighted_chosen": -1.1239502429962158,
|
|
"rewards/weighted_margins": 0.762219250202179,
|
|
"rewards/weighted_rejected": -1.88616943359375,
|
|
"step": 1350
|
|
},
|
|
{
|
|
"epoch": 0.7118555352002094,
|
|
"grad_norm": 51.18987274169922,
|
|
"learning_rate": 2.3356959439491898e-07,
|
|
"logits/chosen": -1.053808569908142,
|
|
"logits/rejected": -1.1220916509628296,
|
|
"logps/chosen": -584.7609252929688,
|
|
"logps/rejected": -641.2109375,
|
|
"logps/weighted_chosen": -4.107861518859863,
|
|
"logps/weighted_rejected": -4.883447170257568,
|
|
"loss": 0.5525,
|
|
"rewards/accuracies": 0.609375,
|
|
"rewards/chosen": -314.8609313964844,
|
|
"rewards/margins": 84.24922180175781,
|
|
"rewards/rejected": -399.08319091796875,
|
|
"rewards/weighted_accuracies": 0.706250011920929,
|
|
"rewards/weighted_chosen": -1.143286108970642,
|
|
"rewards/weighted_margins": 0.9397217035293579,
|
|
"rewards/weighted_rejected": -2.082202196121216,
|
|
"step": 1360
|
|
},
|
|
{
|
|
"epoch": 0.7170897670766815,
|
|
"grad_norm": 25.061872482299805,
|
|
"learning_rate": 2.258820428476645e-07,
|
|
"logits/chosen": -1.083398461341858,
|
|
"logits/rejected": -1.124755859375,
|
|
"logps/chosen": -615.3109130859375,
|
|
"logps/rejected": -711.2015380859375,
|
|
"logps/weighted_chosen": -3.571972608566284,
|
|
"logps/weighted_rejected": -4.434179782867432,
|
|
"loss": 0.5486,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -336.1929626464844,
|
|
"rewards/margins": 105.92304992675781,
|
|
"rewards/rejected": -442.1929626464844,
|
|
"rewards/weighted_accuracies": 0.7093750238418579,
|
|
"rewards/weighted_chosen": -1.143713355064392,
|
|
"rewards/weighted_margins": 0.7840820550918579,
|
|
"rewards/weighted_rejected": -1.9280884265899658,
|
|
"step": 1370
|
|
},
|
|
{
|
|
"epoch": 0.7223239989531536,
|
|
"grad_norm": 50.347843170166016,
|
|
"learning_rate": 2.1828604447135245e-07,
|
|
"logits/chosen": -1.015539526939392,
|
|
"logits/rejected": -1.0683166980743408,
|
|
"logps/chosen": -660.08203125,
|
|
"logps/rejected": -700.8531494140625,
|
|
"logps/weighted_chosen": -4.080639839172363,
|
|
"logps/weighted_rejected": -5.272363185882568,
|
|
"loss": 0.5525,
|
|
"rewards/accuracies": 0.5874999761581421,
|
|
"rewards/chosen": -390.23712158203125,
|
|
"rewards/margins": 54.01288986206055,
|
|
"rewards/rejected": -444.32110595703125,
|
|
"rewards/weighted_accuracies": 0.6968749761581421,
|
|
"rewards/weighted_chosen": -1.2563965320587158,
|
|
"rewards/weighted_margins": 0.91107177734375,
|
|
"rewards/weighted_rejected": -2.1673583984375,
|
|
"step": 1380
|
|
},
|
|
{
|
|
"epoch": 0.7275582308296258,
|
|
"grad_norm": 29.76629638671875,
|
|
"learning_rate": 2.1078413626773545e-07,
|
|
"logits/chosen": -1.0745728015899658,
|
|
"logits/rejected": -1.095086693763733,
|
|
"logps/chosen": -615.3843994140625,
|
|
"logps/rejected": -720.0593872070312,
|
|
"logps/weighted_chosen": -3.6241729259490967,
|
|
"logps/weighted_rejected": -5.315381050109863,
|
|
"loss": 0.5552,
|
|
"rewards/accuracies": 0.6031249761581421,
|
|
"rewards/chosen": -335.61053466796875,
|
|
"rewards/margins": 115.576171875,
|
|
"rewards/rejected": -451.1439514160156,
|
|
"rewards/weighted_accuracies": 0.7250000238418579,
|
|
"rewards/weighted_chosen": -1.17816162109375,
|
|
"rewards/weighted_margins": 0.8593689203262329,
|
|
"rewards/weighted_rejected": -2.037463426589966,
|
|
"step": 1390
|
|
},
|
|
{
|
|
"epoch": 0.7327924627060979,
|
|
"grad_norm": 78.16152954101562,
|
|
"learning_rate": 2.0337882381321347e-07,
|
|
"logits/chosen": -1.062066674232483,
|
|
"logits/rejected": -1.0702636241912842,
|
|
"logps/chosen": -643.6328125,
|
|
"logps/rejected": -690.3914184570312,
|
|
"logps/weighted_chosen": -3.64990234375,
|
|
"logps/weighted_rejected": -4.711035251617432,
|
|
"loss": 0.5461,
|
|
"rewards/accuracies": 0.590624988079071,
|
|
"rewards/chosen": -352.107421875,
|
|
"rewards/margins": 84.32890319824219,
|
|
"rewards/rejected": -436.3828125,
|
|
"rewards/weighted_accuracies": 0.7281249761581421,
|
|
"rewards/weighted_chosen": -1.304632544517517,
|
|
"rewards/weighted_margins": 0.9240967035293579,
|
|
"rewards/weighted_rejected": -2.2285399436950684,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.73802669458257,
|
|
"grad_norm": 30.649791717529297,
|
|
"learning_rate": 1.960725804219905e-07,
|
|
"logits/chosen": -1.016119360923767,
|
|
"logits/rejected": -1.067724585533142,
|
|
"logps/chosen": -629.0554809570312,
|
|
"logps/rejected": -716.56640625,
|
|
"logps/weighted_chosen": -4.213110446929932,
|
|
"logps/weighted_rejected": -4.397546291351318,
|
|
"loss": 0.5731,
|
|
"rewards/accuracies": 0.606249988079071,
|
|
"rewards/chosen": -329.6851501464844,
|
|
"rewards/margins": 103.38749694824219,
|
|
"rewards/rejected": -433.10235595703125,
|
|
"rewards/weighted_accuracies": 0.684374988079071,
|
|
"rewards/weighted_chosen": -1.220544457435608,
|
|
"rewards/weighted_margins": 0.807751476764679,
|
|
"rewards/weighted_rejected": -2.0281982421875,
|
|
"step": 1410
|
|
},
|
|
{
|
|
"epoch": 0.7432609264590422,
|
|
"grad_norm": 22.40865707397461,
|
|
"learning_rate": 1.8886784632000824e-07,
|
|
"logits/chosen": -1.037255883216858,
|
|
"logits/rejected": -1.0631592273712158,
|
|
"logps/chosen": -600.8796997070312,
|
|
"logps/rejected": -739.5546875,
|
|
"logps/weighted_chosen": -3.5133299827575684,
|
|
"logps/weighted_rejected": -5.098974704742432,
|
|
"loss": 0.5074,
|
|
"rewards/accuracies": 0.653124988079071,
|
|
"rewards/chosen": -314.95819091796875,
|
|
"rewards/margins": 156.93203735351562,
|
|
"rewards/rejected": -471.8910217285156,
|
|
"rewards/weighted_accuracies": 0.746874988079071,
|
|
"rewards/weighted_chosen": -1.09588623046875,
|
|
"rewards/weighted_margins": 1.1151854991912842,
|
|
"rewards/weighted_rejected": -2.209277391433716,
|
|
"step": 1420
|
|
},
|
|
{
|
|
"epoch": 0.7484951583355143,
|
|
"grad_norm": 33.5097541809082,
|
|
"learning_rate": 1.8176702782993025e-07,
|
|
"logits/chosen": -1.0573241710662842,
|
|
"logits/rejected": -1.0565185546875,
|
|
"logps/chosen": -581.8117065429688,
|
|
"logps/rejected": -670.4046630859375,
|
|
"logps/weighted_chosen": -3.524365186691284,
|
|
"logps/weighted_rejected": -4.905322074890137,
|
|
"loss": 0.5604,
|
|
"rewards/accuracies": 0.606249988079071,
|
|
"rewards/chosen": -320.2757873535156,
|
|
"rewards/margins": 95.93476867675781,
|
|
"rewards/rejected": -416.09686279296875,
|
|
"rewards/weighted_accuracies": 0.6875,
|
|
"rewards/weighted_chosen": -1.2260253429412842,
|
|
"rewards/weighted_margins": 0.9058593511581421,
|
|
"rewards/weighted_rejected": -2.131915330886841,
|
|
"step": 1430
|
|
},
|
|
{
|
|
"epoch": 0.7537293902119864,
|
|
"grad_norm": 25.396400451660156,
|
|
"learning_rate": 1.7477249656745034e-07,
|
|
"logits/chosen": -0.9870361089706421,
|
|
"logits/rejected": -1.025244116783142,
|
|
"logps/chosen": -535.6265869140625,
|
|
"logps/rejected": -582.4281005859375,
|
|
"logps/weighted_chosen": -3.7501220703125,
|
|
"logps/weighted_rejected": -5.042870998382568,
|
|
"loss": 0.5337,
|
|
"rewards/accuracies": 0.609375,
|
|
"rewards/chosen": -284.6830139160156,
|
|
"rewards/margins": 71.47187805175781,
|
|
"rewards/rejected": -356.1968688964844,
|
|
"rewards/weighted_accuracies": 0.7562500238418579,
|
|
"rewards/weighted_chosen": -1.081658959388733,
|
|
"rewards/weighted_margins": 1.011804223060608,
|
|
"rewards/weighted_rejected": -2.094250440597534,
|
|
"step": 1440
|
|
},
|
|
{
|
|
"epoch": 0.7589636220884585,
|
|
"grad_norm": 46.104244232177734,
|
|
"learning_rate": 1.6788658864919118e-07,
|
|
"logits/chosen": -0.998852550983429,
|
|
"logits/rejected": -1.0865967273712158,
|
|
"logps/chosen": -691.5070190429688,
|
|
"logps/rejected": -765.0437622070312,
|
|
"logps/weighted_chosen": -3.697497606277466,
|
|
"logps/weighted_rejected": -4.519140720367432,
|
|
"loss": 0.5339,
|
|
"rewards/accuracies": 0.621874988079071,
|
|
"rewards/chosen": -376.79376220703125,
|
|
"rewards/margins": 104.693359375,
|
|
"rewards/rejected": -481.35467529296875,
|
|
"rewards/weighted_accuracies": 0.75,
|
|
"rewards/weighted_chosen": -1.15362548828125,
|
|
"rewards/weighted_margins": 0.9962402582168579,
|
|
"rewards/weighted_rejected": -2.150378465652466,
|
|
"step": 1450
|
|
},
|
|
{
|
|
"epoch": 0.7641978539649307,
|
|
"grad_norm": 29.475303649902344,
|
|
"learning_rate": 1.611116039124613e-07,
|
|
"logits/chosen": -0.993756115436554,
|
|
"logits/rejected": -1.0471680164337158,
|
|
"logps/chosen": -612.1336059570312,
|
|
"logps/rejected": -652.9812622070312,
|
|
"logps/weighted_chosen": -4.120263576507568,
|
|
"logps/weighted_rejected": -5.172119140625,
|
|
"loss": 0.5626,
|
|
"rewards/accuracies": 0.5843750238418579,
|
|
"rewards/chosen": -349.39178466796875,
|
|
"rewards/margins": 74.2535171508789,
|
|
"rewards/rejected": -423.4437561035156,
|
|
"rewards/weighted_accuracies": 0.6968749761581421,
|
|
"rewards/weighted_chosen": -1.174108862876892,
|
|
"rewards/weighted_margins": 0.957659900188446,
|
|
"rewards/weighted_rejected": -2.132617235183716,
|
|
"step": 1460
|
|
},
|
|
{
|
|
"epoch": 0.7694320858414028,
|
|
"grad_norm": 40.777061462402344,
|
|
"learning_rate": 1.5444980514712723e-07,
|
|
"logits/chosen": -1.0843079090118408,
|
|
"logits/rejected": -1.1043212413787842,
|
|
"logps/chosen": -677.7750244140625,
|
|
"logps/rejected": -797.6781005859375,
|
|
"logps/weighted_chosen": -3.82275390625,
|
|
"logps/weighted_rejected": -4.651171684265137,
|
|
"loss": 0.6086,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -366.34765625,
|
|
"rewards/margins": 125.67655944824219,
|
|
"rewards/rejected": -492.0234375,
|
|
"rewards/weighted_accuracies": 0.703125,
|
|
"rewards/weighted_chosen": -1.312963843345642,
|
|
"rewards/weighted_margins": 0.756854236125946,
|
|
"rewards/weighted_rejected": -2.069854736328125,
|
|
"step": 1470
|
|
},
|
|
{
|
|
"epoch": 0.7746663177178749,
|
|
"grad_norm": 29.14368438720703,
|
|
"learning_rate": 1.4790341733986083e-07,
|
|
"logits/chosen": -1.0463683605194092,
|
|
"logits/rejected": -1.0748412609100342,
|
|
"logps/chosen": -621.4453125,
|
|
"logps/rejected": -694.0281372070312,
|
|
"logps/weighted_chosen": -4.119336128234863,
|
|
"logps/weighted_rejected": -4.574511528015137,
|
|
"loss": 0.5669,
|
|
"rewards/accuracies": 0.6187499761581421,
|
|
"rewards/chosen": -334.2621154785156,
|
|
"rewards/margins": 97.23515319824219,
|
|
"rewards/rejected": -431.4808654785156,
|
|
"rewards/weighted_accuracies": 0.6812499761581421,
|
|
"rewards/weighted_chosen": -1.203649878501892,
|
|
"rewards/weighted_margins": 0.847003161907196,
|
|
"rewards/weighted_rejected": -2.0507445335388184,
|
|
"step": 1480
|
|
},
|
|
{
|
|
"epoch": 0.7799005495943471,
|
|
"grad_norm": 95.21968841552734,
|
|
"learning_rate": 1.4147462693101108e-07,
|
|
"logits/chosen": -1.0290710926055908,
|
|
"logits/rejected": -1.058990478515625,
|
|
"logps/chosen": -640.1702880859375,
|
|
"logps/rejected": -748.3312377929688,
|
|
"logps/weighted_chosen": -3.6262450218200684,
|
|
"logps/weighted_rejected": -4.870263576507568,
|
|
"loss": 0.5476,
|
|
"rewards/accuracies": 0.6187499761581421,
|
|
"rewards/chosen": -353.05682373046875,
|
|
"rewards/margins": 124.0137710571289,
|
|
"rewards/rejected": -476.9703063964844,
|
|
"rewards/weighted_accuracies": 0.7124999761581421,
|
|
"rewards/weighted_chosen": -1.14178466796875,
|
|
"rewards/weighted_margins": 1.012396216392517,
|
|
"rewards/weighted_rejected": -2.1542115211486816,
|
|
"step": 1490
|
|
},
|
|
{
|
|
"epoch": 0.7851347814708192,
|
|
"grad_norm": 29.23267936706543,
|
|
"learning_rate": 1.3516558108435177e-07,
|
|
"logits/chosen": -1.0289306640625,
|
|
"logits/rejected": -1.0320098400115967,
|
|
"logps/chosen": -576.7921752929688,
|
|
"logps/rejected": -693.203125,
|
|
"logps/weighted_chosen": -3.387939453125,
|
|
"logps/weighted_rejected": -5.066210746765137,
|
|
"loss": 0.538,
|
|
"rewards/accuracies": 0.609375,
|
|
"rewards/chosen": -297.62811279296875,
|
|
"rewards/margins": 130.5636749267578,
|
|
"rewards/rejected": -428.2289123535156,
|
|
"rewards/weighted_accuracies": 0.699999988079071,
|
|
"rewards/weighted_chosen": -0.9833618402481079,
|
|
"rewards/weighted_margins": 0.922503650188446,
|
|
"rewards/weighted_rejected": -1.9057738780975342,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 0.7851347814708192,
|
|
"eval_logits/chosen": -1.1175518035888672,
|
|
"eval_logits/rejected": -1.1384687423706055,
|
|
"eval_logps/chosen": -604.7919921875,
|
|
"eval_logps/rejected": -676.4500122070312,
|
|
"eval_logps/weighted_chosen": -3.6202943325042725,
|
|
"eval_logps/weighted_rejected": -4.712391376495361,
|
|
"eval_loss": 0.5499775409698486,
|
|
"eval_rewards/accuracies": 0.593500018119812,
|
|
"eval_rewards/chosen": -316.02337646484375,
|
|
"eval_rewards/margins": 92.78912353515625,
|
|
"eval_rewards/rejected": -408.82000732421875,
|
|
"eval_rewards/weighted_accuracies": 0.7145000100135803,
|
|
"eval_rewards/weighted_chosen": -1.0725815296173096,
|
|
"eval_rewards/weighted_margins": 0.8416025638580322,
|
|
"eval_rewards/weighted_rejected": -1.9141839742660522,
|
|
"eval_runtime": 1154.371,
|
|
"eval_samples_per_second": 1.733,
|
|
"eval_steps_per_second": 0.433,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 0.7903690133472913,
|
|
"grad_norm": 33.04483413696289,
|
|
"learning_rate": 1.2897838696994505e-07,
|
|
"logits/chosen": -1.023168921470642,
|
|
"logits/rejected": -1.0470459461212158,
|
|
"logps/chosen": -568.0797119140625,
|
|
"logps/rejected": -649.578125,
|
|
"logps/weighted_chosen": -4.047997951507568,
|
|
"logps/weighted_rejected": -4.915575981140137,
|
|
"loss": 0.595,
|
|
"rewards/accuracies": 0.6656249761581421,
|
|
"rewards/chosen": -295.654296875,
|
|
"rewards/margins": 91.7796859741211,
|
|
"rewards/rejected": -387.3218688964844,
|
|
"rewards/weighted_accuracies": 0.71875,
|
|
"rewards/weighted_chosen": -1.196380615234375,
|
|
"rewards/weighted_margins": 0.7684265375137329,
|
|
"rewards/weighted_rejected": -1.9652099609375,
|
|
"step": 1510
|
|
},
|
|
{
|
|
"epoch": 0.7956032452237635,
|
|
"grad_norm": 20.796615600585938,
|
|
"learning_rate": 1.229151110603649e-07,
|
|
"logits/chosen": -1.0638824701309204,
|
|
"logits/rejected": -1.1071808338165283,
|
|
"logps/chosen": -613.9664306640625,
|
|
"logps/rejected": -678.8499755859375,
|
|
"logps/weighted_chosen": -3.6717286109924316,
|
|
"logps/weighted_rejected": -4.715087890625,
|
|
"loss": 0.6142,
|
|
"rewards/accuracies": 0.628125011920929,
|
|
"rewards/chosen": -316.6216735839844,
|
|
"rewards/margins": 89.4507827758789,
|
|
"rewards/rejected": -406.25701904296875,
|
|
"rewards/weighted_accuracies": 0.699999988079071,
|
|
"rewards/weighted_chosen": -1.19403076171875,
|
|
"rewards/weighted_margins": 0.705639660358429,
|
|
"rewards/weighted_rejected": -1.89990234375,
|
|
"step": 1520
|
|
},
|
|
{
|
|
"epoch": 0.8008374771002356,
|
|
"grad_norm": 17.729528427124023,
|
|
"learning_rate": 1.1697777844051104e-07,
|
|
"logits/chosen": -1.0761749744415283,
|
|
"logits/rejected": -1.089080810546875,
|
|
"logps/chosen": -648.828125,
|
|
"logps/rejected": -752.1140747070312,
|
|
"logps/weighted_chosen": -3.615478515625,
|
|
"logps/weighted_rejected": -4.789502143859863,
|
|
"loss": 0.5181,
|
|
"rewards/accuracies": 0.6499999761581421,
|
|
"rewards/chosen": -345.47882080078125,
|
|
"rewards/margins": 124.6578140258789,
|
|
"rewards/rejected": -469.9800720214844,
|
|
"rewards/weighted_accuracies": 0.737500011920929,
|
|
"rewards/weighted_chosen": -1.0861327648162842,
|
|
"rewards/weighted_margins": 0.8956298828125,
|
|
"rewards/weighted_rejected": -1.982019066810608,
|
|
"step": 1530
|
|
},
|
|
{
|
|
"epoch": 0.8060717089767077,
|
|
"grad_norm": 24.399063110351562,
|
|
"learning_rate": 1.111683721312477e-07,
|
|
"logits/chosen": -1.0483185052871704,
|
|
"logits/rejected": -1.0707489252090454,
|
|
"logps/chosen": -612.3883056640625,
|
|
"logps/rejected": -698.0546875,
|
|
"logps/weighted_chosen": -3.4658203125,
|
|
"logps/weighted_rejected": -4.220166206359863,
|
|
"loss": 0.558,
|
|
"rewards/accuracies": 0.5874999761581421,
|
|
"rewards/chosen": -336.87677001953125,
|
|
"rewards/margins": 101.2386703491211,
|
|
"rewards/rejected": -438.15234375,
|
|
"rewards/weighted_accuracies": 0.706250011920929,
|
|
"rewards/weighted_chosen": -1.160925269126892,
|
|
"rewards/weighted_margins": 0.888659656047821,
|
|
"rewards/weighted_rejected": -2.049755811691284,
|
|
"step": 1540
|
|
},
|
|
{
|
|
"epoch": 0.8113059408531798,
|
|
"grad_norm": 42.796451568603516,
|
|
"learning_rate": 1.0548883242709033e-07,
|
|
"logits/chosen": -0.978894054889679,
|
|
"logits/rejected": -1.035614013671875,
|
|
"logps/chosen": -627.6140747070312,
|
|
"logps/rejected": -770.4288940429688,
|
|
"logps/weighted_chosen": -4.208154201507568,
|
|
"logps/weighted_rejected": -5.158984184265137,
|
|
"loss": 0.4957,
|
|
"rewards/accuracies": 0.6812499761581421,
|
|
"rewards/chosen": -343.4136657714844,
|
|
"rewards/margins": 157.42578125,
|
|
"rewards/rejected": -500.8896484375,
|
|
"rewards/weighted_accuracies": 0.762499988079071,
|
|
"rewards/weighted_chosen": -1.1079833507537842,
|
|
"rewards/weighted_margins": 1.043493628501892,
|
|
"rewards/weighted_rejected": -2.151293992996216,
|
|
"step": 1550
|
|
},
|
|
{
|
|
"epoch": 0.816540172729652,
|
|
"grad_norm": 24.997255325317383,
|
|
"learning_rate": 9.994105624816379e-08,
|
|
"logits/chosen": -1.037078857421875,
|
|
"logits/rejected": -1.088891625404358,
|
|
"logps/chosen": -700.9812622070312,
|
|
"logps/rejected": -796.5750122070312,
|
|
"logps/weighted_chosen": -4.061865329742432,
|
|
"logps/weighted_rejected": -5.445410251617432,
|
|
"loss": 0.5357,
|
|
"rewards/accuracies": 0.6499999761581421,
|
|
"rewards/chosen": -399.009765625,
|
|
"rewards/margins": 114.9976577758789,
|
|
"rewards/rejected": -514.0843505859375,
|
|
"rewards/weighted_accuracies": 0.7124999761581421,
|
|
"rewards/weighted_chosen": -1.3728148937225342,
|
|
"rewards/weighted_margins": 0.9924713373184204,
|
|
"rewards/weighted_rejected": -2.3653807640075684,
|
|
"step": 1560
|
|
},
|
|
{
|
|
"epoch": 0.821774404606124,
|
|
"grad_norm": 36.98191833496094,
|
|
"learning_rate": 9.452689650664514e-08,
|
|
"logits/chosen": -1.0295531749725342,
|
|
"logits/rejected": -1.0981414318084717,
|
|
"logps/chosen": -703.7531127929688,
|
|
"logps/rejected": -729.4656372070312,
|
|
"logps/weighted_chosen": -3.6918702125549316,
|
|
"logps/weighted_rejected": -4.560595512390137,
|
|
"loss": 0.5276,
|
|
"rewards/accuracies": 0.6156250238418579,
|
|
"rewards/chosen": -407.9234313964844,
|
|
"rewards/margins": 66.05390930175781,
|
|
"rewards/rejected": -474.13751220703125,
|
|
"rewards/weighted_accuracies": 0.71875,
|
|
"rewards/weighted_chosen": -1.312036156654358,
|
|
"rewards/weighted_margins": 0.935559093952179,
|
|
"rewards/weighted_rejected": -2.2478270530700684,
|
|
"step": 1570
|
|
},
|
|
{
|
|
"epoch": 0.8270086364825961,
|
|
"grad_norm": 25.41311264038086,
|
|
"learning_rate": 8.924816148790748e-08,
|
|
"logits/chosen": -1.0425536632537842,
|
|
"logits/rejected": -1.086340308189392,
|
|
"logps/chosen": -719.7468872070312,
|
|
"logps/rejected": -814.3968505859375,
|
|
"logps/weighted_chosen": -4.014013767242432,
|
|
"logps/weighted_rejected": -5.042284965515137,
|
|
"loss": 0.5361,
|
|
"rewards/accuracies": 0.6187499761581421,
|
|
"rewards/chosen": -425.23712158203125,
|
|
"rewards/margins": 110.99531555175781,
|
|
"rewards/rejected": -536.3671875,
|
|
"rewards/weighted_accuracies": 0.71875,
|
|
"rewards/weighted_chosen": -1.307092308998108,
|
|
"rewards/weighted_margins": 1.052404761314392,
|
|
"rewards/weighted_rejected": -2.360107421875,
|
|
"step": 1580
|
|
},
|
|
{
|
|
"epoch": 0.8322428683590684,
|
|
"grad_norm": 20.04231071472168,
|
|
"learning_rate": 8.410661424656607e-08,
|
|
"logits/chosen": -1.0793273448944092,
|
|
"logits/rejected": -1.0862334966659546,
|
|
"logps/chosen": -724.1187744140625,
|
|
"logps/rejected": -809.2515869140625,
|
|
"logps/weighted_chosen": -3.901904344558716,
|
|
"logps/weighted_rejected": -5.187939643859863,
|
|
"loss": 0.5287,
|
|
"rewards/accuracies": 0.578125,
|
|
"rewards/chosen": -430.724609375,
|
|
"rewards/margins": 118.25859069824219,
|
|
"rewards/rejected": -549.1734619140625,
|
|
"rewards/weighted_accuracies": 0.7250000238418579,
|
|
"rewards/weighted_chosen": -1.4421265125274658,
|
|
"rewards/weighted_margins": 1.137597680091858,
|
|
"rewards/weighted_rejected": -2.579272508621216,
|
|
"step": 1590
|
|
},
|
|
{
|
|
"epoch": 0.8374771002355405,
|
|
"grad_norm": 18.9372615814209,
|
|
"learning_rate": 7.910397201763308e-08,
|
|
"logits/chosen": -1.035308837890625,
|
|
"logits/rejected": -1.0541870594024658,
|
|
"logps/chosen": -706.3390502929688,
|
|
"logps/rejected": -799.5609130859375,
|
|
"logps/weighted_chosen": -3.9484620094299316,
|
|
"logps/weighted_rejected": -4.694140434265137,
|
|
"loss": 0.5996,
|
|
"rewards/accuracies": 0.543749988079071,
|
|
"rewards/chosen": -439.6175842285156,
|
|
"rewards/margins": 100.1382827758789,
|
|
"rewards/rejected": -539.5773315429688,
|
|
"rewards/weighted_accuracies": 0.7281249761581421,
|
|
"rewards/weighted_chosen": -1.442968726158142,
|
|
"rewards/weighted_margins": 0.917126476764679,
|
|
"rewards/weighted_rejected": -2.359942674636841,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.8427113321120125,
|
|
"grad_norm": 42.782772064208984,
|
|
"learning_rate": 7.424190564297489e-08,
|
|
"logits/chosen": -1.065649390220642,
|
|
"logits/rejected": -1.1031494140625,
|
|
"logps/chosen": -725.6617431640625,
|
|
"logps/rejected": -829.0968627929688,
|
|
"logps/weighted_chosen": -3.7089600563049316,
|
|
"logps/weighted_rejected": -4.779443264007568,
|
|
"loss": 0.5412,
|
|
"rewards/accuracies": 0.609375,
|
|
"rewards/chosen": -444.4730529785156,
|
|
"rewards/margins": 108.79219055175781,
|
|
"rewards/rejected": -553.4078369140625,
|
|
"rewards/weighted_accuracies": 0.699999988079071,
|
|
"rewards/weighted_chosen": -1.403631567955017,
|
|
"rewards/weighted_margins": 1.0037841796875,
|
|
"rewards/weighted_rejected": -2.4067625999450684,
|
|
"step": 1610
|
|
},
|
|
{
|
|
"epoch": 0.8479455639884846,
|
|
"grad_norm": 22.982988357543945,
|
|
"learning_rate": 6.952203901326464e-08,
|
|
"logits/chosen": -1.019067406654358,
|
|
"logits/rejected": -1.0470367670059204,
|
|
"logps/chosen": -708.2687377929688,
|
|
"logps/rejected": -808.2546997070312,
|
|
"logps/weighted_chosen": -3.988232374191284,
|
|
"logps/weighted_rejected": -5.289990425109863,
|
|
"loss": 0.5378,
|
|
"rewards/accuracies": 0.596875011920929,
|
|
"rewards/chosen": -419.03594970703125,
|
|
"rewards/margins": 118.74609375,
|
|
"rewards/rejected": -537.8722534179688,
|
|
"rewards/weighted_accuracies": 0.71875,
|
|
"rewards/weighted_chosen": -1.298193335533142,
|
|
"rewards/weighted_margins": 0.996386706829071,
|
|
"rewards/weighted_rejected": -2.294604539871216,
|
|
"step": 1620
|
|
},
|
|
{
|
|
"epoch": 0.8531797958649568,
|
|
"grad_norm": 34.882381439208984,
|
|
"learning_rate": 6.494594852561558e-08,
|
|
"logits/chosen": -1.0084228515625,
|
|
"logits/rejected": -1.042303442955017,
|
|
"logps/chosen": -723.1671752929688,
|
|
"logps/rejected": -805.4453125,
|
|
"logps/weighted_chosen": -3.838757276535034,
|
|
"logps/weighted_rejected": -5.274987697601318,
|
|
"loss": 0.5154,
|
|
"rewards/accuracies": 0.6312500238418579,
|
|
"rewards/chosen": -432.11346435546875,
|
|
"rewards/margins": 102.5433578491211,
|
|
"rewards/rejected": -534.5486450195312,
|
|
"rewards/weighted_accuracies": 0.71875,
|
|
"rewards/weighted_chosen": -1.2553589344024658,
|
|
"rewards/weighted_margins": 1.0247802734375,
|
|
"rewards/weighted_rejected": -2.280255079269409,
|
|
"step": 1630
|
|
},
|
|
{
|
|
"epoch": 0.8584140277414289,
|
|
"grad_norm": 39.33643341064453,
|
|
"learning_rate": 6.051516255707773e-08,
|
|
"logits/chosen": -1.0444824695587158,
|
|
"logits/rejected": -1.079583764076233,
|
|
"logps/chosen": -666.0718994140625,
|
|
"logps/rejected": -780.9375,
|
|
"logps/weighted_chosen": -3.915283203125,
|
|
"logps/weighted_rejected": -5.016845703125,
|
|
"loss": 0.5429,
|
|
"rewards/accuracies": 0.6187499761581421,
|
|
"rewards/chosen": -393.1158142089844,
|
|
"rewards/margins": 135.9542999267578,
|
|
"rewards/rejected": -528.6539306640625,
|
|
"rewards/weighted_accuracies": 0.6937500238418579,
|
|
"rewards/weighted_chosen": -1.3769409656524658,
|
|
"rewards/weighted_margins": 1.0125916004180908,
|
|
"rewards/weighted_rejected": -2.3889527320861816,
|
|
"step": 1640
|
|
},
|
|
{
|
|
"epoch": 0.863648259617901,
|
|
"grad_norm": 21.693418502807617,
|
|
"learning_rate": 5.6231160954171796e-08,
|
|
"logits/chosen": -0.9886413812637329,
|
|
"logits/rejected": -1.064294457435608,
|
|
"logps/chosen": -679.0531005859375,
|
|
"logps/rejected": -852.2890625,
|
|
"logps/weighted_chosen": -4.170117378234863,
|
|
"logps/weighted_rejected": -4.453076362609863,
|
|
"loss": 0.5474,
|
|
"rewards/accuracies": 0.640625,
|
|
"rewards/chosen": -408.7554626464844,
|
|
"rewards/margins": 169.57284545898438,
|
|
"rewards/rejected": -578.2144775390625,
|
|
"rewards/weighted_accuracies": 0.7281249761581421,
|
|
"rewards/weighted_chosen": -1.352716088294983,
|
|
"rewards/weighted_margins": 0.973614513874054,
|
|
"rewards/weighted_rejected": -2.326489210128784,
|
|
"step": 1650
|
|
},
|
|
{
|
|
"epoch": 0.8688824914943732,
|
|
"grad_norm": 29.99761199951172,
|
|
"learning_rate": 5.209537453863289e-08,
|
|
"logits/chosen": -1.0624526739120483,
|
|
"logits/rejected": -1.073492407798767,
|
|
"logps/chosen": -588.7734375,
|
|
"logps/rejected": -707.7062377929688,
|
|
"logps/weighted_chosen": -3.745361328125,
|
|
"logps/weighted_rejected": -4.813672065734863,
|
|
"loss": 0.5391,
|
|
"rewards/accuracies": 0.609375,
|
|
"rewards/chosen": -318.62890625,
|
|
"rewards/margins": 139.2451171875,
|
|
"rewards/rejected": -457.93359375,
|
|
"rewards/weighted_accuracies": 0.7406250238418579,
|
|
"rewards/weighted_chosen": -1.222619652748108,
|
|
"rewards/weighted_margins": 1.0465819835662842,
|
|
"rewards/weighted_rejected": -2.2699646949768066,
|
|
"step": 1660
|
|
},
|
|
{
|
|
"epoch": 0.8741167233708453,
|
|
"grad_norm": 48.07187271118164,
|
|
"learning_rate": 4.8109184629527344e-08,
|
|
"logits/chosen": -1.037384033203125,
|
|
"logits/rejected": -1.0457366704940796,
|
|
"logps/chosen": -638.6046752929688,
|
|
"logps/rejected": -779.0484619140625,
|
|
"logps/weighted_chosen": -3.819580078125,
|
|
"logps/weighted_rejected": -5.112841606140137,
|
|
"loss": 0.5366,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": -367.76287841796875,
|
|
"rewards/margins": 153.4968719482422,
|
|
"rewards/rejected": -521.04296875,
|
|
"rewards/weighted_accuracies": 0.7124999761581421,
|
|
"rewards/weighted_chosen": -1.20782470703125,
|
|
"rewards/weighted_margins": 1.06304931640625,
|
|
"rewards/weighted_rejected": -2.2711548805236816,
|
|
"step": 1670
|
|
},
|
|
{
|
|
"epoch": 0.8793509552473174,
|
|
"grad_norm": 26.007558822631836,
|
|
"learning_rate": 4.427392258190399e-08,
|
|
"logits/chosen": -1.080664038658142,
|
|
"logits/rejected": -1.092126488685608,
|
|
"logps/chosen": -695.2984619140625,
|
|
"logps/rejected": -768.0953369140625,
|
|
"logps/weighted_chosen": -3.857311964035034,
|
|
"logps/weighted_rejected": -5.328369140625,
|
|
"loss": 0.5647,
|
|
"rewards/accuracies": 0.565625011920929,
|
|
"rewards/chosen": -405.20623779296875,
|
|
"rewards/margins": 102.0152359008789,
|
|
"rewards/rejected": -507.4429626464844,
|
|
"rewards/weighted_accuracies": 0.7437499761581421,
|
|
"rewards/weighted_chosen": -1.279486060142517,
|
|
"rewards/weighted_margins": 0.929272472858429,
|
|
"rewards/weighted_rejected": -2.20843505859375,
|
|
"step": 1680
|
|
},
|
|
{
|
|
"epoch": 0.8845851871237895,
|
|
"grad_norm": 27.376941680908203,
|
|
"learning_rate": 4.059086934213141e-08,
|
|
"logits/chosen": -1.0285918712615967,
|
|
"logits/rejected": -1.064416527748108,
|
|
"logps/chosen": -683.9781494140625,
|
|
"logps/rejected": -730.2593994140625,
|
|
"logps/weighted_chosen": -3.7125487327575684,
|
|
"logps/weighted_rejected": -4.866064548492432,
|
|
"loss": 0.4997,
|
|
"rewards/accuracies": 0.581250011920929,
|
|
"rewards/chosen": -392.7562561035156,
|
|
"rewards/margins": 87.4644546508789,
|
|
"rewards/rejected": -480.1617126464844,
|
|
"rewards/weighted_accuracies": 0.753125011920929,
|
|
"rewards/weighted_chosen": -1.2972290515899658,
|
|
"rewards/weighted_margins": 1.099523901939392,
|
|
"rewards/weighted_rejected": -2.3959717750549316,
|
|
"step": 1690
|
|
},
|
|
{
|
|
"epoch": 0.8898194190002617,
|
|
"grad_norm": 23.167646408081055,
|
|
"learning_rate": 3.7061255020073346e-08,
|
|
"logits/chosen": -1.0998427867889404,
|
|
"logits/rejected": -1.1160888671875,
|
|
"logps/chosen": -686.8656005859375,
|
|
"logps/rejected": -774.8515625,
|
|
"logps/weighted_chosen": -3.696582078933716,
|
|
"logps/weighted_rejected": -4.601147651672363,
|
|
"loss": 0.519,
|
|
"rewards/accuracies": 0.6031249761581421,
|
|
"rewards/chosen": -400.40350341796875,
|
|
"rewards/margins": 112.77461242675781,
|
|
"rewards/rejected": -513.212890625,
|
|
"rewards/weighted_accuracies": 0.721875011920929,
|
|
"rewards/weighted_chosen": -1.2873656749725342,
|
|
"rewards/weighted_margins": 1.022790551185608,
|
|
"rewards/weighted_rejected": -2.310229539871216,
|
|
"step": 1700
|
|
},
|
|
{
|
|
"epoch": 0.8950536508767338,
|
|
"grad_norm": 26.41366195678711,
|
|
"learning_rate": 3.3686258478241027e-08,
|
|
"logits/chosen": -1.10308837890625,
|
|
"logits/rejected": -1.1109344959259033,
|
|
"logps/chosen": -737.3968505859375,
|
|
"logps/rejected": -790.0671997070312,
|
|
"logps/weighted_chosen": -3.860668897628784,
|
|
"logps/weighted_rejected": -4.782372951507568,
|
|
"loss": 0.583,
|
|
"rewards/accuracies": 0.5843750238418579,
|
|
"rewards/chosen": -425.9263610839844,
|
|
"rewards/margins": 85.51679992675781,
|
|
"rewards/rejected": -511.4320373535156,
|
|
"rewards/weighted_accuracies": 0.7093750238418579,
|
|
"rewards/weighted_chosen": -1.4254882335662842,
|
|
"rewards/weighted_margins": 0.78106689453125,
|
|
"rewards/weighted_rejected": -2.206738233566284,
|
|
"step": 1710
|
|
},
|
|
{
|
|
"epoch": 0.9002878827532059,
|
|
"grad_norm": 25.273611068725586,
|
|
"learning_rate": 3.0467006938063366e-08,
|
|
"logits/chosen": -1.0660889148712158,
|
|
"logits/rejected": -1.098059058189392,
|
|
"logps/chosen": -671.5203247070312,
|
|
"logps/rejected": -752.9375,
|
|
"logps/weighted_chosen": -3.584338426589966,
|
|
"logps/weighted_rejected": -4.9091796875,
|
|
"loss": 0.5652,
|
|
"rewards/accuracies": 0.6031249761581421,
|
|
"rewards/chosen": -398.796875,
|
|
"rewards/margins": 94.3648452758789,
|
|
"rewards/rejected": -493.16796875,
|
|
"rewards/weighted_accuracies": 0.699999988079071,
|
|
"rewards/weighted_chosen": -1.3784363269805908,
|
|
"rewards/weighted_margins": 1.0235106945037842,
|
|
"rewards/weighted_rejected": -2.401867628097534,
|
|
"step": 1720
|
|
},
|
|
{
|
|
"epoch": 0.9055221146296781,
|
|
"grad_norm": 41.00505447387695,
|
|
"learning_rate": 2.7404575603403646e-08,
|
|
"logits/chosen": -1.0376098155975342,
|
|
"logits/rejected": -1.067657470703125,
|
|
"logps/chosen": -645.05859375,
|
|
"logps/rejected": -789.4781494140625,
|
|
"logps/weighted_chosen": -4.015649318695068,
|
|
"logps/weighted_rejected": -5.325341701507568,
|
|
"loss": 0.4583,
|
|
"rewards/accuracies": 0.628125011920929,
|
|
"rewards/chosen": -370.9443359375,
|
|
"rewards/margins": 149.03262329101562,
|
|
"rewards/rejected": -520.0062255859375,
|
|
"rewards/weighted_accuracies": 0.7562500238418579,
|
|
"rewards/weighted_chosen": -1.2229797840118408,
|
|
"rewards/weighted_margins": 1.238745093345642,
|
|
"rewards/weighted_rejected": -2.4620728492736816,
|
|
"step": 1730
|
|
},
|
|
{
|
|
"epoch": 0.9107563465061502,
|
|
"grad_norm": 28.638446807861328,
|
|
"learning_rate": 2.4499987301450698e-08,
|
|
"logits/chosen": -1.068115234375,
|
|
"logits/rejected": -1.125952124595642,
|
|
"logps/chosen": -704.9406127929688,
|
|
"logps/rejected": -853.0062255859375,
|
|
"logps/weighted_chosen": -3.899169921875,
|
|
"logps/weighted_rejected": -5.107470512390137,
|
|
"loss": 0.4743,
|
|
"rewards/accuracies": 0.640625,
|
|
"rewards/chosen": -385.1449279785156,
|
|
"rewards/margins": 183.0597686767578,
|
|
"rewards/rejected": -568.173828125,
|
|
"rewards/weighted_accuracies": 0.768750011920929,
|
|
"rewards/weighted_chosen": -1.2654540538787842,
|
|
"rewards/weighted_margins": 1.137670874595642,
|
|
"rewards/weighted_rejected": -2.402661085128784,
|
|
"step": 1740
|
|
},
|
|
{
|
|
"epoch": 0.9159905783826223,
|
|
"grad_norm": 32.34984588623047,
|
|
"learning_rate": 2.1754212141102347e-08,
|
|
"logits/chosen": -1.071801781654358,
|
|
"logits/rejected": -1.0734984874725342,
|
|
"logps/chosen": -663.3663940429688,
|
|
"logps/rejected": -792.84375,
|
|
"logps/weighted_chosen": -4.340795993804932,
|
|
"logps/weighted_rejected": -5.438916206359863,
|
|
"loss": 0.4879,
|
|
"rewards/accuracies": 0.640625,
|
|
"rewards/chosen": -384.6597595214844,
|
|
"rewards/margins": 138.44686889648438,
|
|
"rewards/rejected": -523.0758056640625,
|
|
"rewards/weighted_accuracies": 0.762499988079071,
|
|
"rewards/weighted_chosen": -1.283929467201233,
|
|
"rewards/weighted_margins": 1.1354491710662842,
|
|
"rewards/weighted_rejected": -2.4184937477111816,
|
|
"step": 1750
|
|
},
|
|
{
|
|
"epoch": 0.9212248102590945,
|
|
"grad_norm": 28.857627868652344,
|
|
"learning_rate": 1.9168167188957586e-08,
|
|
"logits/chosen": -1.012640357017517,
|
|
"logits/rejected": -1.081701636314392,
|
|
"logps/chosen": -658.5015869140625,
|
|
"logps/rejected": -722.2390747070312,
|
|
"logps/weighted_chosen": -3.9429688453674316,
|
|
"logps/weighted_rejected": -4.90234375,
|
|
"loss": 0.5441,
|
|
"rewards/accuracies": 0.590624988079071,
|
|
"rewards/chosen": -377.6923828125,
|
|
"rewards/margins": 97.7406234741211,
|
|
"rewards/rejected": -475.71484375,
|
|
"rewards/weighted_accuracies": 0.71875,
|
|
"rewards/weighted_chosen": -1.31634521484375,
|
|
"rewards/weighted_margins": 1.0785338878631592,
|
|
"rewards/weighted_rejected": -2.396069288253784,
|
|
"step": 1760
|
|
},
|
|
{
|
|
"epoch": 0.9264590421355666,
|
|
"grad_norm": 39.25354766845703,
|
|
"learning_rate": 1.6742716163022865e-08,
|
|
"logits/chosen": -1.085870385169983,
|
|
"logits/rejected": -1.09857177734375,
|
|
"logps/chosen": -688.9390869140625,
|
|
"logps/rejected": -837.5187377929688,
|
|
"logps/weighted_chosen": -3.732714891433716,
|
|
"logps/weighted_rejected": -5.170654296875,
|
|
"loss": 0.4973,
|
|
"rewards/accuracies": 0.6499999761581421,
|
|
"rewards/chosen": -392.45037841796875,
|
|
"rewards/margins": 168.8874969482422,
|
|
"rewards/rejected": -561.3019409179688,
|
|
"rewards/weighted_accuracies": 0.784375011920929,
|
|
"rewards/weighted_chosen": -1.3793151378631592,
|
|
"rewards/weighted_margins": 1.1719238758087158,
|
|
"rewards/weighted_rejected": -2.551684617996216,
|
|
"step": 1770
|
|
},
|
|
{
|
|
"epoch": 0.9316932740120387,
|
|
"grad_norm": 28.262041091918945,
|
|
"learning_rate": 1.4478669144238343e-08,
|
|
"logits/chosen": -1.0385589599609375,
|
|
"logits/rejected": -1.031134009361267,
|
|
"logps/chosen": -629.2132568359375,
|
|
"logps/rejected": -778.0023193359375,
|
|
"logps/weighted_chosen": -3.922070264816284,
|
|
"logps/weighted_rejected": -5.3369140625,
|
|
"loss": 0.5496,
|
|
"rewards/accuracies": 0.6499999761581421,
|
|
"rewards/chosen": -366.00799560546875,
|
|
"rewards/margins": 156.92031860351562,
|
|
"rewards/rejected": -522.5992431640625,
|
|
"rewards/weighted_accuracies": 0.734375,
|
|
"rewards/weighted_chosen": -1.3271605968475342,
|
|
"rewards/weighted_margins": 1.061981201171875,
|
|
"rewards/weighted_rejected": -2.388622999191284,
|
|
"step": 1780
|
|
},
|
|
{
|
|
"epoch": 0.9369275058885108,
|
|
"grad_norm": 49.04172897338867,
|
|
"learning_rate": 1.23767823059166e-08,
|
|
"logits/chosen": -1.039398193359375,
|
|
"logits/rejected": -1.055413842201233,
|
|
"logps/chosen": -701.0413818359375,
|
|
"logps/rejected": -830.3093872070312,
|
|
"logps/weighted_chosen": -3.6467041969299316,
|
|
"logps/weighted_rejected": -4.895654201507568,
|
|
"loss": 0.5462,
|
|
"rewards/accuracies": 0.5687500238418579,
|
|
"rewards/chosen": -409.703125,
|
|
"rewards/margins": 139.953125,
|
|
"rewards/rejected": -549.7257690429688,
|
|
"rewards/weighted_accuracies": 0.721875011920929,
|
|
"rewards/weighted_chosen": -1.362951636314392,
|
|
"rewards/weighted_margins": 1.0340454578399658,
|
|
"rewards/weighted_rejected": -2.3976073265075684,
|
|
"step": 1790
|
|
},
|
|
{
|
|
"epoch": 0.942161737764983,
|
|
"grad_norm": 22.44223976135254,
|
|
"learning_rate": 1.0437757661187486e-08,
|
|
"logits/chosen": -1.063806176185608,
|
|
"logits/rejected": -1.0848114490509033,
|
|
"logps/chosen": -749.4093627929688,
|
|
"logps/rejected": -854.6468505859375,
|
|
"logps/weighted_chosen": -4.386376857757568,
|
|
"logps/weighted_rejected": -5.128662109375,
|
|
"loss": 0.5093,
|
|
"rewards/accuracies": 0.643750011920929,
|
|
"rewards/chosen": -433.10076904296875,
|
|
"rewards/margins": 140.12149047851562,
|
|
"rewards/rejected": -573.2366943359375,
|
|
"rewards/weighted_accuracies": 0.731249988079071,
|
|
"rewards/weighted_chosen": -1.32525634765625,
|
|
"rewards/weighted_margins": 1.0437133312225342,
|
|
"rewards/weighted_rejected": -2.3693480491638184,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.9473959696414551,
|
|
"grad_norm": 50.09382247924805,
|
|
"learning_rate": 8.662242828530953e-09,
|
|
"logits/chosen": -1.0404754877090454,
|
|
"logits/rejected": -1.0818588733673096,
|
|
"logps/chosen": -646.4109497070312,
|
|
"logps/rejected": -787.7718505859375,
|
|
"logps/weighted_chosen": -4.325634956359863,
|
|
"logps/weighted_rejected": -5.2548828125,
|
|
"loss": 0.5664,
|
|
"rewards/accuracies": 0.668749988079071,
|
|
"rewards/chosen": -384.98907470703125,
|
|
"rewards/margins": 149.36874389648438,
|
|
"rewards/rejected": -534.6765747070312,
|
|
"rewards/weighted_accuracies": 0.706250011920929,
|
|
"rewards/weighted_chosen": -1.4359314441680908,
|
|
"rewards/weighted_margins": 0.99676513671875,
|
|
"rewards/weighted_rejected": -2.4330811500549316,
|
|
"step": 1810
|
|
},
|
|
{
|
|
"epoch": 0.9526302015179272,
|
|
"grad_norm": 31.461109161376953,
|
|
"learning_rate": 7.050830815478082e-09,
|
|
"logits/chosen": -1.0684020519256592,
|
|
"logits/rejected": -1.0823822021484375,
|
|
"logps/chosen": -642.1265869140625,
|
|
"logps/rejected": -759.2999877929688,
|
|
"logps/weighted_chosen": -4.116991996765137,
|
|
"logps/weighted_rejected": -5.237597465515137,
|
|
"loss": 0.5132,
|
|
"rewards/accuracies": 0.612500011920929,
|
|
"rewards/chosen": -381.7503967285156,
|
|
"rewards/margins": 132.55233764648438,
|
|
"rewards/rejected": -514.3597412109375,
|
|
"rewards/weighted_accuracies": 0.731249988079071,
|
|
"rewards/weighted_chosen": -1.338903784751892,
|
|
"rewards/weighted_margins": 1.0992553234100342,
|
|
"rewards/weighted_rejected": -2.4381957054138184,
|
|
"step": 1820
|
|
},
|
|
{
|
|
"epoch": 0.9578644333943994,
|
|
"grad_norm": 26.419347763061523,
|
|
"learning_rate": 5.604059820551177e-09,
|
|
"logits/chosen": -1.067419409751892,
|
|
"logits/rejected": -1.0749084949493408,
|
|
"logps/chosen": -693.1859130859375,
|
|
"logps/rejected": -796.1453247070312,
|
|
"logps/weighted_chosen": -4.111572265625,
|
|
"logps/weighted_rejected": -5.332861423492432,
|
|
"loss": 0.5009,
|
|
"rewards/accuracies": 0.606249988079071,
|
|
"rewards/chosen": -394.80682373046875,
|
|
"rewards/margins": 140.7951202392578,
|
|
"rewards/rejected": -535.8195190429688,
|
|
"rewards/weighted_accuracies": 0.734375,
|
|
"rewards/weighted_chosen": -1.3236510753631592,
|
|
"rewards/weighted_margins": 1.1140258312225342,
|
|
"rewards/weighted_rejected": -2.43792724609375,
|
|
"step": 1830
|
|
},
|
|
{
|
|
"epoch": 0.9630986652708715,
|
|
"grad_norm": 53.944435119628906,
|
|
"learning_rate": 4.322413053509943e-09,
|
|
"logits/chosen": -1.050134301185608,
|
|
"logits/rejected": -1.075842261314392,
|
|
"logps/chosen": -727.1593627929688,
|
|
"logps/rejected": -827.8078002929688,
|
|
"logps/weighted_chosen": -3.7392578125,
|
|
"logps/weighted_rejected": -5.269579887390137,
|
|
"loss": 0.5188,
|
|
"rewards/accuracies": 0.6031249761581421,
|
|
"rewards/chosen": -425.8460998535156,
|
|
"rewards/margins": 129.8203125,
|
|
"rewards/rejected": -555.5859375,
|
|
"rewards/weighted_accuracies": 0.746874988079071,
|
|
"rewards/weighted_chosen": -1.332269310951233,
|
|
"rewards/weighted_margins": 1.074914574623108,
|
|
"rewards/weighted_rejected": -2.407489061355591,
|
|
"step": 1840
|
|
},
|
|
{
|
|
"epoch": 0.9683328971473436,
|
|
"grad_norm": 25.737455368041992,
|
|
"learning_rate": 3.206318573963418e-09,
|
|
"logits/chosen": -1.0623962879180908,
|
|
"logits/rejected": -1.0946044921875,
|
|
"logps/chosen": -675.3171997070312,
|
|
"logps/rejected": -770.2156372070312,
|
|
"logps/weighted_chosen": -4.039794921875,
|
|
"logps/weighted_rejected": -5.077490329742432,
|
|
"loss": 0.5267,
|
|
"rewards/accuracies": 0.59375,
|
|
"rewards/chosen": -389.2749938964844,
|
|
"rewards/margins": 121.53242492675781,
|
|
"rewards/rejected": -510.8343811035156,
|
|
"rewards/weighted_accuracies": 0.715624988079071,
|
|
"rewards/weighted_chosen": -1.363183617591858,
|
|
"rewards/weighted_margins": 1.0187866687774658,
|
|
"rewards/weighted_rejected": -2.381420850753784,
|
|
"step": 1850
|
|
},
|
|
{
|
|
"epoch": 0.9735671290238157,
|
|
"grad_norm": 21.196102142333984,
|
|
"learning_rate": 2.256149148401387e-09,
|
|
"logits/chosen": -1.0624481439590454,
|
|
"logits/rejected": -1.064080834388733,
|
|
"logps/chosen": -666.1961059570312,
|
|
"logps/rejected": -834.4312744140625,
|
|
"logps/weighted_chosen": -4.077294826507568,
|
|
"logps/weighted_rejected": -4.933495998382568,
|
|
"loss": 0.5029,
|
|
"rewards/accuracies": 0.640625,
|
|
"rewards/chosen": -393.24139404296875,
|
|
"rewards/margins": 176.5304718017578,
|
|
"rewards/rejected": -569.5242309570312,
|
|
"rewards/weighted_accuracies": 0.7593749761581421,
|
|
"rewards/weighted_chosen": -1.389379858970642,
|
|
"rewards/weighted_margins": 1.0392944812774658,
|
|
"rewards/weighted_rejected": -2.4274535179138184,
|
|
"step": 1860
|
|
},
|
|
{
|
|
"epoch": 0.9788013609002879,
|
|
"grad_norm": 28.988407135009766,
|
|
"learning_rate": 1.4722221256933676e-09,
|
|
"logits/chosen": -1.091333031654358,
|
|
"logits/rejected": -1.099829077720642,
|
|
"logps/chosen": -689.6468505859375,
|
|
"logps/rejected": -761.02734375,
|
|
"logps/weighted_chosen": -3.801464796066284,
|
|
"logps/weighted_rejected": -5.50390625,
|
|
"loss": 0.5502,
|
|
"rewards/accuracies": 0.596875011920929,
|
|
"rewards/chosen": -418.20428466796875,
|
|
"rewards/margins": 90.15116882324219,
|
|
"rewards/rejected": -508.333984375,
|
|
"rewards/weighted_accuracies": 0.7093750238418579,
|
|
"rewards/weighted_chosen": -1.4282042980194092,
|
|
"rewards/weighted_margins": 0.9919677972793579,
|
|
"rewards/weighted_rejected": -2.419872999191284,
|
|
"step": 1870
|
|
},
|
|
{
|
|
"epoch": 0.98403559277676,
|
|
"grad_norm": 31.547182083129883,
|
|
"learning_rate": 8.547993310970003e-10,
|
|
"logits/chosen": -1.0585174560546875,
|
|
"logits/rejected": -1.088720679283142,
|
|
"logps/chosen": -652.5765380859375,
|
|
"logps/rejected": -762.1343994140625,
|
|
"logps/weighted_chosen": -4.279052734375,
|
|
"logps/weighted_rejected": -5.315331935882568,
|
|
"loss": 0.5933,
|
|
"rewards/accuracies": 0.637499988079071,
|
|
"rewards/chosen": -371.46563720703125,
|
|
"rewards/margins": 122.81367492675781,
|
|
"rewards/rejected": -494.56329345703125,
|
|
"rewards/weighted_accuracies": 0.703125,
|
|
"rewards/weighted_chosen": -1.31964111328125,
|
|
"rewards/weighted_margins": 0.9644409418106079,
|
|
"rewards/weighted_rejected": -2.283703565597534,
|
|
"step": 1880
|
|
},
|
|
{
|
|
"epoch": 0.9892698246532321,
|
|
"grad_norm": 33.57475662231445,
|
|
"learning_rate": 4.040869788100032e-10,
|
|
"logits/chosen": -1.0377686023712158,
|
|
"logits/rejected": -1.065820336341858,
|
|
"logps/chosen": -664.56640625,
|
|
"logps/rejected": -729.34375,
|
|
"logps/weighted_chosen": -3.905956983566284,
|
|
"logps/weighted_rejected": -4.855029106140137,
|
|
"loss": 0.5181,
|
|
"rewards/accuracies": 0.6187499761581421,
|
|
"rewards/chosen": -385.83087158203125,
|
|
"rewards/margins": 97.8460922241211,
|
|
"rewards/rejected": -483.4437561035156,
|
|
"rewards/weighted_accuracies": 0.75,
|
|
"rewards/weighted_chosen": -1.3222167491912842,
|
|
"rewards/weighted_margins": 1.018835425376892,
|
|
"rewards/weighted_rejected": -2.341479539871216,
|
|
"step": 1890
|
|
},
|
|
{
|
|
"epoch": 0.9945040565297043,
|
|
"grad_norm": 23.152149200439453,
|
|
"learning_rate": 1.202356030968743e-10,
|
|
"logits/chosen": -1.079199194908142,
|
|
"logits/rejected": -1.0971558094024658,
|
|
"logps/chosen": -726.5609130859375,
|
|
"logps/rejected": -814.0968627929688,
|
|
"logps/weighted_chosen": -3.6655516624450684,
|
|
"logps/weighted_rejected": -4.904101371765137,
|
|
"loss": 0.5332,
|
|
"rewards/accuracies": 0.581250011920929,
|
|
"rewards/chosen": -419.51837158203125,
|
|
"rewards/margins": 128.578125,
|
|
"rewards/rejected": -548.2413940429688,
|
|
"rewards/weighted_accuracies": 0.7124999761581421,
|
|
"rewards/weighted_chosen": -1.3955078125,
|
|
"rewards/weighted_margins": 1.1153442859649658,
|
|
"rewards/weighted_rejected": -2.50994873046875,
|
|
"step": 1900
|
|
},
|
|
{
|
|
"epoch": 0.9997382884061764,
|
|
"grad_norm": 23.48067283630371,
|
|
"learning_rate": 3.3400080112211405e-12,
|
|
"logits/chosen": -1.0469787120819092,
|
|
"logits/rejected": -1.074462890625,
|
|
"logps/chosen": -742.5452880859375,
|
|
"logps/rejected": -856.6265869140625,
|
|
"logps/weighted_chosen": -3.984057664871216,
|
|
"logps/weighted_rejected": -5.161230564117432,
|
|
"loss": 0.4937,
|
|
"rewards/accuracies": 0.640625,
|
|
"rewards/chosen": -442.21466064453125,
|
|
"rewards/margins": 135.20547485351562,
|
|
"rewards/rejected": -577.2562255859375,
|
|
"rewards/weighted_accuracies": 0.765625,
|
|
"rewards/weighted_chosen": -1.3606140613555908,
|
|
"rewards/weighted_margins": 1.040490746498108,
|
|
"rewards/weighted_rejected": -2.401928663253784,
|
|
"step": 1910
|
|
}
|
|
],
|
|
"logging_steps": 10,
|
|
"max_steps": 1911,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 500,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|