2200 lines
86 KiB
JSON
2200 lines
86 KiB
JSON
|
|
{
|
||
|
|
"best_global_step": null,
|
||
|
|
"best_metric": null,
|
||
|
|
"best_model_checkpoint": null,
|
||
|
|
"epoch": 0.5234231876472127,
|
||
|
|
"eval_steps": 500,
|
||
|
|
"global_step": 1000,
|
||
|
|
"is_hyper_param_search": false,
|
||
|
|
"is_local_process_zero": true,
|
||
|
|
"is_world_process_zero": true,
|
||
|
|
"log_history": [
|
||
|
|
{
|
||
|
|
"epoch": 0.0005234231876472127,
|
||
|
|
"grad_norm": 126.29230499267578,
|
||
|
|
"learning_rate": 0.0,
|
||
|
|
"logits/chosen": -0.40118408203125,
|
||
|
|
"logits/rejected": -0.41802978515625,
|
||
|
|
"logps/chosen": -297.609375,
|
||
|
|
"logps/rejected": -247.84375,
|
||
|
|
"logps/weighted_chosen": -4.5152587890625,
|
||
|
|
"logps/weighted_rejected": -3.032470703125,
|
||
|
|
"loss": 0.6914,
|
||
|
|
"rewards/accuracies": 0.0,
|
||
|
|
"rewards/chosen": 0.0,
|
||
|
|
"rewards/margins": 0.0,
|
||
|
|
"rewards/rejected": 0.0,
|
||
|
|
"rewards/weighted_accuracies": 0.0,
|
||
|
|
"rewards/weighted_chosen": 0.0,
|
||
|
|
"rewards/weighted_margins": 0.0,
|
||
|
|
"rewards/weighted_rejected": 0.0,
|
||
|
|
"step": 1
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.005234231876472127,
|
||
|
|
"grad_norm": 296.4369812011719,
|
||
|
|
"learning_rate": 4.6875e-08,
|
||
|
|
"logits/chosen": -0.3177456259727478,
|
||
|
|
"logits/rejected": -0.3534359335899353,
|
||
|
|
"logps/chosen": -275.5711669921875,
|
||
|
|
"logps/rejected": -255.90451049804688,
|
||
|
|
"logps/weighted_chosen": -2.350965738296509,
|
||
|
|
"logps/weighted_rejected": -2.549940347671509,
|
||
|
|
"loss": 0.6917,
|
||
|
|
"rewards/accuracies": 0.25,
|
||
|
|
"rewards/chosen": -0.0401475690305233,
|
||
|
|
"rewards/margins": 0.04296875,
|
||
|
|
"rewards/rejected": -0.0831163227558136,
|
||
|
|
"rewards/weighted_accuracies": 0.3229166567325592,
|
||
|
|
"rewards/weighted_chosen": -0.00032212998485192657,
|
||
|
|
"rewards/weighted_margins": 0.00019327799964230508,
|
||
|
|
"rewards/weighted_rejected": -0.0005154079990461469,
|
||
|
|
"step": 10
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.010468463752944255,
|
||
|
|
"grad_norm": 21.67967414855957,
|
||
|
|
"learning_rate": 9.895833333333332e-08,
|
||
|
|
"logits/chosen": -0.29769057035446167,
|
||
|
|
"logits/rejected": -0.313650518655777,
|
||
|
|
"logps/chosen": -294.3374938964844,
|
||
|
|
"logps/rejected": -272.6703186035156,
|
||
|
|
"logps/weighted_chosen": -2.13031005859375,
|
||
|
|
"logps/weighted_rejected": -2.103222608566284,
|
||
|
|
"loss": 0.6908,
|
||
|
|
"rewards/accuracies": 0.3125,
|
||
|
|
"rewards/chosen": -0.06103515625,
|
||
|
|
"rewards/margins": -0.01318359375,
|
||
|
|
"rewards/rejected": -0.0478515625,
|
||
|
|
"rewards/weighted_accuracies": 0.4437499940395355,
|
||
|
|
"rewards/weighted_chosen": 0.0014366150135174394,
|
||
|
|
"rewards/weighted_margins": 0.0021545409690588713,
|
||
|
|
"rewards/weighted_rejected": -0.0007179260137490928,
|
||
|
|
"step": 20
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.015702695629416383,
|
||
|
|
"grad_norm": 76.9887466430664,
|
||
|
|
"learning_rate": 1.5104166666666664e-07,
|
||
|
|
"logits/chosen": -0.2917121946811676,
|
||
|
|
"logits/rejected": -0.337240606546402,
|
||
|
|
"logps/chosen": -298.02655029296875,
|
||
|
|
"logps/rejected": -268.12188720703125,
|
||
|
|
"logps/weighted_chosen": -2.0724120140075684,
|
||
|
|
"logps/weighted_rejected": -2.4466919898986816,
|
||
|
|
"loss": 0.6912,
|
||
|
|
"rewards/accuracies": 0.28125,
|
||
|
|
"rewards/chosen": -0.0062500000931322575,
|
||
|
|
"rewards/margins": -0.02509765699505806,
|
||
|
|
"rewards/rejected": 0.01884765550494194,
|
||
|
|
"rewards/weighted_accuracies": 0.4281249940395355,
|
||
|
|
"rewards/weighted_chosen": 0.0027938843704760075,
|
||
|
|
"rewards/weighted_margins": 0.0019706725142896175,
|
||
|
|
"rewards/weighted_rejected": 0.0008232116815634072,
|
||
|
|
"step": 30
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.02093692750588851,
|
||
|
|
"grad_norm": 32.98203659057617,
|
||
|
|
"learning_rate": 2.03125e-07,
|
||
|
|
"logits/chosen": -0.3011154234409332,
|
||
|
|
"logits/rejected": -0.3432762026786804,
|
||
|
|
"logps/chosen": -278.63751220703125,
|
||
|
|
"logps/rejected": -253.88125610351562,
|
||
|
|
"logps/weighted_chosen": -2.2070555686950684,
|
||
|
|
"logps/weighted_rejected": -2.605224609375,
|
||
|
|
"loss": 0.692,
|
||
|
|
"rewards/accuracies": 0.26249998807907104,
|
||
|
|
"rewards/chosen": 0.0034667968284338713,
|
||
|
|
"rewards/margins": -0.05991210788488388,
|
||
|
|
"rewards/rejected": 0.06337890774011612,
|
||
|
|
"rewards/weighted_accuracies": 0.35624998807907104,
|
||
|
|
"rewards/weighted_chosen": 0.0014549255138263106,
|
||
|
|
"rewards/weighted_margins": -0.00034332275390625,
|
||
|
|
"rewards/weighted_rejected": 0.0017982482677325606,
|
||
|
|
"step": 40
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.02617115938236064,
|
||
|
|
"grad_norm": 20.751684188842773,
|
||
|
|
"learning_rate": 2.552083333333333e-07,
|
||
|
|
"logits/chosen": -0.2822524905204773,
|
||
|
|
"logits/rejected": -0.32080918550491333,
|
||
|
|
"logps/chosen": -280.31329345703125,
|
||
|
|
"logps/rejected": -267.58709716796875,
|
||
|
|
"logps/weighted_chosen": -2.136962890625,
|
||
|
|
"logps/weighted_rejected": -2.1753907203674316,
|
||
|
|
"loss": 0.6883,
|
||
|
|
"rewards/accuracies": 0.3125,
|
||
|
|
"rewards/chosen": -0.07236327975988388,
|
||
|
|
"rewards/margins": -0.09189452975988388,
|
||
|
|
"rewards/rejected": 0.01953125,
|
||
|
|
"rewards/weighted_accuracies": 0.4375,
|
||
|
|
"rewards/weighted_chosen": 0.0054107666946947575,
|
||
|
|
"rewards/weighted_margins": 0.0078063965775072575,
|
||
|
|
"rewards/weighted_rejected": -0.0023956298828125,
|
||
|
|
"step": 50
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.031405391258832765,
|
||
|
|
"grad_norm": 40.70024108886719,
|
||
|
|
"learning_rate": 3.0729166666666665e-07,
|
||
|
|
"logits/chosen": -0.3149581849575043,
|
||
|
|
"logits/rejected": -0.3086872100830078,
|
||
|
|
"logps/chosen": -277.6031188964844,
|
||
|
|
"logps/rejected": -261.8031311035156,
|
||
|
|
"logps/weighted_chosen": -2.5905518531799316,
|
||
|
|
"logps/weighted_rejected": -2.4834961891174316,
|
||
|
|
"loss": 0.6874,
|
||
|
|
"rewards/accuracies": 0.3812499940395355,
|
||
|
|
"rewards/chosen": 0.03662109375,
|
||
|
|
"rewards/margins": 0.12646484375,
|
||
|
|
"rewards/rejected": -0.08984375,
|
||
|
|
"rewards/weighted_accuracies": 0.5,
|
||
|
|
"rewards/weighted_chosen": 0.0004280090215615928,
|
||
|
|
"rewards/weighted_margins": 0.01105651818215847,
|
||
|
|
"rewards/weighted_rejected": -0.01062927208840847,
|
||
|
|
"step": 60
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.036639623135304895,
|
||
|
|
"grad_norm": 67.51947021484375,
|
||
|
|
"learning_rate": 3.59375e-07,
|
||
|
|
"logits/chosen": -0.318746954202652,
|
||
|
|
"logits/rejected": -0.32574766874313354,
|
||
|
|
"logps/chosen": -289.90313720703125,
|
||
|
|
"logps/rejected": -245.04452514648438,
|
||
|
|
"logps/weighted_chosen": -2.098431348800659,
|
||
|
|
"logps/weighted_rejected": -2.392407178878784,
|
||
|
|
"loss": 0.6841,
|
||
|
|
"rewards/accuracies": 0.48750001192092896,
|
||
|
|
"rewards/chosen": 0.16708984971046448,
|
||
|
|
"rewards/margins": 0.4442382752895355,
|
||
|
|
"rewards/rejected": -0.27714842557907104,
|
||
|
|
"rewards/weighted_accuracies": 0.559374988079071,
|
||
|
|
"rewards/weighted_chosen": 0.015575408935546875,
|
||
|
|
"rewards/weighted_margins": 0.02174072340130806,
|
||
|
|
"rewards/weighted_rejected": -0.00616531353443861,
|
||
|
|
"step": 70
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.04187385501177702,
|
||
|
|
"grad_norm": 68.87100982666016,
|
||
|
|
"learning_rate": 4.114583333333333e-07,
|
||
|
|
"logits/chosen": -0.286581426858902,
|
||
|
|
"logits/rejected": -0.3082527220249176,
|
||
|
|
"logps/chosen": -289.5101623535156,
|
||
|
|
"logps/rejected": -270.4375,
|
||
|
|
"logps/weighted_chosen": -2.2385497093200684,
|
||
|
|
"logps/weighted_rejected": -2.4218382835388184,
|
||
|
|
"loss": 0.6727,
|
||
|
|
"rewards/accuracies": 0.4749999940395355,
|
||
|
|
"rewards/chosen": 0.31572264432907104,
|
||
|
|
"rewards/margins": 0.5547851324081421,
|
||
|
|
"rewards/rejected": -0.23906250298023224,
|
||
|
|
"rewards/weighted_accuracies": 0.596875011920929,
|
||
|
|
"rewards/weighted_chosen": 0.03613891452550888,
|
||
|
|
"rewards/weighted_margins": 0.05283202975988388,
|
||
|
|
"rewards/weighted_rejected": -0.01669769361615181,
|
||
|
|
"step": 80
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.04710808688824915,
|
||
|
|
"grad_norm": 40.29203414916992,
|
||
|
|
"learning_rate": 4.6354166666666664e-07,
|
||
|
|
"logits/chosen": -0.3158706724643707,
|
||
|
|
"logits/rejected": -0.30914992094039917,
|
||
|
|
"logps/chosen": -280.5726623535156,
|
||
|
|
"logps/rejected": -258.17657470703125,
|
||
|
|
"logps/weighted_chosen": -2.45281982421875,
|
||
|
|
"logps/weighted_rejected": -2.5444703102111816,
|
||
|
|
"loss": 0.6683,
|
||
|
|
"rewards/accuracies": 0.4906249940395355,
|
||
|
|
"rewards/chosen": 0.15966796875,
|
||
|
|
"rewards/margins": 0.599609375,
|
||
|
|
"rewards/rejected": -0.43994140625,
|
||
|
|
"rewards/weighted_accuracies": 0.581250011920929,
|
||
|
|
"rewards/weighted_chosen": 0.05808715894818306,
|
||
|
|
"rewards/weighted_margins": 0.07471618801355362,
|
||
|
|
"rewards/weighted_rejected": -0.0166168212890625,
|
||
|
|
"step": 90
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.05234231876472128,
|
||
|
|
"grad_norm": 46.855377197265625,
|
||
|
|
"learning_rate": 5.156249999999999e-07,
|
||
|
|
"logits/chosen": -0.2856552004814148,
|
||
|
|
"logits/rejected": -0.3585342466831207,
|
||
|
|
"logps/chosen": -291.05548095703125,
|
||
|
|
"logps/rejected": -287.078125,
|
||
|
|
"logps/weighted_chosen": -1.9577789306640625,
|
||
|
|
"logps/weighted_rejected": -2.532482862472534,
|
||
|
|
"loss": 0.6785,
|
||
|
|
"rewards/accuracies": 0.5625,
|
||
|
|
"rewards/chosen": -0.13925781846046448,
|
||
|
|
"rewards/margins": 0.9869140386581421,
|
||
|
|
"rewards/rejected": -1.1261718273162842,
|
||
|
|
"rewards/weighted_accuracies": 0.590624988079071,
|
||
|
|
"rewards/weighted_chosen": 0.03715210035443306,
|
||
|
|
"rewards/weighted_margins": 0.0635833740234375,
|
||
|
|
"rewards/weighted_rejected": -0.02643737755715847,
|
||
|
|
"step": 100
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.05757655064119341,
|
||
|
|
"grad_norm": 55.04579162597656,
|
||
|
|
"learning_rate": 5.677083333333333e-07,
|
||
|
|
"logits/chosen": -0.33493995666503906,
|
||
|
|
"logits/rejected": -0.3254844546318054,
|
||
|
|
"logps/chosen": -297.2953186035156,
|
||
|
|
"logps/rejected": -262.6773376464844,
|
||
|
|
"logps/weighted_chosen": -2.606689453125,
|
||
|
|
"logps/weighted_rejected": -2.648364305496216,
|
||
|
|
"loss": 0.6821,
|
||
|
|
"rewards/accuracies": 0.528124988079071,
|
||
|
|
"rewards/chosen": -0.9228515625,
|
||
|
|
"rewards/margins": 0.8955078125,
|
||
|
|
"rewards/rejected": -1.818359375,
|
||
|
|
"rewards/weighted_accuracies": 0.518750011920929,
|
||
|
|
"rewards/weighted_chosen": -0.005747986026108265,
|
||
|
|
"rewards/weighted_margins": 0.05161895602941513,
|
||
|
|
"rewards/weighted_rejected": -0.05732421949505806,
|
||
|
|
"step": 110
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.06281078251766553,
|
||
|
|
"grad_norm": 22.23135757446289,
|
||
|
|
"learning_rate": 6.197916666666666e-07,
|
||
|
|
"logits/chosen": -0.3393222689628601,
|
||
|
|
"logits/rejected": -0.36481350660324097,
|
||
|
|
"logps/chosen": -295.6703186035156,
|
||
|
|
"logps/rejected": -256.3296813964844,
|
||
|
|
"logps/weighted_chosen": -1.8351562023162842,
|
||
|
|
"logps/weighted_rejected": -2.124218702316284,
|
||
|
|
"loss": 0.6752,
|
||
|
|
"rewards/accuracies": 0.6000000238418579,
|
||
|
|
"rewards/chosen": -1.071679711341858,
|
||
|
|
"rewards/margins": 1.46142578125,
|
||
|
|
"rewards/rejected": -2.5331053733825684,
|
||
|
|
"rewards/weighted_accuracies": 0.546875,
|
||
|
|
"rewards/weighted_chosen": 0.0018810272449627519,
|
||
|
|
"rewards/weighted_margins": 0.06835174560546875,
|
||
|
|
"rewards/weighted_rejected": -0.0664466843008995,
|
||
|
|
"step": 120
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.06804501439413765,
|
||
|
|
"grad_norm": 57.93917465209961,
|
||
|
|
"learning_rate": 6.718749999999999e-07,
|
||
|
|
"logits/chosen": -0.30284881591796875,
|
||
|
|
"logits/rejected": -0.2989334166049957,
|
||
|
|
"logps/chosen": -306.5074157714844,
|
||
|
|
"logps/rejected": -279.8265686035156,
|
||
|
|
"logps/weighted_chosen": -1.910064697265625,
|
||
|
|
"logps/weighted_rejected": -2.2278685569763184,
|
||
|
|
"loss": 0.6738,
|
||
|
|
"rewards/accuracies": 0.578125,
|
||
|
|
"rewards/chosen": -0.45097655057907104,
|
||
|
|
"rewards/margins": 1.7268555164337158,
|
||
|
|
"rewards/rejected": -2.177734375,
|
||
|
|
"rewards/weighted_accuracies": 0.59375,
|
||
|
|
"rewards/weighted_chosen": 0.02166290208697319,
|
||
|
|
"rewards/weighted_margins": 0.07758025825023651,
|
||
|
|
"rewards/weighted_rejected": -0.05589141696691513,
|
||
|
|
"step": 130
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.07327924627060979,
|
||
|
|
"grad_norm": 66.64070892333984,
|
||
|
|
"learning_rate": 7.239583333333333e-07,
|
||
|
|
"logits/chosen": -0.34190064668655396,
|
||
|
|
"logits/rejected": -0.3586837649345398,
|
||
|
|
"logps/chosen": -300.01483154296875,
|
||
|
|
"logps/rejected": -276.1703186035156,
|
||
|
|
"logps/weighted_chosen": -2.202807664871216,
|
||
|
|
"logps/weighted_rejected": -2.474353075027466,
|
||
|
|
"loss": 0.6635,
|
||
|
|
"rewards/accuracies": 0.6187499761581421,
|
||
|
|
"rewards/chosen": -1.3230469226837158,
|
||
|
|
"rewards/margins": 2.2220702171325684,
|
||
|
|
"rewards/rejected": -3.545117139816284,
|
||
|
|
"rewards/weighted_accuracies": 0.609375,
|
||
|
|
"rewards/weighted_chosen": 0.0006683349492959678,
|
||
|
|
"rewards/weighted_margins": 0.10604552924633026,
|
||
|
|
"rewards/weighted_rejected": -0.1053924560546875,
|
||
|
|
"step": 140
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.07851347814708191,
|
||
|
|
"grad_norm": 18.789766311645508,
|
||
|
|
"learning_rate": 7.760416666666666e-07,
|
||
|
|
"logits/chosen": -0.2976974546909332,
|
||
|
|
"logits/rejected": -0.3081321716308594,
|
||
|
|
"logps/chosen": -286.27813720703125,
|
||
|
|
"logps/rejected": -255.4640655517578,
|
||
|
|
"logps/weighted_chosen": -2.7657103538513184,
|
||
|
|
"logps/weighted_rejected": -2.831347703933716,
|
||
|
|
"loss": 0.6605,
|
||
|
|
"rewards/accuracies": 0.628125011920929,
|
||
|
|
"rewards/chosen": -2.104687452316284,
|
||
|
|
"rewards/margins": 2.5054688453674316,
|
||
|
|
"rewards/rejected": -4.610156059265137,
|
||
|
|
"rewards/weighted_accuracies": 0.621874988079071,
|
||
|
|
"rewards/weighted_chosen": -0.0018630981212481856,
|
||
|
|
"rewards/weighted_margins": 0.158416748046875,
|
||
|
|
"rewards/weighted_rejected": -0.1603546142578125,
|
||
|
|
"step": 150
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.08374771002355404,
|
||
|
|
"grad_norm": 51.51210021972656,
|
||
|
|
"learning_rate": 8.28125e-07,
|
||
|
|
"logits/chosen": -0.3341739773750305,
|
||
|
|
"logits/rejected": -0.3859619200229645,
|
||
|
|
"logps/chosen": -306.4765625,
|
||
|
|
"logps/rejected": -279.1148376464844,
|
||
|
|
"logps/weighted_chosen": -2.3189454078674316,
|
||
|
|
"logps/weighted_rejected": -2.36669921875,
|
||
|
|
"loss": 0.636,
|
||
|
|
"rewards/accuracies": 0.6312500238418579,
|
||
|
|
"rewards/chosen": -2.575390577316284,
|
||
|
|
"rewards/margins": 3.349609375,
|
||
|
|
"rewards/rejected": -5.925000190734863,
|
||
|
|
"rewards/weighted_accuracies": 0.6187499761581421,
|
||
|
|
"rewards/weighted_chosen": 0.02147369459271431,
|
||
|
|
"rewards/weighted_margins": 0.22438660264015198,
|
||
|
|
"rewards/weighted_rejected": -0.203105166554451,
|
||
|
|
"step": 160
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.08898194190002617,
|
||
|
|
"grad_norm": 398.3809509277344,
|
||
|
|
"learning_rate": 8.802083333333333e-07,
|
||
|
|
"logits/chosen": -0.36855775117874146,
|
||
|
|
"logits/rejected": -0.37070387601852417,
|
||
|
|
"logps/chosen": -307.1656188964844,
|
||
|
|
"logps/rejected": -265.78436279296875,
|
||
|
|
"logps/weighted_chosen": -2.459460496902466,
|
||
|
|
"logps/weighted_rejected": -2.757373094558716,
|
||
|
|
"loss": 0.6811,
|
||
|
|
"rewards/accuracies": 0.612500011920929,
|
||
|
|
"rewards/chosen": -4.097460746765137,
|
||
|
|
"rewards/margins": 3.488476514816284,
|
||
|
|
"rewards/rejected": -7.585839748382568,
|
||
|
|
"rewards/weighted_accuracies": 0.6187499761581421,
|
||
|
|
"rewards/weighted_chosen": 0.018505096435546875,
|
||
|
|
"rewards/weighted_margins": 0.19701537489891052,
|
||
|
|
"rewards/weighted_rejected": -0.17839965224266052,
|
||
|
|
"step": 170
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0942161737764983,
|
||
|
|
"grad_norm": 55.77580261230469,
|
||
|
|
"learning_rate": 9.322916666666666e-07,
|
||
|
|
"logits/chosen": -0.3392753601074219,
|
||
|
|
"logits/rejected": -0.35816192626953125,
|
||
|
|
"logps/chosen": -278.99530029296875,
|
||
|
|
"logps/rejected": -265.18359375,
|
||
|
|
"logps/weighted_chosen": -2.362103223800659,
|
||
|
|
"logps/weighted_rejected": -2.754711866378784,
|
||
|
|
"loss": 0.6944,
|
||
|
|
"rewards/accuracies": 0.606249988079071,
|
||
|
|
"rewards/chosen": -5.346972465515137,
|
||
|
|
"rewards/margins": 3.5015625953674316,
|
||
|
|
"rewards/rejected": -8.848730087280273,
|
||
|
|
"rewards/weighted_accuracies": 0.578125,
|
||
|
|
"rewards/weighted_chosen": -0.05782318115234375,
|
||
|
|
"rewards/weighted_margins": 0.16480103135108948,
|
||
|
|
"rewards/weighted_rejected": -0.22255554795265198,
|
||
|
|
"step": 180
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.09945040565297043,
|
||
|
|
"grad_norm": 38.015960693359375,
|
||
|
|
"learning_rate": 9.84375e-07,
|
||
|
|
"logits/chosen": -0.3686843812465668,
|
||
|
|
"logits/rejected": -0.4041244387626648,
|
||
|
|
"logps/chosen": -314.3070373535156,
|
||
|
|
"logps/rejected": -276.7484436035156,
|
||
|
|
"logps/weighted_chosen": -2.123486280441284,
|
||
|
|
"logps/weighted_rejected": -2.6261963844299316,
|
||
|
|
"loss": 0.6392,
|
||
|
|
"rewards/accuracies": 0.621874988079071,
|
||
|
|
"rewards/chosen": -8.581738471984863,
|
||
|
|
"rewards/margins": 4.317968845367432,
|
||
|
|
"rewards/rejected": -12.900781631469727,
|
||
|
|
"rewards/weighted_accuracies": 0.621874988079071,
|
||
|
|
"rewards/weighted_chosen": -0.129638671875,
|
||
|
|
"rewards/weighted_margins": 0.2160186767578125,
|
||
|
|
"rewards/weighted_rejected": -0.345590204000473,
|
||
|
|
"step": 190
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.10468463752944256,
|
||
|
|
"grad_norm": 46.52367401123047,
|
||
|
|
"learning_rate": 9.99959085414323e-07,
|
||
|
|
"logits/chosen": -0.4128967225551605,
|
||
|
|
"logits/rejected": -0.4471847414970398,
|
||
|
|
"logps/chosen": -320.0546875,
|
||
|
|
"logps/rejected": -273.11248779296875,
|
||
|
|
"logps/weighted_chosen": -2.5019164085388184,
|
||
|
|
"logps/weighted_rejected": -2.9936890602111816,
|
||
|
|
"loss": 0.6473,
|
||
|
|
"rewards/accuracies": 0.6187499761581421,
|
||
|
|
"rewards/chosen": -10.737597465515137,
|
||
|
|
"rewards/margins": 4.738671779632568,
|
||
|
|
"rewards/rejected": -15.476171493530273,
|
||
|
|
"rewards/weighted_accuracies": 0.6343749761581421,
|
||
|
|
"rewards/weighted_chosen": -0.11443634331226349,
|
||
|
|
"rewards/weighted_margins": 0.2610321044921875,
|
||
|
|
"rewards/weighted_rejected": -0.37534791231155396,
|
||
|
|
"step": 200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.10991886940591468,
|
||
|
|
"grad_norm": 21.238189697265625,
|
||
|
|
"learning_rate": 9.997587035630105e-07,
|
||
|
|
"logits/chosen": -0.4288749694824219,
|
||
|
|
"logits/rejected": -0.4688262939453125,
|
||
|
|
"logps/chosen": -300.0765686035156,
|
||
|
|
"logps/rejected": -304.63751220703125,
|
||
|
|
"logps/weighted_chosen": -2.32427978515625,
|
||
|
|
"logps/weighted_rejected": -3.0592284202575684,
|
||
|
|
"loss": 0.6424,
|
||
|
|
"rewards/accuracies": 0.6468750238418579,
|
||
|
|
"rewards/chosen": -13.117578506469727,
|
||
|
|
"rewards/margins": 7.013671875,
|
||
|
|
"rewards/rejected": -20.133594512939453,
|
||
|
|
"rewards/weighted_accuracies": 0.653124988079071,
|
||
|
|
"rewards/weighted_chosen": -0.21423491835594177,
|
||
|
|
"rewards/weighted_margins": 0.27025145292282104,
|
||
|
|
"rewards/weighted_rejected": -0.4845077395439148,
|
||
|
|
"step": 210
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.11515310128238682,
|
||
|
|
"grad_norm": 24.92041015625,
|
||
|
|
"learning_rate": 9.99391406364405e-07,
|
||
|
|
"logits/chosen": -0.42696380615234375,
|
||
|
|
"logits/rejected": -0.429006963968277,
|
||
|
|
"logps/chosen": -305.4906311035156,
|
||
|
|
"logps/rejected": -288.6312561035156,
|
||
|
|
"logps/weighted_chosen": -2.625018358230591,
|
||
|
|
"logps/weighted_rejected": -3.102160692214966,
|
||
|
|
"loss": 0.6601,
|
||
|
|
"rewards/accuracies": 0.621874988079071,
|
||
|
|
"rewards/chosen": -13.349413871765137,
|
||
|
|
"rewards/margins": 6.373632907867432,
|
||
|
|
"rewards/rejected": -19.72265625,
|
||
|
|
"rewards/weighted_accuracies": 0.6156250238418579,
|
||
|
|
"rewards/weighted_chosen": -0.20062866806983948,
|
||
|
|
"rewards/weighted_margins": 0.316873162984848,
|
||
|
|
"rewards/weighted_rejected": -0.5174545049667358,
|
||
|
|
"step": 220
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.12038733315885894,
|
||
|
|
"grad_norm": 147.95851135253906,
|
||
|
|
"learning_rate": 9.988573164927884e-07,
|
||
|
|
"logits/chosen": -0.3811447024345398,
|
||
|
|
"logits/rejected": -0.4161086976528168,
|
||
|
|
"logps/chosen": -281.33203125,
|
||
|
|
"logps/rejected": -274.234375,
|
||
|
|
"logps/weighted_chosen": -2.32806396484375,
|
||
|
|
"logps/weighted_rejected": -2.6552734375,
|
||
|
|
"loss": 0.7195,
|
||
|
|
"rewards/accuracies": 0.668749988079071,
|
||
|
|
"rewards/chosen": -12.46875,
|
||
|
|
"rewards/margins": 10.046093940734863,
|
||
|
|
"rewards/rejected": -22.515430450439453,
|
||
|
|
"rewards/weighted_accuracies": 0.6187499761581421,
|
||
|
|
"rewards/weighted_chosen": -0.24639587104320526,
|
||
|
|
"rewards/weighted_margins": 0.23908081650733948,
|
||
|
|
"rewards/weighted_rejected": -0.4853073060512543,
|
||
|
|
"step": 230
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.12562156503533106,
|
||
|
|
"grad_norm": 26.882122039794922,
|
||
|
|
"learning_rate": 9.98156612329838e-07,
|
||
|
|
"logits/chosen": -0.4748245179653168,
|
||
|
|
"logits/rejected": -0.5250595211982727,
|
||
|
|
"logps/chosen": -278.16717529296875,
|
||
|
|
"logps/rejected": -306.29376220703125,
|
||
|
|
"logps/weighted_chosen": -2.348803758621216,
|
||
|
|
"logps/weighted_rejected": -2.9455933570861816,
|
||
|
|
"loss": 0.6674,
|
||
|
|
"rewards/accuracies": 0.643750011920929,
|
||
|
|
"rewards/chosen": -13.405566215515137,
|
||
|
|
"rewards/margins": 10.753710746765137,
|
||
|
|
"rewards/rejected": -24.158985137939453,
|
||
|
|
"rewards/weighted_accuracies": 0.625,
|
||
|
|
"rewards/weighted_chosen": -0.14908751845359802,
|
||
|
|
"rewards/weighted_margins": 0.33162689208984375,
|
||
|
|
"rewards/weighted_rejected": -0.48021697998046875,
|
||
|
|
"step": 240
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.13085579691180318,
|
||
|
|
"grad_norm": 86.49760437011719,
|
||
|
|
"learning_rate": 9.97289527905053e-07,
|
||
|
|
"logits/chosen": -0.478302001953125,
|
||
|
|
"logits/rejected": -0.48861923813819885,
|
||
|
|
"logps/chosen": -277.0523376464844,
|
||
|
|
"logps/rejected": -275.80938720703125,
|
||
|
|
"logps/weighted_chosen": -2.61376953125,
|
||
|
|
"logps/weighted_rejected": -2.787853956222534,
|
||
|
|
"loss": 0.7022,
|
||
|
|
"rewards/accuracies": 0.6343749761581421,
|
||
|
|
"rewards/chosen": -12.673730850219727,
|
||
|
|
"rewards/margins": 7.1806640625,
|
||
|
|
"rewards/rejected": -19.852344512939453,
|
||
|
|
"rewards/weighted_accuracies": 0.5687500238418579,
|
||
|
|
"rewards/weighted_chosen": -0.12388916313648224,
|
||
|
|
"rewards/weighted_margins": 0.19627074897289276,
|
||
|
|
"rewards/weighted_rejected": -0.3203796446323395,
|
||
|
|
"step": 250
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1360900287882753,
|
||
|
|
"grad_norm": 19.698871612548828,
|
||
|
|
"learning_rate": 9.962563528175875e-07,
|
||
|
|
"logits/chosen": -0.4065658450126648,
|
||
|
|
"logits/rejected": -0.4432968199253082,
|
||
|
|
"logps/chosen": -310.62890625,
|
||
|
|
"logps/rejected": -281.46405029296875,
|
||
|
|
"logps/weighted_chosen": -2.184094190597534,
|
||
|
|
"logps/weighted_rejected": -3.0492796897888184,
|
||
|
|
"loss": 0.6507,
|
||
|
|
"rewards/accuracies": 0.596875011920929,
|
||
|
|
"rewards/chosen": -11.896581649780273,
|
||
|
|
"rewards/margins": 8.622265815734863,
|
||
|
|
"rewards/rejected": -20.520313262939453,
|
||
|
|
"rewards/weighted_accuracies": 0.606249988079071,
|
||
|
|
"rewards/weighted_chosen": -0.1260833740234375,
|
||
|
|
"rewards/weighted_margins": 0.25025635957717896,
|
||
|
|
"rewards/weighted_rejected": -0.3761749267578125,
|
||
|
|
"step": 260
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.14132426066474746,
|
||
|
|
"grad_norm": 16.363121032714844,
|
||
|
|
"learning_rate": 9.950574321395277e-07,
|
||
|
|
"logits/chosen": -0.42208632826805115,
|
||
|
|
"logits/rejected": -0.4458427429199219,
|
||
|
|
"logps/chosen": -305.9046936035156,
|
||
|
|
"logps/rejected": -286.06561279296875,
|
||
|
|
"logps/weighted_chosen": -2.40838623046875,
|
||
|
|
"logps/weighted_rejected": -2.7938475608825684,
|
||
|
|
"loss": 0.6573,
|
||
|
|
"rewards/accuracies": 0.606249988079071,
|
||
|
|
"rewards/chosen": -16.622364044189453,
|
||
|
|
"rewards/margins": 6.233202934265137,
|
||
|
|
"rewards/rejected": -22.855077743530273,
|
||
|
|
"rewards/weighted_accuracies": 0.6156250238418579,
|
||
|
|
"rewards/weighted_chosen": -0.189697265625,
|
||
|
|
"rewards/weighted_margins": 0.27490538358688354,
|
||
|
|
"rewards/weighted_rejected": -0.4645233154296875,
|
||
|
|
"step": 270
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.14655849254121958,
|
||
|
|
"grad_norm": 54.42692947387695,
|
||
|
|
"learning_rate": 9.936931663006413e-07,
|
||
|
|
"logits/chosen": -0.45263671875,
|
||
|
|
"logits/rejected": -0.44363707304000854,
|
||
|
|
"logps/chosen": -316.171875,
|
||
|
|
"logps/rejected": -303.3656311035156,
|
||
|
|
"logps/weighted_chosen": -2.4659423828125,
|
||
|
|
"logps/weighted_rejected": -3.0541749000549316,
|
||
|
|
"loss": 0.6068,
|
||
|
|
"rewards/accuracies": 0.6656249761581421,
|
||
|
|
"rewards/chosen": -13.402734756469727,
|
||
|
|
"rewards/margins": 10.619824409484863,
|
||
|
|
"rewards/rejected": -24.025390625,
|
||
|
|
"rewards/weighted_accuracies": 0.6875,
|
||
|
|
"rewards/weighted_chosen": -0.05214080959558487,
|
||
|
|
"rewards/weighted_margins": 0.40336912870407104,
|
||
|
|
"rewards/weighted_rejected": -0.455657958984375,
|
||
|
|
"step": 280
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1517927244176917,
|
||
|
|
"grad_norm": 39.20017623901367,
|
||
|
|
"learning_rate": 9.921640109546357e-07,
|
||
|
|
"logits/chosen": -0.42310255765914917,
|
||
|
|
"logits/rejected": -0.48920440673828125,
|
||
|
|
"logps/chosen": -283.7171936035156,
|
||
|
|
"logps/rejected": -278.1859436035156,
|
||
|
|
"logps/weighted_chosen": -2.396167039871216,
|
||
|
|
"logps/weighted_rejected": -3.5881590843200684,
|
||
|
|
"loss": 0.6649,
|
||
|
|
"rewards/accuracies": 0.643750011920929,
|
||
|
|
"rewards/chosen": -16.9111328125,
|
||
|
|
"rewards/margins": 9.704492568969727,
|
||
|
|
"rewards/rejected": -26.62109375,
|
||
|
|
"rewards/weighted_accuracies": 0.6031249761581421,
|
||
|
|
"rewards/weighted_chosen": -0.130279541015625,
|
||
|
|
"rewards/weighted_margins": 0.3882461488246918,
|
||
|
|
"rewards/weighted_rejected": -0.5187179446220398,
|
||
|
|
"step": 290
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.15702695629416383,
|
||
|
|
"grad_norm": 28.03601837158203,
|
||
|
|
"learning_rate": 9.90470476826975e-07,
|
||
|
|
"logits/chosen": -0.485189825296402,
|
||
|
|
"logits/rejected": -0.48862916231155396,
|
||
|
|
"logps/chosen": -289.09765625,
|
||
|
|
"logps/rejected": -297.625,
|
||
|
|
"logps/weighted_chosen": -2.2784485816955566,
|
||
|
|
"logps/weighted_rejected": -2.771862745285034,
|
||
|
|
"loss": 0.6608,
|
||
|
|
"rewards/accuracies": 0.640625,
|
||
|
|
"rewards/chosen": -19.676952362060547,
|
||
|
|
"rewards/margins": 10.679491996765137,
|
||
|
|
"rewards/rejected": -30.360937118530273,
|
||
|
|
"rewards/weighted_accuracies": 0.643750011920929,
|
||
|
|
"rewards/weighted_chosen": -0.1669921875,
|
||
|
|
"rewards/weighted_margins": 0.28967589139938354,
|
||
|
|
"rewards/weighted_rejected": -0.4566032290458679,
|
||
|
|
"step": 300
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.16226118817063595,
|
||
|
|
"grad_norm": 538.50927734375,
|
||
|
|
"learning_rate": 9.886131295443002e-07,
|
||
|
|
"logits/chosen": -0.654278576374054,
|
||
|
|
"logits/rejected": -0.7076683044433594,
|
||
|
|
"logps/chosen": -341.85467529296875,
|
||
|
|
"logps/rejected": -309.89764404296875,
|
||
|
|
"logps/weighted_chosen": -2.629150390625,
|
||
|
|
"logps/weighted_rejected": -2.8698973655700684,
|
||
|
|
"loss": 0.6788,
|
||
|
|
"rewards/accuracies": 0.515625,
|
||
|
|
"rewards/chosen": -60.568748474121094,
|
||
|
|
"rewards/margins": -0.7822265625,
|
||
|
|
"rewards/rejected": -59.785743713378906,
|
||
|
|
"rewards/weighted_accuracies": 0.6156250238418579,
|
||
|
|
"rewards/weighted_chosen": -0.24791869521141052,
|
||
|
|
"rewards/weighted_margins": 0.28089600801467896,
|
||
|
|
"rewards/weighted_rejected": -0.528765857219696,
|
||
|
|
"step": 310
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.16749542004710807,
|
||
|
|
"grad_norm": 34.656883239746094,
|
||
|
|
"learning_rate": 9.865925894455166e-07,
|
||
|
|
"logits/chosen": -0.7003936767578125,
|
||
|
|
"logits/rejected": -0.719250500202179,
|
||
|
|
"logps/chosen": -326.3960876464844,
|
||
|
|
"logps/rejected": -290.3453063964844,
|
||
|
|
"logps/weighted_chosen": -2.553356885910034,
|
||
|
|
"logps/weighted_rejected": -3.10992431640625,
|
||
|
|
"loss": 0.7054,
|
||
|
|
"rewards/accuracies": 0.6000000238418579,
|
||
|
|
"rewards/chosen": -26.532812118530273,
|
||
|
|
"rewards/margins": 6.3564453125,
|
||
|
|
"rewards/rejected": -32.88788986206055,
|
||
|
|
"rewards/weighted_accuracies": 0.609375,
|
||
|
|
"rewards/weighted_chosen": -0.18020018935203552,
|
||
|
|
"rewards/weighted_margins": 0.3489990234375,
|
||
|
|
"rewards/weighted_rejected": -0.5293639898300171,
|
||
|
|
"step": 320
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.17272965192358022,
|
||
|
|
"grad_norm": 61.1888542175293,
|
||
|
|
"learning_rate": 9.84409531374603e-07,
|
||
|
|
"logits/chosen": -0.6631911993026733,
|
||
|
|
"logits/rejected": -0.6448425054550171,
|
||
|
|
"logps/chosen": -324.87579345703125,
|
||
|
|
"logps/rejected": -291.71875,
|
||
|
|
"logps/weighted_chosen": -2.5611815452575684,
|
||
|
|
"logps/weighted_rejected": -3.060229539871216,
|
||
|
|
"loss": 0.6449,
|
||
|
|
"rewards/accuracies": 0.668749988079071,
|
||
|
|
"rewards/chosen": -20.070018768310547,
|
||
|
|
"rewards/margins": 8.8251953125,
|
||
|
|
"rewards/rejected": -28.8876953125,
|
||
|
|
"rewards/weighted_accuracies": 0.6625000238418579,
|
||
|
|
"rewards/weighted_chosen": -0.140888974070549,
|
||
|
|
"rewards/weighted_margins": 0.3719635009765625,
|
||
|
|
"rewards/weighted_rejected": -0.5127013921737671,
|
||
|
|
"step": 330
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.17796388380005235,
|
||
|
|
"grad_norm": 27.2315673828125,
|
||
|
|
"learning_rate": 9.820646844552219e-07,
|
||
|
|
"logits/chosen": -0.6496349573135376,
|
||
|
|
"logits/rejected": -0.7006805539131165,
|
||
|
|
"logps/chosen": -295.5882873535156,
|
||
|
|
"logps/rejected": -297.4906311035156,
|
||
|
|
"logps/weighted_chosen": -2.6988892555236816,
|
||
|
|
"logps/weighted_rejected": -2.898681640625,
|
||
|
|
"loss": 0.6788,
|
||
|
|
"rewards/accuracies": 0.6875,
|
||
|
|
"rewards/chosen": -19.176855087280273,
|
||
|
|
"rewards/margins": 12.649316787719727,
|
||
|
|
"rewards/rejected": -31.822460174560547,
|
||
|
|
"rewards/weighted_accuracies": 0.659375011920929,
|
||
|
|
"rewards/weighted_chosen": -0.23918533325195312,
|
||
|
|
"rewards/weighted_margins": 0.2961669862270355,
|
||
|
|
"rewards/weighted_rejected": -0.5351837277412415,
|
||
|
|
"step": 340
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.18319811567652447,
|
||
|
|
"grad_norm": 24.073888778686523,
|
||
|
|
"learning_rate": 9.795588318471964e-07,
|
||
|
|
"logits/chosen": -0.7137314081192017,
|
||
|
|
"logits/rejected": -0.7225399017333984,
|
||
|
|
"logps/chosen": -277.8890686035156,
|
||
|
|
"logps/rejected": -304.59063720703125,
|
||
|
|
"logps/weighted_chosen": -2.4300780296325684,
|
||
|
|
"logps/weighted_rejected": -2.771411180496216,
|
||
|
|
"loss": 0.6675,
|
||
|
|
"rewards/accuracies": 0.621874988079071,
|
||
|
|
"rewards/chosen": -18.445703506469727,
|
||
|
|
"rewards/margins": 9.8720703125,
|
||
|
|
"rewards/rejected": -28.31640625,
|
||
|
|
"rewards/weighted_accuracies": 0.637499988079071,
|
||
|
|
"rewards/weighted_chosen": -0.20159301161766052,
|
||
|
|
"rewards/weighted_margins": 0.2779785096645355,
|
||
|
|
"rewards/weighted_rejected": -0.47947996854782104,
|
||
|
|
"step": 350
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1884323475529966,
|
||
|
|
"grad_norm": 12.750471115112305,
|
||
|
|
"learning_rate": 9.768928104849415e-07,
|
||
|
|
"logits/chosen": -0.7212737798690796,
|
||
|
|
"logits/rejected": -0.7225433588027954,
|
||
|
|
"logps/chosen": -299.53594970703125,
|
||
|
|
"logps/rejected": -275.5718688964844,
|
||
|
|
"logps/weighted_chosen": -2.667529344558716,
|
||
|
|
"logps/weighted_rejected": -2.800830125808716,
|
||
|
|
"loss": 0.6916,
|
||
|
|
"rewards/accuracies": 0.612500011920929,
|
||
|
|
"rewards/chosen": -16.816015243530273,
|
||
|
|
"rewards/margins": 9.876562118530273,
|
||
|
|
"rewards/rejected": -26.690235137939453,
|
||
|
|
"rewards/weighted_accuracies": 0.643750011920929,
|
||
|
|
"rewards/weighted_chosen": -0.15018615126609802,
|
||
|
|
"rewards/weighted_margins": 0.301962286233902,
|
||
|
|
"rewards/weighted_rejected": -0.45206451416015625,
|
||
|
|
"step": 360
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.19366657942946872,
|
||
|
|
"grad_norm": 29.441747665405273,
|
||
|
|
"learning_rate": 9.740675107979355e-07,
|
||
|
|
"logits/chosen": -0.6865798830986023,
|
||
|
|
"logits/rejected": -0.7117553949356079,
|
||
|
|
"logps/chosen": -331.06561279296875,
|
||
|
|
"logps/rejected": -300.31719970703125,
|
||
|
|
"logps/weighted_chosen": -1.9907715320587158,
|
||
|
|
"logps/weighted_rejected": -2.932177782058716,
|
||
|
|
"loss": 0.6819,
|
||
|
|
"rewards/accuracies": 0.6499999761581421,
|
||
|
|
"rewards/chosen": -16.022266387939453,
|
||
|
|
"rewards/margins": 9.630078315734863,
|
||
|
|
"rewards/rejected": -25.654491424560547,
|
||
|
|
"rewards/weighted_accuracies": 0.640625,
|
||
|
|
"rewards/weighted_chosen": -0.18877258896827698,
|
||
|
|
"rewards/weighted_margins": 0.2808380126953125,
|
||
|
|
"rewards/weighted_rejected": -0.4699081480503082,
|
||
|
|
"step": 370
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.19890081130594087,
|
||
|
|
"grad_norm": 29.483524322509766,
|
||
|
|
"learning_rate": 9.71083876413323e-07,
|
||
|
|
"logits/chosen": -0.6637862920761108,
|
||
|
|
"logits/rejected": -0.669873058795929,
|
||
|
|
"logps/chosen": -322.3882751464844,
|
||
|
|
"logps/rejected": -300.85858154296875,
|
||
|
|
"logps/weighted_chosen": -2.189379930496216,
|
||
|
|
"logps/weighted_rejected": -2.9217162132263184,
|
||
|
|
"loss": 0.6846,
|
||
|
|
"rewards/accuracies": 0.6156250238418579,
|
||
|
|
"rewards/chosen": -20.004688262939453,
|
||
|
|
"rewards/margins": 11.246289253234863,
|
||
|
|
"rewards/rejected": -31.24609375,
|
||
|
|
"rewards/weighted_accuracies": 0.6000000238418579,
|
||
|
|
"rewards/weighted_chosen": -0.22467346489429474,
|
||
|
|
"rewards/weighted_margins": 0.2720580995082855,
|
||
|
|
"rewards/weighted_rejected": -0.4967102110385895,
|
||
|
|
"step": 380
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.204135043182413,
|
||
|
|
"grad_norm": 20.563907623291016,
|
||
|
|
"learning_rate": 9.67942903840751e-07,
|
||
|
|
"logits/chosen": -0.7051689028739929,
|
||
|
|
"logits/rejected": -0.7537201046943665,
|
||
|
|
"logps/chosen": -324.1015625,
|
||
|
|
"logps/rejected": -310.375,
|
||
|
|
"logps/weighted_chosen": -2.397631883621216,
|
||
|
|
"logps/weighted_rejected": -2.950610399246216,
|
||
|
|
"loss": 0.6478,
|
||
|
|
"rewards/accuracies": 0.7124999761581421,
|
||
|
|
"rewards/chosen": -19.788671493530273,
|
||
|
|
"rewards/margins": 16.317577362060547,
|
||
|
|
"rewards/rejected": -36.111328125,
|
||
|
|
"rewards/weighted_accuracies": 0.6625000238418579,
|
||
|
|
"rewards/weighted_chosen": -0.22498169541358948,
|
||
|
|
"rewards/weighted_margins": 0.3520751893520355,
|
||
|
|
"rewards/weighted_rejected": -0.5770629644393921,
|
||
|
|
"step": 390
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2093692750588851,
|
||
|
|
"grad_norm": 23.1771183013916,
|
||
|
|
"learning_rate": 9.646456421395447e-07,
|
||
|
|
"logits/chosen": -0.7504974603652954,
|
||
|
|
"logits/rejected": -0.7628723382949829,
|
||
|
|
"logps/chosen": -341.2171936035156,
|
||
|
|
"logps/rejected": -343.9375,
|
||
|
|
"logps/weighted_chosen": -2.2680420875549316,
|
||
|
|
"logps/weighted_rejected": -3.0065674781799316,
|
||
|
|
"loss": 0.6746,
|
||
|
|
"rewards/accuracies": 0.675000011920929,
|
||
|
|
"rewards/chosen": -22.944530487060547,
|
||
|
|
"rewards/margins": 17.315624237060547,
|
||
|
|
"rewards/rejected": -40.2587890625,
|
||
|
|
"rewards/weighted_accuracies": 0.621874988079071,
|
||
|
|
"rewards/weighted_chosen": -0.20337525010108948,
|
||
|
|
"rewards/weighted_margins": 0.216084286570549,
|
||
|
|
"rewards/weighted_rejected": -0.4196624755859375,
|
||
|
|
"step": 400
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.21460350693535724,
|
||
|
|
"grad_norm": 14.420520782470703,
|
||
|
|
"learning_rate": 9.611931925683266e-07,
|
||
|
|
"logits/chosen": -0.7154334783554077,
|
||
|
|
"logits/rejected": -0.7491073608398438,
|
||
|
|
"logps/chosen": -331.38983154296875,
|
||
|
|
"logps/rejected": -303.3890686035156,
|
||
|
|
"logps/weighted_chosen": -2.190844774246216,
|
||
|
|
"logps/weighted_rejected": -2.7472167015075684,
|
||
|
|
"loss": 0.6135,
|
||
|
|
"rewards/accuracies": 0.659375011920929,
|
||
|
|
"rewards/chosen": -25.111621856689453,
|
||
|
|
"rewards/margins": 14.543554306030273,
|
||
|
|
"rewards/rejected": -39.658592224121094,
|
||
|
|
"rewards/weighted_accuracies": 0.6625000238418579,
|
||
|
|
"rewards/weighted_chosen": -0.21762695908546448,
|
||
|
|
"rewards/weighted_margins": 0.36674195528030396,
|
||
|
|
"rewards/weighted_rejected": -0.584503173828125,
|
||
|
|
"step": 410
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.21983773881182936,
|
||
|
|
"grad_norm": 27.429603576660156,
|
||
|
|
"learning_rate": 9.575867082172085e-07,
|
||
|
|
"logits/chosen": -0.7379547357559204,
|
||
|
|
"logits/rejected": -0.7826202511787415,
|
||
|
|
"logps/chosen": -337.46405029296875,
|
||
|
|
"logps/rejected": -320.23907470703125,
|
||
|
|
"logps/weighted_chosen": -2.6387085914611816,
|
||
|
|
"logps/weighted_rejected": -2.712329149246216,
|
||
|
|
"loss": 0.6716,
|
||
|
|
"rewards/accuracies": 0.653124988079071,
|
||
|
|
"rewards/chosen": -32.554298400878906,
|
||
|
|
"rewards/margins": 17.513866424560547,
|
||
|
|
"rewards/rejected": -50.060157775878906,
|
||
|
|
"rewards/weighted_accuracies": 0.6468750238418579,
|
||
|
|
"rewards/weighted_chosen": -0.35613709688186646,
|
||
|
|
"rewards/weighted_margins": 0.38392335176467896,
|
||
|
|
"rewards/weighted_rejected": -0.7401062250137329,
|
||
|
|
"step": 420
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.22507197068830148,
|
||
|
|
"grad_norm": 18.97144889831543,
|
||
|
|
"learning_rate": 9.538273936226673e-07,
|
||
|
|
"logits/chosen": -0.778491199016571,
|
||
|
|
"logits/rejected": -0.811004638671875,
|
||
|
|
"logps/chosen": -292.83984375,
|
||
|
|
"logps/rejected": -304.05352783203125,
|
||
|
|
"logps/weighted_chosen": -2.796630859375,
|
||
|
|
"logps/weighted_rejected": -3.2444825172424316,
|
||
|
|
"loss": 0.6544,
|
||
|
|
"rewards/accuracies": 0.6156250238418579,
|
||
|
|
"rewards/chosen": -27.327733993530273,
|
||
|
|
"rewards/margins": 12.26318359375,
|
||
|
|
"rewards/rejected": -39.58984375,
|
||
|
|
"rewards/weighted_accuracies": 0.643750011920929,
|
||
|
|
"rewards/weighted_chosen": -0.21069030463695526,
|
||
|
|
"rewards/weighted_margins": 0.3678832948207855,
|
||
|
|
"rewards/weighted_rejected": -0.57806396484375,
|
||
|
|
"step": 430
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.23030620256477363,
|
||
|
|
"grad_norm": 25.43462371826172,
|
||
|
|
"learning_rate": 9.499165043652391e-07,
|
||
|
|
"logits/chosen": -0.7674010992050171,
|
||
|
|
"logits/rejected": -0.7686828374862671,
|
||
|
|
"logps/chosen": -319.55859375,
|
||
|
|
"logps/rejected": -309.03436279296875,
|
||
|
|
"logps/weighted_chosen": -2.82305908203125,
|
||
|
|
"logps/weighted_rejected": -2.993237257003784,
|
||
|
|
"loss": 0.631,
|
||
|
|
"rewards/accuracies": 0.637499988079071,
|
||
|
|
"rewards/chosen": -28.386133193969727,
|
||
|
|
"rewards/margins": 13.9384765625,
|
||
|
|
"rewards/rejected": -42.326072692871094,
|
||
|
|
"rewards/weighted_accuracies": 0.6343749761581421,
|
||
|
|
"rewards/weighted_chosen": -0.26903897523880005,
|
||
|
|
"rewards/weighted_margins": 0.353515625,
|
||
|
|
"rewards/weighted_rejected": -0.622546374797821,
|
||
|
|
"step": 440
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.23554043444124576,
|
||
|
|
"grad_norm": 65.78443908691406,
|
||
|
|
"learning_rate": 9.458553466501665e-07,
|
||
|
|
"logits/chosen": -0.8066772222518921,
|
||
|
|
"logits/rejected": -0.8363037109375,
|
||
|
|
"logps/chosen": -314.7945251464844,
|
||
|
|
"logps/rejected": -287.65313720703125,
|
||
|
|
"logps/weighted_chosen": -2.8233399391174316,
|
||
|
|
"logps/weighted_rejected": -3.013622999191284,
|
||
|
|
"loss": 0.6831,
|
||
|
|
"rewards/accuracies": 0.640625,
|
||
|
|
"rewards/chosen": -28.642578125,
|
||
|
|
"rewards/margins": 15.162694931030273,
|
||
|
|
"rewards/rejected": -43.80976486206055,
|
||
|
|
"rewards/weighted_accuracies": 0.684374988079071,
|
||
|
|
"rewards/weighted_chosen": -0.3802246153354645,
|
||
|
|
"rewards/weighted_margins": 0.3342132568359375,
|
||
|
|
"rewards/weighted_rejected": -0.7139984369277954,
|
||
|
|
"step": 450
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.24077466631771788,
|
||
|
|
"grad_norm": 13.290085792541504,
|
||
|
|
"learning_rate": 9.416452768711366e-07,
|
||
|
|
"logits/chosen": -0.7957550287246704,
|
||
|
|
"logits/rejected": -0.8287414312362671,
|
||
|
|
"logps/chosen": -323.0093688964844,
|
||
|
|
"logps/rejected": -306.98907470703125,
|
||
|
|
"logps/weighted_chosen": -2.544872999191284,
|
||
|
|
"logps/weighted_rejected": -3.100903272628784,
|
||
|
|
"loss": 0.6759,
|
||
|
|
"rewards/accuracies": 0.640625,
|
||
|
|
"rewards/chosen": -30.906835556030273,
|
||
|
|
"rewards/margins": 16.355859756469727,
|
||
|
|
"rewards/rejected": -47.24980545043945,
|
||
|
|
"rewards/weighted_accuracies": 0.643750011920929,
|
||
|
|
"rewards/weighted_chosen": -0.343759149312973,
|
||
|
|
"rewards/weighted_margins": 0.40337830781936646,
|
||
|
|
"rewards/weighted_rejected": -0.747100830078125,
|
||
|
|
"step": 460
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.24600889819419,
|
||
|
|
"grad_norm": 21.766939163208008,
|
||
|
|
"learning_rate": 9.372877011572557e-07,
|
||
|
|
"logits/chosen": -0.7200164794921875,
|
||
|
|
"logits/rejected": -0.742279052734375,
|
||
|
|
"logps/chosen": -342.75079345703125,
|
||
|
|
"logps/rejected": -318.60626220703125,
|
||
|
|
"logps/weighted_chosen": -2.5311522483825684,
|
||
|
|
"logps/weighted_rejected": -2.990124464035034,
|
||
|
|
"loss": 0.63,
|
||
|
|
"rewards/accuracies": 0.643750011920929,
|
||
|
|
"rewards/chosen": -32.077247619628906,
|
||
|
|
"rewards/margins": 11.883398056030273,
|
||
|
|
"rewards/rejected": -43.959373474121094,
|
||
|
|
"rewards/weighted_accuracies": 0.6656249761581421,
|
||
|
|
"rewards/weighted_chosen": -0.30525511503219604,
|
||
|
|
"rewards/weighted_margins": 0.45678406953811646,
|
||
|
|
"rewards/weighted_rejected": -0.7624969482421875,
|
||
|
|
"step": 470
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2512431300706621,
|
||
|
|
"grad_norm": 28.76239776611328,
|
||
|
|
"learning_rate": 9.327840749034141e-07,
|
||
|
|
"logits/chosen": -0.7930053472518921,
|
||
|
|
"logits/rejected": -0.8311401605606079,
|
||
|
|
"logps/chosen": -316.79998779296875,
|
||
|
|
"logps/rejected": -326.5062561035156,
|
||
|
|
"logps/weighted_chosen": -2.4120116233825684,
|
||
|
|
"logps/weighted_rejected": -3.591870069503784,
|
||
|
|
"loss": 0.6639,
|
||
|
|
"rewards/accuracies": 0.659375011920929,
|
||
|
|
"rewards/chosen": -29.640039443969727,
|
||
|
|
"rewards/margins": 20.350976943969727,
|
||
|
|
"rewards/rejected": -49.9853515625,
|
||
|
|
"rewards/weighted_accuracies": 0.6812499761581421,
|
||
|
|
"rewards/weighted_chosen": -0.27521055936813354,
|
||
|
|
"rewards/weighted_margins": 0.4522705078125,
|
||
|
|
"rewards/weighted_rejected": -0.7274719476699829,
|
||
|
|
"step": 480
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2564773619471343,
|
||
|
|
"grad_norm": 36.629127502441406,
|
||
|
|
"learning_rate": 9.281359022841965e-07,
|
||
|
|
"logits/chosen": -0.72747802734375,
|
||
|
|
"logits/rejected": -0.7426910400390625,
|
||
|
|
"logps/chosen": -308.5406188964844,
|
||
|
|
"logps/rejected": -300.71484375,
|
||
|
|
"logps/weighted_chosen": -2.6044554710388184,
|
||
|
|
"logps/weighted_rejected": -3.862866163253784,
|
||
|
|
"loss": 0.6178,
|
||
|
|
"rewards/accuracies": 0.6499999761581421,
|
||
|
|
"rewards/chosen": -33.54804611206055,
|
||
|
|
"rewards/margins": 22.3515625,
|
||
|
|
"rewards/rejected": -55.88496017456055,
|
||
|
|
"rewards/weighted_accuracies": 0.6968749761581421,
|
||
|
|
"rewards/weighted_chosen": -0.38198548555374146,
|
||
|
|
"rewards/weighted_margins": 0.525561511516571,
|
||
|
|
"rewards/weighted_rejected": -0.9073349237442017,
|
||
|
|
"step": 490
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.26171159382360637,
|
||
|
|
"grad_norm": 20.296154022216797,
|
||
|
|
"learning_rate": 9.233447357514989e-07,
|
||
|
|
"logits/chosen": -0.7092193365097046,
|
||
|
|
"logits/rejected": -0.751629650592804,
|
||
|
|
"logps/chosen": -337.10467529296875,
|
||
|
|
"logps/rejected": -328.71875,
|
||
|
|
"logps/weighted_chosen": -3.054370164871216,
|
||
|
|
"logps/weighted_rejected": -3.5334715843200684,
|
||
|
|
"loss": 0.6534,
|
||
|
|
"rewards/accuracies": 0.659375011920929,
|
||
|
|
"rewards/chosen": -39.15234375,
|
||
|
|
"rewards/margins": 19.770116806030273,
|
||
|
|
"rewards/rejected": -58.90898513793945,
|
||
|
|
"rewards/weighted_accuracies": 0.643750011920929,
|
||
|
|
"rewards/weighted_chosen": -0.470510870218277,
|
||
|
|
"rewards/weighted_margins": 0.566607654094696,
|
||
|
|
"rewards/weighted_rejected": -1.0376465320587158,
|
||
|
|
"step": 500
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.26171159382360637,
|
||
|
|
"eval_logits/chosen": -0.8052441477775574,
|
||
|
|
"eval_logits/rejected": -0.8225547075271606,
|
||
|
|
"eval_logps/chosen": -333.44000244140625,
|
||
|
|
"eval_logps/rejected": -331.98199462890625,
|
||
|
|
"eval_logps/weighted_chosen": -2.756896734237671,
|
||
|
|
"eval_logps/weighted_rejected": -3.441680908203125,
|
||
|
|
"eval_loss": 0.6561886668205261,
|
||
|
|
"eval_rewards/accuracies": 0.6370000243186951,
|
||
|
|
"eval_rewards/chosen": -44.67877960205078,
|
||
|
|
"eval_rewards/margins": 19.602703094482422,
|
||
|
|
"eval_rewards/rejected": -64.27362823486328,
|
||
|
|
"eval_rewards/weighted_accuracies": 0.6445000171661377,
|
||
|
|
"eval_rewards/weighted_chosen": -0.485819548368454,
|
||
|
|
"eval_rewards/weighted_margins": 0.4620407819747925,
|
||
|
|
"eval_rewards/weighted_rejected": -0.9478604793548584,
|
||
|
|
"eval_runtime": 1263.3333,
|
||
|
|
"eval_samples_per_second": 1.583,
|
||
|
|
"eval_steps_per_second": 0.396,
|
||
|
|
"step": 500
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2669458257000785,
|
||
|
|
"grad_norm": 44.70832824707031,
|
||
|
|
"learning_rate": 9.184121755160232e-07,
|
||
|
|
"logits/chosen": -0.7849181890487671,
|
||
|
|
"logits/rejected": -0.8171790838241577,
|
||
|
|
"logps/chosen": -344.84454345703125,
|
||
|
|
"logps/rejected": -356.3671875,
|
||
|
|
"logps/weighted_chosen": -2.9210448265075684,
|
||
|
|
"logps/weighted_rejected": -3.3969483375549316,
|
||
|
|
"loss": 0.6735,
|
||
|
|
"rewards/accuracies": 0.6468750238418579,
|
||
|
|
"rewards/chosen": -43.604881286621094,
|
||
|
|
"rewards/margins": 24.157032012939453,
|
||
|
|
"rewards/rejected": -67.75703430175781,
|
||
|
|
"rewards/weighted_accuracies": 0.643750011920929,
|
||
|
|
"rewards/weighted_chosen": -0.43794554471969604,
|
||
|
|
"rewards/weighted_margins": 0.407052606344223,
|
||
|
|
"rewards/weighted_rejected": -0.8448547124862671,
|
||
|
|
"step": 510
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2721800575765506,
|
||
|
|
"grad_norm": 34.907981872558594,
|
||
|
|
"learning_rate": 9.133398690128193e-07,
|
||
|
|
"logits/chosen": -0.8243468999862671,
|
||
|
|
"logits/rejected": -0.852618396282196,
|
||
|
|
"logps/chosen": -370.2640686035156,
|
||
|
|
"logps/rejected": -357.4296875,
|
||
|
|
"logps/weighted_chosen": -2.7059326171875,
|
||
|
|
"logps/weighted_rejected": -3.559033155441284,
|
||
|
|
"loss": 0.6233,
|
||
|
|
"rewards/accuracies": 0.6468750238418579,
|
||
|
|
"rewards/chosen": -48.807029724121094,
|
||
|
|
"rewards/margins": 28.050586700439453,
|
||
|
|
"rewards/rejected": -76.85234069824219,
|
||
|
|
"rewards/weighted_accuracies": 0.6875,
|
||
|
|
"rewards/weighted_chosen": -0.3096374571323395,
|
||
|
|
"rewards/weighted_margins": 0.5285431146621704,
|
||
|
|
"rewards/weighted_rejected": -0.838287353515625,
|
||
|
|
"step": 520
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.27741428945302277,
|
||
|
|
"grad_norm": 17.412511825561523,
|
||
|
|
"learning_rate": 9.081295103510554e-07,
|
||
|
|
"logits/chosen": -0.7943557500839233,
|
||
|
|
"logits/rejected": -0.8541763424873352,
|
||
|
|
"logps/chosen": -339.65313720703125,
|
||
|
|
"logps/rejected": -351.77813720703125,
|
||
|
|
"logps/weighted_chosen": -2.331298828125,
|
||
|
|
"logps/weighted_rejected": -3.5838379859924316,
|
||
|
|
"loss": 0.5587,
|
||
|
|
"rewards/accuracies": 0.65625,
|
||
|
|
"rewards/chosen": -55.494140625,
|
||
|
|
"rewards/margins": 26.642187118530273,
|
||
|
|
"rewards/rejected": -82.14140319824219,
|
||
|
|
"rewards/weighted_accuracies": 0.71875,
|
||
|
|
"rewards/weighted_chosen": -0.33439940214157104,
|
||
|
|
"rewards/weighted_margins": 0.7252563238143921,
|
||
|
|
"rewards/weighted_rejected": -1.0597717761993408,
|
||
|
|
"step": 530
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2826485213294949,
|
||
|
|
"grad_norm": 30.677711486816406,
|
||
|
|
"learning_rate": 9.027828397481989e-07,
|
||
|
|
"logits/chosen": -0.7925201654434204,
|
||
|
|
"logits/rejected": -0.8262939453125,
|
||
|
|
"logps/chosen": -318.59063720703125,
|
||
|
|
"logps/rejected": -337.55157470703125,
|
||
|
|
"logps/weighted_chosen": -3.059436082839966,
|
||
|
|
"logps/weighted_rejected": -3.750012159347534,
|
||
|
|
"loss": 0.6464,
|
||
|
|
"rewards/accuracies": 0.6468750238418579,
|
||
|
|
"rewards/chosen": -57.99492263793945,
|
||
|
|
"rewards/margins": 24.116796493530273,
|
||
|
|
"rewards/rejected": -82.107421875,
|
||
|
|
"rewards/weighted_accuracies": 0.637499988079071,
|
||
|
|
"rewards/weighted_chosen": -0.507769763469696,
|
||
|
|
"rewards/weighted_margins": 0.4883270263671875,
|
||
|
|
"rewards/weighted_rejected": -0.995800793170929,
|
||
|
|
"step": 540
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.287882753205967,
|
||
|
|
"grad_norm": 34.808658599853516,
|
||
|
|
"learning_rate": 8.973016429487988e-07,
|
||
|
|
"logits/chosen": -0.8280746340751648,
|
||
|
|
"logits/rejected": -0.8393570184707642,
|
||
|
|
"logps/chosen": -340.02032470703125,
|
||
|
|
"logps/rejected": -340.90625,
|
||
|
|
"logps/weighted_chosen": -3.016833543777466,
|
||
|
|
"logps/weighted_rejected": -3.3585205078125,
|
||
|
|
"loss": 0.6373,
|
||
|
|
"rewards/accuracies": 0.6937500238418579,
|
||
|
|
"rewards/chosen": -58.115234375,
|
||
|
|
"rewards/margins": 28.575389862060547,
|
||
|
|
"rewards/rejected": -86.7035140991211,
|
||
|
|
"rewards/weighted_accuracies": 0.6781250238418579,
|
||
|
|
"rewards/weighted_chosen": -0.5330657958984375,
|
||
|
|
"rewards/weighted_margins": 0.48836976289749146,
|
||
|
|
"rewards/weighted_rejected": -1.0212554931640625,
|
||
|
|
"step": 550
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.29311698508243916,
|
||
|
|
"grad_norm": 21.559553146362305,
|
||
|
|
"learning_rate": 8.916877506280601e-07,
|
||
|
|
"logits/chosen": -0.8576828241348267,
|
||
|
|
"logits/rejected": -0.850115954875946,
|
||
|
|
"logps/chosen": -343.0625,
|
||
|
|
"logps/rejected": -340.73126220703125,
|
||
|
|
"logps/weighted_chosen": -3.004504442214966,
|
||
|
|
"logps/weighted_rejected": -3.3214111328125,
|
||
|
|
"loss": 0.6493,
|
||
|
|
"rewards/accuracies": 0.637499988079071,
|
||
|
|
"rewards/chosen": -62.214454650878906,
|
||
|
|
"rewards/margins": 24.381053924560547,
|
||
|
|
"rewards/rejected": -86.59492492675781,
|
||
|
|
"rewards/weighted_accuracies": 0.659375011920929,
|
||
|
|
"rewards/weighted_chosen": -0.5099731683731079,
|
||
|
|
"rewards/weighted_margins": 0.535810112953186,
|
||
|
|
"rewards/weighted_rejected": -1.0458984375,
|
||
|
|
"step": 560
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.29835121695891126,
|
||
|
|
"grad_norm": 16.77034568786621,
|
||
|
|
"learning_rate": 8.85943037780415e-07,
|
||
|
|
"logits/chosen": -0.901629626750946,
|
||
|
|
"logits/rejected": -0.9031143188476562,
|
||
|
|
"logps/chosen": -347.7562561035156,
|
||
|
|
"logps/rejected": -320.90936279296875,
|
||
|
|
"logps/weighted_chosen": -2.9189209938049316,
|
||
|
|
"logps/weighted_rejected": -3.346874952316284,
|
||
|
|
"loss": 0.6796,
|
||
|
|
"rewards/accuracies": 0.6187499761581421,
|
||
|
|
"rewards/chosen": -61.52226638793945,
|
||
|
|
"rewards/margins": 17.764842987060547,
|
||
|
|
"rewards/rejected": -79.3128890991211,
|
||
|
|
"rewards/weighted_accuracies": 0.6499999761581421,
|
||
|
|
"rewards/weighted_chosen": -0.5591338872909546,
|
||
|
|
"rewards/weighted_margins": 0.44241029024124146,
|
||
|
|
"rewards/weighted_rejected": -1.00177001953125,
|
||
|
|
"step": 570
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3035854488353834,
|
||
|
|
"grad_norm": 23.179088592529297,
|
||
|
|
"learning_rate": 8.800694230932884e-07,
|
||
|
|
"logits/chosen": -0.808392345905304,
|
||
|
|
"logits/rejected": -0.8254486322402954,
|
||
|
|
"logps/chosen": -345.52032470703125,
|
||
|
|
"logps/rejected": -338.59844970703125,
|
||
|
|
"logps/weighted_chosen": -2.4705810546875,
|
||
|
|
"logps/weighted_rejected": -3.031982421875,
|
||
|
|
"loss": 0.6672,
|
||
|
|
"rewards/accuracies": 0.6000000238418579,
|
||
|
|
"rewards/chosen": -56.009376525878906,
|
||
|
|
"rewards/margins": 17.356250762939453,
|
||
|
|
"rewards/rejected": -73.3921890258789,
|
||
|
|
"rewards/weighted_accuracies": 0.643750011920929,
|
||
|
|
"rewards/weighted_chosen": -0.3469276428222656,
|
||
|
|
"rewards/weighted_margins": 0.38211363554000854,
|
||
|
|
"rewards/weighted_rejected": -0.7289062738418579,
|
||
|
|
"step": 580
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.30881968071185556,
|
||
|
|
"grad_norm": 16.077539443969727,
|
||
|
|
"learning_rate": 8.740688683062723e-07,
|
||
|
|
"logits/chosen": -0.8602691888809204,
|
||
|
|
"logits/rejected": -0.874432384967804,
|
||
|
|
"logps/chosen": -382.03436279296875,
|
||
|
|
"logps/rejected": -349.27655029296875,
|
||
|
|
"logps/weighted_chosen": -2.4807372093200684,
|
||
|
|
"logps/weighted_rejected": -3.101879835128784,
|
||
|
|
"loss": 0.6615,
|
||
|
|
"rewards/accuracies": 0.643750011920929,
|
||
|
|
"rewards/chosen": -52.95586013793945,
|
||
|
|
"rewards/margins": 20.580469131469727,
|
||
|
|
"rewards/rejected": -73.5445327758789,
|
||
|
|
"rewards/weighted_accuracies": 0.659375011920929,
|
||
|
|
"rewards/weighted_chosen": -0.31566160917282104,
|
||
|
|
"rewards/weighted_margins": 0.39473265409469604,
|
||
|
|
"rewards/weighted_rejected": -0.7103912234306335,
|
||
|
|
"step": 590
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.31405391258832765,
|
||
|
|
"grad_norm": 17.0419864654541,
|
||
|
|
"learning_rate": 8.679433775559215e-07,
|
||
|
|
"logits/chosen": -0.8191520571708679,
|
||
|
|
"logits/rejected": -0.8663116693496704,
|
||
|
|
"logps/chosen": -379.78125,
|
||
|
|
"logps/rejected": -361.57501220703125,
|
||
|
|
"logps/weighted_chosen": -2.305920362472534,
|
||
|
|
"logps/weighted_rejected": -3.3094482421875,
|
||
|
|
"loss": 0.6241,
|
||
|
|
"rewards/accuracies": 0.6499999761581421,
|
||
|
|
"rewards/chosen": -55.09687423706055,
|
||
|
|
"rewards/margins": 20.994531631469727,
|
||
|
|
"rewards/rejected": -76.0738296508789,
|
||
|
|
"rewards/weighted_accuracies": 0.653124988079071,
|
||
|
|
"rewards/weighted_chosen": -0.31669920682907104,
|
||
|
|
"rewards/weighted_margins": 0.4563964903354645,
|
||
|
|
"rewards/weighted_rejected": -0.7728790044784546,
|
||
|
|
"step": 600
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3192881444647998,
|
||
|
|
"grad_norm": 30.722089767456055,
|
||
|
|
"learning_rate": 8.616949967063871e-07,
|
||
|
|
"logits/chosen": -0.7851959466934204,
|
||
|
|
"logits/rejected": -0.82568359375,
|
||
|
|
"logps/chosen": -323.5859375,
|
||
|
|
"logps/rejected": -338.609375,
|
||
|
|
"logps/weighted_chosen": -2.7684326171875,
|
||
|
|
"logps/weighted_rejected": -3.191943407058716,
|
||
|
|
"loss": 0.6918,
|
||
|
|
"rewards/accuracies": 0.684374988079071,
|
||
|
|
"rewards/chosen": -55.26250076293945,
|
||
|
|
"rewards/margins": 22.568164825439453,
|
||
|
|
"rewards/rejected": -77.8238296508789,
|
||
|
|
"rewards/weighted_accuracies": 0.625,
|
||
|
|
"rewards/weighted_chosen": -0.423666387796402,
|
||
|
|
"rewards/weighted_margins": 0.310333251953125,
|
||
|
|
"rewards/weighted_rejected": -0.733630359172821,
|
||
|
|
"step": 610
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3245223763412719,
|
||
|
|
"grad_norm": 14.894518852233887,
|
||
|
|
"learning_rate": 8.553258126661154e-07,
|
||
|
|
"logits/chosen": -0.831768810749054,
|
||
|
|
"logits/rejected": -0.846484363079071,
|
||
|
|
"logps/chosen": -338.09686279296875,
|
||
|
|
"logps/rejected": -336.015625,
|
||
|
|
"logps/weighted_chosen": -2.84112548828125,
|
||
|
|
"logps/weighted_rejected": -3.4341063499450684,
|
||
|
|
"loss": 0.708,
|
||
|
|
"rewards/accuracies": 0.6187499761581421,
|
||
|
|
"rewards/chosen": -58.20390701293945,
|
||
|
|
"rewards/margins": 21.783594131469727,
|
||
|
|
"rewards/rejected": -79.9749984741211,
|
||
|
|
"rewards/weighted_accuracies": 0.6343749761581421,
|
||
|
|
"rewards/weighted_chosen": -0.4535583555698395,
|
||
|
|
"rewards/weighted_margins": 0.33486634492874146,
|
||
|
|
"rewards/weighted_rejected": -0.788104236125946,
|
||
|
|
"step": 620
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.32975660821774405,
|
||
|
|
"grad_norm": 23.813823699951172,
|
||
|
|
"learning_rate": 8.488379526908368e-07,
|
||
|
|
"logits/chosen": -0.826812744140625,
|
||
|
|
"logits/rejected": -0.837506115436554,
|
||
|
|
"logps/chosen": -352.4593811035156,
|
||
|
|
"logps/rejected": -357.29376220703125,
|
||
|
|
"logps/weighted_chosen": -2.6490235328674316,
|
||
|
|
"logps/weighted_rejected": -3.211181640625,
|
||
|
|
"loss": 0.6454,
|
||
|
|
"rewards/accuracies": 0.640625,
|
||
|
|
"rewards/chosen": -61.892189025878906,
|
||
|
|
"rewards/margins": 26.424219131469727,
|
||
|
|
"rewards/rejected": -88.33320617675781,
|
||
|
|
"rewards/weighted_accuracies": 0.6468750238418579,
|
||
|
|
"rewards/weighted_chosen": -0.3862060606479645,
|
||
|
|
"rewards/weighted_margins": 0.415771484375,
|
||
|
|
"rewards/weighted_rejected": -0.8020385503768921,
|
||
|
|
"step": 630
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.33499084009421615,
|
||
|
|
"grad_norm": 19.41891098022461,
|
||
|
|
"learning_rate": 8.422335836730802e-07,
|
||
|
|
"logits/chosen": -0.7994629144668579,
|
||
|
|
"logits/rejected": -0.7995041012763977,
|
||
|
|
"logps/chosen": -333.2593688964844,
|
||
|
|
"logps/rejected": -366.6499938964844,
|
||
|
|
"logps/weighted_chosen": -2.6496825218200684,
|
||
|
|
"logps/weighted_rejected": -3.1250548362731934,
|
||
|
|
"loss": 0.6734,
|
||
|
|
"rewards/accuracies": 0.6937500238418579,
|
||
|
|
"rewards/chosen": -59.58320236206055,
|
||
|
|
"rewards/margins": 29.476757049560547,
|
||
|
|
"rewards/rejected": -89.0625,
|
||
|
|
"rewards/weighted_accuracies": 0.675000011920929,
|
||
|
|
"rewards/weighted_chosen": -0.47590941190719604,
|
||
|
|
"rewards/weighted_margins": 0.394134521484375,
|
||
|
|
"rewards/weighted_rejected": -0.8701080083847046,
|
||
|
|
"step": 640
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3402250719706883,
|
||
|
|
"grad_norm": 16.819276809692383,
|
||
|
|
"learning_rate": 8.355149114184485e-07,
|
||
|
|
"logits/chosen": -0.846386730670929,
|
||
|
|
"logits/rejected": -0.8338836431503296,
|
||
|
|
"logps/chosen": -370.0062561035156,
|
||
|
|
"logps/rejected": -376.0843811035156,
|
||
|
|
"logps/weighted_chosen": -2.787951707839966,
|
||
|
|
"logps/weighted_rejected": -3.106738328933716,
|
||
|
|
"loss": 0.6483,
|
||
|
|
"rewards/accuracies": 0.6656249761581421,
|
||
|
|
"rewards/chosen": -62.594337463378906,
|
||
|
|
"rewards/margins": 31.204687118530273,
|
||
|
|
"rewards/rejected": -93.80000305175781,
|
||
|
|
"rewards/weighted_accuracies": 0.643750011920929,
|
||
|
|
"rewards/weighted_chosen": -0.5348541140556335,
|
||
|
|
"rewards/weighted_margins": 0.446258544921875,
|
||
|
|
"rewards/weighted_rejected": -0.980926513671875,
|
||
|
|
"step": 650
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.34545930384716045,
|
||
|
|
"grad_norm": 18.66504669189453,
|
||
|
|
"learning_rate": 8.286841799088963e-07,
|
||
|
|
"logits/chosen": -0.8683761358261108,
|
||
|
|
"logits/rejected": -0.860211193561554,
|
||
|
|
"logps/chosen": -344.94061279296875,
|
||
|
|
"logps/rejected": -343.61407470703125,
|
||
|
|
"logps/weighted_chosen": -2.3468871116638184,
|
||
|
|
"logps/weighted_rejected": -2.966168165206909,
|
||
|
|
"loss": 0.6577,
|
||
|
|
"rewards/accuracies": 0.628125011920929,
|
||
|
|
"rewards/chosen": -60.857032775878906,
|
||
|
|
"rewards/margins": 19.649999618530273,
|
||
|
|
"rewards/rejected": -80.50117492675781,
|
||
|
|
"rewards/weighted_accuracies": 0.643750011920929,
|
||
|
|
"rewards/weighted_chosen": -0.4206695556640625,
|
||
|
|
"rewards/weighted_margins": 0.41551512479782104,
|
||
|
|
"rewards/weighted_rejected": -0.8359512090682983,
|
||
|
|
"step": 660
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.35069353572363254,
|
||
|
|
"grad_norm": 26.941055297851562,
|
||
|
|
"learning_rate": 8.217436705532599e-07,
|
||
|
|
"logits/chosen": -0.8248077630996704,
|
||
|
|
"logits/rejected": -0.8512848019599915,
|
||
|
|
"logps/chosen": -369.5484313964844,
|
||
|
|
"logps/rejected": -348.30157470703125,
|
||
|
|
"logps/weighted_chosen": -2.3807740211486816,
|
||
|
|
"logps/weighted_rejected": -3.006176710128784,
|
||
|
|
"loss": 0.6373,
|
||
|
|
"rewards/accuracies": 0.6343749761581421,
|
||
|
|
"rewards/chosen": -66.71875,
|
||
|
|
"rewards/margins": 17.846094131469727,
|
||
|
|
"rewards/rejected": -84.57929992675781,
|
||
|
|
"rewards/weighted_accuracies": 0.6343749761581421,
|
||
|
|
"rewards/weighted_chosen": -0.3303161561489105,
|
||
|
|
"rewards/weighted_margins": 0.45032960176467896,
|
||
|
|
"rewards/weighted_rejected": -0.7810913324356079,
|
||
|
|
"step": 670
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3559277676001047,
|
||
|
|
"grad_norm": 1516.0828857421875,
|
||
|
|
"learning_rate": 8.14695701425284e-07,
|
||
|
|
"logits/chosen": -0.8020523190498352,
|
||
|
|
"logits/rejected": -0.845899224281311,
|
||
|
|
"logps/chosen": -371.13751220703125,
|
||
|
|
"logps/rejected": -352.38592529296875,
|
||
|
|
"logps/weighted_chosen": -2.49072265625,
|
||
|
|
"logps/weighted_rejected": -3.2708497047424316,
|
||
|
|
"loss": 0.5885,
|
||
|
|
"rewards/accuracies": 0.653124988079071,
|
||
|
|
"rewards/chosen": -65.716796875,
|
||
|
|
"rewards/margins": 24.369531631469727,
|
||
|
|
"rewards/rejected": -90.1097640991211,
|
||
|
|
"rewards/weighted_accuracies": 0.6937500238418579,
|
||
|
|
"rewards/weighted_chosen": -0.330526739358902,
|
||
|
|
"rewards/weighted_margins": 0.5371948480606079,
|
||
|
|
"rewards/weighted_rejected": -0.867462158203125,
|
||
|
|
"step": 680
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3611619994765768,
|
||
|
|
"grad_norm": 295.9232482910156,
|
||
|
|
"learning_rate": 8.075426264894046e-07,
|
||
|
|
"logits/chosen": -0.7686309814453125,
|
||
|
|
"logits/rejected": -0.805737316608429,
|
||
|
|
"logps/chosen": -370.75,
|
||
|
|
"logps/rejected": -373.64373779296875,
|
||
|
|
"logps/weighted_chosen": -2.509265184402466,
|
||
|
|
"logps/weighted_rejected": -3.719970703125,
|
||
|
|
"loss": 0.5532,
|
||
|
|
"rewards/accuracies": 0.6875,
|
||
|
|
"rewards/chosen": -65.3414077758789,
|
||
|
|
"rewards/margins": 30.711523056030273,
|
||
|
|
"rewards/rejected": -96.052734375,
|
||
|
|
"rewards/weighted_accuracies": 0.7406250238418579,
|
||
|
|
"rewards/weighted_chosen": -0.3384948670864105,
|
||
|
|
"rewards/weighted_margins": 0.6478027105331421,
|
||
|
|
"rewards/weighted_rejected": -0.9860213994979858,
|
||
|
|
"step": 690
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.36639623135304894,
|
||
|
|
"grad_norm": 80.06324768066406,
|
||
|
|
"learning_rate": 8.002868348145435e-07,
|
||
|
|
"logits/chosen": -0.7615035772323608,
|
||
|
|
"logits/rejected": -0.758954644203186,
|
||
|
|
"logps/chosen": -364.390625,
|
||
|
|
"logps/rejected": -353.75,
|
||
|
|
"logps/weighted_chosen": -2.562756299972534,
|
||
|
|
"logps/weighted_rejected": -2.821521043777466,
|
||
|
|
"loss": 0.6219,
|
||
|
|
"rewards/accuracies": 0.609375,
|
||
|
|
"rewards/chosen": -64.1957015991211,
|
||
|
|
"rewards/margins": 21.513866424560547,
|
||
|
|
"rewards/rejected": -85.70429992675781,
|
||
|
|
"rewards/weighted_accuracies": 0.671875,
|
||
|
|
"rewards/weighted_chosen": -0.38551026582717896,
|
||
|
|
"rewards/weighted_margins": 0.45988160371780396,
|
||
|
|
"rewards/weighted_rejected": -0.8446716070175171,
|
||
|
|
"step": 700
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3716304632295211,
|
||
|
|
"grad_norm": 1476.4896240234375,
|
||
|
|
"learning_rate": 7.92930749776179e-07,
|
||
|
|
"logits/chosen": -0.75201416015625,
|
||
|
|
"logits/rejected": -0.7803069949150085,
|
||
|
|
"logps/chosen": -337.1851501464844,
|
||
|
|
"logps/rejected": -346.5375061035156,
|
||
|
|
"logps/weighted_chosen": -2.752087354660034,
|
||
|
|
"logps/weighted_rejected": -3.417065382003784,
|
||
|
|
"loss": 0.6452,
|
||
|
|
"rewards/accuracies": 0.6499999761581421,
|
||
|
|
"rewards/chosen": -65.52070617675781,
|
||
|
|
"rewards/margins": 20.681640625,
|
||
|
|
"rewards/rejected": -86.203125,
|
||
|
|
"rewards/weighted_accuracies": 0.6499999761581421,
|
||
|
|
"rewards/weighted_chosen": -0.2934509217739105,
|
||
|
|
"rewards/weighted_margins": 0.537158191204071,
|
||
|
|
"rewards/weighted_rejected": -0.8301132321357727,
|
||
|
|
"step": 710
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3768646951059932,
|
||
|
|
"grad_norm": 37.869117736816406,
|
||
|
|
"learning_rate": 7.854768282469582e-07,
|
||
|
|
"logits/chosen": -0.814867377281189,
|
||
|
|
"logits/rejected": -0.8494598269462585,
|
||
|
|
"logps/chosen": -332.6953125,
|
||
|
|
"logps/rejected": -362.8187561035156,
|
||
|
|
"logps/weighted_chosen": -2.5288939476013184,
|
||
|
|
"logps/weighted_rejected": -3.1172118186950684,
|
||
|
|
"loss": 0.6496,
|
||
|
|
"rewards/accuracies": 0.640625,
|
||
|
|
"rewards/chosen": -55.247657775878906,
|
||
|
|
"rewards/margins": 29.8330078125,
|
||
|
|
"rewards/rejected": -85.080078125,
|
||
|
|
"rewards/weighted_accuracies": 0.659375011920929,
|
||
|
|
"rewards/weighted_chosen": -0.355978399515152,
|
||
|
|
"rewards/weighted_margins": 0.4522338807582855,
|
||
|
|
"rewards/weighted_rejected": -0.8084503412246704,
|
||
|
|
"step": 720
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.38209892698246534,
|
||
|
|
"grad_norm": 48.1231575012207,
|
||
|
|
"learning_rate": 7.779275597761215e-07,
|
||
|
|
"logits/chosen": -0.7673202753067017,
|
||
|
|
"logits/rejected": -0.81195068359375,
|
||
|
|
"logps/chosen": -330.94451904296875,
|
||
|
|
"logps/rejected": -355.31561279296875,
|
||
|
|
"logps/weighted_chosen": -2.6014404296875,
|
||
|
|
"logps/weighted_rejected": -3.1932616233825684,
|
||
|
|
"loss": 0.5952,
|
||
|
|
"rewards/accuracies": 0.690625011920929,
|
||
|
|
"rewards/chosen": -54.890235900878906,
|
||
|
|
"rewards/margins": 31.880468368530273,
|
||
|
|
"rewards/rejected": -86.75312805175781,
|
||
|
|
"rewards/weighted_accuracies": 0.7093750238418579,
|
||
|
|
"rewards/weighted_chosen": -0.2228240966796875,
|
||
|
|
"rewards/weighted_margins": 0.6149749755859375,
|
||
|
|
"rewards/weighted_rejected": -0.8373657464981079,
|
||
|
|
"step": 730
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.38733315885893743,
|
||
|
|
"grad_norm": 296.0299377441406,
|
||
|
|
"learning_rate": 7.702854657580126e-07,
|
||
|
|
"logits/chosen": -0.8295089602470398,
|
||
|
|
"logits/rejected": -0.843798816204071,
|
||
|
|
"logps/chosen": -352.19842529296875,
|
||
|
|
"logps/rejected": -333.7593688964844,
|
||
|
|
"logps/weighted_chosen": -2.45697021484375,
|
||
|
|
"logps/weighted_rejected": -3.375244140625,
|
||
|
|
"loss": 0.6318,
|
||
|
|
"rewards/accuracies": 0.659375011920929,
|
||
|
|
"rewards/chosen": -57.54804611206055,
|
||
|
|
"rewards/margins": 21.8720703125,
|
||
|
|
"rewards/rejected": -79.4625015258789,
|
||
|
|
"rewards/weighted_accuracies": 0.6781250238418579,
|
||
|
|
"rewards/weighted_chosen": -0.19748535752296448,
|
||
|
|
"rewards/weighted_margins": 0.5647827386856079,
|
||
|
|
"rewards/weighted_rejected": -0.762377917766571,
|
||
|
|
"step": 740
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3925673907354096,
|
||
|
|
"grad_norm": 222.2068328857422,
|
||
|
|
"learning_rate": 7.625530985899547e-07,
|
||
|
|
"logits/chosen": -0.8145691156387329,
|
||
|
|
"logits/rejected": -0.8263305425643921,
|
||
|
|
"logps/chosen": -328.7578125,
|
||
|
|
"logps/rejected": -331.390625,
|
||
|
|
"logps/weighted_chosen": -2.581188917160034,
|
||
|
|
"logps/weighted_rejected": -3.505688428878784,
|
||
|
|
"loss": 0.6377,
|
||
|
|
"rewards/accuracies": 0.6468750238418579,
|
||
|
|
"rewards/chosen": -61.987892150878906,
|
||
|
|
"rewards/margins": 24.421483993530273,
|
||
|
|
"rewards/rejected": -86.39921569824219,
|
||
|
|
"rewards/weighted_accuracies": 0.6656249761581421,
|
||
|
|
"rewards/weighted_chosen": -0.493093878030777,
|
||
|
|
"rewards/weighted_margins": 0.4742370545864105,
|
||
|
|
"rewards/weighted_rejected": -0.9672302007675171,
|
||
|
|
"step": 750
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.39780162261188173,
|
||
|
|
"grad_norm": 34.57517623901367,
|
||
|
|
"learning_rate": 7.547330408197694e-07,
|
||
|
|
"logits/chosen": -0.8249969482421875,
|
||
|
|
"logits/rejected": -0.8720428347587585,
|
||
|
|
"logps/chosen": -363.44219970703125,
|
||
|
|
"logps/rejected": -345.2250061035156,
|
||
|
|
"logps/weighted_chosen": -2.4618163108825684,
|
||
|
|
"logps/weighted_rejected": -3.28759765625,
|
||
|
|
"loss": 0.6383,
|
||
|
|
"rewards/accuracies": 0.6187499761581421,
|
||
|
|
"rewards/chosen": -67.7855453491211,
|
||
|
|
"rewards/margins": 21.181835174560547,
|
||
|
|
"rewards/rejected": -88.9535140991211,
|
||
|
|
"rewards/weighted_accuracies": 0.6343749761581421,
|
||
|
|
"rewards/weighted_chosen": -0.3757568299770355,
|
||
|
|
"rewards/weighted_margins": 0.4518585205078125,
|
||
|
|
"rewards/weighted_rejected": -0.8270477056503296,
|
||
|
|
"step": 760
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.40303585448835383,
|
||
|
|
"grad_norm": 26.55402946472168,
|
||
|
|
"learning_rate": 7.468279042832271e-07,
|
||
|
|
"logits/chosen": -0.8424628973007202,
|
||
|
|
"logits/rejected": -0.8771301507949829,
|
||
|
|
"logps/chosen": -347.6484375,
|
||
|
|
"logps/rejected": -388.42266845703125,
|
||
|
|
"logps/weighted_chosen": -2.660815477371216,
|
||
|
|
"logps/weighted_rejected": -3.1720213890075684,
|
||
|
|
"loss": 0.6743,
|
||
|
|
"rewards/accuracies": 0.653124988079071,
|
||
|
|
"rewards/chosen": -67.98554992675781,
|
||
|
|
"rewards/margins": 30.81640625,
|
||
|
|
"rewards/rejected": -98.75859069824219,
|
||
|
|
"rewards/weighted_accuracies": 0.6312500238418579,
|
||
|
|
"rewards/weighted_chosen": -0.5194793939590454,
|
||
|
|
"rewards/weighted_margins": 0.3566345274448395,
|
||
|
|
"rewards/weighted_rejected": -0.8764098882675171,
|
||
|
|
"step": 770
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.408270086364826,
|
||
|
|
"grad_norm": 14.42599868774414,
|
||
|
|
"learning_rate": 7.388403292317154e-07,
|
||
|
|
"logits/chosen": -0.7979522943496704,
|
||
|
|
"logits/rejected": -0.8573578000068665,
|
||
|
|
"logps/chosen": -367.4046936035156,
|
||
|
|
"logps/rejected": -358.2515563964844,
|
||
|
|
"logps/weighted_chosen": -2.490283250808716,
|
||
|
|
"logps/weighted_rejected": -3.1418213844299316,
|
||
|
|
"loss": 0.6398,
|
||
|
|
"rewards/accuracies": 0.65625,
|
||
|
|
"rewards/chosen": -66.2933578491211,
|
||
|
|
"rewards/margins": 26.066015243530273,
|
||
|
|
"rewards/rejected": -92.384765625,
|
||
|
|
"rewards/weighted_accuracies": 0.690625011920929,
|
||
|
|
"rewards/weighted_chosen": -0.461639404296875,
|
||
|
|
"rewards/weighted_margins": 0.452981561422348,
|
||
|
|
"rewards/weighted_rejected": -0.914324939250946,
|
||
|
|
"step": 780
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4135043182412981,
|
||
|
|
"grad_norm": 21.45990753173828,
|
||
|
|
"learning_rate": 7.307729834504154e-07,
|
||
|
|
"logits/chosen": -0.8032287359237671,
|
||
|
|
"logits/rejected": -0.8670104742050171,
|
||
|
|
"logps/chosen": -351.39532470703125,
|
||
|
|
"logps/rejected": -360.1734313964844,
|
||
|
|
"logps/weighted_chosen": -2.3328614234924316,
|
||
|
|
"logps/weighted_rejected": -3.198779344558716,
|
||
|
|
"loss": 0.6356,
|
||
|
|
"rewards/accuracies": 0.609375,
|
||
|
|
"rewards/chosen": -69.6167984008789,
|
||
|
|
"rewards/margins": 24.462499618530273,
|
||
|
|
"rewards/rejected": -94.07890319824219,
|
||
|
|
"rewards/weighted_accuracies": 0.675000011920929,
|
||
|
|
"rewards/weighted_chosen": -0.3527267575263977,
|
||
|
|
"rewards/weighted_margins": 0.45725250244140625,
|
||
|
|
"rewards/weighted_rejected": -0.8100005984306335,
|
||
|
|
"step": 790
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4187385501177702,
|
||
|
|
"grad_norm": 20.13976287841797,
|
||
|
|
"learning_rate": 7.226285613672847e-07,
|
||
|
|
"logits/chosen": -0.741473376750946,
|
||
|
|
"logits/rejected": -0.7820758819580078,
|
||
|
|
"logps/chosen": -342.1156311035156,
|
||
|
|
"logps/rejected": -382.6937561035156,
|
||
|
|
"logps/weighted_chosen": -2.3370361328125,
|
||
|
|
"logps/weighted_rejected": -3.3424315452575684,
|
||
|
|
"loss": 0.6236,
|
||
|
|
"rewards/accuracies": 0.703125,
|
||
|
|
"rewards/chosen": -64.8167953491211,
|
||
|
|
"rewards/margins": 39.174217224121094,
|
||
|
|
"rewards/rejected": -103.96992492675781,
|
||
|
|
"rewards/weighted_accuracies": 0.6781250238418579,
|
||
|
|
"rewards/weighted_chosen": -0.34785765409469604,
|
||
|
|
"rewards/weighted_margins": 0.5167236328125,
|
||
|
|
"rewards/weighted_rejected": -0.864398181438446,
|
||
|
|
"step": 800
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4239727819942423,
|
||
|
|
"grad_norm": 21.571788787841797,
|
||
|
|
"learning_rate": 7.144097831531398e-07,
|
||
|
|
"logits/chosen": -0.6900985836982727,
|
||
|
|
"logits/rejected": -0.7189788818359375,
|
||
|
|
"logps/chosen": -344.78436279296875,
|
||
|
|
"logps/rejected": -364.53436279296875,
|
||
|
|
"logps/weighted_chosen": -2.346606492996216,
|
||
|
|
"logps/weighted_rejected": -3.139209032058716,
|
||
|
|
"loss": 0.6171,
|
||
|
|
"rewards/accuracies": 0.640625,
|
||
|
|
"rewards/chosen": -74.72969055175781,
|
||
|
|
"rewards/margins": 25.596094131469727,
|
||
|
|
"rewards/rejected": -100.31640625,
|
||
|
|
"rewards/weighted_accuracies": 0.6937500238418579,
|
||
|
|
"rewards/weighted_chosen": -0.332855224609375,
|
||
|
|
"rewards/weighted_margins": 0.513507068157196,
|
||
|
|
"rewards/weighted_rejected": -0.8466736078262329,
|
||
|
|
"step": 810
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.42920701387071447,
|
||
|
|
"grad_norm": 23.761091232299805,
|
||
|
|
"learning_rate": 7.061193938131396e-07,
|
||
|
|
"logits/chosen": -0.620227038860321,
|
||
|
|
"logits/rejected": -0.6747413873672485,
|
||
|
|
"logps/chosen": -377.5609436035156,
|
||
|
|
"logps/rejected": -363.4296875,
|
||
|
|
"logps/weighted_chosen": -2.772265672683716,
|
||
|
|
"logps/weighted_rejected": -3.069580078125,
|
||
|
|
"loss": 0.6365,
|
||
|
|
"rewards/accuracies": 0.621874988079071,
|
||
|
|
"rewards/chosen": -77.7945327758789,
|
||
|
|
"rewards/margins": 20.221874237060547,
|
||
|
|
"rewards/rejected": -97.98515319824219,
|
||
|
|
"rewards/weighted_accuracies": 0.668749988079071,
|
||
|
|
"rewards/weighted_chosen": -0.3748535215854645,
|
||
|
|
"rewards/weighted_margins": 0.473724365234375,
|
||
|
|
"rewards/weighted_rejected": -0.8482757806777954,
|
||
|
|
"step": 820
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4344412457471866,
|
||
|
|
"grad_norm": 21.571779251098633,
|
||
|
|
"learning_rate": 6.977601622699789e-07,
|
||
|
|
"logits/chosen": -0.689013659954071,
|
||
|
|
"logits/rejected": -0.7498534917831421,
|
||
|
|
"logps/chosen": -354.41876220703125,
|
||
|
|
"logps/rejected": -392.3500061035156,
|
||
|
|
"logps/weighted_chosen": -2.6583251953125,
|
||
|
|
"logps/weighted_rejected": -3.377002000808716,
|
||
|
|
"loss": 0.5618,
|
||
|
|
"rewards/accuracies": 0.7093750238418579,
|
||
|
|
"rewards/chosen": -69.55390930175781,
|
||
|
|
"rewards/margins": 43.986717224121094,
|
||
|
|
"rewards/rejected": -113.5484390258789,
|
||
|
|
"rewards/weighted_accuracies": 0.7124999761581421,
|
||
|
|
"rewards/weighted_chosen": -0.28594970703125,
|
||
|
|
"rewards/weighted_margins": 0.6694701910018921,
|
||
|
|
"rewards/weighted_rejected": -0.955474853515625,
|
||
|
|
"step": 830
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4396754776236587,
|
||
|
|
"grad_norm": 21.845787048339844,
|
||
|
|
"learning_rate": 6.893348804390882e-07,
|
||
|
|
"logits/chosen": -0.7911956906318665,
|
||
|
|
"logits/rejected": -0.8087249994277954,
|
||
|
|
"logps/chosen": -377.0531311035156,
|
||
|
|
"logps/rejected": -377.48126220703125,
|
||
|
|
"logps/weighted_chosen": -2.844287157058716,
|
||
|
|
"logps/weighted_rejected": -3.24560546875,
|
||
|
|
"loss": 0.5927,
|
||
|
|
"rewards/accuracies": 0.659375011920929,
|
||
|
|
"rewards/chosen": -81.9203109741211,
|
||
|
|
"rewards/margins": 35.79375076293945,
|
||
|
|
"rewards/rejected": -117.70625305175781,
|
||
|
|
"rewards/weighted_accuracies": 0.703125,
|
||
|
|
"rewards/weighted_chosen": -0.3613952696323395,
|
||
|
|
"rewards/weighted_margins": 0.5852203369140625,
|
||
|
|
"rewards/weighted_rejected": -0.94671630859375,
|
||
|
|
"step": 840
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.44490970950013087,
|
||
|
|
"grad_norm": 13.673724174499512,
|
||
|
|
"learning_rate": 6.808463622961578e-07,
|
||
|
|
"logits/chosen": -0.765423595905304,
|
||
|
|
"logits/rejected": -0.8230966329574585,
|
||
|
|
"logps/chosen": -385.33905029296875,
|
||
|
|
"logps/rejected": -413.21563720703125,
|
||
|
|
"logps/weighted_chosen": -2.7145752906799316,
|
||
|
|
"logps/weighted_rejected": -3.412890672683716,
|
||
|
|
"loss": 0.5718,
|
||
|
|
"rewards/accuracies": 0.643750011920929,
|
||
|
|
"rewards/chosen": -92.6429672241211,
|
||
|
|
"rewards/margins": 38.67695236206055,
|
||
|
|
"rewards/rejected": -131.3136749267578,
|
||
|
|
"rewards/weighted_accuracies": 0.6781250238418579,
|
||
|
|
"rewards/weighted_chosen": -0.43825072050094604,
|
||
|
|
"rewards/weighted_margins": 0.649249255657196,
|
||
|
|
"rewards/weighted_rejected": -1.0877685546875,
|
||
|
|
"step": 850
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.45014394137660296,
|
||
|
|
"grad_norm": 17.156784057617188,
|
||
|
|
"learning_rate": 6.722974429372925e-07,
|
||
|
|
"logits/chosen": -0.733477771282196,
|
||
|
|
"logits/rejected": -0.7933975458145142,
|
||
|
|
"logps/chosen": -418.08282470703125,
|
||
|
|
"logps/rejected": -417.9937438964844,
|
||
|
|
"logps/weighted_chosen": -2.5140380859375,
|
||
|
|
"logps/weighted_rejected": -3.9316039085388184,
|
||
|
|
"loss": 0.5611,
|
||
|
|
"rewards/accuracies": 0.668749988079071,
|
||
|
|
"rewards/chosen": -114.50508117675781,
|
||
|
|
"rewards/margins": 41.392189025878906,
|
||
|
|
"rewards/rejected": -155.9523468017578,
|
||
|
|
"rewards/weighted_accuracies": 0.71875,
|
||
|
|
"rewards/weighted_chosen": -0.5683807134628296,
|
||
|
|
"rewards/weighted_margins": 0.804516613483429,
|
||
|
|
"rewards/weighted_rejected": -1.373052954673767,
|
||
|
|
"step": 860
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4553781732530751,
|
||
|
|
"grad_norm": 23.082002639770508,
|
||
|
|
"learning_rate": 6.636909776321128e-07,
|
||
|
|
"logits/chosen": -0.8063064813613892,
|
||
|
|
"logits/rejected": -0.802105724811554,
|
||
|
|
"logps/chosen": -369.12811279296875,
|
||
|
|
"logps/rejected": -413.70782470703125,
|
||
|
|
"logps/weighted_chosen": -2.945758104324341,
|
||
|
|
"logps/weighted_rejected": -3.6049561500549316,
|
||
|
|
"loss": 0.5946,
|
||
|
|
"rewards/accuracies": 0.6156250238418579,
|
||
|
|
"rewards/chosen": -107.4222640991211,
|
||
|
|
"rewards/margins": 41.763671875,
|
||
|
|
"rewards/rejected": -149.2078094482422,
|
||
|
|
"rewards/weighted_accuracies": 0.65625,
|
||
|
|
"rewards/weighted_chosen": -0.640515148639679,
|
||
|
|
"rewards/weighted_margins": 0.601641833782196,
|
||
|
|
"rewards/weighted_rejected": -1.2423064708709717,
|
||
|
|
"step": 870
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.46061240512954726,
|
||
|
|
"grad_norm": 27.672487258911133,
|
||
|
|
"learning_rate": 6.550298408701174e-07,
|
||
|
|
"logits/chosen": -0.778796374797821,
|
||
|
|
"logits/rejected": -0.830426037311554,
|
||
|
|
"logps/chosen": -389.8421936035156,
|
||
|
|
"logps/rejected": -428.6312561035156,
|
||
|
|
"logps/weighted_chosen": -3.13909912109375,
|
||
|
|
"logps/weighted_rejected": -4.012915134429932,
|
||
|
|
"loss": 0.6358,
|
||
|
|
"rewards/accuracies": 0.675000011920929,
|
||
|
|
"rewards/chosen": -103.14042663574219,
|
||
|
|
"rewards/margins": 43.176368713378906,
|
||
|
|
"rewards/rejected": -146.2609405517578,
|
||
|
|
"rewards/weighted_accuracies": 0.643750011920929,
|
||
|
|
"rewards/weighted_chosen": -0.5248870849609375,
|
||
|
|
"rewards/weighted_margins": 0.555926501750946,
|
||
|
|
"rewards/weighted_rejected": -1.0807831287384033,
|
||
|
|
"step": 880
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.46584663700601936,
|
||
|
|
"grad_norm": 20.153644561767578,
|
||
|
|
"learning_rate": 6.463169254006276e-07,
|
||
|
|
"logits/chosen": -0.7750915288925171,
|
||
|
|
"logits/rejected": -0.8219833374023438,
|
||
|
|
"logps/chosen": -377.8421936035156,
|
||
|
|
"logps/rejected": -378.10467529296875,
|
||
|
|
"logps/weighted_chosen": -2.7843995094299316,
|
||
|
|
"logps/weighted_rejected": -3.576098680496216,
|
||
|
|
"loss": 0.5705,
|
||
|
|
"rewards/accuracies": 0.653124988079071,
|
||
|
|
"rewards/chosen": -96.46601867675781,
|
||
|
|
"rewards/margins": 34.888282775878906,
|
||
|
|
"rewards/rejected": -131.3464813232422,
|
||
|
|
"rewards/weighted_accuracies": 0.71875,
|
||
|
|
"rewards/weighted_chosen": -0.39057618379592896,
|
||
|
|
"rewards/weighted_margins": 0.706072986125946,
|
||
|
|
"rewards/weighted_rejected": -1.0967223644256592,
|
||
|
|
"step": 890
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4710808688824915,
|
||
|
|
"grad_norm": 42.77175521850586,
|
||
|
|
"learning_rate": 6.375551412666326e-07,
|
||
|
|
"logits/chosen": -0.7759063839912415,
|
||
|
|
"logits/rejected": -0.8005096316337585,
|
||
|
|
"logps/chosen": -379.19219970703125,
|
||
|
|
"logps/rejected": -388.72344970703125,
|
||
|
|
"logps/weighted_chosen": -2.5501952171325684,
|
||
|
|
"logps/weighted_rejected": -3.5579466819763184,
|
||
|
|
"loss": 0.6503,
|
||
|
|
"rewards/accuracies": 0.581250011920929,
|
||
|
|
"rewards/chosen": -105.25859069824219,
|
||
|
|
"rewards/margins": 27.150781631469727,
|
||
|
|
"rewards/rejected": -132.42733764648438,
|
||
|
|
"rewards/weighted_accuracies": 0.6312500238418579,
|
||
|
|
"rewards/weighted_chosen": -0.5587005615234375,
|
||
|
|
"rewards/weighted_margins": 0.5813232660293579,
|
||
|
|
"rewards/weighted_rejected": -1.1405792236328125,
|
||
|
|
"step": 900
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4763151007589636,
|
||
|
|
"grad_norm": 24.800992965698242,
|
||
|
|
"learning_rate": 6.287474148328583e-07,
|
||
|
|
"logits/chosen": -0.7202819585800171,
|
||
|
|
"logits/rejected": -0.7240753173828125,
|
||
|
|
"logps/chosen": -371.1812438964844,
|
||
|
|
"logps/rejected": -371.109375,
|
||
|
|
"logps/weighted_chosen": -2.950915575027466,
|
||
|
|
"logps/weighted_rejected": -4.175073146820068,
|
||
|
|
"loss": 0.6282,
|
||
|
|
"rewards/accuracies": 0.628125011920929,
|
||
|
|
"rewards/chosen": -100.2894515991211,
|
||
|
|
"rewards/margins": 23.904687881469727,
|
||
|
|
"rewards/rejected": -124.20625305175781,
|
||
|
|
"rewards/weighted_accuracies": 0.6656249761581421,
|
||
|
|
"rewards/weighted_chosen": -0.5755615234375,
|
||
|
|
"rewards/weighted_margins": 0.5273803472518921,
|
||
|
|
"rewards/weighted_rejected": -1.1032683849334717,
|
||
|
|
"step": 910
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.48154933263543576,
|
||
|
|
"grad_norm": 18.644733428955078,
|
||
|
|
"learning_rate": 6.198966878083857e-07,
|
||
|
|
"logits/chosen": -0.7572265863418579,
|
||
|
|
"logits/rejected": -0.7787246704101562,
|
||
|
|
"logps/chosen": -368.4359436035156,
|
||
|
|
"logps/rejected": -402.46563720703125,
|
||
|
|
"logps/weighted_chosen": -2.8515868186950684,
|
||
|
|
"logps/weighted_rejected": -3.4952635765075684,
|
||
|
|
"loss": 0.6159,
|
||
|
|
"rewards/accuracies": 0.6656249761581421,
|
||
|
|
"rewards/chosen": -97.32890319824219,
|
||
|
|
"rewards/margins": 38.184959411621094,
|
||
|
|
"rewards/rejected": -135.45703125,
|
||
|
|
"rewards/weighted_accuracies": 0.684374988079071,
|
||
|
|
"rewards/weighted_chosen": -0.4532226622104645,
|
||
|
|
"rewards/weighted_margins": 0.559436023235321,
|
||
|
|
"rewards/weighted_rejected": -1.012457251548767,
|
||
|
|
"step": 920
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.48678356451190785,
|
||
|
|
"grad_norm": 44.07575988769531,
|
||
|
|
"learning_rate": 6.110059162641439e-07,
|
||
|
|
"logits/chosen": -0.7723480463027954,
|
||
|
|
"logits/rejected": -0.802471935749054,
|
||
|
|
"logps/chosen": -375.984375,
|
||
|
|
"logps/rejected": -391.7906188964844,
|
||
|
|
"logps/weighted_chosen": -2.391467332839966,
|
||
|
|
"logps/weighted_rejected": -3.1367430686950684,
|
||
|
|
"loss": 0.6244,
|
||
|
|
"rewards/accuracies": 0.675000011920929,
|
||
|
|
"rewards/chosen": -93.6128921508789,
|
||
|
|
"rewards/margins": 30.975391387939453,
|
||
|
|
"rewards/rejected": -124.58906555175781,
|
||
|
|
"rewards/weighted_accuracies": 0.6781250238418579,
|
||
|
|
"rewards/weighted_chosen": -0.4246048033237457,
|
||
|
|
"rewards/weighted_margins": 0.47894287109375,
|
||
|
|
"rewards/weighted_rejected": -0.904034435749054,
|
||
|
|
"step": 930
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.49201779638838,
|
||
|
|
"grad_norm": 26.321582794189453,
|
||
|
|
"learning_rate": 6.020780696456059e-07,
|
||
|
|
"logits/chosen": -0.7484909296035767,
|
||
|
|
"logits/rejected": -0.7926574945449829,
|
||
|
|
"logps/chosen": -359.21875,
|
||
|
|
"logps/rejected": -411.1890563964844,
|
||
|
|
"logps/weighted_chosen": -2.2330689430236816,
|
||
|
|
"logps/weighted_rejected": -3.3442625999450684,
|
||
|
|
"loss": 0.5653,
|
||
|
|
"rewards/accuracies": 0.6875,
|
||
|
|
"rewards/chosen": -88.5511703491211,
|
||
|
|
"rewards/margins": 55.419921875,
|
||
|
|
"rewards/rejected": -143.99453735351562,
|
||
|
|
"rewards/weighted_accuracies": 0.7093750238418579,
|
||
|
|
"rewards/weighted_chosen": -0.47887879610061646,
|
||
|
|
"rewards/weighted_margins": 0.6291259527206421,
|
||
|
|
"rewards/weighted_rejected": -1.1078612804412842,
|
||
|
|
"step": 940
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.49725202826485215,
|
||
|
|
"grad_norm": 44.13637924194336,
|
||
|
|
"learning_rate": 5.931161297810185e-07,
|
||
|
|
"logits/chosen": -0.8126861453056335,
|
||
|
|
"logits/rejected": -0.829357922077179,
|
||
|
|
"logps/chosen": -376.6875,
|
||
|
|
"logps/rejected": -399.51251220703125,
|
||
|
|
"logps/weighted_chosen": -3.1253294944763184,
|
||
|
|
"logps/weighted_rejected": -3.8741211891174316,
|
||
|
|
"loss": 0.6461,
|
||
|
|
"rewards/accuracies": 0.612500011920929,
|
||
|
|
"rewards/chosen": -103.4652328491211,
|
||
|
|
"rewards/margins": 34.713279724121094,
|
||
|
|
"rewards/rejected": -138.1457061767578,
|
||
|
|
"rewards/weighted_accuracies": 0.671875,
|
||
|
|
"rewards/weighted_chosen": -0.616656482219696,
|
||
|
|
"rewards/weighted_margins": 0.5488006472587585,
|
||
|
|
"rewards/weighted_rejected": -1.1659362316131592,
|
||
|
|
"step": 950
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5024862601413242,
|
||
|
|
"grad_norm": 24.634550094604492,
|
||
|
|
"learning_rate": 5.841230898854959e-07,
|
||
|
|
"logits/chosen": -0.742846667766571,
|
||
|
|
"logits/rejected": -0.765765368938446,
|
||
|
|
"logps/chosen": -421.84686279296875,
|
||
|
|
"logps/rejected": -429.4312438964844,
|
||
|
|
"logps/weighted_chosen": -2.9749999046325684,
|
||
|
|
"logps/weighted_rejected": -3.778076171875,
|
||
|
|
"loss": 0.6955,
|
||
|
|
"rewards/accuracies": 0.6625000238418579,
|
||
|
|
"rewards/chosen": -121.6539077758789,
|
||
|
|
"rewards/margins": 46.04961013793945,
|
||
|
|
"rewards/rejected": -167.69375610351562,
|
||
|
|
"rewards/weighted_accuracies": 0.6187499761581421,
|
||
|
|
"rewards/weighted_chosen": -0.820935070514679,
|
||
|
|
"rewards/weighted_margins": 0.548413097858429,
|
||
|
|
"rewards/weighted_rejected": -1.369299292564392,
|
||
|
|
"step": 960
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5077204920177963,
|
||
|
|
"grad_norm": 28.748939514160156,
|
||
|
|
"learning_rate": 5.751019535613102e-07,
|
||
|
|
"logits/chosen": -0.6985992193222046,
|
||
|
|
"logits/rejected": -0.7225921750068665,
|
||
|
|
"logps/chosen": -365.7984313964844,
|
||
|
|
"logps/rejected": -400.90625,
|
||
|
|
"logps/weighted_chosen": -2.9286131858825684,
|
||
|
|
"logps/weighted_rejected": -4.021093845367432,
|
||
|
|
"loss": 0.6325,
|
||
|
|
"rewards/accuracies": 0.684374988079071,
|
||
|
|
"rewards/chosen": -106.0390625,
|
||
|
|
"rewards/margins": 47.953514099121094,
|
||
|
|
"rewards/rejected": -153.97265625,
|
||
|
|
"rewards/weighted_accuracies": 0.668749988079071,
|
||
|
|
"rewards/weighted_chosen": -0.7157821655273438,
|
||
|
|
"rewards/weighted_margins": 0.7237914800643921,
|
||
|
|
"rewards/weighted_rejected": -1.439599633216858,
|
||
|
|
"step": 970
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5129547238942685,
|
||
|
|
"grad_norm": 15.98474407196045,
|
||
|
|
"learning_rate": 5.660557337947117e-07,
|
||
|
|
"logits/chosen": -0.6841033697128296,
|
||
|
|
"logits/rejected": -0.6997619867324829,
|
||
|
|
"logps/chosen": -409.1937561035156,
|
||
|
|
"logps/rejected": -406.05938720703125,
|
||
|
|
"logps/weighted_chosen": -2.480639696121216,
|
||
|
|
"logps/weighted_rejected": -3.3584961891174316,
|
||
|
|
"loss": 0.5997,
|
||
|
|
"rewards/accuracies": 0.640625,
|
||
|
|
"rewards/chosen": -114.5328140258789,
|
||
|
|
"rewards/margins": 34.099998474121094,
|
||
|
|
"rewards/rejected": -148.6570281982422,
|
||
|
|
"rewards/weighted_accuracies": 0.6968749761581421,
|
||
|
|
"rewards/weighted_chosen": -0.572741687297821,
|
||
|
|
"rewards/weighted_margins": 0.527575671672821,
|
||
|
|
"rewards/weighted_rejected": -1.100128173828125,
|
||
|
|
"step": 980
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5181889557707406,
|
||
|
|
"grad_norm": 41.099185943603516,
|
||
|
|
"learning_rate": 5.569874519496174e-07,
|
||
|
|
"logits/chosen": -0.7119758725166321,
|
||
|
|
"logits/rejected": -0.7671966552734375,
|
||
|
|
"logps/chosen": -381.44842529296875,
|
||
|
|
"logps/rejected": -410.5015563964844,
|
||
|
|
"logps/weighted_chosen": -2.8494019508361816,
|
||
|
|
"logps/weighted_rejected": -3.8323974609375,
|
||
|
|
"loss": 0.6259,
|
||
|
|
"rewards/accuracies": 0.6156250238418579,
|
||
|
|
"rewards/chosen": -106.146484375,
|
||
|
|
"rewards/margins": 37.33867263793945,
|
||
|
|
"rewards/rejected": -143.4011688232422,
|
||
|
|
"rewards/weighted_accuracies": 0.668749988079071,
|
||
|
|
"rewards/weighted_chosen": -0.4599502682685852,
|
||
|
|
"rewards/weighted_margins": 0.6096404790878296,
|
||
|
|
"rewards/weighted_rejected": -1.070257544517517,
|
||
|
|
"step": 990
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5234231876472127,
|
||
|
|
"grad_norm": 34.9498176574707,
|
||
|
|
"learning_rate": 5.47900136758499e-07,
|
||
|
|
"logits/chosen": -0.6499813199043274,
|
||
|
|
"logits/rejected": -0.7194549441337585,
|
||
|
|
"logps/chosen": -369.4429626464844,
|
||
|
|
"logps/rejected": -382.0953063964844,
|
||
|
|
"logps/weighted_chosen": -2.7041993141174316,
|
||
|
|
"logps/weighted_rejected": -3.534008741378784,
|
||
|
|
"loss": 0.5974,
|
||
|
|
"rewards/accuracies": 0.6187499761581421,
|
||
|
|
"rewards/chosen": -101.8080062866211,
|
||
|
|
"rewards/margins": 38.08086013793945,
|
||
|
|
"rewards/rejected": -139.9406280517578,
|
||
|
|
"rewards/weighted_accuracies": 0.706250011920929,
|
||
|
|
"rewards/weighted_chosen": -0.44673460721969604,
|
||
|
|
"rewards/weighted_margins": 0.6419677734375,
|
||
|
|
"rewards/weighted_rejected": -1.08880615234375,
|
||
|
|
"step": 1000
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5234231876472127,
|
||
|
|
"eval_logits/chosen": -0.7614516615867615,
|
||
|
|
"eval_logits/rejected": -0.7845029234886169,
|
||
|
|
"eval_logps/chosen": -403.2145080566406,
|
||
|
|
"eval_logps/rejected": -419.8420104980469,
|
||
|
|
"eval_logps/weighted_chosen": -2.8744430541992188,
|
||
|
|
"eval_logps/weighted_rejected": -3.6894454956054688,
|
||
|
|
"eval_loss": 0.6146492958068848,
|
||
|
|
"eval_rewards/accuracies": 0.6269999742507935,
|
||
|
|
"eval_rewards/chosen": -114.45649719238281,
|
||
|
|
"eval_rewards/margins": 37.66427993774414,
|
||
|
|
"eval_rewards/rejected": -152.1232452392578,
|
||
|
|
"eval_rewards/weighted_accuracies": 0.6679999828338623,
|
||
|
|
"eval_rewards/weighted_chosen": -0.6033662557601929,
|
||
|
|
"eval_rewards/weighted_margins": 0.5922585129737854,
|
||
|
|
"eval_rewards/weighted_rejected": -1.195624828338623,
|
||
|
|
"eval_runtime": 1076.2039,
|
||
|
|
"eval_samples_per_second": 1.858,
|
||
|
|
"eval_steps_per_second": 0.465,
|
||
|
|
"step": 1000
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"logging_steps": 10,
|
||
|
|
"max_steps": 1911,
|
||
|
|
"num_input_tokens_seen": 0,
|
||
|
|
"num_train_epochs": 1,
|
||
|
|
"save_steps": 500,
|
||
|
|
"stateful_callbacks": {
|
||
|
|
"TrainerControl": {
|
||
|
|
"args": {
|
||
|
|
"should_epoch_stop": false,
|
||
|
|
"should_evaluate": false,
|
||
|
|
"should_log": false,
|
||
|
|
"should_save": true,
|
||
|
|
"should_training_stop": false
|
||
|
|
},
|
||
|
|
"attributes": {}
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"total_flos": 0.0,
|
||
|
|
"train_batch_size": 1,
|
||
|
|
"trial_name": null,
|
||
|
|
"trial_params": null
|
||
|
|
}
|