699 lines
24 KiB
JSON
699 lines
24 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.8065532451165721,
|
|
"eval_steps": 100,
|
|
"global_step": 400,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.020163831127914304,
|
|
"grad_norm": 15.5,
|
|
"learning_rate": 3e-08,
|
|
"logits/chosen": -0.4867519736289978,
|
|
"logits/rejected": 0.7175194621086121,
|
|
"logps/chosen": -30.936298370361328,
|
|
"logps/rejected": -33.71613311767578,
|
|
"loss": 0.6928,
|
|
"rewards/accuracies": 0.4234375059604645,
|
|
"rewards/chosen": 0.0013589367736130953,
|
|
"rewards/margins": 0.0008156307740136981,
|
|
"rewards/rejected": 0.0005433057667687535,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.04032766225582861,
|
|
"grad_norm": 14.75,
|
|
"learning_rate": 6.333333333333333e-08,
|
|
"logits/chosen": -0.42843765020370483,
|
|
"logits/rejected": 0.7893258929252625,
|
|
"logps/chosen": -30.931198120117188,
|
|
"logps/rejected": -33.70547866821289,
|
|
"loss": 0.6923,
|
|
"rewards/accuracies": 0.546875,
|
|
"rewards/chosen": 0.0024414127692580223,
|
|
"rewards/margins": 0.0018455162644386292,
|
|
"rewards/rejected": 0.0005958965630270541,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.06049149338374291,
|
|
"grad_norm": 13.5,
|
|
"learning_rate": 9.666666666666666e-08,
|
|
"logits/chosen": -0.4219973087310791,
|
|
"logits/rejected": 0.6850587725639343,
|
|
"logps/chosen": -30.893016815185547,
|
|
"logps/rejected": -33.663429260253906,
|
|
"loss": 0.6918,
|
|
"rewards/accuracies": 0.5453125238418579,
|
|
"rewards/chosen": 0.004747429862618446,
|
|
"rewards/margins": 0.0027347125578671694,
|
|
"rewards/rejected": 0.0020127175375819206,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.08065532451165722,
|
|
"grad_norm": 15.125,
|
|
"learning_rate": 1.3e-07,
|
|
"logits/chosen": -0.46360141038894653,
|
|
"logits/rejected": 0.7466679215431213,
|
|
"logps/chosen": -30.849849700927734,
|
|
"logps/rejected": -33.69337463378906,
|
|
"loss": 0.6893,
|
|
"rewards/accuracies": 0.6812499761581421,
|
|
"rewards/chosen": 0.009496917016804218,
|
|
"rewards/margins": 0.007778028957545757,
|
|
"rewards/rejected": 0.001718888757750392,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.10081915563957151,
|
|
"grad_norm": 14.3125,
|
|
"learning_rate": 1.6333333333333331e-07,
|
|
"logits/chosen": -0.5125963091850281,
|
|
"logits/rejected": 0.7158086895942688,
|
|
"logps/chosen": -30.67547607421875,
|
|
"logps/rejected": -33.68535232543945,
|
|
"loss": 0.6864,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": 0.015847254544496536,
|
|
"rewards/margins": 0.013664955273270607,
|
|
"rewards/rejected": 0.002182298805564642,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.12098298676748583,
|
|
"grad_norm": 15.25,
|
|
"learning_rate": 1.9666666666666665e-07,
|
|
"logits/chosen": -0.5598157644271851,
|
|
"logits/rejected": 0.6922208070755005,
|
|
"logps/chosen": -30.706085205078125,
|
|
"logps/rejected": -33.59804153442383,
|
|
"loss": 0.6824,
|
|
"rewards/accuracies": 0.824999988079071,
|
|
"rewards/chosen": 0.02341878041625023,
|
|
"rewards/margins": 0.02176792547106743,
|
|
"rewards/rejected": 0.0016508543631061912,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.14114681789540012,
|
|
"grad_norm": 13.5625,
|
|
"learning_rate": 2.3e-07,
|
|
"logits/chosen": -0.5082000494003296,
|
|
"logits/rejected": 0.6696754693984985,
|
|
"logps/chosen": -30.65346908569336,
|
|
"logps/rejected": -33.60075378417969,
|
|
"loss": 0.6784,
|
|
"rewards/accuracies": 0.84375,
|
|
"rewards/chosen": 0.03311777114868164,
|
|
"rewards/margins": 0.02995235286653042,
|
|
"rewards/rejected": 0.003165417117998004,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.16131064902331443,
|
|
"grad_norm": 16.125,
|
|
"learning_rate": 2.633333333333333e-07,
|
|
"logits/chosen": -0.48689335584640503,
|
|
"logits/rejected": 0.7162936925888062,
|
|
"logps/chosen": -30.42413330078125,
|
|
"logps/rejected": -33.7276611328125,
|
|
"loss": 0.6722,
|
|
"rewards/accuracies": 0.871874988079071,
|
|
"rewards/chosen": 0.042802099138498306,
|
|
"rewards/margins": 0.04268326610326767,
|
|
"rewards/rejected": 0.0001188320602523163,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.18147448015122875,
|
|
"grad_norm": 15.0625,
|
|
"learning_rate": 2.966666666666667e-07,
|
|
"logits/chosen": -0.6490235328674316,
|
|
"logits/rejected": 0.663810670375824,
|
|
"logps/chosen": -30.447296142578125,
|
|
"logps/rejected": -33.84953689575195,
|
|
"loss": 0.6663,
|
|
"rewards/accuracies": 0.8843749761581421,
|
|
"rewards/chosen": 0.05355549976229668,
|
|
"rewards/margins": 0.05490420013666153,
|
|
"rewards/rejected": -0.0013486887328326702,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.20163831127914303,
|
|
"grad_norm": 14.6875,
|
|
"learning_rate": 3.3e-07,
|
|
"logits/chosen": -0.6071578860282898,
|
|
"logits/rejected": 0.6672302484512329,
|
|
"logps/chosen": -30.24630355834961,
|
|
"logps/rejected": -33.704193115234375,
|
|
"loss": 0.6589,
|
|
"rewards/accuracies": 0.8812500238418579,
|
|
"rewards/chosen": 0.06409426033496857,
|
|
"rewards/margins": 0.07057920843362808,
|
|
"rewards/rejected": -0.006484942976385355,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.20163831127914303,
|
|
"eval_logits/chosen": -0.8155572414398193,
|
|
"eval_logits/rejected": 0.4097934663295746,
|
|
"eval_logps/chosen": -30.2824764251709,
|
|
"eval_logps/rejected": -33.65603256225586,
|
|
"eval_loss": 0.656726062297821,
|
|
"eval_rewards/accuracies": 0.8702152967453003,
|
|
"eval_rewards/chosen": 0.06777840107679367,
|
|
"eval_rewards/margins": 0.0754016563296318,
|
|
"eval_rewards/rejected": -0.007623251993209124,
|
|
"eval_runtime": 44.5266,
|
|
"eval_samples_per_second": 37.506,
|
|
"eval_steps_per_second": 9.388,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.22180214240705734,
|
|
"grad_norm": 15.5,
|
|
"learning_rate": 3.6333333333333333e-07,
|
|
"logits/chosen": -0.6406155824661255,
|
|
"logits/rejected": 0.6143032312393188,
|
|
"logps/chosen": -30.176578521728516,
|
|
"logps/rejected": -33.91298294067383,
|
|
"loss": 0.6486,
|
|
"rewards/accuracies": 0.8999999761581421,
|
|
"rewards/chosen": 0.07676664739847183,
|
|
"rewards/margins": 0.09246878325939178,
|
|
"rewards/rejected": -0.0157021377235651,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.24196597353497165,
|
|
"grad_norm": 16.5,
|
|
"learning_rate": 3.9666666666666665e-07,
|
|
"logits/chosen": -0.6949701905250549,
|
|
"logits/rejected": 0.5297445058822632,
|
|
"logps/chosen": -30.219324111938477,
|
|
"logps/rejected": -33.860145568847656,
|
|
"loss": 0.639,
|
|
"rewards/accuracies": 0.871874988079071,
|
|
"rewards/chosen": 0.0840606540441513,
|
|
"rewards/margins": 0.11379051208496094,
|
|
"rewards/rejected": -0.02972986176609993,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.26212980466288593,
|
|
"grad_norm": 15.875,
|
|
"learning_rate": 4.2999999999999996e-07,
|
|
"logits/chosen": -0.8164470791816711,
|
|
"logits/rejected": 0.4915032386779785,
|
|
"logps/chosen": -29.9921817779541,
|
|
"logps/rejected": -34.11717224121094,
|
|
"loss": 0.6243,
|
|
"rewards/accuracies": 0.871874988079071,
|
|
"rewards/chosen": 0.09985624253749847,
|
|
"rewards/margins": 0.14640672504901886,
|
|
"rewards/rejected": -0.04655047133564949,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.28229363579080025,
|
|
"grad_norm": 17.25,
|
|
"learning_rate": 4.633333333333333e-07,
|
|
"logits/chosen": -0.8576955795288086,
|
|
"logits/rejected": 0.475827693939209,
|
|
"logps/chosen": -29.987823486328125,
|
|
"logps/rejected": -34.64146041870117,
|
|
"loss": 0.606,
|
|
"rewards/accuracies": 0.8687499761581421,
|
|
"rewards/chosen": 0.10108546912670135,
|
|
"rewards/margins": 0.1895439326763153,
|
|
"rewards/rejected": -0.08845846354961395,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.30245746691871456,
|
|
"grad_norm": 15.75,
|
|
"learning_rate": 4.966666666666666e-07,
|
|
"logits/chosen": -0.9962993860244751,
|
|
"logits/rejected": 0.2535431385040283,
|
|
"logps/chosen": -30.001697540283203,
|
|
"logps/rejected": -34.91169357299805,
|
|
"loss": 0.5907,
|
|
"rewards/accuracies": 0.8609374761581421,
|
|
"rewards/chosen": 0.0953492671251297,
|
|
"rewards/margins": 0.23015904426574707,
|
|
"rewards/rejected": -0.13480977714061737,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.32262129804662887,
|
|
"grad_norm": 19.5,
|
|
"learning_rate": 4.869942196531791e-07,
|
|
"logits/chosen": -1.1013362407684326,
|
|
"logits/rejected": 0.16825838387012482,
|
|
"logps/chosen": -30.271495819091797,
|
|
"logps/rejected": -35.96307373046875,
|
|
"loss": 0.558,
|
|
"rewards/accuracies": 0.8812500238418579,
|
|
"rewards/chosen": 0.07991272956132889,
|
|
"rewards/margins": 0.3163560628890991,
|
|
"rewards/rejected": -0.23644332587718964,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.3427851291745432,
|
|
"grad_norm": 20.0,
|
|
"learning_rate": 4.7254335260115607e-07,
|
|
"logits/chosen": -1.3539522886276245,
|
|
"logits/rejected": -0.050642382353544235,
|
|
"logps/chosen": -30.70322608947754,
|
|
"logps/rejected": -36.963932037353516,
|
|
"loss": 0.5439,
|
|
"rewards/accuracies": 0.854687511920929,
|
|
"rewards/chosen": 0.04025987908244133,
|
|
"rewards/margins": 0.3653072416782379,
|
|
"rewards/rejected": -0.3250473737716675,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.3629489603024575,
|
|
"grad_norm": 19.375,
|
|
"learning_rate": 4.5809248554913295e-07,
|
|
"logits/chosen": -1.5499536991119385,
|
|
"logits/rejected": -0.12814846634864807,
|
|
"logps/chosen": -30.65024185180664,
|
|
"logps/rejected": -38.008567810058594,
|
|
"loss": 0.5077,
|
|
"rewards/accuracies": 0.878125011920929,
|
|
"rewards/chosen": 0.030102571472525597,
|
|
"rewards/margins": 0.46569380164146423,
|
|
"rewards/rejected": -0.4355912208557129,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.38311279143037175,
|
|
"grad_norm": 21.75,
|
|
"learning_rate": 4.436416184971098e-07,
|
|
"logits/chosen": -1.6617387533187866,
|
|
"logits/rejected": -0.32283174991607666,
|
|
"logps/chosen": -30.94146156311035,
|
|
"logps/rejected": -39.50160598754883,
|
|
"loss": 0.4704,
|
|
"rewards/accuracies": 0.895312488079071,
|
|
"rewards/chosen": 0.004333639983087778,
|
|
"rewards/margins": 0.5781550407409668,
|
|
"rewards/rejected": -0.5738214254379272,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.40327662255828606,
|
|
"grad_norm": 14.6875,
|
|
"learning_rate": 4.291907514450867e-07,
|
|
"logits/chosen": -1.791582703590393,
|
|
"logits/rejected": -0.44964680075645447,
|
|
"logps/chosen": -31.526784896850586,
|
|
"logps/rejected": -39.81529998779297,
|
|
"loss": 0.4811,
|
|
"rewards/accuracies": 0.864062488079071,
|
|
"rewards/chosen": -0.03861772269010544,
|
|
"rewards/margins": 0.56865394115448,
|
|
"rewards/rejected": -0.607271671295166,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.40327662255828606,
|
|
"eval_logits/chosen": -1.988352656364441,
|
|
"eval_logits/rejected": -0.7473806738853455,
|
|
"eval_logps/chosen": -31.514951705932617,
|
|
"eval_logps/rejected": -40.30702209472656,
|
|
"eval_loss": 0.4655759632587433,
|
|
"eval_rewards/accuracies": 0.8690191507339478,
|
|
"eval_rewards/chosen": -0.05546921119093895,
|
|
"eval_rewards/margins": 0.6172530651092529,
|
|
"eval_rewards/rejected": -0.6727222204208374,
|
|
"eval_runtime": 44.074,
|
|
"eval_samples_per_second": 37.891,
|
|
"eval_steps_per_second": 9.484,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.42344045368620037,
|
|
"grad_norm": 17.875,
|
|
"learning_rate": 4.1473988439306354e-07,
|
|
"logits/chosen": -1.9743998050689697,
|
|
"logits/rejected": -0.7126643061637878,
|
|
"logps/chosen": -31.816967010498047,
|
|
"logps/rejected": -40.79069519042969,
|
|
"loss": 0.4591,
|
|
"rewards/accuracies": 0.879687488079071,
|
|
"rewards/chosen": -0.06654059141874313,
|
|
"rewards/margins": 0.6414004564285278,
|
|
"rewards/rejected": -0.7079410552978516,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.4436042848141147,
|
|
"grad_norm": 15.875,
|
|
"learning_rate": 4.002890173410404e-07,
|
|
"logits/chosen": -1.9798154830932617,
|
|
"logits/rejected": -0.7139844298362732,
|
|
"logps/chosen": -32.13506317138672,
|
|
"logps/rejected": -41.47466278076172,
|
|
"loss": 0.4525,
|
|
"rewards/accuracies": 0.8656250238418579,
|
|
"rewards/chosen": -0.0931621789932251,
|
|
"rewards/margins": 0.6879409551620483,
|
|
"rewards/rejected": -0.7811031341552734,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.463768115942029,
|
|
"grad_norm": 20.0,
|
|
"learning_rate": 3.8583815028901736e-07,
|
|
"logits/chosen": -2.0034127235412598,
|
|
"logits/rejected": -0.7428504228591919,
|
|
"logps/chosen": -31.648120880126953,
|
|
"logps/rejected": -42.38803482055664,
|
|
"loss": 0.4149,
|
|
"rewards/accuracies": 0.893750011920929,
|
|
"rewards/chosen": -0.07446546852588654,
|
|
"rewards/margins": 0.8017433285713196,
|
|
"rewards/rejected": -0.8762086629867554,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.4839319470699433,
|
|
"grad_norm": 18.25,
|
|
"learning_rate": 3.713872832369942e-07,
|
|
"logits/chosen": -2.0142064094543457,
|
|
"logits/rejected": -0.9079702496528625,
|
|
"logps/chosen": -32.108821868896484,
|
|
"logps/rejected": -43.06040954589844,
|
|
"loss": 0.4058,
|
|
"rewards/accuracies": 0.8890625238418579,
|
|
"rewards/chosen": -0.10588344186544418,
|
|
"rewards/margins": 0.8354071378707886,
|
|
"rewards/rejected": -0.941290557384491,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.5040957781978576,
|
|
"grad_norm": 16.5,
|
|
"learning_rate": 3.5693641618497107e-07,
|
|
"logits/chosen": -2.275698184967041,
|
|
"logits/rejected": -1.122511863708496,
|
|
"logps/chosen": -32.408470153808594,
|
|
"logps/rejected": -43.69465255737305,
|
|
"loss": 0.4089,
|
|
"rewards/accuracies": 0.875,
|
|
"rewards/chosen": -0.13990476727485657,
|
|
"rewards/margins": 0.8530368804931641,
|
|
"rewards/rejected": -0.992941677570343,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.5242596093257719,
|
|
"grad_norm": 13.9375,
|
|
"learning_rate": 3.4248554913294795e-07,
|
|
"logits/chosen": -2.2892613410949707,
|
|
"logits/rejected": -1.177565336227417,
|
|
"logps/chosen": -32.277252197265625,
|
|
"logps/rejected": -44.352439880371094,
|
|
"loss": 0.3905,
|
|
"rewards/accuracies": 0.8687499761581421,
|
|
"rewards/chosen": -0.13188917934894562,
|
|
"rewards/margins": 0.9304295778274536,
|
|
"rewards/rejected": -1.0623188018798828,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.5444234404536862,
|
|
"grad_norm": 16.875,
|
|
"learning_rate": 3.2803468208092484e-07,
|
|
"logits/chosen": -2.309782028198242,
|
|
"logits/rejected": -1.1110422611236572,
|
|
"logps/chosen": -32.4368896484375,
|
|
"logps/rejected": -44.47226333618164,
|
|
"loss": 0.3876,
|
|
"rewards/accuracies": 0.8734375238418579,
|
|
"rewards/chosen": -0.14435531198978424,
|
|
"rewards/margins": 0.945163369178772,
|
|
"rewards/rejected": -1.0895185470581055,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.5645872715816005,
|
|
"grad_norm": 20.0,
|
|
"learning_rate": 3.135838150289017e-07,
|
|
"logits/chosen": -2.4775633811950684,
|
|
"logits/rejected": -1.2171634435653687,
|
|
"logps/chosen": -31.85956382751465,
|
|
"logps/rejected": -44.78974151611328,
|
|
"loss": 0.3722,
|
|
"rewards/accuracies": 0.893750011920929,
|
|
"rewards/chosen": -0.10046832263469696,
|
|
"rewards/margins": 1.0068700313568115,
|
|
"rewards/rejected": -1.107338309288025,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.5847511027095148,
|
|
"grad_norm": 15.3125,
|
|
"learning_rate": 2.991329479768786e-07,
|
|
"logits/chosen": -2.3516058921813965,
|
|
"logits/rejected": -1.2547065019607544,
|
|
"logps/chosen": -32.33803939819336,
|
|
"logps/rejected": -44.54314422607422,
|
|
"loss": 0.3877,
|
|
"rewards/accuracies": 0.8734375238418579,
|
|
"rewards/chosen": -0.1320447474718094,
|
|
"rewards/margins": 0.960769534111023,
|
|
"rewards/rejected": -1.0928142070770264,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.6049149338374291,
|
|
"grad_norm": 14.3125,
|
|
"learning_rate": 2.846820809248555e-07,
|
|
"logits/chosen": -2.5609071254730225,
|
|
"logits/rejected": -1.3568521738052368,
|
|
"logps/chosen": -31.781259536743164,
|
|
"logps/rejected": -45.361305236816406,
|
|
"loss": 0.3534,
|
|
"rewards/accuracies": 0.8968750238418579,
|
|
"rewards/chosen": -0.0874972864985466,
|
|
"rewards/margins": 1.0803875923156738,
|
|
"rewards/rejected": -1.1678849458694458,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.6049149338374291,
|
|
"eval_logits/chosen": -2.652808666229248,
|
|
"eval_logits/rejected": -1.5094201564788818,
|
|
"eval_logps/chosen": -32.167049407958984,
|
|
"eval_logps/rejected": -45.16587829589844,
|
|
"eval_loss": 0.37229523062705994,
|
|
"eval_rewards/accuracies": 0.8755980730056763,
|
|
"eval_rewards/chosen": -0.12067891657352448,
|
|
"eval_rewards/margins": 1.0379289388656616,
|
|
"eval_rewards/rejected": -1.158607840538025,
|
|
"eval_runtime": 44.3186,
|
|
"eval_samples_per_second": 37.682,
|
|
"eval_steps_per_second": 9.432,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.6250787649653434,
|
|
"grad_norm": 12.125,
|
|
"learning_rate": 2.7023121387283236e-07,
|
|
"logits/chosen": -2.574951410293579,
|
|
"logits/rejected": -1.4039162397384644,
|
|
"logps/chosen": -32.359107971191406,
|
|
"logps/rejected": -45.18638229370117,
|
|
"loss": 0.3833,
|
|
"rewards/accuracies": 0.870312511920929,
|
|
"rewards/chosen": -0.12439367920160294,
|
|
"rewards/margins": 1.0330053567886353,
|
|
"rewards/rejected": -1.1573989391326904,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.6452425960932577,
|
|
"grad_norm": 16.125,
|
|
"learning_rate": 2.5578034682080925e-07,
|
|
"logits/chosen": -2.5044591426849365,
|
|
"logits/rejected": -1.4151450395584106,
|
|
"logps/chosen": -32.13268280029297,
|
|
"logps/rejected": -45.22514343261719,
|
|
"loss": 0.3719,
|
|
"rewards/accuracies": 0.875,
|
|
"rewards/chosen": -0.11237072944641113,
|
|
"rewards/margins": 1.047181487083435,
|
|
"rewards/rejected": -1.1595523357391357,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.665406427221172,
|
|
"grad_norm": 15.0,
|
|
"learning_rate": 2.4132947976878613e-07,
|
|
"logits/chosen": -2.631962776184082,
|
|
"logits/rejected": -1.3861881494522095,
|
|
"logps/chosen": -31.74435043334961,
|
|
"logps/rejected": -45.44641876220703,
|
|
"loss": 0.3586,
|
|
"rewards/accuracies": 0.885937511920929,
|
|
"rewards/chosen": -0.074491485953331,
|
|
"rewards/margins": 1.1014248132705688,
|
|
"rewards/rejected": -1.1759161949157715,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.6855702583490864,
|
|
"grad_norm": 16.0,
|
|
"learning_rate": 2.26878612716763e-07,
|
|
"logits/chosen": -2.5997676849365234,
|
|
"logits/rejected": -1.4487764835357666,
|
|
"logps/chosen": -31.48516845703125,
|
|
"logps/rejected": -45.57600021362305,
|
|
"loss": 0.3413,
|
|
"rewards/accuracies": 0.903124988079071,
|
|
"rewards/chosen": -0.054619044065475464,
|
|
"rewards/margins": 1.1495531797409058,
|
|
"rewards/rejected": -1.2041722536087036,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.7057340894770007,
|
|
"grad_norm": 12.9375,
|
|
"learning_rate": 2.1242774566473987e-07,
|
|
"logits/chosen": -2.581223726272583,
|
|
"logits/rejected": -1.451586127281189,
|
|
"logps/chosen": -31.826080322265625,
|
|
"logps/rejected": -45.72737503051758,
|
|
"loss": 0.3609,
|
|
"rewards/accuracies": 0.879687488079071,
|
|
"rewards/chosen": -0.08685452491044998,
|
|
"rewards/margins": 1.1219041347503662,
|
|
"rewards/rejected": -1.2087585926055908,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.725897920604915,
|
|
"grad_norm": 14.125,
|
|
"learning_rate": 1.9797687861271675e-07,
|
|
"logits/chosen": -2.6039886474609375,
|
|
"logits/rejected": -1.593008279800415,
|
|
"logps/chosen": -32.02338790893555,
|
|
"logps/rejected": -45.7207145690918,
|
|
"loss": 0.3591,
|
|
"rewards/accuracies": 0.8812500238418579,
|
|
"rewards/chosen": -0.09107818454504013,
|
|
"rewards/margins": 1.1208597421646118,
|
|
"rewards/rejected": -1.2119379043579102,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.7460617517328293,
|
|
"grad_norm": 14.3125,
|
|
"learning_rate": 1.8352601156069363e-07,
|
|
"logits/chosen": -2.6979289054870605,
|
|
"logits/rejected": -1.5796029567718506,
|
|
"logps/chosen": -31.300853729248047,
|
|
"logps/rejected": -45.72681427001953,
|
|
"loss": 0.3522,
|
|
"rewards/accuracies": 0.878125011920929,
|
|
"rewards/chosen": -0.03190985321998596,
|
|
"rewards/margins": 1.1756919622421265,
|
|
"rewards/rejected": -1.20760178565979,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.7662255828607435,
|
|
"grad_norm": 10.875,
|
|
"learning_rate": 1.690751445086705e-07,
|
|
"logits/chosen": -2.669769525527954,
|
|
"logits/rejected": -1.4769527912139893,
|
|
"logps/chosen": -30.98702049255371,
|
|
"logps/rejected": -46.33720779418945,
|
|
"loss": 0.3226,
|
|
"rewards/accuracies": 0.90625,
|
|
"rewards/chosen": -0.009888170287013054,
|
|
"rewards/margins": 1.2525317668914795,
|
|
"rewards/rejected": -1.2624199390411377,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.7863894139886578,
|
|
"grad_norm": 12.5,
|
|
"learning_rate": 1.546242774566474e-07,
|
|
"logits/chosen": -2.6219515800476074,
|
|
"logits/rejected": -1.4883930683135986,
|
|
"logps/chosen": -31.58437156677246,
|
|
"logps/rejected": -46.32578659057617,
|
|
"loss": 0.348,
|
|
"rewards/accuracies": 0.875,
|
|
"rewards/chosen": -0.05657508969306946,
|
|
"rewards/margins": 1.205727458000183,
|
|
"rewards/rejected": -1.2623026371002197,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.8065532451165721,
|
|
"grad_norm": 13.4375,
|
|
"learning_rate": 1.4017341040462428e-07,
|
|
"logits/chosen": -2.6918234825134277,
|
|
"logits/rejected": -1.4419605731964111,
|
|
"logps/chosen": -31.05796241760254,
|
|
"logps/rejected": -45.93518829345703,
|
|
"loss": 0.3322,
|
|
"rewards/accuracies": 0.8890625238418579,
|
|
"rewards/chosen": -0.011668933555483818,
|
|
"rewards/margins": 1.2215235233306885,
|
|
"rewards/rejected": -1.2331925630569458,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.8065532451165721,
|
|
"eval_logits/chosen": -2.7881991863250732,
|
|
"eval_logits/rejected": -1.6745110750198364,
|
|
"eval_logps/chosen": -31.51004981994629,
|
|
"eval_logps/rejected": -45.77407455444336,
|
|
"eval_loss": 0.3527255356311798,
|
|
"eval_rewards/accuracies": 0.8761961460113525,
|
|
"eval_rewards/chosen": -0.05497899651527405,
|
|
"eval_rewards/margins": 1.164448618888855,
|
|
"eval_rewards/rejected": -1.219427466392517,
|
|
"eval_runtime": 43.521,
|
|
"eval_samples_per_second": 38.372,
|
|
"eval_steps_per_second": 9.605,
|
|
"step": 400
|
|
}
|
|
],
|
|
"logging_steps": 10,
|
|
"max_steps": 496,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 100,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": false
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|