Model: jackf857/qwen3-8b-base-epsilon-dpo-ultrafeedback-4xh200-batch-128 Source: Original Platform
1392 lines
57 KiB
JSON
1392 lines
57 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.9989528795811519,
|
|
"eval_steps": 200,
|
|
"global_step": 477,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0020942408376963353,
|
|
"epsilon_dpo/beta": 0.009997084736824036,
|
|
"epsilon_dpo/beta_margin_grad_mean": -0.499971479177475,
|
|
"epsilon_dpo/beta_margin_grad_std": 0.001938261673785746,
|
|
"epsilon_dpo/beta_margin_mean": 0.0001140289386967197,
|
|
"epsilon_dpo/beta_margin_std": 0.007753193378448486,
|
|
"epsilon_dpo/loss_margin_mean": 0.01704716682434082,
|
|
"grad_norm": 14.606449127197266,
|
|
"kl/avg_steps": 0.0390625,
|
|
"kl/beta": 0.009999999776482582,
|
|
"kl/n_epsilon_steps": 0.4765625,
|
|
"kl/p_epsilon_steps": 0.515625,
|
|
"learning_rate": 0.0,
|
|
"logits/chosen": 2.6271941661834717,
|
|
"logits/rejected": 2.237529993057251,
|
|
"logps/chosen": -267.3031921386719,
|
|
"logps/ref_chosen": -267.2525634765625,
|
|
"logps/ref_rejected": -219.97085571289062,
|
|
"logps/rejected": -220.0385284423828,
|
|
"loss": 5.5448,
|
|
"rewards/accuracies": 0.5546875,
|
|
"rewards/chosen": -0.0005317605682648718,
|
|
"rewards/margins": 0.00011403978714952245,
|
|
"rewards/rejected": -0.0006458003772422671,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.020942408376963352,
|
|
"epsilon_dpo/beta": 0.010005515068769455,
|
|
"epsilon_dpo/beta_margin_grad_mean": -0.5000517964363098,
|
|
"epsilon_dpo/beta_margin_grad_std": 0.0022904376965016127,
|
|
"epsilon_dpo/beta_margin_mean": -0.0002073091600323096,
|
|
"epsilon_dpo/beta_margin_std": 0.009162054397165775,
|
|
"epsilon_dpo/loss_margin_mean": -0.014141757972538471,
|
|
"grad_norm": 13.806034088134766,
|
|
"kl/avg_steps": -0.013888888992369175,
|
|
"kl/beta": 0.010003137402236462,
|
|
"kl/n_epsilon_steps": 0.5017361044883728,
|
|
"kl/p_epsilon_steps": 0.4878472089767456,
|
|
"learning_rate": 9.375e-08,
|
|
"logits/chosen": 2.67746639251709,
|
|
"logits/rejected": 2.7837536334991455,
|
|
"logps/chosen": -282.07965087890625,
|
|
"logps/ref_chosen": -282.07989501953125,
|
|
"logps/ref_rejected": -261.4595642089844,
|
|
"logps/rejected": -261.4451904296875,
|
|
"loss": 5.5461,
|
|
"rewards/accuracies": 0.4913194477558136,
|
|
"rewards/chosen": -3.148229734506458e-05,
|
|
"rewards/margins": -0.00020731209951918572,
|
|
"rewards/rejected": 0.0001758297876222059,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.041884816753926704,
|
|
"epsilon_dpo/beta": 0.010010017082095146,
|
|
"epsilon_dpo/beta_margin_grad_mean": -0.5000718235969543,
|
|
"epsilon_dpo/beta_margin_grad_std": 0.0022132620215415955,
|
|
"epsilon_dpo/beta_margin_mean": -0.0002871893811970949,
|
|
"epsilon_dpo/beta_margin_std": 0.008853326551616192,
|
|
"epsilon_dpo/loss_margin_mean": -0.02227994240820408,
|
|
"grad_norm": 15.510866165161133,
|
|
"kl/avg_steps": -0.02421874925494194,
|
|
"kl/beta": 0.010006600990891457,
|
|
"kl/n_epsilon_steps": 0.5078125,
|
|
"kl/p_epsilon_steps": 0.48359376192092896,
|
|
"learning_rate": 1.9791666666666664e-07,
|
|
"logits/chosen": 2.541713237762451,
|
|
"logits/rejected": 2.75179123878479,
|
|
"logps/chosen": -278.8614196777344,
|
|
"logps/ref_chosen": -278.8597106933594,
|
|
"logps/ref_rejected": -257.1719055175781,
|
|
"logps/rejected": -257.1513671875,
|
|
"loss": 5.5464,
|
|
"rewards/accuracies": 0.48515623807907104,
|
|
"rewards/chosen": -5.0874834414571524e-05,
|
|
"rewards/margins": -0.0002871867036446929,
|
|
"rewards/rejected": 0.00023631185467820615,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.06282722513089005,
|
|
"epsilon_dpo/beta": 0.01001377496868372,
|
|
"epsilon_dpo/beta_margin_grad_mean": -0.4998772144317627,
|
|
"epsilon_dpo/beta_margin_grad_std": 0.0022012609988451004,
|
|
"epsilon_dpo/beta_margin_mean": 0.000491045939270407,
|
|
"epsilon_dpo/beta_margin_std": 0.008805298246443272,
|
|
"epsilon_dpo/loss_margin_mean": 0.055501788854599,
|
|
"grad_norm": 14.358946800231934,
|
|
"kl/avg_steps": 0.07187499850988388,
|
|
"kl/beta": 0.010019981302320957,
|
|
"kl/n_epsilon_steps": 0.45703125,
|
|
"kl/p_epsilon_steps": 0.5289062261581421,
|
|
"learning_rate": 3.020833333333333e-07,
|
|
"logits/chosen": 2.639504909515381,
|
|
"logits/rejected": 2.8058505058288574,
|
|
"logps/chosen": -273.9162902832031,
|
|
"logps/ref_chosen": -273.97674560546875,
|
|
"logps/ref_rejected": -257.2232360839844,
|
|
"logps/rejected": -257.2182922363281,
|
|
"loss": 5.5433,
|
|
"rewards/accuracies": 0.54296875,
|
|
"rewards/chosen": 0.0005733909783884883,
|
|
"rewards/margins": 0.0004910477437078953,
|
|
"rewards/rejected": 8.234316919697449e-05,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.08376963350785341,
|
|
"epsilon_dpo/beta": 0.009926706552505493,
|
|
"epsilon_dpo/beta_margin_grad_mean": -0.49948254227638245,
|
|
"epsilon_dpo/beta_margin_grad_std": 0.0024200372863560915,
|
|
"epsilon_dpo/beta_margin_mean": 0.0020698602311313152,
|
|
"epsilon_dpo/beta_margin_std": 0.009680529125034809,
|
|
"epsilon_dpo/loss_margin_mean": 0.21598558127880096,
|
|
"grad_norm": 14.699762344360352,
|
|
"kl/avg_steps": 0.11953125149011612,
|
|
"kl/beta": 0.009937574155628681,
|
|
"kl/n_epsilon_steps": 0.4351562559604645,
|
|
"kl/p_epsilon_steps": 0.5546875,
|
|
"learning_rate": 4.0625e-07,
|
|
"logits/chosen": 2.59186053276062,
|
|
"logits/rejected": 2.7942440509796143,
|
|
"logps/chosen": -280.52899169921875,
|
|
"logps/ref_chosen": -280.8274841308594,
|
|
"logps/ref_rejected": -258.9448547363281,
|
|
"logps/rejected": -258.8622741699219,
|
|
"loss": 5.537,
|
|
"rewards/accuracies": 0.5726562738418579,
|
|
"rewards/chosen": 0.0029196988325566053,
|
|
"rewards/margins": 0.002069863025099039,
|
|
"rewards/rejected": 0.0008498359238728881,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.10471204188481675,
|
|
"epsilon_dpo/beta": 0.009684694930911064,
|
|
"epsilon_dpo/beta_margin_grad_mean": -0.4989333748817444,
|
|
"epsilon_dpo/beta_margin_grad_std": 0.0033105709590017796,
|
|
"epsilon_dpo/beta_margin_mean": 0.004266691394150257,
|
|
"epsilon_dpo/beta_margin_std": 0.013243382796645164,
|
|
"epsilon_dpo/loss_margin_mean": 0.4500531256198883,
|
|
"grad_norm": 14.027534484863281,
|
|
"kl/avg_steps": 0.30390626192092896,
|
|
"kl/beta": 0.009713245555758476,
|
|
"kl/n_epsilon_steps": 0.34453123807907104,
|
|
"kl/p_epsilon_steps": 0.6484375,
|
|
"learning_rate": 4.999932966293553e-07,
|
|
"logits/chosen": 2.47767972946167,
|
|
"logits/rejected": 2.8026018142700195,
|
|
"logps/chosen": -277.54425048828125,
|
|
"logps/ref_chosen": -278.20208740234375,
|
|
"logps/ref_rejected": -265.7288818359375,
|
|
"logps/rejected": -265.5211181640625,
|
|
"loss": 5.5283,
|
|
"rewards/accuracies": 0.649218738079071,
|
|
"rewards/chosen": 0.006310028024017811,
|
|
"rewards/margins": 0.00426669092848897,
|
|
"rewards/rejected": 0.0020433368626981974,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.1256544502617801,
|
|
"epsilon_dpo/beta": 0.009375964291393757,
|
|
"epsilon_dpo/beta_margin_grad_mean": -0.4979146420955658,
|
|
"epsilon_dpo/beta_margin_grad_std": 0.0050841751508414745,
|
|
"epsilon_dpo/beta_margin_mean": 0.008342581801116467,
|
|
"epsilon_dpo/beta_margin_std": 0.02034146524965763,
|
|
"epsilon_dpo/loss_margin_mean": 0.9050939679145813,
|
|
"grad_norm": 13.532852172851562,
|
|
"kl/avg_steps": 0.35546875,
|
|
"kl/beta": 0.009408445097506046,
|
|
"kl/n_epsilon_steps": 0.3187499940395355,
|
|
"kl/p_epsilon_steps": 0.6742187738418579,
|
|
"learning_rate": 4.991893270335525e-07,
|
|
"logits/chosen": 2.488196849822998,
|
|
"logits/rejected": 2.7562973499298096,
|
|
"logps/chosen": -267.5882263183594,
|
|
"logps/ref_chosen": -268.90765380859375,
|
|
"logps/ref_rejected": -259.67926025390625,
|
|
"logps/rejected": -259.2649230957031,
|
|
"loss": 5.5123,
|
|
"rewards/accuracies": 0.676562488079071,
|
|
"rewards/chosen": 0.012289796955883503,
|
|
"rewards/margins": 0.008342583663761616,
|
|
"rewards/rejected": 0.003947213292121887,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.14659685863874344,
|
|
"epsilon_dpo/beta": 0.009031310677528381,
|
|
"epsilon_dpo/beta_margin_grad_mean": -0.4967042803764343,
|
|
"epsilon_dpo/beta_margin_grad_std": 0.00740186357870698,
|
|
"epsilon_dpo/beta_margin_mean": 0.013186539523303509,
|
|
"epsilon_dpo/beta_margin_std": 0.029618557542562485,
|
|
"epsilon_dpo/loss_margin_mean": 1.483746886253357,
|
|
"grad_norm": 13.820236206054688,
|
|
"kl/avg_steps": 0.3890624940395355,
|
|
"kl/beta": 0.009065655060112476,
|
|
"kl/n_epsilon_steps": 0.30078125,
|
|
"kl/p_epsilon_steps": 0.6898437738418579,
|
|
"learning_rate": 4.970496218214204e-07,
|
|
"logits/chosen": 2.474260091781616,
|
|
"logits/rejected": 2.7694077491760254,
|
|
"logps/chosen": -267.3814392089844,
|
|
"logps/ref_chosen": -269.73370361328125,
|
|
"logps/ref_rejected": -258.15594482421875,
|
|
"logps/rejected": -257.28741455078125,
|
|
"loss": 5.4935,
|
|
"rewards/accuracies": 0.702343761920929,
|
|
"rewards/chosen": 0.02110612951219082,
|
|
"rewards/margins": 0.013186539523303509,
|
|
"rewards/rejected": 0.00791959185153246,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.16753926701570682,
|
|
"epsilon_dpo/beta": 0.008663726039230824,
|
|
"epsilon_dpo/beta_margin_grad_mean": -0.49476176500320435,
|
|
"epsilon_dpo/beta_margin_grad_std": 0.01098305732011795,
|
|
"epsilon_dpo/beta_margin_mean": 0.020962897688150406,
|
|
"epsilon_dpo/beta_margin_std": 0.04398656636476517,
|
|
"epsilon_dpo/loss_margin_mean": 2.4553990364074707,
|
|
"grad_norm": 13.310928344726562,
|
|
"kl/avg_steps": 0.4117187559604645,
|
|
"kl/beta": 0.008698700927197933,
|
|
"kl/n_epsilon_steps": 0.28984373807907104,
|
|
"kl/p_epsilon_steps": 0.7015625238418579,
|
|
"learning_rate": 4.935856505068998e-07,
|
|
"logits/chosen": 2.4028592109680176,
|
|
"logits/rejected": 2.7112083435058594,
|
|
"logps/chosen": -268.78997802734375,
|
|
"logps/ref_chosen": -273.09210205078125,
|
|
"logps/ref_rejected": -259.3874816894531,
|
|
"logps/rejected": -257.54071044921875,
|
|
"loss": 5.4638,
|
|
"rewards/accuracies": 0.703906238079071,
|
|
"rewards/chosen": 0.03706257790327072,
|
|
"rewards/margins": 0.020962897688150406,
|
|
"rewards/rejected": 0.016099678352475166,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.18848167539267016,
|
|
"epsilon_dpo/beta": 0.008329156786203384,
|
|
"epsilon_dpo/beta_margin_grad_mean": -0.49337729811668396,
|
|
"epsilon_dpo/beta_margin_grad_std": 0.013919507153332233,
|
|
"epsilon_dpo/beta_margin_mean": 0.026513313874602318,
|
|
"epsilon_dpo/beta_margin_std": 0.05574870854616165,
|
|
"epsilon_dpo/loss_margin_mean": 3.229220151901245,
|
|
"grad_norm": 12.768597602844238,
|
|
"kl/avg_steps": 0.40625,
|
|
"kl/beta": 0.008362272754311562,
|
|
"kl/n_epsilon_steps": 0.29374998807907104,
|
|
"kl/p_epsilon_steps": 0.699999988079071,
|
|
"learning_rate": 4.8881598109976e-07,
|
|
"logits/chosen": 2.430711030960083,
|
|
"logits/rejected": 2.644582748413086,
|
|
"logps/chosen": -263.22772216796875,
|
|
"logps/ref_chosen": -270.48480224609375,
|
|
"logps/ref_rejected": -259.2120361328125,
|
|
"logps/rejected": -255.18417358398438,
|
|
"loss": 5.443,
|
|
"rewards/accuracies": 0.702343761920929,
|
|
"rewards/chosen": 0.0601632222533226,
|
|
"rewards/margins": 0.02651331201195717,
|
|
"rewards/rejected": 0.03364991024136543,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.2094240837696335,
|
|
"epsilon_dpo/beta": 0.008008182048797607,
|
|
"epsilon_dpo/beta_margin_grad_mean": -0.4916536211967468,
|
|
"epsilon_dpo/beta_margin_grad_std": 0.01792542263865471,
|
|
"epsilon_dpo/beta_margin_mean": 0.03343886882066727,
|
|
"epsilon_dpo/beta_margin_std": 0.07184432446956635,
|
|
"epsilon_dpo/loss_margin_mean": 4.237745761871338,
|
|
"grad_norm": 12.262528419494629,
|
|
"kl/avg_steps": 0.3812499940395355,
|
|
"kl/beta": 0.00803801417350769,
|
|
"kl/n_epsilon_steps": 0.3031249940395355,
|
|
"kl/p_epsilon_steps": 0.684374988079071,
|
|
"learning_rate": 4.827661805750437e-07,
|
|
"logits/chosen": 2.3381965160369873,
|
|
"logits/rejected": 2.474226236343384,
|
|
"logps/chosen": -262.87408447265625,
|
|
"logps/ref_chosen": -272.49383544921875,
|
|
"logps/ref_rejected": -255.8369598388672,
|
|
"logps/rejected": -250.4550018310547,
|
|
"loss": 5.4178,
|
|
"rewards/accuracies": 0.6953125,
|
|
"rewards/chosen": 0.0767994076013565,
|
|
"rewards/margins": 0.033438872545957565,
|
|
"rewards/rejected": 0.04336053133010864,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.23036649214659685,
|
|
"epsilon_dpo/beta": 0.007680200040340424,
|
|
"epsilon_dpo/beta_margin_grad_mean": -0.4877113699913025,
|
|
"epsilon_dpo/beta_margin_grad_std": 0.02195078134536743,
|
|
"epsilon_dpo/beta_margin_mean": 0.04926630109548569,
|
|
"epsilon_dpo/beta_margin_std": 0.08810068666934967,
|
|
"epsilon_dpo/loss_margin_mean": 6.498995780944824,
|
|
"grad_norm": 12.287609100341797,
|
|
"kl/avg_steps": 0.44140625,
|
|
"kl/beta": 0.007713483180850744,
|
|
"kl/n_epsilon_steps": 0.2718749940395355,
|
|
"kl/p_epsilon_steps": 0.7132812738418579,
|
|
"learning_rate": 4.75468677825789e-07,
|
|
"logits/chosen": 2.2321219444274902,
|
|
"logits/rejected": 2.585568904876709,
|
|
"logps/chosen": -263.58843994140625,
|
|
"logps/ref_chosen": -272.6753845214844,
|
|
"logps/ref_rejected": -260.817138671875,
|
|
"logps/rejected": -258.2291564941406,
|
|
"loss": 5.3585,
|
|
"rewards/accuracies": 0.7320312261581421,
|
|
"rewards/chosen": 0.06958577036857605,
|
|
"rewards/margins": 0.04926629737019539,
|
|
"rewards/rejected": 0.020319465547800064,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.2513089005235602,
|
|
"epsilon_dpo/beta": 0.007364341057837009,
|
|
"epsilon_dpo/beta_margin_grad_mean": -0.4861171245574951,
|
|
"epsilon_dpo/beta_margin_grad_std": 0.027931923046708107,
|
|
"epsilon_dpo/beta_margin_mean": 0.05574618652462959,
|
|
"epsilon_dpo/beta_margin_std": 0.11227792501449585,
|
|
"epsilon_dpo/loss_margin_mean": 7.674368381500244,
|
|
"grad_norm": 12.68581485748291,
|
|
"kl/avg_steps": 0.3984375,
|
|
"kl/beta": 0.007393070962280035,
|
|
"kl/n_epsilon_steps": 0.296875,
|
|
"kl/p_epsilon_steps": 0.6953125,
|
|
"learning_rate": 4.669625898336438e-07,
|
|
"logits/chosen": 2.292116403579712,
|
|
"logits/rejected": 2.474891185760498,
|
|
"logps/chosen": -273.1396789550781,
|
|
"logps/ref_chosen": -279.50213623046875,
|
|
"logps/ref_rejected": -263.6972351074219,
|
|
"logps/rejected": -265.0091857910156,
|
|
"loss": 5.3381,
|
|
"rewards/accuracies": 0.7007812261581421,
|
|
"rewards/chosen": 0.046533744782209396,
|
|
"rewards/margins": 0.05574618652462959,
|
|
"rewards/rejected": -0.009212437085807323,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.27225130890052357,
|
|
"epsilon_dpo/beta": 0.007093364838510752,
|
|
"epsilon_dpo/beta_margin_grad_mean": -0.4820740818977356,
|
|
"epsilon_dpo/beta_margin_grad_std": 0.03345402330160141,
|
|
"epsilon_dpo/beta_margin_mean": 0.07208652794361115,
|
|
"epsilon_dpo/beta_margin_std": 0.13469013571739197,
|
|
"epsilon_dpo/loss_margin_mean": 10.307097434997559,
|
|
"grad_norm": 15.22977352142334,
|
|
"kl/avg_steps": 0.3843750059604645,
|
|
"kl/beta": 0.0071199932135641575,
|
|
"kl/n_epsilon_steps": 0.3023437559604645,
|
|
"kl/p_epsilon_steps": 0.686718761920929,
|
|
"learning_rate": 4.5729351198915705e-07,
|
|
"logits/chosen": 2.230104923248291,
|
|
"logits/rejected": 2.4557857513427734,
|
|
"logps/chosen": -272.00311279296875,
|
|
"logps/ref_chosen": -278.95745849609375,
|
|
"logps/ref_rejected": -262.9747314453125,
|
|
"logps/rejected": -266.3275146484375,
|
|
"loss": 5.2805,
|
|
"rewards/accuracies": 0.70703125,
|
|
"rewards/chosen": 0.04882372170686722,
|
|
"rewards/margins": 0.07208652794361115,
|
|
"rewards/rejected": -0.02326280251145363,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.2931937172774869,
|
|
"epsilon_dpo/beta": 0.0068093957379460335,
|
|
"epsilon_dpo/beta_margin_grad_mean": -0.4802798628807068,
|
|
"epsilon_dpo/beta_margin_grad_std": 0.0389549545943737,
|
|
"epsilon_dpo/beta_margin_mean": 0.07946081459522247,
|
|
"epsilon_dpo/beta_margin_std": 0.1572197675704956,
|
|
"epsilon_dpo/loss_margin_mean": 11.81810474395752,
|
|
"grad_norm": 11.451045989990234,
|
|
"kl/avg_steps": 0.40625,
|
|
"kl/beta": 0.006836493965238333,
|
|
"kl/n_epsilon_steps": 0.2906250059604645,
|
|
"kl/p_epsilon_steps": 0.6968749761581421,
|
|
"learning_rate": 4.4651327368569684e-07,
|
|
"logits/chosen": 2.035799741744995,
|
|
"logits/rejected": 2.3696587085723877,
|
|
"logps/chosen": -278.00701904296875,
|
|
"logps/ref_chosen": -282.004150390625,
|
|
"logps/ref_rejected": -268.6994934082031,
|
|
"logps/rejected": -276.5204772949219,
|
|
"loss": 5.2585,
|
|
"rewards/accuracies": 0.706250011920929,
|
|
"rewards/chosen": 0.02683289907872677,
|
|
"rewards/margins": 0.07946079969406128,
|
|
"rewards/rejected": -0.05262790992856026,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.31413612565445026,
|
|
"epsilon_dpo/beta": 0.0065385727211833,
|
|
"epsilon_dpo/beta_margin_grad_mean": -0.47647207975387573,
|
|
"epsilon_dpo/beta_margin_grad_std": 0.04337490350008011,
|
|
"epsilon_dpo/beta_margin_mean": 0.09494680166244507,
|
|
"epsilon_dpo/beta_margin_std": 0.1755046844482422,
|
|
"epsilon_dpo/loss_margin_mean": 14.688570976257324,
|
|
"grad_norm": 12.580639839172363,
|
|
"kl/avg_steps": 0.39140623807907104,
|
|
"kl/beta": 0.006563636474311352,
|
|
"kl/n_epsilon_steps": 0.2984375059604645,
|
|
"kl/p_epsilon_steps": 0.6898437738418579,
|
|
"learning_rate": 4.346796604970912e-07,
|
|
"logits/chosen": 2.1158509254455566,
|
|
"logits/rejected": 2.3138821125030518,
|
|
"logps/chosen": -274.89691162109375,
|
|
"logps/ref_chosen": -278.5110778808594,
|
|
"logps/ref_rejected": -255.59854125976562,
|
|
"logps/rejected": -266.67291259765625,
|
|
"loss": 5.2052,
|
|
"rewards/accuracies": 0.71875,
|
|
"rewards/chosen": 0.023254716768860817,
|
|
"rewards/margins": 0.09494679421186447,
|
|
"rewards/rejected": -0.0716920793056488,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.33507853403141363,
|
|
"epsilon_dpo/beta": 0.006265554577112198,
|
|
"epsilon_dpo/beta_margin_grad_mean": -0.4711342453956604,
|
|
"epsilon_dpo/beta_margin_grad_std": 0.04951424151659012,
|
|
"epsilon_dpo/beta_margin_mean": 0.11672033369541168,
|
|
"epsilon_dpo/beta_margin_std": 0.20064322650432587,
|
|
"epsilon_dpo/loss_margin_mean": 18.817256927490234,
|
|
"grad_norm": 12.49393367767334,
|
|
"kl/avg_steps": 0.4453125,
|
|
"kl/beta": 0.006292995996773243,
|
|
"kl/n_epsilon_steps": 0.27421873807907104,
|
|
"kl/p_epsilon_steps": 0.719531238079071,
|
|
"learning_rate": 4.218561044282098e-07,
|
|
"logits/chosen": 2.0132875442504883,
|
|
"logits/rejected": 2.3389055728912354,
|
|
"logps/chosen": -276.2854309082031,
|
|
"logps/ref_chosen": -276.8100280761719,
|
|
"logps/ref_rejected": -264.40625,
|
|
"logps/rejected": -282.6988525390625,
|
|
"loss": 5.1326,
|
|
"rewards/accuracies": 0.7250000238418579,
|
|
"rewards/chosen": 0.002674251329153776,
|
|
"rewards/margins": 0.11672033369541168,
|
|
"rewards/rejected": -0.11404608190059662,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.35602094240837695,
|
|
"epsilon_dpo/beta": 0.005999959539622068,
|
|
"epsilon_dpo/beta_margin_grad_mean": -0.46788015961647034,
|
|
"epsilon_dpo/beta_margin_grad_std": 0.05059142783284187,
|
|
"epsilon_dpo/beta_margin_mean": 0.13001370429992676,
|
|
"epsilon_dpo/beta_margin_std": 0.2052367627620697,
|
|
"epsilon_dpo/loss_margin_mean": 21.894283294677734,
|
|
"grad_norm": 15.406351089477539,
|
|
"kl/avg_steps": 0.46875,
|
|
"kl/beta": 0.006027590483427048,
|
|
"kl/n_epsilon_steps": 0.26249998807907104,
|
|
"kl/p_epsilon_steps": 0.731249988079071,
|
|
"learning_rate": 4.081113438988443e-07,
|
|
"logits/chosen": 1.973179578781128,
|
|
"logits/rejected": 2.2208034992218018,
|
|
"logps/chosen": -282.03741455078125,
|
|
"logps/ref_chosen": -281.14337158203125,
|
|
"logps/ref_rejected": -250.2654266357422,
|
|
"logps/rejected": -273.05377197265625,
|
|
"loss": 5.0843,
|
|
"rewards/accuracies": 0.7359374761581421,
|
|
"rewards/chosen": -0.005938548129051924,
|
|
"rewards/margins": 0.13001371920108795,
|
|
"rewards/rejected": -0.13595226407051086,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.3769633507853403,
|
|
"epsilon_dpo/beta": 0.0057226200588047504,
|
|
"epsilon_dpo/beta_margin_grad_mean": -0.46952924132347107,
|
|
"epsilon_dpo/beta_margin_grad_std": 0.05471862107515335,
|
|
"epsilon_dpo/beta_margin_mean": 0.12347264587879181,
|
|
"epsilon_dpo/beta_margin_std": 0.2224453240633011,
|
|
"epsilon_dpo/loss_margin_mean": 21.816726684570312,
|
|
"grad_norm": 24.414875030517578,
|
|
"kl/avg_steps": 0.45703125,
|
|
"kl/beta": 0.005748326890170574,
|
|
"kl/n_epsilon_steps": 0.26875001192092896,
|
|
"kl/p_epsilon_steps": 0.725781261920929,
|
|
"learning_rate": 3.935190552834828e-07,
|
|
"logits/chosen": 1.9551303386688232,
|
|
"logits/rejected": 2.1914541721343994,
|
|
"logps/chosen": -283.0456237792969,
|
|
"logps/ref_chosen": -279.8695068359375,
|
|
"logps/ref_rejected": -263.40533447265625,
|
|
"logps/rejected": -288.39813232421875,
|
|
"loss": 5.1163,
|
|
"rewards/accuracies": 0.723437488079071,
|
|
"rewards/chosen": -0.018750619143247604,
|
|
"rewards/margins": 0.1234726533293724,
|
|
"rewards/rejected": -0.1422232687473297,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.39790575916230364,
|
|
"epsilon_dpo/beta": 0.005460767075419426,
|
|
"epsilon_dpo/beta_margin_grad_mean": -0.462840735912323,
|
|
"epsilon_dpo/beta_margin_grad_std": 0.05923638492822647,
|
|
"epsilon_dpo/beta_margin_mean": 0.15091852843761444,
|
|
"epsilon_dpo/beta_margin_std": 0.24113008379936218,
|
|
"epsilon_dpo/loss_margin_mean": 27.910152435302734,
|
|
"grad_norm": 19.144001007080078,
|
|
"kl/avg_steps": 0.47578126192092896,
|
|
"kl/beta": 0.005486341658979654,
|
|
"kl/n_epsilon_steps": 0.25703126192092896,
|
|
"kl/p_epsilon_steps": 0.7328125238418579,
|
|
"learning_rate": 3.781574579820464e-07,
|
|
"logits/chosen": 1.913297414779663,
|
|
"logits/rejected": 2.166954517364502,
|
|
"logps/chosen": -288.5598449707031,
|
|
"logps/ref_chosen": -278.2532958984375,
|
|
"logps/ref_rejected": -257.45025634765625,
|
|
"logps/rejected": -295.66693115234375,
|
|
"loss": 5.0227,
|
|
"rewards/accuracies": 0.741406261920929,
|
|
"rewards/chosen": -0.05687868595123291,
|
|
"rewards/margins": 0.15091851353645325,
|
|
"rewards/rejected": -0.20779721438884735,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.418848167539267,
|
|
"epsilon_dpo/beta": 0.005235456861555576,
|
|
"epsilon_dpo/beta_margin_grad_mean": -0.4651154577732086,
|
|
"epsilon_dpo/beta_margin_grad_std": 0.06457895785570145,
|
|
"epsilon_dpo/beta_margin_mean": 0.14193181693553925,
|
|
"epsilon_dpo/beta_margin_std": 0.26321619749069214,
|
|
"epsilon_dpo/loss_margin_mean": 27.4693603515625,
|
|
"grad_norm": 20.511478424072266,
|
|
"kl/avg_steps": 0.38749998807907104,
|
|
"kl/beta": 0.005255300085991621,
|
|
"kl/n_epsilon_steps": 0.30390626192092896,
|
|
"kl/p_epsilon_steps": 0.69140625,
|
|
"learning_rate": 3.621088951385353e-07,
|
|
"logits/chosen": 1.876455307006836,
|
|
"logits/rejected": 2.166574001312256,
|
|
"logps/chosen": -285.0974426269531,
|
|
"logps/ref_chosen": -275.12750244140625,
|
|
"logps/ref_rejected": -260.0728759765625,
|
|
"logps/rejected": -297.5121154785156,
|
|
"loss": 5.0674,
|
|
"rewards/accuracies": 0.70703125,
|
|
"rewards/chosen": -0.053233105689287186,
|
|
"rewards/margins": 0.14193184673786163,
|
|
"rewards/rejected": -0.19516493380069733,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.418848167539267,
|
|
"eval_epsilon_dpo/beta": 0.00512322410941124,
|
|
"eval_epsilon_dpo/beta_margin_grad_mean": -0.464358389377594,
|
|
"eval_epsilon_dpo/beta_margin_grad_std": 0.06305021047592163,
|
|
"eval_epsilon_dpo/beta_margin_mean": 0.14517197012901306,
|
|
"eval_epsilon_dpo/beta_margin_std": 0.25747936964035034,
|
|
"eval_epsilon_dpo/loss_margin_mean": 28.677000045776367,
|
|
"eval_kl/n_epsilon_steps": 0.2930000126361847,
|
|
"eval_kl/p_epsilon_steps": 0.6990000009536743,
|
|
"eval_logits/chosen": 1.8063491582870483,
|
|
"eval_logits/rejected": 2.155062198638916,
|
|
"eval_logps/chosen": -291.77764892578125,
|
|
"eval_logps/ref_chosen": -280.4282531738281,
|
|
"eval_logps/ref_rejected": -264.7044677734375,
|
|
"eval_logps/rejected": -304.7308654785156,
|
|
"eval_loss": 0.6321755647659302,
|
|
"eval_rewards/accuracies": 0.7170000076293945,
|
|
"eval_rewards/chosen": -0.05901862308382988,
|
|
"eval_rewards/margins": 0.14517197012901306,
|
|
"eval_rewards/rejected": -0.20419058203697205,
|
|
"eval_runtime": 103.5445,
|
|
"eval_samples_per_second": 19.315,
|
|
"eval_steps_per_second": 1.207,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.4397905759162304,
|
|
"epsilon_dpo/beta": 0.005026308819651604,
|
|
"epsilon_dpo/beta_margin_grad_mean": -0.4626430571079254,
|
|
"epsilon_dpo/beta_margin_grad_std": 0.06565666198730469,
|
|
"epsilon_dpo/beta_margin_mean": 0.15212179720401764,
|
|
"epsilon_dpo/beta_margin_std": 0.2678548991680145,
|
|
"epsilon_dpo/loss_margin_mean": 30.614501953125,
|
|
"grad_norm": 30.989282608032227,
|
|
"kl/avg_steps": 0.4203124940395355,
|
|
"kl/beta": 0.005047028884291649,
|
|
"kl/n_epsilon_steps": 0.28437501192092896,
|
|
"kl/p_epsilon_steps": 0.7046874761581421,
|
|
"learning_rate": 3.454593922550693e-07,
|
|
"logits/chosen": 1.8265072107315063,
|
|
"logits/rejected": 2.06158185005188,
|
|
"logps/chosen": -291.03253173828125,
|
|
"logps/ref_chosen": -279.7332763671875,
|
|
"logps/ref_rejected": -267.92437744140625,
|
|
"logps/rejected": -309.8381042480469,
|
|
"loss": 5.0314,
|
|
"rewards/accuracies": 0.7124999761581421,
|
|
"rewards/chosen": -0.057643067091703415,
|
|
"rewards/margins": 0.15212179720401764,
|
|
"rewards/rejected": -0.20976486802101135,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.4607329842931937,
|
|
"epsilon_dpo/beta": 0.004815506748855114,
|
|
"epsilon_dpo/beta_margin_grad_mean": -0.46018725633621216,
|
|
"epsilon_dpo/beta_margin_grad_std": 0.06686625629663467,
|
|
"epsilon_dpo/beta_margin_mean": 0.16237930953502655,
|
|
"epsilon_dpo/beta_margin_std": 0.2736971378326416,
|
|
"epsilon_dpo/loss_margin_mean": 34.08965301513672,
|
|
"grad_norm": 27.191370010375977,
|
|
"kl/avg_steps": 0.4453125,
|
|
"kl/beta": 0.004836562555283308,
|
|
"kl/n_epsilon_steps": 0.27265626192092896,
|
|
"kl/p_epsilon_steps": 0.717968761920929,
|
|
"learning_rate": 3.2829819606729477e-07,
|
|
"logits/chosen": 1.8367538452148438,
|
|
"logits/rejected": 2.1368610858917236,
|
|
"logps/chosen": -304.51153564453125,
|
|
"logps/ref_chosen": -287.2923583984375,
|
|
"logps/ref_rejected": -270.8887023925781,
|
|
"logps/rejected": -322.1975402832031,
|
|
"loss": 4.9966,
|
|
"rewards/accuracies": 0.7265625,
|
|
"rewards/chosen": -0.08366179466247559,
|
|
"rewards/margins": 0.16237932443618774,
|
|
"rewards/rejected": -0.24604110419750214,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.4816753926701571,
|
|
"epsilon_dpo/beta": 0.004599227569997311,
|
|
"epsilon_dpo/beta_margin_grad_mean": -0.45680707693099976,
|
|
"epsilon_dpo/beta_margin_grad_std": 0.06870144605636597,
|
|
"epsilon_dpo/beta_margin_mean": 0.1762588918209076,
|
|
"epsilon_dpo/beta_margin_std": 0.2809893488883972,
|
|
"epsilon_dpo/loss_margin_mean": 38.755615234375,
|
|
"grad_norm": 22.937519073486328,
|
|
"kl/avg_steps": 0.47734373807907104,
|
|
"kl/beta": 0.004620816558599472,
|
|
"kl/n_epsilon_steps": 0.2593750059604645,
|
|
"kl/p_epsilon_steps": 0.7367187738418579,
|
|
"learning_rate": 3.1071729615293424e-07,
|
|
"logits/chosen": 1.7133830785751343,
|
|
"logits/rejected": 2.039473533630371,
|
|
"logps/chosen": -293.60247802734375,
|
|
"logps/ref_chosen": -272.74945068359375,
|
|
"logps/ref_rejected": -258.1266784667969,
|
|
"logps/rejected": -317.7353515625,
|
|
"loss": 4.9502,
|
|
"rewards/accuracies": 0.739062488079071,
|
|
"rewards/chosen": -0.09684249013662338,
|
|
"rewards/margins": 0.1762588918209076,
|
|
"rewards/rejected": -0.2731013596057892,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.5026178010471204,
|
|
"epsilon_dpo/beta": 0.0043902210891246796,
|
|
"epsilon_dpo/beta_margin_grad_mean": -0.457236111164093,
|
|
"epsilon_dpo/beta_margin_grad_std": 0.07059483975172043,
|
|
"epsilon_dpo/beta_margin_mean": 0.17473134398460388,
|
|
"epsilon_dpo/beta_margin_std": 0.2893211245536804,
|
|
"epsilon_dpo/loss_margin_mean": 40.25088882446289,
|
|
"grad_norm": 22.779020309448242,
|
|
"kl/avg_steps": 0.4468750059604645,
|
|
"kl/beta": 0.004409492947161198,
|
|
"kl/n_epsilon_steps": 0.2718749940395355,
|
|
"kl/p_epsilon_steps": 0.71875,
|
|
"learning_rate": 2.9281093183781403e-07,
|
|
"logits/chosen": 1.7209564447402954,
|
|
"logits/rejected": 2.0882318019866943,
|
|
"logps/chosen": -300.35296630859375,
|
|
"logps/ref_chosen": -280.094970703125,
|
|
"logps/ref_rejected": -263.1619873046875,
|
|
"logps/rejected": -323.6708679199219,
|
|
"loss": 4.9599,
|
|
"rewards/accuracies": 0.7289062738418579,
|
|
"rewards/chosen": -0.09005247056484222,
|
|
"rewards/margins": 0.17473134398460388,
|
|
"rewards/rejected": -0.2647838294506073,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.5235602094240838,
|
|
"epsilon_dpo/beta": 0.00419188616797328,
|
|
"epsilon_dpo/beta_margin_grad_mean": -0.4556571841239929,
|
|
"epsilon_dpo/beta_margin_grad_std": 0.0703204870223999,
|
|
"epsilon_dpo/beta_margin_mean": 0.18116165697574615,
|
|
"epsilon_dpo/beta_margin_std": 0.2881784737110138,
|
|
"epsilon_dpo/loss_margin_mean": 43.624481201171875,
|
|
"grad_norm": 39.10613250732422,
|
|
"kl/avg_steps": 0.4593749940395355,
|
|
"kl/beta": 0.004210834391415119,
|
|
"kl/n_epsilon_steps": 0.26640623807907104,
|
|
"kl/p_epsilon_steps": 0.725781261920929,
|
|
"learning_rate": 2.7467508704251135e-07,
|
|
"logits/chosen": 1.741624116897583,
|
|
"logits/rejected": 1.9895031452178955,
|
|
"logps/chosen": -296.340576171875,
|
|
"logps/ref_chosen": -279.10601806640625,
|
|
"logps/ref_rejected": -255.9159698486328,
|
|
"logps/rejected": -316.7749938964844,
|
|
"loss": 4.9365,
|
|
"rewards/accuracies": 0.7359374761581421,
|
|
"rewards/chosen": -0.07299315184354782,
|
|
"rewards/margins": 0.18116167187690735,
|
|
"rewards/rejected": -0.25415483117103577,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.5445026178010471,
|
|
"epsilon_dpo/beta": 0.004008334130048752,
|
|
"epsilon_dpo/beta_margin_grad_mean": -0.4575107991695404,
|
|
"epsilon_dpo/beta_margin_grad_std": 0.07278217375278473,
|
|
"epsilon_dpo/beta_margin_mean": 0.17386779189109802,
|
|
"epsilon_dpo/beta_margin_std": 0.2985754609107971,
|
|
"epsilon_dpo/loss_margin_mean": 43.82888412475586,
|
|
"grad_norm": 32.33043670654297,
|
|
"kl/avg_steps": 0.43828123807907104,
|
|
"kl/beta": 0.004025599919259548,
|
|
"kl/n_epsilon_steps": 0.2789062559604645,
|
|
"kl/p_epsilon_steps": 0.7171875238418579,
|
|
"learning_rate": 2.5640697577740815e-07,
|
|
"logits/chosen": 1.7184337377548218,
|
|
"logits/rejected": 1.9476096630096436,
|
|
"logps/chosen": -306.7433166503906,
|
|
"logps/ref_chosen": -279.7398986816406,
|
|
"logps/ref_rejected": -256.90155029296875,
|
|
"logps/rejected": -327.7337951660156,
|
|
"loss": 4.9692,
|
|
"rewards/accuracies": 0.72265625,
|
|
"rewards/chosen": -0.10899752378463745,
|
|
"rewards/margins": 0.17386779189109802,
|
|
"rewards/rejected": -0.28286534547805786,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.5654450261780105,
|
|
"epsilon_dpo/beta": 0.0038394411094486713,
|
|
"epsilon_dpo/beta_margin_grad_mean": -0.45551127195358276,
|
|
"epsilon_dpo/beta_margin_grad_std": 0.07340405881404877,
|
|
"epsilon_dpo/beta_margin_mean": 0.18199250102043152,
|
|
"epsilon_dpo/beta_margin_std": 0.30104658007621765,
|
|
"epsilon_dpo/loss_margin_mean": 47.921356201171875,
|
|
"grad_norm": 26.059804916381836,
|
|
"kl/avg_steps": 0.44843751192092896,
|
|
"kl/beta": 0.00385635276325047,
|
|
"kl/n_epsilon_steps": 0.27031248807907104,
|
|
"kl/p_epsilon_steps": 0.71875,
|
|
"learning_rate": 2.381045210440644e-07,
|
|
"logits/chosen": 1.6957333087921143,
|
|
"logits/rejected": 1.981131911277771,
|
|
"logps/chosen": -306.7268981933594,
|
|
"logps/ref_chosen": -272.6238708496094,
|
|
"logps/ref_rejected": -256.24176025390625,
|
|
"logps/rejected": -338.26611328125,
|
|
"loss": 4.9401,
|
|
"rewards/accuracies": 0.7359374761581421,
|
|
"rewards/chosen": -0.13194236159324646,
|
|
"rewards/margins": 0.18199248611927032,
|
|
"rewards/rejected": -0.3139348328113556,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.5863874345549738,
|
|
"epsilon_dpo/beta": 0.0036588613875210285,
|
|
"epsilon_dpo/beta_margin_grad_mean": -0.4539538323879242,
|
|
"epsilon_dpo/beta_margin_grad_std": 0.07207532227039337,
|
|
"epsilon_dpo/beta_margin_mean": 0.18829122185707092,
|
|
"epsilon_dpo/beta_margin_std": 0.2957257628440857,
|
|
"epsilon_dpo/loss_margin_mean": 51.929046630859375,
|
|
"grad_norm": 21.85626220703125,
|
|
"kl/avg_steps": 0.48906248807907104,
|
|
"kl/beta": 0.0036765006370842457,
|
|
"kl/n_epsilon_steps": 0.25078123807907104,
|
|
"kl/p_epsilon_steps": 0.7398437261581421,
|
|
"learning_rate": 2.1986582993616925e-07,
|
|
"logits/chosen": 1.5749285221099854,
|
|
"logits/rejected": 1.9680347442626953,
|
|
"logps/chosen": -298.32781982421875,
|
|
"logps/ref_chosen": -272.6661682128906,
|
|
"logps/ref_rejected": -259.3951721191406,
|
|
"logps/rejected": -336.98590087890625,
|
|
"loss": 4.9148,
|
|
"rewards/accuracies": 0.749218761920929,
|
|
"rewards/chosen": -0.09480254352092743,
|
|
"rewards/margins": 0.18829122185707092,
|
|
"rewards/rejected": -0.28309375047683716,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.6073298429319371,
|
|
"epsilon_dpo/beta": 0.00350450468249619,
|
|
"epsilon_dpo/beta_margin_grad_mean": -0.46083664894104004,
|
|
"epsilon_dpo/beta_margin_grad_std": 0.07311917841434479,
|
|
"epsilon_dpo/beta_margin_mean": 0.1602335274219513,
|
|
"epsilon_dpo/beta_margin_std": 0.2994373142719269,
|
|
"epsilon_dpo/loss_margin_mean": 46.23841094970703,
|
|
"grad_norm": 34.233943939208984,
|
|
"kl/avg_steps": 0.3851562440395355,
|
|
"kl/beta": 0.003517721313983202,
|
|
"kl/n_epsilon_steps": 0.3031249940395355,
|
|
"kl/p_epsilon_steps": 0.688281238079071,
|
|
"learning_rate": 2.0178866775369774e-07,
|
|
"logits/chosen": 1.578467845916748,
|
|
"logits/rejected": 1.903235673904419,
|
|
"logps/chosen": -323.2730407714844,
|
|
"logps/ref_chosen": -287.4728698730469,
|
|
"logps/ref_rejected": -268.4922790527344,
|
|
"logps/rejected": -350.5308532714844,
|
|
"loss": 5.0191,
|
|
"rewards/accuracies": 0.7015625238418579,
|
|
"rewards/chosen": -0.1263677179813385,
|
|
"rewards/margins": 0.1602335274219513,
|
|
"rewards/rejected": -0.2866012454032898,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.6282722513089005,
|
|
"epsilon_dpo/beta": 0.003364184172824025,
|
|
"epsilon_dpo/beta_margin_grad_mean": -0.4569614827632904,
|
|
"epsilon_dpo/beta_margin_grad_std": 0.07025741040706635,
|
|
"epsilon_dpo/beta_margin_mean": 0.17601335048675537,
|
|
"epsilon_dpo/beta_margin_std": 0.2878516614437103,
|
|
"epsilon_dpo/loss_margin_mean": 52.840850830078125,
|
|
"grad_norm": 19.78177833557129,
|
|
"kl/avg_steps": 0.4351562559604645,
|
|
"kl/beta": 0.003378564026206732,
|
|
"kl/n_epsilon_steps": 0.2789062559604645,
|
|
"kl/p_epsilon_steps": 0.714062511920929,
|
|
"learning_rate": 1.839699339491937e-07,
|
|
"logits/chosen": 1.6086456775665283,
|
|
"logits/rejected": 1.9709374904632568,
|
|
"logps/chosen": -301.5176696777344,
|
|
"logps/ref_chosen": -273.06646728515625,
|
|
"logps/ref_rejected": -266.1439208984375,
|
|
"logps/rejected": -347.4358825683594,
|
|
"loss": 4.9542,
|
|
"rewards/accuracies": 0.721875011920929,
|
|
"rewards/chosen": -0.09642257541418076,
|
|
"rewards/margins": 0.17601335048675537,
|
|
"rewards/rejected": -0.2724359333515167,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.6492146596858639,
|
|
"epsilon_dpo/beta": 0.0032132375054061413,
|
|
"epsilon_dpo/beta_margin_grad_mean": -0.45566052198410034,
|
|
"epsilon_dpo/beta_margin_grad_std": 0.06932147592306137,
|
|
"epsilon_dpo/beta_margin_mean": 0.1810220181941986,
|
|
"epsilon_dpo/beta_margin_std": 0.28379470109939575,
|
|
"epsilon_dpo/loss_margin_mean": 56.88977813720703,
|
|
"grad_norm": 20.059579849243164,
|
|
"kl/avg_steps": 0.4867187440395355,
|
|
"kl/beta": 0.003228639718145132,
|
|
"kl/n_epsilon_steps": 0.25468748807907104,
|
|
"kl/p_epsilon_steps": 0.741406261920929,
|
|
"learning_rate": 1.6650514271527465e-07,
|
|
"logits/chosen": 1.593857765197754,
|
|
"logits/rejected": 1.952932596206665,
|
|
"logps/chosen": -313.94219970703125,
|
|
"logps/ref_chosen": -276.8886413574219,
|
|
"logps/ref_rejected": -256.80865478515625,
|
|
"logps/rejected": -350.75201416015625,
|
|
"loss": 4.9339,
|
|
"rewards/accuracies": 0.7367187738418579,
|
|
"rewards/chosen": -0.11971668899059296,
|
|
"rewards/margins": 0.1810220181941986,
|
|
"rewards/rejected": -0.30073872208595276,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.6701570680628273,
|
|
"epsilon_dpo/beta": 0.0030656014569103718,
|
|
"epsilon_dpo/beta_margin_grad_mean": -0.45544466376304626,
|
|
"epsilon_dpo/beta_margin_grad_std": 0.06911682337522507,
|
|
"epsilon_dpo/beta_margin_mean": 0.18175189197063446,
|
|
"epsilon_dpo/beta_margin_std": 0.2825908660888672,
|
|
"epsilon_dpo/loss_margin_mean": 59.900352478027344,
|
|
"grad_norm": 24.982254028320312,
|
|
"kl/avg_steps": 0.47343748807907104,
|
|
"kl/beta": 0.003079873975366354,
|
|
"kl/n_epsilon_steps": 0.2593750059604645,
|
|
"kl/p_epsilon_steps": 0.7328125238418579,
|
|
"learning_rate": 1.4948791099758052e-07,
|
|
"logits/chosen": 1.6970676183700562,
|
|
"logits/rejected": 2.0628037452697754,
|
|
"logps/chosen": -321.9020080566406,
|
|
"logps/ref_chosen": -282.2432556152344,
|
|
"logps/ref_rejected": -256.89776611328125,
|
|
"logps/rejected": -356.45684814453125,
|
|
"loss": 4.9303,
|
|
"rewards/accuracies": 0.73828125,
|
|
"rewards/chosen": -0.12259833514690399,
|
|
"rewards/margins": 0.18175189197063446,
|
|
"rewards/rejected": -0.30435022711753845,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.6910994764397905,
|
|
"epsilon_dpo/beta": 0.002925318432971835,
|
|
"epsilon_dpo/beta_margin_grad_mean": -0.45976167917251587,
|
|
"epsilon_dpo/beta_margin_grad_std": 0.06790686398744583,
|
|
"epsilon_dpo/beta_margin_mean": 0.16398653388023376,
|
|
"epsilon_dpo/beta_margin_std": 0.27741676568984985,
|
|
"epsilon_dpo/loss_margin_mean": 56.63254928588867,
|
|
"grad_norm": 35.780921936035156,
|
|
"kl/avg_steps": 0.46406251192092896,
|
|
"kl/beta": 0.0029386640526354313,
|
|
"kl/n_epsilon_steps": 0.2632812559604645,
|
|
"kl/p_epsilon_steps": 0.727343738079071,
|
|
"learning_rate": 1.3300945667758012e-07,
|
|
"logits/chosen": 1.6550931930541992,
|
|
"logits/rejected": 1.8850772380828857,
|
|
"logps/chosen": -316.6177062988281,
|
|
"logps/ref_chosen": -275.7609558105469,
|
|
"logps/ref_rejected": -263.5372619628906,
|
|
"logps/rejected": -361.02655029296875,
|
|
"loss": 4.9933,
|
|
"rewards/accuracies": 0.719531238079071,
|
|
"rewards/chosen": -0.12028974294662476,
|
|
"rewards/margins": 0.16398653388023376,
|
|
"rewards/rejected": -0.28427624702453613,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.7120418848167539,
|
|
"epsilon_dpo/beta": 0.0027930724900215864,
|
|
"epsilon_dpo/beta_margin_grad_mean": -0.4600375294685364,
|
|
"epsilon_dpo/beta_margin_grad_std": 0.06828001886606216,
|
|
"epsilon_dpo/beta_margin_mean": 0.16278859972953796,
|
|
"epsilon_dpo/beta_margin_std": 0.2784718871116638,
|
|
"epsilon_dpo/loss_margin_mean": 58.940940856933594,
|
|
"grad_norm": 19.590518951416016,
|
|
"kl/avg_steps": 0.4609375,
|
|
"kl/beta": 0.0028057279996573925,
|
|
"kl/n_epsilon_steps": 0.26484376192092896,
|
|
"kl/p_epsilon_steps": 0.725781261920929,
|
|
"learning_rate": 1.1715810961514072e-07,
|
|
"logits/chosen": 1.6267999410629272,
|
|
"logits/rejected": 1.9399261474609375,
|
|
"logps/chosen": -319.0074157714844,
|
|
"logps/ref_chosen": -269.4908447265625,
|
|
"logps/ref_rejected": -253.1649627685547,
|
|
"logps/rejected": -361.62249755859375,
|
|
"loss": 4.9976,
|
|
"rewards/accuracies": 0.725781261920929,
|
|
"rewards/chosen": -0.13907715678215027,
|
|
"rewards/margins": 0.16278859972953796,
|
|
"rewards/rejected": -0.30186575651168823,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.7329842931937173,
|
|
"epsilon_dpo/beta": 0.0026765193324536085,
|
|
"epsilon_dpo/beta_margin_grad_mean": -0.4628540575504303,
|
|
"epsilon_dpo/beta_margin_grad_std": 0.06378835439682007,
|
|
"epsilon_dpo/beta_margin_mean": 0.15105712413787842,
|
|
"epsilon_dpo/beta_margin_std": 0.25988245010375977,
|
|
"epsilon_dpo/loss_margin_mean": 57.061004638671875,
|
|
"grad_norm": 20.615802764892578,
|
|
"kl/avg_steps": 0.4242187440395355,
|
|
"kl/beta": 0.0026876390911638737,
|
|
"kl/n_epsilon_steps": 0.28515625,
|
|
"kl/p_epsilon_steps": 0.7093750238418579,
|
|
"learning_rate": 1.0201883817182949e-07,
|
|
"logits/chosen": 1.6629711389541626,
|
|
"logits/rejected": 2.020021915435791,
|
|
"logps/chosen": -344.3343811035156,
|
|
"logps/ref_chosen": -284.06365966796875,
|
|
"logps/ref_rejected": -260.7166442871094,
|
|
"logps/rejected": -378.0483703613281,
|
|
"loss": 5.0309,
|
|
"rewards/accuracies": 0.715624988079071,
|
|
"rewards/chosen": -0.16222040355205536,
|
|
"rewards/margins": 0.15105712413787842,
|
|
"rewards/rejected": -0.31327754259109497,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.7539267015706806,
|
|
"epsilon_dpo/beta": 0.002562676090747118,
|
|
"epsilon_dpo/beta_margin_grad_mean": -0.4644971787929535,
|
|
"epsilon_dpo/beta_margin_grad_std": 0.06188509613275528,
|
|
"epsilon_dpo/beta_margin_mean": 0.14429207146167755,
|
|
"epsilon_dpo/beta_margin_std": 0.2519903779029846,
|
|
"epsilon_dpo/loss_margin_mean": 56.94682693481445,
|
|
"grad_norm": 28.58539581298828,
|
|
"kl/avg_steps": 0.4359374940395355,
|
|
"kl/beta": 0.0025736321695148945,
|
|
"kl/n_epsilon_steps": 0.27656251192092896,
|
|
"kl/p_epsilon_steps": 0.7124999761581421,
|
|
"learning_rate": 8.76727937529367e-08,
|
|
"logits/chosen": 1.558531403541565,
|
|
"logits/rejected": 1.9686288833618164,
|
|
"logps/chosen": -326.70318603515625,
|
|
"logps/ref_chosen": -269.2133483886719,
|
|
"logps/ref_rejected": -251.10647583007812,
|
|
"logps/rejected": -365.5430908203125,
|
|
"loss": 5.0524,
|
|
"rewards/accuracies": 0.7132812738418579,
|
|
"rewards/chosen": -0.14818084239959717,
|
|
"rewards/margins": 0.14429204165935516,
|
|
"rewards/rejected": -0.2924729287624359,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.774869109947644,
|
|
"epsilon_dpo/beta": 0.0024432847276329994,
|
|
"epsilon_dpo/beta_margin_grad_mean": -0.45972761511802673,
|
|
"epsilon_dpo/beta_margin_grad_std": 0.06026551127433777,
|
|
"epsilon_dpo/beta_margin_mean": 0.16353142261505127,
|
|
"epsilon_dpo/beta_margin_std": 0.2452823668718338,
|
|
"epsilon_dpo/loss_margin_mean": 67.50531005859375,
|
|
"grad_norm": 18.816442489624023,
|
|
"kl/avg_steps": 0.500781238079071,
|
|
"kl/beta": 0.0024553355760872364,
|
|
"kl/n_epsilon_steps": 0.24609375,
|
|
"kl/p_epsilon_steps": 0.746874988079071,
|
|
"learning_rate": 7.419687580962222e-08,
|
|
"logits/chosen": 1.6747153997421265,
|
|
"logits/rejected": 1.9603767395019531,
|
|
"logps/chosen": -331.12542724609375,
|
|
"logps/ref_chosen": -276.8400573730469,
|
|
"logps/ref_rejected": -257.84912109375,
|
|
"logps/rejected": -379.6397705078125,
|
|
"loss": 4.9777,
|
|
"rewards/accuracies": 0.746874988079071,
|
|
"rewards/chosen": -0.13340650498867035,
|
|
"rewards/margins": 0.16353140771389008,
|
|
"rewards/rejected": -0.2969379425048828,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.7958115183246073,
|
|
"epsilon_dpo/beta": 0.0023312487173825502,
|
|
"epsilon_dpo/beta_margin_grad_mean": -0.46638360619544983,
|
|
"epsilon_dpo/beta_margin_grad_std": 0.05908365920186043,
|
|
"epsilon_dpo/beta_margin_mean": 0.13641974329948425,
|
|
"epsilon_dpo/beta_margin_std": 0.23996075987815857,
|
|
"epsilon_dpo/loss_margin_mean": 59.121360778808594,
|
|
"grad_norm": 33.467586517333984,
|
|
"kl/avg_steps": 0.4359374940395355,
|
|
"kl/beta": 0.0023412262089550495,
|
|
"kl/n_epsilon_steps": 0.2789062559604645,
|
|
"kl/p_epsilon_steps": 0.71484375,
|
|
"learning_rate": 6.166331963291519e-08,
|
|
"logits/chosen": 1.7089202404022217,
|
|
"logits/rejected": 1.9208694696426392,
|
|
"logps/chosen": -356.5716857910156,
|
|
"logps/ref_chosen": -294.3582458496094,
|
|
"logps/ref_rejected": -266.00933837890625,
|
|
"logps/rejected": -387.34417724609375,
|
|
"loss": 5.0756,
|
|
"rewards/accuracies": 0.723437488079071,
|
|
"rewards/chosen": -0.14578744769096375,
|
|
"rewards/margins": 0.13641975820064545,
|
|
"rewards/rejected": -0.2822072207927704,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.8167539267015707,
|
|
"epsilon_dpo/beta": 0.0022311562206596136,
|
|
"epsilon_dpo/beta_margin_grad_mean": -0.4652669429779053,
|
|
"epsilon_dpo/beta_margin_grad_std": 0.05686299130320549,
|
|
"epsilon_dpo/beta_margin_mean": 0.14073483645915985,
|
|
"epsilon_dpo/beta_margin_std": 0.23052707314491272,
|
|
"epsilon_dpo/loss_margin_mean": 63.751487731933594,
|
|
"grad_norm": 20.419815063476562,
|
|
"kl/avg_steps": 0.4375,
|
|
"kl/beta": 0.0022407451178878546,
|
|
"kl/n_epsilon_steps": 0.2750000059604645,
|
|
"kl/p_epsilon_steps": 0.7124999761581421,
|
|
"learning_rate": 5.013930914912476e-08,
|
|
"logits/chosen": 1.5366142988204956,
|
|
"logits/rejected": 1.9008190631866455,
|
|
"logps/chosen": -333.5438537597656,
|
|
"logps/ref_chosen": -271.92047119140625,
|
|
"logps/ref_rejected": -263.865478515625,
|
|
"logps/rejected": -389.2403259277344,
|
|
"loss": 5.0554,
|
|
"rewards/accuracies": 0.717968761920929,
|
|
"rewards/chosen": -0.13834409415721893,
|
|
"rewards/margins": 0.14073482155799866,
|
|
"rewards/rejected": -0.2790789306163788,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.837696335078534,
|
|
"epsilon_dpo/beta": 0.0021363936830312014,
|
|
"epsilon_dpo/beta_margin_grad_mean": -0.4690118730068207,
|
|
"epsilon_dpo/beta_margin_grad_std": 0.05419831722974777,
|
|
"epsilon_dpo/beta_margin_mean": 0.12548907101154327,
|
|
"epsilon_dpo/beta_margin_std": 0.2197370082139969,
|
|
"epsilon_dpo/loss_margin_mean": 59.32947540283203,
|
|
"grad_norm": 16.475208282470703,
|
|
"kl/avg_steps": 0.42500001192092896,
|
|
"kl/beta": 0.0021453090012073517,
|
|
"kl/n_epsilon_steps": 0.2835937440395355,
|
|
"kl/p_epsilon_steps": 0.7085937261581421,
|
|
"learning_rate": 3.968661679220467e-08,
|
|
"logits/chosen": 1.5702852010726929,
|
|
"logits/rejected": 1.895922064781189,
|
|
"logps/chosen": -350.1571960449219,
|
|
"logps/ref_chosen": -284.8265075683594,
|
|
"logps/ref_rejected": -265.3280944824219,
|
|
"logps/rejected": -389.98828125,
|
|
"loss": 5.1073,
|
|
"rewards/accuracies": 0.7109375,
|
|
"rewards/chosen": -0.14023001492023468,
|
|
"rewards/margins": 0.12548907101154327,
|
|
"rewards/rejected": -0.26571911573410034,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.837696335078534,
|
|
"eval_epsilon_dpo/beta": 0.002089055487886071,
|
|
"eval_epsilon_dpo/beta_margin_grad_mean": -0.4698907434940338,
|
|
"eval_epsilon_dpo/beta_margin_grad_std": 0.05313246697187424,
|
|
"eval_epsilon_dpo/beta_margin_mean": 0.12187241017818451,
|
|
"eval_epsilon_dpo/beta_margin_std": 0.2152228057384491,
|
|
"eval_epsilon_dpo/loss_margin_mean": 59.03139877319336,
|
|
"eval_kl/n_epsilon_steps": 0.2854999899864197,
|
|
"eval_kl/p_epsilon_steps": 0.7085000276565552,
|
|
"eval_logits/chosen": 1.5736112594604492,
|
|
"eval_logits/rejected": 1.9568898677825928,
|
|
"eval_logps/chosen": -346.2501220703125,
|
|
"eval_logps/ref_chosen": -280.4282531738281,
|
|
"eval_logps/ref_rejected": -264.7044677734375,
|
|
"eval_logps/rejected": -389.5577392578125,
|
|
"eval_loss": 0.6402832269668579,
|
|
"eval_rewards/accuracies": 0.7164999842643738,
|
|
"eval_rewards/chosen": -0.13826368749141693,
|
|
"eval_rewards/margins": 0.12187241017818451,
|
|
"eval_rewards/rejected": -0.26013606786727905,
|
|
"eval_runtime": 103.0031,
|
|
"eval_samples_per_second": 19.417,
|
|
"eval_steps_per_second": 1.214,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.8586387434554974,
|
|
"epsilon_dpo/beta": 0.0020442053209990263,
|
|
"epsilon_dpo/beta_margin_grad_mean": -0.46692174673080444,
|
|
"epsilon_dpo/beta_margin_grad_std": 0.05178702622652054,
|
|
"epsilon_dpo/beta_margin_mean": 0.13379183411598206,
|
|
"epsilon_dpo/beta_margin_std": 0.20962686836719513,
|
|
"epsilon_dpo/loss_margin_mean": 66.03794860839844,
|
|
"grad_norm": 41.441593170166016,
|
|
"kl/avg_steps": 0.45390623807907104,
|
|
"kl/beta": 0.0020533339120447636,
|
|
"kl/n_epsilon_steps": 0.26875001192092896,
|
|
"kl/p_epsilon_steps": 0.72265625,
|
|
"learning_rate": 3.036127238347164e-08,
|
|
"logits/chosen": 1.612749695777893,
|
|
"logits/rejected": 1.9225709438323975,
|
|
"logps/chosen": -344.31646728515625,
|
|
"logps/ref_chosen": -282.58233642578125,
|
|
"logps/ref_rejected": -266.00897216796875,
|
|
"logps/rejected": -393.7810363769531,
|
|
"loss": 5.0719,
|
|
"rewards/accuracies": 0.7398437261581421,
|
|
"rewards/chosen": -0.12682631611824036,
|
|
"rewards/margins": 0.13379183411598206,
|
|
"rewards/rejected": -0.2606181502342224,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.8795811518324608,
|
|
"epsilon_dpo/beta": 0.001955785322934389,
|
|
"epsilon_dpo/beta_margin_grad_mean": -0.4684430658817291,
|
|
"epsilon_dpo/beta_margin_grad_std": 0.05116555094718933,
|
|
"epsilon_dpo/beta_margin_mean": 0.12757208943367004,
|
|
"epsilon_dpo/beta_margin_std": 0.207074373960495,
|
|
"epsilon_dpo/loss_margin_mean": 65.90140533447266,
|
|
"grad_norm": 19.453214645385742,
|
|
"kl/avg_steps": 0.46406251192092896,
|
|
"kl/beta": 0.001964703667908907,
|
|
"kl/n_epsilon_steps": 0.265625,
|
|
"kl/p_epsilon_steps": 0.729687511920929,
|
|
"learning_rate": 2.2213262793589482e-08,
|
|
"logits/chosen": 1.5862172842025757,
|
|
"logits/rejected": 1.9309051036834717,
|
|
"logps/chosen": -341.8611755371094,
|
|
"logps/ref_chosen": -281.11688232421875,
|
|
"logps/ref_rejected": -263.7762145996094,
|
|
"logps/rejected": -390.4219665527344,
|
|
"loss": 5.094,
|
|
"rewards/accuracies": 0.73046875,
|
|
"rewards/chosen": -0.11936762183904648,
|
|
"rewards/margins": 0.12757208943367004,
|
|
"rewards/rejected": -0.24693970382213593,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.900523560209424,
|
|
"epsilon_dpo/beta": 0.001865379512310028,
|
|
"epsilon_dpo/beta_margin_grad_mean": -0.46811485290527344,
|
|
"epsilon_dpo/beta_margin_grad_std": 0.0480102077126503,
|
|
"epsilon_dpo/beta_margin_mean": 0.1287469118833542,
|
|
"epsilon_dpo/beta_margin_std": 0.19402021169662476,
|
|
"epsilon_dpo/loss_margin_mean": 69.65689849853516,
|
|
"grad_norm": 17.445083618164062,
|
|
"kl/avg_steps": 0.47343748807907104,
|
|
"kl/beta": 0.0018740678206086159,
|
|
"kl/n_epsilon_steps": 0.25859373807907104,
|
|
"kl/p_epsilon_steps": 0.7320312261581421,
|
|
"learning_rate": 1.5286263996730026e-08,
|
|
"logits/chosen": 1.5173814296722412,
|
|
"logits/rejected": 1.9054569005966187,
|
|
"logps/chosen": -337.60888671875,
|
|
"logps/ref_chosen": -282.20098876953125,
|
|
"logps/ref_rejected": -257.6202392578125,
|
|
"logps/rejected": -382.68505859375,
|
|
"loss": 5.0847,
|
|
"rewards/accuracies": 0.741406261920929,
|
|
"rewards/chosen": -0.10385727882385254,
|
|
"rewards/margins": 0.1287469118833542,
|
|
"rewards/rejected": -0.23260419070720673,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.9214659685863874,
|
|
"epsilon_dpo/beta": 0.0017827233532443643,
|
|
"epsilon_dpo/beta_margin_grad_mean": -0.4748317301273346,
|
|
"epsilon_dpo/beta_margin_grad_std": 0.0455574207007885,
|
|
"epsilon_dpo/beta_margin_mean": 0.1015293225646019,
|
|
"epsilon_dpo/beta_margin_std": 0.18387706577777863,
|
|
"epsilon_dpo/loss_margin_mean": 57.55500030517578,
|
|
"grad_norm": 15.522335052490234,
|
|
"kl/avg_steps": 0.42109376192092896,
|
|
"kl/beta": 0.0017900926759466529,
|
|
"kl/n_epsilon_steps": 0.2835937440395355,
|
|
"kl/p_epsilon_steps": 0.7046874761581421,
|
|
"learning_rate": 9.617406953185136e-09,
|
|
"logits/chosen": 1.6178176403045654,
|
|
"logits/rejected": 1.9510142803192139,
|
|
"logps/chosen": -333.5023498535156,
|
|
"logps/ref_chosen": -272.00103759765625,
|
|
"logps/ref_rejected": -258.02813720703125,
|
|
"logps/rejected": -377.08441162109375,
|
|
"loss": 5.1835,
|
|
"rewards/accuracies": 0.703906238079071,
|
|
"rewards/chosen": -0.11019601672887802,
|
|
"rewards/margins": 0.1015293151140213,
|
|
"rewards/rejected": -0.2117253541946411,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.9424083769633508,
|
|
"epsilon_dpo/beta": 0.001706903101876378,
|
|
"epsilon_dpo/beta_margin_grad_mean": -0.4734385013580322,
|
|
"epsilon_dpo/beta_margin_grad_std": 0.044034797698259354,
|
|
"epsilon_dpo/beta_margin_mean": 0.10707694292068481,
|
|
"epsilon_dpo/beta_margin_std": 0.1776462197303772,
|
|
"epsilon_dpo/loss_margin_mean": 63.39220428466797,
|
|
"grad_norm": 16.360170364379883,
|
|
"kl/avg_steps": 0.4546875059604645,
|
|
"kl/beta": 0.0017145348247140646,
|
|
"kl/n_epsilon_steps": 0.2671875059604645,
|
|
"kl/p_epsilon_steps": 0.721875011920929,
|
|
"learning_rate": 5.2370785753763356e-09,
|
|
"logits/chosen": 1.5754592418670654,
|
|
"logits/rejected": 1.9332977533340454,
|
|
"logps/chosen": -337.49688720703125,
|
|
"logps/ref_chosen": -278.8232421875,
|
|
"logps/ref_rejected": -256.79656982421875,
|
|
"logps/rejected": -378.8623962402344,
|
|
"loss": 5.16,
|
|
"rewards/accuracies": 0.72265625,
|
|
"rewards/chosen": -0.10077029466629028,
|
|
"rewards/margins": 0.10707694292068481,
|
|
"rewards/rejected": -0.2078472375869751,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.9633507853403142,
|
|
"epsilon_dpo/beta": 0.0016306890174746513,
|
|
"epsilon_dpo/beta_margin_grad_mean": -0.47516068816185,
|
|
"epsilon_dpo/beta_margin_grad_std": 0.04221952706575394,
|
|
"epsilon_dpo/beta_margin_mean": 0.10008412599563599,
|
|
"epsilon_dpo/beta_margin_std": 0.17021533846855164,
|
|
"epsilon_dpo/loss_margin_mean": 61.97832107543945,
|
|
"grad_norm": 14.846392631530762,
|
|
"kl/avg_steps": 0.4546875059604645,
|
|
"kl/beta": 0.0016379815060645342,
|
|
"kl/n_epsilon_steps": 0.26953125,
|
|
"kl/p_epsilon_steps": 0.7242187261581421,
|
|
"learning_rate": 2.168758844148272e-09,
|
|
"logits/chosen": 1.6337049007415771,
|
|
"logits/rejected": 1.9634275436401367,
|
|
"logps/chosen": -353.42510986328125,
|
|
"logps/ref_chosen": -294.84185791015625,
|
|
"logps/ref_rejected": -276.9571533203125,
|
|
"logps/rejected": -397.5187072753906,
|
|
"loss": 5.184,
|
|
"rewards/accuracies": 0.7281249761581421,
|
|
"rewards/chosen": -0.09599287807941437,
|
|
"rewards/margins": 0.10008411109447479,
|
|
"rewards/rejected": -0.19607700407505035,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.9842931937172775,
|
|
"epsilon_dpo/beta": 0.0015589601825922728,
|
|
"epsilon_dpo/beta_margin_grad_mean": -0.47446101903915405,
|
|
"epsilon_dpo/beta_margin_grad_std": 0.04050491005182266,
|
|
"epsilon_dpo/beta_margin_mean": 0.10283418744802475,
|
|
"epsilon_dpo/beta_margin_std": 0.16317032277584076,
|
|
"epsilon_dpo/loss_margin_mean": 66.61624145507812,
|
|
"grad_norm": 14.901313781738281,
|
|
"kl/avg_steps": 0.46562498807907104,
|
|
"kl/beta": 0.0015660974895581603,
|
|
"kl/n_epsilon_steps": 0.2632812559604645,
|
|
"kl/p_epsilon_steps": 0.7289062738418579,
|
|
"learning_rate": 4.288949484559934e-10,
|
|
"logits/chosen": 1.5405309200286865,
|
|
"logits/rejected": 1.751405119895935,
|
|
"logps/chosen": -339.19415283203125,
|
|
"logps/ref_chosen": -285.2023620605469,
|
|
"logps/ref_rejected": -255.1339569091797,
|
|
"logps/rejected": -375.7419738769531,
|
|
"loss": 5.1712,
|
|
"rewards/accuracies": 0.733593761920929,
|
|
"rewards/chosen": -0.08475174009799957,
|
|
"rewards/margins": 0.10283420234918594,
|
|
"rewards/rejected": -0.18758592009544373,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.9989528795811519,
|
|
"step": 477,
|
|
"total_flos": 0.0,
|
|
"train_loss": 5.1642030939865915,
|
|
"train_runtime": 8287.5392,
|
|
"train_samples_per_second": 7.377,
|
|
"train_steps_per_second": 0.058
|
|
}
|
|
],
|
|
"logging_steps": 10,
|
|
"max_steps": 477,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 200,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 4,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|