Files
qwen3-8b-base-beta-dpo-hh-h…/trainer_state.json
ModelHub XC d8351d8ef7 初始化项目,由ModelHub XC社区提供模型
Model: jackf857/qwen3-8b-base-beta-dpo-hh-harmless-4xh200-batch-64
Source: Original Platform
2026-05-12 21:11:43 +08:00

13390 lines
540 KiB
JSON

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.999244142101285,
"eval_steps": 100,
"global_step": 661,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"beta_dpo/beta": 0.09873995184898376,
"beta_dpo/beta_margin_grad_mean": -0.5021346807479858,
"beta_dpo/beta_margin_grad_std": 0.008621793240308762,
"beta_dpo/beta_margin_mean": -0.00854283757507801,
"beta_dpo/beta_margin_std": 0.034500423818826675,
"beta_dpo/beta_used": 0.09873995184898376,
"beta_dpo/beta_used_raw": 0.09873995184898376,
"beta_dpo/gap_mean": -0.009267467074096203,
"beta_dpo/gap_std": 0.05077784135937691,
"beta_dpo/loss_margin_mean": -0.08983081579208374,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.0015117157974300832,
"grad_norm": 21.607027053833008,
"learning_rate": 0.0,
"logits/chosen": 1.4594056606292725,
"logits/rejected": 1.4684147834777832,
"loss": 1.3891,
"step": 1
},
{
"beta_dpo/beta": 0.09919409453868866,
"beta_dpo/beta_margin_grad_mean": -0.5008031725883484,
"beta_dpo/beta_margin_grad_std": 0.009846841916441917,
"beta_dpo/beta_margin_mean": -0.0032096824143081903,
"beta_dpo/beta_margin_std": 0.03941287845373154,
"beta_dpo/beta_used": 0.09919409453868866,
"beta_dpo/beta_used_raw": 0.09919409453868866,
"beta_dpo/gap_mean": -0.0228734128177166,
"beta_dpo/gap_std": 0.110123410820961,
"beta_dpo/loss_margin_mean": -0.0322224497795105,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.0030234315948601664,
"grad_norm": 20.140541076660156,
"learning_rate": 7.462686567164179e-09,
"logits/chosen": 1.4544942378997803,
"logits/rejected": 1.4357258081436157,
"loss": 1.39,
"step": 2
},
{
"beta_dpo/beta": 0.10366719961166382,
"beta_dpo/beta_margin_grad_mean": -0.5002375245094299,
"beta_dpo/beta_margin_grad_std": 0.011844536289572716,
"beta_dpo/beta_margin_mean": -0.0009551587863825262,
"beta_dpo/beta_margin_std": 0.04740604758262634,
"beta_dpo/beta_used": 0.10366719961166382,
"beta_dpo/beta_used_raw": 0.10366719961166382,
"beta_dpo/gap_mean": -0.02084210142493248,
"beta_dpo/gap_std": 0.17627675831317902,
"beta_dpo/loss_margin_mean": -0.01047566533088684,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.0045351473922902496,
"grad_norm": 24.431331634521484,
"learning_rate": 1.4925373134328357e-08,
"logits/chosen": 1.4479323625564575,
"logits/rejected": 1.4154329299926758,
"loss": 1.3822,
"step": 3
},
{
"beta_dpo/beta": 0.09961278736591339,
"beta_dpo/beta_margin_grad_mean": -0.4990696609020233,
"beta_dpo/beta_margin_grad_std": 0.010926141403615475,
"beta_dpo/beta_margin_mean": 0.003728417446836829,
"beta_dpo/beta_margin_std": 0.04374876245856285,
"beta_dpo/beta_used": 0.09961278736591339,
"beta_dpo/beta_used_raw": 0.09961278736591339,
"beta_dpo/gap_mean": -0.009441482834517956,
"beta_dpo/gap_std": 0.22680673003196716,
"beta_dpo/loss_margin_mean": 0.03748854994773865,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.006046863189720333,
"grad_norm": 24.549968719482422,
"learning_rate": 2.2388059701492534e-08,
"logits/chosen": 1.4826452732086182,
"logits/rejected": 1.4577124118804932,
"loss": 1.388,
"step": 4
},
{
"beta_dpo/beta": 0.0999765694141388,
"beta_dpo/beta_margin_grad_mean": -0.5009123086929321,
"beta_dpo/beta_margin_grad_std": 0.00964893214404583,
"beta_dpo/beta_margin_mean": -0.003652930725365877,
"beta_dpo/beta_margin_std": 0.03861139714717865,
"beta_dpo/beta_used": 0.0999765694141388,
"beta_dpo/beta_used_raw": 0.0999765694141388,
"beta_dpo/gap_mean": -0.012933394871652126,
"beta_dpo/gap_std": 0.26322224736213684,
"beta_dpo/loss_margin_mean": -0.03674338757991791,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.007558578987150416,
"grad_norm": 24.02710723876953,
"learning_rate": 2.9850746268656714e-08,
"logits/chosen": 1.430014967918396,
"logits/rejected": 1.422844648361206,
"loss": 1.3878,
"step": 5
},
{
"beta_dpo/beta": 0.09759774804115295,
"beta_dpo/beta_margin_grad_mean": -0.49920761585235596,
"beta_dpo/beta_margin_grad_std": 0.009975293651223183,
"beta_dpo/beta_margin_mean": 0.0031740572303533554,
"beta_dpo/beta_margin_std": 0.03991897776722908,
"beta_dpo/beta_used": 0.09759774804115295,
"beta_dpo/beta_used_raw": 0.09759774804115295,
"beta_dpo/gap_mean": -0.008989489637315273,
"beta_dpo/gap_std": 0.28406012058258057,
"beta_dpo/loss_margin_mean": 0.032966673374176025,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.009070294784580499,
"grad_norm": 22.517269134521484,
"learning_rate": 3.731343283582089e-08,
"logits/chosen": 1.7289844751358032,
"logits/rejected": 1.681814193725586,
"loss": 1.3912,
"step": 6
},
{
"beta_dpo/beta": 0.09864137321710587,
"beta_dpo/beta_margin_grad_mean": -0.4999319911003113,
"beta_dpo/beta_margin_grad_std": 0.010195241309702396,
"beta_dpo/beta_margin_mean": 0.0002745148667600006,
"beta_dpo/beta_margin_std": 0.040805213153362274,
"beta_dpo/beta_used": 0.09864137321710587,
"beta_dpo/beta_used_raw": 0.09864137321710587,
"beta_dpo/gap_mean": 0.0004895327147096395,
"beta_dpo/gap_std": 0.3131392300128937,
"beta_dpo/loss_margin_mean": 0.0023331642150878906,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.010582010582010581,
"grad_norm": 22.483606338500977,
"learning_rate": 4.477611940298507e-08,
"logits/chosen": 1.2273149490356445,
"logits/rejected": 1.2026118040084839,
"loss": 1.3886,
"step": 7
},
{
"beta_dpo/beta": 0.09902673959732056,
"beta_dpo/beta_margin_grad_mean": -0.501477062702179,
"beta_dpo/beta_margin_grad_std": 0.010539776645600796,
"beta_dpo/beta_margin_mean": -0.005913248751312494,
"beta_dpo/beta_margin_std": 0.04218650981783867,
"beta_dpo/beta_used": 0.09902673959732056,
"beta_dpo/beta_used_raw": 0.09902673959732056,
"beta_dpo/gap_mean": -0.013057660311460495,
"beta_dpo/gap_std": 0.33594292402267456,
"beta_dpo/loss_margin_mean": -0.05997839570045471,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.012093726379440665,
"grad_norm": 23.100969314575195,
"learning_rate": 5.223880597014925e-08,
"logits/chosen": 1.413482427597046,
"logits/rejected": 1.429722547531128,
"loss": 1.3894,
"step": 8
},
{
"beta_dpo/beta": 0.10009001195430756,
"beta_dpo/beta_margin_grad_mean": -0.5018208622932434,
"beta_dpo/beta_margin_grad_std": 0.010021732188761234,
"beta_dpo/beta_margin_mean": -0.007285799831151962,
"beta_dpo/beta_margin_std": 0.04011598229408264,
"beta_dpo/beta_used": 0.10009001195430756,
"beta_dpo/beta_used_raw": 0.10009001195430756,
"beta_dpo/gap_mean": -0.021802250295877457,
"beta_dpo/gap_std": 0.3482493460178375,
"beta_dpo/loss_margin_mean": -0.07284319400787354,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.013605442176870748,
"grad_norm": 23.189462661743164,
"learning_rate": 5.970149253731343e-08,
"logits/chosen": 1.6902210712432861,
"logits/rejected": 1.6170001029968262,
"loss": 1.3883,
"step": 9
},
{
"beta_dpo/beta": 0.10585639625787735,
"beta_dpo/beta_margin_grad_mean": -0.49980345368385315,
"beta_dpo/beta_margin_grad_std": 0.01263737864792347,
"beta_dpo/beta_margin_mean": 0.0007830193499103189,
"beta_dpo/beta_margin_std": 0.05059384927153587,
"beta_dpo/beta_used": 0.10585639625787735,
"beta_dpo/beta_used_raw": 0.10585639625787735,
"beta_dpo/gap_mean": -0.02278057485818863,
"beta_dpo/gap_std": 0.36068111658096313,
"beta_dpo/loss_margin_mean": 0.005588918924331665,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.015117157974300832,
"grad_norm": 25.323102951049805,
"learning_rate": 6.71641791044776e-08,
"logits/chosen": 1.6256885528564453,
"logits/rejected": 1.5875918865203857,
"loss": 1.3784,
"step": 10
},
{
"beta_dpo/beta": 0.10278887301683426,
"beta_dpo/beta_margin_grad_mean": -0.4984298646450043,
"beta_dpo/beta_margin_grad_std": 0.010885908268392086,
"beta_dpo/beta_margin_mean": 0.006281617563217878,
"beta_dpo/beta_margin_std": 0.04356975108385086,
"beta_dpo/beta_used": 0.10278887301683426,
"beta_dpo/beta_used_raw": 0.10278887301683426,
"beta_dpo/gap_mean": -0.004942757543176413,
"beta_dpo/gap_std": 0.3796635866165161,
"beta_dpo/loss_margin_mean": 0.061300128698349,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.016628873771730914,
"grad_norm": 24.30197525024414,
"learning_rate": 7.462686567164178e-08,
"logits/chosen": 1.2691853046417236,
"logits/rejected": 1.2496408224105835,
"loss": 1.3823,
"step": 11
},
{
"beta_dpo/beta": 0.09832623600959778,
"beta_dpo/beta_margin_grad_mean": -0.4999140799045563,
"beta_dpo/beta_margin_grad_std": 0.009057173505425453,
"beta_dpo/beta_margin_mean": 0.00034457247238606215,
"beta_dpo/beta_margin_std": 0.03624521940946579,
"beta_dpo/beta_used": 0.09832623600959778,
"beta_dpo/beta_used_raw": 0.09832623600959778,
"beta_dpo/gap_mean": -0.004351671785116196,
"beta_dpo/gap_std": 0.3818763792514801,
"beta_dpo/loss_margin_mean": 0.002451568841934204,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.018140589569160998,
"grad_norm": 22.108863830566406,
"learning_rate": 8.208955223880596e-08,
"logits/chosen": 1.3517913818359375,
"logits/rejected": 1.3473531007766724,
"loss": 1.3895,
"step": 12
},
{
"beta_dpo/beta": 0.09917062520980835,
"beta_dpo/beta_margin_grad_mean": -0.5009226202964783,
"beta_dpo/beta_margin_grad_std": 0.010060025379061699,
"beta_dpo/beta_margin_mean": -0.003692339640110731,
"beta_dpo/beta_margin_std": 0.04025454819202423,
"beta_dpo/beta_used": 0.09917062520980835,
"beta_dpo/beta_used_raw": 0.09917062520980835,
"beta_dpo/gap_mean": -0.00805948581546545,
"beta_dpo/gap_std": 0.3838382959365845,
"beta_dpo/loss_margin_mean": -0.03742155432701111,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.019652305366591082,
"grad_norm": 22.049461364746094,
"learning_rate": 8.955223880597014e-08,
"logits/chosen": 1.3082160949707031,
"logits/rejected": 1.2636491060256958,
"loss": 1.3887,
"step": 13
},
{
"beta_dpo/beta": 0.10183389484882355,
"beta_dpo/beta_margin_grad_mean": -0.4999306797981262,
"beta_dpo/beta_margin_grad_std": 0.00923539325594902,
"beta_dpo/beta_margin_mean": 0.00027715094620361924,
"beta_dpo/beta_margin_std": 0.036956243216991425,
"beta_dpo/beta_used": 0.10183389484882355,
"beta_dpo/beta_used_raw": 0.10183389484882355,
"beta_dpo/gap_mean": -0.006332115735858679,
"beta_dpo/gap_std": 0.3803662955760956,
"beta_dpo/loss_margin_mean": 0.0024544596672058105,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.021164021164021163,
"grad_norm": 25.343242645263672,
"learning_rate": 9.701492537313432e-08,
"logits/chosen": 1.3965303897857666,
"logits/rejected": 1.3902784585952759,
"loss": 1.384,
"step": 14
},
{
"beta_dpo/beta": 0.0993453860282898,
"beta_dpo/beta_margin_grad_mean": -0.5009843707084656,
"beta_dpo/beta_margin_grad_std": 0.012391936965286732,
"beta_dpo/beta_margin_mean": -0.003941703587770462,
"beta_dpo/beta_margin_std": 0.04959738627076149,
"beta_dpo/beta_used": 0.0993453860282898,
"beta_dpo/beta_used_raw": 0.0993453860282898,
"beta_dpo/gap_mean": -0.01158633641898632,
"beta_dpo/gap_std": 0.3963480591773987,
"beta_dpo/loss_margin_mean": -0.03971347212791443,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.022675736961451247,
"grad_norm": 23.105030059814453,
"learning_rate": 1.044776119402985e-07,
"logits/chosen": 1.5179550647735596,
"logits/rejected": 1.477945327758789,
"loss": 1.3889,
"step": 15
},
{
"beta_dpo/beta": 0.10452497750520706,
"beta_dpo/beta_margin_grad_mean": -0.4994063079357147,
"beta_dpo/beta_margin_grad_std": 0.009892878122627735,
"beta_dpo/beta_margin_mean": 0.002374407835304737,
"beta_dpo/beta_margin_std": 0.03958987817168236,
"beta_dpo/beta_used": 0.10452497750520706,
"beta_dpo/beta_used_raw": 0.10452497750520706,
"beta_dpo/gap_mean": -0.004868443123996258,
"beta_dpo/gap_std": 0.3979250192642212,
"beta_dpo/loss_margin_mean": 0.02236151695251465,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.02418745275888133,
"grad_norm": 22.657752990722656,
"learning_rate": 1.1194029850746268e-07,
"logits/chosen": 1.6581084728240967,
"logits/rejected": 1.6047765016555786,
"loss": 1.3791,
"step": 16
},
{
"beta_dpo/beta": 0.09847256541252136,
"beta_dpo/beta_margin_grad_mean": -0.5005159974098206,
"beta_dpo/beta_margin_grad_std": 0.009893263690173626,
"beta_dpo/beta_margin_mean": -0.0020661705639213324,
"beta_dpo/beta_margin_std": 0.03958994895219803,
"beta_dpo/beta_used": 0.09847256541252136,
"beta_dpo/beta_used_raw": 0.09847256541252136,
"beta_dpo/gap_mean": -0.008847428485751152,
"beta_dpo/gap_std": 0.3994016647338867,
"beta_dpo/loss_margin_mean": -0.02098524570465088,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.025699168556311415,
"grad_norm": 22.28753662109375,
"learning_rate": 1.1940298507462686e-07,
"logits/chosen": 1.3955719470977783,
"logits/rejected": 1.393104076385498,
"loss": 1.3899,
"step": 17
},
{
"beta_dpo/beta": 0.10181480646133423,
"beta_dpo/beta_margin_grad_mean": -0.5011930465698242,
"beta_dpo/beta_margin_grad_std": 0.009915145114064217,
"beta_dpo/beta_margin_mean": -0.004774391185492277,
"beta_dpo/beta_margin_std": 0.03967824578285217,
"beta_dpo/beta_used": 0.10181480646133423,
"beta_dpo/beta_used_raw": 0.10181480646133423,
"beta_dpo/gap_mean": -0.014493357390165329,
"beta_dpo/gap_std": 0.39799293875694275,
"beta_dpo/loss_margin_mean": -0.04706642031669617,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.027210884353741496,
"grad_norm": 21.89427375793457,
"learning_rate": 1.2686567164179106e-07,
"logits/chosen": 1.4782413244247437,
"logits/rejected": 1.4196343421936035,
"loss": 1.3849,
"step": 18
},
{
"beta_dpo/beta": 0.10298259556293488,
"beta_dpo/beta_margin_grad_mean": -0.5003270506858826,
"beta_dpo/beta_margin_grad_std": 0.009691756218671799,
"beta_dpo/beta_margin_mean": -0.0013136152410879731,
"beta_dpo/beta_margin_std": 0.03878864273428917,
"beta_dpo/beta_used": 0.10298259556293488,
"beta_dpo/beta_used_raw": 0.10298259556293488,
"beta_dpo/gap_mean": -0.014709733426570892,
"beta_dpo/gap_std": 0.39229732751846313,
"beta_dpo/loss_margin_mean": -0.012863218784332275,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.02872260015117158,
"grad_norm": 23.083358764648438,
"learning_rate": 1.343283582089552e-07,
"logits/chosen": 1.6602938175201416,
"logits/rejected": 1.655239462852478,
"loss": 1.3829,
"step": 19
},
{
"beta_dpo/beta": 0.10153305530548096,
"beta_dpo/beta_margin_grad_mean": -0.49922773241996765,
"beta_dpo/beta_margin_grad_std": 0.009931285865604877,
"beta_dpo/beta_margin_mean": 0.0030918291304260492,
"beta_dpo/beta_margin_std": 0.03974789381027222,
"beta_dpo/beta_used": 0.10153305530548096,
"beta_dpo/beta_used_raw": 0.10153305530548096,
"beta_dpo/gap_mean": -0.012392524629831314,
"beta_dpo/gap_std": 0.39484626054763794,
"beta_dpo/loss_margin_mean": 0.03128620982170105,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.030234315948601664,
"grad_norm": 24.105083465576172,
"learning_rate": 1.4179104477611938e-07,
"logits/chosen": 1.6466686725616455,
"logits/rejected": 1.6133846044540405,
"loss": 1.3852,
"step": 20
},
{
"beta_dpo/beta": 0.10090602189302444,
"beta_dpo/beta_margin_grad_mean": -0.5005959272384644,
"beta_dpo/beta_margin_grad_std": 0.009615312330424786,
"beta_dpo/beta_margin_mean": -0.002384437946602702,
"beta_dpo/beta_margin_std": 0.03847426176071167,
"beta_dpo/beta_used": 0.10090602189302444,
"beta_dpo/beta_used_raw": 0.10090602189302444,
"beta_dpo/gap_mean": -0.007610926404595375,
"beta_dpo/gap_std": 0.3931964635848999,
"beta_dpo/loss_margin_mean": -0.02530011534690857,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.031746031746031744,
"grad_norm": 22.50416374206543,
"learning_rate": 1.4925373134328355e-07,
"logits/chosen": 1.3186970949172974,
"logits/rejected": 1.2789273262023926,
"loss": 1.3855,
"step": 21
},
{
"beta_dpo/beta": 0.10063984990119934,
"beta_dpo/beta_margin_grad_mean": -0.4994290769100189,
"beta_dpo/beta_margin_grad_std": 0.010816080495715141,
"beta_dpo/beta_margin_mean": 0.0022858360316604376,
"beta_dpo/beta_margin_std": 0.04329133406281471,
"beta_dpo/beta_used": 0.10063984990119934,
"beta_dpo/beta_used_raw": 0.10063984990119934,
"beta_dpo/gap_mean": -0.005383708514273167,
"beta_dpo/gap_std": 0.3974972069263458,
"beta_dpo/loss_margin_mean": 0.022645294666290283,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.03325774754346183,
"grad_norm": 22.074689865112305,
"learning_rate": 1.5671641791044775e-07,
"logits/chosen": 1.4456074237823486,
"logits/rejected": 1.4467374086380005,
"loss": 1.3859,
"step": 22
},
{
"beta_dpo/beta": 0.10390889644622803,
"beta_dpo/beta_margin_grad_mean": -0.49791544675827026,
"beta_dpo/beta_margin_grad_std": 0.012759105302393436,
"beta_dpo/beta_margin_mean": 0.008349758572876453,
"beta_dpo/beta_margin_std": 0.05107416585087776,
"beta_dpo/beta_used": 0.10390889644622803,
"beta_dpo/beta_used_raw": 0.10390889644622803,
"beta_dpo/gap_mean": 0.001504638697952032,
"beta_dpo/gap_std": 0.40817511081695557,
"beta_dpo/loss_margin_mean": 0.07432505488395691,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.03476946334089191,
"grad_norm": 25.383527755737305,
"learning_rate": 1.6417910447761193e-07,
"logits/chosen": 1.665621042251587,
"logits/rejected": 1.6228258609771729,
"loss": 1.3794,
"step": 23
},
{
"beta_dpo/beta": 0.09863981604576111,
"beta_dpo/beta_margin_grad_mean": -0.4988223910331726,
"beta_dpo/beta_margin_grad_std": 0.010719557292759418,
"beta_dpo/beta_margin_mean": 0.004713835194706917,
"beta_dpo/beta_margin_std": 0.04289696365594864,
"beta_dpo/beta_used": 0.09863981604576111,
"beta_dpo/beta_used_raw": 0.09863981604576111,
"beta_dpo/gap_mean": 0.018340593203902245,
"beta_dpo/gap_std": 0.41763240098953247,
"beta_dpo/loss_margin_mean": 0.04628649353981018,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.036281179138321996,
"grad_norm": 22.189563751220703,
"learning_rate": 1.716417910447761e-07,
"logits/chosen": 1.7637710571289062,
"logits/rejected": 1.7281594276428223,
"loss": 1.3867,
"step": 24
},
{
"beta_dpo/beta": 0.100833460688591,
"beta_dpo/beta_margin_grad_mean": -0.49988317489624023,
"beta_dpo/beta_margin_grad_std": 0.012334803119301796,
"beta_dpo/beta_margin_mean": 0.0004626520967576653,
"beta_dpo/beta_margin_std": 0.04938330128788948,
"beta_dpo/beta_used": 0.100833460688591,
"beta_dpo/beta_used_raw": 0.100833460688591,
"beta_dpo/gap_mean": 0.018122181296348572,
"beta_dpo/gap_std": 0.43123096227645874,
"beta_dpo/loss_margin_mean": 0.003261953592300415,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.03779289493575208,
"grad_norm": 22.797252655029297,
"learning_rate": 1.7910447761194027e-07,
"logits/chosen": 1.3922135829925537,
"logits/rejected": 1.3515794277191162,
"loss": 1.3832,
"step": 25
},
{
"beta_dpo/beta": 0.09866522252559662,
"beta_dpo/beta_margin_grad_mean": -0.5007703900337219,
"beta_dpo/beta_margin_grad_std": 0.010712272487580776,
"beta_dpo/beta_margin_mean": -0.0030822933185845613,
"beta_dpo/beta_margin_std": 0.04288780689239502,
"beta_dpo/beta_used": 0.09866522252559662,
"beta_dpo/beta_used_raw": 0.09866522252559662,
"beta_dpo/gap_mean": 0.007343418896198273,
"beta_dpo/gap_std": 0.4358934164047241,
"beta_dpo/loss_margin_mean": -0.03148818016052246,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.039304610733182165,
"grad_norm": 22.201448440551758,
"learning_rate": 1.8656716417910447e-07,
"logits/chosen": 1.261330485343933,
"logits/rejected": 1.2732932567596436,
"loss": 1.3879,
"step": 26
},
{
"beta_dpo/beta": 0.09795667231082916,
"beta_dpo/beta_margin_grad_mean": -0.501259982585907,
"beta_dpo/beta_margin_grad_std": 0.01161261834204197,
"beta_dpo/beta_margin_mean": -0.005036745686084032,
"beta_dpo/beta_margin_std": 0.04648776724934578,
"beta_dpo/beta_used": 0.09795667231082916,
"beta_dpo/beta_used_raw": 0.09795667231082916,
"beta_dpo/gap_mean": -0.004225727170705795,
"beta_dpo/gap_std": 0.4409000873565674,
"beta_dpo/loss_margin_mean": -0.05272534489631653,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.04081632653061224,
"grad_norm": 23.33132553100586,
"learning_rate": 1.9402985074626865e-07,
"logits/chosen": 1.5242902040481567,
"logits/rejected": 1.505962610244751,
"loss": 1.3902,
"step": 27
},
{
"beta_dpo/beta": 0.09937025606632233,
"beta_dpo/beta_margin_grad_mean": -0.4999173581600189,
"beta_dpo/beta_margin_grad_std": 0.009457286447286606,
"beta_dpo/beta_margin_mean": 0.0003325306752230972,
"beta_dpo/beta_margin_std": 0.03784368187189102,
"beta_dpo/beta_used": 0.09937025606632233,
"beta_dpo/beta_used_raw": 0.09937025606632233,
"beta_dpo/gap_mean": -0.005930869374424219,
"beta_dpo/gap_std": 0.43624186515808105,
"beta_dpo/loss_margin_mean": 0.0017603635787963867,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.042328042328042326,
"grad_norm": 22.75115394592285,
"learning_rate": 2.0149253731343282e-07,
"logits/chosen": 1.402669906616211,
"logits/rejected": 1.4011223316192627,
"loss": 1.388,
"step": 28
},
{
"beta_dpo/beta": 0.09951446950435638,
"beta_dpo/beta_margin_grad_mean": -0.5003775954246521,
"beta_dpo/beta_margin_grad_std": 0.01097456831485033,
"beta_dpo/beta_margin_mean": -0.0015215803869068623,
"beta_dpo/beta_margin_std": 0.043978314846754074,
"beta_dpo/beta_used": 0.09951446950435638,
"beta_dpo/beta_used_raw": 0.09951446950435638,
"beta_dpo/gap_mean": -0.0060158781707286835,
"beta_dpo/gap_std": 0.4374222457408905,
"beta_dpo/loss_margin_mean": -0.015163600444793701,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.04383975812547241,
"grad_norm": 26.328489303588867,
"learning_rate": 2.08955223880597e-07,
"logits/chosen": 1.81168794631958,
"logits/rejected": 1.7704055309295654,
"loss": 1.3879,
"step": 29
},
{
"beta_dpo/beta": 0.0982619971036911,
"beta_dpo/beta_margin_grad_mean": -0.5009621977806091,
"beta_dpo/beta_margin_grad_std": 0.010776137933135033,
"beta_dpo/beta_margin_mean": -0.00385239627212286,
"beta_dpo/beta_margin_std": 0.04312770068645477,
"beta_dpo/beta_used": 0.0982619971036911,
"beta_dpo/beta_used_raw": 0.0982619971036911,
"beta_dpo/gap_mean": -0.008773903362452984,
"beta_dpo/gap_std": 0.4346309304237366,
"beta_dpo/loss_margin_mean": -0.03936275839805603,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.045351473922902494,
"grad_norm": 23.069562911987305,
"learning_rate": 2.1641791044776117e-07,
"logits/chosen": 1.6879940032958984,
"logits/rejected": 1.6044800281524658,
"loss": 1.3903,
"step": 30
},
{
"beta_dpo/beta": 0.10117494314908981,
"beta_dpo/beta_margin_grad_mean": -0.499036580324173,
"beta_dpo/beta_margin_grad_std": 0.01161203347146511,
"beta_dpo/beta_margin_mean": 0.0038549723103642464,
"beta_dpo/beta_margin_std": 0.046471331268548965,
"beta_dpo/beta_used": 0.10117494314908981,
"beta_dpo/beta_used_raw": 0.10117494314908981,
"beta_dpo/gap_mean": -0.004491984844207764,
"beta_dpo/gap_std": 0.4389868676662445,
"beta_dpo/loss_margin_mean": 0.03808090090751648,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.04686318972033258,
"grad_norm": 28.92486572265625,
"learning_rate": 2.2388059701492537e-07,
"logits/chosen": 1.694710612297058,
"logits/rejected": 1.6386480331420898,
"loss": 1.3851,
"step": 31
},
{
"beta_dpo/beta": 0.09735976159572601,
"beta_dpo/beta_margin_grad_mean": -0.5017613172531128,
"beta_dpo/beta_margin_grad_std": 0.01160483993589878,
"beta_dpo/beta_margin_mean": -0.007061361335217953,
"beta_dpo/beta_margin_std": 0.04650650918483734,
"beta_dpo/beta_used": 0.09735976159572601,
"beta_dpo/beta_used_raw": 0.09735976159572601,
"beta_dpo/gap_mean": -0.011080358177423477,
"beta_dpo/gap_std": 0.43803778290748596,
"beta_dpo/loss_margin_mean": -0.07157236337661743,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.04837490551776266,
"grad_norm": 22.146883010864258,
"learning_rate": 2.3134328358208954e-07,
"logits/chosen": 1.4716018438339233,
"logits/rejected": 1.5336263179779053,
"loss": 1.3918,
"step": 32
},
{
"beta_dpo/beta": 0.10330641269683838,
"beta_dpo/beta_margin_grad_mean": -0.4976757764816284,
"beta_dpo/beta_margin_grad_std": 0.011192507110536098,
"beta_dpo/beta_margin_mean": 0.009298978373408318,
"beta_dpo/beta_margin_std": 0.04479321837425232,
"beta_dpo/beta_used": 0.10330641269683838,
"beta_dpo/beta_used_raw": 0.10330641269683838,
"beta_dpo/gap_mean": 0.0022241901606321335,
"beta_dpo/gap_std": 0.4424448013305664,
"beta_dpo/loss_margin_mean": 0.08533850312232971,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.049886621315192746,
"grad_norm": 24.878881454467773,
"learning_rate": 2.388059701492537e-07,
"logits/chosen": 1.4082189798355103,
"logits/rejected": 1.3968687057495117,
"loss": 1.38,
"step": 33
},
{
"beta_dpo/beta": 0.0996209979057312,
"beta_dpo/beta_margin_grad_mean": -0.5004932284355164,
"beta_dpo/beta_margin_grad_std": 0.010038006119430065,
"beta_dpo/beta_margin_mean": -0.0019767105113714933,
"beta_dpo/beta_margin_std": 0.040175147354602814,
"beta_dpo/beta_used": 0.0996209979057312,
"beta_dpo/beta_used_raw": 0.0996209979057312,
"beta_dpo/gap_mean": 0.0003106839722022414,
"beta_dpo/gap_std": 0.4378555417060852,
"beta_dpo/loss_margin_mean": -0.019730672240257263,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.05139833711262283,
"grad_norm": 21.769670486450195,
"learning_rate": 2.4626865671641786e-07,
"logits/chosen": 1.5708086490631104,
"logits/rejected": 1.5470855236053467,
"loss": 1.3871,
"step": 34
},
{
"beta_dpo/beta": 0.10070754587650299,
"beta_dpo/beta_margin_grad_mean": -0.5005945563316345,
"beta_dpo/beta_margin_grad_std": 0.011351360939443111,
"beta_dpo/beta_margin_mean": -0.0023801266215741634,
"beta_dpo/beta_margin_std": 0.04543125256896019,
"beta_dpo/beta_used": 0.10070754587650299,
"beta_dpo/beta_used_raw": 0.10070754587650299,
"beta_dpo/gap_mean": -0.007040712982416153,
"beta_dpo/gap_std": 0.4391350746154785,
"beta_dpo/loss_margin_mean": -0.024149954319000244,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.05291005291005291,
"grad_norm": 21.194477081298828,
"learning_rate": 2.537313432835821e-07,
"logits/chosen": 1.4663602113723755,
"logits/rejected": 1.4484498500823975,
"loss": 1.386,
"step": 35
},
{
"beta_dpo/beta": 0.0950796902179718,
"beta_dpo/beta_margin_grad_mean": -0.5017680525779724,
"beta_dpo/beta_margin_grad_std": 0.009710317477583885,
"beta_dpo/beta_margin_mean": -0.00707436166703701,
"beta_dpo/beta_margin_std": 0.03885461017489433,
"beta_dpo/beta_used": 0.0950796902179718,
"beta_dpo/beta_used_raw": 0.0950796902179718,
"beta_dpo/gap_mean": -0.01551821082830429,
"beta_dpo/gap_std": 0.43908798694610596,
"beta_dpo/loss_margin_mean": -0.07440310716629028,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.05442176870748299,
"grad_norm": 23.735424041748047,
"learning_rate": 2.611940298507462e-07,
"logits/chosen": 1.4293709993362427,
"logits/rejected": 1.4123473167419434,
"loss": 1.3958,
"step": 36
},
{
"beta_dpo/beta": 0.1016513779759407,
"beta_dpo/beta_margin_grad_mean": -0.4968355596065521,
"beta_dpo/beta_margin_grad_std": 0.013145999051630497,
"beta_dpo/beta_margin_mean": 0.012668957002460957,
"beta_dpo/beta_margin_std": 0.05262959748506546,
"beta_dpo/beta_used": 0.1016513779759407,
"beta_dpo/beta_used_raw": 0.1016513779759407,
"beta_dpo/gap_mean": 0.0035870305728167295,
"beta_dpo/gap_std": 0.4464585483074188,
"beta_dpo/loss_margin_mean": 0.12334150075912476,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.055933484504913075,
"grad_norm": 27.381790161132812,
"learning_rate": 2.686567164179104e-07,
"logits/chosen": 1.5765271186828613,
"logits/rejected": 1.4575368165969849,
"loss": 1.3833,
"step": 37
},
{
"beta_dpo/beta": 0.09609992802143097,
"beta_dpo/beta_margin_grad_mean": -0.5025932192802429,
"beta_dpo/beta_margin_grad_std": 0.009760402143001556,
"beta_dpo/beta_margin_mean": -0.010377924889326096,
"beta_dpo/beta_margin_std": 0.03905599191784859,
"beta_dpo/beta_used": 0.09609992802143097,
"beta_dpo/beta_used_raw": 0.09609992802143097,
"beta_dpo/gap_mean": -0.0076413326896727085,
"beta_dpo/gap_std": 0.4474954605102539,
"beta_dpo/loss_margin_mean": -0.1089838445186615,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.05744520030234316,
"grad_norm": 22.253149032592773,
"learning_rate": 2.761194029850746e-07,
"logits/chosen": 1.660280704498291,
"logits/rejected": 1.65809166431427,
"loss": 1.3934,
"step": 38
},
{
"beta_dpo/beta": 0.09974405914545059,
"beta_dpo/beta_margin_grad_mean": -0.5005505681037903,
"beta_dpo/beta_margin_grad_std": 0.010305196046829224,
"beta_dpo/beta_margin_mean": -0.0022025478538125753,
"beta_dpo/beta_margin_std": 0.04123708978295326,
"beta_dpo/beta_used": 0.09974405914545059,
"beta_dpo/beta_used_raw": 0.09974405914545059,
"beta_dpo/gap_mean": -0.013334916904568672,
"beta_dpo/gap_std": 0.44173187017440796,
"beta_dpo/loss_margin_mean": -0.023197531700134277,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.05895691609977324,
"grad_norm": 24.880264282226562,
"learning_rate": 2.8358208955223876e-07,
"logits/chosen": 1.6895723342895508,
"logits/rejected": 1.665818691253662,
"loss": 1.3883,
"step": 39
},
{
"beta_dpo/beta": 0.09883079677820206,
"beta_dpo/beta_margin_grad_mean": -0.49824991822242737,
"beta_dpo/beta_margin_grad_std": 0.009885421022772789,
"beta_dpo/beta_margin_mean": 0.007007123902440071,
"beta_dpo/beta_margin_std": 0.03956810384988785,
"beta_dpo/beta_used": 0.09883079677820206,
"beta_dpo/beta_used_raw": 0.09883079677820206,
"beta_dpo/gap_mean": -0.0014165642205625772,
"beta_dpo/gap_std": 0.43331772089004517,
"beta_dpo/loss_margin_mean": 0.06831315159797668,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.06046863189720333,
"grad_norm": 23.338451385498047,
"learning_rate": 2.9104477611940296e-07,
"logits/chosen": 1.5526103973388672,
"logits/rejected": 1.4787428379058838,
"loss": 1.3884,
"step": 40
},
{
"beta_dpo/beta": 0.10304108262062073,
"beta_dpo/beta_margin_grad_mean": -0.49625322222709656,
"beta_dpo/beta_margin_grad_std": 0.011656548827886581,
"beta_dpo/beta_margin_mean": 0.014997422695159912,
"beta_dpo/beta_margin_std": 0.046651456505060196,
"beta_dpo/beta_used": 0.10304108262062073,
"beta_dpo/beta_used_raw": 0.10304108262062073,
"beta_dpo/gap_mean": 0.01947699673473835,
"beta_dpo/gap_std": 0.4391004145145416,
"beta_dpo/loss_margin_mean": 0.14550095796585083,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.06198034769463341,
"grad_norm": 24.844144821166992,
"learning_rate": 2.985074626865671e-07,
"logits/chosen": 1.5253885984420776,
"logits/rejected": 1.486491084098816,
"loss": 1.3794,
"step": 41
},
{
"beta_dpo/beta": 0.09910166263580322,
"beta_dpo/beta_margin_grad_mean": -0.500030517578125,
"beta_dpo/beta_margin_grad_std": 0.011837853118777275,
"beta_dpo/beta_margin_mean": -0.000116753377369605,
"beta_dpo/beta_margin_std": 0.04738757386803627,
"beta_dpo/beta_used": 0.09910166263580322,
"beta_dpo/beta_used_raw": 0.09910166263580322,
"beta_dpo/gap_mean": 0.02474522590637207,
"beta_dpo/gap_std": 0.44327157735824585,
"beta_dpo/loss_margin_mean": -0.0029853880405426025,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.06349206349206349,
"grad_norm": 23.967090606689453,
"learning_rate": 3.059701492537313e-07,
"logits/chosen": 1.3896113634109497,
"logits/rejected": 1.375231146812439,
"loss": 1.3854,
"step": 42
},
{
"beta_dpo/beta": 0.10354489833116531,
"beta_dpo/beta_margin_grad_mean": -0.49683722853660583,
"beta_dpo/beta_margin_grad_std": 0.01039121299982071,
"beta_dpo/beta_margin_mean": 0.012656980194151402,
"beta_dpo/beta_margin_std": 0.04158541187644005,
"beta_dpo/beta_used": 0.10354489833116531,
"beta_dpo/beta_used_raw": 0.10354489833116531,
"beta_dpo/gap_mean": 0.02907174453139305,
"beta_dpo/gap_std": 0.4337531328201294,
"beta_dpo/loss_margin_mean": 0.11490699648857117,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.06500377928949358,
"grad_norm": 24.746234893798828,
"learning_rate": 3.134328358208955e-07,
"logits/chosen": 1.403411626815796,
"logits/rejected": 1.3667316436767578,
"loss": 1.3767,
"step": 43
},
{
"beta_dpo/beta": 0.10267098248004913,
"beta_dpo/beta_margin_grad_mean": -0.4979555904865265,
"beta_dpo/beta_margin_grad_std": 0.009614716283977032,
"beta_dpo/beta_margin_mean": 0.008179724216461182,
"beta_dpo/beta_margin_std": 0.038474779576063156,
"beta_dpo/beta_used": 0.10267098248004913,
"beta_dpo/beta_used_raw": 0.10267098248004913,
"beta_dpo/gap_mean": 0.04934769868850708,
"beta_dpo/gap_std": 0.4201850891113281,
"beta_dpo/loss_margin_mean": 0.07566675543785095,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.06651549508692366,
"grad_norm": 24.72636604309082,
"learning_rate": 3.2089552238805965e-07,
"logits/chosen": 1.1089251041412354,
"logits/rejected": 1.0921913385391235,
"loss": 1.3766,
"step": 44
},
{
"beta_dpo/beta": 0.10441717505455017,
"beta_dpo/beta_margin_grad_mean": -0.4959341287612915,
"beta_dpo/beta_margin_grad_std": 0.011123725213110447,
"beta_dpo/beta_margin_mean": 0.016273343935608864,
"beta_dpo/beta_margin_std": 0.04451771080493927,
"beta_dpo/beta_used": 0.10441717505455017,
"beta_dpo/beta_used_raw": 0.10441717505455017,
"beta_dpo/gap_mean": 0.058637045323848724,
"beta_dpo/gap_std": 0.42201119661331177,
"beta_dpo/loss_margin_mean": 0.15418142080307007,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.06802721088435375,
"grad_norm": 25.480466842651367,
"learning_rate": 3.2835820895522385e-07,
"logits/chosen": 1.4839123487472534,
"logits/rejected": 1.468077540397644,
"loss": 1.3727,
"step": 45
},
{
"beta_dpo/beta": 0.10162113606929779,
"beta_dpo/beta_margin_grad_mean": -0.49845924973487854,
"beta_dpo/beta_margin_grad_std": 0.01084035076200962,
"beta_dpo/beta_margin_mean": 0.006165068130940199,
"beta_dpo/beta_margin_std": 0.04338241368532181,
"beta_dpo/beta_used": 0.10162113606929779,
"beta_dpo/beta_used_raw": 0.10162113606929779,
"beta_dpo/gap_mean": 0.0665898472070694,
"beta_dpo/gap_std": 0.4231005311012268,
"beta_dpo/loss_margin_mean": 0.06101316213607788,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.06953892668178382,
"grad_norm": 26.33711814880371,
"learning_rate": 3.3582089552238805e-07,
"logits/chosen": 1.376713514328003,
"logits/rejected": 1.3219760656356812,
"loss": 1.377,
"step": 46
},
{
"beta_dpo/beta": 0.10109131783246994,
"beta_dpo/beta_margin_grad_mean": -0.49842020869255066,
"beta_dpo/beta_margin_grad_std": 0.012417293153703213,
"beta_dpo/beta_margin_mean": 0.006329426076263189,
"beta_dpo/beta_margin_std": 0.04971562325954437,
"beta_dpo/beta_used": 0.10109131783246994,
"beta_dpo/beta_used_raw": 0.10109131783246994,
"beta_dpo/gap_mean": 0.061482757329940796,
"beta_dpo/gap_std": 0.43405160307884216,
"beta_dpo/loss_margin_mean": 0.06319385766983032,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.0710506424792139,
"grad_norm": 22.111841201782227,
"learning_rate": 3.432835820895522e-07,
"logits/chosen": 1.7184326648712158,
"logits/rejected": 1.69810152053833,
"loss": 1.3785,
"step": 47
},
{
"beta_dpo/beta": 0.099519282579422,
"beta_dpo/beta_margin_grad_mean": -0.49835386872291565,
"beta_dpo/beta_margin_grad_std": 0.010140984319150448,
"beta_dpo/beta_margin_mean": 0.006588014308363199,
"beta_dpo/beta_margin_std": 0.04058591276407242,
"beta_dpo/beta_used": 0.099519282579422,
"beta_dpo/beta_used_raw": 0.099519282579422,
"beta_dpo/gap_mean": 0.0627756342291832,
"beta_dpo/gap_std": 0.4295368492603302,
"beta_dpo/loss_margin_mean": 0.06416615843772888,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.07256235827664399,
"grad_norm": 22.274993896484375,
"learning_rate": 3.507462686567164e-07,
"logits/chosen": 1.595404028892517,
"logits/rejected": 1.588958740234375,
"loss": 1.3809,
"step": 48
},
{
"beta_dpo/beta": 0.10322128981351852,
"beta_dpo/beta_margin_grad_mean": -0.4979618787765503,
"beta_dpo/beta_margin_grad_std": 0.010646562092006207,
"beta_dpo/beta_margin_mean": 0.008156200870871544,
"beta_dpo/beta_margin_std": 0.04260906204581261,
"beta_dpo/beta_used": 0.10322128981351852,
"beta_dpo/beta_used_raw": 0.10322128981351852,
"beta_dpo/gap_mean": 0.06913349777460098,
"beta_dpo/gap_std": 0.4278194308280945,
"beta_dpo/loss_margin_mean": 0.07910655438899994,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.07407407407407407,
"grad_norm": 21.86302947998047,
"learning_rate": 3.5820895522388055e-07,
"logits/chosen": 1.4724366664886475,
"logits/rejected": 1.4565043449401855,
"loss": 1.374,
"step": 49
},
{
"beta_dpo/beta": 0.10015951097011566,
"beta_dpo/beta_margin_grad_mean": -0.49800705909729004,
"beta_dpo/beta_margin_grad_std": 0.010118241421878338,
"beta_dpo/beta_margin_mean": 0.007978866808116436,
"beta_dpo/beta_margin_std": 0.04050043225288391,
"beta_dpo/beta_used": 0.10015951097011566,
"beta_dpo/beta_used_raw": 0.10015951097011566,
"beta_dpo/gap_mean": 0.07339806854724884,
"beta_dpo/gap_std": 0.42607414722442627,
"beta_dpo/loss_margin_mean": 0.07699769735336304,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.07558578987150416,
"grad_norm": 22.100317001342773,
"learning_rate": 3.6567164179104475e-07,
"logits/chosen": 1.6594160795211792,
"logits/rejected": 1.640493392944336,
"loss": 1.3787,
"step": 50
},
{
"beta_dpo/beta": 0.09920643270015717,
"beta_dpo/beta_margin_grad_mean": -0.4983086585998535,
"beta_dpo/beta_margin_grad_std": 0.011117528192698956,
"beta_dpo/beta_margin_mean": 0.006771172862499952,
"beta_dpo/beta_margin_std": 0.04449303448200226,
"beta_dpo/beta_used": 0.09920643270015717,
"beta_dpo/beta_used_raw": 0.09920643270015717,
"beta_dpo/gap_mean": 0.06899771094322205,
"beta_dpo/gap_std": 0.4215458631515503,
"beta_dpo/loss_margin_mean": 0.06831052899360657,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.07709750566893424,
"grad_norm": 23.966360092163086,
"learning_rate": 3.7313432835820895e-07,
"logits/chosen": 1.289847731590271,
"logits/rejected": 1.2824013233184814,
"loss": 1.381,
"step": 51
},
{
"beta_dpo/beta": 0.09721747040748596,
"beta_dpo/beta_margin_grad_mean": -0.4997069239616394,
"beta_dpo/beta_margin_grad_std": 0.012052874080836773,
"beta_dpo/beta_margin_mean": 0.0011759058106690645,
"beta_dpo/beta_margin_std": 0.048246391117572784,
"beta_dpo/beta_used": 0.09721747040748596,
"beta_dpo/beta_used_raw": 0.09721747040748596,
"beta_dpo/gap_mean": 0.05793575569987297,
"beta_dpo/gap_std": 0.4385203719139099,
"beta_dpo/loss_margin_mean": 0.009815797209739685,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.07860922146636433,
"grad_norm": 25.5147705078125,
"learning_rate": 3.805970149253731e-07,
"logits/chosen": 1.5200705528259277,
"logits/rejected": 1.4433038234710693,
"loss": 1.3852,
"step": 52
},
{
"beta_dpo/beta": 0.09669992327690125,
"beta_dpo/beta_margin_grad_mean": -0.500171422958374,
"beta_dpo/beta_margin_grad_std": 0.00788611639291048,
"beta_dpo/beta_margin_mean": -0.0006850466597825289,
"beta_dpo/beta_margin_std": 0.031553711742162704,
"beta_dpo/beta_used": 0.09669992327690125,
"beta_dpo/beta_used_raw": 0.09669992327690125,
"beta_dpo/gap_mean": 0.05040828511118889,
"beta_dpo/gap_std": 0.4249057173728943,
"beta_dpo/loss_margin_mean": -0.007657676935195923,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.0801209372637944,
"grad_norm": 21.52613067626953,
"learning_rate": 3.880597014925373e-07,
"logits/chosen": 1.575990915298462,
"logits/rejected": 1.567566156387329,
"loss": 1.3869,
"step": 53
},
{
"beta_dpo/beta": 0.10164332389831543,
"beta_dpo/beta_margin_grad_mean": -0.4975181519985199,
"beta_dpo/beta_margin_grad_std": 0.009497878141701221,
"beta_dpo/beta_margin_mean": 0.009931082837283611,
"beta_dpo/beta_margin_std": 0.0380062460899353,
"beta_dpo/beta_used": 0.10164332389831543,
"beta_dpo/beta_used_raw": 0.10164332389831543,
"beta_dpo/gap_mean": 0.053338490426540375,
"beta_dpo/gap_std": 0.41552823781967163,
"beta_dpo/loss_margin_mean": 0.09771022200584412,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.08163265306122448,
"grad_norm": 21.51600456237793,
"learning_rate": 3.9552238805970144e-07,
"logits/chosen": 1.3746873140335083,
"logits/rejected": 1.3910648822784424,
"loss": 1.3784,
"step": 54
},
{
"beta_dpo/beta": 0.10384014993906021,
"beta_dpo/beta_margin_grad_mean": -0.49787670373916626,
"beta_dpo/beta_margin_grad_std": 0.01219708938151598,
"beta_dpo/beta_margin_mean": 0.008496826514601707,
"beta_dpo/beta_margin_std": 0.048810362815856934,
"beta_dpo/beta_used": 0.10384014993906021,
"beta_dpo/beta_used_raw": 0.10384014993906021,
"beta_dpo/gap_mean": 0.06035232171416283,
"beta_dpo/gap_std": 0.4211800992488861,
"beta_dpo/loss_margin_mean": 0.0819447934627533,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.08314436885865457,
"grad_norm": 24.281505584716797,
"learning_rate": 4.0298507462686564e-07,
"logits/chosen": 1.410477638244629,
"logits/rejected": 1.3666912317276,
"loss": 1.3739,
"step": 55
},
{
"beta_dpo/beta": 0.10036227107048035,
"beta_dpo/beta_margin_grad_mean": -0.49994948506355286,
"beta_dpo/beta_margin_grad_std": 0.010925770737230778,
"beta_dpo/beta_margin_mean": 0.00019966231775470078,
"beta_dpo/beta_margin_std": 0.04372824355959892,
"beta_dpo/beta_used": 0.10036227107048035,
"beta_dpo/beta_used_raw": 0.10036227107048035,
"beta_dpo/gap_mean": 0.0527767613530159,
"beta_dpo/gap_std": 0.42930376529693604,
"beta_dpo/loss_margin_mean": 0.00171700119972229,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.08465608465608465,
"grad_norm": 21.052114486694336,
"learning_rate": 4.1044776119402984e-07,
"logits/chosen": 1.5779602527618408,
"logits/rejected": 1.5794899463653564,
"loss": 1.3806,
"step": 56
},
{
"beta_dpo/beta": 0.10191956907510757,
"beta_dpo/beta_margin_grad_mean": -0.4974198043346405,
"beta_dpo/beta_margin_grad_std": 0.012272909283638,
"beta_dpo/beta_margin_mean": 0.010333304293453693,
"beta_dpo/beta_margin_std": 0.04913497716188431,
"beta_dpo/beta_used": 0.10191956907510757,
"beta_dpo/beta_used_raw": 0.10191956907510757,
"beta_dpo/gap_mean": 0.059144824743270874,
"beta_dpo/gap_std": 0.4319482445716858,
"beta_dpo/loss_margin_mean": 0.09830912947654724,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.08616780045351474,
"grad_norm": 26.80173683166504,
"learning_rate": 4.17910447761194e-07,
"logits/chosen": 1.4049038887023926,
"logits/rejected": 1.3124523162841797,
"loss": 1.377,
"step": 57
},
{
"beta_dpo/beta": 0.09840139746665955,
"beta_dpo/beta_margin_grad_mean": -0.49932199716567993,
"beta_dpo/beta_margin_grad_std": 0.011351993307471275,
"beta_dpo/beta_margin_mean": 0.002716128248721361,
"beta_dpo/beta_margin_std": 0.04545406624674797,
"beta_dpo/beta_used": 0.09840139746665955,
"beta_dpo/beta_used_raw": 0.09840139746665955,
"beta_dpo/gap_mean": 0.059776224195957184,
"beta_dpo/gap_std": 0.4370172321796417,
"beta_dpo/loss_margin_mean": 0.0205976665019989,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.08767951625094482,
"grad_norm": 23.33869743347168,
"learning_rate": 4.253731343283582e-07,
"logits/chosen": 1.4786138534545898,
"logits/rejected": 1.4423062801361084,
"loss": 1.3829,
"step": 58
},
{
"beta_dpo/beta": 0.10059243440628052,
"beta_dpo/beta_margin_grad_mean": -0.498147577047348,
"beta_dpo/beta_margin_grad_std": 0.01203033234924078,
"beta_dpo/beta_margin_mean": 0.007413546554744244,
"beta_dpo/beta_margin_std": 0.048155270516872406,
"beta_dpo/beta_used": 0.10059243440628052,
"beta_dpo/beta_used_raw": 0.10059243440628052,
"beta_dpo/gap_mean": 0.05429444462060928,
"beta_dpo/gap_std": 0.4411713182926178,
"beta_dpo/loss_margin_mean": 0.07427063584327698,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.08919123204837491,
"grad_norm": 20.473285675048828,
"learning_rate": 4.3283582089552234e-07,
"logits/chosen": 1.3001195192337036,
"logits/rejected": 1.1944963932037354,
"loss": 1.3801,
"step": 59
},
{
"beta_dpo/beta": 0.103457510471344,
"beta_dpo/beta_margin_grad_mean": -0.49937915802001953,
"beta_dpo/beta_margin_grad_std": 0.013366466388106346,
"beta_dpo/beta_margin_mean": 0.002477182075381279,
"beta_dpo/beta_margin_std": 0.05351593717932701,
"beta_dpo/beta_used": 0.103457510471344,
"beta_dpo/beta_used_raw": 0.103457510471344,
"beta_dpo/gap_mean": 0.046487562358379364,
"beta_dpo/gap_std": 0.45913559198379517,
"beta_dpo/loss_margin_mean": 0.02491551637649536,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.09070294784580499,
"grad_norm": 21.797914505004883,
"learning_rate": 4.4029850746268654e-07,
"logits/chosen": 1.4170093536376953,
"logits/rejected": 1.4231207370758057,
"loss": 1.3758,
"step": 60
},
{
"beta_dpo/beta": 0.09843544661998749,
"beta_dpo/beta_margin_grad_mean": -0.49705368280410767,
"beta_dpo/beta_margin_grad_std": 0.012809173204004765,
"beta_dpo/beta_margin_mean": 0.01179733220487833,
"beta_dpo/beta_margin_std": 0.051275238394737244,
"beta_dpo/beta_used": 0.09843544661998749,
"beta_dpo/beta_used_raw": 0.09843544661998749,
"beta_dpo/gap_mean": 0.05880650877952576,
"beta_dpo/gap_std": 0.46677011251449585,
"beta_dpo/loss_margin_mean": 0.1198408454656601,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.09221466364323508,
"grad_norm": 20.4912166595459,
"learning_rate": 4.4776119402985074e-07,
"logits/chosen": 1.6209700107574463,
"logits/rejected": 1.6182749271392822,
"loss": 1.3833,
"step": 61
},
{
"beta_dpo/beta": 0.09592962265014648,
"beta_dpo/beta_margin_grad_mean": -0.49833908677101135,
"beta_dpo/beta_margin_grad_std": 0.013690280728042126,
"beta_dpo/beta_margin_mean": 0.006649685092270374,
"beta_dpo/beta_margin_std": 0.05479509010910988,
"beta_dpo/beta_used": 0.09592962265014648,
"beta_dpo/beta_used_raw": 0.09592962265014648,
"beta_dpo/gap_mean": 0.06314882636070251,
"beta_dpo/gap_std": 0.4870232939720154,
"beta_dpo/loss_margin_mean": 0.06928093731403351,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.09372637944066516,
"grad_norm": 23.574623107910156,
"learning_rate": 4.552238805970149e-07,
"logits/chosen": 1.325179100036621,
"logits/rejected": 1.3118748664855957,
"loss": 1.3872,
"step": 62
},
{
"beta_dpo/beta": 0.10062983632087708,
"beta_dpo/beta_margin_grad_mean": -0.49723294377326965,
"beta_dpo/beta_margin_grad_std": 0.011747188866138458,
"beta_dpo/beta_margin_mean": 0.011074798181653023,
"beta_dpo/beta_margin_std": 0.047014713287353516,
"beta_dpo/beta_used": 0.10062983632087708,
"beta_dpo/beta_used_raw": 0.10062983632087708,
"beta_dpo/gap_mean": 0.07177233695983887,
"beta_dpo/gap_std": 0.48838692903518677,
"beta_dpo/loss_margin_mean": 0.1106141209602356,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.09523809523809523,
"grad_norm": 24.619401931762695,
"learning_rate": 4.626865671641791e-07,
"logits/chosen": 1.6953513622283936,
"logits/rejected": 1.6481925249099731,
"loss": 1.3783,
"step": 63
},
{
"beta_dpo/beta": 0.09594196081161499,
"beta_dpo/beta_margin_grad_mean": -0.4999556839466095,
"beta_dpo/beta_margin_grad_std": 0.012721442617475986,
"beta_dpo/beta_margin_mean": 0.0001893570297397673,
"beta_dpo/beta_margin_std": 0.05095384269952774,
"beta_dpo/beta_used": 0.09594196081161499,
"beta_dpo/beta_used_raw": 0.09594196081161499,
"beta_dpo/gap_mean": 0.061939239501953125,
"beta_dpo/gap_std": 0.5000776648521423,
"beta_dpo/loss_margin_mean": 0.002007901668548584,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.09674981103552532,
"grad_norm": 19.409799575805664,
"learning_rate": 4.701492537313433e-07,
"logits/chosen": 1.5105648040771484,
"logits/rejected": 1.4828753471374512,
"loss": 1.3871,
"step": 64
},
{
"beta_dpo/beta": 0.10893698036670685,
"beta_dpo/beta_margin_grad_mean": -0.4937191307544708,
"beta_dpo/beta_margin_grad_std": 0.01706167496740818,
"beta_dpo/beta_margin_mean": 0.025155218318104744,
"beta_dpo/beta_margin_std": 0.06835649907588959,
"beta_dpo/beta_used": 0.10893698036670685,
"beta_dpo/beta_used_raw": 0.10893698036670685,
"beta_dpo/gap_mean": 0.084006167948246,
"beta_dpo/gap_std": 0.5160300731658936,
"beta_dpo/loss_margin_mean": 0.23087024688720703,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.0982615268329554,
"grad_norm": 24.889301300048828,
"learning_rate": 4.776119402985074e-07,
"logits/chosen": 1.556587815284729,
"logits/rejected": 1.5368375778198242,
"loss": 1.3616,
"step": 65
},
{
"beta_dpo/beta": 0.10014477372169495,
"beta_dpo/beta_margin_grad_mean": -0.49655643105506897,
"beta_dpo/beta_margin_grad_std": 0.015136976726353168,
"beta_dpo/beta_margin_mean": 0.013793686404824257,
"beta_dpo/beta_margin_std": 0.06063022464513779,
"beta_dpo/beta_used": 0.10014477372169495,
"beta_dpo/beta_used_raw": 0.10014477372169495,
"beta_dpo/gap_mean": 0.09994551539421082,
"beta_dpo/gap_std": 0.5345540642738342,
"beta_dpo/loss_margin_mean": 0.13528499007225037,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.09977324263038549,
"grad_norm": 23.565357208251953,
"learning_rate": 4.850746268656717e-07,
"logits/chosen": 1.4598450660705566,
"logits/rejected": 1.4400157928466797,
"loss": 1.376,
"step": 66
},
{
"beta_dpo/beta": 0.09859488904476166,
"beta_dpo/beta_margin_grad_mean": -0.49907803535461426,
"beta_dpo/beta_margin_grad_std": 0.011991067789494991,
"beta_dpo/beta_margin_mean": 0.0036908036563545465,
"beta_dpo/beta_margin_std": 0.04799149930477142,
"beta_dpo/beta_used": 0.09859488904476166,
"beta_dpo/beta_used_raw": 0.09859488904476166,
"beta_dpo/gap_mean": 0.08958867192268372,
"beta_dpo/gap_std": 0.5328190922737122,
"beta_dpo/loss_margin_mean": 0.03765749931335449,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.10128495842781557,
"grad_norm": 25.641630172729492,
"learning_rate": 4.925373134328357e-07,
"logits/chosen": 1.3621433973312378,
"logits/rejected": 1.3432750701904297,
"loss": 1.3801,
"step": 67
},
{
"beta_dpo/beta": 0.09739673137664795,
"beta_dpo/beta_margin_grad_mean": -0.49787405133247375,
"beta_dpo/beta_margin_grad_std": 0.014542591758072376,
"beta_dpo/beta_margin_mean": 0.008516059257090092,
"beta_dpo/beta_margin_std": 0.05821725353598595,
"beta_dpo/beta_used": 0.09739673137664795,
"beta_dpo/beta_used_raw": 0.09739673137664795,
"beta_dpo/gap_mean": 0.08840759843587875,
"beta_dpo/gap_std": 0.5416070222854614,
"beta_dpo/loss_margin_mean": 0.08617928624153137,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.10279667422524566,
"grad_norm": 26.215946197509766,
"learning_rate": 5e-07,
"logits/chosen": 1.4207310676574707,
"logits/rejected": 1.3847835063934326,
"loss": 1.382,
"step": 68
},
{
"beta_dpo/beta": 0.11089831590652466,
"beta_dpo/beta_margin_grad_mean": -0.4936300218105316,
"beta_dpo/beta_margin_grad_std": 0.020087001845240593,
"beta_dpo/beta_margin_mean": 0.02549654059112072,
"beta_dpo/beta_margin_std": 0.08050806075334549,
"beta_dpo/beta_used": 0.11089831590652466,
"beta_dpo/beta_used_raw": 0.11089831590652466,
"beta_dpo/gap_mean": 0.10114619880914688,
"beta_dpo/gap_std": 0.5690314769744873,
"beta_dpo/loss_margin_mean": 0.21953517198562622,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.10430839002267574,
"grad_norm": 25.708833694458008,
"learning_rate": 4.999965034812934e-07,
"logits/chosen": 1.373316764831543,
"logits/rejected": 1.3507366180419922,
"loss": 1.3542,
"step": 69
},
{
"beta_dpo/beta": 0.105964794754982,
"beta_dpo/beta_margin_grad_mean": -0.49599337577819824,
"beta_dpo/beta_margin_grad_std": 0.016764765605330467,
"beta_dpo/beta_margin_mean": 0.016038598492741585,
"beta_dpo/beta_margin_std": 0.06713969260454178,
"beta_dpo/beta_used": 0.105964794754982,
"beta_dpo/beta_used_raw": 0.105964794754982,
"beta_dpo/gap_mean": 0.11604620516300201,
"beta_dpo/gap_std": 0.5841151475906372,
"beta_dpo/loss_margin_mean": 0.150816410779953,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.10582010582010581,
"grad_norm": 24.138580322265625,
"learning_rate": 4.999860140229787e-07,
"logits/chosen": 1.431084156036377,
"logits/rejected": 1.4090873003005981,
"loss": 1.3641,
"step": 70
},
{
"beta_dpo/beta": 0.09979788959026337,
"beta_dpo/beta_margin_grad_mean": -0.49800625443458557,
"beta_dpo/beta_margin_grad_std": 0.01896088756620884,
"beta_dpo/beta_margin_mean": 0.007973305881023407,
"beta_dpo/beta_margin_std": 0.07597094774246216,
"beta_dpo/beta_used": 0.09979788959026337,
"beta_dpo/beta_used_raw": 0.09979788959026337,
"beta_dpo/gap_mean": 0.11453382670879364,
"beta_dpo/gap_std": 0.6030242443084717,
"beta_dpo/loss_margin_mean": 0.07938975095748901,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.1073318216175359,
"grad_norm": 22.322416305541992,
"learning_rate": 4.999685319184688e-07,
"logits/chosen": 1.312930703163147,
"logits/rejected": 1.324812412261963,
"loss": 1.3757,
"step": 71
},
{
"beta_dpo/beta": 0.1114841103553772,
"beta_dpo/beta_margin_grad_mean": -0.4935649633407593,
"beta_dpo/beta_margin_grad_std": 0.018391672521829605,
"beta_dpo/beta_margin_mean": 0.0257643461227417,
"beta_dpo/beta_margin_std": 0.07371597737073898,
"beta_dpo/beta_used": 0.1114841103553772,
"beta_dpo/beta_used_raw": 0.1114841103553772,
"beta_dpo/gap_mean": 0.13404789566993713,
"beta_dpo/gap_std": 0.6270595192909241,
"beta_dpo/loss_margin_mean": 0.23163816332817078,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.10884353741496598,
"grad_norm": 27.61007308959961,
"learning_rate": 4.999440576567755e-07,
"logits/chosen": 1.412034273147583,
"logits/rejected": 1.3404237031936646,
"loss": 1.3509,
"step": 72
},
{
"beta_dpo/beta": 0.09717012196779251,
"beta_dpo/beta_margin_grad_mean": -0.49801793694496155,
"beta_dpo/beta_margin_grad_std": 0.017343247309327126,
"beta_dpo/beta_margin_mean": 0.007944438606500626,
"beta_dpo/beta_margin_std": 0.06946875154972076,
"beta_dpo/beta_used": 0.09717012196779251,
"beta_dpo/beta_used_raw": 0.09717012196779251,
"beta_dpo/gap_mean": 0.12657299637794495,
"beta_dpo/gap_std": 0.6361806392669678,
"beta_dpo/loss_margin_mean": 0.08220607042312622,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.11035525321239607,
"grad_norm": 23.838298797607422,
"learning_rate": 4.999125919224965e-07,
"logits/chosen": 1.5537290573120117,
"logits/rejected": 1.5287607908248901,
"loss": 1.3792,
"step": 73
},
{
"beta_dpo/beta": 0.1059919074177742,
"beta_dpo/beta_margin_grad_mean": -0.4929755628108978,
"beta_dpo/beta_margin_grad_std": 0.017454126849770546,
"beta_dpo/beta_margin_mean": 0.028127092868089676,
"beta_dpo/beta_margin_std": 0.06989695876836777,
"beta_dpo/beta_used": 0.1059919074177742,
"beta_dpo/beta_used_raw": 0.1059919074177742,
"beta_dpo/gap_mean": 0.13354957103729248,
"beta_dpo/gap_std": 0.6440489292144775,
"beta_dpo/loss_margin_mean": 0.24771931767463684,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.11186696900982615,
"grad_norm": 24.128093719482422,
"learning_rate": 4.998741355957963e-07,
"logits/chosen": 1.481793999671936,
"logits/rejected": 1.3946576118469238,
"loss": 1.3613,
"step": 74
},
{
"beta_dpo/beta": 0.10442506521940231,
"beta_dpo/beta_margin_grad_mean": -0.49443042278289795,
"beta_dpo/beta_margin_grad_std": 0.019393526017665863,
"beta_dpo/beta_margin_mean": 0.02230740524828434,
"beta_dpo/beta_margin_std": 0.0777261033654213,
"beta_dpo/beta_used": 0.10442506521940231,
"beta_dpo/beta_used_raw": 0.10442506521940231,
"beta_dpo/gap_mean": 0.16668199002742767,
"beta_dpo/gap_std": 0.651796281337738,
"beta_dpo/loss_margin_mean": 0.21053069829940796,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.11337868480725624,
"grad_norm": 22.620519638061523,
"learning_rate": 4.998286897523808e-07,
"logits/chosen": 1.4410107135772705,
"logits/rejected": 1.3761870861053467,
"loss": 1.3621,
"step": 75
},
{
"beta_dpo/beta": 0.09164533764123917,
"beta_dpo/beta_margin_grad_mean": -0.4942088723182678,
"beta_dpo/beta_margin_grad_std": 0.016309738159179688,
"beta_dpo/beta_margin_mean": 0.02321462146937847,
"beta_dpo/beta_margin_std": 0.06536100059747696,
"beta_dpo/beta_used": 0.09164533764123917,
"beta_dpo/beta_used_raw": 0.09164533764123917,
"beta_dpo/gap_mean": 0.17394113540649414,
"beta_dpo/gap_std": 0.6712378263473511,
"beta_dpo/loss_margin_mean": 0.253339558839798,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.11489040060468632,
"grad_norm": 20.620342254638672,
"learning_rate": 4.997762556634679e-07,
"logits/chosen": 1.3254311084747314,
"logits/rejected": 1.2652392387390137,
"loss": 1.3836,
"step": 76
},
{
"beta_dpo/beta": 0.10088081657886505,
"beta_dpo/beta_margin_grad_mean": -0.4943234622478485,
"beta_dpo/beta_margin_grad_std": 0.019380716606974602,
"beta_dpo/beta_margin_mean": 0.02274668589234352,
"beta_dpo/beta_margin_std": 0.07762499898672104,
"beta_dpo/beta_used": 0.10088081657886505,
"beta_dpo/beta_used_raw": 0.10088081657886505,
"beta_dpo/gap_mean": 0.1924261748790741,
"beta_dpo/gap_std": 0.6849093437194824,
"beta_dpo/loss_margin_mean": 0.21507787704467773,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.1164021164021164,
"grad_norm": 23.49284553527832,
"learning_rate": 4.99716834795752e-07,
"logits/chosen": 1.4381937980651855,
"logits/rejected": 1.39052152633667,
"loss": 1.3661,
"step": 77
},
{
"beta_dpo/beta": 0.09779095649719238,
"beta_dpo/beta_margin_grad_mean": -0.49503323435783386,
"beta_dpo/beta_margin_grad_std": 0.02156643010675907,
"beta_dpo/beta_margin_mean": 0.019915712997317314,
"beta_dpo/beta_margin_std": 0.08644842356443405,
"beta_dpo/beta_used": 0.09779095649719238,
"beta_dpo/beta_used_raw": 0.09779095649719238,
"beta_dpo/gap_mean": 0.181630477309227,
"beta_dpo/gap_std": 0.7211419343948364,
"beta_dpo/loss_margin_mean": 0.19882404804229736,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.11791383219954649,
"grad_norm": 22.498376846313477,
"learning_rate": 4.996504288113623e-07,
"logits/chosen": 1.4062104225158691,
"logits/rejected": 1.3975510597229004,
"loss": 1.3727,
"step": 78
},
{
"beta_dpo/beta": 0.09830057621002197,
"beta_dpo/beta_margin_grad_mean": -0.49084609746932983,
"beta_dpo/beta_margin_grad_std": 0.023657534271478653,
"beta_dpo/beta_margin_mean": 0.036744583398103714,
"beta_dpo/beta_margin_std": 0.09499835968017578,
"beta_dpo/beta_used": 0.09830057621002197,
"beta_dpo/beta_used_raw": 0.09830057621002197,
"beta_dpo/gap_mean": 0.2163337767124176,
"beta_dpo/gap_std": 0.7629668712615967,
"beta_dpo/loss_margin_mean": 0.36694100499153137,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.11942554799697656,
"grad_norm": 22.278594970703125,
"learning_rate": 4.995770395678171e-07,
"logits/chosen": 1.692272424697876,
"logits/rejected": 1.5986158847808838,
"loss": 1.3676,
"step": 79
},
{
"beta_dpo/beta": 0.09350171685218811,
"beta_dpo/beta_margin_grad_mean": -0.49590837955474854,
"beta_dpo/beta_margin_grad_std": 0.02149679884314537,
"beta_dpo/beta_margin_mean": 0.016413187608122826,
"beta_dpo/beta_margin_std": 0.08623309433460236,
"beta_dpo/beta_used": 0.09350171685218811,
"beta_dpo/beta_used_raw": 0.09350171685218811,
"beta_dpo/gap_mean": 0.21665816009044647,
"beta_dpo/gap_std": 0.7902263402938843,
"beta_dpo/loss_margin_mean": 0.17878860235214233,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.12093726379440665,
"grad_norm": 20.826610565185547,
"learning_rate": 4.994966691179711e-07,
"logits/chosen": 1.4082047939300537,
"logits/rejected": 1.3100817203521729,
"loss": 1.3765,
"step": 80
},
{
"beta_dpo/beta": 0.09851119667291641,
"beta_dpo/beta_margin_grad_mean": -0.49376627802848816,
"beta_dpo/beta_margin_grad_std": 0.019676726311445236,
"beta_dpo/beta_margin_mean": 0.024973532184958458,
"beta_dpo/beta_margin_std": 0.0788303092122078,
"beta_dpo/beta_used": 0.09851119667291641,
"beta_dpo/beta_used_raw": 0.09851119667291641,
"beta_dpo/gap_mean": 0.2203991711139679,
"beta_dpo/gap_std": 0.8004931211471558,
"beta_dpo/loss_margin_mean": 0.24607722461223602,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.12244897959183673,
"grad_norm": 22.618637084960938,
"learning_rate": 4.994093197099587e-07,
"logits/chosen": 1.1300098896026611,
"logits/rejected": 1.091421365737915,
"loss": 1.3666,
"step": 81
},
{
"beta_dpo/beta": 0.09958788752555847,
"beta_dpo/beta_margin_grad_mean": -0.4900076687335968,
"beta_dpo/beta_margin_grad_std": 0.02183517999947071,
"beta_dpo/beta_margin_mean": 0.04006734862923622,
"beta_dpo/beta_margin_std": 0.08758988231420517,
"beta_dpo/beta_used": 0.09958788752555847,
"beta_dpo/beta_used_raw": 0.09958788752555847,
"beta_dpo/gap_mean": 0.24891814589500427,
"beta_dpo/gap_std": 0.817732036113739,
"beta_dpo/loss_margin_mean": 0.4039810597896576,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.12396069538926682,
"grad_norm": 21.952186584472656,
"learning_rate": 4.993149937871306e-07,
"logits/chosen": 1.499792218208313,
"logits/rejected": 1.4229098558425903,
"loss": 1.3634,
"step": 82
},
{
"beta_dpo/beta": 0.10711533576250076,
"beta_dpo/beta_margin_grad_mean": -0.48857223987579346,
"beta_dpo/beta_margin_grad_std": 0.024558711796998978,
"beta_dpo/beta_margin_mean": 0.04585915803909302,
"beta_dpo/beta_margin_std": 0.09868450462818146,
"beta_dpo/beta_used": 0.10711533576250076,
"beta_dpo/beta_used_raw": 0.10711533576250076,
"beta_dpo/gap_mean": 0.28387853503227234,
"beta_dpo/gap_std": 0.8279096484184265,
"beta_dpo/loss_margin_mean": 0.41550129652023315,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.1254724111866969,
"grad_norm": 25.619373321533203,
"learning_rate": 4.992136939879856e-07,
"logits/chosen": 1.5482455492019653,
"logits/rejected": 1.4713327884674072,
"loss": 1.344,
"step": 83
},
{
"beta_dpo/beta": 0.09682896733283997,
"beta_dpo/beta_margin_grad_mean": -0.4948498606681824,
"beta_dpo/beta_margin_grad_std": 0.02205750159919262,
"beta_dpo/beta_margin_mean": 0.020634762942790985,
"beta_dpo/beta_margin_std": 0.08842462301254272,
"beta_dpo/beta_used": 0.09682896733283997,
"beta_dpo/beta_used_raw": 0.09682896733283997,
"beta_dpo/gap_mean": 0.27833741903305054,
"beta_dpo/gap_std": 0.8445614576339722,
"beta_dpo/loss_margin_mean": 0.20018967986106873,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.12698412698412698,
"grad_norm": 23.43607521057129,
"learning_rate": 4.991054231460969e-07,
"logits/chosen": 1.5122936964035034,
"logits/rejected": 1.4564745426177979,
"loss": 1.3651,
"step": 84
},
{
"beta_dpo/beta": 0.1081511527299881,
"beta_dpo/beta_margin_grad_mean": -0.4898848831653595,
"beta_dpo/beta_margin_grad_std": 0.02815152332186699,
"beta_dpo/beta_margin_mean": 0.040507350116968155,
"beta_dpo/beta_margin_std": 0.11305945366621017,
"beta_dpo/beta_used": 0.1081511527299881,
"beta_dpo/beta_used_raw": 0.1081511527299881,
"beta_dpo/gap_mean": 0.27856504917144775,
"beta_dpo/gap_std": 0.8720511198043823,
"beta_dpo/loss_margin_mean": 0.3741960823535919,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.12849584278155707,
"grad_norm": 23.618709564208984,
"learning_rate": 4.989901842900325e-07,
"logits/chosen": 1.4183937311172485,
"logits/rejected": 1.3515069484710693,
"loss": 1.3433,
"step": 85
},
{
"beta_dpo/beta": 0.09931820631027222,
"beta_dpo/beta_margin_grad_mean": -0.49436455965042114,
"beta_dpo/beta_margin_grad_std": 0.023392099887132645,
"beta_dpo/beta_margin_mean": 0.022570841014385223,
"beta_dpo/beta_margin_std": 0.09379469603300095,
"beta_dpo/beta_used": 0.09931820631027222,
"beta_dpo/beta_used_raw": 0.09931820631027222,
"beta_dpo/gap_mean": 0.27649736404418945,
"beta_dpo/gap_std": 0.896049976348877,
"beta_dpo/loss_margin_mean": 0.22701409459114075,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.13000755857898716,
"grad_norm": 22.559938430786133,
"learning_rate": 4.988679806432711e-07,
"logits/chosen": 1.6345174312591553,
"logits/rejected": 1.585810661315918,
"loss": 1.3613,
"step": 86
},
{
"beta_dpo/beta": 0.10324035584926605,
"beta_dpo/beta_margin_grad_mean": -0.4915555417537689,
"beta_dpo/beta_margin_grad_std": 0.02729521505534649,
"beta_dpo/beta_margin_mean": 0.03396186605095863,
"beta_dpo/beta_margin_std": 0.10979495197534561,
"beta_dpo/beta_used": 0.10324035584926605,
"beta_dpo/beta_used_raw": 0.10324035584926605,
"beta_dpo/gap_mean": 0.27984869480133057,
"beta_dpo/gap_std": 0.9216774106025696,
"beta_dpo/loss_margin_mean": 0.3301246166229248,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.13151927437641722,
"grad_norm": 24.750186920166016,
"learning_rate": 4.987388156241114e-07,
"logits/chosen": 1.4320831298828125,
"logits/rejected": 1.3352614641189575,
"loss": 1.3535,
"step": 87
},
{
"beta_dpo/beta": 0.10066162049770355,
"beta_dpo/beta_margin_grad_mean": -0.4947110414505005,
"beta_dpo/beta_margin_grad_std": 0.026083989068865776,
"beta_dpo/beta_margin_mean": 0.02120215632021427,
"beta_dpo/beta_margin_std": 0.10460641980171204,
"beta_dpo/beta_used": 0.10066162049770355,
"beta_dpo/beta_used_raw": 0.10066162049770355,
"beta_dpo/gap_mean": 0.2821298837661743,
"beta_dpo/gap_std": 0.94524085521698,
"beta_dpo/loss_margin_mean": 0.18504422903060913,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.1330309901738473,
"grad_norm": 23.80083656311035,
"learning_rate": 4.986026928455767e-07,
"logits/chosen": 1.5004366636276245,
"logits/rejected": 1.4935534000396729,
"loss": 1.3568,
"step": 88
},
{
"beta_dpo/beta": 0.09679665416479111,
"beta_dpo/beta_margin_grad_mean": -0.48473596572875977,
"beta_dpo/beta_margin_grad_std": 0.028986340388655663,
"beta_dpo/beta_margin_mean": 0.06136619672179222,
"beta_dpo/beta_margin_std": 0.11661099642515182,
"beta_dpo/beta_used": 0.09679665416479111,
"beta_dpo/beta_used_raw": 0.09679665416479111,
"beta_dpo/gap_mean": 0.3047249913215637,
"beta_dpo/gap_std": 0.9839344024658203,
"beta_dpo/loss_margin_mean": 0.6256879568099976,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.1345427059712774,
"grad_norm": 19.98080825805664,
"learning_rate": 4.984596161153135e-07,
"logits/chosen": 1.4730298519134521,
"logits/rejected": 1.3510327339172363,
"loss": 1.3634,
"step": 89
},
{
"beta_dpo/beta": 0.10261310636997223,
"beta_dpo/beta_margin_grad_mean": -0.48896563053131104,
"beta_dpo/beta_margin_grad_std": 0.028216810896992683,
"beta_dpo/beta_margin_mean": 0.04428347200155258,
"beta_dpo/beta_margin_std": 0.11333856731653214,
"beta_dpo/beta_used": 0.10261310636997223,
"beta_dpo/beta_used_raw": 0.10261310636997223,
"beta_dpo/gap_mean": 0.35536807775497437,
"beta_dpo/gap_std": 1.0184149742126465,
"beta_dpo/loss_margin_mean": 0.4309338331222534,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.1360544217687075,
"grad_norm": 24.94264030456543,
"learning_rate": 4.983095894354857e-07,
"logits/chosen": 1.406229019165039,
"logits/rejected": 1.3163142204284668,
"loss": 1.3475,
"step": 90
},
{
"beta_dpo/beta": 0.11208349466323853,
"beta_dpo/beta_margin_grad_mean": -0.48447299003601074,
"beta_dpo/beta_margin_grad_std": 0.033066846430301666,
"beta_dpo/beta_margin_mean": 0.06240526959300041,
"beta_dpo/beta_margin_std": 0.13280640542507172,
"beta_dpo/beta_used": 0.11208349466323853,
"beta_dpo/beta_used_raw": 0.11208349466323853,
"beta_dpo/gap_mean": 0.3759007453918457,
"beta_dpo/gap_std": 1.0446383953094482,
"beta_dpo/loss_margin_mean": 0.5622912049293518,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.13756613756613756,
"grad_norm": 24.20952033996582,
"learning_rate": 4.98152617002662e-07,
"logits/chosen": 1.263109803199768,
"logits/rejected": 1.2032928466796875,
"loss": 1.326,
"step": 91
},
{
"beta_dpo/beta": 0.09759242832660675,
"beta_dpo/beta_margin_grad_mean": -0.491456538438797,
"beta_dpo/beta_margin_grad_std": 0.03118491731584072,
"beta_dpo/beta_margin_mean": 0.03435541316866875,
"beta_dpo/beta_margin_std": 0.12529778480529785,
"beta_dpo/beta_used": 0.09759242832660675,
"beta_dpo/beta_used_raw": 0.09759242832660675,
"beta_dpo/gap_mean": 0.38296887278556824,
"beta_dpo/gap_std": 1.0825082063674927,
"beta_dpo/loss_margin_mean": 0.3481322228908539,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.13907785336356765,
"grad_norm": 23.6805362701416,
"learning_rate": 4.979887032076988e-07,
"logits/chosen": 1.3866524696350098,
"logits/rejected": 1.3360724449157715,
"loss": 1.3553,
"step": 92
},
{
"beta_dpo/beta": 0.09923055022954941,
"beta_dpo/beta_margin_grad_mean": -0.4930683970451355,
"beta_dpo/beta_margin_grad_std": 0.0345403254032135,
"beta_dpo/beta_margin_mean": 0.027934642508625984,
"beta_dpo/beta_margin_std": 0.1390235722064972,
"beta_dpo/beta_used": 0.09923055022954941,
"beta_dpo/beta_used_raw": 0.09923055022954941,
"beta_dpo/gap_mean": 0.36329329013824463,
"beta_dpo/gap_std": 1.1469428539276123,
"beta_dpo/loss_margin_mean": 0.2789202034473419,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.14058956916099774,
"grad_norm": 20.091751098632812,
"learning_rate": 4.978178526356172e-07,
"logits/chosen": 1.453279733657837,
"logits/rejected": 1.4085452556610107,
"loss": 1.3544,
"step": 93
},
{
"beta_dpo/beta": 0.11446872353553772,
"beta_dpo/beta_margin_grad_mean": -0.4835643172264099,
"beta_dpo/beta_margin_grad_std": 0.04806080833077431,
"beta_dpo/beta_margin_mean": 0.06689022481441498,
"beta_dpo/beta_margin_std": 0.19625984132289886,
"beta_dpo/beta_used": 0.11446872353553772,
"beta_dpo/beta_used_raw": 0.11446872353553772,
"beta_dpo/gap_mean": 0.39031505584716797,
"beta_dpo/gap_std": 1.2319457530975342,
"beta_dpo/loss_margin_mean": 0.5888001322746277,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.1421012849584278,
"grad_norm": 25.610767364501953,
"learning_rate": 4.976400700654751e-07,
"logits/chosen": 1.3520464897155762,
"logits/rejected": 1.2797930240631104,
"loss": 1.3178,
"step": 94
},
{
"beta_dpo/beta": 0.10418045520782471,
"beta_dpo/beta_margin_grad_mean": -0.48502302169799805,
"beta_dpo/beta_margin_grad_std": 0.043376799672842026,
"beta_dpo/beta_margin_mean": 0.06061091274023056,
"beta_dpo/beta_margin_std": 0.17566770315170288,
"beta_dpo/beta_used": 0.10418045520782471,
"beta_dpo/beta_used_raw": 0.10418045520782471,
"beta_dpo/gap_mean": 0.44323962926864624,
"beta_dpo/gap_std": 1.3183009624481201,
"beta_dpo/loss_margin_mean": 0.5283056497573853,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.1436130007558579,
"grad_norm": 24.98712158203125,
"learning_rate": 4.974553604702332e-07,
"logits/chosen": 1.3048655986785889,
"logits/rejected": 1.2389471530914307,
"loss": 1.3326,
"step": 95
},
{
"beta_dpo/beta": 0.0894891619682312,
"beta_dpo/beta_margin_grad_mean": -0.4840312898159027,
"beta_dpo/beta_margin_grad_std": 0.0363737978041172,
"beta_dpo/beta_margin_mean": 0.06439025700092316,
"beta_dpo/beta_margin_std": 0.14664606750011444,
"beta_dpo/beta_used": 0.0894891619682312,
"beta_dpo/beta_used_raw": 0.0894891619682312,
"beta_dpo/gap_mean": 0.46237361431121826,
"beta_dpo/gap_std": 1.373389720916748,
"beta_dpo/loss_margin_mean": 0.6972731351852417,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.14512471655328799,
"grad_norm": 19.369220733642578,
"learning_rate": 4.972637290166157e-07,
"logits/chosen": 1.2695260047912598,
"logits/rejected": 1.2192683219909668,
"loss": 1.3603,
"step": 96
},
{
"beta_dpo/beta": 0.08789724111557007,
"beta_dpo/beta_margin_grad_mean": -0.4935567378997803,
"beta_dpo/beta_margin_grad_std": 0.033925559371709824,
"beta_dpo/beta_margin_mean": 0.025890527293086052,
"beta_dpo/beta_margin_std": 0.1364428550004959,
"beta_dpo/beta_used": 0.08789724111557007,
"beta_dpo/beta_used_raw": 0.08789724111557007,
"beta_dpo/gap_mean": 0.45890724658966064,
"beta_dpo/gap_std": 1.4099913835525513,
"beta_dpo/loss_margin_mean": 0.2942034900188446,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.14663643235071808,
"grad_norm": 20.669422149658203,
"learning_rate": 4.970651810649666e-07,
"logits/chosen": 1.289090633392334,
"logits/rejected": 1.2603429555892944,
"loss": 1.3666,
"step": 97
},
{
"beta_dpo/beta": 0.10570737719535828,
"beta_dpo/beta_margin_grad_mean": -0.4919486939907074,
"beta_dpo/beta_margin_grad_std": 0.03829289227724075,
"beta_dpo/beta_margin_mean": 0.03243134915828705,
"beta_dpo/beta_margin_std": 0.15482714772224426,
"beta_dpo/beta_used": 0.10570737719535828,
"beta_dpo/beta_used_raw": 0.10570737719535828,
"beta_dpo/gap_mean": 0.4299718737602234,
"beta_dpo/gap_std": 1.4265832901000977,
"beta_dpo/loss_margin_mean": 0.3066960573196411,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.14814814814814814,
"grad_norm": 22.59371566772461,
"learning_rate": 4.968597221690985e-07,
"logits/chosen": 1.1470067501068115,
"logits/rejected": 1.1468849182128906,
"loss": 1.3348,
"step": 98
},
{
"beta_dpo/beta": 0.0829615443944931,
"beta_dpo/beta_margin_grad_mean": -0.4901537597179413,
"beta_dpo/beta_margin_grad_std": 0.04067037254571915,
"beta_dpo/beta_margin_mean": 0.04001061990857124,
"beta_dpo/beta_margin_std": 0.16489112377166748,
"beta_dpo/beta_used": 0.0829615443944931,
"beta_dpo/beta_used_raw": 0.0829615443944931,
"beta_dpo/gap_mean": 0.4272018373012543,
"beta_dpo/gap_std": 1.4833000898361206,
"beta_dpo/loss_margin_mean": 0.4821007251739502,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.14965986394557823,
"grad_norm": 20.694110870361328,
"learning_rate": 4.966473580761389e-07,
"logits/chosen": 1.3811016082763672,
"logits/rejected": 1.3564668893814087,
"loss": 1.3759,
"step": 99
},
{
"beta_dpo/beta": 0.09042062610387802,
"beta_dpo/beta_margin_grad_mean": -0.49155348539352417,
"beta_dpo/beta_margin_grad_std": 0.04488116875290871,
"beta_dpo/beta_margin_mean": 0.03428267315030098,
"beta_dpo/beta_margin_std": 0.18191133439540863,
"beta_dpo/beta_used": 0.09042062610387802,
"beta_dpo/beta_used_raw": 0.09042062610387802,
"beta_dpo/gap_mean": 0.41866934299468994,
"beta_dpo/gap_std": 1.5786731243133545,
"beta_dpo/loss_margin_mean": 0.38769927620887756,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.15117157974300832,
"grad_norm": 22.64974594116211,
"learning_rate": 4.964280947263676e-07,
"logits/chosen": 1.245833396911621,
"logits/rejected": 1.2450430393218994,
"loss": 1.3654,
"step": 100
},
{
"epoch": 0.15117157974300832,
"eval_beta_dpo/beta": 0.11095979809761047,
"eval_beta_dpo/beta_margin_grad_mean": -0.48243069648742676,
"eval_beta_dpo/beta_margin_grad_std": 0.052106164395809174,
"eval_beta_dpo/beta_margin_mean": 0.07168044149875641,
"eval_beta_dpo/beta_margin_std": 0.21300600469112396,
"eval_beta_dpo/beta_used": 0.11095979809761047,
"eval_beta_dpo/beta_used_raw": 0.11095979809761047,
"eval_beta_dpo/gap_mean": 0.4233362674713135,
"eval_beta_dpo/gap_std": 1.6301106214523315,
"eval_beta_dpo/loss_margin_mean": 0.6059994697570801,
"eval_beta_dpo/mask_keep_frac": 1.0,
"eval_logits/chosen": 1.3013670444488525,
"eval_logits/rejected": 1.2542670965194702,
"eval_loss": 0.6640572547912598,
"eval_runtime": 43.5006,
"eval_samples_per_second": 52.942,
"eval_steps_per_second": 1.655,
"step": 100
},
{
"beta_dpo/beta": 0.11204126477241516,
"beta_dpo/beta_margin_grad_mean": -0.48013564944267273,
"beta_dpo/beta_margin_grad_std": 0.042587053030729294,
"beta_dpo/beta_margin_mean": 0.07981999963521957,
"beta_dpo/beta_margin_std": 0.1719403713941574,
"beta_dpo/beta_used": 0.11204126477241516,
"beta_dpo/beta_used_raw": 0.11204126477241516,
"beta_dpo/gap_mean": 0.4711691737174988,
"beta_dpo/gap_std": 1.609398365020752,
"beta_dpo/loss_margin_mean": 0.7173241376876831,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.15268329554043839,
"grad_norm": 23.564899444580078,
"learning_rate": 4.96201938253052e-07,
"logits/chosen": 1.1492784023284912,
"logits/rejected": 1.1351875066757202,
"loss": 1.3168,
"step": 101
},
{
"beta_dpo/beta": 0.08911336958408356,
"beta_dpo/beta_margin_grad_mean": -0.490958571434021,
"beta_dpo/beta_margin_grad_std": 0.04404524341225624,
"beta_dpo/beta_margin_mean": 0.03683772310614586,
"beta_dpo/beta_margin_std": 0.17854470014572144,
"beta_dpo/beta_used": 0.08911336958408356,
"beta_dpo/beta_used_raw": 0.08911336958408356,
"beta_dpo/gap_mean": 0.46106693148612976,
"beta_dpo/gap_std": 1.6647722721099854,
"beta_dpo/loss_margin_mean": 0.4219280481338501,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.15419501133786848,
"grad_norm": 20.356643676757812,
"learning_rate": 4.959688949822748e-07,
"logits/chosen": 1.333066463470459,
"logits/rejected": 1.2851324081420898,
"loss": 1.3646,
"step": 102
},
{
"beta_dpo/beta": 0.10101747512817383,
"beta_dpo/beta_margin_grad_mean": -0.4781191647052765,
"beta_dpo/beta_margin_grad_std": 0.04582774639129639,
"beta_dpo/beta_margin_mean": 0.0886739194393158,
"beta_dpo/beta_margin_std": 0.18615014851093292,
"beta_dpo/beta_used": 0.10101747512817383,
"beta_dpo/beta_used_raw": 0.10101747512817383,
"beta_dpo/gap_mean": 0.5251193046569824,
"beta_dpo/gap_std": 1.7195401191711426,
"beta_dpo/loss_margin_mean": 0.8807109594345093,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.15570672713529857,
"grad_norm": 25.940797805786133,
"learning_rate": 4.957289714327572e-07,
"logits/chosen": 1.4399843215942383,
"logits/rejected": 1.4258053302764893,
"loss": 1.3361,
"step": 103
},
{
"beta_dpo/beta": 0.1078379899263382,
"beta_dpo/beta_margin_grad_mean": -0.47406965494155884,
"beta_dpo/beta_margin_grad_std": 0.053313929587602615,
"beta_dpo/beta_margin_mean": 0.1053386926651001,
"beta_dpo/beta_margin_std": 0.2173088937997818,
"beta_dpo/beta_used": 0.1078379899263382,
"beta_dpo/beta_used_raw": 0.1078379899263382,
"beta_dpo/gap_mean": 0.6082203388214111,
"beta_dpo/gap_std": 1.7673068046569824,
"beta_dpo/loss_margin_mean": 1.0192296504974365,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.15721844293272866,
"grad_norm": 27.010522842407227,
"learning_rate": 4.954821743156767e-07,
"logits/chosen": 1.3672761917114258,
"logits/rejected": 1.2913395166397095,
"loss": 1.3068,
"step": 104
},
{
"beta_dpo/beta": 0.11157245934009552,
"beta_dpo/beta_margin_grad_mean": -0.47606605291366577,
"beta_dpo/beta_margin_grad_std": 0.061335984617471695,
"beta_dpo/beta_margin_mean": 0.0973881185054779,
"beta_dpo/beta_margin_std": 0.24996986985206604,
"beta_dpo/beta_used": 0.11157245934009552,
"beta_dpo/beta_used_raw": 0.11157245934009552,
"beta_dpo/gap_mean": 0.680145263671875,
"beta_dpo/gap_std": 1.9032455682754517,
"beta_dpo/loss_margin_mean": 0.8816050291061401,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.15873015873015872,
"grad_norm": 26.681400299072266,
"learning_rate": 4.952285105344791e-07,
"logits/chosen": 1.4183114767074585,
"logits/rejected": 1.3744010925292969,
"loss": 1.2982,
"step": 105
},
{
"beta_dpo/beta": 0.10396211594343185,
"beta_dpo/beta_margin_grad_mean": -0.4831298291683197,
"beta_dpo/beta_margin_grad_std": 0.051441218703985214,
"beta_dpo/beta_margin_mean": 0.06840399652719498,
"beta_dpo/beta_margin_std": 0.20999610424041748,
"beta_dpo/beta_used": 0.10396211594343185,
"beta_dpo/beta_used_raw": 0.10396211594343185,
"beta_dpo/gap_mean": 0.6826244592666626,
"beta_dpo/gap_std": 1.9469351768493652,
"beta_dpo/loss_margin_mean": 0.6595271825790405,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.1602418745275888,
"grad_norm": 23.215801239013672,
"learning_rate": 4.949679871846857e-07,
"logits/chosen": 1.3464417457580566,
"logits/rejected": 1.3212090730667114,
"loss": 1.3151,
"step": 106
},
{
"beta_dpo/beta": 0.08311143517494202,
"beta_dpo/beta_margin_grad_mean": -0.48951032757759094,
"beta_dpo/beta_margin_grad_std": 0.04360119625926018,
"beta_dpo/beta_margin_mean": 0.04215170443058014,
"beta_dpo/beta_margin_std": 0.1757575124502182,
"beta_dpo/beta_used": 0.08311143517494202,
"beta_dpo/beta_used_raw": 0.08311143517494202,
"beta_dpo/gap_mean": 0.6624685525894165,
"beta_dpo/gap_std": 1.9580414295196533,
"beta_dpo/loss_margin_mean": 0.48135077953338623,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.1617535903250189,
"grad_norm": 21.3165340423584,
"learning_rate": 4.947006115536947e-07,
"logits/chosen": 1.1941533088684082,
"logits/rejected": 1.1890106201171875,
"loss": 1.3599,
"step": 107
},
{
"beta_dpo/beta": 0.10874947905540466,
"beta_dpo/beta_margin_grad_mean": -0.4867684543132782,
"beta_dpo/beta_margin_grad_std": 0.05567330867052078,
"beta_dpo/beta_margin_mean": 0.053287770599126816,
"beta_dpo/beta_margin_std": 0.22567662596702576,
"beta_dpo/beta_used": 0.10874947905540466,
"beta_dpo/beta_used_raw": 0.10874947905540466,
"beta_dpo/gap_mean": 0.6162758469581604,
"beta_dpo/gap_std": 1.9659650325775146,
"beta_dpo/loss_margin_mean": 0.495738685131073,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.16326530612244897,
"grad_norm": 24.595596313476562,
"learning_rate": 4.944263911205772e-07,
"logits/chosen": 1.3917639255523682,
"logits/rejected": 1.3184635639190674,
"loss": 1.3084,
"step": 108
},
{
"beta_dpo/beta": 0.08857216686010361,
"beta_dpo/beta_margin_grad_mean": -0.48171043395996094,
"beta_dpo/beta_margin_grad_std": 0.05316697433590889,
"beta_dpo/beta_margin_mean": 0.07432334870100021,
"beta_dpo/beta_margin_std": 0.2160109281539917,
"beta_dpo/beta_used": 0.08857216686010361,
"beta_dpo/beta_used_raw": 0.08857216686010361,
"beta_dpo/gap_mean": 0.6437417268753052,
"beta_dpo/gap_std": 2.0695106983184814,
"beta_dpo/loss_margin_mean": 0.8359496593475342,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.16477702191987906,
"grad_norm": 19.824125289916992,
"learning_rate": 4.941453335558681e-07,
"logits/chosen": 1.286027431488037,
"logits/rejected": 1.1748815774917603,
"loss": 1.349,
"step": 109
},
{
"beta_dpo/beta": 0.09814047068357468,
"beta_dpo/beta_margin_grad_mean": -0.49335938692092896,
"beta_dpo/beta_margin_grad_std": 0.05850514397025108,
"beta_dpo/beta_margin_mean": 0.026294706389307976,
"beta_dpo/beta_margin_std": 0.2390744686126709,
"beta_dpo/beta_used": 0.09814047068357468,
"beta_dpo/beta_used_raw": 0.09814047068357468,
"beta_dpo/gap_mean": 0.598505973815918,
"beta_dpo/gap_std": 2.148646831512451,
"beta_dpo/loss_margin_mean": 0.26830294728279114,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.16628873771730915,
"grad_norm": 22.9931640625,
"learning_rate": 4.938574467213517e-07,
"logits/chosen": 1.0979554653167725,
"logits/rejected": 1.1433002948760986,
"loss": 1.3376,
"step": 110
},
{
"beta_dpo/beta": 0.10206159949302673,
"beta_dpo/beta_margin_grad_mean": -0.48160073161125183,
"beta_dpo/beta_margin_grad_std": 0.05491115152835846,
"beta_dpo/beta_margin_mean": 0.07462549209594727,
"beta_dpo/beta_margin_std": 0.22261567413806915,
"beta_dpo/beta_used": 0.10206159949302673,
"beta_dpo/beta_used_raw": 0.10206159949302673,
"beta_dpo/gap_mean": 0.5927486419677734,
"beta_dpo/gap_std": 2.1546518802642822,
"beta_dpo/loss_margin_mean": 0.7219003438949585,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.16780045351473924,
"grad_norm": 22.11334800720215,
"learning_rate": 4.935627386698418e-07,
"logits/chosen": 1.544306755065918,
"logits/rejected": 1.4732277393341064,
"loss": 1.3285,
"step": 111
},
{
"beta_dpo/beta": 0.11867986619472504,
"beta_dpo/beta_margin_grad_mean": -0.4607756435871124,
"beta_dpo/beta_margin_grad_std": 0.06654529273509979,
"beta_dpo/beta_margin_mean": 0.16026724874973297,
"beta_dpo/beta_margin_std": 0.272041380405426,
"beta_dpo/beta_used": 0.11867986619472504,
"beta_dpo/beta_used_raw": 0.11867986619472504,
"beta_dpo/gap_mean": 0.720818817615509,
"beta_dpo/gap_std": 2.1644039154052734,
"beta_dpo/loss_margin_mean": 1.338735580444336,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.1693121693121693,
"grad_norm": 28.8944091796875,
"learning_rate": 4.932612176449559e-07,
"logits/chosen": 1.4725078344345093,
"logits/rejected": 1.3775207996368408,
"loss": 1.2772,
"step": 112
},
{
"beta_dpo/beta": 0.09883327782154083,
"beta_dpo/beta_margin_grad_mean": -0.48374107480049133,
"beta_dpo/beta_margin_grad_std": 0.062059108167886734,
"beta_dpo/beta_margin_mean": 0.06639490276575089,
"beta_dpo/beta_margin_std": 0.2554260790348053,
"beta_dpo/beta_used": 0.09883327782154083,
"beta_dpo/beta_used_raw": 0.09883327782154083,
"beta_dpo/gap_mean": 0.7182120084762573,
"beta_dpo/gap_std": 2.257500171661377,
"beta_dpo/loss_margin_mean": 0.642180323600769,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.1708238851095994,
"grad_norm": 21.073177337646484,
"learning_rate": 4.929528920808854e-07,
"logits/chosen": 1.2748922109603882,
"logits/rejected": 1.260689377784729,
"loss": 1.322,
"step": 113
},
{
"beta_dpo/beta": 0.09910961240530014,
"beta_dpo/beta_margin_grad_mean": -0.481128066778183,
"beta_dpo/beta_margin_grad_std": 0.05786411464214325,
"beta_dpo/beta_margin_mean": 0.0767405554652214,
"beta_dpo/beta_margin_std": 0.23470765352249146,
"beta_dpo/beta_used": 0.09910961240530014,
"beta_dpo/beta_used_raw": 0.09910961240530014,
"beta_dpo/gap_mean": 0.7387478351593018,
"beta_dpo/gap_std": 2.291405200958252,
"beta_dpo/loss_margin_mean": 0.7726707458496094,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.17233560090702948,
"grad_norm": 21.475975036621094,
"learning_rate": 4.92637770602159e-07,
"logits/chosen": 1.1779590845108032,
"logits/rejected": 1.1033458709716797,
"loss": 1.325,
"step": 114
},
{
"beta_dpo/beta": 0.098334401845932,
"beta_dpo/beta_margin_grad_mean": -0.471476286649704,
"beta_dpo/beta_margin_grad_std": 0.06358911842107773,
"beta_dpo/beta_margin_mean": 0.11761815845966339,
"beta_dpo/beta_margin_std": 0.26342275738716125,
"beta_dpo/beta_used": 0.098334401845932,
"beta_dpo/beta_used_raw": 0.098334401845932,
"beta_dpo/gap_mean": 0.8120362758636475,
"beta_dpo/gap_std": 2.3379993438720703,
"beta_dpo/loss_margin_mean": 1.1042755842208862,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.17384731670445955,
"grad_norm": 22.567617416381836,
"learning_rate": 4.923158620234019e-07,
"logits/chosen": 1.1222329139709473,
"logits/rejected": 1.0455198287963867,
"loss": 1.3132,
"step": 115
},
{
"beta_dpo/beta": 0.11336952447891235,
"beta_dpo/beta_margin_grad_mean": -0.4619391858577728,
"beta_dpo/beta_margin_grad_std": 0.06697308272123337,
"beta_dpo/beta_margin_mean": 0.15571817755699158,
"beta_dpo/beta_margin_std": 0.27545690536499023,
"beta_dpo/beta_used": 0.11336952447891235,
"beta_dpo/beta_used_raw": 0.11336952447891235,
"beta_dpo/gap_mean": 0.8705282211303711,
"beta_dpo/gap_std": 2.379244804382324,
"beta_dpo/loss_margin_mean": 1.3704155683517456,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.17535903250188964,
"grad_norm": 25.011146545410156,
"learning_rate": 4.91987175349089e-07,
"logits/chosen": 1.3455551862716675,
"logits/rejected": 1.290403962135315,
"loss": 1.2767,
"step": 116
},
{
"beta_dpo/beta": 0.0890221819281578,
"beta_dpo/beta_margin_grad_mean": -0.47394704818725586,
"beta_dpo/beta_margin_grad_std": 0.059818971902132034,
"beta_dpo/beta_margin_mean": 0.105972059071064,
"beta_dpo/beta_margin_std": 0.24295583367347717,
"beta_dpo/beta_used": 0.0890221819281578,
"beta_dpo/beta_used_raw": 0.0890221819281578,
"beta_dpo/gap_mean": 0.9534369111061096,
"beta_dpo/gap_std": 2.405941963195801,
"beta_dpo/loss_margin_mean": 1.0419647693634033,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.17687074829931973,
"grad_norm": 19.85879898071289,
"learning_rate": 4.916517197732933e-07,
"logits/chosen": 0.9891796708106995,
"logits/rejected": 0.9565305709838867,
"loss": 1.3148,
"step": 117
},
{
"beta_dpo/beta": 0.09003470093011856,
"beta_dpo/beta_margin_grad_mean": -0.4769829511642456,
"beta_dpo/beta_margin_grad_std": 0.0587238110601902,
"beta_dpo/beta_margin_mean": 0.09337636828422546,
"beta_dpo/beta_margin_std": 0.2402600198984146,
"beta_dpo/beta_used": 0.09003470093011856,
"beta_dpo/beta_used_raw": 0.09003470093011856,
"beta_dpo/gap_mean": 0.9489431977272034,
"beta_dpo/gap_std": 2.446256399154663,
"beta_dpo/loss_margin_mean": 1.0371025800704956,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.17838246409674982,
"grad_norm": 21.60437774658203,
"learning_rate": 4.913095046794281e-07,
"logits/chosen": 1.2734606266021729,
"logits/rejected": 1.2298357486724854,
"loss": 1.3131,
"step": 118
},
{
"beta_dpo/beta": 0.0933263748884201,
"beta_dpo/beta_margin_grad_mean": -0.47756442427635193,
"beta_dpo/beta_margin_grad_std": 0.08078356087207794,
"beta_dpo/beta_margin_mean": 0.09172452986240387,
"beta_dpo/beta_margin_std": 0.33637452125549316,
"beta_dpo/beta_used": 0.0933263748884201,
"beta_dpo/beta_used_raw": 0.0933263748884201,
"beta_dpo/gap_mean": 0.931819498538971,
"beta_dpo/gap_std": 2.5663132667541504,
"beta_dpo/loss_margin_mean": 0.9030270576477051,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.17989417989417988,
"grad_norm": 21.077136993408203,
"learning_rate": 4.909605396399855e-07,
"logits/chosen": 1.2811903953552246,
"logits/rejected": 1.2313565015792847,
"loss": 1.3155,
"step": 119
},
{
"beta_dpo/beta": 0.11881925165653229,
"beta_dpo/beta_margin_grad_mean": -0.4537702798843384,
"beta_dpo/beta_margin_grad_std": 0.06741677224636078,
"beta_dpo/beta_margin_mean": 0.18935920298099518,
"beta_dpo/beta_margin_std": 0.2780967950820923,
"beta_dpo/beta_used": 0.11881925165653229,
"beta_dpo/beta_used_raw": 0.11881925165653229,
"beta_dpo/gap_mean": 1.0412273406982422,
"beta_dpo/gap_std": 2.5785160064697266,
"beta_dpo/loss_margin_mean": 1.592591404914856,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.18140589569160998,
"grad_norm": 27.940187454223633,
"learning_rate": 4.906048344162676e-07,
"logits/chosen": 1.5376054048538208,
"logits/rejected": 1.464134693145752,
"loss": 1.2436,
"step": 120
},
{
"beta_dpo/beta": 0.09064137935638428,
"beta_dpo/beta_margin_grad_mean": -0.47120246291160583,
"beta_dpo/beta_margin_grad_std": 0.06724441051483154,
"beta_dpo/beta_margin_mean": 0.11866184324026108,
"beta_dpo/beta_margin_std": 0.27661600708961487,
"beta_dpo/beta_used": 0.09064137935638428,
"beta_dpo/beta_used_raw": 0.09064137935638428,
"beta_dpo/gap_mean": 1.1125645637512207,
"beta_dpo/gap_std": 2.647150993347168,
"beta_dpo/loss_margin_mean": 1.2891712188720703,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.18291761148904007,
"grad_norm": 20.935619354248047,
"learning_rate": 4.902423989581143e-07,
"logits/chosen": 1.4415934085845947,
"logits/rejected": 1.3207588195800781,
"loss": 1.3101,
"step": 121
},
{
"beta_dpo/beta": 0.07106913626194,
"beta_dpo/beta_margin_grad_mean": -0.4810297191143036,
"beta_dpo/beta_margin_grad_std": 0.05604850500822067,
"beta_dpo/beta_margin_mean": 0.07772746682167053,
"beta_dpo/beta_margin_std": 0.22904185950756073,
"beta_dpo/beta_used": 0.07106913626194,
"beta_dpo/beta_used_raw": 0.07106913626194,
"beta_dpo/gap_mean": 1.0930296182632446,
"beta_dpo/gap_std": 2.7709574699401855,
"beta_dpo/loss_margin_mean": 1.0711023807525635,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.18442932728647016,
"grad_norm": 17.042362213134766,
"learning_rate": 4.898732434036243e-07,
"logits/chosen": 1.2231804132461548,
"logits/rejected": 1.1708978414535522,
"loss": 1.3483,
"step": 122
},
{
"beta_dpo/beta": 0.09615612775087357,
"beta_dpo/beta_margin_grad_mean": -0.46688589453697205,
"beta_dpo/beta_margin_grad_std": 0.0644429549574852,
"beta_dpo/beta_margin_mean": 0.13525746762752533,
"beta_dpo/beta_margin_std": 0.2637210488319397,
"beta_dpo/beta_used": 0.09615612775087357,
"beta_dpo/beta_used_raw": 0.09615612775087357,
"beta_dpo/gap_mean": 1.1396965980529785,
"beta_dpo/gap_std": 2.8205223083496094,
"beta_dpo/loss_margin_mean": 1.378818154335022,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.18594104308390022,
"grad_norm": 20.993009567260742,
"learning_rate": 4.894973780788722e-07,
"logits/chosen": 1.1314191818237305,
"logits/rejected": 1.0978955030441284,
"loss": 1.2832,
"step": 123
},
{
"beta_dpo/beta": 0.11089035868644714,
"beta_dpo/beta_margin_grad_mean": -0.45878610014915466,
"beta_dpo/beta_margin_grad_std": 0.09837619960308075,
"beta_dpo/beta_margin_mean": 0.1747506558895111,
"beta_dpo/beta_margin_std": 0.42173826694488525,
"beta_dpo/beta_used": 0.11089035868644714,
"beta_dpo/beta_used_raw": 0.11089035868644714,
"beta_dpo/gap_mean": 1.1973506212234497,
"beta_dpo/gap_std": 2.9226651191711426,
"beta_dpo/loss_margin_mean": 1.5113928318023682,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.1874527588813303,
"grad_norm": 23.7943172454834,
"learning_rate": 4.89114813497619e-07,
"logits/chosen": 1.3841103315353394,
"logits/rejected": 1.369675874710083,
"loss": 1.2484,
"step": 124
},
{
"beta_dpo/beta": 0.0960138589143753,
"beta_dpo/beta_margin_grad_mean": -0.46052810549736023,
"beta_dpo/beta_margin_grad_std": 0.0767231211066246,
"beta_dpo/beta_margin_mean": 0.16436608135700226,
"beta_dpo/beta_margin_std": 0.32474106550216675,
"beta_dpo/beta_used": 0.0960138589143753,
"beta_dpo/beta_used_raw": 0.0960138589143753,
"beta_dpo/gap_mean": 1.298264741897583,
"beta_dpo/gap_std": 2.985287666320801,
"beta_dpo/loss_margin_mean": 1.5015137195587158,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.1889644746787604,
"grad_norm": 24.529010772705078,
"learning_rate": 4.887255603610184e-07,
"logits/chosen": 1.300724744796753,
"logits/rejected": 1.2149507999420166,
"loss": 1.2739,
"step": 125
},
{
"beta_dpo/beta": 0.04256870597600937,
"beta_dpo/beta_margin_grad_mean": -0.49036645889282227,
"beta_dpo/beta_margin_grad_std": 0.03939548879861832,
"beta_dpo/beta_margin_mean": 0.03884674236178398,
"beta_dpo/beta_margin_std": 0.15890488028526306,
"beta_dpo/beta_used": 0.04256870597600937,
"beta_dpo/beta_used_raw": 0.04256870597600937,
"beta_dpo/gap_mean": 1.199582576751709,
"beta_dpo/gap_std": 3.043370246887207,
"beta_dpo/loss_margin_mean": 0.8355753421783447,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.19047619047619047,
"grad_norm": 12.087654113769531,
"learning_rate": 4.883296295573176e-07,
"logits/chosen": 1.3451451063156128,
"logits/rejected": 1.3578057289123535,
"loss": 1.3751,
"step": 126
},
{
"beta_dpo/beta": 0.10232022404670715,
"beta_dpo/beta_margin_grad_mean": -0.4628364145755768,
"beta_dpo/beta_margin_grad_std": 0.0830625593662262,
"beta_dpo/beta_margin_mean": 0.15574322640895844,
"beta_dpo/beta_margin_std": 0.34838613867759705,
"beta_dpo/beta_used": 0.10232022404670715,
"beta_dpo/beta_used_raw": 0.10232022404670715,
"beta_dpo/gap_mean": 1.2289824485778809,
"beta_dpo/gap_std": 3.0530147552490234,
"beta_dpo/loss_margin_mean": 1.205783486366272,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.19198790627362056,
"grad_norm": 25.341848373413086,
"learning_rate": 4.87927032161552e-07,
"logits/chosen": 1.2351047992706299,
"logits/rejected": 1.2235056161880493,
"loss": 1.2569,
"step": 127
},
{
"beta_dpo/beta": 0.07750297337770462,
"beta_dpo/beta_margin_grad_mean": -0.47596895694732666,
"beta_dpo/beta_margin_grad_std": 0.08230820298194885,
"beta_dpo/beta_margin_mean": 0.09930390119552612,
"beta_dpo/beta_margin_std": 0.34651416540145874,
"beta_dpo/beta_used": 0.07750297337770462,
"beta_dpo/beta_used_raw": 0.07750297337770462,
"beta_dpo/gap_mean": 1.1985833644866943,
"beta_dpo/gap_std": 3.221245765686035,
"beta_dpo/loss_margin_mean": 1.22560453414917,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.19349962207105065,
"grad_norm": 21.444602966308594,
"learning_rate": 4.875177794352363e-07,
"logits/chosen": 1.3550379276275635,
"logits/rejected": 1.2482268810272217,
"loss": 1.318,
"step": 128
},
{
"beta_dpo/beta": 0.071719691157341,
"beta_dpo/beta_margin_grad_mean": -0.4847745895385742,
"beta_dpo/beta_margin_grad_std": 0.07170508801937103,
"beta_dpo/beta_margin_mean": 0.06323404610157013,
"beta_dpo/beta_margin_std": 0.29615992307662964,
"beta_dpo/beta_used": 0.071719691157341,
"beta_dpo/beta_used_raw": 0.071719691157341,
"beta_dpo/gap_mean": 1.13083016872406,
"beta_dpo/gap_std": 3.433652877807617,
"beta_dpo/loss_margin_mean": 0.8432696461677551,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.19501133786848074,
"grad_norm": 17.289493560791016,
"learning_rate": 4.871018828260491e-07,
"logits/chosen": 1.38811457157135,
"logits/rejected": 1.3899645805358887,
"loss": 1.3496,
"step": 129
},
{
"beta_dpo/beta": 0.0844159722328186,
"beta_dpo/beta_margin_grad_mean": -0.46980515122413635,
"beta_dpo/beta_margin_grad_std": 0.06665448844432831,
"beta_dpo/beta_margin_mean": 0.12363045662641525,
"beta_dpo/beta_margin_std": 0.272417277097702,
"beta_dpo/beta_used": 0.0844159722328186,
"beta_dpo/beta_used_raw": 0.0844159722328186,
"beta_dpo/gap_mean": 1.1865178346633911,
"beta_dpo/gap_std": 3.484450340270996,
"beta_dpo/loss_margin_mean": 1.487868070602417,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.1965230536659108,
"grad_norm": 20.753738403320312,
"learning_rate": 4.866793539675126e-07,
"logits/chosen": 1.2308127880096436,
"logits/rejected": 1.1961195468902588,
"loss": 1.3181,
"step": 130
},
{
"beta_dpo/beta": 0.10693139582872391,
"beta_dpo/beta_margin_grad_mean": -0.4493742883205414,
"beta_dpo/beta_margin_grad_std": 0.10129056125879288,
"beta_dpo/beta_margin_mean": 0.21827171742916107,
"beta_dpo/beta_margin_std": 0.4491637051105499,
"beta_dpo/beta_used": 0.10693139582872391,
"beta_dpo/beta_used_raw": 0.10693139582872391,
"beta_dpo/gap_mean": 1.32501220703125,
"beta_dpo/gap_std": 3.5505361557006836,
"beta_dpo/loss_margin_mean": 2.0043649673461914,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.1980347694633409,
"grad_norm": 24.34880828857422,
"learning_rate": 4.86250204678667e-07,
"logits/chosen": 1.4657068252563477,
"logits/rejected": 1.3357291221618652,
"loss": 1.2559,
"step": 131
},
{
"beta_dpo/beta": 0.10015638172626495,
"beta_dpo/beta_margin_grad_mean": -0.47250378131866455,
"beta_dpo/beta_margin_grad_std": 0.07719095796346664,
"beta_dpo/beta_margin_mean": 0.11288213729858398,
"beta_dpo/beta_margin_std": 0.3209708034992218,
"beta_dpo/beta_used": 0.10015638172626495,
"beta_dpo/beta_used_raw": 0.10015638172626495,
"beta_dpo/gap_mean": 1.313185453414917,
"beta_dpo/gap_std": 3.496281385421753,
"beta_dpo/loss_margin_mean": 1.1516209840774536,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.19954648526077098,
"grad_norm": 23.72968864440918,
"learning_rate": 4.858144469637408e-07,
"logits/chosen": 1.3843731880187988,
"logits/rejected": 1.329111099243164,
"loss": 1.269,
"step": 132
},
{
"beta_dpo/beta": 0.09442687034606934,
"beta_dpo/beta_margin_grad_mean": -0.4739525318145752,
"beta_dpo/beta_margin_grad_std": 0.08487001806497574,
"beta_dpo/beta_margin_mean": 0.10770122706890106,
"beta_dpo/beta_margin_std": 0.35225582122802734,
"beta_dpo/beta_used": 0.09442687034606934,
"beta_dpo/beta_used_raw": 0.09442687034606934,
"beta_dpo/gap_mean": 1.2856841087341309,
"beta_dpo/gap_std": 3.4838268756866455,
"beta_dpo/loss_margin_mean": 1.1189558506011963,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.20105820105820105,
"grad_norm": 21.97116470336914,
"learning_rate": 4.853720930118138e-07,
"logits/chosen": 1.1641952991485596,
"logits/rejected": 1.1831367015838623,
"loss": 1.283,
"step": 133
},
{
"beta_dpo/beta": 0.11237208545207977,
"beta_dpo/beta_margin_grad_mean": -0.4434339702129364,
"beta_dpo/beta_margin_grad_std": 0.11113660782575607,
"beta_dpo/beta_margin_mean": 0.2420782893896103,
"beta_dpo/beta_margin_std": 0.48950013518333435,
"beta_dpo/beta_used": 0.11237208545207977,
"beta_dpo/beta_used_raw": 0.11237208545207977,
"beta_dpo/gap_mean": 1.4053146839141846,
"beta_dpo/gap_std": 3.598628044128418,
"beta_dpo/loss_margin_mean": 2.1522929668426514,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.20256991685563114,
"grad_norm": 23.428014755249023,
"learning_rate": 4.849231551964771e-07,
"logits/chosen": 1.3531758785247803,
"logits/rejected": 1.2881067991256714,
"loss": 1.2356,
"step": 134
},
{
"beta_dpo/beta": 0.09549751877784729,
"beta_dpo/beta_margin_grad_mean": -0.46873098611831665,
"beta_dpo/beta_margin_grad_std": 0.08153299987316132,
"beta_dpo/beta_margin_mean": 0.12783578038215637,
"beta_dpo/beta_margin_std": 0.3363261818885803,
"beta_dpo/beta_used": 0.09549751877784729,
"beta_dpo/beta_used_raw": 0.09549751877784729,
"beta_dpo/gap_mean": 1.4151959419250488,
"beta_dpo/gap_std": 3.6370739936828613,
"beta_dpo/loss_margin_mean": 1.3293631076812744,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.20408163265306123,
"grad_norm": 20.11992835998535,
"learning_rate": 4.844676460754862e-07,
"logits/chosen": 1.2607423067092896,
"logits/rejected": 1.2528090476989746,
"loss": 1.2802,
"step": 135
},
{
"beta_dpo/beta": 0.10741549730300903,
"beta_dpo/beta_margin_grad_mean": -0.44642412662506104,
"beta_dpo/beta_margin_grad_std": 0.13632191717624664,
"beta_dpo/beta_margin_mean": 0.24029631912708282,
"beta_dpo/beta_margin_std": 0.6096994876861572,
"beta_dpo/beta_used": 0.10741549730300903,
"beta_dpo/beta_used_raw": 0.10741549730300903,
"beta_dpo/gap_mean": 1.5326428413391113,
"beta_dpo/gap_std": 3.845479965209961,
"beta_dpo/loss_margin_mean": 1.8686842918395996,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.20559334845049132,
"grad_norm": 27.862178802490234,
"learning_rate": 4.840055783904106e-07,
"logits/chosen": 1.2084816694259644,
"logits/rejected": 1.0885181427001953,
"loss": 1.2433,
"step": 136
},
{
"beta_dpo/beta": 0.08260773122310638,
"beta_dpo/beta_margin_grad_mean": -0.45568960905075073,
"beta_dpo/beta_margin_grad_std": 0.08530600368976593,
"beta_dpo/beta_margin_mean": 0.18747077882289886,
"beta_dpo/beta_margin_std": 0.3693680465221405,
"beta_dpo/beta_used": 0.08260773122310638,
"beta_dpo/beta_used_raw": 0.08260773122310638,
"beta_dpo/gap_mean": 1.5443904399871826,
"beta_dpo/gap_std": 3.970661163330078,
"beta_dpo/loss_margin_mean": 2.1117563247680664,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.20710506424792138,
"grad_norm": 17.30738639831543,
"learning_rate": 4.835369650662767e-07,
"logits/chosen": 1.235527515411377,
"logits/rejected": 1.187445878982544,
"loss": 1.2956,
"step": 137
},
{
"beta_dpo/beta": 0.08060777932405472,
"beta_dpo/beta_margin_grad_mean": -0.4737657308578491,
"beta_dpo/beta_margin_grad_std": 0.0942247286438942,
"beta_dpo/beta_margin_mean": 0.10887736827135086,
"beta_dpo/beta_margin_std": 0.39904049038887024,
"beta_dpo/beta_used": 0.08060777932405472,
"beta_dpo/beta_used_raw": 0.08060777932405472,
"beta_dpo/gap_mean": 1.5993130207061768,
"beta_dpo/gap_std": 4.022543430328369,
"beta_dpo/loss_margin_mean": 1.4106897115707397,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.20861678004535147,
"grad_norm": 17.746089935302734,
"learning_rate": 4.830618192112065e-07,
"logits/chosen": 1.0170423984527588,
"logits/rejected": 0.9589405655860901,
"loss": 1.2953,
"step": 138
},
{
"beta_dpo/beta": 0.10856246948242188,
"beta_dpo/beta_margin_grad_mean": -0.4657098054885864,
"beta_dpo/beta_margin_grad_std": 0.11793362349271774,
"beta_dpo/beta_margin_mean": 0.14230769872665405,
"beta_dpo/beta_margin_std": 0.5166020393371582,
"beta_dpo/beta_used": 0.10856246948242188,
"beta_dpo/beta_used_raw": 0.10856246948242188,
"beta_dpo/gap_mean": 1.5401983261108398,
"beta_dpo/gap_std": 4.134028434753418,
"beta_dpo/loss_margin_mean": 1.2866054773330688,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.21012849584278157,
"grad_norm": 25.271020889282227,
"learning_rate": 4.825801541160509e-07,
"logits/chosen": 1.1534160375595093,
"logits/rejected": 1.1338095664978027,
"loss": 1.2272,
"step": 139
},
{
"beta_dpo/beta": 0.12550222873687744,
"beta_dpo/beta_margin_grad_mean": -0.4339686632156372,
"beta_dpo/beta_margin_grad_std": 0.14929716289043427,
"beta_dpo/beta_margin_mean": 0.29860758781433105,
"beta_dpo/beta_margin_std": 0.6612539887428284,
"beta_dpo/beta_used": 0.12550222873687744,
"beta_dpo/beta_used_raw": 0.12550222873687744,
"beta_dpo/gap_mean": 1.656071424484253,
"beta_dpo/gap_std": 4.27004337310791,
"beta_dpo/loss_margin_mean": 2.2801015377044678,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.21164021164021163,
"grad_norm": 29.566516876220703,
"learning_rate": 4.820919832540181e-07,
"logits/chosen": 1.0163750648498535,
"logits/rejected": 0.9965271949768066,
"loss": 1.2062,
"step": 140
},
{
"beta_dpo/beta": 0.15269500017166138,
"beta_dpo/beta_margin_grad_mean": -0.4212610125541687,
"beta_dpo/beta_margin_grad_std": 0.16798368096351624,
"beta_dpo/beta_margin_mean": 0.35794302821159363,
"beta_dpo/beta_margin_std": 0.7773640155792236,
"beta_dpo/beta_used": 0.15269500017166138,
"beta_dpo/beta_used_raw": 0.15269500017166138,
"beta_dpo/gap_mean": 1.7711751461029053,
"beta_dpo/gap_std": 4.404110908508301,
"beta_dpo/loss_margin_mean": 2.247818946838379,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.21315192743764172,
"grad_norm": 29.784658432006836,
"learning_rate": 4.815973202802966e-07,
"logits/chosen": 1.078611135482788,
"logits/rejected": 1.057413101196289,
"loss": 1.1011,
"step": 141
},
{
"beta_dpo/beta": 0.06439286470413208,
"beta_dpo/beta_margin_grad_mean": -0.4754011332988739,
"beta_dpo/beta_margin_grad_std": 0.07499177008867264,
"beta_dpo/beta_margin_mean": 0.10258456319570541,
"beta_dpo/beta_margin_std": 0.31377243995666504,
"beta_dpo/beta_used": 0.06439286470413208,
"beta_dpo/beta_used_raw": 0.06439286470413208,
"beta_dpo/gap_mean": 1.7432494163513184,
"beta_dpo/gap_std": 4.436220169067383,
"beta_dpo/loss_margin_mean": 1.5465246438980103,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2146636432350718,
"grad_norm": 15.055063247680664,
"learning_rate": 4.810961790316729e-07,
"logits/chosen": 1.4186744689941406,
"logits/rejected": 1.3770642280578613,
"loss": 1.3194,
"step": 142
},
{
"beta_dpo/beta": 0.09157180786132812,
"beta_dpo/beta_margin_grad_mean": -0.47178128361701965,
"beta_dpo/beta_margin_grad_std": 0.11874634772539139,
"beta_dpo/beta_margin_mean": 0.12287455052137375,
"beta_dpo/beta_margin_std": 0.5095570087432861,
"beta_dpo/beta_used": 0.09157180786132812,
"beta_dpo/beta_used_raw": 0.09157180786132812,
"beta_dpo/gap_mean": 1.6417368650436401,
"beta_dpo/gap_std": 4.523853302001953,
"beta_dpo/loss_margin_mean": 1.2595770359039307,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2161753590325019,
"grad_norm": 21.15188217163086,
"learning_rate": 4.805885735261454e-07,
"logits/chosen": 1.0811662673950195,
"logits/rejected": 1.070796251296997,
"loss": 1.2878,
"step": 143
},
{
"beta_dpo/beta": 0.061950840055942535,
"beta_dpo/beta_margin_grad_mean": -0.4810461699962616,
"beta_dpo/beta_margin_grad_std": 0.0924961045384407,
"beta_dpo/beta_margin_mean": 0.08284606039524078,
"beta_dpo/beta_margin_std": 0.39326006174087524,
"beta_dpo/beta_used": 0.061950840055942535,
"beta_dpo/beta_used_raw": 0.061950840055942535,
"beta_dpo/gap_mean": 1.5933493375778198,
"beta_dpo/gap_std": 4.785058498382568,
"beta_dpo/loss_margin_mean": 1.0874993801116943,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.21768707482993196,
"grad_norm": 15.518567085266113,
"learning_rate": 4.800745179625307e-07,
"logits/chosen": 1.2409164905548096,
"logits/rejected": 1.2216153144836426,
"loss": 1.3392,
"step": 144
},
{
"beta_dpo/beta": 0.14211627840995789,
"beta_dpo/beta_margin_grad_mean": -0.4476253092288971,
"beta_dpo/beta_margin_grad_std": 0.17776721715927124,
"beta_dpo/beta_margin_mean": 0.22671912610530853,
"beta_dpo/beta_margin_std": 0.8023654222488403,
"beta_dpo/beta_used": 0.14211627840995789,
"beta_dpo/beta_used_raw": 0.14211627840995789,
"beta_dpo/gap_mean": 1.5565340518951416,
"beta_dpo/gap_std": 4.962738037109375,
"beta_dpo/loss_margin_mean": 1.6014819145202637,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.21919879062736206,
"grad_norm": 36.54046630859375,
"learning_rate": 4.795540267200686e-07,
"logits/chosen": 1.0764864683151245,
"logits/rejected": 1.0836384296417236,
"loss": 1.1807,
"step": 145
},
{
"beta_dpo/beta": 0.10875533521175385,
"beta_dpo/beta_margin_grad_mean": -0.470254123210907,
"beta_dpo/beta_margin_grad_std": 0.12472882121801376,
"beta_dpo/beta_margin_mean": 0.12233484536409378,
"beta_dpo/beta_margin_std": 0.5434421896934509,
"beta_dpo/beta_used": 0.10875533521175385,
"beta_dpo/beta_used_raw": 0.10875533521175385,
"beta_dpo/gap_mean": 1.456047534942627,
"beta_dpo/gap_std": 4.940698623657227,
"beta_dpo/loss_margin_mean": 1.1148018836975098,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.22071050642479215,
"grad_norm": 22.52288818359375,
"learning_rate": 4.790271143580173e-07,
"logits/chosen": 0.9124500751495361,
"logits/rejected": 0.9365319013595581,
"loss": 1.2602,
"step": 146
},
{
"beta_dpo/beta": 0.09407276660203934,
"beta_dpo/beta_margin_grad_mean": -0.4711886942386627,
"beta_dpo/beta_margin_grad_std": 0.11380936205387115,
"beta_dpo/beta_margin_mean": 0.12104871869087219,
"beta_dpo/beta_margin_std": 0.48799121379852295,
"beta_dpo/beta_used": 0.09407276660203934,
"beta_dpo/beta_used_raw": 0.09407276660203934,
"beta_dpo/gap_mean": 1.4357414245605469,
"beta_dpo/gap_std": 4.992695331573486,
"beta_dpo/loss_margin_mean": 1.252655267715454,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2222222222222222,
"grad_norm": 22.755189895629883,
"learning_rate": 4.784937956152489e-07,
"logits/chosen": 1.3807456493377686,
"logits/rejected": 1.3229422569274902,
"loss": 1.2887,
"step": 147
},
{
"beta_dpo/beta": 0.15577402710914612,
"beta_dpo/beta_margin_grad_mean": -0.4019036889076233,
"beta_dpo/beta_margin_grad_std": 0.17970719933509827,
"beta_dpo/beta_margin_mean": 0.5029462575912476,
"beta_dpo/beta_margin_std": 0.9811931252479553,
"beta_dpo/beta_used": 0.15577402710914612,
"beta_dpo/beta_used_raw": 0.15577402710914612,
"beta_dpo/gap_mean": 1.5689587593078613,
"beta_dpo/gap_std": 5.067873001098633,
"beta_dpo/loss_margin_mean": 2.766486167907715,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2237339380196523,
"grad_norm": 27.865285873413086,
"learning_rate": 4.779540854098347e-07,
"logits/chosen": 1.3178067207336426,
"logits/rejected": 1.2062838077545166,
"loss": 1.0949,
"step": 148
},
{
"beta_dpo/beta": 0.11285445094108582,
"beta_dpo/beta_margin_grad_mean": -0.4518367052078247,
"beta_dpo/beta_margin_grad_std": 0.1293925940990448,
"beta_dpo/beta_margin_mean": 0.20786000788211823,
"beta_dpo/beta_margin_std": 0.5592925548553467,
"beta_dpo/beta_used": 0.11285445094108582,
"beta_dpo/beta_used_raw": 0.11285445094108582,
"beta_dpo/gap_mean": 1.7002689838409424,
"beta_dpo/gap_std": 5.069310188293457,
"beta_dpo/loss_margin_mean": 1.872825026512146,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2252456538170824,
"grad_norm": 27.668947219848633,
"learning_rate": 4.774079988386296e-07,
"logits/chosen": 1.3109874725341797,
"logits/rejected": 1.2291481494903564,
"loss": 1.2286,
"step": 149
},
{
"beta_dpo/beta": 0.10829520225524902,
"beta_dpo/beta_margin_grad_mean": -0.41798102855682373,
"beta_dpo/beta_margin_grad_std": 0.15737159550189972,
"beta_dpo/beta_margin_mean": 0.3695879876613617,
"beta_dpo/beta_margin_std": 0.7158199548721313,
"beta_dpo/beta_used": 0.10829520225524902,
"beta_dpo/beta_used_raw": 0.10829520225524902,
"beta_dpo/gap_mean": 1.968583345413208,
"beta_dpo/gap_std": 5.368040561676025,
"beta_dpo/loss_margin_mean": 3.411351442337036,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.22675736961451248,
"grad_norm": 28.250102996826172,
"learning_rate": 4.768555511768486e-07,
"logits/chosen": 1.286353349685669,
"logits/rejected": 1.2769120931625366,
"loss": 1.2211,
"step": 150
},
{
"beta_dpo/beta": 0.11118942499160767,
"beta_dpo/beta_margin_grad_mean": -0.4280396103858948,
"beta_dpo/beta_margin_grad_std": 0.14326725900173187,
"beta_dpo/beta_margin_mean": 0.34020793437957764,
"beta_dpo/beta_margin_std": 0.6998366117477417,
"beta_dpo/beta_used": 0.11118942499160767,
"beta_dpo/beta_used_raw": 0.11118942499160767,
"beta_dpo/gap_mean": 2.2183287143707275,
"beta_dpo/gap_std": 5.535046577453613,
"beta_dpo/loss_margin_mean": 3.0132687091827393,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.22826908541194255,
"grad_norm": 23.84425163269043,
"learning_rate": 4.762967578776406e-07,
"logits/chosen": 1.1973973512649536,
"logits/rejected": 1.112386703491211,
"loss": 1.1943,
"step": 151
},
{
"beta_dpo/beta": 0.11190253496170044,
"beta_dpo/beta_margin_grad_mean": -0.4374425411224365,
"beta_dpo/beta_margin_grad_std": 0.14490434527397156,
"beta_dpo/beta_margin_mean": 0.2843562364578247,
"beta_dpo/beta_margin_std": 0.6773840188980103,
"beta_dpo/beta_used": 0.11190253496170044,
"beta_dpo/beta_used_raw": 0.11190253496170044,
"beta_dpo/gap_mean": 2.220742702484131,
"beta_dpo/gap_std": 5.594052314758301,
"beta_dpo/loss_margin_mean": 2.4405252933502197,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.22978080120937264,
"grad_norm": 26.946876525878906,
"learning_rate": 4.757316345716553e-07,
"logits/chosen": 1.482506513595581,
"logits/rejected": 1.3994556665420532,
"loss": 1.1789,
"step": 152
},
{
"beta_dpo/beta": 0.05399642884731293,
"beta_dpo/beta_margin_grad_mean": -0.4605247974395752,
"beta_dpo/beta_margin_grad_std": 0.09529552608728409,
"beta_dpo/beta_margin_mean": 0.17060205340385437,
"beta_dpo/beta_margin_std": 0.4153417646884918,
"beta_dpo/beta_used": 0.05399642884731293,
"beta_dpo/beta_used_raw": 0.03712339699268341,
"beta_dpo/gap_mean": 2.173558473587036,
"beta_dpo/gap_std": 5.56630802154541,
"beta_dpo/loss_margin_mean": 1.9774678945541382,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.23129251700680273,
"grad_norm": 17.23089027404785,
"learning_rate": 4.751601970666064e-07,
"logits/chosen": 1.0739203691482544,
"logits/rejected": 1.0208520889282227,
"loss": 1.2898,
"step": 153
},
{
"beta_dpo/beta": 0.10960409045219421,
"beta_dpo/beta_margin_grad_mean": -0.45456209778785706,
"beta_dpo/beta_margin_grad_std": 0.1558816283941269,
"beta_dpo/beta_margin_mean": 0.19854529201984406,
"beta_dpo/beta_margin_std": 0.6859627962112427,
"beta_dpo/beta_used": 0.10960409045219421,
"beta_dpo/beta_used_raw": 0.10960409045219421,
"beta_dpo/gap_mean": 2.186494827270508,
"beta_dpo/gap_std": 5.6433186531066895,
"beta_dpo/loss_margin_mean": 1.7503312826156616,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2328042328042328,
"grad_norm": 23.929061889648438,
"learning_rate": 4.745824613468292e-07,
"logits/chosen": 1.2299376726150513,
"logits/rejected": 1.1694262027740479,
"loss": 1.2253,
"step": 154
},
{
"beta_dpo/beta": 0.1383783370256424,
"beta_dpo/beta_margin_grad_mean": -0.42082664370536804,
"beta_dpo/beta_margin_grad_std": 0.18120653927326202,
"beta_dpo/beta_margin_mean": 0.3739163279533386,
"beta_dpo/beta_margin_std": 0.8708319067955017,
"beta_dpo/beta_used": 0.1383783370256424,
"beta_dpo/beta_used_raw": 0.1383783370256424,
"beta_dpo/gap_mean": 2.2229208946228027,
"beta_dpo/gap_std": 5.766082763671875,
"beta_dpo/loss_margin_mean": 2.6002511978149414,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.23431594860166288,
"grad_norm": 33.02286148071289,
"learning_rate": 4.7399844357283393e-07,
"logits/chosen": 0.9965620040893555,
"logits/rejected": 0.9758646488189697,
"loss": 1.1397,
"step": 155
},
{
"beta_dpo/beta": 0.1373191773891449,
"beta_dpo/beta_margin_grad_mean": -0.38749340176582336,
"beta_dpo/beta_margin_grad_std": 0.17854855954647064,
"beta_dpo/beta_margin_mean": 0.5832736492156982,
"beta_dpo/beta_margin_std": 0.9946950078010559,
"beta_dpo/beta_used": 0.1373191773891449,
"beta_dpo/beta_used_raw": 0.1373191773891449,
"beta_dpo/gap_mean": 2.40696120262146,
"beta_dpo/gap_std": 5.8164801597595215,
"beta_dpo/loss_margin_mean": 3.823298215866089,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.23582766439909297,
"grad_norm": 33.92511749267578,
"learning_rate": 4.7340816008085305e-07,
"logits/chosen": 1.2832014560699463,
"logits/rejected": 1.2574467658996582,
"loss": 1.1048,
"step": 156
},
{
"beta_dpo/beta": 0.09416471421718597,
"beta_dpo/beta_margin_grad_mean": -0.4425427317619324,
"beta_dpo/beta_margin_grad_std": 0.1606954038143158,
"beta_dpo/beta_margin_mean": 0.2935110926628113,
"beta_dpo/beta_margin_std": 0.8034055829048157,
"beta_dpo/beta_used": 0.09416471421718597,
"beta_dpo/beta_used_raw": 0.09416471421718597,
"beta_dpo/gap_mean": 2.5301156044006348,
"beta_dpo/gap_std": 5.906242847442627,
"beta_dpo/loss_margin_mean": 2.021934986114502,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.23733938019652306,
"grad_norm": 19.796140670776367,
"learning_rate": 4.728116273823847e-07,
"logits/chosen": 1.0816900730133057,
"logits/rejected": 1.103010892868042,
"loss": 1.1954,
"step": 157
},
{
"beta_dpo/beta": 0.1391805112361908,
"beta_dpo/beta_margin_grad_mean": -0.44501587748527527,
"beta_dpo/beta_margin_grad_std": 0.21129848062992096,
"beta_dpo/beta_margin_mean": 0.3119133710861206,
"beta_dpo/beta_margin_std": 1.1190910339355469,
"beta_dpo/beta_used": 0.1391805112361908,
"beta_dpo/beta_used_raw": 0.1391805112361908,
"beta_dpo/gap_mean": 2.3914170265197754,
"beta_dpo/gap_std": 6.122468948364258,
"beta_dpo/loss_margin_mean": 1.9657936096191406,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.23885109599395313,
"grad_norm": 31.855342864990234,
"learning_rate": 4.7220886216373085e-07,
"logits/chosen": 1.1423161029815674,
"logits/rejected": 1.120100975036621,
"loss": 1.1747,
"step": 158
},
{
"beta_dpo/beta": 0.08424553275108337,
"beta_dpo/beta_margin_grad_mean": -0.45276889204978943,
"beta_dpo/beta_margin_grad_std": 0.15022964775562286,
"beta_dpo/beta_margin_mean": 0.22191838920116425,
"beta_dpo/beta_margin_std": 0.7954735159873962,
"beta_dpo/beta_used": 0.08424553275108337,
"beta_dpo/beta_used_raw": 0.08424553275108337,
"beta_dpo/gap_mean": 2.37796688079834,
"beta_dpo/gap_std": 6.245815277099609,
"beta_dpo/loss_margin_mean": 2.7583186626434326,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.24036281179138322,
"grad_norm": 21.775339126586914,
"learning_rate": 4.715998812855304e-07,
"logits/chosen": 1.1031272411346436,
"logits/rejected": 1.0869781970977783,
"loss": 1.2194,
"step": 159
},
{
"beta_dpo/beta": 0.05589519441127777,
"beta_dpo/beta_margin_grad_mean": -0.4664691686630249,
"beta_dpo/beta_margin_grad_std": 0.09677547216415405,
"beta_dpo/beta_margin_mean": 0.14732001721858978,
"beta_dpo/beta_margin_std": 0.42592573165893555,
"beta_dpo/beta_used": 0.05589519441127777,
"beta_dpo/beta_used_raw": 0.05589519441127777,
"beta_dpo/gap_mean": 2.3008532524108887,
"beta_dpo/gap_std": 6.200883865356445,
"beta_dpo/loss_margin_mean": 1.9551855325698853,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2418745275888133,
"grad_norm": 14.37753963470459,
"learning_rate": 4.7098470178228755e-07,
"logits/chosen": 0.9814571142196655,
"logits/rejected": 0.9252926707267761,
"loss": 1.2975,
"step": 160
},
{
"beta_dpo/beta": 0.1535968780517578,
"beta_dpo/beta_margin_grad_mean": -0.4085511267185211,
"beta_dpo/beta_margin_grad_std": 0.18446137011051178,
"beta_dpo/beta_margin_mean": 0.3954410254955292,
"beta_dpo/beta_margin_std": 0.9252437353134155,
"beta_dpo/beta_used": 0.1535968780517578,
"beta_dpo/beta_used_raw": 0.1535968780517578,
"beta_dpo/gap_mean": 2.3478264808654785,
"beta_dpo/gap_std": 6.170980930328369,
"beta_dpo/loss_margin_mean": 2.628091335296631,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.24338624338624337,
"grad_norm": 29.959720611572266,
"learning_rate": 4.703633408618955e-07,
"logits/chosen": 1.5791351795196533,
"logits/rejected": 1.541215181350708,
"loss": 1.0217,
"step": 161
},
{
"beta_dpo/beta": 0.16446077823638916,
"beta_dpo/beta_margin_grad_mean": -0.36990848183631897,
"beta_dpo/beta_margin_grad_std": 0.1834045648574829,
"beta_dpo/beta_margin_mean": 0.5986179709434509,
"beta_dpo/beta_margin_std": 1.0068303346633911,
"beta_dpo/beta_used": 0.16446077823638916,
"beta_dpo/beta_used_raw": 0.16446077823638916,
"beta_dpo/gap_mean": 2.5687732696533203,
"beta_dpo/gap_std": 6.104150295257568,
"beta_dpo/loss_margin_mean": 3.706486463546753,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.24489795918367346,
"grad_norm": 33.98783874511719,
"learning_rate": 4.697358159051549e-07,
"logits/chosen": 1.3096582889556885,
"logits/rejected": 1.2636088132858276,
"loss": 0.9784,
"step": 162
},
{
"beta_dpo/beta": 0.046302828937768936,
"beta_dpo/beta_margin_grad_mean": -0.46506714820861816,
"beta_dpo/beta_margin_grad_std": 0.09726027399301529,
"beta_dpo/beta_margin_mean": 0.14802870154380798,
"beta_dpo/beta_margin_std": 0.41607552766799927,
"beta_dpo/beta_used": 0.046302828937768936,
"beta_dpo/beta_used_raw": 0.040558502078056335,
"beta_dpo/gap_mean": 2.5845842361450195,
"beta_dpo/gap_std": 6.127120018005371,
"beta_dpo/loss_margin_mean": 2.4678986072540283,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.24640967498110355,
"grad_norm": 14.543889045715332,
"learning_rate": 4.691021444652876e-07,
"logits/chosen": 1.0916495323181152,
"logits/rejected": 1.0530033111572266,
"loss": 1.3026,
"step": 163
},
{
"beta_dpo/beta": 0.1552918255329132,
"beta_dpo/beta_margin_grad_mean": -0.3846026659011841,
"beta_dpo/beta_margin_grad_std": 0.21561342477798462,
"beta_dpo/beta_margin_mean": 0.5803137421607971,
"beta_dpo/beta_margin_std": 1.0986645221710205,
"beta_dpo/beta_used": 0.1552918255329132,
"beta_dpo/beta_used_raw": 0.1552918255329132,
"beta_dpo/gap_mean": 2.7750930786132812,
"beta_dpo/gap_std": 6.231865406036377,
"beta_dpo/loss_margin_mean": 3.8408336639404297,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.24792139077853365,
"grad_norm": 33.88530349731445,
"learning_rate": 4.6846234426744624e-07,
"logits/chosen": 1.1498820781707764,
"logits/rejected": 1.050663948059082,
"loss": 1.0648,
"step": 164
},
{
"beta_dpo/beta": 0.07979045063257217,
"beta_dpo/beta_margin_grad_mean": -0.42745912075042725,
"beta_dpo/beta_margin_grad_std": 0.11547538638114929,
"beta_dpo/beta_margin_mean": 0.3313145935535431,
"beta_dpo/beta_margin_std": 0.5704610347747803,
"beta_dpo/beta_used": 0.07979045063257217,
"beta_dpo/beta_used_raw": 0.07979045063257217,
"beta_dpo/gap_mean": 2.877894878387451,
"beta_dpo/gap_std": 6.265934467315674,
"beta_dpo/loss_margin_mean": 3.389314889907837,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2494331065759637,
"grad_norm": 18.06438636779785,
"learning_rate": 4.678164332082175e-07,
"logits/chosen": 1.1153581142425537,
"logits/rejected": 1.0215842723846436,
"loss": 1.187,
"step": 165
},
{
"beta_dpo/beta": 0.10112225264310837,
"beta_dpo/beta_margin_grad_mean": -0.4338551461696625,
"beta_dpo/beta_margin_grad_std": 0.14187559485435486,
"beta_dpo/beta_margin_mean": 0.2968035340309143,
"beta_dpo/beta_margin_std": 0.6275408267974854,
"beta_dpo/beta_used": 0.10112225264310837,
"beta_dpo/beta_used_raw": 0.10112225264310837,
"beta_dpo/gap_mean": 2.9666242599487305,
"beta_dpo/gap_std": 6.206120491027832,
"beta_dpo/loss_margin_mean": 2.9079062938690186,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2509448223733938,
"grad_norm": 25.70857810974121,
"learning_rate": 4.6716442935512214e-07,
"logits/chosen": 1.021344542503357,
"logits/rejected": 0.9538024067878723,
"loss": 1.1783,
"step": 166
},
{
"beta_dpo/beta": 0.06665387749671936,
"beta_dpo/beta_margin_grad_mean": -0.42930203676223755,
"beta_dpo/beta_margin_grad_std": 0.11305904388427734,
"beta_dpo/beta_margin_mean": 0.3195487856864929,
"beta_dpo/beta_margin_std": 0.5409384369850159,
"beta_dpo/beta_used": 0.06665387749671936,
"beta_dpo/beta_used_raw": 0.06665387749671936,
"beta_dpo/gap_mean": 3.075497627258301,
"beta_dpo/gap_std": 6.163358688354492,
"beta_dpo/loss_margin_mean": 3.2095916271209717,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.25245653817082386,
"grad_norm": 19.956995010375977,
"learning_rate": 4.6650635094610966e-07,
"logits/chosen": 0.8962558507919312,
"logits/rejected": 0.8831173181533813,
"loss": 1.2122,
"step": 167
},
{
"beta_dpo/beta": 0.08960846066474915,
"beta_dpo/beta_margin_grad_mean": -0.4323587417602539,
"beta_dpo/beta_margin_grad_std": 0.16092197597026825,
"beta_dpo/beta_margin_mean": 0.3474786579608917,
"beta_dpo/beta_margin_std": 0.8413150310516357,
"beta_dpo/beta_used": 0.08960846066474915,
"beta_dpo/beta_used_raw": 0.07954014092683792,
"beta_dpo/gap_mean": 2.974499464035034,
"beta_dpo/gap_std": 6.192246437072754,
"beta_dpo/loss_margin_mean": 2.4720442295074463,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.25396825396825395,
"grad_norm": 21.843673706054688,
"learning_rate": 4.6584221638904767e-07,
"logits/chosen": 0.9326637983322144,
"logits/rejected": 0.877837061882019,
"loss": 1.1559,
"step": 168
},
{
"beta_dpo/beta": 0.11481602489948273,
"beta_dpo/beta_margin_grad_mean": -0.42150530219078064,
"beta_dpo/beta_margin_grad_std": 0.19413353502750397,
"beta_dpo/beta_margin_mean": 0.3658204674720764,
"beta_dpo/beta_margin_std": 0.9044493436813354,
"beta_dpo/beta_used": 0.11481602489948273,
"beta_dpo/beta_used_raw": 0.11481602489948273,
"beta_dpo/gap_mean": 2.906271457672119,
"beta_dpo/gap_std": 6.456612586975098,
"beta_dpo/loss_margin_mean": 3.1675376892089844,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.25547996976568405,
"grad_norm": 28.51230239868164,
"learning_rate": 4.651720442612075e-07,
"logits/chosen": 1.3749295473098755,
"logits/rejected": 1.37894606590271,
"loss": 1.1722,
"step": 169
},
{
"beta_dpo/beta": 0.06731998920440674,
"beta_dpo/beta_margin_grad_mean": -0.45301589369773865,
"beta_dpo/beta_margin_grad_std": 0.16824722290039062,
"beta_dpo/beta_margin_mean": 0.2330533117055893,
"beta_dpo/beta_margin_std": 0.8312608599662781,
"beta_dpo/beta_used": 0.06731998920440674,
"beta_dpo/beta_used_raw": 0.0670924261212349,
"beta_dpo/gap_mean": 2.697042465209961,
"beta_dpo/gap_std": 6.807780742645264,
"beta_dpo/loss_margin_mean": 1.8298805952072144,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.25699168556311414,
"grad_norm": 19.64339256286621,
"learning_rate": 4.6449585330874425e-07,
"logits/chosen": 1.0180065631866455,
"logits/rejected": 1.0438951253890991,
"loss": 1.2866,
"step": 170
},
{
"beta_dpo/beta": 0.1436457633972168,
"beta_dpo/beta_margin_grad_mean": -0.4028518795967102,
"beta_dpo/beta_margin_grad_std": 0.22378672659397125,
"beta_dpo/beta_margin_mean": 0.47177597880363464,
"beta_dpo/beta_margin_std": 1.18657386302948,
"beta_dpo/beta_used": 0.1436457633972168,
"beta_dpo/beta_used_raw": 0.1436457633972168,
"beta_dpo/gap_mean": 2.872105836868286,
"beta_dpo/gap_std": 7.098209381103516,
"beta_dpo/loss_margin_mean": 3.190307378768921,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2585034013605442,
"grad_norm": 33.582054138183594,
"learning_rate": 4.6381366244617224e-07,
"logits/chosen": 1.2623517513275146,
"logits/rejected": 1.1890509128570557,
"loss": 1.1151,
"step": 171
},
{
"beta_dpo/beta": 0.10588417947292328,
"beta_dpo/beta_margin_grad_mean": -0.42059195041656494,
"beta_dpo/beta_margin_grad_std": 0.15911069512367249,
"beta_dpo/beta_margin_mean": 0.3817267417907715,
"beta_dpo/beta_margin_std": 0.7768465876579285,
"beta_dpo/beta_used": 0.10588417947292328,
"beta_dpo/beta_used_raw": 0.10588417947292328,
"beta_dpo/gap_mean": 2.9096975326538086,
"beta_dpo/gap_std": 7.087888717651367,
"beta_dpo/loss_margin_mean": 3.553881883621216,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2600151171579743,
"grad_norm": 24.649341583251953,
"learning_rate": 4.631254907558365e-07,
"logits/chosen": 1.0718541145324707,
"logits/rejected": 1.0470389127731323,
"loss": 1.1812,
"step": 172
},
{
"beta_dpo/beta": 0.08374869078397751,
"beta_dpo/beta_margin_grad_mean": -0.41811954975128174,
"beta_dpo/beta_margin_grad_std": 0.1825326383113861,
"beta_dpo/beta_margin_mean": 0.5075841546058655,
"beta_dpo/beta_margin_std": 1.1430001258850098,
"beta_dpo/beta_used": 0.08374869078397751,
"beta_dpo/beta_used_raw": 0.055781036615371704,
"beta_dpo/gap_mean": 3.2391250133514404,
"beta_dpo/gap_std": 7.3421783447265625,
"beta_dpo/loss_margin_mean": 4.41142463684082,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2615268329554044,
"grad_norm": 20.36656951904297,
"learning_rate": 4.624313574873786e-07,
"logits/chosen": 1.2261898517608643,
"logits/rejected": 1.0682613849639893,
"loss": 1.1901,
"step": 173
},
{
"beta_dpo/beta": 0.1480782926082611,
"beta_dpo/beta_margin_grad_mean": -0.3678121864795685,
"beta_dpo/beta_margin_grad_std": 0.20388971269130707,
"beta_dpo/beta_margin_mean": 0.6513935923576355,
"beta_dpo/beta_margin_std": 1.0541517734527588,
"beta_dpo/beta_used": 0.1480782926082611,
"beta_dpo/beta_used_raw": 0.1480782926082611,
"beta_dpo/gap_mean": 3.404081344604492,
"beta_dpo/gap_std": 7.347430229187012,
"beta_dpo/loss_margin_mean": 4.346465110778809,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.26303854875283444,
"grad_norm": 30.293058395385742,
"learning_rate": 4.61731282057198e-07,
"logits/chosen": 1.1567468643188477,
"logits/rejected": 1.0777671337127686,
"loss": 0.9991,
"step": 174
},
{
"beta_dpo/beta": 0.14243370294570923,
"beta_dpo/beta_margin_grad_mean": -0.40375036001205444,
"beta_dpo/beta_margin_grad_std": 0.22832658886909485,
"beta_dpo/beta_margin_mean": 0.4617246687412262,
"beta_dpo/beta_margin_std": 1.3428500890731812,
"beta_dpo/beta_used": 0.14243370294570923,
"beta_dpo/beta_used_raw": 0.14243370294570923,
"beta_dpo/gap_mean": 3.4792134761810303,
"beta_dpo/gap_std": 7.564722061157227,
"beta_dpo/loss_margin_mean": 3.4761555194854736,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.26455026455026454,
"grad_norm": 30.447345733642578,
"learning_rate": 4.6102528404790965e-07,
"logits/chosen": 1.1785094738006592,
"logits/rejected": 1.1524642705917358,
"loss": 1.0605,
"step": 175
},
{
"beta_dpo/beta": 0.07570341974496841,
"beta_dpo/beta_margin_grad_mean": -0.4498152732849121,
"beta_dpo/beta_margin_grad_std": 0.15577325224876404,
"beta_dpo/beta_margin_mean": 0.248289555311203,
"beta_dpo/beta_margin_std": 0.7977651357650757,
"beta_dpo/beta_used": 0.07570341974496841,
"beta_dpo/beta_used_raw": 0.044923990964889526,
"beta_dpo/gap_mean": 3.310117244720459,
"beta_dpo/gap_std": 7.675562381744385,
"beta_dpo/loss_margin_mean": 2.364182233810425,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2660619803476946,
"grad_norm": 24.72120475769043,
"learning_rate": 4.603133832077953e-07,
"logits/chosen": 1.18973970413208,
"logits/rejected": 1.1454646587371826,
"loss": 1.1651,
"step": 176
},
{
"beta_dpo/beta": 0.14909859001636505,
"beta_dpo/beta_margin_grad_mean": -0.32062795758247375,
"beta_dpo/beta_margin_grad_std": 0.2098706066608429,
"beta_dpo/beta_margin_mean": 1.0084487199783325,
"beta_dpo/beta_margin_std": 1.2620028257369995,
"beta_dpo/beta_used": 0.14909859001636505,
"beta_dpo/beta_used_raw": 0.14909859001636505,
"beta_dpo/gap_mean": 3.647057056427002,
"beta_dpo/gap_std": 7.802684783935547,
"beta_dpo/loss_margin_mean": 6.57255744934082,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2675736961451247,
"grad_norm": 34.28105926513672,
"learning_rate": 4.5959559945025183e-07,
"logits/chosen": 1.3451666831970215,
"logits/rejected": 1.2215378284454346,
"loss": 1.0052,
"step": 177
},
{
"beta_dpo/beta": 0.12064201384782791,
"beta_dpo/beta_margin_grad_mean": -0.3828374147415161,
"beta_dpo/beta_margin_grad_std": 0.1705167591571808,
"beta_dpo/beta_margin_mean": 0.6022067666053772,
"beta_dpo/beta_margin_std": 0.9428675770759583,
"beta_dpo/beta_used": 0.12064201384782791,
"beta_dpo/beta_used_raw": 0.12064201384782791,
"beta_dpo/gap_mean": 3.9942588806152344,
"beta_dpo/gap_std": 7.683067321777344,
"beta_dpo/loss_margin_mean": 4.527779579162598,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2690854119425548,
"grad_norm": 24.384441375732422,
"learning_rate": 4.588719528532341e-07,
"logits/chosen": 1.0391870737075806,
"logits/rejected": 0.9791826605796814,
"loss": 0.9946,
"step": 178
},
{
"beta_dpo/beta": 0.05765051394701004,
"beta_dpo/beta_margin_grad_mean": -0.4596216380596161,
"beta_dpo/beta_margin_grad_std": 0.13480156660079956,
"beta_dpo/beta_margin_mean": 0.19259433448314667,
"beta_dpo/beta_margin_std": 0.6468202471733093,
"beta_dpo/beta_used": 0.05765051394701004,
"beta_dpo/beta_used_raw": 0.04329132288694382,
"beta_dpo/gap_mean": 3.7823781967163086,
"beta_dpo/gap_std": 7.81741189956665,
"beta_dpo/loss_margin_mean": 2.79002046585083,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2705971277399849,
"grad_norm": 20.551816940307617,
"learning_rate": 4.581424636586928e-07,
"logits/chosen": 1.23455011844635,
"logits/rejected": 1.2013146877288818,
"loss": 1.2363,
"step": 179
},
{
"beta_dpo/beta": 0.014693931676447392,
"beta_dpo/beta_margin_grad_mean": -0.4902806580066681,
"beta_dpo/beta_margin_grad_std": 0.035090334713459015,
"beta_dpo/beta_margin_mean": 0.039538200944662094,
"beta_dpo/beta_margin_std": 0.14256805181503296,
"beta_dpo/beta_used": 0.014693931676447392,
"beta_dpo/beta_used_raw": -0.0027506444603204727,
"beta_dpo/gap_mean": 3.567842483520508,
"beta_dpo/gap_std": 7.792623519897461,
"beta_dpo/loss_margin_mean": 2.5003037452697754,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.272108843537415,
"grad_norm": 5.100368022918701,
"learning_rate": 4.5740715227200897e-07,
"logits/chosen": 0.9564149379730225,
"logits/rejected": 0.9124714136123657,
"loss": 1.3559,
"step": 180
},
{
"beta_dpo/beta": 0.07625903934240341,
"beta_dpo/beta_margin_grad_mean": -0.43430396914482117,
"beta_dpo/beta_margin_grad_std": 0.13497142493724823,
"beta_dpo/beta_margin_mean": 0.29473230242729187,
"beta_dpo/beta_margin_std": 0.6254957318305969,
"beta_dpo/beta_used": 0.07625903934240341,
"beta_dpo/beta_used_raw": 0.07625903934240341,
"beta_dpo/gap_mean": 3.5772459506988525,
"beta_dpo/gap_std": 7.750424385070801,
"beta_dpo/loss_margin_mean": 3.6918673515319824,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.273620559334845,
"grad_norm": 16.89691734313965,
"learning_rate": 4.566660392614228e-07,
"logits/chosen": 1.1309115886688232,
"logits/rejected": 1.0724239349365234,
"loss": 1.1985,
"step": 181
},
{
"beta_dpo/beta": 0.16641435027122498,
"beta_dpo/beta_margin_grad_mean": -0.33495861291885376,
"beta_dpo/beta_margin_grad_std": 0.2150852531194687,
"beta_dpo/beta_margin_mean": 0.8996529579162598,
"beta_dpo/beta_margin_std": 1.2435740232467651,
"beta_dpo/beta_used": 0.16641435027122498,
"beta_dpo/beta_used_raw": 0.16641435027122498,
"beta_dpo/gap_mean": 3.81697416305542,
"beta_dpo/gap_std": 7.650733947753906,
"beta_dpo/loss_margin_mean": 5.414433002471924,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2751322751322751,
"grad_norm": 39.52751159667969,
"learning_rate": 4.5591914535745817e-07,
"logits/chosen": 1.3326389789581299,
"logits/rejected": 1.2464112043380737,
"loss": 0.959,
"step": 182
},
{
"beta_dpo/beta": 0.08825691789388657,
"beta_dpo/beta_margin_grad_mean": -0.45125502347946167,
"beta_dpo/beta_margin_grad_std": 0.2054792195558548,
"beta_dpo/beta_margin_mean": 0.30645766854286194,
"beta_dpo/beta_margin_std": 1.2501469850540161,
"beta_dpo/beta_used": 0.08825691789388657,
"beta_dpo/beta_used_raw": 0.07874220609664917,
"beta_dpo/gap_mean": 3.7444000244140625,
"beta_dpo/gap_std": 7.897290229797363,
"beta_dpo/loss_margin_mean": 2.5068931579589844,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2766439909297052,
"grad_norm": 23.31369972229004,
"learning_rate": 4.551664914523433e-07,
"logits/chosen": 0.8546992540359497,
"logits/rejected": 0.8199894428253174,
"loss": 1.2045,
"step": 183
},
{
"beta_dpo/beta": 0.06977789849042892,
"beta_dpo/beta_margin_grad_mean": -0.4251745641231537,
"beta_dpo/beta_margin_grad_std": 0.14231690764427185,
"beta_dpo/beta_margin_mean": 0.3553830087184906,
"beta_dpo/beta_margin_std": 0.6949700117111206,
"beta_dpo/beta_used": 0.06977789849042892,
"beta_dpo/beta_used_raw": 0.06977789849042892,
"beta_dpo/gap_mean": 3.7551708221435547,
"beta_dpo/gap_std": 7.767217636108398,
"beta_dpo/loss_margin_mean": 4.133913516998291,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2781557067271353,
"grad_norm": 17.760683059692383,
"learning_rate": 4.544080985994258e-07,
"logits/chosen": 1.1608402729034424,
"logits/rejected": 1.0770483016967773,
"loss": 1.1945,
"step": 184
},
{
"beta_dpo/beta": 0.128538578748703,
"beta_dpo/beta_margin_grad_mean": -0.3922068178653717,
"beta_dpo/beta_margin_grad_std": 0.23428599536418915,
"beta_dpo/beta_margin_mean": 0.6921870708465576,
"beta_dpo/beta_margin_std": 1.5621590614318848,
"beta_dpo/beta_used": 0.128538578748703,
"beta_dpo/beta_used_raw": 0.128538578748703,
"beta_dpo/gap_mean": 3.841433525085449,
"beta_dpo/gap_std": 8.024864196777344,
"beta_dpo/loss_margin_mean": 3.9666225910186768,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2796674225245654,
"grad_norm": 28.532594680786133,
"learning_rate": 4.5364398801258394e-07,
"logits/chosen": 1.2939105033874512,
"logits/rejected": 1.2379869222640991,
"loss": 1.063,
"step": 185
},
{
"beta_dpo/beta": 0.047264955937862396,
"beta_dpo/beta_margin_grad_mean": -0.45827677845954895,
"beta_dpo/beta_margin_grad_std": 0.14908140897750854,
"beta_dpo/beta_margin_mean": 0.19676923751831055,
"beta_dpo/beta_margin_std": 0.6972150802612305,
"beta_dpo/beta_used": 0.047264955937862396,
"beta_dpo/beta_used_raw": 0.042294420301914215,
"beta_dpo/gap_mean": 3.7729580402374268,
"beta_dpo/gap_std": 8.466205596923828,
"beta_dpo/loss_margin_mean": 3.8776252269744873,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2811791383219955,
"grad_norm": 16.762447357177734,
"learning_rate": 4.5287418106563354e-07,
"logits/chosen": 1.1804075241088867,
"logits/rejected": 1.0904101133346558,
"loss": 1.3025,
"step": 186
},
{
"beta_dpo/beta": 0.16905778646469116,
"beta_dpo/beta_margin_grad_mean": -0.4033774733543396,
"beta_dpo/beta_margin_grad_std": 0.2705581784248352,
"beta_dpo/beta_margin_mean": 0.6272789239883423,
"beta_dpo/beta_margin_std": 1.9046205282211304,
"beta_dpo/beta_used": 0.16905778646469116,
"beta_dpo/beta_used_raw": 0.16905778646469116,
"beta_dpo/gap_mean": 3.7473177909851074,
"beta_dpo/gap_std": 8.742372512817383,
"beta_dpo/loss_margin_mean": 3.4757022857666016,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.28269085411942557,
"grad_norm": 35.64015579223633,
"learning_rate": 4.520986992917297e-07,
"logits/chosen": 1.1060779094696045,
"logits/rejected": 1.0602669715881348,
"loss": 1.0671,
"step": 187
},
{
"beta_dpo/beta": 0.060239776968955994,
"beta_dpo/beta_margin_grad_mean": -0.45551395416259766,
"beta_dpo/beta_margin_grad_std": 0.11140614002943039,
"beta_dpo/beta_margin_mean": 0.19362890720367432,
"beta_dpo/beta_margin_std": 0.48972344398498535,
"beta_dpo/beta_used": 0.060239776968955994,
"beta_dpo/beta_used_raw": 0.060239776968955994,
"beta_dpo/gap_mean": 3.493543863296509,
"beta_dpo/gap_std": 8.605021476745605,
"beta_dpo/loss_margin_mean": 2.709864377975464,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2842025699168556,
"grad_norm": 14.569178581237793,
"learning_rate": 4.5131756438276466e-07,
"logits/chosen": 1.2817773818969727,
"logits/rejected": 1.2061257362365723,
"loss": 1.2657,
"step": 188
},
{
"beta_dpo/beta": 0.0808996930718422,
"beta_dpo/beta_margin_grad_mean": -0.4149724245071411,
"beta_dpo/beta_margin_grad_std": 0.19832564890384674,
"beta_dpo/beta_margin_mean": 0.44103261828422546,
"beta_dpo/beta_margin_std": 1.0756137371063232,
"beta_dpo/beta_used": 0.0808996930718422,
"beta_dpo/beta_used_raw": 0.0808996930718422,
"beta_dpo/gap_mean": 3.496494770050049,
"beta_dpo/gap_std": 8.628555297851562,
"beta_dpo/loss_margin_mean": 2.318549394607544,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2857142857142857,
"grad_norm": 27.97902488708496,
"learning_rate": 4.5053079818876096e-07,
"logits/chosen": 1.4758341312408447,
"logits/rejected": 1.4488728046417236,
"loss": 1.2182,
"step": 189
},
{
"beta_dpo/beta": 0.2020467072725296,
"beta_dpo/beta_margin_grad_mean": -0.2765614092350006,
"beta_dpo/beta_margin_grad_std": 0.2313910275697708,
"beta_dpo/beta_margin_mean": 1.4720041751861572,
"beta_dpo/beta_margin_std": 1.7474236488342285,
"beta_dpo/beta_used": 0.2020467072725296,
"beta_dpo/beta_used_raw": 0.2020467072725296,
"beta_dpo/gap_mean": 3.8497142791748047,
"beta_dpo/gap_std": 8.667133331298828,
"beta_dpo/loss_margin_mean": 7.28297758102417,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2872260015117158,
"grad_norm": 51.29981231689453,
"learning_rate": 4.4973842271726024e-07,
"logits/chosen": 1.2424674034118652,
"logits/rejected": 1.042587399482727,
"loss": 0.9284,
"step": 190
},
{
"beta_dpo/beta": 0.1297360062599182,
"beta_dpo/beta_margin_grad_mean": -0.4217360019683838,
"beta_dpo/beta_margin_grad_std": 0.23818956315517426,
"beta_dpo/beta_margin_mean": 0.4802582263946533,
"beta_dpo/beta_margin_std": 1.606734037399292,
"beta_dpo/beta_used": 0.1297360062599182,
"beta_dpo/beta_used_raw": 0.1297360062599182,
"beta_dpo/gap_mean": 3.9156153202056885,
"beta_dpo/gap_std": 8.844564437866211,
"beta_dpo/loss_margin_mean": 2.7418901920318604,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2887377173091459,
"grad_norm": 37.58261489868164,
"learning_rate": 4.48940460132708e-07,
"logits/chosen": 1.3863708972930908,
"logits/rejected": 1.3378045558929443,
"loss": 1.0989,
"step": 191
},
{
"beta_dpo/beta": 0.007039315067231655,
"beta_dpo/beta_margin_grad_mean": -0.49628469347953796,
"beta_dpo/beta_margin_grad_std": 0.012314299121499062,
"beta_dpo/beta_margin_mean": 0.014884297735989094,
"beta_dpo/beta_margin_std": 0.049333829432725906,
"beta_dpo/beta_used": 0.007039315067231655,
"beta_dpo/beta_used_raw": 0.0027586279902607203,
"beta_dpo/gap_mean": 3.5234975814819336,
"beta_dpo/gap_std": 8.557422637939453,
"beta_dpo/loss_margin_mean": 2.086303234100342,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.29024943310657597,
"grad_norm": 2.247262477874756,
"learning_rate": 4.481369327558329e-07,
"logits/chosen": 1.3338195085525513,
"logits/rejected": 1.2933076620101929,
"loss": 1.3727,
"step": 192
},
{
"beta_dpo/beta": 0.0940217524766922,
"beta_dpo/beta_margin_grad_mean": -0.4251381754875183,
"beta_dpo/beta_margin_grad_std": 0.17793121933937073,
"beta_dpo/beta_margin_mean": 0.38040265440940857,
"beta_dpo/beta_margin_std": 0.9228093028068542,
"beta_dpo/beta_used": 0.0940217524766922,
"beta_dpo/beta_used_raw": 0.0940217524766922,
"beta_dpo/gap_mean": 3.540949821472168,
"beta_dpo/gap_std": 8.371658325195312,
"beta_dpo/loss_margin_mean": 4.114996433258057,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.29176114890400606,
"grad_norm": 22.988906860351562,
"learning_rate": 4.47327863063023e-07,
"logits/chosen": 1.196512222290039,
"logits/rejected": 1.202284812927246,
"loss": 1.1708,
"step": 193
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4995115101337433,
"beta_dpo/beta_margin_grad_std": 0.0024271684233099222,
"beta_dpo/beta_margin_mean": 0.0019539878703653812,
"beta_dpo/beta_margin_std": 0.009708872064948082,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.04277173429727554,
"beta_dpo/gap_mean": 3.3145179748535156,
"beta_dpo/gap_std": 8.588244438171387,
"beta_dpo/loss_margin_mean": 1.953987717628479,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.29327286470143615,
"grad_norm": 0.27189475297927856,
"learning_rate": 4.4651327368569684e-07,
"logits/chosen": 0.9954877495765686,
"logits/rejected": 0.9740771055221558,
"loss": 1.3854,
"step": 194
},
{
"beta_dpo/beta": 0.08321140706539154,
"beta_dpo/beta_margin_grad_mean": -0.42134904861450195,
"beta_dpo/beta_margin_grad_std": 0.15325944125652313,
"beta_dpo/beta_margin_mean": 0.372994601726532,
"beta_dpo/beta_margin_std": 0.7367441654205322,
"beta_dpo/beta_used": 0.08321140706539154,
"beta_dpo/beta_used_raw": 0.08321140706539154,
"beta_dpo/gap_mean": 3.4207961559295654,
"beta_dpo/gap_std": 8.580717086791992,
"beta_dpo/loss_margin_mean": 3.769697904586792,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2947845804988662,
"grad_norm": 21.639326095581055,
"learning_rate": 4.4569318740967043e-07,
"logits/chosen": 1.2047412395477295,
"logits/rejected": 1.2157025337219238,
"loss": 1.2014,
"step": 195
},
{
"beta_dpo/beta": 0.1143503189086914,
"beta_dpo/beta_margin_grad_mean": -0.39839500188827515,
"beta_dpo/beta_margin_grad_std": 0.2235814779996872,
"beta_dpo/beta_margin_mean": 0.6355725526809692,
"beta_dpo/beta_margin_std": 1.5783125162124634,
"beta_dpo/beta_used": 0.1143503189086914,
"beta_dpo/beta_used_raw": 0.10994286090135574,
"beta_dpo/gap_mean": 3.2441186904907227,
"beta_dpo/gap_std": 8.651065826416016,
"beta_dpo/loss_margin_mean": 3.3098671436309814,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2962962962962963,
"grad_norm": 29.688894271850586,
"learning_rate": 4.448676271745197e-07,
"logits/chosen": 1.374154806137085,
"logits/rejected": 1.297581672668457,
"loss": 1.1026,
"step": 196
},
{
"beta_dpo/beta": 0.15817537903785706,
"beta_dpo/beta_margin_grad_mean": -0.3712634742259979,
"beta_dpo/beta_margin_grad_std": 0.2809968590736389,
"beta_dpo/beta_margin_mean": 0.7080994844436646,
"beta_dpo/beta_margin_std": 1.8238109350204468,
"beta_dpo/beta_used": 0.15817537903785706,
"beta_dpo/beta_used_raw": 0.15817537903785706,
"beta_dpo/gap_mean": 3.469177722930908,
"beta_dpo/gap_std": 9.123177528381348,
"beta_dpo/loss_margin_mean": 4.5948686599731445,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.29780801209372637,
"grad_norm": 36.089534759521484,
"learning_rate": 4.440366160729392e-07,
"logits/chosen": 1.0388797521591187,
"logits/rejected": 1.0116878747940063,
"loss": 1.1511,
"step": 197
},
{
"beta_dpo/beta": 0.18329089879989624,
"beta_dpo/beta_margin_grad_mean": -0.3411843776702881,
"beta_dpo/beta_margin_grad_std": 0.24735908210277557,
"beta_dpo/beta_margin_mean": 0.9059070348739624,
"beta_dpo/beta_margin_std": 1.6272207498550415,
"beta_dpo/beta_used": 0.18329089879989624,
"beta_dpo/beta_used_raw": 0.18329089879989624,
"beta_dpo/gap_mean": 3.727560043334961,
"beta_dpo/gap_std": 9.023991584777832,
"beta_dpo/loss_margin_mean": 4.673088550567627,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.29931972789115646,
"grad_norm": 32.435325622558594,
"learning_rate": 4.432001773500957e-07,
"logits/chosen": 1.2287187576293945,
"logits/rejected": 1.1591060161590576,
"loss": 0.9414,
"step": 198
},
{
"beta_dpo/beta": 0.20622490346431732,
"beta_dpo/beta_margin_grad_mean": -0.38525623083114624,
"beta_dpo/beta_margin_grad_std": 0.29314669966697693,
"beta_dpo/beta_margin_mean": 1.0052555799484253,
"beta_dpo/beta_margin_std": 2.765925645828247,
"beta_dpo/beta_used": 0.20622490346431732,
"beta_dpo/beta_used_raw": 0.20622490346431732,
"beta_dpo/gap_mean": 3.8399996757507324,
"beta_dpo/gap_std": 9.221573829650879,
"beta_dpo/loss_margin_mean": 4.202357769012451,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.30083144368858655,
"grad_norm": 58.93007278442383,
"learning_rate": 4.4235833440297856e-07,
"logits/chosen": 1.2598795890808105,
"logits/rejected": 1.1354758739471436,
"loss": 0.9711,
"step": 199
},
{
"beta_dpo/beta": 0.1798553764820099,
"beta_dpo/beta_margin_grad_mean": -0.3407643139362335,
"beta_dpo/beta_margin_grad_std": 0.2466670572757721,
"beta_dpo/beta_margin_mean": 1.192578911781311,
"beta_dpo/beta_margin_std": 2.1793160438537598,
"beta_dpo/beta_used": 0.1798553764820099,
"beta_dpo/beta_used_raw": 0.1798553764820099,
"beta_dpo/gap_mean": 4.228569030761719,
"beta_dpo/gap_std": 9.340324401855469,
"beta_dpo/loss_margin_mean": 5.604803562164307,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.30234315948601664,
"grad_norm": 41.332130432128906,
"learning_rate": 4.415111107797445e-07,
"logits/chosen": 0.9444395303726196,
"logits/rejected": 0.8375617265701294,
"loss": 0.9954,
"step": 200
},
{
"epoch": 0.30234315948601664,
"eval_beta_dpo/beta": 0.0999036431312561,
"eval_beta_dpo/beta_margin_grad_mean": -0.41453051567077637,
"eval_beta_dpo/beta_margin_grad_std": 0.1619892716407776,
"eval_beta_dpo/beta_margin_mean": 0.5098641514778137,
"eval_beta_dpo/beta_margin_std": 0.9505065083503723,
"eval_beta_dpo/beta_used": 0.0999036431312561,
"eval_beta_dpo/beta_used_raw": 0.09682810306549072,
"eval_beta_dpo/gap_mean": 4.2135772705078125,
"eval_beta_dpo/gap_std": 9.372955322265625,
"eval_beta_dpo/loss_margin_mean": 4.16071081161499,
"eval_beta_dpo/mask_keep_frac": 1.0,
"eval_logits/chosen": 0.917927086353302,
"eval_logits/rejected": 0.8638291954994202,
"eval_loss": 0.6120367050170898,
"eval_runtime": 43.5626,
"eval_samples_per_second": 52.866,
"eval_steps_per_second": 1.653,
"step": 200
},
{
"beta_dpo/beta": 0.1600230187177658,
"beta_dpo/beta_margin_grad_mean": -0.3555724620819092,
"beta_dpo/beta_margin_grad_std": 0.2434043139219284,
"beta_dpo/beta_margin_mean": 0.7401548624038696,
"beta_dpo/beta_margin_std": 1.3561351299285889,
"beta_dpo/beta_used": 0.1600230187177658,
"beta_dpo/beta_used_raw": 0.1600230187177658,
"beta_dpo/gap_mean": 4.290175437927246,
"beta_dpo/gap_std": 9.295166015625,
"beta_dpo/loss_margin_mean": 4.662273406982422,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.30385487528344673,
"grad_norm": 37.13080978393555,
"learning_rate": 4.4065853017905953e-07,
"logits/chosen": 1.0336084365844727,
"logits/rejected": 0.9708642959594727,
"loss": 0.9299,
"step": 201
},
{
"beta_dpo/beta": 0.15965449810028076,
"beta_dpo/beta_margin_grad_mean": -0.3542693853378296,
"beta_dpo/beta_margin_grad_std": 0.23076099157333374,
"beta_dpo/beta_margin_mean": 0.8342925310134888,
"beta_dpo/beta_margin_std": 1.3525999784469604,
"beta_dpo/beta_used": 0.15965449810028076,
"beta_dpo/beta_used_raw": 0.15965449810028076,
"beta_dpo/gap_mean": 4.402766227722168,
"beta_dpo/gap_std": 9.118497848510742,
"beta_dpo/loss_margin_mean": 5.167710304260254,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.30536659108087677,
"grad_norm": 32.50093460083008,
"learning_rate": 4.3980061644943575e-07,
"logits/chosen": 1.205030083656311,
"logits/rejected": 1.1125134229660034,
"loss": 0.9291,
"step": 202
},
{
"beta_dpo/beta": 0.07079961150884628,
"beta_dpo/beta_margin_grad_mean": -0.4141000807285309,
"beta_dpo/beta_margin_grad_std": 0.12530024349689484,
"beta_dpo/beta_margin_mean": 0.38183262944221497,
"beta_dpo/beta_margin_std": 0.5788713693618774,
"beta_dpo/beta_used": 0.07079961150884628,
"beta_dpo/beta_used_raw": 0.07079961150884628,
"beta_dpo/gap_mean": 4.539271831512451,
"beta_dpo/gap_std": 8.996452331542969,
"beta_dpo/loss_margin_mean": 5.262683868408203,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.30687830687830686,
"grad_norm": 17.81788444519043,
"learning_rate": 4.3893739358856455e-07,
"logits/chosen": 1.2785005569458008,
"logits/rejected": 1.1403093338012695,
"loss": 1.1588,
"step": 203
},
{
"beta_dpo/beta": 0.0848718211054802,
"beta_dpo/beta_margin_grad_mean": -0.40045851469039917,
"beta_dpo/beta_margin_grad_std": 0.17919519543647766,
"beta_dpo/beta_margin_mean": 0.5690321922302246,
"beta_dpo/beta_margin_std": 1.086669921875,
"beta_dpo/beta_used": 0.0848718211054802,
"beta_dpo/beta_used_raw": 0.0848718211054802,
"beta_dpo/gap_mean": 4.8224921226501465,
"beta_dpo/gap_std": 8.966915130615234,
"beta_dpo/loss_margin_mean": 5.354628086090088,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.30839002267573695,
"grad_norm": 21.749141693115234,
"learning_rate": 4.380688857426449e-07,
"logits/chosen": 1.0835531949996948,
"logits/rejected": 0.9498437643051147,
"loss": 1.1479,
"step": 204
},
{
"beta_dpo/beta": 0.038497406989336014,
"beta_dpo/beta_margin_grad_mean": -0.46281948685646057,
"beta_dpo/beta_margin_grad_std": 0.13370007276535034,
"beta_dpo/beta_margin_mean": 0.1775280237197876,
"beta_dpo/beta_margin_std": 0.639147162437439,
"beta_dpo/beta_used": 0.038497406989336014,
"beta_dpo/beta_used_raw": -0.006588853895664215,
"beta_dpo/gap_mean": 4.527703285217285,
"beta_dpo/gap_std": 8.944877624511719,
"beta_dpo/loss_margin_mean": 3.5750622749328613,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.30990173847316704,
"grad_norm": 11.258538246154785,
"learning_rate": 4.3719511720570814e-07,
"logits/chosen": 1.0344573259353638,
"logits/rejected": 1.001204490661621,
"loss": 1.2959,
"step": 205
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4995371997356415,
"beta_dpo/beta_margin_grad_std": 0.0023737018927931786,
"beta_dpo/beta_margin_mean": 0.0018513122340664268,
"beta_dpo/beta_margin_std": 0.009495068341493607,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.07761886715888977,
"beta_dpo/gap_mean": 4.082757949829102,
"beta_dpo/gap_std": 9.130447387695312,
"beta_dpo/loss_margin_mean": 1.8513121604919434,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.31141345427059713,
"grad_norm": 0.2955770492553711,
"learning_rate": 4.363161124189387e-07,
"logits/chosen": 1.3033794164657593,
"logits/rejected": 1.2756681442260742,
"loss": 1.3852,
"step": 206
},
{
"beta_dpo/beta": 0.04532025754451752,
"beta_dpo/beta_margin_grad_mean": -0.4514307379722595,
"beta_dpo/beta_margin_grad_std": 0.10880967974662781,
"beta_dpo/beta_margin_mean": 0.21289481222629547,
"beta_dpo/beta_margin_std": 0.4802875518798828,
"beta_dpo/beta_used": 0.04532025754451752,
"beta_dpo/beta_used_raw": 0.04532025754451752,
"beta_dpo/gap_mean": 4.084525108337402,
"beta_dpo/gap_std": 9.252772331237793,
"beta_dpo/loss_margin_mean": 4.4937334060668945,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3129251700680272,
"grad_norm": 12.335667610168457,
"learning_rate": 4.3543189596998986e-07,
"logits/chosen": 1.1771042346954346,
"logits/rejected": 1.0448236465454102,
"loss": 1.2689,
"step": 207
},
{
"beta_dpo/beta": 0.05620375648140907,
"beta_dpo/beta_margin_grad_mean": -0.4606337249279022,
"beta_dpo/beta_margin_grad_std": 0.13863146305084229,
"beta_dpo/beta_margin_mean": 0.1967998743057251,
"beta_dpo/beta_margin_std": 0.6741688847541809,
"beta_dpo/beta_used": 0.05620375648140907,
"beta_dpo/beta_used_raw": 0.05620375648140907,
"beta_dpo/gap_mean": 3.732440948486328,
"beta_dpo/gap_std": 9.030263900756836,
"beta_dpo/loss_margin_mean": 1.930777907371521,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3144368858654573,
"grad_norm": 18.521930694580078,
"learning_rate": 4.3454249259229664e-07,
"logits/chosen": 1.0590993165969849,
"logits/rejected": 1.065347671508789,
"loss": 1.2499,
"step": 208
},
{
"beta_dpo/beta": 0.21363940834999084,
"beta_dpo/beta_margin_grad_mean": -0.3402642011642456,
"beta_dpo/beta_margin_grad_std": 0.3089061975479126,
"beta_dpo/beta_margin_mean": 1.1954998970031738,
"beta_dpo/beta_margin_std": 2.3597922325134277,
"beta_dpo/beta_used": 0.21363940834999084,
"beta_dpo/beta_used_raw": 0.21363940834999084,
"beta_dpo/gap_mean": 4.00895357131958,
"beta_dpo/gap_std": 9.31790542602539,
"beta_dpo/loss_margin_mean": 5.562100410461426,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.31594860166288735,
"grad_norm": 54.91071319580078,
"learning_rate": 4.336479271643833e-07,
"logits/chosen": 1.2037627696990967,
"logits/rejected": 1.1780195236206055,
"loss": 1.0709,
"step": 209
},
{
"beta_dpo/beta": 0.17077326774597168,
"beta_dpo/beta_margin_grad_mean": -0.33405202627182007,
"beta_dpo/beta_margin_grad_std": 0.2563777565956116,
"beta_dpo/beta_margin_mean": 1.116236925125122,
"beta_dpo/beta_margin_std": 1.7664326429367065,
"beta_dpo/beta_used": 0.17077326774597168,
"beta_dpo/beta_used_raw": 0.17077326774597168,
"beta_dpo/gap_mean": 4.302438735961914,
"beta_dpo/gap_std": 9.480663299560547,
"beta_dpo/loss_margin_mean": 6.142838478088379,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.31746031746031744,
"grad_norm": 34.84131622314453,
"learning_rate": 4.327482247091679e-07,
"logits/chosen": 1.1268184185028076,
"logits/rejected": 1.012471318244934,
"loss": 0.9841,
"step": 210
},
{
"beta_dpo/beta": 0.061521291732788086,
"beta_dpo/beta_margin_grad_mean": -0.42673206329345703,
"beta_dpo/beta_margin_grad_std": 0.13907025754451752,
"beta_dpo/beta_margin_mean": 0.31983569264411926,
"beta_dpo/beta_margin_std": 0.6123623251914978,
"beta_dpo/beta_used": 0.061521291732788086,
"beta_dpo/beta_used_raw": 0.061521291732788086,
"beta_dpo/gap_mean": 4.582791805267334,
"beta_dpo/gap_std": 9.538564682006836,
"beta_dpo/loss_margin_mean": 5.329373836517334,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.31897203325774753,
"grad_norm": 17.159202575683594,
"learning_rate": 4.3184341039326217e-07,
"logits/chosen": 1.192129135131836,
"logits/rejected": 1.0898618698120117,
"loss": 1.2186,
"step": 211
},
{
"beta_dpo/beta": 0.18771663308143616,
"beta_dpo/beta_margin_grad_mean": -0.34989145398139954,
"beta_dpo/beta_margin_grad_std": 0.3033754527568817,
"beta_dpo/beta_margin_mean": 1.0186922550201416,
"beta_dpo/beta_margin_std": 1.9620866775512695,
"beta_dpo/beta_used": 0.18771663308143616,
"beta_dpo/beta_used_raw": 0.18771663308143616,
"beta_dpo/gap_mean": 4.730992317199707,
"beta_dpo/gap_std": 9.749069213867188,
"beta_dpo/loss_margin_mean": 5.481993198394775,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3204837490551776,
"grad_norm": 41.34913635253906,
"learning_rate": 4.309335095262675e-07,
"logits/chosen": 1.249527096748352,
"logits/rejected": 1.1798797845840454,
"loss": 1.0307,
"step": 212
},
{
"beta_dpo/beta": 0.05245200917124748,
"beta_dpo/beta_margin_grad_mean": -0.45048633217811584,
"beta_dpo/beta_margin_grad_std": 0.11693067103624344,
"beta_dpo/beta_margin_mean": 0.21350213885307312,
"beta_dpo/beta_margin_std": 0.5051944255828857,
"beta_dpo/beta_used": 0.05245200917124748,
"beta_dpo/beta_used_raw": 0.05245200917124748,
"beta_dpo/gap_mean": 4.776755332946777,
"beta_dpo/gap_std": 9.900781631469727,
"beta_dpo/loss_margin_mean": 4.304105758666992,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3219954648526077,
"grad_norm": 13.7152099609375,
"learning_rate": 4.3001854756006724e-07,
"logits/chosen": 1.0305734872817993,
"logits/rejected": 1.0351169109344482,
"loss": 1.2311,
"step": 213
},
{
"beta_dpo/beta": 0.06900745630264282,
"beta_dpo/beta_margin_grad_mean": -0.4406600594520569,
"beta_dpo/beta_margin_grad_std": 0.18241001665592194,
"beta_dpo/beta_margin_mean": 0.35362449288368225,
"beta_dpo/beta_margin_std": 1.0889477729797363,
"beta_dpo/beta_used": 0.06900745630264282,
"beta_dpo/beta_used_raw": 0.058939479291439056,
"beta_dpo/gap_mean": 4.457104206085205,
"beta_dpo/gap_std": 9.855401039123535,
"beta_dpo/loss_margin_mean": 3.4822311401367188,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3235071806500378,
"grad_norm": 18.02913475036621,
"learning_rate": 4.290985500881143e-07,
"logits/chosen": 0.9732145071029663,
"logits/rejected": 0.9562994241714478,
"loss": 1.1885,
"step": 214
},
{
"beta_dpo/beta": 0.14414142072200775,
"beta_dpo/beta_margin_grad_mean": -0.36944130063056946,
"beta_dpo/beta_margin_grad_std": 0.24839608371257782,
"beta_dpo/beta_margin_mean": 1.0585134029388428,
"beta_dpo/beta_margin_std": 2.0430054664611816,
"beta_dpo/beta_used": 0.14414142072200775,
"beta_dpo/beta_used_raw": 0.14414142072200775,
"beta_dpo/gap_mean": 4.5548095703125,
"beta_dpo/gap_std": 9.729708671569824,
"beta_dpo/loss_margin_mean": 5.855838298797607,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3250188964474679,
"grad_norm": 38.50313949584961,
"learning_rate": 4.281735428447157e-07,
"logits/chosen": 1.1447077989578247,
"logits/rejected": 1.0136210918426514,
"loss": 1.1588,
"step": 215
},
{
"beta_dpo/beta": 0.10134372115135193,
"beta_dpo/beta_margin_grad_mean": -0.3964030146598816,
"beta_dpo/beta_margin_grad_std": 0.18826571106910706,
"beta_dpo/beta_margin_mean": 0.48592090606689453,
"beta_dpo/beta_margin_std": 0.9031015634536743,
"beta_dpo/beta_used": 0.10134372115135193,
"beta_dpo/beta_used_raw": 0.10134372115135193,
"beta_dpo/gap_mean": 4.801600933074951,
"beta_dpo/gap_std": 9.757190704345703,
"beta_dpo/loss_margin_mean": 4.803330421447754,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.32653061224489793,
"grad_norm": 22.771522521972656,
"learning_rate": 4.2724355170431247e-07,
"logits/chosen": 1.1843764781951904,
"logits/rejected": 1.0623424053192139,
"loss": 1.0978,
"step": 216
},
{
"beta_dpo/beta": 0.04478123039007187,
"beta_dpo/beta_margin_grad_mean": -0.4540867805480957,
"beta_dpo/beta_margin_grad_std": 0.14419633150100708,
"beta_dpo/beta_margin_mean": 0.23471979796886444,
"beta_dpo/beta_margin_std": 0.7252242565155029,
"beta_dpo/beta_used": 0.04478123039007187,
"beta_dpo/beta_used_raw": 0.04400714486837387,
"beta_dpo/gap_mean": 4.86920166015625,
"beta_dpo/gap_std": 9.921016693115234,
"beta_dpo/loss_margin_mean": 5.44075345993042,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.328042328042328,
"grad_norm": 13.470901489257812,
"learning_rate": 4.26308602680756e-07,
"logits/chosen": 1.284195899963379,
"logits/rejected": 1.1664865016937256,
"loss": 1.2595,
"step": 217
},
{
"beta_dpo/beta": 0.09790407866239548,
"beta_dpo/beta_margin_grad_mean": -0.4215080440044403,
"beta_dpo/beta_margin_grad_std": 0.21584708988666534,
"beta_dpo/beta_margin_mean": 0.5518121719360352,
"beta_dpo/beta_margin_std": 1.4792039394378662,
"beta_dpo/beta_used": 0.09790407866239548,
"beta_dpo/beta_used_raw": 0.09790407866239548,
"beta_dpo/gap_mean": 4.505361557006836,
"beta_dpo/gap_std": 10.036420822143555,
"beta_dpo/loss_margin_mean": 3.0190885066986084,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3295540438397581,
"grad_norm": 25.01282501220703,
"learning_rate": 4.253687219265803e-07,
"logits/chosen": 1.1358180046081543,
"logits/rejected": 1.1257233619689941,
"loss": 1.1535,
"step": 218
},
{
"beta_dpo/beta": 0.07484374940395355,
"beta_dpo/beta_margin_grad_mean": -0.42955535650253296,
"beta_dpo/beta_margin_grad_std": 0.15756134688854218,
"beta_dpo/beta_margin_mean": 0.3232777416706085,
"beta_dpo/beta_margin_std": 0.7881091833114624,
"beta_dpo/beta_used": 0.07484374940395355,
"beta_dpo/beta_used_raw": 0.07484374940395355,
"beta_dpo/gap_mean": 4.551763534545898,
"beta_dpo/gap_std": 9.937515258789062,
"beta_dpo/loss_margin_mean": 4.4866251945495605,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3310657596371882,
"grad_norm": 15.652204513549805,
"learning_rate": 4.2442393573227043e-07,
"logits/chosen": 0.8975998163223267,
"logits/rejected": 0.8517352342605591,
"loss": 1.1609,
"step": 219
},
{
"beta_dpo/beta": 0.07051355391740799,
"beta_dpo/beta_margin_grad_mean": -0.4343184530735016,
"beta_dpo/beta_margin_grad_std": 0.1779230386018753,
"beta_dpo/beta_margin_mean": 0.348916232585907,
"beta_dpo/beta_margin_std": 0.9510504007339478,
"beta_dpo/beta_used": 0.07051355391740799,
"beta_dpo/beta_used_raw": 0.07051355391740799,
"beta_dpo/gap_mean": 4.42448616027832,
"beta_dpo/gap_std": 9.858744621276855,
"beta_dpo/loss_margin_mean": 3.9274308681488037,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3325774754346183,
"grad_norm": 18.120128631591797,
"learning_rate": 4.234742705255272e-07,
"logits/chosen": 1.033501148223877,
"logits/rejected": 0.9328892230987549,
"loss": 1.227,
"step": 220
},
{
"beta_dpo/beta": 0.131582111120224,
"beta_dpo/beta_margin_grad_mean": -0.3696447014808655,
"beta_dpo/beta_margin_grad_std": 0.2426803708076477,
"beta_dpo/beta_margin_mean": 0.754789412021637,
"beta_dpo/beta_margin_std": 1.385541319847107,
"beta_dpo/beta_used": 0.131582111120224,
"beta_dpo/beta_used_raw": 0.131582111120224,
"beta_dpo/gap_mean": 4.606115341186523,
"beta_dpo/gap_std": 9.945049285888672,
"beta_dpo/loss_margin_mean": 5.741983413696289,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3340891912320484,
"grad_norm": 27.25440788269043,
"learning_rate": 4.22519752870528e-07,
"logits/chosen": 1.1083698272705078,
"logits/rejected": 0.995618462562561,
"loss": 1.0808,
"step": 221
},
{
"beta_dpo/beta": 0.16800376772880554,
"beta_dpo/beta_margin_grad_mean": -0.3291609585285187,
"beta_dpo/beta_margin_grad_std": 0.2453443855047226,
"beta_dpo/beta_margin_mean": 1.1951490640640259,
"beta_dpo/beta_margin_std": 2.0346102714538574,
"beta_dpo/beta_used": 0.16800376772880554,
"beta_dpo/beta_used_raw": 0.16800376772880554,
"beta_dpo/gap_mean": 4.937000274658203,
"beta_dpo/gap_std": 9.868207931518555,
"beta_dpo/loss_margin_mean": 6.663474082946777,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3356009070294785,
"grad_norm": 39.613006591796875,
"learning_rate": 4.2156040946718343e-07,
"logits/chosen": 1.1834269762039185,
"logits/rejected": 1.0584017038345337,
"loss": 0.9994,
"step": 222
},
{
"beta_dpo/beta": 0.03971102833747864,
"beta_dpo/beta_margin_grad_mean": -0.447765976190567,
"beta_dpo/beta_margin_grad_std": 0.14194580912590027,
"beta_dpo/beta_margin_mean": 0.2512494623661041,
"beta_dpo/beta_margin_std": 0.7044557332992554,
"beta_dpo/beta_used": 0.03971102833747864,
"beta_dpo/beta_used_raw": 0.020069805905222893,
"beta_dpo/gap_mean": 5.080411434173584,
"beta_dpo/gap_std": 9.861115455627441,
"beta_dpo/loss_margin_mean": 5.334641456604004,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3371126228269085,
"grad_norm": 11.249165534973145,
"learning_rate": 4.2059626715039065e-07,
"logits/chosen": 0.9963165521621704,
"logits/rejected": 0.9416338205337524,
"loss": 1.2629,
"step": 223
},
{
"beta_dpo/beta": 0.038020312786102295,
"beta_dpo/beta_margin_grad_mean": -0.4604024589061737,
"beta_dpo/beta_margin_grad_std": 0.08493012934923172,
"beta_dpo/beta_margin_mean": 0.16689425706863403,
"beta_dpo/beta_margin_std": 0.35968098044395447,
"beta_dpo/beta_used": 0.038020312786102295,
"beta_dpo/beta_used_raw": 0.038020312786102295,
"beta_dpo/gap_mean": 5.034086227416992,
"beta_dpo/gap_std": 9.717823028564453,
"beta_dpo/loss_margin_mean": 4.373403072357178,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3386243386243386,
"grad_norm": 10.994402885437012,
"learning_rate": 4.1962735288928304e-07,
"logits/chosen": 1.1170026063919067,
"logits/rejected": 1.1115076541900635,
"loss": 1.2542,
"step": 224
},
{
"beta_dpo/beta": 0.062366921454668045,
"beta_dpo/beta_margin_grad_mean": -0.41421541571617126,
"beta_dpo/beta_margin_grad_std": 0.1663094013929367,
"beta_dpo/beta_margin_mean": 0.49164319038391113,
"beta_dpo/beta_margin_std": 1.0539456605911255,
"beta_dpo/beta_used": 0.062366921454668045,
"beta_dpo/beta_used_raw": 0.030658261850476265,
"beta_dpo/gap_mean": 5.1772003173828125,
"beta_dpo/gap_std": 9.866857528686523,
"beta_dpo/loss_margin_mean": 5.452095031738281,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3401360544217687,
"grad_norm": 19.45634651184082,
"learning_rate": 4.186536937864752e-07,
"logits/chosen": 1.111361026763916,
"logits/rejected": 0.9815366268157959,
"loss": 1.2066,
"step": 225
},
{
"beta_dpo/beta": 0.1158708781003952,
"beta_dpo/beta_margin_grad_mean": -0.3906730115413666,
"beta_dpo/beta_margin_grad_std": 0.2309074103832245,
"beta_dpo/beta_margin_mean": 0.6056569218635559,
"beta_dpo/beta_margin_std": 1.3018516302108765,
"beta_dpo/beta_used": 0.1158708781003952,
"beta_dpo/beta_used_raw": 0.1158708781003952,
"beta_dpo/gap_mean": 5.038943290710449,
"beta_dpo/gap_std": 10.077461242675781,
"beta_dpo/loss_margin_mean": 5.073428630828857,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3416477702191988,
"grad_norm": 24.180665969848633,
"learning_rate": 4.176753170773052e-07,
"logits/chosen": 1.2869049310684204,
"logits/rejected": 1.2370771169662476,
"loss": 1.064,
"step": 226
},
{
"beta_dpo/beta": 0.13195355236530304,
"beta_dpo/beta_margin_grad_mean": -0.38195154070854187,
"beta_dpo/beta_margin_grad_std": 0.25775906443595886,
"beta_dpo/beta_margin_mean": 0.6441952586174011,
"beta_dpo/beta_margin_std": 1.4211139678955078,
"beta_dpo/beta_used": 0.13195355236530304,
"beta_dpo/beta_used_raw": 0.13195355236530304,
"beta_dpo/gap_mean": 5.078580856323242,
"beta_dpo/gap_std": 10.176387786865234,
"beta_dpo/loss_margin_mean": 4.958683490753174,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3431594860166289,
"grad_norm": 37.188629150390625,
"learning_rate": 4.166922501290729e-07,
"logits/chosen": 1.082693099975586,
"logits/rejected": 1.007001519203186,
"loss": 1.0491,
"step": 227
},
{
"beta_dpo/beta": 0.0692511573433876,
"beta_dpo/beta_margin_grad_mean": -0.4339984655380249,
"beta_dpo/beta_margin_grad_std": 0.16297149658203125,
"beta_dpo/beta_margin_mean": 0.31314608454704285,
"beta_dpo/beta_margin_std": 0.7996662259101868,
"beta_dpo/beta_used": 0.0692511573433876,
"beta_dpo/beta_used_raw": 0.0692511573433876,
"beta_dpo/gap_mean": 4.954934120178223,
"beta_dpo/gap_std": 10.248208999633789,
"beta_dpo/loss_margin_mean": 4.483448505401611,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.34467120181405897,
"grad_norm": 18.75178337097168,
"learning_rate": 4.1570452044027405e-07,
"logits/chosen": 1.2173062562942505,
"logits/rejected": 1.1345555782318115,
"loss": 1.1872,
"step": 228
},
{
"beta_dpo/beta": 0.07860506325960159,
"beta_dpo/beta_margin_grad_mean": -0.41749757528305054,
"beta_dpo/beta_margin_grad_std": 0.15750160813331604,
"beta_dpo/beta_margin_mean": 0.380623459815979,
"beta_dpo/beta_margin_std": 0.7886734008789062,
"beta_dpo/beta_used": 0.07860506325960159,
"beta_dpo/beta_used_raw": 0.07860506325960159,
"beta_dpo/gap_mean": 4.904338836669922,
"beta_dpo/gap_std": 10.165533065795898,
"beta_dpo/loss_margin_mean": 4.610470771789551,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.34618291761148906,
"grad_norm": 20.0991153717041,
"learning_rate": 4.147121556398312e-07,
"logits/chosen": 1.318556547164917,
"logits/rejected": 1.169750690460205,
"loss": 1.1071,
"step": 229
},
{
"beta_dpo/beta": 0.08985284715890884,
"beta_dpo/beta_margin_grad_mean": -0.463742733001709,
"beta_dpo/beta_margin_grad_std": 0.22371892631053925,
"beta_dpo/beta_margin_mean": 0.1602984517812729,
"beta_dpo/beta_margin_std": 1.376175880432129,
"beta_dpo/beta_used": 0.08985284715890884,
"beta_dpo/beta_used_raw": 0.08985284715890884,
"beta_dpo/gap_mean": 4.563235282897949,
"beta_dpo/gap_std": 10.371292114257812,
"beta_dpo/loss_margin_mean": 3.3287789821624756,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3476946334089191,
"grad_norm": 28.62323570251465,
"learning_rate": 4.137151834863213e-07,
"logits/chosen": 1.1366546154022217,
"logits/rejected": 1.127577781677246,
"loss": 1.2091,
"step": 230
},
{
"beta_dpo/beta": 0.16073641180992126,
"beta_dpo/beta_margin_grad_mean": -0.37336307764053345,
"beta_dpo/beta_margin_grad_std": 0.25263532996177673,
"beta_dpo/beta_margin_mean": 1.3000943660736084,
"beta_dpo/beta_margin_std": 2.5289907455444336,
"beta_dpo/beta_used": 0.16073641180992126,
"beta_dpo/beta_used_raw": 0.16073641180992126,
"beta_dpo/gap_mean": 4.904563903808594,
"beta_dpo/gap_std": 10.250078201293945,
"beta_dpo/loss_margin_mean": 6.05340051651001,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3492063492063492,
"grad_norm": 46.785762786865234,
"learning_rate": 4.1271363186719835e-07,
"logits/chosen": 0.9993768334388733,
"logits/rejected": 0.9801430702209473,
"loss": 0.9956,
"step": 231
},
{
"beta_dpo/beta": 0.06868449598550797,
"beta_dpo/beta_margin_grad_mean": -0.4486519694328308,
"beta_dpo/beta_margin_grad_std": 0.15906988084316254,
"beta_dpo/beta_margin_mean": 0.2655814290046692,
"beta_dpo/beta_margin_std": 0.8026698231697083,
"beta_dpo/beta_used": 0.06868449598550797,
"beta_dpo/beta_used_raw": 0.06868449598550797,
"beta_dpo/gap_mean": 4.717308044433594,
"beta_dpo/gap_std": 10.394327163696289,
"beta_dpo/loss_margin_mean": 3.969224452972412,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3507180650037793,
"grad_norm": 23.025684356689453,
"learning_rate": 4.1170752879801436e-07,
"logits/chosen": 0.9813304543495178,
"logits/rejected": 0.9566335678100586,
"loss": 1.2252,
"step": 232
},
{
"beta_dpo/beta": 0.12250923365354538,
"beta_dpo/beta_margin_grad_mean": -0.4030444324016571,
"beta_dpo/beta_margin_grad_std": 0.2429264485836029,
"beta_dpo/beta_margin_mean": 0.8649188876152039,
"beta_dpo/beta_margin_std": 2.051518201828003,
"beta_dpo/beta_used": 0.12250923365354538,
"beta_dpo/beta_used_raw": 0.09468528628349304,
"beta_dpo/gap_mean": 4.685219764709473,
"beta_dpo/gap_std": 10.672860145568848,
"beta_dpo/loss_margin_mean": 3.5858311653137207,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.35222978080120937,
"grad_norm": 29.333518981933594,
"learning_rate": 4.106969024216348e-07,
"logits/chosen": 1.0836119651794434,
"logits/rejected": 1.0568479299545288,
"loss": 1.1273,
"step": 233
},
{
"beta_dpo/beta": 0.12277411669492722,
"beta_dpo/beta_margin_grad_mean": -0.4075966775417328,
"beta_dpo/beta_margin_grad_std": 0.2486211508512497,
"beta_dpo/beta_margin_mean": 0.7519962191581726,
"beta_dpo/beta_margin_std": 1.8956360816955566,
"beta_dpo/beta_used": 0.12277411669492722,
"beta_dpo/beta_used_raw": 0.11577944457530975,
"beta_dpo/gap_mean": 4.205699920654297,
"beta_dpo/gap_std": 10.738191604614258,
"beta_dpo/loss_margin_mean": 3.597349166870117,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.35374149659863946,
"grad_norm": 35.93962097167969,
"learning_rate": 4.09681781007452e-07,
"logits/chosen": 1.1078158617019653,
"logits/rejected": 1.0694584846496582,
"loss": 1.2538,
"step": 234
},
{
"beta_dpo/beta": 0.06808540970087051,
"beta_dpo/beta_margin_grad_mean": -0.4144769012928009,
"beta_dpo/beta_margin_grad_std": 0.1782931536436081,
"beta_dpo/beta_margin_mean": 0.4272496700286865,
"beta_dpo/beta_margin_std": 0.9646300077438354,
"beta_dpo/beta_used": 0.06808540970087051,
"beta_dpo/beta_used_raw": 0.06808540970087051,
"beta_dpo/gap_mean": 4.591578483581543,
"beta_dpo/gap_std": 10.67329216003418,
"beta_dpo/loss_margin_mean": 6.257509231567383,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.35525321239606955,
"grad_norm": 24.817764282226562,
"learning_rate": 4.08662192950594e-07,
"logits/chosen": 1.2615323066711426,
"logits/rejected": 1.2428040504455566,
"loss": 1.243,
"step": 235
},
{
"beta_dpo/beta": 0.12781299650669098,
"beta_dpo/beta_margin_grad_mean": -0.41924628615379333,
"beta_dpo/beta_margin_grad_std": 0.2403831034898758,
"beta_dpo/beta_margin_mean": 0.5158213376998901,
"beta_dpo/beta_margin_std": 1.6929750442504883,
"beta_dpo/beta_used": 0.12781299650669098,
"beta_dpo/beta_used_raw": 0.12781299650669098,
"beta_dpo/gap_mean": 4.6112799644470215,
"beta_dpo/gap_std": 10.800046920776367,
"beta_dpo/loss_margin_mean": 4.1561431884765625,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.35676492819349964,
"grad_norm": 32.81260299682617,
"learning_rate": 4.076381667711306e-07,
"logits/chosen": 1.0885969400405884,
"logits/rejected": 1.085923433303833,
"loss": 1.0883,
"step": 236
},
{
"beta_dpo/beta": 0.08545999974012375,
"beta_dpo/beta_margin_grad_mean": -0.4522503614425659,
"beta_dpo/beta_margin_grad_std": 0.2021704614162445,
"beta_dpo/beta_margin_mean": 0.24412307143211365,
"beta_dpo/beta_margin_std": 1.2302663326263428,
"beta_dpo/beta_used": 0.08545999974012375,
"beta_dpo/beta_used_raw": 0.05330658704042435,
"beta_dpo/gap_mean": 4.319942951202393,
"beta_dpo/gap_std": 10.680757522583008,
"beta_dpo/loss_margin_mean": 2.8172736167907715,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.35827664399092973,
"grad_norm": 23.791624069213867,
"learning_rate": 4.066097311132753e-07,
"logits/chosen": 1.0574512481689453,
"logits/rejected": 1.0594351291656494,
"loss": 1.1732,
"step": 237
},
{
"beta_dpo/beta": 0.11690382659435272,
"beta_dpo/beta_margin_grad_mean": -0.3783750832080841,
"beta_dpo/beta_margin_grad_std": 0.19828879833221436,
"beta_dpo/beta_margin_mean": 0.6061506271362305,
"beta_dpo/beta_margin_std": 1.0086196660995483,
"beta_dpo/beta_used": 0.11690382659435272,
"beta_dpo/beta_used_raw": 0.11690382659435272,
"beta_dpo/gap_mean": 4.383785247802734,
"beta_dpo/gap_std": 10.364669799804688,
"beta_dpo/loss_margin_mean": 5.172383785247803,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.35978835978835977,
"grad_norm": 28.334075927734375,
"learning_rate": 4.0557691474458414e-07,
"logits/chosen": 1.1180424690246582,
"logits/rejected": 1.1595218181610107,
"loss": 1.083,
"step": 238
},
{
"beta_dpo/beta": 0.12952181696891785,
"beta_dpo/beta_margin_grad_mean": -0.3834363520145416,
"beta_dpo/beta_margin_grad_std": 0.231191948056221,
"beta_dpo/beta_margin_mean": 0.8099231719970703,
"beta_dpo/beta_margin_std": 1.637900948524475,
"beta_dpo/beta_used": 0.12952181696891785,
"beta_dpo/beta_used_raw": 0.12952181696891785,
"beta_dpo/gap_mean": 4.5827317237854,
"beta_dpo/gap_std": 10.368627548217773,
"beta_dpo/loss_margin_mean": 5.8209710121154785,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.36130007558578986,
"grad_norm": 33.923667907714844,
"learning_rate": 4.045397465551513e-07,
"logits/chosen": 1.5741066932678223,
"logits/rejected": 1.3806960582733154,
"loss": 1.1438,
"step": 239
},
{
"beta_dpo/beta": 0.15690244734287262,
"beta_dpo/beta_margin_grad_mean": -0.3347066640853882,
"beta_dpo/beta_margin_grad_std": 0.2660229504108429,
"beta_dpo/beta_margin_mean": 1.1113700866699219,
"beta_dpo/beta_margin_std": 1.745145320892334,
"beta_dpo/beta_used": 0.15690244734287262,
"beta_dpo/beta_used_raw": 0.15690244734287262,
"beta_dpo/gap_mean": 5.033029556274414,
"beta_dpo/gap_std": 10.530231475830078,
"beta_dpo/loss_margin_mean": 7.105434894561768,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.36281179138321995,
"grad_norm": 41.656497955322266,
"learning_rate": 4.0349825555680045e-07,
"logits/chosen": 1.2912126779556274,
"logits/rejected": 1.1457183361053467,
"loss": 1.02,
"step": 240
},
{
"beta_dpo/beta": 0.05593106895685196,
"beta_dpo/beta_margin_grad_mean": -0.44817054271698,
"beta_dpo/beta_margin_grad_std": 0.13052473962306976,
"beta_dpo/beta_margin_mean": 0.23059462010860443,
"beta_dpo/beta_margin_std": 0.5865014791488647,
"beta_dpo/beta_used": 0.05593106895685196,
"beta_dpo/beta_used_raw": 0.05593106895685196,
"beta_dpo/gap_mean": 5.024003982543945,
"beta_dpo/gap_std": 10.371394157409668,
"beta_dpo/loss_margin_mean": 4.504753112792969,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.36432350718065004,
"grad_norm": 16.76007080078125,
"learning_rate": 4.0245247088227377e-07,
"logits/chosen": 1.0622036457061768,
"logits/rejected": 1.0596222877502441,
"loss": 1.2225,
"step": 241
},
{
"beta_dpo/beta": 0.12295199930667877,
"beta_dpo/beta_margin_grad_mean": -0.36693933606147766,
"beta_dpo/beta_margin_grad_std": 0.21783827245235443,
"beta_dpo/beta_margin_mean": 1.1881290674209595,
"beta_dpo/beta_margin_std": 2.176382064819336,
"beta_dpo/beta_used": 0.12295199930667877,
"beta_dpo/beta_used_raw": 0.09869402647018433,
"beta_dpo/gap_mean": 5.463100433349609,
"beta_dpo/gap_std": 10.322896957397461,
"beta_dpo/loss_margin_mean": 7.194202423095703,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.36583522297808013,
"grad_norm": 23.658649444580078,
"learning_rate": 4.0140242178441665e-07,
"logits/chosen": 1.0880014896392822,
"logits/rejected": 1.0291259288787842,
"loss": 1.0331,
"step": 242
},
{
"beta_dpo/beta": 0.0481017641723156,
"beta_dpo/beta_margin_grad_mean": -0.4544302225112915,
"beta_dpo/beta_margin_grad_std": 0.1332933008670807,
"beta_dpo/beta_margin_mean": 0.20558291673660278,
"beta_dpo/beta_margin_std": 0.616269052028656,
"beta_dpo/beta_used": 0.0481017641723156,
"beta_dpo/beta_used_raw": -0.00047880038619041443,
"beta_dpo/gap_mean": 5.221932411193848,
"beta_dpo/gap_std": 10.100500106811523,
"beta_dpo/loss_margin_mean": 3.924454927444458,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3673469387755102,
"grad_norm": 14.327198028564453,
"learning_rate": 4.003481376353596e-07,
"logits/chosen": 1.103372573852539,
"logits/rejected": 1.1234753131866455,
"loss": 1.226,
"step": 243
},
{
"beta_dpo/beta": 0.10611068457365036,
"beta_dpo/beta_margin_grad_mean": -0.3706439137458801,
"beta_dpo/beta_margin_grad_std": 0.18867537379264832,
"beta_dpo/beta_margin_mean": 0.6805884838104248,
"beta_dpo/beta_margin_std": 1.0468705892562866,
"beta_dpo/beta_used": 0.10611068457365036,
"beta_dpo/beta_used_raw": 0.10611068457365036,
"beta_dpo/gap_mean": 5.321632385253906,
"beta_dpo/gap_std": 9.931724548339844,
"beta_dpo/loss_margin_mean": 6.462033271789551,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3688586545729403,
"grad_norm": 28.391023635864258,
"learning_rate": 3.9928964792569654e-07,
"logits/chosen": 0.7984298467636108,
"logits/rejected": 0.7795098423957825,
"loss": 1.0613,
"step": 244
},
{
"beta_dpo/beta": 0.12491966784000397,
"beta_dpo/beta_margin_grad_mean": -0.34912219643592834,
"beta_dpo/beta_margin_grad_std": 0.2054431289434433,
"beta_dpo/beta_margin_mean": 0.7987427115440369,
"beta_dpo/beta_margin_std": 1.17366361618042,
"beta_dpo/beta_used": 0.12491966784000397,
"beta_dpo/beta_used_raw": 0.12491966784000397,
"beta_dpo/gap_mean": 5.4922099113464355,
"beta_dpo/gap_std": 9.798683166503906,
"beta_dpo/loss_margin_mean": 6.305713653564453,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.37037037037037035,
"grad_norm": 29.477256774902344,
"learning_rate": 3.982269822636601e-07,
"logits/chosen": 1.333395004272461,
"logits/rejected": 1.2788350582122803,
"loss": 0.9597,
"step": 245
},
{
"beta_dpo/beta": 0.0807776153087616,
"beta_dpo/beta_margin_grad_mean": -0.398184210062027,
"beta_dpo/beta_margin_grad_std": 0.18760626018047333,
"beta_dpo/beta_margin_mean": 0.4955151677131653,
"beta_dpo/beta_margin_std": 0.9998785853385925,
"beta_dpo/beta_used": 0.0807776153087616,
"beta_dpo/beta_used_raw": 0.0807776153087616,
"beta_dpo/gap_mean": 5.684264183044434,
"beta_dpo/gap_std": 10.004972457885742,
"beta_dpo/loss_margin_mean": 6.780579566955566,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.37188208616780044,
"grad_norm": 19.162378311157227,
"learning_rate": 3.971601703742932e-07,
"logits/chosen": 1.1533265113830566,
"logits/rejected": 1.0754314661026,
"loss": 1.0975,
"step": 246
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4993528425693512,
"beta_dpo/beta_margin_grad_std": 0.002463964279741049,
"beta_dpo/beta_margin_mean": 0.002588639734312892,
"beta_dpo/beta_margin_std": 0.009856174699962139,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.06930781155824661,
"beta_dpo/gap_mean": 5.321089267730713,
"beta_dpo/gap_std": 10.132028579711914,
"beta_dpo/loss_margin_mean": 2.588639497756958,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.37339380196523053,
"grad_norm": 0.303940087556839,
"learning_rate": 3.960892420986177e-07,
"logits/chosen": 1.131533145904541,
"logits/rejected": 1.086637020111084,
"loss": 1.3838,
"step": 247
},
{
"beta_dpo/beta": 0.051299843937158585,
"beta_dpo/beta_margin_grad_mean": -0.436193585395813,
"beta_dpo/beta_margin_grad_std": 0.17114627361297607,
"beta_dpo/beta_margin_mean": 0.35004010796546936,
"beta_dpo/beta_margin_std": 0.9127549529075623,
"beta_dpo/beta_used": 0.051299843937158585,
"beta_dpo/beta_used_raw": 0.026382185518741608,
"beta_dpo/gap_mean": 4.960646629333496,
"beta_dpo/gap_std": 10.275779724121094,
"beta_dpo/loss_margin_mean": 4.311920166015625,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3749055177626606,
"grad_norm": 19.132505416870117,
"learning_rate": 3.9501422739279953e-07,
"logits/chosen": 0.9868658185005188,
"logits/rejected": 1.0642544031143188,
"loss": 1.274,
"step": 248
},
{
"beta_dpo/beta": 0.025262191891670227,
"beta_dpo/beta_margin_grad_mean": -0.48927804827690125,
"beta_dpo/beta_margin_grad_std": 0.09066756814718246,
"beta_dpo/beta_margin_mean": 0.04305484890937805,
"beta_dpo/beta_margin_std": 0.39369264245033264,
"beta_dpo/beta_used": 0.025262191891670227,
"beta_dpo/beta_used_raw": 0.00993638951331377,
"beta_dpo/gap_mean": 4.441056251525879,
"beta_dpo/gap_std": 10.518145561218262,
"beta_dpo/loss_margin_mean": 1.0085103511810303,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3764172335600907,
"grad_norm": 8.390883445739746,
"learning_rate": 3.9393515632731094e-07,
"logits/chosen": 1.4377015829086304,
"logits/rejected": 1.458314061164856,
"loss": 1.3242,
"step": 249
},
{
"beta_dpo/beta": 0.15996399521827698,
"beta_dpo/beta_margin_grad_mean": -0.3432745635509491,
"beta_dpo/beta_margin_grad_std": 0.24865637719631195,
"beta_dpo/beta_margin_mean": 1.1924474239349365,
"beta_dpo/beta_margin_std": 2.246605157852173,
"beta_dpo/beta_used": 0.15996399521827698,
"beta_dpo/beta_used_raw": 0.15996399521827698,
"beta_dpo/gap_mean": 4.528580188751221,
"beta_dpo/gap_std": 10.77707576751709,
"beta_dpo/loss_margin_mean": 6.6261677742004395,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3779289493575208,
"grad_norm": 37.698692321777344,
"learning_rate": 3.9285205908608934e-07,
"logits/chosen": 0.9921024441719055,
"logits/rejected": 0.969779372215271,
"loss": 1.0786,
"step": 250
},
{
"beta_dpo/beta": 0.014929315075278282,
"beta_dpo/beta_margin_grad_mean": -0.4864945411682129,
"beta_dpo/beta_margin_grad_std": 0.036659833043813705,
"beta_dpo/beta_margin_mean": 0.05437476187944412,
"beta_dpo/beta_margin_std": 0.1476600021123886,
"beta_dpo/beta_used": 0.014929315075278282,
"beta_dpo/beta_used_raw": 0.014929315075278282,
"beta_dpo/gap_mean": 4.711355209350586,
"beta_dpo/gap_std": 10.758208274841309,
"beta_dpo/loss_margin_mean": 3.8696646690368652,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3794406651549509,
"grad_norm": 4.087891578674316,
"learning_rate": 3.9176496596569265e-07,
"logits/chosen": 1.3167650699615479,
"logits/rejected": 1.248030424118042,
"loss": 1.3404,
"step": 251
},
{
"beta_dpo/beta": 0.025902319699525833,
"beta_dpo/beta_margin_grad_mean": -0.4686071276664734,
"beta_dpo/beta_margin_grad_std": 0.09484216570854187,
"beta_dpo/beta_margin_mean": 0.13240431249141693,
"beta_dpo/beta_margin_std": 0.40574291348457336,
"beta_dpo/beta_used": 0.025902319699525833,
"beta_dpo/beta_used_raw": -0.0074716489762067795,
"beta_dpo/gap_mean": 4.2776288986206055,
"beta_dpo/gap_std": 10.861841201782227,
"beta_dpo/loss_margin_mean": 3.1439483165740967,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.38095238095238093,
"grad_norm": 8.144107818603516,
"learning_rate": 3.9067390737445254e-07,
"logits/chosen": 1.003096580505371,
"logits/rejected": 0.950405478477478,
"loss": 1.3267,
"step": 252
},
{
"beta_dpo/beta": 0.06798431277275085,
"beta_dpo/beta_margin_grad_mean": -0.42808982729911804,
"beta_dpo/beta_margin_grad_std": 0.18537691235542297,
"beta_dpo/beta_margin_mean": 0.4428131580352783,
"beta_dpo/beta_margin_std": 1.1600432395935059,
"beta_dpo/beta_used": 0.06798431277275085,
"beta_dpo/beta_used_raw": 0.021464969962835312,
"beta_dpo/gap_mean": 4.384973526000977,
"beta_dpo/gap_std": 10.881026268005371,
"beta_dpo/loss_margin_mean": 3.916627883911133,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.382464096749811,
"grad_norm": 25.51688575744629,
"learning_rate": 3.8957891383162304e-07,
"logits/chosen": 1.0553115606307983,
"logits/rejected": 1.0057858228683472,
"loss": 1.2632,
"step": 253
},
{
"beta_dpo/beta": 0.05142858996987343,
"beta_dpo/beta_margin_grad_mean": -0.4397808313369751,
"beta_dpo/beta_margin_grad_std": 0.17119485139846802,
"beta_dpo/beta_margin_mean": 0.3350658714771271,
"beta_dpo/beta_margin_std": 0.8966230750083923,
"beta_dpo/beta_used": 0.05142858996987343,
"beta_dpo/beta_used_raw": 0.043801601976156235,
"beta_dpo/gap_mean": 4.4246320724487305,
"beta_dpo/gap_std": 10.816822052001953,
"beta_dpo/loss_margin_mean": 5.208080768585205,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3839758125472411,
"grad_norm": 19.18077850341797,
"learning_rate": 3.884800159665276e-07,
"logits/chosen": 0.8030510544776917,
"logits/rejected": 0.777626633644104,
"loss": 1.3043,
"step": 254
},
{
"beta_dpo/beta": 0.16544781625270844,
"beta_dpo/beta_margin_grad_mean": -0.3781430125236511,
"beta_dpo/beta_margin_grad_std": 0.27625834941864014,
"beta_dpo/beta_margin_mean": 1.0486892461776733,
"beta_dpo/beta_margin_std": 2.800529718399048,
"beta_dpo/beta_used": 0.16544781625270844,
"beta_dpo/beta_used_raw": 0.1183973029255867,
"beta_dpo/gap_mean": 4.510273456573486,
"beta_dpo/gap_std": 10.781914710998535,
"beta_dpo/loss_margin_mean": 4.662529468536377,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3854875283446712,
"grad_norm": 57.56937789916992,
"learning_rate": 3.873772445177015e-07,
"logits/chosen": 1.3328914642333984,
"logits/rejected": 1.3053648471832275,
"loss": 1.0872,
"step": 255
},
{
"beta_dpo/beta": 0.13992524147033691,
"beta_dpo/beta_margin_grad_mean": -0.34854984283447266,
"beta_dpo/beta_margin_grad_std": 0.25158390402793884,
"beta_dpo/beta_margin_mean": 0.9280179738998413,
"beta_dpo/beta_margin_std": 1.6355485916137695,
"beta_dpo/beta_used": 0.13992524147033691,
"beta_dpo/beta_used_raw": 0.13992524147033691,
"beta_dpo/gap_mean": 4.742461204528809,
"beta_dpo/gap_std": 10.8091402053833,
"beta_dpo/loss_margin_mean": 6.324960708618164,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3869992441421013,
"grad_norm": 36.89436340332031,
"learning_rate": 3.862706303320329e-07,
"logits/chosen": 1.0001685619354248,
"logits/rejected": 0.90242600440979,
"loss": 1.1972,
"step": 256
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49879777431488037,
"beta_dpo/beta_margin_grad_std": 0.0027100411243736744,
"beta_dpo/beta_margin_mean": 0.004809120204299688,
"beta_dpo/beta_margin_std": 0.010840562172234058,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.040715523064136505,
"beta_dpo/gap_mean": 4.818190574645996,
"beta_dpo/gap_std": 10.846813201904297,
"beta_dpo/loss_margin_mean": 4.809120178222656,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3885109599395314,
"grad_norm": 0.31598353385925293,
"learning_rate": 3.851602043638994e-07,
"logits/chosen": 0.9752795696258545,
"logits/rejected": 0.9454945921897888,
"loss": 1.3838,
"step": 257
},
{
"beta_dpo/beta": 0.11642733216285706,
"beta_dpo/beta_margin_grad_mean": -0.3744834065437317,
"beta_dpo/beta_margin_grad_std": 0.2298222929239273,
"beta_dpo/beta_margin_mean": 0.8234491944313049,
"beta_dpo/beta_margin_std": 1.7443535327911377,
"beta_dpo/beta_used": 0.11642733216285706,
"beta_dpo/beta_used_raw": 0.10990360379219055,
"beta_dpo/gap_mean": 4.870306968688965,
"beta_dpo/gap_std": 10.817766189575195,
"beta_dpo/loss_margin_mean": 5.804533004760742,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3900226757369615,
"grad_norm": 37.911373138427734,
"learning_rate": 3.840459976743023e-07,
"logits/chosen": 1.1632795333862305,
"logits/rejected": 1.0844495296478271,
"loss": 1.1073,
"step": 258
},
{
"beta_dpo/beta": 0.31446343660354614,
"beta_dpo/beta_margin_grad_mean": -0.24687738716602325,
"beta_dpo/beta_margin_grad_std": 0.34187325835227966,
"beta_dpo/beta_margin_mean": 2.696594476699829,
"beta_dpo/beta_margin_std": 3.7241179943084717,
"beta_dpo/beta_used": 0.31446343660354614,
"beta_dpo/beta_used_raw": 0.31446343660354614,
"beta_dpo/gap_mean": 5.502510070800781,
"beta_dpo/gap_std": 11.044046401977539,
"beta_dpo/loss_margin_mean": 8.567591667175293,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3915343915343915,
"grad_norm": 65.49555969238281,
"learning_rate": 3.8292804142999796e-07,
"logits/chosen": 1.0487525463104248,
"logits/rejected": 0.9705426096916199,
"loss": 0.7479,
"step": 259
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49873608350753784,
"beta_dpo/beta_margin_grad_std": 0.002948764944449067,
"beta_dpo/beta_margin_mean": 0.005055864807218313,
"beta_dpo/beta_margin_std": 0.011795504949986935,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.0352797731757164,
"beta_dpo/gap_mean": 5.625866413116455,
"beta_dpo/gap_std": 11.170843124389648,
"beta_dpo/loss_margin_mean": 5.0558648109436035,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3930461073318216,
"grad_norm": 0.28733569383621216,
"learning_rate": 3.818063669026256e-07,
"logits/chosen": 1.2932058572769165,
"logits/rejected": 1.1448646783828735,
"loss": 1.3829,
"step": 260
},
{
"beta_dpo/beta": 0.09499503672122955,
"beta_dpo/beta_margin_grad_mean": -0.43870827555656433,
"beta_dpo/beta_margin_grad_std": 0.23113374412059784,
"beta_dpo/beta_margin_mean": 0.43297070264816284,
"beta_dpo/beta_margin_std": 1.5510969161987305,
"beta_dpo/beta_used": 0.09499503672122955,
"beta_dpo/beta_used_raw": 0.09000162035226822,
"beta_dpo/gap_mean": 5.343086242675781,
"beta_dpo/gap_std": 11.143077850341797,
"beta_dpo/loss_margin_mean": 3.999406576156616,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3945578231292517,
"grad_norm": 30.237884521484375,
"learning_rate": 3.806810054678331e-07,
"logits/chosen": 1.0707039833068848,
"logits/rejected": 1.1073495149612427,
"loss": 1.1924,
"step": 261
},
{
"beta_dpo/beta": 0.08395867794752121,
"beta_dpo/beta_margin_grad_mean": -0.41953787207603455,
"beta_dpo/beta_margin_grad_std": 0.20528234541416168,
"beta_dpo/beta_margin_mean": 0.5397913455963135,
"beta_dpo/beta_margin_std": 1.298319935798645,
"beta_dpo/beta_used": 0.08395867794752121,
"beta_dpo/beta_used_raw": 0.05914995074272156,
"beta_dpo/gap_mean": 5.180658340454102,
"beta_dpo/gap_std": 10.83466911315918,
"beta_dpo/loss_margin_mean": 5.200860500335693,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3960695389266818,
"grad_norm": 24.859472274780273,
"learning_rate": 3.7955198860439887e-07,
"logits/chosen": 1.3326735496520996,
"logits/rejected": 1.2338473796844482,
"loss": 1.1553,
"step": 262
},
{
"beta_dpo/beta": 0.09583983570337296,
"beta_dpo/beta_margin_grad_mean": -0.3918282091617584,
"beta_dpo/beta_margin_grad_std": 0.20861949026584625,
"beta_dpo/beta_margin_mean": 0.5731788277626038,
"beta_dpo/beta_margin_std": 1.2116243839263916,
"beta_dpo/beta_used": 0.09583983570337296,
"beta_dpo/beta_used_raw": 0.09583983570337296,
"beta_dpo/gap_mean": 5.476344108581543,
"beta_dpo/gap_std": 10.684277534484863,
"beta_dpo/loss_margin_mean": 6.516000747680664,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3975812547241119,
"grad_norm": 22.119905471801758,
"learning_rate": 3.784193478933516e-07,
"logits/chosen": 1.136518955230713,
"logits/rejected": 1.013533353805542,
"loss": 1.0766,
"step": 263
},
{
"beta_dpo/beta": 0.0488370805978775,
"beta_dpo/beta_margin_grad_mean": -0.4324823319911957,
"beta_dpo/beta_margin_grad_std": 0.10733956843614578,
"beta_dpo/beta_margin_mean": 0.2894676625728607,
"beta_dpo/beta_margin_std": 0.4639683961868286,
"beta_dpo/beta_used": 0.0488370805978775,
"beta_dpo/beta_used_raw": 0.0488370805978775,
"beta_dpo/gap_mean": 5.533038139343262,
"beta_dpo/gap_std": 10.499252319335938,
"beta_dpo/loss_margin_mean": 5.958970069885254,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.39909297052154197,
"grad_norm": 13.703917503356934,
"learning_rate": 3.7728311501708674e-07,
"logits/chosen": 1.2367783784866333,
"logits/rejected": 1.150953769683838,
"loss": 1.2056,
"step": 264
},
{
"beta_dpo/beta": 0.21453940868377686,
"beta_dpo/beta_margin_grad_mean": -0.28706979751586914,
"beta_dpo/beta_margin_grad_std": 0.2895905673503876,
"beta_dpo/beta_margin_mean": 1.877687692642212,
"beta_dpo/beta_margin_std": 2.552780866622925,
"beta_dpo/beta_used": 0.21453940868377686,
"beta_dpo/beta_used_raw": 0.21453940868377686,
"beta_dpo/gap_mean": 5.86796760559082,
"beta_dpo/gap_std": 10.677862167358398,
"beta_dpo/loss_margin_mean": 8.177618026733398,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.40060468631897206,
"grad_norm": 39.615272521972656,
"learning_rate": 3.7614332175848027e-07,
"logits/chosen": 1.2300488948822021,
"logits/rejected": 1.1951534748077393,
"loss": 0.785,
"step": 265
},
{
"beta_dpo/beta": 0.03808180242776871,
"beta_dpo/beta_margin_grad_mean": -0.4472852647304535,
"beta_dpo/beta_margin_grad_std": 0.11670338362455368,
"beta_dpo/beta_margin_mean": 0.24460670351982117,
"beta_dpo/beta_margin_std": 0.5672850608825684,
"beta_dpo/beta_used": 0.03808180242776871,
"beta_dpo/beta_used_raw": 0.010574424639344215,
"beta_dpo/gap_mean": 6.066445350646973,
"beta_dpo/gap_std": 10.548543930053711,
"beta_dpo/loss_margin_mean": 5.729500770568848,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4021164021164021,
"grad_norm": 12.612692832946777,
"learning_rate": 3.75e-07,
"logits/chosen": 0.9250593185424805,
"logits/rejected": 0.8699378967285156,
"loss": 1.2227,
"step": 266
},
{
"beta_dpo/beta": 0.11268052458763123,
"beta_dpo/beta_margin_grad_mean": -0.39133748412132263,
"beta_dpo/beta_margin_grad_std": 0.2150212526321411,
"beta_dpo/beta_margin_mean": 0.8038800954818726,
"beta_dpo/beta_margin_std": 1.653910756111145,
"beta_dpo/beta_used": 0.11268052458763123,
"beta_dpo/beta_used_raw": 0.10867670178413391,
"beta_dpo/gap_mean": 5.74346399307251,
"beta_dpo/gap_std": 10.44660758972168,
"beta_dpo/loss_margin_mean": 4.872748374938965,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4036281179138322,
"grad_norm": 21.86273956298828,
"learning_rate": 3.738531817228131e-07,
"logits/chosen": 1.1546790599822998,
"logits/rejected": 1.1331496238708496,
"loss": 1.0233,
"step": 267
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49919313192367554,
"beta_dpo/beta_margin_grad_std": 0.002566243289038539,
"beta_dpo/beta_margin_mean": 0.003227637615054846,
"beta_dpo/beta_margin_std": 0.01026532705873251,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.012797963805496693,
"beta_dpo/gap_mean": 5.344855308532715,
"beta_dpo/gap_std": 10.346288681030273,
"beta_dpo/loss_margin_mean": 3.227637529373169,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4051398337112623,
"grad_norm": 0.29407811164855957,
"learning_rate": 3.7270289900589204e-07,
"logits/chosen": 1.2539623975753784,
"logits/rejected": 1.2189905643463135,
"loss": 1.3828,
"step": 268
},
{
"beta_dpo/beta": 0.15564534068107605,
"beta_dpo/beta_margin_grad_mean": -0.3451966941356659,
"beta_dpo/beta_margin_grad_std": 0.2405506670475006,
"beta_dpo/beta_margin_mean": 0.9552483558654785,
"beta_dpo/beta_margin_std": 1.6470414400100708,
"beta_dpo/beta_used": 0.15564534068107605,
"beta_dpo/beta_used_raw": 0.15564534068107605,
"beta_dpo/gap_mean": 5.239716529846191,
"beta_dpo/gap_std": 10.26348876953125,
"beta_dpo/loss_margin_mean": 5.233408451080322,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.40665154950869237,
"grad_norm": 34.52106475830078,
"learning_rate": 3.7154918402511714e-07,
"logits/chosen": 1.3101131916046143,
"logits/rejected": 1.2675879001617432,
"loss": 0.9188,
"step": 269
},
{
"beta_dpo/beta": 0.08239845931529999,
"beta_dpo/beta_margin_grad_mean": -0.4100302755832672,
"beta_dpo/beta_margin_grad_std": 0.18438223004341125,
"beta_dpo/beta_margin_mean": 0.42504921555519104,
"beta_dpo/beta_margin_std": 0.8831678032875061,
"beta_dpo/beta_used": 0.08239845931529999,
"beta_dpo/beta_used_raw": 0.08239845931529999,
"beta_dpo/gap_mean": 5.3209733963012695,
"beta_dpo/gap_std": 10.249567031860352,
"beta_dpo/loss_margin_mean": 5.127548694610596,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.40816326530612246,
"grad_norm": 22.110576629638672,
"learning_rate": 3.7039206905237656e-07,
"logits/chosen": 1.289703369140625,
"logits/rejected": 1.1867177486419678,
"loss": 1.1083,
"step": 270
},
{
"beta_dpo/beta": 0.04837838187813759,
"beta_dpo/beta_margin_grad_mean": -0.458254337310791,
"beta_dpo/beta_margin_grad_std": 0.138353630900383,
"beta_dpo/beta_margin_mean": 0.21362274885177612,
"beta_dpo/beta_margin_std": 0.7236654162406921,
"beta_dpo/beta_used": 0.04837838187813759,
"beta_dpo/beta_used_raw": 0.04837838187813759,
"beta_dpo/gap_mean": 5.161229133605957,
"beta_dpo/gap_std": 10.45259952545166,
"beta_dpo/loss_margin_mean": 4.351984977722168,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.40967498110355255,
"grad_norm": 13.931172370910645,
"learning_rate": 3.692315864546635e-07,
"logits/chosen": 1.243234395980835,
"logits/rejected": 1.1881849765777588,
"loss": 1.2273,
"step": 271
},
{
"beta_dpo/beta": 0.18383634090423584,
"beta_dpo/beta_margin_grad_mean": -0.3311034142971039,
"beta_dpo/beta_margin_grad_std": 0.2728104293346405,
"beta_dpo/beta_margin_mean": 1.0728017091751099,
"beta_dpo/beta_margin_std": 1.831144094467163,
"beta_dpo/beta_used": 0.18383634090423584,
"beta_dpo/beta_used_raw": 0.18383634090423584,
"beta_dpo/gap_mean": 5.191141605377197,
"beta_dpo/gap_std": 10.428142547607422,
"beta_dpo/loss_margin_mean": 5.907552242279053,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.41118669690098264,
"grad_norm": 38.00907897949219,
"learning_rate": 3.6806776869317067e-07,
"logits/chosen": 1.0192674398422241,
"logits/rejected": 1.0176470279693604,
"loss": 0.863,
"step": 272
},
{
"beta_dpo/beta": 0.06558459997177124,
"beta_dpo/beta_margin_grad_mean": -0.42467087507247925,
"beta_dpo/beta_margin_grad_std": 0.18101128935813904,
"beta_dpo/beta_margin_mean": 0.3881887197494507,
"beta_dpo/beta_margin_std": 0.9979269504547119,
"beta_dpo/beta_used": 0.06558459997177124,
"beta_dpo/beta_used_raw": 0.04673774540424347,
"beta_dpo/gap_mean": 5.385332107543945,
"beta_dpo/gap_std": 10.592522621154785,
"beta_dpo/loss_margin_mean": 6.027635097503662,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4126984126984127,
"grad_norm": 22.860248565673828,
"learning_rate": 3.669006483223828e-07,
"logits/chosen": 1.0584709644317627,
"logits/rejected": 1.0032527446746826,
"loss": 1.2133,
"step": 273
},
{
"beta_dpo/beta": 0.23129317164421082,
"beta_dpo/beta_margin_grad_mean": -0.3218546211719513,
"beta_dpo/beta_margin_grad_std": 0.3190883696079254,
"beta_dpo/beta_margin_mean": 1.5642485618591309,
"beta_dpo/beta_margin_std": 2.794994354248047,
"beta_dpo/beta_used": 0.23129317164421082,
"beta_dpo/beta_used_raw": 0.23129317164421082,
"beta_dpo/gap_mean": 5.58009147644043,
"beta_dpo/gap_std": 10.845860481262207,
"beta_dpo/loss_margin_mean": 6.637640953063965,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.41421012849584277,
"grad_norm": 42.139060974121094,
"learning_rate": 3.657302579891656e-07,
"logits/chosen": 1.0761997699737549,
"logits/rejected": 1.0783820152282715,
"loss": 0.8496,
"step": 274
},
{
"beta_dpo/beta": 0.06791101396083832,
"beta_dpo/beta_margin_grad_mean": -0.39947086572647095,
"beta_dpo/beta_margin_grad_std": 0.18937627971172333,
"beta_dpo/beta_margin_mean": 0.59105384349823,
"beta_dpo/beta_margin_std": 1.123275876045227,
"beta_dpo/beta_used": 0.06791101396083832,
"beta_dpo/beta_used_raw": 0.04330751299858093,
"beta_dpo/gap_mean": 5.756000518798828,
"beta_dpo/gap_std": 10.81220817565918,
"beta_dpo/loss_margin_mean": 6.892657279968262,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.41572184429327286,
"grad_norm": 22.493019104003906,
"learning_rate": 3.645566304318526e-07,
"logits/chosen": 1.1675453186035156,
"logits/rejected": 1.063072681427002,
"loss": 1.159,
"step": 275
},
{
"beta_dpo/beta": 0.08544276654720306,
"beta_dpo/beta_margin_grad_mean": -0.3919548988342285,
"beta_dpo/beta_margin_grad_std": 0.17982997000217438,
"beta_dpo/beta_margin_mean": 0.5407912731170654,
"beta_dpo/beta_margin_std": 0.9322868585586548,
"beta_dpo/beta_used": 0.08544276654720306,
"beta_dpo/beta_used_raw": 0.08544276654720306,
"beta_dpo/gap_mean": 6.039332389831543,
"beta_dpo/gap_std": 10.800745010375977,
"beta_dpo/loss_margin_mean": 6.57222318649292,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.41723356009070295,
"grad_norm": 20.716075897216797,
"learning_rate": 3.633797984793294e-07,
"logits/chosen": 1.2874423265457153,
"logits/rejected": 1.245221495628357,
"loss": 1.0834,
"step": 276
},
{
"beta_dpo/beta": 0.006070706993341446,
"beta_dpo/beta_margin_grad_mean": -0.49736547470092773,
"beta_dpo/beta_margin_grad_std": 0.01935085654258728,
"beta_dpo/beta_margin_mean": 0.01056720968335867,
"beta_dpo/beta_margin_std": 0.07766212522983551,
"beta_dpo/beta_used": 0.006070706993341446,
"beta_dpo/beta_used_raw": -0.06264575570821762,
"beta_dpo/gap_mean": 5.546548843383789,
"beta_dpo/gap_std": 10.832958221435547,
"beta_dpo/loss_margin_mean": 2.432939291000366,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.41874527588813304,
"grad_norm": 2.5274527072906494,
"learning_rate": 3.6219979505011555e-07,
"logits/chosen": 1.2227504253387451,
"logits/rejected": 1.2546868324279785,
"loss": 1.3648,
"step": 277
},
{
"beta_dpo/beta": 0.02714432403445244,
"beta_dpo/beta_margin_grad_mean": -0.46082329750061035,
"beta_dpo/beta_margin_grad_std": 0.10387714952230453,
"beta_dpo/beta_margin_mean": 0.17332723736763,
"beta_dpo/beta_margin_std": 0.4567440152168274,
"beta_dpo/beta_used": 0.02714432403445244,
"beta_dpo/beta_used_raw": -0.023564567789435387,
"beta_dpo/gap_mean": 5.024744987487793,
"beta_dpo/gap_std": 10.663459777832031,
"beta_dpo/loss_margin_mean": 3.9546263217926025,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.42025699168556313,
"grad_norm": 16.09695053100586,
"learning_rate": 3.6101665315144353e-07,
"logits/chosen": 1.0592961311340332,
"logits/rejected": 0.9930233955383301,
"loss": 1.3156,
"step": 278
},
{
"beta_dpo/beta": 0.2835107147693634,
"beta_dpo/beta_margin_grad_mean": -0.2560528814792633,
"beta_dpo/beta_margin_grad_std": 0.3177822530269623,
"beta_dpo/beta_margin_mean": 2.377708911895752,
"beta_dpo/beta_margin_std": 3.3102450370788574,
"beta_dpo/beta_used": 0.2835107147693634,
"beta_dpo/beta_used_raw": 0.2835107147693634,
"beta_dpo/gap_mean": 5.569781303405762,
"beta_dpo/gap_std": 10.754709243774414,
"beta_dpo/loss_margin_mean": 8.566849708557129,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4217687074829932,
"grad_norm": 67.74427032470703,
"learning_rate": 3.5983040587833563e-07,
"logits/chosen": 1.1368944644927979,
"logits/rejected": 1.1035034656524658,
"loss": 0.8499,
"step": 279
},
{
"beta_dpo/beta": 0.11431509256362915,
"beta_dpo/beta_margin_grad_mean": -0.3244727551937103,
"beta_dpo/beta_margin_grad_std": 0.19820398092269897,
"beta_dpo/beta_margin_mean": 0.9455391764640808,
"beta_dpo/beta_margin_std": 1.1263186931610107,
"beta_dpo/beta_used": 0.11431509256362915,
"beta_dpo/beta_used_raw": 0.11431509256362915,
"beta_dpo/gap_mean": 6.155077934265137,
"beta_dpo/gap_std": 10.57052230834961,
"beta_dpo/loss_margin_mean": 8.297757148742676,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.42328042328042326,
"grad_norm": 22.813514709472656,
"learning_rate": 3.586410864126781e-07,
"logits/chosen": 1.3799774646759033,
"logits/rejected": 1.3165000677108765,
"loss": 0.9616,
"step": 280
},
{
"beta_dpo/beta": 0.10929633677005768,
"beta_dpo/beta_margin_grad_mean": -0.37381666898727417,
"beta_dpo/beta_margin_grad_std": 0.21858589351177216,
"beta_dpo/beta_margin_mean": 0.6713542342185974,
"beta_dpo/beta_margin_std": 1.1942230463027954,
"beta_dpo/beta_used": 0.10929633677005768,
"beta_dpo/beta_used_raw": 0.10929633677005768,
"beta_dpo/gap_mean": 6.20918083190918,
"beta_dpo/gap_std": 10.562750816345215,
"beta_dpo/loss_margin_mean": 6.347442626953125,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.42479213907785335,
"grad_norm": 24.40322494506836,
"learning_rate": 3.574487280222929e-07,
"logits/chosen": 1.1293048858642578,
"logits/rejected": 1.1514209508895874,
"loss": 1.0101,
"step": 281
},
{
"beta_dpo/beta": 0.1020803228020668,
"beta_dpo/beta_margin_grad_mean": -0.3907123804092407,
"beta_dpo/beta_margin_grad_std": 0.23549912869930267,
"beta_dpo/beta_margin_mean": 0.8850224614143372,
"beta_dpo/beta_margin_std": 1.7950531244277954,
"beta_dpo/beta_used": 0.1020803228020668,
"beta_dpo/beta_used_raw": 0.1020803228020668,
"beta_dpo/gap_mean": 6.277853488922119,
"beta_dpo/gap_std": 10.586102485656738,
"beta_dpo/loss_margin_mean": 6.8829665184021,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.42630385487528344,
"grad_norm": 28.942813873291016,
"learning_rate": 3.562533640600075e-07,
"logits/chosen": 0.8785597085952759,
"logits/rejected": 0.8606098890304565,
"loss": 1.1122,
"step": 282
},
{
"beta_dpo/beta": 0.09000475704669952,
"beta_dpo/beta_margin_grad_mean": -0.3931007385253906,
"beta_dpo/beta_margin_grad_std": 0.207363560795784,
"beta_dpo/beta_margin_mean": 0.5568903088569641,
"beta_dpo/beta_margin_std": 1.1324700117111206,
"beta_dpo/beta_used": 0.09000475704669952,
"beta_dpo/beta_used_raw": 0.09000475704669952,
"beta_dpo/gap_mean": 6.372895240783691,
"beta_dpo/gap_std": 10.772232055664062,
"beta_dpo/loss_margin_mean": 6.040248870849609,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.42781557067271353,
"grad_norm": 19.173545837402344,
"learning_rate": 3.550550279627215e-07,
"logits/chosen": 1.3081157207489014,
"logits/rejected": 1.1309893131256104,
"loss": 1.0395,
"step": 283
},
{
"beta_dpo/beta": 0.09521377086639404,
"beta_dpo/beta_margin_grad_mean": -0.38916173577308655,
"beta_dpo/beta_margin_grad_std": 0.2197965532541275,
"beta_dpo/beta_margin_mean": 0.722620964050293,
"beta_dpo/beta_margin_std": 1.6002602577209473,
"beta_dpo/beta_used": 0.09521377086639404,
"beta_dpo/beta_used_raw": 0.09234096854925156,
"beta_dpo/gap_mean": 6.439916610717773,
"beta_dpo/gap_std": 10.795588493347168,
"beta_dpo/loss_margin_mean": 6.818644046783447,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4293272864701436,
"grad_norm": 28.033769607543945,
"learning_rate": 3.5385375325047163e-07,
"logits/chosen": 1.2364400625228882,
"logits/rejected": 1.1874457597732544,
"loss": 1.0682,
"step": 284
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49894726276397705,
"beta_dpo/beta_margin_grad_std": 0.0030077884439378977,
"beta_dpo/beta_margin_mean": 0.0042111543007195,
"beta_dpo/beta_margin_std": 0.01203157752752304,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.047950584441423416,
"beta_dpo/gap_mean": 6.0694403648376465,
"beta_dpo/gap_std": 10.893739700317383,
"beta_dpo/loss_margin_mean": 4.211153984069824,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4308390022675737,
"grad_norm": 0.2864086627960205,
"learning_rate": 3.5264957352549375e-07,
"logits/chosen": 1.297670841217041,
"logits/rejected": 1.2642215490341187,
"loss": 1.3827,
"step": 285
},
{
"beta_dpo/beta": 0.12104212492704391,
"beta_dpo/beta_margin_grad_mean": -0.3920276463031769,
"beta_dpo/beta_margin_grad_std": 0.2352607548236847,
"beta_dpo/beta_margin_mean": 0.988191545009613,
"beta_dpo/beta_margin_std": 2.156294584274292,
"beta_dpo/beta_used": 0.12104212492704391,
"beta_dpo/beta_used_raw": 0.11507139354944229,
"beta_dpo/gap_mean": 6.1069135665893555,
"beta_dpo/gap_std": 11.12326717376709,
"beta_dpo/loss_margin_mean": 7.050894737243652,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4323507180650038,
"grad_norm": 43.05253219604492,
"learning_rate": 3.514425224712835e-07,
"logits/chosen": 1.1157772541046143,
"logits/rejected": 1.008533239364624,
"loss": 1.0778,
"step": 286
},
{
"beta_dpo/beta": 0.11036910116672516,
"beta_dpo/beta_margin_grad_mean": -0.3323964476585388,
"beta_dpo/beta_margin_grad_std": 0.21155521273612976,
"beta_dpo/beta_margin_mean": 1.0009580850601196,
"beta_dpo/beta_margin_std": 1.3563151359558105,
"beta_dpo/beta_used": 0.11036910116672516,
"beta_dpo/beta_used_raw": 0.11036910116672516,
"beta_dpo/gap_mean": 6.487942218780518,
"beta_dpo/gap_std": 11.09727954864502,
"beta_dpo/loss_margin_mean": 8.630319595336914,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.43386243386243384,
"grad_norm": 27.29483985900879,
"learning_rate": 3.502326338516534e-07,
"logits/chosen": 1.207090973854065,
"logits/rejected": 1.2322757244110107,
"loss": 1.0011,
"step": 287
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49865084886550903,
"beta_dpo/beta_margin_grad_std": 0.002663425402715802,
"beta_dpo/beta_margin_mean": 0.0053967977873981,
"beta_dpo/beta_margin_std": 0.01065417006611824,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.03904159739613533,
"beta_dpo/gap_mean": 6.5133233070373535,
"beta_dpo/gap_std": 11.077014923095703,
"beta_dpo/loss_margin_mean": 5.3967976570129395,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.43537414965986393,
"grad_norm": 0.2951047122478485,
"learning_rate": 3.490199415097892e-07,
"logits/chosen": 1.0747828483581543,
"logits/rejected": 1.0475144386291504,
"loss": 1.3821,
"step": 288
},
{
"beta_dpo/beta": 0.09553598612546921,
"beta_dpo/beta_margin_grad_mean": -0.3754231631755829,
"beta_dpo/beta_margin_grad_std": 0.1994851529598236,
"beta_dpo/beta_margin_mean": 0.7695130705833435,
"beta_dpo/beta_margin_std": 1.2959647178649902,
"beta_dpo/beta_used": 0.09553598612546921,
"beta_dpo/beta_used_raw": 0.08732537180185318,
"beta_dpo/gap_mean": 6.468659400939941,
"beta_dpo/gap_std": 10.938737869262695,
"beta_dpo/loss_margin_mean": 6.9596428871154785,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.436885865457294,
"grad_norm": 19.465930938720703,
"learning_rate": 3.4780447936730247e-07,
"logits/chosen": 1.0801656246185303,
"logits/rejected": 1.0307631492614746,
"loss": 1.0189,
"step": 289
},
{
"beta_dpo/beta": 0.045102305710315704,
"beta_dpo/beta_margin_grad_mean": -0.4342304766178131,
"beta_dpo/beta_margin_grad_std": 0.1358710080385208,
"beta_dpo/beta_margin_mean": 0.30848953127861023,
"beta_dpo/beta_margin_std": 0.6513628363609314,
"beta_dpo/beta_used": 0.045102305710315704,
"beta_dpo/beta_used_raw": 0.045102305710315704,
"beta_dpo/gap_mean": 6.521547317504883,
"beta_dpo/gap_std": 10.709592819213867,
"beta_dpo/loss_margin_mean": 5.988819599151611,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4383975812547241,
"grad_norm": 13.944698333740234,
"learning_rate": 3.465862814232821e-07,
"logits/chosen": 1.4063677787780762,
"logits/rejected": 1.392086386680603,
"loss": 1.1994,
"step": 290
},
{
"beta_dpo/beta": 0.17210541665554047,
"beta_dpo/beta_margin_grad_mean": -0.3703913986682892,
"beta_dpo/beta_margin_grad_std": 0.2652936577796936,
"beta_dpo/beta_margin_mean": 1.6986570358276367,
"beta_dpo/beta_margin_std": 3.4891014099121094,
"beta_dpo/beta_used": 0.17210541665554047,
"beta_dpo/beta_used_raw": 0.16101905703544617,
"beta_dpo/gap_mean": 6.368675231933594,
"beta_dpo/gap_std": 10.600616455078125,
"beta_dpo/loss_margin_mean": 6.906424522399902,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4399092970521542,
"grad_norm": 42.380653381347656,
"learning_rate": 3.4536538175334343e-07,
"logits/chosen": 1.1900293827056885,
"logits/rejected": 1.0464694499969482,
"loss": 0.9982,
"step": 291
},
{
"beta_dpo/beta": 0.07080691307783127,
"beta_dpo/beta_margin_grad_mean": -0.4200320839881897,
"beta_dpo/beta_margin_grad_std": 0.19748757779598236,
"beta_dpo/beta_margin_mean": 0.4610114097595215,
"beta_dpo/beta_margin_std": 1.2586936950683594,
"beta_dpo/beta_used": 0.07080691307783127,
"beta_dpo/beta_used_raw": 0.05992849916219711,
"beta_dpo/gap_mean": 6.459872245788574,
"beta_dpo/gap_std": 10.87993335723877,
"beta_dpo/loss_margin_mean": 5.627652645111084,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4414210128495843,
"grad_norm": 18.856821060180664,
"learning_rate": 3.4414181450867465e-07,
"logits/chosen": 1.0667200088500977,
"logits/rejected": 1.04791259765625,
"loss": 1.1284,
"step": 292
},
{
"beta_dpo/beta": 0.1957043707370758,
"beta_dpo/beta_margin_grad_mean": -0.29217982292175293,
"beta_dpo/beta_margin_grad_std": 0.30992838740348816,
"beta_dpo/beta_margin_mean": 1.8623554706573486,
"beta_dpo/beta_margin_std": 2.75933575630188,
"beta_dpo/beta_used": 0.1957043707370758,
"beta_dpo/beta_used_raw": 0.1957043707370758,
"beta_dpo/gap_mean": 6.795432090759277,
"beta_dpo/gap_std": 11.390796661376953,
"beta_dpo/loss_margin_mean": 9.698270797729492,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4429327286470144,
"grad_norm": 63.56986618041992,
"learning_rate": 3.4291561391508185e-07,
"logits/chosen": 1.1134048700332642,
"logits/rejected": 1.0620172023773193,
"loss": 1.0551,
"step": 293
},
{
"beta_dpo/beta": 0.11528073251247406,
"beta_dpo/beta_margin_grad_mean": -0.37978050112724304,
"beta_dpo/beta_margin_grad_std": 0.20134668052196503,
"beta_dpo/beta_margin_mean": 0.8404229283332825,
"beta_dpo/beta_margin_std": 1.5732080936431885,
"beta_dpo/beta_used": 0.11528073251247406,
"beta_dpo/beta_used_raw": 0.11528073251247406,
"beta_dpo/gap_mean": 6.880713939666748,
"beta_dpo/gap_std": 11.266508102416992,
"beta_dpo/loss_margin_mean": 5.479251384735107,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4444444444444444,
"grad_norm": 25.94324493408203,
"learning_rate": 3.4168681427203153e-07,
"logits/chosen": 1.0178157091140747,
"logits/rejected": 0.9835942387580872,
"loss": 0.9298,
"step": 294
},
{
"beta_dpo/beta": 0.031608808785676956,
"beta_dpo/beta_margin_grad_mean": -0.47087332606315613,
"beta_dpo/beta_margin_grad_std": 0.11631444841623306,
"beta_dpo/beta_margin_mean": 0.1305970549583435,
"beta_dpo/beta_margin_std": 0.5425670742988586,
"beta_dpo/beta_used": 0.031608808785676956,
"beta_dpo/beta_used_raw": 0.025034988299012184,
"beta_dpo/gap_mean": 6.435437202453613,
"beta_dpo/gap_std": 11.3424654006958,
"beta_dpo/loss_margin_mean": 4.683479309082031,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4459561602418745,
"grad_norm": 11.254670143127441,
"learning_rate": 3.4045544995169125e-07,
"logits/chosen": 1.1830737590789795,
"logits/rejected": 1.0612953901290894,
"loss": 1.2497,
"step": 295
},
{
"beta_dpo/beta": 0.04573505371809006,
"beta_dpo/beta_margin_grad_mean": -0.447557270526886,
"beta_dpo/beta_margin_grad_std": 0.15784353017807007,
"beta_dpo/beta_margin_mean": 0.2759431302547455,
"beta_dpo/beta_margin_std": 0.8301151394844055,
"beta_dpo/beta_used": 0.04573505371809006,
"beta_dpo/beta_used_raw": 0.03850052133202553,
"beta_dpo/gap_mean": 6.396709442138672,
"beta_dpo/gap_std": 11.416792869567871,
"beta_dpo/loss_margin_mean": 6.682880878448486,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4474678760393046,
"grad_norm": 15.308691024780273,
"learning_rate": 3.392215553979679e-07,
"logits/chosen": 1.1425731182098389,
"logits/rejected": 1.0387723445892334,
"loss": 1.2773,
"step": 296
},
{
"beta_dpo/beta": 0.1036151647567749,
"beta_dpo/beta_margin_grad_mean": -0.3551364839076996,
"beta_dpo/beta_margin_grad_std": 0.1977175921201706,
"beta_dpo/beta_margin_mean": 0.9006081223487854,
"beta_dpo/beta_margin_std": 1.3864402770996094,
"beta_dpo/beta_used": 0.1036151647567749,
"beta_dpo/beta_used_raw": 0.1036151647567749,
"beta_dpo/gap_mean": 6.470460891723633,
"beta_dpo/gap_std": 11.380804061889648,
"beta_dpo/loss_margin_mean": 7.736580848693848,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4489795918367347,
"grad_norm": 24.121685028076172,
"learning_rate": 3.3798516512554485e-07,
"logits/chosen": 1.1213431358337402,
"logits/rejected": 1.0656251907348633,
"loss": 0.9927,
"step": 297
},
{
"beta_dpo/beta": 0.08515971899032593,
"beta_dpo/beta_margin_grad_mean": -0.38800179958343506,
"beta_dpo/beta_margin_grad_std": 0.2151874601840973,
"beta_dpo/beta_margin_mean": 0.5751146078109741,
"beta_dpo/beta_margin_std": 1.111681580543518,
"beta_dpo/beta_used": 0.08515971899032593,
"beta_dpo/beta_used_raw": 0.08515971899032593,
"beta_dpo/gap_mean": 6.667867183685303,
"beta_dpo/gap_std": 11.660847663879395,
"beta_dpo/loss_margin_mean": 6.757538795471191,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4504913076341648,
"grad_norm": 24.019479751586914,
"learning_rate": 3.367463137189156e-07,
"logits/chosen": 1.1825034618377686,
"logits/rejected": 1.1736652851104736,
"loss": 1.039,
"step": 298
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4987265467643738,
"beta_dpo/beta_margin_grad_std": 0.0030703565571457148,
"beta_dpo/beta_margin_mean": 0.005094076506793499,
"beta_dpo/beta_margin_std": 0.012282279320061207,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.07316959649324417,
"beta_dpo/gap_mean": 6.437760353088379,
"beta_dpo/gap_std": 11.706324577331543,
"beta_dpo/loss_margin_mean": 5.094076156616211,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4520030234315949,
"grad_norm": 0.3018997311592102,
"learning_rate": 3.355050358314172e-07,
"logits/chosen": 1.1841320991516113,
"logits/rejected": 1.1034014225006104,
"loss": 1.3828,
"step": 299
},
{
"beta_dpo/beta": 0.09698092192411423,
"beta_dpo/beta_margin_grad_mean": -0.415668785572052,
"beta_dpo/beta_margin_grad_std": 0.25501230359077454,
"beta_dpo/beta_margin_mean": 0.5185216069221497,
"beta_dpo/beta_margin_std": 1.8599530458450317,
"beta_dpo/beta_used": 0.09698092192411423,
"beta_dpo/beta_used_raw": 0.09698092192411423,
"beta_dpo/gap_mean": 6.1991801261901855,
"beta_dpo/gap_std": 12.077482223510742,
"beta_dpo/loss_margin_mean": 5.203822135925293,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.45351473922902497,
"grad_norm": 37.997589111328125,
"learning_rate": 3.3426136618426043e-07,
"logits/chosen": 1.1969237327575684,
"logits/rejected": 1.0952715873718262,
"loss": 1.2007,
"step": 300
},
{
"epoch": 0.45351473922902497,
"eval_beta_dpo/beta": 0.1345755159854889,
"eval_beta_dpo/beta_margin_grad_mean": -0.3695407509803772,
"eval_beta_dpo/beta_margin_grad_std": 0.21694542467594147,
"eval_beta_dpo/beta_margin_mean": 1.0587594509124756,
"eval_beta_dpo/beta_margin_std": 1.7069097757339478,
"eval_beta_dpo/beta_used": 0.1345755159854889,
"eval_beta_dpo/beta_used_raw": 0.12730880081653595,
"eval_beta_dpo/gap_mean": 6.158328533172607,
"eval_beta_dpo/gap_std": 12.294206619262695,
"eval_beta_dpo/loss_margin_mean": 6.6134748458862305,
"eval_beta_dpo/mask_keep_frac": 1.0,
"eval_logits/chosen": 1.2326774597167969,
"eval_logits/rejected": 1.1646326780319214,
"eval_loss": 0.6114901900291443,
"eval_runtime": 43.5861,
"eval_samples_per_second": 52.838,
"eval_steps_per_second": 1.652,
"step": 300
},
{
"beta_dpo/beta": 0.056266117841005325,
"beta_dpo/beta_margin_grad_mean": -0.45459234714508057,
"beta_dpo/beta_margin_grad_std": 0.18454816937446594,
"beta_dpo/beta_margin_mean": 0.26655083894729614,
"beta_dpo/beta_margin_std": 0.9855477213859558,
"beta_dpo/beta_used": 0.056266117841005325,
"beta_dpo/beta_used_raw": 0.05165494605898857,
"beta_dpo/gap_mean": 6.26155948638916,
"beta_dpo/gap_std": 12.236114501953125,
"beta_dpo/loss_margin_mean": 6.358150005340576,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.455026455026455,
"grad_norm": 19.36774253845215,
"learning_rate": 3.3301533956555885e-07,
"logits/chosen": 1.201526165008545,
"logits/rejected": 1.1702396869659424,
"loss": 1.2374,
"step": 301
},
{
"beta_dpo/beta": 0.021665379405021667,
"beta_dpo/beta_margin_grad_mean": -0.4781515300273895,
"beta_dpo/beta_margin_grad_std": 0.09745253622531891,
"beta_dpo/beta_margin_mean": 0.09527567774057388,
"beta_dpo/beta_margin_std": 0.4254538416862488,
"beta_dpo/beta_used": 0.021665379405021667,
"beta_dpo/beta_used_raw": 0.00011988542973995209,
"beta_dpo/gap_mean": 5.8745622634887695,
"beta_dpo/gap_std": 12.359971046447754,
"beta_dpo/loss_margin_mean": 3.919048547744751,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4565381708238851,
"grad_norm": 8.723976135253906,
"learning_rate": 3.317669908293554e-07,
"logits/chosen": 1.0065600872039795,
"logits/rejected": 0.979686975479126,
"loss": 1.3005,
"step": 302
},
{
"beta_dpo/beta": 0.082459956407547,
"beta_dpo/beta_margin_grad_mean": -0.39630061388015747,
"beta_dpo/beta_margin_grad_std": 0.22607973217964172,
"beta_dpo/beta_margin_mean": 0.7640535831451416,
"beta_dpo/beta_margin_std": 1.684174656867981,
"beta_dpo/beta_used": 0.082459956407547,
"beta_dpo/beta_used_raw": 0.082459956407547,
"beta_dpo/gap_mean": 6.1549224853515625,
"beta_dpo/gap_std": 12.555089950561523,
"beta_dpo/loss_margin_mean": 8.189640045166016,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4580498866213152,
"grad_norm": 30.123430252075195,
"learning_rate": 3.3051635489464793e-07,
"logits/chosen": 1.1421207189559937,
"logits/rejected": 1.1050410270690918,
"loss": 1.2195,
"step": 303
},
{
"beta_dpo/beta": 0.04812777787446976,
"beta_dpo/beta_margin_grad_mean": -0.4235919415950775,
"beta_dpo/beta_margin_grad_std": 0.12599574029445648,
"beta_dpo/beta_margin_mean": 0.34446337819099426,
"beta_dpo/beta_margin_std": 0.5999777317047119,
"beta_dpo/beta_used": 0.04812777787446976,
"beta_dpo/beta_used_raw": 0.04812777787446976,
"beta_dpo/gap_mean": 6.333561897277832,
"beta_dpo/gap_std": 12.268714904785156,
"beta_dpo/loss_margin_mean": 7.1425042152404785,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4595616024187453,
"grad_norm": 13.932650566101074,
"learning_rate": 3.292634667444117e-07,
"logits/chosen": 1.1120836734771729,
"logits/rejected": 1.0443115234375,
"loss": 1.1942,
"step": 304
},
{
"beta_dpo/beta": 0.20659758150577545,
"beta_dpo/beta_margin_grad_mean": -0.3449561595916748,
"beta_dpo/beta_margin_grad_std": 0.32629552483558655,
"beta_dpo/beta_margin_mean": 1.5746331214904785,
"beta_dpo/beta_margin_std": 3.07080078125,
"beta_dpo/beta_used": 0.20659758150577545,
"beta_dpo/beta_used_raw": 0.20659758150577545,
"beta_dpo/gap_mean": 6.55972957611084,
"beta_dpo/gap_std": 12.377994537353516,
"beta_dpo/loss_margin_mean": 7.0496368408203125,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.46107331821617537,
"grad_norm": 51.580142974853516,
"learning_rate": 3.280083614246217e-07,
"logits/chosen": 1.274596929550171,
"logits/rejected": 1.3232698440551758,
"loss": 1.0202,
"step": 305
},
{
"beta_dpo/beta": 0.06457066535949707,
"beta_dpo/beta_margin_grad_mean": -0.4128532111644745,
"beta_dpo/beta_margin_grad_std": 0.18687398731708527,
"beta_dpo/beta_margin_mean": 0.552842915058136,
"beta_dpo/beta_margin_std": 1.2234153747558594,
"beta_dpo/beta_used": 0.06457066535949707,
"beta_dpo/beta_used_raw": 0.036076560616493225,
"beta_dpo/gap_mean": 6.60538387298584,
"beta_dpo/gap_std": 12.542646408081055,
"beta_dpo/loss_margin_mean": 6.732542514801025,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.46258503401360546,
"grad_norm": 20.216312408447266,
"learning_rate": 3.267510740432719e-07,
"logits/chosen": 1.3599079847335815,
"logits/rejected": 1.2298367023468018,
"loss": 1.1646,
"step": 306
},
{
"beta_dpo/beta": 0.06196129694581032,
"beta_dpo/beta_margin_grad_mean": -0.4338819980621338,
"beta_dpo/beta_margin_grad_std": 0.19024796783924103,
"beta_dpo/beta_margin_mean": 0.370510071516037,
"beta_dpo/beta_margin_std": 1.0366507768630981,
"beta_dpo/beta_used": 0.06196129694581032,
"beta_dpo/beta_used_raw": 0.06196129694581032,
"beta_dpo/gap_mean": 6.228489875793457,
"beta_dpo/gap_std": 12.6326904296875,
"beta_dpo/loss_margin_mean": 4.963455677032471,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.46409674981103555,
"grad_norm": 19.9281063079834,
"learning_rate": 3.2549163976939285e-07,
"logits/chosen": 1.2666585445404053,
"logits/rejected": 1.1813685894012451,
"loss": 1.1922,
"step": 307
},
{
"beta_dpo/beta": 0.09165829420089722,
"beta_dpo/beta_margin_grad_mean": -0.4129888415336609,
"beta_dpo/beta_margin_grad_std": 0.2287939041852951,
"beta_dpo/beta_margin_mean": 0.7246976494789124,
"beta_dpo/beta_margin_std": 1.8345637321472168,
"beta_dpo/beta_used": 0.09165829420089722,
"beta_dpo/beta_used_raw": 0.059608783572912216,
"beta_dpo/gap_mean": 6.315195560455322,
"beta_dpo/gap_std": 12.767260551452637,
"beta_dpo/loss_margin_mean": 6.2924628257751465,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4656084656084656,
"grad_norm": 42.03661346435547,
"learning_rate": 3.2423009383206874e-07,
"logits/chosen": 0.9810805320739746,
"logits/rejected": 1.040271520614624,
"loss": 1.2639,
"step": 308
},
{
"beta_dpo/beta": 0.10874129831790924,
"beta_dpo/beta_margin_grad_mean": -0.3518926501274109,
"beta_dpo/beta_margin_grad_std": 0.20076824724674225,
"beta_dpo/beta_margin_mean": 0.9579285979270935,
"beta_dpo/beta_margin_std": 1.5040465593338013,
"beta_dpo/beta_used": 0.10874129831790924,
"beta_dpo/beta_used_raw": 0.10874129831790924,
"beta_dpo/gap_mean": 6.503994941711426,
"beta_dpo/gap_std": 12.441705703735352,
"beta_dpo/loss_margin_mean": 7.363459587097168,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4671201814058957,
"grad_norm": 27.765520095825195,
"learning_rate": 3.229664715194511e-07,
"logits/chosen": 1.1616370677947998,
"logits/rejected": 1.129991054534912,
"loss": 1.0048,
"step": 309
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49967724084854126,
"beta_dpo/beta_margin_grad_std": 0.0030824244022369385,
"beta_dpo/beta_margin_mean": 0.0012910891091451049,
"beta_dpo/beta_margin_std": 0.012330072931945324,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.07865162193775177,
"beta_dpo/gap_mean": 5.778951644897461,
"beta_dpo/gap_std": 12.322010040283203,
"beta_dpo/loss_margin_mean": 1.2910891771316528,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.46863189720332576,
"grad_norm": 0.3216012120246887,
"learning_rate": 3.2170080817777257e-07,
"logits/chosen": 1.2230892181396484,
"logits/rejected": 1.218454122543335,
"loss": 1.3835,
"step": 310
},
{
"beta_dpo/beta": 0.13774374127388,
"beta_dpo/beta_margin_grad_mean": -0.3994937837123871,
"beta_dpo/beta_margin_grad_std": 0.2569511830806732,
"beta_dpo/beta_margin_mean": 1.1233173608779907,
"beta_dpo/beta_margin_std": 2.8929855823516846,
"beta_dpo/beta_used": 0.13774374127388,
"beta_dpo/beta_used_raw": 0.10212471336126328,
"beta_dpo/gap_mean": 5.679272651672363,
"beta_dpo/gap_std": 12.579723358154297,
"beta_dpo/loss_margin_mean": 6.761931896209717,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.47014361300075586,
"grad_norm": 40.673583984375,
"learning_rate": 3.204331392103574e-07,
"logits/chosen": 1.2187139987945557,
"logits/rejected": 1.0266274213790894,
"loss": 1.1275,
"step": 311
},
{
"beta_dpo/beta": 0.08971817046403885,
"beta_dpo/beta_margin_grad_mean": -0.39458373188972473,
"beta_dpo/beta_margin_grad_std": 0.20151464641094208,
"beta_dpo/beta_margin_mean": 0.6224772334098816,
"beta_dpo/beta_margin_std": 1.3102822303771973,
"beta_dpo/beta_used": 0.08971817046403885,
"beta_dpo/beta_used_raw": 0.08971817046403885,
"beta_dpo/gap_mean": 5.863433837890625,
"beta_dpo/gap_std": 12.588981628417969,
"beta_dpo/loss_margin_mean": 7.15087366104126,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.47165532879818595,
"grad_norm": 22.64099884033203,
"learning_rate": 3.1916350007663176e-07,
"logits/chosen": 1.1743152141571045,
"logits/rejected": 1.1165026426315308,
"loss": 1.1265,
"step": 312
},
{
"beta_dpo/beta": 0.12445323914289474,
"beta_dpo/beta_margin_grad_mean": -0.38570505380630493,
"beta_dpo/beta_margin_grad_std": 0.30296802520751953,
"beta_dpo/beta_margin_mean": 0.7150858640670776,
"beta_dpo/beta_margin_std": 1.8413292169570923,
"beta_dpo/beta_used": 0.12445323914289474,
"beta_dpo/beta_used_raw": 0.12445323914289474,
"beta_dpo/gap_mean": 5.904024124145508,
"beta_dpo/gap_std": 12.867403984069824,
"beta_dpo/loss_margin_mean": 5.75889778137207,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.47316704459561604,
"grad_norm": 32.521026611328125,
"learning_rate": 3.178919262911314e-07,
"logits/chosen": 1.4500136375427246,
"logits/rejected": 1.4065792560577393,
"loss": 1.1793,
"step": 313
},
{
"beta_dpo/beta": 0.16136297583580017,
"beta_dpo/beta_margin_grad_mean": -0.3725585341453552,
"beta_dpo/beta_margin_grad_std": 0.26283350586891174,
"beta_dpo/beta_margin_mean": 1.403192162513733,
"beta_dpo/beta_margin_std": 2.998973846435547,
"beta_dpo/beta_used": 0.16136297583580017,
"beta_dpo/beta_used_raw": 0.16136297583580017,
"beta_dpo/gap_mean": 6.142127990722656,
"beta_dpo/gap_std": 13.036579132080078,
"beta_dpo/loss_margin_mean": 6.873254299163818,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.47467876039304613,
"grad_norm": 47.72408676147461,
"learning_rate": 3.166184534225087e-07,
"logits/chosen": 1.1128228902816772,
"logits/rejected": 1.1113231182098389,
"loss": 1.079,
"step": 314
},
{
"beta_dpo/beta": 0.050388358533382416,
"beta_dpo/beta_margin_grad_mean": -0.41331613063812256,
"beta_dpo/beta_margin_grad_std": 0.13312646746635437,
"beta_dpo/beta_margin_mean": 0.4088205397129059,
"beta_dpo/beta_margin_std": 0.6722843647003174,
"beta_dpo/beta_used": 0.050388358533382416,
"beta_dpo/beta_used_raw": 0.050388358533382416,
"beta_dpo/gap_mean": 6.070713043212891,
"beta_dpo/gap_std": 12.985635757446289,
"beta_dpo/loss_margin_mean": 6.740433216094971,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.47619047619047616,
"grad_norm": 15.891672134399414,
"learning_rate": 3.1534311709253723e-07,
"logits/chosen": 1.2505284547805786,
"logits/rejected": 1.2232093811035156,
"loss": 1.1804,
"step": 315
},
{
"beta_dpo/beta": 0.14638181030750275,
"beta_dpo/beta_margin_grad_mean": -0.3598828613758087,
"beta_dpo/beta_margin_grad_std": 0.2745656967163086,
"beta_dpo/beta_margin_mean": 1.0968278646469116,
"beta_dpo/beta_margin_std": 2.148019552230835,
"beta_dpo/beta_used": 0.14638181030750275,
"beta_dpo/beta_used_raw": 0.14638181030750275,
"beta_dpo/gap_mean": 6.046994686126709,
"beta_dpo/gap_std": 12.95700740814209,
"beta_dpo/loss_margin_mean": 6.180335998535156,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.47770219198790626,
"grad_norm": 37.0686149597168,
"learning_rate": 3.1406595297511564e-07,
"logits/chosen": 1.2918689250946045,
"logits/rejected": 1.1598542928695679,
"loss": 0.9616,
"step": 316
},
{
"beta_dpo/beta": 0.13256600499153137,
"beta_dpo/beta_margin_grad_mean": -0.37093499302864075,
"beta_dpo/beta_margin_grad_std": 0.2640308141708374,
"beta_dpo/beta_margin_mean": 1.5134227275848389,
"beta_dpo/beta_margin_std": 2.976809024810791,
"beta_dpo/beta_used": 0.13256600499153137,
"beta_dpo/beta_used_raw": 0.13214673101902008,
"beta_dpo/gap_mean": 6.867618083953857,
"beta_dpo/gap_std": 13.056652069091797,
"beta_dpo/loss_margin_mean": 10.035300254821777,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.47921390778533635,
"grad_norm": 54.22406768798828,
"learning_rate": 3.1278699679526975e-07,
"logits/chosen": 1.369170069694519,
"logits/rejected": 1.328667163848877,
"loss": 1.3319,
"step": 317
},
{
"beta_dpo/beta": 0.05628693476319313,
"beta_dpo/beta_margin_grad_mean": -0.46722689270973206,
"beta_dpo/beta_margin_grad_std": 0.2057475745677948,
"beta_dpo/beta_margin_mean": 0.20529808104038239,
"beta_dpo/beta_margin_std": 1.1580020189285278,
"beta_dpo/beta_used": 0.05628693476319313,
"beta_dpo/beta_used_raw": 0.0030015483498573303,
"beta_dpo/gap_mean": 6.78084659576416,
"beta_dpo/gap_std": 13.355363845825195,
"beta_dpo/loss_margin_mean": 5.220831871032715,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.48072562358276644,
"grad_norm": 22.165693283081055,
"learning_rate": 3.1150628432815336e-07,
"logits/chosen": 1.3102058172225952,
"logits/rejected": 1.2508630752563477,
"loss": 1.2437,
"step": 318
},
{
"beta_dpo/beta": 0.03552532196044922,
"beta_dpo/beta_margin_grad_mean": -0.46162641048431396,
"beta_dpo/beta_margin_grad_std": 0.13534243404865265,
"beta_dpo/beta_margin_mean": 0.19126252830028534,
"beta_dpo/beta_margin_std": 0.6686474680900574,
"beta_dpo/beta_used": 0.03552532196044922,
"beta_dpo/beta_used_raw": 0.011105714365839958,
"beta_dpo/gap_mean": 6.4551591873168945,
"beta_dpo/gap_std": 13.360182762145996,
"beta_dpo/loss_margin_mean": 5.578036785125732,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.48223733938019653,
"grad_norm": 15.060959815979004,
"learning_rate": 3.1022385139804707e-07,
"logits/chosen": 0.8403013944625854,
"logits/rejected": 0.8464080691337585,
"loss": 1.252,
"step": 319
},
{
"beta_dpo/beta": 0.07511113584041595,
"beta_dpo/beta_margin_grad_mean": -0.4390046298503876,
"beta_dpo/beta_margin_grad_std": 0.22036148607730865,
"beta_dpo/beta_margin_mean": 0.39557981491088867,
"beta_dpo/beta_margin_std": 1.5242847204208374,
"beta_dpo/beta_used": 0.07511113584041595,
"beta_dpo/beta_used_raw": -0.0021282732486724854,
"beta_dpo/gap_mean": 6.0225372314453125,
"beta_dpo/gap_std": 13.459592819213867,
"beta_dpo/loss_margin_mean": 4.060967922210693,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4837490551776266,
"grad_norm": 24.02452850341797,
"learning_rate": 3.0893973387735683e-07,
"logits/chosen": 0.9690076112747192,
"logits/rejected": 0.8976876139640808,
"loss": 1.2212,
"step": 320
},
{
"beta_dpo/beta": 0.15998658537864685,
"beta_dpo/beta_margin_grad_mean": -0.3318832218647003,
"beta_dpo/beta_margin_grad_std": 0.23508228361606598,
"beta_dpo/beta_margin_mean": 1.7446743249893188,
"beta_dpo/beta_margin_std": 2.777869701385498,
"beta_dpo/beta_used": 0.15998658537864685,
"beta_dpo/beta_used_raw": 0.15998658537864685,
"beta_dpo/gap_mean": 6.446950912475586,
"beta_dpo/gap_std": 13.321550369262695,
"beta_dpo/loss_margin_mean": 8.086103439331055,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4852607709750567,
"grad_norm": 34.035491943359375,
"learning_rate": 3.0765396768561004e-07,
"logits/chosen": 1.2763044834136963,
"logits/rejected": 1.2507541179656982,
"loss": 0.9461,
"step": 321
},
{
"beta_dpo/beta": 0.2635050117969513,
"beta_dpo/beta_margin_grad_mean": -0.23470216989517212,
"beta_dpo/beta_margin_grad_std": 0.27463462948799133,
"beta_dpo/beta_margin_mean": 2.7218821048736572,
"beta_dpo/beta_margin_std": 3.420943021774292,
"beta_dpo/beta_used": 0.2635050117969513,
"beta_dpo/beta_used_raw": 0.2635050117969513,
"beta_dpo/gap_mean": 6.802641868591309,
"beta_dpo/gap_std": 13.241533279418945,
"beta_dpo/loss_margin_mean": 9.942191123962402,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.48677248677248675,
"grad_norm": 42.902835845947266,
"learning_rate": 3.063665887884511e-07,
"logits/chosen": 1.5488101243972778,
"logits/rejected": 1.4433493614196777,
"loss": 0.7142,
"step": 322
},
{
"beta_dpo/beta": 0.0697922334074974,
"beta_dpo/beta_margin_grad_mean": -0.4143967628479004,
"beta_dpo/beta_margin_grad_std": 0.2243691384792328,
"beta_dpo/beta_margin_mean": 0.6587510108947754,
"beta_dpo/beta_margin_std": 1.602243423461914,
"beta_dpo/beta_used": 0.0697922334074974,
"beta_dpo/beta_used_raw": 0.0697922334074974,
"beta_dpo/gap_mean": 6.970813751220703,
"beta_dpo/gap_std": 13.35682201385498,
"beta_dpo/loss_margin_mean": 7.288545608520508,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.48828420256991684,
"grad_norm": 25.457849502563477,
"learning_rate": 3.0507763319663517e-07,
"logits/chosen": 1.28651762008667,
"logits/rejected": 1.2437783479690552,
"loss": 1.22,
"step": 323
},
{
"beta_dpo/beta": 0.14148634672164917,
"beta_dpo/beta_margin_grad_mean": -0.36526909470558167,
"beta_dpo/beta_margin_grad_std": 0.24114990234375,
"beta_dpo/beta_margin_mean": 1.0672969818115234,
"beta_dpo/beta_margin_std": 1.9926927089691162,
"beta_dpo/beta_used": 0.14148634672164917,
"beta_dpo/beta_used_raw": 0.14148634672164917,
"beta_dpo/gap_mean": 7.207144737243652,
"beta_dpo/gap_std": 13.275566101074219,
"beta_dpo/loss_margin_mean": 7.679068088531494,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4897959183673469,
"grad_norm": 32.15392303466797,
"learning_rate": 3.0378713696502097e-07,
"logits/chosen": 1.0783584117889404,
"logits/rejected": 0.9778432250022888,
"loss": 0.9654,
"step": 324
},
{
"beta_dpo/beta": 0.0674436017870903,
"beta_dpo/beta_margin_grad_mean": -0.4117467999458313,
"beta_dpo/beta_margin_grad_std": 0.18462489545345306,
"beta_dpo/beta_margin_mean": 0.5190098881721497,
"beta_dpo/beta_margin_std": 1.1525095701217651,
"beta_dpo/beta_used": 0.0674436017870903,
"beta_dpo/beta_used_raw": 0.0674436017870903,
"beta_dpo/gap_mean": 7.158202171325684,
"beta_dpo/gap_std": 12.836915969848633,
"beta_dpo/loss_margin_mean": 6.354362487792969,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.491307634164777,
"grad_norm": 21.140745162963867,
"learning_rate": 3.0249513619156206e-07,
"logits/chosen": 1.1736620664596558,
"logits/rejected": 1.0785242319107056,
"loss": 1.1232,
"step": 325
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49881821870803833,
"beta_dpo/beta_margin_grad_std": 0.0027975537814199924,
"beta_dpo/beta_margin_mean": 0.004727249499410391,
"beta_dpo/beta_margin_std": 0.011190598830580711,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.03214829042553902,
"beta_dpo/gap_mean": 6.748882293701172,
"beta_dpo/gap_std": 12.50639533996582,
"beta_dpo/loss_margin_mean": 4.7272491455078125,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4928193499622071,
"grad_norm": 0.33258065581321716,
"learning_rate": 3.012016670162977e-07,
"logits/chosen": 1.0217170715332031,
"logits/rejected": 1.0124512910842896,
"loss": 1.3818,
"step": 326
},
{
"beta_dpo/beta": 0.03682481870055199,
"beta_dpo/beta_margin_grad_mean": -0.46519556641578674,
"beta_dpo/beta_margin_grad_std": 0.12718036770820618,
"beta_dpo/beta_margin_mean": 0.15980318188667297,
"beta_dpo/beta_margin_std": 0.6078633666038513,
"beta_dpo/beta_used": 0.03682481870055199,
"beta_dpo/beta_used_raw": 0.03682481870055199,
"beta_dpo/gap_mean": 6.296779632568359,
"beta_dpo/gap_std": 12.454865455627441,
"beta_dpo/loss_margin_mean": 4.670220851898193,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4943310657596372,
"grad_norm": 12.803543090820312,
"learning_rate": 2.99906765620341e-07,
"logits/chosen": 1.0091784000396729,
"logits/rejected": 0.9715848565101624,
"loss": 1.2383,
"step": 327
},
{
"beta_dpo/beta": 0.06305188685655594,
"beta_dpo/beta_margin_grad_mean": -0.4316686987876892,
"beta_dpo/beta_margin_grad_std": 0.1629015952348709,
"beta_dpo/beta_margin_mean": 0.3258446753025055,
"beta_dpo/beta_margin_std": 0.7818768620491028,
"beta_dpo/beta_used": 0.06305188685655594,
"beta_dpo/beta_used_raw": 0.06305188685655594,
"beta_dpo/gap_mean": 6.0933403968811035,
"beta_dpo/gap_std": 12.407817840576172,
"beta_dpo/loss_margin_mean": 4.723147869110107,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4958427815570673,
"grad_norm": 14.375814437866211,
"learning_rate": 2.9861046822486766e-07,
"logits/chosen": 1.2935378551483154,
"logits/rejected": 1.2577247619628906,
"loss": 1.1574,
"step": 328
},
{
"beta_dpo/beta": 0.0841306746006012,
"beta_dpo/beta_margin_grad_mean": -0.4047718942165375,
"beta_dpo/beta_margin_grad_std": 0.2134350687265396,
"beta_dpo/beta_margin_mean": 0.5284138917922974,
"beta_dpo/beta_margin_std": 1.1749253273010254,
"beta_dpo/beta_used": 0.0841306746006012,
"beta_dpo/beta_used_raw": 0.0841306746006012,
"beta_dpo/gap_mean": 5.981316089630127,
"beta_dpo/gap_std": 12.62000846862793,
"beta_dpo/loss_margin_mean": 6.10738468170166,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4973544973544973,
"grad_norm": 21.042234420776367,
"learning_rate": 2.9731281109010253e-07,
"logits/chosen": 1.3784832954406738,
"logits/rejected": 1.313535451889038,
"loss": 1.1062,
"step": 329
},
{
"beta_dpo/beta": 0.1372985541820526,
"beta_dpo/beta_margin_grad_mean": -0.3568761646747589,
"beta_dpo/beta_margin_grad_std": 0.2465440183877945,
"beta_dpo/beta_margin_mean": 1.5668509006500244,
"beta_dpo/beta_margin_std": 2.8006210327148438,
"beta_dpo/beta_used": 0.1372985541820526,
"beta_dpo/beta_used_raw": 0.08406403660774231,
"beta_dpo/gap_mean": 6.211638450622559,
"beta_dpo/gap_std": 12.863258361816406,
"beta_dpo/loss_margin_mean": 8.602145195007324,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4988662131519274,
"grad_norm": 36.83658981323242,
"learning_rate": 2.9601383051430505e-07,
"logits/chosen": 1.0632227659225464,
"logits/rejected": 1.0073779821395874,
"loss": 1.0342,
"step": 330
},
{
"beta_dpo/beta": 0.1793161928653717,
"beta_dpo/beta_margin_grad_mean": -0.3007383942604065,
"beta_dpo/beta_margin_grad_std": 0.26930364966392517,
"beta_dpo/beta_margin_mean": 1.5797462463378906,
"beta_dpo/beta_margin_std": 2.476447820663452,
"beta_dpo/beta_used": 0.1793161928653717,
"beta_dpo/beta_used_raw": 0.1793161928653717,
"beta_dpo/gap_mean": 6.820796012878418,
"beta_dpo/gap_std": 12.850607872009277,
"beta_dpo/loss_margin_mean": 8.462237358093262,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5003779289493575,
"grad_norm": 37.06329345703125,
"learning_rate": 2.947135628327544e-07,
"logits/chosen": 1.453250527381897,
"logits/rejected": 1.436427354812622,
"loss": 0.8528,
"step": 331
},
{
"beta_dpo/beta": 0.015641553327441216,
"beta_dpo/beta_margin_grad_mean": -0.46394628286361694,
"beta_dpo/beta_margin_grad_std": 0.06855617463588715,
"beta_dpo/beta_margin_mean": 0.15044867992401123,
"beta_dpo/beta_margin_std": 0.2898610234260559,
"beta_dpo/beta_used": 0.015641553327441216,
"beta_dpo/beta_used_raw": 0.015498769469559193,
"beta_dpo/gap_mean": 6.893322944641113,
"beta_dpo/gap_std": 12.798620223999023,
"beta_dpo/loss_margin_mean": 7.758445739746094,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5018896447467877,
"grad_norm": 5.801995277404785,
"learning_rate": 2.934120444167326e-07,
"logits/chosen": 1.297292947769165,
"logits/rejected": 1.2258504629135132,
"loss": 1.3071,
"step": 332
},
{
"beta_dpo/beta": 0.0764099657535553,
"beta_dpo/beta_margin_grad_mean": -0.3789524734020233,
"beta_dpo/beta_margin_grad_std": 0.16656707227230072,
"beta_dpo/beta_margin_mean": 0.6131932735443115,
"beta_dpo/beta_margin_std": 0.9245501756668091,
"beta_dpo/beta_used": 0.0764099657535553,
"beta_dpo/beta_used_raw": 0.0764099657535553,
"beta_dpo/gap_mean": 7.18435525894165,
"beta_dpo/gap_std": 12.589004516601562,
"beta_dpo/loss_margin_mean": 8.35302734375,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5034013605442177,
"grad_norm": 20.38193130493164,
"learning_rate": 2.921093116725076e-07,
"logits/chosen": 1.111952304840088,
"logits/rejected": 1.0473523139953613,
"loss": 1.0434,
"step": 333
},
{
"beta_dpo/beta": 0.052270177751779556,
"beta_dpo/beta_margin_grad_mean": -0.4547905921936035,
"beta_dpo/beta_margin_grad_std": 0.18179713189601898,
"beta_dpo/beta_margin_mean": 0.25288447737693787,
"beta_dpo/beta_margin_std": 1.0719202756881714,
"beta_dpo/beta_used": 0.052270177751779556,
"beta_dpo/beta_used_raw": -0.021642889827489853,
"beta_dpo/gap_mean": 6.781639099121094,
"beta_dpo/gap_std": 12.995382308959961,
"beta_dpo/loss_margin_mean": 3.978499412536621,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5049130763416477,
"grad_norm": 16.100820541381836,
"learning_rate": 2.9080540104031484e-07,
"logits/chosen": 1.1373687982559204,
"logits/rejected": 1.0688490867614746,
"loss": 1.1891,
"step": 334
},
{
"beta_dpo/beta": 0.07851317524909973,
"beta_dpo/beta_margin_grad_mean": -0.41749730706214905,
"beta_dpo/beta_margin_grad_std": 0.22520457208156586,
"beta_dpo/beta_margin_mean": 0.4524867534637451,
"beta_dpo/beta_margin_std": 1.195827603340149,
"beta_dpo/beta_used": 0.07851317524909973,
"beta_dpo/beta_used_raw": 0.07851317524909973,
"beta_dpo/gap_mean": 6.497610569000244,
"beta_dpo/gap_std": 13.426082611083984,
"beta_dpo/loss_margin_mean": 5.86445951461792,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5064247921390779,
"grad_norm": 22.561229705810547,
"learning_rate": 2.895003489933375e-07,
"logits/chosen": 1.066937804222107,
"logits/rejected": 1.034178376197815,
"loss": 1.1779,
"step": 335
},
{
"beta_dpo/beta": 0.005117136985063553,
"beta_dpo/beta_margin_grad_mean": -0.48917579650878906,
"beta_dpo/beta_margin_grad_std": 0.021274510771036148,
"beta_dpo/beta_margin_mean": 0.043470632284879684,
"beta_dpo/beta_margin_std": 0.08556756377220154,
"beta_dpo/beta_used": 0.005117136985063553,
"beta_dpo/beta_used_raw": -0.002717310097068548,
"beta_dpo/gap_mean": 6.687017440795898,
"beta_dpo/gap_std": 13.347187042236328,
"beta_dpo/loss_margin_mean": 6.967475891113281,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5079365079365079,
"grad_norm": 1.848929762840271,
"learning_rate": 2.8819419203668675e-07,
"logits/chosen": 0.8256216049194336,
"logits/rejected": 0.8187216520309448,
"loss": 1.3607,
"step": 336
},
{
"beta_dpo/beta": 0.05330893397331238,
"beta_dpo/beta_margin_grad_mean": -0.43421801924705505,
"beta_dpo/beta_margin_grad_std": 0.15648889541625977,
"beta_dpo/beta_margin_mean": 0.3032914996147156,
"beta_dpo/beta_margin_std": 0.7241752743721008,
"beta_dpo/beta_used": 0.05330893397331238,
"beta_dpo/beta_used_raw": 0.05330893397331238,
"beta_dpo/gap_mean": 6.46940279006958,
"beta_dpo/gap_std": 13.37681770324707,
"beta_dpo/loss_margin_mean": 5.691498756408691,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.509448223733938,
"grad_norm": 14.02002239227295,
"learning_rate": 2.8688696670638053e-07,
"logits/chosen": 1.020787000656128,
"logits/rejected": 0.9443519115447998,
"loss": 1.191,
"step": 337
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4988920986652374,
"beta_dpo/beta_margin_grad_std": 0.0028042481280863285,
"beta_dpo/beta_margin_mean": 0.004431764129549265,
"beta_dpo/beta_margin_std": 0.011217460036277771,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.023727454245090485,
"beta_dpo/gap_mean": 6.0999603271484375,
"beta_dpo/gap_std": 13.048229217529297,
"beta_dpo/loss_margin_mean": 4.431764125823975,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5109599395313681,
"grad_norm": 0.28760549426078796,
"learning_rate": 2.8557870956832133e-07,
"logits/chosen": 1.1092530488967896,
"logits/rejected": 1.0787663459777832,
"loss": 1.3823,
"step": 338
},
{
"beta_dpo/beta": 0.14106054604053497,
"beta_dpo/beta_margin_grad_mean": -0.33038145303726196,
"beta_dpo/beta_margin_grad_std": 0.26955631375312805,
"beta_dpo/beta_margin_mean": 1.0998313426971436,
"beta_dpo/beta_margin_std": 1.7985961437225342,
"beta_dpo/beta_used": 0.14106054604053497,
"beta_dpo/beta_used_raw": 0.14106054604053497,
"beta_dpo/gap_mean": 6.327607154846191,
"beta_dpo/gap_std": 12.90283489227295,
"beta_dpo/loss_margin_mean": 7.851966857910156,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5124716553287982,
"grad_norm": 40.547515869140625,
"learning_rate": 2.842694572172736e-07,
"logits/chosen": 1.076251745223999,
"logits/rejected": 0.9694498777389526,
"loss": 1.0281,
"step": 339
},
{
"beta_dpo/beta": 0.10330237448215485,
"beta_dpo/beta_margin_grad_mean": -0.44081035256385803,
"beta_dpo/beta_margin_grad_std": 0.23204687237739563,
"beta_dpo/beta_margin_mean": 0.5521944761276245,
"beta_dpo/beta_margin_std": 2.094428062438965,
"beta_dpo/beta_used": 0.10330237448215485,
"beta_dpo/beta_used_raw": 0.06831492483615875,
"beta_dpo/gap_mean": 6.3657684326171875,
"beta_dpo/gap_std": 13.076078414916992,
"beta_dpo/loss_margin_mean": 6.563114643096924,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5139833711262283,
"grad_norm": 34.1888427734375,
"learning_rate": 2.8295924627584004e-07,
"logits/chosen": 1.066762924194336,
"logits/rejected": 1.062217116355896,
"loss": 1.2085,
"step": 340
},
{
"beta_dpo/beta": 0.24964655935764313,
"beta_dpo/beta_margin_grad_mean": -0.36444416642189026,
"beta_dpo/beta_margin_grad_std": 0.28421640396118164,
"beta_dpo/beta_margin_mean": 2.929835796356201,
"beta_dpo/beta_margin_std": 5.628471374511719,
"beta_dpo/beta_used": 0.24964655935764313,
"beta_dpo/beta_used_raw": 0.23628321290016174,
"beta_dpo/gap_mean": 6.765471935272217,
"beta_dpo/gap_std": 13.197046279907227,
"beta_dpo/loss_margin_mean": 7.296074867248535,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5154950869236583,
"grad_norm": 63.85667419433594,
"learning_rate": 2.816481133934373e-07,
"logits/chosen": 1.4289345741271973,
"logits/rejected": 1.3677668571472168,
"loss": 0.9964,
"step": 341
},
{
"beta_dpo/beta": 0.13882164657115936,
"beta_dpo/beta_margin_grad_mean": -0.40003663301467896,
"beta_dpo/beta_margin_grad_std": 0.26059815287590027,
"beta_dpo/beta_margin_mean": 1.08782160282135,
"beta_dpo/beta_margin_std": 2.7480297088623047,
"beta_dpo/beta_used": 0.13882164657115936,
"beta_dpo/beta_used_raw": 0.13882164657115936,
"beta_dpo/gap_mean": 6.689360618591309,
"beta_dpo/gap_std": 13.186819076538086,
"beta_dpo/loss_margin_mean": 7.2520527839660645,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5170068027210885,
"grad_norm": 36.852256774902344,
"learning_rate": 2.8033609524527046e-07,
"logits/chosen": 1.1861512660980225,
"logits/rejected": 1.1343166828155518,
"loss": 1.203,
"step": 342
},
{
"beta_dpo/beta": 0.09241315722465515,
"beta_dpo/beta_margin_grad_mean": -0.41862672567367554,
"beta_dpo/beta_margin_grad_std": 0.23799264430999756,
"beta_dpo/beta_margin_mean": 0.6367702484130859,
"beta_dpo/beta_margin_std": 1.8354604244232178,
"beta_dpo/beta_used": 0.09241315722465515,
"beta_dpo/beta_used_raw": -0.0970378890633583,
"beta_dpo/gap_mean": 6.421136856079102,
"beta_dpo/gap_std": 13.049176216125488,
"beta_dpo/loss_margin_mean": 3.949489116668701,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5185185185185185,
"grad_norm": 26.807126998901367,
"learning_rate": 2.7902322853130753e-07,
"logits/chosen": 0.9365084767341614,
"logits/rejected": 0.9163832068443298,
"loss": 1.1698,
"step": 343
},
{
"beta_dpo/beta": 0.07383356988430023,
"beta_dpo/beta_margin_grad_mean": -0.38845568895339966,
"beta_dpo/beta_margin_grad_std": 0.19830799102783203,
"beta_dpo/beta_margin_mean": 0.6601214408874512,
"beta_dpo/beta_margin_std": 1.2798510789871216,
"beta_dpo/beta_used": 0.07383356988430023,
"beta_dpo/beta_used_raw": 0.07383356988430023,
"beta_dpo/gap_mean": 6.460176944732666,
"beta_dpo/gap_std": 12.917095184326172,
"beta_dpo/loss_margin_mean": 7.995361328125,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5200302343159486,
"grad_norm": 20.402359008789062,
"learning_rate": 2.7770954997525274e-07,
"logits/chosen": 1.2269436120986938,
"logits/rejected": 1.1481449604034424,
"loss": 1.1563,
"step": 344
},
{
"beta_dpo/beta": 0.191828653216362,
"beta_dpo/beta_margin_grad_mean": -0.35022681951522827,
"beta_dpo/beta_margin_grad_std": 0.30977630615234375,
"beta_dpo/beta_margin_mean": 1.2948695421218872,
"beta_dpo/beta_margin_std": 2.8370375633239746,
"beta_dpo/beta_used": 0.191828653216362,
"beta_dpo/beta_used_raw": 0.191828653216362,
"beta_dpo/gap_mean": 6.522076606750488,
"beta_dpo/gap_std": 13.085643768310547,
"beta_dpo/loss_margin_mean": 6.543193340301514,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5215419501133787,
"grad_norm": 43.98537063598633,
"learning_rate": 2.7639509632351927e-07,
"logits/chosen": 1.2063586711883545,
"logits/rejected": 1.1623212099075317,
"loss": 0.9284,
"step": 345
},
{
"beta_dpo/beta": 0.08791759610176086,
"beta_dpo/beta_margin_grad_mean": -0.40686461329460144,
"beta_dpo/beta_margin_grad_std": 0.24795067310333252,
"beta_dpo/beta_margin_mean": 0.8130276203155518,
"beta_dpo/beta_margin_std": 2.0670032501220703,
"beta_dpo/beta_used": 0.08791759610176086,
"beta_dpo/beta_used_raw": 0.08791759610176086,
"beta_dpo/gap_mean": 6.574892997741699,
"beta_dpo/gap_std": 13.083642959594727,
"beta_dpo/loss_margin_mean": 7.657172679901123,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5230536659108088,
"grad_norm": 30.677244186401367,
"learning_rate": 2.7507990434420123e-07,
"logits/chosen": 1.1106505393981934,
"logits/rejected": 1.0611116886138916,
"loss": 1.2396,
"step": 346
},
{
"beta_dpo/beta": 0.016010552644729614,
"beta_dpo/beta_margin_grad_mean": -0.4776521325111389,
"beta_dpo/beta_margin_grad_std": 0.08023162931203842,
"beta_dpo/beta_margin_mean": 0.09511476755142212,
"beta_dpo/beta_margin_std": 0.3387611508369446,
"beta_dpo/beta_used": 0.016010552644729614,
"beta_dpo/beta_used_raw": 0.012834769673645496,
"beta_dpo/gap_mean": 6.491988658905029,
"beta_dpo/gap_std": 13.22860336303711,
"beta_dpo/loss_margin_mean": 5.3407182693481445,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5245653817082389,
"grad_norm": 5.674924373626709,
"learning_rate": 2.737640108260456e-07,
"logits/chosen": 1.2309460639953613,
"logits/rejected": 1.1862425804138184,
"loss": 1.326,
"step": 347
},
{
"beta_dpo/beta": 0.06713169813156128,
"beta_dpo/beta_margin_grad_mean": -0.4124799072742462,
"beta_dpo/beta_margin_grad_std": 0.17748472094535828,
"beta_dpo/beta_margin_mean": 0.45152342319488525,
"beta_dpo/beta_margin_std": 0.93133944272995,
"beta_dpo/beta_used": 0.06713169813156128,
"beta_dpo/beta_used_raw": 0.06713169813156128,
"beta_dpo/gap_mean": 6.600510597229004,
"beta_dpo/gap_std": 13.38565444946289,
"beta_dpo/loss_margin_mean": 7.088160037994385,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5260770975056689,
"grad_norm": 17.118511199951172,
"learning_rate": 2.724474525774229e-07,
"logits/chosen": 1.4532029628753662,
"logits/rejected": 1.3983886241912842,
"loss": 1.1278,
"step": 348
},
{
"beta_dpo/beta": 0.12098747491836548,
"beta_dpo/beta_margin_grad_mean": -0.3674723207950592,
"beta_dpo/beta_margin_grad_std": 0.2333253175020218,
"beta_dpo/beta_margin_mean": 1.1785528659820557,
"beta_dpo/beta_margin_std": 2.257408618927002,
"beta_dpo/beta_used": 0.12098747491836548,
"beta_dpo/beta_used_raw": 0.12098747491836548,
"beta_dpo/gap_mean": 6.678452491760254,
"beta_dpo/gap_std": 13.459003448486328,
"beta_dpo/loss_margin_mean": 8.029808044433594,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.527588813303099,
"grad_norm": 36.4443359375,
"learning_rate": 2.711302664252973e-07,
"logits/chosen": 1.0890693664550781,
"logits/rejected": 1.0426480770111084,
"loss": 1.137,
"step": 349
},
{
"beta_dpo/beta": 0.16540199518203735,
"beta_dpo/beta_margin_grad_mean": -0.2708449363708496,
"beta_dpo/beta_margin_grad_std": 0.2463354766368866,
"beta_dpo/beta_margin_mean": 1.6865824460983276,
"beta_dpo/beta_margin_std": 1.9559446573257446,
"beta_dpo/beta_used": 0.16540199518203735,
"beta_dpo/beta_used_raw": 0.16540199518203735,
"beta_dpo/gap_mean": 7.410719871520996,
"beta_dpo/gap_std": 13.243711471557617,
"beta_dpo/loss_margin_mean": 10.080084800720215,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5291005291005291,
"grad_norm": 37.67925262451172,
"learning_rate": 2.698124892141971e-07,
"logits/chosen": 1.0085124969482422,
"logits/rejected": 0.9736948013305664,
"loss": 0.8858,
"step": 350
},
{
"beta_dpo/beta": 0.13289794325828552,
"beta_dpo/beta_margin_grad_mean": -0.35490015149116516,
"beta_dpo/beta_margin_grad_std": 0.27515533566474915,
"beta_dpo/beta_margin_mean": 0.9353875517845154,
"beta_dpo/beta_margin_std": 1.814262866973877,
"beta_dpo/beta_used": 0.13289794325828552,
"beta_dpo/beta_used_raw": 0.13289794325828552,
"beta_dpo/gap_mean": 7.435298919677734,
"beta_dpo/gap_std": 13.097363471984863,
"beta_dpo/loss_margin_mean": 6.503824234008789,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5306122448979592,
"grad_norm": 42.758670806884766,
"learning_rate": 2.6849415780518357e-07,
"logits/chosen": 1.409663438796997,
"logits/rejected": 1.3147722482681274,
"loss": 0.9475,
"step": 351
},
{
"beta_dpo/beta": 0.15361975133419037,
"beta_dpo/beta_margin_grad_mean": -0.3725324273109436,
"beta_dpo/beta_margin_grad_std": 0.2771722674369812,
"beta_dpo/beta_margin_mean": 1.3406981229782104,
"beta_dpo/beta_margin_std": 3.0171306133270264,
"beta_dpo/beta_used": 0.15361975133419037,
"beta_dpo/beta_used_raw": 0.15361975133419037,
"beta_dpo/gap_mean": 7.367338180541992,
"beta_dpo/gap_std": 13.319618225097656,
"beta_dpo/loss_margin_mean": 7.5593743324279785,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5321239606953893,
"grad_norm": 43.37005615234375,
"learning_rate": 2.6717530907482024e-07,
"logits/chosen": 1.3086192607879639,
"logits/rejected": 1.2862586975097656,
"loss": 1.0957,
"step": 352
},
{
"beta_dpo/beta": 0.2464243620634079,
"beta_dpo/beta_margin_grad_mean": -0.25405097007751465,
"beta_dpo/beta_margin_grad_std": 0.3099513649940491,
"beta_dpo/beta_margin_mean": 2.007615566253662,
"beta_dpo/beta_margin_std": 3.4321491718292236,
"beta_dpo/beta_used": 0.2464243620634079,
"beta_dpo/beta_used_raw": 0.2464243620634079,
"beta_dpo/gap_mean": 7.241891860961914,
"beta_dpo/gap_std": 13.446264266967773,
"beta_dpo/loss_margin_mean": 7.859658241271973,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5336356764928194,
"grad_norm": 41.63973617553711,
"learning_rate": 2.658559799141411e-07,
"logits/chosen": 1.1554973125457764,
"logits/rejected": 1.1289267539978027,
"loss": 0.6731,
"step": 353
},
{
"beta_dpo/beta": 0.1691766232252121,
"beta_dpo/beta_margin_grad_mean": -0.31530454754829407,
"beta_dpo/beta_margin_grad_std": 0.3026013970375061,
"beta_dpo/beta_margin_mean": 1.361480712890625,
"beta_dpo/beta_margin_std": 2.515624523162842,
"beta_dpo/beta_used": 0.1691766232252121,
"beta_dpo/beta_used_raw": 0.1691766232252121,
"beta_dpo/gap_mean": 7.580024242401123,
"beta_dpo/gap_std": 13.480710983276367,
"beta_dpo/loss_margin_mean": 8.209134101867676,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5351473922902494,
"grad_norm": 39.822025299072266,
"learning_rate": 2.6453620722761895e-07,
"logits/chosen": 1.223923683166504,
"logits/rejected": 1.1551811695098877,
"loss": 0.8025,
"step": 354
},
{
"beta_dpo/beta": 0.17666350305080414,
"beta_dpo/beta_margin_grad_mean": -0.29615840315818787,
"beta_dpo/beta_margin_grad_std": 0.2723563313484192,
"beta_dpo/beta_margin_mean": 1.595294713973999,
"beta_dpo/beta_margin_std": 2.3159079551696777,
"beta_dpo/beta_used": 0.17666350305080414,
"beta_dpo/beta_used_raw": 0.17666350305080414,
"beta_dpo/gap_mean": 7.708715438842773,
"beta_dpo/gap_std": 13.585740089416504,
"beta_dpo/loss_margin_mean": 8.833210945129395,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5366591080876795,
"grad_norm": 33.9071159362793,
"learning_rate": 2.632160279321328e-07,
"logits/chosen": 1.3096725940704346,
"logits/rejected": 1.1149065494537354,
"loss": 0.8918,
"step": 355
},
{
"beta_dpo/beta": 0.2004559338092804,
"beta_dpo/beta_margin_grad_mean": -0.28709742426872253,
"beta_dpo/beta_margin_grad_std": 0.3166719377040863,
"beta_dpo/beta_margin_mean": 1.9062695503234863,
"beta_dpo/beta_margin_std": 3.0024073123931885,
"beta_dpo/beta_used": 0.2004559338092804,
"beta_dpo/beta_used_raw": 0.2004559338092804,
"beta_dpo/gap_mean": 8.091878890991211,
"beta_dpo/gap_std": 13.65368938446045,
"beta_dpo/loss_margin_mean": 9.26539134979248,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5381708238851096,
"grad_norm": 40.02458953857422,
"learning_rate": 2.618954789559356e-07,
"logits/chosen": 1.2811923027038574,
"logits/rejected": 1.2076209783554077,
"loss": 0.8828,
"step": 356
},
{
"beta_dpo/beta": 0.018133217468857765,
"beta_dpo/beta_margin_grad_mean": -0.45670992136001587,
"beta_dpo/beta_margin_grad_std": 0.08694471418857574,
"beta_dpo/beta_margin_mean": 0.18453972041606903,
"beta_dpo/beta_margin_std": 0.37306493520736694,
"beta_dpo/beta_used": 0.018133217468857765,
"beta_dpo/beta_used_raw": 0.003822476603090763,
"beta_dpo/gap_mean": 8.109353065490723,
"beta_dpo/gap_std": 13.743289947509766,
"beta_dpo/loss_margin_mean": 7.264776229858398,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5396825396825397,
"grad_norm": 6.065606117248535,
"learning_rate": 2.6057459723762076e-07,
"logits/chosen": 1.1004765033721924,
"logits/rejected": 1.0495339632034302,
"loss": 1.2814,
"step": 357
},
{
"beta_dpo/beta": 0.051143769174814224,
"beta_dpo/beta_margin_grad_mean": -0.40527665615081787,
"beta_dpo/beta_margin_grad_std": 0.14958453178405762,
"beta_dpo/beta_margin_mean": 0.4351819157600403,
"beta_dpo/beta_margin_std": 0.7011198997497559,
"beta_dpo/beta_used": 0.051143769174814224,
"beta_dpo/beta_used_raw": 0.051143769174814224,
"beta_dpo/gap_mean": 7.787057876586914,
"beta_dpo/gap_std": 13.672910690307617,
"beta_dpo/loss_margin_mean": 7.942201137542725,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5411942554799698,
"grad_norm": 16.145584106445312,
"learning_rate": 2.5925341972508954e-07,
"logits/chosen": 1.260805606842041,
"logits/rejected": 1.259293556213379,
"loss": 1.1264,
"step": 358
},
{
"beta_dpo/beta": 0.06088128313422203,
"beta_dpo/beta_margin_grad_mean": -0.4172905385494232,
"beta_dpo/beta_margin_grad_std": 0.21149376034736633,
"beta_dpo/beta_margin_mean": 0.5262914896011353,
"beta_dpo/beta_margin_std": 1.3644154071807861,
"beta_dpo/beta_used": 0.06088128313422203,
"beta_dpo/beta_used_raw": 0.02194279432296753,
"beta_dpo/gap_mean": 7.3958940505981445,
"beta_dpo/gap_std": 13.696711540222168,
"beta_dpo/loss_margin_mean": 5.2692742347717285,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5427059712773998,
"grad_norm": 19.803897857666016,
"learning_rate": 2.579319833745169e-07,
"logits/chosen": 1.1320165395736694,
"logits/rejected": 1.1335536241531372,
"loss": 1.1932,
"step": 359
},
{
"beta_dpo/beta": 0.09361709654331207,
"beta_dpo/beta_margin_grad_mean": -0.4224591851234436,
"beta_dpo/beta_margin_grad_std": 0.2343314290046692,
"beta_dpo/beta_margin_mean": 0.7064631581306458,
"beta_dpo/beta_margin_std": 1.859923243522644,
"beta_dpo/beta_used": 0.09361709654331207,
"beta_dpo/beta_used_raw": 0.0917295590043068,
"beta_dpo/gap_mean": 7.435537338256836,
"beta_dpo/gap_std": 13.729305267333984,
"beta_dpo/loss_margin_mean": 7.1353888511657715,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.54421768707483,
"grad_norm": 30.30096435546875,
"learning_rate": 2.5661032514931834e-07,
"logits/chosen": 1.0677845478057861,
"logits/rejected": 0.9679094552993774,
"loss": 1.1544,
"step": 360
},
{
"beta_dpo/beta": 0.08572488278150558,
"beta_dpo/beta_margin_grad_mean": -0.37226271629333496,
"beta_dpo/beta_margin_grad_std": 0.20196816325187683,
"beta_dpo/beta_margin_mean": 0.7135973572731018,
"beta_dpo/beta_margin_std": 1.2914289236068726,
"beta_dpo/beta_used": 0.08572488278150558,
"beta_dpo/beta_used_raw": 0.08572488278150558,
"beta_dpo/gap_mean": 7.480558395385742,
"beta_dpo/gap_std": 13.642951965332031,
"beta_dpo/loss_margin_mean": 8.10063362121582,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.54572940287226,
"grad_norm": 23.389602661132812,
"learning_rate": 2.552884820191154e-07,
"logits/chosen": 1.0572106838226318,
"logits/rejected": 0.9873596429824829,
"loss": 1.0122,
"step": 361
},
{
"beta_dpo/beta": 0.049856528639793396,
"beta_dpo/beta_margin_grad_mean": -0.41956833004951477,
"beta_dpo/beta_margin_grad_std": 0.18193066120147705,
"beta_dpo/beta_margin_mean": 0.4650557339191437,
"beta_dpo/beta_margin_std": 1.1645866632461548,
"beta_dpo/beta_used": 0.049856528639793396,
"beta_dpo/beta_used_raw": 0.01058843731880188,
"beta_dpo/gap_mean": 7.225416660308838,
"beta_dpo/gap_std": 13.535463333129883,
"beta_dpo/loss_margin_mean": 6.341689586639404,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.54724111866969,
"grad_norm": 16.414335250854492,
"learning_rate": 2.53966490958702e-07,
"logits/chosen": 1.2252509593963623,
"logits/rejected": 1.0469253063201904,
"loss": 1.1834,
"step": 362
},
{
"beta_dpo/beta": 0.1461520940065384,
"beta_dpo/beta_margin_grad_mean": -0.3125424385070801,
"beta_dpo/beta_margin_grad_std": 0.24862688779830933,
"beta_dpo/beta_margin_mean": 1.4741637706756592,
"beta_dpo/beta_margin_std": 2.2805867195129395,
"beta_dpo/beta_used": 0.1461520940065384,
"beta_dpo/beta_used_raw": 0.1461520940065384,
"beta_dpo/gap_mean": 7.652776718139648,
"beta_dpo/gap_std": 13.390401840209961,
"beta_dpo/loss_margin_mean": 9.007533073425293,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5487528344671202,
"grad_norm": 43.86406707763672,
"learning_rate": 2.526443889470099e-07,
"logits/chosen": 1.382947564125061,
"logits/rejected": 1.213701844215393,
"loss": 0.9243,
"step": 363
},
{
"beta_dpo/beta": 0.12054399400949478,
"beta_dpo/beta_margin_grad_mean": -0.3811088800430298,
"beta_dpo/beta_margin_grad_std": 0.24092726409435272,
"beta_dpo/beta_margin_mean": 1.456974744796753,
"beta_dpo/beta_margin_std": 2.946803331375122,
"beta_dpo/beta_used": 0.12054399400949478,
"beta_dpo/beta_used_raw": 0.10439993441104889,
"beta_dpo/gap_mean": 8.08302116394043,
"beta_dpo/gap_std": 13.675590515136719,
"beta_dpo/loss_margin_mean": 10.058621406555176,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5502645502645502,
"grad_norm": 42.604251861572266,
"learning_rate": 2.513222129660744e-07,
"logits/chosen": 1.12633216381073,
"logits/rejected": 1.059206247329712,
"loss": 1.133,
"step": 364
},
{
"beta_dpo/beta": 0.06535232812166214,
"beta_dpo/beta_margin_grad_mean": -0.3880292773246765,
"beta_dpo/beta_margin_grad_std": 0.15547937154769897,
"beta_dpo/beta_margin_mean": 0.5160609483718872,
"beta_dpo/beta_margin_std": 0.7484007477760315,
"beta_dpo/beta_used": 0.06535232812166214,
"beta_dpo/beta_used_raw": 0.06535232812166214,
"beta_dpo/gap_mean": 8.05885124206543,
"beta_dpo/gap_std": 13.430015563964844,
"beta_dpo/loss_margin_mean": 7.844278335571289,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5517762660619804,
"grad_norm": 15.027542114257812,
"learning_rate": 2.5e-07,
"logits/chosen": 1.1900737285614014,
"logits/rejected": 1.177154302597046,
"loss": 1.0322,
"step": 365
},
{
"beta_dpo/beta": 0.030097341164946556,
"beta_dpo/beta_margin_grad_mean": -0.45781904458999634,
"beta_dpo/beta_margin_grad_std": 0.15457406640052795,
"beta_dpo/beta_margin_mean": 0.20955324172973633,
"beta_dpo/beta_margin_std": 0.7605064511299133,
"beta_dpo/beta_used": 0.030097341164946556,
"beta_dpo/beta_used_raw": -0.00335543230175972,
"beta_dpo/gap_mean": 7.919361591339111,
"beta_dpo/gap_std": 13.844223022460938,
"beta_dpo/loss_margin_mean": 7.396289348602295,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5532879818594104,
"grad_norm": 10.480829238891602,
"learning_rate": 2.486777870339255e-07,
"logits/chosen": 1.0998948812484741,
"logits/rejected": 1.0781323909759521,
"loss": 1.2607,
"step": 366
},
{
"beta_dpo/beta": 0.07937921583652496,
"beta_dpo/beta_margin_grad_mean": -0.41697001457214355,
"beta_dpo/beta_margin_grad_std": 0.2050413191318512,
"beta_dpo/beta_margin_mean": 0.40257585048675537,
"beta_dpo/beta_margin_std": 1.4262551069259644,
"beta_dpo/beta_used": 0.07937921583652496,
"beta_dpo/beta_used_raw": -0.006307244300842285,
"beta_dpo/gap_mean": 7.531491279602051,
"beta_dpo/gap_std": 13.592772483825684,
"beta_dpo/loss_margin_mean": 5.220627784729004,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5547996976568406,
"grad_norm": 21.611459732055664,
"learning_rate": 2.4735561105299014e-07,
"logits/chosen": 0.9986228942871094,
"logits/rejected": 0.9025825262069702,
"loss": 1.0436,
"step": 367
},
{
"beta_dpo/beta": 0.09011980891227722,
"beta_dpo/beta_margin_grad_mean": -0.408080130815506,
"beta_dpo/beta_margin_grad_std": 0.23834280669689178,
"beta_dpo/beta_margin_mean": 0.6598003506660461,
"beta_dpo/beta_margin_std": 1.8577990531921387,
"beta_dpo/beta_used": 0.09011980891227722,
"beta_dpo/beta_used_raw": 0.06833744049072266,
"beta_dpo/gap_mean": 7.3745951652526855,
"beta_dpo/gap_std": 13.662035942077637,
"beta_dpo/loss_margin_mean": 7.16412353515625,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5563114134542706,
"grad_norm": 28.219196319580078,
"learning_rate": 2.46033509041298e-07,
"logits/chosen": 0.8210684061050415,
"logits/rejected": 0.817505955696106,
"loss": 1.0656,
"step": 368
},
{
"beta_dpo/beta": 0.010743452236056328,
"beta_dpo/beta_margin_grad_mean": -0.4860118627548218,
"beta_dpo/beta_margin_grad_std": 0.04464971646666527,
"beta_dpo/beta_margin_mean": 0.05712589621543884,
"beta_dpo/beta_margin_std": 0.18325534462928772,
"beta_dpo/beta_used": 0.010743452236056328,
"beta_dpo/beta_used_raw": -0.0012179017066955566,
"beta_dpo/gap_mean": 6.9191412925720215,
"beta_dpo/gap_std": 13.665838241577148,
"beta_dpo/loss_margin_mean": 4.59527063369751,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5578231292517006,
"grad_norm": 4.703724384307861,
"learning_rate": 2.447115179808846e-07,
"logits/chosen": 0.9818326234817505,
"logits/rejected": 0.9543051719665527,
"loss": 1.3324,
"step": 369
},
{
"beta_dpo/beta": 0.26102516055107117,
"beta_dpo/beta_margin_grad_mean": -0.2952536344528198,
"beta_dpo/beta_margin_grad_std": 0.3286318778991699,
"beta_dpo/beta_margin_mean": 2.9795680046081543,
"beta_dpo/beta_margin_std": 5.268163204193115,
"beta_dpo/beta_used": 0.26102516055107117,
"beta_dpo/beta_used_raw": 0.26102516055107117,
"beta_dpo/gap_mean": 7.429380416870117,
"beta_dpo/gap_std": 13.954389572143555,
"beta_dpo/loss_margin_mean": 10.275045394897461,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5593348450491308,
"grad_norm": 74.33258056640625,
"learning_rate": 2.4338967485068164e-07,
"logits/chosen": 1.5886646509170532,
"logits/rejected": 1.5320854187011719,
"loss": 0.8804,
"step": 370
},
{
"beta_dpo/beta": 0.09414184093475342,
"beta_dpo/beta_margin_grad_mean": -0.38183629512786865,
"beta_dpo/beta_margin_grad_std": 0.2523384392261505,
"beta_dpo/beta_margin_mean": 0.665998637676239,
"beta_dpo/beta_margin_std": 1.4542341232299805,
"beta_dpo/beta_used": 0.09414184093475342,
"beta_dpo/beta_used_raw": 0.09414184093475342,
"beta_dpo/gap_mean": 7.24754524230957,
"beta_dpo/gap_std": 14.236856460571289,
"beta_dpo/loss_margin_mean": 6.794817924499512,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5608465608465608,
"grad_norm": 21.97515106201172,
"learning_rate": 2.420680166254831e-07,
"logits/chosen": 1.5336153507232666,
"logits/rejected": 1.5053898096084595,
"loss": 1.0365,
"step": 371
},
{
"beta_dpo/beta": 0.10913428664207458,
"beta_dpo/beta_margin_grad_mean": -0.37646549940109253,
"beta_dpo/beta_margin_grad_std": 0.246499165892601,
"beta_dpo/beta_margin_mean": 1.1655247211456299,
"beta_dpo/beta_margin_std": 2.3541650772094727,
"beta_dpo/beta_used": 0.10913428664207458,
"beta_dpo/beta_used_raw": 0.10913428664207458,
"beta_dpo/gap_mean": 7.113511085510254,
"beta_dpo/gap_std": 14.328683853149414,
"beta_dpo/loss_margin_mean": 6.992242813110352,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.562358276643991,
"grad_norm": 46.117122650146484,
"learning_rate": 2.4074658027491044e-07,
"logits/chosen": 1.2923402786254883,
"logits/rejected": 1.1961686611175537,
"loss": 1.2216,
"step": 372
},
{
"beta_dpo/beta": 0.10140194743871689,
"beta_dpo/beta_margin_grad_mean": -0.39248228073120117,
"beta_dpo/beta_margin_grad_std": 0.24916405975818634,
"beta_dpo/beta_margin_mean": 0.6541171669960022,
"beta_dpo/beta_margin_std": 1.4341338872909546,
"beta_dpo/beta_used": 0.10140194743871689,
"beta_dpo/beta_used_raw": 0.10140194743871689,
"beta_dpo/gap_mean": 7.229263782501221,
"beta_dpo/gap_std": 14.314950942993164,
"beta_dpo/loss_margin_mean": 6.411010265350342,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.563869992441421,
"grad_norm": 28.40692901611328,
"learning_rate": 2.394254027623792e-07,
"logits/chosen": 1.1281664371490479,
"logits/rejected": 1.0395562648773193,
"loss": 1.0579,
"step": 373
},
{
"beta_dpo/beta": 0.24720382690429688,
"beta_dpo/beta_margin_grad_mean": -0.2915995121002197,
"beta_dpo/beta_margin_grad_std": 0.3164134621620178,
"beta_dpo/beta_margin_mean": 2.604962110519409,
"beta_dpo/beta_margin_std": 4.2764787673950195,
"beta_dpo/beta_used": 0.24720382690429688,
"beta_dpo/beta_used_raw": 0.24720382690429688,
"beta_dpo/gap_mean": 7.416611194610596,
"beta_dpo/gap_std": 14.550976753234863,
"beta_dpo/loss_margin_mean": 9.614022254943848,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5653817082388511,
"grad_norm": 72.6122817993164,
"learning_rate": 2.381045210440644e-07,
"logits/chosen": 1.1132843494415283,
"logits/rejected": 1.0851848125457764,
"loss": 0.8943,
"step": 374
},
{
"beta_dpo/beta": 0.20675544440746307,
"beta_dpo/beta_margin_grad_mean": -0.33286479115486145,
"beta_dpo/beta_margin_grad_std": 0.3486374318599701,
"beta_dpo/beta_margin_mean": 1.4182250499725342,
"beta_dpo/beta_margin_std": 3.0830986499786377,
"beta_dpo/beta_used": 0.20675544440746307,
"beta_dpo/beta_used_raw": 0.20675544440746307,
"beta_dpo/gap_mean": 7.565443515777588,
"beta_dpo/gap_std": 14.595691680908203,
"beta_dpo/loss_margin_mean": 6.784230709075928,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5668934240362812,
"grad_norm": 52.81209182739258,
"learning_rate": 2.3678397206786715e-07,
"logits/chosen": 1.3927481174468994,
"logits/rejected": 1.3600114583969116,
"loss": 0.8848,
"step": 375
},
{
"beta_dpo/beta": 0.04299633204936981,
"beta_dpo/beta_margin_grad_mean": -0.42016491293907166,
"beta_dpo/beta_margin_grad_std": 0.15737684071063995,
"beta_dpo/beta_margin_mean": 0.37388625741004944,
"beta_dpo/beta_margin_std": 0.7507635354995728,
"beta_dpo/beta_used": 0.04299633204936981,
"beta_dpo/beta_used_raw": 0.04299633204936981,
"beta_dpo/gap_mean": 7.693660259246826,
"beta_dpo/gap_std": 14.870655059814453,
"beta_dpo/loss_margin_mean": 8.864215850830078,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5684051398337112,
"grad_norm": 13.12270736694336,
"learning_rate": 2.3546379277238103e-07,
"logits/chosen": 1.1703935861587524,
"logits/rejected": 1.066407322883606,
"loss": 1.2036,
"step": 376
},
{
"beta_dpo/beta": 0.11132267117500305,
"beta_dpo/beta_margin_grad_mean": -0.3877478241920471,
"beta_dpo/beta_margin_grad_std": 0.22526662051677704,
"beta_dpo/beta_margin_mean": 0.8313254117965698,
"beta_dpo/beta_margin_std": 1.8189681768417358,
"beta_dpo/beta_used": 0.11132267117500305,
"beta_dpo/beta_used_raw": 0.11132267117500305,
"beta_dpo/gap_mean": 7.574334144592285,
"beta_dpo/gap_std": 14.641395568847656,
"beta_dpo/loss_margin_mean": 6.86510705947876,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5699168556311414,
"grad_norm": 28.927581787109375,
"learning_rate": 2.3414402008585886e-07,
"logits/chosen": 1.182497501373291,
"logits/rejected": 1.1518537998199463,
"loss": 1.0515,
"step": 377
},
{
"beta_dpo/beta": 0.07517936825752258,
"beta_dpo/beta_margin_grad_mean": -0.41363537311553955,
"beta_dpo/beta_margin_grad_std": 0.22291339933872223,
"beta_dpo/beta_margin_mean": 0.6230934858322144,
"beta_dpo/beta_margin_std": 1.8685015439987183,
"beta_dpo/beta_used": 0.07517936825752258,
"beta_dpo/beta_used_raw": 0.07517936825752258,
"beta_dpo/gap_mean": 7.3569440841674805,
"beta_dpo/gap_std": 14.392763137817383,
"beta_dpo/loss_margin_mean": 6.59552526473999,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5714285714285714,
"grad_norm": 24.532310485839844,
"learning_rate": 2.3282469092517977e-07,
"logits/chosen": 1.4094161987304688,
"logits/rejected": 1.3490303754806519,
"loss": 1.119,
"step": 378
},
{
"beta_dpo/beta": 0.22540748119354248,
"beta_dpo/beta_margin_grad_mean": -0.3058602511882782,
"beta_dpo/beta_margin_grad_std": 0.3339531719684601,
"beta_dpo/beta_margin_mean": 1.8993895053863525,
"beta_dpo/beta_margin_std": 3.3336374759674072,
"beta_dpo/beta_used": 0.22540748119354248,
"beta_dpo/beta_used_raw": 0.22540748119354248,
"beta_dpo/gap_mean": 7.495121002197266,
"beta_dpo/gap_std": 14.666516304016113,
"beta_dpo/loss_margin_mean": 8.280893325805664,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5729402872260015,
"grad_norm": 60.54188919067383,
"learning_rate": 2.3150584219481643e-07,
"logits/chosen": 1.4404394626617432,
"logits/rejected": 1.3509386777877808,
"loss": 1.0322,
"step": 379
},
{
"beta_dpo/beta": 0.24636900424957275,
"beta_dpo/beta_margin_grad_mean": -0.278732031583786,
"beta_dpo/beta_margin_grad_std": 0.3075398802757263,
"beta_dpo/beta_margin_mean": 2.4282400608062744,
"beta_dpo/beta_margin_std": 4.70290470123291,
"beta_dpo/beta_used": 0.24636900424957275,
"beta_dpo/beta_used_raw": 0.24636900424957275,
"beta_dpo/gap_mean": 7.887577056884766,
"beta_dpo/gap_std": 14.830470085144043,
"beta_dpo/loss_margin_mean": 9.414112091064453,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5744520030234316,
"grad_norm": 58.42963409423828,
"learning_rate": 2.3018751078580283e-07,
"logits/chosen": 1.442448377609253,
"logits/rejected": 1.3922609090805054,
"loss": 0.9649,
"step": 380
},
{
"beta_dpo/beta": 0.1303085833787918,
"beta_dpo/beta_margin_grad_mean": -0.38812023401260376,
"beta_dpo/beta_margin_grad_std": 0.25397399067878723,
"beta_dpo/beta_margin_mean": 1.1557432413101196,
"beta_dpo/beta_margin_std": 3.0310275554656982,
"beta_dpo/beta_used": 0.1303085833787918,
"beta_dpo/beta_used_raw": 0.061051756143569946,
"beta_dpo/gap_mean": 7.441038131713867,
"beta_dpo/gap_std": 14.655248641967773,
"beta_dpo/loss_margin_mean": 5.61381721496582,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5759637188208617,
"grad_norm": 33.84221267700195,
"learning_rate": 2.288697335747027e-07,
"logits/chosen": 1.2152440547943115,
"logits/rejected": 1.1710684299468994,
"loss": 1.1738,
"step": 381
},
{
"beta_dpo/beta": 0.08941483497619629,
"beta_dpo/beta_margin_grad_mean": -0.4051651060581207,
"beta_dpo/beta_margin_grad_std": 0.21740548312664032,
"beta_dpo/beta_margin_mean": 0.7790390849113464,
"beta_dpo/beta_margin_std": 1.8734796047210693,
"beta_dpo/beta_used": 0.08941483497619629,
"beta_dpo/beta_used_raw": 0.08169528841972351,
"beta_dpo/gap_mean": 7.350861549377441,
"beta_dpo/gap_std": 14.560256958007812,
"beta_dpo/loss_margin_mean": 6.928147315979004,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5774754346182918,
"grad_norm": 28.881624221801758,
"learning_rate": 2.2755254742257706e-07,
"logits/chosen": 1.2367597818374634,
"logits/rejected": 1.1454555988311768,
"loss": 1.1139,
"step": 382
},
{
"beta_dpo/beta": 0.09634870290756226,
"beta_dpo/beta_margin_grad_mean": -0.39008820056915283,
"beta_dpo/beta_margin_grad_std": 0.25229260325431824,
"beta_dpo/beta_margin_mean": 0.6619503498077393,
"beta_dpo/beta_margin_std": 1.489913821220398,
"beta_dpo/beta_used": 0.09634870290756226,
"beta_dpo/beta_used_raw": 0.09634870290756226,
"beta_dpo/gap_mean": 7.342663764953613,
"beta_dpo/gap_std": 14.746681213378906,
"beta_dpo/loss_margin_mean": 6.870544910430908,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5789871504157218,
"grad_norm": 25.512887954711914,
"learning_rate": 2.2623598917395436e-07,
"logits/chosen": 1.021744966506958,
"logits/rejected": 1.087134599685669,
"loss": 1.0863,
"step": 383
},
{
"beta_dpo/beta": 0.0626574382185936,
"beta_dpo/beta_margin_grad_mean": -0.3971060514450073,
"beta_dpo/beta_margin_grad_std": 0.1797652393579483,
"beta_dpo/beta_margin_mean": 0.4878333806991577,
"beta_dpo/beta_margin_std": 0.8790075778961182,
"beta_dpo/beta_used": 0.0626574382185936,
"beta_dpo/beta_used_raw": 0.0626574382185936,
"beta_dpo/gap_mean": 7.4141693115234375,
"beta_dpo/gap_std": 14.61959457397461,
"beta_dpo/loss_margin_mean": 7.721750736236572,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5804988662131519,
"grad_norm": 17.06849479675293,
"learning_rate": 2.2492009565579875e-07,
"logits/chosen": 1.3298002481460571,
"logits/rejected": 1.2677011489868164,
"loss": 1.1088,
"step": 384
},
{
"beta_dpo/beta": 0.004450375679880381,
"beta_dpo/beta_margin_grad_mean": -0.489875853061676,
"beta_dpo/beta_margin_grad_std": 0.020829180255532265,
"beta_dpo/beta_margin_mean": 0.04063662141561508,
"beta_dpo/beta_margin_std": 0.08365319669246674,
"beta_dpo/beta_used": 0.004450375679880381,
"beta_dpo/beta_used_raw": -0.018079757690429688,
"beta_dpo/gap_mean": 7.440016269683838,
"beta_dpo/gap_std": 14.209115028381348,
"beta_dpo/loss_margin_mean": 8.146535873413086,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.582010582010582,
"grad_norm": 1.7125223875045776,
"learning_rate": 2.2360490367648084e-07,
"logits/chosen": 1.1286897659301758,
"logits/rejected": 1.0630574226379395,
"loss": 1.3614,
"step": 385
},
{
"beta_dpo/beta": 0.04179831221699715,
"beta_dpo/beta_margin_grad_mean": -0.4530133903026581,
"beta_dpo/beta_margin_grad_std": 0.12521444261074066,
"beta_dpo/beta_margin_mean": 0.19750449061393738,
"beta_dpo/beta_margin_std": 0.5692038536071777,
"beta_dpo/beta_used": 0.04179831221699715,
"beta_dpo/beta_used_raw": 0.04179831221699715,
"beta_dpo/gap_mean": 7.274169921875,
"beta_dpo/gap_std": 14.249858856201172,
"beta_dpo/loss_margin_mean": 5.243067741394043,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5835222978080121,
"grad_norm": 11.668978691101074,
"learning_rate": 2.2229045002474724e-07,
"logits/chosen": 1.163759708404541,
"logits/rejected": 1.086532473564148,
"loss": 1.1707,
"step": 386
},
{
"beta_dpo/beta": 0.10992548614740372,
"beta_dpo/beta_margin_grad_mean": -0.35167068243026733,
"beta_dpo/beta_margin_grad_std": 0.24510644376277924,
"beta_dpo/beta_margin_mean": 1.0464832782745361,
"beta_dpo/beta_margin_std": 1.9593183994293213,
"beta_dpo/beta_used": 0.10992548614740372,
"beta_dpo/beta_used_raw": 0.10992548614740372,
"beta_dpo/gap_mean": 7.317193984985352,
"beta_dpo/gap_std": 14.160797119140625,
"beta_dpo/loss_margin_mean": 8.948436737060547,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5850340136054422,
"grad_norm": 24.931472778320312,
"learning_rate": 2.209767714686924e-07,
"logits/chosen": 1.3423492908477783,
"logits/rejected": 1.2523207664489746,
"loss": 1.0216,
"step": 387
},
{
"beta_dpo/beta": 0.08252957463264465,
"beta_dpo/beta_margin_grad_mean": -0.4158702790737152,
"beta_dpo/beta_margin_grad_std": 0.2159751057624817,
"beta_dpo/beta_margin_mean": 0.6033073663711548,
"beta_dpo/beta_margin_std": 1.501455307006836,
"beta_dpo/beta_used": 0.08252957463264465,
"beta_dpo/beta_used_raw": 0.0030330345034599304,
"beta_dpo/gap_mean": 7.176567077636719,
"beta_dpo/gap_std": 13.799575805664062,
"beta_dpo/loss_margin_mean": 4.720012187957764,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5865457294028723,
"grad_norm": 31.726829528808594,
"learning_rate": 2.1966390475472954e-07,
"logits/chosen": 1.3642137050628662,
"logits/rejected": 1.3656563758850098,
"loss": 1.1469,
"step": 388
},
{
"beta_dpo/beta": 0.10920540988445282,
"beta_dpo/beta_margin_grad_mean": -0.3846765160560608,
"beta_dpo/beta_margin_grad_std": 0.2381884902715683,
"beta_dpo/beta_margin_mean": 0.8563576936721802,
"beta_dpo/beta_margin_std": 1.8833948373794556,
"beta_dpo/beta_used": 0.10920540988445282,
"beta_dpo/beta_used_raw": 0.10920540988445282,
"beta_dpo/gap_mean": 6.994778633117676,
"beta_dpo/gap_std": 13.427654266357422,
"beta_dpo/loss_margin_mean": 7.5880303382873535,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5880574452003023,
"grad_norm": 35.045440673828125,
"learning_rate": 2.1835188660656265e-07,
"logits/chosen": 1.0244269371032715,
"logits/rejected": 1.0011541843414307,
"loss": 1.1488,
"step": 389
},
{
"beta_dpo/beta": 0.033905960619449615,
"beta_dpo/beta_margin_grad_mean": -0.43266651034355164,
"beta_dpo/beta_margin_grad_std": 0.10585252940654755,
"beta_dpo/beta_margin_mean": 0.28706708550453186,
"beta_dpo/beta_margin_std": 0.4559902548789978,
"beta_dpo/beta_used": 0.033905960619449615,
"beta_dpo/beta_used_raw": 0.033905960619449615,
"beta_dpo/gap_mean": 7.247902870178223,
"beta_dpo/gap_std": 13.372995376586914,
"beta_dpo/loss_margin_mean": 8.466453552246094,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5895691609977324,
"grad_norm": 7.7616095542907715,
"learning_rate": 2.170407537241599e-07,
"logits/chosen": 1.4455749988555908,
"logits/rejected": 1.369256615638733,
"loss": 1.217,
"step": 390
},
{
"beta_dpo/beta": 0.22762346267700195,
"beta_dpo/beta_margin_grad_mean": -0.2824380099773407,
"beta_dpo/beta_margin_grad_std": 0.321077436208725,
"beta_dpo/beta_margin_mean": 2.477963447570801,
"beta_dpo/beta_margin_std": 3.847912073135376,
"beta_dpo/beta_used": 0.22762346267700195,
"beta_dpo/beta_used_raw": 0.22762346267700195,
"beta_dpo/gap_mean": 7.8365912437438965,
"beta_dpo/gap_std": 13.6998872756958,
"beta_dpo/loss_margin_mean": 11.036505699157715,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5910808767951625,
"grad_norm": 79.25921630859375,
"learning_rate": 2.1573054278272636e-07,
"logits/chosen": 0.9788742065429688,
"logits/rejected": 0.9468849301338196,
"loss": 0.943,
"step": 391
},
{
"beta_dpo/beta": 0.18245580792427063,
"beta_dpo/beta_margin_grad_mean": -0.27842310070991516,
"beta_dpo/beta_margin_grad_std": 0.2902601659297943,
"beta_dpo/beta_margin_mean": 2.0409109592437744,
"beta_dpo/beta_margin_std": 2.8519318103790283,
"beta_dpo/beta_used": 0.18245580792427063,
"beta_dpo/beta_used_raw": 0.18245580792427063,
"beta_dpo/gap_mean": 8.379539489746094,
"beta_dpo/gap_std": 13.949304580688477,
"beta_dpo/loss_margin_mean": 10.906364440917969,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5925925925925926,
"grad_norm": 38.86481475830078,
"learning_rate": 2.1442129043167873e-07,
"logits/chosen": 1.164830207824707,
"logits/rejected": 1.159719705581665,
"loss": 0.8137,
"step": 392
},
{
"beta_dpo/beta": 0.0202580988407135,
"beta_dpo/beta_margin_grad_mean": -0.4727727770805359,
"beta_dpo/beta_margin_grad_std": 0.07772287726402283,
"beta_dpo/beta_margin_mean": 0.11153332889080048,
"beta_dpo/beta_margin_std": 0.3320378363132477,
"beta_dpo/beta_used": 0.0202580988407135,
"beta_dpo/beta_used_raw": 0.016610777005553246,
"beta_dpo/gap_mean": 8.489538192749023,
"beta_dpo/gap_std": 13.79062271118164,
"beta_dpo/loss_margin_mean": 7.332988262176514,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5941043083900227,
"grad_norm": 7.228002548217773,
"learning_rate": 2.131130332936195e-07,
"logits/chosen": 0.9965734481811523,
"logits/rejected": 0.9394361972808838,
"loss": 1.2643,
"step": 393
},
{
"beta_dpo/beta": 0.10417785495519638,
"beta_dpo/beta_margin_grad_mean": -0.3671894669532776,
"beta_dpo/beta_margin_grad_std": 0.23936332762241364,
"beta_dpo/beta_margin_mean": 0.9281326532363892,
"beta_dpo/beta_margin_std": 1.7436189651489258,
"beta_dpo/beta_used": 0.10417785495519638,
"beta_dpo/beta_used_raw": 0.10417785495519638,
"beta_dpo/gap_mean": 8.309196472167969,
"beta_dpo/gap_std": 13.411027908325195,
"beta_dpo/loss_margin_mean": 8.375308990478516,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5956160241874527,
"grad_norm": 22.398984909057617,
"learning_rate": 2.1180580796331323e-07,
"logits/chosen": 1.3453733921051025,
"logits/rejected": 1.2923827171325684,
"loss": 0.9597,
"step": 394
},
{
"beta_dpo/beta": 0.08681511133909225,
"beta_dpo/beta_margin_grad_mean": -0.40546634793281555,
"beta_dpo/beta_margin_grad_std": 0.2304772436618805,
"beta_dpo/beta_margin_mean": 0.7605048418045044,
"beta_dpo/beta_margin_std": 1.90763258934021,
"beta_dpo/beta_used": 0.08681511133909225,
"beta_dpo/beta_used_raw": -0.011999711394309998,
"beta_dpo/gap_mean": 7.920681476593018,
"beta_dpo/gap_std": 13.151678085327148,
"beta_dpo/loss_margin_mean": 6.229694843292236,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5971277399848829,
"grad_norm": 18.830989837646484,
"learning_rate": 2.104996510066625e-07,
"logits/chosen": 1.1814665794372559,
"logits/rejected": 1.0725936889648438,
"loss": 1.054,
"step": 395
},
{
"beta_dpo/beta": 0.0445711687207222,
"beta_dpo/beta_margin_grad_mean": -0.41445091366767883,
"beta_dpo/beta_margin_grad_std": 0.12426353991031647,
"beta_dpo/beta_margin_mean": 0.392553448677063,
"beta_dpo/beta_margin_std": 0.6115043759346008,
"beta_dpo/beta_used": 0.0445711687207222,
"beta_dpo/beta_used_raw": 0.0445711687207222,
"beta_dpo/gap_mean": 8.018841743469238,
"beta_dpo/gap_std": 12.900693893432617,
"beta_dpo/loss_margin_mean": 7.28317928314209,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5986394557823129,
"grad_norm": 17.15723419189453,
"learning_rate": 2.0919459895968517e-07,
"logits/chosen": 1.4864282608032227,
"logits/rejected": 1.3753361701965332,
"loss": 1.1277,
"step": 396
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4987220764160156,
"beta_dpo/beta_margin_grad_std": 0.0034480225294828415,
"beta_dpo/beta_margin_mean": 0.0051118070259690285,
"beta_dpo/beta_margin_std": 0.013792959041893482,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.0704718828201294,
"beta_dpo/gap_mean": 7.361849784851074,
"beta_dpo/gap_std": 12.713207244873047,
"beta_dpo/loss_margin_mean": 5.111806869506836,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.600151171579743,
"grad_norm": 0.3319074213504791,
"learning_rate": 2.078906883274924e-07,
"logits/chosen": 1.247459888458252,
"logits/rejected": 1.2278985977172852,
"loss": 1.3818,
"step": 397
},
{
"beta_dpo/beta": 0.12302777916193008,
"beta_dpo/beta_margin_grad_mean": -0.38068652153015137,
"beta_dpo/beta_margin_grad_std": 0.24687737226486206,
"beta_dpo/beta_margin_mean": 1.1479579210281372,
"beta_dpo/beta_margin_std": 2.429410219192505,
"beta_dpo/beta_used": 0.12302777916193008,
"beta_dpo/beta_used_raw": 0.12302777916193008,
"beta_dpo/gap_mean": 7.570580005645752,
"beta_dpo/gap_std": 12.983465194702148,
"beta_dpo/loss_margin_mean": 9.056266784667969,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6016628873771731,
"grad_norm": 28.814918518066406,
"learning_rate": 2.065879555832674e-07,
"logits/chosen": 1.1736929416656494,
"logits/rejected": 1.0705779790878296,
"loss": 1.0416,
"step": 398
},
{
"beta_dpo/beta": 0.1019376814365387,
"beta_dpo/beta_margin_grad_mean": -0.3764630854129791,
"beta_dpo/beta_margin_grad_std": 0.25056028366088867,
"beta_dpo/beta_margin_mean": 1.1306095123291016,
"beta_dpo/beta_margin_std": 2.293529748916626,
"beta_dpo/beta_used": 0.1019376814365387,
"beta_dpo/beta_used_raw": 0.022106006741523743,
"beta_dpo/gap_mean": 7.877155303955078,
"beta_dpo/gap_std": 12.939423561096191,
"beta_dpo/loss_margin_mean": 8.506237983703613,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6031746031746031,
"grad_norm": 34.429656982421875,
"learning_rate": 2.052864371672457e-07,
"logits/chosen": 1.2226908206939697,
"logits/rejected": 1.0775423049926758,
"loss": 1.1386,
"step": 399
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49878740310668945,
"beta_dpo/beta_margin_grad_std": 0.002995472401380539,
"beta_dpo/beta_margin_mean": 0.004850634373724461,
"beta_dpo/beta_margin_std": 0.011982507072389126,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.10486699640750885,
"beta_dpo/gap_mean": 7.399901390075684,
"beta_dpo/gap_std": 12.670374870300293,
"beta_dpo/loss_margin_mean": 4.8506340980529785,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6046863189720333,
"grad_norm": 0.3414646089076996,
"learning_rate": 2.0398616948569493e-07,
"logits/chosen": 1.4321517944335938,
"logits/rejected": 1.3374395370483398,
"loss": 1.3823,
"step": 400
},
{
"epoch": 0.6046863189720333,
"eval_beta_dpo/beta": 0.10191422700881958,
"eval_beta_dpo/beta_margin_grad_mean": -0.38811933994293213,
"eval_beta_dpo/beta_margin_grad_std": 0.17382743954658508,
"eval_beta_dpo/beta_margin_mean": 0.8735393285751343,
"eval_beta_dpo/beta_margin_std": 1.3551836013793945,
"eval_beta_dpo/beta_used": 0.10191422700881958,
"eval_beta_dpo/beta_used_raw": 0.08637029677629471,
"eval_beta_dpo/gap_mean": 7.206363201141357,
"eval_beta_dpo/gap_std": 12.652938842773438,
"eval_beta_dpo/loss_margin_mean": 6.979201793670654,
"eval_beta_dpo/mask_keep_frac": 1.0,
"eval_logits/chosen": 1.2964799404144287,
"eval_logits/rejected": 1.2259721755981445,
"eval_loss": 0.5950996279716492,
"eval_runtime": 43.5338,
"eval_samples_per_second": 52.901,
"eval_steps_per_second": 1.654,
"step": 400
},
{
"beta_dpo/beta": 0.22263628244400024,
"beta_dpo/beta_margin_grad_mean": -0.23202191293239594,
"beta_dpo/beta_margin_grad_std": 0.28018221259117126,
"beta_dpo/beta_margin_mean": 2.3617444038391113,
"beta_dpo/beta_margin_std": 2.6966373920440674,
"beta_dpo/beta_used": 0.22263628244400024,
"beta_dpo/beta_used_raw": 0.22263628244400024,
"beta_dpo/gap_mean": 7.699178695678711,
"beta_dpo/gap_std": 12.536005973815918,
"beta_dpo/loss_margin_mean": 10.631646156311035,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6061980347694633,
"grad_norm": 38.35382080078125,
"learning_rate": 2.0268718890989752e-07,
"logits/chosen": 1.3760192394256592,
"logits/rejected": 1.285376787185669,
"loss": 0.7726,
"step": 401
},
{
"beta_dpo/beta": 0.10150312632322311,
"beta_dpo/beta_margin_grad_mean": -0.3532201647758484,
"beta_dpo/beta_margin_grad_std": 0.22857801616191864,
"beta_dpo/beta_margin_mean": 0.8699325323104858,
"beta_dpo/beta_margin_std": 1.4455386400222778,
"beta_dpo/beta_used": 0.10150312632322311,
"beta_dpo/beta_used_raw": 0.10150312632322311,
"beta_dpo/gap_mean": 7.951926231384277,
"beta_dpo/gap_std": 12.835227012634277,
"beta_dpo/loss_margin_mean": 8.36948013305664,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6077097505668935,
"grad_norm": 26.841323852539062,
"learning_rate": 2.013895317751323e-07,
"logits/chosen": 1.2534160614013672,
"logits/rejected": 1.2528060674667358,
"loss": 0.9371,
"step": 402
},
{
"beta_dpo/beta": 0.07242181897163391,
"beta_dpo/beta_margin_grad_mean": -0.3970552384853363,
"beta_dpo/beta_margin_grad_std": 0.21366021037101746,
"beta_dpo/beta_margin_mean": 0.7245399951934814,
"beta_dpo/beta_margin_std": 1.604691743850708,
"beta_dpo/beta_used": 0.07242181897163391,
"beta_dpo/beta_used_raw": 0.0063858553767204285,
"beta_dpo/gap_mean": 7.986859321594238,
"beta_dpo/gap_std": 13.46160888671875,
"beta_dpo/loss_margin_mean": 8.63148021697998,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6092214663643235,
"grad_norm": 19.193462371826172,
"learning_rate": 2.0009323437965898e-07,
"logits/chosen": 1.312724232673645,
"logits/rejected": 1.2132437229156494,
"loss": 1.1025,
"step": 403
},
{
"beta_dpo/beta": 0.18393541872501373,
"beta_dpo/beta_margin_grad_mean": -0.30706873536109924,
"beta_dpo/beta_margin_grad_std": 0.27825888991355896,
"beta_dpo/beta_margin_mean": 1.7985137701034546,
"beta_dpo/beta_margin_std": 2.601863145828247,
"beta_dpo/beta_used": 0.18393541872501373,
"beta_dpo/beta_used_raw": 0.18393541872501373,
"beta_dpo/gap_mean": 8.376396179199219,
"beta_dpo/gap_std": 13.487390518188477,
"beta_dpo/loss_margin_mean": 9.271535873413086,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6107331821617535,
"grad_norm": 37.4723014831543,
"learning_rate": 1.9879833298370237e-07,
"logits/chosen": 1.2238993644714355,
"logits/rejected": 1.1487646102905273,
"loss": 0.825,
"step": 404
},
{
"beta_dpo/beta": 0.06863485276699066,
"beta_dpo/beta_margin_grad_mean": -0.383706271648407,
"beta_dpo/beta_margin_grad_std": 0.1839965283870697,
"beta_dpo/beta_margin_mean": 0.762927234172821,
"beta_dpo/beta_margin_std": 1.382511019706726,
"beta_dpo/beta_used": 0.06863485276699066,
"beta_dpo/beta_used_raw": -0.03406190127134323,
"beta_dpo/gap_mean": 8.272315979003906,
"beta_dpo/gap_std": 13.200721740722656,
"beta_dpo/loss_margin_mean": 6.787307262420654,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6122448979591837,
"grad_norm": 18.98505401611328,
"learning_rate": 1.975048638084379e-07,
"logits/chosen": 1.0490002632141113,
"logits/rejected": 1.010096788406372,
"loss": 1.0575,
"step": 405
},
{
"beta_dpo/beta": 0.2777172923088074,
"beta_dpo/beta_margin_grad_mean": -0.2499067783355713,
"beta_dpo/beta_margin_grad_std": 0.3056153953075409,
"beta_dpo/beta_margin_mean": 2.591289758682251,
"beta_dpo/beta_margin_std": 4.360300064086914,
"beta_dpo/beta_used": 0.2777172923088074,
"beta_dpo/beta_used_raw": 0.2777172923088074,
"beta_dpo/gap_mean": 8.169546127319336,
"beta_dpo/gap_std": 13.291061401367188,
"beta_dpo/loss_margin_mean": 9.693116188049316,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6137566137566137,
"grad_norm": 27.147951126098633,
"learning_rate": 1.9621286303497914e-07,
"logits/chosen": 1.38291335105896,
"logits/rejected": 1.1928125619888306,
"loss": 0.4666,
"step": 406
},
{
"beta_dpo/beta": 0.09770042449235916,
"beta_dpo/beta_margin_grad_mean": -0.3965763449668884,
"beta_dpo/beta_margin_grad_std": 0.2294262796640396,
"beta_dpo/beta_margin_mean": 0.648348331451416,
"beta_dpo/beta_margin_std": 1.6382852792739868,
"beta_dpo/beta_used": 0.09770042449235916,
"beta_dpo/beta_used_raw": 0.09770042449235916,
"beta_dpo/gap_mean": 7.964360237121582,
"beta_dpo/gap_std": 13.302556991577148,
"beta_dpo/loss_margin_mean": 6.1560211181640625,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6152683295540439,
"grad_norm": 26.721057891845703,
"learning_rate": 1.9492236680336483e-07,
"logits/chosen": 1.1335489749908447,
"logits/rejected": 0.9993535280227661,
"loss": 0.9793,
"step": 407
},
{
"beta_dpo/beta": 0.03132964298129082,
"beta_dpo/beta_margin_grad_mean": -0.42250344157218933,
"beta_dpo/beta_margin_grad_std": 0.13585536181926727,
"beta_dpo/beta_margin_mean": 0.3733391761779785,
"beta_dpo/beta_margin_std": 0.6798368096351624,
"beta_dpo/beta_used": 0.03132964298129082,
"beta_dpo/beta_used_raw": -0.010662967339158058,
"beta_dpo/gap_mean": 8.06380844116211,
"beta_dpo/gap_std": 13.13924503326416,
"beta_dpo/loss_margin_mean": 9.731854438781738,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6167800453514739,
"grad_norm": 9.80321216583252,
"learning_rate": 1.9363341121154895e-07,
"logits/chosen": 1.1080222129821777,
"logits/rejected": 0.993887186050415,
"loss": 1.2241,
"step": 408
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49895596504211426,
"beta_dpo/beta_margin_grad_std": 0.0032634278759360313,
"beta_dpo/beta_margin_mean": 0.0041763512417674065,
"beta_dpo/beta_margin_std": 0.013054397888481617,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.13152112066745758,
"beta_dpo/gap_mean": 7.688338756561279,
"beta_dpo/gap_std": 13.180634498596191,
"beta_dpo/loss_margin_mean": 4.1763505935668945,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.618291761148904,
"grad_norm": 0.39368870854377747,
"learning_rate": 1.9234603231438994e-07,
"logits/chosen": 1.1868549585342407,
"logits/rejected": 1.1779615879058838,
"loss": 1.3825,
"step": 409
},
{
"beta_dpo/beta": 0.06593397259712219,
"beta_dpo/beta_margin_grad_mean": -0.3910011649131775,
"beta_dpo/beta_margin_grad_std": 0.18957333266735077,
"beta_dpo/beta_margin_mean": 0.7101063132286072,
"beta_dpo/beta_margin_std": 1.3572521209716797,
"beta_dpo/beta_used": 0.06593397259712219,
"beta_dpo/beta_used_raw": 0.02093401923775673,
"beta_dpo/gap_mean": 7.732787132263184,
"beta_dpo/gap_std": 13.139517784118652,
"beta_dpo/loss_margin_mean": 8.426246643066406,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6198034769463341,
"grad_norm": 23.81303596496582,
"learning_rate": 1.9106026612264315e-07,
"logits/chosen": 0.9190754890441895,
"logits/rejected": 0.8898967504501343,
"loss": 1.1263,
"step": 410
},
{
"beta_dpo/beta": 0.09488093107938766,
"beta_dpo/beta_margin_grad_mean": -0.38552436232566833,
"beta_dpo/beta_margin_grad_std": 0.23449452221393585,
"beta_dpo/beta_margin_mean": 0.7124552130699158,
"beta_dpo/beta_margin_std": 1.548302412033081,
"beta_dpo/beta_used": 0.09488093107938766,
"beta_dpo/beta_used_raw": 0.09488093107938766,
"beta_dpo/gap_mean": 7.6596527099609375,
"beta_dpo/gap_std": 13.327152252197266,
"beta_dpo/loss_margin_mean": 8.116888046264648,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6213151927437641,
"grad_norm": 26.718870162963867,
"learning_rate": 1.8977614860195296e-07,
"logits/chosen": 1.1888914108276367,
"logits/rejected": 1.1344188451766968,
"loss": 1.0957,
"step": 411
},
{
"beta_dpo/beta": 0.0628020390868187,
"beta_dpo/beta_margin_grad_mean": -0.415872186422348,
"beta_dpo/beta_margin_grad_std": 0.18325024843215942,
"beta_dpo/beta_margin_mean": 0.4076176881790161,
"beta_dpo/beta_margin_std": 0.8952716588973999,
"beta_dpo/beta_used": 0.0628020390868187,
"beta_dpo/beta_used_raw": 0.0628020390868187,
"beta_dpo/gap_mean": 7.711529731750488,
"beta_dpo/gap_std": 13.572291374206543,
"beta_dpo/loss_margin_mean": 6.901672840118408,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6228269085411943,
"grad_norm": 16.974454879760742,
"learning_rate": 1.8849371567184662e-07,
"logits/chosen": 1.0629103183746338,
"logits/rejected": 1.0026750564575195,
"loss": 1.0983,
"step": 412
},
{
"beta_dpo/beta": 0.02931269444525242,
"beta_dpo/beta_margin_grad_mean": -0.46073606610298157,
"beta_dpo/beta_margin_grad_std": 0.13029100000858307,
"beta_dpo/beta_margin_mean": 0.1788182109594345,
"beta_dpo/beta_margin_std": 0.6013691425323486,
"beta_dpo/beta_used": 0.02931269444525242,
"beta_dpo/beta_used_raw": -0.01891515776515007,
"beta_dpo/gap_mean": 7.34831428527832,
"beta_dpo/gap_std": 13.715509414672852,
"beta_dpo/loss_margin_mean": 6.074856281280518,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6243386243386243,
"grad_norm": 10.13986873626709,
"learning_rate": 1.872130032047302e-07,
"logits/chosen": 1.0452518463134766,
"logits/rejected": 0.9700994491577148,
"loss": 1.2518,
"step": 413
},
{
"beta_dpo/beta": 0.008443448692560196,
"beta_dpo/beta_margin_grad_mean": -0.4799124598503113,
"beta_dpo/beta_margin_grad_std": 0.04836396127939224,
"beta_dpo/beta_margin_mean": 0.08206567168235779,
"beta_dpo/beta_margin_std": 0.19825638830661774,
"beta_dpo/beta_used": 0.008443448692560196,
"beta_dpo/beta_used_raw": -0.0005194572731852531,
"beta_dpo/gap_mean": 7.575908184051514,
"beta_dpo/gap_std": 13.915918350219727,
"beta_dpo/loss_margin_mean": 9.079741477966309,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6258503401360545,
"grad_norm": 2.957254648208618,
"learning_rate": 1.8593404702488436e-07,
"logits/chosen": 1.1927499771118164,
"logits/rejected": 1.1158475875854492,
"loss": 1.3398,
"step": 414
},
{
"beta_dpo/beta": 0.153883695602417,
"beta_dpo/beta_margin_grad_mean": -0.3574841320514679,
"beta_dpo/beta_margin_grad_std": 0.30664384365081787,
"beta_dpo/beta_margin_mean": 1.2643290758132935,
"beta_dpo/beta_margin_std": 2.7988193035125732,
"beta_dpo/beta_used": 0.153883695602417,
"beta_dpo/beta_used_raw": 0.153883695602417,
"beta_dpo/gap_mean": 7.751776695251465,
"beta_dpo/gap_std": 14.118085861206055,
"beta_dpo/loss_margin_mean": 8.410996437072754,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6273620559334845,
"grad_norm": 49.67338180541992,
"learning_rate": 1.846568829074628e-07,
"logits/chosen": 1.3675744533538818,
"logits/rejected": 1.315309762954712,
"loss": 1.0424,
"step": 415
},
{
"beta_dpo/beta": 0.057433560490608215,
"beta_dpo/beta_margin_grad_mean": -0.4372212290763855,
"beta_dpo/beta_margin_grad_std": 0.18154680728912354,
"beta_dpo/beta_margin_mean": 0.38836348056793213,
"beta_dpo/beta_margin_std": 1.0910459756851196,
"beta_dpo/beta_used": 0.057433560490608215,
"beta_dpo/beta_used_raw": 0.006889820098876953,
"beta_dpo/gap_mean": 7.063965797424316,
"beta_dpo/gap_std": 13.99463939666748,
"beta_dpo/loss_margin_mean": 3.8573639392852783,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6288737717309146,
"grad_norm": 19.25937271118164,
"learning_rate": 1.8338154657749128e-07,
"logits/chosen": 1.1650896072387695,
"logits/rejected": 1.077005386352539,
"loss": 1.227,
"step": 416
},
{
"beta_dpo/beta": 0.16283152997493744,
"beta_dpo/beta_margin_grad_mean": -0.29045432806015015,
"beta_dpo/beta_margin_grad_std": 0.2815614640712738,
"beta_dpo/beta_margin_mean": 1.6562851667404175,
"beta_dpo/beta_margin_std": 2.3946468830108643,
"beta_dpo/beta_used": 0.16283152997493744,
"beta_dpo/beta_used_raw": 0.16283152997493744,
"beta_dpo/gap_mean": 7.376601219177246,
"beta_dpo/gap_std": 14.058598518371582,
"beta_dpo/loss_margin_mean": 9.845354080200195,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6303854875283447,
"grad_norm": 47.979278564453125,
"learning_rate": 1.8210807370886849e-07,
"logits/chosen": 1.4944508075714111,
"logits/rejected": 1.4134340286254883,
"loss": 0.9838,
"step": 417
},
{
"beta_dpo/beta": 0.0036787008866667747,
"beta_dpo/beta_margin_grad_mean": -0.4948444366455078,
"beta_dpo/beta_margin_grad_std": 0.016248730942606926,
"beta_dpo/beta_margin_mean": 0.020664792507886887,
"beta_dpo/beta_margin_std": 0.06512568145990372,
"beta_dpo/beta_used": 0.0036787008866667747,
"beta_dpo/beta_used_raw": -0.04231725633144379,
"beta_dpo/gap_mean": 7.137986183166504,
"beta_dpo/gap_std": 14.092864036560059,
"beta_dpo/loss_margin_mean": 3.5258655548095703,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6318972033257747,
"grad_norm": 1.5100537538528442,
"learning_rate": 1.8083649992336825e-07,
"logits/chosen": 1.3490641117095947,
"logits/rejected": 1.325473666191101,
"loss": 1.3665,
"step": 418
},
{
"beta_dpo/beta": 0.1923871785402298,
"beta_dpo/beta_margin_grad_mean": -0.2901119589805603,
"beta_dpo/beta_margin_grad_std": 0.25045278668403625,
"beta_dpo/beta_margin_mean": 2.2992641925811768,
"beta_dpo/beta_margin_std": 3.499342679977417,
"beta_dpo/beta_used": 0.1923871785402298,
"beta_dpo/beta_used_raw": 0.1923871785402298,
"beta_dpo/gap_mean": 7.463541507720947,
"beta_dpo/gap_std": 14.002967834472656,
"beta_dpo/loss_margin_mean": 10.991048812866211,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6334089191232048,
"grad_norm": 33.13517761230469,
"learning_rate": 1.7956686078964255e-07,
"logits/chosen": 1.0279557704925537,
"logits/rejected": 0.9559741020202637,
"loss": 0.8391,
"step": 419
},
{
"beta_dpo/beta": 0.06206429749727249,
"beta_dpo/beta_margin_grad_mean": -0.4446491003036499,
"beta_dpo/beta_margin_grad_std": 0.2273784577846527,
"beta_dpo/beta_margin_mean": 0.3325929343700409,
"beta_dpo/beta_margin_std": 1.5433344841003418,
"beta_dpo/beta_used": 0.06206429749727249,
"beta_dpo/beta_used_raw": 0.0054812245070934296,
"beta_dpo/gap_mean": 7.510132789611816,
"beta_dpo/gap_std": 14.362464904785156,
"beta_dpo/loss_margin_mean": 6.678454399108887,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6349206349206349,
"grad_norm": 20.000598907470703,
"learning_rate": 1.782991918222275e-07,
"logits/chosen": 1.1560618877410889,
"logits/rejected": 1.063835620880127,
"loss": 1.2286,
"step": 420
},
{
"beta_dpo/beta": 0.2239060401916504,
"beta_dpo/beta_margin_grad_mean": -0.3560366630554199,
"beta_dpo/beta_margin_grad_std": 0.3599107563495636,
"beta_dpo/beta_margin_mean": 1.437946081161499,
"beta_dpo/beta_margin_std": 3.8129684925079346,
"beta_dpo/beta_used": 0.2239060401916504,
"beta_dpo/beta_used_raw": 0.2239060401916504,
"beta_dpo/gap_mean": 7.295876502990723,
"beta_dpo/gap_std": 14.796440124511719,
"beta_dpo/loss_margin_mean": 6.508144378662109,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.636432350718065,
"grad_norm": 64.45843505859375,
"learning_rate": 1.7703352848054887e-07,
"logits/chosen": 1.0824806690216064,
"logits/rejected": 0.9518294334411621,
"loss": 1.1997,
"step": 421
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49806472659111023,
"beta_dpo/beta_margin_grad_std": 0.00356177962385118,
"beta_dpo/beta_margin_mean": 0.0077416617423295975,
"beta_dpo/beta_margin_std": 0.014248386025428772,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.10803447663784027,
"beta_dpo/gap_mean": 7.264566898345947,
"beta_dpo/gap_std": 14.936932563781738,
"beta_dpo/loss_margin_mean": 7.741661071777344,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6379440665154951,
"grad_norm": 0.3464619219303131,
"learning_rate": 1.7576990616793137e-07,
"logits/chosen": 1.0727565288543701,
"logits/rejected": 1.0302057266235352,
"loss": 1.3825,
"step": 422
},
{
"beta_dpo/beta": 0.15241782367229462,
"beta_dpo/beta_margin_grad_mean": -0.32938140630722046,
"beta_dpo/beta_margin_grad_std": 0.2497241348028183,
"beta_dpo/beta_margin_mean": 1.409651279449463,
"beta_dpo/beta_margin_std": 2.4026806354522705,
"beta_dpo/beta_used": 0.15241782367229462,
"beta_dpo/beta_used_raw": 0.15241782367229462,
"beta_dpo/gap_mean": 7.592950344085693,
"beta_dpo/gap_std": 14.759190559387207,
"beta_dpo/loss_margin_mean": 8.833235740661621,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6394557823129252,
"grad_norm": 33.850650787353516,
"learning_rate": 1.745083602306071e-07,
"logits/chosen": 1.3902552127838135,
"logits/rejected": 1.2815285921096802,
"loss": 0.868,
"step": 423
},
{
"beta_dpo/beta": 0.10763978958129883,
"beta_dpo/beta_margin_grad_mean": -0.3627639710903168,
"beta_dpo/beta_margin_grad_std": 0.23971767723560333,
"beta_dpo/beta_margin_mean": 0.8668607473373413,
"beta_dpo/beta_margin_std": 1.660807728767395,
"beta_dpo/beta_used": 0.10763978958129883,
"beta_dpo/beta_used_raw": 0.10763978958129883,
"beta_dpo/gap_mean": 7.692775726318359,
"beta_dpo/gap_std": 14.601524353027344,
"beta_dpo/loss_margin_mean": 8.42215633392334,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6409674981103552,
"grad_norm": 24.332290649414062,
"learning_rate": 1.7324892595672804e-07,
"logits/chosen": 1.3804824352264404,
"logits/rejected": 1.353201150894165,
"loss": 0.9775,
"step": 424
},
{
"beta_dpo/beta": 0.22863678634166718,
"beta_dpo/beta_margin_grad_mean": -0.2999263405799866,
"beta_dpo/beta_margin_grad_std": 0.3022958040237427,
"beta_dpo/beta_margin_mean": 2.481389045715332,
"beta_dpo/beta_margin_std": 3.8900444507598877,
"beta_dpo/beta_used": 0.22863678634166718,
"beta_dpo/beta_used_raw": 0.22863678634166718,
"beta_dpo/gap_mean": 8.06348705291748,
"beta_dpo/gap_std": 14.56132698059082,
"beta_dpo/loss_margin_mean": 10.167325019836426,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6424792139077853,
"grad_norm": 64.42094421386719,
"learning_rate": 1.7199163857537824e-07,
"logits/chosen": 1.2863309383392334,
"logits/rejected": 1.2393105030059814,
"loss": 1.0282,
"step": 425
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4992324113845825,
"beta_dpo/beta_margin_grad_std": 0.004430633503943682,
"beta_dpo/beta_margin_mean": 0.0030706829857081175,
"beta_dpo/beta_margin_std": 0.01772434450685978,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.1351933777332306,
"beta_dpo/gap_mean": 7.544498443603516,
"beta_dpo/gap_std": 14.976876258850098,
"beta_dpo/loss_margin_mean": 3.0706827640533447,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6439909297052154,
"grad_norm": 0.3542765974998474,
"learning_rate": 1.7073653325558828e-07,
"logits/chosen": 1.4472196102142334,
"logits/rejected": 1.4191794395446777,
"loss": 1.3827,
"step": 426
},
{
"beta_dpo/beta": 0.033856652677059174,
"beta_dpo/beta_margin_grad_mean": -0.4432615637779236,
"beta_dpo/beta_margin_grad_std": 0.1333458572626114,
"beta_dpo/beta_margin_mean": 0.2626633048057556,
"beta_dpo/beta_margin_std": 0.6418102383613586,
"beta_dpo/beta_used": 0.033856652677059174,
"beta_dpo/beta_used_raw": 0.033856652677059174,
"beta_dpo/gap_mean": 7.30242919921875,
"beta_dpo/gap_std": 15.157771110534668,
"beta_dpo/loss_margin_mean": 7.709880828857422,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6455026455026455,
"grad_norm": 11.39405632019043,
"learning_rate": 1.6948364510535218e-07,
"logits/chosen": 1.1545445919036865,
"logits/rejected": 1.0615546703338623,
"loss": 1.2381,
"step": 427
},
{
"beta_dpo/beta": 0.17536821961402893,
"beta_dpo/beta_margin_grad_mean": -0.32765766978263855,
"beta_dpo/beta_margin_grad_std": 0.29305100440979004,
"beta_dpo/beta_margin_mean": 1.2307204008102417,
"beta_dpo/beta_margin_std": 2.660304069519043,
"beta_dpo/beta_used": 0.17536821961402893,
"beta_dpo/beta_used_raw": 0.17536821961402893,
"beta_dpo/gap_mean": 7.2855329513549805,
"beta_dpo/gap_std": 15.077669143676758,
"beta_dpo/loss_margin_mean": 7.104245185852051,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6470143613000756,
"grad_norm": 45.964027404785156,
"learning_rate": 1.6823300917064458e-07,
"logits/chosen": 1.4189622402191162,
"logits/rejected": 1.3559203147888184,
"loss": 0.8662,
"step": 428
},
{
"beta_dpo/beta": 0.25602617859840393,
"beta_dpo/beta_margin_grad_mean": -0.2635319232940674,
"beta_dpo/beta_margin_grad_std": 0.33962488174438477,
"beta_dpo/beta_margin_mean": 2.2876291275024414,
"beta_dpo/beta_margin_std": 3.774615526199341,
"beta_dpo/beta_used": 0.25602617859840393,
"beta_dpo/beta_used_raw": 0.25602617859840393,
"beta_dpo/gap_mean": 7.527216911315918,
"beta_dpo/gap_std": 14.919697761535645,
"beta_dpo/loss_margin_mean": 8.933639526367188,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6485260770975056,
"grad_norm": 61.03815460205078,
"learning_rate": 1.669846604344412e-07,
"logits/chosen": 1.4187898635864258,
"logits/rejected": 1.4321041107177734,
"loss": 0.9074,
"step": 429
},
{
"beta_dpo/beta": 0.2114437073469162,
"beta_dpo/beta_margin_grad_mean": -0.2747531235218048,
"beta_dpo/beta_margin_grad_std": 0.2975596785545349,
"beta_dpo/beta_margin_mean": 2.024601697921753,
"beta_dpo/beta_margin_std": 2.9273641109466553,
"beta_dpo/beta_used": 0.2114437073469162,
"beta_dpo/beta_used_raw": 0.2114437073469162,
"beta_dpo/gap_mean": 7.846027374267578,
"beta_dpo/gap_std": 14.999692916870117,
"beta_dpo/loss_margin_mean": 9.444135665893555,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6500377928949358,
"grad_norm": 42.52581024169922,
"learning_rate": 1.6573863381573954e-07,
"logits/chosen": 1.240324854850769,
"logits/rejected": 1.2547059059143066,
"loss": 0.7219,
"step": 430
},
{
"beta_dpo/beta": 0.10541002452373505,
"beta_dpo/beta_margin_grad_mean": -0.3474757969379425,
"beta_dpo/beta_margin_grad_std": 0.25322234630584717,
"beta_dpo/beta_margin_mean": 0.8967947363853455,
"beta_dpo/beta_margin_std": 1.6649945974349976,
"beta_dpo/beta_used": 0.10541002452373505,
"beta_dpo/beta_used_raw": 0.10541002452373505,
"beta_dpo/gap_mean": 8.028791427612305,
"beta_dpo/gap_std": 14.900115013122559,
"beta_dpo/loss_margin_mean": 8.783801078796387,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6515495086923658,
"grad_norm": 24.540010452270508,
"learning_rate": 1.6449496416858282e-07,
"logits/chosen": 1.1816718578338623,
"logits/rejected": 1.1253862380981445,
"loss": 0.9805,
"step": 431
},
{
"beta_dpo/beta": 0.18534046411514282,
"beta_dpo/beta_margin_grad_mean": -0.3596099615097046,
"beta_dpo/beta_margin_grad_std": 0.33429720997810364,
"beta_dpo/beta_margin_mean": 1.294745683670044,
"beta_dpo/beta_margin_std": 3.190850257873535,
"beta_dpo/beta_used": 0.18534046411514282,
"beta_dpo/beta_used_raw": 0.18534046411514282,
"beta_dpo/gap_mean": 8.034162521362305,
"beta_dpo/gap_std": 15.062065124511719,
"beta_dpo/loss_margin_mean": 7.130716800689697,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6530612244897959,
"grad_norm": 48.48855972290039,
"learning_rate": 1.632536862810844e-07,
"logits/chosen": 1.2895960807800293,
"logits/rejected": 1.258962869644165,
"loss": 0.9771,
"step": 432
},
{
"beta_dpo/beta": 0.15797749161720276,
"beta_dpo/beta_margin_grad_mean": -0.31328731775283813,
"beta_dpo/beta_margin_grad_std": 0.3253113925457001,
"beta_dpo/beta_margin_mean": 1.807498574256897,
"beta_dpo/beta_margin_std": 3.004568576812744,
"beta_dpo/beta_used": 0.15797749161720276,
"beta_dpo/beta_used_raw": 0.15797749161720276,
"beta_dpo/gap_mean": 8.159663200378418,
"beta_dpo/gap_std": 15.607994079589844,
"beta_dpo/loss_margin_mean": 10.39791488647461,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.654572940287226,
"grad_norm": 48.692752838134766,
"learning_rate": 1.6201483487445515e-07,
"logits/chosen": 1.429356336593628,
"logits/rejected": 1.3947436809539795,
"loss": 1.14,
"step": 433
},
{
"beta_dpo/beta": 0.2732776999473572,
"beta_dpo/beta_margin_grad_mean": -0.2960241138935089,
"beta_dpo/beta_margin_grad_std": 0.3341895043849945,
"beta_dpo/beta_margin_mean": 3.3064119815826416,
"beta_dpo/beta_margin_std": 5.642823219299316,
"beta_dpo/beta_used": 0.2732776999473572,
"beta_dpo/beta_used_raw": 0.2732776999473572,
"beta_dpo/gap_mean": 8.857320785522461,
"beta_dpo/gap_std": 15.690851211547852,
"beta_dpo/loss_margin_mean": 10.634892463684082,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.656084656084656,
"grad_norm": 73.32202911376953,
"learning_rate": 1.6077844460203204e-07,
"logits/chosen": 1.5748176574707031,
"logits/rejected": 1.4792296886444092,
"loss": 0.9697,
"step": 434
},
{
"beta_dpo/beta": 0.09363246709108353,
"beta_dpo/beta_margin_grad_mean": -0.3969593346118927,
"beta_dpo/beta_margin_grad_std": 0.24074605107307434,
"beta_dpo/beta_margin_mean": 0.8078661561012268,
"beta_dpo/beta_margin_std": 2.0362329483032227,
"beta_dpo/beta_used": 0.09363246709108353,
"beta_dpo/beta_used_raw": 0.09363246709108353,
"beta_dpo/gap_mean": 8.576042175292969,
"beta_dpo/gap_std": 15.559226989746094,
"beta_dpo/loss_margin_mean": 7.586299896240234,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6575963718820862,
"grad_norm": 26.006427764892578,
"learning_rate": 1.5954455004830878e-07,
"logits/chosen": 1.435976266860962,
"logits/rejected": 1.415736198425293,
"loss": 1.0994,
"step": 435
},
{
"beta_dpo/beta": 0.08419980108737946,
"beta_dpo/beta_margin_grad_mean": -0.3715381920337677,
"beta_dpo/beta_margin_grad_std": 0.24329714477062225,
"beta_dpo/beta_margin_mean": 0.6914465427398682,
"beta_dpo/beta_margin_std": 1.359487533569336,
"beta_dpo/beta_used": 0.08419980108737946,
"beta_dpo/beta_used_raw": 0.08419980108737946,
"beta_dpo/gap_mean": 8.449478149414062,
"beta_dpo/gap_std": 15.672661781311035,
"beta_dpo/loss_margin_mean": 8.278998374938965,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6591080876795162,
"grad_norm": 23.006898880004883,
"learning_rate": 1.5831318572796847e-07,
"logits/chosen": 1.3777055740356445,
"logits/rejected": 1.3134238719940186,
"loss": 1.0802,
"step": 436
},
{
"beta_dpo/beta": 0.1277225762605667,
"beta_dpo/beta_margin_grad_mean": -0.3472208082675934,
"beta_dpo/beta_margin_grad_std": 0.2929202914237976,
"beta_dpo/beta_margin_mean": 1.1181834936141968,
"beta_dpo/beta_margin_std": 2.1439664363861084,
"beta_dpo/beta_used": 0.1277225762605667,
"beta_dpo/beta_used_raw": 0.1277225762605667,
"beta_dpo/gap_mean": 8.755277633666992,
"beta_dpo/gap_std": 15.802358627319336,
"beta_dpo/loss_margin_mean": 8.655381202697754,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6606198034769464,
"grad_norm": 32.87360382080078,
"learning_rate": 1.5708438608491815e-07,
"logits/chosen": 1.4010035991668701,
"logits/rejected": 1.249039888381958,
"loss": 0.9412,
"step": 437
},
{
"beta_dpo/beta": 0.1653028279542923,
"beta_dpo/beta_margin_grad_mean": -0.3463587164878845,
"beta_dpo/beta_margin_grad_std": 0.25206202268600464,
"beta_dpo/beta_margin_mean": 2.2152247428894043,
"beta_dpo/beta_margin_std": 3.8490896224975586,
"beta_dpo/beta_used": 0.1653028279542923,
"beta_dpo/beta_used_raw": 0.10271503031253815,
"beta_dpo/gap_mean": 8.468579292297363,
"beta_dpo/gap_std": 15.612485885620117,
"beta_dpo/loss_margin_mean": 9.343290328979492,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6621315192743764,
"grad_norm": 51.89805603027344,
"learning_rate": 1.558581854913253e-07,
"logits/chosen": 1.3848375082015991,
"logits/rejected": 1.3263285160064697,
"loss": 1.0567,
"step": 438
},
{
"beta_dpo/beta": 0.1043519601225853,
"beta_dpo/beta_margin_grad_mean": -0.36386561393737793,
"beta_dpo/beta_margin_grad_std": 0.23430514335632324,
"beta_dpo/beta_margin_mean": 1.2251368761062622,
"beta_dpo/beta_margin_std": 2.3316707611083984,
"beta_dpo/beta_used": 0.1043519601225853,
"beta_dpo/beta_used_raw": 0.1043519601225853,
"beta_dpo/gap_mean": 8.62759780883789,
"beta_dpo/gap_std": 15.364070892333984,
"beta_dpo/loss_margin_mean": 8.964755058288574,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6636432350718064,
"grad_norm": 31.003793716430664,
"learning_rate": 1.5463461824665658e-07,
"logits/chosen": 1.3128708600997925,
"logits/rejected": 1.2499792575836182,
"loss": 1.0547,
"step": 439
},
{
"beta_dpo/beta": 0.12300290167331696,
"beta_dpo/beta_margin_grad_mean": -0.2966170907020569,
"beta_dpo/beta_margin_grad_std": 0.21793873608112335,
"beta_dpo/beta_margin_mean": 1.7257822751998901,
"beta_dpo/beta_margin_std": 2.4124960899353027,
"beta_dpo/beta_used": 0.12300290167331696,
"beta_dpo/beta_used_raw": 0.12300290167331696,
"beta_dpo/gap_mean": 9.290130615234375,
"beta_dpo/gap_std": 15.132123947143555,
"beta_dpo/loss_margin_mean": 12.742350578308105,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6651549508692366,
"grad_norm": 30.541147232055664,
"learning_rate": 1.534137185767178e-07,
"logits/chosen": 1.3556729555130005,
"logits/rejected": 1.211385726928711,
"loss": 0.918,
"step": 440
},
{
"beta_dpo/beta": 0.07583055645227432,
"beta_dpo/beta_margin_grad_mean": -0.38547220826148987,
"beta_dpo/beta_margin_grad_std": 0.2417447417974472,
"beta_dpo/beta_margin_mean": 0.9196439385414124,
"beta_dpo/beta_margin_std": 1.828843355178833,
"beta_dpo/beta_used": 0.07583055645227432,
"beta_dpo/beta_used_raw": -0.05472517013549805,
"beta_dpo/gap_mean": 9.688538551330566,
"beta_dpo/gap_std": 14.977852821350098,
"beta_dpo/loss_margin_mean": 9.575737953186035,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6666666666666666,
"grad_norm": 22.2325382232666,
"learning_rate": 1.521955206326976e-07,
"logits/chosen": 1.3583239316940308,
"logits/rejected": 1.2205604314804077,
"loss": 1.1443,
"step": 441
},
{
"beta_dpo/beta": 0.10827788710594177,
"beta_dpo/beta_margin_grad_mean": -0.3940413296222687,
"beta_dpo/beta_margin_grad_std": 0.24407008290290833,
"beta_dpo/beta_margin_mean": 0.9771229028701782,
"beta_dpo/beta_margin_std": 2.162266969680786,
"beta_dpo/beta_used": 0.10827788710594177,
"beta_dpo/beta_used_raw": 0.02357833832502365,
"beta_dpo/gap_mean": 9.418302536010742,
"beta_dpo/gap_std": 14.739940643310547,
"beta_dpo/loss_margin_mean": 8.74870491027832,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6681783824640968,
"grad_norm": 37.141815185546875,
"learning_rate": 1.5098005849021078e-07,
"logits/chosen": 1.0895839929580688,
"logits/rejected": 1.0859336853027344,
"loss": 0.9904,
"step": 442
},
{
"beta_dpo/beta": 0.07059619575738907,
"beta_dpo/beta_margin_grad_mean": -0.40955042839050293,
"beta_dpo/beta_margin_grad_std": 0.22130858898162842,
"beta_dpo/beta_margin_mean": 0.7329308986663818,
"beta_dpo/beta_margin_std": 1.8423486948013306,
"beta_dpo/beta_used": 0.07059619575738907,
"beta_dpo/beta_used_raw": 0.0686114951968193,
"beta_dpo/gap_mean": 9.67289924621582,
"beta_dpo/gap_std": 14.846346855163574,
"beta_dpo/loss_margin_mean": 11.07690143585205,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6696900982615268,
"grad_norm": 20.315444946289062,
"learning_rate": 1.4976736614834662e-07,
"logits/chosen": 1.119490146636963,
"logits/rejected": 1.0487221479415894,
"loss": 1.1261,
"step": 443
},
{
"beta_dpo/beta": 0.01301487721502781,
"beta_dpo/beta_margin_grad_mean": -0.4735754728317261,
"beta_dpo/beta_margin_grad_std": 0.06174745038151741,
"beta_dpo/beta_margin_mean": 0.10920752584934235,
"beta_dpo/beta_margin_std": 0.2566579580307007,
"beta_dpo/beta_used": 0.01301487721502781,
"beta_dpo/beta_used_raw": -0.10069774836301804,
"beta_dpo/gap_mean": 9.321052551269531,
"beta_dpo/gap_std": 14.88135814666748,
"beta_dpo/loss_margin_mean": 6.3398823738098145,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.671201814058957,
"grad_norm": 4.826290607452393,
"learning_rate": 1.4855747752871654e-07,
"logits/chosen": 1.2667855024337769,
"logits/rejected": 1.1672453880310059,
"loss": 1.294,
"step": 444
},
{
"beta_dpo/beta": 0.12245304882526398,
"beta_dpo/beta_margin_grad_mean": -0.3227024972438812,
"beta_dpo/beta_margin_grad_std": 0.23704025149345398,
"beta_dpo/beta_margin_mean": 1.4407753944396973,
"beta_dpo/beta_margin_std": 2.2449750900268555,
"beta_dpo/beta_used": 0.12245304882526398,
"beta_dpo/beta_used_raw": 0.12245304882526398,
"beta_dpo/gap_mean": 9.358579635620117,
"beta_dpo/gap_std": 14.765151023864746,
"beta_dpo/loss_margin_mean": 10.7389554977417,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.672713529856387,
"grad_norm": 32.277809143066406,
"learning_rate": 1.473504264745062e-07,
"logits/chosen": 1.4639275074005127,
"logits/rejected": 1.4567673206329346,
"loss": 0.9672,
"step": 445
},
{
"beta_dpo/beta": 0.23159979283809662,
"beta_dpo/beta_margin_grad_mean": -0.2718433737754822,
"beta_dpo/beta_margin_grad_std": 0.2508421540260315,
"beta_dpo/beta_margin_mean": 3.571117401123047,
"beta_dpo/beta_margin_std": 4.958829402923584,
"beta_dpo/beta_used": 0.23159979283809662,
"beta_dpo/beta_used_raw": 0.23159979283809662,
"beta_dpo/gap_mean": 9.482922554016113,
"beta_dpo/gap_std": 14.227436065673828,
"beta_dpo/loss_margin_mean": 11.683351516723633,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.674225245653817,
"grad_norm": 44.217838287353516,
"learning_rate": 1.461462467495284e-07,
"logits/chosen": 1.1846120357513428,
"logits/rejected": 1.1139745712280273,
"loss": 0.8702,
"step": 446
},
{
"beta_dpo/beta": 0.04251420870423317,
"beta_dpo/beta_margin_grad_mean": -0.4159136116504669,
"beta_dpo/beta_margin_grad_std": 0.14413373172283173,
"beta_dpo/beta_margin_mean": 0.3935755789279938,
"beta_dpo/beta_margin_std": 0.6931925415992737,
"beta_dpo/beta_used": 0.04251420870423317,
"beta_dpo/beta_used_raw": 0.04251420870423317,
"beta_dpo/gap_mean": 9.748671531677246,
"beta_dpo/gap_std": 14.281017303466797,
"beta_dpo/loss_margin_mean": 9.072275161743164,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6757369614512472,
"grad_norm": 13.691399574279785,
"learning_rate": 1.4494497203727843e-07,
"logits/chosen": 1.2142938375473022,
"logits/rejected": 1.043416976928711,
"loss": 1.1222,
"step": 447
},
{
"beta_dpo/beta": 0.08456003665924072,
"beta_dpo/beta_margin_grad_mean": -0.3609682023525238,
"beta_dpo/beta_margin_grad_std": 0.21768730878829956,
"beta_dpo/beta_margin_mean": 0.764079213142395,
"beta_dpo/beta_margin_std": 1.2195158004760742,
"beta_dpo/beta_used": 0.08456003665924072,
"beta_dpo/beta_used_raw": 0.08456003665924072,
"beta_dpo/gap_mean": 9.542774200439453,
"beta_dpo/gap_std": 14.50007152557373,
"beta_dpo/loss_margin_mean": 8.9943265914917,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6772486772486772,
"grad_norm": 19.512845993041992,
"learning_rate": 1.4374663593999256e-07,
"logits/chosen": 1.164783000946045,
"logits/rejected": 1.1104214191436768,
"loss": 0.9485,
"step": 448
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49916499853134155,
"beta_dpo/beta_margin_grad_std": 0.003720273729413748,
"beta_dpo/beta_margin_mean": 0.0033401332329958677,
"beta_dpo/beta_margin_std": 0.014882085844874382,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.14399409294128418,
"beta_dpo/gap_mean": 8.810772895812988,
"beta_dpo/gap_std": 14.4425687789917,
"beta_dpo/loss_margin_mean": 3.3401331901550293,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6787603930461074,
"grad_norm": 0.32376447319984436,
"learning_rate": 1.4255127197770707e-07,
"logits/chosen": 0.9270842671394348,
"logits/rejected": 0.9556566476821899,
"loss": 1.3816,
"step": 449
},
{
"beta_dpo/beta": 0.14452773332595825,
"beta_dpo/beta_margin_grad_mean": -0.34042176604270935,
"beta_dpo/beta_margin_grad_std": 0.3004254698753357,
"beta_dpo/beta_margin_mean": 1.0860893726348877,
"beta_dpo/beta_margin_std": 2.2014052867889404,
"beta_dpo/beta_used": 0.14452773332595825,
"beta_dpo/beta_used_raw": 0.14452773332595825,
"beta_dpo/gap_mean": 8.172224998474121,
"beta_dpo/gap_std": 14.656105041503906,
"beta_dpo/loss_margin_mean": 7.372296333312988,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6802721088435374,
"grad_norm": 34.80299758911133,
"learning_rate": 1.4135891358732205e-07,
"logits/chosen": 1.0534474849700928,
"logits/rejected": 0.9382642507553101,
"loss": 0.9493,
"step": 450
},
{
"beta_dpo/beta": 0.01695121079683304,
"beta_dpo/beta_margin_grad_mean": -0.4738851487636566,
"beta_dpo/beta_margin_grad_std": 0.05885161831974983,
"beta_dpo/beta_margin_mean": 0.10612621158361435,
"beta_dpo/beta_margin_std": 0.23932921886444092,
"beta_dpo/beta_used": 0.01695121079683304,
"beta_dpo/beta_used_raw": 0.01695121079683304,
"beta_dpo/gap_mean": 7.888227462768555,
"beta_dpo/gap_std": 14.654131889343262,
"beta_dpo/loss_margin_mean": 6.234951019287109,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6817838246409675,
"grad_norm": 5.221630096435547,
"learning_rate": 1.4016959412166437e-07,
"logits/chosen": 1.241917371749878,
"logits/rejected": 1.1958801746368408,
"loss": 1.2904,
"step": 451
},
{
"beta_dpo/beta": 0.0702991634607315,
"beta_dpo/beta_margin_grad_mean": -0.39360693097114563,
"beta_dpo/beta_margin_grad_std": 0.21342332661151886,
"beta_dpo/beta_margin_mean": 0.5900993943214417,
"beta_dpo/beta_margin_std": 1.2495468854904175,
"beta_dpo/beta_used": 0.0702991634607315,
"beta_dpo/beta_used_raw": 0.0702991634607315,
"beta_dpo/gap_mean": 7.974970817565918,
"beta_dpo/gap_std": 14.791309356689453,
"beta_dpo/loss_margin_mean": 8.745607376098633,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6832955404383976,
"grad_norm": 20.654272079467773,
"learning_rate": 1.3898334684855645e-07,
"logits/chosen": 0.9901279807090759,
"logits/rejected": 0.8948749899864197,
"loss": 1.1521,
"step": 452
},
{
"beta_dpo/beta": 0.047453392297029495,
"beta_dpo/beta_margin_grad_mean": -0.4430086612701416,
"beta_dpo/beta_margin_grad_std": 0.15654928982257843,
"beta_dpo/beta_margin_mean": 0.2556281089782715,
"beta_dpo/beta_margin_std": 0.7583603858947754,
"beta_dpo/beta_used": 0.047453392297029495,
"beta_dpo/beta_used_raw": 0.047453392297029495,
"beta_dpo/gap_mean": 7.704753398895264,
"beta_dpo/gap_std": 14.739446640014648,
"beta_dpo/loss_margin_mean": 6.674213409423828,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6848072562358276,
"grad_norm": 13.569918632507324,
"learning_rate": 1.3780020494988445e-07,
"logits/chosen": 1.1262736320495605,
"logits/rejected": 1.0599395036697388,
"loss": 1.1781,
"step": 453
},
{
"beta_dpo/beta": 0.14269126951694489,
"beta_dpo/beta_margin_grad_mean": -0.3225584328174591,
"beta_dpo/beta_margin_grad_std": 0.25810903310775757,
"beta_dpo/beta_margin_mean": 1.4232326745986938,
"beta_dpo/beta_margin_std": 2.898785352706909,
"beta_dpo/beta_used": 0.14269126951694489,
"beta_dpo/beta_used_raw": 0.14269126951694489,
"beta_dpo/gap_mean": 8.057350158691406,
"beta_dpo/gap_std": 14.961078643798828,
"beta_dpo/loss_margin_mean": 9.678709983825684,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6863189720332578,
"grad_norm": 25.957433700561523,
"learning_rate": 1.366202015206706e-07,
"logits/chosen": 1.2365267276763916,
"logits/rejected": 1.1713523864746094,
"loss": 0.9434,
"step": 454
},
{
"beta_dpo/beta": 0.11408072710037231,
"beta_dpo/beta_margin_grad_mean": -0.3201211988925934,
"beta_dpo/beta_margin_grad_std": 0.24275214970111847,
"beta_dpo/beta_margin_mean": 1.144683599472046,
"beta_dpo/beta_margin_std": 1.7552571296691895,
"beta_dpo/beta_used": 0.11408072710037231,
"beta_dpo/beta_used_raw": 0.11408072710037231,
"beta_dpo/gap_mean": 8.407580375671387,
"beta_dpo/gap_std": 14.920415878295898,
"beta_dpo/loss_margin_mean": 10.018362045288086,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6878306878306878,
"grad_norm": 28.53400993347168,
"learning_rate": 1.354433695681474e-07,
"logits/chosen": 1.0570340156555176,
"logits/rejected": 1.0129978656768799,
"loss": 0.8906,
"step": 455
},
{
"beta_dpo/beta": 0.06565161049365997,
"beta_dpo/beta_margin_grad_mean": -0.3941415548324585,
"beta_dpo/beta_margin_grad_std": 0.1878412365913391,
"beta_dpo/beta_margin_mean": 0.6595984697341919,
"beta_dpo/beta_margin_std": 1.2374985218048096,
"beta_dpo/beta_used": 0.06565161049365997,
"beta_dpo/beta_used_raw": 0.03332943841814995,
"beta_dpo/gap_mean": 8.574639320373535,
"beta_dpo/gap_std": 14.622611999511719,
"beta_dpo/loss_margin_mean": 8.794672012329102,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6893424036281179,
"grad_norm": 17.137107849121094,
"learning_rate": 1.3426974201083439e-07,
"logits/chosen": 1.129429578781128,
"logits/rejected": 1.1141908168792725,
"loss": 1.065,
"step": 456
},
{
"beta_dpo/beta": 0.010343266651034355,
"beta_dpo/beta_margin_grad_mean": -0.48004212975502014,
"beta_dpo/beta_margin_grad_std": 0.044787127524614334,
"beta_dpo/beta_margin_mean": 0.0815126895904541,
"beta_dpo/beta_margin_std": 0.18423019349575043,
"beta_dpo/beta_used": 0.010343266651034355,
"beta_dpo/beta_used_raw": -0.03720933198928833,
"beta_dpo/gap_mean": 8.280400276184082,
"beta_dpo/gap_std": 14.3084077835083,
"beta_dpo/loss_margin_mean": 6.3437018394470215,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.690854119425548,
"grad_norm": 4.230249404907227,
"learning_rate": 1.3309935167761717e-07,
"logits/chosen": 1.163110375404358,
"logits/rejected": 1.06003737449646,
"loss": 1.3216,
"step": 457
},
{
"beta_dpo/beta": 0.10353825986385345,
"beta_dpo/beta_margin_grad_mean": -0.3194752335548401,
"beta_dpo/beta_margin_grad_std": 0.2263987511396408,
"beta_dpo/beta_margin_mean": 1.070265769958496,
"beta_dpo/beta_margin_std": 1.5786514282226562,
"beta_dpo/beta_used": 0.10353825986385345,
"beta_dpo/beta_used_raw": 0.10353825986385345,
"beta_dpo/gap_mean": 8.416302680969238,
"beta_dpo/gap_std": 14.113754272460938,
"beta_dpo/loss_margin_mean": 10.443164825439453,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6923658352229781,
"grad_norm": 25.92171287536621,
"learning_rate": 1.3193223130682936e-07,
"logits/chosen": 1.3735662698745728,
"logits/rejected": 1.2102110385894775,
"loss": 1.0459,
"step": 458
},
{
"beta_dpo/beta": 0.10046427696943283,
"beta_dpo/beta_margin_grad_mean": -0.33836182951927185,
"beta_dpo/beta_margin_grad_std": 0.20904038846492767,
"beta_dpo/beta_margin_mean": 1.4206905364990234,
"beta_dpo/beta_margin_std": 2.245626449584961,
"beta_dpo/beta_used": 0.10046427696943283,
"beta_dpo/beta_used_raw": 0.0645713359117508,
"beta_dpo/gap_mean": 8.288217544555664,
"beta_dpo/gap_std": 13.640267372131348,
"beta_dpo/loss_margin_mean": 8.557767868041992,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6938775510204082,
"grad_norm": 23.96784019470215,
"learning_rate": 1.3076841354533658e-07,
"logits/chosen": 1.3754119873046875,
"logits/rejected": 1.3155990839004517,
"loss": 0.95,
"step": 459
},
{
"beta_dpo/beta": 0.096731998026371,
"beta_dpo/beta_margin_grad_mean": -0.34345245361328125,
"beta_dpo/beta_margin_grad_std": 0.2221241593360901,
"beta_dpo/beta_margin_mean": 1.0429019927978516,
"beta_dpo/beta_margin_std": 1.7028886079788208,
"beta_dpo/beta_used": 0.096731998026371,
"beta_dpo/beta_used_raw": 0.096731998026371,
"beta_dpo/gap_mean": 8.955610275268555,
"beta_dpo/gap_std": 13.669034957885742,
"beta_dpo/loss_margin_mean": 10.950597763061523,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6953892668178382,
"grad_norm": 26.167713165283203,
"learning_rate": 1.2960793094762345e-07,
"logits/chosen": 1.0446248054504395,
"logits/rejected": 0.9257493019104004,
"loss": 0.9978,
"step": 460
},
{
"beta_dpo/beta": 0.1565985530614853,
"beta_dpo/beta_margin_grad_mean": -0.33175206184387207,
"beta_dpo/beta_margin_grad_std": 0.2527690827846527,
"beta_dpo/beta_margin_mean": 1.9918843507766724,
"beta_dpo/beta_margin_std": 3.381699562072754,
"beta_dpo/beta_used": 0.1565985530614853,
"beta_dpo/beta_used_raw": 0.13997702300548553,
"beta_dpo/gap_mean": 9.272143363952637,
"beta_dpo/gap_std": 13.67574405670166,
"beta_dpo/loss_margin_mean": 9.674901962280273,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6969009826152683,
"grad_norm": 33.40471649169922,
"learning_rate": 1.2845081597488286e-07,
"logits/chosen": 1.3084503412246704,
"logits/rejected": 1.2334516048431396,
"loss": 0.8777,
"step": 461
},
{
"beta_dpo/beta": 0.06886428594589233,
"beta_dpo/beta_margin_grad_mean": -0.35868188738822937,
"beta_dpo/beta_margin_grad_std": 0.1917419135570526,
"beta_dpo/beta_margin_mean": 0.7064536213874817,
"beta_dpo/beta_margin_std": 0.992012083530426,
"beta_dpo/beta_used": 0.06886428594589233,
"beta_dpo/beta_used_raw": 0.06886428594589233,
"beta_dpo/gap_mean": 9.344978332519531,
"beta_dpo/gap_std": 13.81993293762207,
"beta_dpo/loss_margin_mean": 10.36449909210205,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6984126984126984,
"grad_norm": 19.233129501342773,
"learning_rate": 1.27297100994108e-07,
"logits/chosen": 1.10292649269104,
"logits/rejected": 1.0218987464904785,
"loss": 1.0363,
"step": 462
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.497945100069046,
"beta_dpo/beta_margin_grad_std": 0.0035107722505927086,
"beta_dpo/beta_margin_mean": 0.008220227435231209,
"beta_dpo/beta_margin_std": 0.014044022187590599,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.06016698479652405,
"beta_dpo/gap_mean": 9.277082443237305,
"beta_dpo/gap_std": 13.934642791748047,
"beta_dpo/loss_margin_mean": 8.220227241516113,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6999244142101285,
"grad_norm": 0.3398761451244354,
"learning_rate": 1.2614681827718695e-07,
"logits/chosen": 1.186434030532837,
"logits/rejected": 1.1574755907058716,
"loss": 1.3797,
"step": 463
},
{
"beta_dpo/beta": 0.09463178366422653,
"beta_dpo/beta_margin_grad_mean": -0.3775191009044647,
"beta_dpo/beta_margin_grad_std": 0.26763007044792175,
"beta_dpo/beta_margin_mean": 0.7947764992713928,
"beta_dpo/beta_margin_std": 2.0486159324645996,
"beta_dpo/beta_used": 0.09463178366422653,
"beta_dpo/beta_used_raw": 0.09463178366422653,
"beta_dpo/gap_mean": 9.045785903930664,
"beta_dpo/gap_std": 14.590141296386719,
"beta_dpo/loss_margin_mean": 9.205949783325195,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7014361300075586,
"grad_norm": 26.988676071166992,
"learning_rate": 1.2500000000000005e-07,
"logits/chosen": 1.324162483215332,
"logits/rejected": 1.288191795349121,
"loss": 1.0955,
"step": 464
},
{
"beta_dpo/beta": 0.06897910684347153,
"beta_dpo/beta_margin_grad_mean": -0.37157633900642395,
"beta_dpo/beta_margin_grad_std": 0.21252475678920746,
"beta_dpo/beta_margin_mean": 0.6985571980476379,
"beta_dpo/beta_margin_std": 1.129198670387268,
"beta_dpo/beta_used": 0.06897910684347153,
"beta_dpo/beta_used_raw": 0.06897910684347153,
"beta_dpo/gap_mean": 9.353326797485352,
"beta_dpo/gap_std": 14.839263916015625,
"beta_dpo/loss_margin_mean": 10.117259979248047,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7029478458049887,
"grad_norm": 19.14287567138672,
"learning_rate": 1.238566782415197e-07,
"logits/chosen": 1.4786460399627686,
"logits/rejected": 1.394470453262329,
"loss": 1.0684,
"step": 465
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49872010946273804,
"beta_dpo/beta_margin_grad_std": 0.0033063730224967003,
"beta_dpo/beta_margin_mean": 0.005119941662997007,
"beta_dpo/beta_margin_std": 0.013226281851530075,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.1248481273651123,
"beta_dpo/gap_mean": 8.757827758789062,
"beta_dpo/gap_std": 14.707776069641113,
"beta_dpo/loss_margin_mean": 5.119941711425781,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7044595616024187,
"grad_norm": 0.38169077038764954,
"learning_rate": 1.2271688498291334e-07,
"logits/chosen": 1.1370337009429932,
"logits/rejected": 1.1599440574645996,
"loss": 1.3813,
"step": 466
},
{
"beta_dpo/beta": 0.06722957640886307,
"beta_dpo/beta_margin_grad_mean": -0.41416195034980774,
"beta_dpo/beta_margin_grad_std": 0.20898796617984772,
"beta_dpo/beta_margin_mean": 0.6015309691429138,
"beta_dpo/beta_margin_std": 1.5544705390930176,
"beta_dpo/beta_used": 0.06722957640886307,
"beta_dpo/beta_used_raw": 0.01915111020207405,
"beta_dpo/gap_mean": 8.545236587524414,
"beta_dpo/gap_std": 14.711740493774414,
"beta_dpo/loss_margin_mean": 8.57152271270752,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7059712773998488,
"grad_norm": 22.553224563598633,
"learning_rate": 1.2158065210664848e-07,
"logits/chosen": 1.2510708570480347,
"logits/rejected": 1.0964205265045166,
"loss": 1.1364,
"step": 467
},
{
"beta_dpo/beta": 0.13395223021507263,
"beta_dpo/beta_margin_grad_mean": -0.3565409183502197,
"beta_dpo/beta_margin_grad_std": 0.26658982038497925,
"beta_dpo/beta_margin_mean": 1.3280830383300781,
"beta_dpo/beta_margin_std": 3.2243897914886475,
"beta_dpo/beta_used": 0.13395223021507263,
"beta_dpo/beta_used_raw": 0.09521180391311646,
"beta_dpo/gap_mean": 8.586424827575684,
"beta_dpo/gap_std": 14.758886337280273,
"beta_dpo/loss_margin_mean": 9.17181396484375,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7074829931972789,
"grad_norm": 56.513893127441406,
"learning_rate": 1.204480113956011e-07,
"logits/chosen": 1.235276460647583,
"logits/rejected": 1.240452766418457,
"loss": 1.0568,
"step": 468
},
{
"beta_dpo/beta": 0.19092893600463867,
"beta_dpo/beta_margin_grad_mean": -0.3903719186782837,
"beta_dpo/beta_margin_grad_std": 0.27532485127449036,
"beta_dpo/beta_margin_mean": 2.0709922313690186,
"beta_dpo/beta_margin_std": 4.380462646484375,
"beta_dpo/beta_used": 0.19092893600463867,
"beta_dpo/beta_used_raw": 0.11754559725522995,
"beta_dpo/gap_mean": 8.598621368408203,
"beta_dpo/gap_std": 14.475069046020508,
"beta_dpo/loss_margin_mean": 7.082053184509277,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.708994708994709,
"grad_norm": 65.13701629638672,
"learning_rate": 1.1931899453216697e-07,
"logits/chosen": 1.3469481468200684,
"logits/rejected": 1.3463125228881836,
"loss": 1.1198,
"step": 469
},
{
"beta_dpo/beta": 0.07104767858982086,
"beta_dpo/beta_margin_grad_mean": -0.3992937207221985,
"beta_dpo/beta_margin_grad_std": 0.19436946511268616,
"beta_dpo/beta_margin_mean": 0.4970458745956421,
"beta_dpo/beta_margin_std": 1.114266037940979,
"beta_dpo/beta_used": 0.07104767858982086,
"beta_dpo/beta_used_raw": 0.07104767858982086,
"beta_dpo/gap_mean": 8.183364868164062,
"beta_dpo/gap_std": 14.048234939575195,
"beta_dpo/loss_margin_mean": 7.861882209777832,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7105064247921391,
"grad_norm": 19.9486083984375,
"learning_rate": 1.1819363309737438e-07,
"logits/chosen": 1.2430088520050049,
"logits/rejected": 1.1504111289978027,
"loss": 1.0578,
"step": 470
},
{
"beta_dpo/beta": 0.22089746594429016,
"beta_dpo/beta_margin_grad_mean": -0.28740575909614563,
"beta_dpo/beta_margin_grad_std": 0.33232811093330383,
"beta_dpo/beta_margin_mean": 2.1565606594085693,
"beta_dpo/beta_margin_std": 3.5191762447357178,
"beta_dpo/beta_used": 0.22089746594429016,
"beta_dpo/beta_used_raw": 0.22089746594429016,
"beta_dpo/gap_mean": 8.464906692504883,
"beta_dpo/gap_std": 14.241508483886719,
"beta_dpo/loss_margin_mean": 9.804491996765137,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7120181405895691,
"grad_norm": 59.95513916015625,
"learning_rate": 1.1707195857000215e-07,
"logits/chosen": 1.2362382411956787,
"logits/rejected": 1.1866122484207153,
"loss": 0.8833,
"step": 471
},
{
"beta_dpo/beta": 0.06996319442987442,
"beta_dpo/beta_margin_grad_mean": -0.4005456268787384,
"beta_dpo/beta_margin_grad_std": 0.24495269358158112,
"beta_dpo/beta_margin_mean": 0.7803270220756531,
"beta_dpo/beta_margin_std": 1.9341996908187866,
"beta_dpo/beta_used": 0.06996319442987442,
"beta_dpo/beta_used_raw": 0.04906560108065605,
"beta_dpo/gap_mean": 8.674680709838867,
"beta_dpo/gap_std": 14.96885871887207,
"beta_dpo/loss_margin_mean": 9.845968246459961,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7135298563869993,
"grad_norm": 35.19158172607422,
"learning_rate": 1.1595400232569768e-07,
"logits/chosen": 0.8848438262939453,
"logits/rejected": 0.8536281585693359,
"loss": 1.2495,
"step": 472
},
{
"beta_dpo/beta": 0.22186213731765747,
"beta_dpo/beta_margin_grad_mean": -0.29160040616989136,
"beta_dpo/beta_margin_grad_std": 0.3274621367454529,
"beta_dpo/beta_margin_mean": 2.4625768661499023,
"beta_dpo/beta_margin_std": 4.43284797668457,
"beta_dpo/beta_used": 0.22186213731765747,
"beta_dpo/beta_used_raw": 0.22186213731765747,
"beta_dpo/gap_mean": 9.114084243774414,
"beta_dpo/gap_std": 15.43545150756836,
"beta_dpo/loss_margin_mean": 10.468867301940918,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7150415721844293,
"grad_norm": 59.935829162597656,
"learning_rate": 1.1483979563610069e-07,
"logits/chosen": 1.2678196430206299,
"logits/rejected": 1.165730595588684,
"loss": 0.9532,
"step": 473
},
{
"beta_dpo/beta": 0.02063870057463646,
"beta_dpo/beta_margin_grad_mean": -0.46851664781570435,
"beta_dpo/beta_margin_grad_std": 0.09417974948883057,
"beta_dpo/beta_margin_mean": 0.13540899753570557,
"beta_dpo/beta_margin_std": 0.40995728969573975,
"beta_dpo/beta_used": 0.02063870057463646,
"beta_dpo/beta_used_raw": -0.001780906692147255,
"beta_dpo/gap_mean": 8.859273910522461,
"beta_dpo/gap_std": 15.522665977478027,
"beta_dpo/loss_margin_mean": 7.19165563583374,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7165532879818595,
"grad_norm": 8.50357437133789,
"learning_rate": 1.1372936966796709e-07,
"logits/chosen": 1.464232325553894,
"logits/rejected": 1.3536427021026611,
"loss": 1.2578,
"step": 474
},
{
"beta_dpo/beta": 0.20778724551200867,
"beta_dpo/beta_margin_grad_mean": -0.2396034300327301,
"beta_dpo/beta_margin_grad_std": 0.2754622995853424,
"beta_dpo/beta_margin_mean": 2.4503684043884277,
"beta_dpo/beta_margin_std": 2.977999210357666,
"beta_dpo/beta_used": 0.20778724551200867,
"beta_dpo/beta_used_raw": 0.20778724551200867,
"beta_dpo/gap_mean": 9.182262420654297,
"beta_dpo/gap_std": 15.229242324829102,
"beta_dpo/loss_margin_mean": 11.754033088684082,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7180650037792895,
"grad_norm": 39.44532012939453,
"learning_rate": 1.126227554822985e-07,
"logits/chosen": 1.407026767730713,
"logits/rejected": 1.3673948049545288,
"loss": 0.6444,
"step": 475
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49834784865379333,
"beta_dpo/beta_margin_grad_std": 0.003492003073915839,
"beta_dpo/beta_margin_mean": 0.006608948577195406,
"beta_dpo/beta_margin_std": 0.013968821614980698,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.09851482510566711,
"beta_dpo/gap_mean": 8.904387474060059,
"beta_dpo/gap_std": 14.980789184570312,
"beta_dpo/loss_margin_mean": 6.608948230743408,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7195767195767195,
"grad_norm": 0.363756388425827,
"learning_rate": 1.1151998403347243e-07,
"logits/chosen": 1.1348334550857544,
"logits/rejected": 1.100001335144043,
"loss": 1.3807,
"step": 476
},
{
"beta_dpo/beta": 0.015278504230082035,
"beta_dpo/beta_margin_grad_mean": -0.47819918394088745,
"beta_dpo/beta_margin_grad_std": 0.09855031967163086,
"beta_dpo/beta_margin_mean": 0.0943133756518364,
"beta_dpo/beta_margin_std": 0.41970664262771606,
"beta_dpo/beta_used": 0.015278504230082035,
"beta_dpo/beta_used_raw": -0.012433012947440147,
"beta_dpo/gap_mean": 8.45599365234375,
"beta_dpo/gap_std": 15.0723876953125,
"beta_dpo/loss_margin_mean": 6.3728251457214355,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7210884353741497,
"grad_norm": 6.164888858795166,
"learning_rate": 1.1042108616837692e-07,
"logits/chosen": 1.5054833889007568,
"logits/rejected": 1.481032371520996,
"loss": 1.3075,
"step": 477
},
{
"beta_dpo/beta": 0.10024324804544449,
"beta_dpo/beta_margin_grad_mean": -0.43241098523139954,
"beta_dpo/beta_margin_grad_std": 0.26925480365753174,
"beta_dpo/beta_margin_mean": 0.5425744652748108,
"beta_dpo/beta_margin_std": 2.0107645988464355,
"beta_dpo/beta_used": 0.10024324804544449,
"beta_dpo/beta_used_raw": 0.10024324804544449,
"beta_dpo/gap_mean": 7.973514556884766,
"beta_dpo/gap_std": 15.64834976196289,
"beta_dpo/loss_margin_mean": 5.676856994628906,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7226001511715797,
"grad_norm": 25.647165298461914,
"learning_rate": 1.0932609262554746e-07,
"logits/chosen": 1.0902628898620605,
"logits/rejected": 1.083869218826294,
"loss": 1.2064,
"step": 478
},
{
"beta_dpo/beta": 0.10473272204399109,
"beta_dpo/beta_margin_grad_mean": -0.39839571714401245,
"beta_dpo/beta_margin_grad_std": 0.2627533972263336,
"beta_dpo/beta_margin_mean": 0.6739475131034851,
"beta_dpo/beta_margin_std": 2.4179022312164307,
"beta_dpo/beta_used": 0.10473272204399109,
"beta_dpo/beta_used_raw": 0.10473272204399109,
"beta_dpo/gap_mean": 7.648787975311279,
"beta_dpo/gap_std": 15.70901107788086,
"beta_dpo/loss_margin_mean": 6.562697410583496,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7241118669690099,
"grad_norm": 32.01771926879883,
"learning_rate": 1.0823503403430734e-07,
"logits/chosen": 1.2927186489105225,
"logits/rejected": 1.2298321723937988,
"loss": 1.2245,
"step": 479
},
{
"beta_dpo/beta": 0.2699778079986572,
"beta_dpo/beta_margin_grad_mean": -0.29545360803604126,
"beta_dpo/beta_margin_grad_std": 0.3526819348335266,
"beta_dpo/beta_margin_mean": 2.6411521434783936,
"beta_dpo/beta_margin_std": 4.677126884460449,
"beta_dpo/beta_used": 0.2699778079986572,
"beta_dpo/beta_used_raw": 0.2699778079986572,
"beta_dpo/gap_mean": 7.776112079620361,
"beta_dpo/gap_std": 15.986560821533203,
"beta_dpo/loss_margin_mean": 9.44406795501709,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7256235827664399,
"grad_norm": 77.899658203125,
"learning_rate": 1.0714794091391072e-07,
"logits/chosen": 1.0709260702133179,
"logits/rejected": 1.0799660682678223,
"loss": 1.1733,
"step": 480
},
{
"beta_dpo/beta": 0.08630968630313873,
"beta_dpo/beta_margin_grad_mean": -0.4045952260494232,
"beta_dpo/beta_margin_grad_std": 0.22337859869003296,
"beta_dpo/beta_margin_mean": 0.8984798192977905,
"beta_dpo/beta_margin_std": 2.0487589836120605,
"beta_dpo/beta_used": 0.08630968630313873,
"beta_dpo/beta_used_raw": 0.07817493379116058,
"beta_dpo/gap_mean": 7.974575519561768,
"beta_dpo/gap_std": 16.128808975219727,
"beta_dpo/loss_margin_mean": 8.648720741271973,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.72713529856387,
"grad_norm": 33.76292419433594,
"learning_rate": 1.0606484367268906e-07,
"logits/chosen": 1.3945153951644897,
"logits/rejected": 1.3756455183029175,
"loss": 1.178,
"step": 481
},
{
"beta_dpo/beta": 0.017980381846427917,
"beta_dpo/beta_margin_grad_mean": -0.47610631585121155,
"beta_dpo/beta_margin_grad_std": 0.11001280695199966,
"beta_dpo/beta_margin_mean": 0.10760781168937683,
"beta_dpo/beta_margin_std": 0.4911026358604431,
"beta_dpo/beta_used": 0.017980381846427917,
"beta_dpo/beta_used_raw": 0.012024441733956337,
"beta_dpo/gap_mean": 8.00039005279541,
"beta_dpo/gap_std": 16.554798126220703,
"beta_dpo/loss_margin_mean": 8.078429222106934,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7286470143613001,
"grad_norm": 8.63095474243164,
"learning_rate": 1.0498577260720048e-07,
"logits/chosen": 1.307666301727295,
"logits/rejected": 1.1709094047546387,
"loss": 1.2931,
"step": 482
},
{
"beta_dpo/beta": 0.1698511689901352,
"beta_dpo/beta_margin_grad_mean": -0.28366097807884216,
"beta_dpo/beta_margin_grad_std": 0.26790836453437805,
"beta_dpo/beta_margin_mean": 1.9325002431869507,
"beta_dpo/beta_margin_std": 2.926262617111206,
"beta_dpo/beta_used": 0.1698511689901352,
"beta_dpo/beta_used_raw": 0.1698511689901352,
"beta_dpo/gap_mean": 8.486933708190918,
"beta_dpo/gap_std": 16.541275024414062,
"beta_dpo/loss_margin_mean": 10.961893081665039,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7301587301587301,
"grad_norm": 35.92266082763672,
"learning_rate": 1.0391075790138232e-07,
"logits/chosen": 1.5374231338500977,
"logits/rejected": 1.3803998231887817,
"loss": 0.8204,
"step": 483
},
{
"beta_dpo/beta": 0.08772915601730347,
"beta_dpo/beta_margin_grad_mean": -0.3965490758419037,
"beta_dpo/beta_margin_grad_std": 0.22108224034309387,
"beta_dpo/beta_margin_mean": 0.635413646697998,
"beta_dpo/beta_margin_std": 1.6489046812057495,
"beta_dpo/beta_used": 0.08772915601730347,
"beta_dpo/beta_used_raw": 0.08772915601730347,
"beta_dpo/gap_mean": 8.550213813781738,
"beta_dpo/gap_std": 16.0025691986084,
"beta_dpo/loss_margin_mean": 8.223631858825684,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7316704459561603,
"grad_norm": 27.35234832763672,
"learning_rate": 1.0283982962570681e-07,
"logits/chosen": 1.3222270011901855,
"logits/rejected": 1.3129990100860596,
"loss": 1.0035,
"step": 484
},
{
"beta_dpo/beta": 0.1828226000070572,
"beta_dpo/beta_margin_grad_mean": -0.3307192921638489,
"beta_dpo/beta_margin_grad_std": 0.23482932150363922,
"beta_dpo/beta_margin_mean": 2.4338417053222656,
"beta_dpo/beta_margin_std": 4.331328868865967,
"beta_dpo/beta_used": 0.1828226000070572,
"beta_dpo/beta_used_raw": 0.129967600107193,
"beta_dpo/gap_mean": 8.83953857421875,
"beta_dpo/gap_std": 15.51352596282959,
"beta_dpo/loss_margin_mean": 8.875746726989746,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7331821617535903,
"grad_norm": 40.624786376953125,
"learning_rate": 1.0177301773633992e-07,
"logits/chosen": 1.3951783180236816,
"logits/rejected": 1.3369786739349365,
"loss": 0.8641,
"step": 485
},
{
"beta_dpo/beta": 0.17471832036972046,
"beta_dpo/beta_margin_grad_mean": -0.39315420389175415,
"beta_dpo/beta_margin_grad_std": 0.2959369421005249,
"beta_dpo/beta_margin_mean": 1.8131951093673706,
"beta_dpo/beta_margin_std": 5.117852687835693,
"beta_dpo/beta_used": 0.17471832036972046,
"beta_dpo/beta_used_raw": 0.16305719316005707,
"beta_dpo/gap_mean": 8.485563278198242,
"beta_dpo/gap_std": 15.768863677978516,
"beta_dpo/loss_margin_mean": 8.391579627990723,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7346938775510204,
"grad_norm": 65.43634796142578,
"learning_rate": 1.007103520743035e-07,
"logits/chosen": 1.2871387004852295,
"logits/rejected": 1.1449122428894043,
"loss": 1.2522,
"step": 486
},
{
"beta_dpo/beta": 0.1086234524846077,
"beta_dpo/beta_margin_grad_mean": -0.40604156255722046,
"beta_dpo/beta_margin_grad_std": 0.25962209701538086,
"beta_dpo/beta_margin_mean": 0.7545509934425354,
"beta_dpo/beta_margin_std": 2.2605080604553223,
"beta_dpo/beta_used": 0.1086234524846077,
"beta_dpo/beta_used_raw": 0.1086234524846077,
"beta_dpo/gap_mean": 8.383829116821289,
"beta_dpo/gap_std": 15.744401931762695,
"beta_dpo/loss_margin_mean": 7.316266059875488,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7362055933484505,
"grad_norm": 33.63584899902344,
"learning_rate": 9.965186236464046e-08,
"logits/chosen": 1.435591459274292,
"logits/rejected": 1.3444766998291016,
"loss": 1.0565,
"step": 487
},
{
"beta_dpo/beta": 0.18012477457523346,
"beta_dpo/beta_margin_grad_mean": -0.3828466832637787,
"beta_dpo/beta_margin_grad_std": 0.2732307016849518,
"beta_dpo/beta_margin_mean": 2.27314829826355,
"beta_dpo/beta_margin_std": 4.74143123626709,
"beta_dpo/beta_used": 0.18012477457523346,
"beta_dpo/beta_used_raw": 0.1745169460773468,
"beta_dpo/gap_mean": 8.307104110717773,
"beta_dpo/gap_std": 15.859813690185547,
"beta_dpo/loss_margin_mean": 9.146766662597656,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7377173091458806,
"grad_norm": 63.05045700073242,
"learning_rate": 9.859757821558337e-08,
"logits/chosen": 0.9542595744132996,
"logits/rejected": 0.8827604055404663,
"loss": 1.2242,
"step": 488
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49902424216270447,
"beta_dpo/beta_margin_grad_std": 0.0032371412962675095,
"beta_dpo/beta_margin_mean": 0.003903293749317527,
"beta_dpo/beta_margin_std": 0.012949378229677677,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.10531962662935257,
"beta_dpo/gap_mean": 7.896786212921143,
"beta_dpo/gap_std": 15.445693969726562,
"beta_dpo/loss_margin_mean": 3.9032936096191406,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7392290249433107,
"grad_norm": 0.40414562821388245,
"learning_rate": 9.754752911772615e-08,
"logits/chosen": 1.4719191789627075,
"logits/rejected": 1.3779878616333008,
"loss": 1.3819,
"step": 489
},
{
"beta_dpo/beta": 0.29670846462249756,
"beta_dpo/beta_margin_grad_mean": -0.29541918635368347,
"beta_dpo/beta_margin_grad_std": 0.3639431297779083,
"beta_dpo/beta_margin_mean": 2.9408721923828125,
"beta_dpo/beta_margin_std": 5.338799953460693,
"beta_dpo/beta_used": 0.29670846462249756,
"beta_dpo/beta_used_raw": 0.29670846462249756,
"beta_dpo/gap_mean": 8.032998085021973,
"beta_dpo/gap_std": 15.67213249206543,
"beta_dpo/loss_margin_mean": 10.069779396057129,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7407407407407407,
"grad_norm": 69.37432861328125,
"learning_rate": 9.650174444319956e-08,
"logits/chosen": 1.4777637720108032,
"logits/rejected": 1.4448156356811523,
"loss": 1.1275,
"step": 490
},
{
"beta_dpo/beta": 0.10166777670383453,
"beta_dpo/beta_margin_grad_mean": -0.36870265007019043,
"beta_dpo/beta_margin_grad_std": 0.2362639605998993,
"beta_dpo/beta_margin_mean": 1.1705293655395508,
"beta_dpo/beta_margin_std": 2.1599667072296143,
"beta_dpo/beta_used": 0.10166777670383453,
"beta_dpo/beta_used_raw": -0.012001670897006989,
"beta_dpo/gap_mean": 8.308603286743164,
"beta_dpo/gap_std": 15.477779388427734,
"beta_dpo/loss_margin_mean": 8.63475227355957,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7422524565381708,
"grad_norm": 39.177001953125,
"learning_rate": 9.546025344484868e-08,
"logits/chosen": 1.3601679801940918,
"logits/rejected": 1.2689048051834106,
"loss": 1.1337,
"step": 491
},
{
"beta_dpo/beta": 0.09824459999799728,
"beta_dpo/beta_margin_grad_mean": -0.39903581142425537,
"beta_dpo/beta_margin_grad_std": 0.23087172210216522,
"beta_dpo/beta_margin_mean": 0.777897298336029,
"beta_dpo/beta_margin_std": 2.0594301223754883,
"beta_dpo/beta_used": 0.09824459999799728,
"beta_dpo/beta_used_raw": 0.02383984625339508,
"beta_dpo/gap_mean": 7.6909990310668945,
"beta_dpo/gap_std": 15.357696533203125,
"beta_dpo/loss_margin_mean": 5.5825066566467285,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7437641723356009,
"grad_norm": 30.07907485961914,
"learning_rate": 9.442308525541589e-08,
"logits/chosen": 1.2555835247039795,
"logits/rejected": 1.1515380144119263,
"loss": 1.067,
"step": 492
},
{
"beta_dpo/beta": 0.21834628283977509,
"beta_dpo/beta_margin_grad_mean": -0.3189516067504883,
"beta_dpo/beta_margin_grad_std": 0.3405011296272278,
"beta_dpo/beta_margin_mean": 2.1703264713287354,
"beta_dpo/beta_margin_std": 4.150738716125488,
"beta_dpo/beta_used": 0.21834628283977509,
"beta_dpo/beta_used_raw": 0.21834628283977509,
"beta_dpo/gap_mean": 7.796796798706055,
"beta_dpo/gap_std": 15.415250778198242,
"beta_dpo/loss_margin_mean": 9.13962459564209,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.745275888133031,
"grad_norm": 64.61181640625,
"learning_rate": 9.339026888672468e-08,
"logits/chosen": 1.3729231357574463,
"logits/rejected": 1.2633564472198486,
"loss": 1.142,
"step": 493
},
{
"beta_dpo/beta": 0.07543282955884933,
"beta_dpo/beta_margin_grad_mean": -0.4118766486644745,
"beta_dpo/beta_margin_grad_std": 0.23261648416519165,
"beta_dpo/beta_margin_mean": 0.5448687076568604,
"beta_dpo/beta_margin_std": 1.7095155715942383,
"beta_dpo/beta_used": 0.07543282955884933,
"beta_dpo/beta_used_raw": 0.07543282955884933,
"beta_dpo/gap_mean": 8.163684844970703,
"beta_dpo/gap_std": 15.739279747009277,
"beta_dpo/loss_margin_mean": 8.561339378356934,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7467876039304611,
"grad_norm": 30.267333984375,
"learning_rate": 9.236183322886945e-08,
"logits/chosen": 1.2366876602172852,
"logits/rejected": 1.2264430522918701,
"loss": 1.2012,
"step": 494
},
{
"beta_dpo/beta": 0.09734344482421875,
"beta_dpo/beta_margin_grad_mean": -0.3877275884151459,
"beta_dpo/beta_margin_grad_std": 0.22462578117847443,
"beta_dpo/beta_margin_mean": 0.8795979619026184,
"beta_dpo/beta_margin_std": 1.8222239017486572,
"beta_dpo/beta_used": 0.09734344482421875,
"beta_dpo/beta_used_raw": 0.03870324790477753,
"beta_dpo/gap_mean": 7.832492828369141,
"beta_dpo/gap_std": 15.831649780273438,
"beta_dpo/loss_margin_mean": 6.959440231323242,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7482993197278912,
"grad_norm": 39.15364456176758,
"learning_rate": 9.133780704940594e-08,
"logits/chosen": 1.3102976083755493,
"logits/rejected": 1.2166869640350342,
"loss": 1.1147,
"step": 495
},
{
"beta_dpo/beta": 0.04615124687552452,
"beta_dpo/beta_margin_grad_mean": -0.4308703541755676,
"beta_dpo/beta_margin_grad_std": 0.177626371383667,
"beta_dpo/beta_margin_mean": 0.38025563955307007,
"beta_dpo/beta_margin_std": 0.9773518443107605,
"beta_dpo/beta_used": 0.04615124687552452,
"beta_dpo/beta_used_raw": 0.04151741415262222,
"beta_dpo/gap_mean": 8.264655113220215,
"beta_dpo/gap_std": 15.770124435424805,
"beta_dpo/loss_margin_mean": 9.904504776000977,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7498110355253212,
"grad_norm": 19.862531661987305,
"learning_rate": 9.031821899254797e-08,
"logits/chosen": 1.567601203918457,
"logits/rejected": 1.4194416999816895,
"loss": 1.1998,
"step": 496
},
{
"beta_dpo/beta": 0.1375540941953659,
"beta_dpo/beta_margin_grad_mean": -0.35231631994247437,
"beta_dpo/beta_margin_grad_std": 0.2726176381111145,
"beta_dpo/beta_margin_mean": 1.4071952104568481,
"beta_dpo/beta_margin_std": 3.0741336345672607,
"beta_dpo/beta_used": 0.1375540941953659,
"beta_dpo/beta_used_raw": 0.1375540941953659,
"beta_dpo/gap_mean": 8.478071212768555,
"beta_dpo/gap_std": 15.789005279541016,
"beta_dpo/loss_margin_mean": 9.306314468383789,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7513227513227513,
"grad_norm": 47.519474029541016,
"learning_rate": 8.930309757836516e-08,
"logits/chosen": 1.2274080514907837,
"logits/rejected": 1.1559877395629883,
"loss": 1.205,
"step": 497
},
{
"beta_dpo/beta": 0.14629700779914856,
"beta_dpo/beta_margin_grad_mean": -0.37526050209999084,
"beta_dpo/beta_margin_grad_std": 0.25155338644981384,
"beta_dpo/beta_margin_mean": 1.5782233476638794,
"beta_dpo/beta_margin_std": 3.1664230823516846,
"beta_dpo/beta_used": 0.14629700779914856,
"beta_dpo/beta_used_raw": 0.12825556099414825,
"beta_dpo/gap_mean": 8.519954681396484,
"beta_dpo/gap_std": 15.585509300231934,
"beta_dpo/loss_margin_mean": 9.394691467285156,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7528344671201814,
"grad_norm": 65.0245132446289,
"learning_rate": 8.829247120198563e-08,
"logits/chosen": 1.311582088470459,
"logits/rejected": 1.2794764041900635,
"loss": 1.0241,
"step": 498
},
{
"beta_dpo/beta": 0.19346806406974792,
"beta_dpo/beta_margin_grad_mean": -0.40234655141830444,
"beta_dpo/beta_margin_grad_std": 0.31278374791145325,
"beta_dpo/beta_margin_mean": 1.59882390499115,
"beta_dpo/beta_margin_std": 5.184474945068359,
"beta_dpo/beta_used": 0.19346806406974792,
"beta_dpo/beta_used_raw": 0.18081435561180115,
"beta_dpo/gap_mean": 8.605300903320312,
"beta_dpo/gap_std": 15.78538990020752,
"beta_dpo/loss_margin_mean": 8.464212417602539,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7543461829176115,
"grad_norm": 50.980735778808594,
"learning_rate": 8.728636813280163e-08,
"logits/chosen": 1.264394998550415,
"logits/rejected": 1.1896038055419922,
"loss": 1.447,
"step": 499
},
{
"beta_dpo/beta": 0.06631321460008621,
"beta_dpo/beta_margin_grad_mean": -0.41595977544784546,
"beta_dpo/beta_margin_grad_std": 0.21724657714366913,
"beta_dpo/beta_margin_mean": 0.5961994528770447,
"beta_dpo/beta_margin_std": 1.4269115924835205,
"beta_dpo/beta_used": 0.06631321460008621,
"beta_dpo/beta_used_raw": 0.06631321460008621,
"beta_dpo/gap_mean": 8.392349243164062,
"beta_dpo/gap_std": 15.650519371032715,
"beta_dpo/loss_margin_mean": 7.612263202667236,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7558578987150416,
"grad_norm": 26.242412567138672,
"learning_rate": 8.628481651367875e-08,
"logits/chosen": 1.347449541091919,
"logits/rejected": 1.3151437044143677,
"loss": 1.2301,
"step": 500
},
{
"epoch": 0.7558578987150416,
"eval_beta_dpo/beta": 0.12900032103061676,
"eval_beta_dpo/beta_margin_grad_mean": -0.3624914288520813,
"eval_beta_dpo/beta_margin_grad_std": 0.20973293483257294,
"eval_beta_dpo/beta_margin_mean": 1.3653007745742798,
"eval_beta_dpo/beta_margin_std": 2.0186424255371094,
"eval_beta_dpo/beta_used": 0.12900032103061676,
"eval_beta_dpo/beta_used_raw": 0.11448737978935242,
"eval_beta_dpo/gap_mean": 8.435112953186035,
"eval_beta_dpo/gap_std": 15.631720542907715,
"eval_beta_dpo/loss_margin_mean": 8.676569938659668,
"eval_beta_dpo/mask_keep_frac": 1.0,
"eval_logits/chosen": 1.2194130420684814,
"eval_logits/rejected": 1.1512391567230225,
"eval_loss": 0.6310787796974182,
"eval_runtime": 43.5066,
"eval_samples_per_second": 52.934,
"eval_steps_per_second": 1.655,
"step": 500
},
{
"beta_dpo/beta": 0.0842582955956459,
"beta_dpo/beta_margin_grad_mean": -0.40922731161117554,
"beta_dpo/beta_margin_grad_std": 0.2037898600101471,
"beta_dpo/beta_margin_mean": 0.7561125755310059,
"beta_dpo/beta_margin_std": 1.7290011644363403,
"beta_dpo/beta_used": 0.0842582955956459,
"beta_dpo/beta_used_raw": 0.0739816352725029,
"beta_dpo/gap_mean": 8.32042121887207,
"beta_dpo/gap_std": 15.216630935668945,
"beta_dpo/loss_margin_mean": 7.961922645568848,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7573696145124716,
"grad_norm": 26.170494079589844,
"learning_rate": 8.528784436016878e-08,
"logits/chosen": 1.1607296466827393,
"logits/rejected": 1.1347827911376953,
"loss": 1.1527,
"step": 501
},
{
"beta_dpo/beta": 0.03724071756005287,
"beta_dpo/beta_margin_grad_mean": -0.4399970769882202,
"beta_dpo/beta_margin_grad_std": 0.13782690465450287,
"beta_dpo/beta_margin_mean": 0.2895679175853729,
"beta_dpo/beta_margin_std": 0.6904258131980896,
"beta_dpo/beta_used": 0.03724071756005287,
"beta_dpo/beta_used_raw": 0.012635238468647003,
"beta_dpo/gap_mean": 8.11561393737793,
"beta_dpo/gap_std": 14.806890487670898,
"beta_dpo/loss_margin_mean": 6.198487758636475,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7588813303099018,
"grad_norm": 19.033384323120117,
"learning_rate": 8.4295479559726e-08,
"logits/chosen": 1.2781716585159302,
"logits/rejected": 1.2537305355072021,
"loss": 1.1892,
"step": 502
},
{
"beta_dpo/beta": 0.26611024141311646,
"beta_dpo/beta_margin_grad_mean": -0.2744055390357971,
"beta_dpo/beta_margin_grad_std": 0.3376629650592804,
"beta_dpo/beta_margin_mean": 2.5834662914276123,
"beta_dpo/beta_margin_std": 4.09039306640625,
"beta_dpo/beta_used": 0.26611024141311646,
"beta_dpo/beta_used_raw": 0.26611024141311646,
"beta_dpo/gap_mean": 8.190786361694336,
"beta_dpo/gap_std": 14.82516860961914,
"beta_dpo/loss_margin_mean": 9.649856567382812,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7603930461073318,
"grad_norm": 57.269222259521484,
"learning_rate": 8.330774987092712e-08,
"logits/chosen": 1.3074579238891602,
"logits/rejected": 1.3287543058395386,
"loss": 0.9177,
"step": 503
},
{
"beta_dpo/beta": 0.11062437295913696,
"beta_dpo/beta_margin_grad_mean": -0.337758868932724,
"beta_dpo/beta_margin_grad_std": 0.22527188062667847,
"beta_dpo/beta_margin_mean": 1.5635826587677002,
"beta_dpo/beta_margin_std": 2.6599931716918945,
"beta_dpo/beta_used": 0.11062437295913696,
"beta_dpo/beta_used_raw": 0.084391288459301,
"beta_dpo/gap_mean": 8.561979293823242,
"beta_dpo/gap_std": 14.650800704956055,
"beta_dpo/loss_margin_mean": 11.275338172912598,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7619047619047619,
"grad_norm": 22.946237564086914,
"learning_rate": 8.232468292269479e-08,
"logits/chosen": 1.335453748703003,
"logits/rejected": 1.306289792060852,
"loss": 0.9216,
"step": 504
},
{
"beta_dpo/beta": 0.07179925590753555,
"beta_dpo/beta_margin_grad_mean": -0.41799870133399963,
"beta_dpo/beta_margin_grad_std": 0.24787545204162598,
"beta_dpo/beta_margin_mean": 0.6309289932250977,
"beta_dpo/beta_margin_std": 1.9183470010757446,
"beta_dpo/beta_used": 0.07179925590753555,
"beta_dpo/beta_used_raw": 0.06938499212265015,
"beta_dpo/gap_mean": 8.652692794799805,
"beta_dpo/gap_std": 15.009008407592773,
"beta_dpo/loss_margin_mean": 6.836058616638184,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.763416477702192,
"grad_norm": 28.69261932373047,
"learning_rate": 8.134630621352483e-08,
"logits/chosen": 1.0778157711029053,
"logits/rejected": 0.9853509664535522,
"loss": 1.2664,
"step": 505
},
{
"beta_dpo/beta": 0.1097840741276741,
"beta_dpo/beta_margin_grad_mean": -0.39366090297698975,
"beta_dpo/beta_margin_grad_std": 0.2954460084438324,
"beta_dpo/beta_margin_mean": 0.6261573433876038,
"beta_dpo/beta_margin_std": 1.9519262313842773,
"beta_dpo/beta_used": 0.1097840741276741,
"beta_dpo/beta_used_raw": 0.1097840741276741,
"beta_dpo/gap_mean": 8.113224983215332,
"beta_dpo/gap_std": 15.423563003540039,
"beta_dpo/loss_margin_mean": 5.594672203063965,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.764928193499622,
"grad_norm": 29.990312576293945,
"learning_rate": 8.037264711071698e-08,
"logits/chosen": 1.100189447402954,
"logits/rejected": 1.08306086063385,
"loss": 1.1331,
"step": 506
},
{
"beta_dpo/beta": 0.055617570877075195,
"beta_dpo/beta_margin_grad_mean": -0.4389042258262634,
"beta_dpo/beta_margin_grad_std": 0.19821825623512268,
"beta_dpo/beta_margin_mean": 0.4292410612106323,
"beta_dpo/beta_margin_std": 1.3530104160308838,
"beta_dpo/beta_used": 0.055617570877075195,
"beta_dpo/beta_used_raw": 0.055617570877075195,
"beta_dpo/gap_mean": 7.837147235870361,
"beta_dpo/gap_std": 15.702282905578613,
"beta_dpo/loss_margin_mean": 7.381948947906494,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7664399092970522,
"grad_norm": 19.953712463378906,
"learning_rate": 7.940373284960933e-08,
"logits/chosen": 1.2273082733154297,
"logits/rejected": 1.1556060314178467,
"loss": 1.2762,
"step": 507
},
{
"beta_dpo/beta": 0.12468338012695312,
"beta_dpo/beta_margin_grad_mean": -0.3594365417957306,
"beta_dpo/beta_margin_grad_std": 0.25680065155029297,
"beta_dpo/beta_margin_mean": 1.5802284479141235,
"beta_dpo/beta_margin_std": 3.037984609603882,
"beta_dpo/beta_used": 0.12468338012695312,
"beta_dpo/beta_used_raw": 0.019509881734848022,
"beta_dpo/gap_mean": 7.87746524810791,
"beta_dpo/gap_std": 15.627901077270508,
"beta_dpo/loss_margin_mean": 9.403909683227539,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7679516250944822,
"grad_norm": 52.756736755371094,
"learning_rate": 7.843959053281663e-08,
"logits/chosen": 1.192077398300171,
"logits/rejected": 1.028613805770874,
"loss": 1.2675,
"step": 508
},
{
"beta_dpo/beta": 0.041742824018001556,
"beta_dpo/beta_margin_grad_mean": -0.4245273470878601,
"beta_dpo/beta_margin_grad_std": 0.17522205412387848,
"beta_dpo/beta_margin_mean": 0.370586633682251,
"beta_dpo/beta_margin_std": 0.8615016341209412,
"beta_dpo/beta_used": 0.041742824018001556,
"beta_dpo/beta_used_raw": 0.011772872880101204,
"beta_dpo/gap_mean": 8.242198944091797,
"beta_dpo/gap_std": 15.512893676757812,
"beta_dpo/loss_margin_mean": 8.89169979095459,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7694633408919124,
"grad_norm": 17.92835235595703,
"learning_rate": 7.748024712947204e-08,
"logits/chosen": 1.2415645122528076,
"logits/rejected": 1.2159892320632935,
"loss": 1.2149,
"step": 509
},
{
"beta_dpo/beta": 0.08406226336956024,
"beta_dpo/beta_margin_grad_mean": -0.3821350336074829,
"beta_dpo/beta_margin_grad_std": 0.2446945309638977,
"beta_dpo/beta_margin_mean": 1.0495874881744385,
"beta_dpo/beta_margin_std": 2.1299333572387695,
"beta_dpo/beta_used": 0.08406226336956024,
"beta_dpo/beta_used_raw": 0.06441254913806915,
"beta_dpo/gap_mean": 8.698486328125,
"beta_dpo/gap_std": 15.522483825683594,
"beta_dpo/loss_margin_mean": 10.599325180053711,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7709750566893424,
"grad_norm": 35.02284622192383,
"learning_rate": 7.652572947447272e-08,
"logits/chosen": 1.340779185295105,
"logits/rejected": 1.2162147760391235,
"loss": 1.1841,
"step": 510
},
{
"beta_dpo/beta": 0.10140527784824371,
"beta_dpo/beta_margin_grad_mean": -0.3813478350639343,
"beta_dpo/beta_margin_grad_std": 0.24966783821582794,
"beta_dpo/beta_margin_mean": 1.2869607210159302,
"beta_dpo/beta_margin_std": 2.6547703742980957,
"beta_dpo/beta_used": 0.10140527784824371,
"beta_dpo/beta_used_raw": 0.09635543823242188,
"beta_dpo/gap_mean": 9.028369903564453,
"beta_dpo/gap_std": 15.63941478729248,
"beta_dpo/loss_margin_mean": 11.385443687438965,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7724867724867724,
"grad_norm": 32.982994079589844,
"learning_rate": 7.557606426772961e-08,
"logits/chosen": 1.0060919523239136,
"logits/rejected": 0.9972001910209656,
"loss": 1.1686,
"step": 511
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4980872869491577,
"beta_dpo/beta_margin_grad_std": 0.003784729167819023,
"beta_dpo/beta_margin_mean": 0.0076514980755746365,
"beta_dpo/beta_margin_std": 0.015140078961849213,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.15243175625801086,
"beta_dpo/gap_mean": 9.063308715820312,
"beta_dpo/gap_std": 15.602668762207031,
"beta_dpo/loss_margin_mean": 7.6514973640441895,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7739984882842026,
"grad_norm": 0.38143348693847656,
"learning_rate": 7.463127807341966e-08,
"logits/chosen": 1.1713460683822632,
"logits/rejected": 1.083141803741455,
"loss": 1.3815,
"step": 512
},
{
"beta_dpo/beta": 0.029492665082216263,
"beta_dpo/beta_margin_grad_mean": -0.4373173117637634,
"beta_dpo/beta_margin_grad_std": 0.09860417991876602,
"beta_dpo/beta_margin_mean": 0.2670486569404602,
"beta_dpo/beta_margin_std": 0.4258429706096649,
"beta_dpo/beta_used": 0.029492665082216263,
"beta_dpo/beta_used_raw": 0.029492665082216263,
"beta_dpo/gap_mean": 9.020488739013672,
"beta_dpo/gap_std": 15.474786758422852,
"beta_dpo/loss_margin_mean": 8.947519302368164,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7755102040816326,
"grad_norm": 8.190389633178711,
"learning_rate": 7.369139731924401e-08,
"logits/chosen": 1.411609411239624,
"logits/rejected": 1.3289296627044678,
"loss": 1.2034,
"step": 513
},
{
"beta_dpo/beta": 0.18778222799301147,
"beta_dpo/beta_margin_grad_mean": -0.29142025113105774,
"beta_dpo/beta_margin_grad_std": 0.2884121537208557,
"beta_dpo/beta_margin_mean": 1.84999680519104,
"beta_dpo/beta_margin_std": 2.9089887142181396,
"beta_dpo/beta_used": 0.18778222799301147,
"beta_dpo/beta_used_raw": 0.18778222799301147,
"beta_dpo/gap_mean": 9.031063079833984,
"beta_dpo/gap_std": 15.233624458312988,
"beta_dpo/loss_margin_mean": 9.787365913391113,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7770219198790628,
"grad_norm": 32.78883743286133,
"learning_rate": 7.275644829568747e-08,
"logits/chosen": 1.2265822887420654,
"logits/rejected": 1.1920336484909058,
"loss": 0.7064,
"step": 514
},
{
"beta_dpo/beta": 0.09070698916912079,
"beta_dpo/beta_margin_grad_mean": -0.41331547498703003,
"beta_dpo/beta_margin_grad_std": 0.24861948192119598,
"beta_dpo/beta_margin_mean": 0.8262215852737427,
"beta_dpo/beta_margin_std": 2.2216925621032715,
"beta_dpo/beta_used": 0.09070698916912079,
"beta_dpo/beta_used_raw": 0.07224002480506897,
"beta_dpo/gap_mean": 8.951355934143066,
"beta_dpo/gap_std": 15.244037628173828,
"beta_dpo/loss_margin_mean": 7.946232318878174,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7785336356764928,
"grad_norm": 29.780132293701172,
"learning_rate": 7.182645715528435e-08,
"logits/chosen": 1.508201241493225,
"logits/rejected": 1.4241424798965454,
"loss": 1.1395,
"step": 515
},
{
"beta_dpo/beta": 0.06425631046295166,
"beta_dpo/beta_margin_grad_mean": -0.40338146686553955,
"beta_dpo/beta_margin_grad_std": 0.18757954239845276,
"beta_dpo/beta_margin_mean": 0.4550299048423767,
"beta_dpo/beta_margin_std": 0.9595562219619751,
"beta_dpo/beta_used": 0.06425631046295166,
"beta_dpo/beta_used_raw": 0.06425631046295166,
"beta_dpo/gap_mean": 8.591944694519043,
"beta_dpo/gap_std": 15.072626113891602,
"beta_dpo/loss_margin_mean": 7.4303460121154785,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.780045351473923,
"grad_norm": 16.75914764404297,
"learning_rate": 7.090144991188568e-08,
"logits/chosen": 1.0630478858947754,
"logits/rejected": 0.9949425458908081,
"loss": 1.048,
"step": 516
},
{
"beta_dpo/beta": 0.029759714379906654,
"beta_dpo/beta_margin_grad_mean": -0.4609003961086273,
"beta_dpo/beta_margin_grad_std": 0.12479053437709808,
"beta_dpo/beta_margin_mean": 0.18986453115940094,
"beta_dpo/beta_margin_std": 0.6050887107849121,
"beta_dpo/beta_used": 0.029759714379906654,
"beta_dpo/beta_used_raw": -0.029511921107769012,
"beta_dpo/gap_mean": 8.238603591918945,
"beta_dpo/gap_std": 15.265298843383789,
"beta_dpo/loss_margin_mean": 6.269947528839111,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.781557067271353,
"grad_norm": 10.626750946044922,
"learning_rate": 6.998145243993284e-08,
"logits/chosen": 1.2810413837432861,
"logits/rejected": 1.2711362838745117,
"loss": 1.2496,
"step": 517
},
{
"beta_dpo/beta": 0.05780534818768501,
"beta_dpo/beta_margin_grad_mean": -0.437270849943161,
"beta_dpo/beta_margin_grad_std": 0.20933924615383148,
"beta_dpo/beta_margin_mean": 0.46789804100990295,
"beta_dpo/beta_margin_std": 1.412877082824707,
"beta_dpo/beta_used": 0.05780534818768501,
"beta_dpo/beta_used_raw": 0.03885362669825554,
"beta_dpo/gap_mean": 7.883532524108887,
"beta_dpo/gap_std": 15.234363555908203,
"beta_dpo/loss_margin_mean": 6.780441761016846,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.783068783068783,
"grad_norm": 18.98142433166504,
"learning_rate": 6.906649047373245e-08,
"logits/chosen": 1.0652557611465454,
"logits/rejected": 0.9838038682937622,
"loss": 1.2426,
"step": 518
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4986930787563324,
"beta_dpo/beta_margin_grad_std": 0.004700750112533569,
"beta_dpo/beta_margin_mean": 0.005228013265877962,
"beta_dpo/beta_margin_std": 0.0188044011592865,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.10169404745101929,
"beta_dpo/gap_mean": 7.39609432220459,
"beta_dpo/gap_std": 15.862621307373047,
"beta_dpo/loss_margin_mean": 5.228013038635254,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7845804988662132,
"grad_norm": 0.3468739688396454,
"learning_rate": 6.815658960673781e-08,
"logits/chosen": 1.4341715574264526,
"logits/rejected": 1.3577253818511963,
"loss": 1.3823,
"step": 519
},
{
"beta_dpo/beta": 0.11534042656421661,
"beta_dpo/beta_margin_grad_mean": -0.3637125492095947,
"beta_dpo/beta_margin_grad_std": 0.23695090413093567,
"beta_dpo/beta_margin_mean": 1.313083291053772,
"beta_dpo/beta_margin_std": 2.40010404586792,
"beta_dpo/beta_used": 0.11534042656421661,
"beta_dpo/beta_used_raw": 0.013542748987674713,
"beta_dpo/gap_mean": 7.62299919128418,
"beta_dpo/gap_std": 15.442781448364258,
"beta_dpo/loss_margin_mean": 7.702384948730469,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7860922146636432,
"grad_norm": 27.789459228515625,
"learning_rate": 6.725177529083209e-08,
"logits/chosen": 1.3084077835083008,
"logits/rejected": 1.2144858837127686,
"loss": 1.033,
"step": 520
},
{
"beta_dpo/beta": 0.10655572265386581,
"beta_dpo/beta_margin_grad_mean": -0.35950157046318054,
"beta_dpo/beta_margin_grad_std": 0.2488071769475937,
"beta_dpo/beta_margin_mean": 1.0529520511627197,
"beta_dpo/beta_margin_std": 1.9023741483688354,
"beta_dpo/beta_used": 0.10655572265386581,
"beta_dpo/beta_used_raw": 0.10655572265386581,
"beta_dpo/gap_mean": 7.508369445800781,
"beta_dpo/gap_std": 15.275403022766113,
"beta_dpo/loss_margin_mean": 8.58330249786377,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7876039304610734,
"grad_norm": 26.136470794677734,
"learning_rate": 6.63520728356167e-08,
"logits/chosen": 1.0999151468276978,
"logits/rejected": 0.9847517013549805,
"loss": 1.0651,
"step": 521
},
{
"beta_dpo/beta": 0.2019219547510147,
"beta_dpo/beta_margin_grad_mean": -0.3990466892719269,
"beta_dpo/beta_margin_grad_std": 0.30489471554756165,
"beta_dpo/beta_margin_mean": 1.3157075643539429,
"beta_dpo/beta_margin_std": 5.186375617980957,
"beta_dpo/beta_used": 0.2019219547510147,
"beta_dpo/beta_used_raw": 0.2019219547510147,
"beta_dpo/gap_mean": 7.4453840255737305,
"beta_dpo/gap_std": 15.706568717956543,
"beta_dpo/loss_margin_mean": 6.2311625480651855,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7891156462585034,
"grad_norm": 60.13499450683594,
"learning_rate": 6.545750740770336e-08,
"logits/chosen": 1.1132246255874634,
"logits/rejected": 1.1097826957702637,
"loss": 1.2254,
"step": 522
},
{
"beta_dpo/beta": 0.13557228446006775,
"beta_dpo/beta_margin_grad_mean": -0.3367370665073395,
"beta_dpo/beta_margin_grad_std": 0.28432610630989075,
"beta_dpo/beta_margin_mean": 1.3112093210220337,
"beta_dpo/beta_margin_std": 2.233006000518799,
"beta_dpo/beta_used": 0.13557228446006775,
"beta_dpo/beta_used_raw": 0.13557228446006775,
"beta_dpo/gap_mean": 7.6764936447143555,
"beta_dpo/gap_std": 15.761053085327148,
"beta_dpo/loss_margin_mean": 8.553004264831543,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7906273620559335,
"grad_norm": 43.26845932006836,
"learning_rate": 6.456810403001012e-08,
"logits/chosen": 1.3522729873657227,
"logits/rejected": 1.2240285873413086,
"loss": 1.0392,
"step": 523
},
{
"beta_dpo/beta": 0.14213837683200836,
"beta_dpo/beta_margin_grad_mean": -0.3865470588207245,
"beta_dpo/beta_margin_grad_std": 0.30028629302978516,
"beta_dpo/beta_margin_mean": 0.9078271985054016,
"beta_dpo/beta_margin_std": 2.4876906871795654,
"beta_dpo/beta_used": 0.14213837683200836,
"beta_dpo/beta_used_raw": 0.14213837683200836,
"beta_dpo/gap_mean": 7.359951972961426,
"beta_dpo/gap_std": 15.630659103393555,
"beta_dpo/loss_margin_mean": 6.1383748054504395,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7921390778533636,
"grad_norm": 57.35697555541992,
"learning_rate": 6.368388758106134e-08,
"logits/chosen": 1.179058313369751,
"logits/rejected": 1.1576694250106812,
"loss": 1.163,
"step": 524
},
{
"beta_dpo/beta": 0.08663511276245117,
"beta_dpo/beta_margin_grad_mean": -0.4115186929702759,
"beta_dpo/beta_margin_grad_std": 0.246219202876091,
"beta_dpo/beta_margin_mean": 0.7762193083763123,
"beta_dpo/beta_margin_std": 1.9916539192199707,
"beta_dpo/beta_used": 0.08663511276245117,
"beta_dpo/beta_used_raw": 0.04566335305571556,
"beta_dpo/gap_mean": 7.272393226623535,
"beta_dpo/gap_std": 15.498950958251953,
"beta_dpo/loss_margin_mean": 6.056159973144531,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7936507936507936,
"grad_norm": 30.81611442565918,
"learning_rate": 6.280488279429185e-08,
"logits/chosen": 1.0387253761291504,
"logits/rejected": 1.0043344497680664,
"loss": 1.1954,
"step": 525
},
{
"beta_dpo/beta": 0.1067892462015152,
"beta_dpo/beta_margin_grad_mean": -0.37539657950401306,
"beta_dpo/beta_margin_grad_std": 0.23800675570964813,
"beta_dpo/beta_margin_mean": 1.3335126638412476,
"beta_dpo/beta_margin_std": 2.6430134773254395,
"beta_dpo/beta_used": 0.1067892462015152,
"beta_dpo/beta_used_raw": 0.0007416233420372009,
"beta_dpo/gap_mean": 7.282289981842041,
"beta_dpo/gap_std": 15.198205947875977,
"beta_dpo/loss_margin_mean": 6.869879245758057,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7951625094482238,
"grad_norm": 30.191267013549805,
"learning_rate": 6.193111425735515e-08,
"logits/chosen": 1.1532905101776123,
"logits/rejected": 1.0733742713928223,
"loss": 1.1578,
"step": 526
},
{
"beta_dpo/beta": 0.0481080487370491,
"beta_dpo/beta_margin_grad_mean": -0.44247305393218994,
"beta_dpo/beta_margin_grad_std": 0.1592341810464859,
"beta_dpo/beta_margin_mean": 0.2580515146255493,
"beta_dpo/beta_margin_std": 0.7128713130950928,
"beta_dpo/beta_used": 0.0481080487370491,
"beta_dpo/beta_used_raw": 0.0481080487370491,
"beta_dpo/gap_mean": 6.703248500823975,
"beta_dpo/gap_std": 14.977252960205078,
"beta_dpo/loss_margin_mean": 5.508336067199707,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7966742252456538,
"grad_norm": 13.756309509277344,
"learning_rate": 6.106260641143546e-08,
"logits/chosen": 1.4220060110092163,
"logits/rejected": 1.2894335985183716,
"loss": 1.2061,
"step": 527
},
{
"beta_dpo/beta": 0.13932912051677704,
"beta_dpo/beta_margin_grad_mean": -0.3513142764568329,
"beta_dpo/beta_margin_grad_std": 0.27392539381980896,
"beta_dpo/beta_margin_mean": 1.1128238439559937,
"beta_dpo/beta_margin_std": 2.790832042694092,
"beta_dpo/beta_used": 0.13932912051677704,
"beta_dpo/beta_used_raw": 0.13932912051677704,
"beta_dpo/gap_mean": 6.893251419067383,
"beta_dpo/gap_std": 15.283125877380371,
"beta_dpo/loss_margin_mean": 8.12575912475586,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7981859410430839,
"grad_norm": 33.40256118774414,
"learning_rate": 6.019938355056422e-08,
"logits/chosen": 1.4654052257537842,
"logits/rejected": 1.361560583114624,
"loss": 1.0162,
"step": 528
},
{
"beta_dpo/beta": 0.3706626892089844,
"beta_dpo/beta_margin_grad_mean": -0.24648523330688477,
"beta_dpo/beta_margin_grad_std": 0.2973049283027649,
"beta_dpo/beta_margin_mean": 6.077200412750244,
"beta_dpo/beta_margin_std": 8.564191818237305,
"beta_dpo/beta_used": 0.3706626892089844,
"beta_dpo/beta_used_raw": 0.3706626892089844,
"beta_dpo/gap_mean": 8.090949058532715,
"beta_dpo/gap_std": 15.185094833374023,
"beta_dpo/loss_margin_mean": 13.548951148986816,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.799697656840514,
"grad_norm": 73.29319763183594,
"learning_rate": 5.934146982094049e-08,
"logits/chosen": 1.0952523946762085,
"logits/rejected": 1.0364768505096436,
"loss": 0.8932,
"step": 529
},
{
"beta_dpo/beta": 0.16738095879554749,
"beta_dpo/beta_margin_grad_mean": -0.32800406217575073,
"beta_dpo/beta_margin_grad_std": 0.3087153732776642,
"beta_dpo/beta_margin_mean": 1.573071002960205,
"beta_dpo/beta_margin_std": 3.1769614219665527,
"beta_dpo/beta_used": 0.16738095879554749,
"beta_dpo/beta_used_raw": 0.16738095879554749,
"beta_dpo/gap_mean": 8.358198165893555,
"beta_dpo/gap_std": 15.306008338928223,
"beta_dpo/loss_margin_mean": 9.372478485107422,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8012093726379441,
"grad_norm": 50.17977523803711,
"learning_rate": 5.848888922025552e-08,
"logits/chosen": 0.977916955947876,
"logits/rejected": 0.9343423843383789,
"loss": 1.0008,
"step": 530
},
{
"beta_dpo/beta": 0.12758412957191467,
"beta_dpo/beta_margin_grad_mean": -0.36042869091033936,
"beta_dpo/beta_margin_grad_std": 0.2259923368692398,
"beta_dpo/beta_margin_mean": 1.3581695556640625,
"beta_dpo/beta_margin_std": 2.5714547634124756,
"beta_dpo/beta_used": 0.12758412957191467,
"beta_dpo/beta_used_raw": 0.1111588403582573,
"beta_dpo/gap_mean": 8.160855293273926,
"beta_dpo/gap_std": 14.826172828674316,
"beta_dpo/loss_margin_mean": 7.639277458190918,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8027210884353742,
"grad_norm": 32.76897048950195,
"learning_rate": 5.7641665597021435e-08,
"logits/chosen": 1.1931384801864624,
"logits/rejected": 1.1271367073059082,
"loss": 0.9711,
"step": 531
},
{
"beta_dpo/beta": 0.09142545610666275,
"beta_dpo/beta_margin_grad_mean": -0.3953922390937805,
"beta_dpo/beta_margin_grad_std": 0.22126024961471558,
"beta_dpo/beta_margin_mean": 0.6120261549949646,
"beta_dpo/beta_margin_std": 1.5054887533187866,
"beta_dpo/beta_used": 0.09142545610666275,
"beta_dpo/beta_used_raw": 0.09142545610666275,
"beta_dpo/gap_mean": 8.395885467529297,
"beta_dpo/gap_std": 14.686279296875,
"beta_dpo/loss_margin_mean": 8.282346725463867,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8042328042328042,
"grad_norm": 27.678478240966797,
"learning_rate": 5.679982264990424e-08,
"logits/chosen": 1.0794322490692139,
"logits/rejected": 1.0217549800872803,
"loss": 1.0608,
"step": 532
},
{
"beta_dpo/beta": 0.08020737767219543,
"beta_dpo/beta_margin_grad_mean": -0.40842196345329285,
"beta_dpo/beta_margin_grad_std": 0.2721438407897949,
"beta_dpo/beta_margin_mean": 0.7550824284553528,
"beta_dpo/beta_margin_std": 2.28764009475708,
"beta_dpo/beta_used": 0.08020737767219543,
"beta_dpo/beta_used_raw": 0.07088775187730789,
"beta_dpo/gap_mean": 8.168783187866211,
"beta_dpo/gap_std": 15.009658813476562,
"beta_dpo/loss_margin_mean": 8.0230712890625,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8057445200302343,
"grad_norm": 32.82867431640625,
"learning_rate": 5.596338392706076e-08,
"logits/chosen": 1.4970355033874512,
"logits/rejected": 1.3700031042099,
"loss": 1.4064,
"step": 533
},
{
"beta_dpo/beta": 0.15440692007541656,
"beta_dpo/beta_margin_grad_mean": -0.36432546377182007,
"beta_dpo/beta_margin_grad_std": 0.27774715423583984,
"beta_dpo/beta_margin_mean": 2.107167959213257,
"beta_dpo/beta_margin_std": 4.183852672576904,
"beta_dpo/beta_used": 0.15440692007541656,
"beta_dpo/beta_used_raw": 0.1260472536087036,
"beta_dpo/gap_mean": 8.621475219726562,
"beta_dpo/gap_std": 15.336591720581055,
"beta_dpo/loss_margin_mean": 9.650552749633789,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8072562358276644,
"grad_norm": 55.175025939941406,
"learning_rate": 5.513237282548033e-08,
"logits/chosen": 1.0398293733596802,
"logits/rejected": 1.02956223487854,
"loss": 1.3208,
"step": 534
},
{
"beta_dpo/beta": 0.17304572463035583,
"beta_dpo/beta_margin_grad_mean": -0.33443713188171387,
"beta_dpo/beta_margin_grad_std": 0.3046601116657257,
"beta_dpo/beta_margin_mean": 1.5655525922775269,
"beta_dpo/beta_margin_std": 2.966926097869873,
"beta_dpo/beta_used": 0.17304572463035583,
"beta_dpo/beta_used_raw": 0.17304572463035583,
"beta_dpo/gap_mean": 8.521682739257812,
"beta_dpo/gap_std": 15.320834159851074,
"beta_dpo/loss_margin_mean": 8.643691062927246,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8087679516250945,
"grad_norm": 40.93467330932617,
"learning_rate": 5.430681259032957e-08,
"logits/chosen": 1.0570318698883057,
"logits/rejected": 0.9800806641578674,
"loss": 0.9346,
"step": 535
},
{
"beta_dpo/beta": 0.16060255467891693,
"beta_dpo/beta_margin_grad_mean": -0.35615992546081543,
"beta_dpo/beta_margin_grad_std": 0.23471446335315704,
"beta_dpo/beta_margin_mean": 1.9467401504516602,
"beta_dpo/beta_margin_std": 3.6517608165740967,
"beta_dpo/beta_used": 0.16060255467891693,
"beta_dpo/beta_used_raw": 0.10881784558296204,
"beta_dpo/gap_mean": 8.824699401855469,
"beta_dpo/gap_std": 15.217926025390625,
"beta_dpo/loss_margin_mean": 10.188438415527344,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8102796674225246,
"grad_norm": 29.208940505981445,
"learning_rate": 5.3486726314303175e-08,
"logits/chosen": 1.1016263961791992,
"logits/rejected": 1.0059990882873535,
"loss": 0.9383,
"step": 536
},
{
"beta_dpo/beta": 0.047818832099437714,
"beta_dpo/beta_margin_grad_mean": -0.43136221170425415,
"beta_dpo/beta_margin_grad_std": 0.16197457909584045,
"beta_dpo/beta_margin_mean": 0.33009785413742065,
"beta_dpo/beta_margin_std": 0.8123959898948669,
"beta_dpo/beta_used": 0.047818832099437714,
"beta_dpo/beta_used_raw": 0.047818832099437714,
"beta_dpo/gap_mean": 8.446852684020996,
"beta_dpo/gap_std": 14.9360933303833,
"beta_dpo/loss_margin_mean": 6.56203556060791,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8117913832199547,
"grad_norm": 14.774935722351074,
"learning_rate": 5.267213693697695e-08,
"logits/chosen": 1.2608040571212769,
"logits/rejected": 1.1432770490646362,
"loss": 1.1518,
"step": 537
},
{
"beta_dpo/beta": 0.05728251487016678,
"beta_dpo/beta_margin_grad_mean": -0.3964690566062927,
"beta_dpo/beta_margin_grad_std": 0.18261270225048065,
"beta_dpo/beta_margin_mean": 0.6189707517623901,
"beta_dpo/beta_margin_std": 1.2403920888900757,
"beta_dpo/beta_used": 0.05728251487016678,
"beta_dpo/beta_used_raw": 0.02719694934785366,
"beta_dpo/gap_mean": 8.421884536743164,
"beta_dpo/gap_std": 14.781656265258789,
"beta_dpo/loss_margin_mean": 9.203042984008789,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8133030990173847,
"grad_norm": 15.689529418945312,
"learning_rate": 5.1863067244167144e-08,
"logits/chosen": 1.07925283908844,
"logits/rejected": 1.0223917961120605,
"loss": 1.0953,
"step": 538
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49804821610450745,
"beta_dpo/beta_margin_grad_std": 0.0030981386080384254,
"beta_dpo/beta_margin_mean": 0.007807582151144743,
"beta_dpo/beta_margin_std": 0.012393561191856861,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.0549732930958271,
"beta_dpo/gap_mean": 8.416460037231445,
"beta_dpo/gap_std": 14.297332763671875,
"beta_dpo/loss_margin_mean": 7.807581424713135,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8148148148148148,
"grad_norm": 0.33566319942474365,
"learning_rate": 5.105953986729195e-08,
"logits/chosen": 1.3011579513549805,
"logits/rejected": 1.134751558303833,
"loss": 1.3805,
"step": 539
},
{
"beta_dpo/beta": 0.2007492184638977,
"beta_dpo/beta_margin_grad_mean": -0.30484333634376526,
"beta_dpo/beta_margin_grad_std": 0.31224846839904785,
"beta_dpo/beta_margin_mean": 1.9463728666305542,
"beta_dpo/beta_margin_std": 3.1974806785583496,
"beta_dpo/beta_used": 0.2007492184638977,
"beta_dpo/beta_used_raw": 0.2007492184638977,
"beta_dpo/gap_mean": 8.602801322937012,
"beta_dpo/gap_std": 14.396449089050293,
"beta_dpo/loss_margin_mean": 9.807937622070312,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8163265306122449,
"grad_norm": 48.30702590942383,
"learning_rate": 5.026157728273966e-08,
"logits/chosen": 1.4509068727493286,
"logits/rejected": 1.3168927431106567,
"loss": 0.9022,
"step": 540
},
{
"beta_dpo/beta": 0.21265891194343567,
"beta_dpo/beta_margin_grad_mean": -0.2759414613246918,
"beta_dpo/beta_margin_grad_std": 0.33086225390434265,
"beta_dpo/beta_margin_mean": 2.2098233699798584,
"beta_dpo/beta_margin_std": 3.124072790145874,
"beta_dpo/beta_used": 0.21265891194343567,
"beta_dpo/beta_used_raw": 0.21265891194343567,
"beta_dpo/gap_mean": 8.943014144897461,
"beta_dpo/gap_std": 14.430560111999512,
"beta_dpo/loss_margin_mean": 10.11587905883789,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.817838246409675,
"grad_norm": 56.282127380371094,
"learning_rate": 4.9469201811239035e-08,
"logits/chosen": 1.3912067413330078,
"logits/rejected": 1.4174320697784424,
"loss": 0.8092,
"step": 541
},
{
"beta_dpo/beta": 0.3174495995044708,
"beta_dpo/beta_margin_grad_mean": -0.2677096426486969,
"beta_dpo/beta_margin_grad_std": 0.3603222668170929,
"beta_dpo/beta_margin_mean": 3.70037579536438,
"beta_dpo/beta_margin_std": 5.134766101837158,
"beta_dpo/beta_used": 0.3174495995044708,
"beta_dpo/beta_used_raw": 0.3174495995044708,
"beta_dpo/gap_mean": 9.359155654907227,
"beta_dpo/gap_std": 14.655593872070312,
"beta_dpo/loss_margin_mean": 11.443473815917969,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8193499622071051,
"grad_norm": 91.55038452148438,
"learning_rate": 4.868243561723534e-08,
"logits/chosen": 1.085046648979187,
"logits/rejected": 1.0205934047698975,
"loss": 1.0193,
"step": 542
},
{
"beta_dpo/beta": 0.04393059015274048,
"beta_dpo/beta_margin_grad_mean": -0.415033757686615,
"beta_dpo/beta_margin_grad_std": 0.15476207435131073,
"beta_dpo/beta_margin_mean": 0.38421371579170227,
"beta_dpo/beta_margin_std": 0.7130559682846069,
"beta_dpo/beta_used": 0.04393059015274048,
"beta_dpo/beta_used_raw": 0.04393059015274048,
"beta_dpo/gap_mean": 9.273094177246094,
"beta_dpo/gap_std": 14.880241394042969,
"beta_dpo/loss_margin_mean": 8.799640655517578,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8208616780045351,
"grad_norm": 11.100589752197266,
"learning_rate": 4.790130070827028e-08,
"logits/chosen": 1.190391182899475,
"logits/rejected": 1.0452004671096802,
"loss": 1.1361,
"step": 543
},
{
"beta_dpo/beta": 0.115420401096344,
"beta_dpo/beta_margin_grad_mean": -0.3272760510444641,
"beta_dpo/beta_margin_grad_std": 0.28145524859428406,
"beta_dpo/beta_margin_mean": 1.3125842809677124,
"beta_dpo/beta_margin_std": 2.0508055686950684,
"beta_dpo/beta_used": 0.115420401096344,
"beta_dpo/beta_used_raw": 0.115420401096344,
"beta_dpo/gap_mean": 9.590657234191895,
"beta_dpo/gap_std": 15.433180809020996,
"beta_dpo/loss_margin_mean": 11.46908950805664,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8223733938019653,
"grad_norm": 33.516334533691406,
"learning_rate": 4.7125818934366454e-08,
"logits/chosen": 1.0202606916427612,
"logits/rejected": 0.9330880641937256,
"loss": 1.0445,
"step": 544
},
{
"beta_dpo/beta": 0.11217702925205231,
"beta_dpo/beta_margin_grad_mean": -0.3709193468093872,
"beta_dpo/beta_margin_grad_std": 0.27607184648513794,
"beta_dpo/beta_margin_mean": 0.8507373929023743,
"beta_dpo/beta_margin_std": 1.8569505214691162,
"beta_dpo/beta_used": 0.11217702925205231,
"beta_dpo/beta_used_raw": 0.11217702925205231,
"beta_dpo/gap_mean": 9.49941349029541,
"beta_dpo/gap_std": 15.811461448669434,
"beta_dpo/loss_margin_mean": 7.796383380889893,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8238851095993953,
"grad_norm": 22.20750617980957,
"learning_rate": 4.635601198741607e-08,
"logits/chosen": 1.396355152130127,
"logits/rejected": 1.287622332572937,
"loss": 0.9378,
"step": 545
},
{
"beta_dpo/beta": 0.15419848263263702,
"beta_dpo/beta_margin_grad_mean": -0.2972928583621979,
"beta_dpo/beta_margin_grad_std": 0.28409242630004883,
"beta_dpo/beta_margin_mean": 1.5118775367736816,
"beta_dpo/beta_margin_std": 2.3105311393737793,
"beta_dpo/beta_used": 0.15419848263263702,
"beta_dpo/beta_used_raw": 0.15419848263263702,
"beta_dpo/gap_mean": 9.365312576293945,
"beta_dpo/gap_std": 15.491350173950195,
"beta_dpo/loss_margin_mean": 9.728423118591309,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8253968253968254,
"grad_norm": 39.418800354003906,
"learning_rate": 4.559190140057428e-08,
"logits/chosen": 1.0795893669128418,
"logits/rejected": 1.0732557773590088,
"loss": 0.8788,
"step": 546
},
{
"beta_dpo/beta": 0.11869224160909653,
"beta_dpo/beta_margin_grad_mean": -0.3248843550682068,
"beta_dpo/beta_margin_grad_std": 0.22885605692863464,
"beta_dpo/beta_margin_mean": 1.2935576438903809,
"beta_dpo/beta_margin_std": 2.110837459564209,
"beta_dpo/beta_used": 0.11869224160909653,
"beta_dpo/beta_used_raw": 0.11869224160909653,
"beta_dpo/gap_mean": 9.626115798950195,
"beta_dpo/gap_std": 15.377988815307617,
"beta_dpo/loss_margin_mean": 10.859363555908203,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8269085411942555,
"grad_norm": 26.864322662353516,
"learning_rate": 4.483350854765672e-08,
"logits/chosen": 1.1849254369735718,
"logits/rejected": 1.130568265914917,
"loss": 0.9387,
"step": 547
},
{
"beta_dpo/beta": 0.03195616975426674,
"beta_dpo/beta_margin_grad_mean": -0.4449770152568817,
"beta_dpo/beta_margin_grad_std": 0.15359994769096375,
"beta_dpo/beta_margin_mean": 0.2758224308490753,
"beta_dpo/beta_margin_std": 0.771864116191864,
"beta_dpo/beta_used": 0.03195616975426674,
"beta_dpo/beta_used_raw": -0.0379025973379612,
"beta_dpo/gap_mean": 9.175468444824219,
"beta_dpo/gap_std": 15.103775024414062,
"beta_dpo/loss_margin_mean": 6.729221820831299,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8284202569916855,
"grad_norm": 11.9760160446167,
"learning_rate": 4.4080854642541826e-08,
"logits/chosen": 1.213660717010498,
"logits/rejected": 1.1112244129180908,
"loss": 1.2281,
"step": 548
},
{
"beta_dpo/beta": 0.015496051870286465,
"beta_dpo/beta_margin_grad_mean": -0.4723176658153534,
"beta_dpo/beta_margin_grad_std": 0.06963703036308289,
"beta_dpo/beta_margin_mean": 0.11424464732408524,
"beta_dpo/beta_margin_std": 0.28728026151657104,
"beta_dpo/beta_used": 0.015496051870286465,
"beta_dpo/beta_used_raw": 0.015496051870286465,
"beta_dpo/gap_mean": 8.908472061157227,
"beta_dpo/gap_std": 15.119009017944336,
"beta_dpo/loss_margin_mean": 7.55164909362793,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8299319727891157,
"grad_norm": 6.398705959320068,
"learning_rate": 4.333396073857723e-08,
"logits/chosen": 1.3798398971557617,
"logits/rejected": 1.299780011177063,
"loss": 1.2846,
"step": 549
},
{
"beta_dpo/beta": 0.06067263334989548,
"beta_dpo/beta_margin_grad_mean": -0.42208606004714966,
"beta_dpo/beta_margin_grad_std": 0.19494439661502838,
"beta_dpo/beta_margin_mean": 0.38519254326820374,
"beta_dpo/beta_margin_std": 1.0421887636184692,
"beta_dpo/beta_used": 0.06067263334989548,
"beta_dpo/beta_used_raw": 0.06067263334989548,
"beta_dpo/gap_mean": 8.732294082641602,
"beta_dpo/gap_std": 15.221040725708008,
"beta_dpo/loss_margin_mean": 7.314587593078613,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8314436885865457,
"grad_norm": 17.624757766723633,
"learning_rate": 4.259284772799099e-08,
"logits/chosen": 1.256799578666687,
"logits/rejected": 1.1990289688110352,
"loss": 1.1007,
"step": 550
},
{
"beta_dpo/beta": 0.046534232795238495,
"beta_dpo/beta_margin_grad_mean": -0.42071956396102905,
"beta_dpo/beta_margin_grad_std": 0.12429229170084,
"beta_dpo/beta_margin_mean": 0.3460140526294708,
"beta_dpo/beta_margin_std": 0.5744323134422302,
"beta_dpo/beta_used": 0.046534232795238495,
"beta_dpo/beta_used_raw": 0.046534232795238495,
"beta_dpo/gap_mean": 8.32465934753418,
"beta_dpo/gap_std": 14.818896293640137,
"beta_dpo/loss_margin_mean": 7.320366382598877,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8329554043839759,
"grad_norm": 15.450972557067871,
"learning_rate": 4.1857536341307176e-08,
"logits/chosen": 1.4911260604858398,
"logits/rejected": 1.3917850255966187,
"loss": 1.1257,
"step": 551
},
{
"beta_dpo/beta": 0.19184796512126923,
"beta_dpo/beta_margin_grad_mean": -0.3128667175769806,
"beta_dpo/beta_margin_grad_std": 0.30318787693977356,
"beta_dpo/beta_margin_mean": 1.7558873891830444,
"beta_dpo/beta_margin_std": 3.194880247116089,
"beta_dpo/beta_used": 0.19184796512126923,
"beta_dpo/beta_used_raw": 0.19184796512126923,
"beta_dpo/gap_mean": 8.063959121704102,
"beta_dpo/gap_std": 14.653057098388672,
"beta_dpo/loss_margin_mean": 7.569924831390381,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8344671201814059,
"grad_norm": 34.19086837768555,
"learning_rate": 4.112804714676593e-08,
"logits/chosen": 1.1748685836791992,
"logits/rejected": 1.1179265975952148,
"loss": 0.769,
"step": 552
},
{
"beta_dpo/beta": 0.143024280667305,
"beta_dpo/beta_margin_grad_mean": -0.42807796597480774,
"beta_dpo/beta_margin_grad_std": 0.28312036395072937,
"beta_dpo/beta_margin_mean": 1.2726179361343384,
"beta_dpo/beta_margin_std": 4.292966365814209,
"beta_dpo/beta_used": 0.143024280667305,
"beta_dpo/beta_used_raw": 0.143024280667305,
"beta_dpo/gap_mean": 8.228954315185547,
"beta_dpo/gap_std": 15.217555046081543,
"beta_dpo/loss_margin_mean": 8.160815238952637,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8359788359788359,
"grad_norm": 52.660762786865234,
"learning_rate": 4.0404400549748144e-08,
"logits/chosen": 1.212083101272583,
"logits/rejected": 1.1077077388763428,
"loss": 1.2839,
"step": 553
},
{
"beta_dpo/beta": 0.06598697602748871,
"beta_dpo/beta_margin_grad_mean": -0.39833810925483704,
"beta_dpo/beta_margin_grad_std": 0.18544040620326996,
"beta_dpo/beta_margin_mean": 0.6325341463088989,
"beta_dpo/beta_margin_std": 1.203471302986145,
"beta_dpo/beta_used": 0.06598697602748871,
"beta_dpo/beta_used_raw": 0.01051008328795433,
"beta_dpo/gap_mean": 8.333694458007812,
"beta_dpo/gap_std": 14.93002986907959,
"beta_dpo/loss_margin_mean": 9.03442096710205,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8374905517762661,
"grad_norm": 20.9935302734375,
"learning_rate": 3.968661679220467e-08,
"logits/chosen": 1.3394625186920166,
"logits/rejected": 1.3322782516479492,
"loss": 1.1239,
"step": 554
},
{
"beta_dpo/beta": 0.09749078750610352,
"beta_dpo/beta_margin_grad_mean": -0.37117937207221985,
"beta_dpo/beta_margin_grad_std": 0.2390754222869873,
"beta_dpo/beta_margin_mean": 1.1656479835510254,
"beta_dpo/beta_margin_std": 2.159295082092285,
"beta_dpo/beta_used": 0.09749078750610352,
"beta_dpo/beta_used_raw": 0.04684508964419365,
"beta_dpo/gap_mean": 8.137611389160156,
"beta_dpo/gap_std": 15.190990447998047,
"beta_dpo/loss_margin_mean": 8.135710716247559,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8390022675736961,
"grad_norm": 35.28615188598633,
"learning_rate": 3.89747159520904e-08,
"logits/chosen": 1.434206485748291,
"logits/rejected": 1.3939523696899414,
"loss": 1.1265,
"step": 555
},
{
"beta_dpo/beta": 0.07983244955539703,
"beta_dpo/beta_margin_grad_mean": -0.3930589556694031,
"beta_dpo/beta_margin_grad_std": 0.22528424859046936,
"beta_dpo/beta_margin_mean": 0.5776308178901672,
"beta_dpo/beta_margin_std": 1.3057823181152344,
"beta_dpo/beta_used": 0.07983244955539703,
"beta_dpo/beta_used_raw": 0.07983244955539703,
"beta_dpo/gap_mean": 8.181859016418457,
"beta_dpo/gap_std": 15.160832405090332,
"beta_dpo/loss_margin_mean": 7.280274868011475,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8405139833711263,
"grad_norm": 20.49454689025879,
"learning_rate": 3.826871794280192e-08,
"logits/chosen": 1.2871716022491455,
"logits/rejected": 1.2277119159698486,
"loss": 1.1067,
"step": 556
},
{
"beta_dpo/beta": 0.08392275869846344,
"beta_dpo/beta_margin_grad_mean": -0.3781295716762543,
"beta_dpo/beta_margin_grad_std": 0.23083707690238953,
"beta_dpo/beta_margin_mean": 1.063275694847107,
"beta_dpo/beta_margin_std": 2.229184150695801,
"beta_dpo/beta_used": 0.08392275869846344,
"beta_dpo/beta_used_raw": 0.07373453676700592,
"beta_dpo/gap_mean": 8.439523696899414,
"beta_dpo/gap_std": 15.30473518371582,
"beta_dpo/loss_margin_mean": 9.074495315551758,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8420256991685563,
"grad_norm": 20.826927185058594,
"learning_rate": 3.756864251262143e-08,
"logits/chosen": 1.1249200105667114,
"logits/rejected": 0.9844628572463989,
"loss": 1.0709,
"step": 557
},
{
"beta_dpo/beta": 0.11504756659269333,
"beta_dpo/beta_margin_grad_mean": -0.38101866841316223,
"beta_dpo/beta_margin_grad_std": 0.2687925398349762,
"beta_dpo/beta_margin_mean": 1.3793085813522339,
"beta_dpo/beta_margin_std": 3.3186142444610596,
"beta_dpo/beta_used": 0.11504756659269333,
"beta_dpo/beta_used_raw": 0.10775712877511978,
"beta_dpo/gap_mean": 8.540435791015625,
"beta_dpo/gap_std": 15.425100326538086,
"beta_dpo/loss_margin_mean": 9.191947937011719,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8435374149659864,
"grad_norm": 41.15348815917969,
"learning_rate": 3.687450924416341e-08,
"logits/chosen": 1.323371410369873,
"logits/rejected": 1.2763679027557373,
"loss": 1.1547,
"step": 558
},
{
"beta_dpo/beta": 0.07971999794244766,
"beta_dpo/beta_margin_grad_mean": -0.4077509939670563,
"beta_dpo/beta_margin_grad_std": 0.24463067948818207,
"beta_dpo/beta_margin_mean": 0.8806779980659485,
"beta_dpo/beta_margin_std": 2.2131831645965576,
"beta_dpo/beta_used": 0.07971999794244766,
"beta_dpo/beta_used_raw": 0.07355686277151108,
"beta_dpo/gap_mean": 8.507637023925781,
"beta_dpo/gap_std": 15.659688949584961,
"beta_dpo/loss_margin_mean": 8.043795585632324,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8450491307634165,
"grad_norm": 29.638961791992188,
"learning_rate": 3.6186337553827743e-08,
"logits/chosen": 1.4750277996063232,
"logits/rejected": 1.3416969776153564,
"loss": 1.2295,
"step": 559
},
{
"beta_dpo/beta": 0.17043396830558777,
"beta_dpo/beta_margin_grad_mean": -0.3465725779533386,
"beta_dpo/beta_margin_grad_std": 0.2609609067440033,
"beta_dpo/beta_margin_mean": 2.157318115234375,
"beta_dpo/beta_margin_std": 4.162698268890381,
"beta_dpo/beta_used": 0.17043396830558777,
"beta_dpo/beta_used_raw": 0.11978065967559814,
"beta_dpo/gap_mean": 8.458297729492188,
"beta_dpo/gap_std": 15.643919944763184,
"beta_dpo/loss_margin_mean": 10.064728736877441,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8465608465608465,
"grad_norm": 43.67535400390625,
"learning_rate": 3.550414669125573e-08,
"logits/chosen": 1.3294655084609985,
"logits/rejected": 1.285670280456543,
"loss": 1.0217,
"step": 560
},
{
"beta_dpo/beta": 0.08011330664157867,
"beta_dpo/beta_margin_grad_mean": -0.3740430474281311,
"beta_dpo/beta_margin_grad_std": 0.20077289640903473,
"beta_dpo/beta_margin_mean": 0.9686254262924194,
"beta_dpo/beta_margin_std": 1.77787446975708,
"beta_dpo/beta_used": 0.08011330664157867,
"beta_dpo/beta_used_raw": 0.041644688695669174,
"beta_dpo/gap_mean": 9.038721084594727,
"beta_dpo/gap_std": 15.586071014404297,
"beta_dpo/loss_margin_mean": 10.723516464233398,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8480725623582767,
"grad_norm": 23.633291244506836,
"learning_rate": 3.482795573879241e-08,
"logits/chosen": 1.4188854694366455,
"logits/rejected": 1.3814911842346191,
"loss": 1.0808,
"step": 561
},
{
"beta_dpo/beta": 0.022397657856345177,
"beta_dpo/beta_margin_grad_mean": -0.444037526845932,
"beta_dpo/beta_margin_grad_std": 0.1145317479968071,
"beta_dpo/beta_margin_mean": 0.25500166416168213,
"beta_dpo/beta_margin_std": 0.533501148223877,
"beta_dpo/beta_used": 0.022397657856345177,
"beta_dpo/beta_used_raw": -0.03293367847800255,
"beta_dpo/gap_mean": 9.185551643371582,
"beta_dpo/gap_std": 15.79057502746582,
"beta_dpo/loss_margin_mean": 9.271513938903809,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8495842781557067,
"grad_norm": 8.53104019165039,
"learning_rate": 3.415778361095226e-08,
"logits/chosen": 1.391019344329834,
"logits/rejected": 1.3353910446166992,
"loss": 1.2529,
"step": 562
},
{
"beta_dpo/beta": 0.2550284266471863,
"beta_dpo/beta_margin_grad_mean": -0.280933141708374,
"beta_dpo/beta_margin_grad_std": 0.347322553396225,
"beta_dpo/beta_margin_mean": 2.570730686187744,
"beta_dpo/beta_margin_std": 5.0097527503967285,
"beta_dpo/beta_used": 0.2550284266471863,
"beta_dpo/beta_used_raw": 0.2550284266471863,
"beta_dpo/gap_mean": 9.215568542480469,
"beta_dpo/gap_std": 16.322410583496094,
"beta_dpo/loss_margin_mean": 10.128461837768555,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8510959939531368,
"grad_norm": 46.124263763427734,
"learning_rate": 3.349364905389032e-08,
"logits/chosen": 1.3676221370697021,
"logits/rejected": 1.2895047664642334,
"loss": 0.9148,
"step": 563
},
{
"beta_dpo/beta": 0.21877868473529816,
"beta_dpo/beta_margin_grad_mean": -0.3078286945819855,
"beta_dpo/beta_margin_grad_std": 0.34798845648765564,
"beta_dpo/beta_margin_mean": 1.959875464439392,
"beta_dpo/beta_margin_std": 3.711064338684082,
"beta_dpo/beta_used": 0.21877868473529816,
"beta_dpo/beta_used_raw": 0.21877868473529816,
"beta_dpo/gap_mean": 9.217426300048828,
"beta_dpo/gap_std": 16.47534942626953,
"beta_dpo/loss_margin_mean": 8.940733909606934,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8526077097505669,
"grad_norm": 56.61745071411133,
"learning_rate": 3.283557064487785e-08,
"logits/chosen": 1.2832797765731812,
"logits/rejected": 1.250802755355835,
"loss": 0.8822,
"step": 564
},
{
"beta_dpo/beta": 0.09792563319206238,
"beta_dpo/beta_margin_grad_mean": -0.38408800959587097,
"beta_dpo/beta_margin_grad_std": 0.24419058859348297,
"beta_dpo/beta_margin_mean": 1.0215169191360474,
"beta_dpo/beta_margin_std": 2.458621025085449,
"beta_dpo/beta_used": 0.09792563319206238,
"beta_dpo/beta_used_raw": -0.03560522943735123,
"beta_dpo/gap_mean": 9.025110244750977,
"beta_dpo/gap_std": 16.28216552734375,
"beta_dpo/loss_margin_mean": 8.508374214172363,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.854119425547997,
"grad_norm": 36.234596252441406,
"learning_rate": 3.218356679178252e-08,
"logits/chosen": 1.1627860069274902,
"logits/rejected": 1.1426182985305786,
"loss": 1.1816,
"step": 565
},
{
"beta_dpo/beta": 0.08629482984542847,
"beta_dpo/beta_margin_grad_mean": -0.392479807138443,
"beta_dpo/beta_margin_grad_std": 0.24591444432735443,
"beta_dpo/beta_margin_mean": 0.9406875371932983,
"beta_dpo/beta_margin_std": 2.2037181854248047,
"beta_dpo/beta_used": 0.08629482984542847,
"beta_dpo/beta_used_raw": 0.07739803940057755,
"beta_dpo/gap_mean": 8.676336288452148,
"beta_dpo/gap_std": 16.342041015625,
"beta_dpo/loss_margin_mean": 7.305891513824463,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8556311413454271,
"grad_norm": 31.71550750732422,
"learning_rate": 3.1537655732553764e-08,
"logits/chosen": 1.0283910036087036,
"logits/rejected": 0.997430145740509,
"loss": 1.1747,
"step": 566
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49769994616508484,
"beta_dpo/beta_margin_grad_std": 0.003700168803334236,
"beta_dpo/beta_margin_mean": 0.009200900793075562,
"beta_dpo/beta_margin_std": 0.014802070334553719,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.018071264028549194,
"beta_dpo/gap_mean": 8.921218872070312,
"beta_dpo/gap_std": 16.182456970214844,
"beta_dpo/loss_margin_mean": 9.200900077819824,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8571428571428571,
"grad_norm": 0.4065539836883545,
"learning_rate": 3.089785553471233e-08,
"logits/chosen": 1.2240984439849854,
"logits/rejected": 1.1093523502349854,
"loss": 1.3794,
"step": 567
},
{
"beta_dpo/beta": 0.09786317497491837,
"beta_dpo/beta_margin_grad_mean": -0.3605889678001404,
"beta_dpo/beta_margin_grad_std": 0.2532772719860077,
"beta_dpo/beta_margin_mean": 1.508376121520996,
"beta_dpo/beta_margin_std": 2.6363301277160645,
"beta_dpo/beta_used": 0.09786317497491837,
"beta_dpo/beta_used_raw": 0.03654339164495468,
"beta_dpo/gap_mean": 8.895964622497559,
"beta_dpo/gap_std": 15.841314315795898,
"beta_dpo/loss_margin_mean": 10.623019218444824,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8586545729402872,
"grad_norm": 36.745548248291016,
"learning_rate": 3.026418409484513e-08,
"logits/chosen": 1.1992294788360596,
"logits/rejected": 1.1165108680725098,
"loss": 1.2022,
"step": 568
},
{
"beta_dpo/beta": 0.09338933974504471,
"beta_dpo/beta_margin_grad_mean": -0.3815169036388397,
"beta_dpo/beta_margin_grad_std": 0.2267947942018509,
"beta_dpo/beta_margin_mean": 0.9926204681396484,
"beta_dpo/beta_margin_std": 1.9438563585281372,
"beta_dpo/beta_used": 0.09338933974504471,
"beta_dpo/beta_used_raw": 0.05557567998766899,
"beta_dpo/gap_mean": 9.067608833312988,
"beta_dpo/gap_std": 15.582027435302734,
"beta_dpo/loss_margin_mean": 6.859289169311523,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8601662887377173,
"grad_norm": 26.79338836669922,
"learning_rate": 2.963665913810451e-08,
"logits/chosen": 1.0522103309631348,
"logits/rejected": 1.0461184978485107,
"loss": 1.1025,
"step": 569
},
{
"beta_dpo/beta": 0.203478142619133,
"beta_dpo/beta_margin_grad_mean": -0.2527143359184265,
"beta_dpo/beta_margin_grad_std": 0.2713932991027832,
"beta_dpo/beta_margin_mean": 2.6176295280456543,
"beta_dpo/beta_margin_std": 3.5322751998901367,
"beta_dpo/beta_used": 0.203478142619133,
"beta_dpo/beta_used_raw": 0.203478142619133,
"beta_dpo/gap_mean": 9.13400650024414,
"beta_dpo/gap_std": 15.616695404052734,
"beta_dpo/loss_margin_mean": 12.05258560180664,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8616780045351474,
"grad_norm": 36.613887786865234,
"learning_rate": 2.9015298217712453e-08,
"logits/chosen": 1.024835228919983,
"logits/rejected": 0.9980064630508423,
"loss": 0.7511,
"step": 570
},
{
"beta_dpo/beta": 0.07235896587371826,
"beta_dpo/beta_margin_grad_mean": -0.42983123660087585,
"beta_dpo/beta_margin_grad_std": 0.23050378262996674,
"beta_dpo/beta_margin_mean": 0.4017201066017151,
"beta_dpo/beta_margin_std": 1.6213804483413696,
"beta_dpo/beta_used": 0.07235896587371826,
"beta_dpo/beta_used_raw": 0.0374261848628521,
"beta_dpo/gap_mean": 8.91083812713623,
"beta_dpo/gap_std": 15.429361343383789,
"beta_dpo/loss_margin_mean": 5.915493965148926,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8631897203325775,
"grad_norm": 17.79202651977539,
"learning_rate": 2.840011871446962e-08,
"logits/chosen": 1.2358368635177612,
"logits/rejected": 1.2157400846481323,
"loss": 1.0641,
"step": 571
},
{
"beta_dpo/beta": 0.059745196253061295,
"beta_dpo/beta_margin_grad_mean": -0.4255921542644501,
"beta_dpo/beta_margin_grad_std": 0.20457926392555237,
"beta_dpo/beta_margin_mean": 0.40605655312538147,
"beta_dpo/beta_margin_std": 1.3714659214019775,
"beta_dpo/beta_used": 0.059745196253061295,
"beta_dpo/beta_used_raw": 0.033621691167354584,
"beta_dpo/gap_mean": 8.525805473327637,
"beta_dpo/gap_std": 15.347637176513672,
"beta_dpo/loss_margin_mean": 7.5314860343933105,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8647014361300076,
"grad_norm": 21.431163787841797,
"learning_rate": 2.7791137836269158e-08,
"logits/chosen": 1.2674634456634521,
"logits/rejected": 1.276109218597412,
"loss": 1.1399,
"step": 572
},
{
"beta_dpo/beta": 0.1454625129699707,
"beta_dpo/beta_margin_grad_mean": -0.3354867100715637,
"beta_dpo/beta_margin_grad_std": 0.3068930506706238,
"beta_dpo/beta_margin_mean": 1.4749563932418823,
"beta_dpo/beta_margin_std": 3.0397462844848633,
"beta_dpo/beta_used": 0.1454625129699707,
"beta_dpo/beta_used_raw": 0.1454625129699707,
"beta_dpo/gap_mean": 8.657966613769531,
"beta_dpo/gap_std": 15.54708480834961,
"beta_dpo/loss_margin_mean": 9.776642799377441,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8662131519274376,
"grad_norm": 40.563289642333984,
"learning_rate": 2.718837261761528e-08,
"logits/chosen": 1.1352970600128174,
"logits/rejected": 1.0640968084335327,
"loss": 0.9789,
"step": 573
},
{
"beta_dpo/beta": 0.3303490877151489,
"beta_dpo/beta_margin_grad_mean": -0.2543141543865204,
"beta_dpo/beta_margin_grad_std": 0.32116472721099854,
"beta_dpo/beta_margin_mean": 4.120840549468994,
"beta_dpo/beta_margin_std": 5.648134708404541,
"beta_dpo/beta_used": 0.3303490877151489,
"beta_dpo/beta_used_raw": 0.3303490877151489,
"beta_dpo/gap_mean": 9.069449424743652,
"beta_dpo/gap_std": 15.577247619628906,
"beta_dpo/loss_margin_mean": 11.595686912536621,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8677248677248677,
"grad_norm": 68.58607482910156,
"learning_rate": 2.659183991914696e-08,
"logits/chosen": 1.4592229127883911,
"logits/rejected": 1.4214026927947998,
"loss": 0.6931,
"step": 574
},
{
"beta_dpo/beta": 0.006673333700746298,
"beta_dpo/beta_margin_grad_mean": -0.48022425174713135,
"beta_dpo/beta_margin_grad_std": 0.044581227004528046,
"beta_dpo/beta_margin_mean": 0.08041688799858093,
"beta_dpo/beta_margin_std": 0.1813589334487915,
"beta_dpo/beta_used": 0.006673333700746298,
"beta_dpo/beta_used_raw": -0.17582564055919647,
"beta_dpo/gap_mean": 9.28346061706543,
"beta_dpo/gap_std": 15.865880966186523,
"beta_dpo/loss_margin_mean": 7.327334403991699,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8692365835222978,
"grad_norm": 2.8228843212127686,
"learning_rate": 2.600155642716606e-08,
"logits/chosen": 1.2757470607757568,
"logits/rejected": 1.2021794319152832,
"loss": 1.3409,
"step": 575
},
{
"beta_dpo/beta": 0.2936015725135803,
"beta_dpo/beta_margin_grad_mean": -0.23937273025512695,
"beta_dpo/beta_margin_grad_std": 0.3489878177642822,
"beta_dpo/beta_margin_mean": 3.9037559032440186,
"beta_dpo/beta_margin_std": 5.193808555603027,
"beta_dpo/beta_used": 0.2936015725135803,
"beta_dpo/beta_used_raw": 0.2936015725135803,
"beta_dpo/gap_mean": 9.523399353027344,
"beta_dpo/gap_std": 15.896087646484375,
"beta_dpo/loss_margin_mean": 12.640140533447266,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8707482993197279,
"grad_norm": 60.06475067138672,
"learning_rate": 2.5417538653170754e-08,
"logits/chosen": 1.3256943225860596,
"logits/rejected": 1.226050615310669,
"loss": 1.0017,
"step": 576
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4983120858669281,
"beta_dpo/beta_margin_grad_std": 0.003915362525731325,
"beta_dpo/beta_margin_mean": 0.006752183195203543,
"beta_dpo/beta_margin_std": 0.015662673860788345,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.07891594618558884,
"beta_dpo/gap_mean": 9.165458679199219,
"beta_dpo/gap_std": 15.950639724731445,
"beta_dpo/loss_margin_mean": 6.752182960510254,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.872260015117158,
"grad_norm": 0.3212597966194153,
"learning_rate": 2.4839802933393607e-08,
"logits/chosen": 1.0376708507537842,
"logits/rejected": 1.0282740592956543,
"loss": 1.3802,
"step": 577
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4986441433429718,
"beta_dpo/beta_margin_grad_std": 0.0036979857832193375,
"beta_dpo/beta_margin_mean": 0.0054238177835941315,
"beta_dpo/beta_margin_std": 0.014793048612773418,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.11922827363014221,
"beta_dpo/gap_mean": 8.606918334960938,
"beta_dpo/gap_std": 15.817480087280273,
"beta_dpo/loss_margin_mean": 5.4238176345825195,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.873771730914588,
"grad_norm": 0.2846592962741852,
"learning_rate": 2.4268365428344733e-08,
"logits/chosen": 1.3664073944091797,
"logits/rejected": 1.3180696964263916,
"loss": 1.3814,
"step": 578
},
{
"beta_dpo/beta": 0.10250888019800186,
"beta_dpo/beta_margin_grad_mean": -0.3974369466304779,
"beta_dpo/beta_margin_grad_std": 0.2421928346157074,
"beta_dpo/beta_margin_mean": 0.8760362863540649,
"beta_dpo/beta_margin_std": 2.4804162979125977,
"beta_dpo/beta_used": 0.10250888019800186,
"beta_dpo/beta_used_raw": 0.06492999196052551,
"beta_dpo/gap_mean": 8.539130210876465,
"beta_dpo/gap_std": 15.541910171508789,
"beta_dpo/loss_margin_mean": 9.44791030883789,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8752834467120182,
"grad_norm": 29.04618263244629,
"learning_rate": 2.3703242122359357e-08,
"logits/chosen": 1.0949928760528564,
"logits/rejected": 1.0640246868133545,
"loss": 1.0228,
"step": 579
},
{
"beta_dpo/beta": 0.07085993140935898,
"beta_dpo/beta_margin_grad_mean": -0.3925807476043701,
"beta_dpo/beta_margin_grad_std": 0.22485065460205078,
"beta_dpo/beta_margin_mean": 0.738163948059082,
"beta_dpo/beta_margin_std": 1.6701350212097168,
"beta_dpo/beta_used": 0.07085993140935898,
"beta_dpo/beta_used_raw": 0.04937838017940521,
"beta_dpo/gap_mean": 8.2997407913208,
"beta_dpo/gap_std": 15.717334747314453,
"beta_dpo/loss_margin_mean": 7.733586311340332,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8767951625094482,
"grad_norm": 19.86193084716797,
"learning_rate": 2.3144448823151392e-08,
"logits/chosen": 1.2375001907348633,
"logits/rejected": 1.1523990631103516,
"loss": 1.0934,
"step": 580
},
{
"beta_dpo/beta": 0.2419264167547226,
"beta_dpo/beta_margin_grad_mean": -0.2971521019935608,
"beta_dpo/beta_margin_grad_std": 0.29189354181289673,
"beta_dpo/beta_margin_mean": 2.6469461917877197,
"beta_dpo/beta_margin_std": 5.749914646148682,
"beta_dpo/beta_used": 0.2419264167547226,
"beta_dpo/beta_used_raw": 0.2419264167547226,
"beta_dpo/gap_mean": 8.610160827636719,
"beta_dpo/gap_std": 15.83781623840332,
"beta_dpo/loss_margin_mean": 8.952991485595703,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8783068783068783,
"grad_norm": 63.67040252685547,
"learning_rate": 2.259200116137039e-08,
"logits/chosen": 1.2289148569107056,
"logits/rejected": 1.1664001941680908,
"loss": 1.1243,
"step": 581
},
{
"beta_dpo/beta": 0.0896616280078888,
"beta_dpo/beta_margin_grad_mean": -0.4158861041069031,
"beta_dpo/beta_margin_grad_std": 0.2637236416339874,
"beta_dpo/beta_margin_mean": 0.8592485785484314,
"beta_dpo/beta_margin_std": 2.4116122722625732,
"beta_dpo/beta_used": 0.0896616280078888,
"beta_dpo/beta_used_raw": 0.06425061821937561,
"beta_dpo/gap_mean": 8.28162956237793,
"beta_dpo/gap_std": 15.801387786865234,
"beta_dpo/loss_margin_mean": 7.660096645355225,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8798185941043084,
"grad_norm": 34.26008605957031,
"learning_rate": 2.204591459016525e-08,
"logits/chosen": 0.972122848033905,
"logits/rejected": 0.9931256771087646,
"loss": 1.3696,
"step": 582
},
{
"beta_dpo/beta": 0.016050279140472412,
"beta_dpo/beta_margin_grad_mean": -0.4742169678211212,
"beta_dpo/beta_margin_grad_std": 0.08747411519289017,
"beta_dpo/beta_margin_mean": 0.10880840569734573,
"beta_dpo/beta_margin_std": 0.36883479356765747,
"beta_dpo/beta_used": 0.016050279140472412,
"beta_dpo/beta_used_raw": -0.04681949317455292,
"beta_dpo/gap_mean": 7.949472427368164,
"beta_dpo/gap_std": 16.228107452392578,
"beta_dpo/loss_margin_mean": 5.8783698081970215,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8813303099017384,
"grad_norm": 7.858330726623535,
"learning_rate": 2.1506204384751064e-08,
"logits/chosen": 1.2844696044921875,
"logits/rejected": 1.1568622589111328,
"loss": 1.3039,
"step": 583
},
{
"beta_dpo/beta": 0.259249210357666,
"beta_dpo/beta_margin_grad_mean": -0.3204815089702606,
"beta_dpo/beta_margin_grad_std": 0.35193508863449097,
"beta_dpo/beta_margin_mean": 2.6451680660247803,
"beta_dpo/beta_margin_std": 4.729509353637695,
"beta_dpo/beta_used": 0.259249210357666,
"beta_dpo/beta_used_raw": 0.259249210357666,
"beta_dpo/gap_mean": 8.241252899169922,
"beta_dpo/gap_std": 16.267311096191406,
"beta_dpo/loss_margin_mean": 9.067033767700195,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8828420256991686,
"grad_norm": 57.75297164916992,
"learning_rate": 2.09728856419826e-08,
"logits/chosen": 1.3111417293548584,
"logits/rejected": 1.1976943016052246,
"loss": 0.9995,
"step": 584
},
{
"beta_dpo/beta": 0.16461847722530365,
"beta_dpo/beta_margin_grad_mean": -0.39904317259788513,
"beta_dpo/beta_margin_grad_std": 0.2841614782810211,
"beta_dpo/beta_margin_mean": 1.2675442695617676,
"beta_dpo/beta_margin_std": 3.9270401000976562,
"beta_dpo/beta_used": 0.16461847722530365,
"beta_dpo/beta_used_raw": 0.14850212633609772,
"beta_dpo/gap_mean": 7.797574520111084,
"beta_dpo/gap_std": 15.795310974121094,
"beta_dpo/loss_margin_mean": 6.529803276062012,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8843537414965986,
"grad_norm": 49.424468994140625,
"learning_rate": 2.044597327993153e-08,
"logits/chosen": 1.2629481554031372,
"logits/rejected": 1.21295166015625,
"loss": 1.0875,
"step": 585
},
{
"beta_dpo/beta": 0.14468266069889069,
"beta_dpo/beta_margin_grad_mean": -0.3076460361480713,
"beta_dpo/beta_margin_grad_std": 0.26763400435447693,
"beta_dpo/beta_margin_mean": 1.4467971324920654,
"beta_dpo/beta_margin_std": 2.1389639377593994,
"beta_dpo/beta_used": 0.14468266069889069,
"beta_dpo/beta_used_raw": 0.14468266069889069,
"beta_dpo/gap_mean": 8.13833999633789,
"beta_dpo/gap_std": 15.544172286987305,
"beta_dpo/loss_margin_mean": 10.076703071594238,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8858654572940288,
"grad_norm": 40.17343521118164,
"learning_rate": 1.9925482037469187e-08,
"logits/chosen": 1.5505293607711792,
"logits/rejected": 1.4857732057571411,
"loss": 1.1052,
"step": 586
},
{
"beta_dpo/beta": 0.17714223265647888,
"beta_dpo/beta_margin_grad_mean": -0.3253132998943329,
"beta_dpo/beta_margin_grad_std": 0.2992173135280609,
"beta_dpo/beta_margin_mean": 1.910219669342041,
"beta_dpo/beta_margin_std": 3.6033554077148438,
"beta_dpo/beta_used": 0.17714223265647888,
"beta_dpo/beta_used_raw": 0.17714223265647888,
"beta_dpo/gap_mean": 8.57592487335205,
"beta_dpo/gap_std": 15.818859100341797,
"beta_dpo/loss_margin_mean": 10.5182523727417,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8873771730914588,
"grad_norm": 54.24024963378906,
"learning_rate": 1.9411426473854687e-08,
"logits/chosen": 1.3305864334106445,
"logits/rejected": 1.324970006942749,
"loss": 0.9144,
"step": 587
},
{
"beta_dpo/beta": 0.10519608110189438,
"beta_dpo/beta_margin_grad_mean": -0.3480878472328186,
"beta_dpo/beta_margin_grad_std": 0.24397340416908264,
"beta_dpo/beta_margin_mean": 0.7981663942337036,
"beta_dpo/beta_margin_std": 1.4612581729888916,
"beta_dpo/beta_used": 0.10519608110189438,
"beta_dpo/beta_used_raw": 0.10519608110189438,
"beta_dpo/gap_mean": 8.550680160522461,
"beta_dpo/gap_std": 15.71683120727539,
"beta_dpo/loss_margin_mean": 7.717644214630127,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8888888888888888,
"grad_norm": 22.78832244873047,
"learning_rate": 1.890382096832699e-08,
"logits/chosen": 1.1842995882034302,
"logits/rejected": 1.1582131385803223,
"loss": 0.9884,
"step": 588
},
{
"beta_dpo/beta": 0.1493714153766632,
"beta_dpo/beta_margin_grad_mean": -0.30983808636665344,
"beta_dpo/beta_margin_grad_std": 0.25290393829345703,
"beta_dpo/beta_margin_mean": 1.4465526342391968,
"beta_dpo/beta_margin_std": 2.117835283279419,
"beta_dpo/beta_used": 0.1493714153766632,
"beta_dpo/beta_used_raw": 0.1493714153766632,
"beta_dpo/gap_mean": 8.739282608032227,
"beta_dpo/gap_std": 15.474782943725586,
"beta_dpo/loss_margin_mean": 10.057303428649902,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.890400604686319,
"grad_norm": 29.803503036499023,
"learning_rate": 1.840267971970344e-08,
"logits/chosen": 1.0262922048568726,
"logits/rejected": 1.0313639640808105,
"loss": 0.7967,
"step": 589
},
{
"beta_dpo/beta": 0.1304425448179245,
"beta_dpo/beta_margin_grad_mean": -0.28577888011932373,
"beta_dpo/beta_margin_grad_std": 0.23786579072475433,
"beta_dpo/beta_margin_mean": 1.4526952505111694,
"beta_dpo/beta_margin_std": 1.8333840370178223,
"beta_dpo/beta_used": 0.1304425448179245,
"beta_dpo/beta_used_raw": 0.1304425448179245,
"beta_dpo/gap_mean": 9.032739639282227,
"beta_dpo/gap_std": 14.90475845336914,
"beta_dpo/loss_margin_mean": 10.960782051086426,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.891912320483749,
"grad_norm": 30.006053924560547,
"learning_rate": 1.7908016745981856e-08,
"logits/chosen": 1.282177448272705,
"logits/rejected": 1.2717326879501343,
"loss": 0.8333,
"step": 590
},
{
"beta_dpo/beta": 0.09730789065361023,
"beta_dpo/beta_margin_grad_mean": -0.3583000600337982,
"beta_dpo/beta_margin_grad_std": 0.24750088155269623,
"beta_dpo/beta_margin_mean": 1.3048549890518188,
"beta_dpo/beta_margin_std": 2.435183048248291,
"beta_dpo/beta_used": 0.09730789065361023,
"beta_dpo/beta_used_raw": 0.022840075194835663,
"beta_dpo/gap_mean": 9.362573623657227,
"beta_dpo/gap_std": 14.858987808227539,
"beta_dpo/loss_margin_mean": 8.98420238494873,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8934240362811792,
"grad_norm": 28.505207061767578,
"learning_rate": 1.7419845883949098e-08,
"logits/chosen": 1.1761068105697632,
"logits/rejected": 1.1118135452270508,
"loss": 1.1498,
"step": 591
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49769604206085205,
"beta_dpo/beta_margin_grad_std": 0.004002981819212437,
"beta_dpo/beta_margin_mean": 0.009216600097715855,
"beta_dpo/beta_margin_std": 0.016013581305742264,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.06022973731160164,
"beta_dpo/gap_mean": 9.103561401367188,
"beta_dpo/gap_std": 15.066686630249023,
"beta_dpo/loss_margin_mean": 9.21660041809082,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8949357520786092,
"grad_norm": 0.33266639709472656,
"learning_rate": 1.6938180788793556e-08,
"logits/chosen": 1.1416816711425781,
"logits/rejected": 1.0328766107559204,
"loss": 1.3799,
"step": 592
},
{
"beta_dpo/beta": 0.1512017697095871,
"beta_dpo/beta_margin_grad_mean": -0.37732359766960144,
"beta_dpo/beta_margin_grad_std": 0.2817494869232178,
"beta_dpo/beta_margin_mean": 1.483587622642517,
"beta_dpo/beta_margin_std": 3.5750889778137207,
"beta_dpo/beta_used": 0.1512017697095871,
"beta_dpo/beta_used_raw": 0.1512017697095871,
"beta_dpo/gap_mean": 9.142255783081055,
"beta_dpo/gap_std": 15.243630409240723,
"beta_dpo/loss_margin_mean": 9.367238998413086,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8964474678760394,
"grad_norm": 46.93597412109375,
"learning_rate": 1.6463034933723336e-08,
"logits/chosen": 1.2500877380371094,
"logits/rejected": 1.1615597009658813,
"loss": 1.061,
"step": 593
},
{
"beta_dpo/beta": 0.06817789375782013,
"beta_dpo/beta_margin_grad_mean": -0.42273271083831787,
"beta_dpo/beta_margin_grad_std": 0.22297364473342896,
"beta_dpo/beta_margin_mean": 0.5673213601112366,
"beta_dpo/beta_margin_std": 1.5483680963516235,
"beta_dpo/beta_used": 0.06817789375782013,
"beta_dpo/beta_used_raw": 0.042327724397182465,
"beta_dpo/gap_mean": 8.802755355834961,
"beta_dpo/gap_std": 15.327518463134766,
"beta_dpo/loss_margin_mean": 5.755950927734375,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8979591836734694,
"grad_norm": 21.160133361816406,
"learning_rate": 1.5994421609589385e-08,
"logits/chosen": 1.3079917430877686,
"logits/rejected": 1.2591397762298584,
"loss": 1.1284,
"step": 594
},
{
"beta_dpo/beta": 0.2774176597595215,
"beta_dpo/beta_margin_grad_mean": -0.24613875150680542,
"beta_dpo/beta_margin_grad_std": 0.3464008867740631,
"beta_dpo/beta_margin_mean": 2.811112403869629,
"beta_dpo/beta_margin_std": 4.953824520111084,
"beta_dpo/beta_used": 0.2774176597595215,
"beta_dpo/beta_used_raw": 0.2774176597595215,
"beta_dpo/gap_mean": 8.735965728759766,
"beta_dpo/gap_std": 15.631080627441406,
"beta_dpo/loss_margin_mean": 10.130765914916992,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8994708994708994,
"grad_norm": 64.07139587402344,
"learning_rate": 1.553235392451377e-08,
"logits/chosen": 1.4716193675994873,
"logits/rejected": 1.3438546657562256,
"loss": 0.8868,
"step": 595
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4990660846233368,
"beta_dpo/beta_margin_grad_std": 0.0036789732985198498,
"beta_dpo/beta_margin_mean": 0.0037360715214163065,
"beta_dpo/beta_margin_std": 0.014717076905071735,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.18999116122722626,
"beta_dpo/gap_mean": 8.070295333862305,
"beta_dpo/gap_std": 15.721334457397461,
"beta_dpo/loss_margin_mean": 3.7360713481903076,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9009826152683296,
"grad_norm": 0.2939727306365967,
"learning_rate": 1.507684480352292e-08,
"logits/chosen": 1.0870261192321777,
"logits/rejected": 1.1027809381484985,
"loss": 1.3831,
"step": 596
},
{
"beta_dpo/beta": 0.10379099100828171,
"beta_dpo/beta_margin_grad_mean": -0.3936522901058197,
"beta_dpo/beta_margin_grad_std": 0.22852064669132233,
"beta_dpo/beta_margin_mean": 0.9076488614082336,
"beta_dpo/beta_margin_std": 1.9588474035263062,
"beta_dpo/beta_used": 0.10379099100828171,
"beta_dpo/beta_used_raw": 0.05178498104214668,
"beta_dpo/gap_mean": 8.036808013916016,
"beta_dpo/gap_std": 15.484565734863281,
"beta_dpo/loss_margin_mean": 9.03473949432373,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9024943310657596,
"grad_norm": 24.972312927246094,
"learning_rate": 1.4627906988186111e-08,
"logits/chosen": 1.3164136409759521,
"logits/rejected": 1.314997911453247,
"loss": 1.0269,
"step": 597
},
{
"beta_dpo/beta": 0.01767176389694214,
"beta_dpo/beta_margin_grad_mean": -0.4686887562274933,
"beta_dpo/beta_margin_grad_std": 0.0846300721168518,
"beta_dpo/beta_margin_mean": 0.1313055008649826,
"beta_dpo/beta_margin_std": 0.3668515980243683,
"beta_dpo/beta_used": 0.01767176389694214,
"beta_dpo/beta_used_raw": -0.05600941181182861,
"beta_dpo/gap_mean": 8.039358139038086,
"beta_dpo/gap_std": 15.246614456176758,
"beta_dpo/loss_margin_mean": 8.034171104431152,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9040060468631897,
"grad_norm": 7.187092304229736,
"learning_rate": 1.4185553036259095e-08,
"logits/chosen": 1.18892240524292,
"logits/rejected": 1.112982988357544,
"loss": 1.2838,
"step": 598
},
{
"beta_dpo/beta": 0.08740982413291931,
"beta_dpo/beta_margin_grad_mean": -0.41112831234931946,
"beta_dpo/beta_margin_grad_std": 0.2523919939994812,
"beta_dpo/beta_margin_mean": 0.6939030885696411,
"beta_dpo/beta_margin_std": 2.0126750469207764,
"beta_dpo/beta_used": 0.08740982413291931,
"beta_dpo/beta_used_raw": 0.03378527611494064,
"beta_dpo/gap_mean": 7.967884063720703,
"beta_dpo/gap_std": 15.234304428100586,
"beta_dpo/loss_margin_mean": 7.490134239196777,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9055177626606198,
"grad_norm": 27.660696029663086,
"learning_rate": 1.3749795321332885e-08,
"logits/chosen": 1.2530118227005005,
"logits/rejected": 1.2126647233963013,
"loss": 1.1492,
"step": 599
},
{
"beta_dpo/beta": 0.11240988969802856,
"beta_dpo/beta_margin_grad_mean": -0.3938973546028137,
"beta_dpo/beta_margin_grad_std": 0.25913897156715393,
"beta_dpo/beta_margin_mean": 0.6383807063102722,
"beta_dpo/beta_margin_std": 1.723300814628601,
"beta_dpo/beta_used": 0.11240988969802856,
"beta_dpo/beta_used_raw": 0.11240988969802856,
"beta_dpo/gap_mean": 7.678271293640137,
"beta_dpo/gap_std": 15.040021896362305,
"beta_dpo/loss_margin_mean": 6.692630290985107,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9070294784580499,
"grad_norm": 31.70339012145996,
"learning_rate": 1.3320646032487393e-08,
"logits/chosen": 1.3531250953674316,
"logits/rejected": 1.2455155849456787,
"loss": 1.0426,
"step": 600
},
{
"epoch": 0.9070294784580499,
"eval_beta_dpo/beta": 0.16284236311912537,
"eval_beta_dpo/beta_margin_grad_mean": -0.34278053045272827,
"eval_beta_dpo/beta_margin_grad_std": 0.24819053709506989,
"eval_beta_dpo/beta_margin_mean": 1.6614328622817993,
"eval_beta_dpo/beta_margin_std": 2.518134117126465,
"eval_beta_dpo/beta_used": 0.16284236311912537,
"eval_beta_dpo/beta_used_raw": 0.1531587541103363,
"eval_beta_dpo/gap_mean": 7.747578144073486,
"eval_beta_dpo/gap_std": 14.899542808532715,
"eval_beta_dpo/loss_margin_mean": 8.63355827331543,
"eval_beta_dpo/mask_keep_frac": 1.0,
"eval_logits/chosen": 1.2070788145065308,
"eval_logits/rejected": 1.14027738571167,
"eval_loss": 0.6518335938453674,
"eval_runtime": 43.528,
"eval_samples_per_second": 52.909,
"eval_steps_per_second": 1.654,
"step": 600
},
{
"beta_dpo/beta": 0.10141826421022415,
"beta_dpo/beta_margin_grad_mean": -0.34344714879989624,
"beta_dpo/beta_margin_grad_std": 0.22835154831409454,
"beta_dpo/beta_margin_mean": 1.0757205486297607,
"beta_dpo/beta_margin_std": 1.7910301685333252,
"beta_dpo/beta_used": 0.10141826421022415,
"beta_dpo/beta_used_raw": 0.10141826421022415,
"beta_dpo/gap_mean": 8.107170104980469,
"beta_dpo/gap_std": 14.828914642333984,
"beta_dpo/loss_margin_mean": 9.79207992553711,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.90854119425548,
"grad_norm": 22.663358688354492,
"learning_rate": 1.2898117173950868e-08,
"logits/chosen": 1.166858196258545,
"logits/rejected": 1.0885355472564697,
"loss": 1.0032,
"step": 601
},
{
"beta_dpo/beta": 0.1556924283504486,
"beta_dpo/beta_margin_grad_mean": -0.29175105690956116,
"beta_dpo/beta_margin_grad_std": 0.25710615515708923,
"beta_dpo/beta_margin_mean": 1.5223388671875,
"beta_dpo/beta_margin_std": 2.0580081939697266,
"beta_dpo/beta_used": 0.1556924283504486,
"beta_dpo/beta_used_raw": 0.1556924283504486,
"beta_dpo/gap_mean": 8.32609748840332,
"beta_dpo/gap_std": 14.536138534545898,
"beta_dpo/loss_margin_mean": 9.849860191345215,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.91005291005291,
"grad_norm": 32.89277267456055,
"learning_rate": 1.2482220564763667e-08,
"logits/chosen": 1.137499213218689,
"logits/rejected": 1.0821897983551025,
"loss": 0.8122,
"step": 602
},
{
"beta_dpo/beta": 0.03461594507098198,
"beta_dpo/beta_margin_grad_mean": -0.4290250837802887,
"beta_dpo/beta_margin_grad_std": 0.13414107263088226,
"beta_dpo/beta_margin_mean": 0.33947205543518066,
"beta_dpo/beta_margin_std": 0.6727907061576843,
"beta_dpo/beta_used": 0.03461594507098198,
"beta_dpo/beta_used_raw": -0.00844324380159378,
"beta_dpo/gap_mean": 8.558271408081055,
"beta_dpo/gap_std": 14.459668159484863,
"beta_dpo/loss_margin_mean": 8.81813907623291,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9115646258503401,
"grad_norm": 11.329524040222168,
"learning_rate": 1.2072967838448051e-08,
"logits/chosen": 0.8011248707771301,
"logits/rejected": 0.7711565494537354,
"loss": 1.1882,
"step": 603
},
{
"beta_dpo/beta": 0.13645920157432556,
"beta_dpo/beta_margin_grad_mean": -0.3494945764541626,
"beta_dpo/beta_margin_grad_std": 0.27078843116760254,
"beta_dpo/beta_margin_mean": 1.1950139999389648,
"beta_dpo/beta_margin_std": 2.49851655960083,
"beta_dpo/beta_used": 0.13645920157432556,
"beta_dpo/beta_used_raw": 0.13645920157432556,
"beta_dpo/gap_mean": 8.562814712524414,
"beta_dpo/gap_std": 14.682696342468262,
"beta_dpo/loss_margin_mean": 8.797730445861816,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9130763416477702,
"grad_norm": 42.889583587646484,
"learning_rate": 1.1670370442682459e-08,
"logits/chosen": 1.1775734424591064,
"logits/rejected": 1.1526458263397217,
"loss": 1.0029,
"step": 604
},
{
"beta_dpo/beta": 0.024525772780179977,
"beta_dpo/beta_margin_grad_mean": -0.46467721462249756,
"beta_dpo/beta_margin_grad_std": 0.12513820827007294,
"beta_dpo/beta_margin_mean": 0.16219019889831543,
"beta_dpo/beta_margin_std": 0.5986112356185913,
"beta_dpo/beta_used": 0.024525772780179977,
"beta_dpo/beta_used_raw": -0.03071369044482708,
"beta_dpo/gap_mean": 8.287137985229492,
"beta_dpo/gap_std": 14.952404975891113,
"beta_dpo/loss_margin_mean": 6.617243766784668,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9145880574452003,
"grad_norm": 9.076241493225098,
"learning_rate": 1.1274439638981532e-08,
"logits/chosen": 1.2894868850708008,
"logits/rejected": 1.2614383697509766,
"loss": 1.2515,
"step": 605
},
{
"beta_dpo/beta": 0.09253361076116562,
"beta_dpo/beta_margin_grad_mean": -0.37834733724594116,
"beta_dpo/beta_margin_grad_std": 0.23104941844940186,
"beta_dpo/beta_margin_mean": 1.1032108068466187,
"beta_dpo/beta_margin_std": 2.277953624725342,
"beta_dpo/beta_used": 0.09253361076116562,
"beta_dpo/beta_used_raw": 0.07488581538200378,
"beta_dpo/gap_mean": 8.222452163696289,
"beta_dpo/gap_std": 14.61074447631836,
"beta_dpo/loss_margin_mean": 9.209124565124512,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9160997732426304,
"grad_norm": 27.21750831604004,
"learning_rate": 1.0885186502381016e-08,
"logits/chosen": 1.1829588413238525,
"logits/rejected": 1.117699384689331,
"loss": 1.0745,
"step": 606
},
{
"beta_dpo/beta": 0.0583856962621212,
"beta_dpo/beta_margin_grad_mean": -0.3807748556137085,
"beta_dpo/beta_margin_grad_std": 0.19601111114025116,
"beta_dpo/beta_margin_mean": 0.7777360677719116,
"beta_dpo/beta_margin_std": 1.383155345916748,
"beta_dpo/beta_used": 0.0583856962621212,
"beta_dpo/beta_used_raw": 0.0036773681640625,
"beta_dpo/gap_mean": 8.679821968078613,
"beta_dpo/gap_std": 14.6058931350708,
"beta_dpo/loss_margin_mean": 8.788156509399414,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9176114890400605,
"grad_norm": 20.968740463256836,
"learning_rate": 1.0502621921127774e-08,
"logits/chosen": 1.1087911128997803,
"logits/rejected": 1.060031533241272,
"loss": 1.1452,
"step": 607
},
{
"beta_dpo/beta": 0.03544434905052185,
"beta_dpo/beta_margin_grad_mean": -0.46062541007995605,
"beta_dpo/beta_margin_grad_std": 0.12345908582210541,
"beta_dpo/beta_margin_mean": 0.17531032860279083,
"beta_dpo/beta_margin_std": 0.54575514793396,
"beta_dpo/beta_used": 0.03544434905052185,
"beta_dpo/beta_used_raw": 0.03544434905052185,
"beta_dpo/gap_mean": 7.942313194274902,
"beta_dpo/gap_std": 14.522319793701172,
"beta_dpo/loss_margin_mean": 5.154706954956055,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9191232048374905,
"grad_norm": 14.060853004455566,
"learning_rate": 1.0126756596375685e-08,
"logits/chosen": 1.320879340171814,
"logits/rejected": 1.1834135055541992,
"loss": 1.2106,
"step": 608
},
{
"beta_dpo/beta": 0.24092288315296173,
"beta_dpo/beta_margin_grad_mean": -0.28901004791259766,
"beta_dpo/beta_margin_grad_std": 0.2751197814941406,
"beta_dpo/beta_margin_mean": 2.6792190074920654,
"beta_dpo/beta_margin_std": 4.407725811004639,
"beta_dpo/beta_used": 0.24092288315296173,
"beta_dpo/beta_used_raw": 0.24092288315296173,
"beta_dpo/gap_mean": 8.123943328857422,
"beta_dpo/gap_std": 14.249687194824219,
"beta_dpo/loss_margin_mean": 8.886117935180664,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9206349206349206,
"grad_norm": 43.7921257019043,
"learning_rate": 9.757601041885694e-09,
"logits/chosen": 1.4151957035064697,
"logits/rejected": 1.3399848937988281,
"loss": 0.7838,
"step": 609
},
{
"beta_dpo/beta": 0.08768744766712189,
"beta_dpo/beta_margin_grad_mean": -0.37437552213668823,
"beta_dpo/beta_margin_grad_std": 0.24727308750152588,
"beta_dpo/beta_margin_mean": 0.8633731007575989,
"beta_dpo/beta_margin_std": 1.718218445777893,
"beta_dpo/beta_used": 0.08768744766712189,
"beta_dpo/beta_used_raw": 0.08768744766712189,
"beta_dpo/gap_mean": 8.23539924621582,
"beta_dpo/gap_std": 14.590499877929688,
"beta_dpo/loss_margin_mean": 9.4933500289917,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9221466364323507,
"grad_norm": 31.680171966552734,
"learning_rate": 9.395165583732379e-09,
"logits/chosen": 1.3596689701080322,
"logits/rejected": 1.2988369464874268,
"loss": 1.1331,
"step": 610
},
{
"beta_dpo/beta": 0.0011600707657635212,
"beta_dpo/beta_margin_grad_mean": -0.49776604771614075,
"beta_dpo/beta_margin_grad_std": 0.004311481025069952,
"beta_dpo/beta_margin_mean": 0.00893681962043047,
"beta_dpo/beta_margin_std": 0.017248092219233513,
"beta_dpo/beta_used": 0.0011600707657635212,
"beta_dpo/beta_used_raw": -0.03458194062113762,
"beta_dpo/gap_mean": 8.225613594055176,
"beta_dpo/gap_std": 14.824548721313477,
"beta_dpo/loss_margin_mean": 7.506056308746338,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9236583522297808,
"grad_norm": 0.3966653347015381,
"learning_rate": 9.03946036001449e-09,
"logits/chosen": 1.2817761898040771,
"logits/rejected": 1.2514748573303223,
"loss": 1.3793,
"step": 611
},
{
"beta_dpo/beta": 0.14685246348381042,
"beta_dpo/beta_margin_grad_mean": -0.3220314681529999,
"beta_dpo/beta_margin_grad_std": 0.2983168363571167,
"beta_dpo/beta_margin_mean": 1.4434245824813843,
"beta_dpo/beta_margin_std": 2.5013015270233154,
"beta_dpo/beta_used": 0.14685246348381042,
"beta_dpo/beta_used_raw": 0.14685246348381042,
"beta_dpo/gap_mean": 8.368420600891113,
"beta_dpo/gap_std": 15.056795120239258,
"beta_dpo/loss_margin_mean": 9.817299842834473,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9251700680272109,
"grad_norm": 29.391437530517578,
"learning_rate": 8.690495320571839e-09,
"logits/chosen": 1.2257752418518066,
"logits/rejected": 1.165148138999939,
"loss": 0.8716,
"step": 612
},
{
"beta_dpo/beta": 0.42597997188568115,
"beta_dpo/beta_margin_grad_mean": -0.22234027087688446,
"beta_dpo/beta_margin_grad_std": 0.35421091318130493,
"beta_dpo/beta_margin_mean": 5.603759765625,
"beta_dpo/beta_margin_std": 7.522528171539307,
"beta_dpo/beta_used": 0.42597997188568115,
"beta_dpo/beta_used_raw": 0.42597997188568115,
"beta_dpo/gap_mean": 8.936678886413574,
"beta_dpo/gap_std": 15.329559326171875,
"beta_dpo/loss_margin_mean": 12.598337173461914,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.926681783824641,
"grad_norm": 62.93752670288086,
"learning_rate": 8.348280226706722e-09,
"logits/chosen": 1.1820428371429443,
"logits/rejected": 1.2033995389938354,
"loss": 0.58,
"step": 613
},
{
"beta_dpo/beta": 0.09866636246442795,
"beta_dpo/beta_margin_grad_mean": -0.35599809885025024,
"beta_dpo/beta_margin_grad_std": 0.24741540849208832,
"beta_dpo/beta_margin_mean": 0.7811317443847656,
"beta_dpo/beta_margin_std": 1.3711615800857544,
"beta_dpo/beta_used": 0.09866636246442795,
"beta_dpo/beta_used_raw": 0.09866636246442795,
"beta_dpo/gap_mean": 9.042693138122559,
"beta_dpo/gap_std": 15.193923950195312,
"beta_dpo/loss_margin_mean": 7.999011516571045,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9281934996220711,
"grad_norm": 26.360315322875977,
"learning_rate": 8.012824650910937e-09,
"logits/chosen": 1.4054988622665405,
"logits/rejected": 1.3883737325668335,
"loss": 0.9817,
"step": 614
},
{
"beta_dpo/beta": 0.09118252992630005,
"beta_dpo/beta_margin_grad_mean": -0.35918137431144714,
"beta_dpo/beta_margin_grad_std": 0.24969178438186646,
"beta_dpo/beta_margin_mean": 0.8582610487937927,
"beta_dpo/beta_margin_std": 1.515799880027771,
"beta_dpo/beta_used": 0.09118252992630005,
"beta_dpo/beta_used_raw": 0.09118252992630005,
"beta_dpo/gap_mean": 9.102291107177734,
"beta_dpo/gap_std": 15.299556732177734,
"beta_dpo/loss_margin_mean": 9.143074989318848,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9297052154195011,
"grad_norm": 28.177825927734375,
"learning_rate": 7.684137976598088e-09,
"logits/chosen": 1.1988410949707031,
"logits/rejected": 1.130389928817749,
"loss": 1.0275,
"step": 615
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49804696440696716,
"beta_dpo/beta_margin_grad_std": 0.003881829557940364,
"beta_dpo/beta_margin_mean": 0.007812697440385818,
"beta_dpo/beta_margin_std": 0.015528511255979538,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.08658087253570557,
"beta_dpo/gap_mean": 8.891166687011719,
"beta_dpo/gap_std": 15.544229507446289,
"beta_dpo/loss_margin_mean": 7.812697410583496,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9312169312169312,
"grad_norm": 0.43700990080833435,
"learning_rate": 7.36222939784098e-09,
"logits/chosen": 1.1711599826812744,
"logits/rejected": 1.0668097734451294,
"loss": 1.3806,
"step": 616
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.498007595539093,
"beta_dpo/beta_margin_grad_std": 0.0031482342164963484,
"beta_dpo/beta_margin_mean": 0.007969984784722328,
"beta_dpo/beta_margin_std": 0.012593930587172508,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.033831048756837845,
"beta_dpo/gap_mean": 8.742076873779297,
"beta_dpo/gap_std": 15.113273620605469,
"beta_dpo/loss_margin_mean": 7.969984531402588,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9327286470143613,
"grad_norm": 0.3276781439781189,
"learning_rate": 7.047107919114586e-09,
"logits/chosen": 1.1438710689544678,
"logits/rejected": 1.1049525737762451,
"loss": 1.3798,
"step": 617
},
{
"beta_dpo/beta": 0.024464260786771774,
"beta_dpo/beta_margin_grad_mean": -0.4384101927280426,
"beta_dpo/beta_margin_grad_std": 0.12578776478767395,
"beta_dpo/beta_margin_mean": 0.2871224582195282,
"beta_dpo/beta_margin_std": 0.5958966016769409,
"beta_dpo/beta_used": 0.024464260786771774,
"beta_dpo/beta_used_raw": -0.026629796251654625,
"beta_dpo/gap_mean": 8.742508888244629,
"beta_dpo/gap_std": 15.030193328857422,
"beta_dpo/loss_margin_mean": 8.431224822998047,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9342403628117913,
"grad_norm": 9.483170509338379,
"learning_rate": 6.738782355044048e-09,
"logits/chosen": 1.2045261859893799,
"logits/rejected": 1.0894981622695923,
"loss": 1.2553,
"step": 618
},
{
"beta_dpo/beta": 0.2674694061279297,
"beta_dpo/beta_margin_grad_mean": -0.3007112145423889,
"beta_dpo/beta_margin_grad_std": 0.32441580295562744,
"beta_dpo/beta_margin_mean": 2.8940837383270264,
"beta_dpo/beta_margin_std": 5.033651828765869,
"beta_dpo/beta_used": 0.2674694061279297,
"beta_dpo/beta_used_raw": 0.2674694061279297,
"beta_dpo/gap_mean": 8.856648445129395,
"beta_dpo/gap_std": 15.28053092956543,
"beta_dpo/loss_margin_mean": 9.983675956726074,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9357520786092215,
"grad_norm": 63.246665954589844,
"learning_rate": 6.437261330158206e-09,
"logits/chosen": 1.392554521560669,
"logits/rejected": 1.2864084243774414,
"loss": 0.801,
"step": 619
},
{
"beta_dpo/beta": 0.08081783354282379,
"beta_dpo/beta_margin_grad_mean": -0.3847609758377075,
"beta_dpo/beta_margin_grad_std": 0.22657740116119385,
"beta_dpo/beta_margin_mean": 0.9509609341621399,
"beta_dpo/beta_margin_std": 1.8485666513442993,
"beta_dpo/beta_used": 0.08081783354282379,
"beta_dpo/beta_used_raw": 0.021886445581912994,
"beta_dpo/gap_mean": 8.459717750549316,
"beta_dpo/gap_std": 15.082178115844727,
"beta_dpo/loss_margin_mean": 7.745196342468262,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9372637944066515,
"grad_norm": 24.837596893310547,
"learning_rate": 6.142553278648238e-09,
"logits/chosen": 1.6400712728500366,
"logits/rejected": 1.645829439163208,
"loss": 1.0802,
"step": 620
},
{
"beta_dpo/beta": 0.11531262844800949,
"beta_dpo/beta_margin_grad_mean": -0.3503705561161041,
"beta_dpo/beta_margin_grad_std": 0.25440457463264465,
"beta_dpo/beta_margin_mean": 0.9773139953613281,
"beta_dpo/beta_margin_std": 1.751634120941162,
"beta_dpo/beta_used": 0.11531262844800949,
"beta_dpo/beta_used_raw": 0.11531262844800949,
"beta_dpo/gap_mean": 8.3915433883667,
"beta_dpo/gap_std": 14.808649063110352,
"beta_dpo/loss_margin_mean": 7.840453147888184,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9387755102040817,
"grad_norm": 28.006378173828125,
"learning_rate": 5.854666444131934e-09,
"logits/chosen": 1.398921251296997,
"logits/rejected": 1.2527947425842285,
"loss": 1.0032,
"step": 621
},
{
"beta_dpo/beta": 0.1956021785736084,
"beta_dpo/beta_margin_grad_mean": -0.3046272099018097,
"beta_dpo/beta_margin_grad_std": 0.3041934072971344,
"beta_dpo/beta_margin_mean": 2.0099432468414307,
"beta_dpo/beta_margin_std": 3.8020520210266113,
"beta_dpo/beta_used": 0.1956021785736084,
"beta_dpo/beta_used_raw": 0.1956021785736084,
"beta_dpo/gap_mean": 8.671491622924805,
"beta_dpo/gap_std": 14.802274703979492,
"beta_dpo/loss_margin_mean": 9.858983993530273,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9402872260015117,
"grad_norm": 49.47909927368164,
"learning_rate": 5.573608879422875e-09,
"logits/chosen": 1.1854503154754639,
"logits/rejected": 1.1152417659759521,
"loss": 1.0806,
"step": 622
},
{
"beta_dpo/beta": 0.06421691924333572,
"beta_dpo/beta_margin_grad_mean": -0.4157204031944275,
"beta_dpo/beta_margin_grad_std": 0.1975506693124771,
"beta_dpo/beta_margin_mean": 0.5457938313484192,
"beta_dpo/beta_margin_std": 1.3326746225357056,
"beta_dpo/beta_used": 0.06421691924333572,
"beta_dpo/beta_used_raw": 0.040875114500522614,
"beta_dpo/gap_mean": 8.688669204711914,
"beta_dpo/gap_std": 14.794839859008789,
"beta_dpo/loss_margin_mean": 8.007980346679688,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9417989417989417,
"grad_norm": 21.249887466430664,
"learning_rate": 5.299388446305342e-09,
"logits/chosen": 1.0395543575286865,
"logits/rejected": 0.9585334658622742,
"loss": 1.0954,
"step": 623
},
{
"beta_dpo/beta": 0.17201095819473267,
"beta_dpo/beta_margin_grad_mean": -0.2755333483219147,
"beta_dpo/beta_margin_grad_std": 0.2739318907260895,
"beta_dpo/beta_margin_mean": 2.029400587081909,
"beta_dpo/beta_margin_std": 3.091554641723633,
"beta_dpo/beta_used": 0.17201095819473267,
"beta_dpo/beta_used_raw": 0.17201095819473267,
"beta_dpo/gap_mean": 9.086647033691406,
"beta_dpo/gap_std": 14.942371368408203,
"beta_dpo/loss_margin_mean": 11.444154739379883,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9433106575963719,
"grad_norm": 50.13128662109375,
"learning_rate": 5.03201281531429e-09,
"logits/chosen": 1.2946569919586182,
"logits/rejected": 1.168874979019165,
"loss": 0.8639,
"step": 624
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49886149168014526,
"beta_dpo/beta_margin_grad_std": 0.0035701736342161894,
"beta_dpo/beta_margin_mean": 0.004554293118417263,
"beta_dpo/beta_margin_std": 0.014281506650149822,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.17264024913311005,
"beta_dpo/gap_mean": 8.407045364379883,
"beta_dpo/gap_std": 14.818851470947266,
"beta_dpo/loss_margin_mean": 4.554293155670166,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9448223733938019,
"grad_norm": 0.32094186544418335,
"learning_rate": 4.7714894655209174e-09,
"logits/chosen": 1.2408746480941772,
"logits/rejected": 1.1343789100646973,
"loss": 1.3825,
"step": 625
},
{
"beta_dpo/beta": 0.07005998492240906,
"beta_dpo/beta_margin_grad_mean": -0.44059884548187256,
"beta_dpo/beta_margin_grad_std": 0.2527449429035187,
"beta_dpo/beta_margin_mean": 0.6223716139793396,
"beta_dpo/beta_margin_std": 2.1118099689483643,
"beta_dpo/beta_used": 0.07005998492240906,
"beta_dpo/beta_used_raw": -0.004164740443229675,
"beta_dpo/gap_mean": 8.391353607177734,
"beta_dpo/gap_std": 15.319479942321777,
"beta_dpo/loss_margin_mean": 9.000487327575684,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9463340891912321,
"grad_norm": 36.80985641479492,
"learning_rate": 4.517825684323323e-09,
"logits/chosen": 1.2944220304489136,
"logits/rejected": 1.1371581554412842,
"loss": 1.3288,
"step": 626
},
{
"beta_dpo/beta": 0.07213520258665085,
"beta_dpo/beta_margin_grad_mean": -0.38978075981140137,
"beta_dpo/beta_margin_grad_std": 0.20375964045524597,
"beta_dpo/beta_margin_mean": 0.7643184065818787,
"beta_dpo/beta_margin_std": 1.5277169942855835,
"beta_dpo/beta_used": 0.07213520258665085,
"beta_dpo/beta_used_raw": 0.04761495813727379,
"beta_dpo/gap_mean": 8.814528465270996,
"beta_dpo/gap_std": 15.30355453491211,
"beta_dpo/loss_margin_mean": 11.308126449584961,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9478458049886621,
"grad_norm": 21.390562057495117,
"learning_rate": 4.271028567242818e-09,
"logits/chosen": 1.2814748287200928,
"logits/rejected": 1.1207035779953003,
"loss": 1.0657,
"step": 627
},
{
"beta_dpo/beta": 0.21702194213867188,
"beta_dpo/beta_margin_grad_mean": -0.28619271516799927,
"beta_dpo/beta_margin_grad_std": 0.35538989305496216,
"beta_dpo/beta_margin_mean": 2.4516913890838623,
"beta_dpo/beta_margin_std": 4.084339141845703,
"beta_dpo/beta_used": 0.21702194213867188,
"beta_dpo/beta_used_raw": 0.21702194213867188,
"beta_dpo/gap_mean": 9.10338306427002,
"beta_dpo/gap_std": 16.06523323059082,
"beta_dpo/loss_margin_mean": 11.028809547424316,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9493575207860923,
"grad_norm": 71.84501647949219,
"learning_rate": 4.0311050177251895e-09,
"logits/chosen": 1.2058839797973633,
"logits/rejected": 1.2187294960021973,
"loss": 1.1868,
"step": 628
},
{
"beta_dpo/beta": 0.040276795625686646,
"beta_dpo/beta_margin_grad_mean": -0.42245370149612427,
"beta_dpo/beta_margin_grad_std": 0.1697092205286026,
"beta_dpo/beta_margin_mean": 0.4575227200984955,
"beta_dpo/beta_margin_std": 1.0323092937469482,
"beta_dpo/beta_used": 0.040276795625686646,
"beta_dpo/beta_used_raw": 0.03951174020767212,
"beta_dpo/gap_mean": 9.401752471923828,
"beta_dpo/gap_std": 15.793481826782227,
"beta_dpo/loss_margin_mean": 9.8300199508667,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9508692365835223,
"grad_norm": 13.410911560058594,
"learning_rate": 3.798061746947995e-09,
"logits/chosen": 1.3186110258102417,
"logits/rejected": 1.2518651485443115,
"loss": 1.1975,
"step": 629
},
{
"beta_dpo/beta": 0.1225634217262268,
"beta_dpo/beta_margin_grad_mean": -0.37748420238494873,
"beta_dpo/beta_margin_grad_std": 0.25147491693496704,
"beta_dpo/beta_margin_mean": 1.265876293182373,
"beta_dpo/beta_margin_std": 2.6474194526672363,
"beta_dpo/beta_used": 0.1225634217262268,
"beta_dpo/beta_used_raw": 0.1225634217262268,
"beta_dpo/gap_mean": 9.436227798461914,
"beta_dpo/gap_std": 15.730070114135742,
"beta_dpo/loss_margin_mean": 9.3777494430542,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9523809523809523,
"grad_norm": 34.975379943847656,
"learning_rate": 3.5719052736323806e-09,
"logits/chosen": 1.0783604383468628,
"logits/rejected": 1.0250047445297241,
"loss": 1.0213,
"step": 630
},
{
"beta_dpo/beta": 0.09131479263305664,
"beta_dpo/beta_margin_grad_mean": -0.34115666151046753,
"beta_dpo/beta_margin_grad_std": 0.2506091594696045,
"beta_dpo/beta_margin_mean": 0.923401415348053,
"beta_dpo/beta_margin_std": 1.4584547281265259,
"beta_dpo/beta_used": 0.09131479263305664,
"beta_dpo/beta_used_raw": 0.09131479263305664,
"beta_dpo/gap_mean": 9.782768249511719,
"beta_dpo/gap_std": 15.912504196166992,
"beta_dpo/loss_margin_mean": 10.662687301635742,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9538926681783825,
"grad_norm": 23.484634399414062,
"learning_rate": 3.352641923861144e-09,
"logits/chosen": 1.3594732284545898,
"logits/rejected": 1.2386384010314941,
"loss": 0.9504,
"step": 631
},
{
"beta_dpo/beta": 0.189894437789917,
"beta_dpo/beta_margin_grad_mean": -0.31988996267318726,
"beta_dpo/beta_margin_grad_std": 0.3233727812767029,
"beta_dpo/beta_margin_mean": 1.8445459604263306,
"beta_dpo/beta_margin_std": 3.505603790283203,
"beta_dpo/beta_used": 0.189894437789917,
"beta_dpo/beta_used_raw": 0.189894437789917,
"beta_dpo/gap_mean": 9.691600799560547,
"beta_dpo/gap_std": 16.004518508911133,
"beta_dpo/loss_margin_mean": 9.54232406616211,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9554043839758125,
"grad_norm": 43.28837966918945,
"learning_rate": 3.140277830901428e-09,
"logits/chosen": 1.2820234298706055,
"logits/rejected": 1.2324151992797852,
"loss": 0.8801,
"step": 632
},
{
"beta_dpo/beta": 0.061312876641750336,
"beta_dpo/beta_margin_grad_mean": -0.36580348014831543,
"beta_dpo/beta_margin_grad_std": 0.18776066601276398,
"beta_dpo/beta_margin_mean": 0.7045513987541199,
"beta_dpo/beta_margin_std": 1.044027328491211,
"beta_dpo/beta_used": 0.061312876641750336,
"beta_dpo/beta_used_raw": 0.061312876641750336,
"beta_dpo/gap_mean": 9.78476333618164,
"beta_dpo/gap_std": 15.973169326782227,
"beta_dpo/loss_margin_mean": 11.0767822265625,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9569160997732427,
"grad_norm": 16.645475387573242,
"learning_rate": 2.9348189350335007e-09,
"logits/chosen": 1.1682333946228027,
"logits/rejected": 1.1017271280288696,
"loss": 1.0502,
"step": 633
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49894630908966064,
"beta_dpo/beta_margin_grad_std": 0.0039056446403265,
"beta_dpo/beta_margin_mean": 0.004215083085000515,
"beta_dpo/beta_margin_std": 0.015623592771589756,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.17823907732963562,
"beta_dpo/gap_mean": 9.161681175231934,
"beta_dpo/gap_std": 16.027673721313477,
"beta_dpo/loss_margin_mean": 4.21508264541626,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9584278155706727,
"grad_norm": 0.32190123200416565,
"learning_rate": 2.736270983384276e-09,
"logits/chosen": 1.1025863885879517,
"logits/rejected": 1.10835862159729,
"loss": 1.3818,
"step": 634
},
{
"beta_dpo/beta": 0.02330617606639862,
"beta_dpo/beta_margin_grad_mean": -0.46098482608795166,
"beta_dpo/beta_margin_grad_std": 0.11248224973678589,
"beta_dpo/beta_margin_mean": 0.17786218225955963,
"beta_dpo/beta_margin_std": 0.5247640013694763,
"beta_dpo/beta_used": 0.02330617606639862,
"beta_dpo/beta_used_raw": -0.025716857984662056,
"beta_dpo/gap_mean": 8.629072189331055,
"beta_dpo/gap_std": 15.859984397888184,
"beta_dpo/loss_margin_mean": 7.23160457611084,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9599395313681028,
"grad_norm": 9.109109878540039,
"learning_rate": 2.5446395297668287e-09,
"logits/chosen": 1.237937331199646,
"logits/rejected": 1.1790293455123901,
"loss": 1.2575,
"step": 635
},
{
"beta_dpo/beta": 0.21303845942020416,
"beta_dpo/beta_margin_grad_mean": -0.2767031490802765,
"beta_dpo/beta_margin_grad_std": 0.2881619930267334,
"beta_dpo/beta_margin_mean": 2.355743646621704,
"beta_dpo/beta_margin_std": 3.89518404006958,
"beta_dpo/beta_used": 0.21303845942020416,
"beta_dpo/beta_used_raw": 0.21303845942020416,
"beta_dpo/gap_mean": 8.832971572875977,
"beta_dpo/gap_std": 15.4896240234375,
"beta_dpo/loss_margin_mean": 10.791437149047852,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9614512471655329,
"grad_norm": 53.85847854614258,
"learning_rate": 2.359929934524829e-09,
"logits/chosen": 1.0027883052825928,
"logits/rejected": 0.8940525054931641,
"loss": 0.9391,
"step": 636
},
{
"beta_dpo/beta": 0.057768288999795914,
"beta_dpo/beta_margin_grad_mean": -0.4107164740562439,
"beta_dpo/beta_margin_grad_std": 0.19992104172706604,
"beta_dpo/beta_margin_mean": 0.639686644077301,
"beta_dpo/beta_margin_std": 1.4747357368469238,
"beta_dpo/beta_used": 0.057768288999795914,
"beta_dpo/beta_used_raw": 0.039929118007421494,
"beta_dpo/gap_mean": 8.947272300720215,
"beta_dpo/gap_std": 15.440877914428711,
"beta_dpo/loss_margin_mean": 9.36134147644043,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9629629629629629,
"grad_norm": 18.227508544921875,
"learning_rate": 2.1821473643827137e-09,
"logits/chosen": 1.2675358057022095,
"logits/rejected": 1.1809782981872559,
"loss": 1.1807,
"step": 637
},
{
"beta_dpo/beta": 0.13519155979156494,
"beta_dpo/beta_margin_grad_mean": -0.353465735912323,
"beta_dpo/beta_margin_grad_std": 0.2931253910064697,
"beta_dpo/beta_margin_mean": 1.101943016052246,
"beta_dpo/beta_margin_std": 2.174018144607544,
"beta_dpo/beta_used": 0.13519155979156494,
"beta_dpo/beta_used_raw": 0.13519155979156494,
"beta_dpo/gap_mean": 8.85607624053955,
"beta_dpo/gap_std": 15.520221710205078,
"beta_dpo/loss_margin_mean": 8.02245044708252,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9644746787603931,
"grad_norm": 34.01590347290039,
"learning_rate": 2.0112967923011646e-09,
"logits/chosen": 1.355816125869751,
"logits/rejected": 1.2652667760849,
"loss": 0.8726,
"step": 638
},
{
"beta_dpo/beta": 0.13248920440673828,
"beta_dpo/beta_margin_grad_mean": -0.3459218740463257,
"beta_dpo/beta_margin_grad_std": 0.2797539234161377,
"beta_dpo/beta_margin_mean": 1.3880894184112549,
"beta_dpo/beta_margin_std": 2.7971689701080322,
"beta_dpo/beta_used": 0.13248920440673828,
"beta_dpo/beta_used_raw": 0.13248920440673828,
"beta_dpo/gap_mean": 8.968145370483398,
"beta_dpo/gap_std": 15.500995635986328,
"beta_dpo/loss_margin_mean": 9.871132850646973,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9659863945578231,
"grad_norm": 36.6432991027832,
"learning_rate": 1.847382997337943e-09,
"logits/chosen": 0.9688248634338379,
"logits/rejected": 0.8566447496414185,
"loss": 0.9835,
"step": 639
},
{
"beta_dpo/beta": 0.0449477955698967,
"beta_dpo/beta_margin_grad_mean": -0.4395362138748169,
"beta_dpo/beta_margin_grad_std": 0.1477995216846466,
"beta_dpo/beta_margin_mean": 0.25694775581359863,
"beta_dpo/beta_margin_std": 0.7032347917556763,
"beta_dpo/beta_used": 0.0449477955698967,
"beta_dpo/beta_used_raw": 0.0449477955698967,
"beta_dpo/gap_mean": 8.584091186523438,
"beta_dpo/gap_std": 15.505561828613281,
"beta_dpo/loss_margin_mean": 5.612953186035156,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9674981103552532,
"grad_norm": 14.231745719909668,
"learning_rate": 1.690410564514244e-09,
"logits/chosen": 1.1696255207061768,
"logits/rejected": 1.0740346908569336,
"loss": 1.1325,
"step": 640
},
{
"beta_dpo/beta": 0.07587745040655136,
"beta_dpo/beta_margin_grad_mean": -0.40199270844459534,
"beta_dpo/beta_margin_grad_std": 0.21523261070251465,
"beta_dpo/beta_margin_mean": 0.6574034094810486,
"beta_dpo/beta_margin_std": 1.5263322591781616,
"beta_dpo/beta_used": 0.07587745040655136,
"beta_dpo/beta_used_raw": 0.02500532567501068,
"beta_dpo/gap_mean": 8.408025741577148,
"beta_dpo/gap_std": 15.306009292602539,
"beta_dpo/loss_margin_mean": 8.222708702087402,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9690098261526833,
"grad_norm": 25.847951889038086,
"learning_rate": 1.5403838846864692e-09,
"logits/chosen": 1.0766488313674927,
"logits/rejected": 1.095979928970337,
"loss": 1.0703,
"step": 641
},
{
"beta_dpo/beta": 0.08892130851745605,
"beta_dpo/beta_margin_grad_mean": -0.3497072160243988,
"beta_dpo/beta_margin_grad_std": 0.22195076942443848,
"beta_dpo/beta_margin_mean": 1.0798953771591187,
"beta_dpo/beta_margin_std": 1.7261310815811157,
"beta_dpo/beta_used": 0.08892130851745605,
"beta_dpo/beta_used_raw": 0.08892130851745605,
"beta_dpo/gap_mean": 8.763275146484375,
"beta_dpo/gap_std": 15.263031005859375,
"beta_dpo/loss_margin_mean": 9.946568489074707,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9705215419501134,
"grad_norm": 23.498695373535156,
"learning_rate": 1.3973071544233218e-09,
"logits/chosen": 1.5003564357757568,
"logits/rejected": 1.4900686740875244,
"loss": 1.036,
"step": 642
},
{
"beta_dpo/beta": 0.1755274385213852,
"beta_dpo/beta_margin_grad_mean": -0.31771519780158997,
"beta_dpo/beta_margin_grad_std": 0.31326013803482056,
"beta_dpo/beta_margin_mean": 1.7572897672653198,
"beta_dpo/beta_margin_std": 3.0267927646636963,
"beta_dpo/beta_used": 0.1755274385213852,
"beta_dpo/beta_used_raw": 0.1755274385213852,
"beta_dpo/gap_mean": 8.862909317016602,
"beta_dpo/gap_std": 15.451555252075195,
"beta_dpo/loss_margin_mean": 9.407503128051758,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9720332577475435,
"grad_norm": 47.7587776184082,
"learning_rate": 1.261184375888541e-09,
"logits/chosen": 1.3585262298583984,
"logits/rejected": 1.189072608947754,
"loss": 0.9789,
"step": 643
},
{
"beta_dpo/beta": 0.12742942571640015,
"beta_dpo/beta_margin_grad_mean": -0.3882279396057129,
"beta_dpo/beta_margin_grad_std": 0.29966428875923157,
"beta_dpo/beta_margin_mean": 0.8566918969154358,
"beta_dpo/beta_margin_std": 2.1793432235717773,
"beta_dpo/beta_used": 0.12742942571640015,
"beta_dpo/beta_used_raw": 0.12742942571640015,
"beta_dpo/gap_mean": 8.460193634033203,
"beta_dpo/gap_std": 15.709514617919922,
"beta_dpo/loss_margin_mean": 6.808633327484131,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9735449735449735,
"grad_norm": 33.20065689086914,
"learning_rate": 1.1320193567288527e-09,
"logits/chosen": 1.296072006225586,
"logits/rejected": 1.2559714317321777,
"loss": 1.0307,
"step": 644
},
{
"beta_dpo/beta": 0.2287611961364746,
"beta_dpo/beta_margin_grad_mean": -0.2579849362373352,
"beta_dpo/beta_margin_grad_std": 0.29286816716194153,
"beta_dpo/beta_margin_mean": 2.571612596511841,
"beta_dpo/beta_margin_std": 3.502375602722168,
"beta_dpo/beta_used": 0.2287611961364746,
"beta_dpo/beta_used_raw": 0.2287611961364746,
"beta_dpo/gap_mean": 8.823458671569824,
"beta_dpo/gap_std": 15.624380111694336,
"beta_dpo/loss_margin_mean": 11.115163803100586,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9750566893424036,
"grad_norm": 53.97190475463867,
"learning_rate": 1.0098157099674987e-09,
"logits/chosen": 1.3393789529800415,
"logits/rejected": 1.3428921699523926,
"loss": 0.7942,
"step": 645
},
{
"beta_dpo/beta": 0.046885035932064056,
"beta_dpo/beta_margin_grad_mean": -0.4096597731113434,
"beta_dpo/beta_margin_grad_std": 0.15604624152183533,
"beta_dpo/beta_margin_mean": 0.4211799204349518,
"beta_dpo/beta_margin_std": 0.8159288763999939,
"beta_dpo/beta_used": 0.046885035932064056,
"beta_dpo/beta_used_raw": 0.046885035932064056,
"beta_dpo/gap_mean": 8.929242134094238,
"beta_dpo/gap_std": 15.510902404785156,
"beta_dpo/loss_margin_mean": 9.470359802246094,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9765684051398337,
"grad_norm": 14.007503509521484,
"learning_rate": 8.945768539031783e-10,
"logits/chosen": 1.050734519958496,
"logits/rejected": 1.0254426002502441,
"loss": 1.1076,
"step": 646
},
{
"beta_dpo/beta": 0.30722373723983765,
"beta_dpo/beta_margin_grad_mean": -0.1448328197002411,
"beta_dpo/beta_margin_grad_std": 0.23842579126358032,
"beta_dpo/beta_margin_mean": 4.167687892913818,
"beta_dpo/beta_margin_std": 4.095274925231934,
"beta_dpo/beta_used": 0.30722373723983765,
"beta_dpo/beta_used_raw": 0.30722373723983765,
"beta_dpo/gap_mean": 9.657175064086914,
"beta_dpo/gap_std": 15.229113578796387,
"beta_dpo/loss_margin_mean": 13.549036026000977,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9780801209372638,
"grad_norm": 44.486412048339844,
"learning_rate": 7.863060120144316e-10,
"logits/chosen": 1.2251076698303223,
"logits/rejected": 1.1599578857421875,
"loss": 0.3313,
"step": 647
},
{
"beta_dpo/beta": 0.051618948578834534,
"beta_dpo/beta_margin_grad_mean": -0.41473251581192017,
"beta_dpo/beta_margin_grad_std": 0.20193688571453094,
"beta_dpo/beta_margin_mean": 0.48395270109176636,
"beta_dpo/beta_margin_std": 1.189092755317688,
"beta_dpo/beta_used": 0.051618948578834534,
"beta_dpo/beta_used_raw": 0.04244516044855118,
"beta_dpo/gap_mean": 9.734169006347656,
"beta_dpo/gap_std": 14.963768005371094,
"beta_dpo/loss_margin_mean": 8.589853286743164,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9795918367346939,
"grad_norm": 18.04836082458496,
"learning_rate": 6.850062128694045e-10,
"logits/chosen": 1.2029109001159668,
"logits/rejected": 1.1225261688232422,
"loss": 1.1111,
"step": 648
},
{
"beta_dpo/beta": 0.21300108730793,
"beta_dpo/beta_margin_grad_mean": -0.29137375950813293,
"beta_dpo/beta_margin_grad_std": 0.319355309009552,
"beta_dpo/beta_margin_mean": 2.1214795112609863,
"beta_dpo/beta_margin_std": 4.1887431144714355,
"beta_dpo/beta_used": 0.21300108730793,
"beta_dpo/beta_used_raw": 0.21300108730793,
"beta_dpo/gap_mean": 9.660211563110352,
"beta_dpo/gap_std": 15.06142807006836,
"beta_dpo/loss_margin_mean": 9.629173278808594,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.981103552532124,
"grad_norm": 47.07633972167969,
"learning_rate": 5.906802900412788e-10,
"logits/chosen": 1.3177015781402588,
"logits/rejected": 1.2371814250946045,
"loss": 0.6641,
"step": 649
},
{
"beta_dpo/beta": 0.18543455004692078,
"beta_dpo/beta_margin_grad_mean": -0.2878350019454956,
"beta_dpo/beta_margin_grad_std": 0.3191271126270294,
"beta_dpo/beta_margin_mean": 1.8004777431488037,
"beta_dpo/beta_margin_std": 2.68178653717041,
"beta_dpo/beta_used": 0.18543455004692078,
"beta_dpo/beta_used_raw": 0.18543455004692078,
"beta_dpo/gap_mean": 9.650936126708984,
"beta_dpo/gap_std": 14.927940368652344,
"beta_dpo/loss_margin_mean": 9.712797164916992,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.982615268329554,
"grad_norm": 41.930545806884766,
"learning_rate": 5.033308820289184e-10,
"logits/chosen": 1.3555996417999268,
"logits/rejected": 1.2612521648406982,
"loss": 0.8971,
"step": 650
},
{
"beta_dpo/beta": 0.05952965468168259,
"beta_dpo/beta_margin_grad_mean": -0.3843025863170624,
"beta_dpo/beta_margin_grad_std": 0.18352816998958588,
"beta_dpo/beta_margin_mean": 0.5975887775421143,
"beta_dpo/beta_margin_std": 0.9953944087028503,
"beta_dpo/beta_used": 0.05952965468168259,
"beta_dpo/beta_used_raw": 0.05952965468168259,
"beta_dpo/gap_mean": 9.495718002319336,
"beta_dpo/gap_std": 14.702865600585938,
"beta_dpo/loss_margin_mean": 9.376497268676758,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9841269841269841,
"grad_norm": 16.7528133392334,
"learning_rate": 4.2296043218295606e-10,
"logits/chosen": 1.25199294090271,
"logits/rejected": 1.124768853187561,
"loss": 1.0683,
"step": 651
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4982442855834961,
"beta_dpo/beta_margin_grad_std": 0.004070946015417576,
"beta_dpo/beta_margin_mean": 0.007023526821285486,
"beta_dpo/beta_margin_std": 0.016285018995404243,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.08277405798435211,
"beta_dpo/gap_mean": 9.258039474487305,
"beta_dpo/gap_std": 15.01901912689209,
"beta_dpo/loss_margin_mean": 7.023526191711426,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9856386999244142,
"grad_norm": 0.3672993779182434,
"learning_rate": 3.4957118863768176e-10,
"logits/chosen": 1.0834195613861084,
"logits/rejected": 1.1000077724456787,
"loss": 1.3801,
"step": 652
},
{
"beta_dpo/beta": 0.08435218036174774,
"beta_dpo/beta_margin_grad_mean": -0.3591226637363434,
"beta_dpo/beta_margin_grad_std": 0.2340759038925171,
"beta_dpo/beta_margin_mean": 0.7783568501472473,
"beta_dpo/beta_margin_std": 1.355913519859314,
"beta_dpo/beta_used": 0.08435218036174774,
"beta_dpo/beta_used_raw": 0.08435218036174774,
"beta_dpo/gap_mean": 9.125565528869629,
"beta_dpo/gap_std": 15.28455924987793,
"beta_dpo/loss_margin_mean": 9.264640808105469,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9871504157218443,
"grad_norm": 22.500999450683594,
"learning_rate": 2.831652042480093e-10,
"logits/chosen": 1.1946804523468018,
"logits/rejected": 1.1475324630737305,
"loss": 1.0293,
"step": 653
},
{
"beta_dpo/beta": 0.09523887932300568,
"beta_dpo/beta_margin_grad_mean": -0.3858698308467865,
"beta_dpo/beta_margin_grad_std": 0.2553237974643707,
"beta_dpo/beta_margin_mean": 1.1672877073287964,
"beta_dpo/beta_margin_std": 2.5366644859313965,
"beta_dpo/beta_used": 0.09523887932300568,
"beta_dpo/beta_used_raw": -0.015430465340614319,
"beta_dpo/gap_mean": 9.186834335327148,
"beta_dpo/gap_std": 15.484939575195312,
"beta_dpo/loss_margin_mean": 8.130887985229492,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9886621315192744,
"grad_norm": 27.98388671875,
"learning_rate": 2.2374433653205016e-10,
"logits/chosen": 1.151197075843811,
"logits/rejected": 0.9994099140167236,
"loss": 1.0911,
"step": 654
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49734944105148315,
"beta_dpo/beta_margin_grad_std": 0.00388297438621521,
"beta_dpo/beta_margin_mean": 0.010603162460029125,
"beta_dpo/beta_margin_std": 0.015533708967268467,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.01662539876997471,
"beta_dpo/gap_mean": 9.309677124023438,
"beta_dpo/gap_std": 15.503271102905273,
"beta_dpo/loss_margin_mean": 10.603161811828613,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9901738473167044,
"grad_norm": 0.3120063245296478,
"learning_rate": 1.7131024761923852e-10,
"logits/chosen": 1.1806678771972656,
"logits/rejected": 1.039980173110962,
"loss": 1.379,
"step": 655
},
{
"beta_dpo/beta": 0.08966214954853058,
"beta_dpo/beta_margin_grad_mean": -0.346645712852478,
"beta_dpo/beta_margin_grad_std": 0.21018153429031372,
"beta_dpo/beta_margin_mean": 0.82259601354599,
"beta_dpo/beta_margin_std": 1.2134861946105957,
"beta_dpo/beta_used": 0.08966214954853058,
"beta_dpo/beta_used_raw": 0.08966214954853058,
"beta_dpo/gap_mean": 9.15340805053711,
"beta_dpo/gap_std": 15.19101333618164,
"beta_dpo/loss_margin_mean": 9.192506790161133,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9916855631141346,
"grad_norm": 20.03281021118164,
"learning_rate": 1.2586440420372934e-10,
"logits/chosen": 1.1405537128448486,
"logits/rejected": 1.0787684917449951,
"loss": 0.8857,
"step": 656
},
{
"beta_dpo/beta": 0.08845320343971252,
"beta_dpo/beta_margin_grad_mean": -0.3202848732471466,
"beta_dpo/beta_margin_grad_std": 0.22860798239707947,
"beta_dpo/beta_margin_mean": 1.0682189464569092,
"beta_dpo/beta_margin_std": 1.452836513519287,
"beta_dpo/beta_used": 0.08845320343971252,
"beta_dpo/beta_used_raw": 0.08845320343971252,
"beta_dpo/gap_mean": 9.614997863769531,
"beta_dpo/gap_std": 15.207011222839355,
"beta_dpo/loss_margin_mean": 12.184465408325195,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9931972789115646,
"grad_norm": 25.735523223876953,
"learning_rate": 8.740807750345913e-11,
"logits/chosen": 1.3117201328277588,
"logits/rejected": 1.1885137557983398,
"loss": 0.9697,
"step": 657
},
{
"beta_dpo/beta": 0.08655724674463272,
"beta_dpo/beta_margin_grad_mean": -0.3984954059123993,
"beta_dpo/beta_margin_grad_std": 0.2469259351491928,
"beta_dpo/beta_margin_mean": 0.8934859037399292,
"beta_dpo/beta_margin_std": 2.263308525085449,
"beta_dpo/beta_used": 0.08655724674463272,
"beta_dpo/beta_used_raw": 0.04852905124425888,
"beta_dpo/gap_mean": 9.4644775390625,
"beta_dpo/gap_std": 15.579830169677734,
"beta_dpo/loss_margin_mean": 8.258312225341797,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9947089947089947,
"grad_norm": 25.514381408691406,
"learning_rate": 5.594234322453539e-11,
"logits/chosen": 1.261268138885498,
"logits/rejected": 1.220792293548584,
"loss": 1.0962,
"step": 658
},
{
"beta_dpo/beta": 0.034201011061668396,
"beta_dpo/beta_margin_grad_mean": -0.43599990010261536,
"beta_dpo/beta_margin_grad_std": 0.17003847658634186,
"beta_dpo/beta_margin_mean": 0.3266862630844116,
"beta_dpo/beta_margin_std": 0.9077629446983337,
"beta_dpo/beta_used": 0.034201011061668396,
"beta_dpo/beta_used_raw": -0.03887367993593216,
"beta_dpo/gap_mean": 9.18392276763916,
"beta_dpo/gap_std": 15.714540481567383,
"beta_dpo/loss_margin_mean": 6.176858425140381,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9962207105064248,
"grad_norm": 10.678601264953613,
"learning_rate": 3.146808153123293e-11,
"logits/chosen": 1.3522672653198242,
"logits/rejected": 1.2511718273162842,
"loss": 1.215,
"step": 659
},
{
"beta_dpo/beta": 0.29667574167251587,
"beta_dpo/beta_margin_grad_mean": -0.23005616664886475,
"beta_dpo/beta_margin_grad_std": 0.3280402719974518,
"beta_dpo/beta_margin_mean": 3.1850943565368652,
"beta_dpo/beta_margin_std": 3.9281609058380127,
"beta_dpo/beta_used": 0.29667574167251587,
"beta_dpo/beta_used_raw": 0.29667574167251587,
"beta_dpo/gap_mean": 9.110960006713867,
"beta_dpo/gap_std": 15.258062362670898,
"beta_dpo/loss_margin_mean": 10.734999656677246,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9977324263038548,
"grad_norm": 56.866981506347656,
"learning_rate": 1.3985977021235829e-11,
"logits/chosen": 1.2931430339813232,
"logits/rejected": 1.200553059577942,
"loss": 0.8159,
"step": 660
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4985651671886444,
"beta_dpo/beta_margin_grad_std": 0.004238836467266083,
"beta_dpo/beta_margin_mean": 0.00573985418304801,
"beta_dpo/beta_margin_std": 0.016957050189375877,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.14001157879829407,
"beta_dpo/gap_mean": 8.619951248168945,
"beta_dpo/gap_std": 15.422819137573242,
"beta_dpo/loss_margin_mean": 5.739853858947754,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.999244142101285,
"grad_norm": 0.30782225728034973,
"learning_rate": 3.4965187065971735e-12,
"logits/chosen": 1.1916680335998535,
"logits/rejected": 1.0985240936279297,
"loss": 1.3817,
"step": 661
},
{
"epoch": 0.999244142101285,
"step": 661,
"total_flos": 0.0,
"train_loss": 1.1644051905929953,
"train_runtime": 2186.2877,
"train_samples_per_second": 19.364,
"train_steps_per_second": 0.302
}
],
"logging_steps": 1,
"max_steps": 661,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}