{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.999244142101285, "eval_steps": 100, "global_step": 661, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "beta_dpo/beta": 0.09873995184898376, "beta_dpo/beta_margin_grad_mean": -0.5021346807479858, "beta_dpo/beta_margin_grad_std": 0.008621793240308762, "beta_dpo/beta_margin_mean": -0.00854283757507801, "beta_dpo/beta_margin_std": 0.034500423818826675, "beta_dpo/beta_used": 0.09873995184898376, "beta_dpo/beta_used_raw": 0.09873995184898376, "beta_dpo/gap_mean": -0.009267467074096203, "beta_dpo/gap_std": 0.05077784135937691, "beta_dpo/loss_margin_mean": -0.08983081579208374, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.0015117157974300832, "grad_norm": 21.607027053833008, "learning_rate": 0.0, "logits/chosen": 1.4594056606292725, "logits/rejected": 1.4684147834777832, "loss": 1.3891, "step": 1 }, { "beta_dpo/beta": 0.09919409453868866, "beta_dpo/beta_margin_grad_mean": -0.5008031725883484, "beta_dpo/beta_margin_grad_std": 0.009846841916441917, "beta_dpo/beta_margin_mean": -0.0032096824143081903, "beta_dpo/beta_margin_std": 0.03941287845373154, "beta_dpo/beta_used": 0.09919409453868866, "beta_dpo/beta_used_raw": 0.09919409453868866, "beta_dpo/gap_mean": -0.0228734128177166, "beta_dpo/gap_std": 0.110123410820961, "beta_dpo/loss_margin_mean": -0.0322224497795105, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.0030234315948601664, "grad_norm": 20.140541076660156, "learning_rate": 7.462686567164179e-09, "logits/chosen": 1.4544942378997803, "logits/rejected": 1.4357258081436157, "loss": 1.39, "step": 2 }, { "beta_dpo/beta": 0.10366719961166382, "beta_dpo/beta_margin_grad_mean": -0.5002375245094299, "beta_dpo/beta_margin_grad_std": 0.011844536289572716, "beta_dpo/beta_margin_mean": -0.0009551587863825262, "beta_dpo/beta_margin_std": 0.04740604758262634, "beta_dpo/beta_used": 0.10366719961166382, "beta_dpo/beta_used_raw": 0.10366719961166382, "beta_dpo/gap_mean": -0.02084210142493248, "beta_dpo/gap_std": 0.17627675831317902, "beta_dpo/loss_margin_mean": -0.01047566533088684, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.0045351473922902496, "grad_norm": 24.431331634521484, "learning_rate": 1.4925373134328357e-08, "logits/chosen": 1.4479323625564575, "logits/rejected": 1.4154329299926758, "loss": 1.3822, "step": 3 }, { "beta_dpo/beta": 0.09961278736591339, "beta_dpo/beta_margin_grad_mean": -0.4990696609020233, "beta_dpo/beta_margin_grad_std": 0.010926141403615475, "beta_dpo/beta_margin_mean": 0.003728417446836829, "beta_dpo/beta_margin_std": 0.04374876245856285, "beta_dpo/beta_used": 0.09961278736591339, "beta_dpo/beta_used_raw": 0.09961278736591339, "beta_dpo/gap_mean": -0.009441482834517956, "beta_dpo/gap_std": 0.22680673003196716, "beta_dpo/loss_margin_mean": 0.03748854994773865, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.006046863189720333, "grad_norm": 24.549968719482422, "learning_rate": 2.2388059701492534e-08, "logits/chosen": 1.4826452732086182, "logits/rejected": 1.4577124118804932, "loss": 1.388, "step": 4 }, { "beta_dpo/beta": 0.0999765694141388, "beta_dpo/beta_margin_grad_mean": -0.5009123086929321, "beta_dpo/beta_margin_grad_std": 0.00964893214404583, "beta_dpo/beta_margin_mean": -0.003652930725365877, "beta_dpo/beta_margin_std": 0.03861139714717865, "beta_dpo/beta_used": 0.0999765694141388, "beta_dpo/beta_used_raw": 0.0999765694141388, "beta_dpo/gap_mean": -0.012933394871652126, "beta_dpo/gap_std": 0.26322224736213684, "beta_dpo/loss_margin_mean": -0.03674338757991791, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.007558578987150416, "grad_norm": 24.02710723876953, "learning_rate": 2.9850746268656714e-08, "logits/chosen": 1.430014967918396, "logits/rejected": 1.422844648361206, "loss": 1.3878, "step": 5 }, { "beta_dpo/beta": 0.09759774804115295, "beta_dpo/beta_margin_grad_mean": -0.49920761585235596, "beta_dpo/beta_margin_grad_std": 0.009975293651223183, "beta_dpo/beta_margin_mean": 0.0031740572303533554, "beta_dpo/beta_margin_std": 0.03991897776722908, "beta_dpo/beta_used": 0.09759774804115295, "beta_dpo/beta_used_raw": 0.09759774804115295, "beta_dpo/gap_mean": -0.008989489637315273, "beta_dpo/gap_std": 0.28406012058258057, "beta_dpo/loss_margin_mean": 0.032966673374176025, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.009070294784580499, "grad_norm": 22.517269134521484, "learning_rate": 3.731343283582089e-08, "logits/chosen": 1.7289844751358032, "logits/rejected": 1.681814193725586, "loss": 1.3912, "step": 6 }, { "beta_dpo/beta": 0.09864137321710587, "beta_dpo/beta_margin_grad_mean": -0.4999319911003113, "beta_dpo/beta_margin_grad_std": 0.010195241309702396, "beta_dpo/beta_margin_mean": 0.0002745148667600006, "beta_dpo/beta_margin_std": 0.040805213153362274, "beta_dpo/beta_used": 0.09864137321710587, "beta_dpo/beta_used_raw": 0.09864137321710587, "beta_dpo/gap_mean": 0.0004895327147096395, "beta_dpo/gap_std": 0.3131392300128937, "beta_dpo/loss_margin_mean": 0.0023331642150878906, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.010582010582010581, "grad_norm": 22.483606338500977, "learning_rate": 4.477611940298507e-08, "logits/chosen": 1.2273149490356445, "logits/rejected": 1.2026118040084839, "loss": 1.3886, "step": 7 }, { "beta_dpo/beta": 0.09902673959732056, "beta_dpo/beta_margin_grad_mean": -0.501477062702179, "beta_dpo/beta_margin_grad_std": 0.010539776645600796, "beta_dpo/beta_margin_mean": -0.005913248751312494, "beta_dpo/beta_margin_std": 0.04218650981783867, "beta_dpo/beta_used": 0.09902673959732056, "beta_dpo/beta_used_raw": 0.09902673959732056, "beta_dpo/gap_mean": -0.013057660311460495, "beta_dpo/gap_std": 0.33594292402267456, "beta_dpo/loss_margin_mean": -0.05997839570045471, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.012093726379440665, "grad_norm": 23.100969314575195, "learning_rate": 5.223880597014925e-08, "logits/chosen": 1.413482427597046, "logits/rejected": 1.429722547531128, "loss": 1.3894, "step": 8 }, { "beta_dpo/beta": 0.10009001195430756, "beta_dpo/beta_margin_grad_mean": -0.5018208622932434, "beta_dpo/beta_margin_grad_std": 0.010021732188761234, "beta_dpo/beta_margin_mean": -0.007285799831151962, "beta_dpo/beta_margin_std": 0.04011598229408264, "beta_dpo/beta_used": 0.10009001195430756, "beta_dpo/beta_used_raw": 0.10009001195430756, "beta_dpo/gap_mean": -0.021802250295877457, "beta_dpo/gap_std": 0.3482493460178375, "beta_dpo/loss_margin_mean": -0.07284319400787354, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.013605442176870748, "grad_norm": 23.189462661743164, "learning_rate": 5.970149253731343e-08, "logits/chosen": 1.6902210712432861, "logits/rejected": 1.6170001029968262, "loss": 1.3883, "step": 9 }, { "beta_dpo/beta": 0.10585639625787735, "beta_dpo/beta_margin_grad_mean": -0.49980345368385315, "beta_dpo/beta_margin_grad_std": 0.01263737864792347, "beta_dpo/beta_margin_mean": 0.0007830193499103189, "beta_dpo/beta_margin_std": 0.05059384927153587, "beta_dpo/beta_used": 0.10585639625787735, "beta_dpo/beta_used_raw": 0.10585639625787735, "beta_dpo/gap_mean": -0.02278057485818863, "beta_dpo/gap_std": 0.36068111658096313, "beta_dpo/loss_margin_mean": 0.005588918924331665, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.015117157974300832, "grad_norm": 25.323102951049805, "learning_rate": 6.71641791044776e-08, "logits/chosen": 1.6256885528564453, "logits/rejected": 1.5875918865203857, "loss": 1.3784, "step": 10 }, { "beta_dpo/beta": 0.10278887301683426, "beta_dpo/beta_margin_grad_mean": -0.4984298646450043, "beta_dpo/beta_margin_grad_std": 0.010885908268392086, "beta_dpo/beta_margin_mean": 0.006281617563217878, "beta_dpo/beta_margin_std": 0.04356975108385086, "beta_dpo/beta_used": 0.10278887301683426, "beta_dpo/beta_used_raw": 0.10278887301683426, "beta_dpo/gap_mean": -0.004942757543176413, "beta_dpo/gap_std": 0.3796635866165161, "beta_dpo/loss_margin_mean": 0.061300128698349, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.016628873771730914, "grad_norm": 24.30197525024414, "learning_rate": 7.462686567164178e-08, "logits/chosen": 1.2691853046417236, "logits/rejected": 1.2496408224105835, "loss": 1.3823, "step": 11 }, { "beta_dpo/beta": 0.09832623600959778, "beta_dpo/beta_margin_grad_mean": -0.4999140799045563, "beta_dpo/beta_margin_grad_std": 0.009057173505425453, "beta_dpo/beta_margin_mean": 0.00034457247238606215, "beta_dpo/beta_margin_std": 0.03624521940946579, "beta_dpo/beta_used": 0.09832623600959778, "beta_dpo/beta_used_raw": 0.09832623600959778, "beta_dpo/gap_mean": -0.004351671785116196, "beta_dpo/gap_std": 0.3818763792514801, "beta_dpo/loss_margin_mean": 0.002451568841934204, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.018140589569160998, "grad_norm": 22.108863830566406, "learning_rate": 8.208955223880596e-08, "logits/chosen": 1.3517913818359375, "logits/rejected": 1.3473531007766724, "loss": 1.3895, "step": 12 }, { "beta_dpo/beta": 0.09917062520980835, "beta_dpo/beta_margin_grad_mean": -0.5009226202964783, "beta_dpo/beta_margin_grad_std": 0.010060025379061699, "beta_dpo/beta_margin_mean": -0.003692339640110731, "beta_dpo/beta_margin_std": 0.04025454819202423, "beta_dpo/beta_used": 0.09917062520980835, "beta_dpo/beta_used_raw": 0.09917062520980835, "beta_dpo/gap_mean": -0.00805948581546545, "beta_dpo/gap_std": 0.3838382959365845, "beta_dpo/loss_margin_mean": -0.03742155432701111, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.019652305366591082, "grad_norm": 22.049461364746094, "learning_rate": 8.955223880597014e-08, "logits/chosen": 1.3082160949707031, "logits/rejected": 1.2636491060256958, "loss": 1.3887, "step": 13 }, { "beta_dpo/beta": 0.10183389484882355, "beta_dpo/beta_margin_grad_mean": -0.4999306797981262, "beta_dpo/beta_margin_grad_std": 0.00923539325594902, "beta_dpo/beta_margin_mean": 0.00027715094620361924, "beta_dpo/beta_margin_std": 0.036956243216991425, "beta_dpo/beta_used": 0.10183389484882355, "beta_dpo/beta_used_raw": 0.10183389484882355, "beta_dpo/gap_mean": -0.006332115735858679, "beta_dpo/gap_std": 0.3803662955760956, "beta_dpo/loss_margin_mean": 0.0024544596672058105, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.021164021164021163, "grad_norm": 25.343242645263672, "learning_rate": 9.701492537313432e-08, "logits/chosen": 1.3965303897857666, "logits/rejected": 1.3902784585952759, "loss": 1.384, "step": 14 }, { "beta_dpo/beta": 0.0993453860282898, "beta_dpo/beta_margin_grad_mean": -0.5009843707084656, "beta_dpo/beta_margin_grad_std": 0.012391936965286732, "beta_dpo/beta_margin_mean": -0.003941703587770462, "beta_dpo/beta_margin_std": 0.04959738627076149, "beta_dpo/beta_used": 0.0993453860282898, "beta_dpo/beta_used_raw": 0.0993453860282898, "beta_dpo/gap_mean": -0.01158633641898632, "beta_dpo/gap_std": 0.3963480591773987, "beta_dpo/loss_margin_mean": -0.03971347212791443, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.022675736961451247, "grad_norm": 23.105030059814453, "learning_rate": 1.044776119402985e-07, "logits/chosen": 1.5179550647735596, "logits/rejected": 1.477945327758789, "loss": 1.3889, "step": 15 }, { "beta_dpo/beta": 0.10452497750520706, "beta_dpo/beta_margin_grad_mean": -0.4994063079357147, "beta_dpo/beta_margin_grad_std": 0.009892878122627735, "beta_dpo/beta_margin_mean": 0.002374407835304737, "beta_dpo/beta_margin_std": 0.03958987817168236, "beta_dpo/beta_used": 0.10452497750520706, "beta_dpo/beta_used_raw": 0.10452497750520706, "beta_dpo/gap_mean": -0.004868443123996258, "beta_dpo/gap_std": 0.3979250192642212, "beta_dpo/loss_margin_mean": 0.02236151695251465, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.02418745275888133, "grad_norm": 22.657752990722656, "learning_rate": 1.1194029850746268e-07, "logits/chosen": 1.6581084728240967, "logits/rejected": 1.6047765016555786, "loss": 1.3791, "step": 16 }, { "beta_dpo/beta": 0.09847256541252136, "beta_dpo/beta_margin_grad_mean": -0.5005159974098206, "beta_dpo/beta_margin_grad_std": 0.009893263690173626, "beta_dpo/beta_margin_mean": -0.0020661705639213324, "beta_dpo/beta_margin_std": 0.03958994895219803, "beta_dpo/beta_used": 0.09847256541252136, "beta_dpo/beta_used_raw": 0.09847256541252136, "beta_dpo/gap_mean": -0.008847428485751152, "beta_dpo/gap_std": 0.3994016647338867, "beta_dpo/loss_margin_mean": -0.02098524570465088, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.025699168556311415, "grad_norm": 22.28753662109375, "learning_rate": 1.1940298507462686e-07, "logits/chosen": 1.3955719470977783, "logits/rejected": 1.393104076385498, "loss": 1.3899, "step": 17 }, { "beta_dpo/beta": 0.10181480646133423, "beta_dpo/beta_margin_grad_mean": -0.5011930465698242, "beta_dpo/beta_margin_grad_std": 0.009915145114064217, "beta_dpo/beta_margin_mean": -0.004774391185492277, "beta_dpo/beta_margin_std": 0.03967824578285217, "beta_dpo/beta_used": 0.10181480646133423, "beta_dpo/beta_used_raw": 0.10181480646133423, "beta_dpo/gap_mean": -0.014493357390165329, "beta_dpo/gap_std": 0.39799293875694275, "beta_dpo/loss_margin_mean": -0.04706642031669617, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.027210884353741496, "grad_norm": 21.89427375793457, "learning_rate": 1.2686567164179106e-07, "logits/chosen": 1.4782413244247437, "logits/rejected": 1.4196343421936035, "loss": 1.3849, "step": 18 }, { "beta_dpo/beta": 0.10298259556293488, "beta_dpo/beta_margin_grad_mean": -0.5003270506858826, "beta_dpo/beta_margin_grad_std": 0.009691756218671799, "beta_dpo/beta_margin_mean": -0.0013136152410879731, "beta_dpo/beta_margin_std": 0.03878864273428917, "beta_dpo/beta_used": 0.10298259556293488, "beta_dpo/beta_used_raw": 0.10298259556293488, "beta_dpo/gap_mean": -0.014709733426570892, "beta_dpo/gap_std": 0.39229732751846313, "beta_dpo/loss_margin_mean": -0.012863218784332275, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.02872260015117158, "grad_norm": 23.083358764648438, "learning_rate": 1.343283582089552e-07, "logits/chosen": 1.6602938175201416, "logits/rejected": 1.655239462852478, "loss": 1.3829, "step": 19 }, { "beta_dpo/beta": 0.10153305530548096, "beta_dpo/beta_margin_grad_mean": -0.49922773241996765, "beta_dpo/beta_margin_grad_std": 0.009931285865604877, "beta_dpo/beta_margin_mean": 0.0030918291304260492, "beta_dpo/beta_margin_std": 0.03974789381027222, "beta_dpo/beta_used": 0.10153305530548096, "beta_dpo/beta_used_raw": 0.10153305530548096, "beta_dpo/gap_mean": -0.012392524629831314, "beta_dpo/gap_std": 0.39484626054763794, "beta_dpo/loss_margin_mean": 0.03128620982170105, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.030234315948601664, "grad_norm": 24.105083465576172, "learning_rate": 1.4179104477611938e-07, "logits/chosen": 1.6466686725616455, "logits/rejected": 1.6133846044540405, "loss": 1.3852, "step": 20 }, { "beta_dpo/beta": 0.10090602189302444, "beta_dpo/beta_margin_grad_mean": -0.5005959272384644, "beta_dpo/beta_margin_grad_std": 0.009615312330424786, "beta_dpo/beta_margin_mean": -0.002384437946602702, "beta_dpo/beta_margin_std": 0.03847426176071167, "beta_dpo/beta_used": 0.10090602189302444, "beta_dpo/beta_used_raw": 0.10090602189302444, "beta_dpo/gap_mean": -0.007610926404595375, "beta_dpo/gap_std": 0.3931964635848999, "beta_dpo/loss_margin_mean": -0.02530011534690857, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.031746031746031744, "grad_norm": 22.50416374206543, "learning_rate": 1.4925373134328355e-07, "logits/chosen": 1.3186970949172974, "logits/rejected": 1.2789273262023926, "loss": 1.3855, "step": 21 }, { "beta_dpo/beta": 0.10063984990119934, "beta_dpo/beta_margin_grad_mean": -0.4994290769100189, "beta_dpo/beta_margin_grad_std": 0.010816080495715141, "beta_dpo/beta_margin_mean": 0.0022858360316604376, "beta_dpo/beta_margin_std": 0.04329133406281471, "beta_dpo/beta_used": 0.10063984990119934, "beta_dpo/beta_used_raw": 0.10063984990119934, "beta_dpo/gap_mean": -0.005383708514273167, "beta_dpo/gap_std": 0.3974972069263458, "beta_dpo/loss_margin_mean": 0.022645294666290283, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.03325774754346183, "grad_norm": 22.074689865112305, "learning_rate": 1.5671641791044775e-07, "logits/chosen": 1.4456074237823486, "logits/rejected": 1.4467374086380005, "loss": 1.3859, "step": 22 }, { "beta_dpo/beta": 0.10390889644622803, "beta_dpo/beta_margin_grad_mean": -0.49791544675827026, "beta_dpo/beta_margin_grad_std": 0.012759105302393436, "beta_dpo/beta_margin_mean": 0.008349758572876453, "beta_dpo/beta_margin_std": 0.05107416585087776, "beta_dpo/beta_used": 0.10390889644622803, "beta_dpo/beta_used_raw": 0.10390889644622803, "beta_dpo/gap_mean": 0.001504638697952032, "beta_dpo/gap_std": 0.40817511081695557, "beta_dpo/loss_margin_mean": 0.07432505488395691, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.03476946334089191, "grad_norm": 25.383527755737305, "learning_rate": 1.6417910447761193e-07, "logits/chosen": 1.665621042251587, "logits/rejected": 1.6228258609771729, "loss": 1.3794, "step": 23 }, { "beta_dpo/beta": 0.09863981604576111, "beta_dpo/beta_margin_grad_mean": -0.4988223910331726, "beta_dpo/beta_margin_grad_std": 0.010719557292759418, "beta_dpo/beta_margin_mean": 0.004713835194706917, "beta_dpo/beta_margin_std": 0.04289696365594864, "beta_dpo/beta_used": 0.09863981604576111, "beta_dpo/beta_used_raw": 0.09863981604576111, "beta_dpo/gap_mean": 0.018340593203902245, "beta_dpo/gap_std": 0.41763240098953247, "beta_dpo/loss_margin_mean": 0.04628649353981018, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.036281179138321996, "grad_norm": 22.189563751220703, "learning_rate": 1.716417910447761e-07, "logits/chosen": 1.7637710571289062, "logits/rejected": 1.7281594276428223, "loss": 1.3867, "step": 24 }, { "beta_dpo/beta": 0.100833460688591, "beta_dpo/beta_margin_grad_mean": -0.49988317489624023, "beta_dpo/beta_margin_grad_std": 0.012334803119301796, "beta_dpo/beta_margin_mean": 0.0004626520967576653, "beta_dpo/beta_margin_std": 0.04938330128788948, "beta_dpo/beta_used": 0.100833460688591, "beta_dpo/beta_used_raw": 0.100833460688591, "beta_dpo/gap_mean": 0.018122181296348572, "beta_dpo/gap_std": 0.43123096227645874, "beta_dpo/loss_margin_mean": 0.003261953592300415, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.03779289493575208, "grad_norm": 22.797252655029297, "learning_rate": 1.7910447761194027e-07, "logits/chosen": 1.3922135829925537, "logits/rejected": 1.3515794277191162, "loss": 1.3832, "step": 25 }, { "beta_dpo/beta": 0.09866522252559662, "beta_dpo/beta_margin_grad_mean": -0.5007703900337219, "beta_dpo/beta_margin_grad_std": 0.010712272487580776, "beta_dpo/beta_margin_mean": -0.0030822933185845613, "beta_dpo/beta_margin_std": 0.04288780689239502, "beta_dpo/beta_used": 0.09866522252559662, "beta_dpo/beta_used_raw": 0.09866522252559662, "beta_dpo/gap_mean": 0.007343418896198273, "beta_dpo/gap_std": 0.4358934164047241, "beta_dpo/loss_margin_mean": -0.03148818016052246, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.039304610733182165, "grad_norm": 22.201448440551758, "learning_rate": 1.8656716417910447e-07, "logits/chosen": 1.261330485343933, "logits/rejected": 1.2732932567596436, "loss": 1.3879, "step": 26 }, { "beta_dpo/beta": 0.09795667231082916, "beta_dpo/beta_margin_grad_mean": -0.501259982585907, "beta_dpo/beta_margin_grad_std": 0.01161261834204197, "beta_dpo/beta_margin_mean": -0.005036745686084032, "beta_dpo/beta_margin_std": 0.04648776724934578, "beta_dpo/beta_used": 0.09795667231082916, "beta_dpo/beta_used_raw": 0.09795667231082916, "beta_dpo/gap_mean": -0.004225727170705795, "beta_dpo/gap_std": 0.4409000873565674, "beta_dpo/loss_margin_mean": -0.05272534489631653, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.04081632653061224, "grad_norm": 23.33132553100586, "learning_rate": 1.9402985074626865e-07, "logits/chosen": 1.5242902040481567, "logits/rejected": 1.505962610244751, "loss": 1.3902, "step": 27 }, { "beta_dpo/beta": 0.09937025606632233, "beta_dpo/beta_margin_grad_mean": -0.4999173581600189, "beta_dpo/beta_margin_grad_std": 0.009457286447286606, "beta_dpo/beta_margin_mean": 0.0003325306752230972, "beta_dpo/beta_margin_std": 0.03784368187189102, "beta_dpo/beta_used": 0.09937025606632233, "beta_dpo/beta_used_raw": 0.09937025606632233, "beta_dpo/gap_mean": -0.005930869374424219, "beta_dpo/gap_std": 0.43624186515808105, "beta_dpo/loss_margin_mean": 0.0017603635787963867, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.042328042328042326, "grad_norm": 22.75115394592285, "learning_rate": 2.0149253731343282e-07, "logits/chosen": 1.402669906616211, "logits/rejected": 1.4011223316192627, "loss": 1.388, "step": 28 }, { "beta_dpo/beta": 0.09951446950435638, "beta_dpo/beta_margin_grad_mean": -0.5003775954246521, "beta_dpo/beta_margin_grad_std": 0.01097456831485033, "beta_dpo/beta_margin_mean": -0.0015215803869068623, "beta_dpo/beta_margin_std": 0.043978314846754074, "beta_dpo/beta_used": 0.09951446950435638, "beta_dpo/beta_used_raw": 0.09951446950435638, "beta_dpo/gap_mean": -0.0060158781707286835, "beta_dpo/gap_std": 0.4374222457408905, "beta_dpo/loss_margin_mean": -0.015163600444793701, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.04383975812547241, "grad_norm": 26.328489303588867, "learning_rate": 2.08955223880597e-07, "logits/chosen": 1.81168794631958, "logits/rejected": 1.7704055309295654, "loss": 1.3879, "step": 29 }, { "beta_dpo/beta": 0.0982619971036911, "beta_dpo/beta_margin_grad_mean": -0.5009621977806091, "beta_dpo/beta_margin_grad_std": 0.010776137933135033, "beta_dpo/beta_margin_mean": -0.00385239627212286, "beta_dpo/beta_margin_std": 0.04312770068645477, "beta_dpo/beta_used": 0.0982619971036911, "beta_dpo/beta_used_raw": 0.0982619971036911, "beta_dpo/gap_mean": -0.008773903362452984, "beta_dpo/gap_std": 0.4346309304237366, "beta_dpo/loss_margin_mean": -0.03936275839805603, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.045351473922902494, "grad_norm": 23.069562911987305, "learning_rate": 2.1641791044776117e-07, "logits/chosen": 1.6879940032958984, "logits/rejected": 1.6044800281524658, "loss": 1.3903, "step": 30 }, { "beta_dpo/beta": 0.10117494314908981, "beta_dpo/beta_margin_grad_mean": -0.499036580324173, "beta_dpo/beta_margin_grad_std": 0.01161203347146511, "beta_dpo/beta_margin_mean": 0.0038549723103642464, "beta_dpo/beta_margin_std": 0.046471331268548965, "beta_dpo/beta_used": 0.10117494314908981, "beta_dpo/beta_used_raw": 0.10117494314908981, "beta_dpo/gap_mean": -0.004491984844207764, "beta_dpo/gap_std": 0.4389868676662445, "beta_dpo/loss_margin_mean": 0.03808090090751648, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.04686318972033258, "grad_norm": 28.92486572265625, "learning_rate": 2.2388059701492537e-07, "logits/chosen": 1.694710612297058, "logits/rejected": 1.6386480331420898, "loss": 1.3851, "step": 31 }, { "beta_dpo/beta": 0.09735976159572601, "beta_dpo/beta_margin_grad_mean": -0.5017613172531128, "beta_dpo/beta_margin_grad_std": 0.01160483993589878, "beta_dpo/beta_margin_mean": -0.007061361335217953, "beta_dpo/beta_margin_std": 0.04650650918483734, "beta_dpo/beta_used": 0.09735976159572601, "beta_dpo/beta_used_raw": 0.09735976159572601, "beta_dpo/gap_mean": -0.011080358177423477, "beta_dpo/gap_std": 0.43803778290748596, "beta_dpo/loss_margin_mean": -0.07157236337661743, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.04837490551776266, "grad_norm": 22.146883010864258, "learning_rate": 2.3134328358208954e-07, "logits/chosen": 1.4716018438339233, "logits/rejected": 1.5336263179779053, "loss": 1.3918, "step": 32 }, { "beta_dpo/beta": 0.10330641269683838, "beta_dpo/beta_margin_grad_mean": -0.4976757764816284, "beta_dpo/beta_margin_grad_std": 0.011192507110536098, "beta_dpo/beta_margin_mean": 0.009298978373408318, "beta_dpo/beta_margin_std": 0.04479321837425232, "beta_dpo/beta_used": 0.10330641269683838, "beta_dpo/beta_used_raw": 0.10330641269683838, "beta_dpo/gap_mean": 0.0022241901606321335, "beta_dpo/gap_std": 0.4424448013305664, "beta_dpo/loss_margin_mean": 0.08533850312232971, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.049886621315192746, "grad_norm": 24.878881454467773, "learning_rate": 2.388059701492537e-07, "logits/chosen": 1.4082189798355103, "logits/rejected": 1.3968687057495117, "loss": 1.38, "step": 33 }, { "beta_dpo/beta": 0.0996209979057312, "beta_dpo/beta_margin_grad_mean": -0.5004932284355164, "beta_dpo/beta_margin_grad_std": 0.010038006119430065, "beta_dpo/beta_margin_mean": -0.0019767105113714933, "beta_dpo/beta_margin_std": 0.040175147354602814, "beta_dpo/beta_used": 0.0996209979057312, "beta_dpo/beta_used_raw": 0.0996209979057312, "beta_dpo/gap_mean": 0.0003106839722022414, "beta_dpo/gap_std": 0.4378555417060852, "beta_dpo/loss_margin_mean": -0.019730672240257263, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.05139833711262283, "grad_norm": 21.769670486450195, "learning_rate": 2.4626865671641786e-07, "logits/chosen": 1.5708086490631104, "logits/rejected": 1.5470855236053467, "loss": 1.3871, "step": 34 }, { "beta_dpo/beta": 0.10070754587650299, "beta_dpo/beta_margin_grad_mean": -0.5005945563316345, "beta_dpo/beta_margin_grad_std": 0.011351360939443111, "beta_dpo/beta_margin_mean": -0.0023801266215741634, "beta_dpo/beta_margin_std": 0.04543125256896019, "beta_dpo/beta_used": 0.10070754587650299, "beta_dpo/beta_used_raw": 0.10070754587650299, "beta_dpo/gap_mean": -0.007040712982416153, "beta_dpo/gap_std": 0.4391350746154785, "beta_dpo/loss_margin_mean": -0.024149954319000244, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.05291005291005291, "grad_norm": 21.194477081298828, "learning_rate": 2.537313432835821e-07, "logits/chosen": 1.4663602113723755, "logits/rejected": 1.4484498500823975, "loss": 1.386, "step": 35 }, { "beta_dpo/beta": 0.0950796902179718, "beta_dpo/beta_margin_grad_mean": -0.5017680525779724, "beta_dpo/beta_margin_grad_std": 0.009710317477583885, "beta_dpo/beta_margin_mean": -0.00707436166703701, "beta_dpo/beta_margin_std": 0.03885461017489433, "beta_dpo/beta_used": 0.0950796902179718, "beta_dpo/beta_used_raw": 0.0950796902179718, "beta_dpo/gap_mean": -0.01551821082830429, "beta_dpo/gap_std": 0.43908798694610596, "beta_dpo/loss_margin_mean": -0.07440310716629028, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.05442176870748299, "grad_norm": 23.735424041748047, "learning_rate": 2.611940298507462e-07, "logits/chosen": 1.4293709993362427, "logits/rejected": 1.4123473167419434, "loss": 1.3958, "step": 36 }, { "beta_dpo/beta": 0.1016513779759407, "beta_dpo/beta_margin_grad_mean": -0.4968355596065521, "beta_dpo/beta_margin_grad_std": 0.013145999051630497, "beta_dpo/beta_margin_mean": 0.012668957002460957, "beta_dpo/beta_margin_std": 0.05262959748506546, "beta_dpo/beta_used": 0.1016513779759407, "beta_dpo/beta_used_raw": 0.1016513779759407, "beta_dpo/gap_mean": 0.0035870305728167295, "beta_dpo/gap_std": 0.4464585483074188, "beta_dpo/loss_margin_mean": 0.12334150075912476, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.055933484504913075, "grad_norm": 27.381790161132812, "learning_rate": 2.686567164179104e-07, "logits/chosen": 1.5765271186828613, "logits/rejected": 1.4575368165969849, "loss": 1.3833, "step": 37 }, { "beta_dpo/beta": 0.09609992802143097, "beta_dpo/beta_margin_grad_mean": -0.5025932192802429, "beta_dpo/beta_margin_grad_std": 0.009760402143001556, "beta_dpo/beta_margin_mean": -0.010377924889326096, "beta_dpo/beta_margin_std": 0.03905599191784859, "beta_dpo/beta_used": 0.09609992802143097, "beta_dpo/beta_used_raw": 0.09609992802143097, "beta_dpo/gap_mean": -0.0076413326896727085, "beta_dpo/gap_std": 0.4474954605102539, "beta_dpo/loss_margin_mean": -0.1089838445186615, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.05744520030234316, "grad_norm": 22.253149032592773, "learning_rate": 2.761194029850746e-07, "logits/chosen": 1.660280704498291, "logits/rejected": 1.65809166431427, "loss": 1.3934, "step": 38 }, { "beta_dpo/beta": 0.09974405914545059, "beta_dpo/beta_margin_grad_mean": -0.5005505681037903, "beta_dpo/beta_margin_grad_std": 0.010305196046829224, "beta_dpo/beta_margin_mean": -0.0022025478538125753, "beta_dpo/beta_margin_std": 0.04123708978295326, "beta_dpo/beta_used": 0.09974405914545059, "beta_dpo/beta_used_raw": 0.09974405914545059, "beta_dpo/gap_mean": -0.013334916904568672, "beta_dpo/gap_std": 0.44173187017440796, "beta_dpo/loss_margin_mean": -0.023197531700134277, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.05895691609977324, "grad_norm": 24.880264282226562, "learning_rate": 2.8358208955223876e-07, "logits/chosen": 1.6895723342895508, "logits/rejected": 1.665818691253662, "loss": 1.3883, "step": 39 }, { "beta_dpo/beta": 0.09883079677820206, "beta_dpo/beta_margin_grad_mean": -0.49824991822242737, "beta_dpo/beta_margin_grad_std": 0.009885421022772789, "beta_dpo/beta_margin_mean": 0.007007123902440071, "beta_dpo/beta_margin_std": 0.03956810384988785, "beta_dpo/beta_used": 0.09883079677820206, "beta_dpo/beta_used_raw": 0.09883079677820206, "beta_dpo/gap_mean": -0.0014165642205625772, "beta_dpo/gap_std": 0.43331772089004517, "beta_dpo/loss_margin_mean": 0.06831315159797668, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.06046863189720333, "grad_norm": 23.338451385498047, "learning_rate": 2.9104477611940296e-07, "logits/chosen": 1.5526103973388672, "logits/rejected": 1.4787428379058838, "loss": 1.3884, "step": 40 }, { "beta_dpo/beta": 0.10304108262062073, "beta_dpo/beta_margin_grad_mean": -0.49625322222709656, "beta_dpo/beta_margin_grad_std": 0.011656548827886581, "beta_dpo/beta_margin_mean": 0.014997422695159912, "beta_dpo/beta_margin_std": 0.046651456505060196, "beta_dpo/beta_used": 0.10304108262062073, "beta_dpo/beta_used_raw": 0.10304108262062073, "beta_dpo/gap_mean": 0.01947699673473835, "beta_dpo/gap_std": 0.4391004145145416, "beta_dpo/loss_margin_mean": 0.14550095796585083, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.06198034769463341, "grad_norm": 24.844144821166992, "learning_rate": 2.985074626865671e-07, "logits/chosen": 1.5253885984420776, "logits/rejected": 1.486491084098816, "loss": 1.3794, "step": 41 }, { "beta_dpo/beta": 0.09910166263580322, "beta_dpo/beta_margin_grad_mean": -0.500030517578125, "beta_dpo/beta_margin_grad_std": 0.011837853118777275, "beta_dpo/beta_margin_mean": -0.000116753377369605, "beta_dpo/beta_margin_std": 0.04738757386803627, "beta_dpo/beta_used": 0.09910166263580322, "beta_dpo/beta_used_raw": 0.09910166263580322, "beta_dpo/gap_mean": 0.02474522590637207, "beta_dpo/gap_std": 0.44327157735824585, "beta_dpo/loss_margin_mean": -0.0029853880405426025, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.06349206349206349, "grad_norm": 23.967090606689453, "learning_rate": 3.059701492537313e-07, "logits/chosen": 1.3896113634109497, "logits/rejected": 1.375231146812439, "loss": 1.3854, "step": 42 }, { "beta_dpo/beta": 0.10354489833116531, "beta_dpo/beta_margin_grad_mean": -0.49683722853660583, "beta_dpo/beta_margin_grad_std": 0.01039121299982071, "beta_dpo/beta_margin_mean": 0.012656980194151402, "beta_dpo/beta_margin_std": 0.04158541187644005, "beta_dpo/beta_used": 0.10354489833116531, "beta_dpo/beta_used_raw": 0.10354489833116531, "beta_dpo/gap_mean": 0.02907174453139305, "beta_dpo/gap_std": 0.4337531328201294, "beta_dpo/loss_margin_mean": 0.11490699648857117, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.06500377928949358, "grad_norm": 24.746234893798828, "learning_rate": 3.134328358208955e-07, "logits/chosen": 1.403411626815796, "logits/rejected": 1.3667316436767578, "loss": 1.3767, "step": 43 }, { "beta_dpo/beta": 0.10267098248004913, "beta_dpo/beta_margin_grad_mean": -0.4979555904865265, "beta_dpo/beta_margin_grad_std": 0.009614716283977032, "beta_dpo/beta_margin_mean": 0.008179724216461182, "beta_dpo/beta_margin_std": 0.038474779576063156, "beta_dpo/beta_used": 0.10267098248004913, "beta_dpo/beta_used_raw": 0.10267098248004913, "beta_dpo/gap_mean": 0.04934769868850708, "beta_dpo/gap_std": 0.4201850891113281, "beta_dpo/loss_margin_mean": 0.07566675543785095, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.06651549508692366, "grad_norm": 24.72636604309082, "learning_rate": 3.2089552238805965e-07, "logits/chosen": 1.1089251041412354, "logits/rejected": 1.0921913385391235, "loss": 1.3766, "step": 44 }, { "beta_dpo/beta": 0.10441717505455017, "beta_dpo/beta_margin_grad_mean": -0.4959341287612915, "beta_dpo/beta_margin_grad_std": 0.011123725213110447, "beta_dpo/beta_margin_mean": 0.016273343935608864, "beta_dpo/beta_margin_std": 0.04451771080493927, "beta_dpo/beta_used": 0.10441717505455017, "beta_dpo/beta_used_raw": 0.10441717505455017, "beta_dpo/gap_mean": 0.058637045323848724, "beta_dpo/gap_std": 0.42201119661331177, "beta_dpo/loss_margin_mean": 0.15418142080307007, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.06802721088435375, "grad_norm": 25.480466842651367, "learning_rate": 3.2835820895522385e-07, "logits/chosen": 1.4839123487472534, "logits/rejected": 1.468077540397644, "loss": 1.3727, "step": 45 }, { "beta_dpo/beta": 0.10162113606929779, "beta_dpo/beta_margin_grad_mean": -0.49845924973487854, "beta_dpo/beta_margin_grad_std": 0.01084035076200962, "beta_dpo/beta_margin_mean": 0.006165068130940199, "beta_dpo/beta_margin_std": 0.04338241368532181, "beta_dpo/beta_used": 0.10162113606929779, "beta_dpo/beta_used_raw": 0.10162113606929779, "beta_dpo/gap_mean": 0.0665898472070694, "beta_dpo/gap_std": 0.4231005311012268, "beta_dpo/loss_margin_mean": 0.06101316213607788, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.06953892668178382, "grad_norm": 26.33711814880371, "learning_rate": 3.3582089552238805e-07, "logits/chosen": 1.376713514328003, "logits/rejected": 1.3219760656356812, "loss": 1.377, "step": 46 }, { "beta_dpo/beta": 0.10109131783246994, "beta_dpo/beta_margin_grad_mean": -0.49842020869255066, "beta_dpo/beta_margin_grad_std": 0.012417293153703213, "beta_dpo/beta_margin_mean": 0.006329426076263189, "beta_dpo/beta_margin_std": 0.04971562325954437, "beta_dpo/beta_used": 0.10109131783246994, "beta_dpo/beta_used_raw": 0.10109131783246994, "beta_dpo/gap_mean": 0.061482757329940796, "beta_dpo/gap_std": 0.43405160307884216, "beta_dpo/loss_margin_mean": 0.06319385766983032, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.0710506424792139, "grad_norm": 22.111841201782227, "learning_rate": 3.432835820895522e-07, "logits/chosen": 1.7184326648712158, "logits/rejected": 1.69810152053833, "loss": 1.3785, "step": 47 }, { "beta_dpo/beta": 0.099519282579422, "beta_dpo/beta_margin_grad_mean": -0.49835386872291565, "beta_dpo/beta_margin_grad_std": 0.010140984319150448, "beta_dpo/beta_margin_mean": 0.006588014308363199, "beta_dpo/beta_margin_std": 0.04058591276407242, "beta_dpo/beta_used": 0.099519282579422, "beta_dpo/beta_used_raw": 0.099519282579422, "beta_dpo/gap_mean": 0.0627756342291832, "beta_dpo/gap_std": 0.4295368492603302, "beta_dpo/loss_margin_mean": 0.06416615843772888, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.07256235827664399, "grad_norm": 22.274993896484375, "learning_rate": 3.507462686567164e-07, "logits/chosen": 1.595404028892517, "logits/rejected": 1.588958740234375, "loss": 1.3809, "step": 48 }, { "beta_dpo/beta": 0.10322128981351852, "beta_dpo/beta_margin_grad_mean": -0.4979618787765503, "beta_dpo/beta_margin_grad_std": 0.010646562092006207, "beta_dpo/beta_margin_mean": 0.008156200870871544, "beta_dpo/beta_margin_std": 0.04260906204581261, "beta_dpo/beta_used": 0.10322128981351852, "beta_dpo/beta_used_raw": 0.10322128981351852, "beta_dpo/gap_mean": 0.06913349777460098, "beta_dpo/gap_std": 0.4278194308280945, "beta_dpo/loss_margin_mean": 0.07910655438899994, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.07407407407407407, "grad_norm": 21.86302947998047, "learning_rate": 3.5820895522388055e-07, "logits/chosen": 1.4724366664886475, "logits/rejected": 1.4565043449401855, "loss": 1.374, "step": 49 }, { "beta_dpo/beta": 0.10015951097011566, "beta_dpo/beta_margin_grad_mean": -0.49800705909729004, "beta_dpo/beta_margin_grad_std": 0.010118241421878338, "beta_dpo/beta_margin_mean": 0.007978866808116436, "beta_dpo/beta_margin_std": 0.04050043225288391, "beta_dpo/beta_used": 0.10015951097011566, "beta_dpo/beta_used_raw": 0.10015951097011566, "beta_dpo/gap_mean": 0.07339806854724884, "beta_dpo/gap_std": 0.42607414722442627, "beta_dpo/loss_margin_mean": 0.07699769735336304, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.07558578987150416, "grad_norm": 22.100317001342773, "learning_rate": 3.6567164179104475e-07, "logits/chosen": 1.6594160795211792, "logits/rejected": 1.640493392944336, "loss": 1.3787, "step": 50 }, { "beta_dpo/beta": 0.09920643270015717, "beta_dpo/beta_margin_grad_mean": -0.4983086585998535, "beta_dpo/beta_margin_grad_std": 0.011117528192698956, "beta_dpo/beta_margin_mean": 0.006771172862499952, "beta_dpo/beta_margin_std": 0.04449303448200226, "beta_dpo/beta_used": 0.09920643270015717, "beta_dpo/beta_used_raw": 0.09920643270015717, "beta_dpo/gap_mean": 0.06899771094322205, "beta_dpo/gap_std": 0.4215458631515503, "beta_dpo/loss_margin_mean": 0.06831052899360657, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.07709750566893424, "grad_norm": 23.966360092163086, "learning_rate": 3.7313432835820895e-07, "logits/chosen": 1.289847731590271, "logits/rejected": 1.2824013233184814, "loss": 1.381, "step": 51 }, { "beta_dpo/beta": 0.09721747040748596, "beta_dpo/beta_margin_grad_mean": -0.4997069239616394, "beta_dpo/beta_margin_grad_std": 0.012052874080836773, "beta_dpo/beta_margin_mean": 0.0011759058106690645, "beta_dpo/beta_margin_std": 0.048246391117572784, "beta_dpo/beta_used": 0.09721747040748596, "beta_dpo/beta_used_raw": 0.09721747040748596, "beta_dpo/gap_mean": 0.05793575569987297, "beta_dpo/gap_std": 0.4385203719139099, "beta_dpo/loss_margin_mean": 0.009815797209739685, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.07860922146636433, "grad_norm": 25.5147705078125, "learning_rate": 3.805970149253731e-07, "logits/chosen": 1.5200705528259277, "logits/rejected": 1.4433038234710693, "loss": 1.3852, "step": 52 }, { "beta_dpo/beta": 0.09669992327690125, "beta_dpo/beta_margin_grad_mean": -0.500171422958374, "beta_dpo/beta_margin_grad_std": 0.00788611639291048, "beta_dpo/beta_margin_mean": -0.0006850466597825289, "beta_dpo/beta_margin_std": 0.031553711742162704, "beta_dpo/beta_used": 0.09669992327690125, "beta_dpo/beta_used_raw": 0.09669992327690125, "beta_dpo/gap_mean": 0.05040828511118889, "beta_dpo/gap_std": 0.4249057173728943, "beta_dpo/loss_margin_mean": -0.007657676935195923, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.0801209372637944, "grad_norm": 21.52613067626953, "learning_rate": 3.880597014925373e-07, "logits/chosen": 1.575990915298462, "logits/rejected": 1.567566156387329, "loss": 1.3869, "step": 53 }, { "beta_dpo/beta": 0.10164332389831543, "beta_dpo/beta_margin_grad_mean": -0.4975181519985199, "beta_dpo/beta_margin_grad_std": 0.009497878141701221, "beta_dpo/beta_margin_mean": 0.009931082837283611, "beta_dpo/beta_margin_std": 0.0380062460899353, "beta_dpo/beta_used": 0.10164332389831543, "beta_dpo/beta_used_raw": 0.10164332389831543, "beta_dpo/gap_mean": 0.053338490426540375, "beta_dpo/gap_std": 0.41552823781967163, "beta_dpo/loss_margin_mean": 0.09771022200584412, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.08163265306122448, "grad_norm": 21.51600456237793, "learning_rate": 3.9552238805970144e-07, "logits/chosen": 1.3746873140335083, "logits/rejected": 1.3910648822784424, "loss": 1.3784, "step": 54 }, { "beta_dpo/beta": 0.10384014993906021, "beta_dpo/beta_margin_grad_mean": -0.49787670373916626, "beta_dpo/beta_margin_grad_std": 0.01219708938151598, "beta_dpo/beta_margin_mean": 0.008496826514601707, "beta_dpo/beta_margin_std": 0.048810362815856934, "beta_dpo/beta_used": 0.10384014993906021, "beta_dpo/beta_used_raw": 0.10384014993906021, "beta_dpo/gap_mean": 0.06035232171416283, "beta_dpo/gap_std": 0.4211800992488861, "beta_dpo/loss_margin_mean": 0.0819447934627533, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.08314436885865457, "grad_norm": 24.281505584716797, "learning_rate": 4.0298507462686564e-07, "logits/chosen": 1.410477638244629, "logits/rejected": 1.3666912317276, "loss": 1.3739, "step": 55 }, { "beta_dpo/beta": 0.10036227107048035, "beta_dpo/beta_margin_grad_mean": -0.49994948506355286, "beta_dpo/beta_margin_grad_std": 0.010925770737230778, "beta_dpo/beta_margin_mean": 0.00019966231775470078, "beta_dpo/beta_margin_std": 0.04372824355959892, "beta_dpo/beta_used": 0.10036227107048035, "beta_dpo/beta_used_raw": 0.10036227107048035, "beta_dpo/gap_mean": 0.0527767613530159, "beta_dpo/gap_std": 0.42930376529693604, "beta_dpo/loss_margin_mean": 0.00171700119972229, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.08465608465608465, "grad_norm": 21.052114486694336, "learning_rate": 4.1044776119402984e-07, "logits/chosen": 1.5779602527618408, "logits/rejected": 1.5794899463653564, "loss": 1.3806, "step": 56 }, { "beta_dpo/beta": 0.10191956907510757, "beta_dpo/beta_margin_grad_mean": -0.4974198043346405, "beta_dpo/beta_margin_grad_std": 0.012272909283638, "beta_dpo/beta_margin_mean": 0.010333304293453693, "beta_dpo/beta_margin_std": 0.04913497716188431, "beta_dpo/beta_used": 0.10191956907510757, "beta_dpo/beta_used_raw": 0.10191956907510757, "beta_dpo/gap_mean": 0.059144824743270874, "beta_dpo/gap_std": 0.4319482445716858, "beta_dpo/loss_margin_mean": 0.09830912947654724, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.08616780045351474, "grad_norm": 26.80173683166504, "learning_rate": 4.17910447761194e-07, "logits/chosen": 1.4049038887023926, "logits/rejected": 1.3124523162841797, "loss": 1.377, "step": 57 }, { "beta_dpo/beta": 0.09840139746665955, "beta_dpo/beta_margin_grad_mean": -0.49932199716567993, "beta_dpo/beta_margin_grad_std": 0.011351993307471275, "beta_dpo/beta_margin_mean": 0.002716128248721361, "beta_dpo/beta_margin_std": 0.04545406624674797, "beta_dpo/beta_used": 0.09840139746665955, "beta_dpo/beta_used_raw": 0.09840139746665955, "beta_dpo/gap_mean": 0.059776224195957184, "beta_dpo/gap_std": 0.4370172321796417, "beta_dpo/loss_margin_mean": 0.0205976665019989, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.08767951625094482, "grad_norm": 23.33869743347168, "learning_rate": 4.253731343283582e-07, "logits/chosen": 1.4786138534545898, "logits/rejected": 1.4423062801361084, "loss": 1.3829, "step": 58 }, { "beta_dpo/beta": 0.10059243440628052, "beta_dpo/beta_margin_grad_mean": -0.498147577047348, "beta_dpo/beta_margin_grad_std": 0.01203033234924078, "beta_dpo/beta_margin_mean": 0.007413546554744244, "beta_dpo/beta_margin_std": 0.048155270516872406, "beta_dpo/beta_used": 0.10059243440628052, "beta_dpo/beta_used_raw": 0.10059243440628052, "beta_dpo/gap_mean": 0.05429444462060928, "beta_dpo/gap_std": 0.4411713182926178, "beta_dpo/loss_margin_mean": 0.07427063584327698, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.08919123204837491, "grad_norm": 20.473285675048828, "learning_rate": 4.3283582089552234e-07, "logits/chosen": 1.3001195192337036, "logits/rejected": 1.1944963932037354, "loss": 1.3801, "step": 59 }, { "beta_dpo/beta": 0.103457510471344, "beta_dpo/beta_margin_grad_mean": -0.49937915802001953, "beta_dpo/beta_margin_grad_std": 0.013366466388106346, "beta_dpo/beta_margin_mean": 0.002477182075381279, "beta_dpo/beta_margin_std": 0.05351593717932701, "beta_dpo/beta_used": 0.103457510471344, "beta_dpo/beta_used_raw": 0.103457510471344, "beta_dpo/gap_mean": 0.046487562358379364, "beta_dpo/gap_std": 0.45913559198379517, "beta_dpo/loss_margin_mean": 0.02491551637649536, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.09070294784580499, "grad_norm": 21.797914505004883, "learning_rate": 4.4029850746268654e-07, "logits/chosen": 1.4170093536376953, "logits/rejected": 1.4231207370758057, "loss": 1.3758, "step": 60 }, { "beta_dpo/beta": 0.09843544661998749, "beta_dpo/beta_margin_grad_mean": -0.49705368280410767, "beta_dpo/beta_margin_grad_std": 0.012809173204004765, "beta_dpo/beta_margin_mean": 0.01179733220487833, "beta_dpo/beta_margin_std": 0.051275238394737244, "beta_dpo/beta_used": 0.09843544661998749, "beta_dpo/beta_used_raw": 0.09843544661998749, "beta_dpo/gap_mean": 0.05880650877952576, "beta_dpo/gap_std": 0.46677011251449585, "beta_dpo/loss_margin_mean": 0.1198408454656601, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.09221466364323508, "grad_norm": 20.4912166595459, "learning_rate": 4.4776119402985074e-07, "logits/chosen": 1.6209700107574463, "logits/rejected": 1.6182749271392822, "loss": 1.3833, "step": 61 }, { "beta_dpo/beta": 0.09592962265014648, "beta_dpo/beta_margin_grad_mean": -0.49833908677101135, "beta_dpo/beta_margin_grad_std": 0.013690280728042126, "beta_dpo/beta_margin_mean": 0.006649685092270374, "beta_dpo/beta_margin_std": 0.05479509010910988, "beta_dpo/beta_used": 0.09592962265014648, "beta_dpo/beta_used_raw": 0.09592962265014648, "beta_dpo/gap_mean": 0.06314882636070251, "beta_dpo/gap_std": 0.4870232939720154, "beta_dpo/loss_margin_mean": 0.06928093731403351, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.09372637944066516, "grad_norm": 23.574623107910156, "learning_rate": 4.552238805970149e-07, "logits/chosen": 1.325179100036621, "logits/rejected": 1.3118748664855957, "loss": 1.3872, "step": 62 }, { "beta_dpo/beta": 0.10062983632087708, "beta_dpo/beta_margin_grad_mean": -0.49723294377326965, "beta_dpo/beta_margin_grad_std": 0.011747188866138458, "beta_dpo/beta_margin_mean": 0.011074798181653023, "beta_dpo/beta_margin_std": 0.047014713287353516, "beta_dpo/beta_used": 0.10062983632087708, "beta_dpo/beta_used_raw": 0.10062983632087708, "beta_dpo/gap_mean": 0.07177233695983887, "beta_dpo/gap_std": 0.48838692903518677, "beta_dpo/loss_margin_mean": 0.1106141209602356, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.09523809523809523, "grad_norm": 24.619401931762695, "learning_rate": 4.626865671641791e-07, "logits/chosen": 1.6953513622283936, "logits/rejected": 1.6481925249099731, "loss": 1.3783, "step": 63 }, { "beta_dpo/beta": 0.09594196081161499, "beta_dpo/beta_margin_grad_mean": -0.4999556839466095, "beta_dpo/beta_margin_grad_std": 0.012721442617475986, "beta_dpo/beta_margin_mean": 0.0001893570297397673, "beta_dpo/beta_margin_std": 0.05095384269952774, "beta_dpo/beta_used": 0.09594196081161499, "beta_dpo/beta_used_raw": 0.09594196081161499, "beta_dpo/gap_mean": 0.061939239501953125, "beta_dpo/gap_std": 0.5000776648521423, "beta_dpo/loss_margin_mean": 0.002007901668548584, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.09674981103552532, "grad_norm": 19.409799575805664, "learning_rate": 4.701492537313433e-07, "logits/chosen": 1.5105648040771484, "logits/rejected": 1.4828753471374512, "loss": 1.3871, "step": 64 }, { "beta_dpo/beta": 0.10893698036670685, "beta_dpo/beta_margin_grad_mean": -0.4937191307544708, "beta_dpo/beta_margin_grad_std": 0.01706167496740818, "beta_dpo/beta_margin_mean": 0.025155218318104744, "beta_dpo/beta_margin_std": 0.06835649907588959, "beta_dpo/beta_used": 0.10893698036670685, "beta_dpo/beta_used_raw": 0.10893698036670685, "beta_dpo/gap_mean": 0.084006167948246, "beta_dpo/gap_std": 0.5160300731658936, "beta_dpo/loss_margin_mean": 0.23087024688720703, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.0982615268329554, "grad_norm": 24.889301300048828, "learning_rate": 4.776119402985074e-07, "logits/chosen": 1.556587815284729, "logits/rejected": 1.5368375778198242, "loss": 1.3616, "step": 65 }, { "beta_dpo/beta": 0.10014477372169495, "beta_dpo/beta_margin_grad_mean": -0.49655643105506897, "beta_dpo/beta_margin_grad_std": 0.015136976726353168, "beta_dpo/beta_margin_mean": 0.013793686404824257, "beta_dpo/beta_margin_std": 0.06063022464513779, "beta_dpo/beta_used": 0.10014477372169495, "beta_dpo/beta_used_raw": 0.10014477372169495, "beta_dpo/gap_mean": 0.09994551539421082, "beta_dpo/gap_std": 0.5345540642738342, "beta_dpo/loss_margin_mean": 0.13528499007225037, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.09977324263038549, "grad_norm": 23.565357208251953, "learning_rate": 4.850746268656717e-07, "logits/chosen": 1.4598450660705566, "logits/rejected": 1.4400157928466797, "loss": 1.376, "step": 66 }, { "beta_dpo/beta": 0.09859488904476166, "beta_dpo/beta_margin_grad_mean": -0.49907803535461426, "beta_dpo/beta_margin_grad_std": 0.011991067789494991, "beta_dpo/beta_margin_mean": 0.0036908036563545465, "beta_dpo/beta_margin_std": 0.04799149930477142, "beta_dpo/beta_used": 0.09859488904476166, "beta_dpo/beta_used_raw": 0.09859488904476166, "beta_dpo/gap_mean": 0.08958867192268372, "beta_dpo/gap_std": 0.5328190922737122, "beta_dpo/loss_margin_mean": 0.03765749931335449, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.10128495842781557, "grad_norm": 25.641630172729492, "learning_rate": 4.925373134328357e-07, "logits/chosen": 1.3621433973312378, "logits/rejected": 1.3432750701904297, "loss": 1.3801, "step": 67 }, { "beta_dpo/beta": 0.09739673137664795, "beta_dpo/beta_margin_grad_mean": -0.49787405133247375, "beta_dpo/beta_margin_grad_std": 0.014542591758072376, "beta_dpo/beta_margin_mean": 0.008516059257090092, "beta_dpo/beta_margin_std": 0.05821725353598595, "beta_dpo/beta_used": 0.09739673137664795, "beta_dpo/beta_used_raw": 0.09739673137664795, "beta_dpo/gap_mean": 0.08840759843587875, "beta_dpo/gap_std": 0.5416070222854614, "beta_dpo/loss_margin_mean": 0.08617928624153137, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.10279667422524566, "grad_norm": 26.215946197509766, "learning_rate": 5e-07, "logits/chosen": 1.4207310676574707, "logits/rejected": 1.3847835063934326, "loss": 1.382, "step": 68 }, { "beta_dpo/beta": 0.11089831590652466, "beta_dpo/beta_margin_grad_mean": -0.4936300218105316, "beta_dpo/beta_margin_grad_std": 0.020087001845240593, "beta_dpo/beta_margin_mean": 0.02549654059112072, "beta_dpo/beta_margin_std": 0.08050806075334549, "beta_dpo/beta_used": 0.11089831590652466, "beta_dpo/beta_used_raw": 0.11089831590652466, "beta_dpo/gap_mean": 0.10114619880914688, "beta_dpo/gap_std": 0.5690314769744873, "beta_dpo/loss_margin_mean": 0.21953517198562622, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.10430839002267574, "grad_norm": 25.708833694458008, "learning_rate": 4.999965034812934e-07, "logits/chosen": 1.373316764831543, "logits/rejected": 1.3507366180419922, "loss": 1.3542, "step": 69 }, { "beta_dpo/beta": 0.105964794754982, "beta_dpo/beta_margin_grad_mean": -0.49599337577819824, "beta_dpo/beta_margin_grad_std": 0.016764765605330467, "beta_dpo/beta_margin_mean": 0.016038598492741585, "beta_dpo/beta_margin_std": 0.06713969260454178, "beta_dpo/beta_used": 0.105964794754982, "beta_dpo/beta_used_raw": 0.105964794754982, "beta_dpo/gap_mean": 0.11604620516300201, "beta_dpo/gap_std": 0.5841151475906372, "beta_dpo/loss_margin_mean": 0.150816410779953, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.10582010582010581, "grad_norm": 24.138580322265625, "learning_rate": 4.999860140229787e-07, "logits/chosen": 1.431084156036377, "logits/rejected": 1.4090873003005981, "loss": 1.3641, "step": 70 }, { "beta_dpo/beta": 0.09979788959026337, "beta_dpo/beta_margin_grad_mean": -0.49800625443458557, "beta_dpo/beta_margin_grad_std": 0.01896088756620884, "beta_dpo/beta_margin_mean": 0.007973305881023407, "beta_dpo/beta_margin_std": 0.07597094774246216, "beta_dpo/beta_used": 0.09979788959026337, "beta_dpo/beta_used_raw": 0.09979788959026337, "beta_dpo/gap_mean": 0.11453382670879364, "beta_dpo/gap_std": 0.6030242443084717, "beta_dpo/loss_margin_mean": 0.07938975095748901, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1073318216175359, "grad_norm": 22.322416305541992, "learning_rate": 4.999685319184688e-07, "logits/chosen": 1.312930703163147, "logits/rejected": 1.324812412261963, "loss": 1.3757, "step": 71 }, { "beta_dpo/beta": 0.1114841103553772, "beta_dpo/beta_margin_grad_mean": -0.4935649633407593, "beta_dpo/beta_margin_grad_std": 0.018391672521829605, "beta_dpo/beta_margin_mean": 0.0257643461227417, "beta_dpo/beta_margin_std": 0.07371597737073898, "beta_dpo/beta_used": 0.1114841103553772, "beta_dpo/beta_used_raw": 0.1114841103553772, "beta_dpo/gap_mean": 0.13404789566993713, "beta_dpo/gap_std": 0.6270595192909241, "beta_dpo/loss_margin_mean": 0.23163816332817078, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.10884353741496598, "grad_norm": 27.61007308959961, "learning_rate": 4.999440576567755e-07, "logits/chosen": 1.412034273147583, "logits/rejected": 1.3404237031936646, "loss": 1.3509, "step": 72 }, { "beta_dpo/beta": 0.09717012196779251, "beta_dpo/beta_margin_grad_mean": -0.49801793694496155, "beta_dpo/beta_margin_grad_std": 0.017343247309327126, "beta_dpo/beta_margin_mean": 0.007944438606500626, "beta_dpo/beta_margin_std": 0.06946875154972076, "beta_dpo/beta_used": 0.09717012196779251, "beta_dpo/beta_used_raw": 0.09717012196779251, "beta_dpo/gap_mean": 0.12657299637794495, "beta_dpo/gap_std": 0.6361806392669678, "beta_dpo/loss_margin_mean": 0.08220607042312622, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.11035525321239607, "grad_norm": 23.838298797607422, "learning_rate": 4.999125919224965e-07, "logits/chosen": 1.5537290573120117, "logits/rejected": 1.5287607908248901, "loss": 1.3792, "step": 73 }, { "beta_dpo/beta": 0.1059919074177742, "beta_dpo/beta_margin_grad_mean": -0.4929755628108978, "beta_dpo/beta_margin_grad_std": 0.017454126849770546, "beta_dpo/beta_margin_mean": 0.028127092868089676, "beta_dpo/beta_margin_std": 0.06989695876836777, "beta_dpo/beta_used": 0.1059919074177742, "beta_dpo/beta_used_raw": 0.1059919074177742, "beta_dpo/gap_mean": 0.13354957103729248, "beta_dpo/gap_std": 0.6440489292144775, "beta_dpo/loss_margin_mean": 0.24771931767463684, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.11186696900982615, "grad_norm": 24.128093719482422, "learning_rate": 4.998741355957963e-07, "logits/chosen": 1.481793999671936, "logits/rejected": 1.3946576118469238, "loss": 1.3613, "step": 74 }, { "beta_dpo/beta": 0.10442506521940231, "beta_dpo/beta_margin_grad_mean": -0.49443042278289795, "beta_dpo/beta_margin_grad_std": 0.019393526017665863, "beta_dpo/beta_margin_mean": 0.02230740524828434, "beta_dpo/beta_margin_std": 0.0777261033654213, "beta_dpo/beta_used": 0.10442506521940231, "beta_dpo/beta_used_raw": 0.10442506521940231, "beta_dpo/gap_mean": 0.16668199002742767, "beta_dpo/gap_std": 0.651796281337738, "beta_dpo/loss_margin_mean": 0.21053069829940796, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.11337868480725624, "grad_norm": 22.620519638061523, "learning_rate": 4.998286897523808e-07, "logits/chosen": 1.4410107135772705, "logits/rejected": 1.3761870861053467, "loss": 1.3621, "step": 75 }, { "beta_dpo/beta": 0.09164533764123917, "beta_dpo/beta_margin_grad_mean": -0.4942088723182678, "beta_dpo/beta_margin_grad_std": 0.016309738159179688, "beta_dpo/beta_margin_mean": 0.02321462146937847, "beta_dpo/beta_margin_std": 0.06536100059747696, "beta_dpo/beta_used": 0.09164533764123917, "beta_dpo/beta_used_raw": 0.09164533764123917, "beta_dpo/gap_mean": 0.17394113540649414, "beta_dpo/gap_std": 0.6712378263473511, "beta_dpo/loss_margin_mean": 0.253339558839798, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.11489040060468632, "grad_norm": 20.620342254638672, "learning_rate": 4.997762556634679e-07, "logits/chosen": 1.3254311084747314, "logits/rejected": 1.2652392387390137, "loss": 1.3836, "step": 76 }, { "beta_dpo/beta": 0.10088081657886505, "beta_dpo/beta_margin_grad_mean": -0.4943234622478485, "beta_dpo/beta_margin_grad_std": 0.019380716606974602, "beta_dpo/beta_margin_mean": 0.02274668589234352, "beta_dpo/beta_margin_std": 0.07762499898672104, "beta_dpo/beta_used": 0.10088081657886505, "beta_dpo/beta_used_raw": 0.10088081657886505, "beta_dpo/gap_mean": 0.1924261748790741, "beta_dpo/gap_std": 0.6849093437194824, "beta_dpo/loss_margin_mean": 0.21507787704467773, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1164021164021164, "grad_norm": 23.49284553527832, "learning_rate": 4.99716834795752e-07, "logits/chosen": 1.4381937980651855, "logits/rejected": 1.39052152633667, "loss": 1.3661, "step": 77 }, { "beta_dpo/beta": 0.09779095649719238, "beta_dpo/beta_margin_grad_mean": -0.49503323435783386, "beta_dpo/beta_margin_grad_std": 0.02156643010675907, "beta_dpo/beta_margin_mean": 0.019915712997317314, "beta_dpo/beta_margin_std": 0.08644842356443405, "beta_dpo/beta_used": 0.09779095649719238, "beta_dpo/beta_used_raw": 0.09779095649719238, "beta_dpo/gap_mean": 0.181630477309227, "beta_dpo/gap_std": 0.7211419343948364, "beta_dpo/loss_margin_mean": 0.19882404804229736, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.11791383219954649, "grad_norm": 22.498376846313477, "learning_rate": 4.996504288113623e-07, "logits/chosen": 1.4062104225158691, "logits/rejected": 1.3975510597229004, "loss": 1.3727, "step": 78 }, { "beta_dpo/beta": 0.09830057621002197, "beta_dpo/beta_margin_grad_mean": -0.49084609746932983, "beta_dpo/beta_margin_grad_std": 0.023657534271478653, "beta_dpo/beta_margin_mean": 0.036744583398103714, "beta_dpo/beta_margin_std": 0.09499835968017578, "beta_dpo/beta_used": 0.09830057621002197, "beta_dpo/beta_used_raw": 0.09830057621002197, "beta_dpo/gap_mean": 0.2163337767124176, "beta_dpo/gap_std": 0.7629668712615967, "beta_dpo/loss_margin_mean": 0.36694100499153137, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.11942554799697656, "grad_norm": 22.278594970703125, "learning_rate": 4.995770395678171e-07, "logits/chosen": 1.692272424697876, "logits/rejected": 1.5986158847808838, "loss": 1.3676, "step": 79 }, { "beta_dpo/beta": 0.09350171685218811, "beta_dpo/beta_margin_grad_mean": -0.49590837955474854, "beta_dpo/beta_margin_grad_std": 0.02149679884314537, "beta_dpo/beta_margin_mean": 0.016413187608122826, "beta_dpo/beta_margin_std": 0.08623309433460236, "beta_dpo/beta_used": 0.09350171685218811, "beta_dpo/beta_used_raw": 0.09350171685218811, "beta_dpo/gap_mean": 0.21665816009044647, "beta_dpo/gap_std": 0.7902263402938843, "beta_dpo/loss_margin_mean": 0.17878860235214233, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.12093726379440665, "grad_norm": 20.826610565185547, "learning_rate": 4.994966691179711e-07, "logits/chosen": 1.4082047939300537, "logits/rejected": 1.3100817203521729, "loss": 1.3765, "step": 80 }, { "beta_dpo/beta": 0.09851119667291641, "beta_dpo/beta_margin_grad_mean": -0.49376627802848816, "beta_dpo/beta_margin_grad_std": 0.019676726311445236, "beta_dpo/beta_margin_mean": 0.024973532184958458, "beta_dpo/beta_margin_std": 0.0788303092122078, "beta_dpo/beta_used": 0.09851119667291641, "beta_dpo/beta_used_raw": 0.09851119667291641, "beta_dpo/gap_mean": 0.2203991711139679, "beta_dpo/gap_std": 0.8004931211471558, "beta_dpo/loss_margin_mean": 0.24607722461223602, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.12244897959183673, "grad_norm": 22.618637084960938, "learning_rate": 4.994093197099587e-07, "logits/chosen": 1.1300098896026611, "logits/rejected": 1.091421365737915, "loss": 1.3666, "step": 81 }, { "beta_dpo/beta": 0.09958788752555847, "beta_dpo/beta_margin_grad_mean": -0.4900076687335968, "beta_dpo/beta_margin_grad_std": 0.02183517999947071, "beta_dpo/beta_margin_mean": 0.04006734862923622, "beta_dpo/beta_margin_std": 0.08758988231420517, "beta_dpo/beta_used": 0.09958788752555847, "beta_dpo/beta_used_raw": 0.09958788752555847, "beta_dpo/gap_mean": 0.24891814589500427, "beta_dpo/gap_std": 0.817732036113739, "beta_dpo/loss_margin_mean": 0.4039810597896576, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.12396069538926682, "grad_norm": 21.952186584472656, "learning_rate": 4.993149937871306e-07, "logits/chosen": 1.499792218208313, "logits/rejected": 1.4229098558425903, "loss": 1.3634, "step": 82 }, { "beta_dpo/beta": 0.10711533576250076, "beta_dpo/beta_margin_grad_mean": -0.48857223987579346, "beta_dpo/beta_margin_grad_std": 0.024558711796998978, "beta_dpo/beta_margin_mean": 0.04585915803909302, "beta_dpo/beta_margin_std": 0.09868450462818146, "beta_dpo/beta_used": 0.10711533576250076, "beta_dpo/beta_used_raw": 0.10711533576250076, "beta_dpo/gap_mean": 0.28387853503227234, "beta_dpo/gap_std": 0.8279096484184265, "beta_dpo/loss_margin_mean": 0.41550129652023315, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1254724111866969, "grad_norm": 25.619373321533203, "learning_rate": 4.992136939879856e-07, "logits/chosen": 1.5482455492019653, "logits/rejected": 1.4713327884674072, "loss": 1.344, "step": 83 }, { "beta_dpo/beta": 0.09682896733283997, "beta_dpo/beta_margin_grad_mean": -0.4948498606681824, "beta_dpo/beta_margin_grad_std": 0.02205750159919262, "beta_dpo/beta_margin_mean": 0.020634762942790985, "beta_dpo/beta_margin_std": 0.08842462301254272, "beta_dpo/beta_used": 0.09682896733283997, "beta_dpo/beta_used_raw": 0.09682896733283997, "beta_dpo/gap_mean": 0.27833741903305054, "beta_dpo/gap_std": 0.8445614576339722, "beta_dpo/loss_margin_mean": 0.20018967986106873, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.12698412698412698, "grad_norm": 23.43607521057129, "learning_rate": 4.991054231460969e-07, "logits/chosen": 1.5122936964035034, "logits/rejected": 1.4564745426177979, "loss": 1.3651, "step": 84 }, { "beta_dpo/beta": 0.1081511527299881, "beta_dpo/beta_margin_grad_mean": -0.4898848831653595, "beta_dpo/beta_margin_grad_std": 0.02815152332186699, "beta_dpo/beta_margin_mean": 0.040507350116968155, "beta_dpo/beta_margin_std": 0.11305945366621017, "beta_dpo/beta_used": 0.1081511527299881, "beta_dpo/beta_used_raw": 0.1081511527299881, "beta_dpo/gap_mean": 0.27856504917144775, "beta_dpo/gap_std": 0.8720511198043823, "beta_dpo/loss_margin_mean": 0.3741960823535919, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.12849584278155707, "grad_norm": 23.618709564208984, "learning_rate": 4.989901842900325e-07, "logits/chosen": 1.4183937311172485, "logits/rejected": 1.3515069484710693, "loss": 1.3433, "step": 85 }, { "beta_dpo/beta": 0.09931820631027222, "beta_dpo/beta_margin_grad_mean": -0.49436455965042114, "beta_dpo/beta_margin_grad_std": 0.023392099887132645, "beta_dpo/beta_margin_mean": 0.022570841014385223, "beta_dpo/beta_margin_std": 0.09379469603300095, "beta_dpo/beta_used": 0.09931820631027222, "beta_dpo/beta_used_raw": 0.09931820631027222, "beta_dpo/gap_mean": 0.27649736404418945, "beta_dpo/gap_std": 0.896049976348877, "beta_dpo/loss_margin_mean": 0.22701409459114075, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.13000755857898716, "grad_norm": 22.559938430786133, "learning_rate": 4.988679806432711e-07, "logits/chosen": 1.6345174312591553, "logits/rejected": 1.585810661315918, "loss": 1.3613, "step": 86 }, { "beta_dpo/beta": 0.10324035584926605, "beta_dpo/beta_margin_grad_mean": -0.4915555417537689, "beta_dpo/beta_margin_grad_std": 0.02729521505534649, "beta_dpo/beta_margin_mean": 0.03396186605095863, "beta_dpo/beta_margin_std": 0.10979495197534561, "beta_dpo/beta_used": 0.10324035584926605, "beta_dpo/beta_used_raw": 0.10324035584926605, "beta_dpo/gap_mean": 0.27984869480133057, "beta_dpo/gap_std": 0.9216774106025696, "beta_dpo/loss_margin_mean": 0.3301246166229248, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.13151927437641722, "grad_norm": 24.750186920166016, "learning_rate": 4.987388156241114e-07, "logits/chosen": 1.4320831298828125, "logits/rejected": 1.3352614641189575, "loss": 1.3535, "step": 87 }, { "beta_dpo/beta": 0.10066162049770355, "beta_dpo/beta_margin_grad_mean": -0.4947110414505005, "beta_dpo/beta_margin_grad_std": 0.026083989068865776, "beta_dpo/beta_margin_mean": 0.02120215632021427, "beta_dpo/beta_margin_std": 0.10460641980171204, "beta_dpo/beta_used": 0.10066162049770355, "beta_dpo/beta_used_raw": 0.10066162049770355, "beta_dpo/gap_mean": 0.2821298837661743, "beta_dpo/gap_std": 0.94524085521698, "beta_dpo/loss_margin_mean": 0.18504422903060913, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1330309901738473, "grad_norm": 23.80083656311035, "learning_rate": 4.986026928455767e-07, "logits/chosen": 1.5004366636276245, "logits/rejected": 1.4935534000396729, "loss": 1.3568, "step": 88 }, { "beta_dpo/beta": 0.09679665416479111, "beta_dpo/beta_margin_grad_mean": -0.48473596572875977, "beta_dpo/beta_margin_grad_std": 0.028986340388655663, "beta_dpo/beta_margin_mean": 0.06136619672179222, "beta_dpo/beta_margin_std": 0.11661099642515182, "beta_dpo/beta_used": 0.09679665416479111, "beta_dpo/beta_used_raw": 0.09679665416479111, "beta_dpo/gap_mean": 0.3047249913215637, "beta_dpo/gap_std": 0.9839344024658203, "beta_dpo/loss_margin_mean": 0.6256879568099976, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1345427059712774, "grad_norm": 19.98080825805664, "learning_rate": 4.984596161153135e-07, "logits/chosen": 1.4730298519134521, "logits/rejected": 1.3510327339172363, "loss": 1.3634, "step": 89 }, { "beta_dpo/beta": 0.10261310636997223, "beta_dpo/beta_margin_grad_mean": -0.48896563053131104, "beta_dpo/beta_margin_grad_std": 0.028216810896992683, "beta_dpo/beta_margin_mean": 0.04428347200155258, "beta_dpo/beta_margin_std": 0.11333856731653214, "beta_dpo/beta_used": 0.10261310636997223, "beta_dpo/beta_used_raw": 0.10261310636997223, "beta_dpo/gap_mean": 0.35536807775497437, "beta_dpo/gap_std": 1.0184149742126465, "beta_dpo/loss_margin_mean": 0.4309338331222534, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1360544217687075, "grad_norm": 24.94264030456543, "learning_rate": 4.983095894354857e-07, "logits/chosen": 1.406229019165039, "logits/rejected": 1.3163142204284668, "loss": 1.3475, "step": 90 }, { "beta_dpo/beta": 0.11208349466323853, "beta_dpo/beta_margin_grad_mean": -0.48447299003601074, "beta_dpo/beta_margin_grad_std": 0.033066846430301666, "beta_dpo/beta_margin_mean": 0.06240526959300041, "beta_dpo/beta_margin_std": 0.13280640542507172, "beta_dpo/beta_used": 0.11208349466323853, "beta_dpo/beta_used_raw": 0.11208349466323853, "beta_dpo/gap_mean": 0.3759007453918457, "beta_dpo/gap_std": 1.0446383953094482, "beta_dpo/loss_margin_mean": 0.5622912049293518, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.13756613756613756, "grad_norm": 24.20952033996582, "learning_rate": 4.98152617002662e-07, "logits/chosen": 1.263109803199768, "logits/rejected": 1.2032928466796875, "loss": 1.326, "step": 91 }, { "beta_dpo/beta": 0.09759242832660675, "beta_dpo/beta_margin_grad_mean": -0.491456538438797, "beta_dpo/beta_margin_grad_std": 0.03118491731584072, "beta_dpo/beta_margin_mean": 0.03435541316866875, "beta_dpo/beta_margin_std": 0.12529778480529785, "beta_dpo/beta_used": 0.09759242832660675, "beta_dpo/beta_used_raw": 0.09759242832660675, "beta_dpo/gap_mean": 0.38296887278556824, "beta_dpo/gap_std": 1.0825082063674927, "beta_dpo/loss_margin_mean": 0.3481322228908539, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.13907785336356765, "grad_norm": 23.6805362701416, "learning_rate": 4.979887032076988e-07, "logits/chosen": 1.3866524696350098, "logits/rejected": 1.3360724449157715, "loss": 1.3553, "step": 92 }, { "beta_dpo/beta": 0.09923055022954941, "beta_dpo/beta_margin_grad_mean": -0.4930683970451355, "beta_dpo/beta_margin_grad_std": 0.0345403254032135, "beta_dpo/beta_margin_mean": 0.027934642508625984, "beta_dpo/beta_margin_std": 0.1390235722064972, "beta_dpo/beta_used": 0.09923055022954941, "beta_dpo/beta_used_raw": 0.09923055022954941, "beta_dpo/gap_mean": 0.36329329013824463, "beta_dpo/gap_std": 1.1469428539276123, "beta_dpo/loss_margin_mean": 0.2789202034473419, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.14058956916099774, "grad_norm": 20.091751098632812, "learning_rate": 4.978178526356172e-07, "logits/chosen": 1.453279733657837, "logits/rejected": 1.4085452556610107, "loss": 1.3544, "step": 93 }, { "beta_dpo/beta": 0.11446872353553772, "beta_dpo/beta_margin_grad_mean": -0.4835643172264099, "beta_dpo/beta_margin_grad_std": 0.04806080833077431, "beta_dpo/beta_margin_mean": 0.06689022481441498, "beta_dpo/beta_margin_std": 0.19625984132289886, "beta_dpo/beta_used": 0.11446872353553772, "beta_dpo/beta_used_raw": 0.11446872353553772, "beta_dpo/gap_mean": 0.39031505584716797, "beta_dpo/gap_std": 1.2319457530975342, "beta_dpo/loss_margin_mean": 0.5888001322746277, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1421012849584278, "grad_norm": 25.610767364501953, "learning_rate": 4.976400700654751e-07, "logits/chosen": 1.3520464897155762, "logits/rejected": 1.2797930240631104, "loss": 1.3178, "step": 94 }, { "beta_dpo/beta": 0.10418045520782471, "beta_dpo/beta_margin_grad_mean": -0.48502302169799805, "beta_dpo/beta_margin_grad_std": 0.043376799672842026, "beta_dpo/beta_margin_mean": 0.06061091274023056, "beta_dpo/beta_margin_std": 0.17566770315170288, "beta_dpo/beta_used": 0.10418045520782471, "beta_dpo/beta_used_raw": 0.10418045520782471, "beta_dpo/gap_mean": 0.44323962926864624, "beta_dpo/gap_std": 1.3183009624481201, "beta_dpo/loss_margin_mean": 0.5283056497573853, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1436130007558579, "grad_norm": 24.98712158203125, "learning_rate": 4.974553604702332e-07, "logits/chosen": 1.3048655986785889, "logits/rejected": 1.2389471530914307, "loss": 1.3326, "step": 95 }, { "beta_dpo/beta": 0.0894891619682312, "beta_dpo/beta_margin_grad_mean": -0.4840312898159027, "beta_dpo/beta_margin_grad_std": 0.0363737978041172, "beta_dpo/beta_margin_mean": 0.06439025700092316, "beta_dpo/beta_margin_std": 0.14664606750011444, "beta_dpo/beta_used": 0.0894891619682312, "beta_dpo/beta_used_raw": 0.0894891619682312, "beta_dpo/gap_mean": 0.46237361431121826, "beta_dpo/gap_std": 1.373389720916748, "beta_dpo/loss_margin_mean": 0.6972731351852417, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.14512471655328799, "grad_norm": 19.369220733642578, "learning_rate": 4.972637290166157e-07, "logits/chosen": 1.2695260047912598, "logits/rejected": 1.2192683219909668, "loss": 1.3603, "step": 96 }, { "beta_dpo/beta": 0.08789724111557007, "beta_dpo/beta_margin_grad_mean": -0.4935567378997803, "beta_dpo/beta_margin_grad_std": 0.033925559371709824, "beta_dpo/beta_margin_mean": 0.025890527293086052, "beta_dpo/beta_margin_std": 0.1364428550004959, "beta_dpo/beta_used": 0.08789724111557007, "beta_dpo/beta_used_raw": 0.08789724111557007, "beta_dpo/gap_mean": 0.45890724658966064, "beta_dpo/gap_std": 1.4099913835525513, "beta_dpo/loss_margin_mean": 0.2942034900188446, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.14663643235071808, "grad_norm": 20.669422149658203, "learning_rate": 4.970651810649666e-07, "logits/chosen": 1.289090633392334, "logits/rejected": 1.2603429555892944, "loss": 1.3666, "step": 97 }, { "beta_dpo/beta": 0.10570737719535828, "beta_dpo/beta_margin_grad_mean": -0.4919486939907074, "beta_dpo/beta_margin_grad_std": 0.03829289227724075, "beta_dpo/beta_margin_mean": 0.03243134915828705, "beta_dpo/beta_margin_std": 0.15482714772224426, "beta_dpo/beta_used": 0.10570737719535828, "beta_dpo/beta_used_raw": 0.10570737719535828, "beta_dpo/gap_mean": 0.4299718737602234, "beta_dpo/gap_std": 1.4265832901000977, "beta_dpo/loss_margin_mean": 0.3066960573196411, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.14814814814814814, "grad_norm": 22.59371566772461, "learning_rate": 4.968597221690985e-07, "logits/chosen": 1.1470067501068115, "logits/rejected": 1.1468849182128906, "loss": 1.3348, "step": 98 }, { "beta_dpo/beta": 0.0829615443944931, "beta_dpo/beta_margin_grad_mean": -0.4901537597179413, "beta_dpo/beta_margin_grad_std": 0.04067037254571915, "beta_dpo/beta_margin_mean": 0.04001061990857124, "beta_dpo/beta_margin_std": 0.16489112377166748, "beta_dpo/beta_used": 0.0829615443944931, "beta_dpo/beta_used_raw": 0.0829615443944931, "beta_dpo/gap_mean": 0.4272018373012543, "beta_dpo/gap_std": 1.4833000898361206, "beta_dpo/loss_margin_mean": 0.4821007251739502, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.14965986394557823, "grad_norm": 20.694110870361328, "learning_rate": 4.966473580761389e-07, "logits/chosen": 1.3811016082763672, "logits/rejected": 1.3564668893814087, "loss": 1.3759, "step": 99 }, { "beta_dpo/beta": 0.09042062610387802, "beta_dpo/beta_margin_grad_mean": -0.49155348539352417, "beta_dpo/beta_margin_grad_std": 0.04488116875290871, "beta_dpo/beta_margin_mean": 0.03428267315030098, "beta_dpo/beta_margin_std": 0.18191133439540863, "beta_dpo/beta_used": 0.09042062610387802, "beta_dpo/beta_used_raw": 0.09042062610387802, "beta_dpo/gap_mean": 0.41866934299468994, "beta_dpo/gap_std": 1.5786731243133545, "beta_dpo/loss_margin_mean": 0.38769927620887756, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.15117157974300832, "grad_norm": 22.64974594116211, "learning_rate": 4.964280947263676e-07, "logits/chosen": 1.245833396911621, "logits/rejected": 1.2450430393218994, "loss": 1.3654, "step": 100 }, { "epoch": 0.15117157974300832, "eval_beta_dpo/beta": 0.11095979809761047, "eval_beta_dpo/beta_margin_grad_mean": -0.48243069648742676, "eval_beta_dpo/beta_margin_grad_std": 0.052106164395809174, "eval_beta_dpo/beta_margin_mean": 0.07168044149875641, "eval_beta_dpo/beta_margin_std": 0.21300600469112396, "eval_beta_dpo/beta_used": 0.11095979809761047, "eval_beta_dpo/beta_used_raw": 0.11095979809761047, "eval_beta_dpo/gap_mean": 0.4233362674713135, "eval_beta_dpo/gap_std": 1.6301106214523315, "eval_beta_dpo/loss_margin_mean": 0.6059994697570801, "eval_beta_dpo/mask_keep_frac": 1.0, "eval_logits/chosen": 1.3013670444488525, "eval_logits/rejected": 1.2542670965194702, "eval_loss": 0.6640572547912598, "eval_runtime": 43.5006, "eval_samples_per_second": 52.942, "eval_steps_per_second": 1.655, "step": 100 }, { "beta_dpo/beta": 0.11204126477241516, "beta_dpo/beta_margin_grad_mean": -0.48013564944267273, "beta_dpo/beta_margin_grad_std": 0.042587053030729294, "beta_dpo/beta_margin_mean": 0.07981999963521957, "beta_dpo/beta_margin_std": 0.1719403713941574, "beta_dpo/beta_used": 0.11204126477241516, "beta_dpo/beta_used_raw": 0.11204126477241516, "beta_dpo/gap_mean": 0.4711691737174988, "beta_dpo/gap_std": 1.609398365020752, "beta_dpo/loss_margin_mean": 0.7173241376876831, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.15268329554043839, "grad_norm": 23.564899444580078, "learning_rate": 4.96201938253052e-07, "logits/chosen": 1.1492784023284912, "logits/rejected": 1.1351875066757202, "loss": 1.3168, "step": 101 }, { "beta_dpo/beta": 0.08911336958408356, "beta_dpo/beta_margin_grad_mean": -0.490958571434021, "beta_dpo/beta_margin_grad_std": 0.04404524341225624, "beta_dpo/beta_margin_mean": 0.03683772310614586, "beta_dpo/beta_margin_std": 0.17854470014572144, "beta_dpo/beta_used": 0.08911336958408356, "beta_dpo/beta_used_raw": 0.08911336958408356, "beta_dpo/gap_mean": 0.46106693148612976, "beta_dpo/gap_std": 1.6647722721099854, "beta_dpo/loss_margin_mean": 0.4219280481338501, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.15419501133786848, "grad_norm": 20.356643676757812, "learning_rate": 4.959688949822748e-07, "logits/chosen": 1.333066463470459, "logits/rejected": 1.2851324081420898, "loss": 1.3646, "step": 102 }, { "beta_dpo/beta": 0.10101747512817383, "beta_dpo/beta_margin_grad_mean": -0.4781191647052765, "beta_dpo/beta_margin_grad_std": 0.04582774639129639, "beta_dpo/beta_margin_mean": 0.0886739194393158, "beta_dpo/beta_margin_std": 0.18615014851093292, "beta_dpo/beta_used": 0.10101747512817383, "beta_dpo/beta_used_raw": 0.10101747512817383, "beta_dpo/gap_mean": 0.5251193046569824, "beta_dpo/gap_std": 1.7195401191711426, "beta_dpo/loss_margin_mean": 0.8807109594345093, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.15570672713529857, "grad_norm": 25.940797805786133, "learning_rate": 4.957289714327572e-07, "logits/chosen": 1.4399843215942383, "logits/rejected": 1.4258053302764893, "loss": 1.3361, "step": 103 }, { "beta_dpo/beta": 0.1078379899263382, "beta_dpo/beta_margin_grad_mean": -0.47406965494155884, "beta_dpo/beta_margin_grad_std": 0.053313929587602615, "beta_dpo/beta_margin_mean": 0.1053386926651001, "beta_dpo/beta_margin_std": 0.2173088937997818, "beta_dpo/beta_used": 0.1078379899263382, "beta_dpo/beta_used_raw": 0.1078379899263382, "beta_dpo/gap_mean": 0.6082203388214111, "beta_dpo/gap_std": 1.7673068046569824, "beta_dpo/loss_margin_mean": 1.0192296504974365, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.15721844293272866, "grad_norm": 27.010522842407227, "learning_rate": 4.954821743156767e-07, "logits/chosen": 1.3672761917114258, "logits/rejected": 1.2913395166397095, "loss": 1.3068, "step": 104 }, { "beta_dpo/beta": 0.11157245934009552, "beta_dpo/beta_margin_grad_mean": -0.47606605291366577, "beta_dpo/beta_margin_grad_std": 0.061335984617471695, "beta_dpo/beta_margin_mean": 0.0973881185054779, "beta_dpo/beta_margin_std": 0.24996986985206604, "beta_dpo/beta_used": 0.11157245934009552, "beta_dpo/beta_used_raw": 0.11157245934009552, "beta_dpo/gap_mean": 0.680145263671875, "beta_dpo/gap_std": 1.9032455682754517, "beta_dpo/loss_margin_mean": 0.8816050291061401, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.15873015873015872, "grad_norm": 26.681400299072266, "learning_rate": 4.952285105344791e-07, "logits/chosen": 1.4183114767074585, "logits/rejected": 1.3744010925292969, "loss": 1.2982, "step": 105 }, { "beta_dpo/beta": 0.10396211594343185, "beta_dpo/beta_margin_grad_mean": -0.4831298291683197, "beta_dpo/beta_margin_grad_std": 0.051441218703985214, "beta_dpo/beta_margin_mean": 0.06840399652719498, "beta_dpo/beta_margin_std": 0.20999610424041748, "beta_dpo/beta_used": 0.10396211594343185, "beta_dpo/beta_used_raw": 0.10396211594343185, "beta_dpo/gap_mean": 0.6826244592666626, "beta_dpo/gap_std": 1.9469351768493652, "beta_dpo/loss_margin_mean": 0.6595271825790405, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1602418745275888, "grad_norm": 23.215801239013672, "learning_rate": 4.949679871846857e-07, "logits/chosen": 1.3464417457580566, "logits/rejected": 1.3212090730667114, "loss": 1.3151, "step": 106 }, { "beta_dpo/beta": 0.08311143517494202, "beta_dpo/beta_margin_grad_mean": -0.48951032757759094, "beta_dpo/beta_margin_grad_std": 0.04360119625926018, "beta_dpo/beta_margin_mean": 0.04215170443058014, "beta_dpo/beta_margin_std": 0.1757575124502182, "beta_dpo/beta_used": 0.08311143517494202, "beta_dpo/beta_used_raw": 0.08311143517494202, "beta_dpo/gap_mean": 0.6624685525894165, "beta_dpo/gap_std": 1.9580414295196533, "beta_dpo/loss_margin_mean": 0.48135077953338623, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1617535903250189, "grad_norm": 21.3165340423584, "learning_rate": 4.947006115536947e-07, "logits/chosen": 1.1941533088684082, "logits/rejected": 1.1890106201171875, "loss": 1.3599, "step": 107 }, { "beta_dpo/beta": 0.10874947905540466, "beta_dpo/beta_margin_grad_mean": -0.4867684543132782, "beta_dpo/beta_margin_grad_std": 0.05567330867052078, "beta_dpo/beta_margin_mean": 0.053287770599126816, "beta_dpo/beta_margin_std": 0.22567662596702576, "beta_dpo/beta_used": 0.10874947905540466, "beta_dpo/beta_used_raw": 0.10874947905540466, "beta_dpo/gap_mean": 0.6162758469581604, "beta_dpo/gap_std": 1.9659650325775146, "beta_dpo/loss_margin_mean": 0.495738685131073, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.16326530612244897, "grad_norm": 24.595596313476562, "learning_rate": 4.944263911205772e-07, "logits/chosen": 1.3917639255523682, "logits/rejected": 1.3184635639190674, "loss": 1.3084, "step": 108 }, { "beta_dpo/beta": 0.08857216686010361, "beta_dpo/beta_margin_grad_mean": -0.48171043395996094, "beta_dpo/beta_margin_grad_std": 0.05316697433590889, "beta_dpo/beta_margin_mean": 0.07432334870100021, "beta_dpo/beta_margin_std": 0.2160109281539917, "beta_dpo/beta_used": 0.08857216686010361, "beta_dpo/beta_used_raw": 0.08857216686010361, "beta_dpo/gap_mean": 0.6437417268753052, "beta_dpo/gap_std": 2.0695106983184814, "beta_dpo/loss_margin_mean": 0.8359496593475342, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.16477702191987906, "grad_norm": 19.824125289916992, "learning_rate": 4.941453335558681e-07, "logits/chosen": 1.286027431488037, "logits/rejected": 1.1748815774917603, "loss": 1.349, "step": 109 }, { "beta_dpo/beta": 0.09814047068357468, "beta_dpo/beta_margin_grad_mean": -0.49335938692092896, "beta_dpo/beta_margin_grad_std": 0.05850514397025108, "beta_dpo/beta_margin_mean": 0.026294706389307976, "beta_dpo/beta_margin_std": 0.2390744686126709, "beta_dpo/beta_used": 0.09814047068357468, "beta_dpo/beta_used_raw": 0.09814047068357468, "beta_dpo/gap_mean": 0.598505973815918, "beta_dpo/gap_std": 2.148646831512451, "beta_dpo/loss_margin_mean": 0.26830294728279114, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.16628873771730915, "grad_norm": 22.9931640625, "learning_rate": 4.938574467213517e-07, "logits/chosen": 1.0979554653167725, "logits/rejected": 1.1433002948760986, "loss": 1.3376, "step": 110 }, { "beta_dpo/beta": 0.10206159949302673, "beta_dpo/beta_margin_grad_mean": -0.48160073161125183, "beta_dpo/beta_margin_grad_std": 0.05491115152835846, "beta_dpo/beta_margin_mean": 0.07462549209594727, "beta_dpo/beta_margin_std": 0.22261567413806915, "beta_dpo/beta_used": 0.10206159949302673, "beta_dpo/beta_used_raw": 0.10206159949302673, "beta_dpo/gap_mean": 0.5927486419677734, "beta_dpo/gap_std": 2.1546518802642822, "beta_dpo/loss_margin_mean": 0.7219003438949585, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.16780045351473924, "grad_norm": 22.11334800720215, "learning_rate": 4.935627386698418e-07, "logits/chosen": 1.544306755065918, "logits/rejected": 1.4732277393341064, "loss": 1.3285, "step": 111 }, { "beta_dpo/beta": 0.11867986619472504, "beta_dpo/beta_margin_grad_mean": -0.4607756435871124, "beta_dpo/beta_margin_grad_std": 0.06654529273509979, "beta_dpo/beta_margin_mean": 0.16026724874973297, "beta_dpo/beta_margin_std": 0.272041380405426, "beta_dpo/beta_used": 0.11867986619472504, "beta_dpo/beta_used_raw": 0.11867986619472504, "beta_dpo/gap_mean": 0.720818817615509, "beta_dpo/gap_std": 2.1644039154052734, "beta_dpo/loss_margin_mean": 1.338735580444336, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1693121693121693, "grad_norm": 28.8944091796875, "learning_rate": 4.932612176449559e-07, "logits/chosen": 1.4725078344345093, "logits/rejected": 1.3775207996368408, "loss": 1.2772, "step": 112 }, { "beta_dpo/beta": 0.09883327782154083, "beta_dpo/beta_margin_grad_mean": -0.48374107480049133, "beta_dpo/beta_margin_grad_std": 0.062059108167886734, "beta_dpo/beta_margin_mean": 0.06639490276575089, "beta_dpo/beta_margin_std": 0.2554260790348053, "beta_dpo/beta_used": 0.09883327782154083, "beta_dpo/beta_used_raw": 0.09883327782154083, "beta_dpo/gap_mean": 0.7182120084762573, "beta_dpo/gap_std": 2.257500171661377, "beta_dpo/loss_margin_mean": 0.642180323600769, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1708238851095994, "grad_norm": 21.073177337646484, "learning_rate": 4.929528920808854e-07, "logits/chosen": 1.2748922109603882, "logits/rejected": 1.260689377784729, "loss": 1.322, "step": 113 }, { "beta_dpo/beta": 0.09910961240530014, "beta_dpo/beta_margin_grad_mean": -0.481128066778183, "beta_dpo/beta_margin_grad_std": 0.05786411464214325, "beta_dpo/beta_margin_mean": 0.0767405554652214, "beta_dpo/beta_margin_std": 0.23470765352249146, "beta_dpo/beta_used": 0.09910961240530014, "beta_dpo/beta_used_raw": 0.09910961240530014, "beta_dpo/gap_mean": 0.7387478351593018, "beta_dpo/gap_std": 2.291405200958252, "beta_dpo/loss_margin_mean": 0.7726707458496094, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.17233560090702948, "grad_norm": 21.475975036621094, "learning_rate": 4.92637770602159e-07, "logits/chosen": 1.1779590845108032, "logits/rejected": 1.1033458709716797, "loss": 1.325, "step": 114 }, { "beta_dpo/beta": 0.098334401845932, "beta_dpo/beta_margin_grad_mean": -0.471476286649704, "beta_dpo/beta_margin_grad_std": 0.06358911842107773, "beta_dpo/beta_margin_mean": 0.11761815845966339, "beta_dpo/beta_margin_std": 0.26342275738716125, "beta_dpo/beta_used": 0.098334401845932, "beta_dpo/beta_used_raw": 0.098334401845932, "beta_dpo/gap_mean": 0.8120362758636475, "beta_dpo/gap_std": 2.3379993438720703, "beta_dpo/loss_margin_mean": 1.1042755842208862, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.17384731670445955, "grad_norm": 22.567617416381836, "learning_rate": 4.923158620234019e-07, "logits/chosen": 1.1222329139709473, "logits/rejected": 1.0455198287963867, "loss": 1.3132, "step": 115 }, { "beta_dpo/beta": 0.11336952447891235, "beta_dpo/beta_margin_grad_mean": -0.4619391858577728, "beta_dpo/beta_margin_grad_std": 0.06697308272123337, "beta_dpo/beta_margin_mean": 0.15571817755699158, "beta_dpo/beta_margin_std": 0.27545690536499023, "beta_dpo/beta_used": 0.11336952447891235, "beta_dpo/beta_used_raw": 0.11336952447891235, "beta_dpo/gap_mean": 0.8705282211303711, "beta_dpo/gap_std": 2.379244804382324, "beta_dpo/loss_margin_mean": 1.3704155683517456, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.17535903250188964, "grad_norm": 25.011146545410156, "learning_rate": 4.91987175349089e-07, "logits/chosen": 1.3455551862716675, "logits/rejected": 1.290403962135315, "loss": 1.2767, "step": 116 }, { "beta_dpo/beta": 0.0890221819281578, "beta_dpo/beta_margin_grad_mean": -0.47394704818725586, "beta_dpo/beta_margin_grad_std": 0.059818971902132034, "beta_dpo/beta_margin_mean": 0.105972059071064, "beta_dpo/beta_margin_std": 0.24295583367347717, "beta_dpo/beta_used": 0.0890221819281578, "beta_dpo/beta_used_raw": 0.0890221819281578, "beta_dpo/gap_mean": 0.9534369111061096, "beta_dpo/gap_std": 2.405941963195801, "beta_dpo/loss_margin_mean": 1.0419647693634033, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.17687074829931973, "grad_norm": 19.85879898071289, "learning_rate": 4.916517197732933e-07, "logits/chosen": 0.9891796708106995, "logits/rejected": 0.9565305709838867, "loss": 1.3148, "step": 117 }, { "beta_dpo/beta": 0.09003470093011856, "beta_dpo/beta_margin_grad_mean": -0.4769829511642456, "beta_dpo/beta_margin_grad_std": 0.0587238110601902, "beta_dpo/beta_margin_mean": 0.09337636828422546, "beta_dpo/beta_margin_std": 0.2402600198984146, "beta_dpo/beta_used": 0.09003470093011856, "beta_dpo/beta_used_raw": 0.09003470093011856, "beta_dpo/gap_mean": 0.9489431977272034, "beta_dpo/gap_std": 2.446256399154663, "beta_dpo/loss_margin_mean": 1.0371025800704956, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.17838246409674982, "grad_norm": 21.60437774658203, "learning_rate": 4.913095046794281e-07, "logits/chosen": 1.2734606266021729, "logits/rejected": 1.2298357486724854, "loss": 1.3131, "step": 118 }, { "beta_dpo/beta": 0.0933263748884201, "beta_dpo/beta_margin_grad_mean": -0.47756442427635193, "beta_dpo/beta_margin_grad_std": 0.08078356087207794, "beta_dpo/beta_margin_mean": 0.09172452986240387, "beta_dpo/beta_margin_std": 0.33637452125549316, "beta_dpo/beta_used": 0.0933263748884201, "beta_dpo/beta_used_raw": 0.0933263748884201, "beta_dpo/gap_mean": 0.931819498538971, "beta_dpo/gap_std": 2.5663132667541504, "beta_dpo/loss_margin_mean": 0.9030270576477051, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.17989417989417988, "grad_norm": 21.077136993408203, "learning_rate": 4.909605396399855e-07, "logits/chosen": 1.2811903953552246, "logits/rejected": 1.2313565015792847, "loss": 1.3155, "step": 119 }, { "beta_dpo/beta": 0.11881925165653229, "beta_dpo/beta_margin_grad_mean": -0.4537702798843384, "beta_dpo/beta_margin_grad_std": 0.06741677224636078, "beta_dpo/beta_margin_mean": 0.18935920298099518, "beta_dpo/beta_margin_std": 0.2780967950820923, "beta_dpo/beta_used": 0.11881925165653229, "beta_dpo/beta_used_raw": 0.11881925165653229, "beta_dpo/gap_mean": 1.0412273406982422, "beta_dpo/gap_std": 2.5785160064697266, "beta_dpo/loss_margin_mean": 1.592591404914856, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.18140589569160998, "grad_norm": 27.940187454223633, "learning_rate": 4.906048344162676e-07, "logits/chosen": 1.5376054048538208, "logits/rejected": 1.464134693145752, "loss": 1.2436, "step": 120 }, { "beta_dpo/beta": 0.09064137935638428, "beta_dpo/beta_margin_grad_mean": -0.47120246291160583, "beta_dpo/beta_margin_grad_std": 0.06724441051483154, "beta_dpo/beta_margin_mean": 0.11866184324026108, "beta_dpo/beta_margin_std": 0.27661600708961487, "beta_dpo/beta_used": 0.09064137935638428, "beta_dpo/beta_used_raw": 0.09064137935638428, "beta_dpo/gap_mean": 1.1125645637512207, "beta_dpo/gap_std": 2.647150993347168, "beta_dpo/loss_margin_mean": 1.2891712188720703, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.18291761148904007, "grad_norm": 20.935619354248047, "learning_rate": 4.902423989581143e-07, "logits/chosen": 1.4415934085845947, "logits/rejected": 1.3207588195800781, "loss": 1.3101, "step": 121 }, { "beta_dpo/beta": 0.07106913626194, "beta_dpo/beta_margin_grad_mean": -0.4810297191143036, "beta_dpo/beta_margin_grad_std": 0.05604850500822067, "beta_dpo/beta_margin_mean": 0.07772746682167053, "beta_dpo/beta_margin_std": 0.22904185950756073, "beta_dpo/beta_used": 0.07106913626194, "beta_dpo/beta_used_raw": 0.07106913626194, "beta_dpo/gap_mean": 1.0930296182632446, "beta_dpo/gap_std": 2.7709574699401855, "beta_dpo/loss_margin_mean": 1.0711023807525635, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.18442932728647016, "grad_norm": 17.042362213134766, "learning_rate": 4.898732434036243e-07, "logits/chosen": 1.2231804132461548, "logits/rejected": 1.1708978414535522, "loss": 1.3483, "step": 122 }, { "beta_dpo/beta": 0.09615612775087357, "beta_dpo/beta_margin_grad_mean": -0.46688589453697205, "beta_dpo/beta_margin_grad_std": 0.0644429549574852, "beta_dpo/beta_margin_mean": 0.13525746762752533, "beta_dpo/beta_margin_std": 0.2637210488319397, "beta_dpo/beta_used": 0.09615612775087357, "beta_dpo/beta_used_raw": 0.09615612775087357, "beta_dpo/gap_mean": 1.1396965980529785, "beta_dpo/gap_std": 2.8205223083496094, "beta_dpo/loss_margin_mean": 1.378818154335022, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.18594104308390022, "grad_norm": 20.993009567260742, "learning_rate": 4.894973780788722e-07, "logits/chosen": 1.1314191818237305, "logits/rejected": 1.0978955030441284, "loss": 1.2832, "step": 123 }, { "beta_dpo/beta": 0.11089035868644714, "beta_dpo/beta_margin_grad_mean": -0.45878610014915466, "beta_dpo/beta_margin_grad_std": 0.09837619960308075, "beta_dpo/beta_margin_mean": 0.1747506558895111, "beta_dpo/beta_margin_std": 0.42173826694488525, "beta_dpo/beta_used": 0.11089035868644714, "beta_dpo/beta_used_raw": 0.11089035868644714, "beta_dpo/gap_mean": 1.1973506212234497, "beta_dpo/gap_std": 2.9226651191711426, "beta_dpo/loss_margin_mean": 1.5113928318023682, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1874527588813303, "grad_norm": 23.7943172454834, "learning_rate": 4.89114813497619e-07, "logits/chosen": 1.3841103315353394, "logits/rejected": 1.369675874710083, "loss": 1.2484, "step": 124 }, { "beta_dpo/beta": 0.0960138589143753, "beta_dpo/beta_margin_grad_mean": -0.46052810549736023, "beta_dpo/beta_margin_grad_std": 0.0767231211066246, "beta_dpo/beta_margin_mean": 0.16436608135700226, "beta_dpo/beta_margin_std": 0.32474106550216675, "beta_dpo/beta_used": 0.0960138589143753, "beta_dpo/beta_used_raw": 0.0960138589143753, "beta_dpo/gap_mean": 1.298264741897583, "beta_dpo/gap_std": 2.985287666320801, "beta_dpo/loss_margin_mean": 1.5015137195587158, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1889644746787604, "grad_norm": 24.529010772705078, "learning_rate": 4.887255603610184e-07, "logits/chosen": 1.300724744796753, "logits/rejected": 1.2149507999420166, "loss": 1.2739, "step": 125 }, { "beta_dpo/beta": 0.04256870597600937, "beta_dpo/beta_margin_grad_mean": -0.49036645889282227, "beta_dpo/beta_margin_grad_std": 0.03939548879861832, "beta_dpo/beta_margin_mean": 0.03884674236178398, "beta_dpo/beta_margin_std": 0.15890488028526306, "beta_dpo/beta_used": 0.04256870597600937, "beta_dpo/beta_used_raw": 0.04256870597600937, "beta_dpo/gap_mean": 1.199582576751709, "beta_dpo/gap_std": 3.043370246887207, "beta_dpo/loss_margin_mean": 0.8355753421783447, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.19047619047619047, "grad_norm": 12.087654113769531, "learning_rate": 4.883296295573176e-07, "logits/chosen": 1.3451451063156128, "logits/rejected": 1.3578057289123535, "loss": 1.3751, "step": 126 }, { "beta_dpo/beta": 0.10232022404670715, "beta_dpo/beta_margin_grad_mean": -0.4628364145755768, "beta_dpo/beta_margin_grad_std": 0.0830625593662262, "beta_dpo/beta_margin_mean": 0.15574322640895844, "beta_dpo/beta_margin_std": 0.34838613867759705, "beta_dpo/beta_used": 0.10232022404670715, "beta_dpo/beta_used_raw": 0.10232022404670715, "beta_dpo/gap_mean": 1.2289824485778809, "beta_dpo/gap_std": 3.0530147552490234, "beta_dpo/loss_margin_mean": 1.205783486366272, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.19198790627362056, "grad_norm": 25.341848373413086, "learning_rate": 4.87927032161552e-07, "logits/chosen": 1.2351047992706299, "logits/rejected": 1.2235056161880493, "loss": 1.2569, "step": 127 }, { "beta_dpo/beta": 0.07750297337770462, "beta_dpo/beta_margin_grad_mean": -0.47596895694732666, "beta_dpo/beta_margin_grad_std": 0.08230820298194885, "beta_dpo/beta_margin_mean": 0.09930390119552612, "beta_dpo/beta_margin_std": 0.34651416540145874, "beta_dpo/beta_used": 0.07750297337770462, "beta_dpo/beta_used_raw": 0.07750297337770462, "beta_dpo/gap_mean": 1.1985833644866943, "beta_dpo/gap_std": 3.221245765686035, "beta_dpo/loss_margin_mean": 1.22560453414917, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.19349962207105065, "grad_norm": 21.444602966308594, "learning_rate": 4.875177794352363e-07, "logits/chosen": 1.3550379276275635, "logits/rejected": 1.2482268810272217, "loss": 1.318, "step": 128 }, { "beta_dpo/beta": 0.071719691157341, "beta_dpo/beta_margin_grad_mean": -0.4847745895385742, "beta_dpo/beta_margin_grad_std": 0.07170508801937103, "beta_dpo/beta_margin_mean": 0.06323404610157013, "beta_dpo/beta_margin_std": 0.29615992307662964, "beta_dpo/beta_used": 0.071719691157341, "beta_dpo/beta_used_raw": 0.071719691157341, "beta_dpo/gap_mean": 1.13083016872406, "beta_dpo/gap_std": 3.433652877807617, "beta_dpo/loss_margin_mean": 0.8432696461677551, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.19501133786848074, "grad_norm": 17.289493560791016, "learning_rate": 4.871018828260491e-07, "logits/chosen": 1.38811457157135, "logits/rejected": 1.3899645805358887, "loss": 1.3496, "step": 129 }, { "beta_dpo/beta": 0.0844159722328186, "beta_dpo/beta_margin_grad_mean": -0.46980515122413635, "beta_dpo/beta_margin_grad_std": 0.06665448844432831, "beta_dpo/beta_margin_mean": 0.12363045662641525, "beta_dpo/beta_margin_std": 0.272417277097702, "beta_dpo/beta_used": 0.0844159722328186, "beta_dpo/beta_used_raw": 0.0844159722328186, "beta_dpo/gap_mean": 1.1865178346633911, "beta_dpo/gap_std": 3.484450340270996, "beta_dpo/loss_margin_mean": 1.487868070602417, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1965230536659108, "grad_norm": 20.753738403320312, "learning_rate": 4.866793539675126e-07, "logits/chosen": 1.2308127880096436, "logits/rejected": 1.1961195468902588, "loss": 1.3181, "step": 130 }, { "beta_dpo/beta": 0.10693139582872391, "beta_dpo/beta_margin_grad_mean": -0.4493742883205414, "beta_dpo/beta_margin_grad_std": 0.10129056125879288, "beta_dpo/beta_margin_mean": 0.21827171742916107, "beta_dpo/beta_margin_std": 0.4491637051105499, "beta_dpo/beta_used": 0.10693139582872391, "beta_dpo/beta_used_raw": 0.10693139582872391, "beta_dpo/gap_mean": 1.32501220703125, "beta_dpo/gap_std": 3.5505361557006836, "beta_dpo/loss_margin_mean": 2.0043649673461914, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.1980347694633409, "grad_norm": 24.34880828857422, "learning_rate": 4.86250204678667e-07, "logits/chosen": 1.4657068252563477, "logits/rejected": 1.3357291221618652, "loss": 1.2559, "step": 131 }, { "beta_dpo/beta": 0.10015638172626495, "beta_dpo/beta_margin_grad_mean": -0.47250378131866455, "beta_dpo/beta_margin_grad_std": 0.07719095796346664, "beta_dpo/beta_margin_mean": 0.11288213729858398, "beta_dpo/beta_margin_std": 0.3209708034992218, "beta_dpo/beta_used": 0.10015638172626495, "beta_dpo/beta_used_raw": 0.10015638172626495, "beta_dpo/gap_mean": 1.313185453414917, "beta_dpo/gap_std": 3.496281385421753, "beta_dpo/loss_margin_mean": 1.1516209840774536, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.19954648526077098, "grad_norm": 23.72968864440918, "learning_rate": 4.858144469637408e-07, "logits/chosen": 1.3843731880187988, "logits/rejected": 1.329111099243164, "loss": 1.269, "step": 132 }, { "beta_dpo/beta": 0.09442687034606934, "beta_dpo/beta_margin_grad_mean": -0.4739525318145752, "beta_dpo/beta_margin_grad_std": 0.08487001806497574, "beta_dpo/beta_margin_mean": 0.10770122706890106, "beta_dpo/beta_margin_std": 0.35225582122802734, "beta_dpo/beta_used": 0.09442687034606934, "beta_dpo/beta_used_raw": 0.09442687034606934, "beta_dpo/gap_mean": 1.2856841087341309, "beta_dpo/gap_std": 3.4838268756866455, "beta_dpo/loss_margin_mean": 1.1189558506011963, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.20105820105820105, "grad_norm": 21.97116470336914, "learning_rate": 4.853720930118138e-07, "logits/chosen": 1.1641952991485596, "logits/rejected": 1.1831367015838623, "loss": 1.283, "step": 133 }, { "beta_dpo/beta": 0.11237208545207977, "beta_dpo/beta_margin_grad_mean": -0.4434339702129364, "beta_dpo/beta_margin_grad_std": 0.11113660782575607, "beta_dpo/beta_margin_mean": 0.2420782893896103, "beta_dpo/beta_margin_std": 0.48950013518333435, "beta_dpo/beta_used": 0.11237208545207977, "beta_dpo/beta_used_raw": 0.11237208545207977, "beta_dpo/gap_mean": 1.4053146839141846, "beta_dpo/gap_std": 3.598628044128418, "beta_dpo/loss_margin_mean": 2.1522929668426514, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.20256991685563114, "grad_norm": 23.428014755249023, "learning_rate": 4.849231551964771e-07, "logits/chosen": 1.3531758785247803, "logits/rejected": 1.2881067991256714, "loss": 1.2356, "step": 134 }, { "beta_dpo/beta": 0.09549751877784729, "beta_dpo/beta_margin_grad_mean": -0.46873098611831665, "beta_dpo/beta_margin_grad_std": 0.08153299987316132, "beta_dpo/beta_margin_mean": 0.12783578038215637, "beta_dpo/beta_margin_std": 0.3363261818885803, "beta_dpo/beta_used": 0.09549751877784729, "beta_dpo/beta_used_raw": 0.09549751877784729, "beta_dpo/gap_mean": 1.4151959419250488, "beta_dpo/gap_std": 3.6370739936828613, "beta_dpo/loss_margin_mean": 1.3293631076812744, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.20408163265306123, "grad_norm": 20.11992835998535, "learning_rate": 4.844676460754862e-07, "logits/chosen": 1.2607423067092896, "logits/rejected": 1.2528090476989746, "loss": 1.2802, "step": 135 }, { "beta_dpo/beta": 0.10741549730300903, "beta_dpo/beta_margin_grad_mean": -0.44642412662506104, "beta_dpo/beta_margin_grad_std": 0.13632191717624664, "beta_dpo/beta_margin_mean": 0.24029631912708282, "beta_dpo/beta_margin_std": 0.6096994876861572, "beta_dpo/beta_used": 0.10741549730300903, "beta_dpo/beta_used_raw": 0.10741549730300903, "beta_dpo/gap_mean": 1.5326428413391113, "beta_dpo/gap_std": 3.845479965209961, "beta_dpo/loss_margin_mean": 1.8686842918395996, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.20559334845049132, "grad_norm": 27.862178802490234, "learning_rate": 4.840055783904106e-07, "logits/chosen": 1.2084816694259644, "logits/rejected": 1.0885181427001953, "loss": 1.2433, "step": 136 }, { "beta_dpo/beta": 0.08260773122310638, "beta_dpo/beta_margin_grad_mean": -0.45568960905075073, "beta_dpo/beta_margin_grad_std": 0.08530600368976593, "beta_dpo/beta_margin_mean": 0.18747077882289886, "beta_dpo/beta_margin_std": 0.3693680465221405, "beta_dpo/beta_used": 0.08260773122310638, "beta_dpo/beta_used_raw": 0.08260773122310638, "beta_dpo/gap_mean": 1.5443904399871826, "beta_dpo/gap_std": 3.970661163330078, "beta_dpo/loss_margin_mean": 2.1117563247680664, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.20710506424792138, "grad_norm": 17.30738639831543, "learning_rate": 4.835369650662767e-07, "logits/chosen": 1.235527515411377, "logits/rejected": 1.187445878982544, "loss": 1.2956, "step": 137 }, { "beta_dpo/beta": 0.08060777932405472, "beta_dpo/beta_margin_grad_mean": -0.4737657308578491, "beta_dpo/beta_margin_grad_std": 0.0942247286438942, "beta_dpo/beta_margin_mean": 0.10887736827135086, "beta_dpo/beta_margin_std": 0.39904049038887024, "beta_dpo/beta_used": 0.08060777932405472, "beta_dpo/beta_used_raw": 0.08060777932405472, "beta_dpo/gap_mean": 1.5993130207061768, "beta_dpo/gap_std": 4.022543430328369, "beta_dpo/loss_margin_mean": 1.4106897115707397, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.20861678004535147, "grad_norm": 17.746089935302734, "learning_rate": 4.830618192112065e-07, "logits/chosen": 1.0170423984527588, "logits/rejected": 0.9589405655860901, "loss": 1.2953, "step": 138 }, { "beta_dpo/beta": 0.10856246948242188, "beta_dpo/beta_margin_grad_mean": -0.4657098054885864, "beta_dpo/beta_margin_grad_std": 0.11793362349271774, "beta_dpo/beta_margin_mean": 0.14230769872665405, "beta_dpo/beta_margin_std": 0.5166020393371582, "beta_dpo/beta_used": 0.10856246948242188, "beta_dpo/beta_used_raw": 0.10856246948242188, "beta_dpo/gap_mean": 1.5401983261108398, "beta_dpo/gap_std": 4.134028434753418, "beta_dpo/loss_margin_mean": 1.2866054773330688, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.21012849584278157, "grad_norm": 25.271020889282227, "learning_rate": 4.825801541160509e-07, "logits/chosen": 1.1534160375595093, "logits/rejected": 1.1338095664978027, "loss": 1.2272, "step": 139 }, { "beta_dpo/beta": 0.12550222873687744, "beta_dpo/beta_margin_grad_mean": -0.4339686632156372, "beta_dpo/beta_margin_grad_std": 0.14929716289043427, "beta_dpo/beta_margin_mean": 0.29860758781433105, "beta_dpo/beta_margin_std": 0.6612539887428284, "beta_dpo/beta_used": 0.12550222873687744, "beta_dpo/beta_used_raw": 0.12550222873687744, "beta_dpo/gap_mean": 1.656071424484253, "beta_dpo/gap_std": 4.27004337310791, "beta_dpo/loss_margin_mean": 2.2801015377044678, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.21164021164021163, "grad_norm": 29.566516876220703, "learning_rate": 4.820919832540181e-07, "logits/chosen": 1.0163750648498535, "logits/rejected": 0.9965271949768066, "loss": 1.2062, "step": 140 }, { "beta_dpo/beta": 0.15269500017166138, "beta_dpo/beta_margin_grad_mean": -0.4212610125541687, "beta_dpo/beta_margin_grad_std": 0.16798368096351624, "beta_dpo/beta_margin_mean": 0.35794302821159363, "beta_dpo/beta_margin_std": 0.7773640155792236, "beta_dpo/beta_used": 0.15269500017166138, "beta_dpo/beta_used_raw": 0.15269500017166138, "beta_dpo/gap_mean": 1.7711751461029053, "beta_dpo/gap_std": 4.404110908508301, "beta_dpo/loss_margin_mean": 2.247818946838379, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.21315192743764172, "grad_norm": 29.784658432006836, "learning_rate": 4.815973202802966e-07, "logits/chosen": 1.078611135482788, "logits/rejected": 1.057413101196289, "loss": 1.1011, "step": 141 }, { "beta_dpo/beta": 0.06439286470413208, "beta_dpo/beta_margin_grad_mean": -0.4754011332988739, "beta_dpo/beta_margin_grad_std": 0.07499177008867264, "beta_dpo/beta_margin_mean": 0.10258456319570541, "beta_dpo/beta_margin_std": 0.31377243995666504, "beta_dpo/beta_used": 0.06439286470413208, "beta_dpo/beta_used_raw": 0.06439286470413208, "beta_dpo/gap_mean": 1.7432494163513184, "beta_dpo/gap_std": 4.436220169067383, "beta_dpo/loss_margin_mean": 1.5465246438980103, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2146636432350718, "grad_norm": 15.055063247680664, "learning_rate": 4.810961790316729e-07, "logits/chosen": 1.4186744689941406, "logits/rejected": 1.3770642280578613, "loss": 1.3194, "step": 142 }, { "beta_dpo/beta": 0.09157180786132812, "beta_dpo/beta_margin_grad_mean": -0.47178128361701965, "beta_dpo/beta_margin_grad_std": 0.11874634772539139, "beta_dpo/beta_margin_mean": 0.12287455052137375, "beta_dpo/beta_margin_std": 0.5095570087432861, "beta_dpo/beta_used": 0.09157180786132812, "beta_dpo/beta_used_raw": 0.09157180786132812, "beta_dpo/gap_mean": 1.6417368650436401, "beta_dpo/gap_std": 4.523853302001953, "beta_dpo/loss_margin_mean": 1.2595770359039307, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2161753590325019, "grad_norm": 21.15188217163086, "learning_rate": 4.805885735261454e-07, "logits/chosen": 1.0811662673950195, "logits/rejected": 1.070796251296997, "loss": 1.2878, "step": 143 }, { "beta_dpo/beta": 0.061950840055942535, "beta_dpo/beta_margin_grad_mean": -0.4810461699962616, "beta_dpo/beta_margin_grad_std": 0.0924961045384407, "beta_dpo/beta_margin_mean": 0.08284606039524078, "beta_dpo/beta_margin_std": 0.39326006174087524, "beta_dpo/beta_used": 0.061950840055942535, "beta_dpo/beta_used_raw": 0.061950840055942535, "beta_dpo/gap_mean": 1.5933493375778198, "beta_dpo/gap_std": 4.785058498382568, "beta_dpo/loss_margin_mean": 1.0874993801116943, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.21768707482993196, "grad_norm": 15.518567085266113, "learning_rate": 4.800745179625307e-07, "logits/chosen": 1.2409164905548096, "logits/rejected": 1.2216153144836426, "loss": 1.3392, "step": 144 }, { "beta_dpo/beta": 0.14211627840995789, "beta_dpo/beta_margin_grad_mean": -0.4476253092288971, "beta_dpo/beta_margin_grad_std": 0.17776721715927124, "beta_dpo/beta_margin_mean": 0.22671912610530853, "beta_dpo/beta_margin_std": 0.8023654222488403, "beta_dpo/beta_used": 0.14211627840995789, "beta_dpo/beta_used_raw": 0.14211627840995789, "beta_dpo/gap_mean": 1.5565340518951416, "beta_dpo/gap_std": 4.962738037109375, "beta_dpo/loss_margin_mean": 1.6014819145202637, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.21919879062736206, "grad_norm": 36.54046630859375, "learning_rate": 4.795540267200686e-07, "logits/chosen": 1.0764864683151245, "logits/rejected": 1.0836384296417236, "loss": 1.1807, "step": 145 }, { "beta_dpo/beta": 0.10875533521175385, "beta_dpo/beta_margin_grad_mean": -0.470254123210907, "beta_dpo/beta_margin_grad_std": 0.12472882121801376, "beta_dpo/beta_margin_mean": 0.12233484536409378, "beta_dpo/beta_margin_std": 0.5434421896934509, "beta_dpo/beta_used": 0.10875533521175385, "beta_dpo/beta_used_raw": 0.10875533521175385, "beta_dpo/gap_mean": 1.456047534942627, "beta_dpo/gap_std": 4.940698623657227, "beta_dpo/loss_margin_mean": 1.1148018836975098, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.22071050642479215, "grad_norm": 22.52288818359375, "learning_rate": 4.790271143580173e-07, "logits/chosen": 0.9124500751495361, "logits/rejected": 0.9365319013595581, "loss": 1.2602, "step": 146 }, { "beta_dpo/beta": 0.09407276660203934, "beta_dpo/beta_margin_grad_mean": -0.4711886942386627, "beta_dpo/beta_margin_grad_std": 0.11380936205387115, "beta_dpo/beta_margin_mean": 0.12104871869087219, "beta_dpo/beta_margin_std": 0.48799121379852295, "beta_dpo/beta_used": 0.09407276660203934, "beta_dpo/beta_used_raw": 0.09407276660203934, "beta_dpo/gap_mean": 1.4357414245605469, "beta_dpo/gap_std": 4.992695331573486, "beta_dpo/loss_margin_mean": 1.252655267715454, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2222222222222222, "grad_norm": 22.755189895629883, "learning_rate": 4.784937956152489e-07, "logits/chosen": 1.3807456493377686, "logits/rejected": 1.3229422569274902, "loss": 1.2887, "step": 147 }, { "beta_dpo/beta": 0.15577402710914612, "beta_dpo/beta_margin_grad_mean": -0.4019036889076233, "beta_dpo/beta_margin_grad_std": 0.17970719933509827, "beta_dpo/beta_margin_mean": 0.5029462575912476, "beta_dpo/beta_margin_std": 0.9811931252479553, "beta_dpo/beta_used": 0.15577402710914612, "beta_dpo/beta_used_raw": 0.15577402710914612, "beta_dpo/gap_mean": 1.5689587593078613, "beta_dpo/gap_std": 5.067873001098633, "beta_dpo/loss_margin_mean": 2.766486167907715, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2237339380196523, "grad_norm": 27.865285873413086, "learning_rate": 4.779540854098347e-07, "logits/chosen": 1.3178067207336426, "logits/rejected": 1.2062838077545166, "loss": 1.0949, "step": 148 }, { "beta_dpo/beta": 0.11285445094108582, "beta_dpo/beta_margin_grad_mean": -0.4518367052078247, "beta_dpo/beta_margin_grad_std": 0.1293925940990448, "beta_dpo/beta_margin_mean": 0.20786000788211823, "beta_dpo/beta_margin_std": 0.5592925548553467, "beta_dpo/beta_used": 0.11285445094108582, "beta_dpo/beta_used_raw": 0.11285445094108582, "beta_dpo/gap_mean": 1.7002689838409424, "beta_dpo/gap_std": 5.069310188293457, "beta_dpo/loss_margin_mean": 1.872825026512146, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2252456538170824, "grad_norm": 27.668947219848633, "learning_rate": 4.774079988386296e-07, "logits/chosen": 1.3109874725341797, "logits/rejected": 1.2291481494903564, "loss": 1.2286, "step": 149 }, { "beta_dpo/beta": 0.10829520225524902, "beta_dpo/beta_margin_grad_mean": -0.41798102855682373, "beta_dpo/beta_margin_grad_std": 0.15737159550189972, "beta_dpo/beta_margin_mean": 0.3695879876613617, "beta_dpo/beta_margin_std": 0.7158199548721313, "beta_dpo/beta_used": 0.10829520225524902, "beta_dpo/beta_used_raw": 0.10829520225524902, "beta_dpo/gap_mean": 1.968583345413208, "beta_dpo/gap_std": 5.368040561676025, "beta_dpo/loss_margin_mean": 3.411351442337036, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.22675736961451248, "grad_norm": 28.250102996826172, "learning_rate": 4.768555511768486e-07, "logits/chosen": 1.286353349685669, "logits/rejected": 1.2769120931625366, "loss": 1.2211, "step": 150 }, { "beta_dpo/beta": 0.11118942499160767, "beta_dpo/beta_margin_grad_mean": -0.4280396103858948, "beta_dpo/beta_margin_grad_std": 0.14326725900173187, "beta_dpo/beta_margin_mean": 0.34020793437957764, "beta_dpo/beta_margin_std": 0.6998366117477417, "beta_dpo/beta_used": 0.11118942499160767, "beta_dpo/beta_used_raw": 0.11118942499160767, "beta_dpo/gap_mean": 2.2183287143707275, "beta_dpo/gap_std": 5.535046577453613, "beta_dpo/loss_margin_mean": 3.0132687091827393, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.22826908541194255, "grad_norm": 23.84425163269043, "learning_rate": 4.762967578776406e-07, "logits/chosen": 1.1973973512649536, "logits/rejected": 1.112386703491211, "loss": 1.1943, "step": 151 }, { "beta_dpo/beta": 0.11190253496170044, "beta_dpo/beta_margin_grad_mean": -0.4374425411224365, "beta_dpo/beta_margin_grad_std": 0.14490434527397156, "beta_dpo/beta_margin_mean": 0.2843562364578247, "beta_dpo/beta_margin_std": 0.6773840188980103, "beta_dpo/beta_used": 0.11190253496170044, "beta_dpo/beta_used_raw": 0.11190253496170044, "beta_dpo/gap_mean": 2.220742702484131, "beta_dpo/gap_std": 5.594052314758301, "beta_dpo/loss_margin_mean": 2.4405252933502197, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.22978080120937264, "grad_norm": 26.946876525878906, "learning_rate": 4.757316345716553e-07, "logits/chosen": 1.482506513595581, "logits/rejected": 1.3994556665420532, "loss": 1.1789, "step": 152 }, { "beta_dpo/beta": 0.05399642884731293, "beta_dpo/beta_margin_grad_mean": -0.4605247974395752, "beta_dpo/beta_margin_grad_std": 0.09529552608728409, "beta_dpo/beta_margin_mean": 0.17060205340385437, "beta_dpo/beta_margin_std": 0.4153417646884918, "beta_dpo/beta_used": 0.05399642884731293, "beta_dpo/beta_used_raw": 0.03712339699268341, "beta_dpo/gap_mean": 2.173558473587036, "beta_dpo/gap_std": 5.56630802154541, "beta_dpo/loss_margin_mean": 1.9774678945541382, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.23129251700680273, "grad_norm": 17.23089027404785, "learning_rate": 4.751601970666064e-07, "logits/chosen": 1.0739203691482544, "logits/rejected": 1.0208520889282227, "loss": 1.2898, "step": 153 }, { "beta_dpo/beta": 0.10960409045219421, "beta_dpo/beta_margin_grad_mean": -0.45456209778785706, "beta_dpo/beta_margin_grad_std": 0.1558816283941269, "beta_dpo/beta_margin_mean": 0.19854529201984406, "beta_dpo/beta_margin_std": 0.6859627962112427, "beta_dpo/beta_used": 0.10960409045219421, "beta_dpo/beta_used_raw": 0.10960409045219421, "beta_dpo/gap_mean": 2.186494827270508, "beta_dpo/gap_std": 5.6433186531066895, "beta_dpo/loss_margin_mean": 1.7503312826156616, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2328042328042328, "grad_norm": 23.929061889648438, "learning_rate": 4.745824613468292e-07, "logits/chosen": 1.2299376726150513, "logits/rejected": 1.1694262027740479, "loss": 1.2253, "step": 154 }, { "beta_dpo/beta": 0.1383783370256424, "beta_dpo/beta_margin_grad_mean": -0.42082664370536804, "beta_dpo/beta_margin_grad_std": 0.18120653927326202, "beta_dpo/beta_margin_mean": 0.3739163279533386, "beta_dpo/beta_margin_std": 0.8708319067955017, "beta_dpo/beta_used": 0.1383783370256424, "beta_dpo/beta_used_raw": 0.1383783370256424, "beta_dpo/gap_mean": 2.2229208946228027, "beta_dpo/gap_std": 5.766082763671875, "beta_dpo/loss_margin_mean": 2.6002511978149414, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.23431594860166288, "grad_norm": 33.02286148071289, "learning_rate": 4.7399844357283393e-07, "logits/chosen": 0.9965620040893555, "logits/rejected": 0.9758646488189697, "loss": 1.1397, "step": 155 }, { "beta_dpo/beta": 0.1373191773891449, "beta_dpo/beta_margin_grad_mean": -0.38749340176582336, "beta_dpo/beta_margin_grad_std": 0.17854855954647064, "beta_dpo/beta_margin_mean": 0.5832736492156982, "beta_dpo/beta_margin_std": 0.9946950078010559, "beta_dpo/beta_used": 0.1373191773891449, "beta_dpo/beta_used_raw": 0.1373191773891449, "beta_dpo/gap_mean": 2.40696120262146, "beta_dpo/gap_std": 5.8164801597595215, "beta_dpo/loss_margin_mean": 3.823298215866089, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.23582766439909297, "grad_norm": 33.92511749267578, "learning_rate": 4.7340816008085305e-07, "logits/chosen": 1.2832014560699463, "logits/rejected": 1.2574467658996582, "loss": 1.1048, "step": 156 }, { "beta_dpo/beta": 0.09416471421718597, "beta_dpo/beta_margin_grad_mean": -0.4425427317619324, "beta_dpo/beta_margin_grad_std": 0.1606954038143158, "beta_dpo/beta_margin_mean": 0.2935110926628113, "beta_dpo/beta_margin_std": 0.8034055829048157, "beta_dpo/beta_used": 0.09416471421718597, "beta_dpo/beta_used_raw": 0.09416471421718597, "beta_dpo/gap_mean": 2.5301156044006348, "beta_dpo/gap_std": 5.906242847442627, "beta_dpo/loss_margin_mean": 2.021934986114502, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.23733938019652306, "grad_norm": 19.796140670776367, "learning_rate": 4.728116273823847e-07, "logits/chosen": 1.0816900730133057, "logits/rejected": 1.103010892868042, "loss": 1.1954, "step": 157 }, { "beta_dpo/beta": 0.1391805112361908, "beta_dpo/beta_margin_grad_mean": -0.44501587748527527, "beta_dpo/beta_margin_grad_std": 0.21129848062992096, "beta_dpo/beta_margin_mean": 0.3119133710861206, "beta_dpo/beta_margin_std": 1.1190910339355469, "beta_dpo/beta_used": 0.1391805112361908, "beta_dpo/beta_used_raw": 0.1391805112361908, "beta_dpo/gap_mean": 2.3914170265197754, "beta_dpo/gap_std": 6.122468948364258, "beta_dpo/loss_margin_mean": 1.9657936096191406, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.23885109599395313, "grad_norm": 31.855342864990234, "learning_rate": 4.7220886216373085e-07, "logits/chosen": 1.1423161029815674, "logits/rejected": 1.120100975036621, "loss": 1.1747, "step": 158 }, { "beta_dpo/beta": 0.08424553275108337, "beta_dpo/beta_margin_grad_mean": -0.45276889204978943, "beta_dpo/beta_margin_grad_std": 0.15022964775562286, "beta_dpo/beta_margin_mean": 0.22191838920116425, "beta_dpo/beta_margin_std": 0.7954735159873962, "beta_dpo/beta_used": 0.08424553275108337, "beta_dpo/beta_used_raw": 0.08424553275108337, "beta_dpo/gap_mean": 2.37796688079834, "beta_dpo/gap_std": 6.245815277099609, "beta_dpo/loss_margin_mean": 2.7583186626434326, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.24036281179138322, "grad_norm": 21.775339126586914, "learning_rate": 4.715998812855304e-07, "logits/chosen": 1.1031272411346436, "logits/rejected": 1.0869781970977783, "loss": 1.2194, "step": 159 }, { "beta_dpo/beta": 0.05589519441127777, "beta_dpo/beta_margin_grad_mean": -0.4664691686630249, "beta_dpo/beta_margin_grad_std": 0.09677547216415405, "beta_dpo/beta_margin_mean": 0.14732001721858978, "beta_dpo/beta_margin_std": 0.42592573165893555, "beta_dpo/beta_used": 0.05589519441127777, "beta_dpo/beta_used_raw": 0.05589519441127777, "beta_dpo/gap_mean": 2.3008532524108887, "beta_dpo/gap_std": 6.200883865356445, "beta_dpo/loss_margin_mean": 1.9551855325698853, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2418745275888133, "grad_norm": 14.37753963470459, "learning_rate": 4.7098470178228755e-07, "logits/chosen": 0.9814571142196655, "logits/rejected": 0.9252926707267761, "loss": 1.2975, "step": 160 }, { "beta_dpo/beta": 0.1535968780517578, "beta_dpo/beta_margin_grad_mean": -0.4085511267185211, "beta_dpo/beta_margin_grad_std": 0.18446137011051178, "beta_dpo/beta_margin_mean": 0.3954410254955292, "beta_dpo/beta_margin_std": 0.9252437353134155, "beta_dpo/beta_used": 0.1535968780517578, "beta_dpo/beta_used_raw": 0.1535968780517578, "beta_dpo/gap_mean": 2.3478264808654785, "beta_dpo/gap_std": 6.170980930328369, "beta_dpo/loss_margin_mean": 2.628091335296631, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.24338624338624337, "grad_norm": 29.959720611572266, "learning_rate": 4.703633408618955e-07, "logits/chosen": 1.5791351795196533, "logits/rejected": 1.541215181350708, "loss": 1.0217, "step": 161 }, { "beta_dpo/beta": 0.16446077823638916, "beta_dpo/beta_margin_grad_mean": -0.36990848183631897, "beta_dpo/beta_margin_grad_std": 0.1834045648574829, "beta_dpo/beta_margin_mean": 0.5986179709434509, "beta_dpo/beta_margin_std": 1.0068303346633911, "beta_dpo/beta_used": 0.16446077823638916, "beta_dpo/beta_used_raw": 0.16446077823638916, "beta_dpo/gap_mean": 2.5687732696533203, "beta_dpo/gap_std": 6.104150295257568, "beta_dpo/loss_margin_mean": 3.706486463546753, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.24489795918367346, "grad_norm": 33.98783874511719, "learning_rate": 4.697358159051549e-07, "logits/chosen": 1.3096582889556885, "logits/rejected": 1.2636088132858276, "loss": 0.9784, "step": 162 }, { "beta_dpo/beta": 0.046302828937768936, "beta_dpo/beta_margin_grad_mean": -0.46506714820861816, "beta_dpo/beta_margin_grad_std": 0.09726027399301529, "beta_dpo/beta_margin_mean": 0.14802870154380798, "beta_dpo/beta_margin_std": 0.41607552766799927, "beta_dpo/beta_used": 0.046302828937768936, "beta_dpo/beta_used_raw": 0.040558502078056335, "beta_dpo/gap_mean": 2.5845842361450195, "beta_dpo/gap_std": 6.127120018005371, "beta_dpo/loss_margin_mean": 2.4678986072540283, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.24640967498110355, "grad_norm": 14.543889045715332, "learning_rate": 4.691021444652876e-07, "logits/chosen": 1.0916495323181152, "logits/rejected": 1.0530033111572266, "loss": 1.3026, "step": 163 }, { "beta_dpo/beta": 0.1552918255329132, "beta_dpo/beta_margin_grad_mean": -0.3846026659011841, "beta_dpo/beta_margin_grad_std": 0.21561342477798462, "beta_dpo/beta_margin_mean": 0.5803137421607971, "beta_dpo/beta_margin_std": 1.0986645221710205, "beta_dpo/beta_used": 0.1552918255329132, "beta_dpo/beta_used_raw": 0.1552918255329132, "beta_dpo/gap_mean": 2.7750930786132812, "beta_dpo/gap_std": 6.231865406036377, "beta_dpo/loss_margin_mean": 3.8408336639404297, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.24792139077853365, "grad_norm": 33.88530349731445, "learning_rate": 4.6846234426744624e-07, "logits/chosen": 1.1498820781707764, "logits/rejected": 1.050663948059082, "loss": 1.0648, "step": 164 }, { "beta_dpo/beta": 0.07979045063257217, "beta_dpo/beta_margin_grad_mean": -0.42745912075042725, "beta_dpo/beta_margin_grad_std": 0.11547538638114929, "beta_dpo/beta_margin_mean": 0.3313145935535431, "beta_dpo/beta_margin_std": 0.5704610347747803, "beta_dpo/beta_used": 0.07979045063257217, "beta_dpo/beta_used_raw": 0.07979045063257217, "beta_dpo/gap_mean": 2.877894878387451, "beta_dpo/gap_std": 6.265934467315674, "beta_dpo/loss_margin_mean": 3.389314889907837, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2494331065759637, "grad_norm": 18.06438636779785, "learning_rate": 4.678164332082175e-07, "logits/chosen": 1.1153581142425537, "logits/rejected": 1.0215842723846436, "loss": 1.187, "step": 165 }, { "beta_dpo/beta": 0.10112225264310837, "beta_dpo/beta_margin_grad_mean": -0.4338551461696625, "beta_dpo/beta_margin_grad_std": 0.14187559485435486, "beta_dpo/beta_margin_mean": 0.2968035340309143, "beta_dpo/beta_margin_std": 0.6275408267974854, "beta_dpo/beta_used": 0.10112225264310837, "beta_dpo/beta_used_raw": 0.10112225264310837, "beta_dpo/gap_mean": 2.9666242599487305, "beta_dpo/gap_std": 6.206120491027832, "beta_dpo/loss_margin_mean": 2.9079062938690186, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2509448223733938, "grad_norm": 25.70857810974121, "learning_rate": 4.6716442935512214e-07, "logits/chosen": 1.021344542503357, "logits/rejected": 0.9538024067878723, "loss": 1.1783, "step": 166 }, { "beta_dpo/beta": 0.06665387749671936, "beta_dpo/beta_margin_grad_mean": -0.42930203676223755, "beta_dpo/beta_margin_grad_std": 0.11305904388427734, "beta_dpo/beta_margin_mean": 0.3195487856864929, "beta_dpo/beta_margin_std": 0.5409384369850159, "beta_dpo/beta_used": 0.06665387749671936, "beta_dpo/beta_used_raw": 0.06665387749671936, "beta_dpo/gap_mean": 3.075497627258301, "beta_dpo/gap_std": 6.163358688354492, "beta_dpo/loss_margin_mean": 3.2095916271209717, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.25245653817082386, "grad_norm": 19.956995010375977, "learning_rate": 4.6650635094610966e-07, "logits/chosen": 0.8962558507919312, "logits/rejected": 0.8831173181533813, "loss": 1.2122, "step": 167 }, { "beta_dpo/beta": 0.08960846066474915, "beta_dpo/beta_margin_grad_mean": -0.4323587417602539, "beta_dpo/beta_margin_grad_std": 0.16092197597026825, "beta_dpo/beta_margin_mean": 0.3474786579608917, "beta_dpo/beta_margin_std": 0.8413150310516357, "beta_dpo/beta_used": 0.08960846066474915, "beta_dpo/beta_used_raw": 0.07954014092683792, "beta_dpo/gap_mean": 2.974499464035034, "beta_dpo/gap_std": 6.192246437072754, "beta_dpo/loss_margin_mean": 2.4720442295074463, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.25396825396825395, "grad_norm": 21.843673706054688, "learning_rate": 4.6584221638904767e-07, "logits/chosen": 0.9326637983322144, "logits/rejected": 0.877837061882019, "loss": 1.1559, "step": 168 }, { "beta_dpo/beta": 0.11481602489948273, "beta_dpo/beta_margin_grad_mean": -0.42150530219078064, "beta_dpo/beta_margin_grad_std": 0.19413353502750397, "beta_dpo/beta_margin_mean": 0.3658204674720764, "beta_dpo/beta_margin_std": 0.9044493436813354, "beta_dpo/beta_used": 0.11481602489948273, "beta_dpo/beta_used_raw": 0.11481602489948273, "beta_dpo/gap_mean": 2.906271457672119, "beta_dpo/gap_std": 6.456612586975098, "beta_dpo/loss_margin_mean": 3.1675376892089844, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.25547996976568405, "grad_norm": 28.51230239868164, "learning_rate": 4.651720442612075e-07, "logits/chosen": 1.3749295473098755, "logits/rejected": 1.37894606590271, "loss": 1.1722, "step": 169 }, { "beta_dpo/beta": 0.06731998920440674, "beta_dpo/beta_margin_grad_mean": -0.45301589369773865, "beta_dpo/beta_margin_grad_std": 0.16824722290039062, "beta_dpo/beta_margin_mean": 0.2330533117055893, "beta_dpo/beta_margin_std": 0.8312608599662781, "beta_dpo/beta_used": 0.06731998920440674, "beta_dpo/beta_used_raw": 0.0670924261212349, "beta_dpo/gap_mean": 2.697042465209961, "beta_dpo/gap_std": 6.807780742645264, "beta_dpo/loss_margin_mean": 1.8298805952072144, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.25699168556311414, "grad_norm": 19.64339256286621, "learning_rate": 4.6449585330874425e-07, "logits/chosen": 1.0180065631866455, "logits/rejected": 1.0438951253890991, "loss": 1.2866, "step": 170 }, { "beta_dpo/beta": 0.1436457633972168, "beta_dpo/beta_margin_grad_mean": -0.4028518795967102, "beta_dpo/beta_margin_grad_std": 0.22378672659397125, "beta_dpo/beta_margin_mean": 0.47177597880363464, "beta_dpo/beta_margin_std": 1.18657386302948, "beta_dpo/beta_used": 0.1436457633972168, "beta_dpo/beta_used_raw": 0.1436457633972168, "beta_dpo/gap_mean": 2.872105836868286, "beta_dpo/gap_std": 7.098209381103516, "beta_dpo/loss_margin_mean": 3.190307378768921, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2585034013605442, "grad_norm": 33.582054138183594, "learning_rate": 4.6381366244617224e-07, "logits/chosen": 1.2623517513275146, "logits/rejected": 1.1890509128570557, "loss": 1.1151, "step": 171 }, { "beta_dpo/beta": 0.10588417947292328, "beta_dpo/beta_margin_grad_mean": -0.42059195041656494, "beta_dpo/beta_margin_grad_std": 0.15911069512367249, "beta_dpo/beta_margin_mean": 0.3817267417907715, "beta_dpo/beta_margin_std": 0.7768465876579285, "beta_dpo/beta_used": 0.10588417947292328, "beta_dpo/beta_used_raw": 0.10588417947292328, "beta_dpo/gap_mean": 2.9096975326538086, "beta_dpo/gap_std": 7.087888717651367, "beta_dpo/loss_margin_mean": 3.553881883621216, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2600151171579743, "grad_norm": 24.649341583251953, "learning_rate": 4.631254907558365e-07, "logits/chosen": 1.0718541145324707, "logits/rejected": 1.0470389127731323, "loss": 1.1812, "step": 172 }, { "beta_dpo/beta": 0.08374869078397751, "beta_dpo/beta_margin_grad_mean": -0.41811954975128174, "beta_dpo/beta_margin_grad_std": 0.1825326383113861, "beta_dpo/beta_margin_mean": 0.5075841546058655, "beta_dpo/beta_margin_std": 1.1430001258850098, "beta_dpo/beta_used": 0.08374869078397751, "beta_dpo/beta_used_raw": 0.055781036615371704, "beta_dpo/gap_mean": 3.2391250133514404, "beta_dpo/gap_std": 7.3421783447265625, "beta_dpo/loss_margin_mean": 4.41142463684082, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2615268329554044, "grad_norm": 20.36656951904297, "learning_rate": 4.624313574873786e-07, "logits/chosen": 1.2261898517608643, "logits/rejected": 1.0682613849639893, "loss": 1.1901, "step": 173 }, { "beta_dpo/beta": 0.1480782926082611, "beta_dpo/beta_margin_grad_mean": -0.3678121864795685, "beta_dpo/beta_margin_grad_std": 0.20388971269130707, "beta_dpo/beta_margin_mean": 0.6513935923576355, "beta_dpo/beta_margin_std": 1.0541517734527588, "beta_dpo/beta_used": 0.1480782926082611, "beta_dpo/beta_used_raw": 0.1480782926082611, "beta_dpo/gap_mean": 3.404081344604492, "beta_dpo/gap_std": 7.347430229187012, "beta_dpo/loss_margin_mean": 4.346465110778809, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.26303854875283444, "grad_norm": 30.293058395385742, "learning_rate": 4.61731282057198e-07, "logits/chosen": 1.1567468643188477, "logits/rejected": 1.0777671337127686, "loss": 0.9991, "step": 174 }, { "beta_dpo/beta": 0.14243370294570923, "beta_dpo/beta_margin_grad_mean": -0.40375036001205444, "beta_dpo/beta_margin_grad_std": 0.22832658886909485, "beta_dpo/beta_margin_mean": 0.4617246687412262, "beta_dpo/beta_margin_std": 1.3428500890731812, "beta_dpo/beta_used": 0.14243370294570923, "beta_dpo/beta_used_raw": 0.14243370294570923, "beta_dpo/gap_mean": 3.4792134761810303, "beta_dpo/gap_std": 7.564722061157227, "beta_dpo/loss_margin_mean": 3.4761555194854736, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.26455026455026454, "grad_norm": 30.447345733642578, "learning_rate": 4.6102528404790965e-07, "logits/chosen": 1.1785094738006592, "logits/rejected": 1.1524642705917358, "loss": 1.0605, "step": 175 }, { "beta_dpo/beta": 0.07570341974496841, "beta_dpo/beta_margin_grad_mean": -0.4498152732849121, "beta_dpo/beta_margin_grad_std": 0.15577325224876404, "beta_dpo/beta_margin_mean": 0.248289555311203, "beta_dpo/beta_margin_std": 0.7977651357650757, "beta_dpo/beta_used": 0.07570341974496841, "beta_dpo/beta_used_raw": 0.044923990964889526, "beta_dpo/gap_mean": 3.310117244720459, "beta_dpo/gap_std": 7.675562381744385, "beta_dpo/loss_margin_mean": 2.364182233810425, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2660619803476946, "grad_norm": 24.72120475769043, "learning_rate": 4.603133832077953e-07, "logits/chosen": 1.18973970413208, "logits/rejected": 1.1454646587371826, "loss": 1.1651, "step": 176 }, { "beta_dpo/beta": 0.14909859001636505, "beta_dpo/beta_margin_grad_mean": -0.32062795758247375, "beta_dpo/beta_margin_grad_std": 0.2098706066608429, "beta_dpo/beta_margin_mean": 1.0084487199783325, "beta_dpo/beta_margin_std": 1.2620028257369995, "beta_dpo/beta_used": 0.14909859001636505, "beta_dpo/beta_used_raw": 0.14909859001636505, "beta_dpo/gap_mean": 3.647057056427002, "beta_dpo/gap_std": 7.802684783935547, "beta_dpo/loss_margin_mean": 6.57255744934082, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2675736961451247, "grad_norm": 34.28105926513672, "learning_rate": 4.5959559945025183e-07, "logits/chosen": 1.3451666831970215, "logits/rejected": 1.2215378284454346, "loss": 1.0052, "step": 177 }, { "beta_dpo/beta": 0.12064201384782791, "beta_dpo/beta_margin_grad_mean": -0.3828374147415161, "beta_dpo/beta_margin_grad_std": 0.1705167591571808, "beta_dpo/beta_margin_mean": 0.6022067666053772, "beta_dpo/beta_margin_std": 0.9428675770759583, "beta_dpo/beta_used": 0.12064201384782791, "beta_dpo/beta_used_raw": 0.12064201384782791, "beta_dpo/gap_mean": 3.9942588806152344, "beta_dpo/gap_std": 7.683067321777344, "beta_dpo/loss_margin_mean": 4.527779579162598, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2690854119425548, "grad_norm": 24.384441375732422, "learning_rate": 4.588719528532341e-07, "logits/chosen": 1.0391870737075806, "logits/rejected": 0.9791826605796814, "loss": 0.9946, "step": 178 }, { "beta_dpo/beta": 0.05765051394701004, "beta_dpo/beta_margin_grad_mean": -0.4596216380596161, "beta_dpo/beta_margin_grad_std": 0.13480156660079956, "beta_dpo/beta_margin_mean": 0.19259433448314667, "beta_dpo/beta_margin_std": 0.6468202471733093, "beta_dpo/beta_used": 0.05765051394701004, "beta_dpo/beta_used_raw": 0.04329132288694382, "beta_dpo/gap_mean": 3.7823781967163086, "beta_dpo/gap_std": 7.81741189956665, "beta_dpo/loss_margin_mean": 2.79002046585083, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2705971277399849, "grad_norm": 20.551816940307617, "learning_rate": 4.581424636586928e-07, "logits/chosen": 1.23455011844635, "logits/rejected": 1.2013146877288818, "loss": 1.2363, "step": 179 }, { "beta_dpo/beta": 0.014693931676447392, "beta_dpo/beta_margin_grad_mean": -0.4902806580066681, "beta_dpo/beta_margin_grad_std": 0.035090334713459015, "beta_dpo/beta_margin_mean": 0.039538200944662094, "beta_dpo/beta_margin_std": 0.14256805181503296, "beta_dpo/beta_used": 0.014693931676447392, "beta_dpo/beta_used_raw": -0.0027506444603204727, "beta_dpo/gap_mean": 3.567842483520508, "beta_dpo/gap_std": 7.792623519897461, "beta_dpo/loss_margin_mean": 2.5003037452697754, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.272108843537415, "grad_norm": 5.100368022918701, "learning_rate": 4.5740715227200897e-07, "logits/chosen": 0.9564149379730225, "logits/rejected": 0.9124714136123657, "loss": 1.3559, "step": 180 }, { "beta_dpo/beta": 0.07625903934240341, "beta_dpo/beta_margin_grad_mean": -0.43430396914482117, "beta_dpo/beta_margin_grad_std": 0.13497142493724823, "beta_dpo/beta_margin_mean": 0.29473230242729187, "beta_dpo/beta_margin_std": 0.6254957318305969, "beta_dpo/beta_used": 0.07625903934240341, "beta_dpo/beta_used_raw": 0.07625903934240341, "beta_dpo/gap_mean": 3.5772459506988525, "beta_dpo/gap_std": 7.750424385070801, "beta_dpo/loss_margin_mean": 3.6918673515319824, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.273620559334845, "grad_norm": 16.89691734313965, "learning_rate": 4.566660392614228e-07, "logits/chosen": 1.1309115886688232, "logits/rejected": 1.0724239349365234, "loss": 1.1985, "step": 181 }, { "beta_dpo/beta": 0.16641435027122498, "beta_dpo/beta_margin_grad_mean": -0.33495861291885376, "beta_dpo/beta_margin_grad_std": 0.2150852531194687, "beta_dpo/beta_margin_mean": 0.8996529579162598, "beta_dpo/beta_margin_std": 1.2435740232467651, "beta_dpo/beta_used": 0.16641435027122498, "beta_dpo/beta_used_raw": 0.16641435027122498, "beta_dpo/gap_mean": 3.81697416305542, "beta_dpo/gap_std": 7.650733947753906, "beta_dpo/loss_margin_mean": 5.414433002471924, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2751322751322751, "grad_norm": 39.52751159667969, "learning_rate": 4.5591914535745817e-07, "logits/chosen": 1.3326389789581299, "logits/rejected": 1.2464112043380737, "loss": 0.959, "step": 182 }, { "beta_dpo/beta": 0.08825691789388657, "beta_dpo/beta_margin_grad_mean": -0.45125502347946167, "beta_dpo/beta_margin_grad_std": 0.2054792195558548, "beta_dpo/beta_margin_mean": 0.30645766854286194, "beta_dpo/beta_margin_std": 1.2501469850540161, "beta_dpo/beta_used": 0.08825691789388657, "beta_dpo/beta_used_raw": 0.07874220609664917, "beta_dpo/gap_mean": 3.7444000244140625, "beta_dpo/gap_std": 7.897290229797363, "beta_dpo/loss_margin_mean": 2.5068931579589844, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2766439909297052, "grad_norm": 23.31369972229004, "learning_rate": 4.551664914523433e-07, "logits/chosen": 0.8546992540359497, "logits/rejected": 0.8199894428253174, "loss": 1.2045, "step": 183 }, { "beta_dpo/beta": 0.06977789849042892, "beta_dpo/beta_margin_grad_mean": -0.4251745641231537, "beta_dpo/beta_margin_grad_std": 0.14231690764427185, "beta_dpo/beta_margin_mean": 0.3553830087184906, "beta_dpo/beta_margin_std": 0.6949700117111206, "beta_dpo/beta_used": 0.06977789849042892, "beta_dpo/beta_used_raw": 0.06977789849042892, "beta_dpo/gap_mean": 3.7551708221435547, "beta_dpo/gap_std": 7.767217636108398, "beta_dpo/loss_margin_mean": 4.133913516998291, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2781557067271353, "grad_norm": 17.760683059692383, "learning_rate": 4.544080985994258e-07, "logits/chosen": 1.1608402729034424, "logits/rejected": 1.0770483016967773, "loss": 1.1945, "step": 184 }, { "beta_dpo/beta": 0.128538578748703, "beta_dpo/beta_margin_grad_mean": -0.3922068178653717, "beta_dpo/beta_margin_grad_std": 0.23428599536418915, "beta_dpo/beta_margin_mean": 0.6921870708465576, "beta_dpo/beta_margin_std": 1.5621590614318848, "beta_dpo/beta_used": 0.128538578748703, "beta_dpo/beta_used_raw": 0.128538578748703, "beta_dpo/gap_mean": 3.841433525085449, "beta_dpo/gap_std": 8.024864196777344, "beta_dpo/loss_margin_mean": 3.9666225910186768, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2796674225245654, "grad_norm": 28.532594680786133, "learning_rate": 4.5364398801258394e-07, "logits/chosen": 1.2939105033874512, "logits/rejected": 1.2379869222640991, "loss": 1.063, "step": 185 }, { "beta_dpo/beta": 0.047264955937862396, "beta_dpo/beta_margin_grad_mean": -0.45827677845954895, "beta_dpo/beta_margin_grad_std": 0.14908140897750854, "beta_dpo/beta_margin_mean": 0.19676923751831055, "beta_dpo/beta_margin_std": 0.6972150802612305, "beta_dpo/beta_used": 0.047264955937862396, "beta_dpo/beta_used_raw": 0.042294420301914215, "beta_dpo/gap_mean": 3.7729580402374268, "beta_dpo/gap_std": 8.466205596923828, "beta_dpo/loss_margin_mean": 3.8776252269744873, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2811791383219955, "grad_norm": 16.762447357177734, "learning_rate": 4.5287418106563354e-07, "logits/chosen": 1.1804075241088867, "logits/rejected": 1.0904101133346558, "loss": 1.3025, "step": 186 }, { "beta_dpo/beta": 0.16905778646469116, "beta_dpo/beta_margin_grad_mean": -0.4033774733543396, "beta_dpo/beta_margin_grad_std": 0.2705581784248352, "beta_dpo/beta_margin_mean": 0.6272789239883423, "beta_dpo/beta_margin_std": 1.9046205282211304, "beta_dpo/beta_used": 0.16905778646469116, "beta_dpo/beta_used_raw": 0.16905778646469116, "beta_dpo/gap_mean": 3.7473177909851074, "beta_dpo/gap_std": 8.742372512817383, "beta_dpo/loss_margin_mean": 3.4757022857666016, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.28269085411942557, "grad_norm": 35.64015579223633, "learning_rate": 4.520986992917297e-07, "logits/chosen": 1.1060779094696045, "logits/rejected": 1.0602669715881348, "loss": 1.0671, "step": 187 }, { "beta_dpo/beta": 0.060239776968955994, "beta_dpo/beta_margin_grad_mean": -0.45551395416259766, "beta_dpo/beta_margin_grad_std": 0.11140614002943039, "beta_dpo/beta_margin_mean": 0.19362890720367432, "beta_dpo/beta_margin_std": 0.48972344398498535, "beta_dpo/beta_used": 0.060239776968955994, "beta_dpo/beta_used_raw": 0.060239776968955994, "beta_dpo/gap_mean": 3.493543863296509, "beta_dpo/gap_std": 8.605021476745605, "beta_dpo/loss_margin_mean": 2.709864377975464, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2842025699168556, "grad_norm": 14.569178581237793, "learning_rate": 4.5131756438276466e-07, "logits/chosen": 1.2817773818969727, "logits/rejected": 1.2061257362365723, "loss": 1.2657, "step": 188 }, { "beta_dpo/beta": 0.0808996930718422, "beta_dpo/beta_margin_grad_mean": -0.4149724245071411, "beta_dpo/beta_margin_grad_std": 0.19832564890384674, "beta_dpo/beta_margin_mean": 0.44103261828422546, "beta_dpo/beta_margin_std": 1.0756137371063232, "beta_dpo/beta_used": 0.0808996930718422, "beta_dpo/beta_used_raw": 0.0808996930718422, "beta_dpo/gap_mean": 3.496494770050049, "beta_dpo/gap_std": 8.628555297851562, "beta_dpo/loss_margin_mean": 2.318549394607544, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2857142857142857, "grad_norm": 27.97902488708496, "learning_rate": 4.5053079818876096e-07, "logits/chosen": 1.4758341312408447, "logits/rejected": 1.4488728046417236, "loss": 1.2182, "step": 189 }, { "beta_dpo/beta": 0.2020467072725296, "beta_dpo/beta_margin_grad_mean": -0.2765614092350006, "beta_dpo/beta_margin_grad_std": 0.2313910275697708, "beta_dpo/beta_margin_mean": 1.4720041751861572, "beta_dpo/beta_margin_std": 1.7474236488342285, "beta_dpo/beta_used": 0.2020467072725296, "beta_dpo/beta_used_raw": 0.2020467072725296, "beta_dpo/gap_mean": 3.8497142791748047, "beta_dpo/gap_std": 8.667133331298828, "beta_dpo/loss_margin_mean": 7.28297758102417, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2872260015117158, "grad_norm": 51.29981231689453, "learning_rate": 4.4973842271726024e-07, "logits/chosen": 1.2424674034118652, "logits/rejected": 1.042587399482727, "loss": 0.9284, "step": 190 }, { "beta_dpo/beta": 0.1297360062599182, "beta_dpo/beta_margin_grad_mean": -0.4217360019683838, "beta_dpo/beta_margin_grad_std": 0.23818956315517426, "beta_dpo/beta_margin_mean": 0.4802582263946533, "beta_dpo/beta_margin_std": 1.606734037399292, "beta_dpo/beta_used": 0.1297360062599182, "beta_dpo/beta_used_raw": 0.1297360062599182, "beta_dpo/gap_mean": 3.9156153202056885, "beta_dpo/gap_std": 8.844564437866211, "beta_dpo/loss_margin_mean": 2.7418901920318604, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2887377173091459, "grad_norm": 37.58261489868164, "learning_rate": 4.48940460132708e-07, "logits/chosen": 1.3863708972930908, "logits/rejected": 1.3378045558929443, "loss": 1.0989, "step": 191 }, { "beta_dpo/beta": 0.007039315067231655, "beta_dpo/beta_margin_grad_mean": -0.49628469347953796, "beta_dpo/beta_margin_grad_std": 0.012314299121499062, "beta_dpo/beta_margin_mean": 0.014884297735989094, "beta_dpo/beta_margin_std": 0.049333829432725906, "beta_dpo/beta_used": 0.007039315067231655, "beta_dpo/beta_used_raw": 0.0027586279902607203, "beta_dpo/gap_mean": 3.5234975814819336, "beta_dpo/gap_std": 8.557422637939453, "beta_dpo/loss_margin_mean": 2.086303234100342, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.29024943310657597, "grad_norm": 2.247262477874756, "learning_rate": 4.481369327558329e-07, "logits/chosen": 1.3338195085525513, "logits/rejected": 1.2933076620101929, "loss": 1.3727, "step": 192 }, { "beta_dpo/beta": 0.0940217524766922, "beta_dpo/beta_margin_grad_mean": -0.4251381754875183, "beta_dpo/beta_margin_grad_std": 0.17793121933937073, "beta_dpo/beta_margin_mean": 0.38040265440940857, "beta_dpo/beta_margin_std": 0.9228093028068542, "beta_dpo/beta_used": 0.0940217524766922, "beta_dpo/beta_used_raw": 0.0940217524766922, "beta_dpo/gap_mean": 3.540949821472168, "beta_dpo/gap_std": 8.371658325195312, "beta_dpo/loss_margin_mean": 4.114996433258057, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.29176114890400606, "grad_norm": 22.988906860351562, "learning_rate": 4.47327863063023e-07, "logits/chosen": 1.196512222290039, "logits/rejected": 1.202284812927246, "loss": 1.1708, "step": 193 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4995115101337433, "beta_dpo/beta_margin_grad_std": 0.0024271684233099222, "beta_dpo/beta_margin_mean": 0.0019539878703653812, "beta_dpo/beta_margin_std": 0.009708872064948082, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.04277173429727554, "beta_dpo/gap_mean": 3.3145179748535156, "beta_dpo/gap_std": 8.588244438171387, "beta_dpo/loss_margin_mean": 1.953987717628479, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.29327286470143615, "grad_norm": 0.27189475297927856, "learning_rate": 4.4651327368569684e-07, "logits/chosen": 0.9954877495765686, "logits/rejected": 0.9740771055221558, "loss": 1.3854, "step": 194 }, { "beta_dpo/beta": 0.08321140706539154, "beta_dpo/beta_margin_grad_mean": -0.42134904861450195, "beta_dpo/beta_margin_grad_std": 0.15325944125652313, "beta_dpo/beta_margin_mean": 0.372994601726532, "beta_dpo/beta_margin_std": 0.7367441654205322, "beta_dpo/beta_used": 0.08321140706539154, "beta_dpo/beta_used_raw": 0.08321140706539154, "beta_dpo/gap_mean": 3.4207961559295654, "beta_dpo/gap_std": 8.580717086791992, "beta_dpo/loss_margin_mean": 3.769697904586792, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2947845804988662, "grad_norm": 21.639326095581055, "learning_rate": 4.4569318740967043e-07, "logits/chosen": 1.2047412395477295, "logits/rejected": 1.2157025337219238, "loss": 1.2014, "step": 195 }, { "beta_dpo/beta": 0.1143503189086914, "beta_dpo/beta_margin_grad_mean": -0.39839500188827515, "beta_dpo/beta_margin_grad_std": 0.2235814779996872, "beta_dpo/beta_margin_mean": 0.6355725526809692, "beta_dpo/beta_margin_std": 1.5783125162124634, "beta_dpo/beta_used": 0.1143503189086914, "beta_dpo/beta_used_raw": 0.10994286090135574, "beta_dpo/gap_mean": 3.2441186904907227, "beta_dpo/gap_std": 8.651065826416016, "beta_dpo/loss_margin_mean": 3.3098671436309814, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.2962962962962963, "grad_norm": 29.688894271850586, "learning_rate": 4.448676271745197e-07, "logits/chosen": 1.374154806137085, "logits/rejected": 1.297581672668457, "loss": 1.1026, "step": 196 }, { "beta_dpo/beta": 0.15817537903785706, "beta_dpo/beta_margin_grad_mean": -0.3712634742259979, "beta_dpo/beta_margin_grad_std": 0.2809968590736389, "beta_dpo/beta_margin_mean": 0.7080994844436646, "beta_dpo/beta_margin_std": 1.8238109350204468, "beta_dpo/beta_used": 0.15817537903785706, "beta_dpo/beta_used_raw": 0.15817537903785706, "beta_dpo/gap_mean": 3.469177722930908, "beta_dpo/gap_std": 9.123177528381348, "beta_dpo/loss_margin_mean": 4.5948686599731445, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.29780801209372637, "grad_norm": 36.089534759521484, "learning_rate": 4.440366160729392e-07, "logits/chosen": 1.0388797521591187, "logits/rejected": 1.0116878747940063, "loss": 1.1511, "step": 197 }, { "beta_dpo/beta": 0.18329089879989624, "beta_dpo/beta_margin_grad_mean": -0.3411843776702881, "beta_dpo/beta_margin_grad_std": 0.24735908210277557, "beta_dpo/beta_margin_mean": 0.9059070348739624, "beta_dpo/beta_margin_std": 1.6272207498550415, "beta_dpo/beta_used": 0.18329089879989624, "beta_dpo/beta_used_raw": 0.18329089879989624, "beta_dpo/gap_mean": 3.727560043334961, "beta_dpo/gap_std": 9.023991584777832, "beta_dpo/loss_margin_mean": 4.673088550567627, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.29931972789115646, "grad_norm": 32.435325622558594, "learning_rate": 4.432001773500957e-07, "logits/chosen": 1.2287187576293945, "logits/rejected": 1.1591060161590576, "loss": 0.9414, "step": 198 }, { "beta_dpo/beta": 0.20622490346431732, "beta_dpo/beta_margin_grad_mean": -0.38525623083114624, "beta_dpo/beta_margin_grad_std": 0.29314669966697693, "beta_dpo/beta_margin_mean": 1.0052555799484253, "beta_dpo/beta_margin_std": 2.765925645828247, "beta_dpo/beta_used": 0.20622490346431732, "beta_dpo/beta_used_raw": 0.20622490346431732, "beta_dpo/gap_mean": 3.8399996757507324, "beta_dpo/gap_std": 9.221573829650879, "beta_dpo/loss_margin_mean": 4.202357769012451, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.30083144368858655, "grad_norm": 58.93007278442383, "learning_rate": 4.4235833440297856e-07, "logits/chosen": 1.2598795890808105, "logits/rejected": 1.1354758739471436, "loss": 0.9711, "step": 199 }, { "beta_dpo/beta": 0.1798553764820099, "beta_dpo/beta_margin_grad_mean": -0.3407643139362335, "beta_dpo/beta_margin_grad_std": 0.2466670572757721, "beta_dpo/beta_margin_mean": 1.192578911781311, "beta_dpo/beta_margin_std": 2.1793160438537598, "beta_dpo/beta_used": 0.1798553764820099, "beta_dpo/beta_used_raw": 0.1798553764820099, "beta_dpo/gap_mean": 4.228569030761719, "beta_dpo/gap_std": 9.340324401855469, "beta_dpo/loss_margin_mean": 5.604803562164307, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.30234315948601664, "grad_norm": 41.332130432128906, "learning_rate": 4.415111107797445e-07, "logits/chosen": 0.9444395303726196, "logits/rejected": 0.8375617265701294, "loss": 0.9954, "step": 200 }, { "epoch": 0.30234315948601664, "eval_beta_dpo/beta": 0.0999036431312561, "eval_beta_dpo/beta_margin_grad_mean": -0.41453051567077637, "eval_beta_dpo/beta_margin_grad_std": 0.1619892716407776, "eval_beta_dpo/beta_margin_mean": 0.5098641514778137, "eval_beta_dpo/beta_margin_std": 0.9505065083503723, "eval_beta_dpo/beta_used": 0.0999036431312561, "eval_beta_dpo/beta_used_raw": 0.09682810306549072, "eval_beta_dpo/gap_mean": 4.2135772705078125, "eval_beta_dpo/gap_std": 9.372955322265625, "eval_beta_dpo/loss_margin_mean": 4.16071081161499, "eval_beta_dpo/mask_keep_frac": 1.0, "eval_logits/chosen": 0.917927086353302, "eval_logits/rejected": 0.8638291954994202, "eval_loss": 0.6120367050170898, "eval_runtime": 43.5626, "eval_samples_per_second": 52.866, "eval_steps_per_second": 1.653, "step": 200 }, { "beta_dpo/beta": 0.1600230187177658, "beta_dpo/beta_margin_grad_mean": -0.3555724620819092, "beta_dpo/beta_margin_grad_std": 0.2434043139219284, "beta_dpo/beta_margin_mean": 0.7401548624038696, "beta_dpo/beta_margin_std": 1.3561351299285889, "beta_dpo/beta_used": 0.1600230187177658, "beta_dpo/beta_used_raw": 0.1600230187177658, "beta_dpo/gap_mean": 4.290175437927246, "beta_dpo/gap_std": 9.295166015625, "beta_dpo/loss_margin_mean": 4.662273406982422, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.30385487528344673, "grad_norm": 37.13080978393555, "learning_rate": 4.4065853017905953e-07, "logits/chosen": 1.0336084365844727, "logits/rejected": 0.9708642959594727, "loss": 0.9299, "step": 201 }, { "beta_dpo/beta": 0.15965449810028076, "beta_dpo/beta_margin_grad_mean": -0.3542693853378296, "beta_dpo/beta_margin_grad_std": 0.23076099157333374, "beta_dpo/beta_margin_mean": 0.8342925310134888, "beta_dpo/beta_margin_std": 1.3525999784469604, "beta_dpo/beta_used": 0.15965449810028076, "beta_dpo/beta_used_raw": 0.15965449810028076, "beta_dpo/gap_mean": 4.402766227722168, "beta_dpo/gap_std": 9.118497848510742, "beta_dpo/loss_margin_mean": 5.167710304260254, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.30536659108087677, "grad_norm": 32.50093460083008, "learning_rate": 4.3980061644943575e-07, "logits/chosen": 1.205030083656311, "logits/rejected": 1.1125134229660034, "loss": 0.9291, "step": 202 }, { "beta_dpo/beta": 0.07079961150884628, "beta_dpo/beta_margin_grad_mean": -0.4141000807285309, "beta_dpo/beta_margin_grad_std": 0.12530024349689484, "beta_dpo/beta_margin_mean": 0.38183262944221497, "beta_dpo/beta_margin_std": 0.5788713693618774, "beta_dpo/beta_used": 0.07079961150884628, "beta_dpo/beta_used_raw": 0.07079961150884628, "beta_dpo/gap_mean": 4.539271831512451, "beta_dpo/gap_std": 8.996452331542969, "beta_dpo/loss_margin_mean": 5.262683868408203, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.30687830687830686, "grad_norm": 17.81788444519043, "learning_rate": 4.3893739358856455e-07, "logits/chosen": 1.2785005569458008, "logits/rejected": 1.1403093338012695, "loss": 1.1588, "step": 203 }, { "beta_dpo/beta": 0.0848718211054802, "beta_dpo/beta_margin_grad_mean": -0.40045851469039917, "beta_dpo/beta_margin_grad_std": 0.17919519543647766, "beta_dpo/beta_margin_mean": 0.5690321922302246, "beta_dpo/beta_margin_std": 1.086669921875, "beta_dpo/beta_used": 0.0848718211054802, "beta_dpo/beta_used_raw": 0.0848718211054802, "beta_dpo/gap_mean": 4.8224921226501465, "beta_dpo/gap_std": 8.966915130615234, "beta_dpo/loss_margin_mean": 5.354628086090088, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.30839002267573695, "grad_norm": 21.749141693115234, "learning_rate": 4.380688857426449e-07, "logits/chosen": 1.0835531949996948, "logits/rejected": 0.9498437643051147, "loss": 1.1479, "step": 204 }, { "beta_dpo/beta": 0.038497406989336014, "beta_dpo/beta_margin_grad_mean": -0.46281948685646057, "beta_dpo/beta_margin_grad_std": 0.13370007276535034, "beta_dpo/beta_margin_mean": 0.1775280237197876, "beta_dpo/beta_margin_std": 0.639147162437439, "beta_dpo/beta_used": 0.038497406989336014, "beta_dpo/beta_used_raw": -0.006588853895664215, "beta_dpo/gap_mean": 4.527703285217285, "beta_dpo/gap_std": 8.944877624511719, "beta_dpo/loss_margin_mean": 3.5750622749328613, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.30990173847316704, "grad_norm": 11.258538246154785, "learning_rate": 4.3719511720570814e-07, "logits/chosen": 1.0344573259353638, "logits/rejected": 1.001204490661621, "loss": 1.2959, "step": 205 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4995371997356415, "beta_dpo/beta_margin_grad_std": 0.0023737018927931786, "beta_dpo/beta_margin_mean": 0.0018513122340664268, "beta_dpo/beta_margin_std": 0.009495068341493607, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.07761886715888977, "beta_dpo/gap_mean": 4.082757949829102, "beta_dpo/gap_std": 9.130447387695312, "beta_dpo/loss_margin_mean": 1.8513121604919434, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.31141345427059713, "grad_norm": 0.2955770492553711, "learning_rate": 4.363161124189387e-07, "logits/chosen": 1.3033794164657593, "logits/rejected": 1.2756681442260742, "loss": 1.3852, "step": 206 }, { "beta_dpo/beta": 0.04532025754451752, "beta_dpo/beta_margin_grad_mean": -0.4514307379722595, "beta_dpo/beta_margin_grad_std": 0.10880967974662781, "beta_dpo/beta_margin_mean": 0.21289481222629547, "beta_dpo/beta_margin_std": 0.4802875518798828, "beta_dpo/beta_used": 0.04532025754451752, "beta_dpo/beta_used_raw": 0.04532025754451752, "beta_dpo/gap_mean": 4.084525108337402, "beta_dpo/gap_std": 9.252772331237793, "beta_dpo/loss_margin_mean": 4.4937334060668945, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3129251700680272, "grad_norm": 12.335667610168457, "learning_rate": 4.3543189596998986e-07, "logits/chosen": 1.1771042346954346, "logits/rejected": 1.0448236465454102, "loss": 1.2689, "step": 207 }, { "beta_dpo/beta": 0.05620375648140907, "beta_dpo/beta_margin_grad_mean": -0.4606337249279022, "beta_dpo/beta_margin_grad_std": 0.13863146305084229, "beta_dpo/beta_margin_mean": 0.1967998743057251, "beta_dpo/beta_margin_std": 0.6741688847541809, "beta_dpo/beta_used": 0.05620375648140907, "beta_dpo/beta_used_raw": 0.05620375648140907, "beta_dpo/gap_mean": 3.732440948486328, "beta_dpo/gap_std": 9.030263900756836, "beta_dpo/loss_margin_mean": 1.930777907371521, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3144368858654573, "grad_norm": 18.521930694580078, "learning_rate": 4.3454249259229664e-07, "logits/chosen": 1.0590993165969849, "logits/rejected": 1.065347671508789, "loss": 1.2499, "step": 208 }, { "beta_dpo/beta": 0.21363940834999084, "beta_dpo/beta_margin_grad_mean": -0.3402642011642456, "beta_dpo/beta_margin_grad_std": 0.3089061975479126, "beta_dpo/beta_margin_mean": 1.1954998970031738, "beta_dpo/beta_margin_std": 2.3597922325134277, "beta_dpo/beta_used": 0.21363940834999084, "beta_dpo/beta_used_raw": 0.21363940834999084, "beta_dpo/gap_mean": 4.00895357131958, "beta_dpo/gap_std": 9.31790542602539, "beta_dpo/loss_margin_mean": 5.562100410461426, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.31594860166288735, "grad_norm": 54.91071319580078, "learning_rate": 4.336479271643833e-07, "logits/chosen": 1.2037627696990967, "logits/rejected": 1.1780195236206055, "loss": 1.0709, "step": 209 }, { "beta_dpo/beta": 0.17077326774597168, "beta_dpo/beta_margin_grad_mean": -0.33405202627182007, "beta_dpo/beta_margin_grad_std": 0.2563777565956116, "beta_dpo/beta_margin_mean": 1.116236925125122, "beta_dpo/beta_margin_std": 1.7664326429367065, "beta_dpo/beta_used": 0.17077326774597168, "beta_dpo/beta_used_raw": 0.17077326774597168, "beta_dpo/gap_mean": 4.302438735961914, "beta_dpo/gap_std": 9.480663299560547, "beta_dpo/loss_margin_mean": 6.142838478088379, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.31746031746031744, "grad_norm": 34.84131622314453, "learning_rate": 4.327482247091679e-07, "logits/chosen": 1.1268184185028076, "logits/rejected": 1.012471318244934, "loss": 0.9841, "step": 210 }, { "beta_dpo/beta": 0.061521291732788086, "beta_dpo/beta_margin_grad_mean": -0.42673206329345703, "beta_dpo/beta_margin_grad_std": 0.13907025754451752, "beta_dpo/beta_margin_mean": 0.31983569264411926, "beta_dpo/beta_margin_std": 0.6123623251914978, "beta_dpo/beta_used": 0.061521291732788086, "beta_dpo/beta_used_raw": 0.061521291732788086, "beta_dpo/gap_mean": 4.582791805267334, "beta_dpo/gap_std": 9.538564682006836, "beta_dpo/loss_margin_mean": 5.329373836517334, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.31897203325774753, "grad_norm": 17.159202575683594, "learning_rate": 4.3184341039326217e-07, "logits/chosen": 1.192129135131836, "logits/rejected": 1.0898618698120117, "loss": 1.2186, "step": 211 }, { "beta_dpo/beta": 0.18771663308143616, "beta_dpo/beta_margin_grad_mean": -0.34989145398139954, "beta_dpo/beta_margin_grad_std": 0.3033754527568817, "beta_dpo/beta_margin_mean": 1.0186922550201416, "beta_dpo/beta_margin_std": 1.9620866775512695, "beta_dpo/beta_used": 0.18771663308143616, "beta_dpo/beta_used_raw": 0.18771663308143616, "beta_dpo/gap_mean": 4.730992317199707, "beta_dpo/gap_std": 9.749069213867188, "beta_dpo/loss_margin_mean": 5.481993198394775, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3204837490551776, "grad_norm": 41.34913635253906, "learning_rate": 4.309335095262675e-07, "logits/chosen": 1.249527096748352, "logits/rejected": 1.1798797845840454, "loss": 1.0307, "step": 212 }, { "beta_dpo/beta": 0.05245200917124748, "beta_dpo/beta_margin_grad_mean": -0.45048633217811584, "beta_dpo/beta_margin_grad_std": 0.11693067103624344, "beta_dpo/beta_margin_mean": 0.21350213885307312, "beta_dpo/beta_margin_std": 0.5051944255828857, "beta_dpo/beta_used": 0.05245200917124748, "beta_dpo/beta_used_raw": 0.05245200917124748, "beta_dpo/gap_mean": 4.776755332946777, "beta_dpo/gap_std": 9.900781631469727, "beta_dpo/loss_margin_mean": 4.304105758666992, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3219954648526077, "grad_norm": 13.7152099609375, "learning_rate": 4.3001854756006724e-07, "logits/chosen": 1.0305734872817993, "logits/rejected": 1.0351169109344482, "loss": 1.2311, "step": 213 }, { "beta_dpo/beta": 0.06900745630264282, "beta_dpo/beta_margin_grad_mean": -0.4406600594520569, "beta_dpo/beta_margin_grad_std": 0.18241001665592194, "beta_dpo/beta_margin_mean": 0.35362449288368225, "beta_dpo/beta_margin_std": 1.0889477729797363, "beta_dpo/beta_used": 0.06900745630264282, "beta_dpo/beta_used_raw": 0.058939479291439056, "beta_dpo/gap_mean": 4.457104206085205, "beta_dpo/gap_std": 9.855401039123535, "beta_dpo/loss_margin_mean": 3.4822311401367188, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3235071806500378, "grad_norm": 18.02913475036621, "learning_rate": 4.290985500881143e-07, "logits/chosen": 0.9732145071029663, "logits/rejected": 0.9562994241714478, "loss": 1.1885, "step": 214 }, { "beta_dpo/beta": 0.14414142072200775, "beta_dpo/beta_margin_grad_mean": -0.36944130063056946, "beta_dpo/beta_margin_grad_std": 0.24839608371257782, "beta_dpo/beta_margin_mean": 1.0585134029388428, "beta_dpo/beta_margin_std": 2.0430054664611816, "beta_dpo/beta_used": 0.14414142072200775, "beta_dpo/beta_used_raw": 0.14414142072200775, "beta_dpo/gap_mean": 4.5548095703125, "beta_dpo/gap_std": 9.729708671569824, "beta_dpo/loss_margin_mean": 5.855838298797607, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3250188964474679, "grad_norm": 38.50313949584961, "learning_rate": 4.281735428447157e-07, "logits/chosen": 1.1447077989578247, "logits/rejected": 1.0136210918426514, "loss": 1.1588, "step": 215 }, { "beta_dpo/beta": 0.10134372115135193, "beta_dpo/beta_margin_grad_mean": -0.3964030146598816, "beta_dpo/beta_margin_grad_std": 0.18826571106910706, "beta_dpo/beta_margin_mean": 0.48592090606689453, "beta_dpo/beta_margin_std": 0.9031015634536743, "beta_dpo/beta_used": 0.10134372115135193, "beta_dpo/beta_used_raw": 0.10134372115135193, "beta_dpo/gap_mean": 4.801600933074951, "beta_dpo/gap_std": 9.757190704345703, "beta_dpo/loss_margin_mean": 4.803330421447754, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.32653061224489793, "grad_norm": 22.771522521972656, "learning_rate": 4.2724355170431247e-07, "logits/chosen": 1.1843764781951904, "logits/rejected": 1.0623424053192139, "loss": 1.0978, "step": 216 }, { "beta_dpo/beta": 0.04478123039007187, "beta_dpo/beta_margin_grad_mean": -0.4540867805480957, "beta_dpo/beta_margin_grad_std": 0.14419633150100708, "beta_dpo/beta_margin_mean": 0.23471979796886444, "beta_dpo/beta_margin_std": 0.7252242565155029, "beta_dpo/beta_used": 0.04478123039007187, "beta_dpo/beta_used_raw": 0.04400714486837387, "beta_dpo/gap_mean": 4.86920166015625, "beta_dpo/gap_std": 9.921016693115234, "beta_dpo/loss_margin_mean": 5.44075345993042, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.328042328042328, "grad_norm": 13.470901489257812, "learning_rate": 4.26308602680756e-07, "logits/chosen": 1.284195899963379, "logits/rejected": 1.1664865016937256, "loss": 1.2595, "step": 217 }, { "beta_dpo/beta": 0.09790407866239548, "beta_dpo/beta_margin_grad_mean": -0.4215080440044403, "beta_dpo/beta_margin_grad_std": 0.21584708988666534, "beta_dpo/beta_margin_mean": 0.5518121719360352, "beta_dpo/beta_margin_std": 1.4792039394378662, "beta_dpo/beta_used": 0.09790407866239548, "beta_dpo/beta_used_raw": 0.09790407866239548, "beta_dpo/gap_mean": 4.505361557006836, "beta_dpo/gap_std": 10.036420822143555, "beta_dpo/loss_margin_mean": 3.0190885066986084, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3295540438397581, "grad_norm": 25.01282501220703, "learning_rate": 4.253687219265803e-07, "logits/chosen": 1.1358180046081543, "logits/rejected": 1.1257233619689941, "loss": 1.1535, "step": 218 }, { "beta_dpo/beta": 0.07484374940395355, "beta_dpo/beta_margin_grad_mean": -0.42955535650253296, "beta_dpo/beta_margin_grad_std": 0.15756134688854218, "beta_dpo/beta_margin_mean": 0.3232777416706085, "beta_dpo/beta_margin_std": 0.7881091833114624, "beta_dpo/beta_used": 0.07484374940395355, "beta_dpo/beta_used_raw": 0.07484374940395355, "beta_dpo/gap_mean": 4.551763534545898, "beta_dpo/gap_std": 9.937515258789062, "beta_dpo/loss_margin_mean": 4.4866251945495605, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3310657596371882, "grad_norm": 15.652204513549805, "learning_rate": 4.2442393573227043e-07, "logits/chosen": 0.8975998163223267, "logits/rejected": 0.8517352342605591, "loss": 1.1609, "step": 219 }, { "beta_dpo/beta": 0.07051355391740799, "beta_dpo/beta_margin_grad_mean": -0.4343184530735016, "beta_dpo/beta_margin_grad_std": 0.1779230386018753, "beta_dpo/beta_margin_mean": 0.348916232585907, "beta_dpo/beta_margin_std": 0.9510504007339478, "beta_dpo/beta_used": 0.07051355391740799, "beta_dpo/beta_used_raw": 0.07051355391740799, "beta_dpo/gap_mean": 4.42448616027832, "beta_dpo/gap_std": 9.858744621276855, "beta_dpo/loss_margin_mean": 3.9274308681488037, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3325774754346183, "grad_norm": 18.120128631591797, "learning_rate": 4.234742705255272e-07, "logits/chosen": 1.033501148223877, "logits/rejected": 0.9328892230987549, "loss": 1.227, "step": 220 }, { "beta_dpo/beta": 0.131582111120224, "beta_dpo/beta_margin_grad_mean": -0.3696447014808655, "beta_dpo/beta_margin_grad_std": 0.2426803708076477, "beta_dpo/beta_margin_mean": 0.754789412021637, "beta_dpo/beta_margin_std": 1.385541319847107, "beta_dpo/beta_used": 0.131582111120224, "beta_dpo/beta_used_raw": 0.131582111120224, "beta_dpo/gap_mean": 4.606115341186523, "beta_dpo/gap_std": 9.945049285888672, "beta_dpo/loss_margin_mean": 5.741983413696289, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3340891912320484, "grad_norm": 27.25440788269043, "learning_rate": 4.22519752870528e-07, "logits/chosen": 1.1083698272705078, "logits/rejected": 0.995618462562561, "loss": 1.0808, "step": 221 }, { "beta_dpo/beta": 0.16800376772880554, "beta_dpo/beta_margin_grad_mean": -0.3291609585285187, "beta_dpo/beta_margin_grad_std": 0.2453443855047226, "beta_dpo/beta_margin_mean": 1.1951490640640259, "beta_dpo/beta_margin_std": 2.0346102714538574, "beta_dpo/beta_used": 0.16800376772880554, "beta_dpo/beta_used_raw": 0.16800376772880554, "beta_dpo/gap_mean": 4.937000274658203, "beta_dpo/gap_std": 9.868207931518555, "beta_dpo/loss_margin_mean": 6.663474082946777, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3356009070294785, "grad_norm": 39.613006591796875, "learning_rate": 4.2156040946718343e-07, "logits/chosen": 1.1834269762039185, "logits/rejected": 1.0584017038345337, "loss": 0.9994, "step": 222 }, { "beta_dpo/beta": 0.03971102833747864, "beta_dpo/beta_margin_grad_mean": -0.447765976190567, "beta_dpo/beta_margin_grad_std": 0.14194580912590027, "beta_dpo/beta_margin_mean": 0.2512494623661041, "beta_dpo/beta_margin_std": 0.7044557332992554, "beta_dpo/beta_used": 0.03971102833747864, "beta_dpo/beta_used_raw": 0.020069805905222893, "beta_dpo/gap_mean": 5.080411434173584, "beta_dpo/gap_std": 9.861115455627441, "beta_dpo/loss_margin_mean": 5.334641456604004, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3371126228269085, "grad_norm": 11.249165534973145, "learning_rate": 4.2059626715039065e-07, "logits/chosen": 0.9963165521621704, "logits/rejected": 0.9416338205337524, "loss": 1.2629, "step": 223 }, { "beta_dpo/beta": 0.038020312786102295, "beta_dpo/beta_margin_grad_mean": -0.4604024589061737, "beta_dpo/beta_margin_grad_std": 0.08493012934923172, "beta_dpo/beta_margin_mean": 0.16689425706863403, "beta_dpo/beta_margin_std": 0.35968098044395447, "beta_dpo/beta_used": 0.038020312786102295, "beta_dpo/beta_used_raw": 0.038020312786102295, "beta_dpo/gap_mean": 5.034086227416992, "beta_dpo/gap_std": 9.717823028564453, "beta_dpo/loss_margin_mean": 4.373403072357178, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3386243386243386, "grad_norm": 10.994402885437012, "learning_rate": 4.1962735288928304e-07, "logits/chosen": 1.1170026063919067, "logits/rejected": 1.1115076541900635, "loss": 1.2542, "step": 224 }, { "beta_dpo/beta": 0.062366921454668045, "beta_dpo/beta_margin_grad_mean": -0.41421541571617126, "beta_dpo/beta_margin_grad_std": 0.1663094013929367, "beta_dpo/beta_margin_mean": 0.49164319038391113, "beta_dpo/beta_margin_std": 1.0539456605911255, "beta_dpo/beta_used": 0.062366921454668045, "beta_dpo/beta_used_raw": 0.030658261850476265, "beta_dpo/gap_mean": 5.1772003173828125, "beta_dpo/gap_std": 9.866857528686523, "beta_dpo/loss_margin_mean": 5.452095031738281, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3401360544217687, "grad_norm": 19.45634651184082, "learning_rate": 4.186536937864752e-07, "logits/chosen": 1.111361026763916, "logits/rejected": 0.9815366268157959, "loss": 1.2066, "step": 225 }, { "beta_dpo/beta": 0.1158708781003952, "beta_dpo/beta_margin_grad_mean": -0.3906730115413666, "beta_dpo/beta_margin_grad_std": 0.2309074103832245, "beta_dpo/beta_margin_mean": 0.6056569218635559, "beta_dpo/beta_margin_std": 1.3018516302108765, "beta_dpo/beta_used": 0.1158708781003952, "beta_dpo/beta_used_raw": 0.1158708781003952, "beta_dpo/gap_mean": 5.038943290710449, "beta_dpo/gap_std": 10.077461242675781, "beta_dpo/loss_margin_mean": 5.073428630828857, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3416477702191988, "grad_norm": 24.180665969848633, "learning_rate": 4.176753170773052e-07, "logits/chosen": 1.2869049310684204, "logits/rejected": 1.2370771169662476, "loss": 1.064, "step": 226 }, { "beta_dpo/beta": 0.13195355236530304, "beta_dpo/beta_margin_grad_mean": -0.38195154070854187, "beta_dpo/beta_margin_grad_std": 0.25775906443595886, "beta_dpo/beta_margin_mean": 0.6441952586174011, "beta_dpo/beta_margin_std": 1.4211139678955078, "beta_dpo/beta_used": 0.13195355236530304, "beta_dpo/beta_used_raw": 0.13195355236530304, "beta_dpo/gap_mean": 5.078580856323242, "beta_dpo/gap_std": 10.176387786865234, "beta_dpo/loss_margin_mean": 4.958683490753174, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3431594860166289, "grad_norm": 37.188629150390625, "learning_rate": 4.166922501290729e-07, "logits/chosen": 1.082693099975586, "logits/rejected": 1.007001519203186, "loss": 1.0491, "step": 227 }, { "beta_dpo/beta": 0.0692511573433876, "beta_dpo/beta_margin_grad_mean": -0.4339984655380249, "beta_dpo/beta_margin_grad_std": 0.16297149658203125, "beta_dpo/beta_margin_mean": 0.31314608454704285, "beta_dpo/beta_margin_std": 0.7996662259101868, "beta_dpo/beta_used": 0.0692511573433876, "beta_dpo/beta_used_raw": 0.0692511573433876, "beta_dpo/gap_mean": 4.954934120178223, "beta_dpo/gap_std": 10.248208999633789, "beta_dpo/loss_margin_mean": 4.483448505401611, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.34467120181405897, "grad_norm": 18.75178337097168, "learning_rate": 4.1570452044027405e-07, "logits/chosen": 1.2173062562942505, "logits/rejected": 1.1345555782318115, "loss": 1.1872, "step": 228 }, { "beta_dpo/beta": 0.07860506325960159, "beta_dpo/beta_margin_grad_mean": -0.41749757528305054, "beta_dpo/beta_margin_grad_std": 0.15750160813331604, "beta_dpo/beta_margin_mean": 0.380623459815979, "beta_dpo/beta_margin_std": 0.7886734008789062, "beta_dpo/beta_used": 0.07860506325960159, "beta_dpo/beta_used_raw": 0.07860506325960159, "beta_dpo/gap_mean": 4.904338836669922, "beta_dpo/gap_std": 10.165533065795898, "beta_dpo/loss_margin_mean": 4.610470771789551, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.34618291761148906, "grad_norm": 20.0991153717041, "learning_rate": 4.147121556398312e-07, "logits/chosen": 1.318556547164917, "logits/rejected": 1.169750690460205, "loss": 1.1071, "step": 229 }, { "beta_dpo/beta": 0.08985284715890884, "beta_dpo/beta_margin_grad_mean": -0.463742733001709, "beta_dpo/beta_margin_grad_std": 0.22371892631053925, "beta_dpo/beta_margin_mean": 0.1602984517812729, "beta_dpo/beta_margin_std": 1.376175880432129, "beta_dpo/beta_used": 0.08985284715890884, "beta_dpo/beta_used_raw": 0.08985284715890884, "beta_dpo/gap_mean": 4.563235282897949, "beta_dpo/gap_std": 10.371292114257812, "beta_dpo/loss_margin_mean": 3.3287789821624756, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3476946334089191, "grad_norm": 28.62323570251465, "learning_rate": 4.137151834863213e-07, "logits/chosen": 1.1366546154022217, "logits/rejected": 1.127577781677246, "loss": 1.2091, "step": 230 }, { "beta_dpo/beta": 0.16073641180992126, "beta_dpo/beta_margin_grad_mean": -0.37336307764053345, "beta_dpo/beta_margin_grad_std": 0.25263532996177673, "beta_dpo/beta_margin_mean": 1.3000943660736084, "beta_dpo/beta_margin_std": 2.5289907455444336, "beta_dpo/beta_used": 0.16073641180992126, "beta_dpo/beta_used_raw": 0.16073641180992126, "beta_dpo/gap_mean": 4.904563903808594, "beta_dpo/gap_std": 10.250078201293945, "beta_dpo/loss_margin_mean": 6.05340051651001, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3492063492063492, "grad_norm": 46.785762786865234, "learning_rate": 4.1271363186719835e-07, "logits/chosen": 0.9993768334388733, "logits/rejected": 0.9801430702209473, "loss": 0.9956, "step": 231 }, { "beta_dpo/beta": 0.06868449598550797, "beta_dpo/beta_margin_grad_mean": -0.4486519694328308, "beta_dpo/beta_margin_grad_std": 0.15906988084316254, "beta_dpo/beta_margin_mean": 0.2655814290046692, "beta_dpo/beta_margin_std": 0.8026698231697083, "beta_dpo/beta_used": 0.06868449598550797, "beta_dpo/beta_used_raw": 0.06868449598550797, "beta_dpo/gap_mean": 4.717308044433594, "beta_dpo/gap_std": 10.394327163696289, "beta_dpo/loss_margin_mean": 3.969224452972412, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3507180650037793, "grad_norm": 23.025684356689453, "learning_rate": 4.1170752879801436e-07, "logits/chosen": 0.9813304543495178, "logits/rejected": 0.9566335678100586, "loss": 1.2252, "step": 232 }, { "beta_dpo/beta": 0.12250923365354538, "beta_dpo/beta_margin_grad_mean": -0.4030444324016571, "beta_dpo/beta_margin_grad_std": 0.2429264485836029, "beta_dpo/beta_margin_mean": 0.8649188876152039, "beta_dpo/beta_margin_std": 2.051518201828003, "beta_dpo/beta_used": 0.12250923365354538, "beta_dpo/beta_used_raw": 0.09468528628349304, "beta_dpo/gap_mean": 4.685219764709473, "beta_dpo/gap_std": 10.672860145568848, "beta_dpo/loss_margin_mean": 3.5858311653137207, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.35222978080120937, "grad_norm": 29.333518981933594, "learning_rate": 4.106969024216348e-07, "logits/chosen": 1.0836119651794434, "logits/rejected": 1.0568479299545288, "loss": 1.1273, "step": 233 }, { "beta_dpo/beta": 0.12277411669492722, "beta_dpo/beta_margin_grad_mean": -0.4075966775417328, "beta_dpo/beta_margin_grad_std": 0.2486211508512497, "beta_dpo/beta_margin_mean": 0.7519962191581726, "beta_dpo/beta_margin_std": 1.8956360816955566, "beta_dpo/beta_used": 0.12277411669492722, "beta_dpo/beta_used_raw": 0.11577944457530975, "beta_dpo/gap_mean": 4.205699920654297, "beta_dpo/gap_std": 10.738191604614258, "beta_dpo/loss_margin_mean": 3.597349166870117, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.35374149659863946, "grad_norm": 35.93962097167969, "learning_rate": 4.09681781007452e-07, "logits/chosen": 1.1078158617019653, "logits/rejected": 1.0694584846496582, "loss": 1.2538, "step": 234 }, { "beta_dpo/beta": 0.06808540970087051, "beta_dpo/beta_margin_grad_mean": -0.4144769012928009, "beta_dpo/beta_margin_grad_std": 0.1782931536436081, "beta_dpo/beta_margin_mean": 0.4272496700286865, "beta_dpo/beta_margin_std": 0.9646300077438354, "beta_dpo/beta_used": 0.06808540970087051, "beta_dpo/beta_used_raw": 0.06808540970087051, "beta_dpo/gap_mean": 4.591578483581543, "beta_dpo/gap_std": 10.67329216003418, "beta_dpo/loss_margin_mean": 6.257509231567383, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.35525321239606955, "grad_norm": 24.817764282226562, "learning_rate": 4.08662192950594e-07, "logits/chosen": 1.2615323066711426, "logits/rejected": 1.2428040504455566, "loss": 1.243, "step": 235 }, { "beta_dpo/beta": 0.12781299650669098, "beta_dpo/beta_margin_grad_mean": -0.41924628615379333, "beta_dpo/beta_margin_grad_std": 0.2403831034898758, "beta_dpo/beta_margin_mean": 0.5158213376998901, "beta_dpo/beta_margin_std": 1.6929750442504883, "beta_dpo/beta_used": 0.12781299650669098, "beta_dpo/beta_used_raw": 0.12781299650669098, "beta_dpo/gap_mean": 4.6112799644470215, "beta_dpo/gap_std": 10.800046920776367, "beta_dpo/loss_margin_mean": 4.1561431884765625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.35676492819349964, "grad_norm": 32.81260299682617, "learning_rate": 4.076381667711306e-07, "logits/chosen": 1.0885969400405884, "logits/rejected": 1.085923433303833, "loss": 1.0883, "step": 236 }, { "beta_dpo/beta": 0.08545999974012375, "beta_dpo/beta_margin_grad_mean": -0.4522503614425659, "beta_dpo/beta_margin_grad_std": 0.2021704614162445, "beta_dpo/beta_margin_mean": 0.24412307143211365, "beta_dpo/beta_margin_std": 1.2302663326263428, "beta_dpo/beta_used": 0.08545999974012375, "beta_dpo/beta_used_raw": 0.05330658704042435, "beta_dpo/gap_mean": 4.319942951202393, "beta_dpo/gap_std": 10.680757522583008, "beta_dpo/loss_margin_mean": 2.8172736167907715, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.35827664399092973, "grad_norm": 23.791624069213867, "learning_rate": 4.066097311132753e-07, "logits/chosen": 1.0574512481689453, "logits/rejected": 1.0594351291656494, "loss": 1.1732, "step": 237 }, { "beta_dpo/beta": 0.11690382659435272, "beta_dpo/beta_margin_grad_mean": -0.3783750832080841, "beta_dpo/beta_margin_grad_std": 0.19828879833221436, "beta_dpo/beta_margin_mean": 0.6061506271362305, "beta_dpo/beta_margin_std": 1.0086196660995483, "beta_dpo/beta_used": 0.11690382659435272, "beta_dpo/beta_used_raw": 0.11690382659435272, "beta_dpo/gap_mean": 4.383785247802734, "beta_dpo/gap_std": 10.364669799804688, "beta_dpo/loss_margin_mean": 5.172383785247803, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.35978835978835977, "grad_norm": 28.334075927734375, "learning_rate": 4.0557691474458414e-07, "logits/chosen": 1.1180424690246582, "logits/rejected": 1.1595218181610107, "loss": 1.083, "step": 238 }, { "beta_dpo/beta": 0.12952181696891785, "beta_dpo/beta_margin_grad_mean": -0.3834363520145416, "beta_dpo/beta_margin_grad_std": 0.231191948056221, "beta_dpo/beta_margin_mean": 0.8099231719970703, "beta_dpo/beta_margin_std": 1.637900948524475, "beta_dpo/beta_used": 0.12952181696891785, "beta_dpo/beta_used_raw": 0.12952181696891785, "beta_dpo/gap_mean": 4.5827317237854, "beta_dpo/gap_std": 10.368627548217773, "beta_dpo/loss_margin_mean": 5.8209710121154785, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.36130007558578986, "grad_norm": 33.923667907714844, "learning_rate": 4.045397465551513e-07, "logits/chosen": 1.5741066932678223, "logits/rejected": 1.3806960582733154, "loss": 1.1438, "step": 239 }, { "beta_dpo/beta": 0.15690244734287262, "beta_dpo/beta_margin_grad_mean": -0.3347066640853882, "beta_dpo/beta_margin_grad_std": 0.2660229504108429, "beta_dpo/beta_margin_mean": 1.1113700866699219, "beta_dpo/beta_margin_std": 1.745145320892334, "beta_dpo/beta_used": 0.15690244734287262, "beta_dpo/beta_used_raw": 0.15690244734287262, "beta_dpo/gap_mean": 5.033029556274414, "beta_dpo/gap_std": 10.530231475830078, "beta_dpo/loss_margin_mean": 7.105434894561768, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.36281179138321995, "grad_norm": 41.656497955322266, "learning_rate": 4.0349825555680045e-07, "logits/chosen": 1.2912126779556274, "logits/rejected": 1.1457183361053467, "loss": 1.02, "step": 240 }, { "beta_dpo/beta": 0.05593106895685196, "beta_dpo/beta_margin_grad_mean": -0.44817054271698, "beta_dpo/beta_margin_grad_std": 0.13052473962306976, "beta_dpo/beta_margin_mean": 0.23059462010860443, "beta_dpo/beta_margin_std": 0.5865014791488647, "beta_dpo/beta_used": 0.05593106895685196, "beta_dpo/beta_used_raw": 0.05593106895685196, "beta_dpo/gap_mean": 5.024003982543945, "beta_dpo/gap_std": 10.371394157409668, "beta_dpo/loss_margin_mean": 4.504753112792969, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.36432350718065004, "grad_norm": 16.76007080078125, "learning_rate": 4.0245247088227377e-07, "logits/chosen": 1.0622036457061768, "logits/rejected": 1.0596222877502441, "loss": 1.2225, "step": 241 }, { "beta_dpo/beta": 0.12295199930667877, "beta_dpo/beta_margin_grad_mean": -0.36693933606147766, "beta_dpo/beta_margin_grad_std": 0.21783827245235443, "beta_dpo/beta_margin_mean": 1.1881290674209595, "beta_dpo/beta_margin_std": 2.176382064819336, "beta_dpo/beta_used": 0.12295199930667877, "beta_dpo/beta_used_raw": 0.09869402647018433, "beta_dpo/gap_mean": 5.463100433349609, "beta_dpo/gap_std": 10.322896957397461, "beta_dpo/loss_margin_mean": 7.194202423095703, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.36583522297808013, "grad_norm": 23.658649444580078, "learning_rate": 4.0140242178441665e-07, "logits/chosen": 1.0880014896392822, "logits/rejected": 1.0291259288787842, "loss": 1.0331, "step": 242 }, { "beta_dpo/beta": 0.0481017641723156, "beta_dpo/beta_margin_grad_mean": -0.4544302225112915, "beta_dpo/beta_margin_grad_std": 0.1332933008670807, "beta_dpo/beta_margin_mean": 0.20558291673660278, "beta_dpo/beta_margin_std": 0.616269052028656, "beta_dpo/beta_used": 0.0481017641723156, "beta_dpo/beta_used_raw": -0.00047880038619041443, "beta_dpo/gap_mean": 5.221932411193848, "beta_dpo/gap_std": 10.100500106811523, "beta_dpo/loss_margin_mean": 3.924454927444458, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3673469387755102, "grad_norm": 14.327198028564453, "learning_rate": 4.003481376353596e-07, "logits/chosen": 1.103372573852539, "logits/rejected": 1.1234753131866455, "loss": 1.226, "step": 243 }, { "beta_dpo/beta": 0.10611068457365036, "beta_dpo/beta_margin_grad_mean": -0.3706439137458801, "beta_dpo/beta_margin_grad_std": 0.18867537379264832, "beta_dpo/beta_margin_mean": 0.6805884838104248, "beta_dpo/beta_margin_std": 1.0468705892562866, "beta_dpo/beta_used": 0.10611068457365036, "beta_dpo/beta_used_raw": 0.10611068457365036, "beta_dpo/gap_mean": 5.321632385253906, "beta_dpo/gap_std": 9.931724548339844, "beta_dpo/loss_margin_mean": 6.462033271789551, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3688586545729403, "grad_norm": 28.391023635864258, "learning_rate": 3.9928964792569654e-07, "logits/chosen": 0.7984298467636108, "logits/rejected": 0.7795098423957825, "loss": 1.0613, "step": 244 }, { "beta_dpo/beta": 0.12491966784000397, "beta_dpo/beta_margin_grad_mean": -0.34912219643592834, "beta_dpo/beta_margin_grad_std": 0.2054431289434433, "beta_dpo/beta_margin_mean": 0.7987427115440369, "beta_dpo/beta_margin_std": 1.17366361618042, "beta_dpo/beta_used": 0.12491966784000397, "beta_dpo/beta_used_raw": 0.12491966784000397, "beta_dpo/gap_mean": 5.4922099113464355, "beta_dpo/gap_std": 9.798683166503906, "beta_dpo/loss_margin_mean": 6.305713653564453, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.37037037037037035, "grad_norm": 29.477256774902344, "learning_rate": 3.982269822636601e-07, "logits/chosen": 1.333395004272461, "logits/rejected": 1.2788350582122803, "loss": 0.9597, "step": 245 }, { "beta_dpo/beta": 0.0807776153087616, "beta_dpo/beta_margin_grad_mean": -0.398184210062027, "beta_dpo/beta_margin_grad_std": 0.18760626018047333, "beta_dpo/beta_margin_mean": 0.4955151677131653, "beta_dpo/beta_margin_std": 0.9998785853385925, "beta_dpo/beta_used": 0.0807776153087616, "beta_dpo/beta_used_raw": 0.0807776153087616, "beta_dpo/gap_mean": 5.684264183044434, "beta_dpo/gap_std": 10.004972457885742, "beta_dpo/loss_margin_mean": 6.780579566955566, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.37188208616780044, "grad_norm": 19.162378311157227, "learning_rate": 3.971601703742932e-07, "logits/chosen": 1.1533265113830566, "logits/rejected": 1.0754314661026, "loss": 1.0975, "step": 246 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4993528425693512, "beta_dpo/beta_margin_grad_std": 0.002463964279741049, "beta_dpo/beta_margin_mean": 0.002588639734312892, "beta_dpo/beta_margin_std": 0.009856174699962139, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.06930781155824661, "beta_dpo/gap_mean": 5.321089267730713, "beta_dpo/gap_std": 10.132028579711914, "beta_dpo/loss_margin_mean": 2.588639497756958, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.37339380196523053, "grad_norm": 0.303940087556839, "learning_rate": 3.960892420986177e-07, "logits/chosen": 1.131533145904541, "logits/rejected": 1.086637020111084, "loss": 1.3838, "step": 247 }, { "beta_dpo/beta": 0.051299843937158585, "beta_dpo/beta_margin_grad_mean": -0.436193585395813, "beta_dpo/beta_margin_grad_std": 0.17114627361297607, "beta_dpo/beta_margin_mean": 0.35004010796546936, "beta_dpo/beta_margin_std": 0.9127549529075623, "beta_dpo/beta_used": 0.051299843937158585, "beta_dpo/beta_used_raw": 0.026382185518741608, "beta_dpo/gap_mean": 4.960646629333496, "beta_dpo/gap_std": 10.275779724121094, "beta_dpo/loss_margin_mean": 4.311920166015625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3749055177626606, "grad_norm": 19.132505416870117, "learning_rate": 3.9501422739279953e-07, "logits/chosen": 0.9868658185005188, "logits/rejected": 1.0642544031143188, "loss": 1.274, "step": 248 }, { "beta_dpo/beta": 0.025262191891670227, "beta_dpo/beta_margin_grad_mean": -0.48927804827690125, "beta_dpo/beta_margin_grad_std": 0.09066756814718246, "beta_dpo/beta_margin_mean": 0.04305484890937805, "beta_dpo/beta_margin_std": 0.39369264245033264, "beta_dpo/beta_used": 0.025262191891670227, "beta_dpo/beta_used_raw": 0.00993638951331377, "beta_dpo/gap_mean": 4.441056251525879, "beta_dpo/gap_std": 10.518145561218262, "beta_dpo/loss_margin_mean": 1.0085103511810303, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3764172335600907, "grad_norm": 8.390883445739746, "learning_rate": 3.9393515632731094e-07, "logits/chosen": 1.4377015829086304, "logits/rejected": 1.458314061164856, "loss": 1.3242, "step": 249 }, { "beta_dpo/beta": 0.15996399521827698, "beta_dpo/beta_margin_grad_mean": -0.3432745635509491, "beta_dpo/beta_margin_grad_std": 0.24865637719631195, "beta_dpo/beta_margin_mean": 1.1924474239349365, "beta_dpo/beta_margin_std": 2.246605157852173, "beta_dpo/beta_used": 0.15996399521827698, "beta_dpo/beta_used_raw": 0.15996399521827698, "beta_dpo/gap_mean": 4.528580188751221, "beta_dpo/gap_std": 10.77707576751709, "beta_dpo/loss_margin_mean": 6.6261677742004395, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3779289493575208, "grad_norm": 37.698692321777344, "learning_rate": 3.9285205908608934e-07, "logits/chosen": 0.9921024441719055, "logits/rejected": 0.969779372215271, "loss": 1.0786, "step": 250 }, { "beta_dpo/beta": 0.014929315075278282, "beta_dpo/beta_margin_grad_mean": -0.4864945411682129, "beta_dpo/beta_margin_grad_std": 0.036659833043813705, "beta_dpo/beta_margin_mean": 0.05437476187944412, "beta_dpo/beta_margin_std": 0.1476600021123886, "beta_dpo/beta_used": 0.014929315075278282, "beta_dpo/beta_used_raw": 0.014929315075278282, "beta_dpo/gap_mean": 4.711355209350586, "beta_dpo/gap_std": 10.758208274841309, "beta_dpo/loss_margin_mean": 3.8696646690368652, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3794406651549509, "grad_norm": 4.087891578674316, "learning_rate": 3.9176496596569265e-07, "logits/chosen": 1.3167650699615479, "logits/rejected": 1.248030424118042, "loss": 1.3404, "step": 251 }, { "beta_dpo/beta": 0.025902319699525833, "beta_dpo/beta_margin_grad_mean": -0.4686071276664734, "beta_dpo/beta_margin_grad_std": 0.09484216570854187, "beta_dpo/beta_margin_mean": 0.13240431249141693, "beta_dpo/beta_margin_std": 0.40574291348457336, "beta_dpo/beta_used": 0.025902319699525833, "beta_dpo/beta_used_raw": -0.0074716489762067795, "beta_dpo/gap_mean": 4.2776288986206055, "beta_dpo/gap_std": 10.861841201782227, "beta_dpo/loss_margin_mean": 3.1439483165740967, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.38095238095238093, "grad_norm": 8.144107818603516, "learning_rate": 3.9067390737445254e-07, "logits/chosen": 1.003096580505371, "logits/rejected": 0.950405478477478, "loss": 1.3267, "step": 252 }, { "beta_dpo/beta": 0.06798431277275085, "beta_dpo/beta_margin_grad_mean": -0.42808982729911804, "beta_dpo/beta_margin_grad_std": 0.18537691235542297, "beta_dpo/beta_margin_mean": 0.4428131580352783, "beta_dpo/beta_margin_std": 1.1600432395935059, "beta_dpo/beta_used": 0.06798431277275085, "beta_dpo/beta_used_raw": 0.021464969962835312, "beta_dpo/gap_mean": 4.384973526000977, "beta_dpo/gap_std": 10.881026268005371, "beta_dpo/loss_margin_mean": 3.916627883911133, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.382464096749811, "grad_norm": 25.51688575744629, "learning_rate": 3.8957891383162304e-07, "logits/chosen": 1.0553115606307983, "logits/rejected": 1.0057858228683472, "loss": 1.2632, "step": 253 }, { "beta_dpo/beta": 0.05142858996987343, "beta_dpo/beta_margin_grad_mean": -0.4397808313369751, "beta_dpo/beta_margin_grad_std": 0.17119485139846802, "beta_dpo/beta_margin_mean": 0.3350658714771271, "beta_dpo/beta_margin_std": 0.8966230750083923, "beta_dpo/beta_used": 0.05142858996987343, "beta_dpo/beta_used_raw": 0.043801601976156235, "beta_dpo/gap_mean": 4.4246320724487305, "beta_dpo/gap_std": 10.816822052001953, "beta_dpo/loss_margin_mean": 5.208080768585205, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3839758125472411, "grad_norm": 19.18077850341797, "learning_rate": 3.884800159665276e-07, "logits/chosen": 0.8030510544776917, "logits/rejected": 0.777626633644104, "loss": 1.3043, "step": 254 }, { "beta_dpo/beta": 0.16544781625270844, "beta_dpo/beta_margin_grad_mean": -0.3781430125236511, "beta_dpo/beta_margin_grad_std": 0.27625834941864014, "beta_dpo/beta_margin_mean": 1.0486892461776733, "beta_dpo/beta_margin_std": 2.800529718399048, "beta_dpo/beta_used": 0.16544781625270844, "beta_dpo/beta_used_raw": 0.1183973029255867, "beta_dpo/gap_mean": 4.510273456573486, "beta_dpo/gap_std": 10.781914710998535, "beta_dpo/loss_margin_mean": 4.662529468536377, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3854875283446712, "grad_norm": 57.56937789916992, "learning_rate": 3.873772445177015e-07, "logits/chosen": 1.3328914642333984, "logits/rejected": 1.3053648471832275, "loss": 1.0872, "step": 255 }, { "beta_dpo/beta": 0.13992524147033691, "beta_dpo/beta_margin_grad_mean": -0.34854984283447266, "beta_dpo/beta_margin_grad_std": 0.25158390402793884, "beta_dpo/beta_margin_mean": 0.9280179738998413, "beta_dpo/beta_margin_std": 1.6355485916137695, "beta_dpo/beta_used": 0.13992524147033691, "beta_dpo/beta_used_raw": 0.13992524147033691, "beta_dpo/gap_mean": 4.742461204528809, "beta_dpo/gap_std": 10.8091402053833, "beta_dpo/loss_margin_mean": 6.324960708618164, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3869992441421013, "grad_norm": 36.89436340332031, "learning_rate": 3.862706303320329e-07, "logits/chosen": 1.0001685619354248, "logits/rejected": 0.90242600440979, "loss": 1.1972, "step": 256 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49879777431488037, "beta_dpo/beta_margin_grad_std": 0.0027100411243736744, "beta_dpo/beta_margin_mean": 0.004809120204299688, "beta_dpo/beta_margin_std": 0.010840562172234058, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.040715523064136505, "beta_dpo/gap_mean": 4.818190574645996, "beta_dpo/gap_std": 10.846813201904297, "beta_dpo/loss_margin_mean": 4.809120178222656, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3885109599395314, "grad_norm": 0.31598353385925293, "learning_rate": 3.851602043638994e-07, "logits/chosen": 0.9752795696258545, "logits/rejected": 0.9454945921897888, "loss": 1.3838, "step": 257 }, { "beta_dpo/beta": 0.11642733216285706, "beta_dpo/beta_margin_grad_mean": -0.3744834065437317, "beta_dpo/beta_margin_grad_std": 0.2298222929239273, "beta_dpo/beta_margin_mean": 0.8234491944313049, "beta_dpo/beta_margin_std": 1.7443535327911377, "beta_dpo/beta_used": 0.11642733216285706, "beta_dpo/beta_used_raw": 0.10990360379219055, "beta_dpo/gap_mean": 4.870306968688965, "beta_dpo/gap_std": 10.817766189575195, "beta_dpo/loss_margin_mean": 5.804533004760742, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3900226757369615, "grad_norm": 37.911373138427734, "learning_rate": 3.840459976743023e-07, "logits/chosen": 1.1632795333862305, "logits/rejected": 1.0844495296478271, "loss": 1.1073, "step": 258 }, { "beta_dpo/beta": 0.31446343660354614, "beta_dpo/beta_margin_grad_mean": -0.24687738716602325, "beta_dpo/beta_margin_grad_std": 0.34187325835227966, "beta_dpo/beta_margin_mean": 2.696594476699829, "beta_dpo/beta_margin_std": 3.7241179943084717, "beta_dpo/beta_used": 0.31446343660354614, "beta_dpo/beta_used_raw": 0.31446343660354614, "beta_dpo/gap_mean": 5.502510070800781, "beta_dpo/gap_std": 11.044046401977539, "beta_dpo/loss_margin_mean": 8.567591667175293, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3915343915343915, "grad_norm": 65.49555969238281, "learning_rate": 3.8292804142999796e-07, "logits/chosen": 1.0487525463104248, "logits/rejected": 0.9705426096916199, "loss": 0.7479, "step": 259 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49873608350753784, "beta_dpo/beta_margin_grad_std": 0.002948764944449067, "beta_dpo/beta_margin_mean": 0.005055864807218313, "beta_dpo/beta_margin_std": 0.011795504949986935, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.0352797731757164, "beta_dpo/gap_mean": 5.625866413116455, "beta_dpo/gap_std": 11.170843124389648, "beta_dpo/loss_margin_mean": 5.0558648109436035, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3930461073318216, "grad_norm": 0.28733569383621216, "learning_rate": 3.818063669026256e-07, "logits/chosen": 1.2932058572769165, "logits/rejected": 1.1448646783828735, "loss": 1.3829, "step": 260 }, { "beta_dpo/beta": 0.09499503672122955, "beta_dpo/beta_margin_grad_mean": -0.43870827555656433, "beta_dpo/beta_margin_grad_std": 0.23113374412059784, "beta_dpo/beta_margin_mean": 0.43297070264816284, "beta_dpo/beta_margin_std": 1.5510969161987305, "beta_dpo/beta_used": 0.09499503672122955, "beta_dpo/beta_used_raw": 0.09000162035226822, "beta_dpo/gap_mean": 5.343086242675781, "beta_dpo/gap_std": 11.143077850341797, "beta_dpo/loss_margin_mean": 3.999406576156616, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3945578231292517, "grad_norm": 30.237884521484375, "learning_rate": 3.806810054678331e-07, "logits/chosen": 1.0707039833068848, "logits/rejected": 1.1073495149612427, "loss": 1.1924, "step": 261 }, { "beta_dpo/beta": 0.08395867794752121, "beta_dpo/beta_margin_grad_mean": -0.41953787207603455, "beta_dpo/beta_margin_grad_std": 0.20528234541416168, "beta_dpo/beta_margin_mean": 0.5397913455963135, "beta_dpo/beta_margin_std": 1.298319935798645, "beta_dpo/beta_used": 0.08395867794752121, "beta_dpo/beta_used_raw": 0.05914995074272156, "beta_dpo/gap_mean": 5.180658340454102, "beta_dpo/gap_std": 10.83466911315918, "beta_dpo/loss_margin_mean": 5.200860500335693, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3960695389266818, "grad_norm": 24.859472274780273, "learning_rate": 3.7955198860439887e-07, "logits/chosen": 1.3326735496520996, "logits/rejected": 1.2338473796844482, "loss": 1.1553, "step": 262 }, { "beta_dpo/beta": 0.09583983570337296, "beta_dpo/beta_margin_grad_mean": -0.3918282091617584, "beta_dpo/beta_margin_grad_std": 0.20861949026584625, "beta_dpo/beta_margin_mean": 0.5731788277626038, "beta_dpo/beta_margin_std": 1.2116243839263916, "beta_dpo/beta_used": 0.09583983570337296, "beta_dpo/beta_used_raw": 0.09583983570337296, "beta_dpo/gap_mean": 5.476344108581543, "beta_dpo/gap_std": 10.684277534484863, "beta_dpo/loss_margin_mean": 6.516000747680664, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.3975812547241119, "grad_norm": 22.119905471801758, "learning_rate": 3.784193478933516e-07, "logits/chosen": 1.136518955230713, "logits/rejected": 1.013533353805542, "loss": 1.0766, "step": 263 }, { "beta_dpo/beta": 0.0488370805978775, "beta_dpo/beta_margin_grad_mean": -0.4324823319911957, "beta_dpo/beta_margin_grad_std": 0.10733956843614578, "beta_dpo/beta_margin_mean": 0.2894676625728607, "beta_dpo/beta_margin_std": 0.4639683961868286, "beta_dpo/beta_used": 0.0488370805978775, "beta_dpo/beta_used_raw": 0.0488370805978775, "beta_dpo/gap_mean": 5.533038139343262, "beta_dpo/gap_std": 10.499252319335938, "beta_dpo/loss_margin_mean": 5.958970069885254, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.39909297052154197, "grad_norm": 13.703917503356934, "learning_rate": 3.7728311501708674e-07, "logits/chosen": 1.2367783784866333, "logits/rejected": 1.150953769683838, "loss": 1.2056, "step": 264 }, { "beta_dpo/beta": 0.21453940868377686, "beta_dpo/beta_margin_grad_mean": -0.28706979751586914, "beta_dpo/beta_margin_grad_std": 0.2895905673503876, "beta_dpo/beta_margin_mean": 1.877687692642212, "beta_dpo/beta_margin_std": 2.552780866622925, "beta_dpo/beta_used": 0.21453940868377686, "beta_dpo/beta_used_raw": 0.21453940868377686, "beta_dpo/gap_mean": 5.86796760559082, "beta_dpo/gap_std": 10.677862167358398, "beta_dpo/loss_margin_mean": 8.177618026733398, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.40060468631897206, "grad_norm": 39.615272521972656, "learning_rate": 3.7614332175848027e-07, "logits/chosen": 1.2300488948822021, "logits/rejected": 1.1951534748077393, "loss": 0.785, "step": 265 }, { "beta_dpo/beta": 0.03808180242776871, "beta_dpo/beta_margin_grad_mean": -0.4472852647304535, "beta_dpo/beta_margin_grad_std": 0.11670338362455368, "beta_dpo/beta_margin_mean": 0.24460670351982117, "beta_dpo/beta_margin_std": 0.5672850608825684, "beta_dpo/beta_used": 0.03808180242776871, "beta_dpo/beta_used_raw": 0.010574424639344215, "beta_dpo/gap_mean": 6.066445350646973, "beta_dpo/gap_std": 10.548543930053711, "beta_dpo/loss_margin_mean": 5.729500770568848, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4021164021164021, "grad_norm": 12.612692832946777, "learning_rate": 3.75e-07, "logits/chosen": 0.9250593185424805, "logits/rejected": 0.8699378967285156, "loss": 1.2227, "step": 266 }, { "beta_dpo/beta": 0.11268052458763123, "beta_dpo/beta_margin_grad_mean": -0.39133748412132263, "beta_dpo/beta_margin_grad_std": 0.2150212526321411, "beta_dpo/beta_margin_mean": 0.8038800954818726, "beta_dpo/beta_margin_std": 1.653910756111145, "beta_dpo/beta_used": 0.11268052458763123, "beta_dpo/beta_used_raw": 0.10867670178413391, "beta_dpo/gap_mean": 5.74346399307251, "beta_dpo/gap_std": 10.44660758972168, "beta_dpo/loss_margin_mean": 4.872748374938965, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4036281179138322, "grad_norm": 21.86273956298828, "learning_rate": 3.738531817228131e-07, "logits/chosen": 1.1546790599822998, "logits/rejected": 1.1331496238708496, "loss": 1.0233, "step": 267 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49919313192367554, "beta_dpo/beta_margin_grad_std": 0.002566243289038539, "beta_dpo/beta_margin_mean": 0.003227637615054846, "beta_dpo/beta_margin_std": 0.01026532705873251, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.012797963805496693, "beta_dpo/gap_mean": 5.344855308532715, "beta_dpo/gap_std": 10.346288681030273, "beta_dpo/loss_margin_mean": 3.227637529373169, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4051398337112623, "grad_norm": 0.29407811164855957, "learning_rate": 3.7270289900589204e-07, "logits/chosen": 1.2539623975753784, "logits/rejected": 1.2189905643463135, "loss": 1.3828, "step": 268 }, { "beta_dpo/beta": 0.15564534068107605, "beta_dpo/beta_margin_grad_mean": -0.3451966941356659, "beta_dpo/beta_margin_grad_std": 0.2405506670475006, "beta_dpo/beta_margin_mean": 0.9552483558654785, "beta_dpo/beta_margin_std": 1.6470414400100708, "beta_dpo/beta_used": 0.15564534068107605, "beta_dpo/beta_used_raw": 0.15564534068107605, "beta_dpo/gap_mean": 5.239716529846191, "beta_dpo/gap_std": 10.26348876953125, "beta_dpo/loss_margin_mean": 5.233408451080322, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.40665154950869237, "grad_norm": 34.52106475830078, "learning_rate": 3.7154918402511714e-07, "logits/chosen": 1.3101131916046143, "logits/rejected": 1.2675879001617432, "loss": 0.9188, "step": 269 }, { "beta_dpo/beta": 0.08239845931529999, "beta_dpo/beta_margin_grad_mean": -0.4100302755832672, "beta_dpo/beta_margin_grad_std": 0.18438223004341125, "beta_dpo/beta_margin_mean": 0.42504921555519104, "beta_dpo/beta_margin_std": 0.8831678032875061, "beta_dpo/beta_used": 0.08239845931529999, "beta_dpo/beta_used_raw": 0.08239845931529999, "beta_dpo/gap_mean": 5.3209733963012695, "beta_dpo/gap_std": 10.249567031860352, "beta_dpo/loss_margin_mean": 5.127548694610596, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.40816326530612246, "grad_norm": 22.110576629638672, "learning_rate": 3.7039206905237656e-07, "logits/chosen": 1.289703369140625, "logits/rejected": 1.1867177486419678, "loss": 1.1083, "step": 270 }, { "beta_dpo/beta": 0.04837838187813759, "beta_dpo/beta_margin_grad_mean": -0.458254337310791, "beta_dpo/beta_margin_grad_std": 0.138353630900383, "beta_dpo/beta_margin_mean": 0.21362274885177612, "beta_dpo/beta_margin_std": 0.7236654162406921, "beta_dpo/beta_used": 0.04837838187813759, "beta_dpo/beta_used_raw": 0.04837838187813759, "beta_dpo/gap_mean": 5.161229133605957, "beta_dpo/gap_std": 10.45259952545166, "beta_dpo/loss_margin_mean": 4.351984977722168, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.40967498110355255, "grad_norm": 13.931172370910645, "learning_rate": 3.692315864546635e-07, "logits/chosen": 1.243234395980835, "logits/rejected": 1.1881849765777588, "loss": 1.2273, "step": 271 }, { "beta_dpo/beta": 0.18383634090423584, "beta_dpo/beta_margin_grad_mean": -0.3311034142971039, "beta_dpo/beta_margin_grad_std": 0.2728104293346405, "beta_dpo/beta_margin_mean": 1.0728017091751099, "beta_dpo/beta_margin_std": 1.831144094467163, "beta_dpo/beta_used": 0.18383634090423584, "beta_dpo/beta_used_raw": 0.18383634090423584, "beta_dpo/gap_mean": 5.191141605377197, "beta_dpo/gap_std": 10.428142547607422, "beta_dpo/loss_margin_mean": 5.907552242279053, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.41118669690098264, "grad_norm": 38.00907897949219, "learning_rate": 3.6806776869317067e-07, "logits/chosen": 1.0192674398422241, "logits/rejected": 1.0176470279693604, "loss": 0.863, "step": 272 }, { "beta_dpo/beta": 0.06558459997177124, "beta_dpo/beta_margin_grad_mean": -0.42467087507247925, "beta_dpo/beta_margin_grad_std": 0.18101128935813904, "beta_dpo/beta_margin_mean": 0.3881887197494507, "beta_dpo/beta_margin_std": 0.9979269504547119, "beta_dpo/beta_used": 0.06558459997177124, "beta_dpo/beta_used_raw": 0.04673774540424347, "beta_dpo/gap_mean": 5.385332107543945, "beta_dpo/gap_std": 10.592522621154785, "beta_dpo/loss_margin_mean": 6.027635097503662, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4126984126984127, "grad_norm": 22.860248565673828, "learning_rate": 3.669006483223828e-07, "logits/chosen": 1.0584709644317627, "logits/rejected": 1.0032527446746826, "loss": 1.2133, "step": 273 }, { "beta_dpo/beta": 0.23129317164421082, "beta_dpo/beta_margin_grad_mean": -0.3218546211719513, "beta_dpo/beta_margin_grad_std": 0.3190883696079254, "beta_dpo/beta_margin_mean": 1.5642485618591309, "beta_dpo/beta_margin_std": 2.794994354248047, "beta_dpo/beta_used": 0.23129317164421082, "beta_dpo/beta_used_raw": 0.23129317164421082, "beta_dpo/gap_mean": 5.58009147644043, "beta_dpo/gap_std": 10.845860481262207, "beta_dpo/loss_margin_mean": 6.637640953063965, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.41421012849584277, "grad_norm": 42.139060974121094, "learning_rate": 3.657302579891656e-07, "logits/chosen": 1.0761997699737549, "logits/rejected": 1.0783820152282715, "loss": 0.8496, "step": 274 }, { "beta_dpo/beta": 0.06791101396083832, "beta_dpo/beta_margin_grad_mean": -0.39947086572647095, "beta_dpo/beta_margin_grad_std": 0.18937627971172333, "beta_dpo/beta_margin_mean": 0.59105384349823, "beta_dpo/beta_margin_std": 1.123275876045227, "beta_dpo/beta_used": 0.06791101396083832, "beta_dpo/beta_used_raw": 0.04330751299858093, "beta_dpo/gap_mean": 5.756000518798828, "beta_dpo/gap_std": 10.81220817565918, "beta_dpo/loss_margin_mean": 6.892657279968262, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.41572184429327286, "grad_norm": 22.493019104003906, "learning_rate": 3.645566304318526e-07, "logits/chosen": 1.1675453186035156, "logits/rejected": 1.063072681427002, "loss": 1.159, "step": 275 }, { "beta_dpo/beta": 0.08544276654720306, "beta_dpo/beta_margin_grad_mean": -0.3919548988342285, "beta_dpo/beta_margin_grad_std": 0.17982997000217438, "beta_dpo/beta_margin_mean": 0.5407912731170654, "beta_dpo/beta_margin_std": 0.9322868585586548, "beta_dpo/beta_used": 0.08544276654720306, "beta_dpo/beta_used_raw": 0.08544276654720306, "beta_dpo/gap_mean": 6.039332389831543, "beta_dpo/gap_std": 10.800745010375977, "beta_dpo/loss_margin_mean": 6.57222318649292, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.41723356009070295, "grad_norm": 20.716075897216797, "learning_rate": 3.633797984793294e-07, "logits/chosen": 1.2874423265457153, "logits/rejected": 1.245221495628357, "loss": 1.0834, "step": 276 }, { "beta_dpo/beta": 0.006070706993341446, "beta_dpo/beta_margin_grad_mean": -0.49736547470092773, "beta_dpo/beta_margin_grad_std": 0.01935085654258728, "beta_dpo/beta_margin_mean": 0.01056720968335867, "beta_dpo/beta_margin_std": 0.07766212522983551, "beta_dpo/beta_used": 0.006070706993341446, "beta_dpo/beta_used_raw": -0.06264575570821762, "beta_dpo/gap_mean": 5.546548843383789, "beta_dpo/gap_std": 10.832958221435547, "beta_dpo/loss_margin_mean": 2.432939291000366, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.41874527588813304, "grad_norm": 2.5274527072906494, "learning_rate": 3.6219979505011555e-07, "logits/chosen": 1.2227504253387451, "logits/rejected": 1.2546868324279785, "loss": 1.3648, "step": 277 }, { "beta_dpo/beta": 0.02714432403445244, "beta_dpo/beta_margin_grad_mean": -0.46082329750061035, "beta_dpo/beta_margin_grad_std": 0.10387714952230453, "beta_dpo/beta_margin_mean": 0.17332723736763, "beta_dpo/beta_margin_std": 0.4567440152168274, "beta_dpo/beta_used": 0.02714432403445244, "beta_dpo/beta_used_raw": -0.023564567789435387, "beta_dpo/gap_mean": 5.024744987487793, "beta_dpo/gap_std": 10.663459777832031, "beta_dpo/loss_margin_mean": 3.9546263217926025, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.42025699168556313, "grad_norm": 16.09695053100586, "learning_rate": 3.6101665315144353e-07, "logits/chosen": 1.0592961311340332, "logits/rejected": 0.9930233955383301, "loss": 1.3156, "step": 278 }, { "beta_dpo/beta": 0.2835107147693634, "beta_dpo/beta_margin_grad_mean": -0.2560528814792633, "beta_dpo/beta_margin_grad_std": 0.3177822530269623, "beta_dpo/beta_margin_mean": 2.377708911895752, "beta_dpo/beta_margin_std": 3.3102450370788574, "beta_dpo/beta_used": 0.2835107147693634, "beta_dpo/beta_used_raw": 0.2835107147693634, "beta_dpo/gap_mean": 5.569781303405762, "beta_dpo/gap_std": 10.754709243774414, "beta_dpo/loss_margin_mean": 8.566849708557129, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4217687074829932, "grad_norm": 67.74427032470703, "learning_rate": 3.5983040587833563e-07, "logits/chosen": 1.1368944644927979, "logits/rejected": 1.1035034656524658, "loss": 0.8499, "step": 279 }, { "beta_dpo/beta": 0.11431509256362915, "beta_dpo/beta_margin_grad_mean": -0.3244727551937103, "beta_dpo/beta_margin_grad_std": 0.19820398092269897, "beta_dpo/beta_margin_mean": 0.9455391764640808, "beta_dpo/beta_margin_std": 1.1263186931610107, "beta_dpo/beta_used": 0.11431509256362915, "beta_dpo/beta_used_raw": 0.11431509256362915, "beta_dpo/gap_mean": 6.155077934265137, "beta_dpo/gap_std": 10.57052230834961, "beta_dpo/loss_margin_mean": 8.297757148742676, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.42328042328042326, "grad_norm": 22.813514709472656, "learning_rate": 3.586410864126781e-07, "logits/chosen": 1.3799774646759033, "logits/rejected": 1.3165000677108765, "loss": 0.9616, "step": 280 }, { "beta_dpo/beta": 0.10929633677005768, "beta_dpo/beta_margin_grad_mean": -0.37381666898727417, "beta_dpo/beta_margin_grad_std": 0.21858589351177216, "beta_dpo/beta_margin_mean": 0.6713542342185974, "beta_dpo/beta_margin_std": 1.1942230463027954, "beta_dpo/beta_used": 0.10929633677005768, "beta_dpo/beta_used_raw": 0.10929633677005768, "beta_dpo/gap_mean": 6.20918083190918, "beta_dpo/gap_std": 10.562750816345215, "beta_dpo/loss_margin_mean": 6.347442626953125, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.42479213907785335, "grad_norm": 24.40322494506836, "learning_rate": 3.574487280222929e-07, "logits/chosen": 1.1293048858642578, "logits/rejected": 1.1514209508895874, "loss": 1.0101, "step": 281 }, { "beta_dpo/beta": 0.1020803228020668, "beta_dpo/beta_margin_grad_mean": -0.3907123804092407, "beta_dpo/beta_margin_grad_std": 0.23549912869930267, "beta_dpo/beta_margin_mean": 0.8850224614143372, "beta_dpo/beta_margin_std": 1.7950531244277954, "beta_dpo/beta_used": 0.1020803228020668, "beta_dpo/beta_used_raw": 0.1020803228020668, "beta_dpo/gap_mean": 6.277853488922119, "beta_dpo/gap_std": 10.586102485656738, "beta_dpo/loss_margin_mean": 6.8829665184021, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.42630385487528344, "grad_norm": 28.942813873291016, "learning_rate": 3.562533640600075e-07, "logits/chosen": 0.8785597085952759, "logits/rejected": 0.8606098890304565, "loss": 1.1122, "step": 282 }, { "beta_dpo/beta": 0.09000475704669952, "beta_dpo/beta_margin_grad_mean": -0.3931007385253906, "beta_dpo/beta_margin_grad_std": 0.207363560795784, "beta_dpo/beta_margin_mean": 0.5568903088569641, "beta_dpo/beta_margin_std": 1.1324700117111206, "beta_dpo/beta_used": 0.09000475704669952, "beta_dpo/beta_used_raw": 0.09000475704669952, "beta_dpo/gap_mean": 6.372895240783691, "beta_dpo/gap_std": 10.772232055664062, "beta_dpo/loss_margin_mean": 6.040248870849609, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.42781557067271353, "grad_norm": 19.173545837402344, "learning_rate": 3.550550279627215e-07, "logits/chosen": 1.3081157207489014, "logits/rejected": 1.1309893131256104, "loss": 1.0395, "step": 283 }, { "beta_dpo/beta": 0.09521377086639404, "beta_dpo/beta_margin_grad_mean": -0.38916173577308655, "beta_dpo/beta_margin_grad_std": 0.2197965532541275, "beta_dpo/beta_margin_mean": 0.722620964050293, "beta_dpo/beta_margin_std": 1.6002602577209473, "beta_dpo/beta_used": 0.09521377086639404, "beta_dpo/beta_used_raw": 0.09234096854925156, "beta_dpo/gap_mean": 6.439916610717773, "beta_dpo/gap_std": 10.795588493347168, "beta_dpo/loss_margin_mean": 6.818644046783447, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4293272864701436, "grad_norm": 28.033769607543945, "learning_rate": 3.5385375325047163e-07, "logits/chosen": 1.2364400625228882, "logits/rejected": 1.1874457597732544, "loss": 1.0682, "step": 284 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49894726276397705, "beta_dpo/beta_margin_grad_std": 0.0030077884439378977, "beta_dpo/beta_margin_mean": 0.0042111543007195, "beta_dpo/beta_margin_std": 0.01203157752752304, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.047950584441423416, "beta_dpo/gap_mean": 6.0694403648376465, "beta_dpo/gap_std": 10.893739700317383, "beta_dpo/loss_margin_mean": 4.211153984069824, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4308390022675737, "grad_norm": 0.2864086627960205, "learning_rate": 3.5264957352549375e-07, "logits/chosen": 1.297670841217041, "logits/rejected": 1.2642215490341187, "loss": 1.3827, "step": 285 }, { "beta_dpo/beta": 0.12104212492704391, "beta_dpo/beta_margin_grad_mean": -0.3920276463031769, "beta_dpo/beta_margin_grad_std": 0.2352607548236847, "beta_dpo/beta_margin_mean": 0.988191545009613, "beta_dpo/beta_margin_std": 2.156294584274292, "beta_dpo/beta_used": 0.12104212492704391, "beta_dpo/beta_used_raw": 0.11507139354944229, "beta_dpo/gap_mean": 6.1069135665893555, "beta_dpo/gap_std": 11.12326717376709, "beta_dpo/loss_margin_mean": 7.050894737243652, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4323507180650038, "grad_norm": 43.05253219604492, "learning_rate": 3.514425224712835e-07, "logits/chosen": 1.1157772541046143, "logits/rejected": 1.008533239364624, "loss": 1.0778, "step": 286 }, { "beta_dpo/beta": 0.11036910116672516, "beta_dpo/beta_margin_grad_mean": -0.3323964476585388, "beta_dpo/beta_margin_grad_std": 0.21155521273612976, "beta_dpo/beta_margin_mean": 1.0009580850601196, "beta_dpo/beta_margin_std": 1.3563151359558105, "beta_dpo/beta_used": 0.11036910116672516, "beta_dpo/beta_used_raw": 0.11036910116672516, "beta_dpo/gap_mean": 6.487942218780518, "beta_dpo/gap_std": 11.09727954864502, "beta_dpo/loss_margin_mean": 8.630319595336914, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.43386243386243384, "grad_norm": 27.29483985900879, "learning_rate": 3.502326338516534e-07, "logits/chosen": 1.207090973854065, "logits/rejected": 1.2322757244110107, "loss": 1.0011, "step": 287 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49865084886550903, "beta_dpo/beta_margin_grad_std": 0.002663425402715802, "beta_dpo/beta_margin_mean": 0.0053967977873981, "beta_dpo/beta_margin_std": 0.01065417006611824, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.03904159739613533, "beta_dpo/gap_mean": 6.5133233070373535, "beta_dpo/gap_std": 11.077014923095703, "beta_dpo/loss_margin_mean": 5.3967976570129395, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.43537414965986393, "grad_norm": 0.2951047122478485, "learning_rate": 3.490199415097892e-07, "logits/chosen": 1.0747828483581543, "logits/rejected": 1.0475144386291504, "loss": 1.3821, "step": 288 }, { "beta_dpo/beta": 0.09553598612546921, "beta_dpo/beta_margin_grad_mean": -0.3754231631755829, "beta_dpo/beta_margin_grad_std": 0.1994851529598236, "beta_dpo/beta_margin_mean": 0.7695130705833435, "beta_dpo/beta_margin_std": 1.2959647178649902, "beta_dpo/beta_used": 0.09553598612546921, "beta_dpo/beta_used_raw": 0.08732537180185318, "beta_dpo/gap_mean": 6.468659400939941, "beta_dpo/gap_std": 10.938737869262695, "beta_dpo/loss_margin_mean": 6.9596428871154785, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.436885865457294, "grad_norm": 19.465930938720703, "learning_rate": 3.4780447936730247e-07, "logits/chosen": 1.0801656246185303, "logits/rejected": 1.0307631492614746, "loss": 1.0189, "step": 289 }, { "beta_dpo/beta": 0.045102305710315704, "beta_dpo/beta_margin_grad_mean": -0.4342304766178131, "beta_dpo/beta_margin_grad_std": 0.1358710080385208, "beta_dpo/beta_margin_mean": 0.30848953127861023, "beta_dpo/beta_margin_std": 0.6513628363609314, "beta_dpo/beta_used": 0.045102305710315704, "beta_dpo/beta_used_raw": 0.045102305710315704, "beta_dpo/gap_mean": 6.521547317504883, "beta_dpo/gap_std": 10.709592819213867, "beta_dpo/loss_margin_mean": 5.988819599151611, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4383975812547241, "grad_norm": 13.944698333740234, "learning_rate": 3.465862814232821e-07, "logits/chosen": 1.4063677787780762, "logits/rejected": 1.392086386680603, "loss": 1.1994, "step": 290 }, { "beta_dpo/beta": 0.17210541665554047, "beta_dpo/beta_margin_grad_mean": -0.3703913986682892, "beta_dpo/beta_margin_grad_std": 0.2652936577796936, "beta_dpo/beta_margin_mean": 1.6986570358276367, "beta_dpo/beta_margin_std": 3.4891014099121094, "beta_dpo/beta_used": 0.17210541665554047, "beta_dpo/beta_used_raw": 0.16101905703544617, "beta_dpo/gap_mean": 6.368675231933594, "beta_dpo/gap_std": 10.600616455078125, "beta_dpo/loss_margin_mean": 6.906424522399902, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4399092970521542, "grad_norm": 42.380653381347656, "learning_rate": 3.4536538175334343e-07, "logits/chosen": 1.1900293827056885, "logits/rejected": 1.0464694499969482, "loss": 0.9982, "step": 291 }, { "beta_dpo/beta": 0.07080691307783127, "beta_dpo/beta_margin_grad_mean": -0.4200320839881897, "beta_dpo/beta_margin_grad_std": 0.19748757779598236, "beta_dpo/beta_margin_mean": 0.4610114097595215, "beta_dpo/beta_margin_std": 1.2586936950683594, "beta_dpo/beta_used": 0.07080691307783127, "beta_dpo/beta_used_raw": 0.05992849916219711, "beta_dpo/gap_mean": 6.459872245788574, "beta_dpo/gap_std": 10.87993335723877, "beta_dpo/loss_margin_mean": 5.627652645111084, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4414210128495843, "grad_norm": 18.856821060180664, "learning_rate": 3.4414181450867465e-07, "logits/chosen": 1.0667200088500977, "logits/rejected": 1.04791259765625, "loss": 1.1284, "step": 292 }, { "beta_dpo/beta": 0.1957043707370758, "beta_dpo/beta_margin_grad_mean": -0.29217982292175293, "beta_dpo/beta_margin_grad_std": 0.30992838740348816, "beta_dpo/beta_margin_mean": 1.8623554706573486, "beta_dpo/beta_margin_std": 2.75933575630188, "beta_dpo/beta_used": 0.1957043707370758, "beta_dpo/beta_used_raw": 0.1957043707370758, "beta_dpo/gap_mean": 6.795432090759277, "beta_dpo/gap_std": 11.390796661376953, "beta_dpo/loss_margin_mean": 9.698270797729492, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4429327286470144, "grad_norm": 63.56986618041992, "learning_rate": 3.4291561391508185e-07, "logits/chosen": 1.1134048700332642, "logits/rejected": 1.0620172023773193, "loss": 1.0551, "step": 293 }, { "beta_dpo/beta": 0.11528073251247406, "beta_dpo/beta_margin_grad_mean": -0.37978050112724304, "beta_dpo/beta_margin_grad_std": 0.20134668052196503, "beta_dpo/beta_margin_mean": 0.8404229283332825, "beta_dpo/beta_margin_std": 1.5732080936431885, "beta_dpo/beta_used": 0.11528073251247406, "beta_dpo/beta_used_raw": 0.11528073251247406, "beta_dpo/gap_mean": 6.880713939666748, "beta_dpo/gap_std": 11.266508102416992, "beta_dpo/loss_margin_mean": 5.479251384735107, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4444444444444444, "grad_norm": 25.94324493408203, "learning_rate": 3.4168681427203153e-07, "logits/chosen": 1.0178157091140747, "logits/rejected": 0.9835942387580872, "loss": 0.9298, "step": 294 }, { "beta_dpo/beta": 0.031608808785676956, "beta_dpo/beta_margin_grad_mean": -0.47087332606315613, "beta_dpo/beta_margin_grad_std": 0.11631444841623306, "beta_dpo/beta_margin_mean": 0.1305970549583435, "beta_dpo/beta_margin_std": 0.5425670742988586, "beta_dpo/beta_used": 0.031608808785676956, "beta_dpo/beta_used_raw": 0.025034988299012184, "beta_dpo/gap_mean": 6.435437202453613, "beta_dpo/gap_std": 11.3424654006958, "beta_dpo/loss_margin_mean": 4.683479309082031, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4459561602418745, "grad_norm": 11.254670143127441, "learning_rate": 3.4045544995169125e-07, "logits/chosen": 1.1830737590789795, "logits/rejected": 1.0612953901290894, "loss": 1.2497, "step": 295 }, { "beta_dpo/beta": 0.04573505371809006, "beta_dpo/beta_margin_grad_mean": -0.447557270526886, "beta_dpo/beta_margin_grad_std": 0.15784353017807007, "beta_dpo/beta_margin_mean": 0.2759431302547455, "beta_dpo/beta_margin_std": 0.8301151394844055, "beta_dpo/beta_used": 0.04573505371809006, "beta_dpo/beta_used_raw": 0.03850052133202553, "beta_dpo/gap_mean": 6.396709442138672, "beta_dpo/gap_std": 11.416792869567871, "beta_dpo/loss_margin_mean": 6.682880878448486, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4474678760393046, "grad_norm": 15.308691024780273, "learning_rate": 3.392215553979679e-07, "logits/chosen": 1.1425731182098389, "logits/rejected": 1.0387723445892334, "loss": 1.2773, "step": 296 }, { "beta_dpo/beta": 0.1036151647567749, "beta_dpo/beta_margin_grad_mean": -0.3551364839076996, "beta_dpo/beta_margin_grad_std": 0.1977175921201706, "beta_dpo/beta_margin_mean": 0.9006081223487854, "beta_dpo/beta_margin_std": 1.3864402770996094, "beta_dpo/beta_used": 0.1036151647567749, "beta_dpo/beta_used_raw": 0.1036151647567749, "beta_dpo/gap_mean": 6.470460891723633, "beta_dpo/gap_std": 11.380804061889648, "beta_dpo/loss_margin_mean": 7.736580848693848, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4489795918367347, "grad_norm": 24.121685028076172, "learning_rate": 3.3798516512554485e-07, "logits/chosen": 1.1213431358337402, "logits/rejected": 1.0656251907348633, "loss": 0.9927, "step": 297 }, { "beta_dpo/beta": 0.08515971899032593, "beta_dpo/beta_margin_grad_mean": -0.38800179958343506, "beta_dpo/beta_margin_grad_std": 0.2151874601840973, "beta_dpo/beta_margin_mean": 0.5751146078109741, "beta_dpo/beta_margin_std": 1.111681580543518, "beta_dpo/beta_used": 0.08515971899032593, "beta_dpo/beta_used_raw": 0.08515971899032593, "beta_dpo/gap_mean": 6.667867183685303, "beta_dpo/gap_std": 11.660847663879395, "beta_dpo/loss_margin_mean": 6.757538795471191, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4504913076341648, "grad_norm": 24.019479751586914, "learning_rate": 3.367463137189156e-07, "logits/chosen": 1.1825034618377686, "logits/rejected": 1.1736652851104736, "loss": 1.039, "step": 298 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4987265467643738, "beta_dpo/beta_margin_grad_std": 0.0030703565571457148, "beta_dpo/beta_margin_mean": 0.005094076506793499, "beta_dpo/beta_margin_std": 0.012282279320061207, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.07316959649324417, "beta_dpo/gap_mean": 6.437760353088379, "beta_dpo/gap_std": 11.706324577331543, "beta_dpo/loss_margin_mean": 5.094076156616211, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4520030234315949, "grad_norm": 0.3018997311592102, "learning_rate": 3.355050358314172e-07, "logits/chosen": 1.1841320991516113, "logits/rejected": 1.1034014225006104, "loss": 1.3828, "step": 299 }, { "beta_dpo/beta": 0.09698092192411423, "beta_dpo/beta_margin_grad_mean": -0.415668785572052, "beta_dpo/beta_margin_grad_std": 0.25501230359077454, "beta_dpo/beta_margin_mean": 0.5185216069221497, "beta_dpo/beta_margin_std": 1.8599530458450317, "beta_dpo/beta_used": 0.09698092192411423, "beta_dpo/beta_used_raw": 0.09698092192411423, "beta_dpo/gap_mean": 6.1991801261901855, "beta_dpo/gap_std": 12.077482223510742, "beta_dpo/loss_margin_mean": 5.203822135925293, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.45351473922902497, "grad_norm": 37.997589111328125, "learning_rate": 3.3426136618426043e-07, "logits/chosen": 1.1969237327575684, "logits/rejected": 1.0952715873718262, "loss": 1.2007, "step": 300 }, { "epoch": 0.45351473922902497, "eval_beta_dpo/beta": 0.1345755159854889, "eval_beta_dpo/beta_margin_grad_mean": -0.3695407509803772, "eval_beta_dpo/beta_margin_grad_std": 0.21694542467594147, "eval_beta_dpo/beta_margin_mean": 1.0587594509124756, "eval_beta_dpo/beta_margin_std": 1.7069097757339478, "eval_beta_dpo/beta_used": 0.1345755159854889, "eval_beta_dpo/beta_used_raw": 0.12730880081653595, "eval_beta_dpo/gap_mean": 6.158328533172607, "eval_beta_dpo/gap_std": 12.294206619262695, "eval_beta_dpo/loss_margin_mean": 6.6134748458862305, "eval_beta_dpo/mask_keep_frac": 1.0, "eval_logits/chosen": 1.2326774597167969, "eval_logits/rejected": 1.1646326780319214, "eval_loss": 0.6114901900291443, "eval_runtime": 43.5861, "eval_samples_per_second": 52.838, "eval_steps_per_second": 1.652, "step": 300 }, { "beta_dpo/beta": 0.056266117841005325, "beta_dpo/beta_margin_grad_mean": -0.45459234714508057, "beta_dpo/beta_margin_grad_std": 0.18454816937446594, "beta_dpo/beta_margin_mean": 0.26655083894729614, "beta_dpo/beta_margin_std": 0.9855477213859558, "beta_dpo/beta_used": 0.056266117841005325, "beta_dpo/beta_used_raw": 0.05165494605898857, "beta_dpo/gap_mean": 6.26155948638916, "beta_dpo/gap_std": 12.236114501953125, "beta_dpo/loss_margin_mean": 6.358150005340576, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.455026455026455, "grad_norm": 19.36774253845215, "learning_rate": 3.3301533956555885e-07, "logits/chosen": 1.201526165008545, "logits/rejected": 1.1702396869659424, "loss": 1.2374, "step": 301 }, { "beta_dpo/beta": 0.021665379405021667, "beta_dpo/beta_margin_grad_mean": -0.4781515300273895, "beta_dpo/beta_margin_grad_std": 0.09745253622531891, "beta_dpo/beta_margin_mean": 0.09527567774057388, "beta_dpo/beta_margin_std": 0.4254538416862488, "beta_dpo/beta_used": 0.021665379405021667, "beta_dpo/beta_used_raw": 0.00011988542973995209, "beta_dpo/gap_mean": 5.8745622634887695, "beta_dpo/gap_std": 12.359971046447754, "beta_dpo/loss_margin_mean": 3.919048547744751, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4565381708238851, "grad_norm": 8.723976135253906, "learning_rate": 3.317669908293554e-07, "logits/chosen": 1.0065600872039795, "logits/rejected": 0.979686975479126, "loss": 1.3005, "step": 302 }, { "beta_dpo/beta": 0.082459956407547, "beta_dpo/beta_margin_grad_mean": -0.39630061388015747, "beta_dpo/beta_margin_grad_std": 0.22607973217964172, "beta_dpo/beta_margin_mean": 0.7640535831451416, "beta_dpo/beta_margin_std": 1.684174656867981, "beta_dpo/beta_used": 0.082459956407547, "beta_dpo/beta_used_raw": 0.082459956407547, "beta_dpo/gap_mean": 6.1549224853515625, "beta_dpo/gap_std": 12.555089950561523, "beta_dpo/loss_margin_mean": 8.189640045166016, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4580498866213152, "grad_norm": 30.123430252075195, "learning_rate": 3.3051635489464793e-07, "logits/chosen": 1.1421207189559937, "logits/rejected": 1.1050410270690918, "loss": 1.2195, "step": 303 }, { "beta_dpo/beta": 0.04812777787446976, "beta_dpo/beta_margin_grad_mean": -0.4235919415950775, "beta_dpo/beta_margin_grad_std": 0.12599574029445648, "beta_dpo/beta_margin_mean": 0.34446337819099426, "beta_dpo/beta_margin_std": 0.5999777317047119, "beta_dpo/beta_used": 0.04812777787446976, "beta_dpo/beta_used_raw": 0.04812777787446976, "beta_dpo/gap_mean": 6.333561897277832, "beta_dpo/gap_std": 12.268714904785156, "beta_dpo/loss_margin_mean": 7.1425042152404785, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4595616024187453, "grad_norm": 13.932650566101074, "learning_rate": 3.292634667444117e-07, "logits/chosen": 1.1120836734771729, "logits/rejected": 1.0443115234375, "loss": 1.1942, "step": 304 }, { "beta_dpo/beta": 0.20659758150577545, "beta_dpo/beta_margin_grad_mean": -0.3449561595916748, "beta_dpo/beta_margin_grad_std": 0.32629552483558655, "beta_dpo/beta_margin_mean": 1.5746331214904785, "beta_dpo/beta_margin_std": 3.07080078125, "beta_dpo/beta_used": 0.20659758150577545, "beta_dpo/beta_used_raw": 0.20659758150577545, "beta_dpo/gap_mean": 6.55972957611084, "beta_dpo/gap_std": 12.377994537353516, "beta_dpo/loss_margin_mean": 7.0496368408203125, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.46107331821617537, "grad_norm": 51.580142974853516, "learning_rate": 3.280083614246217e-07, "logits/chosen": 1.274596929550171, "logits/rejected": 1.3232698440551758, "loss": 1.0202, "step": 305 }, { "beta_dpo/beta": 0.06457066535949707, "beta_dpo/beta_margin_grad_mean": -0.4128532111644745, "beta_dpo/beta_margin_grad_std": 0.18687398731708527, "beta_dpo/beta_margin_mean": 0.552842915058136, "beta_dpo/beta_margin_std": 1.2234153747558594, "beta_dpo/beta_used": 0.06457066535949707, "beta_dpo/beta_used_raw": 0.036076560616493225, "beta_dpo/gap_mean": 6.60538387298584, "beta_dpo/gap_std": 12.542646408081055, "beta_dpo/loss_margin_mean": 6.732542514801025, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.46258503401360546, "grad_norm": 20.216312408447266, "learning_rate": 3.267510740432719e-07, "logits/chosen": 1.3599079847335815, "logits/rejected": 1.2298367023468018, "loss": 1.1646, "step": 306 }, { "beta_dpo/beta": 0.06196129694581032, "beta_dpo/beta_margin_grad_mean": -0.4338819980621338, "beta_dpo/beta_margin_grad_std": 0.19024796783924103, "beta_dpo/beta_margin_mean": 0.370510071516037, "beta_dpo/beta_margin_std": 1.0366507768630981, "beta_dpo/beta_used": 0.06196129694581032, "beta_dpo/beta_used_raw": 0.06196129694581032, "beta_dpo/gap_mean": 6.228489875793457, "beta_dpo/gap_std": 12.6326904296875, "beta_dpo/loss_margin_mean": 4.963455677032471, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.46409674981103555, "grad_norm": 19.9281063079834, "learning_rate": 3.2549163976939285e-07, "logits/chosen": 1.2666585445404053, "logits/rejected": 1.1813685894012451, "loss": 1.1922, "step": 307 }, { "beta_dpo/beta": 0.09165829420089722, "beta_dpo/beta_margin_grad_mean": -0.4129888415336609, "beta_dpo/beta_margin_grad_std": 0.2287939041852951, "beta_dpo/beta_margin_mean": 0.7246976494789124, "beta_dpo/beta_margin_std": 1.8345637321472168, "beta_dpo/beta_used": 0.09165829420089722, "beta_dpo/beta_used_raw": 0.059608783572912216, "beta_dpo/gap_mean": 6.315195560455322, "beta_dpo/gap_std": 12.767260551452637, "beta_dpo/loss_margin_mean": 6.2924628257751465, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4656084656084656, "grad_norm": 42.03661346435547, "learning_rate": 3.2423009383206874e-07, "logits/chosen": 0.9810805320739746, "logits/rejected": 1.040271520614624, "loss": 1.2639, "step": 308 }, { "beta_dpo/beta": 0.10874129831790924, "beta_dpo/beta_margin_grad_mean": -0.3518926501274109, "beta_dpo/beta_margin_grad_std": 0.20076824724674225, "beta_dpo/beta_margin_mean": 0.9579285979270935, "beta_dpo/beta_margin_std": 1.5040465593338013, "beta_dpo/beta_used": 0.10874129831790924, "beta_dpo/beta_used_raw": 0.10874129831790924, "beta_dpo/gap_mean": 6.503994941711426, "beta_dpo/gap_std": 12.441705703735352, "beta_dpo/loss_margin_mean": 7.363459587097168, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4671201814058957, "grad_norm": 27.765520095825195, "learning_rate": 3.229664715194511e-07, "logits/chosen": 1.1616370677947998, "logits/rejected": 1.129991054534912, "loss": 1.0048, "step": 309 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49967724084854126, "beta_dpo/beta_margin_grad_std": 0.0030824244022369385, "beta_dpo/beta_margin_mean": 0.0012910891091451049, "beta_dpo/beta_margin_std": 0.012330072931945324, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.07865162193775177, "beta_dpo/gap_mean": 5.778951644897461, "beta_dpo/gap_std": 12.322010040283203, "beta_dpo/loss_margin_mean": 1.2910891771316528, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.46863189720332576, "grad_norm": 0.3216012120246887, "learning_rate": 3.2170080817777257e-07, "logits/chosen": 1.2230892181396484, "logits/rejected": 1.218454122543335, "loss": 1.3835, "step": 310 }, { "beta_dpo/beta": 0.13774374127388, "beta_dpo/beta_margin_grad_mean": -0.3994937837123871, "beta_dpo/beta_margin_grad_std": 0.2569511830806732, "beta_dpo/beta_margin_mean": 1.1233173608779907, "beta_dpo/beta_margin_std": 2.8929855823516846, "beta_dpo/beta_used": 0.13774374127388, "beta_dpo/beta_used_raw": 0.10212471336126328, "beta_dpo/gap_mean": 5.679272651672363, "beta_dpo/gap_std": 12.579723358154297, "beta_dpo/loss_margin_mean": 6.761931896209717, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.47014361300075586, "grad_norm": 40.673583984375, "learning_rate": 3.204331392103574e-07, "logits/chosen": 1.2187139987945557, "logits/rejected": 1.0266274213790894, "loss": 1.1275, "step": 311 }, { "beta_dpo/beta": 0.08971817046403885, "beta_dpo/beta_margin_grad_mean": -0.39458373188972473, "beta_dpo/beta_margin_grad_std": 0.20151464641094208, "beta_dpo/beta_margin_mean": 0.6224772334098816, "beta_dpo/beta_margin_std": 1.3102822303771973, "beta_dpo/beta_used": 0.08971817046403885, "beta_dpo/beta_used_raw": 0.08971817046403885, "beta_dpo/gap_mean": 5.863433837890625, "beta_dpo/gap_std": 12.588981628417969, "beta_dpo/loss_margin_mean": 7.15087366104126, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.47165532879818595, "grad_norm": 22.64099884033203, "learning_rate": 3.1916350007663176e-07, "logits/chosen": 1.1743152141571045, "logits/rejected": 1.1165026426315308, "loss": 1.1265, "step": 312 }, { "beta_dpo/beta": 0.12445323914289474, "beta_dpo/beta_margin_grad_mean": -0.38570505380630493, "beta_dpo/beta_margin_grad_std": 0.30296802520751953, "beta_dpo/beta_margin_mean": 0.7150858640670776, "beta_dpo/beta_margin_std": 1.8413292169570923, "beta_dpo/beta_used": 0.12445323914289474, "beta_dpo/beta_used_raw": 0.12445323914289474, "beta_dpo/gap_mean": 5.904024124145508, "beta_dpo/gap_std": 12.867403984069824, "beta_dpo/loss_margin_mean": 5.75889778137207, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.47316704459561604, "grad_norm": 32.521026611328125, "learning_rate": 3.178919262911314e-07, "logits/chosen": 1.4500136375427246, "logits/rejected": 1.4065792560577393, "loss": 1.1793, "step": 313 }, { "beta_dpo/beta": 0.16136297583580017, "beta_dpo/beta_margin_grad_mean": -0.3725585341453552, "beta_dpo/beta_margin_grad_std": 0.26283350586891174, "beta_dpo/beta_margin_mean": 1.403192162513733, "beta_dpo/beta_margin_std": 2.998973846435547, "beta_dpo/beta_used": 0.16136297583580017, "beta_dpo/beta_used_raw": 0.16136297583580017, "beta_dpo/gap_mean": 6.142127990722656, "beta_dpo/gap_std": 13.036579132080078, "beta_dpo/loss_margin_mean": 6.873254299163818, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.47467876039304613, "grad_norm": 47.72408676147461, "learning_rate": 3.166184534225087e-07, "logits/chosen": 1.1128228902816772, "logits/rejected": 1.1113231182098389, "loss": 1.079, "step": 314 }, { "beta_dpo/beta": 0.050388358533382416, "beta_dpo/beta_margin_grad_mean": -0.41331613063812256, "beta_dpo/beta_margin_grad_std": 0.13312646746635437, "beta_dpo/beta_margin_mean": 0.4088205397129059, "beta_dpo/beta_margin_std": 0.6722843647003174, "beta_dpo/beta_used": 0.050388358533382416, "beta_dpo/beta_used_raw": 0.050388358533382416, "beta_dpo/gap_mean": 6.070713043212891, "beta_dpo/gap_std": 12.985635757446289, "beta_dpo/loss_margin_mean": 6.740433216094971, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.47619047619047616, "grad_norm": 15.891672134399414, "learning_rate": 3.1534311709253723e-07, "logits/chosen": 1.2505284547805786, "logits/rejected": 1.2232093811035156, "loss": 1.1804, "step": 315 }, { "beta_dpo/beta": 0.14638181030750275, "beta_dpo/beta_margin_grad_mean": -0.3598828613758087, "beta_dpo/beta_margin_grad_std": 0.2745656967163086, "beta_dpo/beta_margin_mean": 1.0968278646469116, "beta_dpo/beta_margin_std": 2.148019552230835, "beta_dpo/beta_used": 0.14638181030750275, "beta_dpo/beta_used_raw": 0.14638181030750275, "beta_dpo/gap_mean": 6.046994686126709, "beta_dpo/gap_std": 12.95700740814209, "beta_dpo/loss_margin_mean": 6.180335998535156, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.47770219198790626, "grad_norm": 37.0686149597168, "learning_rate": 3.1406595297511564e-07, "logits/chosen": 1.2918689250946045, "logits/rejected": 1.1598542928695679, "loss": 0.9616, "step": 316 }, { "beta_dpo/beta": 0.13256600499153137, "beta_dpo/beta_margin_grad_mean": -0.37093499302864075, "beta_dpo/beta_margin_grad_std": 0.2640308141708374, "beta_dpo/beta_margin_mean": 1.5134227275848389, "beta_dpo/beta_margin_std": 2.976809024810791, "beta_dpo/beta_used": 0.13256600499153137, "beta_dpo/beta_used_raw": 0.13214673101902008, "beta_dpo/gap_mean": 6.867618083953857, "beta_dpo/gap_std": 13.056652069091797, "beta_dpo/loss_margin_mean": 10.035300254821777, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.47921390778533635, "grad_norm": 54.22406768798828, "learning_rate": 3.1278699679526975e-07, "logits/chosen": 1.369170069694519, "logits/rejected": 1.328667163848877, "loss": 1.3319, "step": 317 }, { "beta_dpo/beta": 0.05628693476319313, "beta_dpo/beta_margin_grad_mean": -0.46722689270973206, "beta_dpo/beta_margin_grad_std": 0.2057475745677948, "beta_dpo/beta_margin_mean": 0.20529808104038239, "beta_dpo/beta_margin_std": 1.1580020189285278, "beta_dpo/beta_used": 0.05628693476319313, "beta_dpo/beta_used_raw": 0.0030015483498573303, "beta_dpo/gap_mean": 6.78084659576416, "beta_dpo/gap_std": 13.355363845825195, "beta_dpo/loss_margin_mean": 5.220831871032715, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.48072562358276644, "grad_norm": 22.165693283081055, "learning_rate": 3.1150628432815336e-07, "logits/chosen": 1.3102058172225952, "logits/rejected": 1.2508630752563477, "loss": 1.2437, "step": 318 }, { "beta_dpo/beta": 0.03552532196044922, "beta_dpo/beta_margin_grad_mean": -0.46162641048431396, "beta_dpo/beta_margin_grad_std": 0.13534243404865265, "beta_dpo/beta_margin_mean": 0.19126252830028534, "beta_dpo/beta_margin_std": 0.6686474680900574, "beta_dpo/beta_used": 0.03552532196044922, "beta_dpo/beta_used_raw": 0.011105714365839958, "beta_dpo/gap_mean": 6.4551591873168945, "beta_dpo/gap_std": 13.360182762145996, "beta_dpo/loss_margin_mean": 5.578036785125732, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.48223733938019653, "grad_norm": 15.060959815979004, "learning_rate": 3.1022385139804707e-07, "logits/chosen": 0.8403013944625854, "logits/rejected": 0.8464080691337585, "loss": 1.252, "step": 319 }, { "beta_dpo/beta": 0.07511113584041595, "beta_dpo/beta_margin_grad_mean": -0.4390046298503876, "beta_dpo/beta_margin_grad_std": 0.22036148607730865, "beta_dpo/beta_margin_mean": 0.39557981491088867, "beta_dpo/beta_margin_std": 1.5242847204208374, "beta_dpo/beta_used": 0.07511113584041595, "beta_dpo/beta_used_raw": -0.0021282732486724854, "beta_dpo/gap_mean": 6.0225372314453125, "beta_dpo/gap_std": 13.459592819213867, "beta_dpo/loss_margin_mean": 4.060967922210693, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4837490551776266, "grad_norm": 24.02452850341797, "learning_rate": 3.0893973387735683e-07, "logits/chosen": 0.9690076112747192, "logits/rejected": 0.8976876139640808, "loss": 1.2212, "step": 320 }, { "beta_dpo/beta": 0.15998658537864685, "beta_dpo/beta_margin_grad_mean": -0.3318832218647003, "beta_dpo/beta_margin_grad_std": 0.23508228361606598, "beta_dpo/beta_margin_mean": 1.7446743249893188, "beta_dpo/beta_margin_std": 2.777869701385498, "beta_dpo/beta_used": 0.15998658537864685, "beta_dpo/beta_used_raw": 0.15998658537864685, "beta_dpo/gap_mean": 6.446950912475586, "beta_dpo/gap_std": 13.321550369262695, "beta_dpo/loss_margin_mean": 8.086103439331055, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4852607709750567, "grad_norm": 34.035491943359375, "learning_rate": 3.0765396768561004e-07, "logits/chosen": 1.2763044834136963, "logits/rejected": 1.2507541179656982, "loss": 0.9461, "step": 321 }, { "beta_dpo/beta": 0.2635050117969513, "beta_dpo/beta_margin_grad_mean": -0.23470216989517212, "beta_dpo/beta_margin_grad_std": 0.27463462948799133, "beta_dpo/beta_margin_mean": 2.7218821048736572, "beta_dpo/beta_margin_std": 3.420943021774292, "beta_dpo/beta_used": 0.2635050117969513, "beta_dpo/beta_used_raw": 0.2635050117969513, "beta_dpo/gap_mean": 6.802641868591309, "beta_dpo/gap_std": 13.241533279418945, "beta_dpo/loss_margin_mean": 9.942191123962402, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.48677248677248675, "grad_norm": 42.902835845947266, "learning_rate": 3.063665887884511e-07, "logits/chosen": 1.5488101243972778, "logits/rejected": 1.4433493614196777, "loss": 0.7142, "step": 322 }, { "beta_dpo/beta": 0.0697922334074974, "beta_dpo/beta_margin_grad_mean": -0.4143967628479004, "beta_dpo/beta_margin_grad_std": 0.2243691384792328, "beta_dpo/beta_margin_mean": 0.6587510108947754, "beta_dpo/beta_margin_std": 1.602243423461914, "beta_dpo/beta_used": 0.0697922334074974, "beta_dpo/beta_used_raw": 0.0697922334074974, "beta_dpo/gap_mean": 6.970813751220703, "beta_dpo/gap_std": 13.35682201385498, "beta_dpo/loss_margin_mean": 7.288545608520508, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.48828420256991684, "grad_norm": 25.457849502563477, "learning_rate": 3.0507763319663517e-07, "logits/chosen": 1.28651762008667, "logits/rejected": 1.2437783479690552, "loss": 1.22, "step": 323 }, { "beta_dpo/beta": 0.14148634672164917, "beta_dpo/beta_margin_grad_mean": -0.36526909470558167, "beta_dpo/beta_margin_grad_std": 0.24114990234375, "beta_dpo/beta_margin_mean": 1.0672969818115234, "beta_dpo/beta_margin_std": 1.9926927089691162, "beta_dpo/beta_used": 0.14148634672164917, "beta_dpo/beta_used_raw": 0.14148634672164917, "beta_dpo/gap_mean": 7.207144737243652, "beta_dpo/gap_std": 13.275566101074219, "beta_dpo/loss_margin_mean": 7.679068088531494, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4897959183673469, "grad_norm": 32.15392303466797, "learning_rate": 3.0378713696502097e-07, "logits/chosen": 1.0783584117889404, "logits/rejected": 0.9778432250022888, "loss": 0.9654, "step": 324 }, { "beta_dpo/beta": 0.0674436017870903, "beta_dpo/beta_margin_grad_mean": -0.4117467999458313, "beta_dpo/beta_margin_grad_std": 0.18462489545345306, "beta_dpo/beta_margin_mean": 0.5190098881721497, "beta_dpo/beta_margin_std": 1.1525095701217651, "beta_dpo/beta_used": 0.0674436017870903, "beta_dpo/beta_used_raw": 0.0674436017870903, "beta_dpo/gap_mean": 7.158202171325684, "beta_dpo/gap_std": 12.836915969848633, "beta_dpo/loss_margin_mean": 6.354362487792969, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.491307634164777, "grad_norm": 21.140745162963867, "learning_rate": 3.0249513619156206e-07, "logits/chosen": 1.1736620664596558, "logits/rejected": 1.0785242319107056, "loss": 1.1232, "step": 325 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49881821870803833, "beta_dpo/beta_margin_grad_std": 0.0027975537814199924, "beta_dpo/beta_margin_mean": 0.004727249499410391, "beta_dpo/beta_margin_std": 0.011190598830580711, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.03214829042553902, "beta_dpo/gap_mean": 6.748882293701172, "beta_dpo/gap_std": 12.50639533996582, "beta_dpo/loss_margin_mean": 4.7272491455078125, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4928193499622071, "grad_norm": 0.33258065581321716, "learning_rate": 3.012016670162977e-07, "logits/chosen": 1.0217170715332031, "logits/rejected": 1.0124512910842896, "loss": 1.3818, "step": 326 }, { "beta_dpo/beta": 0.03682481870055199, "beta_dpo/beta_margin_grad_mean": -0.46519556641578674, "beta_dpo/beta_margin_grad_std": 0.12718036770820618, "beta_dpo/beta_margin_mean": 0.15980318188667297, "beta_dpo/beta_margin_std": 0.6078633666038513, "beta_dpo/beta_used": 0.03682481870055199, "beta_dpo/beta_used_raw": 0.03682481870055199, "beta_dpo/gap_mean": 6.296779632568359, "beta_dpo/gap_std": 12.454865455627441, "beta_dpo/loss_margin_mean": 4.670220851898193, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4943310657596372, "grad_norm": 12.803543090820312, "learning_rate": 2.99906765620341e-07, "logits/chosen": 1.0091784000396729, "logits/rejected": 0.9715848565101624, "loss": 1.2383, "step": 327 }, { "beta_dpo/beta": 0.06305188685655594, "beta_dpo/beta_margin_grad_mean": -0.4316686987876892, "beta_dpo/beta_margin_grad_std": 0.1629015952348709, "beta_dpo/beta_margin_mean": 0.3258446753025055, "beta_dpo/beta_margin_std": 0.7818768620491028, "beta_dpo/beta_used": 0.06305188685655594, "beta_dpo/beta_used_raw": 0.06305188685655594, "beta_dpo/gap_mean": 6.0933403968811035, "beta_dpo/gap_std": 12.407817840576172, "beta_dpo/loss_margin_mean": 4.723147869110107, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4958427815570673, "grad_norm": 14.375814437866211, "learning_rate": 2.9861046822486766e-07, "logits/chosen": 1.2935378551483154, "logits/rejected": 1.2577247619628906, "loss": 1.1574, "step": 328 }, { "beta_dpo/beta": 0.0841306746006012, "beta_dpo/beta_margin_grad_mean": -0.4047718942165375, "beta_dpo/beta_margin_grad_std": 0.2134350687265396, "beta_dpo/beta_margin_mean": 0.5284138917922974, "beta_dpo/beta_margin_std": 1.1749253273010254, "beta_dpo/beta_used": 0.0841306746006012, "beta_dpo/beta_used_raw": 0.0841306746006012, "beta_dpo/gap_mean": 5.981316089630127, "beta_dpo/gap_std": 12.62000846862793, "beta_dpo/loss_margin_mean": 6.10738468170166, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4973544973544973, "grad_norm": 21.042234420776367, "learning_rate": 2.9731281109010253e-07, "logits/chosen": 1.3784832954406738, "logits/rejected": 1.313535451889038, "loss": 1.1062, "step": 329 }, { "beta_dpo/beta": 0.1372985541820526, "beta_dpo/beta_margin_grad_mean": -0.3568761646747589, "beta_dpo/beta_margin_grad_std": 0.2465440183877945, "beta_dpo/beta_margin_mean": 1.5668509006500244, "beta_dpo/beta_margin_std": 2.8006210327148438, "beta_dpo/beta_used": 0.1372985541820526, "beta_dpo/beta_used_raw": 0.08406403660774231, "beta_dpo/gap_mean": 6.211638450622559, "beta_dpo/gap_std": 12.863258361816406, "beta_dpo/loss_margin_mean": 8.602145195007324, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.4988662131519274, "grad_norm": 36.83658981323242, "learning_rate": 2.9601383051430505e-07, "logits/chosen": 1.0632227659225464, "logits/rejected": 1.0073779821395874, "loss": 1.0342, "step": 330 }, { "beta_dpo/beta": 0.1793161928653717, "beta_dpo/beta_margin_grad_mean": -0.3007383942604065, "beta_dpo/beta_margin_grad_std": 0.26930364966392517, "beta_dpo/beta_margin_mean": 1.5797462463378906, "beta_dpo/beta_margin_std": 2.476447820663452, "beta_dpo/beta_used": 0.1793161928653717, "beta_dpo/beta_used_raw": 0.1793161928653717, "beta_dpo/gap_mean": 6.820796012878418, "beta_dpo/gap_std": 12.850607872009277, "beta_dpo/loss_margin_mean": 8.462237358093262, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5003779289493575, "grad_norm": 37.06329345703125, "learning_rate": 2.947135628327544e-07, "logits/chosen": 1.453250527381897, "logits/rejected": 1.436427354812622, "loss": 0.8528, "step": 331 }, { "beta_dpo/beta": 0.015641553327441216, "beta_dpo/beta_margin_grad_mean": -0.46394628286361694, "beta_dpo/beta_margin_grad_std": 0.06855617463588715, "beta_dpo/beta_margin_mean": 0.15044867992401123, "beta_dpo/beta_margin_std": 0.2898610234260559, "beta_dpo/beta_used": 0.015641553327441216, "beta_dpo/beta_used_raw": 0.015498769469559193, "beta_dpo/gap_mean": 6.893322944641113, "beta_dpo/gap_std": 12.798620223999023, "beta_dpo/loss_margin_mean": 7.758445739746094, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5018896447467877, "grad_norm": 5.801995277404785, "learning_rate": 2.934120444167326e-07, "logits/chosen": 1.297292947769165, "logits/rejected": 1.2258504629135132, "loss": 1.3071, "step": 332 }, { "beta_dpo/beta": 0.0764099657535553, "beta_dpo/beta_margin_grad_mean": -0.3789524734020233, "beta_dpo/beta_margin_grad_std": 0.16656707227230072, "beta_dpo/beta_margin_mean": 0.6131932735443115, "beta_dpo/beta_margin_std": 0.9245501756668091, "beta_dpo/beta_used": 0.0764099657535553, "beta_dpo/beta_used_raw": 0.0764099657535553, "beta_dpo/gap_mean": 7.18435525894165, "beta_dpo/gap_std": 12.589004516601562, "beta_dpo/loss_margin_mean": 8.35302734375, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5034013605442177, "grad_norm": 20.38193130493164, "learning_rate": 2.921093116725076e-07, "logits/chosen": 1.111952304840088, "logits/rejected": 1.0473523139953613, "loss": 1.0434, "step": 333 }, { "beta_dpo/beta": 0.052270177751779556, "beta_dpo/beta_margin_grad_mean": -0.4547905921936035, "beta_dpo/beta_margin_grad_std": 0.18179713189601898, "beta_dpo/beta_margin_mean": 0.25288447737693787, "beta_dpo/beta_margin_std": 1.0719202756881714, "beta_dpo/beta_used": 0.052270177751779556, "beta_dpo/beta_used_raw": -0.021642889827489853, "beta_dpo/gap_mean": 6.781639099121094, "beta_dpo/gap_std": 12.995382308959961, "beta_dpo/loss_margin_mean": 3.978499412536621, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5049130763416477, "grad_norm": 16.100820541381836, "learning_rate": 2.9080540104031484e-07, "logits/chosen": 1.1373687982559204, "logits/rejected": 1.0688490867614746, "loss": 1.1891, "step": 334 }, { "beta_dpo/beta": 0.07851317524909973, "beta_dpo/beta_margin_grad_mean": -0.41749730706214905, "beta_dpo/beta_margin_grad_std": 0.22520457208156586, "beta_dpo/beta_margin_mean": 0.4524867534637451, "beta_dpo/beta_margin_std": 1.195827603340149, "beta_dpo/beta_used": 0.07851317524909973, "beta_dpo/beta_used_raw": 0.07851317524909973, "beta_dpo/gap_mean": 6.497610569000244, "beta_dpo/gap_std": 13.426082611083984, "beta_dpo/loss_margin_mean": 5.86445951461792, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5064247921390779, "grad_norm": 22.561229705810547, "learning_rate": 2.895003489933375e-07, "logits/chosen": 1.066937804222107, "logits/rejected": 1.034178376197815, "loss": 1.1779, "step": 335 }, { "beta_dpo/beta": 0.005117136985063553, "beta_dpo/beta_margin_grad_mean": -0.48917579650878906, "beta_dpo/beta_margin_grad_std": 0.021274510771036148, "beta_dpo/beta_margin_mean": 0.043470632284879684, "beta_dpo/beta_margin_std": 0.08556756377220154, "beta_dpo/beta_used": 0.005117136985063553, "beta_dpo/beta_used_raw": -0.002717310097068548, "beta_dpo/gap_mean": 6.687017440795898, "beta_dpo/gap_std": 13.347187042236328, "beta_dpo/loss_margin_mean": 6.967475891113281, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5079365079365079, "grad_norm": 1.848929762840271, "learning_rate": 2.8819419203668675e-07, "logits/chosen": 0.8256216049194336, "logits/rejected": 0.8187216520309448, "loss": 1.3607, "step": 336 }, { "beta_dpo/beta": 0.05330893397331238, "beta_dpo/beta_margin_grad_mean": -0.43421801924705505, "beta_dpo/beta_margin_grad_std": 0.15648889541625977, "beta_dpo/beta_margin_mean": 0.3032914996147156, "beta_dpo/beta_margin_std": 0.7241752743721008, "beta_dpo/beta_used": 0.05330893397331238, "beta_dpo/beta_used_raw": 0.05330893397331238, "beta_dpo/gap_mean": 6.46940279006958, "beta_dpo/gap_std": 13.37681770324707, "beta_dpo/loss_margin_mean": 5.691498756408691, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.509448223733938, "grad_norm": 14.02002239227295, "learning_rate": 2.8688696670638053e-07, "logits/chosen": 1.020787000656128, "logits/rejected": 0.9443519115447998, "loss": 1.191, "step": 337 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4988920986652374, "beta_dpo/beta_margin_grad_std": 0.0028042481280863285, "beta_dpo/beta_margin_mean": 0.004431764129549265, "beta_dpo/beta_margin_std": 0.011217460036277771, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.023727454245090485, "beta_dpo/gap_mean": 6.0999603271484375, "beta_dpo/gap_std": 13.048229217529297, "beta_dpo/loss_margin_mean": 4.431764125823975, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5109599395313681, "grad_norm": 0.28760549426078796, "learning_rate": 2.8557870956832133e-07, "logits/chosen": 1.1092530488967896, "logits/rejected": 1.0787663459777832, "loss": 1.3823, "step": 338 }, { "beta_dpo/beta": 0.14106054604053497, "beta_dpo/beta_margin_grad_mean": -0.33038145303726196, "beta_dpo/beta_margin_grad_std": 0.26955631375312805, "beta_dpo/beta_margin_mean": 1.0998313426971436, "beta_dpo/beta_margin_std": 1.7985961437225342, "beta_dpo/beta_used": 0.14106054604053497, "beta_dpo/beta_used_raw": 0.14106054604053497, "beta_dpo/gap_mean": 6.327607154846191, "beta_dpo/gap_std": 12.90283489227295, "beta_dpo/loss_margin_mean": 7.851966857910156, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5124716553287982, "grad_norm": 40.547515869140625, "learning_rate": 2.842694572172736e-07, "logits/chosen": 1.076251745223999, "logits/rejected": 0.9694498777389526, "loss": 1.0281, "step": 339 }, { "beta_dpo/beta": 0.10330237448215485, "beta_dpo/beta_margin_grad_mean": -0.44081035256385803, "beta_dpo/beta_margin_grad_std": 0.23204687237739563, "beta_dpo/beta_margin_mean": 0.5521944761276245, "beta_dpo/beta_margin_std": 2.094428062438965, "beta_dpo/beta_used": 0.10330237448215485, "beta_dpo/beta_used_raw": 0.06831492483615875, "beta_dpo/gap_mean": 6.3657684326171875, "beta_dpo/gap_std": 13.076078414916992, "beta_dpo/loss_margin_mean": 6.563114643096924, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5139833711262283, "grad_norm": 34.1888427734375, "learning_rate": 2.8295924627584004e-07, "logits/chosen": 1.066762924194336, "logits/rejected": 1.062217116355896, "loss": 1.2085, "step": 340 }, { "beta_dpo/beta": 0.24964655935764313, "beta_dpo/beta_margin_grad_mean": -0.36444416642189026, "beta_dpo/beta_margin_grad_std": 0.28421640396118164, "beta_dpo/beta_margin_mean": 2.929835796356201, "beta_dpo/beta_margin_std": 5.628471374511719, "beta_dpo/beta_used": 0.24964655935764313, "beta_dpo/beta_used_raw": 0.23628321290016174, "beta_dpo/gap_mean": 6.765471935272217, "beta_dpo/gap_std": 13.197046279907227, "beta_dpo/loss_margin_mean": 7.296074867248535, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5154950869236583, "grad_norm": 63.85667419433594, "learning_rate": 2.816481133934373e-07, "logits/chosen": 1.4289345741271973, "logits/rejected": 1.3677668571472168, "loss": 0.9964, "step": 341 }, { "beta_dpo/beta": 0.13882164657115936, "beta_dpo/beta_margin_grad_mean": -0.40003663301467896, "beta_dpo/beta_margin_grad_std": 0.26059815287590027, "beta_dpo/beta_margin_mean": 1.08782160282135, "beta_dpo/beta_margin_std": 2.7480297088623047, "beta_dpo/beta_used": 0.13882164657115936, "beta_dpo/beta_used_raw": 0.13882164657115936, "beta_dpo/gap_mean": 6.689360618591309, "beta_dpo/gap_std": 13.186819076538086, "beta_dpo/loss_margin_mean": 7.2520527839660645, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5170068027210885, "grad_norm": 36.852256774902344, "learning_rate": 2.8033609524527046e-07, "logits/chosen": 1.1861512660980225, "logits/rejected": 1.1343166828155518, "loss": 1.203, "step": 342 }, { "beta_dpo/beta": 0.09241315722465515, "beta_dpo/beta_margin_grad_mean": -0.41862672567367554, "beta_dpo/beta_margin_grad_std": 0.23799264430999756, "beta_dpo/beta_margin_mean": 0.6367702484130859, "beta_dpo/beta_margin_std": 1.8354604244232178, "beta_dpo/beta_used": 0.09241315722465515, "beta_dpo/beta_used_raw": -0.0970378890633583, "beta_dpo/gap_mean": 6.421136856079102, "beta_dpo/gap_std": 13.049176216125488, "beta_dpo/loss_margin_mean": 3.949489116668701, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5185185185185185, "grad_norm": 26.807126998901367, "learning_rate": 2.7902322853130753e-07, "logits/chosen": 0.9365084767341614, "logits/rejected": 0.9163832068443298, "loss": 1.1698, "step": 343 }, { "beta_dpo/beta": 0.07383356988430023, "beta_dpo/beta_margin_grad_mean": -0.38845568895339966, "beta_dpo/beta_margin_grad_std": 0.19830799102783203, "beta_dpo/beta_margin_mean": 0.6601214408874512, "beta_dpo/beta_margin_std": 1.2798510789871216, "beta_dpo/beta_used": 0.07383356988430023, "beta_dpo/beta_used_raw": 0.07383356988430023, "beta_dpo/gap_mean": 6.460176944732666, "beta_dpo/gap_std": 12.917095184326172, "beta_dpo/loss_margin_mean": 7.995361328125, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5200302343159486, "grad_norm": 20.402359008789062, "learning_rate": 2.7770954997525274e-07, "logits/chosen": 1.2269436120986938, "logits/rejected": 1.1481449604034424, "loss": 1.1563, "step": 344 }, { "beta_dpo/beta": 0.191828653216362, "beta_dpo/beta_margin_grad_mean": -0.35022681951522827, "beta_dpo/beta_margin_grad_std": 0.30977630615234375, "beta_dpo/beta_margin_mean": 1.2948695421218872, "beta_dpo/beta_margin_std": 2.8370375633239746, "beta_dpo/beta_used": 0.191828653216362, "beta_dpo/beta_used_raw": 0.191828653216362, "beta_dpo/gap_mean": 6.522076606750488, "beta_dpo/gap_std": 13.085643768310547, "beta_dpo/loss_margin_mean": 6.543193340301514, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5215419501133787, "grad_norm": 43.98537063598633, "learning_rate": 2.7639509632351927e-07, "logits/chosen": 1.2063586711883545, "logits/rejected": 1.1623212099075317, "loss": 0.9284, "step": 345 }, { "beta_dpo/beta": 0.08791759610176086, "beta_dpo/beta_margin_grad_mean": -0.40686461329460144, "beta_dpo/beta_margin_grad_std": 0.24795067310333252, "beta_dpo/beta_margin_mean": 0.8130276203155518, "beta_dpo/beta_margin_std": 2.0670032501220703, "beta_dpo/beta_used": 0.08791759610176086, "beta_dpo/beta_used_raw": 0.08791759610176086, "beta_dpo/gap_mean": 6.574892997741699, "beta_dpo/gap_std": 13.083642959594727, "beta_dpo/loss_margin_mean": 7.657172679901123, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5230536659108088, "grad_norm": 30.677244186401367, "learning_rate": 2.7507990434420123e-07, "logits/chosen": 1.1106505393981934, "logits/rejected": 1.0611116886138916, "loss": 1.2396, "step": 346 }, { "beta_dpo/beta": 0.016010552644729614, "beta_dpo/beta_margin_grad_mean": -0.4776521325111389, "beta_dpo/beta_margin_grad_std": 0.08023162931203842, "beta_dpo/beta_margin_mean": 0.09511476755142212, "beta_dpo/beta_margin_std": 0.3387611508369446, "beta_dpo/beta_used": 0.016010552644729614, "beta_dpo/beta_used_raw": 0.012834769673645496, "beta_dpo/gap_mean": 6.491988658905029, "beta_dpo/gap_std": 13.22860336303711, "beta_dpo/loss_margin_mean": 5.3407182693481445, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5245653817082389, "grad_norm": 5.674924373626709, "learning_rate": 2.737640108260456e-07, "logits/chosen": 1.2309460639953613, "logits/rejected": 1.1862425804138184, "loss": 1.326, "step": 347 }, { "beta_dpo/beta": 0.06713169813156128, "beta_dpo/beta_margin_grad_mean": -0.4124799072742462, "beta_dpo/beta_margin_grad_std": 0.17748472094535828, "beta_dpo/beta_margin_mean": 0.45152342319488525, "beta_dpo/beta_margin_std": 0.93133944272995, "beta_dpo/beta_used": 0.06713169813156128, "beta_dpo/beta_used_raw": 0.06713169813156128, "beta_dpo/gap_mean": 6.600510597229004, "beta_dpo/gap_std": 13.38565444946289, "beta_dpo/loss_margin_mean": 7.088160037994385, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5260770975056689, "grad_norm": 17.118511199951172, "learning_rate": 2.724474525774229e-07, "logits/chosen": 1.4532029628753662, "logits/rejected": 1.3983886241912842, "loss": 1.1278, "step": 348 }, { "beta_dpo/beta": 0.12098747491836548, "beta_dpo/beta_margin_grad_mean": -0.3674723207950592, "beta_dpo/beta_margin_grad_std": 0.2333253175020218, "beta_dpo/beta_margin_mean": 1.1785528659820557, "beta_dpo/beta_margin_std": 2.257408618927002, "beta_dpo/beta_used": 0.12098747491836548, "beta_dpo/beta_used_raw": 0.12098747491836548, "beta_dpo/gap_mean": 6.678452491760254, "beta_dpo/gap_std": 13.459003448486328, "beta_dpo/loss_margin_mean": 8.029808044433594, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.527588813303099, "grad_norm": 36.4443359375, "learning_rate": 2.711302664252973e-07, "logits/chosen": 1.0890693664550781, "logits/rejected": 1.0426480770111084, "loss": 1.137, "step": 349 }, { "beta_dpo/beta": 0.16540199518203735, "beta_dpo/beta_margin_grad_mean": -0.2708449363708496, "beta_dpo/beta_margin_grad_std": 0.2463354766368866, "beta_dpo/beta_margin_mean": 1.6865824460983276, "beta_dpo/beta_margin_std": 1.9559446573257446, "beta_dpo/beta_used": 0.16540199518203735, "beta_dpo/beta_used_raw": 0.16540199518203735, "beta_dpo/gap_mean": 7.410719871520996, "beta_dpo/gap_std": 13.243711471557617, "beta_dpo/loss_margin_mean": 10.080084800720215, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5291005291005291, "grad_norm": 37.67925262451172, "learning_rate": 2.698124892141971e-07, "logits/chosen": 1.0085124969482422, "logits/rejected": 0.9736948013305664, "loss": 0.8858, "step": 350 }, { "beta_dpo/beta": 0.13289794325828552, "beta_dpo/beta_margin_grad_mean": -0.35490015149116516, "beta_dpo/beta_margin_grad_std": 0.27515533566474915, "beta_dpo/beta_margin_mean": 0.9353875517845154, "beta_dpo/beta_margin_std": 1.814262866973877, "beta_dpo/beta_used": 0.13289794325828552, "beta_dpo/beta_used_raw": 0.13289794325828552, "beta_dpo/gap_mean": 7.435298919677734, "beta_dpo/gap_std": 13.097363471984863, "beta_dpo/loss_margin_mean": 6.503824234008789, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5306122448979592, "grad_norm": 42.758670806884766, "learning_rate": 2.6849415780518357e-07, "logits/chosen": 1.409663438796997, "logits/rejected": 1.3147722482681274, "loss": 0.9475, "step": 351 }, { "beta_dpo/beta": 0.15361975133419037, "beta_dpo/beta_margin_grad_mean": -0.3725324273109436, "beta_dpo/beta_margin_grad_std": 0.2771722674369812, "beta_dpo/beta_margin_mean": 1.3406981229782104, "beta_dpo/beta_margin_std": 3.0171306133270264, "beta_dpo/beta_used": 0.15361975133419037, "beta_dpo/beta_used_raw": 0.15361975133419037, "beta_dpo/gap_mean": 7.367338180541992, "beta_dpo/gap_std": 13.319618225097656, "beta_dpo/loss_margin_mean": 7.5593743324279785, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5321239606953893, "grad_norm": 43.37005615234375, "learning_rate": 2.6717530907482024e-07, "logits/chosen": 1.3086192607879639, "logits/rejected": 1.2862586975097656, "loss": 1.0957, "step": 352 }, { "beta_dpo/beta": 0.2464243620634079, "beta_dpo/beta_margin_grad_mean": -0.25405097007751465, "beta_dpo/beta_margin_grad_std": 0.3099513649940491, "beta_dpo/beta_margin_mean": 2.007615566253662, "beta_dpo/beta_margin_std": 3.4321491718292236, "beta_dpo/beta_used": 0.2464243620634079, "beta_dpo/beta_used_raw": 0.2464243620634079, "beta_dpo/gap_mean": 7.241891860961914, "beta_dpo/gap_std": 13.446264266967773, "beta_dpo/loss_margin_mean": 7.859658241271973, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5336356764928194, "grad_norm": 41.63973617553711, "learning_rate": 2.658559799141411e-07, "logits/chosen": 1.1554973125457764, "logits/rejected": 1.1289267539978027, "loss": 0.6731, "step": 353 }, { "beta_dpo/beta": 0.1691766232252121, "beta_dpo/beta_margin_grad_mean": -0.31530454754829407, "beta_dpo/beta_margin_grad_std": 0.3026013970375061, "beta_dpo/beta_margin_mean": 1.361480712890625, "beta_dpo/beta_margin_std": 2.515624523162842, "beta_dpo/beta_used": 0.1691766232252121, "beta_dpo/beta_used_raw": 0.1691766232252121, "beta_dpo/gap_mean": 7.580024242401123, "beta_dpo/gap_std": 13.480710983276367, "beta_dpo/loss_margin_mean": 8.209134101867676, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5351473922902494, "grad_norm": 39.822025299072266, "learning_rate": 2.6453620722761895e-07, "logits/chosen": 1.223923683166504, "logits/rejected": 1.1551811695098877, "loss": 0.8025, "step": 354 }, { "beta_dpo/beta": 0.17666350305080414, "beta_dpo/beta_margin_grad_mean": -0.29615840315818787, "beta_dpo/beta_margin_grad_std": 0.2723563313484192, "beta_dpo/beta_margin_mean": 1.595294713973999, "beta_dpo/beta_margin_std": 2.3159079551696777, "beta_dpo/beta_used": 0.17666350305080414, "beta_dpo/beta_used_raw": 0.17666350305080414, "beta_dpo/gap_mean": 7.708715438842773, "beta_dpo/gap_std": 13.585740089416504, "beta_dpo/loss_margin_mean": 8.833210945129395, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5366591080876795, "grad_norm": 33.9071159362793, "learning_rate": 2.632160279321328e-07, "logits/chosen": 1.3096725940704346, "logits/rejected": 1.1149065494537354, "loss": 0.8918, "step": 355 }, { "beta_dpo/beta": 0.2004559338092804, "beta_dpo/beta_margin_grad_mean": -0.28709742426872253, "beta_dpo/beta_margin_grad_std": 0.3166719377040863, "beta_dpo/beta_margin_mean": 1.9062695503234863, "beta_dpo/beta_margin_std": 3.0024073123931885, "beta_dpo/beta_used": 0.2004559338092804, "beta_dpo/beta_used_raw": 0.2004559338092804, "beta_dpo/gap_mean": 8.091878890991211, "beta_dpo/gap_std": 13.65368938446045, "beta_dpo/loss_margin_mean": 9.26539134979248, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5381708238851096, "grad_norm": 40.02458953857422, "learning_rate": 2.618954789559356e-07, "logits/chosen": 1.2811923027038574, "logits/rejected": 1.2076209783554077, "loss": 0.8828, "step": 356 }, { "beta_dpo/beta": 0.018133217468857765, "beta_dpo/beta_margin_grad_mean": -0.45670992136001587, "beta_dpo/beta_margin_grad_std": 0.08694471418857574, "beta_dpo/beta_margin_mean": 0.18453972041606903, "beta_dpo/beta_margin_std": 0.37306493520736694, "beta_dpo/beta_used": 0.018133217468857765, "beta_dpo/beta_used_raw": 0.003822476603090763, "beta_dpo/gap_mean": 8.109353065490723, "beta_dpo/gap_std": 13.743289947509766, "beta_dpo/loss_margin_mean": 7.264776229858398, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5396825396825397, "grad_norm": 6.065606117248535, "learning_rate": 2.6057459723762076e-07, "logits/chosen": 1.1004765033721924, "logits/rejected": 1.0495339632034302, "loss": 1.2814, "step": 357 }, { "beta_dpo/beta": 0.051143769174814224, "beta_dpo/beta_margin_grad_mean": -0.40527665615081787, "beta_dpo/beta_margin_grad_std": 0.14958453178405762, "beta_dpo/beta_margin_mean": 0.4351819157600403, "beta_dpo/beta_margin_std": 0.7011198997497559, "beta_dpo/beta_used": 0.051143769174814224, "beta_dpo/beta_used_raw": 0.051143769174814224, "beta_dpo/gap_mean": 7.787057876586914, "beta_dpo/gap_std": 13.672910690307617, "beta_dpo/loss_margin_mean": 7.942201137542725, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5411942554799698, "grad_norm": 16.145584106445312, "learning_rate": 2.5925341972508954e-07, "logits/chosen": 1.260805606842041, "logits/rejected": 1.259293556213379, "loss": 1.1264, "step": 358 }, { "beta_dpo/beta": 0.06088128313422203, "beta_dpo/beta_margin_grad_mean": -0.4172905385494232, "beta_dpo/beta_margin_grad_std": 0.21149376034736633, "beta_dpo/beta_margin_mean": 0.5262914896011353, "beta_dpo/beta_margin_std": 1.3644154071807861, "beta_dpo/beta_used": 0.06088128313422203, "beta_dpo/beta_used_raw": 0.02194279432296753, "beta_dpo/gap_mean": 7.3958940505981445, "beta_dpo/gap_std": 13.696711540222168, "beta_dpo/loss_margin_mean": 5.2692742347717285, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5427059712773998, "grad_norm": 19.803897857666016, "learning_rate": 2.579319833745169e-07, "logits/chosen": 1.1320165395736694, "logits/rejected": 1.1335536241531372, "loss": 1.1932, "step": 359 }, { "beta_dpo/beta": 0.09361709654331207, "beta_dpo/beta_margin_grad_mean": -0.4224591851234436, "beta_dpo/beta_margin_grad_std": 0.2343314290046692, "beta_dpo/beta_margin_mean": 0.7064631581306458, "beta_dpo/beta_margin_std": 1.859923243522644, "beta_dpo/beta_used": 0.09361709654331207, "beta_dpo/beta_used_raw": 0.0917295590043068, "beta_dpo/gap_mean": 7.435537338256836, "beta_dpo/gap_std": 13.729305267333984, "beta_dpo/loss_margin_mean": 7.1353888511657715, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.54421768707483, "grad_norm": 30.30096435546875, "learning_rate": 2.5661032514931834e-07, "logits/chosen": 1.0677845478057861, "logits/rejected": 0.9679094552993774, "loss": 1.1544, "step": 360 }, { "beta_dpo/beta": 0.08572488278150558, "beta_dpo/beta_margin_grad_mean": -0.37226271629333496, "beta_dpo/beta_margin_grad_std": 0.20196816325187683, "beta_dpo/beta_margin_mean": 0.7135973572731018, "beta_dpo/beta_margin_std": 1.2914289236068726, "beta_dpo/beta_used": 0.08572488278150558, "beta_dpo/beta_used_raw": 0.08572488278150558, "beta_dpo/gap_mean": 7.480558395385742, "beta_dpo/gap_std": 13.642951965332031, "beta_dpo/loss_margin_mean": 8.10063362121582, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.54572940287226, "grad_norm": 23.389602661132812, "learning_rate": 2.552884820191154e-07, "logits/chosen": 1.0572106838226318, "logits/rejected": 0.9873596429824829, "loss": 1.0122, "step": 361 }, { "beta_dpo/beta": 0.049856528639793396, "beta_dpo/beta_margin_grad_mean": -0.41956833004951477, "beta_dpo/beta_margin_grad_std": 0.18193066120147705, "beta_dpo/beta_margin_mean": 0.4650557339191437, "beta_dpo/beta_margin_std": 1.1645866632461548, "beta_dpo/beta_used": 0.049856528639793396, "beta_dpo/beta_used_raw": 0.01058843731880188, "beta_dpo/gap_mean": 7.225416660308838, "beta_dpo/gap_std": 13.535463333129883, "beta_dpo/loss_margin_mean": 6.341689586639404, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.54724111866969, "grad_norm": 16.414335250854492, "learning_rate": 2.53966490958702e-07, "logits/chosen": 1.2252509593963623, "logits/rejected": 1.0469253063201904, "loss": 1.1834, "step": 362 }, { "beta_dpo/beta": 0.1461520940065384, "beta_dpo/beta_margin_grad_mean": -0.3125424385070801, "beta_dpo/beta_margin_grad_std": 0.24862688779830933, "beta_dpo/beta_margin_mean": 1.4741637706756592, "beta_dpo/beta_margin_std": 2.2805867195129395, "beta_dpo/beta_used": 0.1461520940065384, "beta_dpo/beta_used_raw": 0.1461520940065384, "beta_dpo/gap_mean": 7.652776718139648, "beta_dpo/gap_std": 13.390401840209961, "beta_dpo/loss_margin_mean": 9.007533073425293, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5487528344671202, "grad_norm": 43.86406707763672, "learning_rate": 2.526443889470099e-07, "logits/chosen": 1.382947564125061, "logits/rejected": 1.213701844215393, "loss": 0.9243, "step": 363 }, { "beta_dpo/beta": 0.12054399400949478, "beta_dpo/beta_margin_grad_mean": -0.3811088800430298, "beta_dpo/beta_margin_grad_std": 0.24092726409435272, "beta_dpo/beta_margin_mean": 1.456974744796753, "beta_dpo/beta_margin_std": 2.946803331375122, "beta_dpo/beta_used": 0.12054399400949478, "beta_dpo/beta_used_raw": 0.10439993441104889, "beta_dpo/gap_mean": 8.08302116394043, "beta_dpo/gap_std": 13.675590515136719, "beta_dpo/loss_margin_mean": 10.058621406555176, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5502645502645502, "grad_norm": 42.604251861572266, "learning_rate": 2.513222129660744e-07, "logits/chosen": 1.12633216381073, "logits/rejected": 1.059206247329712, "loss": 1.133, "step": 364 }, { "beta_dpo/beta": 0.06535232812166214, "beta_dpo/beta_margin_grad_mean": -0.3880292773246765, "beta_dpo/beta_margin_grad_std": 0.15547937154769897, "beta_dpo/beta_margin_mean": 0.5160609483718872, "beta_dpo/beta_margin_std": 0.7484007477760315, "beta_dpo/beta_used": 0.06535232812166214, "beta_dpo/beta_used_raw": 0.06535232812166214, "beta_dpo/gap_mean": 8.05885124206543, "beta_dpo/gap_std": 13.430015563964844, "beta_dpo/loss_margin_mean": 7.844278335571289, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5517762660619804, "grad_norm": 15.027542114257812, "learning_rate": 2.5e-07, "logits/chosen": 1.1900737285614014, "logits/rejected": 1.177154302597046, "loss": 1.0322, "step": 365 }, { "beta_dpo/beta": 0.030097341164946556, "beta_dpo/beta_margin_grad_mean": -0.45781904458999634, "beta_dpo/beta_margin_grad_std": 0.15457406640052795, "beta_dpo/beta_margin_mean": 0.20955324172973633, "beta_dpo/beta_margin_std": 0.7605064511299133, "beta_dpo/beta_used": 0.030097341164946556, "beta_dpo/beta_used_raw": -0.00335543230175972, "beta_dpo/gap_mean": 7.919361591339111, "beta_dpo/gap_std": 13.844223022460938, "beta_dpo/loss_margin_mean": 7.396289348602295, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5532879818594104, "grad_norm": 10.480829238891602, "learning_rate": 2.486777870339255e-07, "logits/chosen": 1.0998948812484741, "logits/rejected": 1.0781323909759521, "loss": 1.2607, "step": 366 }, { "beta_dpo/beta": 0.07937921583652496, "beta_dpo/beta_margin_grad_mean": -0.41697001457214355, "beta_dpo/beta_margin_grad_std": 0.2050413191318512, "beta_dpo/beta_margin_mean": 0.40257585048675537, "beta_dpo/beta_margin_std": 1.4262551069259644, "beta_dpo/beta_used": 0.07937921583652496, "beta_dpo/beta_used_raw": -0.006307244300842285, "beta_dpo/gap_mean": 7.531491279602051, "beta_dpo/gap_std": 13.592772483825684, "beta_dpo/loss_margin_mean": 5.220627784729004, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5547996976568406, "grad_norm": 21.611459732055664, "learning_rate": 2.4735561105299014e-07, "logits/chosen": 0.9986228942871094, "logits/rejected": 0.9025825262069702, "loss": 1.0436, "step": 367 }, { "beta_dpo/beta": 0.09011980891227722, "beta_dpo/beta_margin_grad_mean": -0.408080130815506, "beta_dpo/beta_margin_grad_std": 0.23834280669689178, "beta_dpo/beta_margin_mean": 0.6598003506660461, "beta_dpo/beta_margin_std": 1.8577990531921387, "beta_dpo/beta_used": 0.09011980891227722, "beta_dpo/beta_used_raw": 0.06833744049072266, "beta_dpo/gap_mean": 7.3745951652526855, "beta_dpo/gap_std": 13.662035942077637, "beta_dpo/loss_margin_mean": 7.16412353515625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5563114134542706, "grad_norm": 28.219196319580078, "learning_rate": 2.46033509041298e-07, "logits/chosen": 0.8210684061050415, "logits/rejected": 0.817505955696106, "loss": 1.0656, "step": 368 }, { "beta_dpo/beta": 0.010743452236056328, "beta_dpo/beta_margin_grad_mean": -0.4860118627548218, "beta_dpo/beta_margin_grad_std": 0.04464971646666527, "beta_dpo/beta_margin_mean": 0.05712589621543884, "beta_dpo/beta_margin_std": 0.18325534462928772, "beta_dpo/beta_used": 0.010743452236056328, "beta_dpo/beta_used_raw": -0.0012179017066955566, "beta_dpo/gap_mean": 6.9191412925720215, "beta_dpo/gap_std": 13.665838241577148, "beta_dpo/loss_margin_mean": 4.59527063369751, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5578231292517006, "grad_norm": 4.703724384307861, "learning_rate": 2.447115179808846e-07, "logits/chosen": 0.9818326234817505, "logits/rejected": 0.9543051719665527, "loss": 1.3324, "step": 369 }, { "beta_dpo/beta": 0.26102516055107117, "beta_dpo/beta_margin_grad_mean": -0.2952536344528198, "beta_dpo/beta_margin_grad_std": 0.3286318778991699, "beta_dpo/beta_margin_mean": 2.9795680046081543, "beta_dpo/beta_margin_std": 5.268163204193115, "beta_dpo/beta_used": 0.26102516055107117, "beta_dpo/beta_used_raw": 0.26102516055107117, "beta_dpo/gap_mean": 7.429380416870117, "beta_dpo/gap_std": 13.954389572143555, "beta_dpo/loss_margin_mean": 10.275045394897461, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5593348450491308, "grad_norm": 74.33258056640625, "learning_rate": 2.4338967485068164e-07, "logits/chosen": 1.5886646509170532, "logits/rejected": 1.5320854187011719, "loss": 0.8804, "step": 370 }, { "beta_dpo/beta": 0.09414184093475342, "beta_dpo/beta_margin_grad_mean": -0.38183629512786865, "beta_dpo/beta_margin_grad_std": 0.2523384392261505, "beta_dpo/beta_margin_mean": 0.665998637676239, "beta_dpo/beta_margin_std": 1.4542341232299805, "beta_dpo/beta_used": 0.09414184093475342, "beta_dpo/beta_used_raw": 0.09414184093475342, "beta_dpo/gap_mean": 7.24754524230957, "beta_dpo/gap_std": 14.236856460571289, "beta_dpo/loss_margin_mean": 6.794817924499512, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5608465608465608, "grad_norm": 21.97515106201172, "learning_rate": 2.420680166254831e-07, "logits/chosen": 1.5336153507232666, "logits/rejected": 1.5053898096084595, "loss": 1.0365, "step": 371 }, { "beta_dpo/beta": 0.10913428664207458, "beta_dpo/beta_margin_grad_mean": -0.37646549940109253, "beta_dpo/beta_margin_grad_std": 0.246499165892601, "beta_dpo/beta_margin_mean": 1.1655247211456299, "beta_dpo/beta_margin_std": 2.3541650772094727, "beta_dpo/beta_used": 0.10913428664207458, "beta_dpo/beta_used_raw": 0.10913428664207458, "beta_dpo/gap_mean": 7.113511085510254, "beta_dpo/gap_std": 14.328683853149414, "beta_dpo/loss_margin_mean": 6.992242813110352, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.562358276643991, "grad_norm": 46.117122650146484, "learning_rate": 2.4074658027491044e-07, "logits/chosen": 1.2923402786254883, "logits/rejected": 1.1961686611175537, "loss": 1.2216, "step": 372 }, { "beta_dpo/beta": 0.10140194743871689, "beta_dpo/beta_margin_grad_mean": -0.39248228073120117, "beta_dpo/beta_margin_grad_std": 0.24916405975818634, "beta_dpo/beta_margin_mean": 0.6541171669960022, "beta_dpo/beta_margin_std": 1.4341338872909546, "beta_dpo/beta_used": 0.10140194743871689, "beta_dpo/beta_used_raw": 0.10140194743871689, "beta_dpo/gap_mean": 7.229263782501221, "beta_dpo/gap_std": 14.314950942993164, "beta_dpo/loss_margin_mean": 6.411010265350342, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.563869992441421, "grad_norm": 28.40692901611328, "learning_rate": 2.394254027623792e-07, "logits/chosen": 1.1281664371490479, "logits/rejected": 1.0395562648773193, "loss": 1.0579, "step": 373 }, { "beta_dpo/beta": 0.24720382690429688, "beta_dpo/beta_margin_grad_mean": -0.2915995121002197, "beta_dpo/beta_margin_grad_std": 0.3164134621620178, "beta_dpo/beta_margin_mean": 2.604962110519409, "beta_dpo/beta_margin_std": 4.2764787673950195, "beta_dpo/beta_used": 0.24720382690429688, "beta_dpo/beta_used_raw": 0.24720382690429688, "beta_dpo/gap_mean": 7.416611194610596, "beta_dpo/gap_std": 14.550976753234863, "beta_dpo/loss_margin_mean": 9.614022254943848, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5653817082388511, "grad_norm": 72.6122817993164, "learning_rate": 2.381045210440644e-07, "logits/chosen": 1.1132843494415283, "logits/rejected": 1.0851848125457764, "loss": 0.8943, "step": 374 }, { "beta_dpo/beta": 0.20675544440746307, "beta_dpo/beta_margin_grad_mean": -0.33286479115486145, "beta_dpo/beta_margin_grad_std": 0.3486374318599701, "beta_dpo/beta_margin_mean": 1.4182250499725342, "beta_dpo/beta_margin_std": 3.0830986499786377, "beta_dpo/beta_used": 0.20675544440746307, "beta_dpo/beta_used_raw": 0.20675544440746307, "beta_dpo/gap_mean": 7.565443515777588, "beta_dpo/gap_std": 14.595691680908203, "beta_dpo/loss_margin_mean": 6.784230709075928, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5668934240362812, "grad_norm": 52.81209182739258, "learning_rate": 2.3678397206786715e-07, "logits/chosen": 1.3927481174468994, "logits/rejected": 1.3600114583969116, "loss": 0.8848, "step": 375 }, { "beta_dpo/beta": 0.04299633204936981, "beta_dpo/beta_margin_grad_mean": -0.42016491293907166, "beta_dpo/beta_margin_grad_std": 0.15737684071063995, "beta_dpo/beta_margin_mean": 0.37388625741004944, "beta_dpo/beta_margin_std": 0.7507635354995728, "beta_dpo/beta_used": 0.04299633204936981, "beta_dpo/beta_used_raw": 0.04299633204936981, "beta_dpo/gap_mean": 7.693660259246826, "beta_dpo/gap_std": 14.870655059814453, "beta_dpo/loss_margin_mean": 8.864215850830078, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5684051398337112, "grad_norm": 13.12270736694336, "learning_rate": 2.3546379277238103e-07, "logits/chosen": 1.1703935861587524, "logits/rejected": 1.066407322883606, "loss": 1.2036, "step": 376 }, { "beta_dpo/beta": 0.11132267117500305, "beta_dpo/beta_margin_grad_mean": -0.3877478241920471, "beta_dpo/beta_margin_grad_std": 0.22526662051677704, "beta_dpo/beta_margin_mean": 0.8313254117965698, "beta_dpo/beta_margin_std": 1.8189681768417358, "beta_dpo/beta_used": 0.11132267117500305, "beta_dpo/beta_used_raw": 0.11132267117500305, "beta_dpo/gap_mean": 7.574334144592285, "beta_dpo/gap_std": 14.641395568847656, "beta_dpo/loss_margin_mean": 6.86510705947876, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5699168556311414, "grad_norm": 28.927581787109375, "learning_rate": 2.3414402008585886e-07, "logits/chosen": 1.182497501373291, "logits/rejected": 1.1518537998199463, "loss": 1.0515, "step": 377 }, { "beta_dpo/beta": 0.07517936825752258, "beta_dpo/beta_margin_grad_mean": -0.41363537311553955, "beta_dpo/beta_margin_grad_std": 0.22291339933872223, "beta_dpo/beta_margin_mean": 0.6230934858322144, "beta_dpo/beta_margin_std": 1.8685015439987183, "beta_dpo/beta_used": 0.07517936825752258, "beta_dpo/beta_used_raw": 0.07517936825752258, "beta_dpo/gap_mean": 7.3569440841674805, "beta_dpo/gap_std": 14.392763137817383, "beta_dpo/loss_margin_mean": 6.59552526473999, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5714285714285714, "grad_norm": 24.532310485839844, "learning_rate": 2.3282469092517977e-07, "logits/chosen": 1.4094161987304688, "logits/rejected": 1.3490303754806519, "loss": 1.119, "step": 378 }, { "beta_dpo/beta": 0.22540748119354248, "beta_dpo/beta_margin_grad_mean": -0.3058602511882782, "beta_dpo/beta_margin_grad_std": 0.3339531719684601, "beta_dpo/beta_margin_mean": 1.8993895053863525, "beta_dpo/beta_margin_std": 3.3336374759674072, "beta_dpo/beta_used": 0.22540748119354248, "beta_dpo/beta_used_raw": 0.22540748119354248, "beta_dpo/gap_mean": 7.495121002197266, "beta_dpo/gap_std": 14.666516304016113, "beta_dpo/loss_margin_mean": 8.280893325805664, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5729402872260015, "grad_norm": 60.54188919067383, "learning_rate": 2.3150584219481643e-07, "logits/chosen": 1.4404394626617432, "logits/rejected": 1.3509386777877808, "loss": 1.0322, "step": 379 }, { "beta_dpo/beta": 0.24636900424957275, "beta_dpo/beta_margin_grad_mean": -0.278732031583786, "beta_dpo/beta_margin_grad_std": 0.3075398802757263, "beta_dpo/beta_margin_mean": 2.4282400608062744, "beta_dpo/beta_margin_std": 4.70290470123291, "beta_dpo/beta_used": 0.24636900424957275, "beta_dpo/beta_used_raw": 0.24636900424957275, "beta_dpo/gap_mean": 7.887577056884766, "beta_dpo/gap_std": 14.830470085144043, "beta_dpo/loss_margin_mean": 9.414112091064453, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5744520030234316, "grad_norm": 58.42963409423828, "learning_rate": 2.3018751078580283e-07, "logits/chosen": 1.442448377609253, "logits/rejected": 1.3922609090805054, "loss": 0.9649, "step": 380 }, { "beta_dpo/beta": 0.1303085833787918, "beta_dpo/beta_margin_grad_mean": -0.38812023401260376, "beta_dpo/beta_margin_grad_std": 0.25397399067878723, "beta_dpo/beta_margin_mean": 1.1557432413101196, "beta_dpo/beta_margin_std": 3.0310275554656982, "beta_dpo/beta_used": 0.1303085833787918, "beta_dpo/beta_used_raw": 0.061051756143569946, "beta_dpo/gap_mean": 7.441038131713867, "beta_dpo/gap_std": 14.655248641967773, "beta_dpo/loss_margin_mean": 5.61381721496582, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5759637188208617, "grad_norm": 33.84221267700195, "learning_rate": 2.288697335747027e-07, "logits/chosen": 1.2152440547943115, "logits/rejected": 1.1710684299468994, "loss": 1.1738, "step": 381 }, { "beta_dpo/beta": 0.08941483497619629, "beta_dpo/beta_margin_grad_mean": -0.4051651060581207, "beta_dpo/beta_margin_grad_std": 0.21740548312664032, "beta_dpo/beta_margin_mean": 0.7790390849113464, "beta_dpo/beta_margin_std": 1.8734796047210693, "beta_dpo/beta_used": 0.08941483497619629, "beta_dpo/beta_used_raw": 0.08169528841972351, "beta_dpo/gap_mean": 7.350861549377441, "beta_dpo/gap_std": 14.560256958007812, "beta_dpo/loss_margin_mean": 6.928147315979004, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5774754346182918, "grad_norm": 28.881624221801758, "learning_rate": 2.2755254742257706e-07, "logits/chosen": 1.2367597818374634, "logits/rejected": 1.1454555988311768, "loss": 1.1139, "step": 382 }, { "beta_dpo/beta": 0.09634870290756226, "beta_dpo/beta_margin_grad_mean": -0.39008820056915283, "beta_dpo/beta_margin_grad_std": 0.25229260325431824, "beta_dpo/beta_margin_mean": 0.6619503498077393, "beta_dpo/beta_margin_std": 1.489913821220398, "beta_dpo/beta_used": 0.09634870290756226, "beta_dpo/beta_used_raw": 0.09634870290756226, "beta_dpo/gap_mean": 7.342663764953613, "beta_dpo/gap_std": 14.746681213378906, "beta_dpo/loss_margin_mean": 6.870544910430908, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5789871504157218, "grad_norm": 25.512887954711914, "learning_rate": 2.2623598917395436e-07, "logits/chosen": 1.021744966506958, "logits/rejected": 1.087134599685669, "loss": 1.0863, "step": 383 }, { "beta_dpo/beta": 0.0626574382185936, "beta_dpo/beta_margin_grad_mean": -0.3971060514450073, "beta_dpo/beta_margin_grad_std": 0.1797652393579483, "beta_dpo/beta_margin_mean": 0.4878333806991577, "beta_dpo/beta_margin_std": 0.8790075778961182, "beta_dpo/beta_used": 0.0626574382185936, "beta_dpo/beta_used_raw": 0.0626574382185936, "beta_dpo/gap_mean": 7.4141693115234375, "beta_dpo/gap_std": 14.61959457397461, "beta_dpo/loss_margin_mean": 7.721750736236572, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5804988662131519, "grad_norm": 17.06849479675293, "learning_rate": 2.2492009565579875e-07, "logits/chosen": 1.3298002481460571, "logits/rejected": 1.2677011489868164, "loss": 1.1088, "step": 384 }, { "beta_dpo/beta": 0.004450375679880381, "beta_dpo/beta_margin_grad_mean": -0.489875853061676, "beta_dpo/beta_margin_grad_std": 0.020829180255532265, "beta_dpo/beta_margin_mean": 0.04063662141561508, "beta_dpo/beta_margin_std": 0.08365319669246674, "beta_dpo/beta_used": 0.004450375679880381, "beta_dpo/beta_used_raw": -0.018079757690429688, "beta_dpo/gap_mean": 7.440016269683838, "beta_dpo/gap_std": 14.209115028381348, "beta_dpo/loss_margin_mean": 8.146535873413086, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.582010582010582, "grad_norm": 1.7125223875045776, "learning_rate": 2.2360490367648084e-07, "logits/chosen": 1.1286897659301758, "logits/rejected": 1.0630574226379395, "loss": 1.3614, "step": 385 }, { "beta_dpo/beta": 0.04179831221699715, "beta_dpo/beta_margin_grad_mean": -0.4530133903026581, "beta_dpo/beta_margin_grad_std": 0.12521444261074066, "beta_dpo/beta_margin_mean": 0.19750449061393738, "beta_dpo/beta_margin_std": 0.5692038536071777, "beta_dpo/beta_used": 0.04179831221699715, "beta_dpo/beta_used_raw": 0.04179831221699715, "beta_dpo/gap_mean": 7.274169921875, "beta_dpo/gap_std": 14.249858856201172, "beta_dpo/loss_margin_mean": 5.243067741394043, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5835222978080121, "grad_norm": 11.668978691101074, "learning_rate": 2.2229045002474724e-07, "logits/chosen": 1.163759708404541, "logits/rejected": 1.086532473564148, "loss": 1.1707, "step": 386 }, { "beta_dpo/beta": 0.10992548614740372, "beta_dpo/beta_margin_grad_mean": -0.35167068243026733, "beta_dpo/beta_margin_grad_std": 0.24510644376277924, "beta_dpo/beta_margin_mean": 1.0464832782745361, "beta_dpo/beta_margin_std": 1.9593183994293213, "beta_dpo/beta_used": 0.10992548614740372, "beta_dpo/beta_used_raw": 0.10992548614740372, "beta_dpo/gap_mean": 7.317193984985352, "beta_dpo/gap_std": 14.160797119140625, "beta_dpo/loss_margin_mean": 8.948436737060547, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5850340136054422, "grad_norm": 24.931472778320312, "learning_rate": 2.209767714686924e-07, "logits/chosen": 1.3423492908477783, "logits/rejected": 1.2523207664489746, "loss": 1.0216, "step": 387 }, { "beta_dpo/beta": 0.08252957463264465, "beta_dpo/beta_margin_grad_mean": -0.4158702790737152, "beta_dpo/beta_margin_grad_std": 0.2159751057624817, "beta_dpo/beta_margin_mean": 0.6033073663711548, "beta_dpo/beta_margin_std": 1.501455307006836, "beta_dpo/beta_used": 0.08252957463264465, "beta_dpo/beta_used_raw": 0.0030330345034599304, "beta_dpo/gap_mean": 7.176567077636719, "beta_dpo/gap_std": 13.799575805664062, "beta_dpo/loss_margin_mean": 4.720012187957764, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5865457294028723, "grad_norm": 31.726829528808594, "learning_rate": 2.1966390475472954e-07, "logits/chosen": 1.3642137050628662, "logits/rejected": 1.3656563758850098, "loss": 1.1469, "step": 388 }, { "beta_dpo/beta": 0.10920540988445282, "beta_dpo/beta_margin_grad_mean": -0.3846765160560608, "beta_dpo/beta_margin_grad_std": 0.2381884902715683, "beta_dpo/beta_margin_mean": 0.8563576936721802, "beta_dpo/beta_margin_std": 1.8833948373794556, "beta_dpo/beta_used": 0.10920540988445282, "beta_dpo/beta_used_raw": 0.10920540988445282, "beta_dpo/gap_mean": 6.994778633117676, "beta_dpo/gap_std": 13.427654266357422, "beta_dpo/loss_margin_mean": 7.5880303382873535, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5880574452003023, "grad_norm": 35.045440673828125, "learning_rate": 2.1835188660656265e-07, "logits/chosen": 1.0244269371032715, "logits/rejected": 1.0011541843414307, "loss": 1.1488, "step": 389 }, { "beta_dpo/beta": 0.033905960619449615, "beta_dpo/beta_margin_grad_mean": -0.43266651034355164, "beta_dpo/beta_margin_grad_std": 0.10585252940654755, "beta_dpo/beta_margin_mean": 0.28706708550453186, "beta_dpo/beta_margin_std": 0.4559902548789978, "beta_dpo/beta_used": 0.033905960619449615, "beta_dpo/beta_used_raw": 0.033905960619449615, "beta_dpo/gap_mean": 7.247902870178223, "beta_dpo/gap_std": 13.372995376586914, "beta_dpo/loss_margin_mean": 8.466453552246094, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5895691609977324, "grad_norm": 7.7616095542907715, "learning_rate": 2.170407537241599e-07, "logits/chosen": 1.4455749988555908, "logits/rejected": 1.369256615638733, "loss": 1.217, "step": 390 }, { "beta_dpo/beta": 0.22762346267700195, "beta_dpo/beta_margin_grad_mean": -0.2824380099773407, "beta_dpo/beta_margin_grad_std": 0.321077436208725, "beta_dpo/beta_margin_mean": 2.477963447570801, "beta_dpo/beta_margin_std": 3.847912073135376, "beta_dpo/beta_used": 0.22762346267700195, "beta_dpo/beta_used_raw": 0.22762346267700195, "beta_dpo/gap_mean": 7.8365912437438965, "beta_dpo/gap_std": 13.6998872756958, "beta_dpo/loss_margin_mean": 11.036505699157715, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5910808767951625, "grad_norm": 79.25921630859375, "learning_rate": 2.1573054278272636e-07, "logits/chosen": 0.9788742065429688, "logits/rejected": 0.9468849301338196, "loss": 0.943, "step": 391 }, { "beta_dpo/beta": 0.18245580792427063, "beta_dpo/beta_margin_grad_mean": -0.27842310070991516, "beta_dpo/beta_margin_grad_std": 0.2902601659297943, "beta_dpo/beta_margin_mean": 2.0409109592437744, "beta_dpo/beta_margin_std": 2.8519318103790283, "beta_dpo/beta_used": 0.18245580792427063, "beta_dpo/beta_used_raw": 0.18245580792427063, "beta_dpo/gap_mean": 8.379539489746094, "beta_dpo/gap_std": 13.949304580688477, "beta_dpo/loss_margin_mean": 10.906364440917969, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5925925925925926, "grad_norm": 38.86481475830078, "learning_rate": 2.1442129043167873e-07, "logits/chosen": 1.164830207824707, "logits/rejected": 1.159719705581665, "loss": 0.8137, "step": 392 }, { "beta_dpo/beta": 0.0202580988407135, "beta_dpo/beta_margin_grad_mean": -0.4727727770805359, "beta_dpo/beta_margin_grad_std": 0.07772287726402283, "beta_dpo/beta_margin_mean": 0.11153332889080048, "beta_dpo/beta_margin_std": 0.3320378363132477, "beta_dpo/beta_used": 0.0202580988407135, "beta_dpo/beta_used_raw": 0.016610777005553246, "beta_dpo/gap_mean": 8.489538192749023, "beta_dpo/gap_std": 13.79062271118164, "beta_dpo/loss_margin_mean": 7.332988262176514, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5941043083900227, "grad_norm": 7.228002548217773, "learning_rate": 2.131130332936195e-07, "logits/chosen": 0.9965734481811523, "logits/rejected": 0.9394361972808838, "loss": 1.2643, "step": 393 }, { "beta_dpo/beta": 0.10417785495519638, "beta_dpo/beta_margin_grad_mean": -0.3671894669532776, "beta_dpo/beta_margin_grad_std": 0.23936332762241364, "beta_dpo/beta_margin_mean": 0.9281326532363892, "beta_dpo/beta_margin_std": 1.7436189651489258, "beta_dpo/beta_used": 0.10417785495519638, "beta_dpo/beta_used_raw": 0.10417785495519638, "beta_dpo/gap_mean": 8.309196472167969, "beta_dpo/gap_std": 13.411027908325195, "beta_dpo/loss_margin_mean": 8.375308990478516, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5956160241874527, "grad_norm": 22.398984909057617, "learning_rate": 2.1180580796331323e-07, "logits/chosen": 1.3453733921051025, "logits/rejected": 1.2923827171325684, "loss": 0.9597, "step": 394 }, { "beta_dpo/beta": 0.08681511133909225, "beta_dpo/beta_margin_grad_mean": -0.40546634793281555, "beta_dpo/beta_margin_grad_std": 0.2304772436618805, "beta_dpo/beta_margin_mean": 0.7605048418045044, "beta_dpo/beta_margin_std": 1.90763258934021, "beta_dpo/beta_used": 0.08681511133909225, "beta_dpo/beta_used_raw": -0.011999711394309998, "beta_dpo/gap_mean": 7.920681476593018, "beta_dpo/gap_std": 13.151678085327148, "beta_dpo/loss_margin_mean": 6.229694843292236, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5971277399848829, "grad_norm": 18.830989837646484, "learning_rate": 2.104996510066625e-07, "logits/chosen": 1.1814665794372559, "logits/rejected": 1.0725936889648438, "loss": 1.054, "step": 395 }, { "beta_dpo/beta": 0.0445711687207222, "beta_dpo/beta_margin_grad_mean": -0.41445091366767883, "beta_dpo/beta_margin_grad_std": 0.12426353991031647, "beta_dpo/beta_margin_mean": 0.392553448677063, "beta_dpo/beta_margin_std": 0.6115043759346008, "beta_dpo/beta_used": 0.0445711687207222, "beta_dpo/beta_used_raw": 0.0445711687207222, "beta_dpo/gap_mean": 8.018841743469238, "beta_dpo/gap_std": 12.900693893432617, "beta_dpo/loss_margin_mean": 7.28317928314209, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.5986394557823129, "grad_norm": 17.15723419189453, "learning_rate": 2.0919459895968517e-07, "logits/chosen": 1.4864282608032227, "logits/rejected": 1.3753361701965332, "loss": 1.1277, "step": 396 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4987220764160156, "beta_dpo/beta_margin_grad_std": 0.0034480225294828415, "beta_dpo/beta_margin_mean": 0.0051118070259690285, "beta_dpo/beta_margin_std": 0.013792959041893482, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.0704718828201294, "beta_dpo/gap_mean": 7.361849784851074, "beta_dpo/gap_std": 12.713207244873047, "beta_dpo/loss_margin_mean": 5.111806869506836, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.600151171579743, "grad_norm": 0.3319074213504791, "learning_rate": 2.078906883274924e-07, "logits/chosen": 1.247459888458252, "logits/rejected": 1.2278985977172852, "loss": 1.3818, "step": 397 }, { "beta_dpo/beta": 0.12302777916193008, "beta_dpo/beta_margin_grad_mean": -0.38068652153015137, "beta_dpo/beta_margin_grad_std": 0.24687737226486206, "beta_dpo/beta_margin_mean": 1.1479579210281372, "beta_dpo/beta_margin_std": 2.429410219192505, "beta_dpo/beta_used": 0.12302777916193008, "beta_dpo/beta_used_raw": 0.12302777916193008, "beta_dpo/gap_mean": 7.570580005645752, "beta_dpo/gap_std": 12.983465194702148, "beta_dpo/loss_margin_mean": 9.056266784667969, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6016628873771731, "grad_norm": 28.814918518066406, "learning_rate": 2.065879555832674e-07, "logits/chosen": 1.1736929416656494, "logits/rejected": 1.0705779790878296, "loss": 1.0416, "step": 398 }, { "beta_dpo/beta": 0.1019376814365387, "beta_dpo/beta_margin_grad_mean": -0.3764630854129791, "beta_dpo/beta_margin_grad_std": 0.25056028366088867, "beta_dpo/beta_margin_mean": 1.1306095123291016, "beta_dpo/beta_margin_std": 2.293529748916626, "beta_dpo/beta_used": 0.1019376814365387, "beta_dpo/beta_used_raw": 0.022106006741523743, "beta_dpo/gap_mean": 7.877155303955078, "beta_dpo/gap_std": 12.939423561096191, "beta_dpo/loss_margin_mean": 8.506237983703613, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6031746031746031, "grad_norm": 34.429656982421875, "learning_rate": 2.052864371672457e-07, "logits/chosen": 1.2226908206939697, "logits/rejected": 1.0775423049926758, "loss": 1.1386, "step": 399 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49878740310668945, "beta_dpo/beta_margin_grad_std": 0.002995472401380539, "beta_dpo/beta_margin_mean": 0.004850634373724461, "beta_dpo/beta_margin_std": 0.011982507072389126, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.10486699640750885, "beta_dpo/gap_mean": 7.399901390075684, "beta_dpo/gap_std": 12.670374870300293, "beta_dpo/loss_margin_mean": 4.8506340980529785, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6046863189720333, "grad_norm": 0.3414646089076996, "learning_rate": 2.0398616948569493e-07, "logits/chosen": 1.4321517944335938, "logits/rejected": 1.3374395370483398, "loss": 1.3823, "step": 400 }, { "epoch": 0.6046863189720333, "eval_beta_dpo/beta": 0.10191422700881958, "eval_beta_dpo/beta_margin_grad_mean": -0.38811933994293213, "eval_beta_dpo/beta_margin_grad_std": 0.17382743954658508, "eval_beta_dpo/beta_margin_mean": 0.8735393285751343, "eval_beta_dpo/beta_margin_std": 1.3551836013793945, "eval_beta_dpo/beta_used": 0.10191422700881958, "eval_beta_dpo/beta_used_raw": 0.08637029677629471, "eval_beta_dpo/gap_mean": 7.206363201141357, "eval_beta_dpo/gap_std": 12.652938842773438, "eval_beta_dpo/loss_margin_mean": 6.979201793670654, "eval_beta_dpo/mask_keep_frac": 1.0, "eval_logits/chosen": 1.2964799404144287, "eval_logits/rejected": 1.2259721755981445, "eval_loss": 0.5950996279716492, "eval_runtime": 43.5338, "eval_samples_per_second": 52.901, "eval_steps_per_second": 1.654, "step": 400 }, { "beta_dpo/beta": 0.22263628244400024, "beta_dpo/beta_margin_grad_mean": -0.23202191293239594, "beta_dpo/beta_margin_grad_std": 0.28018221259117126, "beta_dpo/beta_margin_mean": 2.3617444038391113, "beta_dpo/beta_margin_std": 2.6966373920440674, "beta_dpo/beta_used": 0.22263628244400024, "beta_dpo/beta_used_raw": 0.22263628244400024, "beta_dpo/gap_mean": 7.699178695678711, "beta_dpo/gap_std": 12.536005973815918, "beta_dpo/loss_margin_mean": 10.631646156311035, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6061980347694633, "grad_norm": 38.35382080078125, "learning_rate": 2.0268718890989752e-07, "logits/chosen": 1.3760192394256592, "logits/rejected": 1.285376787185669, "loss": 0.7726, "step": 401 }, { "beta_dpo/beta": 0.10150312632322311, "beta_dpo/beta_margin_grad_mean": -0.3532201647758484, "beta_dpo/beta_margin_grad_std": 0.22857801616191864, "beta_dpo/beta_margin_mean": 0.8699325323104858, "beta_dpo/beta_margin_std": 1.4455386400222778, "beta_dpo/beta_used": 0.10150312632322311, "beta_dpo/beta_used_raw": 0.10150312632322311, "beta_dpo/gap_mean": 7.951926231384277, "beta_dpo/gap_std": 12.835227012634277, "beta_dpo/loss_margin_mean": 8.36948013305664, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6077097505668935, "grad_norm": 26.841323852539062, "learning_rate": 2.013895317751323e-07, "logits/chosen": 1.2534160614013672, "logits/rejected": 1.2528060674667358, "loss": 0.9371, "step": 402 }, { "beta_dpo/beta": 0.07242181897163391, "beta_dpo/beta_margin_grad_mean": -0.3970552384853363, "beta_dpo/beta_margin_grad_std": 0.21366021037101746, "beta_dpo/beta_margin_mean": 0.7245399951934814, "beta_dpo/beta_margin_std": 1.604691743850708, "beta_dpo/beta_used": 0.07242181897163391, "beta_dpo/beta_used_raw": 0.0063858553767204285, "beta_dpo/gap_mean": 7.986859321594238, "beta_dpo/gap_std": 13.46160888671875, "beta_dpo/loss_margin_mean": 8.63148021697998, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6092214663643235, "grad_norm": 19.193462371826172, "learning_rate": 2.0009323437965898e-07, "logits/chosen": 1.312724232673645, "logits/rejected": 1.2132437229156494, "loss": 1.1025, "step": 403 }, { "beta_dpo/beta": 0.18393541872501373, "beta_dpo/beta_margin_grad_mean": -0.30706873536109924, "beta_dpo/beta_margin_grad_std": 0.27825888991355896, "beta_dpo/beta_margin_mean": 1.7985137701034546, "beta_dpo/beta_margin_std": 2.601863145828247, "beta_dpo/beta_used": 0.18393541872501373, "beta_dpo/beta_used_raw": 0.18393541872501373, "beta_dpo/gap_mean": 8.376396179199219, "beta_dpo/gap_std": 13.487390518188477, "beta_dpo/loss_margin_mean": 9.271535873413086, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6107331821617535, "grad_norm": 37.4723014831543, "learning_rate": 1.9879833298370237e-07, "logits/chosen": 1.2238993644714355, "logits/rejected": 1.1487646102905273, "loss": 0.825, "step": 404 }, { "beta_dpo/beta": 0.06863485276699066, "beta_dpo/beta_margin_grad_mean": -0.383706271648407, "beta_dpo/beta_margin_grad_std": 0.1839965283870697, "beta_dpo/beta_margin_mean": 0.762927234172821, "beta_dpo/beta_margin_std": 1.382511019706726, "beta_dpo/beta_used": 0.06863485276699066, "beta_dpo/beta_used_raw": -0.03406190127134323, "beta_dpo/gap_mean": 8.272315979003906, "beta_dpo/gap_std": 13.200721740722656, "beta_dpo/loss_margin_mean": 6.787307262420654, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6122448979591837, "grad_norm": 18.98505401611328, "learning_rate": 1.975048638084379e-07, "logits/chosen": 1.0490002632141113, "logits/rejected": 1.010096788406372, "loss": 1.0575, "step": 405 }, { "beta_dpo/beta": 0.2777172923088074, "beta_dpo/beta_margin_grad_mean": -0.2499067783355713, "beta_dpo/beta_margin_grad_std": 0.3056153953075409, "beta_dpo/beta_margin_mean": 2.591289758682251, "beta_dpo/beta_margin_std": 4.360300064086914, "beta_dpo/beta_used": 0.2777172923088074, "beta_dpo/beta_used_raw": 0.2777172923088074, "beta_dpo/gap_mean": 8.169546127319336, "beta_dpo/gap_std": 13.291061401367188, "beta_dpo/loss_margin_mean": 9.693116188049316, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6137566137566137, "grad_norm": 27.147951126098633, "learning_rate": 1.9621286303497914e-07, "logits/chosen": 1.38291335105896, "logits/rejected": 1.1928125619888306, "loss": 0.4666, "step": 406 }, { "beta_dpo/beta": 0.09770042449235916, "beta_dpo/beta_margin_grad_mean": -0.3965763449668884, "beta_dpo/beta_margin_grad_std": 0.2294262796640396, "beta_dpo/beta_margin_mean": 0.648348331451416, "beta_dpo/beta_margin_std": 1.6382852792739868, "beta_dpo/beta_used": 0.09770042449235916, "beta_dpo/beta_used_raw": 0.09770042449235916, "beta_dpo/gap_mean": 7.964360237121582, "beta_dpo/gap_std": 13.302556991577148, "beta_dpo/loss_margin_mean": 6.1560211181640625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6152683295540439, "grad_norm": 26.721057891845703, "learning_rate": 1.9492236680336483e-07, "logits/chosen": 1.1335489749908447, "logits/rejected": 0.9993535280227661, "loss": 0.9793, "step": 407 }, { "beta_dpo/beta": 0.03132964298129082, "beta_dpo/beta_margin_grad_mean": -0.42250344157218933, "beta_dpo/beta_margin_grad_std": 0.13585536181926727, "beta_dpo/beta_margin_mean": 0.3733391761779785, "beta_dpo/beta_margin_std": 0.6798368096351624, "beta_dpo/beta_used": 0.03132964298129082, "beta_dpo/beta_used_raw": -0.010662967339158058, "beta_dpo/gap_mean": 8.06380844116211, "beta_dpo/gap_std": 13.13924503326416, "beta_dpo/loss_margin_mean": 9.731854438781738, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6167800453514739, "grad_norm": 9.80321216583252, "learning_rate": 1.9363341121154895e-07, "logits/chosen": 1.1080222129821777, "logits/rejected": 0.993887186050415, "loss": 1.2241, "step": 408 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49895596504211426, "beta_dpo/beta_margin_grad_std": 0.0032634278759360313, "beta_dpo/beta_margin_mean": 0.0041763512417674065, "beta_dpo/beta_margin_std": 0.013054397888481617, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.13152112066745758, "beta_dpo/gap_mean": 7.688338756561279, "beta_dpo/gap_std": 13.180634498596191, "beta_dpo/loss_margin_mean": 4.1763505935668945, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.618291761148904, "grad_norm": 0.39368870854377747, "learning_rate": 1.9234603231438994e-07, "logits/chosen": 1.1868549585342407, "logits/rejected": 1.1779615879058838, "loss": 1.3825, "step": 409 }, { "beta_dpo/beta": 0.06593397259712219, "beta_dpo/beta_margin_grad_mean": -0.3910011649131775, "beta_dpo/beta_margin_grad_std": 0.18957333266735077, "beta_dpo/beta_margin_mean": 0.7101063132286072, "beta_dpo/beta_margin_std": 1.3572521209716797, "beta_dpo/beta_used": 0.06593397259712219, "beta_dpo/beta_used_raw": 0.02093401923775673, "beta_dpo/gap_mean": 7.732787132263184, "beta_dpo/gap_std": 13.139517784118652, "beta_dpo/loss_margin_mean": 8.426246643066406, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6198034769463341, "grad_norm": 23.81303596496582, "learning_rate": 1.9106026612264315e-07, "logits/chosen": 0.9190754890441895, "logits/rejected": 0.8898967504501343, "loss": 1.1263, "step": 410 }, { "beta_dpo/beta": 0.09488093107938766, "beta_dpo/beta_margin_grad_mean": -0.38552436232566833, "beta_dpo/beta_margin_grad_std": 0.23449452221393585, "beta_dpo/beta_margin_mean": 0.7124552130699158, "beta_dpo/beta_margin_std": 1.548302412033081, "beta_dpo/beta_used": 0.09488093107938766, "beta_dpo/beta_used_raw": 0.09488093107938766, "beta_dpo/gap_mean": 7.6596527099609375, "beta_dpo/gap_std": 13.327152252197266, "beta_dpo/loss_margin_mean": 8.116888046264648, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6213151927437641, "grad_norm": 26.718870162963867, "learning_rate": 1.8977614860195296e-07, "logits/chosen": 1.1888914108276367, "logits/rejected": 1.1344188451766968, "loss": 1.0957, "step": 411 }, { "beta_dpo/beta": 0.0628020390868187, "beta_dpo/beta_margin_grad_mean": -0.415872186422348, "beta_dpo/beta_margin_grad_std": 0.18325024843215942, "beta_dpo/beta_margin_mean": 0.4076176881790161, "beta_dpo/beta_margin_std": 0.8952716588973999, "beta_dpo/beta_used": 0.0628020390868187, "beta_dpo/beta_used_raw": 0.0628020390868187, "beta_dpo/gap_mean": 7.711529731750488, "beta_dpo/gap_std": 13.572291374206543, "beta_dpo/loss_margin_mean": 6.901672840118408, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6228269085411943, "grad_norm": 16.974454879760742, "learning_rate": 1.8849371567184662e-07, "logits/chosen": 1.0629103183746338, "logits/rejected": 1.0026750564575195, "loss": 1.0983, "step": 412 }, { "beta_dpo/beta": 0.02931269444525242, "beta_dpo/beta_margin_grad_mean": -0.46073606610298157, "beta_dpo/beta_margin_grad_std": 0.13029100000858307, "beta_dpo/beta_margin_mean": 0.1788182109594345, "beta_dpo/beta_margin_std": 0.6013691425323486, "beta_dpo/beta_used": 0.02931269444525242, "beta_dpo/beta_used_raw": -0.01891515776515007, "beta_dpo/gap_mean": 7.34831428527832, "beta_dpo/gap_std": 13.715509414672852, "beta_dpo/loss_margin_mean": 6.074856281280518, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6243386243386243, "grad_norm": 10.13986873626709, "learning_rate": 1.872130032047302e-07, "logits/chosen": 1.0452518463134766, "logits/rejected": 0.9700994491577148, "loss": 1.2518, "step": 413 }, { "beta_dpo/beta": 0.008443448692560196, "beta_dpo/beta_margin_grad_mean": -0.4799124598503113, "beta_dpo/beta_margin_grad_std": 0.04836396127939224, "beta_dpo/beta_margin_mean": 0.08206567168235779, "beta_dpo/beta_margin_std": 0.19825638830661774, "beta_dpo/beta_used": 0.008443448692560196, "beta_dpo/beta_used_raw": -0.0005194572731852531, "beta_dpo/gap_mean": 7.575908184051514, "beta_dpo/gap_std": 13.915918350219727, "beta_dpo/loss_margin_mean": 9.079741477966309, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6258503401360545, "grad_norm": 2.957254648208618, "learning_rate": 1.8593404702488436e-07, "logits/chosen": 1.1927499771118164, "logits/rejected": 1.1158475875854492, "loss": 1.3398, "step": 414 }, { "beta_dpo/beta": 0.153883695602417, "beta_dpo/beta_margin_grad_mean": -0.3574841320514679, "beta_dpo/beta_margin_grad_std": 0.30664384365081787, "beta_dpo/beta_margin_mean": 1.2643290758132935, "beta_dpo/beta_margin_std": 2.7988193035125732, "beta_dpo/beta_used": 0.153883695602417, "beta_dpo/beta_used_raw": 0.153883695602417, "beta_dpo/gap_mean": 7.751776695251465, "beta_dpo/gap_std": 14.118085861206055, "beta_dpo/loss_margin_mean": 8.410996437072754, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6273620559334845, "grad_norm": 49.67338180541992, "learning_rate": 1.846568829074628e-07, "logits/chosen": 1.3675744533538818, "logits/rejected": 1.315309762954712, "loss": 1.0424, "step": 415 }, { "beta_dpo/beta": 0.057433560490608215, "beta_dpo/beta_margin_grad_mean": -0.4372212290763855, "beta_dpo/beta_margin_grad_std": 0.18154680728912354, "beta_dpo/beta_margin_mean": 0.38836348056793213, "beta_dpo/beta_margin_std": 1.0910459756851196, "beta_dpo/beta_used": 0.057433560490608215, "beta_dpo/beta_used_raw": 0.006889820098876953, "beta_dpo/gap_mean": 7.063965797424316, "beta_dpo/gap_std": 13.99463939666748, "beta_dpo/loss_margin_mean": 3.8573639392852783, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6288737717309146, "grad_norm": 19.25937271118164, "learning_rate": 1.8338154657749128e-07, "logits/chosen": 1.1650896072387695, "logits/rejected": 1.077005386352539, "loss": 1.227, "step": 416 }, { "beta_dpo/beta": 0.16283152997493744, "beta_dpo/beta_margin_grad_mean": -0.29045432806015015, "beta_dpo/beta_margin_grad_std": 0.2815614640712738, "beta_dpo/beta_margin_mean": 1.6562851667404175, "beta_dpo/beta_margin_std": 2.3946468830108643, "beta_dpo/beta_used": 0.16283152997493744, "beta_dpo/beta_used_raw": 0.16283152997493744, "beta_dpo/gap_mean": 7.376601219177246, "beta_dpo/gap_std": 14.058598518371582, "beta_dpo/loss_margin_mean": 9.845354080200195, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6303854875283447, "grad_norm": 47.979278564453125, "learning_rate": 1.8210807370886849e-07, "logits/chosen": 1.4944508075714111, "logits/rejected": 1.4134340286254883, "loss": 0.9838, "step": 417 }, { "beta_dpo/beta": 0.0036787008866667747, "beta_dpo/beta_margin_grad_mean": -0.4948444366455078, "beta_dpo/beta_margin_grad_std": 0.016248730942606926, "beta_dpo/beta_margin_mean": 0.020664792507886887, "beta_dpo/beta_margin_std": 0.06512568145990372, "beta_dpo/beta_used": 0.0036787008866667747, "beta_dpo/beta_used_raw": -0.04231725633144379, "beta_dpo/gap_mean": 7.137986183166504, "beta_dpo/gap_std": 14.092864036560059, "beta_dpo/loss_margin_mean": 3.5258655548095703, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6318972033257747, "grad_norm": 1.5100537538528442, "learning_rate": 1.8083649992336825e-07, "logits/chosen": 1.3490641117095947, "logits/rejected": 1.325473666191101, "loss": 1.3665, "step": 418 }, { "beta_dpo/beta": 0.1923871785402298, "beta_dpo/beta_margin_grad_mean": -0.2901119589805603, "beta_dpo/beta_margin_grad_std": 0.25045278668403625, "beta_dpo/beta_margin_mean": 2.2992641925811768, "beta_dpo/beta_margin_std": 3.499342679977417, "beta_dpo/beta_used": 0.1923871785402298, "beta_dpo/beta_used_raw": 0.1923871785402298, "beta_dpo/gap_mean": 7.463541507720947, "beta_dpo/gap_std": 14.002967834472656, "beta_dpo/loss_margin_mean": 10.991048812866211, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6334089191232048, "grad_norm": 33.13517761230469, "learning_rate": 1.7956686078964255e-07, "logits/chosen": 1.0279557704925537, "logits/rejected": 0.9559741020202637, "loss": 0.8391, "step": 419 }, { "beta_dpo/beta": 0.06206429749727249, "beta_dpo/beta_margin_grad_mean": -0.4446491003036499, "beta_dpo/beta_margin_grad_std": 0.2273784577846527, "beta_dpo/beta_margin_mean": 0.3325929343700409, "beta_dpo/beta_margin_std": 1.5433344841003418, "beta_dpo/beta_used": 0.06206429749727249, "beta_dpo/beta_used_raw": 0.0054812245070934296, "beta_dpo/gap_mean": 7.510132789611816, "beta_dpo/gap_std": 14.362464904785156, "beta_dpo/loss_margin_mean": 6.678454399108887, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6349206349206349, "grad_norm": 20.000598907470703, "learning_rate": 1.782991918222275e-07, "logits/chosen": 1.1560618877410889, "logits/rejected": 1.063835620880127, "loss": 1.2286, "step": 420 }, { "beta_dpo/beta": 0.2239060401916504, "beta_dpo/beta_margin_grad_mean": -0.3560366630554199, "beta_dpo/beta_margin_grad_std": 0.3599107563495636, "beta_dpo/beta_margin_mean": 1.437946081161499, "beta_dpo/beta_margin_std": 3.8129684925079346, "beta_dpo/beta_used": 0.2239060401916504, "beta_dpo/beta_used_raw": 0.2239060401916504, "beta_dpo/gap_mean": 7.295876502990723, "beta_dpo/gap_std": 14.796440124511719, "beta_dpo/loss_margin_mean": 6.508144378662109, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.636432350718065, "grad_norm": 64.45843505859375, "learning_rate": 1.7703352848054887e-07, "logits/chosen": 1.0824806690216064, "logits/rejected": 0.9518294334411621, "loss": 1.1997, "step": 421 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49806472659111023, "beta_dpo/beta_margin_grad_std": 0.00356177962385118, "beta_dpo/beta_margin_mean": 0.0077416617423295975, "beta_dpo/beta_margin_std": 0.014248386025428772, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.10803447663784027, "beta_dpo/gap_mean": 7.264566898345947, "beta_dpo/gap_std": 14.936932563781738, "beta_dpo/loss_margin_mean": 7.741661071777344, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6379440665154951, "grad_norm": 0.3464619219303131, "learning_rate": 1.7576990616793137e-07, "logits/chosen": 1.0727565288543701, "logits/rejected": 1.0302057266235352, "loss": 1.3825, "step": 422 }, { "beta_dpo/beta": 0.15241782367229462, "beta_dpo/beta_margin_grad_mean": -0.32938140630722046, "beta_dpo/beta_margin_grad_std": 0.2497241348028183, "beta_dpo/beta_margin_mean": 1.409651279449463, "beta_dpo/beta_margin_std": 2.4026806354522705, "beta_dpo/beta_used": 0.15241782367229462, "beta_dpo/beta_used_raw": 0.15241782367229462, "beta_dpo/gap_mean": 7.592950344085693, "beta_dpo/gap_std": 14.759190559387207, "beta_dpo/loss_margin_mean": 8.833235740661621, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6394557823129252, "grad_norm": 33.850650787353516, "learning_rate": 1.745083602306071e-07, "logits/chosen": 1.3902552127838135, "logits/rejected": 1.2815285921096802, "loss": 0.868, "step": 423 }, { "beta_dpo/beta": 0.10763978958129883, "beta_dpo/beta_margin_grad_mean": -0.3627639710903168, "beta_dpo/beta_margin_grad_std": 0.23971767723560333, "beta_dpo/beta_margin_mean": 0.8668607473373413, "beta_dpo/beta_margin_std": 1.660807728767395, "beta_dpo/beta_used": 0.10763978958129883, "beta_dpo/beta_used_raw": 0.10763978958129883, "beta_dpo/gap_mean": 7.692775726318359, "beta_dpo/gap_std": 14.601524353027344, "beta_dpo/loss_margin_mean": 8.42215633392334, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6409674981103552, "grad_norm": 24.332290649414062, "learning_rate": 1.7324892595672804e-07, "logits/chosen": 1.3804824352264404, "logits/rejected": 1.353201150894165, "loss": 0.9775, "step": 424 }, { "beta_dpo/beta": 0.22863678634166718, "beta_dpo/beta_margin_grad_mean": -0.2999263405799866, "beta_dpo/beta_margin_grad_std": 0.3022958040237427, "beta_dpo/beta_margin_mean": 2.481389045715332, "beta_dpo/beta_margin_std": 3.8900444507598877, "beta_dpo/beta_used": 0.22863678634166718, "beta_dpo/beta_used_raw": 0.22863678634166718, "beta_dpo/gap_mean": 8.06348705291748, "beta_dpo/gap_std": 14.56132698059082, "beta_dpo/loss_margin_mean": 10.167325019836426, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6424792139077853, "grad_norm": 64.42094421386719, "learning_rate": 1.7199163857537824e-07, "logits/chosen": 1.2863309383392334, "logits/rejected": 1.2393105030059814, "loss": 1.0282, "step": 425 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4992324113845825, "beta_dpo/beta_margin_grad_std": 0.004430633503943682, "beta_dpo/beta_margin_mean": 0.0030706829857081175, "beta_dpo/beta_margin_std": 0.01772434450685978, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.1351933777332306, "beta_dpo/gap_mean": 7.544498443603516, "beta_dpo/gap_std": 14.976876258850098, "beta_dpo/loss_margin_mean": 3.0706827640533447, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6439909297052154, "grad_norm": 0.3542765974998474, "learning_rate": 1.7073653325558828e-07, "logits/chosen": 1.4472196102142334, "logits/rejected": 1.4191794395446777, "loss": 1.3827, "step": 426 }, { "beta_dpo/beta": 0.033856652677059174, "beta_dpo/beta_margin_grad_mean": -0.4432615637779236, "beta_dpo/beta_margin_grad_std": 0.1333458572626114, "beta_dpo/beta_margin_mean": 0.2626633048057556, "beta_dpo/beta_margin_std": 0.6418102383613586, "beta_dpo/beta_used": 0.033856652677059174, "beta_dpo/beta_used_raw": 0.033856652677059174, "beta_dpo/gap_mean": 7.30242919921875, "beta_dpo/gap_std": 15.157771110534668, "beta_dpo/loss_margin_mean": 7.709880828857422, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6455026455026455, "grad_norm": 11.39405632019043, "learning_rate": 1.6948364510535218e-07, "logits/chosen": 1.1545445919036865, "logits/rejected": 1.0615546703338623, "loss": 1.2381, "step": 427 }, { "beta_dpo/beta": 0.17536821961402893, "beta_dpo/beta_margin_grad_mean": -0.32765766978263855, "beta_dpo/beta_margin_grad_std": 0.29305100440979004, "beta_dpo/beta_margin_mean": 1.2307204008102417, "beta_dpo/beta_margin_std": 2.660304069519043, "beta_dpo/beta_used": 0.17536821961402893, "beta_dpo/beta_used_raw": 0.17536821961402893, "beta_dpo/gap_mean": 7.2855329513549805, "beta_dpo/gap_std": 15.077669143676758, "beta_dpo/loss_margin_mean": 7.104245185852051, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6470143613000756, "grad_norm": 45.964027404785156, "learning_rate": 1.6823300917064458e-07, "logits/chosen": 1.4189622402191162, "logits/rejected": 1.3559203147888184, "loss": 0.8662, "step": 428 }, { "beta_dpo/beta": 0.25602617859840393, "beta_dpo/beta_margin_grad_mean": -0.2635319232940674, "beta_dpo/beta_margin_grad_std": 0.33962488174438477, "beta_dpo/beta_margin_mean": 2.2876291275024414, "beta_dpo/beta_margin_std": 3.774615526199341, "beta_dpo/beta_used": 0.25602617859840393, "beta_dpo/beta_used_raw": 0.25602617859840393, "beta_dpo/gap_mean": 7.527216911315918, "beta_dpo/gap_std": 14.919697761535645, "beta_dpo/loss_margin_mean": 8.933639526367188, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6485260770975056, "grad_norm": 61.03815460205078, "learning_rate": 1.669846604344412e-07, "logits/chosen": 1.4187898635864258, "logits/rejected": 1.4321041107177734, "loss": 0.9074, "step": 429 }, { "beta_dpo/beta": 0.2114437073469162, "beta_dpo/beta_margin_grad_mean": -0.2747531235218048, "beta_dpo/beta_margin_grad_std": 0.2975596785545349, "beta_dpo/beta_margin_mean": 2.024601697921753, "beta_dpo/beta_margin_std": 2.9273641109466553, "beta_dpo/beta_used": 0.2114437073469162, "beta_dpo/beta_used_raw": 0.2114437073469162, "beta_dpo/gap_mean": 7.846027374267578, "beta_dpo/gap_std": 14.999692916870117, "beta_dpo/loss_margin_mean": 9.444135665893555, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6500377928949358, "grad_norm": 42.52581024169922, "learning_rate": 1.6573863381573954e-07, "logits/chosen": 1.240324854850769, "logits/rejected": 1.2547059059143066, "loss": 0.7219, "step": 430 }, { "beta_dpo/beta": 0.10541002452373505, "beta_dpo/beta_margin_grad_mean": -0.3474757969379425, "beta_dpo/beta_margin_grad_std": 0.25322234630584717, "beta_dpo/beta_margin_mean": 0.8967947363853455, "beta_dpo/beta_margin_std": 1.6649945974349976, "beta_dpo/beta_used": 0.10541002452373505, "beta_dpo/beta_used_raw": 0.10541002452373505, "beta_dpo/gap_mean": 8.028791427612305, "beta_dpo/gap_std": 14.900115013122559, "beta_dpo/loss_margin_mean": 8.783801078796387, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6515495086923658, "grad_norm": 24.540010452270508, "learning_rate": 1.6449496416858282e-07, "logits/chosen": 1.1816718578338623, "logits/rejected": 1.1253862380981445, "loss": 0.9805, "step": 431 }, { "beta_dpo/beta": 0.18534046411514282, "beta_dpo/beta_margin_grad_mean": -0.3596099615097046, "beta_dpo/beta_margin_grad_std": 0.33429720997810364, "beta_dpo/beta_margin_mean": 1.294745683670044, "beta_dpo/beta_margin_std": 3.190850257873535, "beta_dpo/beta_used": 0.18534046411514282, "beta_dpo/beta_used_raw": 0.18534046411514282, "beta_dpo/gap_mean": 8.034162521362305, "beta_dpo/gap_std": 15.062065124511719, "beta_dpo/loss_margin_mean": 7.130716800689697, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6530612244897959, "grad_norm": 48.48855972290039, "learning_rate": 1.632536862810844e-07, "logits/chosen": 1.2895960807800293, "logits/rejected": 1.258962869644165, "loss": 0.9771, "step": 432 }, { "beta_dpo/beta": 0.15797749161720276, "beta_dpo/beta_margin_grad_mean": -0.31328731775283813, "beta_dpo/beta_margin_grad_std": 0.3253113925457001, "beta_dpo/beta_margin_mean": 1.807498574256897, "beta_dpo/beta_margin_std": 3.004568576812744, "beta_dpo/beta_used": 0.15797749161720276, "beta_dpo/beta_used_raw": 0.15797749161720276, "beta_dpo/gap_mean": 8.159663200378418, "beta_dpo/gap_std": 15.607994079589844, "beta_dpo/loss_margin_mean": 10.39791488647461, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.654572940287226, "grad_norm": 48.692752838134766, "learning_rate": 1.6201483487445515e-07, "logits/chosen": 1.429356336593628, "logits/rejected": 1.3947436809539795, "loss": 1.14, "step": 433 }, { "beta_dpo/beta": 0.2732776999473572, "beta_dpo/beta_margin_grad_mean": -0.2960241138935089, "beta_dpo/beta_margin_grad_std": 0.3341895043849945, "beta_dpo/beta_margin_mean": 3.3064119815826416, "beta_dpo/beta_margin_std": 5.642823219299316, "beta_dpo/beta_used": 0.2732776999473572, "beta_dpo/beta_used_raw": 0.2732776999473572, "beta_dpo/gap_mean": 8.857320785522461, "beta_dpo/gap_std": 15.690851211547852, "beta_dpo/loss_margin_mean": 10.634892463684082, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.656084656084656, "grad_norm": 73.32202911376953, "learning_rate": 1.6077844460203204e-07, "logits/chosen": 1.5748176574707031, "logits/rejected": 1.4792296886444092, "loss": 0.9697, "step": 434 }, { "beta_dpo/beta": 0.09363246709108353, "beta_dpo/beta_margin_grad_mean": -0.3969593346118927, "beta_dpo/beta_margin_grad_std": 0.24074605107307434, "beta_dpo/beta_margin_mean": 0.8078661561012268, "beta_dpo/beta_margin_std": 2.0362329483032227, "beta_dpo/beta_used": 0.09363246709108353, "beta_dpo/beta_used_raw": 0.09363246709108353, "beta_dpo/gap_mean": 8.576042175292969, "beta_dpo/gap_std": 15.559226989746094, "beta_dpo/loss_margin_mean": 7.586299896240234, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6575963718820862, "grad_norm": 26.006427764892578, "learning_rate": 1.5954455004830878e-07, "logits/chosen": 1.435976266860962, "logits/rejected": 1.415736198425293, "loss": 1.0994, "step": 435 }, { "beta_dpo/beta": 0.08419980108737946, "beta_dpo/beta_margin_grad_mean": -0.3715381920337677, "beta_dpo/beta_margin_grad_std": 0.24329714477062225, "beta_dpo/beta_margin_mean": 0.6914465427398682, "beta_dpo/beta_margin_std": 1.359487533569336, "beta_dpo/beta_used": 0.08419980108737946, "beta_dpo/beta_used_raw": 0.08419980108737946, "beta_dpo/gap_mean": 8.449478149414062, "beta_dpo/gap_std": 15.672661781311035, "beta_dpo/loss_margin_mean": 8.278998374938965, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6591080876795162, "grad_norm": 23.006898880004883, "learning_rate": 1.5831318572796847e-07, "logits/chosen": 1.3777055740356445, "logits/rejected": 1.3134238719940186, "loss": 1.0802, "step": 436 }, { "beta_dpo/beta": 0.1277225762605667, "beta_dpo/beta_margin_grad_mean": -0.3472208082675934, "beta_dpo/beta_margin_grad_std": 0.2929202914237976, "beta_dpo/beta_margin_mean": 1.1181834936141968, "beta_dpo/beta_margin_std": 2.1439664363861084, "beta_dpo/beta_used": 0.1277225762605667, "beta_dpo/beta_used_raw": 0.1277225762605667, "beta_dpo/gap_mean": 8.755277633666992, "beta_dpo/gap_std": 15.802358627319336, "beta_dpo/loss_margin_mean": 8.655381202697754, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6606198034769464, "grad_norm": 32.87360382080078, "learning_rate": 1.5708438608491815e-07, "logits/chosen": 1.4010035991668701, "logits/rejected": 1.249039888381958, "loss": 0.9412, "step": 437 }, { "beta_dpo/beta": 0.1653028279542923, "beta_dpo/beta_margin_grad_mean": -0.3463587164878845, "beta_dpo/beta_margin_grad_std": 0.25206202268600464, "beta_dpo/beta_margin_mean": 2.2152247428894043, "beta_dpo/beta_margin_std": 3.8490896224975586, "beta_dpo/beta_used": 0.1653028279542923, "beta_dpo/beta_used_raw": 0.10271503031253815, "beta_dpo/gap_mean": 8.468579292297363, "beta_dpo/gap_std": 15.612485885620117, "beta_dpo/loss_margin_mean": 9.343290328979492, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6621315192743764, "grad_norm": 51.89805603027344, "learning_rate": 1.558581854913253e-07, "logits/chosen": 1.3848375082015991, "logits/rejected": 1.3263285160064697, "loss": 1.0567, "step": 438 }, { "beta_dpo/beta": 0.1043519601225853, "beta_dpo/beta_margin_grad_mean": -0.36386561393737793, "beta_dpo/beta_margin_grad_std": 0.23430514335632324, "beta_dpo/beta_margin_mean": 1.2251368761062622, "beta_dpo/beta_margin_std": 2.3316707611083984, "beta_dpo/beta_used": 0.1043519601225853, "beta_dpo/beta_used_raw": 0.1043519601225853, "beta_dpo/gap_mean": 8.62759780883789, "beta_dpo/gap_std": 15.364070892333984, "beta_dpo/loss_margin_mean": 8.964755058288574, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6636432350718064, "grad_norm": 31.003793716430664, "learning_rate": 1.5463461824665658e-07, "logits/chosen": 1.3128708600997925, "logits/rejected": 1.2499792575836182, "loss": 1.0547, "step": 439 }, { "beta_dpo/beta": 0.12300290167331696, "beta_dpo/beta_margin_grad_mean": -0.2966170907020569, "beta_dpo/beta_margin_grad_std": 0.21793873608112335, "beta_dpo/beta_margin_mean": 1.7257822751998901, "beta_dpo/beta_margin_std": 2.4124960899353027, "beta_dpo/beta_used": 0.12300290167331696, "beta_dpo/beta_used_raw": 0.12300290167331696, "beta_dpo/gap_mean": 9.290130615234375, "beta_dpo/gap_std": 15.132123947143555, "beta_dpo/loss_margin_mean": 12.742350578308105, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6651549508692366, "grad_norm": 30.541147232055664, "learning_rate": 1.534137185767178e-07, "logits/chosen": 1.3556729555130005, "logits/rejected": 1.211385726928711, "loss": 0.918, "step": 440 }, { "beta_dpo/beta": 0.07583055645227432, "beta_dpo/beta_margin_grad_mean": -0.38547220826148987, "beta_dpo/beta_margin_grad_std": 0.2417447417974472, "beta_dpo/beta_margin_mean": 0.9196439385414124, "beta_dpo/beta_margin_std": 1.828843355178833, "beta_dpo/beta_used": 0.07583055645227432, "beta_dpo/beta_used_raw": -0.05472517013549805, "beta_dpo/gap_mean": 9.688538551330566, "beta_dpo/gap_std": 14.977852821350098, "beta_dpo/loss_margin_mean": 9.575737953186035, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6666666666666666, "grad_norm": 22.2325382232666, "learning_rate": 1.521955206326976e-07, "logits/chosen": 1.3583239316940308, "logits/rejected": 1.2205604314804077, "loss": 1.1443, "step": 441 }, { "beta_dpo/beta": 0.10827788710594177, "beta_dpo/beta_margin_grad_mean": -0.3940413296222687, "beta_dpo/beta_margin_grad_std": 0.24407008290290833, "beta_dpo/beta_margin_mean": 0.9771229028701782, "beta_dpo/beta_margin_std": 2.162266969680786, "beta_dpo/beta_used": 0.10827788710594177, "beta_dpo/beta_used_raw": 0.02357833832502365, "beta_dpo/gap_mean": 9.418302536010742, "beta_dpo/gap_std": 14.739940643310547, "beta_dpo/loss_margin_mean": 8.74870491027832, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6681783824640968, "grad_norm": 37.141815185546875, "learning_rate": 1.5098005849021078e-07, "logits/chosen": 1.0895839929580688, "logits/rejected": 1.0859336853027344, "loss": 0.9904, "step": 442 }, { "beta_dpo/beta": 0.07059619575738907, "beta_dpo/beta_margin_grad_mean": -0.40955042839050293, "beta_dpo/beta_margin_grad_std": 0.22130858898162842, "beta_dpo/beta_margin_mean": 0.7329308986663818, "beta_dpo/beta_margin_std": 1.8423486948013306, "beta_dpo/beta_used": 0.07059619575738907, "beta_dpo/beta_used_raw": 0.0686114951968193, "beta_dpo/gap_mean": 9.67289924621582, "beta_dpo/gap_std": 14.846346855163574, "beta_dpo/loss_margin_mean": 11.07690143585205, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6696900982615268, "grad_norm": 20.315444946289062, "learning_rate": 1.4976736614834662e-07, "logits/chosen": 1.119490146636963, "logits/rejected": 1.0487221479415894, "loss": 1.1261, "step": 443 }, { "beta_dpo/beta": 0.01301487721502781, "beta_dpo/beta_margin_grad_mean": -0.4735754728317261, "beta_dpo/beta_margin_grad_std": 0.06174745038151741, "beta_dpo/beta_margin_mean": 0.10920752584934235, "beta_dpo/beta_margin_std": 0.2566579580307007, "beta_dpo/beta_used": 0.01301487721502781, "beta_dpo/beta_used_raw": -0.10069774836301804, "beta_dpo/gap_mean": 9.321052551269531, "beta_dpo/gap_std": 14.88135814666748, "beta_dpo/loss_margin_mean": 6.3398823738098145, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.671201814058957, "grad_norm": 4.826290607452393, "learning_rate": 1.4855747752871654e-07, "logits/chosen": 1.2667855024337769, "logits/rejected": 1.1672453880310059, "loss": 1.294, "step": 444 }, { "beta_dpo/beta": 0.12245304882526398, "beta_dpo/beta_margin_grad_mean": -0.3227024972438812, "beta_dpo/beta_margin_grad_std": 0.23704025149345398, "beta_dpo/beta_margin_mean": 1.4407753944396973, "beta_dpo/beta_margin_std": 2.2449750900268555, "beta_dpo/beta_used": 0.12245304882526398, "beta_dpo/beta_used_raw": 0.12245304882526398, "beta_dpo/gap_mean": 9.358579635620117, "beta_dpo/gap_std": 14.765151023864746, "beta_dpo/loss_margin_mean": 10.7389554977417, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.672713529856387, "grad_norm": 32.277809143066406, "learning_rate": 1.473504264745062e-07, "logits/chosen": 1.4639275074005127, "logits/rejected": 1.4567673206329346, "loss": 0.9672, "step": 445 }, { "beta_dpo/beta": 0.23159979283809662, "beta_dpo/beta_margin_grad_mean": -0.2718433737754822, "beta_dpo/beta_margin_grad_std": 0.2508421540260315, "beta_dpo/beta_margin_mean": 3.571117401123047, "beta_dpo/beta_margin_std": 4.958829402923584, "beta_dpo/beta_used": 0.23159979283809662, "beta_dpo/beta_used_raw": 0.23159979283809662, "beta_dpo/gap_mean": 9.482922554016113, "beta_dpo/gap_std": 14.227436065673828, "beta_dpo/loss_margin_mean": 11.683351516723633, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.674225245653817, "grad_norm": 44.217838287353516, "learning_rate": 1.461462467495284e-07, "logits/chosen": 1.1846120357513428, "logits/rejected": 1.1139745712280273, "loss": 0.8702, "step": 446 }, { "beta_dpo/beta": 0.04251420870423317, "beta_dpo/beta_margin_grad_mean": -0.4159136116504669, "beta_dpo/beta_margin_grad_std": 0.14413373172283173, "beta_dpo/beta_margin_mean": 0.3935755789279938, "beta_dpo/beta_margin_std": 0.6931925415992737, "beta_dpo/beta_used": 0.04251420870423317, "beta_dpo/beta_used_raw": 0.04251420870423317, "beta_dpo/gap_mean": 9.748671531677246, "beta_dpo/gap_std": 14.281017303466797, "beta_dpo/loss_margin_mean": 9.072275161743164, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6757369614512472, "grad_norm": 13.691399574279785, "learning_rate": 1.4494497203727843e-07, "logits/chosen": 1.2142938375473022, "logits/rejected": 1.043416976928711, "loss": 1.1222, "step": 447 }, { "beta_dpo/beta": 0.08456003665924072, "beta_dpo/beta_margin_grad_mean": -0.3609682023525238, "beta_dpo/beta_margin_grad_std": 0.21768730878829956, "beta_dpo/beta_margin_mean": 0.764079213142395, "beta_dpo/beta_margin_std": 1.2195158004760742, "beta_dpo/beta_used": 0.08456003665924072, "beta_dpo/beta_used_raw": 0.08456003665924072, "beta_dpo/gap_mean": 9.542774200439453, "beta_dpo/gap_std": 14.50007152557373, "beta_dpo/loss_margin_mean": 8.9943265914917, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6772486772486772, "grad_norm": 19.512845993041992, "learning_rate": 1.4374663593999256e-07, "logits/chosen": 1.164783000946045, "logits/rejected": 1.1104214191436768, "loss": 0.9485, "step": 448 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49916499853134155, "beta_dpo/beta_margin_grad_std": 0.003720273729413748, "beta_dpo/beta_margin_mean": 0.0033401332329958677, "beta_dpo/beta_margin_std": 0.014882085844874382, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.14399409294128418, "beta_dpo/gap_mean": 8.810772895812988, "beta_dpo/gap_std": 14.4425687789917, "beta_dpo/loss_margin_mean": 3.3401331901550293, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6787603930461074, "grad_norm": 0.32376447319984436, "learning_rate": 1.4255127197770707e-07, "logits/chosen": 0.9270842671394348, "logits/rejected": 0.9556566476821899, "loss": 1.3816, "step": 449 }, { "beta_dpo/beta": 0.14452773332595825, "beta_dpo/beta_margin_grad_mean": -0.34042176604270935, "beta_dpo/beta_margin_grad_std": 0.3004254698753357, "beta_dpo/beta_margin_mean": 1.0860893726348877, "beta_dpo/beta_margin_std": 2.2014052867889404, "beta_dpo/beta_used": 0.14452773332595825, "beta_dpo/beta_used_raw": 0.14452773332595825, "beta_dpo/gap_mean": 8.172224998474121, "beta_dpo/gap_std": 14.656105041503906, "beta_dpo/loss_margin_mean": 7.372296333312988, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6802721088435374, "grad_norm": 34.80299758911133, "learning_rate": 1.4135891358732205e-07, "logits/chosen": 1.0534474849700928, "logits/rejected": 0.9382642507553101, "loss": 0.9493, "step": 450 }, { "beta_dpo/beta": 0.01695121079683304, "beta_dpo/beta_margin_grad_mean": -0.4738851487636566, "beta_dpo/beta_margin_grad_std": 0.05885161831974983, "beta_dpo/beta_margin_mean": 0.10612621158361435, "beta_dpo/beta_margin_std": 0.23932921886444092, "beta_dpo/beta_used": 0.01695121079683304, "beta_dpo/beta_used_raw": 0.01695121079683304, "beta_dpo/gap_mean": 7.888227462768555, "beta_dpo/gap_std": 14.654131889343262, "beta_dpo/loss_margin_mean": 6.234951019287109, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6817838246409675, "grad_norm": 5.221630096435547, "learning_rate": 1.4016959412166437e-07, "logits/chosen": 1.241917371749878, "logits/rejected": 1.1958801746368408, "loss": 1.2904, "step": 451 }, { "beta_dpo/beta": 0.0702991634607315, "beta_dpo/beta_margin_grad_mean": -0.39360693097114563, "beta_dpo/beta_margin_grad_std": 0.21342332661151886, "beta_dpo/beta_margin_mean": 0.5900993943214417, "beta_dpo/beta_margin_std": 1.2495468854904175, "beta_dpo/beta_used": 0.0702991634607315, "beta_dpo/beta_used_raw": 0.0702991634607315, "beta_dpo/gap_mean": 7.974970817565918, "beta_dpo/gap_std": 14.791309356689453, "beta_dpo/loss_margin_mean": 8.745607376098633, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6832955404383976, "grad_norm": 20.654272079467773, "learning_rate": 1.3898334684855645e-07, "logits/chosen": 0.9901279807090759, "logits/rejected": 0.8948749899864197, "loss": 1.1521, "step": 452 }, { "beta_dpo/beta": 0.047453392297029495, "beta_dpo/beta_margin_grad_mean": -0.4430086612701416, "beta_dpo/beta_margin_grad_std": 0.15654928982257843, "beta_dpo/beta_margin_mean": 0.2556281089782715, "beta_dpo/beta_margin_std": 0.7583603858947754, "beta_dpo/beta_used": 0.047453392297029495, "beta_dpo/beta_used_raw": 0.047453392297029495, "beta_dpo/gap_mean": 7.704753398895264, "beta_dpo/gap_std": 14.739446640014648, "beta_dpo/loss_margin_mean": 6.674213409423828, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6848072562358276, "grad_norm": 13.569918632507324, "learning_rate": 1.3780020494988445e-07, "logits/chosen": 1.1262736320495605, "logits/rejected": 1.0599395036697388, "loss": 1.1781, "step": 453 }, { "beta_dpo/beta": 0.14269126951694489, "beta_dpo/beta_margin_grad_mean": -0.3225584328174591, "beta_dpo/beta_margin_grad_std": 0.25810903310775757, "beta_dpo/beta_margin_mean": 1.4232326745986938, "beta_dpo/beta_margin_std": 2.898785352706909, "beta_dpo/beta_used": 0.14269126951694489, "beta_dpo/beta_used_raw": 0.14269126951694489, "beta_dpo/gap_mean": 8.057350158691406, "beta_dpo/gap_std": 14.961078643798828, "beta_dpo/loss_margin_mean": 9.678709983825684, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6863189720332578, "grad_norm": 25.957433700561523, "learning_rate": 1.366202015206706e-07, "logits/chosen": 1.2365267276763916, "logits/rejected": 1.1713523864746094, "loss": 0.9434, "step": 454 }, { "beta_dpo/beta": 0.11408072710037231, "beta_dpo/beta_margin_grad_mean": -0.3201211988925934, "beta_dpo/beta_margin_grad_std": 0.24275214970111847, "beta_dpo/beta_margin_mean": 1.144683599472046, "beta_dpo/beta_margin_std": 1.7552571296691895, "beta_dpo/beta_used": 0.11408072710037231, "beta_dpo/beta_used_raw": 0.11408072710037231, "beta_dpo/gap_mean": 8.407580375671387, "beta_dpo/gap_std": 14.920415878295898, "beta_dpo/loss_margin_mean": 10.018362045288086, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6878306878306878, "grad_norm": 28.53400993347168, "learning_rate": 1.354433695681474e-07, "logits/chosen": 1.0570340156555176, "logits/rejected": 1.0129978656768799, "loss": 0.8906, "step": 455 }, { "beta_dpo/beta": 0.06565161049365997, "beta_dpo/beta_margin_grad_mean": -0.3941415548324585, "beta_dpo/beta_margin_grad_std": 0.1878412365913391, "beta_dpo/beta_margin_mean": 0.6595984697341919, "beta_dpo/beta_margin_std": 1.2374985218048096, "beta_dpo/beta_used": 0.06565161049365997, "beta_dpo/beta_used_raw": 0.03332943841814995, "beta_dpo/gap_mean": 8.574639320373535, "beta_dpo/gap_std": 14.622611999511719, "beta_dpo/loss_margin_mean": 8.794672012329102, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6893424036281179, "grad_norm": 17.137107849121094, "learning_rate": 1.3426974201083439e-07, "logits/chosen": 1.129429578781128, "logits/rejected": 1.1141908168792725, "loss": 1.065, "step": 456 }, { "beta_dpo/beta": 0.010343266651034355, "beta_dpo/beta_margin_grad_mean": -0.48004212975502014, "beta_dpo/beta_margin_grad_std": 0.044787127524614334, "beta_dpo/beta_margin_mean": 0.0815126895904541, "beta_dpo/beta_margin_std": 0.18423019349575043, "beta_dpo/beta_used": 0.010343266651034355, "beta_dpo/beta_used_raw": -0.03720933198928833, "beta_dpo/gap_mean": 8.280400276184082, "beta_dpo/gap_std": 14.3084077835083, "beta_dpo/loss_margin_mean": 6.3437018394470215, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.690854119425548, "grad_norm": 4.230249404907227, "learning_rate": 1.3309935167761717e-07, "logits/chosen": 1.163110375404358, "logits/rejected": 1.06003737449646, "loss": 1.3216, "step": 457 }, { "beta_dpo/beta": 0.10353825986385345, "beta_dpo/beta_margin_grad_mean": -0.3194752335548401, "beta_dpo/beta_margin_grad_std": 0.2263987511396408, "beta_dpo/beta_margin_mean": 1.070265769958496, "beta_dpo/beta_margin_std": 1.5786514282226562, "beta_dpo/beta_used": 0.10353825986385345, "beta_dpo/beta_used_raw": 0.10353825986385345, "beta_dpo/gap_mean": 8.416302680969238, "beta_dpo/gap_std": 14.113754272460938, "beta_dpo/loss_margin_mean": 10.443164825439453, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6923658352229781, "grad_norm": 25.92171287536621, "learning_rate": 1.3193223130682936e-07, "logits/chosen": 1.3735662698745728, "logits/rejected": 1.2102110385894775, "loss": 1.0459, "step": 458 }, { "beta_dpo/beta": 0.10046427696943283, "beta_dpo/beta_margin_grad_mean": -0.33836182951927185, "beta_dpo/beta_margin_grad_std": 0.20904038846492767, "beta_dpo/beta_margin_mean": 1.4206905364990234, "beta_dpo/beta_margin_std": 2.245626449584961, "beta_dpo/beta_used": 0.10046427696943283, "beta_dpo/beta_used_raw": 0.0645713359117508, "beta_dpo/gap_mean": 8.288217544555664, "beta_dpo/gap_std": 13.640267372131348, "beta_dpo/loss_margin_mean": 8.557767868041992, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6938775510204082, "grad_norm": 23.96784019470215, "learning_rate": 1.3076841354533658e-07, "logits/chosen": 1.3754119873046875, "logits/rejected": 1.3155990839004517, "loss": 0.95, "step": 459 }, { "beta_dpo/beta": 0.096731998026371, "beta_dpo/beta_margin_grad_mean": -0.34345245361328125, "beta_dpo/beta_margin_grad_std": 0.2221241593360901, "beta_dpo/beta_margin_mean": 1.0429019927978516, "beta_dpo/beta_margin_std": 1.7028886079788208, "beta_dpo/beta_used": 0.096731998026371, "beta_dpo/beta_used_raw": 0.096731998026371, "beta_dpo/gap_mean": 8.955610275268555, "beta_dpo/gap_std": 13.669034957885742, "beta_dpo/loss_margin_mean": 10.950597763061523, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6953892668178382, "grad_norm": 26.167713165283203, "learning_rate": 1.2960793094762345e-07, "logits/chosen": 1.0446248054504395, "logits/rejected": 0.9257493019104004, "loss": 0.9978, "step": 460 }, { "beta_dpo/beta": 0.1565985530614853, "beta_dpo/beta_margin_grad_mean": -0.33175206184387207, "beta_dpo/beta_margin_grad_std": 0.2527690827846527, "beta_dpo/beta_margin_mean": 1.9918843507766724, "beta_dpo/beta_margin_std": 3.381699562072754, "beta_dpo/beta_used": 0.1565985530614853, "beta_dpo/beta_used_raw": 0.13997702300548553, "beta_dpo/gap_mean": 9.272143363952637, "beta_dpo/gap_std": 13.67574405670166, "beta_dpo/loss_margin_mean": 9.674901962280273, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6969009826152683, "grad_norm": 33.40471649169922, "learning_rate": 1.2845081597488286e-07, "logits/chosen": 1.3084503412246704, "logits/rejected": 1.2334516048431396, "loss": 0.8777, "step": 461 }, { "beta_dpo/beta": 0.06886428594589233, "beta_dpo/beta_margin_grad_mean": -0.35868188738822937, "beta_dpo/beta_margin_grad_std": 0.1917419135570526, "beta_dpo/beta_margin_mean": 0.7064536213874817, "beta_dpo/beta_margin_std": 0.992012083530426, "beta_dpo/beta_used": 0.06886428594589233, "beta_dpo/beta_used_raw": 0.06886428594589233, "beta_dpo/gap_mean": 9.344978332519531, "beta_dpo/gap_std": 13.81993293762207, "beta_dpo/loss_margin_mean": 10.36449909210205, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6984126984126984, "grad_norm": 19.233129501342773, "learning_rate": 1.27297100994108e-07, "logits/chosen": 1.10292649269104, "logits/rejected": 1.0218987464904785, "loss": 1.0363, "step": 462 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.497945100069046, "beta_dpo/beta_margin_grad_std": 0.0035107722505927086, "beta_dpo/beta_margin_mean": 0.008220227435231209, "beta_dpo/beta_margin_std": 0.014044022187590599, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.06016698479652405, "beta_dpo/gap_mean": 9.277082443237305, "beta_dpo/gap_std": 13.934642791748047, "beta_dpo/loss_margin_mean": 8.220227241516113, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.6999244142101285, "grad_norm": 0.3398761451244354, "learning_rate": 1.2614681827718695e-07, "logits/chosen": 1.186434030532837, "logits/rejected": 1.1574755907058716, "loss": 1.3797, "step": 463 }, { "beta_dpo/beta": 0.09463178366422653, "beta_dpo/beta_margin_grad_mean": -0.3775191009044647, "beta_dpo/beta_margin_grad_std": 0.26763007044792175, "beta_dpo/beta_margin_mean": 0.7947764992713928, "beta_dpo/beta_margin_std": 2.0486159324645996, "beta_dpo/beta_used": 0.09463178366422653, "beta_dpo/beta_used_raw": 0.09463178366422653, "beta_dpo/gap_mean": 9.045785903930664, "beta_dpo/gap_std": 14.590141296386719, "beta_dpo/loss_margin_mean": 9.205949783325195, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7014361300075586, "grad_norm": 26.988676071166992, "learning_rate": 1.2500000000000005e-07, "logits/chosen": 1.324162483215332, "logits/rejected": 1.288191795349121, "loss": 1.0955, "step": 464 }, { "beta_dpo/beta": 0.06897910684347153, "beta_dpo/beta_margin_grad_mean": -0.37157633900642395, "beta_dpo/beta_margin_grad_std": 0.21252475678920746, "beta_dpo/beta_margin_mean": 0.6985571980476379, "beta_dpo/beta_margin_std": 1.129198670387268, "beta_dpo/beta_used": 0.06897910684347153, "beta_dpo/beta_used_raw": 0.06897910684347153, "beta_dpo/gap_mean": 9.353326797485352, "beta_dpo/gap_std": 14.839263916015625, "beta_dpo/loss_margin_mean": 10.117259979248047, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7029478458049887, "grad_norm": 19.14287567138672, "learning_rate": 1.238566782415197e-07, "logits/chosen": 1.4786460399627686, "logits/rejected": 1.394470453262329, "loss": 1.0684, "step": 465 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49872010946273804, "beta_dpo/beta_margin_grad_std": 0.0033063730224967003, "beta_dpo/beta_margin_mean": 0.005119941662997007, "beta_dpo/beta_margin_std": 0.013226281851530075, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.1248481273651123, "beta_dpo/gap_mean": 8.757827758789062, "beta_dpo/gap_std": 14.707776069641113, "beta_dpo/loss_margin_mean": 5.119941711425781, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7044595616024187, "grad_norm": 0.38169077038764954, "learning_rate": 1.2271688498291334e-07, "logits/chosen": 1.1370337009429932, "logits/rejected": 1.1599440574645996, "loss": 1.3813, "step": 466 }, { "beta_dpo/beta": 0.06722957640886307, "beta_dpo/beta_margin_grad_mean": -0.41416195034980774, "beta_dpo/beta_margin_grad_std": 0.20898796617984772, "beta_dpo/beta_margin_mean": 0.6015309691429138, "beta_dpo/beta_margin_std": 1.5544705390930176, "beta_dpo/beta_used": 0.06722957640886307, "beta_dpo/beta_used_raw": 0.01915111020207405, "beta_dpo/gap_mean": 8.545236587524414, "beta_dpo/gap_std": 14.711740493774414, "beta_dpo/loss_margin_mean": 8.57152271270752, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7059712773998488, "grad_norm": 22.553224563598633, "learning_rate": 1.2158065210664848e-07, "logits/chosen": 1.2510708570480347, "logits/rejected": 1.0964205265045166, "loss": 1.1364, "step": 467 }, { "beta_dpo/beta": 0.13395223021507263, "beta_dpo/beta_margin_grad_mean": -0.3565409183502197, "beta_dpo/beta_margin_grad_std": 0.26658982038497925, "beta_dpo/beta_margin_mean": 1.3280830383300781, "beta_dpo/beta_margin_std": 3.2243897914886475, "beta_dpo/beta_used": 0.13395223021507263, "beta_dpo/beta_used_raw": 0.09521180391311646, "beta_dpo/gap_mean": 8.586424827575684, "beta_dpo/gap_std": 14.758886337280273, "beta_dpo/loss_margin_mean": 9.17181396484375, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7074829931972789, "grad_norm": 56.513893127441406, "learning_rate": 1.204480113956011e-07, "logits/chosen": 1.235276460647583, "logits/rejected": 1.240452766418457, "loss": 1.0568, "step": 468 }, { "beta_dpo/beta": 0.19092893600463867, "beta_dpo/beta_margin_grad_mean": -0.3903719186782837, "beta_dpo/beta_margin_grad_std": 0.27532485127449036, "beta_dpo/beta_margin_mean": 2.0709922313690186, "beta_dpo/beta_margin_std": 4.380462646484375, "beta_dpo/beta_used": 0.19092893600463867, "beta_dpo/beta_used_raw": 0.11754559725522995, "beta_dpo/gap_mean": 8.598621368408203, "beta_dpo/gap_std": 14.475069046020508, "beta_dpo/loss_margin_mean": 7.082053184509277, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.708994708994709, "grad_norm": 65.13701629638672, "learning_rate": 1.1931899453216697e-07, "logits/chosen": 1.3469481468200684, "logits/rejected": 1.3463125228881836, "loss": 1.1198, "step": 469 }, { "beta_dpo/beta": 0.07104767858982086, "beta_dpo/beta_margin_grad_mean": -0.3992937207221985, "beta_dpo/beta_margin_grad_std": 0.19436946511268616, "beta_dpo/beta_margin_mean": 0.4970458745956421, "beta_dpo/beta_margin_std": 1.114266037940979, "beta_dpo/beta_used": 0.07104767858982086, "beta_dpo/beta_used_raw": 0.07104767858982086, "beta_dpo/gap_mean": 8.183364868164062, "beta_dpo/gap_std": 14.048234939575195, "beta_dpo/loss_margin_mean": 7.861882209777832, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7105064247921391, "grad_norm": 19.9486083984375, "learning_rate": 1.1819363309737438e-07, "logits/chosen": 1.2430088520050049, "logits/rejected": 1.1504111289978027, "loss": 1.0578, "step": 470 }, { "beta_dpo/beta": 0.22089746594429016, "beta_dpo/beta_margin_grad_mean": -0.28740575909614563, "beta_dpo/beta_margin_grad_std": 0.33232811093330383, "beta_dpo/beta_margin_mean": 2.1565606594085693, "beta_dpo/beta_margin_std": 3.5191762447357178, "beta_dpo/beta_used": 0.22089746594429016, "beta_dpo/beta_used_raw": 0.22089746594429016, "beta_dpo/gap_mean": 8.464906692504883, "beta_dpo/gap_std": 14.241508483886719, "beta_dpo/loss_margin_mean": 9.804491996765137, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7120181405895691, "grad_norm": 59.95513916015625, "learning_rate": 1.1707195857000215e-07, "logits/chosen": 1.2362382411956787, "logits/rejected": 1.1866122484207153, "loss": 0.8833, "step": 471 }, { "beta_dpo/beta": 0.06996319442987442, "beta_dpo/beta_margin_grad_mean": -0.4005456268787384, "beta_dpo/beta_margin_grad_std": 0.24495269358158112, "beta_dpo/beta_margin_mean": 0.7803270220756531, "beta_dpo/beta_margin_std": 1.9341996908187866, "beta_dpo/beta_used": 0.06996319442987442, "beta_dpo/beta_used_raw": 0.04906560108065605, "beta_dpo/gap_mean": 8.674680709838867, "beta_dpo/gap_std": 14.96885871887207, "beta_dpo/loss_margin_mean": 9.845968246459961, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7135298563869993, "grad_norm": 35.19158172607422, "learning_rate": 1.1595400232569768e-07, "logits/chosen": 0.8848438262939453, "logits/rejected": 0.8536281585693359, "loss": 1.2495, "step": 472 }, { "beta_dpo/beta": 0.22186213731765747, "beta_dpo/beta_margin_grad_mean": -0.29160040616989136, "beta_dpo/beta_margin_grad_std": 0.3274621367454529, "beta_dpo/beta_margin_mean": 2.4625768661499023, "beta_dpo/beta_margin_std": 4.43284797668457, "beta_dpo/beta_used": 0.22186213731765747, "beta_dpo/beta_used_raw": 0.22186213731765747, "beta_dpo/gap_mean": 9.114084243774414, "beta_dpo/gap_std": 15.43545150756836, "beta_dpo/loss_margin_mean": 10.468867301940918, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7150415721844293, "grad_norm": 59.935829162597656, "learning_rate": 1.1483979563610069e-07, "logits/chosen": 1.2678196430206299, "logits/rejected": 1.165730595588684, "loss": 0.9532, "step": 473 }, { "beta_dpo/beta": 0.02063870057463646, "beta_dpo/beta_margin_grad_mean": -0.46851664781570435, "beta_dpo/beta_margin_grad_std": 0.09417974948883057, "beta_dpo/beta_margin_mean": 0.13540899753570557, "beta_dpo/beta_margin_std": 0.40995728969573975, "beta_dpo/beta_used": 0.02063870057463646, "beta_dpo/beta_used_raw": -0.001780906692147255, "beta_dpo/gap_mean": 8.859273910522461, "beta_dpo/gap_std": 15.522665977478027, "beta_dpo/loss_margin_mean": 7.19165563583374, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7165532879818595, "grad_norm": 8.50357437133789, "learning_rate": 1.1372936966796709e-07, "logits/chosen": 1.464232325553894, "logits/rejected": 1.3536427021026611, "loss": 1.2578, "step": 474 }, { "beta_dpo/beta": 0.20778724551200867, "beta_dpo/beta_margin_grad_mean": -0.2396034300327301, "beta_dpo/beta_margin_grad_std": 0.2754622995853424, "beta_dpo/beta_margin_mean": 2.4503684043884277, "beta_dpo/beta_margin_std": 2.977999210357666, "beta_dpo/beta_used": 0.20778724551200867, "beta_dpo/beta_used_raw": 0.20778724551200867, "beta_dpo/gap_mean": 9.182262420654297, "beta_dpo/gap_std": 15.229242324829102, "beta_dpo/loss_margin_mean": 11.754033088684082, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7180650037792895, "grad_norm": 39.44532012939453, "learning_rate": 1.126227554822985e-07, "logits/chosen": 1.407026767730713, "logits/rejected": 1.3673948049545288, "loss": 0.6444, "step": 475 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49834784865379333, "beta_dpo/beta_margin_grad_std": 0.003492003073915839, "beta_dpo/beta_margin_mean": 0.006608948577195406, "beta_dpo/beta_margin_std": 0.013968821614980698, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.09851482510566711, "beta_dpo/gap_mean": 8.904387474060059, "beta_dpo/gap_std": 14.980789184570312, "beta_dpo/loss_margin_mean": 6.608948230743408, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7195767195767195, "grad_norm": 0.363756388425827, "learning_rate": 1.1151998403347243e-07, "logits/chosen": 1.1348334550857544, "logits/rejected": 1.100001335144043, "loss": 1.3807, "step": 476 }, { "beta_dpo/beta": 0.015278504230082035, "beta_dpo/beta_margin_grad_mean": -0.47819918394088745, "beta_dpo/beta_margin_grad_std": 0.09855031967163086, "beta_dpo/beta_margin_mean": 0.0943133756518364, "beta_dpo/beta_margin_std": 0.41970664262771606, "beta_dpo/beta_used": 0.015278504230082035, "beta_dpo/beta_used_raw": -0.012433012947440147, "beta_dpo/gap_mean": 8.45599365234375, "beta_dpo/gap_std": 15.0723876953125, "beta_dpo/loss_margin_mean": 6.3728251457214355, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7210884353741497, "grad_norm": 6.164888858795166, "learning_rate": 1.1042108616837692e-07, "logits/chosen": 1.5054833889007568, "logits/rejected": 1.481032371520996, "loss": 1.3075, "step": 477 }, { "beta_dpo/beta": 0.10024324804544449, "beta_dpo/beta_margin_grad_mean": -0.43241098523139954, "beta_dpo/beta_margin_grad_std": 0.26925480365753174, "beta_dpo/beta_margin_mean": 0.5425744652748108, "beta_dpo/beta_margin_std": 2.0107645988464355, "beta_dpo/beta_used": 0.10024324804544449, "beta_dpo/beta_used_raw": 0.10024324804544449, "beta_dpo/gap_mean": 7.973514556884766, "beta_dpo/gap_std": 15.64834976196289, "beta_dpo/loss_margin_mean": 5.676856994628906, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7226001511715797, "grad_norm": 25.647165298461914, "learning_rate": 1.0932609262554746e-07, "logits/chosen": 1.0902628898620605, "logits/rejected": 1.083869218826294, "loss": 1.2064, "step": 478 }, { "beta_dpo/beta": 0.10473272204399109, "beta_dpo/beta_margin_grad_mean": -0.39839571714401245, "beta_dpo/beta_margin_grad_std": 0.2627533972263336, "beta_dpo/beta_margin_mean": 0.6739475131034851, "beta_dpo/beta_margin_std": 2.4179022312164307, "beta_dpo/beta_used": 0.10473272204399109, "beta_dpo/beta_used_raw": 0.10473272204399109, "beta_dpo/gap_mean": 7.648787975311279, "beta_dpo/gap_std": 15.70901107788086, "beta_dpo/loss_margin_mean": 6.562697410583496, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7241118669690099, "grad_norm": 32.01771926879883, "learning_rate": 1.0823503403430734e-07, "logits/chosen": 1.2927186489105225, "logits/rejected": 1.2298321723937988, "loss": 1.2245, "step": 479 }, { "beta_dpo/beta": 0.2699778079986572, "beta_dpo/beta_margin_grad_mean": -0.29545360803604126, "beta_dpo/beta_margin_grad_std": 0.3526819348335266, "beta_dpo/beta_margin_mean": 2.6411521434783936, "beta_dpo/beta_margin_std": 4.677126884460449, "beta_dpo/beta_used": 0.2699778079986572, "beta_dpo/beta_used_raw": 0.2699778079986572, "beta_dpo/gap_mean": 7.776112079620361, "beta_dpo/gap_std": 15.986560821533203, "beta_dpo/loss_margin_mean": 9.44406795501709, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7256235827664399, "grad_norm": 77.899658203125, "learning_rate": 1.0714794091391072e-07, "logits/chosen": 1.0709260702133179, "logits/rejected": 1.0799660682678223, "loss": 1.1733, "step": 480 }, { "beta_dpo/beta": 0.08630968630313873, "beta_dpo/beta_margin_grad_mean": -0.4045952260494232, "beta_dpo/beta_margin_grad_std": 0.22337859869003296, "beta_dpo/beta_margin_mean": 0.8984798192977905, "beta_dpo/beta_margin_std": 2.0487589836120605, "beta_dpo/beta_used": 0.08630968630313873, "beta_dpo/beta_used_raw": 0.07817493379116058, "beta_dpo/gap_mean": 7.974575519561768, "beta_dpo/gap_std": 16.128808975219727, "beta_dpo/loss_margin_mean": 8.648720741271973, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.72713529856387, "grad_norm": 33.76292419433594, "learning_rate": 1.0606484367268906e-07, "logits/chosen": 1.3945153951644897, "logits/rejected": 1.3756455183029175, "loss": 1.178, "step": 481 }, { "beta_dpo/beta": 0.017980381846427917, "beta_dpo/beta_margin_grad_mean": -0.47610631585121155, "beta_dpo/beta_margin_grad_std": 0.11001280695199966, "beta_dpo/beta_margin_mean": 0.10760781168937683, "beta_dpo/beta_margin_std": 0.4911026358604431, "beta_dpo/beta_used": 0.017980381846427917, "beta_dpo/beta_used_raw": 0.012024441733956337, "beta_dpo/gap_mean": 8.00039005279541, "beta_dpo/gap_std": 16.554798126220703, "beta_dpo/loss_margin_mean": 8.078429222106934, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7286470143613001, "grad_norm": 8.63095474243164, "learning_rate": 1.0498577260720048e-07, "logits/chosen": 1.307666301727295, "logits/rejected": 1.1709094047546387, "loss": 1.2931, "step": 482 }, { "beta_dpo/beta": 0.1698511689901352, "beta_dpo/beta_margin_grad_mean": -0.28366097807884216, "beta_dpo/beta_margin_grad_std": 0.26790836453437805, "beta_dpo/beta_margin_mean": 1.9325002431869507, "beta_dpo/beta_margin_std": 2.926262617111206, "beta_dpo/beta_used": 0.1698511689901352, "beta_dpo/beta_used_raw": 0.1698511689901352, "beta_dpo/gap_mean": 8.486933708190918, "beta_dpo/gap_std": 16.541275024414062, "beta_dpo/loss_margin_mean": 10.961893081665039, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7301587301587301, "grad_norm": 35.92266082763672, "learning_rate": 1.0391075790138232e-07, "logits/chosen": 1.5374231338500977, "logits/rejected": 1.3803998231887817, "loss": 0.8204, "step": 483 }, { "beta_dpo/beta": 0.08772915601730347, "beta_dpo/beta_margin_grad_mean": -0.3965490758419037, "beta_dpo/beta_margin_grad_std": 0.22108224034309387, "beta_dpo/beta_margin_mean": 0.635413646697998, "beta_dpo/beta_margin_std": 1.6489046812057495, "beta_dpo/beta_used": 0.08772915601730347, "beta_dpo/beta_used_raw": 0.08772915601730347, "beta_dpo/gap_mean": 8.550213813781738, "beta_dpo/gap_std": 16.0025691986084, "beta_dpo/loss_margin_mean": 8.223631858825684, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7316704459561603, "grad_norm": 27.35234832763672, "learning_rate": 1.0283982962570681e-07, "logits/chosen": 1.3222270011901855, "logits/rejected": 1.3129990100860596, "loss": 1.0035, "step": 484 }, { "beta_dpo/beta": 0.1828226000070572, "beta_dpo/beta_margin_grad_mean": -0.3307192921638489, "beta_dpo/beta_margin_grad_std": 0.23482932150363922, "beta_dpo/beta_margin_mean": 2.4338417053222656, "beta_dpo/beta_margin_std": 4.331328868865967, "beta_dpo/beta_used": 0.1828226000070572, "beta_dpo/beta_used_raw": 0.129967600107193, "beta_dpo/gap_mean": 8.83953857421875, "beta_dpo/gap_std": 15.51352596282959, "beta_dpo/loss_margin_mean": 8.875746726989746, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7331821617535903, "grad_norm": 40.624786376953125, "learning_rate": 1.0177301773633992e-07, "logits/chosen": 1.3951783180236816, "logits/rejected": 1.3369786739349365, "loss": 0.8641, "step": 485 }, { "beta_dpo/beta": 0.17471832036972046, "beta_dpo/beta_margin_grad_mean": -0.39315420389175415, "beta_dpo/beta_margin_grad_std": 0.2959369421005249, "beta_dpo/beta_margin_mean": 1.8131951093673706, "beta_dpo/beta_margin_std": 5.117852687835693, "beta_dpo/beta_used": 0.17471832036972046, "beta_dpo/beta_used_raw": 0.16305719316005707, "beta_dpo/gap_mean": 8.485563278198242, "beta_dpo/gap_std": 15.768863677978516, "beta_dpo/loss_margin_mean": 8.391579627990723, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7346938775510204, "grad_norm": 65.43634796142578, "learning_rate": 1.007103520743035e-07, "logits/chosen": 1.2871387004852295, "logits/rejected": 1.1449122428894043, "loss": 1.2522, "step": 486 }, { "beta_dpo/beta": 0.1086234524846077, "beta_dpo/beta_margin_grad_mean": -0.40604156255722046, "beta_dpo/beta_margin_grad_std": 0.25962209701538086, "beta_dpo/beta_margin_mean": 0.7545509934425354, "beta_dpo/beta_margin_std": 2.2605080604553223, "beta_dpo/beta_used": 0.1086234524846077, "beta_dpo/beta_used_raw": 0.1086234524846077, "beta_dpo/gap_mean": 8.383829116821289, "beta_dpo/gap_std": 15.744401931762695, "beta_dpo/loss_margin_mean": 7.316266059875488, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7362055933484505, "grad_norm": 33.63584899902344, "learning_rate": 9.965186236464046e-08, "logits/chosen": 1.435591459274292, "logits/rejected": 1.3444766998291016, "loss": 1.0565, "step": 487 }, { "beta_dpo/beta": 0.18012477457523346, "beta_dpo/beta_margin_grad_mean": -0.3828466832637787, "beta_dpo/beta_margin_grad_std": 0.2732307016849518, "beta_dpo/beta_margin_mean": 2.27314829826355, "beta_dpo/beta_margin_std": 4.74143123626709, "beta_dpo/beta_used": 0.18012477457523346, "beta_dpo/beta_used_raw": 0.1745169460773468, "beta_dpo/gap_mean": 8.307104110717773, "beta_dpo/gap_std": 15.859813690185547, "beta_dpo/loss_margin_mean": 9.146766662597656, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7377173091458806, "grad_norm": 63.05045700073242, "learning_rate": 9.859757821558337e-08, "logits/chosen": 0.9542595744132996, "logits/rejected": 0.8827604055404663, "loss": 1.2242, "step": 488 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49902424216270447, "beta_dpo/beta_margin_grad_std": 0.0032371412962675095, "beta_dpo/beta_margin_mean": 0.003903293749317527, "beta_dpo/beta_margin_std": 0.012949378229677677, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.10531962662935257, "beta_dpo/gap_mean": 7.896786212921143, "beta_dpo/gap_std": 15.445693969726562, "beta_dpo/loss_margin_mean": 3.9032936096191406, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7392290249433107, "grad_norm": 0.40414562821388245, "learning_rate": 9.754752911772615e-08, "logits/chosen": 1.4719191789627075, "logits/rejected": 1.3779878616333008, "loss": 1.3819, "step": 489 }, { "beta_dpo/beta": 0.29670846462249756, "beta_dpo/beta_margin_grad_mean": -0.29541918635368347, "beta_dpo/beta_margin_grad_std": 0.3639431297779083, "beta_dpo/beta_margin_mean": 2.9408721923828125, "beta_dpo/beta_margin_std": 5.338799953460693, "beta_dpo/beta_used": 0.29670846462249756, "beta_dpo/beta_used_raw": 0.29670846462249756, "beta_dpo/gap_mean": 8.032998085021973, "beta_dpo/gap_std": 15.67213249206543, "beta_dpo/loss_margin_mean": 10.069779396057129, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7407407407407407, "grad_norm": 69.37432861328125, "learning_rate": 9.650174444319956e-08, "logits/chosen": 1.4777637720108032, "logits/rejected": 1.4448156356811523, "loss": 1.1275, "step": 490 }, { "beta_dpo/beta": 0.10166777670383453, "beta_dpo/beta_margin_grad_mean": -0.36870265007019043, "beta_dpo/beta_margin_grad_std": 0.2362639605998993, "beta_dpo/beta_margin_mean": 1.1705293655395508, "beta_dpo/beta_margin_std": 2.1599667072296143, "beta_dpo/beta_used": 0.10166777670383453, "beta_dpo/beta_used_raw": -0.012001670897006989, "beta_dpo/gap_mean": 8.308603286743164, "beta_dpo/gap_std": 15.477779388427734, "beta_dpo/loss_margin_mean": 8.63475227355957, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7422524565381708, "grad_norm": 39.177001953125, "learning_rate": 9.546025344484868e-08, "logits/chosen": 1.3601679801940918, "logits/rejected": 1.2689048051834106, "loss": 1.1337, "step": 491 }, { "beta_dpo/beta": 0.09824459999799728, "beta_dpo/beta_margin_grad_mean": -0.39903581142425537, "beta_dpo/beta_margin_grad_std": 0.23087172210216522, "beta_dpo/beta_margin_mean": 0.777897298336029, "beta_dpo/beta_margin_std": 2.0594301223754883, "beta_dpo/beta_used": 0.09824459999799728, "beta_dpo/beta_used_raw": 0.02383984625339508, "beta_dpo/gap_mean": 7.6909990310668945, "beta_dpo/gap_std": 15.357696533203125, "beta_dpo/loss_margin_mean": 5.5825066566467285, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7437641723356009, "grad_norm": 30.07907485961914, "learning_rate": 9.442308525541589e-08, "logits/chosen": 1.2555835247039795, "logits/rejected": 1.1515380144119263, "loss": 1.067, "step": 492 }, { "beta_dpo/beta": 0.21834628283977509, "beta_dpo/beta_margin_grad_mean": -0.3189516067504883, "beta_dpo/beta_margin_grad_std": 0.3405011296272278, "beta_dpo/beta_margin_mean": 2.1703264713287354, "beta_dpo/beta_margin_std": 4.150738716125488, "beta_dpo/beta_used": 0.21834628283977509, "beta_dpo/beta_used_raw": 0.21834628283977509, "beta_dpo/gap_mean": 7.796796798706055, "beta_dpo/gap_std": 15.415250778198242, "beta_dpo/loss_margin_mean": 9.13962459564209, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.745275888133031, "grad_norm": 64.61181640625, "learning_rate": 9.339026888672468e-08, "logits/chosen": 1.3729231357574463, "logits/rejected": 1.2633564472198486, "loss": 1.142, "step": 493 }, { "beta_dpo/beta": 0.07543282955884933, "beta_dpo/beta_margin_grad_mean": -0.4118766486644745, "beta_dpo/beta_margin_grad_std": 0.23261648416519165, "beta_dpo/beta_margin_mean": 0.5448687076568604, "beta_dpo/beta_margin_std": 1.7095155715942383, "beta_dpo/beta_used": 0.07543282955884933, "beta_dpo/beta_used_raw": 0.07543282955884933, "beta_dpo/gap_mean": 8.163684844970703, "beta_dpo/gap_std": 15.739279747009277, "beta_dpo/loss_margin_mean": 8.561339378356934, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7467876039304611, "grad_norm": 30.267333984375, "learning_rate": 9.236183322886945e-08, "logits/chosen": 1.2366876602172852, "logits/rejected": 1.2264430522918701, "loss": 1.2012, "step": 494 }, { "beta_dpo/beta": 0.09734344482421875, "beta_dpo/beta_margin_grad_mean": -0.3877275884151459, "beta_dpo/beta_margin_grad_std": 0.22462578117847443, "beta_dpo/beta_margin_mean": 0.8795979619026184, "beta_dpo/beta_margin_std": 1.8222239017486572, "beta_dpo/beta_used": 0.09734344482421875, "beta_dpo/beta_used_raw": 0.03870324790477753, "beta_dpo/gap_mean": 7.832492828369141, "beta_dpo/gap_std": 15.831649780273438, "beta_dpo/loss_margin_mean": 6.959440231323242, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7482993197278912, "grad_norm": 39.15364456176758, "learning_rate": 9.133780704940594e-08, "logits/chosen": 1.3102976083755493, "logits/rejected": 1.2166869640350342, "loss": 1.1147, "step": 495 }, { "beta_dpo/beta": 0.04615124687552452, "beta_dpo/beta_margin_grad_mean": -0.4308703541755676, "beta_dpo/beta_margin_grad_std": 0.177626371383667, "beta_dpo/beta_margin_mean": 0.38025563955307007, "beta_dpo/beta_margin_std": 0.9773518443107605, "beta_dpo/beta_used": 0.04615124687552452, "beta_dpo/beta_used_raw": 0.04151741415262222, "beta_dpo/gap_mean": 8.264655113220215, "beta_dpo/gap_std": 15.770124435424805, "beta_dpo/loss_margin_mean": 9.904504776000977, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7498110355253212, "grad_norm": 19.862531661987305, "learning_rate": 9.031821899254797e-08, "logits/chosen": 1.567601203918457, "logits/rejected": 1.4194416999816895, "loss": 1.1998, "step": 496 }, { "beta_dpo/beta": 0.1375540941953659, "beta_dpo/beta_margin_grad_mean": -0.35231631994247437, "beta_dpo/beta_margin_grad_std": 0.2726176381111145, "beta_dpo/beta_margin_mean": 1.4071952104568481, "beta_dpo/beta_margin_std": 3.0741336345672607, "beta_dpo/beta_used": 0.1375540941953659, "beta_dpo/beta_used_raw": 0.1375540941953659, "beta_dpo/gap_mean": 8.478071212768555, "beta_dpo/gap_std": 15.789005279541016, "beta_dpo/loss_margin_mean": 9.306314468383789, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7513227513227513, "grad_norm": 47.519474029541016, "learning_rate": 8.930309757836516e-08, "logits/chosen": 1.2274080514907837, "logits/rejected": 1.1559877395629883, "loss": 1.205, "step": 497 }, { "beta_dpo/beta": 0.14629700779914856, "beta_dpo/beta_margin_grad_mean": -0.37526050209999084, "beta_dpo/beta_margin_grad_std": 0.25155338644981384, "beta_dpo/beta_margin_mean": 1.5782233476638794, "beta_dpo/beta_margin_std": 3.1664230823516846, "beta_dpo/beta_used": 0.14629700779914856, "beta_dpo/beta_used_raw": 0.12825556099414825, "beta_dpo/gap_mean": 8.519954681396484, "beta_dpo/gap_std": 15.585509300231934, "beta_dpo/loss_margin_mean": 9.394691467285156, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7528344671201814, "grad_norm": 65.0245132446289, "learning_rate": 8.829247120198563e-08, "logits/chosen": 1.311582088470459, "logits/rejected": 1.2794764041900635, "loss": 1.0241, "step": 498 }, { "beta_dpo/beta": 0.19346806406974792, "beta_dpo/beta_margin_grad_mean": -0.40234655141830444, "beta_dpo/beta_margin_grad_std": 0.31278374791145325, "beta_dpo/beta_margin_mean": 1.59882390499115, "beta_dpo/beta_margin_std": 5.184474945068359, "beta_dpo/beta_used": 0.19346806406974792, "beta_dpo/beta_used_raw": 0.18081435561180115, "beta_dpo/gap_mean": 8.605300903320312, "beta_dpo/gap_std": 15.78538990020752, "beta_dpo/loss_margin_mean": 8.464212417602539, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7543461829176115, "grad_norm": 50.980735778808594, "learning_rate": 8.728636813280163e-08, "logits/chosen": 1.264394998550415, "logits/rejected": 1.1896038055419922, "loss": 1.447, "step": 499 }, { "beta_dpo/beta": 0.06631321460008621, "beta_dpo/beta_margin_grad_mean": -0.41595977544784546, "beta_dpo/beta_margin_grad_std": 0.21724657714366913, "beta_dpo/beta_margin_mean": 0.5961994528770447, "beta_dpo/beta_margin_std": 1.4269115924835205, "beta_dpo/beta_used": 0.06631321460008621, "beta_dpo/beta_used_raw": 0.06631321460008621, "beta_dpo/gap_mean": 8.392349243164062, "beta_dpo/gap_std": 15.650519371032715, "beta_dpo/loss_margin_mean": 7.612263202667236, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7558578987150416, "grad_norm": 26.242412567138672, "learning_rate": 8.628481651367875e-08, "logits/chosen": 1.347449541091919, "logits/rejected": 1.3151437044143677, "loss": 1.2301, "step": 500 }, { "epoch": 0.7558578987150416, "eval_beta_dpo/beta": 0.12900032103061676, "eval_beta_dpo/beta_margin_grad_mean": -0.3624914288520813, "eval_beta_dpo/beta_margin_grad_std": 0.20973293483257294, "eval_beta_dpo/beta_margin_mean": 1.3653007745742798, "eval_beta_dpo/beta_margin_std": 2.0186424255371094, "eval_beta_dpo/beta_used": 0.12900032103061676, "eval_beta_dpo/beta_used_raw": 0.11448737978935242, "eval_beta_dpo/gap_mean": 8.435112953186035, "eval_beta_dpo/gap_std": 15.631720542907715, "eval_beta_dpo/loss_margin_mean": 8.676569938659668, "eval_beta_dpo/mask_keep_frac": 1.0, "eval_logits/chosen": 1.2194130420684814, "eval_logits/rejected": 1.1512391567230225, "eval_loss": 0.6310787796974182, "eval_runtime": 43.5066, "eval_samples_per_second": 52.934, "eval_steps_per_second": 1.655, "step": 500 }, { "beta_dpo/beta": 0.0842582955956459, "beta_dpo/beta_margin_grad_mean": -0.40922731161117554, "beta_dpo/beta_margin_grad_std": 0.2037898600101471, "beta_dpo/beta_margin_mean": 0.7561125755310059, "beta_dpo/beta_margin_std": 1.7290011644363403, "beta_dpo/beta_used": 0.0842582955956459, "beta_dpo/beta_used_raw": 0.0739816352725029, "beta_dpo/gap_mean": 8.32042121887207, "beta_dpo/gap_std": 15.216630935668945, "beta_dpo/loss_margin_mean": 7.961922645568848, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7573696145124716, "grad_norm": 26.170494079589844, "learning_rate": 8.528784436016878e-08, "logits/chosen": 1.1607296466827393, "logits/rejected": 1.1347827911376953, "loss": 1.1527, "step": 501 }, { "beta_dpo/beta": 0.03724071756005287, "beta_dpo/beta_margin_grad_mean": -0.4399970769882202, "beta_dpo/beta_margin_grad_std": 0.13782690465450287, "beta_dpo/beta_margin_mean": 0.2895679175853729, "beta_dpo/beta_margin_std": 0.6904258131980896, "beta_dpo/beta_used": 0.03724071756005287, "beta_dpo/beta_used_raw": 0.012635238468647003, "beta_dpo/gap_mean": 8.11561393737793, "beta_dpo/gap_std": 14.806890487670898, "beta_dpo/loss_margin_mean": 6.198487758636475, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7588813303099018, "grad_norm": 19.033384323120117, "learning_rate": 8.4295479559726e-08, "logits/chosen": 1.2781716585159302, "logits/rejected": 1.2537305355072021, "loss": 1.1892, "step": 502 }, { "beta_dpo/beta": 0.26611024141311646, "beta_dpo/beta_margin_grad_mean": -0.2744055390357971, "beta_dpo/beta_margin_grad_std": 0.3376629650592804, "beta_dpo/beta_margin_mean": 2.5834662914276123, "beta_dpo/beta_margin_std": 4.09039306640625, "beta_dpo/beta_used": 0.26611024141311646, "beta_dpo/beta_used_raw": 0.26611024141311646, "beta_dpo/gap_mean": 8.190786361694336, "beta_dpo/gap_std": 14.82516860961914, "beta_dpo/loss_margin_mean": 9.649856567382812, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7603930461073318, "grad_norm": 57.269222259521484, "learning_rate": 8.330774987092712e-08, "logits/chosen": 1.3074579238891602, "logits/rejected": 1.3287543058395386, "loss": 0.9177, "step": 503 }, { "beta_dpo/beta": 0.11062437295913696, "beta_dpo/beta_margin_grad_mean": -0.337758868932724, "beta_dpo/beta_margin_grad_std": 0.22527188062667847, "beta_dpo/beta_margin_mean": 1.5635826587677002, "beta_dpo/beta_margin_std": 2.6599931716918945, "beta_dpo/beta_used": 0.11062437295913696, "beta_dpo/beta_used_raw": 0.084391288459301, "beta_dpo/gap_mean": 8.561979293823242, "beta_dpo/gap_std": 14.650800704956055, "beta_dpo/loss_margin_mean": 11.275338172912598, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7619047619047619, "grad_norm": 22.946237564086914, "learning_rate": 8.232468292269479e-08, "logits/chosen": 1.335453748703003, "logits/rejected": 1.306289792060852, "loss": 0.9216, "step": 504 }, { "beta_dpo/beta": 0.07179925590753555, "beta_dpo/beta_margin_grad_mean": -0.41799870133399963, "beta_dpo/beta_margin_grad_std": 0.24787545204162598, "beta_dpo/beta_margin_mean": 0.6309289932250977, "beta_dpo/beta_margin_std": 1.9183470010757446, "beta_dpo/beta_used": 0.07179925590753555, "beta_dpo/beta_used_raw": 0.06938499212265015, "beta_dpo/gap_mean": 8.652692794799805, "beta_dpo/gap_std": 15.009008407592773, "beta_dpo/loss_margin_mean": 6.836058616638184, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.763416477702192, "grad_norm": 28.69261932373047, "learning_rate": 8.134630621352483e-08, "logits/chosen": 1.0778157711029053, "logits/rejected": 0.9853509664535522, "loss": 1.2664, "step": 505 }, { "beta_dpo/beta": 0.1097840741276741, "beta_dpo/beta_margin_grad_mean": -0.39366090297698975, "beta_dpo/beta_margin_grad_std": 0.2954460084438324, "beta_dpo/beta_margin_mean": 0.6261573433876038, "beta_dpo/beta_margin_std": 1.9519262313842773, "beta_dpo/beta_used": 0.1097840741276741, "beta_dpo/beta_used_raw": 0.1097840741276741, "beta_dpo/gap_mean": 8.113224983215332, "beta_dpo/gap_std": 15.423563003540039, "beta_dpo/loss_margin_mean": 5.594672203063965, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.764928193499622, "grad_norm": 29.990312576293945, "learning_rate": 8.037264711071698e-08, "logits/chosen": 1.100189447402954, "logits/rejected": 1.08306086063385, "loss": 1.1331, "step": 506 }, { "beta_dpo/beta": 0.055617570877075195, "beta_dpo/beta_margin_grad_mean": -0.4389042258262634, "beta_dpo/beta_margin_grad_std": 0.19821825623512268, "beta_dpo/beta_margin_mean": 0.4292410612106323, "beta_dpo/beta_margin_std": 1.3530104160308838, "beta_dpo/beta_used": 0.055617570877075195, "beta_dpo/beta_used_raw": 0.055617570877075195, "beta_dpo/gap_mean": 7.837147235870361, "beta_dpo/gap_std": 15.702282905578613, "beta_dpo/loss_margin_mean": 7.381948947906494, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7664399092970522, "grad_norm": 19.953712463378906, "learning_rate": 7.940373284960933e-08, "logits/chosen": 1.2273082733154297, "logits/rejected": 1.1556060314178467, "loss": 1.2762, "step": 507 }, { "beta_dpo/beta": 0.12468338012695312, "beta_dpo/beta_margin_grad_mean": -0.3594365417957306, "beta_dpo/beta_margin_grad_std": 0.25680065155029297, "beta_dpo/beta_margin_mean": 1.5802284479141235, "beta_dpo/beta_margin_std": 3.037984609603882, "beta_dpo/beta_used": 0.12468338012695312, "beta_dpo/beta_used_raw": 0.019509881734848022, "beta_dpo/gap_mean": 7.87746524810791, "beta_dpo/gap_std": 15.627901077270508, "beta_dpo/loss_margin_mean": 9.403909683227539, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7679516250944822, "grad_norm": 52.756736755371094, "learning_rate": 7.843959053281663e-08, "logits/chosen": 1.192077398300171, "logits/rejected": 1.028613805770874, "loss": 1.2675, "step": 508 }, { "beta_dpo/beta": 0.041742824018001556, "beta_dpo/beta_margin_grad_mean": -0.4245273470878601, "beta_dpo/beta_margin_grad_std": 0.17522205412387848, "beta_dpo/beta_margin_mean": 0.370586633682251, "beta_dpo/beta_margin_std": 0.8615016341209412, "beta_dpo/beta_used": 0.041742824018001556, "beta_dpo/beta_used_raw": 0.011772872880101204, "beta_dpo/gap_mean": 8.242198944091797, "beta_dpo/gap_std": 15.512893676757812, "beta_dpo/loss_margin_mean": 8.89169979095459, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7694633408919124, "grad_norm": 17.92835235595703, "learning_rate": 7.748024712947204e-08, "logits/chosen": 1.2415645122528076, "logits/rejected": 1.2159892320632935, "loss": 1.2149, "step": 509 }, { "beta_dpo/beta": 0.08406226336956024, "beta_dpo/beta_margin_grad_mean": -0.3821350336074829, "beta_dpo/beta_margin_grad_std": 0.2446945309638977, "beta_dpo/beta_margin_mean": 1.0495874881744385, "beta_dpo/beta_margin_std": 2.1299333572387695, "beta_dpo/beta_used": 0.08406226336956024, "beta_dpo/beta_used_raw": 0.06441254913806915, "beta_dpo/gap_mean": 8.698486328125, "beta_dpo/gap_std": 15.522483825683594, "beta_dpo/loss_margin_mean": 10.599325180053711, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7709750566893424, "grad_norm": 35.02284622192383, "learning_rate": 7.652572947447272e-08, "logits/chosen": 1.340779185295105, "logits/rejected": 1.2162147760391235, "loss": 1.1841, "step": 510 }, { "beta_dpo/beta": 0.10140527784824371, "beta_dpo/beta_margin_grad_mean": -0.3813478350639343, "beta_dpo/beta_margin_grad_std": 0.24966783821582794, "beta_dpo/beta_margin_mean": 1.2869607210159302, "beta_dpo/beta_margin_std": 2.6547703742980957, "beta_dpo/beta_used": 0.10140527784824371, "beta_dpo/beta_used_raw": 0.09635543823242188, "beta_dpo/gap_mean": 9.028369903564453, "beta_dpo/gap_std": 15.63941478729248, "beta_dpo/loss_margin_mean": 11.385443687438965, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7724867724867724, "grad_norm": 32.982994079589844, "learning_rate": 7.557606426772961e-08, "logits/chosen": 1.0060919523239136, "logits/rejected": 0.9972001910209656, "loss": 1.1686, "step": 511 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4980872869491577, "beta_dpo/beta_margin_grad_std": 0.003784729167819023, "beta_dpo/beta_margin_mean": 0.0076514980755746365, "beta_dpo/beta_margin_std": 0.015140078961849213, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.15243175625801086, "beta_dpo/gap_mean": 9.063308715820312, "beta_dpo/gap_std": 15.602668762207031, "beta_dpo/loss_margin_mean": 7.6514973640441895, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7739984882842026, "grad_norm": 0.38143348693847656, "learning_rate": 7.463127807341966e-08, "logits/chosen": 1.1713460683822632, "logits/rejected": 1.083141803741455, "loss": 1.3815, "step": 512 }, { "beta_dpo/beta": 0.029492665082216263, "beta_dpo/beta_margin_grad_mean": -0.4373173117637634, "beta_dpo/beta_margin_grad_std": 0.09860417991876602, "beta_dpo/beta_margin_mean": 0.2670486569404602, "beta_dpo/beta_margin_std": 0.4258429706096649, "beta_dpo/beta_used": 0.029492665082216263, "beta_dpo/beta_used_raw": 0.029492665082216263, "beta_dpo/gap_mean": 9.020488739013672, "beta_dpo/gap_std": 15.474786758422852, "beta_dpo/loss_margin_mean": 8.947519302368164, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7755102040816326, "grad_norm": 8.190389633178711, "learning_rate": 7.369139731924401e-08, "logits/chosen": 1.411609411239624, "logits/rejected": 1.3289296627044678, "loss": 1.2034, "step": 513 }, { "beta_dpo/beta": 0.18778222799301147, "beta_dpo/beta_margin_grad_mean": -0.29142025113105774, "beta_dpo/beta_margin_grad_std": 0.2884121537208557, "beta_dpo/beta_margin_mean": 1.84999680519104, "beta_dpo/beta_margin_std": 2.9089887142181396, "beta_dpo/beta_used": 0.18778222799301147, "beta_dpo/beta_used_raw": 0.18778222799301147, "beta_dpo/gap_mean": 9.031063079833984, "beta_dpo/gap_std": 15.233624458312988, "beta_dpo/loss_margin_mean": 9.787365913391113, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7770219198790628, "grad_norm": 32.78883743286133, "learning_rate": 7.275644829568747e-08, "logits/chosen": 1.2265822887420654, "logits/rejected": 1.1920336484909058, "loss": 0.7064, "step": 514 }, { "beta_dpo/beta": 0.09070698916912079, "beta_dpo/beta_margin_grad_mean": -0.41331547498703003, "beta_dpo/beta_margin_grad_std": 0.24861948192119598, "beta_dpo/beta_margin_mean": 0.8262215852737427, "beta_dpo/beta_margin_std": 2.2216925621032715, "beta_dpo/beta_used": 0.09070698916912079, "beta_dpo/beta_used_raw": 0.07224002480506897, "beta_dpo/gap_mean": 8.951355934143066, "beta_dpo/gap_std": 15.244037628173828, "beta_dpo/loss_margin_mean": 7.946232318878174, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7785336356764928, "grad_norm": 29.780132293701172, "learning_rate": 7.182645715528435e-08, "logits/chosen": 1.508201241493225, "logits/rejected": 1.4241424798965454, "loss": 1.1395, "step": 515 }, { "beta_dpo/beta": 0.06425631046295166, "beta_dpo/beta_margin_grad_mean": -0.40338146686553955, "beta_dpo/beta_margin_grad_std": 0.18757954239845276, "beta_dpo/beta_margin_mean": 0.4550299048423767, "beta_dpo/beta_margin_std": 0.9595562219619751, "beta_dpo/beta_used": 0.06425631046295166, "beta_dpo/beta_used_raw": 0.06425631046295166, "beta_dpo/gap_mean": 8.591944694519043, "beta_dpo/gap_std": 15.072626113891602, "beta_dpo/loss_margin_mean": 7.4303460121154785, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.780045351473923, "grad_norm": 16.75914764404297, "learning_rate": 7.090144991188568e-08, "logits/chosen": 1.0630478858947754, "logits/rejected": 0.9949425458908081, "loss": 1.048, "step": 516 }, { "beta_dpo/beta": 0.029759714379906654, "beta_dpo/beta_margin_grad_mean": -0.4609003961086273, "beta_dpo/beta_margin_grad_std": 0.12479053437709808, "beta_dpo/beta_margin_mean": 0.18986453115940094, "beta_dpo/beta_margin_std": 0.6050887107849121, "beta_dpo/beta_used": 0.029759714379906654, "beta_dpo/beta_used_raw": -0.029511921107769012, "beta_dpo/gap_mean": 8.238603591918945, "beta_dpo/gap_std": 15.265298843383789, "beta_dpo/loss_margin_mean": 6.269947528839111, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.781557067271353, "grad_norm": 10.626750946044922, "learning_rate": 6.998145243993284e-08, "logits/chosen": 1.2810413837432861, "logits/rejected": 1.2711362838745117, "loss": 1.2496, "step": 517 }, { "beta_dpo/beta": 0.05780534818768501, "beta_dpo/beta_margin_grad_mean": -0.437270849943161, "beta_dpo/beta_margin_grad_std": 0.20933924615383148, "beta_dpo/beta_margin_mean": 0.46789804100990295, "beta_dpo/beta_margin_std": 1.412877082824707, "beta_dpo/beta_used": 0.05780534818768501, "beta_dpo/beta_used_raw": 0.03885362669825554, "beta_dpo/gap_mean": 7.883532524108887, "beta_dpo/gap_std": 15.234363555908203, "beta_dpo/loss_margin_mean": 6.780441761016846, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.783068783068783, "grad_norm": 18.98142433166504, "learning_rate": 6.906649047373245e-08, "logits/chosen": 1.0652557611465454, "logits/rejected": 0.9838038682937622, "loss": 1.2426, "step": 518 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4986930787563324, "beta_dpo/beta_margin_grad_std": 0.004700750112533569, "beta_dpo/beta_margin_mean": 0.005228013265877962, "beta_dpo/beta_margin_std": 0.0188044011592865, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.10169404745101929, "beta_dpo/gap_mean": 7.39609432220459, "beta_dpo/gap_std": 15.862621307373047, "beta_dpo/loss_margin_mean": 5.228013038635254, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7845804988662132, "grad_norm": 0.3468739688396454, "learning_rate": 6.815658960673781e-08, "logits/chosen": 1.4341715574264526, "logits/rejected": 1.3577253818511963, "loss": 1.3823, "step": 519 }, { "beta_dpo/beta": 0.11534042656421661, "beta_dpo/beta_margin_grad_mean": -0.3637125492095947, "beta_dpo/beta_margin_grad_std": 0.23695090413093567, "beta_dpo/beta_margin_mean": 1.313083291053772, "beta_dpo/beta_margin_std": 2.40010404586792, "beta_dpo/beta_used": 0.11534042656421661, "beta_dpo/beta_used_raw": 0.013542748987674713, "beta_dpo/gap_mean": 7.62299919128418, "beta_dpo/gap_std": 15.442781448364258, "beta_dpo/loss_margin_mean": 7.702384948730469, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7860922146636432, "grad_norm": 27.789459228515625, "learning_rate": 6.725177529083209e-08, "logits/chosen": 1.3084077835083008, "logits/rejected": 1.2144858837127686, "loss": 1.033, "step": 520 }, { "beta_dpo/beta": 0.10655572265386581, "beta_dpo/beta_margin_grad_mean": -0.35950157046318054, "beta_dpo/beta_margin_grad_std": 0.2488071769475937, "beta_dpo/beta_margin_mean": 1.0529520511627197, "beta_dpo/beta_margin_std": 1.9023741483688354, "beta_dpo/beta_used": 0.10655572265386581, "beta_dpo/beta_used_raw": 0.10655572265386581, "beta_dpo/gap_mean": 7.508369445800781, "beta_dpo/gap_std": 15.275403022766113, "beta_dpo/loss_margin_mean": 8.58330249786377, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7876039304610734, "grad_norm": 26.136470794677734, "learning_rate": 6.63520728356167e-08, "logits/chosen": 1.0999151468276978, "logits/rejected": 0.9847517013549805, "loss": 1.0651, "step": 521 }, { "beta_dpo/beta": 0.2019219547510147, "beta_dpo/beta_margin_grad_mean": -0.3990466892719269, "beta_dpo/beta_margin_grad_std": 0.30489471554756165, "beta_dpo/beta_margin_mean": 1.3157075643539429, "beta_dpo/beta_margin_std": 5.186375617980957, "beta_dpo/beta_used": 0.2019219547510147, "beta_dpo/beta_used_raw": 0.2019219547510147, "beta_dpo/gap_mean": 7.4453840255737305, "beta_dpo/gap_std": 15.706568717956543, "beta_dpo/loss_margin_mean": 6.2311625480651855, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7891156462585034, "grad_norm": 60.13499450683594, "learning_rate": 6.545750740770336e-08, "logits/chosen": 1.1132246255874634, "logits/rejected": 1.1097826957702637, "loss": 1.2254, "step": 522 }, { "beta_dpo/beta": 0.13557228446006775, "beta_dpo/beta_margin_grad_mean": -0.3367370665073395, "beta_dpo/beta_margin_grad_std": 0.28432610630989075, "beta_dpo/beta_margin_mean": 1.3112093210220337, "beta_dpo/beta_margin_std": 2.233006000518799, "beta_dpo/beta_used": 0.13557228446006775, "beta_dpo/beta_used_raw": 0.13557228446006775, "beta_dpo/gap_mean": 7.6764936447143555, "beta_dpo/gap_std": 15.761053085327148, "beta_dpo/loss_margin_mean": 8.553004264831543, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7906273620559335, "grad_norm": 43.26845932006836, "learning_rate": 6.456810403001012e-08, "logits/chosen": 1.3522729873657227, "logits/rejected": 1.2240285873413086, "loss": 1.0392, "step": 523 }, { "beta_dpo/beta": 0.14213837683200836, "beta_dpo/beta_margin_grad_mean": -0.3865470588207245, "beta_dpo/beta_margin_grad_std": 0.30028629302978516, "beta_dpo/beta_margin_mean": 0.9078271985054016, "beta_dpo/beta_margin_std": 2.4876906871795654, "beta_dpo/beta_used": 0.14213837683200836, "beta_dpo/beta_used_raw": 0.14213837683200836, "beta_dpo/gap_mean": 7.359951972961426, "beta_dpo/gap_std": 15.630659103393555, "beta_dpo/loss_margin_mean": 6.1383748054504395, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7921390778533636, "grad_norm": 57.35697555541992, "learning_rate": 6.368388758106134e-08, "logits/chosen": 1.179058313369751, "logits/rejected": 1.1576694250106812, "loss": 1.163, "step": 524 }, { "beta_dpo/beta": 0.08663511276245117, "beta_dpo/beta_margin_grad_mean": -0.4115186929702759, "beta_dpo/beta_margin_grad_std": 0.246219202876091, "beta_dpo/beta_margin_mean": 0.7762193083763123, "beta_dpo/beta_margin_std": 1.9916539192199707, "beta_dpo/beta_used": 0.08663511276245117, "beta_dpo/beta_used_raw": 0.04566335305571556, "beta_dpo/gap_mean": 7.272393226623535, "beta_dpo/gap_std": 15.498950958251953, "beta_dpo/loss_margin_mean": 6.056159973144531, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7936507936507936, "grad_norm": 30.81611442565918, "learning_rate": 6.280488279429185e-08, "logits/chosen": 1.0387253761291504, "logits/rejected": 1.0043344497680664, "loss": 1.1954, "step": 525 }, { "beta_dpo/beta": 0.1067892462015152, "beta_dpo/beta_margin_grad_mean": -0.37539657950401306, "beta_dpo/beta_margin_grad_std": 0.23800675570964813, "beta_dpo/beta_margin_mean": 1.3335126638412476, "beta_dpo/beta_margin_std": 2.6430134773254395, "beta_dpo/beta_used": 0.1067892462015152, "beta_dpo/beta_used_raw": 0.0007416233420372009, "beta_dpo/gap_mean": 7.282289981842041, "beta_dpo/gap_std": 15.198205947875977, "beta_dpo/loss_margin_mean": 6.869879245758057, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7951625094482238, "grad_norm": 30.191267013549805, "learning_rate": 6.193111425735515e-08, "logits/chosen": 1.1532905101776123, "logits/rejected": 1.0733742713928223, "loss": 1.1578, "step": 526 }, { "beta_dpo/beta": 0.0481080487370491, "beta_dpo/beta_margin_grad_mean": -0.44247305393218994, "beta_dpo/beta_margin_grad_std": 0.1592341810464859, "beta_dpo/beta_margin_mean": 0.2580515146255493, "beta_dpo/beta_margin_std": 0.7128713130950928, "beta_dpo/beta_used": 0.0481080487370491, "beta_dpo/beta_used_raw": 0.0481080487370491, "beta_dpo/gap_mean": 6.703248500823975, "beta_dpo/gap_std": 14.977252960205078, "beta_dpo/loss_margin_mean": 5.508336067199707, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7966742252456538, "grad_norm": 13.756309509277344, "learning_rate": 6.106260641143546e-08, "logits/chosen": 1.4220060110092163, "logits/rejected": 1.2894335985183716, "loss": 1.2061, "step": 527 }, { "beta_dpo/beta": 0.13932912051677704, "beta_dpo/beta_margin_grad_mean": -0.3513142764568329, "beta_dpo/beta_margin_grad_std": 0.27392539381980896, "beta_dpo/beta_margin_mean": 1.1128238439559937, "beta_dpo/beta_margin_std": 2.790832042694092, "beta_dpo/beta_used": 0.13932912051677704, "beta_dpo/beta_used_raw": 0.13932912051677704, "beta_dpo/gap_mean": 6.893251419067383, "beta_dpo/gap_std": 15.283125877380371, "beta_dpo/loss_margin_mean": 8.12575912475586, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.7981859410430839, "grad_norm": 33.40256118774414, "learning_rate": 6.019938355056422e-08, "logits/chosen": 1.4654052257537842, "logits/rejected": 1.361560583114624, "loss": 1.0162, "step": 528 }, { "beta_dpo/beta": 0.3706626892089844, "beta_dpo/beta_margin_grad_mean": -0.24648523330688477, "beta_dpo/beta_margin_grad_std": 0.2973049283027649, "beta_dpo/beta_margin_mean": 6.077200412750244, "beta_dpo/beta_margin_std": 8.564191818237305, "beta_dpo/beta_used": 0.3706626892089844, "beta_dpo/beta_used_raw": 0.3706626892089844, "beta_dpo/gap_mean": 8.090949058532715, "beta_dpo/gap_std": 15.185094833374023, "beta_dpo/loss_margin_mean": 13.548951148986816, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.799697656840514, "grad_norm": 73.29319763183594, "learning_rate": 5.934146982094049e-08, "logits/chosen": 1.0952523946762085, "logits/rejected": 1.0364768505096436, "loss": 0.8932, "step": 529 }, { "beta_dpo/beta": 0.16738095879554749, "beta_dpo/beta_margin_grad_mean": -0.32800406217575073, "beta_dpo/beta_margin_grad_std": 0.3087153732776642, "beta_dpo/beta_margin_mean": 1.573071002960205, "beta_dpo/beta_margin_std": 3.1769614219665527, "beta_dpo/beta_used": 0.16738095879554749, "beta_dpo/beta_used_raw": 0.16738095879554749, "beta_dpo/gap_mean": 8.358198165893555, "beta_dpo/gap_std": 15.306008338928223, "beta_dpo/loss_margin_mean": 9.372478485107422, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8012093726379441, "grad_norm": 50.17977523803711, "learning_rate": 5.848888922025552e-08, "logits/chosen": 0.977916955947876, "logits/rejected": 0.9343423843383789, "loss": 1.0008, "step": 530 }, { "beta_dpo/beta": 0.12758412957191467, "beta_dpo/beta_margin_grad_mean": -0.36042869091033936, "beta_dpo/beta_margin_grad_std": 0.2259923368692398, "beta_dpo/beta_margin_mean": 1.3581695556640625, "beta_dpo/beta_margin_std": 2.5714547634124756, "beta_dpo/beta_used": 0.12758412957191467, "beta_dpo/beta_used_raw": 0.1111588403582573, "beta_dpo/gap_mean": 8.160855293273926, "beta_dpo/gap_std": 14.826172828674316, "beta_dpo/loss_margin_mean": 7.639277458190918, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8027210884353742, "grad_norm": 32.76897048950195, "learning_rate": 5.7641665597021435e-08, "logits/chosen": 1.1931384801864624, "logits/rejected": 1.1271367073059082, "loss": 0.9711, "step": 531 }, { "beta_dpo/beta": 0.09142545610666275, "beta_dpo/beta_margin_grad_mean": -0.3953922390937805, "beta_dpo/beta_margin_grad_std": 0.22126024961471558, "beta_dpo/beta_margin_mean": 0.6120261549949646, "beta_dpo/beta_margin_std": 1.5054887533187866, "beta_dpo/beta_used": 0.09142545610666275, "beta_dpo/beta_used_raw": 0.09142545610666275, "beta_dpo/gap_mean": 8.395885467529297, "beta_dpo/gap_std": 14.686279296875, "beta_dpo/loss_margin_mean": 8.282346725463867, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8042328042328042, "grad_norm": 27.678478240966797, "learning_rate": 5.679982264990424e-08, "logits/chosen": 1.0794322490692139, "logits/rejected": 1.0217549800872803, "loss": 1.0608, "step": 532 }, { "beta_dpo/beta": 0.08020737767219543, "beta_dpo/beta_margin_grad_mean": -0.40842196345329285, "beta_dpo/beta_margin_grad_std": 0.2721438407897949, "beta_dpo/beta_margin_mean": 0.7550824284553528, "beta_dpo/beta_margin_std": 2.28764009475708, "beta_dpo/beta_used": 0.08020737767219543, "beta_dpo/beta_used_raw": 0.07088775187730789, "beta_dpo/gap_mean": 8.168783187866211, "beta_dpo/gap_std": 15.009658813476562, "beta_dpo/loss_margin_mean": 8.0230712890625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8057445200302343, "grad_norm": 32.82867431640625, "learning_rate": 5.596338392706076e-08, "logits/chosen": 1.4970355033874512, "logits/rejected": 1.3700031042099, "loss": 1.4064, "step": 533 }, { "beta_dpo/beta": 0.15440692007541656, "beta_dpo/beta_margin_grad_mean": -0.36432546377182007, "beta_dpo/beta_margin_grad_std": 0.27774715423583984, "beta_dpo/beta_margin_mean": 2.107167959213257, "beta_dpo/beta_margin_std": 4.183852672576904, "beta_dpo/beta_used": 0.15440692007541656, "beta_dpo/beta_used_raw": 0.1260472536087036, "beta_dpo/gap_mean": 8.621475219726562, "beta_dpo/gap_std": 15.336591720581055, "beta_dpo/loss_margin_mean": 9.650552749633789, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8072562358276644, "grad_norm": 55.175025939941406, "learning_rate": 5.513237282548033e-08, "logits/chosen": 1.0398293733596802, "logits/rejected": 1.02956223487854, "loss": 1.3208, "step": 534 }, { "beta_dpo/beta": 0.17304572463035583, "beta_dpo/beta_margin_grad_mean": -0.33443713188171387, "beta_dpo/beta_margin_grad_std": 0.3046601116657257, "beta_dpo/beta_margin_mean": 1.5655525922775269, "beta_dpo/beta_margin_std": 2.966926097869873, "beta_dpo/beta_used": 0.17304572463035583, "beta_dpo/beta_used_raw": 0.17304572463035583, "beta_dpo/gap_mean": 8.521682739257812, "beta_dpo/gap_std": 15.320834159851074, "beta_dpo/loss_margin_mean": 8.643691062927246, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8087679516250945, "grad_norm": 40.93467330932617, "learning_rate": 5.430681259032957e-08, "logits/chosen": 1.0570318698883057, "logits/rejected": 0.9800806641578674, "loss": 0.9346, "step": 535 }, { "beta_dpo/beta": 0.16060255467891693, "beta_dpo/beta_margin_grad_mean": -0.35615992546081543, "beta_dpo/beta_margin_grad_std": 0.23471446335315704, "beta_dpo/beta_margin_mean": 1.9467401504516602, "beta_dpo/beta_margin_std": 3.6517608165740967, "beta_dpo/beta_used": 0.16060255467891693, "beta_dpo/beta_used_raw": 0.10881784558296204, "beta_dpo/gap_mean": 8.824699401855469, "beta_dpo/gap_std": 15.217926025390625, "beta_dpo/loss_margin_mean": 10.188438415527344, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8102796674225246, "grad_norm": 29.208940505981445, "learning_rate": 5.3486726314303175e-08, "logits/chosen": 1.1016263961791992, "logits/rejected": 1.0059990882873535, "loss": 0.9383, "step": 536 }, { "beta_dpo/beta": 0.047818832099437714, "beta_dpo/beta_margin_grad_mean": -0.43136221170425415, "beta_dpo/beta_margin_grad_std": 0.16197457909584045, "beta_dpo/beta_margin_mean": 0.33009785413742065, "beta_dpo/beta_margin_std": 0.8123959898948669, "beta_dpo/beta_used": 0.047818832099437714, "beta_dpo/beta_used_raw": 0.047818832099437714, "beta_dpo/gap_mean": 8.446852684020996, "beta_dpo/gap_std": 14.9360933303833, "beta_dpo/loss_margin_mean": 6.56203556060791, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8117913832199547, "grad_norm": 14.774935722351074, "learning_rate": 5.267213693697695e-08, "logits/chosen": 1.2608040571212769, "logits/rejected": 1.1432770490646362, "loss": 1.1518, "step": 537 }, { "beta_dpo/beta": 0.05728251487016678, "beta_dpo/beta_margin_grad_mean": -0.3964690566062927, "beta_dpo/beta_margin_grad_std": 0.18261270225048065, "beta_dpo/beta_margin_mean": 0.6189707517623901, "beta_dpo/beta_margin_std": 1.2403920888900757, "beta_dpo/beta_used": 0.05728251487016678, "beta_dpo/beta_used_raw": 0.02719694934785366, "beta_dpo/gap_mean": 8.421884536743164, "beta_dpo/gap_std": 14.781656265258789, "beta_dpo/loss_margin_mean": 9.203042984008789, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8133030990173847, "grad_norm": 15.689529418945312, "learning_rate": 5.1863067244167144e-08, "logits/chosen": 1.07925283908844, "logits/rejected": 1.0223917961120605, "loss": 1.0953, "step": 538 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49804821610450745, "beta_dpo/beta_margin_grad_std": 0.0030981386080384254, "beta_dpo/beta_margin_mean": 0.007807582151144743, "beta_dpo/beta_margin_std": 0.012393561191856861, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.0549732930958271, "beta_dpo/gap_mean": 8.416460037231445, "beta_dpo/gap_std": 14.297332763671875, "beta_dpo/loss_margin_mean": 7.807581424713135, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8148148148148148, "grad_norm": 0.33566319942474365, "learning_rate": 5.105953986729195e-08, "logits/chosen": 1.3011579513549805, "logits/rejected": 1.134751558303833, "loss": 1.3805, "step": 539 }, { "beta_dpo/beta": 0.2007492184638977, "beta_dpo/beta_margin_grad_mean": -0.30484333634376526, "beta_dpo/beta_margin_grad_std": 0.31224846839904785, "beta_dpo/beta_margin_mean": 1.9463728666305542, "beta_dpo/beta_margin_std": 3.1974806785583496, "beta_dpo/beta_used": 0.2007492184638977, "beta_dpo/beta_used_raw": 0.2007492184638977, "beta_dpo/gap_mean": 8.602801322937012, "beta_dpo/gap_std": 14.396449089050293, "beta_dpo/loss_margin_mean": 9.807937622070312, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8163265306122449, "grad_norm": 48.30702590942383, "learning_rate": 5.026157728273966e-08, "logits/chosen": 1.4509068727493286, "logits/rejected": 1.3168927431106567, "loss": 0.9022, "step": 540 }, { "beta_dpo/beta": 0.21265891194343567, "beta_dpo/beta_margin_grad_mean": -0.2759414613246918, "beta_dpo/beta_margin_grad_std": 0.33086225390434265, "beta_dpo/beta_margin_mean": 2.2098233699798584, "beta_dpo/beta_margin_std": 3.124072790145874, "beta_dpo/beta_used": 0.21265891194343567, "beta_dpo/beta_used_raw": 0.21265891194343567, "beta_dpo/gap_mean": 8.943014144897461, "beta_dpo/gap_std": 14.430560111999512, "beta_dpo/loss_margin_mean": 10.11587905883789, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.817838246409675, "grad_norm": 56.282127380371094, "learning_rate": 4.9469201811239035e-08, "logits/chosen": 1.3912067413330078, "logits/rejected": 1.4174320697784424, "loss": 0.8092, "step": 541 }, { "beta_dpo/beta": 0.3174495995044708, "beta_dpo/beta_margin_grad_mean": -0.2677096426486969, "beta_dpo/beta_margin_grad_std": 0.3603222668170929, "beta_dpo/beta_margin_mean": 3.70037579536438, "beta_dpo/beta_margin_std": 5.134766101837158, "beta_dpo/beta_used": 0.3174495995044708, "beta_dpo/beta_used_raw": 0.3174495995044708, "beta_dpo/gap_mean": 9.359155654907227, "beta_dpo/gap_std": 14.655593872070312, "beta_dpo/loss_margin_mean": 11.443473815917969, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8193499622071051, "grad_norm": 91.55038452148438, "learning_rate": 4.868243561723534e-08, "logits/chosen": 1.085046648979187, "logits/rejected": 1.0205934047698975, "loss": 1.0193, "step": 542 }, { "beta_dpo/beta": 0.04393059015274048, "beta_dpo/beta_margin_grad_mean": -0.415033757686615, "beta_dpo/beta_margin_grad_std": 0.15476207435131073, "beta_dpo/beta_margin_mean": 0.38421371579170227, "beta_dpo/beta_margin_std": 0.7130559682846069, "beta_dpo/beta_used": 0.04393059015274048, "beta_dpo/beta_used_raw": 0.04393059015274048, "beta_dpo/gap_mean": 9.273094177246094, "beta_dpo/gap_std": 14.880241394042969, "beta_dpo/loss_margin_mean": 8.799640655517578, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8208616780045351, "grad_norm": 11.100589752197266, "learning_rate": 4.790130070827028e-08, "logits/chosen": 1.190391182899475, "logits/rejected": 1.0452004671096802, "loss": 1.1361, "step": 543 }, { "beta_dpo/beta": 0.115420401096344, "beta_dpo/beta_margin_grad_mean": -0.3272760510444641, "beta_dpo/beta_margin_grad_std": 0.28145524859428406, "beta_dpo/beta_margin_mean": 1.3125842809677124, "beta_dpo/beta_margin_std": 2.0508055686950684, "beta_dpo/beta_used": 0.115420401096344, "beta_dpo/beta_used_raw": 0.115420401096344, "beta_dpo/gap_mean": 9.590657234191895, "beta_dpo/gap_std": 15.433180809020996, "beta_dpo/loss_margin_mean": 11.46908950805664, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8223733938019653, "grad_norm": 33.516334533691406, "learning_rate": 4.7125818934366454e-08, "logits/chosen": 1.0202606916427612, "logits/rejected": 0.9330880641937256, "loss": 1.0445, "step": 544 }, { "beta_dpo/beta": 0.11217702925205231, "beta_dpo/beta_margin_grad_mean": -0.3709193468093872, "beta_dpo/beta_margin_grad_std": 0.27607184648513794, "beta_dpo/beta_margin_mean": 0.8507373929023743, "beta_dpo/beta_margin_std": 1.8569505214691162, "beta_dpo/beta_used": 0.11217702925205231, "beta_dpo/beta_used_raw": 0.11217702925205231, "beta_dpo/gap_mean": 9.49941349029541, "beta_dpo/gap_std": 15.811461448669434, "beta_dpo/loss_margin_mean": 7.796383380889893, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8238851095993953, "grad_norm": 22.20750617980957, "learning_rate": 4.635601198741607e-08, "logits/chosen": 1.396355152130127, "logits/rejected": 1.287622332572937, "loss": 0.9378, "step": 545 }, { "beta_dpo/beta": 0.15419848263263702, "beta_dpo/beta_margin_grad_mean": -0.2972928583621979, "beta_dpo/beta_margin_grad_std": 0.28409242630004883, "beta_dpo/beta_margin_mean": 1.5118775367736816, "beta_dpo/beta_margin_std": 2.3105311393737793, "beta_dpo/beta_used": 0.15419848263263702, "beta_dpo/beta_used_raw": 0.15419848263263702, "beta_dpo/gap_mean": 9.365312576293945, "beta_dpo/gap_std": 15.491350173950195, "beta_dpo/loss_margin_mean": 9.728423118591309, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8253968253968254, "grad_norm": 39.418800354003906, "learning_rate": 4.559190140057428e-08, "logits/chosen": 1.0795893669128418, "logits/rejected": 1.0732557773590088, "loss": 0.8788, "step": 546 }, { "beta_dpo/beta": 0.11869224160909653, "beta_dpo/beta_margin_grad_mean": -0.3248843550682068, "beta_dpo/beta_margin_grad_std": 0.22885605692863464, "beta_dpo/beta_margin_mean": 1.2935576438903809, "beta_dpo/beta_margin_std": 2.110837459564209, "beta_dpo/beta_used": 0.11869224160909653, "beta_dpo/beta_used_raw": 0.11869224160909653, "beta_dpo/gap_mean": 9.626115798950195, "beta_dpo/gap_std": 15.377988815307617, "beta_dpo/loss_margin_mean": 10.859363555908203, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8269085411942555, "grad_norm": 26.864322662353516, "learning_rate": 4.483350854765672e-08, "logits/chosen": 1.1849254369735718, "logits/rejected": 1.130568265914917, "loss": 0.9387, "step": 547 }, { "beta_dpo/beta": 0.03195616975426674, "beta_dpo/beta_margin_grad_mean": -0.4449770152568817, "beta_dpo/beta_margin_grad_std": 0.15359994769096375, "beta_dpo/beta_margin_mean": 0.2758224308490753, "beta_dpo/beta_margin_std": 0.771864116191864, "beta_dpo/beta_used": 0.03195616975426674, "beta_dpo/beta_used_raw": -0.0379025973379612, "beta_dpo/gap_mean": 9.175468444824219, "beta_dpo/gap_std": 15.103775024414062, "beta_dpo/loss_margin_mean": 6.729221820831299, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8284202569916855, "grad_norm": 11.9760160446167, "learning_rate": 4.4080854642541826e-08, "logits/chosen": 1.213660717010498, "logits/rejected": 1.1112244129180908, "loss": 1.2281, "step": 548 }, { "beta_dpo/beta": 0.015496051870286465, "beta_dpo/beta_margin_grad_mean": -0.4723176658153534, "beta_dpo/beta_margin_grad_std": 0.06963703036308289, "beta_dpo/beta_margin_mean": 0.11424464732408524, "beta_dpo/beta_margin_std": 0.28728026151657104, "beta_dpo/beta_used": 0.015496051870286465, "beta_dpo/beta_used_raw": 0.015496051870286465, "beta_dpo/gap_mean": 8.908472061157227, "beta_dpo/gap_std": 15.119009017944336, "beta_dpo/loss_margin_mean": 7.55164909362793, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8299319727891157, "grad_norm": 6.398705959320068, "learning_rate": 4.333396073857723e-08, "logits/chosen": 1.3798398971557617, "logits/rejected": 1.299780011177063, "loss": 1.2846, "step": 549 }, { "beta_dpo/beta": 0.06067263334989548, "beta_dpo/beta_margin_grad_mean": -0.42208606004714966, "beta_dpo/beta_margin_grad_std": 0.19494439661502838, "beta_dpo/beta_margin_mean": 0.38519254326820374, "beta_dpo/beta_margin_std": 1.0421887636184692, "beta_dpo/beta_used": 0.06067263334989548, "beta_dpo/beta_used_raw": 0.06067263334989548, "beta_dpo/gap_mean": 8.732294082641602, "beta_dpo/gap_std": 15.221040725708008, "beta_dpo/loss_margin_mean": 7.314587593078613, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8314436885865457, "grad_norm": 17.624757766723633, "learning_rate": 4.259284772799099e-08, "logits/chosen": 1.256799578666687, "logits/rejected": 1.1990289688110352, "loss": 1.1007, "step": 550 }, { "beta_dpo/beta": 0.046534232795238495, "beta_dpo/beta_margin_grad_mean": -0.42071956396102905, "beta_dpo/beta_margin_grad_std": 0.12429229170084, "beta_dpo/beta_margin_mean": 0.3460140526294708, "beta_dpo/beta_margin_std": 0.5744323134422302, "beta_dpo/beta_used": 0.046534232795238495, "beta_dpo/beta_used_raw": 0.046534232795238495, "beta_dpo/gap_mean": 8.32465934753418, "beta_dpo/gap_std": 14.818896293640137, "beta_dpo/loss_margin_mean": 7.320366382598877, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8329554043839759, "grad_norm": 15.450972557067871, "learning_rate": 4.1857536341307176e-08, "logits/chosen": 1.4911260604858398, "logits/rejected": 1.3917850255966187, "loss": 1.1257, "step": 551 }, { "beta_dpo/beta": 0.19184796512126923, "beta_dpo/beta_margin_grad_mean": -0.3128667175769806, "beta_dpo/beta_margin_grad_std": 0.30318787693977356, "beta_dpo/beta_margin_mean": 1.7558873891830444, "beta_dpo/beta_margin_std": 3.194880247116089, "beta_dpo/beta_used": 0.19184796512126923, "beta_dpo/beta_used_raw": 0.19184796512126923, "beta_dpo/gap_mean": 8.063959121704102, "beta_dpo/gap_std": 14.653057098388672, "beta_dpo/loss_margin_mean": 7.569924831390381, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8344671201814059, "grad_norm": 34.19086837768555, "learning_rate": 4.112804714676593e-08, "logits/chosen": 1.1748685836791992, "logits/rejected": 1.1179265975952148, "loss": 0.769, "step": 552 }, { "beta_dpo/beta": 0.143024280667305, "beta_dpo/beta_margin_grad_mean": -0.42807796597480774, "beta_dpo/beta_margin_grad_std": 0.28312036395072937, "beta_dpo/beta_margin_mean": 1.2726179361343384, "beta_dpo/beta_margin_std": 4.292966365814209, "beta_dpo/beta_used": 0.143024280667305, "beta_dpo/beta_used_raw": 0.143024280667305, "beta_dpo/gap_mean": 8.228954315185547, "beta_dpo/gap_std": 15.217555046081543, "beta_dpo/loss_margin_mean": 8.160815238952637, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8359788359788359, "grad_norm": 52.660762786865234, "learning_rate": 4.0404400549748144e-08, "logits/chosen": 1.212083101272583, "logits/rejected": 1.1077077388763428, "loss": 1.2839, "step": 553 }, { "beta_dpo/beta": 0.06598697602748871, "beta_dpo/beta_margin_grad_mean": -0.39833810925483704, "beta_dpo/beta_margin_grad_std": 0.18544040620326996, "beta_dpo/beta_margin_mean": 0.6325341463088989, "beta_dpo/beta_margin_std": 1.203471302986145, "beta_dpo/beta_used": 0.06598697602748871, "beta_dpo/beta_used_raw": 0.01051008328795433, "beta_dpo/gap_mean": 8.333694458007812, "beta_dpo/gap_std": 14.93002986907959, "beta_dpo/loss_margin_mean": 9.03442096710205, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8374905517762661, "grad_norm": 20.9935302734375, "learning_rate": 3.968661679220467e-08, "logits/chosen": 1.3394625186920166, "logits/rejected": 1.3322782516479492, "loss": 1.1239, "step": 554 }, { "beta_dpo/beta": 0.09749078750610352, "beta_dpo/beta_margin_grad_mean": -0.37117937207221985, "beta_dpo/beta_margin_grad_std": 0.2390754222869873, "beta_dpo/beta_margin_mean": 1.1656479835510254, "beta_dpo/beta_margin_std": 2.159295082092285, "beta_dpo/beta_used": 0.09749078750610352, "beta_dpo/beta_used_raw": 0.04684508964419365, "beta_dpo/gap_mean": 8.137611389160156, "beta_dpo/gap_std": 15.190990447998047, "beta_dpo/loss_margin_mean": 8.135710716247559, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8390022675736961, "grad_norm": 35.28615188598633, "learning_rate": 3.89747159520904e-08, "logits/chosen": 1.434206485748291, "logits/rejected": 1.3939523696899414, "loss": 1.1265, "step": 555 }, { "beta_dpo/beta": 0.07983244955539703, "beta_dpo/beta_margin_grad_mean": -0.3930589556694031, "beta_dpo/beta_margin_grad_std": 0.22528424859046936, "beta_dpo/beta_margin_mean": 0.5776308178901672, "beta_dpo/beta_margin_std": 1.3057823181152344, "beta_dpo/beta_used": 0.07983244955539703, "beta_dpo/beta_used_raw": 0.07983244955539703, "beta_dpo/gap_mean": 8.181859016418457, "beta_dpo/gap_std": 15.160832405090332, "beta_dpo/loss_margin_mean": 7.280274868011475, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8405139833711263, "grad_norm": 20.49454689025879, "learning_rate": 3.826871794280192e-08, "logits/chosen": 1.2871716022491455, "logits/rejected": 1.2277119159698486, "loss": 1.1067, "step": 556 }, { "beta_dpo/beta": 0.08392275869846344, "beta_dpo/beta_margin_grad_mean": -0.3781295716762543, "beta_dpo/beta_margin_grad_std": 0.23083707690238953, "beta_dpo/beta_margin_mean": 1.063275694847107, "beta_dpo/beta_margin_std": 2.229184150695801, "beta_dpo/beta_used": 0.08392275869846344, "beta_dpo/beta_used_raw": 0.07373453676700592, "beta_dpo/gap_mean": 8.439523696899414, "beta_dpo/gap_std": 15.30473518371582, "beta_dpo/loss_margin_mean": 9.074495315551758, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8420256991685563, "grad_norm": 20.826927185058594, "learning_rate": 3.756864251262143e-08, "logits/chosen": 1.1249200105667114, "logits/rejected": 0.9844628572463989, "loss": 1.0709, "step": 557 }, { "beta_dpo/beta": 0.11504756659269333, "beta_dpo/beta_margin_grad_mean": -0.38101866841316223, "beta_dpo/beta_margin_grad_std": 0.2687925398349762, "beta_dpo/beta_margin_mean": 1.3793085813522339, "beta_dpo/beta_margin_std": 3.3186142444610596, "beta_dpo/beta_used": 0.11504756659269333, "beta_dpo/beta_used_raw": 0.10775712877511978, "beta_dpo/gap_mean": 8.540435791015625, "beta_dpo/gap_std": 15.425100326538086, "beta_dpo/loss_margin_mean": 9.191947937011719, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8435374149659864, "grad_norm": 41.15348815917969, "learning_rate": 3.687450924416341e-08, "logits/chosen": 1.323371410369873, "logits/rejected": 1.2763679027557373, "loss": 1.1547, "step": 558 }, { "beta_dpo/beta": 0.07971999794244766, "beta_dpo/beta_margin_grad_mean": -0.4077509939670563, "beta_dpo/beta_margin_grad_std": 0.24463067948818207, "beta_dpo/beta_margin_mean": 0.8806779980659485, "beta_dpo/beta_margin_std": 2.2131831645965576, "beta_dpo/beta_used": 0.07971999794244766, "beta_dpo/beta_used_raw": 0.07355686277151108, "beta_dpo/gap_mean": 8.507637023925781, "beta_dpo/gap_std": 15.659688949584961, "beta_dpo/loss_margin_mean": 8.043795585632324, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8450491307634165, "grad_norm": 29.638961791992188, "learning_rate": 3.6186337553827743e-08, "logits/chosen": 1.4750277996063232, "logits/rejected": 1.3416969776153564, "loss": 1.2295, "step": 559 }, { "beta_dpo/beta": 0.17043396830558777, "beta_dpo/beta_margin_grad_mean": -0.3465725779533386, "beta_dpo/beta_margin_grad_std": 0.2609609067440033, "beta_dpo/beta_margin_mean": 2.157318115234375, "beta_dpo/beta_margin_std": 4.162698268890381, "beta_dpo/beta_used": 0.17043396830558777, "beta_dpo/beta_used_raw": 0.11978065967559814, "beta_dpo/gap_mean": 8.458297729492188, "beta_dpo/gap_std": 15.643919944763184, "beta_dpo/loss_margin_mean": 10.064728736877441, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8465608465608465, "grad_norm": 43.67535400390625, "learning_rate": 3.550414669125573e-08, "logits/chosen": 1.3294655084609985, "logits/rejected": 1.285670280456543, "loss": 1.0217, "step": 560 }, { "beta_dpo/beta": 0.08011330664157867, "beta_dpo/beta_margin_grad_mean": -0.3740430474281311, "beta_dpo/beta_margin_grad_std": 0.20077289640903473, "beta_dpo/beta_margin_mean": 0.9686254262924194, "beta_dpo/beta_margin_std": 1.77787446975708, "beta_dpo/beta_used": 0.08011330664157867, "beta_dpo/beta_used_raw": 0.041644688695669174, "beta_dpo/gap_mean": 9.038721084594727, "beta_dpo/gap_std": 15.586071014404297, "beta_dpo/loss_margin_mean": 10.723516464233398, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8480725623582767, "grad_norm": 23.633291244506836, "learning_rate": 3.482795573879241e-08, "logits/chosen": 1.4188854694366455, "logits/rejected": 1.3814911842346191, "loss": 1.0808, "step": 561 }, { "beta_dpo/beta": 0.022397657856345177, "beta_dpo/beta_margin_grad_mean": -0.444037526845932, "beta_dpo/beta_margin_grad_std": 0.1145317479968071, "beta_dpo/beta_margin_mean": 0.25500166416168213, "beta_dpo/beta_margin_std": 0.533501148223877, "beta_dpo/beta_used": 0.022397657856345177, "beta_dpo/beta_used_raw": -0.03293367847800255, "beta_dpo/gap_mean": 9.185551643371582, "beta_dpo/gap_std": 15.79057502746582, "beta_dpo/loss_margin_mean": 9.271513938903809, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8495842781557067, "grad_norm": 8.53104019165039, "learning_rate": 3.415778361095226e-08, "logits/chosen": 1.391019344329834, "logits/rejected": 1.3353910446166992, "loss": 1.2529, "step": 562 }, { "beta_dpo/beta": 0.2550284266471863, "beta_dpo/beta_margin_grad_mean": -0.280933141708374, "beta_dpo/beta_margin_grad_std": 0.347322553396225, "beta_dpo/beta_margin_mean": 2.570730686187744, "beta_dpo/beta_margin_std": 5.0097527503967285, "beta_dpo/beta_used": 0.2550284266471863, "beta_dpo/beta_used_raw": 0.2550284266471863, "beta_dpo/gap_mean": 9.215568542480469, "beta_dpo/gap_std": 16.322410583496094, "beta_dpo/loss_margin_mean": 10.128461837768555, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8510959939531368, "grad_norm": 46.124263763427734, "learning_rate": 3.349364905389032e-08, "logits/chosen": 1.3676221370697021, "logits/rejected": 1.2895047664642334, "loss": 0.9148, "step": 563 }, { "beta_dpo/beta": 0.21877868473529816, "beta_dpo/beta_margin_grad_mean": -0.3078286945819855, "beta_dpo/beta_margin_grad_std": 0.34798845648765564, "beta_dpo/beta_margin_mean": 1.959875464439392, "beta_dpo/beta_margin_std": 3.711064338684082, "beta_dpo/beta_used": 0.21877868473529816, "beta_dpo/beta_used_raw": 0.21877868473529816, "beta_dpo/gap_mean": 9.217426300048828, "beta_dpo/gap_std": 16.47534942626953, "beta_dpo/loss_margin_mean": 8.940733909606934, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8526077097505669, "grad_norm": 56.61745071411133, "learning_rate": 3.283557064487785e-08, "logits/chosen": 1.2832797765731812, "logits/rejected": 1.250802755355835, "loss": 0.8822, "step": 564 }, { "beta_dpo/beta": 0.09792563319206238, "beta_dpo/beta_margin_grad_mean": -0.38408800959587097, "beta_dpo/beta_margin_grad_std": 0.24419058859348297, "beta_dpo/beta_margin_mean": 1.0215169191360474, "beta_dpo/beta_margin_std": 2.458621025085449, "beta_dpo/beta_used": 0.09792563319206238, "beta_dpo/beta_used_raw": -0.03560522943735123, "beta_dpo/gap_mean": 9.025110244750977, "beta_dpo/gap_std": 16.28216552734375, "beta_dpo/loss_margin_mean": 8.508374214172363, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.854119425547997, "grad_norm": 36.234596252441406, "learning_rate": 3.218356679178252e-08, "logits/chosen": 1.1627860069274902, "logits/rejected": 1.1426182985305786, "loss": 1.1816, "step": 565 }, { "beta_dpo/beta": 0.08629482984542847, "beta_dpo/beta_margin_grad_mean": -0.392479807138443, "beta_dpo/beta_margin_grad_std": 0.24591444432735443, "beta_dpo/beta_margin_mean": 0.9406875371932983, "beta_dpo/beta_margin_std": 2.2037181854248047, "beta_dpo/beta_used": 0.08629482984542847, "beta_dpo/beta_used_raw": 0.07739803940057755, "beta_dpo/gap_mean": 8.676336288452148, "beta_dpo/gap_std": 16.342041015625, "beta_dpo/loss_margin_mean": 7.305891513824463, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8556311413454271, "grad_norm": 31.71550750732422, "learning_rate": 3.1537655732553764e-08, "logits/chosen": 1.0283910036087036, "logits/rejected": 0.997430145740509, "loss": 1.1747, "step": 566 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49769994616508484, "beta_dpo/beta_margin_grad_std": 0.003700168803334236, "beta_dpo/beta_margin_mean": 0.009200900793075562, "beta_dpo/beta_margin_std": 0.014802070334553719, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.018071264028549194, "beta_dpo/gap_mean": 8.921218872070312, "beta_dpo/gap_std": 16.182456970214844, "beta_dpo/loss_margin_mean": 9.200900077819824, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8571428571428571, "grad_norm": 0.4065539836883545, "learning_rate": 3.089785553471233e-08, "logits/chosen": 1.2240984439849854, "logits/rejected": 1.1093523502349854, "loss": 1.3794, "step": 567 }, { "beta_dpo/beta": 0.09786317497491837, "beta_dpo/beta_margin_grad_mean": -0.3605889678001404, "beta_dpo/beta_margin_grad_std": 0.2532772719860077, "beta_dpo/beta_margin_mean": 1.508376121520996, "beta_dpo/beta_margin_std": 2.6363301277160645, "beta_dpo/beta_used": 0.09786317497491837, "beta_dpo/beta_used_raw": 0.03654339164495468, "beta_dpo/gap_mean": 8.895964622497559, "beta_dpo/gap_std": 15.841314315795898, "beta_dpo/loss_margin_mean": 10.623019218444824, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8586545729402872, "grad_norm": 36.745548248291016, "learning_rate": 3.026418409484513e-08, "logits/chosen": 1.1992294788360596, "logits/rejected": 1.1165108680725098, "loss": 1.2022, "step": 568 }, { "beta_dpo/beta": 0.09338933974504471, "beta_dpo/beta_margin_grad_mean": -0.3815169036388397, "beta_dpo/beta_margin_grad_std": 0.2267947942018509, "beta_dpo/beta_margin_mean": 0.9926204681396484, "beta_dpo/beta_margin_std": 1.9438563585281372, "beta_dpo/beta_used": 0.09338933974504471, "beta_dpo/beta_used_raw": 0.05557567998766899, "beta_dpo/gap_mean": 9.067608833312988, "beta_dpo/gap_std": 15.582027435302734, "beta_dpo/loss_margin_mean": 6.859289169311523, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8601662887377173, "grad_norm": 26.79338836669922, "learning_rate": 2.963665913810451e-08, "logits/chosen": 1.0522103309631348, "logits/rejected": 1.0461184978485107, "loss": 1.1025, "step": 569 }, { "beta_dpo/beta": 0.203478142619133, "beta_dpo/beta_margin_grad_mean": -0.2527143359184265, "beta_dpo/beta_margin_grad_std": 0.2713932991027832, "beta_dpo/beta_margin_mean": 2.6176295280456543, "beta_dpo/beta_margin_std": 3.5322751998901367, "beta_dpo/beta_used": 0.203478142619133, "beta_dpo/beta_used_raw": 0.203478142619133, "beta_dpo/gap_mean": 9.13400650024414, "beta_dpo/gap_std": 15.616695404052734, "beta_dpo/loss_margin_mean": 12.05258560180664, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8616780045351474, "grad_norm": 36.613887786865234, "learning_rate": 2.9015298217712453e-08, "logits/chosen": 1.024835228919983, "logits/rejected": 0.9980064630508423, "loss": 0.7511, "step": 570 }, { "beta_dpo/beta": 0.07235896587371826, "beta_dpo/beta_margin_grad_mean": -0.42983123660087585, "beta_dpo/beta_margin_grad_std": 0.23050378262996674, "beta_dpo/beta_margin_mean": 0.4017201066017151, "beta_dpo/beta_margin_std": 1.6213804483413696, "beta_dpo/beta_used": 0.07235896587371826, "beta_dpo/beta_used_raw": 0.0374261848628521, "beta_dpo/gap_mean": 8.91083812713623, "beta_dpo/gap_std": 15.429361343383789, "beta_dpo/loss_margin_mean": 5.915493965148926, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8631897203325775, "grad_norm": 17.79202651977539, "learning_rate": 2.840011871446962e-08, "logits/chosen": 1.2358368635177612, "logits/rejected": 1.2157400846481323, "loss": 1.0641, "step": 571 }, { "beta_dpo/beta": 0.059745196253061295, "beta_dpo/beta_margin_grad_mean": -0.4255921542644501, "beta_dpo/beta_margin_grad_std": 0.20457926392555237, "beta_dpo/beta_margin_mean": 0.40605655312538147, "beta_dpo/beta_margin_std": 1.3714659214019775, "beta_dpo/beta_used": 0.059745196253061295, "beta_dpo/beta_used_raw": 0.033621691167354584, "beta_dpo/gap_mean": 8.525805473327637, "beta_dpo/gap_std": 15.347637176513672, "beta_dpo/loss_margin_mean": 7.5314860343933105, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8647014361300076, "grad_norm": 21.431163787841797, "learning_rate": 2.7791137836269158e-08, "logits/chosen": 1.2674634456634521, "logits/rejected": 1.276109218597412, "loss": 1.1399, "step": 572 }, { "beta_dpo/beta": 0.1454625129699707, "beta_dpo/beta_margin_grad_mean": -0.3354867100715637, "beta_dpo/beta_margin_grad_std": 0.3068930506706238, "beta_dpo/beta_margin_mean": 1.4749563932418823, "beta_dpo/beta_margin_std": 3.0397462844848633, "beta_dpo/beta_used": 0.1454625129699707, "beta_dpo/beta_used_raw": 0.1454625129699707, "beta_dpo/gap_mean": 8.657966613769531, "beta_dpo/gap_std": 15.54708480834961, "beta_dpo/loss_margin_mean": 9.776642799377441, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8662131519274376, "grad_norm": 40.563289642333984, "learning_rate": 2.718837261761528e-08, "logits/chosen": 1.1352970600128174, "logits/rejected": 1.0640968084335327, "loss": 0.9789, "step": 573 }, { "beta_dpo/beta": 0.3303490877151489, "beta_dpo/beta_margin_grad_mean": -0.2543141543865204, "beta_dpo/beta_margin_grad_std": 0.32116472721099854, "beta_dpo/beta_margin_mean": 4.120840549468994, "beta_dpo/beta_margin_std": 5.648134708404541, "beta_dpo/beta_used": 0.3303490877151489, "beta_dpo/beta_used_raw": 0.3303490877151489, "beta_dpo/gap_mean": 9.069449424743652, "beta_dpo/gap_std": 15.577247619628906, "beta_dpo/loss_margin_mean": 11.595686912536621, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8677248677248677, "grad_norm": 68.58607482910156, "learning_rate": 2.659183991914696e-08, "logits/chosen": 1.4592229127883911, "logits/rejected": 1.4214026927947998, "loss": 0.6931, "step": 574 }, { "beta_dpo/beta": 0.006673333700746298, "beta_dpo/beta_margin_grad_mean": -0.48022425174713135, "beta_dpo/beta_margin_grad_std": 0.044581227004528046, "beta_dpo/beta_margin_mean": 0.08041688799858093, "beta_dpo/beta_margin_std": 0.1813589334487915, "beta_dpo/beta_used": 0.006673333700746298, "beta_dpo/beta_used_raw": -0.17582564055919647, "beta_dpo/gap_mean": 9.28346061706543, "beta_dpo/gap_std": 15.865880966186523, "beta_dpo/loss_margin_mean": 7.327334403991699, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8692365835222978, "grad_norm": 2.8228843212127686, "learning_rate": 2.600155642716606e-08, "logits/chosen": 1.2757470607757568, "logits/rejected": 1.2021794319152832, "loss": 1.3409, "step": 575 }, { "beta_dpo/beta": 0.2936015725135803, "beta_dpo/beta_margin_grad_mean": -0.23937273025512695, "beta_dpo/beta_margin_grad_std": 0.3489878177642822, "beta_dpo/beta_margin_mean": 3.9037559032440186, "beta_dpo/beta_margin_std": 5.193808555603027, "beta_dpo/beta_used": 0.2936015725135803, "beta_dpo/beta_used_raw": 0.2936015725135803, "beta_dpo/gap_mean": 9.523399353027344, "beta_dpo/gap_std": 15.896087646484375, "beta_dpo/loss_margin_mean": 12.640140533447266, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8707482993197279, "grad_norm": 60.06475067138672, "learning_rate": 2.5417538653170754e-08, "logits/chosen": 1.3256943225860596, "logits/rejected": 1.226050615310669, "loss": 1.0017, "step": 576 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4983120858669281, "beta_dpo/beta_margin_grad_std": 0.003915362525731325, "beta_dpo/beta_margin_mean": 0.006752183195203543, "beta_dpo/beta_margin_std": 0.015662673860788345, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.07891594618558884, "beta_dpo/gap_mean": 9.165458679199219, "beta_dpo/gap_std": 15.950639724731445, "beta_dpo/loss_margin_mean": 6.752182960510254, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.872260015117158, "grad_norm": 0.3212597966194153, "learning_rate": 2.4839802933393607e-08, "logits/chosen": 1.0376708507537842, "logits/rejected": 1.0282740592956543, "loss": 1.3802, "step": 577 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4986441433429718, "beta_dpo/beta_margin_grad_std": 0.0036979857832193375, "beta_dpo/beta_margin_mean": 0.0054238177835941315, "beta_dpo/beta_margin_std": 0.014793048612773418, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.11922827363014221, "beta_dpo/gap_mean": 8.606918334960938, "beta_dpo/gap_std": 15.817480087280273, "beta_dpo/loss_margin_mean": 5.4238176345825195, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.873771730914588, "grad_norm": 0.2846592962741852, "learning_rate": 2.4268365428344733e-08, "logits/chosen": 1.3664073944091797, "logits/rejected": 1.3180696964263916, "loss": 1.3814, "step": 578 }, { "beta_dpo/beta": 0.10250888019800186, "beta_dpo/beta_margin_grad_mean": -0.3974369466304779, "beta_dpo/beta_margin_grad_std": 0.2421928346157074, "beta_dpo/beta_margin_mean": 0.8760362863540649, "beta_dpo/beta_margin_std": 2.4804162979125977, "beta_dpo/beta_used": 0.10250888019800186, "beta_dpo/beta_used_raw": 0.06492999196052551, "beta_dpo/gap_mean": 8.539130210876465, "beta_dpo/gap_std": 15.541910171508789, "beta_dpo/loss_margin_mean": 9.44791030883789, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8752834467120182, "grad_norm": 29.04618263244629, "learning_rate": 2.3703242122359357e-08, "logits/chosen": 1.0949928760528564, "logits/rejected": 1.0640246868133545, "loss": 1.0228, "step": 579 }, { "beta_dpo/beta": 0.07085993140935898, "beta_dpo/beta_margin_grad_mean": -0.3925807476043701, "beta_dpo/beta_margin_grad_std": 0.22485065460205078, "beta_dpo/beta_margin_mean": 0.738163948059082, "beta_dpo/beta_margin_std": 1.6701350212097168, "beta_dpo/beta_used": 0.07085993140935898, "beta_dpo/beta_used_raw": 0.04937838017940521, "beta_dpo/gap_mean": 8.2997407913208, "beta_dpo/gap_std": 15.717334747314453, "beta_dpo/loss_margin_mean": 7.733586311340332, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8767951625094482, "grad_norm": 19.86193084716797, "learning_rate": 2.3144448823151392e-08, "logits/chosen": 1.2375001907348633, "logits/rejected": 1.1523990631103516, "loss": 1.0934, "step": 580 }, { "beta_dpo/beta": 0.2419264167547226, "beta_dpo/beta_margin_grad_mean": -0.2971521019935608, "beta_dpo/beta_margin_grad_std": 0.29189354181289673, "beta_dpo/beta_margin_mean": 2.6469461917877197, "beta_dpo/beta_margin_std": 5.749914646148682, "beta_dpo/beta_used": 0.2419264167547226, "beta_dpo/beta_used_raw": 0.2419264167547226, "beta_dpo/gap_mean": 8.610160827636719, "beta_dpo/gap_std": 15.83781623840332, "beta_dpo/loss_margin_mean": 8.952991485595703, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8783068783068783, "grad_norm": 63.67040252685547, "learning_rate": 2.259200116137039e-08, "logits/chosen": 1.2289148569107056, "logits/rejected": 1.1664001941680908, "loss": 1.1243, "step": 581 }, { "beta_dpo/beta": 0.0896616280078888, "beta_dpo/beta_margin_grad_mean": -0.4158861041069031, "beta_dpo/beta_margin_grad_std": 0.2637236416339874, "beta_dpo/beta_margin_mean": 0.8592485785484314, "beta_dpo/beta_margin_std": 2.4116122722625732, "beta_dpo/beta_used": 0.0896616280078888, "beta_dpo/beta_used_raw": 0.06425061821937561, "beta_dpo/gap_mean": 8.28162956237793, "beta_dpo/gap_std": 15.801387786865234, "beta_dpo/loss_margin_mean": 7.660096645355225, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8798185941043084, "grad_norm": 34.26008605957031, "learning_rate": 2.204591459016525e-08, "logits/chosen": 0.972122848033905, "logits/rejected": 0.9931256771087646, "loss": 1.3696, "step": 582 }, { "beta_dpo/beta": 0.016050279140472412, "beta_dpo/beta_margin_grad_mean": -0.4742169678211212, "beta_dpo/beta_margin_grad_std": 0.08747411519289017, "beta_dpo/beta_margin_mean": 0.10880840569734573, "beta_dpo/beta_margin_std": 0.36883479356765747, "beta_dpo/beta_used": 0.016050279140472412, "beta_dpo/beta_used_raw": -0.04681949317455292, "beta_dpo/gap_mean": 7.949472427368164, "beta_dpo/gap_std": 16.228107452392578, "beta_dpo/loss_margin_mean": 5.8783698081970215, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8813303099017384, "grad_norm": 7.858330726623535, "learning_rate": 2.1506204384751064e-08, "logits/chosen": 1.2844696044921875, "logits/rejected": 1.1568622589111328, "loss": 1.3039, "step": 583 }, { "beta_dpo/beta": 0.259249210357666, "beta_dpo/beta_margin_grad_mean": -0.3204815089702606, "beta_dpo/beta_margin_grad_std": 0.35193508863449097, "beta_dpo/beta_margin_mean": 2.6451680660247803, "beta_dpo/beta_margin_std": 4.729509353637695, "beta_dpo/beta_used": 0.259249210357666, "beta_dpo/beta_used_raw": 0.259249210357666, "beta_dpo/gap_mean": 8.241252899169922, "beta_dpo/gap_std": 16.267311096191406, "beta_dpo/loss_margin_mean": 9.067033767700195, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8828420256991686, "grad_norm": 57.75297164916992, "learning_rate": 2.09728856419826e-08, "logits/chosen": 1.3111417293548584, "logits/rejected": 1.1976943016052246, "loss": 0.9995, "step": 584 }, { "beta_dpo/beta": 0.16461847722530365, "beta_dpo/beta_margin_grad_mean": -0.39904317259788513, "beta_dpo/beta_margin_grad_std": 0.2841614782810211, "beta_dpo/beta_margin_mean": 1.2675442695617676, "beta_dpo/beta_margin_std": 3.9270401000976562, "beta_dpo/beta_used": 0.16461847722530365, "beta_dpo/beta_used_raw": 0.14850212633609772, "beta_dpo/gap_mean": 7.797574520111084, "beta_dpo/gap_std": 15.795310974121094, "beta_dpo/loss_margin_mean": 6.529803276062012, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8843537414965986, "grad_norm": 49.424468994140625, "learning_rate": 2.044597327993153e-08, "logits/chosen": 1.2629481554031372, "logits/rejected": 1.21295166015625, "loss": 1.0875, "step": 585 }, { "beta_dpo/beta": 0.14468266069889069, "beta_dpo/beta_margin_grad_mean": -0.3076460361480713, "beta_dpo/beta_margin_grad_std": 0.26763400435447693, "beta_dpo/beta_margin_mean": 1.4467971324920654, "beta_dpo/beta_margin_std": 2.1389639377593994, "beta_dpo/beta_used": 0.14468266069889069, "beta_dpo/beta_used_raw": 0.14468266069889069, "beta_dpo/gap_mean": 8.13833999633789, "beta_dpo/gap_std": 15.544172286987305, "beta_dpo/loss_margin_mean": 10.076703071594238, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8858654572940288, "grad_norm": 40.17343521118164, "learning_rate": 1.9925482037469187e-08, "logits/chosen": 1.5505293607711792, "logits/rejected": 1.4857732057571411, "loss": 1.1052, "step": 586 }, { "beta_dpo/beta": 0.17714223265647888, "beta_dpo/beta_margin_grad_mean": -0.3253132998943329, "beta_dpo/beta_margin_grad_std": 0.2992173135280609, "beta_dpo/beta_margin_mean": 1.910219669342041, "beta_dpo/beta_margin_std": 3.6033554077148438, "beta_dpo/beta_used": 0.17714223265647888, "beta_dpo/beta_used_raw": 0.17714223265647888, "beta_dpo/gap_mean": 8.57592487335205, "beta_dpo/gap_std": 15.818859100341797, "beta_dpo/loss_margin_mean": 10.5182523727417, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8873771730914588, "grad_norm": 54.24024963378906, "learning_rate": 1.9411426473854687e-08, "logits/chosen": 1.3305864334106445, "logits/rejected": 1.324970006942749, "loss": 0.9144, "step": 587 }, { "beta_dpo/beta": 0.10519608110189438, "beta_dpo/beta_margin_grad_mean": -0.3480878472328186, "beta_dpo/beta_margin_grad_std": 0.24397340416908264, "beta_dpo/beta_margin_mean": 0.7981663942337036, "beta_dpo/beta_margin_std": 1.4612581729888916, "beta_dpo/beta_used": 0.10519608110189438, "beta_dpo/beta_used_raw": 0.10519608110189438, "beta_dpo/gap_mean": 8.550680160522461, "beta_dpo/gap_std": 15.71683120727539, "beta_dpo/loss_margin_mean": 7.717644214630127, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8888888888888888, "grad_norm": 22.78832244873047, "learning_rate": 1.890382096832699e-08, "logits/chosen": 1.1842995882034302, "logits/rejected": 1.1582131385803223, "loss": 0.9884, "step": 588 }, { "beta_dpo/beta": 0.1493714153766632, "beta_dpo/beta_margin_grad_mean": -0.30983808636665344, "beta_dpo/beta_margin_grad_std": 0.25290393829345703, "beta_dpo/beta_margin_mean": 1.4465526342391968, "beta_dpo/beta_margin_std": 2.117835283279419, "beta_dpo/beta_used": 0.1493714153766632, "beta_dpo/beta_used_raw": 0.1493714153766632, "beta_dpo/gap_mean": 8.739282608032227, "beta_dpo/gap_std": 15.474782943725586, "beta_dpo/loss_margin_mean": 10.057303428649902, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.890400604686319, "grad_norm": 29.803503036499023, "learning_rate": 1.840267971970344e-08, "logits/chosen": 1.0262922048568726, "logits/rejected": 1.0313639640808105, "loss": 0.7967, "step": 589 }, { "beta_dpo/beta": 0.1304425448179245, "beta_dpo/beta_margin_grad_mean": -0.28577888011932373, "beta_dpo/beta_margin_grad_std": 0.23786579072475433, "beta_dpo/beta_margin_mean": 1.4526952505111694, "beta_dpo/beta_margin_std": 1.8333840370178223, "beta_dpo/beta_used": 0.1304425448179245, "beta_dpo/beta_used_raw": 0.1304425448179245, "beta_dpo/gap_mean": 9.032739639282227, "beta_dpo/gap_std": 14.90475845336914, "beta_dpo/loss_margin_mean": 10.960782051086426, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.891912320483749, "grad_norm": 30.006053924560547, "learning_rate": 1.7908016745981856e-08, "logits/chosen": 1.282177448272705, "logits/rejected": 1.2717326879501343, "loss": 0.8333, "step": 590 }, { "beta_dpo/beta": 0.09730789065361023, "beta_dpo/beta_margin_grad_mean": -0.3583000600337982, "beta_dpo/beta_margin_grad_std": 0.24750088155269623, "beta_dpo/beta_margin_mean": 1.3048549890518188, "beta_dpo/beta_margin_std": 2.435183048248291, "beta_dpo/beta_used": 0.09730789065361023, "beta_dpo/beta_used_raw": 0.022840075194835663, "beta_dpo/gap_mean": 9.362573623657227, "beta_dpo/gap_std": 14.858987808227539, "beta_dpo/loss_margin_mean": 8.98420238494873, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8934240362811792, "grad_norm": 28.505207061767578, "learning_rate": 1.7419845883949098e-08, "logits/chosen": 1.1761068105697632, "logits/rejected": 1.1118135452270508, "loss": 1.1498, "step": 591 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49769604206085205, "beta_dpo/beta_margin_grad_std": 0.004002981819212437, "beta_dpo/beta_margin_mean": 0.009216600097715855, "beta_dpo/beta_margin_std": 0.016013581305742264, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.06022973731160164, "beta_dpo/gap_mean": 9.103561401367188, "beta_dpo/gap_std": 15.066686630249023, "beta_dpo/loss_margin_mean": 9.21660041809082, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8949357520786092, "grad_norm": 0.33266639709472656, "learning_rate": 1.6938180788793556e-08, "logits/chosen": 1.1416816711425781, "logits/rejected": 1.0328766107559204, "loss": 1.3799, "step": 592 }, { "beta_dpo/beta": 0.1512017697095871, "beta_dpo/beta_margin_grad_mean": -0.37732359766960144, "beta_dpo/beta_margin_grad_std": 0.2817494869232178, "beta_dpo/beta_margin_mean": 1.483587622642517, "beta_dpo/beta_margin_std": 3.5750889778137207, "beta_dpo/beta_used": 0.1512017697095871, "beta_dpo/beta_used_raw": 0.1512017697095871, "beta_dpo/gap_mean": 9.142255783081055, "beta_dpo/gap_std": 15.243630409240723, "beta_dpo/loss_margin_mean": 9.367238998413086, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8964474678760394, "grad_norm": 46.93597412109375, "learning_rate": 1.6463034933723336e-08, "logits/chosen": 1.2500877380371094, "logits/rejected": 1.1615597009658813, "loss": 1.061, "step": 593 }, { "beta_dpo/beta": 0.06817789375782013, "beta_dpo/beta_margin_grad_mean": -0.42273271083831787, "beta_dpo/beta_margin_grad_std": 0.22297364473342896, "beta_dpo/beta_margin_mean": 0.5673213601112366, "beta_dpo/beta_margin_std": 1.5483680963516235, "beta_dpo/beta_used": 0.06817789375782013, "beta_dpo/beta_used_raw": 0.042327724397182465, "beta_dpo/gap_mean": 8.802755355834961, "beta_dpo/gap_std": 15.327518463134766, "beta_dpo/loss_margin_mean": 5.755950927734375, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8979591836734694, "grad_norm": 21.160133361816406, "learning_rate": 1.5994421609589385e-08, "logits/chosen": 1.3079917430877686, "logits/rejected": 1.2591397762298584, "loss": 1.1284, "step": 594 }, { "beta_dpo/beta": 0.2774176597595215, "beta_dpo/beta_margin_grad_mean": -0.24613875150680542, "beta_dpo/beta_margin_grad_std": 0.3464008867740631, "beta_dpo/beta_margin_mean": 2.811112403869629, "beta_dpo/beta_margin_std": 4.953824520111084, "beta_dpo/beta_used": 0.2774176597595215, "beta_dpo/beta_used_raw": 0.2774176597595215, "beta_dpo/gap_mean": 8.735965728759766, "beta_dpo/gap_std": 15.631080627441406, "beta_dpo/loss_margin_mean": 10.130765914916992, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.8994708994708994, "grad_norm": 64.07139587402344, "learning_rate": 1.553235392451377e-08, "logits/chosen": 1.4716193675994873, "logits/rejected": 1.3438546657562256, "loss": 0.8868, "step": 595 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4990660846233368, "beta_dpo/beta_margin_grad_std": 0.0036789732985198498, "beta_dpo/beta_margin_mean": 0.0037360715214163065, "beta_dpo/beta_margin_std": 0.014717076905071735, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.18999116122722626, "beta_dpo/gap_mean": 8.070295333862305, "beta_dpo/gap_std": 15.721334457397461, "beta_dpo/loss_margin_mean": 3.7360713481903076, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9009826152683296, "grad_norm": 0.2939727306365967, "learning_rate": 1.507684480352292e-08, "logits/chosen": 1.0870261192321777, "logits/rejected": 1.1027809381484985, "loss": 1.3831, "step": 596 }, { "beta_dpo/beta": 0.10379099100828171, "beta_dpo/beta_margin_grad_mean": -0.3936522901058197, "beta_dpo/beta_margin_grad_std": 0.22852064669132233, "beta_dpo/beta_margin_mean": 0.9076488614082336, "beta_dpo/beta_margin_std": 1.9588474035263062, "beta_dpo/beta_used": 0.10379099100828171, "beta_dpo/beta_used_raw": 0.05178498104214668, "beta_dpo/gap_mean": 8.036808013916016, "beta_dpo/gap_std": 15.484565734863281, "beta_dpo/loss_margin_mean": 9.03473949432373, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9024943310657596, "grad_norm": 24.972312927246094, "learning_rate": 1.4627906988186111e-08, "logits/chosen": 1.3164136409759521, "logits/rejected": 1.314997911453247, "loss": 1.0269, "step": 597 }, { "beta_dpo/beta": 0.01767176389694214, "beta_dpo/beta_margin_grad_mean": -0.4686887562274933, "beta_dpo/beta_margin_grad_std": 0.0846300721168518, "beta_dpo/beta_margin_mean": 0.1313055008649826, "beta_dpo/beta_margin_std": 0.3668515980243683, "beta_dpo/beta_used": 0.01767176389694214, "beta_dpo/beta_used_raw": -0.05600941181182861, "beta_dpo/gap_mean": 8.039358139038086, "beta_dpo/gap_std": 15.246614456176758, "beta_dpo/loss_margin_mean": 8.034171104431152, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9040060468631897, "grad_norm": 7.187092304229736, "learning_rate": 1.4185553036259095e-08, "logits/chosen": 1.18892240524292, "logits/rejected": 1.112982988357544, "loss": 1.2838, "step": 598 }, { "beta_dpo/beta": 0.08740982413291931, "beta_dpo/beta_margin_grad_mean": -0.41112831234931946, "beta_dpo/beta_margin_grad_std": 0.2523919939994812, "beta_dpo/beta_margin_mean": 0.6939030885696411, "beta_dpo/beta_margin_std": 2.0126750469207764, "beta_dpo/beta_used": 0.08740982413291931, "beta_dpo/beta_used_raw": 0.03378527611494064, "beta_dpo/gap_mean": 7.967884063720703, "beta_dpo/gap_std": 15.234304428100586, "beta_dpo/loss_margin_mean": 7.490134239196777, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9055177626606198, "grad_norm": 27.660696029663086, "learning_rate": 1.3749795321332885e-08, "logits/chosen": 1.2530118227005005, "logits/rejected": 1.2126647233963013, "loss": 1.1492, "step": 599 }, { "beta_dpo/beta": 0.11240988969802856, "beta_dpo/beta_margin_grad_mean": -0.3938973546028137, "beta_dpo/beta_margin_grad_std": 0.25913897156715393, "beta_dpo/beta_margin_mean": 0.6383807063102722, "beta_dpo/beta_margin_std": 1.723300814628601, "beta_dpo/beta_used": 0.11240988969802856, "beta_dpo/beta_used_raw": 0.11240988969802856, "beta_dpo/gap_mean": 7.678271293640137, "beta_dpo/gap_std": 15.040021896362305, "beta_dpo/loss_margin_mean": 6.692630290985107, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9070294784580499, "grad_norm": 31.70339012145996, "learning_rate": 1.3320646032487393e-08, "logits/chosen": 1.3531250953674316, "logits/rejected": 1.2455155849456787, "loss": 1.0426, "step": 600 }, { "epoch": 0.9070294784580499, "eval_beta_dpo/beta": 0.16284236311912537, "eval_beta_dpo/beta_margin_grad_mean": -0.34278053045272827, "eval_beta_dpo/beta_margin_grad_std": 0.24819053709506989, "eval_beta_dpo/beta_margin_mean": 1.6614328622817993, "eval_beta_dpo/beta_margin_std": 2.518134117126465, "eval_beta_dpo/beta_used": 0.16284236311912537, "eval_beta_dpo/beta_used_raw": 0.1531587541103363, "eval_beta_dpo/gap_mean": 7.747578144073486, "eval_beta_dpo/gap_std": 14.899542808532715, "eval_beta_dpo/loss_margin_mean": 8.63355827331543, "eval_beta_dpo/mask_keep_frac": 1.0, "eval_logits/chosen": 1.2070788145065308, "eval_logits/rejected": 1.14027738571167, "eval_loss": 0.6518335938453674, "eval_runtime": 43.528, "eval_samples_per_second": 52.909, "eval_steps_per_second": 1.654, "step": 600 }, { "beta_dpo/beta": 0.10141826421022415, "beta_dpo/beta_margin_grad_mean": -0.34344714879989624, "beta_dpo/beta_margin_grad_std": 0.22835154831409454, "beta_dpo/beta_margin_mean": 1.0757205486297607, "beta_dpo/beta_margin_std": 1.7910301685333252, "beta_dpo/beta_used": 0.10141826421022415, "beta_dpo/beta_used_raw": 0.10141826421022415, "beta_dpo/gap_mean": 8.107170104980469, "beta_dpo/gap_std": 14.828914642333984, "beta_dpo/loss_margin_mean": 9.79207992553711, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.90854119425548, "grad_norm": 22.663358688354492, "learning_rate": 1.2898117173950868e-08, "logits/chosen": 1.166858196258545, "logits/rejected": 1.0885355472564697, "loss": 1.0032, "step": 601 }, { "beta_dpo/beta": 0.1556924283504486, "beta_dpo/beta_margin_grad_mean": -0.29175105690956116, "beta_dpo/beta_margin_grad_std": 0.25710615515708923, "beta_dpo/beta_margin_mean": 1.5223388671875, "beta_dpo/beta_margin_std": 2.0580081939697266, "beta_dpo/beta_used": 0.1556924283504486, "beta_dpo/beta_used_raw": 0.1556924283504486, "beta_dpo/gap_mean": 8.32609748840332, "beta_dpo/gap_std": 14.536138534545898, "beta_dpo/loss_margin_mean": 9.849860191345215, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.91005291005291, "grad_norm": 32.89277267456055, "learning_rate": 1.2482220564763667e-08, "logits/chosen": 1.137499213218689, "logits/rejected": 1.0821897983551025, "loss": 0.8122, "step": 602 }, { "beta_dpo/beta": 0.03461594507098198, "beta_dpo/beta_margin_grad_mean": -0.4290250837802887, "beta_dpo/beta_margin_grad_std": 0.13414107263088226, "beta_dpo/beta_margin_mean": 0.33947205543518066, "beta_dpo/beta_margin_std": 0.6727907061576843, "beta_dpo/beta_used": 0.03461594507098198, "beta_dpo/beta_used_raw": -0.00844324380159378, "beta_dpo/gap_mean": 8.558271408081055, "beta_dpo/gap_std": 14.459668159484863, "beta_dpo/loss_margin_mean": 8.81813907623291, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9115646258503401, "grad_norm": 11.329524040222168, "learning_rate": 1.2072967838448051e-08, "logits/chosen": 0.8011248707771301, "logits/rejected": 0.7711565494537354, "loss": 1.1882, "step": 603 }, { "beta_dpo/beta": 0.13645920157432556, "beta_dpo/beta_margin_grad_mean": -0.3494945764541626, "beta_dpo/beta_margin_grad_std": 0.27078843116760254, "beta_dpo/beta_margin_mean": 1.1950139999389648, "beta_dpo/beta_margin_std": 2.49851655960083, "beta_dpo/beta_used": 0.13645920157432556, "beta_dpo/beta_used_raw": 0.13645920157432556, "beta_dpo/gap_mean": 8.562814712524414, "beta_dpo/gap_std": 14.682696342468262, "beta_dpo/loss_margin_mean": 8.797730445861816, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9130763416477702, "grad_norm": 42.889583587646484, "learning_rate": 1.1670370442682459e-08, "logits/chosen": 1.1775734424591064, "logits/rejected": 1.1526458263397217, "loss": 1.0029, "step": 604 }, { "beta_dpo/beta": 0.024525772780179977, "beta_dpo/beta_margin_grad_mean": -0.46467721462249756, "beta_dpo/beta_margin_grad_std": 0.12513820827007294, "beta_dpo/beta_margin_mean": 0.16219019889831543, "beta_dpo/beta_margin_std": 0.5986112356185913, "beta_dpo/beta_used": 0.024525772780179977, "beta_dpo/beta_used_raw": -0.03071369044482708, "beta_dpo/gap_mean": 8.287137985229492, "beta_dpo/gap_std": 14.952404975891113, "beta_dpo/loss_margin_mean": 6.617243766784668, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9145880574452003, "grad_norm": 9.076241493225098, "learning_rate": 1.1274439638981532e-08, "logits/chosen": 1.2894868850708008, "logits/rejected": 1.2614383697509766, "loss": 1.2515, "step": 605 }, { "beta_dpo/beta": 0.09253361076116562, "beta_dpo/beta_margin_grad_mean": -0.37834733724594116, "beta_dpo/beta_margin_grad_std": 0.23104941844940186, "beta_dpo/beta_margin_mean": 1.1032108068466187, "beta_dpo/beta_margin_std": 2.277953624725342, "beta_dpo/beta_used": 0.09253361076116562, "beta_dpo/beta_used_raw": 0.07488581538200378, "beta_dpo/gap_mean": 8.222452163696289, "beta_dpo/gap_std": 14.61074447631836, "beta_dpo/loss_margin_mean": 9.209124565124512, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9160997732426304, "grad_norm": 27.21750831604004, "learning_rate": 1.0885186502381016e-08, "logits/chosen": 1.1829588413238525, "logits/rejected": 1.117699384689331, "loss": 1.0745, "step": 606 }, { "beta_dpo/beta": 0.0583856962621212, "beta_dpo/beta_margin_grad_mean": -0.3807748556137085, "beta_dpo/beta_margin_grad_std": 0.19601111114025116, "beta_dpo/beta_margin_mean": 0.7777360677719116, "beta_dpo/beta_margin_std": 1.383155345916748, "beta_dpo/beta_used": 0.0583856962621212, "beta_dpo/beta_used_raw": 0.0036773681640625, "beta_dpo/gap_mean": 8.679821968078613, "beta_dpo/gap_std": 14.6058931350708, "beta_dpo/loss_margin_mean": 8.788156509399414, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9176114890400605, "grad_norm": 20.968740463256836, "learning_rate": 1.0502621921127774e-08, "logits/chosen": 1.1087911128997803, "logits/rejected": 1.060031533241272, "loss": 1.1452, "step": 607 }, { "beta_dpo/beta": 0.03544434905052185, "beta_dpo/beta_margin_grad_mean": -0.46062541007995605, "beta_dpo/beta_margin_grad_std": 0.12345908582210541, "beta_dpo/beta_margin_mean": 0.17531032860279083, "beta_dpo/beta_margin_std": 0.54575514793396, "beta_dpo/beta_used": 0.03544434905052185, "beta_dpo/beta_used_raw": 0.03544434905052185, "beta_dpo/gap_mean": 7.942313194274902, "beta_dpo/gap_std": 14.522319793701172, "beta_dpo/loss_margin_mean": 5.154706954956055, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9191232048374905, "grad_norm": 14.060853004455566, "learning_rate": 1.0126756596375685e-08, "logits/chosen": 1.320879340171814, "logits/rejected": 1.1834135055541992, "loss": 1.2106, "step": 608 }, { "beta_dpo/beta": 0.24092288315296173, "beta_dpo/beta_margin_grad_mean": -0.28901004791259766, "beta_dpo/beta_margin_grad_std": 0.2751197814941406, "beta_dpo/beta_margin_mean": 2.6792190074920654, "beta_dpo/beta_margin_std": 4.407725811004639, "beta_dpo/beta_used": 0.24092288315296173, "beta_dpo/beta_used_raw": 0.24092288315296173, "beta_dpo/gap_mean": 8.123943328857422, "beta_dpo/gap_std": 14.249687194824219, "beta_dpo/loss_margin_mean": 8.886117935180664, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9206349206349206, "grad_norm": 43.7921257019043, "learning_rate": 9.757601041885694e-09, "logits/chosen": 1.4151957035064697, "logits/rejected": 1.3399848937988281, "loss": 0.7838, "step": 609 }, { "beta_dpo/beta": 0.08768744766712189, "beta_dpo/beta_margin_grad_mean": -0.37437552213668823, "beta_dpo/beta_margin_grad_std": 0.24727308750152588, "beta_dpo/beta_margin_mean": 0.8633731007575989, "beta_dpo/beta_margin_std": 1.718218445777893, "beta_dpo/beta_used": 0.08768744766712189, "beta_dpo/beta_used_raw": 0.08768744766712189, "beta_dpo/gap_mean": 8.23539924621582, "beta_dpo/gap_std": 14.590499877929688, "beta_dpo/loss_margin_mean": 9.4933500289917, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9221466364323507, "grad_norm": 31.680171966552734, "learning_rate": 9.395165583732379e-09, "logits/chosen": 1.3596689701080322, "logits/rejected": 1.2988369464874268, "loss": 1.1331, "step": 610 }, { "beta_dpo/beta": 0.0011600707657635212, "beta_dpo/beta_margin_grad_mean": -0.49776604771614075, "beta_dpo/beta_margin_grad_std": 0.004311481025069952, "beta_dpo/beta_margin_mean": 0.00893681962043047, "beta_dpo/beta_margin_std": 0.017248092219233513, "beta_dpo/beta_used": 0.0011600707657635212, "beta_dpo/beta_used_raw": -0.03458194062113762, "beta_dpo/gap_mean": 8.225613594055176, "beta_dpo/gap_std": 14.824548721313477, "beta_dpo/loss_margin_mean": 7.506056308746338, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9236583522297808, "grad_norm": 0.3966653347015381, "learning_rate": 9.03946036001449e-09, "logits/chosen": 1.2817761898040771, "logits/rejected": 1.2514748573303223, "loss": 1.3793, "step": 611 }, { "beta_dpo/beta": 0.14685246348381042, "beta_dpo/beta_margin_grad_mean": -0.3220314681529999, "beta_dpo/beta_margin_grad_std": 0.2983168363571167, "beta_dpo/beta_margin_mean": 1.4434245824813843, "beta_dpo/beta_margin_std": 2.5013015270233154, "beta_dpo/beta_used": 0.14685246348381042, "beta_dpo/beta_used_raw": 0.14685246348381042, "beta_dpo/gap_mean": 8.368420600891113, "beta_dpo/gap_std": 15.056795120239258, "beta_dpo/loss_margin_mean": 9.817299842834473, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9251700680272109, "grad_norm": 29.391437530517578, "learning_rate": 8.690495320571839e-09, "logits/chosen": 1.2257752418518066, "logits/rejected": 1.165148138999939, "loss": 0.8716, "step": 612 }, { "beta_dpo/beta": 0.42597997188568115, "beta_dpo/beta_margin_grad_mean": -0.22234027087688446, "beta_dpo/beta_margin_grad_std": 0.35421091318130493, "beta_dpo/beta_margin_mean": 5.603759765625, "beta_dpo/beta_margin_std": 7.522528171539307, "beta_dpo/beta_used": 0.42597997188568115, "beta_dpo/beta_used_raw": 0.42597997188568115, "beta_dpo/gap_mean": 8.936678886413574, "beta_dpo/gap_std": 15.329559326171875, "beta_dpo/loss_margin_mean": 12.598337173461914, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.926681783824641, "grad_norm": 62.93752670288086, "learning_rate": 8.348280226706722e-09, "logits/chosen": 1.1820428371429443, "logits/rejected": 1.2033995389938354, "loss": 0.58, "step": 613 }, { "beta_dpo/beta": 0.09866636246442795, "beta_dpo/beta_margin_grad_mean": -0.35599809885025024, "beta_dpo/beta_margin_grad_std": 0.24741540849208832, "beta_dpo/beta_margin_mean": 0.7811317443847656, "beta_dpo/beta_margin_std": 1.3711615800857544, "beta_dpo/beta_used": 0.09866636246442795, "beta_dpo/beta_used_raw": 0.09866636246442795, "beta_dpo/gap_mean": 9.042693138122559, "beta_dpo/gap_std": 15.193923950195312, "beta_dpo/loss_margin_mean": 7.999011516571045, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9281934996220711, "grad_norm": 26.360315322875977, "learning_rate": 8.012824650910937e-09, "logits/chosen": 1.4054988622665405, "logits/rejected": 1.3883737325668335, "loss": 0.9817, "step": 614 }, { "beta_dpo/beta": 0.09118252992630005, "beta_dpo/beta_margin_grad_mean": -0.35918137431144714, "beta_dpo/beta_margin_grad_std": 0.24969178438186646, "beta_dpo/beta_margin_mean": 0.8582610487937927, "beta_dpo/beta_margin_std": 1.515799880027771, "beta_dpo/beta_used": 0.09118252992630005, "beta_dpo/beta_used_raw": 0.09118252992630005, "beta_dpo/gap_mean": 9.102291107177734, "beta_dpo/gap_std": 15.299556732177734, "beta_dpo/loss_margin_mean": 9.143074989318848, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9297052154195011, "grad_norm": 28.177825927734375, "learning_rate": 7.684137976598088e-09, "logits/chosen": 1.1988410949707031, "logits/rejected": 1.130389928817749, "loss": 1.0275, "step": 615 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49804696440696716, "beta_dpo/beta_margin_grad_std": 0.003881829557940364, "beta_dpo/beta_margin_mean": 0.007812697440385818, "beta_dpo/beta_margin_std": 0.015528511255979538, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.08658087253570557, "beta_dpo/gap_mean": 8.891166687011719, "beta_dpo/gap_std": 15.544229507446289, "beta_dpo/loss_margin_mean": 7.812697410583496, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9312169312169312, "grad_norm": 0.43700990080833435, "learning_rate": 7.36222939784098e-09, "logits/chosen": 1.1711599826812744, "logits/rejected": 1.0668097734451294, "loss": 1.3806, "step": 616 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.498007595539093, "beta_dpo/beta_margin_grad_std": 0.0031482342164963484, "beta_dpo/beta_margin_mean": 0.007969984784722328, "beta_dpo/beta_margin_std": 0.012593930587172508, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.033831048756837845, "beta_dpo/gap_mean": 8.742076873779297, "beta_dpo/gap_std": 15.113273620605469, "beta_dpo/loss_margin_mean": 7.969984531402588, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9327286470143613, "grad_norm": 0.3276781439781189, "learning_rate": 7.047107919114586e-09, "logits/chosen": 1.1438710689544678, "logits/rejected": 1.1049525737762451, "loss": 1.3798, "step": 617 }, { "beta_dpo/beta": 0.024464260786771774, "beta_dpo/beta_margin_grad_mean": -0.4384101927280426, "beta_dpo/beta_margin_grad_std": 0.12578776478767395, "beta_dpo/beta_margin_mean": 0.2871224582195282, "beta_dpo/beta_margin_std": 0.5958966016769409, "beta_dpo/beta_used": 0.024464260786771774, "beta_dpo/beta_used_raw": -0.026629796251654625, "beta_dpo/gap_mean": 8.742508888244629, "beta_dpo/gap_std": 15.030193328857422, "beta_dpo/loss_margin_mean": 8.431224822998047, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9342403628117913, "grad_norm": 9.483170509338379, "learning_rate": 6.738782355044048e-09, "logits/chosen": 1.2045261859893799, "logits/rejected": 1.0894981622695923, "loss": 1.2553, "step": 618 }, { "beta_dpo/beta": 0.2674694061279297, "beta_dpo/beta_margin_grad_mean": -0.3007112145423889, "beta_dpo/beta_margin_grad_std": 0.32441580295562744, "beta_dpo/beta_margin_mean": 2.8940837383270264, "beta_dpo/beta_margin_std": 5.033651828765869, "beta_dpo/beta_used": 0.2674694061279297, "beta_dpo/beta_used_raw": 0.2674694061279297, "beta_dpo/gap_mean": 8.856648445129395, "beta_dpo/gap_std": 15.28053092956543, "beta_dpo/loss_margin_mean": 9.983675956726074, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9357520786092215, "grad_norm": 63.246665954589844, "learning_rate": 6.437261330158206e-09, "logits/chosen": 1.392554521560669, "logits/rejected": 1.2864084243774414, "loss": 0.801, "step": 619 }, { "beta_dpo/beta": 0.08081783354282379, "beta_dpo/beta_margin_grad_mean": -0.3847609758377075, "beta_dpo/beta_margin_grad_std": 0.22657740116119385, "beta_dpo/beta_margin_mean": 0.9509609341621399, "beta_dpo/beta_margin_std": 1.8485666513442993, "beta_dpo/beta_used": 0.08081783354282379, "beta_dpo/beta_used_raw": 0.021886445581912994, "beta_dpo/gap_mean": 8.459717750549316, "beta_dpo/gap_std": 15.082178115844727, "beta_dpo/loss_margin_mean": 7.745196342468262, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9372637944066515, "grad_norm": 24.837596893310547, "learning_rate": 6.142553278648238e-09, "logits/chosen": 1.6400712728500366, "logits/rejected": 1.645829439163208, "loss": 1.0802, "step": 620 }, { "beta_dpo/beta": 0.11531262844800949, "beta_dpo/beta_margin_grad_mean": -0.3503705561161041, "beta_dpo/beta_margin_grad_std": 0.25440457463264465, "beta_dpo/beta_margin_mean": 0.9773139953613281, "beta_dpo/beta_margin_std": 1.751634120941162, "beta_dpo/beta_used": 0.11531262844800949, "beta_dpo/beta_used_raw": 0.11531262844800949, "beta_dpo/gap_mean": 8.3915433883667, "beta_dpo/gap_std": 14.808649063110352, "beta_dpo/loss_margin_mean": 7.840453147888184, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9387755102040817, "grad_norm": 28.006378173828125, "learning_rate": 5.854666444131934e-09, "logits/chosen": 1.398921251296997, "logits/rejected": 1.2527947425842285, "loss": 1.0032, "step": 621 }, { "beta_dpo/beta": 0.1956021785736084, "beta_dpo/beta_margin_grad_mean": -0.3046272099018097, "beta_dpo/beta_margin_grad_std": 0.3041934072971344, "beta_dpo/beta_margin_mean": 2.0099432468414307, "beta_dpo/beta_margin_std": 3.8020520210266113, "beta_dpo/beta_used": 0.1956021785736084, "beta_dpo/beta_used_raw": 0.1956021785736084, "beta_dpo/gap_mean": 8.671491622924805, "beta_dpo/gap_std": 14.802274703979492, "beta_dpo/loss_margin_mean": 9.858983993530273, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9402872260015117, "grad_norm": 49.47909927368164, "learning_rate": 5.573608879422875e-09, "logits/chosen": 1.1854503154754639, "logits/rejected": 1.1152417659759521, "loss": 1.0806, "step": 622 }, { "beta_dpo/beta": 0.06421691924333572, "beta_dpo/beta_margin_grad_mean": -0.4157204031944275, "beta_dpo/beta_margin_grad_std": 0.1975506693124771, "beta_dpo/beta_margin_mean": 0.5457938313484192, "beta_dpo/beta_margin_std": 1.3326746225357056, "beta_dpo/beta_used": 0.06421691924333572, "beta_dpo/beta_used_raw": 0.040875114500522614, "beta_dpo/gap_mean": 8.688669204711914, "beta_dpo/gap_std": 14.794839859008789, "beta_dpo/loss_margin_mean": 8.007980346679688, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9417989417989417, "grad_norm": 21.249887466430664, "learning_rate": 5.299388446305342e-09, "logits/chosen": 1.0395543575286865, "logits/rejected": 0.9585334658622742, "loss": 1.0954, "step": 623 }, { "beta_dpo/beta": 0.17201095819473267, "beta_dpo/beta_margin_grad_mean": -0.2755333483219147, "beta_dpo/beta_margin_grad_std": 0.2739318907260895, "beta_dpo/beta_margin_mean": 2.029400587081909, "beta_dpo/beta_margin_std": 3.091554641723633, "beta_dpo/beta_used": 0.17201095819473267, "beta_dpo/beta_used_raw": 0.17201095819473267, "beta_dpo/gap_mean": 9.086647033691406, "beta_dpo/gap_std": 14.942371368408203, "beta_dpo/loss_margin_mean": 11.444154739379883, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9433106575963719, "grad_norm": 50.13128662109375, "learning_rate": 5.03201281531429e-09, "logits/chosen": 1.2946569919586182, "logits/rejected": 1.168874979019165, "loss": 0.8639, "step": 624 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49886149168014526, "beta_dpo/beta_margin_grad_std": 0.0035701736342161894, "beta_dpo/beta_margin_mean": 0.004554293118417263, "beta_dpo/beta_margin_std": 0.014281506650149822, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.17264024913311005, "beta_dpo/gap_mean": 8.407045364379883, "beta_dpo/gap_std": 14.818851470947266, "beta_dpo/loss_margin_mean": 4.554293155670166, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9448223733938019, "grad_norm": 0.32094186544418335, "learning_rate": 4.7714894655209174e-09, "logits/chosen": 1.2408746480941772, "logits/rejected": 1.1343789100646973, "loss": 1.3825, "step": 625 }, { "beta_dpo/beta": 0.07005998492240906, "beta_dpo/beta_margin_grad_mean": -0.44059884548187256, "beta_dpo/beta_margin_grad_std": 0.2527449429035187, "beta_dpo/beta_margin_mean": 0.6223716139793396, "beta_dpo/beta_margin_std": 2.1118099689483643, "beta_dpo/beta_used": 0.07005998492240906, "beta_dpo/beta_used_raw": -0.004164740443229675, "beta_dpo/gap_mean": 8.391353607177734, "beta_dpo/gap_std": 15.319479942321777, "beta_dpo/loss_margin_mean": 9.000487327575684, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9463340891912321, "grad_norm": 36.80985641479492, "learning_rate": 4.517825684323323e-09, "logits/chosen": 1.2944220304489136, "logits/rejected": 1.1371581554412842, "loss": 1.3288, "step": 626 }, { "beta_dpo/beta": 0.07213520258665085, "beta_dpo/beta_margin_grad_mean": -0.38978075981140137, "beta_dpo/beta_margin_grad_std": 0.20375964045524597, "beta_dpo/beta_margin_mean": 0.7643184065818787, "beta_dpo/beta_margin_std": 1.5277169942855835, "beta_dpo/beta_used": 0.07213520258665085, "beta_dpo/beta_used_raw": 0.04761495813727379, "beta_dpo/gap_mean": 8.814528465270996, "beta_dpo/gap_std": 15.30355453491211, "beta_dpo/loss_margin_mean": 11.308126449584961, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9478458049886621, "grad_norm": 21.390562057495117, "learning_rate": 4.271028567242818e-09, "logits/chosen": 1.2814748287200928, "logits/rejected": 1.1207035779953003, "loss": 1.0657, "step": 627 }, { "beta_dpo/beta": 0.21702194213867188, "beta_dpo/beta_margin_grad_mean": -0.28619271516799927, "beta_dpo/beta_margin_grad_std": 0.35538989305496216, "beta_dpo/beta_margin_mean": 2.4516913890838623, "beta_dpo/beta_margin_std": 4.084339141845703, "beta_dpo/beta_used": 0.21702194213867188, "beta_dpo/beta_used_raw": 0.21702194213867188, "beta_dpo/gap_mean": 9.10338306427002, "beta_dpo/gap_std": 16.06523323059082, "beta_dpo/loss_margin_mean": 11.028809547424316, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9493575207860923, "grad_norm": 71.84501647949219, "learning_rate": 4.0311050177251895e-09, "logits/chosen": 1.2058839797973633, "logits/rejected": 1.2187294960021973, "loss": 1.1868, "step": 628 }, { "beta_dpo/beta": 0.040276795625686646, "beta_dpo/beta_margin_grad_mean": -0.42245370149612427, "beta_dpo/beta_margin_grad_std": 0.1697092205286026, "beta_dpo/beta_margin_mean": 0.4575227200984955, "beta_dpo/beta_margin_std": 1.0323092937469482, "beta_dpo/beta_used": 0.040276795625686646, "beta_dpo/beta_used_raw": 0.03951174020767212, "beta_dpo/gap_mean": 9.401752471923828, "beta_dpo/gap_std": 15.793481826782227, "beta_dpo/loss_margin_mean": 9.8300199508667, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9508692365835223, "grad_norm": 13.410911560058594, "learning_rate": 3.798061746947995e-09, "logits/chosen": 1.3186110258102417, "logits/rejected": 1.2518651485443115, "loss": 1.1975, "step": 629 }, { "beta_dpo/beta": 0.1225634217262268, "beta_dpo/beta_margin_grad_mean": -0.37748420238494873, "beta_dpo/beta_margin_grad_std": 0.25147491693496704, "beta_dpo/beta_margin_mean": 1.265876293182373, "beta_dpo/beta_margin_std": 2.6474194526672363, "beta_dpo/beta_used": 0.1225634217262268, "beta_dpo/beta_used_raw": 0.1225634217262268, "beta_dpo/gap_mean": 9.436227798461914, "beta_dpo/gap_std": 15.730070114135742, "beta_dpo/loss_margin_mean": 9.3777494430542, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9523809523809523, "grad_norm": 34.975379943847656, "learning_rate": 3.5719052736323806e-09, "logits/chosen": 1.0783604383468628, "logits/rejected": 1.0250047445297241, "loss": 1.0213, "step": 630 }, { "beta_dpo/beta": 0.09131479263305664, "beta_dpo/beta_margin_grad_mean": -0.34115666151046753, "beta_dpo/beta_margin_grad_std": 0.2506091594696045, "beta_dpo/beta_margin_mean": 0.923401415348053, "beta_dpo/beta_margin_std": 1.4584547281265259, "beta_dpo/beta_used": 0.09131479263305664, "beta_dpo/beta_used_raw": 0.09131479263305664, "beta_dpo/gap_mean": 9.782768249511719, "beta_dpo/gap_std": 15.912504196166992, "beta_dpo/loss_margin_mean": 10.662687301635742, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9538926681783825, "grad_norm": 23.484634399414062, "learning_rate": 3.352641923861144e-09, "logits/chosen": 1.3594732284545898, "logits/rejected": 1.2386384010314941, "loss": 0.9504, "step": 631 }, { "beta_dpo/beta": 0.189894437789917, "beta_dpo/beta_margin_grad_mean": -0.31988996267318726, "beta_dpo/beta_margin_grad_std": 0.3233727812767029, "beta_dpo/beta_margin_mean": 1.8445459604263306, "beta_dpo/beta_margin_std": 3.505603790283203, "beta_dpo/beta_used": 0.189894437789917, "beta_dpo/beta_used_raw": 0.189894437789917, "beta_dpo/gap_mean": 9.691600799560547, "beta_dpo/gap_std": 16.004518508911133, "beta_dpo/loss_margin_mean": 9.54232406616211, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9554043839758125, "grad_norm": 43.28837966918945, "learning_rate": 3.140277830901428e-09, "logits/chosen": 1.2820234298706055, "logits/rejected": 1.2324151992797852, "loss": 0.8801, "step": 632 }, { "beta_dpo/beta": 0.061312876641750336, "beta_dpo/beta_margin_grad_mean": -0.36580348014831543, "beta_dpo/beta_margin_grad_std": 0.18776066601276398, "beta_dpo/beta_margin_mean": 0.7045513987541199, "beta_dpo/beta_margin_std": 1.044027328491211, "beta_dpo/beta_used": 0.061312876641750336, "beta_dpo/beta_used_raw": 0.061312876641750336, "beta_dpo/gap_mean": 9.78476333618164, "beta_dpo/gap_std": 15.973169326782227, "beta_dpo/loss_margin_mean": 11.0767822265625, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9569160997732427, "grad_norm": 16.645475387573242, "learning_rate": 2.9348189350335007e-09, "logits/chosen": 1.1682333946228027, "logits/rejected": 1.1017271280288696, "loss": 1.0502, "step": 633 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49894630908966064, "beta_dpo/beta_margin_grad_std": 0.0039056446403265, "beta_dpo/beta_margin_mean": 0.004215083085000515, "beta_dpo/beta_margin_std": 0.015623592771589756, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.17823907732963562, "beta_dpo/gap_mean": 9.161681175231934, "beta_dpo/gap_std": 16.027673721313477, "beta_dpo/loss_margin_mean": 4.21508264541626, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9584278155706727, "grad_norm": 0.32190123200416565, "learning_rate": 2.736270983384276e-09, "logits/chosen": 1.1025863885879517, "logits/rejected": 1.10835862159729, "loss": 1.3818, "step": 634 }, { "beta_dpo/beta": 0.02330617606639862, "beta_dpo/beta_margin_grad_mean": -0.46098482608795166, "beta_dpo/beta_margin_grad_std": 0.11248224973678589, "beta_dpo/beta_margin_mean": 0.17786218225955963, "beta_dpo/beta_margin_std": 0.5247640013694763, "beta_dpo/beta_used": 0.02330617606639862, "beta_dpo/beta_used_raw": -0.025716857984662056, "beta_dpo/gap_mean": 8.629072189331055, "beta_dpo/gap_std": 15.859984397888184, "beta_dpo/loss_margin_mean": 7.23160457611084, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9599395313681028, "grad_norm": 9.109109878540039, "learning_rate": 2.5446395297668287e-09, "logits/chosen": 1.237937331199646, "logits/rejected": 1.1790293455123901, "loss": 1.2575, "step": 635 }, { "beta_dpo/beta": 0.21303845942020416, "beta_dpo/beta_margin_grad_mean": -0.2767031490802765, "beta_dpo/beta_margin_grad_std": 0.2881619930267334, "beta_dpo/beta_margin_mean": 2.355743646621704, "beta_dpo/beta_margin_std": 3.89518404006958, "beta_dpo/beta_used": 0.21303845942020416, "beta_dpo/beta_used_raw": 0.21303845942020416, "beta_dpo/gap_mean": 8.832971572875977, "beta_dpo/gap_std": 15.4896240234375, "beta_dpo/loss_margin_mean": 10.791437149047852, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9614512471655329, "grad_norm": 53.85847854614258, "learning_rate": 2.359929934524829e-09, "logits/chosen": 1.0027883052825928, "logits/rejected": 0.8940525054931641, "loss": 0.9391, "step": 636 }, { "beta_dpo/beta": 0.057768288999795914, "beta_dpo/beta_margin_grad_mean": -0.4107164740562439, "beta_dpo/beta_margin_grad_std": 0.19992104172706604, "beta_dpo/beta_margin_mean": 0.639686644077301, "beta_dpo/beta_margin_std": 1.4747357368469238, "beta_dpo/beta_used": 0.057768288999795914, "beta_dpo/beta_used_raw": 0.039929118007421494, "beta_dpo/gap_mean": 8.947272300720215, "beta_dpo/gap_std": 15.440877914428711, "beta_dpo/loss_margin_mean": 9.36134147644043, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9629629629629629, "grad_norm": 18.227508544921875, "learning_rate": 2.1821473643827137e-09, "logits/chosen": 1.2675358057022095, "logits/rejected": 1.1809782981872559, "loss": 1.1807, "step": 637 }, { "beta_dpo/beta": 0.13519155979156494, "beta_dpo/beta_margin_grad_mean": -0.353465735912323, "beta_dpo/beta_margin_grad_std": 0.2931253910064697, "beta_dpo/beta_margin_mean": 1.101943016052246, "beta_dpo/beta_margin_std": 2.174018144607544, "beta_dpo/beta_used": 0.13519155979156494, "beta_dpo/beta_used_raw": 0.13519155979156494, "beta_dpo/gap_mean": 8.85607624053955, "beta_dpo/gap_std": 15.520221710205078, "beta_dpo/loss_margin_mean": 8.02245044708252, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9644746787603931, "grad_norm": 34.01590347290039, "learning_rate": 2.0112967923011646e-09, "logits/chosen": 1.355816125869751, "logits/rejected": 1.2652667760849, "loss": 0.8726, "step": 638 }, { "beta_dpo/beta": 0.13248920440673828, "beta_dpo/beta_margin_grad_mean": -0.3459218740463257, "beta_dpo/beta_margin_grad_std": 0.2797539234161377, "beta_dpo/beta_margin_mean": 1.3880894184112549, "beta_dpo/beta_margin_std": 2.7971689701080322, "beta_dpo/beta_used": 0.13248920440673828, "beta_dpo/beta_used_raw": 0.13248920440673828, "beta_dpo/gap_mean": 8.968145370483398, "beta_dpo/gap_std": 15.500995635986328, "beta_dpo/loss_margin_mean": 9.871132850646973, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9659863945578231, "grad_norm": 36.6432991027832, "learning_rate": 1.847382997337943e-09, "logits/chosen": 0.9688248634338379, "logits/rejected": 0.8566447496414185, "loss": 0.9835, "step": 639 }, { "beta_dpo/beta": 0.0449477955698967, "beta_dpo/beta_margin_grad_mean": -0.4395362138748169, "beta_dpo/beta_margin_grad_std": 0.1477995216846466, "beta_dpo/beta_margin_mean": 0.25694775581359863, "beta_dpo/beta_margin_std": 0.7032347917556763, "beta_dpo/beta_used": 0.0449477955698967, "beta_dpo/beta_used_raw": 0.0449477955698967, "beta_dpo/gap_mean": 8.584091186523438, "beta_dpo/gap_std": 15.505561828613281, "beta_dpo/loss_margin_mean": 5.612953186035156, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9674981103552532, "grad_norm": 14.231745719909668, "learning_rate": 1.690410564514244e-09, "logits/chosen": 1.1696255207061768, "logits/rejected": 1.0740346908569336, "loss": 1.1325, "step": 640 }, { "beta_dpo/beta": 0.07587745040655136, "beta_dpo/beta_margin_grad_mean": -0.40199270844459534, "beta_dpo/beta_margin_grad_std": 0.21523261070251465, "beta_dpo/beta_margin_mean": 0.6574034094810486, "beta_dpo/beta_margin_std": 1.5263322591781616, "beta_dpo/beta_used": 0.07587745040655136, "beta_dpo/beta_used_raw": 0.02500532567501068, "beta_dpo/gap_mean": 8.408025741577148, "beta_dpo/gap_std": 15.306009292602539, "beta_dpo/loss_margin_mean": 8.222708702087402, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9690098261526833, "grad_norm": 25.847951889038086, "learning_rate": 1.5403838846864692e-09, "logits/chosen": 1.0766488313674927, "logits/rejected": 1.095979928970337, "loss": 1.0703, "step": 641 }, { "beta_dpo/beta": 0.08892130851745605, "beta_dpo/beta_margin_grad_mean": -0.3497072160243988, "beta_dpo/beta_margin_grad_std": 0.22195076942443848, "beta_dpo/beta_margin_mean": 1.0798953771591187, "beta_dpo/beta_margin_std": 1.7261310815811157, "beta_dpo/beta_used": 0.08892130851745605, "beta_dpo/beta_used_raw": 0.08892130851745605, "beta_dpo/gap_mean": 8.763275146484375, "beta_dpo/gap_std": 15.263031005859375, "beta_dpo/loss_margin_mean": 9.946568489074707, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9705215419501134, "grad_norm": 23.498695373535156, "learning_rate": 1.3973071544233218e-09, "logits/chosen": 1.5003564357757568, "logits/rejected": 1.4900686740875244, "loss": 1.036, "step": 642 }, { "beta_dpo/beta": 0.1755274385213852, "beta_dpo/beta_margin_grad_mean": -0.31771519780158997, "beta_dpo/beta_margin_grad_std": 0.31326013803482056, "beta_dpo/beta_margin_mean": 1.7572897672653198, "beta_dpo/beta_margin_std": 3.0267927646636963, "beta_dpo/beta_used": 0.1755274385213852, "beta_dpo/beta_used_raw": 0.1755274385213852, "beta_dpo/gap_mean": 8.862909317016602, "beta_dpo/gap_std": 15.451555252075195, "beta_dpo/loss_margin_mean": 9.407503128051758, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9720332577475435, "grad_norm": 47.7587776184082, "learning_rate": 1.261184375888541e-09, "logits/chosen": 1.3585262298583984, "logits/rejected": 1.189072608947754, "loss": 0.9789, "step": 643 }, { "beta_dpo/beta": 0.12742942571640015, "beta_dpo/beta_margin_grad_mean": -0.3882279396057129, "beta_dpo/beta_margin_grad_std": 0.29966428875923157, "beta_dpo/beta_margin_mean": 0.8566918969154358, "beta_dpo/beta_margin_std": 2.1793432235717773, "beta_dpo/beta_used": 0.12742942571640015, "beta_dpo/beta_used_raw": 0.12742942571640015, "beta_dpo/gap_mean": 8.460193634033203, "beta_dpo/gap_std": 15.709514617919922, "beta_dpo/loss_margin_mean": 6.808633327484131, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9735449735449735, "grad_norm": 33.20065689086914, "learning_rate": 1.1320193567288527e-09, "logits/chosen": 1.296072006225586, "logits/rejected": 1.2559714317321777, "loss": 1.0307, "step": 644 }, { "beta_dpo/beta": 0.2287611961364746, "beta_dpo/beta_margin_grad_mean": -0.2579849362373352, "beta_dpo/beta_margin_grad_std": 0.29286816716194153, "beta_dpo/beta_margin_mean": 2.571612596511841, "beta_dpo/beta_margin_std": 3.502375602722168, "beta_dpo/beta_used": 0.2287611961364746, "beta_dpo/beta_used_raw": 0.2287611961364746, "beta_dpo/gap_mean": 8.823458671569824, "beta_dpo/gap_std": 15.624380111694336, "beta_dpo/loss_margin_mean": 11.115163803100586, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9750566893424036, "grad_norm": 53.97190475463867, "learning_rate": 1.0098157099674987e-09, "logits/chosen": 1.3393789529800415, "logits/rejected": 1.3428921699523926, "loss": 0.7942, "step": 645 }, { "beta_dpo/beta": 0.046885035932064056, "beta_dpo/beta_margin_grad_mean": -0.4096597731113434, "beta_dpo/beta_margin_grad_std": 0.15604624152183533, "beta_dpo/beta_margin_mean": 0.4211799204349518, "beta_dpo/beta_margin_std": 0.8159288763999939, "beta_dpo/beta_used": 0.046885035932064056, "beta_dpo/beta_used_raw": 0.046885035932064056, "beta_dpo/gap_mean": 8.929242134094238, "beta_dpo/gap_std": 15.510902404785156, "beta_dpo/loss_margin_mean": 9.470359802246094, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9765684051398337, "grad_norm": 14.007503509521484, "learning_rate": 8.945768539031783e-10, "logits/chosen": 1.050734519958496, "logits/rejected": 1.0254426002502441, "loss": 1.1076, "step": 646 }, { "beta_dpo/beta": 0.30722373723983765, "beta_dpo/beta_margin_grad_mean": -0.1448328197002411, "beta_dpo/beta_margin_grad_std": 0.23842579126358032, "beta_dpo/beta_margin_mean": 4.167687892913818, "beta_dpo/beta_margin_std": 4.095274925231934, "beta_dpo/beta_used": 0.30722373723983765, "beta_dpo/beta_used_raw": 0.30722373723983765, "beta_dpo/gap_mean": 9.657175064086914, "beta_dpo/gap_std": 15.229113578796387, "beta_dpo/loss_margin_mean": 13.549036026000977, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9780801209372638, "grad_norm": 44.486412048339844, "learning_rate": 7.863060120144316e-10, "logits/chosen": 1.2251076698303223, "logits/rejected": 1.1599578857421875, "loss": 0.3313, "step": 647 }, { "beta_dpo/beta": 0.051618948578834534, "beta_dpo/beta_margin_grad_mean": -0.41473251581192017, "beta_dpo/beta_margin_grad_std": 0.20193688571453094, "beta_dpo/beta_margin_mean": 0.48395270109176636, "beta_dpo/beta_margin_std": 1.189092755317688, "beta_dpo/beta_used": 0.051618948578834534, "beta_dpo/beta_used_raw": 0.04244516044855118, "beta_dpo/gap_mean": 9.734169006347656, "beta_dpo/gap_std": 14.963768005371094, "beta_dpo/loss_margin_mean": 8.589853286743164, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9795918367346939, "grad_norm": 18.04836082458496, "learning_rate": 6.850062128694045e-10, "logits/chosen": 1.2029109001159668, "logits/rejected": 1.1225261688232422, "loss": 1.1111, "step": 648 }, { "beta_dpo/beta": 0.21300108730793, "beta_dpo/beta_margin_grad_mean": -0.29137375950813293, "beta_dpo/beta_margin_grad_std": 0.319355309009552, "beta_dpo/beta_margin_mean": 2.1214795112609863, "beta_dpo/beta_margin_std": 4.1887431144714355, "beta_dpo/beta_used": 0.21300108730793, "beta_dpo/beta_used_raw": 0.21300108730793, "beta_dpo/gap_mean": 9.660211563110352, "beta_dpo/gap_std": 15.06142807006836, "beta_dpo/loss_margin_mean": 9.629173278808594, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.981103552532124, "grad_norm": 47.07633972167969, "learning_rate": 5.906802900412788e-10, "logits/chosen": 1.3177015781402588, "logits/rejected": 1.2371814250946045, "loss": 0.6641, "step": 649 }, { "beta_dpo/beta": 0.18543455004692078, "beta_dpo/beta_margin_grad_mean": -0.2878350019454956, "beta_dpo/beta_margin_grad_std": 0.3191271126270294, "beta_dpo/beta_margin_mean": 1.8004777431488037, "beta_dpo/beta_margin_std": 2.68178653717041, "beta_dpo/beta_used": 0.18543455004692078, "beta_dpo/beta_used_raw": 0.18543455004692078, "beta_dpo/gap_mean": 9.650936126708984, "beta_dpo/gap_std": 14.927940368652344, "beta_dpo/loss_margin_mean": 9.712797164916992, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.982615268329554, "grad_norm": 41.930545806884766, "learning_rate": 5.033308820289184e-10, "logits/chosen": 1.3555996417999268, "logits/rejected": 1.2612521648406982, "loss": 0.8971, "step": 650 }, { "beta_dpo/beta": 0.05952965468168259, "beta_dpo/beta_margin_grad_mean": -0.3843025863170624, "beta_dpo/beta_margin_grad_std": 0.18352816998958588, "beta_dpo/beta_margin_mean": 0.5975887775421143, "beta_dpo/beta_margin_std": 0.9953944087028503, "beta_dpo/beta_used": 0.05952965468168259, "beta_dpo/beta_used_raw": 0.05952965468168259, "beta_dpo/gap_mean": 9.495718002319336, "beta_dpo/gap_std": 14.702865600585938, "beta_dpo/loss_margin_mean": 9.376497268676758, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9841269841269841, "grad_norm": 16.7528133392334, "learning_rate": 4.2296043218295606e-10, "logits/chosen": 1.25199294090271, "logits/rejected": 1.124768853187561, "loss": 1.0683, "step": 651 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4982442855834961, "beta_dpo/beta_margin_grad_std": 0.004070946015417576, "beta_dpo/beta_margin_mean": 0.007023526821285486, "beta_dpo/beta_margin_std": 0.016285018995404243, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.08277405798435211, "beta_dpo/gap_mean": 9.258039474487305, "beta_dpo/gap_std": 15.01901912689209, "beta_dpo/loss_margin_mean": 7.023526191711426, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9856386999244142, "grad_norm": 0.3672993779182434, "learning_rate": 3.4957118863768176e-10, "logits/chosen": 1.0834195613861084, "logits/rejected": 1.1000077724456787, "loss": 1.3801, "step": 652 }, { "beta_dpo/beta": 0.08435218036174774, "beta_dpo/beta_margin_grad_mean": -0.3591226637363434, "beta_dpo/beta_margin_grad_std": 0.2340759038925171, "beta_dpo/beta_margin_mean": 0.7783568501472473, "beta_dpo/beta_margin_std": 1.355913519859314, "beta_dpo/beta_used": 0.08435218036174774, "beta_dpo/beta_used_raw": 0.08435218036174774, "beta_dpo/gap_mean": 9.125565528869629, "beta_dpo/gap_std": 15.28455924987793, "beta_dpo/loss_margin_mean": 9.264640808105469, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9871504157218443, "grad_norm": 22.500999450683594, "learning_rate": 2.831652042480093e-10, "logits/chosen": 1.1946804523468018, "logits/rejected": 1.1475324630737305, "loss": 1.0293, "step": 653 }, { "beta_dpo/beta": 0.09523887932300568, "beta_dpo/beta_margin_grad_mean": -0.3858698308467865, "beta_dpo/beta_margin_grad_std": 0.2553237974643707, "beta_dpo/beta_margin_mean": 1.1672877073287964, "beta_dpo/beta_margin_std": 2.5366644859313965, "beta_dpo/beta_used": 0.09523887932300568, "beta_dpo/beta_used_raw": -0.015430465340614319, "beta_dpo/gap_mean": 9.186834335327148, "beta_dpo/gap_std": 15.484939575195312, "beta_dpo/loss_margin_mean": 8.130887985229492, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9886621315192744, "grad_norm": 27.98388671875, "learning_rate": 2.2374433653205016e-10, "logits/chosen": 1.151197075843811, "logits/rejected": 0.9994099140167236, "loss": 1.0911, "step": 654 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.49734944105148315, "beta_dpo/beta_margin_grad_std": 0.00388297438621521, "beta_dpo/beta_margin_mean": 0.010603162460029125, "beta_dpo/beta_margin_std": 0.015533708967268467, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.01662539876997471, "beta_dpo/gap_mean": 9.309677124023438, "beta_dpo/gap_std": 15.503271102905273, "beta_dpo/loss_margin_mean": 10.603161811828613, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9901738473167044, "grad_norm": 0.3120063245296478, "learning_rate": 1.7131024761923852e-10, "logits/chosen": 1.1806678771972656, "logits/rejected": 1.039980173110962, "loss": 1.379, "step": 655 }, { "beta_dpo/beta": 0.08966214954853058, "beta_dpo/beta_margin_grad_mean": -0.346645712852478, "beta_dpo/beta_margin_grad_std": 0.21018153429031372, "beta_dpo/beta_margin_mean": 0.82259601354599, "beta_dpo/beta_margin_std": 1.2134861946105957, "beta_dpo/beta_used": 0.08966214954853058, "beta_dpo/beta_used_raw": 0.08966214954853058, "beta_dpo/gap_mean": 9.15340805053711, "beta_dpo/gap_std": 15.19101333618164, "beta_dpo/loss_margin_mean": 9.192506790161133, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9916855631141346, "grad_norm": 20.03281021118164, "learning_rate": 1.2586440420372934e-10, "logits/chosen": 1.1405537128448486, "logits/rejected": 1.0787684917449951, "loss": 0.8857, "step": 656 }, { "beta_dpo/beta": 0.08845320343971252, "beta_dpo/beta_margin_grad_mean": -0.3202848732471466, "beta_dpo/beta_margin_grad_std": 0.22860798239707947, "beta_dpo/beta_margin_mean": 1.0682189464569092, "beta_dpo/beta_margin_std": 1.452836513519287, "beta_dpo/beta_used": 0.08845320343971252, "beta_dpo/beta_used_raw": 0.08845320343971252, "beta_dpo/gap_mean": 9.614997863769531, "beta_dpo/gap_std": 15.207011222839355, "beta_dpo/loss_margin_mean": 12.184465408325195, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9931972789115646, "grad_norm": 25.735523223876953, "learning_rate": 8.740807750345913e-11, "logits/chosen": 1.3117201328277588, "logits/rejected": 1.1885137557983398, "loss": 0.9697, "step": 657 }, { "beta_dpo/beta": 0.08655724674463272, "beta_dpo/beta_margin_grad_mean": -0.3984954059123993, "beta_dpo/beta_margin_grad_std": 0.2469259351491928, "beta_dpo/beta_margin_mean": 0.8934859037399292, "beta_dpo/beta_margin_std": 2.263308525085449, "beta_dpo/beta_used": 0.08655724674463272, "beta_dpo/beta_used_raw": 0.04852905124425888, "beta_dpo/gap_mean": 9.4644775390625, "beta_dpo/gap_std": 15.579830169677734, "beta_dpo/loss_margin_mean": 8.258312225341797, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9947089947089947, "grad_norm": 25.514381408691406, "learning_rate": 5.594234322453539e-11, "logits/chosen": 1.261268138885498, "logits/rejected": 1.220792293548584, "loss": 1.0962, "step": 658 }, { "beta_dpo/beta": 0.034201011061668396, "beta_dpo/beta_margin_grad_mean": -0.43599990010261536, "beta_dpo/beta_margin_grad_std": 0.17003847658634186, "beta_dpo/beta_margin_mean": 0.3266862630844116, "beta_dpo/beta_margin_std": 0.9077629446983337, "beta_dpo/beta_used": 0.034201011061668396, "beta_dpo/beta_used_raw": -0.03887367993593216, "beta_dpo/gap_mean": 9.18392276763916, "beta_dpo/gap_std": 15.714540481567383, "beta_dpo/loss_margin_mean": 6.176858425140381, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9962207105064248, "grad_norm": 10.678601264953613, "learning_rate": 3.146808153123293e-11, "logits/chosen": 1.3522672653198242, "logits/rejected": 1.2511718273162842, "loss": 1.215, "step": 659 }, { "beta_dpo/beta": 0.29667574167251587, "beta_dpo/beta_margin_grad_mean": -0.23005616664886475, "beta_dpo/beta_margin_grad_std": 0.3280402719974518, "beta_dpo/beta_margin_mean": 3.1850943565368652, "beta_dpo/beta_margin_std": 3.9281609058380127, "beta_dpo/beta_used": 0.29667574167251587, "beta_dpo/beta_used_raw": 0.29667574167251587, "beta_dpo/gap_mean": 9.110960006713867, "beta_dpo/gap_std": 15.258062362670898, "beta_dpo/loss_margin_mean": 10.734999656677246, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.9977324263038548, "grad_norm": 56.866981506347656, "learning_rate": 1.3985977021235829e-11, "logits/chosen": 1.2931430339813232, "logits/rejected": 1.200553059577942, "loss": 0.8159, "step": 660 }, { "beta_dpo/beta": 0.0010000000474974513, "beta_dpo/beta_margin_grad_mean": -0.4985651671886444, "beta_dpo/beta_margin_grad_std": 0.004238836467266083, "beta_dpo/beta_margin_mean": 0.00573985418304801, "beta_dpo/beta_margin_std": 0.016957050189375877, "beta_dpo/beta_used": 0.0010000000474974513, "beta_dpo/beta_used_raw": -0.14001157879829407, "beta_dpo/gap_mean": 8.619951248168945, "beta_dpo/gap_std": 15.422819137573242, "beta_dpo/loss_margin_mean": 5.739853858947754, "beta_dpo/mask_keep_frac": 0.78125, "epoch": 0.999244142101285, "grad_norm": 0.30782225728034973, "learning_rate": 3.4965187065971735e-12, "logits/chosen": 1.1916680335998535, "logits/rejected": 1.0985240936279297, "loss": 1.3817, "step": 661 }, { "epoch": 0.999244142101285, "step": 661, "total_flos": 0.0, "train_loss": 1.1644051905929953, "train_runtime": 2186.2877, "train_samples_per_second": 19.364, "train_steps_per_second": 0.302 } ], "logging_steps": 1, "max_steps": 661, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }