Files
mistral-7b-base-beta-dpo-hh…/trainer_state.json
ModelHub XC eb326fd6d8 初始化项目,由ModelHub XC社区提供模型
Model: W-61/mistral-7b-base-beta-dpo-hh-harmless-4xh200-batch-64
Source: Original Platform
2026-05-30 22:35:26 +08:00

13390 lines
540 KiB
JSON

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.999244142101285,
"eval_steps": 100,
"global_step": 661,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"beta_dpo/beta": 0.09949193149805069,
"beta_dpo/beta_margin_grad_mean": -0.5002161860466003,
"beta_dpo/beta_margin_grad_std": 0.00568619929254055,
"beta_dpo/beta_margin_mean": -0.0008644365007057786,
"beta_dpo/beta_margin_std": 0.02274876832962036,
"beta_dpo/beta_used": 0.09949193149805069,
"beta_dpo/beta_used_raw": 0.09949193149805069,
"beta_dpo/gap_mean": -0.002860965905711055,
"beta_dpo/gap_std": 0.027476027607917786,
"beta_dpo/loss_margin_mean": -0.00900276005268097,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.0015117157974300832,
"grad_norm": 68.95387268066406,
"learning_rate": 0.0,
"logits/chosen": -3.487456798553467,
"logits/rejected": -3.4948604106903076,
"loss": 1.3875,
"step": 1
},
{
"beta_dpo/beta": 0.09910144656896591,
"beta_dpo/beta_margin_grad_mean": -0.5004775524139404,
"beta_dpo/beta_margin_grad_std": 0.004114280920475721,
"beta_dpo/beta_margin_mean": -0.0019103928934782743,
"beta_dpo/beta_margin_std": 0.01645863801240921,
"beta_dpo/beta_used": 0.09910144656896591,
"beta_dpo/beta_used_raw": 0.09910144656896591,
"beta_dpo/gap_mean": -0.004164176527410746,
"beta_dpo/gap_std": 0.05989988148212433,
"beta_dpo/loss_margin_mean": -0.01922258734703064,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.0030234315948601664,
"grad_norm": 63.74178695678711,
"learning_rate": 7.462686567164179e-09,
"logits/chosen": -3.489974021911621,
"logits/rejected": -3.4899895191192627,
"loss": 1.3882,
"step": 2
},
{
"beta_dpo/beta": 0.1007503867149353,
"beta_dpo/beta_margin_grad_mean": -0.5004459619522095,
"beta_dpo/beta_margin_grad_std": 0.0060578202828764915,
"beta_dpo/beta_margin_mean": -0.0017849474679678679,
"beta_dpo/beta_margin_std": 0.024235889315605164,
"beta_dpo/beta_used": 0.1007503867149353,
"beta_dpo/beta_used_raw": 0.1007503867149353,
"beta_dpo/gap_mean": -0.004537786357104778,
"beta_dpo/gap_std": 0.09120701253414154,
"beta_dpo/loss_margin_mean": -0.018549904227256775,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.0045351473922902496,
"grad_norm": 94.64966583251953,
"learning_rate": 1.4925373134328357e-08,
"logits/chosen": -3.492800235748291,
"logits/rejected": -3.5052385330200195,
"loss": 1.3855,
"step": 3
},
{
"beta_dpo/beta": 0.09986072778701782,
"beta_dpo/beta_margin_grad_mean": -0.4987858533859253,
"beta_dpo/beta_margin_grad_std": 0.006075920071452856,
"beta_dpo/beta_margin_mean": 0.004857912659645081,
"beta_dpo/beta_margin_std": 0.024309273809194565,
"beta_dpo/beta_used": 0.09986072778701782,
"beta_dpo/beta_used_raw": 0.09986072778701782,
"beta_dpo/gap_mean": -0.0007102746749296784,
"beta_dpo/gap_std": 0.12064293026924133,
"beta_dpo/loss_margin_mean": 0.04852989315986633,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.006046863189720333,
"grad_norm": 78.82814025878906,
"learning_rate": 2.2388059701492534e-08,
"logits/chosen": -3.47836971282959,
"logits/rejected": -3.4763317108154297,
"loss": 1.3866,
"step": 4
},
{
"beta_dpo/beta": 0.10005674511194229,
"beta_dpo/beta_margin_grad_mean": -0.4992569386959076,
"beta_dpo/beta_margin_grad_std": 0.0056086876429617405,
"beta_dpo/beta_margin_mean": 0.002972628688439727,
"beta_dpo/beta_margin_std": 0.0224379301071167,
"beta_dpo/beta_used": 0.10005674511194229,
"beta_dpo/beta_used_raw": 0.10005674511194229,
"beta_dpo/gap_mean": 0.006695480085909367,
"beta_dpo/gap_std": 0.13884103298187256,
"beta_dpo/loss_margin_mean": 0.029651284217834473,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.007558578987150416,
"grad_norm": 77.07207489013672,
"learning_rate": 2.9850746268656714e-08,
"logits/chosen": -3.465094804763794,
"logits/rejected": -3.4696974754333496,
"loss": 1.3856,
"step": 5
},
{
"beta_dpo/beta": 0.10029098391532898,
"beta_dpo/beta_margin_grad_mean": -0.4995466470718384,
"beta_dpo/beta_margin_grad_std": 0.006639161147177219,
"beta_dpo/beta_margin_mean": 0.001812646514736116,
"beta_dpo/beta_margin_std": 0.026568656787276268,
"beta_dpo/beta_used": 0.10029098391532898,
"beta_dpo/beta_used_raw": 0.10029098391532898,
"beta_dpo/gap_mean": 0.010851222090423107,
"beta_dpo/gap_std": 0.15967890620231628,
"beta_dpo/loss_margin_mean": 0.017943859100341797,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.009070294784580499,
"grad_norm": 74.04022979736328,
"learning_rate": 3.731343283582089e-08,
"logits/chosen": -3.4667019844055176,
"logits/rejected": -3.4714226722717285,
"loss": 1.3848,
"step": 6
},
{
"beta_dpo/beta": 0.10088849067687988,
"beta_dpo/beta_margin_grad_mean": -0.4983506500720978,
"beta_dpo/beta_margin_grad_std": 0.0047491928562521935,
"beta_dpo/beta_margin_mean": 0.006597965024411678,
"beta_dpo/beta_margin_std": 0.018998507410287857,
"beta_dpo/beta_used": 0.10088849067687988,
"beta_dpo/beta_used_raw": 0.10088849067687988,
"beta_dpo/gap_mean": 0.019090309739112854,
"beta_dpo/gap_std": 0.17145544290542603,
"beta_dpo/loss_margin_mean": 0.06506466865539551,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.010582010582010581,
"grad_norm": 69.01170349121094,
"learning_rate": 4.477611940298507e-08,
"logits/chosen": -3.471862316131592,
"logits/rejected": -3.473724842071533,
"loss": 1.3829,
"step": 7
},
{
"beta_dpo/beta": 0.10075643658638,
"beta_dpo/beta_margin_grad_mean": -0.49915874004364014,
"beta_dpo/beta_margin_grad_std": 0.00561918830499053,
"beta_dpo/beta_margin_mean": 0.0033658454194664955,
"beta_dpo/beta_margin_std": 0.022479888051748276,
"beta_dpo/beta_used": 0.10075643658638,
"beta_dpo/beta_used_raw": 0.10075643658638,
"beta_dpo/gap_mean": 0.02159273251891136,
"beta_dpo/gap_std": 0.17980128526687622,
"beta_dpo/loss_margin_mean": 0.03358778357505798,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.012093726379440665,
"grad_norm": 71.97833251953125,
"learning_rate": 5.223880597014925e-08,
"logits/chosen": -3.4647488594055176,
"logits/rejected": -3.462214469909668,
"loss": 1.3828,
"step": 8
},
{
"beta_dpo/beta": 0.09967806935310364,
"beta_dpo/beta_margin_grad_mean": -0.4994949698448181,
"beta_dpo/beta_margin_grad_std": 0.005284009501338005,
"beta_dpo/beta_margin_mean": 0.002020241692662239,
"beta_dpo/beta_margin_std": 0.021138343960046768,
"beta_dpo/beta_used": 0.09967806935310364,
"beta_dpo/beta_used_raw": 0.09967806935310364,
"beta_dpo/gap_mean": 0.022652022540569305,
"beta_dpo/gap_std": 0.18474653363227844,
"beta_dpo/loss_margin_mean": 0.020203545689582825,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.013605442176870748,
"grad_norm": 75.72856140136719,
"learning_rate": 5.970149253731343e-08,
"logits/chosen": -3.485848903656006,
"logits/rejected": -3.501671314239502,
"loss": 1.3846,
"step": 9
},
{
"beta_dpo/beta": 0.10177969187498093,
"beta_dpo/beta_margin_grad_mean": -0.4997633695602417,
"beta_dpo/beta_margin_grad_std": 0.006110870745033026,
"beta_dpo/beta_margin_mean": 0.0009462524903938174,
"beta_dpo/beta_margin_std": 0.024446699768304825,
"beta_dpo/beta_used": 0.10177969187498093,
"beta_dpo/beta_used_raw": 0.10177969187498093,
"beta_dpo/gap_mean": 0.017854779958724976,
"beta_dpo/gap_std": 0.19548800587654114,
"beta_dpo/loss_margin_mean": 0.008596926927566528,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.015117157974300832,
"grad_norm": 75.9504165649414,
"learning_rate": 6.71641791044776e-08,
"logits/chosen": -3.483827590942383,
"logits/rejected": -3.4898672103881836,
"loss": 1.3815,
"step": 10
},
{
"beta_dpo/beta": 0.10070285201072693,
"beta_dpo/beta_margin_grad_mean": -0.4989999830722809,
"beta_dpo/beta_margin_grad_std": 0.006797553040087223,
"beta_dpo/beta_margin_mean": 0.0039999885484576225,
"beta_dpo/beta_margin_std": 0.027194734662771225,
"beta_dpo/beta_used": 0.10070285201072693,
"beta_dpo/beta_used_raw": 0.10070285201072693,
"beta_dpo/gap_mean": 0.023937324061989784,
"beta_dpo/gap_std": 0.20705005526542664,
"beta_dpo/loss_margin_mean": 0.03899078071117401,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.016628873771730914,
"grad_norm": 65.5156478881836,
"learning_rate": 7.462686567164178e-08,
"logits/chosen": -3.488257884979248,
"logits/rejected": -3.497443199157715,
"loss": 1.3827,
"step": 11
},
{
"beta_dpo/beta": 0.10033339262008667,
"beta_dpo/beta_margin_grad_mean": -0.500347375869751,
"beta_dpo/beta_margin_grad_std": 0.004478678107261658,
"beta_dpo/beta_margin_mean": -0.0013897416647523642,
"beta_dpo/beta_margin_std": 0.01791626773774624,
"beta_dpo/beta_used": 0.10033339262008667,
"beta_dpo/beta_used_raw": 0.10033339262008667,
"beta_dpo/gap_mean": 0.018025288358330727,
"beta_dpo/gap_std": 0.2077764868736267,
"beta_dpo/loss_margin_mean": -0.01385033130645752,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.018140589569160998,
"grad_norm": 73.49842071533203,
"learning_rate": 8.208955223880596e-08,
"logits/chosen": -3.4729955196380615,
"logits/rejected": -3.473330497741699,
"loss": 1.384,
"step": 12
},
{
"beta_dpo/beta": 0.09898576885461807,
"beta_dpo/beta_margin_grad_mean": -0.4998151957988739,
"beta_dpo/beta_margin_grad_std": 0.005596262402832508,
"beta_dpo/beta_margin_mean": 0.0007393779815174639,
"beta_dpo/beta_margin_std": 0.022389404475688934,
"beta_dpo/beta_used": 0.09898576885461807,
"beta_dpo/beta_used_raw": 0.09898576885461807,
"beta_dpo/gap_mean": 0.016271326690912247,
"beta_dpo/gap_std": 0.2102234661579132,
"beta_dpo/loss_margin_mean": 0.007069885730743408,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.019652305366591082,
"grad_norm": 69.19833374023438,
"learning_rate": 8.955223880597014e-08,
"logits/chosen": -3.4913244247436523,
"logits/rejected": -3.503750801086426,
"loss": 1.3864,
"step": 13
},
{
"beta_dpo/beta": 0.09997418522834778,
"beta_dpo/beta_margin_grad_mean": -0.4998185336589813,
"beta_dpo/beta_margin_grad_std": 0.00562079856172204,
"beta_dpo/beta_margin_mean": 0.0007253867224790156,
"beta_dpo/beta_margin_std": 0.022488731890916824,
"beta_dpo/beta_used": 0.09997418522834778,
"beta_dpo/beta_used_raw": 0.09997418522834778,
"beta_dpo/gap_mean": 0.01367080770432949,
"beta_dpo/gap_std": 0.21450088918209076,
"beta_dpo/loss_margin_mean": 0.00729447603225708,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.021164021164021163,
"grad_norm": 78.86023712158203,
"learning_rate": 9.701492537313432e-08,
"logits/chosen": -3.468996286392212,
"logits/rejected": -3.480945587158203,
"loss": 1.385,
"step": 14
},
{
"beta_dpo/beta": 0.10166953504085541,
"beta_dpo/beta_margin_grad_mean": -0.4993080496788025,
"beta_dpo/beta_margin_grad_std": 0.005408703349530697,
"beta_dpo/beta_margin_mean": 0.0027681647334247828,
"beta_dpo/beta_margin_std": 0.021638209000229836,
"beta_dpo/beta_used": 0.10166953504085541,
"beta_dpo/beta_used_raw": 0.10166953504085541,
"beta_dpo/gap_mean": 0.012517506256699562,
"beta_dpo/gap_std": 0.21141119301319122,
"beta_dpo/loss_margin_mean": 0.026329442858695984,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.022675736961451247,
"grad_norm": 77.17921447753906,
"learning_rate": 1.044776119402985e-07,
"logits/chosen": -3.487567663192749,
"logits/rejected": -3.491109848022461,
"loss": 1.3822,
"step": 15
},
{
"beta_dpo/beta": 0.10217370092868805,
"beta_dpo/beta_margin_grad_mean": -0.49842318892478943,
"beta_dpo/beta_margin_grad_std": 0.004959672223776579,
"beta_dpo/beta_margin_mean": 0.006308517884463072,
"beta_dpo/beta_margin_std": 0.01984489895403385,
"beta_dpo/beta_used": 0.10217370092868805,
"beta_dpo/beta_used_raw": 0.10217370092868805,
"beta_dpo/gap_mean": 0.022726912051439285,
"beta_dpo/gap_std": 0.20642614364624023,
"beta_dpo/loss_margin_mean": 0.061732217669487,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.02418745275888133,
"grad_norm": 67.19996643066406,
"learning_rate": 1.1194029850746268e-07,
"logits/chosen": -3.4754326343536377,
"logits/rejected": -3.489719867706299,
"loss": 1.3803,
"step": 16
},
{
"beta_dpo/beta": 0.09984943270683289,
"beta_dpo/beta_margin_grad_mean": -0.49886709451675415,
"beta_dpo/beta_margin_grad_std": 0.005485767964273691,
"beta_dpo/beta_margin_mean": 0.004532321821898222,
"beta_dpo/beta_margin_std": 0.021946530789136887,
"beta_dpo/beta_used": 0.09984943270683289,
"beta_dpo/beta_used_raw": 0.09984943270683289,
"beta_dpo/gap_mean": 0.02862522192299366,
"beta_dpo/gap_std": 0.21039307117462158,
"beta_dpo/loss_margin_mean": 0.04543180763721466,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.025699168556311415,
"grad_norm": 72.48587799072266,
"learning_rate": 1.1940298507462686e-07,
"logits/chosen": -3.4845752716064453,
"logits/rejected": -3.4855475425720215,
"loss": 1.3837,
"step": 17
},
{
"beta_dpo/beta": 0.09787815809249878,
"beta_dpo/beta_margin_grad_mean": -0.49952730536460876,
"beta_dpo/beta_margin_grad_std": 0.005441566463559866,
"beta_dpo/beta_margin_mean": 0.0018912701634690166,
"beta_dpo/beta_margin_std": 0.021770119667053223,
"beta_dpo/beta_used": 0.09787815809249878,
"beta_dpo/beta_used_raw": 0.09787815809249878,
"beta_dpo/gap_mean": 0.02734116092324257,
"beta_dpo/gap_std": 0.2113610804080963,
"beta_dpo/loss_margin_mean": 0.019237250089645386,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.027210884353741496,
"grad_norm": 64.46170043945312,
"learning_rate": 1.2686567164179106e-07,
"logits/chosen": -3.4654946327209473,
"logits/rejected": -3.4752371311187744,
"loss": 1.3871,
"step": 18
},
{
"beta_dpo/beta": 0.10058543086051941,
"beta_dpo/beta_margin_grad_mean": -0.4984739124774933,
"beta_dpo/beta_margin_grad_std": 0.005919734016060829,
"beta_dpo/beta_margin_mean": 0.006105437409132719,
"beta_dpo/beta_margin_std": 0.023681944236159325,
"beta_dpo/beta_used": 0.10058543086051941,
"beta_dpo/beta_used_raw": 0.10058543086051941,
"beta_dpo/gap_mean": 0.03188147768378258,
"beta_dpo/gap_std": 0.2180713713169098,
"beta_dpo/loss_margin_mean": 0.060575321316719055,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.02872260015117158,
"grad_norm": 74.11320495605469,
"learning_rate": 1.343283582089552e-07,
"logits/chosen": -3.49847412109375,
"logits/rejected": -3.4848098754882812,
"loss": 1.3822,
"step": 19
},
{
"beta_dpo/beta": 0.09968103468418121,
"beta_dpo/beta_margin_grad_mean": -0.49871742725372314,
"beta_dpo/beta_margin_grad_std": 0.00446416437625885,
"beta_dpo/beta_margin_mean": 0.0051309531554579735,
"beta_dpo/beta_margin_std": 0.017858445644378662,
"beta_dpo/beta_used": 0.09968103468418121,
"beta_dpo/beta_used_raw": 0.09968103468418121,
"beta_dpo/gap_mean": 0.03486326336860657,
"beta_dpo/gap_std": 0.21194185316562653,
"beta_dpo/loss_margin_mean": 0.0515841543674469,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.030234315948601664,
"grad_norm": 74.15331268310547,
"learning_rate": 1.4179104477611938e-07,
"logits/chosen": -3.480916738510132,
"logits/rejected": -3.487682342529297,
"loss": 1.3834,
"step": 20
},
{
"beta_dpo/beta": 0.10118047147989273,
"beta_dpo/beta_margin_grad_mean": -0.49813440442085266,
"beta_dpo/beta_margin_grad_std": 0.006876260042190552,
"beta_dpo/beta_margin_mean": 0.007462440058588982,
"beta_dpo/beta_margin_std": 0.027516059577465057,
"beta_dpo/beta_used": 0.10118047147989273,
"beta_dpo/beta_used_raw": 0.10118047147989273,
"beta_dpo/gap_mean": 0.04063236713409424,
"beta_dpo/gap_std": 0.2174699306488037,
"beta_dpo/loss_margin_mean": 0.0733160525560379,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.031746031746031744,
"grad_norm": 76.7116928100586,
"learning_rate": 1.4925373134328355e-07,
"logits/chosen": -3.474630832672119,
"logits/rejected": -3.4847922325134277,
"loss": 1.3802,
"step": 21
},
{
"beta_dpo/beta": 0.1014518290758133,
"beta_dpo/beta_margin_grad_mean": -0.4986741244792938,
"beta_dpo/beta_margin_grad_std": 0.0053654685616493225,
"beta_dpo/beta_margin_mean": 0.00530435424298048,
"beta_dpo/beta_margin_std": 0.021464822813868523,
"beta_dpo/beta_used": 0.1014518290758133,
"beta_dpo/beta_used_raw": 0.1014518290758133,
"beta_dpo/gap_mean": 0.045100364834070206,
"beta_dpo/gap_std": 0.22138892114162445,
"beta_dpo/loss_margin_mean": 0.05228887498378754,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.03325774754346183,
"grad_norm": 70.56732177734375,
"learning_rate": 1.5671641791044775e-07,
"logits/chosen": -3.4839634895324707,
"logits/rejected": -3.489530086517334,
"loss": 1.3793,
"step": 22
},
{
"beta_dpo/beta": 0.10004732012748718,
"beta_dpo/beta_margin_grad_mean": -0.4988941550254822,
"beta_dpo/beta_margin_grad_std": 0.006940007209777832,
"beta_dpo/beta_margin_mean": 0.004424452316015959,
"beta_dpo/beta_margin_std": 0.027765844017267227,
"beta_dpo/beta_used": 0.10004732012748718,
"beta_dpo/beta_used_raw": 0.10004732012748718,
"beta_dpo/gap_mean": 0.04534055292606354,
"beta_dpo/gap_std": 0.22986072301864624,
"beta_dpo/loss_margin_mean": 0.044214025139808655,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.03476946334089191,
"grad_norm": 81.26911926269531,
"learning_rate": 1.6417910447761193e-07,
"logits/chosen": -3.491093158721924,
"logits/rejected": -3.4994139671325684,
"loss": 1.3818,
"step": 23
},
{
"beta_dpo/beta": 0.10039770603179932,
"beta_dpo/beta_margin_grad_mean": -0.49830013513565063,
"beta_dpo/beta_margin_grad_std": 0.005815350916236639,
"beta_dpo/beta_margin_mean": 0.006800240837037563,
"beta_dpo/beta_margin_std": 0.0232648067176342,
"beta_dpo/beta_used": 0.10039770603179932,
"beta_dpo/beta_used_raw": 0.10039770603179932,
"beta_dpo/gap_mean": 0.050212785601615906,
"beta_dpo/gap_std": 0.23121167719364166,
"beta_dpo/loss_margin_mean": 0.06687352061271667,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.036281179138321996,
"grad_norm": 67.30644989013672,
"learning_rate": 1.716417910447761e-07,
"logits/chosen": -3.4758753776550293,
"logits/rejected": -3.484215497970581,
"loss": 1.3806,
"step": 24
},
{
"beta_dpo/beta": 0.10184156894683838,
"beta_dpo/beta_margin_grad_mean": -0.496889591217041,
"beta_dpo/beta_margin_grad_std": 0.0062943859957158566,
"beta_dpo/beta_margin_mean": 0.012444637715816498,
"beta_dpo/beta_margin_std": 0.025184577330946922,
"beta_dpo/beta_used": 0.10184156894683838,
"beta_dpo/beta_used_raw": 0.10184156894683838,
"beta_dpo/gap_mean": 0.05874401330947876,
"beta_dpo/gap_std": 0.2349245548248291,
"beta_dpo/loss_margin_mean": 0.12228862941265106,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.03779289493575208,
"grad_norm": 71.70790100097656,
"learning_rate": 1.7910447761194027e-07,
"logits/chosen": -3.5098114013671875,
"logits/rejected": -3.5169005393981934,
"loss": 1.3773,
"step": 25
},
{
"beta_dpo/beta": 0.10080444812774658,
"beta_dpo/beta_margin_grad_mean": -0.49682313203811646,
"beta_dpo/beta_margin_grad_std": 0.006813944783061743,
"beta_dpo/beta_margin_mean": 0.012710830196738243,
"beta_dpo/beta_margin_std": 0.02726481482386589,
"beta_dpo/beta_used": 0.10080444812774658,
"beta_dpo/beta_used_raw": 0.10080444812774658,
"beta_dpo/gap_mean": 0.07424643635749817,
"beta_dpo/gap_std": 0.2401646077632904,
"beta_dpo/loss_margin_mean": 0.1253078132867813,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.039304610733182165,
"grad_norm": 72.3993148803711,
"learning_rate": 1.8656716417910447e-07,
"logits/chosen": -3.464113712310791,
"logits/rejected": -3.465458393096924,
"loss": 1.3775,
"step": 26
},
{
"beta_dpo/beta": 0.10233546793460846,
"beta_dpo/beta_margin_grad_mean": -0.49768123030662537,
"beta_dpo/beta_margin_grad_std": 0.00761442631483078,
"beta_dpo/beta_margin_mean": 0.00927521288394928,
"beta_dpo/beta_margin_std": 0.030466170981526375,
"beta_dpo/beta_used": 0.10233546793460846,
"beta_dpo/beta_used_raw": 0.10233546793460846,
"beta_dpo/gap_mean": 0.07654713094234467,
"beta_dpo/gap_std": 0.25189656019210815,
"beta_dpo/loss_margin_mean": 0.09068039059638977,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.04081632653061224,
"grad_norm": 75.62002563476562,
"learning_rate": 1.9402985074626865e-07,
"logits/chosen": -3.477059841156006,
"logits/rejected": -3.4793524742126465,
"loss": 1.3746,
"step": 27
},
{
"beta_dpo/beta": 0.1009044274687767,
"beta_dpo/beta_margin_grad_mean": -0.4972415268421173,
"beta_dpo/beta_margin_grad_std": 0.008482665754854679,
"beta_dpo/beta_margin_mean": 0.011036441661417484,
"beta_dpo/beta_margin_std": 0.033949114382267,
"beta_dpo/beta_used": 0.1009044274687767,
"beta_dpo/beta_used_raw": 0.1009044274687767,
"beta_dpo/gap_mean": 0.08251934498548508,
"beta_dpo/gap_std": 0.26663610339164734,
"beta_dpo/loss_margin_mean": 0.10953138768672943,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.042328042328042326,
"grad_norm": 72.76057434082031,
"learning_rate": 2.0149253731343282e-07,
"logits/chosen": -3.4753365516662598,
"logits/rejected": -3.479668617248535,
"loss": 1.3765,
"step": 28
},
{
"beta_dpo/beta": 0.10132871568202972,
"beta_dpo/beta_margin_grad_mean": -0.4966908395290375,
"beta_dpo/beta_margin_grad_std": 0.00963142141699791,
"beta_dpo/beta_margin_mean": 0.013242037035524845,
"beta_dpo/beta_margin_std": 0.03854740783572197,
"beta_dpo/beta_used": 0.10132871568202972,
"beta_dpo/beta_used_raw": 0.10132871568202972,
"beta_dpo/gap_mean": 0.08902300894260406,
"beta_dpo/gap_std": 0.2860987186431885,
"beta_dpo/loss_margin_mean": 0.12946908175945282,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.04383975812547241,
"grad_norm": 80.11695098876953,
"learning_rate": 2.08955223880597e-07,
"logits/chosen": -3.4917006492614746,
"logits/rejected": -3.4941205978393555,
"loss": 1.3749,
"step": 29
},
{
"beta_dpo/beta": 0.10037538409233093,
"beta_dpo/beta_margin_grad_mean": -0.497522234916687,
"beta_dpo/beta_margin_grad_std": 0.010261783376336098,
"beta_dpo/beta_margin_mean": 0.009913492947816849,
"beta_dpo/beta_margin_std": 0.041076745837926865,
"beta_dpo/beta_used": 0.10037538409233093,
"beta_dpo/beta_used_raw": 0.10037538409233093,
"beta_dpo/gap_mean": 0.09355901181697845,
"beta_dpo/gap_std": 0.309474915266037,
"beta_dpo/loss_margin_mean": 0.0988030731678009,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.045351473922902494,
"grad_norm": 75.7330551147461,
"learning_rate": 2.1641791044776117e-07,
"logits/chosen": -3.4795217514038086,
"logits/rejected": -3.4918265342712402,
"loss": 1.3764,
"step": 30
},
{
"beta_dpo/beta": 0.10266469419002533,
"beta_dpo/beta_margin_grad_mean": -0.49661797285079956,
"beta_dpo/beta_margin_grad_std": 0.012874443084001541,
"beta_dpo/beta_margin_mean": 0.013542445376515388,
"beta_dpo/beta_margin_std": 0.05157284811139107,
"beta_dpo/beta_used": 0.10266469419002533,
"beta_dpo/beta_used_raw": 0.10266469419002533,
"beta_dpo/gap_mean": 0.09349072724580765,
"beta_dpo/gap_std": 0.33484184741973877,
"beta_dpo/loss_margin_mean": 0.12851548194885254,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.04686318972033258,
"grad_norm": 84.76753997802734,
"learning_rate": 2.2388059701492537e-07,
"logits/chosen": -3.484382152557373,
"logits/rejected": -3.4840521812438965,
"loss": 1.3723,
"step": 31
},
{
"beta_dpo/beta": 0.10319270193576813,
"beta_dpo/beta_margin_grad_mean": -0.4945366382598877,
"beta_dpo/beta_margin_grad_std": 0.011012133210897446,
"beta_dpo/beta_margin_mean": 0.021867286413908005,
"beta_dpo/beta_margin_std": 0.04407740384340286,
"beta_dpo/beta_used": 0.10319270193576813,
"beta_dpo/beta_used_raw": 0.10319270193576813,
"beta_dpo/gap_mean": 0.11525650322437286,
"beta_dpo/gap_std": 0.35890108346939087,
"beta_dpo/loss_margin_mean": 0.21101342141628265,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.04837490551776266,
"grad_norm": 76.85587310791016,
"learning_rate": 2.3134328358208954e-07,
"logits/chosen": -3.490706443786621,
"logits/rejected": -3.491973876953125,
"loss": 1.3691,
"step": 32
},
{
"beta_dpo/beta": 0.1052633598446846,
"beta_dpo/beta_margin_grad_mean": -0.49329954385757446,
"beta_dpo/beta_margin_grad_std": 0.014426704496145248,
"beta_dpo/beta_margin_mean": 0.02681746333837509,
"beta_dpo/beta_margin_std": 0.057773277163505554,
"beta_dpo/beta_used": 0.1052633598446846,
"beta_dpo/beta_used_raw": 0.1052633598446846,
"beta_dpo/gap_mean": 0.1396564096212387,
"beta_dpo/gap_std": 0.3812027871608734,
"beta_dpo/loss_margin_mean": 0.2525438666343689,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.049886621315192746,
"grad_norm": 84.2064208984375,
"learning_rate": 2.388059701492537e-07,
"logits/chosen": -3.480504035949707,
"logits/rejected": -3.4880025386810303,
"loss": 1.3624,
"step": 33
},
{
"beta_dpo/beta": 0.09583413600921631,
"beta_dpo/beta_margin_grad_mean": -0.49526041746139526,
"beta_dpo/beta_margin_grad_std": 0.010195734910666943,
"beta_dpo/beta_margin_mean": 0.018971463665366173,
"beta_dpo/beta_margin_std": 0.04081565514206886,
"beta_dpo/beta_used": 0.09583413600921631,
"beta_dpo/beta_used_raw": 0.09583413600921631,
"beta_dpo/gap_mean": 0.1564980447292328,
"beta_dpo/gap_std": 0.39692509174346924,
"beta_dpo/loss_margin_mean": 0.1971401423215866,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.05139833711262283,
"grad_norm": 64.87889099121094,
"learning_rate": 2.4626865671641786e-07,
"logits/chosen": -3.4754815101623535,
"logits/rejected": -3.48795747756958,
"loss": 1.378,
"step": 34
},
{
"beta_dpo/beta": 0.10109281539916992,
"beta_dpo/beta_margin_grad_mean": -0.49520573019981384,
"beta_dpo/beta_margin_grad_std": 0.013709837570786476,
"beta_dpo/beta_margin_mean": 0.019193029031157494,
"beta_dpo/beta_margin_std": 0.054904498159885406,
"beta_dpo/beta_used": 0.10109281539916992,
"beta_dpo/beta_used_raw": 0.10109281539916992,
"beta_dpo/gap_mean": 0.16002866625785828,
"beta_dpo/gap_std": 0.4229516386985779,
"beta_dpo/loss_margin_mean": 0.189878448843956,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.05291005291005291,
"grad_norm": 66.06998443603516,
"learning_rate": 2.537313432835821e-07,
"logits/chosen": -3.4892160892486572,
"logits/rejected": -3.489070415496826,
"loss": 1.3687,
"step": 35
},
{
"beta_dpo/beta": 0.09607753157615662,
"beta_dpo/beta_margin_grad_mean": -0.49553588032722473,
"beta_dpo/beta_margin_grad_std": 0.01821037009358406,
"beta_dpo/beta_margin_mean": 0.017847422510385513,
"beta_dpo/beta_margin_std": 0.07308873534202576,
"beta_dpo/beta_used": 0.09607753157615662,
"beta_dpo/beta_used_raw": 0.09607753157615662,
"beta_dpo/gap_mean": 0.1608276665210724,
"beta_dpo/gap_std": 0.47150135040283203,
"beta_dpo/loss_margin_mean": 0.17752361297607422,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.05442176870748299,
"grad_norm": 69.9321517944336,
"learning_rate": 2.611940298507462e-07,
"logits/chosen": -3.4815025329589844,
"logits/rejected": -3.489722967147827,
"loss": 1.3766,
"step": 36
},
{
"beta_dpo/beta": 0.10805092751979828,
"beta_dpo/beta_margin_grad_mean": -0.48942428827285767,
"beta_dpo/beta_margin_grad_std": 0.02250627428293228,
"beta_dpo/beta_margin_mean": 0.04226859286427498,
"beta_dpo/beta_margin_std": 0.09062261879444122,
"beta_dpo/beta_used": 0.10805092751979828,
"beta_dpo/beta_used_raw": 0.10805092751979828,
"beta_dpo/gap_mean": 0.2018118053674698,
"beta_dpo/gap_std": 0.5458605289459229,
"beta_dpo/loss_margin_mean": 0.38961470127105713,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.055933484504913075,
"grad_norm": 88.1091537475586,
"learning_rate": 2.686567164179104e-07,
"logits/chosen": -3.4984347820281982,
"logits/rejected": -3.527602434158325,
"loss": 1.3507,
"step": 37
},
{
"beta_dpo/beta": 0.09843359887599945,
"beta_dpo/beta_margin_grad_mean": -0.49174827337265015,
"beta_dpo/beta_margin_grad_std": 0.016898149624466896,
"beta_dpo/beta_margin_mean": 0.03306391090154648,
"beta_dpo/beta_margin_std": 0.06772169470787048,
"beta_dpo/beta_used": 0.09843359887599945,
"beta_dpo/beta_used_raw": 0.09843359887599945,
"beta_dpo/gap_mean": 0.2306603342294693,
"beta_dpo/gap_std": 0.57441246509552,
"beta_dpo/loss_margin_mean": 0.3296223282814026,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.05744520030234316,
"grad_norm": 72.93190002441406,
"learning_rate": 2.761194029850746e-07,
"logits/chosen": -3.4699416160583496,
"logits/rejected": -3.474957227706909,
"loss": 1.366,
"step": 38
},
{
"beta_dpo/beta": 0.10100552439689636,
"beta_dpo/beta_margin_grad_mean": -0.48797351121902466,
"beta_dpo/beta_margin_grad_std": 0.02125493995845318,
"beta_dpo/beta_margin_mean": 0.04824261739850044,
"beta_dpo/beta_margin_std": 0.08532541245222092,
"beta_dpo/beta_used": 0.10100552439689636,
"beta_dpo/beta_used_raw": 0.10100552439689636,
"beta_dpo/gap_mean": 0.26710981130599976,
"beta_dpo/gap_std": 0.6145649552345276,
"beta_dpo/loss_margin_mean": 0.47763076424598694,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.05895691609977324,
"grad_norm": 74.70683288574219,
"learning_rate": 2.8358208955223876e-07,
"logits/chosen": -3.4818758964538574,
"logits/rejected": -3.4779043197631836,
"loss": 1.3584,
"step": 39
},
{
"beta_dpo/beta": 0.10295109450817108,
"beta_dpo/beta_margin_grad_mean": -0.48708972334861755,
"beta_dpo/beta_margin_grad_std": 0.01960979588329792,
"beta_dpo/beta_margin_mean": 0.051765426993370056,
"beta_dpo/beta_margin_std": 0.07871639728546143,
"beta_dpo/beta_used": 0.10295109450817108,
"beta_dpo/beta_used_raw": 0.10295109450817108,
"beta_dpo/gap_mean": 0.31209754943847656,
"beta_dpo/gap_std": 0.6482617855072021,
"beta_dpo/loss_margin_mean": 0.5017773509025574,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.06046863189720333,
"grad_norm": 104.72574615478516,
"learning_rate": 2.9104477611940296e-07,
"logits/chosen": -3.454075813293457,
"logits/rejected": -3.4554495811462402,
"loss": 1.3501,
"step": 40
},
{
"beta_dpo/beta": 0.11064809560775757,
"beta_dpo/beta_margin_grad_mean": -0.47926777601242065,
"beta_dpo/beta_margin_grad_std": 0.03134298324584961,
"beta_dpo/beta_margin_mean": 0.08335942775011063,
"beta_dpo/beta_margin_std": 0.1263163834810257,
"beta_dpo/beta_used": 0.11064809560775757,
"beta_dpo/beta_used_raw": 0.11064809560775757,
"beta_dpo/gap_mean": 0.38600417971611023,
"beta_dpo/gap_std": 0.731959342956543,
"beta_dpo/loss_margin_mean": 0.7464129328727722,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.06198034769463341,
"grad_norm": 85.89790344238281,
"learning_rate": 2.985074626865671e-07,
"logits/chosen": -3.5105371475219727,
"logits/rejected": -3.508096933364868,
"loss": 1.3258,
"step": 41
},
{
"beta_dpo/beta": 0.09837515652179718,
"beta_dpo/beta_margin_grad_mean": -0.48587504029273987,
"beta_dpo/beta_margin_grad_std": 0.02415025420486927,
"beta_dpo/beta_margin_mean": 0.056625593453645706,
"beta_dpo/beta_margin_std": 0.09701266139745712,
"beta_dpo/beta_used": 0.09837515652179718,
"beta_dpo/beta_used_raw": 0.09837515652179718,
"beta_dpo/gap_mean": 0.4286617040634155,
"beta_dpo/gap_std": 0.7775646448135376,
"beta_dpo/loss_margin_mean": 0.5715749859809875,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.06349206349206349,
"grad_norm": 70.29313659667969,
"learning_rate": 3.059701492537313e-07,
"logits/chosen": -3.471945285797119,
"logits/rejected": -3.475313186645508,
"loss": 1.3477,
"step": 42
},
{
"beta_dpo/beta": 0.10859352350234985,
"beta_dpo/beta_margin_grad_mean": -0.4790210723876953,
"beta_dpo/beta_margin_grad_std": 0.03158368915319443,
"beta_dpo/beta_margin_mean": 0.08437040448188782,
"beta_dpo/beta_margin_std": 0.12720288336277008,
"beta_dpo/beta_used": 0.10859352350234985,
"beta_dpo/beta_used_raw": 0.10859352350234985,
"beta_dpo/gap_mean": 0.4834892153739929,
"beta_dpo/gap_std": 0.8535457849502563,
"beta_dpo/loss_margin_mean": 0.7795947790145874,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.06500377928949358,
"grad_norm": 85.16110229492188,
"learning_rate": 3.134328358208955e-07,
"logits/chosen": -3.475992441177368,
"logits/rejected": -3.490475654602051,
"loss": 1.3202,
"step": 43
},
{
"beta_dpo/beta": 0.09638853371143341,
"beta_dpo/beta_margin_grad_mean": -0.4809112548828125,
"beta_dpo/beta_margin_grad_std": 0.032165560871362686,
"beta_dpo/beta_margin_mean": 0.07681908458471298,
"beta_dpo/beta_margin_std": 0.12961971759796143,
"beta_dpo/beta_used": 0.09638853371143341,
"beta_dpo/beta_used_raw": 0.09638853371143341,
"beta_dpo/gap_mean": 0.5448230504989624,
"beta_dpo/gap_std": 0.9320578575134277,
"beta_dpo/loss_margin_mean": 0.7893993258476257,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.06651549508692366,
"grad_norm": 80.78567504882812,
"learning_rate": 3.2089552238805965e-07,
"logits/chosen": -3.471874952316284,
"logits/rejected": -3.4676132202148438,
"loss": 1.3411,
"step": 44
},
{
"beta_dpo/beta": 0.10317748785018921,
"beta_dpo/beta_margin_grad_mean": -0.47540462017059326,
"beta_dpo/beta_margin_grad_std": 0.02889878675341606,
"beta_dpo/beta_margin_mean": 0.09890253841876984,
"beta_dpo/beta_margin_std": 0.11674586683511734,
"beta_dpo/beta_used": 0.10317748785018921,
"beta_dpo/beta_used_raw": 0.10317748785018921,
"beta_dpo/gap_mean": 0.614529013633728,
"beta_dpo/gap_std": 0.9891531467437744,
"beta_dpo/loss_margin_mean": 0.9499869346618652,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.06802721088435375,
"grad_norm": 82.41168975830078,
"learning_rate": 3.2835820895522385e-07,
"logits/chosen": -3.477660655975342,
"logits/rejected": -3.4796509742736816,
"loss": 1.3189,
"step": 45
},
{
"beta_dpo/beta": 0.10708046704530716,
"beta_dpo/beta_margin_grad_mean": -0.4760693609714508,
"beta_dpo/beta_margin_grad_std": 0.040219008922576904,
"beta_dpo/beta_margin_mean": 0.09645616263151169,
"beta_dpo/beta_margin_std": 0.16234652698040009,
"beta_dpo/beta_used": 0.10708046704530716,
"beta_dpo/beta_used_raw": 0.10708046704530716,
"beta_dpo/gap_mean": 0.6618906855583191,
"beta_dpo/gap_std": 1.0743083953857422,
"beta_dpo/loss_margin_mean": 0.8980126976966858,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.06953892668178382,
"grad_norm": 82.76873779296875,
"learning_rate": 3.3582089552238805e-07,
"logits/chosen": -3.4865612983703613,
"logits/rejected": -3.501478672027588,
"loss": 1.3078,
"step": 46
},
{
"beta_dpo/beta": 0.1050390973687172,
"beta_dpo/beta_margin_grad_mean": -0.47665971517562866,
"beta_dpo/beta_margin_grad_std": 0.0366806834936142,
"beta_dpo/beta_margin_mean": 0.09390005469322205,
"beta_dpo/beta_margin_std": 0.14768268167972565,
"beta_dpo/beta_used": 0.1050390973687172,
"beta_dpo/beta_used_raw": 0.1050390973687172,
"beta_dpo/gap_mean": 0.7110254764556885,
"beta_dpo/gap_std": 1.1429574489593506,
"beta_dpo/loss_margin_mean": 0.8911280632019043,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.0710506424792139,
"grad_norm": 78.87757110595703,
"learning_rate": 3.432835820895522e-07,
"logits/chosen": -3.4726479053497314,
"logits/rejected": -3.4852752685546875,
"loss": 1.3059,
"step": 47
},
{
"beta_dpo/beta": 0.09973321855068207,
"beta_dpo/beta_margin_grad_mean": -0.47354698181152344,
"beta_dpo/beta_margin_grad_std": 0.03766561299562454,
"beta_dpo/beta_margin_mean": 0.10683294385671616,
"beta_dpo/beta_margin_std": 0.1530088633298874,
"beta_dpo/beta_used": 0.09973321855068207,
"beta_dpo/beta_used_raw": 0.09973321855068207,
"beta_dpo/gap_mean": 0.7833503484725952,
"beta_dpo/gap_std": 1.2213890552520752,
"beta_dpo/loss_margin_mean": 1.0755493640899658,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.07256235827664399,
"grad_norm": 65.8699722290039,
"learning_rate": 3.507462686567164e-07,
"logits/chosen": -3.4720230102539062,
"logits/rejected": -3.4713778495788574,
"loss": 1.3125,
"step": 48
},
{
"beta_dpo/beta": 0.10683902353048325,
"beta_dpo/beta_margin_grad_mean": -0.47896090149879456,
"beta_dpo/beta_margin_grad_std": 0.0569651760160923,
"beta_dpo/beta_margin_mean": 0.08544404804706573,
"beta_dpo/beta_margin_std": 0.23137980699539185,
"beta_dpo/beta_used": 0.10683902353048325,
"beta_dpo/beta_used_raw": 0.10683902353048325,
"beta_dpo/gap_mean": 0.7888141870498657,
"beta_dpo/gap_std": 1.371895432472229,
"beta_dpo/loss_margin_mean": 0.7935976982116699,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.07407407407407407,
"grad_norm": 76.08472442626953,
"learning_rate": 3.5820895522388055e-07,
"logits/chosen": -3.4482154846191406,
"logits/rejected": -3.4542245864868164,
"loss": 1.2973,
"step": 49
},
{
"beta_dpo/beta": 0.1023285984992981,
"beta_dpo/beta_margin_grad_mean": -0.4778675138950348,
"beta_dpo/beta_margin_grad_std": 0.043978314846754074,
"beta_dpo/beta_margin_mean": 0.08967769891023636,
"beta_dpo/beta_margin_std": 0.17872853577136993,
"beta_dpo/beta_used": 0.1023285984992981,
"beta_dpo/beta_used_raw": 0.1023285984992981,
"beta_dpo/gap_mean": 0.8107864856719971,
"beta_dpo/gap_std": 1.4515961408615112,
"beta_dpo/loss_margin_mean": 0.8468451499938965,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.07558578987150416,
"grad_norm": 77.416748046875,
"learning_rate": 3.6567164179104475e-07,
"logits/chosen": -3.4656267166137695,
"logits/rejected": -3.474292039871216,
"loss": 1.3056,
"step": 50
},
{
"beta_dpo/beta": 0.09330181777477264,
"beta_dpo/beta_margin_grad_mean": -0.4759213626384735,
"beta_dpo/beta_margin_grad_std": 0.05245961993932724,
"beta_dpo/beta_margin_mean": 0.09779670089483261,
"beta_dpo/beta_margin_std": 0.21340785920619965,
"beta_dpo/beta_used": 0.09330181777477264,
"beta_dpo/beta_used_raw": 0.09330181777477264,
"beta_dpo/gap_mean": 0.8370683193206787,
"beta_dpo/gap_std": 1.5868926048278809,
"beta_dpo/loss_margin_mean": 1.0528600215911865,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.07709750566893424,
"grad_norm": 80.22843933105469,
"learning_rate": 3.7313432835820895e-07,
"logits/chosen": -3.4780006408691406,
"logits/rejected": -3.4716055393218994,
"loss": 1.324,
"step": 51
},
{
"beta_dpo/beta": 0.10757172107696533,
"beta_dpo/beta_margin_grad_mean": -0.4713566303253174,
"beta_dpo/beta_margin_grad_std": 0.06157148256897926,
"beta_dpo/beta_margin_mean": 0.1164376363158226,
"beta_dpo/beta_margin_std": 0.2508537769317627,
"beta_dpo/beta_used": 0.10757172107696533,
"beta_dpo/beta_used_raw": 0.10757172107696533,
"beta_dpo/gap_mean": 0.879679799079895,
"beta_dpo/gap_std": 1.7295918464660645,
"beta_dpo/loss_margin_mean": 1.0719107389450073,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.07860922146636433,
"grad_norm": 90.09736633300781,
"learning_rate": 3.805970149253731e-07,
"logits/chosen": -3.493281841278076,
"logits/rejected": -3.510999917984009,
"loss": 1.2854,
"step": 52
},
{
"beta_dpo/beta": 0.11585356295108795,
"beta_dpo/beta_margin_grad_mean": -0.44591474533081055,
"beta_dpo/beta_margin_grad_std": 0.07729143649339676,
"beta_dpo/beta_margin_mean": 0.2241854965686798,
"beta_dpo/beta_margin_std": 0.3247769773006439,
"beta_dpo/beta_used": 0.11585356295108795,
"beta_dpo/beta_used_raw": 0.11585356295108795,
"beta_dpo/gap_mean": 1.0488958358764648,
"beta_dpo/gap_std": 1.8787274360656738,
"beta_dpo/loss_margin_mean": 1.7876276969909668,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.0801209372637944,
"grad_norm": 79.20471954345703,
"learning_rate": 3.880597014925373e-07,
"logits/chosen": -3.4753904342651367,
"logits/rejected": -3.4808661937713623,
"loss": 1.2475,
"step": 53
},
{
"beta_dpo/beta": 0.09257584810256958,
"beta_dpo/beta_margin_grad_mean": -0.4745701849460602,
"beta_dpo/beta_margin_grad_std": 0.05094355344772339,
"beta_dpo/beta_margin_mean": 0.10317223519086838,
"beta_dpo/beta_margin_std": 0.2069990187883377,
"beta_dpo/beta_used": 0.09257584810256958,
"beta_dpo/beta_used_raw": 0.09257584810256958,
"beta_dpo/gap_mean": 1.0485970973968506,
"beta_dpo/gap_std": 1.9558327198028564,
"beta_dpo/loss_margin_mean": 1.1117992401123047,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.08163265306122448,
"grad_norm": 61.774192810058594,
"learning_rate": 3.9552238805970144e-07,
"logits/chosen": -3.459986925125122,
"logits/rejected": -3.4660282135009766,
"loss": 1.3082,
"step": 54
},
{
"beta_dpo/beta": 0.12305180728435516,
"beta_dpo/beta_margin_grad_mean": -0.436162531375885,
"beta_dpo/beta_margin_grad_std": 0.0924496054649353,
"beta_dpo/beta_margin_mean": 0.26776817440986633,
"beta_dpo/beta_margin_std": 0.3947688043117523,
"beta_dpo/beta_used": 0.12305180728435516,
"beta_dpo/beta_used_raw": 0.12305180728435516,
"beta_dpo/gap_mean": 1.2106801271438599,
"beta_dpo/gap_std": 2.1652746200561523,
"beta_dpo/loss_margin_mean": 2.1424360275268555,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.08314436885865457,
"grad_norm": 85.41687774658203,
"learning_rate": 4.0298507462686564e-07,
"logits/chosen": -3.4503068923950195,
"logits/rejected": -3.4672203063964844,
"loss": 1.2096,
"step": 55
},
{
"beta_dpo/beta": 0.113812655210495,
"beta_dpo/beta_margin_grad_mean": -0.43281856179237366,
"beta_dpo/beta_margin_grad_std": 0.08880013972520828,
"beta_dpo/beta_margin_mean": 0.28000012040138245,
"beta_dpo/beta_margin_std": 0.3765174448490143,
"beta_dpo/beta_used": 0.113812655210495,
"beta_dpo/beta_used_raw": 0.113812655210495,
"beta_dpo/gap_mean": 1.4440956115722656,
"beta_dpo/gap_std": 2.366764545440674,
"beta_dpo/loss_margin_mean": 2.447467803955078,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.08465608465608465,
"grad_norm": 78.24384307861328,
"learning_rate": 4.1044776119402984e-07,
"logits/chosen": -3.474026679992676,
"logits/rejected": -3.471069574356079,
"loss": 1.2186,
"step": 56
},
{
"beta_dpo/beta": 0.12063010782003403,
"beta_dpo/beta_margin_grad_mean": -0.4170219302177429,
"beta_dpo/beta_margin_grad_std": 0.11409434676170349,
"beta_dpo/beta_margin_mean": 0.35627323389053345,
"beta_dpo/beta_margin_std": 0.49630510807037354,
"beta_dpo/beta_used": 0.12063010782003403,
"beta_dpo/beta_used_raw": 0.12063010782003403,
"beta_dpo/gap_mean": 1.71101713180542,
"beta_dpo/gap_std": 2.6714401245117188,
"beta_dpo/loss_margin_mean": 2.9442179203033447,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.08616780045351474,
"grad_norm": 86.52571105957031,
"learning_rate": 4.17910447761194e-07,
"logits/chosen": -3.4749884605407715,
"logits/rejected": -3.4794540405273438,
"loss": 1.1814,
"step": 57
},
{
"beta_dpo/beta": 0.14217594265937805,
"beta_dpo/beta_margin_grad_mean": -0.41697031259536743,
"beta_dpo/beta_margin_grad_std": 0.12016920745372772,
"beta_dpo/beta_margin_mean": 0.3542155623435974,
"beta_dpo/beta_margin_std": 0.5216997861862183,
"beta_dpo/beta_used": 0.14217594265937805,
"beta_dpo/beta_used_raw": 0.14217594265937805,
"beta_dpo/gap_mean": 1.9042582511901855,
"beta_dpo/gap_std": 2.851851463317871,
"beta_dpo/loss_margin_mean": 2.408334732055664,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.08767951625094482,
"grad_norm": 109.38397979736328,
"learning_rate": 4.253731343283582e-07,
"logits/chosen": -3.4674854278564453,
"logits/rejected": -3.4712154865264893,
"loss": 1.0854,
"step": 58
},
{
"beta_dpo/beta": 0.13565833866596222,
"beta_dpo/beta_margin_grad_mean": -0.40875622630119324,
"beta_dpo/beta_margin_grad_std": 0.13919131457805634,
"beta_dpo/beta_margin_mean": 0.4166140556335449,
"beta_dpo/beta_margin_std": 0.6682167649269104,
"beta_dpo/beta_used": 0.13565833866596222,
"beta_dpo/beta_used_raw": 0.13565833866596222,
"beta_dpo/gap_mean": 1.9503271579742432,
"beta_dpo/gap_std": 2.9919891357421875,
"beta_dpo/loss_margin_mean": 2.674647092819214,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.08919123204837491,
"grad_norm": 95.52985382080078,
"learning_rate": 4.3283582089552234e-07,
"logits/chosen": -3.4265875816345215,
"logits/rejected": -3.443531036376953,
"loss": 1.0797,
"step": 59
},
{
"beta_dpo/beta": 0.09772248566150665,
"beta_dpo/beta_margin_grad_mean": -0.44351616501808167,
"beta_dpo/beta_margin_grad_std": 0.10381980240345001,
"beta_dpo/beta_margin_mean": 0.23878909647464752,
"beta_dpo/beta_margin_std": 0.443925142288208,
"beta_dpo/beta_used": 0.09772248566150665,
"beta_dpo/beta_used_raw": 0.09772248566150665,
"beta_dpo/gap_mean": 2.108832359313965,
"beta_dpo/gap_std": 3.2798352241516113,
"beta_dpo/loss_margin_mean": 2.4079318046569824,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.09070294784580499,
"grad_norm": 77.57376098632812,
"learning_rate": 4.4029850746268654e-07,
"logits/chosen": -3.4555981159210205,
"logits/rejected": -3.449897050857544,
"loss": 1.2118,
"step": 60
},
{
"beta_dpo/beta": 0.09358173608779907,
"beta_dpo/beta_margin_grad_mean": -0.44476786255836487,
"beta_dpo/beta_margin_grad_std": 0.12264274060726166,
"beta_dpo/beta_margin_mean": 0.23790551722049713,
"beta_dpo/beta_margin_std": 0.542164146900177,
"beta_dpo/beta_used": 0.09358173608779907,
"beta_dpo/beta_used_raw": 0.09358173608779907,
"beta_dpo/gap_mean": 2.1635830402374268,
"beta_dpo/gap_std": 3.625548839569092,
"beta_dpo/loss_margin_mean": 2.532042980194092,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.09221466364323508,
"grad_norm": 69.81476593017578,
"learning_rate": 4.4776119402985074e-07,
"logits/chosen": -3.4459221363067627,
"logits/rejected": -3.4527337551116943,
"loss": 1.2301,
"step": 61
},
{
"beta_dpo/beta": 0.1260245144367218,
"beta_dpo/beta_margin_grad_mean": -0.37954550981521606,
"beta_dpo/beta_margin_grad_std": 0.14129452407360077,
"beta_dpo/beta_margin_mean": 0.556390643119812,
"beta_dpo/beta_margin_std": 0.6934806704521179,
"beta_dpo/beta_used": 0.1260245144367218,
"beta_dpo/beta_used_raw": 0.1260245144367218,
"beta_dpo/gap_mean": 2.493056297302246,
"beta_dpo/gap_std": 4.033053874969482,
"beta_dpo/loss_margin_mean": 4.416388988494873,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.09372637944066516,
"grad_norm": 100.77880096435547,
"learning_rate": 4.552238805970149e-07,
"logits/chosen": -3.4615097045898438,
"logits/rejected": -3.462885856628418,
"loss": 1.11,
"step": 62
},
{
"beta_dpo/beta": 0.19291238486766815,
"beta_dpo/beta_margin_grad_mean": -0.32733672857284546,
"beta_dpo/beta_margin_grad_std": 0.210123673081398,
"beta_dpo/beta_margin_mean": 0.8742516040802002,
"beta_dpo/beta_margin_std": 1.2748658657073975,
"beta_dpo/beta_used": 0.19291238486766815,
"beta_dpo/beta_used_raw": 0.19291238486766815,
"beta_dpo/gap_mean": 2.880000591278076,
"beta_dpo/gap_std": 4.315876483917236,
"beta_dpo/loss_margin_mean": 4.4908342361450195,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.09523809523809523,
"grad_norm": 110.8193588256836,
"learning_rate": 4.626865671641791e-07,
"logits/chosen": -3.424607038497925,
"logits/rejected": -3.427009105682373,
"loss": 0.8325,
"step": 63
},
{
"beta_dpo/beta": 0.042021431028842926,
"beta_dpo/beta_margin_grad_mean": -0.47857698798179626,
"beta_dpo/beta_margin_grad_std": 0.05938207358121872,
"beta_dpo/beta_margin_mean": 0.08715548366308212,
"beta_dpo/beta_margin_std": 0.24319951236248016,
"beta_dpo/beta_used": 0.042021431028842926,
"beta_dpo/beta_used_raw": 0.042021431028842926,
"beta_dpo/gap_mean": 2.88523268699646,
"beta_dpo/gap_std": 4.664064407348633,
"beta_dpo/loss_margin_mean": 2.17319917678833,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.09674981103552532,
"grad_norm": 35.73752975463867,
"learning_rate": 4.701492537313433e-07,
"logits/chosen": -3.4301936626434326,
"logits/rejected": -3.4358339309692383,
"loss": 1.3121,
"step": 64
},
{
"beta_dpo/beta": 0.10541808605194092,
"beta_dpo/beta_margin_grad_mean": -0.40356844663619995,
"beta_dpo/beta_margin_grad_std": 0.16677476465702057,
"beta_dpo/beta_margin_mean": 0.44347381591796875,
"beta_dpo/beta_margin_std": 0.7979322671890259,
"beta_dpo/beta_used": 0.10541808605194092,
"beta_dpo/beta_used_raw": 0.10541808605194092,
"beta_dpo/gap_mean": 3.0799174308776855,
"beta_dpo/gap_std": 5.104286193847656,
"beta_dpo/loss_margin_mean": 4.116312503814697,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.0982615268329554,
"grad_norm": 98.1263656616211,
"learning_rate": 4.776119402985074e-07,
"logits/chosen": -3.4343981742858887,
"logits/rejected": -3.4361720085144043,
"loss": 1.1526,
"step": 65
},
{
"beta_dpo/beta": 0.1767566055059433,
"beta_dpo/beta_margin_grad_mean": -0.35773029923439026,
"beta_dpo/beta_margin_grad_std": 0.2308226078748703,
"beta_dpo/beta_margin_mean": 0.7839902639389038,
"beta_dpo/beta_margin_std": 1.4043595790863037,
"beta_dpo/beta_used": 0.1767566055059433,
"beta_dpo/beta_used_raw": 0.1767566055059433,
"beta_dpo/gap_mean": 3.3063292503356934,
"beta_dpo/gap_std": 5.49251651763916,
"beta_dpo/loss_margin_mean": 4.658176898956299,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.09977324263038549,
"grad_norm": 141.7904815673828,
"learning_rate": 4.850746268656717e-07,
"logits/chosen": -3.467107057571411,
"logits/rejected": -3.4679934978485107,
"loss": 0.907,
"step": 66
},
{
"beta_dpo/beta": 0.09946347773075104,
"beta_dpo/beta_margin_grad_mean": -0.40777695178985596,
"beta_dpo/beta_margin_grad_std": 0.18374623358249664,
"beta_dpo/beta_margin_mean": 0.4506603181362152,
"beta_dpo/beta_margin_std": 0.9327126741409302,
"beta_dpo/beta_used": 0.09946347773075104,
"beta_dpo/beta_used_raw": 0.09946347773075104,
"beta_dpo/gap_mean": 3.526148796081543,
"beta_dpo/gap_std": 6.106088638305664,
"beta_dpo/loss_margin_mean": 4.415529251098633,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.10128495842781557,
"grad_norm": 99.5558090209961,
"learning_rate": 4.925373134328357e-07,
"logits/chosen": -3.4243123531341553,
"logits/rejected": -3.4297595024108887,
"loss": 1.16,
"step": 67
},
{
"beta_dpo/beta": 0.1491156965494156,
"beta_dpo/beta_margin_grad_mean": -0.3757003843784332,
"beta_dpo/beta_margin_grad_std": 0.2276735007762909,
"beta_dpo/beta_margin_mean": 0.7526575326919556,
"beta_dpo/beta_margin_std": 1.405693531036377,
"beta_dpo/beta_used": 0.1491156965494156,
"beta_dpo/beta_used_raw": 0.1491156965494156,
"beta_dpo/gap_mean": 3.7691352367401123,
"beta_dpo/gap_std": 6.529148101806641,
"beta_dpo/loss_margin_mean": 4.495668411254883,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.10279667422524566,
"grad_norm": 125.12950134277344,
"learning_rate": 5e-07,
"logits/chosen": -3.4447755813598633,
"logits/rejected": -3.453941822052002,
"loss": 0.985,
"step": 68
},
{
"beta_dpo/beta": 0.078451007604599,
"beta_dpo/beta_margin_grad_mean": -0.41160911321640015,
"beta_dpo/beta_margin_grad_std": 0.1623861938714981,
"beta_dpo/beta_margin_mean": 0.42135173082351685,
"beta_dpo/beta_margin_std": 0.80238938331604,
"beta_dpo/beta_used": 0.078451007604599,
"beta_dpo/beta_used_raw": 0.078451007604599,
"beta_dpo/gap_mean": 3.8410778045654297,
"beta_dpo/gap_std": 6.965381622314453,
"beta_dpo/loss_margin_mean": 4.968033790588379,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.10430839002267574,
"grad_norm": 91.32450103759766,
"learning_rate": 4.999965034812934e-07,
"logits/chosen": -3.4167556762695312,
"logits/rejected": -3.4244508743286133,
"loss": 1.1994,
"step": 69
},
{
"beta_dpo/beta": 0.142277330160141,
"beta_dpo/beta_margin_grad_mean": -0.3595953583717346,
"beta_dpo/beta_margin_grad_std": 0.23513264954090118,
"beta_dpo/beta_margin_mean": 0.8268713355064392,
"beta_dpo/beta_margin_std": 1.4967344999313354,
"beta_dpo/beta_used": 0.142277330160141,
"beta_dpo/beta_used_raw": 0.142277330160141,
"beta_dpo/gap_mean": 4.193761348724365,
"beta_dpo/gap_std": 7.399883270263672,
"beta_dpo/loss_margin_mean": 5.270178318023682,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.10582010582010581,
"grad_norm": 117.31448364257812,
"learning_rate": 4.999860140229787e-07,
"logits/chosen": -3.401844024658203,
"logits/rejected": -3.400637149810791,
"loss": 0.9858,
"step": 70
},
{
"beta_dpo/beta": 0.10826882719993591,
"beta_dpo/beta_margin_grad_mean": -0.40783292055130005,
"beta_dpo/beta_margin_grad_std": 0.1903211772441864,
"beta_dpo/beta_margin_mean": 0.449673056602478,
"beta_dpo/beta_margin_std": 0.9671619534492493,
"beta_dpo/beta_used": 0.10826882719993591,
"beta_dpo/beta_used_raw": 0.10826882719993591,
"beta_dpo/gap_mean": 4.15489387512207,
"beta_dpo/gap_std": 7.742700576782227,
"beta_dpo/loss_margin_mean": 4.325258731842041,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.1073318216175359,
"grad_norm": 104.58674621582031,
"learning_rate": 4.999685319184688e-07,
"logits/chosen": -3.408806324005127,
"logits/rejected": -3.406421184539795,
"loss": 1.0901,
"step": 71
},
{
"beta_dpo/beta": 0.11632607132196426,
"beta_dpo/beta_margin_grad_mean": -0.34931424260139465,
"beta_dpo/beta_margin_grad_std": 0.23163333535194397,
"beta_dpo/beta_margin_mean": 0.9699787497520447,
"beta_dpo/beta_margin_std": 1.607363224029541,
"beta_dpo/beta_used": 0.11632607132196426,
"beta_dpo/beta_used_raw": 0.11632607132196426,
"beta_dpo/gap_mean": 4.78761625289917,
"beta_dpo/gap_std": 8.365766525268555,
"beta_dpo/loss_margin_mean": 7.621516227722168,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.10884353741496598,
"grad_norm": 114.22623443603516,
"learning_rate": 4.999440576567755e-07,
"logits/chosen": -3.3776087760925293,
"logits/rejected": -3.390096664428711,
"loss": 1.1005,
"step": 72
},
{
"beta_dpo/beta": 0.06673535704612732,
"beta_dpo/beta_margin_grad_mean": -0.4608076512813568,
"beta_dpo/beta_margin_grad_std": 0.17916692793369293,
"beta_dpo/beta_margin_mean": 0.18453335762023926,
"beta_dpo/beta_margin_std": 0.8172470927238464,
"beta_dpo/beta_used": 0.06673535704612732,
"beta_dpo/beta_used_raw": 0.06673535704612732,
"beta_dpo/gap_mean": 4.497790336608887,
"beta_dpo/gap_std": 9.094923973083496,
"beta_dpo/loss_margin_mean": 2.876835584640503,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.11035525321239607,
"grad_norm": 74.64056396484375,
"learning_rate": 4.999125919224965e-07,
"logits/chosen": -3.3894991874694824,
"logits/rejected": -3.391726016998291,
"loss": 1.2207,
"step": 73
},
{
"beta_dpo/beta": 0.25261440873146057,
"beta_dpo/beta_margin_grad_mean": -0.2889004051685333,
"beta_dpo/beta_margin_grad_std": 0.2619003355503082,
"beta_dpo/beta_margin_mean": 2.9105119705200195,
"beta_dpo/beta_margin_std": 4.6201043128967285,
"beta_dpo/beta_used": 0.25261440873146057,
"beta_dpo/beta_used_raw": 0.25261440873146057,
"beta_dpo/gap_mean": 4.9444684982299805,
"beta_dpo/gap_std": 9.611654281616211,
"beta_dpo/loss_margin_mean": 8.769340515136719,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.11186696900982615,
"grad_norm": 182.684326171875,
"learning_rate": 4.998741355957963e-07,
"logits/chosen": -3.3527235984802246,
"logits/rejected": -3.357039451599121,
"loss": 0.8304,
"step": 74
},
{
"beta_dpo/beta": 0.08466437458992004,
"beta_dpo/beta_margin_grad_mean": -0.4008246660232544,
"beta_dpo/beta_margin_grad_std": 0.21186015009880066,
"beta_dpo/beta_margin_mean": 0.5326976180076599,
"beta_dpo/beta_margin_std": 1.2269001007080078,
"beta_dpo/beta_used": 0.08466437458992004,
"beta_dpo/beta_used_raw": 0.08466437458992004,
"beta_dpo/gap_mean": 5.70491886138916,
"beta_dpo/gap_std": 10.372549057006836,
"beta_dpo/loss_margin_mean": 7.167640686035156,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.11337868480725624,
"grad_norm": 102.36708068847656,
"learning_rate": 4.998286897523808e-07,
"logits/chosen": -3.351316213607788,
"logits/rejected": -3.3699235916137695,
"loss": 1.144,
"step": 75
},
{
"beta_dpo/beta": 0.18824619054794312,
"beta_dpo/beta_margin_grad_mean": -0.2869580090045929,
"beta_dpo/beta_margin_grad_std": 0.24816998839378357,
"beta_dpo/beta_margin_mean": 2.1148622035980225,
"beta_dpo/beta_margin_std": 3.13854718208313,
"beta_dpo/beta_used": 0.18824619054794312,
"beta_dpo/beta_used_raw": 0.18824619054794312,
"beta_dpo/gap_mean": 6.352941513061523,
"beta_dpo/gap_std": 10.743419647216797,
"beta_dpo/loss_margin_mean": 9.534296989440918,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.11489040060468632,
"grad_norm": 171.01199340820312,
"learning_rate": 4.997762556634679e-07,
"logits/chosen": -3.327301025390625,
"logits/rejected": -3.3365983963012695,
"loss": 1.0189,
"step": 76
},
{
"beta_dpo/beta": 0.14552097022533417,
"beta_dpo/beta_margin_grad_mean": -0.304735392332077,
"beta_dpo/beta_margin_grad_std": 0.25858816504478455,
"beta_dpo/beta_margin_mean": 1.3738024234771729,
"beta_dpo/beta_margin_std": 1.8939155340194702,
"beta_dpo/beta_used": 0.14552097022533417,
"beta_dpo/beta_used_raw": 0.14552097022533417,
"beta_dpo/gap_mean": 6.843048095703125,
"beta_dpo/gap_std": 11.032407760620117,
"beta_dpo/loss_margin_mean": 9.01302433013916,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.1164021164021164,
"grad_norm": 120.21430969238281,
"learning_rate": 4.99716834795752e-07,
"logits/chosen": -3.3611555099487305,
"logits/rejected": -3.373170852661133,
"loss": 0.9049,
"step": 77
},
{
"beta_dpo/beta": 0.041494932025671005,
"beta_dpo/beta_margin_grad_mean": -0.43122273683547974,
"beta_dpo/beta_margin_grad_std": 0.1662428379058838,
"beta_dpo/beta_margin_mean": 0.35608235001564026,
"beta_dpo/beta_margin_std": 0.8586031794548035,
"beta_dpo/beta_used": 0.041494932025671005,
"beta_dpo/beta_used_raw": -0.01816452667117119,
"beta_dpo/gap_mean": 6.814278602600098,
"beta_dpo/gap_std": 11.402191162109375,
"beta_dpo/loss_margin_mean": 5.548126697540283,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.11791383219954649,
"grad_norm": 72.10340118408203,
"learning_rate": 4.996504288113623e-07,
"logits/chosen": -3.3710875511169434,
"logits/rejected": -3.3661742210388184,
"loss": 1.252,
"step": 78
},
{
"beta_dpo/beta": 0.1977195143699646,
"beta_dpo/beta_margin_grad_mean": -0.25049352645874023,
"beta_dpo/beta_margin_grad_std": 0.2710304260253906,
"beta_dpo/beta_margin_mean": 2.008237838745117,
"beta_dpo/beta_margin_std": 2.7375316619873047,
"beta_dpo/beta_used": 0.1977195143699646,
"beta_dpo/beta_used_raw": 0.1977195143699646,
"beta_dpo/gap_mean": 7.165606498718262,
"beta_dpo/gap_std": 11.763540267944336,
"beta_dpo/loss_margin_mean": 10.299997329711914,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.11942554799697656,
"grad_norm": 199.5998077392578,
"learning_rate": 4.995770395678171e-07,
"logits/chosen": -3.3480472564697266,
"logits/rejected": -3.3739683628082275,
"loss": 0.6999,
"step": 79
},
{
"beta_dpo/beta": 0.17561544477939606,
"beta_dpo/beta_margin_grad_mean": -0.36698946356773376,
"beta_dpo/beta_margin_grad_std": 0.28652095794677734,
"beta_dpo/beta_margin_mean": 1.9338186979293823,
"beta_dpo/beta_margin_std": 4.184875965118408,
"beta_dpo/beta_used": 0.17561544477939606,
"beta_dpo/beta_used_raw": 0.14658761024475098,
"beta_dpo/gap_mean": 7.466344833374023,
"beta_dpo/gap_std": 12.135894775390625,
"beta_dpo/loss_margin_mean": 9.14448070526123,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.12093726379440665,
"grad_norm": 242.46189880371094,
"learning_rate": 4.994966691179711e-07,
"logits/chosen": -3.362766981124878,
"logits/rejected": -3.373871326446533,
"loss": 1.2158,
"step": 80
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4974750578403473,
"beta_dpo/beta_margin_grad_std": 0.0035836591850966215,
"beta_dpo/beta_margin_mean": 0.010100403800606728,
"beta_dpo/beta_margin_std": 0.014335720799863338,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.018039202317595482,
"beta_dpo/gap_mean": 8.088890075683594,
"beta_dpo/gap_std": 12.616556167602539,
"beta_dpo/loss_margin_mean": 10.100403785705566,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.12244897959183673,
"grad_norm": 1.2602194547653198,
"learning_rate": 4.994093197099587e-07,
"logits/chosen": -3.343071460723877,
"logits/rejected": -3.3518471717834473,
"loss": 1.3802,
"step": 81
},
{
"beta_dpo/beta": 0.1988849639892578,
"beta_dpo/beta_margin_grad_mean": -0.2265806794166565,
"beta_dpo/beta_margin_grad_std": 0.280320942401886,
"beta_dpo/beta_margin_mean": 2.634272575378418,
"beta_dpo/beta_margin_std": 2.8367440700531006,
"beta_dpo/beta_used": 0.1988849639892578,
"beta_dpo/beta_used_raw": 0.1988849639892578,
"beta_dpo/gap_mean": 8.929794311523438,
"beta_dpo/gap_std": 12.968416213989258,
"beta_dpo/loss_margin_mean": 13.391870498657227,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.12396069538926682,
"grad_norm": 192.18162536621094,
"learning_rate": 4.993149937871306e-07,
"logits/chosen": -3.3310623168945312,
"logits/rejected": -3.341090202331543,
"loss": 0.7932,
"step": 82
},
{
"beta_dpo/beta": 0.1843191534280777,
"beta_dpo/beta_margin_grad_mean": -0.2833937108516693,
"beta_dpo/beta_margin_grad_std": 0.28331419825553894,
"beta_dpo/beta_margin_mean": 1.9241818189620972,
"beta_dpo/beta_margin_std": 3.19730806350708,
"beta_dpo/beta_used": 0.1843191534280777,
"beta_dpo/beta_used_raw": 0.1843191534280777,
"beta_dpo/gap_mean": 9.336427688598633,
"beta_dpo/gap_std": 13.179704666137695,
"beta_dpo/loss_margin_mean": 11.15524673461914,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.1254724111866969,
"grad_norm": 238.143798828125,
"learning_rate": 4.992136939879856e-07,
"logits/chosen": -3.3477907180786133,
"logits/rejected": -3.3669016361236572,
"loss": 0.8472,
"step": 83
},
{
"beta_dpo/beta": 0.11267973482608795,
"beta_dpo/beta_margin_grad_mean": -0.3190261721611023,
"beta_dpo/beta_margin_grad_std": 0.23150216042995453,
"beta_dpo/beta_margin_mean": 1.1134321689605713,
"beta_dpo/beta_margin_std": 1.629770278930664,
"beta_dpo/beta_used": 0.11267973482608795,
"beta_dpo/beta_used_raw": 0.11267973482608795,
"beta_dpo/gap_mean": 9.562017440795898,
"beta_dpo/gap_std": 13.26020622253418,
"beta_dpo/loss_margin_mean": 9.725201606750488,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.12698412698412698,
"grad_norm": 113.03916931152344,
"learning_rate": 4.991054231460969e-07,
"logits/chosen": -3.329392671585083,
"logits/rejected": -3.3444466590881348,
"loss": 0.8401,
"step": 84
},
{
"beta_dpo/beta": 0.09879438579082489,
"beta_dpo/beta_margin_grad_mean": -0.383115291595459,
"beta_dpo/beta_margin_grad_std": 0.24746352434158325,
"beta_dpo/beta_margin_mean": 1.2028299570083618,
"beta_dpo/beta_margin_std": 2.4348294734954834,
"beta_dpo/beta_used": 0.09879438579082489,
"beta_dpo/beta_used_raw": 0.08103566616773605,
"beta_dpo/gap_mean": 9.678817749023438,
"beta_dpo/gap_std": 13.180517196655273,
"beta_dpo/loss_margin_mean": 10.931384086608887,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.12849584278155707,
"grad_norm": 111.39769744873047,
"learning_rate": 4.989901842900325e-07,
"logits/chosen": -3.321516990661621,
"logits/rejected": -3.3222968578338623,
"loss": 1.0688,
"step": 85
},
{
"beta_dpo/beta": 0.07136266678571701,
"beta_dpo/beta_margin_grad_mean": -0.3871707320213318,
"beta_dpo/beta_margin_grad_std": 0.23100006580352783,
"beta_dpo/beta_margin_mean": 0.9229219555854797,
"beta_dpo/beta_margin_std": 1.8082295656204224,
"beta_dpo/beta_used": 0.07136266678571701,
"beta_dpo/beta_used_raw": 0.015174761414527893,
"beta_dpo/gap_mean": 9.801012992858887,
"beta_dpo/gap_std": 13.513420104980469,
"beta_dpo/loss_margin_mean": 8.277644157409668,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.13000755857898716,
"grad_norm": 101.34297180175781,
"learning_rate": 4.988679806432711e-07,
"logits/chosen": -3.3395376205444336,
"logits/rejected": -3.3449158668518066,
"loss": 1.1039,
"step": 86
},
{
"beta_dpo/beta": 0.14920227229595184,
"beta_dpo/beta_margin_grad_mean": -0.2833729684352875,
"beta_dpo/beta_margin_grad_std": 0.2721627950668335,
"beta_dpo/beta_margin_mean": 1.798041820526123,
"beta_dpo/beta_margin_std": 2.643514633178711,
"beta_dpo/beta_used": 0.14920227229595184,
"beta_dpo/beta_used_raw": 0.14920227229595184,
"beta_dpo/gap_mean": 9.804052352905273,
"beta_dpo/gap_std": 13.875849723815918,
"beta_dpo/loss_margin_mean": 11.038612365722656,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.13151927437641722,
"grad_norm": 127.10075378417969,
"learning_rate": 4.987388156241114e-07,
"logits/chosen": -3.3276474475860596,
"logits/rejected": -3.3481569290161133,
"loss": 0.7641,
"step": 87
},
{
"beta_dpo/beta": 0.1592729240655899,
"beta_dpo/beta_margin_grad_mean": -0.33554723858833313,
"beta_dpo/beta_margin_grad_std": 0.23475810885429382,
"beta_dpo/beta_margin_mean": 2.3355441093444824,
"beta_dpo/beta_margin_std": 3.7071549892425537,
"beta_dpo/beta_used": 0.1592729240655899,
"beta_dpo/beta_used_raw": 0.10196053981781006,
"beta_dpo/gap_mean": 10.079328536987305,
"beta_dpo/gap_std": 14.221284866333008,
"beta_dpo/loss_margin_mean": 10.8495454788208,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.1330309901738473,
"grad_norm": 137.05709838867188,
"learning_rate": 4.986026928455767e-07,
"logits/chosen": -3.3265223503112793,
"logits/rejected": -3.321218729019165,
"loss": 0.9096,
"step": 88
},
{
"beta_dpo/beta": 0.004432837013155222,
"beta_dpo/beta_margin_grad_mean": -0.4919796288013458,
"beta_dpo/beta_margin_grad_std": 0.023224812000989914,
"beta_dpo/beta_margin_mean": 0.03219681233167648,
"beta_dpo/beta_margin_std": 0.09327611327171326,
"beta_dpo/beta_used": 0.004432837013155222,
"beta_dpo/beta_used_raw": -0.05657649785280228,
"beta_dpo/gap_mean": 9.50676155090332,
"beta_dpo/gap_std": 14.471736907958984,
"beta_dpo/loss_margin_mean": 6.9620680809021,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.1345427059712774,
"grad_norm": 8.242236137390137,
"learning_rate": 4.984596161153135e-07,
"logits/chosen": -3.310055732727051,
"logits/rejected": -3.339357852935791,
"loss": 1.3537,
"step": 89
},
{
"beta_dpo/beta": 0.20705099403858185,
"beta_dpo/beta_margin_grad_mean": -0.33812034130096436,
"beta_dpo/beta_margin_grad_std": 0.25459006428718567,
"beta_dpo/beta_margin_mean": 2.6444900035858154,
"beta_dpo/beta_margin_std": 5.163547992706299,
"beta_dpo/beta_used": 0.20705099403858185,
"beta_dpo/beta_used_raw": 0.14731192588806152,
"beta_dpo/gap_mean": 9.444209098815918,
"beta_dpo/gap_std": 14.568005561828613,
"beta_dpo/loss_margin_mean": 10.64816665649414,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.1360544217687075,
"grad_norm": 119.21234130859375,
"learning_rate": 4.983095894354857e-07,
"logits/chosen": -3.306340217590332,
"logits/rejected": -3.3120946884155273,
"loss": 0.8974,
"step": 90
},
{
"beta_dpo/beta": 0.03152452036738396,
"beta_dpo/beta_margin_grad_mean": -0.4407292306423187,
"beta_dpo/beta_margin_grad_std": 0.15332013368606567,
"beta_dpo/beta_margin_mean": 0.2929452359676361,
"beta_dpo/beta_margin_std": 0.7780600190162659,
"beta_dpo/beta_used": 0.03152452036738396,
"beta_dpo/beta_used_raw": 0.009608536958694458,
"beta_dpo/gap_mean": 9.621158599853516,
"beta_dpo/gap_std": 14.920358657836914,
"beta_dpo/loss_margin_mean": 9.786998748779297,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.13756613756613756,
"grad_norm": 38.26823043823242,
"learning_rate": 4.98152617002662e-07,
"logits/chosen": -3.3391175270080566,
"logits/rejected": -3.3490490913391113,
"loss": 1.2081,
"step": 91
},
{
"beta_dpo/beta": 0.13908042013645172,
"beta_dpo/beta_margin_grad_mean": -0.35717082023620605,
"beta_dpo/beta_margin_grad_std": 0.24447351694107056,
"beta_dpo/beta_margin_mean": 1.8169987201690674,
"beta_dpo/beta_margin_std": 3.39219069480896,
"beta_dpo/beta_used": 0.13908042013645172,
"beta_dpo/beta_used_raw": 0.041757889091968536,
"beta_dpo/gap_mean": 9.578210830688477,
"beta_dpo/gap_std": 14.993568420410156,
"beta_dpo/loss_margin_mean": 10.055578231811523,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.13907785336356765,
"grad_norm": 186.3566436767578,
"learning_rate": 4.979887032076988e-07,
"logits/chosen": -3.2997212409973145,
"logits/rejected": -3.32633376121521,
"loss": 1.0076,
"step": 92
},
{
"beta_dpo/beta": 0.10875581204891205,
"beta_dpo/beta_margin_grad_mean": -0.35252755880355835,
"beta_dpo/beta_margin_grad_std": 0.2674812376499176,
"beta_dpo/beta_margin_mean": 0.9274733066558838,
"beta_dpo/beta_margin_std": 1.7559343576431274,
"beta_dpo/beta_used": 0.10875581204891205,
"beta_dpo/beta_used_raw": 0.10875581204891205,
"beta_dpo/gap_mean": 9.541094779968262,
"beta_dpo/gap_std": 15.093782424926758,
"beta_dpo/loss_margin_mean": 8.429204940795898,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.14058956916099774,
"grad_norm": 106.12804412841797,
"learning_rate": 4.978178526356172e-07,
"logits/chosen": -3.297229766845703,
"logits/rejected": -3.302245616912842,
"loss": 0.9099,
"step": 93
},
{
"beta_dpo/beta": 0.10703656077384949,
"beta_dpo/beta_margin_grad_mean": -0.3893989324569702,
"beta_dpo/beta_margin_grad_std": 0.2398282289505005,
"beta_dpo/beta_margin_mean": 0.9909035563468933,
"beta_dpo/beta_margin_std": 2.612060070037842,
"beta_dpo/beta_used": 0.10703656077384949,
"beta_dpo/beta_used_raw": 0.08424051105976105,
"beta_dpo/gap_mean": 9.722146987915039,
"beta_dpo/gap_std": 15.254247665405273,
"beta_dpo/loss_margin_mean": 10.538039207458496,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.1421012849584278,
"grad_norm": 116.64041137695312,
"learning_rate": 4.976400700654751e-07,
"logits/chosen": -3.3025875091552734,
"logits/rejected": -3.3130154609680176,
"loss": 0.9871,
"step": 94
},
{
"beta_dpo/beta": 0.3490155339241028,
"beta_dpo/beta_margin_grad_mean": -0.2538135051727295,
"beta_dpo/beta_margin_grad_std": 0.3552146255970001,
"beta_dpo/beta_margin_mean": 4.290498733520508,
"beta_dpo/beta_margin_std": 6.96637487411499,
"beta_dpo/beta_used": 0.3490155339241028,
"beta_dpo/beta_used_raw": 0.3490155339241028,
"beta_dpo/gap_mean": 10.081417083740234,
"beta_dpo/gap_std": 15.954303741455078,
"beta_dpo/loss_margin_mean": 12.237871170043945,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.1436130007558579,
"grad_norm": 406.6619567871094,
"learning_rate": 4.974553604702332e-07,
"logits/chosen": -3.3025193214416504,
"logits/rejected": -3.310507297515869,
"loss": 0.7284,
"step": 95
},
{
"beta_dpo/beta": 0.06385838240385056,
"beta_dpo/beta_margin_grad_mean": -0.36486878991127014,
"beta_dpo/beta_margin_grad_std": 0.2169518917798996,
"beta_dpo/beta_margin_mean": 0.9240705370903015,
"beta_dpo/beta_margin_std": 1.6645779609680176,
"beta_dpo/beta_used": 0.06385838240385056,
"beta_dpo/beta_used_raw": 0.06385838240385056,
"beta_dpo/gap_mean": 10.47520923614502,
"beta_dpo/gap_std": 16.70541763305664,
"beta_dpo/loss_margin_mean": 12.856772422790527,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.14512471655328799,
"grad_norm": 71.98176574707031,
"learning_rate": 4.972637290166157e-07,
"logits/chosen": -3.3070459365844727,
"logits/rejected": -3.322648048400879,
"loss": 1.0676,
"step": 96
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4981251657009125,
"beta_dpo/beta_margin_grad_std": 0.004490617197006941,
"beta_dpo/beta_margin_mean": 0.007500056177377701,
"beta_dpo/beta_margin_std": 0.017964085564017296,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.03594258427619934,
"beta_dpo/gap_mean": 10.320394515991211,
"beta_dpo/gap_std": 17.007476806640625,
"beta_dpo/loss_margin_mean": 7.50005578994751,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.14663643235071808,
"grad_norm": 1.7046000957489014,
"learning_rate": 4.970651810649666e-07,
"logits/chosen": -3.3001856803894043,
"logits/rejected": -3.3082635402679443,
"loss": 1.3783,
"step": 97
},
{
"beta_dpo/beta": 0.1992720514535904,
"beta_dpo/beta_margin_grad_mean": -0.37769633531570435,
"beta_dpo/beta_margin_grad_std": 0.28409111499786377,
"beta_dpo/beta_margin_mean": 2.664219856262207,
"beta_dpo/beta_margin_std": 5.922903060913086,
"beta_dpo/beta_used": 0.1992720514535904,
"beta_dpo/beta_used_raw": 0.15579071640968323,
"beta_dpo/gap_mean": 10.343599319458008,
"beta_dpo/gap_std": 17.06980323791504,
"beta_dpo/loss_margin_mean": 11.241314888000488,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.14814814814814814,
"grad_norm": 442.90679931640625,
"learning_rate": 4.968597221690985e-07,
"logits/chosen": -3.3171591758728027,
"logits/rejected": -3.3124804496765137,
"loss": 1.1151,
"step": 98
},
{
"beta_dpo/beta": 0.2299761325120926,
"beta_dpo/beta_margin_grad_mean": -0.30642494559288025,
"beta_dpo/beta_margin_grad_std": 0.2615741193294525,
"beta_dpo/beta_margin_mean": 3.1031556129455566,
"beta_dpo/beta_margin_std": 6.05756950378418,
"beta_dpo/beta_used": 0.2299761325120926,
"beta_dpo/beta_used_raw": 0.2299761325120926,
"beta_dpo/gap_mean": 10.645599365234375,
"beta_dpo/gap_std": 16.824813842773438,
"beta_dpo/loss_margin_mean": 12.99911117553711,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.14965986394557823,
"grad_norm": 276.3519287109375,
"learning_rate": 4.966473580761389e-07,
"logits/chosen": -3.28450870513916,
"logits/rejected": -3.29331636428833,
"loss": 1.0461,
"step": 99
},
{
"beta_dpo/beta": 0.13315755128860474,
"beta_dpo/beta_margin_grad_mean": -0.3134312033653259,
"beta_dpo/beta_margin_grad_std": 0.3042459487915039,
"beta_dpo/beta_margin_mean": 1.7308166027069092,
"beta_dpo/beta_margin_std": 2.553818702697754,
"beta_dpo/beta_used": 0.13315755128860474,
"beta_dpo/beta_used_raw": 0.13315755128860474,
"beta_dpo/gap_mean": 11.017045974731445,
"beta_dpo/gap_std": 17.152666091918945,
"beta_dpo/loss_margin_mean": 12.954180717468262,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.15117157974300832,
"grad_norm": 196.86199951171875,
"learning_rate": 4.964280947263676e-07,
"logits/chosen": -3.333110809326172,
"logits/rejected": -3.3278937339782715,
"loss": 0.9953,
"step": 100
},
{
"epoch": 0.15117157974300832,
"eval_beta_dpo/beta": 0.061328914016485214,
"eval_beta_dpo/beta_margin_grad_mean": -0.42010626196861267,
"eval_beta_dpo/beta_margin_grad_std": 0.12268673628568649,
"eval_beta_dpo/beta_margin_mean": 0.7724127173423767,
"eval_beta_dpo/beta_margin_std": 1.1312227249145508,
"eval_beta_dpo/beta_used": 0.061328914016485214,
"eval_beta_dpo/beta_used_raw": -0.026162950322031975,
"eval_beta_dpo/gap_mean": 11.231735229492188,
"eval_beta_dpo/gap_std": 17.3662052154541,
"eval_beta_dpo/loss_margin_mean": 9.129016876220703,
"eval_beta_dpo/mask_keep_frac": 1.0,
"eval_logits/chosen": -3.3065404891967773,
"eval_logits/rejected": -3.3154821395874023,
"eval_loss": 0.648465633392334,
"eval_runtime": 36.8664,
"eval_samples_per_second": 62.469,
"eval_steps_per_second": 1.953,
"step": 100
},
{
"beta_dpo/beta": 0.10702672600746155,
"beta_dpo/beta_margin_grad_mean": -0.33292368054389954,
"beta_dpo/beta_margin_grad_std": 0.25112685561180115,
"beta_dpo/beta_margin_mean": 1.6487940549850464,
"beta_dpo/beta_margin_std": 2.924790620803833,
"beta_dpo/beta_used": 0.10702672600746155,
"beta_dpo/beta_used_raw": 0.04923146218061447,
"beta_dpo/gap_mean": 11.476530075073242,
"beta_dpo/gap_std": 17.14791488647461,
"beta_dpo/loss_margin_mean": 13.512877464294434,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.15268329554043839,
"grad_norm": 109.97279357910156,
"learning_rate": 4.96201938253052e-07,
"logits/chosen": -3.2759666442871094,
"logits/rejected": -3.2825520038604736,
"loss": 1.1453,
"step": 101
},
{
"beta_dpo/beta": 0.3127828538417816,
"beta_dpo/beta_margin_grad_mean": -0.28607505559921265,
"beta_dpo/beta_margin_grad_std": 0.3804353177547455,
"beta_dpo/beta_margin_mean": 3.4580235481262207,
"beta_dpo/beta_margin_std": 7.335368633270264,
"beta_dpo/beta_used": 0.3127828538417816,
"beta_dpo/beta_used_raw": 0.3127828538417816,
"beta_dpo/gap_mean": 11.59719181060791,
"beta_dpo/gap_std": 17.798389434814453,
"beta_dpo/loss_margin_mean": 11.870264053344727,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.15419501133786848,
"grad_norm": 446.07623291015625,
"learning_rate": 4.959688949822748e-07,
"logits/chosen": -3.313218593597412,
"logits/rejected": -3.3073246479034424,
"loss": 1.0181,
"step": 102
},
{
"beta_dpo/beta": 0.10902046412229538,
"beta_dpo/beta_margin_grad_mean": -0.35592639446258545,
"beta_dpo/beta_margin_grad_std": 0.24294497072696686,
"beta_dpo/beta_margin_mean": 1.3516149520874023,
"beta_dpo/beta_margin_std": 2.5189883708953857,
"beta_dpo/beta_used": 0.10902046412229538,
"beta_dpo/beta_used_raw": 0.10902046412229538,
"beta_dpo/gap_mean": 11.740519523620605,
"beta_dpo/gap_std": 17.701553344726562,
"beta_dpo/loss_margin_mean": 11.213488578796387,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.15570672713529857,
"grad_norm": 145.86431884765625,
"learning_rate": 4.957289714327572e-07,
"logits/chosen": -3.2825355529785156,
"logits/rejected": -3.2872812747955322,
"loss": 0.9671,
"step": 103
},
{
"beta_dpo/beta": 0.053928766399621964,
"beta_dpo/beta_margin_grad_mean": -0.40135452151298523,
"beta_dpo/beta_margin_grad_std": 0.2233276218175888,
"beta_dpo/beta_margin_mean": 0.8575385212898254,
"beta_dpo/beta_margin_std": 1.8291181325912476,
"beta_dpo/beta_used": 0.053928766399621964,
"beta_dpo/beta_used_raw": 0.0007075034081935883,
"beta_dpo/gap_mean": 11.516962051391602,
"beta_dpo/gap_std": 17.790897369384766,
"beta_dpo/loss_margin_mean": 12.073646545410156,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.15721844293272866,
"grad_norm": 122.20460510253906,
"learning_rate": 4.954821743156767e-07,
"logits/chosen": -3.2585346698760986,
"logits/rejected": -3.2844367027282715,
"loss": 1.2087,
"step": 104
},
{
"beta_dpo/beta": 0.08077219873666763,
"beta_dpo/beta_margin_grad_mean": -0.37532395124435425,
"beta_dpo/beta_margin_grad_std": 0.25372788310050964,
"beta_dpo/beta_margin_mean": 1.0991127490997314,
"beta_dpo/beta_margin_std": 2.2974021434783936,
"beta_dpo/beta_used": 0.08077219873666763,
"beta_dpo/beta_used_raw": -0.011477030813694,
"beta_dpo/gap_mean": 11.434713363647461,
"beta_dpo/gap_std": 17.877893447875977,
"beta_dpo/loss_margin_mean": 10.53282356262207,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.15873015873015872,
"grad_norm": 186.44876098632812,
"learning_rate": 4.952285105344791e-07,
"logits/chosen": -3.2931785583496094,
"logits/rejected": -3.3190624713897705,
"loss": 1.2273,
"step": 105
},
{
"beta_dpo/beta": 0.12502261996269226,
"beta_dpo/beta_margin_grad_mean": -0.32863035798072815,
"beta_dpo/beta_margin_grad_std": 0.2691400945186615,
"beta_dpo/beta_margin_mean": 1.324576497077942,
"beta_dpo/beta_margin_std": 2.8297853469848633,
"beta_dpo/beta_used": 0.12502261996269226,
"beta_dpo/beta_used_raw": 0.12502261996269226,
"beta_dpo/gap_mean": 11.368999481201172,
"beta_dpo/gap_std": 18.08009147644043,
"beta_dpo/loss_margin_mean": 9.703131675720215,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.1602418745275888,
"grad_norm": 195.53463745117188,
"learning_rate": 4.949679871846857e-07,
"logits/chosen": -3.2830629348754883,
"logits/rejected": -3.283719778060913,
"loss": 0.9661,
"step": 106
},
{
"beta_dpo/beta": 0.16479669511318207,
"beta_dpo/beta_margin_grad_mean": -0.4065779447555542,
"beta_dpo/beta_margin_grad_std": 0.3104124367237091,
"beta_dpo/beta_margin_mean": 1.5142831802368164,
"beta_dpo/beta_margin_std": 5.243839740753174,
"beta_dpo/beta_used": 0.16479669511318207,
"beta_dpo/beta_used_raw": 0.14887505769729614,
"beta_dpo/gap_mean": 11.298818588256836,
"beta_dpo/gap_std": 18.31937599182129,
"beta_dpo/loss_margin_mean": 11.326576232910156,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.1617535903250189,
"grad_norm": 419.9315185546875,
"learning_rate": 4.947006115536947e-07,
"logits/chosen": -3.336994171142578,
"logits/rejected": -3.33107852935791,
"loss": 1.4917,
"step": 107
},
{
"beta_dpo/beta": 0.27067288756370544,
"beta_dpo/beta_margin_grad_mean": -0.2779853641986847,
"beta_dpo/beta_margin_grad_std": 0.36746031045913696,
"beta_dpo/beta_margin_mean": 3.8700578212738037,
"beta_dpo/beta_margin_std": 5.521655082702637,
"beta_dpo/beta_used": 0.27067288756370544,
"beta_dpo/beta_used_raw": 0.27067288756370544,
"beta_dpo/gap_mean": 11.675544738769531,
"beta_dpo/gap_std": 18.756027221679688,
"beta_dpo/loss_margin_mean": 14.086289405822754,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.16326530612244897,
"grad_norm": 398.2725830078125,
"learning_rate": 4.944263911205772e-07,
"logits/chosen": -3.319603681564331,
"logits/rejected": -3.329835891723633,
"loss": 1.0832,
"step": 108
},
{
"beta_dpo/beta": 0.13511931896209717,
"beta_dpo/beta_margin_grad_mean": -0.2998422682285309,
"beta_dpo/beta_margin_grad_std": 0.25949904322624207,
"beta_dpo/beta_margin_mean": 1.695755124092102,
"beta_dpo/beta_margin_std": 2.8789122104644775,
"beta_dpo/beta_used": 0.13511931896209717,
"beta_dpo/beta_used_raw": 0.13511931896209717,
"beta_dpo/gap_mean": 12.130621910095215,
"beta_dpo/gap_std": 18.512916564941406,
"beta_dpo/loss_margin_mean": 13.88644027709961,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.16477702191987906,
"grad_norm": 182.8151397705078,
"learning_rate": 4.941453335558681e-07,
"logits/chosen": -3.304962158203125,
"logits/rejected": -3.3313684463500977,
"loss": 0.8526,
"step": 109
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4973180592060089,
"beta_dpo/beta_margin_grad_std": 0.00407151784747839,
"beta_dpo/beta_margin_mean": 0.010728972032666206,
"beta_dpo/beta_margin_std": 0.01628948375582695,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.02168526127934456,
"beta_dpo/gap_mean": 11.985815048217773,
"beta_dpo/gap_std": 18.2330322265625,
"beta_dpo/loss_margin_mean": 10.728971481323242,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.16628873771730915,
"grad_norm": 1.6084506511688232,
"learning_rate": 4.938574467213517e-07,
"logits/chosen": -3.334700584411621,
"logits/rejected": -3.315509796142578,
"loss": 1.3764,
"step": 110
},
{
"beta_dpo/beta": 0.07999280840158463,
"beta_dpo/beta_margin_grad_mean": -0.32643070816993713,
"beta_dpo/beta_margin_grad_std": 0.22944535315036774,
"beta_dpo/beta_margin_mean": 1.0365407466888428,
"beta_dpo/beta_margin_std": 1.3925178050994873,
"beta_dpo/beta_used": 0.07999280840158463,
"beta_dpo/beta_used_raw": 0.07999280840158463,
"beta_dpo/gap_mean": 11.973346710205078,
"beta_dpo/gap_std": 17.88604736328125,
"beta_dpo/loss_margin_mean": 12.886275291442871,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.16780045351473924,
"grad_norm": 101.35095977783203,
"learning_rate": 4.935627386698418e-07,
"logits/chosen": -3.304298162460327,
"logits/rejected": -3.32550048828125,
"loss": 0.9563,
"step": 111
},
{
"beta_dpo/beta": 0.30072295665740967,
"beta_dpo/beta_margin_grad_mean": -0.2684202492237091,
"beta_dpo/beta_margin_grad_std": 0.343357115983963,
"beta_dpo/beta_margin_mean": 4.119093418121338,
"beta_dpo/beta_margin_std": 7.785393238067627,
"beta_dpo/beta_used": 0.30072295665740967,
"beta_dpo/beta_used_raw": 0.30072295665740967,
"beta_dpo/gap_mean": 12.52230167388916,
"beta_dpo/gap_std": 18.061965942382812,
"beta_dpo/loss_margin_mean": 14.822799682617188,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.1693121693121693,
"grad_norm": 336.1982727050781,
"learning_rate": 4.932612176449559e-07,
"logits/chosen": -3.347121238708496,
"logits/rejected": -3.3621816635131836,
"loss": 0.7794,
"step": 112
},
{
"beta_dpo/beta": 0.09019893407821655,
"beta_dpo/beta_margin_grad_mean": -0.3130282759666443,
"beta_dpo/beta_margin_grad_std": 0.25264549255371094,
"beta_dpo/beta_margin_mean": 1.2460495233535767,
"beta_dpo/beta_margin_std": 1.8240975141525269,
"beta_dpo/beta_used": 0.09019893407821655,
"beta_dpo/beta_used_raw": 0.09019893407821655,
"beta_dpo/gap_mean": 12.521149635314941,
"beta_dpo/gap_std": 18.38436508178711,
"beta_dpo/loss_margin_mean": 12.998438835144043,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.1708238851095994,
"grad_norm": 93.59181213378906,
"learning_rate": 4.929528920808854e-07,
"logits/chosen": -3.287510633468628,
"logits/rejected": -3.3015475273132324,
"loss": 0.8636,
"step": 113
},
{
"beta_dpo/beta": 0.031618744134902954,
"beta_dpo/beta_margin_grad_mean": -0.4364977180957794,
"beta_dpo/beta_margin_grad_std": 0.1669948250055313,
"beta_dpo/beta_margin_mean": 0.31983914971351624,
"beta_dpo/beta_margin_std": 0.8308923244476318,
"beta_dpo/beta_used": 0.031618744134902954,
"beta_dpo/beta_used_raw": 0.031618744134902954,
"beta_dpo/gap_mean": 12.35753345489502,
"beta_dpo/gap_std": 18.50242042541504,
"beta_dpo/loss_margin_mean": 10.953230857849121,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.17233560090702948,
"grad_norm": 62.89752197265625,
"learning_rate": 4.92637770602159e-07,
"logits/chosen": -3.289074659347534,
"logits/rejected": -3.304262161254883,
"loss": 1.1957,
"step": 114
},
{
"beta_dpo/beta": 0.06044984608888626,
"beta_dpo/beta_margin_grad_mean": -0.3418780565261841,
"beta_dpo/beta_margin_grad_std": 0.17799124121665955,
"beta_dpo/beta_margin_mean": 0.7988328337669373,
"beta_dpo/beta_margin_std": 0.9704313278198242,
"beta_dpo/beta_used": 0.06044984608888626,
"beta_dpo/beta_used_raw": 0.06044984608888626,
"beta_dpo/gap_mean": 12.469953536987305,
"beta_dpo/gap_std": 17.994335174560547,
"beta_dpo/loss_margin_mean": 13.105659484863281,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.17384731670445955,
"grad_norm": 57.57271194458008,
"learning_rate": 4.923158620234019e-07,
"logits/chosen": -3.319486618041992,
"logits/rejected": -3.3349642753601074,
"loss": 0.9164,
"step": 115
},
{
"beta_dpo/beta": 0.0968373566865921,
"beta_dpo/beta_margin_grad_mean": -0.38010072708129883,
"beta_dpo/beta_margin_grad_std": 0.2710416913032532,
"beta_dpo/beta_margin_mean": 1.3985378742218018,
"beta_dpo/beta_margin_std": 2.8638670444488525,
"beta_dpo/beta_used": 0.0968373566865921,
"beta_dpo/beta_used_raw": 0.09599150717258453,
"beta_dpo/gap_mean": 12.721582412719727,
"beta_dpo/gap_std": 17.726070404052734,
"beta_dpo/loss_margin_mean": 14.203131675720215,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.17535903250188964,
"grad_norm": 177.60740661621094,
"learning_rate": 4.91987175349089e-07,
"logits/chosen": -3.3052141666412354,
"logits/rejected": -3.3214163780212402,
"loss": 1.311,
"step": 116
},
{
"beta_dpo/beta": 0.132050022482872,
"beta_dpo/beta_margin_grad_mean": -0.32548680901527405,
"beta_dpo/beta_margin_grad_std": 0.23864923417568207,
"beta_dpo/beta_margin_mean": 2.548675060272217,
"beta_dpo/beta_margin_std": 3.935479164123535,
"beta_dpo/beta_used": 0.132050022482872,
"beta_dpo/beta_used_raw": 0.11093597859144211,
"beta_dpo/gap_mean": 13.294087409973145,
"beta_dpo/gap_std": 17.970184326171875,
"beta_dpo/loss_margin_mean": 14.694787979125977,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.17687074829931973,
"grad_norm": 204.1652069091797,
"learning_rate": 4.916517197732933e-07,
"logits/chosen": -3.3179454803466797,
"logits/rejected": -3.3263401985168457,
"loss": 0.9744,
"step": 117
},
{
"beta_dpo/beta": 0.05474819988012314,
"beta_dpo/beta_margin_grad_mean": -0.35628294944763184,
"beta_dpo/beta_margin_grad_std": 0.17293918132781982,
"beta_dpo/beta_margin_mean": 0.6831283569335938,
"beta_dpo/beta_margin_std": 0.8625761270523071,
"beta_dpo/beta_used": 0.05474819988012314,
"beta_dpo/beta_used_raw": 0.05474819988012314,
"beta_dpo/gap_mean": 12.97573471069336,
"beta_dpo/gap_std": 17.96988296508789,
"beta_dpo/loss_margin_mean": 12.617765426635742,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.17838246409674982,
"grad_norm": 64.94096374511719,
"learning_rate": 4.913095046794281e-07,
"logits/chosen": -3.3115124702453613,
"logits/rejected": -3.3236846923828125,
"loss": 0.9531,
"step": 118
},
{
"beta_dpo/beta": 0.02536691352725029,
"beta_dpo/beta_margin_grad_mean": -0.4406428039073944,
"beta_dpo/beta_margin_grad_std": 0.15547248721122742,
"beta_dpo/beta_margin_mean": 0.2915641963481903,
"beta_dpo/beta_margin_std": 0.7470220327377319,
"beta_dpo/beta_used": 0.02536691352725029,
"beta_dpo/beta_used_raw": -0.06982914358377457,
"beta_dpo/gap_mean": 12.281299591064453,
"beta_dpo/gap_std": 17.952869415283203,
"beta_dpo/loss_margin_mean": 8.689970970153809,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.17989417989417988,
"grad_norm": 43.38718795776367,
"learning_rate": 4.909605396399855e-07,
"logits/chosen": -3.361435890197754,
"logits/rejected": -3.3659839630126953,
"loss": 1.2301,
"step": 119
},
{
"beta_dpo/beta": 0.12747015058994293,
"beta_dpo/beta_margin_grad_mean": -0.3678387701511383,
"beta_dpo/beta_margin_grad_std": 0.2749040424823761,
"beta_dpo/beta_margin_mean": 2.0166592597961426,
"beta_dpo/beta_margin_std": 3.838064670562744,
"beta_dpo/beta_used": 0.12747015058994293,
"beta_dpo/beta_used_raw": 0.04408044368028641,
"beta_dpo/gap_mean": 12.654379844665527,
"beta_dpo/gap_std": 18.12213897705078,
"beta_dpo/loss_margin_mean": 14.718048095703125,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.18140589569160998,
"grad_norm": 216.78616333007812,
"learning_rate": 4.906048344162676e-07,
"logits/chosen": -3.328334331512451,
"logits/rejected": -3.343393325805664,
"loss": 1.1061,
"step": 120
},
{
"beta_dpo/beta": 0.04815954342484474,
"beta_dpo/beta_margin_grad_mean": -0.40159502625465393,
"beta_dpo/beta_margin_grad_std": 0.20640181005001068,
"beta_dpo/beta_margin_mean": 0.5983519554138184,
"beta_dpo/beta_margin_std": 1.435462474822998,
"beta_dpo/beta_used": 0.04815954342484474,
"beta_dpo/beta_used_raw": 0.04493497684597969,
"beta_dpo/gap_mean": 12.72224235534668,
"beta_dpo/gap_std": 18.192995071411133,
"beta_dpo/loss_margin_mean": 13.013574600219727,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.18291761148904007,
"grad_norm": 50.60379409790039,
"learning_rate": 4.902423989581143e-07,
"logits/chosen": -3.313652276992798,
"logits/rejected": -3.3476529121398926,
"loss": 1.1083,
"step": 121
},
{
"beta_dpo/beta": 0.17443186044692993,
"beta_dpo/beta_margin_grad_mean": -0.2969356179237366,
"beta_dpo/beta_margin_grad_std": 0.27412667870521545,
"beta_dpo/beta_margin_mean": 2.469672203063965,
"beta_dpo/beta_margin_std": 3.7644035816192627,
"beta_dpo/beta_used": 0.17443186044692993,
"beta_dpo/beta_used_raw": 0.17443186044692993,
"beta_dpo/gap_mean": 12.795064926147461,
"beta_dpo/gap_std": 17.958881378173828,
"beta_dpo/loss_margin_mean": 13.30368423461914,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.18442932728647016,
"grad_norm": 171.00291442871094,
"learning_rate": 4.898732434036243e-07,
"logits/chosen": -3.323568105697632,
"logits/rejected": -3.3388819694519043,
"loss": 0.7791,
"step": 122
},
{
"beta_dpo/beta": 0.15132403373718262,
"beta_dpo/beta_margin_grad_mean": -0.30665212869644165,
"beta_dpo/beta_margin_grad_std": 0.23419080674648285,
"beta_dpo/beta_margin_mean": 2.5235509872436523,
"beta_dpo/beta_margin_std": 3.878403663635254,
"beta_dpo/beta_used": 0.15132403373718262,
"beta_dpo/beta_used_raw": 0.15132403373718262,
"beta_dpo/gap_mean": 13.087821960449219,
"beta_dpo/gap_std": 17.78058624267578,
"beta_dpo/loss_margin_mean": 14.883736610412598,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.18594104308390022,
"grad_norm": 108.17544555664062,
"learning_rate": 4.894973780788722e-07,
"logits/chosen": -3.3349435329437256,
"logits/rejected": -3.3474483489990234,
"loss": 0.8074,
"step": 123
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4965851604938507,
"beta_dpo/beta_margin_grad_std": 0.004146179184317589,
"beta_dpo/beta_margin_mean": 0.013660573400557041,
"beta_dpo/beta_margin_std": 0.01658688299357891,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.09001453220844269,
"beta_dpo/gap_mean": 13.315803527832031,
"beta_dpo/gap_std": 17.493318557739258,
"beta_dpo/loss_margin_mean": 13.66057300567627,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.1874527588813303,
"grad_norm": 1.6013367176055908,
"learning_rate": 4.89114813497619e-07,
"logits/chosen": -3.3313074111938477,
"logits/rejected": -3.355624198913574,
"loss": 1.3762,
"step": 124
},
{
"beta_dpo/beta": 0.05969487130641937,
"beta_dpo/beta_margin_grad_mean": -0.3555901050567627,
"beta_dpo/beta_margin_grad_std": 0.19629493355751038,
"beta_dpo/beta_margin_mean": 1.0195149183273315,
"beta_dpo/beta_margin_std": 1.603021502494812,
"beta_dpo/beta_used": 0.05969487130641937,
"beta_dpo/beta_used_raw": -0.020462922751903534,
"beta_dpo/gap_mean": 13.516692161560059,
"beta_dpo/gap_std": 17.451478958129883,
"beta_dpo/loss_margin_mean": 13.241378784179688,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.1889644746787604,
"grad_norm": 64.26988983154297,
"learning_rate": 4.887255603610184e-07,
"logits/chosen": -3.3624677658081055,
"logits/rejected": -3.3855817317962646,
"loss": 0.9877,
"step": 125
},
{
"beta_dpo/beta": 0.004270387347787619,
"beta_dpo/beta_margin_grad_mean": -0.4860527813434601,
"beta_dpo/beta_margin_grad_std": 0.02659439854323864,
"beta_dpo/beta_margin_mean": 0.05607512220740318,
"beta_dpo/beta_margin_std": 0.10699854791164398,
"beta_dpo/beta_used": 0.004270387347787619,
"beta_dpo/beta_used_raw": -0.04096106067299843,
"beta_dpo/gap_mean": 12.902446746826172,
"beta_dpo/gap_std": 17.716262817382812,
"beta_dpo/loss_margin_mean": 11.313679695129395,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.19047619047619047,
"grad_norm": 8.320745468139648,
"learning_rate": 4.883296295573176e-07,
"logits/chosen": -3.352705478668213,
"logits/rejected": -3.3358869552612305,
"loss": 1.3413,
"step": 126
},
{
"beta_dpo/beta": 0.043115854263305664,
"beta_dpo/beta_margin_grad_mean": -0.3736512064933777,
"beta_dpo/beta_margin_grad_std": 0.1453510969877243,
"beta_dpo/beta_margin_mean": 0.5749568939208984,
"beta_dpo/beta_margin_std": 0.6788315176963806,
"beta_dpo/beta_used": 0.043115854263305664,
"beta_dpo/beta_used_raw": 0.043115854263305664,
"beta_dpo/gap_mean": 13.043050765991211,
"beta_dpo/gap_std": 17.539508819580078,
"beta_dpo/loss_margin_mean": 13.345462799072266,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.19198790627362056,
"grad_norm": 51.55445861816406,
"learning_rate": 4.87927032161552e-07,
"logits/chosen": -3.3343491554260254,
"logits/rejected": -3.329026460647583,
"loss": 1.0171,
"step": 127
},
{
"beta_dpo/beta": 0.14029397070407867,
"beta_dpo/beta_margin_grad_mean": -0.36535340547561646,
"beta_dpo/beta_margin_grad_std": 0.28294602036476135,
"beta_dpo/beta_margin_mean": 1.8758174180984497,
"beta_dpo/beta_margin_std": 3.9560065269470215,
"beta_dpo/beta_used": 0.14029397070407867,
"beta_dpo/beta_used_raw": 0.12094033509492874,
"beta_dpo/gap_mean": 13.112518310546875,
"beta_dpo/gap_std": 17.750701904296875,
"beta_dpo/loss_margin_mean": 13.866514205932617,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.19349962207105065,
"grad_norm": 200.18682861328125,
"learning_rate": 4.875177794352363e-07,
"logits/chosen": -3.3554821014404297,
"logits/rejected": -3.3803372383117676,
"loss": 0.883,
"step": 128
},
{
"beta_dpo/beta": 0.023648953065276146,
"beta_dpo/beta_margin_grad_mean": -0.4368211328983307,
"beta_dpo/beta_margin_grad_std": 0.1546202152967453,
"beta_dpo/beta_margin_mean": 0.31830909848213196,
"beta_dpo/beta_margin_std": 0.7734503149986267,
"beta_dpo/beta_used": 0.023648953065276146,
"beta_dpo/beta_used_raw": 0.008171427063643932,
"beta_dpo/gap_mean": 13.18086051940918,
"beta_dpo/gap_std": 18.537490844726562,
"beta_dpo/loss_margin_mean": 13.067851066589355,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.19501133786848074,
"grad_norm": 43.7829475402832,
"learning_rate": 4.871018828260491e-07,
"logits/chosen": -3.345930576324463,
"logits/rejected": -3.3373584747314453,
"loss": 1.22,
"step": 129
},
{
"beta_dpo/beta": 0.04935254156589508,
"beta_dpo/beta_margin_grad_mean": -0.3538309931755066,
"beta_dpo/beta_margin_grad_std": 0.20434898138046265,
"beta_dpo/beta_margin_mean": 0.751793622970581,
"beta_dpo/beta_margin_std": 1.1252886056900024,
"beta_dpo/beta_used": 0.04935254156589508,
"beta_dpo/beta_used_raw": 0.04935254156589508,
"beta_dpo/gap_mean": 13.687162399291992,
"beta_dpo/gap_std": 18.96971893310547,
"beta_dpo/loss_margin_mean": 16.04789161682129,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.1965230536659108,
"grad_norm": 64.4195556640625,
"learning_rate": 4.866793539675126e-07,
"logits/chosen": -3.3322033882141113,
"logits/rejected": -3.3460071086883545,
"loss": 1.0315,
"step": 130
},
{
"beta_dpo/beta": 0.147065669298172,
"beta_dpo/beta_margin_grad_mean": -0.342940092086792,
"beta_dpo/beta_margin_grad_std": 0.2701457142829895,
"beta_dpo/beta_margin_mean": 2.4948854446411133,
"beta_dpo/beta_margin_std": 4.793673515319824,
"beta_dpo/beta_used": 0.147065669298172,
"beta_dpo/beta_used_raw": 0.13867664337158203,
"beta_dpo/gap_mean": 13.932304382324219,
"beta_dpo/gap_std": 19.49631118774414,
"beta_dpo/loss_margin_mean": 15.796794891357422,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.1980347694633409,
"grad_norm": 142.7205810546875,
"learning_rate": 4.86250204678667e-07,
"logits/chosen": -3.324023962020874,
"logits/rejected": -3.3562068939208984,
"loss": 1.0579,
"step": 131
},
{
"beta_dpo/beta": 0.09857457131147385,
"beta_dpo/beta_margin_grad_mean": -0.3639555871486664,
"beta_dpo/beta_margin_grad_std": 0.2656742036342621,
"beta_dpo/beta_margin_mean": 1.3347876071929932,
"beta_dpo/beta_margin_std": 2.7419517040252686,
"beta_dpo/beta_used": 0.09857457131147385,
"beta_dpo/beta_used_raw": 0.0864531397819519,
"beta_dpo/gap_mean": 14.162351608276367,
"beta_dpo/gap_std": 19.121501922607422,
"beta_dpo/loss_margin_mean": 15.047807693481445,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.19954648526077098,
"grad_norm": 107.42191314697266,
"learning_rate": 4.858144469637408e-07,
"logits/chosen": -3.3415815830230713,
"logits/rejected": -3.3510897159576416,
"loss": 1.047,
"step": 132
},
{
"beta_dpo/beta": 0.0017336343880742788,
"beta_dpo/beta_margin_grad_mean": -0.4941368103027344,
"beta_dpo/beta_margin_grad_std": 0.01007362175732851,
"beta_dpo/beta_margin_mean": 0.023465832695364952,
"beta_dpo/beta_margin_std": 0.04031944274902344,
"beta_dpo/beta_used": 0.0017336343880742788,
"beta_dpo/beta_used_raw": -0.04028265178203583,
"beta_dpo/gap_mean": 14.131145477294922,
"beta_dpo/gap_std": 19.188018798828125,
"beta_dpo/loss_margin_mean": 12.276420593261719,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.20105820105820105,
"grad_norm": 2.9273688793182373,
"learning_rate": 4.853720930118138e-07,
"logits/chosen": -3.3473305702209473,
"logits/rejected": -3.345794677734375,
"loss": 1.3656,
"step": 133
},
{
"beta_dpo/beta": 0.2282017022371292,
"beta_dpo/beta_margin_grad_mean": -0.2386937141418457,
"beta_dpo/beta_margin_grad_std": 0.3078445494174957,
"beta_dpo/beta_margin_mean": 3.9094603061676025,
"beta_dpo/beta_margin_std": 5.4880571365356445,
"beta_dpo/beta_used": 0.2282017022371292,
"beta_dpo/beta_used_raw": 0.2282017022371292,
"beta_dpo/gap_mean": 14.498592376708984,
"beta_dpo/gap_std": 19.41600227355957,
"beta_dpo/loss_margin_mean": 17.716445922851562,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.20256991685563114,
"grad_norm": 223.05345153808594,
"learning_rate": 4.849231551964771e-07,
"logits/chosen": -3.3360257148742676,
"logits/rejected": -3.351398468017578,
"loss": 0.7892,
"step": 134
},
{
"beta_dpo/beta": 0.04041796550154686,
"beta_dpo/beta_margin_grad_mean": -0.4121745824813843,
"beta_dpo/beta_margin_grad_std": 0.182328462600708,
"beta_dpo/beta_margin_mean": 0.48748740553855896,
"beta_dpo/beta_margin_std": 1.0515720844268799,
"beta_dpo/beta_used": 0.04041796550154686,
"beta_dpo/beta_used_raw": 0.04041796550154686,
"beta_dpo/gap_mean": 14.310811996459961,
"beta_dpo/gap_std": 19.547698974609375,
"beta_dpo/loss_margin_mean": 12.49234390258789,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.20408163265306123,
"grad_norm": 61.08085632324219,
"learning_rate": 4.844676460754862e-07,
"logits/chosen": -3.327956199645996,
"logits/rejected": -3.3364243507385254,
"loss": 1.0703,
"step": 135
},
{
"beta_dpo/beta": 0.1498934030532837,
"beta_dpo/beta_margin_grad_mean": -0.31762951612472534,
"beta_dpo/beta_margin_grad_std": 0.29164251685142517,
"beta_dpo/beta_margin_mean": 2.570695400238037,
"beta_dpo/beta_margin_std": 4.504148960113525,
"beta_dpo/beta_used": 0.1498934030532837,
"beta_dpo/beta_used_raw": 0.1498934030532837,
"beta_dpo/gap_mean": 14.416141510009766,
"beta_dpo/gap_std": 19.936372756958008,
"beta_dpo/loss_margin_mean": 14.88786792755127,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.20559334845049132,
"grad_norm": 277.1800842285156,
"learning_rate": 4.840055783904106e-07,
"logits/chosen": -3.333883285522461,
"logits/rejected": -3.3552680015563965,
"loss": 1.1056,
"step": 136
},
{
"beta_dpo/beta": 0.08450040221214294,
"beta_dpo/beta_margin_grad_mean": -0.37931200861930847,
"beta_dpo/beta_margin_grad_std": 0.24967019259929657,
"beta_dpo/beta_margin_mean": 1.2663687467575073,
"beta_dpo/beta_margin_std": 2.509981155395508,
"beta_dpo/beta_used": 0.08450040221214294,
"beta_dpo/beta_used_raw": 0.06899924576282501,
"beta_dpo/gap_mean": 13.820323944091797,
"beta_dpo/gap_std": 20.457162857055664,
"beta_dpo/loss_margin_mean": 11.979299545288086,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.20710506424792138,
"grad_norm": 155.48312377929688,
"learning_rate": 4.835369650662767e-07,
"logits/chosen": -3.3146543502807617,
"logits/rejected": -3.327342987060547,
"loss": 1.1313,
"step": 137
},
{
"beta_dpo/beta": 0.09970663487911224,
"beta_dpo/beta_margin_grad_mean": -0.3133697807788849,
"beta_dpo/beta_margin_grad_std": 0.271743506193161,
"beta_dpo/beta_margin_mean": 1.1647833585739136,
"beta_dpo/beta_margin_std": 2.0706329345703125,
"beta_dpo/beta_used": 0.09970663487911224,
"beta_dpo/beta_used_raw": 0.09970663487911224,
"beta_dpo/gap_mean": 13.604002952575684,
"beta_dpo/gap_std": 20.282379150390625,
"beta_dpo/loss_margin_mean": 11.7897310256958,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.20861678004535147,
"grad_norm": 70.18770599365234,
"learning_rate": 4.830618192112065e-07,
"logits/chosen": -3.335930347442627,
"logits/rejected": -3.342681407928467,
"loss": 0.7101,
"step": 138
},
{
"beta_dpo/beta": 0.1375807821750641,
"beta_dpo/beta_margin_grad_mean": -0.35882195830345154,
"beta_dpo/beta_margin_grad_std": 0.26474788784980774,
"beta_dpo/beta_margin_mean": 2.2247705459594727,
"beta_dpo/beta_margin_std": 4.507172584533691,
"beta_dpo/beta_used": 0.1375807821750641,
"beta_dpo/beta_used_raw": 0.13499371707439423,
"beta_dpo/gap_mean": 13.861026763916016,
"beta_dpo/gap_std": 20.320680618286133,
"beta_dpo/loss_margin_mean": 16.075502395629883,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.21012849584278157,
"grad_norm": 128.69879150390625,
"learning_rate": 4.825801541160509e-07,
"logits/chosen": -3.340639114379883,
"logits/rejected": -3.3341877460479736,
"loss": 0.9392,
"step": 139
},
{
"beta_dpo/beta": 0.3182719051837921,
"beta_dpo/beta_margin_grad_mean": -0.2815694808959961,
"beta_dpo/beta_margin_grad_std": 0.30072757601737976,
"beta_dpo/beta_margin_mean": 6.113375186920166,
"beta_dpo/beta_margin_std": 11.143556594848633,
"beta_dpo/beta_used": 0.3182719051837921,
"beta_dpo/beta_used_raw": 0.3182719051837921,
"beta_dpo/gap_mean": 14.645448684692383,
"beta_dpo/gap_std": 20.552196502685547,
"beta_dpo/loss_margin_mean": 18.274456024169922,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.21164021164021163,
"grad_norm": 366.61572265625,
"learning_rate": 4.820919832540181e-07,
"logits/chosen": -3.338654041290283,
"logits/rejected": -3.350745439529419,
"loss": 0.942,
"step": 140
},
{
"beta_dpo/beta": 0.008213422261178493,
"beta_dpo/beta_margin_grad_mean": -0.4638515114784241,
"beta_dpo/beta_margin_grad_std": 0.06582221388816833,
"beta_dpo/beta_margin_mean": 0.15062931180000305,
"beta_dpo/beta_margin_std": 0.2794095277786255,
"beta_dpo/beta_used": 0.008213422261178493,
"beta_dpo/beta_used_raw": -0.015965929254889488,
"beta_dpo/gap_mean": 14.991787910461426,
"beta_dpo/gap_std": 20.643108367919922,
"beta_dpo/loss_margin_mean": 15.102987289428711,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.21315192743764172,
"grad_norm": 16.13588523864746,
"learning_rate": 4.815973202802966e-07,
"logits/chosen": -3.3420023918151855,
"logits/rejected": -3.356560707092285,
"loss": 1.2876,
"step": 141
},
{
"beta_dpo/beta": 0.014141488820314407,
"beta_dpo/beta_margin_grad_mean": -0.44556012749671936,
"beta_dpo/beta_margin_grad_std": 0.09043306857347488,
"beta_dpo/beta_margin_mean": 0.23377063870429993,
"beta_dpo/beta_margin_std": 0.3970645070075989,
"beta_dpo/beta_used": 0.014141488820314407,
"beta_dpo/beta_used_raw": -0.04336583614349365,
"beta_dpo/gap_mean": 14.939939498901367,
"beta_dpo/gap_std": 20.174400329589844,
"beta_dpo/loss_margin_mean": 15.348499298095703,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2146636432350718,
"grad_norm": 23.85763931274414,
"learning_rate": 4.810961790316729e-07,
"logits/chosen": -3.333073616027832,
"logits/rejected": -3.3334572315216064,
"loss": 1.228,
"step": 142
},
{
"beta_dpo/beta": 0.07208716869354248,
"beta_dpo/beta_margin_grad_mean": -0.39046409726142883,
"beta_dpo/beta_margin_grad_std": 0.2325820028781891,
"beta_dpo/beta_margin_mean": 0.7290058732032776,
"beta_dpo/beta_margin_std": 2.102476119995117,
"beta_dpo/beta_used": 0.07208716869354248,
"beta_dpo/beta_used_raw": 0.017211638391017914,
"beta_dpo/gap_mean": 14.151898384094238,
"beta_dpo/gap_std": 20.170135498046875,
"beta_dpo/loss_margin_mean": 9.85452938079834,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2161753590325019,
"grad_norm": 77.52479553222656,
"learning_rate": 4.805885735261454e-07,
"logits/chosen": -3.3221468925476074,
"logits/rejected": -3.3139724731445312,
"loss": 0.9909,
"step": 143
},
{
"beta_dpo/beta": 0.0595400407910347,
"beta_dpo/beta_margin_grad_mean": -0.3856506943702698,
"beta_dpo/beta_margin_grad_std": 0.22696533799171448,
"beta_dpo/beta_margin_mean": 0.6182453632354736,
"beta_dpo/beta_margin_std": 1.3108974695205688,
"beta_dpo/beta_used": 0.0595400407910347,
"beta_dpo/beta_used_raw": 0.0595400407910347,
"beta_dpo/gap_mean": 13.700199127197266,
"beta_dpo/gap_std": 20.346786499023438,
"beta_dpo/loss_margin_mean": 11.316890716552734,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.21768707482993196,
"grad_norm": 64.55220794677734,
"learning_rate": 4.800745179625307e-07,
"logits/chosen": -3.3223190307617188,
"logits/rejected": -3.324495792388916,
"loss": 0.9382,
"step": 144
},
{
"beta_dpo/beta": 0.14726224541664124,
"beta_dpo/beta_margin_grad_mean": -0.27921420335769653,
"beta_dpo/beta_margin_grad_std": 0.2893240749835968,
"beta_dpo/beta_margin_mean": 2.3826241493225098,
"beta_dpo/beta_margin_std": 3.6393394470214844,
"beta_dpo/beta_used": 0.14726224541664124,
"beta_dpo/beta_used_raw": 0.14726224541664124,
"beta_dpo/gap_mean": 13.917230606079102,
"beta_dpo/gap_std": 20.533353805541992,
"beta_dpo/loss_margin_mean": 16.579933166503906,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.21919879062736206,
"grad_norm": 189.30157470703125,
"learning_rate": 4.795540267200686e-07,
"logits/chosen": -3.346303939819336,
"logits/rejected": -3.3454699516296387,
"loss": 0.8055,
"step": 145
},
{
"beta_dpo/beta": 0.06543400138616562,
"beta_dpo/beta_margin_grad_mean": -0.3946565091609955,
"beta_dpo/beta_margin_grad_std": 0.23178449273109436,
"beta_dpo/beta_margin_mean": 0.9500102996826172,
"beta_dpo/beta_margin_std": 2.0022237300872803,
"beta_dpo/beta_used": 0.06543400138616562,
"beta_dpo/beta_used_raw": 0.05708005279302597,
"beta_dpo/gap_mean": 13.826836585998535,
"beta_dpo/gap_std": 20.62220001220703,
"beta_dpo/loss_margin_mean": 13.209084510803223,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.22071050642479215,
"grad_norm": 102.3393783569336,
"learning_rate": 4.790271143580173e-07,
"logits/chosen": -3.3137712478637695,
"logits/rejected": -3.308354139328003,
"loss": 1.1162,
"step": 146
},
{
"beta_dpo/beta": 0.09549230337142944,
"beta_dpo/beta_margin_grad_mean": -0.3872320353984833,
"beta_dpo/beta_margin_grad_std": 0.2581307291984558,
"beta_dpo/beta_margin_mean": 1.4723294973373413,
"beta_dpo/beta_margin_std": 3.005223035812378,
"beta_dpo/beta_used": 0.09549230337142944,
"beta_dpo/beta_used_raw": 0.07679538428783417,
"beta_dpo/gap_mean": 13.881373405456543,
"beta_dpo/gap_std": 20.608814239501953,
"beta_dpo/loss_margin_mean": 14.324885368347168,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2222222222222222,
"grad_norm": 163.24281311035156,
"learning_rate": 4.784937956152489e-07,
"logits/chosen": -3.3383469581604004,
"logits/rejected": -3.337419033050537,
"loss": 1.0803,
"step": 147
},
{
"beta_dpo/beta": 0.049887314438819885,
"beta_dpo/beta_margin_grad_mean": -0.3772048354148865,
"beta_dpo/beta_margin_grad_std": 0.20383627712726593,
"beta_dpo/beta_margin_mean": 1.0147607326507568,
"beta_dpo/beta_margin_std": 1.9053665399551392,
"beta_dpo/beta_used": 0.049887314438819885,
"beta_dpo/beta_used_raw": 0.009546924382448196,
"beta_dpo/gap_mean": 14.160110473632812,
"beta_dpo/gap_std": 20.884716033935547,
"beta_dpo/loss_margin_mean": 16.530052185058594,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2237339380196523,
"grad_norm": 62.660152435302734,
"learning_rate": 4.779540854098347e-07,
"logits/chosen": -3.3094582557678223,
"logits/rejected": -3.32807993888855,
"loss": 1.0848,
"step": 148
},
{
"beta_dpo/beta": 0.0466022863984108,
"beta_dpo/beta_margin_grad_mean": -0.41487088799476624,
"beta_dpo/beta_margin_grad_std": 0.23305167257785797,
"beta_dpo/beta_margin_mean": 0.6102204322814941,
"beta_dpo/beta_margin_std": 1.6688002347946167,
"beta_dpo/beta_used": 0.0466022863984108,
"beta_dpo/beta_used_raw": 0.010175202041864395,
"beta_dpo/gap_mean": 14.3313570022583,
"beta_dpo/gap_std": 21.182098388671875,
"beta_dpo/loss_margin_mean": 13.540119171142578,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2252456538170824,
"grad_norm": 96.15580749511719,
"learning_rate": 4.774079988386296e-07,
"logits/chosen": -3.2743802070617676,
"logits/rejected": -3.284924030303955,
"loss": 1.1845,
"step": 149
},
{
"beta_dpo/beta": 0.19884531199932098,
"beta_dpo/beta_margin_grad_mean": -0.21449099481105804,
"beta_dpo/beta_margin_grad_std": 0.30374783277511597,
"beta_dpo/beta_margin_mean": 4.370296001434326,
"beta_dpo/beta_margin_std": 5.271862506866455,
"beta_dpo/beta_used": 0.19884531199932098,
"beta_dpo/beta_used_raw": 0.19884531199932098,
"beta_dpo/gap_mean": 15.378077507019043,
"beta_dpo/gap_std": 21.256366729736328,
"beta_dpo/loss_margin_mean": 20.71977996826172,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.22675736961451248,
"grad_norm": 213.7008819580078,
"learning_rate": 4.768555511768486e-07,
"logits/chosen": -3.3375020027160645,
"logits/rejected": -3.343487024307251,
"loss": 0.8479,
"step": 150
},
{
"beta_dpo/beta": 0.2805306613445282,
"beta_dpo/beta_margin_grad_mean": -0.25795936584472656,
"beta_dpo/beta_margin_grad_std": 0.36935240030288696,
"beta_dpo/beta_margin_mean": 4.957238674163818,
"beta_dpo/beta_margin_std": 7.137806415557861,
"beta_dpo/beta_used": 0.2805306613445282,
"beta_dpo/beta_used_raw": 0.2805306613445282,
"beta_dpo/gap_mean": 15.795341491699219,
"beta_dpo/gap_std": 21.945510864257812,
"beta_dpo/loss_margin_mean": 17.629186630249023,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.22826908541194255,
"grad_norm": 554.007568359375,
"learning_rate": 4.762967578776406e-07,
"logits/chosen": -3.322493076324463,
"logits/rejected": -3.3346638679504395,
"loss": 0.8538,
"step": 151
},
{
"beta_dpo/beta": 0.17714881896972656,
"beta_dpo/beta_margin_grad_mean": -0.33929643034935,
"beta_dpo/beta_margin_grad_std": 0.2884059250354767,
"beta_dpo/beta_margin_mean": 3.272763729095459,
"beta_dpo/beta_margin_std": 6.407361030578613,
"beta_dpo/beta_used": 0.17714881896972656,
"beta_dpo/beta_used_raw": 0.0768561065196991,
"beta_dpo/gap_mean": 15.577496528625488,
"beta_dpo/gap_std": 22.255632400512695,
"beta_dpo/loss_margin_mean": 14.780200958251953,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.22978080120937264,
"grad_norm": 280.85235595703125,
"learning_rate": 4.757316345716553e-07,
"logits/chosen": -3.323756217956543,
"logits/rejected": -3.33445405960083,
"loss": 1.0671,
"step": 152
},
{
"beta_dpo/beta": 0.08415161818265915,
"beta_dpo/beta_margin_grad_mean": -0.3395489752292633,
"beta_dpo/beta_margin_grad_std": 0.22877615690231323,
"beta_dpo/beta_margin_mean": 1.5211905241012573,
"beta_dpo/beta_margin_std": 2.5890166759490967,
"beta_dpo/beta_used": 0.08415161818265915,
"beta_dpo/beta_used_raw": 0.05796004831790924,
"beta_dpo/gap_mean": 15.996269226074219,
"beta_dpo/gap_std": 21.825340270996094,
"beta_dpo/loss_margin_mean": 17.323328018188477,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.23129251700680273,
"grad_norm": 151.55897521972656,
"learning_rate": 4.751601970666064e-07,
"logits/chosen": -3.3437438011169434,
"logits/rejected": -3.3381764888763428,
"loss": 0.9072,
"step": 153
},
{
"beta_dpo/beta": 0.06097334995865822,
"beta_dpo/beta_margin_grad_mean": -0.39823541045188904,
"beta_dpo/beta_margin_grad_std": 0.2587531805038452,
"beta_dpo/beta_margin_mean": 0.762320339679718,
"beta_dpo/beta_margin_std": 2.397218704223633,
"beta_dpo/beta_used": 0.06097334995865822,
"beta_dpo/beta_used_raw": 0.0043108463287353516,
"beta_dpo/gap_mean": 15.49760627746582,
"beta_dpo/gap_std": 22.214107513427734,
"beta_dpo/loss_margin_mean": 12.32646369934082,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2328042328042328,
"grad_norm": 114.33980560302734,
"learning_rate": 4.745824613468292e-07,
"logits/chosen": -3.3356807231903076,
"logits/rejected": -3.3275413513183594,
"loss": 1.203,
"step": 154
},
{
"beta_dpo/beta": 0.20179255306720734,
"beta_dpo/beta_margin_grad_mean": -0.3471258282661438,
"beta_dpo/beta_margin_grad_std": 0.2985984981060028,
"beta_dpo/beta_margin_mean": 4.433549404144287,
"beta_dpo/beta_margin_std": 8.430581092834473,
"beta_dpo/beta_used": 0.20179255306720734,
"beta_dpo/beta_used_raw": 0.18239660561084747,
"beta_dpo/gap_mean": 15.837427139282227,
"beta_dpo/gap_std": 22.66343879699707,
"beta_dpo/loss_margin_mean": 17.480371475219727,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.23431594860166288,
"grad_norm": 381.34991455078125,
"learning_rate": 4.7399844357283393e-07,
"logits/chosen": -3.342803478240967,
"logits/rejected": -3.3430895805358887,
"loss": 1.2675,
"step": 155
},
{
"beta_dpo/beta": 0.017000947147607803,
"beta_dpo/beta_margin_grad_mean": -0.4466518759727478,
"beta_dpo/beta_margin_grad_std": 0.10175792872905731,
"beta_dpo/beta_margin_mean": 0.2248079627752304,
"beta_dpo/beta_margin_std": 0.44073355197906494,
"beta_dpo/beta_used": 0.017000947147607803,
"beta_dpo/beta_used_raw": 0.017000947147607803,
"beta_dpo/gap_mean": 15.509801864624023,
"beta_dpo/gap_std": 22.530162811279297,
"beta_dpo/loss_margin_mean": 14.006168365478516,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.23582766439909297,
"grad_norm": 27.029804229736328,
"learning_rate": 4.7340816008085305e-07,
"logits/chosen": -3.3521556854248047,
"logits/rejected": -3.353640079498291,
"loss": 1.1947,
"step": 156
},
{
"beta_dpo/beta": 0.1710837334394455,
"beta_dpo/beta_margin_grad_mean": -0.3583267629146576,
"beta_dpo/beta_margin_grad_std": 0.2934838533401489,
"beta_dpo/beta_margin_mean": 3.050497531890869,
"beta_dpo/beta_margin_std": 5.634936332702637,
"beta_dpo/beta_used": 0.1710837334394455,
"beta_dpo/beta_used_raw": 0.07584992796182632,
"beta_dpo/gap_mean": 15.374656677246094,
"beta_dpo/gap_std": 22.11768341064453,
"beta_dpo/loss_margin_mean": 14.452281951904297,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.23733938019652306,
"grad_norm": 171.72964477539062,
"learning_rate": 4.728116273823847e-07,
"logits/chosen": -3.3176865577697754,
"logits/rejected": -3.3212122917175293,
"loss": 0.9895,
"step": 157
},
{
"beta_dpo/beta": 0.16476041078567505,
"beta_dpo/beta_margin_grad_mean": -0.2862817645072937,
"beta_dpo/beta_margin_grad_std": 0.3239341378211975,
"beta_dpo/beta_margin_mean": 2.1851229667663574,
"beta_dpo/beta_margin_std": 3.4346442222595215,
"beta_dpo/beta_used": 0.16476041078567505,
"beta_dpo/beta_used_raw": 0.16476041078567505,
"beta_dpo/gap_mean": 14.694307327270508,
"beta_dpo/gap_std": 21.618305206298828,
"beta_dpo/loss_margin_mean": 12.814839363098145,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.23885109599395313,
"grad_norm": 192.72439575195312,
"learning_rate": 4.7220886216373085e-07,
"logits/chosen": -3.324219226837158,
"logits/rejected": -3.3319594860076904,
"loss": 0.8983,
"step": 158
},
{
"beta_dpo/beta": 0.007234493736177683,
"beta_dpo/beta_margin_grad_mean": -0.47092530131340027,
"beta_dpo/beta_margin_grad_std": 0.04833231866359711,
"beta_dpo/beta_margin_mean": 0.11883436888456345,
"beta_dpo/beta_margin_std": 0.20030085742473602,
"beta_dpo/beta_used": 0.007234493736177683,
"beta_dpo/beta_used_raw": -0.00955403782427311,
"beta_dpo/gap_mean": 14.409493446350098,
"beta_dpo/gap_std": 21.04364013671875,
"beta_dpo/loss_margin_mean": 13.555635452270508,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.24036281179138322,
"grad_norm": 13.922643661499023,
"learning_rate": 4.715998812855304e-07,
"logits/chosen": -3.317610263824463,
"logits/rejected": -3.3311636447906494,
"loss": 1.2992,
"step": 159
},
{
"beta_dpo/beta": 0.10042039304971695,
"beta_dpo/beta_margin_grad_mean": -0.37785497307777405,
"beta_dpo/beta_margin_grad_std": 0.2555430829524994,
"beta_dpo/beta_margin_mean": 1.435318946838379,
"beta_dpo/beta_margin_std": 3.2165510654449463,
"beta_dpo/beta_used": 0.10042039304971695,
"beta_dpo/beta_used_raw": -0.09143070876598358,
"beta_dpo/gap_mean": 14.128339767456055,
"beta_dpo/gap_std": 20.657943725585938,
"beta_dpo/loss_margin_mean": 12.320213317871094,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2418745275888133,
"grad_norm": 115.22747802734375,
"learning_rate": 4.7098470178228755e-07,
"logits/chosen": -3.2877392768859863,
"logits/rejected": -3.3001627922058105,
"loss": 1.016,
"step": 160
},
{
"beta_dpo/beta": 0.058217551559209824,
"beta_dpo/beta_margin_grad_mean": -0.4167746901512146,
"beta_dpo/beta_margin_grad_std": 0.2455618679523468,
"beta_dpo/beta_margin_mean": 0.7758399844169617,
"beta_dpo/beta_margin_std": 2.212191104888916,
"beta_dpo/beta_used": 0.058217551559209824,
"beta_dpo/beta_used_raw": 0.01114998385310173,
"beta_dpo/gap_mean": 13.952075958251953,
"beta_dpo/gap_std": 20.97524642944336,
"beta_dpo/loss_margin_mean": 12.638019561767578,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.24338624338624337,
"grad_norm": 87.436279296875,
"learning_rate": 4.703633408618955e-07,
"logits/chosen": -3.3042478561401367,
"logits/rejected": -3.316617965698242,
"loss": 1.1296,
"step": 161
},
{
"beta_dpo/beta": 0.22748082876205444,
"beta_dpo/beta_margin_grad_mean": -0.20840942859649658,
"beta_dpo/beta_margin_grad_std": 0.28813979029655457,
"beta_dpo/beta_margin_mean": 4.266569137573242,
"beta_dpo/beta_margin_std": 5.0452094078063965,
"beta_dpo/beta_used": 0.22748082876205444,
"beta_dpo/beta_used_raw": 0.22748082876205444,
"beta_dpo/gap_mean": 14.301603317260742,
"beta_dpo/gap_std": 20.577198028564453,
"beta_dpo/loss_margin_mean": 17.888410568237305,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.24489795918367346,
"grad_norm": 234.487060546875,
"learning_rate": 4.697358159051549e-07,
"logits/chosen": -3.3278586864471436,
"logits/rejected": -3.3324155807495117,
"loss": 0.8148,
"step": 162
},
{
"beta_dpo/beta": 0.1334669440984726,
"beta_dpo/beta_margin_grad_mean": -0.32191550731658936,
"beta_dpo/beta_margin_grad_std": 0.23917384445667267,
"beta_dpo/beta_margin_mean": 2.9123542308807373,
"beta_dpo/beta_margin_std": 4.524738788604736,
"beta_dpo/beta_used": 0.1334669440984726,
"beta_dpo/beta_used_raw": 0.0748547613620758,
"beta_dpo/gap_mean": 14.945584297180176,
"beta_dpo/gap_std": 20.219942092895508,
"beta_dpo/loss_margin_mean": 17.941997528076172,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.24640967498110355,
"grad_norm": 129.17434692382812,
"learning_rate": 4.691021444652876e-07,
"logits/chosen": -3.31915020942688,
"logits/rejected": -3.3229875564575195,
"loss": 0.9564,
"step": 163
},
{
"beta_dpo/beta": 0.09072095900774002,
"beta_dpo/beta_margin_grad_mean": -0.32996121048927307,
"beta_dpo/beta_margin_grad_std": 0.2191038280725479,
"beta_dpo/beta_margin_mean": 1.4329532384872437,
"beta_dpo/beta_margin_std": 2.1871438026428223,
"beta_dpo/beta_used": 0.09072095900774002,
"beta_dpo/beta_used_raw": 0.09072095900774002,
"beta_dpo/gap_mean": 15.211969375610352,
"beta_dpo/gap_std": 20.254531860351562,
"beta_dpo/loss_margin_mean": 14.932231903076172,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.24792139077853365,
"grad_norm": 70.2077865600586,
"learning_rate": 4.6846234426744624e-07,
"logits/chosen": -3.3396153450012207,
"logits/rejected": -3.347288131713867,
"loss": 0.8678,
"step": 164
},
{
"beta_dpo/beta": 0.12051883339881897,
"beta_dpo/beta_margin_grad_mean": -0.2684549391269684,
"beta_dpo/beta_margin_grad_std": 0.24503667652606964,
"beta_dpo/beta_margin_mean": 1.8624266386032104,
"beta_dpo/beta_margin_std": 2.4064364433288574,
"beta_dpo/beta_used": 0.12051883339881897,
"beta_dpo/beta_used_raw": 0.12051883339881897,
"beta_dpo/gap_mean": 15.14497184753418,
"beta_dpo/gap_std": 19.699504852294922,
"beta_dpo/loss_margin_mean": 14.89899730682373,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2494331065759637,
"grad_norm": 89.30416107177734,
"learning_rate": 4.678164332082175e-07,
"logits/chosen": -3.311272144317627,
"logits/rejected": -3.313823938369751,
"loss": 0.6259,
"step": 165
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49724671244621277,
"beta_dpo/beta_margin_grad_std": 0.0040176804177463055,
"beta_dpo/beta_margin_mean": 0.011014166288077831,
"beta_dpo/beta_margin_std": 0.016072683036327362,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.1693364530801773,
"beta_dpo/gap_mean": 14.756368637084961,
"beta_dpo/gap_std": 19.05362319946289,
"beta_dpo/loss_margin_mean": 11.014165878295898,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2509448223733938,
"grad_norm": 1.6793140172958374,
"learning_rate": 4.6716442935512214e-07,
"logits/chosen": -3.310637950897217,
"logits/rejected": -3.3459417819976807,
"loss": 1.3761,
"step": 166
},
{
"beta_dpo/beta": 0.06182756647467613,
"beta_dpo/beta_margin_grad_mean": -0.3825829327106476,
"beta_dpo/beta_margin_grad_std": 0.2320934534072876,
"beta_dpo/beta_margin_mean": 0.8981536626815796,
"beta_dpo/beta_margin_std": 1.8215402364730835,
"beta_dpo/beta_used": 0.06182756647467613,
"beta_dpo/beta_used_raw": 0.020578034222126007,
"beta_dpo/gap_mean": 14.261093139648438,
"beta_dpo/gap_std": 18.644453048706055,
"beta_dpo/loss_margin_mean": 13.281981468200684,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.25245653817082386,
"grad_norm": 78.32828521728516,
"learning_rate": 4.6650635094610966e-07,
"logits/chosen": -3.3038928508758545,
"logits/rejected": -3.306029796600342,
"loss": 1.0166,
"step": 167
},
{
"beta_dpo/beta": 0.027534862980246544,
"beta_dpo/beta_margin_grad_mean": -0.4381820559501648,
"beta_dpo/beta_margin_grad_std": 0.15032429993152618,
"beta_dpo/beta_margin_mean": 0.2957206666469574,
"beta_dpo/beta_margin_std": 0.7526847124099731,
"beta_dpo/beta_used": 0.027534862980246544,
"beta_dpo/beta_used_raw": 0.027534862980246544,
"beta_dpo/gap_mean": 13.721107482910156,
"beta_dpo/gap_std": 18.38541603088379,
"beta_dpo/loss_margin_mean": 11.09146499633789,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.25396825396825395,
"grad_norm": 39.39701461791992,
"learning_rate": 4.6584221638904767e-07,
"logits/chosen": -3.3236608505249023,
"logits/rejected": -3.339106559753418,
"loss": 1.146,
"step": 168
},
{
"beta_dpo/beta": 0.012150160036981106,
"beta_dpo/beta_margin_grad_mean": -0.4621245861053467,
"beta_dpo/beta_margin_grad_std": 0.07575680315494537,
"beta_dpo/beta_margin_mean": 0.15889044106006622,
"beta_dpo/beta_margin_std": 0.32937243580818176,
"beta_dpo/beta_used": 0.012150160036981106,
"beta_dpo/beta_used_raw": -0.017097095027565956,
"beta_dpo/gap_mean": 13.431455612182617,
"beta_dpo/gap_std": 18.825054168701172,
"beta_dpo/loss_margin_mean": 12.781668663024902,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.25547996976568405,
"grad_norm": 19.16012191772461,
"learning_rate": 4.651720442612075e-07,
"logits/chosen": -3.2991950511932373,
"logits/rejected": -3.3047733306884766,
"loss": 1.2584,
"step": 169
},
{
"beta_dpo/beta": 0.2144870012998581,
"beta_dpo/beta_margin_grad_mean": -0.20379450917243958,
"beta_dpo/beta_margin_grad_std": 0.27501732110977173,
"beta_dpo/beta_margin_mean": 3.4251480102539062,
"beta_dpo/beta_margin_std": 4.072988033294678,
"beta_dpo/beta_used": 0.2144870012998581,
"beta_dpo/beta_used_raw": 0.2144870012998581,
"beta_dpo/gap_mean": 13.796789169311523,
"beta_dpo/gap_std": 18.566261291503906,
"beta_dpo/loss_margin_mean": 15.981682777404785,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.25699168556311414,
"grad_norm": 142.732666015625,
"learning_rate": 4.6449585330874425e-07,
"logits/chosen": -3.3022265434265137,
"logits/rejected": -3.3024675846099854,
"loss": 0.5073,
"step": 170
},
{
"beta_dpo/beta": 0.07076213508844376,
"beta_dpo/beta_margin_grad_mean": -0.34848496317863464,
"beta_dpo/beta_margin_grad_std": 0.24325984716415405,
"beta_dpo/beta_margin_mean": 1.2304704189300537,
"beta_dpo/beta_margin_std": 2.1269867420196533,
"beta_dpo/beta_used": 0.07076213508844376,
"beta_dpo/beta_used_raw": 0.03332207724452019,
"beta_dpo/gap_mean": 14.206443786621094,
"beta_dpo/gap_std": 18.874755859375,
"beta_dpo/loss_margin_mean": 15.383790016174316,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2585034013605442,
"grad_norm": 105.55207061767578,
"learning_rate": 4.6381366244617224e-07,
"logits/chosen": -3.3019609451293945,
"logits/rejected": -3.3179931640625,
"loss": 1.109,
"step": 171
},
{
"beta_dpo/beta": 0.0536719411611557,
"beta_dpo/beta_margin_grad_mean": -0.33865830302238464,
"beta_dpo/beta_margin_grad_std": 0.2031082957983017,
"beta_dpo/beta_margin_mean": 0.841856062412262,
"beta_dpo/beta_margin_std": 1.1628142595291138,
"beta_dpo/beta_used": 0.0536719411611557,
"beta_dpo/beta_used_raw": 0.0536719411611557,
"beta_dpo/gap_mean": 14.301804542541504,
"beta_dpo/gap_std": 19.452861785888672,
"beta_dpo/loss_margin_mean": 16.217395782470703,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2600151171579743,
"grad_norm": 58.22761154174805,
"learning_rate": 4.631254907558365e-07,
"logits/chosen": -3.3333513736724854,
"logits/rejected": -3.3494720458984375,
"loss": 0.9375,
"step": 172
},
{
"beta_dpo/beta": 0.21331170201301575,
"beta_dpo/beta_margin_grad_mean": -0.3400728702545166,
"beta_dpo/beta_margin_grad_std": 0.2709886133670807,
"beta_dpo/beta_margin_mean": 4.779065132141113,
"beta_dpo/beta_margin_std": 7.826768398284912,
"beta_dpo/beta_used": 0.21331170201301575,
"beta_dpo/beta_used_raw": 0.18569156527519226,
"beta_dpo/gap_mean": 15.125507354736328,
"beta_dpo/gap_std": 20.31143569946289,
"beta_dpo/loss_margin_mean": 16.441253662109375,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2615268329554044,
"grad_norm": 292.74566650390625,
"learning_rate": 4.624313574873786e-07,
"logits/chosen": -3.304172992706299,
"logits/rejected": -3.322803020477295,
"loss": 1.0188,
"step": 173
},
{
"beta_dpo/beta": 0.0062421588227152824,
"beta_dpo/beta_margin_grad_mean": -0.47547459602355957,
"beta_dpo/beta_margin_grad_std": 0.03574493154883385,
"beta_dpo/beta_margin_mean": 0.09859683364629745,
"beta_dpo/beta_margin_std": 0.1440763771533966,
"beta_dpo/beta_used": 0.0062421588227152824,
"beta_dpo/beta_used_raw": 0.0062421588227152824,
"beta_dpo/gap_mean": 14.888914108276367,
"beta_dpo/gap_std": 21.216093063354492,
"beta_dpo/loss_margin_mean": 15.613706588745117,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.26303854875283444,
"grad_norm": 10.061347007751465,
"learning_rate": 4.61731282057198e-07,
"logits/chosen": -3.2965517044067383,
"logits/rejected": -3.316274404525757,
"loss": 1.3077,
"step": 174
},
{
"beta_dpo/beta": 0.2785711884498596,
"beta_dpo/beta_margin_grad_mean": -0.22018657624721527,
"beta_dpo/beta_margin_grad_std": 0.3459617793560028,
"beta_dpo/beta_margin_mean": 5.1916093826293945,
"beta_dpo/beta_margin_std": 6.421470642089844,
"beta_dpo/beta_used": 0.2785711884498596,
"beta_dpo/beta_used_raw": 0.2785711884498596,
"beta_dpo/gap_mean": 15.445871353149414,
"beta_dpo/gap_std": 21.41485595703125,
"beta_dpo/loss_margin_mean": 18.914648056030273,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.26455026455026454,
"grad_norm": 280.62322998046875,
"learning_rate": 4.6102528404790965e-07,
"logits/chosen": -3.3232455253601074,
"logits/rejected": -3.315721035003662,
"loss": 0.7615,
"step": 175
},
{
"beta_dpo/beta": 0.09155848622322083,
"beta_dpo/beta_margin_grad_mean": -0.35378387570381165,
"beta_dpo/beta_margin_grad_std": 0.24764010310173035,
"beta_dpo/beta_margin_mean": 1.6799997091293335,
"beta_dpo/beta_margin_std": 3.233999252319336,
"beta_dpo/beta_used": 0.09155848622322083,
"beta_dpo/beta_used_raw": -0.0362052246928215,
"beta_dpo/gap_mean": 15.972650527954102,
"beta_dpo/gap_std": 21.989521026611328,
"beta_dpo/loss_margin_mean": 16.272171020507812,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2660619803476946,
"grad_norm": 125.4261474609375,
"learning_rate": 4.603133832077953e-07,
"logits/chosen": -3.324751615524292,
"logits/rejected": -3.3269219398498535,
"loss": 1.0249,
"step": 176
},
{
"beta_dpo/beta": 0.18628999590873718,
"beta_dpo/beta_margin_grad_mean": -0.169387549161911,
"beta_dpo/beta_margin_grad_std": 0.2704985439777374,
"beta_dpo/beta_margin_mean": 4.050033092498779,
"beta_dpo/beta_margin_std": 4.005526542663574,
"beta_dpo/beta_used": 0.18628999590873718,
"beta_dpo/beta_used_raw": 0.18628999590873718,
"beta_dpo/gap_mean": 16.732494354248047,
"beta_dpo/gap_std": 21.966861724853516,
"beta_dpo/loss_margin_mean": 21.59198570251465,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2675736961451247,
"grad_norm": 186.697265625,
"learning_rate": 4.5959559945025183e-07,
"logits/chosen": -3.311750888824463,
"logits/rejected": -3.3363020420074463,
"loss": 0.6518,
"step": 177
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49686744809150696,
"beta_dpo/beta_margin_grad_std": 0.005036898888647556,
"beta_dpo/beta_margin_mean": 0.012532144784927368,
"beta_dpo/beta_margin_std": 0.02015141397714615,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.22806313633918762,
"beta_dpo/gap_mean": 16.291656494140625,
"beta_dpo/gap_std": 21.730037689208984,
"beta_dpo/loss_margin_mean": 12.532143592834473,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2690854119425548,
"grad_norm": 1.6683392524719238,
"learning_rate": 4.588719528532341e-07,
"logits/chosen": -3.304107666015625,
"logits/rejected": -3.312753200531006,
"loss": 1.3756,
"step": 178
},
{
"beta_dpo/beta": 0.08604112267494202,
"beta_dpo/beta_margin_grad_mean": -0.36135733127593994,
"beta_dpo/beta_margin_grad_std": 0.25707748532295227,
"beta_dpo/beta_margin_mean": 1.8054059743881226,
"beta_dpo/beta_margin_std": 3.473832845687866,
"beta_dpo/beta_used": 0.08604112267494202,
"beta_dpo/beta_used_raw": -0.0028993189334869385,
"beta_dpo/gap_mean": 16.365154266357422,
"beta_dpo/gap_std": 21.843692779541016,
"beta_dpo/loss_margin_mean": 18.55828094482422,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2705971277399849,
"grad_norm": 118.9190902709961,
"learning_rate": 4.581424636586928e-07,
"logits/chosen": -3.2978873252868652,
"logits/rejected": -3.296720266342163,
"loss": 1.0073,
"step": 179
},
{
"beta_dpo/beta": 0.025969501584768295,
"beta_dpo/beta_margin_grad_mean": -0.4097326695919037,
"beta_dpo/beta_margin_grad_std": 0.15282127261161804,
"beta_dpo/beta_margin_mean": 0.4748833477497101,
"beta_dpo/beta_margin_std": 0.8532183170318604,
"beta_dpo/beta_used": 0.025969501584768295,
"beta_dpo/beta_used_raw": -0.14239290356636047,
"beta_dpo/gap_mean": 16.275737762451172,
"beta_dpo/gap_std": 22.001022338867188,
"beta_dpo/loss_margin_mean": 15.27760124206543,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.272108843537415,
"grad_norm": 30.002450942993164,
"learning_rate": 4.5740715227200897e-07,
"logits/chosen": -3.3086326122283936,
"logits/rejected": -3.3033132553100586,
"loss": 1.1487,
"step": 180
},
{
"beta_dpo/beta": 0.14988866448402405,
"beta_dpo/beta_margin_grad_mean": -0.29601407051086426,
"beta_dpo/beta_margin_grad_std": 0.2319822609424591,
"beta_dpo/beta_margin_mean": 3.5474066734313965,
"beta_dpo/beta_margin_std": 5.365967750549316,
"beta_dpo/beta_used": 0.14988866448402405,
"beta_dpo/beta_used_raw": 0.10269590467214584,
"beta_dpo/gap_mean": 16.893951416015625,
"beta_dpo/gap_std": 21.91824722290039,
"beta_dpo/loss_margin_mean": 20.77993392944336,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.273620559334845,
"grad_norm": 72.06979370117188,
"learning_rate": 4.566660392614228e-07,
"logits/chosen": -3.303773880004883,
"logits/rejected": -3.3182597160339355,
"loss": 0.8003,
"step": 181
},
{
"beta_dpo/beta": 0.16635611653327942,
"beta_dpo/beta_margin_grad_mean": -0.3139011263847351,
"beta_dpo/beta_margin_grad_std": 0.28551292419433594,
"beta_dpo/beta_margin_mean": 3.821103811264038,
"beta_dpo/beta_margin_std": 7.0303215980529785,
"beta_dpo/beta_used": 0.16635611653327942,
"beta_dpo/beta_used_raw": 0.16635611653327942,
"beta_dpo/gap_mean": 17.882640838623047,
"beta_dpo/gap_std": 22.00394630432129,
"beta_dpo/loss_margin_mean": 21.96054458618164,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2751322751322751,
"grad_norm": 389.318603515625,
"learning_rate": 4.5591914535745817e-07,
"logits/chosen": -3.2827534675598145,
"logits/rejected": -3.3011820316314697,
"loss": 1.3734,
"step": 182
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4973997473716736,
"beta_dpo/beta_margin_grad_std": 0.005496290512382984,
"beta_dpo/beta_margin_mean": 0.010402663610875607,
"beta_dpo/beta_margin_std": 0.021989716216921806,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.2942630350589752,
"beta_dpo/gap_mean": 17.208568572998047,
"beta_dpo/gap_std": 22.319711685180664,
"beta_dpo/loss_margin_mean": 10.402663230895996,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2766439909297052,
"grad_norm": 1.7042104005813599,
"learning_rate": 4.551664914523433e-07,
"logits/chosen": -3.2784993648529053,
"logits/rejected": -3.2837271690368652,
"loss": 1.3758,
"step": 183
},
{
"beta_dpo/beta": 0.06759776175022125,
"beta_dpo/beta_margin_grad_mean": -0.3611212968826294,
"beta_dpo/beta_margin_grad_std": 0.2309117615222931,
"beta_dpo/beta_margin_mean": 1.376258134841919,
"beta_dpo/beta_margin_std": 2.513367176055908,
"beta_dpo/beta_used": 0.06759776175022125,
"beta_dpo/beta_used_raw": 0.04414837062358856,
"beta_dpo/gap_mean": 17.1044921875,
"beta_dpo/gap_std": 22.272825241088867,
"beta_dpo/loss_margin_mean": 19.561098098754883,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2781557067271353,
"grad_norm": 130.62432861328125,
"learning_rate": 4.544080985994258e-07,
"logits/chosen": -3.2587945461273193,
"logits/rejected": -3.2747392654418945,
"loss": 1.1321,
"step": 184
},
{
"beta_dpo/beta": 0.29575276374816895,
"beta_dpo/beta_margin_grad_mean": -0.273440957069397,
"beta_dpo/beta_margin_grad_std": 0.36424484848976135,
"beta_dpo/beta_margin_mean": 5.976340293884277,
"beta_dpo/beta_margin_std": 9.795293807983398,
"beta_dpo/beta_used": 0.29575276374816895,
"beta_dpo/beta_used_raw": 0.29575276374816895,
"beta_dpo/gap_mean": 17.594070434570312,
"beta_dpo/gap_std": 22.862598419189453,
"beta_dpo/loss_margin_mean": 19.736000061035156,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2796674225245654,
"grad_norm": 400.15191650390625,
"learning_rate": 4.5364398801258394e-07,
"logits/chosen": -3.2343192100524902,
"logits/rejected": -3.2469515800476074,
"loss": 1.1488,
"step": 185
},
{
"beta_dpo/beta": 0.13505233824253082,
"beta_dpo/beta_margin_grad_mean": -0.3404614329338074,
"beta_dpo/beta_margin_grad_std": 0.2723199725151062,
"beta_dpo/beta_margin_mean": 3.0053255558013916,
"beta_dpo/beta_margin_std": 5.504380226135254,
"beta_dpo/beta_used": 0.13505233824253082,
"beta_dpo/beta_used_raw": 0.07200624793767929,
"beta_dpo/gap_mean": 18.211688995361328,
"beta_dpo/gap_std": 23.061481475830078,
"beta_dpo/loss_margin_mean": 21.62055015563965,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2811791383219955,
"grad_norm": 185.14002990722656,
"learning_rate": 4.5287418106563354e-07,
"logits/chosen": -3.274827003479004,
"logits/rejected": -3.2913155555725098,
"loss": 0.9694,
"step": 186
},
{
"beta_dpo/beta": 0.13739535212516785,
"beta_dpo/beta_margin_grad_mean": -0.3313996195793152,
"beta_dpo/beta_margin_grad_std": 0.2579861283302307,
"beta_dpo/beta_margin_mean": 2.851552963256836,
"beta_dpo/beta_margin_std": 5.720654010772705,
"beta_dpo/beta_used": 0.13739535212516785,
"beta_dpo/beta_used_raw": 0.13739535212516785,
"beta_dpo/gap_mean": 18.755878448486328,
"beta_dpo/gap_std": 23.751571655273438,
"beta_dpo/loss_margin_mean": 20.688385009765625,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.28269085411942557,
"grad_norm": 96.4170150756836,
"learning_rate": 4.520986992917297e-07,
"logits/chosen": -3.285475730895996,
"logits/rejected": -3.286539316177368,
"loss": 0.8125,
"step": 187
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.494864821434021,
"beta_dpo/beta_margin_grad_std": 0.006589571945369244,
"beta_dpo/beta_margin_mean": 0.02054525725543499,
"beta_dpo/beta_margin_std": 0.02636602707207203,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.11430029571056366,
"beta_dpo/gap_mean": 19.172245025634766,
"beta_dpo/gap_std": 24.501110076904297,
"beta_dpo/loss_margin_mean": 20.545255661010742,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2842025699168556,
"grad_norm": 2.0644304752349854,
"learning_rate": 4.5131756438276466e-07,
"logits/chosen": -3.2795681953430176,
"logits/rejected": -3.2722291946411133,
"loss": 1.3709,
"step": 188
},
{
"beta_dpo/beta": 0.14308232069015503,
"beta_dpo/beta_margin_grad_mean": -0.25916537642478943,
"beta_dpo/beta_margin_grad_std": 0.3050540089607239,
"beta_dpo/beta_margin_mean": 2.8555290699005127,
"beta_dpo/beta_margin_std": 3.7916877269744873,
"beta_dpo/beta_used": 0.14308232069015503,
"beta_dpo/beta_used_raw": 0.14308232069015503,
"beta_dpo/gap_mean": 19.251052856445312,
"beta_dpo/gap_std": 24.474946975708008,
"beta_dpo/loss_margin_mean": 18.743436813354492,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2857142857142857,
"grad_norm": 160.66934204101562,
"learning_rate": 4.5053079818876096e-07,
"logits/chosen": -3.262721061706543,
"logits/rejected": -3.2614657878875732,
"loss": 0.6836,
"step": 189
},
{
"beta_dpo/beta": 0.03282923623919487,
"beta_dpo/beta_margin_grad_mean": -0.38839754462242126,
"beta_dpo/beta_margin_grad_std": 0.19055677950382233,
"beta_dpo/beta_margin_mean": 0.6823714971542358,
"beta_dpo/beta_margin_std": 1.2347010374069214,
"beta_dpo/beta_used": 0.03282923623919487,
"beta_dpo/beta_used_raw": 0.02927407994866371,
"beta_dpo/gap_mean": 19.367252349853516,
"beta_dpo/gap_std": 24.682527542114258,
"beta_dpo/loss_margin_mean": 21.144336700439453,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2872260015117158,
"grad_norm": 47.77622604370117,
"learning_rate": 4.4973842271726024e-07,
"logits/chosen": -3.2488441467285156,
"logits/rejected": -3.2798590660095215,
"loss": 1.0755,
"step": 190
},
{
"beta_dpo/beta": 0.11748480051755905,
"beta_dpo/beta_margin_grad_mean": -0.29674074053764343,
"beta_dpo/beta_margin_grad_std": 0.2827965021133423,
"beta_dpo/beta_margin_mean": 2.221395254135132,
"beta_dpo/beta_margin_std": 3.81189227104187,
"beta_dpo/beta_used": 0.11748480051755905,
"beta_dpo/beta_used_raw": 0.11748480051755905,
"beta_dpo/gap_mean": 19.401161193847656,
"beta_dpo/gap_std": 25.253219604492188,
"beta_dpo/loss_margin_mean": 19.080123901367188,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2887377173091459,
"grad_norm": 155.86167907714844,
"learning_rate": 4.48940460132708e-07,
"logits/chosen": -3.2814383506774902,
"logits/rejected": -3.2890639305114746,
"loss": 0.7357,
"step": 191
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4971839189529419,
"beta_dpo/beta_margin_grad_std": 0.006073605734854937,
"beta_dpo/beta_margin_mean": 0.011266072280704975,
"beta_dpo/beta_margin_std": 0.024298807606101036,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.2048242688179016,
"beta_dpo/gap_mean": 18.151763916015625,
"beta_dpo/gap_std": 25.211868286132812,
"beta_dpo/loss_margin_mean": 11.266072273254395,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.29024943310657597,
"grad_norm": 1.9646292924880981,
"learning_rate": 4.481369327558329e-07,
"logits/chosen": -3.246117115020752,
"logits/rejected": -3.2485339641571045,
"loss": 1.3733,
"step": 192
},
{
"beta_dpo/beta": 0.11741842329502106,
"beta_dpo/beta_margin_grad_mean": -0.35161691904067993,
"beta_dpo/beta_margin_grad_std": 0.277233749628067,
"beta_dpo/beta_margin_mean": 2.721064805984497,
"beta_dpo/beta_margin_std": 5.209236145019531,
"beta_dpo/beta_used": 0.11741842329502106,
"beta_dpo/beta_used_raw": 0.10825469344854355,
"beta_dpo/gap_mean": 18.415042877197266,
"beta_dpo/gap_std": 25.374204635620117,
"beta_dpo/loss_margin_mean": 21.18407440185547,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.29176114890400606,
"grad_norm": 230.92539978027344,
"learning_rate": 4.47327863063023e-07,
"logits/chosen": -3.228006362915039,
"logits/rejected": -3.219388723373413,
"loss": 1.2094,
"step": 193
},
{
"beta_dpo/beta": 0.23822082579135895,
"beta_dpo/beta_margin_grad_mean": -0.39058759808540344,
"beta_dpo/beta_margin_grad_std": 0.3258950114250183,
"beta_dpo/beta_margin_mean": 6.6397705078125,
"beta_dpo/beta_margin_std": 12.369970321655273,
"beta_dpo/beta_used": 0.23822082579135895,
"beta_dpo/beta_used_raw": 0.13888207077980042,
"beta_dpo/gap_mean": 19.15418243408203,
"beta_dpo/gap_std": 25.954063415527344,
"beta_dpo/loss_margin_mean": 21.19514274597168,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.29327286470143615,
"grad_norm": 779.953369140625,
"learning_rate": 4.4651327368569684e-07,
"logits/chosen": -3.2417874336242676,
"logits/rejected": -3.2450783252716064,
"loss": 1.9997,
"step": 194
},
{
"beta_dpo/beta": 0.2709110677242279,
"beta_dpo/beta_margin_grad_mean": -0.17781004309654236,
"beta_dpo/beta_margin_grad_std": 0.30291855335235596,
"beta_dpo/beta_margin_mean": 6.938320636749268,
"beta_dpo/beta_margin_std": 8.197758674621582,
"beta_dpo/beta_used": 0.2709110677242279,
"beta_dpo/beta_used_raw": 0.2709110677242279,
"beta_dpo/gap_mean": 19.833911895751953,
"beta_dpo/gap_std": 25.908788681030273,
"beta_dpo/loss_margin_mean": 25.43059539794922,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2947845804988662,
"grad_norm": 454.9778137207031,
"learning_rate": 4.4569318740967043e-07,
"logits/chosen": -3.2771263122558594,
"logits/rejected": -3.2649998664855957,
"loss": 1.4759,
"step": 195
},
{
"beta_dpo/beta": 0.006062302738428116,
"beta_dpo/beta_margin_grad_mean": -0.47434884309768677,
"beta_dpo/beta_margin_grad_std": 0.05719894543290138,
"beta_dpo/beta_margin_mean": 0.10431405156850815,
"beta_dpo/beta_margin_std": 0.23538783192634583,
"beta_dpo/beta_used": 0.006062302738428116,
"beta_dpo/beta_used_raw": -0.002708437852561474,
"beta_dpo/gap_mean": 20.151742935180664,
"beta_dpo/gap_std": 26.28466796875,
"beta_dpo/loss_margin_mean": 21.88735580444336,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2962962962962963,
"grad_norm": 15.817523956298828,
"learning_rate": 4.448676271745197e-07,
"logits/chosen": -3.2736239433288574,
"logits/rejected": -3.2925784587860107,
"loss": 1.2881,
"step": 196
},
{
"beta_dpo/beta": 0.29121875762939453,
"beta_dpo/beta_margin_grad_mean": -0.20116354525089264,
"beta_dpo/beta_margin_grad_std": 0.3477545380592346,
"beta_dpo/beta_margin_mean": 6.0863165855407715,
"beta_dpo/beta_margin_std": 8.08215045928955,
"beta_dpo/beta_used": 0.29121875762939453,
"beta_dpo/beta_used_raw": 0.29121875762939453,
"beta_dpo/gap_mean": 20.43471908569336,
"beta_dpo/gap_std": 26.642892837524414,
"beta_dpo/loss_margin_mean": 20.978351593017578,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.29780801209372637,
"grad_norm": 315.002685546875,
"learning_rate": 4.440366160729392e-07,
"logits/chosen": -3.2312936782836914,
"logits/rejected": -3.2578063011169434,
"loss": 0.7178,
"step": 197
},
{
"beta_dpo/beta": 0.26085951924324036,
"beta_dpo/beta_margin_grad_mean": -0.35325923562049866,
"beta_dpo/beta_margin_grad_std": 0.3079518675804138,
"beta_dpo/beta_margin_mean": 6.148621559143066,
"beta_dpo/beta_margin_std": 10.981660842895508,
"beta_dpo/beta_used": 0.26085951924324036,
"beta_dpo/beta_used_raw": 0.17097902297973633,
"beta_dpo/gap_mean": 20.784318923950195,
"beta_dpo/gap_std": 26.447837829589844,
"beta_dpo/loss_margin_mean": 22.010072708129883,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.29931972789115646,
"grad_norm": 468.2619323730469,
"learning_rate": 4.432001773500957e-07,
"logits/chosen": -3.237609386444092,
"logits/rejected": -3.2565200328826904,
"loss": 1.0384,
"step": 198
},
{
"beta_dpo/beta": 0.1917811632156372,
"beta_dpo/beta_margin_grad_mean": -0.29717642068862915,
"beta_dpo/beta_margin_grad_std": 0.27208179235458374,
"beta_dpo/beta_margin_mean": 4.629688739776611,
"beta_dpo/beta_margin_std": 8.40585708618164,
"beta_dpo/beta_used": 0.1917811632156372,
"beta_dpo/beta_used_raw": 0.1917811632156372,
"beta_dpo/gap_mean": 20.602859497070312,
"beta_dpo/gap_std": 26.541152954101562,
"beta_dpo/loss_margin_mean": 20.05705451965332,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.30083144368858655,
"grad_norm": 306.9058532714844,
"learning_rate": 4.4235833440297856e-07,
"logits/chosen": -3.2496700286865234,
"logits/rejected": -3.2558960914611816,
"loss": 1.1221,
"step": 199
},
{
"beta_dpo/beta": 0.13134683668613434,
"beta_dpo/beta_margin_grad_mean": -0.36728352308273315,
"beta_dpo/beta_margin_grad_std": 0.28023678064346313,
"beta_dpo/beta_margin_mean": 3.4423487186431885,
"beta_dpo/beta_margin_std": 6.672379493713379,
"beta_dpo/beta_used": 0.13134683668613434,
"beta_dpo/beta_used_raw": 0.1196913868188858,
"beta_dpo/gap_mean": 21.225582122802734,
"beta_dpo/gap_std": 27.08563995361328,
"beta_dpo/loss_margin_mean": 22.524049758911133,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.30234315948601664,
"grad_norm": 221.3130340576172,
"learning_rate": 4.415111107797445e-07,
"logits/chosen": -3.266768455505371,
"logits/rejected": -3.2701830863952637,
"loss": 1.0952,
"step": 200
},
{
"epoch": 0.30234315948601664,
"eval_beta_dpo/beta": 0.016000408679246902,
"eval_beta_dpo/beta_margin_grad_mean": -0.46594667434692383,
"eval_beta_dpo/beta_margin_grad_std": 0.04558912664651871,
"eval_beta_dpo/beta_margin_mean": 0.34341442584991455,
"eval_beta_dpo/beta_margin_std": 0.4801006615161896,
"eval_beta_dpo/beta_used": 0.016000408679246902,
"eval_beta_dpo/beta_used_raw": -0.2764070928096771,
"eval_beta_dpo/gap_mean": 21.098539352416992,
"eval_beta_dpo/gap_std": 27.064327239990234,
"eval_beta_dpo/loss_margin_mean": 14.82508659362793,
"eval_beta_dpo/mask_keep_frac": 1.0,
"eval_logits/chosen": -3.2892954349517822,
"eval_logits/rejected": -3.294360637664795,
"eval_loss": 0.6745692491531372,
"eval_runtime": 36.2978,
"eval_samples_per_second": 63.447,
"eval_steps_per_second": 1.984,
"step": 200
},
{
"beta_dpo/beta": 0.16286759078502655,
"beta_dpo/beta_margin_grad_mean": -0.3552103340625763,
"beta_dpo/beta_margin_grad_std": 0.28962671756744385,
"beta_dpo/beta_margin_mean": 4.282315254211426,
"beta_dpo/beta_margin_std": 8.11639404296875,
"beta_dpo/beta_used": 0.16286759078502655,
"beta_dpo/beta_used_raw": 0.07481355965137482,
"beta_dpo/gap_mean": 21.18798065185547,
"beta_dpo/gap_std": 27.247983932495117,
"beta_dpo/loss_margin_mean": 19.636842727661133,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.30385487528344673,
"grad_norm": 338.0337829589844,
"learning_rate": 4.4065853017905953e-07,
"logits/chosen": -3.2681736946105957,
"logits/rejected": -3.267396926879883,
"loss": 1.281,
"step": 201
},
{
"beta_dpo/beta": 0.041739847511053085,
"beta_dpo/beta_margin_grad_mean": -0.37753400206565857,
"beta_dpo/beta_margin_grad_std": 0.21136833727359772,
"beta_dpo/beta_margin_mean": 0.6857576966285706,
"beta_dpo/beta_margin_std": 1.3818042278289795,
"beta_dpo/beta_used": 0.041739847511053085,
"beta_dpo/beta_used_raw": 0.041739847511053085,
"beta_dpo/gap_mean": 19.68051528930664,
"beta_dpo/gap_std": 27.123966217041016,
"beta_dpo/loss_margin_mean": 14.209870338439941,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.30536659108087677,
"grad_norm": 62.69889450073242,
"learning_rate": 4.3980061644943575e-07,
"logits/chosen": -3.2481625080108643,
"logits/rejected": -3.2584476470947266,
"loss": 0.9603,
"step": 202
},
{
"beta_dpo/beta": 0.07021359354257584,
"beta_dpo/beta_margin_grad_mean": -0.3578731417655945,
"beta_dpo/beta_margin_grad_std": 0.2622828483581543,
"beta_dpo/beta_margin_mean": 1.4660753011703491,
"beta_dpo/beta_margin_std": 3.2228591442108154,
"beta_dpo/beta_used": 0.07021359354257584,
"beta_dpo/beta_used_raw": -0.050061143934726715,
"beta_dpo/gap_mean": 19.621105194091797,
"beta_dpo/gap_std": 27.237117767333984,
"beta_dpo/loss_margin_mean": 19.756534576416016,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.30687830687830686,
"grad_norm": 130.79299926757812,
"learning_rate": 4.3893739358856455e-07,
"logits/chosen": -3.278709650039673,
"logits/rejected": -3.3059582710266113,
"loss": 1.0217,
"step": 203
},
{
"beta_dpo/beta": 0.29017174243927,
"beta_dpo/beta_margin_grad_mean": -0.3226993680000305,
"beta_dpo/beta_margin_grad_std": 0.29813989996910095,
"beta_dpo/beta_margin_mean": 7.778024196624756,
"beta_dpo/beta_margin_std": 12.823948860168457,
"beta_dpo/beta_used": 0.29017174243927,
"beta_dpo/beta_used_raw": 0.13998277485370636,
"beta_dpo/gap_mean": 19.98705291748047,
"beta_dpo/gap_std": 26.805776596069336,
"beta_dpo/loss_margin_mean": 20.49005699157715,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.30839002267573695,
"grad_norm": 338.6097412109375,
"learning_rate": 4.380688857426449e-07,
"logits/chosen": -3.238096237182617,
"logits/rejected": -3.2416350841522217,
"loss": 1.2486,
"step": 204
},
{
"beta_dpo/beta": 0.2872874438762665,
"beta_dpo/beta_margin_grad_mean": -0.19735579192638397,
"beta_dpo/beta_margin_grad_std": 0.34152576327323914,
"beta_dpo/beta_margin_mean": 6.9864420890808105,
"beta_dpo/beta_margin_std": 8.581732749938965,
"beta_dpo/beta_used": 0.2872874438762665,
"beta_dpo/beta_used_raw": 0.2872874438762665,
"beta_dpo/gap_mean": 20.530967712402344,
"beta_dpo/gap_std": 26.722898483276367,
"beta_dpo/loss_margin_mean": 24.720294952392578,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.30990173847316704,
"grad_norm": 366.4891357421875,
"learning_rate": 4.3719511720570814e-07,
"logits/chosen": -3.26456880569458,
"logits/rejected": -3.279543876647949,
"loss": 0.9758,
"step": 205
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49615031480789185,
"beta_dpo/beta_margin_grad_std": 0.0071373567916452885,
"beta_dpo/beta_margin_mean": 0.015402463264763355,
"beta_dpo/beta_margin_std": 0.02855776436626911,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.22389058768749237,
"beta_dpo/gap_mean": 19.731658935546875,
"beta_dpo/gap_std": 27.339256286621094,
"beta_dpo/loss_margin_mean": 15.402462005615234,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.31141345427059713,
"grad_norm": 1.963984489440918,
"learning_rate": 4.363161124189387e-07,
"logits/chosen": -3.260132312774658,
"logits/rejected": -3.271867275238037,
"loss": 1.3721,
"step": 206
},
{
"beta_dpo/beta": 0.22665925323963165,
"beta_dpo/beta_margin_grad_mean": -0.2844540476799011,
"beta_dpo/beta_margin_grad_std": 0.2940859794616699,
"beta_dpo/beta_margin_mean": 4.840798854827881,
"beta_dpo/beta_margin_std": 8.58215618133545,
"beta_dpo/beta_used": 0.22665925323963165,
"beta_dpo/beta_used_raw": 0.22665925323963165,
"beta_dpo/gap_mean": 19.807737350463867,
"beta_dpo/gap_std": 27.32258415222168,
"beta_dpo/loss_margin_mean": 20.418903350830078,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3129251700680272,
"grad_norm": 443.6466064453125,
"learning_rate": 4.3543189596998986e-07,
"logits/chosen": -3.270188808441162,
"logits/rejected": -3.292062759399414,
"loss": 1.324,
"step": 207
},
{
"beta_dpo/beta": 0.19777607917785645,
"beta_dpo/beta_margin_grad_mean": -0.3751469850540161,
"beta_dpo/beta_margin_grad_std": 0.304066926240921,
"beta_dpo/beta_margin_mean": 4.046874046325684,
"beta_dpo/beta_margin_std": 8.362845420837402,
"beta_dpo/beta_used": 0.19777607917785645,
"beta_dpo/beta_used_raw": 0.18612337112426758,
"beta_dpo/gap_mean": 19.386688232421875,
"beta_dpo/gap_std": 27.094337463378906,
"beta_dpo/loss_margin_mean": 17.85321807861328,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3144368858654573,
"grad_norm": 328.3226623535156,
"learning_rate": 4.3454249259229664e-07,
"logits/chosen": -3.2676501274108887,
"logits/rejected": -3.263576030731201,
"loss": 1.2187,
"step": 208
},
{
"beta_dpo/beta": 0.25237298011779785,
"beta_dpo/beta_margin_grad_mean": -0.22381648421287537,
"beta_dpo/beta_margin_grad_std": 0.31607723236083984,
"beta_dpo/beta_margin_mean": 5.337110996246338,
"beta_dpo/beta_margin_std": 8.909046173095703,
"beta_dpo/beta_used": 0.25237298011779785,
"beta_dpo/beta_used_raw": 0.25237298011779785,
"beta_dpo/gap_mean": 19.410289764404297,
"beta_dpo/gap_std": 27.153701782226562,
"beta_dpo/loss_margin_mean": 19.8518009185791,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.31594860166288735,
"grad_norm": 313.1184387207031,
"learning_rate": 4.336479271643833e-07,
"logits/chosen": -3.266294002532959,
"logits/rejected": -3.2863268852233887,
"loss": 0.6238,
"step": 209
},
{
"beta_dpo/beta": 0.17239555716514587,
"beta_dpo/beta_margin_grad_mean": -0.3229129910469055,
"beta_dpo/beta_margin_grad_std": 0.2825334370136261,
"beta_dpo/beta_margin_mean": 4.328238010406494,
"beta_dpo/beta_margin_std": 7.7128071784973145,
"beta_dpo/beta_used": 0.17239555716514587,
"beta_dpo/beta_used_raw": 0.17239555716514587,
"beta_dpo/gap_mean": 19.874401092529297,
"beta_dpo/gap_std": 27.465452194213867,
"beta_dpo/loss_margin_mean": 22.63663673400879,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.31746031746031744,
"grad_norm": 297.1786804199219,
"learning_rate": 4.327482247091679e-07,
"logits/chosen": -3.2740461826324463,
"logits/rejected": -3.299710512161255,
"loss": 1.2982,
"step": 210
},
{
"beta_dpo/beta": 0.0435391403734684,
"beta_dpo/beta_margin_grad_mean": -0.338135302066803,
"beta_dpo/beta_margin_grad_std": 0.18870781362056732,
"beta_dpo/beta_margin_mean": 0.8430750370025635,
"beta_dpo/beta_margin_std": 1.062753677368164,
"beta_dpo/beta_used": 0.0435391403734684,
"beta_dpo/beta_used_raw": 0.0435391403734684,
"beta_dpo/gap_mean": 19.97136878967285,
"beta_dpo/gap_std": 27.011962890625,
"beta_dpo/loss_margin_mean": 19.604583740234375,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.31897203325774753,
"grad_norm": 67.0111312866211,
"learning_rate": 4.3184341039326217e-07,
"logits/chosen": -3.2469844818115234,
"logits/rejected": -3.279109001159668,
"loss": 0.8721,
"step": 211
},
{
"beta_dpo/beta": 0.20144376158714294,
"beta_dpo/beta_margin_grad_mean": -0.23173686861991882,
"beta_dpo/beta_margin_grad_std": 0.32392561435699463,
"beta_dpo/beta_margin_mean": 4.304144382476807,
"beta_dpo/beta_margin_std": 5.742385387420654,
"beta_dpo/beta_used": 0.20144376158714294,
"beta_dpo/beta_used_raw": 0.20144376158714294,
"beta_dpo/gap_mean": 20.256540298461914,
"beta_dpo/gap_std": 27.046674728393555,
"beta_dpo/loss_margin_mean": 20.608171463012695,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3204837490551776,
"grad_norm": 216.76519775390625,
"learning_rate": 4.309335095262675e-07,
"logits/chosen": -3.25557804107666,
"logits/rejected": -3.262904644012451,
"loss": 0.7518,
"step": 212
},
{
"beta_dpo/beta": 0.04288367182016373,
"beta_dpo/beta_margin_grad_mean": -0.39783409237861633,
"beta_dpo/beta_margin_grad_std": 0.21729709208011627,
"beta_dpo/beta_margin_mean": 0.784849226474762,
"beta_dpo/beta_margin_std": 1.8496593236923218,
"beta_dpo/beta_used": 0.04288367182016373,
"beta_dpo/beta_used_raw": -0.05440632253885269,
"beta_dpo/gap_mean": 19.813491821289062,
"beta_dpo/gap_std": 27.621681213378906,
"beta_dpo/loss_margin_mean": 17.58714485168457,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3219954648526077,
"grad_norm": 79.05302429199219,
"learning_rate": 4.3001854756006724e-07,
"logits/chosen": -3.295498847961426,
"logits/rejected": -3.2903060913085938,
"loss": 1.025,
"step": 213
},
{
"beta_dpo/beta": 0.14685657620429993,
"beta_dpo/beta_margin_grad_mean": -0.2411939799785614,
"beta_dpo/beta_margin_grad_std": 0.33106184005737305,
"beta_dpo/beta_margin_mean": 3.089200973510742,
"beta_dpo/beta_margin_std": 4.400245189666748,
"beta_dpo/beta_used": 0.14685657620429993,
"beta_dpo/beta_used_raw": 0.14685657620429993,
"beta_dpo/gap_mean": 19.749910354614258,
"beta_dpo/gap_std": 28.07058334350586,
"beta_dpo/loss_margin_mean": 20.78150177001953,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3235071806500378,
"grad_norm": 147.87237548828125,
"learning_rate": 4.290985500881143e-07,
"logits/chosen": -3.282254457473755,
"logits/rejected": -3.2713708877563477,
"loss": 0.7766,
"step": 214
},
{
"beta_dpo/beta": 0.3372025489807129,
"beta_dpo/beta_margin_grad_mean": -0.20430360734462738,
"beta_dpo/beta_margin_grad_std": 0.3473961651325226,
"beta_dpo/beta_margin_mean": 8.567434310913086,
"beta_dpo/beta_margin_std": 10.161273956298828,
"beta_dpo/beta_used": 0.3372025489807129,
"beta_dpo/beta_used_raw": 0.3372025489807129,
"beta_dpo/gap_mean": 20.45541763305664,
"beta_dpo/gap_std": 28.54821014404297,
"beta_dpo/loss_margin_mean": 24.713115692138672,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3250188964474679,
"grad_norm": 478.9543151855469,
"learning_rate": 4.281735428447157e-07,
"logits/chosen": -3.2881035804748535,
"logits/rejected": -3.295775890350342,
"loss": 0.9709,
"step": 215
},
{
"beta_dpo/beta": 0.029055660590529442,
"beta_dpo/beta_margin_grad_mean": -0.40032076835632324,
"beta_dpo/beta_margin_grad_std": 0.19772301614284515,
"beta_dpo/beta_margin_mean": 0.6975875496864319,
"beta_dpo/beta_margin_std": 1.4651895761489868,
"beta_dpo/beta_used": 0.029055660590529442,
"beta_dpo/beta_used_raw": 0.009354954585433006,
"beta_dpo/gap_mean": 20.790666580200195,
"beta_dpo/gap_std": 28.88433265686035,
"beta_dpo/loss_margin_mean": 18.89692497253418,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.32653061224489793,
"grad_norm": 56.74131393432617,
"learning_rate": 4.2724355170431247e-07,
"logits/chosen": -3.3080687522888184,
"logits/rejected": -3.325687885284424,
"loss": 1.0933,
"step": 216
},
{
"beta_dpo/beta": 0.11183890700340271,
"beta_dpo/beta_margin_grad_mean": -0.3460896909236908,
"beta_dpo/beta_margin_grad_std": 0.2733393907546997,
"beta_dpo/beta_margin_mean": 2.4672842025756836,
"beta_dpo/beta_margin_std": 5.133706092834473,
"beta_dpo/beta_used": 0.11183890700340271,
"beta_dpo/beta_used_raw": 0.11183890700340271,
"beta_dpo/gap_mean": 20.666297912597656,
"beta_dpo/gap_std": 28.4071044921875,
"beta_dpo/loss_margin_mean": 22.13884162902832,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.328042328042328,
"grad_norm": 205.36830139160156,
"learning_rate": 4.26308602680756e-07,
"logits/chosen": -3.277308940887451,
"logits/rejected": -3.2828803062438965,
"loss": 1.0462,
"step": 217
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49692150950431824,
"beta_dpo/beta_margin_grad_std": 0.007039310876280069,
"beta_dpo/beta_margin_mean": 0.012317297048866749,
"beta_dpo/beta_margin_std": 0.028165044263005257,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.15160530805587769,
"beta_dpo/gap_mean": 19.414508819580078,
"beta_dpo/gap_std": 28.22842025756836,
"beta_dpo/loss_margin_mean": 12.317296981811523,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3295540438397581,
"grad_norm": 2.0457305908203125,
"learning_rate": 4.253687219265803e-07,
"logits/chosen": -3.293532133102417,
"logits/rejected": -3.290022611618042,
"loss": 1.3712,
"step": 218
},
{
"beta_dpo/beta": 0.011915156617760658,
"beta_dpo/beta_margin_grad_mean": -0.45243147015571594,
"beta_dpo/beta_margin_grad_std": 0.0773100033402443,
"beta_dpo/beta_margin_mean": 0.19927677512168884,
"beta_dpo/beta_margin_std": 0.328296422958374,
"beta_dpo/beta_used": 0.011915156617760658,
"beta_dpo/beta_used_raw": -0.004701080732047558,
"beta_dpo/gap_mean": 18.749467849731445,
"beta_dpo/gap_std": 27.01136016845703,
"beta_dpo/loss_margin_mean": 16.104183197021484,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3310657596371882,
"grad_norm": 25.289398193359375,
"learning_rate": 4.2442393573227043e-07,
"logits/chosen": -3.290285110473633,
"logits/rejected": -3.2973287105560303,
"loss": 1.211,
"step": 219
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49513691663742065,
"beta_dpo/beta_margin_grad_std": 0.0066979266703128815,
"beta_dpo/beta_margin_mean": 0.019456947222352028,
"beta_dpo/beta_margin_std": 0.026801228523254395,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.16698744893074036,
"beta_dpo/gap_mean": 18.785911560058594,
"beta_dpo/gap_std": 26.530315399169922,
"beta_dpo/loss_margin_mean": 19.456945419311523,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3325774754346183,
"grad_norm": 1.7459897994995117,
"learning_rate": 4.234742705255272e-07,
"logits/chosen": -3.255657196044922,
"logits/rejected": -3.2655575275421143,
"loss": 1.3721,
"step": 220
},
{
"beta_dpo/beta": 0.17717352509498596,
"beta_dpo/beta_margin_grad_mean": -0.28028541803359985,
"beta_dpo/beta_margin_grad_std": 0.3005788326263428,
"beta_dpo/beta_margin_mean": 3.4370474815368652,
"beta_dpo/beta_margin_std": 6.133222579956055,
"beta_dpo/beta_used": 0.17717352509498596,
"beta_dpo/beta_used_raw": 0.17717352509498596,
"beta_dpo/gap_mean": 18.612773895263672,
"beta_dpo/gap_std": 26.989688873291016,
"beta_dpo/loss_margin_mean": 18.417015075683594,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3340891912320484,
"grad_norm": 160.68197631835938,
"learning_rate": 4.22519752870528e-07,
"logits/chosen": -3.317051410675049,
"logits/rejected": -3.3476691246032715,
"loss": 0.6941,
"step": 221
},
{
"beta_dpo/beta": 0.057558316737413406,
"beta_dpo/beta_margin_grad_mean": -0.34050142765045166,
"beta_dpo/beta_margin_grad_std": 0.21491679549217224,
"beta_dpo/beta_margin_mean": 1.4228301048278809,
"beta_dpo/beta_margin_std": 2.227675676345825,
"beta_dpo/beta_used": 0.057558316737413406,
"beta_dpo/beta_used_raw": -0.017362136393785477,
"beta_dpo/gap_mean": 19.714611053466797,
"beta_dpo/gap_std": 27.128393173217773,
"beta_dpo/loss_margin_mean": 26.319135665893555,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3356009070294785,
"grad_norm": 81.41806030273438,
"learning_rate": 4.2156040946718343e-07,
"logits/chosen": -3.302428722381592,
"logits/rejected": -3.330812454223633,
"loss": 0.9621,
"step": 222
},
{
"beta_dpo/beta": 0.11043448746204376,
"beta_dpo/beta_margin_grad_mean": -0.3506692051887512,
"beta_dpo/beta_margin_grad_std": 0.27599218487739563,
"beta_dpo/beta_margin_mean": 2.4996182918548584,
"beta_dpo/beta_margin_std": 4.717770099639893,
"beta_dpo/beta_used": 0.11043448746204376,
"beta_dpo/beta_used_raw": 0.0481397770345211,
"beta_dpo/gap_mean": 20.32034683227539,
"beta_dpo/gap_std": 27.67890167236328,
"beta_dpo/loss_margin_mean": 20.74583625793457,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3371126228269085,
"grad_norm": 229.6683349609375,
"learning_rate": 4.2059626715039065e-07,
"logits/chosen": -3.2972192764282227,
"logits/rejected": -3.313932418823242,
"loss": 1.323,
"step": 223
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4962630271911621,
"beta_dpo/beta_margin_grad_std": 0.006226621102541685,
"beta_dpo/beta_margin_mean": 0.014951368793845177,
"beta_dpo/beta_margin_std": 0.02491535060107708,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.22395360469818115,
"beta_dpo/gap_mean": 19.58163070678711,
"beta_dpo/gap_std": 27.313385009765625,
"beta_dpo/loss_margin_mean": 14.95136833190918,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3386243386243386,
"grad_norm": 1.8963252305984497,
"learning_rate": 4.1962735288928304e-07,
"logits/chosen": -3.3100385665893555,
"logits/rejected": -3.324625253677368,
"loss": 1.3722,
"step": 224
},
{
"beta_dpo/beta": 0.11167913675308228,
"beta_dpo/beta_margin_grad_mean": -0.3448824882507324,
"beta_dpo/beta_margin_grad_std": 0.28536394238471985,
"beta_dpo/beta_margin_mean": 2.633960247039795,
"beta_dpo/beta_margin_std": 4.381831169128418,
"beta_dpo/beta_used": 0.11167913675308228,
"beta_dpo/beta_used_raw": -0.005499660968780518,
"beta_dpo/gap_mean": 19.675987243652344,
"beta_dpo/gap_std": 26.83489227294922,
"beta_dpo/loss_margin_mean": 21.864295959472656,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3401360544217687,
"grad_norm": 215.1800079345703,
"learning_rate": 4.186536937864752e-07,
"logits/chosen": -3.2780818939208984,
"logits/rejected": -3.2924938201904297,
"loss": 1.1357,
"step": 225
},
{
"beta_dpo/beta": 0.21144188940525055,
"beta_dpo/beta_margin_grad_mean": -0.2692214846611023,
"beta_dpo/beta_margin_grad_std": 0.3168647885322571,
"beta_dpo/beta_margin_mean": 5.076745986938477,
"beta_dpo/beta_margin_std": 9.32989501953125,
"beta_dpo/beta_used": 0.21144188940525055,
"beta_dpo/beta_used_raw": 0.21144188940525055,
"beta_dpo/gap_mean": 20.018922805786133,
"beta_dpo/gap_std": 26.834392547607422,
"beta_dpo/loss_margin_mean": 22.54623031616211,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3416477702191988,
"grad_norm": 314.6019287109375,
"learning_rate": 4.176753170773052e-07,
"logits/chosen": -3.272228240966797,
"logits/rejected": -3.277193069458008,
"loss": 1.3053,
"step": 226
},
{
"beta_dpo/beta": 0.2749040126800537,
"beta_dpo/beta_margin_grad_mean": -0.23924539983272552,
"beta_dpo/beta_margin_grad_std": 0.35665369033813477,
"beta_dpo/beta_margin_mean": 5.016239166259766,
"beta_dpo/beta_margin_std": 7.605776309967041,
"beta_dpo/beta_used": 0.2749040126800537,
"beta_dpo/beta_used_raw": 0.2749040126800537,
"beta_dpo/gap_mean": 20.03274154663086,
"beta_dpo/gap_std": 27.213176727294922,
"beta_dpo/loss_margin_mean": 17.853012084960938,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3431594860166289,
"grad_norm": 239.6156768798828,
"learning_rate": 4.166922501290729e-07,
"logits/chosen": -3.2811617851257324,
"logits/rejected": -3.2786879539489746,
"loss": 0.5139,
"step": 227
},
{
"beta_dpo/beta": 0.09486433863639832,
"beta_dpo/beta_margin_grad_mean": -0.35757672786712646,
"beta_dpo/beta_margin_grad_std": 0.27951931953430176,
"beta_dpo/beta_margin_mean": 2.100579261779785,
"beta_dpo/beta_margin_std": 4.581786632537842,
"beta_dpo/beta_used": 0.09486433863639832,
"beta_dpo/beta_used_raw": 0.039446499198675156,
"beta_dpo/gap_mean": 20.004276275634766,
"beta_dpo/gap_std": 27.34688949584961,
"beta_dpo/loss_margin_mean": 21.535043716430664,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.34467120181405897,
"grad_norm": 125.40672302246094,
"learning_rate": 4.1570452044027405e-07,
"logits/chosen": -3.26662015914917,
"logits/rejected": -3.273747205734253,
"loss": 0.9515,
"step": 228
},
{
"beta_dpo/beta": 0.15300440788269043,
"beta_dpo/beta_margin_grad_mean": -0.29611581563949585,
"beta_dpo/beta_margin_grad_std": 0.3123247027397156,
"beta_dpo/beta_margin_mean": 3.213453531265259,
"beta_dpo/beta_margin_std": 5.308069229125977,
"beta_dpo/beta_used": 0.15300440788269043,
"beta_dpo/beta_used_raw": 0.15300440788269043,
"beta_dpo/gap_mean": 19.881502151489258,
"beta_dpo/gap_std": 27.33250617980957,
"beta_dpo/loss_margin_mean": 19.248979568481445,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.34618291761148906,
"grad_norm": 204.28244018554688,
"learning_rate": 4.147121556398312e-07,
"logits/chosen": -3.281358242034912,
"logits/rejected": -3.281156301498413,
"loss": 0.8386,
"step": 229
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4952212870121002,
"beta_dpo/beta_margin_grad_std": 0.007806302979588509,
"beta_dpo/beta_margin_mean": 0.019118983298540115,
"beta_dpo/beta_margin_std": 0.03124173916876316,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.08556269854307175,
"beta_dpo/gap_mean": 19.63062286376953,
"beta_dpo/gap_std": 27.971975326538086,
"beta_dpo/loss_margin_mean": 19.118982315063477,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3476946334089191,
"grad_norm": 1.7689595222473145,
"learning_rate": 4.137151834863213e-07,
"logits/chosen": -3.2919821739196777,
"logits/rejected": -3.292602062225342,
"loss": 1.3699,
"step": 230
},
{
"beta_dpo/beta": 0.48542118072509766,
"beta_dpo/beta_margin_grad_mean": -0.16059984266757965,
"beta_dpo/beta_margin_grad_std": 0.3215428292751312,
"beta_dpo/beta_margin_mean": 13.145872116088867,
"beta_dpo/beta_margin_std": 14.5763521194458,
"beta_dpo/beta_used": 0.48542118072509766,
"beta_dpo/beta_used_raw": 0.48542118072509766,
"beta_dpo/gap_mean": 20.86912727355957,
"beta_dpo/gap_std": 27.646385192871094,
"beta_dpo/loss_margin_mean": 25.624082565307617,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3492063492063492,
"grad_norm": 487.8958435058594,
"learning_rate": 4.1271363186719835e-07,
"logits/chosen": -3.275543689727783,
"logits/rejected": -3.2655463218688965,
"loss": 1.0957,
"step": 231
},
{
"beta_dpo/beta": 0.11720205843448639,
"beta_dpo/beta_margin_grad_mean": -0.3646947145462036,
"beta_dpo/beta_margin_grad_std": 0.286128968000412,
"beta_dpo/beta_margin_mean": 2.2955892086029053,
"beta_dpo/beta_margin_std": 4.999972343444824,
"beta_dpo/beta_used": 0.11720205843448639,
"beta_dpo/beta_used_raw": 0.02803657203912735,
"beta_dpo/gap_mean": 20.76835823059082,
"beta_dpo/gap_std": 27.87753677368164,
"beta_dpo/loss_margin_mean": 20.00889015197754,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3507180650037793,
"grad_norm": 175.38694763183594,
"learning_rate": 4.1170752879801436e-07,
"logits/chosen": -3.2884902954101562,
"logits/rejected": -3.3059821128845215,
"loss": 1.1498,
"step": 232
},
{
"beta_dpo/beta": 0.03464564308524132,
"beta_dpo/beta_margin_grad_mean": -0.39322006702423096,
"beta_dpo/beta_margin_grad_std": 0.20333139598369598,
"beta_dpo/beta_margin_mean": 0.709579586982727,
"beta_dpo/beta_margin_std": 1.3930869102478027,
"beta_dpo/beta_used": 0.03464564308524132,
"beta_dpo/beta_used_raw": -0.08792220056056976,
"beta_dpo/gap_mean": 20.235342025756836,
"beta_dpo/gap_std": 27.681325912475586,
"beta_dpo/loss_margin_mean": 15.653010368347168,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.35222978080120937,
"grad_norm": 55.460296630859375,
"learning_rate": 4.106969024216348e-07,
"logits/chosen": -3.2439560890197754,
"logits/rejected": -3.2535340785980225,
"loss": 1.0405,
"step": 233
},
{
"beta_dpo/beta": 0.1527194380760193,
"beta_dpo/beta_margin_grad_mean": -0.3214091956615448,
"beta_dpo/beta_margin_grad_std": 0.26886996626853943,
"beta_dpo/beta_margin_mean": 3.466355085372925,
"beta_dpo/beta_margin_std": 6.77988338470459,
"beta_dpo/beta_used": 0.1527194380760193,
"beta_dpo/beta_used_raw": -0.0909288078546524,
"beta_dpo/gap_mean": 18.938209533691406,
"beta_dpo/gap_std": 27.582704544067383,
"beta_dpo/loss_margin_mean": 16.264286041259766,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.35374149659863946,
"grad_norm": 266.7095947265625,
"learning_rate": 4.09681781007452e-07,
"logits/chosen": -3.269228935241699,
"logits/rejected": -3.269357442855835,
"loss": 0.9565,
"step": 234
},
{
"beta_dpo/beta": 0.30560052394866943,
"beta_dpo/beta_margin_grad_mean": -0.14003857970237732,
"beta_dpo/beta_margin_grad_std": 0.27959078550338745,
"beta_dpo/beta_margin_mean": 7.613363742828369,
"beta_dpo/beta_margin_std": 7.636228084564209,
"beta_dpo/beta_used": 0.30560052394866943,
"beta_dpo/beta_used_raw": 0.30560052394866943,
"beta_dpo/gap_mean": 19.932910919189453,
"beta_dpo/gap_std": 27.26492691040039,
"beta_dpo/loss_margin_mean": 25.049949645996094,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.35525321239606955,
"grad_norm": 323.9731140136719,
"learning_rate": 4.08662192950594e-07,
"logits/chosen": -3.3123879432678223,
"logits/rejected": -3.316612720489502,
"loss": 0.4978,
"step": 235
},
{
"beta_dpo/beta": 0.37364262342453003,
"beta_dpo/beta_margin_grad_mean": -0.20121777057647705,
"beta_dpo/beta_margin_grad_std": 0.36329779028892517,
"beta_dpo/beta_margin_mean": 8.671346664428711,
"beta_dpo/beta_margin_std": 10.987950325012207,
"beta_dpo/beta_used": 0.37364262342453003,
"beta_dpo/beta_used_raw": 0.37364262342453003,
"beta_dpo/gap_mean": 20.939083099365234,
"beta_dpo/gap_std": 27.456195831298828,
"beta_dpo/loss_margin_mean": 23.402854919433594,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.35676492819349964,
"grad_norm": 661.39013671875,
"learning_rate": 4.076381667711306e-07,
"logits/chosen": -3.2854065895080566,
"logits/rejected": -3.274115800857544,
"loss": 0.9594,
"step": 236
},
{
"beta_dpo/beta": 0.09945555031299591,
"beta_dpo/beta_margin_grad_mean": -0.27587732672691345,
"beta_dpo/beta_margin_grad_std": 0.28971001505851746,
"beta_dpo/beta_margin_mean": 2.4051265716552734,
"beta_dpo/beta_margin_std": 3.4533803462982178,
"beta_dpo/beta_used": 0.09945555031299591,
"beta_dpo/beta_used_raw": 0.09945555031299591,
"beta_dpo/gap_mean": 21.28778076171875,
"beta_dpo/gap_std": 27.852872848510742,
"beta_dpo/loss_margin_mean": 23.50334358215332,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.35827664399092973,
"grad_norm": 211.1277313232422,
"learning_rate": 4.066097311132753e-07,
"logits/chosen": -3.2915210723876953,
"logits/rejected": -3.293576955795288,
"loss": 0.856,
"step": 237
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4955797791481018,
"beta_dpo/beta_margin_grad_std": 0.006702260579913855,
"beta_dpo/beta_margin_mean": 0.017684506252408028,
"beta_dpo/beta_margin_std": 0.026815088465809822,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.2128760814666748,
"beta_dpo/gap_mean": 20.925275802612305,
"beta_dpo/gap_std": 27.556137084960938,
"beta_dpo/loss_margin_mean": 17.684505462646484,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.35978835978835977,
"grad_norm": 7.044036865234375,
"learning_rate": 4.0557691474458414e-07,
"logits/chosen": -3.2529449462890625,
"logits/rejected": -3.262035608291626,
"loss": 1.3708,
"step": 238
},
{
"beta_dpo/beta": 0.05461409315466881,
"beta_dpo/beta_margin_grad_mean": -0.38040855526924133,
"beta_dpo/beta_margin_grad_std": 0.2407120168209076,
"beta_dpo/beta_margin_mean": 1.208554983139038,
"beta_dpo/beta_margin_std": 2.6001651287078857,
"beta_dpo/beta_used": 0.05461409315466881,
"beta_dpo/beta_used_raw": 0.03262433409690857,
"beta_dpo/gap_mean": 20.93581199645996,
"beta_dpo/gap_std": 27.82280731201172,
"beta_dpo/loss_margin_mean": 23.322006225585938,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.36130007558578986,
"grad_norm": 84.934326171875,
"learning_rate": 4.045397465551513e-07,
"logits/chosen": -3.267176628112793,
"logits/rejected": -3.295515537261963,
"loss": 1.0566,
"step": 239
},
{
"beta_dpo/beta": 0.4454389214515686,
"beta_dpo/beta_margin_grad_mean": -0.1757676601409912,
"beta_dpo/beta_margin_grad_std": 0.33982476592063904,
"beta_dpo/beta_margin_mean": 10.719533920288086,
"beta_dpo/beta_margin_std": 10.665504455566406,
"beta_dpo/beta_used": 0.4454389214515686,
"beta_dpo/beta_used_raw": 0.4454389214515686,
"beta_dpo/gap_mean": 21.506826400756836,
"beta_dpo/gap_std": 27.147459030151367,
"beta_dpo/loss_margin_mean": 24.314071655273438,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.36281179138321995,
"grad_norm": 513.2327880859375,
"learning_rate": 4.0349825555680045e-07,
"logits/chosen": -3.2560102939605713,
"logits/rejected": -3.2747344970703125,
"loss": 0.5211,
"step": 240
},
{
"beta_dpo/beta": 0.06813672184944153,
"beta_dpo/beta_margin_grad_mean": -0.363924503326416,
"beta_dpo/beta_margin_grad_std": 0.23285789787769318,
"beta_dpo/beta_margin_mean": 1.3553699254989624,
"beta_dpo/beta_margin_std": 2.7104809284210205,
"beta_dpo/beta_used": 0.06813672184944153,
"beta_dpo/beta_used_raw": -0.07167855650186539,
"beta_dpo/gap_mean": 21.41180419921875,
"beta_dpo/gap_std": 26.821788787841797,
"beta_dpo/loss_margin_mean": 19.534835815429688,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.36432350718065004,
"grad_norm": 66.69193267822266,
"learning_rate": 4.0245247088227377e-07,
"logits/chosen": -3.25724720954895,
"logits/rejected": -3.2746810913085938,
"loss": 0.9479,
"step": 241
},
{
"beta_dpo/beta": 0.05317524075508118,
"beta_dpo/beta_margin_grad_mean": -0.29675236344337463,
"beta_dpo/beta_margin_grad_std": 0.21193645894527435,
"beta_dpo/beta_margin_mean": 1.4541407823562622,
"beta_dpo/beta_margin_std": 1.9665377140045166,
"beta_dpo/beta_used": 0.05317524075508118,
"beta_dpo/beta_used_raw": 0.05317524075508118,
"beta_dpo/gap_mean": 22.155099868774414,
"beta_dpo/gap_std": 27.43655776977539,
"beta_dpo/loss_margin_mean": 25.84035873413086,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.36583522297808013,
"grad_norm": 67.37097930908203,
"learning_rate": 4.0140242178441665e-07,
"logits/chosen": -3.2400145530700684,
"logits/rejected": -3.2478156089782715,
"loss": 0.8496,
"step": 242
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4958913326263428,
"beta_dpo/beta_margin_grad_std": 0.006393721327185631,
"beta_dpo/beta_margin_mean": 0.016438335180282593,
"beta_dpo/beta_margin_std": 0.02558121271431446,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.09002360701560974,
"beta_dpo/gap_mean": 21.35116958618164,
"beta_dpo/gap_std": 27.41248321533203,
"beta_dpo/loss_margin_mean": 16.43833351135254,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3673469387755102,
"grad_norm": 1.8394923210144043,
"learning_rate": 4.003481376353596e-07,
"logits/chosen": -3.2727673053741455,
"logits/rejected": -3.2656126022338867,
"loss": 1.3683,
"step": 243
},
{
"beta_dpo/beta": 0.32955145835876465,
"beta_dpo/beta_margin_grad_mean": -0.1536446064710617,
"beta_dpo/beta_margin_grad_std": 0.2893112897872925,
"beta_dpo/beta_margin_mean": 8.640202522277832,
"beta_dpo/beta_margin_std": 7.9720234870910645,
"beta_dpo/beta_used": 0.32955145835876465,
"beta_dpo/beta_used_raw": 0.32955145835876465,
"beta_dpo/gap_mean": 21.91408348083496,
"beta_dpo/gap_std": 26.89801025390625,
"beta_dpo/loss_margin_mean": 26.282230377197266,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3688586545729403,
"grad_norm": 358.5315856933594,
"learning_rate": 3.9928964792569654e-07,
"logits/chosen": -3.2303824424743652,
"logits/rejected": -3.2301125526428223,
"loss": 0.642,
"step": 244
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4936436414718628,
"beta_dpo/beta_margin_grad_std": 0.006070741917937994,
"beta_dpo/beta_margin_mean": 0.025431012734770775,
"beta_dpo/beta_margin_std": 0.024291541427373886,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.06644029915332794,
"beta_dpo/gap_mean": 22.572145462036133,
"beta_dpo/gap_std": 26.450565338134766,
"beta_dpo/loss_margin_mean": 25.431011199951172,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.37037037037037035,
"grad_norm": 2.086357831954956,
"learning_rate": 3.982269822636601e-07,
"logits/chosen": -3.228754997253418,
"logits/rejected": -3.2338013648986816,
"loss": 1.3667,
"step": 245
},
{
"beta_dpo/beta": 0.04253571480512619,
"beta_dpo/beta_margin_grad_mean": -0.37704116106033325,
"beta_dpo/beta_margin_grad_std": 0.24304994940757751,
"beta_dpo/beta_margin_mean": 0.9785481691360474,
"beta_dpo/beta_margin_std": 2.127553701400757,
"beta_dpo/beta_used": 0.04253571480512619,
"beta_dpo/beta_used_raw": -0.015344377607107162,
"beta_dpo/gap_mean": 22.191429138183594,
"beta_dpo/gap_std": 27.405582427978516,
"beta_dpo/loss_margin_mean": 20.26742935180664,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.37188208616780044,
"grad_norm": 85.47932434082031,
"learning_rate": 3.971601703742932e-07,
"logits/chosen": -3.2285194396972656,
"logits/rejected": -3.2426650524139404,
"loss": 1.072,
"step": 246
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4956916272640228,
"beta_dpo/beta_margin_grad_std": 0.007161261048167944,
"beta_dpo/beta_margin_mean": 0.017237938940525055,
"beta_dpo/beta_margin_std": 0.028652969747781754,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.20848971605300903,
"beta_dpo/gap_mean": 21.35602569580078,
"beta_dpo/gap_std": 28.036100387573242,
"beta_dpo/loss_margin_mean": 17.237937927246094,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.37339380196523053,
"grad_norm": 2.1120619773864746,
"learning_rate": 3.960892420986177e-07,
"logits/chosen": -3.240222930908203,
"logits/rejected": -3.24595046043396,
"loss": 1.3703,
"step": 247
},
{
"beta_dpo/beta": 0.16819608211517334,
"beta_dpo/beta_margin_grad_mean": -0.32370662689208984,
"beta_dpo/beta_margin_grad_std": 0.2876364588737488,
"beta_dpo/beta_margin_mean": 4.243810176849365,
"beta_dpo/beta_margin_std": 8.333431243896484,
"beta_dpo/beta_used": 0.16819608211517334,
"beta_dpo/beta_used_raw": 0.15640771389007568,
"beta_dpo/gap_mean": 21.29971694946289,
"beta_dpo/gap_std": 28.30411720275879,
"beta_dpo/loss_margin_mean": 22.132286071777344,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3749055177626606,
"grad_norm": 158.7293243408203,
"learning_rate": 3.9501422739279953e-07,
"logits/chosen": -3.2170867919921875,
"logits/rejected": -3.1951141357421875,
"loss": 0.8385,
"step": 248
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4974122941493988,
"beta_dpo/beta_margin_grad_std": 0.007395448163151741,
"beta_dpo/beta_margin_mean": 0.010353420861065388,
"beta_dpo/beta_margin_std": 0.029589040204882622,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.43927276134490967,
"beta_dpo/gap_mean": 19.619159698486328,
"beta_dpo/gap_std": 28.558395385742188,
"beta_dpo/loss_margin_mean": 10.35342025756836,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3764172335600907,
"grad_norm": 1.9444804191589355,
"learning_rate": 3.9393515632731094e-07,
"logits/chosen": -3.195737361907959,
"logits/rejected": -3.178128957748413,
"loss": 1.3758,
"step": 249
},
{
"beta_dpo/beta": 0.6326093673706055,
"beta_dpo/beta_margin_grad_mean": -0.18713483214378357,
"beta_dpo/beta_margin_grad_std": 0.357028067111969,
"beta_dpo/beta_margin_mean": 17.81401252746582,
"beta_dpo/beta_margin_std": 19.00658416748047,
"beta_dpo/beta_used": 0.6326093673706055,
"beta_dpo/beta_used_raw": 0.6326093673706055,
"beta_dpo/gap_mean": 20.576045989990234,
"beta_dpo/gap_std": 28.874229431152344,
"beta_dpo/loss_margin_mean": 27.96128273010254,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3779289493575208,
"grad_norm": 770.96875,
"learning_rate": 3.9285205908608934e-07,
"logits/chosen": -3.1680002212524414,
"logits/rejected": -3.1686441898345947,
"loss": 0.6203,
"step": 250
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4964104890823364,
"beta_dpo/beta_margin_grad_std": 0.007367901504039764,
"beta_dpo/beta_margin_mean": 0.014362086541950703,
"beta_dpo/beta_margin_std": 0.02948029339313507,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.33875617384910583,
"beta_dpo/gap_mean": 20.447429656982422,
"beta_dpo/gap_std": 28.979272842407227,
"beta_dpo/loss_margin_mean": 14.362086296081543,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3794406651549509,
"grad_norm": 2.251450300216675,
"learning_rate": 3.9176496596569265e-07,
"logits/chosen": -3.1883554458618164,
"logits/rejected": -3.1973702907562256,
"loss": 1.3733,
"step": 251
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4954353868961334,
"beta_dpo/beta_margin_grad_std": 0.008493933826684952,
"beta_dpo/beta_margin_mean": 0.018263807520270348,
"beta_dpo/beta_margin_std": 0.033991072326898575,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.20695188641548157,
"beta_dpo/gap_mean": 19.295875549316406,
"beta_dpo/gap_std": 29.833553314208984,
"beta_dpo/loss_margin_mean": 18.26380729675293,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.38095238095238093,
"grad_norm": 1.9482089281082153,
"learning_rate": 3.9067390737445254e-07,
"logits/chosen": -3.1611921787261963,
"logits/rejected": -3.1702466011047363,
"loss": 1.3723,
"step": 252
},
{
"beta_dpo/beta": 0.15635497868061066,
"beta_dpo/beta_margin_grad_mean": -0.36020636558532715,
"beta_dpo/beta_margin_grad_std": 0.2878256142139435,
"beta_dpo/beta_margin_mean": 3.688851833343506,
"beta_dpo/beta_margin_std": 8.520410537719727,
"beta_dpo/beta_used": 0.15635497868061066,
"beta_dpo/beta_used_raw": 0.11544579267501831,
"beta_dpo/gap_mean": 19.734420776367188,
"beta_dpo/gap_std": 30.259693145751953,
"beta_dpo/loss_margin_mean": 19.922733306884766,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.382464096749811,
"grad_norm": 350.47784423828125,
"learning_rate": 3.8957891383162304e-07,
"logits/chosen": -3.1174440383911133,
"logits/rejected": -3.1282904148101807,
"loss": 1.8085,
"step": 253
},
{
"beta_dpo/beta": 0.08370675146579742,
"beta_dpo/beta_margin_grad_mean": -0.3806697130203247,
"beta_dpo/beta_margin_grad_std": 0.2770395576953888,
"beta_dpo/beta_margin_mean": 2.0539305210113525,
"beta_dpo/beta_margin_std": 4.707830905914307,
"beta_dpo/beta_used": 0.08370675146579742,
"beta_dpo/beta_used_raw": -0.06456176191568375,
"beta_dpo/gap_mean": 19.630905151367188,
"beta_dpo/gap_std": 30.923583984375,
"beta_dpo/loss_margin_mean": 18.080299377441406,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3839758125472411,
"grad_norm": 216.11676025390625,
"learning_rate": 3.884800159665276e-07,
"logits/chosen": -3.170478343963623,
"logits/rejected": -3.1861348152160645,
"loss": 1.1379,
"step": 254
},
{
"beta_dpo/beta": 0.22263258695602417,
"beta_dpo/beta_margin_grad_mean": -0.32336732745170593,
"beta_dpo/beta_margin_grad_std": 0.28287842869758606,
"beta_dpo/beta_margin_mean": 7.826587677001953,
"beta_dpo/beta_margin_std": 14.295151710510254,
"beta_dpo/beta_used": 0.22263258695602417,
"beta_dpo/beta_used_raw": 0.11471735686063766,
"beta_dpo/gap_mean": 20.62637710571289,
"beta_dpo/gap_std": 32.07659912109375,
"beta_dpo/loss_margin_mean": 26.220294952392578,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3854875283446712,
"grad_norm": 650.1557006835938,
"learning_rate": 3.873772445177015e-07,
"logits/chosen": -3.1712052822113037,
"logits/rejected": -3.179037094116211,
"loss": 2.1495,
"step": 255
},
{
"beta_dpo/beta": 0.16558979451656342,
"beta_dpo/beta_margin_grad_mean": -0.3782199025154114,
"beta_dpo/beta_margin_grad_std": 0.307743638753891,
"beta_dpo/beta_margin_mean": 4.105532169342041,
"beta_dpo/beta_margin_std": 8.570287704467773,
"beta_dpo/beta_used": 0.16558979451656342,
"beta_dpo/beta_used_raw": 0.1294323205947876,
"beta_dpo/gap_mean": 21.077714920043945,
"beta_dpo/gap_std": 32.54633331298828,
"beta_dpo/loss_margin_mean": 24.78595542907715,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3869992441421013,
"grad_norm": 582.988037109375,
"learning_rate": 3.862706303320329e-07,
"logits/chosen": -3.144796848297119,
"logits/rejected": -3.1616220474243164,
"loss": 2.1925,
"step": 256
},
{
"beta_dpo/beta": 0.2947536110877991,
"beta_dpo/beta_margin_grad_mean": -0.17713895440101624,
"beta_dpo/beta_margin_grad_std": 0.3150961697101593,
"beta_dpo/beta_margin_mean": 8.477952003479004,
"beta_dpo/beta_margin_std": 12.130805015563965,
"beta_dpo/beta_used": 0.2947536110877991,
"beta_dpo/beta_used_raw": 0.2947536110877991,
"beta_dpo/gap_mean": 22.49274444580078,
"beta_dpo/gap_std": 33.52637481689453,
"beta_dpo/loss_margin_mean": 29.009363174438477,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3885109599395314,
"grad_norm": 413.6116027832031,
"learning_rate": 3.851602043638994e-07,
"logits/chosen": -3.1539788246154785,
"logits/rejected": -3.1706466674804688,
"loss": 0.7214,
"step": 257
},
{
"beta_dpo/beta": 0.2633950710296631,
"beta_dpo/beta_margin_grad_mean": -0.29857465624809265,
"beta_dpo/beta_margin_grad_std": 0.2786755859851837,
"beta_dpo/beta_margin_mean": 8.789876937866211,
"beta_dpo/beta_margin_std": 13.870450019836426,
"beta_dpo/beta_used": 0.2633950710296631,
"beta_dpo/beta_used_raw": -0.04980570077896118,
"beta_dpo/gap_mean": 23.006622314453125,
"beta_dpo/gap_std": 33.62416076660156,
"beta_dpo/loss_margin_mean": 26.75441551208496,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3900226757369615,
"grad_norm": 657.8723754882812,
"learning_rate": 3.840459976743023e-07,
"logits/chosen": -3.138174295425415,
"logits/rejected": -3.165945053100586,
"loss": 1.6342,
"step": 258
},
{
"beta_dpo/beta": 0.34684550762176514,
"beta_dpo/beta_margin_grad_mean": -0.20287807285785675,
"beta_dpo/beta_margin_grad_std": 0.3509141206741333,
"beta_dpo/beta_margin_mean": 11.281720161437988,
"beta_dpo/beta_margin_std": 12.954914093017578,
"beta_dpo/beta_used": 0.34684550762176514,
"beta_dpo/beta_used_raw": 0.34684550762176514,
"beta_dpo/gap_mean": 24.934921264648438,
"beta_dpo/gap_std": 33.59513473510742,
"beta_dpo/loss_margin_mean": 31.152149200439453,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3915343915343915,
"grad_norm": 723.6175537109375,
"learning_rate": 3.8292804142999796e-07,
"logits/chosen": -3.1428725719451904,
"logits/rejected": -3.1634674072265625,
"loss": 1.524,
"step": 259
},
{
"beta_dpo/beta": 0.049117155373096466,
"beta_dpo/beta_margin_grad_mean": -0.3828244209289551,
"beta_dpo/beta_margin_grad_std": 0.2421586811542511,
"beta_dpo/beta_margin_mean": 1.1651880741119385,
"beta_dpo/beta_margin_std": 2.5332014560699463,
"beta_dpo/beta_used": 0.049117155373096466,
"beta_dpo/beta_used_raw": -0.10537585616111755,
"beta_dpo/gap_mean": 25.15224266052246,
"beta_dpo/gap_std": 33.792938232421875,
"beta_dpo/loss_margin_mean": 25.78077507019043,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3930461073318216,
"grad_norm": 107.15544128417969,
"learning_rate": 3.818063669026256e-07,
"logits/chosen": -3.123897075653076,
"logits/rejected": -3.1420979499816895,
"loss": 1.153,
"step": 260
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49438992142677307,
"beta_dpo/beta_margin_grad_std": 0.007078561000525951,
"beta_dpo/beta_margin_mean": 0.022446028888225555,
"beta_dpo/beta_margin_std": 0.028323406353592873,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.12177471816539764,
"beta_dpo/gap_mean": 24.7503662109375,
"beta_dpo/gap_std": 33.19297409057617,
"beta_dpo/loss_margin_mean": 22.446027755737305,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3945578231292517,
"grad_norm": 2.29372501373291,
"learning_rate": 3.806810054678331e-07,
"logits/chosen": -3.1444356441497803,
"logits/rejected": -3.134612560272217,
"loss": 1.3655,
"step": 261
},
{
"beta_dpo/beta": 0.14830927550792694,
"beta_dpo/beta_margin_grad_mean": -0.35249003767967224,
"beta_dpo/beta_margin_grad_std": 0.2969633638858795,
"beta_dpo/beta_margin_mean": 3.340433120727539,
"beta_dpo/beta_margin_std": 6.50543737411499,
"beta_dpo/beta_used": 0.14830927550792694,
"beta_dpo/beta_used_raw": 0.07512392103672028,
"beta_dpo/gap_mean": 24.296005249023438,
"beta_dpo/gap_std": 32.12373352050781,
"beta_dpo/loss_margin_mean": 22.67593765258789,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3960695389266818,
"grad_norm": 253.0749969482422,
"learning_rate": 3.7955198860439887e-07,
"logits/chosen": -3.1409544944763184,
"logits/rejected": -3.160794258117676,
"loss": 1.0158,
"step": 262
},
{
"beta_dpo/beta": 0.002609849674627185,
"beta_dpo/beta_margin_grad_mean": -0.4891809821128845,
"beta_dpo/beta_margin_grad_std": 0.023940352723002434,
"beta_dpo/beta_margin_mean": 0.04340985417366028,
"beta_dpo/beta_margin_std": 0.09623526781797409,
"beta_dpo/beta_used": 0.002609849674627185,
"beta_dpo/beta_used_raw": -0.1014128252863884,
"beta_dpo/gap_mean": 23.623592376708984,
"beta_dpo/gap_std": 31.66000747680664,
"beta_dpo/loss_margin_mean": 19.01215934753418,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3975812547241119,
"grad_norm": 6.745489120483398,
"learning_rate": 3.784193478933516e-07,
"logits/chosen": -3.1504030227661133,
"logits/rejected": -3.1817703247070312,
"loss": 1.3338,
"step": 263
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49462515115737915,
"beta_dpo/beta_margin_grad_std": 0.007510695606470108,
"beta_dpo/beta_margin_mean": 0.021505767479538918,
"beta_dpo/beta_margin_std": 0.03005310706794262,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.3539937138557434,
"beta_dpo/gap_mean": 22.857873916625977,
"beta_dpo/gap_std": 31.427021026611328,
"beta_dpo/loss_margin_mean": 21.505767822265625,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.39909297052154197,
"grad_norm": 2.348557472229004,
"learning_rate": 3.7728311501708674e-07,
"logits/chosen": -3.163296699523926,
"logits/rejected": -3.170408248901367,
"loss": 1.3712,
"step": 264
},
{
"beta_dpo/beta": 0.3959474563598633,
"beta_dpo/beta_margin_grad_mean": -0.26147815585136414,
"beta_dpo/beta_margin_grad_std": 0.40265628695487976,
"beta_dpo/beta_margin_mean": 9.483842849731445,
"beta_dpo/beta_margin_std": 16.816068649291992,
"beta_dpo/beta_used": 0.3959474563598633,
"beta_dpo/beta_used_raw": 0.3959474563598633,
"beta_dpo/gap_mean": 22.634410858154297,
"beta_dpo/gap_std": 31.731470108032227,
"beta_dpo/loss_margin_mean": 22.369054794311523,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.40060468631897206,
"grad_norm": 776.9564208984375,
"learning_rate": 3.7614332175848027e-07,
"logits/chosen": -3.119457244873047,
"logits/rejected": -3.1327133178710938,
"loss": 2.2347,
"step": 265
},
{
"beta_dpo/beta": 0.005646655801683664,
"beta_dpo/beta_margin_grad_mean": -0.47482630610466003,
"beta_dpo/beta_margin_grad_std": 0.06348370015621185,
"beta_dpo/beta_margin_mean": 0.10376403480768204,
"beta_dpo/beta_margin_std": 0.26494264602661133,
"beta_dpo/beta_used": 0.005646655801683664,
"beta_dpo/beta_used_raw": -0.04304119572043419,
"beta_dpo/gap_mean": 22.353633880615234,
"beta_dpo/gap_std": 31.866960525512695,
"beta_dpo/loss_margin_mean": 19.274166107177734,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4021164021164021,
"grad_norm": 15.863897323608398,
"learning_rate": 3.75e-07,
"logits/chosen": -3.150226354598999,
"logits/rejected": -3.180527687072754,
"loss": 1.283,
"step": 266
},
{
"beta_dpo/beta": 0.05279780179262161,
"beta_dpo/beta_margin_grad_mean": -0.35302603244781494,
"beta_dpo/beta_margin_grad_std": 0.21988117694854736,
"beta_dpo/beta_margin_mean": 1.6686517000198364,
"beta_dpo/beta_margin_std": 2.897858142852783,
"beta_dpo/beta_used": 0.05279780179262161,
"beta_dpo/beta_used_raw": -0.19854456186294556,
"beta_dpo/gap_mean": 21.90056610107422,
"beta_dpo/gap_std": 31.87493324279785,
"beta_dpo/loss_margin_mean": 23.186372756958008,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4036281179138322,
"grad_norm": 69.50460815429688,
"learning_rate": 3.738531817228131e-07,
"logits/chosen": -3.114777088165283,
"logits/rejected": -3.127145290374756,
"loss": 1.0439,
"step": 267
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4949630796909332,
"beta_dpo/beta_margin_grad_std": 0.007268788758665323,
"beta_dpo/beta_margin_mean": 0.02015492133796215,
"beta_dpo/beta_margin_std": 0.029093481600284576,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.20411354303359985,
"beta_dpo/gap_mean": 21.996845245361328,
"beta_dpo/gap_std": 31.68831443786621,
"beta_dpo/loss_margin_mean": 20.15492057800293,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4051398337112623,
"grad_norm": 2.0732991695404053,
"learning_rate": 3.7270289900589204e-07,
"logits/chosen": -3.1337013244628906,
"logits/rejected": -3.14176607131958,
"loss": 1.3696,
"step": 268
},
{
"beta_dpo/beta": 0.13603897392749786,
"beta_dpo/beta_margin_grad_mean": -0.33785995841026306,
"beta_dpo/beta_margin_grad_std": 0.27836501598358154,
"beta_dpo/beta_margin_mean": 3.901336908340454,
"beta_dpo/beta_margin_std": 6.962329387664795,
"beta_dpo/beta_used": 0.13603897392749786,
"beta_dpo/beta_used_raw": -0.08372128009796143,
"beta_dpo/gap_mean": 21.401334762573242,
"beta_dpo/gap_std": 31.724838256835938,
"beta_dpo/loss_margin_mean": 20.39788246154785,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.40665154950869237,
"grad_norm": 313.16552734375,
"learning_rate": 3.7154918402511714e-07,
"logits/chosen": -3.145228862762451,
"logits/rejected": -3.1478958129882812,
"loss": 1.3126,
"step": 269
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49513691663742065,
"beta_dpo/beta_margin_grad_std": 0.007251319475471973,
"beta_dpo/beta_margin_mean": 0.019458677619695663,
"beta_dpo/beta_margin_std": 0.02901856042444706,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.1537623405456543,
"beta_dpo/gap_mean": 21.666542053222656,
"beta_dpo/gap_std": 31.45153045654297,
"beta_dpo/loss_margin_mean": 19.458675384521484,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.40816326530612246,
"grad_norm": 2.4263079166412354,
"learning_rate": 3.7039206905237656e-07,
"logits/chosen": -3.1230382919311523,
"logits/rejected": -3.15578293800354,
"loss": 1.3691,
"step": 270
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49473121762275696,
"beta_dpo/beta_margin_grad_std": 0.009824409149587154,
"beta_dpo/beta_margin_mean": 0.021087976172566414,
"beta_dpo/beta_margin_std": 0.03932539001107216,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.3831254541873932,
"beta_dpo/gap_mean": 21.330623626708984,
"beta_dpo/gap_std": 32.055625915527344,
"beta_dpo/loss_margin_mean": 21.087974548339844,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.40967498110355255,
"grad_norm": 2.2488882541656494,
"learning_rate": 3.692315864546635e-07,
"logits/chosen": -3.1240901947021484,
"logits/rejected": -3.150813579559326,
"loss": 1.3732,
"step": 271
},
{
"beta_dpo/beta": 0.5425400733947754,
"beta_dpo/beta_margin_grad_mean": -0.13226215541362762,
"beta_dpo/beta_margin_grad_std": 0.2805606424808502,
"beta_dpo/beta_margin_mean": 17.538484573364258,
"beta_dpo/beta_margin_std": 15.896415710449219,
"beta_dpo/beta_used": 0.5425400733947754,
"beta_dpo/beta_used_raw": 0.5425400733947754,
"beta_dpo/gap_mean": 22.86958122253418,
"beta_dpo/gap_std": 32.0460205078125,
"beta_dpo/loss_margin_mean": 32.03363037109375,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.41118669690098264,
"grad_norm": 575.8597412109375,
"learning_rate": 3.6806776869317067e-07,
"logits/chosen": -3.1413285732269287,
"logits/rejected": -3.1322383880615234,
"loss": 0.9124,
"step": 272
},
{
"beta_dpo/beta": 0.025581976398825645,
"beta_dpo/beta_margin_grad_mean": -0.38602912425994873,
"beta_dpo/beta_margin_grad_std": 0.19099541008472443,
"beta_dpo/beta_margin_mean": 0.7089114189147949,
"beta_dpo/beta_margin_std": 1.3611912727355957,
"beta_dpo/beta_used": 0.025581976398825645,
"beta_dpo/beta_used_raw": -0.055108197033405304,
"beta_dpo/gap_mean": 23.795854568481445,
"beta_dpo/gap_std": 32.6051139831543,
"beta_dpo/loss_margin_mean": 26.4256649017334,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4126984126984127,
"grad_norm": 50.3040885925293,
"learning_rate": 3.669006483223828e-07,
"logits/chosen": -3.1269006729125977,
"logits/rejected": -3.153369426727295,
"loss": 1.0815,
"step": 273
},
{
"beta_dpo/beta": 0.10980037599802017,
"beta_dpo/beta_margin_grad_mean": -0.33080747723579407,
"beta_dpo/beta_margin_grad_std": 0.2663941979408264,
"beta_dpo/beta_margin_mean": 3.319112539291382,
"beta_dpo/beta_margin_std": 6.285099983215332,
"beta_dpo/beta_used": 0.10980037599802017,
"beta_dpo/beta_used_raw": 0.0991683080792427,
"beta_dpo/gap_mean": 23.904796600341797,
"beta_dpo/gap_std": 33.53688049316406,
"beta_dpo/loss_margin_mean": 25.596914291381836,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.41421012849584277,
"grad_norm": 172.63458251953125,
"learning_rate": 3.657302579891656e-07,
"logits/chosen": -3.1327056884765625,
"logits/rejected": -3.134082794189453,
"loss": 0.9537,
"step": 274
},
{
"beta_dpo/beta": 0.04865710437297821,
"beta_dpo/beta_margin_grad_mean": -0.35247406363487244,
"beta_dpo/beta_margin_grad_std": 0.22889384627342224,
"beta_dpo/beta_margin_mean": 1.5195780992507935,
"beta_dpo/beta_margin_std": 2.562974452972412,
"beta_dpo/beta_used": 0.04865710437297821,
"beta_dpo/beta_used_raw": 0.03514295443892479,
"beta_dpo/gap_mean": 24.787761688232422,
"beta_dpo/gap_std": 33.226646423339844,
"beta_dpo/loss_margin_mean": 28.79128074645996,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.41572184429327286,
"grad_norm": 89.05858612060547,
"learning_rate": 3.645566304318526e-07,
"logits/chosen": -3.131340742111206,
"logits/rejected": -3.1462135314941406,
"loss": 0.9959,
"step": 275
},
{
"beta_dpo/beta": 0.2305426448583603,
"beta_dpo/beta_margin_grad_mean": -0.35385870933532715,
"beta_dpo/beta_margin_grad_std": 0.2957158386707306,
"beta_dpo/beta_margin_mean": 6.705293655395508,
"beta_dpo/beta_margin_std": 12.562973976135254,
"beta_dpo/beta_used": 0.2305426448583603,
"beta_dpo/beta_used_raw": 0.22652789950370789,
"beta_dpo/gap_mean": 25.334609985351562,
"beta_dpo/gap_std": 33.08951187133789,
"beta_dpo/loss_margin_mean": 25.393156051635742,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.41723356009070295,
"grad_norm": 458.3935241699219,
"learning_rate": 3.633797984793294e-07,
"logits/chosen": -3.112989664077759,
"logits/rejected": -3.117621660232544,
"loss": 1.2243,
"step": 276
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49625149369239807,
"beta_dpo/beta_margin_grad_std": 0.009044786915183067,
"beta_dpo/beta_margin_mean": 0.014998854137957096,
"beta_dpo/beta_margin_std": 0.036193422973155975,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.21672311425209045,
"beta_dpo/gap_mean": 23.66399383544922,
"beta_dpo/gap_std": 33.6298713684082,
"beta_dpo/loss_margin_mean": 14.998852729797363,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.41874527588813304,
"grad_norm": 2.0547492504119873,
"learning_rate": 3.6219979505011555e-07,
"logits/chosen": -3.115434169769287,
"logits/rejected": -3.1027560234069824,
"loss": 1.3682,
"step": 277
},
{
"beta_dpo/beta": 0.2880277633666992,
"beta_dpo/beta_margin_grad_mean": -0.3204805850982666,
"beta_dpo/beta_margin_grad_std": 0.29095593094825745,
"beta_dpo/beta_margin_mean": 8.897863388061523,
"beta_dpo/beta_margin_std": 15.400246620178223,
"beta_dpo/beta_used": 0.2880277633666992,
"beta_dpo/beta_used_raw": 0.2747938930988312,
"beta_dpo/gap_mean": 22.691707611083984,
"beta_dpo/gap_std": 33.781612396240234,
"beta_dpo/loss_margin_mean": 22.27745819091797,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.42025699168556313,
"grad_norm": 1209.379150390625,
"learning_rate": 3.6101665315144353e-07,
"logits/chosen": -3.124952554702759,
"logits/rejected": -3.1554203033447266,
"loss": 1.7403,
"step": 278
},
{
"beta_dpo/beta": 0.2570856809616089,
"beta_dpo/beta_margin_grad_mean": -0.19151908159255981,
"beta_dpo/beta_margin_grad_std": 0.2793121933937073,
"beta_dpo/beta_margin_mean": 8.134492874145508,
"beta_dpo/beta_margin_std": 10.537945747375488,
"beta_dpo/beta_used": 0.2570856809616089,
"beta_dpo/beta_used_raw": 0.2570856809616089,
"beta_dpo/gap_mean": 23.83153533935547,
"beta_dpo/gap_std": 33.45591735839844,
"beta_dpo/loss_margin_mean": 29.187664031982422,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4217687074829932,
"grad_norm": 237.06866455078125,
"learning_rate": 3.5983040587833563e-07,
"logits/chosen": -3.129159450531006,
"logits/rejected": -3.1331429481506348,
"loss": 0.574,
"step": 279
},
{
"beta_dpo/beta": 0.16108359396457672,
"beta_dpo/beta_margin_grad_mean": -0.2740139365196228,
"beta_dpo/beta_margin_grad_std": 0.26515132188796997,
"beta_dpo/beta_margin_mean": 4.792131423950195,
"beta_dpo/beta_margin_std": 7.665543079376221,
"beta_dpo/beta_used": 0.16108359396457672,
"beta_dpo/beta_used_raw": 0.16108359396457672,
"beta_dpo/gap_mean": 24.996429443359375,
"beta_dpo/gap_std": 33.09587860107422,
"beta_dpo/loss_margin_mean": 28.564146041870117,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.42328042328042326,
"grad_norm": 321.95233154296875,
"learning_rate": 3.586410864126781e-07,
"logits/chosen": -3.11377215385437,
"logits/rejected": -3.1314854621887207,
"loss": 1.1929,
"step": 280
},
{
"beta_dpo/beta": 0.1646243929862976,
"beta_dpo/beta_margin_grad_mean": -0.2770417332649231,
"beta_dpo/beta_margin_grad_std": 0.2952728867530823,
"beta_dpo/beta_margin_mean": 4.605935096740723,
"beta_dpo/beta_margin_std": 8.153287887573242,
"beta_dpo/beta_used": 0.1646243929862976,
"beta_dpo/beta_used_raw": 0.1646243929862976,
"beta_dpo/gap_mean": 25.285266876220703,
"beta_dpo/gap_std": 33.30162811279297,
"beta_dpo/loss_margin_mean": 26.789043426513672,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.42479213907785335,
"grad_norm": 241.8627471923828,
"learning_rate": 3.574487280222929e-07,
"logits/chosen": -3.115567684173584,
"logits/rejected": -3.0991907119750977,
"loss": 0.8431,
"step": 281
},
{
"beta_dpo/beta": 0.05279150977730751,
"beta_dpo/beta_margin_grad_mean": -0.3536136746406555,
"beta_dpo/beta_margin_grad_std": 0.24429401755332947,
"beta_dpo/beta_margin_mean": 1.628803014755249,
"beta_dpo/beta_margin_std": 3.227506637573242,
"beta_dpo/beta_used": 0.05279150977730751,
"beta_dpo/beta_used_raw": 0.018425598740577698,
"beta_dpo/gap_mean": 25.778846740722656,
"beta_dpo/gap_std": 33.7174072265625,
"beta_dpo/loss_margin_mean": 28.50662612915039,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.42630385487528344,
"grad_norm": 129.214111328125,
"learning_rate": 3.562533640600075e-07,
"logits/chosen": -3.127708673477173,
"logits/rejected": -3.144028663635254,
"loss": 1.1211,
"step": 282
},
{
"beta_dpo/beta": 0.037356920540332794,
"beta_dpo/beta_margin_grad_mean": -0.38511744141578674,
"beta_dpo/beta_margin_grad_std": 0.23253542184829712,
"beta_dpo/beta_margin_mean": 0.8937662243843079,
"beta_dpo/beta_margin_std": 1.8212085962295532,
"beta_dpo/beta_used": 0.037356920540332794,
"beta_dpo/beta_used_raw": -0.00982586294412613,
"beta_dpo/gap_mean": 25.79236602783203,
"beta_dpo/gap_std": 33.630287170410156,
"beta_dpo/loss_margin_mean": 24.450763702392578,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.42781557067271353,
"grad_norm": 77.45944213867188,
"learning_rate": 3.550550279627215e-07,
"logits/chosen": -3.1351306438446045,
"logits/rejected": -3.1759040355682373,
"loss": 1.0516,
"step": 283
},
{
"beta_dpo/beta": 0.23237170279026031,
"beta_dpo/beta_margin_grad_mean": -0.2390763908624649,
"beta_dpo/beta_margin_grad_std": 0.34266528487205505,
"beta_dpo/beta_margin_mean": 5.581095218658447,
"beta_dpo/beta_margin_std": 8.388540267944336,
"beta_dpo/beta_used": 0.23237170279026031,
"beta_dpo/beta_used_raw": 0.23237170279026031,
"beta_dpo/gap_mean": 25.81268310546875,
"beta_dpo/gap_std": 33.67734146118164,
"beta_dpo/loss_margin_mean": 24.30912208557129,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4293272864701436,
"grad_norm": 394.56317138671875,
"learning_rate": 3.5385375325047163e-07,
"logits/chosen": -3.1143360137939453,
"logits/rejected": -3.1349940299987793,
"loss": 0.8281,
"step": 284
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4946158826351166,
"beta_dpo/beta_margin_grad_std": 0.008639446459710598,
"beta_dpo/beta_margin_mean": 0.02154547907412052,
"beta_dpo/beta_margin_std": 0.03457494452595711,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.3157495856285095,
"beta_dpo/gap_mean": 24.874420166015625,
"beta_dpo/gap_std": 33.82136535644531,
"beta_dpo/loss_margin_mean": 21.54547882080078,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4308390022675737,
"grad_norm": 2.731858968734741,
"learning_rate": 3.5264957352549375e-07,
"logits/chosen": -3.119109630584717,
"logits/rejected": -3.1246259212493896,
"loss": 1.3686,
"step": 285
},
{
"beta_dpo/beta": 0.3855738937854767,
"beta_dpo/beta_margin_grad_mean": -0.16172371804714203,
"beta_dpo/beta_margin_grad_std": 0.32219523191452026,
"beta_dpo/beta_margin_mean": 14.698241233825684,
"beta_dpo/beta_margin_std": 21.05354118347168,
"beta_dpo/beta_used": 0.3855738937854767,
"beta_dpo/beta_used_raw": 0.3855738937854767,
"beta_dpo/gap_mean": 25.75153160095215,
"beta_dpo/gap_std": 34.764095306396484,
"beta_dpo/loss_margin_mean": 34.22718048095703,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4323507180650038,
"grad_norm": 403.9075012207031,
"learning_rate": 3.514425224712835e-07,
"logits/chosen": -3.1518945693969727,
"logits/rejected": -3.187169075012207,
"loss": 0.7101,
"step": 286
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49369317293167114,
"beta_dpo/beta_margin_grad_std": 0.00864699762314558,
"beta_dpo/beta_margin_mean": 0.025238126516342163,
"beta_dpo/beta_margin_std": 0.03460447117686272,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.18191684782505035,
"beta_dpo/gap_mean": 26.148000717163086,
"beta_dpo/gap_std": 35.01335144042969,
"beta_dpo/loss_margin_mean": 25.23812484741211,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.43386243386243384,
"grad_norm": 1.7744338512420654,
"learning_rate": 3.502326338516534e-07,
"logits/chosen": -3.075338840484619,
"logits/rejected": -3.087643623352051,
"loss": 1.3652,
"step": 287
},
{
"beta_dpo/beta": 0.07703159749507904,
"beta_dpo/beta_margin_grad_mean": -0.3526802957057953,
"beta_dpo/beta_margin_grad_std": 0.26365530490875244,
"beta_dpo/beta_margin_mean": 1.9095759391784668,
"beta_dpo/beta_margin_std": 3.8526155948638916,
"beta_dpo/beta_used": 0.07703159749507904,
"beta_dpo/beta_used_raw": 0.07703159749507904,
"beta_dpo/gap_mean": 25.93368148803711,
"beta_dpo/gap_std": 35.061134338378906,
"beta_dpo/loss_margin_mean": 23.770599365234375,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.43537414965986393,
"grad_norm": 240.46803283691406,
"learning_rate": 3.490199415097892e-07,
"logits/chosen": -3.124175548553467,
"logits/rejected": -3.1412646770477295,
"loss": 1.1649,
"step": 288
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49461233615875244,
"beta_dpo/beta_margin_grad_std": 0.008139989338815212,
"beta_dpo/beta_margin_mean": 0.021558493375778198,
"beta_dpo/beta_margin_std": 0.03257429599761963,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.16580015420913696,
"beta_dpo/gap_mean": 25.205562591552734,
"beta_dpo/gap_std": 34.782997131347656,
"beta_dpo/loss_margin_mean": 21.558490753173828,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.436885865457294,
"grad_norm": 2.0100018978118896,
"learning_rate": 3.4780447936730247e-07,
"logits/chosen": -3.0957653522491455,
"logits/rejected": -3.1041059494018555,
"loss": 1.3658,
"step": 289
},
{
"beta_dpo/beta": 0.08695121854543686,
"beta_dpo/beta_margin_grad_mean": -0.2400132119655609,
"beta_dpo/beta_margin_grad_std": 0.26995664834976196,
"beta_dpo/beta_margin_mean": 2.7945497035980225,
"beta_dpo/beta_margin_std": 3.4169130325317383,
"beta_dpo/beta_used": 0.08695121854543686,
"beta_dpo/beta_used_raw": 0.08695121854543686,
"beta_dpo/gap_mean": 26.052141189575195,
"beta_dpo/gap_std": 34.53221893310547,
"beta_dpo/loss_margin_mean": 31.12602424621582,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4383975812547241,
"grad_norm": 127.68869018554688,
"learning_rate": 3.465862814232821e-07,
"logits/chosen": -3.113938808441162,
"logits/rejected": -3.1347272396087646,
"loss": 0.7997,
"step": 290
},
{
"beta_dpo/beta": 0.39539340138435364,
"beta_dpo/beta_margin_grad_mean": -0.17899632453918457,
"beta_dpo/beta_margin_grad_std": 0.31650540232658386,
"beta_dpo/beta_margin_mean": 13.287332534790039,
"beta_dpo/beta_margin_std": 20.05366325378418,
"beta_dpo/beta_used": 0.39539340138435364,
"beta_dpo/beta_used_raw": 0.39539340138435364,
"beta_dpo/gap_mean": 26.921337127685547,
"beta_dpo/gap_std": 35.714210510253906,
"beta_dpo/loss_margin_mean": 32.074607849121094,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4399092970521542,
"grad_norm": 124.12476348876953,
"learning_rate": 3.4536538175334343e-07,
"logits/chosen": -3.1052820682525635,
"logits/rejected": -3.1392335891723633,
"loss": 0.3092,
"step": 291
},
{
"beta_dpo/beta": 0.2568642795085907,
"beta_dpo/beta_margin_grad_mean": -0.32715895771980286,
"beta_dpo/beta_margin_grad_std": 0.28328001499176025,
"beta_dpo/beta_margin_mean": 7.2955803871154785,
"beta_dpo/beta_margin_std": 13.455894470214844,
"beta_dpo/beta_used": 0.2568642795085907,
"beta_dpo/beta_used_raw": 0.14661070704460144,
"beta_dpo/gap_mean": 26.693593978881836,
"beta_dpo/gap_std": 35.771141052246094,
"beta_dpo/loss_margin_mean": 24.197370529174805,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4414210128495843,
"grad_norm": 232.77931213378906,
"learning_rate": 3.4414181450867465e-07,
"logits/chosen": -3.114081382751465,
"logits/rejected": -3.1203083992004395,
"loss": 0.8163,
"step": 292
},
{
"beta_dpo/beta": 0.16329149901866913,
"beta_dpo/beta_margin_grad_mean": -0.21914827823638916,
"beta_dpo/beta_margin_grad_std": 0.32468345761299133,
"beta_dpo/beta_margin_mean": 4.863969802856445,
"beta_dpo/beta_margin_std": 6.511205673217773,
"beta_dpo/beta_used": 0.16329149901866913,
"beta_dpo/beta_used_raw": 0.16329149901866913,
"beta_dpo/gap_mean": 27.06631088256836,
"beta_dpo/gap_std": 36.21523666381836,
"beta_dpo/loss_margin_mean": 29.423086166381836,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4429327286470144,
"grad_norm": 210.0855255126953,
"learning_rate": 3.4291561391508185e-07,
"logits/chosen": -3.091078281402588,
"logits/rejected": -3.123889207839966,
"loss": 0.8069,
"step": 293
},
{
"beta_dpo/beta": 0.2324911504983902,
"beta_dpo/beta_margin_grad_mean": -0.290340781211853,
"beta_dpo/beta_margin_grad_std": 0.2663319408893585,
"beta_dpo/beta_margin_mean": 7.494677543640137,
"beta_dpo/beta_margin_std": 11.694056510925293,
"beta_dpo/beta_used": 0.2324911504983902,
"beta_dpo/beta_used_raw": -0.005689352750778198,
"beta_dpo/gap_mean": 27.724380493164062,
"beta_dpo/gap_std": 35.48869705200195,
"beta_dpo/loss_margin_mean": 29.247159957885742,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4444444444444444,
"grad_norm": 25.350112915039062,
"learning_rate": 3.4168681427203153e-07,
"logits/chosen": -3.119723081588745,
"logits/rejected": -3.12868070602417,
"loss": 0.7012,
"step": 294
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4931543469429016,
"beta_dpo/beta_margin_grad_std": 0.007606986910104752,
"beta_dpo/beta_margin_mean": 0.027391238138079643,
"beta_dpo/beta_margin_std": 0.030441265553236008,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.04372384026646614,
"beta_dpo/gap_mean": 27.602527618408203,
"beta_dpo/gap_std": 34.61024475097656,
"beta_dpo/loss_margin_mean": 27.391237258911133,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4459561602418745,
"grad_norm": 2.2545580863952637,
"learning_rate": 3.4045544995169125e-07,
"logits/chosen": -3.111898422241211,
"logits/rejected": -3.147273540496826,
"loss": 1.3614,
"step": 295
},
{
"beta_dpo/beta": 0.3043525218963623,
"beta_dpo/beta_margin_grad_mean": -0.23906980454921722,
"beta_dpo/beta_margin_grad_std": 0.30588892102241516,
"beta_dpo/beta_margin_mean": 9.003376007080078,
"beta_dpo/beta_margin_std": 15.752115249633789,
"beta_dpo/beta_used": 0.3043525218963623,
"beta_dpo/beta_used_raw": 0.3043525218963623,
"beta_dpo/gap_mean": 28.519481658935547,
"beta_dpo/gap_std": 35.11084747314453,
"beta_dpo/loss_margin_mean": 32.819244384765625,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4474678760393046,
"grad_norm": 623.9177856445312,
"learning_rate": 3.392215553979679e-07,
"logits/chosen": -3.124897003173828,
"logits/rejected": -3.137674331665039,
"loss": 1.2411,
"step": 296
},
{
"beta_dpo/beta": 0.23584313690662384,
"beta_dpo/beta_margin_grad_mean": -0.21231801807880402,
"beta_dpo/beta_margin_grad_std": 0.330021470785141,
"beta_dpo/beta_margin_mean": 6.7015581130981445,
"beta_dpo/beta_margin_std": 7.194095611572266,
"beta_dpo/beta_used": 0.23584313690662384,
"beta_dpo/beta_used_raw": 0.23584313690662384,
"beta_dpo/gap_mean": 28.46847152709961,
"beta_dpo/gap_std": 34.36936950683594,
"beta_dpo/loss_margin_mean": 29.151132583618164,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4489795918367347,
"grad_norm": 383.8264465332031,
"learning_rate": 3.3798516512554485e-07,
"logits/chosen": -3.0995192527770996,
"logits/rejected": -3.104422092437744,
"loss": 0.7781,
"step": 297
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49327847361564636,
"beta_dpo/beta_margin_grad_std": 0.00809707585722208,
"beta_dpo/beta_margin_mean": 0.026895053684711456,
"beta_dpo/beta_margin_std": 0.032401006668806076,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.1754533052444458,
"beta_dpo/gap_mean": 28.4078369140625,
"beta_dpo/gap_std": 34.15364456176758,
"beta_dpo/loss_margin_mean": 26.89505386352539,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4504913076341648,
"grad_norm": 2.5030081272125244,
"learning_rate": 3.367463137189156e-07,
"logits/chosen": -3.1021337509155273,
"logits/rejected": -3.110154628753662,
"loss": 1.3628,
"step": 298
},
{
"beta_dpo/beta": 0.027304884046316147,
"beta_dpo/beta_margin_grad_mean": -0.37381458282470703,
"beta_dpo/beta_margin_grad_std": 0.20344248414039612,
"beta_dpo/beta_margin_mean": 0.8136813640594482,
"beta_dpo/beta_margin_std": 1.3851598501205444,
"beta_dpo/beta_used": 0.027304884046316147,
"beta_dpo/beta_used_raw": -0.10461076349020004,
"beta_dpo/gap_mean": 28.38265037536621,
"beta_dpo/gap_std": 34.259788513183594,
"beta_dpo/loss_margin_mean": 27.814064025878906,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4520030234315949,
"grad_norm": 46.56258010864258,
"learning_rate": 3.355050358314172e-07,
"logits/chosen": -3.117727756500244,
"logits/rejected": -3.119908571243286,
"loss": 1.0292,
"step": 299
},
{
"beta_dpo/beta": 0.03998471051454544,
"beta_dpo/beta_margin_grad_mean": -0.3602712154388428,
"beta_dpo/beta_margin_grad_std": 0.21306991577148438,
"beta_dpo/beta_margin_mean": 1.1096277236938477,
"beta_dpo/beta_margin_std": 1.8668972253799438,
"beta_dpo/beta_used": 0.03998471051454544,
"beta_dpo/beta_used_raw": -0.006209302693605423,
"beta_dpo/gap_mean": 27.957683563232422,
"beta_dpo/gap_std": 34.10755157470703,
"beta_dpo/loss_margin_mean": 25.57542610168457,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.45351473922902497,
"grad_norm": 72.46057891845703,
"learning_rate": 3.3426136618426043e-07,
"logits/chosen": -3.077998399734497,
"logits/rejected": -3.09999942779541,
"loss": 0.957,
"step": 300
},
{
"epoch": 0.45351473922902497,
"eval_beta_dpo/beta": 0.008930782787501812,
"eval_beta_dpo/beta_margin_grad_mean": -0.48181334137916565,
"eval_beta_dpo/beta_margin_grad_std": 0.02493375353515148,
"eval_beta_dpo/beta_margin_mean": 0.24725361168384552,
"eval_beta_dpo/beta_margin_std": 0.31334593892097473,
"eval_beta_dpo/beta_used": 0.008930782787501812,
"eval_beta_dpo/beta_used_raw": -0.4499760866165161,
"eval_beta_dpo/gap_mean": 27.71484375,
"eval_beta_dpo/gap_std": 34.235591888427734,
"eval_beta_dpo/loss_margin_mean": 18.54857635498047,
"eval_beta_dpo/mask_keep_frac": 1.0,
"eval_logits/chosen": -3.1462209224700928,
"eval_logits/rejected": -3.1521239280700684,
"eval_loss": 0.6778478622436523,
"eval_runtime": 36.3038,
"eval_samples_per_second": 63.437,
"eval_steps_per_second": 1.983,
"step": 300
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4945734441280365,
"beta_dpo/beta_margin_grad_std": 0.008833258412778378,
"beta_dpo/beta_margin_mean": 0.02171691320836544,
"beta_dpo/beta_margin_std": 0.035356417298316956,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.43761324882507324,
"beta_dpo/gap_mean": 27.043323516845703,
"beta_dpo/gap_std": 34.602012634277344,
"beta_dpo/loss_margin_mean": 21.7169132232666,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.455026455026455,
"grad_norm": 2.3677213191986084,
"learning_rate": 3.3301533956555885e-07,
"logits/chosen": -3.0917038917541504,
"logits/rejected": -3.093848705291748,
"loss": 1.3685,
"step": 301
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4958673417568207,
"beta_dpo/beta_margin_grad_std": 0.00956540647894144,
"beta_dpo/beta_margin_mean": 0.016539258882403374,
"beta_dpo/beta_margin_std": 0.03828737139701843,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.4286280870437622,
"beta_dpo/gap_mean": 25.154075622558594,
"beta_dpo/gap_std": 35.083038330078125,
"beta_dpo/loss_margin_mean": 16.53925895690918,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4565381708238851,
"grad_norm": 2.3276662826538086,
"learning_rate": 3.317669908293554e-07,
"logits/chosen": -3.0932130813598633,
"logits/rejected": -3.1112184524536133,
"loss": 1.3702,
"step": 302
},
{
"beta_dpo/beta": 0.22752873599529266,
"beta_dpo/beta_margin_grad_mean": -0.2653931975364685,
"beta_dpo/beta_margin_grad_std": 0.3641763925552368,
"beta_dpo/beta_margin_mean": 5.339740753173828,
"beta_dpo/beta_margin_std": 10.414875030517578,
"beta_dpo/beta_used": 0.22752873599529266,
"beta_dpo/beta_used_raw": 0.22752873599529266,
"beta_dpo/gap_mean": 25.054428100585938,
"beta_dpo/gap_std": 35.5212287902832,
"beta_dpo/loss_margin_mean": 26.166316986083984,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4580498866213152,
"grad_norm": 575.6218872070312,
"learning_rate": 3.3051635489464793e-07,
"logits/chosen": -3.102856397628784,
"logits/rejected": -3.1255390644073486,
"loss": 1.4269,
"step": 303
},
{
"beta_dpo/beta": 0.2756160795688629,
"beta_dpo/beta_margin_grad_mean": -0.14261303842067719,
"beta_dpo/beta_margin_grad_std": 0.25643348693847656,
"beta_dpo/beta_margin_mean": 8.64633560180664,
"beta_dpo/beta_margin_std": 8.03923511505127,
"beta_dpo/beta_used": 0.2756160795688629,
"beta_dpo/beta_used_raw": 0.2756160795688629,
"beta_dpo/gap_mean": 25.673105239868164,
"beta_dpo/gap_std": 34.77922821044922,
"beta_dpo/loss_margin_mean": 31.018600463867188,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4595616024187453,
"grad_norm": 258.0193786621094,
"learning_rate": 3.292634667444117e-07,
"logits/chosen": -3.096405267715454,
"logits/rejected": -3.104170799255371,
"loss": 0.4785,
"step": 304
},
{
"beta_dpo/beta": 0.024367928504943848,
"beta_dpo/beta_margin_grad_mean": -0.4015085995197296,
"beta_dpo/beta_margin_grad_std": 0.1910669356584549,
"beta_dpo/beta_margin_mean": 0.6269474625587463,
"beta_dpo/beta_margin_std": 1.2529315948486328,
"beta_dpo/beta_used": 0.024367928504943848,
"beta_dpo/beta_used_raw": 0.008822512812912464,
"beta_dpo/gap_mean": 25.67166519165039,
"beta_dpo/gap_std": 34.137535095214844,
"beta_dpo/loss_margin_mean": 22.330886840820312,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.46107331821617537,
"grad_norm": 56.8546028137207,
"learning_rate": 3.280083614246217e-07,
"logits/chosen": -3.1241707801818848,
"logits/rejected": -3.1085455417633057,
"loss": 1.1314,
"step": 305
},
{
"beta_dpo/beta": 0.12957826256752014,
"beta_dpo/beta_margin_grad_mean": -0.3111119866371155,
"beta_dpo/beta_margin_grad_std": 0.25792446732521057,
"beta_dpo/beta_margin_mean": 3.537123203277588,
"beta_dpo/beta_margin_std": 5.821466445922852,
"beta_dpo/beta_used": 0.12957826256752014,
"beta_dpo/beta_used_raw": -0.16957488656044006,
"beta_dpo/gap_mean": 25.289710998535156,
"beta_dpo/gap_std": 33.20673751831055,
"beta_dpo/loss_margin_mean": 24.207487106323242,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.46258503401360546,
"grad_norm": 100.6487045288086,
"learning_rate": 3.267510740432719e-07,
"logits/chosen": -3.0455853939056396,
"logits/rejected": -3.0711987018585205,
"loss": 0.8367,
"step": 306
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49670496582984924,
"beta_dpo/beta_margin_grad_std": 0.00668869586661458,
"beta_dpo/beta_margin_mean": 0.013183152303099632,
"beta_dpo/beta_margin_std": 0.02676079049706459,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.22974413633346558,
"beta_dpo/gap_mean": 23.26692771911621,
"beta_dpo/gap_std": 32.258201599121094,
"beta_dpo/loss_margin_mean": 13.183152198791504,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.46409674981103555,
"grad_norm": 2.3683605194091797,
"learning_rate": 3.2549163976939285e-07,
"logits/chosen": -3.0820600986480713,
"logits/rejected": -3.088656425476074,
"loss": 1.3688,
"step": 307
},
{
"beta_dpo/beta": 0.17736674845218658,
"beta_dpo/beta_margin_grad_mean": -0.3689655363559723,
"beta_dpo/beta_margin_grad_std": 0.2941977381706238,
"beta_dpo/beta_margin_mean": 4.363021373748779,
"beta_dpo/beta_margin_std": 8.855971336364746,
"beta_dpo/beta_used": 0.17736674845218658,
"beta_dpo/beta_used_raw": 0.05397101491689682,
"beta_dpo/gap_mean": 22.642669677734375,
"beta_dpo/gap_std": 31.97774314880371,
"beta_dpo/loss_margin_mean": 20.16625213623047,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4656084656084656,
"grad_norm": 431.4309997558594,
"learning_rate": 3.2423009383206874e-07,
"logits/chosen": -3.1188559532165527,
"logits/rejected": -3.113797426223755,
"loss": 1.2251,
"step": 308
},
{
"beta_dpo/beta": 0.31901365518569946,
"beta_dpo/beta_margin_grad_mean": -0.19414803385734558,
"beta_dpo/beta_margin_grad_std": 0.33326566219329834,
"beta_dpo/beta_margin_mean": 8.445496559143066,
"beta_dpo/beta_margin_std": 9.520452499389648,
"beta_dpo/beta_used": 0.31901365518569946,
"beta_dpo/beta_used_raw": 0.31901365518569946,
"beta_dpo/gap_mean": 22.634998321533203,
"beta_dpo/gap_std": 31.254989624023438,
"beta_dpo/loss_margin_mean": 25.834514617919922,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4671201814058957,
"grad_norm": 631.443603515625,
"learning_rate": 3.229664715194511e-07,
"logits/chosen": -3.1062450408935547,
"logits/rejected": -3.117509126663208,
"loss": 1.2065,
"step": 309
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49598562717437744,
"beta_dpo/beta_margin_grad_std": 0.007933667860925198,
"beta_dpo/beta_margin_mean": 0.016062457114458084,
"beta_dpo/beta_margin_std": 0.03174532577395439,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.21138682961463928,
"beta_dpo/gap_mean": 21.895801544189453,
"beta_dpo/gap_std": 31.27450180053711,
"beta_dpo/loss_margin_mean": 16.062456130981445,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.46863189720332576,
"grad_norm": 2.2562851905822754,
"learning_rate": 3.2170080817777257e-07,
"logits/chosen": -3.1440863609313965,
"logits/rejected": -3.1391515731811523,
"loss": 1.3698,
"step": 310
},
{
"beta_dpo/beta": 0.04605334252119064,
"beta_dpo/beta_margin_grad_mean": -0.3821720480918884,
"beta_dpo/beta_margin_grad_std": 0.22194671630859375,
"beta_dpo/beta_margin_mean": 0.9799299836158752,
"beta_dpo/beta_margin_std": 2.2561373710632324,
"beta_dpo/beta_used": 0.04605334252119064,
"beta_dpo/beta_used_raw": -0.2480972558259964,
"beta_dpo/gap_mean": 21.276159286499023,
"beta_dpo/gap_std": 31.672595977783203,
"beta_dpo/loss_margin_mean": 17.68033218383789,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.47014361300075586,
"grad_norm": 90.18292236328125,
"learning_rate": 3.204331392103574e-07,
"logits/chosen": -3.092625141143799,
"logits/rejected": -3.1545519828796387,
"loss": 1.0585,
"step": 311
},
{
"beta_dpo/beta": 0.19550317525863647,
"beta_dpo/beta_margin_grad_mean": -0.32613497972488403,
"beta_dpo/beta_margin_grad_std": 0.2660830020904541,
"beta_dpo/beta_margin_mean": 5.628912448883057,
"beta_dpo/beta_margin_std": 9.664520263671875,
"beta_dpo/beta_used": 0.19550317525863647,
"beta_dpo/beta_used_raw": 0.18216973543167114,
"beta_dpo/gap_mean": 21.88336944580078,
"beta_dpo/gap_std": 31.547531127929688,
"beta_dpo/loss_margin_mean": 27.235628128051758,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.47165532879818595,
"grad_norm": 248.5424041748047,
"learning_rate": 3.1916350007663176e-07,
"logits/chosen": -3.0510053634643555,
"logits/rejected": -3.0680179595947266,
"loss": 0.8905,
"step": 312
},
{
"beta_dpo/beta": 0.07774581015110016,
"beta_dpo/beta_margin_grad_mean": -0.3854230046272278,
"beta_dpo/beta_margin_grad_std": 0.26763999462127686,
"beta_dpo/beta_margin_mean": 1.5867866277694702,
"beta_dpo/beta_margin_std": 4.212478160858154,
"beta_dpo/beta_used": 0.07774581015110016,
"beta_dpo/beta_used_raw": -0.010134972631931305,
"beta_dpo/gap_mean": 21.556020736694336,
"beta_dpo/gap_std": 30.96898651123047,
"beta_dpo/loss_margin_mean": 18.918975830078125,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.47316704459561604,
"grad_norm": 182.7671356201172,
"learning_rate": 3.178919262911314e-07,
"logits/chosen": -3.0897302627563477,
"logits/rejected": -3.090061664581299,
"loss": 1.2014,
"step": 313
},
{
"beta_dpo/beta": 0.3596024513244629,
"beta_dpo/beta_margin_grad_mean": -0.3246336877346039,
"beta_dpo/beta_margin_grad_std": 0.27894648909568787,
"beta_dpo/beta_margin_mean": 10.439764022827148,
"beta_dpo/beta_margin_std": 17.289690017700195,
"beta_dpo/beta_used": 0.3596024513244629,
"beta_dpo/beta_used_raw": 0.1643376499414444,
"beta_dpo/gap_mean": 22.33397102355957,
"beta_dpo/gap_std": 30.98306655883789,
"beta_dpo/loss_margin_mean": 26.475399017333984,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.47467876039304613,
"grad_norm": 748.7551879882812,
"learning_rate": 3.166184534225087e-07,
"logits/chosen": -3.0986547470092773,
"logits/rejected": -3.084320545196533,
"loss": 1.0381,
"step": 314
},
{
"beta_dpo/beta": 0.10931921005249023,
"beta_dpo/beta_margin_grad_mean": -0.23580533266067505,
"beta_dpo/beta_margin_grad_std": 0.2497079223394394,
"beta_dpo/beta_margin_mean": 2.5652661323547363,
"beta_dpo/beta_margin_std": 2.8321359157562256,
"beta_dpo/beta_used": 0.10931921005249023,
"beta_dpo/beta_used_raw": 0.10931921005249023,
"beta_dpo/gap_mean": 22.541690826416016,
"beta_dpo/gap_std": 30.285381317138672,
"beta_dpo/loss_margin_mean": 24.391767501831055,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.47619047619047616,
"grad_norm": 132.3946533203125,
"learning_rate": 3.1534311709253723e-07,
"logits/chosen": -3.1117889881134033,
"logits/rejected": -3.1100306510925293,
"loss": 0.7136,
"step": 315
},
{
"beta_dpo/beta": 0.24178995192050934,
"beta_dpo/beta_margin_grad_mean": -0.28057804703712463,
"beta_dpo/beta_margin_grad_std": 0.2632658779621124,
"beta_dpo/beta_margin_mean": 8.426165580749512,
"beta_dpo/beta_margin_std": 13.852724075317383,
"beta_dpo/beta_used": 0.24178995192050934,
"beta_dpo/beta_used_raw": 0.24178995192050934,
"beta_dpo/gap_mean": 22.66561508178711,
"beta_dpo/gap_std": 29.550121307373047,
"beta_dpo/loss_margin_mean": 24.974931716918945,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.47770219198790626,
"grad_norm": 232.22964477539062,
"learning_rate": 3.1406595297511564e-07,
"logits/chosen": -3.0905113220214844,
"logits/rejected": -3.130472183227539,
"loss": 0.8534,
"step": 316
},
{
"beta_dpo/beta": 0.05425131693482399,
"beta_dpo/beta_margin_grad_mean": -0.34794288873672485,
"beta_dpo/beta_margin_grad_std": 0.22771987318992615,
"beta_dpo/beta_margin_mean": 1.4577387571334839,
"beta_dpo/beta_margin_std": 2.5639567375183105,
"beta_dpo/beta_used": 0.05425131693482399,
"beta_dpo/beta_used_raw": -0.01501971110701561,
"beta_dpo/gap_mean": 23.502927780151367,
"beta_dpo/gap_std": 29.66475486755371,
"beta_dpo/loss_margin_mean": 23.33938217163086,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.47921390778533635,
"grad_norm": 134.48892211914062,
"learning_rate": 3.1278699679526975e-07,
"logits/chosen": -3.080479621887207,
"logits/rejected": -3.094151020050049,
"loss": 0.9584,
"step": 317
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4947490096092224,
"beta_dpo/beta_margin_grad_std": 0.007676298264414072,
"beta_dpo/beta_margin_mean": 0.021011577919125557,
"beta_dpo/beta_margin_std": 0.03071926161646843,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.3076367974281311,
"beta_dpo/gap_mean": 23.12640380859375,
"beta_dpo/gap_std": 30.08043670654297,
"beta_dpo/loss_margin_mean": 21.011577606201172,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.48072562358276644,
"grad_norm": 2.19538950920105,
"learning_rate": 3.1150628432815336e-07,
"logits/chosen": -3.1016392707824707,
"logits/rejected": -3.126415491104126,
"loss": 1.3702,
"step": 318
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49478694796562195,
"beta_dpo/beta_margin_grad_std": 0.007146132178604603,
"beta_dpo/beta_margin_mean": 0.020857563242316246,
"beta_dpo/beta_margin_std": 0.028595075011253357,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.11540670692920685,
"beta_dpo/gap_mean": 22.578601837158203,
"beta_dpo/gap_std": 29.50411605834961,
"beta_dpo/loss_margin_mean": 20.857563018798828,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.48223733938019653,
"grad_norm": 2.7037503719329834,
"learning_rate": 3.1022385139804707e-07,
"logits/chosen": -3.122105121612549,
"logits/rejected": -3.1292996406555176,
"loss": 1.3676,
"step": 319
},
{
"beta_dpo/beta": 0.013909117318689823,
"beta_dpo/beta_margin_grad_mean": -0.4385174512863159,
"beta_dpo/beta_margin_grad_std": 0.12322162836790085,
"beta_dpo/beta_margin_mean": 0.2849900722503662,
"beta_dpo/beta_margin_std": 0.5963538289070129,
"beta_dpo/beta_used": 0.013909117318689823,
"beta_dpo/beta_used_raw": -0.02531212382018566,
"beta_dpo/gap_mean": 21.585987091064453,
"beta_dpo/gap_std": 29.407155990600586,
"beta_dpo/loss_margin_mean": 17.481781005859375,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4837490551776266,
"grad_norm": 31.11320686340332,
"learning_rate": 3.0893973387735683e-07,
"logits/chosen": -3.0919084548950195,
"logits/rejected": -3.0990891456604004,
"loss": 1.1796,
"step": 320
},
{
"beta_dpo/beta": 0.10938524454832077,
"beta_dpo/beta_margin_grad_mean": -0.3188154101371765,
"beta_dpo/beta_margin_grad_std": 0.26359623670578003,
"beta_dpo/beta_margin_mean": 2.4428858757019043,
"beta_dpo/beta_margin_std": 5.073835372924805,
"beta_dpo/beta_used": 0.10938524454832077,
"beta_dpo/beta_used_raw": -0.029399722814559937,
"beta_dpo/gap_mean": 21.24488067626953,
"beta_dpo/gap_std": 29.5472469329834,
"beta_dpo/loss_margin_mean": 18.210344314575195,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4852607709750567,
"grad_norm": 81.23643493652344,
"learning_rate": 3.0765396768561004e-07,
"logits/chosen": -3.050200939178467,
"logits/rejected": -3.052530288696289,
"loss": 0.816,
"step": 321
},
{
"beta_dpo/beta": 0.33700641989707947,
"beta_dpo/beta_margin_grad_mean": -0.19358323514461517,
"beta_dpo/beta_margin_grad_std": 0.3171708583831787,
"beta_dpo/beta_margin_mean": 9.05479621887207,
"beta_dpo/beta_margin_std": 12.222949028015137,
"beta_dpo/beta_used": 0.33700641989707947,
"beta_dpo/beta_used_raw": 0.33700641989707947,
"beta_dpo/gap_mean": 21.0283203125,
"beta_dpo/gap_std": 29.169437408447266,
"beta_dpo/loss_margin_mean": 23.73666763305664,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.48677248677248675,
"grad_norm": 504.5341796875,
"learning_rate": 3.063665887884511e-07,
"logits/chosen": -3.0815377235412598,
"logits/rejected": -3.115631341934204,
"loss": 0.6304,
"step": 322
},
{
"beta_dpo/beta": 0.04038708657026291,
"beta_dpo/beta_margin_grad_mean": -0.40935397148132324,
"beta_dpo/beta_margin_grad_std": 0.24039596319198608,
"beta_dpo/beta_margin_mean": 0.7104516625404358,
"beta_dpo/beta_margin_std": 2.1563162803649902,
"beta_dpo/beta_used": 0.04038708657026291,
"beta_dpo/beta_used_raw": -0.09349645674228668,
"beta_dpo/gap_mean": 21.222349166870117,
"beta_dpo/gap_std": 30.24327850341797,
"beta_dpo/loss_margin_mean": 20.792327880859375,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.48828420256991684,
"grad_norm": 89.33271789550781,
"learning_rate": 3.0507763319663517e-07,
"logits/chosen": -3.0897750854492188,
"logits/rejected": -3.1133480072021484,
"loss": 1.0797,
"step": 323
},
{
"beta_dpo/beta": 0.06554640829563141,
"beta_dpo/beta_margin_grad_mean": -0.34972235560417175,
"beta_dpo/beta_margin_grad_std": 0.22968170046806335,
"beta_dpo/beta_margin_mean": 1.5705469846725464,
"beta_dpo/beta_margin_std": 2.733186960220337,
"beta_dpo/beta_used": 0.06554640829563141,
"beta_dpo/beta_used_raw": 0.030260443687438965,
"beta_dpo/gap_mean": 21.68192481994629,
"beta_dpo/gap_std": 30.1425724029541,
"beta_dpo/loss_margin_mean": 23.29752540588379,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4897959183673469,
"grad_norm": 141.7134246826172,
"learning_rate": 3.0378713696502097e-07,
"logits/chosen": -3.0900096893310547,
"logits/rejected": -3.1014513969421387,
"loss": 0.9758,
"step": 324
},
{
"beta_dpo/beta": 0.1374254822731018,
"beta_dpo/beta_margin_grad_mean": -0.33930516242980957,
"beta_dpo/beta_margin_grad_std": 0.26967304944992065,
"beta_dpo/beta_margin_mean": 4.346703052520752,
"beta_dpo/beta_margin_std": 7.695768356323242,
"beta_dpo/beta_used": 0.1374254822731018,
"beta_dpo/beta_used_raw": 0.08679935336112976,
"beta_dpo/gap_mean": 22.803340911865234,
"beta_dpo/gap_std": 30.76717758178711,
"beta_dpo/loss_margin_mean": 27.983734130859375,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.491307634164777,
"grad_norm": 266.2857971191406,
"learning_rate": 3.0249513619156206e-07,
"logits/chosen": -3.080242156982422,
"logits/rejected": -3.102264881134033,
"loss": 1.0491,
"step": 325
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49647924304008484,
"beta_dpo/beta_margin_grad_std": 0.0077100652270019054,
"beta_dpo/beta_margin_mean": 0.014087283983826637,
"beta_dpo/beta_margin_std": 0.03085058182477951,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.25530245900154114,
"beta_dpo/gap_mean": 21.672607421875,
"beta_dpo/gap_std": 30.98316192626953,
"beta_dpo/loss_margin_mean": 14.08728313446045,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4928193499622071,
"grad_norm": 2.3197293281555176,
"learning_rate": 3.012016670162977e-07,
"logits/chosen": -3.093902587890625,
"logits/rejected": -3.079530715942383,
"loss": 1.3708,
"step": 326
},
{
"beta_dpo/beta": 0.08799388259649277,
"beta_dpo/beta_margin_grad_mean": -0.360331654548645,
"beta_dpo/beta_margin_grad_std": 0.24783527851104736,
"beta_dpo/beta_margin_mean": 2.034658670425415,
"beta_dpo/beta_margin_std": 4.489603042602539,
"beta_dpo/beta_used": 0.08799388259649277,
"beta_dpo/beta_used_raw": -0.13810209929943085,
"beta_dpo/gap_mean": 21.196407318115234,
"beta_dpo/gap_std": 31.41318702697754,
"beta_dpo/loss_margin_mean": 20.30933380126953,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4943310657596372,
"grad_norm": 131.3867645263672,
"learning_rate": 2.99906765620341e-07,
"logits/chosen": -3.1136116981506348,
"logits/rejected": -3.109332799911499,
"loss": 0.9219,
"step": 327
},
{
"beta_dpo/beta": 0.18263430893421173,
"beta_dpo/beta_margin_grad_mean": -0.27841395139694214,
"beta_dpo/beta_margin_grad_std": 0.3048444986343384,
"beta_dpo/beta_margin_mean": 3.9657938480377197,
"beta_dpo/beta_margin_std": 6.533381938934326,
"beta_dpo/beta_used": 0.18263430893421173,
"beta_dpo/beta_used_raw": 0.18263430893421173,
"beta_dpo/gap_mean": 21.557958602905273,
"beta_dpo/gap_std": 31.18227767944336,
"beta_dpo/loss_margin_mean": 23.918094635009766,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4958427815570673,
"grad_norm": 292.41162109375,
"learning_rate": 2.9861046822486766e-07,
"logits/chosen": -3.1102488040924072,
"logits/rejected": -3.1353330612182617,
"loss": 0.9977,
"step": 328
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49358275532722473,
"beta_dpo/beta_margin_grad_std": 0.008446736261248589,
"beta_dpo/beta_margin_mean": 0.025679970160126686,
"beta_dpo/beta_margin_std": 0.03380631282925606,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.14240968227386475,
"beta_dpo/gap_mean": 22.238048553466797,
"beta_dpo/gap_std": 31.547744750976562,
"beta_dpo/loss_margin_mean": 25.679967880249023,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4973544973544973,
"grad_norm": 2.604764461517334,
"learning_rate": 2.9731281109010253e-07,
"logits/chosen": -3.081068992614746,
"logits/rejected": -3.1000638008117676,
"loss": 1.3684,
"step": 329
},
{
"beta_dpo/beta": 0.2137632519006729,
"beta_dpo/beta_margin_grad_mean": -0.3222753703594208,
"beta_dpo/beta_margin_grad_std": 0.26691463589668274,
"beta_dpo/beta_margin_mean": 6.505101203918457,
"beta_dpo/beta_margin_std": 10.973447799682617,
"beta_dpo/beta_used": 0.2137632519006729,
"beta_dpo/beta_used_raw": 0.034893929958343506,
"beta_dpo/gap_mean": 22.187713623046875,
"beta_dpo/gap_std": 31.6619930267334,
"beta_dpo/loss_margin_mean": 23.690584182739258,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4988662131519274,
"grad_norm": 177.8109130859375,
"learning_rate": 2.9601383051430505e-07,
"logits/chosen": -3.0457208156585693,
"logits/rejected": -3.0508902072906494,
"loss": 0.7966,
"step": 330
},
{
"beta_dpo/beta": 0.422149658203125,
"beta_dpo/beta_margin_grad_mean": -0.16604149341583252,
"beta_dpo/beta_margin_grad_std": 0.31206512451171875,
"beta_dpo/beta_margin_mean": 15.053980827331543,
"beta_dpo/beta_margin_std": 18.832624435424805,
"beta_dpo/beta_used": 0.422149658203125,
"beta_dpo/beta_used_raw": 0.422149658203125,
"beta_dpo/gap_mean": 24.266925811767578,
"beta_dpo/gap_std": 31.902421951293945,
"beta_dpo/loss_margin_mean": 34.640830993652344,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5003779289493575,
"grad_norm": 952.3717651367188,
"learning_rate": 2.947135628327544e-07,
"logits/chosen": -3.0645158290863037,
"logits/rejected": -3.066699981689453,
"loss": 1.2058,
"step": 331
},
{
"beta_dpo/beta": 0.08614806830883026,
"beta_dpo/beta_margin_grad_mean": -0.24676524102687836,
"beta_dpo/beta_margin_grad_std": 0.25958746671676636,
"beta_dpo/beta_margin_mean": 2.62640118598938,
"beta_dpo/beta_margin_std": 3.29146671295166,
"beta_dpo/beta_used": 0.08614806830883026,
"beta_dpo/beta_used_raw": 0.08614806830883026,
"beta_dpo/gap_mean": 25.25523567199707,
"beta_dpo/gap_std": 32.544822692871094,
"beta_dpo/loss_margin_mean": 28.859230041503906,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5018896447467877,
"grad_norm": 113.77040100097656,
"learning_rate": 2.934120444167326e-07,
"logits/chosen": -3.1057910919189453,
"logits/rejected": -3.105198383331299,
"loss": 0.8209,
"step": 332
},
{
"beta_dpo/beta": 0.46624481678009033,
"beta_dpo/beta_margin_grad_mean": -0.19371409714221954,
"beta_dpo/beta_margin_grad_std": 0.36706358194351196,
"beta_dpo/beta_margin_mean": 15.309916496276855,
"beta_dpo/beta_margin_std": 15.727444648742676,
"beta_dpo/beta_used": 0.46624481678009033,
"beta_dpo/beta_used_raw": 0.46624481678009033,
"beta_dpo/gap_mean": 26.6208553314209,
"beta_dpo/gap_std": 32.83556365966797,
"beta_dpo/loss_margin_mean": 32.52452850341797,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5034013605442177,
"grad_norm": 676.2604370117188,
"learning_rate": 2.921093116725076e-07,
"logits/chosen": -3.084399461746216,
"logits/rejected": -3.111693859100342,
"loss": 1.1624,
"step": 333
},
{
"beta_dpo/beta": 0.13088715076446533,
"beta_dpo/beta_margin_grad_mean": -0.27541545033454895,
"beta_dpo/beta_margin_grad_std": 0.3252545893192291,
"beta_dpo/beta_margin_mean": 3.337428092956543,
"beta_dpo/beta_margin_std": 5.199645519256592,
"beta_dpo/beta_used": 0.13088715076446533,
"beta_dpo/beta_used_raw": 0.13088715076446533,
"beta_dpo/gap_mean": 26.728729248046875,
"beta_dpo/gap_std": 33.057777404785156,
"beta_dpo/loss_margin_mean": 25.13127899169922,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5049130763416477,
"grad_norm": 203.92538452148438,
"learning_rate": 2.9080540104031484e-07,
"logits/chosen": -3.062560796737671,
"logits/rejected": -3.090407609939575,
"loss": 0.7656,
"step": 334
},
{
"beta_dpo/beta": 0.09639487415552139,
"beta_dpo/beta_margin_grad_mean": -0.34168577194213867,
"beta_dpo/beta_margin_grad_std": 0.2604123055934906,
"beta_dpo/beta_margin_mean": 2.6311426162719727,
"beta_dpo/beta_margin_std": 4.993647575378418,
"beta_dpo/beta_used": 0.09639487415552139,
"beta_dpo/beta_used_raw": -0.053624749183654785,
"beta_dpo/gap_mean": 26.683143615722656,
"beta_dpo/gap_std": 32.86904525756836,
"beta_dpo/loss_margin_mean": 26.375513076782227,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5064247921390779,
"grad_norm": 98.15483093261719,
"learning_rate": 2.895003489933375e-07,
"logits/chosen": -3.034583568572998,
"logits/rejected": -3.0533030033111572,
"loss": 0.9138,
"step": 335
},
{
"beta_dpo/beta": 0.02283310517668724,
"beta_dpo/beta_margin_grad_mean": -0.36344635486602783,
"beta_dpo/beta_margin_grad_std": 0.1789086014032364,
"beta_dpo/beta_margin_mean": 0.8441472053527832,
"beta_dpo/beta_margin_std": 1.3744276762008667,
"beta_dpo/beta_used": 0.02283310517668724,
"beta_dpo/beta_used_raw": -0.07438748329877853,
"beta_dpo/gap_mean": 27.459758758544922,
"beta_dpo/gap_std": 33.12605285644531,
"beta_dpo/loss_margin_mean": 30.238086700439453,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5079365079365079,
"grad_norm": 54.78199005126953,
"learning_rate": 2.8819419203668675e-07,
"logits/chosen": -3.0631165504455566,
"logits/rejected": -3.077131748199463,
"loss": 1.0358,
"step": 336
},
{
"beta_dpo/beta": 0.2168283760547638,
"beta_dpo/beta_margin_grad_mean": -0.2655988335609436,
"beta_dpo/beta_margin_grad_std": 0.370185524225235,
"beta_dpo/beta_margin_mean": 5.198827743530273,
"beta_dpo/beta_margin_std": 8.442659378051758,
"beta_dpo/beta_used": 0.2168283760547638,
"beta_dpo/beta_used_raw": 0.2168283760547638,
"beta_dpo/gap_mean": 26.88589096069336,
"beta_dpo/gap_std": 34.229557037353516,
"beta_dpo/loss_margin_mean": 23.979177474975586,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.509448223733938,
"grad_norm": 383.6023254394531,
"learning_rate": 2.8688696670638053e-07,
"logits/chosen": -3.093763828277588,
"logits/rejected": -3.103431224822998,
"loss": 0.8139,
"step": 337
},
{
"beta_dpo/beta": 0.15525385737419128,
"beta_dpo/beta_margin_grad_mean": -0.22968794405460358,
"beta_dpo/beta_margin_grad_std": 0.3068045377731323,
"beta_dpo/beta_margin_mean": 4.072103977203369,
"beta_dpo/beta_margin_std": 6.031183242797852,
"beta_dpo/beta_used": 0.15525385737419128,
"beta_dpo/beta_used_raw": 0.15525385737419128,
"beta_dpo/gap_mean": 26.548250198364258,
"beta_dpo/gap_std": 34.126678466796875,
"beta_dpo/loss_margin_mean": 26.27375030517578,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5109599395313681,
"grad_norm": 221.94122314453125,
"learning_rate": 2.8557870956832133e-07,
"logits/chosen": -3.0307488441467285,
"logits/rejected": -3.0351829528808594,
"loss": 0.6189,
"step": 338
},
{
"beta_dpo/beta": 0.09032527357339859,
"beta_dpo/beta_margin_grad_mean": -0.35613423585891724,
"beta_dpo/beta_margin_grad_std": 0.28700828552246094,
"beta_dpo/beta_margin_mean": 2.6703038215637207,
"beta_dpo/beta_margin_std": 5.1349921226501465,
"beta_dpo/beta_used": 0.09032527357339859,
"beta_dpo/beta_used_raw": -0.07975070178508759,
"beta_dpo/gap_mean": 26.73078155517578,
"beta_dpo/gap_std": 34.21294021606445,
"beta_dpo/loss_margin_mean": 27.1737117767334,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5124716553287982,
"grad_norm": 277.2163391113281,
"learning_rate": 2.842694572172736e-07,
"logits/chosen": -3.0108513832092285,
"logits/rejected": -3.042417526245117,
"loss": 1.2379,
"step": 339
},
{
"beta_dpo/beta": 0.04224841296672821,
"beta_dpo/beta_margin_grad_mean": -0.38084977865219116,
"beta_dpo/beta_margin_grad_std": 0.24013683199882507,
"beta_dpo/beta_margin_mean": 1.1847941875457764,
"beta_dpo/beta_margin_std": 2.4049177169799805,
"beta_dpo/beta_used": 0.04224841296672821,
"beta_dpo/beta_used_raw": 0.038572296500205994,
"beta_dpo/gap_mean": 26.897045135498047,
"beta_dpo/gap_std": 34.5772819519043,
"beta_dpo/loss_margin_mean": 28.697805404663086,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5139833711262283,
"grad_norm": 86.58854675292969,
"learning_rate": 2.8295924627584004e-07,
"logits/chosen": -3.0347347259521484,
"logits/rejected": -3.033782958984375,
"loss": 1.1256,
"step": 340
},
{
"beta_dpo/beta": 0.3806644678115845,
"beta_dpo/beta_margin_grad_mean": -0.33302900195121765,
"beta_dpo/beta_margin_grad_std": 0.3049252927303314,
"beta_dpo/beta_margin_mean": 13.137605667114258,
"beta_dpo/beta_margin_std": 22.449344635009766,
"beta_dpo/beta_used": 0.3806644678115845,
"beta_dpo/beta_used_raw": 0.15880805253982544,
"beta_dpo/gap_mean": 27.218364715576172,
"beta_dpo/gap_std": 34.92621994018555,
"beta_dpo/loss_margin_mean": 25.560880661010742,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5154950869236583,
"grad_norm": 676.5250244140625,
"learning_rate": 2.816481133934373e-07,
"logits/chosen": -3.041550397872925,
"logits/rejected": -3.067988395690918,
"loss": 1.6666,
"step": 341
},
{
"beta_dpo/beta": 0.12396855652332306,
"beta_dpo/beta_margin_grad_mean": -0.39350685477256775,
"beta_dpo/beta_margin_grad_std": 0.29025062918663025,
"beta_dpo/beta_margin_mean": 3.8081181049346924,
"beta_dpo/beta_margin_std": 7.531540870666504,
"beta_dpo/beta_used": 0.12396855652332306,
"beta_dpo/beta_used_raw": -0.028024710714817047,
"beta_dpo/gap_mean": 27.129390716552734,
"beta_dpo/gap_std": 35.468772888183594,
"beta_dpo/loss_margin_mean": 29.53389549255371,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5170068027210885,
"grad_norm": 300.1034851074219,
"learning_rate": 2.8033609524527046e-07,
"logits/chosen": -2.9938724040985107,
"logits/rejected": -3.0116021633148193,
"loss": 1.3446,
"step": 342
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4942605793476105,
"beta_dpo/beta_margin_grad_std": 0.007328690029680729,
"beta_dpo/beta_margin_mean": 0.022965017706155777,
"beta_dpo/beta_margin_std": 0.02932850830256939,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.37825995683670044,
"beta_dpo/gap_mean": 26.96743392944336,
"beta_dpo/gap_std": 34.50664520263672,
"beta_dpo/loss_margin_mean": 22.965015411376953,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5185185185185185,
"grad_norm": 2.441251039505005,
"learning_rate": 2.7902322853130753e-07,
"logits/chosen": -3.1121671199798584,
"logits/rejected": -3.112107753753662,
"loss": 1.3675,
"step": 343
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4937654137611389,
"beta_dpo/beta_margin_grad_std": 0.008288533426821232,
"beta_dpo/beta_margin_mean": 0.024948405101895332,
"beta_dpo/beta_margin_std": 0.03317340835928917,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.12604382634162903,
"beta_dpo/gap_mean": 26.214689254760742,
"beta_dpo/gap_std": 34.25294494628906,
"beta_dpo/loss_margin_mean": 24.94840431213379,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5200302343159486,
"grad_norm": 2.634648323059082,
"learning_rate": 2.7770954997525274e-07,
"logits/chosen": -3.0305612087249756,
"logits/rejected": -3.0571539402008057,
"loss": 1.3642,
"step": 344
},
{
"beta_dpo/beta": 0.4966987371444702,
"beta_dpo/beta_margin_grad_mean": -0.27788203954696655,
"beta_dpo/beta_margin_grad_std": 0.40353062748908997,
"beta_dpo/beta_margin_mean": 13.674936294555664,
"beta_dpo/beta_margin_std": 20.05881690979004,
"beta_dpo/beta_used": 0.4966987371444702,
"beta_dpo/beta_used_raw": 0.4966987371444702,
"beta_dpo/gap_mean": 26.283050537109375,
"beta_dpo/gap_std": 34.696529388427734,
"beta_dpo/loss_margin_mean": 26.915929794311523,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5215419501133787,
"grad_norm": 643.2703857421875,
"learning_rate": 2.7639509632351927e-07,
"logits/chosen": -3.063925266265869,
"logits/rejected": -3.0827674865722656,
"loss": 1.4227,
"step": 345
},
{
"beta_dpo/beta": 0.12412844598293304,
"beta_dpo/beta_margin_grad_mean": -0.27781710028648376,
"beta_dpo/beta_margin_grad_std": 0.3056802749633789,
"beta_dpo/beta_margin_mean": 3.6865780353546143,
"beta_dpo/beta_margin_std": 6.440756797790527,
"beta_dpo/beta_used": 0.12412844598293304,
"beta_dpo/beta_used_raw": 0.12412844598293304,
"beta_dpo/gap_mean": 26.50796127319336,
"beta_dpo/gap_std": 34.49790954589844,
"beta_dpo/loss_margin_mean": 28.906465530395508,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5230536659108088,
"grad_norm": 330.2554016113281,
"learning_rate": 2.7507990434420123e-07,
"logits/chosen": -3.0775437355041504,
"logits/rejected": -3.0960750579833984,
"loss": 0.9698,
"step": 346
},
{
"beta_dpo/beta": 0.006778246723115444,
"beta_dpo/beta_margin_grad_mean": -0.450185626745224,
"beta_dpo/beta_margin_grad_std": 0.08872085064649582,
"beta_dpo/beta_margin_mean": 0.21163569390773773,
"beta_dpo/beta_margin_std": 0.3837166428565979,
"beta_dpo/beta_used": 0.006778246723115444,
"beta_dpo/beta_used_raw": -0.2274744063615799,
"beta_dpo/gap_mean": 26.954315185546875,
"beta_dpo/gap_std": 35.01631164550781,
"beta_dpo/loss_margin_mean": 26.906822204589844,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5245653817082389,
"grad_norm": 16.952098846435547,
"learning_rate": 2.737640108260456e-07,
"logits/chosen": -3.0027999877929688,
"logits/rejected": -3.021895170211792,
"loss": 1.2436,
"step": 347
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49310407042503357,
"beta_dpo/beta_margin_grad_std": 0.008573891595005989,
"beta_dpo/beta_margin_mean": 0.027595363557338715,
"beta_dpo/beta_margin_std": 0.03431578353047371,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.10145647823810577,
"beta_dpo/gap_mean": 27.0013427734375,
"beta_dpo/gap_std": 34.96954345703125,
"beta_dpo/loss_margin_mean": 27.59536361694336,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5260770975056689,
"grad_norm": 2.287372589111328,
"learning_rate": 2.724474525774229e-07,
"logits/chosen": -3.0348544120788574,
"logits/rejected": -3.048654556274414,
"loss": 1.363,
"step": 348
},
{
"beta_dpo/beta": 0.43581968545913696,
"beta_dpo/beta_margin_grad_mean": -0.2282140702009201,
"beta_dpo/beta_margin_grad_std": 0.3532249629497528,
"beta_dpo/beta_margin_mean": 15.442870140075684,
"beta_dpo/beta_margin_std": 23.58800506591797,
"beta_dpo/beta_used": 0.43581968545913696,
"beta_dpo/beta_used_raw": 0.43581968545913696,
"beta_dpo/gap_mean": 27.023746490478516,
"beta_dpo/gap_std": 34.86962127685547,
"beta_dpo/loss_margin_mean": 30.510757446289062,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.527588813303099,
"grad_norm": 881.6084594726562,
"learning_rate": 2.711302664252973e-07,
"logits/chosen": -3.040865898132324,
"logits/rejected": -3.05991792678833,
"loss": 1.1449,
"step": 349
},
{
"beta_dpo/beta": 0.18699753284454346,
"beta_dpo/beta_margin_grad_mean": -0.20494963228702545,
"beta_dpo/beta_margin_grad_std": 0.3321399390697479,
"beta_dpo/beta_margin_mean": 5.577596187591553,
"beta_dpo/beta_margin_std": 6.836084365844727,
"beta_dpo/beta_used": 0.18699753284454346,
"beta_dpo/beta_used_raw": 0.18699753284454346,
"beta_dpo/gap_mean": 28.044925689697266,
"beta_dpo/gap_std": 35.316062927246094,
"beta_dpo/loss_margin_mean": 29.924606323242188,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5291005291005291,
"grad_norm": 306.10894775390625,
"learning_rate": 2.698124892141971e-07,
"logits/chosen": -3.0329248905181885,
"logits/rejected": -3.0526227951049805,
"loss": 1.1619,
"step": 350
},
{
"beta_dpo/beta": 0.1541515588760376,
"beta_dpo/beta_margin_grad_mean": -0.25190550088882446,
"beta_dpo/beta_margin_grad_std": 0.3161279857158661,
"beta_dpo/beta_margin_mean": 4.117360591888428,
"beta_dpo/beta_margin_std": 8.284092903137207,
"beta_dpo/beta_used": 0.1541515588760376,
"beta_dpo/beta_used_raw": 0.1541515588760376,
"beta_dpo/gap_mean": 28.019916534423828,
"beta_dpo/gap_std": 36.171875,
"beta_dpo/loss_margin_mean": 27.668113708496094,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5306122448979592,
"grad_norm": 276.933837890625,
"learning_rate": 2.6849415780518357e-07,
"logits/chosen": -3.0552468299865723,
"logits/rejected": -3.074361801147461,
"loss": 0.9665,
"step": 351
},
{
"beta_dpo/beta": 0.08424170315265656,
"beta_dpo/beta_margin_grad_mean": -0.3242875039577484,
"beta_dpo/beta_margin_grad_std": 0.2602542042732239,
"beta_dpo/beta_margin_mean": 2.619900703430176,
"beta_dpo/beta_margin_std": 4.4611945152282715,
"beta_dpo/beta_used": 0.08424170315265656,
"beta_dpo/beta_used_raw": 0.0368683747947216,
"beta_dpo/gap_mean": 28.124879837036133,
"beta_dpo/gap_std": 35.932098388671875,
"beta_dpo/loss_margin_mean": 28.422788619995117,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5321239606953893,
"grad_norm": 190.7087860107422,
"learning_rate": 2.6717530907482024e-07,
"logits/chosen": -3.0320374965667725,
"logits/rejected": -3.0459280014038086,
"loss": 1.0425,
"step": 352
},
{
"beta_dpo/beta": 0.18638579547405243,
"beta_dpo/beta_margin_grad_mean": -0.29680606722831726,
"beta_dpo/beta_margin_grad_std": 0.2870534360408783,
"beta_dpo/beta_margin_mean": 6.207462787628174,
"beta_dpo/beta_margin_std": 10.147928237915039,
"beta_dpo/beta_used": 0.18638579547405243,
"beta_dpo/beta_used_raw": 0.18638579547405243,
"beta_dpo/gap_mean": 27.78827667236328,
"beta_dpo/gap_std": 35.74189758300781,
"beta_dpo/loss_margin_mean": 28.24288558959961,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5336356764928194,
"grad_norm": 242.77536010742188,
"learning_rate": 2.658559799141411e-07,
"logits/chosen": -3.063218355178833,
"logits/rejected": -3.0526952743530273,
"loss": 0.9444,
"step": 353
},
{
"beta_dpo/beta": 0.23877619206905365,
"beta_dpo/beta_margin_grad_mean": -0.34234434366226196,
"beta_dpo/beta_margin_grad_std": 0.31028464436531067,
"beta_dpo/beta_margin_mean": 8.543641090393066,
"beta_dpo/beta_margin_std": 14.812517166137695,
"beta_dpo/beta_used": 0.23877619206905365,
"beta_dpo/beta_used_raw": -0.002296730875968933,
"beta_dpo/gap_mean": 28.852535247802734,
"beta_dpo/gap_std": 35.82675552368164,
"beta_dpo/loss_margin_mean": 32.170711517333984,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5351473922902494,
"grad_norm": 581.1064453125,
"learning_rate": 2.6453620722761895e-07,
"logits/chosen": -3.008025646209717,
"logits/rejected": -3.0519351959228516,
"loss": 1.4547,
"step": 354
},
{
"beta_dpo/beta": 0.22954122722148895,
"beta_dpo/beta_margin_grad_mean": -0.24678458273410797,
"beta_dpo/beta_margin_grad_std": 0.3609658479690552,
"beta_dpo/beta_margin_mean": 6.767334461212158,
"beta_dpo/beta_margin_std": 9.707292556762695,
"beta_dpo/beta_used": 0.22954122722148895,
"beta_dpo/beta_used_raw": 0.22954122722148895,
"beta_dpo/gap_mean": 28.857769012451172,
"beta_dpo/gap_std": 36.830535888671875,
"beta_dpo/loss_margin_mean": 30.060108184814453,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5366591080876795,
"grad_norm": 343.97576904296875,
"learning_rate": 2.632160279321328e-07,
"logits/chosen": -3.011507749557495,
"logits/rejected": -3.050220489501953,
"loss": 0.9037,
"step": 355
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4941824674606323,
"beta_dpo/beta_margin_grad_std": 0.010640624910593033,
"beta_dpo/beta_margin_mean": 0.023282045498490334,
"beta_dpo/beta_margin_std": 0.0425870306789875,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.31006890535354614,
"beta_dpo/gap_mean": 28.550745010375977,
"beta_dpo/gap_std": 37.39855194091797,
"beta_dpo/loss_margin_mean": 23.28204345703125,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5381708238851096,
"grad_norm": 2.3557276725769043,
"learning_rate": 2.618954789559356e-07,
"logits/chosen": -3.0701308250427246,
"logits/rejected": -3.1028971672058105,
"loss": 1.365,
"step": 356
},
{
"beta_dpo/beta": 0.20575466752052307,
"beta_dpo/beta_margin_grad_mean": -0.3113231658935547,
"beta_dpo/beta_margin_grad_std": 0.2871396243572235,
"beta_dpo/beta_margin_mean": 7.273690223693848,
"beta_dpo/beta_margin_std": 11.427718162536621,
"beta_dpo/beta_used": 0.20575466752052307,
"beta_dpo/beta_used_raw": 0.0213395357131958,
"beta_dpo/gap_mean": 28.180965423583984,
"beta_dpo/gap_std": 36.696205139160156,
"beta_dpo/loss_margin_mean": 27.05858612060547,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5396825396825397,
"grad_norm": 166.8899688720703,
"learning_rate": 2.6057459723762076e-07,
"logits/chosen": -3.0578231811523438,
"logits/rejected": -3.068431854248047,
"loss": 0.8761,
"step": 357
},
{
"beta_dpo/beta": 0.33314621448516846,
"beta_dpo/beta_margin_grad_mean": -0.1788199245929718,
"beta_dpo/beta_margin_grad_std": 0.3286896347999573,
"beta_dpo/beta_margin_mean": 11.201356887817383,
"beta_dpo/beta_margin_std": 12.418009757995605,
"beta_dpo/beta_used": 0.33314621448516846,
"beta_dpo/beta_used_raw": 0.33314621448516846,
"beta_dpo/gap_mean": 28.484966278076172,
"beta_dpo/gap_std": 36.438079833984375,
"beta_dpo/loss_margin_mean": 33.80877685546875,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5411942554799698,
"grad_norm": 385.5603942871094,
"learning_rate": 2.5925341972508954e-07,
"logits/chosen": -3.0701661109924316,
"logits/rejected": -3.071199893951416,
"loss": 0.7726,
"step": 358
},
{
"beta_dpo/beta": 0.12201699614524841,
"beta_dpo/beta_margin_grad_mean": -0.36096030473709106,
"beta_dpo/beta_margin_grad_std": 0.28947874903678894,
"beta_dpo/beta_margin_mean": 3.56563138961792,
"beta_dpo/beta_margin_std": 7.321877479553223,
"beta_dpo/beta_used": 0.12201699614524841,
"beta_dpo/beta_used_raw": -0.1470046043395996,
"beta_dpo/gap_mean": 27.712411880493164,
"beta_dpo/gap_std": 35.134971618652344,
"beta_dpo/loss_margin_mean": 22.88962745666504,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5427059712773998,
"grad_norm": 316.13494873046875,
"learning_rate": 2.579319833745169e-07,
"logits/chosen": -3.070073366165161,
"logits/rejected": -3.0710277557373047,
"loss": 1.2098,
"step": 359
},
{
"beta_dpo/beta": 0.05261658504605293,
"beta_dpo/beta_margin_grad_mean": -0.35027578473091125,
"beta_dpo/beta_margin_grad_std": 0.22255617380142212,
"beta_dpo/beta_margin_mean": 1.4933468103408813,
"beta_dpo/beta_margin_std": 2.535097122192383,
"beta_dpo/beta_used": 0.05261658504605293,
"beta_dpo/beta_used_raw": 0.05161638185381889,
"beta_dpo/gap_mean": 27.89132308959961,
"beta_dpo/gap_std": 35.49271774291992,
"beta_dpo/loss_margin_mean": 28.442256927490234,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.54421768707483,
"grad_norm": 103.88701629638672,
"learning_rate": 2.5661032514931834e-07,
"logits/chosen": -3.0532474517822266,
"logits/rejected": -3.082444190979004,
"loss": 0.9755,
"step": 360
},
{
"beta_dpo/beta": 0.16896192729473114,
"beta_dpo/beta_margin_grad_mean": -0.25768980383872986,
"beta_dpo/beta_margin_grad_std": 0.34136348962783813,
"beta_dpo/beta_margin_mean": 4.6439409255981445,
"beta_dpo/beta_margin_std": 6.361680030822754,
"beta_dpo/beta_used": 0.16896192729473114,
"beta_dpo/beta_used_raw": 0.16896192729473114,
"beta_dpo/gap_mean": 27.861595153808594,
"beta_dpo/gap_std": 35.75341796875,
"beta_dpo/loss_margin_mean": 27.495941162109375,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.54572940287226,
"grad_norm": 217.03408813476562,
"learning_rate": 2.552884820191154e-07,
"logits/chosen": -3.047079086303711,
"logits/rejected": -3.0629630088806152,
"loss": 0.8215,
"step": 361
},
{
"beta_dpo/beta": 0.023837152868509293,
"beta_dpo/beta_margin_grad_mean": -0.4032648205757141,
"beta_dpo/beta_margin_grad_std": 0.2020334005355835,
"beta_dpo/beta_margin_mean": 0.6487870812416077,
"beta_dpo/beta_margin_std": 1.410294771194458,
"beta_dpo/beta_used": 0.023837152868509293,
"beta_dpo/beta_used_raw": -0.07430331408977509,
"beta_dpo/gap_mean": 27.96929359436035,
"beta_dpo/gap_std": 36.01551818847656,
"beta_dpo/loss_margin_mean": 29.493749618530273,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.54724111866969,
"grad_norm": 45.558921813964844,
"learning_rate": 2.53966490958702e-07,
"logits/chosen": -3.0698437690734863,
"logits/rejected": -3.108633518218994,
"loss": 1.1054,
"step": 362
},
{
"beta_dpo/beta": 0.012740159407258034,
"beta_dpo/beta_margin_grad_mean": -0.4181475043296814,
"beta_dpo/beta_margin_grad_std": 0.1294555813074112,
"beta_dpo/beta_margin_mean": 0.4015863537788391,
"beta_dpo/beta_margin_std": 0.7025443911552429,
"beta_dpo/beta_used": 0.012740159407258034,
"beta_dpo/beta_used_raw": -0.04291588068008423,
"beta_dpo/gap_mean": 28.640567779541016,
"beta_dpo/gap_std": 36.29180908203125,
"beta_dpo/loss_margin_mean": 31.320804595947266,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5487528344671202,
"grad_norm": 26.676973342895508,
"learning_rate": 2.526443889470099e-07,
"logits/chosen": -3.0346968173980713,
"logits/rejected": -3.092362642288208,
"loss": 1.1394,
"step": 363
},
{
"beta_dpo/beta": 0.16321328282356262,
"beta_dpo/beta_margin_grad_mean": -0.26698994636535645,
"beta_dpo/beta_margin_grad_std": 0.33409571647644043,
"beta_dpo/beta_margin_mean": 5.1491007804870605,
"beta_dpo/beta_margin_std": 7.250192165374756,
"beta_dpo/beta_used": 0.16321328282356262,
"beta_dpo/beta_used_raw": 0.16321328282356262,
"beta_dpo/gap_mean": 29.141733169555664,
"beta_dpo/gap_std": 37.76863098144531,
"beta_dpo/loss_margin_mean": 32.329715728759766,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5502645502645502,
"grad_norm": 212.32305908203125,
"learning_rate": 2.513222129660744e-07,
"logits/chosen": -3.0427498817443848,
"logits/rejected": -3.0514578819274902,
"loss": 0.8291,
"step": 364
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49362868070602417,
"beta_dpo/beta_margin_grad_std": 0.00868096761405468,
"beta_dpo/beta_margin_mean": 0.025498641654849052,
"beta_dpo/beta_margin_std": 0.03475683555006981,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.3776160776615143,
"beta_dpo/gap_mean": 28.81844139099121,
"beta_dpo/gap_std": 37.955848693847656,
"beta_dpo/loss_margin_mean": 25.498640060424805,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5517762660619804,
"grad_norm": 2.6512248516082764,
"learning_rate": 2.5e-07,
"logits/chosen": -3.06278133392334,
"logits/rejected": -3.0636744499206543,
"loss": 1.3657,
"step": 365
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49392661452293396,
"beta_dpo/beta_margin_grad_std": 0.008409538306295872,
"beta_dpo/beta_margin_mean": 0.024302352219820023,
"beta_dpo/beta_margin_std": 0.03365331143140793,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.2066284716129303,
"beta_dpo/gap_mean": 27.96464729309082,
"beta_dpo/gap_std": 37.263710021972656,
"beta_dpo/loss_margin_mean": 24.302350997924805,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5532879818594104,
"grad_norm": 2.931002378463745,
"learning_rate": 2.486777870339255e-07,
"logits/chosen": -3.0443501472473145,
"logits/rejected": -3.047083854675293,
"loss": 1.3638,
"step": 366
},
{
"beta_dpo/beta": 0.02233603037893772,
"beta_dpo/beta_margin_grad_mean": -0.4081335961818695,
"beta_dpo/beta_margin_grad_std": 0.19261126220226288,
"beta_dpo/beta_margin_mean": 0.5572895407676697,
"beta_dpo/beta_margin_std": 1.2073376178741455,
"beta_dpo/beta_used": 0.02233603037893772,
"beta_dpo/beta_used_raw": -0.29430317878723145,
"beta_dpo/gap_mean": 27.48886489868164,
"beta_dpo/gap_std": 36.262733459472656,
"beta_dpo/loss_margin_mean": 24.916200637817383,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5547996976568406,
"grad_norm": 69.64478302001953,
"learning_rate": 2.4735561105299014e-07,
"logits/chosen": -3.0140490531921387,
"logits/rejected": -3.0410256385803223,
"loss": 1.0924,
"step": 367
},
{
"beta_dpo/beta": 0.26728492975234985,
"beta_dpo/beta_margin_grad_mean": -0.34833693504333496,
"beta_dpo/beta_margin_grad_std": 0.301403284072876,
"beta_dpo/beta_margin_mean": 8.373762130737305,
"beta_dpo/beta_margin_std": 16.855165481567383,
"beta_dpo/beta_used": 0.26728492975234985,
"beta_dpo/beta_used_raw": 0.2518630623817444,
"beta_dpo/gap_mean": 27.578670501708984,
"beta_dpo/gap_std": 36.712608337402344,
"beta_dpo/loss_margin_mean": 27.794593811035156,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5563114134542706,
"grad_norm": 449.7203369140625,
"learning_rate": 2.46033509041298e-07,
"logits/chosen": -3.0827462673187256,
"logits/rejected": -3.080749750137329,
"loss": 1.1387,
"step": 368
},
{
"beta_dpo/beta": 0.29284167289733887,
"beta_dpo/beta_margin_grad_mean": -0.21756578981876373,
"beta_dpo/beta_margin_grad_std": 0.34182238578796387,
"beta_dpo/beta_margin_mean": 8.50714111328125,
"beta_dpo/beta_margin_std": 10.777283668518066,
"beta_dpo/beta_used": 0.29284167289733887,
"beta_dpo/beta_used_raw": 0.29284167289733887,
"beta_dpo/gap_mean": 27.435359954833984,
"beta_dpo/gap_std": 37.216773986816406,
"beta_dpo/loss_margin_mean": 28.464027404785156,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5578231292517006,
"grad_norm": 283.1849365234375,
"learning_rate": 2.447115179808846e-07,
"logits/chosen": -3.03106689453125,
"logits/rejected": -3.041107177734375,
"loss": 0.8772,
"step": 369
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49333760142326355,
"beta_dpo/beta_margin_grad_std": 0.01029953919351101,
"beta_dpo/beta_margin_mean": 0.026663921773433685,
"beta_dpo/beta_margin_std": 0.04122249782085419,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.25127974152565,
"beta_dpo/gap_mean": 27.672956466674805,
"beta_dpo/gap_std": 37.77960205078125,
"beta_dpo/loss_margin_mean": 26.663921356201172,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5593348450491308,
"grad_norm": 2.7624871730804443,
"learning_rate": 2.4338967485068164e-07,
"logits/chosen": -2.998018503189087,
"logits/rejected": -3.0110349655151367,
"loss": 1.3648,
"step": 370
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4933054745197296,
"beta_dpo/beta_margin_grad_std": 0.00889476016163826,
"beta_dpo/beta_margin_mean": 0.026788493618369102,
"beta_dpo/beta_margin_std": 0.035594578832387924,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.10879316926002502,
"beta_dpo/gap_mean": 27.25868797302246,
"beta_dpo/gap_std": 37.62822723388672,
"beta_dpo/loss_margin_mean": 26.78849220275879,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5608465608465608,
"grad_norm": 2.443422794342041,
"learning_rate": 2.420680166254831e-07,
"logits/chosen": -3.0092287063598633,
"logits/rejected": -3.0086848735809326,
"loss": 1.3629,
"step": 371
},
{
"beta_dpo/beta": 0.2747513949871063,
"beta_dpo/beta_margin_grad_mean": -0.3540157377719879,
"beta_dpo/beta_margin_grad_std": 0.29853445291519165,
"beta_dpo/beta_margin_mean": 9.724706649780273,
"beta_dpo/beta_margin_std": 18.833621978759766,
"beta_dpo/beta_used": 0.2747513949871063,
"beta_dpo/beta_used_raw": -0.10304847359657288,
"beta_dpo/gap_mean": 26.12897300720215,
"beta_dpo/gap_std": 37.682151794433594,
"beta_dpo/loss_margin_mean": 23.0501766204834,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.562358276643991,
"grad_norm": 932.7671508789062,
"learning_rate": 2.4074658027491044e-07,
"logits/chosen": -3.0054891109466553,
"logits/rejected": -3.0314760208129883,
"loss": 2.2511,
"step": 372
},
{
"beta_dpo/beta": 0.2764260172843933,
"beta_dpo/beta_margin_grad_mean": -0.3450475335121155,
"beta_dpo/beta_margin_grad_std": 0.29912158846855164,
"beta_dpo/beta_margin_mean": 7.09043025970459,
"beta_dpo/beta_margin_std": 18.69756507873535,
"beta_dpo/beta_used": 0.2764260172843933,
"beta_dpo/beta_used_raw": 0.21868909895420074,
"beta_dpo/gap_mean": 26.588321685791016,
"beta_dpo/gap_std": 38.78236389160156,
"beta_dpo/loss_margin_mean": 26.749818801879883,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.563869992441421,
"grad_norm": 524.5596923828125,
"learning_rate": 2.394254027623792e-07,
"logits/chosen": -3.018575668334961,
"logits/rejected": -3.024758815765381,
"loss": 1.8229,
"step": 373
},
{
"beta_dpo/beta": 0.38422077894210815,
"beta_dpo/beta_margin_grad_mean": -0.15745118260383606,
"beta_dpo/beta_margin_grad_std": 0.32486772537231445,
"beta_dpo/beta_margin_mean": 14.442462921142578,
"beta_dpo/beta_margin_std": 14.842865943908691,
"beta_dpo/beta_used": 0.38422077894210815,
"beta_dpo/beta_used_raw": 0.38422077894210815,
"beta_dpo/gap_mean": 28.249542236328125,
"beta_dpo/gap_std": 38.90614318847656,
"beta_dpo/loss_margin_mean": 37.55119705200195,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5653817082388511,
"grad_norm": 693.2622680664062,
"learning_rate": 2.381045210440644e-07,
"logits/chosen": -3.0473880767822266,
"logits/rejected": -3.0371317863464355,
"loss": 1.7168,
"step": 374
},
{
"beta_dpo/beta": 0.057004883885383606,
"beta_dpo/beta_margin_grad_mean": -0.3686520457267761,
"beta_dpo/beta_margin_grad_std": 0.2601342499256134,
"beta_dpo/beta_margin_mean": 1.6653286218643188,
"beta_dpo/beta_margin_std": 3.5258891582489014,
"beta_dpo/beta_used": 0.057004883885383606,
"beta_dpo/beta_used_raw": -0.18324482440948486,
"beta_dpo/gap_mean": 28.376663208007812,
"beta_dpo/gap_std": 38.72953796386719,
"beta_dpo/loss_margin_mean": 25.014204025268555,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5668934240362812,
"grad_norm": 185.9263916015625,
"learning_rate": 2.3678397206786715e-07,
"logits/chosen": -3.0153164863586426,
"logits/rejected": -3.0256357192993164,
"loss": 1.2298,
"step": 375
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49177441000938416,
"beta_dpo/beta_margin_grad_std": 0.01139663252979517,
"beta_dpo/beta_margin_mean": 0.03292226418852806,
"beta_dpo/beta_margin_std": 0.04562165588140488,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.01881096512079239,
"beta_dpo/gap_mean": 28.85112762451172,
"beta_dpo/gap_std": 39.73705291748047,
"beta_dpo/loss_margin_mean": 32.92226028442383,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5684051398337112,
"grad_norm": 2.6528704166412354,
"learning_rate": 2.3546379277238103e-07,
"logits/chosen": -2.9775443077087402,
"logits/rejected": -3.0031423568725586,
"loss": 1.36,
"step": 376
},
{
"beta_dpo/beta": 0.12980836629867554,
"beta_dpo/beta_margin_grad_mean": -0.38568049669265747,
"beta_dpo/beta_margin_grad_std": 0.30072495341300964,
"beta_dpo/beta_margin_mean": 3.0573172569274902,
"beta_dpo/beta_margin_std": 8.1069917678833,
"beta_dpo/beta_used": 0.12980836629867554,
"beta_dpo/beta_used_raw": 0.06634081900119781,
"beta_dpo/gap_mean": 28.19588851928711,
"beta_dpo/gap_std": 40.07501220703125,
"beta_dpo/loss_margin_mean": 24.444398880004883,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5699168556311414,
"grad_norm": 490.80743408203125,
"learning_rate": 2.3414402008585886e-07,
"logits/chosen": -2.9569051265716553,
"logits/rejected": -2.962362289428711,
"loss": 1.138,
"step": 377
},
{
"beta_dpo/beta": 0.2009427845478058,
"beta_dpo/beta_margin_grad_mean": -0.39364734292030334,
"beta_dpo/beta_margin_grad_std": 0.31605786085128784,
"beta_dpo/beta_margin_mean": 5.381883144378662,
"beta_dpo/beta_margin_std": 13.935928344726562,
"beta_dpo/beta_used": 0.2009427845478058,
"beta_dpo/beta_used_raw": 0.021990105509757996,
"beta_dpo/gap_mean": 26.762958526611328,
"beta_dpo/gap_std": 39.15788269042969,
"beta_dpo/loss_margin_mean": 21.002058029174805,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5714285714285714,
"grad_norm": 779.2059326171875,
"learning_rate": 2.3282469092517977e-07,
"logits/chosen": -2.988287925720215,
"logits/rejected": -2.997986316680908,
"loss": 1.8012,
"step": 378
},
{
"beta_dpo/beta": 0.29767459630966187,
"beta_dpo/beta_margin_grad_mean": -0.20546753704547882,
"beta_dpo/beta_margin_grad_std": 0.3395988643169403,
"beta_dpo/beta_margin_mean": 9.896245002746582,
"beta_dpo/beta_margin_std": 15.230827331542969,
"beta_dpo/beta_used": 0.29767459630966187,
"beta_dpo/beta_used_raw": 0.29767459630966187,
"beta_dpo/gap_mean": 27.635543823242188,
"beta_dpo/gap_std": 39.90657043457031,
"beta_dpo/loss_margin_mean": 32.11721420288086,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5729402872260015,
"grad_norm": 325.6419372558594,
"learning_rate": 2.3150584219481643e-07,
"logits/chosen": -3.069061040878296,
"logits/rejected": -3.1075985431671143,
"loss": 0.7379,
"step": 379
},
{
"beta_dpo/beta": 0.39250868558883667,
"beta_dpo/beta_margin_grad_mean": -0.1666214019060135,
"beta_dpo/beta_margin_grad_std": 0.3332377076148987,
"beta_dpo/beta_margin_mean": 14.069883346557617,
"beta_dpo/beta_margin_std": 16.81194305419922,
"beta_dpo/beta_used": 0.39250868558883667,
"beta_dpo/beta_used_raw": 0.39250868558883667,
"beta_dpo/gap_mean": 28.805389404296875,
"beta_dpo/gap_std": 40.448875427246094,
"beta_dpo/loss_margin_mean": 35.44111251831055,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5744520030234316,
"grad_norm": 638.1084594726562,
"learning_rate": 2.3018751078580283e-07,
"logits/chosen": -2.9937350749969482,
"logits/rejected": -2.993100881576538,
"loss": 1.7685,
"step": 380
},
{
"beta_dpo/beta": 0.32273223996162415,
"beta_dpo/beta_margin_grad_mean": -0.400060772895813,
"beta_dpo/beta_margin_grad_std": 0.32593268156051636,
"beta_dpo/beta_margin_mean": 6.122303009033203,
"beta_dpo/beta_margin_std": 22.122623443603516,
"beta_dpo/beta_used": 0.32273223996162415,
"beta_dpo/beta_used_raw": 0.09878802299499512,
"beta_dpo/gap_mean": 27.545970916748047,
"beta_dpo/gap_std": 40.81670379638672,
"beta_dpo/loss_margin_mean": 17.91020965576172,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5759637188208617,
"grad_norm": 668.0379028320312,
"learning_rate": 2.288697335747027e-07,
"logits/chosen": -2.9997167587280273,
"logits/rejected": -2.9869627952575684,
"loss": 1.4539,
"step": 381
},
{
"beta_dpo/beta": 0.3062552213668823,
"beta_dpo/beta_margin_grad_mean": -0.2633870244026184,
"beta_dpo/beta_margin_grad_std": 0.3874484598636627,
"beta_dpo/beta_margin_mean": 9.107242584228516,
"beta_dpo/beta_margin_std": 14.23564338684082,
"beta_dpo/beta_used": 0.3062552213668823,
"beta_dpo/beta_used_raw": 0.3062552213668823,
"beta_dpo/gap_mean": 26.631755828857422,
"beta_dpo/gap_std": 40.66786575317383,
"beta_dpo/loss_margin_mean": 26.08523941040039,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5774754346182918,
"grad_norm": 366.4364318847656,
"learning_rate": 2.2755254742257706e-07,
"logits/chosen": -3.0169169902801514,
"logits/rejected": -3.03079891204834,
"loss": 0.9439,
"step": 382
},
{
"beta_dpo/beta": 0.03148249536752701,
"beta_dpo/beta_margin_grad_mean": -0.4024004638195038,
"beta_dpo/beta_margin_grad_std": 0.2572653889656067,
"beta_dpo/beta_margin_mean": 0.9517198204994202,
"beta_dpo/beta_margin_std": 2.287767171859741,
"beta_dpo/beta_used": 0.03148249536752701,
"beta_dpo/beta_used_raw": -0.11997513473033905,
"beta_dpo/gap_mean": 27.111793518066406,
"beta_dpo/gap_std": 41.19513702392578,
"beta_dpo/loss_margin_mean": 26.842315673828125,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5789871504157218,
"grad_norm": 132.0910186767578,
"learning_rate": 2.2623598917395436e-07,
"logits/chosen": -2.999669313430786,
"logits/rejected": -2.9849891662597656,
"loss": 1.3339,
"step": 383
},
{
"beta_dpo/beta": 0.11498643457889557,
"beta_dpo/beta_margin_grad_mean": -0.30243608355522156,
"beta_dpo/beta_margin_grad_std": 0.2668159008026123,
"beta_dpo/beta_margin_mean": 4.041726589202881,
"beta_dpo/beta_margin_std": 6.5240478515625,
"beta_dpo/beta_used": 0.11498643457889557,
"beta_dpo/beta_used_raw": 0.11375071108341217,
"beta_dpo/gap_mean": 27.615703582763672,
"beta_dpo/gap_std": 41.406524658203125,
"beta_dpo/loss_margin_mean": 32.565528869628906,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5804988662131519,
"grad_norm": 219.2327423095703,
"learning_rate": 2.2492009565579875e-07,
"logits/chosen": -2.9780848026275635,
"logits/rejected": -2.9932589530944824,
"loss": 1.013,
"step": 384
},
{
"beta_dpo/beta": 0.3769190013408661,
"beta_dpo/beta_margin_grad_mean": -0.2724689245223999,
"beta_dpo/beta_margin_grad_std": 0.2645536959171295,
"beta_dpo/beta_margin_mean": 14.097354888916016,
"beta_dpo/beta_margin_std": 20.308109283447266,
"beta_dpo/beta_used": 0.3769190013408661,
"beta_dpo/beta_used_raw": 0.36033499240875244,
"beta_dpo/gap_mean": 29.11199378967285,
"beta_dpo/gap_std": 39.8790283203125,
"beta_dpo/loss_margin_mean": 34.771392822265625,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.582010582010582,
"grad_norm": 285.1043701171875,
"learning_rate": 2.2360490367648084e-07,
"logits/chosen": -3.0191240310668945,
"logits/rejected": -3.0336661338806152,
"loss": 0.7273,
"step": 385
},
{
"beta_dpo/beta": 0.19765952229499817,
"beta_dpo/beta_margin_grad_mean": -0.3405255973339081,
"beta_dpo/beta_margin_grad_std": 0.30749204754829407,
"beta_dpo/beta_margin_mean": 4.965733528137207,
"beta_dpo/beta_margin_std": 11.302127838134766,
"beta_dpo/beta_used": 0.19765952229499817,
"beta_dpo/beta_used_raw": -0.18348905444145203,
"beta_dpo/gap_mean": 28.022891998291016,
"beta_dpo/gap_std": 40.65996170043945,
"beta_dpo/loss_margin_mean": 21.723342895507812,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5835222978080121,
"grad_norm": 191.1682586669922,
"learning_rate": 2.2229045002474724e-07,
"logits/chosen": -3.016357183456421,
"logits/rejected": -3.035351276397705,
"loss": 1.1637,
"step": 386
},
{
"beta_dpo/beta": 0.3863077163696289,
"beta_dpo/beta_margin_grad_mean": -0.17502714693546295,
"beta_dpo/beta_margin_grad_std": 0.3491981327533722,
"beta_dpo/beta_margin_mean": 14.741477966308594,
"beta_dpo/beta_margin_std": 15.115537643432617,
"beta_dpo/beta_used": 0.3863077163696289,
"beta_dpo/beta_used_raw": 0.3863077163696289,
"beta_dpo/gap_mean": 29.337215423583984,
"beta_dpo/gap_std": 40.27283477783203,
"beta_dpo/loss_margin_mean": 38.22383499145508,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5850340136054422,
"grad_norm": 652.61865234375,
"learning_rate": 2.209767714686924e-07,
"logits/chosen": -2.9998183250427246,
"logits/rejected": -3.038905620574951,
"loss": 1.2516,
"step": 387
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4945504367351532,
"beta_dpo/beta_margin_grad_std": 0.010549246333539486,
"beta_dpo/beta_margin_mean": 0.021811524406075478,
"beta_dpo/beta_margin_std": 0.04222576692700386,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.2677161395549774,
"beta_dpo/gap_mean": 28.763246536254883,
"beta_dpo/gap_std": 40.887359619140625,
"beta_dpo/loss_margin_mean": 21.811521530151367,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5865457294028723,
"grad_norm": 2.4893360137939453,
"learning_rate": 2.1966390475472954e-07,
"logits/chosen": -3.0375568866729736,
"logits/rejected": -3.0330111980438232,
"loss": 1.364,
"step": 388
},
{
"beta_dpo/beta": 0.2958033084869385,
"beta_dpo/beta_margin_grad_mean": -0.21837277710437775,
"beta_dpo/beta_margin_grad_std": 0.37667161226272583,
"beta_dpo/beta_margin_mean": 10.13884449005127,
"beta_dpo/beta_margin_std": 13.346165657043457,
"beta_dpo/beta_used": 0.2958033084869385,
"beta_dpo/beta_used_raw": 0.2958033084869385,
"beta_dpo/gap_mean": 29.039878845214844,
"beta_dpo/gap_std": 40.79185485839844,
"beta_dpo/loss_margin_mean": 33.79916000366211,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5880574452003023,
"grad_norm": 706.0709838867188,
"learning_rate": 2.1835188660656265e-07,
"logits/chosen": -3.015286445617676,
"logits/rejected": -3.031208038330078,
"loss": 2.2031,
"step": 389
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4932987093925476,
"beta_dpo/beta_margin_grad_std": 0.009519056417047977,
"beta_dpo/beta_margin_mean": 0.026817994192242622,
"beta_dpo/beta_margin_std": 0.03809916600584984,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.2738952040672302,
"beta_dpo/gap_mean": 29.062936782836914,
"beta_dpo/gap_std": 40.41197967529297,
"beta_dpo/loss_margin_mean": 26.8179931640625,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5895691609977324,
"grad_norm": 2.219215154647827,
"learning_rate": 2.170407537241599e-07,
"logits/chosen": -2.981475353240967,
"logits/rejected": -2.988084316253662,
"loss": 1.3638,
"step": 390
},
{
"beta_dpo/beta": 0.625146746635437,
"beta_dpo/beta_margin_grad_mean": -0.23137876391410828,
"beta_dpo/beta_margin_grad_std": 0.37583622336387634,
"beta_dpo/beta_margin_mean": 23.871423721313477,
"beta_dpo/beta_margin_std": 28.901975631713867,
"beta_dpo/beta_used": 0.625146746635437,
"beta_dpo/beta_used_raw": 0.625146746635437,
"beta_dpo/gap_mean": 30.184946060180664,
"beta_dpo/gap_std": 40.649269104003906,
"beta_dpo/loss_margin_mean": 37.41094970703125,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5910808767951625,
"grad_norm": 1463.567626953125,
"learning_rate": 2.1573054278272636e-07,
"logits/chosen": -3.0244569778442383,
"logits/rejected": -3.035482883453369,
"loss": 1.5648,
"step": 391
},
{
"beta_dpo/beta": 0.5046176314353943,
"beta_dpo/beta_margin_grad_mean": -0.16831432282924652,
"beta_dpo/beta_margin_grad_std": 0.3403349220752716,
"beta_dpo/beta_margin_mean": 19.103736877441406,
"beta_dpo/beta_margin_std": 22.98224639892578,
"beta_dpo/beta_used": 0.5046176314353943,
"beta_dpo/beta_used_raw": 0.5046176314353943,
"beta_dpo/gap_mean": 30.835647583007812,
"beta_dpo/gap_std": 41.523895263671875,
"beta_dpo/loss_margin_mean": 36.13017654418945,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5925925925925926,
"grad_norm": 592.4343872070312,
"learning_rate": 2.1442129043167873e-07,
"logits/chosen": -3.0418100357055664,
"logits/rejected": -3.0473031997680664,
"loss": 1.8052,
"step": 392
},
{
"beta_dpo/beta": 0.2411310076713562,
"beta_dpo/beta_margin_grad_mean": -0.3177638351917267,
"beta_dpo/beta_margin_grad_std": 0.27696797251701355,
"beta_dpo/beta_margin_mean": 10.78797721862793,
"beta_dpo/beta_margin_std": 16.936288833618164,
"beta_dpo/beta_used": 0.2411310076713562,
"beta_dpo/beta_used_raw": 0.0982653796672821,
"beta_dpo/gap_mean": 32.383819580078125,
"beta_dpo/gap_std": 40.91410827636719,
"beta_dpo/loss_margin_mean": 33.14302062988281,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5941043083900227,
"grad_norm": 452.4428405761719,
"learning_rate": 2.131130332936195e-07,
"logits/chosen": -3.0123586654663086,
"logits/rejected": -3.02311110496521,
"loss": 0.966,
"step": 393
},
{
"beta_dpo/beta": 0.22652791440486908,
"beta_dpo/beta_margin_grad_mean": -0.33459609746932983,
"beta_dpo/beta_margin_grad_std": 0.29433995485305786,
"beta_dpo/beta_margin_mean": 8.418416023254395,
"beta_dpo/beta_margin_std": 15.686582565307617,
"beta_dpo/beta_used": 0.22652791440486908,
"beta_dpo/beta_used_raw": -0.014699012041091919,
"beta_dpo/gap_mean": 31.08023452758789,
"beta_dpo/gap_std": 40.0783805847168,
"beta_dpo/loss_margin_mean": 29.241104125976562,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5956160241874527,
"grad_norm": 492.2149658203125,
"learning_rate": 2.1180580796331323e-07,
"logits/chosen": -3.041472911834717,
"logits/rejected": -3.0596466064453125,
"loss": 1.4461,
"step": 394
},
{
"beta_dpo/beta": 0.14701415598392487,
"beta_dpo/beta_margin_grad_mean": -0.32391709089279175,
"beta_dpo/beta_margin_grad_std": 0.26708272099494934,
"beta_dpo/beta_margin_mean": 5.0513153076171875,
"beta_dpo/beta_margin_std": 9.305150032043457,
"beta_dpo/beta_used": 0.14701415598392487,
"beta_dpo/beta_used_raw": -0.07249976694583893,
"beta_dpo/gap_mean": 30.491722106933594,
"beta_dpo/gap_std": 39.65027618408203,
"beta_dpo/loss_margin_mean": 27.321931838989258,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5971277399848829,
"grad_norm": 392.08123779296875,
"learning_rate": 2.104996510066625e-07,
"logits/chosen": -2.976252555847168,
"logits/rejected": -3.0006847381591797,
"loss": 1.0728,
"step": 395
},
{
"beta_dpo/beta": 0.41436514258384705,
"beta_dpo/beta_margin_grad_mean": -0.285800963640213,
"beta_dpo/beta_margin_grad_std": 0.2666924297809601,
"beta_dpo/beta_margin_mean": 18.4917049407959,
"beta_dpo/beta_margin_std": 25.92902946472168,
"beta_dpo/beta_used": 0.41436514258384705,
"beta_dpo/beta_used_raw": 0.09369680285453796,
"beta_dpo/gap_mean": 31.421274185180664,
"beta_dpo/gap_std": 38.407318115234375,
"beta_dpo/loss_margin_mean": 31.650283813476562,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5986394557823129,
"grad_norm": 255.34703063964844,
"learning_rate": 2.0919459895968517e-07,
"logits/chosen": -3.0301437377929688,
"logits/rejected": -3.054694414138794,
"loss": 0.8463,
"step": 396
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49589958786964417,
"beta_dpo/beta_margin_grad_std": 0.010871745645999908,
"beta_dpo/beta_margin_mean": 0.016410168260335922,
"beta_dpo/beta_margin_std": 0.04352058470249176,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.2955280542373657,
"beta_dpo/gap_mean": 28.55809211730957,
"beta_dpo/gap_std": 38.24231719970703,
"beta_dpo/loss_margin_mean": 16.410167694091797,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.600151171579743,
"grad_norm": 2.5978617668151855,
"learning_rate": 2.078906883274924e-07,
"logits/chosen": -3.0240797996520996,
"logits/rejected": -3.0396976470947266,
"loss": 1.3646,
"step": 397
},
{
"beta_dpo/beta": 0.13738001883029938,
"beta_dpo/beta_margin_grad_mean": -0.23586338758468628,
"beta_dpo/beta_margin_grad_std": 0.3398585915565491,
"beta_dpo/beta_margin_mean": 4.376008033752441,
"beta_dpo/beta_margin_std": 5.610034942626953,
"beta_dpo/beta_used": 0.13738001883029938,
"beta_dpo/beta_used_raw": 0.13738001883029938,
"beta_dpo/gap_mean": 28.59261703491211,
"beta_dpo/gap_std": 39.029197692871094,
"beta_dpo/loss_margin_mean": 32.331600189208984,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6016628873771731,
"grad_norm": 248.6461639404297,
"learning_rate": 2.065879555832674e-07,
"logits/chosen": -3.0186073780059814,
"logits/rejected": -3.0323636531829834,
"loss": 0.874,
"step": 398
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4914765954017639,
"beta_dpo/beta_margin_grad_std": 0.010397281497716904,
"beta_dpo/beta_margin_mean": 0.0341142974793911,
"beta_dpo/beta_margin_std": 0.041627272963523865,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.07908609509468079,
"beta_dpo/gap_mean": 29.60391616821289,
"beta_dpo/gap_std": 39.34052658081055,
"beta_dpo/loss_margin_mean": 34.114295959472656,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6031746031746031,
"grad_norm": 2.631343364715576,
"learning_rate": 2.052864371672457e-07,
"logits/chosen": -3.0201542377471924,
"logits/rejected": -3.059485673904419,
"loss": 1.3601,
"step": 399
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49394434690475464,
"beta_dpo/beta_margin_grad_std": 0.008329670876264572,
"beta_dpo/beta_margin_mean": 0.02423146180808544,
"beta_dpo/beta_margin_std": 0.033334020525217056,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.24517488479614258,
"beta_dpo/gap_mean": 29.253145217895508,
"beta_dpo/gap_std": 38.769901275634766,
"beta_dpo/loss_margin_mean": 24.231460571289062,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6046863189720333,
"grad_norm": 2.8842198848724365,
"learning_rate": 2.0398616948569493e-07,
"logits/chosen": -3.052142381668091,
"logits/rejected": -3.0777342319488525,
"loss": 1.3632,
"step": 400
},
{
"epoch": 0.6046863189720333,
"eval_beta_dpo/beta": 0.029446229338645935,
"eval_beta_dpo/beta_margin_grad_mean": -0.4573783874511719,
"eval_beta_dpo/beta_margin_grad_std": 0.056579407304525375,
"eval_beta_dpo/beta_margin_mean": 0.9100630879402161,
"eval_beta_dpo/beta_margin_std": 1.195685625076294,
"eval_beta_dpo/beta_used": 0.029446229338645935,
"eval_beta_dpo/beta_used_raw": -0.3581295311450958,
"eval_beta_dpo/gap_mean": 28.76643180847168,
"eval_beta_dpo/gap_std": 38.37847137451172,
"eval_beta_dpo/loss_margin_mean": 21.130935668945312,
"eval_beta_dpo/mask_keep_frac": 1.0,
"eval_logits/chosen": -3.0744099617004395,
"eval_logits/rejected": -3.0810747146606445,
"eval_loss": 0.7274584174156189,
"eval_runtime": 36.2627,
"eval_samples_per_second": 63.509,
"eval_steps_per_second": 1.986,
"step": 400
},
{
"beta_dpo/beta": 0.11721974611282349,
"beta_dpo/beta_margin_grad_mean": -0.35206279158592224,
"beta_dpo/beta_margin_grad_std": 0.28108495473861694,
"beta_dpo/beta_margin_mean": 3.3964405059814453,
"beta_dpo/beta_margin_std": 7.061282634735107,
"beta_dpo/beta_used": 0.11721974611282349,
"beta_dpo/beta_used_raw": 0.09774797409772873,
"beta_dpo/gap_mean": 29.4639892578125,
"beta_dpo/gap_std": 37.854820251464844,
"beta_dpo/loss_margin_mean": 32.480064392089844,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6061980347694633,
"grad_norm": 187.5461883544922,
"learning_rate": 2.0268718890989752e-07,
"logits/chosen": -2.9957950115203857,
"logits/rejected": -3.0160136222839355,
"loss": 1.2168,
"step": 401
},
{
"beta_dpo/beta": 0.05155543237924576,
"beta_dpo/beta_margin_grad_mean": -0.3367193043231964,
"beta_dpo/beta_margin_grad_std": 0.2195734679698944,
"beta_dpo/beta_margin_mean": 1.6674182415008545,
"beta_dpo/beta_margin_std": 2.7201528549194336,
"beta_dpo/beta_used": 0.05155543237924576,
"beta_dpo/beta_used_raw": -0.07895001769065857,
"beta_dpo/gap_mean": 29.160747528076172,
"beta_dpo/gap_std": 37.533660888671875,
"beta_dpo/loss_margin_mean": 25.307361602783203,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6077097505668935,
"grad_norm": 86.19723510742188,
"learning_rate": 2.013895317751323e-07,
"logits/chosen": -3.0285935401916504,
"logits/rejected": -3.0081098079681396,
"loss": 0.9101,
"step": 402
},
{
"beta_dpo/beta": 0.22628659009933472,
"beta_dpo/beta_margin_grad_mean": -0.3428897559642792,
"beta_dpo/beta_margin_grad_std": 0.28803175687789917,
"beta_dpo/beta_margin_mean": 8.585755348205566,
"beta_dpo/beta_margin_std": 15.24824047088623,
"beta_dpo/beta_used": 0.22628659009933472,
"beta_dpo/beta_used_raw": 0.15616539120674133,
"beta_dpo/gap_mean": 29.993492126464844,
"beta_dpo/gap_std": 38.41106414794922,
"beta_dpo/loss_margin_mean": 38.23483657836914,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6092214663643235,
"grad_norm": 382.0389404296875,
"learning_rate": 2.0009323437965898e-07,
"logits/chosen": -2.9947726726531982,
"logits/rejected": -3.021214485168457,
"loss": 1.1869,
"step": 403
},
{
"beta_dpo/beta": 0.29033389687538147,
"beta_dpo/beta_margin_grad_mean": -0.31872478127479553,
"beta_dpo/beta_margin_grad_std": 0.27382490038871765,
"beta_dpo/beta_margin_mean": 12.520002365112305,
"beta_dpo/beta_margin_std": 21.002424240112305,
"beta_dpo/beta_used": 0.29033389687538147,
"beta_dpo/beta_used_raw": 0.09338931739330292,
"beta_dpo/gap_mean": 31.210363388061523,
"beta_dpo/gap_std": 38.91590118408203,
"beta_dpo/loss_margin_mean": 32.66154479980469,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6107331821617535,
"grad_norm": 643.2323608398438,
"learning_rate": 1.9879833298370237e-07,
"logits/chosen": -3.054755210876465,
"logits/rejected": -3.0784716606140137,
"loss": 2.2771,
"step": 404
},
{
"beta_dpo/beta": 0.08867108821868896,
"beta_dpo/beta_margin_grad_mean": -0.35953488945961,
"beta_dpo/beta_margin_grad_std": 0.26592856645584106,
"beta_dpo/beta_margin_mean": 3.2900259494781494,
"beta_dpo/beta_margin_std": 5.851770401000977,
"beta_dpo/beta_used": 0.08867108821868896,
"beta_dpo/beta_used_raw": -0.21567538380622864,
"beta_dpo/gap_mean": 30.805099487304688,
"beta_dpo/gap_std": 38.458656311035156,
"beta_dpo/loss_margin_mean": 28.532512664794922,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6122448979591837,
"grad_norm": 219.62863159179688,
"learning_rate": 1.975048638084379e-07,
"logits/chosen": -2.99334979057312,
"logits/rejected": -3.013242721557617,
"loss": 1.1381,
"step": 405
},
{
"beta_dpo/beta": 0.48128390312194824,
"beta_dpo/beta_margin_grad_mean": -0.36179736256599426,
"beta_dpo/beta_margin_grad_std": 0.3154648244380951,
"beta_dpo/beta_margin_mean": 17.335620880126953,
"beta_dpo/beta_margin_std": 34.6451416015625,
"beta_dpo/beta_used": 0.48128390312194824,
"beta_dpo/beta_used_raw": 0.39883124828338623,
"beta_dpo/gap_mean": 30.695274353027344,
"beta_dpo/gap_std": 38.449737548828125,
"beta_dpo/loss_margin_mean": 32.28055953979492,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6137566137566137,
"grad_norm": 532.7105712890625,
"learning_rate": 1.9621286303497914e-07,
"logits/chosen": -3.000096082687378,
"logits/rejected": -3.0485429763793945,
"loss": 0.9884,
"step": 406
},
{
"beta_dpo/beta": 0.18012605607509613,
"beta_dpo/beta_margin_grad_mean": -0.2155662477016449,
"beta_dpo/beta_margin_grad_std": 0.3102184534072876,
"beta_dpo/beta_margin_mean": 5.451420783996582,
"beta_dpo/beta_margin_std": 7.179676055908203,
"beta_dpo/beta_used": 0.18012605607509613,
"beta_dpo/beta_used_raw": 0.18012605607509613,
"beta_dpo/gap_mean": 30.456653594970703,
"beta_dpo/gap_std": 37.66099166870117,
"beta_dpo/loss_margin_mean": 29.90361213684082,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6152683295540439,
"grad_norm": 104.91793060302734,
"learning_rate": 1.9492236680336483e-07,
"logits/chosen": -3.095395088195801,
"logits/rejected": -3.1127164363861084,
"loss": 0.4795,
"step": 407
},
{
"beta_dpo/beta": 0.1678144782781601,
"beta_dpo/beta_margin_grad_mean": -0.27898651361465454,
"beta_dpo/beta_margin_grad_std": 0.2516646981239319,
"beta_dpo/beta_margin_mean": 7.00124454498291,
"beta_dpo/beta_margin_std": 10.054356575012207,
"beta_dpo/beta_used": 0.1678144782781601,
"beta_dpo/beta_used_raw": 0.11202029883861542,
"beta_dpo/gap_mean": 30.880258560180664,
"beta_dpo/gap_std": 36.1904182434082,
"beta_dpo/loss_margin_mean": 35.354042053222656,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6167800453514739,
"grad_norm": 217.7000274658203,
"learning_rate": 1.9363341121154895e-07,
"logits/chosen": -3.02614688873291,
"logits/rejected": -3.044498920440674,
"loss": 0.9372,
"step": 408
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4943253993988037,
"beta_dpo/beta_margin_grad_std": 0.00914519652724266,
"beta_dpo/beta_margin_mean": 0.022709691897034645,
"beta_dpo/beta_margin_std": 0.03660096228122711,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.5368869304656982,
"beta_dpo/gap_mean": 30.207996368408203,
"beta_dpo/gap_std": 36.25111389160156,
"beta_dpo/loss_margin_mean": 22.709692001342773,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.618291761148904,
"grad_norm": 2.2463226318359375,
"learning_rate": 1.9234603231438994e-07,
"logits/chosen": -3.043461799621582,
"logits/rejected": -3.0443849563598633,
"loss": 1.367,
"step": 409
},
{
"beta_dpo/beta": 0.08402707427740097,
"beta_dpo/beta_margin_grad_mean": -0.28692978620529175,
"beta_dpo/beta_margin_grad_std": 0.24823537468910217,
"beta_dpo/beta_margin_mean": 3.4619128704071045,
"beta_dpo/beta_margin_std": 5.12234354019165,
"beta_dpo/beta_used": 0.08402707427740097,
"beta_dpo/beta_used_raw": -0.09456826746463776,
"beta_dpo/gap_mean": 30.54737091064453,
"beta_dpo/gap_std": 35.517127990722656,
"beta_dpo/loss_margin_mean": 32.21046829223633,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6198034769463341,
"grad_norm": 120.86051940917969,
"learning_rate": 1.9106026612264315e-07,
"logits/chosen": -3.0650954246520996,
"logits/rejected": -3.070061206817627,
"loss": 0.9332,
"step": 410
},
{
"beta_dpo/beta": 0.027193760499358177,
"beta_dpo/beta_margin_grad_mean": -0.37177857756614685,
"beta_dpo/beta_margin_grad_std": 0.2116282731294632,
"beta_dpo/beta_margin_mean": 0.9016957879066467,
"beta_dpo/beta_margin_std": 1.7975363731384277,
"beta_dpo/beta_used": 0.027193760499358177,
"beta_dpo/beta_used_raw": -0.04066654294729233,
"beta_dpo/gap_mean": 30.096771240234375,
"beta_dpo/gap_std": 36.01396179199219,
"beta_dpo/loss_margin_mean": 28.125648498535156,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6213151927437641,
"grad_norm": 66.50321960449219,
"learning_rate": 1.8977614860195296e-07,
"logits/chosen": -3.014916181564331,
"logits/rejected": -3.0385901927948,
"loss": 0.999,
"step": 411
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4926305115222931,
"beta_dpo/beta_margin_grad_std": 0.0076973093673586845,
"beta_dpo/beta_margin_mean": 0.029488172382116318,
"beta_dpo/beta_margin_std": 0.030803833156824112,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.2814818024635315,
"beta_dpo/gap_mean": 29.791940689086914,
"beta_dpo/gap_std": 35.42867660522461,
"beta_dpo/loss_margin_mean": 29.488170623779297,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6228269085411943,
"grad_norm": 2.461094379425049,
"learning_rate": 1.8849371567184662e-07,
"logits/chosen": -3.009608745574951,
"logits/rejected": -3.0188848972320557,
"loss": 1.3632,
"step": 412
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49362313747406006,
"beta_dpo/beta_margin_grad_std": 0.010564255528151989,
"beta_dpo/beta_margin_mean": 0.025521280243992805,
"beta_dpo/beta_margin_std": 0.04228663817048073,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.4660704731941223,
"beta_dpo/gap_mean": 29.05756378173828,
"beta_dpo/gap_std": 36.2657470703125,
"beta_dpo/loss_margin_mean": 25.521278381347656,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6243386243386243,
"grad_norm": 2.5777804851531982,
"learning_rate": 1.872130032047302e-07,
"logits/chosen": -3.073596477508545,
"logits/rejected": -3.076204776763916,
"loss": 1.367,
"step": 413
},
{
"beta_dpo/beta": 0.0779917985200882,
"beta_dpo/beta_margin_grad_mean": -0.3404553532600403,
"beta_dpo/beta_margin_grad_std": 0.25039440393447876,
"beta_dpo/beta_margin_mean": 2.3758316040039062,
"beta_dpo/beta_margin_std": 4.421693325042725,
"beta_dpo/beta_used": 0.0779917985200882,
"beta_dpo/beta_used_raw": -0.03603484481573105,
"beta_dpo/gap_mean": 29.065872192382812,
"beta_dpo/gap_std": 36.048789978027344,
"beta_dpo/loss_margin_mean": 30.15563201904297,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6258503401360545,
"grad_norm": 127.85913848876953,
"learning_rate": 1.8593404702488436e-07,
"logits/chosen": -3.034503221511841,
"logits/rejected": -3.0492706298828125,
"loss": 0.9021,
"step": 414
},
{
"beta_dpo/beta": 0.05307367071509361,
"beta_dpo/beta_margin_grad_mean": -0.35378262400627136,
"beta_dpo/beta_margin_grad_std": 0.23300494253635406,
"beta_dpo/beta_margin_mean": 1.6274826526641846,
"beta_dpo/beta_margin_std": 2.9165117740631104,
"beta_dpo/beta_used": 0.05307367071509361,
"beta_dpo/beta_used_raw": 0.05307367071509361,
"beta_dpo/gap_mean": 29.417644500732422,
"beta_dpo/gap_std": 35.23187255859375,
"beta_dpo/loss_margin_mean": 31.10089111328125,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6273620559334845,
"grad_norm": 176.93101501464844,
"learning_rate": 1.846568829074628e-07,
"logits/chosen": -2.9848177433013916,
"logits/rejected": -2.995086669921875,
"loss": 1.0372,
"step": 415
},
{
"beta_dpo/beta": 0.3304360508918762,
"beta_dpo/beta_margin_grad_mean": -0.31155529618263245,
"beta_dpo/beta_margin_grad_std": 0.28502368927001953,
"beta_dpo/beta_margin_mean": 11.89297103881836,
"beta_dpo/beta_margin_std": 19.53858184814453,
"beta_dpo/beta_used": 0.3304360508918762,
"beta_dpo/beta_used_raw": 0.20077964663505554,
"beta_dpo/gap_mean": 28.080665588378906,
"beta_dpo/gap_std": 35.113502502441406,
"beta_dpo/loss_margin_mean": 23.66207504272461,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6288737717309146,
"grad_norm": 763.5574340820312,
"learning_rate": 1.8338154657749128e-07,
"logits/chosen": -3.023198127746582,
"logits/rejected": -3.030958652496338,
"loss": 1.4262,
"step": 416
},
{
"beta_dpo/beta": 0.14367084205150604,
"beta_dpo/beta_margin_grad_mean": -0.19789853692054749,
"beta_dpo/beta_margin_grad_std": 0.30493274331092834,
"beta_dpo/beta_margin_mean": 4.5488409996032715,
"beta_dpo/beta_margin_std": 5.6324896812438965,
"beta_dpo/beta_used": 0.14367084205150604,
"beta_dpo/beta_used_raw": 0.14367084205150604,
"beta_dpo/gap_mean": 28.43101692199707,
"beta_dpo/gap_std": 35.292335510253906,
"beta_dpo/loss_margin_mean": 30.2642879486084,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6303854875283447,
"grad_norm": 211.81895446777344,
"learning_rate": 1.8210807370886849e-07,
"logits/chosen": -2.993131637573242,
"logits/rejected": -3.003584861755371,
"loss": 0.7942,
"step": 417
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4951647222042084,
"beta_dpo/beta_margin_grad_std": 0.00956287793815136,
"beta_dpo/beta_margin_mean": 0.019352145493030548,
"beta_dpo/beta_margin_std": 0.03827900066971779,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.5479909181594849,
"beta_dpo/gap_mean": 28.049537658691406,
"beta_dpo/gap_std": 35.543701171875,
"beta_dpo/loss_margin_mean": 19.352144241333008,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6318972033257747,
"grad_norm": 2.309333324432373,
"learning_rate": 1.8083649992336825e-07,
"logits/chosen": -3.0097949504852295,
"logits/rejected": -3.013964891433716,
"loss": 1.3693,
"step": 418
},
{
"beta_dpo/beta": 0.24485455453395844,
"beta_dpo/beta_margin_grad_mean": -0.3431459069252014,
"beta_dpo/beta_margin_grad_std": 0.3013096749782562,
"beta_dpo/beta_margin_mean": 9.05129623413086,
"beta_dpo/beta_margin_std": 15.675983428955078,
"beta_dpo/beta_used": 0.24485455453395844,
"beta_dpo/beta_used_raw": 0.19044339656829834,
"beta_dpo/gap_mean": 28.351285934448242,
"beta_dpo/gap_std": 36.18492126464844,
"beta_dpo/loss_margin_mean": 36.2522087097168,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6334089191232048,
"grad_norm": 535.1361694335938,
"learning_rate": 1.7956686078964255e-07,
"logits/chosen": -3.008601188659668,
"logits/rejected": -3.037018299102783,
"loss": 1.331,
"step": 419
},
{
"beta_dpo/beta": 0.059331752359867096,
"beta_dpo/beta_margin_grad_mean": -0.38873177766799927,
"beta_dpo/beta_margin_grad_std": 0.2625668942928314,
"beta_dpo/beta_margin_mean": 1.455504298210144,
"beta_dpo/beta_margin_std": 3.291039228439331,
"beta_dpo/beta_used": 0.059331752359867096,
"beta_dpo/beta_used_raw": -0.013141274452209473,
"beta_dpo/gap_mean": 27.697145462036133,
"beta_dpo/gap_std": 36.58726119995117,
"beta_dpo/loss_margin_mean": 20.262928009033203,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6349206349206349,
"grad_norm": 120.16482543945312,
"learning_rate": 1.782991918222275e-07,
"logits/chosen": -3.011108875274658,
"logits/rejected": -3.016571044921875,
"loss": 1.1169,
"step": 420
},
{
"beta_dpo/beta": 0.41717052459716797,
"beta_dpo/beta_margin_grad_mean": -0.3158358931541443,
"beta_dpo/beta_margin_grad_std": 0.292575865983963,
"beta_dpo/beta_margin_mean": 13.852155685424805,
"beta_dpo/beta_margin_std": 28.46140480041504,
"beta_dpo/beta_used": 0.41717052459716797,
"beta_dpo/beta_used_raw": 0.26456567645072937,
"beta_dpo/gap_mean": 27.4444580078125,
"beta_dpo/gap_std": 37.893394470214844,
"beta_dpo/loss_margin_mean": 27.985321044921875,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.636432350718065,
"grad_norm": 733.0397338867188,
"learning_rate": 1.7703352848054887e-07,
"logits/chosen": -2.9851346015930176,
"logits/rejected": -3.0097498893737793,
"loss": 2.0376,
"step": 421
},
{
"beta_dpo/beta": 0.05253172665834427,
"beta_dpo/beta_margin_grad_mean": -0.35536831617355347,
"beta_dpo/beta_margin_grad_std": 0.2513173818588257,
"beta_dpo/beta_margin_mean": 1.5365536212921143,
"beta_dpo/beta_margin_std": 2.828378915786743,
"beta_dpo/beta_used": 0.05253172665834427,
"beta_dpo/beta_used_raw": -0.20523816347122192,
"beta_dpo/gap_mean": 26.63359832763672,
"beta_dpo/gap_std": 37.69291687011719,
"beta_dpo/loss_margin_mean": 24.736282348632812,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6379440665154951,
"grad_norm": 144.82969665527344,
"learning_rate": 1.7576990616793137e-07,
"logits/chosen": -3.0424623489379883,
"logits/rejected": -3.045719623565674,
"loss": 1.0531,
"step": 422
},
{
"beta_dpo/beta": 0.31688183546066284,
"beta_dpo/beta_margin_grad_mean": -0.19457308948040009,
"beta_dpo/beta_margin_grad_std": 0.30100706219673157,
"beta_dpo/beta_margin_mean": 11.033187866210938,
"beta_dpo/beta_margin_std": 17.46510887145996,
"beta_dpo/beta_used": 0.31688183546066284,
"beta_dpo/beta_used_raw": 0.31688183546066284,
"beta_dpo/gap_mean": 27.839336395263672,
"beta_dpo/gap_std": 37.669700622558594,
"beta_dpo/loss_margin_mean": 33.5172233581543,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6394557823129252,
"grad_norm": 999.29296875,
"learning_rate": 1.745083602306071e-07,
"logits/chosen": -2.9774489402770996,
"logits/rejected": -3.0142605304718018,
"loss": 1.3781,
"step": 423
},
{
"beta_dpo/beta": 0.09759822487831116,
"beta_dpo/beta_margin_grad_mean": -0.28802889585494995,
"beta_dpo/beta_margin_grad_std": 0.22548428177833557,
"beta_dpo/beta_margin_mean": 3.754995584487915,
"beta_dpo/beta_margin_std": 5.6953959465026855,
"beta_dpo/beta_used": 0.09759822487831116,
"beta_dpo/beta_used_raw": 0.06761516630649567,
"beta_dpo/gap_mean": 29.380714416503906,
"beta_dpo/gap_std": 37.17816162109375,
"beta_dpo/loss_margin_mean": 36.763824462890625,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6409674981103552,
"grad_norm": 84.32976531982422,
"learning_rate": 1.7324892595672804e-07,
"logits/chosen": -3.0425024032592773,
"logits/rejected": -3.0614097118377686,
"loss": 0.8042,
"step": 424
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4941061735153198,
"beta_dpo/beta_margin_grad_std": 0.00797404907643795,
"beta_dpo/beta_margin_mean": 0.02358274534344673,
"beta_dpo/beta_margin_std": 0.03190897777676582,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.13788114488124847,
"beta_dpo/gap_mean": 28.752634048461914,
"beta_dpo/gap_std": 36.62226867675781,
"beta_dpo/loss_margin_mean": 23.582744598388672,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6424792139077853,
"grad_norm": 2.602902412414551,
"learning_rate": 1.7199163857537824e-07,
"logits/chosen": -2.9604344367980957,
"logits/rejected": -2.958548069000244,
"loss": 1.3619,
"step": 425
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4955054521560669,
"beta_dpo/beta_margin_grad_std": 0.009816068224608898,
"beta_dpo/beta_margin_mean": 0.017988240346312523,
"beta_dpo/beta_margin_std": 0.039290908724069595,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.49269723892211914,
"beta_dpo/gap_mean": 27.00345230102539,
"beta_dpo/gap_std": 36.83736038208008,
"beta_dpo/loss_margin_mean": 17.98824119567871,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6439909297052154,
"grad_norm": 2.686110734939575,
"learning_rate": 1.7073653325558828e-07,
"logits/chosen": -2.993440628051758,
"logits/rejected": -2.9941649436950684,
"loss": 1.3694,
"step": 426
},
{
"beta_dpo/beta": 0.21917250752449036,
"beta_dpo/beta_margin_grad_mean": -0.30823761224746704,
"beta_dpo/beta_margin_grad_std": 0.2705315351486206,
"beta_dpo/beta_margin_mean": 7.877319812774658,
"beta_dpo/beta_margin_std": 13.52176284790039,
"beta_dpo/beta_used": 0.21917250752449036,
"beta_dpo/beta_used_raw": 0.17343935370445251,
"beta_dpo/gap_mean": 27.387710571289062,
"beta_dpo/gap_std": 37.20250701904297,
"beta_dpo/loss_margin_mean": 33.293609619140625,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6455026455026455,
"grad_norm": 284.2796630859375,
"learning_rate": 1.6948364510535218e-07,
"logits/chosen": -3.02168345451355,
"logits/rejected": -3.048999786376953,
"loss": 0.8126,
"step": 427
},
{
"beta_dpo/beta": 0.3071382939815521,
"beta_dpo/beta_margin_grad_mean": -0.351523220539093,
"beta_dpo/beta_margin_grad_std": 0.3131656348705292,
"beta_dpo/beta_margin_mean": 13.213186264038086,
"beta_dpo/beta_margin_std": 23.171049118041992,
"beta_dpo/beta_used": 0.3071382939815521,
"beta_dpo/beta_used_raw": 0.019071310758590698,
"beta_dpo/gap_mean": 27.952465057373047,
"beta_dpo/gap_std": 37.053226470947266,
"beta_dpo/loss_margin_mean": 32.0423583984375,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6470143613000756,
"grad_norm": 785.1513671875,
"learning_rate": 1.6823300917064458e-07,
"logits/chosen": -3.028686285018921,
"logits/rejected": -3.0231781005859375,
"loss": 2.2194,
"step": 428
},
{
"beta_dpo/beta": 0.40533530712127686,
"beta_dpo/beta_margin_grad_mean": -0.23250898718833923,
"beta_dpo/beta_margin_grad_std": 0.35834386944770813,
"beta_dpo/beta_margin_mean": 12.332311630249023,
"beta_dpo/beta_margin_std": 21.895021438598633,
"beta_dpo/beta_used": 0.40533530712127686,
"beta_dpo/beta_used_raw": 0.40533530712127686,
"beta_dpo/gap_mean": 28.197933197021484,
"beta_dpo/gap_std": 37.31829833984375,
"beta_dpo/loss_margin_mean": 26.527587890625,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6485260770975056,
"grad_norm": 386.2767639160156,
"learning_rate": 1.669846604344412e-07,
"logits/chosen": -2.974191904067993,
"logits/rejected": -2.967235565185547,
"loss": 0.6899,
"step": 429
},
{
"beta_dpo/beta": 0.24217864871025085,
"beta_dpo/beta_margin_grad_mean": -0.19283412396907806,
"beta_dpo/beta_margin_grad_std": 0.3163716197013855,
"beta_dpo/beta_margin_mean": 8.279568672180176,
"beta_dpo/beta_margin_std": 10.411409378051758,
"beta_dpo/beta_used": 0.24217864871025085,
"beta_dpo/beta_used_raw": 0.24217864871025085,
"beta_dpo/gap_mean": 29.146041870117188,
"beta_dpo/gap_std": 36.97269058227539,
"beta_dpo/loss_margin_mean": 31.7739200592041,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6500377928949358,
"grad_norm": 472.55072021484375,
"learning_rate": 1.6573863381573954e-07,
"logits/chosen": -2.986631393432617,
"logits/rejected": -2.97794771194458,
"loss": 0.663,
"step": 430
},
{
"beta_dpo/beta": 0.10047898441553116,
"beta_dpo/beta_margin_grad_mean": -0.3267359733581543,
"beta_dpo/beta_margin_grad_std": 0.24772731959819794,
"beta_dpo/beta_margin_mean": 2.857672929763794,
"beta_dpo/beta_margin_std": 4.949996471405029,
"beta_dpo/beta_used": 0.10047898441553116,
"beta_dpo/beta_used_raw": -0.2611073851585388,
"beta_dpo/gap_mean": 28.47191619873047,
"beta_dpo/gap_std": 36.198570251464844,
"beta_dpo/loss_margin_mean": 26.27246856689453,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6515495086923658,
"grad_norm": 55.17536544799805,
"learning_rate": 1.6449496416858282e-07,
"logits/chosen": -3.016430377960205,
"logits/rejected": -3.0456418991088867,
"loss": 0.8078,
"step": 431
},
{
"beta_dpo/beta": 0.22804366052150726,
"beta_dpo/beta_margin_grad_mean": -0.20772166550159454,
"beta_dpo/beta_margin_grad_std": 0.3391772210597992,
"beta_dpo/beta_margin_mean": 6.465082168579102,
"beta_dpo/beta_margin_std": 8.044727325439453,
"beta_dpo/beta_used": 0.22804366052150726,
"beta_dpo/beta_used_raw": 0.22804366052150726,
"beta_dpo/gap_mean": 28.469558715820312,
"beta_dpo/gap_std": 35.54988098144531,
"beta_dpo/loss_margin_mean": 27.847829818725586,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6530612244897959,
"grad_norm": 517.2797241210938,
"learning_rate": 1.632536862810844e-07,
"logits/chosen": -3.02810001373291,
"logits/rejected": -3.046234130859375,
"loss": 1.0916,
"step": 432
},
{
"beta_dpo/beta": 0.5039178729057312,
"beta_dpo/beta_margin_grad_mean": -0.18501636385917664,
"beta_dpo/beta_margin_grad_std": 0.3553808629512787,
"beta_dpo/beta_margin_mean": 18.54363250732422,
"beta_dpo/beta_margin_std": 20.644752502441406,
"beta_dpo/beta_used": 0.5039178729057312,
"beta_dpo/beta_used_raw": 0.5039178729057312,
"beta_dpo/gap_mean": 29.425575256347656,
"beta_dpo/gap_std": 36.11632537841797,
"beta_dpo/loss_margin_mean": 36.797874450683594,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.654572940287226,
"grad_norm": 758.8828735351562,
"learning_rate": 1.6201483487445515e-07,
"logits/chosen": -2.982597589492798,
"logits/rejected": -2.9782962799072266,
"loss": 1.5323,
"step": 433
},
{
"beta_dpo/beta": 0.04688744619488716,
"beta_dpo/beta_margin_grad_mean": -0.3494400978088379,
"beta_dpo/beta_margin_grad_std": 0.2307935357093811,
"beta_dpo/beta_margin_mean": 1.8151441812515259,
"beta_dpo/beta_margin_std": 3.081648349761963,
"beta_dpo/beta_used": 0.04688744619488716,
"beta_dpo/beta_used_raw": -0.08956971764564514,
"beta_dpo/gap_mean": 30.386577606201172,
"beta_dpo/gap_std": 36.93446350097656,
"beta_dpo/loss_margin_mean": 30.103981018066406,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.656084656084656,
"grad_norm": 147.99685668945312,
"learning_rate": 1.6077844460203204e-07,
"logits/chosen": -2.9807381629943848,
"logits/rejected": -3.004141330718994,
"loss": 1.0338,
"step": 434
},
{
"beta_dpo/beta": 0.1664637327194214,
"beta_dpo/beta_margin_grad_mean": -0.21661928296089172,
"beta_dpo/beta_margin_grad_std": 0.2999856173992157,
"beta_dpo/beta_margin_mean": 5.165381908416748,
"beta_dpo/beta_margin_std": 6.401003360748291,
"beta_dpo/beta_used": 0.1664637327194214,
"beta_dpo/beta_used_raw": 0.1664637327194214,
"beta_dpo/gap_mean": 29.894763946533203,
"beta_dpo/gap_std": 37.02755355834961,
"beta_dpo/loss_margin_mean": 30.329971313476562,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6575963718820862,
"grad_norm": 153.4250030517578,
"learning_rate": 1.5954455004830878e-07,
"logits/chosen": -2.9743881225585938,
"logits/rejected": -2.9850637912750244,
"loss": 0.5309,
"step": 435
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4939667284488678,
"beta_dpo/beta_margin_grad_std": 0.009263965301215649,
"beta_dpo/beta_margin_mean": 0.024143004789948463,
"beta_dpo/beta_margin_std": 0.03707313910126686,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.08352816104888916,
"beta_dpo/gap_mean": 29.20709991455078,
"beta_dpo/gap_std": 36.946022033691406,
"beta_dpo/loss_margin_mean": 24.143003463745117,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6591080876795162,
"grad_norm": 2.9641666412353516,
"learning_rate": 1.5831318572796847e-07,
"logits/chosen": -2.9855284690856934,
"logits/rejected": -2.9985339641571045,
"loss": 1.3605,
"step": 436
},
{
"beta_dpo/beta": 0.12375855445861816,
"beta_dpo/beta_margin_grad_mean": -0.31876465678215027,
"beta_dpo/beta_margin_grad_std": 0.2591817378997803,
"beta_dpo/beta_margin_mean": 4.931870937347412,
"beta_dpo/beta_margin_std": 8.040031433105469,
"beta_dpo/beta_used": 0.12375855445861816,
"beta_dpo/beta_used_raw": -0.06239933520555496,
"beta_dpo/gap_mean": 29.688655853271484,
"beta_dpo/gap_std": 37.500450134277344,
"beta_dpo/loss_margin_mean": 32.000179290771484,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6606198034769464,
"grad_norm": 202.52598571777344,
"learning_rate": 1.5708438608491815e-07,
"logits/chosen": -2.9799609184265137,
"logits/rejected": -3.0229856967926025,
"loss": 0.9282,
"step": 437
},
{
"beta_dpo/beta": 0.15766139328479767,
"beta_dpo/beta_margin_grad_mean": -0.2849089503288269,
"beta_dpo/beta_margin_grad_std": 0.24615149199962616,
"beta_dpo/beta_margin_mean": 5.739384174346924,
"beta_dpo/beta_margin_std": 8.78180980682373,
"beta_dpo/beta_used": 0.15766139328479767,
"beta_dpo/beta_used_raw": 0.023802101612091064,
"beta_dpo/gap_mean": 28.854013442993164,
"beta_dpo/gap_std": 37.47654724121094,
"beta_dpo/loss_margin_mean": 28.225496292114258,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6621315192743764,
"grad_norm": 140.12808227539062,
"learning_rate": 1.558581854913253e-07,
"logits/chosen": -2.959249496459961,
"logits/rejected": -2.969465970993042,
"loss": 0.7395,
"step": 438
},
{
"beta_dpo/beta": 0.2725057303905487,
"beta_dpo/beta_margin_grad_mean": -0.22866128385066986,
"beta_dpo/beta_margin_grad_std": 0.32659637928009033,
"beta_dpo/beta_margin_mean": 9.943758010864258,
"beta_dpo/beta_margin_std": 15.145366668701172,
"beta_dpo/beta_used": 0.2725057303905487,
"beta_dpo/beta_used_raw": 0.2725057303905487,
"beta_dpo/gap_mean": 29.03016471862793,
"beta_dpo/gap_std": 37.50492477416992,
"beta_dpo/loss_margin_mean": 30.412309646606445,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6636432350718064,
"grad_norm": 166.89295959472656,
"learning_rate": 1.5463461824665658e-07,
"logits/chosen": -3.059014320373535,
"logits/rejected": -3.0698959827423096,
"loss": 0.5287,
"step": 439
},
{
"beta_dpo/beta": 0.15036743879318237,
"beta_dpo/beta_margin_grad_mean": -0.3440133035182953,
"beta_dpo/beta_margin_grad_std": 0.29287025332450867,
"beta_dpo/beta_margin_mean": 4.517815589904785,
"beta_dpo/beta_margin_std": 8.261082649230957,
"beta_dpo/beta_used": 0.15036743879318237,
"beta_dpo/beta_used_raw": 0.10082431882619858,
"beta_dpo/gap_mean": 30.075698852539062,
"beta_dpo/gap_std": 37.652427673339844,
"beta_dpo/loss_margin_mean": 32.3798713684082,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6651549508692366,
"grad_norm": 454.4780578613281,
"learning_rate": 1.534137185767178e-07,
"logits/chosen": -2.961894989013672,
"logits/rejected": -2.9933114051818848,
"loss": 1.3947,
"step": 440
},
{
"beta_dpo/beta": 0.06749773770570755,
"beta_dpo/beta_margin_grad_mean": -0.3118380308151245,
"beta_dpo/beta_margin_grad_std": 0.22741641104221344,
"beta_dpo/beta_margin_mean": 2.2036492824554443,
"beta_dpo/beta_margin_std": 3.3733274936676025,
"beta_dpo/beta_used": 0.06749773770570755,
"beta_dpo/beta_used_raw": -0.059857144951820374,
"beta_dpo/gap_mean": 30.235416412353516,
"beta_dpo/gap_std": 36.371238708496094,
"beta_dpo/loss_margin_mean": 31.540372848510742,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6666666666666666,
"grad_norm": 61.62382888793945,
"learning_rate": 1.521955206326976e-07,
"logits/chosen": -2.981652021408081,
"logits/rejected": -3.013087749481201,
"loss": 0.8221,
"step": 441
},
{
"beta_dpo/beta": 0.08830945193767548,
"beta_dpo/beta_margin_grad_mean": -0.3359754979610443,
"beta_dpo/beta_margin_grad_std": 0.26238736510276794,
"beta_dpo/beta_margin_mean": 2.6826870441436768,
"beta_dpo/beta_margin_std": 4.5776567459106445,
"beta_dpo/beta_used": 0.08830945193767548,
"beta_dpo/beta_used_raw": -0.2134900987148285,
"beta_dpo/gap_mean": 29.962265014648438,
"beta_dpo/gap_std": 35.663848876953125,
"beta_dpo/loss_margin_mean": 28.262073516845703,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6681783824640968,
"grad_norm": 205.87567138671875,
"learning_rate": 1.5098005849021078e-07,
"logits/chosen": -3.003020763397217,
"logits/rejected": -3.0152530670166016,
"loss": 1.0074,
"step": 442
},
{
"beta_dpo/beta": 0.14007267355918884,
"beta_dpo/beta_margin_grad_mean": -0.35146015882492065,
"beta_dpo/beta_margin_grad_std": 0.2797658443450928,
"beta_dpo/beta_margin_mean": 4.511338710784912,
"beta_dpo/beta_margin_std": 8.455418586730957,
"beta_dpo/beta_used": 0.14007267355918884,
"beta_dpo/beta_used_raw": -0.07340739667415619,
"beta_dpo/gap_mean": 29.874935150146484,
"beta_dpo/gap_std": 35.72840118408203,
"beta_dpo/loss_margin_mean": 29.933927536010742,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6696900982615268,
"grad_norm": 320.9236145019531,
"learning_rate": 1.4976736614834662e-07,
"logits/chosen": -2.984293222427368,
"logits/rejected": -3.0145263671875,
"loss": 1.2493,
"step": 443
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4955978989601135,
"beta_dpo/beta_margin_grad_std": 0.009687363170087337,
"beta_dpo/beta_margin_mean": 0.017617596313357353,
"beta_dpo/beta_margin_std": 0.03877225145697594,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.3052191138267517,
"beta_dpo/gap_mean": 28.419496536254883,
"beta_dpo/gap_std": 36.03498840332031,
"beta_dpo/loss_margin_mean": 17.617595672607422,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.671201814058957,
"grad_norm": 2.4595789909362793,
"learning_rate": 1.4855747752871654e-07,
"logits/chosen": -2.986158847808838,
"logits/rejected": -3.02463960647583,
"loss": 1.365,
"step": 444
},
{
"beta_dpo/beta": 0.22040501236915588,
"beta_dpo/beta_margin_grad_mean": -0.23939883708953857,
"beta_dpo/beta_margin_grad_std": 0.3706842064857483,
"beta_dpo/beta_margin_mean": 6.504096031188965,
"beta_dpo/beta_margin_std": 8.581731796264648,
"beta_dpo/beta_used": 0.22040501236915588,
"beta_dpo/beta_used_raw": 0.22040501236915588,
"beta_dpo/gap_mean": 27.95121192932129,
"beta_dpo/gap_std": 36.85621643066406,
"beta_dpo/loss_margin_mean": 29.340797424316406,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.672713529856387,
"grad_norm": 393.7691955566406,
"learning_rate": 1.473504264745062e-07,
"logits/chosen": -2.961167335510254,
"logits/rejected": -2.9654603004455566,
"loss": 1.0141,
"step": 445
},
{
"beta_dpo/beta": 0.340822696685791,
"beta_dpo/beta_margin_grad_mean": -0.2775871157646179,
"beta_dpo/beta_margin_grad_std": 0.2622576355934143,
"beta_dpo/beta_margin_mean": 16.909029006958008,
"beta_dpo/beta_margin_std": 24.973739624023438,
"beta_dpo/beta_used": 0.340822696685791,
"beta_dpo/beta_used_raw": 0.294472873210907,
"beta_dpo/gap_mean": 28.8350830078125,
"beta_dpo/gap_std": 36.57999038696289,
"beta_dpo/loss_margin_mean": 37.94091033935547,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.674225245653817,
"grad_norm": 358.96331787109375,
"learning_rate": 1.461462467495284e-07,
"logits/chosen": -2.9686427116394043,
"logits/rejected": -2.9872775077819824,
"loss": 1.6237,
"step": 446
},
{
"beta_dpo/beta": 0.44366323947906494,
"beta_dpo/beta_margin_grad_mean": -0.1775507628917694,
"beta_dpo/beta_margin_grad_std": 0.3296290636062622,
"beta_dpo/beta_margin_mean": 16.851099014282227,
"beta_dpo/beta_margin_std": 23.538476943969727,
"beta_dpo/beta_used": 0.44366323947906494,
"beta_dpo/beta_used_raw": 0.44366323947906494,
"beta_dpo/gap_mean": 30.866836547851562,
"beta_dpo/gap_std": 37.17848587036133,
"beta_dpo/loss_margin_mean": 36.8920783996582,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6757369614512472,
"grad_norm": 504.9360656738281,
"learning_rate": 1.4494497203727843e-07,
"logits/chosen": -2.979090690612793,
"logits/rejected": -3.029752492904663,
"loss": 1.3514,
"step": 447
},
{
"beta_dpo/beta": 0.1897728443145752,
"beta_dpo/beta_margin_grad_mean": -0.18827150762081146,
"beta_dpo/beta_margin_grad_std": 0.3237147629261017,
"beta_dpo/beta_margin_mean": 6.277503967285156,
"beta_dpo/beta_margin_std": 6.867645263671875,
"beta_dpo/beta_used": 0.1897728443145752,
"beta_dpo/beta_used_raw": 0.1897728443145752,
"beta_dpo/gap_mean": 31.5931396484375,
"beta_dpo/gap_std": 37.23057556152344,
"beta_dpo/loss_margin_mean": 33.25175094604492,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6772486772486772,
"grad_norm": 248.3333740234375,
"learning_rate": 1.4374663593999256e-07,
"logits/chosen": -2.9631669521331787,
"logits/rejected": -2.976621150970459,
"loss": 0.8557,
"step": 448
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49524787068367004,
"beta_dpo/beta_margin_grad_std": 0.0076606497168540955,
"beta_dpo/beta_margin_mean": 0.019013898447155952,
"beta_dpo/beta_margin_std": 0.03065245971083641,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.4072267711162567,
"beta_dpo/gap_mean": 29.96826171875,
"beta_dpo/gap_std": 36.12980651855469,
"beta_dpo/loss_margin_mean": 19.013896942138672,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6787603930461074,
"grad_norm": 2.6026723384857178,
"learning_rate": 1.4255127197770707e-07,
"logits/chosen": -3.0260300636291504,
"logits/rejected": -3.0174269676208496,
"loss": 1.365,
"step": 449
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4947327673435211,
"beta_dpo/beta_margin_grad_std": 0.008629199117422104,
"beta_dpo/beta_margin_mean": 0.02107871323823929,
"beta_dpo/beta_margin_std": 0.03453676775097847,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.6087408065795898,
"beta_dpo/gap_mean": 28.068256378173828,
"beta_dpo/gap_std": 35.76659393310547,
"beta_dpo/loss_margin_mean": 21.078712463378906,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6802721088435374,
"grad_norm": 2.508104085922241,
"learning_rate": 1.4135891358732205e-07,
"logits/chosen": -2.941275119781494,
"logits/rejected": -2.997265577316284,
"loss": 1.3703,
"step": 450
},
{
"beta_dpo/beta": 0.051626212894916534,
"beta_dpo/beta_margin_grad_mean": -0.3609742522239685,
"beta_dpo/beta_margin_grad_std": 0.22043851017951965,
"beta_dpo/beta_margin_mean": 1.607721209526062,
"beta_dpo/beta_margin_std": 3.0264225006103516,
"beta_dpo/beta_used": 0.051626212894916534,
"beta_dpo/beta_used_raw": -0.07908162474632263,
"beta_dpo/gap_mean": 27.535343170166016,
"beta_dpo/gap_std": 35.23435974121094,
"beta_dpo/loss_margin_mean": 27.886857986450195,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6817838246409675,
"grad_norm": 67.10790252685547,
"learning_rate": 1.4016959412166437e-07,
"logits/chosen": -2.9985599517822266,
"logits/rejected": -3.016568183898926,
"loss": 0.983,
"step": 451
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49271759390830994,
"beta_dpo/beta_margin_grad_std": 0.009384777396917343,
"beta_dpo/beta_margin_mean": 0.029145939275622368,
"beta_dpo/beta_margin_std": 0.037574782967567444,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.1551307588815689,
"beta_dpo/gap_mean": 27.912967681884766,
"beta_dpo/gap_std": 35.67900848388672,
"beta_dpo/loss_margin_mean": 29.145936965942383,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6832955404383976,
"grad_norm": 2.733677387237549,
"learning_rate": 1.3898334684855645e-07,
"logits/chosen": -2.980307102203369,
"logits/rejected": -3.0058016777038574,
"loss": 1.363,
"step": 452
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4922681748867035,
"beta_dpo/beta_margin_grad_std": 0.009023171849548817,
"beta_dpo/beta_margin_mean": 0.030942685902118683,
"beta_dpo/beta_margin_std": 0.03612072020769119,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.34397417306900024,
"beta_dpo/gap_mean": 28.23801040649414,
"beta_dpo/gap_std": 35.68151092529297,
"beta_dpo/loss_margin_mean": 30.942684173583984,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6848072562358276,
"grad_norm": 2.8947949409484863,
"learning_rate": 1.3780020494988445e-07,
"logits/chosen": -2.9505789279937744,
"logits/rejected": -2.9637527465820312,
"loss": 1.3658,
"step": 453
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49272945523262024,
"beta_dpo/beta_margin_grad_std": 0.00977272354066372,
"beta_dpo/beta_margin_mean": 0.029096750542521477,
"beta_dpo/beta_margin_std": 0.039117682725191116,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.2079845815896988,
"beta_dpo/gap_mean": 28.801530838012695,
"beta_dpo/gap_std": 36.700557708740234,
"beta_dpo/loss_margin_mean": 29.096750259399414,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6863189720332578,
"grad_norm": 2.9485905170440674,
"learning_rate": 1.366202015206706e-07,
"logits/chosen": -2.9418320655822754,
"logits/rejected": -2.953274726867676,
"loss": 1.363,
"step": 454
},
{
"beta_dpo/beta": 0.04326212778687477,
"beta_dpo/beta_margin_grad_mean": -0.3606717586517334,
"beta_dpo/beta_margin_grad_std": 0.23235774040222168,
"beta_dpo/beta_margin_mean": 1.6770412921905518,
"beta_dpo/beta_margin_std": 2.9725475311279297,
"beta_dpo/beta_used": 0.04326212778687477,
"beta_dpo/beta_used_raw": -0.25650086998939514,
"beta_dpo/gap_mean": 29.796241760253906,
"beta_dpo/gap_std": 37.28318405151367,
"beta_dpo/loss_margin_mean": 35.360774993896484,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6878306878306878,
"grad_norm": 123.70121002197266,
"learning_rate": 1.354433695681474e-07,
"logits/chosen": -2.949462413787842,
"logits/rejected": -2.9476213455200195,
"loss": 1.1219,
"step": 455
},
{
"beta_dpo/beta": 0.05017132684588432,
"beta_dpo/beta_margin_grad_mean": -0.40448522567749023,
"beta_dpo/beta_margin_grad_std": 0.27879607677459717,
"beta_dpo/beta_margin_mean": 1.2627125978469849,
"beta_dpo/beta_margin_std": 3.3613815307617188,
"beta_dpo/beta_used": 0.05017132684588432,
"beta_dpo/beta_used_raw": 0.045974329113960266,
"beta_dpo/gap_mean": 29.84475326538086,
"beta_dpo/gap_std": 38.46610641479492,
"beta_dpo/loss_margin_mean": 28.446945190429688,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6893424036281179,
"grad_norm": 131.1843719482422,
"learning_rate": 1.3426974201083439e-07,
"logits/chosen": -2.9341540336608887,
"logits/rejected": -2.959643602371216,
"loss": 1.1835,
"step": 456
},
{
"beta_dpo/beta": 0.2062607854604721,
"beta_dpo/beta_margin_grad_mean": -0.32546162605285645,
"beta_dpo/beta_margin_grad_std": 0.282577782869339,
"beta_dpo/beta_margin_mean": 8.263957023620605,
"beta_dpo/beta_margin_std": 12.933406829833984,
"beta_dpo/beta_used": 0.2062607854604721,
"beta_dpo/beta_used_raw": -0.04059891402721405,
"beta_dpo/gap_mean": 30.066280364990234,
"beta_dpo/gap_std": 38.42988586425781,
"beta_dpo/loss_margin_mean": 29.572206497192383,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.690854119425548,
"grad_norm": 404.93133544921875,
"learning_rate": 1.3309935167761717e-07,
"logits/chosen": -2.8899693489074707,
"logits/rejected": -2.927942991256714,
"loss": 1.0866,
"step": 457
},
{
"beta_dpo/beta": 0.06866870075464249,
"beta_dpo/beta_margin_grad_mean": -0.3728755712509155,
"beta_dpo/beta_margin_grad_std": 0.2513536214828491,
"beta_dpo/beta_margin_mean": 2.0318596363067627,
"beta_dpo/beta_margin_std": 4.29193639755249,
"beta_dpo/beta_used": 0.06866870075464249,
"beta_dpo/beta_used_raw": -0.02202005684375763,
"beta_dpo/gap_mean": 30.349742889404297,
"beta_dpo/gap_std": 39.295570373535156,
"beta_dpo/loss_margin_mean": 34.04402542114258,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6923658352229781,
"grad_norm": 130.452880859375,
"learning_rate": 1.3193223130682936e-07,
"logits/chosen": -2.879459857940674,
"logits/rejected": -2.9372262954711914,
"loss": 0.9576,
"step": 458
},
{
"beta_dpo/beta": 0.14638648927211761,
"beta_dpo/beta_margin_grad_mean": -0.28407731652259827,
"beta_dpo/beta_margin_grad_std": 0.25495174527168274,
"beta_dpo/beta_margin_mean": 7.257359504699707,
"beta_dpo/beta_margin_std": 11.758667945861816,
"beta_dpo/beta_used": 0.14638648927211761,
"beta_dpo/beta_used_raw": 0.1360008269548416,
"beta_dpo/gap_mean": 29.87301254272461,
"beta_dpo/gap_std": 40.10071563720703,
"beta_dpo/loss_margin_mean": 32.61771011352539,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6938775510204082,
"grad_norm": 211.43577575683594,
"learning_rate": 1.3076841354533658e-07,
"logits/chosen": -2.912708282470703,
"logits/rejected": -2.9239847660064697,
"loss": 0.9333,
"step": 459
},
{
"beta_dpo/beta": 0.14245007932186127,
"beta_dpo/beta_margin_grad_mean": -0.38360172510147095,
"beta_dpo/beta_margin_grad_std": 0.30201467871665955,
"beta_dpo/beta_margin_mean": 4.727894306182861,
"beta_dpo/beta_margin_std": 9.328141212463379,
"beta_dpo/beta_used": 0.14245007932186127,
"beta_dpo/beta_used_raw": 0.05454842001199722,
"beta_dpo/gap_mean": 32.104637145996094,
"beta_dpo/gap_std": 40.898643493652344,
"beta_dpo/loss_margin_mean": 37.70212936401367,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6953892668178382,
"grad_norm": 312.2969055175781,
"learning_rate": 1.2960793094762345e-07,
"logits/chosen": -2.8840432167053223,
"logits/rejected": -2.932039260864258,
"loss": 1.1696,
"step": 460
},
{
"beta_dpo/beta": 0.1491033136844635,
"beta_dpo/beta_margin_grad_mean": -0.3280187249183655,
"beta_dpo/beta_margin_grad_std": 0.2687358558177948,
"beta_dpo/beta_margin_mean": 6.421268463134766,
"beta_dpo/beta_margin_std": 12.01246166229248,
"beta_dpo/beta_used": 0.1491033136844635,
"beta_dpo/beta_used_raw": -0.07928402721881866,
"beta_dpo/gap_mean": 33.18560791015625,
"beta_dpo/gap_std": 41.554012298583984,
"beta_dpo/loss_margin_mean": 37.490901947021484,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6969009826152683,
"grad_norm": 274.51336669921875,
"learning_rate": 1.2845081597488286e-07,
"logits/chosen": -2.8532447814941406,
"logits/rejected": -2.8888659477233887,
"loss": 0.8844,
"step": 461
},
{
"beta_dpo/beta": 0.3675364553928375,
"beta_dpo/beta_margin_grad_mean": -0.24762766063213348,
"beta_dpo/beta_margin_grad_std": 0.2799862027168274,
"beta_dpo/beta_margin_mean": 15.530708312988281,
"beta_dpo/beta_margin_std": 25.592716217041016,
"beta_dpo/beta_used": 0.3675364553928375,
"beta_dpo/beta_used_raw": 0.3675364553928375,
"beta_dpo/gap_mean": 34.30308532714844,
"beta_dpo/gap_std": 41.602821350097656,
"beta_dpo/loss_margin_mean": 40.83872985839844,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6984126984126984,
"grad_norm": 1062.2481689453125,
"learning_rate": 1.27297100994108e-07,
"logits/chosen": -2.857814311981201,
"logits/rejected": -2.865307569503784,
"loss": 1.7849,
"step": 462
},
{
"beta_dpo/beta": 0.012533308006823063,
"beta_dpo/beta_margin_grad_mean": -0.40381601452827454,
"beta_dpo/beta_margin_grad_std": 0.1651657372713089,
"beta_dpo/beta_margin_mean": 0.5023635029792786,
"beta_dpo/beta_margin_std": 0.8901291489601135,
"beta_dpo/beta_used": 0.012533308006823063,
"beta_dpo/beta_used_raw": -0.2637104094028473,
"beta_dpo/gap_mean": 34.78700256347656,
"beta_dpo/gap_std": 41.961753845214844,
"beta_dpo/loss_margin_mean": 34.03054428100586,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6999244142101285,
"grad_norm": 31.238935470581055,
"learning_rate": 1.2614681827718695e-07,
"logits/chosen": -2.890806198120117,
"logits/rejected": -2.878354549407959,
"loss": 1.1205,
"step": 463
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49027693271636963,
"beta_dpo/beta_margin_grad_std": 0.012361356988549232,
"beta_dpo/beta_margin_mean": 0.03892575949430466,
"beta_dpo/beta_margin_std": 0.04949839040637016,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.3940258026123047,
"beta_dpo/gap_mean": 34.684288024902344,
"beta_dpo/gap_std": 43.00639343261719,
"beta_dpo/loss_margin_mean": 38.925758361816406,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7014361300075586,
"grad_norm": 4.210781574249268,
"learning_rate": 1.2500000000000005e-07,
"logits/chosen": -2.8452868461608887,
"logits/rejected": -2.8584518432617188,
"loss": 1.3604,
"step": 464
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49044856429100037,
"beta_dpo/beta_margin_grad_std": 0.012582222931087017,
"beta_dpo/beta_margin_mean": 0.03824080526828766,
"beta_dpo/beta_margin_std": 0.05038909986615181,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.4250331521034241,
"beta_dpo/gap_mean": 35.82416915893555,
"beta_dpo/gap_std": 44.42228698730469,
"beta_dpo/loss_margin_mean": 38.24080276489258,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7029478458049887,
"grad_norm": 3.7858383655548096,
"learning_rate": 1.238566782415197e-07,
"logits/chosen": -2.8738555908203125,
"logits/rejected": -2.898587226867676,
"loss": 1.3598,
"step": 465
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4945682883262634,
"beta_dpo/beta_margin_grad_std": 0.01177225448191166,
"beta_dpo/beta_margin_mean": 0.021742146462202072,
"beta_dpo/beta_margin_std": 0.04713207110762596,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.6088250875473022,
"beta_dpo/gap_mean": 34.10150909423828,
"beta_dpo/gap_std": 45.46623229980469,
"beta_dpo/loss_margin_mean": 21.742145538330078,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7044595616024187,
"grad_norm": 3.8012847900390625,
"learning_rate": 1.2271688498291334e-07,
"logits/chosen": -2.8463258743286133,
"logits/rejected": -2.841275215148926,
"loss": 1.3644,
"step": 466
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.48994848132133484,
"beta_dpo/beta_margin_grad_std": 0.01253302488476038,
"beta_dpo/beta_margin_mean": 0.04024511203169823,
"beta_dpo/beta_margin_std": 0.050207603722810745,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.18654143810272217,
"beta_dpo/gap_mean": 34.15043640136719,
"beta_dpo/gap_std": 45.88709259033203,
"beta_dpo/loss_margin_mean": 40.24510955810547,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7059712773998488,
"grad_norm": 4.15172815322876,
"learning_rate": 1.2158065210664848e-07,
"logits/chosen": -2.7906570434570312,
"logits/rejected": -2.8386688232421875,
"loss": 1.3574,
"step": 467
},
{
"beta_dpo/beta": 0.4981394112110138,
"beta_dpo/beta_margin_grad_mean": -0.18455180525779724,
"beta_dpo/beta_margin_grad_std": 0.33006787300109863,
"beta_dpo/beta_margin_mean": 25.867090225219727,
"beta_dpo/beta_margin_std": 44.431175231933594,
"beta_dpo/beta_used": 0.4981394112110138,
"beta_dpo/beta_used_raw": 0.4981394112110138,
"beta_dpo/gap_mean": 36.898414611816406,
"beta_dpo/gap_std": 47.32759094238281,
"beta_dpo/loss_margin_mean": 51.3314094543457,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7074829931972789,
"grad_norm": 1016.423583984375,
"learning_rate": 1.204480113956011e-07,
"logits/chosen": -2.839628219604492,
"logits/rejected": -2.8357181549072266,
"loss": 1.5654,
"step": 468
},
{
"beta_dpo/beta": 0.01080307736992836,
"beta_dpo/beta_margin_grad_mean": -0.4090682566165924,
"beta_dpo/beta_margin_grad_std": 0.14942912757396698,
"beta_dpo/beta_margin_mean": 0.48849332332611084,
"beta_dpo/beta_margin_std": 0.8895741105079651,
"beta_dpo/beta_used": 0.01080307736992836,
"beta_dpo/beta_used_raw": -0.01903732866048813,
"beta_dpo/gap_mean": 37.885498046875,
"beta_dpo/gap_std": 48.62867736816406,
"beta_dpo/loss_margin_mean": 35.774200439453125,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.708994708994709,
"grad_norm": 43.972171783447266,
"learning_rate": 1.1931899453216697e-07,
"logits/chosen": -2.8145484924316406,
"logits/rejected": -2.826870918273926,
"loss": 1.1359,
"step": 469
},
{
"beta_dpo/beta": 0.005608946550637484,
"beta_dpo/beta_margin_grad_mean": -0.44782423973083496,
"beta_dpo/beta_margin_grad_std": 0.08384717255830765,
"beta_dpo/beta_margin_mean": 0.22157049179077148,
"beta_dpo/beta_margin_std": 0.3649922013282776,
"beta_dpo/beta_used": 0.005608946550637484,
"beta_dpo/beta_used_raw": -0.051736194640398026,
"beta_dpo/gap_mean": 37.81916427612305,
"beta_dpo/gap_std": 48.586944580078125,
"beta_dpo/loss_margin_mean": 42.41155242919922,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7105064247921391,
"grad_norm": 26.199756622314453,
"learning_rate": 1.1819363309737438e-07,
"logits/chosen": -2.7599127292633057,
"logits/rejected": -2.787717819213867,
"loss": 1.2242,
"step": 470
},
{
"beta_dpo/beta": 0.5907325148582458,
"beta_dpo/beta_margin_grad_mean": -0.11496514827013016,
"beta_dpo/beta_margin_grad_std": 0.292705237865448,
"beta_dpo/beta_margin_mean": 33.369873046875,
"beta_dpo/beta_margin_std": 42.265533447265625,
"beta_dpo/beta_used": 0.5907325148582458,
"beta_dpo/beta_used_raw": 0.5907325148582458,
"beta_dpo/gap_mean": 40.03142166137695,
"beta_dpo/gap_std": 48.31740188598633,
"beta_dpo/loss_margin_mean": 52.58283996582031,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7120181405895691,
"grad_norm": 1157.378662109375,
"learning_rate": 1.1707195857000215e-07,
"logits/chosen": -2.7885494232177734,
"logits/rejected": -2.8036623001098633,
"loss": 2.6333,
"step": 471
},
{
"beta_dpo/beta": 0.07265999913215637,
"beta_dpo/beta_margin_grad_mean": -0.3512791395187378,
"beta_dpo/beta_margin_grad_std": 0.2786218822002411,
"beta_dpo/beta_margin_mean": 4.18007230758667,
"beta_dpo/beta_margin_std": 7.867624759674072,
"beta_dpo/beta_used": 0.07265999913215637,
"beta_dpo/beta_used_raw": -0.3325417637825012,
"beta_dpo/gap_mean": 41.11811065673828,
"beta_dpo/gap_std": 50.19427490234375,
"beta_dpo/loss_margin_mean": 45.41288757324219,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7135298563869993,
"grad_norm": 421.1426086425781,
"learning_rate": 1.1595400232569768e-07,
"logits/chosen": -2.7796170711517334,
"logits/rejected": -2.7934298515319824,
"loss": 1.4017,
"step": 472
},
{
"beta_dpo/beta": 0.23743751645088196,
"beta_dpo/beta_margin_grad_mean": -0.37403836846351624,
"beta_dpo/beta_margin_grad_std": 0.30802056193351746,
"beta_dpo/beta_margin_mean": 9.776598930358887,
"beta_dpo/beta_margin_std": 21.84530258178711,
"beta_dpo/beta_used": 0.23743751645088196,
"beta_dpo/beta_used_raw": -0.2949034571647644,
"beta_dpo/gap_mean": 41.25324249267578,
"beta_dpo/gap_std": 52.74125671386719,
"beta_dpo/loss_margin_mean": 37.91928482055664,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7150415721844293,
"grad_norm": 874.90771484375,
"learning_rate": 1.1483979563610069e-07,
"logits/chosen": -2.7615790367126465,
"logits/rejected": -2.819824695587158,
"loss": 2.1061,
"step": 473
},
{
"beta_dpo/beta": 0.3522808253765106,
"beta_dpo/beta_margin_grad_mean": -0.40077441930770874,
"beta_dpo/beta_margin_grad_std": 0.3069063127040863,
"beta_dpo/beta_margin_mean": 15.927785873413086,
"beta_dpo/beta_margin_std": 33.118064880371094,
"beta_dpo/beta_used": 0.3522808253765106,
"beta_dpo/beta_used_raw": 0.16157013177871704,
"beta_dpo/gap_mean": 41.311546325683594,
"beta_dpo/gap_std": 54.17529296875,
"beta_dpo/loss_margin_mean": 40.076534271240234,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7165532879818595,
"grad_norm": 1089.6541748046875,
"learning_rate": 1.1372936966796709e-07,
"logits/chosen": -2.758329153060913,
"logits/rejected": -2.801651954650879,
"loss": 1.42,
"step": 474
},
{
"beta_dpo/beta": 0.6577058434486389,
"beta_dpo/beta_margin_grad_mean": -0.14120624959468842,
"beta_dpo/beta_margin_grad_std": 0.31994450092315674,
"beta_dpo/beta_margin_mean": 38.25525665283203,
"beta_dpo/beta_margin_std": 46.2388801574707,
"beta_dpo/beta_used": 0.6577058434486389,
"beta_dpo/beta_used_raw": 0.6577058434486389,
"beta_dpo/gap_mean": 43.44903564453125,
"beta_dpo/gap_std": 54.750579833984375,
"beta_dpo/loss_margin_mean": 58.09202194213867,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7180650037792895,
"grad_norm": 1087.662353515625,
"learning_rate": 1.126227554822985e-07,
"logits/chosen": -2.7532217502593994,
"logits/rejected": -2.7668323516845703,
"loss": 0.8006,
"step": 475
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4904637038707733,
"beta_dpo/beta_margin_grad_std": 0.01358871627599001,
"beta_dpo/beta_margin_mean": 0.0381828173995018,
"beta_dpo/beta_margin_std": 0.0544172078371048,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.6799896359443665,
"beta_dpo/gap_mean": 43.584678649902344,
"beta_dpo/gap_std": 55.18130111694336,
"beta_dpo/loss_margin_mean": 38.18281555175781,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7195767195767195,
"grad_norm": 3.9220597743988037,
"learning_rate": 1.1151998403347243e-07,
"logits/chosen": -2.7706384658813477,
"logits/rejected": -2.7767281532287598,
"loss": 1.3565,
"step": 476
},
{
"beta_dpo/beta": 0.0515187531709671,
"beta_dpo/beta_margin_grad_mean": -0.3912544250488281,
"beta_dpo/beta_margin_grad_std": 0.2792060077190399,
"beta_dpo/beta_margin_mean": 2.122750759124756,
"beta_dpo/beta_margin_std": 5.050689220428467,
"beta_dpo/beta_used": 0.0515187531709671,
"beta_dpo/beta_used_raw": -0.3054247200489044,
"beta_dpo/gap_mean": 42.680397033691406,
"beta_dpo/gap_std": 56.65534210205078,
"beta_dpo/loss_margin_mean": 40.76393508911133,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7210884353741497,
"grad_norm": 268.27996826171875,
"learning_rate": 1.1042108616837692e-07,
"logits/chosen": -2.7462360858917236,
"logits/rejected": -2.781938076019287,
"loss": 1.5907,
"step": 477
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4899767339229584,
"beta_dpo/beta_margin_grad_std": 0.014997678808867931,
"beta_dpo/beta_margin_mean": 0.040140360593795776,
"beta_dpo/beta_margin_std": 0.06006384268403053,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.3576127588748932,
"beta_dpo/gap_mean": 42.49079895019531,
"beta_dpo/gap_std": 57.602779388427734,
"beta_dpo/loss_margin_mean": 40.140357971191406,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7226001511715797,
"grad_norm": 3.5894758701324463,
"learning_rate": 1.0932609262554746e-07,
"logits/chosen": -2.7310919761657715,
"logits/rejected": -2.729006767272949,
"loss": 1.3525,
"step": 478
},
{
"beta_dpo/beta": 0.02417217753827572,
"beta_dpo/beta_margin_grad_mean": -0.3980015814304352,
"beta_dpo/beta_margin_grad_std": 0.21414901316165924,
"beta_dpo/beta_margin_mean": 0.8745595216751099,
"beta_dpo/beta_margin_std": 1.965387225151062,
"beta_dpo/beta_used": 0.02417217753827572,
"beta_dpo/beta_used_raw": -0.27406013011932373,
"beta_dpo/gap_mean": 40.88475036621094,
"beta_dpo/gap_std": 57.084991455078125,
"beta_dpo/loss_margin_mean": 32.70804977416992,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7241118669690099,
"grad_norm": 64.1123046875,
"learning_rate": 1.0823503403430734e-07,
"logits/chosen": -2.707150936126709,
"logits/rejected": -2.7152490615844727,
"loss": 1.0242,
"step": 479
},
{
"beta_dpo/beta": 0.973773181438446,
"beta_dpo/beta_margin_grad_mean": -0.22256873548030853,
"beta_dpo/beta_margin_grad_std": 0.4061585068702698,
"beta_dpo/beta_margin_mean": 50.128910064697266,
"beta_dpo/beta_margin_std": 66.56196594238281,
"beta_dpo/beta_used": 0.973773181438446,
"beta_dpo/beta_used_raw": 0.973773181438446,
"beta_dpo/gap_mean": 40.585201263427734,
"beta_dpo/gap_std": 57.819732666015625,
"beta_dpo/loss_margin_mean": 46.63157272338867,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7256235827664399,
"grad_norm": 2886.19775390625,
"learning_rate": 1.0714794091391072e-07,
"logits/chosen": -2.7781190872192383,
"logits/rejected": -2.77724027633667,
"loss": 3.4544,
"step": 480
},
{
"beta_dpo/beta": 0.1825007051229477,
"beta_dpo/beta_margin_grad_mean": -0.20635956525802612,
"beta_dpo/beta_margin_grad_std": 0.33250176906585693,
"beta_dpo/beta_margin_mean": 7.717591285705566,
"beta_dpo/beta_margin_std": 9.530905723571777,
"beta_dpo/beta_used": 0.1825007051229477,
"beta_dpo/beta_used_raw": 0.1825007051229477,
"beta_dpo/gap_mean": 42.10963439941406,
"beta_dpo/gap_std": 57.092857360839844,
"beta_dpo/loss_margin_mean": 43.11017990112305,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.72713529856387,
"grad_norm": 440.2613220214844,
"learning_rate": 1.0606484367268906e-07,
"logits/chosen": -2.787419319152832,
"logits/rejected": -2.780320644378662,
"loss": 1.0162,
"step": 481
},
{
"beta_dpo/beta": 0.1774512678384781,
"beta_dpo/beta_margin_grad_mean": -0.3469817638397217,
"beta_dpo/beta_margin_grad_std": 0.3088356852531433,
"beta_dpo/beta_margin_mean": 7.2963032722473145,
"beta_dpo/beta_margin_std": 18.42062759399414,
"beta_dpo/beta_used": 0.1774512678384781,
"beta_dpo/beta_used_raw": -0.19875358045101166,
"beta_dpo/gap_mean": 41.351280212402344,
"beta_dpo/gap_std": 57.856937408447266,
"beta_dpo/loss_margin_mean": 37.356292724609375,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7286470143613001,
"grad_norm": 362.2626037597656,
"learning_rate": 1.0498577260720048e-07,
"logits/chosen": -2.7591712474823,
"logits/rejected": -2.8289999961853027,
"loss": 1.0535,
"step": 482
},
{
"beta_dpo/beta": 0.15078911185264587,
"beta_dpo/beta_margin_grad_mean": -0.19500455260276794,
"beta_dpo/beta_margin_grad_std": 0.319975346326828,
"beta_dpo/beta_margin_mean": 6.7245588302612305,
"beta_dpo/beta_margin_std": 9.449666023254395,
"beta_dpo/beta_used": 0.15078911185264587,
"beta_dpo/beta_used_raw": 0.15078911185264587,
"beta_dpo/gap_mean": 41.55865478515625,
"beta_dpo/gap_std": 57.50669479370117,
"beta_dpo/loss_margin_mean": 46.46189880371094,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7301587301587301,
"grad_norm": 398.8206481933594,
"learning_rate": 1.0391075790138232e-07,
"logits/chosen": -2.689525604248047,
"logits/rejected": -2.752610206604004,
"loss": 0.741,
"step": 483
},
{
"beta_dpo/beta": 0.09219953417778015,
"beta_dpo/beta_margin_grad_mean": -0.3376636207103729,
"beta_dpo/beta_margin_grad_std": 0.3007601499557495,
"beta_dpo/beta_margin_mean": 3.813218593597412,
"beta_dpo/beta_margin_std": 7.575406074523926,
"beta_dpo/beta_used": 0.09219953417778015,
"beta_dpo/beta_used_raw": -0.1167166456580162,
"beta_dpo/gap_mean": 41.57493209838867,
"beta_dpo/gap_std": 56.57539367675781,
"beta_dpo/loss_margin_mean": 39.47372055053711,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7316704459561603,
"grad_norm": 492.06988525390625,
"learning_rate": 1.0283982962570681e-07,
"logits/chosen": -2.6704444885253906,
"logits/rejected": -2.6830573081970215,
"loss": 1.4066,
"step": 484
},
{
"beta_dpo/beta": 0.40363559126853943,
"beta_dpo/beta_margin_grad_mean": -0.31707045435905457,
"beta_dpo/beta_margin_grad_std": 0.29281070828437805,
"beta_dpo/beta_margin_mean": 21.213218688964844,
"beta_dpo/beta_margin_std": 39.091835021972656,
"beta_dpo/beta_used": 0.40363559126853943,
"beta_dpo/beta_used_raw": -0.09030476212501526,
"beta_dpo/gap_mean": 41.633697509765625,
"beta_dpo/gap_std": 56.02287292480469,
"beta_dpo/loss_margin_mean": 37.29250717163086,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7331821617535903,
"grad_norm": 2081.590576171875,
"learning_rate": 1.0177301773633992e-07,
"logits/chosen": -2.696157932281494,
"logits/rejected": -2.7043862342834473,
"loss": 2.5686,
"step": 485
},
{
"beta_dpo/beta": 0.10442067682743073,
"beta_dpo/beta_margin_grad_mean": -0.3749229609966278,
"beta_dpo/beta_margin_grad_std": 0.3003118634223938,
"beta_dpo/beta_margin_mean": 3.722522020339966,
"beta_dpo/beta_margin_std": 8.481477737426758,
"beta_dpo/beta_used": 0.10442067682743073,
"beta_dpo/beta_used_raw": -0.01430542767047882,
"beta_dpo/gap_mean": 40.47251892089844,
"beta_dpo/gap_std": 55.193885803222656,
"beta_dpo/loss_margin_mean": 41.57960891723633,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7346938775510204,
"grad_norm": 276.54681396484375,
"learning_rate": 1.007103520743035e-07,
"logits/chosen": -2.6733226776123047,
"logits/rejected": -2.7371678352355957,
"loss": 1.174,
"step": 486
},
{
"beta_dpo/beta": 0.07994943112134933,
"beta_dpo/beta_margin_grad_mean": -0.3323688805103302,
"beta_dpo/beta_margin_grad_std": 0.2599465847015381,
"beta_dpo/beta_margin_mean": 2.785461902618408,
"beta_dpo/beta_margin_std": 5.125765800476074,
"beta_dpo/beta_used": 0.07994943112134933,
"beta_dpo/beta_used_raw": -0.12181156873703003,
"beta_dpo/gap_mean": 39.985862731933594,
"beta_dpo/gap_std": 54.396183013916016,
"beta_dpo/loss_margin_mean": 34.94841384887695,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7362055933484505,
"grad_norm": 163.62640380859375,
"learning_rate": 9.965186236464046e-08,
"logits/chosen": -2.6691818237304688,
"logits/rejected": -2.7056899070739746,
"loss": 0.8702,
"step": 487
},
{
"beta_dpo/beta": 0.5008640885353088,
"beta_dpo/beta_margin_grad_mean": -0.3386906087398529,
"beta_dpo/beta_margin_grad_std": 0.30464500188827515,
"beta_dpo/beta_margin_mean": 28.58328628540039,
"beta_dpo/beta_margin_std": 49.5888557434082,
"beta_dpo/beta_used": 0.5008640885353088,
"beta_dpo/beta_used_raw": 0.34469401836395264,
"beta_dpo/gap_mean": 40.6063346862793,
"beta_dpo/gap_std": 54.928916931152344,
"beta_dpo/loss_margin_mean": 48.46849060058594,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7377173091458806,
"grad_norm": 1919.3291015625,
"learning_rate": 9.859757821558337e-08,
"logits/chosen": -2.689507484436035,
"logits/rejected": -2.7045979499816895,
"loss": 3.0325,
"step": 488
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4926265478134155,
"beta_dpo/beta_margin_grad_std": 0.012725806795060635,
"beta_dpo/beta_margin_mean": 0.029519159346818924,
"beta_dpo/beta_margin_std": 0.05095401778817177,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.43343132734298706,
"beta_dpo/gap_mean": 39.814117431640625,
"beta_dpo/gap_std": 54.60805130004883,
"beta_dpo/loss_margin_mean": 29.51915740966797,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7392290249433107,
"grad_norm": 4.167266845703125,
"learning_rate": 9.754752911772615e-08,
"logits/chosen": -2.7549619674682617,
"logits/rejected": -2.792065143585205,
"loss": 1.3561,
"step": 489
},
{
"beta_dpo/beta": 0.32752177119255066,
"beta_dpo/beta_margin_grad_mean": -0.39252033829689026,
"beta_dpo/beta_margin_grad_std": 0.3261478543281555,
"beta_dpo/beta_margin_mean": 12.92074203491211,
"beta_dpo/beta_margin_std": 32.389190673828125,
"beta_dpo/beta_used": 0.32752177119255066,
"beta_dpo/beta_used_raw": 0.2527086138725281,
"beta_dpo/gap_mean": 39.23323059082031,
"beta_dpo/gap_std": 56.25166320800781,
"beta_dpo/loss_margin_mean": 39.49768829345703,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7407407407407407,
"grad_norm": 1155.421630859375,
"learning_rate": 9.650174444319956e-08,
"logits/chosen": -2.711261749267578,
"logits/rejected": -2.7159712314605713,
"loss": 3.4279,
"step": 490
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4900553822517395,
"beta_dpo/beta_margin_grad_std": 0.013235099613666534,
"beta_dpo/beta_margin_mean": 0.03981310874223709,
"beta_dpo/beta_margin_std": 0.05299828574061394,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.2943836748600006,
"beta_dpo/gap_mean": 39.666542053222656,
"beta_dpo/gap_std": 56.23136901855469,
"beta_dpo/loss_margin_mean": 39.813106536865234,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7422524565381708,
"grad_norm": 3.6654624938964844,
"learning_rate": 9.546025344484868e-08,
"logits/chosen": -2.712951421737671,
"logits/rejected": -2.7393627166748047,
"loss": 1.3539,
"step": 491
},
{
"beta_dpo/beta": 0.5559797286987305,
"beta_dpo/beta_margin_grad_mean": -0.3582126796245575,
"beta_dpo/beta_margin_grad_std": 0.31471025943756104,
"beta_dpo/beta_margin_mean": 29.00932502746582,
"beta_dpo/beta_margin_std": 51.71617889404297,
"beta_dpo/beta_used": 0.5559797286987305,
"beta_dpo/beta_used_raw": 0.42610257863998413,
"beta_dpo/gap_mean": 38.09209060668945,
"beta_dpo/gap_std": 57.30845260620117,
"beta_dpo/loss_margin_mean": 36.10185623168945,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7437641723356009,
"grad_norm": 1250.3416748046875,
"learning_rate": 9.442308525541589e-08,
"logits/chosen": -2.703014850616455,
"logits/rejected": -2.743101119995117,
"loss": 2.3617,
"step": 492
},
{
"beta_dpo/beta": 0.15209481120109558,
"beta_dpo/beta_margin_grad_mean": -0.31275543570518494,
"beta_dpo/beta_margin_grad_std": 0.2850077450275421,
"beta_dpo/beta_margin_mean": 7.864630699157715,
"beta_dpo/beta_margin_std": 15.609386444091797,
"beta_dpo/beta_used": 0.15209481120109558,
"beta_dpo/beta_used_raw": 0.09873668849468231,
"beta_dpo/gap_mean": 39.294410705566406,
"beta_dpo/gap_std": 56.776641845703125,
"beta_dpo/loss_margin_mean": 44.313941955566406,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.745275888133031,
"grad_norm": 693.7211303710938,
"learning_rate": 9.339026888672468e-08,
"logits/chosen": -2.722355842590332,
"logits/rejected": -2.7542717456817627,
"loss": 2.4299,
"step": 493
},
{
"beta_dpo/beta": 0.07930776476860046,
"beta_dpo/beta_margin_grad_mean": -0.35822421312332153,
"beta_dpo/beta_margin_grad_std": 0.29255738854408264,
"beta_dpo/beta_margin_mean": 3.3953824043273926,
"beta_dpo/beta_margin_std": 7.435774803161621,
"beta_dpo/beta_used": 0.07930776476860046,
"beta_dpo/beta_used_raw": -0.20857512950897217,
"beta_dpo/gap_mean": 39.77642822265625,
"beta_dpo/gap_std": 57.30767822265625,
"beta_dpo/loss_margin_mean": 36.8887939453125,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7467876039304611,
"grad_norm": 316.9527587890625,
"learning_rate": 9.236183322886945e-08,
"logits/chosen": -2.739696502685547,
"logits/rejected": -2.7579593658447266,
"loss": 1.1247,
"step": 494
},
{
"beta_dpo/beta": 0.33298665285110474,
"beta_dpo/beta_margin_grad_mean": -0.32901203632354736,
"beta_dpo/beta_margin_grad_std": 0.2863346338272095,
"beta_dpo/beta_margin_mean": 15.57753849029541,
"beta_dpo/beta_margin_std": 29.28587532043457,
"beta_dpo/beta_used": 0.33298665285110474,
"beta_dpo/beta_used_raw": -0.00022649765014648438,
"beta_dpo/gap_mean": 38.87417221069336,
"beta_dpo/gap_std": 55.89385986328125,
"beta_dpo/loss_margin_mean": 38.79734420776367,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7482993197278912,
"grad_norm": 713.429443359375,
"learning_rate": 9.133780704940594e-08,
"logits/chosen": -2.6686229705810547,
"logits/rejected": -2.7093167304992676,
"loss": 0.9881,
"step": 495
},
{
"beta_dpo/beta": 0.02213066816329956,
"beta_dpo/beta_margin_grad_mean": -0.4068155586719513,
"beta_dpo/beta_margin_grad_std": 0.2209998369216919,
"beta_dpo/beta_margin_mean": 0.7566680312156677,
"beta_dpo/beta_margin_std": 1.8222705125808716,
"beta_dpo/beta_used": 0.02213066816329956,
"beta_dpo/beta_used_raw": -0.06461194157600403,
"beta_dpo/gap_mean": 38.767662048339844,
"beta_dpo/gap_std": 56.514461517333984,
"beta_dpo/loss_margin_mean": 35.313419342041016,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7498110355253212,
"grad_norm": 87.11608123779297,
"learning_rate": 9.031821899254797e-08,
"logits/chosen": -2.733874559402466,
"logits/rejected": -2.7735462188720703,
"loss": 1.1315,
"step": 496
},
{
"beta_dpo/beta": 0.12018337845802307,
"beta_dpo/beta_margin_grad_mean": -0.15946322679519653,
"beta_dpo/beta_margin_grad_std": 0.2790553867816925,
"beta_dpo/beta_margin_mean": 7.110559463500977,
"beta_dpo/beta_margin_std": 6.706122875213623,
"beta_dpo/beta_used": 0.12018337845802307,
"beta_dpo/beta_used_raw": 0.12018337845802307,
"beta_dpo/gap_mean": 41.55035400390625,
"beta_dpo/gap_std": 56.61823272705078,
"beta_dpo/loss_margin_mean": 59.153717041015625,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7513227513227513,
"grad_norm": 290.6864013671875,
"learning_rate": 8.930309757836516e-08,
"logits/chosen": -2.71683931350708,
"logits/rejected": -2.7388899326324463,
"loss": 0.8565,
"step": 497
},
{
"beta_dpo/beta": 0.2165343165397644,
"beta_dpo/beta_margin_grad_mean": -0.3753109276294708,
"beta_dpo/beta_margin_grad_std": 0.30145102739334106,
"beta_dpo/beta_margin_mean": 10.487258911132812,
"beta_dpo/beta_margin_std": 21.614362716674805,
"beta_dpo/beta_used": 0.2165343165397644,
"beta_dpo/beta_used_raw": 0.09735321253538132,
"beta_dpo/gap_mean": 42.46245193481445,
"beta_dpo/gap_std": 57.141075134277344,
"beta_dpo/loss_margin_mean": 44.00096893310547,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7528344671201814,
"grad_norm": 961.116455078125,
"learning_rate": 8.829247120198563e-08,
"logits/chosen": -2.7068562507629395,
"logits/rejected": -2.713627815246582,
"loss": 2.3702,
"step": 498
},
{
"beta_dpo/beta": 0.19605065882205963,
"beta_dpo/beta_margin_grad_mean": -0.39544767141342163,
"beta_dpo/beta_margin_grad_std": 0.3248600363731384,
"beta_dpo/beta_margin_mean": 9.355982780456543,
"beta_dpo/beta_margin_std": 21.50864028930664,
"beta_dpo/beta_used": 0.19605065882205963,
"beta_dpo/beta_used_raw": 0.004853472113609314,
"beta_dpo/gap_mean": 43.18915557861328,
"beta_dpo/gap_std": 58.87891387939453,
"beta_dpo/loss_margin_mean": 44.659873962402344,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7543461829176115,
"grad_norm": 743.588623046875,
"learning_rate": 8.728636813280163e-08,
"logits/chosen": -2.692368507385254,
"logits/rejected": -2.726922035217285,
"loss": 3.2996,
"step": 499
},
{
"beta_dpo/beta": 0.1744510680437088,
"beta_dpo/beta_margin_grad_mean": -0.30420351028442383,
"beta_dpo/beta_margin_grad_std": 0.2772481441497803,
"beta_dpo/beta_margin_mean": 8.870034217834473,
"beta_dpo/beta_margin_std": 15.547243118286133,
"beta_dpo/beta_used": 0.1744510680437088,
"beta_dpo/beta_used_raw": -0.004995211958885193,
"beta_dpo/gap_mean": 43.78472900390625,
"beta_dpo/gap_std": 59.16692352294922,
"beta_dpo/loss_margin_mean": 48.520931243896484,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7558578987150416,
"grad_norm": 359.8203125,
"learning_rate": 8.628481651367875e-08,
"logits/chosen": -2.729337692260742,
"logits/rejected": -2.7200241088867188,
"loss": 1.2415,
"step": 500
},
{
"epoch": 0.7558578987150416,
"eval_beta_dpo/beta": 0.06070829555392265,
"eval_beta_dpo/beta_margin_grad_mean": -0.43526893854141235,
"eval_beta_dpo/beta_margin_grad_std": 0.08183299005031586,
"eval_beta_dpo/beta_margin_mean": 2.933407783508301,
"eval_beta_dpo/beta_margin_std": 3.5973763465881348,
"eval_beta_dpo/beta_used": 0.06070829555392265,
"eval_beta_dpo/beta_used_raw": -0.4269829988479614,
"eval_beta_dpo/gap_mean": 44.15704345703125,
"eval_beta_dpo/gap_std": 58.8353271484375,
"eval_beta_dpo/loss_margin_mean": 35.37399673461914,
"eval_beta_dpo/mask_keep_frac": 1.0,
"eval_logits/chosen": -2.762294054031372,
"eval_logits/rejected": -2.7785770893096924,
"eval_loss": 0.8583182096481323,
"eval_runtime": 36.37,
"eval_samples_per_second": 63.321,
"eval_steps_per_second": 1.98,
"step": 500
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49090656638145447,
"beta_dpo/beta_margin_grad_std": 0.011450248770415783,
"beta_dpo/beta_margin_mean": 0.036405812948942184,
"beta_dpo/beta_margin_std": 0.04586649313569069,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.5008817911148071,
"beta_dpo/gap_mean": 42.739036560058594,
"beta_dpo/gap_std": 56.7427978515625,
"beta_dpo/loss_margin_mean": 36.40580749511719,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7573696145124716,
"grad_norm": 3.569932460784912,
"learning_rate": 8.528784436016878e-08,
"logits/chosen": -2.7037782669067383,
"logits/rejected": -2.695845603942871,
"loss": 1.3542,
"step": 501
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49141430854797363,
"beta_dpo/beta_margin_grad_std": 0.011822287924587727,
"beta_dpo/beta_margin_mean": 0.034368664026260376,
"beta_dpo/beta_margin_std": 0.04734347015619278,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.616624116897583,
"beta_dpo/gap_mean": 41.67316436767578,
"beta_dpo/gap_std": 55.443443298339844,
"beta_dpo/loss_margin_mean": 34.36865997314453,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7588813303099018,
"grad_norm": 4.4559831619262695,
"learning_rate": 8.4295479559726e-08,
"logits/chosen": -2.7137112617492676,
"logits/rejected": -2.738381862640381,
"loss": 1.3572,
"step": 502
},
{
"beta_dpo/beta": 0.3707210421562195,
"beta_dpo/beta_margin_grad_mean": -0.3491879105567932,
"beta_dpo/beta_margin_grad_std": 0.3027651011943817,
"beta_dpo/beta_margin_mean": 15.77136516571045,
"beta_dpo/beta_margin_std": 40.02242660522461,
"beta_dpo/beta_used": 0.3707210421562195,
"beta_dpo/beta_used_raw": 0.2508103847503662,
"beta_dpo/gap_mean": 40.90766143798828,
"beta_dpo/gap_std": 55.69822311401367,
"beta_dpo/loss_margin_mean": 38.29641342163086,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7603930461073318,
"grad_norm": 1319.376220703125,
"learning_rate": 8.330774987092712e-08,
"logits/chosen": -2.710824966430664,
"logits/rejected": -2.6981759071350098,
"loss": 2.776,
"step": 503
},
{
"beta_dpo/beta": 0.28216904401779175,
"beta_dpo/beta_margin_grad_mean": -0.3438568115234375,
"beta_dpo/beta_margin_grad_std": 0.30565929412841797,
"beta_dpo/beta_margin_mean": 15.786298751831055,
"beta_dpo/beta_margin_std": 28.352420806884766,
"beta_dpo/beta_used": 0.28216904401779175,
"beta_dpo/beta_used_raw": -0.08858853578567505,
"beta_dpo/gap_mean": 40.74700164794922,
"beta_dpo/gap_std": 55.91231155395508,
"beta_dpo/loss_margin_mean": 45.25965881347656,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7619047619047619,
"grad_norm": 789.7346801757812,
"learning_rate": 8.232468292269479e-08,
"logits/chosen": -2.6995792388916016,
"logits/rejected": -2.6963398456573486,
"loss": 2.6669,
"step": 504
},
{
"beta_dpo/beta": 0.22853413224220276,
"beta_dpo/beta_margin_grad_mean": -0.3785220682621002,
"beta_dpo/beta_margin_grad_std": 0.31999051570892334,
"beta_dpo/beta_margin_mean": 10.497669219970703,
"beta_dpo/beta_margin_std": 23.13285255432129,
"beta_dpo/beta_used": 0.22853413224220276,
"beta_dpo/beta_used_raw": -0.23188593983650208,
"beta_dpo/gap_mean": 40.928749084472656,
"beta_dpo/gap_std": 57.20260238647461,
"beta_dpo/loss_margin_mean": 33.34831619262695,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.763416477702192,
"grad_norm": 902.9845581054688,
"learning_rate": 8.134630621352483e-08,
"logits/chosen": -2.7248096466064453,
"logits/rejected": -2.7551441192626953,
"loss": 2.0222,
"step": 505
},
{
"beta_dpo/beta": 0.24293901026248932,
"beta_dpo/beta_margin_grad_mean": -0.28122228384017944,
"beta_dpo/beta_margin_grad_std": 0.3899361491203308,
"beta_dpo/beta_margin_mean": 8.826823234558105,
"beta_dpo/beta_margin_std": 19.38666343688965,
"beta_dpo/beta_used": 0.24293901026248932,
"beta_dpo/beta_used_raw": 0.24293901026248932,
"beta_dpo/gap_mean": 39.355960845947266,
"beta_dpo/gap_std": 58.168418884277344,
"beta_dpo/loss_margin_mean": 36.38151931762695,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.764928193499622,
"grad_norm": 1123.488525390625,
"learning_rate": 8.037264711071698e-08,
"logits/chosen": -2.7153756618499756,
"logits/rejected": -2.7147321701049805,
"loss": 3.9304,
"step": 506
},
{
"beta_dpo/beta": 0.26356324553489685,
"beta_dpo/beta_margin_grad_mean": -0.3092266321182251,
"beta_dpo/beta_margin_grad_std": 0.29034730792045593,
"beta_dpo/beta_margin_mean": 12.295702934265137,
"beta_dpo/beta_margin_std": 26.49407958984375,
"beta_dpo/beta_used": 0.26356324553489685,
"beta_dpo/beta_used_raw": 0.16478615999221802,
"beta_dpo/gap_mean": 39.833335876464844,
"beta_dpo/gap_std": 59.442283630371094,
"beta_dpo/loss_margin_mean": 44.284969329833984,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7664399092970522,
"grad_norm": 531.67529296875,
"learning_rate": 7.940373284960933e-08,
"logits/chosen": -2.705458641052246,
"logits/rejected": -2.7373507022857666,
"loss": 1.0875,
"step": 507
},
{
"beta_dpo/beta": 0.5739637017250061,
"beta_dpo/beta_margin_grad_mean": -0.21459561586380005,
"beta_dpo/beta_margin_grad_std": 0.3837727904319763,
"beta_dpo/beta_margin_mean": 24.81824493408203,
"beta_dpo/beta_margin_std": 49.48651885986328,
"beta_dpo/beta_used": 0.5739637017250061,
"beta_dpo/beta_used_raw": 0.5739637017250061,
"beta_dpo/gap_mean": 40.61821746826172,
"beta_dpo/gap_std": 59.360069274902344,
"beta_dpo/loss_margin_mean": 43.16511535644531,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7679516250944822,
"grad_norm": 2711.739501953125,
"learning_rate": 7.843959053281663e-08,
"logits/chosen": -2.720541000366211,
"logits/rejected": -2.79325532913208,
"loss": 5.5206,
"step": 508
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49008607864379883,
"beta_dpo/beta_margin_grad_std": 0.012960444204509258,
"beta_dpo/beta_margin_mean": 0.03969065845012665,
"beta_dpo/beta_margin_std": 0.0518970787525177,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.379862904548645,
"beta_dpo/gap_mean": 40.70125198364258,
"beta_dpo/gap_std": 58.98310089111328,
"beta_dpo/loss_margin_mean": 39.69065856933594,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7694633408919124,
"grad_norm": 4.5289483070373535,
"learning_rate": 7.748024712947204e-08,
"logits/chosen": -2.7247955799102783,
"logits/rejected": -2.742762804031372,
"loss": 1.3543,
"step": 509
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.48754072189331055,
"beta_dpo/beta_margin_grad_std": 0.015329192392528057,
"beta_dpo/beta_margin_mean": 0.04989998787641525,
"beta_dpo/beta_margin_std": 0.061411548405885696,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.46870729327201843,
"beta_dpo/gap_mean": 42.109127044677734,
"beta_dpo/gap_std": 59.28163146972656,
"beta_dpo/loss_margin_mean": 49.899986267089844,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7709750566893424,
"grad_norm": 4.867334365844727,
"learning_rate": 7.652572947447272e-08,
"logits/chosen": -2.674614429473877,
"logits/rejected": -2.7252471446990967,
"loss": 1.3545,
"step": 510
},
{
"beta_dpo/beta": 0.20752891898155212,
"beta_dpo/beta_margin_grad_mean": -0.20747746527194977,
"beta_dpo/beta_margin_grad_std": 0.35087651014328003,
"beta_dpo/beta_margin_mean": 10.76735782623291,
"beta_dpo/beta_margin_std": 14.685410499572754,
"beta_dpo/beta_used": 0.20752891898155212,
"beta_dpo/beta_used_raw": 0.20752891898155212,
"beta_dpo/gap_mean": 43.691673278808594,
"beta_dpo/gap_std": 60.531578063964844,
"beta_dpo/loss_margin_mean": 54.25608444213867,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7724867724867724,
"grad_norm": 521.0980224609375,
"learning_rate": 7.557606426772961e-08,
"logits/chosen": -2.6949658393859863,
"logits/rejected": -2.7207558155059814,
"loss": 2.0411,
"step": 511
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.48948413133621216,
"beta_dpo/beta_margin_grad_std": 0.014411956071853638,
"beta_dpo/beta_margin_mean": 0.042109742760658264,
"beta_dpo/beta_margin_std": 0.057718053460121155,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.5245345830917358,
"beta_dpo/gap_mean": 44.23836135864258,
"beta_dpo/gap_std": 60.7104606628418,
"beta_dpo/loss_margin_mean": 42.1097412109375,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7739984882842026,
"grad_norm": 4.707949161529541,
"learning_rate": 7.463127807341966e-08,
"logits/chosen": -2.730802059173584,
"logits/rejected": -2.729092597961426,
"loss": 1.3534,
"step": 512
},
{
"beta_dpo/beta": 0.18585793673992157,
"beta_dpo/beta_margin_grad_mean": -0.23562981188297272,
"beta_dpo/beta_margin_grad_std": 0.29042237997055054,
"beta_dpo/beta_margin_mean": 9.61783504486084,
"beta_dpo/beta_margin_std": 16.55315589904785,
"beta_dpo/beta_used": 0.18585793673992157,
"beta_dpo/beta_used_raw": 0.18585793673992157,
"beta_dpo/gap_mean": 44.56898498535156,
"beta_dpo/gap_std": 60.356040954589844,
"beta_dpo/loss_margin_mean": 44.24302291870117,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7755102040816326,
"grad_norm": 579.6964721679688,
"learning_rate": 7.369139731924401e-08,
"logits/chosen": -2.6522293090820312,
"logits/rejected": -2.6841511726379395,
"loss": 0.7497,
"step": 513
},
{
"beta_dpo/beta": 0.6994319558143616,
"beta_dpo/beta_margin_grad_mean": -0.15793399512767792,
"beta_dpo/beta_margin_grad_std": 0.34477755427360535,
"beta_dpo/beta_margin_mean": 38.30527114868164,
"beta_dpo/beta_margin_std": 44.85448455810547,
"beta_dpo/beta_used": 0.6994319558143616,
"beta_dpo/beta_used_raw": 0.6994319558143616,
"beta_dpo/gap_mean": 45.162689208984375,
"beta_dpo/gap_std": 59.23829650878906,
"beta_dpo/loss_margin_mean": 53.46204376220703,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7770219198790628,
"grad_norm": 1937.4132080078125,
"learning_rate": 7.275644829568747e-08,
"logits/chosen": -2.7020692825317383,
"logits/rejected": -2.7054526805877686,
"loss": 1.9092,
"step": 514
},
{
"beta_dpo/beta": 0.009622273966670036,
"beta_dpo/beta_margin_grad_mean": -0.4048810601234436,
"beta_dpo/beta_margin_grad_std": 0.1544850915670395,
"beta_dpo/beta_margin_mean": 0.4853326976299286,
"beta_dpo/beta_margin_std": 0.8304917812347412,
"beta_dpo/beta_used": 0.009622273966670036,
"beta_dpo/beta_used_raw": -0.013449749909341335,
"beta_dpo/gap_mean": 45.89698028564453,
"beta_dpo/gap_std": 59.16020965576172,
"beta_dpo/loss_margin_mean": 47.00020980834961,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7785336356764928,
"grad_norm": 38.7457160949707,
"learning_rate": 7.182645715528435e-08,
"logits/chosen": -2.658329963684082,
"logits/rejected": -2.6947593688964844,
"loss": 1.1149,
"step": 515
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4907180666923523,
"beta_dpo/beta_margin_grad_std": 0.014946096576750278,
"beta_dpo/beta_margin_mean": 0.03717074170708656,
"beta_dpo/beta_margin_std": 0.05986913666129112,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.3845486044883728,
"beta_dpo/gap_mean": 44.72285842895508,
"beta_dpo/gap_std": 59.19635772705078,
"beta_dpo/loss_margin_mean": 37.170738220214844,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.780045351473923,
"grad_norm": 3.5393691062927246,
"learning_rate": 7.090144991188568e-08,
"logits/chosen": -2.6673450469970703,
"logits/rejected": -2.685701847076416,
"loss": 1.3505,
"step": 516
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4910920560359955,
"beta_dpo/beta_margin_grad_std": 0.014533808454871178,
"beta_dpo/beta_margin_mean": 0.035668447613716125,
"beta_dpo/beta_margin_std": 0.058203116059303284,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.4118385910987854,
"beta_dpo/gap_mean": 43.163978576660156,
"beta_dpo/gap_std": 59.21691131591797,
"beta_dpo/loss_margin_mean": 35.6684455871582,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.781557067271353,
"grad_norm": 4.225611209869385,
"learning_rate": 6.998145243993284e-08,
"logits/chosen": -2.6880226135253906,
"logits/rejected": -2.6727945804595947,
"loss": 1.3527,
"step": 517
},
{
"beta_dpo/beta": 0.18050672113895416,
"beta_dpo/beta_margin_grad_mean": -0.37036359310150146,
"beta_dpo/beta_margin_grad_std": 0.30285516381263733,
"beta_dpo/beta_margin_mean": 9.467116355895996,
"beta_dpo/beta_margin_std": 17.349821090698242,
"beta_dpo/beta_used": 0.18050672113895416,
"beta_dpo/beta_used_raw": -0.2277805060148239,
"beta_dpo/gap_mean": 42.35028076171875,
"beta_dpo/gap_std": 58.71092224121094,
"beta_dpo/loss_margin_mean": 42.672080993652344,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.783068783068783,
"grad_norm": 1116.5013427734375,
"learning_rate": 6.906649047373245e-08,
"logits/chosen": -2.704665422439575,
"logits/rejected": -2.7393040657043457,
"loss": 1.9926,
"step": 518
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49066850543022156,
"beta_dpo/beta_margin_grad_std": 0.016804302111268044,
"beta_dpo/beta_margin_mean": 0.0373702310025692,
"beta_dpo/beta_margin_std": 0.06730356067419052,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.3206685781478882,
"beta_dpo/gap_mean": 41.75722885131836,
"beta_dpo/gap_std": 60.208457946777344,
"beta_dpo/loss_margin_mean": 37.3702278137207,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7845804988662132,
"grad_norm": 4.537266731262207,
"learning_rate": 6.815658960673781e-08,
"logits/chosen": -2.676978588104248,
"logits/rejected": -2.6939690113067627,
"loss": 1.3527,
"step": 519
},
{
"beta_dpo/beta": 0.20510099828243256,
"beta_dpo/beta_margin_grad_mean": -0.3535204231739044,
"beta_dpo/beta_margin_grad_std": 0.3087931275367737,
"beta_dpo/beta_margin_mean": 8.220118522644043,
"beta_dpo/beta_margin_std": 18.738656997680664,
"beta_dpo/beta_used": 0.20510099828243256,
"beta_dpo/beta_used_raw": 0.11168855428695679,
"beta_dpo/gap_mean": 41.049041748046875,
"beta_dpo/gap_std": 59.8758430480957,
"beta_dpo/loss_margin_mean": 34.11420822143555,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7860922146636432,
"grad_norm": 505.3785400390625,
"learning_rate": 6.725177529083209e-08,
"logits/chosen": -2.6468472480773926,
"logits/rejected": -2.6863021850585938,
"loss": 1.5582,
"step": 520
},
{
"beta_dpo/beta": 0.3797077238559723,
"beta_dpo/beta_margin_grad_mean": -0.3206307291984558,
"beta_dpo/beta_margin_grad_std": 0.2996864318847656,
"beta_dpo/beta_margin_mean": 23.054044723510742,
"beta_dpo/beta_margin_std": 38.42852020263672,
"beta_dpo/beta_used": 0.3797077238559723,
"beta_dpo/beta_used_raw": 0.33471980690956116,
"beta_dpo/gap_mean": 40.93505859375,
"beta_dpo/gap_std": 59.90309143066406,
"beta_dpo/loss_margin_mean": 48.176727294921875,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7876039304610734,
"grad_norm": 1331.1934814453125,
"learning_rate": 6.63520728356167e-08,
"logits/chosen": -2.725958824157715,
"logits/rejected": -2.7716081142425537,
"loss": 4.2967,
"step": 521
},
{
"beta_dpo/beta": 0.3732588291168213,
"beta_dpo/beta_margin_grad_mean": -0.20111659169197083,
"beta_dpo/beta_margin_grad_std": 0.35144945979118347,
"beta_dpo/beta_margin_mean": 17.039627075195312,
"beta_dpo/beta_margin_std": 22.248046875,
"beta_dpo/beta_used": 0.3732588291168213,
"beta_dpo/beta_used_raw": 0.3732588291168213,
"beta_dpo/gap_mean": 42.58722686767578,
"beta_dpo/gap_std": 59.817386627197266,
"beta_dpo/loss_margin_mean": 45.58156204223633,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7891156462585034,
"grad_norm": 870.8029174804688,
"learning_rate": 6.545750740770336e-08,
"logits/chosen": -2.685265302658081,
"logits/rejected": -2.6789164543151855,
"loss": 2.3226,
"step": 522
},
{
"beta_dpo/beta": 0.4820902943611145,
"beta_dpo/beta_margin_grad_mean": -0.22910968959331512,
"beta_dpo/beta_margin_grad_std": 0.4086511433124542,
"beta_dpo/beta_margin_mean": 21.961559295654297,
"beta_dpo/beta_margin_std": 31.645612716674805,
"beta_dpo/beta_used": 0.4820902943611145,
"beta_dpo/beta_used_raw": 0.4820902943611145,
"beta_dpo/gap_mean": 43.25143051147461,
"beta_dpo/gap_std": 60.66827392578125,
"beta_dpo/loss_margin_mean": 44.9202995300293,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7906273620559335,
"grad_norm": 1632.2755126953125,
"learning_rate": 6.456810403001012e-08,
"logits/chosen": -2.7161006927490234,
"logits/rejected": -2.7626240253448486,
"loss": 4.9571,
"step": 523
},
{
"beta_dpo/beta": 0.10415734350681305,
"beta_dpo/beta_margin_grad_mean": -0.36675214767456055,
"beta_dpo/beta_margin_grad_std": 0.2674957513809204,
"beta_dpo/beta_margin_mean": 4.917224884033203,
"beta_dpo/beta_margin_std": 9.322827339172363,
"beta_dpo/beta_used": 0.10415734350681305,
"beta_dpo/beta_used_raw": -0.0208590030670166,
"beta_dpo/gap_mean": 42.01835632324219,
"beta_dpo/gap_std": 60.799781799316406,
"beta_dpo/loss_margin_mean": 38.786258697509766,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7921390778533636,
"grad_norm": 278.8160095214844,
"learning_rate": 6.368388758106134e-08,
"logits/chosen": -2.7579870223999023,
"logits/rejected": -2.7684619426727295,
"loss": 1.4521,
"step": 524
},
{
"beta_dpo/beta": 0.1328941434621811,
"beta_dpo/beta_margin_grad_mean": -0.3332993686199188,
"beta_dpo/beta_margin_grad_std": 0.26771458983421326,
"beta_dpo/beta_margin_mean": 5.369054794311523,
"beta_dpo/beta_margin_std": 11.418255805969238,
"beta_dpo/beta_used": 0.1328941434621811,
"beta_dpo/beta_used_raw": -0.1355496495962143,
"beta_dpo/gap_mean": 40.92710876464844,
"beta_dpo/gap_std": 61.039215087890625,
"beta_dpo/loss_margin_mean": 34.65338134765625,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7936507936507936,
"grad_norm": 420.6562805175781,
"learning_rate": 6.280488279429185e-08,
"logits/chosen": -2.7576797008514404,
"logits/rejected": -2.757020950317383,
"loss": 1.1612,
"step": 525
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4936671257019043,
"beta_dpo/beta_margin_grad_std": 0.013691714033484459,
"beta_dpo/beta_margin_mean": 0.02535291761159897,
"beta_dpo/beta_margin_std": 0.054825231432914734,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.6394113898277283,
"beta_dpo/gap_mean": 39.1685676574707,
"beta_dpo/gap_std": 59.67204284667969,
"beta_dpo/loss_margin_mean": 25.352916717529297,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7951625094482238,
"grad_norm": 4.625575542449951,
"learning_rate": 6.193111425735515e-08,
"logits/chosen": -2.695737600326538,
"logits/rejected": -2.7326161861419678,
"loss": 1.3601,
"step": 526
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49305132031440735,
"beta_dpo/beta_margin_grad_std": 0.014519465155899525,
"beta_dpo/beta_margin_mean": 0.02782551757991314,
"beta_dpo/beta_margin_std": 0.058140479028224945,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.17464077472686768,
"beta_dpo/gap_mean": 36.03892517089844,
"beta_dpo/gap_std": 59.28942108154297,
"beta_dpo/loss_margin_mean": 27.825515747070312,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7966742252456538,
"grad_norm": 4.227888584136963,
"learning_rate": 6.106260641143546e-08,
"logits/chosen": -2.652071237564087,
"logits/rejected": -2.6945571899414062,
"loss": 1.3557,
"step": 527
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4925900101661682,
"beta_dpo/beta_margin_grad_std": 0.014309762977063656,
"beta_dpo/beta_margin_mean": 0.02966652624309063,
"beta_dpo/beta_margin_std": 0.05729580298066139,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.5310070514678955,
"beta_dpo/gap_mean": 35.63081359863281,
"beta_dpo/gap_std": 59.07182312011719,
"beta_dpo/loss_margin_mean": 29.66652488708496,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7981859410430839,
"grad_norm": 3.7201552391052246,
"learning_rate": 6.019938355056422e-08,
"logits/chosen": -2.6412606239318848,
"logits/rejected": -2.6760683059692383,
"loss": 1.3618,
"step": 528
},
{
"beta_dpo/beta": 1.4114124774932861,
"beta_dpo/beta_margin_grad_mean": -0.17690998315811157,
"beta_dpo/beta_margin_grad_std": 0.3721368908882141,
"beta_dpo/beta_margin_mean": 94.305419921875,
"beta_dpo/beta_margin_std": 109.623291015625,
"beta_dpo/beta_used": 1.4114124774932861,
"beta_dpo/beta_used_raw": 1.4114124774932861,
"beta_dpo/gap_mean": 39.14246368408203,
"beta_dpo/gap_std": 59.363304138183594,
"beta_dpo/loss_margin_mean": 62.783653259277344,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.799697656840514,
"grad_norm": 3297.61962890625,
"learning_rate": 5.934146982094049e-08,
"logits/chosen": -2.6939330101013184,
"logits/rejected": -2.718538284301758,
"loss": 5.9051,
"step": 529
},
{
"beta_dpo/beta": 0.17238160967826843,
"beta_dpo/beta_margin_grad_mean": -0.22959469258785248,
"beta_dpo/beta_margin_grad_std": 0.3280448913574219,
"beta_dpo/beta_margin_mean": 8.542181968688965,
"beta_dpo/beta_margin_std": 10.939250946044922,
"beta_dpo/beta_used": 0.17238160967826843,
"beta_dpo/beta_used_raw": 0.17238160967826843,
"beta_dpo/gap_mean": 41.31953430175781,
"beta_dpo/gap_std": 59.120887756347656,
"beta_dpo/loss_margin_mean": 49.01192092895508,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8012093726379441,
"grad_norm": 402.5897521972656,
"learning_rate": 5.848888922025552e-08,
"logits/chosen": -2.6831681728363037,
"logits/rejected": -2.6938679218292236,
"loss": 1.0653,
"step": 530
},
{
"beta_dpo/beta": 0.20381604135036469,
"beta_dpo/beta_margin_grad_mean": -0.35280048847198486,
"beta_dpo/beta_margin_grad_std": 0.27263426780700684,
"beta_dpo/beta_margin_mean": 9.475677490234375,
"beta_dpo/beta_margin_std": 18.85011100769043,
"beta_dpo/beta_used": 0.20381604135036469,
"beta_dpo/beta_used_raw": 0.05179119110107422,
"beta_dpo/gap_mean": 40.65738296508789,
"beta_dpo/gap_std": 60.33680725097656,
"beta_dpo/loss_margin_mean": 37.609867095947266,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8027210884353742,
"grad_norm": 721.6512451171875,
"learning_rate": 5.7641665597021435e-08,
"logits/chosen": -2.6561365127563477,
"logits/rejected": -2.7007155418395996,
"loss": 1.0704,
"step": 531
},
{
"beta_dpo/beta": 0.5565502643585205,
"beta_dpo/beta_margin_grad_mean": -0.17941914498806,
"beta_dpo/beta_margin_grad_std": 0.3398556709289551,
"beta_dpo/beta_margin_mean": 33.13414764404297,
"beta_dpo/beta_margin_std": 49.94964599609375,
"beta_dpo/beta_used": 0.5565502643585205,
"beta_dpo/beta_used_raw": 0.5565502643585205,
"beta_dpo/gap_mean": 42.603416442871094,
"beta_dpo/gap_std": 60.625701904296875,
"beta_dpo/loss_margin_mean": 54.4295539855957,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8042328042328042,
"grad_norm": 2332.498046875,
"learning_rate": 5.679982264990424e-08,
"logits/chosen": -2.671722888946533,
"logits/rejected": -2.690765380859375,
"loss": 4.9389,
"step": 532
},
{
"beta_dpo/beta": 0.08064591884613037,
"beta_dpo/beta_margin_grad_mean": -0.3793724775314331,
"beta_dpo/beta_margin_grad_std": 0.30262112617492676,
"beta_dpo/beta_margin_mean": 2.689401865005493,
"beta_dpo/beta_margin_std": 7.671005725860596,
"beta_dpo/beta_used": 0.08064591884613037,
"beta_dpo/beta_used_raw": -0.5107542276382446,
"beta_dpo/gap_mean": 41.580352783203125,
"beta_dpo/gap_std": 59.28820037841797,
"beta_dpo/loss_margin_mean": 30.574068069458008,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8057445200302343,
"grad_norm": 319.44219970703125,
"learning_rate": 5.596338392706076e-08,
"logits/chosen": -2.6407032012939453,
"logits/rejected": -2.677487373352051,
"loss": 1.1372,
"step": 533
},
{
"beta_dpo/beta": 0.28966397047042847,
"beta_dpo/beta_margin_grad_mean": -0.26224929094314575,
"beta_dpo/beta_margin_grad_std": 0.3490891456604004,
"beta_dpo/beta_margin_mean": 15.298602104187012,
"beta_dpo/beta_margin_std": 28.032827377319336,
"beta_dpo/beta_used": 0.28966397047042847,
"beta_dpo/beta_used_raw": 0.28966397047042847,
"beta_dpo/gap_mean": 42.06398010253906,
"beta_dpo/gap_std": 60.664085388183594,
"beta_dpo/loss_margin_mean": 44.23617172241211,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8072562358276644,
"grad_norm": 1128.4276123046875,
"learning_rate": 5.513237282548033e-08,
"logits/chosen": -2.7209525108337402,
"logits/rejected": -2.7407164573669434,
"loss": 2.1069,
"step": 534
},
{
"beta_dpo/beta": 0.18617966771125793,
"beta_dpo/beta_margin_grad_mean": -0.3503064513206482,
"beta_dpo/beta_margin_grad_std": 0.2936100959777832,
"beta_dpo/beta_margin_mean": 10.054865837097168,
"beta_dpo/beta_margin_std": 21.22418785095215,
"beta_dpo/beta_used": 0.18617966771125793,
"beta_dpo/beta_used_raw": -0.012800291180610657,
"beta_dpo/gap_mean": 42.36915588378906,
"beta_dpo/gap_std": 62.56495666503906,
"beta_dpo/loss_margin_mean": 48.45128631591797,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8087679516250945,
"grad_norm": 501.5768127441406,
"learning_rate": 5.430681259032957e-08,
"logits/chosen": -2.7039687633514404,
"logits/rejected": -2.7297236919403076,
"loss": 1.6171,
"step": 535
},
{
"beta_dpo/beta": 0.16233938932418823,
"beta_dpo/beta_margin_grad_mean": -0.2145552784204483,
"beta_dpo/beta_margin_grad_std": 0.2856879234313965,
"beta_dpo/beta_margin_mean": 8.161446571350098,
"beta_dpo/beta_margin_std": 12.291303634643555,
"beta_dpo/beta_used": 0.16233938932418823,
"beta_dpo/beta_used_raw": 0.16233938932418823,
"beta_dpo/gap_mean": 44.00806427001953,
"beta_dpo/gap_std": 62.28424072265625,
"beta_dpo/loss_margin_mean": 49.89377212524414,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8102796674225246,
"grad_norm": 491.6341857910156,
"learning_rate": 5.3486726314303175e-08,
"logits/chosen": -2.6543660163879395,
"logits/rejected": -2.6955974102020264,
"loss": 1.154,
"step": 536
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4869405925273895,
"beta_dpo/beta_margin_grad_std": 0.015572650358080864,
"beta_dpo/beta_margin_mean": 0.05230266600847244,
"beta_dpo/beta_margin_std": 0.06237604841589928,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.19751229882240295,
"beta_dpo/gap_mean": 45.57786560058594,
"beta_dpo/gap_std": 62.4625244140625,
"beta_dpo/loss_margin_mean": 52.30266189575195,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8117913832199547,
"grad_norm": 3.94612455368042,
"learning_rate": 5.267213693697695e-08,
"logits/chosen": -2.6674160957336426,
"logits/rejected": -2.7232227325439453,
"loss": 1.3469,
"step": 537
},
{
"beta_dpo/beta": 0.281332790851593,
"beta_dpo/beta_margin_grad_mean": -0.1457832306623459,
"beta_dpo/beta_margin_grad_std": 0.3034435212612152,
"beta_dpo/beta_margin_mean": 15.984519958496094,
"beta_dpo/beta_margin_std": 18.725618362426758,
"beta_dpo/beta_used": 0.281332790851593,
"beta_dpo/beta_used_raw": 0.281332790851593,
"beta_dpo/gap_mean": 46.69129180908203,
"beta_dpo/gap_std": 62.50321960449219,
"beta_dpo/loss_margin_mean": 54.41521453857422,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8133030990173847,
"grad_norm": 404.3365783691406,
"learning_rate": 5.1863067244167144e-08,
"logits/chosen": -2.685150623321533,
"logits/rejected": -2.69342041015625,
"loss": 1.124,
"step": 538
},
{
"beta_dpo/beta": 0.003658185014501214,
"beta_dpo/beta_margin_grad_mean": -0.4645317792892456,
"beta_dpo/beta_margin_grad_std": 0.0686301663517952,
"beta_dpo/beta_margin_mean": 0.14727774262428284,
"beta_dpo/beta_margin_std": 0.28779175877571106,
"beta_dpo/beta_used": 0.003658185014501214,
"beta_dpo/beta_used_raw": -0.4632405936717987,
"beta_dpo/gap_mean": 46.15460205078125,
"beta_dpo/gap_std": 61.67210388183594,
"beta_dpo/loss_margin_mean": 36.68147659301758,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8148148148148148,
"grad_norm": 14.169596672058105,
"learning_rate": 5.105953986729195e-08,
"logits/chosen": -2.695448398590088,
"logits/rejected": -2.7702274322509766,
"loss": 1.2546,
"step": 539
},
{
"beta_dpo/beta": 0.3899621069431305,
"beta_dpo/beta_margin_grad_mean": -0.3473030626773834,
"beta_dpo/beta_margin_grad_std": 0.3095802962779999,
"beta_dpo/beta_margin_mean": 23.450132369995117,
"beta_dpo/beta_margin_std": 43.04521560668945,
"beta_dpo/beta_used": 0.3899621069431305,
"beta_dpo/beta_used_raw": 0.24834245443344116,
"beta_dpo/gap_mean": 45.328651428222656,
"beta_dpo/gap_std": 61.643280029296875,
"beta_dpo/loss_margin_mean": 48.5355224609375,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8163265306122449,
"grad_norm": 1687.021240234375,
"learning_rate": 5.026157728273966e-08,
"logits/chosen": -2.661731243133545,
"logits/rejected": -2.721250534057617,
"loss": 5.0481,
"step": 540
},
{
"beta_dpo/beta": 0.4383842349052429,
"beta_dpo/beta_margin_grad_mean": -0.3423454463481903,
"beta_dpo/beta_margin_grad_std": 0.3038384020328522,
"beta_dpo/beta_margin_mean": 20.71125602722168,
"beta_dpo/beta_margin_std": 44.29029083251953,
"beta_dpo/beta_used": 0.4383842349052429,
"beta_dpo/beta_used_raw": 0.11720219254493713,
"beta_dpo/gap_mean": 45.792572021484375,
"beta_dpo/gap_std": 62.074562072753906,
"beta_dpo/loss_margin_mean": 42.424739837646484,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.817838246409675,
"grad_norm": 1500.9615478515625,
"learning_rate": 4.9469201811239035e-08,
"logits/chosen": -2.6188478469848633,
"logits/rejected": -2.602254867553711,
"loss": 1.5266,
"step": 541
},
{
"beta_dpo/beta": 0.6556390523910522,
"beta_dpo/beta_margin_grad_mean": -0.20644766092300415,
"beta_dpo/beta_margin_grad_std": 0.38132748007774353,
"beta_dpo/beta_margin_mean": 31.148107528686523,
"beta_dpo/beta_margin_std": 52.01056671142578,
"beta_dpo/beta_used": 0.6556390523910522,
"beta_dpo/beta_used_raw": 0.6556390523910522,
"beta_dpo/gap_mean": 46.190338134765625,
"beta_dpo/gap_std": 62.900184631347656,
"beta_dpo/loss_margin_mean": 49.45319747924805,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8193499622071051,
"grad_norm": 1859.2935791015625,
"learning_rate": 4.868243561723534e-08,
"logits/chosen": -2.661344528198242,
"logits/rejected": -2.68660569190979,
"loss": 2.7462,
"step": 542
},
{
"beta_dpo/beta": 0.4987204074859619,
"beta_dpo/beta_margin_grad_mean": -0.14901922643184662,
"beta_dpo/beta_margin_grad_std": 0.2977834939956665,
"beta_dpo/beta_margin_mean": 30.138824462890625,
"beta_dpo/beta_margin_std": 42.48214340209961,
"beta_dpo/beta_used": 0.4987204074859619,
"beta_dpo/beta_used_raw": 0.4987204074859619,
"beta_dpo/gap_mean": 46.941497802734375,
"beta_dpo/gap_std": 62.01499938964844,
"beta_dpo/loss_margin_mean": 54.9544677734375,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8208616780045351,
"grad_norm": 863.5515747070312,
"learning_rate": 4.790130070827028e-08,
"logits/chosen": -2.646941661834717,
"logits/rejected": -2.7055516242980957,
"loss": 1.5678,
"step": 543
},
{
"beta_dpo/beta": 0.1872519850730896,
"beta_dpo/beta_margin_grad_mean": -0.31010645627975464,
"beta_dpo/beta_margin_grad_std": 0.27586114406585693,
"beta_dpo/beta_margin_mean": 10.743023872375488,
"beta_dpo/beta_margin_std": 18.55499839782715,
"beta_dpo/beta_used": 0.1872519850730896,
"beta_dpo/beta_used_raw": 0.021043449640274048,
"beta_dpo/gap_mean": 48.91101837158203,
"beta_dpo/gap_std": 61.6202392578125,
"beta_dpo/loss_margin_mean": 55.6162109375,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8223733938019653,
"grad_norm": 474.7829895019531,
"learning_rate": 4.7125818934366454e-08,
"logits/chosen": -2.670353889465332,
"logits/rejected": -2.7138702869415283,
"loss": 0.8989,
"step": 544
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.48910388350486755,
"beta_dpo/beta_margin_grad_std": 0.013874729163944721,
"beta_dpo/beta_margin_mean": 0.043630167841911316,
"beta_dpo/beta_margin_std": 0.05556848645210266,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.1730966717004776,
"beta_dpo/gap_mean": 48.88160705566406,
"beta_dpo/gap_std": 60.836570739746094,
"beta_dpo/loss_margin_mean": 43.630165100097656,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8238851095993953,
"grad_norm": 3.9678640365600586,
"learning_rate": 4.635601198741607e-08,
"logits/chosen": -2.6898350715637207,
"logits/rejected": -2.7201786041259766,
"loss": 1.3431,
"step": 545
},
{
"beta_dpo/beta": 0.3082360625267029,
"beta_dpo/beta_margin_grad_mean": -0.3328179121017456,
"beta_dpo/beta_margin_grad_std": 0.2885495126247406,
"beta_dpo/beta_margin_mean": 15.173134803771973,
"beta_dpo/beta_margin_std": 26.40501594543457,
"beta_dpo/beta_used": 0.3082360625267029,
"beta_dpo/beta_used_raw": 0.026352345943450928,
"beta_dpo/gap_mean": 47.82192611694336,
"beta_dpo/gap_std": 59.33293914794922,
"beta_dpo/loss_margin_mean": 44.03695297241211,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8253968253968254,
"grad_norm": 988.4298706054688,
"learning_rate": 4.559190140057428e-08,
"logits/chosen": -2.604769706726074,
"logits/rejected": -2.61057710647583,
"loss": 1.3104,
"step": 546
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.48737552762031555,
"beta_dpo/beta_margin_grad_std": 0.015205537900328636,
"beta_dpo/beta_margin_mean": 0.05055927485227585,
"beta_dpo/beta_margin_std": 0.060925960540771484,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.32710736989974976,
"beta_dpo/gap_mean": 48.19280242919922,
"beta_dpo/gap_std": 59.7764892578125,
"beta_dpo/loss_margin_mean": 50.55927276611328,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8269085411942555,
"grad_norm": 3.9216151237487793,
"learning_rate": 4.483350854765672e-08,
"logits/chosen": -2.702695608139038,
"logits/rejected": -2.7354962825775146,
"loss": 1.3463,
"step": 547
},
{
"beta_dpo/beta": 0.08089688420295715,
"beta_dpo/beta_margin_grad_mean": -0.34109532833099365,
"beta_dpo/beta_margin_grad_std": 0.26831504702568054,
"beta_dpo/beta_margin_mean": 3.4130594730377197,
"beta_dpo/beta_margin_std": 7.480922222137451,
"beta_dpo/beta_used": 0.08089688420295715,
"beta_dpo/beta_used_raw": -0.3346138894557953,
"beta_dpo/gap_mean": 45.551795959472656,
"beta_dpo/gap_std": 58.918609619140625,
"beta_dpo/loss_margin_mean": 34.3262825012207,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8284202569916855,
"grad_norm": 117.26342010498047,
"learning_rate": 4.4080854642541826e-08,
"logits/chosen": -2.7186174392700195,
"logits/rejected": -2.7511534690856934,
"loss": 0.9831,
"step": 548
},
{
"beta_dpo/beta": 0.16079770028591156,
"beta_dpo/beta_margin_grad_mean": -0.3676168918609619,
"beta_dpo/beta_margin_grad_std": 0.2950216233730316,
"beta_dpo/beta_margin_mean": 6.911593914031982,
"beta_dpo/beta_margin_std": 16.279003143310547,
"beta_dpo/beta_used": 0.16079770028591156,
"beta_dpo/beta_used_raw": -0.14775623381137848,
"beta_dpo/gap_mean": 43.952392578125,
"beta_dpo/gap_std": 58.409446716308594,
"beta_dpo/loss_margin_mean": 37.30677795410156,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8299319727891157,
"grad_norm": 667.4703369140625,
"learning_rate": 4.333396073857723e-08,
"logits/chosen": -2.665083169937134,
"logits/rejected": -2.7149558067321777,
"loss": 1.735,
"step": 549
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4928406774997711,
"beta_dpo/beta_margin_grad_std": 0.01840727962553501,
"beta_dpo/beta_margin_mean": 0.028669806197285652,
"beta_dpo/beta_margin_std": 0.0737244263291359,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.4056813418865204,
"beta_dpo/gap_mean": 42.487064361572266,
"beta_dpo/gap_std": 60.82746887207031,
"beta_dpo/loss_margin_mean": 28.6698055267334,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8314436885865457,
"grad_norm": 4.606810092926025,
"learning_rate": 4.259284772799099e-08,
"logits/chosen": -2.658696413040161,
"logits/rejected": -2.670809507369995,
"loss": 1.3534,
"step": 550
},
{
"beta_dpo/beta": 0.22617414593696594,
"beta_dpo/beta_margin_grad_mean": -0.3524710536003113,
"beta_dpo/beta_margin_grad_std": 0.30751875042915344,
"beta_dpo/beta_margin_mean": 8.762946128845215,
"beta_dpo/beta_margin_std": 21.531352996826172,
"beta_dpo/beta_used": 0.22617414593696594,
"beta_dpo/beta_used_raw": 0.04135200381278992,
"beta_dpo/gap_mean": 40.189510345458984,
"beta_dpo/gap_std": 60.546417236328125,
"beta_dpo/loss_margin_mean": 36.95454025268555,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8329554043839759,
"grad_norm": 493.1956787109375,
"learning_rate": 4.1857536341307176e-08,
"logits/chosen": -2.6752843856811523,
"logits/rejected": -2.7005228996276855,
"loss": 1.6817,
"step": 551
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49255049228668213,
"beta_dpo/beta_margin_grad_std": 0.014759018085896969,
"beta_dpo/beta_margin_mean": 0.029821420088410378,
"beta_dpo/beta_margin_std": 0.05910492688417435,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.3311285376548767,
"beta_dpo/gap_mean": 38.61370849609375,
"beta_dpo/gap_std": 60.83926773071289,
"beta_dpo/loss_margin_mean": 29.82141876220703,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8344671201814059,
"grad_norm": 3.8606088161468506,
"learning_rate": 4.112804714676593e-08,
"logits/chosen": -2.6869289875030518,
"logits/rejected": -2.716911792755127,
"loss": 1.3556,
"step": 552
},
{
"beta_dpo/beta": 0.3258926272392273,
"beta_dpo/beta_margin_grad_mean": -0.23501437902450562,
"beta_dpo/beta_margin_grad_std": 0.3916976749897003,
"beta_dpo/beta_margin_mean": 16.936145782470703,
"beta_dpo/beta_margin_std": 22.796802520751953,
"beta_dpo/beta_used": 0.3258926272392273,
"beta_dpo/beta_used_raw": 0.3258926272392273,
"beta_dpo/gap_mean": 40.338661193847656,
"beta_dpo/gap_std": 60.826988220214844,
"beta_dpo/loss_margin_mean": 52.69847106933594,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8359788359788359,
"grad_norm": 1291.6273193359375,
"learning_rate": 4.0404400549748144e-08,
"logits/chosen": -2.6371967792510986,
"logits/rejected": -2.6834166049957275,
"loss": 2.2743,
"step": 553
},
{
"beta_dpo/beta": 0.15449394285678864,
"beta_dpo/beta_margin_grad_mean": -0.3572808802127838,
"beta_dpo/beta_margin_grad_std": 0.28501877188682556,
"beta_dpo/beta_margin_mean": 6.621963977813721,
"beta_dpo/beta_margin_std": 14.800515174865723,
"beta_dpo/beta_used": 0.15449394285678864,
"beta_dpo/beta_used_raw": -0.3769558072090149,
"beta_dpo/gap_mean": 41.05055236816406,
"beta_dpo/gap_std": 60.93577575683594,
"beta_dpo/loss_margin_mean": 41.461669921875,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8374905517762661,
"grad_norm": 770.6651611328125,
"learning_rate": 3.968661679220467e-08,
"logits/chosen": -2.7024216651916504,
"logits/rejected": -2.714076042175293,
"loss": 1.4889,
"step": 554
},
{
"beta_dpo/beta": 0.7318522334098816,
"beta_dpo/beta_margin_grad_mean": -0.34280702471733093,
"beta_dpo/beta_margin_grad_std": 0.30473846197128296,
"beta_dpo/beta_margin_mean": 42.75510025024414,
"beta_dpo/beta_margin_std": 77.02257537841797,
"beta_dpo/beta_used": 0.7318522334098816,
"beta_dpo/beta_used_raw": 0.5461255311965942,
"beta_dpo/gap_mean": 40.325279235839844,
"beta_dpo/gap_std": 61.363433837890625,
"beta_dpo/loss_margin_mean": 41.5892333984375,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8390022675736961,
"grad_norm": 1357.3326416015625,
"learning_rate": 3.89747159520904e-08,
"logits/chosen": -2.640911102294922,
"logits/rejected": -2.6422338485717773,
"loss": 2.0458,
"step": 555
},
{
"beta_dpo/beta": 0.12523804605007172,
"beta_dpo/beta_margin_grad_mean": -0.34922054409980774,
"beta_dpo/beta_margin_grad_std": 0.2761671245098114,
"beta_dpo/beta_margin_mean": 5.768284797668457,
"beta_dpo/beta_margin_std": 11.772993087768555,
"beta_dpo/beta_used": 0.12523804605007172,
"beta_dpo/beta_used_raw": -0.3097414970397949,
"beta_dpo/gap_mean": 41.716514587402344,
"beta_dpo/gap_std": 61.88613510131836,
"beta_dpo/loss_margin_mean": 43.49522018432617,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8405139833711263,
"grad_norm": 824.6611328125,
"learning_rate": 3.826871794280192e-08,
"logits/chosen": -2.606398820877075,
"logits/rejected": -2.626181125640869,
"loss": 2.4349,
"step": 556
},
{
"beta_dpo/beta": 0.475053071975708,
"beta_dpo/beta_margin_grad_mean": -0.338980108499527,
"beta_dpo/beta_margin_grad_std": 0.3049916923046112,
"beta_dpo/beta_margin_mean": 31.90798568725586,
"beta_dpo/beta_margin_std": 49.599910736083984,
"beta_dpo/beta_used": 0.475053071975708,
"beta_dpo/beta_used_raw": 0.0741761326789856,
"beta_dpo/gap_mean": 43.92063903808594,
"beta_dpo/gap_std": 61.88815689086914,
"beta_dpo/loss_margin_mean": 52.65357971191406,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8420256991685563,
"grad_norm": 2362.4228515625,
"learning_rate": 3.756864251262143e-08,
"logits/chosen": -2.6281399726867676,
"logits/rejected": -2.681450366973877,
"loss": 1.4266,
"step": 557
},
{
"beta_dpo/beta": 0.06146103888750076,
"beta_dpo/beta_margin_grad_mean": -0.3510986864566803,
"beta_dpo/beta_margin_grad_std": 0.2637402415275574,
"beta_dpo/beta_margin_mean": 3.4399425983428955,
"beta_dpo/beta_margin_std": 6.441949844360352,
"beta_dpo/beta_used": 0.06146103888750076,
"beta_dpo/beta_used_raw": -0.19650453329086304,
"beta_dpo/gap_mean": 44.777374267578125,
"beta_dpo/gap_std": 61.062469482421875,
"beta_dpo/loss_margin_mean": 52.67274856567383,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8435374149659864,
"grad_norm": 230.94940185546875,
"learning_rate": 3.687450924416341e-08,
"logits/chosen": -2.6728081703186035,
"logits/rejected": -2.6919565200805664,
"loss": 1.0793,
"step": 558
},
{
"beta_dpo/beta": 0.38971662521362305,
"beta_dpo/beta_margin_grad_mean": -0.3183648884296417,
"beta_dpo/beta_margin_grad_std": 0.29581478238105774,
"beta_dpo/beta_margin_mean": 23.02225685119629,
"beta_dpo/beta_margin_std": 37.03925323486328,
"beta_dpo/beta_used": 0.38971662521362305,
"beta_dpo/beta_used_raw": 0.26015961170196533,
"beta_dpo/gap_mean": 46.305084228515625,
"beta_dpo/gap_std": 61.043853759765625,
"beta_dpo/loss_margin_mean": 48.54761505126953,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8450491307634165,
"grad_norm": 868.8684692382812,
"learning_rate": 3.6186337553827743e-08,
"logits/chosen": -2.674811363220215,
"logits/rejected": -2.719515323638916,
"loss": 2.4704,
"step": 559
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4892803728580475,
"beta_dpo/beta_margin_grad_std": 0.015276779420673847,
"beta_dpo/beta_margin_mean": 0.042931199073791504,
"beta_dpo/beta_margin_std": 0.06118900701403618,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.5526399612426758,
"beta_dpo/gap_mean": 45.3603401184082,
"beta_dpo/gap_std": 61.738525390625,
"beta_dpo/loss_margin_mean": 42.93119812011719,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8465608465608465,
"grad_norm": 3.9332680702209473,
"learning_rate": 3.550414669125573e-08,
"logits/chosen": -2.687546730041504,
"logits/rejected": -2.7129406929016113,
"loss": 1.3528,
"step": 560
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4891974925994873,
"beta_dpo/beta_margin_grad_std": 0.015051293186843395,
"beta_dpo/beta_margin_mean": 0.043262675404548645,
"beta_dpo/beta_margin_std": 0.06029163673520088,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.47285085916519165,
"beta_dpo/gap_mean": 45.07012939453125,
"beta_dpo/gap_std": 61.351409912109375,
"beta_dpo/loss_margin_mean": 43.26267623901367,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8480725623582767,
"grad_norm": 5.872406005859375,
"learning_rate": 3.482795573879241e-08,
"logits/chosen": -2.710063934326172,
"logits/rejected": -2.717824935913086,
"loss": 1.3517,
"step": 561
},
{
"beta_dpo/beta": 0.09827530384063721,
"beta_dpo/beta_margin_grad_mean": -0.3188078701496124,
"beta_dpo/beta_margin_grad_std": 0.24310383200645447,
"beta_dpo/beta_margin_mean": 5.576371192932129,
"beta_dpo/beta_margin_std": 9.145209312438965,
"beta_dpo/beta_used": 0.09827530384063721,
"beta_dpo/beta_used_raw": -0.6985861659049988,
"beta_dpo/gap_mean": 45.44583511352539,
"beta_dpo/gap_std": 60.678016662597656,
"beta_dpo/loss_margin_mean": 44.74995040893555,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8495842781557067,
"grad_norm": 384.5655517578125,
"learning_rate": 3.415778361095226e-08,
"logits/chosen": -2.6769933700561523,
"logits/rejected": -2.707970380783081,
"loss": 1.037,
"step": 562
},
{
"beta_dpo/beta": 0.5065726637840271,
"beta_dpo/beta_margin_grad_mean": -0.19759216904640198,
"beta_dpo/beta_margin_grad_std": 0.3802008330821991,
"beta_dpo/beta_margin_mean": 26.610563278198242,
"beta_dpo/beta_margin_std": 34.39125442504883,
"beta_dpo/beta_used": 0.5065726637840271,
"beta_dpo/beta_used_raw": 0.5065726637840271,
"beta_dpo/gap_mean": 45.60718536376953,
"beta_dpo/gap_std": 61.100364685058594,
"beta_dpo/loss_margin_mean": 51.522247314453125,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8510959939531368,
"grad_norm": 1682.5528564453125,
"learning_rate": 3.349364905389032e-08,
"logits/chosen": -2.6109561920166016,
"logits/rejected": -2.63857364654541,
"loss": 2.5953,
"step": 563
},
{
"beta_dpo/beta": 0.33434048295021057,
"beta_dpo/beta_margin_grad_mean": -0.3251274526119232,
"beta_dpo/beta_margin_grad_std": 0.29672136902809143,
"beta_dpo/beta_margin_mean": 17.791189193725586,
"beta_dpo/beta_margin_std": 37.987796783447266,
"beta_dpo/beta_used": 0.33434048295021057,
"beta_dpo/beta_used_raw": 0.3103909194469452,
"beta_dpo/gap_mean": 46.764015197753906,
"beta_dpo/gap_std": 61.399383544921875,
"beta_dpo/loss_margin_mean": 51.34377670288086,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8526077097505669,
"grad_norm": 323.5574951171875,
"learning_rate": 3.283557064487785e-08,
"logits/chosen": -2.7021117210388184,
"logits/rejected": -2.728177547454834,
"loss": 0.9074,
"step": 564
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4917549788951874,
"beta_dpo/beta_margin_grad_std": 0.014812292531132698,
"beta_dpo/beta_margin_mean": 0.03301194682717323,
"beta_dpo/beta_margin_std": 0.05931118503212929,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.7162899971008301,
"beta_dpo/gap_mean": 44.52302932739258,
"beta_dpo/gap_std": 61.823211669921875,
"beta_dpo/loss_margin_mean": 33.01194381713867,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.854119425547997,
"grad_norm": 3.8082151412963867,
"learning_rate": 3.218356679178252e-08,
"logits/chosen": -2.66007137298584,
"logits/rejected": -2.680148124694824,
"loss": 1.3563,
"step": 565
},
{
"beta_dpo/beta": 0.6846121549606323,
"beta_dpo/beta_margin_grad_mean": -0.2204248458147049,
"beta_dpo/beta_margin_grad_std": 0.3947845995426178,
"beta_dpo/beta_margin_mean": 41.83022689819336,
"beta_dpo/beta_margin_std": 55.214500427246094,
"beta_dpo/beta_used": 0.6846121549606323,
"beta_dpo/beta_used_raw": 0.6846121549606323,
"beta_dpo/gap_mean": 45.49637985229492,
"beta_dpo/gap_std": 61.635169982910156,
"beta_dpo/loss_margin_mean": 55.270782470703125,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8556311413454271,
"grad_norm": 3381.755126953125,
"learning_rate": 3.1537655732553764e-08,
"logits/chosen": -2.678783655166626,
"logits/rejected": -2.68731951713562,
"loss": 3.5014,
"step": 566
},
{
"beta_dpo/beta": 0.11898145079612732,
"beta_dpo/beta_margin_grad_mean": -0.2830747365951538,
"beta_dpo/beta_margin_grad_std": 0.2540491223335266,
"beta_dpo/beta_margin_mean": 7.840543270111084,
"beta_dpo/beta_margin_std": 11.777994155883789,
"beta_dpo/beta_used": 0.11898145079612732,
"beta_dpo/beta_used_raw": -0.3021107614040375,
"beta_dpo/gap_mean": 47.88221740722656,
"beta_dpo/gap_std": 60.788387298583984,
"beta_dpo/loss_margin_mean": 49.669471740722656,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8571428571428571,
"grad_norm": 287.8054504394531,
"learning_rate": 3.089785553471233e-08,
"logits/chosen": -2.6855921745300293,
"logits/rejected": -2.72359037399292,
"loss": 0.9714,
"step": 567
},
{
"beta_dpo/beta": 0.1126384437084198,
"beta_dpo/beta_margin_grad_mean": -0.33147335052490234,
"beta_dpo/beta_margin_grad_std": 0.2764538824558258,
"beta_dpo/beta_margin_mean": 6.823496341705322,
"beta_dpo/beta_margin_std": 11.87881088256836,
"beta_dpo/beta_used": 0.1126384437084198,
"beta_dpo/beta_used_raw": -0.024167485535144806,
"beta_dpo/gap_mean": 46.8142204284668,
"beta_dpo/gap_std": 59.064796447753906,
"beta_dpo/loss_margin_mean": 48.74604415893555,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8586545729402872,
"grad_norm": 370.638427734375,
"learning_rate": 3.026418409484513e-08,
"logits/chosen": -2.6945395469665527,
"logits/rejected": -2.7361483573913574,
"loss": 1.1515,
"step": 568
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49009791016578674,
"beta_dpo/beta_margin_grad_std": 0.014846453443169594,
"beta_dpo/beta_margin_mean": 0.03965507820248604,
"beta_dpo/beta_margin_std": 0.05946631357073784,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.4391339421272278,
"beta_dpo/gap_mean": 46.97361755371094,
"beta_dpo/gap_std": 59.10280227661133,
"beta_dpo/loss_margin_mean": 39.65507507324219,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8601662887377173,
"grad_norm": 4.464386940002441,
"learning_rate": 2.963665913810451e-08,
"logits/chosen": -2.668788433074951,
"logits/rejected": -2.679075241088867,
"loss": 1.3493,
"step": 569
},
{
"beta_dpo/beta": 0.9029349684715271,
"beta_dpo/beta_margin_grad_mean": -0.3065827488899231,
"beta_dpo/beta_margin_grad_std": 0.29563117027282715,
"beta_dpo/beta_margin_mean": 70.92729187011719,
"beta_dpo/beta_margin_std": 114.28070068359375,
"beta_dpo/beta_used": 0.9029349684715271,
"beta_dpo/beta_used_raw": 0.6830695271492004,
"beta_dpo/gap_mean": 47.96229934692383,
"beta_dpo/gap_std": 59.62773895263672,
"beta_dpo/loss_margin_mean": 64.41072845458984,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8616780045351474,
"grad_norm": 781.4393920898438,
"learning_rate": 2.9015298217712453e-08,
"logits/chosen": -2.6695735454559326,
"logits/rejected": -2.6966354846954346,
"loss": 1.4145,
"step": 570
},
{
"beta_dpo/beta": 0.14786839485168457,
"beta_dpo/beta_margin_grad_mean": -0.35911333560943604,
"beta_dpo/beta_margin_grad_std": 0.2933298647403717,
"beta_dpo/beta_margin_mean": 6.836848258972168,
"beta_dpo/beta_margin_std": 13.719381332397461,
"beta_dpo/beta_used": 0.14786839485168457,
"beta_dpo/beta_used_raw": -0.18180185556411743,
"beta_dpo/gap_mean": 47.217105865478516,
"beta_dpo/gap_std": 60.4058723449707,
"beta_dpo/loss_margin_mean": 36.39863967895508,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8631897203325775,
"grad_norm": 588.5823974609375,
"learning_rate": 2.840011871446962e-08,
"logits/chosen": -2.660149097442627,
"logits/rejected": -2.665757417678833,
"loss": 1.6662,
"step": 571
},
{
"beta_dpo/beta": 0.3506244122982025,
"beta_dpo/beta_margin_grad_mean": -0.34980422258377075,
"beta_dpo/beta_margin_grad_std": 0.30930382013320923,
"beta_dpo/beta_margin_mean": 16.844135284423828,
"beta_dpo/beta_margin_std": 33.19049835205078,
"beta_dpo/beta_used": 0.3506244122982025,
"beta_dpo/beta_used_raw": 0.3069079518318176,
"beta_dpo/gap_mean": 46.96336364746094,
"beta_dpo/gap_std": 59.50727844238281,
"beta_dpo/loss_margin_mean": 44.716285705566406,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8647014361300076,
"grad_norm": 698.576171875,
"learning_rate": 2.7791137836269158e-08,
"logits/chosen": -2.72560977935791,
"logits/rejected": -2.6912665367126465,
"loss": 1.6549,
"step": 572
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.48835763335227966,
"beta_dpo/beta_margin_grad_std": 0.017284568399190903,
"beta_dpo/beta_margin_mean": 0.046639442443847656,
"beta_dpo/beta_margin_std": 0.0692763701081276,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.31975650787353516,
"beta_dpo/gap_mean": 46.835479736328125,
"beta_dpo/gap_std": 60.77733612060547,
"beta_dpo/loss_margin_mean": 46.63943862915039,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8662131519274376,
"grad_norm": 4.514516830444336,
"learning_rate": 2.718837261761528e-08,
"logits/chosen": -2.689098834991455,
"logits/rejected": -2.7141900062561035,
"loss": 1.3473,
"step": 573
},
{
"beta_dpo/beta": 0.5320430994033813,
"beta_dpo/beta_margin_grad_mean": -0.2053694725036621,
"beta_dpo/beta_margin_grad_std": 0.3366325795650482,
"beta_dpo/beta_margin_mean": 33.57503890991211,
"beta_dpo/beta_margin_std": 48.08414840698242,
"beta_dpo/beta_used": 0.5320430994033813,
"beta_dpo/beta_used_raw": 0.5320430994033813,
"beta_dpo/gap_mean": 46.90580749511719,
"beta_dpo/gap_std": 60.166412353515625,
"beta_dpo/loss_margin_mean": 53.00767517089844,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8677248677248677,
"grad_norm": 1298.696044921875,
"learning_rate": 2.659183991914696e-08,
"logits/chosen": -2.641983985900879,
"logits/rejected": -2.6648993492126465,
"loss": 1.0964,
"step": 574
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4908713698387146,
"beta_dpo/beta_margin_grad_std": 0.01577117294073105,
"beta_dpo/beta_margin_mean": 0.03656105324625969,
"beta_dpo/beta_margin_std": 0.06318365782499313,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.8852093815803528,
"beta_dpo/gap_mean": 46.991451263427734,
"beta_dpo/gap_std": 60.7982292175293,
"beta_dpo/loss_margin_mean": 36.56105041503906,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8692365835222978,
"grad_norm": 4.405168056488037,
"learning_rate": 2.600155642716606e-08,
"logits/chosen": -2.6793715953826904,
"logits/rejected": -2.735421895980835,
"loss": 1.3564,
"step": 575
},
{
"beta_dpo/beta": 0.49797600507736206,
"beta_dpo/beta_margin_grad_mean": -0.30947670340538025,
"beta_dpo/beta_margin_grad_std": 0.2893446385860443,
"beta_dpo/beta_margin_mean": 30.291072845458984,
"beta_dpo/beta_margin_std": 55.82719421386719,
"beta_dpo/beta_used": 0.49797600507736206,
"beta_dpo/beta_used_raw": 0.11030000448226929,
"beta_dpo/gap_mean": 47.00640869140625,
"beta_dpo/gap_std": 61.392356872558594,
"beta_dpo/loss_margin_mean": 52.25484085083008,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8707482993197279,
"grad_norm": 710.93017578125,
"learning_rate": 2.5417538653170754e-08,
"logits/chosen": -2.652390480041504,
"logits/rejected": -2.703554153442383,
"loss": 0.9258,
"step": 576
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49140772223472595,
"beta_dpo/beta_margin_grad_std": 0.015221024863421917,
"beta_dpo/beta_margin_mean": 0.03440757095813751,
"beta_dpo/beta_margin_std": 0.060963064432144165,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.5801578164100647,
"beta_dpo/gap_mean": 44.7833137512207,
"beta_dpo/gap_std": 61.34014892578125,
"beta_dpo/loss_margin_mean": 34.407569885253906,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.872260015117158,
"grad_norm": 3.743579149246216,
"learning_rate": 2.4839802933393607e-08,
"logits/chosen": -2.6809592247009277,
"logits/rejected": -2.6941497325897217,
"loss": 1.3538,
"step": 577
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.490521103143692,
"beta_dpo/beta_margin_grad_std": 0.013443054631352425,
"beta_dpo/beta_margin_mean": 0.03795962780714035,
"beta_dpo/beta_margin_std": 0.05385306850075722,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.27034834027290344,
"beta_dpo/gap_mean": 43.665016174316406,
"beta_dpo/gap_std": 60.30692672729492,
"beta_dpo/loss_margin_mean": 37.959625244140625,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.873771730914588,
"grad_norm": 3.6446995735168457,
"learning_rate": 2.4268365428344733e-08,
"logits/chosen": -2.629558801651001,
"logits/rejected": -2.6443064212799072,
"loss": 1.3497,
"step": 578
},
{
"beta_dpo/beta": 0.4658081829547882,
"beta_dpo/beta_margin_grad_mean": -0.3299787640571594,
"beta_dpo/beta_margin_grad_std": 0.29749563336372375,
"beta_dpo/beta_margin_mean": 27.888103485107422,
"beta_dpo/beta_margin_std": 50.06583023071289,
"beta_dpo/beta_used": 0.4658081829547882,
"beta_dpo/beta_used_raw": 0.3958699703216553,
"beta_dpo/gap_mean": 45.86872100830078,
"beta_dpo/gap_std": 60.440391540527344,
"beta_dpo/loss_margin_mean": 60.93756866455078,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8752834467120182,
"grad_norm": 744.2311401367188,
"learning_rate": 2.3703242122359357e-08,
"logits/chosen": -2.675605297088623,
"logits/rejected": -2.6713008880615234,
"loss": 1.8259,
"step": 579
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49142566323280334,
"beta_dpo/beta_margin_grad_std": 0.015153970569372177,
"beta_dpo/beta_margin_mean": 0.034334223717451096,
"beta_dpo/beta_margin_std": 0.06069787219166756,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.7968156337738037,
"beta_dpo/gap_mean": 44.41089630126953,
"beta_dpo/gap_std": 60.85872268676758,
"beta_dpo/loss_margin_mean": 34.33422088623047,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8767951625094482,
"grad_norm": 3.7005584239959717,
"learning_rate": 2.3144448823151392e-08,
"logits/chosen": -2.689199447631836,
"logits/rejected": -2.719898223876953,
"loss": 1.3575,
"step": 580
},
{
"beta_dpo/beta": 0.7862246036529541,
"beta_dpo/beta_margin_grad_mean": -0.16341674327850342,
"beta_dpo/beta_margin_grad_std": 0.3616049289703369,
"beta_dpo/beta_margin_mean": 39.6046028137207,
"beta_dpo/beta_margin_std": 50.12466049194336,
"beta_dpo/beta_used": 0.7862246036529541,
"beta_dpo/beta_used_raw": 0.7862246036529541,
"beta_dpo/gap_mean": 45.356910705566406,
"beta_dpo/gap_std": 60.91173553466797,
"beta_dpo/loss_margin_mean": 49.52141571044922,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8783068783068783,
"grad_norm": 2152.57861328125,
"learning_rate": 2.259200116137039e-08,
"logits/chosen": -2.6786694526672363,
"logits/rejected": -2.711623430252075,
"loss": 4.1808,
"step": 581
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.48708194494247437,
"beta_dpo/beta_margin_grad_std": 0.016725635156035423,
"beta_dpo/beta_margin_mean": 0.05174446851015091,
"beta_dpo/beta_margin_std": 0.0670098289847374,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.32603561878204346,
"beta_dpo/gap_mean": 46.38182067871094,
"beta_dpo/gap_std": 61.84091567993164,
"beta_dpo/loss_margin_mean": 51.74446487426758,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8798185941043084,
"grad_norm": 4.562225341796875,
"learning_rate": 2.204591459016525e-08,
"logits/chosen": -2.6497602462768555,
"logits/rejected": -2.6249165534973145,
"loss": 1.3481,
"step": 582
},
{
"beta_dpo/beta": 0.026739204302430153,
"beta_dpo/beta_margin_grad_mean": -0.38849934935569763,
"beta_dpo/beta_margin_grad_std": 0.24935057759284973,
"beta_dpo/beta_margin_mean": 1.3460389375686646,
"beta_dpo/beta_margin_std": 2.851071357727051,
"beta_dpo/beta_used": 0.026739204302430153,
"beta_dpo/beta_used_raw": -0.28397732973098755,
"beta_dpo/gap_mean": 46.57801055908203,
"beta_dpo/gap_std": 63.39801025390625,
"beta_dpo/loss_margin_mean": 45.611568450927734,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8813303099017384,
"grad_norm": 87.90982055664062,
"learning_rate": 2.1506204384751064e-08,
"logits/chosen": -2.648636817932129,
"logits/rejected": -2.7286105155944824,
"loss": 1.0609,
"step": 583
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4901319742202759,
"beta_dpo/beta_margin_grad_std": 0.014690570533275604,
"beta_dpo/beta_margin_mean": 0.03951896354556084,
"beta_dpo/beta_margin_std": 0.05884556472301483,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.20548459887504578,
"beta_dpo/gap_mean": 45.45520782470703,
"beta_dpo/gap_std": 63.303741455078125,
"beta_dpo/loss_margin_mean": 39.518959045410156,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8828420256991686,
"grad_norm": 4.558004379272461,
"learning_rate": 2.09728856419826e-08,
"logits/chosen": -2.587111711502075,
"logits/rejected": -2.6333353519439697,
"loss": 1.3471,
"step": 584
},
{
"beta_dpo/beta": 0.43229246139526367,
"beta_dpo/beta_margin_grad_mean": -0.34557801485061646,
"beta_dpo/beta_margin_grad_std": 0.30445998907089233,
"beta_dpo/beta_margin_mean": 21.776687622070312,
"beta_dpo/beta_margin_std": 50.94138717651367,
"beta_dpo/beta_used": 0.43229246139526367,
"beta_dpo/beta_used_raw": 0.049025118350982666,
"beta_dpo/gap_mean": 44.17786407470703,
"beta_dpo/gap_std": 62.77634048461914,
"beta_dpo/loss_margin_mean": 42.17787170410156,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8843537414965986,
"grad_norm": 347.8465881347656,
"learning_rate": 2.044597327993153e-08,
"logits/chosen": -2.7210912704467773,
"logits/rejected": -2.743596076965332,
"loss": 0.9183,
"step": 585
},
{
"beta_dpo/beta": 0.5478598475456238,
"beta_dpo/beta_margin_grad_mean": -0.13486941158771515,
"beta_dpo/beta_margin_grad_std": 0.30780330300331116,
"beta_dpo/beta_margin_mean": 28.558656692504883,
"beta_dpo/beta_margin_std": 34.09098815917969,
"beta_dpo/beta_used": 0.5478598475456238,
"beta_dpo/beta_used_raw": 0.5478598475456238,
"beta_dpo/gap_mean": 45.71546936035156,
"beta_dpo/gap_std": 62.273223876953125,
"beta_dpo/loss_margin_mean": 52.50735855102539,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8858654572940288,
"grad_norm": 594.8992309570312,
"learning_rate": 1.9925482037469187e-08,
"logits/chosen": -2.641079902648926,
"logits/rejected": -2.650176525115967,
"loss": 0.6024,
"step": 586
},
{
"beta_dpo/beta": 0.6760488748550415,
"beta_dpo/beta_margin_grad_mean": -0.24379543960094452,
"beta_dpo/beta_margin_grad_std": 0.40585577487945557,
"beta_dpo/beta_margin_mean": 36.56929397583008,
"beta_dpo/beta_margin_std": 50.73309326171875,
"beta_dpo/beta_used": 0.6760488748550415,
"beta_dpo/beta_used_raw": 0.6760488748550415,
"beta_dpo/gap_mean": 47.27666091918945,
"beta_dpo/gap_std": 63.39990997314453,
"beta_dpo/loss_margin_mean": 51.81309127807617,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8873771730914588,
"grad_norm": 2910.705810546875,
"learning_rate": 1.9411426473854687e-08,
"logits/chosen": -2.6720352172851562,
"logits/rejected": -2.664752244949341,
"loss": 3.7419,
"step": 587
},
{
"beta_dpo/beta": 0.5917388200759888,
"beta_dpo/beta_margin_grad_mean": -0.16118545830249786,
"beta_dpo/beta_margin_grad_std": 0.3344132602214813,
"beta_dpo/beta_margin_mean": 30.505475997924805,
"beta_dpo/beta_margin_std": 43.3480110168457,
"beta_dpo/beta_used": 0.5917388200759888,
"beta_dpo/beta_used_raw": 0.5917388200759888,
"beta_dpo/gap_mean": 47.58997344970703,
"beta_dpo/gap_std": 63.348236083984375,
"beta_dpo/loss_margin_mean": 49.69615173339844,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8888888888888888,
"grad_norm": 1808.064453125,
"learning_rate": 1.890382096832699e-08,
"logits/chosen": -2.671614170074463,
"logits/rejected": -2.7003884315490723,
"loss": 4.4247,
"step": 588
},
{
"beta_dpo/beta": 0.421763151884079,
"beta_dpo/beta_margin_grad_mean": -0.17055396735668182,
"beta_dpo/beta_margin_grad_std": 0.3382036089897156,
"beta_dpo/beta_margin_mean": 24.8367919921875,
"beta_dpo/beta_margin_std": 33.23596954345703,
"beta_dpo/beta_used": 0.421763151884079,
"beta_dpo/beta_used_raw": 0.421763151884079,
"beta_dpo/gap_mean": 47.91116714477539,
"beta_dpo/gap_std": 62.123374938964844,
"beta_dpo/loss_margin_mean": 53.15658950805664,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.890400604686319,
"grad_norm": 956.9037475585938,
"learning_rate": 1.840267971970344e-08,
"logits/chosen": -2.6805222034454346,
"logits/rejected": -2.696932315826416,
"loss": 1.6051,
"step": 589
},
{
"beta_dpo/beta": 0.31593888998031616,
"beta_dpo/beta_margin_grad_mean": -0.23733378946781158,
"beta_dpo/beta_margin_grad_std": 0.34482425451278687,
"beta_dpo/beta_margin_mean": 16.363954544067383,
"beta_dpo/beta_margin_std": 33.442989349365234,
"beta_dpo/beta_used": 0.31593888998031616,
"beta_dpo/beta_used_raw": 0.31593888998031616,
"beta_dpo/gap_mean": 49.48387908935547,
"beta_dpo/gap_std": 62.66719055175781,
"beta_dpo/loss_margin_mean": 53.68710708618164,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.891912320483749,
"grad_norm": 1125.697998046875,
"learning_rate": 1.7908016745981856e-08,
"logits/chosen": -2.671731472015381,
"logits/rejected": -2.695211887359619,
"loss": 1.4225,
"step": 590
},
{
"beta_dpo/beta": 0.3611750602722168,
"beta_dpo/beta_margin_grad_mean": -0.36769920587539673,
"beta_dpo/beta_margin_grad_std": 0.31189650297164917,
"beta_dpo/beta_margin_mean": 23.30474281311035,
"beta_dpo/beta_margin_std": 38.24365234375,
"beta_dpo/beta_used": 0.3611750602722168,
"beta_dpo/beta_used_raw": -0.048559755086898804,
"beta_dpo/gap_mean": 50.10858917236328,
"beta_dpo/gap_std": 63.43110275268555,
"beta_dpo/loss_margin_mean": 48.033172607421875,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8934240362811792,
"grad_norm": 1010.1416625976562,
"learning_rate": 1.7419845883949098e-08,
"logits/chosen": -2.662808895111084,
"logits/rejected": -2.7068371772766113,
"loss": 2.6003,
"step": 591
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4882203936576843,
"beta_dpo/beta_margin_grad_std": 0.014940670691430569,
"beta_dpo/beta_margin_mean": 0.0471792109310627,
"beta_dpo/beta_margin_std": 0.05986550450325012,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.9182393550872803,
"beta_dpo/gap_mean": 49.40886688232422,
"beta_dpo/gap_std": 63.50605010986328,
"beta_dpo/loss_margin_mean": 47.17920684814453,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8949357520786092,
"grad_norm": 4.937822341918945,
"learning_rate": 1.6938180788793556e-08,
"logits/chosen": -2.6216418743133545,
"logits/rejected": -2.681025981903076,
"loss": 1.3546,
"step": 592
},
{
"beta_dpo/beta": 0.10008588433265686,
"beta_dpo/beta_margin_grad_mean": -0.34281057119369507,
"beta_dpo/beta_margin_grad_std": 0.2690125107765198,
"beta_dpo/beta_margin_mean": 4.608383655548096,
"beta_dpo/beta_margin_std": 9.81347942352295,
"beta_dpo/beta_used": 0.10008588433265686,
"beta_dpo/beta_used_raw": -0.28076955676078796,
"beta_dpo/gap_mean": 48.20735549926758,
"beta_dpo/gap_std": 62.75986099243164,
"beta_dpo/loss_margin_mean": 45.53506088256836,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8964474678760394,
"grad_norm": 453.401611328125,
"learning_rate": 1.6463034933723336e-08,
"logits/chosen": -2.620387077331543,
"logits/rejected": -2.6645336151123047,
"loss": 1.3823,
"step": 593
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4902368187904358,
"beta_dpo/beta_margin_grad_std": 0.01430630125105381,
"beta_dpo/beta_margin_mean": 0.03909473866224289,
"beta_dpo/beta_margin_std": 0.05729776993393898,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.563217282295227,
"beta_dpo/gap_mean": 46.71723937988281,
"beta_dpo/gap_std": 61.984397888183594,
"beta_dpo/loss_margin_mean": 39.0947380065918,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8979591836734694,
"grad_norm": 4.723624229431152,
"learning_rate": 1.5994421609589385e-08,
"logits/chosen": -2.684835910797119,
"logits/rejected": -2.688138484954834,
"loss": 1.3514,
"step": 594
},
{
"beta_dpo/beta": 0.7683069705963135,
"beta_dpo/beta_margin_grad_mean": -0.16124965250492096,
"beta_dpo/beta_margin_grad_std": 0.3314729332923889,
"beta_dpo/beta_margin_mean": 46.35795593261719,
"beta_dpo/beta_margin_std": 64.7205810546875,
"beta_dpo/beta_used": 0.7683069705963135,
"beta_dpo/beta_used_raw": 0.7683069705963135,
"beta_dpo/gap_mean": 47.41358184814453,
"beta_dpo/gap_std": 62.83539962768555,
"beta_dpo/loss_margin_mean": 56.00600814819336,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8994708994708994,
"grad_norm": 5064.8017578125,
"learning_rate": 1.553235392451377e-08,
"logits/chosen": -2.6298999786376953,
"logits/rejected": -2.6741812229156494,
"loss": 2.9971,
"step": 595
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49240824580192566,
"beta_dpo/beta_margin_grad_std": 0.016294801607728004,
"beta_dpo/beta_margin_mean": 0.03041478991508484,
"beta_dpo/beta_margin_std": 0.06527598202228546,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.981214702129364,
"beta_dpo/gap_mean": 45.94303512573242,
"beta_dpo/gap_std": 63.294654846191406,
"beta_dpo/loss_margin_mean": 30.4147891998291,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9009826152683296,
"grad_norm": 3.551957368850708,
"learning_rate": 1.507684480352292e-08,
"logits/chosen": -2.719653606414795,
"logits/rejected": -2.707707405090332,
"loss": 1.3593,
"step": 596
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.48790696263313293,
"beta_dpo/beta_margin_grad_std": 0.015724794939160347,
"beta_dpo/beta_margin_mean": 0.04844098910689354,
"beta_dpo/beta_margin_std": 0.06300117075443268,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.3929826617240906,
"beta_dpo/gap_mean": 45.600364685058594,
"beta_dpo/gap_std": 63.82142639160156,
"beta_dpo/loss_margin_mean": 48.44098663330078,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9024943310657596,
"grad_norm": 3.981309413909912,
"learning_rate": 1.4627906988186111e-08,
"logits/chosen": -2.629239320755005,
"logits/rejected": -2.6321868896484375,
"loss": 1.35,
"step": 597
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4906459450721741,
"beta_dpo/beta_margin_grad_std": 0.016122223809361458,
"beta_dpo/beta_margin_mean": 0.03746494650840759,
"beta_dpo/beta_margin_std": 0.06457507610321045,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.32325291633605957,
"beta_dpo/gap_mean": 44.20635223388672,
"beta_dpo/gap_std": 63.54792022705078,
"beta_dpo/loss_margin_mean": 37.464942932128906,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9040060468631897,
"grad_norm": 3.6207661628723145,
"learning_rate": 1.4185553036259095e-08,
"logits/chosen": -2.6546905040740967,
"logits/rejected": -2.6918768882751465,
"loss": 1.3501,
"step": 598
},
{
"beta_dpo/beta": 0.11703047156333923,
"beta_dpo/beta_margin_grad_mean": -0.3772951662540436,
"beta_dpo/beta_margin_grad_std": 0.2943384647369385,
"beta_dpo/beta_margin_mean": 4.737819671630859,
"beta_dpo/beta_margin_std": 12.399438858032227,
"beta_dpo/beta_used": 0.11703047156333923,
"beta_dpo/beta_used_raw": -0.01774664968252182,
"beta_dpo/gap_mean": 42.406776428222656,
"beta_dpo/gap_std": 65.09292602539062,
"beta_dpo/loss_margin_mean": 35.07368087768555,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9055177626606198,
"grad_norm": 222.8874969482422,
"learning_rate": 1.3749795321332885e-08,
"logits/chosen": -2.6827566623687744,
"logits/rejected": -2.701910972595215,
"loss": 1.0998,
"step": 599
},
{
"beta_dpo/beta": 0.31094202399253845,
"beta_dpo/beta_margin_grad_mean": -0.33834904432296753,
"beta_dpo/beta_margin_grad_std": 0.28769829869270325,
"beta_dpo/beta_margin_mean": 19.670934677124023,
"beta_dpo/beta_margin_std": 35.01140594482422,
"beta_dpo/beta_used": 0.31094202399253845,
"beta_dpo/beta_used_raw": 0.25774458050727844,
"beta_dpo/gap_mean": 42.59223175048828,
"beta_dpo/gap_std": 65.00656127929688,
"beta_dpo/loss_margin_mean": 48.77783966064453,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9070294784580499,
"grad_norm": 1108.4788818359375,
"learning_rate": 1.3320646032487393e-08,
"logits/chosen": -2.6578688621520996,
"logits/rejected": -2.695185422897339,
"loss": 2.3016,
"step": 600
},
{
"epoch": 0.9070294784580499,
"eval_beta_dpo/beta": 0.16006897389888763,
"eval_beta_dpo/beta_margin_grad_mean": -0.382272869348526,
"eval_beta_dpo/beta_margin_grad_std": 0.16653159260749817,
"eval_beta_dpo/beta_margin_mean": 8.13154411315918,
"eval_beta_dpo/beta_margin_std": 10.172322273254395,
"eval_beta_dpo/beta_used": 0.16006897389888763,
"eval_beta_dpo/beta_used_raw": -0.1685781031847,
"eval_beta_dpo/gap_mean": 43.68259048461914,
"eval_beta_dpo/gap_std": 65.11566162109375,
"eval_beta_dpo/loss_margin_mean": 39.20629119873047,
"eval_beta_dpo/mask_keep_frac": 1.0,
"eval_logits/chosen": -2.739290475845337,
"eval_logits/rejected": -2.7541418075561523,
"eval_loss": 1.292609453201294,
"eval_runtime": 36.3199,
"eval_samples_per_second": 63.409,
"eval_steps_per_second": 1.982,
"step": 600
},
{
"beta_dpo/beta": 0.16864097118377686,
"beta_dpo/beta_margin_grad_mean": -0.257927268743515,
"beta_dpo/beta_margin_grad_std": 0.3531711995601654,
"beta_dpo/beta_margin_mean": 8.603113174438477,
"beta_dpo/beta_margin_std": 12.788561820983887,
"beta_dpo/beta_used": 0.16864097118377686,
"beta_dpo/beta_used_raw": 0.16864097118377686,
"beta_dpo/gap_mean": 44.62376403808594,
"beta_dpo/gap_std": 65.33360290527344,
"beta_dpo/loss_margin_mean": 47.54133224487305,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.90854119425548,
"grad_norm": 535.887451171875,
"learning_rate": 1.2898117173950868e-08,
"logits/chosen": -2.6898365020751953,
"logits/rejected": -2.736027240753174,
"loss": 1.576,
"step": 601
},
{
"beta_dpo/beta": 0.07933580130338669,
"beta_dpo/beta_margin_grad_mean": -0.32244589924812317,
"beta_dpo/beta_margin_grad_std": 0.2566680610179901,
"beta_dpo/beta_margin_mean": 4.081573486328125,
"beta_dpo/beta_margin_std": 7.243903636932373,
"beta_dpo/beta_used": 0.07933580130338669,
"beta_dpo/beta_used_raw": 0.05116073787212372,
"beta_dpo/gap_mean": 44.761383056640625,
"beta_dpo/gap_std": 64.28461456298828,
"beta_dpo/loss_margin_mean": 48.29131317138672,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.91005291005291,
"grad_norm": 439.05218505859375,
"learning_rate": 1.2482220564763667e-08,
"logits/chosen": -2.6923227310180664,
"logits/rejected": -2.697751045227051,
"loss": 1.1586,
"step": 602
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4896172881126404,
"beta_dpo/beta_margin_grad_std": 0.016847671940922737,
"beta_dpo/beta_margin_mean": 0.04159487411379814,
"beta_dpo/beta_margin_std": 0.06750661134719849,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.42653343081474304,
"beta_dpo/gap_mean": 44.85320281982422,
"beta_dpo/gap_std": 64.16732788085938,
"beta_dpo/loss_margin_mean": 41.594871520996094,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9115646258503401,
"grad_norm": 4.901760578155518,
"learning_rate": 1.2072967838448051e-08,
"logits/chosen": -2.668689250946045,
"logits/rejected": -2.7003610134124756,
"loss": 1.3513,
"step": 603
},
{
"beta_dpo/beta": 0.3185281753540039,
"beta_dpo/beta_margin_grad_mean": -0.34246817231178284,
"beta_dpo/beta_margin_grad_std": 0.29718223214149475,
"beta_dpo/beta_margin_mean": 14.821910858154297,
"beta_dpo/beta_margin_std": 35.994895935058594,
"beta_dpo/beta_used": 0.3185281753540039,
"beta_dpo/beta_used_raw": -0.35138705372810364,
"beta_dpo/gap_mean": 43.115814208984375,
"beta_dpo/gap_std": 65.3883056640625,
"beta_dpo/loss_margin_mean": 38.19163131713867,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9130763416477702,
"grad_norm": 1009.60888671875,
"learning_rate": 1.1670370442682459e-08,
"logits/chosen": -2.695526123046875,
"logits/rejected": -2.6826400756835938,
"loss": 2.4601,
"step": 604
},
{
"beta_dpo/beta": 0.19613006711006165,
"beta_dpo/beta_margin_grad_mean": -0.33283427357673645,
"beta_dpo/beta_margin_grad_std": 0.2794617712497711,
"beta_dpo/beta_margin_mean": 8.730825424194336,
"beta_dpo/beta_margin_std": 19.20995330810547,
"beta_dpo/beta_used": 0.19613006711006165,
"beta_dpo/beta_used_raw": -0.11835774779319763,
"beta_dpo/gap_mean": 43.39642333984375,
"beta_dpo/gap_std": 65.54288482666016,
"beta_dpo/loss_margin_mean": 44.109275817871094,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9145880574452003,
"grad_norm": 454.4317932128906,
"learning_rate": 1.1274439638981532e-08,
"logits/chosen": -2.6736767292022705,
"logits/rejected": -2.698230028152466,
"loss": 1.1924,
"step": 605
},
{
"beta_dpo/beta": 0.15788358449935913,
"beta_dpo/beta_margin_grad_mean": -0.33934134244918823,
"beta_dpo/beta_margin_grad_std": 0.29730185866355896,
"beta_dpo/beta_margin_mean": 7.944419860839844,
"beta_dpo/beta_margin_std": 16.363065719604492,
"beta_dpo/beta_used": 0.15788358449935913,
"beta_dpo/beta_used_raw": -0.1490476429462433,
"beta_dpo/gap_mean": 43.95667266845703,
"beta_dpo/gap_std": 65.00943756103516,
"beta_dpo/loss_margin_mean": 47.72287368774414,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9160997732426304,
"grad_norm": 456.1502380371094,
"learning_rate": 1.0885186502381016e-08,
"logits/chosen": -2.68801212310791,
"logits/rejected": -2.725857734680176,
"loss": 1.1833,
"step": 606
},
{
"beta_dpo/beta": 0.058823633939027786,
"beta_dpo/beta_margin_grad_mean": -0.34928181767463684,
"beta_dpo/beta_margin_grad_std": 0.2856190800666809,
"beta_dpo/beta_margin_mean": 3.743802547454834,
"beta_dpo/beta_margin_std": 6.441350936889648,
"beta_dpo/beta_used": 0.058823633939027786,
"beta_dpo/beta_used_raw": -0.2072100192308426,
"beta_dpo/gap_mean": 46.15118408203125,
"beta_dpo/gap_std": 64.77732849121094,
"beta_dpo/loss_margin_mean": 54.09783935546875,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9176114890400605,
"grad_norm": 391.70599365234375,
"learning_rate": 1.0502621921127774e-08,
"logits/chosen": -2.6742727756500244,
"logits/rejected": -2.6755614280700684,
"loss": 1.544,
"step": 607
},
{
"beta_dpo/beta": 0.09076043963432312,
"beta_dpo/beta_margin_grad_mean": -0.37318694591522217,
"beta_dpo/beta_margin_grad_std": 0.29319775104522705,
"beta_dpo/beta_margin_mean": 3.9786016941070557,
"beta_dpo/beta_margin_std": 9.074618339538574,
"beta_dpo/beta_used": 0.09076043963432312,
"beta_dpo/beta_used_raw": -0.24247410893440247,
"beta_dpo/gap_mean": 43.72791290283203,
"beta_dpo/gap_std": 63.787208557128906,
"beta_dpo/loss_margin_mean": 33.294742584228516,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9191232048374905,
"grad_norm": 533.9596557617188,
"learning_rate": 1.0126756596375685e-08,
"logits/chosen": -2.6911814212799072,
"logits/rejected": -2.73805570602417,
"loss": 2.0222,
"step": 608
},
{
"beta_dpo/beta": 0.04691994562745094,
"beta_dpo/beta_margin_grad_mean": -0.3009467124938965,
"beta_dpo/beta_margin_grad_std": 0.21611282229423523,
"beta_dpo/beta_margin_mean": 2.724210739135742,
"beta_dpo/beta_margin_std": 4.51793909072876,
"beta_dpo/beta_used": 0.04691994562745094,
"beta_dpo/beta_used_raw": 0.025028718635439873,
"beta_dpo/gap_mean": 45.130619049072266,
"beta_dpo/gap_std": 62.89480209350586,
"beta_dpo/loss_margin_mean": 51.13569641113281,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9206349206349206,
"grad_norm": 53.00703811645508,
"learning_rate": 9.757601041885694e-09,
"logits/chosen": -2.6229584217071533,
"logits/rejected": -2.637420177459717,
"loss": 0.8375,
"step": 609
},
{
"beta_dpo/beta": 0.09450700134038925,
"beta_dpo/beta_margin_grad_mean": -0.3247065842151642,
"beta_dpo/beta_margin_grad_std": 0.2635004222393036,
"beta_dpo/beta_margin_mean": 5.4679741859436035,
"beta_dpo/beta_margin_std": 10.656018257141113,
"beta_dpo/beta_used": 0.09450700134038925,
"beta_dpo/beta_used_raw": -0.16219983994960785,
"beta_dpo/gap_mean": 45.62958526611328,
"beta_dpo/gap_std": 63.55073547363281,
"beta_dpo/loss_margin_mean": 51.04203414916992,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9221466364323507,
"grad_norm": 231.1525421142578,
"learning_rate": 9.395165583732379e-09,
"logits/chosen": -2.659266710281372,
"logits/rejected": -2.67087459564209,
"loss": 1.1302,
"step": 610
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4915010929107666,
"beta_dpo/beta_margin_grad_std": 0.012929055839776993,
"beta_dpo/beta_margin_mean": 0.03402576968073845,
"beta_dpo/beta_margin_std": 0.0517716147005558,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.49436575174331665,
"beta_dpo/gap_mean": 44.93272399902344,
"beta_dpo/gap_std": 62.146820068359375,
"beta_dpo/loss_margin_mean": 34.0257682800293,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9236583522297808,
"grad_norm": 3.9169530868530273,
"learning_rate": 9.03946036001449e-09,
"logits/chosen": -2.686603307723999,
"logits/rejected": -2.7164177894592285,
"loss": 1.3521,
"step": 611
},
{
"beta_dpo/beta": 0.5432992577552795,
"beta_dpo/beta_margin_grad_mean": -0.15547636151313782,
"beta_dpo/beta_margin_grad_std": 0.30775806307792664,
"beta_dpo/beta_margin_mean": 31.741825103759766,
"beta_dpo/beta_margin_std": 52.140472412109375,
"beta_dpo/beta_used": 0.5432992577552795,
"beta_dpo/beta_used_raw": 0.5432992577552795,
"beta_dpo/gap_mean": 44.699771881103516,
"beta_dpo/gap_std": 63.031394958496094,
"beta_dpo/loss_margin_mean": 52.14598846435547,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9251700680272109,
"grad_norm": 903.3458251953125,
"learning_rate": 8.690495320571839e-09,
"logits/chosen": -2.7321419715881348,
"logits/rejected": -2.7761948108673096,
"loss": 1.0432,
"step": 612
},
{
"beta_dpo/beta": 0.6847690939903259,
"beta_dpo/beta_margin_grad_mean": -0.15985107421875,
"beta_dpo/beta_margin_grad_std": 0.342172235250473,
"beta_dpo/beta_margin_mean": 40.27997970581055,
"beta_dpo/beta_margin_std": 41.0804557800293,
"beta_dpo/beta_used": 0.6847690939903259,
"beta_dpo/beta_used_raw": 0.6847690939903259,
"beta_dpo/gap_mean": 47.31453323364258,
"beta_dpo/gap_std": 62.68354797363281,
"beta_dpo/loss_margin_mean": 58.733970642089844,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.926681783824641,
"grad_norm": 962.0908203125,
"learning_rate": 8.348280226706722e-09,
"logits/chosen": -2.6662611961364746,
"logits/rejected": -2.666443347930908,
"loss": 1.5508,
"step": 613
},
{
"beta_dpo/beta": 0.45406830310821533,
"beta_dpo/beta_margin_grad_mean": -0.14712685346603394,
"beta_dpo/beta_margin_grad_std": 0.3178809881210327,
"beta_dpo/beta_margin_mean": 26.492536544799805,
"beta_dpo/beta_margin_std": 27.315082550048828,
"beta_dpo/beta_used": 0.45406830310821533,
"beta_dpo/beta_used_raw": 0.45406830310821533,
"beta_dpo/gap_mean": 49.33307647705078,
"beta_dpo/gap_std": 62.564170837402344,
"beta_dpo/loss_margin_mean": 58.573570251464844,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9281934996220711,
"grad_norm": 1251.2257080078125,
"learning_rate": 8.012824650910937e-09,
"logits/chosen": -2.6480016708374023,
"logits/rejected": -2.650477409362793,
"loss": 3.7381,
"step": 614
},
{
"beta_dpo/beta": 0.2266254723072052,
"beta_dpo/beta_margin_grad_mean": -0.33106034994125366,
"beta_dpo/beta_margin_grad_std": 0.29621782898902893,
"beta_dpo/beta_margin_mean": 16.26293182373047,
"beta_dpo/beta_margin_std": 29.231475830078125,
"beta_dpo/beta_used": 0.2266254723072052,
"beta_dpo/beta_used_raw": -0.08037641644477844,
"beta_dpo/gap_mean": 51.66609191894531,
"beta_dpo/gap_std": 63.533966064453125,
"beta_dpo/loss_margin_mean": 55.990055084228516,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9297052154195011,
"grad_norm": 1692.2303466796875,
"learning_rate": 7.684137976598088e-09,
"logits/chosen": -2.7013931274414062,
"logits/rejected": -2.7283122539520264,
"loss": 4.8218,
"step": 615
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.48794546723365784,
"beta_dpo/beta_margin_grad_std": 0.01570320688188076,
"beta_dpo/beta_margin_mean": 0.04827674850821495,
"beta_dpo/beta_margin_std": 0.06289937347173691,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.3228529393672943,
"beta_dpo/gap_mean": 50.67670440673828,
"beta_dpo/gap_std": 63.63441848754883,
"beta_dpo/loss_margin_mean": 48.2767448425293,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9312169312169312,
"grad_norm": 4.385542869567871,
"learning_rate": 7.36222939784098e-09,
"logits/chosen": -2.641757011413574,
"logits/rejected": -2.667628288269043,
"loss": 1.3437,
"step": 616
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.48837581276893616,
"beta_dpo/beta_margin_grad_std": 0.01476855855435133,
"beta_dpo/beta_margin_mean": 0.04654671624302864,
"beta_dpo/beta_margin_std": 0.05915853753685951,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.8279430866241455,
"beta_dpo/gap_mean": 50.42936706542969,
"beta_dpo/gap_std": 62.97998046875,
"beta_dpo/loss_margin_mean": 46.546714782714844,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9327286470143613,
"grad_norm": 4.693226337432861,
"learning_rate": 7.047107919114586e-09,
"logits/chosen": -2.6448731422424316,
"logits/rejected": -2.678791046142578,
"loss": 1.3522,
"step": 617
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.48894065618515015,
"beta_dpo/beta_margin_grad_std": 0.014615356922149658,
"beta_dpo/beta_margin_mean": 0.044289905577898026,
"beta_dpo/beta_margin_std": 0.0585482232272625,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.20385049283504486,
"beta_dpo/gap_mean": 49.48298645019531,
"beta_dpo/gap_std": 61.756935119628906,
"beta_dpo/loss_margin_mean": 44.2899055480957,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9342403628117913,
"grad_norm": 4.098353862762451,
"learning_rate": 6.738782355044048e-09,
"logits/chosen": -2.6989409923553467,
"logits/rejected": -2.742736339569092,
"loss": 1.343,
"step": 618
},
{
"beta_dpo/beta": 0.05981595069169998,
"beta_dpo/beta_margin_grad_mean": -0.3577544689178467,
"beta_dpo/beta_margin_grad_std": 0.2613312900066376,
"beta_dpo/beta_margin_mean": 3.053403854370117,
"beta_dpo/beta_margin_std": 5.86320161819458,
"beta_dpo/beta_used": 0.05981595069169998,
"beta_dpo/beta_used_raw": -0.0736929327249527,
"beta_dpo/gap_mean": 48.345306396484375,
"beta_dpo/gap_std": 61.484928131103516,
"beta_dpo/loss_margin_mean": 45.6826286315918,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9357520786092215,
"grad_norm": 231.64077758789062,
"learning_rate": 6.437261330158206e-09,
"logits/chosen": -2.621891498565674,
"logits/rejected": -2.662619113922119,
"loss": 1.0265,
"step": 619
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4897708594799042,
"beta_dpo/beta_margin_grad_std": 0.016919763758778572,
"beta_dpo/beta_margin_mean": 0.04097270593047142,
"beta_dpo/beta_margin_std": 0.06777238100767136,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.27026185393333435,
"beta_dpo/gap_mean": 46.54864501953125,
"beta_dpo/gap_std": 62.34131622314453,
"beta_dpo/loss_margin_mean": 40.97270584106445,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9372637944066515,
"grad_norm": 4.460054397583008,
"learning_rate": 6.142553278648238e-09,
"logits/chosen": -2.6672420501708984,
"logits/rejected": -2.6600871086120605,
"loss": 1.3471,
"step": 620
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49017781019210815,
"beta_dpo/beta_margin_grad_std": 0.016016369685530663,
"beta_dpo/beta_margin_mean": 0.03934101015329361,
"beta_dpo/beta_margin_std": 0.06416355073451996,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.2695315480232239,
"beta_dpo/gap_mean": 44.97822570800781,
"beta_dpo/gap_std": 62.6904296875,
"beta_dpo/loss_margin_mean": 39.341007232666016,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9387755102040817,
"grad_norm": 5.045653343200684,
"learning_rate": 5.854666444131934e-09,
"logits/chosen": -2.657578468322754,
"logits/rejected": -2.72951078414917,
"loss": 1.3485,
"step": 621
},
{
"beta_dpo/beta": 0.04698815196752548,
"beta_dpo/beta_margin_grad_mean": -0.36041608452796936,
"beta_dpo/beta_margin_grad_std": 0.2401597797870636,
"beta_dpo/beta_margin_mean": 2.0823357105255127,
"beta_dpo/beta_margin_std": 4.529184341430664,
"beta_dpo/beta_used": 0.04698815196752548,
"beta_dpo/beta_used_raw": 0.003569558262825012,
"beta_dpo/gap_mean": 44.71732711791992,
"beta_dpo/gap_std": 62.19575881958008,
"beta_dpo/loss_margin_mean": 41.447021484375,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9402872260015117,
"grad_norm": 154.41818237304688,
"learning_rate": 5.573608879422875e-09,
"logits/chosen": -2.7066431045532227,
"logits/rejected": -2.7310023307800293,
"loss": 1.0009,
"step": 622
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4911818206310272,
"beta_dpo/beta_margin_grad_std": 0.014753853902220726,
"beta_dpo/beta_margin_mean": 0.0353122353553772,
"beta_dpo/beta_margin_std": 0.059086430817842484,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.30502232909202576,
"beta_dpo/gap_mean": 43.35747528076172,
"beta_dpo/gap_std": 61.82522964477539,
"beta_dpo/loss_margin_mean": 35.312232971191406,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9417989417989417,
"grad_norm": 4.888934135437012,
"learning_rate": 5.299388446305342e-09,
"logits/chosen": -2.701735496520996,
"logits/rejected": -2.7184062004089355,
"loss": 1.3507,
"step": 623
},
{
"beta_dpo/beta": 0.24348057806491852,
"beta_dpo/beta_margin_grad_mean": -0.3350781798362732,
"beta_dpo/beta_margin_grad_std": 0.29120299220085144,
"beta_dpo/beta_margin_mean": 14.595932960510254,
"beta_dpo/beta_margin_std": 29.82645034790039,
"beta_dpo/beta_used": 0.24348057806491852,
"beta_dpo/beta_used_raw": 0.03254944086074829,
"beta_dpo/gap_mean": 45.11775207519531,
"beta_dpo/gap_std": 63.17668914794922,
"beta_dpo/loss_margin_mean": 57.301719665527344,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9433106575963719,
"grad_norm": 992.426025390625,
"learning_rate": 5.03201281531429e-09,
"logits/chosen": -2.650886297225952,
"logits/rejected": -2.7026054859161377,
"loss": 3.3259,
"step": 624
},
{
"beta_dpo/beta": 0.17052163183689117,
"beta_dpo/beta_margin_grad_mean": -0.3552238941192627,
"beta_dpo/beta_margin_grad_std": 0.3150428533554077,
"beta_dpo/beta_margin_mean": 8.42929458618164,
"beta_dpo/beta_margin_std": 17.509658813476562,
"beta_dpo/beta_used": 0.17052163183689117,
"beta_dpo/beta_used_raw": 0.011962205171585083,
"beta_dpo/gap_mean": 44.10435485839844,
"beta_dpo/gap_std": 63.476158142089844,
"beta_dpo/loss_margin_mean": 38.564598083496094,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9448223733938019,
"grad_norm": 605.406982421875,
"learning_rate": 4.7714894655209174e-09,
"logits/chosen": -2.6164121627807617,
"logits/rejected": -2.6647305488586426,
"loss": 2.2167,
"step": 625
},
{
"beta_dpo/beta": 0.5995941162109375,
"beta_dpo/beta_margin_grad_mean": -0.37211719155311584,
"beta_dpo/beta_margin_grad_std": 0.32004252076148987,
"beta_dpo/beta_margin_mean": 34.19970703125,
"beta_dpo/beta_margin_std": 66.08871459960938,
"beta_dpo/beta_used": 0.5995941162109375,
"beta_dpo/beta_used_raw": 0.41719281673431396,
"beta_dpo/gap_mean": 45.84862518310547,
"beta_dpo/gap_std": 64.30546569824219,
"beta_dpo/loss_margin_mean": 53.29762649536133,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9463340891912321,
"grad_norm": 1210.412109375,
"learning_rate": 4.517825684323323e-09,
"logits/chosen": -2.609795093536377,
"logits/rejected": -2.6795785427093506,
"loss": 1.7134,
"step": 626
},
{
"beta_dpo/beta": 0.5332940816879272,
"beta_dpo/beta_margin_grad_mean": -0.17192615568637848,
"beta_dpo/beta_margin_grad_std": 0.35103702545166016,
"beta_dpo/beta_margin_mean": 29.522315979003906,
"beta_dpo/beta_margin_std": 42.06474304199219,
"beta_dpo/beta_used": 0.5332940816879272,
"beta_dpo/beta_used_raw": 0.5332940816879272,
"beta_dpo/gap_mean": 47.599143981933594,
"beta_dpo/gap_std": 65.24098205566406,
"beta_dpo/loss_margin_mean": 59.54181671142578,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9478458049886621,
"grad_norm": 1807.4373779296875,
"learning_rate": 4.271028567242818e-09,
"logits/chosen": -2.6838269233703613,
"logits/rejected": -2.7577645778656006,
"loss": 2.0301,
"step": 627
},
{
"beta_dpo/beta": 0.9057918190956116,
"beta_dpo/beta_margin_grad_mean": -0.1649731993675232,
"beta_dpo/beta_margin_grad_std": 0.3386947214603424,
"beta_dpo/beta_margin_mean": 59.12750244140625,
"beta_dpo/beta_margin_std": 72.5323486328125,
"beta_dpo/beta_used": 0.9057918190956116,
"beta_dpo/beta_used_raw": 0.9057918190956116,
"beta_dpo/gap_mean": 48.67937088012695,
"beta_dpo/gap_std": 66.37429809570312,
"beta_dpo/loss_margin_mean": 54.8211669921875,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9493575207860923,
"grad_norm": 1693.4759521484375,
"learning_rate": 4.0311050177251895e-09,
"logits/chosen": -2.6874046325683594,
"logits/rejected": -2.7123756408691406,
"loss": 3.691,
"step": 628
},
{
"beta_dpo/beta": 0.23058763146400452,
"beta_dpo/beta_margin_grad_mean": -0.22378967702388763,
"beta_dpo/beta_margin_grad_std": 0.3259342908859253,
"beta_dpo/beta_margin_mean": 10.928329467773438,
"beta_dpo/beta_margin_std": 18.11894989013672,
"beta_dpo/beta_used": 0.23058763146400452,
"beta_dpo/beta_used_raw": 0.23058763146400452,
"beta_dpo/gap_mean": 48.796653747558594,
"beta_dpo/gap_std": 63.12718200683594,
"beta_dpo/loss_margin_mean": 43.14870834350586,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9508692365835223,
"grad_norm": 795.4987182617188,
"learning_rate": 3.798061746947995e-09,
"logits/chosen": -2.6901187896728516,
"logits/rejected": -2.703768014907837,
"loss": 1.3562,
"step": 629
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4876892566680908,
"beta_dpo/beta_margin_grad_std": 0.015770576894283295,
"beta_dpo/beta_margin_mean": 0.04931268468499184,
"beta_dpo/beta_margin_std": 0.06319800019264221,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.21424424648284912,
"beta_dpo/gap_mean": 48.620269775390625,
"beta_dpo/gap_std": 62.675270080566406,
"beta_dpo/loss_margin_mean": 49.31268310546875,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9523809523809523,
"grad_norm": 3.732545852661133,
"learning_rate": 3.5719052736323806e-09,
"logits/chosen": -2.7009708881378174,
"logits/rejected": -2.7310919761657715,
"loss": 1.3441,
"step": 630
},
{
"beta_dpo/beta": 0.13656026124954224,
"beta_dpo/beta_margin_grad_mean": -0.3304164409637451,
"beta_dpo/beta_margin_grad_std": 0.28156745433807373,
"beta_dpo/beta_margin_mean": 9.255078315734863,
"beta_dpo/beta_margin_std": 15.206001281738281,
"beta_dpo/beta_used": 0.13656026124954224,
"beta_dpo/beta_used_raw": -0.24415385723114014,
"beta_dpo/gap_mean": 50.703880310058594,
"beta_dpo/gap_std": 63.21595764160156,
"beta_dpo/loss_margin_mean": 57.89152908325195,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9538926681783825,
"grad_norm": 520.2734985351562,
"learning_rate": 3.352641923861144e-09,
"logits/chosen": -2.6408612728118896,
"logits/rejected": -2.7099227905273438,
"loss": 1.6559,
"step": 631
},
{
"beta_dpo/beta": 0.35730719566345215,
"beta_dpo/beta_margin_grad_mean": -0.33872702717781067,
"beta_dpo/beta_margin_grad_std": 0.3022365868091583,
"beta_dpo/beta_margin_mean": 20.990575790405273,
"beta_dpo/beta_margin_std": 34.35315704345703,
"beta_dpo/beta_used": 0.35730719566345215,
"beta_dpo/beta_used_raw": 0.27769792079925537,
"beta_dpo/gap_mean": 51.40171813964844,
"beta_dpo/gap_std": 62.56011962890625,
"beta_dpo/loss_margin_mean": 55.30367660522461,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9554043839758125,
"grad_norm": 1589.6190185546875,
"learning_rate": 3.140277830901428e-09,
"logits/chosen": -2.6545939445495605,
"logits/rejected": -2.657032012939453,
"loss": 2.6254,
"step": 632
},
{
"beta_dpo/beta": 0.004144558683037758,
"beta_dpo/beta_margin_grad_mean": -0.45725518465042114,
"beta_dpo/beta_margin_grad_std": 0.08634334057569504,
"beta_dpo/beta_margin_mean": 0.1822691112756729,
"beta_dpo/beta_margin_std": 0.3737095594406128,
"beta_dpo/beta_used": 0.004144558683037758,
"beta_dpo/beta_used_raw": -0.6120012402534485,
"beta_dpo/gap_mean": 48.40888214111328,
"beta_dpo/gap_std": 62.7435302734375,
"beta_dpo/loss_margin_mean": 34.49918746948242,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9569160997732427,
"grad_norm": 14.633933067321777,
"learning_rate": 2.9348189350335007e-09,
"logits/chosen": -2.6546261310577393,
"logits/rejected": -2.6814990043640137,
"loss": 1.2401,
"step": 633
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4926002025604248,
"beta_dpo/beta_margin_grad_std": 0.014817976392805576,
"beta_dpo/beta_margin_mean": 0.029641717672348022,
"beta_dpo/beta_margin_std": 0.059361252933740616,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -1.0031733512878418,
"beta_dpo/gap_mean": 45.527591705322266,
"beta_dpo/gap_std": 62.52671432495117,
"beta_dpo/loss_margin_mean": 29.64171600341797,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9584278155706727,
"grad_norm": 3.942791700363159,
"learning_rate": 2.736270983384276e-09,
"logits/chosen": -2.637674331665039,
"logits/rejected": -2.6252646446228027,
"loss": 1.3601,
"step": 634
},
{
"beta_dpo/beta": 0.02143486775457859,
"beta_dpo/beta_margin_grad_mean": -0.4092518091201782,
"beta_dpo/beta_margin_grad_std": 0.24451853334903717,
"beta_dpo/beta_margin_mean": 0.7495732307434082,
"beta_dpo/beta_margin_std": 2.171191692352295,
"beta_dpo/beta_used": 0.02143486775457859,
"beta_dpo/beta_used_raw": -0.07430359721183777,
"beta_dpo/gap_mean": 43.78990173339844,
"beta_dpo/gap_std": 63.080482482910156,
"beta_dpo/loss_margin_mean": 39.18009567260742,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9599395313681028,
"grad_norm": 128.81625366210938,
"learning_rate": 2.5446395297668287e-09,
"logits/chosen": -2.723635673522949,
"logits/rejected": -2.7572028636932373,
"loss": 1.1247,
"step": 635
},
{
"beta_dpo/beta": 0.10870007425546646,
"beta_dpo/beta_margin_grad_mean": -0.3280467092990875,
"beta_dpo/beta_margin_grad_std": 0.26943886280059814,
"beta_dpo/beta_margin_mean": 5.701779842376709,
"beta_dpo/beta_margin_std": 10.197867393493652,
"beta_dpo/beta_used": 0.10870007425546646,
"beta_dpo/beta_used_raw": 0.04050559550523758,
"beta_dpo/gap_mean": 44.87833786010742,
"beta_dpo/gap_std": 62.010650634765625,
"beta_dpo/loss_margin_mean": 50.85565948486328,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9614512471655329,
"grad_norm": 454.9655456542969,
"learning_rate": 2.359929934524829e-09,
"logits/chosen": -2.6658878326416016,
"logits/rejected": -2.726693630218506,
"loss": 1.3966,
"step": 636
},
{
"beta_dpo/beta": 0.02533043548464775,
"beta_dpo/beta_margin_grad_mean": -0.3730124533176422,
"beta_dpo/beta_margin_grad_std": 0.24714794754981995,
"beta_dpo/beta_margin_mean": 1.1759974956512451,
"beta_dpo/beta_margin_std": 2.531526565551758,
"beta_dpo/beta_used": 0.02533043548464775,
"beta_dpo/beta_used_raw": -0.0743693932890892,
"beta_dpo/gap_mean": 45.429656982421875,
"beta_dpo/gap_std": 61.94932556152344,
"beta_dpo/loss_margin_mean": 47.75582504272461,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9629629629629629,
"grad_norm": 112.1050033569336,
"learning_rate": 2.1821473643827137e-09,
"logits/chosen": -2.692873001098633,
"logits/rejected": -2.7226829528808594,
"loss": 1.0988,
"step": 637
},
{
"beta_dpo/beta": 0.392764687538147,
"beta_dpo/beta_margin_grad_mean": -0.19437937438488007,
"beta_dpo/beta_margin_grad_std": 0.3297037184238434,
"beta_dpo/beta_margin_mean": 20.477251052856445,
"beta_dpo/beta_margin_std": 27.82265281677246,
"beta_dpo/beta_used": 0.392764687538147,
"beta_dpo/beta_used_raw": 0.392764687538147,
"beta_dpo/gap_mean": 46.582801818847656,
"beta_dpo/gap_std": 62.45842361450195,
"beta_dpo/loss_margin_mean": 51.22517013549805,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9644746787603931,
"grad_norm": 1120.524658203125,
"learning_rate": 2.0112967923011646e-09,
"logits/chosen": -2.6447203159332275,
"logits/rejected": -2.671504020690918,
"loss": 1.4728,
"step": 638
},
{
"beta_dpo/beta": 0.34388256072998047,
"beta_dpo/beta_margin_grad_mean": -0.3614674210548401,
"beta_dpo/beta_margin_grad_std": 0.3070048689842224,
"beta_dpo/beta_margin_mean": 17.764925003051758,
"beta_dpo/beta_margin_std": 32.02020263671875,
"beta_dpo/beta_used": 0.34388256072998047,
"beta_dpo/beta_used_raw": -0.06688737869262695,
"beta_dpo/gap_mean": 46.94060516357422,
"beta_dpo/gap_std": 61.81489562988281,
"beta_dpo/loss_margin_mean": 47.23959732055664,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9659863945578231,
"grad_norm": 1660.4727783203125,
"learning_rate": 1.847382997337943e-09,
"logits/chosen": -2.6560726165771484,
"logits/rejected": -2.696206569671631,
"loss": 1.579,
"step": 639
},
{
"beta_dpo/beta": 0.7060970067977905,
"beta_dpo/beta_margin_grad_mean": -0.1816413551568985,
"beta_dpo/beta_margin_grad_std": 0.37319278717041016,
"beta_dpo/beta_margin_mean": 36.43185043334961,
"beta_dpo/beta_margin_std": 47.868900299072266,
"beta_dpo/beta_used": 0.7060970067977905,
"beta_dpo/beta_used_raw": 0.7060970067977905,
"beta_dpo/gap_mean": 47.429561614990234,
"beta_dpo/gap_std": 62.916141510009766,
"beta_dpo/loss_margin_mean": 51.59613037109375,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9674981103552532,
"grad_norm": 2258.9169921875,
"learning_rate": 1.690410564514244e-09,
"logits/chosen": -2.6873271465301514,
"logits/rejected": -2.727292537689209,
"loss": 5.4355,
"step": 640
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49105432629585266,
"beta_dpo/beta_margin_grad_std": 0.0135034816339612,
"beta_dpo/beta_margin_mean": 0.03582516312599182,
"beta_dpo/beta_margin_std": 0.05409466102719307,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.8339321613311768,
"beta_dpo/gap_mean": 45.91423416137695,
"beta_dpo/gap_std": 62.17848205566406,
"beta_dpo/loss_margin_mean": 35.82516098022461,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9690098261526833,
"grad_norm": 4.1548380851745605,
"learning_rate": 1.5403838846864692e-09,
"logits/chosen": -2.6482882499694824,
"logits/rejected": -2.662318468093872,
"loss": 1.3567,
"step": 641
},
{
"beta_dpo/beta": 0.23349756002426147,
"beta_dpo/beta_margin_grad_mean": -0.3614916503429413,
"beta_dpo/beta_margin_grad_std": 0.29659217596054077,
"beta_dpo/beta_margin_mean": 11.944047927856445,
"beta_dpo/beta_margin_std": 25.50196075439453,
"beta_dpo/beta_used": 0.23349756002426147,
"beta_dpo/beta_used_raw": -0.3991941511631012,
"beta_dpo/gap_mean": 45.326568603515625,
"beta_dpo/gap_std": 62.26630401611328,
"beta_dpo/loss_margin_mean": 42.00018310546875,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9705215419501134,
"grad_norm": 867.032958984375,
"learning_rate": 1.3973071544233218e-09,
"logits/chosen": -2.685100793838501,
"logits/rejected": -2.67539381980896,
"loss": 2.268,
"step": 642
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49193012714385986,
"beta_dpo/beta_margin_grad_std": 0.014358686283230782,
"beta_dpo/beta_margin_mean": 0.03231479600071907,
"beta_dpo/beta_margin_std": 0.05750858411192894,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.6026580333709717,
"beta_dpo/gap_mean": 43.12543869018555,
"beta_dpo/gap_std": 61.69993591308594,
"beta_dpo/loss_margin_mean": 32.31479263305664,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9720332577475435,
"grad_norm": 4.907909870147705,
"learning_rate": 1.261184375888541e-09,
"logits/chosen": -2.6672868728637695,
"logits/rejected": -2.7138211727142334,
"loss": 1.3558,
"step": 643
},
{
"beta_dpo/beta": 0.6081154942512512,
"beta_dpo/beta_margin_grad_mean": -0.34746262431144714,
"beta_dpo/beta_margin_grad_std": 0.31283101439476013,
"beta_dpo/beta_margin_mean": 30.244873046875,
"beta_dpo/beta_margin_std": 66.24939727783203,
"beta_dpo/beta_used": 0.6081154942512512,
"beta_dpo/beta_used_raw": 0.6020084023475647,
"beta_dpo/gap_mean": 42.613712310791016,
"beta_dpo/gap_std": 62.06809616088867,
"beta_dpo/loss_margin_mean": 45.869632720947266,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9735449735449735,
"grad_norm": 1124.218017578125,
"learning_rate": 1.1320193567288527e-09,
"logits/chosen": -2.6450648307800293,
"logits/rejected": -2.667994976043701,
"loss": 2.2666,
"step": 644
},
{
"beta_dpo/beta": 0.4541359841823578,
"beta_dpo/beta_margin_grad_mean": -0.27940618991851807,
"beta_dpo/beta_margin_grad_std": 0.2722318768501282,
"beta_dpo/beta_margin_mean": 26.9298152923584,
"beta_dpo/beta_margin_std": 44.21894836425781,
"beta_dpo/beta_used": 0.4541359841823578,
"beta_dpo/beta_used_raw": 0.40730053186416626,
"beta_dpo/gap_mean": 44.1930046081543,
"beta_dpo/gap_std": 61.355472564697266,
"beta_dpo/loss_margin_mean": 47.67912673950195,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9750566893424036,
"grad_norm": 470.2606201171875,
"learning_rate": 1.0098157099674987e-09,
"logits/chosen": -2.657165050506592,
"logits/rejected": -2.6617069244384766,
"loss": 1.38,
"step": 645
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4881736934185028,
"beta_dpo/beta_margin_grad_std": 0.01566481776535511,
"beta_dpo/beta_margin_mean": 0.04736267775297165,
"beta_dpo/beta_margin_std": 0.06274493038654327,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.4280562400817871,
"beta_dpo/gap_mean": 44.21896743774414,
"beta_dpo/gap_std": 61.35779571533203,
"beta_dpo/loss_margin_mean": 47.362674713134766,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9765684051398337,
"grad_norm": 4.742650032043457,
"learning_rate": 8.945768539031783e-10,
"logits/chosen": -2.672578811645508,
"logits/rejected": -2.6946420669555664,
"loss": 1.3519,
"step": 646
},
{
"beta_dpo/beta": 0.47655943036079407,
"beta_dpo/beta_margin_grad_mean": -0.32146018743515015,
"beta_dpo/beta_margin_grad_std": 0.2852969765663147,
"beta_dpo/beta_margin_mean": 30.615407943725586,
"beta_dpo/beta_margin_std": 52.7960319519043,
"beta_dpo/beta_used": 0.47655943036079407,
"beta_dpo/beta_used_raw": 0.3814311623573303,
"beta_dpo/gap_mean": 46.42333984375,
"beta_dpo/gap_std": 61.456687927246094,
"beta_dpo/loss_margin_mean": 59.471378326416016,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9780801209372638,
"grad_norm": 1220.833984375,
"learning_rate": 7.863060120144316e-10,
"logits/chosen": -2.688619613647461,
"logits/rejected": -2.7265939712524414,
"loss": 1.394,
"step": 647
},
{
"beta_dpo/beta": 0.5712614059448242,
"beta_dpo/beta_margin_grad_mean": -0.35060861706733704,
"beta_dpo/beta_margin_grad_std": 0.3148672878742218,
"beta_dpo/beta_margin_mean": 32.07784652709961,
"beta_dpo/beta_margin_std": 68.7822036743164,
"beta_dpo/beta_used": 0.5712614059448242,
"beta_dpo/beta_used_raw": 0.37917831540107727,
"beta_dpo/gap_mean": 47.899803161621094,
"beta_dpo/gap_std": 62.45370864868164,
"beta_dpo/loss_margin_mean": 48.78077697753906,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9795918367346939,
"grad_norm": 1129.4110107421875,
"learning_rate": 6.850062128694045e-10,
"logits/chosen": -2.653449535369873,
"logits/rejected": -2.67905330657959,
"loss": 1.9834,
"step": 648
},
{
"beta_dpo/beta": 0.4049026668071747,
"beta_dpo/beta_margin_grad_mean": -0.20922957360744476,
"beta_dpo/beta_margin_grad_std": 0.3691186010837555,
"beta_dpo/beta_margin_mean": 20.532917022705078,
"beta_dpo/beta_margin_std": 29.904996871948242,
"beta_dpo/beta_used": 0.4049026668071747,
"beta_dpo/beta_used_raw": 0.4049026668071747,
"beta_dpo/gap_mean": 47.93426513671875,
"beta_dpo/gap_std": 62.62583923339844,
"beta_dpo/loss_margin_mean": 47.987239837646484,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.981103552532124,
"grad_norm": 1016.6148681640625,
"learning_rate": 5.906802900412788e-10,
"logits/chosen": -2.611091136932373,
"logits/rejected": -2.6330745220184326,
"loss": 2.0432,
"step": 649
},
{
"beta_dpo/beta": 0.1802101582288742,
"beta_dpo/beta_margin_grad_mean": -0.21415099501609802,
"beta_dpo/beta_margin_grad_std": 0.3358995318412781,
"beta_dpo/beta_margin_mean": 9.91841983795166,
"beta_dpo/beta_margin_std": 12.939188957214355,
"beta_dpo/beta_used": 0.1802101582288742,
"beta_dpo/beta_used_raw": 0.1802101582288742,
"beta_dpo/gap_mean": 48.672733306884766,
"beta_dpo/gap_std": 63.85776138305664,
"beta_dpo/loss_margin_mean": 55.11127853393555,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.982615268329554,
"grad_norm": 547.158935546875,
"learning_rate": 5.033308820289184e-10,
"logits/chosen": -2.678342819213867,
"logits/rejected": -2.7141497135162354,
"loss": 1.4694,
"step": 650
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.490220844745636,
"beta_dpo/beta_margin_grad_std": 0.014912915416061878,
"beta_dpo/beta_margin_mean": 0.039162568747997284,
"beta_dpo/beta_margin_std": 0.05972345918416977,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.6605195999145508,
"beta_dpo/gap_mean": 47.09050750732422,
"beta_dpo/gap_std": 63.09800720214844,
"beta_dpo/loss_margin_mean": 39.162567138671875,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9841269841269841,
"grad_norm": 3.8208343982696533,
"learning_rate": 4.2296043218295606e-10,
"logits/chosen": -2.668421506881714,
"logits/rejected": -2.7090401649475098,
"loss": 1.3531,
"step": 651
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4899626076221466,
"beta_dpo/beta_margin_grad_std": 0.015524974092841148,
"beta_dpo/beta_margin_mean": 0.040194738656282425,
"beta_dpo/beta_margin_std": 0.062190212309360504,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.5771675109863281,
"beta_dpo/gap_mean": 46.139862060546875,
"beta_dpo/gap_std": 62.530296325683594,
"beta_dpo/loss_margin_mean": 40.19473648071289,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9856386999244142,
"grad_norm": 4.492720603942871,
"learning_rate": 3.4957118863768176e-10,
"logits/chosen": -2.669163227081299,
"logits/rejected": -2.6763458251953125,
"loss": 1.3523,
"step": 652
},
{
"beta_dpo/beta": 0.08541844040155411,
"beta_dpo/beta_margin_grad_mean": -0.36232131719589233,
"beta_dpo/beta_margin_grad_std": 0.269138902425766,
"beta_dpo/beta_margin_mean": 4.381135940551758,
"beta_dpo/beta_margin_std": 8.300312995910645,
"beta_dpo/beta_used": 0.08541844040155411,
"beta_dpo/beta_used_raw": -0.15151172876358032,
"beta_dpo/gap_mean": 46.68292999267578,
"beta_dpo/gap_std": 63.072364807128906,
"beta_dpo/loss_margin_mean": 51.156280517578125,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9871504157218443,
"grad_norm": 235.69155883789062,
"learning_rate": 2.831652042480093e-10,
"logits/chosen": -2.698141098022461,
"logits/rejected": -2.714226722717285,
"loss": 1.165,
"step": 653
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4916025996208191,
"beta_dpo/beta_margin_grad_std": 0.014544263482093811,
"beta_dpo/beta_margin_mean": 0.033634938299655914,
"beta_dpo/beta_margin_std": 0.05826953798532486,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.8502531051635742,
"beta_dpo/gap_mean": 45.716835021972656,
"beta_dpo/gap_std": 62.543975830078125,
"beta_dpo/loss_margin_mean": 33.63493728637695,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9886621315192744,
"grad_norm": 4.136869430541992,
"learning_rate": 2.2374433653205016e-10,
"logits/chosen": -2.6694984436035156,
"logits/rejected": -2.724447727203369,
"loss": 1.3572,
"step": 654
},
{
"beta_dpo/beta": 0.005714002996683121,
"beta_dpo/beta_margin_grad_mean": -0.42852216958999634,
"beta_dpo/beta_margin_grad_std": 0.10619087517261505,
"beta_dpo/beta_margin_mean": 0.31733012199401855,
"beta_dpo/beta_margin_std": 0.49290141463279724,
"beta_dpo/beta_used": 0.005714002996683121,
"beta_dpo/beta_used_raw": 0.0007717101834714413,
"beta_dpo/gap_mean": 44.994659423828125,
"beta_dpo/gap_std": 61.005882263183594,
"beta_dpo/loss_margin_mean": 45.900482177734375,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9901738473167044,
"grad_norm": 22.22044563293457,
"learning_rate": 1.7131024761923852e-10,
"logits/chosen": -2.679999589920044,
"logits/rejected": -2.732828140258789,
"loss": 1.1879,
"step": 655
},
{
"beta_dpo/beta": 0.6009721159934998,
"beta_dpo/beta_margin_grad_mean": -0.3375764787197113,
"beta_dpo/beta_margin_grad_std": 0.31433162093162537,
"beta_dpo/beta_margin_mean": 33.633056640625,
"beta_dpo/beta_margin_std": 62.72954177856445,
"beta_dpo/beta_used": 0.6009721159934998,
"beta_dpo/beta_used_raw": 0.2453356385231018,
"beta_dpo/gap_mean": 45.758460998535156,
"beta_dpo/gap_std": 61.461029052734375,
"beta_dpo/loss_margin_mean": 52.54164123535156,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9916855631141346,
"grad_norm": 1435.6373291015625,
"learning_rate": 1.2586440420372934e-10,
"logits/chosen": -2.749436378479004,
"logits/rejected": -2.761258125305176,
"loss": 2.4934,
"step": 656
},
{
"beta_dpo/beta": 0.5924053192138672,
"beta_dpo/beta_margin_grad_mean": -0.10536504536867142,
"beta_dpo/beta_margin_grad_std": 0.27900680899620056,
"beta_dpo/beta_margin_mean": 37.769229888916016,
"beta_dpo/beta_margin_std": 38.588802337646484,
"beta_dpo/beta_used": 0.5924053192138672,
"beta_dpo/beta_used_raw": 0.5924053192138672,
"beta_dpo/gap_mean": 48.455291748046875,
"beta_dpo/gap_std": 62.5181884765625,
"beta_dpo/loss_margin_mean": 64.0851058959961,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9931972789115646,
"grad_norm": 901.1080932617188,
"learning_rate": 8.740807750345913e-11,
"logits/chosen": -2.6236462593078613,
"logits/rejected": -2.6605048179626465,
"loss": 0.882,
"step": 657
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.4894210994243622,
"beta_dpo/beta_margin_grad_std": 0.01632015034556389,
"beta_dpo/beta_margin_mean": 0.04237865284085274,
"beta_dpo/beta_margin_std": 0.06539247184991837,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.7204711437225342,
"beta_dpo/gap_mean": 48.004180908203125,
"beta_dpo/gap_std": 62.23754119873047,
"beta_dpo/loss_margin_mean": 42.3786506652832,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9947089947089947,
"grad_norm": 3.7113711833953857,
"learning_rate": 5.594234322453539e-11,
"logits/chosen": -2.6681385040283203,
"logits/rejected": -2.682823419570923,
"loss": 1.353,
"step": 658
},
{
"beta_dpo/beta": 0.0010000000474974513,
"beta_dpo/beta_margin_grad_mean": -0.49211886525154114,
"beta_dpo/beta_margin_grad_std": 0.015781141817569733,
"beta_dpo/beta_margin_mean": 0.03156294301152229,
"beta_dpo/beta_margin_std": 0.06321074068546295,
"beta_dpo/beta_used": 0.0010000000474974513,
"beta_dpo/beta_used_raw": -0.6728357076644897,
"beta_dpo/gap_mean": 46.467201232910156,
"beta_dpo/gap_std": 62.880088806152344,
"beta_dpo/loss_margin_mean": 31.562942504882812,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9962207105064248,
"grad_norm": 3.7238643169403076,
"learning_rate": 3.146808153123293e-11,
"logits/chosen": -2.6363942623138428,
"logits/rejected": -2.659372568130493,
"loss": 1.3537,
"step": 659
},
{
"beta_dpo/beta": 0.9708003997802734,
"beta_dpo/beta_margin_grad_mean": -0.17687278985977173,
"beta_dpo/beta_margin_grad_std": 0.3737434148788452,
"beta_dpo/beta_margin_mean": 49.68276596069336,
"beta_dpo/beta_margin_std": 61.30149459838867,
"beta_dpo/beta_used": 0.9708003997802734,
"beta_dpo/beta_used_raw": 0.9708003997802734,
"beta_dpo/gap_mean": 45.81683349609375,
"beta_dpo/gap_std": 62.97257995605469,
"beta_dpo/loss_margin_mean": 51.25537872314453,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9977324263038548,
"grad_norm": 2283.9326171875,
"learning_rate": 1.3985977021235829e-11,
"logits/chosen": -2.642078399658203,
"logits/rejected": -2.6817831993103027,
"loss": 3.5476,
"step": 660
},
{
"beta_dpo/beta": 0.07907932996749878,
"beta_dpo/beta_margin_grad_mean": -0.3486056625843048,
"beta_dpo/beta_margin_grad_std": 0.2862682342529297,
"beta_dpo/beta_margin_mean": 3.647887945175171,
"beta_dpo/beta_margin_std": 8.705880165100098,
"beta_dpo/beta_used": 0.07907932996749878,
"beta_dpo/beta_used_raw": -0.3208945393562317,
"beta_dpo/gap_mean": 43.73834991455078,
"beta_dpo/gap_std": 63.08509063720703,
"beta_dpo/loss_margin_mean": 33.91815185546875,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.999244142101285,
"grad_norm": 502.38873291015625,
"learning_rate": 3.4965187065971735e-12,
"logits/chosen": -2.689065933227539,
"logits/rejected": -2.731311559677124,
"loss": 1.2566,
"step": 661
},
{
"epoch": 0.999244142101285,
"step": 661,
"total_flos": 0.0,
"train_loss": 1.3336656033181207,
"train_runtime": 3770.6222,
"train_samples_per_second": 11.228,
"train_steps_per_second": 0.175
}
],
"logging_steps": 1,
"max_steps": 661,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}