Files
llama-3-8b-base-beta-dpo-ul…/trainer_state.json
ModelHub XC 4768635cd7 初始化项目,由ModelHub XC社区提供模型
Model: W-61/llama-3-8b-base-beta-dpo-ultrafeedback-4xh200-batch-128-20260424-044124
Source: Original Platform
2026-05-09 15:51:41 +08:00

6752 lines
234 KiB
JSON

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9989528795811519,
"eval_steps": 200,
"global_step": 477,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"beta_dpo/beta_used": 0.01011180505156517,
"beta_dpo/beta_used_raw": 0.01011180505156517,
"beta_dpo/gap_mean": -0.015508938580751419,
"beta_dpo/gap_std": 0.2148897498846054,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.0020942408376963353,
"grad_norm": 34.31053161621094,
"learning_rate": 0.0,
"logits/chosen": -0.5995081663131714,
"logits/rejected": -0.6144353747367859,
"loss": 5.5447,
"step": 1
},
{
"beta_dpo/beta_used": 0.009844036772847176,
"beta_dpo/beta_used_raw": 0.009844036772847176,
"beta_dpo/gap_mean": -0.0009143210481852293,
"beta_dpo/gap_std": 0.4510902464389801,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.004188481675392671,
"grad_norm": 29.54327392578125,
"learning_rate": 1.0416666666666666e-08,
"logits/chosen": -0.6431564688682556,
"logits/rejected": -0.5975700616836548,
"loss": 5.5466,
"step": 2
},
{
"beta_dpo/beta_used": 0.010173876769840717,
"beta_dpo/beta_used_raw": 0.010173876769840717,
"beta_dpo/gap_mean": -0.016529276967048645,
"beta_dpo/gap_std": 0.5596910119056702,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.0062827225130890054,
"grad_norm": 29.85909652709961,
"learning_rate": 2.083333333333333e-08,
"logits/chosen": -0.6880007982254028,
"logits/rejected": -0.7442882061004639,
"loss": 5.5438,
"step": 3
},
{
"beta_dpo/beta_used": 0.010584751144051552,
"beta_dpo/beta_used_raw": 0.010584751144051552,
"beta_dpo/gap_mean": -0.009412091225385666,
"beta_dpo/gap_std": 0.690794050693512,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.008376963350785341,
"grad_norm": 38.64099884033203,
"learning_rate": 3.125e-08,
"logits/chosen": -0.6261060833930969,
"logits/rejected": -0.5069095492362976,
"loss": 5.5411,
"step": 4
},
{
"beta_dpo/beta_used": 0.009799078106880188,
"beta_dpo/beta_used_raw": 0.009799078106880188,
"beta_dpo/gap_mean": 0.02601781114935875,
"beta_dpo/gap_std": 0.7904683947563171,
"beta_dpo/mask_keep_frac": 0.9375,
"epoch": 0.010471204188481676,
"grad_norm": 36.012081146240234,
"learning_rate": 4.166666666666666e-08,
"logits/chosen": -0.5312447547912598,
"logits/rejected": -0.5814427137374878,
"loss": 5.5449,
"step": 5
},
{
"beta_dpo/beta_used": 0.009586527943611145,
"beta_dpo/beta_used_raw": 0.009586527943611145,
"beta_dpo/gap_mean": 0.041127197444438934,
"beta_dpo/gap_std": 0.8036903738975525,
"beta_dpo/mask_keep_frac": 0.625,
"epoch": 0.012565445026178011,
"grad_norm": 30.233118057250977,
"learning_rate": 5.208333333333333e-08,
"logits/chosen": -0.6583905220031738,
"logits/rejected": -0.656255304813385,
"loss": 5.5456,
"step": 6
},
{
"beta_dpo/beta_used": 0.010109594091773033,
"beta_dpo/beta_used_raw": 0.010109594091773033,
"beta_dpo/gap_mean": 0.05177360400557518,
"beta_dpo/gap_std": 0.7368500232696533,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.014659685863874346,
"grad_norm": 33.09341812133789,
"learning_rate": 6.25e-08,
"logits/chosen": -0.5148481726646423,
"logits/rejected": -0.5897587537765503,
"loss": 5.5416,
"step": 7
},
{
"beta_dpo/beta_used": 0.010191082023084164,
"beta_dpo/beta_used_raw": 0.010191082023084164,
"beta_dpo/gap_mean": 0.01677882857620716,
"beta_dpo/gap_std": 0.7229223847389221,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.016753926701570682,
"grad_norm": 35.61125564575195,
"learning_rate": 7.291666666666667e-08,
"logits/chosen": -0.7006567716598511,
"logits/rejected": -0.7195206880569458,
"loss": 5.5429,
"step": 8
},
{
"beta_dpo/beta_used": 0.009976114146411419,
"beta_dpo/beta_used_raw": 0.009976114146411419,
"beta_dpo/gap_mean": 0.020590361207723618,
"beta_dpo/gap_std": 0.7182962894439697,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.018848167539267015,
"grad_norm": 28.307985305786133,
"learning_rate": 8.333333333333333e-08,
"logits/chosen": -0.6901550889015198,
"logits/rejected": -0.6974665522575378,
"loss": 5.5439,
"step": 9
},
{
"beta_dpo/beta_used": 0.009834789671003819,
"beta_dpo/beta_used_raw": 0.009834789671003819,
"beta_dpo/gap_mean": 0.01076302770525217,
"beta_dpo/gap_std": 0.699016809463501,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.020942408376963352,
"grad_norm": 28.891916275024414,
"learning_rate": 9.375e-08,
"logits/chosen": -0.6282883882522583,
"logits/rejected": -0.6301394701004028,
"loss": 5.5458,
"step": 10
},
{
"beta_dpo/beta_used": 0.009896289557218552,
"beta_dpo/beta_used_raw": 0.009896289557218552,
"beta_dpo/gap_mean": -0.03149949014186859,
"beta_dpo/gap_std": 0.6834414005279541,
"beta_dpo/mask_keep_frac": 0.65625,
"epoch": 0.023036649214659685,
"grad_norm": 33.830101013183594,
"learning_rate": 1.0416666666666667e-07,
"logits/chosen": -0.5225973129272461,
"logits/rejected": -0.6075971126556396,
"loss": 5.5463,
"step": 11
},
{
"beta_dpo/beta_used": 0.010411511175334454,
"beta_dpo/beta_used_raw": 0.010411511175334454,
"beta_dpo/gap_mean": 0.003659537062048912,
"beta_dpo/gap_std": 0.6871599555015564,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.025130890052356022,
"grad_norm": 35.04637145996094,
"learning_rate": 1.1458333333333332e-07,
"logits/chosen": -0.6008322834968567,
"logits/rejected": -0.5699715614318848,
"loss": 5.5394,
"step": 12
},
{
"beta_dpo/beta_used": 0.009875054471194744,
"beta_dpo/beta_used_raw": 0.009875054471194744,
"beta_dpo/gap_mean": 0.05279437080025673,
"beta_dpo/gap_std": 0.6677561402320862,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.027225130890052355,
"grad_norm": 31.19098472595215,
"learning_rate": 1.25e-07,
"logits/chosen": -0.7021859288215637,
"logits/rejected": -0.6853169202804565,
"loss": 5.5435,
"step": 13
},
{
"beta_dpo/beta_used": 0.009974612854421139,
"beta_dpo/beta_used_raw": 0.009974612854421139,
"beta_dpo/gap_mean": 0.024167632684111595,
"beta_dpo/gap_std": 0.6448996663093567,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.02931937172774869,
"grad_norm": 31.935443878173828,
"learning_rate": 1.3541666666666666e-07,
"logits/chosen": -0.5705533027648926,
"logits/rejected": -0.6388446688652039,
"loss": 5.5451,
"step": 14
},
{
"beta_dpo/beta_used": 0.010165886022150517,
"beta_dpo/beta_used_raw": 0.010165886022150517,
"beta_dpo/gap_mean": 0.050552304834127426,
"beta_dpo/gap_std": 0.682822585105896,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.031413612565445025,
"grad_norm": 35.0179443359375,
"learning_rate": 1.4583333333333335e-07,
"logits/chosen": -0.6065237522125244,
"logits/rejected": -0.6314604878425598,
"loss": 5.5405,
"step": 15
},
{
"beta_dpo/beta_used": 0.009956092573702335,
"beta_dpo/beta_used_raw": 0.009956092573702335,
"beta_dpo/gap_mean": 0.07386220246553421,
"beta_dpo/gap_std": 0.705920934677124,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.033507853403141365,
"grad_norm": 33.774627685546875,
"learning_rate": 1.5624999999999999e-07,
"logits/chosen": -0.6334318518638611,
"logits/rejected": -0.6558720469474792,
"loss": 5.5422,
"step": 16
},
{
"beta_dpo/beta_used": 0.009694953449070454,
"beta_dpo/beta_used_raw": 0.009694953449070454,
"beta_dpo/gap_mean": 0.004169606603682041,
"beta_dpo/gap_std": 0.7264626622200012,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.0356020942408377,
"grad_norm": 33.06454086303711,
"learning_rate": 1.6666666666666665e-07,
"logits/chosen": -0.5417214632034302,
"logits/rejected": -0.5611686110496521,
"loss": 5.5471,
"step": 17
},
{
"beta_dpo/beta_used": 0.010714413598179817,
"beta_dpo/beta_used_raw": 0.010714413598179817,
"beta_dpo/gap_mean": 0.02533562108874321,
"beta_dpo/gap_std": 0.7237865924835205,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.03769633507853403,
"grad_norm": 37.169307708740234,
"learning_rate": 1.7708333333333334e-07,
"logits/chosen": -0.4865175485610962,
"logits/rejected": -0.5460414886474609,
"loss": 5.5388,
"step": 18
},
{
"beta_dpo/beta_used": 0.01004834845662117,
"beta_dpo/beta_used_raw": 0.01004834845662117,
"beta_dpo/gap_mean": 0.029139002785086632,
"beta_dpo/gap_std": 0.7092792987823486,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.039790575916230364,
"grad_norm": 33.33395004272461,
"learning_rate": 1.875e-07,
"logits/chosen": -0.639908492565155,
"logits/rejected": -0.6775057315826416,
"loss": 5.5437,
"step": 19
},
{
"beta_dpo/beta_used": 0.009934858419001102,
"beta_dpo/beta_used_raw": 0.009934858419001102,
"beta_dpo/gap_mean": 0.03032633848488331,
"beta_dpo/gap_std": 0.6968315839767456,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.041884816753926704,
"grad_norm": 32.22944259643555,
"learning_rate": 1.9791666666666664e-07,
"logits/chosen": -0.658079206943512,
"logits/rejected": -0.6970005631446838,
"loss": 5.5448,
"step": 20
},
{
"beta_dpo/beta_used": 0.010048963129520416,
"beta_dpo/beta_used_raw": 0.010048963129520416,
"beta_dpo/gap_mean": 0.06978250294923782,
"beta_dpo/gap_std": 0.7305155992507935,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.04397905759162304,
"grad_norm": 31.048315048217773,
"learning_rate": 2.0833333333333333e-07,
"logits/chosen": -0.6539341807365417,
"logits/rejected": -0.6931516528129578,
"loss": 5.5406,
"step": 21
},
{
"beta_dpo/beta_used": 0.009562183171510696,
"beta_dpo/beta_used_raw": 0.009562183171510696,
"beta_dpo/gap_mean": 0.05501282587647438,
"beta_dpo/gap_std": 0.7383480072021484,
"beta_dpo/mask_keep_frac": 0.625,
"epoch": 0.04607329842931937,
"grad_norm": 27.25322723388672,
"learning_rate": 2.1875e-07,
"logits/chosen": -0.6295111775398254,
"logits/rejected": -0.6111897230148315,
"loss": 5.546,
"step": 22
},
{
"beta_dpo/beta_used": 0.009907824918627739,
"beta_dpo/beta_used_raw": 0.009907824918627739,
"beta_dpo/gap_mean": 0.08610469102859497,
"beta_dpo/gap_std": 0.7474377751350403,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.048167539267015703,
"grad_norm": 32.43076705932617,
"learning_rate": 2.2916666666666663e-07,
"logits/chosen": -0.6042340397834778,
"logits/rejected": -0.6491126418113708,
"loss": 5.5425,
"step": 23
},
{
"beta_dpo/beta_used": 0.010367114096879959,
"beta_dpo/beta_used_raw": 0.010367114096879959,
"beta_dpo/gap_mean": 0.154057115316391,
"beta_dpo/gap_std": 0.7526560425758362,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.050261780104712044,
"grad_norm": 33.83905029296875,
"learning_rate": 2.3958333333333335e-07,
"logits/chosen": -0.5649707317352295,
"logits/rejected": -0.42430925369262695,
"loss": 5.5343,
"step": 24
},
{
"beta_dpo/beta_used": 0.010158861055970192,
"beta_dpo/beta_used_raw": 0.010158861055970192,
"beta_dpo/gap_mean": 0.19064763188362122,
"beta_dpo/gap_std": 0.7487001419067383,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.05235602094240838,
"grad_norm": 31.9135684967041,
"learning_rate": 2.5e-07,
"logits/chosen": -0.4519118368625641,
"logits/rejected": -0.46168017387390137,
"loss": 5.5363,
"step": 25
},
{
"beta_dpo/beta_used": 0.00981106236577034,
"beta_dpo/beta_used_raw": 0.00981106236577034,
"beta_dpo/gap_mean": 0.15975670516490936,
"beta_dpo/gap_std": 0.8194867968559265,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.05445026178010471,
"grad_norm": 31.96481704711914,
"learning_rate": 2.604166666666667e-07,
"logits/chosen": -0.7263092398643494,
"logits/rejected": -0.733163058757782,
"loss": 5.5392,
"step": 26
},
{
"beta_dpo/beta_used": 0.010345407761633396,
"beta_dpo/beta_used_raw": 0.010345407761633396,
"beta_dpo/gap_mean": 0.17174594104290009,
"beta_dpo/gap_std": 0.8231180310249329,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.05654450261780105,
"grad_norm": 32.44462966918945,
"learning_rate": 2.708333333333333e-07,
"logits/chosen": -0.6372715830802917,
"logits/rejected": -0.6687661409378052,
"loss": 5.5353,
"step": 27
},
{
"beta_dpo/beta_used": 0.009617293253540993,
"beta_dpo/beta_used_raw": 0.009617293253540993,
"beta_dpo/gap_mean": 0.17203421890735626,
"beta_dpo/gap_std": 0.8581656217575073,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.05863874345549738,
"grad_norm": 27.413312911987305,
"learning_rate": 2.8125e-07,
"logits/chosen": -0.6947358250617981,
"logits/rejected": -0.6780796647071838,
"loss": 5.5409,
"step": 28
},
{
"beta_dpo/beta_used": 0.009752588346600533,
"beta_dpo/beta_used_raw": 0.009752588346600533,
"beta_dpo/gap_mean": 0.170832097530365,
"beta_dpo/gap_std": 0.8217583298683167,
"beta_dpo/mask_keep_frac": 0.625,
"epoch": 0.060732984293193716,
"grad_norm": 34.61642837524414,
"learning_rate": 2.916666666666667e-07,
"logits/chosen": -0.6086971163749695,
"logits/rejected": -0.5876795649528503,
"loss": 5.5375,
"step": 29
},
{
"beta_dpo/beta_used": 0.009866783395409584,
"beta_dpo/beta_used_raw": 0.009866783395409584,
"beta_dpo/gap_mean": 0.19807885587215424,
"beta_dpo/gap_std": 0.8146649599075317,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.06282722513089005,
"grad_norm": 31.527999877929688,
"learning_rate": 3.020833333333333e-07,
"logits/chosen": -0.5960394144058228,
"logits/rejected": -0.5833207964897156,
"loss": 5.539,
"step": 30
},
{
"beta_dpo/beta_used": 0.0096372589468956,
"beta_dpo/beta_used_raw": 0.0096372589468956,
"beta_dpo/gap_mean": 0.22884601354599,
"beta_dpo/gap_std": 0.8796005249023438,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.06492146596858639,
"grad_norm": 26.304962158203125,
"learning_rate": 3.1249999999999997e-07,
"logits/chosen": -0.5981181859970093,
"logits/rejected": -0.6432889103889465,
"loss": 5.5367,
"step": 31
},
{
"beta_dpo/beta_used": 0.010047816671431065,
"beta_dpo/beta_used_raw": 0.010047816671431065,
"beta_dpo/gap_mean": 0.2255675345659256,
"beta_dpo/gap_std": 0.9126529097557068,
"beta_dpo/mask_keep_frac": 0.875,
"epoch": 0.06701570680628273,
"grad_norm": 30.972850799560547,
"learning_rate": 3.2291666666666666e-07,
"logits/chosen": -0.6324287056922913,
"logits/rejected": -0.6502685546875,
"loss": 5.5354,
"step": 32
},
{
"beta_dpo/beta_used": 0.010098990052938461,
"beta_dpo/beta_used_raw": 0.010098990052938461,
"beta_dpo/gap_mean": 0.32231834530830383,
"beta_dpo/gap_std": 0.9891802072525024,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.06910994764397906,
"grad_norm": 31.14387321472168,
"learning_rate": 3.333333333333333e-07,
"logits/chosen": -0.5357920527458191,
"logits/rejected": -0.6322364211082458,
"loss": 5.5305,
"step": 33
},
{
"beta_dpo/beta_used": 0.009454782120883465,
"beta_dpo/beta_used_raw": 0.009454782120883465,
"beta_dpo/gap_mean": 0.44986480474472046,
"beta_dpo/gap_std": 1.0094612836837769,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.0712041884816754,
"grad_norm": 32.375919342041016,
"learning_rate": 3.4375e-07,
"logits/chosen": -0.7248749136924744,
"logits/rejected": -0.7035080194473267,
"loss": 5.5304,
"step": 34
},
{
"beta_dpo/beta_used": 0.009926512837409973,
"beta_dpo/beta_used_raw": 0.009926512837409973,
"beta_dpo/gap_mean": 0.4365549683570862,
"beta_dpo/gap_std": 1.0834380388259888,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.07329842931937172,
"grad_norm": 30.43588638305664,
"learning_rate": 3.541666666666667e-07,
"logits/chosen": -0.6929864287376404,
"logits/rejected": -0.6378797888755798,
"loss": 5.5286,
"step": 35
},
{
"beta_dpo/beta_used": 0.009561766870319843,
"beta_dpo/beta_used_raw": 0.009561766870319843,
"beta_dpo/gap_mean": 0.49735885858535767,
"beta_dpo/gap_std": 1.1678481101989746,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.07539267015706806,
"grad_norm": 31.427370071411133,
"learning_rate": 3.645833333333333e-07,
"logits/chosen": -0.5668917298316956,
"logits/rejected": -0.6229207515716553,
"loss": 5.528,
"step": 36
},
{
"beta_dpo/beta_used": 0.010987182147800922,
"beta_dpo/beta_used_raw": 0.010987182147800922,
"beta_dpo/gap_mean": 0.507057249546051,
"beta_dpo/gap_std": 1.272064447402954,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.0774869109947644,
"grad_norm": 41.305686950683594,
"learning_rate": 3.75e-07,
"logits/chosen": -0.5756943225860596,
"logits/rejected": -0.6139695048332214,
"loss": 5.5141,
"step": 37
},
{
"beta_dpo/beta_used": 0.010229920968413353,
"beta_dpo/beta_used_raw": 0.010229920968413353,
"beta_dpo/gap_mean": 0.4955774247646332,
"beta_dpo/gap_std": 1.377665400505066,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.07958115183246073,
"grad_norm": 36.85758972167969,
"learning_rate": 3.8541666666666665e-07,
"logits/chosen": -0.5929851531982422,
"logits/rejected": -0.5943086743354797,
"loss": 5.5237,
"step": 38
},
{
"beta_dpo/beta_used": 0.010049818083643913,
"beta_dpo/beta_used_raw": 0.010049818083643913,
"beta_dpo/gap_mean": 0.7315759062767029,
"beta_dpo/gap_std": 1.3812720775604248,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.08167539267015707,
"grad_norm": 33.5423469543457,
"learning_rate": 3.958333333333333e-07,
"logits/chosen": -0.5810495018959045,
"logits/rejected": -0.5888175964355469,
"loss": 5.511,
"step": 39
},
{
"beta_dpo/beta_used": 0.008092176169157028,
"beta_dpo/beta_used_raw": 0.008092176169157028,
"beta_dpo/gap_mean": 0.7477964162826538,
"beta_dpo/gap_std": 1.5241725444793701,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.08376963350785341,
"grad_norm": 24.289613723754883,
"learning_rate": 4.0625e-07,
"logits/chosen": -0.6579009890556335,
"logits/rejected": -0.7191402316093445,
"loss": 5.5302,
"step": 40
},
{
"beta_dpo/beta_used": 0.009270838461816311,
"beta_dpo/beta_used_raw": 0.009270838461816311,
"beta_dpo/gap_mean": 0.7307737469673157,
"beta_dpo/gap_std": 1.632360577583313,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.08586387434554973,
"grad_norm": 30.218177795410156,
"learning_rate": 4.1666666666666667e-07,
"logits/chosen": -0.5917030572891235,
"logits/rejected": -0.668786346912384,
"loss": 5.5194,
"step": 41
},
{
"beta_dpo/beta_used": 0.00970252975821495,
"beta_dpo/beta_used_raw": 0.00970252975821495,
"beta_dpo/gap_mean": 0.8179957270622253,
"beta_dpo/gap_std": 1.7464549541473389,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.08795811518324607,
"grad_norm": 28.182844161987305,
"learning_rate": 4.270833333333333e-07,
"logits/chosen": -0.6131463050842285,
"logits/rejected": -0.6607965230941772,
"loss": 5.5155,
"step": 42
},
{
"beta_dpo/beta_used": 0.011301547288894653,
"beta_dpo/beta_used_raw": 0.011301547288894653,
"beta_dpo/gap_mean": 0.8352429270744324,
"beta_dpo/gap_std": 1.9265403747558594,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.09005235602094241,
"grad_norm": 39.69644546508789,
"learning_rate": 4.375e-07,
"logits/chosen": -0.5696575045585632,
"logits/rejected": -0.5967999696731567,
"loss": 5.4945,
"step": 43
},
{
"beta_dpo/beta_used": 0.011869620531797409,
"beta_dpo/beta_used_raw": 0.011869620531797409,
"beta_dpo/gap_mean": 0.9845832586288452,
"beta_dpo/gap_std": 2.1420016288757324,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.09214659685863874,
"grad_norm": 44.15154266357422,
"learning_rate": 4.479166666666667e-07,
"logits/chosen": -0.5550575852394104,
"logits/rejected": -0.6399248838424683,
"loss": 5.478,
"step": 44
},
{
"beta_dpo/beta_used": 0.009358462877571583,
"beta_dpo/beta_used_raw": 0.009358462877571583,
"beta_dpo/gap_mean": 1.1377849578857422,
"beta_dpo/gap_std": 2.3049428462982178,
"beta_dpo/mask_keep_frac": 0.875,
"epoch": 0.09424083769633508,
"grad_norm": 34.14745330810547,
"learning_rate": 4.5833333333333327e-07,
"logits/chosen": -0.685950756072998,
"logits/rejected": -0.7422507405281067,
"loss": 5.4987,
"step": 45
},
{
"beta_dpo/beta_used": 0.009525664150714874,
"beta_dpo/beta_used_raw": 0.009525664150714874,
"beta_dpo/gap_mean": 1.1683762073516846,
"beta_dpo/gap_std": 2.3120195865631104,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.09633507853403141,
"grad_norm": 31.92166519165039,
"learning_rate": 4.6874999999999996e-07,
"logits/chosen": -0.6844733357429504,
"logits/rejected": -0.6822009682655334,
"loss": 5.5016,
"step": 46
},
{
"beta_dpo/beta_used": 0.008399980142712593,
"beta_dpo/beta_used_raw": 0.008399980142712593,
"beta_dpo/gap_mean": 1.1559507846832275,
"beta_dpo/gap_std": 2.4187076091766357,
"beta_dpo/mask_keep_frac": 0.59375,
"epoch": 0.09842931937172775,
"grad_norm": 26.383420944213867,
"learning_rate": 4.791666666666667e-07,
"logits/chosen": -0.6458744406700134,
"logits/rejected": -0.6522045135498047,
"loss": 5.5085,
"step": 47
},
{
"beta_dpo/beta_used": 0.007347858510911465,
"beta_dpo/beta_used_raw": 0.007347858510911465,
"beta_dpo/gap_mean": 1.0993822813034058,
"beta_dpo/gap_std": 2.6655614376068115,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.10052356020942409,
"grad_norm": 22.220972061157227,
"learning_rate": 4.895833333333333e-07,
"logits/chosen": -0.5958544611930847,
"logits/rejected": -0.6661175489425659,
"loss": 5.5207,
"step": 48
},
{
"beta_dpo/beta_used": 0.008892661891877651,
"beta_dpo/beta_used_raw": 0.008892661891877651,
"beta_dpo/gap_mean": 1.1662849187850952,
"beta_dpo/gap_std": 2.745657205581665,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.10261780104712041,
"grad_norm": 33.02886962890625,
"learning_rate": 5e-07,
"logits/chosen": -0.644591212272644,
"logits/rejected": -0.6800640225410461,
"loss": 5.5039,
"step": 49
},
{
"beta_dpo/beta_used": 0.008311200886964798,
"beta_dpo/beta_used_raw": 0.008311200886964798,
"beta_dpo/gap_mean": 1.091849684715271,
"beta_dpo/gap_std": 2.904430866241455,
"beta_dpo/mask_keep_frac": 0.65625,
"epoch": 0.10471204188481675,
"grad_norm": 29.243675231933594,
"learning_rate": 4.999932966293553e-07,
"logits/chosen": -0.6915316581726074,
"logits/rejected": -0.6876245737075806,
"loss": 5.5135,
"step": 50
},
{
"beta_dpo/beta_used": 0.012040691450238228,
"beta_dpo/beta_used_raw": 0.012040691450238228,
"beta_dpo/gap_mean": 1.3487975597381592,
"beta_dpo/gap_std": 3.2586777210235596,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.1068062827225131,
"grad_norm": 53.16353988647461,
"learning_rate": 4.999731868769026e-07,
"logits/chosen": -0.6590722799301147,
"logits/rejected": -0.6033743619918823,
"loss": 5.4366,
"step": 51
},
{
"beta_dpo/beta_used": 0.011686221696436405,
"beta_dpo/beta_used_raw": 0.011686221696436405,
"beta_dpo/gap_mean": 1.7514865398406982,
"beta_dpo/gap_std": 3.606762647628784,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.10890052356020942,
"grad_norm": 41.719051361083984,
"learning_rate": 4.99939671821067e-07,
"logits/chosen": -0.6899721622467041,
"logits/rejected": -0.6855327486991882,
"loss": 5.4423,
"step": 52
},
{
"beta_dpo/beta_used": 0.014263564720749855,
"beta_dpo/beta_used_raw": 0.014263564720749855,
"beta_dpo/gap_mean": 1.7108714580535889,
"beta_dpo/gap_std": 3.8523051738739014,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.11099476439790576,
"grad_norm": 56.673248291015625,
"learning_rate": 4.998927532591591e-07,
"logits/chosen": -0.7215074300765991,
"logits/rejected": -0.6849179863929749,
"loss": 5.3767,
"step": 53
},
{
"beta_dpo/beta_used": 0.008228869177401066,
"beta_dpo/beta_used_raw": 0.006880041211843491,
"beta_dpo/gap_mean": 1.9823561906814575,
"beta_dpo/gap_std": 4.244045734405518,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.1130890052356021,
"grad_norm": 27.707256317138672,
"learning_rate": 4.998324337072792e-07,
"logits/chosen": -0.6958556175231934,
"logits/rejected": -0.7273838520050049,
"loss": 5.4762,
"step": 54
},
{
"beta_dpo/beta_used": 0.009436525404453278,
"beta_dpo/beta_used_raw": 0.009436525404453278,
"beta_dpo/gap_mean": 1.670468807220459,
"beta_dpo/gap_std": 4.168619155883789,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.11518324607329843,
"grad_norm": 32.20229721069336,
"learning_rate": 4.997587164001815e-07,
"logits/chosen": -0.61600261926651,
"logits/rejected": -0.6316042542457581,
"loss": 5.4736,
"step": 55
},
{
"beta_dpo/beta_used": 0.012474480085074902,
"beta_dpo/beta_used_raw": 0.012474480085074902,
"beta_dpo/gap_mean": 1.908013939857483,
"beta_dpo/gap_std": 4.524105072021484,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.11727748691099477,
"grad_norm": 46.529991149902344,
"learning_rate": 4.996716052911017e-07,
"logits/chosen": -0.5789837837219238,
"logits/rejected": -0.6456868052482605,
"loss": 5.3918,
"step": 56
},
{
"beta_dpo/beta_used": 0.010171854868531227,
"beta_dpo/beta_used_raw": 0.010171854868531227,
"beta_dpo/gap_mean": 2.9082393646240234,
"beta_dpo/gap_std": 4.872549057006836,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.1193717277486911,
"grad_norm": 36.80557632446289,
"learning_rate": 4.99571105051544e-07,
"logits/chosen": -0.7390983700752258,
"logits/rejected": -0.7615019679069519,
"loss": 5.4256,
"step": 57
},
{
"beta_dpo/beta_used": 0.008224893361330032,
"beta_dpo/beta_used_raw": 0.007648976054042578,
"beta_dpo/gap_mean": 2.4738152027130127,
"beta_dpo/gap_std": 4.7731475830078125,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.12146596858638743,
"grad_norm": 29.444232940673828,
"learning_rate": 4.994572210710314e-07,
"logits/chosen": -0.5107704401016235,
"logits/rejected": -0.5453117489814758,
"loss": 5.4671,
"step": 58
},
{
"beta_dpo/beta_used": 0.004772379528731108,
"beta_dpo/beta_used_raw": 0.0037195575423538685,
"beta_dpo/gap_mean": 2.086270570755005,
"beta_dpo/gap_std": 5.413858413696289,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.12356020942408377,
"grad_norm": 18.721179962158203,
"learning_rate": 4.993299594568162e-07,
"logits/chosen": -0.4708081781864166,
"logits/rejected": -0.5131938457489014,
"loss": 5.513,
"step": 59
},
{
"beta_dpo/beta_used": 0.013422971591353416,
"beta_dpo/beta_used_raw": 0.013422971591353416,
"beta_dpo/gap_mean": 1.9685330390930176,
"beta_dpo/gap_std": 5.735987663269043,
"beta_dpo/mask_keep_frac": 0.625,
"epoch": 0.1256544502617801,
"grad_norm": 41.163246154785156,
"learning_rate": 4.991893270335525e-07,
"logits/chosen": -0.7342594861984253,
"logits/rejected": -0.7558184266090393,
"loss": 5.3913,
"step": 60
},
{
"beta_dpo/beta_used": 0.01401711255311966,
"beta_dpo/beta_used_raw": 0.013643806800246239,
"beta_dpo/gap_mean": 2.660452127456665,
"beta_dpo/gap_std": 6.109948635101318,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.12774869109947645,
"grad_norm": 64.77427673339844,
"learning_rate": 4.990353313429303e-07,
"logits/chosen": -0.7148650288581848,
"logits/rejected": -0.6945707201957703,
"loss": 5.3027,
"step": 61
},
{
"beta_dpo/beta_used": 0.00857143197208643,
"beta_dpo/beta_used_raw": 0.007453832309693098,
"beta_dpo/gap_mean": 2.698399543762207,
"beta_dpo/gap_std": 6.293516159057617,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.12984293193717278,
"grad_norm": 32.562408447265625,
"learning_rate": 4.988679806432711e-07,
"logits/chosen": -0.605577826499939,
"logits/rejected": -0.636237621307373,
"loss": 5.4644,
"step": 62
},
{
"beta_dpo/beta_used": 0.010178687050938606,
"beta_dpo/beta_used_raw": 0.009732894599437714,
"beta_dpo/gap_mean": 2.676795721054077,
"beta_dpo/gap_std": 6.444081783294678,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.1319371727748691,
"grad_norm": 40.96943664550781,
"learning_rate": 4.986872839090852e-07,
"logits/chosen": -0.7076640129089355,
"logits/rejected": -0.6968494653701782,
"loss": 5.3937,
"step": 63
},
{
"beta_dpo/beta_used": 0.013196549378335476,
"beta_dpo/beta_used_raw": 0.013196549378335476,
"beta_dpo/gap_mean": 3.017939567565918,
"beta_dpo/gap_std": 6.541075229644775,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.13403141361256546,
"grad_norm": 50.38563919067383,
"learning_rate": 4.9849325083059e-07,
"logits/chosen": -0.6310144662857056,
"logits/rejected": -0.623473048210144,
"loss": 5.3114,
"step": 64
},
{
"beta_dpo/beta_used": 0.00806832779198885,
"beta_dpo/beta_used_raw": 0.007314560003578663,
"beta_dpo/gap_mean": 3.0990614891052246,
"beta_dpo/gap_std": 6.7054572105407715,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.13612565445026178,
"grad_norm": 30.428876876831055,
"learning_rate": 4.982858918131906e-07,
"logits/chosen": -0.703696608543396,
"logits/rejected": -0.7108103632926941,
"loss": 5.4262,
"step": 65
},
{
"beta_dpo/beta_used": 0.00982650276273489,
"beta_dpo/beta_used_raw": 0.00982650276273489,
"beta_dpo/gap_mean": 3.185175657272339,
"beta_dpo/gap_std": 7.470331192016602,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.1382198952879581,
"grad_norm": 40.387332916259766,
"learning_rate": 4.980652179769217e-07,
"logits/chosen": -0.7345768809318542,
"logits/rejected": -0.7284728288650513,
"loss": 5.3961,
"step": 66
},
{
"beta_dpo/beta_used": 0.010977521538734436,
"beta_dpo/beta_used_raw": 0.010384490713477135,
"beta_dpo/gap_mean": 3.2669320106506348,
"beta_dpo/gap_std": 7.810610294342041,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.14031413612565444,
"grad_norm": 43.06589889526367,
"learning_rate": 4.978312411558517e-07,
"logits/chosen": -0.7585128545761108,
"logits/rejected": -0.7754156589508057,
"loss": 5.3403,
"step": 67
},
{
"beta_dpo/beta_used": 0.006959153804928064,
"beta_dpo/beta_used_raw": 0.0068751610815525055,
"beta_dpo/gap_mean": 3.3277812004089355,
"beta_dpo/gap_std": 8.508169174194336,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.1424083769633508,
"grad_norm": 28.538686752319336,
"learning_rate": 4.975839738974473e-07,
"logits/chosen": -0.7104411125183105,
"logits/rejected": -0.7601235508918762,
"loss": 5.4432,
"step": 68
},
{
"beta_dpo/beta_used": 0.014520850963890553,
"beta_dpo/beta_used_raw": 0.013226198963820934,
"beta_dpo/gap_mean": 4.106817245483398,
"beta_dpo/gap_std": 8.52523422241211,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.14450261780104712,
"grad_norm": 54.57806396484375,
"learning_rate": 4.97323429461901e-07,
"logits/chosen": -0.7267593741416931,
"logits/rejected": -0.7121102809906006,
"loss": 5.1806,
"step": 69
},
{
"beta_dpo/beta_used": 0.012321692891418934,
"beta_dpo/beta_used_raw": 0.010563489980995655,
"beta_dpo/gap_mean": 4.343552112579346,
"beta_dpo/gap_std": 9.016190528869629,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.14659685863874344,
"grad_norm": 43.94473648071289,
"learning_rate": 4.970496218214204e-07,
"logits/chosen": -0.7021334171295166,
"logits/rejected": -0.7124741673469543,
"loss": 5.2922,
"step": 70
},
{
"beta_dpo/beta_used": 0.012451138347387314,
"beta_dpo/beta_used_raw": 0.011505233123898506,
"beta_dpo/gap_mean": 4.387954235076904,
"beta_dpo/gap_std": 9.844895362854004,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.1486910994764398,
"grad_norm": 44.80491638183594,
"learning_rate": 4.967625656594781e-07,
"logits/chosen": -0.61981600522995,
"logits/rejected": -0.5610257387161255,
"loss": 5.2723,
"step": 71
},
{
"beta_dpo/beta_used": 0.0142544936388731,
"beta_dpo/beta_used_raw": 0.010479929856956005,
"beta_dpo/gap_mean": 4.6102423667907715,
"beta_dpo/gap_std": 9.631770133972168,
"beta_dpo/mask_keep_frac": 0.875,
"epoch": 0.15078534031413612,
"grad_norm": 50.041893005371094,
"learning_rate": 4.964622763700252e-07,
"logits/chosen": -0.6500818729400635,
"logits/rejected": -0.6521684527397156,
"loss": 5.2495,
"step": 72
},
{
"beta_dpo/beta_used": 0.00896795466542244,
"beta_dpo/beta_used_raw": 0.006745354738086462,
"beta_dpo/gap_mean": 4.212050437927246,
"beta_dpo/gap_std": 10.10843276977539,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.15287958115183245,
"grad_norm": 41.38795471191406,
"learning_rate": 4.961487700566646e-07,
"logits/chosen": -0.6939983367919922,
"logits/rejected": -0.750190019607544,
"loss": 5.3721,
"step": 73
},
{
"beta_dpo/beta_used": 0.008795595727860928,
"beta_dpo/beta_used_raw": 0.0059730554930865765,
"beta_dpo/gap_mean": 3.9412529468536377,
"beta_dpo/gap_std": 10.357192039489746,
"beta_dpo/mask_keep_frac": 0.65625,
"epoch": 0.1549738219895288,
"grad_norm": 50.13154220581055,
"learning_rate": 4.958220635317885e-07,
"logits/chosen": -0.7438157200813293,
"logits/rejected": -0.7368298768997192,
"loss": 5.3539,
"step": 74
},
{
"beta_dpo/beta_used": 0.018827691674232483,
"beta_dpo/beta_used_raw": 0.018391240388154984,
"beta_dpo/gap_mean": 4.437331199645996,
"beta_dpo/gap_std": 10.493773460388184,
"beta_dpo/mask_keep_frac": 0.65625,
"epoch": 0.15706806282722513,
"grad_norm": 66.55812072753906,
"learning_rate": 4.954821743156767e-07,
"logits/chosen": -0.6884775757789612,
"logits/rejected": -0.6879805326461792,
"loss": 5.077,
"step": 75
},
{
"beta_dpo/beta_used": 0.008767299354076385,
"beta_dpo/beta_used_raw": 0.004196059890091419,
"beta_dpo/gap_mean": 5.429379463195801,
"beta_dpo/gap_std": 10.738119125366211,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.15916230366492146,
"grad_norm": 39.24678421020508,
"learning_rate": 4.951291206355559e-07,
"logits/chosen": -0.6956934332847595,
"logits/rejected": -0.7201342582702637,
"loss": 5.33,
"step": 76
},
{
"beta_dpo/beta_used": 0.005481656640768051,
"beta_dpo/beta_used_raw": 0.001297416165471077,
"beta_dpo/gap_mean": 5.060276031494141,
"beta_dpo/gap_std": 11.49527359008789,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.1612565445026178,
"grad_norm": 26.497804641723633,
"learning_rate": 4.947629214246236e-07,
"logits/chosen": -0.5094698071479797,
"logits/rejected": -0.5404853820800781,
"loss": 5.2853,
"step": 77
},
{
"beta_dpo/beta_used": 0.020722726359963417,
"beta_dpo/beta_used_raw": 0.020722726359963417,
"beta_dpo/gap_mean": 5.881702899932861,
"beta_dpo/gap_std": 12.785398483276367,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.16335078534031414,
"grad_norm": 74.64260864257812,
"learning_rate": 4.943835963210323e-07,
"logits/chosen": -0.7535753846168518,
"logits/rejected": -0.6771411895751953,
"loss": 4.8878,
"step": 78
},
{
"beta_dpo/beta_used": 0.00859091617166996,
"beta_dpo/beta_used_raw": 0.003717180108651519,
"beta_dpo/gap_mean": 6.6240081787109375,
"beta_dpo/gap_std": 12.910642623901367,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.16544502617801046,
"grad_norm": 35.39107131958008,
"learning_rate": 4.939911656668361e-07,
"logits/chosen": -0.6833164691925049,
"logits/rejected": -0.6924921274185181,
"loss": 5.2913,
"step": 79
},
{
"beta_dpo/beta_used": 0.01494982186704874,
"beta_dpo/beta_used_raw": 0.011456114239990711,
"beta_dpo/gap_mean": 5.857873916625977,
"beta_dpo/gap_std": 13.087008476257324,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.16753926701570682,
"grad_norm": 63.859745025634766,
"learning_rate": 4.935856505068998e-07,
"logits/chosen": -0.6719616055488586,
"logits/rejected": -0.6523293852806091,
"loss": 5.0917,
"step": 80
},
{
"beta_dpo/beta_used": 0.009839367121458054,
"beta_dpo/beta_used_raw": 0.005198465194553137,
"beta_dpo/gap_mean": 6.319545269012451,
"beta_dpo/gap_std": 13.469895362854004,
"beta_dpo/mask_keep_frac": 0.90625,
"epoch": 0.16963350785340314,
"grad_norm": 50.84832000732422,
"learning_rate": 4.93167072587771e-07,
"logits/chosen": -0.6479890942573547,
"logits/rejected": -0.654083788394928,
"loss": 5.2317,
"step": 81
},
{
"beta_dpo/beta_used": 0.007517299614846706,
"beta_dpo/beta_used_raw": 0.0021146952640265226,
"beta_dpo/gap_mean": 6.252190113067627,
"beta_dpo/gap_std": 13.920367240905762,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.17172774869109947,
"grad_norm": 38.04156494140625,
"learning_rate": 4.92735454356513e-07,
"logits/chosen": -0.6817227005958557,
"logits/rejected": -0.6929246187210083,
"loss": 5.3369,
"step": 82
},
{
"beta_dpo/beta_used": 0.02531317248940468,
"beta_dpo/beta_used_raw": 0.021217333152890205,
"beta_dpo/gap_mean": 6.274941921234131,
"beta_dpo/gap_std": 14.928312301635742,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.17382198952879582,
"grad_norm": 80.58802795410156,
"learning_rate": 4.922908189595017e-07,
"logits/chosen": -0.6450899243354797,
"logits/rejected": -0.6248490810394287,
"loss": 4.6335,
"step": 83
},
{
"beta_dpo/beta_used": 0.007469699718058109,
"beta_dpo/beta_used_raw": 0.005744083784520626,
"beta_dpo/gap_mean": 6.542113780975342,
"beta_dpo/gap_std": 15.590079307556152,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.17591623036649215,
"grad_norm": 34.23841094970703,
"learning_rate": 4.918331902411841e-07,
"logits/chosen": -0.7670571208000183,
"logits/rejected": -0.7832205891609192,
"loss": 5.3401,
"step": 84
},
{
"beta_dpo/beta_used": 0.01102585531771183,
"beta_dpo/beta_used_raw": 0.0029200459830462933,
"beta_dpo/gap_mean": 5.665676593780518,
"beta_dpo/gap_std": 15.28662395477295,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.17801047120418848,
"grad_norm": 48.48334884643555,
"learning_rate": 4.913625927427995e-07,
"logits/chosen": -0.6236759424209595,
"logits/rejected": -0.6183326244354248,
"loss": 5.2517,
"step": 85
},
{
"beta_dpo/beta_used": 0.01666923239827156,
"beta_dpo/beta_used_raw": 0.01666923239827156,
"beta_dpo/gap_mean": 6.055604934692383,
"beta_dpo/gap_std": 15.560418128967285,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.18010471204188483,
"grad_norm": 64.15731811523438,
"learning_rate": 4.908790517010636e-07,
"logits/chosen": -0.6312252879142761,
"logits/rejected": -0.598781943321228,
"loss": 5.1042,
"step": 86
},
{
"beta_dpo/beta_used": 0.013364073820412159,
"beta_dpo/beta_used_raw": 0.0035636532120406628,
"beta_dpo/gap_mean": 7.059961318969727,
"beta_dpo/gap_std": 15.8635835647583,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.18219895287958116,
"grad_norm": 60.10577392578125,
"learning_rate": 4.903825930468148e-07,
"logits/chosen": -0.7649690508842468,
"logits/rejected": -0.6910430192947388,
"loss": 5.0837,
"step": 87
},
{
"beta_dpo/beta_used": 0.006094816140830517,
"beta_dpo/beta_used_raw": -0.001817956566810608,
"beta_dpo/gap_mean": 7.482639312744141,
"beta_dpo/gap_std": 16.63443374633789,
"beta_dpo/mask_keep_frac": 0.53125,
"epoch": 0.18429319371727748,
"grad_norm": 39.89524841308594,
"learning_rate": 4.898732434036243e-07,
"logits/chosen": -0.5936161875724792,
"logits/rejected": -0.697693943977356,
"loss": 5.3929,
"step": 88
},
{
"beta_dpo/beta_used": 0.014302433468401432,
"beta_dpo/beta_used_raw": 0.008434826508164406,
"beta_dpo/gap_mean": 6.980473518371582,
"beta_dpo/gap_std": 17.23158073425293,
"beta_dpo/mask_keep_frac": 0.90625,
"epoch": 0.18638743455497384,
"grad_norm": 62.69277572631836,
"learning_rate": 4.893510300863676e-07,
"logits/chosen": -0.7539916038513184,
"logits/rejected": -0.8090816736221313,
"loss": 5.1073,
"step": 89
},
{
"beta_dpo/beta_used": 0.012626252137124538,
"beta_dpo/beta_used_raw": 0.007227581460028887,
"beta_dpo/gap_mean": 7.1677327156066895,
"beta_dpo/gap_std": 16.382646560668945,
"beta_dpo/mask_keep_frac": 0.625,
"epoch": 0.18848167539267016,
"grad_norm": 53.65109634399414,
"learning_rate": 4.8881598109976e-07,
"logits/chosen": -0.7130351662635803,
"logits/rejected": -0.7106346487998962,
"loss": 5.1351,
"step": 90
},
{
"beta_dpo/beta_used": 0.009408114477992058,
"beta_dpo/beta_used_raw": -0.0041107251308858395,
"beta_dpo/gap_mean": 7.250586986541748,
"beta_dpo/gap_std": 16.855989456176758,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.1905759162303665,
"grad_norm": 51.472225189208984,
"learning_rate": 4.882681251368548e-07,
"logits/chosen": -0.6540703177452087,
"logits/rejected": -0.7079422473907471,
"loss": 5.1118,
"step": 91
},
{
"beta_dpo/beta_used": 0.030049897730350494,
"beta_dpo/beta_used_raw": 0.026972174644470215,
"beta_dpo/gap_mean": 7.000770568847656,
"beta_dpo/gap_std": 17.0972843170166,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.19267015706806281,
"grad_norm": 105.66353607177734,
"learning_rate": 4.877074915775048e-07,
"logits/chosen": -0.7033326625823975,
"logits/rejected": -0.6728801727294922,
"loss": 4.4387,
"step": 92
},
{
"beta_dpo/beta_used": 0.008473473601043224,
"beta_dpo/beta_used_raw": 0.002239819150418043,
"beta_dpo/gap_mean": 7.2776947021484375,
"beta_dpo/gap_std": 17.40105628967285,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.19476439790575917,
"grad_norm": 41.199073791503906,
"learning_rate": 4.871341104867864e-07,
"logits/chosen": -0.6418673396110535,
"logits/rejected": -0.7276042699813843,
"loss": 5.2607,
"step": 93
},
{
"beta_dpo/beta_used": 0.011333908885717392,
"beta_dpo/beta_used_raw": 0.0038538086228072643,
"beta_dpo/gap_mean": 7.516191482543945,
"beta_dpo/gap_std": 18.00417709350586,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.1968586387434555,
"grad_norm": 51.31498718261719,
"learning_rate": 4.865480126133871e-07,
"logits/chosen": -0.5814552307128906,
"logits/rejected": -0.6306831240653992,
"loss": 5.1584,
"step": 94
},
{
"beta_dpo/beta_used": 0.012983493506908417,
"beta_dpo/beta_used_raw": 0.007386527489870787,
"beta_dpo/gap_mean": 7.620054244995117,
"beta_dpo/gap_std": 18.478008270263672,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.19895287958115182,
"grad_norm": 51.42605209350586,
"learning_rate": 4.859492293879573e-07,
"logits/chosen": -0.7178781032562256,
"logits/rejected": -0.7296870946884155,
"loss": 5.1229,
"step": 95
},
{
"beta_dpo/beta_used": 0.014206080697476864,
"beta_dpo/beta_used_raw": 0.008307880721986294,
"beta_dpo/gap_mean": 8.099912643432617,
"beta_dpo/gap_std": 19.668779373168945,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.20104712041884817,
"grad_norm": 57.065765380859375,
"learning_rate": 4.853377929214243e-07,
"logits/chosen": -0.587355375289917,
"logits/rejected": -0.597959578037262,
"loss": 5.0654,
"step": 96
},
{
"beta_dpo/beta_used": 0.010406726971268654,
"beta_dpo/beta_used_raw": 0.006649984512478113,
"beta_dpo/gap_mean": 8.605752944946289,
"beta_dpo/gap_std": 19.875154495239258,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.2031413612565445,
"grad_norm": 50.55111312866211,
"learning_rate": 4.847137360032699e-07,
"logits/chosen": -0.6221433877944946,
"logits/rejected": -0.5777587890625,
"loss": 5.1555,
"step": 97
},
{
"beta_dpo/beta_used": 0.012800071388483047,
"beta_dpo/beta_used_raw": 0.0028023526538163424,
"beta_dpo/gap_mean": 9.07392692565918,
"beta_dpo/gap_std": 19.343914031982422,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.20523560209424083,
"grad_norm": 69.06121826171875,
"learning_rate": 4.84077092099773e-07,
"logits/chosen": -0.7695798277854919,
"logits/rejected": -0.7975507974624634,
"loss": 5.1301,
"step": 98
},
{
"beta_dpo/beta_used": 0.01906406879425049,
"beta_dpo/beta_used_raw": 0.010687445290386677,
"beta_dpo/gap_mean": 7.9853668212890625,
"beta_dpo/gap_std": 21.019094467163086,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.20732984293193718,
"grad_norm": 66.07819366455078,
"learning_rate": 4.834278953522137e-07,
"logits/chosen": -0.7368970513343811,
"logits/rejected": -0.7557910680770874,
"loss": 4.8975,
"step": 99
},
{
"beta_dpo/beta_used": 0.009735495783388615,
"beta_dpo/beta_used_raw": 0.00393830519169569,
"beta_dpo/gap_mean": 9.077505111694336,
"beta_dpo/gap_std": 21.074779510498047,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.2094240837696335,
"grad_norm": 48.8430061340332,
"learning_rate": 4.827661805750437e-07,
"logits/chosen": -0.7334079742431641,
"logits/rejected": -0.7196102738380432,
"loss": 5.1873,
"step": 100
},
{
"beta_dpo/beta_used": 0.023290041834115982,
"beta_dpo/beta_used_raw": 0.017167603597044945,
"beta_dpo/gap_mean": 9.945512771606445,
"beta_dpo/gap_std": 22.141578674316406,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.21151832460732983,
"grad_norm": 79.32262420654297,
"learning_rate": 4.820919832540181e-07,
"logits/chosen": -0.4960673749446869,
"logits/rejected": -0.5593528747558594,
"loss": 4.595,
"step": 101
},
{
"beta_dpo/beta_used": 0.01968398503959179,
"beta_dpo/beta_used_raw": 0.010066845454275608,
"beta_dpo/gap_mean": 10.21080493927002,
"beta_dpo/gap_std": 21.471494674682617,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.2136125654450262,
"grad_norm": 65.2578125,
"learning_rate": 4.814053395442932e-07,
"logits/chosen": -0.699000358581543,
"logits/rejected": -0.720572829246521,
"loss": 4.72,
"step": 102
},
{
"beta_dpo/beta_used": 0.011599740013480186,
"beta_dpo/beta_used_raw": 0.0011850475566461682,
"beta_dpo/gap_mean": 10.333209991455078,
"beta_dpo/gap_std": 21.639957427978516,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.2157068062827225,
"grad_norm": 43.60222244262695,
"learning_rate": 4.807062862684873e-07,
"logits/chosen": -0.7409847974777222,
"logits/rejected": -0.7405369877815247,
"loss": 5.0793,
"step": 103
},
{
"beta_dpo/beta_used": 0.022328007966279984,
"beta_dpo/beta_used_raw": 0.013688994571566582,
"beta_dpo/gap_mean": 8.519068717956543,
"beta_dpo/gap_std": 22.0716495513916,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.21780104712041884,
"grad_norm": 91.37364196777344,
"learning_rate": 4.799948609147061e-07,
"logits/chosen": -0.7835843563079834,
"logits/rejected": -0.8219706416130066,
"loss": 4.6679,
"step": 104
},
{
"beta_dpo/beta_used": 0.028743159025907516,
"beta_dpo/beta_used_raw": 0.028356103226542473,
"beta_dpo/gap_mean": 10.944629669189453,
"beta_dpo/gap_std": 22.042673110961914,
"beta_dpo/mask_keep_frac": 0.625,
"epoch": 0.2198952879581152,
"grad_norm": 100.99183654785156,
"learning_rate": 4.792711016345321e-07,
"logits/chosen": -0.7158729434013367,
"logits/rejected": -0.739811897277832,
"loss": 4.3696,
"step": 105
},
{
"beta_dpo/beta_used": 0.02642572484910488,
"beta_dpo/beta_used_raw": 0.010559840127825737,
"beta_dpo/gap_mean": 9.867205619812012,
"beta_dpo/gap_std": 22.872636795043945,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.22198952879581152,
"grad_norm": 96.6792221069336,
"learning_rate": 4.785350472409791e-07,
"logits/chosen": -0.6776769161224365,
"logits/rejected": -0.7080086469650269,
"loss": 4.6016,
"step": 106
},
{
"beta_dpo/beta_used": 0.01445105578750372,
"beta_dpo/beta_used_raw": 0.012959499843418598,
"beta_dpo/gap_mean": 10.998950958251953,
"beta_dpo/gap_std": 23.701820373535156,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.22408376963350785,
"grad_norm": 66.5599594116211,
"learning_rate": 4.777867372064105e-07,
"logits/chosen": -0.7649465203285217,
"logits/rejected": -0.8023307919502258,
"loss": 4.9656,
"step": 107
},
{
"beta_dpo/beta_used": 0.032948337495326996,
"beta_dpo/beta_used_raw": 0.02698555961251259,
"beta_dpo/gap_mean": 12.660971641540527,
"beta_dpo/gap_std": 24.206636428833008,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.2261780104712042,
"grad_norm": 102.94635772705078,
"learning_rate": 4.770262116604223e-07,
"logits/chosen": -0.7107124924659729,
"logits/rejected": -0.7374171614646912,
"loss": 4.3364,
"step": 108
},
{
"beta_dpo/beta_used": 0.007749465759843588,
"beta_dpo/beta_used_raw": -0.0016765656182542443,
"beta_dpo/gap_mean": 13.632909774780273,
"beta_dpo/gap_std": 24.86305809020996,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.22827225130890053,
"grad_norm": 43.27128601074219,
"learning_rate": 4.7625351138769166e-07,
"logits/chosen": -0.7678626775741577,
"logits/rejected": -0.760747492313385,
"loss": 5.1962,
"step": 109
},
{
"beta_dpo/beta_used": 0.013254636898636818,
"beta_dpo/beta_used_raw": 0.003388074692338705,
"beta_dpo/gap_mean": 13.47364330291748,
"beta_dpo/gap_std": 25.939802169799805,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.23036649214659685,
"grad_norm": 52.916778564453125,
"learning_rate": 4.75468677825789e-07,
"logits/chosen": -0.7187046408653259,
"logits/rejected": -0.6971960663795471,
"loss": 4.9661,
"step": 110
},
{
"beta_dpo/beta_used": 0.018351394683122635,
"beta_dpo/beta_used_raw": 0.010718288831412792,
"beta_dpo/gap_mean": 13.720507621765137,
"beta_dpo/gap_std": 26.687028884887695,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2324607329842932,
"grad_norm": 82.68915557861328,
"learning_rate": 4.7467175306295647e-07,
"logits/chosen": -0.824735701084137,
"logits/rejected": -0.7799985408782959,
"loss": 4.8194,
"step": 111
},
{
"beta_dpo/beta_used": 0.011828150600194931,
"beta_dpo/beta_used_raw": -0.005314134992659092,
"beta_dpo/gap_mean": 12.305923461914062,
"beta_dpo/gap_std": 26.428997039794922,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.23455497382198953,
"grad_norm": 56.26085662841797,
"learning_rate": 4.7386277983585053e-07,
"logits/chosen": -0.6889740228652954,
"logits/rejected": -0.7342170476913452,
"loss": 5.0215,
"step": 112
},
{
"beta_dpo/beta_used": 0.021786488592624664,
"beta_dpo/beta_used_raw": 0.011742182075977325,
"beta_dpo/gap_mean": 14.276546478271484,
"beta_dpo/gap_std": 29.68646812438965,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.23664921465968586,
"grad_norm": 80.65067291259766,
"learning_rate": 4.7304180152725024e-07,
"logits/chosen": -0.6449406743049622,
"logits/rejected": -0.6256552338600159,
"loss": 4.4312,
"step": 113
},
{
"beta_dpo/beta_used": 0.020888667553663254,
"beta_dpo/beta_used_raw": 0.007870053872466087,
"beta_dpo/gap_mean": 12.523893356323242,
"beta_dpo/gap_std": 28.998544692993164,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.2387434554973822,
"grad_norm": 59.69179153442383,
"learning_rate": 4.7220886216373085e-07,
"logits/chosen": -0.7064129710197449,
"logits/rejected": -0.7065778970718384,
"loss": 4.5294,
"step": 114
},
{
"beta_dpo/beta_used": 0.013495873659849167,
"beta_dpo/beta_used_raw": -0.008398683741688728,
"beta_dpo/gap_mean": 10.547378540039062,
"beta_dpo/gap_std": 27.94576644897461,
"beta_dpo/mask_keep_frac": 0.875,
"epoch": 0.24083769633507854,
"grad_norm": 56.25117492675781,
"learning_rate": 4.7136400641330245e-07,
"logits/chosen": -0.7171422839164734,
"logits/rejected": -0.6828722357749939,
"loss": 4.9334,
"step": 115
},
{
"beta_dpo/beta_used": 0.016198089346289635,
"beta_dpo/beta_used_raw": 0.011391330510377884,
"beta_dpo/gap_mean": 10.625633239746094,
"beta_dpo/gap_std": 27.245738983154297,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.24293193717277486,
"grad_norm": 72.90325927734375,
"learning_rate": 4.70507279583015e-07,
"logits/chosen": -0.7499311566352844,
"logits/rejected": -0.739253580570221,
"loss": 4.9531,
"step": 116
},
{
"beta_dpo/beta_used": 0.036482565104961395,
"beta_dpo/beta_used_raw": 0.02595018595457077,
"beta_dpo/gap_mean": 10.865323066711426,
"beta_dpo/gap_std": 26.646053314208984,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.2450261780104712,
"grad_norm": 128.06033325195312,
"learning_rate": 4.6963872761652834e-07,
"logits/chosen": -0.7866169214248657,
"logits/rejected": -0.8020620346069336,
"loss": 4.1584,
"step": 117
},
{
"beta_dpo/beta_used": 0.02256722003221512,
"beta_dpo/beta_used_raw": 0.004887686111032963,
"beta_dpo/gap_mean": 12.92835807800293,
"beta_dpo/gap_std": 27.222332000732422,
"beta_dpo/mask_keep_frac": 0.625,
"epoch": 0.24712041884816754,
"grad_norm": 121.85209655761719,
"learning_rate": 4.687583970916486e-07,
"logits/chosen": -0.6286183595657349,
"logits/rejected": -0.6127574443817139,
"loss": 4.7058,
"step": 118
},
{
"beta_dpo/beta_used": 0.012178106233477592,
"beta_dpo/beta_used_raw": 0.0018881040159612894,
"beta_dpo/gap_mean": 12.664083480834961,
"beta_dpo/gap_std": 29.877716064453125,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.24921465968586387,
"grad_norm": 52.12855529785156,
"learning_rate": 4.6786633521783005e-07,
"logits/chosen": -0.8367944359779358,
"logits/rejected": -0.8432599306106567,
"loss": 4.9455,
"step": 119
},
{
"beta_dpo/beta_used": 0.011312302201986313,
"beta_dpo/beta_used_raw": -0.009262747131288052,
"beta_dpo/gap_mean": 12.50714111328125,
"beta_dpo/gap_std": 29.64698028564453,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.2513089005235602,
"grad_norm": 62.29435729980469,
"learning_rate": 4.669625898336438e-07,
"logits/chosen": -0.7120507955551147,
"logits/rejected": -0.7823662161827087,
"loss": 5.029,
"step": 120
},
{
"beta_dpo/beta_used": 0.0014778866898268461,
"beta_dpo/beta_used_raw": -0.02501249685883522,
"beta_dpo/gap_mean": 11.199564933776855,
"beta_dpo/gap_std": 29.29185676574707,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2534031413612565,
"grad_norm": 7.333785057067871,
"learning_rate": 4.6604720940421207e-07,
"logits/chosen": -0.7075969576835632,
"logits/rejected": -0.7335148453712463,
"loss": 5.5081,
"step": 121
},
{
"beta_dpo/beta_used": 0.014175733551383018,
"beta_dpo/beta_used_raw": -0.001818017102777958,
"beta_dpo/gap_mean": 12.57092571258545,
"beta_dpo/gap_std": 31.017070770263672,
"beta_dpo/mask_keep_frac": 0.65625,
"epoch": 0.2554973821989529,
"grad_norm": 65.50248718261719,
"learning_rate": 4.651202430186092e-07,
"logits/chosen": -0.8409684300422668,
"logits/rejected": -0.8054923415184021,
"loss": 4.9411,
"step": 122
},
{
"beta_dpo/beta_used": 0.03022715263068676,
"beta_dpo/beta_used_raw": 0.01728089153766632,
"beta_dpo/gap_mean": 14.076234817504883,
"beta_dpo/gap_std": 31.252927780151367,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.25759162303664923,
"grad_norm": 140.3761444091797,
"learning_rate": 4.6418174038722924e-07,
"logits/chosen": -0.6981220245361328,
"logits/rejected": -0.7018057107925415,
"loss": 4.4325,
"step": 123
},
{
"beta_dpo/beta_used": 0.016084099188447,
"beta_dpo/beta_used_raw": 0.007035914342850447,
"beta_dpo/gap_mean": 15.691198348999023,
"beta_dpo/gap_std": 30.451919555664062,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.25968586387434556,
"grad_norm": 93.5594482421875,
"learning_rate": 4.6323175183912023e-07,
"logits/chosen": -0.8233157992362976,
"logits/rejected": -0.7800065279006958,
"loss": 4.7914,
"step": 124
},
{
"beta_dpo/beta_used": 0.020123766735196114,
"beta_dpo/beta_used_raw": -0.0011871629394590855,
"beta_dpo/gap_mean": 13.572896957397461,
"beta_dpo/gap_std": 31.540260314941406,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.2617801047120419,
"grad_norm": 81.28208923339844,
"learning_rate": 4.6227032831928483e-07,
"logits/chosen": -0.7599564790725708,
"logits/rejected": -0.6782684326171875,
"loss": 4.704,
"step": 125
},
{
"beta_dpo/beta_used": 0.02084464207291603,
"beta_dpo/beta_used_raw": 0.010123949497938156,
"beta_dpo/gap_mean": 14.827800750732422,
"beta_dpo/gap_std": 32.751522064208984,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.2638743455497382,
"grad_norm": 91.95066833496094,
"learning_rate": 4.612975213859487e-07,
"logits/chosen": -0.7613145112991333,
"logits/rejected": -0.7944775819778442,
"loss": 4.6694,
"step": 126
},
{
"beta_dpo/beta_used": 0.023009877651929855,
"beta_dpo/beta_used_raw": 0.014726024121046066,
"beta_dpo/gap_mean": 14.955554962158203,
"beta_dpo/gap_std": 33.054447174072266,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.26596858638743454,
"grad_norm": 91.58712005615234,
"learning_rate": 4.603133832077953e-07,
"logits/chosen": -0.8286364674568176,
"logits/rejected": -0.8062022924423218,
"loss": 4.5869,
"step": 127
},
{
"beta_dpo/beta_used": 0.0319821797311306,
"beta_dpo/beta_used_raw": 0.02191462367773056,
"beta_dpo/gap_mean": 17.882171630859375,
"beta_dpo/gap_std": 33.18529510498047,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.2680628272251309,
"grad_norm": 106.6471939086914,
"learning_rate": 4.5931796656116837e-07,
"logits/chosen": -0.699189305305481,
"logits/rejected": -0.6564383506774902,
"loss": 4.2103,
"step": 128
},
{
"beta_dpo/beta_used": 0.019659318029880524,
"beta_dpo/beta_used_raw": 0.015033195726573467,
"beta_dpo/gap_mean": 17.7318058013916,
"beta_dpo/gap_std": 33.44122314453125,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.27015706806282724,
"grad_norm": 72.52957153320312,
"learning_rate": 4.5831132482724193e-07,
"logits/chosen": -0.7713093161582947,
"logits/rejected": -0.7497988939285278,
"loss": 4.4109,
"step": 129
},
{
"beta_dpo/beta_used": 0.00805729627609253,
"beta_dpo/beta_used_raw": -0.008298722095787525,
"beta_dpo/gap_mean": 18.417720794677734,
"beta_dpo/gap_std": 34.202728271484375,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.27225130890052357,
"grad_norm": 62.98481369018555,
"learning_rate": 4.5729351198915705e-07,
"logits/chosen": -0.7118815779685974,
"logits/rejected": -0.7767693996429443,
"loss": 5.0954,
"step": 130
},
{
"beta_dpo/beta_used": 0.0160170029848814,
"beta_dpo/beta_used_raw": 0.00947889219969511,
"beta_dpo/gap_mean": 16.390932083129883,
"beta_dpo/gap_std": 35.38821029663086,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.2743455497382199,
"grad_norm": 78.16362762451172,
"learning_rate": 4.5626458262912735e-07,
"logits/chosen": -0.7344637513160706,
"logits/rejected": -0.7118038535118103,
"loss": 4.8656,
"step": 131
},
{
"beta_dpo/beta_used": 0.03713168576359749,
"beta_dpo/beta_used_raw": 0.014095718041062355,
"beta_dpo/gap_mean": 16.76073455810547,
"beta_dpo/gap_std": 35.335784912109375,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.2764397905759162,
"grad_norm": 105.79364013671875,
"learning_rate": 4.5522459192551166e-07,
"logits/chosen": -0.8273689150810242,
"logits/rejected": -0.79078209400177,
"loss": 4.106,
"step": 132
},
{
"beta_dpo/beta_used": 0.021496238186955452,
"beta_dpo/beta_used_raw": 0.006832793354988098,
"beta_dpo/gap_mean": 18.750276565551758,
"beta_dpo/gap_std": 36.73375701904297,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.27853403141361255,
"grad_norm": 94.68896484375,
"learning_rate": 4.541735956498554e-07,
"logits/chosen": -0.8012921214103699,
"logits/rejected": -0.8170878291130066,
"loss": 4.4552,
"step": 133
},
{
"beta_dpo/beta_used": 0.012021646834909916,
"beta_dpo/beta_used_raw": -0.007044796831905842,
"beta_dpo/gap_mean": 16.580371856689453,
"beta_dpo/gap_std": 34.95547866821289,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.2806282722513089,
"grad_norm": 57.19186019897461,
"learning_rate": 4.5311165016389914e-07,
"logits/chosen": -0.8249697685241699,
"logits/rejected": -0.807636022567749,
"loss": 4.8618,
"step": 134
},
{
"beta_dpo/beta_used": 0.02594444341957569,
"beta_dpo/beta_used_raw": 0.009492763318121433,
"beta_dpo/gap_mean": 16.52640151977539,
"beta_dpo/gap_std": 31.791019439697266,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.28272251308900526,
"grad_norm": 157.5948028564453,
"learning_rate": 4.520388124165564e-07,
"logits/chosen": -0.7143105268478394,
"logits/rejected": -0.7277257442474365,
"loss": 4.7733,
"step": 135
},
{
"beta_dpo/beta_used": 0.018465936183929443,
"beta_dpo/beta_used_raw": 0.010593372397124767,
"beta_dpo/gap_mean": 17.351146697998047,
"beta_dpo/gap_std": 33.06019592285156,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.2848167539267016,
"grad_norm": 131.78701782226562,
"learning_rate": 4.5095513994085974e-07,
"logits/chosen": -0.7398912906646729,
"logits/rejected": -0.7863351702690125,
"loss": 4.8188,
"step": 136
},
{
"beta_dpo/beta_used": 0.011953875422477722,
"beta_dpo/beta_used_raw": 0.0052395714446902275,
"beta_dpo/gap_mean": 16.20960235595703,
"beta_dpo/gap_std": 35.670745849609375,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.2869109947643979,
"grad_norm": 74.34517669677734,
"learning_rate": 4.498606908508753e-07,
"logits/chosen": -0.80860835313797,
"logits/rejected": -0.7614427804946899,
"loss": 5.0427,
"step": 137
},
{
"beta_dpo/beta_used": 0.02045310102403164,
"beta_dpo/beta_used_raw": -0.0056061288341879845,
"beta_dpo/gap_mean": 17.586992263793945,
"beta_dpo/gap_std": 36.90517807006836,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.28900523560209423,
"grad_norm": 143.13523864746094,
"learning_rate": 4.487555238385862e-07,
"logits/chosen": -0.7992879152297974,
"logits/rejected": -0.8304911851882935,
"loss": 5.2096,
"step": 138
},
{
"beta_dpo/beta_used": 0.016061272472143173,
"beta_dpo/beta_used_raw": -0.0029601496644318104,
"beta_dpo/gap_mean": 15.417540550231934,
"beta_dpo/gap_std": 36.3847541809082,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.29109947643979056,
"grad_norm": 95.46815490722656,
"learning_rate": 4.476396981707453e-07,
"logits/chosen": -0.7314491271972656,
"logits/rejected": -0.7732853293418884,
"loss": 5.0469,
"step": 139
},
{
"beta_dpo/beta_used": 0.044663287699222565,
"beta_dpo/beta_used_raw": 0.04263610392808914,
"beta_dpo/gap_mean": 15.303201675415039,
"beta_dpo/gap_std": 34.73930358886719,
"beta_dpo/mask_keep_frac": 0.65625,
"epoch": 0.2931937172774869,
"grad_norm": 174.06341552734375,
"learning_rate": 4.4651327368569684e-07,
"logits/chosen": -0.8693004846572876,
"logits/rejected": -0.8686134815216064,
"loss": 3.9564,
"step": 140
},
{
"beta_dpo/beta_used": 0.021187350153923035,
"beta_dpo/beta_used_raw": 0.019022824242711067,
"beta_dpo/gap_mean": 19.225461959838867,
"beta_dpo/gap_std": 34.109764099121094,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.29528795811518327,
"grad_norm": 110.43495178222656,
"learning_rate": 4.453763107901675e-07,
"logits/chosen": -0.7424483299255371,
"logits/rejected": -0.7873528599739075,
"loss": 4.6015,
"step": 141
},
{
"beta_dpo/beta_used": 0.0332464836537838,
"beta_dpo/beta_used_raw": 0.008531760424375534,
"beta_dpo/gap_mean": 18.10867691040039,
"beta_dpo/gap_std": 36.432342529296875,
"beta_dpo/mask_keep_frac": 0.90625,
"epoch": 0.2973821989528796,
"grad_norm": 122.61527252197266,
"learning_rate": 4.4422887045602674e-07,
"logits/chosen": -0.7638643383979797,
"logits/rejected": -0.7775416970252991,
"loss": 4.2986,
"step": 142
},
{
"beta_dpo/beta_used": 0.016291283071041107,
"beta_dpo/beta_used_raw": 0.0006667158449999988,
"beta_dpo/gap_mean": 18.518922805786133,
"beta_dpo/gap_std": 35.83793258666992,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.2994764397905759,
"grad_norm": 79.98748016357422,
"learning_rate": 4.4307101421701755e-07,
"logits/chosen": -0.8503552675247192,
"logits/rejected": -0.8338074088096619,
"loss": 4.739,
"step": 143
},
{
"beta_dpo/beta_used": 0.012389753945171833,
"beta_dpo/beta_used_raw": 0.00042197853326797485,
"beta_dpo/gap_mean": 18.385601043701172,
"beta_dpo/gap_std": 36.555580139160156,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.30157068062827225,
"grad_norm": 74.62446594238281,
"learning_rate": 4.419028041654559e-07,
"logits/chosen": -0.8731358051300049,
"logits/rejected": -0.867561936378479,
"loss": 4.9057,
"step": 144
},
{
"beta_dpo/beta_used": 0.016797425225377083,
"beta_dpo/beta_used_raw": -0.007564428262412548,
"beta_dpo/gap_mean": 18.421340942382812,
"beta_dpo/gap_std": 35.51329040527344,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.3036649214659686,
"grad_norm": 102.74947357177734,
"learning_rate": 4.4072430294890166e-07,
"logits/chosen": -0.7647844552993774,
"logits/rejected": -0.766077995300293,
"loss": 4.7931,
"step": 145
},
{
"beta_dpo/beta_used": 0.007330628577619791,
"beta_dpo/beta_used_raw": -0.01089246105402708,
"beta_dpo/gap_mean": 19.39159393310547,
"beta_dpo/gap_std": 33.0991325378418,
"beta_dpo/mask_keep_frac": 0.90625,
"epoch": 0.3057591623036649,
"grad_norm": 57.71752166748047,
"learning_rate": 4.395355737667985e-07,
"logits/chosen": -0.807758092880249,
"logits/rejected": -0.821743905544281,
"loss": 5.1453,
"step": 146
},
{
"beta_dpo/beta_used": 0.017891917377710342,
"beta_dpo/beta_used_raw": 0.0008026466239243746,
"beta_dpo/gap_mean": 16.48558235168457,
"beta_dpo/gap_std": 33.77042007446289,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.3078534031413613,
"grad_norm": 94.41093444824219,
"learning_rate": 4.3833668036708483e-07,
"logits/chosen": -0.8066427111625671,
"logits/rejected": -0.8248432278633118,
"loss": 4.7533,
"step": 147
},
{
"beta_dpo/beta_used": 0.009967929683625698,
"beta_dpo/beta_used_raw": -0.007602631114423275,
"beta_dpo/gap_mean": 16.034523010253906,
"beta_dpo/gap_std": 36.380615234375,
"beta_dpo/mask_keep_frac": 0.90625,
"epoch": 0.3099476439790576,
"grad_norm": 74.8528823852539,
"learning_rate": 4.3712768704277524e-07,
"logits/chosen": -0.8988285660743713,
"logits/rejected": -0.9119629859924316,
"loss": 5.3024,
"step": 148
},
{
"beta_dpo/beta_used": 0.029269058257341385,
"beta_dpo/beta_used_raw": 0.024799324572086334,
"beta_dpo/gap_mean": 18.69751739501953,
"beta_dpo/gap_std": 34.20708465576172,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.31204188481675393,
"grad_norm": 95.18089294433594,
"learning_rate": 4.3590865862851263e-07,
"logits/chosen": -0.9157636761665344,
"logits/rejected": -0.8866834044456482,
"loss": 3.9915,
"step": 149
},
{
"beta_dpo/beta_used": 0.028355229645967484,
"beta_dpo/beta_used_raw": 0.014602387323975563,
"beta_dpo/gap_mean": 18.82350730895996,
"beta_dpo/gap_std": 33.63038635253906,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.31413612565445026,
"grad_norm": 127.4208984375,
"learning_rate": 4.346796604970912e-07,
"logits/chosen": -0.8425026535987854,
"logits/rejected": -0.7345662117004395,
"loss": 4.1692,
"step": 150
},
{
"beta_dpo/beta_used": 0.0352584645152092,
"beta_dpo/beta_used_raw": 0.030752388760447502,
"beta_dpo/gap_mean": 19.252273559570312,
"beta_dpo/gap_std": 36.00699996948242,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.3162303664921466,
"grad_norm": 101.46421813964844,
"learning_rate": 4.3344075855595097e-07,
"logits/chosen": -0.764532208442688,
"logits/rejected": -0.7699897885322571,
"loss": 3.9384,
"step": 151
},
{
"beta_dpo/beta_used": 0.030788574367761612,
"beta_dpo/beta_used_raw": 0.01764693856239319,
"beta_dpo/gap_mean": 21.685163497924805,
"beta_dpo/gap_std": 36.85689163208008,
"beta_dpo/mask_keep_frac": 0.875,
"epoch": 0.3183246073298429,
"grad_norm": 102.9702377319336,
"learning_rate": 4.3219201924364323e-07,
"logits/chosen": -0.9068971872329712,
"logits/rejected": -0.9211371541023254,
"loss": 3.8419,
"step": 152
},
{
"beta_dpo/beta_used": 0.024467987939715385,
"beta_dpo/beta_used_raw": 0.015887044370174408,
"beta_dpo/gap_mean": 24.365219116210938,
"beta_dpo/gap_std": 36.4759521484375,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.3204188481675393,
"grad_norm": 111.00398254394531,
"learning_rate": 4.309335095262675e-07,
"logits/chosen": -0.7987594604492188,
"logits/rejected": -0.7632243037223816,
"loss": 4.1512,
"step": 153
},
{
"beta_dpo/beta_used": 0.04007789492607117,
"beta_dpo/beta_used_raw": 0.01483201328665018,
"beta_dpo/gap_mean": 25.266956329345703,
"beta_dpo/gap_std": 39.56476593017578,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3225130890052356,
"grad_norm": 123.44865417480469,
"learning_rate": 4.2966529689388064e-07,
"logits/chosen": -0.8501051068305969,
"logits/rejected": -0.8371157646179199,
"loss": 3.4469,
"step": 154
},
{
"beta_dpo/beta_used": 0.022579234093427658,
"beta_dpo/beta_used_raw": -0.02031770907342434,
"beta_dpo/gap_mean": 21.613218307495117,
"beta_dpo/gap_std": 39.026023864746094,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.32460732984293195,
"grad_norm": 129.148193359375,
"learning_rate": 4.2838744935687716e-07,
"logits/chosen": -0.761044442653656,
"logits/rejected": -0.7877327799797058,
"loss": 4.4611,
"step": 155
},
{
"beta_dpo/beta_used": 0.029358845204114914,
"beta_dpo/beta_used_raw": 0.013587499037384987,
"beta_dpo/gap_mean": 23.304094314575195,
"beta_dpo/gap_std": 41.368614196777344,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.3267015706806283,
"grad_norm": 138.49502563476562,
"learning_rate": 4.271000354423425e-07,
"logits/chosen": -0.7635002732276917,
"logits/rejected": -0.8206408023834229,
"loss": 4.1597,
"step": 156
},
{
"beta_dpo/beta_used": 0.01539008691906929,
"beta_dpo/beta_used_raw": -0.02126063033938408,
"beta_dpo/gap_mean": 24.20404624938965,
"beta_dpo/gap_std": 41.25341033935547,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3287958115183246,
"grad_norm": 83.74942016601562,
"learning_rate": 4.258031241903777e-07,
"logits/chosen": -0.8348160982131958,
"logits/rejected": -0.7768077850341797,
"loss": 4.6953,
"step": 157
},
{
"beta_dpo/beta_used": 0.03978518396615982,
"beta_dpo/beta_used_raw": 0.028111770749092102,
"beta_dpo/gap_mean": 22.0745849609375,
"beta_dpo/gap_std": 39.07844924926758,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.3308900523560209,
"grad_norm": 135.35491943359375,
"learning_rate": 4.2449678515039743e-07,
"logits/chosen": -0.8687289357185364,
"logits/rejected": -0.8547466993331909,
"loss": 3.9821,
"step": 158
},
{
"beta_dpo/beta_used": 0.02545471116900444,
"beta_dpo/beta_used_raw": -0.00017212284728884697,
"beta_dpo/gap_mean": 19.57489776611328,
"beta_dpo/gap_std": 41.78768539428711,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.33298429319371725,
"grad_norm": 144.7659912109375,
"learning_rate": 4.2318108837739986e-07,
"logits/chosen": -0.8753824234008789,
"logits/rejected": -0.8525476455688477,
"loss": 4.8274,
"step": 159
},
{
"beta_dpo/beta_used": 0.039335690438747406,
"beta_dpo/beta_used_raw": 0.02949613332748413,
"beta_dpo/gap_mean": 21.556251525878906,
"beta_dpo/gap_std": 38.69097137451172,
"beta_dpo/mask_keep_frac": 0.875,
"epoch": 0.33507853403141363,
"grad_norm": 240.3103790283203,
"learning_rate": 4.218561044282098e-07,
"logits/chosen": -0.86173415184021,
"logits/rejected": -0.8341448903083801,
"loss": 3.7782,
"step": 160
},
{
"beta_dpo/beta_used": 0.030621008947491646,
"beta_dpo/beta_used_raw": -0.0023182015866041183,
"beta_dpo/gap_mean": 22.37126922607422,
"beta_dpo/gap_std": 39.51905059814453,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.33717277486910996,
"grad_norm": 150.58641052246094,
"learning_rate": 4.2052190435769554e-07,
"logits/chosen": -0.8699642419815063,
"logits/rejected": -0.86982661485672,
"loss": 4.2166,
"step": 161
},
{
"beta_dpo/beta_used": 0.021013660356402397,
"beta_dpo/beta_used_raw": 0.005534999072551727,
"beta_dpo/gap_mean": 22.425281524658203,
"beta_dpo/gap_std": 40.90775680541992,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.3392670157068063,
"grad_norm": 135.9263916015625,
"learning_rate": 4.1917855971495763e-07,
"logits/chosen": -0.8752709031105042,
"logits/rejected": -0.8557614684104919,
"loss": 4.5621,
"step": 162
},
{
"beta_dpo/beta_used": 0.02784748375415802,
"beta_dpo/beta_used_raw": 0.005173914600163698,
"beta_dpo/gap_mean": 20.817134857177734,
"beta_dpo/gap_std": 40.16265106201172,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.3413612565445026,
"grad_norm": 132.69302368164062,
"learning_rate": 4.1782614253949255e-07,
"logits/chosen": -0.771392822265625,
"logits/rejected": -0.794430673122406,
"loss": 4.3633,
"step": 163
},
{
"beta_dpo/beta_used": 0.021030288189649582,
"beta_dpo/beta_used_raw": 0.004847892560064793,
"beta_dpo/gap_mean": 20.410049438476562,
"beta_dpo/gap_std": 41.04210662841797,
"beta_dpo/mask_keep_frac": 0.875,
"epoch": 0.34345549738219894,
"grad_norm": 151.0008087158203,
"learning_rate": 4.164647253573289e-07,
"logits/chosen": -0.9349634647369385,
"logits/rejected": -0.8864374160766602,
"loss": 4.5775,
"step": 164
},
{
"beta_dpo/beta_used": 0.009714031592011452,
"beta_dpo/beta_used_raw": -0.03288843855261803,
"beta_dpo/gap_mean": 20.747264862060547,
"beta_dpo/gap_std": 39.629669189453125,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.34554973821989526,
"grad_norm": 57.11280822753906,
"learning_rate": 4.1509438117713863e-07,
"logits/chosen": -0.8814147114753723,
"logits/rejected": -0.8542748093605042,
"loss": 5.1159,
"step": 165
},
{
"beta_dpo/beta_used": 0.010636869817972183,
"beta_dpo/beta_used_raw": -0.009791170246899128,
"beta_dpo/gap_mean": 20.025184631347656,
"beta_dpo/gap_std": 39.09601974487305,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.34764397905759165,
"grad_norm": 241.4670867919922,
"learning_rate": 4.137151834863213e-07,
"logits/chosen": -0.8034209609031677,
"logits/rejected": -0.7690469026565552,
"loss": 5.1549,
"step": 166
},
{
"beta_dpo/beta_used": 0.03181453049182892,
"beta_dpo/beta_used_raw": 0.028030332177877426,
"beta_dpo/gap_mean": 20.967866897583008,
"beta_dpo/gap_std": 40.07197952270508,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.34973821989528797,
"grad_norm": 176.5508270263672,
"learning_rate": 4.123272062470633e-07,
"logits/chosen": -0.8304077982902527,
"logits/rejected": -0.7818213105201721,
"loss": 4.3848,
"step": 167
},
{
"beta_dpo/beta_used": 0.03028823807835579,
"beta_dpo/beta_used_raw": 0.020576341077685356,
"beta_dpo/gap_mean": 22.560794830322266,
"beta_dpo/gap_std": 43.39508819580078,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.3518324607329843,
"grad_norm": 110.01106262207031,
"learning_rate": 4.1093052389237174e-07,
"logits/chosen": -0.7103608846664429,
"logits/rejected": -0.7231693267822266,
"loss": 3.9635,
"step": 168
},
{
"beta_dpo/beta_used": 0.04548133164644241,
"beta_dpo/beta_used_raw": 0.04410823807120323,
"beta_dpo/gap_mean": 24.36121940612793,
"beta_dpo/gap_std": 41.35852813720703,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.3539267015706806,
"grad_norm": 325.0240478515625,
"learning_rate": 4.0952521132208267e-07,
"logits/chosen": -0.8891708254814148,
"logits/rejected": -0.8906590938568115,
"loss": 3.4009,
"step": 169
},
{
"beta_dpo/beta_used": 0.013463572598993778,
"beta_dpo/beta_used_raw": -0.014656160026788712,
"beta_dpo/gap_mean": 27.53852081298828,
"beta_dpo/gap_std": 41.62273406982422,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.35602094240837695,
"grad_norm": 111.54733276367188,
"learning_rate": 4.081113438988443e-07,
"logits/chosen": -0.8540668487548828,
"logits/rejected": -0.8349031805992126,
"loss": 4.8316,
"step": 170
},
{
"beta_dpo/beta_used": 0.022017715498805046,
"beta_dpo/beta_used_raw": 0.0025704074651002884,
"beta_dpo/gap_mean": 26.90930938720703,
"beta_dpo/gap_std": 38.87221908569336,
"beta_dpo/mask_keep_frac": 0.65625,
"epoch": 0.3581151832460733,
"grad_norm": 160.54473876953125,
"learning_rate": 4.0668899744407567e-07,
"logits/chosen": -0.8043861985206604,
"logits/rejected": -0.8006876707077026,
"loss": 4.4335,
"step": 171
},
{
"beta_dpo/beta_used": 0.008449875749647617,
"beta_dpo/beta_used_raw": -0.03602520003914833,
"beta_dpo/gap_mean": 22.49981117248535,
"beta_dpo/gap_std": 37.147884368896484,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.36020942408376966,
"grad_norm": 74.52308654785156,
"learning_rate": 4.0525824823390043e-07,
"logits/chosen": -0.8162400722503662,
"logits/rejected": -0.8232384324073792,
"loss": 5.1239,
"step": 172
},
{
"beta_dpo/beta_used": 0.016885017976164818,
"beta_dpo/beta_used_raw": -0.014056820422410965,
"beta_dpo/gap_mean": 18.819292068481445,
"beta_dpo/gap_std": 36.193111419677734,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.362303664921466,
"grad_norm": 80.40719604492188,
"learning_rate": 4.0381917299505686e-07,
"logits/chosen": -0.7334867119789124,
"logits/rejected": -0.7083029747009277,
"loss": 4.7244,
"step": 173
},
{
"beta_dpo/beta_used": 0.03210830315947533,
"beta_dpo/beta_used_raw": 0.02511240914463997,
"beta_dpo/gap_mean": 20.32571792602539,
"beta_dpo/gap_std": 35.956050872802734,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.3643979057591623,
"grad_norm": 83.13336181640625,
"learning_rate": 4.0237184890078243e-07,
"logits/chosen": -0.8683218359947205,
"logits/rejected": -0.8630374073982239,
"loss": 4.126,
"step": 174
},
{
"beta_dpo/beta_used": 0.03025994263589382,
"beta_dpo/beta_used_raw": 0.012775203213095665,
"beta_dpo/gap_mean": 20.767414093017578,
"beta_dpo/gap_std": 35.31028747558594,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.36649214659685864,
"grad_norm": 133.00521850585938,
"learning_rate": 4.00916353566676e-07,
"logits/chosen": -0.7180600762367249,
"logits/rejected": -0.7292754650115967,
"loss": 4.5442,
"step": 175
},
{
"beta_dpo/beta_used": 0.020637210458517075,
"beta_dpo/beta_used_raw": 0.006442366633564234,
"beta_dpo/gap_mean": 19.601943969726562,
"beta_dpo/gap_std": 39.14218521118164,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.36858638743455496,
"grad_norm": 93.0556640625,
"learning_rate": 3.994527650465352e-07,
"logits/chosen": -0.7302559018135071,
"logits/rejected": -0.7689952850341797,
"loss": 4.4981,
"step": 176
},
{
"beta_dpo/beta_used": 0.014554323628544807,
"beta_dpo/beta_used_raw": -0.010481350123882294,
"beta_dpo/gap_mean": 17.167186737060547,
"beta_dpo/gap_std": 39.22663497924805,
"beta_dpo/mask_keep_frac": 0.875,
"epoch": 0.3706806282722513,
"grad_norm": 78.07917785644531,
"learning_rate": 3.979811618281705e-07,
"logits/chosen": -0.7889816761016846,
"logits/rejected": -0.7952367067337036,
"loss": 4.7903,
"step": 177
},
{
"beta_dpo/beta_used": 0.021441150456666946,
"beta_dpo/beta_used_raw": 0.002083552535623312,
"beta_dpo/gap_mean": 20.210954666137695,
"beta_dpo/gap_std": 39.11219787597656,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.37277486910994767,
"grad_norm": 100.62411499023438,
"learning_rate": 3.9650162282919654e-07,
"logits/chosen": -0.6511439681053162,
"logits/rejected": -0.6596049666404724,
"loss": 4.3602,
"step": 178
},
{
"beta_dpo/beta_used": 0.0247175469994545,
"beta_dpo/beta_used_raw": -6.247404962778091e-05,
"beta_dpo/gap_mean": 19.763917922973633,
"beta_dpo/gap_std": 37.68657302856445,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.374869109947644,
"grad_norm": 76.8095932006836,
"learning_rate": 3.9501422739279953e-07,
"logits/chosen": -0.7534154057502747,
"logits/rejected": -0.7417958974838257,
"loss": 4.166,
"step": 179
},
{
"beta_dpo/beta_used": 0.03843570500612259,
"beta_dpo/beta_used_raw": 0.017177987843751907,
"beta_dpo/gap_mean": 19.746444702148438,
"beta_dpo/gap_std": 37.75269317626953,
"beta_dpo/mask_keep_frac": 0.59375,
"epoch": 0.3769633507853403,
"grad_norm": 138.32301330566406,
"learning_rate": 3.935190552834828e-07,
"logits/chosen": -0.6509720087051392,
"logits/rejected": -0.7430813312530518,
"loss": 3.856,
"step": 180
},
{
"beta_dpo/beta_used": 0.026693008840084076,
"beta_dpo/beta_used_raw": 0.021555408835411072,
"beta_dpo/gap_mean": 20.54876136779785,
"beta_dpo/gap_std": 38.2152214050293,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.37905759162303665,
"grad_norm": 180.86692810058594,
"learning_rate": 3.920161866827889e-07,
"logits/chosen": -0.813086211681366,
"logits/rejected": -0.8329648971557617,
"loss": 4.3399,
"step": 181
},
{
"beta_dpo/beta_used": 0.029124662280082703,
"beta_dpo/beta_used_raw": 0.01702137291431427,
"beta_dpo/gap_mean": 21.305740356445312,
"beta_dpo/gap_std": 37.635379791259766,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.381151832460733,
"grad_norm": 117.95497131347656,
"learning_rate": 3.90505702185e-07,
"logits/chosen": -0.6139867305755615,
"logits/rejected": -0.722787082195282,
"loss": 3.9507,
"step": 182
},
{
"beta_dpo/beta_used": 0.023187464103102684,
"beta_dpo/beta_used_raw": -0.002321781124919653,
"beta_dpo/gap_mean": 23.785552978515625,
"beta_dpo/gap_std": 39.93912887573242,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.3832460732984293,
"grad_norm": 109.51704406738281,
"learning_rate": 3.889876827928156e-07,
"logits/chosen": -0.7375423312187195,
"logits/rejected": -0.7235562205314636,
"loss": 4.2095,
"step": 183
},
{
"beta_dpo/beta_used": 0.03651594743132591,
"beta_dpo/beta_used_raw": 0.03403354063630104,
"beta_dpo/gap_mean": 26.601848602294922,
"beta_dpo/gap_std": 41.58767318725586,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.38534031413612563,
"grad_norm": 98.02659606933594,
"learning_rate": 3.874622099130087e-07,
"logits/chosen": -0.7697808742523193,
"logits/rejected": -0.7725228071212769,
"loss": 3.7018,
"step": 184
},
{
"beta_dpo/beta_used": 0.02182621695101261,
"beta_dpo/beta_used_raw": -0.006090118549764156,
"beta_dpo/gap_mean": 26.3581485748291,
"beta_dpo/gap_std": 42.42856216430664,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.387434554973822,
"grad_norm": 218.3275146484375,
"learning_rate": 3.859293653520604e-07,
"logits/chosen": -0.8233194351196289,
"logits/rejected": -0.8047745227813721,
"loss": 4.246,
"step": 185
},
{
"beta_dpo/beta_used": 0.03138742968440056,
"beta_dpo/beta_used_raw": 0.009212229400873184,
"beta_dpo/gap_mean": 24.505887985229492,
"beta_dpo/gap_std": 41.48905563354492,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.38952879581151834,
"grad_norm": 124.85476684570312,
"learning_rate": 3.8438923131177237e-07,
"logits/chosen": -0.7481645345687866,
"logits/rejected": -0.7928252220153809,
"loss": 3.7283,
"step": 186
},
{
"beta_dpo/beta_used": 0.010927281342446804,
"beta_dpo/beta_used_raw": -0.0037998317275196314,
"beta_dpo/gap_mean": 22.086095809936523,
"beta_dpo/gap_std": 42.30852127075195,
"beta_dpo/mask_keep_frac": 0.625,
"epoch": 0.39162303664921466,
"grad_norm": 67.12848663330078,
"learning_rate": 3.828418903848593e-07,
"logits/chosen": -0.6687250137329102,
"logits/rejected": -0.666191816329956,
"loss": 5.0962,
"step": 187
},
{
"beta_dpo/beta_used": 0.02534855529665947,
"beta_dpo/beta_used_raw": 0.011756940744817257,
"beta_dpo/gap_mean": 21.788326263427734,
"beta_dpo/gap_std": 42.513572692871094,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.393717277486911,
"grad_norm": 134.685791015625,
"learning_rate": 3.812874255505191e-07,
"logits/chosen": -0.8032656908035278,
"logits/rejected": -0.775035560131073,
"loss": 4.2692,
"step": 188
},
{
"beta_dpo/beta_used": 0.030971940606832504,
"beta_dpo/beta_used_raw": 0.02102605067193508,
"beta_dpo/gap_mean": 23.528629302978516,
"beta_dpo/gap_std": 41.77531433105469,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.3958115183246073,
"grad_norm": 136.64242553710938,
"learning_rate": 3.797259201699833e-07,
"logits/chosen": -0.809384286403656,
"logits/rejected": -0.8046677112579346,
"loss": 4.0389,
"step": 189
},
{
"beta_dpo/beta_used": 0.01858203113079071,
"beta_dpo/beta_used_raw": -0.0006800373084843159,
"beta_dpo/gap_mean": 24.665685653686523,
"beta_dpo/gap_std": 41.014503479003906,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.39790575916230364,
"grad_norm": 119.0221176147461,
"learning_rate": 3.781574579820464e-07,
"logits/chosen": -0.7358199954032898,
"logits/rejected": -0.7636604905128479,
"loss": 4.5646,
"step": 190
},
{
"beta_dpo/beta_used": 0.031428806483745575,
"beta_dpo/beta_used_raw": -0.002010398544371128,
"beta_dpo/gap_mean": 24.216768264770508,
"beta_dpo/gap_std": 43.79417419433594,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.4,
"grad_norm": 139.61459350585938,
"learning_rate": 3.765821230985757e-07,
"logits/chosen": -0.8139005899429321,
"logits/rejected": -0.7801560163497925,
"loss": 4.2518,
"step": 191
},
{
"beta_dpo/beta_used": 0.03002096898853779,
"beta_dpo/beta_used_raw": 0.009661837480962276,
"beta_dpo/gap_mean": 24.45059585571289,
"beta_dpo/gap_std": 42.039737701416016,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.40209424083769635,
"grad_norm": 168.8079071044922,
"learning_rate": 3.75e-07,
"logits/chosen": -0.812671422958374,
"logits/rejected": -0.8485623002052307,
"loss": 3.8821,
"step": 192
},
{
"beta_dpo/beta_used": 0.02032877318561077,
"beta_dpo/beta_used_raw": 0.0041326722130179405,
"beta_dpo/gap_mean": 23.045848846435547,
"beta_dpo/gap_std": 43.16719436645508,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.4041884816753927,
"grad_norm": 104.38407897949219,
"learning_rate": 3.734111735307796e-07,
"logits/chosen": -0.8072720766067505,
"logits/rejected": -0.839698851108551,
"loss": 4.4311,
"step": 193
},
{
"beta_dpo/beta_used": 0.020913559943437576,
"beta_dpo/beta_used_raw": -0.004458375740796328,
"beta_dpo/gap_mean": 23.628847122192383,
"beta_dpo/gap_std": 41.59272766113281,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.406282722513089,
"grad_norm": 118.84005737304688,
"learning_rate": 3.7181572889485623e-07,
"logits/chosen": -0.8274001479148865,
"logits/rejected": -0.8259969353675842,
"loss": 4.3544,
"step": 194
},
{
"beta_dpo/beta_used": 0.0032001424115151167,
"beta_dpo/beta_used_raw": -0.02794015407562256,
"beta_dpo/gap_mean": 20.366397857666016,
"beta_dpo/gap_std": 40.22095489501953,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4083769633507853,
"grad_norm": 26.801753997802734,
"learning_rate": 3.7021375165108377e-07,
"logits/chosen": -0.8312541246414185,
"logits/rejected": -0.8000338077545166,
"loss": 5.3373,
"step": 195
},
{
"beta_dpo/beta_used": 0.031204037368297577,
"beta_dpo/beta_used_raw": 0.022474460303783417,
"beta_dpo/gap_mean": 21.7479190826416,
"beta_dpo/gap_std": 39.999412536621094,
"beta_dpo/mask_keep_frac": 0.96875,
"epoch": 0.41047120418848165,
"grad_norm": 175.2480926513672,
"learning_rate": 3.6860532770864005e-07,
"logits/chosen": -0.8379102945327759,
"logits/rejected": -0.8230741620063782,
"loss": 4.1619,
"step": 196
},
{
"beta_dpo/beta_used": 0.04341711848974228,
"beta_dpo/beta_used_raw": 0.04169736057519913,
"beta_dpo/gap_mean": 25.090091705322266,
"beta_dpo/gap_std": 41.287593841552734,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.41256544502617803,
"grad_norm": 260.1309814453125,
"learning_rate": 3.6699054332241985e-07,
"logits/chosen": -0.6981998682022095,
"logits/rejected": -0.7817898392677307,
"loss": 3.6891,
"step": 197
},
{
"beta_dpo/beta_used": 0.018048102036118507,
"beta_dpo/beta_used_raw": 0.005947708152234554,
"beta_dpo/gap_mean": 27.571151733398438,
"beta_dpo/gap_std": 44.79579544067383,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.41465968586387436,
"grad_norm": 111.63640594482422,
"learning_rate": 3.653694850884091e-07,
"logits/chosen": -0.7784479856491089,
"logits/rejected": -0.7769980430603027,
"loss": 4.6279,
"step": 198
},
{
"beta_dpo/beta_used": 0.026122871786355972,
"beta_dpo/beta_used_raw": 0.008887620642781258,
"beta_dpo/gap_mean": 26.642627716064453,
"beta_dpo/gap_std": 45.17276382446289,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.4167539267015707,
"grad_norm": 211.3232879638672,
"learning_rate": 3.6374223993904124e-07,
"logits/chosen": -0.7652086019515991,
"logits/rejected": -0.7274236679077148,
"loss": 4.526,
"step": 199
},
{
"beta_dpo/beta_used": 0.004629853181540966,
"beta_dpo/beta_used_raw": -0.015042738988995552,
"beta_dpo/gap_mean": 24.910205841064453,
"beta_dpo/gap_std": 47.183074951171875,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.418848167539267,
"grad_norm": 33.36002731323242,
"learning_rate": 3.621088951385353e-07,
"logits/chosen": -0.7769320607185364,
"logits/rejected": -0.8450891971588135,
"loss": 5.1899,
"step": 200
},
{
"epoch": 0.418848167539267,
"eval_beta_dpo/beta_used": 0.03460463136434555,
"eval_beta_dpo/beta_used_raw": 0.013989130035042763,
"eval_beta_dpo/gap_mean": 23.174381256103516,
"eval_beta_dpo/gap_std": 48.25934600830078,
"eval_beta_dpo/mask_keep_frac": 1.0,
"eval_logits/chosen": -0.815741777420044,
"eval_logits/rejected": -0.8024517893791199,
"eval_loss": 0.6122435331344604,
"eval_runtime": 82.2329,
"eval_samples_per_second": 24.321,
"eval_steps_per_second": 1.52,
"step": 200
},
{
"beta_dpo/beta_used": 0.03787456825375557,
"beta_dpo/beta_used_raw": 0.026411913335323334,
"beta_dpo/gap_mean": 23.28668785095215,
"beta_dpo/gap_std": 45.737125396728516,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.42094240837696334,
"grad_norm": 203.3619384765625,
"learning_rate": 3.604695382782159e-07,
"logits/chosen": -0.7913077473640442,
"logits/rejected": -0.8229740262031555,
"loss": 4.2735,
"step": 201
},
{
"beta_dpo/beta_used": 0.050101663917303085,
"beta_dpo/beta_used_raw": 0.027181357145309448,
"beta_dpo/gap_mean": 26.254316329956055,
"beta_dpo/gap_std": 47.33518600463867,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.42303664921465967,
"grad_norm": 260.6098327636719,
"learning_rate": 3.588242572718162e-07,
"logits/chosen": -0.8053906559944153,
"logits/rejected": -0.8041623830795288,
"loss": 3.7601,
"step": 202
},
{
"beta_dpo/beta_used": 0.009512822143733501,
"beta_dpo/beta_used_raw": -0.01119938027113676,
"beta_dpo/gap_mean": 23.491336822509766,
"beta_dpo/gap_std": 43.72566223144531,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.42513089005235605,
"grad_norm": 84.75527954101562,
"learning_rate": 3.571731403507635e-07,
"logits/chosen": -0.8103188872337341,
"logits/rejected": -0.8483298420906067,
"loss": 4.8249,
"step": 203
},
{
"beta_dpo/beta_used": 0.02792198956012726,
"beta_dpo/beta_used_raw": 0.01622004434466362,
"beta_dpo/gap_mean": 25.187780380249023,
"beta_dpo/gap_std": 43.19692611694336,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.4272251308900524,
"grad_norm": 155.26531982421875,
"learning_rate": 3.5551627605944746e-07,
"logits/chosen": -0.8938873410224915,
"logits/rejected": -0.8654384016990662,
"loss": 4.0644,
"step": 204
},
{
"beta_dpo/beta_used": 0.03272661939263344,
"beta_dpo/beta_used_raw": 0.005555758252739906,
"beta_dpo/gap_mean": 26.425273895263672,
"beta_dpo/gap_std": 45.58020782470703,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.4293193717277487,
"grad_norm": 67.79540252685547,
"learning_rate": 3.5385375325047163e-07,
"logits/chosen": -0.7698061466217041,
"logits/rejected": -0.76741623878479,
"loss": 4.3406,
"step": 205
},
{
"beta_dpo/beta_used": 0.032748252153396606,
"beta_dpo/beta_used_raw": 0.013970796950161457,
"beta_dpo/gap_mean": 28.709857940673828,
"beta_dpo/gap_std": 44.605228424072266,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.431413612565445,
"grad_norm": 372.34228515625,
"learning_rate": 3.5218566107988867e-07,
"logits/chosen": -0.7109194993972778,
"logits/rejected": -0.8103634119033813,
"loss": 4.6551,
"step": 206
},
{
"beta_dpo/beta_used": 0.015838006511330605,
"beta_dpo/beta_used_raw": -0.017275551334023476,
"beta_dpo/gap_mean": 25.304269790649414,
"beta_dpo/gap_std": 46.00745391845703,
"beta_dpo/mask_keep_frac": 0.65625,
"epoch": 0.43350785340314135,
"grad_norm": 136.5730438232422,
"learning_rate": 3.505120890024195e-07,
"logits/chosen": -0.7835868000984192,
"logits/rejected": -0.8143876194953918,
"loss": 4.6841,
"step": 207
},
{
"beta_dpo/beta_used": 0.01868237368762493,
"beta_dpo/beta_used_raw": -0.0011156108230352402,
"beta_dpo/gap_mean": 24.15138816833496,
"beta_dpo/gap_std": 47.38937759399414,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.4356020942408377,
"grad_norm": 93.81692504882812,
"learning_rate": 3.4883312676665534e-07,
"logits/chosen": -0.8852607011795044,
"logits/rejected": -0.8384636640548706,
"loss": 4.3394,
"step": 208
},
{
"beta_dpo/beta_used": 0.025227809324860573,
"beta_dpo/beta_used_raw": -0.008135579526424408,
"beta_dpo/gap_mean": 22.95732879638672,
"beta_dpo/gap_std": 47.612056732177734,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.437696335078534,
"grad_norm": 296.78143310546875,
"learning_rate": 3.4714886441024573e-07,
"logits/chosen": -0.6929375529289246,
"logits/rejected": -0.6913096904754639,
"loss": 4.5351,
"step": 209
},
{
"beta_dpo/beta_used": 0.03174670785665512,
"beta_dpo/beta_used_raw": 0.003210625145584345,
"beta_dpo/gap_mean": 23.549930572509766,
"beta_dpo/gap_std": 46.66145706176758,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.4397905759162304,
"grad_norm": 154.39170837402344,
"learning_rate": 3.454593922550693e-07,
"logits/chosen": -0.7912762761116028,
"logits/rejected": -0.7809194326400757,
"loss": 4.5574,
"step": 210
},
{
"beta_dpo/beta_used": 0.02569686621427536,
"beta_dpo/beta_used_raw": 0.008063238114118576,
"beta_dpo/gap_mean": 27.75176429748535,
"beta_dpo/gap_std": 44.786964416503906,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.4418848167539267,
"grad_norm": 125.4310531616211,
"learning_rate": 3.4376480090239047e-07,
"logits/chosen": -0.9319531917572021,
"logits/rejected": -0.9190531969070435,
"loss": 4.106,
"step": 211
},
{
"beta_dpo/beta_used": 0.013355633243918419,
"beta_dpo/beta_used_raw": -0.006055002100765705,
"beta_dpo/gap_mean": 27.055316925048828,
"beta_dpo/gap_std": 43.12101364135742,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.44397905759162304,
"grad_norm": 71.85043334960938,
"learning_rate": 3.4206518122800055e-07,
"logits/chosen": -0.8760491609573364,
"logits/rejected": -0.8264781832695007,
"loss": 4.6725,
"step": 212
},
{
"beta_dpo/beta_used": 0.018737439066171646,
"beta_dpo/beta_used_raw": -0.015005623921751976,
"beta_dpo/gap_mean": 23.8645076751709,
"beta_dpo/gap_std": 44.43546676635742,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.44607329842931936,
"grad_norm": 226.47691345214844,
"learning_rate": 3.403606243773448e-07,
"logits/chosen": -0.9040374755859375,
"logits/rejected": -0.873714804649353,
"loss": 4.761,
"step": 213
},
{
"beta_dpo/beta_used": 0.03264402225613594,
"beta_dpo/beta_used_raw": 0.007331144995987415,
"beta_dpo/gap_mean": 23.217544555664062,
"beta_dpo/gap_std": 46.46554946899414,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4481675392670157,
"grad_norm": 235.9413604736328,
"learning_rate": 3.3865122176063385e-07,
"logits/chosen": -0.8038402795791626,
"logits/rejected": -0.8402938842773438,
"loss": 4.3773,
"step": 214
},
{
"beta_dpo/beta_used": 0.012464843690395355,
"beta_dpo/beta_used_raw": -0.03232930973172188,
"beta_dpo/gap_mean": 22.477909088134766,
"beta_dpo/gap_std": 47.451107025146484,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.450261780104712,
"grad_norm": 118.8662338256836,
"learning_rate": 3.3693706504794243e-07,
"logits/chosen": -0.872378945350647,
"logits/rejected": -0.8904660940170288,
"loss": 4.9657,
"step": 215
},
{
"beta_dpo/beta_used": 0.048138365149497986,
"beta_dpo/beta_used_raw": 0.032591041177511215,
"beta_dpo/gap_mean": 24.77643585205078,
"beta_dpo/gap_std": 48.98875427246094,
"beta_dpo/mask_keep_frac": 0.59375,
"epoch": 0.4523560209424084,
"grad_norm": 260.2225036621094,
"learning_rate": 3.3521824616429284e-07,
"logits/chosen": -0.8762063980102539,
"logits/rejected": -0.8824567794799805,
"loss": 3.8946,
"step": 216
},
{
"beta_dpo/beta_used": 0.02065902203321457,
"beta_dpo/beta_used_raw": 0.0016261846758425236,
"beta_dpo/gap_mean": 27.15247917175293,
"beta_dpo/gap_std": 48.955963134765625,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.4544502617801047,
"grad_norm": 117.39456939697266,
"learning_rate": 3.334948572847253e-07,
"logits/chosen": -0.7578608989715576,
"logits/rejected": -0.7313589453697205,
"loss": 4.2169,
"step": 217
},
{
"beta_dpo/beta_used": 0.03485836833715439,
"beta_dpo/beta_used_raw": 0.020042069256305695,
"beta_dpo/gap_mean": 29.53237533569336,
"beta_dpo/gap_std": 46.928466796875,
"beta_dpo/mask_keep_frac": 0.625,
"epoch": 0.45654450261780105,
"grad_norm": 340.87933349609375,
"learning_rate": 3.317669908293554e-07,
"logits/chosen": -0.8080700039863586,
"logits/rejected": -0.8047543168067932,
"loss": 4.0359,
"step": 218
},
{
"beta_dpo/beta_used": 0.028133587911725044,
"beta_dpo/beta_used_raw": 0.0015811556950211525,
"beta_dpo/gap_mean": 30.489063262939453,
"beta_dpo/gap_std": 46.79350280761719,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.4586387434554974,
"grad_norm": 111.17486572265625,
"learning_rate": 3.300347394584172e-07,
"logits/chosen": -0.8630120158195496,
"logits/rejected": -0.8839913606643677,
"loss": 4.1282,
"step": 219
},
{
"beta_dpo/beta_used": 0.021622518077492714,
"beta_dpo/beta_used_raw": -0.011343970894813538,
"beta_dpo/gap_mean": 30.70256805419922,
"beta_dpo/gap_std": 47.032894134521484,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.4607329842931937,
"grad_norm": 238.27330017089844,
"learning_rate": 3.2829819606729477e-07,
"logits/chosen": -0.8160958290100098,
"logits/rejected": -0.7701820135116577,
"loss": 4.703,
"step": 220
},
{
"beta_dpo/beta_used": 0.0057728588581085205,
"beta_dpo/beta_used_raw": -0.048508308827877045,
"beta_dpo/gap_mean": 26.71761703491211,
"beta_dpo/gap_std": 45.98579788208008,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.46282722513089003,
"grad_norm": 70.58045959472656,
"learning_rate": 3.265574537815398e-07,
"logits/chosen": -0.8309197425842285,
"logits/rejected": -0.8332974314689636,
"loss": 5.2443,
"step": 221
},
{
"beta_dpo/beta_used": 0.023440374061465263,
"beta_dpo/beta_used_raw": -0.0038303863257169724,
"beta_dpo/gap_mean": 26.571941375732422,
"beta_dpo/gap_std": 45.80172348022461,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.4649214659685864,
"grad_norm": 175.59906005859375,
"learning_rate": 3.248126059518784e-07,
"logits/chosen": -0.9114519953727722,
"logits/rejected": -0.8528196215629578,
"loss": 4.5703,
"step": 222
},
{
"beta_dpo/beta_used": 0.02268083207309246,
"beta_dpo/beta_used_raw": 0.017425578087568283,
"beta_dpo/gap_mean": 26.815799713134766,
"beta_dpo/gap_std": 44.76752471923828,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.46701570680628274,
"grad_norm": 131.18267822265625,
"learning_rate": 3.230637461492043e-07,
"logits/chosen": -0.7977765798568726,
"logits/rejected": -0.7418711185455322,
"loss": 4.2857,
"step": 223
},
{
"beta_dpo/beta_used": 0.027264375239610672,
"beta_dpo/beta_used_raw": 0.011616711504757404,
"beta_dpo/gap_mean": 27.529714584350586,
"beta_dpo/gap_std": 45.91986846923828,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.46910994764397906,
"grad_norm": 208.3940887451172,
"learning_rate": 3.213109681595612e-07,
"logits/chosen": -0.7965356707572937,
"logits/rejected": -0.791540801525116,
"loss": 4.1658,
"step": 224
},
{
"beta_dpo/beta_used": 0.013540107756853104,
"beta_dpo/beta_used_raw": -0.02534569799900055,
"beta_dpo/gap_mean": 28.939363479614258,
"beta_dpo/gap_std": 45.13759231567383,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.4712041884816754,
"grad_norm": 136.84378051757812,
"learning_rate": 3.1955436597911315e-07,
"logits/chosen": -0.7949999570846558,
"logits/rejected": -0.7891294360160828,
"loss": 4.8717,
"step": 225
},
{
"beta_dpo/beta_used": 0.011019091121852398,
"beta_dpo/beta_used_raw": -0.0036931331269443035,
"beta_dpo/gap_mean": 26.09113311767578,
"beta_dpo/gap_std": 47.119407653808594,
"beta_dpo/mask_keep_frac": 0.90625,
"epoch": 0.4732984293193717,
"grad_norm": 103.97045135498047,
"learning_rate": 3.1779403380910425e-07,
"logits/chosen": -0.8348425626754761,
"logits/rejected": -0.8312546014785767,
"loss": 4.7836,
"step": 226
},
{
"beta_dpo/beta_used": 0.02266230434179306,
"beta_dpo/beta_used_raw": 0.014338882640004158,
"beta_dpo/gap_mean": 26.389862060546875,
"beta_dpo/gap_std": 47.60458755493164,
"beta_dpo/mask_keep_frac": 0.625,
"epoch": 0.47539267015706804,
"grad_norm": 282.4552307128906,
"learning_rate": 3.160300660508064e-07,
"logits/chosen": -0.8365087509155273,
"logits/rejected": -0.8325910568237305,
"loss": 4.8797,
"step": 227
},
{
"beta_dpo/beta_used": 0.02304881624877453,
"beta_dpo/beta_used_raw": -0.007625843398272991,
"beta_dpo/gap_mean": 27.687213897705078,
"beta_dpo/gap_std": 46.798221588134766,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.4774869109947644,
"grad_norm": 99.42752075195312,
"learning_rate": 3.1426255730045695e-07,
"logits/chosen": -0.8232005834579468,
"logits/rejected": -0.785977840423584,
"loss": 4.3965,
"step": 228
},
{
"beta_dpo/beta_used": 0.024218367412686348,
"beta_dpo/beta_used_raw": -0.012653389945626259,
"beta_dpo/gap_mean": 31.94796371459961,
"beta_dpo/gap_std": 46.080589294433594,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.47958115183246075,
"grad_norm": 176.44334411621094,
"learning_rate": 3.1249160234418644e-07,
"logits/chosen": -0.843792736530304,
"logits/rejected": -0.8399423956871033,
"loss": 4.5637,
"step": 229
},
{
"beta_dpo/beta_used": 0.004419737029820681,
"beta_dpo/beta_used_raw": -0.03868510574102402,
"beta_dpo/gap_mean": 32.72969436645508,
"beta_dpo/gap_std": 48.032718658447266,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.4816753926701571,
"grad_norm": 40.30256652832031,
"learning_rate": 3.1071729615293424e-07,
"logits/chosen": -0.8502980470657349,
"logits/rejected": -0.8471386432647705,
"loss": 5.141,
"step": 230
},
{
"beta_dpo/beta_used": 0.002270770724862814,
"beta_dpo/beta_used_raw": -0.041417621076107025,
"beta_dpo/gap_mean": 29.029102325439453,
"beta_dpo/gap_std": 47.07488250732422,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.4837696335078534,
"grad_norm": 20.798189163208008,
"learning_rate": 3.0893973387735683e-07,
"logits/chosen": -0.7537152767181396,
"logits/rejected": -0.7852950096130371,
"loss": 5.3369,
"step": 231
},
{
"beta_dpo/beta_used": 0.021667521446943283,
"beta_dpo/beta_used_raw": -0.012663575820624828,
"beta_dpo/gap_mean": 26.52678108215332,
"beta_dpo/gap_std": 47.0605354309082,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.48586387434554973,
"grad_norm": 168.40077209472656,
"learning_rate": 3.071590108427243e-07,
"logits/chosen": -0.7681893706321716,
"logits/rejected": -0.7273673415184021,
"loss": 4.6679,
"step": 232
},
{
"beta_dpo/beta_used": 0.040682002902030945,
"beta_dpo/beta_used_raw": 0.029946379363536835,
"beta_dpo/gap_mean": 27.526458740234375,
"beta_dpo/gap_std": 47.81543731689453,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.48795811518324606,
"grad_norm": 153.6154022216797,
"learning_rate": 3.05375222543809e-07,
"logits/chosen": -0.8089311122894287,
"logits/rejected": -0.8404504060745239,
"loss": 3.544,
"step": 233
},
{
"beta_dpo/beta_used": 0.037120141088962555,
"beta_dpo/beta_used_raw": 0.02446739934384823,
"beta_dpo/gap_mean": 29.188695907592773,
"beta_dpo/gap_std": 50.91583251953125,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.4900523560209424,
"grad_norm": 203.7050018310547,
"learning_rate": 3.035884646397637e-07,
"logits/chosen": -0.8175359964370728,
"logits/rejected": -0.778833270072937,
"loss": 4.2035,
"step": 234
},
{
"beta_dpo/beta_used": 0.03462304174900055,
"beta_dpo/beta_used_raw": 0.023997776210308075,
"beta_dpo/gap_mean": 28.996198654174805,
"beta_dpo/gap_std": 53.151405334472656,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.49214659685863876,
"grad_norm": 170.55416870117188,
"learning_rate": 3.017988329489923e-07,
"logits/chosen": -0.8345946073532104,
"logits/rejected": -0.8394272923469543,
"loss": 4.0689,
"step": 235
},
{
"beta_dpo/beta_used": 0.03600964695215225,
"beta_dpo/beta_used_raw": 0.0005581271834671497,
"beta_dpo/gap_mean": 29.45612144470215,
"beta_dpo/gap_std": 52.83362579345703,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.4942408376963351,
"grad_norm": 189.6999053955078,
"learning_rate": 3.000064234440111e-07,
"logits/chosen": -0.8346319794654846,
"logits/rejected": -0.8440847396850586,
"loss": 3.9006,
"step": 236
},
{
"beta_dpo/beta_used": 0.019271746277809143,
"beta_dpo/beta_used_raw": -0.029383037239313126,
"beta_dpo/gap_mean": 30.120567321777344,
"beta_dpo/gap_std": 51.399436950683594,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.4963350785340314,
"grad_norm": 125.6007308959961,
"learning_rate": 2.9821133224630223e-07,
"logits/chosen": -0.7935413122177124,
"logits/rejected": -0.8029470443725586,
"loss": 4.544,
"step": 237
},
{
"beta_dpo/beta_used": 0.01742161437869072,
"beta_dpo/beta_used_raw": -0.03088521584868431,
"beta_dpo/gap_mean": 31.576923370361328,
"beta_dpo/gap_std": 51.387908935546875,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.49842931937172774,
"grad_norm": 101.49148559570312,
"learning_rate": 2.964136556211588e-07,
"logits/chosen": -0.8203566074371338,
"logits/rejected": -0.8182651996612549,
"loss": 4.2478,
"step": 238
},
{
"beta_dpo/beta_used": 0.03185847029089928,
"beta_dpo/beta_used_raw": -0.010265880264341831,
"beta_dpo/gap_mean": 28.438522338867188,
"beta_dpo/gap_std": 53.83900833129883,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.5005235602094241,
"grad_norm": 334.53521728515625,
"learning_rate": 2.946134899725226e-07,
"logits/chosen": -0.7999371886253357,
"logits/rejected": -0.8673533201217651,
"loss": 4.7214,
"step": 239
},
{
"beta_dpo/beta_used": 0.013771746307611465,
"beta_dpo/beta_used_raw": 0.0037962235510349274,
"beta_dpo/gap_mean": 29.074222564697266,
"beta_dpo/gap_std": 51.6200065612793,
"beta_dpo/mask_keep_frac": 0.96875,
"epoch": 0.5026178010471204,
"grad_norm": 125.98123168945312,
"learning_rate": 2.9281093183781403e-07,
"logits/chosen": -0.8858702182769775,
"logits/rejected": -0.9153672456741333,
"loss": 4.722,
"step": 240
},
{
"beta_dpo/beta_used": 0.008614077232778072,
"beta_dpo/beta_used_raw": -0.03193598613142967,
"beta_dpo/gap_mean": 27.712648391723633,
"beta_dpo/gap_std": 50.65081787109375,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.5047120418848168,
"grad_norm": 73.02886199951172,
"learning_rate": 2.910060778827554e-07,
"logits/chosen": -0.7879363298416138,
"logits/rejected": -0.7629251480102539,
"loss": 4.974,
"step": 241
},
{
"beta_dpo/beta_used": 0.017704099416732788,
"beta_dpo/beta_used_raw": -0.026023104786872864,
"beta_dpo/gap_mean": 26.438953399658203,
"beta_dpo/gap_std": 49.28800582885742,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.506806282722513,
"grad_norm": 155.94786071777344,
"learning_rate": 2.891990248961871e-07,
"logits/chosen": -0.8872713446617126,
"logits/rejected": -0.8689901828765869,
"loss": 4.6485,
"step": 242
},
{
"beta_dpo/beta_used": 0.03200588375329971,
"beta_dpo/beta_used_raw": 0.007553852163255215,
"beta_dpo/gap_mean": 29.208711624145508,
"beta_dpo/gap_std": 48.12644577026367,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.5089005235602094,
"grad_norm": 199.69061279296875,
"learning_rate": 2.873898697848762e-07,
"logits/chosen": -0.8087879419326782,
"logits/rejected": -0.7941450476646423,
"loss": 4.3873,
"step": 243
},
{
"beta_dpo/beta_used": 0.03249687701463699,
"beta_dpo/beta_used_raw": 0.00029761437326669693,
"beta_dpo/gap_mean": 33.442317962646484,
"beta_dpo/gap_std": 50.90048599243164,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5109947643979058,
"grad_norm": 206.1800079345703,
"learning_rate": 2.8557870956832133e-07,
"logits/chosen": -0.7746649384498596,
"logits/rejected": -0.687791645526886,
"loss": 3.88,
"step": 244
},
{
"beta_dpo/beta_used": 0.04741879552602768,
"beta_dpo/beta_used_raw": 0.029347646981477737,
"beta_dpo/gap_mean": 32.799251556396484,
"beta_dpo/gap_std": 47.67058563232422,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.5130890052356021,
"grad_norm": 173.1085968017578,
"learning_rate": 2.837656413735479e-07,
"logits/chosen": -0.8986497521400452,
"logits/rejected": -0.8961766958236694,
"loss": 3.3984,
"step": 245
},
{
"beta_dpo/beta_used": 0.0235223900526762,
"beta_dpo/beta_used_raw": -0.011849863454699516,
"beta_dpo/gap_mean": 30.94761848449707,
"beta_dpo/gap_std": 49.176815032958984,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.5151832460732985,
"grad_norm": 281.09698486328125,
"learning_rate": 2.8195076242990116e-07,
"logits/chosen": -0.8312711119651794,
"logits/rejected": -0.8494311571121216,
"loss": 4.6427,
"step": 246
},
{
"beta_dpo/beta_used": 0.02513197809457779,
"beta_dpo/beta_used_raw": -0.007078057155013084,
"beta_dpo/gap_mean": 28.14275550842285,
"beta_dpo/gap_std": 48.82672882080078,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.5172774869109947,
"grad_norm": 136.38978576660156,
"learning_rate": 2.801341700638307e-07,
"logits/chosen": -0.8171231746673584,
"logits/rejected": -0.8114153146743774,
"loss": 4.2088,
"step": 247
},
{
"beta_dpo/beta_used": 0.01641557179391384,
"beta_dpo/beta_used_raw": -0.02007879875600338,
"beta_dpo/gap_mean": 26.242324829101562,
"beta_dpo/gap_std": 45.897560119628906,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5193717277486911,
"grad_norm": 110.12213134765625,
"learning_rate": 2.7831596169367227e-07,
"logits/chosen": -0.7554613351821899,
"logits/rejected": -0.8297998905181885,
"loss": 4.4495,
"step": 248
},
{
"beta_dpo/beta_used": 0.013018419966101646,
"beta_dpo/beta_used_raw": -0.01035161130130291,
"beta_dpo/gap_mean": 21.812284469604492,
"beta_dpo/gap_std": 46.56766128540039,
"beta_dpo/mask_keep_frac": 0.875,
"epoch": 0.5214659685863874,
"grad_norm": 90.18573760986328,
"learning_rate": 2.7649623482442274e-07,
"logits/chosen": -0.818859338760376,
"logits/rejected": -0.8167266845703125,
"loss": 5.007,
"step": 249
},
{
"beta_dpo/beta_used": 0.05589645728468895,
"beta_dpo/beta_used_raw": 0.04678558558225632,
"beta_dpo/gap_mean": 24.672931671142578,
"beta_dpo/gap_std": 48.47020721435547,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5235602094240838,
"grad_norm": 336.7721862792969,
"learning_rate": 2.7467508704251135e-07,
"logits/chosen": -0.862523078918457,
"logits/rejected": -0.8510252237319946,
"loss": 3.9654,
"step": 250
},
{
"beta_dpo/beta_used": 0.018005074933171272,
"beta_dpo/beta_used_raw": -0.011267204768955708,
"beta_dpo/gap_mean": 24.661828994750977,
"beta_dpo/gap_std": 47.70268249511719,
"beta_dpo/mask_keep_frac": 0.875,
"epoch": 0.5256544502617801,
"grad_norm": 113.27556610107422,
"learning_rate": 2.7285261601056697e-07,
"logits/chosen": -0.9115744829177856,
"logits/rejected": -0.8325821757316589,
"loss": 5.02,
"step": 251
},
{
"beta_dpo/beta_used": 0.034432608634233475,
"beta_dpo/beta_used_raw": 0.016598613932728767,
"beta_dpo/gap_mean": 27.05451011657715,
"beta_dpo/gap_std": 50.06959915161133,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5277486910994764,
"grad_norm": 188.25326538085938,
"learning_rate": 2.7102891946217994e-07,
"logits/chosen": -0.9205706119537354,
"logits/rejected": -0.8480794429779053,
"loss": 4.3982,
"step": 252
},
{
"beta_dpo/beta_used": 0.030707869678735733,
"beta_dpo/beta_used_raw": 0.004673094488680363,
"beta_dpo/gap_mean": 26.41693878173828,
"beta_dpo/gap_std": 50.89750289916992,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.5298429319371728,
"grad_norm": 140.49769592285156,
"learning_rate": 2.692040951966617e-07,
"logits/chosen": -0.8601374626159668,
"logits/rejected": -0.8499505519866943,
"loss": 4.3829,
"step": 253
},
{
"beta_dpo/beta_used": 0.030033409595489502,
"beta_dpo/beta_used_raw": 0.012539991177618504,
"beta_dpo/gap_mean": 24.60215950012207,
"beta_dpo/gap_std": 47.5504035949707,
"beta_dpo/mask_keep_frac": 0.875,
"epoch": 0.5319371727748691,
"grad_norm": 98.29093933105469,
"learning_rate": 2.6737824107379947e-07,
"logits/chosen": -0.9111440777778625,
"logits/rejected": -0.8825950026512146,
"loss": 4.2623,
"step": 254
},
{
"beta_dpo/beta_used": 0.03537018597126007,
"beta_dpo/beta_used_raw": 0.024893784895539284,
"beta_dpo/gap_mean": 27.02058219909668,
"beta_dpo/gap_std": 46.62839126586914,
"beta_dpo/mask_keep_frac": 0.625,
"epoch": 0.5340314136125655,
"grad_norm": 235.2870330810547,
"learning_rate": 2.655514550086086e-07,
"logits/chosen": -0.7854205369949341,
"logits/rejected": -0.7229121327400208,
"loss": 3.8108,
"step": 255
},
{
"beta_dpo/beta_used": 0.042297471314668655,
"beta_dpo/beta_used_raw": 0.008141601458191872,
"beta_dpo/gap_mean": 28.063838958740234,
"beta_dpo/gap_std": 49.99174499511719,
"beta_dpo/mask_keep_frac": 0.625,
"epoch": 0.5361256544502618,
"grad_norm": 155.22914123535156,
"learning_rate": 2.6372383496608186e-07,
"logits/chosen": -0.8625849485397339,
"logits/rejected": -0.8409400582313538,
"loss": 3.9188,
"step": 256
},
{
"beta_dpo/beta_used": 0.020409418269991875,
"beta_dpo/beta_used_raw": 8.291192352771759e-05,
"beta_dpo/gap_mean": 29.060293197631836,
"beta_dpo/gap_std": 51.213829040527344,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5382198952879581,
"grad_norm": 107.94662475585938,
"learning_rate": 2.618954789559356e-07,
"logits/chosen": -0.7469907999038696,
"logits/rejected": -0.769670844078064,
"loss": 4.4188,
"step": 257
},
{
"beta_dpo/beta_used": 0.02360442467033863,
"beta_dpo/beta_used_raw": -0.016117922961711884,
"beta_dpo/gap_mean": 29.77499771118164,
"beta_dpo/gap_std": 46.84881591796875,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.5403141361256545,
"grad_norm": 134.36001586914062,
"learning_rate": 2.600664850273538e-07,
"logits/chosen": -0.803202748298645,
"logits/rejected": -0.7860767841339111,
"loss": 4.2857,
"step": 258
},
{
"beta_dpo/beta_used": 0.0034133887384086847,
"beta_dpo/beta_used_raw": -0.029627330601215363,
"beta_dpo/gap_mean": 26.850561141967773,
"beta_dpo/gap_std": 44.52630615234375,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5424083769633508,
"grad_norm": 30.7167911529541,
"learning_rate": 2.582369512637302e-07,
"logits/chosen": -0.6924210786819458,
"logits/rejected": -0.7782201766967773,
"loss": 5.2647,
"step": 259
},
{
"beta_dpo/beta_used": 0.008151357993483543,
"beta_dpo/beta_used_raw": -0.03724336996674538,
"beta_dpo/gap_mean": 21.897171020507812,
"beta_dpo/gap_std": 44.250633239746094,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.5445026178010471,
"grad_norm": 110.90091705322266,
"learning_rate": 2.5640697577740815e-07,
"logits/chosen": -0.7184336185455322,
"logits/rejected": -0.7615399956703186,
"loss": 5.2479,
"step": 260
},
{
"beta_dpo/beta_used": 0.03933139145374298,
"beta_dpo/beta_used_raw": 0.02173340693116188,
"beta_dpo/gap_mean": 20.506837844848633,
"beta_dpo/gap_std": 46.83831024169922,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.5465968586387434,
"grad_norm": 224.45547485351562,
"learning_rate": 2.5457665670441937e-07,
"logits/chosen": -0.6661160588264465,
"logits/rejected": -0.6675682067871094,
"loss": 4.4162,
"step": 261
},
{
"beta_dpo/beta_used": 0.014896124601364136,
"beta_dpo/beta_used_raw": -0.00048278551548719406,
"beta_dpo/gap_mean": 23.35280990600586,
"beta_dpo/gap_std": 45.730369567871094,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.5486910994764398,
"grad_norm": 101.4534683227539,
"learning_rate": 2.527460921992209e-07,
"logits/chosen": -0.7730051875114441,
"logits/rejected": -0.7815576791763306,
"loss": 4.7897,
"step": 262
},
{
"beta_dpo/beta_used": 0.02885139361023903,
"beta_dpo/beta_used_raw": -0.01011989638209343,
"beta_dpo/gap_mean": 26.439210891723633,
"beta_dpo/gap_std": 45.27045440673828,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5507853403141362,
"grad_norm": 95.77165985107422,
"learning_rate": 2.509153804294318e-07,
"logits/chosen": -0.7346749305725098,
"logits/rejected": -0.7492486238479614,
"loss": 4.3113,
"step": 263
},
{
"beta_dpo/beta_used": 0.04723266139626503,
"beta_dpo/beta_used_raw": 0.032619744539260864,
"beta_dpo/gap_mean": 26.848690032958984,
"beta_dpo/gap_std": 45.16484451293945,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5528795811518324,
"grad_norm": 249.46133422851562,
"learning_rate": 2.4908461957056825e-07,
"logits/chosen": -0.8002597093582153,
"logits/rejected": -0.7968762516975403,
"loss": 4.0313,
"step": 264
},
{
"beta_dpo/beta_used": 0.044382501393556595,
"beta_dpo/beta_used_raw": 0.019323019310832024,
"beta_dpo/gap_mean": 31.021467208862305,
"beta_dpo/gap_std": 46.95008087158203,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.5549738219895288,
"grad_norm": 179.83192443847656,
"learning_rate": 2.4725390780077905e-07,
"logits/chosen": -0.8639757633209229,
"logits/rejected": -0.8595830202102661,
"loss": 3.8278,
"step": 265
},
{
"beta_dpo/beta_used": 0.027583010494709015,
"beta_dpo/beta_used_raw": 0.015042563900351524,
"beta_dpo/gap_mean": 31.712360382080078,
"beta_dpo/gap_std": 45.211669921875,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.5570680628272251,
"grad_norm": 100.64574432373047,
"learning_rate": 2.454233432955807e-07,
"logits/chosen": -0.8895573019981384,
"logits/rejected": -0.8909889459609985,
"loss": 3.7383,
"step": 266
},
{
"beta_dpo/beta_used": 0.01041501946747303,
"beta_dpo/beta_used_raw": -0.028038477525115013,
"beta_dpo/gap_mean": 30.713520050048828,
"beta_dpo/gap_std": 45.09004211425781,
"beta_dpo/mask_keep_frac": 0.625,
"epoch": 0.5591623036649215,
"grad_norm": 82.85396575927734,
"learning_rate": 2.435930242225919e-07,
"logits/chosen": -0.7605207562446594,
"logits/rejected": -0.7826250195503235,
"loss": 4.8741,
"step": 267
},
{
"beta_dpo/beta_used": 0.03263188153505325,
"beta_dpo/beta_used_raw": 0.0181466955691576,
"beta_dpo/gap_mean": 27.47226905822754,
"beta_dpo/gap_std": 46.088748931884766,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.5612565445026177,
"grad_norm": 147.48915100097656,
"learning_rate": 2.4176304873626984e-07,
"logits/chosen": -0.7427608370780945,
"logits/rejected": -0.6938825249671936,
"loss": 3.6626,
"step": 268
},
{
"beta_dpo/beta_used": 0.013387175276875496,
"beta_dpo/beta_used_raw": -0.010312670841813087,
"beta_dpo/gap_mean": 26.25534439086914,
"beta_dpo/gap_std": 48.16028594970703,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.5633507853403141,
"grad_norm": 136.2506103515625,
"learning_rate": 2.399335149726463e-07,
"logits/chosen": -0.8059217929840088,
"logits/rejected": -0.7971139550209045,
"loss": 5.0023,
"step": 269
},
{
"beta_dpo/beta_used": 0.021209895610809326,
"beta_dpo/beta_used_raw": 0.007768834941089153,
"beta_dpo/gap_mean": 26.338743209838867,
"beta_dpo/gap_std": 52.1260986328125,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.5654450261780105,
"grad_norm": 129.21153259277344,
"learning_rate": 2.381045210440644e-07,
"logits/chosen": -0.8386709690093994,
"logits/rejected": -0.8653663396835327,
"loss": 4.4507,
"step": 270
},
{
"beta_dpo/beta_used": 0.0069845193065702915,
"beta_dpo/beta_used_raw": -0.020907670259475708,
"beta_dpo/gap_mean": 25.30891227722168,
"beta_dpo/gap_std": 49.086795806884766,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.5675392670157068,
"grad_norm": 40.63138961791992,
"learning_rate": 2.3627616503391812e-07,
"logits/chosen": -0.7387904524803162,
"logits/rejected": -0.7116048336029053,
"loss": 5.1414,
"step": 271
},
{
"beta_dpo/beta_used": 0.026611195877194405,
"beta_dpo/beta_used_raw": 0.016656765714287758,
"beta_dpo/gap_mean": 28.21249771118164,
"beta_dpo/gap_std": 49.86316680908203,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5696335078534032,
"grad_norm": 156.32347106933594,
"learning_rate": 2.344485449913914e-07,
"logits/chosen": -0.8664307594299316,
"logits/rejected": -0.8278294205665588,
"loss": 4.5218,
"step": 272
},
{
"beta_dpo/beta_used": 0.02900443784892559,
"beta_dpo/beta_used_raw": -0.009947888553142548,
"beta_dpo/gap_mean": 30.19207000732422,
"beta_dpo/gap_std": 51.4546012878418,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.5717277486910994,
"grad_norm": 303.7254638671875,
"learning_rate": 2.3262175892620062e-07,
"logits/chosen": -0.8640813231468201,
"logits/rejected": -0.8573806881904602,
"loss": 4.7414,
"step": 273
},
{
"beta_dpo/beta_used": 0.05624593421816826,
"beta_dpo/beta_used_raw": 0.053361114114522934,
"beta_dpo/gap_mean": 32.530738830566406,
"beta_dpo/gap_std": 51.59685516357422,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.5738219895287958,
"grad_norm": 273.17437744140625,
"learning_rate": 2.3079590480333827e-07,
"logits/chosen": -0.7935792207717896,
"logits/rejected": -0.8075500726699829,
"loss": 2.6873,
"step": 274
},
{
"beta_dpo/beta_used": 0.04389655217528343,
"beta_dpo/beta_used_raw": 0.03967411816120148,
"beta_dpo/gap_mean": 35.15380859375,
"beta_dpo/gap_std": 50.761661529541016,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.5759162303664922,
"grad_norm": 142.54107666015625,
"learning_rate": 2.2897108053782e-07,
"logits/chosen": -0.836929202079773,
"logits/rejected": -0.8122567534446716,
"loss": 3.1636,
"step": 275
},
{
"beta_dpo/beta_used": 0.008040083572268486,
"beta_dpo/beta_used_raw": -0.026715535670518875,
"beta_dpo/gap_mean": 36.45258712768555,
"beta_dpo/gap_std": 48.222740173339844,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.5780104712041885,
"grad_norm": 49.63078689575195,
"learning_rate": 2.2714738398943308e-07,
"logits/chosen": -0.9168733358383179,
"logits/rejected": -0.8658912181854248,
"loss": 4.7947,
"step": 276
},
{
"beta_dpo/beta_used": 0.017741093412041664,
"beta_dpo/beta_used_raw": -0.005734635051339865,
"beta_dpo/gap_mean": 30.747156143188477,
"beta_dpo/gap_std": 49.511741638183594,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.5801047120418849,
"grad_norm": 129.92147827148438,
"learning_rate": 2.2532491295748865e-07,
"logits/chosen": -0.7629660367965698,
"logits/rejected": -0.7584231495857239,
"loss": 4.432,
"step": 277
},
{
"beta_dpo/beta_used": 0.03449155017733574,
"beta_dpo/beta_used_raw": -0.003797696903347969,
"beta_dpo/gap_mean": 27.227996826171875,
"beta_dpo/gap_std": 50.867427825927734,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.5821989528795811,
"grad_norm": 177.40350341796875,
"learning_rate": 2.2350376517557726e-07,
"logits/chosen": -0.8415578603744507,
"logits/rejected": -0.8428290486335754,
"loss": 4.2994,
"step": 278
},
{
"beta_dpo/beta_used": 0.06249617412686348,
"beta_dpo/beta_used_raw": 0.05501677840948105,
"beta_dpo/gap_mean": 29.809844970703125,
"beta_dpo/gap_std": 52.175148010253906,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5842931937172775,
"grad_norm": 182.45668029785156,
"learning_rate": 2.2168403830632769e-07,
"logits/chosen": -0.7722773551940918,
"logits/rejected": -0.7824859619140625,
"loss": 2.8122,
"step": 279
},
{
"beta_dpo/beta_used": 0.007684089243412018,
"beta_dpo/beta_used_raw": -0.02332976460456848,
"beta_dpo/gap_mean": 30.218582153320312,
"beta_dpo/gap_std": 50.6556510925293,
"beta_dpo/mask_keep_frac": 0.5625,
"epoch": 0.5863874345549738,
"grad_norm": 57.70958709716797,
"learning_rate": 2.1986582993616925e-07,
"logits/chosen": -0.7730618715286255,
"logits/rejected": -0.809870719909668,
"loss": 5.0134,
"step": 280
},
{
"beta_dpo/beta_used": 0.00933461356908083,
"beta_dpo/beta_used_raw": -0.045306965708732605,
"beta_dpo/gap_mean": 30.127286911010742,
"beta_dpo/gap_std": 51.82423782348633,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.5884816753926702,
"grad_norm": 86.70292663574219,
"learning_rate": 2.1804923757009882e-07,
"logits/chosen": -0.7278214693069458,
"logits/rejected": -0.7206936478614807,
"loss": 5.0187,
"step": 281
},
{
"beta_dpo/beta_used": 0.028488921001553535,
"beta_dpo/beta_used_raw": -0.000575296813622117,
"beta_dpo/gap_mean": 29.9686336517334,
"beta_dpo/gap_std": 53.73543167114258,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.5905759162303665,
"grad_norm": 189.3360137939453,
"learning_rate": 2.1623435862645205e-07,
"logits/chosen": -0.9012278914451599,
"logits/rejected": -0.833315372467041,
"loss": 4.2414,
"step": 282
},
{
"beta_dpo/beta_used": 0.016119863837957382,
"beta_dpo/beta_used_raw": -0.0076263779774308205,
"beta_dpo/gap_mean": 28.940425872802734,
"beta_dpo/gap_std": 52.418643951416016,
"beta_dpo/mask_keep_frac": 0.59375,
"epoch": 0.5926701570680628,
"grad_norm": 132.74644470214844,
"learning_rate": 2.1442129043167873e-07,
"logits/chosen": -0.8086240887641907,
"logits/rejected": -0.7728883624076843,
"loss": 4.6917,
"step": 283
},
{
"beta_dpo/beta_used": 0.02810695767402649,
"beta_dpo/beta_used_raw": -0.01180135365575552,
"beta_dpo/gap_mean": 30.99124526977539,
"beta_dpo/gap_std": 53.4347038269043,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.5947643979057592,
"grad_norm": 113.83843231201172,
"learning_rate": 2.1261013021512378e-07,
"logits/chosen": -0.7596749067306519,
"logits/rejected": -0.7445765733718872,
"loss": 4.6012,
"step": 284
},
{
"beta_dpo/beta_used": 0.02452005073428154,
"beta_dpo/beta_used_raw": 0.0031681647524237633,
"beta_dpo/gap_mean": 25.594776153564453,
"beta_dpo/gap_std": 52.7824821472168,
"beta_dpo/mask_keep_frac": 0.65625,
"epoch": 0.5968586387434555,
"grad_norm": 155.6459503173828,
"learning_rate": 2.1080097510381294e-07,
"logits/chosen": -0.8476120233535767,
"logits/rejected": -0.8108228445053101,
"loss": 4.5369,
"step": 285
},
{
"beta_dpo/beta_used": 0.015153134241700172,
"beta_dpo/beta_used_raw": -0.011404473334550858,
"beta_dpo/gap_mean": 26.985084533691406,
"beta_dpo/gap_std": 54.268184661865234,
"beta_dpo/mask_keep_frac": 0.875,
"epoch": 0.5989528795811518,
"grad_norm": 122.56304931640625,
"learning_rate": 2.089939221172446e-07,
"logits/chosen": -0.812626838684082,
"logits/rejected": -0.7711913585662842,
"loss": 4.5918,
"step": 286
},
{
"beta_dpo/beta_used": 0.04627405107021332,
"beta_dpo/beta_used_raw": 0.04036061465740204,
"beta_dpo/gap_mean": 28.02764320373535,
"beta_dpo/gap_std": 54.610694885253906,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.6010471204188481,
"grad_norm": 211.06204223632812,
"learning_rate": 2.0718906816218595e-07,
"logits/chosen": -0.8649301528930664,
"logits/rejected": -0.8563531041145325,
"loss": 3.8764,
"step": 287
},
{
"beta_dpo/beta_used": 0.0363273024559021,
"beta_dpo/beta_used_raw": 0.0149660874158144,
"beta_dpo/gap_mean": 25.59058380126953,
"beta_dpo/gap_std": 52.901607513427734,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.6031413612565445,
"grad_norm": 245.04263305664062,
"learning_rate": 2.053865100274774e-07,
"logits/chosen": -0.8099507093429565,
"logits/rejected": -0.7958436608314514,
"loss": 4.1567,
"step": 288
},
{
"beta_dpo/beta_used": 0.02292640507221222,
"beta_dpo/beta_used_raw": -0.006416676566004753,
"beta_dpo/gap_mean": 23.788057327270508,
"beta_dpo/gap_std": 52.41061782836914,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.6052356020942409,
"grad_norm": 123.16299438476562,
"learning_rate": 2.035863443788411e-07,
"logits/chosen": -0.8208717703819275,
"logits/rejected": -0.8096261620521545,
"loss": 4.6924,
"step": 289
},
{
"beta_dpo/beta_used": 0.011839738115668297,
"beta_dpo/beta_used_raw": -0.04058264195919037,
"beta_dpo/gap_mean": 24.799976348876953,
"beta_dpo/gap_std": 51.84151077270508,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.6073298429319371,
"grad_norm": 104.04353332519531,
"learning_rate": 2.0178866775369774e-07,
"logits/chosen": -0.7509340047836304,
"logits/rejected": -0.7044723629951477,
"loss": 4.8929,
"step": 290
},
{
"beta_dpo/beta_used": 0.03051171451807022,
"beta_dpo/beta_used_raw": 0.014080343768000603,
"beta_dpo/gap_mean": 26.96507453918457,
"beta_dpo/gap_std": 52.527767181396484,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6094240837696335,
"grad_norm": 175.74583435058594,
"learning_rate": 1.9999357655598891e-07,
"logits/chosen": -0.7986388802528381,
"logits/rejected": -0.8011342287063599,
"loss": 4.2739,
"step": 291
},
{
"beta_dpo/beta_used": 0.03412974625825882,
"beta_dpo/beta_used_raw": 0.020118406042456627,
"beta_dpo/gap_mean": 27.24551010131836,
"beta_dpo/gap_std": 51.756317138671875,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.6115183246073298,
"grad_norm": 189.43963623046875,
"learning_rate": 1.9820116705100775e-07,
"logits/chosen": -0.8060983419418335,
"logits/rejected": -0.7809661030769348,
"loss": 3.5835,
"step": 292
},
{
"beta_dpo/beta_used": 0.04781736806035042,
"beta_dpo/beta_used_raw": 0.021636206656694412,
"beta_dpo/gap_mean": 28.056617736816406,
"beta_dpo/gap_std": 53.96324920654297,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.6136125654450262,
"grad_norm": 324.6336669921875,
"learning_rate": 1.9641153536023642e-07,
"logits/chosen": -0.9069850444793701,
"logits/rejected": -0.7866148948669434,
"loss": 4.2395,
"step": 293
},
{
"beta_dpo/beta_used": 0.02367311529815197,
"beta_dpo/beta_used_raw": -0.020730314776301384,
"beta_dpo/gap_mean": 27.37270736694336,
"beta_dpo/gap_std": 53.96466064453125,
"beta_dpo/mask_keep_frac": 0.875,
"epoch": 0.6157068062827226,
"grad_norm": 273.607421875,
"learning_rate": 1.9462477745619106e-07,
"logits/chosen": -0.9232648611068726,
"logits/rejected": -0.8572964668273926,
"loss": 5.1925,
"step": 294
},
{
"beta_dpo/beta_used": 0.05513071268796921,
"beta_dpo/beta_used_raw": 0.0484839528799057,
"beta_dpo/gap_mean": 27.121583938598633,
"beta_dpo/gap_std": 53.563331604003906,
"beta_dpo/mask_keep_frac": 0.625,
"epoch": 0.6178010471204188,
"grad_norm": 294.5126647949219,
"learning_rate": 1.928409891572757e-07,
"logits/chosen": -0.7520920038223267,
"logits/rejected": -0.7938590049743652,
"loss": 4.2212,
"step": 295
},
{
"beta_dpo/beta_used": 0.0544293075799942,
"beta_dpo/beta_used_raw": 0.042427390813827515,
"beta_dpo/gap_mean": 32.12763214111328,
"beta_dpo/gap_std": 54.309146881103516,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6198952879581152,
"grad_norm": 404.3480529785156,
"learning_rate": 1.9106026612264315e-07,
"logits/chosen": -0.8700776696205139,
"logits/rejected": -0.8367108702659607,
"loss": 4.1609,
"step": 296
},
{
"beta_dpo/beta_used": 0.011236435733735561,
"beta_dpo/beta_used_raw": -0.024106943979859352,
"beta_dpo/gap_mean": 31.98879051208496,
"beta_dpo/gap_std": 54.55412292480469,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.6219895287958115,
"grad_norm": 132.0416717529297,
"learning_rate": 1.8928270384706582e-07,
"logits/chosen": -0.8638625741004944,
"logits/rejected": -0.870927095413208,
"loss": 5.1839,
"step": 297
},
{
"beta_dpo/beta_used": 0.04521133750677109,
"beta_dpo/beta_used_raw": 0.024881090968847275,
"beta_dpo/gap_mean": 29.161760330200195,
"beta_dpo/gap_std": 54.410213470458984,
"beta_dpo/mask_keep_frac": 0.65625,
"epoch": 0.6240837696335079,
"grad_norm": 303.2014465332031,
"learning_rate": 1.875083976558136e-07,
"logits/chosen": -0.9359617829322815,
"logits/rejected": -0.894604504108429,
"loss": 5.0044,
"step": 298
},
{
"beta_dpo/beta_used": 0.03626459464430809,
"beta_dpo/beta_used_raw": -0.00014625024050474167,
"beta_dpo/gap_mean": 28.30124282836914,
"beta_dpo/gap_std": 53.62518310546875,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.6261780104712041,
"grad_norm": 139.76968383789062,
"learning_rate": 1.8573744269954297e-07,
"logits/chosen": -0.7561138868331909,
"logits/rejected": -0.7259418368339539,
"loss": 4.1468,
"step": 299
},
{
"beta_dpo/beta_used": 0.0271303653717041,
"beta_dpo/beta_used_raw": -0.004675944335758686,
"beta_dpo/gap_mean": 28.45832633972168,
"beta_dpo/gap_std": 51.58424377441406,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.6282722513089005,
"grad_norm": 269.99761962890625,
"learning_rate": 1.839699339491937e-07,
"logits/chosen": -0.7935373783111572,
"logits/rejected": -0.8128796815872192,
"loss": 4.8479,
"step": 300
},
{
"beta_dpo/beta_used": 0.02316497452557087,
"beta_dpo/beta_used_raw": -0.008034785278141499,
"beta_dpo/gap_mean": 27.73406982421875,
"beta_dpo/gap_std": 52.02341079711914,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.6303664921465969,
"grad_norm": 137.60336303710938,
"learning_rate": 1.8220596619089573e-07,
"logits/chosen": -0.8512569665908813,
"logits/rejected": -0.8470555543899536,
"loss": 4.1674,
"step": 301
},
{
"beta_dpo/beta_used": 0.033527493476867676,
"beta_dpo/beta_used_raw": 0.005997128784656525,
"beta_dpo/gap_mean": 29.30136489868164,
"beta_dpo/gap_std": 49.16413497924805,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.6324607329842932,
"grad_norm": 190.3458709716797,
"learning_rate": 1.8044563402088682e-07,
"logits/chosen": -0.7430394291877747,
"logits/rejected": -0.726094126701355,
"loss": 4.1724,
"step": 302
},
{
"beta_dpo/beta_used": 0.05859103798866272,
"beta_dpo/beta_used_raw": 0.053058795630931854,
"beta_dpo/gap_mean": 28.67943572998047,
"beta_dpo/gap_std": 50.48307418823242,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.6345549738219896,
"grad_norm": 523.866943359375,
"learning_rate": 1.7868903184043885e-07,
"logits/chosen": -0.8114441633224487,
"logits/rejected": -0.7551754117012024,
"loss": 3.9045,
"step": 303
},
{
"beta_dpo/beta_used": 0.014694188721477985,
"beta_dpo/beta_used_raw": 0.00047776661813259125,
"beta_dpo/gap_mean": 30.76772689819336,
"beta_dpo/gap_std": 52.48418426513672,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.6366492146596858,
"grad_norm": 182.02586364746094,
"learning_rate": 1.7693625385079574e-07,
"logits/chosen": -0.7341251373291016,
"logits/rejected": -0.7772490978240967,
"loss": 5.0507,
"step": 304
},
{
"beta_dpo/beta_used": 0.023470664396882057,
"beta_dpo/beta_used_raw": 0.006333658471703529,
"beta_dpo/gap_mean": 35.33549118041992,
"beta_dpo/gap_std": 51.53257751464844,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.6387434554973822,
"grad_norm": 80.4178695678711,
"learning_rate": 1.7518739404812155e-07,
"logits/chosen": -0.8776407837867737,
"logits/rejected": -0.8734537363052368,
"loss": 4.187,
"step": 305
},
{
"beta_dpo/beta_used": 0.019906463101506233,
"beta_dpo/beta_used_raw": -0.0246460922062397,
"beta_dpo/gap_mean": 35.897613525390625,
"beta_dpo/gap_std": 51.13701629638672,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.6408376963350786,
"grad_norm": 180.6241912841797,
"learning_rate": 1.7344254621846017e-07,
"logits/chosen": -0.7335799932479858,
"logits/rejected": -0.7366300225257874,
"loss": 4.9526,
"step": 306
},
{
"beta_dpo/beta_used": 0.03133513033390045,
"beta_dpo/beta_used_raw": 0.015121620148420334,
"beta_dpo/gap_mean": 31.835227966308594,
"beta_dpo/gap_std": 49.21765899658203,
"beta_dpo/mask_keep_frac": 0.65625,
"epoch": 0.6429319371727749,
"grad_norm": 140.04568481445312,
"learning_rate": 1.717018039327053e-07,
"logits/chosen": -0.7875911593437195,
"logits/rejected": -0.8351340889930725,
"loss": 3.6112,
"step": 307
},
{
"beta_dpo/beta_used": 0.01386056188493967,
"beta_dpo/beta_used_raw": -0.003249811939895153,
"beta_dpo/gap_mean": 30.477500915527344,
"beta_dpo/gap_std": 48.171607971191406,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.6450261780104712,
"grad_norm": 77.68309020996094,
"learning_rate": 1.699652605415828e-07,
"logits/chosen": -0.7763692140579224,
"logits/rejected": -0.7668969631195068,
"loss": 4.5155,
"step": 308
},
{
"beta_dpo/beta_used": 0.0570245087146759,
"beta_dpo/beta_used_raw": 0.0544467568397522,
"beta_dpo/gap_mean": 28.889652252197266,
"beta_dpo/gap_std": 51.812313079833984,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6471204188481675,
"grad_norm": 348.3558654785156,
"learning_rate": 1.6823300917064458e-07,
"logits/chosen": -0.9028000831604004,
"logits/rejected": -0.9401339888572693,
"loss": 3.8498,
"step": 309
},
{
"beta_dpo/beta_used": 0.02267904207110405,
"beta_dpo/beta_used_raw": -0.012671604752540588,
"beta_dpo/gap_mean": 30.174596786499023,
"beta_dpo/gap_std": 52.781192779541016,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6492146596858639,
"grad_norm": 177.11566162109375,
"learning_rate": 1.6650514271527465e-07,
"logits/chosen": -0.7747019529342651,
"logits/rejected": -0.7555006146430969,
"loss": 4.7697,
"step": 310
},
{
"beta_dpo/beta_used": 0.0282583124935627,
"beta_dpo/beta_used_raw": -0.015359479002654552,
"beta_dpo/gap_mean": 32.193870544433594,
"beta_dpo/gap_std": 50.84648513793945,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.6513089005235602,
"grad_norm": 194.98880004882812,
"learning_rate": 1.647817538357072e-07,
"logits/chosen": -0.7590238451957703,
"logits/rejected": -0.752559244632721,
"loss": 4.6913,
"step": 311
},
{
"beta_dpo/beta_used": 0.02696000412106514,
"beta_dpo/beta_used_raw": -0.0136557100340724,
"beta_dpo/gap_mean": 31.40416145324707,
"beta_dpo/gap_std": 54.36616516113281,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.6534031413612565,
"grad_norm": 502.6810607910156,
"learning_rate": 1.6306293495205755e-07,
"logits/chosen": -0.8581979274749756,
"logits/rejected": -0.8272500038146973,
"loss": 4.5039,
"step": 312
},
{
"beta_dpo/beta_used": 0.02222803235054016,
"beta_dpo/beta_used_raw": -0.005380367860198021,
"beta_dpo/gap_mean": 28.833663940429688,
"beta_dpo/gap_std": 54.704952239990234,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6554973821989529,
"grad_norm": 152.86302185058594,
"learning_rate": 1.6134877823936607e-07,
"logits/chosen": -0.8324103355407715,
"logits/rejected": -0.8865740299224854,
"loss": 5.0221,
"step": 313
},
{
"beta_dpo/beta_used": 0.04997220262885094,
"beta_dpo/beta_used_raw": 0.045812323689460754,
"beta_dpo/gap_mean": 29.58029556274414,
"beta_dpo/gap_std": 53.47450637817383,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6575916230366492,
"grad_norm": 239.0943145751953,
"learning_rate": 1.5963937562265522e-07,
"logits/chosen": -0.806653618812561,
"logits/rejected": -0.7868531346321106,
"loss": 4.296,
"step": 314
},
{
"beta_dpo/beta_used": 0.03027864173054695,
"beta_dpo/beta_used_raw": 0.01615685038268566,
"beta_dpo/gap_mean": 32.16781997680664,
"beta_dpo/gap_std": 53.18808364868164,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.6596858638743456,
"grad_norm": 158.3759307861328,
"learning_rate": 1.5793481877199943e-07,
"logits/chosen": -0.842742919921875,
"logits/rejected": -0.8674212694168091,
"loss": 3.9229,
"step": 315
},
{
"beta_dpo/beta_used": 0.015697987750172615,
"beta_dpo/beta_used_raw": -0.022454766556620598,
"beta_dpo/gap_mean": 33.68966293334961,
"beta_dpo/gap_std": 55.241519927978516,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.6617801047120419,
"grad_norm": 124.0105209350586,
"learning_rate": 1.562351990976095e-07,
"logits/chosen": -0.7664201259613037,
"logits/rejected": -0.805154025554657,
"loss": 4.9355,
"step": 316
},
{
"beta_dpo/beta_used": 0.027155417948961258,
"beta_dpo/beta_used_raw": 0.006031029857695103,
"beta_dpo/gap_mean": 32.50454330444336,
"beta_dpo/gap_std": 53.5350341796875,
"beta_dpo/mask_keep_frac": 0.65625,
"epoch": 0.6638743455497382,
"grad_norm": 162.24105834960938,
"learning_rate": 1.5454060774493065e-07,
"logits/chosen": -0.8139037489891052,
"logits/rejected": -0.77301025390625,
"loss": 4.884,
"step": 317
},
{
"beta_dpo/beta_used": 0.030678538605570793,
"beta_dpo/beta_used_raw": 0.004475907888263464,
"beta_dpo/gap_mean": 31.712360382080078,
"beta_dpo/gap_std": 49.18507766723633,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6659685863874345,
"grad_norm": 121.8013916015625,
"learning_rate": 1.5285113558975427e-07,
"logits/chosen": -0.7728986740112305,
"logits/rejected": -0.7226128578186035,
"loss": 4.1183,
"step": 318
},
{
"beta_dpo/beta_used": 0.029083475470542908,
"beta_dpo/beta_used_raw": 0.004641437903046608,
"beta_dpo/gap_mean": 34.69441223144531,
"beta_dpo/gap_std": 49.81436538696289,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.6680628272251309,
"grad_norm": 92.8158187866211,
"learning_rate": 1.5116687323334464e-07,
"logits/chosen": -0.8575960993766785,
"logits/rejected": -0.8856627345085144,
"loss": 4.0625,
"step": 319
},
{
"beta_dpo/beta_used": 0.02023179829120636,
"beta_dpo/beta_used_raw": -0.003970830701291561,
"beta_dpo/gap_mean": 33.205867767333984,
"beta_dpo/gap_std": 51.83220291137695,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.6701570680628273,
"grad_norm": 137.70501708984375,
"learning_rate": 1.4948791099758052e-07,
"logits/chosen": -0.8294675350189209,
"logits/rejected": -0.8444851040840149,
"loss": 4.3916,
"step": 320
},
{
"beta_dpo/beta_used": 0.01353040337562561,
"beta_dpo/beta_used_raw": -0.03319290652871132,
"beta_dpo/gap_mean": 28.161727905273438,
"beta_dpo/gap_std": 52.91798400878906,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.6722513089005235,
"grad_norm": 137.2801513671875,
"learning_rate": 1.478143389201113e-07,
"logits/chosen": -0.8113803267478943,
"logits/rejected": -0.7403082847595215,
"loss": 4.6654,
"step": 321
},
{
"beta_dpo/beta_used": 0.031200017780065536,
"beta_dpo/beta_used_raw": -0.0010120943188667297,
"beta_dpo/gap_mean": 26.58349609375,
"beta_dpo/gap_std": 53.48532485961914,
"beta_dpo/mask_keep_frac": 0.875,
"epoch": 0.6743455497382199,
"grad_norm": 241.01341247558594,
"learning_rate": 1.461462467495284e-07,
"logits/chosen": -0.7803442478179932,
"logits/rejected": -0.7769550085067749,
"loss": 5.2722,
"step": 322
},
{
"beta_dpo/beta_used": 0.010985669679939747,
"beta_dpo/beta_used_raw": -0.015756428241729736,
"beta_dpo/gap_mean": 26.728092193603516,
"beta_dpo/gap_std": 53.64677047729492,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.6764397905759162,
"grad_norm": 100.47509002685547,
"learning_rate": 1.4448372394055246e-07,
"logits/chosen": -0.9064484238624573,
"logits/rejected": -0.8854697346687317,
"loss": 4.6664,
"step": 323
},
{
"beta_dpo/beta_used": 0.05118248984217644,
"beta_dpo/beta_used_raw": 0.042741917073726654,
"beta_dpo/gap_mean": 29.620723724365234,
"beta_dpo/gap_std": 51.27871322631836,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6785340314136126,
"grad_norm": 227.78439331054688,
"learning_rate": 1.428268596492364e-07,
"logits/chosen": -0.8729650974273682,
"logits/rejected": -0.8735213875770569,
"loss": 3.7416,
"step": 324
},
{
"beta_dpo/beta_used": 0.023902013897895813,
"beta_dpo/beta_used_raw": -0.004822437651455402,
"beta_dpo/gap_mean": 32.345211029052734,
"beta_dpo/gap_std": 51.50432586669922,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.680628272251309,
"grad_norm": 370.4063415527344,
"learning_rate": 1.4117574272818386e-07,
"logits/chosen": -0.8013263940811157,
"logits/rejected": -0.7928324341773987,
"loss": 5.074,
"step": 325
},
{
"beta_dpo/beta_used": 0.008493431843817234,
"beta_dpo/beta_used_raw": -0.04024779424071312,
"beta_dpo/gap_mean": 30.87372589111328,
"beta_dpo/gap_std": 53.50398254394531,
"beta_dpo/mask_keep_frac": 0.65625,
"epoch": 0.6827225130890052,
"grad_norm": 121.52214050292969,
"learning_rate": 1.3953046172178413e-07,
"logits/chosen": -0.8273008465766907,
"logits/rejected": -0.8141711950302124,
"loss": 5.1887,
"step": 326
},
{
"beta_dpo/beta_used": 0.03533978387713432,
"beta_dpo/beta_used_raw": 0.012006538920104504,
"beta_dpo/gap_mean": 31.25798988342285,
"beta_dpo/gap_std": 53.022621154785156,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.6848167539267016,
"grad_norm": 248.8169403076172,
"learning_rate": 1.3789110486146468e-07,
"logits/chosen": -0.8114765882492065,
"logits/rejected": -0.7771793603897095,
"loss": 3.8881,
"step": 327
},
{
"beta_dpo/beta_used": 0.017740879207849503,
"beta_dpo/beta_used_raw": -0.01438824087381363,
"beta_dpo/gap_mean": 33.111385345458984,
"beta_dpo/gap_std": 50.42515563964844,
"beta_dpo/mask_keep_frac": 0.875,
"epoch": 0.6869109947643979,
"grad_norm": 92.58521270751953,
"learning_rate": 1.362577600609588e-07,
"logits/chosen": -0.8299423456192017,
"logits/rejected": -0.8702976703643799,
"loss": 4.0943,
"step": 328
},
{
"beta_dpo/beta_used": 0.01667260378599167,
"beta_dpo/beta_used_raw": -0.009871412068605423,
"beta_dpo/gap_mean": 30.09588623046875,
"beta_dpo/gap_std": 52.19231033325195,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.6890052356020943,
"grad_norm": 138.4306182861328,
"learning_rate": 1.3463051491159093e-07,
"logits/chosen": -0.7766485810279846,
"logits/rejected": -0.8675934076309204,
"loss": 4.911,
"step": 329
},
{
"beta_dpo/beta_used": 0.046647775918245316,
"beta_dpo/beta_used_raw": 0.04041110351681709,
"beta_dpo/gap_mean": 29.8335018157959,
"beta_dpo/gap_std": 55.980369567871094,
"beta_dpo/mask_keep_frac": 0.5625,
"epoch": 0.6910994764397905,
"grad_norm": 1010.2858276367188,
"learning_rate": 1.3300945667758012e-07,
"logits/chosen": -0.8615760207176208,
"logits/rejected": -0.8630913496017456,
"loss": 4.2895,
"step": 330
},
{
"beta_dpo/beta_used": 0.02816726081073284,
"beta_dpo/beta_used_raw": -0.0015003189910203218,
"beta_dpo/gap_mean": 31.772533416748047,
"beta_dpo/gap_std": 55.0521354675293,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.6931937172774869,
"grad_norm": 259.1372375488281,
"learning_rate": 1.3139467229135998e-07,
"logits/chosen": -0.8795358538627625,
"logits/rejected": -0.8674964904785156,
"loss": 4.7036,
"step": 331
},
{
"beta_dpo/beta_used": 0.039203815162181854,
"beta_dpo/beta_used_raw": 0.01063997857272625,
"beta_dpo/gap_mean": 33.736488342285156,
"beta_dpo/gap_std": 56.953426361083984,
"beta_dpo/mask_keep_frac": 0.875,
"epoch": 0.6952879581151833,
"grad_norm": 263.4537658691406,
"learning_rate": 1.2978624834891626e-07,
"logits/chosen": -0.9462342262268066,
"logits/rejected": -0.9176090955734253,
"loss": 4.1015,
"step": 332
},
{
"beta_dpo/beta_used": 0.002037803176790476,
"beta_dpo/beta_used_raw": -0.032407838851213455,
"beta_dpo/gap_mean": 30.212459564208984,
"beta_dpo/gap_std": 55.63782501220703,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.6973821989528796,
"grad_norm": 22.458953857421875,
"learning_rate": 1.281842711051438e-07,
"logits/chosen": -0.8374227285385132,
"logits/rejected": -0.780229389667511,
"loss": 5.3569,
"step": 333
},
{
"beta_dpo/beta_used": 0.04165830835700035,
"beta_dpo/beta_used_raw": 0.03052227571606636,
"beta_dpo/gap_mean": 29.47317123413086,
"beta_dpo/gap_std": 53.91261672973633,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.6994764397905759,
"grad_norm": 258.20318603515625,
"learning_rate": 1.2658882646922033e-07,
"logits/chosen": -0.8327507376670837,
"logits/rejected": -0.790196418762207,
"loss": 4.3341,
"step": 334
},
{
"beta_dpo/beta_used": 0.023221183568239212,
"beta_dpo/beta_used_raw": -0.035209063440561295,
"beta_dpo/gap_mean": 32.27169418334961,
"beta_dpo/gap_std": 54.47612762451172,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.7015706806282722,
"grad_norm": 174.3998565673828,
"learning_rate": 1.2500000000000005e-07,
"logits/chosen": -0.818577229976654,
"logits/rejected": -0.8766403198242188,
"loss": 4.3985,
"step": 335
},
{
"beta_dpo/beta_used": 0.011233292520046234,
"beta_dpo/beta_used_raw": -0.022189803421497345,
"beta_dpo/gap_mean": 29.108884811401367,
"beta_dpo/gap_std": 56.85524368286133,
"beta_dpo/mask_keep_frac": 0.90625,
"epoch": 0.7036649214659686,
"grad_norm": 100.67388153076172,
"learning_rate": 1.2341787690142435e-07,
"logits/chosen": -0.7078570127487183,
"logits/rejected": -0.739229142665863,
"loss": 4.9627,
"step": 336
},
{
"beta_dpo/beta_used": 0.039661239832639694,
"beta_dpo/beta_used_raw": 0.014130711555480957,
"beta_dpo/gap_mean": 30.064481735229492,
"beta_dpo/gap_std": 55.913970947265625,
"beta_dpo/mask_keep_frac": 0.875,
"epoch": 0.7057591623036649,
"grad_norm": 278.1079406738281,
"learning_rate": 1.2184254201795363e-07,
"logits/chosen": -0.8292222023010254,
"logits/rejected": -0.7518793940544128,
"loss": 4.3712,
"step": 337
},
{
"beta_dpo/beta_used": 0.029370369389653206,
"beta_dpo/beta_used_raw": 0.012822807766497135,
"beta_dpo/gap_mean": 33.81048583984375,
"beta_dpo/gap_std": 54.04378890991211,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.7078534031413612,
"grad_norm": 194.63438415527344,
"learning_rate": 1.202740798300168e-07,
"logits/chosen": -0.8475313782691956,
"logits/rejected": -0.8578289151191711,
"loss": 4.1704,
"step": 338
},
{
"beta_dpo/beta_used": 0.06615243852138519,
"beta_dpo/beta_used_raw": 0.06248940899968147,
"beta_dpo/gap_mean": 34.177696228027344,
"beta_dpo/gap_std": 56.06435012817383,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7099476439790576,
"grad_norm": 368.3795471191406,
"learning_rate": 1.1871257444948096e-07,
"logits/chosen": -0.9380159974098206,
"logits/rejected": -0.9480760097503662,
"loss": 4.0919,
"step": 339
},
{
"beta_dpo/beta_used": 0.013038999401032925,
"beta_dpo/beta_used_raw": -0.0053863683715462685,
"beta_dpo/gap_mean": 33.19333267211914,
"beta_dpo/gap_std": 59.489295959472656,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.7120418848167539,
"grad_norm": 138.34683227539062,
"learning_rate": 1.1715810961514072e-07,
"logits/chosen": -0.7801686525344849,
"logits/rejected": -0.7577068209648132,
"loss": 4.9209,
"step": 340
},
{
"beta_dpo/beta_used": 0.035000525414943695,
"beta_dpo/beta_used_raw": 0.011500047519803047,
"beta_dpo/gap_mean": 28.83623504638672,
"beta_dpo/gap_std": 58.50289535522461,
"beta_dpo/mask_keep_frac": 0.90625,
"epoch": 0.7141361256544503,
"grad_norm": 218.66903686523438,
"learning_rate": 1.1561076868822755e-07,
"logits/chosen": -0.9182481169700623,
"logits/rejected": -0.8721767067909241,
"loss": 4.6165,
"step": 341
},
{
"beta_dpo/beta_used": 0.024583449587225914,
"beta_dpo/beta_used_raw": 0.018431413918733597,
"beta_dpo/gap_mean": 28.589534759521484,
"beta_dpo/gap_std": 57.362159729003906,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.7162303664921466,
"grad_norm": 233.12059020996094,
"learning_rate": 1.1407063464793965e-07,
"logits/chosen": -0.7965834736824036,
"logits/rejected": -0.8243657946586609,
"loss": 4.7252,
"step": 342
},
{
"beta_dpo/beta_used": 0.025925535708665848,
"beta_dpo/beta_used_raw": -0.011598478071391582,
"beta_dpo/gap_mean": 28.507904052734375,
"beta_dpo/gap_std": 55.28282928466797,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.7183246073298429,
"grad_norm": 255.1661376953125,
"learning_rate": 1.125377900869913e-07,
"logits/chosen": -0.900759756565094,
"logits/rejected": -0.8987997174263,
"loss": 5.1938,
"step": 343
},
{
"beta_dpo/beta_used": 0.05067792162299156,
"beta_dpo/beta_used_raw": 0.023350853472948074,
"beta_dpo/gap_mean": 28.617340087890625,
"beta_dpo/gap_std": 56.286258697509766,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.7204188481675393,
"grad_norm": 359.28851318359375,
"learning_rate": 1.110123172071844e-07,
"logits/chosen": -0.7748513221740723,
"logits/rejected": -0.7623203992843628,
"loss": 5.1968,
"step": 344
},
{
"beta_dpo/beta_used": 0.033438149839639664,
"beta_dpo/beta_used_raw": -0.00029300153255462646,
"beta_dpo/gap_mean": 30.098384857177734,
"beta_dpo/gap_std": 53.45401382446289,
"beta_dpo/mask_keep_frac": 0.625,
"epoch": 0.7225130890052356,
"grad_norm": 310.5905456542969,
"learning_rate": 1.09494297815e-07,
"logits/chosen": -0.8768536448478699,
"logits/rejected": -0.8476714491844177,
"loss": 4.7303,
"step": 345
},
{
"beta_dpo/beta_used": 0.05225639045238495,
"beta_dpo/beta_used_raw": 0.04538067430257797,
"beta_dpo/gap_mean": 30.668237686157227,
"beta_dpo/gap_std": 52.24396896362305,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.724607329842932,
"grad_norm": 311.78192138671875,
"learning_rate": 1.0798381331721107e-07,
"logits/chosen": -0.9083431959152222,
"logits/rejected": -0.8552351593971252,
"loss": 3.8896,
"step": 346
},
{
"beta_dpo/beta_used": 0.024577973410487175,
"beta_dpo/beta_used_raw": 0.0004575531929731369,
"beta_dpo/gap_mean": 31.580842971801758,
"beta_dpo/gap_std": 51.64503479003906,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.7267015706806282,
"grad_norm": 154.31671142578125,
"learning_rate": 1.0648094471651722e-07,
"logits/chosen": -0.7399212121963501,
"logits/rejected": -0.8290560841560364,
"loss": 4.3639,
"step": 347
},
{
"beta_dpo/beta_used": 0.014230488799512386,
"beta_dpo/beta_used_raw": -0.048038601875305176,
"beta_dpo/gap_mean": 27.234729766845703,
"beta_dpo/gap_std": 49.23517990112305,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.7287958115183246,
"grad_norm": 184.28305053710938,
"learning_rate": 1.0498577260720048e-07,
"logits/chosen": -0.9388685822486877,
"logits/rejected": -0.9415339231491089,
"loss": 5.0665,
"step": 348
},
{
"beta_dpo/beta_used": 0.050268374383449554,
"beta_dpo/beta_used_raw": 0.031838420778512955,
"beta_dpo/gap_mean": 30.112083435058594,
"beta_dpo/gap_std": 55.729190826416016,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.7308900523560209,
"grad_norm": 378.33599853515625,
"learning_rate": 1.0349837717080347e-07,
"logits/chosen": -0.9334988594055176,
"logits/rejected": -0.8848183751106262,
"loss": 3.9407,
"step": 349
},
{
"beta_dpo/beta_used": 0.0406358428299427,
"beta_dpo/beta_used_raw": 0.006889470852911472,
"beta_dpo/gap_mean": 31.848020553588867,
"beta_dpo/gap_std": 54.54989242553711,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.7329842931937173,
"grad_norm": 509.0325012207031,
"learning_rate": 1.0201883817182949e-07,
"logits/chosen": -0.8780160546302795,
"logits/rejected": -0.8359534740447998,
"loss": 4.1489,
"step": 350
},
{
"beta_dpo/beta_used": 0.012384520843625069,
"beta_dpo/beta_used_raw": -0.029308203607797623,
"beta_dpo/gap_mean": 28.5808162689209,
"beta_dpo/gap_std": 55.44742965698242,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.7350785340314137,
"grad_norm": 124.26021575927734,
"learning_rate": 1.0054723495346482e-07,
"logits/chosen": -0.9024979472160339,
"logits/rejected": -0.9018498063087463,
"loss": 4.9646,
"step": 351
},
{
"beta_dpo/beta_used": 0.051346320658922195,
"beta_dpo/beta_used_raw": 0.04155290499329567,
"beta_dpo/gap_mean": 31.388181686401367,
"beta_dpo/gap_std": 56.486900329589844,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.7371727748691099,
"grad_norm": 481.62066650390625,
"learning_rate": 9.908364643332398e-08,
"logits/chosen": -0.8058483600616455,
"logits/rejected": -0.7557932734489441,
"loss": 4.6374,
"step": 352
},
{
"beta_dpo/beta_used": 0.03087581694126129,
"beta_dpo/beta_used_raw": 0.0010065771639347076,
"beta_dpo/gap_mean": 33.28788375854492,
"beta_dpo/gap_std": 54.57392883300781,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.7392670157068063,
"grad_norm": 174.88623046875,
"learning_rate": 9.76281510992176e-08,
"logits/chosen": -0.7731785774230957,
"logits/rejected": -0.8036521673202515,
"loss": 4.1953,
"step": 353
},
{
"beta_dpo/beta_used": 0.013481578789651394,
"beta_dpo/beta_used_raw": -0.023063668981194496,
"beta_dpo/gap_mean": 29.690311431884766,
"beta_dpo/gap_std": 55.14631271362305,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.7413612565445026,
"grad_norm": 227.94309997558594,
"learning_rate": 9.618082700494318e-08,
"logits/chosen": -0.741845428943634,
"logits/rejected": -0.778709352016449,
"loss": 5.882,
"step": 354
},
{
"beta_dpo/beta_used": 0.06290622055530548,
"beta_dpo/beta_used_raw": 0.06290622055530548,
"beta_dpo/gap_mean": 31.194143295288086,
"beta_dpo/gap_std": 57.11370849609375,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.743455497382199,
"grad_norm": 247.5913543701172,
"learning_rate": 9.474175176609956e-08,
"logits/chosen": -0.9444049596786499,
"logits/rejected": -0.9045993089675903,
"loss": 3.1331,
"step": 355
},
{
"beta_dpo/beta_used": 0.03636765852570534,
"beta_dpo/beta_used_raw": 0.013633275404572487,
"beta_dpo/gap_mean": 28.3127498626709,
"beta_dpo/gap_std": 50.623878479003906,
"beta_dpo/mask_keep_frac": 0.65625,
"epoch": 0.7455497382198953,
"grad_norm": 214.78062438964844,
"learning_rate": 9.331100255592436e-08,
"logits/chosen": -0.8152442574501038,
"logits/rejected": -0.8466963171958923,
"loss": 4.2956,
"step": 356
},
{
"beta_dpo/beta_used": 0.027711525559425354,
"beta_dpo/beta_used_raw": 0.011670958250761032,
"beta_dpo/gap_mean": 28.688819885253906,
"beta_dpo/gap_std": 51.74197006225586,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.7476439790575916,
"grad_norm": 158.7490234375,
"learning_rate": 9.18886561011557e-08,
"logits/chosen": -0.7832672595977783,
"logits/rejected": -0.74955153465271,
"loss": 3.9111,
"step": 357
},
{
"beta_dpo/beta_used": 0.024180788546800613,
"beta_dpo/beta_used_raw": 0.008531359024345875,
"beta_dpo/gap_mean": 33.06235122680664,
"beta_dpo/gap_std": 52.99840545654297,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.749738219895288,
"grad_norm": 165.2462615966797,
"learning_rate": 9.047478867791731e-08,
"logits/chosen": -0.8677491545677185,
"logits/rejected": -0.838107168674469,
"loss": 4.3925,
"step": 358
},
{
"beta_dpo/beta_used": 0.02725430205464363,
"beta_dpo/beta_used_raw": 0.005270563997328281,
"beta_dpo/gap_mean": 33.42242431640625,
"beta_dpo/gap_std": 51.58427810668945,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7518324607329843,
"grad_norm": 216.0394287109375,
"learning_rate": 8.906947610762825e-08,
"logits/chosen": -0.8172123432159424,
"logits/rejected": -0.849665105342865,
"loss": 4.5131,
"step": 359
},
{
"beta_dpo/beta_used": 0.013111414387822151,
"beta_dpo/beta_used_raw": 0.0025145215913653374,
"beta_dpo/gap_mean": 31.21525764465332,
"beta_dpo/gap_std": 54.58356857299805,
"beta_dpo/mask_keep_frac": 0.59375,
"epoch": 0.7539267015706806,
"grad_norm": 114.65906524658203,
"learning_rate": 8.76727937529367e-08,
"logits/chosen": -0.9042258262634277,
"logits/rejected": -0.9122740626335144,
"loss": 4.4779,
"step": 360
},
{
"beta_dpo/beta_used": 0.03473525866866112,
"beta_dpo/beta_used_raw": 0.028849830850958824,
"beta_dpo/gap_mean": 31.66191291809082,
"beta_dpo/gap_std": 55.895851135253906,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.7560209424083769,
"grad_norm": 128.73867797851562,
"learning_rate": 8.628481651367875e-08,
"logits/chosen": -0.8746165633201599,
"logits/rejected": -0.8471811413764954,
"loss": 3.525,
"step": 361
},
{
"beta_dpo/beta_used": 0.03337887302041054,
"beta_dpo/beta_used_raw": 0.015036560595035553,
"beta_dpo/gap_mean": 33.18673324584961,
"beta_dpo/gap_std": 54.25856018066406,
"beta_dpo/mask_keep_frac": 0.875,
"epoch": 0.7581151832460733,
"grad_norm": 265.6235046386719,
"learning_rate": 8.490561882286135e-08,
"logits/chosen": -0.8912657499313354,
"logits/rejected": -0.8793244957923889,
"loss": 3.8266,
"step": 362
},
{
"beta_dpo/beta_used": 0.0334957093000412,
"beta_dpo/beta_used_raw": 0.019749773666262627,
"beta_dpo/gap_mean": 32.70677947998047,
"beta_dpo/gap_std": 54.238922119140625,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7602094240837697,
"grad_norm": 328.0040588378906,
"learning_rate": 8.353527464267104e-08,
"logits/chosen": -0.8557516932487488,
"logits/rejected": -0.8278414011001587,
"loss": 4.4351,
"step": 363
},
{
"beta_dpo/beta_used": 0.019932106137275696,
"beta_dpo/beta_used_raw": -0.02457229606807232,
"beta_dpo/gap_mean": 31.01894760131836,
"beta_dpo/gap_std": 54.44854736328125,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.762303664921466,
"grad_norm": 89.25292205810547,
"learning_rate": 8.217385746050742e-08,
"logits/chosen": -0.8707149624824524,
"logits/rejected": -0.8504204750061035,
"loss": 4.7876,
"step": 364
},
{
"beta_dpo/beta_used": 0.052917227149009705,
"beta_dpo/beta_used_raw": 0.04524911195039749,
"beta_dpo/gap_mean": 28.029312133789062,
"beta_dpo/gap_std": 55.016151428222656,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7643979057591623,
"grad_norm": 375.6981506347656,
"learning_rate": 8.082144028504231e-08,
"logits/chosen": -0.8357688188552856,
"logits/rejected": -0.8424769639968872,
"loss": 4.549,
"step": 365
},
{
"beta_dpo/beta_used": 0.023991985246539116,
"beta_dpo/beta_used_raw": -0.00716618075966835,
"beta_dpo/gap_mean": 30.980024337768555,
"beta_dpo/gap_std": 55.70692443847656,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.7664921465968586,
"grad_norm": 168.83290100097656,
"learning_rate": 7.947809564230445e-08,
"logits/chosen": -0.8632270693778992,
"logits/rejected": -0.8815495371818542,
"loss": 4.2886,
"step": 366
},
{
"beta_dpo/beta_used": 0.024156922474503517,
"beta_dpo/beta_used_raw": -0.014136096462607384,
"beta_dpo/gap_mean": 32.812950134277344,
"beta_dpo/gap_std": 54.38077163696289,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.768586387434555,
"grad_norm": 272.86541748046875,
"learning_rate": 7.814389557179016e-08,
"logits/chosen": -0.8426069021224976,
"logits/rejected": -0.7946543097496033,
"loss": 4.6307,
"step": 367
},
{
"beta_dpo/beta_used": 0.05431270971894264,
"beta_dpo/beta_used_raw": 0.0433184877038002,
"beta_dpo/gap_mean": 35.47528839111328,
"beta_dpo/gap_std": 52.5758171081543,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7706806282722513,
"grad_norm": 146.9598388671875,
"learning_rate": 7.681891162260015e-08,
"logits/chosen": -0.9334856271743774,
"logits/rejected": -0.9025843739509583,
"loss": 2.8818,
"step": 368
},
{
"beta_dpo/beta_used": 0.024843934923410416,
"beta_dpo/beta_used_raw": -0.02314029261469841,
"beta_dpo/gap_mean": 37.284950256347656,
"beta_dpo/gap_std": 48.017791748046875,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.7727748691099476,
"grad_norm": 99.7154541015625,
"learning_rate": 7.550321484960251e-08,
"logits/chosen": -0.850791335105896,
"logits/rejected": -0.816204845905304,
"loss": 4.526,
"step": 369
},
{
"beta_dpo/beta_used": 0.005622061900794506,
"beta_dpo/beta_used_raw": -0.02220618724822998,
"beta_dpo/gap_mean": 36.12443161010742,
"beta_dpo/gap_std": 49.77077102661133,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.774869109947644,
"grad_norm": 41.49360275268555,
"learning_rate": 7.419687580962222e-08,
"logits/chosen": -0.8648772239685059,
"logits/rejected": -0.9024683237075806,
"loss": 4.9406,
"step": 370
},
{
"beta_dpo/beta_used": 0.006420304998755455,
"beta_dpo/beta_used_raw": -0.028947679325938225,
"beta_dpo/gap_mean": 30.36486053466797,
"beta_dpo/gap_std": 51.136146545410156,
"beta_dpo/mask_keep_frac": 0.625,
"epoch": 0.7769633507853403,
"grad_norm": 59.23979568481445,
"learning_rate": 7.289996455765748e-08,
"logits/chosen": -0.741977870464325,
"logits/rejected": -0.7357773184776306,
"loss": 4.9714,
"step": 371
},
{
"beta_dpo/beta_used": 0.06715603172779083,
"beta_dpo/beta_used_raw": 0.047685518860816956,
"beta_dpo/gap_mean": 32.393035888671875,
"beta_dpo/gap_std": 50.679080963134766,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.7790575916230367,
"grad_norm": 455.41925048828125,
"learning_rate": 7.161255064312283e-08,
"logits/chosen": -0.8044797778129578,
"logits/rejected": -0.7840807437896729,
"loss": 4.6727,
"step": 372
},
{
"beta_dpo/beta_used": 0.018992407247424126,
"beta_dpo/beta_used_raw": -0.0017184526659548283,
"beta_dpo/gap_mean": 33.258968353271484,
"beta_dpo/gap_std": 49.465057373046875,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.7811518324607329,
"grad_norm": 222.24200439453125,
"learning_rate": 7.033470310611945e-08,
"logits/chosen": -0.8912656903266907,
"logits/rejected": -0.8498582243919373,
"loss": 5.1636,
"step": 373
},
{
"beta_dpo/beta_used": 0.005610483232885599,
"beta_dpo/beta_used_raw": -0.04910598695278168,
"beta_dpo/gap_mean": 31.699514389038086,
"beta_dpo/gap_std": 52.40116500854492,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.7832460732984293,
"grad_norm": 51.2022590637207,
"learning_rate": 6.906649047373245e-08,
"logits/chosen": -0.820667028427124,
"logits/rejected": -0.8256031274795532,
"loss": 5.1379,
"step": 374
},
{
"beta_dpo/beta_used": 0.024863161146640778,
"beta_dpo/beta_used_raw": -0.014078973792493343,
"beta_dpo/gap_mean": 28.686037063598633,
"beta_dpo/gap_std": 51.921531677246094,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.7853403141361257,
"grad_norm": 157.56956481933594,
"learning_rate": 6.780798075635675e-08,
"logits/chosen": -0.861274242401123,
"logits/rejected": -0.8295719623565674,
"loss": 4.3679,
"step": 375
},
{
"beta_dpo/beta_used": 0.021679656580090523,
"beta_dpo/beta_used_raw": -0.001047454308718443,
"beta_dpo/gap_mean": 28.755699157714844,
"beta_dpo/gap_std": 52.53461837768555,
"beta_dpo/mask_keep_frac": 0.65625,
"epoch": 0.787434554973822,
"grad_norm": 120.26818084716797,
"learning_rate": 6.655924144404906e-08,
"logits/chosen": -0.7974970936775208,
"logits/rejected": -0.754688024520874,
"loss": 4.3403,
"step": 376
},
{
"beta_dpo/beta_used": 0.021915648132562637,
"beta_dpo/beta_used_raw": -0.01294963899999857,
"beta_dpo/gap_mean": 26.834131240844727,
"beta_dpo/gap_std": 52.551292419433594,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.7895287958115184,
"grad_norm": 222.95492553710938,
"learning_rate": 6.532033950290885e-08,
"logits/chosen": -0.9012744426727295,
"logits/rejected": -0.8887965679168701,
"loss": 4.7781,
"step": 377
},
{
"beta_dpo/beta_used": 0.011818885803222656,
"beta_dpo/beta_used_raw": -0.029823636636137962,
"beta_dpo/gap_mean": 26.240825653076172,
"beta_dpo/gap_std": 51.9726448059082,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.7916230366492146,
"grad_norm": 142.94700622558594,
"learning_rate": 6.409134137148736e-08,
"logits/chosen": -0.8205504417419434,
"logits/rejected": -0.826806366443634,
"loss": 5.265,
"step": 378
},
{
"beta_dpo/beta_used": 0.038683511316776276,
"beta_dpo/beta_used_raw": 0.015086468309164047,
"beta_dpo/gap_mean": 28.403867721557617,
"beta_dpo/gap_std": 53.254478454589844,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.793717277486911,
"grad_norm": 217.25274658203125,
"learning_rate": 6.28723129572247e-08,
"logits/chosen": -0.8288396596908569,
"logits/rejected": -0.8588307499885559,
"loss": 4.2979,
"step": 379
},
{
"beta_dpo/beta_used": 0.017481593415141106,
"beta_dpo/beta_used_raw": -0.003103232476860285,
"beta_dpo/gap_mean": 29.25552749633789,
"beta_dpo/gap_std": 53.82293701171875,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.7958115183246073,
"grad_norm": 157.94027709960938,
"learning_rate": 6.166331963291519e-08,
"logits/chosen": -0.8616006970405579,
"logits/rejected": -0.8570124506950378,
"loss": 5.0083,
"step": 380
},
{
"beta_dpo/beta_used": 0.021431434899568558,
"beta_dpo/beta_used_raw": -0.011747539043426514,
"beta_dpo/gap_mean": 29.78434181213379,
"beta_dpo/gap_std": 51.756473541259766,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.7979057591623037,
"grad_norm": 144.8572235107422,
"learning_rate": 6.046442623320145e-08,
"logits/chosen": -0.8431472182273865,
"logits/rejected": -0.7634297013282776,
"loss": 4.6818,
"step": 381
},
{
"beta_dpo/beta_used": 0.03567413240671158,
"beta_dpo/beta_used_raw": 0.008900219574570656,
"beta_dpo/gap_mean": 31.605493545532227,
"beta_dpo/gap_std": 50.421817779541016,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.8,
"grad_norm": 190.073974609375,
"learning_rate": 5.9275697051098275e-08,
"logits/chosen": -0.8556850552558899,
"logits/rejected": -0.8041601777076721,
"loss": 4.0047,
"step": 382
},
{
"beta_dpo/beta_used": 0.020303381606936455,
"beta_dpo/beta_used_raw": 0.004482526797801256,
"beta_dpo/gap_mean": 33.393123626708984,
"beta_dpo/gap_std": 50.67055130004883,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.8020942408376963,
"grad_norm": 126.13748931884766,
"learning_rate": 5.809719583454414e-08,
"logits/chosen": -0.788833737373352,
"logits/rejected": -0.7815289497375488,
"loss": 4.2619,
"step": 383
},
{
"beta_dpo/beta_used": 0.01302328985184431,
"beta_dpo/beta_used_raw": -0.01569559797644615,
"beta_dpo/gap_mean": 30.79790687561035,
"beta_dpo/gap_std": 50.971168518066406,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.8041884816753927,
"grad_norm": 199.42294311523438,
"learning_rate": 5.6928985782982524e-08,
"logits/chosen": -0.8755144476890564,
"logits/rejected": -0.8719990253448486,
"loss": 5.012,
"step": 384
},
{
"beta_dpo/beta_used": 0.017824744805693626,
"beta_dpo/beta_used_raw": -0.004108890891075134,
"beta_dpo/gap_mean": 30.42023277282715,
"beta_dpo/gap_std": 50.25197219848633,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.806282722513089,
"grad_norm": 223.4486083984375,
"learning_rate": 5.57711295439732e-08,
"logits/chosen": -0.8377327919006348,
"logits/rejected": -0.8308869004249573,
"loss": 4.8747,
"step": 385
},
{
"beta_dpo/beta_used": 0.046246424317359924,
"beta_dpo/beta_used_raw": 0.02471497654914856,
"beta_dpo/gap_mean": 34.329776763916016,
"beta_dpo/gap_std": 49.33695983886719,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8083769633507853,
"grad_norm": 221.65078735351562,
"learning_rate": 5.4623689209832484e-08,
"logits/chosen": -0.7701154947280884,
"logits/rejected": -0.8202899694442749,
"loss": 3.8539,
"step": 386
},
{
"beta_dpo/beta_used": 0.04278576001524925,
"beta_dpo/beta_used_raw": 0.015627289190888405,
"beta_dpo/gap_mean": 31.348127365112305,
"beta_dpo/gap_std": 50.26094055175781,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.8104712041884817,
"grad_norm": 83.0886459350586,
"learning_rate": 5.3486726314303175e-08,
"logits/chosen": -0.8732501864433289,
"logits/rejected": -0.8548240661621094,
"loss": 3.9074,
"step": 387
},
{
"beta_dpo/beta_used": 0.009247594512999058,
"beta_dpo/beta_used_raw": -0.018486540764570236,
"beta_dpo/gap_mean": 29.602096557617188,
"beta_dpo/gap_std": 50.2357177734375,
"beta_dpo/mask_keep_frac": 0.65625,
"epoch": 0.812565445026178,
"grad_norm": 58.26310348510742,
"learning_rate": 5.2360301829254745e-08,
"logits/chosen": -0.9190385937690735,
"logits/rejected": -0.884000301361084,
"loss": 4.9807,
"step": 388
},
{
"beta_dpo/beta_used": 0.03028152696788311,
"beta_dpo/beta_used_raw": -0.0006860191933810711,
"beta_dpo/gap_mean": 27.959213256835938,
"beta_dpo/gap_std": 51.936866760253906,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.8146596858638744,
"grad_norm": 152.3852081298828,
"learning_rate": 5.1244476161413806e-08,
"logits/chosen": -0.8672448396682739,
"logits/rejected": -0.8208280205726624,
"loss": 4.512,
"step": 389
},
{
"beta_dpo/beta_used": 0.02013925462961197,
"beta_dpo/beta_used_raw": 0.012077848426997662,
"beta_dpo/gap_mean": 29.23447608947754,
"beta_dpo/gap_std": 51.4747314453125,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.8167539267015707,
"grad_norm": 197.6220245361328,
"learning_rate": 5.013930914912476e-08,
"logits/chosen": -0.837507963180542,
"logits/rejected": -0.8486427664756775,
"loss": 4.7944,
"step": 390
},
{
"beta_dpo/beta_used": 0.012308573350310326,
"beta_dpo/beta_used_raw": -0.06008676812052727,
"beta_dpo/gap_mean": 30.96744155883789,
"beta_dpo/gap_std": 51.099151611328125,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.818848167539267,
"grad_norm": 130.78782653808594,
"learning_rate": 4.904486005914027e-08,
"logits/chosen": -0.8092834949493408,
"logits/rejected": -0.7616171836853027,
"loss": 5.218,
"step": 391
},
{
"beta_dpo/beta_used": 0.03401728719472885,
"beta_dpo/beta_used_raw": 0.01575944572687149,
"beta_dpo/gap_mean": 35.89379119873047,
"beta_dpo/gap_std": 50.69645690917969,
"beta_dpo/mask_keep_frac": 0.65625,
"epoch": 0.8209424083769633,
"grad_norm": 141.2838134765625,
"learning_rate": 4.796118758344353e-08,
"logits/chosen": -0.8125319480895996,
"logits/rejected": -0.7968068718910217,
"loss": 3.9905,
"step": 392
},
{
"beta_dpo/beta_used": 0.029492482542991638,
"beta_dpo/beta_used_raw": 0.006723019294440746,
"beta_dpo/gap_mean": 31.739521026611328,
"beta_dpo/gap_std": 51.30779266357422,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.8230366492146597,
"grad_norm": 219.48927307128906,
"learning_rate": 4.688834983610082e-08,
"logits/chosen": -0.7747592926025391,
"logits/rejected": -0.7800062894821167,
"loss": 4.3227,
"step": 393
},
{
"beta_dpo/beta_used": 0.006166150793433189,
"beta_dpo/beta_used_raw": -0.024336861446499825,
"beta_dpo/gap_mean": 31.60442543029785,
"beta_dpo/gap_std": 52.29357147216797,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8251308900523561,
"grad_norm": 46.31927490234375,
"learning_rate": 4.582640435014459e-08,
"logits/chosen": -0.8091763257980347,
"logits/rejected": -0.8224099278450012,
"loss": 5.0968,
"step": 394
},
{
"beta_dpo/beta_used": 0.036968886852264404,
"beta_dpo/beta_used_raw": 0.023283787071704865,
"beta_dpo/gap_mean": 30.08101463317871,
"beta_dpo/gap_std": 49.931846618652344,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.8272251308900523,
"grad_norm": 206.76434326171875,
"learning_rate": 4.477540807448832e-08,
"logits/chosen": -0.8666899800300598,
"logits/rejected": -0.9089019894599915,
"loss": 3.6018,
"step": 395
},
{
"beta_dpo/beta_used": 0.02417484112083912,
"beta_dpo/beta_used_raw": -0.01262733619660139,
"beta_dpo/gap_mean": 32.86610412597656,
"beta_dpo/gap_std": 49.70528793334961,
"beta_dpo/mask_keep_frac": 0.65625,
"epoch": 0.8293193717277487,
"grad_norm": 233.59030151367188,
"learning_rate": 4.373541737087263e-08,
"logits/chosen": -0.822211503982544,
"logits/rejected": -0.8186702728271484,
"loss": 4.8537,
"step": 396
},
{
"beta_dpo/beta_used": 0.02938215062022209,
"beta_dpo/beta_used_raw": -0.01723414473235607,
"beta_dpo/gap_mean": 31.259389877319336,
"beta_dpo/gap_std": 48.74763870239258,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.831413612565445,
"grad_norm": 204.6764373779297,
"learning_rate": 4.270648801084295e-08,
"logits/chosen": -0.9242237210273743,
"logits/rejected": -0.914775013923645,
"loss": 4.705,
"step": 397
},
{
"beta_dpo/beta_used": 0.028000906109809875,
"beta_dpo/beta_used_raw": 0.01324938703328371,
"beta_dpo/gap_mean": 28.033884048461914,
"beta_dpo/gap_std": 53.956783294677734,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.8335078534031414,
"grad_norm": 272.271240234375,
"learning_rate": 4.168867517275806e-08,
"logits/chosen": -0.7791767120361328,
"logits/rejected": -0.832636296749115,
"loss": 4.4102,
"step": 398
},
{
"beta_dpo/beta_used": 0.040391743183135986,
"beta_dpo/beta_used_raw": 0.029338005930185318,
"beta_dpo/gap_mean": 26.80057716369629,
"beta_dpo/gap_std": 53.54316711425781,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.8356020942408376,
"grad_norm": 146.1692352294922,
"learning_rate": 4.0682033438831584e-08,
"logits/chosen": -0.8662706613540649,
"logits/rejected": -0.8145262002944946,
"loss": 3.8751,
"step": 399
},
{
"beta_dpo/beta_used": 0.0640939474105835,
"beta_dpo/beta_used_raw": 0.046954307705163956,
"beta_dpo/gap_mean": 27.499759674072266,
"beta_dpo/gap_std": 49.925628662109375,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.837696335078534,
"grad_norm": 327.8544921875,
"learning_rate": 3.968661679220467e-08,
"logits/chosen": -0.9200219511985779,
"logits/rejected": -0.9016293883323669,
"loss": 3.5281,
"step": 400
},
{
"epoch": 0.837696335078534,
"eval_beta_dpo/beta_used": 0.043008919805288315,
"eval_beta_dpo/beta_used_raw": 0.020729079842567444,
"eval_beta_dpo/gap_mean": 28.022653579711914,
"eval_beta_dpo/gap_std": 50.3673095703125,
"eval_beta_dpo/mask_keep_frac": 1.0,
"eval_logits/chosen": -0.8453658223152161,
"eval_logits/rejected": -0.8282322883605957,
"eval_loss": 0.6356604099273682,
"eval_runtime": 81.5313,
"eval_samples_per_second": 24.53,
"eval_steps_per_second": 1.533,
"step": 400
},
{
"beta_dpo/beta_used": 0.025461485609412193,
"beta_dpo/beta_used_raw": -0.004204742610454559,
"beta_dpo/gap_mean": 29.487524032592773,
"beta_dpo/gap_std": 50.156776428222656,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8397905759162304,
"grad_norm": 56.73976135253906,
"learning_rate": 3.8702478614051345e-08,
"logits/chosen": -0.7528951168060303,
"logits/rejected": -0.719306230545044,
"loss": 4.5596,
"step": 401
},
{
"beta_dpo/beta_used": 0.02623908221721649,
"beta_dpo/beta_used_raw": 0.017582345753908157,
"beta_dpo/gap_mean": 31.024076461791992,
"beta_dpo/gap_std": 52.295101165771484,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.8418848167539267,
"grad_norm": 356.29595947265625,
"learning_rate": 3.772967168071517e-08,
"logits/chosen": -0.8574113845825195,
"logits/rejected": -0.8025684356689453,
"loss": 4.5168,
"step": 402
},
{
"beta_dpo/beta_used": 0.042898863554000854,
"beta_dpo/beta_used_raw": 0.03765055909752846,
"beta_dpo/gap_mean": 34.200382232666016,
"beta_dpo/gap_std": 48.579872131347656,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.8439790575916231,
"grad_norm": 138.32400512695312,
"learning_rate": 3.676824816087978e-08,
"logits/chosen": -0.7763471603393555,
"logits/rejected": -0.7996782064437866,
"loss": 3.3116,
"step": 403
},
{
"beta_dpo/beta_used": 0.0233171284198761,
"beta_dpo/beta_used_raw": 0.0011256425641477108,
"beta_dpo/gap_mean": 35.22697448730469,
"beta_dpo/gap_std": 51.0013427734375,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.8460732984293193,
"grad_norm": 113.2969741821289,
"learning_rate": 3.581825961277074e-08,
"logits/chosen": -0.8510360717773438,
"logits/rejected": -0.8215500116348267,
"loss": 4.0111,
"step": 404
},
{
"beta_dpo/beta_used": 0.012892654165625572,
"beta_dpo/beta_used_raw": 0.0009398059919476509,
"beta_dpo/gap_mean": 34.50669860839844,
"beta_dpo/gap_std": 52.5545654296875,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.8481675392670157,
"grad_norm": 106.28509521484375,
"learning_rate": 3.487975698139084e-08,
"logits/chosen": -0.6867244839668274,
"logits/rejected": -0.677395761013031,
"loss": 4.3154,
"step": 405
},
{
"beta_dpo/beta_used": 0.023643236607313156,
"beta_dpo/beta_used_raw": -0.009170491248369217,
"beta_dpo/gap_mean": 29.635848999023438,
"beta_dpo/gap_std": 49.92266082763672,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.8502617801047121,
"grad_norm": 134.22201538085938,
"learning_rate": 3.3952790595787986e-08,
"logits/chosen": -0.8843967318534851,
"logits/rejected": -0.8679218888282776,
"loss": 4.8186,
"step": 406
},
{
"beta_dpo/beta_used": 0.028158362954854965,
"beta_dpo/beta_used_raw": 0.01929015852510929,
"beta_dpo/gap_mean": 29.419769287109375,
"beta_dpo/gap_std": 50.9369010925293,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.8523560209424084,
"grad_norm": 164.86927795410156,
"learning_rate": 3.303741016635614e-08,
"logits/chosen": -0.8338272571563721,
"logits/rejected": -0.8456038236618042,
"loss": 3.7483,
"step": 407
},
{
"beta_dpo/beta_used": 0.04387975111603737,
"beta_dpo/beta_used_raw": 0.024929020553827286,
"beta_dpo/gap_mean": 29.930322647094727,
"beta_dpo/gap_std": 50.4144287109375,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.8544502617801047,
"grad_norm": 474.077880859375,
"learning_rate": 3.2133664782169944e-08,
"logits/chosen": -0.8612761497497559,
"logits/rejected": -0.7689127326011658,
"loss": 4.2207,
"step": 408
},
{
"beta_dpo/beta_used": 0.020363079383969307,
"beta_dpo/beta_used_raw": 0.00438337679952383,
"beta_dpo/gap_mean": 32.19656753540039,
"beta_dpo/gap_std": 51.08381652832031,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.856544502617801,
"grad_norm": 94.34365844726562,
"learning_rate": 3.12416029083514e-08,
"logits/chosen": -0.7993679642677307,
"logits/rejected": -0.8109673261642456,
"loss": 4.2298,
"step": 409
},
{
"beta_dpo/beta_used": 0.012268463149666786,
"beta_dpo/beta_used_raw": -0.01718856208026409,
"beta_dpo/gap_mean": 28.865314483642578,
"beta_dpo/gap_std": 51.235557556152344,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8586387434554974,
"grad_norm": 82.62427520751953,
"learning_rate": 3.036127238347164e-08,
"logits/chosen": -0.8782304525375366,
"logits/rejected": -0.8800264596939087,
"loss": 4.8676,
"step": 410
},
{
"beta_dpo/beta_used": 0.03287056088447571,
"beta_dpo/beta_used_raw": 0.008828896097838879,
"beta_dpo/gap_mean": 31.09811019897461,
"beta_dpo/gap_std": 50.671939849853516,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.8607329842931937,
"grad_norm": 111.97496032714844,
"learning_rate": 2.9492720416985e-08,
"logits/chosen": -0.7707123756408691,
"logits/rejected": -0.7606396675109863,
"loss": 3.8463,
"step": 411
},
{
"beta_dpo/beta_used": 0.02247859537601471,
"beta_dpo/beta_used_raw": -0.003850158303976059,
"beta_dpo/gap_mean": 30.756423950195312,
"beta_dpo/gap_std": 50.8740119934082,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.86282722513089,
"grad_norm": 121.05892181396484,
"learning_rate": 2.863599358669755e-08,
"logits/chosen": -0.779039740562439,
"logits/rejected": -0.7793789505958557,
"loss": 4.4319,
"step": 412
},
{
"beta_dpo/beta_used": 0.03222234919667244,
"beta_dpo/beta_used_raw": 0.015333538874983788,
"beta_dpo/gap_mean": 28.6728458404541,
"beta_dpo/gap_std": 49.384368896484375,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.8649214659685864,
"grad_norm": 162.366455078125,
"learning_rate": 2.7791137836269158e-08,
"logits/chosen": -0.9385237097740173,
"logits/rejected": -0.9121523499488831,
"loss": 4.167,
"step": 413
},
{
"beta_dpo/beta_used": 0.017970332875847816,
"beta_dpo/beta_used_raw": -0.019473586231470108,
"beta_dpo/gap_mean": 29.9796085357666,
"beta_dpo/gap_std": 50.113468170166016,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8670157068062827,
"grad_norm": 80.46460723876953,
"learning_rate": 2.6958198472749717e-08,
"logits/chosen": -0.9034287929534912,
"logits/rejected": -0.855298638343811,
"loss": 4.5007,
"step": 414
},
{
"beta_dpo/beta_used": 0.07080215215682983,
"beta_dpo/beta_used_raw": 0.06397496908903122,
"beta_dpo/gap_mean": 31.179443359375,
"beta_dpo/gap_std": 48.66398239135742,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.8691099476439791,
"grad_norm": 204.76092529296875,
"learning_rate": 2.613722016414943e-08,
"logits/chosen": -0.8139724731445312,
"logits/rejected": -0.7881863117218018,
"loss": 2.9196,
"step": 415
},
{
"beta_dpo/beta_used": 0.03896103799343109,
"beta_dpo/beta_used_raw": 0.02134716510772705,
"beta_dpo/gap_mean": 34.836082458496094,
"beta_dpo/gap_std": 50.03068923950195,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.8712041884816754,
"grad_norm": 203.65744018554688,
"learning_rate": 2.5328246937043525e-08,
"logits/chosen": -0.9353795647621155,
"logits/rejected": -0.8975551128387451,
"loss": 3.9669,
"step": 416
},
{
"beta_dpo/beta_used": 0.03526991605758667,
"beta_dpo/beta_used_raw": -0.01772877387702465,
"beta_dpo/gap_mean": 32.672035217285156,
"beta_dpo/gap_std": 49.94234085083008,
"beta_dpo/mask_keep_frac": 0.65625,
"epoch": 0.8732984293193717,
"grad_norm": 189.84410095214844,
"learning_rate": 2.4531322174210973e-08,
"logits/chosen": -0.756232738494873,
"logits/rejected": -0.8090646266937256,
"loss": 4.3281,
"step": 417
},
{
"beta_dpo/beta_used": 0.045910660177469254,
"beta_dpo/beta_used_raw": 0.014944255352020264,
"beta_dpo/gap_mean": 30.950489044189453,
"beta_dpo/gap_std": 51.23707580566406,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.875392670157068,
"grad_norm": 164.94105529785156,
"learning_rate": 2.3746488612308295e-08,
"logits/chosen": -0.8820661306381226,
"logits/rejected": -0.8479762077331543,
"loss": 3.3815,
"step": 418
},
{
"beta_dpo/beta_used": 0.05247935280203819,
"beta_dpo/beta_used_raw": 0.038780488073825836,
"beta_dpo/gap_mean": 31.49786376953125,
"beta_dpo/gap_std": 52.62058639526367,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.8774869109947644,
"grad_norm": 147.83372497558594,
"learning_rate": 2.297378833957761e-08,
"logits/chosen": -0.7841131091117859,
"logits/rejected": -0.7802114486694336,
"loss": 3.6582,
"step": 419
},
{
"beta_dpo/beta_used": 0.057858943939208984,
"beta_dpo/beta_used_raw": 0.04030502960085869,
"beta_dpo/gap_mean": 34.97361755371094,
"beta_dpo/gap_std": 55.68037033081055,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.8795811518324608,
"grad_norm": 268.9122619628906,
"learning_rate": 2.2213262793589482e-08,
"logits/chosen": -0.7773014903068542,
"logits/rejected": -0.7394383549690247,
"loss": 3.8373,
"step": 420
},
{
"beta_dpo/beta_used": 0.05588060989975929,
"beta_dpo/beta_used_raw": 0.027968432754278183,
"beta_dpo/gap_mean": 35.70938491821289,
"beta_dpo/gap_std": 53.80148696899414,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.881675392670157,
"grad_norm": 263.6426086425781,
"learning_rate": 2.1464952759020856e-08,
"logits/chosen": -0.9263103008270264,
"logits/rejected": -0.9025065898895264,
"loss": 3.7191,
"step": 421
},
{
"beta_dpo/beta_used": 0.013131741434335709,
"beta_dpo/beta_used_raw": -0.032616935670375824,
"beta_dpo/gap_mean": 32.63302230834961,
"beta_dpo/gap_std": 54.2334098815918,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.8837696335078534,
"grad_norm": 111.65141296386719,
"learning_rate": 2.07288983654679e-08,
"logits/chosen": -0.7539777755737305,
"logits/rejected": -0.7705018520355225,
"loss": 4.7393,
"step": 422
},
{
"beta_dpo/beta_used": 0.04880265146493912,
"beta_dpo/beta_used_raw": 0.021930556744337082,
"beta_dpo/gap_mean": 33.534523010253906,
"beta_dpo/gap_std": 52.5704460144043,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.8858638743455497,
"grad_norm": 376.0655822753906,
"learning_rate": 2.0005139085293942e-08,
"logits/chosen": -0.8840563893318176,
"logits/rejected": -0.8793922662734985,
"loss": 4.27,
"step": 423
},
{
"beta_dpo/beta_used": 0.02224777452647686,
"beta_dpo/beta_used_raw": 0.0034151384606957436,
"beta_dpo/gap_mean": 34.246089935302734,
"beta_dpo/gap_std": 52.21100616455078,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.8879581151832461,
"grad_norm": 141.1042022705078,
"learning_rate": 1.9293713731512673e-08,
"logits/chosen": -0.8222829103469849,
"logits/rejected": -0.8296815156936646,
"loss": 3.9918,
"step": 424
},
{
"beta_dpo/beta_used": 0.03644920140504837,
"beta_dpo/beta_used_raw": -0.014164052903652191,
"beta_dpo/gap_mean": 32.60451889038086,
"beta_dpo/gap_std": 50.56034851074219,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.8900523560209425,
"grad_norm": 150.81195068359375,
"learning_rate": 1.8594660455706763e-08,
"logits/chosen": -0.8337830901145935,
"logits/rejected": -0.8451286554336548,
"loss": 3.8765,
"step": 425
},
{
"beta_dpo/beta_used": 0.03593583405017853,
"beta_dpo/beta_used_raw": 0.02168644592165947,
"beta_dpo/gap_mean": 29.087791442871094,
"beta_dpo/gap_std": 49.60078048706055,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.8921465968586387,
"grad_norm": 134.13816833496094,
"learning_rate": 1.7908016745981856e-08,
"logits/chosen": -0.7210733294487,
"logits/rejected": -0.7480963468551636,
"loss": 4.2344,
"step": 426
},
{
"beta_dpo/beta_used": 0.0667373538017273,
"beta_dpo/beta_used_raw": 0.033293262124061584,
"beta_dpo/gap_mean": 31.312068939208984,
"beta_dpo/gap_std": 51.69874572753906,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.8942408376963351,
"grad_norm": 339.73394775390625,
"learning_rate": 1.7233819424956247e-08,
"logits/chosen": -0.8241250514984131,
"logits/rejected": -0.7590780854225159,
"loss": 4.1269,
"step": 427
},
{
"beta_dpo/beta_used": 0.03810206055641174,
"beta_dpo/beta_used_raw": 0.005916805937886238,
"beta_dpo/gap_mean": 38.2218017578125,
"beta_dpo/gap_std": 51.52684020996094,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.8963350785340314,
"grad_norm": 648.3778076171875,
"learning_rate": 1.6572104647786245e-08,
"logits/chosen": -0.7526270747184753,
"logits/rejected": -0.8342408537864685,
"loss": 4.9188,
"step": 428
},
{
"beta_dpo/beta_used": 0.02025276981294155,
"beta_dpo/beta_used_raw": -0.01976284198462963,
"beta_dpo/gap_mean": 36.52273178100586,
"beta_dpo/gap_std": 54.76076126098633,
"beta_dpo/mask_keep_frac": 0.65625,
"epoch": 0.8984293193717278,
"grad_norm": 89.35426330566406,
"learning_rate": 1.5922907900227017e-08,
"logits/chosen": -0.7855672240257263,
"logits/rejected": -0.769487202167511,
"loss": 4.5233,
"step": 429
},
{
"beta_dpo/beta_used": 0.010954808443784714,
"beta_dpo/beta_used_raw": -0.015571440570056438,
"beta_dpo/gap_mean": 34.89046859741211,
"beta_dpo/gap_std": 51.79176712036133,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.900523560209424,
"grad_norm": 66.42906188964844,
"learning_rate": 1.5286263996730026e-08,
"logits/chosen": -0.9020602703094482,
"logits/rejected": -0.799609899520874,
"loss": 4.4628,
"step": 430
},
{
"beta_dpo/beta_used": 0.004862995818257332,
"beta_dpo/beta_used_raw": -0.036979954689741135,
"beta_dpo/gap_mean": 29.470109939575195,
"beta_dpo/gap_std": 50.87688446044922,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.9026178010471204,
"grad_norm": 47.393733978271484,
"learning_rate": 1.4662207078575684e-08,
"logits/chosen": -0.8049024939537048,
"logits/rejected": -0.772520899772644,
"loss": 5.1653,
"step": 431
},
{
"beta_dpo/beta_used": 0.029558269307017326,
"beta_dpo/beta_used_raw": 0.020929085090756416,
"beta_dpo/gap_mean": 32.15821838378906,
"beta_dpo/gap_std": 52.068603515625,
"beta_dpo/mask_keep_frac": 0.625,
"epoch": 0.9047120418848168,
"grad_norm": 300.16351318359375,
"learning_rate": 1.40507706120426e-08,
"logits/chosen": -0.8398734331130981,
"logits/rejected": -0.8560636639595032,
"loss": 4.2815,
"step": 432
},
{
"beta_dpo/beta_used": 0.024735111743211746,
"beta_dpo/beta_used_raw": 0.006575713399797678,
"beta_dpo/gap_mean": 31.19025230407715,
"beta_dpo/gap_std": 52.5582389831543,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9068062827225131,
"grad_norm": 110.4648666381836,
"learning_rate": 1.345198738661285e-08,
"logits/chosen": -0.840786337852478,
"logits/rejected": -0.8298450708389282,
"loss": 4.0054,
"step": 433
},
{
"beta_dpo/beta_used": 0.017018688842654228,
"beta_dpo/beta_used_raw": -0.01911812275648117,
"beta_dpo/gap_mean": 28.489105224609375,
"beta_dpo/gap_std": 50.24304962158203,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.9089005235602095,
"grad_norm": 142.97439575195312,
"learning_rate": 1.2865889513213628e-08,
"logits/chosen": -0.8282724618911743,
"logits/rejected": -0.8246201276779175,
"loss": 4.5609,
"step": 434
},
{
"beta_dpo/beta_used": 0.02615453489124775,
"beta_dpo/beta_used_raw": 0.0022685863077640533,
"beta_dpo/gap_mean": 30.370590209960938,
"beta_dpo/gap_std": 50.549224853515625,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9109947643979057,
"grad_norm": 173.58056640625,
"learning_rate": 1.2292508422495157e-08,
"logits/chosen": -0.8596353530883789,
"logits/rejected": -0.8763912916183472,
"loss": 4.6802,
"step": 435
},
{
"beta_dpo/beta_used": 0.021672368049621582,
"beta_dpo/beta_used_raw": -0.016893737018108368,
"beta_dpo/gap_mean": 29.583505630493164,
"beta_dpo/gap_std": 53.356544494628906,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.9130890052356021,
"grad_norm": 171.13877868652344,
"learning_rate": 1.1731874863145142e-08,
"logits/chosen": -0.7855619192123413,
"logits/rejected": -0.8202630877494812,
"loss": 4.4808,
"step": 436
},
{
"beta_dpo/beta_used": 0.024419579654932022,
"beta_dpo/beta_used_raw": 0.019063415005803108,
"beta_dpo/gap_mean": 30.05594253540039,
"beta_dpo/gap_std": 54.0589485168457,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9151832460732985,
"grad_norm": 139.63742065429688,
"learning_rate": 1.118401890024001e-08,
"logits/chosen": -0.8779160976409912,
"logits/rejected": -0.850941002368927,
"loss": 4.1159,
"step": 437
},
{
"beta_dpo/beta_used": 0.012894796207547188,
"beta_dpo/beta_used_raw": -0.03390258550643921,
"beta_dpo/gap_mean": 26.959020614624023,
"beta_dpo/gap_std": 53.31471252441406,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.9172774869109948,
"grad_norm": 74.52314758300781,
"learning_rate": 1.06489699136324e-08,
"logits/chosen": -0.8029293417930603,
"logits/rejected": -0.807404100894928,
"loss": 4.9337,
"step": 438
},
{
"beta_dpo/beta_used": 0.041374292224645615,
"beta_dpo/beta_used_raw": 0.02538049779832363,
"beta_dpo/gap_mean": 26.866544723510742,
"beta_dpo/gap_std": 51.9473876953125,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.9193717277486911,
"grad_norm": 281.1230163574219,
"learning_rate": 1.0126756596375685e-08,
"logits/chosen": -0.8005992770195007,
"logits/rejected": -0.8386653065681458,
"loss": 4.2462,
"step": 439
},
{
"beta_dpo/beta_used": 0.020926889032125473,
"beta_dpo/beta_used_raw": -0.01957480050623417,
"beta_dpo/gap_mean": 25.91887092590332,
"beta_dpo/gap_std": 47.49887466430664,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.9214659685863874,
"grad_norm": 170.86585998535156,
"learning_rate": 9.617406953185136e-09,
"logits/chosen": -0.7887669801712036,
"logits/rejected": -0.786566972732544,
"loss": 4.7906,
"step": 440
},
{
"beta_dpo/beta_used": 0.040106188505887985,
"beta_dpo/beta_used_raw": 0.017551787197589874,
"beta_dpo/gap_mean": 27.56520652770996,
"beta_dpo/gap_std": 48.65106964111328,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.9235602094240838,
"grad_norm": 185.98721313476562,
"learning_rate": 9.12094829893642e-09,
"logits/chosen": -0.8935746550559998,
"logits/rejected": -0.8328600525856018,
"loss": 4.5269,
"step": 441
},
{
"beta_dpo/beta_used": 0.028513526543974876,
"beta_dpo/beta_used_raw": -0.0015279550570994616,
"beta_dpo/gap_mean": 30.204608917236328,
"beta_dpo/gap_std": 50.07164764404297,
"beta_dpo/mask_keep_frac": 0.875,
"epoch": 0.9256544502617801,
"grad_norm": 216.73915100097656,
"learning_rate": 8.637407257200496e-09,
"logits/chosen": -0.8646829724311829,
"logits/rejected": -0.8786430954933167,
"loss": 4.3975,
"step": 442
},
{
"beta_dpo/beta_used": 0.04191158711910248,
"beta_dpo/beta_used_raw": 0.0261215940117836,
"beta_dpo/gap_mean": 28.124242782592773,
"beta_dpo/gap_std": 48.77510070800781,
"beta_dpo/mask_keep_frac": 0.75,
"epoch": 0.9277486910994764,
"grad_norm": 246.431640625,
"learning_rate": 8.166809758815895e-09,
"logits/chosen": -0.7345380783081055,
"logits/rejected": -0.8072965145111084,
"loss": 4.057,
"step": 443
},
{
"beta_dpo/beta_used": 0.03024943172931671,
"beta_dpo/beta_used_raw": 0.010058403015136719,
"beta_dpo/gap_mean": 31.88334846496582,
"beta_dpo/gap_std": 50.78257369995117,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.9298429319371728,
"grad_norm": 217.31375122070312,
"learning_rate": 7.709181040498253e-09,
"logits/chosen": -0.7552200555801392,
"logits/rejected": -0.730567991733551,
"loss": 4.2506,
"step": 444
},
{
"beta_dpo/beta_used": 0.04256928712129593,
"beta_dpo/beta_used_raw": -0.013574687764048576,
"beta_dpo/gap_mean": 29.146665573120117,
"beta_dpo/gap_std": 53.06696701049805,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.9319371727748691,
"grad_norm": 349.0355529785156,
"learning_rate": 7.2645456434869965e-09,
"logits/chosen": -0.8601400256156921,
"logits/rejected": -0.8750321865081787,
"loss": 4.3467,
"step": 445
},
{
"beta_dpo/beta_used": 0.02674350142478943,
"beta_dpo/beta_used_raw": 0.010028916411101818,
"beta_dpo/gap_mean": 32.160865783691406,
"beta_dpo/gap_std": 53.44306564331055,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9340314136125655,
"grad_norm": 164.8477325439453,
"learning_rate": 6.832927412229017e-09,
"logits/chosen": -0.7708781361579895,
"logits/rejected": -0.7476394772529602,
"loss": 4.01,
"step": 446
},
{
"beta_dpo/beta_used": 0.027856381610035896,
"beta_dpo/beta_used_raw": 0.017823830246925354,
"beta_dpo/gap_mean": 33.03885269165039,
"beta_dpo/gap_std": 49.568260192871094,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.9361256544502617,
"grad_norm": 131.04490661621094,
"learning_rate": 6.414349493100129e-09,
"logits/chosen": -0.8864074349403381,
"logits/rejected": -0.8868736624717712,
"loss": 3.8027,
"step": 447
},
{
"beta_dpo/beta_used": 0.034370094537734985,
"beta_dpo/beta_used_raw": 0.008034870028495789,
"beta_dpo/gap_mean": 32.461265563964844,
"beta_dpo/gap_std": 48.22648239135742,
"beta_dpo/mask_keep_frac": 0.65625,
"epoch": 0.9382198952879581,
"grad_norm": 350.2030334472656,
"learning_rate": 6.0088343331638756e-09,
"logits/chosen": -0.8367605209350586,
"logits/rejected": -0.8392966985702515,
"loss": 3.9396,
"step": 448
},
{
"beta_dpo/beta_used": 0.04194016754627228,
"beta_dpo/beta_used_raw": 0.03314446657896042,
"beta_dpo/gap_mean": 32.78199005126953,
"beta_dpo/gap_std": 49.67825698852539,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9403141361256544,
"grad_norm": 367.3363037109375,
"learning_rate": 5.616403678967624e-09,
"logits/chosen": -0.920991063117981,
"logits/rejected": -0.8886154294013977,
"loss": 3.4965,
"step": 449
},
{
"beta_dpo/beta_used": 0.005077804904431105,
"beta_dpo/beta_used_raw": -0.03199779987335205,
"beta_dpo/gap_mean": 33.04655456542969,
"beta_dpo/gap_std": 46.908870697021484,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.9424083769633508,
"grad_norm": 56.5856819152832,
"learning_rate": 5.2370785753763356e-09,
"logits/chosen": -0.8422713875770569,
"logits/rejected": -0.8291035890579224,
"loss": 5.1412,
"step": 450
},
{
"beta_dpo/beta_used": 0.023415734991431236,
"beta_dpo/beta_used_raw": -0.007750632241368294,
"beta_dpo/gap_mean": 31.3007869720459,
"beta_dpo/gap_std": 46.751678466796875,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.9445026178010472,
"grad_norm": 115.54217529296875,
"learning_rate": 4.8708793644441086e-09,
"logits/chosen": -0.7868208885192871,
"logits/rejected": -0.7739187479019165,
"loss": 4.6009,
"step": 451
},
{
"beta_dpo/beta_used": 0.02031254954636097,
"beta_dpo/beta_used_raw": -0.02357018180191517,
"beta_dpo/gap_mean": 32.508583068847656,
"beta_dpo/gap_std": 50.76416778564453,
"beta_dpo/mask_keep_frac": 0.65625,
"epoch": 0.9465968586387434,
"grad_norm": 151.5921173095703,
"learning_rate": 4.517825684323323e-09,
"logits/chosen": -0.7874301075935364,
"logits/rejected": -0.7545861005783081,
"loss": 4.589,
"step": 452
},
{
"beta_dpo/beta_used": 0.015478750690817833,
"beta_dpo/beta_used_raw": -0.0005428898148238659,
"beta_dpo/gap_mean": 32.38516616821289,
"beta_dpo/gap_std": 49.00554275512695,
"beta_dpo/mask_keep_frac": 0.6875,
"epoch": 0.9486910994764398,
"grad_norm": 169.36245727539062,
"learning_rate": 4.1779364682113794e-09,
"logits/chosen": -0.8509343266487122,
"logits/rejected": -0.8427782654762268,
"loss": 4.7353,
"step": 453
},
{
"beta_dpo/beta_used": 0.012610476464033127,
"beta_dpo/beta_used_raw": -0.010584852658212185,
"beta_dpo/gap_mean": 32.17422103881836,
"beta_dpo/gap_std": 49.280479431152344,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.9507853403141361,
"grad_norm": 73.80915832519531,
"learning_rate": 3.851229943335393e-09,
"logits/chosen": -0.9344862699508667,
"logits/rejected": -0.9276149272918701,
"loss": 4.5409,
"step": 454
},
{
"beta_dpo/beta_used": 0.013800965622067451,
"beta_dpo/beta_used_raw": -0.027739258483052254,
"beta_dpo/gap_mean": 28.679340362548828,
"beta_dpo/gap_std": 50.449771881103516,
"beta_dpo/mask_keep_frac": 0.59375,
"epoch": 0.9528795811518325,
"grad_norm": 98.63684844970703,
"learning_rate": 3.5377236299748147e-09,
"logits/chosen": -0.8242367506027222,
"logits/rejected": -0.8344764113426208,
"loss": 4.7569,
"step": 455
},
{
"beta_dpo/beta_used": 0.06382787972688675,
"beta_dpo/beta_used_raw": 0.040650881826877594,
"beta_dpo/gap_mean": 29.863061904907227,
"beta_dpo/gap_std": 55.417232513427734,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.9549738219895288,
"grad_norm": 355.5675964355469,
"learning_rate": 3.2374343405217884e-09,
"logits/chosen": -0.884809136390686,
"logits/rejected": -0.8778659701347351,
"loss": 3.9301,
"step": 456
},
{
"beta_dpo/beta_used": 0.06500288099050522,
"beta_dpo/beta_used_raw": 0.04227167367935181,
"beta_dpo/gap_mean": 32.736595153808594,
"beta_dpo/gap_std": 59.960296630859375,
"beta_dpo/mask_keep_frac": 0.625,
"epoch": 0.9570680628272251,
"grad_norm": 157.1930389404297,
"learning_rate": 2.9503781785795713e-09,
"logits/chosen": -0.8487591743469238,
"logits/rejected": -0.8349891901016235,
"loss": 2.6759,
"step": 457
},
{
"beta_dpo/beta_used": 0.02374722994863987,
"beta_dpo/beta_used_raw": -0.04022517800331116,
"beta_dpo/gap_mean": 31.09552764892578,
"beta_dpo/gap_std": 56.911495208740234,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.9591623036649215,
"grad_norm": 411.55517578125,
"learning_rate": 2.6765705380989432e-09,
"logits/chosen": -0.8151242136955261,
"logits/rejected": -0.8581142425537109,
"loss": 4.7969,
"step": 458
},
{
"beta_dpo/beta_used": 0.05280781164765358,
"beta_dpo/beta_used_raw": 0.024646718055009842,
"beta_dpo/gap_mean": 29.90413475036621,
"beta_dpo/gap_std": 53.489784240722656,
"beta_dpo/mask_keep_frac": 0.9375,
"epoch": 0.9612565445026178,
"grad_norm": 256.2548522949219,
"learning_rate": 2.416026102552732e-09,
"logits/chosen": -0.8302851319313049,
"logits/rejected": -0.8471137285232544,
"loss": 3.843,
"step": 459
},
{
"beta_dpo/beta_used": 0.022719116881489754,
"beta_dpo/beta_used_raw": 0.002210780745372176,
"beta_dpo/gap_mean": 29.041353225708008,
"beta_dpo/gap_std": 52.842437744140625,
"beta_dpo/mask_keep_frac": 0.875,
"epoch": 0.9633507853403142,
"grad_norm": 148.7731475830078,
"learning_rate": 2.168758844148272e-09,
"logits/chosen": -0.8966348171234131,
"logits/rejected": -0.8892766833305359,
"loss": 4.8806,
"step": 460
},
{
"beta_dpo/beta_used": 0.03528280928730965,
"beta_dpo/beta_used_raw": 0.01805609092116356,
"beta_dpo/gap_mean": 29.886600494384766,
"beta_dpo/gap_std": 51.72296905517578,
"beta_dpo/mask_keep_frac": 0.65625,
"epoch": 0.9654450261780104,
"grad_norm": 198.60765075683594,
"learning_rate": 1.9347820230782295e-09,
"logits/chosen": -0.791793942451477,
"logits/rejected": -0.8195943236351013,
"loss": 4.1942,
"step": 461
},
{
"beta_dpo/beta_used": 0.052680741995573044,
"beta_dpo/beta_used_raw": 0.035064440220594406,
"beta_dpo/gap_mean": 32.32624816894531,
"beta_dpo/gap_std": 54.05101013183594,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9675392670157068,
"grad_norm": 282.3392639160156,
"learning_rate": 1.7141081868094209e-09,
"logits/chosen": -0.8903741240501404,
"logits/rejected": -0.8310127258300781,
"loss": 3.7547,
"step": 462
},
{
"beta_dpo/beta_used": 0.016039669513702393,
"beta_dpo/beta_used_raw": -0.020198073238134384,
"beta_dpo/gap_mean": 32.30640411376953,
"beta_dpo/gap_std": 52.92686080932617,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.9696335078534032,
"grad_norm": 90.29280090332031,
"learning_rate": 1.5067491694100153e-09,
"logits/chosen": -0.8517540693283081,
"logits/rejected": -0.853223443031311,
"loss": 4.4556,
"step": 463
},
{
"beta_dpo/beta_used": 0.02930094487965107,
"beta_dpo/beta_used_raw": 0.0013244133442640305,
"beta_dpo/gap_mean": 30.446823120117188,
"beta_dpo/gap_std": 51.893402099609375,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.9717277486910995,
"grad_norm": 141.81759643554688,
"learning_rate": 1.3127160909147672e-09,
"logits/chosen": -0.8333346843719482,
"logits/rejected": -0.8381949663162231,
"loss": 4.5807,
"step": 464
},
{
"beta_dpo/beta_used": 0.045830510556697845,
"beta_dpo/beta_used_raw": 0.018346037715673447,
"beta_dpo/gap_mean": 31.705657958984375,
"beta_dpo/gap_std": 50.60383987426758,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.9738219895287958,
"grad_norm": 118.63272094726562,
"learning_rate": 1.1320193567288527e-09,
"logits/chosen": -0.7933779358863831,
"logits/rejected": -0.7936585545539856,
"loss": 3.4035,
"step": 465
},
{
"beta_dpo/beta_used": 0.04178696125745773,
"beta_dpo/beta_used_raw": 0.0331585593521595,
"beta_dpo/gap_mean": 36.18540573120117,
"beta_dpo/gap_std": 50.454200744628906,
"beta_dpo/mask_keep_frac": 0.90625,
"epoch": 0.9759162303664921,
"grad_norm": 175.4940643310547,
"learning_rate": 9.64668657069706e-10,
"logits/chosen": -0.8239483833312988,
"logits/rejected": -0.7942164540290833,
"loss": 3.9939,
"step": 466
},
{
"beta_dpo/beta_used": 0.030592869967222214,
"beta_dpo/beta_used_raw": -0.0003968037199229002,
"beta_dpo/gap_mean": 32.80325698852539,
"beta_dpo/gap_std": 50.57613754272461,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.9780104712041885,
"grad_norm": 137.76971435546875,
"learning_rate": 8.106729664475176e-10,
"logits/chosen": -0.6968907117843628,
"logits/rejected": -0.6687761545181274,
"loss": 4.3001,
"step": 467
},
{
"beta_dpo/beta_used": 0.01786745898425579,
"beta_dpo/beta_used_raw": -0.03523392230272293,
"beta_dpo/gap_mean": 28.95020294189453,
"beta_dpo/gap_std": 52.5392951965332,
"beta_dpo/mask_keep_frac": 0.96875,
"epoch": 0.9801047120418848,
"grad_norm": 148.3948211669922,
"learning_rate": 6.700405431837585e-10,
"logits/chosen": -0.849189043045044,
"logits/rejected": -0.8099946975708008,
"loss": 4.8179,
"step": 468
},
{
"beta_dpo/beta_used": 0.05720680207014084,
"beta_dpo/beta_used_raw": 0.04207749292254448,
"beta_dpo/gap_mean": 31.435684204101562,
"beta_dpo/gap_std": 53.248329162597656,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.9821989528795811,
"grad_norm": 237.18161010742188,
"learning_rate": 5.427789289685347e-10,
"logits/chosen": -0.8365699052810669,
"logits/rejected": -0.7970238327980042,
"loss": 4.0884,
"step": 469
},
{
"beta_dpo/beta_used": 0.04907160997390747,
"beta_dpo/beta_used_raw": 0.026105834171175957,
"beta_dpo/gap_mean": 33.24174118041992,
"beta_dpo/gap_std": 52.36241912841797,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.9842931937172775,
"grad_norm": 157.68939208984375,
"learning_rate": 4.288949484559934e-10,
"logits/chosen": -0.7854397296905518,
"logits/rejected": -0.8018498420715332,
"loss": 3.7433,
"step": 470
},
{
"beta_dpo/beta_used": 0.0241762176156044,
"beta_dpo/beta_used_raw": -0.003967747092247009,
"beta_dpo/gap_mean": 34.16176223754883,
"beta_dpo/gap_std": 51.57313919067383,
"beta_dpo/mask_keep_frac": 0.71875,
"epoch": 0.9863874345549738,
"grad_norm": 123.31388854980469,
"learning_rate": 3.2839470889836627e-10,
"logits/chosen": -0.9119861125946045,
"logits/rejected": -0.8991633057594299,
"loss": 3.9463,
"step": 471
},
{
"beta_dpo/beta_used": 0.023680521175265312,
"beta_dpo/beta_used_raw": 0.00017686188220977783,
"beta_dpo/gap_mean": 34.94512176513672,
"beta_dpo/gap_std": 53.29269027709961,
"beta_dpo/mask_keep_frac": 0.84375,
"epoch": 0.9884816753926702,
"grad_norm": 265.7321472167969,
"learning_rate": 2.412835998185092e-10,
"logits/chosen": -0.8801113367080688,
"logits/rejected": -0.8942077159881592,
"loss": 4.6235,
"step": 472
},
{
"beta_dpo/beta_used": 0.030485741794109344,
"beta_dpo/beta_used_raw": 0.020329464226961136,
"beta_dpo/gap_mean": 35.68143844604492,
"beta_dpo/gap_std": 51.89659118652344,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.9905759162303664,
"grad_norm": 144.84486389160156,
"learning_rate": 1.6756629272085544e-10,
"logits/chosen": -0.8776077628135681,
"logits/rejected": -0.8777634501457214,
"loss": 3.8487,
"step": 473
},
{
"beta_dpo/beta_used": 0.039661701768636703,
"beta_dpo/beta_used_raw": -0.012342464178800583,
"beta_dpo/gap_mean": 36.153831481933594,
"beta_dpo/gap_std": 50.874114990234375,
"beta_dpo/mask_keep_frac": 0.78125,
"epoch": 0.9926701570680628,
"grad_norm": 272.6778259277344,
"learning_rate": 1.072467408408384e-10,
"logits/chosen": -0.8588881492614746,
"logits/rejected": -0.8895531892776489,
"loss": 4.2205,
"step": 474
},
{
"beta_dpo/beta_used": 0.011708030477166176,
"beta_dpo/beta_used_raw": -0.036866847425699234,
"beta_dpo/gap_mean": 30.167787551879883,
"beta_dpo/gap_std": 47.42060089111328,
"beta_dpo/mask_keep_frac": 0.375,
"epoch": 0.9947643979057592,
"grad_norm": 72.4432601928711,
"learning_rate": 6.032817893297793e-11,
"logits/chosen": -0.7738948464393616,
"logits/rejected": -0.8091400265693665,
"loss": 4.5426,
"step": 475
},
{
"beta_dpo/beta_used": 0.023084495216608047,
"beta_dpo/beta_used_raw": -0.026419004425406456,
"beta_dpo/gap_mean": 30.15393829345703,
"beta_dpo/gap_std": 47.20201110839844,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.9968586387434555,
"grad_norm": 92.88987731933594,
"learning_rate": 2.6813123097352287e-11,
"logits/chosen": -0.8247819542884827,
"logits/rejected": -0.8255200982093811,
"loss": 4.5438,
"step": 476
},
{
"beta_dpo/beta_used": 0.032955169677734375,
"beta_dpo/beta_used_raw": -0.01132938638329506,
"beta_dpo/gap_mean": 30.793474197387695,
"beta_dpo/gap_std": 53.303714752197266,
"beta_dpo/mask_keep_frac": 0.8125,
"epoch": 0.9989528795811519,
"grad_norm": 454.408203125,
"learning_rate": 6.7033706447061635e-12,
"logits/chosen": -0.7610109448432922,
"logits/rejected": -0.7843220233917236,
"loss": 4.6407,
"step": 477
},
{
"epoch": 0.9989528795811519,
"step": 477,
"total_flos": 0.0,
"train_loss": 4.632088508745909,
"train_runtime": 6811.5994,
"train_samples_per_second": 8.975,
"train_steps_per_second": 0.07
}
],
"logging_steps": 1,
"max_steps": 477,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}