Model: jackf857/llama-3-8b-base-slic-hf-ultrafeedback-4xh200-batch-128-20260428-054623 Source: Original Platform
896 lines
31 KiB
JSON
896 lines
31 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.9989528795811519,
|
|
"eval_steps": 200,
|
|
"global_step": 477,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0020942408376963353,
|
|
"grad_norm": 12262.6455078125,
|
|
"learning_rate": 0.0,
|
|
"logits/chosen": -0.6038292050361633,
|
|
"logits/rejected": -0.6174172163009644,
|
|
"logps/chosen": -275.28570556640625,
|
|
"logps/rejected": -222.9645233154297,
|
|
"loss": 3043.0391,
|
|
"rewards/accuracies": 0.46875,
|
|
"rewards/chosen": -275.28570556640625,
|
|
"rewards/margins": -52.3211669921875,
|
|
"rewards/rejected": -222.9645233154297,
|
|
"slic/ce_loss": 275.28570556640625,
|
|
"slic/rank_loss": 105.09413146972656,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.020942408376963352,
|
|
"grad_norm": 11722.5625,
|
|
"learning_rate": 9.375e-08,
|
|
"logits/chosen": -0.6442743539810181,
|
|
"logits/rejected": -0.6519261598587036,
|
|
"logps/chosen": -290.2613525390625,
|
|
"logps/rejected": -264.83160400390625,
|
|
"loss": 3090.2092,
|
|
"rewards/accuracies": 0.4626736044883728,
|
|
"rewards/chosen": -290.2613525390625,
|
|
"rewards/margins": -25.429737091064453,
|
|
"rewards/rejected": -264.83160400390625,
|
|
"slic/ce_loss": 290.2613525390625,
|
|
"slic/rank_loss": 96.01480102539062,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.041884816753926704,
|
|
"grad_norm": 12801.0009765625,
|
|
"learning_rate": 1.9791666666666664e-07,
|
|
"logits/chosen": -0.6172284483909607,
|
|
"logits/rejected": -0.631966769695282,
|
|
"logps/chosen": -286.7103576660156,
|
|
"logps/rejected": -259.05560302734375,
|
|
"loss": 3052.1316,
|
|
"rewards/accuracies": 0.47187501192092896,
|
|
"rewards/chosen": -286.7103576660156,
|
|
"rewards/margins": -27.65475082397461,
|
|
"rewards/rejected": -259.05560302734375,
|
|
"slic/ce_loss": 286.7103576660156,
|
|
"slic/rank_loss": 94.8061294555664,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.06282722513089005,
|
|
"grad_norm": 9744.6474609375,
|
|
"learning_rate": 3.020833333333333e-07,
|
|
"logits/chosen": -0.6371282935142517,
|
|
"logits/rejected": -0.6436103582382202,
|
|
"logps/chosen": -277.3744201660156,
|
|
"logps/rejected": -255.4679412841797,
|
|
"loss": 2954.9688,
|
|
"rewards/accuracies": 0.4820312559604645,
|
|
"rewards/chosen": -277.3744201660156,
|
|
"rewards/margins": -21.906490325927734,
|
|
"rewards/rejected": -255.4679412841797,
|
|
"slic/ce_loss": 277.3744201660156,
|
|
"slic/rank_loss": 91.99668884277344,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.08376963350785341,
|
|
"grad_norm": 8187.505859375,
|
|
"learning_rate": 4.0625e-07,
|
|
"logits/chosen": -0.6269849538803101,
|
|
"logits/rejected": -0.6466041803359985,
|
|
"logps/chosen": -279.5166320800781,
|
|
"logps/rejected": -251.68496704101562,
|
|
"loss": 3012.034,
|
|
"rewards/accuracies": 0.47734373807907104,
|
|
"rewards/chosen": -279.5166320800781,
|
|
"rewards/margins": -27.8316707611084,
|
|
"rewards/rejected": -251.68496704101562,
|
|
"slic/ce_loss": 279.5166320800781,
|
|
"slic/rank_loss": 96.98760223388672,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.10471204188481675,
|
|
"grad_norm": 7351.79052734375,
|
|
"learning_rate": 4.999932966293553e-07,
|
|
"logits/chosen": -0.6411020755767822,
|
|
"logits/rejected": -0.657455563545227,
|
|
"logps/chosen": -273.2268371582031,
|
|
"logps/rejected": -256.38946533203125,
|
|
"loss": 2900.1408,
|
|
"rewards/accuracies": 0.484375,
|
|
"rewards/chosen": -273.2268371582031,
|
|
"rewards/margins": -16.837379455566406,
|
|
"rewards/rejected": -256.38946533203125,
|
|
"slic/ce_loss": 273.2268371582031,
|
|
"slic/rank_loss": 89.29072570800781,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.1256544502617801,
|
|
"grad_norm": 6973.84375,
|
|
"learning_rate": 4.991893270335525e-07,
|
|
"logits/chosen": -0.6497636437416077,
|
|
"logits/rejected": -0.6595814228057861,
|
|
"logps/chosen": -261.78167724609375,
|
|
"logps/rejected": -248.3544921875,
|
|
"loss": 2815.4137,
|
|
"rewards/accuracies": 0.50390625,
|
|
"rewards/chosen": -261.78167724609375,
|
|
"rewards/margins": -13.427162170410156,
|
|
"rewards/rejected": -248.3544921875,
|
|
"slic/ce_loss": 261.78167724609375,
|
|
"slic/rank_loss": 90.14505767822266,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.14659685863874344,
|
|
"grad_norm": 7103.94580078125,
|
|
"learning_rate": 4.970496218214204e-07,
|
|
"logits/chosen": -0.6443999409675598,
|
|
"logits/rejected": -0.6562803983688354,
|
|
"logps/chosen": -261.08099365234375,
|
|
"logps/rejected": -245.8149871826172,
|
|
"loss": 2767.8164,
|
|
"rewards/accuracies": 0.48828125,
|
|
"rewards/chosen": -261.08099365234375,
|
|
"rewards/margins": -15.265989303588867,
|
|
"rewards/rejected": -245.8149871826172,
|
|
"slic/ce_loss": 261.08099365234375,
|
|
"slic/rank_loss": 84.89605712890625,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.16753926701570682,
|
|
"grad_norm": 6954.5859375,
|
|
"learning_rate": 4.935856505068998e-07,
|
|
"logits/chosen": -0.6128605008125305,
|
|
"logits/rejected": -0.6215260028839111,
|
|
"logps/chosen": -262.12835693359375,
|
|
"logps/rejected": -246.1211395263672,
|
|
"loss": 2764.8988,
|
|
"rewards/accuracies": 0.47578126192092896,
|
|
"rewards/chosen": -262.12835693359375,
|
|
"rewards/margins": -16.00722885131836,
|
|
"rewards/rejected": -246.1211395263672,
|
|
"slic/ce_loss": 262.12835693359375,
|
|
"slic/rank_loss": 83.48396301269531,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.18848167539267016,
|
|
"grad_norm": 6543.72314453125,
|
|
"learning_rate": 4.8881598109976e-07,
|
|
"logits/chosen": -0.6393535733222961,
|
|
"logits/rejected": -0.642610490322113,
|
|
"logps/chosen": -259.01385498046875,
|
|
"logps/rejected": -244.76968383789062,
|
|
"loss": 2757.5949,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -259.01385498046875,
|
|
"rewards/margins": -14.244140625,
|
|
"rewards/rejected": -244.76968383789062,
|
|
"slic/ce_loss": 259.01385498046875,
|
|
"slic/rank_loss": 85.68550872802734,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.2094240837696335,
|
|
"grad_norm": 7359.39697265625,
|
|
"learning_rate": 4.827661805750437e-07,
|
|
"logits/chosen": -0.6121981739997864,
|
|
"logits/rejected": -0.6247469782829285,
|
|
"logps/chosen": -260.1445007324219,
|
|
"logps/rejected": -240.54080200195312,
|
|
"loss": 2780.1023,
|
|
"rewards/accuracies": 0.500781238079071,
|
|
"rewards/chosen": -260.1445007324219,
|
|
"rewards/margins": -19.60370445251465,
|
|
"rewards/rejected": -240.54080200195312,
|
|
"slic/ce_loss": 260.1445007324219,
|
|
"slic/rank_loss": 87.36830139160156,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.23036649214659685,
|
|
"grad_norm": 6633.2919921875,
|
|
"learning_rate": 4.75468677825789e-07,
|
|
"logits/chosen": -0.6110386252403259,
|
|
"logits/rejected": -0.6201988458633423,
|
|
"logps/chosen": -259.8690185546875,
|
|
"logps/rejected": -244.67117309570312,
|
|
"loss": 2769.8471,
|
|
"rewards/accuracies": 0.47734373807907104,
|
|
"rewards/chosen": -259.8690185546875,
|
|
"rewards/margins": -15.197855949401855,
|
|
"rewards/rejected": -244.67117309570312,
|
|
"slic/ce_loss": 259.8690185546875,
|
|
"slic/rank_loss": 86.36186981201172,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.2513089005235602,
|
|
"grad_norm": 6849.99609375,
|
|
"learning_rate": 4.669625898336438e-07,
|
|
"logits/chosen": -0.6245466470718384,
|
|
"logits/rejected": -0.6278253197669983,
|
|
"logps/chosen": -264.4799499511719,
|
|
"logps/rejected": -248.22763061523438,
|
|
"loss": 2824.259,
|
|
"rewards/accuracies": 0.4749999940395355,
|
|
"rewards/chosen": -264.4799499511719,
|
|
"rewards/margins": -16.252330780029297,
|
|
"rewards/rejected": -248.22763061523438,
|
|
"slic/ce_loss": 264.4799499511719,
|
|
"slic/rank_loss": 88.55240631103516,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.27225130890052357,
|
|
"grad_norm": 6854.18701171875,
|
|
"learning_rate": 4.5729351198915705e-07,
|
|
"logits/chosen": -0.6144854426383972,
|
|
"logits/rejected": -0.6145707368850708,
|
|
"logps/chosen": -263.3558044433594,
|
|
"logps/rejected": -245.08395385742188,
|
|
"loss": 2830.0254,
|
|
"rewards/accuracies": 0.4906249940395355,
|
|
"rewards/chosen": -263.3558044433594,
|
|
"rewards/margins": -18.271860122680664,
|
|
"rewards/rejected": -245.08395385742188,
|
|
"slic/ce_loss": 263.3558044433594,
|
|
"slic/rank_loss": 90.39739227294922,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.2931937172774869,
|
|
"grad_norm": 7111.4072265625,
|
|
"learning_rate": 4.4651327368569684e-07,
|
|
"logits/chosen": -0.6158766150474548,
|
|
"logits/rejected": -0.610289454460144,
|
|
"logps/chosen": -265.9961853027344,
|
|
"logps/rejected": -250.8537139892578,
|
|
"loss": 2811.3402,
|
|
"rewards/accuracies": 0.4984374940395355,
|
|
"rewards/chosen": -265.9961853027344,
|
|
"rewards/margins": -15.142511367797852,
|
|
"rewards/rejected": -250.8537139892578,
|
|
"slic/ce_loss": 265.9961853027344,
|
|
"slic/rank_loss": 85.42132568359375,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.31413612565445026,
|
|
"grad_norm": 6560.322265625,
|
|
"learning_rate": 4.346796604970912e-07,
|
|
"logits/chosen": -0.6126202344894409,
|
|
"logits/rejected": -0.6171335577964783,
|
|
"logps/chosen": -262.45489501953125,
|
|
"logps/rejected": -238.64248657226562,
|
|
"loss": 2792.7324,
|
|
"rewards/accuracies": 0.46015626192092896,
|
|
"rewards/chosen": -262.45489501953125,
|
|
"rewards/margins": -23.812393188476562,
|
|
"rewards/rejected": -238.64248657226562,
|
|
"slic/ce_loss": 262.45489501953125,
|
|
"slic/rank_loss": 86.63667297363281,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.33507853403141363,
|
|
"grad_norm": 6536.52099609375,
|
|
"learning_rate": 4.218561044282098e-07,
|
|
"logits/chosen": -0.615364670753479,
|
|
"logits/rejected": -0.6180033087730408,
|
|
"logps/chosen": -260.1828308105469,
|
|
"logps/rejected": -246.5723114013672,
|
|
"loss": 2790.3223,
|
|
"rewards/accuracies": 0.49687498807907104,
|
|
"rewards/chosen": -260.1828308105469,
|
|
"rewards/margins": -13.610522270202637,
|
|
"rewards/rejected": -246.5723114013672,
|
|
"slic/ce_loss": 260.1828308105469,
|
|
"slic/rank_loss": 88.60743713378906,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.35602094240837695,
|
|
"grad_norm": 6896.39892578125,
|
|
"learning_rate": 4.081113438988443e-07,
|
|
"logits/chosen": -0.6077988147735596,
|
|
"logits/rejected": -0.6157752871513367,
|
|
"logps/chosen": -264.1897277832031,
|
|
"logps/rejected": -232.72091674804688,
|
|
"loss": 2870.3471,
|
|
"rewards/accuracies": 0.46406251192092896,
|
|
"rewards/chosen": -264.1897277832031,
|
|
"rewards/margins": -31.468795776367188,
|
|
"rewards/rejected": -232.72091674804688,
|
|
"slic/ce_loss": 264.1897277832031,
|
|
"slic/rank_loss": 94.60362243652344,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.3769633507853403,
|
|
"grad_norm": 6520.38671875,
|
|
"learning_rate": 3.935190552834828e-07,
|
|
"logits/chosen": -0.6066499352455139,
|
|
"logits/rejected": -0.6182885766029358,
|
|
"logps/chosen": -263.59375,
|
|
"logps/rejected": -244.91696166992188,
|
|
"loss": 2795.4867,
|
|
"rewards/accuracies": 0.4867187440395355,
|
|
"rewards/chosen": -263.59375,
|
|
"rewards/margins": -18.676807403564453,
|
|
"rewards/rejected": -244.91696166992188,
|
|
"slic/ce_loss": 263.59375,
|
|
"slic/rank_loss": 85.84205627441406,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.39790575916230364,
|
|
"grad_norm": 6230.771484375,
|
|
"learning_rate": 3.781574579820464e-07,
|
|
"logits/chosen": -0.6069104075431824,
|
|
"logits/rejected": -0.62060546875,
|
|
"logps/chosen": -261.4915466308594,
|
|
"logps/rejected": -239.55990600585938,
|
|
"loss": 2811.309,
|
|
"rewards/accuracies": 0.4781250059604645,
|
|
"rewards/chosen": -261.4915466308594,
|
|
"rewards/margins": -21.9316349029541,
|
|
"rewards/rejected": -239.55990600585938,
|
|
"slic/ce_loss": 261.4915466308594,
|
|
"slic/rank_loss": 89.92210388183594,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.418848167539267,
|
|
"grad_norm": 6762.1396484375,
|
|
"learning_rate": 3.621088951385353e-07,
|
|
"logits/chosen": -0.6015563011169434,
|
|
"logits/rejected": -0.6054785251617432,
|
|
"logps/chosen": -257.34716796875,
|
|
"logps/rejected": -241.6367950439453,
|
|
"loss": 2735.9918,
|
|
"rewards/accuracies": 0.49531251192092896,
|
|
"rewards/chosen": -257.34716796875,
|
|
"rewards/margins": -15.71037483215332,
|
|
"rewards/rejected": -241.6367950439453,
|
|
"slic/ce_loss": 257.34716796875,
|
|
"slic/rank_loss": 84.6518325805664,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.418848167539267,
|
|
"eval_logits/chosen": -0.6110028028488159,
|
|
"eval_logits/rejected": -0.6186715364456177,
|
|
"eval_logps/chosen": -262.1006164550781,
|
|
"eval_logps/rejected": -246.28273010253906,
|
|
"eval_loss": 345.5438232421875,
|
|
"eval_rewards/accuracies": 0.4884999990463257,
|
|
"eval_rewards/chosen": -262.1006164550781,
|
|
"eval_rewards/margins": -15.81789779663086,
|
|
"eval_rewards/rejected": -246.28273010253906,
|
|
"eval_runtime": 42.8327,
|
|
"eval_samples_per_second": 46.693,
|
|
"eval_slic/ce_loss": 262.1006164550781,
|
|
"eval_slic/rank_loss": 83.44320678710938,
|
|
"eval_steps_per_second": 2.918,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.4397905759162304,
|
|
"grad_norm": 6237.728515625,
|
|
"learning_rate": 3.454593922550693e-07,
|
|
"logits/chosen": -0.6002607941627502,
|
|
"logits/rejected": -0.600605309009552,
|
|
"logps/chosen": -262.0538024902344,
|
|
"logps/rejected": -250.4560546875,
|
|
"loss": 2777.6777,
|
|
"rewards/accuracies": 0.47968751192092896,
|
|
"rewards/chosen": -262.0538024902344,
|
|
"rewards/margins": -11.597768783569336,
|
|
"rewards/rejected": -250.4560546875,
|
|
"slic/ce_loss": 262.0538024902344,
|
|
"slic/rank_loss": 85.15589904785156,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.4607329842931937,
|
|
"grad_norm": 6908.84033203125,
|
|
"learning_rate": 3.2829819606729477e-07,
|
|
"logits/chosen": -0.599699854850769,
|
|
"logits/rejected": -0.6079216599464417,
|
|
"logps/chosen": -268.8377380371094,
|
|
"logps/rejected": -252.35330200195312,
|
|
"loss": 2872.3611,
|
|
"rewards/accuracies": 0.4820312559604645,
|
|
"rewards/chosen": -268.8377380371094,
|
|
"rewards/margins": -16.484455108642578,
|
|
"rewards/rejected": -252.35330200195312,
|
|
"slic/ce_loss": 268.8377380371094,
|
|
"slic/rank_loss": 90.20738983154297,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.4816753926701571,
|
|
"grad_norm": 6286.37451171875,
|
|
"learning_rate": 3.1071729615293424e-07,
|
|
"logits/chosen": -0.593070924282074,
|
|
"logits/rejected": -0.6033838987350464,
|
|
"logps/chosen": -256.0763244628906,
|
|
"logps/rejected": -239.1165771484375,
|
|
"loss": 2713.8352,
|
|
"rewards/accuracies": 0.484375,
|
|
"rewards/chosen": -256.0763244628906,
|
|
"rewards/margins": -16.959781646728516,
|
|
"rewards/rejected": -239.1165771484375,
|
|
"slic/ce_loss": 256.0763244628906,
|
|
"slic/rank_loss": 83.153076171875,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.5026178010471204,
|
|
"grad_norm": 6890.95263671875,
|
|
"learning_rate": 2.9281093183781403e-07,
|
|
"logits/chosen": -0.5985504388809204,
|
|
"logits/rejected": -0.6077064275741577,
|
|
"logps/chosen": -262.922607421875,
|
|
"logps/rejected": -244.4534454345703,
|
|
"loss": 2804.6604,
|
|
"rewards/accuracies": 0.4749999940395355,
|
|
"rewards/chosen": -262.922607421875,
|
|
"rewards/margins": -18.46915626525879,
|
|
"rewards/rejected": -244.4534454345703,
|
|
"slic/ce_loss": 262.922607421875,
|
|
"slic/rank_loss": 87.6599349975586,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.5235602094240838,
|
|
"grad_norm": 6481.29931640625,
|
|
"learning_rate": 2.7467508704251135e-07,
|
|
"logits/chosen": -0.5857258439064026,
|
|
"logits/rejected": -0.5922163128852844,
|
|
"logps/chosen": -261.371826171875,
|
|
"logps/rejected": -238.2184295654297,
|
|
"loss": 2811.9553,
|
|
"rewards/accuracies": 0.4749999940395355,
|
|
"rewards/chosen": -261.371826171875,
|
|
"rewards/margins": -23.153379440307617,
|
|
"rewards/rejected": -238.2184295654297,
|
|
"slic/ce_loss": 261.371826171875,
|
|
"slic/rank_loss": 90.12258911132812,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.5445026178010471,
|
|
"grad_norm": 6607.6845703125,
|
|
"learning_rate": 2.5640697577740815e-07,
|
|
"logits/chosen": -0.5988560914993286,
|
|
"logits/rejected": -0.5961240530014038,
|
|
"logps/chosen": -261.5967712402344,
|
|
"logps/rejected": -237.8933868408203,
|
|
"loss": 2822.6381,
|
|
"rewards/accuracies": 0.46484375,
|
|
"rewards/chosen": -261.5967712402344,
|
|
"rewards/margins": -23.703397750854492,
|
|
"rewards/rejected": -237.8933868408203,
|
|
"slic/ce_loss": 261.5967712402344,
|
|
"slic/rank_loss": 91.23295593261719,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.5654450261780105,
|
|
"grad_norm": 6657.15087890625,
|
|
"learning_rate": 2.381045210440644e-07,
|
|
"logits/chosen": -0.582733154296875,
|
|
"logits/rejected": -0.5935451984405518,
|
|
"logps/chosen": -254.5479278564453,
|
|
"logps/rejected": -237.6572265625,
|
|
"loss": 2701.4529,
|
|
"rewards/accuracies": 0.47968751192092896,
|
|
"rewards/chosen": -254.5479278564453,
|
|
"rewards/margins": -16.890687942504883,
|
|
"rewards/rejected": -237.6572265625,
|
|
"slic/ce_loss": 254.5479278564453,
|
|
"slic/rank_loss": 83.13374328613281,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.5863874345549738,
|
|
"grad_norm": 6212.56103515625,
|
|
"learning_rate": 2.1986582993616925e-07,
|
|
"logits/chosen": -0.5971206426620483,
|
|
"logits/rejected": -0.598262369632721,
|
|
"logps/chosen": -253.74880981445312,
|
|
"logps/rejected": -241.04623413085938,
|
|
"loss": 2685.0725,
|
|
"rewards/accuracies": 0.48906248807907104,
|
|
"rewards/chosen": -253.74880981445312,
|
|
"rewards/margins": -12.702553749084473,
|
|
"rewards/rejected": -241.04623413085938,
|
|
"slic/ce_loss": 253.74880981445312,
|
|
"slic/rank_loss": 81.88532257080078,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.6073298429319371,
|
|
"grad_norm": 6822.04150390625,
|
|
"learning_rate": 2.0178866775369774e-07,
|
|
"logits/chosen": -0.5831255316734314,
|
|
"logits/rejected": -0.5880999565124512,
|
|
"logps/chosen": -268.67706298828125,
|
|
"logps/rejected": -250.81631469726562,
|
|
"loss": 2880.4166,
|
|
"rewards/accuracies": 0.4781250059604645,
|
|
"rewards/chosen": -268.67706298828125,
|
|
"rewards/margins": -17.860719680786133,
|
|
"rewards/rejected": -250.81631469726562,
|
|
"slic/ce_loss": 268.67706298828125,
|
|
"slic/rank_loss": 91.37500762939453,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.6282722513089005,
|
|
"grad_norm": 6906.6796875,
|
|
"learning_rate": 1.839699339491937e-07,
|
|
"logits/chosen": -0.5904260277748108,
|
|
"logits/rejected": -0.5913136005401611,
|
|
"logps/chosen": -255.6902618408203,
|
|
"logps/rejected": -247.8364715576172,
|
|
"loss": 2685.1258,
|
|
"rewards/accuracies": 0.5093749761581421,
|
|
"rewards/chosen": -255.6902618408203,
|
|
"rewards/margins": -7.853767395019531,
|
|
"rewards/rejected": -247.8364715576172,
|
|
"slic/ce_loss": 255.6902618408203,
|
|
"slic/rank_loss": 79.95047760009766,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.6492146596858639,
|
|
"grad_norm": 6676.84130859375,
|
|
"learning_rate": 1.6650514271527465e-07,
|
|
"logits/chosen": -0.5759958028793335,
|
|
"logits/rejected": -0.5911142826080322,
|
|
"logps/chosen": -258.6521301269531,
|
|
"logps/rejected": -238.955322265625,
|
|
"loss": 2770.6453,
|
|
"rewards/accuracies": 0.4742187559604645,
|
|
"rewards/chosen": -258.6521301269531,
|
|
"rewards/margins": -19.696758270263672,
|
|
"rewards/rejected": -238.955322265625,
|
|
"slic/ce_loss": 258.6521301269531,
|
|
"slic/rank_loss": 87.67857360839844,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.6701570680628273,
|
|
"grad_norm": 7249.5908203125,
|
|
"learning_rate": 1.4948791099758052e-07,
|
|
"logits/chosen": -0.6019054651260376,
|
|
"logits/rejected": -0.5995901226997375,
|
|
"logps/chosen": -263.072021484375,
|
|
"logps/rejected": -240.22134399414062,
|
|
"loss": 2814.36,
|
|
"rewards/accuracies": 0.48515623807907104,
|
|
"rewards/chosen": -263.072021484375,
|
|
"rewards/margins": -22.85066795349121,
|
|
"rewards/rejected": -240.22134399414062,
|
|
"slic/ce_loss": 263.072021484375,
|
|
"slic/rank_loss": 88.72297668457031,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.6910994764397905,
|
|
"grad_norm": 6414.8857421875,
|
|
"learning_rate": 1.3300945667758012e-07,
|
|
"logits/chosen": -0.5962297320365906,
|
|
"logits/rejected": -0.5947962999343872,
|
|
"logps/chosen": -258.00311279296875,
|
|
"logps/rejected": -244.7356719970703,
|
|
"loss": 2729.925,
|
|
"rewards/accuracies": 0.5062500238418579,
|
|
"rewards/chosen": -258.00311279296875,
|
|
"rewards/margins": -13.2674560546875,
|
|
"rewards/rejected": -244.7356719970703,
|
|
"slic/ce_loss": 258.00311279296875,
|
|
"slic/rank_loss": 83.2375259399414,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.7120418848167539,
|
|
"grad_norm": 5969.27587890625,
|
|
"learning_rate": 1.1715810961514072e-07,
|
|
"logits/chosen": -0.5996378660202026,
|
|
"logits/rejected": -0.5939691662788391,
|
|
"logps/chosen": -251.0337677001953,
|
|
"logps/rejected": -234.95639038085938,
|
|
"loss": 2683.643,
|
|
"rewards/accuracies": 0.4859375059604645,
|
|
"rewards/chosen": -251.0337677001953,
|
|
"rewards/margins": -16.077373504638672,
|
|
"rewards/rejected": -234.95639038085938,
|
|
"slic/ce_loss": 251.0337677001953,
|
|
"slic/rank_loss": 84.42159271240234,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.7329842931937173,
|
|
"grad_norm": 8791.7958984375,
|
|
"learning_rate": 1.0201883817182949e-07,
|
|
"logits/chosen": -0.6010726094245911,
|
|
"logits/rejected": -0.6074205040931702,
|
|
"logps/chosen": -265.9036865234375,
|
|
"logps/rejected": -244.1355438232422,
|
|
"loss": 2822.1586,
|
|
"rewards/accuracies": 0.4546875059604645,
|
|
"rewards/chosen": -265.9036865234375,
|
|
"rewards/margins": -21.768173217773438,
|
|
"rewards/rejected": -244.1355438232422,
|
|
"slic/ce_loss": 265.9036865234375,
|
|
"slic/rank_loss": 86.86607360839844,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.7539267015706806,
|
|
"grad_norm": 6849.009765625,
|
|
"learning_rate": 8.76727937529367e-08,
|
|
"logits/chosen": -0.6024104356765747,
|
|
"logits/rejected": -0.6169945597648621,
|
|
"logps/chosen": -250.9459991455078,
|
|
"logps/rejected": -233.37088012695312,
|
|
"loss": 2662.359,
|
|
"rewards/accuracies": 0.5015624761581421,
|
|
"rewards/chosen": -250.9459991455078,
|
|
"rewards/margins": -17.57510757446289,
|
|
"rewards/rejected": -233.37088012695312,
|
|
"slic/ce_loss": 250.9459991455078,
|
|
"slic/rank_loss": 81.84888458251953,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.774869109947644,
|
|
"grad_norm": 6163.64599609375,
|
|
"learning_rate": 7.419687580962222e-08,
|
|
"logits/chosen": -0.5869948863983154,
|
|
"logits/rejected": -0.5933431386947632,
|
|
"logps/chosen": -257.76495361328125,
|
|
"logps/rejected": -240.93856811523438,
|
|
"loss": 2751.2512,
|
|
"rewards/accuracies": 0.49687498807907104,
|
|
"rewards/chosen": -257.76495361328125,
|
|
"rewards/margins": -16.826370239257812,
|
|
"rewards/rejected": -240.93856811523438,
|
|
"slic/ce_loss": 257.76495361328125,
|
|
"slic/rank_loss": 86.14141845703125,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.7958115183246073,
|
|
"grad_norm": 6802.92919921875,
|
|
"learning_rate": 6.166331963291519e-08,
|
|
"logits/chosen": -0.598025918006897,
|
|
"logits/rejected": -0.6036067008972168,
|
|
"logps/chosen": -275.9155578613281,
|
|
"logps/rejected": -248.60989379882812,
|
|
"loss": 2926.8623,
|
|
"rewards/accuracies": 0.47265625,
|
|
"rewards/chosen": -275.9155578613281,
|
|
"rewards/margins": -27.30564308166504,
|
|
"rewards/rejected": -248.60989379882812,
|
|
"slic/ce_loss": 275.9155578613281,
|
|
"slic/rank_loss": 89.94223022460938,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.8167539267015707,
|
|
"grad_norm": 6247.5087890625,
|
|
"learning_rate": 5.013930914912476e-08,
|
|
"logits/chosen": -0.5993385314941406,
|
|
"logits/rejected": -0.5995285511016846,
|
|
"logps/chosen": -253.06851196289062,
|
|
"logps/rejected": -245.85745239257812,
|
|
"loss": 2641.3674,
|
|
"rewards/accuracies": 0.5132812261581421,
|
|
"rewards/chosen": -253.06851196289062,
|
|
"rewards/margins": -7.211063385009766,
|
|
"rewards/rejected": -245.85745239257812,
|
|
"slic/ce_loss": 253.06851196289062,
|
|
"slic/rank_loss": 77.10240936279297,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.837696335078534,
|
|
"grad_norm": 6252.97314453125,
|
|
"learning_rate": 3.968661679220467e-08,
|
|
"logits/chosen": -0.5878058075904846,
|
|
"logits/rejected": -0.595999538898468,
|
|
"logps/chosen": -265.92987060546875,
|
|
"logps/rejected": -247.3778839111328,
|
|
"loss": 2791.6219,
|
|
"rewards/accuracies": 0.4625000059604645,
|
|
"rewards/chosen": -265.92987060546875,
|
|
"rewards/margins": -18.551965713500977,
|
|
"rewards/rejected": -247.3778839111328,
|
|
"slic/ce_loss": 265.92987060546875,
|
|
"slic/rank_loss": 83.02286529541016,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.837696335078534,
|
|
"eval_logits/chosen": -0.6036794185638428,
|
|
"eval_logits/rejected": -0.6097184419631958,
|
|
"eval_logps/chosen": -260.79754638671875,
|
|
"eval_logps/rejected": -247.10818481445312,
|
|
"eval_loss": 341.8598937988281,
|
|
"eval_rewards/accuracies": 0.4934999942779541,
|
|
"eval_rewards/chosen": -260.79754638671875,
|
|
"eval_rewards/margins": -13.689358711242676,
|
|
"eval_rewards/rejected": -247.10818481445312,
|
|
"eval_runtime": 42.51,
|
|
"eval_samples_per_second": 47.048,
|
|
"eval_slic/ce_loss": 260.79754638671875,
|
|
"eval_slic/rank_loss": 81.0623550415039,
|
|
"eval_steps_per_second": 2.94,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.8586387434554974,
|
|
"grad_norm": 5975.84033203125,
|
|
"learning_rate": 3.036127238347164e-08,
|
|
"logits/chosen": -0.6068440675735474,
|
|
"logits/rejected": -0.6084403991699219,
|
|
"logps/chosen": -263.7471008300781,
|
|
"logps/rejected": -248.447021484375,
|
|
"loss": 2812.4121,
|
|
"rewards/accuracies": 0.48750001192092896,
|
|
"rewards/chosen": -263.7471008300781,
|
|
"rewards/margins": -15.300073623657227,
|
|
"rewards/rejected": -248.447021484375,
|
|
"slic/ce_loss": 263.7471008300781,
|
|
"slic/rank_loss": 87.80433654785156,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.8795811518324608,
|
|
"grad_norm": 6574.27978515625,
|
|
"learning_rate": 2.2213262793589482e-08,
|
|
"logits/chosen": -0.6027593016624451,
|
|
"logits/rejected": -0.6067181825637817,
|
|
"logps/chosen": -262.3794860839844,
|
|
"logps/rejected": -246.2481231689453,
|
|
"loss": 2759.7773,
|
|
"rewards/accuracies": 0.48515623807907104,
|
|
"rewards/chosen": -262.3794860839844,
|
|
"rewards/margins": -16.13137435913086,
|
|
"rewards/rejected": -246.2481231689453,
|
|
"slic/ce_loss": 262.3794860839844,
|
|
"slic/rank_loss": 82.59269714355469,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.900523560209424,
|
|
"grad_norm": 6577.6103515625,
|
|
"learning_rate": 1.5286263996730026e-08,
|
|
"logits/chosen": -0.5887020826339722,
|
|
"logits/rejected": -0.6053365468978882,
|
|
"logps/chosen": -264.7728576660156,
|
|
"logps/rejected": -240.94216918945312,
|
|
"loss": 2829.2809,
|
|
"rewards/accuracies": 0.48359376192092896,
|
|
"rewards/chosen": -264.7728576660156,
|
|
"rewards/margins": -23.83070945739746,
|
|
"rewards/rejected": -240.94216918945312,
|
|
"slic/ce_loss": 264.7728576660156,
|
|
"slic/rank_loss": 88.88728332519531,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.9214659685863874,
|
|
"grad_norm": 6817.033203125,
|
|
"learning_rate": 9.617406953185136e-09,
|
|
"logits/chosen": -0.5980589985847473,
|
|
"logits/rejected": -0.6065895557403564,
|
|
"logps/chosen": -253.0465087890625,
|
|
"logps/rejected": -241.0380859375,
|
|
"loss": 2692.71,
|
|
"rewards/accuracies": 0.50390625,
|
|
"rewards/chosen": -253.0465087890625,
|
|
"rewards/margins": -12.008459091186523,
|
|
"rewards/rejected": -241.0380859375,
|
|
"slic/ce_loss": 253.0465087890625,
|
|
"slic/rank_loss": 83.54225158691406,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.9424083769633508,
|
|
"grad_norm": 6651.41357421875,
|
|
"learning_rate": 5.2370785753763356e-09,
|
|
"logits/chosen": -0.5972884893417358,
|
|
"logits/rejected": -0.6050039529800415,
|
|
"logps/chosen": -259.58685302734375,
|
|
"logps/rejected": -238.9262237548828,
|
|
"loss": 2777.9844,
|
|
"rewards/accuracies": 0.48750001192092896,
|
|
"rewards/chosen": -259.58685302734375,
|
|
"rewards/margins": -20.660663604736328,
|
|
"rewards/rejected": -238.9262237548828,
|
|
"slic/ce_loss": 259.58685302734375,
|
|
"slic/rank_loss": 87.66117095947266,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.9633507853403142,
|
|
"grad_norm": 7045.99609375,
|
|
"learning_rate": 2.168758844148272e-09,
|
|
"logits/chosen": -0.5794906616210938,
|
|
"logits/rejected": -0.589801549911499,
|
|
"logps/chosen": -275.762451171875,
|
|
"logps/rejected": -259.4653015136719,
|
|
"loss": 2896.2551,
|
|
"rewards/accuracies": 0.4867187440395355,
|
|
"rewards/chosen": -275.762451171875,
|
|
"rewards/margins": -16.297168731689453,
|
|
"rewards/rejected": -259.4653015136719,
|
|
"slic/ce_loss": 275.762451171875,
|
|
"slic/rank_loss": 86.26937866210938,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.9842931937172775,
|
|
"grad_norm": 7251.65869140625,
|
|
"learning_rate": 4.288949484559934e-10,
|
|
"logits/chosen": -0.5941784977912903,
|
|
"logits/rejected": -0.5964524149894714,
|
|
"logps/chosen": -265.3995056152344,
|
|
"logps/rejected": -239.29824829101562,
|
|
"loss": 2848.368,
|
|
"rewards/accuracies": 0.4867187440395355,
|
|
"rewards/chosen": -265.3995056152344,
|
|
"rewards/margins": -26.10125160217285,
|
|
"rewards/rejected": -239.29824829101562,
|
|
"slic/ce_loss": 265.3995056152344,
|
|
"slic/rank_loss": 90.64649963378906,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.9989528795811519,
|
|
"step": 477,
|
|
"total_flos": 0.0,
|
|
"train_loss": 2803.1413415552934,
|
|
"train_runtime": 5510.6328,
|
|
"train_samples_per_second": 11.094,
|
|
"train_steps_per_second": 0.087
|
|
}
|
|
],
|
|
"logging_steps": 10,
|
|
"max_steps": 477,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 200,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 4,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|