299 lines
9.8 KiB
JSON
299 lines
9.8 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 1.0,
|
|
"eval_steps": 500,
|
|
"global_step": 86,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.05813953488372093,
|
|
"grad_norm": 6.173820495605469,
|
|
"learning_rate": 2.222222222222222e-07,
|
|
"logits/chosen": -0.31656768918037415,
|
|
"logits/rejected": -0.34523338079452515,
|
|
"logps/chosen": -272.57794189453125,
|
|
"logps/rejected": -270.36444091796875,
|
|
"loss": 0.6931,
|
|
"rewards/accuracies": 0.518750011920929,
|
|
"rewards/chosen": 0.00012703397078439593,
|
|
"rewards/margins": 0.00034172655432485044,
|
|
"rewards/rejected": -0.0002146924816770479,
|
|
"step": 5
|
|
},
|
|
{
|
|
"epoch": 0.11627906976744186,
|
|
"grad_norm": 11.29118824005127,
|
|
"learning_rate": 5e-07,
|
|
"logits/chosen": -0.3255475163459778,
|
|
"logits/rejected": -0.3258180320262909,
|
|
"logps/chosen": -284.53802490234375,
|
|
"logps/rejected": -276.3387451171875,
|
|
"loss": 0.6936,
|
|
"rewards/accuracies": 0.4468750059604645,
|
|
"rewards/chosen": 0.0012468498898670077,
|
|
"rewards/margins": -0.000429403327871114,
|
|
"rewards/rejected": 0.0016762532759457827,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.1744186046511628,
|
|
"grad_norm": 22.03050422668457,
|
|
"learning_rate": 4.948160396893553e-07,
|
|
"logits/chosen": -0.3352300226688385,
|
|
"logits/rejected": -0.3715844750404358,
|
|
"logps/chosen": -273.3583984375,
|
|
"logps/rejected": -256.2732849121094,
|
|
"loss": 0.6917,
|
|
"rewards/accuracies": 0.59375,
|
|
"rewards/chosen": 0.0064769028685987,
|
|
"rewards/margins": 0.0027799301315099,
|
|
"rewards/rejected": 0.003696972969919443,
|
|
"step": 15
|
|
},
|
|
{
|
|
"epoch": 0.23255813953488372,
|
|
"grad_norm": 12.313726425170898,
|
|
"learning_rate": 4.794791463134399e-07,
|
|
"logits/chosen": -0.37685471773147583,
|
|
"logits/rejected": -0.3720394968986511,
|
|
"logps/chosen": -287.3114013671875,
|
|
"logps/rejected": -265.71466064453125,
|
|
"loss": 0.6903,
|
|
"rewards/accuracies": 0.628125011920929,
|
|
"rewards/chosen": 0.016553811728954315,
|
|
"rewards/margins": 0.005998819135129452,
|
|
"rewards/rejected": 0.010554992593824863,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.29069767441860467,
|
|
"grad_norm": 6.40844202041626,
|
|
"learning_rate": 4.5462536664464836e-07,
|
|
"logits/chosen": -0.3328778147697449,
|
|
"logits/rejected": -0.3745272159576416,
|
|
"logps/chosen": -294.00567626953125,
|
|
"logps/rejected": -251.9366455078125,
|
|
"loss": 0.6885,
|
|
"rewards/accuracies": 0.621874988079071,
|
|
"rewards/chosen": 0.02947135828435421,
|
|
"rewards/margins": 0.01008202601224184,
|
|
"rewards/rejected": 0.019389333203434944,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.3488372093023256,
|
|
"grad_norm": 9.740550994873047,
|
|
"learning_rate": 4.2128542874196107e-07,
|
|
"logits/chosen": -0.3988240659236908,
|
|
"logits/rejected": -0.3960237205028534,
|
|
"logps/chosen": -315.44189453125,
|
|
"logps/rejected": -269.29888916015625,
|
|
"loss": 0.6847,
|
|
"rewards/accuracies": 0.684374988079071,
|
|
"rewards/chosen": 0.04561670497059822,
|
|
"rewards/margins": 0.017333079129457474,
|
|
"rewards/rejected": 0.0282836202532053,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.4069767441860465,
|
|
"grad_norm": 4.856711387634277,
|
|
"learning_rate": 3.80841995924153e-07,
|
|
"logits/chosen": -0.35535919666290283,
|
|
"logits/rejected": -0.3754374086856842,
|
|
"logps/chosen": -284.4345397949219,
|
|
"logps/rejected": -253.5362091064453,
|
|
"loss": 0.6845,
|
|
"rewards/accuracies": 0.6625000238418579,
|
|
"rewards/chosen": 0.05356748774647713,
|
|
"rewards/margins": 0.018121790140867233,
|
|
"rewards/rejected": 0.035445697605609894,
|
|
"step": 35
|
|
},
|
|
{
|
|
"epoch": 0.46511627906976744,
|
|
"grad_norm": 5.220498561859131,
|
|
"learning_rate": 3.349723253956541e-07,
|
|
"logits/chosen": -0.3333882689476013,
|
|
"logits/rejected": -0.3347395360469818,
|
|
"logps/chosen": -287.35186767578125,
|
|
"logps/rejected": -268.220703125,
|
|
"loss": 0.6792,
|
|
"rewards/accuracies": 0.706250011920929,
|
|
"rewards/chosen": 0.06367762386798859,
|
|
"rewards/margins": 0.0287557952105999,
|
|
"rewards/rejected": 0.03492182120680809,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.5232558139534884,
|
|
"grad_norm": 6.02023458480835,
|
|
"learning_rate": 2.8557870956832133e-07,
|
|
"logits/chosen": -0.3798816204071045,
|
|
"logits/rejected": -0.3992946743965149,
|
|
"logps/chosen": -291.197021484375,
|
|
"logps/rejected": -267.6119689941406,
|
|
"loss": 0.6806,
|
|
"rewards/accuracies": 0.6499999761581421,
|
|
"rewards/chosen": 0.05876715108752251,
|
|
"rewards/margins": 0.02658895030617714,
|
|
"rewards/rejected": 0.03217820078134537,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 0.5813953488372093,
|
|
"grad_norm": 6.866888523101807,
|
|
"learning_rate": 2.3470958479453937e-07,
|
|
"logits/chosen": -0.3547196388244629,
|
|
"logits/rejected": -0.38883891701698303,
|
|
"logps/chosen": -254.36865234375,
|
|
"logps/rejected": -254.80191040039062,
|
|
"loss": 0.6745,
|
|
"rewards/accuracies": 0.7281249761581421,
|
|
"rewards/chosen": 0.0644092783331871,
|
|
"rewards/margins": 0.03914918377995491,
|
|
"rewards/rejected": 0.025260094553232193,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.6395348837209303,
|
|
"grad_norm": 5.712334632873535,
|
|
"learning_rate": 1.8447457926522452e-07,
|
|
"logits/chosen": -0.39016178250312805,
|
|
"logits/rejected": -0.3929459750652313,
|
|
"logps/chosen": -303.5281066894531,
|
|
"logps/rejected": -278.91619873046875,
|
|
"loss": 0.6777,
|
|
"rewards/accuracies": 0.659375011920929,
|
|
"rewards/chosen": 0.05406096577644348,
|
|
"rewards/margins": 0.03241851180791855,
|
|
"rewards/rejected": 0.021642452105879784,
|
|
"step": 55
|
|
},
|
|
{
|
|
"epoch": 0.6976744186046512,
|
|
"grad_norm": 6.261183738708496,
|
|
"learning_rate": 1.369570231793286e-07,
|
|
"logits/chosen": -0.3772028386592865,
|
|
"logits/rejected": -0.42861443758010864,
|
|
"logps/chosen": -282.66925048828125,
|
|
"logps/rejected": -262.298583984375,
|
|
"loss": 0.6733,
|
|
"rewards/accuracies": 0.684374988079071,
|
|
"rewards/chosen": 0.054535817354917526,
|
|
"rewards/margins": 0.0425155907869339,
|
|
"rewards/rejected": 0.012020227499306202,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.7558139534883721,
|
|
"grad_norm": 5.369093894958496,
|
|
"learning_rate": 9.412754953531663e-08,
|
|
"logits/chosen": -0.4042905271053314,
|
|
"logits/rejected": -0.41270047426223755,
|
|
"logps/chosen": -297.8207702636719,
|
|
"logps/rejected": -262.1812744140625,
|
|
"loss": 0.6734,
|
|
"rewards/accuracies": 0.703125,
|
|
"rewards/chosen": 0.050960998982191086,
|
|
"rewards/margins": 0.042267706245183945,
|
|
"rewards/rejected": 0.008693288080394268,
|
|
"step": 65
|
|
},
|
|
{
|
|
"epoch": 0.813953488372093,
|
|
"grad_norm": 6.778103828430176,
|
|
"learning_rate": 5.776236866515946e-08,
|
|
"logits/chosen": -0.35539665818214417,
|
|
"logits/rejected": -0.4030834138393402,
|
|
"logps/chosen": -306.32501220703125,
|
|
"logps/rejected": -253.0592803955078,
|
|
"loss": 0.6717,
|
|
"rewards/accuracies": 0.675000011920929,
|
|
"rewards/chosen": 0.04382815584540367,
|
|
"rewards/margins": 0.04540730267763138,
|
|
"rewards/rejected": -0.0015791511395946145,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.872093023255814,
|
|
"grad_norm": 5.866343021392822,
|
|
"learning_rate": 2.936960580341971e-08,
|
|
"logits/chosen": -0.3599171042442322,
|
|
"logits/rejected": -0.3811812400817871,
|
|
"logps/chosen": -268.90374755859375,
|
|
"logps/rejected": -259.1419677734375,
|
|
"loss": 0.6714,
|
|
"rewards/accuracies": 0.6937500238418579,
|
|
"rewards/chosen": 0.03630157560110092,
|
|
"rewards/margins": 0.04637160152196884,
|
|
"rewards/rejected": -0.010070028714835644,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.9302325581395349,
|
|
"grad_norm": 6.844763278961182,
|
|
"learning_rate": 1.0126756596375685e-08,
|
|
"logits/chosen": -0.34698766469955444,
|
|
"logits/rejected": -0.4250791072845459,
|
|
"logps/chosen": -274.50775146484375,
|
|
"logps/rejected": -277.90618896484375,
|
|
"loss": 0.6729,
|
|
"rewards/accuracies": 0.690625011920929,
|
|
"rewards/chosen": 0.03152482956647873,
|
|
"rewards/margins": 0.04362744092941284,
|
|
"rewards/rejected": -0.012102612294256687,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.9883720930232558,
|
|
"grad_norm": 5.770267963409424,
|
|
"learning_rate": 8.318543764516961e-10,
|
|
"logits/chosen": -0.39356738328933716,
|
|
"logits/rejected": -0.43114370107650757,
|
|
"logps/chosen": -282.12542724609375,
|
|
"logps/rejected": -264.55810546875,
|
|
"loss": 0.6732,
|
|
"rewards/accuracies": 0.668749988079071,
|
|
"rewards/chosen": 0.0381753146648407,
|
|
"rewards/margins": 0.0427427664399147,
|
|
"rewards/rejected": -0.004567448981106281,
|
|
"step": 85
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"step": 86,
|
|
"total_flos": 0.0,
|
|
"train_loss": 0.6806982485360877,
|
|
"train_runtime": 636.997,
|
|
"train_samples_per_second": 8.636,
|
|
"train_steps_per_second": 0.135
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 86,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 50,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": false,
|
|
"should_training_stop": false
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|