Files
0.001_idpo_noreplacerej_iter_2/trainer_state.json

255 lines
8.1 KiB
JSON
Raw Permalink Normal View History

{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9968652037617555,
"eval_steps": 500,
"global_step": 159,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 3.125e-08,
"logits/chosen": -2.689429759979248,
"logits/rejected": -2.571552276611328,
"logps/chosen": -143.16458129882812,
"logps/rejected": -203.93856811523438,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.06,
"learning_rate": 3.1249999999999997e-07,
"logits/chosen": -2.708827257156372,
"logits/rejected": -2.686070680618286,
"logps/chosen": -237.31149291992188,
"logps/rejected": -247.18511962890625,
"loss": 0.6899,
"rewards/accuracies": 0.4375,
"rewards/chosen": -0.01768648810684681,
"rewards/margins": -0.0019140999065712094,
"rewards/rejected": -0.015772389248013496,
"step": 10
},
{
"epoch": 0.13,
"learning_rate": 4.990353313429303e-07,
"logits/chosen": -2.6622233390808105,
"logits/rejected": -2.6616415977478027,
"logps/chosen": -279.48223876953125,
"logps/rejected": -328.67034912109375,
"loss": 0.6672,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": -0.3544561564922333,
"rewards/margins": 0.08781943470239639,
"rewards/rejected": -0.4422755837440491,
"step": 20
},
{
"epoch": 0.19,
"learning_rate": 4.882681251368548e-07,
"logits/chosen": -2.530141830444336,
"logits/rejected": -2.4791650772094727,
"logps/chosen": -242.94580078125,
"logps/rejected": -304.83319091796875,
"loss": 0.6578,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.23640844225883484,
"rewards/margins": 0.2698945105075836,
"rewards/rejected": -0.5063029527664185,
"step": 30
},
{
"epoch": 0.25,
"learning_rate": 4.6604720940421207e-07,
"logits/chosen": -2.3857040405273438,
"logits/rejected": -2.350623846054077,
"logps/chosen": -262.49359130859375,
"logps/rejected": -348.61285400390625,
"loss": 0.6375,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -0.4203917384147644,
"rewards/margins": 0.2930702269077301,
"rewards/rejected": -0.7134619951248169,
"step": 40
},
{
"epoch": 0.31,
"learning_rate": 4.3344075855595097e-07,
"logits/chosen": -2.2722487449645996,
"logits/rejected": -2.2434167861938477,
"logps/chosen": -265.3743591308594,
"logps/rejected": -326.30474853515625,
"loss": 0.6153,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.47392672300338745,
"rewards/margins": 0.19795864820480347,
"rewards/rejected": -0.6718853712081909,
"step": 50
},
{
"epoch": 0.38,
"learning_rate": 3.920161866827889e-07,
"logits/chosen": -2.1601130962371826,
"logits/rejected": -2.137726306915283,
"logps/chosen": -290.22393798828125,
"logps/rejected": -320.2867736816406,
"loss": 0.6209,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.5836684703826904,
"rewards/margins": 0.20530056953430176,
"rewards/rejected": -0.7889690399169922,
"step": 60
},
{
"epoch": 0.44,
"learning_rate": 3.4376480090239047e-07,
"logits/chosen": -2.1158602237701416,
"logits/rejected": -2.0663974285125732,
"logps/chosen": -261.1648864746094,
"logps/rejected": -325.1148681640625,
"loss": 0.629,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": -0.49344220757484436,
"rewards/margins": 0.26634153723716736,
"rewards/rejected": -0.7597836852073669,
"step": 70
},
{
"epoch": 0.5,
"learning_rate": 2.910060778827554e-07,
"logits/chosen": -2.0561118125915527,
"logits/rejected": -2.0465025901794434,
"logps/chosen": -259.88214111328125,
"logps/rejected": -330.5330505371094,
"loss": 0.5935,
"rewards/accuracies": 0.643750011920929,
"rewards/chosen": -0.6631507873535156,
"rewards/margins": 0.2730056643486023,
"rewards/rejected": -0.9361563920974731,
"step": 80
},
{
"epoch": 0.56,
"learning_rate": 2.3627616503391812e-07,
"logits/chosen": -2.163104295730591,
"logits/rejected": -2.1310901641845703,
"logps/chosen": -251.44955444335938,
"logps/rejected": -355.19989013671875,
"loss": 0.6082,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": -0.5009629726409912,
"rewards/margins": 0.4423709511756897,
"rewards/rejected": -0.9433339834213257,
"step": 90
},
{
"epoch": 0.63,
"learning_rate": 1.8220596619089573e-07,
"logits/chosen": -2.061826229095459,
"logits/rejected": -2.0707743167877197,
"logps/chosen": -275.9271545410156,
"logps/rejected": -368.3133850097656,
"loss": 0.6033,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.5269793272018433,
"rewards/margins": 0.3954086899757385,
"rewards/rejected": -0.9223880767822266,
"step": 100
},
{
"epoch": 0.69,
"learning_rate": 1.3139467229135998e-07,
"logits/chosen": -2.105185031890869,
"logits/rejected": -2.079219341278076,
"logps/chosen": -246.35910034179688,
"logps/rejected": -346.08642578125,
"loss": 0.594,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -0.5222919583320618,
"rewards/margins": 0.4267689287662506,
"rewards/rejected": -0.9490607976913452,
"step": 110
},
{
"epoch": 0.75,
"learning_rate": 8.628481651367875e-08,
"logits/chosen": -2.107372760772705,
"logits/rejected": -2.0185680389404297,
"logps/chosen": -271.4783935546875,
"logps/rejected": -370.3096923828125,
"loss": 0.5906,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.5639175176620483,
"rewards/margins": 0.45396023988723755,
"rewards/rejected": -1.0178776979446411,
"step": 120
},
{
"epoch": 0.82,
"learning_rate": 4.904486005914027e-08,
"logits/chosen": -2.119300365447998,
"logits/rejected": -2.106173515319824,
"logps/chosen": -282.1510009765625,
"logps/rejected": -376.592041015625,
"loss": 0.5895,
"rewards/accuracies": 0.643750011920929,
"rewards/chosen": -0.6762362718582153,
"rewards/margins": 0.3490751385688782,
"rewards/rejected": -1.0253114700317383,
"step": 130
},
{
"epoch": 0.88,
"learning_rate": 2.1464952759020856e-08,
"logits/chosen": -2.127058267593384,
"logits/rejected": -2.0316877365112305,
"logps/chosen": -290.1690979003906,
"logps/rejected": -389.51116943359375,
"loss": 0.5877,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.6831713318824768,
"rewards/margins": 0.4633623957633972,
"rewards/rejected": -1.146533727645874,
"step": 140
},
{
"epoch": 0.94,
"learning_rate": 4.8708793644441086e-09,
"logits/chosen": -2.0759072303771973,
"logits/rejected": -2.021393299102783,
"logps/chosen": -278.86285400390625,
"logps/rejected": -323.4399108886719,
"loss": 0.5954,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.6692460775375366,
"rewards/margins": 0.32712554931640625,
"rewards/rejected": -0.9963716268539429,
"step": 150
},
{
"epoch": 1.0,
"step": 159,
"total_flos": 0.0,
"train_loss": 0.6192928140268386,
"train_runtime": 2654.4881,
"train_samples_per_second": 7.677,
"train_steps_per_second": 0.06
}
],
"logging_steps": 10,
"max_steps": 159,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}