{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9968652037617555,
  "eval_steps": 500,
  "global_step": 159,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 3.125e-08,
      "logits/chosen": -2.689429759979248,
      "logits/rejected": -2.571552276611328,
      "logps/chosen": -143.16458129882812,
      "logps/rejected": -203.93856811523438,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.06,
      "learning_rate": 3.1249999999999997e-07,
      "logits/chosen": -2.708827257156372,
      "logits/rejected": -2.686070680618286,
      "logps/chosen": -237.31149291992188,
      "logps/rejected": -247.18511962890625,
      "loss": 0.6899,
      "rewards/accuracies": 0.4375,
      "rewards/chosen": -0.01768648810684681,
      "rewards/margins": -0.0019140999065712094,
      "rewards/rejected": -0.015772389248013496,
      "step": 10
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.990353313429303e-07,
      "logits/chosen": -2.6622233390808105,
      "logits/rejected": -2.6616415977478027,
      "logps/chosen": -279.48223876953125,
      "logps/rejected": -328.67034912109375,
      "loss": 0.6672,
      "rewards/accuracies": 0.518750011920929,
      "rewards/chosen": -0.3544561564922333,
      "rewards/margins": 0.08781943470239639,
      "rewards/rejected": -0.4422755837440491,
      "step": 20
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.882681251368548e-07,
      "logits/chosen": -2.530141830444336,
      "logits/rejected": -2.4791650772094727,
      "logps/chosen": -242.94580078125,
      "logps/rejected": -304.83319091796875,
      "loss": 0.6578,
      "rewards/accuracies": 0.606249988079071,
      "rewards/chosen": -0.23640844225883484,
      "rewards/margins": 0.2698945105075836,
      "rewards/rejected": -0.5063029527664185,
      "step": 30
    },
    {
      "epoch": 0.25,
      "learning_rate": 4.6604720940421207e-07,
      "logits/chosen": -2.3857040405273438,
      "logits/rejected": -2.350623846054077,
      "logps/chosen": -262.49359130859375,
      "logps/rejected": -348.61285400390625,
      "loss": 0.6375,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": -0.4203917384147644,
      "rewards/margins": 0.2930702269077301,
      "rewards/rejected": -0.7134619951248169,
      "step": 40
    },
    {
      "epoch": 0.31,
      "learning_rate": 4.3344075855595097e-07,
      "logits/chosen": -2.2722487449645996,
      "logits/rejected": -2.2434167861938477,
      "logps/chosen": -265.3743591308594,
      "logps/rejected": -326.30474853515625,
      "loss": 0.6153,
      "rewards/accuracies": 0.5375000238418579,
      "rewards/chosen": -0.47392672300338745,
      "rewards/margins": 0.19795864820480347,
      "rewards/rejected": -0.6718853712081909,
      "step": 50
    },
    {
      "epoch": 0.38,
      "learning_rate": 3.920161866827889e-07,
      "logits/chosen": -2.1601130962371826,
      "logits/rejected": -2.137726306915283,
      "logps/chosen": -290.22393798828125,
      "logps/rejected": -320.2867736816406,
      "loss": 0.6209,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -0.5836684703826904,
      "rewards/margins": 0.20530056953430176,
      "rewards/rejected": -0.7889690399169922,
      "step": 60
    },
    {
      "epoch": 0.44,
      "learning_rate": 3.4376480090239047e-07,
      "logits/chosen": -2.1158602237701416,
      "logits/rejected": -2.0663974285125732,
      "logps/chosen": -261.1648864746094,
      "logps/rejected": -325.1148681640625,
      "loss": 0.629,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": -0.49344220757484436,
      "rewards/margins": 0.26634153723716736,
      "rewards/rejected": -0.7597836852073669,
      "step": 70
    },
    {
      "epoch": 0.5,
      "learning_rate": 2.910060778827554e-07,
      "logits/chosen": -2.0561118125915527,
      "logits/rejected": -2.0465025901794434,
      "logps/chosen": -259.88214111328125,
      "logps/rejected": -330.5330505371094,
      "loss": 0.5935,
      "rewards/accuracies": 0.643750011920929,
      "rewards/chosen": -0.6631507873535156,
      "rewards/margins": 0.2730056643486023,
      "rewards/rejected": -0.9361563920974731,
      "step": 80
    },
    {
      "epoch": 0.56,
      "learning_rate": 2.3627616503391812e-07,
      "logits/chosen": -2.163104295730591,
      "logits/rejected": -2.1310901641845703,
      "logps/chosen": -251.44955444335938,
      "logps/rejected": -355.19989013671875,
      "loss": 0.6082,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": -0.5009629726409912,
      "rewards/margins": 0.4423709511756897,
      "rewards/rejected": -0.9433339834213257,
      "step": 90
    },
    {
      "epoch": 0.63,
      "learning_rate": 1.8220596619089573e-07,
      "logits/chosen": -2.061826229095459,
      "logits/rejected": -2.0707743167877197,
      "logps/chosen": -275.9271545410156,
      "logps/rejected": -368.3133850097656,
      "loss": 0.6033,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": -0.5269793272018433,
      "rewards/margins": 0.3954086899757385,
      "rewards/rejected": -0.9223880767822266,
      "step": 100
    },
    {
      "epoch": 0.69,
      "learning_rate": 1.3139467229135998e-07,
      "logits/chosen": -2.105185031890869,
      "logits/rejected": -2.079219341278076,
      "logps/chosen": -246.35910034179688,
      "logps/rejected": -346.08642578125,
      "loss": 0.594,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": -0.5222919583320618,
      "rewards/margins": 0.4267689287662506,
      "rewards/rejected": -0.9490607976913452,
      "step": 110
    },
    {
      "epoch": 0.75,
      "learning_rate": 8.628481651367875e-08,
      "logits/chosen": -2.107372760772705,
      "logits/rejected": -2.0185680389404297,
      "logps/chosen": -271.4783935546875,
      "logps/rejected": -370.3096923828125,
      "loss": 0.5906,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": -0.5639175176620483,
      "rewards/margins": 0.45396023988723755,
      "rewards/rejected": -1.0178776979446411,
      "step": 120
    },
    {
      "epoch": 0.82,
      "learning_rate": 4.904486005914027e-08,
      "logits/chosen": -2.119300365447998,
      "logits/rejected": -2.106173515319824,
      "logps/chosen": -282.1510009765625,
      "logps/rejected": -376.592041015625,
      "loss": 0.5895,
      "rewards/accuracies": 0.643750011920929,
      "rewards/chosen": -0.6762362718582153,
      "rewards/margins": 0.3490751385688782,
      "rewards/rejected": -1.0253114700317383,
      "step": 130
    },
    {
      "epoch": 0.88,
      "learning_rate": 2.1464952759020856e-08,
      "logits/chosen": -2.127058267593384,
      "logits/rejected": -2.0316877365112305,
      "logps/chosen": -290.1690979003906,
      "logps/rejected": -389.51116943359375,
      "loss": 0.5877,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": -0.6831713318824768,
      "rewards/margins": 0.4633623957633972,
      "rewards/rejected": -1.146533727645874,
      "step": 140
    },
    {
      "epoch": 0.94,
      "learning_rate": 4.8708793644441086e-09,
      "logits/chosen": -2.0759072303771973,
      "logits/rejected": -2.021393299102783,
      "logps/chosen": -278.86285400390625,
      "logps/rejected": -323.4399108886719,
      "loss": 0.5954,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": -0.6692460775375366,
      "rewards/margins": 0.32712554931640625,
      "rewards/rejected": -0.9963716268539429,
      "step": 150
    },
    {
      "epoch": 1.0,
      "step": 159,
      "total_flos": 0.0,
      "train_loss": 0.6192928140268386,
      "train_runtime": 2654.4881,
      "train_samples_per_second": 7.677,
      "train_steps_per_second": 0.06
    }
  ],
  "logging_steps": 10,
  "max_steps": 159,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}