1553 lines
57 KiB
JSON
1553 lines
57 KiB
JSON
|
|
{
|
||
|
|
"best_global_step": null,
|
||
|
|
"best_metric": null,
|
||
|
|
"best_model_checkpoint": null,
|
||
|
|
"epoch": 1.0,
|
||
|
|
"eval_steps": 100,
|
||
|
|
"global_step": 340,
|
||
|
|
"is_hyper_param_search": false,
|
||
|
|
"is_local_process_zero": true,
|
||
|
|
"is_world_process_zero": true,
|
||
|
|
"log_history": [
|
||
|
|
{
|
||
|
|
"epoch": 0.0029411764705882353,
|
||
|
|
"grad_norm": 2.3687267303466797,
|
||
|
|
"kl/avg_steps": 0.0,
|
||
|
|
"kl/beta": 0.009999999776482582,
|
||
|
|
"kl/n_epsilon_steps": 0.5,
|
||
|
|
"kl/p_epsilon_steps": 0.5,
|
||
|
|
"learning_rate": 0.0,
|
||
|
|
"logits/chosen": -0.5232092142105103,
|
||
|
|
"logits/rejected": -0.36964714527130127,
|
||
|
|
"logps/chosen": -69.28079223632812,
|
||
|
|
"logps/ref_chosen": -69.2831802368164,
|
||
|
|
"logps/ref_rejected": -69.74366760253906,
|
||
|
|
"logps/rejected": -69.7318344116211,
|
||
|
|
"loss": 0.6932,
|
||
|
|
"rewards/accuracies": 0.515625,
|
||
|
|
"rewards/chosen": 9.683193638920784e-06,
|
||
|
|
"rewards/margins": -0.0001216536620631814,
|
||
|
|
"rewards/rejected": 0.00013133684115018696,
|
||
|
|
"step": 1
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.014705882352941176,
|
||
|
|
"grad_norm": 2.401517868041992,
|
||
|
|
"kl/avg_steps": 0.001953125,
|
||
|
|
"kl/beta": 0.009997854940593243,
|
||
|
|
"kl/n_epsilon_steps": 0.498046875,
|
||
|
|
"kl/p_epsilon_steps": 0.5,
|
||
|
|
"learning_rate": 5.88235294117647e-08,
|
||
|
|
"logits/chosen": -0.5336302518844604,
|
||
|
|
"logits/rejected": -0.41014784574508667,
|
||
|
|
"logps/chosen": -75.71084594726562,
|
||
|
|
"logps/ref_chosen": -75.70054626464844,
|
||
|
|
"logps/ref_rejected": -81.47293090820312,
|
||
|
|
"logps/rejected": -81.47822570800781,
|
||
|
|
"loss": 0.6932,
|
||
|
|
"rewards/accuracies": 0.505859375,
|
||
|
|
"rewards/chosen": -0.00011636512499535456,
|
||
|
|
"rewards/margins": -7.826486398698762e-05,
|
||
|
|
"rewards/rejected": -3.8100268284324557e-05,
|
||
|
|
"step": 5
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.029411764705882353,
|
||
|
|
"grad_norm": 2.312957525253296,
|
||
|
|
"kl/avg_steps": -0.05312500149011612,
|
||
|
|
"kl/beta": 0.010005339980125427,
|
||
|
|
"kl/n_epsilon_steps": 0.5234375,
|
||
|
|
"kl/p_epsilon_steps": 0.4703125059604645,
|
||
|
|
"learning_rate": 1.3235294117647057e-07,
|
||
|
|
"logits/chosen": -0.5401719808578491,
|
||
|
|
"logits/rejected": -0.4321846067905426,
|
||
|
|
"logps/chosen": -77.008544921875,
|
||
|
|
"logps/ref_chosen": -77.0025405883789,
|
||
|
|
"logps/ref_rejected": -82.64138793945312,
|
||
|
|
"logps/rejected": -82.64922332763672,
|
||
|
|
"loss": 0.6932,
|
||
|
|
"rewards/accuracies": 0.4765625,
|
||
|
|
"rewards/chosen": -7.36937508918345e-05,
|
||
|
|
"rewards/margins": -9.958527698472608e-06,
|
||
|
|
"rewards/rejected": -6.373519863700494e-05,
|
||
|
|
"step": 10
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.04411764705882353,
|
||
|
|
"grad_norm": 2.890460968017578,
|
||
|
|
"kl/avg_steps": 0.12968750298023224,
|
||
|
|
"kl/beta": 0.010008977726101875,
|
||
|
|
"kl/n_epsilon_steps": 0.43437498807907104,
|
||
|
|
"kl/p_epsilon_steps": 0.5640624761581421,
|
||
|
|
"learning_rate": 2.0588235294117645e-07,
|
||
|
|
"logits/chosen": -0.5125764608383179,
|
||
|
|
"logits/rejected": -0.4432317316532135,
|
||
|
|
"logps/chosen": -70.82783508300781,
|
||
|
|
"logps/ref_chosen": -70.83788299560547,
|
||
|
|
"logps/ref_rejected": -87.43305206298828,
|
||
|
|
"logps/rejected": -87.48735809326172,
|
||
|
|
"loss": 0.6928,
|
||
|
|
"rewards/accuracies": 0.5718749761581421,
|
||
|
|
"rewards/chosen": 8.707816596142948e-05,
|
||
|
|
"rewards/margins": 0.0006134368595667183,
|
||
|
|
"rewards/rejected": -0.0005263587227091193,
|
||
|
|
"step": 15
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.058823529411764705,
|
||
|
|
"grad_norm": 2.124864101409912,
|
||
|
|
"kl/avg_steps": 0.265625,
|
||
|
|
"kl/beta": 0.009920386597514153,
|
||
|
|
"kl/n_epsilon_steps": 0.3656249940395355,
|
||
|
|
"kl/p_epsilon_steps": 0.6312500238418579,
|
||
|
|
"learning_rate": 2.7941176470588235e-07,
|
||
|
|
"logits/chosen": -0.5464522242546082,
|
||
|
|
"logits/rejected": -0.4405369162559509,
|
||
|
|
"logps/chosen": -70.1437759399414,
|
||
|
|
"logps/ref_chosen": -70.1697006225586,
|
||
|
|
"logps/ref_rejected": -82.27420806884766,
|
||
|
|
"logps/rejected": -82.44139099121094,
|
||
|
|
"loss": 0.6922,
|
||
|
|
"rewards/accuracies": 0.6499999761581421,
|
||
|
|
"rewards/chosen": 0.00024056310940068215,
|
||
|
|
"rewards/margins": 0.0018755672499537468,
|
||
|
|
"rewards/rejected": -0.0016350041842088103,
|
||
|
|
"step": 20
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.07352941176470588,
|
||
|
|
"grad_norm": 2.521970510482788,
|
||
|
|
"kl/avg_steps": 0.543749988079071,
|
||
|
|
"kl/beta": 0.009726567193865776,
|
||
|
|
"kl/n_epsilon_steps": 0.22812500596046448,
|
||
|
|
"kl/p_epsilon_steps": 0.7718750238418579,
|
||
|
|
"learning_rate": 3.529411764705882e-07,
|
||
|
|
"logits/chosen": -0.568504273891449,
|
||
|
|
"logits/rejected": -0.4329379200935364,
|
||
|
|
"logps/chosen": -74.4179458618164,
|
||
|
|
"logps/ref_chosen": -74.5040283203125,
|
||
|
|
"logps/ref_rejected": -89.5297622680664,
|
||
|
|
"logps/rejected": -90.02223205566406,
|
||
|
|
"loss": 0.6904,
|
||
|
|
"rewards/accuracies": 0.7796875238418579,
|
||
|
|
"rewards/chosen": 0.0008119211415760219,
|
||
|
|
"rewards/margins": 0.005549114663153887,
|
||
|
|
"rewards/rejected": -0.00473719323053956,
|
||
|
|
"step": 25
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.08823529411764706,
|
||
|
|
"grad_norm": 2.3963418006896973,
|
||
|
|
"kl/avg_steps": 0.567187488079071,
|
||
|
|
"kl/beta": 0.00945484172552824,
|
||
|
|
"kl/n_epsilon_steps": 0.21562500298023224,
|
||
|
|
"kl/p_epsilon_steps": 0.7828124761581421,
|
||
|
|
"learning_rate": 4.264705882352941e-07,
|
||
|
|
"logits/chosen": -0.6653466820716858,
|
||
|
|
"logits/rejected": -0.49282917380332947,
|
||
|
|
"logps/chosen": -76.55107879638672,
|
||
|
|
"logps/ref_chosen": -76.60227966308594,
|
||
|
|
"logps/ref_rejected": -82.36322784423828,
|
||
|
|
"logps/rejected": -83.71476745605469,
|
||
|
|
"loss": 0.6867,
|
||
|
|
"rewards/accuracies": 0.8125,
|
||
|
|
"rewards/chosen": 0.0004493276646826416,
|
||
|
|
"rewards/margins": 0.01309473067522049,
|
||
|
|
"rewards/rejected": -0.012645403854548931,
|
||
|
|
"step": 30
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.10294117647058823,
|
||
|
|
"grad_norm": 2.311098337173462,
|
||
|
|
"kl/avg_steps": 0.5546875,
|
||
|
|
"kl/beta": 0.009198471903800964,
|
||
|
|
"kl/n_epsilon_steps": 0.22187499701976776,
|
||
|
|
"kl/p_epsilon_steps": 0.776562511920929,
|
||
|
|
"learning_rate": 5e-07,
|
||
|
|
"logits/chosen": -0.6610927581787109,
|
||
|
|
"logits/rejected": -0.5268033146858215,
|
||
|
|
"logps/chosen": -76.14710998535156,
|
||
|
|
"logps/ref_chosen": -75.79379272460938,
|
||
|
|
"logps/ref_rejected": -83.69039154052734,
|
||
|
|
"logps/rejected": -86.21320343017578,
|
||
|
|
"loss": 0.6835,
|
||
|
|
"rewards/accuracies": 0.809374988079071,
|
||
|
|
"rewards/chosen": -0.003281622426584363,
|
||
|
|
"rewards/margins": 0.019690800458192825,
|
||
|
|
"rewards/rejected": -0.02297242358326912,
|
||
|
|
"step": 35
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.11764705882352941,
|
||
|
|
"grad_norm": 2.681466817855835,
|
||
|
|
"kl/avg_steps": 0.4078125059604645,
|
||
|
|
"kl/beta": 0.008961381390690804,
|
||
|
|
"kl/n_epsilon_steps": 0.2953124940395355,
|
||
|
|
"kl/p_epsilon_steps": 0.703125,
|
||
|
|
"learning_rate": 4.996706849759452e-07,
|
||
|
|
"logits/chosen": -0.8570469617843628,
|
||
|
|
"logits/rejected": -0.7218376398086548,
|
||
|
|
"logps/chosen": -77.57659149169922,
|
||
|
|
"logps/ref_chosen": -75.21812438964844,
|
||
|
|
"logps/ref_rejected": -86.6792984008789,
|
||
|
|
"logps/rejected": -93.75047302246094,
|
||
|
|
"loss": 0.6732,
|
||
|
|
"rewards/accuracies": 0.78125,
|
||
|
|
"rewards/chosen": -0.021187324076890945,
|
||
|
|
"rewards/margins": 0.04168969392776489,
|
||
|
|
"rewards/rejected": -0.06287702172994614,
|
||
|
|
"step": 40
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1323529411764706,
|
||
|
|
"grad_norm": 3.2158050537109375,
|
||
|
|
"kl/avg_steps": 0.3187499940395355,
|
||
|
|
"kl/beta": 0.008803511038422585,
|
||
|
|
"kl/n_epsilon_steps": 0.34062498807907104,
|
||
|
|
"kl/p_epsilon_steps": 0.659375011920929,
|
||
|
|
"learning_rate": 4.986836074908615e-07,
|
||
|
|
"logits/chosen": -0.9623914957046509,
|
||
|
|
"logits/rejected": -0.8303581476211548,
|
||
|
|
"logps/chosen": -82.83192443847656,
|
||
|
|
"logps/ref_chosen": -77.2712173461914,
|
||
|
|
"logps/ref_rejected": -91.67030334472656,
|
||
|
|
"logps/rejected": -102.5731201171875,
|
||
|
|
"loss": 0.6715,
|
||
|
|
"rewards/accuracies": 0.7515624761581421,
|
||
|
|
"rewards/chosen": -0.049075882881879807,
|
||
|
|
"rewards/margins": 0.04632042720913887,
|
||
|
|
"rewards/rejected": -0.09539631009101868,
|
||
|
|
"step": 45
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.14705882352941177,
|
||
|
|
"grad_norm": 3.3687705993652344,
|
||
|
|
"kl/avg_steps": 0.20624999701976776,
|
||
|
|
"kl/beta": 0.008690183982253075,
|
||
|
|
"kl/n_epsilon_steps": 0.3968749940395355,
|
||
|
|
"kl/p_epsilon_steps": 0.6031249761581421,
|
||
|
|
"learning_rate": 4.970413680203148e-07,
|
||
|
|
"logits/chosen": -1.0613696575164795,
|
||
|
|
"logits/rejected": -0.921216607093811,
|
||
|
|
"logps/chosen": -82.58769226074219,
|
||
|
|
"logps/ref_chosen": -73.91633605957031,
|
||
|
|
"logps/ref_rejected": -79.92402648925781,
|
||
|
|
"logps/rejected": -94.2342758178711,
|
||
|
|
"loss": 0.6712,
|
||
|
|
"rewards/accuracies": 0.698437511920929,
|
||
|
|
"rewards/chosen": -0.07562652230262756,
|
||
|
|
"rewards/margins": 0.048155996948480606,
|
||
|
|
"rewards/rejected": -0.12378251552581787,
|
||
|
|
"step": 50
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.16176470588235295,
|
||
|
|
"grad_norm": 4.448400020599365,
|
||
|
|
"kl/avg_steps": 0.2109375,
|
||
|
|
"kl/beta": 0.00860314816236496,
|
||
|
|
"kl/n_epsilon_steps": 0.39375001192092896,
|
||
|
|
"kl/p_epsilon_steps": 0.604687511920929,
|
||
|
|
"learning_rate": 4.947482930773511e-07,
|
||
|
|
"logits/chosen": -1.1757243871688843,
|
||
|
|
"logits/rejected": -1.0121644735336304,
|
||
|
|
"logps/chosen": -91.91180419921875,
|
||
|
|
"logps/ref_chosen": -79.74378204345703,
|
||
|
|
"logps/ref_rejected": -83.18132019042969,
|
||
|
|
"logps/rejected": -103.12516021728516,
|
||
|
|
"loss": 0.6639,
|
||
|
|
"rewards/accuracies": 0.6859375238418579,
|
||
|
|
"rewards/chosen": -0.10501708835363388,
|
||
|
|
"rewards/margins": 0.06571148335933685,
|
||
|
|
"rewards/rejected": -0.17072856426239014,
|
||
|
|
"step": 55
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.17647058823529413,
|
||
|
|
"grad_norm": 3.918736219406128,
|
||
|
|
"kl/avg_steps": 0.171875,
|
||
|
|
"kl/beta": 0.008520014584064484,
|
||
|
|
"kl/n_epsilon_steps": 0.4140625,
|
||
|
|
"kl/p_epsilon_steps": 0.5859375,
|
||
|
|
"learning_rate": 4.918104238142103e-07,
|
||
|
|
"logits/chosen": -1.2249476909637451,
|
||
|
|
"logits/rejected": -1.1036134958267212,
|
||
|
|
"logps/chosen": -98.29524993896484,
|
||
|
|
"logps/ref_chosen": -81.61141967773438,
|
||
|
|
"logps/ref_rejected": -80.947998046875,
|
||
|
|
"logps/rejected": -105.27479553222656,
|
||
|
|
"loss": 0.6663,
|
||
|
|
"rewards/accuracies": 0.660937488079071,
|
||
|
|
"rewards/chosen": -0.14256855845451355,
|
||
|
|
"rewards/margins": 0.06371048837900162,
|
||
|
|
"rewards/rejected": -0.20627903938293457,
|
||
|
|
"step": 60
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.19117647058823528,
|
||
|
|
"grad_norm": 3.5865535736083984,
|
||
|
|
"kl/avg_steps": 0.27656251192092896,
|
||
|
|
"kl/beta": 0.00841777864843607,
|
||
|
|
"kl/n_epsilon_steps": 0.3609375059604645,
|
||
|
|
"kl/p_epsilon_steps": 0.637499988079071,
|
||
|
|
"learning_rate": 4.882355001067891e-07,
|
||
|
|
"logits/chosen": -1.2322965860366821,
|
||
|
|
"logits/rejected": -1.151759147644043,
|
||
|
|
"logps/chosen": -91.71420288085938,
|
||
|
|
"logps/ref_chosen": -75.09439849853516,
|
||
|
|
"logps/ref_rejected": -87.96830749511719,
|
||
|
|
"logps/rejected": -117.06733703613281,
|
||
|
|
"loss": 0.6479,
|
||
|
|
"rewards/accuracies": 0.6875,
|
||
|
|
"rewards/chosen": -0.14002391695976257,
|
||
|
|
"rewards/margins": 0.10341048240661621,
|
||
|
|
"rewards/rejected": -0.24343439936637878,
|
||
|
|
"step": 65
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.20588235294117646,
|
||
|
|
"grad_norm": 3.871297836303711,
|
||
|
|
"kl/avg_steps": 0.28437501192092896,
|
||
|
|
"kl/beta": 0.008305966854095459,
|
||
|
|
"kl/n_epsilon_steps": 0.3578124940395355,
|
||
|
|
"kl/p_epsilon_steps": 0.6421874761581421,
|
||
|
|
"learning_rate": 4.840329401637809e-07,
|
||
|
|
"logits/chosen": -1.2796670198440552,
|
||
|
|
"logits/rejected": -1.1985622644424438,
|
||
|
|
"logps/chosen": -89.69293975830078,
|
||
|
|
"logps/ref_chosen": -70.07804870605469,
|
||
|
|
"logps/ref_rejected": -88.98612976074219,
|
||
|
|
"logps/rejected": -122.0387954711914,
|
||
|
|
"loss": 0.6462,
|
||
|
|
"rewards/accuracies": 0.7265625,
|
||
|
|
"rewards/chosen": -0.16316808760166168,
|
||
|
|
"rewards/margins": 0.10986582934856415,
|
||
|
|
"rewards/rejected": -0.27303391695022583,
|
||
|
|
"step": 70
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.22058823529411764,
|
||
|
|
"grad_norm": 3.9685800075531006,
|
||
|
|
"kl/avg_steps": 0.1875,
|
||
|
|
"kl/beta": 0.008209030143916607,
|
||
|
|
"kl/n_epsilon_steps": 0.40625,
|
||
|
|
"kl/p_epsilon_steps": 0.59375,
|
||
|
|
"learning_rate": 4.792138157142157e-07,
|
||
|
|
"logits/chosen": -1.2629064321517944,
|
||
|
|
"logits/rejected": -1.1684788465499878,
|
||
|
|
"logps/chosen": -101.08387756347656,
|
||
|
|
"logps/ref_chosen": -77.74958801269531,
|
||
|
|
"logps/ref_rejected": -82.17206573486328,
|
||
|
|
"logps/rejected": -117.42021179199219,
|
||
|
|
"loss": 0.6538,
|
||
|
|
"rewards/accuracies": 0.6796875,
|
||
|
|
"rewards/chosen": -0.191951185464859,
|
||
|
|
"rewards/margins": 0.09596569836139679,
|
||
|
|
"rewards/rejected": -0.2879168391227722,
|
||
|
|
"step": 75
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.23529411764705882,
|
||
|
|
"grad_norm": 4.582348823547363,
|
||
|
|
"kl/avg_steps": 0.24375000596046448,
|
||
|
|
"kl/beta": 0.008118118159472942,
|
||
|
|
"kl/n_epsilon_steps": 0.37812501192092896,
|
||
|
|
"kl/p_epsilon_steps": 0.621874988079071,
|
||
|
|
"learning_rate": 4.737908228387656e-07,
|
||
|
|
"logits/chosen": -1.2720203399658203,
|
||
|
|
"logits/rejected": -1.218477725982666,
|
||
|
|
"logps/chosen": -107.53079986572266,
|
||
|
|
"logps/ref_chosen": -81.88478088378906,
|
||
|
|
"logps/ref_rejected": -90.519775390625,
|
||
|
|
"logps/rejected": -131.16079711914062,
|
||
|
|
"loss": 0.6438,
|
||
|
|
"rewards/accuracies": 0.6875,
|
||
|
|
"rewards/chosen": -0.20838662981987,
|
||
|
|
"rewards/margins": 0.11963216215372086,
|
||
|
|
"rewards/rejected": -0.32801881432533264,
|
||
|
|
"step": 80
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.25,
|
||
|
|
"grad_norm": 3.6524829864501953,
|
||
|
|
"kl/avg_steps": 0.2515625059604645,
|
||
|
|
"kl/beta": 0.0080325398594141,
|
||
|
|
"kl/n_epsilon_steps": 0.37187498807907104,
|
||
|
|
"kl/p_epsilon_steps": 0.6234375238418579,
|
||
|
|
"learning_rate": 4.6777824852166437e-07,
|
||
|
|
"logits/chosen": -1.2834303379058838,
|
||
|
|
"logits/rejected": -1.198880672454834,
|
||
|
|
"logps/chosen": -95.5977554321289,
|
||
|
|
"logps/ref_chosen": -70.41683197021484,
|
||
|
|
"logps/ref_rejected": -78.02936553955078,
|
||
|
|
"logps/rejected": -118.98405456542969,
|
||
|
|
"loss": 0.6418,
|
||
|
|
"rewards/accuracies": 0.684374988079071,
|
||
|
|
"rewards/chosen": -0.20263484120368958,
|
||
|
|
"rewards/margins": 0.12456460297107697,
|
||
|
|
"rewards/rejected": -0.32719942927360535,
|
||
|
|
"step": 85
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2647058823529412,
|
||
|
|
"grad_norm": 4.22735071182251,
|
||
|
|
"kl/avg_steps": 0.2874999940395355,
|
||
|
|
"kl/beta": 0.007919726893305779,
|
||
|
|
"kl/n_epsilon_steps": 0.35468751192092896,
|
||
|
|
"kl/p_epsilon_steps": 0.6421874761581421,
|
||
|
|
"learning_rate": 4.611919330113591e-07,
|
||
|
|
"logits/chosen": -1.2632228136062622,
|
||
|
|
"logits/rejected": -1.2163931131362915,
|
||
|
|
"logps/chosen": -105.8456039428711,
|
||
|
|
"logps/ref_chosen": -76.6160888671875,
|
||
|
|
"logps/ref_rejected": -89.49937438964844,
|
||
|
|
"logps/rejected": -136.4986572265625,
|
||
|
|
"loss": 0.6361,
|
||
|
|
"rewards/accuracies": 0.706250011920929,
|
||
|
|
"rewards/chosen": -0.23172405362129211,
|
||
|
|
"rewards/margins": 0.13836422562599182,
|
||
|
|
"rewards/rejected": -0.37008827924728394,
|
||
|
|
"step": 90
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.27941176470588236,
|
||
|
|
"grad_norm": 4.236695766448975,
|
||
|
|
"kl/avg_steps": 0.2750000059604645,
|
||
|
|
"kl/beta": 0.0078009068965911865,
|
||
|
|
"kl/n_epsilon_steps": 0.36250001192092896,
|
||
|
|
"kl/p_epsilon_steps": 0.637499988079071,
|
||
|
|
"learning_rate": 4.5404922808905543e-07,
|
||
|
|
"logits/chosen": -1.2625572681427002,
|
||
|
|
"logits/rejected": -1.2011988162994385,
|
||
|
|
"logps/chosen": -104.29510498046875,
|
||
|
|
"logps/ref_chosen": -73.50260162353516,
|
||
|
|
"logps/ref_rejected": -76.48811340332031,
|
||
|
|
"logps/rejected": -124.16410827636719,
|
||
|
|
"loss": 0.6411,
|
||
|
|
"rewards/accuracies": 0.7015625238418579,
|
||
|
|
"rewards/chosen": -0.24040882289409637,
|
||
|
|
"rewards/margins": 0.1294616460800171,
|
||
|
|
"rewards/rejected": -0.36987045407295227,
|
||
|
|
"step": 95
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.29411764705882354,
|
||
|
|
"grad_norm": 4.193100452423096,
|
||
|
|
"kl/avg_steps": 0.3656249940395355,
|
||
|
|
"kl/beta": 0.0076876478269696236,
|
||
|
|
"kl/n_epsilon_steps": 0.31718748807907104,
|
||
|
|
"kl/p_epsilon_steps": 0.682812511920929,
|
||
|
|
"learning_rate": 4.4636895135509966e-07,
|
||
|
|
"logits/chosen": -1.2317556142807007,
|
||
|
|
"logits/rejected": -1.1946831941604614,
|
||
|
|
"logps/chosen": -103.88249206542969,
|
||
|
|
"logps/ref_chosen": -72.6116714477539,
|
||
|
|
"logps/ref_rejected": -81.16241455078125,
|
||
|
|
"logps/rejected": -134.57403564453125,
|
||
|
|
"loss": 0.6236,
|
||
|
|
"rewards/accuracies": 0.746874988079071,
|
||
|
|
"rewards/chosen": -0.24038386344909668,
|
||
|
|
"rewards/margins": 0.16778317093849182,
|
||
|
|
"rewards/rejected": -0.4081670641899109,
|
||
|
|
"step": 100
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.29411764705882354,
|
||
|
|
"eval_kl/n_epsilon_steps": 0.4236111044883728,
|
||
|
|
"eval_kl/p_epsilon_steps": 0.5759548544883728,
|
||
|
|
"eval_logits/chosen": -1.2261141538619995,
|
||
|
|
"eval_logits/rejected": -1.1807267665863037,
|
||
|
|
"eval_logps/chosen": -127.64717864990234,
|
||
|
|
"eval_logps/ref_chosen": -87.82356262207031,
|
||
|
|
"eval_logps/ref_rejected": -82.81887817382812,
|
||
|
|
"eval_logps/rejected": -134.3017578125,
|
||
|
|
"eval_loss": 0.6636335253715515,
|
||
|
|
"eval_rewards/accuracies": 0.6124131679534912,
|
||
|
|
"eval_rewards/chosen": -0.30329596996307373,
|
||
|
|
"eval_rewards/margins": 0.08622786402702332,
|
||
|
|
"eval_rewards/rejected": -0.38952386379241943,
|
||
|
|
"eval_runtime": 22.4366,
|
||
|
|
"eval_samples_per_second": 104.249,
|
||
|
|
"eval_steps_per_second": 0.847,
|
||
|
|
"step": 100
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3088235294117647,
|
||
|
|
"grad_norm": 4.206778049468994,
|
||
|
|
"kl/avg_steps": 0.28125,
|
||
|
|
"kl/beta": 0.007563448045402765,
|
||
|
|
"kl/n_epsilon_steps": 0.359375,
|
||
|
|
"kl/p_epsilon_steps": 0.640625,
|
||
|
|
"learning_rate": 4.381713366536311e-07,
|
||
|
|
"logits/chosen": -1.2459900379180908,
|
||
|
|
"logits/rejected": -1.1858142614364624,
|
||
|
|
"logps/chosen": -112.22574615478516,
|
||
|
|
"logps/ref_chosen": -76.5867919921875,
|
||
|
|
"logps/ref_rejected": -84.33440399169922,
|
||
|
|
"logps/rejected": -140.7528533935547,
|
||
|
|
"loss": 0.6304,
|
||
|
|
"rewards/accuracies": 0.699999988079071,
|
||
|
|
"rewards/chosen": -0.2697572112083435,
|
||
|
|
"rewards/margins": 0.15455064177513123,
|
||
|
|
"rewards/rejected": -0.42430782318115234,
|
||
|
|
"step": 105
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3235294117647059,
|
||
|
|
"grad_norm": 5.154345989227295,
|
||
|
|
"kl/avg_steps": 0.28437501192092896,
|
||
|
|
"kl/beta": 0.007447557989507914,
|
||
|
|
"kl/n_epsilon_steps": 0.3578124940395355,
|
||
|
|
"kl/p_epsilon_steps": 0.6421874761581421,
|
||
|
|
"learning_rate": 4.2947798076611047e-07,
|
||
|
|
"logits/chosen": -1.2248286008834839,
|
||
|
|
"logits/rejected": -1.1694958209991455,
|
||
|
|
"logps/chosen": -118.81462097167969,
|
||
|
|
"logps/ref_chosen": -78.16385650634766,
|
||
|
|
"logps/ref_rejected": -83.61200714111328,
|
||
|
|
"logps/rejected": -146.4515838623047,
|
||
|
|
"loss": 0.6294,
|
||
|
|
"rewards/accuracies": 0.692187488079071,
|
||
|
|
"rewards/chosen": -0.3029385209083557,
|
||
|
|
"rewards/margins": 0.16256316006183624,
|
||
|
|
"rewards/rejected": -0.46550169587135315,
|
||
|
|
"step": 110
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3382352941176471,
|
||
|
|
"grad_norm": 5.148464679718018,
|
||
|
|
"kl/avg_steps": 0.35468751192092896,
|
||
|
|
"kl/beta": 0.007336863782256842,
|
||
|
|
"kl/n_epsilon_steps": 0.3218750059604645,
|
||
|
|
"kl/p_epsilon_steps": 0.676562488079071,
|
||
|
|
"learning_rate": 4.203117865141635e-07,
|
||
|
|
"logits/chosen": -1.2170436382293701,
|
||
|
|
"logits/rejected": -1.1504008769989014,
|
||
|
|
"logps/chosen": -118.66552734375,
|
||
|
|
"logps/ref_chosen": -74.8998031616211,
|
||
|
|
"logps/ref_rejected": -85.2784652709961,
|
||
|
|
"logps/rejected": -156.08580017089844,
|
||
|
|
"loss": 0.618,
|
||
|
|
"rewards/accuracies": 0.7265625,
|
||
|
|
"rewards/chosen": -0.3210408687591553,
|
||
|
|
"rewards/margins": 0.19527961313724518,
|
||
|
|
"rewards/rejected": -0.516320526599884,
|
||
|
|
"step": 115
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.35294117647058826,
|
||
|
|
"grad_norm": 5.226547718048096,
|
||
|
|
"kl/avg_steps": 0.265625,
|
||
|
|
"kl/beta": 0.007222268730401993,
|
||
|
|
"kl/n_epsilon_steps": 0.3671875,
|
||
|
|
"kl/p_epsilon_steps": 0.6328125,
|
||
|
|
"learning_rate": 4.106969024216348e-07,
|
||
|
|
"logits/chosen": -1.1989049911499023,
|
||
|
|
"logits/rejected": -1.1565752029418945,
|
||
|
|
"logps/chosen": -119.46983337402344,
|
||
|
|
"logps/ref_chosen": -73.58607482910156,
|
||
|
|
"logps/ref_rejected": -85.84365844726562,
|
||
|
|
"logps/rejected": -158.82113647460938,
|
||
|
|
"loss": 0.6197,
|
||
|
|
"rewards/accuracies": 0.698437511920929,
|
||
|
|
"rewards/chosen": -0.3315422534942627,
|
||
|
|
"rewards/margins": 0.19263319671154022,
|
||
|
|
"rewards/rejected": -0.5241755247116089,
|
||
|
|
"step": 120
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.36764705882352944,
|
||
|
|
"grad_norm": 5.764974594116211,
|
||
|
|
"kl/avg_steps": 0.30937498807907104,
|
||
|
|
"kl/beta": 0.007117821369320154,
|
||
|
|
"kl/n_epsilon_steps": 0.3453125059604645,
|
||
|
|
"kl/p_epsilon_steps": 0.6546875238418579,
|
||
|
|
"learning_rate": 4.006586590948141e-07,
|
||
|
|
"logits/chosen": -1.1903568506240845,
|
||
|
|
"logits/rejected": -1.130084753036499,
|
||
|
|
"logps/chosen": -130.13233947753906,
|
||
|
|
"logps/ref_chosen": -80.25770568847656,
|
||
|
|
"logps/ref_rejected": -81.34100341796875,
|
||
|
|
"logps/rejected": -161.29537963867188,
|
||
|
|
"loss": 0.6139,
|
||
|
|
"rewards/accuracies": 0.715624988079071,
|
||
|
|
"rewards/chosen": -0.3550013303756714,
|
||
|
|
"rewards/margins": 0.21088404953479767,
|
||
|
|
"rewards/rejected": -0.5658854246139526,
|
||
|
|
"step": 125
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.38235294117647056,
|
||
|
|
"grad_norm": 5.23793363571167,
|
||
|
|
"kl/avg_steps": 0.2718749940395355,
|
||
|
|
"kl/beta": 0.007017888128757477,
|
||
|
|
"kl/n_epsilon_steps": 0.36406248807907104,
|
||
|
|
"kl/p_epsilon_steps": 0.635937511920929,
|
||
|
|
"learning_rate": 3.9022350248844246e-07,
|
||
|
|
"logits/chosen": -1.1651326417922974,
|
||
|
|
"logits/rejected": -1.1263306140899658,
|
||
|
|
"logps/chosen": -128.90423583984375,
|
||
|
|
"logps/ref_chosen": -74.67902374267578,
|
||
|
|
"logps/ref_rejected": -84.1854019165039,
|
||
|
|
"logps/rejected": -167.0582733154297,
|
||
|
|
"loss": 0.6209,
|
||
|
|
"rewards/accuracies": 0.6812499761581421,
|
||
|
|
"rewards/chosen": -0.3804508149623871,
|
||
|
|
"rewards/margins": 0.19761842489242554,
|
||
|
|
"rewards/rejected": -0.5780693292617798,
|
||
|
|
"step": 130
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.39705882352941174,
|
||
|
|
"grad_norm": 5.766234874725342,
|
||
|
|
"kl/avg_steps": 0.32499998807907104,
|
||
|
|
"kl/beta": 0.006909938994795084,
|
||
|
|
"kl/n_epsilon_steps": 0.3375000059604645,
|
||
|
|
"kl/p_epsilon_steps": 0.6625000238418579,
|
||
|
|
"learning_rate": 3.794189242333106e-07,
|
||
|
|
"logits/chosen": -1.1625608205795288,
|
||
|
|
"logits/rejected": -1.0963513851165771,
|
||
|
|
"logps/chosen": -138.46322631835938,
|
||
|
|
"logps/ref_chosen": -81.2975845336914,
|
||
|
|
"logps/ref_rejected": -87.74832916259766,
|
||
|
|
"logps/rejected": -174.3780059814453,
|
||
|
|
"loss": 0.6207,
|
||
|
|
"rewards/accuracies": 0.699999988079071,
|
||
|
|
"rewards/chosen": -0.39489540457725525,
|
||
|
|
"rewards/margins": 0.20013752579689026,
|
||
|
|
"rewards/rejected": -0.5950329303741455,
|
||
|
|
"step": 135
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4117647058823529,
|
||
|
|
"grad_norm": 4.888461112976074,
|
||
|
|
"kl/avg_steps": 0.3828125,
|
||
|
|
"kl/beta": 0.006796327419579029,
|
||
|
|
"kl/n_epsilon_steps": 0.30781251192092896,
|
||
|
|
"kl/p_epsilon_steps": 0.690625011920929,
|
||
|
|
"learning_rate": 3.6827338920900253e-07,
|
||
|
|
"logits/chosen": -1.172863483428955,
|
||
|
|
"logits/rejected": -1.106768012046814,
|
||
|
|
"logps/chosen": -121.53498840332031,
|
||
|
|
"logps/ref_chosen": -71.20382690429688,
|
||
|
|
"logps/ref_rejected": -84.62137603759766,
|
||
|
|
"logps/rejected": -170.3011474609375,
|
||
|
|
"loss": 0.6009,
|
||
|
|
"rewards/accuracies": 0.7281249761581421,
|
||
|
|
"rewards/chosen": -0.3417351245880127,
|
||
|
|
"rewards/margins": 0.2368461638689041,
|
||
|
|
"rewards/rejected": -0.5785812139511108,
|
||
|
|
"step": 140
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4264705882352941,
|
||
|
|
"grad_norm": 5.264912128448486,
|
||
|
|
"kl/avg_steps": 0.390625,
|
||
|
|
"kl/beta": 0.0066697075963020325,
|
||
|
|
"kl/n_epsilon_steps": 0.3046875,
|
||
|
|
"kl/p_epsilon_steps": 0.6953125,
|
||
|
|
"learning_rate": 3.568162605525952e-07,
|
||
|
|
"logits/chosen": -1.1562573909759521,
|
||
|
|
"logits/rejected": -1.0977518558502197,
|
||
|
|
"logps/chosen": -132.38858032226562,
|
||
|
|
"logps/ref_chosen": -78.03334045410156,
|
||
|
|
"logps/ref_rejected": -86.95343017578125,
|
||
|
|
"logps/rejected": -178.3859405517578,
|
||
|
|
"loss": 0.5992,
|
||
|
|
"rewards/accuracies": 0.7421875,
|
||
|
|
"rewards/chosen": -0.3619559407234192,
|
||
|
|
"rewards/margins": 0.2438311129808426,
|
||
|
|
"rewards/rejected": -0.6057869791984558,
|
||
|
|
"step": 145
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4411764705882353,
|
||
|
|
"grad_norm": 5.746659278869629,
|
||
|
|
"kl/avg_steps": 0.328125,
|
||
|
|
"kl/beta": 0.00654013454914093,
|
||
|
|
"kl/n_epsilon_steps": 0.3359375,
|
||
|
|
"kl/p_epsilon_steps": 0.6640625,
|
||
|
|
"learning_rate": 3.4507772230088147e-07,
|
||
|
|
"logits/chosen": -1.0893394947052002,
|
||
|
|
"logits/rejected": -1.0510880947113037,
|
||
|
|
"logps/chosen": -136.75088500976562,
|
||
|
|
"logps/ref_chosen": -73.69932556152344,
|
||
|
|
"logps/ref_rejected": -86.18521118164062,
|
||
|
|
"logps/rejected": -184.1068878173828,
|
||
|
|
"loss": 0.614,
|
||
|
|
"rewards/accuracies": 0.7015625238418579,
|
||
|
|
"rewards/chosen": -0.4121219515800476,
|
||
|
|
"rewards/margins": 0.22426274418830872,
|
||
|
|
"rewards/rejected": -0.6363847255706787,
|
||
|
|
"step": 150
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.45588235294117646,
|
||
|
|
"grad_norm": 5.235478401184082,
|
||
|
|
"kl/avg_steps": 0.34375,
|
||
|
|
"kl/beta": 0.006440295372158289,
|
||
|
|
"kl/n_epsilon_steps": 0.328125,
|
||
|
|
"kl/p_epsilon_steps": 0.671875,
|
||
|
|
"learning_rate": 3.3308869986991487e-07,
|
||
|
|
"logits/chosen": -1.0995477437973022,
|
||
|
|
"logits/rejected": -1.031232237815857,
|
||
|
|
"logps/chosen": -144.14666748046875,
|
||
|
|
"logps/ref_chosen": -78.81468963623047,
|
||
|
|
"logps/ref_rejected": -82.33976745605469,
|
||
|
|
"logps/rejected": -183.3446502685547,
|
||
|
|
"loss": 0.6118,
|
||
|
|
"rewards/accuracies": 0.703125,
|
||
|
|
"rewards/chosen": -0.4207354485988617,
|
||
|
|
"rewards/margins": 0.22600603103637695,
|
||
|
|
"rewards/rejected": -0.646741509437561,
|
||
|
|
"step": 155
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.47058823529411764,
|
||
|
|
"grad_norm": 5.473912239074707,
|
||
|
|
"kl/avg_steps": 0.3812499940395355,
|
||
|
|
"kl/beta": 0.0063315341249108315,
|
||
|
|
"kl/n_epsilon_steps": 0.30937498807907104,
|
||
|
|
"kl/p_epsilon_steps": 0.690625011920929,
|
||
|
|
"learning_rate": 3.208807785813777e-07,
|
||
|
|
"logits/chosen": -1.080108880996704,
|
||
|
|
"logits/rejected": -1.0139106512069702,
|
||
|
|
"logps/chosen": -132.09349060058594,
|
||
|
|
"logps/ref_chosen": -71.280517578125,
|
||
|
|
"logps/ref_rejected": -86.39788818359375,
|
||
|
|
"logps/rejected": -188.9801788330078,
|
||
|
|
"loss": 0.5951,
|
||
|
|
"rewards/accuracies": 0.7203124761581421,
|
||
|
|
"rewards/chosen": -0.3846417963504791,
|
||
|
|
"rewards/margins": 0.26059722900390625,
|
||
|
|
"rewards/rejected": -0.645238995552063,
|
||
|
|
"step": 160
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4852941176470588,
|
||
|
|
"grad_norm": 5.492692947387695,
|
||
|
|
"kl/avg_steps": 0.33125001192092896,
|
||
|
|
"kl/beta": 0.006211251951754093,
|
||
|
|
"kl/n_epsilon_steps": 0.3343749940395355,
|
||
|
|
"kl/p_epsilon_steps": 0.6656249761581421,
|
||
|
|
"learning_rate": 3.084861204504122e-07,
|
||
|
|
"logits/chosen": -1.064668893814087,
|
||
|
|
"logits/rejected": -0.9995222091674805,
|
||
|
|
"logps/chosen": -148.7730255126953,
|
||
|
|
"logps/ref_chosen": -79.35147094726562,
|
||
|
|
"logps/ref_rejected": -83.44163513183594,
|
||
|
|
"logps/rejected": -191.25628662109375,
|
||
|
|
"loss": 0.608,
|
||
|
|
"rewards/accuracies": 0.7093750238418579,
|
||
|
|
"rewards/chosen": -0.430931031703949,
|
||
|
|
"rewards/margins": 0.23459258675575256,
|
||
|
|
"rewards/rejected": -0.6655236482620239,
|
||
|
|
"step": 165
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5,
|
||
|
|
"grad_norm": 5.870633602142334,
|
||
|
|
"kl/avg_steps": 0.3499999940395355,
|
||
|
|
"kl/beta": 0.006105704233050346,
|
||
|
|
"kl/n_epsilon_steps": 0.32343751192092896,
|
||
|
|
"kl/p_epsilon_steps": 0.6734374761581421,
|
||
|
|
"learning_rate": 2.959373794541426e-07,
|
||
|
|
"logits/chosen": -1.0475225448608398,
|
||
|
|
"logits/rejected": -1.006306529045105,
|
||
|
|
"logps/chosen": -147.1262664794922,
|
||
|
|
"logps/ref_chosen": -75.01612854003906,
|
||
|
|
"logps/ref_rejected": -86.07945251464844,
|
||
|
|
"logps/rejected": -199.21173095703125,
|
||
|
|
"loss": 0.6032,
|
||
|
|
"rewards/accuracies": 0.7124999761581421,
|
||
|
|
"rewards/chosen": -0.4399870038032532,
|
||
|
|
"rewards/margins": 0.2465648353099823,
|
||
|
|
"rewards/rejected": -0.6865519285202026,
|
||
|
|
"step": 170
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5147058823529411,
|
||
|
|
"grad_norm": 5.422708988189697,
|
||
|
|
"kl/avg_steps": 0.41093748807907104,
|
||
|
|
"kl/beta": 0.0059935590252280235,
|
||
|
|
"kl/n_epsilon_steps": 0.29374998807907104,
|
||
|
|
"kl/p_epsilon_steps": 0.7046874761581421,
|
||
|
|
"learning_rate": 2.8326761550411346e-07,
|
||
|
|
"logits/chosen": -1.037719488143921,
|
||
|
|
"logits/rejected": -0.9720247387886047,
|
||
|
|
"logps/chosen": -149.66494750976562,
|
||
|
|
"logps/ref_chosen": -75.85931396484375,
|
||
|
|
"logps/ref_rejected": -88.4763412475586,
|
||
|
|
"logps/rejected": -206.0808563232422,
|
||
|
|
"loss": 0.5969,
|
||
|
|
"rewards/accuracies": 0.729687511920929,
|
||
|
|
"rewards/chosen": -0.4419892430305481,
|
||
|
|
"rewards/margins": 0.2586033344268799,
|
||
|
|
"rewards/rejected": -0.7005925178527832,
|
||
|
|
"step": 175
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5294117647058824,
|
||
|
|
"grad_norm": 5.140402793884277,
|
||
|
|
"kl/avg_steps": 0.30937498807907104,
|
||
|
|
"kl/beta": 0.005884683690965176,
|
||
|
|
"kl/n_epsilon_steps": 0.3453125059604645,
|
||
|
|
"kl/p_epsilon_steps": 0.6546875238418579,
|
||
|
|
"learning_rate": 2.7051020734928443e-07,
|
||
|
|
"logits/chosen": -1.0452353954315186,
|
||
|
|
"logits/rejected": -0.968549370765686,
|
||
|
|
"logps/chosen": -143.4625701904297,
|
||
|
|
"logps/ref_chosen": -74.5296859741211,
|
||
|
|
"logps/ref_rejected": -78.44059753417969,
|
||
|
|
"logps/rejected": -188.22622680664062,
|
||
|
|
"loss": 0.6093,
|
||
|
|
"rewards/accuracies": 0.692187488079071,
|
||
|
|
"rewards/chosen": -0.4056355059146881,
|
||
|
|
"rewards/margins": 0.23657508194446564,
|
||
|
|
"rewards/rejected": -0.6422106027603149,
|
||
|
|
"step": 180
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5441176470588235,
|
||
|
|
"grad_norm": 5.03032112121582,
|
||
|
|
"kl/avg_steps": 0.3984375,
|
||
|
|
"kl/beta": 0.005778872407972813,
|
||
|
|
"kl/n_epsilon_steps": 0.30000001192092896,
|
||
|
|
"kl/p_epsilon_steps": 0.698437511920929,
|
||
|
|
"learning_rate": 2.5769876463904263e-07,
|
||
|
|
"logits/chosen": -1.0298566818237305,
|
||
|
|
"logits/rejected": -0.9755008816719055,
|
||
|
|
"logps/chosen": -137.92031860351562,
|
||
|
|
"logps/ref_chosen": -70.28861999511719,
|
||
|
|
"logps/ref_rejected": -85.20851135253906,
|
||
|
|
"logps/rejected": -197.1123809814453,
|
||
|
|
"loss": 0.5968,
|
||
|
|
"rewards/accuracies": 0.7328125238418579,
|
||
|
|
"rewards/chosen": -0.3904454708099365,
|
||
|
|
"rewards/margins": 0.25234130024909973,
|
||
|
|
"rewards/rejected": -0.6427868008613586,
|
||
|
|
"step": 185
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5588235294117647,
|
||
|
|
"grad_norm": 6.057910919189453,
|
||
|
|
"kl/avg_steps": 0.359375,
|
||
|
|
"kl/beta": 0.005678877234458923,
|
||
|
|
"kl/n_epsilon_steps": 0.3203125,
|
||
|
|
"kl/p_epsilon_steps": 0.6796875,
|
||
|
|
"learning_rate": 2.4486703937790243e-07,
|
||
|
|
"logits/chosen": -1.0044220685958862,
|
||
|
|
"logits/rejected": -0.9527886509895325,
|
||
|
|
"logps/chosen": -151.2794952392578,
|
||
|
|
"logps/ref_chosen": -75.0217514038086,
|
||
|
|
"logps/ref_rejected": -90.4836654663086,
|
||
|
|
"logps/rejected": -214.67868041992188,
|
||
|
|
"loss": 0.5951,
|
||
|
|
"rewards/accuracies": 0.731249988079071,
|
||
|
|
"rewards/chosen": -0.43261224031448364,
|
||
|
|
"rewards/margins": 0.2681336998939514,
|
||
|
|
"rewards/rejected": -0.7007459402084351,
|
||
|
|
"step": 190
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5735294117647058,
|
||
|
|
"grad_norm": 5.573934555053711,
|
||
|
|
"kl/avg_steps": 0.3499999940395355,
|
||
|
|
"kl/beta": 0.005573070142418146,
|
||
|
|
"kl/n_epsilon_steps": 0.32499998807907104,
|
||
|
|
"kl/p_epsilon_steps": 0.675000011920929,
|
||
|
|
"learning_rate": 2.320488370051681e-07,
|
||
|
|
"logits/chosen": -0.989575207233429,
|
||
|
|
"logits/rejected": -0.9092248678207397,
|
||
|
|
"logps/chosen": -154.51953125,
|
||
|
|
"logps/ref_chosen": -73.42979431152344,
|
||
|
|
"logps/ref_rejected": -84.43408203125,
|
||
|
|
"logps/rejected": -211.646240234375,
|
||
|
|
"loss": 0.6019,
|
||
|
|
"rewards/accuracies": 0.721875011920929,
|
||
|
|
"rewards/chosen": -0.4517548084259033,
|
||
|
|
"rewards/margins": 0.25306665897369385,
|
||
|
|
"rewards/rejected": -0.7048214673995972,
|
||
|
|
"step": 195
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5882352941176471,
|
||
|
|
"grad_norm": 5.598110198974609,
|
||
|
|
"kl/avg_steps": 0.35468751192092896,
|
||
|
|
"kl/beta": 0.005477838683873415,
|
||
|
|
"kl/n_epsilon_steps": 0.3218750059604645,
|
||
|
|
"kl/p_epsilon_steps": 0.676562488079071,
|
||
|
|
"learning_rate": 2.192779273338215e-07,
|
||
|
|
"logits/chosen": -0.9810283780097961,
|
||
|
|
"logits/rejected": -0.8921745419502258,
|
||
|
|
"logps/chosen": -159.2919464111328,
|
||
|
|
"logps/ref_chosen": -77.8104019165039,
|
||
|
|
"logps/ref_rejected": -86.66553497314453,
|
||
|
|
"logps/rejected": -219.63626098632812,
|
||
|
|
"loss": 0.5934,
|
||
|
|
"rewards/accuracies": 0.7109375,
|
||
|
|
"rewards/chosen": -0.4459984302520752,
|
||
|
|
"rewards/margins": 0.27772727608680725,
|
||
|
|
"rewards/rejected": -0.7237256765365601,
|
||
|
|
"step": 200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5882352941176471,
|
||
|
|
"eval_kl/n_epsilon_steps": 0.3958333432674408,
|
||
|
|
"eval_kl/p_epsilon_steps": 0.6037326455116272,
|
||
|
|
"eval_logits/chosen": -0.9817464351654053,
|
||
|
|
"eval_logits/rejected": -0.8951107859611511,
|
||
|
|
"eval_logps/chosen": -182.90843200683594,
|
||
|
|
"eval_logps/ref_chosen": -87.82356262207031,
|
||
|
|
"eval_logps/ref_rejected": -82.81887817382812,
|
||
|
|
"eval_logps/rejected": -209.30340576171875,
|
||
|
|
"eval_loss": 0.6429124474525452,
|
||
|
|
"eval_rewards/accuracies": 0.6323784589767456,
|
||
|
|
"eval_rewards/chosen": -0.5156466960906982,
|
||
|
|
"eval_rewards/margins": 0.1663396805524826,
|
||
|
|
"eval_rewards/rejected": -0.6819863319396973,
|
||
|
|
"eval_runtime": 22.339,
|
||
|
|
"eval_samples_per_second": 104.705,
|
||
|
|
"eval_steps_per_second": 0.851,
|
||
|
|
"step": 200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6029411764705882,
|
||
|
|
"grad_norm": 5.395305156707764,
|
||
|
|
"kl/avg_steps": 0.31562501192092896,
|
||
|
|
"kl/beta": 0.005382629111409187,
|
||
|
|
"kl/n_epsilon_steps": 0.3421874940395355,
|
||
|
|
"kl/p_epsilon_steps": 0.6578124761581421,
|
||
|
|
"learning_rate": 2.065879555832674e-07,
|
||
|
|
"logits/chosen": -0.9339988827705383,
|
||
|
|
"logits/rejected": -0.8349924087524414,
|
||
|
|
"logps/chosen": -150.00833129882812,
|
||
|
|
"logps/ref_chosen": -71.83072662353516,
|
||
|
|
"logps/ref_rejected": -78.26126861572266,
|
||
|
|
"logps/rejected": -207.3239288330078,
|
||
|
|
"loss": 0.5976,
|
||
|
|
"rewards/accuracies": 0.7015625238418579,
|
||
|
|
"rewards/chosen": -0.42064207792282104,
|
||
|
|
"rewards/margins": 0.26989927887916565,
|
||
|
|
"rewards/rejected": -0.6905413866043091,
|
||
|
|
"step": 205
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6176470588235294,
|
||
|
|
"grad_norm": 8.636336326599121,
|
||
|
|
"kl/avg_steps": 0.3343749940395355,
|
||
|
|
"kl/beta": 0.005294554866850376,
|
||
|
|
"kl/n_epsilon_steps": 0.33281248807907104,
|
||
|
|
"kl/p_epsilon_steps": 0.667187511920929,
|
||
|
|
"learning_rate": 1.9401235374032425e-07,
|
||
|
|
"logits/chosen": -0.940881073474884,
|
||
|
|
"logits/rejected": -0.835827648639679,
|
||
|
|
"logps/chosen": -169.9760284423828,
|
||
|
|
"logps/ref_chosen": -81.13362121582031,
|
||
|
|
"logps/ref_rejected": -83.91246032714844,
|
||
|
|
"logps/rejected": -226.44479370117188,
|
||
|
|
"loss": 0.5961,
|
||
|
|
"rewards/accuracies": 0.7124999761581421,
|
||
|
|
"rewards/chosen": -0.4700423777103424,
|
||
|
|
"rewards/margins": 0.28001874685287476,
|
||
|
|
"rewards/rejected": -0.7500611543655396,
|
||
|
|
"step": 210
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6323529411764706,
|
||
|
|
"grad_norm": 5.697958946228027,
|
||
|
|
"kl/avg_steps": 0.37968748807907104,
|
||
|
|
"kl/beta": 0.005207170732319355,
|
||
|
|
"kl/n_epsilon_steps": 0.30937498807907104,
|
||
|
|
"kl/p_epsilon_steps": 0.6890624761581421,
|
||
|
|
"learning_rate": 1.8158425248197928e-07,
|
||
|
|
"logits/chosen": -0.9595499038696289,
|
||
|
|
"logits/rejected": -0.8254610300064087,
|
||
|
|
"logps/chosen": -168.97909545898438,
|
||
|
|
"logps/ref_chosen": -79.5214614868164,
|
||
|
|
"logps/ref_rejected": -83.58778381347656,
|
||
|
|
"logps/rejected": -225.5334014892578,
|
||
|
|
"loss": 0.5994,
|
||
|
|
"rewards/accuracies": 0.737500011920929,
|
||
|
|
"rewards/chosen": -0.4653104245662689,
|
||
|
|
"rewards/margins": 0.2690308690071106,
|
||
|
|
"rewards/rejected": -0.7343412637710571,
|
||
|
|
"step": 215
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6470588235294118,
|
||
|
|
"grad_norm": 5.304469108581543,
|
||
|
|
"kl/avg_steps": 0.3343749940395355,
|
||
|
|
"kl/beta": 0.005111886188387871,
|
||
|
|
"kl/n_epsilon_steps": 0.33281248807907104,
|
||
|
|
"kl/p_epsilon_steps": 0.667187511920929,
|
||
|
|
"learning_rate": 1.6933639389195134e-07,
|
||
|
|
"logits/chosen": -0.9539089202880859,
|
||
|
|
"logits/rejected": -0.8665965795516968,
|
||
|
|
"logps/chosen": -166.537353515625,
|
||
|
|
"logps/ref_chosen": -81.25938415527344,
|
||
|
|
"logps/ref_rejected": -83.04185485839844,
|
||
|
|
"logps/rejected": -215.3668670654297,
|
||
|
|
"loss": 0.6056,
|
||
|
|
"rewards/accuracies": 0.723437488079071,
|
||
|
|
"rewards/chosen": -0.43559327721595764,
|
||
|
|
"rewards/margins": 0.2370072603225708,
|
||
|
|
"rewards/rejected": -0.6726005673408508,
|
||
|
|
"step": 220
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6617647058823529,
|
||
|
|
"grad_norm": 5.622444152832031,
|
||
|
|
"kl/avg_steps": 0.4468750059604645,
|
||
|
|
"kl/beta": 0.005018714815378189,
|
||
|
|
"kl/n_epsilon_steps": 0.27656251192092896,
|
||
|
|
"kl/p_epsilon_steps": 0.723437488079071,
|
||
|
|
"learning_rate": 1.573010452010098e-07,
|
||
|
|
"logits/chosen": -0.9484726190567017,
|
||
|
|
"logits/rejected": -0.8518384695053101,
|
||
|
|
"logps/chosen": -162.01535034179688,
|
||
|
|
"logps/ref_chosen": -77.427001953125,
|
||
|
|
"logps/ref_rejected": -89.23592376708984,
|
||
|
|
"logps/rejected": -233.6844024658203,
|
||
|
|
"loss": 0.5839,
|
||
|
|
"rewards/accuracies": 0.765625,
|
||
|
|
"rewards/chosen": -0.4237908720970154,
|
||
|
|
"rewards/margins": 0.2962331175804138,
|
||
|
|
"rewards/rejected": -0.7200239896774292,
|
||
|
|
"step": 225
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6764705882352942,
|
||
|
|
"grad_norm": 5.60673189163208,
|
||
|
|
"kl/avg_steps": 0.48124998807907104,
|
||
|
|
"kl/beta": 0.004900630097836256,
|
||
|
|
"kl/n_epsilon_steps": 0.2593750059604645,
|
||
|
|
"kl/p_epsilon_steps": 0.7406250238418579,
|
||
|
|
"learning_rate": 1.4550991377830423e-07,
|
||
|
|
"logits/chosen": -0.9383388757705688,
|
||
|
|
"logits/rejected": -0.856258749961853,
|
||
|
|
"logps/chosen": -156.29066467285156,
|
||
|
|
"logps/ref_chosen": -70.1819839477539,
|
||
|
|
"logps/ref_rejected": -87.79248046875,
|
||
|
|
"logps/rejected": -232.82858276367188,
|
||
|
|
"loss": 0.5866,
|
||
|
|
"rewards/accuracies": 0.7671874761581421,
|
||
|
|
"rewards/chosen": -0.42099839448928833,
|
||
|
|
"rewards/margins": 0.2847110629081726,
|
||
|
|
"rewards/rejected": -0.7057094573974609,
|
||
|
|
"step": 230
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6911764705882353,
|
||
|
|
"grad_norm": 5.7163004875183105,
|
||
|
|
"kl/avg_steps": 0.4828124940395355,
|
||
|
|
"kl/beta": 0.004785512108355761,
|
||
|
|
"kl/n_epsilon_steps": 0.2578125,
|
||
|
|
"kl/p_epsilon_steps": 0.7406250238418579,
|
||
|
|
"learning_rate": 1.339940635976592e-07,
|
||
|
|
"logits/chosen": -0.8863986134529114,
|
||
|
|
"logits/rejected": -0.8059118390083313,
|
||
|
|
"logps/chosen": -174.5547637939453,
|
||
|
|
"logps/ref_chosen": -77.51251220703125,
|
||
|
|
"logps/ref_rejected": -89.81958770751953,
|
||
|
|
"logps/rejected": -251.2958526611328,
|
||
|
|
"loss": 0.583,
|
||
|
|
"rewards/accuracies": 0.7734375,
|
||
|
|
"rewards/chosen": -0.4634285569190979,
|
||
|
|
"rewards/margins": 0.30392760038375854,
|
||
|
|
"rewards/rejected": -0.7673560976982117,
|
||
|
|
"step": 235
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7058823529411765,
|
||
|
|
"grad_norm": 6.860780715942383,
|
||
|
|
"kl/avg_steps": 0.36250001192092896,
|
||
|
|
"kl/beta": 0.004683743230998516,
|
||
|
|
"kl/n_epsilon_steps": 0.3187499940395355,
|
||
|
|
"kl/p_epsilon_steps": 0.6812499761581421,
|
||
|
|
"learning_rate": 1.227838333989088e-07,
|
||
|
|
"logits/chosen": -0.8450605273246765,
|
||
|
|
"logits/rejected": -0.7272099256515503,
|
||
|
|
"logps/chosen": -177.47744750976562,
|
||
|
|
"logps/ref_chosen": -74.5803451538086,
|
||
|
|
"logps/ref_rejected": -81.81297302246094,
|
||
|
|
"logps/rejected": -243.76736450195312,
|
||
|
|
"loss": 0.5968,
|
||
|
|
"rewards/accuracies": 0.715624988079071,
|
||
|
|
"rewards/chosen": -0.4815599322319031,
|
||
|
|
"rewards/margins": 0.27244722843170166,
|
||
|
|
"rewards/rejected": -0.7540072202682495,
|
||
|
|
"step": 240
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7205882352941176,
|
||
|
|
"grad_norm": 5.382650852203369,
|
||
|
|
"kl/avg_steps": 0.4000000059604645,
|
||
|
|
"kl/beta": 0.004598929081112146,
|
||
|
|
"kl/n_epsilon_steps": 0.30000001192092896,
|
||
|
|
"kl/p_epsilon_steps": 0.699999988079071,
|
||
|
|
"learning_rate": 1.1190875675987355e-07,
|
||
|
|
"logits/chosen": -0.8378638029098511,
|
||
|
|
"logits/rejected": -0.7307332158088684,
|
||
|
|
"logps/chosen": -178.53826904296875,
|
||
|
|
"logps/ref_chosen": -76.56635284423828,
|
||
|
|
"logps/ref_rejected": -86.859130859375,
|
||
|
|
"logps/rejected": -255.5751495361328,
|
||
|
|
"loss": 0.5827,
|
||
|
|
"rewards/accuracies": 0.7281249761581421,
|
||
|
|
"rewards/chosen": -0.46838441491127014,
|
||
|
|
"rewards/margins": 0.30266332626342773,
|
||
|
|
"rewards/rejected": -0.7710477113723755,
|
||
|
|
"step": 245
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7352941176470589,
|
||
|
|
"grad_norm": 5.63203763961792,
|
||
|
|
"kl/avg_steps": 0.3499999940395355,
|
||
|
|
"kl/beta": 0.00451111001893878,
|
||
|
|
"kl/n_epsilon_steps": 0.32499998807907104,
|
||
|
|
"kl/p_epsilon_steps": 0.675000011920929,
|
||
|
|
"learning_rate": 1.0139748428955333e-07,
|
||
|
|
"logits/chosen": -0.8333392143249512,
|
||
|
|
"logits/rejected": -0.7355720400810242,
|
||
|
|
"logps/chosen": -183.86294555664062,
|
||
|
|
"logps/ref_chosen": -77.37183380126953,
|
||
|
|
"logps/ref_rejected": -79.96475219726562,
|
||
|
|
"logps/rejected": -237.01980590820312,
|
||
|
|
"loss": 0.6155,
|
||
|
|
"rewards/accuracies": 0.706250011920929,
|
||
|
|
"rewards/chosen": -0.4800783693790436,
|
||
|
|
"rewards/margins": 0.22398455440998077,
|
||
|
|
"rewards/rejected": -0.7040629982948303,
|
||
|
|
"step": 250
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.75,
|
||
|
|
"grad_norm": 5.822533130645752,
|
||
|
|
"kl/avg_steps": 0.35624998807907104,
|
||
|
|
"kl/beta": 0.004430105909705162,
|
||
|
|
"kl/n_epsilon_steps": 0.3218750059604645,
|
||
|
|
"kl/p_epsilon_steps": 0.6781250238418579,
|
||
|
|
"learning_rate": 9.127770814751932e-08,
|
||
|
|
"logits/chosen": -0.8416454195976257,
|
||
|
|
"logits/rejected": -0.7227948904037476,
|
||
|
|
"logps/chosen": -184.06822204589844,
|
||
|
|
"logps/ref_chosen": -79.62632751464844,
|
||
|
|
"logps/ref_rejected": -83.8196792602539,
|
||
|
|
"logps/rejected": -246.44998168945312,
|
||
|
|
"loss": 0.6013,
|
||
|
|
"rewards/accuracies": 0.699999988079071,
|
||
|
|
"rewards/chosen": -0.46239757537841797,
|
||
|
|
"rewards/margins": 0.25374993681907654,
|
||
|
|
"rewards/rejected": -0.7161475419998169,
|
||
|
|
"step": 255
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7647058823529411,
|
||
|
|
"grad_norm": 5.885540008544922,
|
||
|
|
"kl/avg_steps": 0.3687500059604645,
|
||
|
|
"kl/beta": 0.004350547678768635,
|
||
|
|
"kl/n_epsilon_steps": 0.31562501192092896,
|
||
|
|
"kl/p_epsilon_steps": 0.684374988079071,
|
||
|
|
"learning_rate": 8.15760890883607e-08,
|
||
|
|
"logits/chosen": -0.8616160154342651,
|
||
|
|
"logits/rejected": -0.7643041610717773,
|
||
|
|
"logps/chosen": -184.8510284423828,
|
||
|
|
"logps/ref_chosen": -80.03411865234375,
|
||
|
|
"logps/ref_rejected": -85.39453125,
|
||
|
|
"logps/rejected": -246.5211639404297,
|
||
|
|
"loss": 0.6056,
|
||
|
|
"rewards/accuracies": 0.7124999761581421,
|
||
|
|
"rewards/chosen": -0.4556017816066742,
|
||
|
|
"rewards/margins": 0.2411097288131714,
|
||
|
|
"rewards/rejected": -0.6967115998268127,
|
||
|
|
"step": 260
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7794117647058824,
|
||
|
|
"grad_norm": 5.461711883544922,
|
||
|
|
"kl/avg_steps": 0.3531250059604645,
|
||
|
|
"kl/beta": 0.0042734695598483086,
|
||
|
|
"kl/n_epsilon_steps": 0.32343751192092896,
|
||
|
|
"kl/p_epsilon_steps": 0.676562488079071,
|
||
|
|
"learning_rate": 7.231818622338822e-08,
|
||
|
|
"logits/chosen": -0.8387966156005859,
|
||
|
|
"logits/rejected": -0.7239198088645935,
|
||
|
|
"logps/chosen": -178.0113067626953,
|
||
|
|
"logps/ref_chosen": -76.63539123535156,
|
||
|
|
"logps/ref_rejected": -79.94613647460938,
|
||
|
|
"logps/rejected": -238.1660614013672,
|
||
|
|
"loss": 0.603,
|
||
|
|
"rewards/accuracies": 0.7109375,
|
||
|
|
"rewards/chosen": -0.432711660861969,
|
||
|
|
"rewards/margins": 0.2392859160900116,
|
||
|
|
"rewards/rejected": -0.6719975471496582,
|
||
|
|
"step": 265
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7941176470588235,
|
||
|
|
"grad_norm": 5.6931915283203125,
|
||
|
|
"kl/avg_steps": 0.3968749940395355,
|
||
|
|
"kl/beta": 0.004198429174721241,
|
||
|
|
"kl/n_epsilon_steps": 0.30156248807907104,
|
||
|
|
"kl/p_epsilon_steps": 0.698437511920929,
|
||
|
|
"learning_rate": 6.352838968463919e-08,
|
||
|
|
"logits/chosen": -0.8596851229667664,
|
||
|
|
"logits/rejected": -0.7193423509597778,
|
||
|
|
"logps/chosen": -173.6400604248047,
|
||
|
|
"logps/ref_chosen": -76.02762603759766,
|
||
|
|
"logps/ref_rejected": -80.83404541015625,
|
||
|
|
"logps/rejected": -236.97607421875,
|
||
|
|
"loss": 0.6021,
|
||
|
|
"rewards/accuracies": 0.731249988079071,
|
||
|
|
"rewards/chosen": -0.4092663824558258,
|
||
|
|
"rewards/margins": 0.24205096065998077,
|
||
|
|
"rewards/rejected": -0.6513173580169678,
|
||
|
|
"step": 270
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8088235294117647,
|
||
|
|
"grad_norm": 5.091865062713623,
|
||
|
|
"kl/avg_steps": 0.40625,
|
||
|
|
"kl/beta": 0.004111775197088718,
|
||
|
|
"kl/n_epsilon_steps": 0.296875,
|
||
|
|
"kl/p_epsilon_steps": 0.703125,
|
||
|
|
"learning_rate": 5.5229856368582376e-08,
|
||
|
|
"logits/chosen": -0.8502656817436218,
|
||
|
|
"logits/rejected": -0.7681766748428345,
|
||
|
|
"logps/chosen": -180.9755859375,
|
||
|
|
"logps/ref_chosen": -77.58733367919922,
|
||
|
|
"logps/ref_rejected": -88.50263214111328,
|
||
|
|
"logps/rejected": -254.04690551757812,
|
||
|
|
"loss": 0.5997,
|
||
|
|
"rewards/accuracies": 0.739062488079071,
|
||
|
|
"rewards/chosen": -0.4245019555091858,
|
||
|
|
"rewards/margins": 0.25200843811035156,
|
||
|
|
"rewards/rejected": -0.6765104532241821,
|
||
|
|
"step": 275
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8235294117647058,
|
||
|
|
"grad_norm": 5.737886905670166,
|
||
|
|
"kl/avg_steps": 0.42500001192092896,
|
||
|
|
"kl/beta": 0.004024769179522991,
|
||
|
|
"kl/n_epsilon_steps": 0.2874999940395355,
|
||
|
|
"kl/p_epsilon_steps": 0.7124999761581421,
|
||
|
|
"learning_rate": 4.7444448928806615e-08,
|
||
|
|
"logits/chosen": -0.876409649848938,
|
||
|
|
"logits/rejected": -0.7702105641365051,
|
||
|
|
"logps/chosen": -186.74009704589844,
|
||
|
|
"logps/ref_chosen": -81.46415710449219,
|
||
|
|
"logps/ref_rejected": -94.69911193847656,
|
||
|
|
"logps/rejected": -265.301025390625,
|
||
|
|
"loss": 0.5958,
|
||
|
|
"rewards/accuracies": 0.729687511920929,
|
||
|
|
"rewards/chosen": -0.4230107367038727,
|
||
|
|
"rewards/margins": 0.2593509256839752,
|
||
|
|
"rewards/rejected": -0.6823617219924927,
|
||
|
|
"step": 280
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8382352941176471,
|
||
|
|
"grad_norm": 5.05964469909668,
|
||
|
|
"kl/avg_steps": 0.4000000059604645,
|
||
|
|
"kl/beta": 0.003945710603147745,
|
||
|
|
"kl/n_epsilon_steps": 0.30000001192092896,
|
||
|
|
"kl/p_epsilon_steps": 0.699999988079071,
|
||
|
|
"learning_rate": 4.019267817841834e-08,
|
||
|
|
"logits/chosen": -0.8164280652999878,
|
||
|
|
"logits/rejected": -0.7374383211135864,
|
||
|
|
"logps/chosen": -183.66696166992188,
|
||
|
|
"logps/ref_chosen": -77.9266128540039,
|
||
|
|
"logps/ref_rejected": -85.77226257324219,
|
||
|
|
"logps/rejected": -251.9569854736328,
|
||
|
|
"loss": 0.6036,
|
||
|
|
"rewards/accuracies": 0.731249988079071,
|
||
|
|
"rewards/chosen": -0.41665568947792053,
|
||
|
|
"rewards/margins": 0.23489956557750702,
|
||
|
|
"rewards/rejected": -0.6515552997589111,
|
||
|
|
"step": 285
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8529411764705882,
|
||
|
|
"grad_norm": 4.995400428771973,
|
||
|
|
"kl/avg_steps": 0.37187498807907104,
|
||
|
|
"kl/beta": 0.003868584055453539,
|
||
|
|
"kl/n_epsilon_steps": 0.3140625059604645,
|
||
|
|
"kl/p_epsilon_steps": 0.6859375238418579,
|
||
|
|
"learning_rate": 3.349364905389032e-08,
|
||
|
|
"logits/chosen": -0.788993775844574,
|
||
|
|
"logits/rejected": -0.7104808688163757,
|
||
|
|
"logps/chosen": -178.77645874023438,
|
||
|
|
"logps/ref_chosen": -72.49942016601562,
|
||
|
|
"logps/ref_rejected": -83.77849578857422,
|
||
|
|
"logps/rejected": -253.64126586914062,
|
||
|
|
"loss": 0.6008,
|
||
|
|
"rewards/accuracies": 0.7250000238418579,
|
||
|
|
"rewards/chosen": -0.41073670983314514,
|
||
|
|
"rewards/margins": 0.24252267181873322,
|
||
|
|
"rewards/rejected": -0.6532593965530396,
|
||
|
|
"step": 290
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8676470588235294,
|
||
|
|
"grad_norm": 5.24601411819458,
|
||
|
|
"kl/avg_steps": 0.4000000059604645,
|
||
|
|
"kl/beta": 0.0037986349780112505,
|
||
|
|
"kl/n_epsilon_steps": 0.30000001192092896,
|
||
|
|
"kl/p_epsilon_steps": 0.699999988079071,
|
||
|
|
"learning_rate": 2.736501028272095e-08,
|
||
|
|
"logits/chosen": -0.7796621918678284,
|
||
|
|
"logits/rejected": -0.7315692901611328,
|
||
|
|
"logps/chosen": -182.55836486816406,
|
||
|
|
"logps/ref_chosen": -72.81735229492188,
|
||
|
|
"logps/ref_rejected": -91.62478637695312,
|
||
|
|
"logps/rejected": -265.3271789550781,
|
||
|
|
"loss": 0.6044,
|
||
|
|
"rewards/accuracies": 0.739062488079071,
|
||
|
|
"rewards/chosen": -0.4162219166755676,
|
||
|
|
"rewards/margins": 0.23918703198432922,
|
||
|
|
"rewards/rejected": -0.6554089784622192,
|
||
|
|
"step": 295
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8823529411764706,
|
||
|
|
"grad_norm": 5.059004306793213,
|
||
|
|
"kl/avg_steps": 0.39375001192092896,
|
||
|
|
"kl/beta": 0.003725191578269005,
|
||
|
|
"kl/n_epsilon_steps": 0.3031249940395355,
|
||
|
|
"kl/p_epsilon_steps": 0.6968749761581421,
|
||
|
|
"learning_rate": 2.1822907887504932e-08,
|
||
|
|
"logits/chosen": -0.7761374711990356,
|
||
|
|
"logits/rejected": -0.6450864672660828,
|
||
|
|
"logps/chosen": -178.6937255859375,
|
||
|
|
"logps/ref_chosen": -70.4697265625,
|
||
|
|
"logps/ref_rejected": -77.26274108886719,
|
||
|
|
"logps/rejected": -241.739013671875,
|
||
|
|
"loss": 0.6156,
|
||
|
|
"rewards/accuracies": 0.7265625,
|
||
|
|
"rewards/chosen": -0.40281882882118225,
|
||
|
|
"rewards/margins": 0.20612934231758118,
|
||
|
|
"rewards/rejected": -0.6089481115341187,
|
||
|
|
"step": 300
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8823529411764706,
|
||
|
|
"eval_kl/n_epsilon_steps": 0.3849826455116272,
|
||
|
|
"eval_kl/p_epsilon_steps": 0.6150173544883728,
|
||
|
|
"eval_logits/chosen": -0.8325175046920776,
|
||
|
|
"eval_logits/rejected": -0.7259347438812256,
|
||
|
|
"eval_logps/chosen": -208.17991638183594,
|
||
|
|
"eval_logps/ref_chosen": -87.82356262207031,
|
||
|
|
"eval_logps/ref_rejected": -82.81887817382812,
|
||
|
|
"eval_logps/rejected": -243.57456970214844,
|
||
|
|
"eval_loss": 0.6442785263061523,
|
||
|
|
"eval_rewards/accuracies": 0.6401909589767456,
|
||
|
|
"eval_rewards/chosen": -0.44304588437080383,
|
||
|
|
"eval_rewards/margins": 0.14540132880210876,
|
||
|
|
"eval_rewards/rejected": -0.5884472131729126,
|
||
|
|
"eval_runtime": 22.3967,
|
||
|
|
"eval_samples_per_second": 104.435,
|
||
|
|
"eval_steps_per_second": 0.848,
|
||
|
|
"step": 300
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8970588235294118,
|
||
|
|
"grad_norm": 5.704087734222412,
|
||
|
|
"kl/avg_steps": 0.375,
|
||
|
|
"kl/beta": 0.003651682287454605,
|
||
|
|
"kl/n_epsilon_steps": 0.3125,
|
||
|
|
"kl/p_epsilon_steps": 0.6875,
|
||
|
|
"learning_rate": 1.6881942648911074e-08,
|
||
|
|
"logits/chosen": -0.7806903719902039,
|
||
|
|
"logits/rejected": -0.7004286050796509,
|
||
|
|
"logps/chosen": -181.45826721191406,
|
||
|
|
"logps/ref_chosen": -75.5998764038086,
|
||
|
|
"logps/ref_rejected": -86.76122283935547,
|
||
|
|
"logps/rejected": -256.80096435546875,
|
||
|
|
"loss": 0.6049,
|
||
|
|
"rewards/accuracies": 0.7250000238418579,
|
||
|
|
"rewards/chosen": -0.3862135410308838,
|
||
|
|
"rewards/margins": 0.23094138503074646,
|
||
|
|
"rewards/rejected": -0.6171549558639526,
|
||
|
|
"step": 305
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9117647058823529,
|
||
|
|
"grad_norm": 5.218584060668945,
|
||
|
|
"kl/avg_steps": 0.3968749940395355,
|
||
|
|
"kl/beta": 0.0035780933685600758,
|
||
|
|
"kl/n_epsilon_steps": 0.30156248807907104,
|
||
|
|
"kl/p_epsilon_steps": 0.698437511920929,
|
||
|
|
"learning_rate": 1.2555131639630567e-08,
|
||
|
|
"logits/chosen": -0.7832438349723816,
|
||
|
|
"logits/rejected": -0.6719276309013367,
|
||
|
|
"logps/chosen": -191.44869995117188,
|
||
|
|
"logps/ref_chosen": -78.4868392944336,
|
||
|
|
"logps/ref_rejected": -83.08047485351562,
|
||
|
|
"logps/rejected": -258.40545654296875,
|
||
|
|
"loss": 0.6111,
|
||
|
|
"rewards/accuracies": 0.7265625,
|
||
|
|
"rewards/chosen": -0.4038282036781311,
|
||
|
|
"rewards/margins": 0.21982701122760773,
|
||
|
|
"rewards/rejected": -0.6236552000045776,
|
||
|
|
"step": 310
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9264705882352942,
|
||
|
|
"grad_norm": 6.10360860824585,
|
||
|
|
"kl/avg_steps": 0.3375000059604645,
|
||
|
|
"kl/beta": 0.0035165518056601286,
|
||
|
|
"kl/n_epsilon_steps": 0.33125001192092896,
|
||
|
|
"kl/p_epsilon_steps": 0.668749988079071,
|
||
|
|
"learning_rate": 8.85387393063622e-09,
|
||
|
|
"logits/chosen": -0.8057095408439636,
|
||
|
|
"logits/rejected": -0.7011617422103882,
|
||
|
|
"logps/chosen": -194.56436157226562,
|
||
|
|
"logps/ref_chosen": -79.54651641845703,
|
||
|
|
"logps/ref_rejected": -87.11808776855469,
|
||
|
|
"logps/rejected": -261.40032958984375,
|
||
|
|
"loss": 0.6153,
|
||
|
|
"rewards/accuracies": 0.699999988079071,
|
||
|
|
"rewards/chosen": -0.4042418897151947,
|
||
|
|
"rewards/margins": 0.20526555180549622,
|
||
|
|
"rewards/rejected": -0.6095074415206909,
|
||
|
|
"step": 315
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9411764705882353,
|
||
|
|
"grad_norm": 5.0830488204956055,
|
||
|
|
"kl/avg_steps": 0.28437501192092896,
|
||
|
|
"kl/beta": 0.0034615718759596348,
|
||
|
|
"kl/n_epsilon_steps": 0.3578124940395355,
|
||
|
|
"kl/p_epsilon_steps": 0.6421874761581421,
|
||
|
|
"learning_rate": 5.7879205600998296e-09,
|
||
|
|
"logits/chosen": -0.8048986196517944,
|
||
|
|
"logits/rejected": -0.6852750778198242,
|
||
|
|
"logps/chosen": -193.45582580566406,
|
||
|
|
"logps/ref_chosen": -78.56401062011719,
|
||
|
|
"logps/ref_rejected": -83.85292053222656,
|
||
|
|
"logps/rejected": -248.977783203125,
|
||
|
|
"loss": 0.6302,
|
||
|
|
"rewards/accuracies": 0.668749988079071,
|
||
|
|
"rewards/chosen": -0.39771518111228943,
|
||
|
|
"rewards/margins": 0.17076563835144043,
|
||
|
|
"rewards/rejected": -0.5684808492660522,
|
||
|
|
"step": 320
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9558823529411765,
|
||
|
|
"grad_norm": 5.110870361328125,
|
||
|
|
"kl/avg_steps": 0.3499999940395355,
|
||
|
|
"kl/beta": 0.0034066252410411835,
|
||
|
|
"kl/n_epsilon_steps": 0.32499998807907104,
|
||
|
|
"kl/p_epsilon_steps": 0.675000011920929,
|
||
|
|
"learning_rate": 3.3653488440851253e-09,
|
||
|
|
"logits/chosen": -0.7829563021659851,
|
||
|
|
"logits/rejected": -0.7219451665878296,
|
||
|
|
"logps/chosen": -183.75088500976562,
|
||
|
|
"logps/ref_chosen": -74.60850524902344,
|
||
|
|
"logps/ref_rejected": -86.81698608398438,
|
||
|
|
"logps/rejected": -254.2379150390625,
|
||
|
|
"loss": 0.6219,
|
||
|
|
"rewards/accuracies": 0.692187488079071,
|
||
|
|
"rewards/chosen": -0.3717408776283264,
|
||
|
|
"rewards/margins": 0.195209339261055,
|
||
|
|
"rewards/rejected": -0.5669502019882202,
|
||
|
|
"step": 325
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9705882352941176,
|
||
|
|
"grad_norm": 4.562494277954102,
|
||
|
|
"kl/avg_steps": 0.47343748807907104,
|
||
|
|
"kl/beta": 0.003342044074088335,
|
||
|
|
"kl/n_epsilon_steps": 0.26249998807907104,
|
||
|
|
"kl/p_epsilon_steps": 0.7359374761581421,
|
||
|
|
"learning_rate": 1.592541096695571e-09,
|
||
|
|
"logits/chosen": -0.7936745882034302,
|
||
|
|
"logits/rejected": -0.739700436592102,
|
||
|
|
"logps/chosen": -178.63034057617188,
|
||
|
|
"logps/ref_chosen": -74.63096618652344,
|
||
|
|
"logps/ref_rejected": -92.50404357910156,
|
||
|
|
"logps/rejected": -266.2847595214844,
|
||
|
|
"loss": 0.601,
|
||
|
|
"rewards/accuracies": 0.7578125,
|
||
|
|
"rewards/chosen": -0.34669384360313416,
|
||
|
|
"rewards/margins": 0.23011669516563416,
|
||
|
|
"rewards/rejected": -0.5768105387687683,
|
||
|
|
"step": 330
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9852941176470589,
|
||
|
|
"grad_norm": 4.651317596435547,
|
||
|
|
"kl/avg_steps": 0.35624998807907104,
|
||
|
|
"kl/beta": 0.003271129447966814,
|
||
|
|
"kl/n_epsilon_steps": 0.3218750059604645,
|
||
|
|
"kl/p_epsilon_steps": 0.6781250238418579,
|
||
|
|
"learning_rate": 4.741678157389739e-10,
|
||
|
|
"logits/chosen": -0.8402039408683777,
|
||
|
|
"logits/rejected": -0.7369452118873596,
|
||
|
|
"logps/chosen": -193.51834106445312,
|
||
|
|
"logps/ref_chosen": -81.25680541992188,
|
||
|
|
"logps/ref_rejected": -88.71739196777344,
|
||
|
|
"logps/rejected": -261.07110595703125,
|
||
|
|
"loss": 0.6167,
|
||
|
|
"rewards/accuracies": 0.7203124761581421,
|
||
|
|
"rewards/chosen": -0.3669508695602417,
|
||
|
|
"rewards/margins": 0.19351065158843994,
|
||
|
|
"rewards/rejected": -0.5604615211486816,
|
||
|
|
"step": 335
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0,
|
||
|
|
"grad_norm": 4.5893425941467285,
|
||
|
|
"kl/avg_steps": 0.37812501192092896,
|
||
|
|
"kl/beta": 0.003211395815014839,
|
||
|
|
"kl/n_epsilon_steps": 0.3109374940395355,
|
||
|
|
"kl/p_epsilon_steps": 0.6890624761581421,
|
||
|
|
"learning_rate": 1.31753782067201e-11,
|
||
|
|
"logits/chosen": -0.7557514905929565,
|
||
|
|
"logits/rejected": -0.6398700475692749,
|
||
|
|
"logps/chosen": -185.0140838623047,
|
||
|
|
"logps/ref_chosen": -72.54796600341797,
|
||
|
|
"logps/ref_rejected": -78.83277893066406,
|
||
|
|
"logps/rejected": -256.5284423828125,
|
||
|
|
"loss": 0.612,
|
||
|
|
"rewards/accuracies": 0.721875011920929,
|
||
|
|
"rewards/chosen": -0.36068642139434814,
|
||
|
|
"rewards/margins": 0.20638315379619598,
|
||
|
|
"rewards/rejected": -0.5670695900917053,
|
||
|
|
"step": 340
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0,
|
||
|
|
"step": 340,
|
||
|
|
"total_flos": 0.0,
|
||
|
|
"train_loss": 0.6232832217917723,
|
||
|
|
"train_runtime": 1489.7896,
|
||
|
|
"train_samples_per_second": 29.265,
|
||
|
|
"train_steps_per_second": 0.228
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"logging_steps": 5,
|
||
|
|
"max_steps": 340,
|
||
|
|
"num_input_tokens_seen": 0,
|
||
|
|
"num_train_epochs": 1,
|
||
|
|
"save_steps": 200,
|
||
|
|
"stateful_callbacks": {
|
||
|
|
"TrainerControl": {
|
||
|
|
"args": {
|
||
|
|
"should_epoch_stop": false,
|
||
|
|
"should_evaluate": false,
|
||
|
|
"should_log": false,
|
||
|
|
"should_save": true,
|
||
|
|
"should_training_stop": true
|
||
|
|
},
|
||
|
|
"attributes": {}
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"total_flos": 0.0,
|
||
|
|
"train_batch_size": 16,
|
||
|
|
"trial_name": null,
|
||
|
|
"trial_params": null
|
||
|
|
}
|