1097 lines
38 KiB
JSON
1097 lines
38 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 1.0,
|
|
"eval_steps": 100,
|
|
"global_step": 330,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0030303030303030303,
|
|
"grad_norm": 10.455310821533203,
|
|
"learning_rate": 0.0,
|
|
"logits/chosen": -0.818070113658905,
|
|
"logits/rejected": -0.7612971663475037,
|
|
"logps/chosen": -27.54741859436035,
|
|
"logps/ref_chosen": -27.53912353515625,
|
|
"logps/ref_rejected": -62.889225006103516,
|
|
"logps/rejected": -62.880741119384766,
|
|
"loss": 0.6926,
|
|
"margin_dpo/margin_mean": -0.01677680015563965,
|
|
"margin_dpo/margin_std": 0.1853054314851761,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.015151515151515152,
|
|
"grad_norm": 11.397998809814453,
|
|
"learning_rate": 6.060606060606061e-08,
|
|
"logits/chosen": -0.8404617309570312,
|
|
"logits/rejected": -0.8060516119003296,
|
|
"logps/chosen": -51.65924072265625,
|
|
"logps/ref_chosen": -51.643856048583984,
|
|
"logps/ref_rejected": -84.63095092773438,
|
|
"logps/rejected": -84.6202392578125,
|
|
"loss": 0.6933,
|
|
"margin_dpo/margin_mean": -0.0260981023311615,
|
|
"margin_dpo/margin_std": 0.3153693377971649,
|
|
"step": 5
|
|
},
|
|
{
|
|
"epoch": 0.030303030303030304,
|
|
"grad_norm": 11.12632942199707,
|
|
"learning_rate": 1.3636363636363635e-07,
|
|
"logits/chosen": -0.7908369302749634,
|
|
"logits/rejected": -0.7584771513938904,
|
|
"logps/chosen": -64.20430755615234,
|
|
"logps/ref_chosen": -64.17414855957031,
|
|
"logps/ref_rejected": -96.51995849609375,
|
|
"logps/rejected": -96.55589294433594,
|
|
"loss": 0.6929,
|
|
"margin_dpo/margin_mean": 0.0057894946075975895,
|
|
"margin_dpo/margin_std": 0.33652475476264954,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.045454545454545456,
|
|
"grad_norm": 12.030816078186035,
|
|
"learning_rate": 2.121212121212121e-07,
|
|
"logits/chosen": -0.8053056001663208,
|
|
"logits/rejected": -0.8063974380493164,
|
|
"logps/chosen": -77.95388793945312,
|
|
"logps/ref_chosen": -77.93045806884766,
|
|
"logps/ref_rejected": -75.88431549072266,
|
|
"logps/rejected": -75.89156341552734,
|
|
"loss": 0.6927,
|
|
"margin_dpo/margin_mean": -0.016180897131562233,
|
|
"margin_dpo/margin_std": 0.3311070501804352,
|
|
"step": 15
|
|
},
|
|
{
|
|
"epoch": 0.06060606060606061,
|
|
"grad_norm": 12.039678573608398,
|
|
"learning_rate": 2.878787878787879e-07,
|
|
"logits/chosen": -0.7935067415237427,
|
|
"logits/rejected": -0.7536638975143433,
|
|
"logps/chosen": -55.504188537597656,
|
|
"logps/ref_chosen": -55.51140213012695,
|
|
"logps/ref_rejected": -86.6218490600586,
|
|
"logps/rejected": -86.65962982177734,
|
|
"loss": 0.6927,
|
|
"margin_dpo/margin_mean": 0.0450122132897377,
|
|
"margin_dpo/margin_std": 0.37105274200439453,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.07575757575757576,
|
|
"grad_norm": 10.380696296691895,
|
|
"learning_rate": 3.636363636363636e-07,
|
|
"logits/chosen": -0.7800458669662476,
|
|
"logits/rejected": -0.7748220562934875,
|
|
"logps/chosen": -65.15885162353516,
|
|
"logps/ref_chosen": -65.15419006347656,
|
|
"logps/ref_rejected": -70.9836196899414,
|
|
"logps/rejected": -71.05149841308594,
|
|
"loss": 0.6929,
|
|
"margin_dpo/margin_mean": 0.06321928650140762,
|
|
"margin_dpo/margin_std": 0.355155885219574,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.09090909090909091,
|
|
"grad_norm": 10.88476276397705,
|
|
"learning_rate": 4.3939393939393937e-07,
|
|
"logits/chosen": -0.8358621597290039,
|
|
"logits/rejected": -0.8101686239242554,
|
|
"logps/chosen": -54.09563064575195,
|
|
"logps/ref_chosen": -54.000160217285156,
|
|
"logps/ref_rejected": -86.43263244628906,
|
|
"logps/rejected": -86.5849609375,
|
|
"loss": 0.6906,
|
|
"margin_dpo/margin_mean": 0.05685856193304062,
|
|
"margin_dpo/margin_std": 0.3642476797103882,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.10606060606060606,
|
|
"grad_norm": 12.026762962341309,
|
|
"learning_rate": 4.999860140229787e-07,
|
|
"logits/chosen": -0.811154842376709,
|
|
"logits/rejected": -0.7937377691268921,
|
|
"logps/chosen": -67.01231384277344,
|
|
"logps/ref_chosen": -66.8745346069336,
|
|
"logps/ref_rejected": -86.6573257446289,
|
|
"logps/rejected": -86.97063446044922,
|
|
"loss": 0.6891,
|
|
"margin_dpo/margin_mean": 0.1755320429801941,
|
|
"margin_dpo/margin_std": 0.46879833936691284,
|
|
"step": 35
|
|
},
|
|
{
|
|
"epoch": 0.12121212121212122,
|
|
"grad_norm": 11.267840385437012,
|
|
"learning_rate": 4.994966691179711e-07,
|
|
"logits/chosen": -0.7241272926330566,
|
|
"logits/rejected": -0.6869423985481262,
|
|
"logps/chosen": -51.837364196777344,
|
|
"logps/ref_chosen": -51.43064498901367,
|
|
"logps/ref_rejected": -75.73628234863281,
|
|
"logps/rejected": -76.29964447021484,
|
|
"loss": 0.6848,
|
|
"margin_dpo/margin_mean": 0.15664692223072052,
|
|
"margin_dpo/margin_std": 0.6119893193244934,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.13636363636363635,
|
|
"grad_norm": 11.79084587097168,
|
|
"learning_rate": 4.983095894354857e-07,
|
|
"logits/chosen": -0.7654654383659363,
|
|
"logits/rejected": -0.7399241328239441,
|
|
"logps/chosen": -59.4940299987793,
|
|
"logps/ref_chosen": -58.967918395996094,
|
|
"logps/ref_rejected": -74.13176727294922,
|
|
"logps/rejected": -75.02941131591797,
|
|
"loss": 0.6777,
|
|
"margin_dpo/margin_mean": 0.37154078483581543,
|
|
"margin_dpo/margin_std": 0.763075590133667,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 0.15151515151515152,
|
|
"grad_norm": 12.672266006469727,
|
|
"learning_rate": 4.964280947263676e-07,
|
|
"logits/chosen": -0.7275325059890747,
|
|
"logits/rejected": -0.6958032250404358,
|
|
"logps/chosen": -56.945068359375,
|
|
"logps/ref_chosen": -55.99009323120117,
|
|
"logps/ref_rejected": -74.68233489990234,
|
|
"logps/rejected": -75.86155700683594,
|
|
"loss": 0.6755,
|
|
"margin_dpo/margin_mean": 0.22425690293312073,
|
|
"margin_dpo/margin_std": 1.2586849927902222,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.16666666666666666,
|
|
"grad_norm": 11.780351638793945,
|
|
"learning_rate": 4.938574467213517e-07,
|
|
"logits/chosen": -0.7339123487472534,
|
|
"logits/rejected": -0.7103201150894165,
|
|
"logps/chosen": -61.5482177734375,
|
|
"logps/ref_chosen": -60.068870544433594,
|
|
"logps/ref_rejected": -77.12890625,
|
|
"logps/rejected": -79.0832748413086,
|
|
"loss": 0.6714,
|
|
"margin_dpo/margin_mean": 0.4750184416770935,
|
|
"margin_dpo/margin_std": 1.5396963357925415,
|
|
"step": 55
|
|
},
|
|
{
|
|
"epoch": 0.18181818181818182,
|
|
"grad_norm": 11.140870094299316,
|
|
"learning_rate": 4.906048344162676e-07,
|
|
"logits/chosen": -0.678428053855896,
|
|
"logits/rejected": -0.6509960889816284,
|
|
"logps/chosen": -60.9329719543457,
|
|
"logps/ref_chosen": -58.871849060058594,
|
|
"logps/ref_rejected": -76.81136322021484,
|
|
"logps/rejected": -79.64076232910156,
|
|
"loss": 0.6634,
|
|
"margin_dpo/margin_mean": 0.7682675123214722,
|
|
"margin_dpo/margin_std": 1.9303239583969116,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.19696969696969696,
|
|
"grad_norm": 11.366332054138184,
|
|
"learning_rate": 4.866793539675126e-07,
|
|
"logits/chosen": -0.6925519704818726,
|
|
"logits/rejected": -0.6610804796218872,
|
|
"logps/chosen": -69.35958099365234,
|
|
"logps/ref_chosen": -66.47074890136719,
|
|
"logps/ref_rejected": -100.35836029052734,
|
|
"logps/rejected": -104.43794250488281,
|
|
"loss": 0.6579,
|
|
"margin_dpo/margin_mean": 1.1907539367675781,
|
|
"margin_dpo/margin_std": 2.986706495285034,
|
|
"step": 65
|
|
},
|
|
{
|
|
"epoch": 0.21212121212121213,
|
|
"grad_norm": 12.58990478515625,
|
|
"learning_rate": 4.820919832540181e-07,
|
|
"logits/chosen": -0.6219511032104492,
|
|
"logits/rejected": -0.6189069747924805,
|
|
"logps/chosen": -67.1957778930664,
|
|
"logps/ref_chosen": -64.2503662109375,
|
|
"logps/ref_rejected": -66.74681091308594,
|
|
"logps/rejected": -70.51075744628906,
|
|
"loss": 0.6519,
|
|
"margin_dpo/margin_mean": 0.8185291290283203,
|
|
"margin_dpo/margin_std": 2.976707935333252,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.22727272727272727,
|
|
"grad_norm": 11.002663612365723,
|
|
"learning_rate": 4.768555511768486e-07,
|
|
"logits/chosen": -0.5906602740287781,
|
|
"logits/rejected": -0.5815819501876831,
|
|
"logps/chosen": -71.80250549316406,
|
|
"logps/ref_chosen": -68.28721618652344,
|
|
"logps/ref_rejected": -76.16336822509766,
|
|
"logps/rejected": -80.22598266601562,
|
|
"loss": 0.6617,
|
|
"margin_dpo/margin_mean": 0.5473247170448303,
|
|
"margin_dpo/margin_std": 3.507791519165039,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.24242424242424243,
|
|
"grad_norm": 9.479778289794922,
|
|
"learning_rate": 4.7098470178228755e-07,
|
|
"logits/chosen": -0.6349095106124878,
|
|
"logits/rejected": -0.6179987788200378,
|
|
"logps/chosen": -57.898193359375,
|
|
"logps/ref_chosen": -54.811798095703125,
|
|
"logps/ref_rejected": -77.2701187133789,
|
|
"logps/rejected": -81.84941101074219,
|
|
"loss": 0.6448,
|
|
"margin_dpo/margin_mean": 1.4929004907608032,
|
|
"margin_dpo/margin_std": 3.287881851196289,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.25757575757575757,
|
|
"grad_norm": 10.064814567565918,
|
|
"learning_rate": 4.6449585330874425e-07,
|
|
"logits/chosen": -0.5931236147880554,
|
|
"logits/rejected": -0.5673755407333374,
|
|
"logps/chosen": -66.52117919921875,
|
|
"logps/ref_chosen": -62.9375,
|
|
"logps/ref_rejected": -89.00093078613281,
|
|
"logps/rejected": -94.03156280517578,
|
|
"loss": 0.6411,
|
|
"margin_dpo/margin_mean": 1.4469609260559082,
|
|
"margin_dpo/margin_std": 3.1353728771209717,
|
|
"step": 85
|
|
},
|
|
{
|
|
"epoch": 0.2727272727272727,
|
|
"grad_norm": 10.42741584777832,
|
|
"learning_rate": 4.5740715227200897e-07,
|
|
"logits/chosen": -0.6528624296188354,
|
|
"logits/rejected": -0.6274086833000183,
|
|
"logps/chosen": -66.20284271240234,
|
|
"logps/ref_chosen": -62.151451110839844,
|
|
"logps/ref_rejected": -83.65849304199219,
|
|
"logps/rejected": -89.31423950195312,
|
|
"loss": 0.6262,
|
|
"margin_dpo/margin_mean": 1.6043474674224854,
|
|
"margin_dpo/margin_std": 3.8411917686462402,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.2878787878787879,
|
|
"grad_norm": 10.800503730773926,
|
|
"learning_rate": 4.4973842271726024e-07,
|
|
"logits/chosen": -0.5788562893867493,
|
|
"logits/rejected": -0.5660556554794312,
|
|
"logps/chosen": -67.69863891601562,
|
|
"logps/ref_chosen": -63.18915939331055,
|
|
"logps/ref_rejected": -77.06649017333984,
|
|
"logps/rejected": -83.23294067382812,
|
|
"loss": 0.6272,
|
|
"margin_dpo/margin_mean": 1.6569665670394897,
|
|
"margin_dpo/margin_std": 4.609116554260254,
|
|
"step": 95
|
|
},
|
|
{
|
|
"epoch": 0.30303030303030304,
|
|
"grad_norm": 10.378731727600098,
|
|
"learning_rate": 4.415111107797445e-07,
|
|
"logits/chosen": -0.5960966348648071,
|
|
"logits/rejected": -0.5538562536239624,
|
|
"logps/chosen": -59.95014572143555,
|
|
"logps/ref_chosen": -55.48549270629883,
|
|
"logps/ref_rejected": -85.08012390136719,
|
|
"logps/rejected": -92.14093017578125,
|
|
"loss": 0.6266,
|
|
"margin_dpo/margin_mean": 2.5961520671844482,
|
|
"margin_dpo/margin_std": 4.217093467712402,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.30303030303030304,
|
|
"eval_logits/chosen": -0.5741320848464966,
|
|
"eval_logits/rejected": -0.5576887130737305,
|
|
"eval_logps/chosen": -75.61560821533203,
|
|
"eval_logps/ref_chosen": -71.49089813232422,
|
|
"eval_logps/ref_rejected": -76.31332397460938,
|
|
"eval_logps/rejected": -82.72161865234375,
|
|
"eval_loss": 0.6173638105392456,
|
|
"eval_margin_dpo/margin_mean": 2.28357195854187,
|
|
"eval_margin_dpo/margin_std": 3.9973862171173096,
|
|
"eval_runtime": 18.8686,
|
|
"eval_samples_per_second": 122.055,
|
|
"eval_steps_per_second": 0.954,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.3181818181818182,
|
|
"grad_norm": 12.402639389038086,
|
|
"learning_rate": 4.327482247091679e-07,
|
|
"logits/chosen": -0.5790421366691589,
|
|
"logits/rejected": -0.5531052350997925,
|
|
"logps/chosen": -76.99128723144531,
|
|
"logps/ref_chosen": -71.54103088378906,
|
|
"logps/ref_rejected": -98.70140075683594,
|
|
"logps/rejected": -106.15584564208984,
|
|
"loss": 0.6195,
|
|
"margin_dpo/margin_mean": 2.0041980743408203,
|
|
"margin_dpo/margin_std": 4.225128173828125,
|
|
"step": 105
|
|
},
|
|
{
|
|
"epoch": 0.3333333333333333,
|
|
"grad_norm": 9.073996543884277,
|
|
"learning_rate": 4.234742705255272e-07,
|
|
"logits/chosen": -0.49020037055015564,
|
|
"logits/rejected": -0.48362722992897034,
|
|
"logps/chosen": -71.53330993652344,
|
|
"logps/ref_chosen": -66.31354522705078,
|
|
"logps/ref_rejected": -76.78019714355469,
|
|
"logps/rejected": -83.70118713378906,
|
|
"loss": 0.6149,
|
|
"margin_dpo/margin_mean": 1.7012172937393188,
|
|
"margin_dpo/margin_std": 4.63106632232666,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.3484848484848485,
|
|
"grad_norm": 10.4576997756958,
|
|
"learning_rate": 4.137151834863213e-07,
|
|
"logits/chosen": -0.5765933394432068,
|
|
"logits/rejected": -0.5322223901748657,
|
|
"logps/chosen": -62.665382385253906,
|
|
"logps/ref_chosen": -58.31931686401367,
|
|
"logps/ref_rejected": -88.27889251708984,
|
|
"logps/rejected": -95.86396789550781,
|
|
"loss": 0.6004,
|
|
"margin_dpo/margin_mean": 3.2390189170837402,
|
|
"margin_dpo/margin_std": 4.050782203674316,
|
|
"step": 115
|
|
},
|
|
{
|
|
"epoch": 0.36363636363636365,
|
|
"grad_norm": 12.087044715881348,
|
|
"learning_rate": 4.0349825555680045e-07,
|
|
"logits/chosen": -0.6157968640327454,
|
|
"logits/rejected": -0.5801655650138855,
|
|
"logps/chosen": -66.97267150878906,
|
|
"logps/ref_chosen": -61.62066650390625,
|
|
"logps/ref_rejected": -103.57926177978516,
|
|
"logps/rejected": -112.13105773925781,
|
|
"loss": 0.6074,
|
|
"margin_dpo/margin_mean": 3.1997852325439453,
|
|
"margin_dpo/margin_std": 5.21464729309082,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.3787878787878788,
|
|
"grad_norm": 11.476883888244629,
|
|
"learning_rate": 3.9285205908608934e-07,
|
|
"logits/chosen": -0.5993348360061646,
|
|
"logits/rejected": -0.5867364406585693,
|
|
"logps/chosen": -84.22923278808594,
|
|
"logps/ref_chosen": -77.95762634277344,
|
|
"logps/ref_rejected": -80.53031158447266,
|
|
"logps/rejected": -88.4426040649414,
|
|
"loss": 0.614,
|
|
"margin_dpo/margin_mean": 1.6406761407852173,
|
|
"margin_dpo/margin_std": 5.179450511932373,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 0.3939393939393939,
|
|
"grad_norm": 12.546419143676758,
|
|
"learning_rate": 3.818063669026256e-07,
|
|
"logits/chosen": -0.5839983224868774,
|
|
"logits/rejected": -0.5685960054397583,
|
|
"logps/chosen": -75.35858154296875,
|
|
"logps/ref_chosen": -69.84893798828125,
|
|
"logps/ref_rejected": -97.6857681274414,
|
|
"logps/rejected": -106.6558837890625,
|
|
"loss": 0.5884,
|
|
"margin_dpo/margin_mean": 3.460472583770752,
|
|
"margin_dpo/margin_std": 6.851003170013428,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.4090909090909091,
|
|
"grad_norm": 10.323763847351074,
|
|
"learning_rate": 3.7039206905237656e-07,
|
|
"logits/chosen": -0.5967100858688354,
|
|
"logits/rejected": -0.6035032272338867,
|
|
"logps/chosen": -76.12150573730469,
|
|
"logps/ref_chosen": -69.49943542480469,
|
|
"logps/ref_rejected": -76.46887969970703,
|
|
"logps/rejected": -84.82896423339844,
|
|
"loss": 0.5886,
|
|
"margin_dpo/margin_mean": 1.7380040884017944,
|
|
"margin_dpo/margin_std": 5.351980686187744,
|
|
"step": 135
|
|
},
|
|
{
|
|
"epoch": 0.42424242424242425,
|
|
"grad_norm": 9.629522323608398,
|
|
"learning_rate": 3.586410864126781e-07,
|
|
"logits/chosen": -0.5848367214202881,
|
|
"logits/rejected": -0.573132336139679,
|
|
"logps/chosen": -63.21686553955078,
|
|
"logps/ref_chosen": -58.184852600097656,
|
|
"logps/ref_rejected": -72.27442169189453,
|
|
"logps/rejected": -80.48677062988281,
|
|
"loss": 0.5704,
|
|
"margin_dpo/margin_mean": 3.1803410053253174,
|
|
"margin_dpo/margin_std": 5.574404239654541,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.4393939393939394,
|
|
"grad_norm": 11.897682189941406,
|
|
"learning_rate": 3.465862814232821e-07,
|
|
"logits/chosen": -0.5436482429504395,
|
|
"logits/rejected": -0.527529776096344,
|
|
"logps/chosen": -73.48857116699219,
|
|
"logps/ref_chosen": -67.29014587402344,
|
|
"logps/ref_rejected": -78.61517333984375,
|
|
"logps/rejected": -88.46278381347656,
|
|
"loss": 0.5554,
|
|
"margin_dpo/margin_mean": 3.6491763591766357,
|
|
"margin_dpo/margin_std": 5.883833885192871,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 0.45454545454545453,
|
|
"grad_norm": 11.066961288452148,
|
|
"learning_rate": 3.3426136618426043e-07,
|
|
"logits/chosen": -0.5548180341720581,
|
|
"logits/rejected": -0.5312086343765259,
|
|
"logps/chosen": -60.678245544433594,
|
|
"logps/ref_chosen": -53.7413330078125,
|
|
"logps/ref_rejected": -80.63525390625,
|
|
"logps/rejected": -91.57915496826172,
|
|
"loss": 0.5445,
|
|
"margin_dpo/margin_mean": 4.006979465484619,
|
|
"margin_dpo/margin_std": 5.5384626388549805,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.4696969696969697,
|
|
"grad_norm": 11.354157447814941,
|
|
"learning_rate": 3.2170080817777257e-07,
|
|
"logits/chosen": -0.5146440863609314,
|
|
"logits/rejected": -0.5049440264701843,
|
|
"logps/chosen": -64.72186279296875,
|
|
"logps/ref_chosen": -57.31132125854492,
|
|
"logps/ref_rejected": -74.34989929199219,
|
|
"logps/rejected": -85.43902587890625,
|
|
"loss": 0.5766,
|
|
"margin_dpo/margin_mean": 3.6785824298858643,
|
|
"margin_dpo/margin_std": 7.704632759094238,
|
|
"step": 155
|
|
},
|
|
{
|
|
"epoch": 0.48484848484848486,
|
|
"grad_norm": 11.149504661560059,
|
|
"learning_rate": 3.0893973387735683e-07,
|
|
"logits/chosen": -0.5834041237831116,
|
|
"logits/rejected": -0.5603164434432983,
|
|
"logps/chosen": -66.89668273925781,
|
|
"logps/ref_chosen": -59.539772033691406,
|
|
"logps/ref_rejected": -84.16561126708984,
|
|
"logps/rejected": -96.21601867675781,
|
|
"loss": 0.5611,
|
|
"margin_dpo/margin_mean": 4.693480014801025,
|
|
"margin_dpo/margin_std": 6.964644432067871,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"grad_norm": 13.738677978515625,
|
|
"learning_rate": 2.9601383051430505e-07,
|
|
"logits/chosen": -0.5313447117805481,
|
|
"logits/rejected": -0.5091090798377991,
|
|
"logps/chosen": -74.807861328125,
|
|
"logps/ref_chosen": -66.78636169433594,
|
|
"logps/ref_rejected": -88.8131103515625,
|
|
"logps/rejected": -101.53221130371094,
|
|
"loss": 0.5602,
|
|
"margin_dpo/margin_mean": 4.697592735290527,
|
|
"margin_dpo/margin_std": 7.459498405456543,
|
|
"step": 165
|
|
},
|
|
{
|
|
"epoch": 0.5151515151515151,
|
|
"grad_norm": 13.155235290527344,
|
|
"learning_rate": 2.8295924627584004e-07,
|
|
"logits/chosen": -0.5154544115066528,
|
|
"logits/rejected": -0.47907596826553345,
|
|
"logps/chosen": -55.303504943847656,
|
|
"logps/ref_chosen": -47.866973876953125,
|
|
"logps/ref_rejected": -84.14051818847656,
|
|
"logps/rejected": -98.28789520263672,
|
|
"loss": 0.5537,
|
|
"margin_dpo/margin_mean": 6.710854530334473,
|
|
"margin_dpo/margin_std": 8.341263771057129,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.5303030303030303,
|
|
"grad_norm": 14.890878677368164,
|
|
"learning_rate": 2.698124892141971e-07,
|
|
"logits/chosen": -0.5128508806228638,
|
|
"logits/rejected": -0.4919998049736023,
|
|
"logps/chosen": -65.23526763916016,
|
|
"logps/ref_chosen": -57.79303741455078,
|
|
"logps/ref_rejected": -76.8666000366211,
|
|
"logps/rejected": -91.17439270019531,
|
|
"loss": 0.5327,
|
|
"margin_dpo/margin_mean": 6.865555763244629,
|
|
"margin_dpo/margin_std": 8.957682609558105,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 0.5454545454545454,
|
|
"grad_norm": 12.345190048217773,
|
|
"learning_rate": 2.5661032514931834e-07,
|
|
"logits/chosen": -0.5460310578346252,
|
|
"logits/rejected": -0.5277290344238281,
|
|
"logps/chosen": -61.90520095825195,
|
|
"logps/ref_chosen": -53.86296844482422,
|
|
"logps/ref_rejected": -76.9208755493164,
|
|
"logps/rejected": -90.58650207519531,
|
|
"loss": 0.5397,
|
|
"margin_dpo/margin_mean": 5.623406887054443,
|
|
"margin_dpo/margin_std": 8.307819366455078,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.5606060606060606,
|
|
"grad_norm": 18.609071731567383,
|
|
"learning_rate": 2.4338967485068164e-07,
|
|
"logits/chosen": -0.4956757426261902,
|
|
"logits/rejected": -0.47750720381736755,
|
|
"logps/chosen": -69.359130859375,
|
|
"logps/ref_chosen": -60.57938766479492,
|
|
"logps/ref_rejected": -72.99809265136719,
|
|
"logps/rejected": -86.45264434814453,
|
|
"loss": 0.5407,
|
|
"margin_dpo/margin_mean": 4.674814701080322,
|
|
"margin_dpo/margin_std": 7.796820163726807,
|
|
"step": 185
|
|
},
|
|
{
|
|
"epoch": 0.5757575757575758,
|
|
"grad_norm": 15.594287872314453,
|
|
"learning_rate": 2.3018751078580283e-07,
|
|
"logits/chosen": -0.5231366157531738,
|
|
"logits/rejected": -0.5017072558403015,
|
|
"logps/chosen": -63.6590461730957,
|
|
"logps/ref_chosen": -55.309478759765625,
|
|
"logps/ref_rejected": -75.77075958251953,
|
|
"logps/rejected": -89.84127807617188,
|
|
"loss": 0.5477,
|
|
"margin_dpo/margin_mean": 5.720963954925537,
|
|
"margin_dpo/margin_std": 10.631233215332031,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.5909090909090909,
|
|
"grad_norm": 13.909214973449707,
|
|
"learning_rate": 2.170407537241599e-07,
|
|
"logits/chosen": -0.5053573846817017,
|
|
"logits/rejected": -0.48142895102500916,
|
|
"logps/chosen": -76.45471954345703,
|
|
"logps/ref_chosen": -67.39129638671875,
|
|
"logps/ref_rejected": -94.1995620727539,
|
|
"logps/rejected": -109.12031555175781,
|
|
"loss": 0.5555,
|
|
"margin_dpo/margin_mean": 5.857341289520264,
|
|
"margin_dpo/margin_std": 9.257515907287598,
|
|
"step": 195
|
|
},
|
|
{
|
|
"epoch": 0.6060606060606061,
|
|
"grad_norm": 14.265554428100586,
|
|
"learning_rate": 2.0398616948569493e-07,
|
|
"logits/chosen": -0.5393396019935608,
|
|
"logits/rejected": -0.5077868700027466,
|
|
"logps/chosen": -75.58625793457031,
|
|
"logps/ref_chosen": -65.90815734863281,
|
|
"logps/ref_rejected": -98.7196273803711,
|
|
"logps/rejected": -113.99732971191406,
|
|
"loss": 0.5253,
|
|
"margin_dpo/margin_mean": 5.5995988845825195,
|
|
"margin_dpo/margin_std": 10.336074829101562,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.6060606060606061,
|
|
"eval_logits/chosen": -0.5199635624885559,
|
|
"eval_logits/rejected": -0.5067822933197021,
|
|
"eval_logps/chosen": -79.58210754394531,
|
|
"eval_logps/ref_chosen": -71.49089813232422,
|
|
"eval_logps/ref_rejected": -76.31332397460938,
|
|
"eval_logps/rejected": -90.86639404296875,
|
|
"eval_loss": 0.543655276298523,
|
|
"eval_margin_dpo/margin_mean": 6.4618449211120605,
|
|
"eval_margin_dpo/margin_std": 9.544526100158691,
|
|
"eval_runtime": 18.8081,
|
|
"eval_samples_per_second": 122.447,
|
|
"eval_steps_per_second": 0.957,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.6212121212121212,
|
|
"grad_norm": 11.659725189208984,
|
|
"learning_rate": 1.9106026612264315e-07,
|
|
"logits/chosen": -0.5398346185684204,
|
|
"logits/rejected": -0.5087303519248962,
|
|
"logps/chosen": -59.74982833862305,
|
|
"logps/ref_chosen": -52.514007568359375,
|
|
"logps/ref_rejected": -94.02557373046875,
|
|
"logps/rejected": -109.4577865600586,
|
|
"loss": 0.508,
|
|
"margin_dpo/margin_mean": 8.196396827697754,
|
|
"margin_dpo/margin_std": 10.316641807556152,
|
|
"step": 205
|
|
},
|
|
{
|
|
"epoch": 0.6363636363636364,
|
|
"grad_norm": 29.11798667907715,
|
|
"learning_rate": 1.782991918222275e-07,
|
|
"logits/chosen": -0.47662702202796936,
|
|
"logits/rejected": -0.46838369965553284,
|
|
"logps/chosen": -66.78819274902344,
|
|
"logps/ref_chosen": -57.89775466918945,
|
|
"logps/ref_rejected": -62.08463668823242,
|
|
"logps/rejected": -77.85931396484375,
|
|
"loss": 0.5482,
|
|
"margin_dpo/margin_mean": 6.8842339515686035,
|
|
"margin_dpo/margin_std": 11.393902778625488,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.6515151515151515,
|
|
"grad_norm": 23.676776885986328,
|
|
"learning_rate": 1.6573863381573954e-07,
|
|
"logits/chosen": -0.4756692945957184,
|
|
"logits/rejected": -0.4733617305755615,
|
|
"logps/chosen": -71.32975006103516,
|
|
"logps/ref_chosen": -63.36411666870117,
|
|
"logps/ref_rejected": -70.50566101074219,
|
|
"logps/rejected": -84.5431137084961,
|
|
"loss": 0.5442,
|
|
"margin_dpo/margin_mean": 6.07181453704834,
|
|
"margin_dpo/margin_std": 9.235767364501953,
|
|
"step": 215
|
|
},
|
|
{
|
|
"epoch": 0.6666666666666666,
|
|
"grad_norm": 26.59471321105957,
|
|
"learning_rate": 1.534137185767178e-07,
|
|
"logits/chosen": -0.5520139932632446,
|
|
"logits/rejected": -0.5306358933448792,
|
|
"logps/chosen": -63.29638671875,
|
|
"logps/ref_chosen": -54.3653564453125,
|
|
"logps/ref_rejected": -80.68601989746094,
|
|
"logps/rejected": -97.40142822265625,
|
|
"loss": 0.529,
|
|
"margin_dpo/margin_mean": 7.784371852874756,
|
|
"margin_dpo/margin_std": 11.405842781066895,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.6818181818181818,
|
|
"grad_norm": 17.50434684753418,
|
|
"learning_rate": 1.4135891358732205e-07,
|
|
"logits/chosen": -0.5091781616210938,
|
|
"logits/rejected": -0.4780656397342682,
|
|
"logps/chosen": -74.7088851928711,
|
|
"logps/ref_chosen": -65.24610137939453,
|
|
"logps/ref_rejected": -85.6495590209961,
|
|
"logps/rejected": -103.7113265991211,
|
|
"loss": 0.5273,
|
|
"margin_dpo/margin_mean": 8.598976135253906,
|
|
"margin_dpo/margin_std": 11.525456428527832,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 0.696969696969697,
|
|
"grad_norm": 21.340883255004883,
|
|
"learning_rate": 1.2960793094762345e-07,
|
|
"logits/chosen": -0.4688114523887634,
|
|
"logits/rejected": -0.46031489968299866,
|
|
"logps/chosen": -79.30754089355469,
|
|
"logps/ref_chosen": -69.5623550415039,
|
|
"logps/ref_rejected": -86.65391540527344,
|
|
"logps/rejected": -102.97904968261719,
|
|
"loss": 0.5118,
|
|
"margin_dpo/margin_mean": 6.579934597015381,
|
|
"margin_dpo/margin_std": 10.335288047790527,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.7121212121212122,
|
|
"grad_norm": 20.29132652282715,
|
|
"learning_rate": 1.1819363309737438e-07,
|
|
"logits/chosen": -0.4904417097568512,
|
|
"logits/rejected": -0.4770389199256897,
|
|
"logps/chosen": -72.47919464111328,
|
|
"logps/ref_chosen": -62.41870880126953,
|
|
"logps/ref_rejected": -80.84742736816406,
|
|
"logps/rejected": -97.89503479003906,
|
|
"loss": 0.5133,
|
|
"margin_dpo/margin_mean": 6.987112998962402,
|
|
"margin_dpo/margin_std": 9.303082466125488,
|
|
"step": 235
|
|
},
|
|
{
|
|
"epoch": 0.7272727272727273,
|
|
"grad_norm": 11.328718185424805,
|
|
"learning_rate": 1.0714794091391072e-07,
|
|
"logits/chosen": -0.5141887068748474,
|
|
"logits/rejected": -0.4992826581001282,
|
|
"logps/chosen": -68.79585266113281,
|
|
"logps/ref_chosen": -60.14348602294922,
|
|
"logps/ref_rejected": -84.51826477050781,
|
|
"logps/rejected": -101.74858856201172,
|
|
"loss": 0.5432,
|
|
"margin_dpo/margin_mean": 8.577953338623047,
|
|
"margin_dpo/margin_std": 10.39548397064209,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.7424242424242424,
|
|
"grad_norm": 21.313125610351562,
|
|
"learning_rate": 9.650174444319956e-08,
|
|
"logits/chosen": -0.5187879800796509,
|
|
"logits/rejected": -0.5011430382728577,
|
|
"logps/chosen": -68.9282455444336,
|
|
"logps/ref_chosen": -59.89912033081055,
|
|
"logps/ref_rejected": -76.29353332519531,
|
|
"logps/rejected": -93.21476745605469,
|
|
"loss": 0.549,
|
|
"margin_dpo/margin_mean": 7.892104148864746,
|
|
"margin_dpo/margin_std": 10.297919273376465,
|
|
"step": 245
|
|
},
|
|
{
|
|
"epoch": 0.7575757575757576,
|
|
"grad_norm": 18.405746459960938,
|
|
"learning_rate": 8.628481651367875e-08,
|
|
"logits/chosen": -0.5289962887763977,
|
|
"logits/rejected": -0.5101832151412964,
|
|
"logps/chosen": -71.01588439941406,
|
|
"logps/ref_chosen": -61.324790954589844,
|
|
"logps/ref_rejected": -95.19871520996094,
|
|
"logps/rejected": -110.73634338378906,
|
|
"loss": 0.5381,
|
|
"margin_dpo/margin_mean": 5.8465423583984375,
|
|
"margin_dpo/margin_std": 11.49156379699707,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.7727272727272727,
|
|
"grad_norm": 29.608196258544922,
|
|
"learning_rate": 7.652572947447272e-08,
|
|
"logits/chosen": -0.5170688033103943,
|
|
"logits/rejected": -0.5108999013900757,
|
|
"logps/chosen": -82.85248565673828,
|
|
"logps/ref_chosen": -73.00435638427734,
|
|
"logps/ref_rejected": -89.8001937866211,
|
|
"logps/rejected": -106.5128402709961,
|
|
"loss": 0.5272,
|
|
"margin_dpo/margin_mean": 6.864515781402588,
|
|
"margin_dpo/margin_std": 10.157739639282227,
|
|
"step": 255
|
|
},
|
|
{
|
|
"epoch": 0.7878787878787878,
|
|
"grad_norm": 35.19934844970703,
|
|
"learning_rate": 6.725177529083209e-08,
|
|
"logits/chosen": -0.5281625390052795,
|
|
"logits/rejected": -0.5114730596542358,
|
|
"logps/chosen": -65.01654815673828,
|
|
"logps/ref_chosen": -54.35801315307617,
|
|
"logps/ref_rejected": -78.89704895019531,
|
|
"logps/rejected": -97.48576354980469,
|
|
"loss": 0.5345,
|
|
"margin_dpo/margin_mean": 7.930176734924316,
|
|
"margin_dpo/margin_std": 12.07260513305664,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.803030303030303,
|
|
"grad_norm": 15.536827087402344,
|
|
"learning_rate": 5.848888922025552e-08,
|
|
"logits/chosen": -0.47202104330062866,
|
|
"logits/rejected": -0.4491683542728424,
|
|
"logps/chosen": -75.3332748413086,
|
|
"logps/ref_chosen": -64.1512451171875,
|
|
"logps/ref_rejected": -88.43415069580078,
|
|
"logps/rejected": -107.0230712890625,
|
|
"loss": 0.5559,
|
|
"margin_dpo/margin_mean": 7.406890869140625,
|
|
"margin_dpo/margin_std": 11.541508674621582,
|
|
"step": 265
|
|
},
|
|
{
|
|
"epoch": 0.8181818181818182,
|
|
"grad_norm": 14.287105560302734,
|
|
"learning_rate": 5.026157728273966e-08,
|
|
"logits/chosen": -0.5008893013000488,
|
|
"logits/rejected": -0.4735264778137207,
|
|
"logps/chosen": -62.34975051879883,
|
|
"logps/ref_chosen": -51.93467330932617,
|
|
"logps/ref_rejected": -83.3440170288086,
|
|
"logps/rejected": -99.53559875488281,
|
|
"loss": 0.5252,
|
|
"margin_dpo/margin_mean": 5.776501655578613,
|
|
"margin_dpo/margin_std": 10.03078556060791,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.8333333333333334,
|
|
"grad_norm": 13.779406547546387,
|
|
"learning_rate": 4.259284772799099e-08,
|
|
"logits/chosen": -0.509304404258728,
|
|
"logits/rejected": -0.5035196542739868,
|
|
"logps/chosen": -74.07002258300781,
|
|
"logps/ref_chosen": -66.1004638671875,
|
|
"logps/ref_rejected": -77.46138000488281,
|
|
"logps/rejected": -94.65324401855469,
|
|
"loss": 0.5202,
|
|
"margin_dpo/margin_mean": 9.222299575805664,
|
|
"margin_dpo/margin_std": 10.624560356140137,
|
|
"step": 275
|
|
},
|
|
{
|
|
"epoch": 0.8484848484848485,
|
|
"grad_norm": 28.201580047607422,
|
|
"learning_rate": 3.550414669125573e-08,
|
|
"logits/chosen": -0.5307421088218689,
|
|
"logits/rejected": -0.5124194622039795,
|
|
"logps/chosen": -78.31131744384766,
|
|
"logps/ref_chosen": -68.96475982666016,
|
|
"logps/ref_rejected": -93.81538391113281,
|
|
"logps/rejected": -110.4820327758789,
|
|
"loss": 0.5355,
|
|
"margin_dpo/margin_mean": 7.320086479187012,
|
|
"margin_dpo/margin_std": 12.83232307434082,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.8636363636363636,
|
|
"grad_norm": 18.593904495239258,
|
|
"learning_rate": 2.9015298217712453e-08,
|
|
"logits/chosen": -0.4980226457118988,
|
|
"logits/rejected": -0.46921929717063904,
|
|
"logps/chosen": -72.2420425415039,
|
|
"logps/ref_chosen": -61.95045852661133,
|
|
"logps/ref_rejected": -91.99930572509766,
|
|
"logps/rejected": -110.4931640625,
|
|
"loss": 0.5048,
|
|
"margin_dpo/margin_mean": 8.202288627624512,
|
|
"margin_dpo/margin_std": 12.118570327758789,
|
|
"step": 285
|
|
},
|
|
{
|
|
"epoch": 0.8787878787878788,
|
|
"grad_norm": 19.532819747924805,
|
|
"learning_rate": 2.3144448823151392e-08,
|
|
"logits/chosen": -0.48515787720680237,
|
|
"logits/rejected": -0.46074217557907104,
|
|
"logps/chosen": -64.38178253173828,
|
|
"logps/ref_chosen": -54.1287727355957,
|
|
"logps/ref_rejected": -77.50074005126953,
|
|
"logps/rejected": -94.30645751953125,
|
|
"loss": 0.5432,
|
|
"margin_dpo/margin_mean": 6.552700996398926,
|
|
"margin_dpo/margin_std": 11.339497566223145,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.8939393939393939,
|
|
"grad_norm": 14.434176445007324,
|
|
"learning_rate": 1.7908016745981856e-08,
|
|
"logits/chosen": -0.4828720986843109,
|
|
"logits/rejected": -0.48095735907554626,
|
|
"logps/chosen": -71.822509765625,
|
|
"logps/ref_chosen": -61.227928161621094,
|
|
"logps/ref_rejected": -70.93891143798828,
|
|
"logps/rejected": -88.13584899902344,
|
|
"loss": 0.5307,
|
|
"margin_dpo/margin_mean": 6.602363586425781,
|
|
"margin_dpo/margin_std": 10.929509162902832,
|
|
"step": 295
|
|
},
|
|
{
|
|
"epoch": 0.9090909090909091,
|
|
"grad_norm": 11.023996353149414,
|
|
"learning_rate": 1.3320646032487393e-08,
|
|
"logits/chosen": -0.5068015456199646,
|
|
"logits/rejected": -0.4941573143005371,
|
|
"logps/chosen": -68.61476135253906,
|
|
"logps/ref_chosen": -59.28802490234375,
|
|
"logps/ref_rejected": -82.7754898071289,
|
|
"logps/rejected": -100.3427505493164,
|
|
"loss": 0.5534,
|
|
"margin_dpo/margin_mean": 8.240517616271973,
|
|
"margin_dpo/margin_std": 10.162951469421387,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.9090909090909091,
|
|
"eval_logits/chosen": -0.49858054518699646,
|
|
"eval_logits/rejected": -0.48604482412338257,
|
|
"eval_logps/chosen": -80.9963607788086,
|
|
"eval_logps/ref_chosen": -71.49089813232422,
|
|
"eval_logps/ref_rejected": -76.31332397460938,
|
|
"eval_logps/rejected": -92.93927001953125,
|
|
"eval_loss": 0.5387622714042664,
|
|
"eval_margin_dpo/margin_mean": 7.120471000671387,
|
|
"eval_margin_dpo/margin_std": 10.49869155883789,
|
|
"eval_runtime": 18.8008,
|
|
"eval_samples_per_second": 122.495,
|
|
"eval_steps_per_second": 0.957,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.9242424242424242,
|
|
"grad_norm": 28.285140991210938,
|
|
"learning_rate": 9.395165583732379e-09,
|
|
"logits/chosen": -0.48444804549217224,
|
|
"logits/rejected": -0.4512646794319153,
|
|
"logps/chosen": -63.23552322387695,
|
|
"logps/ref_chosen": -54.85032272338867,
|
|
"logps/ref_rejected": -96.26322174072266,
|
|
"logps/rejected": -114.82981872558594,
|
|
"loss": 0.5254,
|
|
"margin_dpo/margin_mean": 10.181402206420898,
|
|
"margin_dpo/margin_std": 10.521098136901855,
|
|
"step": 305
|
|
},
|
|
{
|
|
"epoch": 0.9393939393939394,
|
|
"grad_norm": 17.56390953063965,
|
|
"learning_rate": 6.142553278648238e-09,
|
|
"logits/chosen": -0.495095819234848,
|
|
"logits/rejected": -0.47865208983421326,
|
|
"logps/chosen": -76.20247650146484,
|
|
"logps/ref_chosen": -65.8403091430664,
|
|
"logps/ref_rejected": -88.9677963256836,
|
|
"logps/rejected": -106.7435073852539,
|
|
"loss": 0.5117,
|
|
"margin_dpo/margin_mean": 7.413548946380615,
|
|
"margin_dpo/margin_std": 10.833813667297363,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.9545454545454546,
|
|
"grad_norm": 11.377077102661133,
|
|
"learning_rate": 3.5719052736323806e-09,
|
|
"logits/chosen": -0.49148210883140564,
|
|
"logits/rejected": -0.4869101941585541,
|
|
"logps/chosen": -82.30244445800781,
|
|
"logps/ref_chosen": -72.73238372802734,
|
|
"logps/ref_rejected": -74.21096801757812,
|
|
"logps/rejected": -89.88545989990234,
|
|
"loss": 0.508,
|
|
"margin_dpo/margin_mean": 6.104436874389648,
|
|
"margin_dpo/margin_std": 9.512574195861816,
|
|
"step": 315
|
|
},
|
|
{
|
|
"epoch": 0.9696969696969697,
|
|
"grad_norm": 13.178277969360352,
|
|
"learning_rate": 1.690410564514244e-09,
|
|
"logits/chosen": -0.49254482984542847,
|
|
"logits/rejected": -0.45911550521850586,
|
|
"logps/chosen": -76.04261779785156,
|
|
"logps/ref_chosen": -65.25657653808594,
|
|
"logps/ref_rejected": -91.9552993774414,
|
|
"logps/rejected": -111.62044525146484,
|
|
"loss": 0.529,
|
|
"margin_dpo/margin_mean": 8.879097938537598,
|
|
"margin_dpo/margin_std": 10.679101943969727,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.9848484848484849,
|
|
"grad_norm": 14.677971839904785,
|
|
"learning_rate": 5.033308820289184e-10,
|
|
"logits/chosen": -0.502629280090332,
|
|
"logits/rejected": -0.4776650071144104,
|
|
"logps/chosen": -61.78889846801758,
|
|
"logps/ref_chosen": -53.00225067138672,
|
|
"logps/ref_rejected": -69.4771957397461,
|
|
"logps/rejected": -87.58296966552734,
|
|
"loss": 0.5264,
|
|
"margin_dpo/margin_mean": 9.319120407104492,
|
|
"margin_dpo/margin_std": 10.821681022644043,
|
|
"step": 325
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"grad_norm": 16.924516677856445,
|
|
"learning_rate": 1.3985977021235829e-11,
|
|
"logits/chosen": -0.5281952023506165,
|
|
"logits/rejected": -0.5035934448242188,
|
|
"logps/chosen": -59.8553352355957,
|
|
"logps/ref_chosen": -51.018646240234375,
|
|
"logps/ref_rejected": -74.90043640136719,
|
|
"logps/rejected": -92.38591003417969,
|
|
"loss": 0.5287,
|
|
"margin_dpo/margin_mean": 8.648794174194336,
|
|
"margin_dpo/margin_std": 10.91873550415039,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"step": 330,
|
|
"total_flos": 0.0,
|
|
"train_loss": 0.5836806095007694,
|
|
"train_runtime": 1387.0612,
|
|
"train_samples_per_second": 30.522,
|
|
"train_steps_per_second": 0.238
|
|
}
|
|
],
|
|
"logging_steps": 5,
|
|
"max_steps": 330,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 200,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 16,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|