Model: W-61/qwen3-8b-base-margin-dpo-ultrafeedback-4xh200-batch-128-20260423-040315 Source: Original Platform
7231 lines
255 KiB
JSON
7231 lines
255 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 0.9989528795811519,
|
|
"eval_steps": 200,
|
|
"global_step": 477,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0020942408376963353,
|
|
"grad_norm": 14.617609977722168,
|
|
"learning_rate": 0.0,
|
|
"logits/chosen": 2.203179359436035,
|
|
"logits/rejected": 2.035616397857666,
|
|
"logps/chosen": -257.4821472167969,
|
|
"logps/ref_chosen": -257.55841064453125,
|
|
"logps/ref_rejected": -199.84764099121094,
|
|
"logps/rejected": -199.93338012695312,
|
|
"loss": 5.5446,
|
|
"margin_dpo/margin_mean": 0.16199058294296265,
|
|
"margin_dpo/margin_std": 0.6907856464385986,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.004188481675392671,
|
|
"grad_norm": 15.140374183654785,
|
|
"learning_rate": 1.0416666666666666e-08,
|
|
"logits/chosen": 2.1704792976379395,
|
|
"logits/rejected": 2.0754430294036865,
|
|
"logps/chosen": -224.03538513183594,
|
|
"logps/ref_chosen": -224.12454223632812,
|
|
"logps/ref_rejected": -182.62721252441406,
|
|
"logps/rejected": -182.67271423339844,
|
|
"loss": 5.5417,
|
|
"margin_dpo/margin_mean": 0.13464844226837158,
|
|
"margin_dpo/margin_std": 0.5429617166519165,
|
|
"step": 2
|
|
},
|
|
{
|
|
"epoch": 0.0062827225130890054,
|
|
"grad_norm": 14.625223159790039,
|
|
"learning_rate": 2.083333333333333e-08,
|
|
"logits/chosen": 2.4683523178100586,
|
|
"logits/rejected": 2.463977098464966,
|
|
"logps/chosen": -312.9666748046875,
|
|
"logps/ref_chosen": -312.8153991699219,
|
|
"logps/ref_rejected": -291.1138916015625,
|
|
"logps/rejected": -291.2332763671875,
|
|
"loss": 5.5426,
|
|
"margin_dpo/margin_mean": -0.03191244602203369,
|
|
"margin_dpo/margin_std": 0.6326964497566223,
|
|
"step": 3
|
|
},
|
|
{
|
|
"epoch": 0.008376963350785341,
|
|
"grad_norm": 15.790285110473633,
|
|
"learning_rate": 3.125e-08,
|
|
"logits/chosen": 1.5894497632980347,
|
|
"logits/rejected": 1.4774465560913086,
|
|
"logps/chosen": -310.7625427246094,
|
|
"logps/ref_chosen": -310.8699645996094,
|
|
"logps/ref_rejected": -323.95556640625,
|
|
"logps/rejected": -323.9718933105469,
|
|
"loss": 5.5437,
|
|
"margin_dpo/margin_mean": 0.12377279996871948,
|
|
"margin_dpo/margin_std": 0.9771984815597534,
|
|
"step": 4
|
|
},
|
|
{
|
|
"epoch": 0.010471204188481676,
|
|
"grad_norm": 15.793586730957031,
|
|
"learning_rate": 4.166666666666666e-08,
|
|
"logits/chosen": 1.5695815086364746,
|
|
"logits/rejected": 1.5709682703018188,
|
|
"logps/chosen": -303.8356628417969,
|
|
"logps/ref_chosen": -303.7280578613281,
|
|
"logps/ref_rejected": -262.055419921875,
|
|
"logps/rejected": -261.8935546875,
|
|
"loss": 5.548,
|
|
"margin_dpo/margin_mean": -0.26944446563720703,
|
|
"margin_dpo/margin_std": 0.66167151927948,
|
|
"step": 5
|
|
},
|
|
{
|
|
"epoch": 0.012565445026178011,
|
|
"grad_norm": 15.511699676513672,
|
|
"learning_rate": 5.208333333333333e-08,
|
|
"logits/chosen": 2.0192410945892334,
|
|
"logits/rejected": 1.9741183519363403,
|
|
"logps/chosen": -252.2058563232422,
|
|
"logps/ref_chosen": -252.3014373779297,
|
|
"logps/ref_rejected": -214.40451049804688,
|
|
"logps/rejected": -214.4804229736328,
|
|
"loss": 5.5507,
|
|
"margin_dpo/margin_mean": 0.1714714765548706,
|
|
"margin_dpo/margin_std": 0.6865968108177185,
|
|
"step": 6
|
|
},
|
|
{
|
|
"epoch": 0.014659685863874346,
|
|
"grad_norm": 15.63283634185791,
|
|
"learning_rate": 6.25e-08,
|
|
"logits/chosen": 2.191936492919922,
|
|
"logits/rejected": 2.0201575756073,
|
|
"logps/chosen": -248.16464233398438,
|
|
"logps/ref_chosen": -248.10345458984375,
|
|
"logps/ref_rejected": -204.55133056640625,
|
|
"logps/rejected": -204.63514709472656,
|
|
"loss": 5.5465,
|
|
"margin_dpo/margin_mean": 0.022650957107543945,
|
|
"margin_dpo/margin_std": 0.7195451855659485,
|
|
"step": 7
|
|
},
|
|
{
|
|
"epoch": 0.016753926701570682,
|
|
"grad_norm": 15.911747932434082,
|
|
"learning_rate": 7.291666666666667e-08,
|
|
"logits/chosen": 2.4633631706237793,
|
|
"logits/rejected": 2.229030132293701,
|
|
"logps/chosen": -446.24395751953125,
|
|
"logps/ref_chosen": -446.1068115234375,
|
|
"logps/ref_rejected": -316.3032531738281,
|
|
"logps/rejected": -316.33001708984375,
|
|
"loss": 5.5447,
|
|
"margin_dpo/margin_mean": -0.11035525798797607,
|
|
"margin_dpo/margin_std": 0.8465025424957275,
|
|
"step": 8
|
|
},
|
|
{
|
|
"epoch": 0.018848167539267015,
|
|
"grad_norm": 14.066997528076172,
|
|
"learning_rate": 8.333333333333333e-08,
|
|
"logits/chosen": 1.9973905086517334,
|
|
"logits/rejected": 1.8876209259033203,
|
|
"logps/chosen": -291.28857421875,
|
|
"logps/ref_chosen": -291.0896911621094,
|
|
"logps/ref_rejected": -298.3818054199219,
|
|
"logps/rejected": -298.3582763671875,
|
|
"loss": 5.5483,
|
|
"margin_dpo/margin_mean": -0.22240149974822998,
|
|
"margin_dpo/margin_std": 0.7139020562171936,
|
|
"step": 9
|
|
},
|
|
{
|
|
"epoch": 0.020942408376963352,
|
|
"grad_norm": 14.026876449584961,
|
|
"learning_rate": 9.375e-08,
|
|
"logits/chosen": 1.6050350666046143,
|
|
"logits/rejected": 1.755211591720581,
|
|
"logps/chosen": -221.44143676757812,
|
|
"logps/ref_chosen": -221.42408752441406,
|
|
"logps/ref_rejected": -210.35684204101562,
|
|
"logps/rejected": -210.39434814453125,
|
|
"loss": 5.544,
|
|
"margin_dpo/margin_mean": 0.02016240358352661,
|
|
"margin_dpo/margin_std": 0.5195479989051819,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.023036649214659685,
|
|
"grad_norm": 15.404158592224121,
|
|
"learning_rate": 1.0416666666666667e-07,
|
|
"logits/chosen": 1.8669978380203247,
|
|
"logits/rejected": 1.7889609336853027,
|
|
"logps/chosen": -307.2198181152344,
|
|
"logps/ref_chosen": -307.2149658203125,
|
|
"logps/ref_rejected": -264.55902099609375,
|
|
"logps/rejected": -264.7065734863281,
|
|
"loss": 5.5427,
|
|
"margin_dpo/margin_mean": 0.1427026391029358,
|
|
"margin_dpo/margin_std": 0.9485504627227783,
|
|
"step": 11
|
|
},
|
|
{
|
|
"epoch": 0.025130890052356022,
|
|
"grad_norm": 14.81792163848877,
|
|
"learning_rate": 1.1458333333333332e-07,
|
|
"logits/chosen": 1.494691014289856,
|
|
"logits/rejected": 1.6338729858398438,
|
|
"logps/chosen": -273.935302734375,
|
|
"logps/ref_chosen": -273.97259521484375,
|
|
"logps/ref_rejected": -312.4557189941406,
|
|
"logps/rejected": -312.26611328125,
|
|
"loss": 5.5513,
|
|
"margin_dpo/margin_mean": -0.15232467651367188,
|
|
"margin_dpo/margin_std": 0.7628190517425537,
|
|
"step": 12
|
|
},
|
|
{
|
|
"epoch": 0.027225130890052355,
|
|
"grad_norm": 14.786741256713867,
|
|
"learning_rate": 1.25e-07,
|
|
"logits/chosen": 1.8189257383346558,
|
|
"logits/rejected": 1.8658004999160767,
|
|
"logps/chosen": -264.774658203125,
|
|
"logps/ref_chosen": -264.722412109375,
|
|
"logps/ref_rejected": -264.62823486328125,
|
|
"logps/rejected": -264.7838134765625,
|
|
"loss": 5.5457,
|
|
"margin_dpo/margin_mean": 0.10335606336593628,
|
|
"margin_dpo/margin_std": 0.7768966555595398,
|
|
"step": 13
|
|
},
|
|
{
|
|
"epoch": 0.02931937172774869,
|
|
"grad_norm": 15.321511268615723,
|
|
"learning_rate": 1.3541666666666666e-07,
|
|
"logits/chosen": 1.8423357009887695,
|
|
"logits/rejected": 1.6009153127670288,
|
|
"logps/chosen": -357.5430603027344,
|
|
"logps/ref_chosen": -357.3697509765625,
|
|
"logps/ref_rejected": -231.3351287841797,
|
|
"logps/rejected": -231.34188842773438,
|
|
"loss": 5.5436,
|
|
"margin_dpo/margin_mean": -0.16655707359313965,
|
|
"margin_dpo/margin_std": 0.6755635738372803,
|
|
"step": 14
|
|
},
|
|
{
|
|
"epoch": 0.031413612565445025,
|
|
"grad_norm": 16.096477508544922,
|
|
"learning_rate": 1.4583333333333335e-07,
|
|
"logits/chosen": 2.050579071044922,
|
|
"logits/rejected": 1.9528357982635498,
|
|
"logps/chosen": -282.3099670410156,
|
|
"logps/ref_chosen": -282.4208984375,
|
|
"logps/ref_rejected": -193.90872192382812,
|
|
"logps/rejected": -193.78834533691406,
|
|
"loss": 5.5457,
|
|
"margin_dpo/margin_mean": -0.009424567222595215,
|
|
"margin_dpo/margin_std": 0.5681266784667969,
|
|
"step": 15
|
|
},
|
|
{
|
|
"epoch": 0.033507853403141365,
|
|
"grad_norm": 16.60857391357422,
|
|
"learning_rate": 1.5624999999999999e-07,
|
|
"logits/chosen": 2.2264082431793213,
|
|
"logits/rejected": 1.9722710847854614,
|
|
"logps/chosen": -291.3759460449219,
|
|
"logps/ref_chosen": -291.56591796875,
|
|
"logps/ref_rejected": -252.4170684814453,
|
|
"logps/rejected": -252.54373168945312,
|
|
"loss": 5.54,
|
|
"margin_dpo/margin_mean": 0.31664133071899414,
|
|
"margin_dpo/margin_std": 0.7804574370384216,
|
|
"step": 16
|
|
},
|
|
{
|
|
"epoch": 0.0356020942408377,
|
|
"grad_norm": 15.15626049041748,
|
|
"learning_rate": 1.6666666666666665e-07,
|
|
"logits/chosen": 1.9703552722930908,
|
|
"logits/rejected": 1.9993352890014648,
|
|
"logps/chosen": -343.3455505371094,
|
|
"logps/ref_chosen": -343.4768981933594,
|
|
"logps/ref_rejected": -338.89654541015625,
|
|
"logps/rejected": -338.8592224121094,
|
|
"loss": 5.5409,
|
|
"margin_dpo/margin_mean": 0.09399676322937012,
|
|
"margin_dpo/margin_std": 0.5367782115936279,
|
|
"step": 17
|
|
},
|
|
{
|
|
"epoch": 0.03769633507853403,
|
|
"grad_norm": 15.167213439941406,
|
|
"learning_rate": 1.7708333333333334e-07,
|
|
"logits/chosen": 1.8425214290618896,
|
|
"logits/rejected": 1.8331950902938843,
|
|
"logps/chosen": -213.01934814453125,
|
|
"logps/ref_chosen": -213.05694580078125,
|
|
"logps/ref_rejected": -211.70962524414062,
|
|
"logps/rejected": -211.76414489746094,
|
|
"loss": 5.5491,
|
|
"margin_dpo/margin_mean": 0.09212470054626465,
|
|
"margin_dpo/margin_std": 0.6411672234535217,
|
|
"step": 18
|
|
},
|
|
{
|
|
"epoch": 0.039790575916230364,
|
|
"grad_norm": 14.854358673095703,
|
|
"learning_rate": 1.875e-07,
|
|
"logits/chosen": 2.0766916275024414,
|
|
"logits/rejected": 2.0941522121429443,
|
|
"logps/chosen": -240.00901794433594,
|
|
"logps/ref_chosen": -240.0670928955078,
|
|
"logps/ref_rejected": -246.15377807617188,
|
|
"logps/rejected": -246.24050903320312,
|
|
"loss": 5.5489,
|
|
"margin_dpo/margin_mean": 0.14478152990341187,
|
|
"margin_dpo/margin_std": 0.584217369556427,
|
|
"step": 19
|
|
},
|
|
{
|
|
"epoch": 0.041884816753926704,
|
|
"grad_norm": 15.4912748336792,
|
|
"learning_rate": 1.9791666666666664e-07,
|
|
"logits/chosen": 2.1966586112976074,
|
|
"logits/rejected": 1.9358861446380615,
|
|
"logps/chosen": -315.5570983886719,
|
|
"logps/ref_chosen": -315.71331787109375,
|
|
"logps/ref_rejected": -230.0822296142578,
|
|
"logps/rejected": -230.0750732421875,
|
|
"loss": 5.5455,
|
|
"margin_dpo/margin_mean": 0.14912045001983643,
|
|
"margin_dpo/margin_std": 0.5315914750099182,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.04397905759162304,
|
|
"grad_norm": 15.429500579833984,
|
|
"learning_rate": 2.0833333333333333e-07,
|
|
"logits/chosen": 2.09773588180542,
|
|
"logits/rejected": 2.0702552795410156,
|
|
"logps/chosen": -279.3077697753906,
|
|
"logps/ref_chosen": -279.2261657714844,
|
|
"logps/ref_rejected": -300.1985168457031,
|
|
"logps/rejected": -300.22119140625,
|
|
"loss": 5.5468,
|
|
"margin_dpo/margin_mean": -0.05891883373260498,
|
|
"margin_dpo/margin_std": 0.8325001001358032,
|
|
"step": 21
|
|
},
|
|
{
|
|
"epoch": 0.04607329842931937,
|
|
"grad_norm": 13.734630584716797,
|
|
"learning_rate": 2.1875e-07,
|
|
"logits/chosen": 1.8216187953948975,
|
|
"logits/rejected": 1.9799120426177979,
|
|
"logps/chosen": -225.4229736328125,
|
|
"logps/ref_chosen": -225.4801788330078,
|
|
"logps/ref_rejected": -236.63134765625,
|
|
"logps/rejected": -236.60411071777344,
|
|
"loss": 5.5409,
|
|
"margin_dpo/margin_mean": 0.029949307441711426,
|
|
"margin_dpo/margin_std": 0.5145200490951538,
|
|
"step": 22
|
|
},
|
|
{
|
|
"epoch": 0.048167539267015703,
|
|
"grad_norm": 15.402115821838379,
|
|
"learning_rate": 2.2916666666666663e-07,
|
|
"logits/chosen": 1.9867033958435059,
|
|
"logits/rejected": 1.8609161376953125,
|
|
"logps/chosen": -340.4596862792969,
|
|
"logps/ref_chosen": -340.510986328125,
|
|
"logps/ref_rejected": -273.1431579589844,
|
|
"logps/rejected": -273.184814453125,
|
|
"loss": 5.5456,
|
|
"margin_dpo/margin_mean": 0.09300780296325684,
|
|
"margin_dpo/margin_std": 0.5188795924186707,
|
|
"step": 23
|
|
},
|
|
{
|
|
"epoch": 0.050261780104712044,
|
|
"grad_norm": 16.485750198364258,
|
|
"learning_rate": 2.3958333333333335e-07,
|
|
"logits/chosen": 1.7313284873962402,
|
|
"logits/rejected": 1.6817138195037842,
|
|
"logps/chosen": -274.0079040527344,
|
|
"logps/ref_chosen": -273.9709777832031,
|
|
"logps/ref_rejected": -269.8603210449219,
|
|
"logps/rejected": -269.9830017089844,
|
|
"loss": 5.5462,
|
|
"margin_dpo/margin_mean": 0.08572280406951904,
|
|
"margin_dpo/margin_std": 0.4962030053138733,
|
|
"step": 24
|
|
},
|
|
{
|
|
"epoch": 0.05235602094240838,
|
|
"grad_norm": 14.515819549560547,
|
|
"learning_rate": 2.5e-07,
|
|
"logits/chosen": 1.7567241191864014,
|
|
"logits/rejected": 1.772882342338562,
|
|
"logps/chosen": -245.420654296875,
|
|
"logps/ref_chosen": -245.38388061523438,
|
|
"logps/ref_rejected": -251.77703857421875,
|
|
"logps/rejected": -251.8808135986328,
|
|
"loss": 5.5402,
|
|
"margin_dpo/margin_mean": 0.06698936223983765,
|
|
"margin_dpo/margin_std": 0.7465457916259766,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.05445026178010471,
|
|
"grad_norm": 15.561816215515137,
|
|
"learning_rate": 2.604166666666667e-07,
|
|
"logits/chosen": 1.6602405309677124,
|
|
"logits/rejected": 1.611204743385315,
|
|
"logps/chosen": -245.07839965820312,
|
|
"logps/ref_chosen": -245.162109375,
|
|
"logps/ref_rejected": -167.06671142578125,
|
|
"logps/rejected": -166.95631408691406,
|
|
"loss": 5.5441,
|
|
"margin_dpo/margin_mean": -0.026699483394622803,
|
|
"margin_dpo/margin_std": 0.7909866571426392,
|
|
"step": 26
|
|
},
|
|
{
|
|
"epoch": 0.05654450261780105,
|
|
"grad_norm": 15.185941696166992,
|
|
"learning_rate": 2.708333333333333e-07,
|
|
"logits/chosen": 2.148705244064331,
|
|
"logits/rejected": 1.9048577547073364,
|
|
"logps/chosen": -309.2626037597656,
|
|
"logps/ref_chosen": -309.4706115722656,
|
|
"logps/ref_rejected": -200.16006469726562,
|
|
"logps/rejected": -200.23269653320312,
|
|
"loss": 5.5469,
|
|
"margin_dpo/margin_mean": 0.2806363105773926,
|
|
"margin_dpo/margin_std": 0.6260923147201538,
|
|
"step": 27
|
|
},
|
|
{
|
|
"epoch": 0.05863874345549738,
|
|
"grad_norm": 15.434507369995117,
|
|
"learning_rate": 2.8125e-07,
|
|
"logits/chosen": 1.9996970891952515,
|
|
"logits/rejected": 2.1089255809783936,
|
|
"logps/chosen": -203.73443603515625,
|
|
"logps/ref_chosen": -203.72039794921875,
|
|
"logps/ref_rejected": -228.1062469482422,
|
|
"logps/rejected": -228.02944946289062,
|
|
"loss": 5.5409,
|
|
"margin_dpo/margin_mean": -0.09086447954177856,
|
|
"margin_dpo/margin_std": 0.3806726932525635,
|
|
"step": 28
|
|
},
|
|
{
|
|
"epoch": 0.060732984293193716,
|
|
"grad_norm": 14.699873924255371,
|
|
"learning_rate": 2.916666666666667e-07,
|
|
"logits/chosen": 2.243607997894287,
|
|
"logits/rejected": 1.9699711799621582,
|
|
"logps/chosen": -341.47991943359375,
|
|
"logps/ref_chosen": -341.7933349609375,
|
|
"logps/ref_rejected": -323.7848815917969,
|
|
"logps/rejected": -323.83416748046875,
|
|
"loss": 5.5414,
|
|
"margin_dpo/margin_mean": 0.3627087473869324,
|
|
"margin_dpo/margin_std": 0.9482086896896362,
|
|
"step": 29
|
|
},
|
|
{
|
|
"epoch": 0.06282722513089005,
|
|
"grad_norm": 14.436098098754883,
|
|
"learning_rate": 3.020833333333333e-07,
|
|
"logits/chosen": 1.4743300676345825,
|
|
"logits/rejected": 1.4441381692886353,
|
|
"logps/chosen": -239.34152221679688,
|
|
"logps/ref_chosen": -239.4767303466797,
|
|
"logps/ref_rejected": -228.0832977294922,
|
|
"logps/rejected": -228.0165252685547,
|
|
"loss": 5.5418,
|
|
"margin_dpo/margin_mean": 0.06841355562210083,
|
|
"margin_dpo/margin_std": 0.7110106348991394,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.06492146596858639,
|
|
"grad_norm": 13.857452392578125,
|
|
"learning_rate": 3.1249999999999997e-07,
|
|
"logits/chosen": 1.6719400882720947,
|
|
"logits/rejected": 1.52069091796875,
|
|
"logps/chosen": -268.8196105957031,
|
|
"logps/ref_chosen": -268.9744567871094,
|
|
"logps/ref_rejected": -221.5098114013672,
|
|
"logps/rejected": -221.68231201171875,
|
|
"loss": 5.5392,
|
|
"margin_dpo/margin_mean": 0.3273264765739441,
|
|
"margin_dpo/margin_std": 0.8021472692489624,
|
|
"step": 31
|
|
},
|
|
{
|
|
"epoch": 0.06701570680628273,
|
|
"grad_norm": 15.621495246887207,
|
|
"learning_rate": 3.2291666666666666e-07,
|
|
"logits/chosen": 1.6164491176605225,
|
|
"logits/rejected": 1.4590275287628174,
|
|
"logps/chosen": -236.6236572265625,
|
|
"logps/ref_chosen": -236.76123046875,
|
|
"logps/ref_rejected": -191.0041046142578,
|
|
"logps/rejected": -190.91786193847656,
|
|
"loss": 5.5383,
|
|
"margin_dpo/margin_mean": 0.051319420337677,
|
|
"margin_dpo/margin_std": 0.562064528465271,
|
|
"step": 32
|
|
},
|
|
{
|
|
"epoch": 0.06910994764397906,
|
|
"grad_norm": 14.935791015625,
|
|
"learning_rate": 3.333333333333333e-07,
|
|
"logits/chosen": 1.937072515487671,
|
|
"logits/rejected": 1.866725206375122,
|
|
"logps/chosen": -258.4335021972656,
|
|
"logps/ref_chosen": -258.6623840332031,
|
|
"logps/ref_rejected": -233.15805053710938,
|
|
"logps/rejected": -233.19522094726562,
|
|
"loss": 5.5401,
|
|
"margin_dpo/margin_mean": 0.26607221364974976,
|
|
"margin_dpo/margin_std": 0.9210672974586487,
|
|
"step": 33
|
|
},
|
|
{
|
|
"epoch": 0.0712041884816754,
|
|
"grad_norm": 17.70219612121582,
|
|
"learning_rate": 3.4375e-07,
|
|
"logits/chosen": 2.076815128326416,
|
|
"logits/rejected": 2.0185177326202393,
|
|
"logps/chosen": -380.03729248046875,
|
|
"logps/ref_chosen": -380.25201416015625,
|
|
"logps/ref_rejected": -315.8236389160156,
|
|
"logps/rejected": -315.7915954589844,
|
|
"loss": 5.5395,
|
|
"margin_dpo/margin_mean": 0.18271714448928833,
|
|
"margin_dpo/margin_std": 0.7701175212860107,
|
|
"step": 34
|
|
},
|
|
{
|
|
"epoch": 0.07329842931937172,
|
|
"grad_norm": 13.645162582397461,
|
|
"learning_rate": 3.541666666666667e-07,
|
|
"logits/chosen": 1.5646406412124634,
|
|
"logits/rejected": 1.7504596710205078,
|
|
"logps/chosen": -245.80335998535156,
|
|
"logps/ref_chosen": -246.0772705078125,
|
|
"logps/ref_rejected": -317.1019592285156,
|
|
"logps/rejected": -317.00274658203125,
|
|
"loss": 5.54,
|
|
"margin_dpo/margin_mean": 0.17473018169403076,
|
|
"margin_dpo/margin_std": 0.7614114284515381,
|
|
"step": 35
|
|
},
|
|
{
|
|
"epoch": 0.07539267015706806,
|
|
"grad_norm": 17.520965576171875,
|
|
"learning_rate": 3.645833333333333e-07,
|
|
"logits/chosen": 1.7731884717941284,
|
|
"logits/rejected": 1.8305914402008057,
|
|
"logps/chosen": -343.9805908203125,
|
|
"logps/ref_chosen": -344.1368408203125,
|
|
"logps/ref_rejected": -343.6894836425781,
|
|
"logps/rejected": -343.47882080078125,
|
|
"loss": 5.5342,
|
|
"margin_dpo/margin_mean": -0.05438530445098877,
|
|
"margin_dpo/margin_std": 0.7112289071083069,
|
|
"step": 36
|
|
},
|
|
{
|
|
"epoch": 0.0774869109947644,
|
|
"grad_norm": 15.14476203918457,
|
|
"learning_rate": 3.75e-07,
|
|
"logits/chosen": 1.9591785669326782,
|
|
"logits/rejected": 1.9149752855300903,
|
|
"logps/chosen": -310.9266357421875,
|
|
"logps/ref_chosen": -311.3376770019531,
|
|
"logps/ref_rejected": -278.5052185058594,
|
|
"logps/rejected": -278.489990234375,
|
|
"loss": 5.5375,
|
|
"margin_dpo/margin_mean": 0.3958609700202942,
|
|
"margin_dpo/margin_std": 0.6456325054168701,
|
|
"step": 37
|
|
},
|
|
{
|
|
"epoch": 0.07958115183246073,
|
|
"grad_norm": 15.079659461975098,
|
|
"learning_rate": 3.8541666666666665e-07,
|
|
"logits/chosen": 2.1111977100372314,
|
|
"logits/rejected": 2.3584368228912354,
|
|
"logps/chosen": -193.07827758789062,
|
|
"logps/ref_chosen": -193.3851318359375,
|
|
"logps/ref_rejected": -234.6280975341797,
|
|
"logps/rejected": -234.42193603515625,
|
|
"loss": 5.5401,
|
|
"margin_dpo/margin_mean": 0.10068202018737793,
|
|
"margin_dpo/margin_std": 0.5997118353843689,
|
|
"step": 38
|
|
},
|
|
{
|
|
"epoch": 0.08167539267015707,
|
|
"grad_norm": 15.749566078186035,
|
|
"learning_rate": 3.958333333333333e-07,
|
|
"logits/chosen": 1.7943568229675293,
|
|
"logits/rejected": 1.8780990839004517,
|
|
"logps/chosen": -290.79742431640625,
|
|
"logps/ref_chosen": -291.5687255859375,
|
|
"logps/ref_rejected": -317.7392578125,
|
|
"logps/rejected": -317.748779296875,
|
|
"loss": 5.5255,
|
|
"margin_dpo/margin_mean": 0.7807860374450684,
|
|
"margin_dpo/margin_std": 1.0324419736862183,
|
|
"step": 39
|
|
},
|
|
{
|
|
"epoch": 0.08376963350785341,
|
|
"grad_norm": 15.053966522216797,
|
|
"learning_rate": 4.0625e-07,
|
|
"logits/chosen": 1.7152007818222046,
|
|
"logits/rejected": 1.685285210609436,
|
|
"logps/chosen": -211.45947265625,
|
|
"logps/ref_chosen": -211.951904296875,
|
|
"logps/ref_rejected": -166.82864379882812,
|
|
"logps/rejected": -166.58428955078125,
|
|
"loss": 5.528,
|
|
"margin_dpo/margin_mean": 0.24808716773986816,
|
|
"margin_dpo/margin_std": 0.9657536745071411,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.08586387434554973,
|
|
"grad_norm": 15.568541526794434,
|
|
"learning_rate": 4.1666666666666667e-07,
|
|
"logits/chosen": 1.968687653541565,
|
|
"logits/rejected": 1.8429195880889893,
|
|
"logps/chosen": -300.13665771484375,
|
|
"logps/ref_chosen": -300.6400146484375,
|
|
"logps/ref_rejected": -224.77317810058594,
|
|
"logps/rejected": -224.72613525390625,
|
|
"loss": 5.535,
|
|
"margin_dpo/margin_mean": 0.45627307891845703,
|
|
"margin_dpo/margin_std": 0.7364793419837952,
|
|
"step": 41
|
|
},
|
|
{
|
|
"epoch": 0.08795811518324607,
|
|
"grad_norm": 14.581147193908691,
|
|
"learning_rate": 4.270833333333333e-07,
|
|
"logits/chosen": 2.1621668338775635,
|
|
"logits/rejected": 2.195481061935425,
|
|
"logps/chosen": -291.0929870605469,
|
|
"logps/ref_chosen": -291.4709167480469,
|
|
"logps/ref_rejected": -285.62982177734375,
|
|
"logps/rejected": -285.6851501464844,
|
|
"loss": 5.5294,
|
|
"margin_dpo/margin_mean": 0.43321943283081055,
|
|
"margin_dpo/margin_std": 1.0551120042800903,
|
|
"step": 42
|
|
},
|
|
{
|
|
"epoch": 0.09005235602094241,
|
|
"grad_norm": 15.62813663482666,
|
|
"learning_rate": 4.375e-07,
|
|
"logits/chosen": 1.9382034540176392,
|
|
"logits/rejected": 1.9245309829711914,
|
|
"logps/chosen": -313.7782897949219,
|
|
"logps/ref_chosen": -314.3768615722656,
|
|
"logps/ref_rejected": -246.80313110351562,
|
|
"logps/rejected": -246.7808380126953,
|
|
"loss": 5.5273,
|
|
"margin_dpo/margin_mean": 0.5762431621551514,
|
|
"margin_dpo/margin_std": 0.9067457914352417,
|
|
"step": 43
|
|
},
|
|
{
|
|
"epoch": 0.09214659685863874,
|
|
"grad_norm": 15.793681144714355,
|
|
"learning_rate": 4.479166666666667e-07,
|
|
"logits/chosen": 1.7762880325317383,
|
|
"logits/rejected": 1.7065317630767822,
|
|
"logps/chosen": -209.00802612304688,
|
|
"logps/ref_chosen": -209.8181915283203,
|
|
"logps/ref_rejected": -246.21340942382812,
|
|
"logps/rejected": -246.0368194580078,
|
|
"loss": 5.5223,
|
|
"margin_dpo/margin_mean": 0.633570671081543,
|
|
"margin_dpo/margin_std": 1.1882718801498413,
|
|
"step": 44
|
|
},
|
|
{
|
|
"epoch": 0.09424083769633508,
|
|
"grad_norm": 16.929059982299805,
|
|
"learning_rate": 4.5833333333333327e-07,
|
|
"logits/chosen": 1.7606732845306396,
|
|
"logits/rejected": 1.5792968273162842,
|
|
"logps/chosen": -308.1605224609375,
|
|
"logps/ref_chosen": -309.0930480957031,
|
|
"logps/ref_rejected": -269.3559265136719,
|
|
"logps/rejected": -268.9593200683594,
|
|
"loss": 5.5261,
|
|
"margin_dpo/margin_mean": 0.535961389541626,
|
|
"margin_dpo/margin_std": 1.0746005773544312,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 0.09633507853403141,
|
|
"grad_norm": 16.489280700683594,
|
|
"learning_rate": 4.6874999999999996e-07,
|
|
"logits/chosen": 1.9223171472549438,
|
|
"logits/rejected": 1.9758403301239014,
|
|
"logps/chosen": -298.0412292480469,
|
|
"logps/ref_chosen": -298.72467041015625,
|
|
"logps/ref_rejected": -309.87786865234375,
|
|
"logps/rejected": -309.4717712402344,
|
|
"loss": 5.5236,
|
|
"margin_dpo/margin_mean": 0.277274489402771,
|
|
"margin_dpo/margin_std": 0.9340643882751465,
|
|
"step": 46
|
|
},
|
|
{
|
|
"epoch": 0.09842931937172775,
|
|
"grad_norm": 13.506661415100098,
|
|
"learning_rate": 4.791666666666667e-07,
|
|
"logits/chosen": 1.6691988706588745,
|
|
"logits/rejected": 2.0380465984344482,
|
|
"logps/chosen": -215.84332275390625,
|
|
"logps/ref_chosen": -216.43553161621094,
|
|
"logps/ref_rejected": -292.6329345703125,
|
|
"logps/rejected": -291.96148681640625,
|
|
"loss": 5.5293,
|
|
"margin_dpo/margin_mean": -0.0792464017868042,
|
|
"margin_dpo/margin_std": 1.1294535398483276,
|
|
"step": 47
|
|
},
|
|
{
|
|
"epoch": 0.10052356020942409,
|
|
"grad_norm": 14.86147403717041,
|
|
"learning_rate": 4.895833333333333e-07,
|
|
"logits/chosen": 2.211613178253174,
|
|
"logits/rejected": 2.186110496520996,
|
|
"logps/chosen": -234.05947875976562,
|
|
"logps/ref_chosen": -234.77496337890625,
|
|
"logps/ref_rejected": -240.41433715820312,
|
|
"logps/rejected": -240.24525451660156,
|
|
"loss": 5.5203,
|
|
"margin_dpo/margin_mean": 0.5463833212852478,
|
|
"margin_dpo/margin_std": 1.3220971822738647,
|
|
"step": 48
|
|
},
|
|
{
|
|
"epoch": 0.10261780104712041,
|
|
"grad_norm": 15.43526840209961,
|
|
"learning_rate": 5e-07,
|
|
"logits/chosen": 1.7962108850479126,
|
|
"logits/rejected": 1.9277849197387695,
|
|
"logps/chosen": -245.73326110839844,
|
|
"logps/ref_chosen": -246.7688446044922,
|
|
"logps/ref_rejected": -253.47378540039062,
|
|
"logps/rejected": -253.3438720703125,
|
|
"loss": 5.5203,
|
|
"margin_dpo/margin_mean": 0.9057276248931885,
|
|
"margin_dpo/margin_std": 1.4221221208572388,
|
|
"step": 49
|
|
},
|
|
{
|
|
"epoch": 0.10471204188481675,
|
|
"grad_norm": 15.111068725585938,
|
|
"learning_rate": 4.999932966293553e-07,
|
|
"logits/chosen": 2.2119665145874023,
|
|
"logits/rejected": 2.335810422897339,
|
|
"logps/chosen": -281.3116760253906,
|
|
"logps/ref_chosen": -282.61981201171875,
|
|
"logps/ref_rejected": -340.8515625,
|
|
"logps/rejected": -340.31781005859375,
|
|
"loss": 5.5284,
|
|
"margin_dpo/margin_mean": 0.7743173837661743,
|
|
"margin_dpo/margin_std": 1.4886585474014282,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.1068062827225131,
|
|
"grad_norm": 14.817649841308594,
|
|
"learning_rate": 4.999731868769026e-07,
|
|
"logits/chosen": 1.637377381324768,
|
|
"logits/rejected": 1.7862030267715454,
|
|
"logps/chosen": -244.794677734375,
|
|
"logps/ref_chosen": -245.87562561035156,
|
|
"logps/ref_rejected": -309.7420654296875,
|
|
"logps/rejected": -309.6230773925781,
|
|
"loss": 5.5202,
|
|
"margin_dpo/margin_mean": 0.961925208568573,
|
|
"margin_dpo/margin_std": 1.9343087673187256,
|
|
"step": 51
|
|
},
|
|
{
|
|
"epoch": 0.10890052356020942,
|
|
"grad_norm": 17.035507202148438,
|
|
"learning_rate": 4.99939671821067e-07,
|
|
"logits/chosen": 1.8847734928131104,
|
|
"logits/rejected": 2.039155960083008,
|
|
"logps/chosen": -276.9980163574219,
|
|
"logps/ref_chosen": -278.3123474121094,
|
|
"logps/ref_rejected": -320.58203125,
|
|
"logps/rejected": -319.9336853027344,
|
|
"loss": 5.5067,
|
|
"margin_dpo/margin_mean": 0.6659917235374451,
|
|
"margin_dpo/margin_std": 1.508874773979187,
|
|
"step": 52
|
|
},
|
|
{
|
|
"epoch": 0.11099476439790576,
|
|
"grad_norm": 15.945631980895996,
|
|
"learning_rate": 4.998927532591591e-07,
|
|
"logits/chosen": 2.085860013961792,
|
|
"logits/rejected": 2.0801711082458496,
|
|
"logps/chosen": -331.2710266113281,
|
|
"logps/ref_chosen": -332.776123046875,
|
|
"logps/ref_rejected": -325.1794128417969,
|
|
"logps/rejected": -324.69622802734375,
|
|
"loss": 5.5144,
|
|
"margin_dpo/margin_mean": 1.0218517780303955,
|
|
"margin_dpo/margin_std": 1.590319037437439,
|
|
"step": 53
|
|
},
|
|
{
|
|
"epoch": 0.1130890052356021,
|
|
"grad_norm": 14.57484245300293,
|
|
"learning_rate": 4.998324337072792e-07,
|
|
"logits/chosen": 1.3913365602493286,
|
|
"logits/rejected": 1.4456019401550293,
|
|
"logps/chosen": -294.7577819824219,
|
|
"logps/ref_chosen": -296.2243347167969,
|
|
"logps/ref_rejected": -267.64251708984375,
|
|
"logps/rejected": -267.3682861328125,
|
|
"loss": 5.5131,
|
|
"margin_dpo/margin_mean": 1.1922770738601685,
|
|
"margin_dpo/margin_std": 1.47157621383667,
|
|
"step": 54
|
|
},
|
|
{
|
|
"epoch": 0.11518324607329843,
|
|
"grad_norm": 12.808218955993652,
|
|
"learning_rate": 4.997587164001815e-07,
|
|
"logits/chosen": 1.8794647455215454,
|
|
"logits/rejected": 1.8777508735656738,
|
|
"logps/chosen": -197.05091857910156,
|
|
"logps/ref_chosen": -198.1138916015625,
|
|
"logps/ref_rejected": -185.93772888183594,
|
|
"logps/rejected": -185.5297088623047,
|
|
"loss": 5.522,
|
|
"margin_dpo/margin_mean": 0.6549429893493652,
|
|
"margin_dpo/margin_std": 1.0751301050186157,
|
|
"step": 55
|
|
},
|
|
{
|
|
"epoch": 0.11727748691099477,
|
|
"grad_norm": 14.403154373168945,
|
|
"learning_rate": 4.996716052911017e-07,
|
|
"logits/chosen": 2.004265785217285,
|
|
"logits/rejected": 1.965603232383728,
|
|
"logps/chosen": -267.2569580078125,
|
|
"logps/ref_chosen": -268.8618469238281,
|
|
"logps/ref_rejected": -245.21348571777344,
|
|
"logps/rejected": -244.97555541992188,
|
|
"loss": 5.5085,
|
|
"margin_dpo/margin_mean": 1.3669579029083252,
|
|
"margin_dpo/margin_std": 1.768923282623291,
|
|
"step": 56
|
|
},
|
|
{
|
|
"epoch": 0.1193717277486911,
|
|
"grad_norm": 17.60223960876465,
|
|
"learning_rate": 4.99571105051544e-07,
|
|
"logits/chosen": 2.1416828632354736,
|
|
"logits/rejected": 1.8643951416015625,
|
|
"logps/chosen": -286.81622314453125,
|
|
"logps/ref_chosen": -288.4784851074219,
|
|
"logps/ref_rejected": -239.400146484375,
|
|
"logps/rejected": -238.46566772460938,
|
|
"loss": 5.4919,
|
|
"margin_dpo/margin_mean": 0.7277634739875793,
|
|
"margin_dpo/margin_std": 1.487809658050537,
|
|
"step": 57
|
|
},
|
|
{
|
|
"epoch": 0.12146596858638743,
|
|
"grad_norm": 14.885680198669434,
|
|
"learning_rate": 4.994572210710314e-07,
|
|
"logits/chosen": 1.8542314767837524,
|
|
"logits/rejected": 1.8795952796936035,
|
|
"logps/chosen": -276.6270446777344,
|
|
"logps/ref_chosen": -278.2837219238281,
|
|
"logps/ref_rejected": -262.5280456542969,
|
|
"logps/rejected": -262.4251708984375,
|
|
"loss": 5.5076,
|
|
"margin_dpo/margin_mean": 1.5538146495819092,
|
|
"margin_dpo/margin_std": 1.7451914548873901,
|
|
"step": 58
|
|
},
|
|
{
|
|
"epoch": 0.12356020942408377,
|
|
"grad_norm": 15.256675720214844,
|
|
"learning_rate": 4.993299594568162e-07,
|
|
"logits/chosen": 1.695129632949829,
|
|
"logits/rejected": 1.76768159866333,
|
|
"logps/chosen": -231.92245483398438,
|
|
"logps/ref_chosen": -232.77662658691406,
|
|
"logps/ref_rejected": -226.2711181640625,
|
|
"logps/rejected": -225.69354248046875,
|
|
"loss": 5.5103,
|
|
"margin_dpo/margin_mean": 0.27659308910369873,
|
|
"margin_dpo/margin_std": 1.5459599494934082,
|
|
"step": 59
|
|
},
|
|
{
|
|
"epoch": 0.1256544502617801,
|
|
"grad_norm": 14.66773509979248,
|
|
"learning_rate": 4.991893270335525e-07,
|
|
"logits/chosen": 1.8791348934173584,
|
|
"logits/rejected": 1.587320327758789,
|
|
"logps/chosen": -314.26800537109375,
|
|
"logps/ref_chosen": -315.6903991699219,
|
|
"logps/ref_rejected": -190.40899658203125,
|
|
"logps/rejected": -189.97193908691406,
|
|
"loss": 5.4951,
|
|
"margin_dpo/margin_mean": 0.9853799939155579,
|
|
"margin_dpo/margin_std": 1.7359843254089355,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.12774869109947645,
|
|
"grad_norm": 15.011839866638184,
|
|
"learning_rate": 4.990353313429303e-07,
|
|
"logits/chosen": 2.034388542175293,
|
|
"logits/rejected": 2.065513849258423,
|
|
"logps/chosen": -249.50173950195312,
|
|
"logps/ref_chosen": -251.527099609375,
|
|
"logps/ref_rejected": -261.0340270996094,
|
|
"logps/rejected": -259.8365173339844,
|
|
"loss": 5.503,
|
|
"margin_dpo/margin_mean": 0.8278936147689819,
|
|
"margin_dpo/margin_std": 1.6765094995498657,
|
|
"step": 61
|
|
},
|
|
{
|
|
"epoch": 0.12984293193717278,
|
|
"grad_norm": 14.675267219543457,
|
|
"learning_rate": 4.988679806432711e-07,
|
|
"logits/chosen": 1.8862786293029785,
|
|
"logits/rejected": 1.8161289691925049,
|
|
"logps/chosen": -255.54144287109375,
|
|
"logps/ref_chosen": -257.3919982910156,
|
|
"logps/ref_rejected": -282.1814880371094,
|
|
"logps/rejected": -281.52008056640625,
|
|
"loss": 5.4967,
|
|
"margin_dpo/margin_mean": 1.189134955406189,
|
|
"margin_dpo/margin_std": 2.0515801906585693,
|
|
"step": 62
|
|
},
|
|
{
|
|
"epoch": 0.1319371727748691,
|
|
"grad_norm": 14.802937507629395,
|
|
"learning_rate": 4.986872839090852e-07,
|
|
"logits/chosen": 2.1043763160705566,
|
|
"logits/rejected": 2.2130091190338135,
|
|
"logps/chosen": -320.39398193359375,
|
|
"logps/ref_chosen": -322.24725341796875,
|
|
"logps/ref_rejected": -327.70892333984375,
|
|
"logps/rejected": -326.7320251464844,
|
|
"loss": 5.4938,
|
|
"margin_dpo/margin_mean": 0.876427948474884,
|
|
"margin_dpo/margin_std": 2.8320491313934326,
|
|
"step": 63
|
|
},
|
|
{
|
|
"epoch": 0.13403141361256546,
|
|
"grad_norm": 15.675727844238281,
|
|
"learning_rate": 4.9849325083059e-07,
|
|
"logits/chosen": 1.840427279472351,
|
|
"logits/rejected": 2.098795175552368,
|
|
"logps/chosen": -333.1174621582031,
|
|
"logps/ref_chosen": -335.7379455566406,
|
|
"logps/ref_rejected": -337.8742980957031,
|
|
"logps/rejected": -337.04913330078125,
|
|
"loss": 5.4794,
|
|
"margin_dpo/margin_mean": 1.795319676399231,
|
|
"margin_dpo/margin_std": 3.2046210765838623,
|
|
"step": 64
|
|
},
|
|
{
|
|
"epoch": 0.13612565445026178,
|
|
"grad_norm": 14.844298362731934,
|
|
"learning_rate": 4.982858918131906e-07,
|
|
"logits/chosen": 1.8770238161087036,
|
|
"logits/rejected": 1.9210056066513062,
|
|
"logps/chosen": -309.9880676269531,
|
|
"logps/ref_chosen": -312.36358642578125,
|
|
"logps/ref_rejected": -300.220947265625,
|
|
"logps/rejected": -298.6002502441406,
|
|
"loss": 5.5002,
|
|
"margin_dpo/margin_mean": 0.754831075668335,
|
|
"margin_dpo/margin_std": 2.1830434799194336,
|
|
"step": 65
|
|
},
|
|
{
|
|
"epoch": 0.1382198952879581,
|
|
"grad_norm": 14.79240894317627,
|
|
"learning_rate": 4.980652179769217e-07,
|
|
"logits/chosen": 1.8671329021453857,
|
|
"logits/rejected": 2.058912515640259,
|
|
"logps/chosen": -195.7026824951172,
|
|
"logps/ref_chosen": -198.186767578125,
|
|
"logps/ref_rejected": -248.18748474121094,
|
|
"logps/rejected": -247.15882873535156,
|
|
"loss": 5.4918,
|
|
"margin_dpo/margin_mean": 1.4554123878479004,
|
|
"margin_dpo/margin_std": 2.3160204887390137,
|
|
"step": 66
|
|
},
|
|
{
|
|
"epoch": 0.14031413612565444,
|
|
"grad_norm": 14.211318969726562,
|
|
"learning_rate": 4.978312411558517e-07,
|
|
"logits/chosen": 2.104246139526367,
|
|
"logits/rejected": 2.138582706451416,
|
|
"logps/chosen": -289.1455078125,
|
|
"logps/ref_chosen": -291.9940490722656,
|
|
"logps/ref_rejected": -269.945068359375,
|
|
"logps/rejected": -268.6217346191406,
|
|
"loss": 5.4991,
|
|
"margin_dpo/margin_mean": 1.5252022743225098,
|
|
"margin_dpo/margin_std": 2.973376989364624,
|
|
"step": 67
|
|
},
|
|
{
|
|
"epoch": 0.1424083769633508,
|
|
"grad_norm": 14.742173194885254,
|
|
"learning_rate": 4.975839738974473e-07,
|
|
"logits/chosen": 1.597095012664795,
|
|
"logits/rejected": 1.4467945098876953,
|
|
"logps/chosen": -287.52056884765625,
|
|
"logps/ref_chosen": -289.9323425292969,
|
|
"logps/ref_rejected": -225.7897491455078,
|
|
"logps/rejected": -225.21658325195312,
|
|
"loss": 5.4771,
|
|
"margin_dpo/margin_mean": 1.8386340141296387,
|
|
"margin_dpo/margin_std": 3.168964385986328,
|
|
"step": 68
|
|
},
|
|
{
|
|
"epoch": 0.14450261780104712,
|
|
"grad_norm": 14.997187614440918,
|
|
"learning_rate": 4.97323429461901e-07,
|
|
"logits/chosen": 2.1530022621154785,
|
|
"logits/rejected": 2.05501651763916,
|
|
"logps/chosen": -263.3707580566406,
|
|
"logps/ref_chosen": -266.7104797363281,
|
|
"logps/ref_rejected": -229.5946502685547,
|
|
"logps/rejected": -228.25326538085938,
|
|
"loss": 5.4597,
|
|
"margin_dpo/margin_mean": 1.9983257055282593,
|
|
"margin_dpo/margin_std": 3.47263503074646,
|
|
"step": 69
|
|
},
|
|
{
|
|
"epoch": 0.14659685863874344,
|
|
"grad_norm": 15.571727752685547,
|
|
"learning_rate": 4.970496218214204e-07,
|
|
"logits/chosen": 2.235600471496582,
|
|
"logits/rejected": 2.3245067596435547,
|
|
"logps/chosen": -265.7595520019531,
|
|
"logps/ref_chosen": -268.6711120605469,
|
|
"logps/ref_rejected": -261.61273193359375,
|
|
"logps/rejected": -260.5652160644531,
|
|
"loss": 5.4636,
|
|
"margin_dpo/margin_mean": 1.864060878753662,
|
|
"margin_dpo/margin_std": 2.7824549674987793,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.1486910994764398,
|
|
"grad_norm": 14.973682403564453,
|
|
"learning_rate": 4.967625656594781e-07,
|
|
"logits/chosen": 1.9091215133666992,
|
|
"logits/rejected": 1.9661266803741455,
|
|
"logps/chosen": -241.69586181640625,
|
|
"logps/ref_chosen": -244.97821044921875,
|
|
"logps/ref_rejected": -263.7174377441406,
|
|
"logps/rejected": -262.50848388671875,
|
|
"loss": 5.469,
|
|
"margin_dpo/margin_mean": 2.0734434127807617,
|
|
"margin_dpo/margin_std": 4.373683929443359,
|
|
"step": 71
|
|
},
|
|
{
|
|
"epoch": 0.15078534031413612,
|
|
"grad_norm": 14.649141311645508,
|
|
"learning_rate": 4.964622763700252e-07,
|
|
"logits/chosen": 1.8277561664581299,
|
|
"logits/rejected": 1.8917427062988281,
|
|
"logps/chosen": -276.90264892578125,
|
|
"logps/ref_chosen": -280.0353698730469,
|
|
"logps/ref_rejected": -291.1289367675781,
|
|
"logps/rejected": -289.7200927734375,
|
|
"loss": 5.4668,
|
|
"margin_dpo/margin_mean": 1.7238655090332031,
|
|
"margin_dpo/margin_std": 3.216670274734497,
|
|
"step": 72
|
|
},
|
|
{
|
|
"epoch": 0.15287958115183245,
|
|
"grad_norm": 14.681085586547852,
|
|
"learning_rate": 4.961487700566646e-07,
|
|
"logits/chosen": 2.040257453918457,
|
|
"logits/rejected": 2.0101261138916016,
|
|
"logps/chosen": -237.6248321533203,
|
|
"logps/ref_chosen": -241.37384033203125,
|
|
"logps/ref_rejected": -227.28871154785156,
|
|
"logps/rejected": -224.7103271484375,
|
|
"loss": 5.4684,
|
|
"margin_dpo/margin_mean": 1.1706353425979614,
|
|
"margin_dpo/margin_std": 2.684138774871826,
|
|
"step": 73
|
|
},
|
|
{
|
|
"epoch": 0.1549738219895288,
|
|
"grad_norm": 16.057296752929688,
|
|
"learning_rate": 4.958220635317885e-07,
|
|
"logits/chosen": 1.7149076461791992,
|
|
"logits/rejected": 1.616335153579712,
|
|
"logps/chosen": -427.9046630859375,
|
|
"logps/ref_chosen": -432.6361389160156,
|
|
"logps/ref_rejected": -408.990478515625,
|
|
"logps/rejected": -406.4610595703125,
|
|
"loss": 5.4703,
|
|
"margin_dpo/margin_mean": 2.2020528316497803,
|
|
"margin_dpo/margin_std": 3.4136807918548584,
|
|
"step": 74
|
|
},
|
|
{
|
|
"epoch": 0.15706806282722513,
|
|
"grad_norm": 15.3256254196167,
|
|
"learning_rate": 4.954821743156767e-07,
|
|
"logits/chosen": 1.8307483196258545,
|
|
"logits/rejected": 1.8694071769714355,
|
|
"logps/chosen": -277.4227294921875,
|
|
"logps/ref_chosen": -282.2913513183594,
|
|
"logps/ref_rejected": -227.30093383789062,
|
|
"logps/rejected": -225.89971923828125,
|
|
"loss": 5.4384,
|
|
"margin_dpo/margin_mean": 3.4673845767974854,
|
|
"margin_dpo/margin_std": 3.8357293605804443,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.15916230366492146,
|
|
"grad_norm": 16.74871253967285,
|
|
"learning_rate": 4.951291206355559e-07,
|
|
"logits/chosen": 1.9061857461929321,
|
|
"logits/rejected": 1.6594858169555664,
|
|
"logps/chosen": -272.63018798828125,
|
|
"logps/ref_chosen": -277.90081787109375,
|
|
"logps/ref_rejected": -214.1353302001953,
|
|
"logps/rejected": -211.89590454101562,
|
|
"loss": 5.431,
|
|
"margin_dpo/margin_mean": 3.031224250793457,
|
|
"margin_dpo/margin_std": 3.183443546295166,
|
|
"step": 76
|
|
},
|
|
{
|
|
"epoch": 0.1612565445026178,
|
|
"grad_norm": 18.260398864746094,
|
|
"learning_rate": 4.947629214246236e-07,
|
|
"logits/chosen": 2.142491102218628,
|
|
"logits/rejected": 2.1160454750061035,
|
|
"logps/chosen": -278.9680480957031,
|
|
"logps/ref_chosen": -283.3741455078125,
|
|
"logps/ref_rejected": -239.51246643066406,
|
|
"logps/rejected": -237.45001220703125,
|
|
"loss": 5.4527,
|
|
"margin_dpo/margin_mean": 2.3436222076416016,
|
|
"margin_dpo/margin_std": 3.3090319633483887,
|
|
"step": 77
|
|
},
|
|
{
|
|
"epoch": 0.16335078534031414,
|
|
"grad_norm": 14.633062362670898,
|
|
"learning_rate": 4.943835963210323e-07,
|
|
"logits/chosen": 1.6990811824798584,
|
|
"logits/rejected": 1.6937521696090698,
|
|
"logps/chosen": -202.76388549804688,
|
|
"logps/ref_chosen": -207.1702423095703,
|
|
"logps/ref_rejected": -196.26866149902344,
|
|
"logps/rejected": -194.35032653808594,
|
|
"loss": 5.4294,
|
|
"margin_dpo/margin_mean": 2.4880149364471436,
|
|
"margin_dpo/margin_std": 3.424355983734131,
|
|
"step": 78
|
|
},
|
|
{
|
|
"epoch": 0.16544502617801046,
|
|
"grad_norm": 16.774738311767578,
|
|
"learning_rate": 4.939911656668361e-07,
|
|
"logits/chosen": 1.9445700645446777,
|
|
"logits/rejected": 2.229759454727173,
|
|
"logps/chosen": -208.6917724609375,
|
|
"logps/ref_chosen": -212.90396118164062,
|
|
"logps/ref_rejected": -242.32528686523438,
|
|
"logps/rejected": -239.5742950439453,
|
|
"loss": 5.4268,
|
|
"margin_dpo/margin_mean": 1.461201786994934,
|
|
"margin_dpo/margin_std": 4.005527973175049,
|
|
"step": 79
|
|
},
|
|
{
|
|
"epoch": 0.16753926701570682,
|
|
"grad_norm": 15.637154579162598,
|
|
"learning_rate": 4.935856505068998e-07,
|
|
"logits/chosen": 1.3914299011230469,
|
|
"logits/rejected": 1.5492628812789917,
|
|
"logps/chosen": -251.85031127929688,
|
|
"logps/ref_chosen": -257.9057312011719,
|
|
"logps/ref_rejected": -246.391845703125,
|
|
"logps/rejected": -243.01177978515625,
|
|
"loss": 5.4504,
|
|
"margin_dpo/margin_mean": 2.6753690242767334,
|
|
"margin_dpo/margin_std": 3.9176571369171143,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.16963350785340314,
|
|
"grad_norm": 14.57850456237793,
|
|
"learning_rate": 4.93167072587771e-07,
|
|
"logits/chosen": 2.009546995162964,
|
|
"logits/rejected": 2.2237491607666016,
|
|
"logps/chosen": -220.03717041015625,
|
|
"logps/ref_chosen": -226.68576049804688,
|
|
"logps/ref_rejected": -215.2713623046875,
|
|
"logps/rejected": -212.5276336669922,
|
|
"loss": 5.4326,
|
|
"margin_dpo/margin_mean": 3.9048891067504883,
|
|
"margin_dpo/margin_std": 3.982060432434082,
|
|
"step": 81
|
|
},
|
|
{
|
|
"epoch": 0.17172774869109947,
|
|
"grad_norm": 15.640838623046875,
|
|
"learning_rate": 4.92735454356513e-07,
|
|
"logits/chosen": 1.8449329137802124,
|
|
"logits/rejected": 1.773772954940796,
|
|
"logps/chosen": -290.084228515625,
|
|
"logps/ref_chosen": -296.12799072265625,
|
|
"logps/ref_rejected": -261.3748474121094,
|
|
"logps/rejected": -258.4228515625,
|
|
"loss": 5.4297,
|
|
"margin_dpo/margin_mean": 3.091761589050293,
|
|
"margin_dpo/margin_std": 4.757362365722656,
|
|
"step": 82
|
|
},
|
|
{
|
|
"epoch": 0.17382198952879582,
|
|
"grad_norm": 15.583915710449219,
|
|
"learning_rate": 4.922908189595017e-07,
|
|
"logits/chosen": 1.8198847770690918,
|
|
"logits/rejected": 1.8020501136779785,
|
|
"logps/chosen": -255.5862274169922,
|
|
"logps/ref_chosen": -261.39862060546875,
|
|
"logps/ref_rejected": -279.9942626953125,
|
|
"logps/rejected": -276.3531799316406,
|
|
"loss": 5.4115,
|
|
"margin_dpo/margin_mean": 2.1713221073150635,
|
|
"margin_dpo/margin_std": 4.908409118652344,
|
|
"step": 83
|
|
},
|
|
{
|
|
"epoch": 0.17591623036649215,
|
|
"grad_norm": 15.165980339050293,
|
|
"learning_rate": 4.918331902411841e-07,
|
|
"logits/chosen": 2.0596227645874023,
|
|
"logits/rejected": 1.9471057653427124,
|
|
"logps/chosen": -385.02862548828125,
|
|
"logps/ref_chosen": -392.54547119140625,
|
|
"logps/ref_rejected": -342.066162109375,
|
|
"logps/rejected": -336.90234375,
|
|
"loss": 5.4311,
|
|
"margin_dpo/margin_mean": 2.353001356124878,
|
|
"margin_dpo/margin_std": 5.491085052490234,
|
|
"step": 84
|
|
},
|
|
{
|
|
"epoch": 0.17801047120418848,
|
|
"grad_norm": 13.898218154907227,
|
|
"learning_rate": 4.913625927427995e-07,
|
|
"logits/chosen": 1.505142331123352,
|
|
"logits/rejected": 1.6710948944091797,
|
|
"logps/chosen": -186.115478515625,
|
|
"logps/ref_chosen": -192.9306640625,
|
|
"logps/ref_rejected": -231.5825653076172,
|
|
"logps/rejected": -227.84988403320312,
|
|
"loss": 5.4719,
|
|
"margin_dpo/margin_mean": 3.082519054412842,
|
|
"margin_dpo/margin_std": 4.139708995819092,
|
|
"step": 85
|
|
},
|
|
{
|
|
"epoch": 0.18010471204188483,
|
|
"grad_norm": 16.21003532409668,
|
|
"learning_rate": 4.908790517010636e-07,
|
|
"logits/chosen": 1.800257682800293,
|
|
"logits/rejected": 1.8198274374008179,
|
|
"logps/chosen": -306.5592346191406,
|
|
"logps/ref_chosen": -313.5525207519531,
|
|
"logps/ref_rejected": -285.59228515625,
|
|
"logps/rejected": -280.493896484375,
|
|
"loss": 5.4098,
|
|
"margin_dpo/margin_mean": 1.8949062824249268,
|
|
"margin_dpo/margin_std": 6.387627601623535,
|
|
"step": 86
|
|
},
|
|
{
|
|
"epoch": 0.18219895287958116,
|
|
"grad_norm": 15.313575744628906,
|
|
"learning_rate": 4.903825930468148e-07,
|
|
"logits/chosen": 1.5017904043197632,
|
|
"logits/rejected": 1.4683315753936768,
|
|
"logps/chosen": -227.59046936035156,
|
|
"logps/ref_chosen": -236.03445434570312,
|
|
"logps/ref_rejected": -225.67410278320312,
|
|
"logps/rejected": -221.80938720703125,
|
|
"loss": 5.3899,
|
|
"margin_dpo/margin_mean": 4.5792436599731445,
|
|
"margin_dpo/margin_std": 6.2218732833862305,
|
|
"step": 87
|
|
},
|
|
{
|
|
"epoch": 0.18429319371727748,
|
|
"grad_norm": 14.063505172729492,
|
|
"learning_rate": 4.898732434036243e-07,
|
|
"logits/chosen": 1.7088102102279663,
|
|
"logits/rejected": 1.6172915697097778,
|
|
"logps/chosen": -273.8514709472656,
|
|
"logps/ref_chosen": -280.1703186035156,
|
|
"logps/ref_rejected": -219.1881103515625,
|
|
"logps/rejected": -216.62831115722656,
|
|
"loss": 5.4232,
|
|
"margin_dpo/margin_mean": 3.7590301036834717,
|
|
"margin_dpo/margin_std": 6.66114616394043,
|
|
"step": 88
|
|
},
|
|
{
|
|
"epoch": 0.18638743455497384,
|
|
"grad_norm": 15.837873458862305,
|
|
"learning_rate": 4.893510300863676e-07,
|
|
"logits/chosen": 2.1236486434936523,
|
|
"logits/rejected": 2.1029891967773438,
|
|
"logps/chosen": -202.62815856933594,
|
|
"logps/ref_chosen": -211.3966827392578,
|
|
"logps/ref_rejected": -171.04954528808594,
|
|
"logps/rejected": -165.4285430908203,
|
|
"loss": 5.3997,
|
|
"margin_dpo/margin_mean": 3.1475319862365723,
|
|
"margin_dpo/margin_std": 4.286096572875977,
|
|
"step": 89
|
|
},
|
|
{
|
|
"epoch": 0.18848167539267016,
|
|
"grad_norm": 15.2637357711792,
|
|
"learning_rate": 4.8881598109976e-07,
|
|
"logits/chosen": 2.190295696258545,
|
|
"logits/rejected": 2.0822367668151855,
|
|
"logps/chosen": -271.2816467285156,
|
|
"logps/ref_chosen": -280.9217834472656,
|
|
"logps/ref_rejected": -245.75814819335938,
|
|
"logps/rejected": -239.31825256347656,
|
|
"loss": 5.4166,
|
|
"margin_dpo/margin_mean": 3.2002599239349365,
|
|
"margin_dpo/margin_std": 5.397155284881592,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.1905759162303665,
|
|
"grad_norm": 14.667741775512695,
|
|
"learning_rate": 4.882681251368548e-07,
|
|
"logits/chosen": 1.3757317066192627,
|
|
"logits/rejected": 1.691314697265625,
|
|
"logps/chosen": -121.55278778076172,
|
|
"logps/ref_chosen": -130.23472595214844,
|
|
"logps/ref_rejected": -177.76895141601562,
|
|
"logps/rejected": -172.3560028076172,
|
|
"loss": 5.4142,
|
|
"margin_dpo/margin_mean": 3.2689881324768066,
|
|
"margin_dpo/margin_std": 4.278058052062988,
|
|
"step": 91
|
|
},
|
|
{
|
|
"epoch": 0.19267015706806281,
|
|
"grad_norm": 15.648965835571289,
|
|
"learning_rate": 4.877074915775048e-07,
|
|
"logits/chosen": 1.6639858484268188,
|
|
"logits/rejected": 1.4799772500991821,
|
|
"logps/chosen": -334.3116455078125,
|
|
"logps/ref_chosen": -344.4306335449219,
|
|
"logps/ref_rejected": -276.291748046875,
|
|
"logps/rejected": -270.984375,
|
|
"loss": 5.4004,
|
|
"margin_dpo/margin_mean": 4.811601161956787,
|
|
"margin_dpo/margin_std": 5.4291887283325195,
|
|
"step": 92
|
|
},
|
|
{
|
|
"epoch": 0.19476439790575917,
|
|
"grad_norm": 14.095191955566406,
|
|
"learning_rate": 4.871341104867864e-07,
|
|
"logits/chosen": 1.9756680727005005,
|
|
"logits/rejected": 1.923811674118042,
|
|
"logps/chosen": -196.8525390625,
|
|
"logps/ref_chosen": -206.1533660888672,
|
|
"logps/ref_rejected": -231.759033203125,
|
|
"logps/rejected": -227.26364135742188,
|
|
"loss": 5.408,
|
|
"margin_dpo/margin_mean": 4.805446147918701,
|
|
"margin_dpo/margin_std": 6.056692600250244,
|
|
"step": 93
|
|
},
|
|
{
|
|
"epoch": 0.1968586387434555,
|
|
"grad_norm": 15.55691909790039,
|
|
"learning_rate": 4.865480126133871e-07,
|
|
"logits/chosen": 1.7521295547485352,
|
|
"logits/rejected": 1.8241287469863892,
|
|
"logps/chosen": -250.36639404296875,
|
|
"logps/ref_chosen": -261.2528381347656,
|
|
"logps/ref_rejected": -269.2928771972656,
|
|
"logps/rejected": -263.65771484375,
|
|
"loss": 5.3919,
|
|
"margin_dpo/margin_mean": 5.2512712478637695,
|
|
"margin_dpo/margin_std": 7.996842384338379,
|
|
"step": 94
|
|
},
|
|
{
|
|
"epoch": 0.19895287958115182,
|
|
"grad_norm": 16.091550827026367,
|
|
"learning_rate": 4.859492293879573e-07,
|
|
"logits/chosen": 1.884320855140686,
|
|
"logits/rejected": 1.6460635662078857,
|
|
"logps/chosen": -334.9806823730469,
|
|
"logps/ref_chosen": -345.480224609375,
|
|
"logps/ref_rejected": -294.0064697265625,
|
|
"logps/rejected": -288.2855529785156,
|
|
"loss": 5.3719,
|
|
"margin_dpo/margin_mean": 4.778585433959961,
|
|
"margin_dpo/margin_std": 8.979715347290039,
|
|
"step": 95
|
|
},
|
|
{
|
|
"epoch": 0.20104712041884817,
|
|
"grad_norm": 15.211227416992188,
|
|
"learning_rate": 4.853377929214243e-07,
|
|
"logits/chosen": 1.442068099975586,
|
|
"logits/rejected": 1.3402963876724243,
|
|
"logps/chosen": -239.22625732421875,
|
|
"logps/ref_chosen": -249.85205078125,
|
|
"logps/ref_rejected": -274.1024169921875,
|
|
"logps/rejected": -266.5990295410156,
|
|
"loss": 5.3763,
|
|
"margin_dpo/margin_mean": 3.1224074363708496,
|
|
"margin_dpo/margin_std": 6.072546482086182,
|
|
"step": 96
|
|
},
|
|
{
|
|
"epoch": 0.2031413612565445,
|
|
"grad_norm": 15.3523588180542,
|
|
"learning_rate": 4.847137360032699e-07,
|
|
"logits/chosen": 1.682770013809204,
|
|
"logits/rejected": 1.789080262184143,
|
|
"logps/chosen": -224.3016815185547,
|
|
"logps/ref_chosen": -233.62025451660156,
|
|
"logps/ref_rejected": -258.32647705078125,
|
|
"logps/rejected": -253.4158935546875,
|
|
"loss": 5.3694,
|
|
"margin_dpo/margin_mean": 4.407979965209961,
|
|
"margin_dpo/margin_std": 6.528227806091309,
|
|
"step": 97
|
|
},
|
|
{
|
|
"epoch": 0.20523560209424083,
|
|
"grad_norm": 15.678691864013672,
|
|
"learning_rate": 4.84077092099773e-07,
|
|
"logits/chosen": 1.9161100387573242,
|
|
"logits/rejected": 2.1308605670928955,
|
|
"logps/chosen": -256.5081787109375,
|
|
"logps/ref_chosen": -267.27911376953125,
|
|
"logps/ref_rejected": -335.98284912109375,
|
|
"logps/rejected": -327.7537841796875,
|
|
"loss": 5.3668,
|
|
"margin_dpo/margin_mean": 2.5418522357940674,
|
|
"margin_dpo/margin_std": 7.70307731628418,
|
|
"step": 98
|
|
},
|
|
{
|
|
"epoch": 0.20732984293193718,
|
|
"grad_norm": 15.061153411865234,
|
|
"learning_rate": 4.834278953522137e-07,
|
|
"logits/chosen": 1.8618698120117188,
|
|
"logits/rejected": 1.8229163885116577,
|
|
"logps/chosen": -275.3375244140625,
|
|
"logps/ref_chosen": -285.90435791015625,
|
|
"logps/ref_rejected": -278.0072021484375,
|
|
"logps/rejected": -271.364013671875,
|
|
"loss": 5.3595,
|
|
"margin_dpo/margin_mean": 3.9236538410186768,
|
|
"margin_dpo/margin_std": 9.717151641845703,
|
|
"step": 99
|
|
},
|
|
{
|
|
"epoch": 0.2094240837696335,
|
|
"grad_norm": 15.537282943725586,
|
|
"learning_rate": 4.827661805750437e-07,
|
|
"logits/chosen": 1.6382941007614136,
|
|
"logits/rejected": 1.5340853929519653,
|
|
"logps/chosen": -327.0155944824219,
|
|
"logps/ref_chosen": -335.2471008300781,
|
|
"logps/ref_rejected": -304.7597351074219,
|
|
"logps/rejected": -300.1502380371094,
|
|
"loss": 5.3785,
|
|
"margin_dpo/margin_mean": 3.622096061706543,
|
|
"margin_dpo/margin_std": 7.071560859680176,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.21151832460732983,
|
|
"grad_norm": 15.370798110961914,
|
|
"learning_rate": 4.820919832540181e-07,
|
|
"logits/chosen": 1.507110357284546,
|
|
"logits/rejected": 1.7560882568359375,
|
|
"logps/chosen": -262.4298400878906,
|
|
"logps/ref_chosen": -272.9364318847656,
|
|
"logps/ref_rejected": -271.82366943359375,
|
|
"logps/rejected": -268.6051330566406,
|
|
"loss": 5.3734,
|
|
"margin_dpo/margin_mean": 7.288076400756836,
|
|
"margin_dpo/margin_std": 8.229599952697754,
|
|
"step": 101
|
|
},
|
|
{
|
|
"epoch": 0.2136125654450262,
|
|
"grad_norm": 15.161721229553223,
|
|
"learning_rate": 4.814053395442932e-07,
|
|
"logits/chosen": 1.76149582862854,
|
|
"logits/rejected": 1.8778889179229736,
|
|
"logps/chosen": -151.35018920898438,
|
|
"logps/ref_chosen": -159.15536499023438,
|
|
"logps/ref_rejected": -191.47312927246094,
|
|
"logps/rejected": -188.49746704101562,
|
|
"loss": 5.305,
|
|
"margin_dpo/margin_mean": 4.829489707946777,
|
|
"margin_dpo/margin_std": 7.208276271820068,
|
|
"step": 102
|
|
},
|
|
{
|
|
"epoch": 0.2157068062827225,
|
|
"grad_norm": 15.669957160949707,
|
|
"learning_rate": 4.807062862684873e-07,
|
|
"logits/chosen": 2.092226505279541,
|
|
"logits/rejected": 2.202396869659424,
|
|
"logps/chosen": -291.3448486328125,
|
|
"logps/ref_chosen": -301.0699768066406,
|
|
"logps/ref_rejected": -306.12469482421875,
|
|
"logps/rejected": -298.96844482421875,
|
|
"loss": 5.3684,
|
|
"margin_dpo/margin_mean": 2.5688788890838623,
|
|
"margin_dpo/margin_std": 8.519815444946289,
|
|
"step": 103
|
|
},
|
|
{
|
|
"epoch": 0.21780104712041884,
|
|
"grad_norm": 14.514609336853027,
|
|
"learning_rate": 4.799948609147061e-07,
|
|
"logits/chosen": 1.875953197479248,
|
|
"logits/rejected": 1.7595632076263428,
|
|
"logps/chosen": -308.81158447265625,
|
|
"logps/ref_chosen": -316.44036865234375,
|
|
"logps/ref_rejected": -245.41790771484375,
|
|
"logps/rejected": -242.4790496826172,
|
|
"loss": 5.3826,
|
|
"margin_dpo/margin_mean": 4.689910411834717,
|
|
"margin_dpo/margin_std": 10.412727355957031,
|
|
"step": 104
|
|
},
|
|
{
|
|
"epoch": 0.2198952879581152,
|
|
"grad_norm": 17.934940338134766,
|
|
"learning_rate": 4.792711016345321e-07,
|
|
"logits/chosen": 1.8088258504867554,
|
|
"logits/rejected": 1.6915315389633179,
|
|
"logps/chosen": -253.7894744873047,
|
|
"logps/ref_chosen": -264.70599365234375,
|
|
"logps/ref_rejected": -232.14236450195312,
|
|
"logps/rejected": -229.86798095703125,
|
|
"loss": 5.241,
|
|
"margin_dpo/margin_mean": 8.642158508300781,
|
|
"margin_dpo/margin_std": 9.996341705322266,
|
|
"step": 105
|
|
},
|
|
{
|
|
"epoch": 0.22198952879581152,
|
|
"grad_norm": 16.461326599121094,
|
|
"learning_rate": 4.785350472409791e-07,
|
|
"logits/chosen": 1.8355200290679932,
|
|
"logits/rejected": 2.0365209579467773,
|
|
"logps/chosen": -274.2940673828125,
|
|
"logps/ref_chosen": -280.6784973144531,
|
|
"logps/ref_rejected": -353.0090026855469,
|
|
"logps/rejected": -352.0072021484375,
|
|
"loss": 5.3413,
|
|
"margin_dpo/margin_mean": 5.382654666900635,
|
|
"margin_dpo/margin_std": 9.296738624572754,
|
|
"step": 106
|
|
},
|
|
{
|
|
"epoch": 0.22408376963350785,
|
|
"grad_norm": 16.5199031829834,
|
|
"learning_rate": 4.777867372064105e-07,
|
|
"logits/chosen": 1.6165478229522705,
|
|
"logits/rejected": 1.5343804359436035,
|
|
"logps/chosen": -327.95794677734375,
|
|
"logps/ref_chosen": -336.91058349609375,
|
|
"logps/ref_rejected": -280.02325439453125,
|
|
"logps/rejected": -277.4742431640625,
|
|
"loss": 5.2697,
|
|
"margin_dpo/margin_mean": 6.403599262237549,
|
|
"margin_dpo/margin_std": 9.143040657043457,
|
|
"step": 107
|
|
},
|
|
{
|
|
"epoch": 0.2261780104712042,
|
|
"grad_norm": 16.170669555664062,
|
|
"learning_rate": 4.770262116604223e-07,
|
|
"logits/chosen": 1.8304221630096436,
|
|
"logits/rejected": 2.0288898944854736,
|
|
"logps/chosen": -224.6934356689453,
|
|
"logps/ref_chosen": -232.04891967773438,
|
|
"logps/ref_rejected": -248.3793487548828,
|
|
"logps/rejected": -246.96351623535156,
|
|
"loss": 5.2351,
|
|
"margin_dpo/margin_mean": 5.9396281242370605,
|
|
"margin_dpo/margin_std": 9.726874351501465,
|
|
"step": 108
|
|
},
|
|
{
|
|
"epoch": 0.22827225130890053,
|
|
"grad_norm": 17.59023094177246,
|
|
"learning_rate": 4.7625351138769166e-07,
|
|
"logits/chosen": 1.8960250616073608,
|
|
"logits/rejected": 1.919461965560913,
|
|
"logps/chosen": -236.6331329345703,
|
|
"logps/ref_chosen": -243.42401123046875,
|
|
"logps/ref_rejected": -276.1861877441406,
|
|
"logps/rejected": -274.469482421875,
|
|
"loss": 5.2323,
|
|
"margin_dpo/margin_mean": 5.074185371398926,
|
|
"margin_dpo/margin_std": 8.536012649536133,
|
|
"step": 109
|
|
},
|
|
{
|
|
"epoch": 0.23036649214659685,
|
|
"grad_norm": 15.948025703430176,
|
|
"learning_rate": 4.75468677825789e-07,
|
|
"logits/chosen": 1.6093004941940308,
|
|
"logits/rejected": 1.6397433280944824,
|
|
"logps/chosen": -234.94406127929688,
|
|
"logps/ref_chosen": -242.5493621826172,
|
|
"logps/ref_rejected": -195.59750366210938,
|
|
"logps/rejected": -193.1940155029297,
|
|
"loss": 5.246,
|
|
"margin_dpo/margin_mean": 5.201825141906738,
|
|
"margin_dpo/margin_std": 10.898710250854492,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.2324607329842932,
|
|
"grad_norm": 18.407573699951172,
|
|
"learning_rate": 4.7467175306295647e-07,
|
|
"logits/chosen": 1.6897281408309937,
|
|
"logits/rejected": 1.7771556377410889,
|
|
"logps/chosen": -272.2618408203125,
|
|
"logps/ref_chosen": -279.930908203125,
|
|
"logps/ref_rejected": -281.9147644042969,
|
|
"logps/rejected": -282.97882080078125,
|
|
"loss": 5.223,
|
|
"margin_dpo/margin_mean": 8.733101844787598,
|
|
"margin_dpo/margin_std": 11.250627517700195,
|
|
"step": 111
|
|
},
|
|
{
|
|
"epoch": 0.23455497382198953,
|
|
"grad_norm": 14.606575012207031,
|
|
"learning_rate": 4.7386277983585053e-07,
|
|
"logits/chosen": 1.776769757270813,
|
|
"logits/rejected": 1.8782975673675537,
|
|
"logps/chosen": -243.624755859375,
|
|
"logps/ref_chosen": -246.89129638671875,
|
|
"logps/ref_rejected": -266.50506591796875,
|
|
"logps/rejected": -265.4157409667969,
|
|
"loss": 5.395,
|
|
"margin_dpo/margin_mean": 2.177186965942383,
|
|
"margin_dpo/margin_std": 10.55683422088623,
|
|
"step": 112
|
|
},
|
|
{
|
|
"epoch": 0.23664921465968586,
|
|
"grad_norm": 16.843202590942383,
|
|
"learning_rate": 4.7304180152725024e-07,
|
|
"logits/chosen": 1.4505597352981567,
|
|
"logits/rejected": 1.5904918909072876,
|
|
"logps/chosen": -269.72711181640625,
|
|
"logps/ref_chosen": -276.4613342285156,
|
|
"logps/ref_rejected": -341.7659912109375,
|
|
"logps/rejected": -342.48956298828125,
|
|
"loss": 5.1716,
|
|
"margin_dpo/margin_mean": 7.457816123962402,
|
|
"margin_dpo/margin_std": 12.05935287475586,
|
|
"step": 113
|
|
},
|
|
{
|
|
"epoch": 0.2387434554973822,
|
|
"grad_norm": 16.080974578857422,
|
|
"learning_rate": 4.7220886216373085e-07,
|
|
"logits/chosen": 1.6391416788101196,
|
|
"logits/rejected": 1.5624871253967285,
|
|
"logps/chosen": -247.58502197265625,
|
|
"logps/ref_chosen": -251.4463653564453,
|
|
"logps/ref_rejected": -210.03152465820312,
|
|
"logps/rejected": -213.17721557617188,
|
|
"loss": 5.3559,
|
|
"margin_dpo/margin_mean": 7.00706672668457,
|
|
"margin_dpo/margin_std": 8.853893280029297,
|
|
"step": 114
|
|
},
|
|
{
|
|
"epoch": 0.24083769633507854,
|
|
"grad_norm": 15.944089889526367,
|
|
"learning_rate": 4.7136400641330245e-07,
|
|
"logits/chosen": 1.8735270500183105,
|
|
"logits/rejected": 1.6029636859893799,
|
|
"logps/chosen": -253.3223876953125,
|
|
"logps/ref_chosen": -257.82574462890625,
|
|
"logps/ref_rejected": -192.41648864746094,
|
|
"logps/rejected": -191.51156616210938,
|
|
"loss": 5.3403,
|
|
"margin_dpo/margin_mean": 3.5983924865722656,
|
|
"margin_dpo/margin_std": 8.953197479248047,
|
|
"step": 115
|
|
},
|
|
{
|
|
"epoch": 0.24293193717277486,
|
|
"grad_norm": 16.291976928710938,
|
|
"learning_rate": 4.70507279583015e-07,
|
|
"logits/chosen": 1.695469856262207,
|
|
"logits/rejected": 1.8081481456756592,
|
|
"logps/chosen": -242.69943237304688,
|
|
"logps/ref_chosen": -248.17518615722656,
|
|
"logps/ref_rejected": -274.10870361328125,
|
|
"logps/rejected": -276.470703125,
|
|
"loss": 5.2459,
|
|
"margin_dpo/margin_mean": 7.837741851806641,
|
|
"margin_dpo/margin_std": 9.762773513793945,
|
|
"step": 116
|
|
},
|
|
{
|
|
"epoch": 0.2450261780104712,
|
|
"grad_norm": 16.642024993896484,
|
|
"learning_rate": 4.6963872761652834e-07,
|
|
"logits/chosen": 1.6590253114700317,
|
|
"logits/rejected": 1.4430992603302002,
|
|
"logps/chosen": -229.59909057617188,
|
|
"logps/ref_chosen": -235.29620361328125,
|
|
"logps/ref_rejected": -190.87095642089844,
|
|
"logps/rejected": -194.7079620361328,
|
|
"loss": 5.2344,
|
|
"margin_dpo/margin_mean": 9.53414535522461,
|
|
"margin_dpo/margin_std": 9.101675033569336,
|
|
"step": 117
|
|
},
|
|
{
|
|
"epoch": 0.24712041884816754,
|
|
"grad_norm": 20.776172637939453,
|
|
"learning_rate": 4.687583970916486e-07,
|
|
"logits/chosen": 1.6007872819900513,
|
|
"logits/rejected": 1.6555330753326416,
|
|
"logps/chosen": -256.0022277832031,
|
|
"logps/ref_chosen": -260.44781494140625,
|
|
"logps/ref_rejected": -308.3326416015625,
|
|
"logps/rejected": -313.2330627441406,
|
|
"loss": 5.2168,
|
|
"margin_dpo/margin_mean": 9.346000671386719,
|
|
"margin_dpo/margin_std": 13.443426132202148,
|
|
"step": 118
|
|
},
|
|
{
|
|
"epoch": 0.24921465968586387,
|
|
"grad_norm": 15.843025207519531,
|
|
"learning_rate": 4.6786633521783005e-07,
|
|
"logits/chosen": 1.9080662727355957,
|
|
"logits/rejected": 2.017760753631592,
|
|
"logps/chosen": -282.1200866699219,
|
|
"logps/ref_chosen": -286.9692687988281,
|
|
"logps/ref_rejected": -331.7510986328125,
|
|
"logps/rejected": -331.0477600097656,
|
|
"loss": 5.2789,
|
|
"margin_dpo/margin_mean": 4.145843982696533,
|
|
"margin_dpo/margin_std": 13.910889625549316,
|
|
"step": 119
|
|
},
|
|
{
|
|
"epoch": 0.2513089005235602,
|
|
"grad_norm": 16.17180061340332,
|
|
"learning_rate": 4.669625898336438e-07,
|
|
"logits/chosen": 1.9562854766845703,
|
|
"logits/rejected": 1.8660322427749634,
|
|
"logps/chosen": -278.0622253417969,
|
|
"logps/ref_chosen": -281.98077392578125,
|
|
"logps/ref_rejected": -283.52679443359375,
|
|
"logps/rejected": -288.8341369628906,
|
|
"loss": 5.2221,
|
|
"margin_dpo/margin_mean": 9.225922584533691,
|
|
"margin_dpo/margin_std": 13.011504173278809,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.2534031413612565,
|
|
"grad_norm": 14.78394889831543,
|
|
"learning_rate": 4.6604720940421207e-07,
|
|
"logits/chosen": 1.1911287307739258,
|
|
"logits/rejected": 1.5024229288101196,
|
|
"logps/chosen": -144.0179443359375,
|
|
"logps/ref_chosen": -145.69662475585938,
|
|
"logps/ref_rejected": -195.21612548828125,
|
|
"logps/rejected": -199.57223510742188,
|
|
"loss": 5.3557,
|
|
"margin_dpo/margin_mean": 6.034780025482178,
|
|
"margin_dpo/margin_std": 9.371333122253418,
|
|
"step": 121
|
|
},
|
|
{
|
|
"epoch": 0.2554973821989529,
|
|
"grad_norm": 15.84109115600586,
|
|
"learning_rate": 4.651202430186092e-07,
|
|
"logits/chosen": 1.7703770399093628,
|
|
"logits/rejected": 2.0983457565307617,
|
|
"logps/chosen": -245.3528289794922,
|
|
"logps/ref_chosen": -252.1569366455078,
|
|
"logps/ref_rejected": -309.68548583984375,
|
|
"logps/rejected": -306.59942626953125,
|
|
"loss": 5.2869,
|
|
"margin_dpo/margin_mean": 3.718092441558838,
|
|
"margin_dpo/margin_std": 17.503427505493164,
|
|
"step": 122
|
|
},
|
|
{
|
|
"epoch": 0.25759162303664923,
|
|
"grad_norm": 18.21240997314453,
|
|
"learning_rate": 4.6418174038722924e-07,
|
|
"logits/chosen": 1.6439917087554932,
|
|
"logits/rejected": 1.4977948665618896,
|
|
"logps/chosen": -358.5697326660156,
|
|
"logps/ref_chosen": -366.5253601074219,
|
|
"logps/ref_rejected": -285.2503662109375,
|
|
"logps/rejected": -286.8184814453125,
|
|
"loss": 5.1686,
|
|
"margin_dpo/margin_mean": 9.523737907409668,
|
|
"margin_dpo/margin_std": 12.473346710205078,
|
|
"step": 123
|
|
},
|
|
{
|
|
"epoch": 0.25968586387434556,
|
|
"grad_norm": 16.816696166992188,
|
|
"learning_rate": 4.6323175183912023e-07,
|
|
"logits/chosen": 1.4895159006118774,
|
|
"logits/rejected": 1.6259382963180542,
|
|
"logps/chosen": -244.55775451660156,
|
|
"logps/ref_chosen": -251.4420623779297,
|
|
"logps/ref_rejected": -231.0302734375,
|
|
"logps/rejected": -230.74337768554688,
|
|
"loss": 5.2019,
|
|
"margin_dpo/margin_mean": 6.597394943237305,
|
|
"margin_dpo/margin_std": 15.111127853393555,
|
|
"step": 124
|
|
},
|
|
{
|
|
"epoch": 0.2617801047120419,
|
|
"grad_norm": 16.386260986328125,
|
|
"learning_rate": 4.6227032831928483e-07,
|
|
"logits/chosen": 1.6032323837280273,
|
|
"logits/rejected": 1.598075032234192,
|
|
"logps/chosen": -242.79583740234375,
|
|
"logps/ref_chosen": -248.3984375,
|
|
"logps/ref_rejected": -307.77557373046875,
|
|
"logps/rejected": -308.05059814453125,
|
|
"loss": 5.2189,
|
|
"margin_dpo/margin_mean": 5.877676010131836,
|
|
"margin_dpo/margin_std": 13.811307907104492,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 0.2638743455497382,
|
|
"grad_norm": 16.69825553894043,
|
|
"learning_rate": 4.612975213859487e-07,
|
|
"logits/chosen": 1.7347309589385986,
|
|
"logits/rejected": 1.9158421754837036,
|
|
"logps/chosen": -291.75244140625,
|
|
"logps/ref_chosen": -295.82366943359375,
|
|
"logps/ref_rejected": -295.2666931152344,
|
|
"logps/rejected": -299.0240783691406,
|
|
"loss": 5.1658,
|
|
"margin_dpo/margin_mean": 7.8285651206970215,
|
|
"margin_dpo/margin_std": 13.573448181152344,
|
|
"step": 126
|
|
},
|
|
{
|
|
"epoch": 0.26596858638743454,
|
|
"grad_norm": 16.867650985717773,
|
|
"learning_rate": 4.603133832077953e-07,
|
|
"logits/chosen": 1.1577448844909668,
|
|
"logits/rejected": 1.106504201889038,
|
|
"logps/chosen": -273.8604736328125,
|
|
"logps/ref_chosen": -279.496337890625,
|
|
"logps/ref_rejected": -278.802978515625,
|
|
"logps/rejected": -282.5022277832031,
|
|
"loss": 5.1019,
|
|
"margin_dpo/margin_mean": 9.335136413574219,
|
|
"margin_dpo/margin_std": 13.274786949157715,
|
|
"step": 127
|
|
},
|
|
{
|
|
"epoch": 0.2680628272251309,
|
|
"grad_norm": 16.078149795532227,
|
|
"learning_rate": 4.5931796656116837e-07,
|
|
"logits/chosen": 1.4048773050308228,
|
|
"logits/rejected": 1.4003376960754395,
|
|
"logps/chosen": -258.86883544921875,
|
|
"logps/ref_chosen": -264.52252197265625,
|
|
"logps/ref_rejected": -239.76937866210938,
|
|
"logps/rejected": -247.31756591796875,
|
|
"loss": 5.057,
|
|
"margin_dpo/margin_mean": 13.201865196228027,
|
|
"margin_dpo/margin_std": 13.145772933959961,
|
|
"step": 128
|
|
},
|
|
{
|
|
"epoch": 0.27015706806282724,
|
|
"grad_norm": 16.511791229248047,
|
|
"learning_rate": 4.5831132482724193e-07,
|
|
"logits/chosen": 1.5169470310211182,
|
|
"logits/rejected": 1.6699875593185425,
|
|
"logps/chosen": -290.08258056640625,
|
|
"logps/ref_chosen": -296.95233154296875,
|
|
"logps/ref_rejected": -260.0984802246094,
|
|
"logps/rejected": -267.3520812988281,
|
|
"loss": 5.1034,
|
|
"margin_dpo/margin_mean": 14.12340259552002,
|
|
"margin_dpo/margin_std": 17.277435302734375,
|
|
"step": 129
|
|
},
|
|
{
|
|
"epoch": 0.27225130890052357,
|
|
"grad_norm": 21.19081687927246,
|
|
"learning_rate": 4.5729351198915705e-07,
|
|
"logits/chosen": 1.5894699096679688,
|
|
"logits/rejected": 1.8434865474700928,
|
|
"logps/chosen": -263.253173828125,
|
|
"logps/ref_chosen": -274.7286682128906,
|
|
"logps/ref_rejected": -325.187255859375,
|
|
"logps/rejected": -327.59503173828125,
|
|
"loss": 5.1333,
|
|
"margin_dpo/margin_mean": 13.883302688598633,
|
|
"margin_dpo/margin_std": 18.286108016967773,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.2743455497382199,
|
|
"grad_norm": 17.56028175354004,
|
|
"learning_rate": 4.5626458262912735e-07,
|
|
"logits/chosen": 1.44374680519104,
|
|
"logits/rejected": 1.400553822517395,
|
|
"logps/chosen": -270.84796142578125,
|
|
"logps/ref_chosen": -279.3233642578125,
|
|
"logps/ref_rejected": -299.2681884765625,
|
|
"logps/rejected": -304.762451171875,
|
|
"loss": 5.2149,
|
|
"margin_dpo/margin_mean": 13.969644546508789,
|
|
"margin_dpo/margin_std": 20.493703842163086,
|
|
"step": 131
|
|
},
|
|
{
|
|
"epoch": 0.2764397905759162,
|
|
"grad_norm": 17.945659637451172,
|
|
"learning_rate": 4.5522459192551166e-07,
|
|
"logits/chosen": 1.5821537971496582,
|
|
"logits/rejected": 1.6266090869903564,
|
|
"logps/chosen": -281.5635681152344,
|
|
"logps/ref_chosen": -291.3346862792969,
|
|
"logps/ref_rejected": -283.13311767578125,
|
|
"logps/rejected": -291.1026306152344,
|
|
"loss": 5.1007,
|
|
"margin_dpo/margin_mean": 17.740650177001953,
|
|
"margin_dpo/margin_std": 17.511295318603516,
|
|
"step": 132
|
|
},
|
|
{
|
|
"epoch": 0.27853403141361255,
|
|
"grad_norm": 16.447092056274414,
|
|
"learning_rate": 4.541735956498554e-07,
|
|
"logits/chosen": 1.6146799325942993,
|
|
"logits/rejected": 1.5560858249664307,
|
|
"logps/chosen": -223.23194885253906,
|
|
"logps/ref_chosen": -233.71875,
|
|
"logps/ref_rejected": -216.53781127929688,
|
|
"logps/rejected": -223.18417358398438,
|
|
"loss": 5.0683,
|
|
"margin_dpo/margin_mean": 17.133150100708008,
|
|
"margin_dpo/margin_std": 14.197755813598633,
|
|
"step": 133
|
|
},
|
|
{
|
|
"epoch": 0.2806282722513089,
|
|
"grad_norm": 20.631309509277344,
|
|
"learning_rate": 4.5311165016389914e-07,
|
|
"logits/chosen": 1.920145869255066,
|
|
"logits/rejected": 1.981586217880249,
|
|
"logps/chosen": -348.9212951660156,
|
|
"logps/ref_chosen": -348.29547119140625,
|
|
"logps/ref_rejected": -343.04510498046875,
|
|
"logps/rejected": -351.0985412597656,
|
|
"loss": 5.22,
|
|
"margin_dpo/margin_mean": 7.427634239196777,
|
|
"margin_dpo/margin_std": 17.99872589111328,
|
|
"step": 134
|
|
},
|
|
{
|
|
"epoch": 0.28272251308900526,
|
|
"grad_norm": 17.117826461791992,
|
|
"learning_rate": 4.520388124165564e-07,
|
|
"logits/chosen": 1.1408627033233643,
|
|
"logits/rejected": 0.9298585057258606,
|
|
"logps/chosen": -226.50486755371094,
|
|
"logps/ref_chosen": -232.59129333496094,
|
|
"logps/ref_rejected": -175.74066162109375,
|
|
"logps/rejected": -181.21482849121094,
|
|
"loss": 5.0762,
|
|
"margin_dpo/margin_mean": 11.560598373413086,
|
|
"margin_dpo/margin_std": 14.716351509094238,
|
|
"step": 135
|
|
},
|
|
{
|
|
"epoch": 0.2848167539267016,
|
|
"grad_norm": 19.100990295410156,
|
|
"learning_rate": 4.5095513994085974e-07,
|
|
"logits/chosen": 1.0842311382293701,
|
|
"logits/rejected": 1.3095265626907349,
|
|
"logps/chosen": -183.53028869628906,
|
|
"logps/ref_chosen": -189.21795654296875,
|
|
"logps/ref_rejected": -191.75979614257812,
|
|
"logps/rejected": -200.99093627929688,
|
|
"loss": 5.1055,
|
|
"margin_dpo/margin_mean": 14.91882038116455,
|
|
"margin_dpo/margin_std": 16.662824630737305,
|
|
"step": 136
|
|
},
|
|
{
|
|
"epoch": 0.2869109947643979,
|
|
"grad_norm": 17.85831642150879,
|
|
"learning_rate": 4.498606908508753e-07,
|
|
"logits/chosen": 1.86328125,
|
|
"logits/rejected": 1.6945784091949463,
|
|
"logps/chosen": -356.1871032714844,
|
|
"logps/ref_chosen": -358.9820861816406,
|
|
"logps/ref_rejected": -277.2926330566406,
|
|
"logps/rejected": -286.5790710449219,
|
|
"loss": 5.1522,
|
|
"margin_dpo/margin_mean": 12.081487655639648,
|
|
"margin_dpo/margin_std": 15.93864631652832,
|
|
"step": 137
|
|
},
|
|
{
|
|
"epoch": 0.28900523560209423,
|
|
"grad_norm": 16.747636795043945,
|
|
"learning_rate": 4.487555238385862e-07,
|
|
"logits/chosen": 1.8756850957870483,
|
|
"logits/rejected": 1.9149004220962524,
|
|
"logps/chosen": -284.18572998046875,
|
|
"logps/ref_chosen": -283.7969055175781,
|
|
"logps/ref_rejected": -269.28643798828125,
|
|
"logps/rejected": -280.577880859375,
|
|
"loss": 5.1753,
|
|
"margin_dpo/margin_mean": 10.902585983276367,
|
|
"margin_dpo/margin_std": 22.474666595458984,
|
|
"step": 138
|
|
},
|
|
{
|
|
"epoch": 0.29109947643979056,
|
|
"grad_norm": 16.213245391845703,
|
|
"learning_rate": 4.476396981707453e-07,
|
|
"logits/chosen": 1.4325203895568848,
|
|
"logits/rejected": 1.5791009664535522,
|
|
"logps/chosen": -218.7724609375,
|
|
"logps/ref_chosen": -221.46124267578125,
|
|
"logps/ref_rejected": -234.3295440673828,
|
|
"logps/rejected": -236.3891143798828,
|
|
"loss": 5.2792,
|
|
"margin_dpo/margin_mean": 4.748367786407471,
|
|
"margin_dpo/margin_std": 21.647659301757812,
|
|
"step": 139
|
|
},
|
|
{
|
|
"epoch": 0.2931937172774869,
|
|
"grad_norm": 25.28333854675293,
|
|
"learning_rate": 4.4651327368569684e-07,
|
|
"logits/chosen": 1.5151917934417725,
|
|
"logits/rejected": 1.5757999420166016,
|
|
"logps/chosen": -237.8951873779297,
|
|
"logps/ref_chosen": -246.27151489257812,
|
|
"logps/ref_rejected": -255.00428771972656,
|
|
"logps/rejected": -261.1990661621094,
|
|
"loss": 5.0124,
|
|
"margin_dpo/margin_mean": 14.571114540100098,
|
|
"margin_dpo/margin_std": 15.991363525390625,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.29528795811518327,
|
|
"grad_norm": 23.41806411743164,
|
|
"learning_rate": 4.453763107901675e-07,
|
|
"logits/chosen": 1.5907336473464966,
|
|
"logits/rejected": 1.6988296508789062,
|
|
"logps/chosen": -264.4815979003906,
|
|
"logps/ref_chosen": -267.79345703125,
|
|
"logps/ref_rejected": -295.4119873046875,
|
|
"logps/rejected": -308.6583557128906,
|
|
"loss": 5.0535,
|
|
"margin_dpo/margin_mean": 16.5582332611084,
|
|
"margin_dpo/margin_std": 21.787641525268555,
|
|
"step": 141
|
|
},
|
|
{
|
|
"epoch": 0.2973821989528796,
|
|
"grad_norm": 17.388578414916992,
|
|
"learning_rate": 4.4422887045602674e-07,
|
|
"logits/chosen": 1.8900976181030273,
|
|
"logits/rejected": 1.6236388683319092,
|
|
"logps/chosen": -341.6228942871094,
|
|
"logps/ref_chosen": -352.8658752441406,
|
|
"logps/ref_rejected": -219.5095672607422,
|
|
"logps/rejected": -223.31808471679688,
|
|
"loss": 5.0641,
|
|
"margin_dpo/margin_mean": 15.05146598815918,
|
|
"margin_dpo/margin_std": 16.49203109741211,
|
|
"step": 142
|
|
},
|
|
{
|
|
"epoch": 0.2994764397905759,
|
|
"grad_norm": 18.831209182739258,
|
|
"learning_rate": 4.4307101421701755e-07,
|
|
"logits/chosen": 1.4374781847000122,
|
|
"logits/rejected": 1.3258070945739746,
|
|
"logps/chosen": -327.2297058105469,
|
|
"logps/ref_chosen": -336.38482666015625,
|
|
"logps/ref_rejected": -213.85707092285156,
|
|
"logps/rejected": -229.42831420898438,
|
|
"loss": 4.9898,
|
|
"margin_dpo/margin_mean": 24.726341247558594,
|
|
"margin_dpo/margin_std": 23.126943588256836,
|
|
"step": 143
|
|
},
|
|
{
|
|
"epoch": 0.30157068062827225,
|
|
"grad_norm": 19.49022674560547,
|
|
"learning_rate": 4.419028041654559e-07,
|
|
"logits/chosen": 1.4996888637542725,
|
|
"logits/rejected": 1.4556653499603271,
|
|
"logps/chosen": -264.51727294921875,
|
|
"logps/ref_chosen": -274.0345458984375,
|
|
"logps/ref_rejected": -274.5603942871094,
|
|
"logps/rejected": -273.80316162109375,
|
|
"loss": 5.0793,
|
|
"margin_dpo/margin_mean": 8.760041236877441,
|
|
"margin_dpo/margin_std": 19.655494689941406,
|
|
"step": 144
|
|
},
|
|
{
|
|
"epoch": 0.3036649214659686,
|
|
"grad_norm": 16.73856544494629,
|
|
"learning_rate": 4.4072430294890166e-07,
|
|
"logits/chosen": 1.6811779737472534,
|
|
"logits/rejected": 1.7291994094848633,
|
|
"logps/chosen": -269.10491943359375,
|
|
"logps/ref_chosen": -274.1513366699219,
|
|
"logps/ref_rejected": -226.63064575195312,
|
|
"logps/rejected": -239.5631866455078,
|
|
"loss": 4.9405,
|
|
"margin_dpo/margin_mean": 17.978939056396484,
|
|
"margin_dpo/margin_std": 23.945186614990234,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 0.3057591623036649,
|
|
"grad_norm": 29.0201358795166,
|
|
"learning_rate": 4.395355737667985e-07,
|
|
"logits/chosen": 1.4950841665267944,
|
|
"logits/rejected": 1.7295074462890625,
|
|
"logps/chosen": -227.69259643554688,
|
|
"logps/ref_chosen": -229.48269653320312,
|
|
"logps/ref_rejected": -249.7940216064453,
|
|
"logps/rejected": -259.9455261230469,
|
|
"loss": 5.0665,
|
|
"margin_dpo/margin_mean": 11.941591262817383,
|
|
"margin_dpo/margin_std": 17.751564025878906,
|
|
"step": 146
|
|
},
|
|
{
|
|
"epoch": 0.3078534031413613,
|
|
"grad_norm": 16.784557342529297,
|
|
"learning_rate": 4.3833668036708483e-07,
|
|
"logits/chosen": 1.4946480989456177,
|
|
"logits/rejected": 1.4648349285125732,
|
|
"logps/chosen": -284.0057067871094,
|
|
"logps/ref_chosen": -290.8128356933594,
|
|
"logps/ref_rejected": -218.97787475585938,
|
|
"logps/rejected": -229.5756072998047,
|
|
"loss": 5.1681,
|
|
"margin_dpo/margin_mean": 17.40483856201172,
|
|
"margin_dpo/margin_std": 24.006677627563477,
|
|
"step": 147
|
|
},
|
|
{
|
|
"epoch": 0.3099476439790576,
|
|
"grad_norm": 16.890840530395508,
|
|
"learning_rate": 4.3712768704277524e-07,
|
|
"logits/chosen": 1.5196683406829834,
|
|
"logits/rejected": 1.509922742843628,
|
|
"logps/chosen": -261.50762939453125,
|
|
"logps/ref_chosen": -263.70001220703125,
|
|
"logps/ref_rejected": -262.095703125,
|
|
"logps/rejected": -272.2943115234375,
|
|
"loss": 5.1353,
|
|
"margin_dpo/margin_mean": 12.390965461730957,
|
|
"margin_dpo/margin_std": 21.307296752929688,
|
|
"step": 148
|
|
},
|
|
{
|
|
"epoch": 0.31204188481675393,
|
|
"grad_norm": 18.739173889160156,
|
|
"learning_rate": 4.3590865862851263e-07,
|
|
"logits/chosen": 1.9116061925888062,
|
|
"logits/rejected": 1.7204910516738892,
|
|
"logps/chosen": -344.4569396972656,
|
|
"logps/ref_chosen": -350.6168518066406,
|
|
"logps/ref_rejected": -277.2320251464844,
|
|
"logps/rejected": -288.07904052734375,
|
|
"loss": 5.0166,
|
|
"margin_dpo/margin_mean": 17.006885528564453,
|
|
"margin_dpo/margin_std": 17.385805130004883,
|
|
"step": 149
|
|
},
|
|
{
|
|
"epoch": 0.31413612565445026,
|
|
"grad_norm": 17.36412811279297,
|
|
"learning_rate": 4.346796604970912e-07,
|
|
"logits/chosen": 1.934645652770996,
|
|
"logits/rejected": 1.848956823348999,
|
|
"logps/chosen": -261.2005920410156,
|
|
"logps/ref_chosen": -264.05096435546875,
|
|
"logps/ref_rejected": -286.02313232421875,
|
|
"logps/rejected": -298.2835693359375,
|
|
"loss": 5.0369,
|
|
"margin_dpo/margin_mean": 15.110857963562012,
|
|
"margin_dpo/margin_std": 18.34941291809082,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.3162303664921466,
|
|
"grad_norm": 20.5943546295166,
|
|
"learning_rate": 4.3344075855595097e-07,
|
|
"logits/chosen": 1.3573246002197266,
|
|
"logits/rejected": 1.373565673828125,
|
|
"logps/chosen": -254.92498779296875,
|
|
"logps/ref_chosen": -257.74664306640625,
|
|
"logps/ref_rejected": -256.2339172363281,
|
|
"logps/rejected": -267.41278076171875,
|
|
"loss": 4.7848,
|
|
"margin_dpo/margin_mean": 14.000543594360352,
|
|
"margin_dpo/margin_std": 23.76491928100586,
|
|
"step": 151
|
|
},
|
|
{
|
|
"epoch": 0.3183246073298429,
|
|
"grad_norm": 21.099018096923828,
|
|
"learning_rate": 4.3219201924364323e-07,
|
|
"logits/chosen": 1.4018583297729492,
|
|
"logits/rejected": 1.803174376487732,
|
|
"logps/chosen": -245.9750213623047,
|
|
"logps/ref_chosen": -250.47512817382812,
|
|
"logps/ref_rejected": -322.36474609375,
|
|
"logps/rejected": -333.2466735839844,
|
|
"loss": 4.9817,
|
|
"margin_dpo/margin_mean": 15.381957054138184,
|
|
"margin_dpo/margin_std": 22.273956298828125,
|
|
"step": 152
|
|
},
|
|
{
|
|
"epoch": 0.3204188481675393,
|
|
"grad_norm": 22.535673141479492,
|
|
"learning_rate": 4.309335095262675e-07,
|
|
"logits/chosen": 1.5490094423294067,
|
|
"logits/rejected": 1.5208497047424316,
|
|
"logps/chosen": -235.2023162841797,
|
|
"logps/ref_chosen": -238.36544799804688,
|
|
"logps/ref_rejected": -215.78970336914062,
|
|
"logps/rejected": -236.40200805664062,
|
|
"loss": 4.6931,
|
|
"margin_dpo/margin_mean": 23.775440216064453,
|
|
"margin_dpo/margin_std": 23.90810775756836,
|
|
"step": 153
|
|
},
|
|
{
|
|
"epoch": 0.3225130890052356,
|
|
"grad_norm": 19.634624481201172,
|
|
"learning_rate": 4.2966529689388064e-07,
|
|
"logits/chosen": 1.213348627090454,
|
|
"logits/rejected": 1.2180352210998535,
|
|
"logps/chosen": -264.2608337402344,
|
|
"logps/ref_chosen": -259.7012939453125,
|
|
"logps/ref_rejected": -255.74172973632812,
|
|
"logps/rejected": -272.3033142089844,
|
|
"loss": 4.9699,
|
|
"margin_dpo/margin_mean": 12.002017974853516,
|
|
"margin_dpo/margin_std": 28.510345458984375,
|
|
"step": 154
|
|
},
|
|
{
|
|
"epoch": 0.32460732984293195,
|
|
"grad_norm": 19.311044692993164,
|
|
"learning_rate": 4.2838744935687716e-07,
|
|
"logits/chosen": 1.4307262897491455,
|
|
"logits/rejected": 1.4176700115203857,
|
|
"logps/chosen": -324.7783203125,
|
|
"logps/ref_chosen": -325.11517333984375,
|
|
"logps/ref_rejected": -288.08380126953125,
|
|
"logps/rejected": -307.0673828125,
|
|
"loss": 5.1244,
|
|
"margin_dpo/margin_mean": 19.320411682128906,
|
|
"margin_dpo/margin_std": 28.002426147460938,
|
|
"step": 155
|
|
},
|
|
{
|
|
"epoch": 0.3267015706806283,
|
|
"grad_norm": 19.045074462890625,
|
|
"learning_rate": 4.271000354423425e-07,
|
|
"logits/chosen": 1.6414060592651367,
|
|
"logits/rejected": 1.486697793006897,
|
|
"logps/chosen": -260.87078857421875,
|
|
"logps/ref_chosen": -263.62353515625,
|
|
"logps/ref_rejected": -183.94119262695312,
|
|
"logps/rejected": -202.97857666015625,
|
|
"loss": 4.8187,
|
|
"margin_dpo/margin_mean": 21.790143966674805,
|
|
"margin_dpo/margin_std": 19.9763240814209,
|
|
"step": 156
|
|
},
|
|
{
|
|
"epoch": 0.3287958115183246,
|
|
"grad_norm": 24.014020919799805,
|
|
"learning_rate": 4.258031241903777e-07,
|
|
"logits/chosen": 1.4358762502670288,
|
|
"logits/rejected": 1.5518109798431396,
|
|
"logps/chosen": -248.9981231689453,
|
|
"logps/ref_chosen": -237.6883087158203,
|
|
"logps/ref_rejected": -232.87484741210938,
|
|
"logps/rejected": -254.34902954101562,
|
|
"loss": 5.0507,
|
|
"margin_dpo/margin_mean": 10.164348602294922,
|
|
"margin_dpo/margin_std": 22.118553161621094,
|
|
"step": 157
|
|
},
|
|
{
|
|
"epoch": 0.3308900523560209,
|
|
"grad_norm": 19.902008056640625,
|
|
"learning_rate": 4.2449678515039743e-07,
|
|
"logits/chosen": 1.7699273824691772,
|
|
"logits/rejected": 1.8686857223510742,
|
|
"logps/chosen": -284.7595520019531,
|
|
"logps/ref_chosen": -279.62335205078125,
|
|
"logps/ref_rejected": -267.80615234375,
|
|
"logps/rejected": -285.53924560546875,
|
|
"loss": 4.9867,
|
|
"margin_dpo/margin_mean": 12.59688663482666,
|
|
"margin_dpo/margin_std": 24.365018844604492,
|
|
"step": 158
|
|
},
|
|
{
|
|
"epoch": 0.33298429319371725,
|
|
"grad_norm": 22.825597763061523,
|
|
"learning_rate": 4.2318108837739986e-07,
|
|
"logits/chosen": 1.553140640258789,
|
|
"logits/rejected": 1.439896583557129,
|
|
"logps/chosen": -303.68487548828125,
|
|
"logps/ref_chosen": -301.5324401855469,
|
|
"logps/ref_rejected": -263.529541015625,
|
|
"logps/rejected": -274.5115966796875,
|
|
"loss": 5.1446,
|
|
"margin_dpo/margin_mean": 8.82960319519043,
|
|
"margin_dpo/margin_std": 30.41036605834961,
|
|
"step": 159
|
|
},
|
|
{
|
|
"epoch": 0.33507853403141363,
|
|
"grad_norm": 20.597837448120117,
|
|
"learning_rate": 4.218561044282098e-07,
|
|
"logits/chosen": 1.9710590839385986,
|
|
"logits/rejected": 1.699224829673767,
|
|
"logps/chosen": -311.9967041015625,
|
|
"logps/ref_chosen": -314.1754455566406,
|
|
"logps/ref_rejected": -241.1903076171875,
|
|
"logps/rejected": -267.9695129394531,
|
|
"loss": 4.8583,
|
|
"margin_dpo/margin_mean": 28.957944869995117,
|
|
"margin_dpo/margin_std": 29.692768096923828,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.33717277486910996,
|
|
"grad_norm": 25.702106475830078,
|
|
"learning_rate": 4.2052190435769554e-07,
|
|
"logits/chosen": 1.345297932624817,
|
|
"logits/rejected": 1.2094160318374634,
|
|
"logps/chosen": -268.9353942871094,
|
|
"logps/ref_chosen": -271.0775451660156,
|
|
"logps/ref_rejected": -212.71853637695312,
|
|
"logps/rejected": -228.42201232910156,
|
|
"loss": 4.8254,
|
|
"margin_dpo/margin_mean": 17.845624923706055,
|
|
"margin_dpo/margin_std": 25.553813934326172,
|
|
"step": 161
|
|
},
|
|
{
|
|
"epoch": 0.3392670157068063,
|
|
"grad_norm": 26.607454299926758,
|
|
"learning_rate": 4.1917855971495763e-07,
|
|
"logits/chosen": 1.6022093296051025,
|
|
"logits/rejected": 1.4934312105178833,
|
|
"logps/chosen": -293.98974609375,
|
|
"logps/ref_chosen": -296.7241516113281,
|
|
"logps/ref_rejected": -222.9241485595703,
|
|
"logps/rejected": -236.45635986328125,
|
|
"loss": 4.8946,
|
|
"margin_dpo/margin_mean": 16.266626358032227,
|
|
"margin_dpo/margin_std": 23.54033660888672,
|
|
"step": 162
|
|
},
|
|
{
|
|
"epoch": 0.3413612565445026,
|
|
"grad_norm": 30.976999282836914,
|
|
"learning_rate": 4.1782614253949255e-07,
|
|
"logits/chosen": 1.7216789722442627,
|
|
"logits/rejected": 1.7484244108200073,
|
|
"logps/chosen": -246.4569549560547,
|
|
"logps/ref_chosen": -249.64366149902344,
|
|
"logps/ref_rejected": -244.58258056640625,
|
|
"logps/rejected": -260.55218505859375,
|
|
"loss": 4.7557,
|
|
"margin_dpo/margin_mean": 19.156299591064453,
|
|
"margin_dpo/margin_std": 21.998430252075195,
|
|
"step": 163
|
|
},
|
|
{
|
|
"epoch": 0.34345549738219894,
|
|
"grad_norm": 22.384260177612305,
|
|
"learning_rate": 4.164647253573289e-07,
|
|
"logits/chosen": 1.4122521877288818,
|
|
"logits/rejected": 1.5924245119094849,
|
|
"logps/chosen": -214.8105926513672,
|
|
"logps/ref_chosen": -203.6176300048828,
|
|
"logps/ref_rejected": -216.5535888671875,
|
|
"logps/rejected": -240.29396057128906,
|
|
"loss": 4.8891,
|
|
"margin_dpo/margin_mean": 12.547422409057617,
|
|
"margin_dpo/margin_std": 23.612470626831055,
|
|
"step": 164
|
|
},
|
|
{
|
|
"epoch": 0.34554973821989526,
|
|
"grad_norm": 28.401796340942383,
|
|
"learning_rate": 4.1509438117713863e-07,
|
|
"logits/chosen": 2.1252005100250244,
|
|
"logits/rejected": 2.1613521575927734,
|
|
"logps/chosen": -350.52978515625,
|
|
"logps/ref_chosen": -344.1730651855469,
|
|
"logps/ref_rejected": -304.00128173828125,
|
|
"logps/rejected": -327.9066467285156,
|
|
"loss": 4.9931,
|
|
"margin_dpo/margin_mean": 17.548656463623047,
|
|
"margin_dpo/margin_std": 27.731534957885742,
|
|
"step": 165
|
|
},
|
|
{
|
|
"epoch": 0.34764397905759165,
|
|
"grad_norm": 19.331459045410156,
|
|
"learning_rate": 4.137151834863213e-07,
|
|
"logits/chosen": 1.646728277206421,
|
|
"logits/rejected": 1.640990138053894,
|
|
"logps/chosen": -242.68841552734375,
|
|
"logps/ref_chosen": -233.72891235351562,
|
|
"logps/ref_rejected": -208.29397583007812,
|
|
"logps/rejected": -224.4010467529297,
|
|
"loss": 5.0854,
|
|
"margin_dpo/margin_mean": 7.147580146789551,
|
|
"margin_dpo/margin_std": 26.665321350097656,
|
|
"step": 166
|
|
},
|
|
{
|
|
"epoch": 0.34973821989528797,
|
|
"grad_norm": 19.852638244628906,
|
|
"learning_rate": 4.123272062470633e-07,
|
|
"logits/chosen": 1.6361035108566284,
|
|
"logits/rejected": 1.411129117012024,
|
|
"logps/chosen": -327.1979064941406,
|
|
"logps/ref_chosen": -326.10198974609375,
|
|
"logps/ref_rejected": -232.0992889404297,
|
|
"logps/rejected": -256.6214294433594,
|
|
"loss": 4.9473,
|
|
"margin_dpo/margin_mean": 23.426191329956055,
|
|
"margin_dpo/margin_std": 28.530874252319336,
|
|
"step": 167
|
|
},
|
|
{
|
|
"epoch": 0.3518324607329843,
|
|
"grad_norm": 21.564373016357422,
|
|
"learning_rate": 4.1093052389237174e-07,
|
|
"logits/chosen": 1.3292641639709473,
|
|
"logits/rejected": 1.2153077125549316,
|
|
"logps/chosen": -246.62283325195312,
|
|
"logps/ref_chosen": -247.4376983642578,
|
|
"logps/ref_rejected": -216.68064880371094,
|
|
"logps/rejected": -241.31011962890625,
|
|
"loss": 4.7589,
|
|
"margin_dpo/margin_mean": 25.444313049316406,
|
|
"margin_dpo/margin_std": 17.723526000976562,
|
|
"step": 168
|
|
},
|
|
{
|
|
"epoch": 0.3539267015706806,
|
|
"grad_norm": 19.15829086303711,
|
|
"learning_rate": 4.0952521132208267e-07,
|
|
"logits/chosen": 1.6247047185897827,
|
|
"logits/rejected": 1.7833751440048218,
|
|
"logps/chosen": -281.30078125,
|
|
"logps/ref_chosen": -285.1272277832031,
|
|
"logps/ref_rejected": -279.10943603515625,
|
|
"logps/rejected": -302.5621032714844,
|
|
"loss": 4.632,
|
|
"margin_dpo/margin_mean": 27.279136657714844,
|
|
"margin_dpo/margin_std": 23.28731346130371,
|
|
"step": 169
|
|
},
|
|
{
|
|
"epoch": 0.35602094240837695,
|
|
"grad_norm": 24.02274513244629,
|
|
"learning_rate": 4.081113438988443e-07,
|
|
"logits/chosen": 1.5731761455535889,
|
|
"logits/rejected": 1.4810683727264404,
|
|
"logps/chosen": -357.42327880859375,
|
|
"logps/ref_chosen": -358.3712463378906,
|
|
"logps/ref_rejected": -245.13316345214844,
|
|
"logps/rejected": -264.8816223144531,
|
|
"loss": 4.7427,
|
|
"margin_dpo/margin_mean": 20.696434020996094,
|
|
"margin_dpo/margin_std": 30.21214485168457,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.3581151832460733,
|
|
"grad_norm": 23.137300491333008,
|
|
"learning_rate": 4.0668899744407567e-07,
|
|
"logits/chosen": 1.5857964754104614,
|
|
"logits/rejected": 1.465027093887329,
|
|
"logps/chosen": -269.0282897949219,
|
|
"logps/ref_chosen": -273.9371337890625,
|
|
"logps/ref_rejected": -241.6103515625,
|
|
"logps/rejected": -259.7757263183594,
|
|
"loss": 4.7322,
|
|
"margin_dpo/margin_mean": 23.074222564697266,
|
|
"margin_dpo/margin_std": 30.494476318359375,
|
|
"step": 171
|
|
},
|
|
{
|
|
"epoch": 0.36020942408376966,
|
|
"grad_norm": 22.266416549682617,
|
|
"learning_rate": 4.0525824823390043e-07,
|
|
"logits/chosen": 1.6551828384399414,
|
|
"logits/rejected": 1.8315401077270508,
|
|
"logps/chosen": -254.328369140625,
|
|
"logps/ref_chosen": -255.1793975830078,
|
|
"logps/ref_rejected": -279.3556213378906,
|
|
"logps/rejected": -293.8561706542969,
|
|
"loss": 5.039,
|
|
"margin_dpo/margin_mean": 15.351570129394531,
|
|
"margin_dpo/margin_std": 22.528553009033203,
|
|
"step": 172
|
|
},
|
|
{
|
|
"epoch": 0.362303664921466,
|
|
"grad_norm": 24.233097076416016,
|
|
"learning_rate": 4.0381917299505686e-07,
|
|
"logits/chosen": 1.626520037651062,
|
|
"logits/rejected": 1.3303242921829224,
|
|
"logps/chosen": -338.2034606933594,
|
|
"logps/ref_chosen": -333.66375732421875,
|
|
"logps/ref_rejected": -275.1485290527344,
|
|
"logps/rejected": -300.3768310546875,
|
|
"loss": 4.8793,
|
|
"margin_dpo/margin_mean": 20.68860626220703,
|
|
"margin_dpo/margin_std": 28.740659713745117,
|
|
"step": 173
|
|
},
|
|
{
|
|
"epoch": 0.3643979057591623,
|
|
"grad_norm": 22.030344009399414,
|
|
"learning_rate": 4.0237184890078243e-07,
|
|
"logits/chosen": 1.9243297576904297,
|
|
"logits/rejected": 1.6874244213104248,
|
|
"logps/chosen": -354.771484375,
|
|
"logps/ref_chosen": -362.5843505859375,
|
|
"logps/ref_rejected": -250.0384521484375,
|
|
"logps/rejected": -277.5650939941406,
|
|
"loss": 4.6927,
|
|
"margin_dpo/margin_mean": 35.33952713012695,
|
|
"margin_dpo/margin_std": 33.1160774230957,
|
|
"step": 174
|
|
},
|
|
{
|
|
"epoch": 0.36649214659685864,
|
|
"grad_norm": 35.58210754394531,
|
|
"learning_rate": 4.00916353566676e-07,
|
|
"logits/chosen": 1.5620782375335693,
|
|
"logits/rejected": 1.598710536956787,
|
|
"logps/chosen": -242.1133270263672,
|
|
"logps/ref_chosen": -231.65187072753906,
|
|
"logps/ref_rejected": -264.08526611328125,
|
|
"logps/rejected": -294.81854248046875,
|
|
"loss": 4.8994,
|
|
"margin_dpo/margin_mean": 20.271865844726562,
|
|
"margin_dpo/margin_std": 31.47553062438965,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 0.36858638743455496,
|
|
"grad_norm": 23.747568130493164,
|
|
"learning_rate": 3.994527650465352e-07,
|
|
"logits/chosen": 1.3475306034088135,
|
|
"logits/rejected": 1.4316266775131226,
|
|
"logps/chosen": -278.8919372558594,
|
|
"logps/ref_chosen": -271.37152099609375,
|
|
"logps/ref_rejected": -281.20074462890625,
|
|
"logps/rejected": -299.3853759765625,
|
|
"loss": 5.067,
|
|
"margin_dpo/margin_mean": 10.664226531982422,
|
|
"margin_dpo/margin_std": 33.30470657348633,
|
|
"step": 176
|
|
},
|
|
{
|
|
"epoch": 0.3706806282722513,
|
|
"grad_norm": 21.642898559570312,
|
|
"learning_rate": 3.979811618281705e-07,
|
|
"logits/chosen": 1.6163108348846436,
|
|
"logits/rejected": 1.4132215976715088,
|
|
"logps/chosen": -270.68548583984375,
|
|
"logps/ref_chosen": -266.7376403808594,
|
|
"logps/ref_rejected": -217.114990234375,
|
|
"logps/rejected": -240.8184356689453,
|
|
"loss": 5.1559,
|
|
"margin_dpo/margin_mean": 19.755634307861328,
|
|
"margin_dpo/margin_std": 33.71812438964844,
|
|
"step": 177
|
|
},
|
|
{
|
|
"epoch": 0.37277486910994767,
|
|
"grad_norm": 22.430517196655273,
|
|
"learning_rate": 3.9650162282919654e-07,
|
|
"logits/chosen": 1.463651180267334,
|
|
"logits/rejected": 1.5219404697418213,
|
|
"logps/chosen": -230.6317138671875,
|
|
"logps/ref_chosen": -230.67471313476562,
|
|
"logps/ref_rejected": -185.40577697753906,
|
|
"logps/rejected": -219.8003387451172,
|
|
"loss": 4.7678,
|
|
"margin_dpo/margin_mean": 34.437557220458984,
|
|
"margin_dpo/margin_std": 34.62123107910156,
|
|
"step": 178
|
|
},
|
|
{
|
|
"epoch": 0.374869109947644,
|
|
"grad_norm": 30.32175064086914,
|
|
"learning_rate": 3.9501422739279953e-07,
|
|
"logits/chosen": 1.3042542934417725,
|
|
"logits/rejected": 1.3105361461639404,
|
|
"logps/chosen": -272.2032470703125,
|
|
"logps/ref_chosen": -267.849853515625,
|
|
"logps/ref_rejected": -270.5272521972656,
|
|
"logps/rejected": -286.4674987792969,
|
|
"loss": 4.9431,
|
|
"margin_dpo/margin_mean": 11.586915969848633,
|
|
"margin_dpo/margin_std": 27.10576629638672,
|
|
"step": 179
|
|
},
|
|
{
|
|
"epoch": 0.3769633507853403,
|
|
"grad_norm": 41.640106201171875,
|
|
"learning_rate": 3.935190552834828e-07,
|
|
"logits/chosen": 1.7063815593719482,
|
|
"logits/rejected": 1.6488580703735352,
|
|
"logps/chosen": -302.65380859375,
|
|
"logps/ref_chosen": -296.4002685546875,
|
|
"logps/ref_rejected": -224.35203552246094,
|
|
"logps/rejected": -253.70095825195312,
|
|
"loss": 4.7457,
|
|
"margin_dpo/margin_mean": 23.0954647064209,
|
|
"margin_dpo/margin_std": 34.00359344482422,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.37905759162303665,
|
|
"grad_norm": 30.104999542236328,
|
|
"learning_rate": 3.920161866827889e-07,
|
|
"logits/chosen": 1.1796238422393799,
|
|
"logits/rejected": 1.132272481918335,
|
|
"logps/chosen": -241.59747314453125,
|
|
"logps/ref_chosen": -243.10891723632812,
|
|
"logps/ref_rejected": -231.96902465820312,
|
|
"logps/rejected": -256.28497314453125,
|
|
"loss": 4.8516,
|
|
"margin_dpo/margin_mean": 25.827394485473633,
|
|
"margin_dpo/margin_std": 33.010704040527344,
|
|
"step": 181
|
|
},
|
|
{
|
|
"epoch": 0.381151832460733,
|
|
"grad_norm": 28.976200103759766,
|
|
"learning_rate": 3.90505702185e-07,
|
|
"logits/chosen": 1.584215521812439,
|
|
"logits/rejected": 1.6100966930389404,
|
|
"logps/chosen": -264.54351806640625,
|
|
"logps/ref_chosen": -263.5075988769531,
|
|
"logps/ref_rejected": -254.4083709716797,
|
|
"logps/rejected": -296.13641357421875,
|
|
"loss": 4.5773,
|
|
"margin_dpo/margin_mean": 40.692108154296875,
|
|
"margin_dpo/margin_std": 22.797809600830078,
|
|
"step": 182
|
|
},
|
|
{
|
|
"epoch": 0.3832460732984293,
|
|
"grad_norm": 36.45281982421875,
|
|
"learning_rate": 3.889876827928156e-07,
|
|
"logits/chosen": 1.0563912391662598,
|
|
"logits/rejected": 1.159271001815796,
|
|
"logps/chosen": -233.31439208984375,
|
|
"logps/ref_chosen": -220.9555206298828,
|
|
"logps/ref_rejected": -224.3114471435547,
|
|
"logps/rejected": -247.57742309570312,
|
|
"loss": 4.7918,
|
|
"margin_dpo/margin_mean": 10.90709114074707,
|
|
"margin_dpo/margin_std": 35.502681732177734,
|
|
"step": 183
|
|
},
|
|
{
|
|
"epoch": 0.38534031413612563,
|
|
"grad_norm": 24.624616622924805,
|
|
"learning_rate": 3.874622099130087e-07,
|
|
"logits/chosen": 1.666105031967163,
|
|
"logits/rejected": 1.6945122480392456,
|
|
"logps/chosen": -290.854736328125,
|
|
"logps/ref_chosen": -285.35125732421875,
|
|
"logps/ref_rejected": -282.2647705078125,
|
|
"logps/rejected": -324.21044921875,
|
|
"loss": 4.4107,
|
|
"margin_dpo/margin_mean": 36.442237854003906,
|
|
"margin_dpo/margin_std": 38.43600082397461,
|
|
"step": 184
|
|
},
|
|
{
|
|
"epoch": 0.387434554973822,
|
|
"grad_norm": 29.312538146972656,
|
|
"learning_rate": 3.859293653520604e-07,
|
|
"logits/chosen": 1.6671961545944214,
|
|
"logits/rejected": 1.7346235513687134,
|
|
"logps/chosen": -326.77490234375,
|
|
"logps/ref_chosen": -324.6773986816406,
|
|
"logps/ref_rejected": -275.9365539550781,
|
|
"logps/rejected": -308.9313659667969,
|
|
"loss": 4.8262,
|
|
"margin_dpo/margin_mean": 30.89735221862793,
|
|
"margin_dpo/margin_std": 34.63751220703125,
|
|
"step": 185
|
|
},
|
|
{
|
|
"epoch": 0.38952879581151834,
|
|
"grad_norm": 34.26539611816406,
|
|
"learning_rate": 3.8438923131177237e-07,
|
|
"logits/chosen": 1.594333291053772,
|
|
"logits/rejected": 1.5077753067016602,
|
|
"logps/chosen": -304.6080017089844,
|
|
"logps/ref_chosen": -287.4004211425781,
|
|
"logps/ref_rejected": -222.46803283691406,
|
|
"logps/rejected": -260.6602783203125,
|
|
"loss": 4.8356,
|
|
"margin_dpo/margin_mean": 20.984676361083984,
|
|
"margin_dpo/margin_std": 20.14511489868164,
|
|
"step": 186
|
|
},
|
|
{
|
|
"epoch": 0.39162303664921466,
|
|
"grad_norm": 25.851015090942383,
|
|
"learning_rate": 3.828418903848593e-07,
|
|
"logits/chosen": 1.441859245300293,
|
|
"logits/rejected": 1.5745567083358765,
|
|
"logps/chosen": -401.31182861328125,
|
|
"logps/ref_chosen": -378.8255310058594,
|
|
"logps/ref_rejected": -319.38116455078125,
|
|
"logps/rejected": -365.0159912109375,
|
|
"loss": 4.8863,
|
|
"margin_dpo/margin_mean": 23.14852523803711,
|
|
"margin_dpo/margin_std": 45.19081115722656,
|
|
"step": 187
|
|
},
|
|
{
|
|
"epoch": 0.393717277486911,
|
|
"grad_norm": 41.554141998291016,
|
|
"learning_rate": 3.812874255505191e-07,
|
|
"logits/chosen": 1.360278844833374,
|
|
"logits/rejected": 1.1752986907958984,
|
|
"logps/chosen": -250.5333251953125,
|
|
"logps/ref_chosen": -246.3994903564453,
|
|
"logps/ref_rejected": -204.85589599609375,
|
|
"logps/rejected": -239.05686950683594,
|
|
"loss": 4.8302,
|
|
"margin_dpo/margin_mean": 30.06714630126953,
|
|
"margin_dpo/margin_std": 34.51936340332031,
|
|
"step": 188
|
|
},
|
|
{
|
|
"epoch": 0.3958115183246073,
|
|
"grad_norm": 38.48931884765625,
|
|
"learning_rate": 3.797259201699833e-07,
|
|
"logits/chosen": 1.4543706178665161,
|
|
"logits/rejected": 1.5096098184585571,
|
|
"logps/chosen": -264.8511047363281,
|
|
"logps/ref_chosen": -264.7483825683594,
|
|
"logps/ref_rejected": -292.3799743652344,
|
|
"logps/rejected": -328.85107421875,
|
|
"loss": 4.6022,
|
|
"margin_dpo/margin_mean": 36.36838912963867,
|
|
"margin_dpo/margin_std": 26.913986206054688,
|
|
"step": 189
|
|
},
|
|
{
|
|
"epoch": 0.39790575916230364,
|
|
"grad_norm": 24.164396286010742,
|
|
"learning_rate": 3.781574579820464e-07,
|
|
"logits/chosen": 0.8813581466674805,
|
|
"logits/rejected": 0.9559296369552612,
|
|
"logps/chosen": -223.26422119140625,
|
|
"logps/ref_chosen": -211.2392120361328,
|
|
"logps/ref_rejected": -204.55384826660156,
|
|
"logps/rejected": -233.70541381835938,
|
|
"loss": 4.6669,
|
|
"margin_dpo/margin_mean": 17.12653350830078,
|
|
"margin_dpo/margin_std": 40.82097625732422,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"grad_norm": 28.65985107421875,
|
|
"learning_rate": 3.765821230985757e-07,
|
|
"logits/chosen": 1.188876748085022,
|
|
"logits/rejected": 1.3144832849502563,
|
|
"logps/chosen": -177.3275604248047,
|
|
"logps/ref_chosen": -175.97952270507812,
|
|
"logps/ref_rejected": -206.85325622558594,
|
|
"logps/rejected": -228.22000122070312,
|
|
"loss": 4.7686,
|
|
"margin_dpo/margin_mean": 20.018733978271484,
|
|
"margin_dpo/margin_std": 31.200864791870117,
|
|
"step": 191
|
|
},
|
|
{
|
|
"epoch": 0.40209424083769635,
|
|
"grad_norm": 30.982559204101562,
|
|
"learning_rate": 3.75e-07,
|
|
"logits/chosen": 1.6706230640411377,
|
|
"logits/rejected": 1.854614496231079,
|
|
"logps/chosen": -253.131103515625,
|
|
"logps/ref_chosen": -241.5125732421875,
|
|
"logps/ref_rejected": -285.0710144042969,
|
|
"logps/rejected": -313.1866149902344,
|
|
"loss": 4.972,
|
|
"margin_dpo/margin_mean": 16.49706268310547,
|
|
"margin_dpo/margin_std": 54.19124221801758,
|
|
"step": 192
|
|
},
|
|
{
|
|
"epoch": 0.4041884816753927,
|
|
"grad_norm": 28.91083526611328,
|
|
"learning_rate": 3.734111735307796e-07,
|
|
"logits/chosen": 1.7183902263641357,
|
|
"logits/rejected": 1.5575065612792969,
|
|
"logps/chosen": -255.6170196533203,
|
|
"logps/ref_chosen": -247.06581115722656,
|
|
"logps/ref_rejected": -221.4132537841797,
|
|
"logps/rejected": -248.817138671875,
|
|
"loss": 4.8338,
|
|
"margin_dpo/margin_mean": 18.852684020996094,
|
|
"margin_dpo/margin_std": 29.74808120727539,
|
|
"step": 193
|
|
},
|
|
{
|
|
"epoch": 0.406282722513089,
|
|
"grad_norm": 35.922122955322266,
|
|
"learning_rate": 3.7181572889485623e-07,
|
|
"logits/chosen": 1.3995440006256104,
|
|
"logits/rejected": 1.4992262125015259,
|
|
"logps/chosen": -216.14686584472656,
|
|
"logps/ref_chosen": -208.60263061523438,
|
|
"logps/ref_rejected": -189.4849090576172,
|
|
"logps/rejected": -212.74192810058594,
|
|
"loss": 5.0447,
|
|
"margin_dpo/margin_mean": 15.712799072265625,
|
|
"margin_dpo/margin_std": 31.20469093322754,
|
|
"step": 194
|
|
},
|
|
{
|
|
"epoch": 0.4083769633507853,
|
|
"grad_norm": 31.717365264892578,
|
|
"learning_rate": 3.7021375165108377e-07,
|
|
"logits/chosen": 1.464687466621399,
|
|
"logits/rejected": 1.4596346616744995,
|
|
"logps/chosen": -287.36065673828125,
|
|
"logps/ref_chosen": -278.51275634765625,
|
|
"logps/ref_rejected": -298.09185791015625,
|
|
"logps/rejected": -318.23797607421875,
|
|
"loss": 5.0759,
|
|
"margin_dpo/margin_mean": 11.298222541809082,
|
|
"margin_dpo/margin_std": 27.541015625,
|
|
"step": 195
|
|
},
|
|
{
|
|
"epoch": 0.41047120418848165,
|
|
"grad_norm": 25.350812911987305,
|
|
"learning_rate": 3.6860532770864005e-07,
|
|
"logits/chosen": 1.1140937805175781,
|
|
"logits/rejected": 1.2928128242492676,
|
|
"logps/chosen": -213.8653564453125,
|
|
"logps/ref_chosen": -213.48568725585938,
|
|
"logps/ref_rejected": -216.8994903564453,
|
|
"logps/rejected": -244.5856170654297,
|
|
"loss": 4.6056,
|
|
"margin_dpo/margin_mean": 27.30645751953125,
|
|
"margin_dpo/margin_std": 27.342227935791016,
|
|
"step": 196
|
|
},
|
|
{
|
|
"epoch": 0.41256544502617803,
|
|
"grad_norm": 26.07918357849121,
|
|
"learning_rate": 3.6699054332241985e-07,
|
|
"logits/chosen": 1.4508588314056396,
|
|
"logits/rejected": 1.4278172254562378,
|
|
"logps/chosen": -255.36036682128906,
|
|
"logps/ref_chosen": -256.396728515625,
|
|
"logps/ref_rejected": -185.2763671875,
|
|
"logps/rejected": -232.59405517578125,
|
|
"loss": 4.3517,
|
|
"margin_dpo/margin_mean": 48.35406494140625,
|
|
"margin_dpo/margin_std": 30.625558853149414,
|
|
"step": 197
|
|
},
|
|
{
|
|
"epoch": 0.41465968586387436,
|
|
"grad_norm": 29.263551712036133,
|
|
"learning_rate": 3.653694850884091e-07,
|
|
"logits/chosen": 1.842124342918396,
|
|
"logits/rejected": 1.9477362632751465,
|
|
"logps/chosen": -362.33245849609375,
|
|
"logps/ref_chosen": -366.5196838378906,
|
|
"logps/ref_rejected": -361.7866516113281,
|
|
"logps/rejected": -392.13189697265625,
|
|
"loss": 4.6993,
|
|
"margin_dpo/margin_mean": 34.532501220703125,
|
|
"margin_dpo/margin_std": 40.5759162902832,
|
|
"step": 198
|
|
},
|
|
{
|
|
"epoch": 0.4167539267015707,
|
|
"grad_norm": 32.77777862548828,
|
|
"learning_rate": 3.6374223993904124e-07,
|
|
"logits/chosen": 0.9073523879051208,
|
|
"logits/rejected": 0.9206515550613403,
|
|
"logps/chosen": -210.94586181640625,
|
|
"logps/ref_chosen": -207.86968994140625,
|
|
"logps/ref_rejected": -184.52076721191406,
|
|
"logps/rejected": -226.15948486328125,
|
|
"loss": 4.7308,
|
|
"margin_dpo/margin_mean": 38.562538146972656,
|
|
"margin_dpo/margin_std": 28.093774795532227,
|
|
"step": 199
|
|
},
|
|
{
|
|
"epoch": 0.418848167539267,
|
|
"grad_norm": 28.612226486206055,
|
|
"learning_rate": 3.621088951385353e-07,
|
|
"logits/chosen": 1.3658336400985718,
|
|
"logits/rejected": 1.3859562873840332,
|
|
"logps/chosen": -281.68792724609375,
|
|
"logps/ref_chosen": -276.4098205566406,
|
|
"logps/ref_rejected": -252.23086547851562,
|
|
"logps/rejected": -272.3934326171875,
|
|
"loss": 4.9383,
|
|
"margin_dpo/margin_mean": 14.884419441223145,
|
|
"margin_dpo/margin_std": 53.33942794799805,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.418848167539267,
|
|
"eval_logits/chosen": 1.4300137758255005,
|
|
"eval_logits/rejected": 1.4696903228759766,
|
|
"eval_logps/chosen": -287.022705078125,
|
|
"eval_logps/ref_chosen": -281.4588928222656,
|
|
"eval_logps/ref_rejected": -261.84954833984375,
|
|
"eval_logps/rejected": -295.6717529296875,
|
|
"eval_loss": 0.597048819065094,
|
|
"eval_margin_dpo/margin_mean": 28.258426666259766,
|
|
"eval_margin_dpo/margin_std": 39.02444076538086,
|
|
"eval_runtime": 93.548,
|
|
"eval_samples_per_second": 21.379,
|
|
"eval_steps_per_second": 1.336,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.42094240837696334,
|
|
"grad_norm": 32.25981903076172,
|
|
"learning_rate": 3.604695382782159e-07,
|
|
"logits/chosen": 1.2966924905776978,
|
|
"logits/rejected": 1.4700032472610474,
|
|
"logps/chosen": -266.32220458984375,
|
|
"logps/ref_chosen": -265.32904052734375,
|
|
"logps/ref_rejected": -255.19529724121094,
|
|
"logps/rejected": -297.01544189453125,
|
|
"loss": 4.6702,
|
|
"margin_dpo/margin_mean": 40.82699203491211,
|
|
"margin_dpo/margin_std": 30.352630615234375,
|
|
"step": 201
|
|
},
|
|
{
|
|
"epoch": 0.42303664921465967,
|
|
"grad_norm": 35.75096130371094,
|
|
"learning_rate": 3.588242572718162e-07,
|
|
"logits/chosen": 1.6444792747497559,
|
|
"logits/rejected": 1.5493888854980469,
|
|
"logps/chosen": -278.0890197753906,
|
|
"logps/ref_chosen": -274.6075439453125,
|
|
"logps/ref_rejected": -219.9969940185547,
|
|
"logps/rejected": -251.83348083496094,
|
|
"loss": 4.7312,
|
|
"margin_dpo/margin_mean": 28.354969024658203,
|
|
"margin_dpo/margin_std": 44.327239990234375,
|
|
"step": 202
|
|
},
|
|
{
|
|
"epoch": 0.42513089005235605,
|
|
"grad_norm": 34.300376892089844,
|
|
"learning_rate": 3.571731403507635e-07,
|
|
"logits/chosen": 1.4360129833221436,
|
|
"logits/rejected": 1.3600637912750244,
|
|
"logps/chosen": -302.18707275390625,
|
|
"logps/ref_chosen": -295.6935119628906,
|
|
"logps/ref_rejected": -241.4007568359375,
|
|
"logps/rejected": -266.06201171875,
|
|
"loss": 4.8634,
|
|
"margin_dpo/margin_mean": 18.167736053466797,
|
|
"margin_dpo/margin_std": 24.508058547973633,
|
|
"step": 203
|
|
},
|
|
{
|
|
"epoch": 0.4272251308900524,
|
|
"grad_norm": 28.439380645751953,
|
|
"learning_rate": 3.5551627605944746e-07,
|
|
"logits/chosen": 2.044978141784668,
|
|
"logits/rejected": 1.9398431777954102,
|
|
"logps/chosen": -398.93328857421875,
|
|
"logps/ref_chosen": -392.3414611816406,
|
|
"logps/ref_rejected": -291.4375915527344,
|
|
"logps/rejected": -327.0179443359375,
|
|
"loss": 4.5824,
|
|
"margin_dpo/margin_mean": 28.98847007751465,
|
|
"margin_dpo/margin_std": 38.196693420410156,
|
|
"step": 204
|
|
},
|
|
{
|
|
"epoch": 0.4293193717277487,
|
|
"grad_norm": 27.83588981628418,
|
|
"learning_rate": 3.5385375325047163e-07,
|
|
"logits/chosen": 1.3867862224578857,
|
|
"logits/rejected": 1.657343864440918,
|
|
"logps/chosen": -191.40664672851562,
|
|
"logps/ref_chosen": -190.1780242919922,
|
|
"logps/ref_rejected": -275.8878479003906,
|
|
"logps/rejected": -311.2901306152344,
|
|
"loss": 4.6413,
|
|
"margin_dpo/margin_mean": 34.17367935180664,
|
|
"margin_dpo/margin_std": 32.90446090698242,
|
|
"step": 205
|
|
},
|
|
{
|
|
"epoch": 0.431413612565445,
|
|
"grad_norm": 30.838573455810547,
|
|
"learning_rate": 3.5218566107988867e-07,
|
|
"logits/chosen": 0.9244284629821777,
|
|
"logits/rejected": 1.1862902641296387,
|
|
"logps/chosen": -277.1954040527344,
|
|
"logps/ref_chosen": -278.95977783203125,
|
|
"logps/ref_rejected": -296.458984375,
|
|
"logps/rejected": -318.1335144042969,
|
|
"loss": 4.6764,
|
|
"margin_dpo/margin_mean": 23.43887710571289,
|
|
"margin_dpo/margin_std": 38.043148040771484,
|
|
"step": 206
|
|
},
|
|
{
|
|
"epoch": 0.43350785340314135,
|
|
"grad_norm": 31.335426330566406,
|
|
"learning_rate": 3.505120890024195e-07,
|
|
"logits/chosen": 1.658402681350708,
|
|
"logits/rejected": 1.8528728485107422,
|
|
"logps/chosen": -222.63587951660156,
|
|
"logps/ref_chosen": -219.367919921875,
|
|
"logps/ref_rejected": -231.6876678466797,
|
|
"logps/rejected": -260.1963195800781,
|
|
"loss": 4.9355,
|
|
"margin_dpo/margin_mean": 25.240697860717773,
|
|
"margin_dpo/margin_std": 46.75006866455078,
|
|
"step": 207
|
|
},
|
|
{
|
|
"epoch": 0.4356020942408377,
|
|
"grad_norm": 34.799400329589844,
|
|
"learning_rate": 3.4883312676665534e-07,
|
|
"logits/chosen": 1.4422191381454468,
|
|
"logits/rejected": 1.4274728298187256,
|
|
"logps/chosen": -308.7106628417969,
|
|
"logps/ref_chosen": -303.848388671875,
|
|
"logps/ref_rejected": -252.1853485107422,
|
|
"logps/rejected": -288.1015625,
|
|
"loss": 4.7313,
|
|
"margin_dpo/margin_mean": 31.05390739440918,
|
|
"margin_dpo/margin_std": 41.717647552490234,
|
|
"step": 208
|
|
},
|
|
{
|
|
"epoch": 0.437696335078534,
|
|
"grad_norm": 32.08354949951172,
|
|
"learning_rate": 3.4714886441024573e-07,
|
|
"logits/chosen": 1.48392915725708,
|
|
"logits/rejected": 1.2658922672271729,
|
|
"logps/chosen": -353.7491760253906,
|
|
"logps/ref_chosen": -347.6343688964844,
|
|
"logps/ref_rejected": -240.31988525390625,
|
|
"logps/rejected": -270.4415283203125,
|
|
"loss": 4.891,
|
|
"margin_dpo/margin_mean": 24.006847381591797,
|
|
"margin_dpo/margin_std": 38.43374252319336,
|
|
"step": 209
|
|
},
|
|
{
|
|
"epoch": 0.4397905759162304,
|
|
"grad_norm": 41.964515686035156,
|
|
"learning_rate": 3.454593922550693e-07,
|
|
"logits/chosen": 1.7486904859542847,
|
|
"logits/rejected": 1.899224042892456,
|
|
"logps/chosen": -230.97503662109375,
|
|
"logps/ref_chosen": -236.3311767578125,
|
|
"logps/ref_rejected": -289.6016845703125,
|
|
"logps/rejected": -317.5093688964844,
|
|
"loss": 4.7272,
|
|
"margin_dpo/margin_mean": 33.26382064819336,
|
|
"margin_dpo/margin_std": 33.677276611328125,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.4418848167539267,
|
|
"grad_norm": 31.20326805114746,
|
|
"learning_rate": 3.4376480090239047e-07,
|
|
"logits/chosen": 1.3021446466445923,
|
|
"logits/rejected": 1.3574562072753906,
|
|
"logps/chosen": -205.39637756347656,
|
|
"logps/ref_chosen": -204.38107299804688,
|
|
"logps/ref_rejected": -212.449462890625,
|
|
"logps/rejected": -242.7283172607422,
|
|
"loss": 4.3731,
|
|
"margin_dpo/margin_mean": 29.263545989990234,
|
|
"margin_dpo/margin_std": 35.2406005859375,
|
|
"step": 211
|
|
},
|
|
{
|
|
"epoch": 0.44397905759162304,
|
|
"grad_norm": 35.141807556152344,
|
|
"learning_rate": 3.4206518122800055e-07,
|
|
"logits/chosen": 1.1048368215560913,
|
|
"logits/rejected": 1.1881780624389648,
|
|
"logps/chosen": -241.2005615234375,
|
|
"logps/ref_chosen": -231.28570556640625,
|
|
"logps/ref_rejected": -222.65725708007812,
|
|
"logps/rejected": -248.41322326660156,
|
|
"loss": 4.7702,
|
|
"margin_dpo/margin_mean": 15.841072082519531,
|
|
"margin_dpo/margin_std": 39.27621841430664,
|
|
"step": 212
|
|
},
|
|
{
|
|
"epoch": 0.44607329842931936,
|
|
"grad_norm": 27.873577117919922,
|
|
"learning_rate": 3.403606243773448e-07,
|
|
"logits/chosen": 1.5394573211669922,
|
|
"logits/rejected": 1.6554535627365112,
|
|
"logps/chosen": -336.94354248046875,
|
|
"logps/ref_chosen": -332.35968017578125,
|
|
"logps/ref_rejected": -329.94830322265625,
|
|
"logps/rejected": -365.0585021972656,
|
|
"loss": 4.7341,
|
|
"margin_dpo/margin_mean": 30.526296615600586,
|
|
"margin_dpo/margin_std": 37.60173034667969,
|
|
"step": 213
|
|
},
|
|
{
|
|
"epoch": 0.4481675392670157,
|
|
"grad_norm": 33.36276626586914,
|
|
"learning_rate": 3.3865122176063385e-07,
|
|
"logits/chosen": 1.8208783864974976,
|
|
"logits/rejected": 1.9227497577667236,
|
|
"logps/chosen": -320.77886962890625,
|
|
"logps/ref_chosen": -303.07257080078125,
|
|
"logps/ref_rejected": -310.52001953125,
|
|
"logps/rejected": -347.1795654296875,
|
|
"loss": 4.8278,
|
|
"margin_dpo/margin_mean": 18.953208923339844,
|
|
"margin_dpo/margin_std": 34.65628433227539,
|
|
"step": 214
|
|
},
|
|
{
|
|
"epoch": 0.450261780104712,
|
|
"grad_norm": 31.66336441040039,
|
|
"learning_rate": 3.3693706504794243e-07,
|
|
"logits/chosen": 2.0376367568969727,
|
|
"logits/rejected": 2.075817584991455,
|
|
"logps/chosen": -283.66595458984375,
|
|
"logps/ref_chosen": -286.654296875,
|
|
"logps/ref_rejected": -272.1281433105469,
|
|
"logps/rejected": -317.6420593261719,
|
|
"loss": 4.8219,
|
|
"margin_dpo/margin_mean": 48.50217819213867,
|
|
"margin_dpo/margin_std": 41.356727600097656,
|
|
"step": 215
|
|
},
|
|
{
|
|
"epoch": 0.4523560209424084,
|
|
"grad_norm": 51.738441467285156,
|
|
"learning_rate": 3.3521824616429284e-07,
|
|
"logits/chosen": 1.378481149673462,
|
|
"logits/rejected": 1.2707972526550293,
|
|
"logps/chosen": -364.43603515625,
|
|
"logps/ref_chosen": -351.34417724609375,
|
|
"logps/ref_rejected": -290.5171813964844,
|
|
"logps/rejected": -327.94488525390625,
|
|
"loss": 4.7106,
|
|
"margin_dpo/margin_mean": 24.335878372192383,
|
|
"margin_dpo/margin_std": 42.64997100830078,
|
|
"step": 216
|
|
},
|
|
{
|
|
"epoch": 0.4544502617801047,
|
|
"grad_norm": 35.788352966308594,
|
|
"learning_rate": 3.334948572847253e-07,
|
|
"logits/chosen": 1.5968372821807861,
|
|
"logits/rejected": 1.7124537229537964,
|
|
"logps/chosen": -279.4749755859375,
|
|
"logps/ref_chosen": -273.76788330078125,
|
|
"logps/ref_rejected": -286.2580261230469,
|
|
"logps/rejected": -343.501953125,
|
|
"loss": 4.4028,
|
|
"margin_dpo/margin_mean": 51.53682327270508,
|
|
"margin_dpo/margin_std": 38.95985412597656,
|
|
"step": 217
|
|
},
|
|
{
|
|
"epoch": 0.45654450261780105,
|
|
"grad_norm": 41.920623779296875,
|
|
"learning_rate": 3.317669908293554e-07,
|
|
"logits/chosen": 1.5484070777893066,
|
|
"logits/rejected": 1.8093584775924683,
|
|
"logps/chosen": -235.32321166992188,
|
|
"logps/ref_chosen": -219.74948120117188,
|
|
"logps/ref_rejected": -308.801025390625,
|
|
"logps/rejected": -354.8564147949219,
|
|
"loss": 4.5987,
|
|
"margin_dpo/margin_mean": 30.481698989868164,
|
|
"margin_dpo/margin_std": 39.370521545410156,
|
|
"step": 218
|
|
},
|
|
{
|
|
"epoch": 0.4586387434554974,
|
|
"grad_norm": 30.652027130126953,
|
|
"learning_rate": 3.300347394584172e-07,
|
|
"logits/chosen": 1.3850374221801758,
|
|
"logits/rejected": 1.4719693660736084,
|
|
"logps/chosen": -282.8400573730469,
|
|
"logps/ref_chosen": -264.65374755859375,
|
|
"logps/ref_rejected": -233.9711151123047,
|
|
"logps/rejected": -276.5810546875,
|
|
"loss": 4.6628,
|
|
"margin_dpo/margin_mean": 24.423654556274414,
|
|
"margin_dpo/margin_std": 40.1912841796875,
|
|
"step": 219
|
|
},
|
|
{
|
|
"epoch": 0.4607329842931937,
|
|
"grad_norm": 45.71303939819336,
|
|
"learning_rate": 3.2829819606729477e-07,
|
|
"logits/chosen": 1.9091517925262451,
|
|
"logits/rejected": 1.744594931602478,
|
|
"logps/chosen": -315.3508605957031,
|
|
"logps/ref_chosen": -295.8961486816406,
|
|
"logps/ref_rejected": -219.56228637695312,
|
|
"logps/rejected": -273.97418212890625,
|
|
"loss": 4.5451,
|
|
"margin_dpo/margin_mean": 34.957191467285156,
|
|
"margin_dpo/margin_std": 39.46710968017578,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.46282722513089003,
|
|
"grad_norm": 29.89345932006836,
|
|
"learning_rate": 3.265574537815398e-07,
|
|
"logits/chosen": 1.0531387329101562,
|
|
"logits/rejected": 1.2520170211791992,
|
|
"logps/chosen": -302.5050048828125,
|
|
"logps/ref_chosen": -284.9080810546875,
|
|
"logps/ref_rejected": -310.0538330078125,
|
|
"logps/rejected": -356.16510009765625,
|
|
"loss": 4.8325,
|
|
"margin_dpo/margin_mean": 28.514326095581055,
|
|
"margin_dpo/margin_std": 32.273284912109375,
|
|
"step": 221
|
|
},
|
|
{
|
|
"epoch": 0.4649214659685864,
|
|
"grad_norm": 47.45641326904297,
|
|
"learning_rate": 3.248126059518784e-07,
|
|
"logits/chosen": 1.3722844123840332,
|
|
"logits/rejected": 1.2933783531188965,
|
|
"logps/chosen": -329.0756530761719,
|
|
"logps/ref_chosen": -308.44622802734375,
|
|
"logps/ref_rejected": -254.99667358398438,
|
|
"logps/rejected": -303.69677734375,
|
|
"loss": 4.5312,
|
|
"margin_dpo/margin_mean": 28.07069969177246,
|
|
"margin_dpo/margin_std": 30.664283752441406,
|
|
"step": 222
|
|
},
|
|
{
|
|
"epoch": 0.46701570680628274,
|
|
"grad_norm": 36.27118682861328,
|
|
"learning_rate": 3.230637461492043e-07,
|
|
"logits/chosen": 1.214386224746704,
|
|
"logits/rejected": 1.193519115447998,
|
|
"logps/chosen": -283.5272521972656,
|
|
"logps/ref_chosen": -258.5130310058594,
|
|
"logps/ref_rejected": -231.13885498046875,
|
|
"logps/rejected": -290.85296630859375,
|
|
"loss": 4.4404,
|
|
"margin_dpo/margin_mean": 34.699886322021484,
|
|
"margin_dpo/margin_std": 42.52618408203125,
|
|
"step": 223
|
|
},
|
|
{
|
|
"epoch": 0.46910994764397906,
|
|
"grad_norm": 36.553733825683594,
|
|
"learning_rate": 3.213109681595612e-07,
|
|
"logits/chosen": 1.2857964038848877,
|
|
"logits/rejected": 1.433445692062378,
|
|
"logps/chosen": -248.49815368652344,
|
|
"logps/ref_chosen": -234.55177307128906,
|
|
"logps/ref_rejected": -208.4610595703125,
|
|
"logps/rejected": -271.1744689941406,
|
|
"loss": 4.3795,
|
|
"margin_dpo/margin_mean": 48.76702117919922,
|
|
"margin_dpo/margin_std": 35.48724365234375,
|
|
"step": 224
|
|
},
|
|
{
|
|
"epoch": 0.4712041884816754,
|
|
"grad_norm": 40.446937561035156,
|
|
"learning_rate": 3.1955436597911315e-07,
|
|
"logits/chosen": 1.6013773679733276,
|
|
"logits/rejected": 1.722807765007019,
|
|
"logps/chosen": -360.8241882324219,
|
|
"logps/ref_chosen": -339.7688903808594,
|
|
"logps/ref_rejected": -347.96112060546875,
|
|
"logps/rejected": -397.2833251953125,
|
|
"loss": 4.9012,
|
|
"margin_dpo/margin_mean": 28.266937255859375,
|
|
"margin_dpo/margin_std": 49.008575439453125,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 0.4732984293193717,
|
|
"grad_norm": 37.459991455078125,
|
|
"learning_rate": 3.1779403380910425e-07,
|
|
"logits/chosen": 0.7877386808395386,
|
|
"logits/rejected": 1.0014675855636597,
|
|
"logps/chosen": -225.65472412109375,
|
|
"logps/ref_chosen": -209.56515502929688,
|
|
"logps/ref_rejected": -207.83871459960938,
|
|
"logps/rejected": -261.1890563964844,
|
|
"loss": 4.8962,
|
|
"margin_dpo/margin_mean": 37.260780334472656,
|
|
"margin_dpo/margin_std": 42.05327606201172,
|
|
"step": 226
|
|
},
|
|
{
|
|
"epoch": 0.47539267015706804,
|
|
"grad_norm": 29.621501922607422,
|
|
"learning_rate": 3.160300660508064e-07,
|
|
"logits/chosen": 1.4312937259674072,
|
|
"logits/rejected": 1.644526481628418,
|
|
"logps/chosen": -278.422607421875,
|
|
"logps/ref_chosen": -252.69004821777344,
|
|
"logps/ref_rejected": -252.89427185058594,
|
|
"logps/rejected": -317.8539733886719,
|
|
"loss": 4.4608,
|
|
"margin_dpo/margin_mean": 39.22712707519531,
|
|
"margin_dpo/margin_std": 55.3231086730957,
|
|
"step": 227
|
|
},
|
|
{
|
|
"epoch": 0.4774869109947644,
|
|
"grad_norm": 46.8765869140625,
|
|
"learning_rate": 3.1426255730045695e-07,
|
|
"logits/chosen": 1.5183122158050537,
|
|
"logits/rejected": 1.6018824577331543,
|
|
"logps/chosen": -235.6670684814453,
|
|
"logps/ref_chosen": -210.62913513183594,
|
|
"logps/ref_rejected": -174.08975219726562,
|
|
"logps/rejected": -226.82781982421875,
|
|
"loss": 4.4722,
|
|
"margin_dpo/margin_mean": 27.70014762878418,
|
|
"margin_dpo/margin_std": 42.50754165649414,
|
|
"step": 228
|
|
},
|
|
{
|
|
"epoch": 0.47958115183246075,
|
|
"grad_norm": 39.29153823852539,
|
|
"learning_rate": 3.1249160234418644e-07,
|
|
"logits/chosen": 1.3761322498321533,
|
|
"logits/rejected": 1.3572083711624146,
|
|
"logps/chosen": -336.7579345703125,
|
|
"logps/ref_chosen": -315.1896057128906,
|
|
"logps/ref_rejected": -265.8664855957031,
|
|
"logps/rejected": -330.8331298828125,
|
|
"loss": 4.1991,
|
|
"margin_dpo/margin_mean": 43.39836883544922,
|
|
"margin_dpo/margin_std": 46.784549713134766,
|
|
"step": 229
|
|
},
|
|
{
|
|
"epoch": 0.4816753926701571,
|
|
"grad_norm": 39.0309944152832,
|
|
"learning_rate": 3.1071729615293424e-07,
|
|
"logits/chosen": 0.9634809494018555,
|
|
"logits/rejected": 0.9632886648178101,
|
|
"logps/chosen": -258.40020751953125,
|
|
"logps/ref_chosen": -240.54244995117188,
|
|
"logps/ref_rejected": -262.5657043457031,
|
|
"logps/rejected": -319.0147705078125,
|
|
"loss": 4.5649,
|
|
"margin_dpo/margin_mean": 38.59132385253906,
|
|
"margin_dpo/margin_std": 50.162498474121094,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.4837696335078534,
|
|
"grad_norm": 55.88585662841797,
|
|
"learning_rate": 3.0893973387735683e-07,
|
|
"logits/chosen": 1.1088343858718872,
|
|
"logits/rejected": 1.1820017099380493,
|
|
"logps/chosen": -326.6155700683594,
|
|
"logps/ref_chosen": -290.8667907714844,
|
|
"logps/ref_rejected": -277.01739501953125,
|
|
"logps/rejected": -330.43194580078125,
|
|
"loss": 4.7561,
|
|
"margin_dpo/margin_mean": 17.66571807861328,
|
|
"margin_dpo/margin_std": 42.80667495727539,
|
|
"step": 231
|
|
},
|
|
{
|
|
"epoch": 0.48586387434554973,
|
|
"grad_norm": 43.03524398803711,
|
|
"learning_rate": 3.071590108427243e-07,
|
|
"logits/chosen": 1.3966903686523438,
|
|
"logits/rejected": 1.5778224468231201,
|
|
"logps/chosen": -285.2840270996094,
|
|
"logps/ref_chosen": -260.0438232421875,
|
|
"logps/ref_rejected": -261.63507080078125,
|
|
"logps/rejected": -320.8078308105469,
|
|
"loss": 4.5309,
|
|
"margin_dpo/margin_mean": 33.93254089355469,
|
|
"margin_dpo/margin_std": 37.87663269042969,
|
|
"step": 232
|
|
},
|
|
{
|
|
"epoch": 0.48795811518324606,
|
|
"grad_norm": 53.92399597167969,
|
|
"learning_rate": 3.05375222543809e-07,
|
|
"logits/chosen": 0.838955283164978,
|
|
"logits/rejected": 0.9410269856452942,
|
|
"logps/chosen": -240.427734375,
|
|
"logps/ref_chosen": -221.6608123779297,
|
|
"logps/ref_rejected": -261.16839599609375,
|
|
"logps/rejected": -328.1685485839844,
|
|
"loss": 4.5981,
|
|
"margin_dpo/margin_mean": 48.23321533203125,
|
|
"margin_dpo/margin_std": 38.929080963134766,
|
|
"step": 233
|
|
},
|
|
{
|
|
"epoch": 0.4900523560209424,
|
|
"grad_norm": 33.82901382446289,
|
|
"learning_rate": 3.035884646397637e-07,
|
|
"logits/chosen": 1.1958811283111572,
|
|
"logits/rejected": 1.237013816833496,
|
|
"logps/chosen": -297.06036376953125,
|
|
"logps/ref_chosen": -281.4861145019531,
|
|
"logps/ref_rejected": -276.58441162109375,
|
|
"logps/rejected": -340.1748046875,
|
|
"loss": 4.5243,
|
|
"margin_dpo/margin_mean": 48.01616668701172,
|
|
"margin_dpo/margin_std": 41.946895599365234,
|
|
"step": 234
|
|
},
|
|
{
|
|
"epoch": 0.49214659685863876,
|
|
"grad_norm": 44.3351936340332,
|
|
"learning_rate": 3.017988329489923e-07,
|
|
"logits/chosen": 1.6281356811523438,
|
|
"logits/rejected": 1.5668871402740479,
|
|
"logps/chosen": -301.54229736328125,
|
|
"logps/ref_chosen": -300.5598449707031,
|
|
"logps/ref_rejected": -259.905029296875,
|
|
"logps/rejected": -302.8565368652344,
|
|
"loss": 4.6976,
|
|
"margin_dpo/margin_mean": 41.96904754638672,
|
|
"margin_dpo/margin_std": 42.275962829589844,
|
|
"step": 235
|
|
},
|
|
{
|
|
"epoch": 0.4942408376963351,
|
|
"grad_norm": 32.59733963012695,
|
|
"learning_rate": 3.000064234440111e-07,
|
|
"logits/chosen": 1.2152283191680908,
|
|
"logits/rejected": 1.2443571090698242,
|
|
"logps/chosen": -282.0902099609375,
|
|
"logps/ref_chosen": -270.4844665527344,
|
|
"logps/ref_rejected": -231.67613220214844,
|
|
"logps/rejected": -282.9165954589844,
|
|
"loss": 4.4441,
|
|
"margin_dpo/margin_mean": 39.63469696044922,
|
|
"margin_dpo/margin_std": 43.977577209472656,
|
|
"step": 236
|
|
},
|
|
{
|
|
"epoch": 0.4963350785340314,
|
|
"grad_norm": 47.38420486450195,
|
|
"learning_rate": 2.9821133224630223e-07,
|
|
"logits/chosen": 1.266021490097046,
|
|
"logits/rejected": 1.4988112449645996,
|
|
"logps/chosen": -219.76870727539062,
|
|
"logps/ref_chosen": -194.99342346191406,
|
|
"logps/ref_rejected": -243.12779235839844,
|
|
"logps/rejected": -310.861083984375,
|
|
"loss": 4.5863,
|
|
"margin_dpo/margin_mean": 42.9580078125,
|
|
"margin_dpo/margin_std": 39.53506851196289,
|
|
"step": 237
|
|
},
|
|
{
|
|
"epoch": 0.49842931937172774,
|
|
"grad_norm": 39.1376838684082,
|
|
"learning_rate": 2.964136556211588e-07,
|
|
"logits/chosen": 1.1324211359024048,
|
|
"logits/rejected": 1.0826090574264526,
|
|
"logps/chosen": -261.50543212890625,
|
|
"logps/ref_chosen": -240.9060516357422,
|
|
"logps/ref_rejected": -205.97012329101562,
|
|
"logps/rejected": -254.07977294921875,
|
|
"loss": 4.446,
|
|
"margin_dpo/margin_mean": 27.510299682617188,
|
|
"margin_dpo/margin_std": 46.14052200317383,
|
|
"step": 238
|
|
},
|
|
{
|
|
"epoch": 0.5005235602094241,
|
|
"grad_norm": 40.104766845703125,
|
|
"learning_rate": 2.946134899725226e-07,
|
|
"logits/chosen": 1.3846328258514404,
|
|
"logits/rejected": 1.5582243204116821,
|
|
"logps/chosen": -303.406005859375,
|
|
"logps/ref_chosen": -277.0447998046875,
|
|
"logps/ref_rejected": -284.7602233886719,
|
|
"logps/rejected": -329.855712890625,
|
|
"loss": 4.701,
|
|
"margin_dpo/margin_mean": 18.734315872192383,
|
|
"margin_dpo/margin_std": 59.868492126464844,
|
|
"step": 239
|
|
},
|
|
{
|
|
"epoch": 0.5026178010471204,
|
|
"grad_norm": 38.7380485534668,
|
|
"learning_rate": 2.9281093183781403e-07,
|
|
"logits/chosen": 1.1157575845718384,
|
|
"logits/rejected": 1.0506106615066528,
|
|
"logps/chosen": -290.3915710449219,
|
|
"logps/ref_chosen": -285.29144287109375,
|
|
"logps/ref_rejected": -212.11915588378906,
|
|
"logps/rejected": -265.2477111816406,
|
|
"loss": 4.3702,
|
|
"margin_dpo/margin_mean": 48.02838134765625,
|
|
"margin_dpo/margin_std": 42.991214752197266,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.5047120418848168,
|
|
"grad_norm": 41.1712646484375,
|
|
"learning_rate": 2.910060778827554e-07,
|
|
"logits/chosen": 1.3538631200790405,
|
|
"logits/rejected": 1.4909546375274658,
|
|
"logps/chosen": -260.87939453125,
|
|
"logps/ref_chosen": -254.9442901611328,
|
|
"logps/ref_rejected": -278.5121154785156,
|
|
"logps/rejected": -332.14483642578125,
|
|
"loss": 4.7755,
|
|
"margin_dpo/margin_mean": 47.697601318359375,
|
|
"margin_dpo/margin_std": 46.98115539550781,
|
|
"step": 241
|
|
},
|
|
{
|
|
"epoch": 0.506806282722513,
|
|
"grad_norm": 33.75428771972656,
|
|
"learning_rate": 2.891990248961871e-07,
|
|
"logits/chosen": 2.038219690322876,
|
|
"logits/rejected": 1.9361552000045776,
|
|
"logps/chosen": -274.6824645996094,
|
|
"logps/ref_chosen": -264.16876220703125,
|
|
"logps/ref_rejected": -215.627197265625,
|
|
"logps/rejected": -269.078125,
|
|
"loss": 4.4651,
|
|
"margin_dpo/margin_mean": 42.937225341796875,
|
|
"margin_dpo/margin_std": 52.65818786621094,
|
|
"step": 242
|
|
},
|
|
{
|
|
"epoch": 0.5089005235602094,
|
|
"grad_norm": 45.30524826049805,
|
|
"learning_rate": 2.873898697848762e-07,
|
|
"logits/chosen": 1.3483995199203491,
|
|
"logits/rejected": 1.3744844198226929,
|
|
"logps/chosen": -322.53204345703125,
|
|
"logps/ref_chosen": -313.7347106933594,
|
|
"logps/ref_rejected": -357.50054931640625,
|
|
"logps/rejected": -403.6512756347656,
|
|
"loss": 4.1712,
|
|
"margin_dpo/margin_mean": 37.353302001953125,
|
|
"margin_dpo/margin_std": 46.82036590576172,
|
|
"step": 243
|
|
},
|
|
{
|
|
"epoch": 0.5109947643979058,
|
|
"grad_norm": 35.50245666503906,
|
|
"learning_rate": 2.8557870956832133e-07,
|
|
"logits/chosen": 1.1742347478866577,
|
|
"logits/rejected": 0.9833606481552124,
|
|
"logps/chosen": -291.64044189453125,
|
|
"logps/ref_chosen": -265.0720520019531,
|
|
"logps/ref_rejected": -235.29541015625,
|
|
"logps/rejected": -293.6773376464844,
|
|
"loss": 4.2395,
|
|
"margin_dpo/margin_mean": 31.813528060913086,
|
|
"margin_dpo/margin_std": 38.89430236816406,
|
|
"step": 244
|
|
},
|
|
{
|
|
"epoch": 0.5130890052356021,
|
|
"grad_norm": 55.53532791137695,
|
|
"learning_rate": 2.837656413735479e-07,
|
|
"logits/chosen": 1.9007817506790161,
|
|
"logits/rejected": 1.6215357780456543,
|
|
"logps/chosen": -346.5862731933594,
|
|
"logps/ref_chosen": -338.6529235839844,
|
|
"logps/ref_rejected": -259.6473693847656,
|
|
"logps/rejected": -305.2576904296875,
|
|
"loss": 4.2236,
|
|
"margin_dpo/margin_mean": 37.676998138427734,
|
|
"margin_dpo/margin_std": 33.821868896484375,
|
|
"step": 245
|
|
},
|
|
{
|
|
"epoch": 0.5151832460732985,
|
|
"grad_norm": 36.23422622680664,
|
|
"learning_rate": 2.8195076242990116e-07,
|
|
"logits/chosen": 1.154848337173462,
|
|
"logits/rejected": 1.1089671850204468,
|
|
"logps/chosen": -273.8831787109375,
|
|
"logps/ref_chosen": -254.98756408691406,
|
|
"logps/ref_rejected": -201.20782470703125,
|
|
"logps/rejected": -253.29745483398438,
|
|
"loss": 4.8429,
|
|
"margin_dpo/margin_mean": 33.19401550292969,
|
|
"margin_dpo/margin_std": 46.2290153503418,
|
|
"step": 246
|
|
},
|
|
{
|
|
"epoch": 0.5172774869109947,
|
|
"grad_norm": 41.20656967163086,
|
|
"learning_rate": 2.801341700638307e-07,
|
|
"logits/chosen": 1.237385630607605,
|
|
"logits/rejected": 1.1006180047988892,
|
|
"logps/chosen": -284.4879455566406,
|
|
"logps/ref_chosen": -276.70361328125,
|
|
"logps/ref_rejected": -209.83523559570312,
|
|
"logps/rejected": -266.49542236328125,
|
|
"loss": 4.4147,
|
|
"margin_dpo/margin_mean": 48.87584686279297,
|
|
"margin_dpo/margin_std": 42.81962966918945,
|
|
"step": 247
|
|
},
|
|
{
|
|
"epoch": 0.5193717277486911,
|
|
"grad_norm": 57.703697204589844,
|
|
"learning_rate": 2.7831596169367227e-07,
|
|
"logits/chosen": 1.0914226770401,
|
|
"logits/rejected": 1.1984620094299316,
|
|
"logps/chosen": -258.7278747558594,
|
|
"logps/ref_chosen": -249.7368621826172,
|
|
"logps/ref_rejected": -230.7808837890625,
|
|
"logps/rejected": -274.591552734375,
|
|
"loss": 4.7502,
|
|
"margin_dpo/margin_mean": 34.819644927978516,
|
|
"margin_dpo/margin_std": 40.67426300048828,
|
|
"step": 248
|
|
},
|
|
{
|
|
"epoch": 0.5214659685863874,
|
|
"grad_norm": 47.22350311279297,
|
|
"learning_rate": 2.7649623482442274e-07,
|
|
"logits/chosen": 1.0606677532196045,
|
|
"logits/rejected": 1.1147487163543701,
|
|
"logps/chosen": -266.5928649902344,
|
|
"logps/ref_chosen": -229.43399047851562,
|
|
"logps/ref_rejected": -242.59182739257812,
|
|
"logps/rejected": -302.368896484375,
|
|
"loss": 4.5617,
|
|
"margin_dpo/margin_mean": 22.618181228637695,
|
|
"margin_dpo/margin_std": 44.011497497558594,
|
|
"step": 249
|
|
},
|
|
{
|
|
"epoch": 0.5235602094240838,
|
|
"grad_norm": 34.5158576965332,
|
|
"learning_rate": 2.7467508704251135e-07,
|
|
"logits/chosen": 1.6137490272521973,
|
|
"logits/rejected": 1.7355223894119263,
|
|
"logps/chosen": -386.4211120605469,
|
|
"logps/ref_chosen": -374.47015380859375,
|
|
"logps/ref_rejected": -397.1805114746094,
|
|
"logps/rejected": -455.82952880859375,
|
|
"loss": 4.6106,
|
|
"margin_dpo/margin_mean": 46.698097229003906,
|
|
"margin_dpo/margin_std": 53.3001823425293,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.5256544502617801,
|
|
"grad_norm": 44.91852569580078,
|
|
"learning_rate": 2.7285261601056697e-07,
|
|
"logits/chosen": 1.0645577907562256,
|
|
"logits/rejected": 0.8425718545913696,
|
|
"logps/chosen": -355.9337463378906,
|
|
"logps/ref_chosen": -340.28240966796875,
|
|
"logps/ref_rejected": -255.56735229492188,
|
|
"logps/rejected": -305.7314147949219,
|
|
"loss": 4.5346,
|
|
"margin_dpo/margin_mean": 34.51277160644531,
|
|
"margin_dpo/margin_std": 47.6010627746582,
|
|
"step": 251
|
|
},
|
|
{
|
|
"epoch": 0.5277486910994764,
|
|
"grad_norm": 30.532474517822266,
|
|
"learning_rate": 2.7102891946217994e-07,
|
|
"logits/chosen": 1.4391117095947266,
|
|
"logits/rejected": 1.4691420793533325,
|
|
"logps/chosen": -215.19662475585938,
|
|
"logps/ref_chosen": -198.7939453125,
|
|
"logps/ref_rejected": -212.86849975585938,
|
|
"logps/rejected": -271.3706359863281,
|
|
"loss": 4.5578,
|
|
"margin_dpo/margin_mean": 42.09947967529297,
|
|
"margin_dpo/margin_std": 46.31159973144531,
|
|
"step": 252
|
|
},
|
|
{
|
|
"epoch": 0.5298429319371728,
|
|
"grad_norm": 45.59535598754883,
|
|
"learning_rate": 2.692040951966617e-07,
|
|
"logits/chosen": 1.448297142982483,
|
|
"logits/rejected": 1.3689329624176025,
|
|
"logps/chosen": -370.5470275878906,
|
|
"logps/ref_chosen": -343.3220520019531,
|
|
"logps/ref_rejected": -258.52044677734375,
|
|
"logps/rejected": -316.4342041015625,
|
|
"loss": 4.7989,
|
|
"margin_dpo/margin_mean": 30.688785552978516,
|
|
"margin_dpo/margin_std": 51.24434280395508,
|
|
"step": 253
|
|
},
|
|
{
|
|
"epoch": 0.5319371727748691,
|
|
"grad_norm": 37.76780700683594,
|
|
"learning_rate": 2.6737824107379947e-07,
|
|
"logits/chosen": 1.4605791568756104,
|
|
"logits/rejected": 1.3956043720245361,
|
|
"logps/chosen": -326.6246337890625,
|
|
"logps/ref_chosen": -300.8880310058594,
|
|
"logps/ref_rejected": -288.5895690917969,
|
|
"logps/rejected": -342.62518310546875,
|
|
"loss": 4.4466,
|
|
"margin_dpo/margin_mean": 28.299026489257812,
|
|
"margin_dpo/margin_std": 43.362815856933594,
|
|
"step": 254
|
|
},
|
|
{
|
|
"epoch": 0.5340314136125655,
|
|
"grad_norm": 38.82050323486328,
|
|
"learning_rate": 2.655514550086086e-07,
|
|
"logits/chosen": 1.3760805130004883,
|
|
"logits/rejected": 1.3785066604614258,
|
|
"logps/chosen": -309.2912902832031,
|
|
"logps/ref_chosen": -283.4182434082031,
|
|
"logps/ref_rejected": -317.677978515625,
|
|
"logps/rejected": -381.75701904296875,
|
|
"loss": 4.3792,
|
|
"margin_dpo/margin_mean": 38.20598220825195,
|
|
"margin_dpo/margin_std": 58.638553619384766,
|
|
"step": 255
|
|
},
|
|
{
|
|
"epoch": 0.5361256544502618,
|
|
"grad_norm": 37.75017166137695,
|
|
"learning_rate": 2.6372383496608186e-07,
|
|
"logits/chosen": 1.3811287879943848,
|
|
"logits/rejected": 1.417975902557373,
|
|
"logps/chosen": -352.6160583496094,
|
|
"logps/ref_chosen": -333.6951599121094,
|
|
"logps/ref_rejected": -302.9135437011719,
|
|
"logps/rejected": -374.9257507324219,
|
|
"loss": 4.4641,
|
|
"margin_dpo/margin_mean": 53.091312408447266,
|
|
"margin_dpo/margin_std": 62.08375930786133,
|
|
"step": 256
|
|
},
|
|
{
|
|
"epoch": 0.5382198952879581,
|
|
"grad_norm": 39.30986022949219,
|
|
"learning_rate": 2.618954789559356e-07,
|
|
"logits/chosen": 1.3786240816116333,
|
|
"logits/rejected": 1.5010215044021606,
|
|
"logps/chosen": -297.0694885253906,
|
|
"logps/ref_chosen": -269.2105712890625,
|
|
"logps/ref_rejected": -282.474365234375,
|
|
"logps/rejected": -354.8011474609375,
|
|
"loss": 4.3795,
|
|
"margin_dpo/margin_mean": 44.46790313720703,
|
|
"margin_dpo/margin_std": 52.83469772338867,
|
|
"step": 257
|
|
},
|
|
{
|
|
"epoch": 0.5403141361256545,
|
|
"grad_norm": 64.80396270751953,
|
|
"learning_rate": 2.600664850273538e-07,
|
|
"logits/chosen": 1.1780776977539062,
|
|
"logits/rejected": 1.358864665031433,
|
|
"logps/chosen": -304.0466613769531,
|
|
"logps/ref_chosen": -274.53314208984375,
|
|
"logps/ref_rejected": -284.3149108886719,
|
|
"logps/rejected": -365.9967346191406,
|
|
"loss": 4.2167,
|
|
"margin_dpo/margin_mean": 52.16826248168945,
|
|
"margin_dpo/margin_std": 58.13621139526367,
|
|
"step": 258
|
|
},
|
|
{
|
|
"epoch": 0.5424083769633508,
|
|
"grad_norm": 57.088375091552734,
|
|
"learning_rate": 2.582369512637302e-07,
|
|
"logits/chosen": 1.1932607889175415,
|
|
"logits/rejected": 1.141600489616394,
|
|
"logps/chosen": -255.16656494140625,
|
|
"logps/ref_chosen": -235.41139221191406,
|
|
"logps/ref_rejected": -217.746826171875,
|
|
"logps/rejected": -282.0968933105469,
|
|
"loss": 4.5131,
|
|
"margin_dpo/margin_mean": 44.59490966796875,
|
|
"margin_dpo/margin_std": 53.4549674987793,
|
|
"step": 259
|
|
},
|
|
{
|
|
"epoch": 0.5445026178010471,
|
|
"grad_norm": 65.07582092285156,
|
|
"learning_rate": 2.5640697577740815e-07,
|
|
"logits/chosen": 0.8414401412010193,
|
|
"logits/rejected": 0.9391928911209106,
|
|
"logps/chosen": -242.9241943359375,
|
|
"logps/ref_chosen": -224.4993133544922,
|
|
"logps/ref_rejected": -215.19839477539062,
|
|
"logps/rejected": -268.3463439941406,
|
|
"loss": 5.2773,
|
|
"margin_dpo/margin_mean": 34.72306442260742,
|
|
"margin_dpo/margin_std": 62.74570083618164,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.5465968586387434,
|
|
"grad_norm": 62.26913070678711,
|
|
"learning_rate": 2.5457665670441937e-07,
|
|
"logits/chosen": 0.8039923906326294,
|
|
"logits/rejected": 0.6551789045333862,
|
|
"logps/chosen": -289.4314270019531,
|
|
"logps/ref_chosen": -251.2598114013672,
|
|
"logps/ref_rejected": -205.53323364257812,
|
|
"logps/rejected": -263.32464599609375,
|
|
"loss": 4.7681,
|
|
"margin_dpo/margin_mean": 19.61980438232422,
|
|
"margin_dpo/margin_std": 53.51505661010742,
|
|
"step": 261
|
|
},
|
|
{
|
|
"epoch": 0.5486910994764398,
|
|
"grad_norm": 57.36088562011719,
|
|
"learning_rate": 2.527460921992209e-07,
|
|
"logits/chosen": 1.5565768480300903,
|
|
"logits/rejected": 1.5727018117904663,
|
|
"logps/chosen": -370.4512634277344,
|
|
"logps/ref_chosen": -347.8548889160156,
|
|
"logps/ref_rejected": -309.43011474609375,
|
|
"logps/rejected": -387.045166015625,
|
|
"loss": 4.4059,
|
|
"margin_dpo/margin_mean": 55.01873016357422,
|
|
"margin_dpo/margin_std": 52.59794235229492,
|
|
"step": 262
|
|
},
|
|
{
|
|
"epoch": 0.5507853403141362,
|
|
"grad_norm": 80.06723022460938,
|
|
"learning_rate": 2.509153804294318e-07,
|
|
"logits/chosen": 1.2121027708053589,
|
|
"logits/rejected": 1.3596720695495605,
|
|
"logps/chosen": -301.9384765625,
|
|
"logps/ref_chosen": -261.0179443359375,
|
|
"logps/ref_rejected": -295.4287109375,
|
|
"logps/rejected": -357.9337463378906,
|
|
"loss": 4.7515,
|
|
"margin_dpo/margin_mean": 21.584484100341797,
|
|
"margin_dpo/margin_std": 49.02084732055664,
|
|
"step": 263
|
|
},
|
|
{
|
|
"epoch": 0.5528795811518324,
|
|
"grad_norm": 65.33440399169922,
|
|
"learning_rate": 2.4908461957056825e-07,
|
|
"logits/chosen": 1.4055628776550293,
|
|
"logits/rejected": 1.2200889587402344,
|
|
"logps/chosen": -321.2078857421875,
|
|
"logps/ref_chosen": -297.6844482421875,
|
|
"logps/ref_rejected": -205.72137451171875,
|
|
"logps/rejected": -284.78070068359375,
|
|
"loss": 4.2315,
|
|
"margin_dpo/margin_mean": 55.53590393066406,
|
|
"margin_dpo/margin_std": 46.41938018798828,
|
|
"step": 264
|
|
},
|
|
{
|
|
"epoch": 0.5549738219895288,
|
|
"grad_norm": 51.22663879394531,
|
|
"learning_rate": 2.4725390780077905e-07,
|
|
"logits/chosen": 1.3676010370254517,
|
|
"logits/rejected": 1.380630612373352,
|
|
"logps/chosen": -306.2537536621094,
|
|
"logps/ref_chosen": -285.8244323730469,
|
|
"logps/ref_rejected": -275.6885681152344,
|
|
"logps/rejected": -362.411865234375,
|
|
"loss": 4.4685,
|
|
"margin_dpo/margin_mean": 66.2939682006836,
|
|
"margin_dpo/margin_std": 51.13595199584961,
|
|
"step": 265
|
|
},
|
|
{
|
|
"epoch": 0.5570680628272251,
|
|
"grad_norm": 56.91117477416992,
|
|
"learning_rate": 2.454233432955807e-07,
|
|
"logits/chosen": 1.253815770149231,
|
|
"logits/rejected": 1.3310532569885254,
|
|
"logps/chosen": -280.5023193359375,
|
|
"logps/ref_chosen": -273.0467834472656,
|
|
"logps/ref_rejected": -291.18133544921875,
|
|
"logps/rejected": -342.8387145996094,
|
|
"loss": 4.314,
|
|
"margin_dpo/margin_mean": 44.201820373535156,
|
|
"margin_dpo/margin_std": 46.703495025634766,
|
|
"step": 266
|
|
},
|
|
{
|
|
"epoch": 0.5591623036649215,
|
|
"grad_norm": 44.81831741333008,
|
|
"learning_rate": 2.435930242225919e-07,
|
|
"logits/chosen": 1.1857373714447021,
|
|
"logits/rejected": 1.3162403106689453,
|
|
"logps/chosen": -294.0497131347656,
|
|
"logps/ref_chosen": -272.337890625,
|
|
"logps/ref_rejected": -279.97076416015625,
|
|
"logps/rejected": -351.3126525878906,
|
|
"loss": 4.6021,
|
|
"margin_dpo/margin_mean": 49.630027770996094,
|
|
"margin_dpo/margin_std": 56.707557678222656,
|
|
"step": 267
|
|
},
|
|
{
|
|
"epoch": 0.5612565445026177,
|
|
"grad_norm": 65.94684600830078,
|
|
"learning_rate": 2.4176304873626984e-07,
|
|
"logits/chosen": 1.1858967542648315,
|
|
"logits/rejected": 1.2295567989349365,
|
|
"logps/chosen": -257.2110900878906,
|
|
"logps/ref_chosen": -235.03692626953125,
|
|
"logps/ref_rejected": -245.3459014892578,
|
|
"logps/rejected": -307.6155090332031,
|
|
"loss": 4.4782,
|
|
"margin_dpo/margin_mean": 40.09546661376953,
|
|
"margin_dpo/margin_std": 52.132469177246094,
|
|
"step": 268
|
|
},
|
|
{
|
|
"epoch": 0.5633507853403141,
|
|
"grad_norm": 40.701568603515625,
|
|
"learning_rate": 2.399335149726463e-07,
|
|
"logits/chosen": 1.1352908611297607,
|
|
"logits/rejected": 1.3305590152740479,
|
|
"logps/chosen": -262.03607177734375,
|
|
"logps/ref_chosen": -240.3035430908203,
|
|
"logps/ref_rejected": -233.82675170898438,
|
|
"logps/rejected": -302.03057861328125,
|
|
"loss": 4.6077,
|
|
"margin_dpo/margin_mean": 46.471290588378906,
|
|
"margin_dpo/margin_std": 58.428775787353516,
|
|
"step": 269
|
|
},
|
|
{
|
|
"epoch": 0.5654450261780105,
|
|
"grad_norm": 73.9042739868164,
|
|
"learning_rate": 2.381045210440644e-07,
|
|
"logits/chosen": 1.550492286682129,
|
|
"logits/rejected": 1.8568247556686401,
|
|
"logps/chosen": -273.93243408203125,
|
|
"logps/ref_chosen": -249.420166015625,
|
|
"logps/ref_rejected": -279.5133972167969,
|
|
"logps/rejected": -334.2721862792969,
|
|
"loss": 4.5035,
|
|
"margin_dpo/margin_mean": 30.24651336669922,
|
|
"margin_dpo/margin_std": 66.87718200683594,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.5675392670157068,
|
|
"grad_norm": 64.23003387451172,
|
|
"learning_rate": 2.3627616503391812e-07,
|
|
"logits/chosen": 1.003787636756897,
|
|
"logits/rejected": 1.0502243041992188,
|
|
"logps/chosen": -243.3548126220703,
|
|
"logps/ref_chosen": -227.45108032226562,
|
|
"logps/ref_rejected": -183.29275512695312,
|
|
"logps/rejected": -236.02288818359375,
|
|
"loss": 4.2933,
|
|
"margin_dpo/margin_mean": 36.826351165771484,
|
|
"margin_dpo/margin_std": 40.91915512084961,
|
|
"step": 271
|
|
},
|
|
{
|
|
"epoch": 0.5696335078534032,
|
|
"grad_norm": 54.780860900878906,
|
|
"learning_rate": 2.344485449913914e-07,
|
|
"logits/chosen": 1.5306094884872437,
|
|
"logits/rejected": 1.417227029800415,
|
|
"logps/chosen": -370.7244873046875,
|
|
"logps/ref_chosen": -360.17462158203125,
|
|
"logps/ref_rejected": -241.59568786621094,
|
|
"logps/rejected": -302.3287658691406,
|
|
"loss": 4.4468,
|
|
"margin_dpo/margin_mean": 50.18324661254883,
|
|
"margin_dpo/margin_std": 52.38308334350586,
|
|
"step": 272
|
|
},
|
|
{
|
|
"epoch": 0.5717277486910994,
|
|
"grad_norm": 76.68623352050781,
|
|
"learning_rate": 2.3262175892620062e-07,
|
|
"logits/chosen": 1.463561773300171,
|
|
"logits/rejected": 1.513543725013733,
|
|
"logps/chosen": -323.9863586425781,
|
|
"logps/ref_chosen": -309.366455078125,
|
|
"logps/ref_rejected": -271.2337951660156,
|
|
"logps/rejected": -335.74420166015625,
|
|
"loss": 4.3448,
|
|
"margin_dpo/margin_mean": 49.89052963256836,
|
|
"margin_dpo/margin_std": 69.81361389160156,
|
|
"step": 273
|
|
},
|
|
{
|
|
"epoch": 0.5738219895287958,
|
|
"grad_norm": 40.13818359375,
|
|
"learning_rate": 2.3079590480333827e-07,
|
|
"logits/chosen": 1.596007227897644,
|
|
"logits/rejected": 1.7439165115356445,
|
|
"logps/chosen": -304.8982238769531,
|
|
"logps/ref_chosen": -295.56866455078125,
|
|
"logps/ref_rejected": -253.984130859375,
|
|
"logps/rejected": -311.5342102050781,
|
|
"loss": 4.2593,
|
|
"margin_dpo/margin_mean": 48.22050476074219,
|
|
"margin_dpo/margin_std": 47.02198791503906,
|
|
"step": 274
|
|
},
|
|
{
|
|
"epoch": 0.5759162303664922,
|
|
"grad_norm": 51.986778259277344,
|
|
"learning_rate": 2.2897108053782e-07,
|
|
"logits/chosen": 0.9961601495742798,
|
|
"logits/rejected": 1.0950078964233398,
|
|
"logps/chosen": -251.74990844726562,
|
|
"logps/ref_chosen": -235.93154907226562,
|
|
"logps/ref_rejected": -230.19454956054688,
|
|
"logps/rejected": -288.5270080566406,
|
|
"loss": 4.061,
|
|
"margin_dpo/margin_mean": 42.51408386230469,
|
|
"margin_dpo/margin_std": 56.936180114746094,
|
|
"step": 275
|
|
},
|
|
{
|
|
"epoch": 0.5780104712041885,
|
|
"grad_norm": 51.089576721191406,
|
|
"learning_rate": 2.2714738398943308e-07,
|
|
"logits/chosen": 1.7104884386062622,
|
|
"logits/rejected": 1.6105390787124634,
|
|
"logps/chosen": -365.57635498046875,
|
|
"logps/ref_chosen": -357.3829650878906,
|
|
"logps/ref_rejected": -273.025146484375,
|
|
"logps/rejected": -322.5356140136719,
|
|
"loss": 4.1898,
|
|
"margin_dpo/margin_mean": 41.317039489746094,
|
|
"margin_dpo/margin_std": 46.224205017089844,
|
|
"step": 276
|
|
},
|
|
{
|
|
"epoch": 0.5801047120418849,
|
|
"grad_norm": 59.21977233886719,
|
|
"learning_rate": 2.2532491295748865e-07,
|
|
"logits/chosen": 1.0496208667755127,
|
|
"logits/rejected": 1.255394697189331,
|
|
"logps/chosen": -316.8267822265625,
|
|
"logps/ref_chosen": -289.98040771484375,
|
|
"logps/ref_rejected": -310.3972473144531,
|
|
"logps/rejected": -371.3232727050781,
|
|
"loss": 4.7638,
|
|
"margin_dpo/margin_mean": 34.079654693603516,
|
|
"margin_dpo/margin_std": 57.57683563232422,
|
|
"step": 277
|
|
},
|
|
{
|
|
"epoch": 0.5821989528795811,
|
|
"grad_norm": 52.5463981628418,
|
|
"learning_rate": 2.2350376517557726e-07,
|
|
"logits/chosen": 0.8676056861877441,
|
|
"logits/rejected": 0.8314589262008667,
|
|
"logps/chosen": -256.13165283203125,
|
|
"logps/ref_chosen": -237.13531494140625,
|
|
"logps/ref_rejected": -232.33502197265625,
|
|
"logps/rejected": -290.1609802246094,
|
|
"loss": 4.9549,
|
|
"margin_dpo/margin_mean": 38.829627990722656,
|
|
"margin_dpo/margin_std": 55.37847900390625,
|
|
"step": 278
|
|
},
|
|
{
|
|
"epoch": 0.5842931937172775,
|
|
"grad_norm": 45.49783706665039,
|
|
"learning_rate": 2.2168403830632769e-07,
|
|
"logits/chosen": 1.3541253805160522,
|
|
"logits/rejected": 1.4431573152542114,
|
|
"logps/chosen": -361.9897766113281,
|
|
"logps/ref_chosen": -354.13311767578125,
|
|
"logps/ref_rejected": -305.6336975097656,
|
|
"logps/rejected": -358.853271484375,
|
|
"loss": 4.1861,
|
|
"margin_dpo/margin_mean": 45.362972259521484,
|
|
"margin_dpo/margin_std": 40.74034118652344,
|
|
"step": 279
|
|
},
|
|
{
|
|
"epoch": 0.5863874345549738,
|
|
"grad_norm": 45.03684997558594,
|
|
"learning_rate": 2.1986582993616925e-07,
|
|
"logits/chosen": 1.337092638015747,
|
|
"logits/rejected": 1.3559750318527222,
|
|
"logps/chosen": -274.98260498046875,
|
|
"logps/ref_chosen": -268.2659912109375,
|
|
"logps/ref_rejected": -232.44114685058594,
|
|
"logps/rejected": -289.997314453125,
|
|
"loss": 4.4116,
|
|
"margin_dpo/margin_mean": 50.83951950073242,
|
|
"margin_dpo/margin_std": 63.36719512939453,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.5884816753926702,
|
|
"grad_norm": 40.450538635253906,
|
|
"learning_rate": 2.1804923757009882e-07,
|
|
"logits/chosen": 1.3821660280227661,
|
|
"logits/rejected": 1.3343546390533447,
|
|
"logps/chosen": -287.6787414550781,
|
|
"logps/ref_chosen": -257.0721740722656,
|
|
"logps/ref_rejected": -248.18264770507812,
|
|
"logps/rejected": -319.9836730957031,
|
|
"loss": 4.3178,
|
|
"margin_dpo/margin_mean": 41.19449234008789,
|
|
"margin_dpo/margin_std": 56.606285095214844,
|
|
"step": 281
|
|
},
|
|
{
|
|
"epoch": 0.5905759162303665,
|
|
"grad_norm": 57.53801345825195,
|
|
"learning_rate": 2.1623435862645205e-07,
|
|
"logits/chosen": 1.5275428295135498,
|
|
"logits/rejected": 1.6021305322647095,
|
|
"logps/chosen": -293.01007080078125,
|
|
"logps/ref_chosen": -269.2411804199219,
|
|
"logps/ref_rejected": -323.8949279785156,
|
|
"logps/rejected": -384.69775390625,
|
|
"loss": 4.58,
|
|
"margin_dpo/margin_mean": 37.03391647338867,
|
|
"margin_dpo/margin_std": 60.21036148071289,
|
|
"step": 282
|
|
},
|
|
{
|
|
"epoch": 0.5926701570680628,
|
|
"grad_norm": 48.43301773071289,
|
|
"learning_rate": 2.1442129043167873e-07,
|
|
"logits/chosen": 1.0364420413970947,
|
|
"logits/rejected": 1.2966769933700562,
|
|
"logps/chosen": -279.5700378417969,
|
|
"logps/ref_chosen": -257.61688232421875,
|
|
"logps/ref_rejected": -234.8463134765625,
|
|
"logps/rejected": -297.4212646484375,
|
|
"loss": 4.4572,
|
|
"margin_dpo/margin_mean": 40.621826171875,
|
|
"margin_dpo/margin_std": 61.95793533325195,
|
|
"step": 283
|
|
},
|
|
{
|
|
"epoch": 0.5947643979057592,
|
|
"grad_norm": 77.20549011230469,
|
|
"learning_rate": 2.1261013021512378e-07,
|
|
"logits/chosen": 1.3976938724517822,
|
|
"logits/rejected": 1.3549877405166626,
|
|
"logps/chosen": -252.79287719726562,
|
|
"logps/ref_chosen": -228.94891357421875,
|
|
"logps/ref_rejected": -288.43804931640625,
|
|
"logps/rejected": -346.0372009277344,
|
|
"loss": 4.6112,
|
|
"margin_dpo/margin_mean": 33.755226135253906,
|
|
"margin_dpo/margin_std": 55.99586486816406,
|
|
"step": 284
|
|
},
|
|
{
|
|
"epoch": 0.5968586387434555,
|
|
"grad_norm": 57.3791389465332,
|
|
"learning_rate": 2.1080097510381294e-07,
|
|
"logits/chosen": 1.5715055465698242,
|
|
"logits/rejected": 1.4658746719360352,
|
|
"logps/chosen": -386.9960632324219,
|
|
"logps/ref_chosen": -364.84332275390625,
|
|
"logps/ref_rejected": -306.4946594238281,
|
|
"logps/rejected": -359.02520751953125,
|
|
"loss": 4.7905,
|
|
"margin_dpo/margin_mean": 30.377866744995117,
|
|
"margin_dpo/margin_std": 52.86358642578125,
|
|
"step": 285
|
|
},
|
|
{
|
|
"epoch": 0.5989528795811518,
|
|
"grad_norm": 37.026641845703125,
|
|
"learning_rate": 2.089939221172446e-07,
|
|
"logits/chosen": 1.36098051071167,
|
|
"logits/rejected": 1.4245069026947021,
|
|
"logps/chosen": -299.2890625,
|
|
"logps/ref_chosen": -269.2027893066406,
|
|
"logps/ref_rejected": -286.9102478027344,
|
|
"logps/rejected": -346.26043701171875,
|
|
"loss": 4.6962,
|
|
"margin_dpo/margin_mean": 29.26395034790039,
|
|
"margin_dpo/margin_std": 46.75328063964844,
|
|
"step": 286
|
|
},
|
|
{
|
|
"epoch": 0.6010471204188481,
|
|
"grad_norm": 58.61341094970703,
|
|
"learning_rate": 2.0718906816218595e-07,
|
|
"logits/chosen": 1.219170093536377,
|
|
"logits/rejected": 1.3217523097991943,
|
|
"logps/chosen": -259.4914855957031,
|
|
"logps/ref_chosen": -233.5873565673828,
|
|
"logps/ref_rejected": -230.03646850585938,
|
|
"logps/rejected": -291.0602722167969,
|
|
"loss": 4.594,
|
|
"margin_dpo/margin_mean": 35.11963653564453,
|
|
"margin_dpo/margin_std": 54.2941780090332,
|
|
"step": 287
|
|
},
|
|
{
|
|
"epoch": 0.6031413612565445,
|
|
"grad_norm": 46.509056091308594,
|
|
"learning_rate": 2.053865100274774e-07,
|
|
"logits/chosen": 1.5584362745285034,
|
|
"logits/rejected": 1.3865753412246704,
|
|
"logps/chosen": -412.9979553222656,
|
|
"logps/ref_chosen": -378.4530029296875,
|
|
"logps/ref_rejected": -302.7226257324219,
|
|
"logps/rejected": -366.6625061035156,
|
|
"loss": 4.5476,
|
|
"margin_dpo/margin_mean": 29.394969940185547,
|
|
"margin_dpo/margin_std": 42.88922119140625,
|
|
"step": 288
|
|
},
|
|
{
|
|
"epoch": 0.6052356020942409,
|
|
"grad_norm": 42.84352493286133,
|
|
"learning_rate": 2.035863443788411e-07,
|
|
"logits/chosen": 1.5689976215362549,
|
|
"logits/rejected": 1.5198795795440674,
|
|
"logps/chosen": -373.4714660644531,
|
|
"logps/ref_chosen": -342.27532958984375,
|
|
"logps/ref_rejected": -317.79638671875,
|
|
"logps/rejected": -370.5872802734375,
|
|
"loss": 4.944,
|
|
"margin_dpo/margin_mean": 21.594791412353516,
|
|
"margin_dpo/margin_std": 44.652915954589844,
|
|
"step": 289
|
|
},
|
|
{
|
|
"epoch": 0.6073298429319371,
|
|
"grad_norm": 58.85520935058594,
|
|
"learning_rate": 2.0178866775369774e-07,
|
|
"logits/chosen": 1.3218892812728882,
|
|
"logits/rejected": 1.2929930686950684,
|
|
"logps/chosen": -374.0101623535156,
|
|
"logps/ref_chosen": -348.39788818359375,
|
|
"logps/ref_rejected": -349.3028564453125,
|
|
"logps/rejected": -415.2039489746094,
|
|
"loss": 4.6543,
|
|
"margin_dpo/margin_mean": 40.28877258300781,
|
|
"margin_dpo/margin_std": 68.06195831298828,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.6094240837696335,
|
|
"grad_norm": 39.78348922729492,
|
|
"learning_rate": 1.9999357655598891e-07,
|
|
"logits/chosen": 1.0092355012893677,
|
|
"logits/rejected": 1.146081805229187,
|
|
"logps/chosen": -268.5143737792969,
|
|
"logps/ref_chosen": -250.70835876464844,
|
|
"logps/ref_rejected": -240.2347869873047,
|
|
"logps/rejected": -312.80255126953125,
|
|
"loss": 4.2499,
|
|
"margin_dpo/margin_mean": 54.76176834106445,
|
|
"margin_dpo/margin_std": 55.031982421875,
|
|
"step": 291
|
|
},
|
|
{
|
|
"epoch": 0.6115183246073298,
|
|
"grad_norm": 56.82017135620117,
|
|
"learning_rate": 1.9820116705100775e-07,
|
|
"logits/chosen": 1.0279195308685303,
|
|
"logits/rejected": 1.0469276905059814,
|
|
"logps/chosen": -285.55364990234375,
|
|
"logps/ref_chosen": -277.9742431640625,
|
|
"logps/ref_rejected": -260.510986328125,
|
|
"logps/rejected": -321.3108825683594,
|
|
"loss": 4.4704,
|
|
"margin_dpo/margin_mean": 53.22050094604492,
|
|
"margin_dpo/margin_std": 38.52600860595703,
|
|
"step": 292
|
|
},
|
|
{
|
|
"epoch": 0.6136125654450262,
|
|
"grad_norm": 82.68972778320312,
|
|
"learning_rate": 1.9641153536023642e-07,
|
|
"logits/chosen": 1.8253206014633179,
|
|
"logits/rejected": 1.6558302640914917,
|
|
"logps/chosen": -322.6553039550781,
|
|
"logps/ref_chosen": -300.9186096191406,
|
|
"logps/ref_rejected": -257.6700439453125,
|
|
"logps/rejected": -320.5356750488281,
|
|
"loss": 4.4861,
|
|
"margin_dpo/margin_mean": 41.12899398803711,
|
|
"margin_dpo/margin_std": 47.92676544189453,
|
|
"step": 293
|
|
},
|
|
{
|
|
"epoch": 0.6157068062827226,
|
|
"grad_norm": 59.99162673950195,
|
|
"learning_rate": 1.9462477745619106e-07,
|
|
"logits/chosen": 1.151703953742981,
|
|
"logits/rejected": 1.2953401803970337,
|
|
"logps/chosen": -282.59271240234375,
|
|
"logps/ref_chosen": -266.8080139160156,
|
|
"logps/ref_rejected": -283.26959228515625,
|
|
"logps/rejected": -355.72576904296875,
|
|
"loss": 4.5619,
|
|
"margin_dpo/margin_mean": 56.671478271484375,
|
|
"margin_dpo/margin_std": 48.01198959350586,
|
|
"step": 294
|
|
},
|
|
{
|
|
"epoch": 0.6178010471204188,
|
|
"grad_norm": 43.26309585571289,
|
|
"learning_rate": 1.928409891572757e-07,
|
|
"logits/chosen": 1.2075080871582031,
|
|
"logits/rejected": 1.226868748664856,
|
|
"logps/chosen": -282.1271667480469,
|
|
"logps/ref_chosen": -240.19598388671875,
|
|
"logps/ref_rejected": -214.87818908691406,
|
|
"logps/rejected": -267.48272705078125,
|
|
"loss": 4.5008,
|
|
"margin_dpo/margin_mean": 10.673352241516113,
|
|
"margin_dpo/margin_std": 68.71654510498047,
|
|
"step": 295
|
|
},
|
|
{
|
|
"epoch": 0.6198952879581152,
|
|
"grad_norm": 41.60967254638672,
|
|
"learning_rate": 1.9106026612264315e-07,
|
|
"logits/chosen": 1.4175227880477905,
|
|
"logits/rejected": 1.595857858657837,
|
|
"logps/chosen": -236.48651123046875,
|
|
"logps/ref_chosen": -227.85513305664062,
|
|
"logps/ref_rejected": -256.476318359375,
|
|
"logps/rejected": -316.8334655761719,
|
|
"loss": 4.0503,
|
|
"margin_dpo/margin_mean": 51.72578430175781,
|
|
"margin_dpo/margin_std": 55.78533935546875,
|
|
"step": 296
|
|
},
|
|
{
|
|
"epoch": 0.6219895287958115,
|
|
"grad_norm": 49.98090744018555,
|
|
"learning_rate": 1.8928270384706582e-07,
|
|
"logits/chosen": 1.33966863155365,
|
|
"logits/rejected": 1.5027199983596802,
|
|
"logps/chosen": -248.77769470214844,
|
|
"logps/ref_chosen": -220.73609924316406,
|
|
"logps/ref_rejected": -272.24017333984375,
|
|
"logps/rejected": -329.5039367675781,
|
|
"loss": 4.359,
|
|
"margin_dpo/margin_mean": 29.222198486328125,
|
|
"margin_dpo/margin_std": 50.361183166503906,
|
|
"step": 297
|
|
},
|
|
{
|
|
"epoch": 0.6240837696335079,
|
|
"grad_norm": 70.29344940185547,
|
|
"learning_rate": 1.875083976558136e-07,
|
|
"logits/chosen": 1.4000345468521118,
|
|
"logits/rejected": 1.3036651611328125,
|
|
"logps/chosen": -363.42401123046875,
|
|
"logps/ref_chosen": -346.2327880859375,
|
|
"logps/ref_rejected": -285.7917785644531,
|
|
"logps/rejected": -350.0803527832031,
|
|
"loss": 4.2786,
|
|
"margin_dpo/margin_mean": 47.0973014831543,
|
|
"margin_dpo/margin_std": 58.554161071777344,
|
|
"step": 298
|
|
},
|
|
{
|
|
"epoch": 0.6261780104712041,
|
|
"grad_norm": 68.31159973144531,
|
|
"learning_rate": 1.8573744269954297e-07,
|
|
"logits/chosen": 1.3761045932769775,
|
|
"logits/rejected": 1.3730204105377197,
|
|
"logps/chosen": -297.95330810546875,
|
|
"logps/ref_chosen": -266.99658203125,
|
|
"logps/ref_rejected": -262.5125427246094,
|
|
"logps/rejected": -327.41265869140625,
|
|
"loss": 4.5174,
|
|
"margin_dpo/margin_mean": 33.94337463378906,
|
|
"margin_dpo/margin_std": 67.46819305419922,
|
|
"step": 299
|
|
},
|
|
{
|
|
"epoch": 0.6282722513089005,
|
|
"grad_norm": 41.25592803955078,
|
|
"learning_rate": 1.839699339491937e-07,
|
|
"logits/chosen": 1.0598005056381226,
|
|
"logits/rejected": 1.1422343254089355,
|
|
"logps/chosen": -306.2508239746094,
|
|
"logps/ref_chosen": -281.19525146484375,
|
|
"logps/ref_rejected": -288.6803894042969,
|
|
"logps/rejected": -364.2799377441406,
|
|
"loss": 4.5063,
|
|
"margin_dpo/margin_mean": 50.54399490356445,
|
|
"margin_dpo/margin_std": 57.26484298706055,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.6303664921465969,
|
|
"grad_norm": 80.2366943359375,
|
|
"learning_rate": 1.8220596619089573e-07,
|
|
"logits/chosen": 1.5710808038711548,
|
|
"logits/rejected": 1.6003844738006592,
|
|
"logps/chosen": -304.22662353515625,
|
|
"logps/ref_chosen": -289.8253173828125,
|
|
"logps/ref_rejected": -327.8699645996094,
|
|
"logps/rejected": -389.2873840332031,
|
|
"loss": 4.5827,
|
|
"margin_dpo/margin_mean": 47.01613235473633,
|
|
"margin_dpo/margin_std": 53.770626068115234,
|
|
"step": 301
|
|
},
|
|
{
|
|
"epoch": 0.6324607329842932,
|
|
"grad_norm": 45.32533645629883,
|
|
"learning_rate": 1.8044563402088682e-07,
|
|
"logits/chosen": 1.390702486038208,
|
|
"logits/rejected": 1.5861480236053467,
|
|
"logps/chosen": -341.7214660644531,
|
|
"logps/ref_chosen": -307.1119079589844,
|
|
"logps/ref_rejected": -296.61785888671875,
|
|
"logps/rejected": -385.1257019042969,
|
|
"loss": 4.3462,
|
|
"margin_dpo/margin_mean": 53.89823532104492,
|
|
"margin_dpo/margin_std": 60.43596267700195,
|
|
"step": 302
|
|
},
|
|
{
|
|
"epoch": 0.6345549738219896,
|
|
"grad_norm": 35.210235595703125,
|
|
"learning_rate": 1.7868903184043885e-07,
|
|
"logits/chosen": 1.0414403676986694,
|
|
"logits/rejected": 1.2032232284545898,
|
|
"logps/chosen": -287.9827575683594,
|
|
"logps/ref_chosen": -261.281982421875,
|
|
"logps/ref_rejected": -287.9131164550781,
|
|
"logps/rejected": -370.0382385253906,
|
|
"loss": 4.4312,
|
|
"margin_dpo/margin_mean": 55.42430114746094,
|
|
"margin_dpo/margin_std": 58.83437728881836,
|
|
"step": 303
|
|
},
|
|
{
|
|
"epoch": 0.6366492146596858,
|
|
"grad_norm": 53.23714065551758,
|
|
"learning_rate": 1.7693625385079574e-07,
|
|
"logits/chosen": 1.1299974918365479,
|
|
"logits/rejected": 1.1754674911499023,
|
|
"logps/chosen": -317.24932861328125,
|
|
"logps/ref_chosen": -276.4831848144531,
|
|
"logps/ref_rejected": -257.2686462402344,
|
|
"logps/rejected": -332.5347595214844,
|
|
"loss": 4.6016,
|
|
"margin_dpo/margin_mean": 34.49999237060547,
|
|
"margin_dpo/margin_std": 42.753028869628906,
|
|
"step": 304
|
|
},
|
|
{
|
|
"epoch": 0.6387434554973822,
|
|
"grad_norm": 50.1977653503418,
|
|
"learning_rate": 1.7518739404812155e-07,
|
|
"logits/chosen": 1.1471208333969116,
|
|
"logits/rejected": 1.1764111518859863,
|
|
"logps/chosen": -272.23565673828125,
|
|
"logps/ref_chosen": -253.3165283203125,
|
|
"logps/ref_rejected": -225.20468139648438,
|
|
"logps/rejected": -278.5679931640625,
|
|
"loss": 4.0448,
|
|
"margin_dpo/margin_mean": 34.444190979003906,
|
|
"margin_dpo/margin_std": 46.56166076660156,
|
|
"step": 305
|
|
},
|
|
{
|
|
"epoch": 0.6408376963350786,
|
|
"grad_norm": 51.97341537475586,
|
|
"learning_rate": 1.7344254621846017e-07,
|
|
"logits/chosen": 1.2266101837158203,
|
|
"logits/rejected": 1.1241114139556885,
|
|
"logps/chosen": -338.9609069824219,
|
|
"logps/ref_chosen": -324.57122802734375,
|
|
"logps/ref_rejected": -299.1585693359375,
|
|
"logps/rejected": -352.9222412109375,
|
|
"loss": 4.306,
|
|
"margin_dpo/margin_mean": 39.374000549316406,
|
|
"margin_dpo/margin_std": 74.34258270263672,
|
|
"step": 306
|
|
},
|
|
{
|
|
"epoch": 0.6429319371727749,
|
|
"grad_norm": 45.46051025390625,
|
|
"learning_rate": 1.717018039327053e-07,
|
|
"logits/chosen": 1.1175193786621094,
|
|
"logits/rejected": 1.2578641176223755,
|
|
"logps/chosen": -320.50177001953125,
|
|
"logps/ref_chosen": -289.5794372558594,
|
|
"logps/ref_rejected": -262.92510986328125,
|
|
"logps/rejected": -347.1852722167969,
|
|
"loss": 4.1751,
|
|
"margin_dpo/margin_mean": 53.3377685546875,
|
|
"margin_dpo/margin_std": 65.59990692138672,
|
|
"step": 307
|
|
},
|
|
{
|
|
"epoch": 0.6450261780104712,
|
|
"grad_norm": 34.76543045043945,
|
|
"learning_rate": 1.699652605415828e-07,
|
|
"logits/chosen": 1.338348388671875,
|
|
"logits/rejected": 1.3202842473983765,
|
|
"logps/chosen": -348.8934020996094,
|
|
"logps/ref_chosen": -305.04351806640625,
|
|
"logps/ref_rejected": -305.0120849609375,
|
|
"logps/rejected": -384.90301513671875,
|
|
"loss": 4.6399,
|
|
"margin_dpo/margin_mean": 36.04100036621094,
|
|
"margin_dpo/margin_std": 60.608455657958984,
|
|
"step": 308
|
|
},
|
|
{
|
|
"epoch": 0.6471204188481675,
|
|
"grad_norm": 66.8311996459961,
|
|
"learning_rate": 1.6823300917064458e-07,
|
|
"logits/chosen": 1.619686484336853,
|
|
"logits/rejected": 1.3856267929077148,
|
|
"logps/chosen": -354.4423522949219,
|
|
"logps/ref_chosen": -316.80303955078125,
|
|
"logps/ref_rejected": -240.09307861328125,
|
|
"logps/rejected": -317.1116027832031,
|
|
"loss": 4.284,
|
|
"margin_dpo/margin_mean": 39.3791389465332,
|
|
"margin_dpo/margin_std": 59.34083938598633,
|
|
"step": 309
|
|
},
|
|
{
|
|
"epoch": 0.6492146596858639,
|
|
"grad_norm": 61.75398254394531,
|
|
"learning_rate": 1.6650514271527465e-07,
|
|
"logits/chosen": 1.2451605796813965,
|
|
"logits/rejected": 1.5067654848098755,
|
|
"logps/chosen": -289.0413818359375,
|
|
"logps/ref_chosen": -240.17652893066406,
|
|
"logps/ref_rejected": -242.7730712890625,
|
|
"logps/rejected": -332.27880859375,
|
|
"loss": 4.4389,
|
|
"margin_dpo/margin_mean": 40.6408576965332,
|
|
"margin_dpo/margin_std": 47.202674865722656,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.6513089005235602,
|
|
"grad_norm": 44.972686767578125,
|
|
"learning_rate": 1.647817538357072e-07,
|
|
"logits/chosen": 1.1516824960708618,
|
|
"logits/rejected": 1.3081568479537964,
|
|
"logps/chosen": -300.0003662109375,
|
|
"logps/ref_chosen": -257.53515625,
|
|
"logps/ref_rejected": -249.1999053955078,
|
|
"logps/rejected": -334.6231689453125,
|
|
"loss": 4.4418,
|
|
"margin_dpo/margin_mean": 42.95802307128906,
|
|
"margin_dpo/margin_std": 56.176063537597656,
|
|
"step": 311
|
|
},
|
|
{
|
|
"epoch": 0.6534031413612565,
|
|
"grad_norm": 70.0779037475586,
|
|
"learning_rate": 1.6306293495205755e-07,
|
|
"logits/chosen": 1.374745488166809,
|
|
"logits/rejected": 1.4253482818603516,
|
|
"logps/chosen": -301.5932922363281,
|
|
"logps/ref_chosen": -261.98828125,
|
|
"logps/ref_rejected": -238.6123504638672,
|
|
"logps/rejected": -317.7529296875,
|
|
"loss": 4.3747,
|
|
"margin_dpo/margin_mean": 39.53556442260742,
|
|
"margin_dpo/margin_std": 68.29449462890625,
|
|
"step": 312
|
|
},
|
|
{
|
|
"epoch": 0.6554973821989529,
|
|
"grad_norm": 56.74006271362305,
|
|
"learning_rate": 1.6134877823936607e-07,
|
|
"logits/chosen": 1.480233073234558,
|
|
"logits/rejected": 1.6010148525238037,
|
|
"logps/chosen": -417.88751220703125,
|
|
"logps/ref_chosen": -380.5164794921875,
|
|
"logps/ref_rejected": -340.59722900390625,
|
|
"logps/rejected": -436.11846923828125,
|
|
"loss": 4.5172,
|
|
"margin_dpo/margin_mean": 58.15019607543945,
|
|
"margin_dpo/margin_std": 64.69535064697266,
|
|
"step": 313
|
|
},
|
|
{
|
|
"epoch": 0.6575916230366492,
|
|
"grad_norm": 52.93495559692383,
|
|
"learning_rate": 1.5963937562265522e-07,
|
|
"logits/chosen": 1.3546419143676758,
|
|
"logits/rejected": 1.3760360479354858,
|
|
"logps/chosen": -288.9587707519531,
|
|
"logps/ref_chosen": -254.8392791748047,
|
|
"logps/ref_rejected": -233.38494873046875,
|
|
"logps/rejected": -312.0257263183594,
|
|
"loss": 4.4718,
|
|
"margin_dpo/margin_mean": 44.52123260498047,
|
|
"margin_dpo/margin_std": 63.62104415893555,
|
|
"step": 314
|
|
},
|
|
{
|
|
"epoch": 0.6596858638743456,
|
|
"grad_norm": 41.35818862915039,
|
|
"learning_rate": 1.5793481877199943e-07,
|
|
"logits/chosen": 1.7810715436935425,
|
|
"logits/rejected": 1.7476561069488525,
|
|
"logps/chosen": -315.27471923828125,
|
|
"logps/ref_chosen": -287.1436767578125,
|
|
"logps/ref_rejected": -245.744873046875,
|
|
"logps/rejected": -311.196044921875,
|
|
"loss": 4.1818,
|
|
"margin_dpo/margin_mean": 37.32012939453125,
|
|
"margin_dpo/margin_std": 46.346778869628906,
|
|
"step": 315
|
|
},
|
|
{
|
|
"epoch": 0.6617801047120419,
|
|
"grad_norm": 60.255733489990234,
|
|
"learning_rate": 1.562351990976095e-07,
|
|
"logits/chosen": 0.9633012413978577,
|
|
"logits/rejected": 1.0967074632644653,
|
|
"logps/chosen": -310.6409912109375,
|
|
"logps/ref_chosen": -278.97003173828125,
|
|
"logps/ref_rejected": -268.5596618652344,
|
|
"logps/rejected": -364.9547119140625,
|
|
"loss": 4.1869,
|
|
"margin_dpo/margin_mean": 64.72406005859375,
|
|
"margin_dpo/margin_std": 64.02919006347656,
|
|
"step": 316
|
|
},
|
|
{
|
|
"epoch": 0.6638743455497382,
|
|
"grad_norm": 63.166786193847656,
|
|
"learning_rate": 1.5454060774493065e-07,
|
|
"logits/chosen": 1.2942625284194946,
|
|
"logits/rejected": 1.294532060623169,
|
|
"logps/chosen": -277.548095703125,
|
|
"logps/ref_chosen": -252.86656188964844,
|
|
"logps/ref_rejected": -236.70155334472656,
|
|
"logps/rejected": -304.314208984375,
|
|
"loss": 4.3267,
|
|
"margin_dpo/margin_mean": 42.931148529052734,
|
|
"margin_dpo/margin_std": 60.30817413330078,
|
|
"step": 317
|
|
},
|
|
{
|
|
"epoch": 0.6659685863874345,
|
|
"grad_norm": 59.5412712097168,
|
|
"learning_rate": 1.5285113558975427e-07,
|
|
"logits/chosen": 1.2442307472229004,
|
|
"logits/rejected": 1.4497402906417847,
|
|
"logps/chosen": -252.0952606201172,
|
|
"logps/ref_chosen": -217.34515380859375,
|
|
"logps/ref_rejected": -243.4803009033203,
|
|
"logps/rejected": -328.711669921875,
|
|
"loss": 4.2442,
|
|
"margin_dpo/margin_mean": 50.481266021728516,
|
|
"margin_dpo/margin_std": 49.45831298828125,
|
|
"step": 318
|
|
},
|
|
{
|
|
"epoch": 0.6680628272251309,
|
|
"grad_norm": 34.23768615722656,
|
|
"learning_rate": 1.5116687323334464e-07,
|
|
"logits/chosen": 1.0364283323287964,
|
|
"logits/rejected": 1.2878518104553223,
|
|
"logps/chosen": -290.0143737792969,
|
|
"logps/ref_chosen": -268.8816833496094,
|
|
"logps/ref_rejected": -275.4843444824219,
|
|
"logps/rejected": -347.4952697753906,
|
|
"loss": 4.0379,
|
|
"margin_dpo/margin_mean": 50.878273010253906,
|
|
"margin_dpo/margin_std": 42.63667678833008,
|
|
"step": 319
|
|
},
|
|
{
|
|
"epoch": 0.6701570680628273,
|
|
"grad_norm": 55.01826477050781,
|
|
"learning_rate": 1.4948791099758052e-07,
|
|
"logits/chosen": 1.7030011415481567,
|
|
"logits/rejected": 1.6658614873886108,
|
|
"logps/chosen": -328.2850341796875,
|
|
"logps/ref_chosen": -307.4996337890625,
|
|
"logps/ref_rejected": -251.08456420898438,
|
|
"logps/rejected": -320.5891418457031,
|
|
"loss": 4.3706,
|
|
"margin_dpo/margin_mean": 48.71923065185547,
|
|
"margin_dpo/margin_std": 54.48359680175781,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.6722513089005235,
|
|
"grad_norm": 36.16436004638672,
|
|
"learning_rate": 1.478143389201113e-07,
|
|
"logits/chosen": 1.5252739191055298,
|
|
"logits/rejected": 1.3168452978134155,
|
|
"logps/chosen": -343.71514892578125,
|
|
"logps/ref_chosen": -309.8309631347656,
|
|
"logps/ref_rejected": -248.75213623046875,
|
|
"logps/rejected": -326.5061340332031,
|
|
"loss": 4.4452,
|
|
"margin_dpo/margin_mean": 43.869834899902344,
|
|
"margin_dpo/margin_std": 63.80504608154297,
|
|
"step": 321
|
|
},
|
|
{
|
|
"epoch": 0.6743455497382199,
|
|
"grad_norm": 43.79127502441406,
|
|
"learning_rate": 1.461462467495284e-07,
|
|
"logits/chosen": 0.9715927243232727,
|
|
"logits/rejected": 1.0002247095108032,
|
|
"logps/chosen": -323.4966735839844,
|
|
"logps/ref_chosen": -291.58843994140625,
|
|
"logps/ref_rejected": -265.43023681640625,
|
|
"logps/rejected": -339.85174560546875,
|
|
"loss": 4.1918,
|
|
"margin_dpo/margin_mean": 42.5133056640625,
|
|
"margin_dpo/margin_std": 48.27851486206055,
|
|
"step": 322
|
|
},
|
|
{
|
|
"epoch": 0.6764397905759162,
|
|
"grad_norm": 47.61728286743164,
|
|
"learning_rate": 1.4448372394055246e-07,
|
|
"logits/chosen": 1.0764468908309937,
|
|
"logits/rejected": 0.8316705822944641,
|
|
"logps/chosen": -385.0590515136719,
|
|
"logps/ref_chosen": -343.968017578125,
|
|
"logps/ref_rejected": -254.12161254882812,
|
|
"logps/rejected": -329.58868408203125,
|
|
"loss": 4.879,
|
|
"margin_dpo/margin_mean": 34.37602996826172,
|
|
"margin_dpo/margin_std": 67.8411636352539,
|
|
"step": 323
|
|
},
|
|
{
|
|
"epoch": 0.6785340314136126,
|
|
"grad_norm": 35.46821975708008,
|
|
"learning_rate": 1.428268596492364e-07,
|
|
"logits/chosen": 1.5448215007781982,
|
|
"logits/rejected": 1.5194947719573975,
|
|
"logps/chosen": -213.710693359375,
|
|
"logps/ref_chosen": -206.94500732421875,
|
|
"logps/ref_rejected": -262.6962890625,
|
|
"logps/rejected": -316.6614990234375,
|
|
"loss": 3.9753,
|
|
"margin_dpo/margin_mean": 47.19957733154297,
|
|
"margin_dpo/margin_std": 56.4971923828125,
|
|
"step": 324
|
|
},
|
|
{
|
|
"epoch": 0.680628272251309,
|
|
"grad_norm": 40.15785598754883,
|
|
"learning_rate": 1.4117574272818386e-07,
|
|
"logits/chosen": 1.4091088771820068,
|
|
"logits/rejected": 1.5492061376571655,
|
|
"logps/chosen": -311.66400146484375,
|
|
"logps/ref_chosen": -301.9862060546875,
|
|
"logps/ref_rejected": -333.42236328125,
|
|
"logps/rejected": -399.2699890136719,
|
|
"loss": 4.5302,
|
|
"margin_dpo/margin_mean": 56.1698112487793,
|
|
"margin_dpo/margin_std": 54.43414306640625,
|
|
"step": 325
|
|
},
|
|
{
|
|
"epoch": 0.6827225130890052,
|
|
"grad_norm": 52.91168975830078,
|
|
"learning_rate": 1.3953046172178413e-07,
|
|
"logits/chosen": 0.951869785785675,
|
|
"logits/rejected": 1.228202223777771,
|
|
"logps/chosen": -177.7034912109375,
|
|
"logps/ref_chosen": -164.46109008789062,
|
|
"logps/ref_rejected": -249.89413452148438,
|
|
"logps/rejected": -324.3243713378906,
|
|
"loss": 4.3905,
|
|
"margin_dpo/margin_mean": 61.18782043457031,
|
|
"margin_dpo/margin_std": 53.454124450683594,
|
|
"step": 326
|
|
},
|
|
{
|
|
"epoch": 0.6848167539267016,
|
|
"grad_norm": 42.79719543457031,
|
|
"learning_rate": 1.3789110486146468e-07,
|
|
"logits/chosen": 1.5188686847686768,
|
|
"logits/rejected": 1.4478236436843872,
|
|
"logps/chosen": -259.4933166503906,
|
|
"logps/ref_chosen": -246.3433837890625,
|
|
"logps/ref_rejected": -229.85508728027344,
|
|
"logps/rejected": -297.393798828125,
|
|
"loss": 4.1578,
|
|
"margin_dpo/margin_mean": 54.388763427734375,
|
|
"margin_dpo/margin_std": 67.07605743408203,
|
|
"step": 327
|
|
},
|
|
{
|
|
"epoch": 0.6869109947643979,
|
|
"grad_norm": 62.9756965637207,
|
|
"learning_rate": 1.362577600609588e-07,
|
|
"logits/chosen": 0.8666256666183472,
|
|
"logits/rejected": 0.9310898780822754,
|
|
"logps/chosen": -325.38824462890625,
|
|
"logps/ref_chosen": -305.82012939453125,
|
|
"logps/ref_rejected": -273.3159484863281,
|
|
"logps/rejected": -348.3529357910156,
|
|
"loss": 4.3427,
|
|
"margin_dpo/margin_mean": 55.4688720703125,
|
|
"margin_dpo/margin_std": 43.20487594604492,
|
|
"step": 328
|
|
},
|
|
{
|
|
"epoch": 0.6890052356020943,
|
|
"grad_norm": 51.00785827636719,
|
|
"learning_rate": 1.3463051491159093e-07,
|
|
"logits/chosen": 1.4473413228988647,
|
|
"logits/rejected": 1.7869523763656616,
|
|
"logps/chosen": -283.1918029785156,
|
|
"logps/ref_chosen": -258.7630615234375,
|
|
"logps/ref_rejected": -284.41131591796875,
|
|
"logps/rejected": -350.9872131347656,
|
|
"loss": 4.7045,
|
|
"margin_dpo/margin_mean": 42.1472053527832,
|
|
"margin_dpo/margin_std": 59.915836334228516,
|
|
"step": 329
|
|
},
|
|
{
|
|
"epoch": 0.6910994764397905,
|
|
"grad_norm": 47.54462432861328,
|
|
"learning_rate": 1.3300945667758012e-07,
|
|
"logits/chosen": 1.554024338722229,
|
|
"logits/rejected": 1.5038138628005981,
|
|
"logps/chosen": -360.6363830566406,
|
|
"logps/ref_chosen": -330.3982238769531,
|
|
"logps/ref_rejected": -274.9824523925781,
|
|
"logps/rejected": -335.7353515625,
|
|
"loss": 4.7699,
|
|
"margin_dpo/margin_mean": 30.514692306518555,
|
|
"margin_dpo/margin_std": 57.54186248779297,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.6931937172774869,
|
|
"grad_norm": 43.25348663330078,
|
|
"learning_rate": 1.3139467229135998e-07,
|
|
"logits/chosen": 1.2080715894699097,
|
|
"logits/rejected": 1.1073570251464844,
|
|
"logps/chosen": -306.7835693359375,
|
|
"logps/ref_chosen": -279.2760009765625,
|
|
"logps/ref_rejected": -220.27761840820312,
|
|
"logps/rejected": -285.9614562988281,
|
|
"loss": 4.4308,
|
|
"margin_dpo/margin_mean": 38.1762580871582,
|
|
"margin_dpo/margin_std": 54.596893310546875,
|
|
"step": 331
|
|
},
|
|
{
|
|
"epoch": 0.6952879581151833,
|
|
"grad_norm": 40.57438278198242,
|
|
"learning_rate": 1.2978624834891626e-07,
|
|
"logits/chosen": 1.1713310480117798,
|
|
"logits/rejected": 1.2179394960403442,
|
|
"logps/chosen": -245.34036254882812,
|
|
"logps/ref_chosen": -226.70223999023438,
|
|
"logps/ref_rejected": -205.92601013183594,
|
|
"logps/rejected": -280.2979736328125,
|
|
"loss": 4.2023,
|
|
"margin_dpo/margin_mean": 55.733863830566406,
|
|
"margin_dpo/margin_std": 48.06736755371094,
|
|
"step": 332
|
|
},
|
|
{
|
|
"epoch": 0.6973821989528796,
|
|
"grad_norm": 51.89149475097656,
|
|
"learning_rate": 1.281842711051438e-07,
|
|
"logits/chosen": 1.1539713144302368,
|
|
"logits/rejected": 1.0611777305603027,
|
|
"logps/chosen": -303.71087646484375,
|
|
"logps/ref_chosen": -280.1510009765625,
|
|
"logps/ref_rejected": -231.2144012451172,
|
|
"logps/rejected": -309.7812194824219,
|
|
"loss": 4.5953,
|
|
"margin_dpo/margin_mean": 55.00693130493164,
|
|
"margin_dpo/margin_std": 62.069034576416016,
|
|
"step": 333
|
|
},
|
|
{
|
|
"epoch": 0.6994764397905759,
|
|
"grad_norm": 38.94709014892578,
|
|
"learning_rate": 1.2658882646922033e-07,
|
|
"logits/chosen": 1.1479655504226685,
|
|
"logits/rejected": 1.1974003314971924,
|
|
"logps/chosen": -290.0271301269531,
|
|
"logps/ref_chosen": -269.64227294921875,
|
|
"logps/ref_rejected": -260.0500793457031,
|
|
"logps/rejected": -314.5991516113281,
|
|
"loss": 4.4026,
|
|
"margin_dpo/margin_mean": 34.16423416137695,
|
|
"margin_dpo/margin_std": 44.75538635253906,
|
|
"step": 334
|
|
},
|
|
{
|
|
"epoch": 0.7015706806282722,
|
|
"grad_norm": 49.46840286254883,
|
|
"learning_rate": 1.2500000000000005e-07,
|
|
"logits/chosen": 1.2762084007263184,
|
|
"logits/rejected": 1.3634967803955078,
|
|
"logps/chosen": -351.87103271484375,
|
|
"logps/ref_chosen": -304.7079162597656,
|
|
"logps/ref_rejected": -269.1751403808594,
|
|
"logps/rejected": -330.24505615234375,
|
|
"loss": 4.5817,
|
|
"margin_dpo/margin_mean": 13.906787872314453,
|
|
"margin_dpo/margin_std": 66.35944366455078,
|
|
"step": 335
|
|
},
|
|
{
|
|
"epoch": 0.7036649214659686,
|
|
"grad_norm": 34.12943649291992,
|
|
"learning_rate": 1.2341787690142435e-07,
|
|
"logits/chosen": 1.5096263885498047,
|
|
"logits/rejected": 1.792067289352417,
|
|
"logps/chosen": -218.86050415039062,
|
|
"logps/ref_chosen": -210.38368225097656,
|
|
"logps/ref_rejected": -229.12037658691406,
|
|
"logps/rejected": -289.0048522949219,
|
|
"loss": 4.5624,
|
|
"margin_dpo/margin_mean": 51.40761947631836,
|
|
"margin_dpo/margin_std": 47.56304931640625,
|
|
"step": 336
|
|
},
|
|
{
|
|
"epoch": 0.7057591623036649,
|
|
"grad_norm": 52.81936264038086,
|
|
"learning_rate": 1.2184254201795363e-07,
|
|
"logits/chosen": 0.93461012840271,
|
|
"logits/rejected": 0.8486602306365967,
|
|
"logps/chosen": -299.5815734863281,
|
|
"logps/ref_chosen": -257.2767639160156,
|
|
"logps/ref_rejected": -297.5929260253906,
|
|
"logps/rejected": -374.0364074707031,
|
|
"loss": 4.4007,
|
|
"margin_dpo/margin_mean": 34.13860321044922,
|
|
"margin_dpo/margin_std": 52.06587219238281,
|
|
"step": 337
|
|
},
|
|
{
|
|
"epoch": 0.7078534031413612,
|
|
"grad_norm": 37.59018325805664,
|
|
"learning_rate": 1.202740798300168e-07,
|
|
"logits/chosen": 1.5364826917648315,
|
|
"logits/rejected": 1.5837008953094482,
|
|
"logps/chosen": -274.78564453125,
|
|
"logps/ref_chosen": -257.8255310058594,
|
|
"logps/ref_rejected": -216.51162719726562,
|
|
"logps/rejected": -298.27276611328125,
|
|
"loss": 4.1313,
|
|
"margin_dpo/margin_mean": 64.8010482788086,
|
|
"margin_dpo/margin_std": 52.778377532958984,
|
|
"step": 338
|
|
},
|
|
{
|
|
"epoch": 0.7099476439790576,
|
|
"grad_norm": 43.429386138916016,
|
|
"learning_rate": 1.1871257444948096e-07,
|
|
"logits/chosen": 1.5933047533035278,
|
|
"logits/rejected": 1.5398043394088745,
|
|
"logps/chosen": -267.3699645996094,
|
|
"logps/ref_chosen": -240.76815795898438,
|
|
"logps/ref_rejected": -244.97377014160156,
|
|
"logps/rejected": -315.84185791015625,
|
|
"loss": 4.1606,
|
|
"margin_dpo/margin_mean": 44.2662467956543,
|
|
"margin_dpo/margin_std": 55.80255126953125,
|
|
"step": 339
|
|
},
|
|
{
|
|
"epoch": 0.7120418848167539,
|
|
"grad_norm": 35.77497482299805,
|
|
"learning_rate": 1.1715810961514072e-07,
|
|
"logits/chosen": 0.9345113039016724,
|
|
"logits/rejected": 1.0999596118927002,
|
|
"logps/chosen": -204.77218627929688,
|
|
"logps/ref_chosen": -187.35751342773438,
|
|
"logps/ref_rejected": -232.0410614013672,
|
|
"logps/rejected": -292.81396484375,
|
|
"loss": 4.428,
|
|
"margin_dpo/margin_mean": 43.358245849609375,
|
|
"margin_dpo/margin_std": 77.42390441894531,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.7141361256544503,
|
|
"grad_norm": 60.169677734375,
|
|
"learning_rate": 1.1561076868822755e-07,
|
|
"logits/chosen": 1.5765248537063599,
|
|
"logits/rejected": 1.8391637802124023,
|
|
"logps/chosen": -314.5309753417969,
|
|
"logps/ref_chosen": -283.4117736816406,
|
|
"logps/ref_rejected": -302.2451171875,
|
|
"logps/rejected": -380.4033203125,
|
|
"loss": 4.801,
|
|
"margin_dpo/margin_mean": 47.03903579711914,
|
|
"margin_dpo/margin_std": 53.40591049194336,
|
|
"step": 341
|
|
},
|
|
{
|
|
"epoch": 0.7162303664921466,
|
|
"grad_norm": 45.74888610839844,
|
|
"learning_rate": 1.1407063464793965e-07,
|
|
"logits/chosen": 1.175806999206543,
|
|
"logits/rejected": 1.3320945501327515,
|
|
"logps/chosen": -249.10618591308594,
|
|
"logps/ref_chosen": -221.50335693359375,
|
|
"logps/ref_rejected": -244.48382568359375,
|
|
"logps/rejected": -306.7912292480469,
|
|
"loss": 4.3427,
|
|
"margin_dpo/margin_mean": 34.704566955566406,
|
|
"margin_dpo/margin_std": 38.48919677734375,
|
|
"step": 342
|
|
},
|
|
{
|
|
"epoch": 0.7183246073298429,
|
|
"grad_norm": 33.789772033691406,
|
|
"learning_rate": 1.125377900869913e-07,
|
|
"logits/chosen": 1.5595121383666992,
|
|
"logits/rejected": 1.44582200050354,
|
|
"logps/chosen": -346.7395935058594,
|
|
"logps/ref_chosen": -340.46466064453125,
|
|
"logps/ref_rejected": -267.65313720703125,
|
|
"logps/rejected": -321.6587829589844,
|
|
"loss": 4.5708,
|
|
"margin_dpo/margin_mean": 47.7307243347168,
|
|
"margin_dpo/margin_std": 60.58958435058594,
|
|
"step": 343
|
|
},
|
|
{
|
|
"epoch": 0.7204188481675393,
|
|
"grad_norm": 31.717029571533203,
|
|
"learning_rate": 1.110123172071844e-07,
|
|
"logits/chosen": 1.2925912141799927,
|
|
"logits/rejected": 1.426027774810791,
|
|
"logps/chosen": -333.0621643066406,
|
|
"logps/ref_chosen": -310.25018310546875,
|
|
"logps/ref_rejected": -281.16302490234375,
|
|
"logps/rejected": -352.4907531738281,
|
|
"loss": 4.3502,
|
|
"margin_dpo/margin_mean": 48.51573181152344,
|
|
"margin_dpo/margin_std": 64.95976257324219,
|
|
"step": 344
|
|
},
|
|
{
|
|
"epoch": 0.7225130890052356,
|
|
"grad_norm": 51.54704284667969,
|
|
"learning_rate": 1.09494297815e-07,
|
|
"logits/chosen": 1.4484617710113525,
|
|
"logits/rejected": 1.585009217262268,
|
|
"logps/chosen": -307.612548828125,
|
|
"logps/ref_chosen": -284.6531066894531,
|
|
"logps/ref_rejected": -304.72369384765625,
|
|
"logps/rejected": -358.7782897949219,
|
|
"loss": 4.51,
|
|
"margin_dpo/margin_mean": 31.09515953063965,
|
|
"margin_dpo/margin_std": 56.86581039428711,
|
|
"step": 345
|
|
},
|
|
{
|
|
"epoch": 0.724607329842932,
|
|
"grad_norm": 65.52984619140625,
|
|
"learning_rate": 1.0798381331721107e-07,
|
|
"logits/chosen": 0.9799513220787048,
|
|
"logits/rejected": 1.088678002357483,
|
|
"logps/chosen": -310.8472595214844,
|
|
"logps/ref_chosen": -255.6278076171875,
|
|
"logps/ref_rejected": -237.61305236816406,
|
|
"logps/rejected": -325.5954284667969,
|
|
"loss": 4.375,
|
|
"margin_dpo/margin_mean": 32.76295471191406,
|
|
"margin_dpo/margin_std": 52.9566535949707,
|
|
"step": 346
|
|
},
|
|
{
|
|
"epoch": 0.7267015706806282,
|
|
"grad_norm": 57.053504943847656,
|
|
"learning_rate": 1.0648094471651722e-07,
|
|
"logits/chosen": 1.3673675060272217,
|
|
"logits/rejected": 1.3742492198944092,
|
|
"logps/chosen": -315.336181640625,
|
|
"logps/ref_chosen": -287.71807861328125,
|
|
"logps/ref_rejected": -276.2634582519531,
|
|
"logps/rejected": -353.022705078125,
|
|
"loss": 4.5065,
|
|
"margin_dpo/margin_mean": 49.14113998413086,
|
|
"margin_dpo/margin_std": 64.81655883789062,
|
|
"step": 347
|
|
},
|
|
{
|
|
"epoch": 0.7287958115183246,
|
|
"grad_norm": 41.55851745605469,
|
|
"learning_rate": 1.0498577260720048e-07,
|
|
"logits/chosen": 1.4351952075958252,
|
|
"logits/rejected": 1.5722769498825073,
|
|
"logps/chosen": -296.4559020996094,
|
|
"logps/ref_chosen": -285.63232421875,
|
|
"logps/ref_rejected": -264.0018615722656,
|
|
"logps/rejected": -319.1177978515625,
|
|
"loss": 4.6334,
|
|
"margin_dpo/margin_mean": 44.29237365722656,
|
|
"margin_dpo/margin_std": 51.82339096069336,
|
|
"step": 348
|
|
},
|
|
{
|
|
"epoch": 0.7308900523560209,
|
|
"grad_norm": 34.51556396484375,
|
|
"learning_rate": 1.0349837717080347e-07,
|
|
"logits/chosen": 1.2128405570983887,
|
|
"logits/rejected": 1.2827637195587158,
|
|
"logps/chosen": -378.03369140625,
|
|
"logps/ref_chosen": -347.98370361328125,
|
|
"logps/ref_rejected": -328.8855895996094,
|
|
"logps/rejected": -408.9764709472656,
|
|
"loss": 4.2865,
|
|
"margin_dpo/margin_mean": 50.040870666503906,
|
|
"margin_dpo/margin_std": 62.83095169067383,
|
|
"step": 349
|
|
},
|
|
{
|
|
"epoch": 0.7329842931937173,
|
|
"grad_norm": 51.1704216003418,
|
|
"learning_rate": 1.0201883817182949e-07,
|
|
"logits/chosen": 1.6027448177337646,
|
|
"logits/rejected": 1.452850103378296,
|
|
"logps/chosen": -300.945556640625,
|
|
"logps/ref_chosen": -292.4501647949219,
|
|
"logps/ref_rejected": -188.39346313476562,
|
|
"logps/rejected": -268.33087158203125,
|
|
"loss": 4.4481,
|
|
"margin_dpo/margin_mean": 71.44198608398438,
|
|
"margin_dpo/margin_std": 57.31879806518555,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.7350785340314137,
|
|
"grad_norm": 54.72409439086914,
|
|
"learning_rate": 1.0054723495346482e-07,
|
|
"logits/chosen": 1.2379735708236694,
|
|
"logits/rejected": 1.2731664180755615,
|
|
"logps/chosen": -284.1971740722656,
|
|
"logps/ref_chosen": -267.4852294921875,
|
|
"logps/ref_rejected": -223.13552856445312,
|
|
"logps/rejected": -285.99163818359375,
|
|
"loss": 4.9619,
|
|
"margin_dpo/margin_mean": 46.14418029785156,
|
|
"margin_dpo/margin_std": 70.45701599121094,
|
|
"step": 351
|
|
},
|
|
{
|
|
"epoch": 0.7371727748691099,
|
|
"grad_norm": 65.03557586669922,
|
|
"learning_rate": 9.908364643332398e-08,
|
|
"logits/chosen": 1.2324098348617554,
|
|
"logits/rejected": 1.4873018264770508,
|
|
"logps/chosen": -282.592529296875,
|
|
"logps/ref_chosen": -257.07952880859375,
|
|
"logps/ref_rejected": -294.4090881347656,
|
|
"logps/rejected": -372.9225158691406,
|
|
"loss": 4.2427,
|
|
"margin_dpo/margin_mean": 53.00044631958008,
|
|
"margin_dpo/margin_std": 67.86283111572266,
|
|
"step": 352
|
|
},
|
|
{
|
|
"epoch": 0.7392670157068063,
|
|
"grad_norm": 39.74465560913086,
|
|
"learning_rate": 9.76281510992176e-08,
|
|
"logits/chosen": 1.1475257873535156,
|
|
"logits/rejected": 1.180965781211853,
|
|
"logps/chosen": -321.6578674316406,
|
|
"logps/ref_chosen": -290.9927062988281,
|
|
"logps/ref_rejected": -263.1128845214844,
|
|
"logps/rejected": -340.19183349609375,
|
|
"loss": 4.2739,
|
|
"margin_dpo/margin_mean": 46.413780212402344,
|
|
"margin_dpo/margin_std": 61.069339752197266,
|
|
"step": 353
|
|
},
|
|
{
|
|
"epoch": 0.7413612565445026,
|
|
"grad_norm": 48.98942184448242,
|
|
"learning_rate": 9.618082700494318e-08,
|
|
"logits/chosen": 1.084821343421936,
|
|
"logits/rejected": 1.1856131553649902,
|
|
"logps/chosen": -224.8162384033203,
|
|
"logps/ref_chosen": -196.65435791015625,
|
|
"logps/ref_rejected": -193.15533447265625,
|
|
"logps/rejected": -250.77032470703125,
|
|
"loss": 4.9672,
|
|
"margin_dpo/margin_mean": 29.453073501586914,
|
|
"margin_dpo/margin_std": 54.51377487182617,
|
|
"step": 354
|
|
},
|
|
{
|
|
"epoch": 0.743455497382199,
|
|
"grad_norm": 52.5598030090332,
|
|
"learning_rate": 9.474175176609956e-08,
|
|
"logits/chosen": 1.538980484008789,
|
|
"logits/rejected": 1.710750937461853,
|
|
"logps/chosen": -305.9091491699219,
|
|
"logps/ref_chosen": -277.7572937011719,
|
|
"logps/ref_rejected": -296.24908447265625,
|
|
"logps/rejected": -365.863525390625,
|
|
"loss": 4.304,
|
|
"margin_dpo/margin_mean": 41.46260452270508,
|
|
"margin_dpo/margin_std": 60.0238151550293,
|
|
"step": 355
|
|
},
|
|
{
|
|
"epoch": 0.7455497382198953,
|
|
"grad_norm": 38.38217544555664,
|
|
"learning_rate": 9.331100255592436e-08,
|
|
"logits/chosen": 1.1549817323684692,
|
|
"logits/rejected": 1.270320177078247,
|
|
"logps/chosen": -250.93751525878906,
|
|
"logps/ref_chosen": -228.735595703125,
|
|
"logps/ref_rejected": -288.4073486328125,
|
|
"logps/rejected": -340.65838623046875,
|
|
"loss": 4.4604,
|
|
"margin_dpo/margin_mean": 30.04913902282715,
|
|
"margin_dpo/margin_std": 50.18395233154297,
|
|
"step": 356
|
|
},
|
|
{
|
|
"epoch": 0.7476439790575916,
|
|
"grad_norm": 48.77327346801758,
|
|
"learning_rate": 9.18886561011557e-08,
|
|
"logits/chosen": 1.2281720638275146,
|
|
"logits/rejected": 1.2340593338012695,
|
|
"logps/chosen": -345.0635986328125,
|
|
"logps/ref_chosen": -327.5565185546875,
|
|
"logps/ref_rejected": -286.9888610839844,
|
|
"logps/rejected": -364.4182434082031,
|
|
"loss": 4.4785,
|
|
"margin_dpo/margin_mean": 59.92234802246094,
|
|
"margin_dpo/margin_std": 68.7721939086914,
|
|
"step": 357
|
|
},
|
|
{
|
|
"epoch": 0.749738219895288,
|
|
"grad_norm": 33.63121795654297,
|
|
"learning_rate": 9.047478867791731e-08,
|
|
"logits/chosen": 1.2939711809158325,
|
|
"logits/rejected": 1.3026853799819946,
|
|
"logps/chosen": -300.43011474609375,
|
|
"logps/ref_chosen": -275.9919738769531,
|
|
"logps/ref_rejected": -226.95779418945312,
|
|
"logps/rejected": -304.18017578125,
|
|
"loss": 4.326,
|
|
"margin_dpo/margin_mean": 52.78423309326172,
|
|
"margin_dpo/margin_std": 65.29212951660156,
|
|
"step": 358
|
|
},
|
|
{
|
|
"epoch": 0.7518324607329843,
|
|
"grad_norm": 41.51667022705078,
|
|
"learning_rate": 8.906947610762825e-08,
|
|
"logits/chosen": 1.193036675453186,
|
|
"logits/rejected": 1.3061870336532593,
|
|
"logps/chosen": -288.68792724609375,
|
|
"logps/ref_chosen": -265.4796447753906,
|
|
"logps/ref_rejected": -269.9594421386719,
|
|
"logps/rejected": -336.7837219238281,
|
|
"loss": 4.2962,
|
|
"margin_dpo/margin_mean": 43.615962982177734,
|
|
"margin_dpo/margin_std": 57.33641052246094,
|
|
"step": 359
|
|
},
|
|
{
|
|
"epoch": 0.7539267015706806,
|
|
"grad_norm": 41.91855239868164,
|
|
"learning_rate": 8.76727937529367e-08,
|
|
"logits/chosen": 1.4477754831314087,
|
|
"logits/rejected": 1.4021023511886597,
|
|
"logps/chosen": -364.541015625,
|
|
"logps/ref_chosen": -336.95709228515625,
|
|
"logps/ref_rejected": -275.51239013671875,
|
|
"logps/rejected": -352.3710632324219,
|
|
"loss": 4.4654,
|
|
"margin_dpo/margin_mean": 49.27482604980469,
|
|
"margin_dpo/margin_std": 65.66383361816406,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.7560209424083769,
|
|
"grad_norm": 37.49740219116211,
|
|
"learning_rate": 8.628481651367875e-08,
|
|
"logits/chosen": 1.1130855083465576,
|
|
"logits/rejected": 1.3169794082641602,
|
|
"logps/chosen": -233.5477294921875,
|
|
"logps/ref_chosen": -223.0279541015625,
|
|
"logps/ref_rejected": -233.6653289794922,
|
|
"logps/rejected": -294.90289306640625,
|
|
"loss": 4.1403,
|
|
"margin_dpo/margin_mean": 50.717796325683594,
|
|
"margin_dpo/margin_std": 58.103668212890625,
|
|
"step": 361
|
|
},
|
|
{
|
|
"epoch": 0.7581151832460733,
|
|
"grad_norm": 75.54791259765625,
|
|
"learning_rate": 8.490561882286135e-08,
|
|
"logits/chosen": 1.167074203491211,
|
|
"logits/rejected": 1.180873155593872,
|
|
"logps/chosen": -337.9937744140625,
|
|
"logps/ref_chosen": -298.1035461425781,
|
|
"logps/ref_rejected": -230.74783325195312,
|
|
"logps/rejected": -312.3794250488281,
|
|
"loss": 4.3668,
|
|
"margin_dpo/margin_mean": 41.741363525390625,
|
|
"margin_dpo/margin_std": 74.15339660644531,
|
|
"step": 362
|
|
},
|
|
{
|
|
"epoch": 0.7602094240837697,
|
|
"grad_norm": 69.05213928222656,
|
|
"learning_rate": 8.353527464267104e-08,
|
|
"logits/chosen": 1.4149866104125977,
|
|
"logits/rejected": 1.3386046886444092,
|
|
"logps/chosen": -324.6624450683594,
|
|
"logps/ref_chosen": -315.25506591796875,
|
|
"logps/ref_rejected": -276.456298828125,
|
|
"logps/rejected": -352.84759521484375,
|
|
"loss": 4.1568,
|
|
"margin_dpo/margin_mean": 66.9839096069336,
|
|
"margin_dpo/margin_std": 64.92231750488281,
|
|
"step": 363
|
|
},
|
|
{
|
|
"epoch": 0.762303664921466,
|
|
"grad_norm": 31.97148895263672,
|
|
"learning_rate": 8.217385746050742e-08,
|
|
"logits/chosen": 1.5564781427383423,
|
|
"logits/rejected": 1.3156113624572754,
|
|
"logps/chosen": -380.2980041503906,
|
|
"logps/ref_chosen": -336.43798828125,
|
|
"logps/ref_rejected": -259.9676818847656,
|
|
"logps/rejected": -339.92596435546875,
|
|
"loss": 4.5886,
|
|
"margin_dpo/margin_mean": 36.0982666015625,
|
|
"margin_dpo/margin_std": 53.2963752746582,
|
|
"step": 364
|
|
},
|
|
{
|
|
"epoch": 0.7643979057591623,
|
|
"grad_norm": 53.09539794921875,
|
|
"learning_rate": 8.082144028504231e-08,
|
|
"logits/chosen": 1.0552072525024414,
|
|
"logits/rejected": 1.2801932096481323,
|
|
"logps/chosen": -227.60394287109375,
|
|
"logps/ref_chosen": -209.7356719970703,
|
|
"logps/ref_rejected": -294.5636291503906,
|
|
"logps/rejected": -367.4307861328125,
|
|
"loss": 4.2219,
|
|
"margin_dpo/margin_mean": 54.99891662597656,
|
|
"margin_dpo/margin_std": 50.34431457519531,
|
|
"step": 365
|
|
},
|
|
{
|
|
"epoch": 0.7664921465968586,
|
|
"grad_norm": 56.880043029785156,
|
|
"learning_rate": 7.947809564230445e-08,
|
|
"logits/chosen": 1.2870928049087524,
|
|
"logits/rejected": 1.2019919157028198,
|
|
"logps/chosen": -343.0006408691406,
|
|
"logps/ref_chosen": -312.77142333984375,
|
|
"logps/ref_rejected": -273.8427734375,
|
|
"logps/rejected": -375.21856689453125,
|
|
"loss": 4.2553,
|
|
"margin_dpo/margin_mean": 71.1466293334961,
|
|
"margin_dpo/margin_std": 62.22167205810547,
|
|
"step": 366
|
|
},
|
|
{
|
|
"epoch": 0.768586387434555,
|
|
"grad_norm": 47.9551887512207,
|
|
"learning_rate": 7.814389557179016e-08,
|
|
"logits/chosen": 1.7175925970077515,
|
|
"logits/rejected": 1.4891177415847778,
|
|
"logps/chosen": -329.87542724609375,
|
|
"logps/ref_chosen": -284.1925964355469,
|
|
"logps/ref_rejected": -208.8526611328125,
|
|
"logps/rejected": -294.8234558105469,
|
|
"loss": 4.1368,
|
|
"margin_dpo/margin_mean": 40.28790283203125,
|
|
"margin_dpo/margin_std": 68.52880859375,
|
|
"step": 367
|
|
},
|
|
{
|
|
"epoch": 0.7706806282722513,
|
|
"grad_norm": 32.45009231567383,
|
|
"learning_rate": 7.681891162260015e-08,
|
|
"logits/chosen": 1.7018953561782837,
|
|
"logits/rejected": 1.5844436883926392,
|
|
"logps/chosen": -376.4222412109375,
|
|
"logps/ref_chosen": -360.64459228515625,
|
|
"logps/ref_rejected": -297.281005859375,
|
|
"logps/rejected": -367.4183654785156,
|
|
"loss": 3.9894,
|
|
"margin_dpo/margin_mean": 54.35973358154297,
|
|
"margin_dpo/margin_std": 49.44243621826172,
|
|
"step": 368
|
|
},
|
|
{
|
|
"epoch": 0.7727748691099476,
|
|
"grad_norm": 85.05940246582031,
|
|
"learning_rate": 7.550321484960251e-08,
|
|
"logits/chosen": 1.4974400997161865,
|
|
"logits/rejected": 1.5112247467041016,
|
|
"logps/chosen": -364.4231872558594,
|
|
"logps/ref_chosen": -340.94610595703125,
|
|
"logps/ref_rejected": -285.1484069824219,
|
|
"logps/rejected": -367.0999450683594,
|
|
"loss": 4.5603,
|
|
"margin_dpo/margin_mean": 58.47446060180664,
|
|
"margin_dpo/margin_std": 59.77842712402344,
|
|
"step": 369
|
|
},
|
|
{
|
|
"epoch": 0.774869109947644,
|
|
"grad_norm": 36.800376892089844,
|
|
"learning_rate": 7.419687580962222e-08,
|
|
"logits/chosen": 1.2962076663970947,
|
|
"logits/rejected": 1.5088801383972168,
|
|
"logps/chosen": -313.3735046386719,
|
|
"logps/ref_chosen": -276.9629211425781,
|
|
"logps/ref_rejected": -274.93865966796875,
|
|
"logps/rejected": -353.8816223144531,
|
|
"loss": 4.1904,
|
|
"margin_dpo/margin_mean": 42.532344818115234,
|
|
"margin_dpo/margin_std": 52.544273376464844,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.7769633507853403,
|
|
"grad_norm": 79.35717010498047,
|
|
"learning_rate": 7.289996455765748e-08,
|
|
"logits/chosen": 0.7718454599380493,
|
|
"logits/rejected": 1.0238559246063232,
|
|
"logps/chosen": -365.87713623046875,
|
|
"logps/ref_chosen": -323.23980712890625,
|
|
"logps/ref_rejected": -317.0935363769531,
|
|
"logps/rejected": -390.978759765625,
|
|
"loss": 4.4027,
|
|
"margin_dpo/margin_mean": 31.247907638549805,
|
|
"margin_dpo/margin_std": 54.091983795166016,
|
|
"step": 371
|
|
},
|
|
{
|
|
"epoch": 0.7790575916230367,
|
|
"grad_norm": 33.439125061035156,
|
|
"learning_rate": 7.161255064312283e-08,
|
|
"logits/chosen": 1.3146398067474365,
|
|
"logits/rejected": 1.290389060974121,
|
|
"logps/chosen": -338.93792724609375,
|
|
"logps/ref_chosen": -303.75262451171875,
|
|
"logps/ref_rejected": -205.62069702148438,
|
|
"logps/rejected": -309.5386962890625,
|
|
"loss": 4.0137,
|
|
"margin_dpo/margin_mean": 68.732666015625,
|
|
"margin_dpo/margin_std": 57.33296585083008,
|
|
"step": 372
|
|
},
|
|
{
|
|
"epoch": 0.7811518324607329,
|
|
"grad_norm": 45.49492263793945,
|
|
"learning_rate": 7.033470310611945e-08,
|
|
"logits/chosen": 1.4074095487594604,
|
|
"logits/rejected": 1.1533772945404053,
|
|
"logps/chosen": -377.86865234375,
|
|
"logps/ref_chosen": -346.5982666015625,
|
|
"logps/ref_rejected": -253.89280700683594,
|
|
"logps/rejected": -333.4144287109375,
|
|
"loss": 4.1968,
|
|
"margin_dpo/margin_mean": 48.25126266479492,
|
|
"margin_dpo/margin_std": 56.07286834716797,
|
|
"step": 373
|
|
},
|
|
{
|
|
"epoch": 0.7832460732984293,
|
|
"grad_norm": 57.437164306640625,
|
|
"learning_rate": 6.906649047373245e-08,
|
|
"logits/chosen": 1.4347639083862305,
|
|
"logits/rejected": 1.5623588562011719,
|
|
"logps/chosen": -292.1949768066406,
|
|
"logps/ref_chosen": -252.59971618652344,
|
|
"logps/ref_rejected": -249.61476135253906,
|
|
"logps/rejected": -319.74609375,
|
|
"loss": 4.7414,
|
|
"margin_dpo/margin_mean": 30.536096572875977,
|
|
"margin_dpo/margin_std": 58.64886474609375,
|
|
"step": 374
|
|
},
|
|
{
|
|
"epoch": 0.7853403141361257,
|
|
"grad_norm": 67.63172149658203,
|
|
"learning_rate": 6.780798075635675e-08,
|
|
"logits/chosen": 1.1573803424835205,
|
|
"logits/rejected": 1.0170570611953735,
|
|
"logps/chosen": -274.2476806640625,
|
|
"logps/ref_chosen": -247.3214569091797,
|
|
"logps/ref_rejected": -188.48236083984375,
|
|
"logps/rejected": -260.9528503417969,
|
|
"loss": 4.4518,
|
|
"margin_dpo/margin_mean": 45.54425811767578,
|
|
"margin_dpo/margin_std": 53.55020523071289,
|
|
"step": 375
|
|
},
|
|
{
|
|
"epoch": 0.787434554973822,
|
|
"grad_norm": 43.886695861816406,
|
|
"learning_rate": 6.655924144404906e-08,
|
|
"logits/chosen": 1.1326963901519775,
|
|
"logits/rejected": 1.3887975215911865,
|
|
"logps/chosen": -327.3310852050781,
|
|
"logps/ref_chosen": -272.513916015625,
|
|
"logps/ref_rejected": -305.7491760253906,
|
|
"logps/rejected": -412.55767822265625,
|
|
"loss": 4.5376,
|
|
"margin_dpo/margin_mean": 51.99131393432617,
|
|
"margin_dpo/margin_std": 69.22034454345703,
|
|
"step": 376
|
|
},
|
|
{
|
|
"epoch": 0.7895287958115184,
|
|
"grad_norm": 47.22002029418945,
|
|
"learning_rate": 6.532033950290885e-08,
|
|
"logits/chosen": 1.356651782989502,
|
|
"logits/rejected": 1.438635230064392,
|
|
"logps/chosen": -323.85699462890625,
|
|
"logps/ref_chosen": -298.3796081542969,
|
|
"logps/ref_rejected": -273.41839599609375,
|
|
"logps/rejected": -348.69940185546875,
|
|
"loss": 4.5118,
|
|
"margin_dpo/margin_mean": 49.80352020263672,
|
|
"margin_dpo/margin_std": 56.26287841796875,
|
|
"step": 377
|
|
},
|
|
{
|
|
"epoch": 0.7916230366492146,
|
|
"grad_norm": 43.87843704223633,
|
|
"learning_rate": 6.409134137148736e-08,
|
|
"logits/chosen": 1.4002658128738403,
|
|
"logits/rejected": 1.4882569313049316,
|
|
"logps/chosen": -305.824462890625,
|
|
"logps/ref_chosen": -286.3173522949219,
|
|
"logps/ref_rejected": -271.7178955078125,
|
|
"logps/rejected": -340.6441345214844,
|
|
"loss": 4.5644,
|
|
"margin_dpo/margin_mean": 49.41912078857422,
|
|
"margin_dpo/margin_std": 56.88557434082031,
|
|
"step": 378
|
|
},
|
|
{
|
|
"epoch": 0.793717277486911,
|
|
"grad_norm": 57.02334213256836,
|
|
"learning_rate": 6.28723129572247e-08,
|
|
"logits/chosen": 1.439416527748108,
|
|
"logits/rejected": 1.3806058168411255,
|
|
"logps/chosen": -271.30230712890625,
|
|
"logps/ref_chosen": -233.71743774414062,
|
|
"logps/ref_rejected": -206.68927001953125,
|
|
"logps/rejected": -284.8748474121094,
|
|
"loss": 4.5262,
|
|
"margin_dpo/margin_mean": 40.600711822509766,
|
|
"margin_dpo/margin_std": 50.349300384521484,
|
|
"step": 379
|
|
},
|
|
{
|
|
"epoch": 0.7958115183246073,
|
|
"grad_norm": 72.12355041503906,
|
|
"learning_rate": 6.166331963291519e-08,
|
|
"logits/chosen": 1.7105507850646973,
|
|
"logits/rejected": 1.5248544216156006,
|
|
"logps/chosen": -387.1551513671875,
|
|
"logps/ref_chosen": -356.8863525390625,
|
|
"logps/ref_rejected": -354.776123046875,
|
|
"logps/rejected": -424.4413757324219,
|
|
"loss": 4.4649,
|
|
"margin_dpo/margin_mean": 39.396446228027344,
|
|
"margin_dpo/margin_std": 53.38307189941406,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.7979057591623037,
|
|
"grad_norm": 97.61827850341797,
|
|
"learning_rate": 6.046442623320145e-08,
|
|
"logits/chosen": 0.9507350325584412,
|
|
"logits/rejected": 1.0006842613220215,
|
|
"logps/chosen": -260.0545654296875,
|
|
"logps/ref_chosen": -235.81100463867188,
|
|
"logps/ref_rejected": -237.37062072753906,
|
|
"logps/rejected": -337.2359313964844,
|
|
"loss": 4.1799,
|
|
"margin_dpo/margin_mean": 75.62174987792969,
|
|
"margin_dpo/margin_std": 58.47461700439453,
|
|
"step": 381
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"grad_norm": 42.927345275878906,
|
|
"learning_rate": 5.9275697051098275e-08,
|
|
"logits/chosen": 1.312659740447998,
|
|
"logits/rejected": 1.322435975074768,
|
|
"logps/chosen": -294.5480651855469,
|
|
"logps/ref_chosen": -259.17388916015625,
|
|
"logps/ref_rejected": -230.83482360839844,
|
|
"logps/rejected": -323.2818298339844,
|
|
"loss": 3.9698,
|
|
"margin_dpo/margin_mean": 57.07281494140625,
|
|
"margin_dpo/margin_std": 65.82691955566406,
|
|
"step": 382
|
|
},
|
|
{
|
|
"epoch": 0.8020942408376963,
|
|
"grad_norm": 60.843833923339844,
|
|
"learning_rate": 5.809719583454414e-08,
|
|
"logits/chosen": 1.0891731977462769,
|
|
"logits/rejected": 1.3433376550674438,
|
|
"logps/chosen": -316.5683898925781,
|
|
"logps/ref_chosen": -269.8660583496094,
|
|
"logps/ref_rejected": -320.0415954589844,
|
|
"logps/rejected": -390.5964660644531,
|
|
"loss": 4.4135,
|
|
"margin_dpo/margin_mean": 23.852542877197266,
|
|
"margin_dpo/margin_std": 56.356651306152344,
|
|
"step": 383
|
|
},
|
|
{
|
|
"epoch": 0.8041884816753927,
|
|
"grad_norm": 70.86746978759766,
|
|
"learning_rate": 5.6928985782982524e-08,
|
|
"logits/chosen": 1.2213385105133057,
|
|
"logits/rejected": 1.5940483808517456,
|
|
"logps/chosen": -313.1070251464844,
|
|
"logps/ref_chosen": -280.7498779296875,
|
|
"logps/ref_rejected": -324.9134216308594,
|
|
"logps/rejected": -393.7688293457031,
|
|
"loss": 4.9522,
|
|
"margin_dpo/margin_mean": 36.498287200927734,
|
|
"margin_dpo/margin_std": 67.0685043334961,
|
|
"step": 384
|
|
},
|
|
{
|
|
"epoch": 0.806282722513089,
|
|
"grad_norm": 41.98042678833008,
|
|
"learning_rate": 5.57711295439732e-08,
|
|
"logits/chosen": 1.4527329206466675,
|
|
"logits/rejected": 1.5307879447937012,
|
|
"logps/chosen": -346.65673828125,
|
|
"logps/ref_chosen": -313.2212829589844,
|
|
"logps/ref_rejected": -256.9848937988281,
|
|
"logps/rejected": -341.84088134765625,
|
|
"loss": 4.4431,
|
|
"margin_dpo/margin_mean": 51.4205322265625,
|
|
"margin_dpo/margin_std": 66.022705078125,
|
|
"step": 385
|
|
},
|
|
{
|
|
"epoch": 0.8083769633507853,
|
|
"grad_norm": 48.37839126586914,
|
|
"learning_rate": 5.4623689209832484e-08,
|
|
"logits/chosen": 1.6142921447753906,
|
|
"logits/rejected": 1.77769935131073,
|
|
"logps/chosen": -376.63311767578125,
|
|
"logps/ref_chosen": -342.4034423828125,
|
|
"logps/ref_rejected": -319.1665954589844,
|
|
"logps/rejected": -405.9571838378906,
|
|
"loss": 4.1689,
|
|
"margin_dpo/margin_mean": 52.56089401245117,
|
|
"margin_dpo/margin_std": 56.50434875488281,
|
|
"step": 386
|
|
},
|
|
{
|
|
"epoch": 0.8104712041884817,
|
|
"grad_norm": 37.86534118652344,
|
|
"learning_rate": 5.3486726314303175e-08,
|
|
"logits/chosen": 1.3724387884140015,
|
|
"logits/rejected": 1.450311303138733,
|
|
"logps/chosen": -249.56488037109375,
|
|
"logps/ref_chosen": -209.16738891601562,
|
|
"logps/ref_rejected": -225.61949157714844,
|
|
"logps/rejected": -305.92340087890625,
|
|
"loss": 4.3007,
|
|
"margin_dpo/margin_mean": 39.906436920166016,
|
|
"margin_dpo/margin_std": 77.46627807617188,
|
|
"step": 387
|
|
},
|
|
{
|
|
"epoch": 0.812565445026178,
|
|
"grad_norm": 86.58211517333984,
|
|
"learning_rate": 5.2360301829254745e-08,
|
|
"logits/chosen": 1.78019118309021,
|
|
"logits/rejected": 1.7659128904342651,
|
|
"logps/chosen": -381.0487365722656,
|
|
"logps/ref_chosen": -342.5128173828125,
|
|
"logps/ref_rejected": -296.8653564453125,
|
|
"logps/rejected": -390.75970458984375,
|
|
"loss": 4.5827,
|
|
"margin_dpo/margin_mean": 55.35844421386719,
|
|
"margin_dpo/margin_std": 75.9105224609375,
|
|
"step": 388
|
|
},
|
|
{
|
|
"epoch": 0.8146596858638744,
|
|
"grad_norm": 73.34844207763672,
|
|
"learning_rate": 5.1244476161413806e-08,
|
|
"logits/chosen": 1.6132086515426636,
|
|
"logits/rejected": 1.4335089921951294,
|
|
"logps/chosen": -354.9220886230469,
|
|
"logps/ref_chosen": -336.53912353515625,
|
|
"logps/ref_rejected": -237.36383056640625,
|
|
"logps/rejected": -318.2511901855469,
|
|
"loss": 4.2202,
|
|
"margin_dpo/margin_mean": 62.504356384277344,
|
|
"margin_dpo/margin_std": 76.5676498413086,
|
|
"step": 389
|
|
},
|
|
{
|
|
"epoch": 0.8167539267015707,
|
|
"grad_norm": 72.32861328125,
|
|
"learning_rate": 5.013930914912476e-08,
|
|
"logits/chosen": 1.4496684074401855,
|
|
"logits/rejected": 1.6118597984313965,
|
|
"logps/chosen": -313.1777648925781,
|
|
"logps/ref_chosen": -275.41680908203125,
|
|
"logps/ref_rejected": -300.94329833984375,
|
|
"logps/rejected": -397.3767395019531,
|
|
"loss": 4.4604,
|
|
"margin_dpo/margin_mean": 58.67253112792969,
|
|
"margin_dpo/margin_std": 57.53920364379883,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.818848167539267,
|
|
"grad_norm": 38.00588607788086,
|
|
"learning_rate": 4.904486005914027e-08,
|
|
"logits/chosen": 1.421841025352478,
|
|
"logits/rejected": 1.3273773193359375,
|
|
"logps/chosen": -301.99920654296875,
|
|
"logps/ref_chosen": -249.42276000976562,
|
|
"logps/ref_rejected": -187.71572875976562,
|
|
"logps/rejected": -270.2522888183594,
|
|
"loss": 4.2838,
|
|
"margin_dpo/margin_mean": 29.960115432739258,
|
|
"margin_dpo/margin_std": 43.918495178222656,
|
|
"step": 391
|
|
},
|
|
{
|
|
"epoch": 0.8209424083769633,
|
|
"grad_norm": 45.547096252441406,
|
|
"learning_rate": 4.796118758344353e-08,
|
|
"logits/chosen": 1.0780835151672363,
|
|
"logits/rejected": 1.0823677778244019,
|
|
"logps/chosen": -326.24456787109375,
|
|
"logps/ref_chosen": -290.30438232421875,
|
|
"logps/ref_rejected": -262.2787780761719,
|
|
"logps/rejected": -357.97216796875,
|
|
"loss": 3.8296,
|
|
"margin_dpo/margin_mean": 59.75324249267578,
|
|
"margin_dpo/margin_std": 60.47087860107422,
|
|
"step": 392
|
|
},
|
|
{
|
|
"epoch": 0.8230366492146597,
|
|
"grad_norm": 53.403141021728516,
|
|
"learning_rate": 4.688834983610082e-08,
|
|
"logits/chosen": 1.3256309032440186,
|
|
"logits/rejected": 1.1313904523849487,
|
|
"logps/chosen": -356.47021484375,
|
|
"logps/ref_chosen": -317.2633972167969,
|
|
"logps/ref_rejected": -237.91380310058594,
|
|
"logps/rejected": -326.9722900390625,
|
|
"loss": 4.5399,
|
|
"margin_dpo/margin_mean": 49.85166931152344,
|
|
"margin_dpo/margin_std": 60.655738830566406,
|
|
"step": 393
|
|
},
|
|
{
|
|
"epoch": 0.8251308900523561,
|
|
"grad_norm": 44.783817291259766,
|
|
"learning_rate": 4.582640435014459e-08,
|
|
"logits/chosen": 1.5566421747207642,
|
|
"logits/rejected": 1.6724714040756226,
|
|
"logps/chosen": -406.4788818359375,
|
|
"logps/ref_chosen": -377.4843444824219,
|
|
"logps/ref_rejected": -298.2265319824219,
|
|
"logps/rejected": -381.8688659667969,
|
|
"loss": 4.4044,
|
|
"margin_dpo/margin_mean": 54.6478271484375,
|
|
"margin_dpo/margin_std": 60.59336853027344,
|
|
"step": 394
|
|
},
|
|
{
|
|
"epoch": 0.8272251308900523,
|
|
"grad_norm": 43.559539794921875,
|
|
"learning_rate": 4.477540807448832e-08,
|
|
"logits/chosen": 1.251692771911621,
|
|
"logits/rejected": 1.2946186065673828,
|
|
"logps/chosen": -310.1583557128906,
|
|
"logps/ref_chosen": -281.3030090332031,
|
|
"logps/ref_rejected": -272.98968505859375,
|
|
"logps/rejected": -343.1010437011719,
|
|
"loss": 4.3814,
|
|
"margin_dpo/margin_mean": 41.256004333496094,
|
|
"margin_dpo/margin_std": 55.48289108276367,
|
|
"step": 395
|
|
},
|
|
{
|
|
"epoch": 0.8293193717277487,
|
|
"grad_norm": 173.74839782714844,
|
|
"learning_rate": 4.373541737087263e-08,
|
|
"logits/chosen": 1.4800870418548584,
|
|
"logits/rejected": 1.4773489236831665,
|
|
"logps/chosen": -338.620849609375,
|
|
"logps/ref_chosen": -295.05364990234375,
|
|
"logps/ref_rejected": -255.04061889648438,
|
|
"logps/rejected": -347.4020080566406,
|
|
"loss": 4.402,
|
|
"margin_dpo/margin_mean": 48.79420471191406,
|
|
"margin_dpo/margin_std": 69.13119506835938,
|
|
"step": 396
|
|
},
|
|
{
|
|
"epoch": 0.831413612565445,
|
|
"grad_norm": 48.23088836669922,
|
|
"learning_rate": 4.270648801084295e-08,
|
|
"logits/chosen": 1.4665961265563965,
|
|
"logits/rejected": 1.5797699689865112,
|
|
"logps/chosen": -311.6304016113281,
|
|
"logps/ref_chosen": -288.0824890136719,
|
|
"logps/ref_rejected": -270.2839050292969,
|
|
"logps/rejected": -332.85223388671875,
|
|
"loss": 4.47,
|
|
"margin_dpo/margin_mean": 39.020355224609375,
|
|
"margin_dpo/margin_std": 48.824378967285156,
|
|
"step": 397
|
|
},
|
|
{
|
|
"epoch": 0.8335078534031414,
|
|
"grad_norm": 92.45420837402344,
|
|
"learning_rate": 4.168867517275806e-08,
|
|
"logits/chosen": 1.2100859880447388,
|
|
"logits/rejected": 1.4831223487854004,
|
|
"logps/chosen": -297.94940185546875,
|
|
"logps/ref_chosen": -252.48330688476562,
|
|
"logps/ref_rejected": -273.87139892578125,
|
|
"logps/rejected": -354.4050598144531,
|
|
"loss": 4.9205,
|
|
"margin_dpo/margin_mean": 35.06761932373047,
|
|
"margin_dpo/margin_std": 80.69606018066406,
|
|
"step": 398
|
|
},
|
|
{
|
|
"epoch": 0.8356020942408376,
|
|
"grad_norm": 82.78327941894531,
|
|
"learning_rate": 4.0682033438831584e-08,
|
|
"logits/chosen": 1.3988953828811646,
|
|
"logits/rejected": 1.5033973455429077,
|
|
"logps/chosen": -330.5037536621094,
|
|
"logps/ref_chosen": -277.4305419921875,
|
|
"logps/ref_rejected": -271.1197204589844,
|
|
"logps/rejected": -356.5560607910156,
|
|
"loss": 4.4852,
|
|
"margin_dpo/margin_mean": 32.36311340332031,
|
|
"margin_dpo/margin_std": 66.02288818359375,
|
|
"step": 399
|
|
},
|
|
{
|
|
"epoch": 0.837696335078534,
|
|
"grad_norm": 37.495018005371094,
|
|
"learning_rate": 3.968661679220467e-08,
|
|
"logits/chosen": 1.2931967973709106,
|
|
"logits/rejected": 1.240352988243103,
|
|
"logps/chosen": -299.0211181640625,
|
|
"logps/ref_chosen": -266.20025634765625,
|
|
"logps/ref_rejected": -217.865966796875,
|
|
"logps/rejected": -301.7166748046875,
|
|
"loss": 4.2739,
|
|
"margin_dpo/margin_mean": 51.02983093261719,
|
|
"margin_dpo/margin_std": 63.865108489990234,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.837696335078534,
|
|
"eval_logits/chosen": 1.193334937095642,
|
|
"eval_logits/rejected": 1.2366639375686646,
|
|
"eval_logps/chosen": -316.0413513183594,
|
|
"eval_logps/ref_chosen": -281.4588928222656,
|
|
"eval_logps/ref_rejected": -261.84954833984375,
|
|
"eval_logps/rejected": -345.1450500488281,
|
|
"eval_loss": 0.5601758360862732,
|
|
"eval_margin_dpo/margin_mean": 48.71305465698242,
|
|
"eval_margin_dpo/margin_std": 68.15460205078125,
|
|
"eval_runtime": 92.6746,
|
|
"eval_samples_per_second": 21.581,
|
|
"eval_steps_per_second": 1.349,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.8397905759162304,
|
|
"grad_norm": 42.00017547607422,
|
|
"learning_rate": 3.8702478614051345e-08,
|
|
"logits/chosen": 1.1790591478347778,
|
|
"logits/rejected": 1.3527344465255737,
|
|
"logps/chosen": -331.7237854003906,
|
|
"logps/ref_chosen": -293.3692932128906,
|
|
"logps/ref_rejected": -299.9609069824219,
|
|
"logps/rejected": -400.61529541015625,
|
|
"loss": 3.9949,
|
|
"margin_dpo/margin_mean": 62.299903869628906,
|
|
"margin_dpo/margin_std": 44.7177619934082,
|
|
"step": 401
|
|
},
|
|
{
|
|
"epoch": 0.8418848167539267,
|
|
"grad_norm": 58.57191848754883,
|
|
"learning_rate": 3.772967168071517e-08,
|
|
"logits/chosen": 1.4228373765945435,
|
|
"logits/rejected": 1.3410090208053589,
|
|
"logps/chosen": -332.11737060546875,
|
|
"logps/ref_chosen": -279.55889892578125,
|
|
"logps/ref_rejected": -259.6942138671875,
|
|
"logps/rejected": -331.76983642578125,
|
|
"loss": 4.3682,
|
|
"margin_dpo/margin_mean": 19.51715660095215,
|
|
"margin_dpo/margin_std": 81.67544555664062,
|
|
"step": 402
|
|
},
|
|
{
|
|
"epoch": 0.8439790575916231,
|
|
"grad_norm": 46.684837341308594,
|
|
"learning_rate": 3.676824816087978e-08,
|
|
"logits/chosen": 1.5173556804656982,
|
|
"logits/rejected": 1.6118801832199097,
|
|
"logps/chosen": -406.491455078125,
|
|
"logps/ref_chosen": -372.2436828613281,
|
|
"logps/ref_rejected": -285.5693359375,
|
|
"logps/rejected": -390.88140869140625,
|
|
"loss": 3.7148,
|
|
"margin_dpo/margin_mean": 71.0643081665039,
|
|
"margin_dpo/margin_std": 51.123374938964844,
|
|
"step": 403
|
|
},
|
|
{
|
|
"epoch": 0.8460732984293193,
|
|
"grad_norm": 38.361358642578125,
|
|
"learning_rate": 3.581825961277074e-08,
|
|
"logits/chosen": 1.4560322761535645,
|
|
"logits/rejected": 1.349548578262329,
|
|
"logps/chosen": -372.926513671875,
|
|
"logps/ref_chosen": -328.00860595703125,
|
|
"logps/ref_rejected": -278.35009765625,
|
|
"logps/rejected": -352.3278503417969,
|
|
"loss": 4.4734,
|
|
"margin_dpo/margin_mean": 29.05979347229004,
|
|
"margin_dpo/margin_std": 75.3277816772461,
|
|
"step": 404
|
|
},
|
|
{
|
|
"epoch": 0.8481675392670157,
|
|
"grad_norm": 50.2591438293457,
|
|
"learning_rate": 3.487975698139084e-08,
|
|
"logits/chosen": 1.3894712924957275,
|
|
"logits/rejected": 1.5616024732589722,
|
|
"logps/chosen": -270.36468505859375,
|
|
"logps/ref_chosen": -228.44178771972656,
|
|
"logps/ref_rejected": -244.33395385742188,
|
|
"logps/rejected": -330.79925537109375,
|
|
"loss": 4.2189,
|
|
"margin_dpo/margin_mean": 44.542388916015625,
|
|
"margin_dpo/margin_std": 59.48860168457031,
|
|
"step": 405
|
|
},
|
|
{
|
|
"epoch": 0.8502617801047121,
|
|
"grad_norm": 63.63072204589844,
|
|
"learning_rate": 3.3952790595787986e-08,
|
|
"logits/chosen": 1.3200132846832275,
|
|
"logits/rejected": 1.2781833410263062,
|
|
"logps/chosen": -353.1100158691406,
|
|
"logps/ref_chosen": -321.44598388671875,
|
|
"logps/ref_rejected": -268.0445861816406,
|
|
"logps/rejected": -349.9507751464844,
|
|
"loss": 4.6261,
|
|
"margin_dpo/margin_mean": 50.24217987060547,
|
|
"margin_dpo/margin_std": 67.54312896728516,
|
|
"step": 406
|
|
},
|
|
{
|
|
"epoch": 0.8523560209424084,
|
|
"grad_norm": 53.97494888305664,
|
|
"learning_rate": 3.303741016635614e-08,
|
|
"logits/chosen": 1.3076931238174438,
|
|
"logits/rejected": 1.1266769170761108,
|
|
"logps/chosen": -309.05596923828125,
|
|
"logps/ref_chosen": -247.0522918701172,
|
|
"logps/ref_rejected": -186.8645782470703,
|
|
"logps/rejected": -270.8433837890625,
|
|
"loss": 4.3225,
|
|
"margin_dpo/margin_mean": 21.975143432617188,
|
|
"margin_dpo/margin_std": 54.5270881652832,
|
|
"step": 407
|
|
},
|
|
{
|
|
"epoch": 0.8544502617801047,
|
|
"grad_norm": 53.08890151977539,
|
|
"learning_rate": 3.2133664782169944e-08,
|
|
"logits/chosen": 1.1703412532806396,
|
|
"logits/rejected": 1.2470866441726685,
|
|
"logps/chosen": -253.00314331054688,
|
|
"logps/ref_chosen": -213.5513458251953,
|
|
"logps/ref_rejected": -267.9826354980469,
|
|
"logps/rejected": -361.67535400390625,
|
|
"loss": 4.2575,
|
|
"margin_dpo/margin_mean": 54.240936279296875,
|
|
"margin_dpo/margin_std": 45.82322692871094,
|
|
"step": 408
|
|
},
|
|
{
|
|
"epoch": 0.856544502617801,
|
|
"grad_norm": 48.80266571044922,
|
|
"learning_rate": 3.12416029083514e-08,
|
|
"logits/chosen": 1.4997256994247437,
|
|
"logits/rejected": 1.6531615257263184,
|
|
"logps/chosen": -320.41851806640625,
|
|
"logps/ref_chosen": -280.0785217285156,
|
|
"logps/ref_rejected": -304.26910400390625,
|
|
"logps/rejected": -392.4356994628906,
|
|
"loss": 4.3055,
|
|
"margin_dpo/margin_mean": 47.82660675048828,
|
|
"margin_dpo/margin_std": 75.04901885986328,
|
|
"step": 409
|
|
},
|
|
{
|
|
"epoch": 0.8586387434554974,
|
|
"grad_norm": 63.39336013793945,
|
|
"learning_rate": 3.036127238347164e-08,
|
|
"logits/chosen": 1.5989128351211548,
|
|
"logits/rejected": 1.5641684532165527,
|
|
"logps/chosen": -292.51336669921875,
|
|
"logps/ref_chosen": -260.9378662109375,
|
|
"logps/ref_rejected": -296.7695007324219,
|
|
"logps/rejected": -368.9515380859375,
|
|
"loss": 4.4542,
|
|
"margin_dpo/margin_mean": 40.606529235839844,
|
|
"margin_dpo/margin_std": 67.39288330078125,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.8607329842931937,
|
|
"grad_norm": 40.665950775146484,
|
|
"learning_rate": 2.9492720416985e-08,
|
|
"logits/chosen": 1.3025436401367188,
|
|
"logits/rejected": 1.4121453762054443,
|
|
"logps/chosen": -354.9593200683594,
|
|
"logps/ref_chosen": -330.0611877441406,
|
|
"logps/ref_rejected": -310.21368408203125,
|
|
"logps/rejected": -398.6880187988281,
|
|
"loss": 4.1892,
|
|
"margin_dpo/margin_mean": 63.576229095458984,
|
|
"margin_dpo/margin_std": 51.235904693603516,
|
|
"step": 411
|
|
},
|
|
{
|
|
"epoch": 0.86282722513089,
|
|
"grad_norm": 55.58880615234375,
|
|
"learning_rate": 2.863599358669755e-08,
|
|
"logits/chosen": 1.1845945119857788,
|
|
"logits/rejected": 1.3782296180725098,
|
|
"logps/chosen": -288.62408447265625,
|
|
"logps/ref_chosen": -254.76255798339844,
|
|
"logps/ref_rejected": -315.16156005859375,
|
|
"logps/rejected": -383.30535888671875,
|
|
"loss": 4.4912,
|
|
"margin_dpo/margin_mean": 34.28225326538086,
|
|
"margin_dpo/margin_std": 53.833274841308594,
|
|
"step": 412
|
|
},
|
|
{
|
|
"epoch": 0.8649214659685864,
|
|
"grad_norm": 81.09705352783203,
|
|
"learning_rate": 2.7791137836269158e-08,
|
|
"logits/chosen": 1.3580859899520874,
|
|
"logits/rejected": 1.3456140756607056,
|
|
"logps/chosen": -294.88671875,
|
|
"logps/ref_chosen": -260.4962463378906,
|
|
"logps/ref_rejected": -250.442626953125,
|
|
"logps/rejected": -319.46563720703125,
|
|
"loss": 4.5037,
|
|
"margin_dpo/margin_mean": 34.632568359375,
|
|
"margin_dpo/margin_std": 63.56281280517578,
|
|
"step": 413
|
|
},
|
|
{
|
|
"epoch": 0.8670157068062827,
|
|
"grad_norm": 51.592769622802734,
|
|
"learning_rate": 2.6958198472749717e-08,
|
|
"logits/chosen": 1.6022746562957764,
|
|
"logits/rejected": 1.5306450128555298,
|
|
"logps/chosen": -428.2533264160156,
|
|
"logps/ref_chosen": -391.94610595703125,
|
|
"logps/ref_rejected": -287.8221130371094,
|
|
"logps/rejected": -345.431396484375,
|
|
"loss": 4.3943,
|
|
"margin_dpo/margin_mean": 21.30208396911621,
|
|
"margin_dpo/margin_std": 81.18660736083984,
|
|
"step": 414
|
|
},
|
|
{
|
|
"epoch": 0.8691099476439791,
|
|
"grad_norm": 49.326297760009766,
|
|
"learning_rate": 2.613722016414943e-08,
|
|
"logits/chosen": 0.8005275726318359,
|
|
"logits/rejected": 0.8713321685791016,
|
|
"logps/chosen": -245.8915557861328,
|
|
"logps/ref_chosen": -223.82870483398438,
|
|
"logps/ref_rejected": -206.082763671875,
|
|
"logps/rejected": -291.24188232421875,
|
|
"loss": 4.0595,
|
|
"margin_dpo/margin_mean": 63.09626007080078,
|
|
"margin_dpo/margin_std": 63.13758087158203,
|
|
"step": 415
|
|
},
|
|
{
|
|
"epoch": 0.8712041884816754,
|
|
"grad_norm": 48.53297424316406,
|
|
"learning_rate": 2.5328246937043525e-08,
|
|
"logits/chosen": 1.264346718788147,
|
|
"logits/rejected": 1.331539273262024,
|
|
"logps/chosen": -316.0765686035156,
|
|
"logps/ref_chosen": -303.91656494140625,
|
|
"logps/ref_rejected": -267.0210266113281,
|
|
"logps/rejected": -343.7052917480469,
|
|
"loss": 4.154,
|
|
"margin_dpo/margin_mean": 64.5242919921875,
|
|
"margin_dpo/margin_std": 58.65679168701172,
|
|
"step": 416
|
|
},
|
|
{
|
|
"epoch": 0.8732984293193717,
|
|
"grad_norm": 43.959312438964844,
|
|
"learning_rate": 2.4531322174210973e-08,
|
|
"logits/chosen": 1.207326054573059,
|
|
"logits/rejected": 1.3471075296401978,
|
|
"logps/chosen": -298.7107849121094,
|
|
"logps/ref_chosen": -249.8788604736328,
|
|
"logps/ref_rejected": -210.48683166503906,
|
|
"logps/rejected": -290.503662109375,
|
|
"loss": 4.4556,
|
|
"margin_dpo/margin_mean": 31.184890747070312,
|
|
"margin_dpo/margin_std": 68.83541107177734,
|
|
"step": 417
|
|
},
|
|
{
|
|
"epoch": 0.875392670157068,
|
|
"grad_norm": 57.87160110473633,
|
|
"learning_rate": 2.3746488612308295e-08,
|
|
"logits/chosen": 1.1209101676940918,
|
|
"logits/rejected": 1.000208854675293,
|
|
"logps/chosen": -411.3262023925781,
|
|
"logps/ref_chosen": -355.4482421875,
|
|
"logps/ref_rejected": -344.9490661621094,
|
|
"logps/rejected": -433.7691650390625,
|
|
"loss": 4.5932,
|
|
"margin_dpo/margin_mean": 32.94209671020508,
|
|
"margin_dpo/margin_std": 72.00234985351562,
|
|
"step": 418
|
|
},
|
|
{
|
|
"epoch": 0.8774869109947644,
|
|
"grad_norm": 55.62238311767578,
|
|
"learning_rate": 2.297378833957761e-08,
|
|
"logits/chosen": 1.6920902729034424,
|
|
"logits/rejected": 1.618727445602417,
|
|
"logps/chosen": -433.79205322265625,
|
|
"logps/ref_chosen": -381.6947021484375,
|
|
"logps/ref_rejected": -322.436767578125,
|
|
"logps/rejected": -426.5209045410156,
|
|
"loss": 4.2079,
|
|
"margin_dpo/margin_mean": 51.986717224121094,
|
|
"margin_dpo/margin_std": 64.11946868896484,
|
|
"step": 419
|
|
},
|
|
{
|
|
"epoch": 0.8795811518324608,
|
|
"grad_norm": 42.748905181884766,
|
|
"learning_rate": 2.2213262793589482e-08,
|
|
"logits/chosen": 1.0813815593719482,
|
|
"logits/rejected": 1.1282932758331299,
|
|
"logps/chosen": -290.3770446777344,
|
|
"logps/ref_chosen": -255.09539794921875,
|
|
"logps/ref_rejected": -254.58306884765625,
|
|
"logps/rejected": -328.9988708496094,
|
|
"loss": 4.2003,
|
|
"margin_dpo/margin_mean": 39.13414001464844,
|
|
"margin_dpo/margin_std": 68.49993896484375,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.881675392670157,
|
|
"grad_norm": 62.450592041015625,
|
|
"learning_rate": 2.1464952759020856e-08,
|
|
"logits/chosen": 1.2170071601867676,
|
|
"logits/rejected": 1.0278328657150269,
|
|
"logps/chosen": -303.1274719238281,
|
|
"logps/ref_chosen": -280.7524719238281,
|
|
"logps/ref_rejected": -187.39218139648438,
|
|
"logps/rejected": -256.837646484375,
|
|
"loss": 4.0246,
|
|
"margin_dpo/margin_mean": 47.07042694091797,
|
|
"margin_dpo/margin_std": 56.84251403808594,
|
|
"step": 421
|
|
},
|
|
{
|
|
"epoch": 0.8837696335078534,
|
|
"grad_norm": 56.18370056152344,
|
|
"learning_rate": 2.07288983654679e-08,
|
|
"logits/chosen": 1.3475362062454224,
|
|
"logits/rejected": 1.4008029699325562,
|
|
"logps/chosen": -307.1512145996094,
|
|
"logps/ref_chosen": -278.18890380859375,
|
|
"logps/ref_rejected": -250.71591186523438,
|
|
"logps/rejected": -318.3471984863281,
|
|
"loss": 4.6777,
|
|
"margin_dpo/margin_mean": 38.6689453125,
|
|
"margin_dpo/margin_std": 72.85610961914062,
|
|
"step": 422
|
|
},
|
|
{
|
|
"epoch": 0.8858638743455497,
|
|
"grad_norm": 64.5829849243164,
|
|
"learning_rate": 2.0005139085293942e-08,
|
|
"logits/chosen": 1.3403944969177246,
|
|
"logits/rejected": 1.4666098356246948,
|
|
"logps/chosen": -301.6260986328125,
|
|
"logps/ref_chosen": -281.21820068359375,
|
|
"logps/ref_rejected": -296.73907470703125,
|
|
"logps/rejected": -367.79888916015625,
|
|
"loss": 4.1139,
|
|
"margin_dpo/margin_mean": 50.65192413330078,
|
|
"margin_dpo/margin_std": 72.86007690429688,
|
|
"step": 423
|
|
},
|
|
{
|
|
"epoch": 0.8879581151832461,
|
|
"grad_norm": 43.40376281738281,
|
|
"learning_rate": 1.9293713731512673e-08,
|
|
"logits/chosen": 1.1475857496261597,
|
|
"logits/rejected": 1.005727767944336,
|
|
"logps/chosen": -361.1445617675781,
|
|
"logps/ref_chosen": -339.32550048828125,
|
|
"logps/ref_rejected": -274.3222351074219,
|
|
"logps/rejected": -354.5352478027344,
|
|
"loss": 4.1116,
|
|
"margin_dpo/margin_mean": 58.39393615722656,
|
|
"margin_dpo/margin_std": 50.71818542480469,
|
|
"step": 424
|
|
},
|
|
{
|
|
"epoch": 0.8900523560209425,
|
|
"grad_norm": 46.304969787597656,
|
|
"learning_rate": 1.8594660455706763e-08,
|
|
"logits/chosen": 1.2751210927963257,
|
|
"logits/rejected": 1.5024828910827637,
|
|
"logps/chosen": -283.94232177734375,
|
|
"logps/ref_chosen": -254.7490997314453,
|
|
"logps/ref_rejected": -266.1432800292969,
|
|
"logps/rejected": -349.154541015625,
|
|
"loss": 4.3925,
|
|
"margin_dpo/margin_mean": 53.81803894042969,
|
|
"margin_dpo/margin_std": 49.791236877441406,
|
|
"step": 425
|
|
},
|
|
{
|
|
"epoch": 0.8921465968586387,
|
|
"grad_norm": 41.32174301147461,
|
|
"learning_rate": 1.7908016745981856e-08,
|
|
"logits/chosen": 0.896172285079956,
|
|
"logits/rejected": 1.0513912439346313,
|
|
"logps/chosen": -284.55767822265625,
|
|
"logps/ref_chosen": -264.97216796875,
|
|
"logps/ref_rejected": -244.05935668945312,
|
|
"logps/rejected": -316.9269714355469,
|
|
"loss": 4.2069,
|
|
"margin_dpo/margin_mean": 53.282081604003906,
|
|
"margin_dpo/margin_std": 63.170196533203125,
|
|
"step": 426
|
|
},
|
|
{
|
|
"epoch": 0.8942408376963351,
|
|
"grad_norm": 42.068763732910156,
|
|
"learning_rate": 1.7233819424956247e-08,
|
|
"logits/chosen": 1.2293376922607422,
|
|
"logits/rejected": 1.2114195823669434,
|
|
"logps/chosen": -329.5334167480469,
|
|
"logps/ref_chosen": -301.6879577636719,
|
|
"logps/ref_rejected": -265.6783752441406,
|
|
"logps/rejected": -380.6156311035156,
|
|
"loss": 3.7144,
|
|
"margin_dpo/margin_mean": 87.09181213378906,
|
|
"margin_dpo/margin_std": 52.67485046386719,
|
|
"step": 427
|
|
},
|
|
{
|
|
"epoch": 0.8963350785340314,
|
|
"grad_norm": 55.713130950927734,
|
|
"learning_rate": 1.6572104647786245e-08,
|
|
"logits/chosen": 1.5100287199020386,
|
|
"logits/rejected": 1.70786714553833,
|
|
"logps/chosen": -415.50616455078125,
|
|
"logps/ref_chosen": -376.43316650390625,
|
|
"logps/ref_rejected": -334.24676513671875,
|
|
"logps/rejected": -440.6484375,
|
|
"loss": 3.7971,
|
|
"margin_dpo/margin_mean": 67.32867431640625,
|
|
"margin_dpo/margin_std": 66.57231140136719,
|
|
"step": 428
|
|
},
|
|
{
|
|
"epoch": 0.8984293193717278,
|
|
"grad_norm": 70.71985626220703,
|
|
"learning_rate": 1.5922907900227017e-08,
|
|
"logits/chosen": 1.4469355344772339,
|
|
"logits/rejected": 1.4792401790618896,
|
|
"logps/chosen": -247.02073669433594,
|
|
"logps/ref_chosen": -218.89503479003906,
|
|
"logps/ref_rejected": -236.546630859375,
|
|
"logps/rejected": -322.55419921875,
|
|
"loss": 4.4589,
|
|
"margin_dpo/margin_mean": 57.881866455078125,
|
|
"margin_dpo/margin_std": 76.20121002197266,
|
|
"step": 429
|
|
},
|
|
{
|
|
"epoch": 0.900523560209424,
|
|
"grad_norm": 92.4472427368164,
|
|
"learning_rate": 1.5286263996730026e-08,
|
|
"logits/chosen": 1.3988643884658813,
|
|
"logits/rejected": 1.5516958236694336,
|
|
"logps/chosen": -318.7999267578125,
|
|
"logps/ref_chosen": -281.9652099609375,
|
|
"logps/ref_rejected": -266.40411376953125,
|
|
"logps/rejected": -329.5230712890625,
|
|
"loss": 4.4918,
|
|
"margin_dpo/margin_mean": 26.284305572509766,
|
|
"margin_dpo/margin_std": 64.46965789794922,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.9026178010471204,
|
|
"grad_norm": 42.31532287597656,
|
|
"learning_rate": 1.4662207078575684e-08,
|
|
"logits/chosen": 1.7694166898727417,
|
|
"logits/rejected": 1.8366968631744385,
|
|
"logps/chosen": -332.827880859375,
|
|
"logps/ref_chosen": -286.10888671875,
|
|
"logps/ref_rejected": -274.0017395019531,
|
|
"logps/rejected": -361.9835510253906,
|
|
"loss": 4.5107,
|
|
"margin_dpo/margin_mean": 41.2628173828125,
|
|
"margin_dpo/margin_std": 59.571441650390625,
|
|
"step": 431
|
|
},
|
|
{
|
|
"epoch": 0.9047120418848168,
|
|
"grad_norm": 37.992881774902344,
|
|
"learning_rate": 1.40507706120426e-08,
|
|
"logits/chosen": 1.4250589609146118,
|
|
"logits/rejected": 1.6473501920700073,
|
|
"logps/chosen": -348.860595703125,
|
|
"logps/ref_chosen": -316.9443359375,
|
|
"logps/ref_rejected": -358.3585510253906,
|
|
"logps/rejected": -438.4804382324219,
|
|
"loss": 4.1495,
|
|
"margin_dpo/margin_mean": 48.20561981201172,
|
|
"margin_dpo/margin_std": 72.83356475830078,
|
|
"step": 432
|
|
},
|
|
{
|
|
"epoch": 0.9068062827225131,
|
|
"grad_norm": 56.42523193359375,
|
|
"learning_rate": 1.345198738661285e-08,
|
|
"logits/chosen": 1.288971185684204,
|
|
"logits/rejected": 1.2402310371398926,
|
|
"logps/chosen": -312.3082275390625,
|
|
"logps/ref_chosen": -282.2297668457031,
|
|
"logps/ref_rejected": -258.5012512207031,
|
|
"logps/rejected": -328.95513916015625,
|
|
"loss": 4.5137,
|
|
"margin_dpo/margin_mean": 40.375389099121094,
|
|
"margin_dpo/margin_std": 55.306190490722656,
|
|
"step": 433
|
|
},
|
|
{
|
|
"epoch": 0.9089005235602095,
|
|
"grad_norm": 34.96892547607422,
|
|
"learning_rate": 1.2865889513213628e-08,
|
|
"logits/chosen": 1.7188708782196045,
|
|
"logits/rejected": 1.7233338356018066,
|
|
"logps/chosen": -339.9571533203125,
|
|
"logps/ref_chosen": -313.5975646972656,
|
|
"logps/ref_rejected": -293.44097900390625,
|
|
"logps/rejected": -374.973876953125,
|
|
"loss": 4.2523,
|
|
"margin_dpo/margin_mean": 55.173301696777344,
|
|
"margin_dpo/margin_std": 61.733543395996094,
|
|
"step": 434
|
|
},
|
|
{
|
|
"epoch": 0.9109947643979057,
|
|
"grad_norm": 84.76171875,
|
|
"learning_rate": 1.2292508422495157e-08,
|
|
"logits/chosen": 1.584928035736084,
|
|
"logits/rejected": 1.718322515487671,
|
|
"logps/chosen": -211.07582092285156,
|
|
"logps/ref_chosen": -191.58889770507812,
|
|
"logps/ref_rejected": -206.38133239746094,
|
|
"logps/rejected": -269.05389404296875,
|
|
"loss": 4.3653,
|
|
"margin_dpo/margin_mean": 43.18561553955078,
|
|
"margin_dpo/margin_std": 55.161441802978516,
|
|
"step": 435
|
|
},
|
|
{
|
|
"epoch": 0.9130890052356021,
|
|
"grad_norm": 57.35254669189453,
|
|
"learning_rate": 1.1731874863145142e-08,
|
|
"logits/chosen": 1.098213791847229,
|
|
"logits/rejected": 1.1476788520812988,
|
|
"logps/chosen": -349.2528076171875,
|
|
"logps/ref_chosen": -329.4399719238281,
|
|
"logps/ref_rejected": -310.59783935546875,
|
|
"logps/rejected": -374.02459716796875,
|
|
"loss": 4.8351,
|
|
"margin_dpo/margin_mean": 43.61388397216797,
|
|
"margin_dpo/margin_std": 60.17463302612305,
|
|
"step": 436
|
|
},
|
|
{
|
|
"epoch": 0.9151832460732985,
|
|
"grad_norm": 33.499061584472656,
|
|
"learning_rate": 1.118401890024001e-08,
|
|
"logits/chosen": 1.4528710842132568,
|
|
"logits/rejected": 1.5862656831741333,
|
|
"logps/chosen": -284.3026123046875,
|
|
"logps/ref_chosen": -245.99761962890625,
|
|
"logps/ref_rejected": -340.40283203125,
|
|
"logps/rejected": -442.78497314453125,
|
|
"loss": 4.1697,
|
|
"margin_dpo/margin_mean": 64.07716369628906,
|
|
"margin_dpo/margin_std": 62.12276077270508,
|
|
"step": 437
|
|
},
|
|
{
|
|
"epoch": 0.9172774869109948,
|
|
"grad_norm": 139.29515075683594,
|
|
"learning_rate": 1.06489699136324e-08,
|
|
"logits/chosen": 1.1451233625411987,
|
|
"logits/rejected": 1.288071870803833,
|
|
"logps/chosen": -289.2056884765625,
|
|
"logps/ref_chosen": -264.5708923339844,
|
|
"logps/ref_rejected": -263.8647155761719,
|
|
"logps/rejected": -315.4420471191406,
|
|
"loss": 5.1452,
|
|
"margin_dpo/margin_mean": 26.942535400390625,
|
|
"margin_dpo/margin_std": 64.0194320678711,
|
|
"step": 438
|
|
},
|
|
{
|
|
"epoch": 0.9193717277486911,
|
|
"grad_norm": 45.723548889160156,
|
|
"learning_rate": 1.0126756596375685e-08,
|
|
"logits/chosen": 1.449920892715454,
|
|
"logits/rejected": 1.4370176792144775,
|
|
"logps/chosen": -275.9388427734375,
|
|
"logps/ref_chosen": -236.2272491455078,
|
|
"logps/ref_rejected": -260.26531982421875,
|
|
"logps/rejected": -337.4900207519531,
|
|
"loss": 4.4005,
|
|
"margin_dpo/margin_mean": 37.513099670410156,
|
|
"margin_dpo/margin_std": 69.3291244506836,
|
|
"step": 439
|
|
},
|
|
{
|
|
"epoch": 0.9214659685863874,
|
|
"grad_norm": 51.19523620605469,
|
|
"learning_rate": 9.617406953185136e-09,
|
|
"logits/chosen": 1.4269630908966064,
|
|
"logits/rejected": 1.2528316974639893,
|
|
"logps/chosen": -450.7151794433594,
|
|
"logps/ref_chosen": -402.7833557128906,
|
|
"logps/ref_rejected": -285.3439636230469,
|
|
"logps/rejected": -360.1721496582031,
|
|
"loss": 4.77,
|
|
"margin_dpo/margin_mean": 26.89635467529297,
|
|
"margin_dpo/margin_std": 57.31180191040039,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.9235602094240838,
|
|
"grad_norm": 45.438751220703125,
|
|
"learning_rate": 9.12094829893642e-09,
|
|
"logits/chosen": 1.4254412651062012,
|
|
"logits/rejected": 1.6387895345687866,
|
|
"logps/chosen": -382.6518859863281,
|
|
"logps/ref_chosen": -348.18212890625,
|
|
"logps/ref_rejected": -375.537353515625,
|
|
"logps/rejected": -471.38824462890625,
|
|
"loss": 4.0819,
|
|
"margin_dpo/margin_mean": 61.381141662597656,
|
|
"margin_dpo/margin_std": 54.50360870361328,
|
|
"step": 441
|
|
},
|
|
{
|
|
"epoch": 0.9256544502617801,
|
|
"grad_norm": 53.282371520996094,
|
|
"learning_rate": 8.637407257200496e-09,
|
|
"logits/chosen": 1.2402985095977783,
|
|
"logits/rejected": 1.3638098239898682,
|
|
"logps/chosen": -275.3695373535156,
|
|
"logps/ref_chosen": -232.696044921875,
|
|
"logps/ref_rejected": -204.60752868652344,
|
|
"logps/rejected": -279.0164489746094,
|
|
"loss": 4.6929,
|
|
"margin_dpo/margin_mean": 31.735416412353516,
|
|
"margin_dpo/margin_std": 70.92927551269531,
|
|
"step": 442
|
|
},
|
|
{
|
|
"epoch": 0.9277486910994764,
|
|
"grad_norm": 41.01181411743164,
|
|
"learning_rate": 8.166809758815895e-09,
|
|
"logits/chosen": 1.3078192472457886,
|
|
"logits/rejected": 1.3253602981567383,
|
|
"logps/chosen": -330.3334655761719,
|
|
"logps/ref_chosen": -275.13873291015625,
|
|
"logps/ref_rejected": -264.1988830566406,
|
|
"logps/rejected": -356.9568176269531,
|
|
"loss": 4.5058,
|
|
"margin_dpo/margin_mean": 37.56320571899414,
|
|
"margin_dpo/margin_std": 54.74559783935547,
|
|
"step": 443
|
|
},
|
|
{
|
|
"epoch": 0.9298429319371728,
|
|
"grad_norm": 51.980506896972656,
|
|
"learning_rate": 7.709181040498253e-09,
|
|
"logits/chosen": 0.7922985553741455,
|
|
"logits/rejected": 0.9772998690605164,
|
|
"logps/chosen": -340.13336181640625,
|
|
"logps/ref_chosen": -305.7708740234375,
|
|
"logps/ref_rejected": -292.68731689453125,
|
|
"logps/rejected": -366.3361511230469,
|
|
"loss": 4.3732,
|
|
"margin_dpo/margin_mean": 39.28638458251953,
|
|
"margin_dpo/margin_std": 50.5058479309082,
|
|
"step": 444
|
|
},
|
|
{
|
|
"epoch": 0.9319371727748691,
|
|
"grad_norm": 50.610992431640625,
|
|
"learning_rate": 7.2645456434869965e-09,
|
|
"logits/chosen": 1.336693286895752,
|
|
"logits/rejected": 1.4031049013137817,
|
|
"logps/chosen": -257.5855712890625,
|
|
"logps/ref_chosen": -240.496337890625,
|
|
"logps/ref_rejected": -226.0730743408203,
|
|
"logps/rejected": -288.43756103515625,
|
|
"loss": 4.6998,
|
|
"margin_dpo/margin_mean": 45.27527618408203,
|
|
"margin_dpo/margin_std": 74.52619934082031,
|
|
"step": 445
|
|
},
|
|
{
|
|
"epoch": 0.9340314136125655,
|
|
"grad_norm": 62.949615478515625,
|
|
"learning_rate": 6.832927412229017e-09,
|
|
"logits/chosen": 1.3089869022369385,
|
|
"logits/rejected": 1.283385992050171,
|
|
"logps/chosen": -267.8168029785156,
|
|
"logps/ref_chosen": -244.18284606933594,
|
|
"logps/ref_rejected": -211.3776397705078,
|
|
"logps/rejected": -278.58465576171875,
|
|
"loss": 4.3051,
|
|
"margin_dpo/margin_mean": 43.57305145263672,
|
|
"margin_dpo/margin_std": 57.471893310546875,
|
|
"step": 446
|
|
},
|
|
{
|
|
"epoch": 0.9361256544502617,
|
|
"grad_norm": 52.591312408447266,
|
|
"learning_rate": 6.414349493100129e-09,
|
|
"logits/chosen": 1.3533639907836914,
|
|
"logits/rejected": 1.4261730909347534,
|
|
"logps/chosen": -258.5954895019531,
|
|
"logps/ref_chosen": -237.29592895507812,
|
|
"logps/ref_rejected": -246.57034301757812,
|
|
"logps/rejected": -316.0662841796875,
|
|
"loss": 4.0452,
|
|
"margin_dpo/margin_mean": 48.1964225769043,
|
|
"margin_dpo/margin_std": 65.75294494628906,
|
|
"step": 447
|
|
},
|
|
{
|
|
"epoch": 0.9382198952879581,
|
|
"grad_norm": 46.954933166503906,
|
|
"learning_rate": 6.0088343331638756e-09,
|
|
"logits/chosen": 1.6584538221359253,
|
|
"logits/rejected": 1.7075550556182861,
|
|
"logps/chosen": -336.6059265136719,
|
|
"logps/ref_chosen": -308.6024475097656,
|
|
"logps/ref_rejected": -277.87921142578125,
|
|
"logps/rejected": -346.0470275878906,
|
|
"loss": 4.3113,
|
|
"margin_dpo/margin_mean": 40.164310455322266,
|
|
"margin_dpo/margin_std": 57.308650970458984,
|
|
"step": 448
|
|
},
|
|
{
|
|
"epoch": 0.9403141361256544,
|
|
"grad_norm": 54.72856140136719,
|
|
"learning_rate": 5.616403678967624e-09,
|
|
"logits/chosen": 1.8703218698501587,
|
|
"logits/rejected": 1.5918076038360596,
|
|
"logps/chosen": -386.6697998046875,
|
|
"logps/ref_chosen": -376.94281005859375,
|
|
"logps/ref_rejected": -267.11236572265625,
|
|
"logps/rejected": -342.6089782714844,
|
|
"loss": 4.1562,
|
|
"margin_dpo/margin_mean": 65.76960754394531,
|
|
"margin_dpo/margin_std": 61.83976745605469,
|
|
"step": 449
|
|
},
|
|
{
|
|
"epoch": 0.9424083769633508,
|
|
"grad_norm": 65.85668182373047,
|
|
"learning_rate": 5.2370785753763356e-09,
|
|
"logits/chosen": 1.6628509759902954,
|
|
"logits/rejected": 1.4311879873275757,
|
|
"logps/chosen": -327.4361572265625,
|
|
"logps/ref_chosen": -312.619384765625,
|
|
"logps/ref_rejected": -215.62857055664062,
|
|
"logps/rejected": -281.2243957519531,
|
|
"loss": 4.5566,
|
|
"margin_dpo/margin_mean": 50.77903366088867,
|
|
"margin_dpo/margin_std": 56.17271041870117,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.9445026178010472,
|
|
"grad_norm": 55.37830352783203,
|
|
"learning_rate": 4.8708793644441086e-09,
|
|
"logits/chosen": 1.4510207176208496,
|
|
"logits/rejected": 1.5802251100540161,
|
|
"logps/chosen": -330.12811279296875,
|
|
"logps/ref_chosen": -296.6983947753906,
|
|
"logps/ref_rejected": -312.4747619628906,
|
|
"logps/rejected": -392.5194396972656,
|
|
"loss": 4.3406,
|
|
"margin_dpo/margin_mean": 46.6149787902832,
|
|
"margin_dpo/margin_std": 63.69156265258789,
|
|
"step": 451
|
|
},
|
|
{
|
|
"epoch": 0.9465968586387434,
|
|
"grad_norm": 50.63416290283203,
|
|
"learning_rate": 4.517825684323323e-09,
|
|
"logits/chosen": 1.3996615409851074,
|
|
"logits/rejected": 1.581308364868164,
|
|
"logps/chosen": -329.9542541503906,
|
|
"logps/ref_chosen": -294.50958251953125,
|
|
"logps/ref_rejected": -295.56097412109375,
|
|
"logps/rejected": -362.0646667480469,
|
|
"loss": 4.3711,
|
|
"margin_dpo/margin_mean": 31.059064865112305,
|
|
"margin_dpo/margin_std": 63.657875061035156,
|
|
"step": 452
|
|
},
|
|
{
|
|
"epoch": 0.9486910994764398,
|
|
"grad_norm": 44.21456527709961,
|
|
"learning_rate": 4.1779364682113794e-09,
|
|
"logits/chosen": 1.4765185117721558,
|
|
"logits/rejected": 1.6153349876403809,
|
|
"logps/chosen": -341.7298278808594,
|
|
"logps/ref_chosen": -308.21917724609375,
|
|
"logps/ref_rejected": -328.357421875,
|
|
"logps/rejected": -414.666748046875,
|
|
"loss": 4.1702,
|
|
"margin_dpo/margin_mean": 52.798675537109375,
|
|
"margin_dpo/margin_std": 59.44575881958008,
|
|
"step": 453
|
|
},
|
|
{
|
|
"epoch": 0.9507853403141361,
|
|
"grad_norm": 36.86773681640625,
|
|
"learning_rate": 3.851229943335393e-09,
|
|
"logits/chosen": 1.8053613901138306,
|
|
"logits/rejected": 1.7590644359588623,
|
|
"logps/chosen": -365.978759765625,
|
|
"logps/ref_chosen": -332.5453796386719,
|
|
"logps/ref_rejected": -267.130615234375,
|
|
"logps/rejected": -339.3133544921875,
|
|
"loss": 4.3145,
|
|
"margin_dpo/margin_mean": 38.749393463134766,
|
|
"margin_dpo/margin_std": 65.05109405517578,
|
|
"step": 454
|
|
},
|
|
{
|
|
"epoch": 0.9528795811518325,
|
|
"grad_norm": 49.20968246459961,
|
|
"learning_rate": 3.5377236299748147e-09,
|
|
"logits/chosen": 1.3919376134872437,
|
|
"logits/rejected": 1.5252500772476196,
|
|
"logps/chosen": -261.4302978515625,
|
|
"logps/ref_chosen": -240.13719177246094,
|
|
"logps/ref_rejected": -233.28451538085938,
|
|
"logps/rejected": -313.81097412109375,
|
|
"loss": 4.5531,
|
|
"margin_dpo/margin_mean": 59.233402252197266,
|
|
"margin_dpo/margin_std": 58.3806037902832,
|
|
"step": 455
|
|
},
|
|
{
|
|
"epoch": 0.9549738219895288,
|
|
"grad_norm": 42.70205307006836,
|
|
"learning_rate": 3.2374343405217884e-09,
|
|
"logits/chosen": 1.635735034942627,
|
|
"logits/rejected": 1.8064732551574707,
|
|
"logps/chosen": -370.0901184082031,
|
|
"logps/ref_chosen": -334.82666015625,
|
|
"logps/ref_rejected": -313.20404052734375,
|
|
"logps/rejected": -386.83837890625,
|
|
"loss": 4.1309,
|
|
"margin_dpo/margin_mean": 38.370887756347656,
|
|
"margin_dpo/margin_std": 76.44358825683594,
|
|
"step": 456
|
|
},
|
|
{
|
|
"epoch": 0.9570680628272251,
|
|
"grad_norm": 44.188438415527344,
|
|
"learning_rate": 2.9503781785795713e-09,
|
|
"logits/chosen": 1.4283052682876587,
|
|
"logits/rejected": 1.370866298675537,
|
|
"logps/chosen": -319.96136474609375,
|
|
"logps/ref_chosen": -299.60650634765625,
|
|
"logps/ref_rejected": -263.5287780761719,
|
|
"logps/rejected": -360.9966125488281,
|
|
"loss": 4.2728,
|
|
"margin_dpo/margin_mean": 77.11296844482422,
|
|
"margin_dpo/margin_std": 65.07032775878906,
|
|
"step": 457
|
|
},
|
|
{
|
|
"epoch": 0.9591623036649215,
|
|
"grad_norm": 53.56594467163086,
|
|
"learning_rate": 2.6765705380989432e-09,
|
|
"logits/chosen": 1.6065071821212769,
|
|
"logits/rejected": 1.4925872087478638,
|
|
"logps/chosen": -309.9053955078125,
|
|
"logps/ref_chosen": -272.7044372558594,
|
|
"logps/ref_rejected": -235.6636962890625,
|
|
"logps/rejected": -300.68353271484375,
|
|
"loss": 4.3749,
|
|
"margin_dpo/margin_mean": 27.818876266479492,
|
|
"margin_dpo/margin_std": 55.92146301269531,
|
|
"step": 458
|
|
},
|
|
{
|
|
"epoch": 0.9612565445026178,
|
|
"grad_norm": 38.86277389526367,
|
|
"learning_rate": 2.416026102552732e-09,
|
|
"logits/chosen": 1.428498387336731,
|
|
"logits/rejected": 1.3140032291412354,
|
|
"logps/chosen": -328.8189697265625,
|
|
"logps/ref_chosen": -280.32196044921875,
|
|
"logps/ref_rejected": -217.7216339111328,
|
|
"logps/rejected": -297.50244140625,
|
|
"loss": 4.8445,
|
|
"margin_dpo/margin_mean": 31.283798217773438,
|
|
"margin_dpo/margin_std": 54.594871520996094,
|
|
"step": 459
|
|
},
|
|
{
|
|
"epoch": 0.9633507853403142,
|
|
"grad_norm": 91.57032012939453,
|
|
"learning_rate": 2.168758844148272e-09,
|
|
"logits/chosen": 1.443167805671692,
|
|
"logits/rejected": 1.4732171297073364,
|
|
"logps/chosen": -426.33984375,
|
|
"logps/ref_chosen": -387.5949401855469,
|
|
"logps/ref_rejected": -289.61505126953125,
|
|
"logps/rejected": -362.8568115234375,
|
|
"loss": 4.8551,
|
|
"margin_dpo/margin_mean": 34.49686050415039,
|
|
"margin_dpo/margin_std": 72.81037902832031,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.9654450261780104,
|
|
"grad_norm": 57.414451599121094,
|
|
"learning_rate": 1.9347820230782295e-09,
|
|
"logits/chosen": 1.5660542249679565,
|
|
"logits/rejected": 1.520784616470337,
|
|
"logps/chosen": -268.8662414550781,
|
|
"logps/ref_chosen": -247.67520141601562,
|
|
"logps/ref_rejected": -227.18458557128906,
|
|
"logps/rejected": -311.7144775390625,
|
|
"loss": 4.5036,
|
|
"margin_dpo/margin_mean": 63.33882141113281,
|
|
"margin_dpo/margin_std": 73.68379974365234,
|
|
"step": 461
|
|
},
|
|
{
|
|
"epoch": 0.9675392670157068,
|
|
"grad_norm": 49.87493896484375,
|
|
"learning_rate": 1.7141081868094209e-09,
|
|
"logits/chosen": 1.4014174938201904,
|
|
"logits/rejected": 1.3472931385040283,
|
|
"logps/chosen": -378.84088134765625,
|
|
"logps/ref_chosen": -350.8253173828125,
|
|
"logps/ref_rejected": -262.5743713378906,
|
|
"logps/rejected": -355.7335205078125,
|
|
"loss": 4.116,
|
|
"margin_dpo/margin_mean": 65.14358520507812,
|
|
"margin_dpo/margin_std": 64.9459457397461,
|
|
"step": 462
|
|
},
|
|
{
|
|
"epoch": 0.9696335078534032,
|
|
"grad_norm": 84.11515808105469,
|
|
"learning_rate": 1.5067491694100153e-09,
|
|
"logits/chosen": 1.2984429597854614,
|
|
"logits/rejected": 1.3639535903930664,
|
|
"logps/chosen": -271.6904602050781,
|
|
"logps/ref_chosen": -229.31683349609375,
|
|
"logps/ref_rejected": -227.2904815673828,
|
|
"logps/rejected": -293.2787170410156,
|
|
"loss": 4.4943,
|
|
"margin_dpo/margin_mean": 23.614595413208008,
|
|
"margin_dpo/margin_std": 70.91625213623047,
|
|
"step": 463
|
|
},
|
|
{
|
|
"epoch": 0.9717277486910995,
|
|
"grad_norm": 51.687442779541016,
|
|
"learning_rate": 1.3127160909147672e-09,
|
|
"logits/chosen": 1.7068381309509277,
|
|
"logits/rejected": 1.6888086795806885,
|
|
"logps/chosen": -248.65789794921875,
|
|
"logps/ref_chosen": -226.55776977539062,
|
|
"logps/ref_rejected": -208.3471221923828,
|
|
"logps/rejected": -285.44244384765625,
|
|
"loss": 4.4572,
|
|
"margin_dpo/margin_mean": 54.995182037353516,
|
|
"margin_dpo/margin_std": 61.694210052490234,
|
|
"step": 464
|
|
},
|
|
{
|
|
"epoch": 0.9738219895287958,
|
|
"grad_norm": 37.04083251953125,
|
|
"learning_rate": 1.1320193567288527e-09,
|
|
"logits/chosen": 1.1953487396240234,
|
|
"logits/rejected": 1.2031378746032715,
|
|
"logps/chosen": -311.39080810546875,
|
|
"logps/ref_chosen": -287.9401550292969,
|
|
"logps/ref_rejected": -305.5926818847656,
|
|
"logps/rejected": -382.921630859375,
|
|
"loss": 4.0925,
|
|
"margin_dpo/margin_mean": 53.878334045410156,
|
|
"margin_dpo/margin_std": 47.030704498291016,
|
|
"step": 465
|
|
},
|
|
{
|
|
"epoch": 0.9759162303664921,
|
|
"grad_norm": 40.89207458496094,
|
|
"learning_rate": 9.64668657069706e-10,
|
|
"logits/chosen": 1.327850580215454,
|
|
"logits/rejected": 1.417038917541504,
|
|
"logps/chosen": -233.49664306640625,
|
|
"logps/ref_chosen": -224.32131958007812,
|
|
"logps/ref_rejected": -205.79212951660156,
|
|
"logps/rejected": -285.48529052734375,
|
|
"loss": 3.8998,
|
|
"margin_dpo/margin_mean": 70.51785278320312,
|
|
"margin_dpo/margin_std": 54.0175895690918,
|
|
"step": 466
|
|
},
|
|
{
|
|
"epoch": 0.9780104712041885,
|
|
"grad_norm": 66.9071273803711,
|
|
"learning_rate": 8.106729664475176e-10,
|
|
"logits/chosen": 0.6929614543914795,
|
|
"logits/rejected": 0.9394963979721069,
|
|
"logps/chosen": -260.9166259765625,
|
|
"logps/ref_chosen": -227.0828094482422,
|
|
"logps/ref_rejected": -285.5081481933594,
|
|
"logps/rejected": -364.97393798828125,
|
|
"loss": 4.4527,
|
|
"margin_dpo/margin_mean": 45.63193893432617,
|
|
"margin_dpo/margin_std": 61.40578079223633,
|
|
"step": 467
|
|
},
|
|
{
|
|
"epoch": 0.9801047120418848,
|
|
"grad_norm": 43.40538787841797,
|
|
"learning_rate": 6.700405431837585e-10,
|
|
"logits/chosen": 1.376441478729248,
|
|
"logits/rejected": 1.1604515314102173,
|
|
"logps/chosen": -349.08599853515625,
|
|
"logps/ref_chosen": -314.6758117675781,
|
|
"logps/ref_rejected": -290.43023681640625,
|
|
"logps/rejected": -357.7455749511719,
|
|
"loss": 4.5586,
|
|
"margin_dpo/margin_mean": 32.90521240234375,
|
|
"margin_dpo/margin_std": 61.592552185058594,
|
|
"step": 468
|
|
},
|
|
{
|
|
"epoch": 0.9821989528795811,
|
|
"grad_norm": 48.64304733276367,
|
|
"learning_rate": 5.427789289685347e-10,
|
|
"logits/chosen": 1.2290468215942383,
|
|
"logits/rejected": 1.205275058746338,
|
|
"logps/chosen": -290.6042175292969,
|
|
"logps/ref_chosen": -269.7442321777344,
|
|
"logps/ref_rejected": -237.1964874267578,
|
|
"logps/rejected": -307.8065185546875,
|
|
"loss": 4.2722,
|
|
"margin_dpo/margin_mean": 49.750064849853516,
|
|
"margin_dpo/margin_std": 63.16607666015625,
|
|
"step": 469
|
|
},
|
|
{
|
|
"epoch": 0.9842931937172775,
|
|
"grad_norm": 48.205284118652344,
|
|
"learning_rate": 4.288949484559934e-10,
|
|
"logits/chosen": 0.8344168066978455,
|
|
"logits/rejected": 0.8336673378944397,
|
|
"logps/chosen": -342.15057373046875,
|
|
"logps/ref_chosen": -326.9454650878906,
|
|
"logps/ref_rejected": -283.81768798828125,
|
|
"logps/rejected": -360.6470031738281,
|
|
"loss": 4.0516,
|
|
"margin_dpo/margin_mean": 61.624237060546875,
|
|
"margin_dpo/margin_std": 61.775753021240234,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.9863874345549738,
|
|
"grad_norm": 58.954227447509766,
|
|
"learning_rate": 3.2839470889836627e-10,
|
|
"logits/chosen": 1.4265129566192627,
|
|
"logits/rejected": 1.394487977027893,
|
|
"logps/chosen": -337.0513000488281,
|
|
"logps/ref_chosen": -309.4604797363281,
|
|
"logps/ref_rejected": -292.0646057128906,
|
|
"logps/rejected": -356.0199890136719,
|
|
"loss": 4.5083,
|
|
"margin_dpo/margin_mean": 36.36451721191406,
|
|
"margin_dpo/margin_std": 59.396270751953125,
|
|
"step": 471
|
|
},
|
|
{
|
|
"epoch": 0.9884816753926702,
|
|
"grad_norm": 61.63766098022461,
|
|
"learning_rate": 2.412835998185092e-10,
|
|
"logits/chosen": 1.1254520416259766,
|
|
"logits/rejected": 1.199691891670227,
|
|
"logps/chosen": -210.82077026367188,
|
|
"logps/ref_chosen": -185.00701904296875,
|
|
"logps/ref_rejected": -208.1576385498047,
|
|
"logps/rejected": -272.89483642578125,
|
|
"loss": 4.1475,
|
|
"margin_dpo/margin_mean": 38.92341613769531,
|
|
"margin_dpo/margin_std": 71.17210388183594,
|
|
"step": 472
|
|
},
|
|
{
|
|
"epoch": 0.9905759162303664,
|
|
"grad_norm": 56.59166717529297,
|
|
"learning_rate": 1.6756629272085544e-10,
|
|
"logits/chosen": 1.3166826963424683,
|
|
"logits/rejected": 1.1217594146728516,
|
|
"logps/chosen": -350.744140625,
|
|
"logps/ref_chosen": -330.0291442871094,
|
|
"logps/ref_rejected": -224.92051696777344,
|
|
"logps/rejected": -302.14898681640625,
|
|
"loss": 4.1337,
|
|
"margin_dpo/margin_mean": 56.51344299316406,
|
|
"margin_dpo/margin_std": 51.90734100341797,
|
|
"step": 473
|
|
},
|
|
{
|
|
"epoch": 0.9926701570680628,
|
|
"grad_norm": 59.311004638671875,
|
|
"learning_rate": 1.072467408408384e-10,
|
|
"logits/chosen": 1.2557741403579712,
|
|
"logits/rejected": 1.3725204467773438,
|
|
"logps/chosen": -355.0524597167969,
|
|
"logps/ref_chosen": -315.9046936035156,
|
|
"logps/ref_rejected": -340.7234191894531,
|
|
"logps/rejected": -411.844482421875,
|
|
"loss": 4.5535,
|
|
"margin_dpo/margin_mean": 31.97334861755371,
|
|
"margin_dpo/margin_std": 64.91607666015625,
|
|
"step": 474
|
|
},
|
|
{
|
|
"epoch": 0.9947643979057592,
|
|
"grad_norm": 54.42702102661133,
|
|
"learning_rate": 6.032817893297793e-11,
|
|
"logits/chosen": 0.872796356678009,
|
|
"logits/rejected": 0.9358187913894653,
|
|
"logps/chosen": -227.73483276367188,
|
|
"logps/ref_chosen": -202.84310913085938,
|
|
"logps/ref_rejected": -175.70704650878906,
|
|
"logps/rejected": -250.8938751220703,
|
|
"loss": 4.4903,
|
|
"margin_dpo/margin_mean": 50.29510498046875,
|
|
"margin_dpo/margin_std": 74.1104965209961,
|
|
"step": 475
|
|
},
|
|
{
|
|
"epoch": 0.9968586387434555,
|
|
"grad_norm": 50.788307189941406,
|
|
"learning_rate": 2.6813123097352287e-11,
|
|
"logits/chosen": 1.07370924949646,
|
|
"logits/rejected": 1.213090181350708,
|
|
"logps/chosen": -292.42132568359375,
|
|
"logps/ref_chosen": -276.843505859375,
|
|
"logps/ref_rejected": -309.07757568359375,
|
|
"logps/rejected": -368.9504089355469,
|
|
"loss": 4.2622,
|
|
"margin_dpo/margin_mean": 44.29502868652344,
|
|
"margin_dpo/margin_std": 54.164615631103516,
|
|
"step": 476
|
|
},
|
|
{
|
|
"epoch": 0.9989528795811519,
|
|
"grad_norm": 40.021400451660156,
|
|
"learning_rate": 6.7033706447061635e-12,
|
|
"logits/chosen": 0.8247851729393005,
|
|
"logits/rejected": 0.9125658869743347,
|
|
"logps/chosen": -297.199951171875,
|
|
"logps/ref_chosen": -262.76971435546875,
|
|
"logps/ref_rejected": -272.2499694824219,
|
|
"logps/rejected": -356.56658935546875,
|
|
"loss": 4.216,
|
|
"margin_dpo/margin_mean": 49.88636779785156,
|
|
"margin_dpo/margin_std": 77.48868560791016,
|
|
"step": 477
|
|
},
|
|
{
|
|
"epoch": 0.9989528795811519,
|
|
"step": 477,
|
|
"total_flos": 0.0,
|
|
"train_loss": 4.779813265150698,
|
|
"train_runtime": 7822.2821,
|
|
"train_samples_per_second": 7.815,
|
|
"train_steps_per_second": 0.061
|
|
}
|
|
],
|
|
"logging_steps": 1,
|
|
"max_steps": 477,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 200,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 4,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|