Model: W-61/llama-3-8b-base-margin-dpo-hh-helpful-4xh200-batch-64-20260417-212312 Source: Original Platform
14477 lines
595 KiB
JSON
14477 lines
595 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 1.0,
|
|
"eval_steps": 100,
|
|
"global_step": 681,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0014684287812041115,
|
|
"grad_norm": 83.525146484375,
|
|
"learning_rate": 0.0,
|
|
"logits/chosen": -0.4974287748336792,
|
|
"logits/rejected": -0.43299180269241333,
|
|
"logps/chosen": -50.1435661315918,
|
|
"logps/ref_chosen": -50.14883804321289,
|
|
"logps/ref_rejected": -74.1280517578125,
|
|
"logps/rejected": -74.09991455078125,
|
|
"loss": 1.389,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.5005706548690796,
|
|
"margin_dpo/beta_margin_grad_std": 0.010499694384634495,
|
|
"margin_dpo/beta_margin_mean": -0.0022870064713060856,
|
|
"margin_dpo/beta_margin_std": 0.0420234240591526,
|
|
"margin_dpo/loss_margin_mean": -0.02287006378173828,
|
|
"margin_dpo/margin_mean": -0.02287048101425171,
|
|
"margin_dpo/margin_std": 0.41920793056488037,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.002936857562408223,
|
|
"grad_norm": 72.20420837402344,
|
|
"learning_rate": 7.246376811594203e-09,
|
|
"logits/chosen": -0.4953641891479492,
|
|
"logits/rejected": -0.4594460129737854,
|
|
"logps/chosen": -52.65569305419922,
|
|
"logps/ref_chosen": -52.620704650878906,
|
|
"logps/ref_rejected": -75.30413818359375,
|
|
"logps/rejected": -75.27340698242188,
|
|
"loss": 1.3932,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.5016425848007202,
|
|
"margin_dpo/beta_margin_grad_std": 0.008806563913822174,
|
|
"margin_dpo/beta_margin_mean": -0.006572261452674866,
|
|
"margin_dpo/beta_margin_std": 0.03523966670036316,
|
|
"margin_dpo/loss_margin_mean": -0.06572261452674866,
|
|
"margin_dpo/margin_mean": -0.06572240591049194,
|
|
"margin_dpo/margin_std": 0.35048407316207886,
|
|
"step": 2
|
|
},
|
|
{
|
|
"epoch": 0.004405286343612335,
|
|
"grad_norm": 70.93851470947266,
|
|
"learning_rate": 1.4492753623188406e-08,
|
|
"logits/chosen": -0.4816606044769287,
|
|
"logits/rejected": -0.44218793511390686,
|
|
"logps/chosen": -60.9985466003418,
|
|
"logps/ref_chosen": -60.98159408569336,
|
|
"logps/ref_rejected": -68.67259216308594,
|
|
"logps/rejected": -68.67314147949219,
|
|
"loss": 1.3882,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.5004101395606995,
|
|
"margin_dpo/beta_margin_grad_std": 0.008285283111035824,
|
|
"margin_dpo/beta_margin_mean": -0.001640463131479919,
|
|
"margin_dpo/beta_margin_std": 0.03315068036317825,
|
|
"margin_dpo/loss_margin_mean": -0.01640462875366211,
|
|
"margin_dpo/margin_mean": -0.01640373468399048,
|
|
"margin_dpo/margin_std": 0.33020099997520447,
|
|
"step": 3
|
|
},
|
|
{
|
|
"epoch": 0.005873715124816446,
|
|
"grad_norm": 71.9634780883789,
|
|
"learning_rate": 2.1739130434782606e-08,
|
|
"logits/chosen": -0.4688633680343628,
|
|
"logits/rejected": -0.4411826729774475,
|
|
"logps/chosen": -56.74000930786133,
|
|
"logps/ref_chosen": -56.76771545410156,
|
|
"logps/ref_rejected": -86.64710998535156,
|
|
"logps/rejected": -86.62959289550781,
|
|
"loss": 1.3857,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.49974533915519714,
|
|
"margin_dpo/beta_margin_grad_std": 0.010213336907327175,
|
|
"margin_dpo/beta_margin_mean": 0.0010185746941715479,
|
|
"margin_dpo/beta_margin_std": 0.04087061062455177,
|
|
"margin_dpo/loss_margin_mean": 0.01018574833869934,
|
|
"margin_dpo/margin_mean": 0.0101853609085083,
|
|
"margin_dpo/margin_std": 0.40629148483276367,
|
|
"step": 4
|
|
},
|
|
{
|
|
"epoch": 0.007342143906020558,
|
|
"grad_norm": 89.44969940185547,
|
|
"learning_rate": 2.898550724637681e-08,
|
|
"logits/chosen": -0.5144953727722168,
|
|
"logits/rejected": -0.4707370400428772,
|
|
"logps/chosen": -53.81106185913086,
|
|
"logps/ref_chosen": -53.859375,
|
|
"logps/ref_rejected": -84.14918518066406,
|
|
"logps/rejected": -84.13066864013672,
|
|
"loss": 1.3838,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.49925631284713745,
|
|
"margin_dpo/beta_margin_grad_std": 0.010840461589396,
|
|
"margin_dpo/beta_margin_mean": 0.0029798836912959814,
|
|
"margin_dpo/beta_margin_std": 0.043392810970544815,
|
|
"margin_dpo/loss_margin_mean": 0.029798835515975952,
|
|
"margin_dpo/margin_mean": 0.02979910373687744,
|
|
"margin_dpo/margin_std": 0.4284527897834778,
|
|
"step": 5
|
|
},
|
|
{
|
|
"epoch": 0.00881057268722467,
|
|
"grad_norm": 91.85087585449219,
|
|
"learning_rate": 3.6231884057971014e-08,
|
|
"logits/chosen": -0.5226503610610962,
|
|
"logits/rejected": -0.48189258575439453,
|
|
"logps/chosen": -63.01681137084961,
|
|
"logps/ref_chosen": -63.007484436035156,
|
|
"logps/ref_rejected": -92.64534759521484,
|
|
"logps/rejected": -92.65907287597656,
|
|
"loss": 1.3862,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.499889999628067,
|
|
"margin_dpo/beta_margin_grad_std": 0.009657730348408222,
|
|
"margin_dpo/beta_margin_mean": 0.00043985259253531694,
|
|
"margin_dpo/beta_margin_std": 0.03865039348602295,
|
|
"margin_dpo/loss_margin_mean": 0.004398524761199951,
|
|
"margin_dpo/margin_mean": 0.0043981969356536865,
|
|
"margin_dpo/margin_std": 0.37970417737960815,
|
|
"step": 6
|
|
},
|
|
{
|
|
"epoch": 0.010279001468428781,
|
|
"grad_norm": 82.43697357177734,
|
|
"learning_rate": 4.347826086956521e-08,
|
|
"logits/chosen": -0.5088996887207031,
|
|
"logits/rejected": -0.4749848246574402,
|
|
"logps/chosen": -57.743560791015625,
|
|
"logps/ref_chosen": -57.774818420410156,
|
|
"logps/ref_rejected": -103.92059326171875,
|
|
"logps/rejected": -103.90592193603516,
|
|
"loss": 1.3851,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.4995860159397125,
|
|
"margin_dpo/beta_margin_grad_std": 0.01023741252720356,
|
|
"margin_dpo/beta_margin_mean": 0.0016585501143708825,
|
|
"margin_dpo/beta_margin_std": 0.04097241163253784,
|
|
"margin_dpo/loss_margin_mean": 0.016585499048233032,
|
|
"margin_dpo/margin_mean": 0.01658591628074646,
|
|
"margin_dpo/margin_std": 0.4064858555793762,
|
|
"step": 7
|
|
},
|
|
{
|
|
"epoch": 0.011747430249632892,
|
|
"grad_norm": 79.04316711425781,
|
|
"learning_rate": 5.0724637681159424e-08,
|
|
"logits/chosen": -0.5012874007225037,
|
|
"logits/rejected": -0.4746849238872528,
|
|
"logps/chosen": -58.70497512817383,
|
|
"logps/ref_chosen": -58.716033935546875,
|
|
"logps/ref_rejected": -79.3114242553711,
|
|
"logps/rejected": -79.27145385742188,
|
|
"loss": 1.3896,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.5007215142250061,
|
|
"margin_dpo/beta_margin_grad_std": 0.009568445384502411,
|
|
"margin_dpo/beta_margin_mean": -0.0028907686937600374,
|
|
"margin_dpo/beta_margin_std": 0.038289591670036316,
|
|
"margin_dpo/loss_margin_mean": -0.028907686471939087,
|
|
"margin_dpo/margin_mean": -0.028907448053359985,
|
|
"margin_dpo/margin_std": 0.37828418612480164,
|
|
"step": 8
|
|
},
|
|
{
|
|
"epoch": 0.013215859030837005,
|
|
"grad_norm": 85.21879577636719,
|
|
"learning_rate": 5.797101449275362e-08,
|
|
"logits/chosen": -0.4914604127407074,
|
|
"logits/rejected": -0.44458478689193726,
|
|
"logps/chosen": -69.87384033203125,
|
|
"logps/ref_chosen": -69.8668441772461,
|
|
"logps/ref_rejected": -99.6026611328125,
|
|
"logps/rejected": -99.62161254882812,
|
|
"loss": 1.3856,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.49970191717147827,
|
|
"margin_dpo/beta_margin_grad_std": 0.010726687498390675,
|
|
"margin_dpo/beta_margin_mean": 0.0011951536871492863,
|
|
"margin_dpo/beta_margin_std": 0.04292509704828262,
|
|
"margin_dpo/loss_margin_mean": 0.011951535940170288,
|
|
"margin_dpo/margin_mean": 0.011951416730880737,
|
|
"margin_dpo/margin_std": 0.4246274530887604,
|
|
"step": 9
|
|
},
|
|
{
|
|
"epoch": 0.014684287812041116,
|
|
"grad_norm": 70.79057312011719,
|
|
"learning_rate": 6.521739130434782e-08,
|
|
"logits/chosen": -0.5021112561225891,
|
|
"logits/rejected": -0.45928800106048584,
|
|
"logps/chosen": -48.30955505371094,
|
|
"logps/ref_chosen": -48.35768508911133,
|
|
"logps/ref_rejected": -80.37206268310547,
|
|
"logps/rejected": -80.38316345214844,
|
|
"loss": 1.3808,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.4985186755657196,
|
|
"margin_dpo/beta_margin_grad_std": 0.010679498314857483,
|
|
"margin_dpo/beta_margin_mean": 0.005922754295170307,
|
|
"margin_dpo/beta_margin_std": 0.04276762157678604,
|
|
"margin_dpo/loss_margin_mean": 0.05922754108905792,
|
|
"margin_dpo/margin_mean": 0.05922728776931763,
|
|
"margin_dpo/margin_std": 0.425285279750824,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.016152716593245228,
|
|
"grad_norm": 68.34065246582031,
|
|
"learning_rate": 7.246376811594203e-08,
|
|
"logits/chosen": -0.46157172322273254,
|
|
"logits/rejected": -0.4366176128387451,
|
|
"logps/chosen": -52.98234558105469,
|
|
"logps/ref_chosen": -53.01685333251953,
|
|
"logps/ref_rejected": -87.78038024902344,
|
|
"logps/rejected": -87.7928466796875,
|
|
"loss": 1.382,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.4988263249397278,
|
|
"margin_dpo/beta_margin_grad_std": 0.009599917568266392,
|
|
"margin_dpo/beta_margin_mean": 0.004697933793067932,
|
|
"margin_dpo/beta_margin_std": 0.03841574117541313,
|
|
"margin_dpo/loss_margin_mean": 0.04697933793067932,
|
|
"margin_dpo/margin_mean": 0.04697957634925842,
|
|
"margin_dpo/margin_std": 0.3766877055168152,
|
|
"step": 11
|
|
},
|
|
{
|
|
"epoch": 0.01762114537444934,
|
|
"grad_norm": 90.25657653808594,
|
|
"learning_rate": 7.971014492753623e-08,
|
|
"logits/chosen": -0.5372684001922607,
|
|
"logits/rejected": -0.5010780096054077,
|
|
"logps/chosen": -61.82605743408203,
|
|
"logps/ref_chosen": -61.80543518066406,
|
|
"logps/ref_rejected": -104.85826873779297,
|
|
"logps/rejected": -104.91586303710938,
|
|
"loss": 1.383,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.49907633662223816,
|
|
"margin_dpo/beta_margin_grad_std": 0.009649958461523056,
|
|
"margin_dpo/beta_margin_mean": 0.003697256790474057,
|
|
"margin_dpo/beta_margin_std": 0.03862835466861725,
|
|
"margin_dpo/loss_margin_mean": 0.036972567439079285,
|
|
"margin_dpo/margin_mean": 0.03697209060192108,
|
|
"margin_dpo/margin_std": 0.3801400065422058,
|
|
"step": 12
|
|
},
|
|
{
|
|
"epoch": 0.01908957415565345,
|
|
"grad_norm": 79.32652282714844,
|
|
"learning_rate": 8.695652173913042e-08,
|
|
"logits/chosen": -0.4902585744857788,
|
|
"logits/rejected": -0.46292757987976074,
|
|
"logps/chosen": -64.28887176513672,
|
|
"logps/ref_chosen": -64.26036071777344,
|
|
"logps/ref_rejected": -87.20307922363281,
|
|
"logps/rejected": -87.23356628417969,
|
|
"loss": 1.3865,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.49995219707489014,
|
|
"margin_dpo/beta_margin_grad_std": 0.010526234284043312,
|
|
"margin_dpo/beta_margin_mean": 0.00019729437190108,
|
|
"margin_dpo/beta_margin_std": 0.04214153066277504,
|
|
"margin_dpo/loss_margin_mean": 0.0019729435443878174,
|
|
"margin_dpo/margin_mean": 0.0019735991954803467,
|
|
"margin_dpo/margin_std": 0.4049326777458191,
|
|
"step": 13
|
|
},
|
|
{
|
|
"epoch": 0.020558002936857563,
|
|
"grad_norm": 85.4604263305664,
|
|
"learning_rate": 9.420289855072464e-08,
|
|
"logits/chosen": -0.489965558052063,
|
|
"logits/rejected": -0.4511108696460724,
|
|
"logps/chosen": -58.152305603027344,
|
|
"logps/ref_chosen": -58.11021423339844,
|
|
"logps/ref_rejected": -104.04708099365234,
|
|
"logps/rejected": -104.09505462646484,
|
|
"loss": 1.3863,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.4998512864112854,
|
|
"margin_dpo/beta_margin_grad_std": 0.011847623623907566,
|
|
"margin_dpo/beta_margin_mean": 0.000588723982218653,
|
|
"margin_dpo/beta_margin_std": 0.047432418912649155,
|
|
"margin_dpo/loss_margin_mean": 0.005887240171432495,
|
|
"margin_dpo/margin_mean": 0.005887240171432495,
|
|
"margin_dpo/margin_std": 0.47125041484832764,
|
|
"step": 14
|
|
},
|
|
{
|
|
"epoch": 0.022026431718061675,
|
|
"grad_norm": 64.13221740722656,
|
|
"learning_rate": 1.0144927536231885e-07,
|
|
"logits/chosen": -0.46068376302719116,
|
|
"logits/rejected": -0.44027313590049744,
|
|
"logps/chosen": -56.97354507446289,
|
|
"logps/ref_chosen": -56.96691131591797,
|
|
"logps/ref_rejected": -80.80863952636719,
|
|
"logps/rejected": -80.85784912109375,
|
|
"loss": 1.3824,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.49893510341644287,
|
|
"margin_dpo/beta_margin_grad_std": 0.009985481388866901,
|
|
"margin_dpo/beta_margin_mean": 0.0042571574449539185,
|
|
"margin_dpo/beta_margin_std": 0.03996788337826729,
|
|
"margin_dpo/loss_margin_mean": 0.042571574449539185,
|
|
"margin_dpo/margin_mean": 0.042571812868118286,
|
|
"margin_dpo/margin_std": 0.39672398567199707,
|
|
"step": 15
|
|
},
|
|
{
|
|
"epoch": 0.023494860499265784,
|
|
"grad_norm": 84.14559173583984,
|
|
"learning_rate": 1.0869565217391303e-07,
|
|
"logits/chosen": -0.52532559633255,
|
|
"logits/rejected": -0.4843023419380188,
|
|
"logps/chosen": -61.73296356201172,
|
|
"logps/ref_chosen": -61.739891052246094,
|
|
"logps/ref_rejected": -84.36947631835938,
|
|
"logps/rejected": -84.38020324707031,
|
|
"loss": 1.3848,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.49955853819847107,
|
|
"margin_dpo/beta_margin_grad_std": 0.008663349784910679,
|
|
"margin_dpo/beta_margin_mean": 0.001766052795574069,
|
|
"margin_dpo/beta_margin_std": 0.03466500714421272,
|
|
"margin_dpo/loss_margin_mean": 0.017660528421401978,
|
|
"margin_dpo/margin_mean": 0.01766011118888855,
|
|
"margin_dpo/margin_std": 0.3431432843208313,
|
|
"step": 16
|
|
},
|
|
{
|
|
"epoch": 0.024963289280469897,
|
|
"grad_norm": 78.68696594238281,
|
|
"learning_rate": 1.1594202898550725e-07,
|
|
"logits/chosen": -0.5094451308250427,
|
|
"logits/rejected": -0.4733882546424866,
|
|
"logps/chosen": -67.70388793945312,
|
|
"logps/ref_chosen": -67.71033477783203,
|
|
"logps/ref_rejected": -85.37865447998047,
|
|
"logps/rejected": -85.42217254638672,
|
|
"loss": 1.3816,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.49875107407569885,
|
|
"margin_dpo/beta_margin_grad_std": 0.008506165817379951,
|
|
"margin_dpo/beta_margin_mean": 0.0049957516603171825,
|
|
"margin_dpo/beta_margin_std": 0.034035272896289825,
|
|
"margin_dpo/loss_margin_mean": 0.0499575138092041,
|
|
"margin_dpo/margin_mean": 0.04995712637901306,
|
|
"margin_dpo/margin_std": 0.3325832486152649,
|
|
"step": 17
|
|
},
|
|
{
|
|
"epoch": 0.02643171806167401,
|
|
"grad_norm": 81.91975402832031,
|
|
"learning_rate": 1.2318840579710146e-07,
|
|
"logits/chosen": -0.4996645152568817,
|
|
"logits/rejected": -0.4448869228363037,
|
|
"logps/chosen": -47.723114013671875,
|
|
"logps/ref_chosen": -47.7394905090332,
|
|
"logps/ref_rejected": -75.4722900390625,
|
|
"logps/rejected": -75.5279541015625,
|
|
"loss": 1.3794,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.49819934368133545,
|
|
"margin_dpo/beta_margin_grad_std": 0.008676947094500065,
|
|
"margin_dpo/beta_margin_mean": 0.007204136345535517,
|
|
"margin_dpo/beta_margin_std": 0.03471643477678299,
|
|
"margin_dpo/loss_margin_mean": 0.07204136252403259,
|
|
"margin_dpo/margin_mean": 0.0720413327217102,
|
|
"margin_dpo/margin_std": 0.3442285656929016,
|
|
"step": 18
|
|
},
|
|
{
|
|
"epoch": 0.027900146842878122,
|
|
"grad_norm": 73.45258331298828,
|
|
"learning_rate": 1.3043478260869563e-07,
|
|
"logits/chosen": -0.5062457323074341,
|
|
"logits/rejected": -0.45754408836364746,
|
|
"logps/chosen": -70.22134399414062,
|
|
"logps/ref_chosen": -70.20535278320312,
|
|
"logps/ref_rejected": -89.75758361816406,
|
|
"logps/rejected": -89.80667114257812,
|
|
"loss": 1.3833,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.49917319416999817,
|
|
"margin_dpo/beta_margin_grad_std": 0.009437629953026772,
|
|
"margin_dpo/beta_margin_mean": 0.003309211228042841,
|
|
"margin_dpo/beta_margin_std": 0.03776707127690315,
|
|
"margin_dpo/loss_margin_mean": 0.033092111349105835,
|
|
"margin_dpo/margin_mean": 0.03309273719787598,
|
|
"margin_dpo/margin_std": 0.3704480528831482,
|
|
"step": 19
|
|
},
|
|
{
|
|
"epoch": 0.02936857562408223,
|
|
"grad_norm": 73.92622375488281,
|
|
"learning_rate": 1.3768115942028986e-07,
|
|
"logits/chosen": -0.5687921643257141,
|
|
"logits/rejected": -0.5141441226005554,
|
|
"logps/chosen": -50.828826904296875,
|
|
"logps/ref_chosen": -50.80324172973633,
|
|
"logps/ref_rejected": -78.8233413696289,
|
|
"logps/rejected": -78.88971710205078,
|
|
"loss": 1.3825,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.4989803433418274,
|
|
"margin_dpo/beta_margin_grad_std": 0.007609857711941004,
|
|
"margin_dpo/beta_margin_mean": 0.004078629892319441,
|
|
"margin_dpo/beta_margin_std": 0.03044736012816429,
|
|
"margin_dpo/loss_margin_mean": 0.040786296129226685,
|
|
"margin_dpo/margin_mean": 0.0407865047454834,
|
|
"margin_dpo/margin_std": 0.29486507177352905,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.030837004405286344,
|
|
"grad_norm": 77.78363037109375,
|
|
"learning_rate": 1.4492753623188405e-07,
|
|
"logits/chosen": -0.49086394906044006,
|
|
"logits/rejected": -0.4666551351547241,
|
|
"logps/chosen": -50.0500373840332,
|
|
"logps/ref_chosen": -50.063018798828125,
|
|
"logps/ref_rejected": -77.86878967285156,
|
|
"logps/rejected": -77.97210693359375,
|
|
"loss": 1.375,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.4970940947532654,
|
|
"margin_dpo/beta_margin_grad_std": 0.008713486604392529,
|
|
"margin_dpo/beta_margin_mean": 0.01162932813167572,
|
|
"margin_dpo/beta_margin_std": 0.03486839681863785,
|
|
"margin_dpo/loss_margin_mean": 0.1162932813167572,
|
|
"margin_dpo/margin_mean": 0.11629366874694824,
|
|
"margin_dpo/margin_std": 0.34371477365493774,
|
|
"step": 21
|
|
},
|
|
{
|
|
"epoch": 0.032305433186490456,
|
|
"grad_norm": 84.3017349243164,
|
|
"learning_rate": 1.5217391304347825e-07,
|
|
"logits/chosen": -0.4743150472640991,
|
|
"logits/rejected": -0.4301157593727112,
|
|
"logps/chosen": -58.9935417175293,
|
|
"logps/ref_chosen": -59.05763626098633,
|
|
"logps/ref_rejected": -97.50466918945312,
|
|
"logps/rejected": -97.69529724121094,
|
|
"loss": 1.3615,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.4936363697052002,
|
|
"margin_dpo/beta_margin_grad_std": 0.01110980473458767,
|
|
"margin_dpo/beta_margin_mean": 0.025472251698374748,
|
|
"margin_dpo/beta_margin_std": 0.044476091861724854,
|
|
"margin_dpo/loss_margin_mean": 0.2547225058078766,
|
|
"margin_dpo/margin_mean": 0.2547217905521393,
|
|
"margin_dpo/margin_std": 0.4430729150772095,
|
|
"step": 22
|
|
},
|
|
{
|
|
"epoch": 0.033773861967694566,
|
|
"grad_norm": 80.28763580322266,
|
|
"learning_rate": 1.5942028985507245e-07,
|
|
"logits/chosen": -0.4873223900794983,
|
|
"logits/rejected": -0.4646031856536865,
|
|
"logps/chosen": -60.04255676269531,
|
|
"logps/ref_chosen": -60.07769775390625,
|
|
"logps/ref_rejected": -81.1395492553711,
|
|
"logps/rejected": -81.33428955078125,
|
|
"loss": 1.364,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.49425840377807617,
|
|
"margin_dpo/beta_margin_grad_std": 0.011434967629611492,
|
|
"margin_dpo/beta_margin_mean": 0.022987453266978264,
|
|
"margin_dpo/beta_margin_std": 0.04579947143793106,
|
|
"margin_dpo/loss_margin_mean": 0.22987452149391174,
|
|
"margin_dpo/margin_mean": 0.22987452149391174,
|
|
"margin_dpo/margin_std": 0.4392421543598175,
|
|
"step": 23
|
|
},
|
|
{
|
|
"epoch": 0.03524229074889868,
|
|
"grad_norm": 80.72453308105469,
|
|
"learning_rate": 1.6666666666666665e-07,
|
|
"logits/chosen": -0.479617714881897,
|
|
"logits/rejected": -0.46357664465904236,
|
|
"logps/chosen": -44.27165985107422,
|
|
"logps/ref_chosen": -44.29103469848633,
|
|
"logps/ref_rejected": -99.12521362304688,
|
|
"logps/rejected": -99.34617614746094,
|
|
"loss": 1.3629,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.49399420619010925,
|
|
"margin_dpo/beta_margin_grad_std": 0.01102045550942421,
|
|
"margin_dpo/beta_margin_mean": 0.02403390221297741,
|
|
"margin_dpo/beta_margin_std": 0.04411429166793823,
|
|
"margin_dpo/loss_margin_mean": 0.2403390109539032,
|
|
"margin_dpo/margin_mean": 0.24034002423286438,
|
|
"margin_dpo/margin_std": 0.42840874195098877,
|
|
"step": 24
|
|
},
|
|
{
|
|
"epoch": 0.03671071953010279,
|
|
"grad_norm": 73.97421264648438,
|
|
"learning_rate": 1.7391304347826085e-07,
|
|
"logits/chosen": -0.49460622668266296,
|
|
"logits/rejected": -0.4645787179470062,
|
|
"logps/chosen": -52.51414489746094,
|
|
"logps/ref_chosen": -52.537052154541016,
|
|
"logps/ref_rejected": -89.34219360351562,
|
|
"logps/rejected": -89.54405975341797,
|
|
"loss": 1.3645,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.4943839907646179,
|
|
"margin_dpo/beta_margin_grad_std": 0.011376350186765194,
|
|
"margin_dpo/beta_margin_mean": 0.02247805707156658,
|
|
"margin_dpo/beta_margin_std": 0.045543402433395386,
|
|
"margin_dpo/loss_margin_mean": 0.22478055953979492,
|
|
"margin_dpo/margin_mean": 0.22478067874908447,
|
|
"margin_dpo/margin_std": 0.4543741047382355,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.0381791483113069,
|
|
"grad_norm": 87.36368560791016,
|
|
"learning_rate": 1.8115942028985507e-07,
|
|
"logits/chosen": -0.5448323488235474,
|
|
"logits/rejected": -0.5133931636810303,
|
|
"logps/chosen": -53.813804626464844,
|
|
"logps/ref_chosen": -53.92280578613281,
|
|
"logps/ref_rejected": -103.35971069335938,
|
|
"logps/rejected": -103.66832733154297,
|
|
"loss": 1.3457,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.48957008123397827,
|
|
"margin_dpo/beta_margin_grad_std": 0.013178465887904167,
|
|
"margin_dpo/beta_margin_mean": 0.04176199808716774,
|
|
"margin_dpo/beta_margin_std": 0.05279136076569557,
|
|
"margin_dpo/loss_margin_mean": 0.4176199734210968,
|
|
"margin_dpo/margin_mean": 0.41762077808380127,
|
|
"margin_dpo/margin_std": 0.5226191282272339,
|
|
"step": 26
|
|
},
|
|
{
|
|
"epoch": 0.039647577092511016,
|
|
"grad_norm": 94.08861541748047,
|
|
"learning_rate": 1.8840579710144927e-07,
|
|
"logits/chosen": -0.5202087163925171,
|
|
"logits/rejected": -0.4837333858013153,
|
|
"logps/chosen": -42.766082763671875,
|
|
"logps/ref_chosen": -42.898529052734375,
|
|
"logps/ref_rejected": -98.72420501708984,
|
|
"logps/rejected": -99.09607696533203,
|
|
"loss": 1.3374,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.4874098598957062,
|
|
"margin_dpo/beta_margin_grad_std": 0.014595179818570614,
|
|
"margin_dpo/beta_margin_mean": 0.05043218284845352,
|
|
"margin_dpo/beta_margin_std": 0.05854206159710884,
|
|
"margin_dpo/loss_margin_mean": 0.504321813583374,
|
|
"margin_dpo/margin_mean": 0.5043210983276367,
|
|
"margin_dpo/margin_std": 0.5811291933059692,
|
|
"step": 27
|
|
},
|
|
{
|
|
"epoch": 0.041116005873715125,
|
|
"grad_norm": 75.05455780029297,
|
|
"learning_rate": 1.9565217391304347e-07,
|
|
"logits/chosen": -0.5194311141967773,
|
|
"logits/rejected": -0.46526244282722473,
|
|
"logps/chosen": -60.553565979003906,
|
|
"logps/ref_chosen": -60.55650329589844,
|
|
"logps/ref_rejected": -91.40111541748047,
|
|
"logps/rejected": -91.7254409790039,
|
|
"loss": 1.3547,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.4918249249458313,
|
|
"margin_dpo/beta_margin_grad_std": 0.015058773569762707,
|
|
"margin_dpo/beta_margin_mean": 0.03272556886076927,
|
|
"margin_dpo/beta_margin_std": 0.06033402308821678,
|
|
"margin_dpo/loss_margin_mean": 0.3272556662559509,
|
|
"margin_dpo/margin_mean": 0.3272559344768524,
|
|
"margin_dpo/margin_std": 0.5973866581916809,
|
|
"step": 28
|
|
},
|
|
{
|
|
"epoch": 0.042584434654919234,
|
|
"grad_norm": 90.46174621582031,
|
|
"learning_rate": 2.028985507246377e-07,
|
|
"logits/chosen": -0.5414900779724121,
|
|
"logits/rejected": -0.49426716566085815,
|
|
"logps/chosen": -57.68913269042969,
|
|
"logps/ref_chosen": -57.80778503417969,
|
|
"logps/ref_rejected": -97.39434814453125,
|
|
"logps/rejected": -97.86851501464844,
|
|
"loss": 1.3289,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.4852002263069153,
|
|
"margin_dpo/beta_margin_grad_std": 0.015466224402189255,
|
|
"margin_dpo/beta_margin_mean": 0.05928221344947815,
|
|
"margin_dpo/beta_margin_std": 0.062019772827625275,
|
|
"margin_dpo/loss_margin_mean": 0.5928221344947815,
|
|
"margin_dpo/margin_mean": 0.5928229689598083,
|
|
"margin_dpo/margin_std": 0.6189556121826172,
|
|
"step": 29
|
|
},
|
|
{
|
|
"epoch": 0.04405286343612335,
|
|
"grad_norm": 87.33443450927734,
|
|
"learning_rate": 2.1014492753623187e-07,
|
|
"logits/chosen": -0.4894167184829712,
|
|
"logits/rejected": -0.45850175619125366,
|
|
"logps/chosen": -52.40911102294922,
|
|
"logps/ref_chosen": -52.57737350463867,
|
|
"logps/ref_rejected": -98.48921203613281,
|
|
"logps/rejected": -99.00884246826172,
|
|
"loss": 1.3197,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.48282885551452637,
|
|
"margin_dpo/beta_margin_grad_std": 0.01581035926938057,
|
|
"margin_dpo/beta_margin_mean": 0.06878980994224548,
|
|
"margin_dpo/beta_margin_std": 0.06341779977083206,
|
|
"margin_dpo/loss_margin_mean": 0.6878980398178101,
|
|
"margin_dpo/margin_mean": 0.6878979206085205,
|
|
"margin_dpo/margin_std": 0.62163245677948,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.04552129221732746,
|
|
"grad_norm": 67.94820404052734,
|
|
"learning_rate": 2.1739130434782607e-07,
|
|
"logits/chosen": -0.5108931064605713,
|
|
"logits/rejected": -0.4666990637779236,
|
|
"logps/chosen": -63.70445251464844,
|
|
"logps/ref_chosen": -63.806922912597656,
|
|
"logps/ref_rejected": -72.89400482177734,
|
|
"logps/rejected": -73.24160766601562,
|
|
"loss": 1.3429,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.48876938223838806,
|
|
"margin_dpo/beta_margin_grad_std": 0.0166544821113348,
|
|
"margin_dpo/beta_margin_mean": 0.04500679671764374,
|
|
"margin_dpo/beta_margin_std": 0.06682661920785904,
|
|
"margin_dpo/loss_margin_mean": 0.450067937374115,
|
|
"margin_dpo/margin_mean": 0.4500678479671478,
|
|
"margin_dpo/margin_std": 0.6665528416633606,
|
|
"step": 31
|
|
},
|
|
{
|
|
"epoch": 0.04698972099853157,
|
|
"grad_norm": 82.90047454833984,
|
|
"learning_rate": 2.2463768115942027e-07,
|
|
"logits/chosen": -0.49858012795448303,
|
|
"logits/rejected": -0.45628952980041504,
|
|
"logps/chosen": -62.53711700439453,
|
|
"logps/ref_chosen": -62.739524841308594,
|
|
"logps/ref_rejected": -89.3175048828125,
|
|
"logps/rejected": -89.8597640991211,
|
|
"loss": 1.3154,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.48145589232444763,
|
|
"margin_dpo/beta_margin_grad_std": 0.023477083072066307,
|
|
"margin_dpo/beta_margin_mean": 0.07446718961000443,
|
|
"margin_dpo/beta_margin_std": 0.09461291879415512,
|
|
"margin_dpo/loss_margin_mean": 0.7446719408035278,
|
|
"margin_dpo/margin_mean": 0.7446720600128174,
|
|
"margin_dpo/margin_std": 0.9450139999389648,
|
|
"step": 32
|
|
},
|
|
{
|
|
"epoch": 0.048458149779735685,
|
|
"grad_norm": 72.11341857910156,
|
|
"learning_rate": 2.318840579710145e-07,
|
|
"logits/chosen": -0.47633564472198486,
|
|
"logits/rejected": -0.4497436285018921,
|
|
"logps/chosen": -53.105873107910156,
|
|
"logps/ref_chosen": -53.26097106933594,
|
|
"logps/ref_rejected": -87.8851318359375,
|
|
"logps/rejected": -88.37184143066406,
|
|
"loss": 1.3243,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.48398149013519287,
|
|
"margin_dpo/beta_margin_grad_std": 0.016650153324007988,
|
|
"margin_dpo/beta_margin_mean": 0.06417950242757797,
|
|
"margin_dpo/beta_margin_std": 0.06679090112447739,
|
|
"margin_dpo/loss_margin_mean": 0.6417950391769409,
|
|
"margin_dpo/margin_mean": 0.6417955160140991,
|
|
"margin_dpo/margin_std": 0.6490182876586914,
|
|
"step": 33
|
|
},
|
|
{
|
|
"epoch": 0.049926578560939794,
|
|
"grad_norm": 77.38883209228516,
|
|
"learning_rate": 2.391304347826087e-07,
|
|
"logits/chosen": -0.5127777457237244,
|
|
"logits/rejected": -0.49532148241996765,
|
|
"logps/chosen": -50.72978210449219,
|
|
"logps/ref_chosen": -50.81732940673828,
|
|
"logps/ref_rejected": -101.92184448242188,
|
|
"logps/rejected": -102.66510009765625,
|
|
"loss": 1.3068,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.4792894721031189,
|
|
"margin_dpo/beta_margin_grad_std": 0.021500185132026672,
|
|
"margin_dpo/beta_margin_mean": 0.0830799788236618,
|
|
"margin_dpo/beta_margin_std": 0.08640186488628387,
|
|
"margin_dpo/loss_margin_mean": 0.8307997584342957,
|
|
"margin_dpo/margin_mean": 0.8307995796203613,
|
|
"margin_dpo/margin_std": 0.8540636301040649,
|
|
"step": 34
|
|
},
|
|
{
|
|
"epoch": 0.0513950073421439,
|
|
"grad_norm": 82.41116333007812,
|
|
"learning_rate": 2.463768115942029e-07,
|
|
"logits/chosen": -0.5374979972839355,
|
|
"logits/rejected": -0.5004309415817261,
|
|
"logps/chosen": -50.88545227050781,
|
|
"logps/ref_chosen": -51.02449035644531,
|
|
"logps/ref_rejected": -106.82443237304688,
|
|
"logps/rejected": -107.90895080566406,
|
|
"loss": 1.2708,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.4696078896522522,
|
|
"margin_dpo/beta_margin_grad_std": 0.02748698741197586,
|
|
"margin_dpo/beta_margin_mean": 0.12235570698976517,
|
|
"margin_dpo/beta_margin_std": 0.11256185173988342,
|
|
"margin_dpo/loss_margin_mean": 1.2235571146011353,
|
|
"margin_dpo/margin_mean": 1.2235569953918457,
|
|
"margin_dpo/margin_std": 1.111976146697998,
|
|
"step": 35
|
|
},
|
|
{
|
|
"epoch": 0.05286343612334802,
|
|
"grad_norm": 72.79762268066406,
|
|
"learning_rate": 2.536231884057971e-07,
|
|
"logits/chosen": -0.5538948774337769,
|
|
"logits/rejected": -0.517404317855835,
|
|
"logps/chosen": -51.94648742675781,
|
|
"logps/ref_chosen": -51.991493225097656,
|
|
"logps/ref_rejected": -86.04061889648438,
|
|
"logps/rejected": -87.11822509765625,
|
|
"loss": 1.2813,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.47209563851356506,
|
|
"margin_dpo/beta_margin_grad_std": 0.03178109601140022,
|
|
"margin_dpo/beta_margin_mean": 0.11225982010364532,
|
|
"margin_dpo/beta_margin_std": 0.12831299006938934,
|
|
"margin_dpo/loss_margin_mean": 1.1225981712341309,
|
|
"margin_dpo/margin_mean": 1.122597098350525,
|
|
"margin_dpo/margin_std": 1.2439404726028442,
|
|
"step": 36
|
|
},
|
|
{
|
|
"epoch": 0.05433186490455213,
|
|
"grad_norm": 61.13553237915039,
|
|
"learning_rate": 2.6086956521739126e-07,
|
|
"logits/chosen": -0.5280976295471191,
|
|
"logits/rejected": -0.4858455955982208,
|
|
"logps/chosen": -62.78415298461914,
|
|
"logps/ref_chosen": -62.807106018066406,
|
|
"logps/ref_rejected": -77.89507293701172,
|
|
"logps/rejected": -78.90142059326172,
|
|
"loss": 1.2911,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.47450384497642517,
|
|
"margin_dpo/beta_margin_grad_std": 0.03514566645026207,
|
|
"margin_dpo/beta_margin_mean": 0.10293034464120865,
|
|
"margin_dpo/beta_margin_std": 0.14328184723854065,
|
|
"margin_dpo/loss_margin_mean": 1.0293034315109253,
|
|
"margin_dpo/margin_mean": 1.0293034315109253,
|
|
"margin_dpo/margin_std": 1.3807631731033325,
|
|
"step": 37
|
|
},
|
|
{
|
|
"epoch": 0.055800293685756244,
|
|
"grad_norm": 70.00904083251953,
|
|
"learning_rate": 2.681159420289855e-07,
|
|
"logits/chosen": -0.5190426111221313,
|
|
"logits/rejected": -0.4862367510795593,
|
|
"logps/chosen": -48.24530792236328,
|
|
"logps/ref_chosen": -48.39051818847656,
|
|
"logps/ref_rejected": -97.91244506835938,
|
|
"logps/rejected": -99.11785888671875,
|
|
"loss": 1.262,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.4666314721107483,
|
|
"margin_dpo/beta_margin_grad_std": 0.03878392279148102,
|
|
"margin_dpo/beta_margin_mean": 0.13506263494491577,
|
|
"margin_dpo/beta_margin_std": 0.15932665765285492,
|
|
"margin_dpo/loss_margin_mean": 1.3506262302398682,
|
|
"margin_dpo/margin_mean": 1.3506265878677368,
|
|
"margin_dpo/margin_std": 1.575331449508667,
|
|
"step": 38
|
|
},
|
|
{
|
|
"epoch": 0.05726872246696035,
|
|
"grad_norm": 74.47781372070312,
|
|
"learning_rate": 2.753623188405797e-07,
|
|
"logits/chosen": -0.5537021160125732,
|
|
"logits/rejected": -0.5135682821273804,
|
|
"logps/chosen": -50.65707015991211,
|
|
"logps/ref_chosen": -50.75046920776367,
|
|
"logps/ref_rejected": -78.56951141357422,
|
|
"logps/rejected": -80.16737365722656,
|
|
"loss": 1.2298,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.45806559920310974,
|
|
"margin_dpo/beta_margin_grad_std": 0.036758922040462494,
|
|
"margin_dpo/beta_margin_mean": 0.16912682354450226,
|
|
"margin_dpo/beta_margin_std": 0.14913904666900635,
|
|
"margin_dpo/loss_margin_mean": 1.6912682056427002,
|
|
"margin_dpo/margin_mean": 1.6912682056427002,
|
|
"margin_dpo/margin_std": 1.4713746309280396,
|
|
"step": 39
|
|
},
|
|
{
|
|
"epoch": 0.05873715124816446,
|
|
"grad_norm": 59.9489631652832,
|
|
"learning_rate": 2.8260869565217386e-07,
|
|
"logits/chosen": -0.5245569348335266,
|
|
"logits/rejected": -0.4949991703033447,
|
|
"logps/chosen": -57.77392578125,
|
|
"logps/ref_chosen": -57.985069274902344,
|
|
"logps/ref_rejected": -74.30007934570312,
|
|
"logps/rejected": -75.65821075439453,
|
|
"loss": 1.243,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.46128079295158386,
|
|
"margin_dpo/beta_margin_grad_std": 0.04237818345427513,
|
|
"margin_dpo/beta_margin_mean": 0.1569286286830902,
|
|
"margin_dpo/beta_margin_std": 0.1742551028728485,
|
|
"margin_dpo/loss_margin_mean": 1.5692862272262573,
|
|
"margin_dpo/margin_mean": 1.5692870616912842,
|
|
"margin_dpo/margin_std": 1.697884202003479,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.06020558002936858,
|
|
"grad_norm": 67.88613891601562,
|
|
"learning_rate": 2.898550724637681e-07,
|
|
"logits/chosen": -0.5592871308326721,
|
|
"logits/rejected": -0.5240367650985718,
|
|
"logps/chosen": -62.67747497558594,
|
|
"logps/ref_chosen": -62.69581604003906,
|
|
"logps/ref_rejected": -97.02352905273438,
|
|
"logps/rejected": -98.87300109863281,
|
|
"loss": 1.2195,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.4541543424129486,
|
|
"margin_dpo/beta_margin_grad_std": 0.05179302766919136,
|
|
"margin_dpo/beta_margin_mean": 0.18678142130374908,
|
|
"margin_dpo/beta_margin_std": 0.21468721330165863,
|
|
"margin_dpo/loss_margin_mean": 1.8678141832351685,
|
|
"margin_dpo/margin_mean": 1.867814540863037,
|
|
"margin_dpo/margin_std": 2.0870983600616455,
|
|
"step": 41
|
|
},
|
|
{
|
|
"epoch": 0.06167400881057269,
|
|
"grad_norm": 78.81612396240234,
|
|
"learning_rate": 2.971014492753623e-07,
|
|
"logits/chosen": -0.5433309674263,
|
|
"logits/rejected": -0.49680295586586,
|
|
"logps/chosen": -58.707366943359375,
|
|
"logps/ref_chosen": -58.96642303466797,
|
|
"logps/ref_rejected": -109.90837097167969,
|
|
"logps/rejected": -112.25081634521484,
|
|
"loss": 1.1578,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.4366336166858673,
|
|
"margin_dpo/beta_margin_grad_std": 0.058427974581718445,
|
|
"margin_dpo/beta_margin_mean": 0.2601499557495117,
|
|
"margin_dpo/beta_margin_std": 0.24821382761001587,
|
|
"margin_dpo/loss_margin_mean": 2.601499557495117,
|
|
"margin_dpo/margin_mean": 2.601499319076538,
|
|
"margin_dpo/margin_std": 2.445554733276367,
|
|
"step": 42
|
|
},
|
|
{
|
|
"epoch": 0.0631424375917768,
|
|
"grad_norm": 72.23222351074219,
|
|
"learning_rate": 3.043478260869565e-07,
|
|
"logits/chosen": -0.5568352341651917,
|
|
"logits/rejected": -0.532639741897583,
|
|
"logps/chosen": -53.65935516357422,
|
|
"logps/ref_chosen": -54.15599822998047,
|
|
"logps/ref_rejected": -96.48019409179688,
|
|
"logps/rejected": -98.41513061523438,
|
|
"loss": 1.1675,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.44025009870529175,
|
|
"margin_dpo/beta_margin_grad_std": 0.04758695140480995,
|
|
"margin_dpo/beta_margin_mean": 0.24315857887268066,
|
|
"margin_dpo/beta_margin_std": 0.19878432154655457,
|
|
"margin_dpo/loss_margin_mean": 2.4315857887268066,
|
|
"margin_dpo/margin_mean": 2.4315857887268066,
|
|
"margin_dpo/margin_std": 1.964142918586731,
|
|
"step": 43
|
|
},
|
|
{
|
|
"epoch": 0.06461086637298091,
|
|
"grad_norm": 78.49581909179688,
|
|
"learning_rate": 3.115942028985507e-07,
|
|
"logits/chosen": -0.458575576543808,
|
|
"logits/rejected": -0.43896228075027466,
|
|
"logps/chosen": -49.86518859863281,
|
|
"logps/ref_chosen": -50.07849884033203,
|
|
"logps/ref_rejected": -108.78376007080078,
|
|
"logps/rejected": -111.42298889160156,
|
|
"loss": 1.1338,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.43024110794067383,
|
|
"margin_dpo/beta_margin_grad_std": 0.0542747788131237,
|
|
"margin_dpo/beta_margin_mean": 0.2852535545825958,
|
|
"margin_dpo/beta_margin_std": 0.2277490794658661,
|
|
"margin_dpo/loss_margin_mean": 2.8525354862213135,
|
|
"margin_dpo/margin_mean": 2.852534532546997,
|
|
"margin_dpo/margin_std": 2.270460605621338,
|
|
"step": 44
|
|
},
|
|
{
|
|
"epoch": 0.06607929515418502,
|
|
"grad_norm": 62.053192138671875,
|
|
"learning_rate": 3.188405797101449e-07,
|
|
"logits/chosen": -0.4600446820259094,
|
|
"logits/rejected": -0.4469829797744751,
|
|
"logps/chosen": -48.24645233154297,
|
|
"logps/ref_chosen": -48.41493225097656,
|
|
"logps/ref_rejected": -77.93643188476562,
|
|
"logps/rejected": -80.1404037475586,
|
|
"loss": 1.1805,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.4424096643924713,
|
|
"margin_dpo/beta_margin_grad_std": 0.06356598436832428,
|
|
"margin_dpo/beta_margin_mean": 0.23724493384361267,
|
|
"margin_dpo/beta_margin_std": 0.2693977653980255,
|
|
"margin_dpo/loss_margin_mean": 2.3724491596221924,
|
|
"margin_dpo/margin_mean": 2.3724491596221924,
|
|
"margin_dpo/margin_std": 2.6500847339630127,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 0.06754772393538913,
|
|
"grad_norm": 69.27433013916016,
|
|
"learning_rate": 3.260869565217391e-07,
|
|
"logits/chosen": -0.5094949007034302,
|
|
"logits/rejected": -0.45755523443222046,
|
|
"logps/chosen": -55.80693435668945,
|
|
"logps/ref_chosen": -55.999427795410156,
|
|
"logps/ref_rejected": -95.652587890625,
|
|
"logps/rejected": -98.43904113769531,
|
|
"loss": 1.1354,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.42856818437576294,
|
|
"margin_dpo/beta_margin_grad_std": 0.07470017671585083,
|
|
"margin_dpo/beta_margin_mean": 0.2978942394256592,
|
|
"margin_dpo/beta_margin_std": 0.3255438506603241,
|
|
"margin_dpo/loss_margin_mean": 2.9789421558380127,
|
|
"margin_dpo/margin_mean": 2.9789419174194336,
|
|
"margin_dpo/margin_std": 3.244965076446533,
|
|
"step": 46
|
|
},
|
|
{
|
|
"epoch": 0.06901615271659324,
|
|
"grad_norm": 65.2599868774414,
|
|
"learning_rate": 3.333333333333333e-07,
|
|
"logits/chosen": -0.5813416242599487,
|
|
"logits/rejected": -0.5291002988815308,
|
|
"logps/chosen": -57.496604919433594,
|
|
"logps/ref_chosen": -57.92607879638672,
|
|
"logps/ref_rejected": -94.67920684814453,
|
|
"logps/rejected": -97.23886108398438,
|
|
"loss": 1.1271,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.42738690972328186,
|
|
"margin_dpo/beta_margin_grad_std": 0.0637550950050354,
|
|
"margin_dpo/beta_margin_mean": 0.29891282320022583,
|
|
"margin_dpo/beta_margin_std": 0.26972696185112,
|
|
"margin_dpo/loss_margin_mean": 2.9891281127929688,
|
|
"margin_dpo/margin_mean": 2.989128351211548,
|
|
"margin_dpo/margin_std": 2.6342062950134277,
|
|
"step": 47
|
|
},
|
|
{
|
|
"epoch": 0.07048458149779736,
|
|
"grad_norm": 73.67699432373047,
|
|
"learning_rate": 3.4057971014492755e-07,
|
|
"logits/chosen": -0.5998705625534058,
|
|
"logits/rejected": -0.5423353910446167,
|
|
"logps/chosen": -57.117156982421875,
|
|
"logps/ref_chosen": -57.188072204589844,
|
|
"logps/ref_rejected": -88.0166015625,
|
|
"logps/rejected": -91.08055877685547,
|
|
"loss": 1.1227,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.4244069755077362,
|
|
"margin_dpo/beta_margin_grad_std": 0.07627448439598083,
|
|
"margin_dpo/beta_margin_mean": 0.3134877383708954,
|
|
"margin_dpo/beta_margin_std": 0.32677435874938965,
|
|
"margin_dpo/loss_margin_mean": 3.1348772048950195,
|
|
"margin_dpo/margin_mean": 3.1348774433135986,
|
|
"margin_dpo/margin_std": 3.0109379291534424,
|
|
"step": 48
|
|
},
|
|
{
|
|
"epoch": 0.07195301027900147,
|
|
"grad_norm": 61.355953216552734,
|
|
"learning_rate": 3.478260869565217e-07,
|
|
"logits/chosen": -0.5448025465011597,
|
|
"logits/rejected": -0.4857603907585144,
|
|
"logps/chosen": -61.36932373046875,
|
|
"logps/ref_chosen": -61.685264587402344,
|
|
"logps/ref_rejected": -83.76747131347656,
|
|
"logps/rejected": -87.26129913330078,
|
|
"loss": 1.0774,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.41020649671554565,
|
|
"margin_dpo/beta_margin_grad_std": 0.08793335407972336,
|
|
"margin_dpo/beta_margin_mean": 0.3809766173362732,
|
|
"margin_dpo/beta_margin_std": 0.3965732753276825,
|
|
"margin_dpo/loss_margin_mean": 3.8097660541534424,
|
|
"margin_dpo/margin_mean": 3.8097658157348633,
|
|
"margin_dpo/margin_std": 3.869323253631592,
|
|
"step": 49
|
|
},
|
|
{
|
|
"epoch": 0.07342143906020558,
|
|
"grad_norm": 62.80997085571289,
|
|
"learning_rate": 3.5507246376811595e-07,
|
|
"logits/chosen": -0.5425491333007812,
|
|
"logits/rejected": -0.5065620541572571,
|
|
"logps/chosen": -58.89775848388672,
|
|
"logps/ref_chosen": -58.72413635253906,
|
|
"logps/ref_rejected": -96.35814666748047,
|
|
"logps/rejected": -100.69513702392578,
|
|
"loss": 1.0518,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.40193936228752136,
|
|
"margin_dpo/beta_margin_grad_std": 0.09261800348758698,
|
|
"margin_dpo/beta_margin_mean": 0.4163365066051483,
|
|
"margin_dpo/beta_margin_std": 0.4100196361541748,
|
|
"margin_dpo/loss_margin_mean": 4.163364887237549,
|
|
"margin_dpo/margin_mean": 4.163365364074707,
|
|
"margin_dpo/margin_std": 4.094795227050781,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.07488986784140969,
|
|
"grad_norm": 52.91781234741211,
|
|
"learning_rate": 3.6231884057971015e-07,
|
|
"logits/chosen": -0.5184497833251953,
|
|
"logits/rejected": -0.4852331280708313,
|
|
"logps/chosen": -61.69359588623047,
|
|
"logps/ref_chosen": -61.3736686706543,
|
|
"logps/ref_rejected": -76.00199890136719,
|
|
"logps/rejected": -80.33977508544922,
|
|
"loss": 1.085,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.40868327021598816,
|
|
"margin_dpo/beta_margin_grad_std": 0.1110108494758606,
|
|
"margin_dpo/beta_margin_mean": 0.4017845094203949,
|
|
"margin_dpo/beta_margin_std": 0.5204705595970154,
|
|
"margin_dpo/loss_margin_mean": 4.017845153808594,
|
|
"margin_dpo/margin_mean": 4.017845153808594,
|
|
"margin_dpo/margin_std": 5.1221513748168945,
|
|
"step": 51
|
|
},
|
|
{
|
|
"epoch": 0.0763582966226138,
|
|
"grad_norm": 58.923404693603516,
|
|
"learning_rate": 3.695652173913043e-07,
|
|
"logits/chosen": -0.5524120330810547,
|
|
"logits/rejected": -0.496574342250824,
|
|
"logps/chosen": -51.979454040527344,
|
|
"logps/ref_chosen": -52.33735656738281,
|
|
"logps/ref_rejected": -79.97391510009766,
|
|
"logps/rejected": -85.81260681152344,
|
|
"loss": 0.9189,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.3595433533191681,
|
|
"margin_dpo/beta_margin_grad_std": 0.10714302211999893,
|
|
"margin_dpo/beta_margin_mean": 0.6196599006652832,
|
|
"margin_dpo/beta_margin_std": 0.5214123129844666,
|
|
"margin_dpo/loss_margin_mean": 6.196599006652832,
|
|
"margin_dpo/margin_mean": 6.196599006652832,
|
|
"margin_dpo/margin_std": 5.190753936767578,
|
|
"step": 52
|
|
},
|
|
{
|
|
"epoch": 0.07782672540381791,
|
|
"grad_norm": 58.20880889892578,
|
|
"learning_rate": 3.7681159420289855e-07,
|
|
"logits/chosen": -0.6073682904243469,
|
|
"logits/rejected": -0.5856744050979614,
|
|
"logps/chosen": -53.506500244140625,
|
|
"logps/ref_chosen": -53.31465530395508,
|
|
"logps/ref_rejected": -91.7835922241211,
|
|
"logps/rejected": -98.30122375488281,
|
|
"loss": 0.9446,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.36480841040611267,
|
|
"margin_dpo/beta_margin_grad_std": 0.12540318071842194,
|
|
"margin_dpo/beta_margin_mean": 0.6325778961181641,
|
|
"margin_dpo/beta_margin_std": 0.6903671622276306,
|
|
"margin_dpo/loss_margin_mean": 6.325778961181641,
|
|
"margin_dpo/margin_mean": 6.325778484344482,
|
|
"margin_dpo/margin_std": 6.248142242431641,
|
|
"step": 53
|
|
},
|
|
{
|
|
"epoch": 0.07929515418502203,
|
|
"grad_norm": 59.29412841796875,
|
|
"learning_rate": 3.8405797101449274e-07,
|
|
"logits/chosen": -0.633226752281189,
|
|
"logits/rejected": -0.5815136432647705,
|
|
"logps/chosen": -51.13933563232422,
|
|
"logps/ref_chosen": -50.68865966796875,
|
|
"logps/ref_rejected": -91.71539306640625,
|
|
"logps/rejected": -97.51422119140625,
|
|
"loss": 0.9783,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.3783862590789795,
|
|
"margin_dpo/beta_margin_grad_std": 0.10563214868307114,
|
|
"margin_dpo/beta_margin_mean": 0.5348156690597534,
|
|
"margin_dpo/beta_margin_std": 0.5101956725120544,
|
|
"margin_dpo/loss_margin_mean": 5.348156452178955,
|
|
"margin_dpo/margin_mean": 5.348155498504639,
|
|
"margin_dpo/margin_std": 5.086174488067627,
|
|
"step": 54
|
|
},
|
|
{
|
|
"epoch": 0.08076358296622614,
|
|
"grad_norm": 53.738956451416016,
|
|
"learning_rate": 3.9130434782608694e-07,
|
|
"logits/chosen": -0.6361401081085205,
|
|
"logits/rejected": -0.5729630589485168,
|
|
"logps/chosen": -63.57060241699219,
|
|
"logps/ref_chosen": -62.615234375,
|
|
"logps/ref_rejected": -88.99349975585938,
|
|
"logps/rejected": -96.49041748046875,
|
|
"loss": 0.9548,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.361075222492218,
|
|
"margin_dpo/beta_margin_grad_std": 0.14729972183704376,
|
|
"margin_dpo/beta_margin_mean": 0.6541542410850525,
|
|
"margin_dpo/beta_margin_std": 0.7597689032554626,
|
|
"margin_dpo/loss_margin_mean": 6.541542053222656,
|
|
"margin_dpo/margin_mean": 6.541542053222656,
|
|
"margin_dpo/margin_std": 7.533283233642578,
|
|
"step": 55
|
|
},
|
|
{
|
|
"epoch": 0.08223201174743025,
|
|
"grad_norm": 48.09397506713867,
|
|
"learning_rate": 3.9855072463768114e-07,
|
|
"logits/chosen": -0.5945051908493042,
|
|
"logits/rejected": -0.5514425039291382,
|
|
"logps/chosen": -58.66962432861328,
|
|
"logps/ref_chosen": -57.93273162841797,
|
|
"logps/ref_rejected": -94.1744384765625,
|
|
"logps/rejected": -101.10653686523438,
|
|
"loss": 0.9775,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.36780592799186707,
|
|
"margin_dpo/beta_margin_grad_std": 0.14850637316703796,
|
|
"margin_dpo/beta_margin_mean": 0.6195200085639954,
|
|
"margin_dpo/beta_margin_std": 0.7477858066558838,
|
|
"margin_dpo/loss_margin_mean": 6.195199966430664,
|
|
"margin_dpo/margin_mean": 6.195199012756348,
|
|
"margin_dpo/margin_std": 7.399816989898682,
|
|
"step": 56
|
|
},
|
|
{
|
|
"epoch": 0.08370044052863436,
|
|
"grad_norm": 54.234169006347656,
|
|
"learning_rate": 4.057971014492754e-07,
|
|
"logits/chosen": -0.5641357898712158,
|
|
"logits/rejected": -0.5353480577468872,
|
|
"logps/chosen": -71.26276397705078,
|
|
"logps/ref_chosen": -70.49528503417969,
|
|
"logps/ref_rejected": -95.56546020507812,
|
|
"logps/rejected": -103.23522186279297,
|
|
"loss": 0.9078,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.34864187240600586,
|
|
"margin_dpo/beta_margin_grad_std": 0.13589681684970856,
|
|
"margin_dpo/beta_margin_mean": 0.6902284026145935,
|
|
"margin_dpo/beta_margin_std": 0.6726579070091248,
|
|
"margin_dpo/loss_margin_mean": 6.902284145355225,
|
|
"margin_dpo/margin_mean": 6.902284145355225,
|
|
"margin_dpo/margin_std": 6.639451026916504,
|
|
"step": 57
|
|
},
|
|
{
|
|
"epoch": 0.08516886930983847,
|
|
"grad_norm": 59.243927001953125,
|
|
"learning_rate": 4.1304347826086954e-07,
|
|
"logits/chosen": -0.5894064903259277,
|
|
"logits/rejected": -0.5127171874046326,
|
|
"logps/chosen": -63.23316955566406,
|
|
"logps/ref_chosen": -62.13294219970703,
|
|
"logps/ref_rejected": -84.61729431152344,
|
|
"logps/rejected": -93.32413482666016,
|
|
"loss": 0.8977,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.3427189290523529,
|
|
"margin_dpo/beta_margin_grad_std": 0.15285082161426544,
|
|
"margin_dpo/beta_margin_mean": 0.7606607675552368,
|
|
"margin_dpo/beta_margin_std": 0.8165130615234375,
|
|
"margin_dpo/loss_margin_mean": 7.606607437133789,
|
|
"margin_dpo/margin_mean": 7.606607437133789,
|
|
"margin_dpo/margin_std": 8.09335708618164,
|
|
"step": 58
|
|
},
|
|
{
|
|
"epoch": 0.08663729809104258,
|
|
"grad_norm": 55.42934799194336,
|
|
"learning_rate": 4.2028985507246374e-07,
|
|
"logits/chosen": -0.6423487663269043,
|
|
"logits/rejected": -0.6032625436782837,
|
|
"logps/chosen": -53.42650604248047,
|
|
"logps/ref_chosen": -51.932525634765625,
|
|
"logps/ref_rejected": -88.88520050048828,
|
|
"logps/rejected": -98.86468505859375,
|
|
"loss": 0.8575,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.32869505882263184,
|
|
"margin_dpo/beta_margin_grad_std": 0.15742561221122742,
|
|
"margin_dpo/beta_margin_mean": 0.8485509157180786,
|
|
"margin_dpo/beta_margin_std": 0.8816735148429871,
|
|
"margin_dpo/loss_margin_mean": 8.485508918762207,
|
|
"margin_dpo/margin_mean": 8.485508918762207,
|
|
"margin_dpo/margin_std": 8.604471206665039,
|
|
"step": 59
|
|
},
|
|
{
|
|
"epoch": 0.0881057268722467,
|
|
"grad_norm": 64.29039764404297,
|
|
"learning_rate": 4.2753623188405794e-07,
|
|
"logits/chosen": -0.6296500563621521,
|
|
"logits/rejected": -0.5711052417755127,
|
|
"logps/chosen": -63.62670135498047,
|
|
"logps/ref_chosen": -60.94218444824219,
|
|
"logps/ref_rejected": -85.39340209960938,
|
|
"logps/rejected": -94.76435089111328,
|
|
"loss": 0.9555,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.3545895218849182,
|
|
"margin_dpo/beta_margin_grad_std": 0.15808549523353577,
|
|
"margin_dpo/beta_margin_mean": 0.6686438918113708,
|
|
"margin_dpo/beta_margin_std": 0.7756204009056091,
|
|
"margin_dpo/loss_margin_mean": 6.686439037322998,
|
|
"margin_dpo/margin_mean": 6.686439514160156,
|
|
"margin_dpo/margin_std": 7.678452968597412,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.08957415565345081,
|
|
"grad_norm": 54.964107513427734,
|
|
"learning_rate": 4.3478260869565214e-07,
|
|
"logits/chosen": -0.6372621655464172,
|
|
"logits/rejected": -0.6041065454483032,
|
|
"logps/chosen": -62.14350128173828,
|
|
"logps/ref_chosen": -60.633522033691406,
|
|
"logps/ref_rejected": -89.85249328613281,
|
|
"logps/rejected": -99.61428833007812,
|
|
"loss": 0.9341,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.34661665558815,
|
|
"margin_dpo/beta_margin_grad_std": 0.1781352162361145,
|
|
"margin_dpo/beta_margin_mean": 0.8251805901527405,
|
|
"margin_dpo/beta_margin_std": 1.1422574520111084,
|
|
"margin_dpo/loss_margin_mean": 8.251806259155273,
|
|
"margin_dpo/margin_mean": 8.251806259155273,
|
|
"margin_dpo/margin_std": 11.240764617919922,
|
|
"step": 61
|
|
},
|
|
{
|
|
"epoch": 0.09104258443465492,
|
|
"grad_norm": 58.057708740234375,
|
|
"learning_rate": 4.420289855072464e-07,
|
|
"logits/chosen": -0.6090478897094727,
|
|
"logits/rejected": -0.5749986171722412,
|
|
"logps/chosen": -57.778465270996094,
|
|
"logps/ref_chosen": -56.15077209472656,
|
|
"logps/ref_rejected": -75.56619262695312,
|
|
"logps/rejected": -83.39352416992188,
|
|
"loss": 0.9993,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.37108179926872253,
|
|
"margin_dpo/beta_margin_grad_std": 0.15791070461273193,
|
|
"margin_dpo/beta_margin_mean": 0.6199632883071899,
|
|
"margin_dpo/beta_margin_std": 0.8312649130821228,
|
|
"margin_dpo/loss_margin_mean": 6.19963264465332,
|
|
"margin_dpo/margin_mean": 6.19963264465332,
|
|
"margin_dpo/margin_std": 8.127958297729492,
|
|
"step": 62
|
|
},
|
|
{
|
|
"epoch": 0.09251101321585903,
|
|
"grad_norm": 56.769561767578125,
|
|
"learning_rate": 4.4927536231884053e-07,
|
|
"logits/chosen": -0.5860311388969421,
|
|
"logits/rejected": -0.5402973890304565,
|
|
"logps/chosen": -75.79495239257812,
|
|
"logps/ref_chosen": -73.14739227294922,
|
|
"logps/ref_rejected": -97.61006164550781,
|
|
"logps/rejected": -108.62382507324219,
|
|
"loss": 0.8773,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.3284067213535309,
|
|
"margin_dpo/beta_margin_grad_std": 0.16741114854812622,
|
|
"margin_dpo/beta_margin_mean": 0.8366211652755737,
|
|
"margin_dpo/beta_margin_std": 0.9040850400924683,
|
|
"margin_dpo/loss_margin_mean": 8.366211891174316,
|
|
"margin_dpo/margin_mean": 8.366212844848633,
|
|
"margin_dpo/margin_std": 8.857807159423828,
|
|
"step": 63
|
|
},
|
|
{
|
|
"epoch": 0.09397944199706314,
|
|
"grad_norm": 52.091590881347656,
|
|
"learning_rate": 4.5652173913043473e-07,
|
|
"logits/chosen": -0.5791685581207275,
|
|
"logits/rejected": -0.5466402769088745,
|
|
"logps/chosen": -55.00431823730469,
|
|
"logps/ref_chosen": -53.99859619140625,
|
|
"logps/ref_rejected": -93.53020477294922,
|
|
"logps/rejected": -104.35765075683594,
|
|
"loss": 0.8493,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.3144356906414032,
|
|
"margin_dpo/beta_margin_grad_std": 0.1805381327867508,
|
|
"margin_dpo/beta_margin_mean": 0.9821729063987732,
|
|
"margin_dpo/beta_margin_std": 1.1361504793167114,
|
|
"margin_dpo/loss_margin_mean": 9.821728706359863,
|
|
"margin_dpo/margin_mean": 9.82172966003418,
|
|
"margin_dpo/margin_std": 11.043643951416016,
|
|
"step": 64
|
|
},
|
|
{
|
|
"epoch": 0.09544787077826726,
|
|
"grad_norm": 54.09811782836914,
|
|
"learning_rate": 4.63768115942029e-07,
|
|
"logits/chosen": -0.6608457565307617,
|
|
"logits/rejected": -0.6478947401046753,
|
|
"logps/chosen": -68.0100326538086,
|
|
"logps/ref_chosen": -64.83599853515625,
|
|
"logps/ref_rejected": -109.94645690917969,
|
|
"logps/rejected": -122.96417236328125,
|
|
"loss": 0.8585,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.31038472056388855,
|
|
"margin_dpo/beta_margin_grad_std": 0.18928615748882294,
|
|
"margin_dpo/beta_margin_mean": 0.9843679666519165,
|
|
"margin_dpo/beta_margin_std": 1.1074903011322021,
|
|
"margin_dpo/loss_margin_mean": 9.843679428100586,
|
|
"margin_dpo/margin_mean": 9.843679428100586,
|
|
"margin_dpo/margin_std": 10.951974868774414,
|
|
"step": 65
|
|
},
|
|
{
|
|
"epoch": 0.09691629955947137,
|
|
"grad_norm": 52.60911560058594,
|
|
"learning_rate": 4.7101449275362313e-07,
|
|
"logits/chosen": -0.6474887132644653,
|
|
"logits/rejected": -0.6150294542312622,
|
|
"logps/chosen": -54.36174011230469,
|
|
"logps/ref_chosen": -51.44352722167969,
|
|
"logps/ref_rejected": -75.63629150390625,
|
|
"logps/rejected": -87.54934692382812,
|
|
"loss": 0.8859,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.3307109475135803,
|
|
"margin_dpo/beta_margin_grad_std": 0.1775081753730774,
|
|
"margin_dpo/beta_margin_mean": 0.8994826078414917,
|
|
"margin_dpo/beta_margin_std": 1.1073994636535645,
|
|
"margin_dpo/loss_margin_mean": 8.99482536315918,
|
|
"margin_dpo/margin_mean": 8.99482536315918,
|
|
"margin_dpo/margin_std": 10.87942123413086,
|
|
"step": 66
|
|
},
|
|
{
|
|
"epoch": 0.09838472834067548,
|
|
"grad_norm": 52.46964645385742,
|
|
"learning_rate": 4.782608695652174e-07,
|
|
"logits/chosen": -0.5966418981552124,
|
|
"logits/rejected": -0.5537301301956177,
|
|
"logps/chosen": -61.81807327270508,
|
|
"logps/ref_chosen": -59.34080505371094,
|
|
"logps/ref_rejected": -72.78729248046875,
|
|
"logps/rejected": -84.54171752929688,
|
|
"loss": 0.8693,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.32543134689331055,
|
|
"margin_dpo/beta_margin_grad_std": 0.17773009836673737,
|
|
"margin_dpo/beta_margin_mean": 0.9277163147926331,
|
|
"margin_dpo/beta_margin_std": 1.1012858152389526,
|
|
"margin_dpo/loss_margin_mean": 9.2771635055542,
|
|
"margin_dpo/margin_mean": 9.277162551879883,
|
|
"margin_dpo/margin_std": 10.92019271850586,
|
|
"step": 67
|
|
},
|
|
{
|
|
"epoch": 0.09985315712187959,
|
|
"grad_norm": 52.40779113769531,
|
|
"learning_rate": 4.855072463768116e-07,
|
|
"logits/chosen": -0.6349166631698608,
|
|
"logits/rejected": -0.5751150250434875,
|
|
"logps/chosen": -67.98988342285156,
|
|
"logps/ref_chosen": -65.2058334350586,
|
|
"logps/ref_rejected": -77.20724487304688,
|
|
"logps/rejected": -88.71192932128906,
|
|
"loss": 0.8459,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.3253341615200043,
|
|
"margin_dpo/beta_margin_grad_std": 0.15630275011062622,
|
|
"margin_dpo/beta_margin_mean": 0.8720625042915344,
|
|
"margin_dpo/beta_margin_std": 0.9045540690422058,
|
|
"margin_dpo/loss_margin_mean": 8.720624923706055,
|
|
"margin_dpo/margin_mean": 8.720624923706055,
|
|
"margin_dpo/margin_std": 8.963220596313477,
|
|
"step": 68
|
|
},
|
|
{
|
|
"epoch": 0.1013215859030837,
|
|
"grad_norm": 53.23897933959961,
|
|
"learning_rate": 4.927536231884058e-07,
|
|
"logits/chosen": -0.6085792183876038,
|
|
"logits/rejected": -0.5847188234329224,
|
|
"logps/chosen": -62.99334716796875,
|
|
"logps/ref_chosen": -59.81924057006836,
|
|
"logps/ref_rejected": -103.38886260986328,
|
|
"logps/rejected": -116.94822692871094,
|
|
"loss": 0.7777,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.30277135968208313,
|
|
"margin_dpo/beta_margin_grad_std": 0.16077764332294464,
|
|
"margin_dpo/beta_margin_mean": 1.0385247468948364,
|
|
"margin_dpo/beta_margin_std": 1.040853500366211,
|
|
"margin_dpo/loss_margin_mean": 10.385248184204102,
|
|
"margin_dpo/margin_mean": 10.385248184204102,
|
|
"margin_dpo/margin_std": 10.297136306762695,
|
|
"step": 69
|
|
},
|
|
{
|
|
"epoch": 0.1027900146842878,
|
|
"grad_norm": 59.40316390991211,
|
|
"learning_rate": 5e-07,
|
|
"logits/chosen": -0.625554621219635,
|
|
"logits/rejected": -0.5908818244934082,
|
|
"logps/chosen": -66.4103012084961,
|
|
"logps/ref_chosen": -61.930641174316406,
|
|
"logps/ref_rejected": -91.060791015625,
|
|
"logps/rejected": -106.7230453491211,
|
|
"loss": 0.7928,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.30000537633895874,
|
|
"margin_dpo/beta_margin_grad_std": 0.18498124182224274,
|
|
"margin_dpo/beta_margin_mean": 1.118260145187378,
|
|
"margin_dpo/beta_margin_std": 1.199088215827942,
|
|
"margin_dpo/loss_margin_mean": 11.182600975036621,
|
|
"margin_dpo/margin_mean": 11.182600975036621,
|
|
"margin_dpo/margin_std": 11.917827606201172,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.10425844346549193,
|
|
"grad_norm": 49.68572998046875,
|
|
"learning_rate": 4.999967061337492e-07,
|
|
"logits/chosen": -0.6752599477767944,
|
|
"logits/rejected": -0.6361984014511108,
|
|
"logps/chosen": -65.69276428222656,
|
|
"logps/ref_chosen": -61.750343322753906,
|
|
"logps/ref_rejected": -97.33662414550781,
|
|
"logps/rejected": -114.14346313476562,
|
|
"loss": 0.702,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.27304375171661377,
|
|
"margin_dpo/beta_margin_grad_std": 0.16720205545425415,
|
|
"margin_dpo/beta_margin_mean": 1.2864418029785156,
|
|
"margin_dpo/beta_margin_std": 1.286440372467041,
|
|
"margin_dpo/loss_margin_mean": 12.86441707611084,
|
|
"margin_dpo/margin_mean": 12.864418029785156,
|
|
"margin_dpo/margin_std": 12.424565315246582,
|
|
"step": 71
|
|
},
|
|
{
|
|
"epoch": 0.10572687224669604,
|
|
"grad_norm": 59.574241638183594,
|
|
"learning_rate": 4.999868246217933e-07,
|
|
"logits/chosen": -0.6442112922668457,
|
|
"logits/rejected": -0.6080772280693054,
|
|
"logps/chosen": -70.28240966796875,
|
|
"logps/ref_chosen": -66.05341339111328,
|
|
"logps/ref_rejected": -95.2869873046875,
|
|
"logps/rejected": -112.89981079101562,
|
|
"loss": 0.7297,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.2696942389011383,
|
|
"margin_dpo/beta_margin_grad_std": 0.19517795741558075,
|
|
"margin_dpo/beta_margin_mean": 1.3383830785751343,
|
|
"margin_dpo/beta_margin_std": 1.3651797771453857,
|
|
"margin_dpo/loss_margin_mean": 13.383831024169922,
|
|
"margin_dpo/margin_mean": 13.383831024169922,
|
|
"margin_dpo/margin_std": 13.636287689208984,
|
|
"step": 72
|
|
},
|
|
{
|
|
"epoch": 0.10719530102790015,
|
|
"grad_norm": 76.11861419677734,
|
|
"learning_rate": 4.999703557245192e-07,
|
|
"logits/chosen": -0.6918191909790039,
|
|
"logits/rejected": -0.6510320901870728,
|
|
"logps/chosen": -72.03385162353516,
|
|
"logps/ref_chosen": -66.25627136230469,
|
|
"logps/ref_rejected": -90.45613861083984,
|
|
"logps/rejected": -109.28495788574219,
|
|
"loss": 0.9513,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.31014135479927063,
|
|
"margin_dpo/beta_margin_grad_std": 0.24917910993099213,
|
|
"margin_dpo/beta_margin_mean": 1.3051246404647827,
|
|
"margin_dpo/beta_margin_std": 1.8701282739639282,
|
|
"margin_dpo/loss_margin_mean": 13.051246643066406,
|
|
"margin_dpo/margin_mean": 13.051246643066406,
|
|
"margin_dpo/margin_std": 18.630680084228516,
|
|
"step": 73
|
|
},
|
|
{
|
|
"epoch": 0.10866372980910426,
|
|
"grad_norm": 71.0533676147461,
|
|
"learning_rate": 4.999472998758977e-07,
|
|
"logits/chosen": -0.6222573518753052,
|
|
"logits/rejected": -0.6104036569595337,
|
|
"logps/chosen": -59.54771423339844,
|
|
"logps/ref_chosen": -53.42488098144531,
|
|
"logps/ref_rejected": -95.94693756103516,
|
|
"logps/rejected": -115.84016418457031,
|
|
"loss": 0.8775,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.28980112075805664,
|
|
"margin_dpo/beta_margin_grad_std": 0.22041486203670502,
|
|
"margin_dpo/beta_margin_mean": 1.3770397901535034,
|
|
"margin_dpo/beta_margin_std": 2.0495500564575195,
|
|
"margin_dpo/loss_margin_mean": 13.770398139953613,
|
|
"margin_dpo/margin_mean": 13.770397186279297,
|
|
"margin_dpo/margin_std": 20.299190521240234,
|
|
"step": 74
|
|
},
|
|
{
|
|
"epoch": 0.11013215859030837,
|
|
"grad_norm": 50.546207427978516,
|
|
"learning_rate": 4.999176576834721e-07,
|
|
"logits/chosen": -0.6528257131576538,
|
|
"logits/rejected": -0.6429094672203064,
|
|
"logps/chosen": -57.421756744384766,
|
|
"logps/ref_chosen": -51.861663818359375,
|
|
"logps/ref_rejected": -111.25397491455078,
|
|
"logps/rejected": -135.87710571289062,
|
|
"loss": 0.6084,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.22819584608078003,
|
|
"margin_dpo/beta_margin_grad_std": 0.2010163813829422,
|
|
"margin_dpo/beta_margin_mean": 1.90630304813385,
|
|
"margin_dpo/beta_margin_std": 1.8703465461730957,
|
|
"margin_dpo/loss_margin_mean": 19.063030242919922,
|
|
"margin_dpo/margin_mean": 19.06302833557129,
|
|
"margin_dpo/margin_std": 18.35777473449707,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.11160058737151249,
|
|
"grad_norm": 63.239871978759766,
|
|
"learning_rate": 4.998814299283415e-07,
|
|
"logits/chosen": -0.7003756165504456,
|
|
"logits/rejected": -0.6578394770622253,
|
|
"logps/chosen": -59.91857147216797,
|
|
"logps/ref_chosen": -53.26604080200195,
|
|
"logps/ref_rejected": -78.21662139892578,
|
|
"logps/rejected": -97.16926574707031,
|
|
"loss": 0.8122,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.28062668442726135,
|
|
"margin_dpo/beta_margin_grad_std": 0.20105010271072388,
|
|
"margin_dpo/beta_margin_mean": 1.2300118207931519,
|
|
"margin_dpo/beta_margin_std": 1.421257495880127,
|
|
"margin_dpo/loss_margin_mean": 12.300118446350098,
|
|
"margin_dpo/margin_mean": 12.300118446350098,
|
|
"margin_dpo/margin_std": 14.157339096069336,
|
|
"step": 76
|
|
},
|
|
{
|
|
"epoch": 0.1130690161527166,
|
|
"grad_norm": 78.45389556884766,
|
|
"learning_rate": 4.998386175651409e-07,
|
|
"logits/chosen": -0.6659625768661499,
|
|
"logits/rejected": -0.6236972212791443,
|
|
"logps/chosen": -63.619422912597656,
|
|
"logps/ref_chosen": -58.0966796875,
|
|
"logps/ref_rejected": -93.77361297607422,
|
|
"logps/rejected": -118.58006286621094,
|
|
"loss": 0.6829,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.2217966765165329,
|
|
"margin_dpo/beta_margin_grad_std": 0.22179701924324036,
|
|
"margin_dpo/beta_margin_mean": 1.9283708333969116,
|
|
"margin_dpo/beta_margin_std": 1.9269988536834717,
|
|
"margin_dpo/loss_margin_mean": 19.283708572387695,
|
|
"margin_dpo/margin_mean": 19.283706665039062,
|
|
"margin_dpo/margin_std": 19.11894989013672,
|
|
"step": 77
|
|
},
|
|
{
|
|
"epoch": 0.1145374449339207,
|
|
"grad_norm": 66.56047058105469,
|
|
"learning_rate": 4.997892217220159e-07,
|
|
"logits/chosen": -0.6366250514984131,
|
|
"logits/rejected": -0.6083469986915588,
|
|
"logps/chosen": -60.89007568359375,
|
|
"logps/ref_chosen": -55.61378479003906,
|
|
"logps/ref_rejected": -84.93436431884766,
|
|
"logps/rejected": -104.90266418457031,
|
|
"loss": 0.7296,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.2656141221523285,
|
|
"margin_dpo/beta_margin_grad_std": 0.21040529012680054,
|
|
"margin_dpo/beta_margin_mean": 1.4692002534866333,
|
|
"margin_dpo/beta_margin_std": 1.5563766956329346,
|
|
"margin_dpo/loss_margin_mean": 14.692002296447754,
|
|
"margin_dpo/margin_mean": 14.69200325012207,
|
|
"margin_dpo/margin_std": 15.322187423706055,
|
|
"step": 78
|
|
},
|
|
{
|
|
"epoch": 0.11600587371512482,
|
|
"grad_norm": 59.296844482421875,
|
|
"learning_rate": 4.997332437005931e-07,
|
|
"logits/chosen": -0.6760110855102539,
|
|
"logits/rejected": -0.6464430093765259,
|
|
"logps/chosen": -60.498695373535156,
|
|
"logps/ref_chosen": -55.45048522949219,
|
|
"logps/ref_rejected": -87.64756774902344,
|
|
"logps/rejected": -108.78245544433594,
|
|
"loss": 0.7766,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.2785935699939728,
|
|
"margin_dpo/beta_margin_grad_std": 0.22844330966472626,
|
|
"margin_dpo/beta_margin_mean": 1.6086679697036743,
|
|
"margin_dpo/beta_margin_std": 1.9045932292938232,
|
|
"margin_dpo/loss_margin_mean": 16.086679458618164,
|
|
"margin_dpo/margin_mean": 16.086679458618164,
|
|
"margin_dpo/margin_std": 18.848827362060547,
|
|
"step": 79
|
|
},
|
|
{
|
|
"epoch": 0.11747430249632893,
|
|
"grad_norm": 63.66164016723633,
|
|
"learning_rate": 4.996706849759452e-07,
|
|
"logits/chosen": -0.7178832292556763,
|
|
"logits/rejected": -0.6710443496704102,
|
|
"logps/chosen": -65.51264190673828,
|
|
"logps/ref_chosen": -58.519290924072266,
|
|
"logps/ref_rejected": -87.54750061035156,
|
|
"logps/rejected": -108.77944946289062,
|
|
"loss": 0.8355,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.293730765581131,
|
|
"margin_dpo/beta_margin_grad_std": 0.22851316630840302,
|
|
"margin_dpo/beta_margin_mean": 1.42385995388031,
|
|
"margin_dpo/beta_margin_std": 1.8568590879440308,
|
|
"margin_dpo/loss_margin_mean": 14.238598823547363,
|
|
"margin_dpo/margin_mean": 14.238598823547363,
|
|
"margin_dpo/margin_std": 17.483436584472656,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.11894273127753303,
|
|
"grad_norm": 72.20431518554688,
|
|
"learning_rate": 4.996015471965529e-07,
|
|
"logits/chosen": -0.7084971070289612,
|
|
"logits/rejected": -0.6748213171958923,
|
|
"logps/chosen": -72.02912902832031,
|
|
"logps/ref_chosen": -66.44886779785156,
|
|
"logps/ref_rejected": -129.66270446777344,
|
|
"logps/rejected": -153.9308624267578,
|
|
"loss": 0.6904,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.24315199255943298,
|
|
"margin_dpo/beta_margin_grad_std": 0.22722284495830536,
|
|
"margin_dpo/beta_margin_mean": 1.8687902688980103,
|
|
"margin_dpo/beta_margin_std": 2.0691728591918945,
|
|
"margin_dpo/loss_margin_mean": 18.687902450561523,
|
|
"margin_dpo/margin_mean": 18.687902450561523,
|
|
"margin_dpo/margin_std": 20.542957305908203,
|
|
"step": 81
|
|
},
|
|
{
|
|
"epoch": 0.12041116005873716,
|
|
"grad_norm": 87.32213592529297,
|
|
"learning_rate": 4.995258321842611e-07,
|
|
"logits/chosen": -0.6286877393722534,
|
|
"logits/rejected": -0.6112765073776245,
|
|
"logps/chosen": -59.366302490234375,
|
|
"logps/ref_chosen": -52.232383728027344,
|
|
"logps/ref_rejected": -90.74325561523438,
|
|
"logps/rejected": -112.9305419921875,
|
|
"loss": 0.9632,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.2833250164985657,
|
|
"margin_dpo/beta_margin_grad_std": 0.2522350549697876,
|
|
"margin_dpo/beta_margin_mean": 1.5053365230560303,
|
|
"margin_dpo/beta_margin_std": 2.1997368335723877,
|
|
"margin_dpo/loss_margin_mean": 15.053364753723145,
|
|
"margin_dpo/margin_mean": 15.053365707397461,
|
|
"margin_dpo/margin_std": 21.363815307617188,
|
|
"step": 82
|
|
},
|
|
{
|
|
"epoch": 0.12187958883994127,
|
|
"grad_norm": 67.5205307006836,
|
|
"learning_rate": 4.994435419342304e-07,
|
|
"logits/chosen": -0.6808498501777649,
|
|
"logits/rejected": -0.6353092193603516,
|
|
"logps/chosen": -62.771873474121094,
|
|
"logps/ref_chosen": -55.82738494873047,
|
|
"logps/ref_rejected": -103.71590423583984,
|
|
"logps/rejected": -127.50509643554688,
|
|
"loss": 0.7422,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.2590833604335785,
|
|
"margin_dpo/beta_margin_grad_std": 0.22852419316768646,
|
|
"margin_dpo/beta_margin_mean": 1.6844712495803833,
|
|
"margin_dpo/beta_margin_std": 1.8570791482925415,
|
|
"margin_dpo/loss_margin_mean": 16.844711303710938,
|
|
"margin_dpo/margin_mean": 16.844711303710938,
|
|
"margin_dpo/margin_std": 18.56102752685547,
|
|
"step": 83
|
|
},
|
|
{
|
|
"epoch": 0.12334801762114538,
|
|
"grad_norm": 58.463897705078125,
|
|
"learning_rate": 4.993546786148857e-07,
|
|
"logits/chosen": -0.6490943431854248,
|
|
"logits/rejected": -0.6113982200622559,
|
|
"logps/chosen": -72.32835388183594,
|
|
"logps/ref_chosen": -67.1761703491211,
|
|
"logps/ref_rejected": -87.29859924316406,
|
|
"logps/rejected": -107.63688659667969,
|
|
"loss": 0.6737,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.244083434343338,
|
|
"margin_dpo/beta_margin_grad_std": 0.19765815138816833,
|
|
"margin_dpo/beta_margin_mean": 1.5186108350753784,
|
|
"margin_dpo/beta_margin_std": 1.4150245189666748,
|
|
"margin_dpo/loss_margin_mean": 15.186108589172363,
|
|
"margin_dpo/margin_mean": 15.18610954284668,
|
|
"margin_dpo/margin_std": 13.861265182495117,
|
|
"step": 84
|
|
},
|
|
{
|
|
"epoch": 0.12481644640234948,
|
|
"grad_norm": 64.56900787353516,
|
|
"learning_rate": 4.992592445678582e-07,
|
|
"logits/chosen": -0.6331249475479126,
|
|
"logits/rejected": -0.6021745204925537,
|
|
"logps/chosen": -64.1954345703125,
|
|
"logps/ref_chosen": -58.406620025634766,
|
|
"logps/ref_rejected": -78.63880157470703,
|
|
"logps/rejected": -98.99234008789062,
|
|
"loss": 0.7715,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.27991896867752075,
|
|
"margin_dpo/beta_margin_grad_std": 0.2167506366968155,
|
|
"margin_dpo/beta_margin_mean": 1.4564720392227173,
|
|
"margin_dpo/beta_margin_std": 1.6396623849868774,
|
|
"margin_dpo/loss_margin_mean": 14.564720153808594,
|
|
"margin_dpo/margin_mean": 14.56472110748291,
|
|
"margin_dpo/margin_std": 15.904397010803223,
|
|
"step": 85
|
|
},
|
|
{
|
|
"epoch": 0.1262848751835536,
|
|
"grad_norm": 117.56181335449219,
|
|
"learning_rate": 4.991572423079235e-07,
|
|
"logits/chosen": -0.6592001914978027,
|
|
"logits/rejected": -0.6417681574821472,
|
|
"logps/chosen": -63.10496520996094,
|
|
"logps/ref_chosen": -56.13746643066406,
|
|
"logps/ref_rejected": -88.12165069580078,
|
|
"logps/rejected": -110.20996856689453,
|
|
"loss": 0.9186,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.2995266318321228,
|
|
"margin_dpo/beta_margin_grad_std": 0.24782723188400269,
|
|
"margin_dpo/beta_margin_mean": 1.5120818614959717,
|
|
"margin_dpo/beta_margin_std": 2.213315725326538,
|
|
"margin_dpo/loss_margin_mean": 15.120819091796875,
|
|
"margin_dpo/margin_mean": 15.120819091796875,
|
|
"margin_dpo/margin_std": 21.751773834228516,
|
|
"step": 86
|
|
},
|
|
{
|
|
"epoch": 0.1277533039647577,
|
|
"grad_norm": 66.58505249023438,
|
|
"learning_rate": 4.990486745229364e-07,
|
|
"logits/chosen": -0.7072494626045227,
|
|
"logits/rejected": -0.670096755027771,
|
|
"logps/chosen": -62.457305908203125,
|
|
"logps/ref_chosen": -55.63609313964844,
|
|
"logps/ref_rejected": -95.46757507324219,
|
|
"logps/rejected": -118.72473907470703,
|
|
"loss": 0.7859,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.2566095292568207,
|
|
"margin_dpo/beta_margin_grad_std": 0.23276211321353912,
|
|
"margin_dpo/beta_margin_mean": 1.6435949802398682,
|
|
"margin_dpo/beta_margin_std": 1.9270051717758179,
|
|
"margin_dpo/loss_margin_mean": 16.435949325561523,
|
|
"margin_dpo/margin_mean": 16.435949325561523,
|
|
"margin_dpo/margin_std": 18.915019989013672,
|
|
"step": 87
|
|
},
|
|
{
|
|
"epoch": 0.12922173274596183,
|
|
"grad_norm": 75.93292999267578,
|
|
"learning_rate": 4.989335440737586e-07,
|
|
"logits/chosen": -0.6478073596954346,
|
|
"logits/rejected": -0.6310935020446777,
|
|
"logps/chosen": -82.13240051269531,
|
|
"logps/ref_chosen": -73.67115020751953,
|
|
"logps/ref_rejected": -106.70849609375,
|
|
"logps/rejected": -127.77642822265625,
|
|
"loss": 0.9197,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.3014436364173889,
|
|
"margin_dpo/beta_margin_grad_std": 0.23827531933784485,
|
|
"margin_dpo/beta_margin_mean": 1.260668158531189,
|
|
"margin_dpo/beta_margin_std": 1.7176685333251953,
|
|
"margin_dpo/loss_margin_mean": 12.606681823730469,
|
|
"margin_dpo/margin_mean": 12.606681823730469,
|
|
"margin_dpo/margin_std": 15.93301773071289,
|
|
"step": 88
|
|
},
|
|
{
|
|
"epoch": 0.13069016152716592,
|
|
"grad_norm": 56.17230224609375,
|
|
"learning_rate": 4.988118539941847e-07,
|
|
"logits/chosen": -0.6928755640983582,
|
|
"logits/rejected": -0.6521140336990356,
|
|
"logps/chosen": -65.11277770996094,
|
|
"logps/ref_chosen": -60.624916076660156,
|
|
"logps/ref_rejected": -82.08354949951172,
|
|
"logps/rejected": -99.52984619140625,
|
|
"loss": 0.7412,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.27584946155548096,
|
|
"margin_dpo/beta_margin_grad_std": 0.18205879628658295,
|
|
"margin_dpo/beta_margin_mean": 1.2958422899246216,
|
|
"margin_dpo/beta_margin_std": 1.4058305025100708,
|
|
"margin_dpo/loss_margin_mean": 12.958422660827637,
|
|
"margin_dpo/margin_mean": 12.958423614501953,
|
|
"margin_dpo/margin_std": 13.854536056518555,
|
|
"step": 89
|
|
},
|
|
{
|
|
"epoch": 0.13215859030837004,
|
|
"grad_norm": 66.36186981201172,
|
|
"learning_rate": 4.986836074908615e-07,
|
|
"logits/chosen": -0.6513394713401794,
|
|
"logits/rejected": -0.6424415111541748,
|
|
"logps/chosen": -59.482887268066406,
|
|
"logps/ref_chosen": -53.285308837890625,
|
|
"logps/ref_rejected": -111.54470825195312,
|
|
"logps/rejected": -133.55593872070312,
|
|
"loss": 0.8411,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.2849555015563965,
|
|
"margin_dpo/beta_margin_grad_std": 0.22966991364955902,
|
|
"margin_dpo/beta_margin_mean": 1.5813645124435425,
|
|
"margin_dpo/beta_margin_std": 2.088043451309204,
|
|
"margin_dpo/loss_margin_mean": 15.813644409179688,
|
|
"margin_dpo/margin_mean": 15.813644409179688,
|
|
"margin_dpo/margin_std": 20.459163665771484,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.13362701908957417,
|
|
"grad_norm": 65.70479583740234,
|
|
"learning_rate": 4.985488079432037e-07,
|
|
"logits/chosen": -0.695541262626648,
|
|
"logits/rejected": -0.6568491458892822,
|
|
"logps/chosen": -67.02444458007812,
|
|
"logps/ref_chosen": -61.80295944213867,
|
|
"logps/ref_rejected": -87.87395477294922,
|
|
"logps/rejected": -108.97652435302734,
|
|
"loss": 0.762,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.27236229181289673,
|
|
"margin_dpo/beta_margin_grad_std": 0.23041805624961853,
|
|
"margin_dpo/beta_margin_mean": 1.588107705116272,
|
|
"margin_dpo/beta_margin_std": 1.7729498147964478,
|
|
"margin_dpo/loss_margin_mean": 15.88107681274414,
|
|
"margin_dpo/margin_mean": 15.881075859069824,
|
|
"margin_dpo/margin_std": 17.554851531982422,
|
|
"step": 91
|
|
},
|
|
{
|
|
"epoch": 0.13509544787077826,
|
|
"grad_norm": 60.354248046875,
|
|
"learning_rate": 4.984074589033043e-07,
|
|
"logits/chosen": -0.7051235437393188,
|
|
"logits/rejected": -0.6763289570808411,
|
|
"logps/chosen": -56.71138000488281,
|
|
"logps/ref_chosen": -51.640769958496094,
|
|
"logps/ref_rejected": -77.88117980957031,
|
|
"logps/rejected": -97.6747055053711,
|
|
"loss": 0.8107,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.2847803235054016,
|
|
"margin_dpo/beta_margin_grad_std": 0.230974480509758,
|
|
"margin_dpo/beta_margin_mean": 1.4722909927368164,
|
|
"margin_dpo/beta_margin_std": 1.7639739513397217,
|
|
"margin_dpo/loss_margin_mean": 14.722909927368164,
|
|
"margin_dpo/margin_mean": 14.722909927368164,
|
|
"margin_dpo/margin_std": 17.423236846923828,
|
|
"step": 92
|
|
},
|
|
{
|
|
"epoch": 0.13656387665198239,
|
|
"grad_norm": 48.63566589355469,
|
|
"learning_rate": 4.982595640958425e-07,
|
|
"logits/chosen": -0.7185451984405518,
|
|
"logits/rejected": -0.6557145714759827,
|
|
"logps/chosen": -57.98681640625,
|
|
"logps/ref_chosen": -52.529239654541016,
|
|
"logps/ref_rejected": -77.1607437133789,
|
|
"logps/rejected": -97.54901123046875,
|
|
"loss": 0.6862,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.25878626108169556,
|
|
"margin_dpo/beta_margin_grad_std": 0.19036650657653809,
|
|
"margin_dpo/beta_margin_mean": 1.4930684566497803,
|
|
"margin_dpo/beta_margin_std": 1.5610140562057495,
|
|
"margin_dpo/loss_margin_mean": 14.930684089660645,
|
|
"margin_dpo/margin_mean": 14.930685043334961,
|
|
"margin_dpo/margin_std": 15.499519348144531,
|
|
"step": 93
|
|
},
|
|
{
|
|
"epoch": 0.13803230543318648,
|
|
"grad_norm": 51.54408264160156,
|
|
"learning_rate": 4.98105127417984e-07,
|
|
"logits/chosen": -0.6754232048988342,
|
|
"logits/rejected": -0.6463443040847778,
|
|
"logps/chosen": -67.19898986816406,
|
|
"logps/ref_chosen": -61.22261047363281,
|
|
"logps/ref_rejected": -99.59902954101562,
|
|
"logps/rejected": -121.30268859863281,
|
|
"loss": 0.6489,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.2494257092475891,
|
|
"margin_dpo/beta_margin_grad_std": 0.1913631409406662,
|
|
"margin_dpo/beta_margin_mean": 1.5727283954620361,
|
|
"margin_dpo/beta_margin_std": 1.4842208623886108,
|
|
"margin_dpo/loss_margin_mean": 15.72728443145752,
|
|
"margin_dpo/margin_mean": 15.727283477783203,
|
|
"margin_dpo/margin_std": 14.665702819824219,
|
|
"step": 94
|
|
},
|
|
{
|
|
"epoch": 0.1395007342143906,
|
|
"grad_norm": 49.133419036865234,
|
|
"learning_rate": 4.979441529392784e-07,
|
|
"logits/chosen": -0.7007203102111816,
|
|
"logits/rejected": -0.6629537343978882,
|
|
"logps/chosen": -57.13197326660156,
|
|
"logps/ref_chosen": -52.52364730834961,
|
|
"logps/ref_rejected": -75.88035583496094,
|
|
"logps/rejected": -93.41667175292969,
|
|
"loss": 0.7168,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.26963499188423157,
|
|
"margin_dpo/beta_margin_grad_std": 0.18063600361347198,
|
|
"margin_dpo/beta_margin_mean": 1.2927991151809692,
|
|
"margin_dpo/beta_margin_std": 1.250800371170044,
|
|
"margin_dpo/loss_margin_mean": 12.927990913391113,
|
|
"margin_dpo/margin_mean": 12.927990913391113,
|
|
"margin_dpo/margin_std": 12.453845977783203,
|
|
"step": 95
|
|
},
|
|
{
|
|
"epoch": 0.14096916299559473,
|
|
"grad_norm": 50.28048324584961,
|
|
"learning_rate": 4.977766449015534e-07,
|
|
"logits/chosen": -0.6796199083328247,
|
|
"logits/rejected": -0.6378945708274841,
|
|
"logps/chosen": -65.92119598388672,
|
|
"logps/ref_chosen": -62.15697479248047,
|
|
"logps/ref_rejected": -96.59601593017578,
|
|
"logps/rejected": -117.46200561523438,
|
|
"loss": 0.6242,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.23479405045509338,
|
|
"margin_dpo/beta_margin_grad_std": 0.18774589896202087,
|
|
"margin_dpo/beta_margin_mean": 1.7101774215698242,
|
|
"margin_dpo/beta_margin_std": 1.7064077854156494,
|
|
"margin_dpo/loss_margin_mean": 17.101774215698242,
|
|
"margin_dpo/margin_mean": 17.10177230834961,
|
|
"margin_dpo/margin_std": 16.97222328186035,
|
|
"step": 96
|
|
},
|
|
{
|
|
"epoch": 0.14243759177679882,
|
|
"grad_norm": 53.316551208496094,
|
|
"learning_rate": 4.976026077188012e-07,
|
|
"logits/chosen": -0.6628963947296143,
|
|
"logits/rejected": -0.6078641414642334,
|
|
"logps/chosen": -59.297088623046875,
|
|
"logps/ref_chosen": -54.64636993408203,
|
|
"logps/ref_rejected": -76.96475219726562,
|
|
"logps/rejected": -95.37380981445312,
|
|
"loss": 0.6845,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.26004087924957275,
|
|
"margin_dpo/beta_margin_grad_std": 0.18338225781917572,
|
|
"margin_dpo/beta_margin_mean": 1.375833511352539,
|
|
"margin_dpo/beta_margin_std": 1.2895034551620483,
|
|
"margin_dpo/loss_margin_mean": 13.758334159851074,
|
|
"margin_dpo/margin_mean": 13.75833511352539,
|
|
"margin_dpo/margin_std": 12.287176132202148,
|
|
"step": 97
|
|
},
|
|
{
|
|
"epoch": 0.14390602055800295,
|
|
"grad_norm": 59.0107536315918,
|
|
"learning_rate": 4.974220459770639e-07,
|
|
"logits/chosen": -0.6648178100585938,
|
|
"logits/rejected": -0.6405047178268433,
|
|
"logps/chosen": -71.05479431152344,
|
|
"logps/ref_chosen": -65.25862884521484,
|
|
"logps/ref_rejected": -96.5274887084961,
|
|
"logps/rejected": -117.12971496582031,
|
|
"loss": 0.748,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.25851550698280334,
|
|
"margin_dpo/beta_margin_grad_std": 0.21575090289115906,
|
|
"margin_dpo/beta_margin_mean": 1.480607032775879,
|
|
"margin_dpo/beta_margin_std": 1.5301272869110107,
|
|
"margin_dpo/loss_margin_mean": 14.806070327758789,
|
|
"margin_dpo/margin_mean": 14.806069374084473,
|
|
"margin_dpo/margin_std": 15.232458114624023,
|
|
"step": 98
|
|
},
|
|
{
|
|
"epoch": 0.14537444933920704,
|
|
"grad_norm": 48.36380386352539,
|
|
"learning_rate": 4.972349644343108e-07,
|
|
"logits/chosen": -0.6831210851669312,
|
|
"logits/rejected": -0.6707972884178162,
|
|
"logps/chosen": -50.50402069091797,
|
|
"logps/ref_chosen": -45.63848114013672,
|
|
"logps/ref_rejected": -86.43792724609375,
|
|
"logps/rejected": -107.33246612548828,
|
|
"loss": 0.6371,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.24989381432533264,
|
|
"margin_dpo/beta_margin_grad_std": 0.17566484212875366,
|
|
"margin_dpo/beta_margin_mean": 1.6028999090194702,
|
|
"margin_dpo/beta_margin_std": 1.6380233764648438,
|
|
"margin_dpo/loss_margin_mean": 16.02899932861328,
|
|
"margin_dpo/margin_mean": 16.02899932861328,
|
|
"margin_dpo/margin_std": 16.368377685546875,
|
|
"step": 99
|
|
},
|
|
{
|
|
"epoch": 0.14684287812041116,
|
|
"grad_norm": 66.3724365234375,
|
|
"learning_rate": 4.970413680203148e-07,
|
|
"logits/chosen": -0.6820343732833862,
|
|
"logits/rejected": -0.6383761167526245,
|
|
"logps/chosen": -62.664703369140625,
|
|
"logps/ref_chosen": -57.5939826965332,
|
|
"logps/ref_rejected": -74.06021118164062,
|
|
"logps/rejected": -90.71258544921875,
|
|
"loss": 0.9045,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.30843135714530945,
|
|
"margin_dpo/beta_margin_grad_std": 0.22273820638656616,
|
|
"margin_dpo/beta_margin_mean": 1.158165693283081,
|
|
"margin_dpo/beta_margin_std": 1.5505050420761108,
|
|
"margin_dpo/loss_margin_mean": 11.581656455993652,
|
|
"margin_dpo/margin_mean": 11.581655502319336,
|
|
"margin_dpo/margin_std": 15.148920059204102,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.14684287812041116,
|
|
"eval_logits/chosen": -0.64442378282547,
|
|
"eval_logits/rejected": -0.6169079542160034,
|
|
"eval_logps/chosen": -87.21427917480469,
|
|
"eval_logps/ref_chosen": -79.05104064941406,
|
|
"eval_logps/ref_rejected": -86.79793548583984,
|
|
"eval_logps/rejected": -103.4049301147461,
|
|
"eval_loss": 0.5612262487411499,
|
|
"eval_margin_dpo/beta": 0.10000000149011612,
|
|
"eval_margin_dpo/beta_margin_grad_mean": -0.367234468460083,
|
|
"eval_margin_dpo/beta_margin_grad_std": 0.23124347627162933,
|
|
"eval_margin_dpo/beta_margin_mean": 0.8443758487701416,
|
|
"eval_margin_dpo/beta_margin_std": 1.5440738201141357,
|
|
"eval_margin_dpo/loss_margin_mean": 8.443757057189941,
|
|
"eval_margin_dpo/margin_mean": 8.443758010864258,
|
|
"eval_margin_dpo/margin_std": 15.440738677978516,
|
|
"eval_runtime": 40.1706,
|
|
"eval_samples_per_second": 58.227,
|
|
"eval_steps_per_second": 1.842,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.14831130690161526,
|
|
"grad_norm": 59.758052825927734,
|
|
"learning_rate": 4.968412618365215e-07,
|
|
"logits/chosen": -0.6818881630897522,
|
|
"logits/rejected": -0.6413546800613403,
|
|
"logps/chosen": -67.38121032714844,
|
|
"logps/ref_chosen": -61.64884948730469,
|
|
"logps/ref_rejected": -83.18968963623047,
|
|
"logps/rejected": -102.2203598022461,
|
|
"loss": 0.7943,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.29040390253067017,
|
|
"margin_dpo/beta_margin_grad_std": 0.208379328250885,
|
|
"margin_dpo/beta_margin_mean": 1.3298306465148926,
|
|
"margin_dpo/beta_margin_std": 1.6011595726013184,
|
|
"margin_dpo/loss_margin_mean": 13.298306465148926,
|
|
"margin_dpo/margin_mean": 13.298306465148926,
|
|
"margin_dpo/margin_std": 15.829672813415527,
|
|
"step": 101
|
|
},
|
|
{
|
|
"epoch": 0.14977973568281938,
|
|
"grad_norm": 70.77701568603516,
|
|
"learning_rate": 4.966346511559149e-07,
|
|
"logits/chosen": -0.6993674039840698,
|
|
"logits/rejected": -0.6509321928024292,
|
|
"logps/chosen": -71.01653289794922,
|
|
"logps/ref_chosen": -64.0788803100586,
|
|
"logps/ref_rejected": -68.18707275390625,
|
|
"logps/rejected": -85.41771697998047,
|
|
"loss": 0.9392,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.32659777998924255,
|
|
"margin_dpo/beta_margin_grad_std": 0.22610485553741455,
|
|
"margin_dpo/beta_margin_mean": 1.0292984247207642,
|
|
"margin_dpo/beta_margin_std": 1.4347975254058838,
|
|
"margin_dpo/loss_margin_mean": 10.292984008789062,
|
|
"margin_dpo/margin_mean": 10.292984008789062,
|
|
"margin_dpo/margin_std": 14.034963607788086,
|
|
"step": 102
|
|
},
|
|
{
|
|
"epoch": 0.1512481644640235,
|
|
"grad_norm": 45.32851028442383,
|
|
"learning_rate": 4.964215414228785e-07,
|
|
"logits/chosen": -0.6965250968933105,
|
|
"logits/rejected": -0.6588256359100342,
|
|
"logps/chosen": -64.84249877929688,
|
|
"logps/ref_chosen": -61.299278259277344,
|
|
"logps/ref_rejected": -93.57271575927734,
|
|
"logps/rejected": -115.04621887207031,
|
|
"loss": 0.5497,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.21648754179477692,
|
|
"margin_dpo/beta_margin_grad_std": 0.17404885590076447,
|
|
"margin_dpo/beta_margin_mean": 1.7930291891098022,
|
|
"margin_dpo/beta_margin_std": 1.5200693607330322,
|
|
"margin_dpo/loss_margin_mean": 17.9302921295166,
|
|
"margin_dpo/margin_mean": 17.93029022216797,
|
|
"margin_dpo/margin_std": 15.179329872131348,
|
|
"step": 103
|
|
},
|
|
{
|
|
"epoch": 0.1527165932452276,
|
|
"grad_norm": 53.074501037597656,
|
|
"learning_rate": 4.96201938253052e-07,
|
|
"logits/chosen": -0.711995005607605,
|
|
"logits/rejected": -0.6668508052825928,
|
|
"logps/chosen": -59.31719207763672,
|
|
"logps/ref_chosen": -54.37277603149414,
|
|
"logps/ref_rejected": -89.5647201538086,
|
|
"logps/rejected": -111.41394805908203,
|
|
"loss": 0.6775,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.25234103202819824,
|
|
"margin_dpo/beta_margin_grad_std": 0.20327746868133545,
|
|
"margin_dpo/beta_margin_mean": 1.6904809474945068,
|
|
"margin_dpo/beta_margin_std": 1.7527903318405151,
|
|
"margin_dpo/loss_margin_mean": 16.904809951782227,
|
|
"margin_dpo/margin_mean": 16.904808044433594,
|
|
"margin_dpo/margin_std": 17.310930252075195,
|
|
"step": 104
|
|
},
|
|
{
|
|
"epoch": 0.15418502202643172,
|
|
"grad_norm": 39.851192474365234,
|
|
"learning_rate": 4.959758474331832e-07,
|
|
"logits/chosen": -0.7239284515380859,
|
|
"logits/rejected": -0.6849699020385742,
|
|
"logps/chosen": -58.359535217285156,
|
|
"logps/ref_chosen": -54.638946533203125,
|
|
"logps/ref_rejected": -97.97351837158203,
|
|
"logps/rejected": -124.3260498046875,
|
|
"loss": 0.4216,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.16937798261642456,
|
|
"margin_dpo/beta_margin_grad_std": 0.16204456984996796,
|
|
"margin_dpo/beta_margin_mean": 2.2631936073303223,
|
|
"margin_dpo/beta_margin_std": 1.6875535249710083,
|
|
"margin_dpo/loss_margin_mean": 22.63193702697754,
|
|
"margin_dpo/margin_mean": 22.63193702697754,
|
|
"margin_dpo/margin_std": 16.76972007751465,
|
|
"step": 105
|
|
},
|
|
{
|
|
"epoch": 0.15565345080763582,
|
|
"grad_norm": 47.982364654541016,
|
|
"learning_rate": 4.957432749209755e-07,
|
|
"logits/chosen": -0.6622617840766907,
|
|
"logits/rejected": -0.6061959266662598,
|
|
"logps/chosen": -59.73749923706055,
|
|
"logps/ref_chosen": -54.83289337158203,
|
|
"logps/ref_rejected": -85.22461700439453,
|
|
"logps/rejected": -104.90985107421875,
|
|
"loss": 0.6834,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.2569577693939209,
|
|
"margin_dpo/beta_margin_grad_std": 0.19905740022659302,
|
|
"margin_dpo/beta_margin_mean": 1.4780631065368652,
|
|
"margin_dpo/beta_margin_std": 1.442070722579956,
|
|
"margin_dpo/loss_margin_mean": 14.780631065368652,
|
|
"margin_dpo/margin_mean": 14.780631065368652,
|
|
"margin_dpo/margin_std": 14.37628173828125,
|
|
"step": 106
|
|
},
|
|
{
|
|
"epoch": 0.15712187958883994,
|
|
"grad_norm": 48.62154006958008,
|
|
"learning_rate": 4.955042268449307e-07,
|
|
"logits/chosen": -0.7029379606246948,
|
|
"logits/rejected": -0.6468302011489868,
|
|
"logps/chosen": -75.64985656738281,
|
|
"logps/ref_chosen": -69.70780944824219,
|
|
"logps/ref_rejected": -94.73950958251953,
|
|
"logps/rejected": -117.44454956054688,
|
|
"loss": 0.6434,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.23800994455814362,
|
|
"margin_dpo/beta_margin_grad_std": 0.20428350567817688,
|
|
"margin_dpo/beta_margin_mean": 1.6762993335723877,
|
|
"margin_dpo/beta_margin_std": 1.5433250665664673,
|
|
"margin_dpo/loss_margin_mean": 16.76299285888672,
|
|
"margin_dpo/margin_mean": 16.76299285888672,
|
|
"margin_dpo/margin_std": 15.39747428894043,
|
|
"step": 107
|
|
},
|
|
{
|
|
"epoch": 0.15859030837004406,
|
|
"grad_norm": 59.20159912109375,
|
|
"learning_rate": 4.952587095041881e-07,
|
|
"logits/chosen": -0.7281018495559692,
|
|
"logits/rejected": -0.6796263456344604,
|
|
"logps/chosen": -61.804962158203125,
|
|
"logps/ref_chosen": -56.0098876953125,
|
|
"logps/ref_rejected": -95.79601287841797,
|
|
"logps/rejected": -118.76254272460938,
|
|
"loss": 0.7472,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.26712775230407715,
|
|
"margin_dpo/beta_margin_grad_std": 0.22972996532917023,
|
|
"margin_dpo/beta_margin_mean": 1.7171452045440674,
|
|
"margin_dpo/beta_margin_std": 1.924187183380127,
|
|
"margin_dpo/loss_margin_mean": 17.171451568603516,
|
|
"margin_dpo/margin_mean": 17.171451568603516,
|
|
"margin_dpo/margin_std": 19.11905288696289,
|
|
"step": 108
|
|
},
|
|
{
|
|
"epoch": 0.16005873715124816,
|
|
"grad_norm": 49.94407272338867,
|
|
"learning_rate": 4.95006729368358e-07,
|
|
"logits/chosen": -0.6409512758255005,
|
|
"logits/rejected": -0.6109081506729126,
|
|
"logps/chosen": -68.07378387451172,
|
|
"logps/ref_chosen": -62.88549041748047,
|
|
"logps/ref_rejected": -98.68573760986328,
|
|
"logps/rejected": -122.75498962402344,
|
|
"loss": 0.5514,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.21403388679027557,
|
|
"margin_dpo/beta_margin_grad_std": 0.18927177786827087,
|
|
"margin_dpo/beta_margin_mean": 1.8880969285964966,
|
|
"margin_dpo/beta_margin_std": 1.5617107152938843,
|
|
"margin_dpo/loss_margin_mean": 18.880970001220703,
|
|
"margin_dpo/margin_mean": 18.880970001220703,
|
|
"margin_dpo/margin_std": 15.536466598510742,
|
|
"step": 109
|
|
},
|
|
{
|
|
"epoch": 0.16152716593245228,
|
|
"grad_norm": 51.64780807495117,
|
|
"learning_rate": 4.947482930773511e-07,
|
|
"logits/chosen": -0.6488534212112427,
|
|
"logits/rejected": -0.593596339225769,
|
|
"logps/chosen": -63.12610626220703,
|
|
"logps/ref_chosen": -58.753684997558594,
|
|
"logps/ref_rejected": -79.75001525878906,
|
|
"logps/rejected": -101.90496826171875,
|
|
"loss": 0.6812,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.23650266230106354,
|
|
"margin_dpo/beta_margin_grad_std": 0.2127843052148819,
|
|
"margin_dpo/beta_margin_mean": 1.7782528400421143,
|
|
"margin_dpo/beta_margin_std": 1.8577708005905151,
|
|
"margin_dpo/loss_margin_mean": 17.782527923583984,
|
|
"margin_dpo/margin_mean": 17.782527923583984,
|
|
"margin_dpo/margin_std": 18.085121154785156,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.16299559471365638,
|
|
"grad_norm": 53.70595169067383,
|
|
"learning_rate": 4.944834074412042e-07,
|
|
"logits/chosen": -0.7132564187049866,
|
|
"logits/rejected": -0.6853828430175781,
|
|
"logps/chosen": -75.00138092041016,
|
|
"logps/ref_chosen": -68.62410736083984,
|
|
"logps/ref_rejected": -98.42886352539062,
|
|
"logps/rejected": -123.1148452758789,
|
|
"loss": 0.6606,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.23275849223136902,
|
|
"margin_dpo/beta_margin_grad_std": 0.2126443088054657,
|
|
"margin_dpo/beta_margin_mean": 1.8308711051940918,
|
|
"margin_dpo/beta_margin_std": 1.8513504266738892,
|
|
"margin_dpo/loss_margin_mean": 18.3087100982666,
|
|
"margin_dpo/margin_mean": 18.3087100982666,
|
|
"margin_dpo/margin_std": 18.054677963256836,
|
|
"step": 111
|
|
},
|
|
{
|
|
"epoch": 0.1644640234948605,
|
|
"grad_norm": 61.954097747802734,
|
|
"learning_rate": 4.942120794399002e-07,
|
|
"logits/chosen": -0.6944275498390198,
|
|
"logits/rejected": -0.6399896740913391,
|
|
"logps/chosen": -56.5648193359375,
|
|
"logps/ref_chosen": -50.24964141845703,
|
|
"logps/ref_rejected": -64.77442932128906,
|
|
"logps/rejected": -84.34908294677734,
|
|
"loss": 0.8098,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.28910669684410095,
|
|
"margin_dpo/beta_margin_grad_std": 0.2169356793165207,
|
|
"margin_dpo/beta_margin_mean": 1.3259477615356445,
|
|
"margin_dpo/beta_margin_std": 1.5364657640457153,
|
|
"margin_dpo/loss_margin_mean": 13.259476661682129,
|
|
"margin_dpo/margin_mean": 13.259477615356445,
|
|
"margin_dpo/margin_std": 15.114201545715332,
|
|
"step": 112
|
|
},
|
|
{
|
|
"epoch": 0.16593245227606462,
|
|
"grad_norm": 51.86798858642578,
|
|
"learning_rate": 4.939343162231841e-07,
|
|
"logits/chosen": -0.6678988933563232,
|
|
"logits/rejected": -0.6150726079940796,
|
|
"logps/chosen": -72.72209167480469,
|
|
"logps/ref_chosen": -66.71295166015625,
|
|
"logps/ref_rejected": -77.96870422363281,
|
|
"logps/rejected": -98.91563415527344,
|
|
"loss": 0.6636,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.25216349959373474,
|
|
"margin_dpo/beta_margin_grad_std": 0.19559957087039948,
|
|
"margin_dpo/beta_margin_mean": 1.4937783479690552,
|
|
"margin_dpo/beta_margin_std": 1.3543710708618164,
|
|
"margin_dpo/loss_margin_mean": 14.937784194946289,
|
|
"margin_dpo/margin_mean": 14.937784194946289,
|
|
"margin_dpo/margin_std": 13.522451400756836,
|
|
"step": 113
|
|
},
|
|
{
|
|
"epoch": 0.16740088105726872,
|
|
"grad_norm": 48.08191680908203,
|
|
"learning_rate": 4.936501251103751e-07,
|
|
"logits/chosen": -0.6919864416122437,
|
|
"logits/rejected": -0.644797682762146,
|
|
"logps/chosen": -63.47439193725586,
|
|
"logps/ref_chosen": -57.78507995605469,
|
|
"logps/ref_rejected": -87.10966491699219,
|
|
"logps/rejected": -113.00218200683594,
|
|
"loss": 0.5974,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.22666409611701965,
|
|
"margin_dpo/beta_margin_grad_std": 0.20277349650859833,
|
|
"margin_dpo/beta_margin_mean": 2.020320415496826,
|
|
"margin_dpo/beta_margin_std": 2.0695791244506836,
|
|
"margin_dpo/loss_margin_mean": 20.203205108642578,
|
|
"margin_dpo/margin_mean": 20.203205108642578,
|
|
"margin_dpo/margin_std": 20.269628524780273,
|
|
"step": 114
|
|
},
|
|
{
|
|
"epoch": 0.16886930983847284,
|
|
"grad_norm": 74.83251190185547,
|
|
"learning_rate": 4.933595135901732e-07,
|
|
"logits/chosen": -0.7010315656661987,
|
|
"logits/rejected": -0.6554032564163208,
|
|
"logps/chosen": -73.77932739257812,
|
|
"logps/ref_chosen": -65.5826416015625,
|
|
"logps/ref_rejected": -98.56552124023438,
|
|
"logps/rejected": -122.03272247314453,
|
|
"loss": 0.7993,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.27328062057495117,
|
|
"margin_dpo/beta_margin_grad_std": 0.22063322365283966,
|
|
"margin_dpo/beta_margin_mean": 1.5270512104034424,
|
|
"margin_dpo/beta_margin_std": 1.8626760244369507,
|
|
"margin_dpo/loss_margin_mean": 15.270512580871582,
|
|
"margin_dpo/margin_mean": 15.270513534545898,
|
|
"margin_dpo/margin_std": 18.557777404785156,
|
|
"step": 115
|
|
},
|
|
{
|
|
"epoch": 0.17033773861967694,
|
|
"grad_norm": 45.14370346069336,
|
|
"learning_rate": 4.930624893204624e-07,
|
|
"logits/chosen": -0.7024333477020264,
|
|
"logits/rejected": -0.6669833660125732,
|
|
"logps/chosen": -57.33141326904297,
|
|
"logps/ref_chosen": -51.40031051635742,
|
|
"logps/ref_rejected": -80.5218505859375,
|
|
"logps/rejected": -101.55380249023438,
|
|
"loss": 0.6234,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.24537521600723267,
|
|
"margin_dpo/beta_margin_grad_std": 0.17158278822898865,
|
|
"margin_dpo/beta_margin_mean": 1.5100841522216797,
|
|
"margin_dpo/beta_margin_std": 1.3967205286026,
|
|
"margin_dpo/loss_margin_mean": 15.100841522216797,
|
|
"margin_dpo/margin_mean": 15.100841522216797,
|
|
"margin_dpo/margin_std": 13.869951248168945,
|
|
"step": 116
|
|
},
|
|
{
|
|
"epoch": 0.17180616740088106,
|
|
"grad_norm": 61.82289505004883,
|
|
"learning_rate": 4.927590601281083e-07,
|
|
"logits/chosen": -0.6753987669944763,
|
|
"logits/rejected": -0.6353236436843872,
|
|
"logps/chosen": -75.63998413085938,
|
|
"logps/ref_chosen": -69.29840850830078,
|
|
"logps/ref_rejected": -66.58399200439453,
|
|
"logps/rejected": -87.92289733886719,
|
|
"loss": 0.696,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.2614162564277649,
|
|
"margin_dpo/beta_margin_grad_std": 0.19564659893512726,
|
|
"margin_dpo/beta_margin_mean": 1.4997328519821167,
|
|
"margin_dpo/beta_margin_std": 1.5858564376831055,
|
|
"margin_dpo/loss_margin_mean": 14.997328758239746,
|
|
"margin_dpo/margin_mean": 14.997328758239746,
|
|
"margin_dpo/margin_std": 15.84735107421875,
|
|
"step": 117
|
|
},
|
|
{
|
|
"epoch": 0.17327459618208516,
|
|
"grad_norm": 59.757423400878906,
|
|
"learning_rate": 4.924492340087524e-07,
|
|
"logits/chosen": -0.69645094871521,
|
|
"logits/rejected": -0.653948962688446,
|
|
"logps/chosen": -62.284420013427734,
|
|
"logps/ref_chosen": -55.6409797668457,
|
|
"logps/ref_rejected": -75.66905212402344,
|
|
"logps/rejected": -96.29232788085938,
|
|
"loss": 0.667,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.25842976570129395,
|
|
"margin_dpo/beta_margin_grad_std": 0.1788530945777893,
|
|
"margin_dpo/beta_margin_mean": 1.397983431816101,
|
|
"margin_dpo/beta_margin_std": 1.2897310256958008,
|
|
"margin_dpo/loss_margin_mean": 13.979833602905273,
|
|
"margin_dpo/margin_mean": 13.979833602905273,
|
|
"margin_dpo/margin_std": 12.897065162658691,
|
|
"step": 118
|
|
},
|
|
{
|
|
"epoch": 0.17474302496328928,
|
|
"grad_norm": 58.3023796081543,
|
|
"learning_rate": 4.92133019126601e-07,
|
|
"logits/chosen": -0.6907534003257751,
|
|
"logits/rejected": -0.666528582572937,
|
|
"logps/chosen": -80.68411254882812,
|
|
"logps/ref_chosen": -73.51019287109375,
|
|
"logps/ref_rejected": -102.97728729248047,
|
|
"logps/rejected": -124.96991729736328,
|
|
"loss": 0.7427,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.2673878073692322,
|
|
"margin_dpo/beta_margin_grad_std": 0.21648362278938293,
|
|
"margin_dpo/beta_margin_mean": 1.4818708896636963,
|
|
"margin_dpo/beta_margin_std": 1.625535249710083,
|
|
"margin_dpo/loss_margin_mean": 14.818708419799805,
|
|
"margin_dpo/margin_mean": 14.818709373474121,
|
|
"margin_dpo/margin_std": 16.06285858154297,
|
|
"step": 119
|
|
},
|
|
{
|
|
"epoch": 0.1762114537444934,
|
|
"grad_norm": 72.29347229003906,
|
|
"learning_rate": 4.918104238142103e-07,
|
|
"logits/chosen": -0.6836975812911987,
|
|
"logits/rejected": -0.6393747329711914,
|
|
"logps/chosen": -84.85501098632812,
|
|
"logps/ref_chosen": -76.78083801269531,
|
|
"logps/ref_rejected": -108.02374267578125,
|
|
"logps/rejected": -134.7047119140625,
|
|
"loss": 0.6003,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.22438913583755493,
|
|
"margin_dpo/beta_margin_grad_std": 0.2033691704273224,
|
|
"margin_dpo/beta_margin_mean": 1.860680341720581,
|
|
"margin_dpo/beta_margin_std": 1.7107524871826172,
|
|
"margin_dpo/loss_margin_mean": 18.606801986694336,
|
|
"margin_dpo/margin_mean": 18.60680389404297,
|
|
"margin_dpo/margin_std": 17.087509155273438,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.1776798825256975,
|
|
"grad_norm": 50.97231674194336,
|
|
"learning_rate": 4.91481456572267e-07,
|
|
"logits/chosen": -0.6479914784431458,
|
|
"logits/rejected": -0.6296772360801697,
|
|
"logps/chosen": -69.47196960449219,
|
|
"logps/ref_chosen": -61.789894104003906,
|
|
"logps/ref_rejected": -109.99456787109375,
|
|
"logps/rejected": -137.31689453125,
|
|
"loss": 0.6,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.22889560461044312,
|
|
"margin_dpo/beta_margin_grad_std": 0.20096932351589203,
|
|
"margin_dpo/beta_margin_mean": 1.9640241861343384,
|
|
"margin_dpo/beta_margin_std": 1.8757091760635376,
|
|
"margin_dpo/loss_margin_mean": 19.640241622924805,
|
|
"margin_dpo/margin_mean": 19.640243530273438,
|
|
"margin_dpo/margin_std": 18.727981567382812,
|
|
"step": 121
|
|
},
|
|
{
|
|
"epoch": 0.17914831130690162,
|
|
"grad_norm": 44.44526672363281,
|
|
"learning_rate": 4.911461260693638e-07,
|
|
"logits/chosen": -0.6837228536605835,
|
|
"logits/rejected": -0.674382209777832,
|
|
"logps/chosen": -53.734107971191406,
|
|
"logps/ref_chosen": -46.90221405029297,
|
|
"logps/ref_rejected": -106.71418762207031,
|
|
"logps/rejected": -138.098388671875,
|
|
"loss": 0.4292,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1675311177968979,
|
|
"margin_dpo/beta_margin_grad_std": 0.18187315762043,
|
|
"margin_dpo/beta_margin_mean": 2.455230712890625,
|
|
"margin_dpo/beta_margin_std": 1.817508339881897,
|
|
"margin_dpo/loss_margin_mean": 24.55230712890625,
|
|
"margin_dpo/margin_mean": 24.55230712890625,
|
|
"margin_dpo/margin_std": 18.124570846557617,
|
|
"step": 122
|
|
},
|
|
{
|
|
"epoch": 0.18061674008810572,
|
|
"grad_norm": 65.7748031616211,
|
|
"learning_rate": 4.908044411417711e-07,
|
|
"logits/chosen": -0.6735109090805054,
|
|
"logits/rejected": -0.642119288444519,
|
|
"logps/chosen": -68.216064453125,
|
|
"logps/ref_chosen": -61.33863830566406,
|
|
"logps/ref_rejected": -87.77539825439453,
|
|
"logps/rejected": -111.65895080566406,
|
|
"loss": 0.7835,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.25650110840797424,
|
|
"margin_dpo/beta_margin_grad_std": 0.23678642511367798,
|
|
"margin_dpo/beta_margin_mean": 1.700613021850586,
|
|
"margin_dpo/beta_margin_std": 1.9761358499526978,
|
|
"margin_dpo/loss_margin_mean": 17.00613021850586,
|
|
"margin_dpo/margin_mean": 17.00613021850586,
|
|
"margin_dpo/margin_std": 19.711463928222656,
|
|
"step": 123
|
|
},
|
|
{
|
|
"epoch": 0.18208516886930984,
|
|
"grad_norm": 62.76095199584961,
|
|
"learning_rate": 4.904564107932048e-07,
|
|
"logits/chosen": -0.6483690142631531,
|
|
"logits/rejected": -0.6380556225776672,
|
|
"logps/chosen": -78.72987365722656,
|
|
"logps/ref_chosen": -71.44833374023438,
|
|
"logps/ref_rejected": -117.58056640625,
|
|
"logps/rejected": -146.22970581054688,
|
|
"loss": 0.6458,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.23011672496795654,
|
|
"margin_dpo/beta_margin_grad_std": 0.2240174412727356,
|
|
"margin_dpo/beta_margin_mean": 2.1367599964141846,
|
|
"margin_dpo/beta_margin_std": 2.3805363178253174,
|
|
"margin_dpo/loss_margin_mean": 21.367599487304688,
|
|
"margin_dpo/margin_mean": 21.367597579956055,
|
|
"margin_dpo/margin_std": 23.656444549560547,
|
|
"step": 124
|
|
},
|
|
{
|
|
"epoch": 0.18355359765051396,
|
|
"grad_norm": 44.872169494628906,
|
|
"learning_rate": 4.90102044194588e-07,
|
|
"logits/chosen": -0.6881054639816284,
|
|
"logits/rejected": -0.6638908386230469,
|
|
"logps/chosen": -55.62939453125,
|
|
"logps/ref_chosen": -50.136940002441406,
|
|
"logps/ref_rejected": -83.98861694335938,
|
|
"logps/rejected": -109.26545715332031,
|
|
"loss": 0.5385,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.21027234196662903,
|
|
"margin_dpo/beta_margin_grad_std": 0.18001829087734222,
|
|
"margin_dpo/beta_margin_mean": 1.9784388542175293,
|
|
"margin_dpo/beta_margin_std": 1.7413667440414429,
|
|
"margin_dpo/loss_margin_mean": 19.784387588500977,
|
|
"margin_dpo/margin_mean": 19.78438949584961,
|
|
"margin_dpo/margin_std": 17.293458938598633,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 0.18502202643171806,
|
|
"grad_norm": 55.02238845825195,
|
|
"learning_rate": 4.897413506838102e-07,
|
|
"logits/chosen": -0.6792329549789429,
|
|
"logits/rejected": -0.6454166769981384,
|
|
"logps/chosen": -61.897850036621094,
|
|
"logps/ref_chosen": -55.66706848144531,
|
|
"logps/ref_rejected": -98.1297607421875,
|
|
"logps/rejected": -123.43519592285156,
|
|
"loss": 0.5557,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.21414323151111603,
|
|
"margin_dpo/beta_margin_grad_std": 0.18485862016677856,
|
|
"margin_dpo/beta_margin_mean": 1.9074655771255493,
|
|
"margin_dpo/beta_margin_std": 1.7027356624603271,
|
|
"margin_dpo/loss_margin_mean": 19.074655532836914,
|
|
"margin_dpo/margin_mean": 19.07465362548828,
|
|
"margin_dpo/margin_std": 17.013324737548828,
|
|
"step": 126
|
|
},
|
|
{
|
|
"epoch": 0.18649045521292218,
|
|
"grad_norm": 46.633995056152344,
|
|
"learning_rate": 4.89374339765481e-07,
|
|
"logits/chosen": -0.6350376605987549,
|
|
"logits/rejected": -0.6004325151443481,
|
|
"logps/chosen": -61.812808990478516,
|
|
"logps/ref_chosen": -56.55467987060547,
|
|
"logps/ref_rejected": -76.7957763671875,
|
|
"logps/rejected": -98.11077880859375,
|
|
"loss": 0.6291,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.23935288190841675,
|
|
"margin_dpo/beta_margin_grad_std": 0.19522100687026978,
|
|
"margin_dpo/beta_margin_mean": 1.605687141418457,
|
|
"margin_dpo/beta_margin_std": 1.4145348072052002,
|
|
"margin_dpo/loss_margin_mean": 16.05687141418457,
|
|
"margin_dpo/margin_mean": 16.05687141418457,
|
|
"margin_dpo/margin_std": 13.91942024230957,
|
|
"step": 127
|
|
},
|
|
{
|
|
"epoch": 0.18795888399412627,
|
|
"grad_norm": 51.87793731689453,
|
|
"learning_rate": 4.890010211106795e-07,
|
|
"logits/chosen": -0.6689374446868896,
|
|
"logits/rejected": -0.6231319904327393,
|
|
"logps/chosen": -63.837318420410156,
|
|
"logps/ref_chosen": -58.12095642089844,
|
|
"logps/ref_rejected": -76.43896484375,
|
|
"logps/rejected": -99.09458923339844,
|
|
"loss": 0.68,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.2527642250061035,
|
|
"margin_dpo/beta_margin_grad_std": 0.20639841258525848,
|
|
"margin_dpo/beta_margin_mean": 1.6939269304275513,
|
|
"margin_dpo/beta_margin_std": 1.7684822082519531,
|
|
"margin_dpo/loss_margin_mean": 16.939268112182617,
|
|
"margin_dpo/margin_mean": 16.939268112182617,
|
|
"margin_dpo/margin_std": 17.603744506835938,
|
|
"step": 128
|
|
},
|
|
{
|
|
"epoch": 0.1894273127753304,
|
|
"grad_norm": 73.03254699707031,
|
|
"learning_rate": 4.88621404556699e-07,
|
|
"logits/chosen": -0.6583871841430664,
|
|
"logits/rejected": -0.626196026802063,
|
|
"logps/chosen": -75.50413513183594,
|
|
"logps/ref_chosen": -66.91636657714844,
|
|
"logps/ref_rejected": -96.6422119140625,
|
|
"logps/rejected": -122.34420776367188,
|
|
"loss": 0.7997,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.2635345458984375,
|
|
"margin_dpo/beta_margin_grad_std": 0.24272885918617249,
|
|
"margin_dpo/beta_margin_mean": 1.7114232778549194,
|
|
"margin_dpo/beta_margin_std": 2.0050904750823975,
|
|
"margin_dpo/loss_margin_mean": 17.114233016967773,
|
|
"margin_dpo/margin_mean": 17.11423110961914,
|
|
"margin_dpo/margin_std": 19.97789764404297,
|
|
"step": 129
|
|
},
|
|
{
|
|
"epoch": 0.19089574155653452,
|
|
"grad_norm": 48.8452033996582,
|
|
"learning_rate": 4.882355001067891e-07,
|
|
"logits/chosen": -0.648663341999054,
|
|
"logits/rejected": -0.6347514390945435,
|
|
"logps/chosen": -50.91802215576172,
|
|
"logps/ref_chosen": -44.666847229003906,
|
|
"logps/ref_rejected": -82.78165435791016,
|
|
"logps/rejected": -111.94617462158203,
|
|
"loss": 0.5896,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.20127610862255096,
|
|
"margin_dpo/beta_margin_grad_std": 0.2229228913784027,
|
|
"margin_dpo/beta_margin_mean": 2.291335105895996,
|
|
"margin_dpo/beta_margin_std": 2.0445499420166016,
|
|
"margin_dpo/loss_margin_mean": 22.913349151611328,
|
|
"margin_dpo/margin_mean": 22.913349151611328,
|
|
"margin_dpo/margin_std": 19.553049087524414,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.19236417033773862,
|
|
"grad_norm": 43.2736701965332,
|
|
"learning_rate": 4.878433179298909e-07,
|
|
"logits/chosen": -0.6537525057792664,
|
|
"logits/rejected": -0.6376811265945435,
|
|
"logps/chosen": -49.20851135253906,
|
|
"logps/ref_chosen": -44.92458724975586,
|
|
"logps/ref_rejected": -88.44401550292969,
|
|
"logps/rejected": -112.79965209960938,
|
|
"loss": 0.543,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.20455870032310486,
|
|
"margin_dpo/beta_margin_grad_std": 0.19169239699840546,
|
|
"margin_dpo/beta_margin_mean": 2.007171869277954,
|
|
"margin_dpo/beta_margin_std": 1.7163995504379272,
|
|
"margin_dpo/loss_margin_mean": 20.071718215942383,
|
|
"margin_dpo/margin_mean": 20.071718215942383,
|
|
"margin_dpo/margin_std": 17.120357513427734,
|
|
"step": 131
|
|
},
|
|
{
|
|
"epoch": 0.19383259911894274,
|
|
"grad_norm": 51.05913543701172,
|
|
"learning_rate": 4.874448683603694e-07,
|
|
"logits/chosen": -0.7091637849807739,
|
|
"logits/rejected": -0.6840115785598755,
|
|
"logps/chosen": -65.47123718261719,
|
|
"logps/ref_chosen": -59.00108337402344,
|
|
"logps/ref_rejected": -87.89215087890625,
|
|
"logps/rejected": -113.50138854980469,
|
|
"loss": 0.5456,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.21537162363529205,
|
|
"margin_dpo/beta_margin_grad_std": 0.1776151806116104,
|
|
"margin_dpo/beta_margin_mean": 1.9139082431793213,
|
|
"margin_dpo/beta_margin_std": 1.733182668685913,
|
|
"margin_dpo/loss_margin_mean": 19.139081954956055,
|
|
"margin_dpo/margin_mean": 19.139083862304688,
|
|
"margin_dpo/margin_std": 17.268104553222656,
|
|
"step": 132
|
|
},
|
|
{
|
|
"epoch": 0.19530102790014683,
|
|
"grad_norm": 56.10818862915039,
|
|
"learning_rate": 4.870401618977415e-07,
|
|
"logits/chosen": -0.6690283417701721,
|
|
"logits/rejected": -0.6472345590591431,
|
|
"logps/chosen": -74.25129699707031,
|
|
"logps/ref_chosen": -66.60449981689453,
|
|
"logps/ref_rejected": -96.33355712890625,
|
|
"logps/rejected": -121.84832000732422,
|
|
"loss": 0.71,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.2546374201774597,
|
|
"margin_dpo/beta_margin_grad_std": 0.22272148728370667,
|
|
"margin_dpo/beta_margin_mean": 1.7867953777313232,
|
|
"margin_dpo/beta_margin_std": 1.953741431236267,
|
|
"margin_dpo/loss_margin_mean": 17.867952346801758,
|
|
"margin_dpo/margin_mean": 17.86795425415039,
|
|
"margin_dpo/margin_std": 19.506633758544922,
|
|
"step": 133
|
|
},
|
|
{
|
|
"epoch": 0.19676945668135096,
|
|
"grad_norm": 42.318260192871094,
|
|
"learning_rate": 4.866292092063986e-07,
|
|
"logits/chosen": -0.6918569803237915,
|
|
"logits/rejected": -0.6574737429618835,
|
|
"logps/chosen": -56.94035339355469,
|
|
"logps/ref_chosen": -52.06925582885742,
|
|
"logps/ref_rejected": -87.6545181274414,
|
|
"logps/rejected": -111.83763122558594,
|
|
"loss": 0.4885,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.20181448757648468,
|
|
"margin_dpo/beta_margin_grad_std": 0.14965248107910156,
|
|
"margin_dpo/beta_margin_mean": 1.931201696395874,
|
|
"margin_dpo/beta_margin_std": 1.5649566650390625,
|
|
"margin_dpo/loss_margin_mean": 19.3120174407959,
|
|
"margin_dpo/margin_mean": 19.3120174407959,
|
|
"margin_dpo/margin_std": 15.627666473388672,
|
|
"step": 134
|
|
},
|
|
{
|
|
"epoch": 0.19823788546255505,
|
|
"grad_norm": 54.5223274230957,
|
|
"learning_rate": 4.862120211153265e-07,
|
|
"logits/chosen": -0.6978960037231445,
|
|
"logits/rejected": -0.6953153610229492,
|
|
"logps/chosen": -58.12250518798828,
|
|
"logps/ref_chosen": -50.353858947753906,
|
|
"logps/ref_rejected": -115.97975158691406,
|
|
"logps/rejected": -144.46224975585938,
|
|
"loss": 0.5843,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.21617625653743744,
|
|
"margin_dpo/beta_margin_grad_std": 0.2016371190547943,
|
|
"margin_dpo/beta_margin_mean": 2.0713863372802734,
|
|
"margin_dpo/beta_margin_std": 1.9683783054351807,
|
|
"margin_dpo/loss_margin_mean": 20.713863372802734,
|
|
"margin_dpo/margin_mean": 20.713863372802734,
|
|
"margin_dpo/margin_std": 19.445152282714844,
|
|
"step": 135
|
|
},
|
|
{
|
|
"epoch": 0.19970631424375918,
|
|
"grad_norm": 57.70625686645508,
|
|
"learning_rate": 4.857886086178193e-07,
|
|
"logits/chosen": -0.6800702810287476,
|
|
"logits/rejected": -0.6486399173736572,
|
|
"logps/chosen": -72.93949890136719,
|
|
"logps/ref_chosen": -65.072509765625,
|
|
"logps/ref_rejected": -96.32122802734375,
|
|
"logps/rejected": -120.78422546386719,
|
|
"loss": 0.6567,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.2523123025894165,
|
|
"margin_dpo/beta_margin_grad_std": 0.1883687824010849,
|
|
"margin_dpo/beta_margin_mean": 1.6596003770828247,
|
|
"margin_dpo/beta_margin_std": 1.9312176704406738,
|
|
"margin_dpo/loss_margin_mean": 16.596004486083984,
|
|
"margin_dpo/margin_mean": 16.596004486083984,
|
|
"margin_dpo/margin_std": 18.42391014099121,
|
|
"step": 136
|
|
},
|
|
{
|
|
"epoch": 0.2011747430249633,
|
|
"grad_norm": 60.66901779174805,
|
|
"learning_rate": 4.853589828711902e-07,
|
|
"logits/chosen": -0.661456823348999,
|
|
"logits/rejected": -0.6546590328216553,
|
|
"logps/chosen": -58.281639099121094,
|
|
"logps/ref_chosen": -48.759117126464844,
|
|
"logps/ref_rejected": -113.86377716064453,
|
|
"logps/rejected": -145.80894470214844,
|
|
"loss": 0.6241,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.219110906124115,
|
|
"margin_dpo/beta_margin_grad_std": 0.21998608112335205,
|
|
"margin_dpo/beta_margin_mean": 2.242264986038208,
|
|
"margin_dpo/beta_margin_std": 2.2972939014434814,
|
|
"margin_dpo/loss_margin_mean": 22.422649383544922,
|
|
"margin_dpo/margin_mean": 22.422649383544922,
|
|
"margin_dpo/margin_std": 22.739280700683594,
|
|
"step": 137
|
|
},
|
|
{
|
|
"epoch": 0.2026431718061674,
|
|
"grad_norm": 69.17042541503906,
|
|
"learning_rate": 4.849231551964771e-07,
|
|
"logits/chosen": -0.6419689059257507,
|
|
"logits/rejected": -0.6141855716705322,
|
|
"logps/chosen": -69.76889038085938,
|
|
"logps/ref_chosen": -60.519649505615234,
|
|
"logps/ref_rejected": -93.19694519042969,
|
|
"logps/rejected": -120.97154998779297,
|
|
"loss": 0.6941,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.23004138469696045,
|
|
"margin_dpo/beta_margin_grad_std": 0.22523212432861328,
|
|
"margin_dpo/beta_margin_mean": 1.852536678314209,
|
|
"margin_dpo/beta_margin_std": 1.8633073568344116,
|
|
"margin_dpo/loss_margin_mean": 18.525365829467773,
|
|
"margin_dpo/margin_mean": 18.525367736816406,
|
|
"margin_dpo/margin_std": 18.522676467895508,
|
|
"step": 138
|
|
},
|
|
{
|
|
"epoch": 0.20411160058737152,
|
|
"grad_norm": 49.38462448120117,
|
|
"learning_rate": 4.844811370781446e-07,
|
|
"logits/chosen": -0.664415717124939,
|
|
"logits/rejected": -0.6345181465148926,
|
|
"logps/chosen": -53.80146789550781,
|
|
"logps/ref_chosen": -46.89138412475586,
|
|
"logps/ref_rejected": -79.72798156738281,
|
|
"logps/rejected": -107.02542114257812,
|
|
"loss": 0.5486,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.21235616505146027,
|
|
"margin_dpo/beta_margin_grad_std": 0.1912960559129715,
|
|
"margin_dpo/beta_margin_mean": 2.038736343383789,
|
|
"margin_dpo/beta_margin_std": 1.8344343900680542,
|
|
"margin_dpo/loss_margin_mean": 20.38736343383789,
|
|
"margin_dpo/margin_mean": 20.38736343383789,
|
|
"margin_dpo/margin_std": 18.1939640045166,
|
|
"step": 139
|
|
},
|
|
{
|
|
"epoch": 0.2055800293685756,
|
|
"grad_norm": 52.71585464477539,
|
|
"learning_rate": 4.840329401637809e-07,
|
|
"logits/chosen": -0.6803675889968872,
|
|
"logits/rejected": -0.6538151502609253,
|
|
"logps/chosen": -66.43865203857422,
|
|
"logps/ref_chosen": -58.97471618652344,
|
|
"logps/ref_rejected": -83.28411102294922,
|
|
"logps/rejected": -110.04861450195312,
|
|
"loss": 0.6767,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.24082183837890625,
|
|
"margin_dpo/beta_margin_grad_std": 0.22746782004833221,
|
|
"margin_dpo/beta_margin_mean": 1.930057406425476,
|
|
"margin_dpo/beta_margin_std": 1.9649643898010254,
|
|
"margin_dpo/loss_margin_mean": 19.300573348999023,
|
|
"margin_dpo/margin_mean": 19.300575256347656,
|
|
"margin_dpo/margin_std": 19.638553619384766,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.20704845814977973,
|
|
"grad_norm": 63.27604293823242,
|
|
"learning_rate": 4.83578576263792e-07,
|
|
"logits/chosen": -0.6606422662734985,
|
|
"logits/rejected": -0.6359848976135254,
|
|
"logps/chosen": -81.39404296875,
|
|
"logps/ref_chosen": -75.0756607055664,
|
|
"logps/ref_rejected": -98.1922607421875,
|
|
"logps/rejected": -123.7364730834961,
|
|
"loss": 0.6223,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.21940495073795319,
|
|
"margin_dpo/beta_margin_grad_std": 0.2201792299747467,
|
|
"margin_dpo/beta_margin_mean": 1.922583818435669,
|
|
"margin_dpo/beta_margin_std": 1.737042784690857,
|
|
"margin_dpo/loss_margin_mean": 19.22583770751953,
|
|
"margin_dpo/margin_mean": 19.22583770751953,
|
|
"margin_dpo/margin_std": 17.296768188476562,
|
|
"step": 141
|
|
},
|
|
{
|
|
"epoch": 0.20851688693098386,
|
|
"grad_norm": 71.84093475341797,
|
|
"learning_rate": 4.83118057351089e-07,
|
|
"logits/chosen": -0.6464049816131592,
|
|
"logits/rejected": -0.6306595802307129,
|
|
"logps/chosen": -67.52783203125,
|
|
"logps/ref_chosen": -58.027931213378906,
|
|
"logps/ref_rejected": -94.58222198486328,
|
|
"logps/rejected": -124.05125427246094,
|
|
"loss": 0.7609,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.24063211679458618,
|
|
"margin_dpo/beta_margin_grad_std": 0.23835302889347076,
|
|
"margin_dpo/beta_margin_mean": 1.996911883354187,
|
|
"margin_dpo/beta_margin_std": 2.20531964302063,
|
|
"margin_dpo/loss_margin_mean": 19.969118118286133,
|
|
"margin_dpo/margin_mean": 19.969120025634766,
|
|
"margin_dpo/margin_std": 21.67835235595703,
|
|
"step": 142
|
|
},
|
|
{
|
|
"epoch": 0.20998531571218795,
|
|
"grad_norm": 71.78072357177734,
|
|
"learning_rate": 4.826513955607734e-07,
|
|
"logits/chosen": -0.6453176736831665,
|
|
"logits/rejected": -0.6054332256317139,
|
|
"logps/chosen": -66.23277282714844,
|
|
"logps/ref_chosen": -57.59645080566406,
|
|
"logps/ref_rejected": -78.99957275390625,
|
|
"logps/rejected": -102.97239685058594,
|
|
"loss": 0.8411,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.2758726179599762,
|
|
"margin_dpo/beta_margin_grad_std": 0.2487667053937912,
|
|
"margin_dpo/beta_margin_mean": 1.5336509943008423,
|
|
"margin_dpo/beta_margin_std": 1.868659257888794,
|
|
"margin_dpo/loss_margin_mean": 15.336509704589844,
|
|
"margin_dpo/margin_mean": 15.336509704589844,
|
|
"margin_dpo/margin_std": 18.46063232421875,
|
|
"step": 143
|
|
},
|
|
{
|
|
"epoch": 0.21145374449339208,
|
|
"grad_norm": 43.579097747802734,
|
|
"learning_rate": 4.821786031898176e-07,
|
|
"logits/chosen": -0.6609046459197998,
|
|
"logits/rejected": -0.614149808883667,
|
|
"logps/chosen": -66.0463638305664,
|
|
"logps/ref_chosen": -59.90636444091797,
|
|
"logps/ref_rejected": -82.00025939941406,
|
|
"logps/rejected": -107.83518981933594,
|
|
"loss": 0.5331,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.20229505002498627,
|
|
"margin_dpo/beta_margin_grad_std": 0.19436682760715485,
|
|
"margin_dpo/beta_margin_mean": 1.9694933891296387,
|
|
"margin_dpo/beta_margin_std": 1.6196097135543823,
|
|
"margin_dpo/loss_margin_mean": 19.694934844970703,
|
|
"margin_dpo/margin_mean": 19.694934844970703,
|
|
"margin_dpo/margin_std": 16.18764305114746,
|
|
"step": 144
|
|
},
|
|
{
|
|
"epoch": 0.21292217327459617,
|
|
"grad_norm": 47.01042175292969,
|
|
"learning_rate": 4.816996926967401e-07,
|
|
"logits/chosen": -0.655602216720581,
|
|
"logits/rejected": -0.6127992868423462,
|
|
"logps/chosen": -64.15069580078125,
|
|
"logps/ref_chosen": -56.60066604614258,
|
|
"logps/ref_rejected": -77.86631774902344,
|
|
"logps/rejected": -105.38958740234375,
|
|
"loss": 0.5577,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.21516340970993042,
|
|
"margin_dpo/beta_margin_grad_std": 0.1895509660243988,
|
|
"margin_dpo/beta_margin_mean": 1.9973245859146118,
|
|
"margin_dpo/beta_margin_std": 1.822784185409546,
|
|
"margin_dpo/loss_margin_mean": 19.97324562072754,
|
|
"margin_dpo/margin_mean": 19.97324562072754,
|
|
"margin_dpo/margin_std": 18.10959815979004,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 0.2143906020558003,
|
|
"grad_norm": 70.7383041381836,
|
|
"learning_rate": 4.812146767012779e-07,
|
|
"logits/chosen": -0.6367348432540894,
|
|
"logits/rejected": -0.5759721398353577,
|
|
"logps/chosen": -76.57098388671875,
|
|
"logps/ref_chosen": -66.00045776367188,
|
|
"logps/ref_rejected": -81.70278930664062,
|
|
"logps/rejected": -108.75936126708984,
|
|
"loss": 0.7169,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.2570827901363373,
|
|
"margin_dpo/beta_margin_grad_std": 0.22340109944343567,
|
|
"margin_dpo/beta_margin_mean": 1.6486042737960815,
|
|
"margin_dpo/beta_margin_std": 1.7461858987808228,
|
|
"margin_dpo/loss_margin_mean": 16.486042022705078,
|
|
"margin_dpo/margin_mean": 16.486042022705078,
|
|
"margin_dpo/margin_std": 17.46074676513672,
|
|
"step": 146
|
|
},
|
|
{
|
|
"epoch": 0.21585903083700442,
|
|
"grad_norm": 58.0555305480957,
|
|
"learning_rate": 4.807235679840536e-07,
|
|
"logits/chosen": -0.6206094622612,
|
|
"logits/rejected": -0.5745464563369751,
|
|
"logps/chosen": -61.8310432434082,
|
|
"logps/ref_chosen": -53.405487060546875,
|
|
"logps/ref_rejected": -71.39061737060547,
|
|
"logps/rejected": -100.01313781738281,
|
|
"loss": 0.57,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.21462920308113098,
|
|
"margin_dpo/beta_margin_grad_std": 0.19549627602100372,
|
|
"margin_dpo/beta_margin_mean": 2.0196962356567383,
|
|
"margin_dpo/beta_margin_std": 1.897796630859375,
|
|
"margin_dpo/loss_margin_mean": 20.196962356567383,
|
|
"margin_dpo/margin_mean": 20.19696044921875,
|
|
"margin_dpo/margin_std": 18.958223342895508,
|
|
"step": 147
|
|
},
|
|
{
|
|
"epoch": 0.2173274596182085,
|
|
"grad_norm": 50.468204498291016,
|
|
"learning_rate": 4.802263794862384e-07,
|
|
"logits/chosen": -0.6692857146263123,
|
|
"logits/rejected": -0.6372880935668945,
|
|
"logps/chosen": -71.55451202392578,
|
|
"logps/ref_chosen": -64.93708038330078,
|
|
"logps/ref_rejected": -103.09384155273438,
|
|
"logps/rejected": -125.87773132324219,
|
|
"loss": 0.6722,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.252309113740921,
|
|
"margin_dpo/beta_margin_grad_std": 0.19528046250343323,
|
|
"margin_dpo/beta_margin_mean": 1.6166454553604126,
|
|
"margin_dpo/beta_margin_std": 1.6389704942703247,
|
|
"margin_dpo/loss_margin_mean": 16.166454315185547,
|
|
"margin_dpo/margin_mean": 16.166454315185547,
|
|
"margin_dpo/margin_std": 15.465744018554688,
|
|
"step": 148
|
|
},
|
|
{
|
|
"epoch": 0.21879588839941264,
|
|
"grad_norm": 43.54497146606445,
|
|
"learning_rate": 4.797231243092118e-07,
|
|
"logits/chosen": -0.68845534324646,
|
|
"logits/rejected": -0.6588988304138184,
|
|
"logps/chosen": -65.29835510253906,
|
|
"logps/ref_chosen": -58.47376251220703,
|
|
"logps/ref_rejected": -99.31474304199219,
|
|
"logps/rejected": -126.82577514648438,
|
|
"loss": 0.5076,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.19679757952690125,
|
|
"margin_dpo/beta_margin_grad_std": 0.18511685729026794,
|
|
"margin_dpo/beta_margin_mean": 2.068645477294922,
|
|
"margin_dpo/beta_margin_std": 1.6941298246383667,
|
|
"margin_dpo/loss_margin_mean": 20.686452865600586,
|
|
"margin_dpo/margin_mean": 20.68645477294922,
|
|
"margin_dpo/margin_std": 16.817272186279297,
|
|
"step": 149
|
|
},
|
|
{
|
|
"epoch": 0.22026431718061673,
|
|
"grad_norm": 57.30229187011719,
|
|
"learning_rate": 4.792138157142157e-07,
|
|
"logits/chosen": -0.6421929001808167,
|
|
"logits/rejected": -0.6197161674499512,
|
|
"logps/chosen": -52.55424118041992,
|
|
"logps/ref_chosen": -45.705810546875,
|
|
"logps/ref_rejected": -83.34759521484375,
|
|
"logps/rejected": -109.71247863769531,
|
|
"loss": 0.614,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.22440461814403534,
|
|
"margin_dpo/beta_margin_grad_std": 0.20524847507476807,
|
|
"margin_dpo/beta_margin_mean": 1.9516453742980957,
|
|
"margin_dpo/beta_margin_std": 1.9830421209335327,
|
|
"margin_dpo/loss_margin_mean": 19.51645278930664,
|
|
"margin_dpo/margin_mean": 19.51645278930664,
|
|
"margin_dpo/margin_std": 19.350839614868164,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.22173274596182085,
|
|
"grad_norm": 51.592350006103516,
|
|
"learning_rate": 4.786984671220053e-07,
|
|
"logits/chosen": -0.7092918157577515,
|
|
"logits/rejected": -0.6702842712402344,
|
|
"logps/chosen": -77.94678497314453,
|
|
"logps/ref_chosen": -70.57083129882812,
|
|
"logps/ref_rejected": -100.46382141113281,
|
|
"logps/rejected": -129.22677612304688,
|
|
"loss": 0.5158,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1952391117811203,
|
|
"margin_dpo/beta_margin_grad_std": 0.1918613314628601,
|
|
"margin_dpo/beta_margin_mean": 2.138700008392334,
|
|
"margin_dpo/beta_margin_std": 1.8111618757247925,
|
|
"margin_dpo/loss_margin_mean": 21.386999130249023,
|
|
"margin_dpo/margin_mean": 21.38699722290039,
|
|
"margin_dpo/margin_std": 18.097902297973633,
|
|
"step": 151
|
|
},
|
|
{
|
|
"epoch": 0.22320117474302498,
|
|
"grad_norm": 51.7116813659668,
|
|
"learning_rate": 4.78177092112495e-07,
|
|
"logits/chosen": -0.6578500270843506,
|
|
"logits/rejected": -0.6298225522041321,
|
|
"logps/chosen": -65.78941345214844,
|
|
"logps/ref_chosen": -60.164390563964844,
|
|
"logps/ref_rejected": -106.14045715332031,
|
|
"logps/rejected": -133.95053100585938,
|
|
"loss": 0.4999,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.18865174055099487,
|
|
"margin_dpo/beta_margin_grad_std": 0.184275820851326,
|
|
"margin_dpo/beta_margin_mean": 2.2185051441192627,
|
|
"margin_dpo/beta_margin_std": 1.9241358041763306,
|
|
"margin_dpo/loss_margin_mean": 22.185049057006836,
|
|
"margin_dpo/margin_mean": 22.18505096435547,
|
|
"margin_dpo/margin_std": 18.43682098388672,
|
|
"step": 152
|
|
},
|
|
{
|
|
"epoch": 0.22466960352422907,
|
|
"grad_norm": 43.9327507019043,
|
|
"learning_rate": 4.776497044244016e-07,
|
|
"logits/chosen": -0.7056645154953003,
|
|
"logits/rejected": -0.6881464719772339,
|
|
"logps/chosen": -62.922088623046875,
|
|
"logps/ref_chosen": -56.315277099609375,
|
|
"logps/ref_rejected": -85.65583801269531,
|
|
"logps/rejected": -111.08541870117188,
|
|
"loss": 0.6484,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.2367786020040512,
|
|
"margin_dpo/beta_margin_grad_std": 0.21445007622241974,
|
|
"margin_dpo/beta_margin_mean": 1.8822778463363647,
|
|
"margin_dpo/beta_margin_std": 1.9090057611465454,
|
|
"margin_dpo/loss_margin_mean": 18.822778701782227,
|
|
"margin_dpo/margin_mean": 18.82278060913086,
|
|
"margin_dpo/margin_std": 19.06036376953125,
|
|
"step": 153
|
|
},
|
|
{
|
|
"epoch": 0.2261380323054332,
|
|
"grad_norm": 70.27942657470703,
|
|
"learning_rate": 4.771163179548808e-07,
|
|
"logits/chosen": -0.6403573751449585,
|
|
"logits/rejected": -0.6136635541915894,
|
|
"logps/chosen": -71.216796875,
|
|
"logps/ref_chosen": -62.74256896972656,
|
|
"logps/ref_rejected": -104.24420166015625,
|
|
"logps/rejected": -131.48008728027344,
|
|
"loss": 0.7131,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.24161392450332642,
|
|
"margin_dpo/beta_margin_grad_std": 0.24336253106594086,
|
|
"margin_dpo/beta_margin_mean": 1.8761656284332275,
|
|
"margin_dpo/beta_margin_std": 1.9334018230438232,
|
|
"margin_dpo/loss_margin_mean": 18.761655807495117,
|
|
"margin_dpo/margin_mean": 18.761653900146484,
|
|
"margin_dpo/margin_std": 18.826614379882812,
|
|
"step": 154
|
|
},
|
|
{
|
|
"epoch": 0.2276064610866373,
|
|
"grad_norm": 54.39125061035156,
|
|
"learning_rate": 4.7657694675916247e-07,
|
|
"logits/chosen": -0.6604463458061218,
|
|
"logits/rejected": -0.6317785978317261,
|
|
"logps/chosen": -66.77053833007812,
|
|
"logps/ref_chosen": -60.65318298339844,
|
|
"logps/ref_rejected": -77.49220275878906,
|
|
"logps/rejected": -103.34844207763672,
|
|
"loss": 0.5756,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.20776690542697906,
|
|
"margin_dpo/beta_margin_grad_std": 0.2009819895029068,
|
|
"margin_dpo/beta_margin_mean": 1.9738881587982178,
|
|
"margin_dpo/beta_margin_std": 1.7747788429260254,
|
|
"margin_dpo/loss_margin_mean": 19.738882064819336,
|
|
"margin_dpo/margin_mean": 19.738880157470703,
|
|
"margin_dpo/margin_std": 17.563674926757812,
|
|
"step": 155
|
|
},
|
|
{
|
|
"epoch": 0.2290748898678414,
|
|
"grad_norm": 87.28961944580078,
|
|
"learning_rate": 4.7603160505017893e-07,
|
|
"logits/chosen": -0.6653755903244019,
|
|
"logits/rejected": -0.636063814163208,
|
|
"logps/chosen": -79.47747802734375,
|
|
"logps/ref_chosen": -69.49188232421875,
|
|
"logps/ref_rejected": -77.1692886352539,
|
|
"logps/rejected": -102.39488220214844,
|
|
"loss": 0.9429,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.27911999821662903,
|
|
"margin_dpo/beta_margin_grad_std": 0.26404860615730286,
|
|
"margin_dpo/beta_margin_mean": 1.523999810218811,
|
|
"margin_dpo/beta_margin_std": 2.001671075820923,
|
|
"margin_dpo/loss_margin_mean": 15.239997863769531,
|
|
"margin_dpo/margin_mean": 15.239997863769531,
|
|
"margin_dpo/margin_std": 19.949081420898438,
|
|
"step": 156
|
|
},
|
|
{
|
|
"epoch": 0.2305433186490455,
|
|
"grad_norm": 63.12745666503906,
|
|
"learning_rate": 4.7548030719819154e-07,
|
|
"logits/chosen": -0.7227987051010132,
|
|
"logits/rejected": -0.6926777362823486,
|
|
"logps/chosen": -71.74752807617188,
|
|
"logps/ref_chosen": -61.368438720703125,
|
|
"logps/ref_rejected": -107.64636993408203,
|
|
"logps/rejected": -139.5974578857422,
|
|
"loss": 0.545,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.2063656747341156,
|
|
"margin_dpo/beta_margin_grad_std": 0.20212943851947784,
|
|
"margin_dpo/beta_margin_mean": 2.1572000980377197,
|
|
"margin_dpo/beta_margin_std": 1.928480863571167,
|
|
"margin_dpo/loss_margin_mean": 21.57200050354004,
|
|
"margin_dpo/margin_mean": 21.57200050354004,
|
|
"margin_dpo/margin_std": 19.126846313476562,
|
|
"step": 157
|
|
},
|
|
{
|
|
"epoch": 0.23201174743024963,
|
|
"grad_norm": 46.42555236816406,
|
|
"learning_rate": 4.7492306773041136e-07,
|
|
"logits/chosen": -0.6610178351402283,
|
|
"logits/rejected": -0.6375807523727417,
|
|
"logps/chosen": -65.13560485839844,
|
|
"logps/ref_chosen": -57.61292266845703,
|
|
"logps/ref_rejected": -113.6946792602539,
|
|
"logps/rejected": -143.13510131835938,
|
|
"loss": 0.564,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.21795856952667236,
|
|
"margin_dpo/beta_margin_grad_std": 0.196962371468544,
|
|
"margin_dpo/beta_margin_mean": 2.191772937774658,
|
|
"margin_dpo/beta_margin_std": 2.1438300609588623,
|
|
"margin_dpo/loss_margin_mean": 21.917728424072266,
|
|
"margin_dpo/margin_mean": 21.917728424072266,
|
|
"margin_dpo/margin_std": 21.403545379638672,
|
|
"step": 158
|
|
},
|
|
{
|
|
"epoch": 0.23348017621145375,
|
|
"grad_norm": 53.46623229980469,
|
|
"learning_rate": 4.743599013306165e-07,
|
|
"logits/chosen": -0.6735081076622009,
|
|
"logits/rejected": -0.6329150199890137,
|
|
"logps/chosen": -89.84956359863281,
|
|
"logps/ref_chosen": -81.56034088134766,
|
|
"logps/ref_rejected": -88.8987045288086,
|
|
"logps/rejected": -116.82539367675781,
|
|
"loss": 0.6587,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.23721808195114136,
|
|
"margin_dpo/beta_margin_grad_std": 0.22351770102977753,
|
|
"margin_dpo/beta_margin_mean": 1.9637459516525269,
|
|
"margin_dpo/beta_margin_std": 2.0596272945404053,
|
|
"margin_dpo/loss_margin_mean": 19.63745880126953,
|
|
"margin_dpo/margin_mean": 19.63745880126953,
|
|
"margin_dpo/margin_std": 20.491199493408203,
|
|
"step": 159
|
|
},
|
|
{
|
|
"epoch": 0.23494860499265785,
|
|
"grad_norm": 58.52571105957031,
|
|
"learning_rate": 4.737908228387656e-07,
|
|
"logits/chosen": -0.6944263577461243,
|
|
"logits/rejected": -0.6529999375343323,
|
|
"logps/chosen": -74.34519958496094,
|
|
"logps/ref_chosen": -65.73088073730469,
|
|
"logps/ref_rejected": -97.21781921386719,
|
|
"logps/rejected": -125.64112854003906,
|
|
"loss": 0.6916,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.24713920056819916,
|
|
"margin_dpo/beta_margin_grad_std": 0.22890552878379822,
|
|
"margin_dpo/beta_margin_mean": 1.980899691581726,
|
|
"margin_dpo/beta_margin_std": 2.211510419845581,
|
|
"margin_dpo/loss_margin_mean": 19.808996200561523,
|
|
"margin_dpo/margin_mean": 19.808996200561523,
|
|
"margin_dpo/margin_std": 22.036544799804688,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.23641703377386197,
|
|
"grad_norm": 54.677059173583984,
|
|
"learning_rate": 4.7321584725060594e-07,
|
|
"logits/chosen": -0.6853748559951782,
|
|
"logits/rejected": -0.6557826995849609,
|
|
"logps/chosen": -60.53142547607422,
|
|
"logps/ref_chosen": -52.43647766113281,
|
|
"logps/ref_rejected": -83.43095397949219,
|
|
"logps/rejected": -111.55258178710938,
|
|
"loss": 0.646,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.23030346632003784,
|
|
"margin_dpo/beta_margin_grad_std": 0.22901977598667145,
|
|
"margin_dpo/beta_margin_mean": 2.0026674270629883,
|
|
"margin_dpo/beta_margin_std": 1.9912761449813843,
|
|
"margin_dpo/loss_margin_mean": 20.026674270629883,
|
|
"margin_dpo/margin_mean": 20.026676177978516,
|
|
"margin_dpo/margin_std": 19.802610397338867,
|
|
"step": 161
|
|
},
|
|
{
|
|
"epoch": 0.23788546255506607,
|
|
"grad_norm": 46.158084869384766,
|
|
"learning_rate": 4.7263498971727905e-07,
|
|
"logits/chosen": -0.6514345407485962,
|
|
"logits/rejected": -0.6138795614242554,
|
|
"logps/chosen": -70.659423828125,
|
|
"logps/ref_chosen": -62.61058807373047,
|
|
"logps/ref_rejected": -89.39057922363281,
|
|
"logps/rejected": -116.6668701171875,
|
|
"loss": 0.6432,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.23401562869548798,
|
|
"margin_dpo/beta_margin_grad_std": 0.21692141890525818,
|
|
"margin_dpo/beta_margin_mean": 1.9227447509765625,
|
|
"margin_dpo/beta_margin_std": 2.0753374099731445,
|
|
"margin_dpo/loss_margin_mean": 19.227447509765625,
|
|
"margin_dpo/margin_mean": 19.227447509765625,
|
|
"margin_dpo/margin_std": 19.711488723754883,
|
|
"step": 162
|
|
},
|
|
{
|
|
"epoch": 0.2393538913362702,
|
|
"grad_norm": 40.664859771728516,
|
|
"learning_rate": 4.720482655449212e-07,
|
|
"logits/chosen": -0.6877887845039368,
|
|
"logits/rejected": -0.6518304944038391,
|
|
"logps/chosen": -62.216094970703125,
|
|
"logps/ref_chosen": -55.021629333496094,
|
|
"logps/ref_rejected": -75.41822052001953,
|
|
"logps/rejected": -101.20086669921875,
|
|
"loss": 0.6,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.2209862768650055,
|
|
"margin_dpo/beta_margin_grad_std": 0.20438379049301147,
|
|
"margin_dpo/beta_margin_mean": 1.858818531036377,
|
|
"margin_dpo/beta_margin_std": 1.692821979522705,
|
|
"margin_dpo/loss_margin_mean": 18.588184356689453,
|
|
"margin_dpo/margin_mean": 18.588184356689453,
|
|
"margin_dpo/margin_std": 16.888572692871094,
|
|
"step": 163
|
|
},
|
|
{
|
|
"epoch": 0.24082232011747431,
|
|
"grad_norm": 37.478580474853516,
|
|
"learning_rate": 4.714556901942599e-07,
|
|
"logits/chosen": -0.6735349297523499,
|
|
"logits/rejected": -0.6281991004943848,
|
|
"logps/chosen": -61.48329162597656,
|
|
"logps/ref_chosen": -55.64066696166992,
|
|
"logps/ref_rejected": -79.66463470458984,
|
|
"logps/rejected": -106.4957275390625,
|
|
"loss": 0.4672,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.18233349919319153,
|
|
"margin_dpo/beta_margin_grad_std": 0.17561647295951843,
|
|
"margin_dpo/beta_margin_mean": 2.0988471508026123,
|
|
"margin_dpo/beta_margin_std": 1.599080204963684,
|
|
"margin_dpo/loss_margin_mean": 20.98847007751465,
|
|
"margin_dpo/margin_mean": 20.98847007751465,
|
|
"margin_dpo/margin_std": 15.165935516357422,
|
|
"step": 164
|
|
},
|
|
{
|
|
"epoch": 0.2422907488986784,
|
|
"grad_norm": 59.977012634277344,
|
|
"learning_rate": 4.708572792802069e-07,
|
|
"logits/chosen": -0.6780301332473755,
|
|
"logits/rejected": -0.6259827613830566,
|
|
"logps/chosen": -69.79907989501953,
|
|
"logps/ref_chosen": -61.310691833496094,
|
|
"logps/ref_rejected": -73.67060852050781,
|
|
"logps/rejected": -96.30776977539062,
|
|
"loss": 0.7391,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.2699863314628601,
|
|
"margin_dpo/beta_margin_grad_std": 0.2010025978088379,
|
|
"margin_dpo/beta_margin_mean": 1.4148781299591064,
|
|
"margin_dpo/beta_margin_std": 1.4906260967254639,
|
|
"margin_dpo/loss_margin_mean": 14.148780822753906,
|
|
"margin_dpo/margin_mean": 14.148780822753906,
|
|
"margin_dpo/margin_std": 14.75755500793457,
|
|
"step": 165
|
|
},
|
|
{
|
|
"epoch": 0.24375917767988253,
|
|
"grad_norm": 45.09745407104492,
|
|
"learning_rate": 4.702530485714461e-07,
|
|
"logits/chosen": -0.6649864912033081,
|
|
"logits/rejected": -0.6568000316619873,
|
|
"logps/chosen": -59.23976516723633,
|
|
"logps/ref_chosen": -50.98360061645508,
|
|
"logps/ref_rejected": -98.09512329101562,
|
|
"logps/rejected": -129.61912536621094,
|
|
"loss": 0.5314,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.20547892153263092,
|
|
"margin_dpo/beta_margin_grad_std": 0.18839870393276215,
|
|
"margin_dpo/beta_margin_mean": 2.3267838954925537,
|
|
"margin_dpo/beta_margin_std": 2.3729536533355713,
|
|
"margin_dpo/loss_margin_mean": 23.267839431762695,
|
|
"margin_dpo/margin_mean": 23.267839431762695,
|
|
"margin_dpo/margin_std": 23.49030876159668,
|
|
"step": 166
|
|
},
|
|
{
|
|
"epoch": 0.24522760646108663,
|
|
"grad_norm": 55.43495178222656,
|
|
"learning_rate": 4.6964301399001877e-07,
|
|
"logits/chosen": -0.6117781400680542,
|
|
"logits/rejected": -0.5964827537536621,
|
|
"logps/chosen": -58.788291931152344,
|
|
"logps/ref_chosen": -50.42409133911133,
|
|
"logps/ref_rejected": -96.03042602539062,
|
|
"logps/rejected": -128.52084350585938,
|
|
"loss": 0.4979,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1734393686056137,
|
|
"margin_dpo/beta_margin_grad_std": 0.19911234080791473,
|
|
"margin_dpo/beta_margin_mean": 2.4126217365264893,
|
|
"margin_dpo/beta_margin_std": 1.9151932001113892,
|
|
"margin_dpo/loss_margin_mean": 24.126218795776367,
|
|
"margin_dpo/margin_mean": 24.126216888427734,
|
|
"margin_dpo/margin_std": 18.741436004638672,
|
|
"step": 167
|
|
},
|
|
{
|
|
"epoch": 0.24669603524229075,
|
|
"grad_norm": 47.17963790893555,
|
|
"learning_rate": 4.690271916109034e-07,
|
|
"logits/chosen": -0.6795276403427124,
|
|
"logits/rejected": -0.6451242566108704,
|
|
"logps/chosen": -57.11155700683594,
|
|
"logps/ref_chosen": -49.46282196044922,
|
|
"logps/ref_rejected": -75.30854797363281,
|
|
"logps/rejected": -101.9991455078125,
|
|
"loss": 0.5363,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.2067001909017563,
|
|
"margin_dpo/beta_margin_grad_std": 0.18657654523849487,
|
|
"margin_dpo/beta_margin_mean": 1.9041860103607178,
|
|
"margin_dpo/beta_margin_std": 1.539984107017517,
|
|
"margin_dpo/loss_margin_mean": 19.041860580444336,
|
|
"margin_dpo/margin_mean": 19.041860580444336,
|
|
"margin_dpo/margin_std": 15.281282424926758,
|
|
"step": 168
|
|
},
|
|
{
|
|
"epoch": 0.24816446402349487,
|
|
"grad_norm": 54.283538818359375,
|
|
"learning_rate": 4.6840559766159235e-07,
|
|
"logits/chosen": -0.6633504629135132,
|
|
"logits/rejected": -0.633745551109314,
|
|
"logps/chosen": -67.26417541503906,
|
|
"logps/ref_chosen": -59.803443908691406,
|
|
"logps/ref_rejected": -83.34574890136719,
|
|
"logps/rejected": -108.05218505859375,
|
|
"loss": 0.7725,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.27190113067626953,
|
|
"margin_dpo/beta_margin_grad_std": 0.22907379269599915,
|
|
"margin_dpo/beta_margin_mean": 1.7245699167251587,
|
|
"margin_dpo/beta_margin_std": 2.196559429168701,
|
|
"margin_dpo/loss_margin_mean": 17.245698928833008,
|
|
"margin_dpo/margin_mean": 17.245698928833008,
|
|
"margin_dpo/margin_std": 21.799476623535156,
|
|
"step": 169
|
|
},
|
|
{
|
|
"epoch": 0.24963289280469897,
|
|
"grad_norm": 47.430335998535156,
|
|
"learning_rate": 4.6777824852166437e-07,
|
|
"logits/chosen": -0.6211998462677002,
|
|
"logits/rejected": -0.596994161605835,
|
|
"logps/chosen": -55.63056182861328,
|
|
"logps/ref_chosen": -49.471771240234375,
|
|
"logps/ref_rejected": -75.91734313964844,
|
|
"logps/rejected": -103.64749145507812,
|
|
"loss": 0.5665,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.2037757784128189,
|
|
"margin_dpo/beta_margin_grad_std": 0.20665040612220764,
|
|
"margin_dpo/beta_margin_mean": 2.1571362018585205,
|
|
"margin_dpo/beta_margin_std": 1.8903136253356934,
|
|
"margin_dpo/loss_margin_mean": 21.571361541748047,
|
|
"margin_dpo/margin_mean": 21.571361541748047,
|
|
"margin_dpo/margin_std": 18.570228576660156,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.2511013215859031,
|
|
"grad_norm": 62.58616256713867,
|
|
"learning_rate": 4.6714516072235273e-07,
|
|
"logits/chosen": -0.6630641222000122,
|
|
"logits/rejected": -0.6101676225662231,
|
|
"logps/chosen": -92.2599868774414,
|
|
"logps/ref_chosen": -84.49931335449219,
|
|
"logps/ref_rejected": -109.38209533691406,
|
|
"logps/rejected": -135.90167236328125,
|
|
"loss": 0.6488,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.23200581967830658,
|
|
"margin_dpo/beta_margin_grad_std": 0.21404391527175903,
|
|
"margin_dpo/beta_margin_mean": 1.8758889436721802,
|
|
"margin_dpo/beta_margin_std": 1.987282395362854,
|
|
"margin_dpo/loss_margin_mean": 18.75889015197754,
|
|
"margin_dpo/margin_mean": 18.75889015197754,
|
|
"margin_dpo/margin_std": 19.822465896606445,
|
|
"step": 171
|
|
},
|
|
{
|
|
"epoch": 0.2525697503671072,
|
|
"grad_norm": 65.06268310546875,
|
|
"learning_rate": 4.6650635094610966e-07,
|
|
"logits/chosen": -0.7175358533859253,
|
|
"logits/rejected": -0.6748548746109009,
|
|
"logps/chosen": -78.930419921875,
|
|
"logps/ref_chosen": -68.65391540527344,
|
|
"logps/ref_rejected": -85.43667602539062,
|
|
"logps/rejected": -113.89852905273438,
|
|
"loss": 0.6925,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.23910076916217804,
|
|
"margin_dpo/beta_margin_grad_std": 0.23038098216056824,
|
|
"margin_dpo/beta_margin_mean": 1.8185348510742188,
|
|
"margin_dpo/beta_margin_std": 1.889103651046753,
|
|
"margin_dpo/loss_margin_mean": 18.185348510742188,
|
|
"margin_dpo/margin_mean": 18.185348510742188,
|
|
"margin_dpo/margin_std": 18.722644805908203,
|
|
"step": 172
|
|
},
|
|
{
|
|
"epoch": 0.2540381791483113,
|
|
"grad_norm": 50.819210052490234,
|
|
"learning_rate": 4.6586183602616687e-07,
|
|
"logits/chosen": -0.7069448828697205,
|
|
"logits/rejected": -0.6491652131080627,
|
|
"logps/chosen": -70.72373962402344,
|
|
"logps/ref_chosen": -63.050872802734375,
|
|
"logps/ref_rejected": -78.68392944335938,
|
|
"logps/rejected": -104.677490234375,
|
|
"loss": 0.6217,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.22918951511383057,
|
|
"margin_dpo/beta_margin_grad_std": 0.2061556577682495,
|
|
"margin_dpo/beta_margin_mean": 1.8320705890655518,
|
|
"margin_dpo/beta_margin_std": 1.7518055438995361,
|
|
"margin_dpo/loss_margin_mean": 18.32070541381836,
|
|
"margin_dpo/margin_mean": 18.32070541381836,
|
|
"margin_dpo/margin_std": 17.509033203125,
|
|
"step": 173
|
|
},
|
|
{
|
|
"epoch": 0.2555066079295154,
|
|
"grad_norm": 52.17148971557617,
|
|
"learning_rate": 4.652116329460919e-07,
|
|
"logits/chosen": -0.6733288168907166,
|
|
"logits/rejected": -0.6736022233963013,
|
|
"logps/chosen": -62.04674530029297,
|
|
"logps/ref_chosen": -53.36296844482422,
|
|
"logps/ref_rejected": -101.91120910644531,
|
|
"logps/rejected": -129.01959228515625,
|
|
"loss": 0.6537,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.23130814731121063,
|
|
"margin_dpo/beta_margin_grad_std": 0.2197827398777008,
|
|
"margin_dpo/beta_margin_mean": 1.8424601554870605,
|
|
"margin_dpo/beta_margin_std": 1.8221278190612793,
|
|
"margin_dpo/loss_margin_mean": 18.42460060119629,
|
|
"margin_dpo/margin_mean": 18.42460060119629,
|
|
"margin_dpo/margin_std": 17.7147216796875,
|
|
"step": 174
|
|
},
|
|
{
|
|
"epoch": 0.25697503671071953,
|
|
"grad_norm": 49.36385726928711,
|
|
"learning_rate": 4.645557588393406e-07,
|
|
"logits/chosen": -0.6248526573181152,
|
|
"logits/rejected": -0.6001460552215576,
|
|
"logps/chosen": -52.519676208496094,
|
|
"logps/ref_chosen": -45.417762756347656,
|
|
"logps/ref_rejected": -89.50579833984375,
|
|
"logps/rejected": -120.34637451171875,
|
|
"loss": 0.4427,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.18073977530002594,
|
|
"margin_dpo/beta_margin_grad_std": 0.16047413647174835,
|
|
"margin_dpo/beta_margin_mean": 2.3738653659820557,
|
|
"margin_dpo/beta_margin_std": 1.9637548923492432,
|
|
"margin_dpo/loss_margin_mean": 23.7386531829834,
|
|
"margin_dpo/margin_mean": 23.7386531829834,
|
|
"margin_dpo/margin_std": 19.49551010131836,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 0.25844346549192365,
|
|
"grad_norm": 43.94850540161133,
|
|
"learning_rate": 4.638942309888058e-07,
|
|
"logits/chosen": -0.6811977028846741,
|
|
"logits/rejected": -0.6696683764457703,
|
|
"logps/chosen": -57.92758560180664,
|
|
"logps/ref_chosen": -50.45283889770508,
|
|
"logps/ref_rejected": -95.55896759033203,
|
|
"logps/rejected": -124.97660827636719,
|
|
"loss": 0.4848,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1871013045310974,
|
|
"margin_dpo/beta_margin_grad_std": 0.18806670606136322,
|
|
"margin_dpo/beta_margin_mean": 2.194288969039917,
|
|
"margin_dpo/beta_margin_std": 1.7437057495117188,
|
|
"margin_dpo/loss_margin_mean": 21.942888259887695,
|
|
"margin_dpo/margin_mean": 21.942890167236328,
|
|
"margin_dpo/margin_std": 17.408954620361328,
|
|
"step": 176
|
|
},
|
|
{
|
|
"epoch": 0.2599118942731278,
|
|
"grad_norm": 41.124366760253906,
|
|
"learning_rate": 4.6322706682636137e-07,
|
|
"logits/chosen": -0.6584987640380859,
|
|
"logits/rejected": -0.6148337125778198,
|
|
"logps/chosen": -70.50564575195312,
|
|
"logps/ref_chosen": -61.21646499633789,
|
|
"logps/ref_rejected": -95.89378356933594,
|
|
"logps/rejected": -127.14639282226562,
|
|
"loss": 0.5412,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.20711472630500793,
|
|
"margin_dpo/beta_margin_grad_std": 0.19701464474201202,
|
|
"margin_dpo/beta_margin_mean": 2.196343183517456,
|
|
"margin_dpo/beta_margin_std": 2.060377359390259,
|
|
"margin_dpo/loss_margin_mean": 21.963430404663086,
|
|
"margin_dpo/margin_mean": 21.963428497314453,
|
|
"margin_dpo/margin_std": 20.492202758789062,
|
|
"step": 177
|
|
},
|
|
{
|
|
"epoch": 0.26138032305433184,
|
|
"grad_norm": 59.32670974731445,
|
|
"learning_rate": 4.6255428393240354e-07,
|
|
"logits/chosen": -0.6575347185134888,
|
|
"logits/rejected": -0.6636496186256409,
|
|
"logps/chosen": -70.52571868896484,
|
|
"logps/ref_chosen": -58.26478958129883,
|
|
"logps/ref_rejected": -105.36532592773438,
|
|
"logps/rejected": -142.11297607421875,
|
|
"loss": 0.5441,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.19805437326431274,
|
|
"margin_dpo/beta_margin_grad_std": 0.2140408307313919,
|
|
"margin_dpo/beta_margin_mean": 2.4486706256866455,
|
|
"margin_dpo/beta_margin_std": 2.2665319442749023,
|
|
"margin_dpo/loss_margin_mean": 24.48670768737793,
|
|
"margin_dpo/margin_mean": 24.486705780029297,
|
|
"margin_dpo/margin_std": 22.613819122314453,
|
|
"step": 178
|
|
},
|
|
{
|
|
"epoch": 0.26284875183553597,
|
|
"grad_norm": 72.6666488647461,
|
|
"learning_rate": 4.6187590003538724e-07,
|
|
"logits/chosen": -0.6500136256217957,
|
|
"logits/rejected": -0.6252506971359253,
|
|
"logps/chosen": -72.29452514648438,
|
|
"logps/ref_chosen": -61.05832290649414,
|
|
"logps/ref_rejected": -90.52782440185547,
|
|
"logps/rejected": -126.22462463378906,
|
|
"loss": 0.6795,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.21323637664318085,
|
|
"margin_dpo/beta_margin_grad_std": 0.2565738558769226,
|
|
"margin_dpo/beta_margin_mean": 2.4460597038269043,
|
|
"margin_dpo/beta_margin_std": 2.377058982849121,
|
|
"margin_dpo/loss_margin_mean": 24.460596084594727,
|
|
"margin_dpo/margin_mean": 24.460596084594727,
|
|
"margin_dpo/margin_std": 23.486286163330078,
|
|
"step": 179
|
|
},
|
|
{
|
|
"epoch": 0.2643171806167401,
|
|
"grad_norm": 45.02251052856445,
|
|
"learning_rate": 4.611919330113591e-07,
|
|
"logits/chosen": -0.6473774909973145,
|
|
"logits/rejected": -0.6168711185455322,
|
|
"logps/chosen": -63.067901611328125,
|
|
"logps/ref_chosen": -54.34272003173828,
|
|
"logps/ref_rejected": -98.21183776855469,
|
|
"logps/rejected": -131.53292846679688,
|
|
"loss": 0.455,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.17352601885795593,
|
|
"margin_dpo/beta_margin_grad_std": 0.1921532303094864,
|
|
"margin_dpo/beta_margin_mean": 2.4595916271209717,
|
|
"margin_dpo/beta_margin_std": 1.927443504333496,
|
|
"margin_dpo/loss_margin_mean": 24.595916748046875,
|
|
"margin_dpo/margin_mean": 24.595916748046875,
|
|
"margin_dpo/margin_std": 19.19723129272461,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.2657856093979442,
|
|
"grad_norm": 52.635921478271484,
|
|
"learning_rate": 4.605024008834863e-07,
|
|
"logits/chosen": -0.6752246618270874,
|
|
"logits/rejected": -0.6351001262664795,
|
|
"logps/chosen": -63.29297637939453,
|
|
"logps/ref_chosen": -55.000457763671875,
|
|
"logps/ref_rejected": -61.656166076660156,
|
|
"logps/rejected": -87.51589965820312,
|
|
"loss": 0.6665,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.24270856380462646,
|
|
"margin_dpo/beta_margin_grad_std": 0.21218255162239075,
|
|
"margin_dpo/beta_margin_mean": 1.7567216157913208,
|
|
"margin_dpo/beta_margin_std": 1.816219687461853,
|
|
"margin_dpo/loss_margin_mean": 17.567216873168945,
|
|
"margin_dpo/margin_mean": 17.567216873168945,
|
|
"margin_dpo/margin_std": 18.145404815673828,
|
|
"step": 181
|
|
},
|
|
{
|
|
"epoch": 0.26725403817914833,
|
|
"grad_norm": 62.45026397705078,
|
|
"learning_rate": 4.598073218215817e-07,
|
|
"logits/chosen": -0.6236890554428101,
|
|
"logits/rejected": -0.6039330363273621,
|
|
"logps/chosen": -50.268310546875,
|
|
"logps/ref_chosen": -41.10784912109375,
|
|
"logps/ref_rejected": -89.5215835571289,
|
|
"logps/rejected": -125.66545867919922,
|
|
"loss": 0.5717,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.18390774726867676,
|
|
"margin_dpo/beta_margin_grad_std": 0.2302154004573822,
|
|
"margin_dpo/beta_margin_mean": 2.6983418464660645,
|
|
"margin_dpo/beta_margin_std": 2.357621908187866,
|
|
"margin_dpo/loss_margin_mean": 26.983417510986328,
|
|
"margin_dpo/margin_mean": 26.983417510986328,
|
|
"margin_dpo/margin_std": 23.51990509033203,
|
|
"step": 182
|
|
},
|
|
{
|
|
"epoch": 0.2687224669603524,
|
|
"grad_norm": 57.11370849609375,
|
|
"learning_rate": 4.5910671414162484e-07,
|
|
"logits/chosen": -0.6656794548034668,
|
|
"logits/rejected": -0.6146266460418701,
|
|
"logps/chosen": -69.17279052734375,
|
|
"logps/ref_chosen": -57.524559020996094,
|
|
"logps/ref_rejected": -75.97572326660156,
|
|
"logps/rejected": -108.50943756103516,
|
|
"loss": 0.5269,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.20869557559490204,
|
|
"margin_dpo/beta_margin_grad_std": 0.18125931918621063,
|
|
"margin_dpo/beta_margin_mean": 2.0885488986968994,
|
|
"margin_dpo/beta_margin_std": 1.8435460329055786,
|
|
"margin_dpo/loss_margin_mean": 20.88549041748047,
|
|
"margin_dpo/margin_mean": 20.88549041748047,
|
|
"margin_dpo/margin_std": 16.793594360351562,
|
|
"step": 183
|
|
},
|
|
{
|
|
"epoch": 0.2701908957415565,
|
|
"grad_norm": 55.754112243652344,
|
|
"learning_rate": 4.5840059630527985e-07,
|
|
"logits/chosen": -0.648979902267456,
|
|
"logits/rejected": -0.6380875706672668,
|
|
"logps/chosen": -67.61172485351562,
|
|
"logps/ref_chosen": -58.544952392578125,
|
|
"logps/ref_rejected": -76.63406372070312,
|
|
"logps/rejected": -101.36076354980469,
|
|
"loss": 0.7067,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.26532942056655884,
|
|
"margin_dpo/beta_margin_grad_std": 0.20013463497161865,
|
|
"margin_dpo/beta_margin_mean": 1.5659937858581543,
|
|
"margin_dpo/beta_margin_std": 1.7907655239105225,
|
|
"margin_dpo/loss_margin_mean": 15.659936904907227,
|
|
"margin_dpo/margin_mean": 15.659937858581543,
|
|
"margin_dpo/margin_std": 17.573902130126953,
|
|
"step": 184
|
|
},
|
|
{
|
|
"epoch": 0.27165932452276065,
|
|
"grad_norm": 55.9732780456543,
|
|
"learning_rate": 4.5768898691940836e-07,
|
|
"logits/chosen": -0.6820551753044128,
|
|
"logits/rejected": -0.6277487277984619,
|
|
"logps/chosen": -71.6310806274414,
|
|
"logps/ref_chosen": -62.02584457397461,
|
|
"logps/ref_rejected": -73.76260375976562,
|
|
"logps/rejected": -98.80401611328125,
|
|
"loss": 0.8275,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.28143346309661865,
|
|
"margin_dpo/beta_margin_grad_std": 0.23456232249736786,
|
|
"margin_dpo/beta_margin_mean": 1.543617844581604,
|
|
"margin_dpo/beta_margin_std": 1.9218882322311401,
|
|
"margin_dpo/loss_margin_mean": 15.436178207397461,
|
|
"margin_dpo/margin_mean": 15.436178207397461,
|
|
"margin_dpo/margin_std": 18.934371948242188,
|
|
"step": 185
|
|
},
|
|
{
|
|
"epoch": 0.27312775330396477,
|
|
"grad_norm": 42.71883010864258,
|
|
"learning_rate": 4.5697190473557947e-07,
|
|
"logits/chosen": -0.679948091506958,
|
|
"logits/rejected": -0.6224746704101562,
|
|
"logps/chosen": -79.09538269042969,
|
|
"logps/ref_chosen": -69.35346984863281,
|
|
"logps/ref_rejected": -88.07244873046875,
|
|
"logps/rejected": -123.15438842773438,
|
|
"loss": 0.3968,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.15906205773353577,
|
|
"margin_dpo/beta_margin_grad_std": 0.1656772643327713,
|
|
"margin_dpo/beta_margin_mean": 2.5340030193328857,
|
|
"margin_dpo/beta_margin_std": 1.8850746154785156,
|
|
"margin_dpo/loss_margin_mean": 25.340028762817383,
|
|
"margin_dpo/margin_mean": 25.34002685546875,
|
|
"margin_dpo/margin_std": 18.844833374023438,
|
|
"step": 186
|
|
},
|
|
{
|
|
"epoch": 0.2745961820851689,
|
|
"grad_norm": 63.56511306762695,
|
|
"learning_rate": 4.5624936864957555e-07,
|
|
"logits/chosen": -0.6759737730026245,
|
|
"logits/rejected": -0.6658194065093994,
|
|
"logps/chosen": -64.28144836425781,
|
|
"logps/ref_chosen": -52.75646209716797,
|
|
"logps/ref_rejected": -81.96910095214844,
|
|
"logps/rejected": -112.44464111328125,
|
|
"loss": 0.724,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.24051952362060547,
|
|
"margin_dpo/beta_margin_grad_std": 0.23406511545181274,
|
|
"margin_dpo/beta_margin_mean": 1.895055890083313,
|
|
"margin_dpo/beta_margin_std": 2.0592827796936035,
|
|
"margin_dpo/loss_margin_mean": 18.950557708740234,
|
|
"margin_dpo/margin_mean": 18.950557708740234,
|
|
"margin_dpo/margin_std": 20.23330307006836,
|
|
"step": 187
|
|
},
|
|
{
|
|
"epoch": 0.27606461086637296,
|
|
"grad_norm": 59.55484390258789,
|
|
"learning_rate": 4.5552139770089454e-07,
|
|
"logits/chosen": -0.6790421009063721,
|
|
"logits/rejected": -0.6611262559890747,
|
|
"logps/chosen": -58.12608337402344,
|
|
"logps/ref_chosen": -49.415489196777344,
|
|
"logps/ref_rejected": -89.54043579101562,
|
|
"logps/rejected": -119.92129516601562,
|
|
"loss": 0.6085,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.2119147628545761,
|
|
"margin_dpo/beta_margin_grad_std": 0.21607068181037903,
|
|
"margin_dpo/beta_margin_mean": 2.1670262813568115,
|
|
"margin_dpo/beta_margin_std": 2.0467336177825928,
|
|
"margin_dpo/loss_margin_mean": 21.670263290405273,
|
|
"margin_dpo/margin_mean": 21.670263290405273,
|
|
"margin_dpo/margin_std": 20.444095611572266,
|
|
"step": 188
|
|
},
|
|
{
|
|
"epoch": 0.2775330396475771,
|
|
"grad_norm": 58.168373107910156,
|
|
"learning_rate": 4.5478801107224794e-07,
|
|
"logits/chosen": -0.6502203941345215,
|
|
"logits/rejected": -0.5917783975601196,
|
|
"logps/chosen": -60.68791961669922,
|
|
"logps/ref_chosen": -52.39896011352539,
|
|
"logps/ref_rejected": -72.16735076904297,
|
|
"logps/rejected": -102.79643249511719,
|
|
"loss": 0.5283,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.19385695457458496,
|
|
"margin_dpo/beta_margin_grad_std": 0.18882273137569427,
|
|
"margin_dpo/beta_margin_mean": 2.2340126037597656,
|
|
"margin_dpo/beta_margin_std": 1.9784209728240967,
|
|
"margin_dpo/loss_margin_mean": 22.340126037597656,
|
|
"margin_dpo/margin_mean": 22.340126037597656,
|
|
"margin_dpo/margin_std": 19.765621185302734,
|
|
"step": 189
|
|
},
|
|
{
|
|
"epoch": 0.2790014684287812,
|
|
"grad_norm": 51.31618118286133,
|
|
"learning_rate": 4.5404922808905543e-07,
|
|
"logits/chosen": -0.7103268504142761,
|
|
"logits/rejected": -0.6599196195602417,
|
|
"logps/chosen": -73.30516052246094,
|
|
"logps/ref_chosen": -64.68305969238281,
|
|
"logps/ref_rejected": -102.55052185058594,
|
|
"logps/rejected": -134.74267578125,
|
|
"loss": 0.5025,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.19283924996852875,
|
|
"margin_dpo/beta_margin_grad_std": 0.19168947637081146,
|
|
"margin_dpo/beta_margin_mean": 2.3570051193237305,
|
|
"margin_dpo/beta_margin_std": 2.1541616916656494,
|
|
"margin_dpo/loss_margin_mean": 23.570051193237305,
|
|
"margin_dpo/margin_mean": 23.570049285888672,
|
|
"margin_dpo/margin_std": 20.566938400268555,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.28046989720998533,
|
|
"grad_norm": 34.443443298339844,
|
|
"learning_rate": 4.5330506821893565e-07,
|
|
"logits/chosen": -0.6464371681213379,
|
|
"logits/rejected": -0.6282103061676025,
|
|
"logps/chosen": -75.33536529541016,
|
|
"logps/ref_chosen": -68.65887451171875,
|
|
"logps/ref_rejected": -110.1396713256836,
|
|
"logps/rejected": -144.71490478515625,
|
|
"loss": 0.4356,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.16399236023426056,
|
|
"margin_dpo/beta_margin_grad_std": 0.19074279069900513,
|
|
"margin_dpo/beta_margin_mean": 2.789874792098999,
|
|
"margin_dpo/beta_margin_std": 2.458559274673462,
|
|
"margin_dpo/loss_margin_mean": 27.89874839782715,
|
|
"margin_dpo/margin_mean": 27.898746490478516,
|
|
"margin_dpo/margin_std": 24.31833267211914,
|
|
"step": 191
|
|
},
|
|
{
|
|
"epoch": 0.28193832599118945,
|
|
"grad_norm": 53.86375427246094,
|
|
"learning_rate": 4.5255555107119336e-07,
|
|
"logits/chosen": -0.6596635580062866,
|
|
"logits/rejected": -0.6359300017356873,
|
|
"logps/chosen": -80.91861724853516,
|
|
"logps/ref_chosen": -69.72691345214844,
|
|
"logps/ref_rejected": -103.32135009765625,
|
|
"logps/rejected": -138.19093322753906,
|
|
"loss": 0.5499,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.20277048647403717,
|
|
"margin_dpo/beta_margin_grad_std": 0.2157028615474701,
|
|
"margin_dpo/beta_margin_mean": 2.367788076400757,
|
|
"margin_dpo/beta_margin_std": 2.2121126651763916,
|
|
"margin_dpo/loss_margin_mean": 23.677879333496094,
|
|
"margin_dpo/margin_mean": 23.677879333496094,
|
|
"margin_dpo/margin_std": 21.862239837646484,
|
|
"step": 192
|
|
},
|
|
{
|
|
"epoch": 0.2834067547723935,
|
|
"grad_norm": 83.77180480957031,
|
|
"learning_rate": 4.5180069639630236e-07,
|
|
"logits/chosen": -0.691096305847168,
|
|
"logits/rejected": -0.639538049697876,
|
|
"logps/chosen": -71.17577362060547,
|
|
"logps/ref_chosen": -60.19049835205078,
|
|
"logps/ref_rejected": -76.40755462646484,
|
|
"logps/rejected": -104.34375,
|
|
"loss": 0.781,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.23370835185050964,
|
|
"margin_dpo/beta_margin_grad_std": 0.2316931039094925,
|
|
"margin_dpo/beta_margin_mean": 1.6950924396514893,
|
|
"margin_dpo/beta_margin_std": 1.8348240852355957,
|
|
"margin_dpo/loss_margin_mean": 16.950923919677734,
|
|
"margin_dpo/margin_mean": 16.950923919677734,
|
|
"margin_dpo/margin_std": 17.993907928466797,
|
|
"step": 193
|
|
},
|
|
{
|
|
"epoch": 0.28487518355359764,
|
|
"grad_norm": 36.576637268066406,
|
|
"learning_rate": 4.510405240853854e-07,
|
|
"logits/chosen": -0.6739726662635803,
|
|
"logits/rejected": -0.6504776477813721,
|
|
"logps/chosen": -45.40176010131836,
|
|
"logps/ref_chosen": -37.84037399291992,
|
|
"logps/ref_rejected": -60.684783935546875,
|
|
"logps/rejected": -90.91381072998047,
|
|
"loss": 0.5108,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.20001572370529175,
|
|
"margin_dpo/beta_margin_grad_std": 0.18756882846355438,
|
|
"margin_dpo/beta_margin_mean": 2.2667644023895264,
|
|
"margin_dpo/beta_margin_std": 1.985824704170227,
|
|
"margin_dpo/loss_margin_mean": 22.66764259338379,
|
|
"margin_dpo/margin_mean": 22.66764259338379,
|
|
"margin_dpo/margin_std": 19.685068130493164,
|
|
"step": 194
|
|
},
|
|
{
|
|
"epoch": 0.28634361233480177,
|
|
"grad_norm": 54.66409683227539,
|
|
"learning_rate": 4.5027505416968985e-07,
|
|
"logits/chosen": -0.6257681846618652,
|
|
"logits/rejected": -0.6233822107315063,
|
|
"logps/chosen": -66.51872253417969,
|
|
"logps/ref_chosen": -54.891571044921875,
|
|
"logps/ref_rejected": -96.77095794677734,
|
|
"logps/rejected": -130.88046264648438,
|
|
"loss": 0.4833,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.18052619695663452,
|
|
"margin_dpo/beta_margin_grad_std": 0.19298022985458374,
|
|
"margin_dpo/beta_margin_mean": 2.248234510421753,
|
|
"margin_dpo/beta_margin_std": 1.8670945167541504,
|
|
"margin_dpo/loss_margin_mean": 22.482343673706055,
|
|
"margin_dpo/margin_mean": 22.482345581054688,
|
|
"margin_dpo/margin_std": 18.633167266845703,
|
|
"step": 195
|
|
},
|
|
{
|
|
"epoch": 0.2878120411160059,
|
|
"grad_norm": 45.430152893066406,
|
|
"learning_rate": 4.495043068200599e-07,
|
|
"logits/chosen": -0.6508013010025024,
|
|
"logits/rejected": -0.6155145168304443,
|
|
"logps/chosen": -61.67543029785156,
|
|
"logps/ref_chosen": -53.245243072509766,
|
|
"logps/ref_rejected": -76.05294799804688,
|
|
"logps/rejected": -111.40143585205078,
|
|
"loss": 0.442,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.16920757293701172,
|
|
"margin_dpo/beta_margin_grad_std": 0.1858920007944107,
|
|
"margin_dpo/beta_margin_mean": 2.6918299198150635,
|
|
"margin_dpo/beta_margin_std": 2.2219343185424805,
|
|
"margin_dpo/loss_margin_mean": 26.918298721313477,
|
|
"margin_dpo/margin_mean": 26.918298721313477,
|
|
"margin_dpo/margin_std": 22.07672119140625,
|
|
"step": 196
|
|
},
|
|
{
|
|
"epoch": 0.28928046989721,
|
|
"grad_norm": 52.48564910888672,
|
|
"learning_rate": 4.4872830234640493e-07,
|
|
"logits/chosen": -0.619194507598877,
|
|
"logits/rejected": -0.6072961091995239,
|
|
"logps/chosen": -69.83384704589844,
|
|
"logps/ref_chosen": -60.42033767700195,
|
|
"logps/ref_rejected": -77.20890808105469,
|
|
"logps/rejected": -107.95001220703125,
|
|
"loss": 0.5151,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.20070594549179077,
|
|
"margin_dpo/beta_margin_grad_std": 0.18301643431186676,
|
|
"margin_dpo/beta_margin_mean": 2.132758855819702,
|
|
"margin_dpo/beta_margin_std": 1.8700523376464844,
|
|
"margin_dpo/loss_margin_mean": 21.32758903503418,
|
|
"margin_dpo/margin_mean": 21.32758903503418,
|
|
"margin_dpo/margin_std": 18.477304458618164,
|
|
"step": 197
|
|
},
|
|
{
|
|
"epoch": 0.2907488986784141,
|
|
"grad_norm": 50.00718307495117,
|
|
"learning_rate": 4.479470611971645e-07,
|
|
"logits/chosen": -0.6850384473800659,
|
|
"logits/rejected": -0.6736607551574707,
|
|
"logps/chosen": -64.92497253417969,
|
|
"logps/ref_chosen": -55.03618621826172,
|
|
"logps/ref_rejected": -97.24325561523438,
|
|
"logps/rejected": -130.21774291992188,
|
|
"loss": 0.5106,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1974949985742569,
|
|
"margin_dpo/beta_margin_grad_std": 0.18310509622097015,
|
|
"margin_dpo/beta_margin_mean": 2.30856990814209,
|
|
"margin_dpo/beta_margin_std": 2.314561128616333,
|
|
"margin_dpo/loss_margin_mean": 23.085697174072266,
|
|
"margin_dpo/margin_mean": 23.085697174072266,
|
|
"margin_dpo/margin_std": 22.705781936645508,
|
|
"step": 198
|
|
},
|
|
{
|
|
"epoch": 0.2922173274596182,
|
|
"grad_norm": 54.243560791015625,
|
|
"learning_rate": 4.471606039587695e-07,
|
|
"logits/chosen": -0.681465744972229,
|
|
"logits/rejected": -0.6637758016586304,
|
|
"logps/chosen": -66.67937469482422,
|
|
"logps/ref_chosen": -56.828826904296875,
|
|
"logps/ref_rejected": -84.64820861816406,
|
|
"logps/rejected": -117.4178237915039,
|
|
"loss": 0.596,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.21248871088027954,
|
|
"margin_dpo/beta_margin_grad_std": 0.220571368932724,
|
|
"margin_dpo/beta_margin_mean": 2.2919063568115234,
|
|
"margin_dpo/beta_margin_std": 2.2386255264282227,
|
|
"margin_dpo/loss_margin_mean": 22.919063568115234,
|
|
"margin_dpo/margin_mean": 22.919063568115234,
|
|
"margin_dpo/margin_std": 22.307586669921875,
|
|
"step": 199
|
|
},
|
|
{
|
|
"epoch": 0.2936857562408223,
|
|
"grad_norm": 69.43572998046875,
|
|
"learning_rate": 4.4636895135509966e-07,
|
|
"logits/chosen": -0.664925217628479,
|
|
"logits/rejected": -0.6286830902099609,
|
|
"logps/chosen": -64.1875991821289,
|
|
"logps/ref_chosen": -53.06706237792969,
|
|
"logps/ref_rejected": -80.60843658447266,
|
|
"logps/rejected": -116.37126159667969,
|
|
"loss": 0.6573,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.2101076990365982,
|
|
"margin_dpo/beta_margin_grad_std": 0.22291822731494904,
|
|
"margin_dpo/beta_margin_mean": 2.464228868484497,
|
|
"margin_dpo/beta_margin_std": 2.5758798122406006,
|
|
"margin_dpo/loss_margin_mean": 24.642288208007812,
|
|
"margin_dpo/margin_mean": 24.642288208007812,
|
|
"margin_dpo/margin_std": 25.661331176757812,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.2936857562408223,
|
|
"eval_logits/chosen": -0.6437793374061584,
|
|
"eval_logits/rejected": -0.6209864616394043,
|
|
"eval_logps/chosen": -92.97444915771484,
|
|
"eval_logps/ref_chosen": -79.05104064941406,
|
|
"eval_logps/ref_rejected": -86.79793548583984,
|
|
"eval_logps/rejected": -115.39439392089844,
|
|
"eval_loss": 0.4776662588119507,
|
|
"eval_margin_dpo/beta": 0.10000000149011612,
|
|
"eval_margin_dpo/beta_margin_grad_mean": -0.31056177616119385,
|
|
"eval_margin_dpo/beta_margin_grad_std": 0.24659062922000885,
|
|
"eval_margin_dpo/beta_margin_mean": 1.4673058986663818,
|
|
"eval_margin_dpo/beta_margin_std": 2.1103227138519287,
|
|
"eval_margin_dpo/loss_margin_mean": 14.67305850982666,
|
|
"eval_margin_dpo/margin_mean": 14.67305850982666,
|
|
"eval_margin_dpo/margin_std": 21.103225708007812,
|
|
"eval_runtime": 40.1206,
|
|
"eval_samples_per_second": 58.299,
|
|
"eval_steps_per_second": 1.844,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.29515418502202645,
|
|
"grad_norm": 52.3123893737793,
|
|
"learning_rate": 4.455721242469372e-07,
|
|
"logits/chosen": -0.6279897689819336,
|
|
"logits/rejected": -0.5977976322174072,
|
|
"logps/chosen": -83.8150634765625,
|
|
"logps/ref_chosen": -75.4022216796875,
|
|
"logps/ref_rejected": -114.80821990966797,
|
|
"logps/rejected": -148.78948974609375,
|
|
"loss": 0.5336,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.18962885439395905,
|
|
"margin_dpo/beta_margin_grad_std": 0.21612294018268585,
|
|
"margin_dpo/beta_margin_mean": 2.556842803955078,
|
|
"margin_dpo/beta_margin_std": 2.3333568572998047,
|
|
"margin_dpo/loss_margin_mean": 25.56842613220215,
|
|
"margin_dpo/margin_mean": 25.56842803955078,
|
|
"margin_dpo/margin_std": 23.23642349243164,
|
|
"step": 201
|
|
},
|
|
{
|
|
"epoch": 0.2966226138032305,
|
|
"grad_norm": 71.65006256103516,
|
|
"learning_rate": 4.4477014363141755e-07,
|
|
"logits/chosen": -0.6751070618629456,
|
|
"logits/rejected": -0.663360059261322,
|
|
"logps/chosen": -60.95005798339844,
|
|
"logps/ref_chosen": -50.101318359375,
|
|
"logps/ref_rejected": -86.98503112792969,
|
|
"logps/rejected": -116.69070434570312,
|
|
"loss": 0.7708,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.25191596150398254,
|
|
"margin_dpo/beta_margin_grad_std": 0.24605146050453186,
|
|
"margin_dpo/beta_margin_mean": 1.8856925964355469,
|
|
"margin_dpo/beta_margin_std": 2.1072704792022705,
|
|
"margin_dpo/loss_margin_mean": 18.85692596435547,
|
|
"margin_dpo/margin_mean": 18.85692596435547,
|
|
"margin_dpo/margin_std": 21.052621841430664,
|
|
"step": 202
|
|
},
|
|
{
|
|
"epoch": 0.29809104258443464,
|
|
"grad_norm": 45.57592010498047,
|
|
"learning_rate": 4.439630306414758e-07,
|
|
"logits/chosen": -0.6758503317832947,
|
|
"logits/rejected": -0.632592499256134,
|
|
"logps/chosen": -68.63609313964844,
|
|
"logps/ref_chosen": -60.60969543457031,
|
|
"logps/ref_rejected": -85.89596557617188,
|
|
"logps/rejected": -114.98287963867188,
|
|
"loss": 0.532,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.20186971127986908,
|
|
"margin_dpo/beta_margin_grad_std": 0.19567202031612396,
|
|
"margin_dpo/beta_margin_mean": 2.106050491333008,
|
|
"margin_dpo/beta_margin_std": 1.8404932022094727,
|
|
"margin_dpo/loss_margin_mean": 21.060503005981445,
|
|
"margin_dpo/margin_mean": 21.060504913330078,
|
|
"margin_dpo/margin_std": 18.373455047607422,
|
|
"step": 203
|
|
},
|
|
{
|
|
"epoch": 0.29955947136563876,
|
|
"grad_norm": 46.98274230957031,
|
|
"learning_rate": 4.431508065452897e-07,
|
|
"logits/chosen": -0.640312910079956,
|
|
"logits/rejected": -0.5806652307510376,
|
|
"logps/chosen": -89.8402099609375,
|
|
"logps/ref_chosen": -80.16496276855469,
|
|
"logps/ref_rejected": -87.69590759277344,
|
|
"logps/rejected": -119.30364990234375,
|
|
"loss": 0.5197,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1984976977109909,
|
|
"margin_dpo/beta_margin_grad_std": 0.19227007031440735,
|
|
"margin_dpo/beta_margin_mean": 2.193248987197876,
|
|
"margin_dpo/beta_margin_std": 2.008610725402832,
|
|
"margin_dpo/loss_margin_mean": 21.9324893951416,
|
|
"margin_dpo/margin_mean": 21.9324893951416,
|
|
"margin_dpo/margin_std": 19.7504825592041,
|
|
"step": 204
|
|
},
|
|
{
|
|
"epoch": 0.3010279001468429,
|
|
"grad_norm": 67.07288360595703,
|
|
"learning_rate": 4.4233349274571974e-07,
|
|
"logits/chosen": -0.690580427646637,
|
|
"logits/rejected": -0.6472284197807312,
|
|
"logps/chosen": -70.58891296386719,
|
|
"logps/ref_chosen": -59.384735107421875,
|
|
"logps/ref_rejected": -85.12505340576172,
|
|
"logps/rejected": -120.11308288574219,
|
|
"loss": 0.6391,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.21127313375473022,
|
|
"margin_dpo/beta_margin_grad_std": 0.2417152225971222,
|
|
"margin_dpo/beta_margin_mean": 2.378384828567505,
|
|
"margin_dpo/beta_margin_std": 2.325178384780884,
|
|
"margin_dpo/loss_margin_mean": 23.78384780883789,
|
|
"margin_dpo/margin_mean": 23.78384780883789,
|
|
"margin_dpo/margin_std": 23.02971649169922,
|
|
"step": 205
|
|
},
|
|
{
|
|
"epoch": 0.302496328928047,
|
|
"grad_norm": 44.62752914428711,
|
|
"learning_rate": 4.415111107797445e-07,
|
|
"logits/chosen": -0.6897552013397217,
|
|
"logits/rejected": -0.6878204345703125,
|
|
"logps/chosen": -57.75178527832031,
|
|
"logps/ref_chosen": -46.964500427246094,
|
|
"logps/ref_rejected": -98.9534912109375,
|
|
"logps/rejected": -137.04470825195312,
|
|
"loss": 0.4331,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.16189786791801453,
|
|
"margin_dpo/beta_margin_grad_std": 0.19883880019187927,
|
|
"margin_dpo/beta_margin_mean": 2.730393171310425,
|
|
"margin_dpo/beta_margin_std": 2.1477179527282715,
|
|
"margin_dpo/loss_margin_mean": 27.303930282592773,
|
|
"margin_dpo/margin_mean": 27.303932189941406,
|
|
"margin_dpo/margin_std": 21.387907028198242,
|
|
"step": 206
|
|
},
|
|
{
|
|
"epoch": 0.3039647577092511,
|
|
"grad_norm": 51.993370056152344,
|
|
"learning_rate": 4.4068368231789365e-07,
|
|
"logits/chosen": -0.6620955467224121,
|
|
"logits/rejected": -0.6329070925712585,
|
|
"logps/chosen": -64.26461791992188,
|
|
"logps/ref_chosen": -56.05625915527344,
|
|
"logps/ref_rejected": -84.44779968261719,
|
|
"logps/rejected": -121.12300872802734,
|
|
"loss": 0.4613,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1773681491613388,
|
|
"margin_dpo/beta_margin_grad_std": 0.19014772772789001,
|
|
"margin_dpo/beta_margin_mean": 2.8466851711273193,
|
|
"margin_dpo/beta_margin_std": 2.5996739864349365,
|
|
"margin_dpo/loss_margin_mean": 28.46685028076172,
|
|
"margin_dpo/margin_mean": 28.46685028076172,
|
|
"margin_dpo/margin_std": 25.901378631591797,
|
|
"step": 207
|
|
},
|
|
{
|
|
"epoch": 0.3054331864904552,
|
|
"grad_norm": 52.05033493041992,
|
|
"learning_rate": 4.398512291636768e-07,
|
|
"logits/chosen": -0.6714012622833252,
|
|
"logits/rejected": -0.6417751312255859,
|
|
"logps/chosen": -79.44926452636719,
|
|
"logps/ref_chosen": -67.06761169433594,
|
|
"logps/ref_rejected": -94.28689575195312,
|
|
"logps/rejected": -129.90614318847656,
|
|
"loss": 0.4839,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.19027359783649445,
|
|
"margin_dpo/beta_margin_grad_std": 0.18024224042892456,
|
|
"margin_dpo/beta_margin_mean": 2.3237593173980713,
|
|
"margin_dpo/beta_margin_std": 2.0613441467285156,
|
|
"margin_dpo/loss_margin_mean": 23.237592697143555,
|
|
"margin_dpo/margin_mean": 23.237594604492188,
|
|
"margin_dpo/margin_std": 20.450965881347656,
|
|
"step": 208
|
|
},
|
|
{
|
|
"epoch": 0.3069016152716593,
|
|
"grad_norm": 49.21156692504883,
|
|
"learning_rate": 4.3901377325300857e-07,
|
|
"logits/chosen": -0.6503257751464844,
|
|
"logits/rejected": -0.6238174438476562,
|
|
"logps/chosen": -65.82164764404297,
|
|
"logps/ref_chosen": -56.18169403076172,
|
|
"logps/ref_rejected": -80.94152069091797,
|
|
"logps/rejected": -114.3055419921875,
|
|
"loss": 0.6043,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.20685909688472748,
|
|
"margin_dpo/beta_margin_grad_std": 0.23329907655715942,
|
|
"margin_dpo/beta_margin_mean": 2.372406482696533,
|
|
"margin_dpo/beta_margin_std": 2.2294185161590576,
|
|
"margin_dpo/loss_margin_mean": 23.724063873291016,
|
|
"margin_dpo/margin_mean": 23.724063873291016,
|
|
"margin_dpo/margin_std": 21.8853816986084,
|
|
"step": 209
|
|
},
|
|
{
|
|
"epoch": 0.30837004405286345,
|
|
"grad_norm": 47.80137634277344,
|
|
"learning_rate": 4.381713366536311e-07,
|
|
"logits/chosen": -0.6832214593887329,
|
|
"logits/rejected": -0.6534386873245239,
|
|
"logps/chosen": -56.31086349487305,
|
|
"logps/ref_chosen": -46.371822357177734,
|
|
"logps/ref_rejected": -76.68162536621094,
|
|
"logps/rejected": -108.84925842285156,
|
|
"loss": 0.5055,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.19675123691558838,
|
|
"margin_dpo/beta_margin_grad_std": 0.18586638569831848,
|
|
"margin_dpo/beta_margin_mean": 2.2228593826293945,
|
|
"margin_dpo/beta_margin_std": 1.9132236242294312,
|
|
"margin_dpo/loss_margin_mean": 22.228591918945312,
|
|
"margin_dpo/margin_mean": 22.228591918945312,
|
|
"margin_dpo/margin_std": 18.896484375,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.30983847283406757,
|
|
"grad_norm": 59.744529724121094,
|
|
"learning_rate": 4.373239415645323e-07,
|
|
"logits/chosen": -0.6856693029403687,
|
|
"logits/rejected": -0.6315619945526123,
|
|
"logps/chosen": -91.70536041259766,
|
|
"logps/ref_chosen": -78.93235778808594,
|
|
"logps/ref_rejected": -86.82098388671875,
|
|
"logps/rejected": -122.416015625,
|
|
"loss": 0.597,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.20305074751377106,
|
|
"margin_dpo/beta_margin_grad_std": 0.23186683654785156,
|
|
"margin_dpo/beta_margin_mean": 2.2822024822235107,
|
|
"margin_dpo/beta_margin_std": 2.1043384075164795,
|
|
"margin_dpo/loss_margin_mean": 22.822023391723633,
|
|
"margin_dpo/margin_mean": 22.822023391723633,
|
|
"margin_dpo/margin_std": 20.654033660888672,
|
|
"step": 211
|
|
},
|
|
{
|
|
"epoch": 0.31130690161527164,
|
|
"grad_norm": 49.052188873291016,
|
|
"learning_rate": 4.3647161031536086e-07,
|
|
"logits/chosen": -0.6898226737976074,
|
|
"logits/rejected": -0.6595550775527954,
|
|
"logps/chosen": -70.00460052490234,
|
|
"logps/ref_chosen": -58.19701385498047,
|
|
"logps/ref_rejected": -103.05784606933594,
|
|
"logps/rejected": -143.5913543701172,
|
|
"loss": 0.4566,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.17350082099437714,
|
|
"margin_dpo/beta_margin_grad_std": 0.19579628109931946,
|
|
"margin_dpo/beta_margin_mean": 2.872591733932495,
|
|
"margin_dpo/beta_margin_std": 2.5697410106658936,
|
|
"margin_dpo/loss_margin_mean": 28.725915908813477,
|
|
"margin_dpo/margin_mean": 28.725915908813477,
|
|
"margin_dpo/margin_std": 24.575883865356445,
|
|
"step": 212
|
|
},
|
|
{
|
|
"epoch": 0.31277533039647576,
|
|
"grad_norm": 51.70877456665039,
|
|
"learning_rate": 4.3561436536583774e-07,
|
|
"logits/chosen": -0.6648838520050049,
|
|
"logits/rejected": -0.6213950514793396,
|
|
"logps/chosen": -77.46551513671875,
|
|
"logps/ref_chosen": -67.51271057128906,
|
|
"logps/ref_rejected": -93.91471862792969,
|
|
"logps/rejected": -132.9736785888672,
|
|
"loss": 0.4856,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1718873530626297,
|
|
"margin_dpo/beta_margin_grad_std": 0.20221562683582306,
|
|
"margin_dpo/beta_margin_mean": 2.9106154441833496,
|
|
"margin_dpo/beta_margin_std": 2.622797966003418,
|
|
"margin_dpo/loss_margin_mean": 29.10615348815918,
|
|
"margin_dpo/margin_mean": 29.106151580810547,
|
|
"margin_dpo/margin_std": 25.876745223999023,
|
|
"step": 213
|
|
},
|
|
{
|
|
"epoch": 0.3142437591776799,
|
|
"grad_norm": 59.337345123291016,
|
|
"learning_rate": 4.3475222930516473e-07,
|
|
"logits/chosen": -0.6546872854232788,
|
|
"logits/rejected": -0.6387699842453003,
|
|
"logps/chosen": -52.264801025390625,
|
|
"logps/ref_chosen": -41.604888916015625,
|
|
"logps/ref_rejected": -77.51741027832031,
|
|
"logps/rejected": -111.19406127929688,
|
|
"loss": 0.6245,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.21652430295944214,
|
|
"margin_dpo/beta_margin_grad_std": 0.2097923308610916,
|
|
"margin_dpo/beta_margin_mean": 2.3016743659973145,
|
|
"margin_dpo/beta_margin_std": 2.411076784133911,
|
|
"margin_dpo/loss_margin_mean": 23.016742706298828,
|
|
"margin_dpo/margin_mean": 23.016742706298828,
|
|
"margin_dpo/margin_std": 24.032947540283203,
|
|
"step": 214
|
|
},
|
|
{
|
|
"epoch": 0.315712187958884,
|
|
"grad_norm": 56.185760498046875,
|
|
"learning_rate": 4.3388522485142885e-07,
|
|
"logits/chosen": -0.6533488035202026,
|
|
"logits/rejected": -0.624896228313446,
|
|
"logps/chosen": -63.87721252441406,
|
|
"logps/ref_chosen": -53.279266357421875,
|
|
"logps/ref_rejected": -89.96464538574219,
|
|
"logps/rejected": -125.45509338378906,
|
|
"loss": 0.5351,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1931016743183136,
|
|
"margin_dpo/beta_margin_grad_std": 0.20534518361091614,
|
|
"margin_dpo/beta_margin_mean": 2.4892501831054688,
|
|
"margin_dpo/beta_margin_std": 2.3997206687927246,
|
|
"margin_dpo/loss_margin_mean": 24.892501831054688,
|
|
"margin_dpo/margin_mean": 24.892501831054688,
|
|
"margin_dpo/margin_std": 23.67044448852539,
|
|
"step": 215
|
|
},
|
|
{
|
|
"epoch": 0.31718061674008813,
|
|
"grad_norm": 63.23625564575195,
|
|
"learning_rate": 4.330133748510036e-07,
|
|
"logits/chosen": -0.6673412919044495,
|
|
"logits/rejected": -0.6347181797027588,
|
|
"logps/chosen": -61.9163932800293,
|
|
"logps/ref_chosen": -48.887794494628906,
|
|
"logps/ref_rejected": -77.19892883300781,
|
|
"logps/rejected": -117.18614196777344,
|
|
"loss": 0.5867,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.2070668637752533,
|
|
"margin_dpo/beta_margin_grad_std": 0.22892099618911743,
|
|
"margin_dpo/beta_margin_mean": 2.695861577987671,
|
|
"margin_dpo/beta_margin_std": 2.648024797439575,
|
|
"margin_dpo/loss_margin_mean": 26.958616256713867,
|
|
"margin_dpo/margin_mean": 26.958616256713867,
|
|
"margin_dpo/margin_std": 26.038864135742188,
|
|
"step": 216
|
|
},
|
|
{
|
|
"epoch": 0.3186490455212922,
|
|
"grad_norm": 40.630271911621094,
|
|
"learning_rate": 4.3213670227794757e-07,
|
|
"logits/chosen": -0.6815335750579834,
|
|
"logits/rejected": -0.6386614441871643,
|
|
"logps/chosen": -60.836097717285156,
|
|
"logps/ref_chosen": -49.845306396484375,
|
|
"logps/ref_rejected": -100.07832336425781,
|
|
"logps/rejected": -138.99900817871094,
|
|
"loss": 0.4057,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.15030139684677124,
|
|
"margin_dpo/beta_margin_grad_std": 0.1869634985923767,
|
|
"margin_dpo/beta_margin_mean": 2.7929890155792236,
|
|
"margin_dpo/beta_margin_std": 2.152282953262329,
|
|
"margin_dpo/loss_margin_mean": 27.929887771606445,
|
|
"margin_dpo/margin_mean": 27.929889678955078,
|
|
"margin_dpo/margin_std": 21.507854461669922,
|
|
"step": 217
|
|
},
|
|
{
|
|
"epoch": 0.3201174743024963,
|
|
"grad_norm": 54.89970397949219,
|
|
"learning_rate": 4.3125523023339815e-07,
|
|
"logits/chosen": -0.6562488079071045,
|
|
"logits/rejected": -0.6250983476638794,
|
|
"logps/chosen": -69.97561645507812,
|
|
"logps/ref_chosen": -58.576683044433594,
|
|
"logps/ref_rejected": -87.84639739990234,
|
|
"logps/rejected": -124.18275451660156,
|
|
"loss": 0.5357,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.19920094311237335,
|
|
"margin_dpo/beta_margin_grad_std": 0.20679454505443573,
|
|
"margin_dpo/beta_margin_mean": 2.4937424659729004,
|
|
"margin_dpo/beta_margin_std": 2.4085628986358643,
|
|
"margin_dpo/loss_margin_mean": 24.937423706054688,
|
|
"margin_dpo/margin_mean": 24.937423706054688,
|
|
"margin_dpo/margin_std": 23.69991683959961,
|
|
"step": 218
|
|
},
|
|
{
|
|
"epoch": 0.32158590308370044,
|
|
"grad_norm": 60.82085037231445,
|
|
"learning_rate": 4.303689819449636e-07,
|
|
"logits/chosen": -0.6473067998886108,
|
|
"logits/rejected": -0.6150014400482178,
|
|
"logps/chosen": -72.4845962524414,
|
|
"logps/ref_chosen": -61.083858489990234,
|
|
"logps/ref_rejected": -85.83042907714844,
|
|
"logps/rejected": -119.23858642578125,
|
|
"loss": 0.5267,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.19810640811920166,
|
|
"margin_dpo/beta_margin_grad_std": 0.19194501638412476,
|
|
"margin_dpo/beta_margin_mean": 2.200741767883301,
|
|
"margin_dpo/beta_margin_std": 1.9767764806747437,
|
|
"margin_dpo/loss_margin_mean": 22.007417678833008,
|
|
"margin_dpo/margin_mean": 22.007417678833008,
|
|
"margin_dpo/margin_std": 19.649311065673828,
|
|
"step": 219
|
|
},
|
|
{
|
|
"epoch": 0.32305433186490456,
|
|
"grad_norm": 47.847412109375,
|
|
"learning_rate": 4.2947798076611047e-07,
|
|
"logits/chosen": -0.6549057960510254,
|
|
"logits/rejected": -0.6138431429862976,
|
|
"logps/chosen": -81.12652587890625,
|
|
"logps/ref_chosen": -70.03128051757812,
|
|
"logps/ref_rejected": -87.68551635742188,
|
|
"logps/rejected": -119.67072296142578,
|
|
"loss": 0.5029,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.19834579527378082,
|
|
"margin_dpo/beta_margin_grad_std": 0.180747851729393,
|
|
"margin_dpo/beta_margin_mean": 2.0889971256256104,
|
|
"margin_dpo/beta_margin_std": 1.714986801147461,
|
|
"margin_dpo/loss_margin_mean": 20.889970779418945,
|
|
"margin_dpo/margin_mean": 20.889970779418945,
|
|
"margin_dpo/margin_std": 16.6192626953125,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.3245227606461087,
|
|
"grad_norm": 48.202903747558594,
|
|
"learning_rate": 4.285822501755485e-07,
|
|
"logits/chosen": -0.6510884761810303,
|
|
"logits/rejected": -0.6392531394958496,
|
|
"logps/chosen": -64.44230651855469,
|
|
"logps/ref_chosen": -52.15470886230469,
|
|
"logps/ref_rejected": -106.46768188476562,
|
|
"logps/rejected": -151.5745391845703,
|
|
"loss": 0.339,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.12460769712924957,
|
|
"margin_dpo/beta_margin_grad_std": 0.1816825121641159,
|
|
"margin_dpo/beta_margin_mean": 3.281925678253174,
|
|
"margin_dpo/beta_margin_std": 2.298614740371704,
|
|
"margin_dpo/loss_margin_mean": 32.81925582885742,
|
|
"margin_dpo/margin_mean": 32.81925582885742,
|
|
"margin_dpo/margin_std": 22.837791442871094,
|
|
"step": 221
|
|
},
|
|
{
|
|
"epoch": 0.32599118942731276,
|
|
"grad_norm": 76.29179382324219,
|
|
"learning_rate": 4.276818137766118e-07,
|
|
"logits/chosen": -0.7204064130783081,
|
|
"logits/rejected": -0.6859586238861084,
|
|
"logps/chosen": -74.65057373046875,
|
|
"logps/ref_chosen": -60.971099853515625,
|
|
"logps/ref_rejected": -100.00115203857422,
|
|
"logps/rejected": -139.82711791992188,
|
|
"loss": 0.5718,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.19307678937911987,
|
|
"margin_dpo/beta_margin_grad_std": 0.22420592606067657,
|
|
"margin_dpo/beta_margin_mean": 2.6146483421325684,
|
|
"margin_dpo/beta_margin_std": 2.517277479171753,
|
|
"margin_dpo/loss_margin_mean": 26.146484375,
|
|
"margin_dpo/margin_mean": 26.146484375,
|
|
"margin_dpo/margin_std": 24.794532775878906,
|
|
"step": 222
|
|
},
|
|
{
|
|
"epoch": 0.3274596182085169,
|
|
"grad_norm": 78.53938293457031,
|
|
"learning_rate": 4.2677669529663686e-07,
|
|
"logits/chosen": -0.7086101770401001,
|
|
"logits/rejected": -0.6605532169342041,
|
|
"logps/chosen": -68.55857849121094,
|
|
"logps/ref_chosen": -52.64057922363281,
|
|
"logps/ref_rejected": -82.82502746582031,
|
|
"logps/rejected": -121.03541564941406,
|
|
"loss": 0.738,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.22235842049121857,
|
|
"margin_dpo/beta_margin_grad_std": 0.26257219910621643,
|
|
"margin_dpo/beta_margin_mean": 2.229238271713257,
|
|
"margin_dpo/beta_margin_std": 2.2963743209838867,
|
|
"margin_dpo/loss_margin_mean": 22.292381286621094,
|
|
"margin_dpo/margin_mean": 22.292381286621094,
|
|
"margin_dpo/margin_std": 22.842185974121094,
|
|
"step": 223
|
|
},
|
|
{
|
|
"epoch": 0.328928046989721,
|
|
"grad_norm": 74.9097671508789,
|
|
"learning_rate": 4.2586691858633747e-07,
|
|
"logits/chosen": -0.6751635074615479,
|
|
"logits/rejected": -0.6340160369873047,
|
|
"logps/chosen": -61.69850158691406,
|
|
"logps/ref_chosen": -48.59540939331055,
|
|
"logps/ref_rejected": -77.11648559570312,
|
|
"logps/rejected": -116.998046875,
|
|
"loss": 0.5673,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.19054511189460754,
|
|
"margin_dpo/beta_margin_grad_std": 0.2218094766139984,
|
|
"margin_dpo/beta_margin_mean": 2.6778461933135986,
|
|
"margin_dpo/beta_margin_std": 2.5320727825164795,
|
|
"margin_dpo/loss_margin_mean": 26.778461456298828,
|
|
"margin_dpo/margin_mean": 26.778461456298828,
|
|
"margin_dpo/margin_std": 24.951221466064453,
|
|
"step": 224
|
|
},
|
|
{
|
|
"epoch": 0.3303964757709251,
|
|
"grad_norm": 43.42683792114258,
|
|
"learning_rate": 4.249525076191759e-07,
|
|
"logits/chosen": -0.6741304397583008,
|
|
"logits/rejected": -0.6419914960861206,
|
|
"logps/chosen": -72.66340637207031,
|
|
"logps/ref_chosen": -58.000465393066406,
|
|
"logps/ref_rejected": -99.90290832519531,
|
|
"logps/rejected": -147.5189208984375,
|
|
"loss": 0.4077,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.14978624880313873,
|
|
"margin_dpo/beta_margin_grad_std": 0.203273743391037,
|
|
"margin_dpo/beta_margin_mean": 3.2953078746795654,
|
|
"margin_dpo/beta_margin_std": 2.6787664890289307,
|
|
"margin_dpo/loss_margin_mean": 32.95307922363281,
|
|
"margin_dpo/margin_mean": 32.95307922363281,
|
|
"margin_dpo/margin_std": 26.78388214111328,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 0.33186490455212925,
|
|
"grad_norm": 51.3669548034668,
|
|
"learning_rate": 4.2403348649073167e-07,
|
|
"logits/chosen": -0.6864838600158691,
|
|
"logits/rejected": -0.6341279745101929,
|
|
"logps/chosen": -69.51836395263672,
|
|
"logps/ref_chosen": -58.898799896240234,
|
|
"logps/ref_rejected": -78.68775939941406,
|
|
"logps/rejected": -114.87089538574219,
|
|
"loss": 0.4851,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.17993833124637604,
|
|
"margin_dpo/beta_margin_grad_std": 0.19534794986248016,
|
|
"margin_dpo/beta_margin_mean": 2.5563576221466064,
|
|
"margin_dpo/beta_margin_std": 2.2327442169189453,
|
|
"margin_dpo/loss_margin_mean": 25.563575744628906,
|
|
"margin_dpo/margin_mean": 25.563575744628906,
|
|
"margin_dpo/margin_std": 21.131916046142578,
|
|
"step": 226
|
|
},
|
|
{
|
|
"epoch": 0.3333333333333333,
|
|
"grad_norm": 48.44467544555664,
|
|
"learning_rate": 4.2310987941806615e-07,
|
|
"logits/chosen": -0.6759487390518188,
|
|
"logits/rejected": -0.6457496881484985,
|
|
"logps/chosen": -70.79923248291016,
|
|
"logps/ref_chosen": -59.072181701660156,
|
|
"logps/ref_rejected": -99.41236877441406,
|
|
"logps/rejected": -142.67623901367188,
|
|
"loss": 0.4141,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.15354523062705994,
|
|
"margin_dpo/beta_margin_grad_std": 0.1981254369020462,
|
|
"margin_dpo/beta_margin_mean": 3.153681993484497,
|
|
"margin_dpo/beta_margin_std": 2.6311187744140625,
|
|
"margin_dpo/loss_margin_mean": 31.53681755065918,
|
|
"margin_dpo/margin_mean": 31.536819458007812,
|
|
"margin_dpo/margin_std": 26.029647827148438,
|
|
"step": 227
|
|
},
|
|
{
|
|
"epoch": 0.33480176211453744,
|
|
"grad_norm": 55.06504821777344,
|
|
"learning_rate": 4.2218171073908463e-07,
|
|
"logits/chosen": -0.6563422679901123,
|
|
"logits/rejected": -0.6227169036865234,
|
|
"logps/chosen": -78.89456176757812,
|
|
"logps/ref_chosen": -65.89129638671875,
|
|
"logps/ref_rejected": -91.04875183105469,
|
|
"logps/rejected": -128.3238525390625,
|
|
"loss": 0.5336,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.18722449243068695,
|
|
"margin_dpo/beta_margin_grad_std": 0.2196025550365448,
|
|
"margin_dpo/beta_margin_mean": 2.427182912826538,
|
|
"margin_dpo/beta_margin_std": 2.091808795928955,
|
|
"margin_dpo/loss_margin_mean": 24.27182960510254,
|
|
"margin_dpo/margin_mean": 24.27182960510254,
|
|
"margin_dpo/margin_std": 20.79153823852539,
|
|
"step": 228
|
|
},
|
|
{
|
|
"epoch": 0.33627019089574156,
|
|
"grad_norm": 64.2571792602539,
|
|
"learning_rate": 4.212490049118951e-07,
|
|
"logits/chosen": -0.6885573863983154,
|
|
"logits/rejected": -0.6359836459159851,
|
|
"logps/chosen": -85.07996368408203,
|
|
"logps/ref_chosen": -70.70636749267578,
|
|
"logps/ref_rejected": -84.52740478515625,
|
|
"logps/rejected": -126.00403594970703,
|
|
"loss": 0.598,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1898403912782669,
|
|
"margin_dpo/beta_margin_grad_std": 0.23285524547100067,
|
|
"margin_dpo/beta_margin_mean": 2.71030330657959,
|
|
"margin_dpo/beta_margin_std": 2.555929660797119,
|
|
"margin_dpo/loss_margin_mean": 27.1030330657959,
|
|
"margin_dpo/margin_mean": 27.103031158447266,
|
|
"margin_dpo/margin_std": 25.39706802368164,
|
|
"step": 229
|
|
},
|
|
{
|
|
"epoch": 0.3377386196769457,
|
|
"grad_norm": 50.68180465698242,
|
|
"learning_rate": 4.203117865141635e-07,
|
|
"logits/chosen": -0.6804044842720032,
|
|
"logits/rejected": -0.6706264019012451,
|
|
"logps/chosen": -51.398292541503906,
|
|
"logps/ref_chosen": -39.282005310058594,
|
|
"logps/ref_rejected": -85.62191009521484,
|
|
"logps/rejected": -128.11248779296875,
|
|
"loss": 0.5067,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.16133855283260345,
|
|
"margin_dpo/beta_margin_grad_std": 0.21256163716316223,
|
|
"margin_dpo/beta_margin_mean": 3.037428140640259,
|
|
"margin_dpo/beta_margin_std": 2.7951576709747314,
|
|
"margin_dpo/loss_margin_mean": 30.37428092956543,
|
|
"margin_dpo/margin_mean": 30.37428092956543,
|
|
"margin_dpo/margin_std": 27.84336280822754,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.3392070484581498,
|
|
"grad_norm": 42.53703689575195,
|
|
"learning_rate": 4.1937008024246625e-07,
|
|
"logits/chosen": -0.6829984188079834,
|
|
"logits/rejected": -0.6394829750061035,
|
|
"logps/chosen": -74.62582397460938,
|
|
"logps/ref_chosen": -63.27644348144531,
|
|
"logps/ref_rejected": -74.1239013671875,
|
|
"logps/rejected": -111.50166320800781,
|
|
"loss": 0.4698,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.18761104345321655,
|
|
"margin_dpo/beta_margin_grad_std": 0.17357850074768066,
|
|
"margin_dpo/beta_margin_mean": 2.6028378009796143,
|
|
"margin_dpo/beta_margin_std": 2.508455276489258,
|
|
"margin_dpo/loss_margin_mean": 26.028377532958984,
|
|
"margin_dpo/margin_mean": 26.028377532958984,
|
|
"margin_dpo/margin_std": 24.996898651123047,
|
|
"step": 231
|
|
},
|
|
{
|
|
"epoch": 0.3406754772393539,
|
|
"grad_norm": 70.08275604248047,
|
|
"learning_rate": 4.1842391091163933e-07,
|
|
"logits/chosen": -0.6572903394699097,
|
|
"logits/rejected": -0.5994934439659119,
|
|
"logps/chosen": -84.29617309570312,
|
|
"logps/ref_chosen": -70.74876403808594,
|
|
"logps/ref_rejected": -83.97706604003906,
|
|
"logps/rejected": -118.73604583740234,
|
|
"loss": 0.6921,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.2373134195804596,
|
|
"margin_dpo/beta_margin_grad_std": 0.24425449967384338,
|
|
"margin_dpo/beta_margin_mean": 2.1211562156677246,
|
|
"margin_dpo/beta_margin_std": 2.3135242462158203,
|
|
"margin_dpo/loss_margin_mean": 21.211563110351562,
|
|
"margin_dpo/margin_mean": 21.211563110351562,
|
|
"margin_dpo/margin_std": 22.4114933013916,
|
|
"step": 232
|
|
},
|
|
{
|
|
"epoch": 0.342143906020558,
|
|
"grad_norm": 61.6278076171875,
|
|
"learning_rate": 4.174733034541245e-07,
|
|
"logits/chosen": -0.6890474557876587,
|
|
"logits/rejected": -0.6643567085266113,
|
|
"logps/chosen": -67.88652801513672,
|
|
"logps/ref_chosen": -54.8829345703125,
|
|
"logps/ref_rejected": -107.48007202148438,
|
|
"logps/rejected": -148.36856079101562,
|
|
"loss": 0.5602,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.18775954842567444,
|
|
"margin_dpo/beta_margin_grad_std": 0.23140782117843628,
|
|
"margin_dpo/beta_margin_mean": 2.7884888648986816,
|
|
"margin_dpo/beta_margin_std": 2.659421682357788,
|
|
"margin_dpo/loss_margin_mean": 27.8848876953125,
|
|
"margin_dpo/margin_mean": 27.884885787963867,
|
|
"margin_dpo/margin_std": 26.062320709228516,
|
|
"step": 233
|
|
},
|
|
{
|
|
"epoch": 0.3436123348017621,
|
|
"grad_norm": 60.47285461425781,
|
|
"learning_rate": 4.165182829193126e-07,
|
|
"logits/chosen": -0.6370252370834351,
|
|
"logits/rejected": -0.6381373405456543,
|
|
"logps/chosen": -54.90777587890625,
|
|
"logps/ref_chosen": -44.09451675415039,
|
|
"logps/ref_rejected": -100.00663757324219,
|
|
"logps/rejected": -138.9691162109375,
|
|
"loss": 0.4561,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.15120825171470642,
|
|
"margin_dpo/beta_margin_grad_std": 0.1929025799036026,
|
|
"margin_dpo/beta_margin_mean": 2.814922571182251,
|
|
"margin_dpo/beta_margin_std": 2.241670846939087,
|
|
"margin_dpo/loss_margin_mean": 28.14922523498535,
|
|
"margin_dpo/margin_mean": 28.14922332763672,
|
|
"margin_dpo/margin_std": 21.847400665283203,
|
|
"step": 234
|
|
},
|
|
{
|
|
"epoch": 0.34508076358296624,
|
|
"grad_norm": 63.21758270263672,
|
|
"learning_rate": 4.1555887447288255e-07,
|
|
"logits/chosen": -0.6568065881729126,
|
|
"logits/rejected": -0.614643931388855,
|
|
"logps/chosen": -77.54314422607422,
|
|
"logps/ref_chosen": -62.237911224365234,
|
|
"logps/ref_rejected": -90.39505767822266,
|
|
"logps/rejected": -128.5604248046875,
|
|
"loss": 0.5974,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.215665802359581,
|
|
"margin_dpo/beta_margin_grad_std": 0.2162242829799652,
|
|
"margin_dpo/beta_margin_mean": 2.2860143184661865,
|
|
"margin_dpo/beta_margin_std": 2.296712875366211,
|
|
"margin_dpo/loss_margin_mean": 22.860143661499023,
|
|
"margin_dpo/margin_mean": 22.86014175415039,
|
|
"margin_dpo/margin_std": 22.919218063354492,
|
|
"step": 235
|
|
},
|
|
{
|
|
"epoch": 0.3465491923641703,
|
|
"grad_norm": 65.25566864013672,
|
|
"learning_rate": 4.1459510339613946e-07,
|
|
"logits/chosen": -0.6559075117111206,
|
|
"logits/rejected": -0.6537389159202576,
|
|
"logps/chosen": -60.41249084472656,
|
|
"logps/ref_chosen": -49.34136199951172,
|
|
"logps/ref_rejected": -103.51162719726562,
|
|
"logps/rejected": -140.07135009765625,
|
|
"loss": 0.5646,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.19891716539859772,
|
|
"margin_dpo/beta_margin_grad_std": 0.22555947303771973,
|
|
"margin_dpo/beta_margin_mean": 2.548858165740967,
|
|
"margin_dpo/beta_margin_std": 2.3670222759246826,
|
|
"margin_dpo/loss_margin_mean": 25.488582611083984,
|
|
"margin_dpo/margin_mean": 25.488582611083984,
|
|
"margin_dpo/margin_std": 23.585155487060547,
|
|
"step": 236
|
|
},
|
|
{
|
|
"epoch": 0.34801762114537443,
|
|
"grad_norm": 48.03404235839844,
|
|
"learning_rate": 4.136269950853473e-07,
|
|
"logits/chosen": -0.6702800989151001,
|
|
"logits/rejected": -0.636156439781189,
|
|
"logps/chosen": -65.91875457763672,
|
|
"logps/ref_chosen": -54.168121337890625,
|
|
"logps/ref_rejected": -94.78036499023438,
|
|
"logps/rejected": -134.05665588378906,
|
|
"loss": 0.5116,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.17360197007656097,
|
|
"margin_dpo/beta_margin_grad_std": 0.21111546456813812,
|
|
"margin_dpo/beta_margin_mean": 2.7525649070739746,
|
|
"margin_dpo/beta_margin_std": 2.4088451862335205,
|
|
"margin_dpo/loss_margin_mean": 27.52564811706543,
|
|
"margin_dpo/margin_mean": 27.52564811706543,
|
|
"margin_dpo/margin_std": 24.07387924194336,
|
|
"step": 237
|
|
},
|
|
{
|
|
"epoch": 0.34948604992657856,
|
|
"grad_norm": 39.46758270263672,
|
|
"learning_rate": 4.126545750510605e-07,
|
|
"logits/chosen": -0.6305921077728271,
|
|
"logits/rejected": -0.6234115958213806,
|
|
"logps/chosen": -64.94898986816406,
|
|
"logps/ref_chosen": -53.973121643066406,
|
|
"logps/ref_rejected": -89.41795349121094,
|
|
"logps/rejected": -125.03469848632812,
|
|
"loss": 0.4407,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.17112761735916138,
|
|
"margin_dpo/beta_margin_grad_std": 0.17932020127773285,
|
|
"margin_dpo/beta_margin_mean": 2.464088201522827,
|
|
"margin_dpo/beta_margin_std": 2.025897979736328,
|
|
"margin_dpo/loss_margin_mean": 24.640880584716797,
|
|
"margin_dpo/margin_mean": 24.640880584716797,
|
|
"margin_dpo/margin_std": 20.111305236816406,
|
|
"step": 238
|
|
},
|
|
{
|
|
"epoch": 0.3509544787077827,
|
|
"grad_norm": 49.3748664855957,
|
|
"learning_rate": 4.116778689174514e-07,
|
|
"logits/chosen": -0.7114957571029663,
|
|
"logits/rejected": -0.6843305826187134,
|
|
"logps/chosen": -70.67376708984375,
|
|
"logps/ref_chosen": -58.09782409667969,
|
|
"logps/ref_rejected": -93.59294128417969,
|
|
"logps/rejected": -131.71542358398438,
|
|
"loss": 0.4436,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.16616235673427582,
|
|
"margin_dpo/beta_margin_grad_std": 0.18609200417995453,
|
|
"margin_dpo/beta_margin_mean": 2.554654359817505,
|
|
"margin_dpo/beta_margin_std": 2.115661144256592,
|
|
"margin_dpo/loss_margin_mean": 25.54654312133789,
|
|
"margin_dpo/margin_mean": 25.54654312133789,
|
|
"margin_dpo/margin_std": 19.89307975769043,
|
|
"step": 239
|
|
},
|
|
{
|
|
"epoch": 0.3524229074889868,
|
|
"grad_norm": 60.53359603881836,
|
|
"learning_rate": 4.106969024216348e-07,
|
|
"logits/chosen": -0.6911687850952148,
|
|
"logits/rejected": -0.6599963903427124,
|
|
"logps/chosen": -73.52519226074219,
|
|
"logps/ref_chosen": -60.6144905090332,
|
|
"logps/ref_rejected": -74.1185302734375,
|
|
"logps/rejected": -109.58448791503906,
|
|
"loss": 0.6257,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.2058243602514267,
|
|
"margin_dpo/beta_margin_grad_std": 0.2343757450580597,
|
|
"margin_dpo/beta_margin_mean": 2.2555253505706787,
|
|
"margin_dpo/beta_margin_std": 2.1107068061828613,
|
|
"margin_dpo/loss_margin_mean": 22.555252075195312,
|
|
"margin_dpo/margin_mean": 22.555252075195312,
|
|
"margin_dpo/margin_std": 20.787620544433594,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.35389133627019087,
|
|
"grad_norm": 59.422630310058594,
|
|
"learning_rate": 4.097117014129903e-07,
|
|
"logits/chosen": -0.6552136540412903,
|
|
"logits/rejected": -0.6012428998947144,
|
|
"logps/chosen": -76.52700805664062,
|
|
"logps/ref_chosen": -66.091064453125,
|
|
"logps/ref_rejected": -88.06088256835938,
|
|
"logps/rejected": -130.19644165039062,
|
|
"loss": 0.5099,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1663733720779419,
|
|
"margin_dpo/beta_margin_grad_std": 0.22269202768802643,
|
|
"margin_dpo/beta_margin_mean": 3.169961929321289,
|
|
"margin_dpo/beta_margin_std": 3.125377655029297,
|
|
"margin_dpo/loss_margin_mean": 31.69961929321289,
|
|
"margin_dpo/margin_mean": 31.69961929321289,
|
|
"margin_dpo/margin_std": 29.628376007080078,
|
|
"step": 241
|
|
},
|
|
{
|
|
"epoch": 0.355359765051395,
|
|
"grad_norm": 52.94541931152344,
|
|
"learning_rate": 4.087222918524807e-07,
|
|
"logits/chosen": -0.6454315185546875,
|
|
"logits/rejected": -0.6136279702186584,
|
|
"logps/chosen": -79.44197845458984,
|
|
"logps/ref_chosen": -67.86392211914062,
|
|
"logps/ref_rejected": -83.36033630371094,
|
|
"logps/rejected": -119.58251190185547,
|
|
"loss": 0.4934,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1810285896062851,
|
|
"margin_dpo/beta_margin_grad_std": 0.19746260344982147,
|
|
"margin_dpo/beta_margin_mean": 2.4644126892089844,
|
|
"margin_dpo/beta_margin_std": 2.184521198272705,
|
|
"margin_dpo/loss_margin_mean": 24.644126892089844,
|
|
"margin_dpo/margin_mean": 24.644126892089844,
|
|
"margin_dpo/margin_std": 21.64803123474121,
|
|
"step": 242
|
|
},
|
|
{
|
|
"epoch": 0.3568281938325991,
|
|
"grad_norm": 34.107791900634766,
|
|
"learning_rate": 4.07728699811968e-07,
|
|
"logits/chosen": -0.6725857257843018,
|
|
"logits/rejected": -0.6084048748016357,
|
|
"logps/chosen": -74.12469482421875,
|
|
"logps/ref_chosen": -63.08424377441406,
|
|
"logps/ref_rejected": -76.33563232421875,
|
|
"logps/rejected": -116.86687469482422,
|
|
"loss": 0.3271,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.13316328823566437,
|
|
"margin_dpo/beta_margin_grad_std": 0.1522829383611679,
|
|
"margin_dpo/beta_margin_mean": 2.9490790367126465,
|
|
"margin_dpo/beta_margin_std": 2.1849277019500732,
|
|
"margin_dpo/loss_margin_mean": 29.49078941345215,
|
|
"margin_dpo/margin_mean": 29.49079132080078,
|
|
"margin_dpo/margin_std": 21.805618286132812,
|
|
"step": 243
|
|
},
|
|
{
|
|
"epoch": 0.35829662261380324,
|
|
"grad_norm": 42.87071228027344,
|
|
"learning_rate": 4.067309514735267e-07,
|
|
"logits/chosen": -0.6881895065307617,
|
|
"logits/rejected": -0.6778185367584229,
|
|
"logps/chosen": -71.2780990600586,
|
|
"logps/ref_chosen": -61.14069366455078,
|
|
"logps/ref_rejected": -94.89193725585938,
|
|
"logps/rejected": -130.34854125976562,
|
|
"loss": 0.4934,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.18467824161052704,
|
|
"margin_dpo/beta_margin_grad_std": 0.20196670293807983,
|
|
"margin_dpo/beta_margin_mean": 2.5319199562072754,
|
|
"margin_dpo/beta_margin_std": 2.150766372680664,
|
|
"margin_dpo/loss_margin_mean": 25.319198608398438,
|
|
"margin_dpo/margin_mean": 25.319198608398438,
|
|
"margin_dpo/margin_std": 21.36996078491211,
|
|
"step": 244
|
|
},
|
|
{
|
|
"epoch": 0.35976505139500736,
|
|
"grad_norm": 74.6055679321289,
|
|
"learning_rate": 4.057290731287531e-07,
|
|
"logits/chosen": -0.7033660411834717,
|
|
"logits/rejected": -0.6514378786087036,
|
|
"logps/chosen": -78.92977905273438,
|
|
"logps/ref_chosen": -67.26228332519531,
|
|
"logps/ref_rejected": -87.64010620117188,
|
|
"logps/rejected": -126.20805358886719,
|
|
"loss": 0.5326,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1932898312807083,
|
|
"margin_dpo/beta_margin_grad_std": 0.2051628977060318,
|
|
"margin_dpo/beta_margin_mean": 2.6900460720062256,
|
|
"margin_dpo/beta_margin_std": 2.7254021167755127,
|
|
"margin_dpo/loss_margin_mean": 26.90045928955078,
|
|
"margin_dpo/margin_mean": 26.900461196899414,
|
|
"margin_dpo/margin_std": 25.503192901611328,
|
|
"step": 245
|
|
},
|
|
{
|
|
"epoch": 0.36123348017621143,
|
|
"grad_norm": 56.00790023803711,
|
|
"learning_rate": 4.047230911780736e-07,
|
|
"logits/chosen": -0.7089934945106506,
|
|
"logits/rejected": -0.6705622673034668,
|
|
"logps/chosen": -78.0211181640625,
|
|
"logps/ref_chosen": -66.69696807861328,
|
|
"logps/ref_rejected": -84.34634399414062,
|
|
"logps/rejected": -118.77372741699219,
|
|
"loss": 0.5288,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1983981728553772,
|
|
"margin_dpo/beta_margin_grad_std": 0.19785138964653015,
|
|
"margin_dpo/beta_margin_mean": 2.310323476791382,
|
|
"margin_dpo/beta_margin_std": 2.1114535331726074,
|
|
"margin_dpo/loss_margin_mean": 23.103235244750977,
|
|
"margin_dpo/margin_mean": 23.103233337402344,
|
|
"margin_dpo/margin_std": 21.10454559326172,
|
|
"step": 246
|
|
},
|
|
{
|
|
"epoch": 0.36270190895741555,
|
|
"grad_norm": 41.90789031982422,
|
|
"learning_rate": 4.0371303213004814e-07,
|
|
"logits/chosen": -0.7110755443572998,
|
|
"logits/rejected": -0.6894150972366333,
|
|
"logps/chosen": -68.0724868774414,
|
|
"logps/ref_chosen": -56.6053466796875,
|
|
"logps/ref_rejected": -106.29327392578125,
|
|
"logps/rejected": -150.26498413085938,
|
|
"loss": 0.4045,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.14395716786384583,
|
|
"margin_dpo/beta_margin_grad_std": 0.19858244061470032,
|
|
"margin_dpo/beta_margin_mean": 3.2504570484161377,
|
|
"margin_dpo/beta_margin_std": 2.560331344604492,
|
|
"margin_dpo/loss_margin_mean": 32.50457000732422,
|
|
"margin_dpo/margin_mean": 32.50457000732422,
|
|
"margin_dpo/margin_std": 25.436208724975586,
|
|
"step": 247
|
|
},
|
|
{
|
|
"epoch": 0.3641703377386197,
|
|
"grad_norm": 42.92959213256836,
|
|
"learning_rate": 4.0269892260067197e-07,
|
|
"logits/chosen": -0.6845219135284424,
|
|
"logits/rejected": -0.6683632135391235,
|
|
"logps/chosen": -54.540321350097656,
|
|
"logps/ref_chosen": -44.043216705322266,
|
|
"logps/ref_rejected": -91.85687255859375,
|
|
"logps/rejected": -126.71005249023438,
|
|
"loss": 0.4107,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.16844278573989868,
|
|
"margin_dpo/beta_margin_grad_std": 0.15559379756450653,
|
|
"margin_dpo/beta_margin_mean": 2.4356071949005127,
|
|
"margin_dpo/beta_margin_std": 1.9233942031860352,
|
|
"margin_dpo/loss_margin_mean": 24.35607147216797,
|
|
"margin_dpo/margin_mean": 24.35607147216797,
|
|
"margin_dpo/margin_std": 19.101226806640625,
|
|
"step": 248
|
|
},
|
|
{
|
|
"epoch": 0.3656387665198238,
|
|
"grad_norm": 59.13127517700195,
|
|
"learning_rate": 4.0168078931267426e-07,
|
|
"logits/chosen": -0.7052150964736938,
|
|
"logits/rejected": -0.6669450998306274,
|
|
"logps/chosen": -74.95724487304688,
|
|
"logps/ref_chosen": -62.442352294921875,
|
|
"logps/ref_rejected": -80.46806335449219,
|
|
"logps/rejected": -113.90575408935547,
|
|
"loss": 0.6535,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.23194736242294312,
|
|
"margin_dpo/beta_margin_grad_std": 0.2308819442987442,
|
|
"margin_dpo/beta_margin_mean": 2.0922796726226807,
|
|
"margin_dpo/beta_margin_std": 2.0924148559570312,
|
|
"margin_dpo/loss_margin_mean": 20.92279624938965,
|
|
"margin_dpo/margin_mean": 20.92279624938965,
|
|
"margin_dpo/margin_std": 20.69894790649414,
|
|
"step": 249
|
|
},
|
|
{
|
|
"epoch": 0.3671071953010279,
|
|
"grad_norm": 34.11585235595703,
|
|
"learning_rate": 4.006586590948141e-07,
|
|
"logits/chosen": -0.6944586038589478,
|
|
"logits/rejected": -0.6244109272956848,
|
|
"logps/chosen": -74.52294158935547,
|
|
"logps/ref_chosen": -65.6366958618164,
|
|
"logps/ref_rejected": -73.87183380126953,
|
|
"logps/rejected": -108.57221221923828,
|
|
"loss": 0.4359,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1552935689687729,
|
|
"margin_dpo/beta_margin_grad_std": 0.19923508167266846,
|
|
"margin_dpo/beta_margin_mean": 2.581413507461548,
|
|
"margin_dpo/beta_margin_std": 1.8444185256958008,
|
|
"margin_dpo/loss_margin_mean": 25.81413459777832,
|
|
"margin_dpo/margin_mean": 25.81413459777832,
|
|
"margin_dpo/margin_std": 18.1165828704834,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.368575624082232,
|
|
"grad_norm": 44.37178039550781,
|
|
"learning_rate": 3.9963255888117325e-07,
|
|
"logits/chosen": -0.7029905319213867,
|
|
"logits/rejected": -0.6486064195632935,
|
|
"logps/chosen": -70.05519104003906,
|
|
"logps/ref_chosen": -57.182716369628906,
|
|
"logps/ref_rejected": -77.66343688964844,
|
|
"logps/rejected": -116.27742767333984,
|
|
"loss": 0.4579,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1774299144744873,
|
|
"margin_dpo/beta_margin_grad_std": 0.19099289178848267,
|
|
"margin_dpo/beta_margin_mean": 2.5741522312164307,
|
|
"margin_dpo/beta_margin_std": 2.079760789871216,
|
|
"margin_dpo/loss_margin_mean": 25.74152183532715,
|
|
"margin_dpo/margin_mean": 25.74152183532715,
|
|
"margin_dpo/margin_std": 20.57958984375,
|
|
"step": 251
|
|
},
|
|
{
|
|
"epoch": 0.3700440528634361,
|
|
"grad_norm": 53.544761657714844,
|
|
"learning_rate": 3.9860251571044666e-07,
|
|
"logits/chosen": -0.6703172326087952,
|
|
"logits/rejected": -0.62431800365448,
|
|
"logps/chosen": -83.42109680175781,
|
|
"logps/ref_chosen": -71.68563842773438,
|
|
"logps/ref_rejected": -84.75798797607422,
|
|
"logps/rejected": -122.17694854736328,
|
|
"loss": 0.4309,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.15842175483703613,
|
|
"margin_dpo/beta_margin_grad_std": 0.1866365671157837,
|
|
"margin_dpo/beta_margin_mean": 2.5683515071868896,
|
|
"margin_dpo/beta_margin_std": 1.9699735641479492,
|
|
"margin_dpo/loss_margin_mean": 25.683515548706055,
|
|
"margin_dpo/margin_mean": 25.683515548706055,
|
|
"margin_dpo/margin_std": 19.43787384033203,
|
|
"step": 252
|
|
},
|
|
{
|
|
"epoch": 0.37151248164464024,
|
|
"grad_norm": 50.1516227722168,
|
|
"learning_rate": 3.9756855672522986e-07,
|
|
"logits/chosen": -0.6842066049575806,
|
|
"logits/rejected": -0.654214084148407,
|
|
"logps/chosen": -79.20399475097656,
|
|
"logps/ref_chosen": -69.13392639160156,
|
|
"logps/ref_rejected": -98.70252990722656,
|
|
"logps/rejected": -132.8687744140625,
|
|
"loss": 0.6253,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.2049117386341095,
|
|
"margin_dpo/beta_margin_grad_std": 0.23132526874542236,
|
|
"margin_dpo/beta_margin_mean": 2.4096176624298096,
|
|
"margin_dpo/beta_margin_std": 2.3356130123138428,
|
|
"margin_dpo/loss_margin_mean": 24.096176147460938,
|
|
"margin_dpo/margin_mean": 24.096176147460938,
|
|
"margin_dpo/margin_std": 22.95254135131836,
|
|
"step": 253
|
|
},
|
|
{
|
|
"epoch": 0.37298091042584436,
|
|
"grad_norm": 64.96926879882812,
|
|
"learning_rate": 3.965307091713037e-07,
|
|
"logits/chosen": -0.7051047682762146,
|
|
"logits/rejected": -0.6575514078140259,
|
|
"logps/chosen": -64.82050323486328,
|
|
"logps/ref_chosen": -54.154998779296875,
|
|
"logps/ref_rejected": -90.30764770507812,
|
|
"logps/rejected": -125.6099853515625,
|
|
"loss": 0.5557,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.19986601173877716,
|
|
"margin_dpo/beta_margin_grad_std": 0.2192426323890686,
|
|
"margin_dpo/beta_margin_mean": 2.4636828899383545,
|
|
"margin_dpo/beta_margin_std": 2.2709455490112305,
|
|
"margin_dpo/loss_margin_mean": 24.636829376220703,
|
|
"margin_dpo/margin_mean": 24.636829376220703,
|
|
"margin_dpo/margin_std": 22.574371337890625,
|
|
"step": 254
|
|
},
|
|
{
|
|
"epoch": 0.3744493392070485,
|
|
"grad_norm": 66.39599609375,
|
|
"learning_rate": 3.954890003969163e-07,
|
|
"logits/chosen": -0.7102745771408081,
|
|
"logits/rejected": -0.6797518730163574,
|
|
"logps/chosen": -70.39166259765625,
|
|
"logps/ref_chosen": -57.14167022705078,
|
|
"logps/ref_rejected": -90.2085952758789,
|
|
"logps/rejected": -130.80026245117188,
|
|
"loss": 0.6594,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1914447396993637,
|
|
"margin_dpo/beta_margin_grad_std": 0.22745780646800995,
|
|
"margin_dpo/beta_margin_mean": 2.734168291091919,
|
|
"margin_dpo/beta_margin_std": 2.8561336994171143,
|
|
"margin_dpo/loss_margin_mean": 27.34168243408203,
|
|
"margin_dpo/margin_mean": 27.34168243408203,
|
|
"margin_dpo/margin_std": 28.012739181518555,
|
|
"step": 255
|
|
},
|
|
{
|
|
"epoch": 0.37591776798825255,
|
|
"grad_norm": 58.85321807861328,
|
|
"learning_rate": 3.944434578520628e-07,
|
|
"logits/chosen": -0.6565215587615967,
|
|
"logits/rejected": -0.6265472769737244,
|
|
"logps/chosen": -68.35701751708984,
|
|
"logps/ref_chosen": -55.163490295410156,
|
|
"logps/ref_rejected": -92.56291961669922,
|
|
"logps/rejected": -133.102294921875,
|
|
"loss": 0.5121,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.17351847887039185,
|
|
"margin_dpo/beta_margin_grad_std": 0.20426101982593536,
|
|
"margin_dpo/beta_margin_mean": 2.734584093093872,
|
|
"margin_dpo/beta_margin_std": 2.515676498413086,
|
|
"margin_dpo/loss_margin_mean": 27.34583854675293,
|
|
"margin_dpo/margin_mean": 27.34583854675293,
|
|
"margin_dpo/margin_std": 25.045982360839844,
|
|
"step": 256
|
|
},
|
|
{
|
|
"epoch": 0.37738619676945667,
|
|
"grad_norm": 45.65961456298828,
|
|
"learning_rate": 3.933941090877615e-07,
|
|
"logits/chosen": -0.6682260036468506,
|
|
"logits/rejected": -0.6451402902603149,
|
|
"logps/chosen": -61.90161895751953,
|
|
"logps/ref_chosen": -49.4236946105957,
|
|
"logps/ref_rejected": -79.53791809082031,
|
|
"logps/rejected": -122.11911010742188,
|
|
"loss": 0.5015,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.17995263636112213,
|
|
"margin_dpo/beta_margin_grad_std": 0.22144293785095215,
|
|
"margin_dpo/beta_margin_mean": 3.0103280544281006,
|
|
"margin_dpo/beta_margin_std": 2.688237190246582,
|
|
"margin_dpo/loss_margin_mean": 30.103281021118164,
|
|
"margin_dpo/margin_mean": 30.103281021118164,
|
|
"margin_dpo/margin_std": 25.718414306640625,
|
|
"step": 257
|
|
},
|
|
{
|
|
"epoch": 0.3788546255506608,
|
|
"grad_norm": 90.31965637207031,
|
|
"learning_rate": 3.923409817553284e-07,
|
|
"logits/chosen": -0.6991258263587952,
|
|
"logits/rejected": -0.669155478477478,
|
|
"logps/chosen": -75.35392761230469,
|
|
"logps/ref_chosen": -59.384124755859375,
|
|
"logps/ref_rejected": -95.9901123046875,
|
|
"logps/rejected": -138.38613891601562,
|
|
"loss": 0.7407,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.2009788304567337,
|
|
"margin_dpo/beta_margin_grad_std": 0.24991276860237122,
|
|
"margin_dpo/beta_margin_mean": 2.642622947692871,
|
|
"margin_dpo/beta_margin_std": 2.7432861328125,
|
|
"margin_dpo/loss_margin_mean": 26.426227569580078,
|
|
"margin_dpo/margin_mean": 26.426227569580078,
|
|
"margin_dpo/margin_std": 27.302228927612305,
|
|
"step": 258
|
|
},
|
|
{
|
|
"epoch": 0.3803230543318649,
|
|
"grad_norm": 54.30337142944336,
|
|
"learning_rate": 3.9128410360564793e-07,
|
|
"logits/chosen": -0.6342747211456299,
|
|
"logits/rejected": -0.6089296340942383,
|
|
"logps/chosen": -67.30290222167969,
|
|
"logps/ref_chosen": -52.828346252441406,
|
|
"logps/ref_rejected": -89.19165802001953,
|
|
"logps/rejected": -127.61224365234375,
|
|
"loss": 0.5311,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1905156373977661,
|
|
"margin_dpo/beta_margin_grad_std": 0.20092925429344177,
|
|
"margin_dpo/beta_margin_mean": 2.3946025371551514,
|
|
"margin_dpo/beta_margin_std": 2.1142630577087402,
|
|
"margin_dpo/loss_margin_mean": 23.946025848388672,
|
|
"margin_dpo/margin_mean": 23.94602394104004,
|
|
"margin_dpo/margin_std": 20.407352447509766,
|
|
"step": 259
|
|
},
|
|
{
|
|
"epoch": 0.38179148311306904,
|
|
"grad_norm": 60.32538604736328,
|
|
"learning_rate": 3.9022350248844246e-07,
|
|
"logits/chosen": -0.6234908103942871,
|
|
"logits/rejected": -0.6234794855117798,
|
|
"logps/chosen": -62.85065460205078,
|
|
"logps/ref_chosen": -47.41767501831055,
|
|
"logps/ref_rejected": -95.08979034423828,
|
|
"logps/rejected": -137.6796417236328,
|
|
"loss": 0.5057,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.18406428396701813,
|
|
"margin_dpo/beta_margin_grad_std": 0.20967774093151093,
|
|
"margin_dpo/beta_margin_mean": 2.7156875133514404,
|
|
"margin_dpo/beta_margin_std": 2.5070676803588867,
|
|
"margin_dpo/loss_margin_mean": 27.156875610351562,
|
|
"margin_dpo/margin_mean": 27.156875610351562,
|
|
"margin_dpo/margin_std": 25.030288696289062,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.3832599118942731,
|
|
"grad_norm": 47.035186767578125,
|
|
"learning_rate": 3.891592063515376e-07,
|
|
"logits/chosen": -0.6528719067573547,
|
|
"logits/rejected": -0.6170308589935303,
|
|
"logps/chosen": -65.26475524902344,
|
|
"logps/ref_chosen": -53.03137969970703,
|
|
"logps/ref_rejected": -88.51494598388672,
|
|
"logps/rejected": -129.43865966796875,
|
|
"loss": 0.4748,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.17383930087089539,
|
|
"margin_dpo/beta_margin_grad_std": 0.20882652699947357,
|
|
"margin_dpo/beta_margin_mean": 2.869032144546509,
|
|
"margin_dpo/beta_margin_std": 2.6597743034362793,
|
|
"margin_dpo/loss_margin_mean": 28.690322875976562,
|
|
"margin_dpo/margin_mean": 28.690322875976562,
|
|
"margin_dpo/margin_std": 26.378036499023438,
|
|
"step": 261
|
|
},
|
|
{
|
|
"epoch": 0.38472834067547723,
|
|
"grad_norm": 65.2990493774414,
|
|
"learning_rate": 3.880912432401264e-07,
|
|
"logits/chosen": -0.6476384401321411,
|
|
"logits/rejected": -0.601101279258728,
|
|
"logps/chosen": -74.31780242919922,
|
|
"logps/ref_chosen": -59.620140075683594,
|
|
"logps/ref_rejected": -86.41853332519531,
|
|
"logps/rejected": -126.95146179199219,
|
|
"loss": 0.5286,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1703178882598877,
|
|
"margin_dpo/beta_margin_grad_std": 0.2238461673259735,
|
|
"margin_dpo/beta_margin_mean": 2.583526849746704,
|
|
"margin_dpo/beta_margin_std": 2.1952381134033203,
|
|
"margin_dpo/loss_margin_mean": 25.835268020629883,
|
|
"margin_dpo/margin_mean": 25.835269927978516,
|
|
"margin_dpo/margin_std": 21.91771697998047,
|
|
"step": 262
|
|
},
|
|
{
|
|
"epoch": 0.38619676945668135,
|
|
"grad_norm": 63.93273162841797,
|
|
"learning_rate": 3.870196412960302e-07,
|
|
"logits/chosen": -0.6848942041397095,
|
|
"logits/rejected": -0.6289730072021484,
|
|
"logps/chosen": -71.28265380859375,
|
|
"logps/ref_chosen": -59.42094421386719,
|
|
"logps/ref_rejected": -96.85720825195312,
|
|
"logps/rejected": -139.320556640625,
|
|
"loss": 0.4332,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.16087834537029266,
|
|
"margin_dpo/beta_margin_grad_std": 0.1975705325603485,
|
|
"margin_dpo/beta_margin_mean": 3.0601646900177,
|
|
"margin_dpo/beta_margin_std": 2.6217379570007324,
|
|
"margin_dpo/loss_margin_mean": 30.601646423339844,
|
|
"margin_dpo/margin_mean": 30.601646423339844,
|
|
"margin_dpo/margin_std": 26.212867736816406,
|
|
"step": 263
|
|
},
|
|
{
|
|
"epoch": 0.3876651982378855,
|
|
"grad_norm": 65.42985534667969,
|
|
"learning_rate": 3.8594442875695665e-07,
|
|
"logits/chosen": -0.6409514546394348,
|
|
"logits/rejected": -0.6121193766593933,
|
|
"logps/chosen": -76.15332794189453,
|
|
"logps/ref_chosen": -62.722084045410156,
|
|
"logps/ref_rejected": -93.85621643066406,
|
|
"logps/rejected": -131.38528442382812,
|
|
"loss": 0.5449,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.19213639199733734,
|
|
"margin_dpo/beta_margin_grad_std": 0.20564202964305878,
|
|
"margin_dpo/beta_margin_mean": 2.409783363342285,
|
|
"margin_dpo/beta_margin_std": 2.2144694328308105,
|
|
"margin_dpo/loss_margin_mean": 24.09783363342285,
|
|
"margin_dpo/margin_mean": 24.097835540771484,
|
|
"margin_dpo/margin_std": 21.762907028198242,
|
|
"step": 264
|
|
},
|
|
{
|
|
"epoch": 0.3891336270190896,
|
|
"grad_norm": 73.85417938232422,
|
|
"learning_rate": 3.848656339557562e-07,
|
|
"logits/chosen": -0.6545775532722473,
|
|
"logits/rejected": -0.6242020130157471,
|
|
"logps/chosen": -76.27545928955078,
|
|
"logps/ref_chosen": -61.971466064453125,
|
|
"logps/ref_rejected": -88.02059936523438,
|
|
"logps/rejected": -127.61671447753906,
|
|
"loss": 0.5823,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.20656327903270721,
|
|
"margin_dpo/beta_margin_grad_std": 0.2223885953426361,
|
|
"margin_dpo/beta_margin_mean": 2.529212474822998,
|
|
"margin_dpo/beta_margin_std": 2.583730459213257,
|
|
"margin_dpo/loss_margin_mean": 25.292123794555664,
|
|
"margin_dpo/margin_mean": 25.292123794555664,
|
|
"margin_dpo/margin_std": 25.746461868286133,
|
|
"step": 265
|
|
},
|
|
{
|
|
"epoch": 0.39060205580029367,
|
|
"grad_norm": 57.55160903930664,
|
|
"learning_rate": 3.8378328531967507e-07,
|
|
"logits/chosen": -0.6736335754394531,
|
|
"logits/rejected": -0.6081231832504272,
|
|
"logps/chosen": -80.7601547241211,
|
|
"logps/ref_chosen": -67.09967041015625,
|
|
"logps/ref_rejected": -67.97122192382812,
|
|
"logps/rejected": -106.38961791992188,
|
|
"loss": 0.5648,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.20071399211883545,
|
|
"margin_dpo/beta_margin_grad_std": 0.22172965109348297,
|
|
"margin_dpo/beta_margin_mean": 2.47579026222229,
|
|
"margin_dpo/beta_margin_std": 2.267148017883301,
|
|
"margin_dpo/loss_margin_mean": 24.757902145385742,
|
|
"margin_dpo/margin_mean": 24.757904052734375,
|
|
"margin_dpo/margin_std": 22.62070083618164,
|
|
"step": 266
|
|
},
|
|
{
|
|
"epoch": 0.3920704845814978,
|
|
"grad_norm": 53.11775588989258,
|
|
"learning_rate": 3.8269741136960646e-07,
|
|
"logits/chosen": -0.6374738216400146,
|
|
"logits/rejected": -0.5902992486953735,
|
|
"logps/chosen": -82.08721923828125,
|
|
"logps/ref_chosen": -68.97074890136719,
|
|
"logps/ref_rejected": -90.16844940185547,
|
|
"logps/rejected": -130.69570922851562,
|
|
"loss": 0.4124,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.15888135135173798,
|
|
"margin_dpo/beta_margin_grad_std": 0.18455727398395538,
|
|
"margin_dpo/beta_margin_mean": 2.7410788536071777,
|
|
"margin_dpo/beta_margin_std": 2.210850954055786,
|
|
"margin_dpo/loss_margin_mean": 27.410789489746094,
|
|
"margin_dpo/margin_mean": 27.410789489746094,
|
|
"margin_dpo/margin_std": 22.08306884765625,
|
|
"step": 267
|
|
},
|
|
{
|
|
"epoch": 0.3935389133627019,
|
|
"grad_norm": 62.39994812011719,
|
|
"learning_rate": 3.8160804071933894e-07,
|
|
"logits/chosen": -0.6283696293830872,
|
|
"logits/rejected": -0.6117571592330933,
|
|
"logps/chosen": -68.46856689453125,
|
|
"logps/ref_chosen": -55.900306701660156,
|
|
"logps/ref_rejected": -101.64763641357422,
|
|
"logps/rejected": -139.620361328125,
|
|
"loss": 0.4971,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1794806867837906,
|
|
"margin_dpo/beta_margin_grad_std": 0.21003910899162292,
|
|
"margin_dpo/beta_margin_mean": 2.5404462814331055,
|
|
"margin_dpo/beta_margin_std": 2.1823596954345703,
|
|
"margin_dpo/loss_margin_mean": 25.404462814331055,
|
|
"margin_dpo/margin_mean": 25.404464721679688,
|
|
"margin_dpo/margin_std": 21.592742919921875,
|
|
"step": 268
|
|
},
|
|
{
|
|
"epoch": 0.39500734214390604,
|
|
"grad_norm": 53.5538330078125,
|
|
"learning_rate": 3.8051520207480204e-07,
|
|
"logits/chosen": -0.6579411029815674,
|
|
"logits/rejected": -0.6127967238426208,
|
|
"logps/chosen": -82.96507263183594,
|
|
"logps/ref_chosen": -70.03955078125,
|
|
"logps/ref_rejected": -107.34937286376953,
|
|
"logps/rejected": -153.08908081054688,
|
|
"loss": 0.4067,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.14487290382385254,
|
|
"margin_dpo/beta_margin_grad_std": 0.21411198377609253,
|
|
"margin_dpo/beta_margin_mean": 3.2814202308654785,
|
|
"margin_dpo/beta_margin_std": 2.366851329803467,
|
|
"margin_dpo/loss_margin_mean": 32.81420135498047,
|
|
"margin_dpo/margin_mean": 32.81420135498047,
|
|
"margin_dpo/margin_std": 23.582063674926758,
|
|
"step": 269
|
|
},
|
|
{
|
|
"epoch": 0.3964757709251101,
|
|
"grad_norm": 41.36155319213867,
|
|
"learning_rate": 3.794189242333106e-07,
|
|
"logits/chosen": -0.6722906827926636,
|
|
"logits/rejected": -0.6524355411529541,
|
|
"logps/chosen": -80.42156219482422,
|
|
"logps/ref_chosen": -69.53347778320312,
|
|
"logps/ref_rejected": -109.92864990234375,
|
|
"logps/rejected": -145.8834228515625,
|
|
"loss": 0.5061,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.18635544180870056,
|
|
"margin_dpo/beta_margin_grad_std": 0.20361123979091644,
|
|
"margin_dpo/beta_margin_mean": 2.506671190261841,
|
|
"margin_dpo/beta_margin_std": 2.224198341369629,
|
|
"margin_dpo/loss_margin_mean": 25.06671142578125,
|
|
"margin_dpo/margin_mean": 25.06671142578125,
|
|
"margin_dpo/margin_std": 22.077110290527344,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.39794419970631423,
|
|
"grad_norm": 51.65666961669922,
|
|
"learning_rate": 3.7831923608280514e-07,
|
|
"logits/chosen": -0.6164276599884033,
|
|
"logits/rejected": -0.5750702619552612,
|
|
"logps/chosen": -71.0101318359375,
|
|
"logps/ref_chosen": -56.76457214355469,
|
|
"logps/ref_rejected": -92.51383209228516,
|
|
"logps/rejected": -132.6912841796875,
|
|
"loss": 0.534,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.18699227273464203,
|
|
"margin_dpo/beta_margin_grad_std": 0.21381914615631104,
|
|
"margin_dpo/beta_margin_mean": 2.593189001083374,
|
|
"margin_dpo/beta_margin_std": 2.331104040145874,
|
|
"margin_dpo/loss_margin_mean": 25.9318904876709,
|
|
"margin_dpo/margin_mean": 25.931888580322266,
|
|
"margin_dpo/margin_std": 23.307266235351562,
|
|
"step": 271
|
|
},
|
|
{
|
|
"epoch": 0.39941262848751835,
|
|
"grad_norm": 52.00728225708008,
|
|
"learning_rate": 3.772161666010912e-07,
|
|
"logits/chosen": -0.6098858714103699,
|
|
"logits/rejected": -0.5980672836303711,
|
|
"logps/chosen": -62.51170349121094,
|
|
"logps/ref_chosen": -49.49715805053711,
|
|
"logps/ref_rejected": -105.54279327392578,
|
|
"logps/rejected": -150.09420776367188,
|
|
"loss": 0.5359,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.17077092826366425,
|
|
"margin_dpo/beta_margin_grad_std": 0.23799988627433777,
|
|
"margin_dpo/beta_margin_mean": 3.153686046600342,
|
|
"margin_dpo/beta_margin_std": 2.745887041091919,
|
|
"margin_dpo/loss_margin_mean": 31.5368595123291,
|
|
"margin_dpo/margin_mean": 31.5368595123291,
|
|
"margin_dpo/margin_std": 27.00173568725586,
|
|
"step": 272
|
|
},
|
|
{
|
|
"epoch": 0.4008810572687225,
|
|
"grad_norm": 59.0302848815918,
|
|
"learning_rate": 3.761097448550755e-07,
|
|
"logits/chosen": -0.5825521945953369,
|
|
"logits/rejected": -0.5468716025352478,
|
|
"logps/chosen": -77.9120864868164,
|
|
"logps/ref_chosen": -62.97539520263672,
|
|
"logps/ref_rejected": -92.49858093261719,
|
|
"logps/rejected": -137.728759765625,
|
|
"loss": 0.4627,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.16412891447544098,
|
|
"margin_dpo/beta_margin_grad_std": 0.2039998471736908,
|
|
"margin_dpo/beta_margin_mean": 3.0293478965759277,
|
|
"margin_dpo/beta_margin_std": 2.5976314544677734,
|
|
"margin_dpo/loss_margin_mean": 30.29347801208496,
|
|
"margin_dpo/margin_mean": 30.293479919433594,
|
|
"margin_dpo/margin_std": 24.974472045898438,
|
|
"step": 273
|
|
},
|
|
{
|
|
"epoch": 0.4023494860499266,
|
|
"grad_norm": 55.06562423706055,
|
|
"learning_rate": 3.75e-07,
|
|
"logits/chosen": -0.6257538199424744,
|
|
"logits/rejected": -0.5888440608978271,
|
|
"logps/chosen": -71.956298828125,
|
|
"logps/ref_chosen": -55.66770935058594,
|
|
"logps/ref_rejected": -77.33308410644531,
|
|
"logps/rejected": -119.93206787109375,
|
|
"loss": 0.5193,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1804315745830536,
|
|
"margin_dpo/beta_margin_grad_std": 0.2112412303686142,
|
|
"margin_dpo/beta_margin_mean": 2.6310408115386963,
|
|
"margin_dpo/beta_margin_std": 2.3380789756774902,
|
|
"margin_dpo/loss_margin_mean": 26.310407638549805,
|
|
"margin_dpo/margin_mean": 26.310407638549805,
|
|
"margin_dpo/margin_std": 23.162757873535156,
|
|
"step": 274
|
|
},
|
|
{
|
|
"epoch": 0.40381791483113066,
|
|
"grad_norm": 64.80329895019531,
|
|
"learning_rate": 3.738869612786737e-07,
|
|
"logits/chosen": -0.657637894153595,
|
|
"logits/rejected": -0.6397134065628052,
|
|
"logps/chosen": -60.017059326171875,
|
|
"logps/ref_chosen": -48.594703674316406,
|
|
"logps/ref_rejected": -93.30369567871094,
|
|
"logps/rejected": -132.4287567138672,
|
|
"loss": 0.4719,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.17478503286838531,
|
|
"margin_dpo/beta_margin_grad_std": 0.20220105350017548,
|
|
"margin_dpo/beta_margin_mean": 2.7702696323394775,
|
|
"margin_dpo/beta_margin_std": 2.443610906600952,
|
|
"margin_dpo/loss_margin_mean": 27.70269775390625,
|
|
"margin_dpo/margin_mean": 27.70269775390625,
|
|
"margin_dpo/margin_std": 24.364110946655273,
|
|
"step": 275
|
|
},
|
|
{
|
|
"epoch": 0.4052863436123348,
|
|
"grad_norm": 62.004940032958984,
|
|
"learning_rate": 3.7277065802070204e-07,
|
|
"logits/chosen": -0.6588333249092102,
|
|
"logits/rejected": -0.6178128719329834,
|
|
"logps/chosen": -70.30280303955078,
|
|
"logps/ref_chosen": -56.57740783691406,
|
|
"logps/ref_rejected": -70.36566925048828,
|
|
"logps/rejected": -109.56034851074219,
|
|
"loss": 0.5956,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.21022818982601166,
|
|
"margin_dpo/beta_margin_grad_std": 0.23304350674152374,
|
|
"margin_dpo/beta_margin_mean": 2.5469281673431396,
|
|
"margin_dpo/beta_margin_std": 2.4726295471191406,
|
|
"margin_dpo/loss_margin_mean": 25.469282150268555,
|
|
"margin_dpo/margin_mean": 25.469280242919922,
|
|
"margin_dpo/margin_std": 24.609455108642578,
|
|
"step": 276
|
|
},
|
|
{
|
|
"epoch": 0.4067547723935389,
|
|
"grad_norm": 41.01213836669922,
|
|
"learning_rate": 3.71651119641714e-07,
|
|
"logits/chosen": -0.6487230658531189,
|
|
"logits/rejected": -0.6121164560317993,
|
|
"logps/chosen": -68.6185302734375,
|
|
"logps/ref_chosen": -56.27156066894531,
|
|
"logps/ref_rejected": -92.88127136230469,
|
|
"logps/rejected": -129.5735626220703,
|
|
"loss": 0.4262,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.17126400768756866,
|
|
"margin_dpo/beta_margin_grad_std": 0.16744239628314972,
|
|
"margin_dpo/beta_margin_mean": 2.434532880783081,
|
|
"margin_dpo/beta_margin_std": 1.9181517362594604,
|
|
"margin_dpo/loss_margin_mean": 24.345327377319336,
|
|
"margin_dpo/margin_mean": 24.34532928466797,
|
|
"margin_dpo/margin_std": 18.586042404174805,
|
|
"step": 277
|
|
},
|
|
{
|
|
"epoch": 0.40822320117474303,
|
|
"grad_norm": 45.832942962646484,
|
|
"learning_rate": 3.705283756425872e-07,
|
|
"logits/chosen": -0.6533815860748291,
|
|
"logits/rejected": -0.6426759958267212,
|
|
"logps/chosen": -64.33135986328125,
|
|
"logps/ref_chosen": -52.94194030761719,
|
|
"logps/ref_rejected": -91.25357818603516,
|
|
"logps/rejected": -132.53787231445312,
|
|
"loss": 0.482,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.17872436344623566,
|
|
"margin_dpo/beta_margin_grad_std": 0.21186299622058868,
|
|
"margin_dpo/beta_margin_mean": 2.989488124847412,
|
|
"margin_dpo/beta_margin_std": 2.6637423038482666,
|
|
"margin_dpo/loss_margin_mean": 29.894880294799805,
|
|
"margin_dpo/margin_mean": 29.894882202148438,
|
|
"margin_dpo/margin_std": 26.599227905273438,
|
|
"step": 278
|
|
},
|
|
{
|
|
"epoch": 0.40969162995594716,
|
|
"grad_norm": 55.28075408935547,
|
|
"learning_rate": 3.6940245560867e-07,
|
|
"logits/chosen": -0.6621390581130981,
|
|
"logits/rejected": -0.6348008513450623,
|
|
"logps/chosen": -60.90464782714844,
|
|
"logps/ref_chosen": -48.641319274902344,
|
|
"logps/ref_rejected": -87.8514404296875,
|
|
"logps/rejected": -129.54296875,
|
|
"loss": 0.488,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.17444436252117157,
|
|
"margin_dpo/beta_margin_grad_std": 0.2196406126022339,
|
|
"margin_dpo/beta_margin_mean": 2.9428205490112305,
|
|
"margin_dpo/beta_margin_std": 2.446150302886963,
|
|
"margin_dpo/loss_margin_mean": 29.428205490112305,
|
|
"margin_dpo/margin_mean": 29.428203582763672,
|
|
"margin_dpo/margin_std": 24.257051467895508,
|
|
"step": 279
|
|
},
|
|
{
|
|
"epoch": 0.4111600587371512,
|
|
"grad_norm": 38.125099182128906,
|
|
"learning_rate": 3.6827338920900253e-07,
|
|
"logits/chosen": -0.6191302537918091,
|
|
"logits/rejected": -0.6008737683296204,
|
|
"logps/chosen": -72.28899383544922,
|
|
"logps/ref_chosen": -58.797122955322266,
|
|
"logps/ref_rejected": -98.61885070800781,
|
|
"logps/rejected": -141.04525756835938,
|
|
"loss": 0.3483,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.133779838681221,
|
|
"margin_dpo/beta_margin_grad_std": 0.18086881935596466,
|
|
"margin_dpo/beta_margin_mean": 2.8934521675109863,
|
|
"margin_dpo/beta_margin_std": 1.8741562366485596,
|
|
"margin_dpo/loss_margin_mean": 28.934520721435547,
|
|
"margin_dpo/margin_mean": 28.934520721435547,
|
|
"margin_dpo/margin_std": 18.611377716064453,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.41262848751835535,
|
|
"grad_norm": 64.53363037109375,
|
|
"learning_rate": 3.6714120619553435e-07,
|
|
"logits/chosen": -0.665095329284668,
|
|
"logits/rejected": -0.62502121925354,
|
|
"logps/chosen": -67.85418701171875,
|
|
"logps/ref_chosen": -55.488521575927734,
|
|
"logps/ref_rejected": -80.88258361816406,
|
|
"logps/rejected": -118.54179382324219,
|
|
"loss": 0.483,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.15846484899520874,
|
|
"margin_dpo/beta_margin_grad_std": 0.1876082420349121,
|
|
"margin_dpo/beta_margin_mean": 2.5293540954589844,
|
|
"margin_dpo/beta_margin_std": 2.0211899280548096,
|
|
"margin_dpo/loss_margin_mean": 25.293540954589844,
|
|
"margin_dpo/margin_mean": 25.293540954589844,
|
|
"margin_dpo/margin_std": 20.025854110717773,
|
|
"step": 281
|
|
},
|
|
{
|
|
"epoch": 0.41409691629955947,
|
|
"grad_norm": 50.1074333190918,
|
|
"learning_rate": 3.660059364023408e-07,
|
|
"logits/chosen": -0.6407305002212524,
|
|
"logits/rejected": -0.593590497970581,
|
|
"logps/chosen": -85.81141662597656,
|
|
"logps/ref_chosen": -73.07014465332031,
|
|
"logps/ref_rejected": -95.35098266601562,
|
|
"logps/rejected": -131.50296020507812,
|
|
"loss": 0.475,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.18391168117523193,
|
|
"margin_dpo/beta_margin_grad_std": 0.18578048050403595,
|
|
"margin_dpo/beta_margin_mean": 2.341071128845215,
|
|
"margin_dpo/beta_margin_std": 2.0938100814819336,
|
|
"margin_dpo/loss_margin_mean": 23.41071128845215,
|
|
"margin_dpo/margin_mean": 23.41071128845215,
|
|
"margin_dpo/margin_std": 20.858131408691406,
|
|
"step": 282
|
|
},
|
|
{
|
|
"epoch": 0.4155653450807636,
|
|
"grad_norm": 48.29468536376953,
|
|
"learning_rate": 3.6486760974483685e-07,
|
|
"logits/chosen": -0.6420848369598389,
|
|
"logits/rejected": -0.6138025522232056,
|
|
"logps/chosen": -74.30840301513672,
|
|
"logps/ref_chosen": -61.89844512939453,
|
|
"logps/ref_rejected": -96.98655700683594,
|
|
"logps/rejected": -137.50985717773438,
|
|
"loss": 0.4753,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1640281230211258,
|
|
"margin_dpo/beta_margin_grad_std": 0.21006377041339874,
|
|
"margin_dpo/beta_margin_mean": 2.811335563659668,
|
|
"margin_dpo/beta_margin_std": 2.354823589324951,
|
|
"margin_dpo/loss_margin_mean": 28.113353729248047,
|
|
"margin_dpo/margin_mean": 28.113353729248047,
|
|
"margin_dpo/margin_std": 23.463539123535156,
|
|
"step": 283
|
|
},
|
|
{
|
|
"epoch": 0.4170337738619677,
|
|
"grad_norm": 43.3529167175293,
|
|
"learning_rate": 3.6372625621898863e-07,
|
|
"logits/chosen": -0.6275640726089478,
|
|
"logits/rejected": -0.6143908500671387,
|
|
"logps/chosen": -72.13871765136719,
|
|
"logps/ref_chosen": -58.4355354309082,
|
|
"logps/ref_rejected": -93.46926879882812,
|
|
"logps/rejected": -137.00511169433594,
|
|
"loss": 0.4108,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.15540792047977448,
|
|
"margin_dpo/beta_margin_grad_std": 0.1845344454050064,
|
|
"margin_dpo/beta_margin_mean": 2.983266592025757,
|
|
"margin_dpo/beta_margin_std": 2.578672170639038,
|
|
"margin_dpo/loss_margin_mean": 29.832664489746094,
|
|
"margin_dpo/margin_mean": 29.832664489746094,
|
|
"margin_dpo/margin_std": 25.724153518676758,
|
|
"step": 284
|
|
},
|
|
{
|
|
"epoch": 0.4185022026431718,
|
|
"grad_norm": 57.101165771484375,
|
|
"learning_rate": 3.625819059005228e-07,
|
|
"logits/chosen": -0.6859316825866699,
|
|
"logits/rejected": -0.6596359014511108,
|
|
"logps/chosen": -81.82306671142578,
|
|
"logps/ref_chosen": -66.2322006225586,
|
|
"logps/ref_rejected": -99.1268310546875,
|
|
"logps/rejected": -141.17568969726562,
|
|
"loss": 0.4257,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.16400812566280365,
|
|
"margin_dpo/beta_margin_grad_std": 0.18523728847503662,
|
|
"margin_dpo/beta_margin_mean": 2.6457977294921875,
|
|
"margin_dpo/beta_margin_std": 2.1147918701171875,
|
|
"margin_dpo/loss_margin_mean": 26.457977294921875,
|
|
"margin_dpo/margin_mean": 26.457977294921875,
|
|
"margin_dpo/margin_std": 20.855016708374023,
|
|
"step": 285
|
|
},
|
|
{
|
|
"epoch": 0.4199706314243759,
|
|
"grad_norm": 58.981807708740234,
|
|
"learning_rate": 3.614345889441346e-07,
|
|
"logits/chosen": -0.6508222222328186,
|
|
"logits/rejected": -0.6174975633621216,
|
|
"logps/chosen": -86.8876724243164,
|
|
"logps/ref_chosen": -72.95100402832031,
|
|
"logps/ref_rejected": -88.58845520019531,
|
|
"logps/rejected": -130.25048828125,
|
|
"loss": 0.5505,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.18501420319080353,
|
|
"margin_dpo/beta_margin_grad_std": 0.22696195542812347,
|
|
"margin_dpo/beta_margin_mean": 2.772536516189575,
|
|
"margin_dpo/beta_margin_std": 2.563308000564575,
|
|
"margin_dpo/loss_margin_mean": 27.725364685058594,
|
|
"margin_dpo/margin_mean": 27.725364685058594,
|
|
"margin_dpo/margin_std": 25.239097595214844,
|
|
"step": 286
|
|
},
|
|
{
|
|
"epoch": 0.42143906020558003,
|
|
"grad_norm": 52.582481384277344,
|
|
"learning_rate": 3.6028433558269275e-07,
|
|
"logits/chosen": -0.658734142780304,
|
|
"logits/rejected": -0.6133627891540527,
|
|
"logps/chosen": -75.86917114257812,
|
|
"logps/ref_chosen": -61.54115295410156,
|
|
"logps/ref_rejected": -77.6960678100586,
|
|
"logps/rejected": -118.89381408691406,
|
|
"loss": 0.534,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1924961507320404,
|
|
"margin_dpo/beta_margin_grad_std": 0.21066516637802124,
|
|
"margin_dpo/beta_margin_mean": 2.6869726181030273,
|
|
"margin_dpo/beta_margin_std": 2.597897529602051,
|
|
"margin_dpo/loss_margin_mean": 26.869726181030273,
|
|
"margin_dpo/margin_mean": 26.86972427368164,
|
|
"margin_dpo/margin_std": 25.95490264892578,
|
|
"step": 287
|
|
},
|
|
{
|
|
"epoch": 0.42290748898678415,
|
|
"grad_norm": 57.62872314453125,
|
|
"learning_rate": 3.5913117612644327e-07,
|
|
"logits/chosen": -0.634566605091095,
|
|
"logits/rejected": -0.6029102206230164,
|
|
"logps/chosen": -72.6466293334961,
|
|
"logps/ref_chosen": -56.661224365234375,
|
|
"logps/ref_rejected": -87.335693359375,
|
|
"logps/rejected": -131.12515258789062,
|
|
"loss": 0.4303,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.16040383279323578,
|
|
"margin_dpo/beta_margin_grad_std": 0.19727593660354614,
|
|
"margin_dpo/beta_margin_mean": 2.7804043292999268,
|
|
"margin_dpo/beta_margin_std": 2.1588449478149414,
|
|
"margin_dpo/loss_margin_mean": 27.80404281616211,
|
|
"margin_dpo/margin_mean": 27.80404281616211,
|
|
"margin_dpo/margin_std": 21.125944137573242,
|
|
"step": 288
|
|
},
|
|
{
|
|
"epoch": 0.4243759177679883,
|
|
"grad_norm": 50.83492660522461,
|
|
"learning_rate": 3.5797514096221024e-07,
|
|
"logits/chosen": -0.6417437791824341,
|
|
"logits/rejected": -0.6304539442062378,
|
|
"logps/chosen": -61.59012985229492,
|
|
"logps/ref_chosen": -45.23039245605469,
|
|
"logps/ref_rejected": -87.64266967773438,
|
|
"logps/rejected": -134.28424072265625,
|
|
"loss": 0.5004,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.18503758311271667,
|
|
"margin_dpo/beta_margin_grad_std": 0.2104889303445816,
|
|
"margin_dpo/beta_margin_mean": 3.0281827449798584,
|
|
"margin_dpo/beta_margin_std": 2.936239004135132,
|
|
"margin_dpo/loss_margin_mean": 30.28182601928711,
|
|
"margin_dpo/margin_mean": 30.28182601928711,
|
|
"margin_dpo/margin_std": 29.03339958190918,
|
|
"step": 289
|
|
},
|
|
{
|
|
"epoch": 0.42584434654919234,
|
|
"grad_norm": 63.28836441040039,
|
|
"learning_rate": 3.568162605525952e-07,
|
|
"logits/chosen": -0.5944575071334839,
|
|
"logits/rejected": -0.5908774137496948,
|
|
"logps/chosen": -72.06575775146484,
|
|
"logps/ref_chosen": -55.47149658203125,
|
|
"logps/ref_rejected": -116.70857238769531,
|
|
"logps/rejected": -164.83444213867188,
|
|
"loss": 0.5027,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.17223544418811798,
|
|
"margin_dpo/beta_margin_grad_std": 0.22271078824996948,
|
|
"margin_dpo/beta_margin_mean": 3.15316104888916,
|
|
"margin_dpo/beta_margin_std": 2.957723617553711,
|
|
"margin_dpo/loss_margin_mean": 31.53160858154297,
|
|
"margin_dpo/margin_mean": 31.53160858154297,
|
|
"margin_dpo/margin_std": 29.308597564697266,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.42731277533039647,
|
|
"grad_norm": 56.67517852783203,
|
|
"learning_rate": 3.5565456543517485e-07,
|
|
"logits/chosen": -0.6302033066749573,
|
|
"logits/rejected": -0.595551609992981,
|
|
"logps/chosen": -76.0269775390625,
|
|
"logps/ref_chosen": -63.26036834716797,
|
|
"logps/ref_rejected": -89.29708862304688,
|
|
"logps/rejected": -129.76498413085938,
|
|
"loss": 0.4813,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.16477952897548676,
|
|
"margin_dpo/beta_margin_grad_std": 0.20427057147026062,
|
|
"margin_dpo/beta_margin_mean": 2.7701282501220703,
|
|
"margin_dpo/beta_margin_std": 2.2966012954711914,
|
|
"margin_dpo/loss_margin_mean": 27.701282501220703,
|
|
"margin_dpo/margin_mean": 27.70128059387207,
|
|
"margin_dpo/margin_std": 22.738750457763672,
|
|
"step": 291
|
|
},
|
|
{
|
|
"epoch": 0.4287812041116006,
|
|
"grad_norm": 54.23537063598633,
|
|
"learning_rate": 3.5449008622169583e-07,
|
|
"logits/chosen": -0.6187624931335449,
|
|
"logits/rejected": -0.5753225684165955,
|
|
"logps/chosen": -70.70861053466797,
|
|
"logps/ref_chosen": -53.91852951049805,
|
|
"logps/ref_rejected": -89.96138000488281,
|
|
"logps/rejected": -136.59767150878906,
|
|
"loss": 0.3934,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.15516723692417145,
|
|
"margin_dpo/beta_margin_grad_std": 0.1758795827627182,
|
|
"margin_dpo/beta_margin_mean": 2.9846208095550537,
|
|
"margin_dpo/beta_margin_std": 2.461369752883911,
|
|
"margin_dpo/loss_margin_mean": 29.846208572387695,
|
|
"margin_dpo/margin_mean": 29.846210479736328,
|
|
"margin_dpo/margin_std": 24.429065704345703,
|
|
"step": 292
|
|
},
|
|
{
|
|
"epoch": 0.4302496328928047,
|
|
"grad_norm": 52.83697509765625,
|
|
"learning_rate": 3.5332285359726846e-07,
|
|
"logits/chosen": -0.6384230852127075,
|
|
"logits/rejected": -0.6081752777099609,
|
|
"logps/chosen": -76.67402648925781,
|
|
"logps/ref_chosen": -60.376033782958984,
|
|
"logps/ref_rejected": -77.8524398803711,
|
|
"logps/rejected": -118.3228988647461,
|
|
"loss": 0.5966,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.2109437733888626,
|
|
"margin_dpo/beta_margin_grad_std": 0.217354878783226,
|
|
"margin_dpo/beta_margin_mean": 2.417245864868164,
|
|
"margin_dpo/beta_margin_std": 2.448225259780884,
|
|
"margin_dpo/loss_margin_mean": 24.17245864868164,
|
|
"margin_dpo/margin_mean": 24.17245864868164,
|
|
"margin_dpo/margin_std": 24.42681121826172,
|
|
"step": 293
|
|
},
|
|
{
|
|
"epoch": 0.43171806167400884,
|
|
"grad_norm": 42.41814041137695,
|
|
"learning_rate": 3.5215289831955786e-07,
|
|
"logits/chosen": -0.6331781148910522,
|
|
"logits/rejected": -0.6203632354736328,
|
|
"logps/chosen": -62.738616943359375,
|
|
"logps/ref_chosen": -48.0875358581543,
|
|
"logps/ref_rejected": -81.89698791503906,
|
|
"logps/rejected": -123.75438690185547,
|
|
"loss": 0.5153,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1880524605512619,
|
|
"margin_dpo/beta_margin_grad_std": 0.2148909568786621,
|
|
"margin_dpo/beta_margin_mean": 2.7206313610076904,
|
|
"margin_dpo/beta_margin_std": 2.606935977935791,
|
|
"margin_dpo/loss_margin_mean": 27.206314086914062,
|
|
"margin_dpo/margin_mean": 27.206314086914062,
|
|
"margin_dpo/margin_std": 25.83649444580078,
|
|
"step": 294
|
|
},
|
|
{
|
|
"epoch": 0.4331864904552129,
|
|
"grad_norm": 63.754703521728516,
|
|
"learning_rate": 3.509802512179737e-07,
|
|
"logits/chosen": -0.6102343797683716,
|
|
"logits/rejected": -0.6015244722366333,
|
|
"logps/chosen": -68.84889221191406,
|
|
"logps/ref_chosen": -49.92467498779297,
|
|
"logps/ref_rejected": -87.45632934570312,
|
|
"logps/rejected": -133.5269775390625,
|
|
"loss": 0.5905,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.18437956273555756,
|
|
"margin_dpo/beta_margin_grad_std": 0.22511690855026245,
|
|
"margin_dpo/beta_margin_mean": 2.714644432067871,
|
|
"margin_dpo/beta_margin_std": 2.515841245651245,
|
|
"margin_dpo/loss_margin_mean": 27.14644432067871,
|
|
"margin_dpo/margin_mean": 27.146446228027344,
|
|
"margin_dpo/margin_std": 24.949363708496094,
|
|
"step": 295
|
|
},
|
|
{
|
|
"epoch": 0.434654919236417,
|
|
"grad_norm": 79.74415588378906,
|
|
"learning_rate": 3.498049431928577e-07,
|
|
"logits/chosen": -0.6979824304580688,
|
|
"logits/rejected": -0.6591476202011108,
|
|
"logps/chosen": -84.0577392578125,
|
|
"logps/ref_chosen": -65.49124145507812,
|
|
"logps/ref_rejected": -93.08908081054688,
|
|
"logps/rejected": -135.13502502441406,
|
|
"loss": 0.7362,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.23561769723892212,
|
|
"margin_dpo/beta_margin_grad_std": 0.25472357869148254,
|
|
"margin_dpo/beta_margin_mean": 2.3479440212249756,
|
|
"margin_dpo/beta_margin_std": 2.6585099697113037,
|
|
"margin_dpo/loss_margin_mean": 23.47943878173828,
|
|
"margin_dpo/margin_mean": 23.47943878173828,
|
|
"margin_dpo/margin_std": 26.526391983032227,
|
|
"step": 296
|
|
},
|
|
{
|
|
"epoch": 0.43612334801762115,
|
|
"grad_norm": 44.74517059326172,
|
|
"learning_rate": 3.486270052146694e-07,
|
|
"logits/chosen": -0.5774829387664795,
|
|
"logits/rejected": -0.5429031848907471,
|
|
"logps/chosen": -74.85836029052734,
|
|
"logps/ref_chosen": -56.47694778442383,
|
|
"logps/ref_rejected": -95.1385498046875,
|
|
"logps/rejected": -142.09182739257812,
|
|
"loss": 0.426,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.16123466193675995,
|
|
"margin_dpo/beta_margin_grad_std": 0.1913672685623169,
|
|
"margin_dpo/beta_margin_mean": 2.8571863174438477,
|
|
"margin_dpo/beta_margin_std": 2.378805160522461,
|
|
"margin_dpo/loss_margin_mean": 28.571861267089844,
|
|
"margin_dpo/margin_mean": 28.571863174438477,
|
|
"margin_dpo/margin_std": 23.766578674316406,
|
|
"step": 297
|
|
},
|
|
{
|
|
"epoch": 0.43759177679882527,
|
|
"grad_norm": 44.202003479003906,
|
|
"learning_rate": 3.474464683231698e-07,
|
|
"logits/chosen": -0.6306143999099731,
|
|
"logits/rejected": -0.6259936690330505,
|
|
"logps/chosen": -83.96099090576172,
|
|
"logps/ref_chosen": -67.32516479492188,
|
|
"logps/ref_rejected": -116.66217041015625,
|
|
"logps/rejected": -163.1012420654297,
|
|
"loss": 0.4135,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1614616960287094,
|
|
"margin_dpo/beta_margin_grad_std": 0.180389866232872,
|
|
"margin_dpo/beta_margin_mean": 2.9803249835968018,
|
|
"margin_dpo/beta_margin_std": 2.6625173091888428,
|
|
"margin_dpo/loss_margin_mean": 29.803251266479492,
|
|
"margin_dpo/margin_mean": 29.80324935913086,
|
|
"margin_dpo/margin_std": 26.537506103515625,
|
|
"step": 298
|
|
},
|
|
{
|
|
"epoch": 0.4390602055800294,
|
|
"grad_norm": 59.32780075073242,
|
|
"learning_rate": 3.462633636266041e-07,
|
|
"logits/chosen": -0.5633834600448608,
|
|
"logits/rejected": -0.5420501232147217,
|
|
"logps/chosen": -64.30989837646484,
|
|
"logps/ref_chosen": -48.96209716796875,
|
|
"logps/ref_rejected": -84.32823944091797,
|
|
"logps/rejected": -130.85752868652344,
|
|
"loss": 0.5069,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.17790299654006958,
|
|
"margin_dpo/beta_margin_grad_std": 0.22678081691265106,
|
|
"margin_dpo/beta_margin_mean": 3.118149518966675,
|
|
"margin_dpo/beta_margin_std": 2.880525588989258,
|
|
"margin_dpo/loss_margin_mean": 31.181493759155273,
|
|
"margin_dpo/margin_mean": 31.181495666503906,
|
|
"margin_dpo/margin_std": 27.928592681884766,
|
|
"step": 299
|
|
},
|
|
{
|
|
"epoch": 0.44052863436123346,
|
|
"grad_norm": 81.78619384765625,
|
|
"learning_rate": 3.4507772230088147e-07,
|
|
"logits/chosen": -0.6105576157569885,
|
|
"logits/rejected": -0.591549277305603,
|
|
"logps/chosen": -80.5472183227539,
|
|
"logps/ref_chosen": -59.073707580566406,
|
|
"logps/ref_rejected": -95.9664535522461,
|
|
"logps/rejected": -147.06117248535156,
|
|
"loss": 0.7096,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.20562496781349182,
|
|
"margin_dpo/beta_margin_grad_std": 0.27093952894210815,
|
|
"margin_dpo/beta_margin_mean": 2.9621217250823975,
|
|
"margin_dpo/beta_margin_std": 2.987994909286499,
|
|
"margin_dpo/loss_margin_mean": 29.6212158203125,
|
|
"margin_dpo/margin_mean": 29.621217727661133,
|
|
"margin_dpo/margin_std": 29.81679344177246,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.44052863436123346,
|
|
"eval_logits/chosen": -0.6160351634025574,
|
|
"eval_logits/rejected": -0.5900039672851562,
|
|
"eval_logps/chosen": -100.12930297851562,
|
|
"eval_logps/ref_chosen": -79.05104064941406,
|
|
"eval_logps/ref_rejected": -86.79793548583984,
|
|
"eval_logps/rejected": -125.98890686035156,
|
|
"eval_loss": 0.4404529929161072,
|
|
"eval_margin_dpo/beta": 0.10000000149011612,
|
|
"eval_margin_dpo/beta_margin_grad_mean": -0.28246673941612244,
|
|
"eval_margin_dpo/beta_margin_grad_std": 0.2513927221298218,
|
|
"eval_margin_dpo/beta_margin_mean": 1.8112715482711792,
|
|
"eval_margin_dpo/beta_margin_std": 2.3746871948242188,
|
|
"eval_margin_dpo/loss_margin_mean": 18.112716674804688,
|
|
"eval_margin_dpo/margin_mean": 18.112716674804688,
|
|
"eval_margin_dpo/margin_std": 23.746871948242188,
|
|
"eval_runtime": 40.0886,
|
|
"eval_samples_per_second": 58.346,
|
|
"eval_steps_per_second": 1.846,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.4419970631424376,
|
|
"grad_norm": 49.35285949707031,
|
|
"learning_rate": 3.4388957558875316e-07,
|
|
"logits/chosen": -0.6374814510345459,
|
|
"logits/rejected": -0.6068045496940613,
|
|
"logps/chosen": -75.65875244140625,
|
|
"logps/ref_chosen": -57.249366760253906,
|
|
"logps/ref_rejected": -92.35354614257812,
|
|
"logps/rejected": -141.78347778320312,
|
|
"loss": 0.404,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1557816118001938,
|
|
"margin_dpo/beta_margin_grad_std": 0.1888331174850464,
|
|
"margin_dpo/beta_margin_mean": 3.1020545959472656,
|
|
"margin_dpo/beta_margin_std": 2.5941102504730225,
|
|
"margin_dpo/loss_margin_mean": 31.020544052124023,
|
|
"margin_dpo/margin_mean": 31.020544052124023,
|
|
"margin_dpo/margin_std": 25.892236709594727,
|
|
"step": 301
|
|
},
|
|
{
|
|
"epoch": 0.4434654919236417,
|
|
"grad_norm": 68.0921401977539,
|
|
"learning_rate": 3.426989547989902e-07,
|
|
"logits/chosen": -0.5743458271026611,
|
|
"logits/rejected": -0.5641738176345825,
|
|
"logps/chosen": -66.59398651123047,
|
|
"logps/ref_chosen": -51.19799041748047,
|
|
"logps/ref_rejected": -97.22636413574219,
|
|
"logps/rejected": -141.77328491210938,
|
|
"loss": 0.574,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.18597975373268127,
|
|
"margin_dpo/beta_margin_grad_std": 0.23450718820095062,
|
|
"margin_dpo/beta_margin_mean": 2.9150946140289307,
|
|
"margin_dpo/beta_margin_std": 2.9601025581359863,
|
|
"margin_dpo/loss_margin_mean": 29.15094566345215,
|
|
"margin_dpo/margin_mean": 29.15094757080078,
|
|
"margin_dpo/margin_std": 29.199195861816406,
|
|
"step": 302
|
|
},
|
|
{
|
|
"epoch": 0.44493392070484583,
|
|
"grad_norm": 74.45587921142578,
|
|
"learning_rate": 3.4150589130555773e-07,
|
|
"logits/chosen": -0.6296427249908447,
|
|
"logits/rejected": -0.5928441286087036,
|
|
"logps/chosen": -83.6898193359375,
|
|
"logps/ref_chosen": -66.71394348144531,
|
|
"logps/ref_rejected": -86.94542694091797,
|
|
"logps/rejected": -131.94627380371094,
|
|
"loss": 0.6507,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.21019597351551056,
|
|
"margin_dpo/beta_margin_grad_std": 0.25447341799736023,
|
|
"margin_dpo/beta_margin_mean": 2.802497386932373,
|
|
"margin_dpo/beta_margin_std": 2.8981969356536865,
|
|
"margin_dpo/loss_margin_mean": 28.024972915649414,
|
|
"margin_dpo/margin_mean": 28.02497100830078,
|
|
"margin_dpo/margin_std": 28.8425350189209,
|
|
"step": 303
|
|
},
|
|
{
|
|
"epoch": 0.44640234948604995,
|
|
"grad_norm": 57.734458923339844,
|
|
"learning_rate": 3.403104165467883e-07,
|
|
"logits/chosen": -0.6465634703636169,
|
|
"logits/rejected": -0.6173808574676514,
|
|
"logps/chosen": -86.27723693847656,
|
|
"logps/ref_chosen": -71.95069885253906,
|
|
"logps/ref_rejected": -90.47203063964844,
|
|
"logps/rejected": -133.15660095214844,
|
|
"loss": 0.4558,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1500687152147293,
|
|
"margin_dpo/beta_margin_grad_std": 0.2116183042526245,
|
|
"margin_dpo/beta_margin_mean": 2.8358030319213867,
|
|
"margin_dpo/beta_margin_std": 2.0787370204925537,
|
|
"margin_dpo/loss_margin_mean": 28.358028411865234,
|
|
"margin_dpo/margin_mean": 28.358028411865234,
|
|
"margin_dpo/margin_std": 20.432727813720703,
|
|
"step": 304
|
|
},
|
|
{
|
|
"epoch": 0.447870778267254,
|
|
"grad_norm": 54.050018310546875,
|
|
"learning_rate": 3.391125620245535e-07,
|
|
"logits/chosen": -0.6285189986228943,
|
|
"logits/rejected": -0.5891715884208679,
|
|
"logps/chosen": -84.83607482910156,
|
|
"logps/ref_chosen": -66.79523468017578,
|
|
"logps/ref_rejected": -92.75459289550781,
|
|
"logps/rejected": -139.55845642089844,
|
|
"loss": 0.4322,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.16884967684745789,
|
|
"margin_dpo/beta_margin_grad_std": 0.18422438204288483,
|
|
"margin_dpo/beta_margin_mean": 2.8763022422790527,
|
|
"margin_dpo/beta_margin_std": 2.6594858169555664,
|
|
"margin_dpo/loss_margin_mean": 28.76302146911621,
|
|
"margin_dpo/margin_mean": 28.76302146911621,
|
|
"margin_dpo/margin_std": 26.524438858032227,
|
|
"step": 305
|
|
},
|
|
{
|
|
"epoch": 0.44933920704845814,
|
|
"grad_norm": 73.82415008544922,
|
|
"learning_rate": 3.3791235930343417e-07,
|
|
"logits/chosen": -0.6591800451278687,
|
|
"logits/rejected": -0.608156681060791,
|
|
"logps/chosen": -85.36546325683594,
|
|
"logps/ref_chosen": -69.68389892578125,
|
|
"logps/ref_rejected": -85.15919494628906,
|
|
"logps/rejected": -128.71629333496094,
|
|
"loss": 0.5126,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.16796466708183289,
|
|
"margin_dpo/beta_margin_grad_std": 0.21312610805034637,
|
|
"margin_dpo/beta_margin_mean": 2.7875523567199707,
|
|
"margin_dpo/beta_margin_std": 2.3330721855163574,
|
|
"margin_dpo/loss_margin_mean": 27.87552261352539,
|
|
"margin_dpo/margin_mean": 27.87552261352539,
|
|
"margin_dpo/margin_std": 23.294414520263672,
|
|
"step": 306
|
|
},
|
|
{
|
|
"epoch": 0.45080763582966227,
|
|
"grad_norm": 54.04255294799805,
|
|
"learning_rate": 3.367098400098881e-07,
|
|
"logits/chosen": -0.632627546787262,
|
|
"logits/rejected": -0.6056050658226013,
|
|
"logps/chosen": -86.17469787597656,
|
|
"logps/ref_chosen": -70.16542053222656,
|
|
"logps/ref_rejected": -86.97230529785156,
|
|
"logps/rejected": -128.53074645996094,
|
|
"loss": 0.5331,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1910136193037033,
|
|
"margin_dpo/beta_margin_grad_std": 0.216377392411232,
|
|
"margin_dpo/beta_margin_mean": 2.554914951324463,
|
|
"margin_dpo/beta_margin_std": 2.4117367267608643,
|
|
"margin_dpo/loss_margin_mean": 25.549150466918945,
|
|
"margin_dpo/margin_mean": 25.549152374267578,
|
|
"margin_dpo/margin_std": 24.079296112060547,
|
|
"step": 307
|
|
},
|
|
{
|
|
"epoch": 0.4522760646108664,
|
|
"grad_norm": 44.33732604980469,
|
|
"learning_rate": 3.355050358314172e-07,
|
|
"logits/chosen": -0.6031591892242432,
|
|
"logits/rejected": -0.5757944583892822,
|
|
"logps/chosen": -70.33519744873047,
|
|
"logps/ref_chosen": -55.2449951171875,
|
|
"logps/ref_rejected": -79.37226104736328,
|
|
"logps/rejected": -123.3551254272461,
|
|
"loss": 0.4969,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.16970184445381165,
|
|
"margin_dpo/beta_margin_grad_std": 0.20795808732509613,
|
|
"margin_dpo/beta_margin_mean": 2.8892669677734375,
|
|
"margin_dpo/beta_margin_std": 2.663458824157715,
|
|
"margin_dpo/loss_margin_mean": 28.892669677734375,
|
|
"margin_dpo/margin_mean": 28.892669677734375,
|
|
"margin_dpo/margin_std": 26.245033264160156,
|
|
"step": 308
|
|
},
|
|
{
|
|
"epoch": 0.45374449339207046,
|
|
"grad_norm": 57.88508987426758,
|
|
"learning_rate": 3.3429797851573183e-07,
|
|
"logits/chosen": -0.6012994647026062,
|
|
"logits/rejected": -0.564967691898346,
|
|
"logps/chosen": -66.71060180664062,
|
|
"logps/ref_chosen": -48.959083557128906,
|
|
"logps/ref_rejected": -82.34072875976562,
|
|
"logps/rejected": -128.30679321289062,
|
|
"loss": 0.5073,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.17930957674980164,
|
|
"margin_dpo/beta_margin_grad_std": 0.2210213840007782,
|
|
"margin_dpo/beta_margin_mean": 2.821453094482422,
|
|
"margin_dpo/beta_margin_std": 2.4553327560424805,
|
|
"margin_dpo/loss_margin_mean": 28.21453094482422,
|
|
"margin_dpo/margin_mean": 28.21453094482422,
|
|
"margin_dpo/margin_std": 24.337322235107422,
|
|
"step": 309
|
|
},
|
|
{
|
|
"epoch": 0.4552129221732746,
|
|
"grad_norm": 50.711158752441406,
|
|
"learning_rate": 3.3308869986991487e-07,
|
|
"logits/chosen": -0.6853815913200378,
|
|
"logits/rejected": -0.6389970183372498,
|
|
"logps/chosen": -78.57404327392578,
|
|
"logps/ref_chosen": -62.74177932739258,
|
|
"logps/ref_rejected": -79.9302978515625,
|
|
"logps/rejected": -120.24269104003906,
|
|
"loss": 0.4436,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1786579042673111,
|
|
"margin_dpo/beta_margin_grad_std": 0.16861887276172638,
|
|
"margin_dpo/beta_margin_mean": 2.448012351989746,
|
|
"margin_dpo/beta_margin_std": 2.0728390216827393,
|
|
"margin_dpo/loss_margin_mean": 24.48012351989746,
|
|
"margin_dpo/margin_mean": 24.480121612548828,
|
|
"margin_dpo/margin_std": 20.56110382080078,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.4566813509544787,
|
|
"grad_norm": 63.41331100463867,
|
|
"learning_rate": 3.3187723175958346e-07,
|
|
"logits/chosen": -0.6061598062515259,
|
|
"logits/rejected": -0.56733638048172,
|
|
"logps/chosen": -73.1260757446289,
|
|
"logps/ref_chosen": -53.027976989746094,
|
|
"logps/ref_rejected": -77.43820190429688,
|
|
"logps/rejected": -131.88427734375,
|
|
"loss": 0.3422,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.12758134305477142,
|
|
"margin_dpo/beta_margin_grad_std": 0.17252546548843384,
|
|
"margin_dpo/beta_margin_mean": 3.434797525405884,
|
|
"margin_dpo/beta_margin_std": 2.5421323776245117,
|
|
"margin_dpo/loss_margin_mean": 34.34797286987305,
|
|
"margin_dpo/margin_mean": 34.34797286987305,
|
|
"margin_dpo/margin_std": 25.21231460571289,
|
|
"step": 311
|
|
},
|
|
{
|
|
"epoch": 0.4581497797356828,
|
|
"grad_norm": 58.5345458984375,
|
|
"learning_rate": 3.306636061080487e-07,
|
|
"logits/chosen": -0.59651780128479,
|
|
"logits/rejected": -0.5514322519302368,
|
|
"logps/chosen": -65.93643951416016,
|
|
"logps/ref_chosen": -49.39221954345703,
|
|
"logps/ref_rejected": -75.79280090332031,
|
|
"logps/rejected": -122.18972778320312,
|
|
"loss": 0.4828,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.16882839798927307,
|
|
"margin_dpo/beta_margin_grad_std": 0.22298170626163483,
|
|
"margin_dpo/beta_margin_mean": 2.9852707386016846,
|
|
"margin_dpo/beta_margin_std": 2.6555700302124023,
|
|
"margin_dpo/loss_margin_mean": 29.852705001831055,
|
|
"margin_dpo/margin_mean": 29.852706909179688,
|
|
"margin_dpo/margin_std": 26.147369384765625,
|
|
"step": 312
|
|
},
|
|
{
|
|
"epoch": 0.45961820851688695,
|
|
"grad_norm": 54.11558151245117,
|
|
"learning_rate": 3.2944785489547537e-07,
|
|
"logits/chosen": -0.6790950298309326,
|
|
"logits/rejected": -0.6427109241485596,
|
|
"logps/chosen": -64.72444152832031,
|
|
"logps/ref_chosen": -50.152740478515625,
|
|
"logps/ref_rejected": -86.40620422363281,
|
|
"logps/rejected": -126.8267822265625,
|
|
"loss": 0.6273,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.21419396996498108,
|
|
"margin_dpo/beta_margin_grad_std": 0.23362776637077332,
|
|
"margin_dpo/beta_margin_mean": 2.5848872661590576,
|
|
"margin_dpo/beta_margin_std": 2.6925547122955322,
|
|
"margin_dpo/loss_margin_mean": 25.8488712310791,
|
|
"margin_dpo/margin_mean": 25.848873138427734,
|
|
"margin_dpo/margin_std": 26.872703552246094,
|
|
"step": 313
|
|
},
|
|
{
|
|
"epoch": 0.461086637298091,
|
|
"grad_norm": 64.02825164794922,
|
|
"learning_rate": 3.2823001015803857e-07,
|
|
"logits/chosen": -0.6256019473075867,
|
|
"logits/rejected": -0.6011070013046265,
|
|
"logps/chosen": -72.6094970703125,
|
|
"logps/ref_chosen": -57.23758316040039,
|
|
"logps/ref_rejected": -97.59652709960938,
|
|
"logps/rejected": -138.90609741210938,
|
|
"loss": 0.5787,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.20730111002922058,
|
|
"margin_dpo/beta_margin_grad_std": 0.22327031195163727,
|
|
"margin_dpo/beta_margin_mean": 2.593766450881958,
|
|
"margin_dpo/beta_margin_std": 2.570161819458008,
|
|
"margin_dpo/loss_margin_mean": 25.93766212463379,
|
|
"margin_dpo/margin_mean": 25.937664031982422,
|
|
"margin_dpo/margin_std": 25.60101890563965,
|
|
"step": 314
|
|
},
|
|
{
|
|
"epoch": 0.46255506607929514,
|
|
"grad_norm": 47.55817413330078,
|
|
"learning_rate": 3.270101039870797e-07,
|
|
"logits/chosen": -0.5983539819717407,
|
|
"logits/rejected": -0.5785402059555054,
|
|
"logps/chosen": -64.21907043457031,
|
|
"logps/ref_chosen": -49.06958770751953,
|
|
"logps/ref_rejected": -85.68087768554688,
|
|
"logps/rejected": -125.40115356445312,
|
|
"loss": 0.4903,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.18685929477214813,
|
|
"margin_dpo/beta_margin_grad_std": 0.19108892977237701,
|
|
"margin_dpo/beta_margin_mean": 2.457080602645874,
|
|
"margin_dpo/beta_margin_std": 2.294046401977539,
|
|
"margin_dpo/loss_margin_mean": 24.570804595947266,
|
|
"margin_dpo/margin_mean": 24.570804595947266,
|
|
"margin_dpo/margin_std": 22.66987419128418,
|
|
"step": 315
|
|
},
|
|
{
|
|
"epoch": 0.46402349486049926,
|
|
"grad_norm": 51.51395034790039,
|
|
"learning_rate": 3.2578816852826086e-07,
|
|
"logits/chosen": -0.6153182983398438,
|
|
"logits/rejected": -0.6095402240753174,
|
|
"logps/chosen": -71.8460693359375,
|
|
"logps/ref_chosen": -54.26074981689453,
|
|
"logps/ref_rejected": -101.2814712524414,
|
|
"logps/rejected": -148.74497985839844,
|
|
"loss": 0.4124,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1572447568178177,
|
|
"margin_dpo/beta_margin_grad_std": 0.18591180443763733,
|
|
"margin_dpo/beta_margin_mean": 2.9878194332122803,
|
|
"margin_dpo/beta_margin_std": 2.6384308338165283,
|
|
"margin_dpo/loss_margin_mean": 29.87819480895996,
|
|
"margin_dpo/margin_mean": 29.87819480895996,
|
|
"margin_dpo/margin_std": 26.249610900878906,
|
|
"step": 316
|
|
},
|
|
{
|
|
"epoch": 0.4654919236417034,
|
|
"grad_norm": 37.093284606933594,
|
|
"learning_rate": 3.2456423598071783e-07,
|
|
"logits/chosen": -0.6742887496948242,
|
|
"logits/rejected": -0.6377764940261841,
|
|
"logps/chosen": -69.39185333251953,
|
|
"logps/ref_chosen": -56.094207763671875,
|
|
"logps/ref_rejected": -100.69905090332031,
|
|
"logps/rejected": -147.84510803222656,
|
|
"loss": 0.3514,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.13395161926746368,
|
|
"margin_dpo/beta_margin_grad_std": 0.18397875130176544,
|
|
"margin_dpo/beta_margin_mean": 3.384840965270996,
|
|
"margin_dpo/beta_margin_std": 2.511617422103882,
|
|
"margin_dpo/loss_margin_mean": 33.848411560058594,
|
|
"margin_dpo/margin_mean": 33.848411560058594,
|
|
"margin_dpo/margin_std": 24.782241821289062,
|
|
"step": 317
|
|
},
|
|
{
|
|
"epoch": 0.4669603524229075,
|
|
"grad_norm": 46.82166290283203,
|
|
"learning_rate": 3.233383385962115e-07,
|
|
"logits/chosen": -0.6575570106506348,
|
|
"logits/rejected": -0.6119377613067627,
|
|
"logps/chosen": -77.43252563476562,
|
|
"logps/ref_chosen": -64.64570617675781,
|
|
"logps/ref_rejected": -82.76425170898438,
|
|
"logps/rejected": -126.07575988769531,
|
|
"loss": 0.4239,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.15713295340538025,
|
|
"margin_dpo/beta_margin_grad_std": 0.19899439811706543,
|
|
"margin_dpo/beta_margin_mean": 3.0524682998657227,
|
|
"margin_dpo/beta_margin_std": 2.5276248455047607,
|
|
"margin_dpo/loss_margin_mean": 30.524681091308594,
|
|
"margin_dpo/margin_mean": 30.524682998657227,
|
|
"margin_dpo/margin_std": 25.1044921875,
|
|
"step": 318
|
|
},
|
|
{
|
|
"epoch": 0.4684287812041116,
|
|
"grad_norm": 42.284812927246094,
|
|
"learning_rate": 3.2211050867827805e-07,
|
|
"logits/chosen": -0.6065380573272705,
|
|
"logits/rejected": -0.5934597253799438,
|
|
"logps/chosen": -62.33796691894531,
|
|
"logps/ref_chosen": -49.383758544921875,
|
|
"logps/ref_rejected": -113.90650939941406,
|
|
"logps/rejected": -156.28738403320312,
|
|
"loss": 0.3656,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.14448316395282745,
|
|
"margin_dpo/beta_margin_grad_std": 0.173114612698555,
|
|
"margin_dpo/beta_margin_mean": 2.9426653385162354,
|
|
"margin_dpo/beta_margin_std": 2.2474348545074463,
|
|
"margin_dpo/loss_margin_mean": 29.426651000976562,
|
|
"margin_dpo/margin_mean": 29.426651000976562,
|
|
"margin_dpo/margin_std": 22.307289123535156,
|
|
"step": 319
|
|
},
|
|
{
|
|
"epoch": 0.4698972099853157,
|
|
"grad_norm": 50.809173583984375,
|
|
"learning_rate": 3.208807785813777e-07,
|
|
"logits/chosen": -0.6396864056587219,
|
|
"logits/rejected": -0.6267807483673096,
|
|
"logps/chosen": -74.15386962890625,
|
|
"logps/ref_chosen": -59.50489044189453,
|
|
"logps/ref_rejected": -97.66716766357422,
|
|
"logps/rejected": -139.19309997558594,
|
|
"loss": 0.4867,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.17603495717048645,
|
|
"margin_dpo/beta_margin_grad_std": 0.19671519100666046,
|
|
"margin_dpo/beta_margin_mean": 2.687695026397705,
|
|
"margin_dpo/beta_margin_std": 2.3658065795898438,
|
|
"margin_dpo/loss_margin_mean": 26.876949310302734,
|
|
"margin_dpo/margin_mean": 26.876949310302734,
|
|
"margin_dpo/margin_std": 23.568016052246094,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.4713656387665198,
|
|
"grad_norm": 71.52301025390625,
|
|
"learning_rate": 3.1964918071004217e-07,
|
|
"logits/chosen": -0.6303710341453552,
|
|
"logits/rejected": -0.5920969247817993,
|
|
"logps/chosen": -80.57645416259766,
|
|
"logps/ref_chosen": -61.548683166503906,
|
|
"logps/ref_rejected": -91.64103698730469,
|
|
"logps/rejected": -136.7852020263672,
|
|
"loss": 0.706,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.224751278758049,
|
|
"margin_dpo/beta_margin_grad_std": 0.2634871304035187,
|
|
"margin_dpo/beta_margin_mean": 2.6116397380828857,
|
|
"margin_dpo/beta_margin_std": 2.8075194358825684,
|
|
"margin_dpo/loss_margin_mean": 26.116395950317383,
|
|
"margin_dpo/margin_mean": 26.116397857666016,
|
|
"margin_dpo/margin_std": 27.479251861572266,
|
|
"step": 321
|
|
},
|
|
{
|
|
"epoch": 0.47283406754772395,
|
|
"grad_norm": 55.85123062133789,
|
|
"learning_rate": 3.184157475180207e-07,
|
|
"logits/chosen": -0.6203020811080933,
|
|
"logits/rejected": -0.5984194874763489,
|
|
"logps/chosen": -72.76739501953125,
|
|
"logps/ref_chosen": -57.29003143310547,
|
|
"logps/ref_rejected": -95.74992370605469,
|
|
"logps/rejected": -142.85971069335938,
|
|
"loss": 0.4316,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.16712939739227295,
|
|
"margin_dpo/beta_margin_grad_std": 0.18851304054260254,
|
|
"margin_dpo/beta_margin_mean": 3.1632421016693115,
|
|
"margin_dpo/beta_margin_std": 2.7988502979278564,
|
|
"margin_dpo/loss_margin_mean": 31.63241958618164,
|
|
"margin_dpo/margin_mean": 31.63241958618164,
|
|
"margin_dpo/margin_std": 27.733840942382812,
|
|
"step": 322
|
|
},
|
|
{
|
|
"epoch": 0.47430249632892807,
|
|
"grad_norm": 46.408023834228516,
|
|
"learning_rate": 3.171805115074251e-07,
|
|
"logits/chosen": -0.6271795034408569,
|
|
"logits/rejected": -0.60142982006073,
|
|
"logps/chosen": -66.6007308959961,
|
|
"logps/ref_chosen": -51.23395919799805,
|
|
"logps/ref_rejected": -75.06192016601562,
|
|
"logps/rejected": -121.7147445678711,
|
|
"loss": 0.4239,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.15695661306381226,
|
|
"margin_dpo/beta_margin_grad_std": 0.1998593658208847,
|
|
"margin_dpo/beta_margin_mean": 3.128605842590332,
|
|
"margin_dpo/beta_margin_std": 2.577242851257324,
|
|
"margin_dpo/loss_margin_mean": 31.286056518554688,
|
|
"margin_dpo/margin_mean": 31.286056518554688,
|
|
"margin_dpo/margin_std": 25.391637802124023,
|
|
"step": 323
|
|
},
|
|
{
|
|
"epoch": 0.47577092511013214,
|
|
"grad_norm": 59.7817497253418,
|
|
"learning_rate": 3.1594350522787295e-07,
|
|
"logits/chosen": -0.6021302938461304,
|
|
"logits/rejected": -0.5500773191452026,
|
|
"logps/chosen": -82.58967590332031,
|
|
"logps/ref_chosen": -65.13516998291016,
|
|
"logps/ref_rejected": -86.47750091552734,
|
|
"logps/rejected": -133.61947631835938,
|
|
"loss": 0.4627,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.16844435036182404,
|
|
"margin_dpo/beta_margin_grad_std": 0.207102969288826,
|
|
"margin_dpo/beta_margin_mean": 2.9687466621398926,
|
|
"margin_dpo/beta_margin_std": 2.6024606227874756,
|
|
"margin_dpo/loss_margin_mean": 29.68746566772461,
|
|
"margin_dpo/margin_mean": 29.687463760375977,
|
|
"margin_dpo/margin_std": 25.10573959350586,
|
|
"step": 324
|
|
},
|
|
{
|
|
"epoch": 0.47723935389133626,
|
|
"grad_norm": 43.97902297973633,
|
|
"learning_rate": 3.147047612756302e-07,
|
|
"logits/chosen": -0.6481966972351074,
|
|
"logits/rejected": -0.5832774639129639,
|
|
"logps/chosen": -70.51419067382812,
|
|
"logps/ref_chosen": -56.215599060058594,
|
|
"logps/ref_rejected": -70.0859375,
|
|
"logps/rejected": -113.08633422851562,
|
|
"loss": 0.4277,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1607721447944641,
|
|
"margin_dpo/beta_margin_grad_std": 0.20015878975391388,
|
|
"margin_dpo/beta_margin_mean": 2.870180368423462,
|
|
"margin_dpo/beta_margin_std": 2.1971116065979004,
|
|
"margin_dpo/loss_margin_mean": 28.70180320739746,
|
|
"margin_dpo/margin_mean": 28.70180320739746,
|
|
"margin_dpo/margin_std": 21.692012786865234,
|
|
"step": 325
|
|
},
|
|
{
|
|
"epoch": 0.4787077826725404,
|
|
"grad_norm": 58.040103912353516,
|
|
"learning_rate": 3.134643122927519e-07,
|
|
"logits/chosen": -0.6689083576202393,
|
|
"logits/rejected": -0.6224409937858582,
|
|
"logps/chosen": -90.72734832763672,
|
|
"logps/ref_chosen": -72.72496032714844,
|
|
"logps/ref_rejected": -79.84678649902344,
|
|
"logps/rejected": -123.62333679199219,
|
|
"loss": 0.5107,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.19229000806808472,
|
|
"margin_dpo/beta_margin_grad_std": 0.202370285987854,
|
|
"margin_dpo/beta_margin_mean": 2.577415943145752,
|
|
"margin_dpo/beta_margin_std": 2.4158146381378174,
|
|
"margin_dpo/loss_margin_mean": 25.774160385131836,
|
|
"margin_dpo/margin_mean": 25.774158477783203,
|
|
"margin_dpo/margin_std": 24.106483459472656,
|
|
"step": 326
|
|
},
|
|
{
|
|
"epoch": 0.4801762114537445,
|
|
"grad_norm": 48.54873275756836,
|
|
"learning_rate": 3.1222219096622264e-07,
|
|
"logits/chosen": -0.6299796104431152,
|
|
"logits/rejected": -0.5861495733261108,
|
|
"logps/chosen": -84.29374694824219,
|
|
"logps/ref_chosen": -69.13441467285156,
|
|
"logps/ref_rejected": -111.93377685546875,
|
|
"logps/rejected": -164.3966064453125,
|
|
"loss": 0.2894,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1097550168633461,
|
|
"margin_dpo/beta_margin_grad_std": 0.17322029173374176,
|
|
"margin_dpo/beta_margin_mean": 3.730348825454712,
|
|
"margin_dpo/beta_margin_std": 2.5048036575317383,
|
|
"margin_dpo/loss_margin_mean": 37.30348587036133,
|
|
"margin_dpo/margin_mean": 37.303489685058594,
|
|
"margin_dpo/margin_std": 24.60919952392578,
|
|
"step": 327
|
|
},
|
|
{
|
|
"epoch": 0.48164464023494863,
|
|
"grad_norm": 52.86729431152344,
|
|
"learning_rate": 3.1097843002709427e-07,
|
|
"logits/chosen": -0.6159682869911194,
|
|
"logits/rejected": -0.5945558547973633,
|
|
"logps/chosen": -78.4199447631836,
|
|
"logps/ref_chosen": -59.68719482421875,
|
|
"logps/ref_rejected": -90.85499572753906,
|
|
"logps/rejected": -137.5189971923828,
|
|
"loss": 0.4753,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1785195767879486,
|
|
"margin_dpo/beta_margin_grad_std": 0.1958281397819519,
|
|
"margin_dpo/beta_margin_mean": 2.7931253910064697,
|
|
"margin_dpo/beta_margin_std": 2.519235134124756,
|
|
"margin_dpo/loss_margin_mean": 27.931251525878906,
|
|
"margin_dpo/margin_mean": 27.931251525878906,
|
|
"margin_dpo/margin_std": 25.17681312561035,
|
|
"step": 328
|
|
},
|
|
{
|
|
"epoch": 0.4831130690161527,
|
|
"grad_norm": 60.656837463378906,
|
|
"learning_rate": 3.0973306224962437e-07,
|
|
"logits/chosen": -0.6298278570175171,
|
|
"logits/rejected": -0.5832244753837585,
|
|
"logps/chosen": -82.09062957763672,
|
|
"logps/ref_chosen": -65.2461929321289,
|
|
"logps/ref_rejected": -100.69770812988281,
|
|
"logps/rejected": -154.71499633789062,
|
|
"loss": 0.3743,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.12058813869953156,
|
|
"margin_dpo/beta_margin_grad_std": 0.20209833979606628,
|
|
"margin_dpo/beta_margin_mean": 3.717285633087158,
|
|
"margin_dpo/beta_margin_std": 2.69805908203125,
|
|
"margin_dpo/loss_margin_mean": 37.172855377197266,
|
|
"margin_dpo/margin_mean": 37.172855377197266,
|
|
"margin_dpo/margin_std": 26.679462432861328,
|
|
"step": 329
|
|
},
|
|
{
|
|
"epoch": 0.4845814977973568,
|
|
"grad_norm": 48.152469635009766,
|
|
"learning_rate": 3.084861204504122e-07,
|
|
"logits/chosen": -0.6236467361450195,
|
|
"logits/rejected": -0.6115535497665405,
|
|
"logps/chosen": -64.35501098632812,
|
|
"logps/ref_chosen": -46.998348236083984,
|
|
"logps/ref_rejected": -86.87684631347656,
|
|
"logps/rejected": -135.53587341308594,
|
|
"loss": 0.4026,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.14161227643489838,
|
|
"margin_dpo/beta_margin_grad_std": 0.18910308182239532,
|
|
"margin_dpo/beta_margin_mean": 3.1302359104156494,
|
|
"margin_dpo/beta_margin_std": 2.4480643272399902,
|
|
"margin_dpo/loss_margin_mean": 31.302356719970703,
|
|
"margin_dpo/margin_mean": 31.302356719970703,
|
|
"margin_dpo/margin_std": 24.32876968383789,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.48604992657856094,
|
|
"grad_norm": 36.348045349121094,
|
|
"learning_rate": 3.072376374875335e-07,
|
|
"logits/chosen": -0.6435179710388184,
|
|
"logits/rejected": -0.607470691204071,
|
|
"logps/chosen": -66.53829193115234,
|
|
"logps/ref_chosen": -50.52424621582031,
|
|
"logps/ref_rejected": -89.01544189453125,
|
|
"logps/rejected": -139.56106567382812,
|
|
"loss": 0.2539,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1042742058634758,
|
|
"margin_dpo/beta_margin_grad_std": 0.14150582253932953,
|
|
"margin_dpo/beta_margin_mean": 3.453158378601074,
|
|
"margin_dpo/beta_margin_std": 2.3450417518615723,
|
|
"margin_dpo/loss_margin_mean": 34.53158187866211,
|
|
"margin_dpo/margin_mean": 34.53158187866211,
|
|
"margin_dpo/margin_std": 23.280845642089844,
|
|
"step": 331
|
|
},
|
|
{
|
|
"epoch": 0.48751835535976507,
|
|
"grad_norm": 49.24102783203125,
|
|
"learning_rate": 3.059876462596758e-07,
|
|
"logits/chosen": -0.6652279496192932,
|
|
"logits/rejected": -0.6368132829666138,
|
|
"logps/chosen": -67.384521484375,
|
|
"logps/ref_chosen": -49.18028259277344,
|
|
"logps/ref_rejected": -76.48515319824219,
|
|
"logps/rejected": -120.33047485351562,
|
|
"loss": 0.5416,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.18533176183700562,
|
|
"margin_dpo/beta_margin_grad_std": 0.21836212277412415,
|
|
"margin_dpo/beta_margin_mean": 2.564108371734619,
|
|
"margin_dpo/beta_margin_std": 2.247309684753418,
|
|
"margin_dpo/loss_margin_mean": 25.641084671020508,
|
|
"margin_dpo/margin_mean": 25.641082763671875,
|
|
"margin_dpo/margin_std": 22.292034149169922,
|
|
"step": 332
|
|
},
|
|
{
|
|
"epoch": 0.4889867841409692,
|
|
"grad_norm": 65.32575225830078,
|
|
"learning_rate": 3.0473617970527015e-07,
|
|
"logits/chosen": -0.6248708963394165,
|
|
"logits/rejected": -0.6129882335662842,
|
|
"logps/chosen": -83.42967224121094,
|
|
"logps/ref_chosen": -63.75574493408203,
|
|
"logps/ref_rejected": -95.04411315917969,
|
|
"logps/rejected": -147.51449584960938,
|
|
"loss": 0.5294,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.17729292809963226,
|
|
"margin_dpo/beta_margin_grad_std": 0.23055444657802582,
|
|
"margin_dpo/beta_margin_mean": 3.2796459197998047,
|
|
"margin_dpo/beta_margin_std": 2.900787115097046,
|
|
"margin_dpo/loss_margin_mean": 32.79645919799805,
|
|
"margin_dpo/margin_mean": 32.79645919799805,
|
|
"margin_dpo/margin_std": 28.70106315612793,
|
|
"step": 333
|
|
},
|
|
{
|
|
"epoch": 0.49045521292217326,
|
|
"grad_norm": 47.027408599853516,
|
|
"learning_rate": 3.034832708016243e-07,
|
|
"logits/chosen": -0.5920358896255493,
|
|
"logits/rejected": -0.5716053247451782,
|
|
"logps/chosen": -87.28846740722656,
|
|
"logps/ref_chosen": -66.97975158691406,
|
|
"logps/ref_rejected": -95.31692504882812,
|
|
"logps/rejected": -146.97540283203125,
|
|
"loss": 0.3501,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.12930122017860413,
|
|
"margin_dpo/beta_margin_grad_std": 0.18267269432544708,
|
|
"margin_dpo/beta_margin_mean": 3.1349751949310303,
|
|
"margin_dpo/beta_margin_std": 2.204379081726074,
|
|
"margin_dpo/loss_margin_mean": 31.34975242614746,
|
|
"margin_dpo/margin_mean": 31.34975242614746,
|
|
"margin_dpo/margin_std": 22.04346466064453,
|
|
"step": 334
|
|
},
|
|
{
|
|
"epoch": 0.4919236417033774,
|
|
"grad_norm": 64.20230865478516,
|
|
"learning_rate": 3.022289525640531e-07,
|
|
"logits/chosen": -0.6635192632675171,
|
|
"logits/rejected": -0.6386910676956177,
|
|
"logps/chosen": -80.52153015136719,
|
|
"logps/ref_chosen": -62.54248046875,
|
|
"logps/ref_rejected": -87.6176986694336,
|
|
"logps/rejected": -133.28208923339844,
|
|
"loss": 0.5273,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.18919570744037628,
|
|
"margin_dpo/beta_margin_grad_std": 0.2199753075838089,
|
|
"margin_dpo/beta_margin_mean": 2.768533945083618,
|
|
"margin_dpo/beta_margin_std": 2.6116201877593994,
|
|
"margin_dpo/loss_margin_mean": 27.685338973999023,
|
|
"margin_dpo/margin_mean": 27.685338973999023,
|
|
"margin_dpo/margin_std": 25.792123794555664,
|
|
"step": 335
|
|
},
|
|
{
|
|
"epoch": 0.4933920704845815,
|
|
"grad_norm": 77.19268035888672,
|
|
"learning_rate": 3.009732580450086e-07,
|
|
"logits/chosen": -0.597804069519043,
|
|
"logits/rejected": -0.5845491290092468,
|
|
"logps/chosen": -73.87245178222656,
|
|
"logps/ref_chosen": -54.531150817871094,
|
|
"logps/ref_rejected": -104.40424346923828,
|
|
"logps/rejected": -157.87643432617188,
|
|
"loss": 0.4829,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.13811615109443665,
|
|
"margin_dpo/beta_margin_grad_std": 0.2196315973997116,
|
|
"margin_dpo/beta_margin_mean": 3.4130895137786865,
|
|
"margin_dpo/beta_margin_std": 2.9321839809417725,
|
|
"margin_dpo/loss_margin_mean": 34.13089370727539,
|
|
"margin_dpo/margin_mean": 34.13089370727539,
|
|
"margin_dpo/margin_std": 28.592952728271484,
|
|
"step": 336
|
|
},
|
|
{
|
|
"epoch": 0.4948604992657856,
|
|
"grad_norm": 51.322021484375,
|
|
"learning_rate": 2.9971622033320914e-07,
|
|
"logits/chosen": -0.6781501173973083,
|
|
"logits/rejected": -0.655287504196167,
|
|
"logps/chosen": -82.64189147949219,
|
|
"logps/ref_chosen": -65.12869262695312,
|
|
"logps/ref_rejected": -101.72701263427734,
|
|
"logps/rejected": -150.01385498046875,
|
|
"loss": 0.3766,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.14225561916828156,
|
|
"margin_dpo/beta_margin_grad_std": 0.1846165657043457,
|
|
"margin_dpo/beta_margin_mean": 3.077363967895508,
|
|
"margin_dpo/beta_margin_std": 2.2813127040863037,
|
|
"margin_dpo/loss_margin_mean": 30.773639678955078,
|
|
"margin_dpo/margin_mean": 30.773639678955078,
|
|
"margin_dpo/margin_std": 22.479888916015625,
|
|
"step": 337
|
|
},
|
|
{
|
|
"epoch": 0.49632892804698975,
|
|
"grad_norm": 54.3819580078125,
|
|
"learning_rate": 2.984578725527675e-07,
|
|
"logits/chosen": -0.6051090955734253,
|
|
"logits/rejected": -0.5787324905395508,
|
|
"logps/chosen": -78.59669494628906,
|
|
"logps/ref_chosen": -58.422706604003906,
|
|
"logps/ref_rejected": -89.06854248046875,
|
|
"logps/rejected": -139.98745727539062,
|
|
"loss": 0.3929,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.15525604784488678,
|
|
"margin_dpo/beta_margin_grad_std": 0.17774680256843567,
|
|
"margin_dpo/beta_margin_mean": 3.0744948387145996,
|
|
"margin_dpo/beta_margin_std": 2.481618642807007,
|
|
"margin_dpo/loss_margin_mean": 30.74494743347168,
|
|
"margin_dpo/margin_mean": 30.744945526123047,
|
|
"margin_dpo/margin_std": 24.781238555908203,
|
|
"step": 338
|
|
},
|
|
{
|
|
"epoch": 0.4977973568281938,
|
|
"grad_norm": 43.893314361572266,
|
|
"learning_rate": 2.9719824786231796e-07,
|
|
"logits/chosen": -0.6943797469139099,
|
|
"logits/rejected": -0.6593571901321411,
|
|
"logps/chosen": -77.50848388671875,
|
|
"logps/ref_chosen": -59.99531555175781,
|
|
"logps/ref_rejected": -103.9109115600586,
|
|
"logps/rejected": -155.89947509765625,
|
|
"loss": 0.3567,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.13147962093353271,
|
|
"margin_dpo/beta_margin_grad_std": 0.19132699072360992,
|
|
"margin_dpo/beta_margin_mean": 3.4475395679473877,
|
|
"margin_dpo/beta_margin_std": 2.6293349266052246,
|
|
"margin_dpo/loss_margin_mean": 34.47539520263672,
|
|
"margin_dpo/margin_mean": 34.47539520263672,
|
|
"margin_dpo/margin_std": 25.884403228759766,
|
|
"step": 339
|
|
},
|
|
{
|
|
"epoch": 0.49926578560939794,
|
|
"grad_norm": 47.391937255859375,
|
|
"learning_rate": 2.959373794541426e-07,
|
|
"logits/chosen": -0.5665243864059448,
|
|
"logits/rejected": -0.5303752422332764,
|
|
"logps/chosen": -73.12321472167969,
|
|
"logps/ref_chosen": -52.83022689819336,
|
|
"logps/ref_rejected": -73.10723876953125,
|
|
"logps/rejected": -127.07223510742188,
|
|
"loss": 0.3914,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.14956659078598022,
|
|
"margin_dpo/beta_margin_grad_std": 0.1902948021888733,
|
|
"margin_dpo/beta_margin_mean": 3.3672003746032715,
|
|
"margin_dpo/beta_margin_std": 3.0476183891296387,
|
|
"margin_dpo/loss_margin_mean": 33.67200469970703,
|
|
"margin_dpo/margin_mean": 33.67200469970703,
|
|
"margin_dpo/margin_std": 30.17064666748047,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.5007342143906021,
|
|
"grad_norm": 49.45682907104492,
|
|
"learning_rate": 2.946753005532965e-07,
|
|
"logits/chosen": -0.5927428007125854,
|
|
"logits/rejected": -0.5772538185119629,
|
|
"logps/chosen": -69.79241943359375,
|
|
"logps/ref_chosen": -47.899803161621094,
|
|
"logps/ref_rejected": -101.80987548828125,
|
|
"logps/rejected": -160.8358154296875,
|
|
"loss": 0.3099,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.12000828981399536,
|
|
"margin_dpo/beta_margin_grad_std": 0.17166633903980255,
|
|
"margin_dpo/beta_margin_mean": 3.713330030441284,
|
|
"margin_dpo/beta_margin_std": 2.623293876647949,
|
|
"margin_dpo/loss_margin_mean": 37.13330078125,
|
|
"margin_dpo/margin_mean": 37.13330078125,
|
|
"margin_dpo/margin_std": 26.138290405273438,
|
|
"step": 341
|
|
},
|
|
{
|
|
"epoch": 0.5022026431718062,
|
|
"grad_norm": 71.23127746582031,
|
|
"learning_rate": 2.934120444167326e-07,
|
|
"logits/chosen": -0.5858966708183289,
|
|
"logits/rejected": -0.5406848192214966,
|
|
"logps/chosen": -90.55252838134766,
|
|
"logps/ref_chosen": -71.99664306640625,
|
|
"logps/ref_rejected": -92.58959197998047,
|
|
"logps/rejected": -143.6535186767578,
|
|
"loss": 0.4539,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.16609962284564972,
|
|
"margin_dpo/beta_margin_grad_std": 0.21481139957904816,
|
|
"margin_dpo/beta_margin_mean": 3.2508039474487305,
|
|
"margin_dpo/beta_margin_std": 2.9156150817871094,
|
|
"margin_dpo/loss_margin_mean": 32.50803756713867,
|
|
"margin_dpo/margin_mean": 32.50803756713867,
|
|
"margin_dpo/margin_std": 29.08768081665039,
|
|
"step": 342
|
|
},
|
|
{
|
|
"epoch": 0.5036710719530103,
|
|
"grad_norm": 58.09341812133789,
|
|
"learning_rate": 2.9214764433242476e-07,
|
|
"logits/chosen": -0.6343034505844116,
|
|
"logits/rejected": -0.6098527908325195,
|
|
"logps/chosen": -71.43299865722656,
|
|
"logps/ref_chosen": -54.40562438964844,
|
|
"logps/ref_rejected": -111.04141998291016,
|
|
"logps/rejected": -162.60467529296875,
|
|
"loss": 0.3828,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.13273513317108154,
|
|
"margin_dpo/beta_margin_grad_std": 0.20148909091949463,
|
|
"margin_dpo/beta_margin_mean": 3.4535882472991943,
|
|
"margin_dpo/beta_margin_std": 2.496697425842285,
|
|
"margin_dpo/loss_margin_mean": 34.53588104248047,
|
|
"margin_dpo/margin_mean": 34.53588104248047,
|
|
"margin_dpo/margin_std": 24.785640716552734,
|
|
"step": 343
|
|
},
|
|
{
|
|
"epoch": 0.5051395007342144,
|
|
"grad_norm": 59.848392486572266,
|
|
"learning_rate": 2.9088213361849126e-07,
|
|
"logits/chosen": -0.605980396270752,
|
|
"logits/rejected": -0.5816007256507874,
|
|
"logps/chosen": -74.10179901123047,
|
|
"logps/ref_chosen": -53.96466827392578,
|
|
"logps/ref_rejected": -90.62336730957031,
|
|
"logps/rejected": -139.1201629638672,
|
|
"loss": 0.5713,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1968245953321457,
|
|
"margin_dpo/beta_margin_grad_std": 0.23204652965068817,
|
|
"margin_dpo/beta_margin_mean": 2.8359665870666504,
|
|
"margin_dpo/beta_margin_std": 2.9364418983459473,
|
|
"margin_dpo/loss_margin_mean": 28.359664916992188,
|
|
"margin_dpo/margin_mean": 28.359664916992188,
|
|
"margin_dpo/margin_std": 28.389617919921875,
|
|
"step": 344
|
|
},
|
|
{
|
|
"epoch": 0.5066079295154186,
|
|
"grad_norm": 52.64528274536133,
|
|
"learning_rate": 2.896155456223163e-07,
|
|
"logits/chosen": -0.6204372048377991,
|
|
"logits/rejected": -0.5874470472335815,
|
|
"logps/chosen": -81.14032745361328,
|
|
"logps/ref_chosen": -61.685699462890625,
|
|
"logps/ref_rejected": -99.49040985107422,
|
|
"logps/rejected": -153.2753448486328,
|
|
"loss": 0.3808,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.12536533176898956,
|
|
"margin_dpo/beta_margin_grad_std": 0.19518449902534485,
|
|
"margin_dpo/beta_margin_mean": 3.433030605316162,
|
|
"margin_dpo/beta_margin_std": 2.6923766136169434,
|
|
"margin_dpo/loss_margin_mean": 34.33030700683594,
|
|
"margin_dpo/margin_mean": 34.33030319213867,
|
|
"margin_dpo/margin_std": 26.533443450927734,
|
|
"step": 345
|
|
},
|
|
{
|
|
"epoch": 0.5080763582966226,
|
|
"grad_norm": 66.96176147460938,
|
|
"learning_rate": 2.883479137196714e-07,
|
|
"logits/chosen": -0.6459622383117676,
|
|
"logits/rejected": -0.6260564923286438,
|
|
"logps/chosen": -77.54434204101562,
|
|
"logps/ref_chosen": -55.256263732910156,
|
|
"logps/ref_rejected": -77.41532135009766,
|
|
"logps/rejected": -130.17681884765625,
|
|
"loss": 0.501,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1676793098449707,
|
|
"margin_dpo/beta_margin_grad_std": 0.2248024344444275,
|
|
"margin_dpo/beta_margin_mean": 3.0473415851593018,
|
|
"margin_dpo/beta_margin_std": 2.67748761177063,
|
|
"margin_dpo/loss_margin_mean": 30.47341537475586,
|
|
"margin_dpo/margin_mean": 30.47341537475586,
|
|
"margin_dpo/margin_std": 26.7009220123291,
|
|
"step": 346
|
|
},
|
|
{
|
|
"epoch": 0.5095447870778267,
|
|
"grad_norm": 59.65949249267578,
|
|
"learning_rate": 2.8707927131383614e-07,
|
|
"logits/chosen": -0.6436910033226013,
|
|
"logits/rejected": -0.6030080318450928,
|
|
"logps/chosen": -80.81089782714844,
|
|
"logps/ref_chosen": -57.56624221801758,
|
|
"logps/ref_rejected": -92.35508728027344,
|
|
"logps/rejected": -146.6624755859375,
|
|
"loss": 0.5062,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.15754956007003784,
|
|
"margin_dpo/beta_margin_grad_std": 0.22358344495296478,
|
|
"margin_dpo/beta_margin_mean": 3.1062729358673096,
|
|
"margin_dpo/beta_margin_std": 2.751783847808838,
|
|
"margin_dpo/loss_margin_mean": 31.062728881835938,
|
|
"margin_dpo/margin_mean": 31.062728881835938,
|
|
"margin_dpo/margin_std": 27.136539459228516,
|
|
"step": 347
|
|
},
|
|
{
|
|
"epoch": 0.5110132158590308,
|
|
"grad_norm": 66.42916870117188,
|
|
"learning_rate": 2.858096518347179e-07,
|
|
"logits/chosen": -0.6487417221069336,
|
|
"logits/rejected": -0.6265465021133423,
|
|
"logps/chosen": -76.80084991455078,
|
|
"logps/ref_chosen": -56.31770324707031,
|
|
"logps/ref_rejected": -89.13837432861328,
|
|
"logps/rejected": -139.78619384765625,
|
|
"loss": 0.5393,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.18773815035820007,
|
|
"margin_dpo/beta_margin_grad_std": 0.23163601756095886,
|
|
"margin_dpo/beta_margin_mean": 3.016467809677124,
|
|
"margin_dpo/beta_margin_std": 2.694882392883301,
|
|
"margin_dpo/loss_margin_mean": 30.1646785736084,
|
|
"margin_dpo/margin_mean": 30.164676666259766,
|
|
"margin_dpo/margin_std": 26.57091522216797,
|
|
"step": 348
|
|
},
|
|
{
|
|
"epoch": 0.5124816446402349,
|
|
"grad_norm": 74.79912567138672,
|
|
"learning_rate": 2.845390887379706e-07,
|
|
"logits/chosen": -0.6091076135635376,
|
|
"logits/rejected": -0.5961982011795044,
|
|
"logps/chosen": -76.59553527832031,
|
|
"logps/ref_chosen": -58.0255126953125,
|
|
"logps/ref_rejected": -97.50515747070312,
|
|
"logps/rejected": -141.9857177734375,
|
|
"loss": 0.7187,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.22252453863620758,
|
|
"margin_dpo/beta_margin_grad_std": 0.259151816368103,
|
|
"margin_dpo/beta_margin_mean": 2.591054916381836,
|
|
"margin_dpo/beta_margin_std": 2.9782986640930176,
|
|
"margin_dpo/loss_margin_mean": 25.91054916381836,
|
|
"margin_dpo/margin_mean": 25.91054916381836,
|
|
"margin_dpo/margin_std": 29.515146255493164,
|
|
"step": 349
|
|
},
|
|
{
|
|
"epoch": 0.5139500734214391,
|
|
"grad_norm": 60.944671630859375,
|
|
"learning_rate": 2.8326761550411346e-07,
|
|
"logits/chosen": -0.6253387928009033,
|
|
"logits/rejected": -0.5987178087234497,
|
|
"logps/chosen": -83.54938507080078,
|
|
"logps/ref_chosen": -64.33049011230469,
|
|
"logps/ref_rejected": -89.87164306640625,
|
|
"logps/rejected": -136.64892578125,
|
|
"loss": 0.6331,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.20255906879901886,
|
|
"margin_dpo/beta_margin_grad_std": 0.24518385529518127,
|
|
"margin_dpo/beta_margin_mean": 2.7558395862579346,
|
|
"margin_dpo/beta_margin_std": 2.81416392326355,
|
|
"margin_dpo/loss_margin_mean": 27.558395385742188,
|
|
"margin_dpo/margin_mean": 27.558395385742188,
|
|
"margin_dpo/margin_std": 27.75186538696289,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.5154185022026432,
|
|
"grad_norm": 44.936790466308594,
|
|
"learning_rate": 2.819952656376487e-07,
|
|
"logits/chosen": -0.5853023529052734,
|
|
"logits/rejected": -0.5609848499298096,
|
|
"logps/chosen": -77.83966827392578,
|
|
"logps/ref_chosen": -60.6721305847168,
|
|
"logps/ref_rejected": -101.5654296875,
|
|
"logps/rejected": -153.1103515625,
|
|
"loss": 0.3463,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.125121608376503,
|
|
"margin_dpo/beta_margin_grad_std": 0.19120459258556366,
|
|
"margin_dpo/beta_margin_mean": 3.4377381801605225,
|
|
"margin_dpo/beta_margin_std": 2.4538416862487793,
|
|
"margin_dpo/loss_margin_mean": 34.37738037109375,
|
|
"margin_dpo/margin_mean": 34.37738037109375,
|
|
"margin_dpo/margin_std": 24.536075592041016,
|
|
"step": 351
|
|
},
|
|
{
|
|
"epoch": 0.5168869309838473,
|
|
"grad_norm": 71.77349853515625,
|
|
"learning_rate": 2.8072207266617854e-07,
|
|
"logits/chosen": -0.6167569160461426,
|
|
"logits/rejected": -0.5798036456108093,
|
|
"logps/chosen": -88.59427642822266,
|
|
"logps/ref_chosen": -70.9434585571289,
|
|
"logps/ref_rejected": -76.6419677734375,
|
|
"logps/rejected": -121.15208435058594,
|
|
"loss": 0.5829,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.20149900019168854,
|
|
"margin_dpo/beta_margin_grad_std": 0.23062226176261902,
|
|
"margin_dpo/beta_margin_mean": 2.685929775238037,
|
|
"margin_dpo/beta_margin_std": 2.7565815448760986,
|
|
"margin_dpo/loss_margin_mean": 26.859296798706055,
|
|
"margin_dpo/margin_mean": 26.859298706054688,
|
|
"margin_dpo/margin_std": 27.36536407470703,
|
|
"step": 352
|
|
},
|
|
{
|
|
"epoch": 0.5183553597650514,
|
|
"grad_norm": 69.42572021484375,
|
|
"learning_rate": 2.794480701395219e-07,
|
|
"logits/chosen": -0.6497888565063477,
|
|
"logits/rejected": -0.623069167137146,
|
|
"logps/chosen": -78.64827728271484,
|
|
"logps/ref_chosen": -58.39533996582031,
|
|
"logps/ref_rejected": -80.33552551269531,
|
|
"logps/rejected": -127.51103210449219,
|
|
"loss": 0.7022,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.21879735589027405,
|
|
"margin_dpo/beta_margin_grad_std": 0.2619403302669525,
|
|
"margin_dpo/beta_margin_mean": 2.6922569274902344,
|
|
"margin_dpo/beta_margin_std": 2.7726051807403564,
|
|
"margin_dpo/loss_margin_mean": 26.92256736755371,
|
|
"margin_dpo/margin_mean": 26.92256736755371,
|
|
"margin_dpo/margin_std": 27.62420082092285,
|
|
"step": 353
|
|
},
|
|
{
|
|
"epoch": 0.5198237885462555,
|
|
"grad_norm": 39.96236801147461,
|
|
"learning_rate": 2.781732916288303e-07,
|
|
"logits/chosen": -0.6289291381835938,
|
|
"logits/rejected": -0.6045188903808594,
|
|
"logps/chosen": -76.57125854492188,
|
|
"logps/ref_chosen": -59.80299377441406,
|
|
"logps/ref_rejected": -88.75750732421875,
|
|
"logps/rejected": -137.36941528320312,
|
|
"loss": 0.2933,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1184755489230156,
|
|
"margin_dpo/beta_margin_grad_std": 0.15497253835201263,
|
|
"margin_dpo/beta_margin_mean": 3.184364080429077,
|
|
"margin_dpo/beta_margin_std": 2.173491954803467,
|
|
"margin_dpo/loss_margin_mean": 31.843639373779297,
|
|
"margin_dpo/margin_mean": 31.843639373779297,
|
|
"margin_dpo/margin_std": 21.62934684753418,
|
|
"step": 354
|
|
},
|
|
{
|
|
"epoch": 0.5212922173274597,
|
|
"grad_norm": 38.83880615234375,
|
|
"learning_rate": 2.7689777072570284e-07,
|
|
"logits/chosen": -0.6833703517913818,
|
|
"logits/rejected": -0.6498676538467407,
|
|
"logps/chosen": -70.72756958007812,
|
|
"logps/ref_chosen": -54.128501892089844,
|
|
"logps/ref_rejected": -82.40606689453125,
|
|
"logps/rejected": -132.54278564453125,
|
|
"loss": 0.3525,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.14085842669010162,
|
|
"margin_dpo/beta_margin_grad_std": 0.16604043543338776,
|
|
"margin_dpo/beta_margin_mean": 3.353764295578003,
|
|
"margin_dpo/beta_margin_std": 2.9216277599334717,
|
|
"margin_dpo/loss_margin_mean": 33.53764343261719,
|
|
"margin_dpo/margin_mean": 33.53764343261719,
|
|
"margin_dpo/margin_std": 28.546594619750977,
|
|
"step": 355
|
|
},
|
|
{
|
|
"epoch": 0.5227606461086637,
|
|
"grad_norm": 94.49285888671875,
|
|
"learning_rate": 2.7562154104130176e-07,
|
|
"logits/chosen": -0.6543818712234497,
|
|
"logits/rejected": -0.6269608736038208,
|
|
"logps/chosen": -86.63969421386719,
|
|
"logps/ref_chosen": -64.67381286621094,
|
|
"logps/ref_rejected": -75.89926147460938,
|
|
"logps/rejected": -120.64968872070312,
|
|
"loss": 0.8,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.2603550851345062,
|
|
"margin_dpo/beta_margin_grad_std": 0.25936320424079895,
|
|
"margin_dpo/beta_margin_mean": 2.278454065322876,
|
|
"margin_dpo/beta_margin_std": 2.8312277793884277,
|
|
"margin_dpo/loss_margin_mean": 22.7845401763916,
|
|
"margin_dpo/margin_mean": 22.78453826904297,
|
|
"margin_dpo/margin_std": 27.273006439208984,
|
|
"step": 356
|
|
},
|
|
{
|
|
"epoch": 0.5242290748898678,
|
|
"grad_norm": 47.93386459350586,
|
|
"learning_rate": 2.7434463620546594e-07,
|
|
"logits/chosen": -0.621991753578186,
|
|
"logits/rejected": -0.5901703834533691,
|
|
"logps/chosen": -70.54598999023438,
|
|
"logps/ref_chosen": -52.725799560546875,
|
|
"logps/ref_rejected": -86.84115600585938,
|
|
"logps/rejected": -136.3178253173828,
|
|
"loss": 0.3853,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.13819599151611328,
|
|
"margin_dpo/beta_margin_grad_std": 0.1930486112833023,
|
|
"margin_dpo/beta_margin_mean": 3.165647268295288,
|
|
"margin_dpo/beta_margin_std": 2.430621862411499,
|
|
"margin_dpo/loss_margin_mean": 31.65647315979004,
|
|
"margin_dpo/margin_mean": 31.65647315979004,
|
|
"margin_dpo/margin_std": 23.81618309020996,
|
|
"step": 357
|
|
},
|
|
{
|
|
"epoch": 0.5256975036710719,
|
|
"grad_norm": 65.47496795654297,
|
|
"learning_rate": 2.730670898658255e-07,
|
|
"logits/chosen": -0.6221922636032104,
|
|
"logits/rejected": -0.5780969858169556,
|
|
"logps/chosen": -79.48448944091797,
|
|
"logps/ref_chosen": -63.20543670654297,
|
|
"logps/ref_rejected": -88.373291015625,
|
|
"logps/rejected": -133.81741333007812,
|
|
"loss": 0.4719,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.17817279696464539,
|
|
"margin_dpo/beta_margin_grad_std": 0.19289124011993408,
|
|
"margin_dpo/beta_margin_mean": 2.916506052017212,
|
|
"margin_dpo/beta_margin_std": 2.7734534740448,
|
|
"margin_dpo/loss_margin_mean": 29.165058135986328,
|
|
"margin_dpo/margin_mean": 29.165058135986328,
|
|
"margin_dpo/margin_std": 27.52760887145996,
|
|
"step": 358
|
|
},
|
|
{
|
|
"epoch": 0.527165932452276,
|
|
"grad_norm": 65.17465209960938,
|
|
"learning_rate": 2.717889356869146e-07,
|
|
"logits/chosen": -0.5604794025421143,
|
|
"logits/rejected": -0.5229655504226685,
|
|
"logps/chosen": -78.45906829833984,
|
|
"logps/ref_chosen": -56.370216369628906,
|
|
"logps/ref_rejected": -82.17375183105469,
|
|
"logps/rejected": -136.7306365966797,
|
|
"loss": 0.4793,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.15810422599315643,
|
|
"margin_dpo/beta_margin_grad_std": 0.22020529210567474,
|
|
"margin_dpo/beta_margin_mean": 3.2468044757843018,
|
|
"margin_dpo/beta_margin_std": 2.889819860458374,
|
|
"margin_dpo/loss_margin_mean": 32.46804428100586,
|
|
"margin_dpo/margin_mean": 32.46804428100586,
|
|
"margin_dpo/margin_std": 27.820709228515625,
|
|
"step": 359
|
|
},
|
|
{
|
|
"epoch": 0.5286343612334802,
|
|
"grad_norm": 45.14263153076172,
|
|
"learning_rate": 2.7051020734928443e-07,
|
|
"logits/chosen": -0.5990117788314819,
|
|
"logits/rejected": -0.5746924877166748,
|
|
"logps/chosen": -70.55091857910156,
|
|
"logps/ref_chosen": -51.460384368896484,
|
|
"logps/ref_rejected": -69.83892822265625,
|
|
"logps/rejected": -118.64250183105469,
|
|
"loss": 0.4051,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.14690235257148743,
|
|
"margin_dpo/beta_margin_grad_std": 0.18873253464698792,
|
|
"margin_dpo/beta_margin_mean": 2.9713053703308105,
|
|
"margin_dpo/beta_margin_std": 2.3531863689422607,
|
|
"margin_dpo/loss_margin_mean": 29.71305274963379,
|
|
"margin_dpo/margin_mean": 29.71305274963379,
|
|
"margin_dpo/margin_std": 23.29418182373047,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.5301027900146843,
|
|
"grad_norm": 59.539451599121094,
|
|
"learning_rate": 2.6923093854861593e-07,
|
|
"logits/chosen": -0.5896218419075012,
|
|
"logits/rejected": -0.5681077837944031,
|
|
"logps/chosen": -73.69671630859375,
|
|
"logps/ref_chosen": -53.86951446533203,
|
|
"logps/ref_rejected": -90.76925659179688,
|
|
"logps/rejected": -138.9726104736328,
|
|
"loss": 0.489,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.17487533390522003,
|
|
"margin_dpo/beta_margin_grad_std": 0.20955052971839905,
|
|
"margin_dpo/beta_margin_mean": 2.837615966796875,
|
|
"margin_dpo/beta_margin_std": 2.5256011486053467,
|
|
"margin_dpo/loss_margin_mean": 28.37615966796875,
|
|
"margin_dpo/margin_mean": 28.37615966796875,
|
|
"margin_dpo/margin_std": 24.988800048828125,
|
|
"step": 361
|
|
},
|
|
{
|
|
"epoch": 0.5315712187958884,
|
|
"grad_norm": 54.718441009521484,
|
|
"learning_rate": 2.679511629948319e-07,
|
|
"logits/chosen": -0.6207703351974487,
|
|
"logits/rejected": -0.6055707931518555,
|
|
"logps/chosen": -78.78889465332031,
|
|
"logps/ref_chosen": -58.639060974121094,
|
|
"logps/ref_rejected": -105.58195495605469,
|
|
"logps/rejected": -159.64309692382812,
|
|
"loss": 0.4171,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.14796419441699982,
|
|
"margin_dpo/beta_margin_grad_std": 0.20472835004329681,
|
|
"margin_dpo/beta_margin_mean": 3.3911311626434326,
|
|
"margin_dpo/beta_margin_std": 2.7831871509552,
|
|
"margin_dpo/loss_margin_mean": 33.91130828857422,
|
|
"margin_dpo/margin_mean": 33.91130828857422,
|
|
"margin_dpo/margin_std": 27.807514190673828,
|
|
"step": 362
|
|
},
|
|
{
|
|
"epoch": 0.5330396475770925,
|
|
"grad_norm": 203.7032012939453,
|
|
"learning_rate": 2.6667091441120816e-07,
|
|
"logits/chosen": -0.6275466084480286,
|
|
"logits/rejected": -0.5816659331321716,
|
|
"logps/chosen": -62.10087585449219,
|
|
"logps/ref_chosen": -44.558380126953125,
|
|
"logps/ref_rejected": -74.69496154785156,
|
|
"logps/rejected": -131.32981872558594,
|
|
"loss": 0.4034,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.13567416369915009,
|
|
"margin_dpo/beta_margin_grad_std": 0.2034136801958084,
|
|
"margin_dpo/beta_margin_mean": 3.909236192703247,
|
|
"margin_dpo/beta_margin_std": 3.2993836402893066,
|
|
"margin_dpo/loss_margin_mean": 39.09236145019531,
|
|
"margin_dpo/margin_mean": 39.09236145019531,
|
|
"margin_dpo/margin_std": 32.874427795410156,
|
|
"step": 363
|
|
},
|
|
{
|
|
"epoch": 0.5345080763582967,
|
|
"grad_norm": 69.3237533569336,
|
|
"learning_rate": 2.6539022653348575e-07,
|
|
"logits/chosen": -0.6238687038421631,
|
|
"logits/rejected": -0.6193605065345764,
|
|
"logps/chosen": -67.90678405761719,
|
|
"logps/ref_chosen": -48.894622802734375,
|
|
"logps/ref_rejected": -91.395751953125,
|
|
"logps/rejected": -138.55311584472656,
|
|
"loss": 0.517,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1832251399755478,
|
|
"margin_dpo/beta_margin_grad_std": 0.21592886745929718,
|
|
"margin_dpo/beta_margin_mean": 2.8145196437835693,
|
|
"margin_dpo/beta_margin_std": 2.687561273574829,
|
|
"margin_dpo/loss_margin_mean": 28.14519691467285,
|
|
"margin_dpo/margin_mean": 28.14519691467285,
|
|
"margin_dpo/margin_std": 26.325363159179688,
|
|
"step": 364
|
|
},
|
|
{
|
|
"epoch": 0.5359765051395007,
|
|
"grad_norm": 53.85197448730469,
|
|
"learning_rate": 2.641091331089811e-07,
|
|
"logits/chosen": -0.5883674621582031,
|
|
"logits/rejected": -0.5725036859512329,
|
|
"logps/chosen": -69.85151672363281,
|
|
"logps/ref_chosen": -51.49274444580078,
|
|
"logps/ref_rejected": -92.70166778564453,
|
|
"logps/rejected": -138.56735229492188,
|
|
"loss": 0.4649,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1821574568748474,
|
|
"margin_dpo/beta_margin_grad_std": 0.1840406358242035,
|
|
"margin_dpo/beta_margin_mean": 2.750690460205078,
|
|
"margin_dpo/beta_margin_std": 2.6088244915008545,
|
|
"margin_dpo/loss_margin_mean": 27.50690269470215,
|
|
"margin_dpo/margin_mean": 27.50690269470215,
|
|
"margin_dpo/margin_std": 25.32098960876465,
|
|
"step": 365
|
|
},
|
|
{
|
|
"epoch": 0.5374449339207048,
|
|
"grad_norm": 37.25920867919922,
|
|
"learning_rate": 2.6282766789569736e-07,
|
|
"logits/chosen": -0.6385898590087891,
|
|
"logits/rejected": -0.6289624571800232,
|
|
"logps/chosen": -61.94309997558594,
|
|
"logps/ref_chosen": -44.7205696105957,
|
|
"logps/ref_rejected": -83.31040954589844,
|
|
"logps/rejected": -129.80517578125,
|
|
"loss": 0.3876,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.15300396084785461,
|
|
"margin_dpo/beta_margin_grad_std": 0.17375686764717102,
|
|
"margin_dpo/beta_margin_mean": 2.9272243976593018,
|
|
"margin_dpo/beta_margin_std": 2.34128475189209,
|
|
"margin_dpo/loss_margin_mean": 29.27224349975586,
|
|
"margin_dpo/margin_mean": 29.27224349975586,
|
|
"margin_dpo/margin_std": 23.367576599121094,
|
|
"step": 366
|
|
},
|
|
{
|
|
"epoch": 0.5389133627019089,
|
|
"grad_norm": 57.8803825378418,
|
|
"learning_rate": 2.615458646614349e-07,
|
|
"logits/chosen": -0.6073235273361206,
|
|
"logits/rejected": -0.5901012420654297,
|
|
"logps/chosen": -77.51007080078125,
|
|
"logps/ref_chosen": -58.405418395996094,
|
|
"logps/ref_rejected": -76.75132751464844,
|
|
"logps/rejected": -121.35176086425781,
|
|
"loss": 0.512,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1787261813879013,
|
|
"margin_dpo/beta_margin_grad_std": 0.21498483419418335,
|
|
"margin_dpo/beta_margin_mean": 2.5495781898498535,
|
|
"margin_dpo/beta_margin_std": 2.2787163257598877,
|
|
"margin_dpo/loss_margin_mean": 25.49578094482422,
|
|
"margin_dpo/margin_mean": 25.49578094482422,
|
|
"margin_dpo/margin_std": 22.63507843017578,
|
|
"step": 367
|
|
},
|
|
{
|
|
"epoch": 0.540381791483113,
|
|
"grad_norm": 42.138545989990234,
|
|
"learning_rate": 2.6026375718290083e-07,
|
|
"logits/chosen": -0.6203917264938354,
|
|
"logits/rejected": -0.6051937937736511,
|
|
"logps/chosen": -61.3531494140625,
|
|
"logps/ref_chosen": -44.452518463134766,
|
|
"logps/ref_rejected": -98.55526733398438,
|
|
"logps/rejected": -146.37754821777344,
|
|
"loss": 0.3505,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.13690005242824554,
|
|
"margin_dpo/beta_margin_grad_std": 0.1739962249994278,
|
|
"margin_dpo/beta_margin_mean": 3.092164993286133,
|
|
"margin_dpo/beta_margin_std": 2.4503121376037598,
|
|
"margin_dpo/loss_margin_mean": 30.921649932861328,
|
|
"margin_dpo/margin_mean": 30.921649932861328,
|
|
"margin_dpo/margin_std": 23.325572967529297,
|
|
"step": 368
|
|
},
|
|
{
|
|
"epoch": 0.5418502202643172,
|
|
"grad_norm": 68.98252868652344,
|
|
"learning_rate": 2.589813792448196e-07,
|
|
"logits/chosen": -0.6216270923614502,
|
|
"logits/rejected": -0.5819696187973022,
|
|
"logps/chosen": -89.982421875,
|
|
"logps/ref_chosen": -71.38150024414062,
|
|
"logps/ref_rejected": -91.29582214355469,
|
|
"logps/rejected": -135.1055450439453,
|
|
"loss": 0.5441,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.19111789762973785,
|
|
"margin_dpo/beta_margin_grad_std": 0.2203211784362793,
|
|
"margin_dpo/beta_margin_mean": 2.520881175994873,
|
|
"margin_dpo/beta_margin_std": 2.2390410900115967,
|
|
"margin_dpo/loss_margin_mean": 25.208810806274414,
|
|
"margin_dpo/margin_mean": 25.208810806274414,
|
|
"margin_dpo/margin_std": 22.379486083984375,
|
|
"step": 369
|
|
},
|
|
{
|
|
"epoch": 0.5433186490455213,
|
|
"grad_norm": 57.02136993408203,
|
|
"learning_rate": 2.5769876463904263e-07,
|
|
"logits/chosen": -0.6215974688529968,
|
|
"logits/rejected": -0.5932759046554565,
|
|
"logps/chosen": -90.78213500976562,
|
|
"logps/ref_chosen": -71.60749816894531,
|
|
"logps/ref_rejected": -97.25978088378906,
|
|
"logps/rejected": -141.36424255371094,
|
|
"loss": 0.5193,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1943112313747406,
|
|
"margin_dpo/beta_margin_grad_std": 0.2109173834323883,
|
|
"margin_dpo/beta_margin_mean": 2.4929823875427246,
|
|
"margin_dpo/beta_margin_std": 2.2076525688171387,
|
|
"margin_dpo/loss_margin_mean": 24.929824829101562,
|
|
"margin_dpo/margin_mean": 24.929824829101562,
|
|
"margin_dpo/margin_std": 21.949810028076172,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.5447870778267254,
|
|
"grad_norm": 66.95852661132812,
|
|
"learning_rate": 2.5641594716365744e-07,
|
|
"logits/chosen": -0.6244653463363647,
|
|
"logits/rejected": -0.5987306833267212,
|
|
"logps/chosen": -89.35826873779297,
|
|
"logps/ref_chosen": -69.41448974609375,
|
|
"logps/ref_rejected": -99.17217254638672,
|
|
"logps/rejected": -146.2442626953125,
|
|
"loss": 0.6181,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1994006186723709,
|
|
"margin_dpo/beta_margin_grad_std": 0.2342255413532257,
|
|
"margin_dpo/beta_margin_mean": 2.712832450866699,
|
|
"margin_dpo/beta_margin_std": 2.860605001449585,
|
|
"margin_dpo/loss_margin_mean": 27.12832260131836,
|
|
"margin_dpo/margin_mean": 27.12832260131836,
|
|
"margin_dpo/margin_std": 28.538127899169922,
|
|
"step": 371
|
|
},
|
|
{
|
|
"epoch": 0.5462555066079295,
|
|
"grad_norm": 54.26102066040039,
|
|
"learning_rate": 2.551329606220976e-07,
|
|
"logits/chosen": -0.631676971912384,
|
|
"logits/rejected": -0.5819823741912842,
|
|
"logps/chosen": -81.3255615234375,
|
|
"logps/ref_chosen": -61.8179931640625,
|
|
"logps/ref_rejected": -78.53949737548828,
|
|
"logps/rejected": -129.32257080078125,
|
|
"loss": 0.5102,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.17238368093967438,
|
|
"margin_dpo/beta_margin_grad_std": 0.22069989144802094,
|
|
"margin_dpo/beta_margin_mean": 3.1275503635406494,
|
|
"margin_dpo/beta_margin_std": 2.999239444732666,
|
|
"margin_dpo/loss_margin_mean": 31.27550506591797,
|
|
"margin_dpo/margin_mean": 31.27550506591797,
|
|
"margin_dpo/margin_std": 29.97246551513672,
|
|
"step": 372
|
|
},
|
|
{
|
|
"epoch": 0.5477239353891337,
|
|
"grad_norm": 65.14423370361328,
|
|
"learning_rate": 2.538498388222517e-07,
|
|
"logits/chosen": -0.6474554538726807,
|
|
"logits/rejected": -0.6035970449447632,
|
|
"logps/chosen": -85.37360382080078,
|
|
"logps/ref_chosen": -64.21713256835938,
|
|
"logps/ref_rejected": -85.95960998535156,
|
|
"logps/rejected": -140.09530639648438,
|
|
"loss": 0.4153,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.14717315137386322,
|
|
"margin_dpo/beta_margin_grad_std": 0.20184947550296783,
|
|
"margin_dpo/beta_margin_mean": 3.2979226112365723,
|
|
"margin_dpo/beta_margin_std": 2.632497549057007,
|
|
"margin_dpo/loss_margin_mean": 32.979225158691406,
|
|
"margin_dpo/margin_mean": 32.979225158691406,
|
|
"margin_dpo/margin_std": 26.056396484375,
|
|
"step": 373
|
|
},
|
|
{
|
|
"epoch": 0.5491923641703378,
|
|
"grad_norm": 46.910888671875,
|
|
"learning_rate": 2.525666155755725e-07,
|
|
"logits/chosen": -0.6809793710708618,
|
|
"logits/rejected": -0.6489218473434448,
|
|
"logps/chosen": -88.79120635986328,
|
|
"logps/ref_chosen": -70.65017700195312,
|
|
"logps/ref_rejected": -93.64016723632812,
|
|
"logps/rejected": -141.58169555664062,
|
|
"loss": 0.4273,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1627557873725891,
|
|
"margin_dpo/beta_margin_grad_std": 0.18945109844207764,
|
|
"margin_dpo/beta_margin_mean": 2.9800491333007812,
|
|
"margin_dpo/beta_margin_std": 2.557394027709961,
|
|
"margin_dpo/loss_margin_mean": 29.800491333007812,
|
|
"margin_dpo/margin_mean": 29.800491333007812,
|
|
"margin_dpo/margin_std": 25.399768829345703,
|
|
"step": 374
|
|
},
|
|
{
|
|
"epoch": 0.5506607929515418,
|
|
"grad_norm": 54.142765045166016,
|
|
"learning_rate": 2.512833246961859e-07,
|
|
"logits/chosen": -0.6128416061401367,
|
|
"logits/rejected": -0.5992000102996826,
|
|
"logps/chosen": -79.39334106445312,
|
|
"logps/ref_chosen": -60.080223083496094,
|
|
"logps/ref_rejected": -88.93830871582031,
|
|
"logps/rejected": -137.71192932128906,
|
|
"loss": 0.516,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1836765855550766,
|
|
"margin_dpo/beta_margin_grad_std": 0.22431671619415283,
|
|
"margin_dpo/beta_margin_mean": 2.9460506439208984,
|
|
"margin_dpo/beta_margin_std": 2.548676013946533,
|
|
"margin_dpo/loss_margin_mean": 29.460506439208984,
|
|
"margin_dpo/margin_mean": 29.460506439208984,
|
|
"margin_dpo/margin_std": 24.365291595458984,
|
|
"step": 375
|
|
},
|
|
{
|
|
"epoch": 0.5521292217327459,
|
|
"grad_norm": 48.23344421386719,
|
|
"learning_rate": 2.5e-07,
|
|
"logits/chosen": -0.6043192744255066,
|
|
"logits/rejected": -0.5859169960021973,
|
|
"logps/chosen": -82.08074951171875,
|
|
"logps/ref_chosen": -62.660308837890625,
|
|
"logps/ref_rejected": -105.526611328125,
|
|
"logps/rejected": -157.26370239257812,
|
|
"loss": 0.388,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.14623141288757324,
|
|
"margin_dpo/beta_margin_grad_std": 0.18046867847442627,
|
|
"margin_dpo/beta_margin_mean": 3.23166561126709,
|
|
"margin_dpo/beta_margin_std": 2.768313407897949,
|
|
"margin_dpo/loss_margin_mean": 32.31665802001953,
|
|
"margin_dpo/margin_mean": 32.31665802001953,
|
|
"margin_dpo/margin_std": 27.595264434814453,
|
|
"step": 376
|
|
},
|
|
{
|
|
"epoch": 0.55359765051395,
|
|
"grad_norm": 63.7028694152832,
|
|
"learning_rate": 2.487166753038141e-07,
|
|
"logits/chosen": -0.5758407115936279,
|
|
"logits/rejected": -0.5594580769538879,
|
|
"logps/chosen": -76.27653503417969,
|
|
"logps/ref_chosen": -54.478736877441406,
|
|
"logps/ref_rejected": -98.70335388183594,
|
|
"logps/rejected": -150.482421875,
|
|
"loss": 0.5374,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.18470792472362518,
|
|
"margin_dpo/beta_margin_grad_std": 0.2296735942363739,
|
|
"margin_dpo/beta_margin_mean": 2.9981284141540527,
|
|
"margin_dpo/beta_margin_std": 2.736407995223999,
|
|
"margin_dpo/loss_margin_mean": 29.98128318786621,
|
|
"margin_dpo/margin_mean": 29.981285095214844,
|
|
"margin_dpo/margin_std": 26.938751220703125,
|
|
"step": 377
|
|
},
|
|
{
|
|
"epoch": 0.5550660792951542,
|
|
"grad_norm": 43.11890411376953,
|
|
"learning_rate": 2.4743338442442754e-07,
|
|
"logits/chosen": -0.6386528015136719,
|
|
"logits/rejected": -0.6268770694732666,
|
|
"logps/chosen": -62.957725524902344,
|
|
"logps/ref_chosen": -45.02053451538086,
|
|
"logps/ref_rejected": -88.0469741821289,
|
|
"logps/rejected": -137.9139404296875,
|
|
"loss": 0.4093,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.14005307853221893,
|
|
"margin_dpo/beta_margin_grad_std": 0.205052450299263,
|
|
"margin_dpo/beta_margin_mean": 3.1929781436920166,
|
|
"margin_dpo/beta_margin_std": 2.5704288482666016,
|
|
"margin_dpo/loss_margin_mean": 31.929780960083008,
|
|
"margin_dpo/margin_mean": 31.929779052734375,
|
|
"margin_dpo/margin_std": 25.305004119873047,
|
|
"step": 378
|
|
},
|
|
{
|
|
"epoch": 0.5565345080763583,
|
|
"grad_norm": 55.71260452270508,
|
|
"learning_rate": 2.461501611777483e-07,
|
|
"logits/chosen": -0.628246545791626,
|
|
"logits/rejected": -0.6193478107452393,
|
|
"logps/chosen": -72.79714965820312,
|
|
"logps/ref_chosen": -53.182098388671875,
|
|
"logps/ref_rejected": -114.30015563964844,
|
|
"logps/rejected": -167.07992553710938,
|
|
"loss": 0.4362,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1567826271057129,
|
|
"margin_dpo/beta_margin_grad_std": 0.20273275673389435,
|
|
"margin_dpo/beta_margin_mean": 3.3164710998535156,
|
|
"margin_dpo/beta_margin_std": 2.8849940299987793,
|
|
"margin_dpo/loss_margin_mean": 33.164710998535156,
|
|
"margin_dpo/margin_mean": 33.164710998535156,
|
|
"margin_dpo/margin_std": 28.102596282958984,
|
|
"step": 379
|
|
},
|
|
{
|
|
"epoch": 0.5580029368575624,
|
|
"grad_norm": 80.04903411865234,
|
|
"learning_rate": 2.4486703937790243e-07,
|
|
"logits/chosen": -0.5512839555740356,
|
|
"logits/rejected": -0.5587849617004395,
|
|
"logps/chosen": -74.60398864746094,
|
|
"logps/ref_chosen": -51.3530387878418,
|
|
"logps/ref_rejected": -104.19169616699219,
|
|
"logps/rejected": -162.14089965820312,
|
|
"loss": 0.5716,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.17476344108581543,
|
|
"margin_dpo/beta_margin_grad_std": 0.26113981008529663,
|
|
"margin_dpo/beta_margin_mean": 3.46982479095459,
|
|
"margin_dpo/beta_margin_std": 3.075237512588501,
|
|
"margin_dpo/loss_margin_mean": 34.69824981689453,
|
|
"margin_dpo/margin_mean": 34.69824981689453,
|
|
"margin_dpo/margin_std": 30.40831756591797,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.5594713656387665,
|
|
"grad_norm": 64.25127410888672,
|
|
"learning_rate": 2.435840528363426e-07,
|
|
"logits/chosen": -0.6331868171691895,
|
|
"logits/rejected": -0.5953764915466309,
|
|
"logps/chosen": -79.46463012695312,
|
|
"logps/ref_chosen": -57.80306625366211,
|
|
"logps/ref_rejected": -79.21940612792969,
|
|
"logps/rejected": -135.10447692871094,
|
|
"loss": 0.5267,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.16175399720668793,
|
|
"margin_dpo/beta_margin_grad_std": 0.2353188693523407,
|
|
"margin_dpo/beta_margin_mean": 3.4223499298095703,
|
|
"margin_dpo/beta_margin_std": 3.058774471282959,
|
|
"margin_dpo/loss_margin_mean": 34.2234992980957,
|
|
"margin_dpo/margin_mean": 34.2234992980957,
|
|
"margin_dpo/margin_std": 30.406585693359375,
|
|
"step": 381
|
|
},
|
|
{
|
|
"epoch": 0.5609397944199707,
|
|
"grad_norm": 57.7338752746582,
|
|
"learning_rate": 2.4230123536095745e-07,
|
|
"logits/chosen": -0.6542234420776367,
|
|
"logits/rejected": -0.6203514337539673,
|
|
"logps/chosen": -84.785400390625,
|
|
"logps/ref_chosen": -66.02030181884766,
|
|
"logps/ref_rejected": -110.71015930175781,
|
|
"logps/rejected": -164.99774169921875,
|
|
"loss": 0.3934,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.13284680247306824,
|
|
"margin_dpo/beta_margin_grad_std": 0.2009667158126831,
|
|
"margin_dpo/beta_margin_mean": 3.5522475242614746,
|
|
"margin_dpo/beta_margin_std": 2.81280517578125,
|
|
"margin_dpo/loss_margin_mean": 35.52247619628906,
|
|
"margin_dpo/margin_mean": 35.52247619628906,
|
|
"margin_dpo/margin_std": 27.49190330505371,
|
|
"step": 382
|
|
},
|
|
{
|
|
"epoch": 0.5624082232011748,
|
|
"grad_norm": 53.96702575683594,
|
|
"learning_rate": 2.4101862075518037e-07,
|
|
"logits/chosen": -0.6232413053512573,
|
|
"logits/rejected": -0.6115109920501709,
|
|
"logps/chosen": -72.27030944824219,
|
|
"logps/ref_chosen": -50.39148712158203,
|
|
"logps/ref_rejected": -93.71589660644531,
|
|
"logps/rejected": -147.88839721679688,
|
|
"loss": 0.3758,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.14286085963249207,
|
|
"margin_dpo/beta_margin_grad_std": 0.1862889528274536,
|
|
"margin_dpo/beta_margin_mean": 3.229367256164551,
|
|
"margin_dpo/beta_margin_std": 2.584360122680664,
|
|
"margin_dpo/loss_margin_mean": 32.29367446899414,
|
|
"margin_dpo/margin_mean": 32.29367446899414,
|
|
"margin_dpo/margin_std": 25.570125579833984,
|
|
"step": 383
|
|
},
|
|
{
|
|
"epoch": 0.5638766519823789,
|
|
"grad_norm": 51.74114227294922,
|
|
"learning_rate": 2.397362428170992e-07,
|
|
"logits/chosen": -0.6301860809326172,
|
|
"logits/rejected": -0.6027618050575256,
|
|
"logps/chosen": -73.91239929199219,
|
|
"logps/ref_chosen": -52.046104431152344,
|
|
"logps/ref_rejected": -85.76089477539062,
|
|
"logps/rejected": -139.81455993652344,
|
|
"loss": 0.4799,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1736118346452713,
|
|
"margin_dpo/beta_margin_grad_std": 0.21324963867664337,
|
|
"margin_dpo/beta_margin_mean": 3.2187373638153076,
|
|
"margin_dpo/beta_margin_std": 3.164245367050171,
|
|
"margin_dpo/loss_margin_mean": 32.18737030029297,
|
|
"margin_dpo/margin_mean": 32.187374114990234,
|
|
"margin_dpo/margin_std": 31.576709747314453,
|
|
"step": 384
|
|
},
|
|
{
|
|
"epoch": 0.5653450807635829,
|
|
"grad_norm": 58.88325881958008,
|
|
"learning_rate": 2.3845413533856514e-07,
|
|
"logits/chosen": -0.6432499885559082,
|
|
"logits/rejected": -0.5914589166641235,
|
|
"logps/chosen": -84.34367370605469,
|
|
"logps/ref_chosen": -65.55216217041016,
|
|
"logps/ref_rejected": -77.82792663574219,
|
|
"logps/rejected": -125.24491882324219,
|
|
"loss": 0.4483,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.16570799052715302,
|
|
"margin_dpo/beta_margin_grad_std": 0.20454205572605133,
|
|
"margin_dpo/beta_margin_mean": 2.8625473976135254,
|
|
"margin_dpo/beta_margin_std": 2.207449197769165,
|
|
"margin_dpo/loss_margin_mean": 28.625473022460938,
|
|
"margin_dpo/margin_mean": 28.625473022460938,
|
|
"margin_dpo/margin_std": 22.058141708374023,
|
|
"step": 385
|
|
},
|
|
{
|
|
"epoch": 0.566813509544787,
|
|
"grad_norm": 67.18842315673828,
|
|
"learning_rate": 2.3717233210430254e-07,
|
|
"logits/chosen": -0.6237103939056396,
|
|
"logits/rejected": -0.5952026844024658,
|
|
"logps/chosen": -79.99833679199219,
|
|
"logps/ref_chosen": -58.22185516357422,
|
|
"logps/ref_rejected": -92.32742309570312,
|
|
"logps/rejected": -148.12274169921875,
|
|
"loss": 0.3538,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.12317676097154617,
|
|
"margin_dpo/beta_margin_grad_std": 0.18745048344135284,
|
|
"margin_dpo/beta_margin_mean": 3.4018843173980713,
|
|
"margin_dpo/beta_margin_std": 2.448117971420288,
|
|
"margin_dpo/loss_margin_mean": 34.01884078979492,
|
|
"margin_dpo/margin_mean": 34.01884078979492,
|
|
"margin_dpo/margin_std": 24.252649307250977,
|
|
"step": 386
|
|
},
|
|
{
|
|
"epoch": 0.5682819383259912,
|
|
"grad_norm": 67.83007049560547,
|
|
"learning_rate": 2.3589086689101889e-07,
|
|
"logits/chosen": -0.6703263521194458,
|
|
"logits/rejected": -0.6214380264282227,
|
|
"logps/chosen": -85.0511474609375,
|
|
"logps/ref_chosen": -66.41944885253906,
|
|
"logps/ref_rejected": -92.16915893554688,
|
|
"logps/rejected": -139.54898071289062,
|
|
"loss": 0.473,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.16746900975704193,
|
|
"margin_dpo/beta_margin_grad_std": 0.21500559151172638,
|
|
"margin_dpo/beta_margin_mean": 2.874812602996826,
|
|
"margin_dpo/beta_margin_std": 2.3288333415985107,
|
|
"margin_dpo/loss_margin_mean": 28.748125076293945,
|
|
"margin_dpo/margin_mean": 28.748126983642578,
|
|
"margin_dpo/margin_std": 23.084835052490234,
|
|
"step": 387
|
|
},
|
|
{
|
|
"epoch": 0.5697503671071953,
|
|
"grad_norm": 50.18345260620117,
|
|
"learning_rate": 2.3460977346651428e-07,
|
|
"logits/chosen": -0.614050567150116,
|
|
"logits/rejected": -0.6127490401268005,
|
|
"logps/chosen": -71.32160186767578,
|
|
"logps/ref_chosen": -50.129459381103516,
|
|
"logps/ref_rejected": -104.43305969238281,
|
|
"logps/rejected": -161.62437438964844,
|
|
"loss": 0.375,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.13886678218841553,
|
|
"margin_dpo/beta_margin_grad_std": 0.19045500457286835,
|
|
"margin_dpo/beta_margin_mean": 3.599916458129883,
|
|
"margin_dpo/beta_margin_std": 3.1401546001434326,
|
|
"margin_dpo/loss_margin_mean": 35.99916458129883,
|
|
"margin_dpo/margin_mean": 35.99916076660156,
|
|
"margin_dpo/margin_std": 29.291606903076172,
|
|
"step": 388
|
|
},
|
|
{
|
|
"epoch": 0.5712187958883994,
|
|
"grad_norm": 40.010982513427734,
|
|
"learning_rate": 2.3332908558879177e-07,
|
|
"logits/chosen": -0.6919381022453308,
|
|
"logits/rejected": -0.6485722064971924,
|
|
"logps/chosen": -77.64576721191406,
|
|
"logps/ref_chosen": -57.906593322753906,
|
|
"logps/ref_rejected": -77.91454315185547,
|
|
"logps/rejected": -131.74371337890625,
|
|
"loss": 0.3484,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.13054527342319489,
|
|
"margin_dpo/beta_margin_grad_std": 0.1837066113948822,
|
|
"margin_dpo/beta_margin_mean": 3.4090003967285156,
|
|
"margin_dpo/beta_margin_std": 2.6279804706573486,
|
|
"margin_dpo/loss_margin_mean": 34.090003967285156,
|
|
"margin_dpo/margin_mean": 34.090003967285156,
|
|
"margin_dpo/margin_std": 26.119720458984375,
|
|
"step": 389
|
|
},
|
|
{
|
|
"epoch": 0.5726872246696035,
|
|
"grad_norm": 66.40118408203125,
|
|
"learning_rate": 2.320488370051681e-07,
|
|
"logits/chosen": -0.6006795763969421,
|
|
"logits/rejected": -0.5766574740409851,
|
|
"logps/chosen": -70.85358428955078,
|
|
"logps/ref_chosen": -49.22591781616211,
|
|
"logps/ref_rejected": -85.5281982421875,
|
|
"logps/rejected": -138.63442993164062,
|
|
"loss": 0.5273,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.16621273756027222,
|
|
"margin_dpo/beta_margin_grad_std": 0.23641641438007355,
|
|
"margin_dpo/beta_margin_mean": 3.1478559970855713,
|
|
"margin_dpo/beta_margin_std": 2.8455049991607666,
|
|
"margin_dpo/loss_margin_mean": 31.478559494018555,
|
|
"margin_dpo/margin_mean": 31.478557586669922,
|
|
"margin_dpo/margin_std": 27.472740173339844,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.5741556534508077,
|
|
"grad_norm": 60.51673126220703,
|
|
"learning_rate": 2.3076906145138405e-07,
|
|
"logits/chosen": -0.6301641464233398,
|
|
"logits/rejected": -0.6100099086761475,
|
|
"logps/chosen": -87.16332244873047,
|
|
"logps/ref_chosen": -64.32965087890625,
|
|
"logps/ref_rejected": -86.73820495605469,
|
|
"logps/rejected": -137.2554473876953,
|
|
"loss": 0.5215,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.18781331181526184,
|
|
"margin_dpo/beta_margin_grad_std": 0.2151157259941101,
|
|
"margin_dpo/beta_margin_mean": 2.7683560848236084,
|
|
"margin_dpo/beta_margin_std": 2.7290608882904053,
|
|
"margin_dpo/loss_margin_mean": 27.683561325073242,
|
|
"margin_dpo/margin_mean": 27.683561325073242,
|
|
"margin_dpo/margin_std": 26.582765579223633,
|
|
"step": 391
|
|
},
|
|
{
|
|
"epoch": 0.5756240822320118,
|
|
"grad_norm": 42.56395721435547,
|
|
"learning_rate": 2.294897926507156e-07,
|
|
"logits/chosen": -0.640722393989563,
|
|
"logits/rejected": -0.6189226508140564,
|
|
"logps/chosen": -71.88900756835938,
|
|
"logps/ref_chosen": -53.50397872924805,
|
|
"logps/ref_rejected": -102.34583282470703,
|
|
"logps/rejected": -155.18275451660156,
|
|
"loss": 0.296,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1182653084397316,
|
|
"margin_dpo/beta_margin_grad_std": 0.1554841548204422,
|
|
"margin_dpo/beta_margin_mean": 3.4451892375946045,
|
|
"margin_dpo/beta_margin_std": 2.6532013416290283,
|
|
"margin_dpo/loss_margin_mean": 34.4518928527832,
|
|
"margin_dpo/margin_mean": 34.45188903808594,
|
|
"margin_dpo/margin_std": 25.238216400146484,
|
|
"step": 392
|
|
},
|
|
{
|
|
"epoch": 0.5770925110132159,
|
|
"grad_norm": 60.799842834472656,
|
|
"learning_rate": 2.2821106431308543e-07,
|
|
"logits/chosen": -0.6209444403648376,
|
|
"logits/rejected": -0.5936387777328491,
|
|
"logps/chosen": -65.83811950683594,
|
|
"logps/ref_chosen": -46.473915100097656,
|
|
"logps/ref_rejected": -71.96885681152344,
|
|
"logps/rejected": -118.9813461303711,
|
|
"loss": 0.537,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1976858228445053,
|
|
"margin_dpo/beta_margin_grad_std": 0.21031783521175385,
|
|
"margin_dpo/beta_margin_mean": 2.7648279666900635,
|
|
"margin_dpo/beta_margin_std": 2.7872002124786377,
|
|
"margin_dpo/loss_margin_mean": 27.648279190063477,
|
|
"margin_dpo/margin_mean": 27.648279190063477,
|
|
"margin_dpo/margin_std": 27.81708526611328,
|
|
"step": 393
|
|
},
|
|
{
|
|
"epoch": 0.57856093979442,
|
|
"grad_norm": 54.4792594909668,
|
|
"learning_rate": 2.2693291013417452e-07,
|
|
"logits/chosen": -0.6115927696228027,
|
|
"logits/rejected": -0.5897927284240723,
|
|
"logps/chosen": -71.93934631347656,
|
|
"logps/ref_chosen": -52.91154479980469,
|
|
"logps/ref_rejected": -90.82263946533203,
|
|
"logps/rejected": -140.38363647460938,
|
|
"loss": 0.5764,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1873016506433487,
|
|
"margin_dpo/beta_margin_grad_std": 0.23149621486663818,
|
|
"margin_dpo/beta_margin_mean": 3.0533206462860107,
|
|
"margin_dpo/beta_margin_std": 3.1193511486053467,
|
|
"margin_dpo/loss_margin_mean": 30.533206939697266,
|
|
"margin_dpo/margin_mean": 30.533206939697266,
|
|
"margin_dpo/margin_std": 31.190715789794922,
|
|
"step": 394
|
|
},
|
|
{
|
|
"epoch": 0.580029368575624,
|
|
"grad_norm": 46.82632827758789,
|
|
"learning_rate": 2.2565536379453404e-07,
|
|
"logits/chosen": -0.7002590894699097,
|
|
"logits/rejected": -0.6835087537765503,
|
|
"logps/chosen": -80.11807250976562,
|
|
"logps/ref_chosen": -62.546112060546875,
|
|
"logps/ref_rejected": -83.78262329101562,
|
|
"logps/rejected": -133.29296875,
|
|
"loss": 0.4308,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.16609802842140198,
|
|
"margin_dpo/beta_margin_grad_std": 0.1954876035451889,
|
|
"margin_dpo/beta_margin_mean": 3.1938390731811523,
|
|
"margin_dpo/beta_margin_std": 2.6755151748657227,
|
|
"margin_dpo/loss_margin_mean": 31.938390731811523,
|
|
"margin_dpo/margin_mean": 31.93838882446289,
|
|
"margin_dpo/margin_std": 26.722272872924805,
|
|
"step": 395
|
|
},
|
|
{
|
|
"epoch": 0.5814977973568282,
|
|
"grad_norm": 48.82279586791992,
|
|
"learning_rate": 2.2437845895869825e-07,
|
|
"logits/chosen": -0.6726581454277039,
|
|
"logits/rejected": -0.6284117698669434,
|
|
"logps/chosen": -88.34222412109375,
|
|
"logps/ref_chosen": -68.99594116210938,
|
|
"logps/ref_rejected": -88.64665985107422,
|
|
"logps/rejected": -139.3372802734375,
|
|
"loss": 0.4054,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1422402560710907,
|
|
"margin_dpo/beta_margin_grad_std": 0.21042148768901825,
|
|
"margin_dpo/beta_margin_mean": 3.134434461593628,
|
|
"margin_dpo/beta_margin_std": 2.398585557937622,
|
|
"margin_dpo/loss_margin_mean": 31.344343185424805,
|
|
"margin_dpo/margin_mean": 31.344345092773438,
|
|
"margin_dpo/margin_std": 23.319499969482422,
|
|
"step": 396
|
|
},
|
|
{
|
|
"epoch": 0.5829662261380323,
|
|
"grad_norm": 46.95646286010742,
|
|
"learning_rate": 2.2310222927429716e-07,
|
|
"logits/chosen": -0.6410428285598755,
|
|
"logits/rejected": -0.5968055725097656,
|
|
"logps/chosen": -77.82908630371094,
|
|
"logps/ref_chosen": -61.27716827392578,
|
|
"logps/ref_rejected": -103.11612701416016,
|
|
"logps/rejected": -155.51364135742188,
|
|
"loss": 0.3677,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.13442568480968475,
|
|
"margin_dpo/beta_margin_grad_std": 0.19571280479431152,
|
|
"margin_dpo/beta_margin_mean": 3.5845582485198975,
|
|
"margin_dpo/beta_margin_std": 2.7562365531921387,
|
|
"margin_dpo/loss_margin_mean": 35.8455810546875,
|
|
"margin_dpo/margin_mean": 35.8455810546875,
|
|
"margin_dpo/margin_std": 27.45928955078125,
|
|
"step": 397
|
|
},
|
|
{
|
|
"epoch": 0.5844346549192364,
|
|
"grad_norm": 48.90303421020508,
|
|
"learning_rate": 2.2182670837116972e-07,
|
|
"logits/chosen": -0.7075108289718628,
|
|
"logits/rejected": -0.683822751045227,
|
|
"logps/chosen": -87.01773071289062,
|
|
"logps/ref_chosen": -68.15155029296875,
|
|
"logps/ref_rejected": -108.52360534667969,
|
|
"logps/rejected": -158.43099975585938,
|
|
"loss": 0.3507,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.13588033616542816,
|
|
"margin_dpo/beta_margin_grad_std": 0.1709379106760025,
|
|
"margin_dpo/beta_margin_mean": 3.104120969772339,
|
|
"margin_dpo/beta_margin_std": 2.6031596660614014,
|
|
"margin_dpo/loss_margin_mean": 31.041210174560547,
|
|
"margin_dpo/margin_mean": 31.041208267211914,
|
|
"margin_dpo/margin_std": 25.99789810180664,
|
|
"step": 398
|
|
},
|
|
{
|
|
"epoch": 0.5859030837004405,
|
|
"grad_norm": 54.68693923950195,
|
|
"learning_rate": 2.2055192986047804e-07,
|
|
"logits/chosen": -0.661706805229187,
|
|
"logits/rejected": -0.5980038046836853,
|
|
"logps/chosen": -77.65357971191406,
|
|
"logps/ref_chosen": -60.889801025390625,
|
|
"logps/ref_rejected": -77.96558380126953,
|
|
"logps/rejected": -129.20294189453125,
|
|
"loss": 0.4264,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.14156478643417358,
|
|
"margin_dpo/beta_margin_grad_std": 0.22005578875541687,
|
|
"margin_dpo/beta_margin_mean": 3.4473578929901123,
|
|
"margin_dpo/beta_margin_std": 2.549055814743042,
|
|
"margin_dpo/loss_margin_mean": 34.47357940673828,
|
|
"margin_dpo/margin_mean": 34.47357940673828,
|
|
"margin_dpo/margin_std": 25.397838592529297,
|
|
"step": 399
|
|
},
|
|
{
|
|
"epoch": 0.5873715124816447,
|
|
"grad_norm": 56.09019470214844,
|
|
"learning_rate": 2.192779273338215e-07,
|
|
"logits/chosen": -0.6517215967178345,
|
|
"logits/rejected": -0.6144533753395081,
|
|
"logps/chosen": -81.19852447509766,
|
|
"logps/ref_chosen": -63.64359664916992,
|
|
"logps/ref_rejected": -105.252685546875,
|
|
"logps/rejected": -161.04153442382812,
|
|
"loss": 0.4494,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.13553817570209503,
|
|
"margin_dpo/beta_margin_grad_std": 0.2256467044353485,
|
|
"margin_dpo/beta_margin_mean": 3.8233911991119385,
|
|
"margin_dpo/beta_margin_std": 3.120196580886841,
|
|
"margin_dpo/loss_margin_mean": 38.233909606933594,
|
|
"margin_dpo/margin_mean": 38.233909606933594,
|
|
"margin_dpo/margin_std": 30.78741455078125,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.5873715124816447,
|
|
"eval_logits/chosen": -0.6053273677825928,
|
|
"eval_logits/rejected": -0.5765178203582764,
|
|
"eval_logps/chosen": -101.84111785888672,
|
|
"eval_logps/ref_chosen": -79.05104064941406,
|
|
"eval_logps/ref_rejected": -86.79793548583984,
|
|
"eval_logps/rejected": -129.76779174804688,
|
|
"eval_loss": 0.42193400859832764,
|
|
"eval_margin_dpo/beta": 0.10000000149011612,
|
|
"eval_margin_dpo/beta_margin_grad_mean": -0.26937761902809143,
|
|
"eval_margin_dpo/beta_margin_grad_std": 0.25375083088874817,
|
|
"eval_margin_dpo/beta_margin_mean": 2.017979383468628,
|
|
"eval_margin_dpo/beta_margin_std": 2.53668212890625,
|
|
"eval_margin_dpo/loss_margin_mean": 20.179792404174805,
|
|
"eval_margin_dpo/margin_mean": 20.179792404174805,
|
|
"eval_margin_dpo/margin_std": 25.366819381713867,
|
|
"eval_runtime": 40.0988,
|
|
"eval_samples_per_second": 58.331,
|
|
"eval_steps_per_second": 1.845,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.5888399412628488,
|
|
"grad_norm": 61.670928955078125,
|
|
"learning_rate": 2.1800473436235136e-07,
|
|
"logits/chosen": -0.5769657492637634,
|
|
"logits/rejected": -0.5573090314865112,
|
|
"logps/chosen": -76.15703582763672,
|
|
"logps/ref_chosen": -57.16303253173828,
|
|
"logps/ref_rejected": -83.79249572753906,
|
|
"logps/rejected": -132.30641174316406,
|
|
"loss": 0.659,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.2077464908361435,
|
|
"margin_dpo/beta_margin_grad_std": 0.25239863991737366,
|
|
"margin_dpo/beta_margin_mean": 2.951991558074951,
|
|
"margin_dpo/beta_margin_std": 3.1654133796691895,
|
|
"margin_dpo/loss_margin_mean": 29.519914627075195,
|
|
"margin_dpo/margin_mean": 29.519912719726562,
|
|
"margin_dpo/margin_std": 31.590171813964844,
|
|
"step": 401
|
|
},
|
|
{
|
|
"epoch": 0.5903083700440529,
|
|
"grad_norm": 26.211894989013672,
|
|
"learning_rate": 2.1673238449588665e-07,
|
|
"logits/chosen": -0.6328971982002258,
|
|
"logits/rejected": -0.584295392036438,
|
|
"logps/chosen": -62.62638854980469,
|
|
"logps/ref_chosen": -50.74037170410156,
|
|
"logps/ref_rejected": -81.0460433959961,
|
|
"logps/rejected": -131.90960693359375,
|
|
"loss": 0.2132,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.08531676232814789,
|
|
"margin_dpo/beta_margin_grad_std": 0.13695916533470154,
|
|
"margin_dpo/beta_margin_mean": 3.8977549076080322,
|
|
"margin_dpo/beta_margin_std": 2.383517265319824,
|
|
"margin_dpo/loss_margin_mean": 38.97754669189453,
|
|
"margin_dpo/margin_mean": 38.97754669189453,
|
|
"margin_dpo/margin_std": 23.83334732055664,
|
|
"step": 402
|
|
},
|
|
{
|
|
"epoch": 0.591776798825257,
|
|
"grad_norm": 63.287567138671875,
|
|
"learning_rate": 2.154609112620295e-07,
|
|
"logits/chosen": -0.6422700881958008,
|
|
"logits/rejected": -0.6241501569747925,
|
|
"logps/chosen": -62.53410339355469,
|
|
"logps/ref_chosen": -47.14731216430664,
|
|
"logps/ref_rejected": -77.2666015625,
|
|
"logps/rejected": -122.82563781738281,
|
|
"loss": 0.5741,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1763564795255661,
|
|
"margin_dpo/beta_margin_grad_std": 0.23426702618598938,
|
|
"margin_dpo/beta_margin_mean": 3.0172247886657715,
|
|
"margin_dpo/beta_margin_std": 2.846843957901001,
|
|
"margin_dpo/loss_margin_mean": 30.1722469329834,
|
|
"margin_dpo/margin_mean": 30.17224884033203,
|
|
"margin_dpo/margin_std": 28.130752563476562,
|
|
"step": 403
|
|
},
|
|
{
|
|
"epoch": 0.593245227606461,
|
|
"grad_norm": 54.917449951171875,
|
|
"learning_rate": 2.1419034816528218e-07,
|
|
"logits/chosen": -0.6123020648956299,
|
|
"logits/rejected": -0.578801155090332,
|
|
"logps/chosen": -63.40578079223633,
|
|
"logps/ref_chosen": -47.875274658203125,
|
|
"logps/ref_rejected": -77.15499877929688,
|
|
"logps/rejected": -123.22205352783203,
|
|
"loss": 0.5739,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.18759508430957794,
|
|
"margin_dpo/beta_margin_grad_std": 0.23627623915672302,
|
|
"margin_dpo/beta_margin_mean": 3.053654670715332,
|
|
"margin_dpo/beta_margin_std": 2.8906707763671875,
|
|
"margin_dpo/loss_margin_mean": 30.53654670715332,
|
|
"margin_dpo/margin_mean": 30.53654670715332,
|
|
"margin_dpo/margin_std": 28.8435115814209,
|
|
"step": 404
|
|
},
|
|
{
|
|
"epoch": 0.5947136563876652,
|
|
"grad_norm": 64.9948501586914,
|
|
"learning_rate": 2.129207286861638e-07,
|
|
"logits/chosen": -0.5796902179718018,
|
|
"logits/rejected": -0.549854040145874,
|
|
"logps/chosen": -84.49642944335938,
|
|
"logps/ref_chosen": -65.16290283203125,
|
|
"logps/ref_rejected": -87.18678283691406,
|
|
"logps/rejected": -136.73745727539062,
|
|
"loss": 0.5427,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.18682169914245605,
|
|
"margin_dpo/beta_margin_grad_std": 0.22749853134155273,
|
|
"margin_dpo/beta_margin_mean": 3.021714210510254,
|
|
"margin_dpo/beta_margin_std": 2.9265987873077393,
|
|
"margin_dpo/loss_margin_mean": 30.217140197753906,
|
|
"margin_dpo/margin_mean": 30.217140197753906,
|
|
"margin_dpo/margin_std": 27.509521484375,
|
|
"step": 405
|
|
},
|
|
{
|
|
"epoch": 0.5961820851688693,
|
|
"grad_norm": 61.627079010009766,
|
|
"learning_rate": 2.1165208628032861e-07,
|
|
"logits/chosen": -0.6366710662841797,
|
|
"logits/rejected": -0.6224513649940491,
|
|
"logps/chosen": -66.44183349609375,
|
|
"logps/ref_chosen": -49.740814208984375,
|
|
"logps/ref_rejected": -92.07862854003906,
|
|
"logps/rejected": -140.552490234375,
|
|
"loss": 0.5435,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1675260215997696,
|
|
"margin_dpo/beta_margin_grad_std": 0.22542835772037506,
|
|
"margin_dpo/beta_margin_mean": 3.1772830486297607,
|
|
"margin_dpo/beta_margin_std": 2.8382697105407715,
|
|
"margin_dpo/loss_margin_mean": 31.772830963134766,
|
|
"margin_dpo/margin_mean": 31.772830963134766,
|
|
"margin_dpo/margin_std": 27.909154891967773,
|
|
"step": 406
|
|
},
|
|
{
|
|
"epoch": 0.5976505139500734,
|
|
"grad_norm": 68.63170623779297,
|
|
"learning_rate": 2.1038445437768375e-07,
|
|
"logits/chosen": -0.6445499062538147,
|
|
"logits/rejected": -0.599348783493042,
|
|
"logps/chosen": -72.40534973144531,
|
|
"logps/ref_chosen": -56.33069610595703,
|
|
"logps/ref_rejected": -77.5120849609375,
|
|
"logps/rejected": -125.86834716796875,
|
|
"loss": 0.6107,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.18231885135173798,
|
|
"margin_dpo/beta_margin_grad_std": 0.25164178013801575,
|
|
"margin_dpo/beta_margin_mean": 3.228161334991455,
|
|
"margin_dpo/beta_margin_std": 2.978395462036133,
|
|
"margin_dpo/loss_margin_mean": 32.281612396240234,
|
|
"margin_dpo/margin_mean": 32.281612396240234,
|
|
"margin_dpo/margin_std": 29.094558715820312,
|
|
"step": 407
|
|
},
|
|
{
|
|
"epoch": 0.5991189427312775,
|
|
"grad_norm": 81.44627380371094,
|
|
"learning_rate": 2.0911786638150872e-07,
|
|
"logits/chosen": -0.6902725696563721,
|
|
"logits/rejected": -0.6373718976974487,
|
|
"logps/chosen": -85.27023315429688,
|
|
"logps/ref_chosen": -69.789306640625,
|
|
"logps/ref_rejected": -90.09693908691406,
|
|
"logps/rejected": -133.43089294433594,
|
|
"loss": 0.6172,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.20204903185367584,
|
|
"margin_dpo/beta_margin_grad_std": 0.25068265199661255,
|
|
"margin_dpo/beta_margin_mean": 2.785304069519043,
|
|
"margin_dpo/beta_margin_std": 2.7439894676208496,
|
|
"margin_dpo/loss_margin_mean": 27.853038787841797,
|
|
"margin_dpo/margin_mean": 27.853038787841797,
|
|
"margin_dpo/margin_std": 27.155353546142578,
|
|
"step": 408
|
|
},
|
|
{
|
|
"epoch": 0.6005873715124816,
|
|
"grad_norm": 49.702667236328125,
|
|
"learning_rate": 2.0785235566757517e-07,
|
|
"logits/chosen": -0.6016473770141602,
|
|
"logits/rejected": -0.5694031119346619,
|
|
"logps/chosen": -84.24601745605469,
|
|
"logps/ref_chosen": -67.31744384765625,
|
|
"logps/ref_rejected": -84.904296875,
|
|
"logps/rejected": -132.7557373046875,
|
|
"loss": 0.4121,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1540304273366928,
|
|
"margin_dpo/beta_margin_grad_std": 0.19426687061786652,
|
|
"margin_dpo/beta_margin_mean": 3.092287302017212,
|
|
"margin_dpo/beta_margin_std": 2.565514326095581,
|
|
"margin_dpo/loss_margin_mean": 30.92287254333496,
|
|
"margin_dpo/margin_mean": 30.92287254333496,
|
|
"margin_dpo/margin_std": 25.64594078063965,
|
|
"step": 409
|
|
},
|
|
{
|
|
"epoch": 0.6020558002936858,
|
|
"grad_norm": 67.67236328125,
|
|
"learning_rate": 2.065879555832674e-07,
|
|
"logits/chosen": -0.6346931457519531,
|
|
"logits/rejected": -0.6326348781585693,
|
|
"logps/chosen": -70.31283569335938,
|
|
"logps/ref_chosen": -51.465354919433594,
|
|
"logps/ref_rejected": -83.198974609375,
|
|
"logps/rejected": -129.9054718017578,
|
|
"loss": 0.5957,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.20088441669940948,
|
|
"margin_dpo/beta_margin_grad_std": 0.24235385656356812,
|
|
"margin_dpo/beta_margin_mean": 2.7859020233154297,
|
|
"margin_dpo/beta_margin_std": 2.646648406982422,
|
|
"margin_dpo/loss_margin_mean": 27.859020233154297,
|
|
"margin_dpo/margin_mean": 27.859020233154297,
|
|
"margin_dpo/margin_std": 26.202781677246094,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.6035242290748899,
|
|
"grad_norm": 57.020423889160156,
|
|
"learning_rate": 2.0532469944670343e-07,
|
|
"logits/chosen": -0.6736893653869629,
|
|
"logits/rejected": -0.640461802482605,
|
|
"logps/chosen": -71.45536041259766,
|
|
"logps/ref_chosen": -52.30727005004883,
|
|
"logps/ref_rejected": -80.69495391845703,
|
|
"logps/rejected": -129.53814697265625,
|
|
"loss": 0.5393,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1864738166332245,
|
|
"margin_dpo/beta_margin_grad_std": 0.2311916947364807,
|
|
"margin_dpo/beta_margin_mean": 2.969511032104492,
|
|
"margin_dpo/beta_margin_std": 2.880366563796997,
|
|
"margin_dpo/loss_margin_mean": 29.695110321044922,
|
|
"margin_dpo/margin_mean": 29.69510841369629,
|
|
"margin_dpo/margin_std": 27.609901428222656,
|
|
"step": 411
|
|
},
|
|
{
|
|
"epoch": 0.604992657856094,
|
|
"grad_norm": 41.312705993652344,
|
|
"learning_rate": 2.0406262054585738e-07,
|
|
"logits/chosen": -0.702052652835846,
|
|
"logits/rejected": -0.6910427808761597,
|
|
"logps/chosen": -68.71327209472656,
|
|
"logps/ref_chosen": -53.144126892089844,
|
|
"logps/ref_rejected": -100.06080627441406,
|
|
"logps/rejected": -145.08905029296875,
|
|
"loss": 0.501,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.18716482818126678,
|
|
"margin_dpo/beta_margin_grad_std": 0.21060419082641602,
|
|
"margin_dpo/beta_margin_mean": 2.9459095001220703,
|
|
"margin_dpo/beta_margin_std": 2.7281651496887207,
|
|
"margin_dpo/loss_margin_mean": 29.459095001220703,
|
|
"margin_dpo/margin_mean": 29.459096908569336,
|
|
"margin_dpo/margin_std": 27.16181182861328,
|
|
"step": 412
|
|
},
|
|
{
|
|
"epoch": 0.6064610866372981,
|
|
"grad_norm": 59.28904724121094,
|
|
"learning_rate": 2.0280175213768205e-07,
|
|
"logits/chosen": -0.5773541927337646,
|
|
"logits/rejected": -0.5431898832321167,
|
|
"logps/chosen": -80.49532318115234,
|
|
"logps/ref_chosen": -61.58196258544922,
|
|
"logps/ref_rejected": -99.47340393066406,
|
|
"logps/rejected": -148.28915405273438,
|
|
"loss": 0.4911,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.15901578962802887,
|
|
"margin_dpo/beta_margin_grad_std": 0.21321162581443787,
|
|
"margin_dpo/beta_margin_mean": 2.990238666534424,
|
|
"margin_dpo/beta_margin_std": 2.5804879665374756,
|
|
"margin_dpo/loss_margin_mean": 29.902387619018555,
|
|
"margin_dpo/margin_mean": 29.902387619018555,
|
|
"margin_dpo/margin_std": 25.28069496154785,
|
|
"step": 413
|
|
},
|
|
{
|
|
"epoch": 0.6079295154185022,
|
|
"grad_norm": 55.32724380493164,
|
|
"learning_rate": 2.0154212744723247e-07,
|
|
"logits/chosen": -0.6178678274154663,
|
|
"logits/rejected": -0.580098032951355,
|
|
"logps/chosen": -62.55944061279297,
|
|
"logps/ref_chosen": -46.63148880004883,
|
|
"logps/ref_rejected": -87.64652252197266,
|
|
"logps/rejected": -139.25851440429688,
|
|
"loss": 0.3637,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.13259248435497284,
|
|
"margin_dpo/beta_margin_grad_std": 0.19208675622940063,
|
|
"margin_dpo/beta_margin_mean": 3.56840443611145,
|
|
"margin_dpo/beta_margin_std": 2.785224437713623,
|
|
"margin_dpo/loss_margin_mean": 35.684043884277344,
|
|
"margin_dpo/margin_mean": 35.684043884277344,
|
|
"margin_dpo/margin_std": 25.48971176147461,
|
|
"step": 414
|
|
},
|
|
{
|
|
"epoch": 0.6093979441997063,
|
|
"grad_norm": 44.93287658691406,
|
|
"learning_rate": 2.002837796667909e-07,
|
|
"logits/chosen": -0.5938626527786255,
|
|
"logits/rejected": -0.5675798654556274,
|
|
"logps/chosen": -95.38108825683594,
|
|
"logps/ref_chosen": -78.6182861328125,
|
|
"logps/ref_rejected": -100.47752380371094,
|
|
"logps/rejected": -146.9215850830078,
|
|
"loss": 0.3982,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1585043966770172,
|
|
"margin_dpo/beta_margin_grad_std": 0.17520886659622192,
|
|
"margin_dpo/beta_margin_mean": 2.9681267738342285,
|
|
"margin_dpo/beta_margin_std": 2.4819741249084473,
|
|
"margin_dpo/loss_margin_mean": 29.68126678466797,
|
|
"margin_dpo/margin_mean": 29.6812686920166,
|
|
"margin_dpo/margin_std": 24.717784881591797,
|
|
"step": 415
|
|
},
|
|
{
|
|
"epoch": 0.6108663729809104,
|
|
"grad_norm": 49.30588150024414,
|
|
"learning_rate": 1.990267419549914e-07,
|
|
"logits/chosen": -0.6397312879562378,
|
|
"logits/rejected": -0.6059544086456299,
|
|
"logps/chosen": -75.66851806640625,
|
|
"logps/ref_chosen": -58.27912521362305,
|
|
"logps/ref_rejected": -90.56871795654297,
|
|
"logps/rejected": -144.47354125976562,
|
|
"loss": 0.3876,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1345895677804947,
|
|
"margin_dpo/beta_margin_grad_std": 0.2077297866344452,
|
|
"margin_dpo/beta_margin_mean": 3.651543617248535,
|
|
"margin_dpo/beta_margin_std": 2.8901610374450684,
|
|
"margin_dpo/loss_margin_mean": 36.51543426513672,
|
|
"margin_dpo/margin_mean": 36.51543426513672,
|
|
"margin_dpo/margin_std": 27.713136672973633,
|
|
"step": 416
|
|
},
|
|
{
|
|
"epoch": 0.6123348017621145,
|
|
"grad_norm": 38.555389404296875,
|
|
"learning_rate": 1.9777104743594686e-07,
|
|
"logits/chosen": -0.6252127289772034,
|
|
"logits/rejected": -0.5568169355392456,
|
|
"logps/chosen": -66.67837524414062,
|
|
"logps/ref_chosen": -50.1987190246582,
|
|
"logps/ref_rejected": -68.15184020996094,
|
|
"logps/rejected": -119.5999755859375,
|
|
"loss": 0.3154,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.12362627685070038,
|
|
"margin_dpo/beta_margin_grad_std": 0.17288993299007416,
|
|
"margin_dpo/beta_margin_mean": 3.496847629547119,
|
|
"margin_dpo/beta_margin_std": 2.3686065673828125,
|
|
"margin_dpo/loss_margin_mean": 34.968475341796875,
|
|
"margin_dpo/margin_mean": 34.968475341796875,
|
|
"margin_dpo/margin_std": 23.240657806396484,
|
|
"step": 417
|
|
},
|
|
{
|
|
"epoch": 0.6138032305433186,
|
|
"grad_norm": 64.83741760253906,
|
|
"learning_rate": 1.965167291983757e-07,
|
|
"logits/chosen": -0.6693556904792786,
|
|
"logits/rejected": -0.6072407960891724,
|
|
"logps/chosen": -99.16204833984375,
|
|
"logps/ref_chosen": -81.97846984863281,
|
|
"logps/ref_rejected": -104.69148254394531,
|
|
"logps/rejected": -156.01602172851562,
|
|
"loss": 0.5663,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.16488902270793915,
|
|
"margin_dpo/beta_margin_grad_std": 0.2364022433757782,
|
|
"margin_dpo/beta_margin_mean": 3.4140961170196533,
|
|
"margin_dpo/beta_margin_std": 3.2014167308807373,
|
|
"margin_dpo/loss_margin_mean": 34.140960693359375,
|
|
"margin_dpo/margin_mean": 34.140960693359375,
|
|
"margin_dpo/margin_std": 31.345539093017578,
|
|
"step": 418
|
|
},
|
|
{
|
|
"epoch": 0.6152716593245228,
|
|
"grad_norm": 46.96452331542969,
|
|
"learning_rate": 1.9526382029472988e-07,
|
|
"logits/chosen": -0.5874903202056885,
|
|
"logits/rejected": -0.5439319610595703,
|
|
"logps/chosen": -70.24662780761719,
|
|
"logps/ref_chosen": -52.948646545410156,
|
|
"logps/ref_rejected": -91.58309936523438,
|
|
"logps/rejected": -142.82901000976562,
|
|
"loss": 0.3088,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.11957548558712006,
|
|
"margin_dpo/beta_margin_grad_std": 0.16336920857429504,
|
|
"margin_dpo/beta_margin_mean": 3.3947930335998535,
|
|
"margin_dpo/beta_margin_std": 2.4165024757385254,
|
|
"margin_dpo/loss_margin_mean": 33.94792938232422,
|
|
"margin_dpo/margin_mean": 33.94792938232422,
|
|
"margin_dpo/margin_std": 23.982418060302734,
|
|
"step": 419
|
|
},
|
|
{
|
|
"epoch": 0.6167400881057269,
|
|
"grad_norm": 61.41410827636719,
|
|
"learning_rate": 1.9401235374032425e-07,
|
|
"logits/chosen": -0.6708568930625916,
|
|
"logits/rejected": -0.594412624835968,
|
|
"logps/chosen": -95.875244140625,
|
|
"logps/ref_chosen": -77.7699203491211,
|
|
"logps/ref_rejected": -69.31985473632812,
|
|
"logps/rejected": -120.385009765625,
|
|
"loss": 0.4567,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1499803215265274,
|
|
"margin_dpo/beta_margin_grad_std": 0.21647407114505768,
|
|
"margin_dpo/beta_margin_mean": 3.2959823608398438,
|
|
"margin_dpo/beta_margin_std": 2.774785041809082,
|
|
"margin_dpo/loss_margin_mean": 32.95982360839844,
|
|
"margin_dpo/margin_mean": 32.95981979370117,
|
|
"margin_dpo/margin_std": 27.405033111572266,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.618208516886931,
|
|
"grad_norm": 79.4913330078125,
|
|
"learning_rate": 1.9276236251246653e-07,
|
|
"logits/chosen": -0.6430982351303101,
|
|
"logits/rejected": -0.6089684963226318,
|
|
"logps/chosen": -73.95745849609375,
|
|
"logps/ref_chosen": -53.765865325927734,
|
|
"logps/ref_rejected": -89.28144836425781,
|
|
"logps/rejected": -137.42054748535156,
|
|
"loss": 0.6226,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1968107521533966,
|
|
"margin_dpo/beta_margin_grad_std": 0.24985744059085846,
|
|
"margin_dpo/beta_margin_mean": 2.794750928878784,
|
|
"margin_dpo/beta_margin_std": 2.7135543823242188,
|
|
"margin_dpo/loss_margin_mean": 27.947509765625,
|
|
"margin_dpo/margin_mean": 27.947509765625,
|
|
"margin_dpo/margin_std": 26.780242919921875,
|
|
"step": 421
|
|
},
|
|
{
|
|
"epoch": 0.6196769456681351,
|
|
"grad_norm": 66.62350463867188,
|
|
"learning_rate": 1.9151387954958792e-07,
|
|
"logits/chosen": -0.6613567471504211,
|
|
"logits/rejected": -0.6198326349258423,
|
|
"logps/chosen": -89.37240600585938,
|
|
"logps/ref_chosen": -68.6337661743164,
|
|
"logps/ref_rejected": -87.86351013183594,
|
|
"logps/rejected": -138.73875427246094,
|
|
"loss": 0.5663,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1885911077260971,
|
|
"margin_dpo/beta_margin_grad_std": 0.2437172681093216,
|
|
"margin_dpo/beta_margin_mean": 3.013660192489624,
|
|
"margin_dpo/beta_margin_std": 2.8644890785217285,
|
|
"margin_dpo/loss_margin_mean": 30.136600494384766,
|
|
"margin_dpo/margin_mean": 30.136600494384766,
|
|
"margin_dpo/margin_std": 28.641185760498047,
|
|
"step": 422
|
|
},
|
|
{
|
|
"epoch": 0.6211453744493393,
|
|
"grad_norm": 66.34683227539062,
|
|
"learning_rate": 1.902669377503756e-07,
|
|
"logits/chosen": -0.6761616468429565,
|
|
"logits/rejected": -0.6586691737174988,
|
|
"logps/chosen": -74.14385986328125,
|
|
"logps/ref_chosen": -54.99030303955078,
|
|
"logps/ref_rejected": -86.30654907226562,
|
|
"logps/rejected": -136.7641143798828,
|
|
"loss": 0.5527,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.18222779035568237,
|
|
"margin_dpo/beta_margin_grad_std": 0.2336231768131256,
|
|
"margin_dpo/beta_margin_mean": 3.1304006576538086,
|
|
"margin_dpo/beta_margin_std": 2.9750967025756836,
|
|
"margin_dpo/loss_margin_mean": 31.304006576538086,
|
|
"margin_dpo/margin_mean": 31.304006576538086,
|
|
"margin_dpo/margin_std": 29.631959915161133,
|
|
"step": 423
|
|
},
|
|
{
|
|
"epoch": 0.6226138032305433,
|
|
"grad_norm": 48.2248649597168,
|
|
"learning_rate": 1.890215699729057e-07,
|
|
"logits/chosen": -0.6284000873565674,
|
|
"logits/rejected": -0.5798854231834412,
|
|
"logps/chosen": -73.47090148925781,
|
|
"logps/ref_chosen": -56.01191711425781,
|
|
"logps/ref_rejected": -66.47896575927734,
|
|
"logps/rejected": -118.09882354736328,
|
|
"loss": 0.4263,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.15188807249069214,
|
|
"margin_dpo/beta_margin_grad_std": 0.21018096804618835,
|
|
"margin_dpo/beta_margin_mean": 3.4160873889923096,
|
|
"margin_dpo/beta_margin_std": 3.078399419784546,
|
|
"margin_dpo/loss_margin_mean": 34.16087341308594,
|
|
"margin_dpo/margin_mean": 34.16087341308594,
|
|
"margin_dpo/margin_std": 30.704998016357422,
|
|
"step": 424
|
|
},
|
|
{
|
|
"epoch": 0.6240822320117474,
|
|
"grad_norm": 56.79523849487305,
|
|
"learning_rate": 1.8777780903377732e-07,
|
|
"logits/chosen": -0.6415660381317139,
|
|
"logits/rejected": -0.6306988000869751,
|
|
"logps/chosen": -65.49158477783203,
|
|
"logps/ref_chosen": -46.868995666503906,
|
|
"logps/ref_rejected": -95.92545318603516,
|
|
"logps/rejected": -145.18174743652344,
|
|
"loss": 0.5067,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.16396918892860413,
|
|
"margin_dpo/beta_margin_grad_std": 0.22638258337974548,
|
|
"margin_dpo/beta_margin_mean": 3.063370704650879,
|
|
"margin_dpo/beta_margin_std": 2.508201837539673,
|
|
"margin_dpo/loss_margin_mean": 30.63370704650879,
|
|
"margin_dpo/margin_mean": 30.633705139160156,
|
|
"margin_dpo/margin_std": 24.710655212402344,
|
|
"step": 425
|
|
},
|
|
{
|
|
"epoch": 0.6255506607929515,
|
|
"grad_norm": 73.21717071533203,
|
|
"learning_rate": 1.8653568770724803e-07,
|
|
"logits/chosen": -0.6280812621116638,
|
|
"logits/rejected": -0.5743027925491333,
|
|
"logps/chosen": -93.59241485595703,
|
|
"logps/ref_chosen": -76.58354187011719,
|
|
"logps/ref_rejected": -81.26658630371094,
|
|
"logps/rejected": -132.15199279785156,
|
|
"loss": 0.4413,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.13312982022762299,
|
|
"margin_dpo/beta_margin_grad_std": 0.21179711818695068,
|
|
"margin_dpo/beta_margin_mean": 3.3876538276672363,
|
|
"margin_dpo/beta_margin_std": 2.695366144180298,
|
|
"margin_dpo/loss_margin_mean": 33.87653732299805,
|
|
"margin_dpo/margin_mean": 33.87653732299805,
|
|
"margin_dpo/margin_std": 26.354013442993164,
|
|
"step": 426
|
|
},
|
|
{
|
|
"epoch": 0.6270190895741556,
|
|
"grad_norm": 56.27901840209961,
|
|
"learning_rate": 1.8529523872436977e-07,
|
|
"logits/chosen": -0.6733847856521606,
|
|
"logits/rejected": -0.6199424266815186,
|
|
"logps/chosen": -81.7194595336914,
|
|
"logps/ref_chosen": -64.8538818359375,
|
|
"logps/ref_rejected": -78.56600952148438,
|
|
"logps/rejected": -120.1583251953125,
|
|
"loss": 0.5885,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.190776988863945,
|
|
"margin_dpo/beta_margin_grad_std": 0.20414692163467407,
|
|
"margin_dpo/beta_margin_mean": 2.4726738929748535,
|
|
"margin_dpo/beta_margin_std": 2.3600223064422607,
|
|
"margin_dpo/loss_margin_mean": 24.72673797607422,
|
|
"margin_dpo/margin_mean": 24.72673797607422,
|
|
"margin_dpo/margin_std": 23.543621063232422,
|
|
"step": 427
|
|
},
|
|
{
|
|
"epoch": 0.6284875183553598,
|
|
"grad_norm": 44.09659957885742,
|
|
"learning_rate": 1.8405649477212697e-07,
|
|
"logits/chosen": -0.6260280609130859,
|
|
"logits/rejected": -0.5897619724273682,
|
|
"logps/chosen": -83.10867309570312,
|
|
"logps/ref_chosen": -62.63666534423828,
|
|
"logps/ref_rejected": -103.28182220458984,
|
|
"logps/rejected": -159.34945678710938,
|
|
"loss": 0.3299,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.124484583735466,
|
|
"margin_dpo/beta_margin_grad_std": 0.1756177842617035,
|
|
"margin_dpo/beta_margin_mean": 3.559562921524048,
|
|
"margin_dpo/beta_margin_std": 2.730299234390259,
|
|
"margin_dpo/loss_margin_mean": 35.59562683105469,
|
|
"margin_dpo/margin_mean": 35.59562683105469,
|
|
"margin_dpo/margin_std": 27.276784896850586,
|
|
"step": 428
|
|
},
|
|
{
|
|
"epoch": 0.6299559471365639,
|
|
"grad_norm": 61.60802459716797,
|
|
"learning_rate": 1.828194884925749e-07,
|
|
"logits/chosen": -0.636346697807312,
|
|
"logits/rejected": -0.5803790092468262,
|
|
"logps/chosen": -101.16323852539062,
|
|
"logps/ref_chosen": -81.23401641845703,
|
|
"logps/ref_rejected": -91.79493713378906,
|
|
"logps/rejected": -141.40106201171875,
|
|
"loss": 0.595,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1930510401725769,
|
|
"margin_dpo/beta_margin_grad_std": 0.24151724576950073,
|
|
"margin_dpo/beta_margin_mean": 2.967691421508789,
|
|
"margin_dpo/beta_margin_std": 2.905752182006836,
|
|
"margin_dpo/loss_margin_mean": 29.67691421508789,
|
|
"margin_dpo/margin_mean": 29.67691421508789,
|
|
"margin_dpo/margin_std": 28.60194969177246,
|
|
"step": 429
|
|
},
|
|
{
|
|
"epoch": 0.631424375917768,
|
|
"grad_norm": 51.62448501586914,
|
|
"learning_rate": 1.8158425248197928e-07,
|
|
"logits/chosen": -0.6227689981460571,
|
|
"logits/rejected": -0.6045354008674622,
|
|
"logps/chosen": -79.01585388183594,
|
|
"logps/ref_chosen": -60.92032241821289,
|
|
"logps/ref_rejected": -104.42280578613281,
|
|
"logps/rejected": -153.30923461914062,
|
|
"loss": 0.4761,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1646682769060135,
|
|
"margin_dpo/beta_margin_grad_std": 0.2231719046831131,
|
|
"margin_dpo/beta_margin_mean": 3.0790908336639404,
|
|
"margin_dpo/beta_margin_std": 2.675757884979248,
|
|
"margin_dpo/loss_margin_mean": 30.790908813476562,
|
|
"margin_dpo/margin_mean": 30.790908813476562,
|
|
"margin_dpo/margin_std": 26.328550338745117,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.6328928046989721,
|
|
"grad_norm": 45.01468276977539,
|
|
"learning_rate": 1.8035081928995788e-07,
|
|
"logits/chosen": -0.6120933294296265,
|
|
"logits/rejected": -0.5965217351913452,
|
|
"logps/chosen": -76.03721618652344,
|
|
"logps/ref_chosen": -57.348751068115234,
|
|
"logps/ref_rejected": -92.84022521972656,
|
|
"logps/rejected": -146.1577911376953,
|
|
"loss": 0.3416,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.13492785394191742,
|
|
"margin_dpo/beta_margin_grad_std": 0.17364878952503204,
|
|
"margin_dpo/beta_margin_mean": 3.4629099369049072,
|
|
"margin_dpo/beta_margin_std": 2.6419167518615723,
|
|
"margin_dpo/loss_margin_mean": 34.62909698486328,
|
|
"margin_dpo/margin_mean": 34.62909698486328,
|
|
"margin_dpo/margin_std": 26.410173416137695,
|
|
"step": 431
|
|
},
|
|
{
|
|
"epoch": 0.6343612334801763,
|
|
"grad_norm": 55.2720947265625,
|
|
"learning_rate": 1.791192214186223e-07,
|
|
"logits/chosen": -0.6020532250404358,
|
|
"logits/rejected": -0.5625859498977661,
|
|
"logps/chosen": -88.92323303222656,
|
|
"logps/ref_chosen": -71.07479095458984,
|
|
"logps/ref_rejected": -98.57952880859375,
|
|
"logps/rejected": -148.73974609375,
|
|
"loss": 0.4396,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1505272537469864,
|
|
"margin_dpo/beta_margin_grad_std": 0.20940996706485748,
|
|
"margin_dpo/beta_margin_mean": 3.231178045272827,
|
|
"margin_dpo/beta_margin_std": 2.7881994247436523,
|
|
"margin_dpo/loss_margin_mean": 32.3117790222168,
|
|
"margin_dpo/margin_mean": 32.3117790222168,
|
|
"margin_dpo/margin_std": 27.102590560913086,
|
|
"step": 432
|
|
},
|
|
{
|
|
"epoch": 0.6358296622613803,
|
|
"grad_norm": 71.04937744140625,
|
|
"learning_rate": 1.7788949132172193e-07,
|
|
"logits/chosen": -0.6384241580963135,
|
|
"logits/rejected": -0.6068836450576782,
|
|
"logps/chosen": -81.66122436523438,
|
|
"logps/ref_chosen": -58.273193359375,
|
|
"logps/ref_rejected": -95.95089721679688,
|
|
"logps/rejected": -147.70458984375,
|
|
"loss": 0.5849,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.19293591380119324,
|
|
"margin_dpo/beta_margin_grad_std": 0.2420828938484192,
|
|
"margin_dpo/beta_margin_mean": 2.83656644821167,
|
|
"margin_dpo/beta_margin_std": 2.67340350151062,
|
|
"margin_dpo/loss_margin_mean": 28.365663528442383,
|
|
"margin_dpo/margin_mean": 28.365665435791016,
|
|
"margin_dpo/margin_std": 26.324649810791016,
|
|
"step": 433
|
|
},
|
|
{
|
|
"epoch": 0.6372980910425844,
|
|
"grad_norm": 48.197303771972656,
|
|
"learning_rate": 1.7666166140378853e-07,
|
|
"logits/chosen": -0.6621353626251221,
|
|
"logits/rejected": -0.6182979345321655,
|
|
"logps/chosen": -79.50520324707031,
|
|
"logps/ref_chosen": -61.97370147705078,
|
|
"logps/ref_rejected": -78.49861145019531,
|
|
"logps/rejected": -125.54408264160156,
|
|
"loss": 0.4218,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.15651409327983856,
|
|
"margin_dpo/beta_margin_grad_std": 0.19872474670410156,
|
|
"margin_dpo/beta_margin_mean": 2.9513981342315674,
|
|
"margin_dpo/beta_margin_std": 2.5280110836029053,
|
|
"margin_dpo/loss_margin_mean": 29.513980865478516,
|
|
"margin_dpo/margin_mean": 29.513980865478516,
|
|
"margin_dpo/margin_std": 25.25749969482422,
|
|
"step": 434
|
|
},
|
|
{
|
|
"epoch": 0.6387665198237885,
|
|
"grad_norm": 63.86077117919922,
|
|
"learning_rate": 1.7543576401928218e-07,
|
|
"logits/chosen": -0.6548939943313599,
|
|
"logits/rejected": -0.6191599369049072,
|
|
"logps/chosen": -69.592041015625,
|
|
"logps/ref_chosen": -51.502052307128906,
|
|
"logps/ref_rejected": -87.56689453125,
|
|
"logps/rejected": -138.00416564941406,
|
|
"loss": 0.5053,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1661788821220398,
|
|
"margin_dpo/beta_margin_grad_std": 0.21013152599334717,
|
|
"margin_dpo/beta_margin_mean": 3.234729051589966,
|
|
"margin_dpo/beta_margin_std": 2.9603843688964844,
|
|
"margin_dpo/loss_margin_mean": 32.3472900390625,
|
|
"margin_dpo/margin_mean": 32.3472900390625,
|
|
"margin_dpo/margin_std": 29.455238342285156,
|
|
"step": 435
|
|
},
|
|
{
|
|
"epoch": 0.6402349486049926,
|
|
"grad_norm": 40.332698822021484,
|
|
"learning_rate": 1.742118314717391e-07,
|
|
"logits/chosen": -0.632080078125,
|
|
"logits/rejected": -0.5719594955444336,
|
|
"logps/chosen": -88.88678741455078,
|
|
"logps/ref_chosen": -71.40371704101562,
|
|
"logps/ref_rejected": -82.72775268554688,
|
|
"logps/rejected": -131.7387237548828,
|
|
"loss": 0.3539,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.13681164383888245,
|
|
"margin_dpo/beta_margin_grad_std": 0.17888766527175903,
|
|
"margin_dpo/beta_margin_mean": 3.152789354324341,
|
|
"margin_dpo/beta_margin_std": 2.4287147521972656,
|
|
"margin_dpo/loss_margin_mean": 31.527891159057617,
|
|
"margin_dpo/margin_mean": 31.527891159057617,
|
|
"margin_dpo/margin_std": 24.248245239257812,
|
|
"step": 436
|
|
},
|
|
{
|
|
"epoch": 0.6417033773861968,
|
|
"grad_norm": 51.00373840332031,
|
|
"learning_rate": 1.7298989601292036e-07,
|
|
"logits/chosen": -0.6353539228439331,
|
|
"logits/rejected": -0.5929083824157715,
|
|
"logps/chosen": -81.99353790283203,
|
|
"logps/ref_chosen": -64.7442626953125,
|
|
"logps/ref_rejected": -82.04356384277344,
|
|
"logps/rejected": -127.4609375,
|
|
"loss": 0.5269,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1786879152059555,
|
|
"margin_dpo/beta_margin_grad_std": 0.23251357674598694,
|
|
"margin_dpo/beta_margin_mean": 2.81680965423584,
|
|
"margin_dpo/beta_margin_std": 2.37345027923584,
|
|
"margin_dpo/loss_margin_mean": 28.168094635009766,
|
|
"margin_dpo/margin_mean": 28.168094635009766,
|
|
"margin_dpo/margin_std": 23.416202545166016,
|
|
"step": 437
|
|
},
|
|
{
|
|
"epoch": 0.6431718061674009,
|
|
"grad_norm": 63.38606643676758,
|
|
"learning_rate": 1.7176998984196144e-07,
|
|
"logits/chosen": -0.6576756238937378,
|
|
"logits/rejected": -0.5832280516624451,
|
|
"logps/chosen": -78.18193817138672,
|
|
"logps/ref_chosen": -59.0186653137207,
|
|
"logps/ref_rejected": -83.07682037353516,
|
|
"logps/rejected": -136.60678100585938,
|
|
"loss": 0.3695,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.13736094534397125,
|
|
"margin_dpo/beta_margin_grad_std": 0.18339543044567108,
|
|
"margin_dpo/beta_margin_mean": 3.4366683959960938,
|
|
"margin_dpo/beta_margin_std": 2.721860408782959,
|
|
"margin_dpo/loss_margin_mean": 34.36668395996094,
|
|
"margin_dpo/margin_mean": 34.36668395996094,
|
|
"margin_dpo/margin_std": 26.956180572509766,
|
|
"step": 438
|
|
},
|
|
{
|
|
"epoch": 0.644640234948605,
|
|
"grad_norm": 71.34723663330078,
|
|
"learning_rate": 1.7055214510452458e-07,
|
|
"logits/chosen": -0.6156207323074341,
|
|
"logits/rejected": -0.5937438607215881,
|
|
"logps/chosen": -77.27565002441406,
|
|
"logps/ref_chosen": -53.784080505371094,
|
|
"logps/ref_rejected": -83.98545837402344,
|
|
"logps/rejected": -134.45162963867188,
|
|
"loss": 0.5261,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.18373528122901917,
|
|
"margin_dpo/beta_margin_grad_std": 0.2151244729757309,
|
|
"margin_dpo/beta_margin_mean": 2.6974589824676514,
|
|
"margin_dpo/beta_margin_std": 2.4870941638946533,
|
|
"margin_dpo/loss_margin_mean": 26.97458839416504,
|
|
"margin_dpo/margin_mean": 26.974590301513672,
|
|
"margin_dpo/margin_std": 23.787738800048828,
|
|
"step": 439
|
|
},
|
|
{
|
|
"epoch": 0.6461086637298091,
|
|
"grad_norm": 96.4582290649414,
|
|
"learning_rate": 1.6933639389195134e-07,
|
|
"logits/chosen": -0.6607520580291748,
|
|
"logits/rejected": -0.6199520826339722,
|
|
"logps/chosen": -96.89436340332031,
|
|
"logps/ref_chosen": -78.56671905517578,
|
|
"logps/ref_rejected": -96.49775695800781,
|
|
"logps/rejected": -140.70578002929688,
|
|
"loss": 0.6669,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.2098814845085144,
|
|
"margin_dpo/beta_margin_grad_std": 0.25330764055252075,
|
|
"margin_dpo/beta_margin_mean": 2.5880370140075684,
|
|
"margin_dpo/beta_margin_std": 2.716387987136841,
|
|
"margin_dpo/loss_margin_mean": 25.880369186401367,
|
|
"margin_dpo/margin_mean": 25.880369186401367,
|
|
"margin_dpo/margin_std": 27.07331085205078,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.6475770925110133,
|
|
"grad_norm": 49.82929229736328,
|
|
"learning_rate": 1.681227682404166e-07,
|
|
"logits/chosen": -0.5963351726531982,
|
|
"logits/rejected": -0.5610902309417725,
|
|
"logps/chosen": -80.72434997558594,
|
|
"logps/ref_chosen": -60.824440002441406,
|
|
"logps/ref_rejected": -96.47080993652344,
|
|
"logps/rejected": -147.17962646484375,
|
|
"loss": 0.4379,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.13756851851940155,
|
|
"margin_dpo/beta_margin_grad_std": 0.19719012081623077,
|
|
"margin_dpo/beta_margin_mean": 3.080892562866211,
|
|
"margin_dpo/beta_margin_std": 2.426534414291382,
|
|
"margin_dpo/loss_margin_mean": 30.808923721313477,
|
|
"margin_dpo/margin_mean": 30.808923721313477,
|
|
"margin_dpo/margin_std": 23.68011474609375,
|
|
"step": 441
|
|
},
|
|
{
|
|
"epoch": 0.6490455212922174,
|
|
"grad_norm": 36.576942443847656,
|
|
"learning_rate": 1.669113001300851e-07,
|
|
"logits/chosen": -0.6140519380569458,
|
|
"logits/rejected": -0.5783543586730957,
|
|
"logps/chosen": -64.97787475585938,
|
|
"logps/ref_chosen": -47.01121520996094,
|
|
"logps/ref_rejected": -76.53926086425781,
|
|
"logps/rejected": -132.34170532226562,
|
|
"loss": 0.2823,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1093648299574852,
|
|
"margin_dpo/beta_margin_grad_std": 0.16080023348331451,
|
|
"margin_dpo/beta_margin_mean": 3.7835774421691895,
|
|
"margin_dpo/beta_margin_std": 2.697312593460083,
|
|
"margin_dpo/loss_margin_mean": 37.83577346801758,
|
|
"margin_dpo/margin_mean": 37.83577346801758,
|
|
"margin_dpo/margin_std": 26.404239654541016,
|
|
"step": 442
|
|
},
|
|
{
|
|
"epoch": 0.6505139500734214,
|
|
"grad_norm": 79.94059753417969,
|
|
"learning_rate": 1.6570202148426815e-07,
|
|
"logits/chosen": -0.6004323959350586,
|
|
"logits/rejected": -0.5627496242523193,
|
|
"logps/chosen": -93.62142944335938,
|
|
"logps/ref_chosen": -71.27301788330078,
|
|
"logps/ref_rejected": -86.679931640625,
|
|
"logps/rejected": -137.5754852294922,
|
|
"loss": 0.6573,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.20045503973960876,
|
|
"margin_dpo/beta_margin_grad_std": 0.263896644115448,
|
|
"margin_dpo/beta_margin_mean": 2.8547141551971436,
|
|
"margin_dpo/beta_margin_std": 2.7842366695404053,
|
|
"margin_dpo/loss_margin_mean": 28.54714012145996,
|
|
"margin_dpo/margin_mean": 28.54714012145996,
|
|
"margin_dpo/margin_std": 27.68130111694336,
|
|
"step": 443
|
|
},
|
|
{
|
|
"epoch": 0.6519823788546255,
|
|
"grad_norm": 47.294471740722656,
|
|
"learning_rate": 1.6449496416858282e-07,
|
|
"logits/chosen": -0.5860676169395447,
|
|
"logits/rejected": -0.5605667233467102,
|
|
"logps/chosen": -76.857421875,
|
|
"logps/ref_chosen": -57.213706970214844,
|
|
"logps/ref_rejected": -97.25489044189453,
|
|
"logps/rejected": -151.163330078125,
|
|
"loss": 0.4389,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.14946991205215454,
|
|
"margin_dpo/beta_margin_grad_std": 0.21209140121936798,
|
|
"margin_dpo/beta_margin_mean": 3.4264724254608154,
|
|
"margin_dpo/beta_margin_std": 2.8675122261047363,
|
|
"margin_dpo/loss_margin_mean": 34.26472091674805,
|
|
"margin_dpo/margin_mean": 34.26472473144531,
|
|
"margin_dpo/margin_std": 28.598800659179688,
|
|
"step": 444
|
|
},
|
|
{
|
|
"epoch": 0.6534508076358296,
|
|
"grad_norm": 61.75363540649414,
|
|
"learning_rate": 1.6329015999011182e-07,
|
|
"logits/chosen": -0.6285964250564575,
|
|
"logits/rejected": -0.5963205695152283,
|
|
"logps/chosen": -84.33077239990234,
|
|
"logps/ref_chosen": -67.29979705810547,
|
|
"logps/ref_rejected": -92.68267822265625,
|
|
"logps/rejected": -141.63113403320312,
|
|
"loss": 0.4624,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.16633237898349762,
|
|
"margin_dpo/beta_margin_grad_std": 0.21091465651988983,
|
|
"margin_dpo/beta_margin_mean": 3.1917476654052734,
|
|
"margin_dpo/beta_margin_std": 2.7972888946533203,
|
|
"margin_dpo/loss_margin_mean": 31.917476654052734,
|
|
"margin_dpo/margin_mean": 31.917476654052734,
|
|
"margin_dpo/margin_std": 27.65774154663086,
|
|
"step": 445
|
|
},
|
|
{
|
|
"epoch": 0.6549192364170338,
|
|
"grad_norm": 54.28517532348633,
|
|
"learning_rate": 1.6208764069656578e-07,
|
|
"logits/chosen": -0.5897877216339111,
|
|
"logits/rejected": -0.568926215171814,
|
|
"logps/chosen": -76.78812408447266,
|
|
"logps/ref_chosen": -59.098487854003906,
|
|
"logps/ref_rejected": -101.26419067382812,
|
|
"logps/rejected": -149.1267852783203,
|
|
"loss": 0.4368,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.16494978964328766,
|
|
"margin_dpo/beta_margin_grad_std": 0.1965719312429428,
|
|
"margin_dpo/beta_margin_mean": 3.0172958374023438,
|
|
"margin_dpo/beta_margin_std": 2.6881942749023438,
|
|
"margin_dpo/loss_margin_mean": 30.172958374023438,
|
|
"margin_dpo/margin_mean": 30.172958374023438,
|
|
"margin_dpo/margin_std": 26.31899070739746,
|
|
"step": 446
|
|
},
|
|
{
|
|
"epoch": 0.6563876651982379,
|
|
"grad_norm": 51.364315032958984,
|
|
"learning_rate": 1.608874379754465e-07,
|
|
"logits/chosen": -0.660834014415741,
|
|
"logits/rejected": -0.6618390083312988,
|
|
"logps/chosen": -76.43832397460938,
|
|
"logps/ref_chosen": -56.07533264160156,
|
|
"logps/ref_rejected": -98.69475555419922,
|
|
"logps/rejected": -150.78875732421875,
|
|
"loss": 0.4538,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.16636352241039276,
|
|
"margin_dpo/beta_margin_grad_std": 0.20971129834651947,
|
|
"margin_dpo/beta_margin_mean": 3.1731016635894775,
|
|
"margin_dpo/beta_margin_std": 2.897716760635376,
|
|
"margin_dpo/loss_margin_mean": 31.731016159057617,
|
|
"margin_dpo/margin_mean": 31.73101806640625,
|
|
"margin_dpo/margin_std": 28.281917572021484,
|
|
"step": 447
|
|
},
|
|
{
|
|
"epoch": 0.657856093979442,
|
|
"grad_norm": 47.65716552734375,
|
|
"learning_rate": 1.5968958345321177e-07,
|
|
"logits/chosen": -0.6168828010559082,
|
|
"logits/rejected": -0.600253701210022,
|
|
"logps/chosen": -80.88053131103516,
|
|
"logps/ref_chosen": -60.00384521484375,
|
|
"logps/ref_rejected": -102.26465606689453,
|
|
"logps/rejected": -155.2429962158203,
|
|
"loss": 0.3892,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.13886789977550507,
|
|
"margin_dpo/beta_margin_grad_std": 0.18517683446407318,
|
|
"margin_dpo/beta_margin_mean": 3.2101657390594482,
|
|
"margin_dpo/beta_margin_std": 2.5543386936187744,
|
|
"margin_dpo/loss_margin_mean": 32.101654052734375,
|
|
"margin_dpo/margin_mean": 32.101654052734375,
|
|
"margin_dpo/margin_std": 25.43906021118164,
|
|
"step": 448
|
|
},
|
|
{
|
|
"epoch": 0.6593245227606461,
|
|
"grad_norm": 79.98429107666016,
|
|
"learning_rate": 1.584941086944423e-07,
|
|
"logits/chosen": -0.5817546248435974,
|
|
"logits/rejected": -0.5362948179244995,
|
|
"logps/chosen": -89.62152099609375,
|
|
"logps/ref_chosen": -67.52661895751953,
|
|
"logps/ref_rejected": -88.59690856933594,
|
|
"logps/rejected": -142.1068878173828,
|
|
"loss": 0.6043,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.17372801899909973,
|
|
"margin_dpo/beta_margin_grad_std": 0.23874573409557343,
|
|
"margin_dpo/beta_margin_mean": 3.141507387161255,
|
|
"margin_dpo/beta_margin_std": 3.0401062965393066,
|
|
"margin_dpo/loss_margin_mean": 31.41507339477539,
|
|
"margin_dpo/margin_mean": 31.41507339477539,
|
|
"margin_dpo/margin_std": 30.071718215942383,
|
|
"step": 449
|
|
},
|
|
{
|
|
"epoch": 0.6607929515418502,
|
|
"grad_norm": 44.39156723022461,
|
|
"learning_rate": 1.573010452010098e-07,
|
|
"logits/chosen": -0.6516839265823364,
|
|
"logits/rejected": -0.6243829727172852,
|
|
"logps/chosen": -73.27051544189453,
|
|
"logps/ref_chosen": -57.108116149902344,
|
|
"logps/ref_rejected": -102.75494384765625,
|
|
"logps/rejected": -153.4552459716797,
|
|
"loss": 0.3207,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.12840278446674347,
|
|
"margin_dpo/beta_margin_grad_std": 0.1644502729177475,
|
|
"margin_dpo/beta_margin_mean": 3.4537904262542725,
|
|
"margin_dpo/beta_margin_std": 2.6313655376434326,
|
|
"margin_dpo/loss_margin_mean": 34.53790283203125,
|
|
"margin_dpo/margin_mean": 34.53790283203125,
|
|
"margin_dpo/margin_std": 25.840599060058594,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.6622613803230544,
|
|
"grad_norm": 75.2901382446289,
|
|
"learning_rate": 1.5611042441124687e-07,
|
|
"logits/chosen": -0.6581634283065796,
|
|
"logits/rejected": -0.6103047132492065,
|
|
"logps/chosen": -80.07470703125,
|
|
"logps/ref_chosen": -58.46883010864258,
|
|
"logps/ref_rejected": -72.92941284179688,
|
|
"logps/rejected": -124.00057983398438,
|
|
"loss": 0.5537,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.16929282248020172,
|
|
"margin_dpo/beta_margin_grad_std": 0.2288813591003418,
|
|
"margin_dpo/beta_margin_mean": 2.9465293884277344,
|
|
"margin_dpo/beta_margin_std": 2.6042659282684326,
|
|
"margin_dpo/loss_margin_mean": 29.465293884277344,
|
|
"margin_dpo/margin_mean": 29.465293884277344,
|
|
"margin_dpo/margin_std": 25.818279266357422,
|
|
"step": 451
|
|
},
|
|
{
|
|
"epoch": 0.6637298091042585,
|
|
"grad_norm": 35.9453239440918,
|
|
"learning_rate": 1.549222776991186e-07,
|
|
"logits/chosen": -0.546400785446167,
|
|
"logits/rejected": -0.5479906797409058,
|
|
"logps/chosen": -66.35121154785156,
|
|
"logps/ref_chosen": -50.39055252075195,
|
|
"logps/ref_rejected": -97.77143096923828,
|
|
"logps/rejected": -143.86688232421875,
|
|
"loss": 0.2857,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.12084120512008667,
|
|
"margin_dpo/beta_margin_grad_std": 0.13351190090179443,
|
|
"margin_dpo/beta_margin_mean": 3.0134785175323486,
|
|
"margin_dpo/beta_margin_std": 2.2046637535095215,
|
|
"margin_dpo/loss_margin_mean": 30.134784698486328,
|
|
"margin_dpo/margin_mean": 30.134784698486328,
|
|
"margin_dpo/margin_std": 21.948862075805664,
|
|
"step": 452
|
|
},
|
|
{
|
|
"epoch": 0.6651982378854625,
|
|
"grad_norm": 51.65986633300781,
|
|
"learning_rate": 1.5373663637339584e-07,
|
|
"logits/chosen": -0.6441305875778198,
|
|
"logits/rejected": -0.5928350687026978,
|
|
"logps/chosen": -76.96781921386719,
|
|
"logps/ref_chosen": -57.71485137939453,
|
|
"logps/ref_rejected": -82.20741271972656,
|
|
"logps/rejected": -130.54562377929688,
|
|
"loss": 0.4664,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.16998730599880219,
|
|
"margin_dpo/beta_margin_grad_std": 0.2013465166091919,
|
|
"margin_dpo/beta_margin_mean": 2.90852427482605,
|
|
"margin_dpo/beta_margin_std": 2.558598279953003,
|
|
"margin_dpo/loss_margin_mean": 29.085243225097656,
|
|
"margin_dpo/margin_mean": 29.085243225097656,
|
|
"margin_dpo/margin_std": 25.423097610473633,
|
|
"step": 453
|
|
},
|
|
{
|
|
"epoch": 0.6666666666666666,
|
|
"grad_norm": 59.35947036743164,
|
|
"learning_rate": 1.5255353167683017e-07,
|
|
"logits/chosen": -0.6171283721923828,
|
|
"logits/rejected": -0.5738873481750488,
|
|
"logps/chosen": -81.52304077148438,
|
|
"logps/ref_chosen": -60.945648193359375,
|
|
"logps/ref_rejected": -84.9507827758789,
|
|
"logps/rejected": -137.84750366210938,
|
|
"loss": 0.4047,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.14290541410446167,
|
|
"margin_dpo/beta_margin_grad_std": 0.2013457864522934,
|
|
"margin_dpo/beta_margin_mean": 3.2319328784942627,
|
|
"margin_dpo/beta_margin_std": 2.6018524169921875,
|
|
"margin_dpo/loss_margin_mean": 32.31932830810547,
|
|
"margin_dpo/margin_mean": 32.31932830810547,
|
|
"margin_dpo/margin_std": 25.902687072753906,
|
|
"step": 454
|
|
},
|
|
{
|
|
"epoch": 0.6681350954478708,
|
|
"grad_norm": 93.24932861328125,
|
|
"learning_rate": 1.5137299478533064e-07,
|
|
"logits/chosen": -0.6162554621696472,
|
|
"logits/rejected": -0.5891969203948975,
|
|
"logps/chosen": -64.90336608886719,
|
|
"logps/ref_chosen": -44.88671112060547,
|
|
"logps/ref_rejected": -115.30147552490234,
|
|
"logps/rejected": -172.52194213867188,
|
|
"loss": 0.3629,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1229911670088768,
|
|
"margin_dpo/beta_margin_grad_std": 0.19356586039066315,
|
|
"margin_dpo/beta_margin_mean": 3.7203803062438965,
|
|
"margin_dpo/beta_margin_std": 2.6521599292755127,
|
|
"margin_dpo/loss_margin_mean": 37.203800201416016,
|
|
"margin_dpo/margin_mean": 37.203800201416016,
|
|
"margin_dpo/margin_std": 26.29052734375,
|
|
"step": 455
|
|
},
|
|
{
|
|
"epoch": 0.6696035242290749,
|
|
"grad_norm": 49.41230010986328,
|
|
"learning_rate": 1.5019505680714232e-07,
|
|
"logits/chosen": -0.6331825256347656,
|
|
"logits/rejected": -0.6310149431228638,
|
|
"logps/chosen": -74.30551147460938,
|
|
"logps/ref_chosen": -57.036781311035156,
|
|
"logps/ref_rejected": -105.21783447265625,
|
|
"logps/rejected": -160.00119018554688,
|
|
"loss": 0.354,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.13223397731781006,
|
|
"margin_dpo/beta_margin_grad_std": 0.1870342195034027,
|
|
"margin_dpo/beta_margin_mean": 3.751462936401367,
|
|
"margin_dpo/beta_margin_std": 2.8913846015930176,
|
|
"margin_dpo/loss_margin_mean": 37.51462936401367,
|
|
"margin_dpo/margin_mean": 37.51462936401367,
|
|
"margin_dpo/margin_std": 28.42435073852539,
|
|
"step": 456
|
|
},
|
|
{
|
|
"epoch": 0.671071953010279,
|
|
"grad_norm": 59.397212982177734,
|
|
"learning_rate": 1.4901974878202627e-07,
|
|
"logits/chosen": -0.6320329308509827,
|
|
"logits/rejected": -0.6049121618270874,
|
|
"logps/chosen": -72.51710510253906,
|
|
"logps/ref_chosen": -54.24253845214844,
|
|
"logps/ref_rejected": -85.10956573486328,
|
|
"logps/rejected": -136.43548583984375,
|
|
"loss": 0.386,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.13050609827041626,
|
|
"margin_dpo/beta_margin_grad_std": 0.19707661867141724,
|
|
"margin_dpo/beta_margin_mean": 3.3051366806030273,
|
|
"margin_dpo/beta_margin_std": 2.4597647190093994,
|
|
"margin_dpo/loss_margin_mean": 33.051368713378906,
|
|
"margin_dpo/margin_mean": 33.051368713378906,
|
|
"margin_dpo/margin_std": 24.472869873046875,
|
|
"step": 457
|
|
},
|
|
{
|
|
"epoch": 0.6725403817914831,
|
|
"grad_norm": 56.77046585083008,
|
|
"learning_rate": 1.4784710168044212e-07,
|
|
"logits/chosen": -0.6297258138656616,
|
|
"logits/rejected": -0.5929204225540161,
|
|
"logps/chosen": -74.71857452392578,
|
|
"logps/ref_chosen": -55.40888214111328,
|
|
"logps/ref_rejected": -97.68325805664062,
|
|
"logps/rejected": -155.025146484375,
|
|
"loss": 0.4411,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.14024901390075684,
|
|
"margin_dpo/beta_margin_grad_std": 0.23043015599250793,
|
|
"margin_dpo/beta_margin_mean": 3.8032190799713135,
|
|
"margin_dpo/beta_margin_std": 3.263706922531128,
|
|
"margin_dpo/loss_margin_mean": 38.03219223022461,
|
|
"margin_dpo/margin_mean": 38.03219223022461,
|
|
"margin_dpo/margin_std": 32.41196060180664,
|
|
"step": 458
|
|
},
|
|
{
|
|
"epoch": 0.6740088105726872,
|
|
"grad_norm": 47.203277587890625,
|
|
"learning_rate": 1.466771464027316e-07,
|
|
"logits/chosen": -0.592144250869751,
|
|
"logits/rejected": -0.5651764869689941,
|
|
"logps/chosen": -67.03455352783203,
|
|
"logps/ref_chosen": -46.55748748779297,
|
|
"logps/ref_rejected": -86.16854095458984,
|
|
"logps/rejected": -135.55999755859375,
|
|
"loss": 0.4592,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.16323688626289368,
|
|
"margin_dpo/beta_margin_grad_std": 0.19608436524868011,
|
|
"margin_dpo/beta_margin_mean": 2.8914406299591064,
|
|
"margin_dpo/beta_margin_std": 2.370115280151367,
|
|
"margin_dpo/loss_margin_mean": 28.914405822753906,
|
|
"margin_dpo/margin_mean": 28.914405822753906,
|
|
"margin_dpo/margin_std": 23.49092674255371,
|
|
"step": 459
|
|
},
|
|
{
|
|
"epoch": 0.6754772393538914,
|
|
"grad_norm": 59.67298126220703,
|
|
"learning_rate": 1.4550991377830423e-07,
|
|
"logits/chosen": -0.5806307792663574,
|
|
"logits/rejected": -0.5847660303115845,
|
|
"logps/chosen": -70.59028625488281,
|
|
"logps/ref_chosen": -51.63489532470703,
|
|
"logps/ref_rejected": -104.11935424804688,
|
|
"logps/rejected": -155.63986206054688,
|
|
"loss": 0.4209,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.15195363759994507,
|
|
"margin_dpo/beta_margin_grad_std": 0.20993934571743011,
|
|
"margin_dpo/beta_margin_mean": 3.25651216506958,
|
|
"margin_dpo/beta_margin_std": 2.5778074264526367,
|
|
"margin_dpo/loss_margin_mean": 32.565120697021484,
|
|
"margin_dpo/margin_mean": 32.565120697021484,
|
|
"margin_dpo/margin_std": 25.7642879486084,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.6769456681350955,
|
|
"grad_norm": 59.93415069580078,
|
|
"learning_rate": 1.4434543456482518e-07,
|
|
"logits/chosen": -0.5920594930648804,
|
|
"logits/rejected": -0.5768572688102722,
|
|
"logps/chosen": -79.71414184570312,
|
|
"logps/ref_chosen": -55.18195343017578,
|
|
"logps/ref_rejected": -86.47689819335938,
|
|
"logps/rejected": -138.8244171142578,
|
|
"loss": 0.5473,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.18966011703014374,
|
|
"margin_dpo/beta_margin_grad_std": 0.22385801374912262,
|
|
"margin_dpo/beta_margin_mean": 2.7815327644348145,
|
|
"margin_dpo/beta_margin_std": 2.8518621921539307,
|
|
"margin_dpo/loss_margin_mean": 27.815326690673828,
|
|
"margin_dpo/margin_mean": 27.815326690673828,
|
|
"margin_dpo/margin_std": 27.13003921508789,
|
|
"step": 461
|
|
},
|
|
{
|
|
"epoch": 0.6784140969162996,
|
|
"grad_norm": 64.90670776367188,
|
|
"learning_rate": 1.4318373944740484e-07,
|
|
"logits/chosen": -0.6181149482727051,
|
|
"logits/rejected": -0.5787901878356934,
|
|
"logps/chosen": -93.2876968383789,
|
|
"logps/ref_chosen": -69.92803955078125,
|
|
"logps/ref_rejected": -78.84111785888672,
|
|
"logps/rejected": -129.06378173828125,
|
|
"loss": 0.554,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1927950084209442,
|
|
"margin_dpo/beta_margin_grad_std": 0.22063319385051727,
|
|
"margin_dpo/beta_margin_mean": 2.6862998008728027,
|
|
"margin_dpo/beta_margin_std": 2.5792369842529297,
|
|
"margin_dpo/loss_margin_mean": 26.86299705505371,
|
|
"margin_dpo/margin_mean": 26.862995147705078,
|
|
"margin_dpo/margin_std": 25.538467407226562,
|
|
"step": 462
|
|
},
|
|
{
|
|
"epoch": 0.6798825256975036,
|
|
"grad_norm": 50.19252014160156,
|
|
"learning_rate": 1.4202485903778976e-07,
|
|
"logits/chosen": -0.6169182062149048,
|
|
"logits/rejected": -0.5887913703918457,
|
|
"logps/chosen": -75.74092864990234,
|
|
"logps/ref_chosen": -55.27437210083008,
|
|
"logps/ref_rejected": -89.02497863769531,
|
|
"logps/rejected": -143.4207763671875,
|
|
"loss": 0.3546,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.12193028628826141,
|
|
"margin_dpo/beta_margin_grad_std": 0.19042545557022095,
|
|
"margin_dpo/beta_margin_mean": 3.392923593521118,
|
|
"margin_dpo/beta_margin_std": 2.4047834873199463,
|
|
"margin_dpo/loss_margin_mean": 33.929237365722656,
|
|
"margin_dpo/margin_mean": 33.929237365722656,
|
|
"margin_dpo/margin_std": 23.769535064697266,
|
|
"step": 463
|
|
},
|
|
{
|
|
"epoch": 0.6813509544787077,
|
|
"grad_norm": 54.16157531738281,
|
|
"learning_rate": 1.4086882387355658e-07,
|
|
"logits/chosen": -0.6251201629638672,
|
|
"logits/rejected": -0.6308864951133728,
|
|
"logps/chosen": -73.30712890625,
|
|
"logps/ref_chosen": -50.91230010986328,
|
|
"logps/ref_rejected": -102.4893798828125,
|
|
"logps/rejected": -159.47793579101562,
|
|
"loss": 0.4531,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.14893580973148346,
|
|
"margin_dpo/beta_margin_grad_std": 0.20893022418022156,
|
|
"margin_dpo/beta_margin_mean": 3.4593725204467773,
|
|
"margin_dpo/beta_margin_std": 2.9905498027801514,
|
|
"margin_dpo/loss_margin_mean": 34.593727111816406,
|
|
"margin_dpo/margin_mean": 34.593727111816406,
|
|
"margin_dpo/margin_std": 29.88116455078125,
|
|
"step": 464
|
|
},
|
|
{
|
|
"epoch": 0.6828193832599119,
|
|
"grad_norm": 50.176815032958984,
|
|
"learning_rate": 1.3971566441730714e-07,
|
|
"logits/chosen": -0.6043756008148193,
|
|
"logits/rejected": -0.5841087102890015,
|
|
"logps/chosen": -81.1992416381836,
|
|
"logps/ref_chosen": -60.116851806640625,
|
|
"logps/ref_rejected": -113.94602966308594,
|
|
"logps/rejected": -172.650634765625,
|
|
"loss": 0.2808,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.10672765225172043,
|
|
"margin_dpo/beta_margin_grad_std": 0.16949497163295746,
|
|
"margin_dpo/beta_margin_mean": 3.7622225284576416,
|
|
"margin_dpo/beta_margin_std": 2.543063163757324,
|
|
"margin_dpo/loss_margin_mean": 37.622222900390625,
|
|
"margin_dpo/margin_mean": 37.622222900390625,
|
|
"margin_dpo/margin_std": 25.363601684570312,
|
|
"step": 465
|
|
},
|
|
{
|
|
"epoch": 0.684287812041116,
|
|
"grad_norm": 57.16488265991211,
|
|
"learning_rate": 1.3856541105586545e-07,
|
|
"logits/chosen": -0.6066223382949829,
|
|
"logits/rejected": -0.5759164094924927,
|
|
"logps/chosen": -75.47810363769531,
|
|
"logps/ref_chosen": -52.920921325683594,
|
|
"logps/ref_rejected": -90.3154296875,
|
|
"logps/rejected": -146.87469482421875,
|
|
"loss": 0.3827,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.12489843368530273,
|
|
"margin_dpo/beta_margin_grad_std": 0.20490986108779907,
|
|
"margin_dpo/beta_margin_mean": 3.4002089500427246,
|
|
"margin_dpo/beta_margin_std": 2.3887243270874023,
|
|
"margin_dpo/loss_margin_mean": 34.00209045410156,
|
|
"margin_dpo/margin_mean": 34.00209045410156,
|
|
"margin_dpo/margin_std": 23.383773803710938,
|
|
"step": 466
|
|
},
|
|
{
|
|
"epoch": 0.6857562408223201,
|
|
"grad_norm": 46.38023376464844,
|
|
"learning_rate": 1.3741809409947729e-07,
|
|
"logits/chosen": -0.6104651689529419,
|
|
"logits/rejected": -0.5786043405532837,
|
|
"logps/chosen": -101.92547607421875,
|
|
"logps/ref_chosen": -78.7158203125,
|
|
"logps/ref_rejected": -102.86019897460938,
|
|
"logps/rejected": -160.5396270751953,
|
|
"loss": 0.3729,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1380797028541565,
|
|
"margin_dpo/beta_margin_grad_std": 0.1899155229330063,
|
|
"margin_dpo/beta_margin_mean": 3.4469778537750244,
|
|
"margin_dpo/beta_margin_std": 2.8059871196746826,
|
|
"margin_dpo/loss_margin_mean": 34.46977615356445,
|
|
"margin_dpo/margin_mean": 34.46977996826172,
|
|
"margin_dpo/margin_std": 27.862186431884766,
|
|
"step": 467
|
|
},
|
|
{
|
|
"epoch": 0.6872246696035242,
|
|
"grad_norm": 55.537410736083984,
|
|
"learning_rate": 1.362737437810114e-07,
|
|
"logits/chosen": -0.6192047595977783,
|
|
"logits/rejected": -0.5922250747680664,
|
|
"logps/chosen": -89.64823913574219,
|
|
"logps/ref_chosen": -69.93536376953125,
|
|
"logps/ref_rejected": -101.02881622314453,
|
|
"logps/rejected": -152.7930450439453,
|
|
"loss": 0.3886,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1464032083749771,
|
|
"margin_dpo/beta_margin_grad_std": 0.18942488729953766,
|
|
"margin_dpo/beta_margin_mean": 3.2051353454589844,
|
|
"margin_dpo/beta_margin_std": 2.6790554523468018,
|
|
"margin_dpo/loss_margin_mean": 32.051353454589844,
|
|
"margin_dpo/margin_mean": 32.051353454589844,
|
|
"margin_dpo/margin_std": 26.76758575439453,
|
|
"step": 468
|
|
},
|
|
{
|
|
"epoch": 0.6886930983847284,
|
|
"grad_norm": 57.15333938598633,
|
|
"learning_rate": 1.351323902551631e-07,
|
|
"logits/chosen": -0.6003662347793579,
|
|
"logits/rejected": -0.5658551454544067,
|
|
"logps/chosen": -91.19867706298828,
|
|
"logps/ref_chosen": -68.12469482421875,
|
|
"logps/ref_rejected": -104.78640747070312,
|
|
"logps/rejected": -161.0380401611328,
|
|
"loss": 0.4299,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.14955471456050873,
|
|
"margin_dpo/beta_margin_grad_std": 0.21639080345630646,
|
|
"margin_dpo/beta_margin_mean": 3.317765474319458,
|
|
"margin_dpo/beta_margin_std": 2.7324230670928955,
|
|
"margin_dpo/loss_margin_mean": 33.17765426635742,
|
|
"margin_dpo/margin_mean": 33.17765426635742,
|
|
"margin_dpo/margin_std": 27.252918243408203,
|
|
"step": 469
|
|
},
|
|
{
|
|
"epoch": 0.6901615271659325,
|
|
"grad_norm": 41.5388298034668,
|
|
"learning_rate": 1.339940635976592e-07,
|
|
"logits/chosen": -0.5871062278747559,
|
|
"logits/rejected": -0.5616201162338257,
|
|
"logps/chosen": -64.00105285644531,
|
|
"logps/ref_chosen": -43.79193115234375,
|
|
"logps/ref_rejected": -82.70285034179688,
|
|
"logps/rejected": -141.2603759765625,
|
|
"loss": 0.2368,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.09441064298152924,
|
|
"margin_dpo/beta_margin_grad_std": 0.15031108260154724,
|
|
"margin_dpo/beta_margin_mean": 3.8348402976989746,
|
|
"margin_dpo/beta_margin_std": 2.4011597633361816,
|
|
"margin_dpo/loss_margin_mean": 38.3484001159668,
|
|
"margin_dpo/margin_mean": 38.34840393066406,
|
|
"margin_dpo/margin_std": 23.939483642578125,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.6916299559471366,
|
|
"grad_norm": 54.3143310546875,
|
|
"learning_rate": 1.3285879380446563e-07,
|
|
"logits/chosen": -0.5919795036315918,
|
|
"logits/rejected": -0.5648236870765686,
|
|
"logps/chosen": -87.58413696289062,
|
|
"logps/ref_chosen": -63.33952331542969,
|
|
"logps/ref_rejected": -83.61048126220703,
|
|
"logps/rejected": -139.228271484375,
|
|
"loss": 0.4208,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1538044661283493,
|
|
"margin_dpo/beta_margin_grad_std": 0.2021295428276062,
|
|
"margin_dpo/beta_margin_mean": 3.1373167037963867,
|
|
"margin_dpo/beta_margin_std": 2.457742929458618,
|
|
"margin_dpo/loss_margin_mean": 31.373167037963867,
|
|
"margin_dpo/margin_mean": 31.3731689453125,
|
|
"margin_dpo/margin_std": 24.42245101928711,
|
|
"step": 471
|
|
},
|
|
{
|
|
"epoch": 0.6930983847283406,
|
|
"grad_norm": 50.913185119628906,
|
|
"learning_rate": 1.317266107909975e-07,
|
|
"logits/chosen": -0.6416307687759399,
|
|
"logits/rejected": -0.5852631330490112,
|
|
"logps/chosen": -104.90176391601562,
|
|
"logps/ref_chosen": -83.66609954833984,
|
|
"logps/ref_rejected": -117.20919799804688,
|
|
"logps/rejected": -178.68946838378906,
|
|
"loss": 0.3109,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.11783776432275772,
|
|
"margin_dpo/beta_margin_grad_std": 0.1787194162607193,
|
|
"margin_dpo/beta_margin_mean": 4.02446174621582,
|
|
"margin_dpo/beta_margin_std": 3.3216898441314697,
|
|
"margin_dpo/loss_margin_mean": 40.2446174621582,
|
|
"margin_dpo/margin_mean": 40.24461364746094,
|
|
"margin_dpo/margin_std": 33.13086700439453,
|
|
"step": 472
|
|
},
|
|
{
|
|
"epoch": 0.6945668135095447,
|
|
"grad_norm": 78.06228637695312,
|
|
"learning_rate": 1.3059754439133002e-07,
|
|
"logits/chosen": -0.5605521202087402,
|
|
"logits/rejected": -0.5147773623466492,
|
|
"logps/chosen": -87.47222900390625,
|
|
"logps/ref_chosen": -63.49696731567383,
|
|
"logps/ref_rejected": -81.14657592773438,
|
|
"logps/rejected": -133.27700805664062,
|
|
"loss": 0.4899,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.17109636962413788,
|
|
"margin_dpo/beta_margin_grad_std": 0.22186963260173798,
|
|
"margin_dpo/beta_margin_mean": 2.8155159950256348,
|
|
"margin_dpo/beta_margin_std": 2.263782501220703,
|
|
"margin_dpo/loss_margin_mean": 28.15515899658203,
|
|
"margin_dpo/margin_mean": 28.15515899658203,
|
|
"margin_dpo/margin_std": 22.536598205566406,
|
|
"step": 473
|
|
},
|
|
{
|
|
"epoch": 0.6960352422907489,
|
|
"grad_norm": 60.656185150146484,
|
|
"learning_rate": 1.2947162435741277e-07,
|
|
"logits/chosen": -0.5783928632736206,
|
|
"logits/rejected": -0.5659887790679932,
|
|
"logps/chosen": -76.55195617675781,
|
|
"logps/ref_chosen": -52.6119384765625,
|
|
"logps/ref_rejected": -90.08041381835938,
|
|
"logps/rejected": -144.70611572265625,
|
|
"loss": 0.4737,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.16651608049869537,
|
|
"margin_dpo/beta_margin_grad_std": 0.22569791972637177,
|
|
"margin_dpo/beta_margin_mean": 3.0685691833496094,
|
|
"margin_dpo/beta_margin_std": 2.5612540245056152,
|
|
"margin_dpo/loss_margin_mean": 30.68568992614746,
|
|
"margin_dpo/margin_mean": 30.685691833496094,
|
|
"margin_dpo/margin_std": 25.531770706176758,
|
|
"step": 474
|
|
},
|
|
{
|
|
"epoch": 0.697503671071953,
|
|
"grad_norm": 43.59295654296875,
|
|
"learning_rate": 1.2834888035828596e-07,
|
|
"logits/chosen": -0.62577223777771,
|
|
"logits/rejected": -0.6225380897521973,
|
|
"logps/chosen": -63.40654754638672,
|
|
"logps/ref_chosen": -42.49519348144531,
|
|
"logps/ref_rejected": -90.06295013427734,
|
|
"logps/rejected": -145.40371704101562,
|
|
"loss": 0.3844,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1439659297466278,
|
|
"margin_dpo/beta_margin_grad_std": 0.19135436415672302,
|
|
"margin_dpo/beta_margin_mean": 3.442941188812256,
|
|
"margin_dpo/beta_margin_std": 3.0452959537506104,
|
|
"margin_dpo/loss_margin_mean": 34.429412841796875,
|
|
"margin_dpo/margin_mean": 34.429412841796875,
|
|
"margin_dpo/margin_std": 30.290939331054688,
|
|
"step": 475
|
|
},
|
|
{
|
|
"epoch": 0.6989720998531571,
|
|
"grad_norm": 69.43315124511719,
|
|
"learning_rate": 1.2722934197929802e-07,
|
|
"logits/chosen": -0.5941322445869446,
|
|
"logits/rejected": -0.5612877607345581,
|
|
"logps/chosen": -64.73588562011719,
|
|
"logps/ref_chosen": -42.949378967285156,
|
|
"logps/ref_rejected": -73.71023559570312,
|
|
"logps/rejected": -125.85054016113281,
|
|
"loss": 0.5114,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.17846056818962097,
|
|
"margin_dpo/beta_margin_grad_std": 0.22891968488693237,
|
|
"margin_dpo/beta_margin_mean": 3.0353803634643555,
|
|
"margin_dpo/beta_margin_std": 2.6748228073120117,
|
|
"margin_dpo/loss_margin_mean": 30.353801727294922,
|
|
"margin_dpo/margin_mean": 30.353801727294922,
|
|
"margin_dpo/margin_std": 26.741519927978516,
|
|
"step": 476
|
|
},
|
|
{
|
|
"epoch": 0.7004405286343612,
|
|
"grad_norm": 81.28004455566406,
|
|
"learning_rate": 1.2611303872132631e-07,
|
|
"logits/chosen": -0.6341814994812012,
|
|
"logits/rejected": -0.5662086009979248,
|
|
"logps/chosen": -95.98014831542969,
|
|
"logps/ref_chosen": -70.77261352539062,
|
|
"logps/ref_rejected": -76.13737487792969,
|
|
"logps/rejected": -133.20254516601562,
|
|
"loss": 0.6021,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.15644104778766632,
|
|
"margin_dpo/beta_margin_grad_std": 0.24381397664546967,
|
|
"margin_dpo/beta_margin_mean": 3.185762882232666,
|
|
"margin_dpo/beta_margin_std": 2.7865076065063477,
|
|
"margin_dpo/loss_margin_mean": 31.857629776000977,
|
|
"margin_dpo/margin_mean": 31.857627868652344,
|
|
"margin_dpo/margin_std": 27.68490982055664,
|
|
"step": 477
|
|
},
|
|
{
|
|
"epoch": 0.7019089574155654,
|
|
"grad_norm": 48.535404205322266,
|
|
"learning_rate": 1.2500000000000005e-07,
|
|
"logits/chosen": -0.5645046234130859,
|
|
"logits/rejected": -0.5495598316192627,
|
|
"logps/chosen": -61.48149871826172,
|
|
"logps/ref_chosen": -41.440513610839844,
|
|
"logps/ref_rejected": -85.36196899414062,
|
|
"logps/rejected": -139.90025329589844,
|
|
"loss": 0.4001,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.14866864681243896,
|
|
"margin_dpo/beta_margin_grad_std": 0.18979746103286743,
|
|
"margin_dpo/beta_margin_mean": 3.4497292041778564,
|
|
"margin_dpo/beta_margin_std": 2.9467873573303223,
|
|
"margin_dpo/loss_margin_mean": 34.497291564941406,
|
|
"margin_dpo/margin_mean": 34.497291564941406,
|
|
"margin_dpo/margin_std": 29.08106231689453,
|
|
"step": 478
|
|
},
|
|
{
|
|
"epoch": 0.7033773861967695,
|
|
"grad_norm": 56.268714904785156,
|
|
"learning_rate": 1.2389025514492456e-07,
|
|
"logits/chosen": -0.558883786201477,
|
|
"logits/rejected": -0.5508110523223877,
|
|
"logps/chosen": -79.05259704589844,
|
|
"logps/ref_chosen": -53.907920837402344,
|
|
"logps/ref_rejected": -95.1163330078125,
|
|
"logps/rejected": -150.62954711914062,
|
|
"loss": 0.4427,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.15836496651172638,
|
|
"margin_dpo/beta_margin_grad_std": 0.21574333310127258,
|
|
"margin_dpo/beta_margin_mean": 3.036853313446045,
|
|
"margin_dpo/beta_margin_std": 2.2986533641815186,
|
|
"margin_dpo/loss_margin_mean": 30.368532180786133,
|
|
"margin_dpo/margin_mean": 30.368532180786133,
|
|
"margin_dpo/margin_std": 22.058135986328125,
|
|
"step": 479
|
|
},
|
|
{
|
|
"epoch": 0.7048458149779736,
|
|
"grad_norm": 73.03453826904297,
|
|
"learning_rate": 1.227838333989088e-07,
|
|
"logits/chosen": -0.5816048979759216,
|
|
"logits/rejected": -0.523268461227417,
|
|
"logps/chosen": -84.984619140625,
|
|
"logps/ref_chosen": -58.682701110839844,
|
|
"logps/ref_rejected": -82.93248748779297,
|
|
"logps/rejected": -145.50759887695312,
|
|
"loss": 0.5051,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.15481433272361755,
|
|
"margin_dpo/beta_margin_grad_std": 0.23069554567337036,
|
|
"margin_dpo/beta_margin_mean": 3.627319097518921,
|
|
"margin_dpo/beta_margin_std": 3.305205821990967,
|
|
"margin_dpo/loss_margin_mean": 36.273189544677734,
|
|
"margin_dpo/margin_mean": 36.273189544677734,
|
|
"margin_dpo/margin_std": 31.943330764770508,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.7063142437591777,
|
|
"grad_norm": 53.513370513916016,
|
|
"learning_rate": 1.2168076391719489e-07,
|
|
"logits/chosen": -0.6167398691177368,
|
|
"logits/rejected": -0.5804057121276855,
|
|
"logps/chosen": -79.90770721435547,
|
|
"logps/ref_chosen": -54.964271545410156,
|
|
"logps/ref_rejected": -92.42044067382812,
|
|
"logps/rejected": -152.1048583984375,
|
|
"loss": 0.4399,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.14174267649650574,
|
|
"margin_dpo/beta_margin_grad_std": 0.2210419625043869,
|
|
"margin_dpo/beta_margin_mean": 3.4740993976593018,
|
|
"margin_dpo/beta_margin_std": 2.713843822479248,
|
|
"margin_dpo/loss_margin_mean": 34.74099349975586,
|
|
"margin_dpo/margin_mean": 34.74099349975586,
|
|
"margin_dpo/margin_std": 26.750259399414062,
|
|
"step": 481
|
|
},
|
|
{
|
|
"epoch": 0.7077826725403817,
|
|
"grad_norm": 54.49075698852539,
|
|
"learning_rate": 1.2058107576668938e-07,
|
|
"logits/chosen": -0.5958288908004761,
|
|
"logits/rejected": -0.5650321841239929,
|
|
"logps/chosen": -89.89315795898438,
|
|
"logps/ref_chosen": -67.55347442626953,
|
|
"logps/ref_rejected": -87.58953857421875,
|
|
"logps/rejected": -140.00283813476562,
|
|
"loss": 0.4309,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1658599078655243,
|
|
"margin_dpo/beta_margin_grad_std": 0.1872914731502533,
|
|
"margin_dpo/beta_margin_mean": 3.0073630809783936,
|
|
"margin_dpo/beta_margin_std": 2.6090502738952637,
|
|
"margin_dpo/loss_margin_mean": 30.073631286621094,
|
|
"margin_dpo/margin_mean": 30.073631286621094,
|
|
"margin_dpo/margin_std": 25.875329971313477,
|
|
"step": 482
|
|
},
|
|
{
|
|
"epoch": 0.7092511013215859,
|
|
"grad_norm": 65.8294677734375,
|
|
"learning_rate": 1.194847979251979e-07,
|
|
"logits/chosen": -0.6282751560211182,
|
|
"logits/rejected": -0.5697331428527832,
|
|
"logps/chosen": -88.70866394042969,
|
|
"logps/ref_chosen": -63.32981872558594,
|
|
"logps/ref_rejected": -95.78697204589844,
|
|
"logps/rejected": -156.66552734375,
|
|
"loss": 0.3968,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.13324548304080963,
|
|
"margin_dpo/beta_margin_grad_std": 0.21172069013118744,
|
|
"margin_dpo/beta_margin_mean": 3.5499703884124756,
|
|
"margin_dpo/beta_margin_std": 2.7894442081451416,
|
|
"margin_dpo/loss_margin_mean": 35.49970245361328,
|
|
"margin_dpo/margin_mean": 35.49970245361328,
|
|
"margin_dpo/margin_std": 27.264251708984375,
|
|
"step": 483
|
|
},
|
|
{
|
|
"epoch": 0.71071953010279,
|
|
"grad_norm": 55.688453674316406,
|
|
"learning_rate": 1.1839195928066101e-07,
|
|
"logits/chosen": -0.6670191287994385,
|
|
"logits/rejected": -0.6306544542312622,
|
|
"logps/chosen": -80.87345886230469,
|
|
"logps/ref_chosen": -59.13812255859375,
|
|
"logps/ref_rejected": -84.37144470214844,
|
|
"logps/rejected": -141.7012939453125,
|
|
"loss": 0.3678,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1434181034564972,
|
|
"margin_dpo/beta_margin_grad_std": 0.1813618689775467,
|
|
"margin_dpo/beta_margin_mean": 3.559450626373291,
|
|
"margin_dpo/beta_margin_std": 3.0208792686462402,
|
|
"margin_dpo/loss_margin_mean": 35.594505310058594,
|
|
"margin_dpo/margin_mean": 35.594505310058594,
|
|
"margin_dpo/margin_std": 29.648942947387695,
|
|
"step": 484
|
|
},
|
|
{
|
|
"epoch": 0.7121879588839941,
|
|
"grad_norm": 52.00883483886719,
|
|
"learning_rate": 1.1730258863039347e-07,
|
|
"logits/chosen": -0.5754466652870178,
|
|
"logits/rejected": -0.5443192720413208,
|
|
"logps/chosen": -77.70271301269531,
|
|
"logps/ref_chosen": -58.849571228027344,
|
|
"logps/ref_rejected": -103.36408996582031,
|
|
"logps/rejected": -162.49534606933594,
|
|
"loss": 0.4199,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.14156897366046906,
|
|
"margin_dpo/beta_margin_grad_std": 0.2183229923248291,
|
|
"margin_dpo/beta_margin_mean": 4.0278120040893555,
|
|
"margin_dpo/beta_margin_std": 3.2175581455230713,
|
|
"margin_dpo/loss_margin_mean": 40.27811813354492,
|
|
"margin_dpo/margin_mean": 40.278114318847656,
|
|
"margin_dpo/margin_std": 32.148040771484375,
|
|
"step": 485
|
|
},
|
|
{
|
|
"epoch": 0.7136563876651982,
|
|
"grad_norm": 66.21233367919922,
|
|
"learning_rate": 1.1621671468032493e-07,
|
|
"logits/chosen": -0.6356394290924072,
|
|
"logits/rejected": -0.5828511714935303,
|
|
"logps/chosen": -77.98770904541016,
|
|
"logps/ref_chosen": -55.25966262817383,
|
|
"logps/ref_rejected": -92.13936614990234,
|
|
"logps/rejected": -153.8128204345703,
|
|
"loss": 0.424,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.14394216239452362,
|
|
"margin_dpo/beta_margin_grad_std": 0.21945635974407196,
|
|
"margin_dpo/beta_margin_mean": 3.8945412635803223,
|
|
"margin_dpo/beta_margin_std": 3.0784053802490234,
|
|
"margin_dpo/loss_margin_mean": 38.945411682128906,
|
|
"margin_dpo/margin_mean": 38.945411682128906,
|
|
"margin_dpo/margin_std": 30.78309440612793,
|
|
"step": 486
|
|
},
|
|
{
|
|
"epoch": 0.7151248164464024,
|
|
"grad_norm": 49.65977096557617,
|
|
"learning_rate": 1.1513436604424378e-07,
|
|
"logits/chosen": -0.6391937732696533,
|
|
"logits/rejected": -0.604444682598114,
|
|
"logps/chosen": -75.19181060791016,
|
|
"logps/ref_chosen": -53.06330871582031,
|
|
"logps/ref_rejected": -92.4188232421875,
|
|
"logps/rejected": -151.7467041015625,
|
|
"loss": 0.3256,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.12563364207744598,
|
|
"margin_dpo/beta_margin_grad_std": 0.17673608660697937,
|
|
"margin_dpo/beta_margin_mean": 3.7199385166168213,
|
|
"margin_dpo/beta_margin_std": 2.6885433197021484,
|
|
"margin_dpo/loss_margin_mean": 37.19938278198242,
|
|
"margin_dpo/margin_mean": 37.19938278198242,
|
|
"margin_dpo/margin_std": 26.274166107177734,
|
|
"step": 487
|
|
},
|
|
{
|
|
"epoch": 0.7165932452276065,
|
|
"grad_norm": 32.75376510620117,
|
|
"learning_rate": 1.1405557124304335e-07,
|
|
"logits/chosen": -0.5953603386878967,
|
|
"logits/rejected": -0.563835859298706,
|
|
"logps/chosen": -72.79434204101562,
|
|
"logps/ref_chosen": -52.228153228759766,
|
|
"logps/ref_rejected": -84.00656127929688,
|
|
"logps/rejected": -136.65927124023438,
|
|
"loss": 0.2845,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.11705981194972992,
|
|
"margin_dpo/beta_margin_grad_std": 0.1456899493932724,
|
|
"margin_dpo/beta_margin_mean": 3.208653450012207,
|
|
"margin_dpo/beta_margin_std": 2.13510799407959,
|
|
"margin_dpo/loss_margin_mean": 32.08653259277344,
|
|
"margin_dpo/margin_mean": 32.08653259277344,
|
|
"margin_dpo/margin_std": 21.324390411376953,
|
|
"step": 488
|
|
},
|
|
{
|
|
"epoch": 0.7180616740088106,
|
|
"grad_norm": 55.29383850097656,
|
|
"learning_rate": 1.1298035870396985e-07,
|
|
"logits/chosen": -0.5945910215377808,
|
|
"logits/rejected": -0.5459895730018616,
|
|
"logps/chosen": -77.7701416015625,
|
|
"logps/ref_chosen": -55.989627838134766,
|
|
"logps/ref_rejected": -79.39813232421875,
|
|
"logps/rejected": -132.9573516845703,
|
|
"loss": 0.4441,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1625826209783554,
|
|
"margin_dpo/beta_margin_grad_std": 0.21051008999347687,
|
|
"margin_dpo/beta_margin_mean": 3.1778712272644043,
|
|
"margin_dpo/beta_margin_std": 2.766324520111084,
|
|
"margin_dpo/loss_margin_mean": 31.778711318969727,
|
|
"margin_dpo/margin_mean": 31.778709411621094,
|
|
"margin_dpo/margin_std": 27.465885162353516,
|
|
"step": 489
|
|
},
|
|
{
|
|
"epoch": 0.7195301027900147,
|
|
"grad_norm": 67.01080322265625,
|
|
"learning_rate": 1.1190875675987355e-07,
|
|
"logits/chosen": -0.613182783126831,
|
|
"logits/rejected": -0.6027116775512695,
|
|
"logps/chosen": -72.7847900390625,
|
|
"logps/ref_chosen": -52.36639404296875,
|
|
"logps/ref_rejected": -110.40904998779297,
|
|
"logps/rejected": -162.11245727539062,
|
|
"loss": 0.573,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.186043843626976,
|
|
"margin_dpo/beta_margin_grad_std": 0.23473787307739258,
|
|
"margin_dpo/beta_margin_mean": 3.1285009384155273,
|
|
"margin_dpo/beta_margin_std": 2.9745311737060547,
|
|
"margin_dpo/loss_margin_mean": 31.28500747680664,
|
|
"margin_dpo/margin_mean": 31.28500747680664,
|
|
"margin_dpo/margin_std": 29.551124572753906,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.7209985315712188,
|
|
"grad_norm": 71.59069061279297,
|
|
"learning_rate": 1.1084079364846241e-07,
|
|
"logits/chosen": -0.5881800651550293,
|
|
"logits/rejected": -0.5435885190963745,
|
|
"logps/chosen": -82.98500061035156,
|
|
"logps/ref_chosen": -60.11626434326172,
|
|
"logps/ref_rejected": -73.27278900146484,
|
|
"logps/rejected": -124.22119140625,
|
|
"loss": 0.5801,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1894461065530777,
|
|
"margin_dpo/beta_margin_grad_std": 0.22930499911308289,
|
|
"margin_dpo/beta_margin_mean": 2.807966709136963,
|
|
"margin_dpo/beta_margin_std": 2.785522699356079,
|
|
"margin_dpo/loss_margin_mean": 28.079666137695312,
|
|
"margin_dpo/margin_mean": 28.079666137695312,
|
|
"margin_dpo/margin_std": 27.83734893798828,
|
|
"step": 491
|
|
},
|
|
{
|
|
"epoch": 0.7224669603524229,
|
|
"grad_norm": 109.9788589477539,
|
|
"learning_rate": 1.097764975115576e-07,
|
|
"logits/chosen": -0.6198358535766602,
|
|
"logits/rejected": -0.5758175849914551,
|
|
"logps/chosen": -77.27084350585938,
|
|
"logps/ref_chosen": -53.99418258666992,
|
|
"logps/ref_rejected": -72.65962219238281,
|
|
"logps/rejected": -122.03599548339844,
|
|
"loss": 0.9317,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.2348533272743225,
|
|
"margin_dpo/beta_margin_grad_std": 0.31157541275024414,
|
|
"margin_dpo/beta_margin_mean": 2.609971046447754,
|
|
"margin_dpo/beta_margin_std": 3.011613368988037,
|
|
"margin_dpo/loss_margin_mean": 26.099708557128906,
|
|
"margin_dpo/margin_mean": 26.099708557128906,
|
|
"margin_dpo/margin_std": 29.874317169189453,
|
|
"step": 492
|
|
},
|
|
{
|
|
"epoch": 0.723935389133627,
|
|
"grad_norm": 69.1717529296875,
|
|
"learning_rate": 1.0871589639435203e-07,
|
|
"logits/chosen": -0.6741948127746582,
|
|
"logits/rejected": -0.6164962649345398,
|
|
"logps/chosen": -95.62208557128906,
|
|
"logps/ref_chosen": -75.49723815917969,
|
|
"logps/ref_rejected": -87.32301330566406,
|
|
"logps/rejected": -140.3636016845703,
|
|
"loss": 0.4661,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.15023410320281982,
|
|
"margin_dpo/beta_margin_grad_std": 0.22642172873020172,
|
|
"margin_dpo/beta_margin_mean": 3.2915735244750977,
|
|
"margin_dpo/beta_margin_std": 2.6520345211029053,
|
|
"margin_dpo/loss_margin_mean": 32.91573715209961,
|
|
"margin_dpo/margin_mean": 32.91573715209961,
|
|
"margin_dpo/margin_std": 26.319190979003906,
|
|
"step": 493
|
|
},
|
|
{
|
|
"epoch": 0.7254038179148311,
|
|
"grad_norm": 75.6004867553711,
|
|
"learning_rate": 1.0765901824467166e-07,
|
|
"logits/chosen": -0.5462692379951477,
|
|
"logits/rejected": -0.5368998050689697,
|
|
"logps/chosen": -63.30023956298828,
|
|
"logps/ref_chosen": -41.35926818847656,
|
|
"logps/ref_rejected": -86.09136962890625,
|
|
"logps/rejected": -143.49691772460938,
|
|
"loss": 0.5108,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1606236845254898,
|
|
"margin_dpo/beta_margin_grad_std": 0.23982644081115723,
|
|
"margin_dpo/beta_margin_mean": 3.546459197998047,
|
|
"margin_dpo/beta_margin_std": 3.0083138942718506,
|
|
"margin_dpo/loss_margin_mean": 35.46459197998047,
|
|
"margin_dpo/margin_mean": 35.46459197998047,
|
|
"margin_dpo/margin_std": 29.650789260864258,
|
|
"step": 494
|
|
},
|
|
{
|
|
"epoch": 0.7268722466960352,
|
|
"grad_norm": 67.52748107910156,
|
|
"learning_rate": 1.0660589091223854e-07,
|
|
"logits/chosen": -0.6319386959075928,
|
|
"logits/rejected": -0.5911184549331665,
|
|
"logps/chosen": -84.92739868164062,
|
|
"logps/ref_chosen": -63.53507995605469,
|
|
"logps/ref_rejected": -91.42443084716797,
|
|
"logps/rejected": -145.19595336914062,
|
|
"loss": 0.5177,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.15922006964683533,
|
|
"margin_dpo/beta_margin_grad_std": 0.22867868840694427,
|
|
"margin_dpo/beta_margin_mean": 3.2379212379455566,
|
|
"margin_dpo/beta_margin_std": 2.7765204906463623,
|
|
"margin_dpo/loss_margin_mean": 32.37921142578125,
|
|
"margin_dpo/margin_mean": 32.37921142578125,
|
|
"margin_dpo/margin_std": 27.550233840942383,
|
|
"step": 495
|
|
},
|
|
{
|
|
"epoch": 0.7283406754772394,
|
|
"grad_norm": 64.98089599609375,
|
|
"learning_rate": 1.0555654214793722e-07,
|
|
"logits/chosen": -0.6621850728988647,
|
|
"logits/rejected": -0.6073780655860901,
|
|
"logps/chosen": -96.5443115234375,
|
|
"logps/ref_chosen": -72.59192657470703,
|
|
"logps/ref_rejected": -84.32933807373047,
|
|
"logps/rejected": -136.9782257080078,
|
|
"loss": 0.5292,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.17409659922122955,
|
|
"margin_dpo/beta_margin_grad_std": 0.22257588803768158,
|
|
"margin_dpo/beta_margin_mean": 2.869650363922119,
|
|
"margin_dpo/beta_margin_std": 2.5869693756103516,
|
|
"margin_dpo/loss_margin_mean": 28.696502685546875,
|
|
"margin_dpo/margin_mean": 28.696502685546875,
|
|
"margin_dpo/margin_std": 25.838706970214844,
|
|
"step": 496
|
|
},
|
|
{
|
|
"epoch": 0.7298091042584435,
|
|
"grad_norm": 77.44481658935547,
|
|
"learning_rate": 1.0451099960308374e-07,
|
|
"logits/chosen": -0.6251211166381836,
|
|
"logits/rejected": -0.5778101682662964,
|
|
"logps/chosen": -83.82826232910156,
|
|
"logps/ref_chosen": -58.593971252441406,
|
|
"logps/ref_rejected": -76.28836822509766,
|
|
"logps/rejected": -129.9313201904297,
|
|
"loss": 0.613,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.20568180084228516,
|
|
"margin_dpo/beta_margin_grad_std": 0.23945844173431396,
|
|
"margin_dpo/beta_margin_mean": 2.8408656120300293,
|
|
"margin_dpo/beta_margin_std": 2.808185577392578,
|
|
"margin_dpo/loss_margin_mean": 28.408655166625977,
|
|
"margin_dpo/margin_mean": 28.408655166625977,
|
|
"margin_dpo/margin_std": 27.508472442626953,
|
|
"step": 497
|
|
},
|
|
{
|
|
"epoch": 0.7312775330396476,
|
|
"grad_norm": 67.77000427246094,
|
|
"learning_rate": 1.0346929082869641e-07,
|
|
"logits/chosen": -0.6193152666091919,
|
|
"logits/rejected": -0.5879042148590088,
|
|
"logps/chosen": -95.3200912475586,
|
|
"logps/ref_chosen": -71.20565795898438,
|
|
"logps/ref_rejected": -83.95803833007812,
|
|
"logps/rejected": -139.05723571777344,
|
|
"loss": 0.5312,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.17250090837478638,
|
|
"margin_dpo/beta_margin_grad_std": 0.23728637397289276,
|
|
"margin_dpo/beta_margin_mean": 3.098475933074951,
|
|
"margin_dpo/beta_margin_std": 2.798676013946533,
|
|
"margin_dpo/loss_margin_mean": 30.984760284423828,
|
|
"margin_dpo/margin_mean": 30.984760284423828,
|
|
"margin_dpo/margin_std": 27.886219024658203,
|
|
"step": 498
|
|
},
|
|
{
|
|
"epoch": 0.7327459618208517,
|
|
"grad_norm": 82.45706939697266,
|
|
"learning_rate": 1.0243144327477013e-07,
|
|
"logits/chosen": -0.6282952427864075,
|
|
"logits/rejected": -0.6203751564025879,
|
|
"logps/chosen": -74.16600036621094,
|
|
"logps/ref_chosen": -51.25519561767578,
|
|
"logps/ref_rejected": -101.07870483398438,
|
|
"logps/rejected": -155.54342651367188,
|
|
"loss": 0.6873,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.179952934384346,
|
|
"margin_dpo/beta_margin_grad_std": 0.2602365016937256,
|
|
"margin_dpo/beta_margin_mean": 3.155392646789551,
|
|
"margin_dpo/beta_margin_std": 3.0621047019958496,
|
|
"margin_dpo/loss_margin_mean": 31.553926467895508,
|
|
"margin_dpo/margin_mean": 31.553926467895508,
|
|
"margin_dpo/margin_std": 30.37271499633789,
|
|
"step": 499
|
|
},
|
|
{
|
|
"epoch": 0.7342143906020558,
|
|
"grad_norm": 45.40144729614258,
|
|
"learning_rate": 1.0139748428955333e-07,
|
|
"logits/chosen": -0.6029895544052124,
|
|
"logits/rejected": -0.5873157382011414,
|
|
"logps/chosen": -82.48162841796875,
|
|
"logps/ref_chosen": -57.027442932128906,
|
|
"logps/ref_rejected": -93.93421173095703,
|
|
"logps/rejected": -153.21792602539062,
|
|
"loss": 0.3799,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.13710999488830566,
|
|
"margin_dpo/beta_margin_grad_std": 0.19010357558727264,
|
|
"margin_dpo/beta_margin_mean": 3.3829522132873535,
|
|
"margin_dpo/beta_margin_std": 2.9376726150512695,
|
|
"margin_dpo/loss_margin_mean": 33.82952117919922,
|
|
"margin_dpo/margin_mean": 33.82952117919922,
|
|
"margin_dpo/margin_std": 29.12575340270996,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.7342143906020558,
|
|
"eval_logits/chosen": -0.6185933947563171,
|
|
"eval_logits/rejected": -0.5921938419342041,
|
|
"eval_logps/chosen": -106.39403533935547,
|
|
"eval_logps/ref_chosen": -79.05104064941406,
|
|
"eval_logps/ref_rejected": -86.79793548583984,
|
|
"eval_logps/rejected": -135.7742156982422,
|
|
"eval_loss": 0.41000303626060486,
|
|
"eval_margin_dpo/beta": 0.10000000149011612,
|
|
"eval_margin_dpo/beta_margin_grad_mean": -0.25857067108154297,
|
|
"eval_margin_dpo/beta_margin_grad_std": 0.2553975284099579,
|
|
"eval_margin_dpo/beta_margin_mean": 2.1633284091949463,
|
|
"eval_margin_dpo/beta_margin_std": 2.637815237045288,
|
|
"eval_margin_dpo/loss_margin_mean": 21.633283615112305,
|
|
"eval_margin_dpo/margin_mean": 21.63328742980957,
|
|
"eval_margin_dpo/margin_std": 26.378154754638672,
|
|
"eval_runtime": 40.1427,
|
|
"eval_samples_per_second": 58.267,
|
|
"eval_steps_per_second": 1.843,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.73568281938326,
|
|
"grad_norm": 53.82852554321289,
|
|
"learning_rate": 1.0036744111882672e-07,
|
|
"logits/chosen": -0.6225707530975342,
|
|
"logits/rejected": -0.581325888633728,
|
|
"logps/chosen": -76.03878784179688,
|
|
"logps/ref_chosen": -54.359527587890625,
|
|
"logps/ref_rejected": -80.15670776367188,
|
|
"logps/rejected": -139.12896728515625,
|
|
"loss": 0.3404,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.12611275911331177,
|
|
"margin_dpo/beta_margin_grad_std": 0.1843045949935913,
|
|
"margin_dpo/beta_margin_mean": 3.729299545288086,
|
|
"margin_dpo/beta_margin_std": 2.9057199954986572,
|
|
"margin_dpo/loss_margin_mean": 37.29299545288086,
|
|
"margin_dpo/margin_mean": 37.292991638183594,
|
|
"margin_dpo/margin_std": 28.878681182861328,
|
|
"step": 501
|
|
},
|
|
{
|
|
"epoch": 0.737151248164464,
|
|
"grad_norm": 45.499080657958984,
|
|
"learning_rate": 9.934134090518592e-08,
|
|
"logits/chosen": -0.6202658414840698,
|
|
"logits/rejected": -0.5605558156967163,
|
|
"logps/chosen": -90.09158325195312,
|
|
"logps/ref_chosen": -67.60050964355469,
|
|
"logps/ref_rejected": -82.94876098632812,
|
|
"logps/rejected": -139.19085693359375,
|
|
"loss": 0.3223,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.11901772767305374,
|
|
"margin_dpo/beta_margin_grad_std": 0.18556702136993408,
|
|
"margin_dpo/beta_margin_mean": 3.375103235244751,
|
|
"margin_dpo/beta_margin_std": 2.1826813220977783,
|
|
"margin_dpo/loss_margin_mean": 33.75102996826172,
|
|
"margin_dpo/margin_mean": 33.75102996826172,
|
|
"margin_dpo/margin_std": 20.78955078125,
|
|
"step": 502
|
|
},
|
|
{
|
|
"epoch": 0.7386196769456681,
|
|
"grad_norm": 51.559326171875,
|
|
"learning_rate": 9.831921068732571e-08,
|
|
"logits/chosen": -0.572176456451416,
|
|
"logits/rejected": -0.5319969654083252,
|
|
"logps/chosen": -75.5657958984375,
|
|
"logps/ref_chosen": -55.078407287597656,
|
|
"logps/ref_rejected": -82.50544738769531,
|
|
"logps/rejected": -137.12274169921875,
|
|
"loss": 0.3974,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.13606566190719604,
|
|
"margin_dpo/beta_margin_grad_std": 0.21487735211849213,
|
|
"margin_dpo/beta_margin_mean": 3.4129903316497803,
|
|
"margin_dpo/beta_margin_std": 2.352537155151367,
|
|
"margin_dpo/loss_margin_mean": 34.12990188598633,
|
|
"margin_dpo/margin_mean": 34.12990188598633,
|
|
"margin_dpo/margin_std": 23.329858779907227,
|
|
"step": 503
|
|
},
|
|
{
|
|
"epoch": 0.7400881057268722,
|
|
"grad_norm": 33.44047546386719,
|
|
"learning_rate": 9.730107739932805e-08,
|
|
"logits/chosen": -0.6334669589996338,
|
|
"logits/rejected": -0.614444375038147,
|
|
"logps/chosen": -79.06010437011719,
|
|
"logps/ref_chosen": -59.96575164794922,
|
|
"logps/ref_rejected": -103.76213073730469,
|
|
"logps/rejected": -162.8424072265625,
|
|
"loss": 0.2414,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.09301727265119553,
|
|
"margin_dpo/beta_margin_grad_std": 0.15588341653347015,
|
|
"margin_dpo/beta_margin_mean": 3.998591899871826,
|
|
"margin_dpo/beta_margin_std": 2.6279215812683105,
|
|
"margin_dpo/loss_margin_mean": 39.98591995239258,
|
|
"margin_dpo/margin_mean": 39.98591995239258,
|
|
"margin_dpo/margin_std": 25.813270568847656,
|
|
"step": 504
|
|
},
|
|
{
|
|
"epoch": 0.7415565345080763,
|
|
"grad_norm": 81.39633178710938,
|
|
"learning_rate": 9.628696786995188e-08,
|
|
"logits/chosen": -0.6545782089233398,
|
|
"logits/rejected": -0.6030818819999695,
|
|
"logps/chosen": -101.48240661621094,
|
|
"logps/ref_chosen": -76.1549072265625,
|
|
"logps/ref_rejected": -88.58537292480469,
|
|
"logps/rejected": -142.19000244140625,
|
|
"loss": 0.6802,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.20703783631324768,
|
|
"margin_dpo/beta_margin_grad_std": 0.2496814727783203,
|
|
"margin_dpo/beta_margin_mean": 2.8277130126953125,
|
|
"margin_dpo/beta_margin_std": 2.957451343536377,
|
|
"margin_dpo/loss_margin_mean": 28.277130126953125,
|
|
"margin_dpo/margin_mean": 28.277130126953125,
|
|
"margin_dpo/margin_std": 29.50797462463379,
|
|
"step": 505
|
|
},
|
|
{
|
|
"epoch": 0.7430249632892805,
|
|
"grad_norm": 67.16016387939453,
|
|
"learning_rate": 9.527690882192635e-08,
|
|
"logits/chosen": -0.6147867441177368,
|
|
"logits/rejected": -0.5790848731994629,
|
|
"logps/chosen": -70.76807403564453,
|
|
"logps/ref_chosen": -48.96050262451172,
|
|
"logps/ref_rejected": -78.41505432128906,
|
|
"logps/rejected": -134.20616149902344,
|
|
"loss": 0.4462,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1522112339735031,
|
|
"margin_dpo/beta_margin_grad_std": 0.21956755220890045,
|
|
"margin_dpo/beta_margin_mean": 3.3983535766601562,
|
|
"margin_dpo/beta_margin_std": 2.9518353939056396,
|
|
"margin_dpo/loss_margin_mean": 33.98353576660156,
|
|
"margin_dpo/margin_mean": 33.98353576660156,
|
|
"margin_dpo/margin_std": 28.88640594482422,
|
|
"step": 506
|
|
},
|
|
{
|
|
"epoch": 0.7444933920704846,
|
|
"grad_norm": 52.902652740478516,
|
|
"learning_rate": 9.427092687124691e-08,
|
|
"logits/chosen": -0.6434965133666992,
|
|
"logits/rejected": -0.6062471866607666,
|
|
"logps/chosen": -90.41842651367188,
|
|
"logps/ref_chosen": -66.80150604248047,
|
|
"logps/ref_rejected": -95.37289428710938,
|
|
"logps/rejected": -150.97418212890625,
|
|
"loss": 0.3443,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1344669610261917,
|
|
"margin_dpo/beta_margin_grad_std": 0.1805417388677597,
|
|
"margin_dpo/beta_margin_mean": 3.198436975479126,
|
|
"margin_dpo/beta_margin_std": 2.2677083015441895,
|
|
"margin_dpo/loss_margin_mean": 31.9843692779541,
|
|
"margin_dpo/margin_mean": 31.984371185302734,
|
|
"margin_dpo/margin_std": 22.465970993041992,
|
|
"step": 507
|
|
},
|
|
{
|
|
"epoch": 0.7459618208516887,
|
|
"grad_norm": 65.08528900146484,
|
|
"learning_rate": 9.326904852647344e-08,
|
|
"logits/chosen": -0.6305240392684937,
|
|
"logits/rejected": -0.5942909717559814,
|
|
"logps/chosen": -92.9295883178711,
|
|
"logps/ref_chosen": -71.303466796875,
|
|
"logps/ref_rejected": -95.6275405883789,
|
|
"logps/rejected": -149.353759765625,
|
|
"loss": 0.5068,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1743934601545334,
|
|
"margin_dpo/beta_margin_grad_std": 0.22552472352981567,
|
|
"margin_dpo/beta_margin_mean": 3.2100088596343994,
|
|
"margin_dpo/beta_margin_std": 2.8839914798736572,
|
|
"margin_dpo/loss_margin_mean": 32.1000862121582,
|
|
"margin_dpo/margin_mean": 32.1000862121582,
|
|
"margin_dpo/margin_std": 28.509567260742188,
|
|
"step": 508
|
|
},
|
|
{
|
|
"epoch": 0.7474302496328928,
|
|
"grad_norm": 70.77447509765625,
|
|
"learning_rate": 9.227130018803195e-08,
|
|
"logits/chosen": -0.6270876526832581,
|
|
"logits/rejected": -0.5867846012115479,
|
|
"logps/chosen": -85.74343872070312,
|
|
"logps/ref_chosen": -63.81895065307617,
|
|
"logps/ref_rejected": -83.25643920898438,
|
|
"logps/rejected": -137.96099853515625,
|
|
"loss": 0.4849,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.14794375002384186,
|
|
"margin_dpo/beta_margin_grad_std": 0.23274949193000793,
|
|
"margin_dpo/beta_margin_mean": 3.2780051231384277,
|
|
"margin_dpo/beta_margin_std": 2.602834463119507,
|
|
"margin_dpo/loss_margin_mean": 32.780052185058594,
|
|
"margin_dpo/margin_mean": 32.780052185058594,
|
|
"margin_dpo/margin_std": 25.765064239501953,
|
|
"step": 509
|
|
},
|
|
{
|
|
"epoch": 0.748898678414097,
|
|
"grad_norm": 56.0135498046875,
|
|
"learning_rate": 9.127770814751932e-08,
|
|
"logits/chosen": -0.5831998586654663,
|
|
"logits/rejected": -0.5671366453170776,
|
|
"logps/chosen": -78.48440551757812,
|
|
"logps/ref_chosen": -51.878448486328125,
|
|
"logps/ref_rejected": -102.7651596069336,
|
|
"logps/rejected": -169.5789031982422,
|
|
"loss": 0.4031,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.14359986782073975,
|
|
"margin_dpo/beta_margin_grad_std": 0.21300581097602844,
|
|
"margin_dpo/beta_margin_mean": 4.020778656005859,
|
|
"margin_dpo/beta_margin_std": 3.2402069568634033,
|
|
"margin_dpo/loss_margin_mean": 40.207786560058594,
|
|
"margin_dpo/margin_mean": 40.207786560058594,
|
|
"margin_dpo/margin_std": 32.260772705078125,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 0.750367107195301,
|
|
"grad_norm": 55.32306671142578,
|
|
"learning_rate": 9.028829858700973e-08,
|
|
"logits/chosen": -0.6215203404426575,
|
|
"logits/rejected": -0.586913526058197,
|
|
"logps/chosen": -82.04804229736328,
|
|
"logps/ref_chosen": -60.23811340332031,
|
|
"logps/ref_rejected": -92.85676574707031,
|
|
"logps/rejected": -150.72622680664062,
|
|
"loss": 0.4605,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1583447903394699,
|
|
"margin_dpo/beta_margin_grad_std": 0.22799551486968994,
|
|
"margin_dpo/beta_margin_mean": 3.605954170227051,
|
|
"margin_dpo/beta_margin_std": 2.9902870655059814,
|
|
"margin_dpo/loss_margin_mean": 36.05954360961914,
|
|
"margin_dpo/margin_mean": 36.059539794921875,
|
|
"margin_dpo/margin_std": 29.7518310546875,
|
|
"step": 511
|
|
},
|
|
{
|
|
"epoch": 0.7518355359765051,
|
|
"grad_norm": 43.83152389526367,
|
|
"learning_rate": 8.930309757836516e-08,
|
|
"logits/chosen": -0.6087368726730347,
|
|
"logits/rejected": -0.5697593092918396,
|
|
"logps/chosen": -79.78984069824219,
|
|
"logps/ref_chosen": -54.905494689941406,
|
|
"logps/ref_rejected": -81.87586975097656,
|
|
"logps/rejected": -142.0902099609375,
|
|
"loss": 0.3361,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.13248665630817413,
|
|
"margin_dpo/beta_margin_grad_std": 0.1739508956670761,
|
|
"margin_dpo/beta_margin_mean": 3.5329983234405518,
|
|
"margin_dpo/beta_margin_std": 2.7861456871032715,
|
|
"margin_dpo/loss_margin_mean": 35.32998275756836,
|
|
"margin_dpo/margin_mean": 35.32998275756836,
|
|
"margin_dpo/margin_std": 27.727153778076172,
|
|
"step": 512
|
|
},
|
|
{
|
|
"epoch": 0.7533039647577092,
|
|
"grad_norm": 73.83541107177734,
|
|
"learning_rate": 8.832213108254863e-08,
|
|
"logits/chosen": -0.6487611532211304,
|
|
"logits/rejected": -0.5978541374206543,
|
|
"logps/chosen": -89.40873718261719,
|
|
"logps/ref_chosen": -64.91644287109375,
|
|
"logps/ref_rejected": -76.06245422363281,
|
|
"logps/rejected": -130.85317993164062,
|
|
"loss": 0.5745,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.16774950921535492,
|
|
"margin_dpo/beta_margin_grad_std": 0.23628519475460052,
|
|
"margin_dpo/beta_margin_mean": 3.029844284057617,
|
|
"margin_dpo/beta_margin_std": 2.635484218597412,
|
|
"margin_dpo/loss_margin_mean": 30.29844093322754,
|
|
"margin_dpo/margin_mean": 30.298439025878906,
|
|
"margin_dpo/margin_std": 25.809885025024414,
|
|
"step": 513
|
|
},
|
|
{
|
|
"epoch": 0.7547723935389133,
|
|
"grad_norm": 73.3758544921875,
|
|
"learning_rate": 8.734542494893954e-08,
|
|
"logits/chosen": -0.6052289605140686,
|
|
"logits/rejected": -0.5561543703079224,
|
|
"logps/chosen": -99.59014892578125,
|
|
"logps/ref_chosen": -74.22957611083984,
|
|
"logps/ref_rejected": -78.945556640625,
|
|
"logps/rejected": -135.22900390625,
|
|
"loss": 0.6952,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.21260841190814972,
|
|
"margin_dpo/beta_margin_grad_std": 0.2700116038322449,
|
|
"margin_dpo/beta_margin_mean": 3.092288017272949,
|
|
"margin_dpo/beta_margin_std": 3.3895537853240967,
|
|
"margin_dpo/loss_margin_mean": 30.92287826538086,
|
|
"margin_dpo/margin_mean": 30.922880172729492,
|
|
"margin_dpo/margin_std": 33.82048797607422,
|
|
"step": 514
|
|
},
|
|
{
|
|
"epoch": 0.7562408223201175,
|
|
"grad_norm": 50.55330276489258,
|
|
"learning_rate": 8.637300491465272e-08,
|
|
"logits/chosen": -0.6106176376342773,
|
|
"logits/rejected": -0.5937142372131348,
|
|
"logps/chosen": -72.84141540527344,
|
|
"logps/ref_chosen": -50.40156555175781,
|
|
"logps/ref_rejected": -87.09774780273438,
|
|
"logps/rejected": -143.20123291015625,
|
|
"loss": 0.3735,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.14427538216114044,
|
|
"margin_dpo/beta_margin_grad_std": 0.179901584982872,
|
|
"margin_dpo/beta_margin_mean": 3.366363525390625,
|
|
"margin_dpo/beta_margin_std": 2.7529520988464355,
|
|
"margin_dpo/loss_margin_mean": 33.66363525390625,
|
|
"margin_dpo/margin_mean": 33.66363525390625,
|
|
"margin_dpo/margin_std": 27.46428680419922,
|
|
"step": 515
|
|
},
|
|
{
|
|
"epoch": 0.7577092511013216,
|
|
"grad_norm": 51.07502365112305,
|
|
"learning_rate": 8.540489660386064e-08,
|
|
"logits/chosen": -0.633804202079773,
|
|
"logits/rejected": -0.6136379837989807,
|
|
"logps/chosen": -87.3009033203125,
|
|
"logps/ref_chosen": -64.6495590209961,
|
|
"logps/ref_rejected": -111.72238159179688,
|
|
"logps/rejected": -169.82229614257812,
|
|
"loss": 0.3698,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.14012214541435242,
|
|
"margin_dpo/beta_margin_grad_std": 0.18882179260253906,
|
|
"margin_dpo/beta_margin_mean": 3.544856548309326,
|
|
"margin_dpo/beta_margin_std": 2.8498873710632324,
|
|
"margin_dpo/loss_margin_mean": 35.44856262207031,
|
|
"margin_dpo/margin_mean": 35.44856262207031,
|
|
"margin_dpo/margin_std": 28.429126739501953,
|
|
"step": 516
|
|
},
|
|
{
|
|
"epoch": 0.7591776798825257,
|
|
"grad_norm": 48.961795806884766,
|
|
"learning_rate": 8.444112552711752e-08,
|
|
"logits/chosen": -0.6369151473045349,
|
|
"logits/rejected": -0.5914682149887085,
|
|
"logps/chosen": -86.39828491210938,
|
|
"logps/ref_chosen": -60.913551330566406,
|
|
"logps/ref_rejected": -89.08308410644531,
|
|
"logps/rejected": -149.49826049804688,
|
|
"loss": 0.3858,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.12788426876068115,
|
|
"margin_dpo/beta_margin_grad_std": 0.20656827092170715,
|
|
"margin_dpo/beta_margin_mean": 3.4930450916290283,
|
|
"margin_dpo/beta_margin_std": 2.5577361583709717,
|
|
"margin_dpo/loss_margin_mean": 34.930450439453125,
|
|
"margin_dpo/margin_mean": 34.930450439453125,
|
|
"margin_dpo/margin_std": 25.562118530273438,
|
|
"step": 517
|
|
},
|
|
{
|
|
"epoch": 0.7606461086637298,
|
|
"grad_norm": 52.089088439941406,
|
|
"learning_rate": 8.348171708068747e-08,
|
|
"logits/chosen": -0.6326709985733032,
|
|
"logits/rejected": -0.6170345544815063,
|
|
"logps/chosen": -82.4386215209961,
|
|
"logps/ref_chosen": -57.45589065551758,
|
|
"logps/ref_rejected": -85.31269836425781,
|
|
"logps/rejected": -141.47119140625,
|
|
"loss": 0.4549,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.15643204748630524,
|
|
"margin_dpo/beta_margin_grad_std": 0.2165152132511139,
|
|
"margin_dpo/beta_margin_mean": 3.1175765991210938,
|
|
"margin_dpo/beta_margin_std": 2.612419366836548,
|
|
"margin_dpo/loss_margin_mean": 31.175765991210938,
|
|
"margin_dpo/margin_mean": 31.175765991210938,
|
|
"margin_dpo/margin_std": 26.030349731445312,
|
|
"step": 518
|
|
},
|
|
{
|
|
"epoch": 0.762114537444934,
|
|
"grad_norm": 60.18330001831055,
|
|
"learning_rate": 8.25266965458755e-08,
|
|
"logits/chosen": -0.5879247784614563,
|
|
"logits/rejected": -0.5525568723678589,
|
|
"logps/chosen": -96.47016906738281,
|
|
"logps/ref_chosen": -74.06330871582031,
|
|
"logps/ref_rejected": -104.44416809082031,
|
|
"logps/rejected": -159.36849975585938,
|
|
"loss": 0.4562,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.16369768977165222,
|
|
"margin_dpo/beta_margin_grad_std": 0.2030007243156433,
|
|
"margin_dpo/beta_margin_mean": 3.25174617767334,
|
|
"margin_dpo/beta_margin_std": 3.083312511444092,
|
|
"margin_dpo/loss_margin_mean": 32.51746368408203,
|
|
"margin_dpo/margin_mean": 32.51746368408203,
|
|
"margin_dpo/margin_std": 30.369220733642578,
|
|
"step": 519
|
|
},
|
|
{
|
|
"epoch": 0.7635829662261381,
|
|
"grad_norm": 51.291900634765625,
|
|
"learning_rate": 8.15760890883607e-08,
|
|
"logits/chosen": -0.5656751394271851,
|
|
"logits/rejected": -0.5422060489654541,
|
|
"logps/chosen": -93.58493041992188,
|
|
"logps/ref_chosen": -70.2998275756836,
|
|
"logps/ref_rejected": -99.98133850097656,
|
|
"logps/rejected": -156.14584350585938,
|
|
"loss": 0.3683,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.13689424097537994,
|
|
"margin_dpo/beta_margin_grad_std": 0.186412513256073,
|
|
"margin_dpo/beta_margin_mean": 3.2879397869110107,
|
|
"margin_dpo/beta_margin_std": 2.570587635040283,
|
|
"margin_dpo/loss_margin_mean": 32.879398345947266,
|
|
"margin_dpo/margin_mean": 32.879398345947266,
|
|
"margin_dpo/margin_std": 24.496992111206055,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 0.7650513950073421,
|
|
"grad_norm": 50.26503372192383,
|
|
"learning_rate": 8.062991975753378e-08,
|
|
"logits/chosen": -0.607467770576477,
|
|
"logits/rejected": -0.5773638486862183,
|
|
"logps/chosen": -80.21942138671875,
|
|
"logps/ref_chosen": -58.14292907714844,
|
|
"logps/ref_rejected": -83.28060913085938,
|
|
"logps/rejected": -137.37908935546875,
|
|
"loss": 0.4181,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.15461619198322296,
|
|
"margin_dpo/beta_margin_grad_std": 0.19728721678256989,
|
|
"margin_dpo/beta_margin_mean": 3.202197313308716,
|
|
"margin_dpo/beta_margin_std": 2.655714988708496,
|
|
"margin_dpo/loss_margin_mean": 32.02197265625,
|
|
"margin_dpo/margin_mean": 32.02197265625,
|
|
"margin_dpo/margin_std": 26.189250946044922,
|
|
"step": 521
|
|
},
|
|
{
|
|
"epoch": 0.7665198237885462,
|
|
"grad_norm": 51.4885368347168,
|
|
"learning_rate": 7.968821348583643e-08,
|
|
"logits/chosen": -0.5788943767547607,
|
|
"logits/rejected": -0.5471011996269226,
|
|
"logps/chosen": -69.88147735595703,
|
|
"logps/ref_chosen": -46.54766845703125,
|
|
"logps/ref_rejected": -66.01388549804688,
|
|
"logps/rejected": -117.34169006347656,
|
|
"loss": 0.4752,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.17735202610492706,
|
|
"margin_dpo/beta_margin_grad_std": 0.2005145400762558,
|
|
"margin_dpo/beta_margin_mean": 2.7993991374969482,
|
|
"margin_dpo/beta_margin_std": 2.631875514984131,
|
|
"margin_dpo/loss_margin_mean": 27.99399185180664,
|
|
"margin_dpo/margin_mean": 27.99399185180664,
|
|
"margin_dpo/margin_std": 26.08908462524414,
|
|
"step": 522
|
|
},
|
|
{
|
|
"epoch": 0.7679882525697503,
|
|
"grad_norm": 62.35993957519531,
|
|
"learning_rate": 7.875099508810484e-08,
|
|
"logits/chosen": -0.5938686728477478,
|
|
"logits/rejected": -0.550100564956665,
|
|
"logps/chosen": -85.62379455566406,
|
|
"logps/ref_chosen": -61.76960372924805,
|
|
"logps/ref_rejected": -83.76141357421875,
|
|
"logps/rejected": -139.5577850341797,
|
|
"loss": 0.5449,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.17923639714717865,
|
|
"margin_dpo/beta_margin_grad_std": 0.2329106628894806,
|
|
"margin_dpo/beta_margin_mean": 3.194218397140503,
|
|
"margin_dpo/beta_margin_std": 2.890404224395752,
|
|
"margin_dpo/loss_margin_mean": 31.942182540893555,
|
|
"margin_dpo/margin_mean": 31.942180633544922,
|
|
"margin_dpo/margin_std": 28.83761978149414,
|
|
"step": 523
|
|
},
|
|
{
|
|
"epoch": 0.7694566813509545,
|
|
"grad_norm": 61.87083053588867,
|
|
"learning_rate": 7.781828926091535e-08,
|
|
"logits/chosen": -0.5877140760421753,
|
|
"logits/rejected": -0.5509617328643799,
|
|
"logps/chosen": -100.80895233154297,
|
|
"logps/ref_chosen": -78.0720443725586,
|
|
"logps/ref_rejected": -81.30198669433594,
|
|
"logps/rejected": -133.55508422851562,
|
|
"loss": 0.5047,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.16188479959964752,
|
|
"margin_dpo/beta_margin_grad_std": 0.2156955450773239,
|
|
"margin_dpo/beta_margin_mean": 2.951618194580078,
|
|
"margin_dpo/beta_margin_std": 2.4761300086975098,
|
|
"margin_dpo/loss_margin_mean": 29.51618194580078,
|
|
"margin_dpo/margin_mean": 29.51618194580078,
|
|
"margin_dpo/margin_std": 24.702909469604492,
|
|
"step": 524
|
|
},
|
|
{
|
|
"epoch": 0.7709251101321586,
|
|
"grad_norm": 35.78192901611328,
|
|
"learning_rate": 7.689012058193384e-08,
|
|
"logits/chosen": -0.5947517156600952,
|
|
"logits/rejected": -0.5842136144638062,
|
|
"logps/chosen": -72.66329956054688,
|
|
"logps/ref_chosen": -50.827857971191406,
|
|
"logps/ref_rejected": -100.05293273925781,
|
|
"logps/rejected": -157.0978546142578,
|
|
"loss": 0.2654,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1080591008067131,
|
|
"margin_dpo/beta_margin_grad_std": 0.1474897265434265,
|
|
"margin_dpo/beta_margin_mean": 3.5209484100341797,
|
|
"margin_dpo/beta_margin_std": 2.3788013458251953,
|
|
"margin_dpo/loss_margin_mean": 35.2094841003418,
|
|
"margin_dpo/margin_mean": 35.2094841003418,
|
|
"margin_dpo/margin_std": 23.671607971191406,
|
|
"step": 525
|
|
},
|
|
{
|
|
"epoch": 0.7723935389133627,
|
|
"grad_norm": 69.24839782714844,
|
|
"learning_rate": 7.596651350926836e-08,
|
|
"logits/chosen": -0.6309370994567871,
|
|
"logits/rejected": -0.5772014260292053,
|
|
"logps/chosen": -88.0202407836914,
|
|
"logps/ref_chosen": -63.167232513427734,
|
|
"logps/ref_rejected": -86.30934143066406,
|
|
"logps/rejected": -145.69381713867188,
|
|
"loss": 0.4258,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.144230917096138,
|
|
"margin_dpo/beta_margin_grad_std": 0.21393723785877228,
|
|
"margin_dpo/beta_margin_mean": 3.453146457672119,
|
|
"margin_dpo/beta_margin_std": 2.792224168777466,
|
|
"margin_dpo/loss_margin_mean": 34.531463623046875,
|
|
"margin_dpo/margin_mean": 34.531463623046875,
|
|
"margin_dpo/margin_std": 27.76288604736328,
|
|
"step": 526
|
|
},
|
|
{
|
|
"epoch": 0.7738619676945668,
|
|
"grad_norm": 76.02661895751953,
|
|
"learning_rate": 7.504749238082414e-08,
|
|
"logits/chosen": -0.6780938506126404,
|
|
"logits/rejected": -0.6229304671287537,
|
|
"logps/chosen": -94.22843933105469,
|
|
"logps/ref_chosen": -71.12867736816406,
|
|
"logps/ref_rejected": -78.3425521850586,
|
|
"logps/rejected": -133.57666015625,
|
|
"loss": 0.4342,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.15442487597465515,
|
|
"margin_dpo/beta_margin_grad_std": 0.20508261024951935,
|
|
"margin_dpo/beta_margin_mean": 3.213435649871826,
|
|
"margin_dpo/beta_margin_std": 2.8153111934661865,
|
|
"margin_dpo/loss_margin_mean": 32.13435363769531,
|
|
"margin_dpo/margin_mean": 32.13435363769531,
|
|
"margin_dpo/margin_std": 28.03814697265625,
|
|
"step": 527
|
|
},
|
|
{
|
|
"epoch": 0.775330396475771,
|
|
"grad_norm": 53.0721549987793,
|
|
"learning_rate": 7.413308141366254e-08,
|
|
"logits/chosen": -0.5941898226737976,
|
|
"logits/rejected": -0.5680090188980103,
|
|
"logps/chosen": -91.19302368164062,
|
|
"logps/ref_chosen": -68.0894546508789,
|
|
"logps/ref_rejected": -93.91006469726562,
|
|
"logps/rejected": -147.61412048339844,
|
|
"loss": 0.4206,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1577254980802536,
|
|
"margin_dpo/beta_margin_grad_std": 0.20022036135196686,
|
|
"margin_dpo/beta_margin_mean": 3.0600483417510986,
|
|
"margin_dpo/beta_margin_std": 2.504538059234619,
|
|
"margin_dpo/loss_margin_mean": 30.600481033325195,
|
|
"margin_dpo/margin_mean": 30.600481033325195,
|
|
"margin_dpo/margin_std": 24.954174041748047,
|
|
"step": 528
|
|
},
|
|
{
|
|
"epoch": 0.7767988252569751,
|
|
"grad_norm": 76.8966064453125,
|
|
"learning_rate": 7.322330470336313e-08,
|
|
"logits/chosen": -0.6043155789375305,
|
|
"logits/rejected": -0.5836986899375916,
|
|
"logps/chosen": -82.0534896850586,
|
|
"logps/ref_chosen": -55.5749626159668,
|
|
"logps/ref_rejected": -89.20909118652344,
|
|
"logps/rejected": -143.092041015625,
|
|
"loss": 0.7156,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.19791270792484283,
|
|
"margin_dpo/beta_margin_grad_std": 0.25681713223457336,
|
|
"margin_dpo/beta_margin_mean": 2.740443229675293,
|
|
"margin_dpo/beta_margin_std": 2.8750996589660645,
|
|
"margin_dpo/loss_margin_mean": 27.40443229675293,
|
|
"margin_dpo/margin_mean": 27.404430389404297,
|
|
"margin_dpo/margin_std": 28.411148071289062,
|
|
"step": 529
|
|
},
|
|
{
|
|
"epoch": 0.7782672540381792,
|
|
"grad_norm": 62.848148345947266,
|
|
"learning_rate": 7.231818622338822e-08,
|
|
"logits/chosen": -0.5931106805801392,
|
|
"logits/rejected": -0.5747063159942627,
|
|
"logps/chosen": -71.18637084960938,
|
|
"logps/ref_chosen": -47.601417541503906,
|
|
"logps/ref_rejected": -87.2845230102539,
|
|
"logps/rejected": -147.40582275390625,
|
|
"loss": 0.4445,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.13506180047988892,
|
|
"margin_dpo/beta_margin_grad_std": 0.23474474251270294,
|
|
"margin_dpo/beta_margin_mean": 3.6536343097686768,
|
|
"margin_dpo/beta_margin_std": 2.7414755821228027,
|
|
"margin_dpo/loss_margin_mean": 36.53634262084961,
|
|
"margin_dpo/margin_mean": 36.53634262084961,
|
|
"margin_dpo/margin_std": 27.119749069213867,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 0.7797356828193832,
|
|
"grad_norm": 54.345516204833984,
|
|
"learning_rate": 7.141774982445147e-08,
|
|
"logits/chosen": -0.6257216334342957,
|
|
"logits/rejected": -0.5846695899963379,
|
|
"logps/chosen": -77.81207275390625,
|
|
"logps/ref_chosen": -55.246063232421875,
|
|
"logps/ref_rejected": -70.60598754882812,
|
|
"logps/rejected": -125.383056640625,
|
|
"loss": 0.5198,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.16368556022644043,
|
|
"margin_dpo/beta_margin_grad_std": 0.23361165821552277,
|
|
"margin_dpo/beta_margin_mean": 3.22110652923584,
|
|
"margin_dpo/beta_margin_std": 2.786233901977539,
|
|
"margin_dpo/loss_margin_mean": 32.21106719970703,
|
|
"margin_dpo/margin_mean": 32.21106719970703,
|
|
"margin_dpo/margin_std": 27.787128448486328,
|
|
"step": 531
|
|
},
|
|
{
|
|
"epoch": 0.7812041116005873,
|
|
"grad_norm": 57.886627197265625,
|
|
"learning_rate": 7.052201923388953e-08,
|
|
"logits/chosen": -0.6238207817077637,
|
|
"logits/rejected": -0.5933228731155396,
|
|
"logps/chosen": -93.80610656738281,
|
|
"logps/ref_chosen": -70.28602600097656,
|
|
"logps/ref_rejected": -86.5913314819336,
|
|
"logps/rejected": -148.1940155029297,
|
|
"loss": 0.3443,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.12783224880695343,
|
|
"margin_dpo/beta_margin_grad_std": 0.19326746463775635,
|
|
"margin_dpo/beta_margin_mean": 3.8082594871520996,
|
|
"margin_dpo/beta_margin_std": 2.9897003173828125,
|
|
"margin_dpo/loss_margin_mean": 38.08259582519531,
|
|
"margin_dpo/margin_mean": 38.08259582519531,
|
|
"margin_dpo/margin_std": 27.77364730834961,
|
|
"step": 532
|
|
},
|
|
{
|
|
"epoch": 0.7826725403817915,
|
|
"grad_norm": 71.21391296386719,
|
|
"learning_rate": 6.963101805503646e-08,
|
|
"logits/chosen": -0.6265207529067993,
|
|
"logits/rejected": -0.584482729434967,
|
|
"logps/chosen": -87.6553726196289,
|
|
"logps/ref_chosen": -64.8551025390625,
|
|
"logps/ref_rejected": -76.58805847167969,
|
|
"logps/rejected": -125.80079650878906,
|
|
"loss": 0.5973,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.18623670935630798,
|
|
"margin_dpo/beta_margin_grad_std": 0.23616911470890045,
|
|
"margin_dpo/beta_margin_mean": 2.6412458419799805,
|
|
"margin_dpo/beta_margin_std": 2.417598009109497,
|
|
"margin_dpo/loss_margin_mean": 26.412456512451172,
|
|
"margin_dpo/margin_mean": 26.412456512451172,
|
|
"margin_dpo/margin_std": 23.92403793334961,
|
|
"step": 533
|
|
},
|
|
{
|
|
"epoch": 0.7841409691629956,
|
|
"grad_norm": 46.806941986083984,
|
|
"learning_rate": 6.874476976660184e-08,
|
|
"logits/chosen": -0.6205226182937622,
|
|
"logits/rejected": -0.5897984504699707,
|
|
"logps/chosen": -82.1025619506836,
|
|
"logps/ref_chosen": -60.119388580322266,
|
|
"logps/ref_rejected": -78.54347229003906,
|
|
"logps/rejected": -132.9524688720703,
|
|
"loss": 0.3996,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.15258625149726868,
|
|
"margin_dpo/beta_margin_grad_std": 0.18508727848529816,
|
|
"margin_dpo/beta_margin_mean": 3.2425835132598877,
|
|
"margin_dpo/beta_margin_std": 2.758850336074829,
|
|
"margin_dpo/loss_margin_mean": 32.42583465576172,
|
|
"margin_dpo/margin_mean": 32.42583465576172,
|
|
"margin_dpo/margin_std": 27.545181274414062,
|
|
"step": 534
|
|
},
|
|
{
|
|
"epoch": 0.7856093979441997,
|
|
"grad_norm": 43.828521728515625,
|
|
"learning_rate": 6.786329772205246e-08,
|
|
"logits/chosen": -0.6110357046127319,
|
|
"logits/rejected": -0.5794057846069336,
|
|
"logps/chosen": -74.39203643798828,
|
|
"logps/ref_chosen": -54.330238342285156,
|
|
"logps/ref_rejected": -96.30763244628906,
|
|
"logps/rejected": -152.29916381835938,
|
|
"loss": 0.3868,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1333179920911789,
|
|
"margin_dpo/beta_margin_grad_std": 0.20468372106552124,
|
|
"margin_dpo/beta_margin_mean": 3.5929739475250244,
|
|
"margin_dpo/beta_margin_std": 2.733642578125,
|
|
"margin_dpo/loss_margin_mean": 35.92974090576172,
|
|
"margin_dpo/margin_mean": 35.92974090576172,
|
|
"margin_dpo/margin_std": 26.782804489135742,
|
|
"step": 535
|
|
},
|
|
{
|
|
"epoch": 0.7870778267254038,
|
|
"grad_norm": 32.79661178588867,
|
|
"learning_rate": 6.698662514899638e-08,
|
|
"logits/chosen": -0.6306508779525757,
|
|
"logits/rejected": -0.6139326095581055,
|
|
"logps/chosen": -67.34485626220703,
|
|
"logps/ref_chosen": -47.08053207397461,
|
|
"logps/ref_rejected": -89.09783935546875,
|
|
"logps/rejected": -150.07534790039062,
|
|
"loss": 0.2143,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.08794363588094711,
|
|
"margin_dpo/beta_margin_grad_std": 0.13372628390789032,
|
|
"margin_dpo/beta_margin_mean": 4.071318626403809,
|
|
"margin_dpo/beta_margin_std": 2.757542848587036,
|
|
"margin_dpo/loss_margin_mean": 40.71318817138672,
|
|
"margin_dpo/margin_mean": 40.71318817138672,
|
|
"margin_dpo/margin_std": 27.551332473754883,
|
|
"step": 536
|
|
},
|
|
{
|
|
"epoch": 0.788546255506608,
|
|
"grad_norm": 57.09232711791992,
|
|
"learning_rate": 6.611477514857114e-08,
|
|
"logits/chosen": -0.6294724941253662,
|
|
"logits/rejected": -0.5703548192977905,
|
|
"logps/chosen": -78.06488037109375,
|
|
"logps/ref_chosen": -57.747474670410156,
|
|
"logps/ref_rejected": -70.43838500976562,
|
|
"logps/rejected": -124.39582824707031,
|
|
"loss": 0.423,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.157338947057724,
|
|
"margin_dpo/beta_margin_grad_std": 0.19860957562923431,
|
|
"margin_dpo/beta_margin_mean": 3.3640034198760986,
|
|
"margin_dpo/beta_margin_std": 2.8764986991882324,
|
|
"margin_dpo/loss_margin_mean": 33.64003372192383,
|
|
"margin_dpo/margin_mean": 33.64003372192383,
|
|
"margin_dpo/margin_std": 28.32391929626465,
|
|
"step": 537
|
|
},
|
|
{
|
|
"epoch": 0.7900146842878121,
|
|
"grad_norm": 48.7829704284668,
|
|
"learning_rate": 6.524777069483525e-08,
|
|
"logits/chosen": -0.6543197631835938,
|
|
"logits/rejected": -0.6095279455184937,
|
|
"logps/chosen": -88.88335418701172,
|
|
"logps/ref_chosen": -66.41593933105469,
|
|
"logps/ref_rejected": -84.22808837890625,
|
|
"logps/rejected": -139.14047241210938,
|
|
"loss": 0.354,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.12948301434516907,
|
|
"margin_dpo/beta_margin_grad_std": 0.1803048700094223,
|
|
"margin_dpo/beta_margin_mean": 3.2444982528686523,
|
|
"margin_dpo/beta_margin_std": 2.5140750408172607,
|
|
"margin_dpo/loss_margin_mean": 32.44498062133789,
|
|
"margin_dpo/margin_mean": 32.44498062133789,
|
|
"margin_dpo/margin_std": 24.870681762695312,
|
|
"step": 538
|
|
},
|
|
{
|
|
"epoch": 0.7914831130690162,
|
|
"grad_norm": 53.8671760559082,
|
|
"learning_rate": 6.438563463416221e-08,
|
|
"logits/chosen": -0.6606429815292358,
|
|
"logits/rejected": -0.6174535751342773,
|
|
"logps/chosen": -79.59363555908203,
|
|
"logps/ref_chosen": -58.49285125732422,
|
|
"logps/ref_rejected": -91.85395812988281,
|
|
"logps/rejected": -144.0075225830078,
|
|
"loss": 0.4892,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1646711677312851,
|
|
"margin_dpo/beta_margin_grad_std": 0.21774569153785706,
|
|
"margin_dpo/beta_margin_mean": 3.105278968811035,
|
|
"margin_dpo/beta_margin_std": 2.70540189743042,
|
|
"margin_dpo/loss_margin_mean": 31.05278968811035,
|
|
"margin_dpo/margin_mean": 31.05278968811035,
|
|
"margin_dpo/margin_std": 26.950185775756836,
|
|
"step": 539
|
|
},
|
|
{
|
|
"epoch": 0.7929515418502202,
|
|
"grad_norm": 65.30083465576172,
|
|
"learning_rate": 6.352838968463919e-08,
|
|
"logits/chosen": -0.6370847225189209,
|
|
"logits/rejected": -0.6102155447006226,
|
|
"logps/chosen": -84.67435455322266,
|
|
"logps/ref_chosen": -63.482513427734375,
|
|
"logps/ref_rejected": -116.43000030517578,
|
|
"logps/rejected": -172.81341552734375,
|
|
"loss": 0.4614,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1394956409931183,
|
|
"margin_dpo/beta_margin_grad_std": 0.22995907068252563,
|
|
"margin_dpo/beta_margin_mean": 3.5191566944122314,
|
|
"margin_dpo/beta_margin_std": 2.8065860271453857,
|
|
"margin_dpo/loss_margin_mean": 35.191566467285156,
|
|
"margin_dpo/margin_mean": 35.191566467285156,
|
|
"margin_dpo/margin_std": 27.493711471557617,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 0.7944199706314243,
|
|
"grad_norm": 60.31626510620117,
|
|
"learning_rate": 6.267605843546767e-08,
|
|
"logits/chosen": -0.6471028327941895,
|
|
"logits/rejected": -0.6038549542427063,
|
|
"logps/chosen": -100.83651733398438,
|
|
"logps/ref_chosen": -78.28035736083984,
|
|
"logps/ref_rejected": -103.273681640625,
|
|
"logps/rejected": -156.01519775390625,
|
|
"loss": 0.4299,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1511632800102234,
|
|
"margin_dpo/beta_margin_grad_std": 0.20662708580493927,
|
|
"margin_dpo/beta_margin_mean": 3.0185351371765137,
|
|
"margin_dpo/beta_margin_std": 2.4248569011688232,
|
|
"margin_dpo/loss_margin_mean": 30.18535041809082,
|
|
"margin_dpo/margin_mean": 30.185348510742188,
|
|
"margin_dpo/margin_std": 23.390766143798828,
|
|
"step": 541
|
|
},
|
|
{
|
|
"epoch": 0.7958883994126285,
|
|
"grad_norm": 37.80149841308594,
|
|
"learning_rate": 6.182866334636888e-08,
|
|
"logits/chosen": -0.6717028617858887,
|
|
"logits/rejected": -0.6646940112113953,
|
|
"logps/chosen": -79.87113952636719,
|
|
"logps/ref_chosen": -57.48497009277344,
|
|
"logps/ref_rejected": -96.47506713867188,
|
|
"logps/rejected": -153.2113494873047,
|
|
"loss": 0.3688,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.13427144289016724,
|
|
"margin_dpo/beta_margin_grad_std": 0.19649642705917358,
|
|
"margin_dpo/beta_margin_mean": 3.4350104331970215,
|
|
"margin_dpo/beta_margin_std": 2.5037317276000977,
|
|
"margin_dpo/loss_margin_mean": 34.35010528564453,
|
|
"margin_dpo/margin_mean": 34.35010528564453,
|
|
"margin_dpo/margin_std": 24.91982650756836,
|
|
"step": 542
|
|
},
|
|
{
|
|
"epoch": 0.7973568281938326,
|
|
"grad_norm": 72.47408294677734,
|
|
"learning_rate": 6.098622674699147e-08,
|
|
"logits/chosen": -0.6002076864242554,
|
|
"logits/rejected": -0.5865759253501892,
|
|
"logps/chosen": -83.70565795898438,
|
|
"logps/ref_chosen": -60.61750793457031,
|
|
"logps/ref_rejected": -105.59896850585938,
|
|
"logps/rejected": -154.27377319335938,
|
|
"loss": 0.5987,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1960902214050293,
|
|
"margin_dpo/beta_margin_grad_std": 0.23114755749702454,
|
|
"margin_dpo/beta_margin_mean": 2.5586671829223633,
|
|
"margin_dpo/beta_margin_std": 2.56718111038208,
|
|
"margin_dpo/loss_margin_mean": 25.586669921875,
|
|
"margin_dpo/margin_mean": 25.586671829223633,
|
|
"margin_dpo/margin_std": 25.219966888427734,
|
|
"step": 543
|
|
},
|
|
{
|
|
"epoch": 0.7988252569750367,
|
|
"grad_norm": 53.03235626220703,
|
|
"learning_rate": 6.01487708363232e-08,
|
|
"logits/chosen": -0.6120076179504395,
|
|
"logits/rejected": -0.5983961820602417,
|
|
"logps/chosen": -84.55440521240234,
|
|
"logps/ref_chosen": -59.642303466796875,
|
|
"logps/ref_rejected": -100.95469665527344,
|
|
"logps/rejected": -158.740478515625,
|
|
"loss": 0.3113,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.12066913396120071,
|
|
"margin_dpo/beta_margin_grad_std": 0.16076715290546417,
|
|
"margin_dpo/beta_margin_mean": 3.287367105484009,
|
|
"margin_dpo/beta_margin_std": 2.3992013931274414,
|
|
"margin_dpo/loss_margin_mean": 32.8736686706543,
|
|
"margin_dpo/margin_mean": 32.8736686706543,
|
|
"margin_dpo/margin_std": 23.937637329101562,
|
|
"step": 544
|
|
},
|
|
{
|
|
"epoch": 0.8002936857562408,
|
|
"grad_norm": 50.49260330200195,
|
|
"learning_rate": 5.9316317682106294e-08,
|
|
"logits/chosen": -0.5966023206710815,
|
|
"logits/rejected": -0.5714937448501587,
|
|
"logps/chosen": -91.05603790283203,
|
|
"logps/ref_chosen": -67.64859771728516,
|
|
"logps/ref_rejected": -95.90800476074219,
|
|
"logps/rejected": -153.31039428710938,
|
|
"loss": 0.3894,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.147560253739357,
|
|
"margin_dpo/beta_margin_grad_std": 0.19324474036693573,
|
|
"margin_dpo/beta_margin_mean": 3.3994970321655273,
|
|
"margin_dpo/beta_margin_std": 2.6682093143463135,
|
|
"margin_dpo/loss_margin_mean": 33.99496841430664,
|
|
"margin_dpo/margin_mean": 33.994972229003906,
|
|
"margin_dpo/margin_std": 26.630447387695312,
|
|
"step": 545
|
|
},
|
|
{
|
|
"epoch": 0.801762114537445,
|
|
"grad_norm": 48.28023147583008,
|
|
"learning_rate": 5.848888922025552e-08,
|
|
"logits/chosen": -0.598247230052948,
|
|
"logits/rejected": -0.5669834017753601,
|
|
"logps/chosen": -72.54621887207031,
|
|
"logps/ref_chosen": -50.744232177734375,
|
|
"logps/ref_rejected": -81.86622619628906,
|
|
"logps/rejected": -137.0621337890625,
|
|
"loss": 0.3406,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.12454946339130402,
|
|
"margin_dpo/beta_margin_grad_std": 0.17545826733112335,
|
|
"margin_dpo/beta_margin_mean": 3.33939266204834,
|
|
"margin_dpo/beta_margin_std": 2.392932891845703,
|
|
"margin_dpo/loss_margin_mean": 33.393924713134766,
|
|
"margin_dpo/margin_mean": 33.39392852783203,
|
|
"margin_dpo/margin_std": 23.586105346679688,
|
|
"step": 546
|
|
},
|
|
{
|
|
"epoch": 0.8032305433186491,
|
|
"grad_norm": 94.86836242675781,
|
|
"learning_rate": 5.7666507254280265e-08,
|
|
"logits/chosen": -0.5786527395248413,
|
|
"logits/rejected": -0.5422627925872803,
|
|
"logps/chosen": -98.80986022949219,
|
|
"logps/ref_chosen": -73.6877212524414,
|
|
"logps/ref_rejected": -90.76136779785156,
|
|
"logps/rejected": -146.6555938720703,
|
|
"loss": 0.5943,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.18261206150054932,
|
|
"margin_dpo/beta_margin_grad_std": 0.2419717162847519,
|
|
"margin_dpo/beta_margin_mean": 3.0772078037261963,
|
|
"margin_dpo/beta_margin_std": 3.048326015472412,
|
|
"margin_dpo/loss_margin_mean": 30.772077560424805,
|
|
"margin_dpo/margin_mean": 30.772075653076172,
|
|
"margin_dpo/margin_std": 29.73462677001953,
|
|
"step": 547
|
|
},
|
|
{
|
|
"epoch": 0.8046989720998532,
|
|
"grad_norm": 50.10495376586914,
|
|
"learning_rate": 5.684919345471029e-08,
|
|
"logits/chosen": -0.6317073106765747,
|
|
"logits/rejected": -0.5984237790107727,
|
|
"logps/chosen": -87.01058959960938,
|
|
"logps/ref_chosen": -65.24634552001953,
|
|
"logps/ref_rejected": -94.11807250976562,
|
|
"logps/rejected": -150.06707763671875,
|
|
"loss": 0.4276,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.14587746560573578,
|
|
"margin_dpo/beta_margin_grad_std": 0.21648141741752625,
|
|
"margin_dpo/beta_margin_mean": 3.418475389480591,
|
|
"margin_dpo/beta_margin_std": 2.860063076019287,
|
|
"margin_dpo/loss_margin_mean": 34.18475341796875,
|
|
"margin_dpo/margin_mean": 34.18475341796875,
|
|
"margin_dpo/margin_std": 28.568710327148438,
|
|
"step": 548
|
|
},
|
|
{
|
|
"epoch": 0.8061674008810573,
|
|
"grad_norm": 69.96656799316406,
|
|
"learning_rate": 5.603696935852426e-08,
|
|
"logits/chosen": -0.6198970079421997,
|
|
"logits/rejected": -0.5846245288848877,
|
|
"logps/chosen": -69.8495101928711,
|
|
"logps/ref_chosen": -49.21235656738281,
|
|
"logps/ref_rejected": -73.91031646728516,
|
|
"logps/rejected": -129.4089813232422,
|
|
"loss": 0.3395,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.12493831664323807,
|
|
"margin_dpo/beta_margin_grad_std": 0.18240319192409515,
|
|
"margin_dpo/beta_margin_mean": 3.4861512184143066,
|
|
"margin_dpo/beta_margin_std": 2.548912286758423,
|
|
"margin_dpo/loss_margin_mean": 34.86151123046875,
|
|
"margin_dpo/margin_mean": 34.86151123046875,
|
|
"margin_dpo/margin_std": 25.463951110839844,
|
|
"step": 549
|
|
},
|
|
{
|
|
"epoch": 0.8076358296622613,
|
|
"grad_norm": 75.74690246582031,
|
|
"learning_rate": 5.5229856368582376e-08,
|
|
"logits/chosen": -0.5934598445892334,
|
|
"logits/rejected": -0.5707608461380005,
|
|
"logps/chosen": -81.35966491699219,
|
|
"logps/ref_chosen": -56.80695343017578,
|
|
"logps/ref_rejected": -95.12580871582031,
|
|
"logps/rejected": -147.45431518554688,
|
|
"loss": 0.5179,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.18190813064575195,
|
|
"margin_dpo/beta_margin_grad_std": 0.2241961658000946,
|
|
"margin_dpo/beta_margin_mean": 2.777578592300415,
|
|
"margin_dpo/beta_margin_std": 2.4132397174835205,
|
|
"margin_dpo/loss_margin_mean": 27.775785446166992,
|
|
"margin_dpo/margin_mean": 27.775785446166992,
|
|
"margin_dpo/margin_std": 24.089828491210938,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.8091042584434655,
|
|
"grad_norm": 63.201576232910156,
|
|
"learning_rate": 5.4427875753062734e-08,
|
|
"logits/chosen": -0.6071433424949646,
|
|
"logits/rejected": -0.5851248502731323,
|
|
"logps/chosen": -82.18833923339844,
|
|
"logps/ref_chosen": -59.10633087158203,
|
|
"logps/ref_rejected": -111.67280578613281,
|
|
"logps/rejected": -169.89010620117188,
|
|
"loss": 0.3486,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.13184352219104767,
|
|
"margin_dpo/beta_margin_grad_std": 0.18528760969638824,
|
|
"margin_dpo/beta_margin_mean": 3.5135278701782227,
|
|
"margin_dpo/beta_margin_std": 2.6755788326263428,
|
|
"margin_dpo/loss_margin_mean": 35.135276794433594,
|
|
"margin_dpo/margin_mean": 35.135276794433594,
|
|
"margin_dpo/margin_std": 26.726924896240234,
|
|
"step": 551
|
|
},
|
|
{
|
|
"epoch": 0.8105726872246696,
|
|
"grad_norm": 36.340457916259766,
|
|
"learning_rate": 5.363104864490034e-08,
|
|
"logits/chosen": -0.5997291803359985,
|
|
"logits/rejected": -0.5684548616409302,
|
|
"logps/chosen": -82.64513397216797,
|
|
"logps/ref_chosen": -62.35459899902344,
|
|
"logps/ref_rejected": -104.56210327148438,
|
|
"logps/rejected": -164.4604949951172,
|
|
"loss": 0.2519,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.10329456627368927,
|
|
"margin_dpo/beta_margin_grad_std": 0.142158642411232,
|
|
"margin_dpo/beta_margin_mean": 3.960785388946533,
|
|
"margin_dpo/beta_margin_std": 3.118678331375122,
|
|
"margin_dpo/loss_margin_mean": 39.607852935791016,
|
|
"margin_dpo/margin_mean": 39.607852935791016,
|
|
"margin_dpo/margin_std": 30.571399688720703,
|
|
"step": 552
|
|
},
|
|
{
|
|
"epoch": 0.8120411160058737,
|
|
"grad_norm": 59.747318267822266,
|
|
"learning_rate": 5.2839396041230415e-08,
|
|
"logits/chosen": -0.6111286878585815,
|
|
"logits/rejected": -0.5851707458496094,
|
|
"logps/chosen": -89.31002044677734,
|
|
"logps/ref_chosen": -68.25881958007812,
|
|
"logps/ref_rejected": -98.0971450805664,
|
|
"logps/rejected": -149.80963134765625,
|
|
"loss": 0.4075,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.15195949375629425,
|
|
"margin_dpo/beta_margin_grad_std": 0.1938675493001938,
|
|
"margin_dpo/beta_margin_mean": 3.0661275386810303,
|
|
"margin_dpo/beta_margin_std": 2.555492401123047,
|
|
"margin_dpo/loss_margin_mean": 30.661273956298828,
|
|
"margin_dpo/margin_mean": 30.661273956298828,
|
|
"margin_dpo/margin_std": 24.51015853881836,
|
|
"step": 553
|
|
},
|
|
{
|
|
"epoch": 0.8135095447870778,
|
|
"grad_norm": 69.38467407226562,
|
|
"learning_rate": 5.205293880283551e-08,
|
|
"logits/chosen": -0.5836309194564819,
|
|
"logits/rejected": -0.5294244289398193,
|
|
"logps/chosen": -90.94577026367188,
|
|
"logps/ref_chosen": -67.94767761230469,
|
|
"logps/ref_rejected": -89.78272247314453,
|
|
"logps/rejected": -154.76803588867188,
|
|
"loss": 0.4321,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.12726253271102905,
|
|
"margin_dpo/beta_margin_grad_std": 0.22509342432022095,
|
|
"margin_dpo/beta_margin_mean": 4.198723316192627,
|
|
"margin_dpo/beta_margin_std": 3.1148805618286133,
|
|
"margin_dpo/loss_margin_mean": 41.98722839355469,
|
|
"margin_dpo/margin_mean": 41.98722839355469,
|
|
"margin_dpo/margin_std": 31.020339965820312,
|
|
"step": 554
|
|
},
|
|
{
|
|
"epoch": 0.8149779735682819,
|
|
"grad_norm": 60.07759094238281,
|
|
"learning_rate": 5.127169765359515e-08,
|
|
"logits/chosen": -0.631875216960907,
|
|
"logits/rejected": -0.6264532804489136,
|
|
"logps/chosen": -75.14740753173828,
|
|
"logps/ref_chosen": -53.33049011230469,
|
|
"logps/ref_rejected": -108.47937774658203,
|
|
"logps/rejected": -165.06500244140625,
|
|
"loss": 0.4537,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1464599072933197,
|
|
"margin_dpo/beta_margin_grad_std": 0.23116105794906616,
|
|
"margin_dpo/beta_margin_mean": 3.4768707752227783,
|
|
"margin_dpo/beta_margin_std": 2.832597494125366,
|
|
"margin_dpo/loss_margin_mean": 34.768707275390625,
|
|
"margin_dpo/margin_mean": 34.768707275390625,
|
|
"margin_dpo/margin_std": 27.81438446044922,
|
|
"step": 555
|
|
},
|
|
{
|
|
"epoch": 0.8164464023494861,
|
|
"grad_norm": 95.35712432861328,
|
|
"learning_rate": 5.049569317994012e-08,
|
|
"logits/chosen": -0.5981155633926392,
|
|
"logits/rejected": -0.5586187839508057,
|
|
"logps/chosen": -80.44320678710938,
|
|
"logps/ref_chosen": -58.64447021484375,
|
|
"logps/ref_rejected": -101.34040832519531,
|
|
"logps/rejected": -153.6765594482422,
|
|
"loss": 0.5367,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1837112158536911,
|
|
"margin_dpo/beta_margin_grad_std": 0.23774275183677673,
|
|
"margin_dpo/beta_margin_mean": 3.053741455078125,
|
|
"margin_dpo/beta_margin_std": 2.7874701023101807,
|
|
"margin_dpo/loss_margin_mean": 30.53741455078125,
|
|
"margin_dpo/margin_mean": 30.53741455078125,
|
|
"margin_dpo/margin_std": 27.704578399658203,
|
|
"step": 556
|
|
},
|
|
{
|
|
"epoch": 0.8179148311306902,
|
|
"grad_norm": 51.85511016845703,
|
|
"learning_rate": 4.9724945830310144e-08,
|
|
"logits/chosen": -0.6293572187423706,
|
|
"logits/rejected": -0.6109094619750977,
|
|
"logps/chosen": -89.30657958984375,
|
|
"logps/ref_chosen": -67.84066009521484,
|
|
"logps/ref_rejected": -109.93966674804688,
|
|
"logps/rejected": -161.80950927734375,
|
|
"loss": 0.4557,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1644848883152008,
|
|
"margin_dpo/beta_margin_grad_std": 0.2031964659690857,
|
|
"margin_dpo/beta_margin_mean": 3.0403928756713867,
|
|
"margin_dpo/beta_margin_std": 2.689265251159668,
|
|
"margin_dpo/loss_margin_mean": 30.403926849365234,
|
|
"margin_dpo/margin_mean": 30.403926849365234,
|
|
"margin_dpo/margin_std": 26.008148193359375,
|
|
"step": 557
|
|
},
|
|
{
|
|
"epoch": 0.8193832599118943,
|
|
"grad_norm": 37.57906723022461,
|
|
"learning_rate": 4.8959475914614554e-08,
|
|
"logits/chosen": -0.6321591138839722,
|
|
"logits/rejected": -0.5818116068840027,
|
|
"logps/chosen": -81.43386840820312,
|
|
"logps/ref_chosen": -62.36824035644531,
|
|
"logps/ref_rejected": -102.16102600097656,
|
|
"logps/rejected": -162.21075439453125,
|
|
"loss": 0.2873,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.10846921056509018,
|
|
"margin_dpo/beta_margin_grad_std": 0.1664186269044876,
|
|
"margin_dpo/beta_margin_mean": 4.098410129547119,
|
|
"margin_dpo/beta_margin_std": 3.1049041748046875,
|
|
"margin_dpo/loss_margin_mean": 40.984100341796875,
|
|
"margin_dpo/margin_mean": 40.984100341796875,
|
|
"margin_dpo/margin_std": 30.58254051208496,
|
|
"step": 558
|
|
},
|
|
{
|
|
"epoch": 0.8208516886930984,
|
|
"grad_norm": 55.36118698120117,
|
|
"learning_rate": 4.8199303603697614e-08,
|
|
"logits/chosen": -0.6702802181243896,
|
|
"logits/rejected": -0.6162378787994385,
|
|
"logps/chosen": -80.25833129882812,
|
|
"logps/ref_chosen": -60.75232696533203,
|
|
"logps/ref_rejected": -93.4422836303711,
|
|
"logps/rejected": -146.09507751464844,
|
|
"loss": 0.4285,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1548963040113449,
|
|
"margin_dpo/beta_margin_grad_std": 0.20749419927597046,
|
|
"margin_dpo/beta_margin_mean": 3.3146774768829346,
|
|
"margin_dpo/beta_margin_std": 2.7877228260040283,
|
|
"margin_dpo/loss_margin_mean": 33.14677429199219,
|
|
"margin_dpo/margin_mean": 33.14677429199219,
|
|
"margin_dpo/margin_std": 27.695289611816406,
|
|
"step": 559
|
|
},
|
|
{
|
|
"epoch": 0.8223201174743024,
|
|
"grad_norm": 55.987613677978516,
|
|
"learning_rate": 4.7444448928806615e-08,
|
|
"logits/chosen": -0.5809895992279053,
|
|
"logits/rejected": -0.5309587717056274,
|
|
"logps/chosen": -78.91523742675781,
|
|
"logps/ref_chosen": -58.10382080078125,
|
|
"logps/ref_rejected": -79.99122619628906,
|
|
"logps/rejected": -129.71127319335938,
|
|
"loss": 0.4468,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1631620079278946,
|
|
"margin_dpo/beta_margin_grad_std": 0.19706346094608307,
|
|
"margin_dpo/beta_margin_mean": 2.890864372253418,
|
|
"margin_dpo/beta_margin_std": 2.4986603260040283,
|
|
"margin_dpo/loss_margin_mean": 28.90864372253418,
|
|
"margin_dpo/margin_mean": 28.90864372253418,
|
|
"margin_dpo/margin_std": 24.452037811279297,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 0.8237885462555066,
|
|
"grad_norm": 64.24273681640625,
|
|
"learning_rate": 4.669493178106432e-08,
|
|
"logits/chosen": -0.5840525031089783,
|
|
"logits/rejected": -0.5768797397613525,
|
|
"logps/chosen": -75.96839904785156,
|
|
"logps/ref_chosen": -50.91287612915039,
|
|
"logps/ref_rejected": -99.06857299804688,
|
|
"logps/rejected": -153.4114227294922,
|
|
"loss": 0.499,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1737356036901474,
|
|
"margin_dpo/beta_margin_grad_std": 0.21739643812179565,
|
|
"margin_dpo/beta_margin_mean": 2.9287328720092773,
|
|
"margin_dpo/beta_margin_std": 2.6283650398254395,
|
|
"margin_dpo/loss_margin_mean": 29.287328720092773,
|
|
"margin_dpo/margin_mean": 29.287330627441406,
|
|
"margin_dpo/margin_std": 26.124765396118164,
|
|
"step": 561
|
|
},
|
|
{
|
|
"epoch": 0.8252569750367107,
|
|
"grad_norm": 32.95391082763672,
|
|
"learning_rate": 4.5950771910944596e-08,
|
|
"logits/chosen": -0.6423584222793579,
|
|
"logits/rejected": -0.5944841504096985,
|
|
"logps/chosen": -78.03691101074219,
|
|
"logps/ref_chosen": -59.46440124511719,
|
|
"logps/ref_rejected": -96.54266357421875,
|
|
"logps/rejected": -152.8929443359375,
|
|
"loss": 0.2448,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.09955663979053497,
|
|
"margin_dpo/beta_margin_grad_std": 0.13690762221813202,
|
|
"margin_dpo/beta_margin_mean": 3.777778148651123,
|
|
"margin_dpo/beta_margin_std": 2.689279079437256,
|
|
"margin_dpo/loss_margin_mean": 37.77777862548828,
|
|
"margin_dpo/margin_mean": 37.77777862548828,
|
|
"margin_dpo/margin_std": 26.642715454101562,
|
|
"step": 562
|
|
},
|
|
{
|
|
"epoch": 0.8267254038179148,
|
|
"grad_norm": 62.781524658203125,
|
|
"learning_rate": 4.521198892775202e-08,
|
|
"logits/chosen": -0.5596526265144348,
|
|
"logits/rejected": -0.5383732914924622,
|
|
"logps/chosen": -83.21853637695312,
|
|
"logps/ref_chosen": -60.60819625854492,
|
|
"logps/ref_rejected": -94.56770324707031,
|
|
"logps/rejected": -147.46449279785156,
|
|
"loss": 0.4102,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.15092504024505615,
|
|
"margin_dpo/beta_margin_grad_std": 0.19437021017074585,
|
|
"margin_dpo/beta_margin_mean": 3.0286452770233154,
|
|
"margin_dpo/beta_margin_std": 2.374891519546509,
|
|
"margin_dpo/loss_margin_mean": 30.28645133972168,
|
|
"margin_dpo/margin_mean": 30.286453247070312,
|
|
"margin_dpo/margin_std": 23.04823875427246,
|
|
"step": 563
|
|
},
|
|
{
|
|
"epoch": 0.8281938325991189,
|
|
"grad_norm": 47.216941833496094,
|
|
"learning_rate": 4.447860229910544e-08,
|
|
"logits/chosen": -0.6696497201919556,
|
|
"logits/rejected": -0.6134607195854187,
|
|
"logps/chosen": -96.129638671875,
|
|
"logps/ref_chosen": -74.26837921142578,
|
|
"logps/ref_rejected": -93.2381820678711,
|
|
"logps/rejected": -147.440673828125,
|
|
"loss": 0.3702,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1358504742383957,
|
|
"margin_dpo/beta_margin_grad_std": 0.18947924673557281,
|
|
"margin_dpo/beta_margin_mean": 3.234123945236206,
|
|
"margin_dpo/beta_margin_std": 2.284991502761841,
|
|
"margin_dpo/loss_margin_mean": 32.34123992919922,
|
|
"margin_dpo/margin_mean": 32.34123992919922,
|
|
"margin_dpo/margin_std": 22.592432022094727,
|
|
"step": 564
|
|
},
|
|
{
|
|
"epoch": 0.8296622613803231,
|
|
"grad_norm": 42.826656341552734,
|
|
"learning_rate": 4.375063135042445e-08,
|
|
"logits/chosen": -0.6142770051956177,
|
|
"logits/rejected": -0.5721128582954407,
|
|
"logps/chosen": -90.92054748535156,
|
|
"logps/ref_chosen": -69.0199203491211,
|
|
"logps/ref_rejected": -85.7789306640625,
|
|
"logps/rejected": -142.84002685546875,
|
|
"loss": 0.3742,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1414240449666977,
|
|
"margin_dpo/beta_margin_grad_std": 0.18444375693798065,
|
|
"margin_dpo/beta_margin_mean": 3.5160465240478516,
|
|
"margin_dpo/beta_margin_std": 3.0857439041137695,
|
|
"margin_dpo/loss_margin_mean": 35.160465240478516,
|
|
"margin_dpo/margin_mean": 35.160465240478516,
|
|
"margin_dpo/margin_std": 30.577224731445312,
|
|
"step": 565
|
|
},
|
|
{
|
|
"epoch": 0.8311306901615272,
|
|
"grad_norm": 59.75011444091797,
|
|
"learning_rate": 4.3028095264420525e-08,
|
|
"logits/chosen": -0.5794559717178345,
|
|
"logits/rejected": -0.564411461353302,
|
|
"logps/chosen": -87.007568359375,
|
|
"logps/ref_chosen": -66.5453109741211,
|
|
"logps/ref_rejected": -103.86931610107422,
|
|
"logps/rejected": -158.2462158203125,
|
|
"loss": 0.4748,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.16711834073066711,
|
|
"margin_dpo/beta_margin_grad_std": 0.2162701040506363,
|
|
"margin_dpo/beta_margin_mean": 3.391465425491333,
|
|
"margin_dpo/beta_margin_std": 3.0285098552703857,
|
|
"margin_dpo/loss_margin_mean": 33.91465377807617,
|
|
"margin_dpo/margin_mean": 33.91465759277344,
|
|
"margin_dpo/margin_std": 29.81332778930664,
|
|
"step": 566
|
|
},
|
|
{
|
|
"epoch": 0.8325991189427313,
|
|
"grad_norm": 80.87443542480469,
|
|
"learning_rate": 4.231101308059165e-08,
|
|
"logits/chosen": -0.6721388697624207,
|
|
"logits/rejected": -0.6175321340560913,
|
|
"logps/chosen": -75.00394439697266,
|
|
"logps/ref_chosen": -52.858299255371094,
|
|
"logps/ref_rejected": -85.37095642089844,
|
|
"logps/rejected": -139.839599609375,
|
|
"loss": 0.5855,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.17626914381980896,
|
|
"margin_dpo/beta_margin_grad_std": 0.24363385140895844,
|
|
"margin_dpo/beta_margin_mean": 3.2322990894317627,
|
|
"margin_dpo/beta_margin_std": 2.850463390350342,
|
|
"margin_dpo/loss_margin_mean": 32.32299041748047,
|
|
"margin_dpo/margin_mean": 32.32299041748047,
|
|
"margin_dpo/margin_std": 28.030927658081055,
|
|
"step": 567
|
|
},
|
|
{
|
|
"epoch": 0.8340675477239354,
|
|
"grad_norm": 44.05873489379883,
|
|
"learning_rate": 4.1599403694720145e-08,
|
|
"logits/chosen": -0.6051311492919922,
|
|
"logits/rejected": -0.58836430311203,
|
|
"logps/chosen": -67.80856323242188,
|
|
"logps/ref_chosen": -45.1923828125,
|
|
"logps/ref_rejected": -89.09236145019531,
|
|
"logps/rejected": -149.61813354492188,
|
|
"loss": 0.3543,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.12138967961072922,
|
|
"margin_dpo/beta_margin_grad_std": 0.20320722460746765,
|
|
"margin_dpo/beta_margin_mean": 3.7909576892852783,
|
|
"margin_dpo/beta_margin_std": 2.74312162399292,
|
|
"margin_dpo/loss_margin_mean": 37.909576416015625,
|
|
"margin_dpo/margin_mean": 37.909576416015625,
|
|
"margin_dpo/margin_std": 26.69784164428711,
|
|
"step": 568
|
|
},
|
|
{
|
|
"epoch": 0.8355359765051396,
|
|
"grad_norm": 64.07440948486328,
|
|
"learning_rate": 4.089328585837512e-08,
|
|
"logits/chosen": -0.6324214935302734,
|
|
"logits/rejected": -0.6018074750900269,
|
|
"logps/chosen": -86.39301300048828,
|
|
"logps/ref_chosen": -63.72056198120117,
|
|
"logps/ref_rejected": -79.10325622558594,
|
|
"logps/rejected": -131.90521240234375,
|
|
"loss": 0.5035,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.17517776787281036,
|
|
"margin_dpo/beta_margin_grad_std": 0.22150224447250366,
|
|
"margin_dpo/beta_margin_mean": 3.012951135635376,
|
|
"margin_dpo/beta_margin_std": 2.732919931411743,
|
|
"margin_dpo/loss_margin_mean": 30.1295108795166,
|
|
"margin_dpo/margin_mean": 30.129512786865234,
|
|
"margin_dpo/margin_std": 27.2763729095459,
|
|
"step": 569
|
|
},
|
|
{
|
|
"epoch": 0.8370044052863436,
|
|
"grad_norm": 51.117984771728516,
|
|
"learning_rate": 4.019267817841834e-08,
|
|
"logits/chosen": -0.6380658149719238,
|
|
"logits/rejected": -0.586859941482544,
|
|
"logps/chosen": -82.45683288574219,
|
|
"logps/ref_chosen": -61.61454772949219,
|
|
"logps/ref_rejected": -82.1418685913086,
|
|
"logps/rejected": -137.9852294921875,
|
|
"loss": 0.3273,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.12137595564126968,
|
|
"margin_dpo/beta_margin_grad_std": 0.18200945854187012,
|
|
"margin_dpo/beta_margin_mean": 3.500107765197754,
|
|
"margin_dpo/beta_margin_std": 2.537564516067505,
|
|
"margin_dpo/loss_margin_mean": 35.00107955932617,
|
|
"margin_dpo/margin_mean": 35.001075744628906,
|
|
"margin_dpo/margin_std": 25.32352066040039,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 0.8384728340675477,
|
|
"grad_norm": 61.06570053100586,
|
|
"learning_rate": 3.9497599116513705e-08,
|
|
"logits/chosen": -0.6200108528137207,
|
|
"logits/rejected": -0.5971379280090332,
|
|
"logps/chosen": -75.36268615722656,
|
|
"logps/ref_chosen": -53.05406188964844,
|
|
"logps/ref_rejected": -91.33682250976562,
|
|
"logps/rejected": -148.22666931152344,
|
|
"loss": 0.3723,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.13746890425682068,
|
|
"margin_dpo/beta_margin_grad_std": 0.19136172533035278,
|
|
"margin_dpo/beta_margin_mean": 3.458122968673706,
|
|
"margin_dpo/beta_margin_std": 2.7801191806793213,
|
|
"margin_dpo/loss_margin_mean": 34.58123016357422,
|
|
"margin_dpo/margin_mean": 34.58122634887695,
|
|
"margin_dpo/margin_std": 27.320905685424805,
|
|
"step": 571
|
|
},
|
|
{
|
|
"epoch": 0.8399412628487518,
|
|
"grad_norm": 81.89973449707031,
|
|
"learning_rate": 3.880806698864086e-08,
|
|
"logits/chosen": -0.5995185375213623,
|
|
"logits/rejected": -0.583366334438324,
|
|
"logps/chosen": -75.69577026367188,
|
|
"logps/ref_chosen": -48.459285736083984,
|
|
"logps/ref_rejected": -83.5570297241211,
|
|
"logps/rejected": -143.04672241210938,
|
|
"loss": 0.6994,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.18695051968097687,
|
|
"margin_dpo/beta_margin_grad_std": 0.25848084688186646,
|
|
"margin_dpo/beta_margin_mean": 3.2253220081329346,
|
|
"margin_dpo/beta_margin_std": 3.1980674266815186,
|
|
"margin_dpo/loss_margin_mean": 32.25321960449219,
|
|
"margin_dpo/margin_mean": 32.25321960449219,
|
|
"margin_dpo/margin_std": 31.76388931274414,
|
|
"step": 572
|
|
},
|
|
{
|
|
"epoch": 0.8414096916299559,
|
|
"grad_norm": 60.32197570800781,
|
|
"learning_rate": 3.812409996461275e-08,
|
|
"logits/chosen": -0.6115789413452148,
|
|
"logits/rejected": -0.5747958421707153,
|
|
"logps/chosen": -73.32516479492188,
|
|
"logps/ref_chosen": -51.62262725830078,
|
|
"logps/ref_rejected": -85.32499694824219,
|
|
"logps/rejected": -141.56448364257812,
|
|
"loss": 0.4409,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.15062843263149261,
|
|
"margin_dpo/beta_margin_grad_std": 0.21439620852470398,
|
|
"margin_dpo/beta_margin_mean": 3.453695058822632,
|
|
"margin_dpo/beta_margin_std": 2.861546754837036,
|
|
"margin_dpo/loss_margin_mean": 34.536949157714844,
|
|
"margin_dpo/margin_mean": 34.536949157714844,
|
|
"margin_dpo/margin_std": 26.226520538330078,
|
|
"step": 573
|
|
},
|
|
{
|
|
"epoch": 0.8428781204111601,
|
|
"grad_norm": 71.44043731689453,
|
|
"learning_rate": 3.74457160675965e-08,
|
|
"logits/chosen": -0.662164568901062,
|
|
"logits/rejected": -0.6289517879486084,
|
|
"logps/chosen": -74.57516479492188,
|
|
"logps/ref_chosen": -51.04446029663086,
|
|
"logps/ref_rejected": -92.80640411376953,
|
|
"logps/rejected": -150.48431396484375,
|
|
"loss": 0.4525,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.14266708493232727,
|
|
"margin_dpo/beta_margin_grad_std": 0.20333649218082428,
|
|
"margin_dpo/beta_margin_mean": 3.4147207736968994,
|
|
"margin_dpo/beta_margin_std": 2.838653326034546,
|
|
"margin_dpo/loss_margin_mean": 34.14720916748047,
|
|
"margin_dpo/margin_mean": 34.14720916748047,
|
|
"margin_dpo/margin_std": 27.81047821044922,
|
|
"step": 574
|
|
},
|
|
{
|
|
"epoch": 0.8443465491923642,
|
|
"grad_norm": 83.72123718261719,
|
|
"learning_rate": 3.677293317363864e-08,
|
|
"logits/chosen": -0.5885167121887207,
|
|
"logits/rejected": -0.5552273988723755,
|
|
"logps/chosen": -97.17411804199219,
|
|
"logps/ref_chosen": -71.79014587402344,
|
|
"logps/ref_rejected": -95.38619995117188,
|
|
"logps/rejected": -157.09030151367188,
|
|
"loss": 0.6269,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.16689082980155945,
|
|
"margin_dpo/beta_margin_grad_std": 0.2690768837928772,
|
|
"margin_dpo/beta_margin_mean": 3.632012128829956,
|
|
"margin_dpo/beta_margin_std": 3.186659574508667,
|
|
"margin_dpo/loss_margin_mean": 36.32011795043945,
|
|
"margin_dpo/margin_mean": 36.32011795043945,
|
|
"margin_dpo/margin_std": 31.200788497924805,
|
|
"step": 575
|
|
},
|
|
{
|
|
"epoch": 0.8458149779735683,
|
|
"grad_norm": 47.92924118041992,
|
|
"learning_rate": 3.6105769011194224e-08,
|
|
"logits/chosen": -0.608694314956665,
|
|
"logits/rejected": -0.6066184043884277,
|
|
"logps/chosen": -77.77719116210938,
|
|
"logps/ref_chosen": -54.262969970703125,
|
|
"logps/ref_rejected": -100.7542724609375,
|
|
"logps/rejected": -159.03663635253906,
|
|
"loss": 0.4414,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.14325302839279175,
|
|
"margin_dpo/beta_margin_grad_std": 0.1987282782793045,
|
|
"margin_dpo/beta_margin_mean": 3.4768130779266357,
|
|
"margin_dpo/beta_margin_std": 3.035773992538452,
|
|
"margin_dpo/loss_margin_mean": 34.768131256103516,
|
|
"margin_dpo/margin_mean": 34.768131256103516,
|
|
"margin_dpo/margin_std": 29.473041534423828,
|
|
"step": 576
|
|
},
|
|
{
|
|
"epoch": 0.8472834067547724,
|
|
"grad_norm": 50.64008331298828,
|
|
"learning_rate": 3.5444241160659304e-08,
|
|
"logits/chosen": -0.6180467009544373,
|
|
"logits/rejected": -0.5673972368240356,
|
|
"logps/chosen": -81.8788070678711,
|
|
"logps/ref_chosen": -61.909706115722656,
|
|
"logps/ref_rejected": -84.07069396972656,
|
|
"logps/rejected": -142.12271118164062,
|
|
"loss": 0.355,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.12185031920671463,
|
|
"margin_dpo/beta_margin_grad_std": 0.19115541875362396,
|
|
"margin_dpo/beta_margin_mean": 3.8082919120788574,
|
|
"margin_dpo/beta_margin_std": 2.871381998062134,
|
|
"margin_dpo/loss_margin_mean": 38.082916259765625,
|
|
"margin_dpo/margin_mean": 38.082916259765625,
|
|
"margin_dpo/margin_std": 27.93082046508789,
|
|
"step": 577
|
|
},
|
|
{
|
|
"epoch": 0.8487518355359766,
|
|
"grad_norm": 56.69212341308594,
|
|
"learning_rate": 3.478836705390808e-08,
|
|
"logits/chosen": -0.5924346446990967,
|
|
"logits/rejected": -0.576144814491272,
|
|
"logps/chosen": -76.00813293457031,
|
|
"logps/ref_chosen": -49.26368713378906,
|
|
"logps/ref_rejected": -83.43626403808594,
|
|
"logps/rejected": -145.4608154296875,
|
|
"loss": 0.3666,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.13498100638389587,
|
|
"margin_dpo/beta_margin_grad_std": 0.1896868646144867,
|
|
"margin_dpo/beta_margin_mean": 3.5280113220214844,
|
|
"margin_dpo/beta_margin_std": 2.7423183917999268,
|
|
"margin_dpo/loss_margin_mean": 35.280113220214844,
|
|
"margin_dpo/margin_mean": 35.280113220214844,
|
|
"margin_dpo/margin_std": 26.86646270751953,
|
|
"step": 578
|
|
},
|
|
{
|
|
"epoch": 0.8502202643171806,
|
|
"grad_norm": 58.20693588256836,
|
|
"learning_rate": 3.41381639738331e-08,
|
|
"logits/chosen": -0.5839706659317017,
|
|
"logits/rejected": -0.5557907223701477,
|
|
"logps/chosen": -80.32678985595703,
|
|
"logps/ref_chosen": -58.88581848144531,
|
|
"logps/ref_rejected": -94.78762817382812,
|
|
"logps/rejected": -146.857666015625,
|
|
"loss": 0.3726,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1434396207332611,
|
|
"margin_dpo/beta_margin_grad_std": 0.18104501068592072,
|
|
"margin_dpo/beta_margin_mean": 3.0629067420959473,
|
|
"margin_dpo/beta_margin_std": 2.3335225582122803,
|
|
"margin_dpo/loss_margin_mean": 30.62906837463379,
|
|
"margin_dpo/margin_mean": 30.629066467285156,
|
|
"margin_dpo/margin_std": 23.209409713745117,
|
|
"step": 579
|
|
},
|
|
{
|
|
"epoch": 0.8516886930983847,
|
|
"grad_norm": 46.23623275756836,
|
|
"learning_rate": 3.349364905389032e-08,
|
|
"logits/chosen": -0.6004414558410645,
|
|
"logits/rejected": -0.5626081228256226,
|
|
"logps/chosen": -67.79255676269531,
|
|
"logps/ref_chosen": -48.70684051513672,
|
|
"logps/ref_rejected": -81.7583999633789,
|
|
"logps/rejected": -141.36367797851562,
|
|
"loss": 0.3452,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.11654352396726608,
|
|
"margin_dpo/beta_margin_grad_std": 0.19992825388908386,
|
|
"margin_dpo/beta_margin_mean": 4.051955699920654,
|
|
"margin_dpo/beta_margin_std": 3.0127224922180176,
|
|
"margin_dpo/loss_margin_mean": 40.519554138183594,
|
|
"margin_dpo/margin_mean": 40.519554138183594,
|
|
"margin_dpo/margin_std": 30.061260223388672,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 0.8531571218795888,
|
|
"grad_norm": 52.084510803222656,
|
|
"learning_rate": 3.285483927764726e-08,
|
|
"logits/chosen": -0.6124836802482605,
|
|
"logits/rejected": -0.5917458534240723,
|
|
"logps/chosen": -83.28103637695312,
|
|
"logps/ref_chosen": -62.22235107421875,
|
|
"logps/ref_rejected": -91.73568725585938,
|
|
"logps/rejected": -143.91891479492188,
|
|
"loss": 0.427,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.15421409904956818,
|
|
"margin_dpo/beta_margin_grad_std": 0.20411258935928345,
|
|
"margin_dpo/beta_margin_mean": 3.1124558448791504,
|
|
"margin_dpo/beta_margin_std": 2.4851300716400146,
|
|
"margin_dpo/loss_margin_mean": 31.124557495117188,
|
|
"margin_dpo/margin_mean": 31.124557495117188,
|
|
"margin_dpo/margin_std": 24.729656219482422,
|
|
"step": 581
|
|
},
|
|
{
|
|
"epoch": 0.8546255506607929,
|
|
"grad_norm": 66.89032745361328,
|
|
"learning_rate": 3.222175147833556e-08,
|
|
"logits/chosen": -0.6103986501693726,
|
|
"logits/rejected": -0.6106557846069336,
|
|
"logps/chosen": -77.18165588378906,
|
|
"logps/ref_chosen": -58.228660583496094,
|
|
"logps/ref_rejected": -110.06959533691406,
|
|
"logps/rejected": -164.72235107421875,
|
|
"loss": 0.4122,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1494351178407669,
|
|
"margin_dpo/beta_margin_grad_std": 0.20539377629756927,
|
|
"margin_dpo/beta_margin_mean": 3.5699760913848877,
|
|
"margin_dpo/beta_margin_std": 2.9193856716156006,
|
|
"margin_dpo/loss_margin_mean": 35.69976043701172,
|
|
"margin_dpo/margin_mean": 35.69976043701172,
|
|
"margin_dpo/margin_std": 29.070068359375,
|
|
"step": 582
|
|
},
|
|
{
|
|
"epoch": 0.856093979441997,
|
|
"grad_norm": 67.63478088378906,
|
|
"learning_rate": 3.159440233840763e-08,
|
|
"logits/chosen": -0.5816251039505005,
|
|
"logits/rejected": -0.563323438167572,
|
|
"logps/chosen": -81.4218978881836,
|
|
"logps/ref_chosen": -56.86286163330078,
|
|
"logps/ref_rejected": -88.4039306640625,
|
|
"logps/rejected": -142.5087890625,
|
|
"loss": 0.5731,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.18249757587909698,
|
|
"margin_dpo/beta_margin_grad_std": 0.23606114089488983,
|
|
"margin_dpo/beta_margin_mean": 2.954582929611206,
|
|
"margin_dpo/beta_margin_std": 2.9761416912078857,
|
|
"margin_dpo/loss_margin_mean": 29.54582977294922,
|
|
"margin_dpo/margin_mean": 29.54582977294922,
|
|
"margin_dpo/margin_std": 29.586864471435547,
|
|
"step": 583
|
|
},
|
|
{
|
|
"epoch": 0.8575624082232012,
|
|
"grad_norm": 42.6038932800293,
|
|
"learning_rate": 3.0972808389096635e-08,
|
|
"logits/chosen": -0.5968157052993774,
|
|
"logits/rejected": -0.5377147197723389,
|
|
"logps/chosen": -74.70047760009766,
|
|
"logps/ref_chosen": -56.90068054199219,
|
|
"logps/ref_rejected": -97.63606262207031,
|
|
"logps/rejected": -154.8049774169922,
|
|
"loss": 0.2565,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.10045187175273895,
|
|
"margin_dpo/beta_margin_grad_std": 0.15640710294246674,
|
|
"margin_dpo/beta_margin_mean": 3.9369120597839355,
|
|
"margin_dpo/beta_margin_std": 2.6277453899383545,
|
|
"margin_dpo/loss_margin_mean": 39.36912155151367,
|
|
"margin_dpo/margin_mean": 39.369117736816406,
|
|
"margin_dpo/margin_std": 26.270313262939453,
|
|
"step": 584
|
|
},
|
|
{
|
|
"epoch": 0.8590308370044053,
|
|
"grad_norm": 67.87578582763672,
|
|
"learning_rate": 3.035698600998121e-08,
|
|
"logits/chosen": -0.59834885597229,
|
|
"logits/rejected": -0.570793628692627,
|
|
"logps/chosen": -85.62800598144531,
|
|
"logps/ref_chosen": -60.973968505859375,
|
|
"logps/ref_rejected": -84.16952514648438,
|
|
"logps/rejected": -141.87606811523438,
|
|
"loss": 0.4808,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1607590615749359,
|
|
"margin_dpo/beta_margin_grad_std": 0.22445270419120789,
|
|
"margin_dpo/beta_margin_mean": 3.305250883102417,
|
|
"margin_dpo/beta_margin_std": 2.882838010787964,
|
|
"margin_dpo/loss_margin_mean": 33.05250549316406,
|
|
"margin_dpo/margin_mean": 33.05250549316406,
|
|
"margin_dpo/margin_std": 28.073482513427734,
|
|
"step": 585
|
|
},
|
|
{
|
|
"epoch": 0.8604992657856094,
|
|
"grad_norm": 64.7607650756836,
|
|
"learning_rate": 2.974695142855388e-08,
|
|
"logits/chosen": -0.5770500898361206,
|
|
"logits/rejected": -0.5656349658966064,
|
|
"logps/chosen": -82.06817626953125,
|
|
"logps/ref_chosen": -56.85559844970703,
|
|
"logps/ref_rejected": -91.8026123046875,
|
|
"logps/rejected": -149.83375549316406,
|
|
"loss": 0.5571,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.16552101075649261,
|
|
"margin_dpo/beta_margin_grad_std": 0.23515141010284424,
|
|
"margin_dpo/beta_margin_mean": 3.2818562984466553,
|
|
"margin_dpo/beta_margin_std": 2.998326539993286,
|
|
"margin_dpo/loss_margin_mean": 32.81856155395508,
|
|
"margin_dpo/margin_mean": 32.81856155395508,
|
|
"margin_dpo/margin_std": 29.866100311279297,
|
|
"step": 586
|
|
},
|
|
{
|
|
"epoch": 0.8619676945668135,
|
|
"grad_norm": 46.60182571411133,
|
|
"learning_rate": 2.9142720719793122e-08,
|
|
"logits/chosen": -0.6530208587646484,
|
|
"logits/rejected": -0.6360703706741333,
|
|
"logps/chosen": -62.827880859375,
|
|
"logps/ref_chosen": -44.69159698486328,
|
|
"logps/ref_rejected": -82.62385559082031,
|
|
"logps/rejected": -131.34133911132812,
|
|
"loss": 0.5,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1838323473930359,
|
|
"margin_dpo/beta_margin_grad_std": 0.21402017772197723,
|
|
"margin_dpo/beta_margin_mean": 3.0581204891204834,
|
|
"margin_dpo/beta_margin_std": 2.7596585750579834,
|
|
"margin_dpo/loss_margin_mean": 30.581205368041992,
|
|
"margin_dpo/margin_mean": 30.58120346069336,
|
|
"margin_dpo/margin_std": 27.5166015625,
|
|
"step": 587
|
|
},
|
|
{
|
|
"epoch": 0.8634361233480177,
|
|
"grad_norm": 69.0011978149414,
|
|
"learning_rate": 2.8544309805740018e-08,
|
|
"logits/chosen": -0.6193605661392212,
|
|
"logits/rejected": -0.6037042140960693,
|
|
"logps/chosen": -72.90446472167969,
|
|
"logps/ref_chosen": -50.294952392578125,
|
|
"logps/ref_rejected": -107.36988067626953,
|
|
"logps/rejected": -162.0211639404297,
|
|
"loss": 0.4825,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.16973692178726196,
|
|
"margin_dpo/beta_margin_grad_std": 0.2188083976507187,
|
|
"margin_dpo/beta_margin_mean": 3.204176664352417,
|
|
"margin_dpo/beta_margin_std": 2.8548641204833984,
|
|
"margin_dpo/loss_margin_mean": 32.04176712036133,
|
|
"margin_dpo/margin_mean": 32.04176712036133,
|
|
"margin_dpo/margin_std": 28.34649658203125,
|
|
"step": 588
|
|
},
|
|
{
|
|
"epoch": 0.8649045521292217,
|
|
"grad_norm": 41.75368118286133,
|
|
"learning_rate": 2.7951734455078786e-08,
|
|
"logits/chosen": -0.6043207049369812,
|
|
"logits/rejected": -0.5762794017791748,
|
|
"logps/chosen": -82.0301513671875,
|
|
"logps/ref_chosen": -59.929908752441406,
|
|
"logps/ref_rejected": -111.65534973144531,
|
|
"logps/rejected": -178.11956787109375,
|
|
"loss": 0.3183,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.10713424533605576,
|
|
"margin_dpo/beta_margin_grad_std": 0.19531351327896118,
|
|
"margin_dpo/beta_margin_mean": 4.436398506164551,
|
|
"margin_dpo/beta_margin_std": 3.308957099914551,
|
|
"margin_dpo/loss_margin_mean": 44.36398696899414,
|
|
"margin_dpo/margin_mean": 44.363983154296875,
|
|
"margin_dpo/margin_std": 33.016082763671875,
|
|
"step": 589
|
|
},
|
|
{
|
|
"epoch": 0.8663729809104258,
|
|
"grad_norm": 38.492958068847656,
|
|
"learning_rate": 2.736501028272095e-08,
|
|
"logits/chosen": -0.6257822513580322,
|
|
"logits/rejected": -0.6011620163917542,
|
|
"logps/chosen": -77.59140014648438,
|
|
"logps/ref_chosen": -55.80979537963867,
|
|
"logps/ref_rejected": -106.06282043457031,
|
|
"logps/rejected": -166.545166015625,
|
|
"loss": 0.2747,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.10672850161790848,
|
|
"margin_dpo/beta_margin_grad_std": 0.15377846360206604,
|
|
"margin_dpo/beta_margin_mean": 3.8700757026672363,
|
|
"margin_dpo/beta_margin_std": 2.8155410289764404,
|
|
"margin_dpo/loss_margin_mean": 38.70075607299805,
|
|
"margin_dpo/margin_mean": 38.70075607299805,
|
|
"margin_dpo/margin_std": 28.101266860961914,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 0.8678414096916299,
|
|
"grad_norm": 63.176300048828125,
|
|
"learning_rate": 2.678415274939408e-08,
|
|
"logits/chosen": -0.6197452545166016,
|
|
"logits/rejected": -0.5587427616119385,
|
|
"logps/chosen": -81.11954498291016,
|
|
"logps/ref_chosen": -56.24061965942383,
|
|
"logps/ref_rejected": -83.78629302978516,
|
|
"logps/rejected": -145.21470642089844,
|
|
"loss": 0.3933,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.12877370417118073,
|
|
"margin_dpo/beta_margin_grad_std": 0.21353955566883087,
|
|
"margin_dpo/beta_margin_mean": 3.6549482345581055,
|
|
"margin_dpo/beta_margin_std": 2.5838942527770996,
|
|
"margin_dpo/loss_margin_mean": 36.54948425292969,
|
|
"margin_dpo/margin_mean": 36.54948425292969,
|
|
"margin_dpo/margin_std": 25.822662353515625,
|
|
"step": 591
|
|
},
|
|
{
|
|
"epoch": 0.869309838472834,
|
|
"grad_norm": 85.90067291259766,
|
|
"learning_rate": 2.6209177161234442e-08,
|
|
"logits/chosen": -0.6024812459945679,
|
|
"logits/rejected": -0.5797621011734009,
|
|
"logps/chosen": -73.40560913085938,
|
|
"logps/ref_chosen": -47.94025421142578,
|
|
"logps/ref_rejected": -75.73287963867188,
|
|
"logps/rejected": -136.9627685546875,
|
|
"loss": 0.5919,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.15234871208667755,
|
|
"margin_dpo/beta_margin_grad_std": 0.2595595717430115,
|
|
"margin_dpo/beta_margin_mean": 3.576453685760498,
|
|
"margin_dpo/beta_margin_std": 2.9050722122192383,
|
|
"margin_dpo/loss_margin_mean": 35.76453399658203,
|
|
"margin_dpo/margin_mean": 35.76453399658203,
|
|
"margin_dpo/margin_std": 28.354209899902344,
|
|
"step": 592
|
|
},
|
|
{
|
|
"epoch": 0.8707782672540382,
|
|
"grad_norm": 91.05819702148438,
|
|
"learning_rate": 2.564009866938349e-08,
|
|
"logits/chosen": -0.5534486770629883,
|
|
"logits/rejected": -0.5277604460716248,
|
|
"logps/chosen": -72.19068145751953,
|
|
"logps/ref_chosen": -48.690757751464844,
|
|
"logps/ref_rejected": -60.90800476074219,
|
|
"logps/rejected": -114.34281921386719,
|
|
"loss": 0.6214,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.18934105336666107,
|
|
"margin_dpo/beta_margin_grad_std": 0.2560799717903137,
|
|
"margin_dpo/beta_margin_mean": 2.9934892654418945,
|
|
"margin_dpo/beta_margin_std": 2.8498647212982178,
|
|
"margin_dpo/loss_margin_mean": 29.934890747070312,
|
|
"margin_dpo/margin_mean": 29.934890747070312,
|
|
"margin_dpo/margin_std": 28.364194869995117,
|
|
"step": 593
|
|
},
|
|
{
|
|
"epoch": 0.8722466960352423,
|
|
"grad_norm": 61.273799896240234,
|
|
"learning_rate": 2.5076932269588708e-08,
|
|
"logits/chosen": -0.6247504353523254,
|
|
"logits/rejected": -0.5774896740913391,
|
|
"logps/chosen": -76.23826599121094,
|
|
"logps/ref_chosen": -54.93488693237305,
|
|
"logps/ref_rejected": -86.09967041015625,
|
|
"logps/rejected": -147.30072021484375,
|
|
"loss": 0.4983,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.14729663729667664,
|
|
"margin_dpo/beta_margin_grad_std": 0.21982048451900482,
|
|
"margin_dpo/beta_margin_mean": 3.9897680282592773,
|
|
"margin_dpo/beta_margin_std": 3.5569002628326416,
|
|
"margin_dpo/loss_margin_mean": 39.89767837524414,
|
|
"margin_dpo/margin_mean": 39.897682189941406,
|
|
"margin_dpo/margin_std": 34.09754180908203,
|
|
"step": 594
|
|
},
|
|
{
|
|
"epoch": 0.8737151248164464,
|
|
"grad_norm": 43.19758987426758,
|
|
"learning_rate": 2.451969280180849e-08,
|
|
"logits/chosen": -0.5981707572937012,
|
|
"logits/rejected": -0.5647690296173096,
|
|
"logps/chosen": -72.47997283935547,
|
|
"logps/ref_chosen": -49.42041778564453,
|
|
"logps/ref_rejected": -80.62731170654297,
|
|
"logps/rejected": -135.92117309570312,
|
|
"loss": 0.3785,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.14596061408519745,
|
|
"margin_dpo/beta_margin_grad_std": 0.18515609204769135,
|
|
"margin_dpo/beta_margin_mean": 3.223430871963501,
|
|
"margin_dpo/beta_margin_std": 2.712632417678833,
|
|
"margin_dpo/loss_margin_mean": 32.23430633544922,
|
|
"margin_dpo/margin_mean": 32.23430633544922,
|
|
"margin_dpo/margin_std": 26.91950225830078,
|
|
"step": 595
|
|
},
|
|
{
|
|
"epoch": 0.8751835535976505,
|
|
"grad_norm": 62.67118453979492,
|
|
"learning_rate": 2.396839494982103e-08,
|
|
"logits/chosen": -0.5990803241729736,
|
|
"logits/rejected": -0.5536556243896484,
|
|
"logps/chosen": -81.62798309326172,
|
|
"logps/ref_chosen": -59.791683197021484,
|
|
"logps/ref_rejected": -80.09111785888672,
|
|
"logps/rejected": -136.9227752685547,
|
|
"loss": 0.4779,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.16171905398368835,
|
|
"margin_dpo/beta_margin_grad_std": 0.2228172868490219,
|
|
"margin_dpo/beta_margin_mean": 3.4995357990264893,
|
|
"margin_dpo/beta_margin_std": 3.0359036922454834,
|
|
"margin_dpo/loss_margin_mean": 34.995357513427734,
|
|
"margin_dpo/margin_mean": 34.995357513427734,
|
|
"margin_dpo/margin_std": 29.87460708618164,
|
|
"step": 596
|
|
},
|
|
{
|
|
"epoch": 0.8766519823788547,
|
|
"grad_norm": 60.41211700439453,
|
|
"learning_rate": 2.3423053240837514e-08,
|
|
"logits/chosen": -0.5785458087921143,
|
|
"logits/rejected": -0.5741050243377686,
|
|
"logps/chosen": -79.68421936035156,
|
|
"logps/ref_chosen": -57.26078796386719,
|
|
"logps/ref_rejected": -100.6937255859375,
|
|
"logps/rejected": -158.439453125,
|
|
"loss": 0.5269,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1700301468372345,
|
|
"margin_dpo/beta_margin_grad_std": 0.240619495511055,
|
|
"margin_dpo/beta_margin_mean": 3.532228708267212,
|
|
"margin_dpo/beta_margin_std": 3.1880156993865967,
|
|
"margin_dpo/loss_margin_mean": 35.32228469848633,
|
|
"margin_dpo/margin_mean": 35.32228088378906,
|
|
"margin_dpo/margin_std": 31.730697631835938,
|
|
"step": 597
|
|
},
|
|
{
|
|
"epoch": 0.8781204111600588,
|
|
"grad_norm": 62.684593200683594,
|
|
"learning_rate": 2.2883682045119062e-08,
|
|
"logits/chosen": -0.6382852792739868,
|
|
"logits/rejected": -0.6104525923728943,
|
|
"logps/chosen": -75.68168640136719,
|
|
"logps/ref_chosen": -52.51850509643555,
|
|
"logps/ref_rejected": -89.44385528564453,
|
|
"logps/rejected": -145.06332397460938,
|
|
"loss": 0.4532,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.14109815657138824,
|
|
"margin_dpo/beta_margin_grad_std": 0.21213975548744202,
|
|
"margin_dpo/beta_margin_mean": 3.2456300258636475,
|
|
"margin_dpo/beta_margin_std": 2.523146629333496,
|
|
"margin_dpo/loss_margin_mean": 32.456298828125,
|
|
"margin_dpo/margin_mean": 32.456298828125,
|
|
"margin_dpo/margin_std": 24.71021270751953,
|
|
"step": 598
|
|
},
|
|
{
|
|
"epoch": 0.8795888399412628,
|
|
"grad_norm": 58.74440383911133,
|
|
"learning_rate": 2.2350295575598367e-08,
|
|
"logits/chosen": -0.5937786102294922,
|
|
"logits/rejected": -0.5736675262451172,
|
|
"logps/chosen": -71.58798217773438,
|
|
"logps/ref_chosen": -49.802677154541016,
|
|
"logps/ref_rejected": -82.978515625,
|
|
"logps/rejected": -137.42575073242188,
|
|
"loss": 0.4549,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.15846121311187744,
|
|
"margin_dpo/beta_margin_grad_std": 0.22350937128067017,
|
|
"margin_dpo/beta_margin_mean": 3.266195058822632,
|
|
"margin_dpo/beta_margin_std": 2.5800249576568604,
|
|
"margin_dpo/loss_margin_mean": 32.661949157714844,
|
|
"margin_dpo/margin_mean": 32.661949157714844,
|
|
"margin_dpo/margin_std": 25.785709381103516,
|
|
"step": 599
|
|
},
|
|
{
|
|
"epoch": 0.8810572687224669,
|
|
"grad_norm": 71.30066680908203,
|
|
"learning_rate": 2.1822907887504932e-08,
|
|
"logits/chosen": -0.663406252861023,
|
|
"logits/rejected": -0.6377668380737305,
|
|
"logps/chosen": -87.9482421875,
|
|
"logps/ref_chosen": -66.43487548828125,
|
|
"logps/ref_rejected": -85.45649719238281,
|
|
"logps/rejected": -137.04623413085938,
|
|
"loss": 0.4868,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.15097591280937195,
|
|
"margin_dpo/beta_margin_grad_std": 0.21298004686832428,
|
|
"margin_dpo/beta_margin_mean": 3.007636785507202,
|
|
"margin_dpo/beta_margin_std": 2.5406298637390137,
|
|
"margin_dpo/loss_margin_mean": 30.07636833190918,
|
|
"margin_dpo/margin_mean": 30.076370239257812,
|
|
"margin_dpo/margin_std": 25.37958526611328,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.8810572687224669,
|
|
"eval_logits/chosen": -0.6200472712516785,
|
|
"eval_logits/rejected": -0.5939710140228271,
|
|
"eval_logps/chosen": -105.88009643554688,
|
|
"eval_logps/ref_chosen": -79.05104064941406,
|
|
"eval_logps/ref_rejected": -86.79793548583984,
|
|
"eval_logps/rejected": -135.36648559570312,
|
|
"eval_loss": 0.4055388867855072,
|
|
"eval_margin_dpo/beta": 0.10000000149011612,
|
|
"eval_margin_dpo/beta_margin_grad_mean": -0.2572847008705139,
|
|
"eval_margin_dpo/beta_margin_grad_std": 0.2540939152240753,
|
|
"eval_margin_dpo/beta_margin_mean": 2.173950672149658,
|
|
"eval_margin_dpo/beta_margin_std": 2.6342239379882812,
|
|
"eval_margin_dpo/loss_margin_mean": 21.739503860473633,
|
|
"eval_margin_dpo/margin_mean": 21.739503860473633,
|
|
"eval_margin_dpo/margin_std": 26.342239379882812,
|
|
"eval_runtime": 40.1711,
|
|
"eval_samples_per_second": 58.226,
|
|
"eval_steps_per_second": 1.842,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.882525697503671,
|
|
"grad_norm": 82.41748046875,
|
|
"learning_rate": 2.1301532877994742e-08,
|
|
"logits/chosen": -0.6234362125396729,
|
|
"logits/rejected": -0.595230758190155,
|
|
"logps/chosen": -85.03898620605469,
|
|
"logps/ref_chosen": -59.13360595703125,
|
|
"logps/ref_rejected": -94.69093322753906,
|
|
"logps/rejected": -154.72296142578125,
|
|
"loss": 0.5251,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.15890392661094666,
|
|
"margin_dpo/beta_margin_grad_std": 0.24525830149650574,
|
|
"margin_dpo/beta_margin_mean": 3.412665843963623,
|
|
"margin_dpo/beta_margin_std": 2.932499885559082,
|
|
"margin_dpo/loss_margin_mean": 34.12665939331055,
|
|
"margin_dpo/margin_mean": 34.12665939331055,
|
|
"margin_dpo/margin_std": 28.886859893798828,
|
|
"step": 601
|
|
},
|
|
{
|
|
"epoch": 0.8839941262848752,
|
|
"grad_norm": 68.01284790039062,
|
|
"learning_rate": 2.0786184285784298e-08,
|
|
"logits/chosen": -0.6261130571365356,
|
|
"logits/rejected": -0.626197338104248,
|
|
"logps/chosen": -66.78749084472656,
|
|
"logps/ref_chosen": -48.59352111816406,
|
|
"logps/ref_rejected": -87.6685562133789,
|
|
"logps/rejected": -143.57113647460938,
|
|
"loss": 0.3531,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.12587696313858032,
|
|
"margin_dpo/beta_margin_grad_std": 0.19796113669872284,
|
|
"margin_dpo/beta_margin_mean": 3.770860195159912,
|
|
"margin_dpo/beta_margin_std": 2.8099164962768555,
|
|
"margin_dpo/loss_margin_mean": 37.70860290527344,
|
|
"margin_dpo/margin_mean": 37.70860290527344,
|
|
"margin_dpo/margin_std": 27.594802856445312,
|
|
"step": 602
|
|
},
|
|
{
|
|
"epoch": 0.8854625550660793,
|
|
"grad_norm": 65.49505615234375,
|
|
"learning_rate": 2.0276875690788204e-08,
|
|
"logits/chosen": -0.637772262096405,
|
|
"logits/rejected": -0.5984662175178528,
|
|
"logps/chosen": -90.7020263671875,
|
|
"logps/ref_chosen": -70.41461944580078,
|
|
"logps/ref_rejected": -100.32560729980469,
|
|
"logps/rejected": -152.65615844726562,
|
|
"loss": 0.4719,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1628403216600418,
|
|
"margin_dpo/beta_margin_grad_std": 0.2238980382680893,
|
|
"margin_dpo/beta_margin_mean": 3.2043137550354004,
|
|
"margin_dpo/beta_margin_std": 2.628369092941284,
|
|
"margin_dpo/loss_margin_mean": 32.04313659667969,
|
|
"margin_dpo/margin_mean": 32.04313659667969,
|
|
"margin_dpo/margin_std": 26.230998992919922,
|
|
"step": 603
|
|
},
|
|
{
|
|
"epoch": 0.8869309838472834,
|
|
"grad_norm": 64.45735931396484,
|
|
"learning_rate": 1.977362051376158e-08,
|
|
"logits/chosen": -0.5643373727798462,
|
|
"logits/rejected": -0.5535662770271301,
|
|
"logps/chosen": -65.24546813964844,
|
|
"logps/ref_chosen": -46.45808029174805,
|
|
"logps/ref_rejected": -91.8544921875,
|
|
"logps/rejected": -146.03567504882812,
|
|
"loss": 0.4607,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.14535552263259888,
|
|
"margin_dpo/beta_margin_grad_std": 0.2219676375389099,
|
|
"margin_dpo/beta_margin_mean": 3.5393803119659424,
|
|
"margin_dpo/beta_margin_std": 2.988723039627075,
|
|
"margin_dpo/loss_margin_mean": 35.393802642822266,
|
|
"margin_dpo/margin_mean": 35.393802642822266,
|
|
"margin_dpo/margin_std": 29.511133193969727,
|
|
"step": 604
|
|
},
|
|
{
|
|
"epoch": 0.8883994126284875,
|
|
"grad_norm": 62.30309295654297,
|
|
"learning_rate": 1.9276432015946446e-08,
|
|
"logits/chosen": -0.6186962127685547,
|
|
"logits/rejected": -0.603484034538269,
|
|
"logps/chosen": -90.84162139892578,
|
|
"logps/ref_chosen": -66.24933624267578,
|
|
"logps/ref_rejected": -102.30496978759766,
|
|
"logps/rejected": -158.584228515625,
|
|
"loss": 0.4569,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1487942636013031,
|
|
"margin_dpo/beta_margin_grad_std": 0.1991681158542633,
|
|
"margin_dpo/beta_margin_mean": 3.168696165084839,
|
|
"margin_dpo/beta_margin_std": 3.0381813049316406,
|
|
"margin_dpo/loss_margin_mean": 31.686960220336914,
|
|
"margin_dpo/margin_mean": 31.68695831298828,
|
|
"margin_dpo/margin_std": 29.331512451171875,
|
|
"step": 605
|
|
},
|
|
{
|
|
"epoch": 0.8898678414096917,
|
|
"grad_norm": 44.491546630859375,
|
|
"learning_rate": 1.8785323298722093e-08,
|
|
"logits/chosen": -0.5961008071899414,
|
|
"logits/rejected": -0.564789354801178,
|
|
"logps/chosen": -76.80615234375,
|
|
"logps/ref_chosen": -54.819122314453125,
|
|
"logps/ref_rejected": -98.37147521972656,
|
|
"logps/rejected": -157.0362548828125,
|
|
"loss": 0.2954,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.12030400335788727,
|
|
"margin_dpo/beta_margin_grad_std": 0.1559758484363556,
|
|
"margin_dpo/beta_margin_mean": 3.6677749156951904,
|
|
"margin_dpo/beta_margin_std": 2.649290084838867,
|
|
"margin_dpo/loss_margin_mean": 36.67774963378906,
|
|
"margin_dpo/margin_mean": 36.6777458190918,
|
|
"margin_dpo/margin_std": 25.4649658203125,
|
|
"step": 606
|
|
},
|
|
{
|
|
"epoch": 0.8913362701908958,
|
|
"grad_norm": 50.320865631103516,
|
|
"learning_rate": 1.8300307303259904e-08,
|
|
"logits/chosen": -0.5963802337646484,
|
|
"logits/rejected": -0.5623406171798706,
|
|
"logps/chosen": -79.11578369140625,
|
|
"logps/ref_chosen": -58.08403778076172,
|
|
"logps/ref_rejected": -79.777099609375,
|
|
"logps/rejected": -133.24951171875,
|
|
"loss": 0.3387,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.13021884858608246,
|
|
"margin_dpo/beta_margin_grad_std": 0.17474402487277985,
|
|
"margin_dpo/beta_margin_mean": 3.244065999984741,
|
|
"margin_dpo/beta_margin_std": 2.3698348999023438,
|
|
"margin_dpo/loss_margin_mean": 32.44065856933594,
|
|
"margin_dpo/margin_mean": 32.44065856933594,
|
|
"margin_dpo/margin_std": 23.566665649414062,
|
|
"step": 607
|
|
},
|
|
{
|
|
"epoch": 0.8928046989720999,
|
|
"grad_norm": 58.68054962158203,
|
|
"learning_rate": 1.7821396810182437e-08,
|
|
"logits/chosen": -0.6197670698165894,
|
|
"logits/rejected": -0.5876868963241577,
|
|
"logps/chosen": -78.17123413085938,
|
|
"logps/ref_chosen": -57.450836181640625,
|
|
"logps/ref_rejected": -94.77339172363281,
|
|
"logps/rejected": -148.73159790039062,
|
|
"loss": 0.4835,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.15202751755714417,
|
|
"margin_dpo/beta_margin_grad_std": 0.22783887386322021,
|
|
"margin_dpo/beta_margin_mean": 3.323781728744507,
|
|
"margin_dpo/beta_margin_std": 2.665648937225342,
|
|
"margin_dpo/loss_margin_mean": 33.237815856933594,
|
|
"margin_dpo/margin_mean": 33.237815856933594,
|
|
"margin_dpo/margin_std": 26.23067855834961,
|
|
"step": 608
|
|
},
|
|
{
|
|
"epoch": 0.8942731277533039,
|
|
"grad_norm": 64.48681640625,
|
|
"learning_rate": 1.7348604439226617e-08,
|
|
"logits/chosen": -0.642276406288147,
|
|
"logits/rejected": -0.6062139272689819,
|
|
"logps/chosen": -81.96639251708984,
|
|
"logps/ref_chosen": -58.805355072021484,
|
|
"logps/ref_rejected": -88.81600952148438,
|
|
"logps/rejected": -145.61566162109375,
|
|
"loss": 0.3479,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.12766654789447784,
|
|
"margin_dpo/beta_margin_grad_std": 0.18546564877033234,
|
|
"margin_dpo/beta_margin_mean": 3.3638622760772705,
|
|
"margin_dpo/beta_margin_std": 2.390188694000244,
|
|
"margin_dpo/loss_margin_mean": 33.63862228393555,
|
|
"margin_dpo/margin_mean": 33.63862228393555,
|
|
"margin_dpo/margin_std": 23.823345184326172,
|
|
"step": 609
|
|
},
|
|
{
|
|
"epoch": 0.895741556534508,
|
|
"grad_norm": 74.75220489501953,
|
|
"learning_rate": 1.6881942648911074e-08,
|
|
"logits/chosen": -0.6093329191207886,
|
|
"logits/rejected": -0.5498037934303284,
|
|
"logps/chosen": -90.24623107910156,
|
|
"logps/ref_chosen": -65.69503784179688,
|
|
"logps/ref_rejected": -83.4053955078125,
|
|
"logps/rejected": -140.6365966796875,
|
|
"loss": 0.4533,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.15364539623260498,
|
|
"margin_dpo/beta_margin_grad_std": 0.21927115321159363,
|
|
"margin_dpo/beta_margin_mean": 3.2680017948150635,
|
|
"margin_dpo/beta_margin_std": 2.5850718021392822,
|
|
"margin_dpo/loss_margin_mean": 32.680015563964844,
|
|
"margin_dpo/margin_mean": 32.680015563964844,
|
|
"margin_dpo/margin_std": 24.990657806396484,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 0.8972099853157122,
|
|
"grad_norm": 52.49784851074219,
|
|
"learning_rate": 1.6421423736208e-08,
|
|
"logits/chosen": -0.6183408498764038,
|
|
"logits/rejected": -0.580921471118927,
|
|
"logps/chosen": -74.63272094726562,
|
|
"logps/ref_chosen": -52.59947204589844,
|
|
"logps/ref_rejected": -86.33099365234375,
|
|
"logps/rejected": -144.24195861816406,
|
|
"loss": 0.3916,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1446484923362732,
|
|
"margin_dpo/beta_margin_grad_std": 0.1988787204027176,
|
|
"margin_dpo/beta_margin_mean": 3.587771415710449,
|
|
"margin_dpo/beta_margin_std": 2.8050875663757324,
|
|
"margin_dpo/loss_margin_mean": 35.877716064453125,
|
|
"margin_dpo/margin_mean": 35.877716064453125,
|
|
"margin_dpo/margin_std": 27.9959774017334,
|
|
"step": 611
|
|
},
|
|
{
|
|
"epoch": 0.8986784140969163,
|
|
"grad_norm": 44.11368179321289,
|
|
"learning_rate": 1.5967059836219042e-08,
|
|
"logits/chosen": -0.6275376081466675,
|
|
"logits/rejected": -0.5670713782310486,
|
|
"logps/chosen": -80.20808410644531,
|
|
"logps/ref_chosen": -59.32372283935547,
|
|
"logps/ref_rejected": -88.31239318847656,
|
|
"logps/rejected": -150.14288330078125,
|
|
"loss": 0.2722,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.10046197474002838,
|
|
"margin_dpo/beta_margin_grad_std": 0.16786958277225494,
|
|
"margin_dpo/beta_margin_mean": 4.094614028930664,
|
|
"margin_dpo/beta_margin_std": 2.759153127670288,
|
|
"margin_dpo/loss_margin_mean": 40.94614028930664,
|
|
"margin_dpo/margin_mean": 40.94614028930664,
|
|
"margin_dpo/margin_std": 27.57101058959961,
|
|
"step": 612
|
|
},
|
|
{
|
|
"epoch": 0.9001468428781204,
|
|
"grad_norm": 51.012901306152344,
|
|
"learning_rate": 1.551886292185553e-08,
|
|
"logits/chosen": -0.6327238082885742,
|
|
"logits/rejected": -0.6284672021865845,
|
|
"logps/chosen": -80.63017272949219,
|
|
"logps/ref_chosen": -59.72996520996094,
|
|
"logps/ref_rejected": -105.10753631591797,
|
|
"logps/rejected": -161.78628540039062,
|
|
"loss": 0.3659,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.13130351901054382,
|
|
"margin_dpo/beta_margin_grad_std": 0.2031707614660263,
|
|
"margin_dpo/beta_margin_mean": 3.5778555870056152,
|
|
"margin_dpo/beta_margin_std": 2.7456395626068115,
|
|
"margin_dpo/loss_margin_mean": 35.77855682373047,
|
|
"margin_dpo/margin_mean": 35.77855682373047,
|
|
"margin_dpo/margin_std": 27.143339157104492,
|
|
"step": 613
|
|
},
|
|
{
|
|
"epoch": 0.9016152716593245,
|
|
"grad_norm": 48.471588134765625,
|
|
"learning_rate": 1.507684480352292e-08,
|
|
"logits/chosen": -0.5755459070205688,
|
|
"logits/rejected": -0.5698869824409485,
|
|
"logps/chosen": -76.522705078125,
|
|
"logps/ref_chosen": -52.93898010253906,
|
|
"logps/ref_rejected": -104.67938232421875,
|
|
"logps/rejected": -164.02252197265625,
|
|
"loss": 0.3031,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.11619433760643005,
|
|
"margin_dpo/beta_margin_grad_std": 0.1717434674501419,
|
|
"margin_dpo/beta_margin_mean": 3.5759410858154297,
|
|
"margin_dpo/beta_margin_std": 2.556734800338745,
|
|
"margin_dpo/loss_margin_mean": 35.7594108581543,
|
|
"margin_dpo/margin_mean": 35.7594108581543,
|
|
"margin_dpo/margin_std": 25.549396514892578,
|
|
"step": 614
|
|
},
|
|
{
|
|
"epoch": 0.9030837004405287,
|
|
"grad_norm": 41.79697799682617,
|
|
"learning_rate": 1.4641017128809801e-08,
|
|
"logits/chosen": -0.5839822292327881,
|
|
"logits/rejected": -0.5518302917480469,
|
|
"logps/chosen": -86.97941589355469,
|
|
"logps/ref_chosen": -65.81727600097656,
|
|
"logps/ref_rejected": -95.17749786376953,
|
|
"logps/rejected": -146.57379150390625,
|
|
"loss": 0.4035,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.15672753751277924,
|
|
"margin_dpo/beta_margin_grad_std": 0.1847524344921112,
|
|
"margin_dpo/beta_margin_mean": 3.0234153270721436,
|
|
"margin_dpo/beta_margin_std": 2.3445184230804443,
|
|
"margin_dpo/loss_margin_mean": 30.234153747558594,
|
|
"margin_dpo/margin_mean": 30.23415184020996,
|
|
"margin_dpo/margin_std": 22.966224670410156,
|
|
"step": 615
|
|
},
|
|
{
|
|
"epoch": 0.9045521292217328,
|
|
"grad_norm": 72.02803039550781,
|
|
"learning_rate": 1.4211391382180637e-08,
|
|
"logits/chosen": -0.613810122013092,
|
|
"logits/rejected": -0.5613222122192383,
|
|
"logps/chosen": -88.5474853515625,
|
|
"logps/ref_chosen": -65.13285827636719,
|
|
"logps/ref_rejected": -74.70050048828125,
|
|
"logps/rejected": -130.7073516845703,
|
|
"loss": 0.5016,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.16017135977745056,
|
|
"margin_dpo/beta_margin_grad_std": 0.230714350938797,
|
|
"margin_dpo/beta_margin_mean": 3.2592217922210693,
|
|
"margin_dpo/beta_margin_std": 3.033946990966797,
|
|
"margin_dpo/loss_margin_mean": 32.59221649169922,
|
|
"margin_dpo/margin_mean": 32.59221649169922,
|
|
"margin_dpo/margin_std": 29.501014709472656,
|
|
"step": 616
|
|
},
|
|
{
|
|
"epoch": 0.9060205580029369,
|
|
"grad_norm": 54.11730194091797,
|
|
"learning_rate": 1.378797888467345e-08,
|
|
"logits/chosen": -0.5736366510391235,
|
|
"logits/rejected": -0.5296716094017029,
|
|
"logps/chosen": -87.65239715576172,
|
|
"logps/ref_chosen": -63.005550384521484,
|
|
"logps/ref_rejected": -64.234130859375,
|
|
"logps/rejected": -118.85501098632812,
|
|
"loss": 0.3826,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.14828212559223175,
|
|
"margin_dpo/beta_margin_grad_std": 0.17800341546535492,
|
|
"margin_dpo/beta_margin_mean": 2.9974029064178467,
|
|
"margin_dpo/beta_margin_std": 2.35481333732605,
|
|
"margin_dpo/loss_margin_mean": 29.974029541015625,
|
|
"margin_dpo/margin_mean": 29.974029541015625,
|
|
"margin_dpo/margin_std": 23.434463500976562,
|
|
"step": 617
|
|
},
|
|
{
|
|
"epoch": 0.9074889867841409,
|
|
"grad_norm": 67.99271392822266,
|
|
"learning_rate": 1.3370790793601371e-08,
|
|
"logits/chosen": -0.6390504837036133,
|
|
"logits/rejected": -0.610953152179718,
|
|
"logps/chosen": -90.81625366210938,
|
|
"logps/ref_chosen": -67.10135650634766,
|
|
"logps/ref_rejected": -92.15339660644531,
|
|
"logps/rejected": -146.72813415527344,
|
|
"loss": 0.4624,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.16537515819072723,
|
|
"margin_dpo/beta_margin_grad_std": 0.21399806439876556,
|
|
"margin_dpo/beta_margin_mean": 3.085983991622925,
|
|
"margin_dpo/beta_margin_std": 2.6704392433166504,
|
|
"margin_dpo/loss_margin_mean": 30.859838485717773,
|
|
"margin_dpo/margin_mean": 30.859840393066406,
|
|
"margin_dpo/margin_std": 26.370765686035156,
|
|
"step": 618
|
|
},
|
|
{
|
|
"epoch": 0.908957415565345,
|
|
"grad_norm": 55.240272521972656,
|
|
"learning_rate": 1.2959838102258535e-08,
|
|
"logits/chosen": -0.5689994096755981,
|
|
"logits/rejected": -0.5356103777885437,
|
|
"logps/chosen": -79.01873779296875,
|
|
"logps/ref_chosen": -55.978233337402344,
|
|
"logps/ref_rejected": -93.1854019165039,
|
|
"logps/rejected": -149.14964294433594,
|
|
"loss": 0.4702,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.16712483763694763,
|
|
"margin_dpo/beta_margin_grad_std": 0.21881355345249176,
|
|
"margin_dpo/beta_margin_mean": 3.2923738956451416,
|
|
"margin_dpo/beta_margin_std": 3.01572847366333,
|
|
"margin_dpo/loss_margin_mean": 32.92374038696289,
|
|
"margin_dpo/margin_mean": 32.92374038696289,
|
|
"margin_dpo/margin_std": 29.776756286621094,
|
|
"step": 619
|
|
},
|
|
{
|
|
"epoch": 0.9104258443465492,
|
|
"grad_norm": 34.842933654785156,
|
|
"learning_rate": 1.2555131639630567e-08,
|
|
"logits/chosen": -0.6340548396110535,
|
|
"logits/rejected": -0.5965070724487305,
|
|
"logps/chosen": -79.86566162109375,
|
|
"logps/ref_chosen": -59.79750061035156,
|
|
"logps/ref_rejected": -78.41075134277344,
|
|
"logps/rejected": -134.0952911376953,
|
|
"loss": 0.2579,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.10915657132863998,
|
|
"margin_dpo/beta_margin_grad_std": 0.13496600091457367,
|
|
"margin_dpo/beta_margin_mean": 3.561638355255127,
|
|
"margin_dpo/beta_margin_std": 2.65966534614563,
|
|
"margin_dpo/loss_margin_mean": 35.61638259887695,
|
|
"margin_dpo/margin_mean": 35.61638259887695,
|
|
"margin_dpo/margin_std": 25.934829711914062,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 0.9118942731277533,
|
|
"grad_norm": 40.03609848022461,
|
|
"learning_rate": 1.2156682070109086e-08,
|
|
"logits/chosen": -0.6073925495147705,
|
|
"logits/rejected": -0.5800847411155701,
|
|
"logps/chosen": -72.59913635253906,
|
|
"logps/ref_chosen": -53.933753967285156,
|
|
"logps/ref_rejected": -88.36952209472656,
|
|
"logps/rejected": -143.29660034179688,
|
|
"loss": 0.3092,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.10899462550878525,
|
|
"margin_dpo/beta_margin_grad_std": 0.1772289127111435,
|
|
"margin_dpo/beta_margin_mean": 3.6261699199676514,
|
|
"margin_dpo/beta_margin_std": 2.691709041595459,
|
|
"margin_dpo/loss_margin_mean": 36.26169967651367,
|
|
"margin_dpo/margin_mean": 36.26169967651367,
|
|
"margin_dpo/margin_std": 26.822023391723633,
|
|
"step": 621
|
|
},
|
|
{
|
|
"epoch": 0.9133627019089574,
|
|
"grad_norm": 48.39630889892578,
|
|
"learning_rate": 1.1764499893210878e-08,
|
|
"logits/chosen": -0.5869364142417908,
|
|
"logits/rejected": -0.5320132970809937,
|
|
"logps/chosen": -82.65914916992188,
|
|
"logps/ref_chosen": -60.28582000732422,
|
|
"logps/ref_rejected": -85.51873779296875,
|
|
"logps/rejected": -144.71177673339844,
|
|
"loss": 0.3854,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1378306895494461,
|
|
"margin_dpo/beta_margin_grad_std": 0.1983867883682251,
|
|
"margin_dpo/beta_margin_mean": 3.6819705963134766,
|
|
"margin_dpo/beta_margin_std": 2.8990466594696045,
|
|
"margin_dpo/loss_margin_mean": 36.819705963134766,
|
|
"margin_dpo/margin_mean": 36.819705963134766,
|
|
"margin_dpo/margin_std": 28.56911277770996,
|
|
"step": 622
|
|
},
|
|
{
|
|
"epoch": 0.9148311306901615,
|
|
"grad_norm": 73.08351135253906,
|
|
"learning_rate": 1.1378595443300998e-08,
|
|
"logits/chosen": -0.6507315635681152,
|
|
"logits/rejected": -0.6161798238754272,
|
|
"logps/chosen": -88.72175598144531,
|
|
"logps/ref_chosen": -64.15696716308594,
|
|
"logps/ref_rejected": -85.08304595947266,
|
|
"logps/rejected": -140.02056884765625,
|
|
"loss": 0.5541,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.18612176179885864,
|
|
"margin_dpo/beta_margin_grad_std": 0.23363880813121796,
|
|
"margin_dpo/beta_margin_mean": 3.037273645401001,
|
|
"margin_dpo/beta_margin_std": 2.930415630340576,
|
|
"margin_dpo/loss_margin_mean": 30.37273597717285,
|
|
"margin_dpo/margin_mean": 30.37273406982422,
|
|
"margin_dpo/margin_std": 28.88761329650879,
|
|
"step": 623
|
|
},
|
|
{
|
|
"epoch": 0.9162995594713657,
|
|
"grad_norm": 71.18040466308594,
|
|
"learning_rate": 1.0998978889320582e-08,
|
|
"logits/chosen": -0.6796859502792358,
|
|
"logits/rejected": -0.6095322966575623,
|
|
"logps/chosen": -94.83811950683594,
|
|
"logps/ref_chosen": -71.91862487792969,
|
|
"logps/ref_rejected": -97.13203430175781,
|
|
"logps/rejected": -157.37045288085938,
|
|
"loss": 0.4965,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.14844343066215515,
|
|
"margin_dpo/beta_margin_grad_std": 0.25005990266799927,
|
|
"margin_dpo/beta_margin_mean": 3.7318942546844482,
|
|
"margin_dpo/beta_margin_std": 2.772662878036499,
|
|
"margin_dpo/loss_margin_mean": 37.31894302368164,
|
|
"margin_dpo/margin_mean": 37.318939208984375,
|
|
"margin_dpo/margin_std": 27.622631072998047,
|
|
"step": 624
|
|
},
|
|
{
|
|
"epoch": 0.9177679882525698,
|
|
"grad_norm": 49.115333557128906,
|
|
"learning_rate": 1.0625660234518913e-08,
|
|
"logits/chosen": -0.59247887134552,
|
|
"logits/rejected": -0.5529348850250244,
|
|
"logps/chosen": -81.66363525390625,
|
|
"logps/ref_chosen": -58.342071533203125,
|
|
"logps/ref_rejected": -86.09038543701172,
|
|
"logps/rejected": -144.93325805664062,
|
|
"loss": 0.3591,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.13468137383460999,
|
|
"margin_dpo/beta_margin_grad_std": 0.18700142204761505,
|
|
"margin_dpo/beta_margin_mean": 3.552130937576294,
|
|
"margin_dpo/beta_margin_std": 2.8531718254089355,
|
|
"margin_dpo/loss_margin_mean": 35.52130889892578,
|
|
"margin_dpo/margin_mean": 35.52130889892578,
|
|
"margin_dpo/margin_std": 28.529512405395508,
|
|
"step": 625
|
|
},
|
|
{
|
|
"epoch": 0.9192364170337739,
|
|
"grad_norm": 63.880088806152344,
|
|
"learning_rate": 1.0258649316189721e-08,
|
|
"logits/chosen": -0.5680443644523621,
|
|
"logits/rejected": -0.5336043834686279,
|
|
"logps/chosen": -98.9983139038086,
|
|
"logps/ref_chosen": -75.11260986328125,
|
|
"logps/ref_rejected": -99.18872833251953,
|
|
"logps/rejected": -153.16671752929688,
|
|
"loss": 0.5253,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.18730950355529785,
|
|
"margin_dpo/beta_margin_grad_std": 0.21887990832328796,
|
|
"margin_dpo/beta_margin_mean": 3.009228467941284,
|
|
"margin_dpo/beta_margin_std": 2.89654541015625,
|
|
"margin_dpo/loss_margin_mean": 30.09228515625,
|
|
"margin_dpo/margin_mean": 30.09228515625,
|
|
"margin_dpo/margin_std": 28.44098472595215,
|
|
"step": 626
|
|
},
|
|
{
|
|
"epoch": 0.920704845814978,
|
|
"grad_norm": 78.66019439697266,
|
|
"learning_rate": 9.897955805412e-09,
|
|
"logits/chosen": -0.579108476638794,
|
|
"logits/rejected": -0.587154746055603,
|
|
"logps/chosen": -69.19841003417969,
|
|
"logps/ref_chosen": -47.74314880371094,
|
|
"logps/ref_rejected": -106.75448608398438,
|
|
"logps/rejected": -162.16537475585938,
|
|
"loss": 0.6048,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.18939301371574402,
|
|
"margin_dpo/beta_margin_grad_std": 0.2481917440891266,
|
|
"margin_dpo/beta_margin_mean": 3.3955633640289307,
|
|
"margin_dpo/beta_margin_std": 3.4090704917907715,
|
|
"margin_dpo/loss_margin_mean": 33.95563507080078,
|
|
"margin_dpo/margin_mean": 33.95563507080078,
|
|
"margin_dpo/margin_std": 34.049468994140625,
|
|
"step": 627
|
|
},
|
|
{
|
|
"epoch": 0.922173274596182,
|
|
"grad_norm": 41.49999237060547,
|
|
"learning_rate": 9.543589206795238e-09,
|
|
"logits/chosen": -0.6199311017990112,
|
|
"logits/rejected": -0.6005183458328247,
|
|
"logps/chosen": -82.25130462646484,
|
|
"logps/ref_chosen": -60.182945251464844,
|
|
"logps/ref_rejected": -101.55467224121094,
|
|
"logps/rejected": -159.23948669433594,
|
|
"loss": 0.3001,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.12019230425357819,
|
|
"margin_dpo/beta_margin_grad_std": 0.15883654356002808,
|
|
"margin_dpo/beta_margin_mean": 3.5616455078125,
|
|
"margin_dpo/beta_margin_std": 2.615797758102417,
|
|
"margin_dpo/loss_margin_mean": 35.616455078125,
|
|
"margin_dpo/margin_mean": 35.616455078125,
|
|
"margin_dpo/margin_std": 25.80486297607422,
|
|
"step": 628
|
|
},
|
|
{
|
|
"epoch": 0.9236417033773862,
|
|
"grad_norm": 62.908477783203125,
|
|
"learning_rate": 9.19555885822887e-09,
|
|
"logits/chosen": -0.6567898392677307,
|
|
"logits/rejected": -0.6142420768737793,
|
|
"logps/chosen": -86.42594909667969,
|
|
"logps/ref_chosen": -64.21353912353516,
|
|
"logps/ref_rejected": -91.65367126464844,
|
|
"logps/rejected": -145.6768798828125,
|
|
"loss": 0.4054,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1407935917377472,
|
|
"margin_dpo/beta_margin_grad_std": 0.19307489693164825,
|
|
"margin_dpo/beta_margin_mean": 3.1810803413391113,
|
|
"margin_dpo/beta_margin_std": 2.5537304878234863,
|
|
"margin_dpo/loss_margin_mean": 31.81080436706543,
|
|
"margin_dpo/margin_mean": 31.810806274414062,
|
|
"margin_dpo/margin_std": 25.02639389038086,
|
|
"step": 629
|
|
},
|
|
{
|
|
"epoch": 0.9251101321585903,
|
|
"grad_norm": 60.66549301147461,
|
|
"learning_rate": 8.85387393063622e-09,
|
|
"logits/chosen": -0.6706698536872864,
|
|
"logits/rejected": -0.6243743896484375,
|
|
"logps/chosen": -79.63174438476562,
|
|
"logps/ref_chosen": -59.29100036621094,
|
|
"logps/ref_rejected": -83.59829711914062,
|
|
"logps/rejected": -134.34188842773438,
|
|
"loss": 0.461,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1645456999540329,
|
|
"margin_dpo/beta_margin_grad_std": 0.20726469159126282,
|
|
"margin_dpo/beta_margin_mean": 3.0402843952178955,
|
|
"margin_dpo/beta_margin_std": 2.556612014770508,
|
|
"margin_dpo/loss_margin_mean": 30.402841567993164,
|
|
"margin_dpo/margin_mean": 30.402843475341797,
|
|
"margin_dpo/margin_std": 25.565155029296875,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 0.9265785609397944,
|
|
"grad_norm": 94.32537078857422,
|
|
"learning_rate": 8.518543427732949e-09,
|
|
"logits/chosen": -0.6151013374328613,
|
|
"logits/rejected": -0.5717021822929382,
|
|
"logps/chosen": -83.84978485107422,
|
|
"logps/ref_chosen": -59.45360565185547,
|
|
"logps/ref_rejected": -80.95157623291016,
|
|
"logps/rejected": -133.63461303710938,
|
|
"loss": 0.7356,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1976294070482254,
|
|
"margin_dpo/beta_margin_grad_std": 0.26410892605781555,
|
|
"margin_dpo/beta_margin_mean": 2.8286869525909424,
|
|
"margin_dpo/beta_margin_std": 2.959045886993408,
|
|
"margin_dpo/loss_margin_mean": 28.286867141723633,
|
|
"margin_dpo/margin_mean": 28.286869049072266,
|
|
"margin_dpo/margin_std": 29.41876220703125,
|
|
"step": 631
|
|
},
|
|
{
|
|
"epoch": 0.9280469897209985,
|
|
"grad_norm": 86.42517852783203,
|
|
"learning_rate": 8.189576185789637e-09,
|
|
"logits/chosen": -0.619070291519165,
|
|
"logits/rejected": -0.5838553309440613,
|
|
"logps/chosen": -85.71180725097656,
|
|
"logps/ref_chosen": -61.35155487060547,
|
|
"logps/ref_rejected": -86.16017150878906,
|
|
"logps/rejected": -143.08697509765625,
|
|
"loss": 0.7093,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.16710862517356873,
|
|
"margin_dpo/beta_margin_grad_std": 0.26828470826148987,
|
|
"margin_dpo/beta_margin_mean": 3.256655216217041,
|
|
"margin_dpo/beta_margin_std": 3.002157211303711,
|
|
"margin_dpo/loss_margin_mean": 32.566551208496094,
|
|
"margin_dpo/margin_mean": 32.566551208496094,
|
|
"margin_dpo/margin_std": 29.249189376831055,
|
|
"step": 632
|
|
},
|
|
{
|
|
"epoch": 0.9295154185022027,
|
|
"grad_norm": 60.10581970214844,
|
|
"learning_rate": 7.866980873399015e-09,
|
|
"logits/chosen": -0.6361432075500488,
|
|
"logits/rejected": -0.6225095987319946,
|
|
"logps/chosen": -80.6368408203125,
|
|
"logps/ref_chosen": -57.278167724609375,
|
|
"logps/ref_rejected": -91.58395385742188,
|
|
"logps/rejected": -142.36219787597656,
|
|
"loss": 0.5499,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.19326050579547882,
|
|
"margin_dpo/beta_margin_grad_std": 0.22396619617938995,
|
|
"margin_dpo/beta_margin_mean": 2.741957902908325,
|
|
"margin_dpo/beta_margin_std": 2.554403066635132,
|
|
"margin_dpo/loss_margin_mean": 27.419578552246094,
|
|
"margin_dpo/margin_mean": 27.419578552246094,
|
|
"margin_dpo/margin_std": 24.602121353149414,
|
|
"step": 633
|
|
},
|
|
{
|
|
"epoch": 0.9309838472834068,
|
|
"grad_norm": 73.96012878417969,
|
|
"learning_rate": 7.550765991247654e-09,
|
|
"logits/chosen": -0.5560423135757446,
|
|
"logits/rejected": -0.538284420967102,
|
|
"logps/chosen": -93.19425964355469,
|
|
"logps/ref_chosen": -66.61896514892578,
|
|
"logps/ref_rejected": -107.12565612792969,
|
|
"logps/rejected": -161.61175537109375,
|
|
"loss": 0.6531,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.20842374861240387,
|
|
"margin_dpo/beta_margin_grad_std": 0.24925780296325684,
|
|
"margin_dpo/beta_margin_mean": 2.791081190109253,
|
|
"margin_dpo/beta_margin_std": 2.91212797164917,
|
|
"margin_dpo/loss_margin_mean": 27.910810470581055,
|
|
"margin_dpo/margin_mean": 27.910812377929688,
|
|
"margin_dpo/margin_std": 29.05972671508789,
|
|
"step": 634
|
|
},
|
|
{
|
|
"epoch": 0.9324522760646109,
|
|
"grad_norm": 50.861839294433594,
|
|
"learning_rate": 7.240939871891699e-09,
|
|
"logits/chosen": -0.608803391456604,
|
|
"logits/rejected": -0.5592911243438721,
|
|
"logps/chosen": -96.6619873046875,
|
|
"logps/ref_chosen": -73.95551300048828,
|
|
"logps/ref_rejected": -82.50045776367188,
|
|
"logps/rejected": -133.83990478515625,
|
|
"loss": 0.409,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.15432217717170715,
|
|
"margin_dpo/beta_margin_grad_std": 0.18803834915161133,
|
|
"margin_dpo/beta_margin_mean": 2.8632962703704834,
|
|
"margin_dpo/beta_margin_std": 2.2558207511901855,
|
|
"margin_dpo/loss_margin_mean": 28.63296127319336,
|
|
"margin_dpo/margin_mean": 28.63296127319336,
|
|
"margin_dpo/margin_std": 22.48883628845215,
|
|
"step": 635
|
|
},
|
|
{
|
|
"epoch": 0.933920704845815,
|
|
"grad_norm": 47.65840530395508,
|
|
"learning_rate": 6.937510679537628e-09,
|
|
"logits/chosen": -0.5629330277442932,
|
|
"logits/rejected": -0.5346908569335938,
|
|
"logps/chosen": -82.30425262451172,
|
|
"logps/ref_chosen": -59.628910064697266,
|
|
"logps/ref_rejected": -81.97883605957031,
|
|
"logps/rejected": -137.65878295898438,
|
|
"loss": 0.4012,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.13953568041324615,
|
|
"margin_dpo/beta_margin_grad_std": 0.21565653383731842,
|
|
"margin_dpo/beta_margin_mean": 3.3004610538482666,
|
|
"margin_dpo/beta_margin_std": 2.40425968170166,
|
|
"margin_dpo/loss_margin_mean": 33.00461196899414,
|
|
"margin_dpo/margin_mean": 33.004608154296875,
|
|
"margin_dpo/margin_std": 23.834693908691406,
|
|
"step": 636
|
|
},
|
|
{
|
|
"epoch": 0.9353891336270191,
|
|
"grad_norm": 53.40937042236328,
|
|
"learning_rate": 6.640486409826785e-09,
|
|
"logits/chosen": -0.5897486209869385,
|
|
"logits/rejected": -0.5671026110649109,
|
|
"logps/chosen": -73.21141815185547,
|
|
"logps/ref_chosen": -49.652687072753906,
|
|
"logps/ref_rejected": -98.40513610839844,
|
|
"logps/rejected": -154.89999389648438,
|
|
"loss": 0.3634,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.13438232243061066,
|
|
"margin_dpo/beta_margin_grad_std": 0.18966805934906006,
|
|
"margin_dpo/beta_margin_mean": 3.2936134338378906,
|
|
"margin_dpo/beta_margin_std": 2.5701606273651123,
|
|
"margin_dpo/loss_margin_mean": 32.936134338378906,
|
|
"margin_dpo/margin_mean": 32.936134338378906,
|
|
"margin_dpo/margin_std": 25.349170684814453,
|
|
"step": 637
|
|
},
|
|
{
|
|
"epoch": 0.9368575624082232,
|
|
"grad_norm": 41.96897888183594,
|
|
"learning_rate": 6.349874889624962e-09,
|
|
"logits/chosen": -0.5449614524841309,
|
|
"logits/rejected": -0.49521952867507935,
|
|
"logps/chosen": -78.70539855957031,
|
|
"logps/ref_chosen": -58.156646728515625,
|
|
"logps/ref_rejected": -79.3014907836914,
|
|
"logps/rejected": -136.9318084716797,
|
|
"loss": 0.3245,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.12071166932582855,
|
|
"margin_dpo/beta_margin_grad_std": 0.17367184162139893,
|
|
"margin_dpo/beta_margin_mean": 3.7081568241119385,
|
|
"margin_dpo/beta_margin_std": 2.8503551483154297,
|
|
"margin_dpo/loss_margin_mean": 37.08156967163086,
|
|
"margin_dpo/margin_mean": 37.08156967163086,
|
|
"margin_dpo/margin_std": 27.137168884277344,
|
|
"step": 638
|
|
},
|
|
{
|
|
"epoch": 0.9383259911894273,
|
|
"grad_norm": 57.53899383544922,
|
|
"learning_rate": 6.065683776815933e-09,
|
|
"logits/chosen": -0.58185875415802,
|
|
"logits/rejected": -0.5182079672813416,
|
|
"logps/chosen": -97.73635864257812,
|
|
"logps/ref_chosen": -72.32319641113281,
|
|
"logps/ref_rejected": -74.2749252319336,
|
|
"logps/rejected": -130.7800750732422,
|
|
"loss": 0.4397,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.14476662874221802,
|
|
"margin_dpo/beta_margin_grad_std": 0.199687659740448,
|
|
"margin_dpo/beta_margin_mean": 3.109198570251465,
|
|
"margin_dpo/beta_margin_std": 2.4722304344177246,
|
|
"margin_dpo/loss_margin_mean": 31.09198570251465,
|
|
"margin_dpo/margin_mean": 31.09198760986328,
|
|
"margin_dpo/margin_std": 24.611787796020508,
|
|
"step": 639
|
|
},
|
|
{
|
|
"epoch": 0.9397944199706314,
|
|
"grad_norm": 44.61709213256836,
|
|
"learning_rate": 5.7879205600998296e-09,
|
|
"logits/chosen": -0.5778528451919556,
|
|
"logits/rejected": -0.5412660241127014,
|
|
"logps/chosen": -78.43016815185547,
|
|
"logps/ref_chosen": -56.13436508178711,
|
|
"logps/ref_rejected": -108.60014343261719,
|
|
"logps/rejected": -167.73947143554688,
|
|
"loss": 0.3066,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.12420199811458588,
|
|
"margin_dpo/beta_margin_grad_std": 0.15969912707805634,
|
|
"margin_dpo/beta_margin_mean": 3.6843512058258057,
|
|
"margin_dpo/beta_margin_std": 3.017540454864502,
|
|
"margin_dpo/loss_margin_mean": 36.84351348876953,
|
|
"margin_dpo/margin_mean": 36.84351348876953,
|
|
"margin_dpo/margin_std": 29.767667770385742,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 0.9412628487518355,
|
|
"grad_norm": 51.515228271484375,
|
|
"learning_rate": 5.516592558795746e-09,
|
|
"logits/chosen": -0.6603978872299194,
|
|
"logits/rejected": -0.6059365272521973,
|
|
"logps/chosen": -88.82362365722656,
|
|
"logps/ref_chosen": -64.99689483642578,
|
|
"logps/ref_rejected": -86.99232482910156,
|
|
"logps/rejected": -142.99404907226562,
|
|
"loss": 0.3746,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1466035395860672,
|
|
"margin_dpo/beta_margin_grad_std": 0.17220094799995422,
|
|
"margin_dpo/beta_margin_mean": 3.217498779296875,
|
|
"margin_dpo/beta_margin_std": 3.0656890869140625,
|
|
"margin_dpo/loss_margin_mean": 32.17498779296875,
|
|
"margin_dpo/margin_mean": 32.17498779296875,
|
|
"margin_dpo/margin_std": 29.780851364135742,
|
|
"step": 641
|
|
},
|
|
{
|
|
"epoch": 0.9427312775330396,
|
|
"grad_norm": 78.2542724609375,
|
|
"learning_rate": 5.251706922648868e-09,
|
|
"logits/chosen": -0.5912165641784668,
|
|
"logits/rejected": -0.5562861561775208,
|
|
"logps/chosen": -90.29745483398438,
|
|
"logps/ref_chosen": -65.68924713134766,
|
|
"logps/ref_rejected": -110.24205017089844,
|
|
"logps/rejected": -170.28448486328125,
|
|
"loss": 0.4822,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.15366876125335693,
|
|
"margin_dpo/beta_margin_grad_std": 0.22277072072029114,
|
|
"margin_dpo/beta_margin_mean": 3.5434229373931885,
|
|
"margin_dpo/beta_margin_std": 3.0842573642730713,
|
|
"margin_dpo/loss_margin_mean": 35.43423080444336,
|
|
"margin_dpo/margin_mean": 35.434226989746094,
|
|
"margin_dpo/margin_std": 30.440698623657227,
|
|
"step": 642
|
|
},
|
|
{
|
|
"epoch": 0.9441997063142438,
|
|
"grad_norm": 51.46054458618164,
|
|
"learning_rate": 4.993270631642038e-09,
|
|
"logits/chosen": -0.6483656764030457,
|
|
"logits/rejected": -0.62122642993927,
|
|
"logps/chosen": -71.25507354736328,
|
|
"logps/ref_chosen": -51.94999694824219,
|
|
"logps/ref_rejected": -87.46833801269531,
|
|
"logps/rejected": -137.56893920898438,
|
|
"loss": 0.4257,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.14452366530895233,
|
|
"margin_dpo/beta_margin_grad_std": 0.19863076508045197,
|
|
"margin_dpo/beta_margin_mean": 3.0795533657073975,
|
|
"margin_dpo/beta_margin_std": 2.446993350982666,
|
|
"margin_dpo/loss_margin_mean": 30.795534133911133,
|
|
"margin_dpo/margin_mean": 30.795534133911133,
|
|
"margin_dpo/margin_std": 24.044445037841797,
|
|
"step": 643
|
|
},
|
|
{
|
|
"epoch": 0.9456681350954479,
|
|
"grad_norm": 75.44609069824219,
|
|
"learning_rate": 4.741290495811873e-09,
|
|
"logits/chosen": -0.6009418964385986,
|
|
"logits/rejected": -0.57252037525177,
|
|
"logps/chosen": -79.76002502441406,
|
|
"logps/ref_chosen": -59.017662048339844,
|
|
"logps/ref_rejected": -87.13668823242188,
|
|
"logps/rejected": -138.11033630371094,
|
|
"loss": 0.5657,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1847480684518814,
|
|
"margin_dpo/beta_margin_grad_std": 0.23598462343215942,
|
|
"margin_dpo/beta_margin_mean": 3.0231282711029053,
|
|
"margin_dpo/beta_margin_std": 2.8853414058685303,
|
|
"margin_dpo/loss_margin_mean": 30.231281280517578,
|
|
"margin_dpo/margin_mean": 30.231281280517578,
|
|
"margin_dpo/margin_std": 28.730857849121094,
|
|
"step": 644
|
|
},
|
|
{
|
|
"epoch": 0.947136563876652,
|
|
"grad_norm": 70.22451782226562,
|
|
"learning_rate": 4.495773155069299e-09,
|
|
"logits/chosen": -0.5856224298477173,
|
|
"logits/rejected": -0.5652365684509277,
|
|
"logps/chosen": -79.71002197265625,
|
|
"logps/ref_chosen": -55.87602233886719,
|
|
"logps/ref_rejected": -97.78080749511719,
|
|
"logps/rejected": -150.6129913330078,
|
|
"loss": 0.544,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.19084730744361877,
|
|
"margin_dpo/beta_margin_grad_std": 0.22894078493118286,
|
|
"margin_dpo/beta_margin_mean": 2.899817705154419,
|
|
"margin_dpo/beta_margin_std": 2.835707187652588,
|
|
"margin_dpo/loss_margin_mean": 28.99817657470703,
|
|
"margin_dpo/margin_mean": 28.99817657470703,
|
|
"margin_dpo/margin_std": 27.904760360717773,
|
|
"step": 645
|
|
},
|
|
{
|
|
"epoch": 0.9486049926578561,
|
|
"grad_norm": 51.758888244628906,
|
|
"learning_rate": 4.256725079024553e-09,
|
|
"logits/chosen": -0.6095120906829834,
|
|
"logits/rejected": -0.5594819784164429,
|
|
"logps/chosen": -83.82559967041016,
|
|
"logps/ref_chosen": -61.275787353515625,
|
|
"logps/ref_rejected": -77.50580596923828,
|
|
"logps/rejected": -133.1284637451172,
|
|
"loss": 0.316,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.11909312754869461,
|
|
"margin_dpo/beta_margin_grad_std": 0.17143264412879944,
|
|
"margin_dpo/beta_margin_mean": 3.307283878326416,
|
|
"margin_dpo/beta_margin_std": 2.259632110595703,
|
|
"margin_dpo/loss_margin_mean": 33.072837829589844,
|
|
"margin_dpo/margin_mean": 33.072837829589844,
|
|
"margin_dpo/margin_std": 22.390499114990234,
|
|
"step": 646
|
|
},
|
|
{
|
|
"epoch": 0.9500734214390602,
|
|
"grad_norm": 81.30612182617188,
|
|
"learning_rate": 4.024152566816791e-09,
|
|
"logits/chosen": -0.5496389865875244,
|
|
"logits/rejected": -0.5257160067558289,
|
|
"logps/chosen": -78.84927368164062,
|
|
"logps/ref_chosen": -54.852413177490234,
|
|
"logps/ref_rejected": -93.5194091796875,
|
|
"logps/rejected": -150.27786254882812,
|
|
"loss": 0.5032,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.16137224435806274,
|
|
"margin_dpo/beta_margin_grad_std": 0.24134768545627594,
|
|
"margin_dpo/beta_margin_mean": 3.2761595249176025,
|
|
"margin_dpo/beta_margin_std": 2.728703260421753,
|
|
"margin_dpo/loss_margin_mean": 32.761592864990234,
|
|
"margin_dpo/margin_mean": 32.761592864990234,
|
|
"margin_dpo/margin_std": 26.8262939453125,
|
|
"step": 647
|
|
},
|
|
{
|
|
"epoch": 0.9515418502202643,
|
|
"grad_norm": 47.32956314086914,
|
|
"learning_rate": 3.798061746947995e-09,
|
|
"logits/chosen": -0.6139056086540222,
|
|
"logits/rejected": -0.6051241159439087,
|
|
"logps/chosen": -73.89356231689453,
|
|
"logps/ref_chosen": -54.17146682739258,
|
|
"logps/ref_rejected": -98.71279907226562,
|
|
"logps/rejected": -158.77578735351562,
|
|
"loss": 0.3728,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.138786181807518,
|
|
"margin_dpo/beta_margin_grad_std": 0.19550208747386932,
|
|
"margin_dpo/beta_margin_mean": 4.034091472625732,
|
|
"margin_dpo/beta_margin_std": 3.443060874938965,
|
|
"margin_dpo/loss_margin_mean": 40.340911865234375,
|
|
"margin_dpo/margin_mean": 40.340911865234375,
|
|
"margin_dpo/margin_std": 34.24688720703125,
|
|
"step": 648
|
|
},
|
|
{
|
|
"epoch": 0.9530102790014684,
|
|
"grad_norm": 50.813629150390625,
|
|
"learning_rate": 3.5784585771215235e-09,
|
|
"logits/chosen": -0.6515902876853943,
|
|
"logits/rejected": -0.6201357841491699,
|
|
"logps/chosen": -83.07283020019531,
|
|
"logps/ref_chosen": -62.4803466796875,
|
|
"logps/ref_rejected": -80.07717895507812,
|
|
"logps/rejected": -129.16033935546875,
|
|
"loss": 0.536,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1962561458349228,
|
|
"margin_dpo/beta_margin_grad_std": 0.21189990639686584,
|
|
"margin_dpo/beta_margin_mean": 2.849066734313965,
|
|
"margin_dpo/beta_margin_std": 2.8495798110961914,
|
|
"margin_dpo/loss_margin_mean": 28.49066734313965,
|
|
"margin_dpo/margin_mean": 28.49066925048828,
|
|
"margin_dpo/margin_std": 28.419557571411133,
|
|
"step": 649
|
|
},
|
|
{
|
|
"epoch": 0.9544787077826725,
|
|
"grad_norm": 59.442115783691406,
|
|
"learning_rate": 3.3653488440851253e-09,
|
|
"logits/chosen": -0.5737979412078857,
|
|
"logits/rejected": -0.5637534260749817,
|
|
"logps/chosen": -80.34698486328125,
|
|
"logps/ref_chosen": -56.09281921386719,
|
|
"logps/ref_rejected": -98.26483917236328,
|
|
"logps/rejected": -159.14297485351562,
|
|
"loss": 0.3545,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.13270466029644012,
|
|
"margin_dpo/beta_margin_grad_std": 0.1840543895959854,
|
|
"margin_dpo/beta_margin_mean": 3.662398338317871,
|
|
"margin_dpo/beta_margin_std": 2.9485061168670654,
|
|
"margin_dpo/loss_margin_mean": 36.62398147583008,
|
|
"margin_dpo/margin_mean": 36.623985290527344,
|
|
"margin_dpo/margin_std": 28.712535858154297,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.9559471365638766,
|
|
"grad_norm": 38.10145950317383,
|
|
"learning_rate": 3.158738163478475e-09,
|
|
"logits/chosen": -0.653481125831604,
|
|
"logits/rejected": -0.6552349328994751,
|
|
"logps/chosen": -62.947837829589844,
|
|
"logps/ref_chosen": -43.42544937133789,
|
|
"logps/ref_rejected": -99.9579086303711,
|
|
"logps/rejected": -155.12380981445312,
|
|
"loss": 0.3146,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.12548606097698212,
|
|
"margin_dpo/beta_margin_grad_std": 0.1639980524778366,
|
|
"margin_dpo/beta_margin_mean": 3.564352035522461,
|
|
"margin_dpo/beta_margin_std": 2.6986141204833984,
|
|
"margin_dpo/loss_margin_mean": 35.64352035522461,
|
|
"margin_dpo/margin_mean": 35.643516540527344,
|
|
"margin_dpo/margin_std": 26.896413803100586,
|
|
"step": 651
|
|
},
|
|
{
|
|
"epoch": 0.9574155653450808,
|
|
"grad_norm": 39.05808639526367,
|
|
"learning_rate": 2.9586319796851555e-09,
|
|
"logits/chosen": -0.6412711143493652,
|
|
"logits/rejected": -0.617784857749939,
|
|
"logps/chosen": -78.93205261230469,
|
|
"logps/ref_chosen": -62.57680892944336,
|
|
"logps/ref_rejected": -111.76779174804688,
|
|
"logps/rejected": -163.570556640625,
|
|
"loss": 0.3386,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.13532721996307373,
|
|
"margin_dpo/beta_margin_grad_std": 0.16726936399936676,
|
|
"margin_dpo/beta_margin_mean": 3.544752597808838,
|
|
"margin_dpo/beta_margin_std": 2.8201441764831543,
|
|
"margin_dpo/loss_margin_mean": 35.44752502441406,
|
|
"margin_dpo/margin_mean": 35.44752502441406,
|
|
"margin_dpo/margin_std": 28.104263305664062,
|
|
"step": 652
|
|
},
|
|
{
|
|
"epoch": 0.9588839941262849,
|
|
"grad_norm": 51.56984329223633,
|
|
"learning_rate": 2.7650355656892166e-09,
|
|
"logits/chosen": -0.6192601919174194,
|
|
"logits/rejected": -0.5976792573928833,
|
|
"logps/chosen": -84.49002075195312,
|
|
"logps/ref_chosen": -61.11295700073242,
|
|
"logps/ref_rejected": -103.24960327148438,
|
|
"logps/rejected": -162.29043579101562,
|
|
"loss": 0.3249,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.12032375484704971,
|
|
"margin_dpo/beta_margin_grad_std": 0.18257243931293488,
|
|
"margin_dpo/beta_margin_mean": 3.566375970840454,
|
|
"margin_dpo/beta_margin_std": 2.590984344482422,
|
|
"margin_dpo/loss_margin_mean": 35.66375732421875,
|
|
"margin_dpo/margin_mean": 35.66375732421875,
|
|
"margin_dpo/margin_std": 25.806888580322266,
|
|
"step": 653
|
|
},
|
|
{
|
|
"epoch": 0.960352422907489,
|
|
"grad_norm": 72.21066284179688,
|
|
"learning_rate": 2.577954022936174e-09,
|
|
"logits/chosen": -0.6111325025558472,
|
|
"logits/rejected": -0.6062880754470825,
|
|
"logps/chosen": -86.98482513427734,
|
|
"logps/ref_chosen": -61.7281379699707,
|
|
"logps/ref_rejected": -98.7738037109375,
|
|
"logps/rejected": -153.51400756835938,
|
|
"loss": 0.5285,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.17636682093143463,
|
|
"margin_dpo/beta_margin_grad_std": 0.2301536500453949,
|
|
"margin_dpo/beta_margin_mean": 2.948350667953491,
|
|
"margin_dpo/beta_margin_std": 2.8891336917877197,
|
|
"margin_dpo/loss_margin_mean": 29.48350715637207,
|
|
"margin_dpo/margin_mean": 29.483509063720703,
|
|
"margin_dpo/margin_std": 28.753616333007812,
|
|
"step": 654
|
|
},
|
|
{
|
|
"epoch": 0.9618208516886931,
|
|
"grad_norm": 72.21392059326172,
|
|
"learning_rate": 2.397392281198729e-09,
|
|
"logits/chosen": -0.6073825359344482,
|
|
"logits/rejected": -0.6081333160400391,
|
|
"logps/chosen": -70.99528503417969,
|
|
"logps/ref_chosen": -49.576812744140625,
|
|
"logps/ref_rejected": -98.29183197021484,
|
|
"logps/rejected": -150.2451629638672,
|
|
"loss": 0.5089,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.18340007960796356,
|
|
"margin_dpo/beta_margin_grad_std": 0.217272087931633,
|
|
"margin_dpo/beta_margin_mean": 3.0534873008728027,
|
|
"margin_dpo/beta_margin_std": 2.986149311065674,
|
|
"margin_dpo/loss_margin_mean": 30.53487205505371,
|
|
"margin_dpo/margin_mean": 30.534870147705078,
|
|
"margin_dpo/margin_std": 29.086572647094727,
|
|
"step": 655
|
|
},
|
|
{
|
|
"epoch": 0.9632892804698973,
|
|
"grad_norm": 40.71949768066406,
|
|
"learning_rate": 2.223355098446622e-09,
|
|
"logits/chosen": -0.5212767124176025,
|
|
"logits/rejected": -0.5257933139801025,
|
|
"logps/chosen": -73.37840270996094,
|
|
"logps/ref_chosen": -52.54943084716797,
|
|
"logps/ref_rejected": -113.67464447021484,
|
|
"logps/rejected": -176.44357299804688,
|
|
"loss": 0.2412,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.0933179035782814,
|
|
"margin_dpo/beta_margin_grad_std": 0.15993143618106842,
|
|
"margin_dpo/beta_margin_mean": 4.193995952606201,
|
|
"margin_dpo/beta_margin_std": 2.617056369781494,
|
|
"margin_dpo/loss_margin_mean": 41.93996047973633,
|
|
"margin_dpo/margin_mean": 41.93996047973633,
|
|
"margin_dpo/margin_std": 25.561412811279297,
|
|
"step": 656
|
|
},
|
|
{
|
|
"epoch": 0.9647577092511013,
|
|
"grad_norm": 45.717838287353516,
|
|
"learning_rate": 2.055847060721566e-09,
|
|
"logits/chosen": -0.6373677849769592,
|
|
"logits/rejected": -0.6168010234832764,
|
|
"logps/chosen": -68.62776184082031,
|
|
"logps/ref_chosen": -46.700538635253906,
|
|
"logps/ref_rejected": -97.91487121582031,
|
|
"logps/rejected": -157.26351928710938,
|
|
"loss": 0.3432,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.11700256913900375,
|
|
"margin_dpo/beta_margin_grad_std": 0.1847466230392456,
|
|
"margin_dpo/beta_margin_mean": 3.7421414852142334,
|
|
"margin_dpo/beta_margin_std": 2.8833959102630615,
|
|
"margin_dpo/loss_margin_mean": 37.42141342163086,
|
|
"margin_dpo/margin_mean": 37.42141342163086,
|
|
"margin_dpo/margin_std": 28.687862396240234,
|
|
"step": 657
|
|
},
|
|
{
|
|
"epoch": 0.9662261380323054,
|
|
"grad_norm": 59.321533203125,
|
|
"learning_rate": 1.8948725820160662e-09,
|
|
"logits/chosen": -0.6310451030731201,
|
|
"logits/rejected": -0.5929208993911743,
|
|
"logps/chosen": -86.52423095703125,
|
|
"logps/ref_chosen": -60.958213806152344,
|
|
"logps/ref_rejected": -95.93949127197266,
|
|
"logps/rejected": -156.62518310546875,
|
|
"loss": 0.4487,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.14949670433998108,
|
|
"margin_dpo/beta_margin_grad_std": 0.2181350290775299,
|
|
"margin_dpo/beta_margin_mean": 3.5119664669036865,
|
|
"margin_dpo/beta_margin_std": 3.0297350883483887,
|
|
"margin_dpo/loss_margin_mean": 35.11966323852539,
|
|
"margin_dpo/margin_mean": 35.119667053222656,
|
|
"margin_dpo/margin_std": 29.735076904296875,
|
|
"step": 658
|
|
},
|
|
{
|
|
"epoch": 0.9676945668135095,
|
|
"grad_norm": 57.14666748046875,
|
|
"learning_rate": 1.7404359041573723e-09,
|
|
"logits/chosen": -0.6056843996047974,
|
|
"logits/rejected": -0.540166974067688,
|
|
"logps/chosen": -96.09359741210938,
|
|
"logps/ref_chosen": -76.74298095703125,
|
|
"logps/ref_rejected": -87.4709701538086,
|
|
"logps/rejected": -141.1275634765625,
|
|
"loss": 0.5047,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.16867277026176453,
|
|
"margin_dpo/beta_margin_grad_std": 0.23597054183483124,
|
|
"margin_dpo/beta_margin_mean": 3.4305975437164307,
|
|
"margin_dpo/beta_margin_std": 2.9324827194213867,
|
|
"margin_dpo/loss_margin_mean": 34.305973052978516,
|
|
"margin_dpo/margin_mean": 34.30597686767578,
|
|
"margin_dpo/margin_std": 29.283281326293945,
|
|
"step": 659
|
|
},
|
|
{
|
|
"epoch": 0.9691629955947136,
|
|
"grad_norm": 49.01050567626953,
|
|
"learning_rate": 1.592541096695571e-09,
|
|
"logits/chosen": -0.6273288130760193,
|
|
"logits/rejected": -0.5808557271957397,
|
|
"logps/chosen": -80.31892395019531,
|
|
"logps/ref_chosen": -59.047882080078125,
|
|
"logps/ref_rejected": -75.96005249023438,
|
|
"logps/rejected": -135.07073974609375,
|
|
"loss": 0.2917,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1133999153971672,
|
|
"margin_dpo/beta_margin_grad_std": 0.1706753671169281,
|
|
"margin_dpo/beta_margin_mean": 3.7839651107788086,
|
|
"margin_dpo/beta_margin_std": 2.7794392108917236,
|
|
"margin_dpo/loss_margin_mean": 37.83964920043945,
|
|
"margin_dpo/margin_mean": 37.83965301513672,
|
|
"margin_dpo/margin_std": 27.737031936645508,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 0.9706314243759178,
|
|
"grad_norm": 64.96249389648438,
|
|
"learning_rate": 1.4511920567963908e-09,
|
|
"logits/chosen": -0.6019885540008545,
|
|
"logits/rejected": -0.5567299127578735,
|
|
"logps/chosen": -71.31771850585938,
|
|
"logps/ref_chosen": -50.673973083496094,
|
|
"logps/ref_rejected": -86.00569152832031,
|
|
"logps/rejected": -141.6163787841797,
|
|
"loss": 0.4523,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.14642944931983948,
|
|
"margin_dpo/beta_margin_grad_std": 0.2149476855993271,
|
|
"margin_dpo/beta_margin_mean": 3.4966940879821777,
|
|
"margin_dpo/beta_margin_std": 3.0865559577941895,
|
|
"margin_dpo/loss_margin_mean": 34.966941833496094,
|
|
"margin_dpo/margin_mean": 34.966941833496094,
|
|
"margin_dpo/margin_std": 29.39708709716797,
|
|
"step": 661
|
|
},
|
|
{
|
|
"epoch": 0.9720998531571219,
|
|
"grad_norm": 50.99778747558594,
|
|
"learning_rate": 1.3163925091384532e-09,
|
|
"logits/chosen": -0.6079974174499512,
|
|
"logits/rejected": -0.5556979775428772,
|
|
"logps/chosen": -93.49765014648438,
|
|
"logps/ref_chosen": -69.26106262207031,
|
|
"logps/ref_rejected": -89.05593872070312,
|
|
"logps/rejected": -144.09814453125,
|
|
"loss": 0.378,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.14259321987628937,
|
|
"margin_dpo/beta_margin_grad_std": 0.17750491201877594,
|
|
"margin_dpo/beta_margin_mean": 3.080561399459839,
|
|
"margin_dpo/beta_margin_std": 2.571131944656372,
|
|
"margin_dpo/loss_margin_mean": 30.805612564086914,
|
|
"margin_dpo/margin_mean": 30.80561065673828,
|
|
"margin_dpo/margin_std": 25.51202964782715,
|
|
"step": 662
|
|
},
|
|
{
|
|
"epoch": 0.973568281938326,
|
|
"grad_norm": 39.297733306884766,
|
|
"learning_rate": 1.1881460058152382e-09,
|
|
"logits/chosen": -0.6374907493591309,
|
|
"logits/rejected": -0.6157968044281006,
|
|
"logps/chosen": -83.19400024414062,
|
|
"logps/ref_chosen": -64.87891387939453,
|
|
"logps/ref_rejected": -113.92536926269531,
|
|
"logps/rejected": -165.287353515625,
|
|
"loss": 0.3262,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.12396994978189468,
|
|
"margin_dpo/beta_margin_grad_std": 0.15943719446659088,
|
|
"margin_dpo/beta_margin_mean": 3.304689407348633,
|
|
"margin_dpo/beta_margin_std": 2.461198568344116,
|
|
"margin_dpo/loss_margin_mean": 33.04689407348633,
|
|
"margin_dpo/margin_mean": 33.046897888183594,
|
|
"margin_dpo/margin_std": 24.47772216796875,
|
|
"step": 663
|
|
},
|
|
{
|
|
"epoch": 0.9750367107195301,
|
|
"grad_norm": 69.85308074951172,
|
|
"learning_rate": 1.066455926241383e-09,
|
|
"logits/chosen": -0.5776158571243286,
|
|
"logits/rejected": -0.5483744144439697,
|
|
"logps/chosen": -84.34225463867188,
|
|
"logps/ref_chosen": -60.88847351074219,
|
|
"logps/ref_rejected": -105.521728515625,
|
|
"logps/rejected": -166.12283325195312,
|
|
"loss": 0.4288,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.11845803260803223,
|
|
"margin_dpo/beta_margin_grad_std": 0.1965658962726593,
|
|
"margin_dpo/beta_margin_mean": 3.7147321701049805,
|
|
"margin_dpo/beta_margin_std": 2.736865997314453,
|
|
"margin_dpo/loss_margin_mean": 37.14732360839844,
|
|
"margin_dpo/margin_mean": 37.14732360839844,
|
|
"margin_dpo/margin_std": 26.97930145263672,
|
|
"step": 664
|
|
},
|
|
{
|
|
"epoch": 0.9765051395007343,
|
|
"grad_norm": 44.827796936035156,
|
|
"learning_rate": 9.513254770636137e-10,
|
|
"logits/chosen": -0.6395413279533386,
|
|
"logits/rejected": -0.5962468385696411,
|
|
"logps/chosen": -81.45133972167969,
|
|
"logps/ref_chosen": -60.56413269042969,
|
|
"logps/ref_rejected": -84.8088150024414,
|
|
"logps/rejected": -137.22821044921875,
|
|
"loss": 0.3524,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.13737604022026062,
|
|
"margin_dpo/beta_margin_grad_std": 0.17597481608390808,
|
|
"margin_dpo/beta_margin_mean": 3.153219223022461,
|
|
"margin_dpo/beta_margin_std": 2.3784492015838623,
|
|
"margin_dpo/loss_margin_mean": 31.53219223022461,
|
|
"margin_dpo/margin_mean": 31.53219223022461,
|
|
"margin_dpo/margin_std": 23.21342658996582,
|
|
"step": 665
|
|
},
|
|
{
|
|
"epoch": 0.9779735682819384,
|
|
"grad_norm": 61.68048858642578,
|
|
"learning_rate": 8.427576920763956e-10,
|
|
"logits/chosen": -0.6096721887588501,
|
|
"logits/rejected": -0.5720229148864746,
|
|
"logps/chosen": -88.06729125976562,
|
|
"logps/ref_chosen": -64.41996002197266,
|
|
"logps/ref_rejected": -95.89163208007812,
|
|
"logps/rejected": -154.82492065429688,
|
|
"loss": 0.4262,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.13251623511314392,
|
|
"margin_dpo/beta_margin_grad_std": 0.21360599994659424,
|
|
"margin_dpo/beta_margin_mean": 3.5285956859588623,
|
|
"margin_dpo/beta_margin_std": 2.662677049636841,
|
|
"margin_dpo/loss_margin_mean": 35.28595733642578,
|
|
"margin_dpo/margin_mean": 35.28595733642578,
|
|
"margin_dpo/margin_std": 26.031997680664062,
|
|
"step": 666
|
|
},
|
|
{
|
|
"epoch": 0.9794419970631424,
|
|
"grad_norm": 58.16268539428711,
|
|
"learning_rate": 7.407554321417764e-10,
|
|
"logits/chosen": -0.5887176990509033,
|
|
"logits/rejected": -0.536880612373352,
|
|
"logps/chosen": -94.41732025146484,
|
|
"logps/ref_chosen": -69.27703094482422,
|
|
"logps/ref_rejected": -87.83549499511719,
|
|
"logps/rejected": -147.3884735107422,
|
|
"loss": 0.3242,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.12313113361597061,
|
|
"margin_dpo/beta_margin_grad_std": 0.16970713436603546,
|
|
"margin_dpo/beta_margin_mean": 3.441269636154175,
|
|
"margin_dpo/beta_margin_std": 2.472762107849121,
|
|
"margin_dpo/loss_margin_mean": 34.412696838378906,
|
|
"margin_dpo/margin_mean": 34.412696838378906,
|
|
"margin_dpo/margin_std": 24.47201919555664,
|
|
"step": 667
|
|
},
|
|
{
|
|
"epoch": 0.9809104258443465,
|
|
"grad_norm": 69.86036682128906,
|
|
"learning_rate": 6.453213851142225e-10,
|
|
"logits/chosen": -0.6267153024673462,
|
|
"logits/rejected": -0.5883671641349792,
|
|
"logps/chosen": -96.0662841796875,
|
|
"logps/ref_chosen": -72.60400390625,
|
|
"logps/ref_rejected": -103.73905181884766,
|
|
"logps/rejected": -160.34828186035156,
|
|
"loss": 0.4507,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1533161848783493,
|
|
"margin_dpo/beta_margin_grad_std": 0.22066539525985718,
|
|
"margin_dpo/beta_margin_mean": 3.314695358276367,
|
|
"margin_dpo/beta_margin_std": 2.6596431732177734,
|
|
"margin_dpo/loss_margin_mean": 33.146949768066406,
|
|
"margin_dpo/margin_mean": 33.146949768066406,
|
|
"margin_dpo/margin_std": 25.9494571685791,
|
|
"step": 668
|
|
},
|
|
{
|
|
"epoch": 0.9823788546255506,
|
|
"grad_norm": 68.40164947509766,
|
|
"learning_rate": 5.564580657695939e-10,
|
|
"logits/chosen": -0.6119288802146912,
|
|
"logits/rejected": -0.5665886998176575,
|
|
"logps/chosen": -65.71624755859375,
|
|
"logps/ref_chosen": -46.116416931152344,
|
|
"logps/ref_rejected": -77.92434692382812,
|
|
"logps/rejected": -135.82337951660156,
|
|
"loss": 0.5021,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.15446214377880096,
|
|
"margin_dpo/beta_margin_grad_std": 0.23781202733516693,
|
|
"margin_dpo/beta_margin_mean": 3.8299198150634766,
|
|
"margin_dpo/beta_margin_std": 3.285043716430664,
|
|
"margin_dpo/loss_margin_mean": 38.299198150634766,
|
|
"margin_dpo/margin_mean": 38.299198150634766,
|
|
"margin_dpo/margin_std": 32.602203369140625,
|
|
"step": 669
|
|
},
|
|
{
|
|
"epoch": 0.9838472834067548,
|
|
"grad_norm": 44.809444427490234,
|
|
"learning_rate": 4.741678157389739e-10,
|
|
"logits/chosen": -0.5694983005523682,
|
|
"logits/rejected": -0.5347045660018921,
|
|
"logps/chosen": -83.17808532714844,
|
|
"logps/ref_chosen": -62.34575653076172,
|
|
"logps/ref_rejected": -96.9405517578125,
|
|
"logps/rejected": -156.79319763183594,
|
|
"loss": 0.2702,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.10760509222745895,
|
|
"margin_dpo/beta_margin_grad_std": 0.15676988661289215,
|
|
"margin_dpo/beta_margin_mean": 3.9020321369171143,
|
|
"margin_dpo/beta_margin_std": 2.6109135150909424,
|
|
"margin_dpo/loss_margin_mean": 39.020320892333984,
|
|
"margin_dpo/margin_mean": 39.02031707763672,
|
|
"margin_dpo/margin_std": 25.866138458251953,
|
|
"step": 670
|
|
},
|
|
{
|
|
"epoch": 0.9853157121879589,
|
|
"grad_norm": 48.393497467041016,
|
|
"learning_rate": 3.9845280344705245e-10,
|
|
"logits/chosen": -0.5919187068939209,
|
|
"logits/rejected": -0.5590361952781677,
|
|
"logps/chosen": -72.3186264038086,
|
|
"logps/ref_chosen": -48.00010681152344,
|
|
"logps/ref_rejected": -83.81932067871094,
|
|
"logps/rejected": -143.67893981933594,
|
|
"loss": 0.3555,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.13741746544837952,
|
|
"margin_dpo/beta_margin_grad_std": 0.1758362054824829,
|
|
"margin_dpo/beta_margin_mean": 3.554109811782837,
|
|
"margin_dpo/beta_margin_std": 2.9027013778686523,
|
|
"margin_dpo/loss_margin_mean": 35.54109573364258,
|
|
"margin_dpo/margin_mean": 35.541099548339844,
|
|
"margin_dpo/margin_std": 28.457447052001953,
|
|
"step": 671
|
|
},
|
|
{
|
|
"epoch": 0.986784140969163,
|
|
"grad_norm": 66.19140625,
|
|
"learning_rate": 3.293150240547549e-10,
|
|
"logits/chosen": -0.6310614347457886,
|
|
"logits/rejected": -0.5937498211860657,
|
|
"logps/chosen": -82.76466369628906,
|
|
"logps/ref_chosen": -58.583290100097656,
|
|
"logps/ref_rejected": -93.14014434814453,
|
|
"logps/rejected": -149.71588134765625,
|
|
"loss": 0.4842,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1728401631116867,
|
|
"margin_dpo/beta_margin_grad_std": 0.21923863887786865,
|
|
"margin_dpo/beta_margin_mean": 3.2394371032714844,
|
|
"margin_dpo/beta_margin_std": 3.0078792572021484,
|
|
"margin_dpo/loss_margin_mean": 32.394371032714844,
|
|
"margin_dpo/margin_mean": 32.394371032714844,
|
|
"margin_dpo/margin_std": 29.72500228881836,
|
|
"step": 672
|
|
},
|
|
{
|
|
"epoch": 0.9882525697503671,
|
|
"grad_norm": 43.1835823059082,
|
|
"learning_rate": 2.6675629940689504e-10,
|
|
"logits/chosen": -0.6048033237457275,
|
|
"logits/rejected": -0.5747998952865601,
|
|
"logps/chosen": -67.85647583007812,
|
|
"logps/ref_chosen": -46.72320556640625,
|
|
"logps/ref_rejected": -85.29623413085938,
|
|
"logps/rejected": -143.50682067871094,
|
|
"loss": 0.3112,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1224966049194336,
|
|
"margin_dpo/beta_margin_grad_std": 0.16890129446983337,
|
|
"margin_dpo/beta_margin_mean": 3.707730531692505,
|
|
"margin_dpo/beta_margin_std": 2.7531516551971436,
|
|
"margin_dpo/loss_margin_mean": 37.07730484008789,
|
|
"margin_dpo/margin_mean": 37.077301025390625,
|
|
"margin_dpo/margin_std": 27.354259490966797,
|
|
"step": 673
|
|
},
|
|
{
|
|
"epoch": 0.9897209985315712,
|
|
"grad_norm": 36.11240005493164,
|
|
"learning_rate": 2.1077827798404725e-10,
|
|
"logits/chosen": -0.5830689668655396,
|
|
"logits/rejected": -0.5558980703353882,
|
|
"logps/chosen": -67.47659301757812,
|
|
"logps/ref_chosen": -45.445526123046875,
|
|
"logps/ref_rejected": -70.04593658447266,
|
|
"logps/rejected": -129.95156860351562,
|
|
"loss": 0.2851,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.11657389253377914,
|
|
"margin_dpo/beta_margin_grad_std": 0.1537243127822876,
|
|
"margin_dpo/beta_margin_mean": 3.78745698928833,
|
|
"margin_dpo/beta_margin_std": 2.833228826522827,
|
|
"margin_dpo/loss_margin_mean": 37.874568939208984,
|
|
"margin_dpo/margin_mean": 37.874568939208984,
|
|
"margin_dpo/margin_std": 28.31113052368164,
|
|
"step": 674
|
|
},
|
|
{
|
|
"epoch": 0.9911894273127754,
|
|
"grad_norm": 66.7328109741211,
|
|
"learning_rate": 1.6138243485910863e-10,
|
|
"logits/chosen": -0.5768786668777466,
|
|
"logits/rejected": -0.5500950813293457,
|
|
"logps/chosen": -64.9262924194336,
|
|
"logps/ref_chosen": -44.17628479003906,
|
|
"logps/ref_rejected": -74.09197998046875,
|
|
"logps/rejected": -134.33714294433594,
|
|
"loss": 0.3902,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.11795066297054291,
|
|
"margin_dpo/beta_margin_grad_std": 0.21005932986736298,
|
|
"margin_dpo/beta_margin_mean": 3.949514865875244,
|
|
"margin_dpo/beta_margin_std": 2.772658109664917,
|
|
"margin_dpo/loss_margin_mean": 39.495147705078125,
|
|
"margin_dpo/margin_mean": 39.495147705078125,
|
|
"margin_dpo/margin_std": 27.606351852416992,
|
|
"step": 675
|
|
},
|
|
{
|
|
"epoch": 0.9926578560939795,
|
|
"grad_norm": 79.0772933959961,
|
|
"learning_rate": 1.1857007165852472e-10,
|
|
"logits/chosen": -0.615682065486908,
|
|
"logits/rejected": -0.5794901847839355,
|
|
"logps/chosen": -96.71508026123047,
|
|
"logps/ref_chosen": -71.39852142333984,
|
|
"logps/ref_rejected": -88.3587646484375,
|
|
"logps/rejected": -149.972412109375,
|
|
"loss": 0.4162,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1375354677438736,
|
|
"margin_dpo/beta_margin_grad_std": 0.20353099703788757,
|
|
"margin_dpo/beta_margin_mean": 3.629708766937256,
|
|
"margin_dpo/beta_margin_std": 2.896389961242676,
|
|
"margin_dpo/loss_margin_mean": 36.29708480834961,
|
|
"margin_dpo/margin_mean": 36.29708480834961,
|
|
"margin_dpo/margin_std": 28.651344299316406,
|
|
"step": 676
|
|
},
|
|
{
|
|
"epoch": 0.9941262848751835,
|
|
"grad_norm": 65.4261245727539,
|
|
"learning_rate": 8.23423165278725e-11,
|
|
"logits/chosen": -0.5974393486976624,
|
|
"logits/rejected": -0.5463284254074097,
|
|
"logps/chosen": -79.63191986083984,
|
|
"logps/ref_chosen": -56.52743911743164,
|
|
"logps/ref_rejected": -78.22654724121094,
|
|
"logps/rejected": -138.780517578125,
|
|
"loss": 0.4482,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.13777747750282288,
|
|
"margin_dpo/beta_margin_grad_std": 0.22497375309467316,
|
|
"margin_dpo/beta_margin_mean": 3.744948625564575,
|
|
"margin_dpo/beta_margin_std": 2.872178792953491,
|
|
"margin_dpo/loss_margin_mean": 37.449485778808594,
|
|
"margin_dpo/margin_mean": 37.449485778808594,
|
|
"margin_dpo/margin_std": 28.472801208496094,
|
|
"step": 677
|
|
},
|
|
{
|
|
"epoch": 0.9955947136563876,
|
|
"grad_norm": 50.95887756347656,
|
|
"learning_rate": 5.270012410216185e-11,
|
|
"logits/chosen": -0.5905472040176392,
|
|
"logits/rejected": -0.5667222738265991,
|
|
"logps/chosen": -67.8372802734375,
|
|
"logps/ref_chosen": -46.13447570800781,
|
|
"logps/ref_rejected": -80.60462951660156,
|
|
"logps/rejected": -139.0126495361328,
|
|
"loss": 0.4485,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.16534043848514557,
|
|
"margin_dpo/beta_margin_grad_std": 0.21265582740306854,
|
|
"margin_dpo/beta_margin_mean": 3.6705210208892822,
|
|
"margin_dpo/beta_margin_std": 3.1234190464019775,
|
|
"margin_dpo/loss_margin_mean": 36.7052116394043,
|
|
"margin_dpo/margin_mean": 36.70520782470703,
|
|
"margin_dpo/margin_std": 31.16322135925293,
|
|
"step": 678
|
|
},
|
|
{
|
|
"epoch": 0.9970631424375918,
|
|
"grad_norm": 47.8213005065918,
|
|
"learning_rate": 2.9644275480772416e-11,
|
|
"logits/chosen": -0.6013349294662476,
|
|
"logits/rejected": -0.5681812167167664,
|
|
"logps/chosen": -72.65241241455078,
|
|
"logps/ref_chosen": -50.294921875,
|
|
"logps/ref_rejected": -76.59813690185547,
|
|
"logps/rejected": -135.8075408935547,
|
|
"loss": 0.3274,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.11131599545478821,
|
|
"margin_dpo/beta_margin_grad_std": 0.1771748960018158,
|
|
"margin_dpo/beta_margin_mean": 3.6851911544799805,
|
|
"margin_dpo/beta_margin_std": 2.695528745651245,
|
|
"margin_dpo/loss_margin_mean": 36.85191345214844,
|
|
"margin_dpo/margin_mean": 36.85191345214844,
|
|
"margin_dpo/margin_std": 26.87795639038086,
|
|
"step": 679
|
|
},
|
|
{
|
|
"epoch": 0.9985315712187959,
|
|
"grad_norm": 57.492881774902344,
|
|
"learning_rate": 1.31753782067201e-11,
|
|
"logits/chosen": -0.6063967347145081,
|
|
"logits/rejected": -0.5727298259735107,
|
|
"logps/chosen": -99.56130981445312,
|
|
"logps/ref_chosen": -76.91569519042969,
|
|
"logps/ref_rejected": -112.384765625,
|
|
"logps/rejected": -171.20968627929688,
|
|
"loss": 0.381,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.1346154808998108,
|
|
"margin_dpo/beta_margin_grad_std": 0.1998624950647354,
|
|
"margin_dpo/beta_margin_mean": 3.6179311275482178,
|
|
"margin_dpo/beta_margin_std": 2.948613405227661,
|
|
"margin_dpo/loss_margin_mean": 36.1793098449707,
|
|
"margin_dpo/margin_mean": 36.17931365966797,
|
|
"margin_dpo/margin_std": 29.298704147338867,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"grad_norm": 52.16978073120117,
|
|
"learning_rate": 3.2938662507808745e-12,
|
|
"logits/chosen": -0.6496413946151733,
|
|
"logits/rejected": -0.6223350167274475,
|
|
"logps/chosen": -84.20474243164062,
|
|
"logps/ref_chosen": -60.957279205322266,
|
|
"logps/ref_rejected": -88.5579833984375,
|
|
"logps/rejected": -143.598876953125,
|
|
"loss": 0.4583,
|
|
"margin_dpo/beta": 0.10000000149011612,
|
|
"margin_dpo/beta_margin_grad_mean": -0.16029776632785797,
|
|
"margin_dpo/beta_margin_grad_std": 0.20890314877033234,
|
|
"margin_dpo/beta_margin_mean": 3.1793434619903564,
|
|
"margin_dpo/beta_margin_std": 2.862551212310791,
|
|
"margin_dpo/loss_margin_mean": 31.793434143066406,
|
|
"margin_dpo/margin_mean": 31.793434143066406,
|
|
"margin_dpo/margin_std": 28.037933349609375,
|
|
"step": 681
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"step": 681,
|
|
"total_flos": 0.0,
|
|
"train_loss": 0.5730435011495403,
|
|
"train_runtime": 3273.0613,
|
|
"train_samples_per_second": 13.32,
|
|
"train_steps_per_second": 0.208
|
|
}
|
|
],
|
|
"logging_steps": 1,
|
|
"max_steps": 681,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 200,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 8,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|