Files
llama3-8b-base-new-method-q…/trainer_state.json
ModelHub XC f7c5e9c105 初始化项目,由ModelHub XC社区提供模型
Model: W-61/llama3-8b-base-new-method-q_t-0.4-s_star0.6-beta-next-batch
Source: Original Platform
2026-05-12 15:46:44 +08:00

9107 lines
332 KiB
JSON

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9989528795811519,
"eval_steps": 500,
"global_step": 477,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0020942408376963353,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.02472555637359619,
"fcm_dpo/q_t": 0.5000618100166321,
"grad_norm": 14.294270515441895,
"learning_rate": 0.0,
"logits/chosen": -0.6300115585327148,
"logits/rejected": -0.6432225704193115,
"logps/chosen": -275.28570556640625,
"logps/ref_chosen": -275.23126220703125,
"logps/ref_rejected": -222.93289184570312,
"logps/rejected": -222.96261596679688,
"loss": 2.7731,
"margin_dpo/margin_mean": -0.024725839495658875,
"margin_dpo/margin_std": 0.6093291640281677,
"step": 1
},
{
"epoch": 0.004188481675392671,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.012230843305587769,
"fcm_dpo/q_t": 0.4999694228172302,
"grad_norm": 13.939297676086426,
"learning_rate": 1.0416666666666666e-08,
"logits/chosen": -0.6800055503845215,
"logits/rejected": -0.6697146892547607,
"logps/chosen": -264.71649169921875,
"logps/ref_chosen": -264.7611083984375,
"logps/ref_rejected": -242.55868530273438,
"logps/rejected": -242.52633666992188,
"loss": 2.7724,
"margin_dpo/margin_mean": 0.012230798602104187,
"margin_dpo/margin_std": 0.6481232643127441,
"step": 2
},
{
"epoch": 0.0062827225130890054,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.09099014103412628,
"fcm_dpo/q_t": 0.4997725486755371,
"grad_norm": 12.899177551269531,
"learning_rate": 2.083333333333333e-08,
"logits/chosen": -0.6909482479095459,
"logits/rejected": -0.737128496170044,
"logps/chosen": -274.06390380859375,
"logps/ref_chosen": -274.1018981933594,
"logps/ref_rejected": -286.588623046875,
"logps/rejected": -286.6416015625,
"loss": 2.7708,
"margin_dpo/margin_mean": 0.09099045395851135,
"margin_dpo/margin_std": 0.6566922664642334,
"step": 3
},
{
"epoch": 0.008376963350785341,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.05564302206039429,
"fcm_dpo/q_t": 0.4998609125614166,
"grad_norm": 15.841980934143066,
"learning_rate": 3.125e-08,
"logits/chosen": -0.6398437023162842,
"logits/rejected": -0.6393350958824158,
"logps/chosen": -329.88238525390625,
"logps/ref_chosen": -329.8382568359375,
"logps/ref_rejected": -303.274658203125,
"logps/rejected": -303.37445068359375,
"loss": 2.7715,
"margin_dpo/margin_mean": 0.055642664432525635,
"margin_dpo/margin_std": 0.7437540292739868,
"step": 4
},
{
"epoch": 0.010471204188481676,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.02273799479007721,
"fcm_dpo/q_t": 0.5000568628311157,
"grad_norm": 14.770719528198242,
"learning_rate": 4.166666666666666e-08,
"logits/chosen": -0.6010129451751709,
"logits/rejected": -0.6134638786315918,
"logps/chosen": -301.7453918457031,
"logps/ref_chosen": -301.73895263671875,
"logps/ref_rejected": -274.7654724121094,
"logps/rejected": -274.7491760253906,
"loss": 2.7731,
"margin_dpo/margin_mean": -0.02273605763912201,
"margin_dpo/margin_std": 0.7734405398368835,
"step": 5
},
{
"epoch": 0.012565445026178011,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.05401374399662018,
"fcm_dpo/q_t": 0.49986496567726135,
"grad_norm": 14.158160209655762,
"learning_rate": 5.208333333333333e-08,
"logits/chosen": -0.6948609352111816,
"logits/rejected": -0.6625156402587891,
"logps/chosen": -285.6659240722656,
"logps/ref_chosen": -285.6946716308594,
"logps/ref_rejected": -245.82781982421875,
"logps/rejected": -245.8531036376953,
"loss": 2.7715,
"margin_dpo/margin_mean": 0.05401468276977539,
"margin_dpo/margin_std": 0.6605967879295349,
"step": 6
},
{
"epoch": 0.014659685863874346,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.06357774138450623,
"fcm_dpo/q_t": 0.500158965587616,
"grad_norm": 14.326388359069824,
"learning_rate": 6.25e-08,
"logits/chosen": -0.5944424867630005,
"logits/rejected": -0.6219260096549988,
"logps/chosen": -264.7119140625,
"logps/ref_chosen": -264.6554260253906,
"logps/ref_rejected": -253.1074676513672,
"logps/rejected": -253.1003875732422,
"loss": 2.7739,
"margin_dpo/margin_mean": -0.06357811391353607,
"margin_dpo/margin_std": 0.6833884716033936,
"step": 7
},
{
"epoch": 0.016753926701570682,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.050836771726608276,
"fcm_dpo/q_t": 0.49987292289733887,
"grad_norm": 15.394753456115723,
"learning_rate": 7.291666666666667e-08,
"logits/chosen": -0.7060806751251221,
"logits/rejected": -0.7133373022079468,
"logps/chosen": -354.1933898925781,
"logps/ref_chosen": -354.18878173828125,
"logps/ref_rejected": -282.91900634765625,
"logps/rejected": -282.9744873046875,
"loss": 2.7716,
"margin_dpo/margin_mean": 0.05083726346492767,
"margin_dpo/margin_std": 0.7709032297134399,
"step": 8
},
{
"epoch": 0.018848167539267015,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.052999868988990784,
"fcm_dpo/q_t": 0.5001325011253357,
"grad_norm": 13.911539077758789,
"learning_rate": 8.333333333333333e-08,
"logits/chosen": -0.6435714960098267,
"logits/rejected": -0.6609861254692078,
"logps/chosen": -285.6179504394531,
"logps/ref_chosen": -285.55023193359375,
"logps/ref_rejected": -267.9884948730469,
"logps/rejected": -268.00323486328125,
"loss": 2.7737,
"margin_dpo/margin_mean": -0.05299980938434601,
"margin_dpo/margin_std": 0.7003268003463745,
"step": 9
},
{
"epoch": 0.020942408376963352,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.02624526619911194,
"fcm_dpo/q_t": 0.5000656247138977,
"grad_norm": 13.248313903808594,
"learning_rate": 9.375e-08,
"logits/chosen": -0.7044954895973206,
"logits/rejected": -0.7039515376091003,
"logps/chosen": -251.95484924316406,
"logps/ref_chosen": -251.91238403320312,
"logps/ref_rejected": -226.4530029296875,
"logps/rejected": -226.46922302246094,
"loss": 2.7731,
"margin_dpo/margin_mean": -0.02624651789665222,
"margin_dpo/margin_std": 0.7348624467849731,
"step": 10
},
{
"epoch": 0.023036649214659685,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.021501854062080383,
"fcm_dpo/q_t": 0.5000537633895874,
"grad_norm": 14.478872299194336,
"learning_rate": 1.0416666666666667e-07,
"logits/chosen": -0.6196871995925903,
"logits/rejected": -0.6669806838035583,
"logps/chosen": -301.000732421875,
"logps/ref_chosen": -301.0834655761719,
"logps/ref_rejected": -259.546630859375,
"logps/rejected": -259.4424133300781,
"loss": 2.773,
"margin_dpo/margin_mean": -0.02150268852710724,
"margin_dpo/margin_std": 0.6663240790367126,
"step": 11
},
{
"epoch": 0.025130890052356022,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.1592348963022232,
"fcm_dpo/q_t": 0.49960193037986755,
"grad_norm": 15.186013221740723,
"learning_rate": 1.1458333333333332e-07,
"logits/chosen": -0.614833414554596,
"logits/rejected": -0.5764847993850708,
"logps/chosen": -287.5548095703125,
"logps/ref_chosen": -287.548095703125,
"logps/ref_rejected": -277.3684387207031,
"logps/rejected": -277.5343933105469,
"loss": 2.7694,
"margin_dpo/margin_mean": 0.1592351645231247,
"margin_dpo/margin_std": 0.8970069289207458,
"step": 12
},
{
"epoch": 0.027225130890052355,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.023470968008041382,
"fcm_dpo/q_t": 0.499941349029541,
"grad_norm": 13.614638328552246,
"learning_rate": 1.25e-07,
"logits/chosen": -0.6662641167640686,
"logits/rejected": -0.6715924739837646,
"logps/chosen": -270.6993713378906,
"logps/ref_chosen": -270.6663818359375,
"logps/ref_rejected": -274.65472412109375,
"logps/rejected": -274.71112060546875,
"loss": 2.7722,
"margin_dpo/margin_mean": 0.023471489548683167,
"margin_dpo/margin_std": 0.81538987159729,
"step": 13
},
{
"epoch": 0.02931937172774869,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.012747511267662048,
"fcm_dpo/q_t": 0.49996814131736755,
"grad_norm": 14.191495895385742,
"learning_rate": 1.3541666666666666e-07,
"logits/chosen": -0.647956371307373,
"logits/rejected": -0.6730740070343018,
"logps/chosen": -281.5734558105469,
"logps/ref_chosen": -281.5932312011719,
"logps/ref_rejected": -263.50811767578125,
"logps/rejected": -263.5010986328125,
"loss": 2.7724,
"margin_dpo/margin_mean": 0.012746155261993408,
"margin_dpo/margin_std": 0.8045576810836792,
"step": 14
},
{
"epoch": 0.031413612565445025,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.04046200215816498,
"fcm_dpo/q_t": 0.49989885091781616,
"grad_norm": 15.161637306213379,
"learning_rate": 1.4583333333333335e-07,
"logits/chosen": -0.6534218788146973,
"logits/rejected": -0.6625234484672546,
"logps/chosen": -298.4216613769531,
"logps/ref_chosen": -298.45343017578125,
"logps/ref_rejected": -227.18118286132812,
"logps/rejected": -227.1898651123047,
"loss": 2.7718,
"margin_dpo/margin_mean": 0.04046127200126648,
"margin_dpo/margin_std": 0.6485350131988525,
"step": 15
},
{
"epoch": 0.033507853403141365,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.08368900418281555,
"fcm_dpo/q_t": 0.4997907876968384,
"grad_norm": 15.082191467285156,
"learning_rate": 1.5624999999999999e-07,
"logits/chosen": -0.6211791038513184,
"logits/rejected": -0.6177949905395508,
"logps/chosen": -293.935302734375,
"logps/ref_chosen": -293.96661376953125,
"logps/ref_rejected": -250.77394104003906,
"logps/rejected": -250.8262939453125,
"loss": 2.7709,
"margin_dpo/margin_mean": 0.08368849754333496,
"margin_dpo/margin_std": 0.699578583240509,
"step": 16
},
{
"epoch": 0.0356020942408377,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.05032643675804138,
"fcm_dpo/q_t": 0.49987420439720154,
"grad_norm": 13.872389793395996,
"learning_rate": 1.6666666666666665e-07,
"logits/chosen": -0.6021161079406738,
"logits/rejected": -0.6258226633071899,
"logps/chosen": -262.3020935058594,
"logps/ref_chosen": -262.3940124511719,
"logps/ref_rejected": -248.50033569335938,
"logps/rejected": -248.458740234375,
"loss": 2.7716,
"margin_dpo/margin_mean": 0.05032654106616974,
"margin_dpo/margin_std": 0.7281695604324341,
"step": 17
},
{
"epoch": 0.03769633507853403,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.09545254707336426,
"fcm_dpo/q_t": 0.49976134300231934,
"grad_norm": 14.841218948364258,
"learning_rate": 1.7708333333333334e-07,
"logits/chosen": -0.63617342710495,
"logits/rejected": -0.6350294351577759,
"logps/chosen": -293.66290283203125,
"logps/ref_chosen": -293.709228515625,
"logps/ref_rejected": -274.5844421386719,
"logps/rejected": -274.633544921875,
"loss": 2.7707,
"margin_dpo/margin_mean": 0.09545297920703888,
"margin_dpo/margin_std": 0.7858285903930664,
"step": 18
},
{
"epoch": 0.039790575916230364,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.002836480736732483,
"fcm_dpo/q_t": 0.49999290704727173,
"grad_norm": 14.09310245513916,
"learning_rate": 1.875e-07,
"logits/chosen": -0.6546177864074707,
"logits/rejected": -0.6501709222793579,
"logps/chosen": -280.23272705078125,
"logps/ref_chosen": -280.2656555175781,
"logps/ref_rejected": -259.9754943847656,
"logps/rejected": -259.94537353515625,
"loss": 2.7726,
"margin_dpo/margin_mean": 0.0028358548879623413,
"margin_dpo/margin_std": 0.6594122052192688,
"step": 19
},
{
"epoch": 0.041884816753926704,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.1465575397014618,
"fcm_dpo/q_t": 0.4996336102485657,
"grad_norm": 14.847814559936523,
"learning_rate": 1.9791666666666664e-07,
"logits/chosen": -0.6283329725265503,
"logits/rejected": -0.6554234027862549,
"logps/chosen": -303.75616455078125,
"logps/ref_chosen": -303.8954162597656,
"logps/ref_rejected": -260.214599609375,
"logps/rejected": -260.2218933105469,
"loss": 2.7697,
"margin_dpo/margin_mean": 0.14655731618404388,
"margin_dpo/margin_std": 0.7076655626296997,
"step": 20
},
{
"epoch": 0.04397905759162304,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.1370566040277481,
"fcm_dpo/q_t": 0.4996573328971863,
"grad_norm": 17.802753448486328,
"learning_rate": 2.0833333333333333e-07,
"logits/chosen": -0.6741464734077454,
"logits/rejected": -0.6965898275375366,
"logps/chosen": -301.4137268066406,
"logps/ref_chosen": -301.53350830078125,
"logps/ref_rejected": -280.29241943359375,
"logps/rejected": -280.30975341796875,
"loss": 2.7699,
"margin_dpo/margin_mean": 0.13705658912658691,
"margin_dpo/margin_std": 0.8495014905929565,
"step": 21
},
{
"epoch": 0.04607329842931937,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.03915375471115112,
"fcm_dpo/q_t": 0.4999020993709564,
"grad_norm": 12.647223472595215,
"learning_rate": 2.1875e-07,
"logits/chosen": -0.677182674407959,
"logits/rejected": -0.6768609881401062,
"logps/chosen": -259.8601989746094,
"logps/ref_chosen": -259.9951477050781,
"logps/ref_rejected": -243.091064453125,
"logps/rejected": -242.99530029296875,
"loss": 2.7718,
"margin_dpo/margin_mean": 0.03915439546108246,
"margin_dpo/margin_std": 0.836093544960022,
"step": 22
},
{
"epoch": 0.048167539267015703,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.1624380499124527,
"fcm_dpo/q_t": 0.49959391355514526,
"grad_norm": 14.06619930267334,
"learning_rate": 2.2916666666666663e-07,
"logits/chosen": -0.6333379745483398,
"logits/rejected": -0.6628628373146057,
"logps/chosen": -282.08575439453125,
"logps/ref_chosen": -282.1807556152344,
"logps/ref_rejected": -265.0758056640625,
"logps/rejected": -265.1431884765625,
"loss": 2.7694,
"margin_dpo/margin_mean": 0.16243839263916016,
"margin_dpo/margin_std": 0.6554839611053467,
"step": 23
},
{
"epoch": 0.050261780104712044,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.24190843105316162,
"fcm_dpo/q_t": 0.4993952512741089,
"grad_norm": 14.770678520202637,
"learning_rate": 2.3958333333333335e-07,
"logits/chosen": -0.6724287867546082,
"logits/rejected": -0.591440737247467,
"logps/chosen": -300.96697998046875,
"logps/ref_chosen": -301.17962646484375,
"logps/ref_rejected": -302.1425476074219,
"logps/rejected": -302.1717834472656,
"loss": 2.7678,
"margin_dpo/margin_mean": 0.24190691113471985,
"margin_dpo/margin_std": 0.7179240584373474,
"step": 24
},
{
"epoch": 0.05235602094240838,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.12971925735473633,
"fcm_dpo/q_t": 0.4996756911277771,
"grad_norm": 13.327330589294434,
"learning_rate": 2.5e-07,
"logits/chosen": -0.6330947279930115,
"logits/rejected": -0.6376215815544128,
"logps/chosen": -246.54849243164062,
"logps/ref_chosen": -246.74649047851562,
"logps/ref_rejected": -235.56109619140625,
"logps/rejected": -235.49281311035156,
"loss": 2.77,
"margin_dpo/margin_mean": 0.12971940636634827,
"margin_dpo/margin_std": 0.72020423412323,
"step": 25
},
{
"epoch": 0.05445026178010471,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.14916859567165375,
"fcm_dpo/q_t": 0.49962708353996277,
"grad_norm": 14.342643737792969,
"learning_rate": 2.604166666666667e-07,
"logits/chosen": -0.6970273852348328,
"logits/rejected": -0.7087149024009705,
"logps/chosen": -281.9627380371094,
"logps/ref_chosen": -282.1955871582031,
"logps/ref_rejected": -235.3131866455078,
"logps/rejected": -235.2294921875,
"loss": 2.7696,
"margin_dpo/margin_mean": 0.1491691768169403,
"margin_dpo/margin_std": 0.8496840000152588,
"step": 26
},
{
"epoch": 0.05654450261780105,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.1963942050933838,
"fcm_dpo/q_t": 0.4995090067386627,
"grad_norm": 14.021341323852539,
"learning_rate": 2.708333333333333e-07,
"logits/chosen": -0.6635923385620117,
"logits/rejected": -0.6803765296936035,
"logps/chosen": -323.55682373046875,
"logps/ref_chosen": -323.8563537597656,
"logps/ref_rejected": -245.96754455566406,
"logps/rejected": -245.86439514160156,
"loss": 2.7687,
"margin_dpo/margin_mean": 0.196393683552742,
"margin_dpo/margin_std": 0.8864683508872986,
"step": 27
},
{
"epoch": 0.05863874345549738,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.17544835805892944,
"fcm_dpo/q_t": 0.4995613694190979,
"grad_norm": 13.160128593444824,
"learning_rate": 2.8125e-07,
"logits/chosen": -0.646005392074585,
"logits/rejected": -0.653028666973114,
"logps/chosen": -247.93984985351562,
"logps/ref_chosen": -248.2467498779297,
"logps/ref_rejected": -240.03460693359375,
"logps/rejected": -239.9031524658203,
"loss": 2.7691,
"margin_dpo/margin_mean": 0.17544794082641602,
"margin_dpo/margin_std": 0.8528289198875427,
"step": 28
},
{
"epoch": 0.060732984293193716,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.20227624475955963,
"fcm_dpo/q_t": 0.499494343996048,
"grad_norm": 14.817415237426758,
"learning_rate": 2.916666666666667e-07,
"logits/chosen": -0.6163727641105652,
"logits/rejected": -0.633022665977478,
"logps/chosen": -317.9890441894531,
"logps/ref_chosen": -318.25640869140625,
"logps/ref_rejected": -286.74468994140625,
"logps/rejected": -286.6795959472656,
"loss": 2.7686,
"margin_dpo/margin_mean": 0.20227505266666412,
"margin_dpo/margin_std": 0.9348006248474121,
"step": 29
},
{
"epoch": 0.06282722513089005,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.2273406684398651,
"fcm_dpo/q_t": 0.49943166971206665,
"grad_norm": 14.480233192443848,
"learning_rate": 3.020833333333333e-07,
"logits/chosen": -0.6096110343933105,
"logits/rejected": -0.6268514394760132,
"logps/chosen": -252.68487548828125,
"logps/ref_chosen": -253.04913330078125,
"logps/ref_rejected": -261.3042297363281,
"logps/rejected": -261.16729736328125,
"loss": 2.7681,
"margin_dpo/margin_mean": 0.22734004259109497,
"margin_dpo/margin_std": 0.8947302103042603,
"step": 30
},
{
"epoch": 0.06492146596858639,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.24778537452220917,
"fcm_dpo/q_t": 0.49938058853149414,
"grad_norm": 12.485006332397461,
"learning_rate": 3.1249999999999997e-07,
"logits/chosen": -0.6786443591117859,
"logits/rejected": -0.7098686695098877,
"logps/chosen": -247.75253295898438,
"logps/ref_chosen": -248.1530303955078,
"logps/ref_rejected": -203.17323303222656,
"logps/rejected": -203.02053833007812,
"loss": 2.7677,
"margin_dpo/margin_mean": 0.2477852702140808,
"margin_dpo/margin_std": 1.0274678468704224,
"step": 31
},
{
"epoch": 0.06701570680628273,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.3018389642238617,
"fcm_dpo/q_t": 0.49924540519714355,
"grad_norm": 14.821319580078125,
"learning_rate": 3.2291666666666666e-07,
"logits/chosen": -0.6340981721878052,
"logits/rejected": -0.6360675096511841,
"logps/chosen": -305.0179443359375,
"logps/ref_chosen": -305.5399475097656,
"logps/ref_rejected": -267.6398620605469,
"logps/rejected": -267.4197082519531,
"loss": 2.7666,
"margin_dpo/margin_mean": 0.30183926224708557,
"margin_dpo/margin_std": 0.9502373933792114,
"step": 32
},
{
"epoch": 0.06910994764397906,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.3839860260486603,
"fcm_dpo/q_t": 0.49904006719589233,
"grad_norm": 14.19231128692627,
"learning_rate": 3.333333333333333e-07,
"logits/chosen": -0.6747459769248962,
"logits/rejected": -0.6878575682640076,
"logps/chosen": -285.6177978515625,
"logps/ref_chosen": -286.2335205078125,
"logps/ref_rejected": -255.37672424316406,
"logps/rejected": -255.1449737548828,
"loss": 2.765,
"margin_dpo/margin_mean": 0.38398584723472595,
"margin_dpo/margin_std": 1.0631319284439087,
"step": 33
},
{
"epoch": 0.0712041884816754,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.7448998093605042,
"fcm_dpo/q_t": 0.49813783168792725,
"grad_norm": 15.560317993164062,
"learning_rate": 3.4375e-07,
"logits/chosen": -0.6418187022209167,
"logits/rejected": -0.6509749889373779,
"logps/chosen": -340.8348693847656,
"logps/ref_chosen": -341.592041015625,
"logps/ref_rejected": -278.89520263671875,
"logps/rejected": -278.8829650878906,
"loss": 2.7578,
"margin_dpo/margin_mean": 0.7449004054069519,
"margin_dpo/margin_std": 1.0796188116073608,
"step": 34
},
{
"epoch": 0.07329842931937172,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.3485826849937439,
"fcm_dpo/q_t": 0.4991285800933838,
"grad_norm": 13.192370414733887,
"learning_rate": 3.541666666666667e-07,
"logits/chosen": -0.6524540185928345,
"logits/rejected": -0.6703010201454163,
"logps/chosen": -264.3785400390625,
"logps/ref_chosen": -265.07952880859375,
"logps/ref_rejected": -264.482421875,
"logps/rejected": -264.1300048828125,
"loss": 2.7657,
"margin_dpo/margin_mean": 0.348581463098526,
"margin_dpo/margin_std": 1.3539750576019287,
"step": 35
},
{
"epoch": 0.07539267015706806,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.572422981262207,
"fcm_dpo/q_t": 0.4985690116882324,
"grad_norm": 16.01019859313965,
"learning_rate": 3.645833333333333e-07,
"logits/chosen": -0.6324862241744995,
"logits/rejected": -0.6465145945549011,
"logps/chosen": -296.5573425292969,
"logps/ref_chosen": -297.3261413574219,
"logps/ref_rejected": -282.09576416015625,
"logps/rejected": -281.8994140625,
"loss": 2.7613,
"margin_dpo/margin_mean": 0.5724228620529175,
"margin_dpo/margin_std": 1.583176612854004,
"step": 36
},
{
"epoch": 0.0774869109947644,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.5346473455429077,
"fcm_dpo/q_t": 0.49866345524787903,
"grad_norm": 15.60018253326416,
"learning_rate": 3.75e-07,
"logits/chosen": -0.6215137243270874,
"logits/rejected": -0.6319845914840698,
"logps/chosen": -313.3050231933594,
"logps/ref_chosen": -314.03399658203125,
"logps/ref_rejected": -299.3407287597656,
"logps/rejected": -299.14642333984375,
"loss": 2.762,
"margin_dpo/margin_mean": 0.5346466898918152,
"margin_dpo/margin_std": 1.5284879207611084,
"step": 37
},
{
"epoch": 0.07958115183246073,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.6732960343360901,
"fcm_dpo/q_t": 0.4983168840408325,
"grad_norm": 14.269463539123535,
"learning_rate": 3.8541666666666665e-07,
"logits/chosen": -0.6749839186668396,
"logits/rejected": -0.6850463151931763,
"logps/chosen": -281.48095703125,
"logps/ref_chosen": -282.54119873046875,
"logps/ref_rejected": -269.7811279296875,
"logps/rejected": -269.3941955566406,
"loss": 2.7593,
"margin_dpo/margin_mean": 0.6732958555221558,
"margin_dpo/margin_std": 1.6695544719696045,
"step": 38
},
{
"epoch": 0.08167539267015707,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.3364109992980957,
"fcm_dpo/q_t": 0.49665945768356323,
"grad_norm": 14.766653060913086,
"learning_rate": 3.958333333333333e-07,
"logits/chosen": -0.6432491540908813,
"logits/rejected": -0.6515594720840454,
"logps/chosen": -275.4022216796875,
"logps/ref_chosen": -276.77294921875,
"logps/ref_rejected": -249.95889282226562,
"logps/rejected": -249.92457580566406,
"loss": 2.7462,
"margin_dpo/margin_mean": 1.3364107608795166,
"margin_dpo/margin_std": 1.924612045288086,
"step": 39
},
{
"epoch": 0.08376963350785341,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.7150388360023499,
"fcm_dpo/q_t": 0.4982127547264099,
"grad_norm": 13.69542121887207,
"learning_rate": 4.0625e-07,
"logits/chosen": -0.6558160185813904,
"logits/rejected": -0.6877782344818115,
"logps/chosen": -283.27117919921875,
"logps/ref_chosen": -284.30706787109375,
"logps/ref_rejected": -244.4443817138672,
"logps/rejected": -244.12350463867188,
"loss": 2.7585,
"margin_dpo/margin_mean": 0.7150383591651917,
"margin_dpo/margin_std": 1.8327528238296509,
"step": 40
},
{
"epoch": 0.08586387434554973,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.8251960277557373,
"fcm_dpo/q_t": 0.4979372024536133,
"grad_norm": 15.315406799316406,
"learning_rate": 4.1666666666666667e-07,
"logits/chosen": -0.6517306566238403,
"logits/rejected": -0.6766183376312256,
"logps/chosen": -292.7053527832031,
"logps/ref_chosen": -293.815185546875,
"logps/ref_rejected": -252.15411376953125,
"logps/rejected": -251.86947631835938,
"loss": 2.7563,
"margin_dpo/margin_mean": 0.8251962065696716,
"margin_dpo/margin_std": 1.8020844459533691,
"step": 41
},
{
"epoch": 0.08795811518324607,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.9245829582214355,
"fcm_dpo/q_t": 0.4976889491081238,
"grad_norm": 13.711472511291504,
"learning_rate": 4.270833333333333e-07,
"logits/chosen": -0.6545050740242004,
"logits/rejected": -0.671395480632782,
"logps/chosen": -251.4940948486328,
"logps/ref_chosen": -252.76023864746094,
"logps/ref_rejected": -261.04888916015625,
"logps/rejected": -260.70733642578125,
"loss": 2.7544,
"margin_dpo/margin_mean": 0.9245826005935669,
"margin_dpo/margin_std": 2.3186817169189453,
"step": 42
},
{
"epoch": 0.09005235602094241,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.2885487079620361,
"fcm_dpo/q_t": 0.4967789351940155,
"grad_norm": 14.949416160583496,
"learning_rate": 4.375e-07,
"logits/chosen": -0.6215295195579529,
"logits/rejected": -0.6345032453536987,
"logps/chosen": -315.4837646484375,
"logps/ref_chosen": -316.8347473144531,
"logps/ref_rejected": -273.7707214355469,
"logps/rejected": -273.70831298828125,
"loss": 2.7472,
"margin_dpo/margin_mean": 1.2885491847991943,
"margin_dpo/margin_std": 2.31832218170166,
"step": 43
},
{
"epoch": 0.09214659685863874,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.5698902606964111,
"fcm_dpo/q_t": 0.49607598781585693,
"grad_norm": 15.42691421508789,
"learning_rate": 4.479166666666667e-07,
"logits/chosen": -0.6347877383232117,
"logits/rejected": -0.6353117823600769,
"logps/chosen": -285.38555908203125,
"logps/ref_chosen": -286.87567138671875,
"logps/ref_rejected": -282.46429443359375,
"logps/rejected": -282.5440673828125,
"loss": 2.7418,
"margin_dpo/margin_mean": 1.5698902606964111,
"margin_dpo/margin_std": 2.9752612113952637,
"step": 44
},
{
"epoch": 0.09424083769633508,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.3766956329345703,
"fcm_dpo/q_t": 0.49655938148498535,
"grad_norm": 14.435276985168457,
"learning_rate": 4.5833333333333327e-07,
"logits/chosen": -0.7104847431182861,
"logits/rejected": -0.7301110625267029,
"logps/chosen": -322.58203125,
"logps/ref_chosen": -324.2633972167969,
"logps/ref_rejected": -293.09466552734375,
"logps/rejected": -292.7900085449219,
"loss": 2.7456,
"margin_dpo/margin_mean": 1.3766956329345703,
"margin_dpo/margin_std": 2.85690975189209,
"step": 45
},
{
"epoch": 0.09633507853403141,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.5545244216918945,
"fcm_dpo/q_t": 0.496114581823349,
"grad_norm": 15.118200302124023,
"learning_rate": 4.6874999999999996e-07,
"logits/chosen": -0.6655287742614746,
"logits/rejected": -0.6721944212913513,
"logps/chosen": -296.60784912109375,
"logps/ref_chosen": -298.3357238769531,
"logps/ref_rejected": -267.6670837402344,
"logps/rejected": -267.49371337890625,
"loss": 2.742,
"margin_dpo/margin_mean": 1.5545239448547363,
"margin_dpo/margin_std": 2.66898250579834,
"step": 46
},
{
"epoch": 0.09842931937172775,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.0343875885009766,
"fcm_dpo/q_t": 0.49741438031196594,
"grad_norm": 13.180990219116211,
"learning_rate": 4.791666666666667e-07,
"logits/chosen": -0.6274701356887817,
"logits/rejected": -0.6480857729911804,
"logps/chosen": -261.0554504394531,
"logps/ref_chosen": -262.5669250488281,
"logps/ref_rejected": -258.7110900878906,
"logps/rejected": -258.2340087890625,
"loss": 2.7526,
"margin_dpo/margin_mean": 1.0343868732452393,
"margin_dpo/margin_std": 3.3619446754455566,
"step": 47
},
{
"epoch": 0.10052356020942409,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.4535754919052124,
"fcm_dpo/q_t": 0.4963679611682892,
"grad_norm": 13.781423568725586,
"learning_rate": 4.895833333333333e-07,
"logits/chosen": -0.6604572534561157,
"logits/rejected": -0.6805275678634644,
"logps/chosen": -267.6912536621094,
"logps/ref_chosen": -269.4932861328125,
"logps/ref_rejected": -241.89007568359375,
"logps/rejected": -241.5416259765625,
"loss": 2.7441,
"margin_dpo/margin_mean": 1.453575849533081,
"margin_dpo/margin_std": 3.092007637023926,
"step": 48
},
{
"epoch": 0.10261780104712041,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.7581769227981567,
"fcm_dpo/q_t": 0.49560609459877014,
"grad_norm": 13.815945625305176,
"learning_rate": 5e-07,
"logits/chosen": -0.6750833988189697,
"logits/rejected": -0.6614372134208679,
"logps/chosen": -255.6660919189453,
"logps/ref_chosen": -257.884521484375,
"logps/ref_rejected": -256.8973083496094,
"logps/rejected": -256.4371032714844,
"loss": 2.7382,
"margin_dpo/margin_mean": 1.758177399635315,
"margin_dpo/margin_std": 3.521055221557617,
"step": 49
},
{
"epoch": 0.10471204188481675,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.0942366123199463,
"fcm_dpo/q_t": 0.4972657859325409,
"grad_norm": 14.002850532531738,
"learning_rate": 4.999932966293553e-07,
"logits/chosen": -0.6632430553436279,
"logits/rejected": -0.6833736896514893,
"logps/chosen": -299.620361328125,
"logps/ref_chosen": -301.62884521484375,
"logps/ref_rejected": -298.26812744140625,
"logps/rejected": -297.3538513183594,
"loss": 2.7516,
"margin_dpo/margin_mean": 1.094237208366394,
"margin_dpo/margin_std": 3.9754343032836914,
"step": 50
},
{
"epoch": 0.1068062827225131,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.9401464462280273,
"fcm_dpo/q_t": 0.49515286087989807,
"grad_norm": 14.562750816345215,
"learning_rate": 4.999731868769026e-07,
"logits/chosen": -0.6401030421257019,
"logits/rejected": -0.636492133140564,
"logps/chosen": -267.28765869140625,
"logps/ref_chosen": -269.37237548828125,
"logps/ref_rejected": -297.01373291015625,
"logps/rejected": -296.869140625,
"loss": 2.7352,
"margin_dpo/margin_mean": 1.9401463270187378,
"margin_dpo/margin_std": 4.744877815246582,
"step": 51
},
{
"epoch": 0.10890052356020942,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 3.099560260772705,
"fcm_dpo/q_t": 0.49225854873657227,
"grad_norm": 15.10215950012207,
"learning_rate": 4.99939671821067e-07,
"logits/chosen": -0.6769546270370483,
"logits/rejected": -0.6809255480766296,
"logps/chosen": -304.0876770019531,
"logps/ref_chosen": -306.9028015136719,
"logps/ref_rejected": -281.2473449707031,
"logps/rejected": -281.53179931640625,
"loss": 2.7124,
"margin_dpo/margin_mean": 3.0995595455169678,
"margin_dpo/margin_std": 4.842742919921875,
"step": 52
},
{
"epoch": 0.11099476439790576,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 2.183377265930176,
"fcm_dpo/q_t": 0.49454569816589355,
"grad_norm": 14.98223876953125,
"learning_rate": 4.998927532591591e-07,
"logits/chosen": -0.6660061478614807,
"logits/rejected": -0.7023321986198425,
"logps/chosen": -283.1426086425781,
"logps/ref_chosen": -285.9759216308594,
"logps/ref_rejected": -273.9139709472656,
"logps/rejected": -273.2640380859375,
"loss": 2.7306,
"margin_dpo/margin_mean": 2.1833763122558594,
"margin_dpo/margin_std": 5.137148380279541,
"step": 53
},
{
"epoch": 0.1130890052356021,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.8519773483276367,
"fcm_dpo/q_t": 0.4953748285770416,
"grad_norm": 13.197480201721191,
"learning_rate": 4.998324337072792e-07,
"logits/chosen": -0.7060707211494446,
"logits/rejected": -0.7145115733146667,
"logps/chosen": -303.8565368652344,
"logps/ref_chosen": -306.504638671875,
"logps/ref_rejected": -272.66888427734375,
"logps/rejected": -271.8727722167969,
"loss": 2.7372,
"margin_dpo/margin_mean": 1.8519774675369263,
"margin_dpo/margin_std": 5.3217267990112305,
"step": 54
},
{
"epoch": 0.11518324607329843,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 2.209861993789673,
"fcm_dpo/q_t": 0.4944804310798645,
"grad_norm": 12.42446517944336,
"learning_rate": 4.997587164001815e-07,
"logits/chosen": -0.6662589311599731,
"logits/rejected": -0.6707650423049927,
"logps/chosen": -220.50619506835938,
"logps/ref_chosen": -222.33013916015625,
"logps/ref_rejected": -206.5965118408203,
"logps/rejected": -206.98240661621094,
"loss": 2.7299,
"margin_dpo/margin_mean": 2.209861993789673,
"margin_dpo/margin_std": 4.900571346282959,
"step": 55
},
{
"epoch": 0.11727748691099477,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 3.187299966812134,
"fcm_dpo/q_t": 0.49204349517822266,
"grad_norm": 13.726897239685059,
"learning_rate": 4.996716052911017e-07,
"logits/chosen": -0.6358221769332886,
"logits/rejected": -0.6481887698173523,
"logps/chosen": -247.6187744140625,
"logps/ref_chosen": -250.47816467285156,
"logps/ref_rejected": -228.24984741210938,
"logps/rejected": -228.5777587890625,
"loss": 2.7115,
"margin_dpo/margin_mean": 3.1872992515563965,
"margin_dpo/margin_std": 6.127588272094727,
"step": 56
},
{
"epoch": 0.1193717277486911,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 4.3034772872924805,
"fcm_dpo/q_t": 0.48925405740737915,
"grad_norm": 15.382685661315918,
"learning_rate": 4.99571105051544e-07,
"logits/chosen": -0.7119564414024353,
"logits/rejected": -0.6899896860122681,
"logps/chosen": -311.2024841308594,
"logps/ref_chosen": -315.1195068359375,
"logps/ref_rejected": -272.755615234375,
"logps/rejected": -273.1420593261719,
"loss": 2.6892,
"margin_dpo/margin_mean": 4.303476333618164,
"margin_dpo/margin_std": 5.863011837005615,
"step": 57
},
{
"epoch": 0.12146596858638743,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 2.3125758171081543,
"fcm_dpo/q_t": 0.4942219853401184,
"grad_norm": 13.760862350463867,
"learning_rate": 4.994572210710314e-07,
"logits/chosen": -0.6568905711174011,
"logits/rejected": -0.6759148240089417,
"logps/chosen": -262.7411804199219,
"logps/ref_chosen": -265.1816711425781,
"logps/ref_rejected": -268.22039794921875,
"logps/rejected": -268.0924987792969,
"loss": 2.7283,
"margin_dpo/margin_mean": 2.3125760555267334,
"margin_dpo/margin_std": 5.716425895690918,
"step": 58
},
{
"epoch": 0.12356020942408377,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.6165785789489746,
"fcm_dpo/q_t": 0.4959658980369568,
"grad_norm": 14.839912414550781,
"learning_rate": 4.993299594568162e-07,
"logits/chosen": -0.6215161085128784,
"logits/rejected": -0.6151977777481079,
"logps/chosen": -284.2671203613281,
"logps/ref_chosen": -286.3539733886719,
"logps/ref_rejected": -260.6683654785156,
"logps/rejected": -260.19805908203125,
"loss": 2.7432,
"margin_dpo/margin_mean": 1.6165783405303955,
"margin_dpo/margin_std": 7.129423141479492,
"step": 59
},
{
"epoch": 0.1256544502617801,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 3.3817367553710938,
"fcm_dpo/q_t": 0.4915561079978943,
"grad_norm": 14.034817695617676,
"learning_rate": 4.991893270335525e-07,
"logits/chosen": -0.7009058594703674,
"logits/rejected": -0.7239299416542053,
"logps/chosen": -255.88699340820312,
"logps/ref_chosen": -258.74859619140625,
"logps/ref_rejected": -255.05441284179688,
"logps/rejected": -255.57455444335938,
"loss": 2.7083,
"margin_dpo/margin_mean": 3.381736993789673,
"margin_dpo/margin_std": 7.30776309967041,
"step": 60
},
{
"epoch": 0.12774869109947645,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 3.223818302154541,
"fcm_dpo/q_t": 0.4919551908969879,
"grad_norm": 14.857345581054688,
"learning_rate": 4.990353313429303e-07,
"logits/chosen": -0.6699813604354858,
"logits/rejected": -0.6866795420646667,
"logps/chosen": -275.4521179199219,
"logps/ref_chosen": -278.4678955078125,
"logps/ref_rejected": -252.025146484375,
"logps/rejected": -252.233154296875,
"loss": 2.7119,
"margin_dpo/margin_mean": 3.223818063735962,
"margin_dpo/margin_std": 7.812247276306152,
"step": 61
},
{
"epoch": 0.12984293193717278,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 3.4326207637786865,
"fcm_dpo/q_t": 0.49143853783607483,
"grad_norm": 13.362186431884766,
"learning_rate": 4.988679806432711e-07,
"logits/chosen": -0.6334831714630127,
"logits/rejected": -0.6700643301010132,
"logps/chosen": -268.894287109375,
"logps/ref_chosen": -272.92431640625,
"logps/ref_rejected": -260.7957763671875,
"logps/rejected": -260.1983947753906,
"loss": 2.7074,
"margin_dpo/margin_mean": 3.4326210021972656,
"margin_dpo/margin_std": 7.573697090148926,
"step": 62
},
{
"epoch": 0.1319371727748691,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 3.569255828857422,
"fcm_dpo/q_t": 0.4910915791988373,
"grad_norm": 14.09818172454834,
"learning_rate": 4.986872839090852e-07,
"logits/chosen": -0.6905286312103271,
"logits/rejected": -0.6957682371139526,
"logps/chosen": -273.65802001953125,
"logps/ref_chosen": -277.0889892578125,
"logps/ref_rejected": -273.3388977050781,
"logps/rejected": -273.4771728515625,
"loss": 2.7046,
"margin_dpo/margin_mean": 3.569256067276001,
"margin_dpo/margin_std": 7.42416524887085,
"step": 63
},
{
"epoch": 0.13403141361256546,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 4.288572788238525,
"fcm_dpo/q_t": 0.4893013536930084,
"grad_norm": 14.16968059539795,
"learning_rate": 4.9849325083059e-07,
"logits/chosen": -0.6592855453491211,
"logits/rejected": -0.6547948122024536,
"logps/chosen": -279.7130432128906,
"logps/ref_chosen": -283.8244934082031,
"logps/ref_rejected": -263.3023681640625,
"logps/rejected": -263.4794921875,
"loss": 2.6915,
"margin_dpo/margin_mean": 4.288572311401367,
"margin_dpo/margin_std": 8.552833557128906,
"step": 64
},
{
"epoch": 0.13612565445026178,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 3.3739163875579834,
"fcm_dpo/q_t": 0.49157899618148804,
"grad_norm": 13.907535552978516,
"learning_rate": 4.982858918131906e-07,
"logits/chosen": -0.7148904204368591,
"logits/rejected": -0.6865019798278809,
"logps/chosen": -261.4264221191406,
"logps/ref_chosen": -264.86993408203125,
"logps/ref_rejected": -268.5050354003906,
"logps/rejected": -268.4354553222656,
"loss": 2.7085,
"margin_dpo/margin_mean": 3.3739161491394043,
"margin_dpo/margin_std": 7.46225118637085,
"step": 65
},
{
"epoch": 0.1382198952879581,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 4.155472278594971,
"fcm_dpo/q_t": 0.4896465539932251,
"grad_norm": 13.827627182006836,
"learning_rate": 4.980652179769217e-07,
"logits/chosen": -0.7052959203720093,
"logits/rejected": -0.7210156321525574,
"logps/chosen": -269.82061767578125,
"logps/ref_chosen": -272.9283447265625,
"logps/ref_rejected": -280.94696044921875,
"logps/rejected": -281.9947509765625,
"loss": 2.6956,
"margin_dpo/margin_mean": 4.1554718017578125,
"margin_dpo/margin_std": 10.197858810424805,
"step": 66
},
{
"epoch": 0.14031413612565444,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 3.6113243103027344,
"fcm_dpo/q_t": 0.4909900724887848,
"grad_norm": 12.831055641174316,
"learning_rate": 4.978312411558517e-07,
"logits/chosen": -0.69936603307724,
"logits/rejected": -0.728168249130249,
"logps/chosen": -262.1564025878906,
"logps/ref_chosen": -266.18695068359375,
"logps/ref_rejected": -250.16806030273438,
"logps/rejected": -249.74880981445312,
"loss": 2.7052,
"margin_dpo/margin_mean": 3.611323595046997,
"margin_dpo/margin_std": 9.059497833251953,
"step": 67
},
{
"epoch": 0.1424083769633508,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 4.629339694976807,
"fcm_dpo/q_t": 0.48848462104797363,
"grad_norm": 13.920244216918945,
"learning_rate": 4.975839738974473e-07,
"logits/chosen": -0.709690511226654,
"logits/rejected": -0.7195597290992737,
"logps/chosen": -294.90496826171875,
"logps/ref_chosen": -297.9385986328125,
"logps/ref_rejected": -261.51104736328125,
"logps/rejected": -263.10675048828125,
"loss": 2.687,
"margin_dpo/margin_mean": 4.629340171813965,
"margin_dpo/margin_std": 10.80176830291748,
"step": 68
},
{
"epoch": 0.14450261780104712,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 6.114617347717285,
"fcm_dpo/q_t": 0.4847623109817505,
"grad_norm": 14.259486198425293,
"learning_rate": 4.97323429461901e-07,
"logits/chosen": -0.7122206091880798,
"logits/rejected": -0.7363570332527161,
"logps/chosen": -261.6822204589844,
"logps/ref_chosen": -265.6175231933594,
"logps/ref_rejected": -236.82855224609375,
"logps/rejected": -239.0078582763672,
"loss": 2.6571,
"margin_dpo/margin_mean": 6.114616870880127,
"margin_dpo/margin_std": 9.916292190551758,
"step": 69
},
{
"epoch": 0.14659685863874344,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 5.927559852600098,
"fcm_dpo/q_t": 0.4852375388145447,
"grad_norm": 14.307376861572266,
"learning_rate": 4.970496218214204e-07,
"logits/chosen": -0.6976159811019897,
"logits/rejected": -0.7270653247833252,
"logps/chosen": -291.8657531738281,
"logps/ref_chosen": -296.2259521484375,
"logps/ref_rejected": -254.69635009765625,
"logps/rejected": -256.26373291015625,
"loss": 2.6622,
"margin_dpo/margin_mean": 5.927558422088623,
"margin_dpo/margin_std": 11.058009147644043,
"step": 70
},
{
"epoch": 0.1486910994764398,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 5.119117736816406,
"fcm_dpo/q_t": 0.4872404932975769,
"grad_norm": 14.217865943908691,
"learning_rate": 4.967625656594781e-07,
"logits/chosen": -0.6751896739006042,
"logits/rejected": -0.667328417301178,
"logps/chosen": -283.8874816894531,
"logps/ref_chosen": -288.92724609375,
"logps/ref_rejected": -278.6432189941406,
"logps/rejected": -278.72259521484375,
"loss": 2.6799,
"margin_dpo/margin_mean": 5.119117736816406,
"margin_dpo/margin_std": 12.632835388183594,
"step": 71
},
{
"epoch": 0.15078534031413612,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 5.15415096282959,
"fcm_dpo/q_t": 0.48714739084243774,
"grad_norm": 14.081000328063965,
"learning_rate": 4.964622763700252e-07,
"logits/chosen": -0.7195616364479065,
"logits/rejected": -0.7294360399246216,
"logps/chosen": -233.75759887695312,
"logps/ref_chosen": -237.0452880859375,
"logps/ref_rejected": -252.7946319580078,
"logps/rejected": -254.66107177734375,
"loss": 2.6772,
"margin_dpo/margin_mean": 5.154151916503906,
"margin_dpo/margin_std": 10.921250343322754,
"step": 72
},
{
"epoch": 0.15287958115183245,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 4.800140380859375,
"fcm_dpo/q_t": 0.4880289137363434,
"grad_norm": 13.94972038269043,
"learning_rate": 4.961487700566646e-07,
"logits/chosen": -0.6823411583900452,
"logits/rejected": -0.6945748329162598,
"logps/chosen": -268.699462890625,
"logps/ref_chosen": -273.0531005859375,
"logps/ref_rejected": -246.83787536621094,
"logps/rejected": -247.28440856933594,
"loss": 2.686,
"margin_dpo/margin_mean": 4.800140380859375,
"margin_dpo/margin_std": 12.621927261352539,
"step": 73
},
{
"epoch": 0.1549738219895288,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 4.433067321777344,
"fcm_dpo/q_t": 0.4889623522758484,
"grad_norm": 15.233907699584961,
"learning_rate": 4.958220635317885e-07,
"logits/chosen": -0.7329132556915283,
"logits/rejected": -0.7140373587608337,
"logps/chosen": -338.9254150390625,
"logps/ref_chosen": -342.28192138671875,
"logps/ref_rejected": -330.03155517578125,
"logps/rejected": -331.1081237792969,
"loss": 2.6924,
"margin_dpo/margin_mean": 4.433067321777344,
"margin_dpo/margin_std": 12.183477401733398,
"step": 74
},
{
"epoch": 0.15706806282722513,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 6.617554664611816,
"fcm_dpo/q_t": 0.4835120439529419,
"grad_norm": 14.692926406860352,
"learning_rate": 4.954821743156767e-07,
"logits/chosen": -0.6664029359817505,
"logits/rejected": -0.6678723096847534,
"logps/chosen": -262.25531005859375,
"logps/ref_chosen": -266.8641357421875,
"logps/ref_rejected": -276.8700256347656,
"logps/rejected": -278.87872314453125,
"loss": 2.6491,
"margin_dpo/margin_mean": 6.617555141448975,
"margin_dpo/margin_std": 11.323915481567383,
"step": 75
},
{
"epoch": 0.15916230366492146,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 7.1546311378479,
"fcm_dpo/q_t": 0.48222318291664124,
"grad_norm": 14.566569328308105,
"learning_rate": 4.951291206355559e-07,
"logits/chosen": -0.7340927124023438,
"logits/rejected": -0.7434124946594238,
"logps/chosen": -277.08807373046875,
"logps/ref_chosen": -281.1745910644531,
"logps/ref_rejected": -263.6103515625,
"logps/rejected": -266.6784973144531,
"loss": 2.641,
"margin_dpo/margin_mean": 7.1546311378479,
"margin_dpo/margin_std": 13.309123992919922,
"step": 76
},
{
"epoch": 0.1612565445026178,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 5.820921897888184,
"fcm_dpo/q_t": 0.48554641008377075,
"grad_norm": 16.591699600219727,
"learning_rate": 4.947629214246236e-07,
"logits/chosen": -0.5975011587142944,
"logits/rejected": -0.6005101799964905,
"logps/chosen": -302.38055419921875,
"logps/ref_chosen": -306.09527587890625,
"logps/ref_rejected": -253.4856414794922,
"logps/rejected": -255.59182739257812,
"loss": 2.6684,
"margin_dpo/margin_mean": 5.820921897888184,
"margin_dpo/margin_std": 14.184414863586426,
"step": 77
},
{
"epoch": 0.16335078534031414,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 8.720876693725586,
"fcm_dpo/q_t": 0.47836023569107056,
"grad_norm": 14.840081214904785,
"learning_rate": 4.943835963210323e-07,
"logits/chosen": -0.7116665840148926,
"logits/rejected": -0.7065909504890442,
"logps/chosen": -253.0052032470703,
"logps/ref_chosen": -256.90234375,
"logps/ref_rejected": -211.57066345214844,
"logps/rejected": -216.39439392089844,
"loss": 2.6128,
"margin_dpo/margin_mean": 8.72087574005127,
"margin_dpo/margin_std": 14.666790008544922,
"step": 78
},
{
"epoch": 0.16544502617801046,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 8.277425765991211,
"fcm_dpo/q_t": 0.4794629216194153,
"grad_norm": 14.876046180725098,
"learning_rate": 4.939911656668361e-07,
"logits/chosen": -0.6847647428512573,
"logits/rejected": -0.7000732421875,
"logps/chosen": -263.1416931152344,
"logps/ref_chosen": -266.2735595703125,
"logps/ref_rejected": -251.56483459472656,
"logps/rejected": -256.71038818359375,
"loss": 2.6226,
"margin_dpo/margin_mean": 8.277425765991211,
"margin_dpo/margin_std": 15.380229949951172,
"step": 79
},
{
"epoch": 0.16753926701570682,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 6.552534580230713,
"fcm_dpo/q_t": 0.48373961448669434,
"grad_norm": 14.464832305908203,
"learning_rate": 4.935856505068998e-07,
"logits/chosen": -0.6876570582389832,
"logits/rejected": -0.7135106921195984,
"logps/chosen": -286.0024108886719,
"logps/ref_chosen": -287.8509826660156,
"logps/ref_rejected": -256.06829833984375,
"logps/rejected": -260.7722473144531,
"loss": 2.6536,
"margin_dpo/margin_mean": 6.552535057067871,
"margin_dpo/margin_std": 13.667981147766113,
"step": 80
},
{
"epoch": 0.16963350785340314,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 7.084200859069824,
"fcm_dpo/q_t": 0.48243093490600586,
"grad_norm": 14.040337562561035,
"learning_rate": 4.93167072587771e-07,
"logits/chosen": -0.6800787448883057,
"logits/rejected": -0.6770706176757812,
"logps/chosen": -266.1457214355469,
"logps/ref_chosen": -268.5232238769531,
"logps/ref_rejected": -237.81912231445312,
"logps/rejected": -242.52584838867188,
"loss": 2.6483,
"margin_dpo/margin_mean": 7.084200382232666,
"margin_dpo/margin_std": 17.199254989624023,
"step": 81
},
{
"epoch": 0.17172774869109947,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 7.901603698730469,
"fcm_dpo/q_t": 0.4804532527923584,
"grad_norm": 13.889287948608398,
"learning_rate": 4.92735454356513e-07,
"logits/chosen": -0.7560573816299438,
"logits/rejected": -0.7619873285293579,
"logps/chosen": -277.0093994140625,
"logps/ref_chosen": -279.36395263671875,
"logps/ref_rejected": -236.5254364013672,
"logps/rejected": -242.07249450683594,
"loss": 2.6318,
"margin_dpo/margin_mean": 7.901603698730469,
"margin_dpo/margin_std": 16.425201416015625,
"step": 82
},
{
"epoch": 0.17382198952879582,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 8.673921585083008,
"fcm_dpo/q_t": 0.47846150398254395,
"grad_norm": 15.388964653015137,
"learning_rate": 4.922908189595017e-07,
"logits/chosen": -0.7115753889083862,
"logits/rejected": -0.6914129853248596,
"logps/chosen": -273.9424743652344,
"logps/ref_chosen": -274.21923828125,
"logps/ref_rejected": -276.2333984375,
"logps/rejected": -284.63055419921875,
"loss": 2.6211,
"margin_dpo/margin_mean": 8.673920631408691,
"margin_dpo/margin_std": 18.709972381591797,
"step": 83
},
{
"epoch": 0.17591623036649215,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 7.278215408325195,
"fcm_dpo/q_t": 0.4819440245628357,
"grad_norm": 14.972529411315918,
"learning_rate": 4.918331902411841e-07,
"logits/chosen": -0.7390098571777344,
"logits/rejected": -0.751431405544281,
"logps/chosen": -293.8644714355469,
"logps/ref_chosen": -294.3975524902344,
"logps/ref_rejected": -279.8236083984375,
"logps/rejected": -286.5687255859375,
"loss": 2.6451,
"margin_dpo/margin_mean": 7.278214931488037,
"margin_dpo/margin_std": 17.533550262451172,
"step": 84
},
{
"epoch": 0.17801047120418848,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 5.719230651855469,
"fcm_dpo/q_t": 0.4858088791370392,
"grad_norm": 14.686837196350098,
"learning_rate": 4.913625927427995e-07,
"logits/chosen": -0.6824201345443726,
"logits/rejected": -0.688214898109436,
"logps/chosen": -245.18032836914062,
"logps/ref_chosen": -243.66220092773438,
"logps/ref_rejected": -263.9394836425781,
"logps/rejected": -271.17681884765625,
"loss": 2.6733,
"margin_dpo/margin_mean": 5.719229698181152,
"margin_dpo/margin_std": 15.939430236816406,
"step": 85
},
{
"epoch": 0.18010471204188483,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 8.894048690795898,
"fcm_dpo/q_t": 0.4779210090637207,
"grad_norm": 17.45473289489746,
"learning_rate": 4.908790517010636e-07,
"logits/chosen": -0.7136616110801697,
"logits/rejected": -0.7065308094024658,
"logps/chosen": -308.2430419921875,
"logps/ref_chosen": -309.4306945800781,
"logps/ref_rejected": -290.9124755859375,
"logps/rejected": -298.6188659667969,
"loss": 2.6145,
"margin_dpo/margin_mean": 8.894048690795898,
"margin_dpo/margin_std": 17.49412727355957,
"step": 86
},
{
"epoch": 0.18219895287958116,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 10.303476333618164,
"fcm_dpo/q_t": 0.47455835342407227,
"grad_norm": 14.826765060424805,
"learning_rate": 4.903825930468148e-07,
"logits/chosen": -0.7658448219299316,
"logits/rejected": -0.7589297890663147,
"logps/chosen": -278.1609191894531,
"logps/ref_chosen": -278.0277099609375,
"logps/ref_rejected": -245.69309997558594,
"logps/rejected": -256.1297912597656,
"loss": 2.5911,
"margin_dpo/margin_mean": 10.30347728729248,
"margin_dpo/margin_std": 19.4666748046875,
"step": 87
},
{
"epoch": 0.18429319371727748,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 8.835660934448242,
"fcm_dpo/q_t": 0.47816067934036255,
"grad_norm": 14.406031608581543,
"learning_rate": 4.898732434036243e-07,
"logits/chosen": -0.7765223383903503,
"logits/rejected": -0.7866891026496887,
"logps/chosen": -268.6676025390625,
"logps/ref_chosen": -266.51483154296875,
"logps/ref_rejected": -265.8998718261719,
"logps/rejected": -276.8883056640625,
"loss": 2.6189,
"margin_dpo/margin_mean": 8.835660934448242,
"margin_dpo/margin_std": 19.38115119934082,
"step": 88
},
{
"epoch": 0.18638743455497384,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 9.346006393432617,
"fcm_dpo/q_t": 0.47684943675994873,
"grad_norm": 15.267836570739746,
"learning_rate": 4.893510300863676e-07,
"logits/chosen": -0.7532129287719727,
"logits/rejected": -0.746292769908905,
"logps/chosen": -265.619873046875,
"logps/ref_chosen": -265.6893005371094,
"logps/ref_rejected": -251.4897918701172,
"logps/rejected": -260.766357421875,
"loss": 2.6086,
"margin_dpo/margin_mean": 9.346006393432617,
"margin_dpo/margin_std": 18.904386520385742,
"step": 89
},
{
"epoch": 0.18848167539267016,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 8.189677238464355,
"fcm_dpo/q_t": 0.47969090938568115,
"grad_norm": 14.898477554321289,
"learning_rate": 4.8881598109976e-07,
"logits/chosen": -0.761985719203949,
"logits/rejected": -0.7677095532417297,
"logps/chosen": -308.616943359375,
"logps/ref_chosen": -307.4250183105469,
"logps/ref_rejected": -265.7125549316406,
"logps/rejected": -275.09417724609375,
"loss": 2.629,
"margin_dpo/margin_mean": 8.189677238464355,
"margin_dpo/margin_std": 18.18859100341797,
"step": 90
},
{
"epoch": 0.1905759162303665,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 9.50722599029541,
"fcm_dpo/q_t": 0.4765552282333374,
"grad_norm": 16.52025032043457,
"learning_rate": 4.882681251368548e-07,
"logits/chosen": -0.71138596534729,
"logits/rejected": -0.7247719168663025,
"logps/chosen": -237.92726135253906,
"logps/ref_chosen": -235.740966796875,
"logps/ref_rejected": -226.64163208007812,
"logps/rejected": -238.33511352539062,
"loss": 2.6081,
"margin_dpo/margin_mean": 9.507225036621094,
"margin_dpo/margin_std": 19.964391708374023,
"step": 91
},
{
"epoch": 0.19267015706806281,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 10.155900001525879,
"fcm_dpo/q_t": 0.4749046564102173,
"grad_norm": 16.8834228515625,
"learning_rate": 4.877074915775048e-07,
"logits/chosen": -0.7550235390663147,
"logits/rejected": -0.7428237795829773,
"logps/chosen": -286.5733947753906,
"logps/ref_chosen": -283.44757080078125,
"logps/ref_rejected": -273.1358642578125,
"logps/rejected": -286.4175720214844,
"loss": 2.5984,
"margin_dpo/margin_mean": 10.155900001525879,
"margin_dpo/margin_std": 21.626422882080078,
"step": 92
},
{
"epoch": 0.19476439790575917,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 9.585163116455078,
"fcm_dpo/q_t": 0.4763486981391907,
"grad_norm": 14.270893096923828,
"learning_rate": 4.871341104867864e-07,
"logits/chosen": -0.7471739053726196,
"logits/rejected": -0.7665151357650757,
"logps/chosen": -235.86776733398438,
"logps/ref_chosen": -233.337158203125,
"logps/ref_rejected": -230.5454559326172,
"logps/rejected": -242.6612548828125,
"loss": 2.6058,
"margin_dpo/margin_mean": 9.585163116455078,
"margin_dpo/margin_std": 20.088417053222656,
"step": 93
},
{
"epoch": 0.1968586387434555,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 9.327486038208008,
"fcm_dpo/q_t": 0.4769233763217926,
"grad_norm": 15.648102760314941,
"learning_rate": 4.865480126133871e-07,
"logits/chosen": -0.6993234753608704,
"logits/rejected": -0.716032087802887,
"logps/chosen": -297.1053466796875,
"logps/ref_chosen": -294.65277099609375,
"logps/ref_rejected": -283.65802001953125,
"logps/rejected": -295.43804931640625,
"loss": 2.616,
"margin_dpo/margin_mean": 9.327486038208008,
"margin_dpo/margin_std": 22.305442810058594,
"step": 94
},
{
"epoch": 0.19895287958115182,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 10.140195846557617,
"fcm_dpo/q_t": 0.4750337600708008,
"grad_norm": 16.766843795776367,
"learning_rate": 4.859492293879573e-07,
"logits/chosen": -0.7564529776573181,
"logits/rejected": -0.7768670320510864,
"logps/chosen": -315.02734375,
"logps/ref_chosen": -311.6697082519531,
"logps/ref_rejected": -262.74798583984375,
"logps/rejected": -276.24578857421875,
"loss": 2.6034,
"margin_dpo/margin_mean": 10.140196800231934,
"margin_dpo/margin_std": 23.380367279052734,
"step": 95
},
{
"epoch": 0.20104712041884817,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 10.278494834899902,
"fcm_dpo/q_t": 0.4747167229652405,
"grad_norm": 18.818452835083008,
"learning_rate": 4.853377929214243e-07,
"logits/chosen": -0.728140652179718,
"logits/rejected": -0.7401422262191772,
"logps/chosen": -287.2830505371094,
"logps/ref_chosen": -282.5559997558594,
"logps/ref_rejected": -242.70599365234375,
"logps/rejected": -257.7115478515625,
"loss": 2.6016,
"margin_dpo/margin_mean": 10.278495788574219,
"margin_dpo/margin_std": 23.941333770751953,
"step": 96
},
{
"epoch": 0.2031413612565445,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 11.988941192626953,
"fcm_dpo/q_t": 0.4704693555831909,
"grad_norm": 16.01417350769043,
"learning_rate": 4.847137360032699e-07,
"logits/chosen": -0.7595709562301636,
"logits/rejected": -0.747144341468811,
"logps/chosen": -308.0236511230469,
"logps/ref_chosen": -303.57781982421875,
"logps/ref_rejected": -264.2193603515625,
"logps/rejected": -280.6541442871094,
"loss": 2.5667,
"margin_dpo/margin_mean": 11.98894214630127,
"margin_dpo/margin_std": 22.689369201660156,
"step": 97
},
{
"epoch": 0.20523560209424083,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 12.238821029663086,
"fcm_dpo/q_t": 0.4700348973274231,
"grad_norm": 17.27678108215332,
"learning_rate": 4.84077092099773e-07,
"logits/chosen": -0.789761483669281,
"logits/rejected": -0.799389123916626,
"logps/chosen": -291.6961669921875,
"logps/ref_chosen": -286.8303527832031,
"logps/ref_rejected": -278.08331298828125,
"logps/rejected": -295.18792724609375,
"loss": 2.5676,
"margin_dpo/margin_mean": 12.238821983337402,
"margin_dpo/margin_std": 24.61711311340332,
"step": 98
},
{
"epoch": 0.20732984293193718,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 12.695892333984375,
"fcm_dpo/q_t": 0.46888458728790283,
"grad_norm": 16.286874771118164,
"learning_rate": 4.834278953522137e-07,
"logits/chosen": -0.7494280934333801,
"logits/rejected": -0.7613869905471802,
"logps/chosen": -285.1810302734375,
"logps/ref_chosen": -279.9212341308594,
"logps/ref_rejected": -250.3451385498047,
"logps/rejected": -268.30084228515625,
"loss": 2.5665,
"margin_dpo/margin_mean": 12.695892333984375,
"margin_dpo/margin_std": 27.89350700378418,
"step": 99
},
{
"epoch": 0.2094240837696335,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 12.545092582702637,
"fcm_dpo/q_t": 0.46928441524505615,
"grad_norm": 17.758132934570312,
"learning_rate": 4.827661805750437e-07,
"logits/chosen": -0.7917307615280151,
"logits/rejected": -0.8021936416625977,
"logps/chosen": -304.547119140625,
"logps/ref_chosen": -296.82769775390625,
"logps/ref_rejected": -275.55438232421875,
"logps/rejected": -295.81890869140625,
"loss": 2.5616,
"margin_dpo/margin_mean": 12.545093536376953,
"margin_dpo/margin_std": 24.992952346801758,
"step": 100
},
{
"epoch": 0.21151832460732983,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 14.907539367675781,
"fcm_dpo/q_t": 0.4635261297225952,
"grad_norm": 16.2642879486084,
"learning_rate": 4.820919832540181e-07,
"logits/chosen": -0.7832655310630798,
"logits/rejected": -0.7952293753623962,
"logps/chosen": -257.8759460449219,
"logps/ref_chosen": -252.74203491210938,
"logps/ref_rejected": -276.42828369140625,
"logps/rejected": -296.4697265625,
"loss": 2.5229,
"margin_dpo/margin_mean": 14.907539367675781,
"margin_dpo/margin_std": 27.045452117919922,
"step": 101
},
{
"epoch": 0.2136125654450262,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 14.895282745361328,
"fcm_dpo/q_t": 0.46327468752861023,
"grad_norm": 16.111072540283203,
"learning_rate": 4.814053395442932e-07,
"logits/chosen": -0.7602806091308594,
"logits/rejected": -0.7565823793411255,
"logps/chosen": -224.63079833984375,
"logps/ref_chosen": -219.55374145507812,
"logps/ref_rejected": -231.9134521484375,
"logps/rejected": -251.88580322265625,
"loss": 2.52,
"margin_dpo/margin_mean": 14.895282745361328,
"margin_dpo/margin_std": 26.121959686279297,
"step": 102
},
{
"epoch": 0.2157068062827225,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 13.585211753845215,
"fcm_dpo/q_t": 0.4669133126735687,
"grad_norm": 16.34781837463379,
"learning_rate": 4.807062862684873e-07,
"logits/chosen": -0.7938933372497559,
"logits/rejected": -0.7916109561920166,
"logps/chosen": -264.3814392089844,
"logps/ref_chosen": -259.675048828125,
"logps/ref_rejected": -278.7356872558594,
"logps/rejected": -297.02728271484375,
"loss": 2.5454,
"margin_dpo/margin_mean": 13.585211753845215,
"margin_dpo/margin_std": 26.579490661621094,
"step": 103
},
{
"epoch": 0.21780104712041884,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 10.019797325134277,
"fcm_dpo/q_t": 0.47544795274734497,
"grad_norm": 16.59649085998535,
"learning_rate": 4.799948609147061e-07,
"logits/chosen": -0.7764335870742798,
"logits/rejected": -0.7803974151611328,
"logps/chosen": -277.05242919921875,
"logps/ref_chosen": -267.97418212890625,
"logps/ref_rejected": -230.53062438964844,
"logps/rejected": -249.62867736816406,
"loss": 2.613,
"margin_dpo/margin_mean": 10.019798278808594,
"margin_dpo/margin_std": 26.797956466674805,
"step": 104
},
{
"epoch": 0.2198952879581152,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 20.68578338623047,
"fcm_dpo/q_t": 0.44931602478027344,
"grad_norm": 17.398113250732422,
"learning_rate": 4.792711016345321e-07,
"logits/chosen": -0.7766079306602478,
"logits/rejected": -0.7850036025047302,
"logps/chosen": -327.42333984375,
"logps/ref_chosen": -322.25482177734375,
"logps/ref_rejected": -279.02838134765625,
"logps/rejected": -304.8826904296875,
"loss": 2.4205,
"margin_dpo/margin_mean": 20.6857852935791,
"margin_dpo/margin_std": 27.77981185913086,
"step": 105
},
{
"epoch": 0.22198952879581152,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 12.525270462036133,
"fcm_dpo/q_t": 0.46937787532806396,
"grad_norm": 18.587173461914062,
"learning_rate": 4.785350472409791e-07,
"logits/chosen": -0.7576958537101746,
"logits/rejected": -0.7877755165100098,
"logps/chosen": -308.6112060546875,
"logps/ref_chosen": -296.15777587890625,
"logps/ref_rejected": -266.2711181640625,
"logps/rejected": -291.24981689453125,
"loss": 2.5755,
"margin_dpo/margin_mean": 12.525269508361816,
"margin_dpo/margin_std": 30.001962661743164,
"step": 106
},
{
"epoch": 0.22408376963350785,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 19.707744598388672,
"fcm_dpo/q_t": 0.45181214809417725,
"grad_norm": 19.11066436767578,
"learning_rate": 4.777867372064105e-07,
"logits/chosen": -0.7814912796020508,
"logits/rejected": -0.7764994502067566,
"logps/chosen": -311.3240966796875,
"logps/ref_chosen": -306.9963073730469,
"logps/ref_rejected": -296.8005065917969,
"logps/rejected": -320.8360595703125,
"loss": 2.4386,
"margin_dpo/margin_mean": 19.707744598388672,
"margin_dpo/margin_std": 28.364734649658203,
"step": 107
},
{
"epoch": 0.2261780104712042,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 18.243602752685547,
"fcm_dpo/q_t": 0.4558309316635132,
"grad_norm": 50.12049102783203,
"learning_rate": 4.770262116604223e-07,
"logits/chosen": -0.7771338820457458,
"logits/rejected": -0.7864300012588501,
"logps/chosen": -300.6669921875,
"logps/ref_chosen": -295.1526794433594,
"logps/ref_rejected": -235.9735565185547,
"logps/rejected": -259.73150634765625,
"loss": 2.4765,
"margin_dpo/margin_mean": 18.243602752685547,
"margin_dpo/margin_std": 31.952545166015625,
"step": 108
},
{
"epoch": 0.22827225130890053,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 20.236467361450195,
"fcm_dpo/q_t": 0.45114585757255554,
"grad_norm": 19.165874481201172,
"learning_rate": 4.7625351138769166e-07,
"logits/chosen": -0.8085035681724548,
"logits/rejected": -0.8088306188583374,
"logps/chosen": -334.3610534667969,
"logps/ref_chosen": -325.9247741699219,
"logps/ref_rejected": -279.1575927734375,
"logps/rejected": -307.8303527832031,
"loss": 2.4402,
"margin_dpo/margin_mean": 20.236469268798828,
"margin_dpo/margin_std": 32.261653900146484,
"step": 109
},
{
"epoch": 0.23036649214659685,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 19.133163452148438,
"fcm_dpo/q_t": 0.4537068009376526,
"grad_norm": 17.354257583618164,
"learning_rate": 4.75468677825789e-07,
"logits/chosen": -0.8018887639045715,
"logits/rejected": -0.7904523611068726,
"logps/chosen": -283.482666015625,
"logps/ref_chosen": -274.439208984375,
"logps/ref_rejected": -260.0518798828125,
"logps/rejected": -288.228515625,
"loss": 2.4666,
"margin_dpo/margin_mean": 19.133161544799805,
"margin_dpo/margin_std": 34.13288879394531,
"step": 110
},
{
"epoch": 0.2324607329842932,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 20.286800384521484,
"fcm_dpo/q_t": 0.4511919915676117,
"grad_norm": 19.255943298339844,
"learning_rate": 4.7467175306295647e-07,
"logits/chosen": -0.8372225761413574,
"logits/rejected": -0.8194422721862793,
"logps/chosen": -339.20458984375,
"logps/ref_chosen": -329.23614501953125,
"logps/ref_rejected": -287.83258056640625,
"logps/rejected": -318.0878601074219,
"loss": 2.4513,
"margin_dpo/margin_mean": 20.28679847717285,
"margin_dpo/margin_std": 35.69065856933594,
"step": 111
},
{
"epoch": 0.23455497382198953,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 13.170830726623535,
"fcm_dpo/q_t": 0.46836143732070923,
"grad_norm": 19.74009895324707,
"learning_rate": 4.7386277983585053e-07,
"logits/chosen": -0.7510491609573364,
"logits/rejected": -0.7747848033905029,
"logps/chosen": -272.5999450683594,
"logps/ref_chosen": -257.05938720703125,
"logps/ref_rejected": -272.9543762207031,
"logps/rejected": -301.665771484375,
"loss": 2.5921,
"margin_dpo/margin_mean": 13.170831680297852,
"margin_dpo/margin_std": 36.926422119140625,
"step": 112
},
{
"epoch": 0.23664921465968586,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 24.51295280456543,
"fcm_dpo/q_t": 0.4418681859970093,
"grad_norm": 20.760662078857422,
"learning_rate": 4.7304180152725024e-07,
"logits/chosen": -0.8299680948257446,
"logits/rejected": -0.8356962203979492,
"logps/chosen": -299.16558837890625,
"logps/ref_chosen": -286.0416564941406,
"logps/ref_rejected": -270.374267578125,
"logps/rejected": -308.0111083984375,
"loss": 2.3921,
"margin_dpo/margin_mean": 24.51295280456543,
"margin_dpo/margin_std": 40.263916015625,
"step": 113
},
{
"epoch": 0.2387434554973822,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 13.790729522705078,
"fcm_dpo/q_t": 0.4664270281791687,
"grad_norm": 20.009796142578125,
"learning_rate": 4.7220886216373085e-07,
"logits/chosen": -0.8483290672302246,
"logits/rejected": -0.8481042981147766,
"logps/chosen": -277.133056640625,
"logps/ref_chosen": -260.0084533691406,
"logps/ref_rejected": -246.668701171875,
"logps/rejected": -277.58404541015625,
"loss": 2.5741,
"margin_dpo/margin_mean": 13.790729522705078,
"margin_dpo/margin_std": 35.78166961669922,
"step": 114
},
{
"epoch": 0.24083769633507854,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 14.022125244140625,
"fcm_dpo/q_t": 0.4671180844306946,
"grad_norm": 22.126644134521484,
"learning_rate": 4.7136400641330245e-07,
"logits/chosen": -0.8374190330505371,
"logits/rejected": -0.8019670844078064,
"logps/chosen": -318.7716064453125,
"logps/ref_chosen": -299.4229736328125,
"logps/ref_rejected": -272.11407470703125,
"logps/rejected": -305.4848327636719,
"loss": 2.5819,
"margin_dpo/margin_mean": 14.022125244140625,
"margin_dpo/margin_std": 40.54826354980469,
"step": 115
},
{
"epoch": 0.24293193717277486,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 18.081724166870117,
"fcm_dpo/q_t": 0.45619040727615356,
"grad_norm": 19.613004684448242,
"learning_rate": 4.70507279583015e-07,
"logits/chosen": -0.870734453201294,
"logits/rejected": -0.8401482701301575,
"logps/chosen": -295.11480712890625,
"logps/ref_chosen": -279.2639465332031,
"logps/ref_rejected": -253.6208953857422,
"logps/rejected": -287.5534973144531,
"loss": 2.4919,
"margin_dpo/margin_mean": 18.081724166870117,
"margin_dpo/margin_std": 35.874794006347656,
"step": 116
},
{
"epoch": 0.2450261780104712,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 20.975980758666992,
"fcm_dpo/q_t": 0.4494273066520691,
"grad_norm": 21.376453399658203,
"learning_rate": 4.6963872761652834e-07,
"logits/chosen": -0.829878032207489,
"logits/rejected": -0.8307478427886963,
"logps/chosen": -278.62109375,
"logps/ref_chosen": -259.224853515625,
"logps/ref_rejected": -229.30487060546875,
"logps/rejected": -269.6770935058594,
"loss": 2.4392,
"margin_dpo/margin_mean": 20.975980758666992,
"margin_dpo/margin_std": 34.94779968261719,
"step": 117
},
{
"epoch": 0.24712041884816754,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 23.182437896728516,
"fcm_dpo/q_t": 0.4453861713409424,
"grad_norm": 22.829740524291992,
"learning_rate": 4.687583970916486e-07,
"logits/chosen": -0.8200262188911438,
"logits/rejected": -0.8091733455657959,
"logps/chosen": -293.0044250488281,
"logps/ref_chosen": -267.0707092285156,
"logps/ref_rejected": -272.744140625,
"logps/rejected": -321.8602600097656,
"loss": 2.4338,
"margin_dpo/margin_mean": 23.18243980407715,
"margin_dpo/margin_std": 44.550193786621094,
"step": 118
},
{
"epoch": 0.24921465968586387,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 17.070175170898438,
"fcm_dpo/q_t": 0.4598205089569092,
"grad_norm": 22.841718673706055,
"learning_rate": 4.6786633521783005e-07,
"logits/chosen": -0.8833033442497253,
"logits/rejected": -0.8812328577041626,
"logps/chosen": -356.4073486328125,
"logps/ref_chosen": -324.6766357421875,
"logps/ref_rejected": -306.02850341796875,
"logps/rejected": -354.829345703125,
"loss": 2.5419,
"margin_dpo/margin_mean": 17.070175170898438,
"margin_dpo/margin_std": 43.53057098388672,
"step": 119
},
{
"epoch": 0.2513089005235602,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 19.51913833618164,
"fcm_dpo/q_t": 0.45390617847442627,
"grad_norm": 20.96836280822754,
"learning_rate": 4.669625898336438e-07,
"logits/chosen": -0.8406351804733276,
"logits/rejected": -0.8485410809516907,
"logps/chosen": -343.9928894042969,
"logps/ref_chosen": -315.2617492675781,
"logps/ref_rejected": -265.3182067871094,
"logps/rejected": -313.5684814453125,
"loss": 2.4969,
"margin_dpo/margin_mean": 19.51913833618164,
"margin_dpo/margin_std": 43.91914749145508,
"step": 120
},
{
"epoch": 0.2534031413612565,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 13.95651912689209,
"fcm_dpo/q_t": 0.4672980308532715,
"grad_norm": 29.33723258972168,
"learning_rate": 4.6604720940421207e-07,
"logits/chosen": -0.8484371900558472,
"logits/rejected": -0.8586273789405823,
"logps/chosen": -256.35931396484375,
"logps/ref_chosen": -222.99610900878906,
"logps/ref_rejected": -226.92971801757812,
"logps/rejected": -274.24945068359375,
"loss": 2.5883,
"margin_dpo/margin_mean": 13.95651912689209,
"margin_dpo/margin_std": 41.100868225097656,
"step": 121
},
{
"epoch": 0.2554973821989529,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 19.7639102935791,
"fcm_dpo/q_t": 0.45342403650283813,
"grad_norm": 28.156707763671875,
"learning_rate": 4.651202430186092e-07,
"logits/chosen": -0.8983137607574463,
"logits/rejected": -0.865532398223877,
"logps/chosen": -310.1314392089844,
"logps/ref_chosen": -276.02630615234375,
"logps/ref_rejected": -277.97418212890625,
"logps/rejected": -331.84326171875,
"loss": 2.5094,
"margin_dpo/margin_mean": 19.7639102935791,
"margin_dpo/margin_std": 47.79970169067383,
"step": 122
},
{
"epoch": 0.25759162303664923,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 25.810714721679688,
"fcm_dpo/q_t": 0.4385852813720703,
"grad_norm": 24.411760330200195,
"learning_rate": 4.6418174038722924e-07,
"logits/chosen": -0.8424972891807556,
"logits/rejected": -0.8320512175559998,
"logps/chosen": -354.67022705078125,
"logps/ref_chosen": -328.15460205078125,
"logps/ref_rejected": -280.6913146972656,
"logps/rejected": -333.01763916015625,
"loss": 2.3849,
"margin_dpo/margin_mean": 25.810712814331055,
"margin_dpo/margin_std": 43.724998474121094,
"step": 123
},
{
"epoch": 0.25968586387434556,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 21.996456146240234,
"fcm_dpo/q_t": 0.44727516174316406,
"grad_norm": 26.579641342163086,
"learning_rate": 4.6323175183912023e-07,
"logits/chosen": -0.84192955493927,
"logits/rejected": -0.8101947903633118,
"logps/chosen": -303.3060302734375,
"logps/ref_chosen": -275.69622802734375,
"logps/ref_rejected": -225.3711700439453,
"logps/rejected": -274.9774475097656,
"loss": 2.4439,
"margin_dpo/margin_mean": 21.9964599609375,
"margin_dpo/margin_std": 41.46756362915039,
"step": 124
},
{
"epoch": 0.2617801047120419,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 20.08155632019043,
"fcm_dpo/q_t": 0.4534819722175598,
"grad_norm": 26.522098541259766,
"learning_rate": 4.6227032831928483e-07,
"logits/chosen": -0.8277649879455566,
"logits/rejected": -0.7954879999160767,
"logps/chosen": -307.00042724609375,
"logps/ref_chosen": -278.06976318359375,
"logps/ref_rejected": -265.64752197265625,
"logps/rejected": -314.65972900390625,
"loss": 2.5236,
"margin_dpo/margin_mean": 20.081558227539062,
"margin_dpo/margin_std": 52.180213928222656,
"step": 125
},
{
"epoch": 0.2638743455497382,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 22.623416900634766,
"fcm_dpo/q_t": 0.4464207887649536,
"grad_norm": 25.031295776367188,
"learning_rate": 4.612975213859487e-07,
"logits/chosen": -0.836571216583252,
"logits/rejected": -0.8457585573196411,
"logps/chosen": -346.5595703125,
"logps/ref_chosen": -321.3960876464844,
"logps/ref_rejected": -285.3716735839844,
"logps/rejected": -333.1585388183594,
"loss": 2.4507,
"margin_dpo/margin_mean": 22.623414993286133,
"margin_dpo/margin_std": 45.92791748046875,
"step": 126
},
{
"epoch": 0.26596858638743454,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 26.685638427734375,
"fcm_dpo/q_t": 0.43754297494888306,
"grad_norm": 24.699491500854492,
"learning_rate": 4.603133832077953e-07,
"logits/chosen": -0.896428108215332,
"logits/rejected": -0.8578190803527832,
"logps/chosen": -330.4049072265625,
"logps/ref_chosen": -306.55877685546875,
"logps/ref_rejected": -274.8651428222656,
"logps/rejected": -325.39691162109375,
"loss": 2.3924,
"margin_dpo/margin_mean": 26.68564224243164,
"margin_dpo/margin_std": 47.29706954956055,
"step": 127
},
{
"epoch": 0.2680628272251309,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 30.830581665039062,
"fcm_dpo/q_t": 0.4280052185058594,
"grad_norm": 24.923940658569336,
"learning_rate": 4.5931796656116837e-07,
"logits/chosen": -0.8046323657035828,
"logits/rejected": -0.7962044477462769,
"logps/chosen": -284.2278137207031,
"logps/ref_chosen": -265.3973693847656,
"logps/ref_rejected": -250.9737548828125,
"logps/rejected": -300.634765625,
"loss": 2.3295,
"margin_dpo/margin_mean": 30.830585479736328,
"margin_dpo/margin_std": 50.823123931884766,
"step": 128
},
{
"epoch": 0.27015706806282724,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 27.66098403930664,
"fcm_dpo/q_t": 0.4357224106788635,
"grad_norm": 23.75534439086914,
"learning_rate": 4.5831132482724193e-07,
"logits/chosen": -0.826598584651947,
"logits/rejected": -0.8240451812744141,
"logps/chosen": -323.3130798339844,
"logps/ref_chosen": -303.158447265625,
"logps/ref_rejected": -275.9891052246094,
"logps/rejected": -323.8047180175781,
"loss": 2.3817,
"margin_dpo/margin_mean": 27.66098403930664,
"margin_dpo/margin_std": 50.59884262084961,
"step": 129
},
{
"epoch": 0.27225130890052357,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 28.019359588623047,
"fcm_dpo/q_t": 0.4347432255744934,
"grad_norm": 28.133453369140625,
"learning_rate": 4.5729351198915705e-07,
"logits/chosen": -0.8189080953598022,
"logits/rejected": -0.8522607684135437,
"logps/chosen": -310.00341796875,
"logps/ref_chosen": -286.4073486328125,
"logps/ref_rejected": -294.384765625,
"logps/rejected": -346.000244140625,
"loss": 2.3759,
"margin_dpo/margin_mean": 28.01936149597168,
"margin_dpo/margin_std": 50.8518180847168,
"step": 130
},
{
"epoch": 0.2743455497382199,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 22.60095977783203,
"fcm_dpo/q_t": 0.4474557340145111,
"grad_norm": 31.637062072753906,
"learning_rate": 4.5626458262912735e-07,
"logits/chosen": -0.8900598883628845,
"logits/rejected": -0.8440274000167847,
"logps/chosen": -339.7086181640625,
"logps/ref_chosen": -311.5650634765625,
"logps/ref_rejected": -291.6243591308594,
"logps/rejected": -342.36883544921875,
"loss": 2.493,
"margin_dpo/margin_mean": 22.600963592529297,
"margin_dpo/margin_std": 54.707794189453125,
"step": 131
},
{
"epoch": 0.2764397905759162,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 31.54349136352539,
"fcm_dpo/q_t": 0.4277009665966034,
"grad_norm": 33.08030700683594,
"learning_rate": 4.5522459192551166e-07,
"logits/chosen": -0.8386721014976501,
"logits/rejected": -0.827111005783081,
"logps/chosen": -294.23040771484375,
"logps/ref_chosen": -270.0818176269531,
"logps/ref_rejected": -284.31671142578125,
"logps/rejected": -340.0087585449219,
"loss": 2.3453,
"margin_dpo/margin_mean": 31.54349136352539,
"margin_dpo/margin_std": 55.689910888671875,
"step": 132
},
{
"epoch": 0.27853403141361255,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 28.924863815307617,
"fcm_dpo/q_t": 0.4328790307044983,
"grad_norm": 22.327438354492188,
"learning_rate": 4.541735956498554e-07,
"logits/chosen": -0.8723856210708618,
"logits/rejected": -0.8711926341056824,
"logps/chosen": -313.7200012207031,
"logps/ref_chosen": -285.621337890625,
"logps/ref_rejected": -251.20909118652344,
"logps/rejected": -308.23260498046875,
"loss": 2.3669,
"margin_dpo/margin_mean": 28.924861907958984,
"margin_dpo/margin_std": 51.9771614074707,
"step": 133
},
{
"epoch": 0.2806282722513089,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 21.624433517456055,
"fcm_dpo/q_t": 0.44887590408325195,
"grad_norm": 25.839866638183594,
"learning_rate": 4.5311165016389914e-07,
"logits/chosen": -0.8534168004989624,
"logits/rejected": -0.8518850803375244,
"logps/chosen": -360.07989501953125,
"logps/ref_chosen": -318.92083740234375,
"logps/ref_rejected": -293.18804931640625,
"logps/rejected": -355.9715270996094,
"loss": 2.4722,
"margin_dpo/margin_mean": 21.624431610107422,
"margin_dpo/margin_std": 46.410152435302734,
"step": 134
},
{
"epoch": 0.28272251308900526,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 25.612655639648438,
"fcm_dpo/q_t": 0.43953537940979004,
"grad_norm": 30.349666595458984,
"learning_rate": 4.520388124165564e-07,
"logits/chosen": -0.7702710628509521,
"logits/rejected": -0.8013277649879456,
"logps/chosen": -331.9397277832031,
"logps/ref_chosen": -292.82171630859375,
"logps/ref_rejected": -269.2796630859375,
"logps/rejected": -334.0102844238281,
"loss": 2.4326,
"margin_dpo/margin_mean": 25.612655639648438,
"margin_dpo/margin_std": 52.38396453857422,
"step": 135
},
{
"epoch": 0.2848167539267016,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 28.22667694091797,
"fcm_dpo/q_t": 0.4354791045188904,
"grad_norm": 38.085662841796875,
"learning_rate": 4.5095513994085974e-07,
"logits/chosen": -0.8200618028640747,
"logits/rejected": -0.8148263096809387,
"logps/chosen": -314.89459228515625,
"logps/ref_chosen": -272.8525390625,
"logps/ref_rejected": -252.68370056152344,
"logps/rejected": -322.9524841308594,
"loss": 2.4093,
"margin_dpo/margin_mean": 28.2266788482666,
"margin_dpo/margin_std": 57.474273681640625,
"step": 136
},
{
"epoch": 0.2869109947643979,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 25.167770385742188,
"fcm_dpo/q_t": 0.44242584705352783,
"grad_norm": 33.27935028076172,
"learning_rate": 4.498606908508753e-07,
"logits/chosen": -0.861386775970459,
"logits/rejected": -0.847549557685852,
"logps/chosen": -347.38037109375,
"logps/ref_chosen": -300.75225830078125,
"logps/ref_rejected": -286.1944274902344,
"logps/rejected": -357.99029541015625,
"loss": 2.4438,
"margin_dpo/margin_mean": 25.167770385742188,
"margin_dpo/margin_std": 54.293479919433594,
"step": 137
},
{
"epoch": 0.28900523560209423,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 29.926258087158203,
"fcm_dpo/q_t": 0.4325460195541382,
"grad_norm": 28.63718605041504,
"learning_rate": 4.487555238385862e-07,
"logits/chosen": -0.795958399772644,
"logits/rejected": -0.7795577645301819,
"logps/chosen": -332.85772705078125,
"logps/ref_chosen": -288.93701171875,
"logps/ref_rejected": -263.7076416015625,
"logps/rejected": -337.55462646484375,
"loss": 2.409,
"margin_dpo/margin_mean": 29.92625617980957,
"margin_dpo/margin_std": 63.08027648925781,
"step": 138
},
{
"epoch": 0.29109947643979056,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 20.54540252685547,
"fcm_dpo/q_t": 0.4529183506965637,
"grad_norm": 37.236663818359375,
"learning_rate": 4.476396981707453e-07,
"logits/chosen": -0.8100706934928894,
"logits/rejected": -0.8342887163162231,
"logps/chosen": -310.41162109375,
"logps/ref_chosen": -270.0443115234375,
"logps/ref_rejected": -267.319091796875,
"logps/rejected": -328.2318115234375,
"loss": 2.5317,
"margin_dpo/margin_mean": 20.5454044342041,
"margin_dpo/margin_std": 54.45534896850586,
"step": 139
},
{
"epoch": 0.2931937172774869,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 31.949995040893555,
"fcm_dpo/q_t": 0.42545801401138306,
"grad_norm": 31.992462158203125,
"learning_rate": 4.4651327368569684e-07,
"logits/chosen": -0.8931283950805664,
"logits/rejected": -0.8660533428192139,
"logps/chosen": -320.27825927734375,
"logps/ref_chosen": -282.95556640625,
"logps/ref_rejected": -251.17034912109375,
"logps/rejected": -320.44305419921875,
"loss": 2.3131,
"margin_dpo/margin_mean": 31.949995040893555,
"margin_dpo/margin_std": 51.55928039550781,
"step": 140
},
{
"epoch": 0.29528795811518327,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 33.247337341308594,
"fcm_dpo/q_t": 0.42268094420433044,
"grad_norm": 31.17916488647461,
"learning_rate": 4.453763107901675e-07,
"logits/chosen": -0.8124111294746399,
"logits/rejected": -0.8089026808738708,
"logps/chosen": -331.6009521484375,
"logps/ref_chosen": -296.3001708984375,
"logps/ref_rejected": -279.85089111328125,
"logps/rejected": -348.3990173339844,
"loss": 2.334,
"margin_dpo/margin_mean": 33.247337341308594,
"margin_dpo/margin_std": 59.35110855102539,
"step": 141
},
{
"epoch": 0.2973821989528796,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 27.26082992553711,
"fcm_dpo/q_t": 0.43761512637138367,
"grad_norm": 31.09318733215332,
"learning_rate": 4.4422887045602674e-07,
"logits/chosen": -0.8420273661613464,
"logits/rejected": -0.8427264094352722,
"logps/chosen": -336.1419982910156,
"logps/ref_chosen": -300.56585693359375,
"logps/ref_rejected": -231.4297332763672,
"logps/rejected": -294.2667236328125,
"loss": 2.442,
"margin_dpo/margin_mean": 27.260831832885742,
"margin_dpo/margin_std": 60.99100112915039,
"step": 142
},
{
"epoch": 0.2994764397905759,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 32.61566925048828,
"fcm_dpo/q_t": 0.42502930760383606,
"grad_norm": 29.233285903930664,
"learning_rate": 4.4307101421701755e-07,
"logits/chosen": -0.8273440003395081,
"logits/rejected": -0.8075336217880249,
"logps/chosen": -332.0931396484375,
"logps/ref_chosen": -296.73236083984375,
"logps/ref_rejected": -266.4532470703125,
"logps/rejected": -334.4296875,
"loss": 2.3135,
"margin_dpo/margin_mean": 32.61566925048828,
"margin_dpo/margin_std": 53.791358947753906,
"step": 143
},
{
"epoch": 0.30157068062827225,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 26.467735290527344,
"fcm_dpo/q_t": 0.4402278661727905,
"grad_norm": 27.194307327270508,
"learning_rate": 4.419028041654559e-07,
"logits/chosen": -0.884995698928833,
"logits/rejected": -0.873570442199707,
"logps/chosen": -334.0097351074219,
"logps/ref_chosen": -298.843994140625,
"logps/ref_rejected": -266.120849609375,
"logps/rejected": -327.75433349609375,
"loss": 2.4446,
"margin_dpo/margin_mean": 26.467735290527344,
"margin_dpo/margin_std": 58.89242172241211,
"step": 144
},
{
"epoch": 0.3036649214659686,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 34.09653091430664,
"fcm_dpo/q_t": 0.4219384789466858,
"grad_norm": 26.732824325561523,
"learning_rate": 4.4072430294890166e-07,
"logits/chosen": -0.8661454916000366,
"logits/rejected": -0.8702672719955444,
"logps/chosen": -306.3210144042969,
"logps/ref_chosen": -275.75286865234375,
"logps/ref_rejected": -214.74807739257812,
"logps/rejected": -279.41278076171875,
"loss": 2.2969,
"margin_dpo/margin_mean": 34.096527099609375,
"margin_dpo/margin_std": 55.289180755615234,
"step": 145
},
{
"epoch": 0.3057591623036649,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 33.99929428100586,
"fcm_dpo/q_t": 0.42257657647132874,
"grad_norm": 28.107284545898438,
"learning_rate": 4.395355737667985e-07,
"logits/chosen": -0.8498966693878174,
"logits/rejected": -0.8444647789001465,
"logps/chosen": -315.95745849609375,
"logps/ref_chosen": -277.09820556640625,
"logps/ref_rejected": -265.41339111328125,
"logps/rejected": -338.27191162109375,
"loss": 2.3044,
"margin_dpo/margin_mean": 33.99929428100586,
"margin_dpo/margin_std": 54.75486755371094,
"step": 146
},
{
"epoch": 0.3078534031413613,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 26.186552047729492,
"fcm_dpo/q_t": 0.4397900700569153,
"grad_norm": 30.25438690185547,
"learning_rate": 4.3833668036708483e-07,
"logits/chosen": -0.8417080640792847,
"logits/rejected": -0.8361557126045227,
"logps/chosen": -332.2541198730469,
"logps/ref_chosen": -291.4185791015625,
"logps/ref_rejected": -253.44476318359375,
"logps/rejected": -320.46685791015625,
"loss": 2.4564,
"margin_dpo/margin_mean": 26.18655014038086,
"margin_dpo/margin_std": 57.984493255615234,
"step": 147
},
{
"epoch": 0.3099476439790576,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 29.048696517944336,
"fcm_dpo/q_t": 0.43610677123069763,
"grad_norm": 27.547760009765625,
"learning_rate": 4.3712768704277524e-07,
"logits/chosen": -0.8931534290313721,
"logits/rejected": -0.8946137428283691,
"logps/chosen": -273.2369384765625,
"logps/ref_chosen": -236.74850463867188,
"logps/ref_rejected": -231.46395874023438,
"logps/rejected": -297.0010986328125,
"loss": 2.4266,
"margin_dpo/margin_mean": 29.048696517944336,
"margin_dpo/margin_std": 64.64740753173828,
"step": 148
},
{
"epoch": 0.31204188481675393,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 37.39274597167969,
"fcm_dpo/q_t": 0.4143550395965576,
"grad_norm": 29.936614990234375,
"learning_rate": 4.3590865862851263e-07,
"logits/chosen": -0.8574589490890503,
"logits/rejected": -0.8457263708114624,
"logps/chosen": -363.8849792480469,
"logps/ref_chosen": -319.9284973144531,
"logps/ref_rejected": -308.2037048339844,
"logps/rejected": -389.5529479980469,
"loss": 2.234,
"margin_dpo/margin_mean": 37.39274597167969,
"margin_dpo/margin_std": 54.14255142211914,
"step": 149
},
{
"epoch": 0.31413612565445026,
"fcm_dpo/beta": 0.009999999776482582,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 30.965776443481445,
"fcm_dpo/q_t": 0.42867594957351685,
"grad_norm": 29.833314895629883,
"learning_rate": 4.346796604970912e-07,
"logits/chosen": -0.8480283617973328,
"logits/rejected": -0.8364046812057495,
"logps/chosen": -325.42901611328125,
"logps/ref_chosen": -276.3182067871094,
"logps/ref_rejected": -273.0213317871094,
"logps/rejected": -353.097900390625,
"loss": 2.3588,
"margin_dpo/margin_mean": 30.965778350830078,
"margin_dpo/margin_std": 57.076961517333984,
"step": 150
},
{
"epoch": 0.3162303664921466,
"fcm_dpo/beta": 0.01013832539319992,
"fcm_dpo/delta": 0.13737793266773224,
"fcm_dpo/margin": 46.26220703125,
"fcm_dpo/q_t": 0.3957858085632324,
"grad_norm": 36.25398635864258,
"learning_rate": 4.3344075855595097e-07,
"logits/chosen": -0.8818316459655762,
"logits/rejected": -0.8756142854690552,
"logps/chosen": -344.2825927734375,
"logps/ref_chosen": -297.31280517578125,
"logps/ref_rejected": -266.0942687988281,
"logps/rejected": -359.3262939453125,
"loss": 2.127,
"margin_dpo/margin_mean": 46.26220703125,
"margin_dpo/margin_std": 61.039520263671875,
"step": 151
},
{
"epoch": 0.3183246073298429,
"fcm_dpo/beta": 0.01013832539319992,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 38.702327728271484,
"fcm_dpo/q_t": 0.41483741998672485,
"grad_norm": 28.594280242919922,
"learning_rate": 4.3219201924364323e-07,
"logits/chosen": -0.8768357038497925,
"logits/rejected": -0.8742275238037109,
"logps/chosen": -313.04766845703125,
"logps/ref_chosen": -270.24700927734375,
"logps/ref_rejected": -269.7776184082031,
"logps/rejected": -351.28057861328125,
"loss": 2.2879,
"margin_dpo/margin_mean": 38.702327728271484,
"margin_dpo/margin_std": 66.79115295410156,
"step": 152
},
{
"epoch": 0.3204188481675393,
"fcm_dpo/beta": 0.010181351564824581,
"fcm_dpo/delta": 0.042349446564912796,
"fcm_dpo/margin": 55.004207611083984,
"fcm_dpo/q_t": 0.3775664269924164,
"grad_norm": 31.99171257019043,
"learning_rate": 4.309335095262675e-07,
"logits/chosen": -0.8659894466400146,
"logits/rejected": -0.8552813529968262,
"logps/chosen": -325.279052734375,
"logps/ref_chosen": -273.779052734375,
"logps/ref_rejected": -280.951904296875,
"logps/rejected": -387.4560852050781,
"loss": 1.9965,
"margin_dpo/margin_mean": 55.00421142578125,
"margin_dpo/margin_std": 63.94519805908203,
"step": 153
},
{
"epoch": 0.3225130890052356,
"fcm_dpo/beta": 0.010181351564824581,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 43.047603607177734,
"fcm_dpo/q_t": 0.40552785992622375,
"grad_norm": 35.714759826660156,
"learning_rate": 4.2966529689388064e-07,
"logits/chosen": -0.8902490139007568,
"logits/rejected": -0.8725008964538574,
"logps/chosen": -343.05712890625,
"logps/ref_chosen": -289.9031982421875,
"logps/ref_rejected": -261.51239013671875,
"logps/rejected": -357.7139587402344,
"loss": 2.2594,
"margin_dpo/margin_mean": 43.047603607177734,
"margin_dpo/margin_std": 76.23304748535156,
"step": 154
},
{
"epoch": 0.32460732984293195,
"fcm_dpo/beta": 0.010181351564824581,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 37.05428695678711,
"fcm_dpo/q_t": 0.4198029041290283,
"grad_norm": 40.28274154663086,
"learning_rate": 4.2838744935687716e-07,
"logits/chosen": -0.8533347249031067,
"logits/rejected": -0.8502396941184998,
"logps/chosen": -345.1561279296875,
"logps/ref_chosen": -285.8611755371094,
"logps/ref_rejected": -300.1239013671875,
"logps/rejected": -396.47314453125,
"loss": 2.3237,
"margin_dpo/margin_mean": 37.054283142089844,
"margin_dpo/margin_std": 72.10279083251953,
"step": 155
},
{
"epoch": 0.3267015706806283,
"fcm_dpo/beta": 0.010245493613183498,
"fcm_dpo/delta": 0.0628020390868187,
"fcm_dpo/margin": 52.76293182373047,
"fcm_dpo/q_t": 0.3856203556060791,
"grad_norm": 40.06550598144531,
"learning_rate": 4.271000354423425e-07,
"logits/chosen": -0.8683021068572998,
"logits/rejected": -0.8649327754974365,
"logps/chosen": -333.6725769042969,
"logps/ref_chosen": -279.0354919433594,
"logps/ref_rejected": -244.22702026367188,
"logps/rejected": -351.62701416015625,
"loss": 2.1523,
"margin_dpo/margin_mean": 52.76293182373047,
"margin_dpo/margin_std": 81.59442901611328,
"step": 156
},
{
"epoch": 0.3287958115183246,
"fcm_dpo/beta": 0.010245493613183498,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 40.05835723876953,
"fcm_dpo/q_t": 0.4097440838813782,
"grad_norm": 38.659027099609375,
"learning_rate": 4.258031241903777e-07,
"logits/chosen": -0.9263207912445068,
"logits/rejected": -0.9254261255264282,
"logps/chosen": -329.6526184082031,
"logps/ref_chosen": -270.830322265625,
"logps/ref_rejected": -259.0738830566406,
"logps/rejected": -357.95458984375,
"loss": 2.2783,
"margin_dpo/margin_mean": 40.05835723876953,
"margin_dpo/margin_std": 70.69584655761719,
"step": 157
},
{
"epoch": 0.3308900523560209,
"fcm_dpo/beta": 0.010378586128354073,
"fcm_dpo/delta": 0.12906675040721893,
"fcm_dpo/margin": 45.96491622924805,
"fcm_dpo/q_t": 0.3958016335964203,
"grad_norm": 35.829532623291016,
"learning_rate": 4.2449678515039743e-07,
"logits/chosen": -0.852488100528717,
"logits/rejected": -0.8409570455551147,
"logps/chosen": -347.9700927734375,
"logps/ref_chosen": -289.9663391113281,
"logps/ref_rejected": -271.34625244140625,
"logps/rejected": -375.31494140625,
"loss": 2.1911,
"margin_dpo/margin_mean": 45.96491622924805,
"margin_dpo/margin_std": 71.07288360595703,
"step": 158
},
{
"epoch": 0.33298429319371725,
"fcm_dpo/beta": 0.010378586128354073,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 28.07744026184082,
"fcm_dpo/q_t": 0.43824782967567444,
"grad_norm": 53.52383041381836,
"learning_rate": 4.2318108837739986e-07,
"logits/chosen": -0.9305505752563477,
"logits/rejected": -0.8912345170974731,
"logps/chosen": -383.9658203125,
"logps/ref_chosen": -321.37835693359375,
"logps/ref_rejected": -250.455810546875,
"logps/rejected": -341.1207275390625,
"loss": 2.5258,
"margin_dpo/margin_mean": 28.077438354492188,
"margin_dpo/margin_std": 75.05416107177734,
"step": 159
},
{
"epoch": 0.33507853403141363,
"fcm_dpo/beta": 0.010454267263412476,
"fcm_dpo/delta": 0.07265590876340866,
"fcm_dpo/margin": 50.81078338623047,
"fcm_dpo/q_t": 0.3829062581062317,
"grad_norm": 32.27238082885742,
"learning_rate": 4.218561044282098e-07,
"logits/chosen": -0.8754724264144897,
"logits/rejected": -0.8799326419830322,
"logps/chosen": -326.27581787109375,
"logps/ref_chosen": -276.28350830078125,
"logps/ref_rejected": -262.7477722167969,
"logps/rejected": -363.5508728027344,
"loss": 2.0335,
"margin_dpo/margin_mean": 50.810787200927734,
"margin_dpo/margin_std": 60.55773162841797,
"step": 160
},
{
"epoch": 0.33717277486910996,
"fcm_dpo/beta": 0.010454267263412476,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 44.55485153198242,
"fcm_dpo/q_t": 0.4016062617301941,
"grad_norm": 44.253787994384766,
"learning_rate": 4.2052190435769554e-07,
"logits/chosen": -0.8832786679267883,
"logits/rejected": -0.8662618398666382,
"logps/chosen": -369.86419677734375,
"logps/ref_chosen": -310.4927978515625,
"logps/ref_rejected": -250.25247192382812,
"logps/rejected": -354.1787414550781,
"loss": 2.239,
"margin_dpo/margin_mean": 44.55485153198242,
"margin_dpo/margin_std": 77.50798797607422,
"step": 161
},
{
"epoch": 0.3392670157068063,
"fcm_dpo/beta": 0.010454267263412476,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 41.5074462890625,
"fcm_dpo/q_t": 0.4050026535987854,
"grad_norm": 32.70212173461914,
"learning_rate": 4.1917855971495763e-07,
"logits/chosen": -0.8605970144271851,
"logits/rejected": -0.855280876159668,
"logps/chosen": -349.3827209472656,
"logps/ref_chosen": -296.1104736328125,
"logps/ref_rejected": -253.4334716796875,
"logps/rejected": -348.21319580078125,
"loss": 2.2482,
"margin_dpo/margin_mean": 41.5074462890625,
"margin_dpo/margin_std": 70.52921295166016,
"step": 162
},
{
"epoch": 0.3413612565445026,
"fcm_dpo/beta": 0.01056086365133524,
"fcm_dpo/delta": 0.10144808888435364,
"fcm_dpo/margin": 47.6888427734375,
"fcm_dpo/q_t": 0.38962239027023315,
"grad_norm": 39.535457611083984,
"learning_rate": 4.1782614253949255e-07,
"logits/chosen": -0.8874891400337219,
"logits/rejected": -0.8854560256004333,
"logps/chosen": -347.2881774902344,
"logps/ref_chosen": -293.4998779296875,
"logps/ref_rejected": -266.710693359375,
"logps/rejected": -368.18780517578125,
"loss": 2.1237,
"margin_dpo/margin_mean": 47.6888427734375,
"margin_dpo/margin_std": 66.00093841552734,
"step": 163
},
{
"epoch": 0.34345549738219894,
"fcm_dpo/beta": 0.01056086365133524,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 42.86344909667969,
"fcm_dpo/q_t": 0.40378016233444214,
"grad_norm": 38.552513122558594,
"learning_rate": 4.164647253573289e-07,
"logits/chosen": -0.8534321188926697,
"logits/rejected": -0.8655048608779907,
"logps/chosen": -331.81146240234375,
"logps/ref_chosen": -267.04949951171875,
"logps/ref_rejected": -215.97865295410156,
"logps/rejected": -323.6040344238281,
"loss": 2.253,
"margin_dpo/margin_mean": 42.86344909667969,
"margin_dpo/margin_std": 74.0870590209961,
"step": 164
},
{
"epoch": 0.34554973821989526,
"fcm_dpo/beta": 0.01056086365133524,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 30.588369369506836,
"fcm_dpo/q_t": 0.4285372495651245,
"grad_norm": 46.66164779663086,
"learning_rate": 4.1509438117713863e-07,
"logits/chosen": -0.8580479025840759,
"logits/rejected": -0.8378003835678101,
"logps/chosen": -329.509521484375,
"logps/ref_chosen": -278.0614929199219,
"logps/ref_rejected": -260.427734375,
"logps/rejected": -342.464111328125,
"loss": 2.3768,
"margin_dpo/margin_mean": 30.588369369506836,
"margin_dpo/margin_std": 59.47747802734375,
"step": 165
},
{
"epoch": 0.34764397905759165,
"fcm_dpo/beta": 0.01056086365133524,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 37.41815948486328,
"fcm_dpo/q_t": 0.41559290885925293,
"grad_norm": 49.696250915527344,
"learning_rate": 4.137151834863213e-07,
"logits/chosen": -0.8159278631210327,
"logits/rejected": -0.7940029501914978,
"logps/chosen": -321.7041931152344,
"logps/ref_chosen": -275.94903564453125,
"logps/ref_rejected": -232.13490295410156,
"logps/rejected": -315.3082275390625,
"loss": 2.3167,
"margin_dpo/margin_mean": 37.41815948486328,
"margin_dpo/margin_std": 71.07618713378906,
"step": 166
},
{
"epoch": 0.34973821989528797,
"fcm_dpo/beta": 0.01056086365133524,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 43.08562088012695,
"fcm_dpo/q_t": 0.4000244438648224,
"grad_norm": 43.15227508544922,
"learning_rate": 4.123272062470633e-07,
"logits/chosen": -0.8272606134414673,
"logits/rejected": -0.8182485699653625,
"logps/chosen": -335.5617980957031,
"logps/ref_chosen": -280.5513916015625,
"logps/ref_rejected": -255.29434204101562,
"logps/rejected": -353.39031982421875,
"loss": 2.2412,
"margin_dpo/margin_mean": 43.08562088012695,
"margin_dpo/margin_std": 73.9297103881836,
"step": 167
},
{
"epoch": 0.3518324607329843,
"fcm_dpo/beta": 0.010669088922441006,
"fcm_dpo/delta": 0.10195599496364594,
"fcm_dpo/margin": 47.159400939941406,
"fcm_dpo/q_t": 0.3892422914505005,
"grad_norm": 81.89006042480469,
"learning_rate": 4.1093052389237174e-07,
"logits/chosen": -0.7965179681777954,
"logits/rejected": -0.7800202369689941,
"logps/chosen": -373.31292724609375,
"logps/ref_chosen": -315.7982177734375,
"logps/ref_rejected": -291.4696350097656,
"logps/rejected": -396.14373779296875,
"loss": 2.1589,
"margin_dpo/margin_mean": 47.15939712524414,
"margin_dpo/margin_std": 70.53857421875,
"step": 168
},
{
"epoch": 0.3539267015706806,
"fcm_dpo/beta": 0.010686170309782028,
"fcm_dpo/delta": 0.01599729433655739,
"fcm_dpo/margin": 54.737823486328125,
"fcm_dpo/q_t": 0.37534353137016296,
"grad_norm": 48.01166915893555,
"learning_rate": 4.0952521132208267e-07,
"logits/chosen": -0.798102617263794,
"logits/rejected": -0.8081264495849609,
"logps/chosen": -319.39306640625,
"logps/ref_chosen": -261.06427001953125,
"logps/ref_rejected": -235.4066162109375,
"logps/rejected": -348.4732666015625,
"loss": 2.0334,
"margin_dpo/margin_mean": 54.737823486328125,
"margin_dpo/margin_std": 71.47173309326172,
"step": 169
},
{
"epoch": 0.35602094240837695,
"fcm_dpo/beta": 0.010802491568028927,
"fcm_dpo/delta": 0.10826356709003448,
"fcm_dpo/margin": 46.016151428222656,
"fcm_dpo/q_t": 0.3984832763671875,
"grad_norm": 43.01882553100586,
"learning_rate": 4.081113438988443e-07,
"logits/chosen": -0.7814924120903015,
"logits/rejected": -0.7822093963623047,
"logps/chosen": -368.6761779785156,
"logps/ref_chosen": -308.96722412109375,
"logps/ref_rejected": -263.8489685058594,
"logps/rejected": -369.5740966796875,
"loss": 2.2589,
"margin_dpo/margin_mean": 46.016151428222656,
"margin_dpo/margin_std": 85.33642578125,
"step": 170
},
{
"epoch": 0.3581151832460733,
"fcm_dpo/beta": 0.010814553126692772,
"fcm_dpo/delta": 0.011159293353557587,
"fcm_dpo/margin": 54.50971221923828,
"fcm_dpo/q_t": 0.37487342953681946,
"grad_norm": 54.44708251953125,
"learning_rate": 4.0668899744407567e-07,
"logits/chosen": -0.796809196472168,
"logits/rejected": -0.812903881072998,
"logps/chosen": -314.2414855957031,
"logps/ref_chosen": -258.8890380859375,
"logps/ref_rejected": -262.1927490234375,
"logps/rejected": -372.054931640625,
"loss": 2.0501,
"margin_dpo/margin_mean": 54.50971221923828,
"margin_dpo/margin_std": 73.22706604003906,
"step": 171
},
{
"epoch": 0.36020942408376966,
"fcm_dpo/beta": 0.010814553126692772,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 31.951438903808594,
"fcm_dpo/q_t": 0.4231824278831482,
"grad_norm": 47.22236251831055,
"learning_rate": 4.0525824823390043e-07,
"logits/chosen": -0.8318301439285278,
"logits/rejected": -0.8481395244598389,
"logps/chosen": -399.9178466796875,
"logps/ref_chosen": -339.0223388671875,
"logps/ref_rejected": -295.78607177734375,
"logps/rejected": -388.6329650878906,
"loss": 2.4217,
"margin_dpo/margin_mean": 31.951438903808594,
"margin_dpo/margin_std": 71.80816650390625,
"step": 172
},
{
"epoch": 0.362303664921466,
"fcm_dpo/beta": 0.010814553126692772,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 42.441490173339844,
"fcm_dpo/q_t": 0.404560923576355,
"grad_norm": 56.89242172241211,
"learning_rate": 4.0381917299505686e-07,
"logits/chosen": -0.8460440635681152,
"logits/rejected": -0.8457463979721069,
"logps/chosen": -358.222412109375,
"logps/ref_chosen": -300.1114501953125,
"logps/ref_rejected": -273.78460693359375,
"logps/rejected": -374.3370361328125,
"loss": 2.2602,
"margin_dpo/margin_mean": 42.441490173339844,
"margin_dpo/margin_std": 75.01513671875,
"step": 173
},
{
"epoch": 0.3643979057591623,
"fcm_dpo/beta": 0.010905003175139427,
"fcm_dpo/delta": 0.0832894966006279,
"fcm_dpo/margin": 47.77918243408203,
"fcm_dpo/q_t": 0.38450849056243896,
"grad_norm": 47.33632278442383,
"learning_rate": 4.0237184890078243e-07,
"logits/chosen": -0.8236465454101562,
"logits/rejected": -0.8156465291976929,
"logps/chosen": -397.7867431640625,
"logps/ref_chosen": -335.0538635253906,
"logps/ref_rejected": -257.46697998046875,
"logps/rejected": -367.9790344238281,
"loss": 2.1145,
"margin_dpo/margin_mean": 47.77918243408203,
"margin_dpo/margin_std": 67.31674194335938,
"step": 174
},
{
"epoch": 0.36649214659685864,
"fcm_dpo/beta": 0.010905003175139427,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 41.846221923828125,
"fcm_dpo/q_t": 0.40785056352615356,
"grad_norm": 43.924095153808594,
"learning_rate": 4.00916353566676e-07,
"logits/chosen": -0.832136869430542,
"logits/rejected": -0.8334869146347046,
"logps/chosen": -359.1160888671875,
"logps/ref_chosen": -284.39556884765625,
"logps/ref_rejected": -283.3745422363281,
"logps/rejected": -399.94122314453125,
"loss": 2.2915,
"margin_dpo/margin_mean": 41.846221923828125,
"margin_dpo/margin_std": 80.6987075805664,
"step": 175
},
{
"epoch": 0.36858638743455496,
"fcm_dpo/beta": 0.010905003175139427,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 33.758182525634766,
"fcm_dpo/q_t": 0.4228512942790985,
"grad_norm": 50.57489776611328,
"learning_rate": 3.994527650465352e-07,
"logits/chosen": -0.8025520443916321,
"logits/rejected": -0.8136235475540161,
"logps/chosen": -325.7782287597656,
"logps/ref_chosen": -251.81277465820312,
"logps/ref_rejected": -242.05081176757812,
"logps/rejected": -349.7744140625,
"loss": 2.4649,
"margin_dpo/margin_mean": 33.758182525634766,
"margin_dpo/margin_std": 82.78248596191406,
"step": 176
},
{
"epoch": 0.3706806282722513,
"fcm_dpo/beta": 0.010905003175139427,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 34.45179748535156,
"fcm_dpo/q_t": 0.41915345191955566,
"grad_norm": 58.265846252441406,
"learning_rate": 3.979811618281705e-07,
"logits/chosen": -0.8841328620910645,
"logits/rejected": -0.858696460723877,
"logps/chosen": -380.12762451171875,
"logps/ref_chosen": -298.64642333984375,
"logps/ref_rejected": -295.66534423828125,
"logps/rejected": -411.5983581542969,
"loss": 2.4665,
"margin_dpo/margin_mean": 34.4517936706543,
"margin_dpo/margin_std": 79.73922729492188,
"step": 177
},
{
"epoch": 0.37277486910994767,
"fcm_dpo/beta": 0.010979030281305313,
"fcm_dpo/delta": 0.06765389442443848,
"fcm_dpo/margin": 48.81668472290039,
"fcm_dpo/q_t": 0.3905832767486572,
"grad_norm": 47.875335693359375,
"learning_rate": 3.9650162282919654e-07,
"logits/chosen": -0.8143019676208496,
"logits/rejected": -0.8102011680603027,
"logps/chosen": -352.3527526855469,
"logps/ref_chosen": -286.2576599121094,
"logps/ref_rejected": -243.97488403320312,
"logps/rejected": -358.88665771484375,
"loss": 2.195,
"margin_dpo/margin_mean": 48.81668472290039,
"margin_dpo/margin_std": 81.74942016601562,
"step": 178
},
{
"epoch": 0.374869109947644,
"fcm_dpo/beta": 0.010979030281305313,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 43.20061492919922,
"fcm_dpo/q_t": 0.4006160795688629,
"grad_norm": 51.75831604003906,
"learning_rate": 3.9501422739279953e-07,
"logits/chosen": -0.7910268306732178,
"logits/rejected": -0.7968552112579346,
"logps/chosen": -326.4940490722656,
"logps/ref_chosen": -259.7370910644531,
"logps/ref_rejected": -277.8829040527344,
"logps/rejected": -387.84051513671875,
"loss": 2.2837,
"margin_dpo/margin_mean": 43.20061492919922,
"margin_dpo/margin_std": 79.8097915649414,
"step": 179
},
{
"epoch": 0.3769633507853403,
"fcm_dpo/beta": 0.011051933281123638,
"fcm_dpo/delta": 0.06618259847164154,
"fcm_dpo/margin": 48.621543884277344,
"fcm_dpo/q_t": 0.39017754793167114,
"grad_norm": 69.36066436767578,
"learning_rate": 3.935190552834828e-07,
"logits/chosen": -0.8243378400802612,
"logits/rejected": -0.8496301174163818,
"logps/chosen": -332.49139404296875,
"logps/ref_chosen": -267.30889892578125,
"logps/ref_rejected": -230.44383239746094,
"logps/rejected": -344.2478942871094,
"loss": 2.1452,
"margin_dpo/margin_mean": 48.621543884277344,
"margin_dpo/margin_std": 76.47697448730469,
"step": 180
},
{
"epoch": 0.37905759162303665,
"fcm_dpo/beta": 0.011051933281123638,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 40.955902099609375,
"fcm_dpo/q_t": 0.4051758944988251,
"grad_norm": 53.8263053894043,
"learning_rate": 3.920161866827889e-07,
"logits/chosen": -0.8178322315216064,
"logits/rejected": -0.8243290781974792,
"logps/chosen": -372.14532470703125,
"logps/ref_chosen": -300.49139404296875,
"logps/ref_rejected": -278.9774169921875,
"logps/rejected": -391.5872802734375,
"loss": 2.3213,
"margin_dpo/margin_mean": 40.955902099609375,
"margin_dpo/margin_std": 80.00852966308594,
"step": 181
},
{
"epoch": 0.381151832460733,
"fcm_dpo/beta": 0.01102329883724451,
"fcm_dpo/delta": -0.025942454114556313,
"fcm_dpo/margin": 56.63646697998047,
"fcm_dpo/q_t": 0.3688533902168274,
"grad_norm": 54.440731048583984,
"learning_rate": 3.90505702185e-07,
"logits/chosen": -0.817319929599762,
"logits/rejected": -0.843417763710022,
"logps/chosen": -346.1722717285156,
"logps/ref_chosen": -279.49810791015625,
"logps/ref_rejected": -263.692626953125,
"logps/rejected": -387.00323486328125,
"loss": 2.0059,
"margin_dpo/margin_mean": 56.63646697998047,
"margin_dpo/margin_std": 73.90352630615234,
"step": 182
},
{
"epoch": 0.3832460732984293,
"fcm_dpo/beta": 0.01108313724398613,
"fcm_dpo/delta": 0.0541364960372448,
"fcm_dpo/margin": 49.51906967163086,
"fcm_dpo/q_t": 0.38862109184265137,
"grad_norm": 36.02983093261719,
"learning_rate": 3.889876827928156e-07,
"logits/chosen": -0.8477475047111511,
"logits/rejected": -0.8534224629402161,
"logps/chosen": -337.31524658203125,
"logps/ref_chosen": -271.2057189941406,
"logps/ref_rejected": -243.9116668701172,
"logps/rejected": -359.5402526855469,
"loss": 2.1803,
"margin_dpo/margin_mean": 49.519065856933594,
"margin_dpo/margin_std": 81.75679779052734,
"step": 183
},
{
"epoch": 0.38534031413612563,
"fcm_dpo/beta": 0.010942747816443443,
"fcm_dpo/delta": -0.12747809290885925,
"fcm_dpo/margin": 65.6382827758789,
"fcm_dpo/q_t": 0.35070472955703735,
"grad_norm": 44.199676513671875,
"learning_rate": 3.874622099130087e-07,
"logits/chosen": -0.8934085369110107,
"logits/rejected": -0.8783657550811768,
"logps/chosen": -379.7773132324219,
"logps/ref_chosen": -318.44580078125,
"logps/ref_rejected": -266.6490783691406,
"logps/rejected": -393.618896484375,
"loss": 1.9295,
"margin_dpo/margin_mean": 65.63827514648438,
"margin_dpo/margin_std": 82.38207244873047,
"step": 184
},
{
"epoch": 0.387434554973822,
"fcm_dpo/beta": 0.010986991226673126,
"fcm_dpo/delta": 0.040350355207920074,
"fcm_dpo/margin": 51.143428802490234,
"fcm_dpo/q_t": 0.38633477687835693,
"grad_norm": 45.72472381591797,
"learning_rate": 3.859293653520604e-07,
"logits/chosen": -0.8774163722991943,
"logits/rejected": -0.8725811243057251,
"logps/chosen": -346.99420166015625,
"logps/ref_chosen": -274.308837890625,
"logps/ref_rejected": -260.7296142578125,
"logps/rejected": -384.55841064453125,
"loss": 2.1656,
"margin_dpo/margin_mean": 51.143428802490234,
"margin_dpo/margin_std": 82.68324279785156,
"step": 185
},
{
"epoch": 0.38952879581151834,
"fcm_dpo/beta": 0.01109787356108427,
"fcm_dpo/delta": 0.10041571408510208,
"fcm_dpo/margin": 45.47052764892578,
"fcm_dpo/q_t": 0.39409583806991577,
"grad_norm": 44.38091278076172,
"learning_rate": 3.8438923131177237e-07,
"logits/chosen": -0.8811839818954468,
"logits/rejected": -0.8832820057868958,
"logps/chosen": -371.6052551269531,
"logps/ref_chosen": -299.00537109375,
"logps/ref_rejected": -274.4019775390625,
"logps/rejected": -392.4723815917969,
"loss": 2.2199,
"margin_dpo/margin_mean": 45.47052764892578,
"margin_dpo/margin_std": 77.14076232910156,
"step": 186
},
{
"epoch": 0.39162303664921466,
"fcm_dpo/beta": 0.01109787356108427,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 40.196556091308594,
"fcm_dpo/q_t": 0.40425950288772583,
"grad_norm": 69.00861358642578,
"learning_rate": 3.828418903848593e-07,
"logits/chosen": -0.8305719494819641,
"logits/rejected": -0.8189241886138916,
"logps/chosen": -408.04376220703125,
"logps/ref_chosen": -329.8253173828125,
"logps/ref_rejected": -263.7211608886719,
"logps/rejected": -382.1361999511719,
"loss": 2.3976,
"margin_dpo/margin_mean": 40.196556091308594,
"margin_dpo/margin_std": 85.9332275390625,
"step": 187
},
{
"epoch": 0.393717277486911,
"fcm_dpo/beta": 0.011188428848981857,
"fcm_dpo/delta": 0.08126568794250488,
"fcm_dpo/margin": 46.74177551269531,
"fcm_dpo/q_t": 0.3961402475833893,
"grad_norm": 51.355995178222656,
"learning_rate": 3.812874255505191e-07,
"logits/chosen": -0.8473328351974487,
"logits/rejected": -0.8425517678260803,
"logps/chosen": -338.21832275390625,
"logps/ref_chosen": -263.005615234375,
"logps/ref_rejected": -247.08377075195312,
"logps/rejected": -369.03826904296875,
"loss": 2.299,
"margin_dpo/margin_mean": 46.74177169799805,
"margin_dpo/margin_std": 89.63372802734375,
"step": 188
},
{
"epoch": 0.3958115183246073,
"fcm_dpo/beta": 0.011118452996015549,
"fcm_dpo/delta": -0.06273911893367767,
"fcm_dpo/margin": 59.23433303833008,
"fcm_dpo/q_t": 0.36273935437202454,
"grad_norm": 37.47338104248047,
"learning_rate": 3.797259201699833e-07,
"logits/chosen": -0.8697251081466675,
"logits/rejected": -0.876884937286377,
"logps/chosen": -333.13616943359375,
"logps/ref_chosen": -272.96038818359375,
"logps/ref_rejected": -275.14288330078125,
"logps/rejected": -394.55303955078125,
"loss": 1.9622,
"margin_dpo/margin_mean": 59.23433303833008,
"margin_dpo/margin_std": 75.86497497558594,
"step": 189
},
{
"epoch": 0.39790575916230364,
"fcm_dpo/beta": 0.01116191316395998,
"fcm_dpo/delta": 0.03901253640651703,
"fcm_dpo/margin": 50.45553207397461,
"fcm_dpo/q_t": 0.38307875394821167,
"grad_norm": 47.19729995727539,
"learning_rate": 3.781574579820464e-07,
"logits/chosen": -0.8625926375389099,
"logits/rejected": -0.8313058018684387,
"logps/chosen": -322.6522521972656,
"logps/ref_chosen": -257.79754638671875,
"logps/ref_rejected": -225.2164306640625,
"logps/rejected": -340.5266418457031,
"loss": 2.1043,
"margin_dpo/margin_mean": 50.45553207397461,
"margin_dpo/margin_std": 73.80197143554688,
"step": 190
},
{
"epoch": 0.4,
"fcm_dpo/beta": 0.011220700107514858,
"fcm_dpo/delta": 0.052529074251651764,
"fcm_dpo/margin": 49.048126220703125,
"fcm_dpo/q_t": 0.3867928087711334,
"grad_norm": 43.824501037597656,
"learning_rate": 3.765821230985757e-07,
"logits/chosen": -0.8807193636894226,
"logits/rejected": -0.8816335201263428,
"logps/chosen": -306.66497802734375,
"logps/ref_chosen": -243.8585205078125,
"logps/ref_rejected": -245.12164306640625,
"logps/rejected": -356.97625732421875,
"loss": 2.1879,
"margin_dpo/margin_mean": 49.048126220703125,
"margin_dpo/margin_std": 81.77505493164062,
"step": 191
},
{
"epoch": 0.40209424083769635,
"fcm_dpo/beta": 0.011220700107514858,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 38.828983306884766,
"fcm_dpo/q_t": 0.40661659836769104,
"grad_norm": 40.18767166137695,
"learning_rate": 3.75e-07,
"logits/chosen": -0.8306310772895813,
"logits/rejected": -0.8200534582138062,
"logps/chosen": -339.1163330078125,
"logps/ref_chosen": -266.97991943359375,
"logps/ref_rejected": -260.1748046875,
"logps/rejected": -371.1401672363281,
"loss": 2.3052,
"margin_dpo/margin_mean": 38.82898712158203,
"margin_dpo/margin_std": 72.91365051269531,
"step": 192
},
{
"epoch": 0.4041884816753927,
"fcm_dpo/beta": 0.01127539575099945,
"fcm_dpo/delta": 0.04862726107239723,
"fcm_dpo/margin": 49.13888931274414,
"fcm_dpo/q_t": 0.386112779378891,
"grad_norm": 48.155181884765625,
"learning_rate": 3.734111735307796e-07,
"logits/chosen": -0.8871767520904541,
"logits/rejected": -0.8659235835075378,
"logps/chosen": -359.76715087890625,
"logps/ref_chosen": -280.25323486328125,
"logps/ref_rejected": -291.04339599609375,
"logps/rejected": -419.6961669921875,
"loss": 2.1897,
"margin_dpo/margin_mean": 49.13888931274414,
"margin_dpo/margin_std": 81.4281234741211,
"step": 193
},
{
"epoch": 0.406282722513089,
"fcm_dpo/beta": 0.01127539575099945,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 38.24643325805664,
"fcm_dpo/q_t": 0.410458505153656,
"grad_norm": 58.05720520019531,
"learning_rate": 3.7181572889485623e-07,
"logits/chosen": -0.8614132404327393,
"logits/rejected": -0.8538017272949219,
"logps/chosen": -369.65673828125,
"logps/ref_chosen": -288.13946533203125,
"logps/ref_rejected": -251.3153076171875,
"logps/rejected": -371.0790100097656,
"loss": 2.3265,
"margin_dpo/margin_mean": 38.246437072753906,
"margin_dpo/margin_std": 75.92887115478516,
"step": 194
},
{
"epoch": 0.4083769633507853,
"fcm_dpo/beta": 0.01127539575099945,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 39.015113830566406,
"fcm_dpo/q_t": 0.4104893207550049,
"grad_norm": 41.20073318481445,
"learning_rate": 3.7021375165108377e-07,
"logits/chosen": -0.8500396013259888,
"logits/rejected": -0.8551274538040161,
"logps/chosen": -356.44171142578125,
"logps/ref_chosen": -274.0006103515625,
"logps/ref_rejected": -280.22564697265625,
"logps/rejected": -401.68182373046875,
"loss": 2.3038,
"margin_dpo/margin_mean": 39.015113830566406,
"margin_dpo/margin_std": 74.95777893066406,
"step": 195
},
{
"epoch": 0.41047120418848165,
"fcm_dpo/beta": 0.011388367973268032,
"fcm_dpo/delta": 0.09969466179609299,
"fcm_dpo/margin": 44.371421813964844,
"fcm_dpo/q_t": 0.39527997374534607,
"grad_norm": 46.40882873535156,
"learning_rate": 3.6860532770864005e-07,
"logits/chosen": -0.8462792634963989,
"logits/rejected": -0.8548792600631714,
"logps/chosen": -346.9256896972656,
"logps/ref_chosen": -274.90069580078125,
"logps/ref_rejected": -248.73440551757812,
"logps/rejected": -365.130859375,
"loss": 2.2177,
"margin_dpo/margin_mean": 44.371421813964844,
"margin_dpo/margin_std": 76.4969711303711,
"step": 196
},
{
"epoch": 0.41256544502617803,
"fcm_dpo/beta": 0.011362292803823948,
"fcm_dpo/delta": -0.02292262390255928,
"fcm_dpo/margin": 54.69814682006836,
"fcm_dpo/q_t": 0.37083643674850464,
"grad_norm": 44.53741455078125,
"learning_rate": 3.6699054332241985e-07,
"logits/chosen": -0.8727219104766846,
"logits/rejected": -0.8640350699424744,
"logps/chosen": -386.80157470703125,
"logps/ref_chosen": -309.5348205566406,
"logps/ref_rejected": -264.3189392089844,
"logps/rejected": -396.2838439941406,
"loss": 2.0386,
"margin_dpo/margin_mean": 54.698150634765625,
"margin_dpo/margin_std": 74.75113677978516,
"step": 197
},
{
"epoch": 0.41465968586387436,
"fcm_dpo/beta": 0.011329276487231255,
"fcm_dpo/delta": -0.02910027652978897,
"fcm_dpo/margin": 55.36737060546875,
"fcm_dpo/q_t": 0.3729621171951294,
"grad_norm": 50.719356536865234,
"learning_rate": 3.653694850884091e-07,
"logits/chosen": -0.8691450357437134,
"logits/rejected": -0.8486235737800598,
"logps/chosen": -375.4002685546875,
"logps/ref_chosen": -301.0134582519531,
"logps/ref_rejected": -292.83941650390625,
"logps/rejected": -422.5935974121094,
"loss": 2.0804,
"margin_dpo/margin_mean": 55.36737060546875,
"margin_dpo/margin_std": 81.80626678466797,
"step": 198
},
{
"epoch": 0.4167539267015707,
"fcm_dpo/beta": 0.011388827115297318,
"fcm_dpo/delta": 0.05242609605193138,
"fcm_dpo/margin": 48.332645416259766,
"fcm_dpo/q_t": 0.38754040002822876,
"grad_norm": 57.40102767944336,
"learning_rate": 3.6374223993904124e-07,
"logits/chosen": -0.8484653234481812,
"logits/rejected": -0.8157787322998047,
"logps/chosen": -342.7844543457031,
"logps/ref_chosen": -264.6058654785156,
"logps/ref_rejected": -214.90428161621094,
"logps/rejected": -341.41552734375,
"loss": 2.1867,
"margin_dpo/margin_mean": 48.33264923095703,
"margin_dpo/margin_std": 80.66270446777344,
"step": 199
},
{
"epoch": 0.418848167539267,
"fcm_dpo/beta": 0.011388827115297318,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 43.314247131347656,
"fcm_dpo/q_t": 0.4016145169734955,
"grad_norm": 47.863616943359375,
"learning_rate": 3.621088951385353e-07,
"logits/chosen": -0.8921021223068237,
"logits/rejected": -0.8780491352081299,
"logps/chosen": -410.08270263671875,
"logps/ref_chosen": -324.1588134765625,
"logps/ref_rejected": -277.8011474609375,
"logps/rejected": -407.039306640625,
"loss": 2.375,
"margin_dpo/margin_mean": 43.31425094604492,
"margin_dpo/margin_std": 90.23580932617188,
"step": 200
},
{
"epoch": 0.42094240837696334,
"fcm_dpo/beta": 0.011478085070848465,
"fcm_dpo/delta": 0.07806775718927383,
"fcm_dpo/margin": 45.82844543457031,
"fcm_dpo/q_t": 0.39458033442497253,
"grad_norm": 50.767982482910156,
"learning_rate": 3.604695382782159e-07,
"logits/chosen": -0.8739713430404663,
"logits/rejected": -0.8680374622344971,
"logps/chosen": -368.12261962890625,
"logps/ref_chosen": -271.4956970214844,
"logps/ref_rejected": -245.71644592285156,
"logps/rejected": -388.1717834472656,
"loss": 2.2832,
"margin_dpo/margin_mean": 45.82844543457031,
"margin_dpo/margin_std": 84.9791488647461,
"step": 201
},
{
"epoch": 0.42303664921465967,
"fcm_dpo/beta": 0.011565309017896652,
"fcm_dpo/delta": 0.07570445537567139,
"fcm_dpo/margin": 45.67796325683594,
"fcm_dpo/q_t": 0.3948380947113037,
"grad_norm": 49.32555389404297,
"learning_rate": 3.588242572718162e-07,
"logits/chosen": -0.8743944764137268,
"logits/rejected": -0.8688530921936035,
"logps/chosen": -368.3001708984375,
"logps/ref_chosen": -272.0979309082031,
"logps/ref_rejected": -235.9467010498047,
"logps/rejected": -377.826904296875,
"loss": 2.2763,
"margin_dpo/margin_mean": 45.67795944213867,
"margin_dpo/margin_std": 84.5333023071289,
"step": 202
},
{
"epoch": 0.42513089005235605,
"fcm_dpo/beta": 0.011565309017896652,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 34.73882293701172,
"fcm_dpo/q_t": 0.4136587977409363,
"grad_norm": 49.48384475708008,
"learning_rate": 3.571731403507635e-07,
"logits/chosen": -0.8502493500709534,
"logits/rejected": -0.8567000031471252,
"logps/chosen": -383.7005615234375,
"logps/ref_chosen": -280.2221374511719,
"logps/ref_rejected": -251.80093383789062,
"logps/rejected": -390.0181884765625,
"loss": 2.3899,
"margin_dpo/margin_mean": 34.73882293701172,
"margin_dpo/margin_std": 73.63934326171875,
"step": 203
},
{
"epoch": 0.4272251308900524,
"fcm_dpo/beta": 0.011557999067008495,
"fcm_dpo/delta": -0.0063223461620509624,
"fcm_dpo/margin": 52.42595291137695,
"fcm_dpo/q_t": 0.37393608689308167,
"grad_norm": 51.727413177490234,
"learning_rate": 3.5551627605944746e-07,
"logits/chosen": -0.8875571489334106,
"logits/rejected": -0.8688842058181763,
"logps/chosen": -416.36810302734375,
"logps/ref_chosen": -318.79608154296875,
"logps/ref_rejected": -269.7093200683594,
"logps/rejected": -419.7073059082031,
"loss": 2.0832,
"margin_dpo/margin_mean": 52.42595672607422,
"margin_dpo/margin_std": 76.59628295898438,
"step": 204
},
{
"epoch": 0.4293193717277487,
"fcm_dpo/beta": 0.011503488756716251,
"fcm_dpo/delta": -0.04727426916360855,
"fcm_dpo/margin": 56.00227737426758,
"fcm_dpo/q_t": 0.37115567922592163,
"grad_norm": 48.91386795043945,
"learning_rate": 3.5385375325047163e-07,
"logits/chosen": -0.8458026051521301,
"logits/rejected": -0.8174245953559875,
"logps/chosen": -381.99627685546875,
"logps/ref_chosen": -283.7620849609375,
"logps/ref_rejected": -297.7041320800781,
"logps/rejected": -451.94061279296875,
"loss": 2.0633,
"margin_dpo/margin_mean": 56.00227355957031,
"margin_dpo/margin_std": 83.61642456054688,
"step": 205
},
{
"epoch": 0.431413612565445,
"fcm_dpo/beta": 0.011503488756716251,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 41.25376892089844,
"fcm_dpo/q_t": 0.4033774435520172,
"grad_norm": 46.304874420166016,
"learning_rate": 3.5218566107988867e-07,
"logits/chosen": -0.8711480498313904,
"logits/rejected": -0.8915102481842041,
"logps/chosen": -395.21112060546875,
"logps/ref_chosen": -293.6638488769531,
"logps/ref_rejected": -291.29913330078125,
"logps/rejected": -434.10015869140625,
"loss": 2.358,
"margin_dpo/margin_mean": 41.25376892089844,
"margin_dpo/margin_std": 84.27801513671875,
"step": 206
},
{
"epoch": 0.43350785340314135,
"fcm_dpo/beta": 0.011625375598669052,
"fcm_dpo/delta": 0.10539878159761429,
"fcm_dpo/margin": 42.995758056640625,
"fcm_dpo/q_t": 0.39784395694732666,
"grad_norm": 58.490989685058594,
"learning_rate": 3.505120890024195e-07,
"logits/chosen": -0.8150318264961243,
"logits/rejected": -0.8241398334503174,
"logps/chosen": -358.9227294921875,
"logps/ref_chosen": -270.5350646972656,
"logps/ref_rejected": -278.78411865234375,
"logps/rejected": -410.1675109863281,
"loss": 2.3579,
"margin_dpo/margin_mean": 42.995758056640625,
"margin_dpo/margin_std": 89.30279541015625,
"step": 207
},
{
"epoch": 0.4356020942408377,
"fcm_dpo/beta": 0.011574611999094486,
"fcm_dpo/delta": -0.04376169294118881,
"fcm_dpo/margin": 55.37556076049805,
"fcm_dpo/q_t": 0.37183550000190735,
"grad_norm": 35.90634536743164,
"learning_rate": 3.4883312676665534e-07,
"logits/chosen": -0.8726118206977844,
"logits/rejected": -0.8324218988418579,
"logps/chosen": -370.0745849609375,
"logps/ref_chosen": -279.582763671875,
"logps/ref_rejected": -290.045166015625,
"logps/rejected": -435.91253662109375,
"loss": 2.0414,
"margin_dpo/margin_mean": 55.37556457519531,
"margin_dpo/margin_std": 82.45820617675781,
"step": 208
},
{
"epoch": 0.437696335078534,
"fcm_dpo/beta": 0.011574611999094486,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 38.47338104248047,
"fcm_dpo/q_t": 0.40673622488975525,
"grad_norm": 55.249759674072266,
"learning_rate": 3.4714886441024573e-07,
"logits/chosen": -0.7866029739379883,
"logits/rejected": -0.7895369529724121,
"logps/chosen": -408.91436767578125,
"logps/ref_chosen": -318.87249755859375,
"logps/ref_rejected": -270.6342468261719,
"logps/rejected": -399.1495056152344,
"loss": 2.3819,
"margin_dpo/margin_mean": 38.473384857177734,
"margin_dpo/margin_std": 81.56889343261719,
"step": 209
},
{
"epoch": 0.4397905759162304,
"fcm_dpo/beta": 0.011620636098086834,
"fcm_dpo/delta": 0.03968438133597374,
"fcm_dpo/margin": 48.409019470214844,
"fcm_dpo/q_t": 0.3853999674320221,
"grad_norm": 43.16934585571289,
"learning_rate": 3.454593922550693e-07,
"logits/chosen": -0.811585545539856,
"logits/rejected": -0.8048301339149475,
"logps/chosen": -363.9505615234375,
"logps/ref_chosen": -283.14031982421875,
"logps/ref_rejected": -287.302978515625,
"logps/rejected": -416.52227783203125,
"loss": 2.1625,
"margin_dpo/margin_mean": 48.40902328491211,
"margin_dpo/margin_std": 78.59758758544922,
"step": 210
},
{
"epoch": 0.4418848167539267,
"fcm_dpo/beta": 0.011498289182782173,
"fcm_dpo/delta": -0.10584259778261185,
"fcm_dpo/margin": 60.74044418334961,
"fcm_dpo/q_t": 0.3524332642555237,
"grad_norm": 37.0330810546875,
"learning_rate": 3.4376480090239047e-07,
"logits/chosen": -0.822546124458313,
"logits/rejected": -0.8105400800704956,
"logps/chosen": -346.8141174316406,
"logps/ref_chosen": -276.4228820800781,
"logps/ref_rejected": -252.40589904785156,
"logps/rejected": -383.5375671386719,
"loss": 1.892,
"margin_dpo/margin_mean": 60.740440368652344,
"margin_dpo/margin_std": 71.19911193847656,
"step": 211
},
{
"epoch": 0.44397905759162304,
"fcm_dpo/beta": 0.011498289182782173,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 39.469200134277344,
"fcm_dpo/q_t": 0.40205302834510803,
"grad_norm": 43.004859924316406,
"learning_rate": 3.4206518122800055e-07,
"logits/chosen": -0.817995011806488,
"logits/rejected": -0.81924968957901,
"logps/chosen": -347.65509033203125,
"logps/ref_chosen": -271.7055358886719,
"logps/ref_rejected": -241.1897430419922,
"logps/rejected": -356.6084899902344,
"loss": 2.2843,
"margin_dpo/margin_mean": 39.46919631958008,
"margin_dpo/margin_std": 71.83837127685547,
"step": 212
},
{
"epoch": 0.44607329842931936,
"fcm_dpo/beta": 0.011614889837801456,
"fcm_dpo/delta": 0.10089624673128128,
"fcm_dpo/margin": 43.40678405761719,
"fcm_dpo/q_t": 0.39988279342651367,
"grad_norm": 41.354888916015625,
"learning_rate": 3.403606243773448e-07,
"logits/chosen": -0.8224835395812988,
"logits/rejected": -0.8362897634506226,
"logps/chosen": -376.9057922363281,
"logps/ref_chosen": -302.2976379394531,
"logps/ref_rejected": -303.62017822265625,
"logps/rejected": -421.6351318359375,
"loss": 2.2785,
"margin_dpo/margin_mean": 43.40678405761719,
"margin_dpo/margin_std": 82.53073120117188,
"step": 213
},
{
"epoch": 0.4481675392670157,
"fcm_dpo/beta": 0.011635429225862026,
"fcm_dpo/delta": 0.017668021842837334,
"fcm_dpo/margin": 50.13667678833008,
"fcm_dpo/q_t": 0.3777585029602051,
"grad_norm": 43.36865234375,
"learning_rate": 3.3865122176063385e-07,
"logits/chosen": -0.8282523155212402,
"logits/rejected": -0.829418420791626,
"logps/chosen": -356.675537109375,
"logps/ref_chosen": -272.13262939453125,
"logps/ref_rejected": -294.82354736328125,
"logps/rejected": -429.50311279296875,
"loss": 2.058,
"margin_dpo/margin_mean": 50.13667297363281,
"margin_dpo/margin_std": 70.05065155029297,
"step": 214
},
{
"epoch": 0.450261780104712,
"fcm_dpo/beta": 0.011635429225862026,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 36.94020462036133,
"fcm_dpo/q_t": 0.41564711928367615,
"grad_norm": 45.623741149902344,
"learning_rate": 3.3693706504794243e-07,
"logits/chosen": -0.8667133450508118,
"logits/rejected": -0.8559756278991699,
"logps/chosen": -376.4792175292969,
"logps/ref_chosen": -291.3782958984375,
"logps/ref_rejected": -261.0592346191406,
"logps/rejected": -383.100341796875,
"loss": 2.371,
"margin_dpo/margin_mean": 36.94020462036133,
"margin_dpo/margin_std": 79.50491333007812,
"step": 215
},
{
"epoch": 0.4523560209424084,
"fcm_dpo/beta": 0.01165247242897749,
"fcm_dpo/delta": 0.014636674895882607,
"fcm_dpo/margin": 50.30870056152344,
"fcm_dpo/q_t": 0.3807034492492676,
"grad_norm": 38.21538162231445,
"learning_rate": 3.3521824616429284e-07,
"logits/chosen": -0.8838326930999756,
"logits/rejected": -0.8807901740074158,
"logps/chosen": -416.7056884765625,
"logps/ref_chosen": -338.5054626464844,
"logps/ref_rejected": -305.74560546875,
"logps/rejected": -434.25457763671875,
"loss": 2.18,
"margin_dpo/margin_mean": 50.30870056152344,
"margin_dpo/margin_std": 83.40432739257812,
"step": 216
},
{
"epoch": 0.4544502617801047,
"fcm_dpo/beta": 0.01153170969337225,
"fcm_dpo/delta": -0.10417811572551727,
"fcm_dpo/margin": 60.43164825439453,
"fcm_dpo/q_t": 0.35806769132614136,
"grad_norm": 41.93501663208008,
"learning_rate": 3.334948572847253e-07,
"logits/chosen": -0.7885605096817017,
"logits/rejected": -0.7631282806396484,
"logps/chosen": -374.5654602050781,
"logps/ref_chosen": -293.5498046875,
"logps/ref_rejected": -256.781005859375,
"logps/rejected": -398.22833251953125,
"loss": 1.9857,
"margin_dpo/margin_mean": 60.43164825439453,
"margin_dpo/margin_std": 82.61273193359375,
"step": 217
},
{
"epoch": 0.45654450261780105,
"fcm_dpo/beta": 0.011513221077620983,
"fcm_dpo/delta": -0.016045905649662018,
"fcm_dpo/margin": 53.421905517578125,
"fcm_dpo/q_t": 0.3726479411125183,
"grad_norm": 41.79888916015625,
"learning_rate": 3.317669908293554e-07,
"logits/chosen": -0.8220831155776978,
"logits/rejected": -0.8435771465301514,
"logps/chosen": -398.8973388671875,
"logps/ref_chosen": -320.579345703125,
"logps/ref_rejected": -294.03570556640625,
"logps/rejected": -425.7756042480469,
"loss": 2.0646,
"margin_dpo/margin_mean": 53.42190933227539,
"margin_dpo/margin_std": 77.18704223632812,
"step": 218
},
{
"epoch": 0.4586387434554974,
"fcm_dpo/beta": 0.011473771184682846,
"fcm_dpo/delta": -0.03432409092783928,
"fcm_dpo/margin": 55.09527587890625,
"fcm_dpo/q_t": 0.36992937326431274,
"grad_norm": 51.594608306884766,
"learning_rate": 3.300347394584172e-07,
"logits/chosen": -0.8170064091682434,
"logits/rejected": -0.8397572040557861,
"logps/chosen": -349.82843017578125,
"logps/ref_chosen": -268.41864013671875,
"logps/ref_rejected": -265.78070068359375,
"logps/rejected": -402.2857666015625,
"loss": 2.0798,
"margin_dpo/margin_mean": 55.09527587890625,
"margin_dpo/margin_std": 83.4979248046875,
"step": 219
},
{
"epoch": 0.4607329842931937,
"fcm_dpo/beta": 0.011408637277781963,
"fcm_dpo/delta": -0.05692977458238602,
"fcm_dpo/margin": 57.254913330078125,
"fcm_dpo/q_t": 0.3638424277305603,
"grad_norm": 37.36294937133789,
"learning_rate": 3.2829819606729477e-07,
"logits/chosen": -0.8621986508369446,
"logits/rejected": -0.8415334224700928,
"logps/chosen": -394.0870361328125,
"logps/ref_chosen": -312.886474609375,
"logps/ref_rejected": -259.5097961425781,
"logps/rejected": -397.96527099609375,
"loss": 2.0181,
"margin_dpo/margin_mean": 57.254913330078125,
"margin_dpo/margin_std": 78.34246826171875,
"step": 220
},
{
"epoch": 0.46282722513089003,
"fcm_dpo/beta": 0.011509214527904987,
"fcm_dpo/delta": 0.0877726674079895,
"fcm_dpo/margin": 44.8982048034668,
"fcm_dpo/q_t": 0.3982035517692566,
"grad_norm": 49.23297882080078,
"learning_rate": 3.265574537815398e-07,
"logits/chosen": -0.7905181646347046,
"logits/rejected": -0.8004481196403503,
"logps/chosen": -388.2945556640625,
"logps/ref_chosen": -300.3258361816406,
"logps/ref_rejected": -286.312255859375,
"logps/rejected": -419.17919921875,
"loss": 2.2735,
"margin_dpo/margin_mean": 44.8982048034668,
"margin_dpo/margin_std": 84.64593505859375,
"step": 221
},
{
"epoch": 0.4649214659685864,
"fcm_dpo/beta": 0.01153627596795559,
"fcm_dpo/delta": 0.023485397920012474,
"fcm_dpo/margin": 50.09156799316406,
"fcm_dpo/q_t": 0.380188524723053,
"grad_norm": 42.51252746582031,
"learning_rate": 3.248126059518784e-07,
"logits/chosen": -0.8761808276176453,
"logits/rejected": -0.8566697239875793,
"logps/chosen": -382.58056640625,
"logps/ref_chosen": -297.1113586425781,
"logps/ref_rejected": -235.53561401367188,
"logps/rejected": -371.09637451171875,
"loss": 2.1339,
"margin_dpo/margin_mean": 50.09156799316406,
"margin_dpo/margin_std": 76.23758697509766,
"step": 222
},
{
"epoch": 0.46701570680628274,
"fcm_dpo/beta": 0.011528990231454372,
"fcm_dpo/delta": -0.006317767780274153,
"fcm_dpo/margin": 52.5574951171875,
"fcm_dpo/q_t": 0.37457358837127686,
"grad_norm": 55.26633834838867,
"learning_rate": 3.230637461492043e-07,
"logits/chosen": -0.8254790306091309,
"logits/rejected": -0.7992933988571167,
"logps/chosen": -374.0750427246094,
"logps/ref_chosen": -286.41510009765625,
"logps/ref_rejected": -241.11904907226562,
"logps/rejected": -381.3365173339844,
"loss": 2.1171,
"margin_dpo/margin_mean": 52.5574951171875,
"margin_dpo/margin_std": 80.84004974365234,
"step": 223
},
{
"epoch": 0.46910994764397906,
"fcm_dpo/beta": 0.011457276530563831,
"fcm_dpo/delta": -0.06239692494273186,
"fcm_dpo/margin": 57.45489501953125,
"fcm_dpo/q_t": 0.3645704388618469,
"grad_norm": 48.68239974975586,
"learning_rate": 3.213109681595612e-07,
"logits/chosen": -0.7970999479293823,
"logits/rejected": -0.8092724084854126,
"logps/chosen": -329.4682312011719,
"logps/ref_chosen": -249.4923553466797,
"logps/ref_rejected": -233.1204071044922,
"logps/rejected": -370.55120849609375,
"loss": 1.9876,
"margin_dpo/margin_mean": 57.45489501953125,
"margin_dpo/margin_std": 76.5157470703125,
"step": 224
},
{
"epoch": 0.4712041884816754,
"fcm_dpo/beta": 0.011476612649857998,
"fcm_dpo/delta": 0.016862675547599792,
"fcm_dpo/margin": 50.89667892456055,
"fcm_dpo/q_t": 0.3859655261039734,
"grad_norm": 54.2133674621582,
"learning_rate": 3.1955436597911315e-07,
"logits/chosen": -0.8409017324447632,
"logits/rejected": -0.8229585886001587,
"logps/chosen": -405.7902526855469,
"logps/ref_chosen": -311.8583984375,
"logps/ref_rejected": -336.8563537597656,
"logps/rejected": -481.6849365234375,
"loss": 2.2025,
"margin_dpo/margin_mean": 50.89668273925781,
"margin_dpo/margin_std": 87.63338470458984,
"step": 225
},
{
"epoch": 0.4732984293193717,
"fcm_dpo/beta": 0.01155221275985241,
"fcm_dpo/delta": 0.0656573474407196,
"fcm_dpo/margin": 46.55926513671875,
"fcm_dpo/q_t": 0.3928050696849823,
"grad_norm": 42.344486236572266,
"learning_rate": 3.1779403380910425e-07,
"logits/chosen": -0.856964647769928,
"logits/rejected": -0.8483293652534485,
"logps/chosen": -335.8982238769531,
"logps/ref_chosen": -252.20126342773438,
"logps/ref_rejected": -254.41946411132812,
"logps/rejected": -384.6756896972656,
"loss": 2.2299,
"margin_dpo/margin_mean": 46.55926513671875,
"margin_dpo/margin_std": 82.88809967041016,
"step": 226
},
{
"epoch": 0.47539267015706804,
"fcm_dpo/beta": 0.011505689471960068,
"fcm_dpo/delta": -0.04035334661602974,
"fcm_dpo/margin": 55.43122863769531,
"fcm_dpo/q_t": 0.3694474697113037,
"grad_norm": 45.55215072631836,
"learning_rate": 3.160300660508064e-07,
"logits/chosen": -0.8314103484153748,
"logits/rejected": -0.82843017578125,
"logps/chosen": -368.4416809082031,
"logps/ref_chosen": -285.25946044921875,
"logps/ref_rejected": -261.3177185058594,
"logps/rejected": -399.93115234375,
"loss": 2.121,
"margin_dpo/margin_mean": 55.43122863769531,
"margin_dpo/margin_std": 86.32599639892578,
"step": 227
},
{
"epoch": 0.4774869109947644,
"fcm_dpo/beta": 0.011403365060687065,
"fcm_dpo/delta": -0.0893319696187973,
"fcm_dpo/margin": 59.912269592285156,
"fcm_dpo/q_t": 0.36185258626937866,
"grad_norm": 36.95317077636719,
"learning_rate": 3.1426255730045695e-07,
"logits/chosen": -0.8484254479408264,
"logits/rejected": -0.8235490322113037,
"logps/chosen": -385.413330078125,
"logps/ref_chosen": -313.8188171386719,
"logps/ref_rejected": -258.071533203125,
"logps/rejected": -389.5783386230469,
"loss": 1.9869,
"margin_dpo/margin_mean": 59.91227340698242,
"margin_dpo/margin_std": 80.2008285522461,
"step": 228
},
{
"epoch": 0.47958115183246075,
"fcm_dpo/beta": 0.01123168133199215,
"fcm_dpo/delta": -0.15170006453990936,
"fcm_dpo/margin": 65.91914367675781,
"fcm_dpo/q_t": 0.34847745299339294,
"grad_norm": 47.422576904296875,
"learning_rate": 3.1249160234418644e-07,
"logits/chosen": -0.8252373337745667,
"logits/rejected": -0.8322288393974304,
"logps/chosen": -375.4277648925781,
"logps/ref_chosen": -291.97076416015625,
"logps/ref_rejected": -263.42218017578125,
"logps/rejected": -412.79833984375,
"loss": 1.9225,
"margin_dpo/margin_mean": 65.91914367675781,
"margin_dpo/margin_std": 84.00717163085938,
"step": 229
},
{
"epoch": 0.4816753926701571,
"fcm_dpo/beta": 0.011167926713824272,
"fcm_dpo/delta": -0.056925076991319656,
"fcm_dpo/margin": 58.48857879638672,
"fcm_dpo/q_t": 0.37040385603904724,
"grad_norm": 54.820640563964844,
"learning_rate": 3.1071729615293424e-07,
"logits/chosen": -0.8645958304405212,
"logits/rejected": -0.8664934635162354,
"logps/chosen": -310.0601501464844,
"logps/ref_chosen": -233.2601318359375,
"logps/ref_rejected": -238.9227752685547,
"logps/rejected": -374.21136474609375,
"loss": 2.085,
"margin_dpo/margin_mean": 58.48857498168945,
"margin_dpo/margin_std": 90.30332946777344,
"step": 230
},
{
"epoch": 0.4837696335078534,
"fcm_dpo/beta": 0.011248757131397724,
"fcm_dpo/delta": 0.07211625576019287,
"fcm_dpo/margin": 47.26783752441406,
"fcm_dpo/q_t": 0.38907888531684875,
"grad_norm": 53.43687438964844,
"learning_rate": 3.0893973387735683e-07,
"logits/chosen": -0.8459908962249756,
"logits/rejected": -0.8355890512466431,
"logps/chosen": -406.4673767089844,
"logps/ref_chosen": -322.15521240234375,
"logps/ref_rejected": -280.970703125,
"logps/rejected": -412.55072021484375,
"loss": 2.1884,
"margin_dpo/margin_mean": 47.26784133911133,
"margin_dpo/margin_std": 77.64056396484375,
"step": 231
},
{
"epoch": 0.48586387434554973,
"fcm_dpo/beta": 0.011308438144624233,
"fcm_dpo/delta": 0.052915558218955994,
"fcm_dpo/margin": 48.635101318359375,
"fcm_dpo/q_t": 0.3891379237174988,
"grad_norm": 57.95627975463867,
"learning_rate": 3.071590108427243e-07,
"logits/chosen": -0.8315503597259521,
"logits/rejected": -0.8197436332702637,
"logps/chosen": -355.36541748046875,
"logps/ref_chosen": -271.7437744140625,
"logps/ref_rejected": -249.94650268554688,
"logps/rejected": -382.2032775878906,
"loss": 2.2599,
"margin_dpo/margin_mean": 48.635101318359375,
"margin_dpo/margin_std": 86.83106994628906,
"step": 232
},
{
"epoch": 0.48795811518324606,
"fcm_dpo/beta": 0.011229691095650196,
"fcm_dpo/delta": -0.06987949460744858,
"fcm_dpo/margin": 59.23713684082031,
"fcm_dpo/q_t": 0.3646395802497864,
"grad_norm": 42.54497146606445,
"learning_rate": 3.05375222543809e-07,
"logits/chosen": -0.8663790225982666,
"logits/rejected": -0.858837902545929,
"logps/chosen": -364.13671875,
"logps/ref_chosen": -285.34234619140625,
"logps/ref_rejected": -266.3358154296875,
"logps/rejected": -404.3673400878906,
"loss": 2.0301,
"margin_dpo/margin_mean": 59.237144470214844,
"margin_dpo/margin_std": 81.6547622680664,
"step": 233
},
{
"epoch": 0.4900523560209424,
"fcm_dpo/beta": 0.01123469602316618,
"fcm_dpo/delta": 0.00445548165589571,
"fcm_dpo/margin": 53.03302764892578,
"fcm_dpo/q_t": 0.38143569231033325,
"grad_norm": 59.68339157104492,
"learning_rate": 3.035884646397637e-07,
"logits/chosen": -0.8368986248970032,
"logits/rejected": -0.8313064575195312,
"logps/chosen": -377.9920654296875,
"logps/ref_chosen": -294.9057312011719,
"logps/ref_rejected": -299.3805236816406,
"logps/rejected": -435.4998474121094,
"loss": 2.2269,
"margin_dpo/margin_mean": 53.03302764892578,
"margin_dpo/margin_std": 92.39456176757812,
"step": 234
},
{
"epoch": 0.49214659685863876,
"fcm_dpo/beta": 0.011183816008269787,
"fcm_dpo/delta": -0.04539116844534874,
"fcm_dpo/margin": 57.446250915527344,
"fcm_dpo/q_t": 0.369347482919693,
"grad_norm": 43.349822998046875,
"learning_rate": 3.017988329489923e-07,
"logits/chosen": -0.8626374006271362,
"logits/rejected": -0.8631974458694458,
"logps/chosen": -370.3212890625,
"logps/ref_chosen": -289.49755859375,
"logps/ref_rejected": -247.55105590820312,
"logps/rejected": -385.821044921875,
"loss": 2.0968,
"margin_dpo/margin_mean": 57.446250915527344,
"margin_dpo/margin_std": 87.06011962890625,
"step": 235
},
{
"epoch": 0.4942408376963351,
"fcm_dpo/beta": 0.011175952851772308,
"fcm_dpo/delta": -0.007033457513898611,
"fcm_dpo/margin": 54.27784729003906,
"fcm_dpo/q_t": 0.37658172845840454,
"grad_norm": 52.07412338256836,
"learning_rate": 3.000064234440111e-07,
"logits/chosen": -0.8791717290878296,
"logits/rejected": -0.8793454170227051,
"logps/chosen": -367.5154724121094,
"logps/ref_chosen": -288.8846435546875,
"logps/ref_rejected": -242.04971313476562,
"logps/rejected": -374.9583740234375,
"loss": 2.1337,
"margin_dpo/margin_mean": 54.27784729003906,
"margin_dpo/margin_std": 84.73802185058594,
"step": 236
},
{
"epoch": 0.4963350785340314,
"fcm_dpo/beta": 0.011140123009681702,
"fcm_dpo/delta": -0.03211143985390663,
"fcm_dpo/margin": 56.559959411621094,
"fcm_dpo/q_t": 0.3709508180618286,
"grad_norm": 50.27377700805664,
"learning_rate": 2.9821133224630223e-07,
"logits/chosen": -0.8347765803337097,
"logits/rejected": -0.8186193108558655,
"logps/chosen": -347.33685302734375,
"logps/ref_chosen": -265.47869873046875,
"logps/ref_rejected": -267.98779296875,
"logps/rejected": -406.4059143066406,
"loss": 2.0543,
"margin_dpo/margin_mean": 56.55995559692383,
"margin_dpo/margin_std": 80.91068267822266,
"step": 237
},
{
"epoch": 0.49842931937172774,
"fcm_dpo/beta": 0.011114435270428658,
"fcm_dpo/delta": -0.023085102438926697,
"fcm_dpo/margin": 55.931617736816406,
"fcm_dpo/q_t": 0.3777042329311371,
"grad_norm": 49.55057907104492,
"learning_rate": 2.964136556211588e-07,
"logits/chosen": -0.8459721803665161,
"logits/rejected": -0.8250448703765869,
"logps/chosen": -398.3431396484375,
"logps/ref_chosen": -312.0026550292969,
"logps/ref_rejected": -270.02484130859375,
"logps/rejected": -412.29693603515625,
"loss": 2.0948,
"margin_dpo/margin_mean": 55.93161392211914,
"margin_dpo/margin_std": 85.75509643554688,
"step": 238
},
{
"epoch": 0.5005235602094241,
"fcm_dpo/beta": 0.01120331697165966,
"fcm_dpo/delta": 0.07965114712715149,
"fcm_dpo/margin": 46.81739044189453,
"fcm_dpo/q_t": 0.39847880601882935,
"grad_norm": 64.70271301269531,
"learning_rate": 2.946134899725226e-07,
"logits/chosen": -0.8460415601730347,
"logits/rejected": -0.8806518316268921,
"logps/chosen": -346.79248046875,
"logps/ref_chosen": -267.167236328125,
"logps/ref_rejected": -275.99444580078125,
"logps/rejected": -402.4371032714844,
"loss": 2.3332,
"margin_dpo/margin_mean": 46.81739807128906,
"margin_dpo/margin_std": 93.64937591552734,
"step": 239
},
{
"epoch": 0.5026178010471204,
"fcm_dpo/beta": 0.011122040450572968,
"fcm_dpo/delta": -0.07281152904033661,
"fcm_dpo/margin": 60.05467224121094,
"fcm_dpo/q_t": 0.3666057884693146,
"grad_norm": 47.212059020996094,
"learning_rate": 2.9281093183781403e-07,
"logits/chosen": -0.8871007561683655,
"logits/rejected": -0.8852866291999817,
"logps/chosen": -363.2594299316406,
"logps/ref_chosen": -285.9796447753906,
"logps/ref_rejected": -256.8263244628906,
"logps/rejected": -394.1607971191406,
"loss": 2.0307,
"margin_dpo/margin_mean": 60.0546760559082,
"margin_dpo/margin_std": 86.94390869140625,
"step": 240
},
{
"epoch": 0.5047120418848168,
"fcm_dpo/beta": 0.01121701579540968,
"fcm_dpo/delta": 0.08503154665231705,
"fcm_dpo/margin": 46.30161666870117,
"fcm_dpo/q_t": 0.39727580547332764,
"grad_norm": 53.38301467895508,
"learning_rate": 2.910060778827554e-07,
"logits/chosen": -0.8218780755996704,
"logits/rejected": -0.8062134981155396,
"logps/chosen": -347.52294921875,
"logps/ref_chosen": -261.516845703125,
"logps/ref_rejected": -250.219482421875,
"logps/rejected": -382.5271911621094,
"loss": 2.2472,
"margin_dpo/margin_mean": 46.30161666870117,
"margin_dpo/margin_std": 80.78411865234375,
"step": 241
},
{
"epoch": 0.506806282722513,
"fcm_dpo/beta": 0.01121577713638544,
"fcm_dpo/delta": -0.0011044739512726665,
"fcm_dpo/margin": 53.588626861572266,
"fcm_dpo/q_t": 0.37954312562942505,
"grad_norm": 57.44087219238281,
"learning_rate": 2.891990248961871e-07,
"logits/chosen": -0.8770509958267212,
"logits/rejected": -0.8585466146469116,
"logps/chosen": -349.27166748046875,
"logps/ref_chosen": -270.51397705078125,
"logps/ref_rejected": -244.85325622558594,
"logps/rejected": -377.1995544433594,
"loss": 2.081,
"margin_dpo/margin_mean": 53.588623046875,
"margin_dpo/margin_std": 78.41287231445312,
"step": 242
},
{
"epoch": 0.5089005235602094,
"fcm_dpo/beta": 0.011150119826197624,
"fcm_dpo/delta": -0.05871187150478363,
"fcm_dpo/margin": 58.73082733154297,
"fcm_dpo/q_t": 0.3701602816581726,
"grad_norm": 41.00937271118164,
"learning_rate": 2.873898697848762e-07,
"logits/chosen": -0.8751071691513062,
"logits/rejected": -0.8671752214431763,
"logps/chosen": -395.99371337890625,
"logps/ref_chosen": -324.68206787109375,
"logps/ref_rejected": -307.1185607910156,
"logps/rejected": -437.1610412597656,
"loss": 2.0244,
"margin_dpo/margin_mean": 58.7308349609375,
"margin_dpo/margin_std": 84.86109924316406,
"step": 243
},
{
"epoch": 0.5109947643979058,
"fcm_dpo/beta": 0.011092188768088818,
"fcm_dpo/delta": -0.052091244608163834,
"fcm_dpo/margin": 58.48289108276367,
"fcm_dpo/q_t": 0.36454105377197266,
"grad_norm": 56.700645446777344,
"learning_rate": 2.8557870956832133e-07,
"logits/chosen": -0.8686002492904663,
"logits/rejected": -0.8242263793945312,
"logps/chosen": -391.94451904296875,
"logps/ref_chosen": -318.979248046875,
"logps/ref_rejected": -269.6687927246094,
"logps/rejected": -401.116943359375,
"loss": 2.0397,
"margin_dpo/margin_mean": 58.482887268066406,
"margin_dpo/margin_std": 81.44038391113281,
"step": 244
},
{
"epoch": 0.5130890052356021,
"fcm_dpo/beta": 0.011050288565456867,
"fcm_dpo/delta": -0.03784612938761711,
"fcm_dpo/margin": 57.50408172607422,
"fcm_dpo/q_t": 0.36484986543655396,
"grad_norm": 42.722900390625,
"learning_rate": 2.837656413735479e-07,
"logits/chosen": -0.8694234490394592,
"logits/rejected": -0.8738152384757996,
"logps/chosen": -363.55609130859375,
"logps/ref_chosen": -294.8980712890625,
"logps/ref_rejected": -239.81539916992188,
"logps/rejected": -365.9775085449219,
"loss": 2.0336,
"margin_dpo/margin_mean": 57.504085540771484,
"margin_dpo/margin_std": 78.0125732421875,
"step": 245
},
{
"epoch": 0.5151832460732985,
"fcm_dpo/beta": 0.011050288565456867,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 41.46842956542969,
"fcm_dpo/q_t": 0.4083188772201538,
"grad_norm": 53.125675201416016,
"learning_rate": 2.8195076242990116e-07,
"logits/chosen": -0.856926679611206,
"logits/rejected": -0.8645325303077698,
"logps/chosen": -365.9036865234375,
"logps/ref_chosen": -280.6853942871094,
"logps/ref_rejected": -253.6463623046875,
"logps/rejected": -380.33306884765625,
"loss": 2.3497,
"margin_dpo/margin_mean": 41.46842956542969,
"margin_dpo/margin_std": 86.0017318725586,
"step": 246
},
{
"epoch": 0.5172774869109947,
"fcm_dpo/beta": 0.011128617450594902,
"fcm_dpo/delta": 0.07063379138708115,
"fcm_dpo/margin": 47.90519332885742,
"fcm_dpo/q_t": 0.39123034477233887,
"grad_norm": 40.086029052734375,
"learning_rate": 2.801341700638307e-07,
"logits/chosen": -0.8583031296730042,
"logits/rejected": -0.858988881111145,
"logps/chosen": -363.207275390625,
"logps/ref_chosen": -281.10906982421875,
"logps/ref_rejected": -260.3768615722656,
"logps/rejected": -390.3802490234375,
"loss": 2.2025,
"margin_dpo/margin_mean": 47.90519714355469,
"margin_dpo/margin_std": 79.46726989746094,
"step": 247
},
{
"epoch": 0.5193717277486911,
"fcm_dpo/beta": 0.01118995901197195,
"fcm_dpo/delta": 0.05496884509921074,
"fcm_dpo/margin": 48.97563934326172,
"fcm_dpo/q_t": 0.38512587547302246,
"grad_norm": 56.72340393066406,
"learning_rate": 2.7831596169367227e-07,
"logits/chosen": -0.831001877784729,
"logits/rejected": -0.8354662656784058,
"logps/chosen": -351.86279296875,
"logps/ref_chosen": -270.318359375,
"logps/ref_rejected": -233.46780395507812,
"logps/rejected": -363.9878845214844,
"loss": 2.1723,
"margin_dpo/margin_mean": 48.97563552856445,
"margin_dpo/margin_std": 79.56255340576172,
"step": 248
},
{
"epoch": 0.5214659685863874,
"fcm_dpo/beta": 0.011280355043709278,
"fcm_dpo/delta": 0.08045822381973267,
"fcm_dpo/margin": 46.429283142089844,
"fcm_dpo/q_t": 0.3924635052680969,
"grad_norm": 46.11130905151367,
"learning_rate": 2.7649623482442274e-07,
"logits/chosen": -0.8485307097434998,
"logits/rejected": -0.8299336433410645,
"logps/chosen": -367.3172607421875,
"logps/ref_chosen": -275.808837890625,
"logps/ref_rejected": -243.45484924316406,
"logps/rejected": -381.3925476074219,
"loss": 2.2986,
"margin_dpo/margin_mean": 46.429283142089844,
"margin_dpo/margin_std": 88.60334777832031,
"step": 249
},
{
"epoch": 0.5235602094240838,
"fcm_dpo/beta": 0.011195365339517593,
"fcm_dpo/delta": -0.07562804967164993,
"fcm_dpo/margin": 59.89421844482422,
"fcm_dpo/q_t": 0.3663076162338257,
"grad_norm": 46.86848831176758,
"learning_rate": 2.7467508704251135e-07,
"logits/chosen": -0.8492177128791809,
"logits/rejected": -0.8528013825416565,
"logps/chosen": -386.95599365234375,
"logps/ref_chosen": -292.4945373535156,
"logps/ref_rejected": -284.2854309082031,
"logps/rejected": -438.6411437988281,
"loss": 2.0781,
"margin_dpo/margin_mean": 59.89421844482422,
"margin_dpo/margin_std": 89.06050872802734,
"step": 250
},
{
"epoch": 0.5256544502617801,
"fcm_dpo/beta": 0.011186817660927773,
"fcm_dpo/delta": -0.0076376101933419704,
"fcm_dpo/margin": 54.27581787109375,
"fcm_dpo/q_t": 0.3836982846260071,
"grad_norm": 57.85386657714844,
"learning_rate": 2.7285261601056697e-07,
"logits/chosen": -0.8376632332801819,
"logits/rejected": -0.8246986865997314,
"logps/chosen": -366.87115478515625,
"logps/ref_chosen": -281.736572265625,
"logps/ref_rejected": -255.9449462890625,
"logps/rejected": -395.3553466796875,
"loss": 2.1198,
"margin_dpo/margin_mean": 54.275821685791016,
"margin_dpo/margin_std": 87.9932632446289,
"step": 251
},
{
"epoch": 0.5277486910994764,
"fcm_dpo/beta": 0.011193905957043171,
"fcm_dpo/delta": 0.006334537640213966,
"fcm_dpo/margin": 53.06830596923828,
"fcm_dpo/q_t": 0.3763534426689148,
"grad_norm": 43.30856704711914,
"learning_rate": 2.7102891946217994e-07,
"logits/chosen": -0.887787938117981,
"logits/rejected": -0.8614078760147095,
"logps/chosen": -396.5959777832031,
"logps/ref_chosen": -295.9674377441406,
"logps/ref_rejected": -280.10430908203125,
"logps/rejected": -433.8011474609375,
"loss": 2.187,
"margin_dpo/margin_mean": 53.06830596923828,
"margin_dpo/margin_std": 88.38142395019531,
"step": 252
},
{
"epoch": 0.5298429319371728,
"fcm_dpo/beta": 0.011209012009203434,
"fcm_dpo/delta": 0.013485941104590893,
"fcm_dpo/margin": 52.395835876464844,
"fcm_dpo/q_t": 0.3871498107910156,
"grad_norm": 48.36872482299805,
"learning_rate": 2.692040951966617e-07,
"logits/chosen": -0.8747860193252563,
"logits/rejected": -0.872173011302948,
"logps/chosen": -384.8511657714844,
"logps/ref_chosen": -277.072265625,
"logps/ref_rejected": -247.32032775878906,
"logps/rejected": -407.49505615234375,
"loss": 2.2103,
"margin_dpo/margin_mean": 52.395835876464844,
"margin_dpo/margin_std": 92.51750183105469,
"step": 253
},
{
"epoch": 0.5319371727748691,
"fcm_dpo/beta": 0.011176303029060364,
"fcm_dpo/delta": -0.029223300516605377,
"fcm_dpo/margin": 56.13548278808594,
"fcm_dpo/q_t": 0.37417668104171753,
"grad_norm": 39.057472229003906,
"learning_rate": 2.6737824107379947e-07,
"logits/chosen": -0.7909507751464844,
"logits/rejected": -0.7841414213180542,
"logps/chosen": -372.0244140625,
"logps/ref_chosen": -269.9478759765625,
"logps/ref_rejected": -249.4471435546875,
"logps/rejected": -407.6592102050781,
"loss": 2.0804,
"margin_dpo/margin_mean": 56.13548278808594,
"margin_dpo/margin_std": 81.23562622070312,
"step": 254
},
{
"epoch": 0.5340314136125655,
"fcm_dpo/beta": 0.01106705330312252,
"fcm_dpo/delta": -0.09823214262723923,
"fcm_dpo/margin": 62.47433853149414,
"fcm_dpo/q_t": 0.3637208938598633,
"grad_norm": 46.793663024902344,
"learning_rate": 2.655514550086086e-07,
"logits/chosen": -0.8256233334541321,
"logits/rejected": -0.7955724000930786,
"logps/chosen": -405.82513427734375,
"logps/ref_chosen": -306.6553039550781,
"logps/ref_rejected": -254.47177124023438,
"logps/rejected": -416.1159973144531,
"loss": 2.121,
"margin_dpo/margin_mean": 62.47433853149414,
"margin_dpo/margin_std": 99.15513610839844,
"step": 255
},
{
"epoch": 0.5361256544502618,
"fcm_dpo/beta": 0.011012692004442215,
"fcm_dpo/delta": -0.049240756779909134,
"fcm_dpo/margin": 58.664283752441406,
"fcm_dpo/q_t": 0.36328646540641785,
"grad_norm": 96.03997802734375,
"learning_rate": 2.6372383496608186e-07,
"logits/chosen": -0.8487477898597717,
"logits/rejected": -0.8419747948646545,
"logps/chosen": -427.13616943359375,
"logps/ref_chosen": -323.71820068359375,
"logps/ref_rejected": -254.1883544921875,
"logps/rejected": -416.2706298828125,
"loss": 2.2568,
"margin_dpo/margin_mean": 58.66428756713867,
"margin_dpo/margin_std": 106.82813262939453,
"step": 256
},
{
"epoch": 0.5382198952879581,
"fcm_dpo/beta": 0.010948875918984413,
"fcm_dpo/delta": -0.05811656638979912,
"fcm_dpo/margin": 59.75982666015625,
"fcm_dpo/q_t": 0.36736562848091125,
"grad_norm": 49.24437713623047,
"learning_rate": 2.618954789559356e-07,
"logits/chosen": -0.8388201594352722,
"logits/rejected": -0.8341472744941711,
"logps/chosen": -373.2787170410156,
"logps/ref_chosen": -267.2120666503906,
"logps/ref_rejected": -249.13357543945312,
"logps/rejected": -414.9600524902344,
"loss": 2.0285,
"margin_dpo/margin_mean": 59.759830474853516,
"margin_dpo/margin_std": 84.38015747070312,
"step": 257
},
{
"epoch": 0.5403141361256545,
"fcm_dpo/beta": 0.010874452069401741,
"fcm_dpo/delta": -0.06820604205131531,
"fcm_dpo/margin": 61.02964782714844,
"fcm_dpo/q_t": 0.3607839047908783,
"grad_norm": 41.02734375,
"learning_rate": 2.600664850273538e-07,
"logits/chosen": -0.863893449306488,
"logits/rejected": -0.8384455442428589,
"logps/chosen": -391.0607604980469,
"logps/ref_chosen": -277.6827392578125,
"logps/ref_rejected": -250.7461395263672,
"logps/rejected": -425.15380859375,
"loss": 2.0149,
"margin_dpo/margin_mean": 61.02964782714844,
"margin_dpo/margin_std": 81.34092712402344,
"step": 258
},
{
"epoch": 0.5424083769633508,
"fcm_dpo/beta": 0.010855198837816715,
"fcm_dpo/delta": -0.01772034913301468,
"fcm_dpo/margin": 56.80473327636719,
"fcm_dpo/q_t": 0.37353599071502686,
"grad_norm": 41.15122604370117,
"learning_rate": 2.582369512637302e-07,
"logits/chosen": -0.8587203025817871,
"logits/rejected": -0.8582494258880615,
"logps/chosen": -398.05303955078125,
"logps/ref_chosen": -294.6099548339844,
"logps/ref_rejected": -272.2757873535156,
"logps/rejected": -432.52362060546875,
"loss": 2.0692,
"margin_dpo/margin_mean": 56.80473327636719,
"margin_dpo/margin_std": 82.64161682128906,
"step": 259
},
{
"epoch": 0.5445026178010471,
"fcm_dpo/beta": 0.010855198837816715,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 28.038097381591797,
"fcm_dpo/q_t": 0.4398733377456665,
"grad_norm": 50.62301254272461,
"learning_rate": 2.5640697577740815e-07,
"logits/chosen": -0.8644586801528931,
"logits/rejected": -0.8649744987487793,
"logps/chosen": -403.7824401855469,
"logps/ref_chosen": -290.85711669921875,
"logps/ref_rejected": -277.5970458984375,
"logps/rejected": -418.56048583984375,
"loss": 2.5946,
"margin_dpo/margin_mean": 28.038097381591797,
"margin_dpo/margin_std": 83.62470245361328,
"step": 260
},
{
"epoch": 0.5465968586387434,
"fcm_dpo/beta": 0.010941832326352596,
"fcm_dpo/delta": 0.07949154824018478,
"fcm_dpo/margin": 47.95015335083008,
"fcm_dpo/q_t": 0.3965446949005127,
"grad_norm": 68.84960174560547,
"learning_rate": 2.5457665670441937e-07,
"logits/chosen": -0.7816342711448669,
"logits/rejected": -0.7925888299942017,
"logps/chosen": -373.12347412109375,
"logps/ref_chosen": -251.13223266601562,
"logps/ref_rejected": -244.75482177734375,
"logps/rejected": -414.69622802734375,
"loss": 2.2899,
"margin_dpo/margin_mean": 47.95015335083008,
"margin_dpo/margin_std": 91.630615234375,
"step": 261
},
{
"epoch": 0.5486910994764398,
"fcm_dpo/beta": 0.010847585275769234,
"fcm_dpo/delta": -0.08650798350572586,
"fcm_dpo/margin": 62.74159240722656,
"fcm_dpo/q_t": 0.361616313457489,
"grad_norm": 42.56726837158203,
"learning_rate": 2.527460921992209e-07,
"logits/chosen": -0.8039164543151855,
"logits/rejected": -0.795622706413269,
"logps/chosen": -407.39019775390625,
"logps/ref_chosen": -299.7217712402344,
"logps/ref_rejected": -277.09234619140625,
"logps/rejected": -447.5023193359375,
"loss": 1.9871,
"margin_dpo/margin_mean": 62.74159240722656,
"margin_dpo/margin_std": 83.42963409423828,
"step": 262
},
{
"epoch": 0.5507853403141362,
"fcm_dpo/beta": 0.010899926535785198,
"fcm_dpo/delta": 0.04813588410615921,
"fcm_dpo/margin": 50.87437438964844,
"fcm_dpo/q_t": 0.3856518268585205,
"grad_norm": 35.797576904296875,
"learning_rate": 2.509153804294318e-07,
"logits/chosen": -0.8057024478912354,
"logits/rejected": -0.7925027012825012,
"logps/chosen": -395.77630615234375,
"logps/ref_chosen": -279.9526062011719,
"logps/ref_rejected": -256.5344543457031,
"logps/rejected": -423.23248291015625,
"loss": 2.2284,
"margin_dpo/margin_mean": 50.87437438964844,
"margin_dpo/margin_std": 88.68104553222656,
"step": 263
},
{
"epoch": 0.5528795811518324,
"fcm_dpo/beta": 0.01083714421838522,
"fcm_dpo/delta": -0.0577649362385273,
"fcm_dpo/margin": 60.3458137512207,
"fcm_dpo/q_t": 0.36682409048080444,
"grad_norm": 42.66000747680664,
"learning_rate": 2.4908461957056825e-07,
"logits/chosen": -0.8125603199005127,
"logits/rejected": -0.8080126643180847,
"logps/chosen": -371.6108093261719,
"logps/ref_chosen": -260.53509521484375,
"logps/ref_rejected": -255.5300750732422,
"logps/rejected": -426.95159912109375,
"loss": 2.0269,
"margin_dpo/margin_mean": 60.34580993652344,
"margin_dpo/margin_std": 84.83659362792969,
"step": 264
},
{
"epoch": 0.5549738219895288,
"fcm_dpo/beta": 0.01070459559559822,
"fcm_dpo/delta": -0.12306361645460129,
"fcm_dpo/margin": 66.72086334228516,
"fcm_dpo/q_t": 0.3582300543785095,
"grad_norm": 42.377262115478516,
"learning_rate": 2.4725390780077905e-07,
"logits/chosen": -0.8775874376296997,
"logits/rejected": -0.882483959197998,
"logps/chosen": -394.9687805175781,
"logps/ref_chosen": -283.7130432128906,
"logps/ref_rejected": -270.32867431640625,
"logps/rejected": -448.3052673339844,
"loss": 2.0248,
"margin_dpo/margin_mean": 66.72086334228516,
"margin_dpo/margin_std": 93.51565551757812,
"step": 265
},
{
"epoch": 0.5570680628272251,
"fcm_dpo/beta": 0.01059802994132042,
"fcm_dpo/delta": -0.100050188601017,
"fcm_dpo/margin": 65.39716339111328,
"fcm_dpo/q_t": 0.355808824300766,
"grad_norm": 45.007423400878906,
"learning_rate": 2.454233432955807e-07,
"logits/chosen": -0.871313214302063,
"logits/rejected": -0.8497393131256104,
"logps/chosen": -377.9949645996094,
"logps/ref_chosen": -278.09930419921875,
"logps/ref_rejected": -260.67034912109375,
"logps/rejected": -425.9631652832031,
"loss": 1.928,
"margin_dpo/margin_mean": 65.39716339111328,
"margin_dpo/margin_std": 80.33833312988281,
"step": 266
},
{
"epoch": 0.5591623036649215,
"fcm_dpo/beta": 0.010643371380865574,
"fcm_dpo/delta": 0.04269120469689369,
"fcm_dpo/margin": 52.58607482910156,
"fcm_dpo/q_t": 0.3854196071624756,
"grad_norm": 43.52296829223633,
"learning_rate": 2.435930242225919e-07,
"logits/chosen": -0.8321479558944702,
"logits/rejected": -0.8424822092056274,
"logps/chosen": -396.172607421875,
"logps/ref_chosen": -280.33319091796875,
"logps/ref_rejected": -247.7987518310547,
"logps/rejected": -416.2242736816406,
"loss": 2.1671,
"margin_dpo/margin_mean": 52.5860710144043,
"margin_dpo/margin_std": 85.20578002929688,
"step": 267
},
{
"epoch": 0.5612565445026177,
"fcm_dpo/beta": 0.010542841628193855,
"fcm_dpo/delta": -0.09490174800157547,
"fcm_dpo/margin": 65.28962707519531,
"fcm_dpo/q_t": 0.36029142141342163,
"grad_norm": 46.59050750732422,
"learning_rate": 2.4176304873626984e-07,
"logits/chosen": -0.7996433973312378,
"logits/rejected": -0.7823675870895386,
"logps/chosen": -407.8582458496094,
"logps/ref_chosen": -304.1787109375,
"logps/ref_rejected": -272.7938232421875,
"logps/rejected": -441.76300048828125,
"loss": 2.0012,
"margin_dpo/margin_mean": 65.28963470458984,
"margin_dpo/margin_std": 89.2424087524414,
"step": 268
},
{
"epoch": 0.5633507853403141,
"fcm_dpo/beta": 0.010634300298988819,
"fcm_dpo/delta": 0.08637518435716629,
"fcm_dpo/margin": 48.717872619628906,
"fcm_dpo/q_t": 0.39338934421539307,
"grad_norm": 57.86610412597656,
"learning_rate": 2.399335149726463e-07,
"logits/chosen": -0.8215547204017639,
"logits/rejected": -0.8171166181564331,
"logps/chosen": -353.824462890625,
"logps/ref_chosen": -249.84510803222656,
"logps/ref_rejected": -223.36734008789062,
"logps/rejected": -376.0645751953125,
"loss": 2.2902,
"margin_dpo/margin_mean": 48.71787643432617,
"margin_dpo/margin_std": 91.85305786132812,
"step": 269
},
{
"epoch": 0.5654450261780105,
"fcm_dpo/beta": 0.010605805553495884,
"fcm_dpo/delta": -0.02683107927441597,
"fcm_dpo/margin": 58.944271087646484,
"fcm_dpo/q_t": 0.376254141330719,
"grad_norm": 43.06734848022461,
"learning_rate": 2.381045210440644e-07,
"logits/chosen": -0.8905260562896729,
"logits/rejected": -0.9027525186538696,
"logps/chosen": -425.31298828125,
"logps/ref_chosen": -318.5623779296875,
"logps/ref_rejected": -281.18505859375,
"logps/rejected": -446.8799133300781,
"loss": 2.1871,
"margin_dpo/margin_mean": 58.944271087646484,
"margin_dpo/margin_std": 100.78343200683594,
"step": 270
},
{
"epoch": 0.5675392670157068,
"fcm_dpo/beta": 0.010648606345057487,
"fcm_dpo/delta": 0.04027487337589264,
"fcm_dpo/margin": 52.775352478027344,
"fcm_dpo/q_t": 0.38823169469833374,
"grad_norm": 47.92866134643555,
"learning_rate": 2.3627616503391812e-07,
"logits/chosen": -0.7817699313163757,
"logits/rejected": -0.7808968424797058,
"logps/chosen": -386.7995300292969,
"logps/ref_chosen": -284.10467529296875,
"logps/ref_rejected": -253.9595947265625,
"logps/rejected": -409.4297790527344,
"loss": 2.1892,
"margin_dpo/margin_mean": 52.775352478027344,
"margin_dpo/margin_std": 88.72415161132812,
"step": 271
},
{
"epoch": 0.5696335078534032,
"fcm_dpo/beta": 0.010608755983412266,
"fcm_dpo/delta": -0.03749370574951172,
"fcm_dpo/margin": 59.866397857666016,
"fcm_dpo/q_t": 0.373867928981781,
"grad_norm": 41.87883758544922,
"learning_rate": 2.344485449913914e-07,
"logits/chosen": -0.8470156788825989,
"logits/rejected": -0.8377933502197266,
"logps/chosen": -390.36419677734375,
"logps/ref_chosen": -297.3590087890625,
"logps/ref_rejected": -279.2024230957031,
"logps/rejected": -432.07403564453125,
"loss": 2.1662,
"margin_dpo/margin_mean": 59.86639404296875,
"margin_dpo/margin_std": 100.88554382324219,
"step": 272
},
{
"epoch": 0.5717277486910994,
"fcm_dpo/beta": 0.010550711303949356,
"fcm_dpo/delta": -0.05486389249563217,
"fcm_dpo/margin": 61.72862243652344,
"fcm_dpo/q_t": 0.3700960874557495,
"grad_norm": 45.351871490478516,
"learning_rate": 2.3262175892620062e-07,
"logits/chosen": -0.8470065593719482,
"logits/rejected": -0.8606555461883545,
"logps/chosen": -385.10101318359375,
"logps/ref_chosen": -293.20574951171875,
"logps/ref_rejected": -274.7646789550781,
"logps/rejected": -428.3885498046875,
"loss": 2.0994,
"margin_dpo/margin_mean": 61.7286262512207,
"margin_dpo/margin_std": 94.42216491699219,
"step": 273
},
{
"epoch": 0.5738219895287958,
"fcm_dpo/beta": 0.010357696563005447,
"fcm_dpo/delta": -0.18463444709777832,
"fcm_dpo/margin": 74.367919921875,
"fcm_dpo/q_t": 0.3416471779346466,
"grad_norm": 41.99620056152344,
"learning_rate": 2.3079590480333827e-07,
"logits/chosen": -0.7980269193649292,
"logits/rejected": -0.7709203958511353,
"logps/chosen": -361.46337890625,
"logps/ref_chosen": -270.5586853027344,
"logps/ref_rejected": -239.46800231933594,
"logps/rejected": -404.7406005859375,
"loss": 1.8727,
"margin_dpo/margin_mean": 74.367919921875,
"margin_dpo/margin_std": 89.1718978881836,
"step": 274
},
{
"epoch": 0.5759162303664922,
"fcm_dpo/beta": 0.01020055916160345,
"fcm_dpo/delta": -0.15287356078624725,
"fcm_dpo/margin": 72.6873550415039,
"fcm_dpo/q_t": 0.3521430790424347,
"grad_norm": 36.203521728515625,
"learning_rate": 2.2897108053782e-07,
"logits/chosen": -0.8515808582305908,
"logits/rejected": -0.8344404697418213,
"logps/chosen": -335.6006774902344,
"logps/ref_chosen": -250.31922912597656,
"logps/ref_rejected": -249.31613159179688,
"logps/rejected": -407.2849426269531,
"loss": 1.9149,
"margin_dpo/margin_mean": 72.68734741210938,
"margin_dpo/margin_std": 92.69225311279297,
"step": 275
},
{
"epoch": 0.5780104712041885,
"fcm_dpo/beta": 0.010121261700987816,
"fcm_dpo/delta": -0.07804254442453384,
"fcm_dpo/margin": 66.47111511230469,
"fcm_dpo/q_t": 0.36096659302711487,
"grad_norm": 37.32109832763672,
"learning_rate": 2.2714738398943308e-07,
"logits/chosen": -0.8925960063934326,
"logits/rejected": -0.8720380067825317,
"logps/chosen": -387.9191589355469,
"logps/ref_chosen": -297.63104248046875,
"logps/ref_rejected": -295.223876953125,
"logps/rejected": -451.98309326171875,
"loss": 2.0214,
"margin_dpo/margin_mean": 66.47111511230469,
"margin_dpo/margin_std": 92.85818481445312,
"step": 276
},
{
"epoch": 0.5801047120418849,
"fcm_dpo/beta": 0.010191617533564568,
"fcm_dpo/delta": 0.06927236169576645,
"fcm_dpo/margin": 52.43690490722656,
"fcm_dpo/q_t": 0.39031559228897095,
"grad_norm": 50.279022216796875,
"learning_rate": 2.2532491295748865e-07,
"logits/chosen": -0.8411717414855957,
"logits/rejected": -0.8436754941940308,
"logps/chosen": -367.122802734375,
"logps/ref_chosen": -266.3604736328125,
"logps/ref_rejected": -253.375,
"logps/rejected": -406.57421875,
"loss": 2.2409,
"margin_dpo/margin_mean": 52.4369010925293,
"margin_dpo/margin_std": 93.75244140625,
"step": 277
},
{
"epoch": 0.5821989528795811,
"fcm_dpo/beta": 0.010191617533564568,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 43.56483459472656,
"fcm_dpo/q_t": 0.4187608063220978,
"grad_norm": 46.038841247558594,
"learning_rate": 2.2350376517557726e-07,
"logits/chosen": -0.8760834336280823,
"logits/rejected": -0.8470039367675781,
"logps/chosen": -379.5211486816406,
"logps/ref_chosen": -267.40728759765625,
"logps/ref_rejected": -229.5784912109375,
"logps/rejected": -385.2572021484375,
"loss": 2.4806,
"margin_dpo/margin_mean": 43.56483459472656,
"margin_dpo/margin_std": 104.89246368408203,
"step": 278
},
{
"epoch": 0.5842931937172775,
"fcm_dpo/beta": 0.010066019371151924,
"fcm_dpo/delta": -0.12400264292955399,
"fcm_dpo/margin": 71.03903198242188,
"fcm_dpo/q_t": 0.3566210865974426,
"grad_norm": 53.25889205932617,
"learning_rate": 2.2168403830632769e-07,
"logits/chosen": -0.7910211086273193,
"logits/rejected": -0.7792261838912964,
"logps/chosen": -412.38592529296875,
"logps/ref_chosen": -313.3677978515625,
"logps/ref_rejected": -299.1744384765625,
"logps/rejected": -469.23162841796875,
"loss": 2.008,
"margin_dpo/margin_mean": 71.03903198242188,
"margin_dpo/margin_std": 101.22288513183594,
"step": 279
},
{
"epoch": 0.5863874345549738,
"fcm_dpo/beta": 0.01006636954843998,
"fcm_dpo/delta": 0.0003481449093669653,
"fcm_dpo/margin": 59.571895599365234,
"fcm_dpo/q_t": 0.3777249753475189,
"grad_norm": 35.59181213378906,
"learning_rate": 2.1986582993616925e-07,
"logits/chosen": -0.861056923866272,
"logits/rejected": -0.8725382089614868,
"logps/chosen": -358.2587890625,
"logps/ref_chosen": -265.5558166503906,
"logps/ref_rejected": -247.16551208496094,
"logps/rejected": -399.44036865234375,
"loss": 2.1386,
"margin_dpo/margin_mean": 59.5718994140625,
"margin_dpo/margin_std": 94.98712158203125,
"step": 280
},
{
"epoch": 0.5884816753926702,
"fcm_dpo/beta": 0.010039416141808033,
"fcm_dpo/delta": -0.026811985298991203,
"fcm_dpo/margin": 62.26792907714844,
"fcm_dpo/q_t": 0.3763653337955475,
"grad_norm": 43.584205627441406,
"learning_rate": 2.1804923757009882e-07,
"logits/chosen": -0.8334259390830994,
"logits/rejected": -0.8437093496322632,
"logps/chosen": -407.51873779296875,
"logps/ref_chosen": -295.2995910644531,
"logps/ref_rejected": -293.810791015625,
"logps/rejected": -468.2978515625,
"loss": 2.1062,
"margin_dpo/margin_mean": 62.26792907714844,
"margin_dpo/margin_std": 97.07644653320312,
"step": 281
},
{
"epoch": 0.5905759162303665,
"fcm_dpo/beta": 0.010006159543991089,
"fcm_dpo/delta": -0.033181145787239075,
"fcm_dpo/margin": 63.06951904296875,
"fcm_dpo/q_t": 0.37208595871925354,
"grad_norm": 42.99271011352539,
"learning_rate": 2.1623435862645205e-07,
"logits/chosen": -0.8276978731155396,
"logits/rejected": -0.830435037612915,
"logps/chosen": -419.7965087890625,
"logps/ref_chosen": -318.63714599609375,
"logps/ref_rejected": -273.5940246582031,
"logps/rejected": -437.8229064941406,
"loss": 2.1001,
"margin_dpo/margin_mean": 63.069522857666016,
"margin_dpo/margin_std": 95.92523193359375,
"step": 282
},
{
"epoch": 0.5926701570680628,
"fcm_dpo/beta": 0.010040544904768467,
"fcm_dpo/delta": 0.034305017441511154,
"fcm_dpo/margin": 56.53467559814453,
"fcm_dpo/q_t": 0.3877224028110504,
"grad_norm": 39.61931610107422,
"learning_rate": 2.1442129043167873e-07,
"logits/chosen": -0.8462857604026794,
"logits/rejected": -0.8432782292366028,
"logps/chosen": -360.7950744628906,
"logps/ref_chosen": -254.66055297851562,
"logps/ref_rejected": -236.87747192382812,
"logps/rejected": -399.54669189453125,
"loss": 2.1553,
"margin_dpo/margin_mean": 56.53468322753906,
"margin_dpo/margin_std": 92.14421081542969,
"step": 283
},
{
"epoch": 0.5947643979057592,
"fcm_dpo/beta": 0.010015284642577171,
"fcm_dpo/delta": -0.025189727544784546,
"fcm_dpo/margin": 62.26651382446289,
"fcm_dpo/q_t": 0.37723034620285034,
"grad_norm": 58.63220977783203,
"learning_rate": 2.1261013021512378e-07,
"logits/chosen": -0.8283619284629822,
"logits/rejected": -0.8163946866989136,
"logps/chosen": -385.39642333984375,
"logps/ref_chosen": -273.355224609375,
"logps/ref_rejected": -259.8509521484375,
"logps/rejected": -434.1585998535156,
"loss": 2.1961,
"margin_dpo/margin_mean": 62.26651382446289,
"margin_dpo/margin_std": 106.06271362304688,
"step": 284
},
{
"epoch": 0.5968586387434555,
"fcm_dpo/beta": 0.010015284642577171,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 45.020751953125,
"fcm_dpo/q_t": 0.4107897877693176,
"grad_norm": 54.859474182128906,
"learning_rate": 2.1080097510381294e-07,
"logits/chosen": -0.8187122344970703,
"logits/rejected": -0.8182280659675598,
"logps/chosen": -425.72027587890625,
"logps/ref_chosen": -309.80224609375,
"logps/ref_rejected": -279.11846923828125,
"logps/rejected": -440.0572204589844,
"loss": 2.3866,
"margin_dpo/margin_mean": 45.020751953125,
"margin_dpo/margin_std": 94.1063232421875,
"step": 285
},
{
"epoch": 0.5989528795811518,
"fcm_dpo/beta": 0.010027091950178146,
"fcm_dpo/delta": 0.011782662011682987,
"fcm_dpo/margin": 58.731964111328125,
"fcm_dpo/q_t": 0.38405752182006836,
"grad_norm": 52.8409538269043,
"learning_rate": 2.089939221172446e-07,
"logits/chosen": -0.8124702572822571,
"logits/rejected": -0.8048035502433777,
"logps/chosen": -380.0823669433594,
"logps/ref_chosen": -271.4655456542969,
"logps/ref_rejected": -279.5379333496094,
"logps/rejected": -446.88671875,
"loss": 2.1607,
"margin_dpo/margin_mean": 58.731964111328125,
"margin_dpo/margin_std": 98.83124542236328,
"step": 286
},
{
"epoch": 0.6010471204188481,
"fcm_dpo/beta": 0.01003226824104786,
"fcm_dpo/delta": 0.005160625092685223,
"fcm_dpo/margin": 59.323219299316406,
"fcm_dpo/q_t": 0.3794439435005188,
"grad_norm": 44.22550582885742,
"learning_rate": 2.0718906816218595e-07,
"logits/chosen": -0.833111584186554,
"logits/rejected": -0.823143720626831,
"logps/chosen": -387.0350646972656,
"logps/ref_chosen": -277.09326171875,
"logps/ref_rejected": -233.55599975585938,
"logps/rejected": -402.8210144042969,
"loss": 2.1876,
"margin_dpo/margin_mean": 59.323219299316406,
"margin_dpo/margin_std": 97.91876220703125,
"step": 287
},
{
"epoch": 0.6031413612565445,
"fcm_dpo/beta": 0.010074135847389698,
"fcm_dpo/delta": 0.041645895689725876,
"fcm_dpo/margin": 55.655818939208984,
"fcm_dpo/q_t": 0.3857857584953308,
"grad_norm": 42.96995544433594,
"learning_rate": 2.053865100274774e-07,
"logits/chosen": -0.8498709201812744,
"logits/rejected": -0.8570349216461182,
"logps/chosen": -399.763427734375,
"logps/ref_chosen": -293.168212890625,
"logps/ref_rejected": -263.40594482421875,
"logps/rejected": -425.656982421875,
"loss": 2.1887,
"margin_dpo/margin_mean": 55.655818939208984,
"margin_dpo/margin_std": 92.36091613769531,
"step": 288
},
{
"epoch": 0.6052356020942409,
"fcm_dpo/beta": 0.010074135847389698,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 37.436737060546875,
"fcm_dpo/q_t": 0.4230777621269226,
"grad_norm": 45.797325134277344,
"learning_rate": 2.035863443788411e-07,
"logits/chosen": -0.8302249312400818,
"logits/rejected": -0.818213939666748,
"logps/chosen": -451.8826599121094,
"logps/ref_chosen": -329.9573974609375,
"logps/ref_rejected": -276.7565002441406,
"logps/rejected": -436.1184997558594,
"loss": 2.4789,
"margin_dpo/margin_mean": 37.436737060546875,
"margin_dpo/margin_std": 92.28260803222656,
"step": 289
},
{
"epoch": 0.6073298429319371,
"fcm_dpo/beta": 0.010126025415956974,
"fcm_dpo/delta": 0.05137556046247482,
"fcm_dpo/margin": 54.458709716796875,
"fcm_dpo/q_t": 0.3912982940673828,
"grad_norm": 56.4118766784668,
"learning_rate": 2.0178866775369774e-07,
"logits/chosen": -0.8381444215774536,
"logits/rejected": -0.7853888273239136,
"logps/chosen": -434.4165954589844,
"logps/ref_chosen": -324.6690368652344,
"logps/ref_rejected": -311.8419494628906,
"logps/rejected": -476.04815673828125,
"loss": 2.2458,
"margin_dpo/margin_mean": 54.458709716796875,
"margin_dpo/margin_std": 97.96626281738281,
"step": 290
},
{
"epoch": 0.6094240837696335,
"fcm_dpo/beta": 0.010019439272582531,
"fcm_dpo/delta": -0.10581731796264648,
"fcm_dpo/margin": 69.70329284667969,
"fcm_dpo/q_t": 0.35912033915519714,
"grad_norm": 39.026832580566406,
"learning_rate": 1.9999357655598891e-07,
"logits/chosen": -0.8280945420265198,
"logits/rejected": -0.8222076296806335,
"logps/chosen": -375.1222839355469,
"logps/ref_chosen": -274.1440734863281,
"logps/ref_rejected": -278.07208251953125,
"logps/rejected": -448.7535400390625,
"loss": 1.9838,
"margin_dpo/margin_mean": 69.70329284667969,
"margin_dpo/margin_std": 93.61442565917969,
"step": 291
},
{
"epoch": 0.6115183246073298,
"fcm_dpo/beta": 0.010129205882549286,
"fcm_dpo/delta": 0.10895773023366928,
"fcm_dpo/margin": 49.00895690917969,
"fcm_dpo/q_t": 0.3987590968608856,
"grad_norm": 45.311134338378906,
"learning_rate": 1.9820116705100775e-07,
"logits/chosen": -0.8249736428260803,
"logits/rejected": -0.821456789970398,
"logps/chosen": -358.0533142089844,
"logps/ref_chosen": -259.3636779785156,
"logps/ref_rejected": -279.31072998046875,
"logps/rejected": -427.00933837890625,
"loss": 2.2903,
"margin_dpo/margin_mean": 49.00895690917969,
"margin_dpo/margin_std": 94.14812469482422,
"step": 292
},
{
"epoch": 0.6136125654450262,
"fcm_dpo/beta": 0.010073216632008553,
"fcm_dpo/delta": -0.05542852357029915,
"fcm_dpo/margin": 64.70680236816406,
"fcm_dpo/q_t": 0.36682164669036865,
"grad_norm": 49.751827239990234,
"learning_rate": 1.9641153536023642e-07,
"logits/chosen": -0.9084374904632568,
"logits/rejected": -0.8743972778320312,
"logps/chosen": -408.55804443359375,
"logps/ref_chosen": -303.7708435058594,
"logps/ref_rejected": -270.08209228515625,
"logps/rejected": -439.57611083984375,
"loss": 1.9975,
"margin_dpo/margin_mean": 64.70680236816406,
"margin_dpo/margin_std": 86.56443786621094,
"step": 293
},
{
"epoch": 0.6157068062827226,
"fcm_dpo/beta": 0.01007704995572567,
"fcm_dpo/delta": 0.003804786130785942,
"fcm_dpo/margin": 59.186180114746094,
"fcm_dpo/q_t": 0.3824058771133423,
"grad_norm": 44.1444206237793,
"learning_rate": 1.9462477745619106e-07,
"logits/chosen": -0.8228561878204346,
"logits/rejected": -0.8309228420257568,
"logps/chosen": -335.5594482421875,
"logps/ref_chosen": -240.23831176757812,
"logps/ref_rejected": -229.18954467773438,
"logps/rejected": -383.6968994140625,
"loss": 2.1318,
"margin_dpo/margin_mean": 59.186180114746094,
"margin_dpo/margin_std": 94.04498291015625,
"step": 294
},
{
"epoch": 0.6178010471204188,
"fcm_dpo/beta": 0.010150096379220486,
"fcm_dpo/delta": 0.07222657650709152,
"fcm_dpo/margin": 52.373802185058594,
"fcm_dpo/q_t": 0.39122486114501953,
"grad_norm": 40.20429992675781,
"learning_rate": 1.928409891572757e-07,
"logits/chosen": -0.8109441995620728,
"logits/rejected": -0.8221766948699951,
"logps/chosen": -349.451416015625,
"logps/ref_chosen": -251.00970458984375,
"logps/ref_rejected": -244.15142822265625,
"logps/rejected": -394.96697998046875,
"loss": 2.1934,
"margin_dpo/margin_mean": 52.37379837036133,
"margin_dpo/margin_std": 87.55313873291016,
"step": 295
},
{
"epoch": 0.6198952879581152,
"fcm_dpo/beta": 0.009992404840886593,
"fcm_dpo/delta": -0.15657944977283478,
"fcm_dpo/margin": 74.53913879394531,
"fcm_dpo/q_t": 0.34834346175193787,
"grad_norm": 44.559654235839844,
"learning_rate": 1.9106026612264315e-07,
"logits/chosen": -0.797303318977356,
"logits/rejected": -0.7727010846138,
"logps/chosen": -392.84326171875,
"logps/ref_chosen": -293.880615234375,
"logps/ref_rejected": -283.41461181640625,
"logps/rejected": -456.9163513183594,
"loss": 1.9588,
"margin_dpo/margin_mean": 74.53914642333984,
"margin_dpo/margin_std": 98.64404296875,
"step": 296
},
{
"epoch": 0.6219895287958115,
"fcm_dpo/beta": 0.010080668143928051,
"fcm_dpo/delta": 0.08794273436069489,
"fcm_dpo/margin": 51.24464797973633,
"fcm_dpo/q_t": 0.3935660421848297,
"grad_norm": 42.070838928222656,
"learning_rate": 1.8928270384706582e-07,
"logits/chosen": -0.8731030225753784,
"logits/rejected": -0.868933379650116,
"logps/chosen": -389.524658203125,
"logps/ref_chosen": -289.4600830078125,
"logps/ref_rejected": -283.6915283203125,
"logps/rejected": -435.0007629394531,
"loss": 2.2084,
"margin_dpo/margin_mean": 51.24465560913086,
"margin_dpo/margin_std": 84.09497833251953,
"step": 297
},
{
"epoch": 0.6240837696335079,
"fcm_dpo/beta": 0.010046989656984806,
"fcm_dpo/delta": -0.0334644578397274,
"fcm_dpo/margin": 62.83953094482422,
"fcm_dpo/q_t": 0.37526988983154297,
"grad_norm": 50.428749084472656,
"learning_rate": 1.875083976558136e-07,
"logits/chosen": -0.813686728477478,
"logits/rejected": -0.8053783178329468,
"logps/chosen": -396.1636657714844,
"logps/ref_chosen": -306.5150146484375,
"logps/ref_rejected": -280.6922912597656,
"logps/rejected": -433.18048095703125,
"loss": 2.1366,
"margin_dpo/margin_mean": 62.83953094482422,
"margin_dpo/margin_std": 98.94940948486328,
"step": 298
},
{
"epoch": 0.6261780104712041,
"fcm_dpo/beta": 0.010100976563990116,
"fcm_dpo/delta": 0.05359088256955147,
"fcm_dpo/margin": 54.38535690307617,
"fcm_dpo/q_t": 0.3850356936454773,
"grad_norm": 43.860626220703125,
"learning_rate": 1.8573744269954297e-07,
"logits/chosen": -0.801188588142395,
"logits/rejected": -0.7936796545982361,
"logps/chosen": -387.90338134765625,
"logps/ref_chosen": -281.3638000488281,
"logps/ref_rejected": -270.3995361328125,
"logps/rejected": -431.3244934082031,
"loss": 2.1451,
"margin_dpo/margin_mean": 54.38536071777344,
"margin_dpo/margin_std": 83.9508056640625,
"step": 299
},
{
"epoch": 0.6282722513089005,
"fcm_dpo/beta": 0.01017346978187561,
"fcm_dpo/delta": 0.07151259481906891,
"fcm_dpo/margin": 52.32042694091797,
"fcm_dpo/q_t": 0.39060622453689575,
"grad_norm": 47.70722579956055,
"learning_rate": 1.839699339491937e-07,
"logits/chosen": -0.8247004747390747,
"logits/rejected": -0.8056859970092773,
"logps/chosen": -423.5160827636719,
"logps/ref_chosen": -314.83575439453125,
"logps/ref_rejected": -269.10955810546875,
"logps/rejected": -430.1102600097656,
"loss": 2.2218,
"margin_dpo/margin_mean": 52.3204231262207,
"margin_dpo/margin_std": 90.9654312133789,
"step": 300
},
{
"epoch": 0.6303664921465969,
"fcm_dpo/beta": 0.010228170081973076,
"fcm_dpo/delta": 0.053623542189598083,
"fcm_dpo/margin": 53.70600891113281,
"fcm_dpo/q_t": 0.3890165090560913,
"grad_norm": 44.373165130615234,
"learning_rate": 1.8220596619089573e-07,
"logits/chosen": -0.8386092782020569,
"logits/rejected": -0.8467391133308411,
"logps/chosen": -381.4074401855469,
"logps/ref_chosen": -279.89453125,
"logps/ref_rejected": -271.67254638671875,
"logps/rejected": -426.8914794921875,
"loss": 2.1951,
"margin_dpo/margin_mean": 53.70600891113281,
"margin_dpo/margin_std": 90.34825134277344,
"step": 301
},
{
"epoch": 0.6324607329842932,
"fcm_dpo/beta": 0.010183500126004219,
"fcm_dpo/delta": -0.0437694787979126,
"fcm_dpo/margin": 62.940826416015625,
"fcm_dpo/q_t": 0.3705183267593384,
"grad_norm": 54.901649475097656,
"learning_rate": 1.8044563402088682e-07,
"logits/chosen": -0.8182817101478577,
"logits/rejected": -0.8039256930351257,
"logps/chosen": -368.46209716796875,
"logps/ref_chosen": -271.3318176269531,
"logps/ref_rejected": -256.5587158203125,
"logps/rejected": -416.62982177734375,
"loss": 2.0709,
"margin_dpo/margin_mean": 62.940826416015625,
"margin_dpo/margin_std": 92.8311767578125,
"step": 302
},
{
"epoch": 0.6345549738219896,
"fcm_dpo/beta": 0.010186291299760342,
"fcm_dpo/delta": 0.0027407321613281965,
"fcm_dpo/margin": 58.64970397949219,
"fcm_dpo/q_t": 0.3783823847770691,
"grad_norm": 46.2238655090332,
"learning_rate": 1.7868903184043885e-07,
"logits/chosen": -0.798983097076416,
"logits/rejected": -0.7864460945129395,
"logps/chosen": -408.74359130859375,
"logps/ref_chosen": -304.8810119628906,
"logps/ref_rejected": -269.0672302246094,
"logps/rejected": -431.57952880859375,
"loss": 2.1497,
"margin_dpo/margin_mean": 58.64970397949219,
"margin_dpo/margin_std": 92.76274871826172,
"step": 303
},
{
"epoch": 0.6366492146596858,
"fcm_dpo/beta": 0.010141594335436821,
"fcm_dpo/delta": -0.04397614300251007,
"fcm_dpo/margin": 63.21988296508789,
"fcm_dpo/q_t": 0.3736143708229065,
"grad_norm": 55.40259552001953,
"learning_rate": 1.7693625385079574e-07,
"logits/chosen": -0.8265484571456909,
"logits/rejected": -0.8342360258102417,
"logps/chosen": -404.1358642578125,
"logps/ref_chosen": -290.7109680175781,
"logps/ref_rejected": -237.6811981201172,
"logps/rejected": -414.32598876953125,
"loss": 2.0782,
"margin_dpo/margin_mean": 63.219886779785156,
"margin_dpo/margin_std": 96.28173828125,
"step": 304
},
{
"epoch": 0.6387434554973822,
"fcm_dpo/beta": 0.00984902959316969,
"fcm_dpo/delta": -0.29272305965423584,
"fcm_dpo/margin": 88.02590942382812,
"fcm_dpo/q_t": 0.32526132464408875,
"grad_norm": 51.372337341308594,
"learning_rate": 1.7518739404812155e-07,
"logits/chosen": -0.845760703086853,
"logits/rejected": -0.8201291561126709,
"logps/chosen": -351.881591796875,
"logps/ref_chosen": -256.4839782714844,
"logps/ref_rejected": -266.4090576171875,
"logps/rejected": -449.83258056640625,
"loss": 1.7632,
"margin_dpo/margin_mean": 88.02590942382812,
"margin_dpo/margin_std": 99.01487731933594,
"step": 305
},
{
"epoch": 0.6408376963350786,
"fcm_dpo/beta": 0.009877652861177921,
"fcm_dpo/delta": 0.029019657522439957,
"fcm_dpo/margin": 57.97325897216797,
"fcm_dpo/q_t": 0.38242125511169434,
"grad_norm": 45.8656005859375,
"learning_rate": 1.7344254621846017e-07,
"logits/chosen": -0.8495131134986877,
"logits/rejected": -0.8392184972763062,
"logps/chosen": -421.48919677734375,
"logps/ref_chosen": -320.64923095703125,
"logps/ref_rejected": -273.366455078125,
"logps/rejected": -432.1796875,
"loss": 2.1187,
"margin_dpo/margin_mean": 57.97325897216797,
"margin_dpo/margin_std": 86.58419036865234,
"step": 306
},
{
"epoch": 0.6429319371727749,
"fcm_dpo/beta": 0.009858179837465286,
"fcm_dpo/delta": -0.019733604043722153,
"fcm_dpo/margin": 62.7409782409668,
"fcm_dpo/q_t": 0.37120628356933594,
"grad_norm": 66.25527954101562,
"learning_rate": 1.717018039327053e-07,
"logits/chosen": -0.7906115055084229,
"logits/rejected": -0.8188501000404358,
"logps/chosen": -403.6545715332031,
"logps/ref_chosen": -279.45416259765625,
"logps/ref_rejected": -240.3816680908203,
"logps/rejected": -427.3230285644531,
"loss": 2.0681,
"margin_dpo/margin_mean": 62.74097442626953,
"margin_dpo/margin_std": 86.63983917236328,
"step": 307
},
{
"epoch": 0.6450261780104712,
"fcm_dpo/beta": 0.009858179837465286,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 46.39971923828125,
"fcm_dpo/q_t": 0.40446293354034424,
"grad_norm": 50.953643798828125,
"learning_rate": 1.699652605415828e-07,
"logits/chosen": -0.8273122906684875,
"logits/rejected": -0.8418517708778381,
"logps/chosen": -426.36541748046875,
"logps/ref_chosen": -296.598388671875,
"logps/ref_rejected": -258.6962890625,
"logps/rejected": -434.863037109375,
"loss": 2.3307,
"margin_dpo/margin_mean": 46.399723052978516,
"margin_dpo/margin_std": 92.61000061035156,
"step": 308
},
{
"epoch": 0.6471204188481675,
"fcm_dpo/beta": 0.009756959974765778,
"fcm_dpo/delta": -0.10320662707090378,
"fcm_dpo/margin": 71.3322982788086,
"fcm_dpo/q_t": 0.3589407205581665,
"grad_norm": 50.02326583862305,
"learning_rate": 1.6823300917064458e-07,
"logits/chosen": -0.8325700163841248,
"logits/rejected": -0.8428257703781128,
"logps/chosen": -401.8047790527344,
"logps/ref_chosen": -281.38812255859375,
"logps/ref_rejected": -262.4524230957031,
"logps/rejected": -454.20135498046875,
"loss": 1.9826,
"margin_dpo/margin_mean": 71.3322982788086,
"margin_dpo/margin_std": 97.09811401367188,
"step": 309
},
{
"epoch": 0.6492146596858639,
"fcm_dpo/beta": 0.009693772532045841,
"fcm_dpo/delta": -0.06497249007225037,
"fcm_dpo/margin": 68.15365600585938,
"fcm_dpo/q_t": 0.3655131459236145,
"grad_norm": 38.82382583618164,
"learning_rate": 1.6650514271527465e-07,
"logits/chosen": -0.8251418471336365,
"logits/rejected": -0.8055839538574219,
"logps/chosen": -397.37835693359375,
"logps/ref_chosen": -279.187255859375,
"logps/ref_rejected": -261.82562255859375,
"logps/rejected": -448.17041015625,
"loss": 2.0208,
"margin_dpo/margin_mean": 68.15365600585938,
"margin_dpo/margin_std": 93.89353942871094,
"step": 310
},
{
"epoch": 0.6513089005235602,
"fcm_dpo/beta": 0.009623035788536072,
"fcm_dpo/delta": -0.07323934882879257,
"fcm_dpo/margin": 69.4507064819336,
"fcm_dpo/q_t": 0.36212098598480225,
"grad_norm": 47.919578552246094,
"learning_rate": 1.647817538357072e-07,
"logits/chosen": -0.8307650089263916,
"logits/rejected": -0.8133838176727295,
"logps/chosen": -391.8092346191406,
"logps/ref_chosen": -271.39813232421875,
"logps/ref_rejected": -266.11187744140625,
"logps/rejected": -455.9736633300781,
"loss": 2.0467,
"margin_dpo/margin_mean": 69.4507064819336,
"margin_dpo/margin_std": 100.18804931640625,
"step": 311
},
{
"epoch": 0.6534031413612565,
"fcm_dpo/beta": 0.009639092721045017,
"fcm_dpo/delta": 0.01667206361889839,
"fcm_dpo/margin": 60.61787033081055,
"fcm_dpo/q_t": 0.3860943913459778,
"grad_norm": 52.11962127685547,
"learning_rate": 1.6306293495205755e-07,
"logits/chosen": -0.8416957855224609,
"logits/rejected": -0.8253231048583984,
"logps/chosen": -403.291259765625,
"logps/ref_chosen": -282.385009765625,
"logps/ref_rejected": -246.35379028320312,
"logps/rejected": -427.87786865234375,
"loss": 2.2649,
"margin_dpo/margin_mean": 60.61786651611328,
"margin_dpo/margin_std": 110.7199478149414,
"step": 312
},
{
"epoch": 0.6554973821989529,
"fcm_dpo/beta": 0.00965213030576706,
"fcm_dpo/delta": 0.01351652480661869,
"fcm_dpo/margin": 60.844261169433594,
"fcm_dpo/q_t": 0.38465067744255066,
"grad_norm": 41.01002502441406,
"learning_rate": 1.6134877823936607e-07,
"logits/chosen": -0.8802863359451294,
"logits/rejected": -0.8746137022972107,
"logps/chosen": -426.55517578125,
"logps/ref_chosen": -303.6308288574219,
"logps/ref_rejected": -273.111328125,
"logps/rejected": -456.87994384765625,
"loss": 2.21,
"margin_dpo/margin_mean": 60.844261169433594,
"margin_dpo/margin_std": 105.22071838378906,
"step": 313
},
{
"epoch": 0.6575916230366492,
"fcm_dpo/beta": 0.00963983591645956,
"fcm_dpo/delta": -0.012745541520416737,
"fcm_dpo/margin": 63.482933044433594,
"fcm_dpo/q_t": 0.37370842695236206,
"grad_norm": 44.937538146972656,
"learning_rate": 1.5963937562265522e-07,
"logits/chosen": -0.8963859677314758,
"logits/rejected": -0.8823469877243042,
"logps/chosen": -416.4776916503906,
"logps/ref_chosen": -302.3042907714844,
"logps/ref_rejected": -273.62567138671875,
"logps/rejected": -451.281982421875,
"loss": 2.0809,
"margin_dpo/margin_mean": 63.48293685913086,
"margin_dpo/margin_std": 93.04552459716797,
"step": 314
},
{
"epoch": 0.6596858638743456,
"fcm_dpo/beta": 0.009537220001220703,
"fcm_dpo/delta": -0.10702024400234222,
"fcm_dpo/margin": 73.34359741210938,
"fcm_dpo/q_t": 0.3581216335296631,
"grad_norm": 42.89246368408203,
"learning_rate": 1.5793481877199943e-07,
"logits/chosen": -0.8685057163238525,
"logits/rejected": -0.8564040064811707,
"logps/chosen": -414.5675354003906,
"logps/ref_chosen": -302.729248046875,
"logps/ref_rejected": -270.2690734863281,
"logps/rejected": -455.4509582519531,
"loss": 2.0008,
"margin_dpo/margin_mean": 73.34359741210938,
"margin_dpo/margin_std": 100.36629486083984,
"step": 315
},
{
"epoch": 0.6617801047120419,
"fcm_dpo/beta": 0.009476941078901291,
"fcm_dpo/delta": -0.06340419501066208,
"fcm_dpo/margin": 69.55949401855469,
"fcm_dpo/q_t": 0.36943817138671875,
"grad_norm": 42.75691604614258,
"learning_rate": 1.562351990976095e-07,
"logits/chosen": -0.8774769306182861,
"logits/rejected": -0.8684459328651428,
"logps/chosen": -427.0941162109375,
"logps/ref_chosen": -310.5706481933594,
"logps/ref_rejected": -272.92718505859375,
"logps/rejected": -459.0101318359375,
"loss": 2.0639,
"margin_dpo/margin_mean": 69.55949401855469,
"margin_dpo/margin_std": 102.9499740600586,
"step": 316
},
{
"epoch": 0.6638743455497382,
"fcm_dpo/beta": 0.00944075733423233,
"fcm_dpo/delta": -0.038253530859947205,
"fcm_dpo/margin": 67.34805297851562,
"fcm_dpo/q_t": 0.36406368017196655,
"grad_norm": 37.80487823486328,
"learning_rate": 1.5454060774493065e-07,
"logits/chosen": -0.871228039264679,
"logits/rejected": -0.8480809926986694,
"logps/chosen": -353.8040771484375,
"logps/ref_chosen": -253.90037536621094,
"logps/ref_rejected": -218.73301696777344,
"logps/rejected": -385.9847412109375,
"loss": 1.9695,
"margin_dpo/margin_mean": 67.34805297851562,
"margin_dpo/margin_std": 81.45759582519531,
"step": 317
},
{
"epoch": 0.6659685863874345,
"fcm_dpo/beta": 0.009361113421618938,
"fcm_dpo/delta": -0.08471996337175369,
"fcm_dpo/margin": 72.528076171875,
"fcm_dpo/q_t": 0.35852178931236267,
"grad_norm": 39.93299102783203,
"learning_rate": 1.5285113558975427e-07,
"logits/chosen": -0.8823913335800171,
"logits/rejected": -0.8606669306755066,
"logps/chosen": -383.55804443359375,
"logps/ref_chosen": -270.8228759765625,
"logps/ref_rejected": -255.31813049316406,
"logps/rejected": -440.5813293457031,
"loss": 1.9551,
"margin_dpo/margin_mean": 72.52806854248047,
"margin_dpo/margin_std": 90.9876937866211,
"step": 318
},
{
"epoch": 0.6680628272251309,
"fcm_dpo/beta": 0.009307840839028358,
"fcm_dpo/delta": -0.05707043036818504,
"fcm_dpo/margin": 70.19148254394531,
"fcm_dpo/q_t": 0.36161231994628906,
"grad_norm": 50.45790481567383,
"learning_rate": 1.5116687323334464e-07,
"logits/chosen": -0.8738488554954529,
"logits/rejected": -0.856220006942749,
"logps/chosen": -421.997802734375,
"logps/ref_chosen": -301.0028381347656,
"logps/ref_rejected": -242.3814239501953,
"logps/rejected": -433.56787109375,
"loss": 1.9534,
"margin_dpo/margin_mean": 70.19148254394531,
"margin_dpo/margin_std": 85.43022155761719,
"step": 319
},
{
"epoch": 0.6701570680628273,
"fcm_dpo/beta": 0.009273889474570751,
"fcm_dpo/delta": -0.03654306009411812,
"fcm_dpo/margin": 68.38783264160156,
"fcm_dpo/q_t": 0.3723883330821991,
"grad_norm": 44.70541000366211,
"learning_rate": 1.4948791099758052e-07,
"logits/chosen": -0.8426743149757385,
"logits/rejected": -0.844310998916626,
"logps/chosen": -417.6581726074219,
"logps/ref_chosen": -303.6225891113281,
"logps/ref_rejected": -280.854736328125,
"logps/rejected": -463.27813720703125,
"loss": 2.1098,
"margin_dpo/margin_mean": 68.3878402709961,
"margin_dpo/margin_std": 106.05199432373047,
"step": 320
},
{
"epoch": 0.6722513089005235,
"fcm_dpo/beta": 0.009273889474570751,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 48.55364990234375,
"fcm_dpo/q_t": 0.4098847508430481,
"grad_norm": 46.149600982666016,
"learning_rate": 1.478143389201113e-07,
"logits/chosen": -0.8574014902114868,
"logits/rejected": -0.8277444839477539,
"logps/chosen": -420.8229064941406,
"logps/ref_chosen": -288.98583984375,
"logps/ref_rejected": -241.1822052001953,
"logps/rejected": -421.57293701171875,
"loss": 2.3309,
"margin_dpo/margin_mean": 48.55364990234375,
"margin_dpo/margin_std": 97.7642593383789,
"step": 321
},
{
"epoch": 0.6743455497382199,
"fcm_dpo/beta": 0.009281165897846222,
"fcm_dpo/delta": 0.00784320943057537,
"fcm_dpo/margin": 63.85204315185547,
"fcm_dpo/q_t": 0.3790236711502075,
"grad_norm": 44.22325897216797,
"learning_rate": 1.461462467495284e-07,
"logits/chosen": -0.9144413471221924,
"logits/rejected": -0.8776738047599792,
"logps/chosen": -439.8148498535156,
"logps/ref_chosen": -308.54345703125,
"logps/ref_rejected": -269.7965393066406,
"logps/rejected": -464.9199523925781,
"loss": 2.1079,
"margin_dpo/margin_mean": 63.85204315185547,
"margin_dpo/margin_std": 97.95731353759766,
"step": 322
},
{
"epoch": 0.6764397905759162,
"fcm_dpo/beta": 0.009281165897846222,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 45.453025817871094,
"fcm_dpo/q_t": 0.41507554054260254,
"grad_norm": 45.833465576171875,
"learning_rate": 1.4448372394055246e-07,
"logits/chosen": -0.8904120326042175,
"logits/rejected": -0.8855328559875488,
"logps/chosen": -412.77911376953125,
"logps/ref_chosen": -282.4936218261719,
"logps/ref_rejected": -227.70529174804688,
"logps/rejected": -403.44378662109375,
"loss": 2.3986,
"margin_dpo/margin_mean": 45.45302963256836,
"margin_dpo/margin_std": 99.61731719970703,
"step": 323
},
{
"epoch": 0.6785340314136126,
"fcm_dpo/beta": 0.009196965955197811,
"fcm_dpo/delta": -0.09113486856222153,
"fcm_dpo/margin": 74.46638488769531,
"fcm_dpo/q_t": 0.3564291298389435,
"grad_norm": 44.91636276245117,
"learning_rate": 1.428268596492364e-07,
"logits/chosen": -0.8263076543807983,
"logits/rejected": -0.8249261975288391,
"logps/chosen": -353.7449951171875,
"logps/ref_chosen": -239.33839416503906,
"logps/ref_rejected": -230.53775024414062,
"logps/rejected": -419.4107666015625,
"loss": 1.9312,
"margin_dpo/margin_mean": 74.46637725830078,
"margin_dpo/margin_std": 91.86680603027344,
"step": 324
},
{
"epoch": 0.680628272251309,
"fcm_dpo/beta": 0.009207826107740402,
"fcm_dpo/delta": 0.011801640503108501,
"fcm_dpo/margin": 63.95569610595703,
"fcm_dpo/q_t": 0.38345351815223694,
"grad_norm": 52.68724822998047,
"learning_rate": 1.4117574272818386e-07,
"logits/chosen": -0.8334261178970337,
"logits/rejected": -0.8223669528961182,
"logps/chosen": -409.6816101074219,
"logps/ref_chosen": -280.62896728515625,
"logps/ref_rejected": -270.49749755859375,
"logps/rejected": -463.5058288574219,
"loss": 2.1888,
"margin_dpo/margin_mean": 63.95569610595703,
"margin_dpo/margin_std": 107.58307647705078,
"step": 325
},
{
"epoch": 0.6827225130890052,
"fcm_dpo/beta": 0.009235017001628876,
"fcm_dpo/delta": 0.029486754909157753,
"fcm_dpo/margin": 61.95960235595703,
"fcm_dpo/q_t": 0.38181790709495544,
"grad_norm": 45.73406219482422,
"learning_rate": 1.3953046172178413e-07,
"logits/chosen": -0.9344097375869751,
"logits/rejected": -0.9264284372329712,
"logps/chosen": -363.0096435546875,
"logps/ref_chosen": -240.98712158203125,
"logps/ref_rejected": -261.01824951171875,
"logps/rejected": -445.0003662109375,
"loss": 2.1685,
"margin_dpo/margin_mean": 61.95960235595703,
"margin_dpo/margin_std": 99.9350357055664,
"step": 326
},
{
"epoch": 0.6848167539267016,
"fcm_dpo/beta": 0.009148889221251011,
"fcm_dpo/delta": -0.09369934350252151,
"fcm_dpo/margin": 75.11619567871094,
"fcm_dpo/q_t": 0.35875385999679565,
"grad_norm": 43.30995559692383,
"learning_rate": 1.3789110486146468e-07,
"logits/chosen": -0.8843097686767578,
"logits/rejected": -0.8651639223098755,
"logps/chosen": -392.42529296875,
"logps/ref_chosen": -279.52001953125,
"logps/ref_rejected": -269.5309143066406,
"logps/rejected": -457.5523986816406,
"loss": 1.9784,
"margin_dpo/margin_mean": 75.11619567871094,
"margin_dpo/margin_std": 99.89405822753906,
"step": 327
},
{
"epoch": 0.6869109947643979,
"fcm_dpo/beta": 0.009126955643296242,
"fcm_dpo/delta": -0.024003077298402786,
"fcm_dpo/margin": 68.20533752441406,
"fcm_dpo/q_t": 0.3691180944442749,
"grad_norm": 41.92138671875,
"learning_rate": 1.362577600609588e-07,
"logits/chosen": -0.843334436416626,
"logits/rejected": -0.846055269241333,
"logps/chosen": -423.91888427734375,
"logps/ref_chosen": -301.033447265625,
"logps/ref_rejected": -284.2018127441406,
"logps/rejected": -475.2925720214844,
"loss": 1.9971,
"margin_dpo/margin_mean": 68.20533752441406,
"margin_dpo/margin_std": 86.52494812011719,
"step": 328
},
{
"epoch": 0.6890052356020943,
"fcm_dpo/beta": 0.009110012091696262,
"fcm_dpo/delta": -0.018581591546535492,
"fcm_dpo/margin": 67.77523803710938,
"fcm_dpo/q_t": 0.381742000579834,
"grad_norm": 44.83687973022461,
"learning_rate": 1.3463051491159093e-07,
"logits/chosen": -0.8453918695449829,
"logits/rejected": -0.8298577666282654,
"logps/chosen": -452.70306396484375,
"logps/ref_chosen": -319.9888610839844,
"logps/ref_rejected": -307.5588684082031,
"logps/rejected": -508.0483093261719,
"loss": 2.166,
"margin_dpo/margin_mean": 67.77523803710938,
"margin_dpo/margin_std": 113.28670501708984,
"step": 329
},
{
"epoch": 0.6910994764397905,
"fcm_dpo/beta": 0.009131439961493015,
"fcm_dpo/delta": 0.023494014516472816,
"fcm_dpo/margin": 63.28268051147461,
"fcm_dpo/q_t": 0.3786526322364807,
"grad_norm": 58.67969512939453,
"learning_rate": 1.3300945667758012e-07,
"logits/chosen": -0.8560429811477661,
"logits/rejected": -0.8652155995368958,
"logps/chosen": -430.5826721191406,
"logps/ref_chosen": -301.11474609375,
"logps/ref_rejected": -299.6709899902344,
"logps/rejected": -492.42156982421875,
"loss": 2.0877,
"margin_dpo/margin_mean": 63.282684326171875,
"margin_dpo/margin_std": 91.25841522216797,
"step": 330
},
{
"epoch": 0.6931937172774869,
"fcm_dpo/beta": 0.009124303236603737,
"fcm_dpo/delta": -0.007818717509508133,
"fcm_dpo/margin": 66.56329345703125,
"fcm_dpo/q_t": 0.3789626359939575,
"grad_norm": 48.28733444213867,
"learning_rate": 1.3139467229135998e-07,
"logits/chosen": -0.8760262131690979,
"logits/rejected": -0.8653951287269592,
"logps/chosen": -396.9462890625,
"logps/ref_chosen": -277.59149169921875,
"logps/ref_rejected": -256.03173828125,
"logps/rejected": -441.94989013671875,
"loss": 2.1662,
"margin_dpo/margin_mean": 66.56329345703125,
"margin_dpo/margin_std": 110.90050506591797,
"step": 331
},
{
"epoch": 0.6952879581151833,
"fcm_dpo/beta": 0.009117452427744865,
"fcm_dpo/delta": -0.007511162664741278,
"fcm_dpo/margin": 66.58164978027344,
"fcm_dpo/q_t": 0.37895503640174866,
"grad_norm": 43.0096321105957,
"learning_rate": 1.2978624834891626e-07,
"logits/chosen": -0.8695042133331299,
"logits/rejected": -0.8489128947257996,
"logps/chosen": -395.18682861328125,
"logps/ref_chosen": -269.9737243652344,
"logps/ref_rejected": -235.03524780273438,
"logps/rejected": -426.8299865722656,
"loss": 2.169,
"margin_dpo/margin_mean": 66.58164978027344,
"margin_dpo/margin_std": 109.51850128173828,
"step": 332
},
{
"epoch": 0.6973821989528796,
"fcm_dpo/beta": 0.009194210171699524,
"fcm_dpo/delta": 0.08383534103631973,
"fcm_dpo/margin": 56.612815856933594,
"fcm_dpo/q_t": 0.3920256793498993,
"grad_norm": 47.510498046875,
"learning_rate": 1.281842711051438e-07,
"logits/chosen": -0.919110119342804,
"logits/rejected": -0.8911232352256775,
"logps/chosen": -421.38922119140625,
"logps/ref_chosen": -296.76300048828125,
"logps/ref_rejected": -265.97503662109375,
"logps/rejected": -447.214111328125,
"loss": 2.1777,
"margin_dpo/margin_mean": 56.61281204223633,
"margin_dpo/margin_std": 90.5257568359375,
"step": 333
},
{
"epoch": 0.6994764397905759,
"fcm_dpo/beta": 0.009175931103527546,
"fcm_dpo/delta": -0.019901270046830177,
"fcm_dpo/margin": 67.42300415039062,
"fcm_dpo/q_t": 0.37146827578544617,
"grad_norm": 42.73683547973633,
"learning_rate": 1.2658882646922033e-07,
"logits/chosen": -0.8545191884040833,
"logits/rejected": -0.8338840007781982,
"logps/chosen": -421.5471496582031,
"logps/ref_chosen": -301.0367431640625,
"logps/ref_rejected": -268.86993408203125,
"logps/rejected": -456.8033447265625,
"loss": 2.0778,
"margin_dpo/margin_mean": 67.42301177978516,
"margin_dpo/margin_std": 98.09639739990234,
"step": 334
},
{
"epoch": 0.7015706806282722,
"fcm_dpo/beta": 0.009171090088784695,
"fcm_dpo/delta": -0.005277520511299372,
"fcm_dpo/margin": 65.96360778808594,
"fcm_dpo/q_t": 0.3779684901237488,
"grad_norm": 64.892333984375,
"learning_rate": 1.2500000000000005e-07,
"logits/chosen": -0.8495713472366333,
"logits/rejected": -0.8469050526618958,
"logps/chosen": -410.298095703125,
"logps/ref_chosen": -276.1327819824219,
"logps/ref_rejected": -243.44451904296875,
"logps/rejected": -443.5734558105469,
"loss": 2.1477,
"margin_dpo/margin_mean": 65.96360778808594,
"margin_dpo/margin_std": 105.33401489257812,
"step": 335
},
{
"epoch": 0.7036649214659686,
"fcm_dpo/beta": 0.009184801019728184,
"fcm_dpo/delta": 0.014939000830054283,
"fcm_dpo/margin": 63.79405212402344,
"fcm_dpo/q_t": 0.3872807025909424,
"grad_norm": 51.2830810546875,
"learning_rate": 1.2341787690142435e-07,
"logits/chosen": -0.8631800413131714,
"logits/rejected": -0.8063917756080627,
"logps/chosen": -378.32537841796875,
"logps/ref_chosen": -246.26263427734375,
"logps/ref_rejected": -261.06573486328125,
"logps/rejected": -456.9225158691406,
"loss": 2.1847,
"margin_dpo/margin_mean": 63.79405212402344,
"margin_dpo/margin_std": 112.00708770751953,
"step": 336
},
{
"epoch": 0.7057591623036649,
"fcm_dpo/beta": 0.009108812548220158,
"fcm_dpo/delta": -0.08307711035013199,
"fcm_dpo/margin": 74.37037658691406,
"fcm_dpo/q_t": 0.36212968826293945,
"grad_norm": 44.643272399902344,
"learning_rate": 1.2184254201795363e-07,
"logits/chosen": -0.878758430480957,
"logits/rejected": -0.8544448614120483,
"logps/chosen": -390.03424072265625,
"logps/ref_chosen": -266.9938049316406,
"logps/ref_rejected": -253.01400756835938,
"logps/rejected": -450.4248352050781,
"loss": 1.9699,
"margin_dpo/margin_mean": 74.37037658691406,
"margin_dpo/margin_std": 96.72308349609375,
"step": 337
},
{
"epoch": 0.7078534031413612,
"fcm_dpo/beta": 0.009086714126169682,
"fcm_dpo/delta": -0.02429015375673771,
"fcm_dpo/margin": 68.53694152832031,
"fcm_dpo/q_t": 0.3729407787322998,
"grad_norm": 54.19309616088867,
"learning_rate": 1.202740798300168e-07,
"logits/chosen": -0.8930896520614624,
"logits/rejected": -0.8791784048080444,
"logps/chosen": -393.632568359375,
"logps/ref_chosen": -276.5925598144531,
"logps/ref_rejected": -233.979248046875,
"logps/rejected": -419.55615234375,
"loss": 2.0647,
"margin_dpo/margin_mean": 68.53694152832031,
"margin_dpo/margin_std": 99.26116180419922,
"step": 338
},
{
"epoch": 0.7099476439790576,
"fcm_dpo/beta": 0.009022459387779236,
"fcm_dpo/delta": -0.07096432149410248,
"fcm_dpo/margin": 73.84014892578125,
"fcm_dpo/q_t": 0.36225461959838867,
"grad_norm": 40.96029281616211,
"learning_rate": 1.1871257444948096e-07,
"logits/chosen": -0.9017908573150635,
"logits/rejected": -0.8958266973495483,
"logps/chosen": -429.28839111328125,
"logps/ref_chosen": -303.5277404785156,
"logps/ref_rejected": -283.11785888671875,
"logps/rejected": -482.71868896484375,
"loss": 2.0518,
"margin_dpo/margin_mean": 73.84014892578125,
"margin_dpo/margin_std": 105.82533264160156,
"step": 339
},
{
"epoch": 0.7120418848167539,
"fcm_dpo/beta": 0.009045583195984364,
"fcm_dpo/delta": 0.025595922023057938,
"fcm_dpo/margin": 63.66380310058594,
"fcm_dpo/q_t": 0.3872483968734741,
"grad_norm": 55.380645751953125,
"learning_rate": 1.1715810961514072e-07,
"logits/chosen": -0.8384690284729004,
"logits/rejected": -0.8374818563461304,
"logps/chosen": -392.0450744628906,
"logps/ref_chosen": -261.5257568359375,
"logps/ref_rejected": -259.39862060546875,
"logps/rejected": -453.5817565917969,
"loss": 2.2603,
"margin_dpo/margin_mean": 63.66379928588867,
"margin_dpo/margin_std": 117.7413558959961,
"step": 340
},
{
"epoch": 0.7141361256544503,
"fcm_dpo/beta": 0.009045583195984364,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 54.00872802734375,
"fcm_dpo/q_t": 0.40302836894989014,
"grad_norm": 79.38268280029297,
"learning_rate": 1.1561076868822755e-07,
"logits/chosen": -0.8766363859176636,
"logits/rejected": -0.8552701473236084,
"logps/chosen": -468.98046875,
"logps/ref_chosen": -315.9035339355469,
"logps/ref_rejected": -308.0238952636719,
"logps/rejected": -515.1095581054688,
"loss": 2.4203,
"margin_dpo/margin_mean": 54.008731842041016,
"margin_dpo/margin_std": 118.41658020019531,
"step": 341
},
{
"epoch": 0.7162303664921466,
"fcm_dpo/beta": 0.009042193181812763,
"fcm_dpo/delta": -0.0037485104985535145,
"fcm_dpo/margin": 66.7451171875,
"fcm_dpo/q_t": 0.37250685691833496,
"grad_norm": 46.937744140625,
"learning_rate": 1.1407063464793965e-07,
"logits/chosen": -0.8617367148399353,
"logits/rejected": -0.862312376499176,
"logps/chosen": -395.92767333984375,
"logps/ref_chosen": -269.17864990234375,
"logps/ref_rejected": -260.89459228515625,
"logps/rejected": -454.38873291015625,
"loss": 2.0708,
"margin_dpo/margin_mean": 66.7451171875,
"margin_dpo/margin_std": 94.46238708496094,
"step": 342
},
{
"epoch": 0.7183246073298429,
"fcm_dpo/beta": 0.00908593088388443,
"fcm_dpo/delta": 0.04825450852513313,
"fcm_dpo/margin": 61.01898956298828,
"fcm_dpo/q_t": 0.38600867986679077,
"grad_norm": 45.07503128051758,
"learning_rate": 1.125377900869913e-07,
"logits/chosen": -0.8688806295394897,
"logits/rejected": -0.8506696224212646,
"logps/chosen": -441.1053466796875,
"logps/ref_chosen": -310.7200012207031,
"logps/ref_rejected": -263.52142333984375,
"logps/rejected": -454.92578125,
"loss": 2.1457,
"margin_dpo/margin_mean": 61.01898956298828,
"margin_dpo/margin_std": 95.47964477539062,
"step": 343
},
{
"epoch": 0.7204188481675393,
"fcm_dpo/beta": 0.00908383447676897,
"fcm_dpo/delta": -0.0023072678595781326,
"fcm_dpo/margin": 66.29010009765625,
"fcm_dpo/q_t": 0.3743930459022522,
"grad_norm": 47.028038024902344,
"learning_rate": 1.110123172071844e-07,
"logits/chosen": -0.8713587522506714,
"logits/rejected": -0.8560975193977356,
"logps/chosen": -432.2508544921875,
"logps/ref_chosen": -301.7999267578125,
"logps/ref_rejected": -257.91455078125,
"logps/rejected": -454.65557861328125,
"loss": 2.1082,
"margin_dpo/margin_mean": 66.29010009765625,
"margin_dpo/margin_std": 98.27161407470703,
"step": 344
},
{
"epoch": 0.7225130890052356,
"fcm_dpo/beta": 0.009164392948150635,
"fcm_dpo/delta": 0.08829258382320404,
"fcm_dpo/margin": 56.3316535949707,
"fcm_dpo/q_t": 0.3931768536567688,
"grad_norm": 51.22610092163086,
"learning_rate": 1.09494297815e-07,
"logits/chosen": -0.8609453439712524,
"logits/rejected": -0.856240451335907,
"logps/chosen": -410.7709655761719,
"logps/ref_chosen": -283.0184631347656,
"logps/ref_rejected": -266.84130859375,
"logps/rejected": -450.92547607421875,
"loss": 2.17,
"margin_dpo/margin_mean": 56.3316535949707,
"margin_dpo/margin_std": 85.86083984375,
"step": 345
},
{
"epoch": 0.724607329842932,
"fcm_dpo/beta": 0.00912663247436285,
"fcm_dpo/delta": -0.04128880053758621,
"fcm_dpo/margin": 69.97613525390625,
"fcm_dpo/q_t": 0.3670651316642761,
"grad_norm": 41.6786003112793,
"learning_rate": 1.0798381331721107e-07,
"logits/chosen": -0.9368342161178589,
"logits/rejected": -0.892183244228363,
"logps/chosen": -405.13299560546875,
"logps/ref_chosen": -268.4412536621094,
"logps/ref_rejected": -227.82054138183594,
"logps/rejected": -434.48846435546875,
"loss": 2.0879,
"margin_dpo/margin_mean": 69.97613525390625,
"margin_dpo/margin_std": 104.63372039794922,
"step": 346
},
{
"epoch": 0.7267015706806282,
"fcm_dpo/beta": 0.009183524176478386,
"fcm_dpo/delta": 0.062142547219991684,
"fcm_dpo/margin": 58.9327392578125,
"fcm_dpo/q_t": 0.38579636812210083,
"grad_norm": 49.11149978637695,
"learning_rate": 1.0648094471651722e-07,
"logits/chosen": -0.8082336187362671,
"logits/rejected": -0.8258241415023804,
"logps/chosen": -401.04315185546875,
"logps/ref_chosen": -273.70355224609375,
"logps/ref_rejected": -243.6602325439453,
"logps/rejected": -429.93255615234375,
"loss": 2.1686,
"margin_dpo/margin_mean": 58.932735443115234,
"margin_dpo/margin_std": 94.13764953613281,
"step": 347
},
{
"epoch": 0.7287958115183246,
"fcm_dpo/beta": 0.009183524176478386,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 51.678627014160156,
"fcm_dpo/q_t": 0.40437978506088257,
"grad_norm": 41.95808410644531,
"learning_rate": 1.0498577260720048e-07,
"logits/chosen": -0.8909600973129272,
"logits/rejected": -0.8730888366699219,
"logps/chosen": -410.87091064453125,
"logps/ref_chosen": -285.64141845703125,
"logps/ref_rejected": -265.61993408203125,
"logps/rejected": -442.528076171875,
"loss": 2.2669,
"margin_dpo/margin_mean": 51.67862319946289,
"margin_dpo/margin_std": 94.15160369873047,
"step": 348
},
{
"epoch": 0.7308900523560209,
"fcm_dpo/beta": 0.009106711484491825,
"fcm_dpo/delta": -0.08399360626935959,
"fcm_dpo/margin": 74.48051452636719,
"fcm_dpo/q_t": 0.36194926500320435,
"grad_norm": 61.8884162902832,
"learning_rate": 1.0349837717080347e-07,
"logits/chosen": -0.8422749638557434,
"logits/rejected": -0.8339679837226868,
"logps/chosen": -453.944091796875,
"logps/ref_chosen": -328.3175048828125,
"logps/ref_rejected": -292.3702392578125,
"logps/rejected": -492.4773254394531,
"loss": 2.0451,
"margin_dpo/margin_mean": 74.48051452636719,
"margin_dpo/margin_std": 107.34288024902344,
"step": 349
},
{
"epoch": 0.7329842931937173,
"fcm_dpo/beta": 0.009116199798882008,
"fcm_dpo/delta": 0.010413175448775291,
"fcm_dpo/margin": 64.74201202392578,
"fcm_dpo/q_t": 0.3800838589668274,
"grad_norm": 43.500823974609375,
"learning_rate": 1.0201883817182949e-07,
"logits/chosen": -0.8396041989326477,
"logits/rejected": -0.8510884046554565,
"logps/chosen": -426.5689392089844,
"logps/ref_chosen": -292.8046569824219,
"logps/ref_rejected": -250.36270141601562,
"logps/rejected": -448.8689880371094,
"loss": 2.1681,
"margin_dpo/margin_mean": 64.74201202392578,
"margin_dpo/margin_std": 103.9215087890625,
"step": 350
},
{
"epoch": 0.7350785340314137,
"fcm_dpo/beta": 0.009116199798882008,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 43.26811981201172,
"fcm_dpo/q_t": 0.420653760433197,
"grad_norm": 50.497249603271484,
"learning_rate": 1.0054723495346482e-07,
"logits/chosen": -0.8941627740859985,
"logits/rejected": -0.8819515705108643,
"logps/chosen": -438.43524169921875,
"logps/ref_chosen": -311.8890075683594,
"logps/ref_rejected": -263.59033203125,
"logps/rejected": -433.4046630859375,
"loss": 2.4957,
"margin_dpo/margin_mean": 43.26811981201172,
"margin_dpo/margin_std": 108.7548828125,
"step": 351
},
{
"epoch": 0.7371727748691099,
"fcm_dpo/beta": 0.00901816040277481,
"fcm_dpo/delta": -0.10812651365995407,
"fcm_dpo/margin": 77.67781829833984,
"fcm_dpo/q_t": 0.3553728461265564,
"grad_norm": 52.22137451171875,
"learning_rate": 9.908364643332398e-08,
"logits/chosen": -0.8435696363449097,
"logits/rejected": -0.8204574584960938,
"logps/chosen": -373.21844482421875,
"logps/ref_chosen": -254.9078826904297,
"logps/ref_rejected": -257.16204833984375,
"logps/rejected": -453.150390625,
"loss": 1.9854,
"margin_dpo/margin_mean": 77.67781829833984,
"margin_dpo/margin_std": 100.93040466308594,
"step": 352
},
{
"epoch": 0.7392670157068063,
"fcm_dpo/beta": 0.009035843424499035,
"fcm_dpo/delta": 0.019588714465498924,
"fcm_dpo/margin": 64.36027526855469,
"fcm_dpo/q_t": 0.38209855556488037,
"grad_norm": 48.67715072631836,
"learning_rate": 9.76281510992176e-08,
"logits/chosen": -0.8544372320175171,
"logits/rejected": -0.8528692722320557,
"logps/chosen": -400.0290832519531,
"logps/ref_chosen": -270.37603759765625,
"logps/ref_rejected": -264.65234375,
"logps/rejected": -458.6656494140625,
"loss": 2.1434,
"margin_dpo/margin_mean": 64.36028289794922,
"margin_dpo/margin_std": 103.38621520996094,
"step": 353
},
{
"epoch": 0.7413612565445026,
"fcm_dpo/beta": 0.009035843424499035,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 46.03172302246094,
"fcm_dpo/q_t": 0.41462138295173645,
"grad_norm": 49.20920944213867,
"learning_rate": 9.618082700494318e-08,
"logits/chosen": -0.8525259494781494,
"logits/rejected": -0.8770024180412292,
"logps/chosen": -389.23553466796875,
"logps/ref_chosen": -257.6485595703125,
"logps/ref_rejected": -246.94500732421875,
"logps/rejected": -424.563720703125,
"loss": 2.4353,
"margin_dpo/margin_mean": 46.03172302246094,
"margin_dpo/margin_std": 105.22050476074219,
"step": 354
},
{
"epoch": 0.743455497382199,
"fcm_dpo/beta": 0.008985553868114948,
"fcm_dpo/delta": -0.055811259895563126,
"fcm_dpo/margin": 72.57886505126953,
"fcm_dpo/q_t": 0.36635830998420715,
"grad_norm": 44.5856819152832,
"learning_rate": 9.474175176609956e-08,
"logits/chosen": -0.8917354345321655,
"logits/rejected": -0.8934190273284912,
"logps/chosen": -419.35760498046875,
"logps/ref_chosen": -293.35333251953125,
"logps/ref_rejected": -275.6051940917969,
"logps/rejected": -474.1883239746094,
"loss": 2.1127,
"margin_dpo/margin_mean": 72.57887268066406,
"margin_dpo/margin_std": 113.52491760253906,
"step": 355
},
{
"epoch": 0.7455497382198953,
"fcm_dpo/beta": 0.008985553868114948,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 49.117393493652344,
"fcm_dpo/q_t": 0.40446412563323975,
"grad_norm": 45.01557159423828,
"learning_rate": 9.331100255592436e-08,
"logits/chosen": -0.8282819986343384,
"logits/rejected": -0.8507692813873291,
"logps/chosen": -324.16046142578125,
"logps/ref_chosen": -204.25550842285156,
"logps/ref_rejected": -213.47314453125,
"logps/rejected": -382.49554443359375,
"loss": 2.2599,
"margin_dpo/margin_mean": 49.117393493652344,
"margin_dpo/margin_std": 83.45521545410156,
"step": 356
},
{
"epoch": 0.7476439790575916,
"fcm_dpo/beta": 0.008958682417869568,
"fcm_dpo/delta": -0.029949765652418137,
"fcm_dpo/margin": 70.10694885253906,
"fcm_dpo/q_t": 0.3753436207771301,
"grad_norm": 46.91786575317383,
"learning_rate": 9.18886561011557e-08,
"logits/chosen": -0.7969743013381958,
"logits/rejected": -0.7989677786827087,
"logps/chosen": -395.6004333496094,
"logps/ref_chosen": -266.37054443359375,
"logps/ref_rejected": -239.04421997070312,
"logps/rejected": -438.381103515625,
"loss": 2.0975,
"margin_dpo/margin_mean": 70.10694885253906,
"margin_dpo/margin_std": 107.07955169677734,
"step": 357
},
{
"epoch": 0.749738219895288,
"fcm_dpo/beta": 0.008873265236616135,
"fcm_dpo/delta": -0.0958036258816719,
"fcm_dpo/margin": 77.66807556152344,
"fcm_dpo/q_t": 0.3574942648410797,
"grad_norm": 39.41917037963867,
"learning_rate": 9.047478867791731e-08,
"logits/chosen": -0.8658862709999084,
"logits/rejected": -0.8504045009613037,
"logps/chosen": -414.51318359375,
"logps/ref_chosen": -299.1474609375,
"logps/ref_rejected": -257.2527770996094,
"logps/rejected": -450.28656005859375,
"loss": 1.9911,
"margin_dpo/margin_mean": 77.66807556152344,
"margin_dpo/margin_std": 103.13372802734375,
"step": 358
},
{
"epoch": 0.7518324607329843,
"fcm_dpo/beta": 0.008872843347489834,
"fcm_dpo/delta": -0.0004753024550154805,
"fcm_dpo/margin": 67.67241668701172,
"fcm_dpo/q_t": 0.3736295998096466,
"grad_norm": 44.54613494873047,
"learning_rate": 8.906947610762825e-08,
"logits/chosen": -0.8287288546562195,
"logits/rejected": -0.8419591188430786,
"logps/chosen": -421.3351745605469,
"logps/ref_chosen": -302.99786376953125,
"logps/ref_rejected": -260.4137268066406,
"logps/rejected": -446.4234619140625,
"loss": 2.0469,
"margin_dpo/margin_mean": 67.67241668701172,
"margin_dpo/margin_std": 93.52236938476562,
"step": 359
},
{
"epoch": 0.7539267015706806,
"fcm_dpo/beta": 0.008935142308473587,
"fcm_dpo/delta": 0.06996749341487885,
"fcm_dpo/margin": 59.736488342285156,
"fcm_dpo/q_t": 0.3867560029029846,
"grad_norm": 49.16413497924805,
"learning_rate": 8.76727937529367e-08,
"logits/chosen": -0.8559695482254028,
"logits/rejected": -0.8482515215873718,
"logps/chosen": -437.3080749511719,
"logps/ref_chosen": -309.6114501953125,
"logps/ref_rejected": -256.64276123046875,
"logps/rejected": -444.07586669921875,
"loss": 2.2327,
"margin_dpo/margin_mean": 59.736488342285156,
"margin_dpo/margin_std": 104.10269927978516,
"step": 360
},
{
"epoch": 0.7560209424083769,
"fcm_dpo/beta": 0.008872722275555134,
"fcm_dpo/delta": -0.07010410726070404,
"fcm_dpo/margin": 74.99646759033203,
"fcm_dpo/q_t": 0.36600881814956665,
"grad_norm": 44.427391052246094,
"learning_rate": 8.628481651367875e-08,
"logits/chosen": -0.820360541343689,
"logits/rejected": -0.8067670464515686,
"logps/chosen": -368.1976013183594,
"logps/ref_chosen": -263.3797607421875,
"logps/ref_rejected": -271.1795654296875,
"logps/rejected": -450.9938659667969,
"loss": 2.082,
"margin_dpo/margin_mean": 74.99646759033203,
"margin_dpo/margin_std": 112.66693878173828,
"step": 361
},
{
"epoch": 0.7581151832460733,
"fcm_dpo/beta": 0.008882660418748856,
"fcm_dpo/delta": 0.011194163002073765,
"fcm_dpo/margin": 66.3613510131836,
"fcm_dpo/q_t": 0.3751099109649658,
"grad_norm": 41.053009033203125,
"learning_rate": 8.490561882286135e-08,
"logits/chosen": -0.835723340511322,
"logits/rejected": -0.826585590839386,
"logps/chosen": -419.86328125,
"logps/ref_chosen": -303.2583923339844,
"logps/ref_rejected": -243.22714233398438,
"logps/rejected": -426.193359375,
"loss": 2.0487,
"margin_dpo/margin_mean": 66.36134338378906,
"margin_dpo/margin_std": 89.87059783935547,
"step": 362
},
{
"epoch": 0.7602094240837697,
"fcm_dpo/beta": 0.00888850912451744,
"fcm_dpo/delta": 0.006582686211913824,
"fcm_dpo/margin": 66.80625915527344,
"fcm_dpo/q_t": 0.37992870807647705,
"grad_norm": 45.540409088134766,
"learning_rate": 8.353527464267104e-08,
"logits/chosen": -0.8588496446609497,
"logits/rejected": -0.8221259713172913,
"logps/chosen": -424.4974365234375,
"logps/ref_chosen": -303.34722900390625,
"logps/ref_rejected": -262.04290771484375,
"logps/rejected": -449.9993896484375,
"loss": 2.1413,
"margin_dpo/margin_mean": 66.80625915527344,
"margin_dpo/margin_std": 107.60731506347656,
"step": 363
},
{
"epoch": 0.762303664921466,
"fcm_dpo/beta": 0.008958369493484497,
"fcm_dpo/delta": 0.0782887190580368,
"fcm_dpo/margin": 58.69502639770508,
"fcm_dpo/q_t": 0.393614798784256,
"grad_norm": 44.03261947631836,
"learning_rate": 8.217385746050742e-08,
"logits/chosen": -0.8114150166511536,
"logits/rejected": -0.8196886777877808,
"logps/chosen": -423.57843017578125,
"logps/ref_chosen": -285.54376220703125,
"logps/ref_rejected": -284.85406494140625,
"logps/rejected": -481.583740234375,
"loss": 2.2684,
"margin_dpo/margin_mean": 58.69502639770508,
"margin_dpo/margin_std": 108.13606262207031,
"step": 364
},
{
"epoch": 0.7643979057591623,
"fcm_dpo/beta": 0.00898480974137783,
"fcm_dpo/delta": 0.029471566900610924,
"fcm_dpo/margin": 63.68663787841797,
"fcm_dpo/q_t": 0.38519859313964844,
"grad_norm": 43.16640090942383,
"learning_rate": 8.082144028504231e-08,
"logits/chosen": -0.8398497104644775,
"logits/rejected": -0.8438706398010254,
"logps/chosen": -400.2857360839844,
"logps/ref_chosen": -274.7878112792969,
"logps/ref_rejected": -256.5738220214844,
"logps/rejected": -445.7584228515625,
"loss": 2.1472,
"margin_dpo/margin_mean": 63.68663787841797,
"margin_dpo/margin_std": 101.01165771484375,
"step": 365
},
{
"epoch": 0.7664921465968586,
"fcm_dpo/beta": 0.008959110826253891,
"fcm_dpo/delta": -0.028643431141972542,
"fcm_dpo/margin": 69.96736145019531,
"fcm_dpo/q_t": 0.370860755443573,
"grad_norm": 44.536624908447266,
"learning_rate": 7.947809564230445e-08,
"logits/chosen": -0.8110489845275879,
"logits/rejected": -0.8219121694564819,
"logps/chosen": -404.80389404296875,
"logps/ref_chosen": -286.6496276855469,
"logps/ref_rejected": -251.97589111328125,
"logps/rejected": -440.0975341796875,
"loss": 2.0754,
"margin_dpo/margin_mean": 69.96736145019531,
"margin_dpo/margin_std": 102.69366455078125,
"step": 366
},
{
"epoch": 0.768586387434555,
"fcm_dpo/beta": 0.008931051939725876,
"fcm_dpo/delta": -0.03136768937110901,
"fcm_dpo/margin": 70.47213745117188,
"fcm_dpo/q_t": 0.3691490888595581,
"grad_norm": 46.853302001953125,
"learning_rate": 7.814389557179016e-08,
"logits/chosen": -0.8272769451141357,
"logits/rejected": -0.814450740814209,
"logps/chosen": -418.5333557128906,
"logps/ref_chosen": -301.9449768066406,
"logps/ref_rejected": -265.57000732421875,
"logps/rejected": -452.6304931640625,
"loss": 2.0294,
"margin_dpo/margin_mean": 70.47213745117188,
"margin_dpo/margin_std": 95.5082015991211,
"step": 367
},
{
"epoch": 0.7706806282722513,
"fcm_dpo/beta": 0.008782615885138512,
"fcm_dpo/delta": -0.1675986796617508,
"fcm_dpo/margin": 85.94717407226562,
"fcm_dpo/q_t": 0.33995819091796875,
"grad_norm": 43.90300369262695,
"learning_rate": 7.681891162260015e-08,
"logits/chosen": -0.8048295378684998,
"logits/rejected": -0.8147680163383484,
"logps/chosen": -403.388671875,
"logps/ref_chosen": -294.62652587890625,
"logps/ref_rejected": -258.7644958496094,
"logps/rejected": -453.47381591796875,
"loss": 1.8053,
"margin_dpo/margin_mean": 85.94717407226562,
"margin_dpo/margin_std": 92.41850280761719,
"step": 368
},
{
"epoch": 0.7727748691099476,
"fcm_dpo/beta": 0.008831196464598179,
"fcm_dpo/delta": 0.05516188219189644,
"fcm_dpo/margin": 62.035972595214844,
"fcm_dpo/q_t": 0.3846426010131836,
"grad_norm": 47.85908889770508,
"learning_rate": 7.550321484960251e-08,
"logits/chosen": -0.8620946407318115,
"logits/rejected": -0.847419023513794,
"logps/chosen": -401.12396240234375,
"logps/ref_chosen": -282.5057067871094,
"logps/ref_rejected": -266.4173889160156,
"logps/rejected": -447.0716247558594,
"loss": 2.1281,
"margin_dpo/margin_mean": 62.035972595214844,
"margin_dpo/margin_std": 93.12213134765625,
"step": 369
},
{
"epoch": 0.774869109947644,
"fcm_dpo/beta": 0.00876467302441597,
"fcm_dpo/delta": -0.07561240345239639,
"fcm_dpo/margin": 76.5029296875,
"fcm_dpo/q_t": 0.3597964942455292,
"grad_norm": 35.33626174926758,
"learning_rate": 7.419687580962222e-08,
"logits/chosen": -0.8643448948860168,
"logits/rejected": -0.8831155896186829,
"logps/chosen": -359.3563232421875,
"logps/ref_chosen": -251.0064239501953,
"logps/ref_rejected": -238.1302490234375,
"logps/rejected": -422.98309326171875,
"loss": 2.0009,
"margin_dpo/margin_mean": 76.5029296875,
"margin_dpo/margin_std": 102.87065124511719,
"step": 370
},
{
"epoch": 0.7769633507853403,
"fcm_dpo/beta": 0.00883510336279869,
"fcm_dpo/delta": 0.08003556728363037,
"fcm_dpo/margin": 59.32502365112305,
"fcm_dpo/q_t": 0.3900216221809387,
"grad_norm": 58.52687454223633,
"learning_rate": 7.289996455765748e-08,
"logits/chosen": -0.8131742477416992,
"logits/rejected": -0.807681679725647,
"logps/chosen": -420.22088623046875,
"logps/ref_chosen": -296.6591796875,
"logps/ref_rejected": -251.14675903320312,
"logps/rejected": -434.03350830078125,
"loss": 2.1488,
"margin_dpo/margin_mean": 59.32502365112305,
"margin_dpo/margin_std": 90.69882202148438,
"step": 371
},
{
"epoch": 0.7790575916230367,
"fcm_dpo/beta": 0.00876561552286148,
"fcm_dpo/delta": -0.0789603590965271,
"fcm_dpo/margin": 76.84803771972656,
"fcm_dpo/q_t": 0.36093005537986755,
"grad_norm": 38.11396408081055,
"learning_rate": 7.161255064312283e-08,
"logits/chosen": -0.7843295931816101,
"logits/rejected": -0.7872138023376465,
"logps/chosen": -448.9085998535156,
"logps/ref_chosen": -331.3714599609375,
"logps/ref_rejected": -285.5594482421875,
"logps/rejected": -479.9446105957031,
"loss": 1.9991,
"margin_dpo/margin_mean": 76.8480453491211,
"margin_dpo/margin_std": 101.65011596679688,
"step": 372
},
{
"epoch": 0.7811518324607329,
"fcm_dpo/beta": 0.008744609542191029,
"fcm_dpo/delta": -0.023992624133825302,
"fcm_dpo/margin": 71.1864013671875,
"fcm_dpo/q_t": 0.36455684900283813,
"grad_norm": 48.70323944091797,
"learning_rate": 7.033470310611945e-08,
"logits/chosen": -0.8801475763320923,
"logits/rejected": -0.8626178503036499,
"logps/chosen": -430.4300537109375,
"logps/ref_chosen": -321.9429931640625,
"logps/ref_rejected": -271.2239990234375,
"logps/rejected": -450.8974914550781,
"loss": 1.9955,
"margin_dpo/margin_mean": 71.1864013671875,
"margin_dpo/margin_std": 88.515869140625,
"step": 373
},
{
"epoch": 0.7832460732984293,
"fcm_dpo/beta": 0.008817512542009354,
"fcm_dpo/delta": 0.08302365988492966,
"fcm_dpo/margin": 59.11943054199219,
"fcm_dpo/q_t": 0.3934447169303894,
"grad_norm": 38.01100158691406,
"learning_rate": 6.906649047373245e-08,
"logits/chosen": -0.8494223952293396,
"logits/rejected": -0.8483001589775085,
"logps/chosen": -435.84893798828125,
"logps/ref_chosen": -319.1685791015625,
"logps/ref_rejected": -284.6286315917969,
"logps/rejected": -460.42840576171875,
"loss": 2.1931,
"margin_dpo/margin_mean": 59.11943817138672,
"margin_dpo/margin_std": 98.76087951660156,
"step": 374
},
{
"epoch": 0.7853403141361257,
"fcm_dpo/beta": 0.008913343772292137,
"fcm_dpo/delta": 0.1080966666340828,
"fcm_dpo/margin": 55.787086486816406,
"fcm_dpo/q_t": 0.39758050441741943,
"grad_norm": 45.23257064819336,
"learning_rate": 6.780798075635675e-08,
"logits/chosen": -0.8638669848442078,
"logits/rejected": -0.8484641909599304,
"logps/chosen": -438.5658264160156,
"logps/ref_chosen": -314.8757629394531,
"logps/ref_rejected": -259.19744873046875,
"logps/rejected": -438.674560546875,
"loss": 2.2248,
"margin_dpo/margin_mean": 55.787086486816406,
"margin_dpo/margin_std": 94.40326690673828,
"step": 375
},
{
"epoch": 0.787434554973822,
"fcm_dpo/beta": 0.008943653665482998,
"fcm_dpo/delta": 0.03394775837659836,
"fcm_dpo/margin": 63.506160736083984,
"fcm_dpo/q_t": 0.38482290506362915,
"grad_norm": 48.83885955810547,
"learning_rate": 6.655924144404906e-08,
"logits/chosen": -0.8455772399902344,
"logits/rejected": -0.8562027812004089,
"logps/chosen": -410.6141052246094,
"logps/ref_chosen": -287.6732482910156,
"logps/ref_rejected": -256.6697082519531,
"logps/rejected": -443.11669921875,
"loss": 2.1703,
"margin_dpo/margin_mean": 63.50615692138672,
"margin_dpo/margin_std": 104.01822662353516,
"step": 376
},
{
"epoch": 0.7895287958115184,
"fcm_dpo/beta": 0.008943653665482998,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 46.180973052978516,
"fcm_dpo/q_t": 0.41257187724113464,
"grad_norm": 44.666011810302734,
"learning_rate": 6.532033950290885e-08,
"logits/chosen": -0.8321439027786255,
"logits/rejected": -0.8328297138214111,
"logps/chosen": -434.384765625,
"logps/ref_chosen": -305.261474609375,
"logps/ref_rejected": -271.8873596191406,
"logps/rejected": -447.191650390625,
"loss": 2.4015,
"margin_dpo/margin_mean": 46.180973052978516,
"margin_dpo/margin_std": 101.86759948730469,
"step": 377
},
{
"epoch": 0.7916230366492146,
"fcm_dpo/beta": 0.009046794846653938,
"fcm_dpo/delta": 0.11466342955827713,
"fcm_dpo/margin": 54.266029357910156,
"fcm_dpo/q_t": 0.39963600039482117,
"grad_norm": 50.50645065307617,
"learning_rate": 6.409134137148736e-08,
"logits/chosen": -0.8337588906288147,
"logits/rejected": -0.8219572305679321,
"logps/chosen": -401.2605285644531,
"logps/ref_chosen": -281.52960205078125,
"logps/ref_rejected": -296.96905517578125,
"logps/rejected": -470.966064453125,
"loss": 2.2416,
"margin_dpo/margin_mean": 54.266029357910156,
"margin_dpo/margin_std": 95.79962158203125,
"step": 378
},
{
"epoch": 0.793717277486911,
"fcm_dpo/beta": 0.009109079837799072,
"fcm_dpo/delta": 0.0686117559671402,
"fcm_dpo/margin": 58.737735748291016,
"fcm_dpo/q_t": 0.3886304497718811,
"grad_norm": 45.648128509521484,
"learning_rate": 6.28723129572247e-08,
"logits/chosen": -0.8842474818229675,
"logits/rejected": -0.8692854642868042,
"logps/chosen": -377.6402587890625,
"logps/ref_chosen": -265.0807800292969,
"logps/ref_rejected": -230.58941650390625,
"logps/rejected": -401.8865966796875,
"loss": 2.1893,
"margin_dpo/margin_mean": 58.73773956298828,
"margin_dpo/margin_std": 96.90349578857422,
"step": 379
},
{
"epoch": 0.7958115183246073,
"fcm_dpo/beta": 0.009144636802375317,
"fcm_dpo/delta": 0.03895849734544754,
"fcm_dpo/margin": 61.59145736694336,
"fcm_dpo/q_t": 0.3840789198875427,
"grad_norm": 60.29104995727539,
"learning_rate": 6.166331963291519e-08,
"logits/chosen": -0.8563022613525391,
"logits/rejected": -0.8388513922691345,
"logps/chosen": -427.99169921875,
"logps/ref_chosen": -305.9084167480469,
"logps/ref_rejected": -286.5906982421875,
"logps/rejected": -470.2654113769531,
"loss": 2.1523,
"margin_dpo/margin_mean": 61.59145736694336,
"margin_dpo/margin_std": 98.5282211303711,
"step": 380
},
{
"epoch": 0.7979057591623037,
"fcm_dpo/beta": 0.009134043008089066,
"fcm_dpo/delta": -0.011590981855988503,
"fcm_dpo/margin": 66.87974548339844,
"fcm_dpo/q_t": 0.37681466341018677,
"grad_norm": 42.678253173828125,
"learning_rate": 6.046442623320145e-08,
"logits/chosen": -0.8101858496665955,
"logits/rejected": -0.7783362865447998,
"logps/chosen": -368.9512939453125,
"logps/ref_chosen": -252.87066650390625,
"logps/ref_rejected": -261.1927490234375,
"logps/rejected": -444.1531066894531,
"loss": 2.0876,
"margin_dpo/margin_mean": 66.87974548339844,
"margin_dpo/margin_std": 100.2506103515625,
"step": 381
},
{
"epoch": 0.8,
"fcm_dpo/beta": 0.00905250571668148,
"fcm_dpo/delta": -0.08966833353042603,
"fcm_dpo/margin": 75.50526428222656,
"fcm_dpo/q_t": 0.3560827970504761,
"grad_norm": 36.75041198730469,
"learning_rate": 5.9275697051098275e-08,
"logits/chosen": -0.8631778955459595,
"logits/rejected": -0.8610793948173523,
"logps/chosen": -400.8864440917969,
"logps/ref_chosen": -289.21148681640625,
"logps/ref_rejected": -278.4574890136719,
"logps/rejected": -465.6376953125,
"loss": 1.9422,
"margin_dpo/margin_mean": 75.50526428222656,
"margin_dpo/margin_std": 92.95863342285156,
"step": 382
},
{
"epoch": 0.8020942408376963,
"fcm_dpo/beta": 0.009035475552082062,
"fcm_dpo/delta": -0.018830539658665657,
"fcm_dpo/margin": 68.36013793945312,
"fcm_dpo/q_t": 0.3715241849422455,
"grad_norm": 50.64119338989258,
"learning_rate": 5.809719583454414e-08,
"logits/chosen": -0.8461681604385376,
"logits/rejected": -0.8319650888442993,
"logps/chosen": -384.038330078125,
"logps/ref_chosen": -273.6308288574219,
"logps/ref_rejected": -261.4393310546875,
"logps/rejected": -440.20697021484375,
"loss": 2.0677,
"margin_dpo/margin_mean": 68.36013793945312,
"margin_dpo/margin_std": 98.72421264648438,
"step": 383
},
{
"epoch": 0.8041884816753927,
"fcm_dpo/beta": 0.009106705896556377,
"fcm_dpo/delta": 0.07852490246295929,
"fcm_dpo/margin": 57.71418380737305,
"fcm_dpo/q_t": 0.3911736011505127,
"grad_norm": 36.492897033691406,
"learning_rate": 5.6928985782982524e-08,
"logits/chosen": -0.8531427979469299,
"logits/rejected": -0.8563142418861389,
"logps/chosen": -393.4993591308594,
"logps/ref_chosen": -274.5699768066406,
"logps/ref_rejected": -285.8210754394531,
"logps/rejected": -462.46466064453125,
"loss": 2.1876,
"margin_dpo/margin_mean": 57.71417999267578,
"margin_dpo/margin_std": 94.16452026367188,
"step": 384
},
{
"epoch": 0.806282722513089,
"fcm_dpo/beta": 0.009165641851723194,
"fcm_dpo/delta": 0.06450905650854111,
"fcm_dpo/margin": 58.80182647705078,
"fcm_dpo/q_t": 0.3869282603263855,
"grad_norm": 50.559879302978516,
"learning_rate": 5.57711295439732e-08,
"logits/chosen": -0.8153691291809082,
"logits/rejected": -0.8172346353530884,
"logps/chosen": -403.4466552734375,
"logps/ref_chosen": -284.15057373046875,
"logps/ref_rejected": -244.8896942138672,
"logps/rejected": -422.9875793457031,
"loss": 2.1297,
"margin_dpo/margin_mean": 58.80182647705078,
"margin_dpo/margin_std": 88.29859924316406,
"step": 385
},
{
"epoch": 0.8083769633507853,
"fcm_dpo/beta": 0.009059401229023933,
"fcm_dpo/delta": -0.11658855527639389,
"fcm_dpo/margin": 78.18203735351562,
"fcm_dpo/q_t": 0.35379326343536377,
"grad_norm": 40.985050201416016,
"learning_rate": 5.4623689209832484e-08,
"logits/chosen": -0.7967289686203003,
"logits/rejected": -0.7979340553283691,
"logps/chosen": -428.823974609375,
"logps/ref_chosen": -320.17626953125,
"logps/ref_rejected": -302.05322265625,
"logps/rejected": -488.8829650878906,
"loss": 1.9319,
"margin_dpo/margin_mean": 78.18203735351562,
"margin_dpo/margin_std": 97.14153289794922,
"step": 386
},
{
"epoch": 0.8104712041884817,
"fcm_dpo/beta": 0.009046727791428566,
"fcm_dpo/delta": -0.013999204151332378,
"fcm_dpo/margin": 67.77481079101562,
"fcm_dpo/q_t": 0.37105774879455566,
"grad_norm": 41.09712600708008,
"learning_rate": 5.3486726314303175e-08,
"logits/chosen": -0.8473271727561951,
"logits/rejected": -0.8514444828033447,
"logps/chosen": -387.8977355957031,
"logps/ref_chosen": -272.2801818847656,
"logps/ref_rejected": -265.1622314453125,
"logps/rejected": -448.55462646484375,
"loss": 2.0708,
"margin_dpo/margin_mean": 67.77481079101562,
"margin_dpo/margin_std": 97.33201599121094,
"step": 387
},
{
"epoch": 0.812565445026178,
"fcm_dpo/beta": 0.009046727791428566,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 48.02550506591797,
"fcm_dpo/q_t": 0.4109206199645996,
"grad_norm": 58.431819915771484,
"learning_rate": 5.2360301829254745e-08,
"logits/chosen": -0.8235338926315308,
"logits/rejected": -0.8178836703300476,
"logps/chosen": -403.1437683105469,
"logps/ref_chosen": -272.5313415527344,
"logps/ref_rejected": -239.5546875,
"logps/rejected": -418.192626953125,
"loss": 2.3761,
"margin_dpo/margin_mean": 48.025508880615234,
"margin_dpo/margin_std": 101.64024353027344,
"step": 388
},
{
"epoch": 0.8146596858638744,
"fcm_dpo/beta": 0.009059837087988853,
"fcm_dpo/delta": 0.014480167999863625,
"fcm_dpo/margin": 64.72172546386719,
"fcm_dpo/q_t": 0.3822452127933502,
"grad_norm": 43.22749328613281,
"learning_rate": 5.1244476161413806e-08,
"logits/chosen": -0.8432326316833496,
"logits/rejected": -0.8447349667549133,
"logps/chosen": -404.267333984375,
"logps/ref_chosen": -281.08929443359375,
"logps/ref_rejected": -246.50379943847656,
"logps/rejected": -434.403564453125,
"loss": 2.1553,
"margin_dpo/margin_mean": 64.72173309326172,
"margin_dpo/margin_std": 103.07266235351562,
"step": 389
},
{
"epoch": 0.8167539267015707,
"fcm_dpo/beta": 0.00906858965754509,
"fcm_dpo/delta": 0.009656627662479877,
"fcm_dpo/margin": 65.16048431396484,
"fcm_dpo/q_t": 0.3776322901248932,
"grad_norm": 38.573402404785156,
"learning_rate": 5.013930914912476e-08,
"logits/chosen": -0.8598880767822266,
"logits/rejected": -0.8659382462501526,
"logps/chosen": -405.0728759765625,
"logps/ref_chosen": -283.98748779296875,
"logps/ref_rejected": -283.4721374511719,
"logps/rejected": -469.718017578125,
"loss": 2.0945,
"margin_dpo/margin_mean": 65.16048431396484,
"margin_dpo/margin_std": 95.23829650878906,
"step": 390
},
{
"epoch": 0.818848167539267,
"fcm_dpo/beta": 0.009060530923306942,
"fcm_dpo/delta": -0.008890703320503235,
"fcm_dpo/margin": 67.142822265625,
"fcm_dpo/q_t": 0.3751678168773651,
"grad_norm": 42.2318229675293,
"learning_rate": 4.904486005914027e-08,
"logits/chosen": -0.8241249322891235,
"logits/rejected": -0.8189826011657715,
"logps/chosen": -413.9036865234375,
"logps/ref_chosen": -283.86138916015625,
"logps/ref_rejected": -263.5279541015625,
"logps/rejected": -460.71307373046875,
"loss": 2.0708,
"margin_dpo/margin_mean": 67.142822265625,
"margin_dpo/margin_std": 97.04931640625,
"step": 391
},
{
"epoch": 0.8209424083769633,
"fcm_dpo/beta": 0.008920765481889248,
"fcm_dpo/delta": -0.15545937418937683,
"fcm_dpo/margin": 83.379150390625,
"fcm_dpo/q_t": 0.34487900137901306,
"grad_norm": 45.39931869506836,
"learning_rate": 4.796118758344353e-08,
"logits/chosen": -0.8124989867210388,
"logits/rejected": -0.8294109106063843,
"logps/chosen": -427.7132873535156,
"logps/ref_chosen": -310.0700988769531,
"logps/ref_rejected": -252.89981079101562,
"logps/rejected": -453.9221496582031,
"loss": 1.8773,
"margin_dpo/margin_mean": 83.37914276123047,
"margin_dpo/margin_std": 96.69622802734375,
"step": 392
},
{
"epoch": 0.8230366492146597,
"fcm_dpo/beta": 0.00896378792822361,
"fcm_dpo/delta": 0.04811134934425354,
"fcm_dpo/margin": 61.86561584472656,
"fcm_dpo/q_t": 0.38522768020629883,
"grad_norm": 52.68271255493164,
"learning_rate": 4.688834983610082e-08,
"logits/chosen": -0.8560529947280884,
"logits/rejected": -0.8472511768341064,
"logps/chosen": -406.266357421875,
"logps/ref_chosen": -286.7156677246094,
"logps/ref_rejected": -230.00344848632812,
"logps/rejected": -411.4197692871094,
"loss": 2.1431,
"margin_dpo/margin_mean": 61.8656120300293,
"margin_dpo/margin_std": 95.9111328125,
"step": 393
},
{
"epoch": 0.8251308900523561,
"fcm_dpo/beta": 0.009046666324138641,
"fcm_dpo/delta": 0.09203460067510605,
"fcm_dpo/margin": 56.668609619140625,
"fcm_dpo/q_t": 0.3996460437774658,
"grad_norm": 37.54924392700195,
"learning_rate": 4.582640435014459e-08,
"logits/chosen": -0.8843240737915039,
"logits/rejected": -0.8835663795471191,
"logps/chosen": -445.21319580078125,
"logps/ref_chosen": -325.9934387207031,
"logps/ref_rejected": -317.4176940917969,
"logps/rejected": -493.3060607910156,
"loss": 2.2589,
"margin_dpo/margin_mean": 56.668609619140625,
"margin_dpo/margin_std": 101.60933685302734,
"step": 394
},
{
"epoch": 0.8272251308900523,
"fcm_dpo/beta": 0.008996936492621899,
"fcm_dpo/delta": -0.055121734738349915,
"fcm_dpo/margin": 72.41581726074219,
"fcm_dpo/q_t": 0.3690818250179291,
"grad_norm": 37.268280029296875,
"learning_rate": 4.477540807448832e-08,
"logits/chosen": -0.8178912997245789,
"logits/rejected": -0.8257204294204712,
"logps/chosen": -386.1897277832031,
"logps/ref_chosen": -268.90081787109375,
"logps/ref_rejected": -272.8436279296875,
"logps/rejected": -462.54833984375,
"loss": 2.0065,
"margin_dpo/margin_mean": 72.41581726074219,
"margin_dpo/margin_std": 99.58771514892578,
"step": 395
},
{
"epoch": 0.8293193717277487,
"fcm_dpo/beta": 0.009008500725030899,
"fcm_dpo/delta": 0.01284549105912447,
"fcm_dpo/margin": 65.26160430908203,
"fcm_dpo/q_t": 0.3776703476905823,
"grad_norm": 39.83456039428711,
"learning_rate": 4.373541737087263e-08,
"logits/chosen": -0.8394569158554077,
"logits/rejected": -0.8310561776161194,
"logps/chosen": -409.4137268066406,
"logps/ref_chosen": -291.19830322265625,
"logps/ref_rejected": -253.27821350097656,
"logps/rejected": -436.7552490234375,
"loss": 2.0994,
"margin_dpo/margin_mean": 65.26160430908203,
"margin_dpo/margin_std": 95.30459594726562,
"step": 396
},
{
"epoch": 0.831413612565445,
"fcm_dpo/beta": 0.0090852165594697,
"fcm_dpo/delta": 0.08479887247085571,
"fcm_dpo/margin": 57.1905517578125,
"fcm_dpo/q_t": 0.39606374502182007,
"grad_norm": 45.396202087402344,
"learning_rate": 4.270648801084295e-08,
"logits/chosen": -0.8564621210098267,
"logits/rejected": -0.8406748175621033,
"logps/chosen": -429.6406555175781,
"logps/ref_chosen": -309.8224182128906,
"logps/ref_rejected": -291.907470703125,
"logps/rejected": -468.91632080078125,
"loss": 2.2748,
"margin_dpo/margin_mean": 57.1905517578125,
"margin_dpo/margin_std": 106.34642791748047,
"step": 397
},
{
"epoch": 0.8335078534031414,
"fcm_dpo/beta": 0.009198704734444618,
"fcm_dpo/delta": 0.12414112687110901,
"fcm_dpo/margin": 52.37727355957031,
"fcm_dpo/q_t": 0.3993566632270813,
"grad_norm": 54.71518325805664,
"learning_rate": 4.168867517275806e-08,
"logits/chosen": -0.7654838562011719,
"logits/rejected": -0.7991914749145508,
"logps/chosen": -429.72418212890625,
"logps/ref_chosen": -297.8135070800781,
"logps/ref_rejected": -270.50091552734375,
"logps/rejected": -454.78887939453125,
"loss": 2.433,
"margin_dpo/margin_mean": 52.377281188964844,
"margin_dpo/margin_std": 115.62989807128906,
"step": 398
},
{
"epoch": 0.8356020942408376,
"fcm_dpo/beta": 0.00922246091067791,
"fcm_dpo/delta": 0.025792479515075684,
"fcm_dpo/margin": 62.42264938354492,
"fcm_dpo/q_t": 0.38247746229171753,
"grad_norm": 42.232269287109375,
"learning_rate": 4.0682033438831584e-08,
"logits/chosen": -0.8487710356712341,
"logits/rejected": -0.8146480917930603,
"logps/chosen": -420.9412841796875,
"logps/ref_chosen": -292.8467712402344,
"logps/ref_rejected": -268.3638916015625,
"logps/rejected": -458.881103515625,
"loss": 2.1554,
"margin_dpo/margin_mean": 62.42264938354492,
"margin_dpo/margin_std": 98.82135009765625,
"step": 399
},
{
"epoch": 0.837696335078534,
"fcm_dpo/beta": 0.009250282309949398,
"fcm_dpo/delta": 0.030122073367238045,
"fcm_dpo/margin": 61.79239273071289,
"fcm_dpo/q_t": 0.3824981153011322,
"grad_norm": 51.098445892333984,
"learning_rate": 3.968661679220467e-08,
"logits/chosen": -0.8918007016181946,
"logits/rejected": -0.8914182782173157,
"logps/chosen": -386.5912780761719,
"logps/ref_chosen": -263.6764221191406,
"logps/ref_rejected": -258.670166015625,
"logps/rejected": -443.37744140625,
"loss": 2.1912,
"margin_dpo/margin_mean": 61.792388916015625,
"margin_dpo/margin_std": 102.4735107421875,
"step": 400
},
{
"epoch": 0.8397905759162304,
"fcm_dpo/beta": 0.00922383088618517,
"fcm_dpo/delta": -0.02863612025976181,
"fcm_dpo/margin": 67.95858764648438,
"fcm_dpo/q_t": 0.3708932399749756,
"grad_norm": 54.48740005493164,
"learning_rate": 3.8702478614051345e-08,
"logits/chosen": -0.8366138935089111,
"logits/rejected": -0.8357076048851013,
"logps/chosen": -438.028564453125,
"logps/ref_chosen": -318.2853088378906,
"logps/ref_rejected": -293.745849609375,
"logps/rejected": -481.4476623535156,
"loss": 2.0431,
"margin_dpo/margin_mean": 67.95858001708984,
"margin_dpo/margin_std": 95.96973419189453,
"step": 401
},
{
"epoch": 0.8418848167539267,
"fcm_dpo/beta": 0.009234806522727013,
"fcm_dpo/delta": 0.011891900561749935,
"fcm_dpo/margin": 63.75963592529297,
"fcm_dpo/q_t": 0.37997788190841675,
"grad_norm": 47.25274658203125,
"learning_rate": 3.772967168071517e-08,
"logits/chosen": -0.8964592814445496,
"logits/rejected": -0.8779340386390686,
"logps/chosen": -425.38006591796875,
"logps/ref_chosen": -309.4278564453125,
"logps/ref_rejected": -282.028564453125,
"logps/rejected": -461.7404479980469,
"loss": 2.0995,
"margin_dpo/margin_mean": 63.75963592529297,
"margin_dpo/margin_std": 96.97467041015625,
"step": 402
},
{
"epoch": 0.8439790575916231,
"fcm_dpo/beta": 0.009065676480531693,
"fcm_dpo/delta": -0.18484215438365936,
"fcm_dpo/margin": 84.98739624023438,
"fcm_dpo/q_t": 0.34054630994796753,
"grad_norm": 41.43680953979492,
"learning_rate": 3.676824816087978e-08,
"logits/chosen": -0.8489100337028503,
"logits/rejected": -0.8344736099243164,
"logps/chosen": -426.6050109863281,
"logps/ref_chosen": -309.02850341796875,
"logps/ref_rejected": -272.9409484863281,
"logps/rejected": -475.5048828125,
"loss": 1.8318,
"margin_dpo/margin_mean": 84.98739624023438,
"margin_dpo/margin_std": 96.16925048828125,
"step": 403
},
{
"epoch": 0.8460732984293193,
"fcm_dpo/beta": 0.009082796983420849,
"fcm_dpo/delta": 0.018867533653974533,
"fcm_dpo/margin": 64.10249328613281,
"fcm_dpo/q_t": 0.3783862590789795,
"grad_norm": 43.79155349731445,
"learning_rate": 3.581825961277074e-08,
"logits/chosen": -0.876073956489563,
"logits/rejected": -0.8583503365516663,
"logps/chosen": -424.343994140625,
"logps/ref_chosen": -297.2837219238281,
"logps/ref_rejected": -256.996826171875,
"logps/rejected": -448.15960693359375,
"loss": 2.1588,
"margin_dpo/margin_mean": 64.10249328613281,
"margin_dpo/margin_std": 102.44731903076172,
"step": 404
},
{
"epoch": 0.8481675392670157,
"fcm_dpo/beta": 0.00904757808893919,
"fcm_dpo/delta": -0.03885126858949661,
"fcm_dpo/margin": 70.3364028930664,
"fcm_dpo/q_t": 0.36864298582077026,
"grad_norm": 34.74131393432617,
"learning_rate": 3.487975698139084e-08,
"logits/chosen": -0.8143957853317261,
"logits/rejected": -0.8225845098495483,
"logps/chosen": -375.9350280761719,
"logps/ref_chosen": -257.96533203125,
"logps/ref_rejected": -255.811279296875,
"logps/rejected": -444.11737060546875,
"loss": 2.0313,
"margin_dpo/margin_mean": 70.3364028930664,
"margin_dpo/margin_std": 97.38545227050781,
"step": 405
},
{
"epoch": 0.8502617801047121,
"fcm_dpo/beta": 0.009167040698230267,
"fcm_dpo/delta": 0.13117404282093048,
"fcm_dpo/margin": 51.817840576171875,
"fcm_dpo/q_t": 0.3999882936477661,
"grad_norm": 49.4265251159668,
"learning_rate": 3.3952790595787986e-08,
"logits/chosen": -0.8473017811775208,
"logits/rejected": -0.8291510939598083,
"logps/chosen": -416.92425537109375,
"logps/ref_chosen": -285.1810302734375,
"logps/ref_rejected": -264.422607421875,
"logps/rejected": -447.983642578125,
"loss": 2.2432,
"margin_dpo/margin_mean": 51.817840576171875,
"margin_dpo/margin_std": 89.82006072998047,
"step": 406
},
{
"epoch": 0.8523560209424084,
"fcm_dpo/beta": 0.00911739468574524,
"fcm_dpo/delta": -0.054304175078868866,
"fcm_dpo/margin": 71.37572479248047,
"fcm_dpo/q_t": 0.3691890239715576,
"grad_norm": 51.377384185791016,
"learning_rate": 3.303741016635614e-08,
"logits/chosen": -0.828616201877594,
"logits/rejected": -0.8513669967651367,
"logps/chosen": -398.3943176269531,
"logps/ref_chosen": -265.23809814453125,
"logps/ref_rejected": -219.02969360351562,
"logps/rejected": -423.5616149902344,
"loss": 2.0574,
"margin_dpo/margin_mean": 71.37572479248047,
"margin_dpo/margin_std": 102.88460540771484,
"step": 407
},
{
"epoch": 0.8544502617801047,
"fcm_dpo/beta": 0.009092845022678375,
"fcm_dpo/delta": -0.02696285769343376,
"fcm_dpo/margin": 68.76557159423828,
"fcm_dpo/q_t": 0.3719443380832672,
"grad_norm": 35.444427490234375,
"learning_rate": 3.2133664782169944e-08,
"logits/chosen": -0.8586457371711731,
"logits/rejected": -0.8522211909294128,
"logps/chosen": -416.1060485839844,
"logps/ref_chosen": -296.97259521484375,
"logps/ref_rejected": -295.4610595703125,
"logps/rejected": -483.3600769042969,
"loss": 2.0512,
"margin_dpo/margin_mean": 68.76557159423828,
"margin_dpo/margin_std": 94.53829956054688,
"step": 408
},
{
"epoch": 0.856544502617801,
"fcm_dpo/beta": 0.009090198203921318,
"fcm_dpo/delta": -0.002911497838795185,
"fcm_dpo/margin": 66.30614471435547,
"fcm_dpo/q_t": 0.3767385482788086,
"grad_norm": 43.50408172607422,
"learning_rate": 3.12416029083514e-08,
"logits/chosen": -0.8403241634368896,
"logits/rejected": -0.8295544385910034,
"logps/chosen": -416.3131408691406,
"logps/ref_chosen": -287.3793640136719,
"logps/ref_rejected": -275.7888488769531,
"logps/rejected": -471.0287780761719,
"loss": 2.1701,
"margin_dpo/margin_mean": 66.30614471435547,
"margin_dpo/margin_std": 109.85335540771484,
"step": 409
},
{
"epoch": 0.8586387434554974,
"fcm_dpo/beta": 0.009145773947238922,
"fcm_dpo/delta": 0.06095205247402191,
"fcm_dpo/margin": 59.29991149902344,
"fcm_dpo/q_t": 0.3891522288322449,
"grad_norm": 44.5724983215332,
"learning_rate": 3.036127238347164e-08,
"logits/chosen": -0.852216362953186,
"logits/rejected": -0.8620251417160034,
"logps/chosen": -403.9638366699219,
"logps/ref_chosen": -281.7801513671875,
"logps/ref_rejected": -266.75018310546875,
"logps/rejected": -448.2337646484375,
"loss": 2.2124,
"margin_dpo/margin_mean": 59.29990768432617,
"margin_dpo/margin_std": 101.16648864746094,
"step": 410
},
{
"epoch": 0.8607329842931937,
"fcm_dpo/beta": 0.00907374732196331,
"fcm_dpo/delta": -0.07906590402126312,
"fcm_dpo/margin": 74.2491455078125,
"fcm_dpo/q_t": 0.3606072664260864,
"grad_norm": 39.0561637878418,
"learning_rate": 2.9492720416985e-08,
"logits/chosen": -0.8626891374588013,
"logits/rejected": -0.8336524963378906,
"logps/chosen": -401.28741455078125,
"logps/ref_chosen": -281.5872497558594,
"logps/ref_rejected": -254.79075622558594,
"logps/rejected": -448.7400817871094,
"loss": 1.9545,
"margin_dpo/margin_mean": 74.2491455078125,
"margin_dpo/margin_std": 95.4036865234375,
"step": 411
},
{
"epoch": 0.86282722513089,
"fcm_dpo/beta": 0.009142782539129257,
"fcm_dpo/delta": 0.07579462230205536,
"fcm_dpo/margin": 57.77165222167969,
"fcm_dpo/q_t": 0.394888699054718,
"grad_norm": 42.87049865722656,
"learning_rate": 2.863599358669755e-08,
"logits/chosen": -0.8131078481674194,
"logits/rejected": -0.8211822509765625,
"logps/chosen": -411.53662109375,
"logps/ref_chosen": -276.5341491699219,
"logps/ref_rejected": -273.8749694824219,
"logps/rejected": -466.64910888671875,
"loss": 2.2267,
"margin_dpo/margin_mean": 57.77164840698242,
"margin_dpo/margin_std": 97.42094421386719,
"step": 412
},
{
"epoch": 0.8649214659685864,
"fcm_dpo/beta": 0.00922452099621296,
"fcm_dpo/delta": 0.08900528401136398,
"fcm_dpo/margin": 55.8905029296875,
"fcm_dpo/q_t": 0.3945767879486084,
"grad_norm": 42.23957824707031,
"learning_rate": 2.7791137836269158e-08,
"logits/chosen": -0.8459134697914124,
"logits/rejected": -0.8397260904312134,
"logps/chosen": -397.4471435546875,
"logps/ref_chosen": -271.27459716796875,
"logps/ref_rejected": -270.1611633300781,
"logps/rejected": -452.2242431640625,
"loss": 2.2211,
"margin_dpo/margin_mean": 55.8905029296875,
"margin_dpo/margin_std": 96.78380584716797,
"step": 413
},
{
"epoch": 0.8670157068062827,
"fcm_dpo/beta": 0.00917508639395237,
"fcm_dpo/delta": -0.05373471975326538,
"fcm_dpo/margin": 70.86923217773438,
"fcm_dpo/q_t": 0.3667352795600891,
"grad_norm": 43.44190979003906,
"learning_rate": 2.6958198472749717e-08,
"logits/chosen": -0.8781423568725586,
"logits/rejected": -0.8842149972915649,
"logps/chosen": -423.2403564453125,
"logps/ref_chosen": -297.11505126953125,
"logps/ref_rejected": -271.7034606933594,
"logps/rejected": -468.6979675292969,
"loss": 2.0281,
"margin_dpo/margin_mean": 70.86922454833984,
"margin_dpo/margin_std": 98.45124816894531,
"step": 414
},
{
"epoch": 0.8691099476439791,
"fcm_dpo/beta": 0.00919144693762064,
"fcm_dpo/delta": 0.01781538687646389,
"fcm_dpo/margin": 63.45276641845703,
"fcm_dpo/q_t": 0.3753476142883301,
"grad_norm": 40.2779426574707,
"learning_rate": 2.613722016414943e-08,
"logits/chosen": -0.8788604736328125,
"logits/rejected": -0.8692939281463623,
"logps/chosen": -420.3956298828125,
"logps/ref_chosen": -297.69268798828125,
"logps/ref_rejected": -279.05035400390625,
"logps/rejected": -465.2060852050781,
"loss": 2.0544,
"margin_dpo/margin_mean": 63.45276641845703,
"margin_dpo/margin_std": 86.41554260253906,
"step": 415
},
{
"epoch": 0.8712041884816754,
"fcm_dpo/beta": 0.009103688411414623,
"fcm_dpo/delta": -0.09593759477138519,
"fcm_dpo/margin": 75.71578216552734,
"fcm_dpo/q_t": 0.35760408639907837,
"grad_norm": 38.380680084228516,
"learning_rate": 2.5328246937043525e-08,
"logits/chosen": -0.870101809501648,
"logits/rejected": -0.8800961375236511,
"logps/chosen": -427.42657470703125,
"logps/ref_chosen": -311.8255615234375,
"logps/ref_rejected": -268.609375,
"logps/rejected": -459.9261474609375,
"loss": 1.991,
"margin_dpo/margin_mean": 75.71578216552734,
"margin_dpo/margin_std": 101.31974792480469,
"step": 416
},
{
"epoch": 0.8732984293193717,
"fcm_dpo/beta": 0.009087484329938889,
"fcm_dpo/delta": -0.017814848572015762,
"fcm_dpo/margin": 67.8642349243164,
"fcm_dpo/q_t": 0.37603920698165894,
"grad_norm": 40.87303924560547,
"learning_rate": 2.4531322174210973e-08,
"logits/chosen": -0.8256704807281494,
"logits/rejected": -0.8271574378013611,
"logps/chosen": -434.3507995605469,
"logps/ref_chosen": -310.43682861328125,
"logps/ref_rejected": -277.15576171875,
"logps/rejected": -468.9339599609375,
"loss": 2.1151,
"margin_dpo/margin_mean": 67.8642349243164,
"margin_dpo/margin_std": 105.58177185058594,
"step": 417
},
{
"epoch": 0.875392670157068,
"fcm_dpo/beta": 0.00911343190819025,
"fcm_dpo/delta": 0.02851286716759205,
"fcm_dpo/margin": 62.88727569580078,
"fcm_dpo/q_t": 0.38212618231773376,
"grad_norm": 43.50324630737305,
"learning_rate": 2.3746488612308295e-08,
"logits/chosen": -0.8227719068527222,
"logits/rejected": -0.8056213855743408,
"logps/chosen": -415.47357177734375,
"logps/ref_chosen": -278.4959411621094,
"logps/ref_rejected": -276.5561828613281,
"logps/rejected": -476.4211120605469,
"loss": 2.1628,
"margin_dpo/margin_mean": 62.88727951049805,
"margin_dpo/margin_std": 99.70533752441406,
"step": 418
},
{
"epoch": 0.8774869109947644,
"fcm_dpo/beta": 0.009050325490534306,
"fcm_dpo/delta": -0.06948597729206085,
"fcm_dpo/margin": 73.46145629882812,
"fcm_dpo/q_t": 0.36211949586868286,
"grad_norm": 42.223331451416016,
"learning_rate": 2.297378833957761e-08,
"logits/chosen": -0.8671696186065674,
"logits/rejected": -0.8473471403121948,
"logps/chosen": -433.8696594238281,
"logps/ref_chosen": -298.90020751953125,
"logps/ref_rejected": -246.15478515625,
"logps/rejected": -454.58563232421875,
"loss": 2.065,
"margin_dpo/margin_mean": 73.46146392822266,
"margin_dpo/margin_std": 108.15577697753906,
"step": 419
},
{
"epoch": 0.8795811518324608,
"fcm_dpo/beta": 0.008941629901528358,
"fcm_dpo/delta": -0.1208285391330719,
"fcm_dpo/margin": 79.64669799804688,
"fcm_dpo/q_t": 0.35645708441734314,
"grad_norm": 66.53260803222656,
"learning_rate": 2.2213262793589482e-08,
"logits/chosen": -0.8046590685844421,
"logits/rejected": -0.7820172309875488,
"logps/chosen": -396.0497741699219,
"logps/ref_chosen": -264.5608825683594,
"logps/ref_rejected": -245.67759704589844,
"logps/rejected": -456.8131408691406,
"loss": 2.0414,
"margin_dpo/margin_mean": 79.6467056274414,
"margin_dpo/margin_std": 116.9892578125,
"step": 420
},
{
"epoch": 0.881675392670157,
"fcm_dpo/beta": 0.008944323286414146,
"fcm_dpo/delta": 0.0030114029068499804,
"fcm_dpo/margin": 66.76507568359375,
"fcm_dpo/q_t": 0.3728353679180145,
"grad_norm": 47.43436050415039,
"learning_rate": 2.1464952759020856e-08,
"logits/chosen": -0.8856263756752014,
"logits/rejected": -0.8722842335700989,
"logps/chosen": -418.6994934082031,
"logps/ref_chosen": -297.70501708984375,
"logps/ref_rejected": -243.75355529785156,
"logps/rejected": -431.51312255859375,
"loss": 2.0726,
"margin_dpo/margin_mean": 66.76507568359375,
"margin_dpo/margin_std": 94.80990600585938,
"step": 421
},
{
"epoch": 0.8837696335078534,
"fcm_dpo/beta": 0.00891195796430111,
"fcm_dpo/delta": -0.036251250654459,
"fcm_dpo/margin": 71.1346435546875,
"fcm_dpo/q_t": 0.37105536460876465,
"grad_norm": 37.76618576049805,
"learning_rate": 2.07288983654679e-08,
"logits/chosen": -0.7624043822288513,
"logits/rejected": -0.8073043823242188,
"logps/chosen": -412.7300720214844,
"logps/ref_chosen": -288.3587646484375,
"logps/ref_rejected": -256.444580078125,
"logps/rejected": -451.9505615234375,
"loss": 2.0861,
"margin_dpo/margin_mean": 71.1346435546875,
"margin_dpo/margin_std": 105.84291076660156,
"step": 422
},
{
"epoch": 0.8858638743455497,
"fcm_dpo/beta": 0.00884657260030508,
"fcm_dpo/delta": -0.07363896071910858,
"fcm_dpo/margin": 75.58821105957031,
"fcm_dpo/q_t": 0.3611782193183899,
"grad_norm": 43.86180877685547,
"learning_rate": 2.0005139085293942e-08,
"logits/chosen": -0.8781679272651672,
"logits/rejected": -0.8628135323524475,
"logps/chosen": -424.294189453125,
"logps/ref_chosen": -296.0070495605469,
"logps/ref_rejected": -261.3511047363281,
"logps/rejected": -465.22650146484375,
"loss": 1.9891,
"margin_dpo/margin_mean": 75.58820343017578,
"margin_dpo/margin_std": 100.61441040039062,
"step": 423
},
{
"epoch": 0.8879581151832461,
"fcm_dpo/beta": 0.00880428310483694,
"fcm_dpo/delta": -0.047917693853378296,
"fcm_dpo/margin": 73.2394027709961,
"fcm_dpo/q_t": 0.3628760874271393,
"grad_norm": 46.02090072631836,
"learning_rate": 1.9293713731512673e-08,
"logits/chosen": -0.8573895692825317,
"logits/rejected": -0.8584867715835571,
"logps/chosen": -428.28289794921875,
"logps/ref_chosen": -309.421875,
"logps/ref_rejected": -249.15145874023438,
"logps/rejected": -441.25189208984375,
"loss": 1.9749,
"margin_dpo/margin_mean": 73.2394027709961,
"margin_dpo/margin_std": 93.41693115234375,
"step": 424
},
{
"epoch": 0.8900523560209425,
"fcm_dpo/beta": 0.008864101953804493,
"fcm_dpo/delta": 0.06771360337734222,
"fcm_dpo/margin": 60.457664489746094,
"fcm_dpo/q_t": 0.3915264904499054,
"grad_norm": 44.9000358581543,
"learning_rate": 1.8594660455706763e-08,
"logits/chosen": -0.8390822410583496,
"logits/rejected": -0.8460398316383362,
"logps/chosen": -408.5638732910156,
"logps/ref_chosen": -280.50909423828125,
"logps/ref_rejected": -276.8252258300781,
"logps/rejected": -465.3376770019531,
"loss": 2.2276,
"margin_dpo/margin_mean": 60.457664489746094,
"margin_dpo/margin_std": 105.6123275756836,
"step": 425
},
{
"epoch": 0.8921465968586387,
"fcm_dpo/beta": 0.008852999657392502,
"fcm_dpo/delta": -0.012533169239759445,
"fcm_dpo/margin": 69.10267639160156,
"fcm_dpo/q_t": 0.37290841341018677,
"grad_norm": 45.63178634643555,
"learning_rate": 1.7908016745981856e-08,
"logits/chosen": -0.8664529323577881,
"logits/rejected": -0.855957567691803,
"logps/chosen": -424.10943603515625,
"logps/ref_chosen": -292.78521728515625,
"logps/ref_rejected": -255.62908935546875,
"logps/rejected": -456.05596923828125,
"loss": 2.0551,
"margin_dpo/margin_mean": 69.10267639160156,
"margin_dpo/margin_std": 97.69993591308594,
"step": 426
},
{
"epoch": 0.8942408376963351,
"fcm_dpo/beta": 0.008692766539752483,
"fcm_dpo/delta": -0.1826513111591339,
"fcm_dpo/margin": 88.40521240234375,
"fcm_dpo/q_t": 0.34239208698272705,
"grad_norm": 44.900634765625,
"learning_rate": 1.7233819424956247e-08,
"logits/chosen": -0.8505325317382812,
"logits/rejected": -0.8232094645500183,
"logps/chosen": -415.133056640625,
"logps/ref_chosen": -288.7687072753906,
"logps/ref_rejected": -268.503173828125,
"logps/rejected": -483.272705078125,
"loss": 1.9003,
"margin_dpo/margin_mean": 88.40521240234375,
"margin_dpo/margin_std": 106.93820190429688,
"step": 427
},
{
"epoch": 0.8963350785340314,
"fcm_dpo/beta": 0.00860194955021143,
"fcm_dpo/delta": -0.10502413660287857,
"fcm_dpo/margin": 81.10469055175781,
"fcm_dpo/q_t": 0.35469961166381836,
"grad_norm": 44.770870208740234,
"learning_rate": 1.6572104647786245e-08,
"logits/chosen": -0.8194983601570129,
"logits/rejected": -0.8448799848556519,
"logps/chosen": -435.13592529296875,
"logps/ref_chosen": -295.5209655761719,
"logps/ref_rejected": -275.70672607421875,
"logps/rejected": -496.4263916015625,
"loss": 1.9724,
"margin_dpo/margin_mean": 81.10469055175781,
"margin_dpo/margin_std": 106.30459594726562,
"step": 428
},
{
"epoch": 0.8984293193717278,
"fcm_dpo/beta": 0.008581049740314484,
"fcm_dpo/delta": -0.02432570792734623,
"fcm_dpo/margin": 72.57955932617188,
"fcm_dpo/q_t": 0.36753594875335693,
"grad_norm": 74.59688568115234,
"learning_rate": 1.5922907900227017e-08,
"logits/chosen": -0.8204233050346375,
"logits/rejected": -0.8261222243309021,
"logps/chosen": -402.6282043457031,
"logps/ref_chosen": -274.392333984375,
"logps/ref_rejected": -258.58056640625,
"logps/rejected": -459.39605712890625,
"loss": 2.1266,
"margin_dpo/margin_mean": 72.57955932617188,
"margin_dpo/margin_std": 112.30268096923828,
"step": 429
},
{
"epoch": 0.900523560209424,
"fcm_dpo/beta": 0.008623561821877956,
"fcm_dpo/delta": 0.049419913440942764,
"fcm_dpo/margin": 64.16232299804688,
"fcm_dpo/q_t": 0.38646456599235535,
"grad_norm": 44.88824462890625,
"learning_rate": 1.5286263996730026e-08,
"logits/chosen": -0.8728733062744141,
"logits/rejected": -0.8493736982345581,
"logps/chosen": -413.1773986816406,
"logps/ref_chosen": -288.7391357421875,
"logps/ref_rejected": -268.6106262207031,
"logps/rejected": -457.21124267578125,
"loss": 2.1531,
"margin_dpo/margin_mean": 64.1623306274414,
"margin_dpo/margin_std": 101.1588363647461,
"step": 430
},
{
"epoch": 0.9026178010471204,
"fcm_dpo/beta": 0.008623561821877956,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 54.34422302246094,
"fcm_dpo/q_t": 0.40338844060897827,
"grad_norm": 42.61466598510742,
"learning_rate": 1.4662207078575684e-08,
"logits/chosen": -0.8448891639709473,
"logits/rejected": -0.8200463652610779,
"logps/chosen": -403.5398254394531,
"logps/ref_chosen": -275.7247314453125,
"logps/ref_rejected": -268.91949462890625,
"logps/rejected": -451.07879638671875,
"loss": 2.2548,
"margin_dpo/margin_mean": 54.3442268371582,
"margin_dpo/margin_std": 96.5052490234375,
"step": 431
},
{
"epoch": 0.9047120418848168,
"fcm_dpo/beta": 0.00860768835991621,
"fcm_dpo/delta": -0.01842416450381279,
"fcm_dpo/margin": 71.71331024169922,
"fcm_dpo/q_t": 0.3745311498641968,
"grad_norm": 32.758541107177734,
"learning_rate": 1.40507706120426e-08,
"logits/chosen": -0.8849490880966187,
"logits/rejected": -0.8723046779632568,
"logps/chosen": -409.10235595703125,
"logps/ref_chosen": -291.4200744628906,
"logps/ref_rejected": -255.48301696777344,
"logps/rejected": -444.87860107421875,
"loss": 2.0714,
"margin_dpo/margin_mean": 71.71331024169922,
"margin_dpo/margin_std": 105.3165054321289,
"step": 432
},
{
"epoch": 0.9068062827225131,
"fcm_dpo/beta": 0.008653717115521431,
"fcm_dpo/delta": 0.05333186686038971,
"fcm_dpo/margin": 63.50928497314453,
"fcm_dpo/q_t": 0.3824273347854614,
"grad_norm": 41.268856048583984,
"learning_rate": 1.345198738661285e-08,
"logits/chosen": -0.8427897095680237,
"logits/rejected": -0.8407853841781616,
"logps/chosen": -380.2388610839844,
"logps/ref_chosen": -246.22689819335938,
"logps/ref_rejected": -253.65921020507812,
"logps/rejected": -451.1804504394531,
"loss": 2.1241,
"margin_dpo/margin_mean": 63.50928497314453,
"margin_dpo/margin_std": 94.83145141601562,
"step": 433
},
{
"epoch": 0.9089005235602095,
"fcm_dpo/beta": 0.008693271316587925,
"fcm_dpo/delta": 0.045603420585393906,
"fcm_dpo/margin": 64.06455993652344,
"fcm_dpo/q_t": 0.3837374746799469,
"grad_norm": 39.08061599731445,
"learning_rate": 1.2865889513213628e-08,
"logits/chosen": -0.8433459997177124,
"logits/rejected": -0.8529931902885437,
"logps/chosen": -432.81304931640625,
"logps/ref_chosen": -295.4617919921875,
"logps/ref_rejected": -256.24847412109375,
"logps/rejected": -457.6642761230469,
"loss": 2.1212,
"margin_dpo/margin_mean": 64.06456756591797,
"margin_dpo/margin_std": 97.08004760742188,
"step": 434
},
{
"epoch": 0.9109947643979057,
"fcm_dpo/beta": 0.008682639338076115,
"fcm_dpo/delta": -0.012237189337611198,
"fcm_dpo/margin": 70.42655944824219,
"fcm_dpo/q_t": 0.3742543160915375,
"grad_norm": 59.918060302734375,
"learning_rate": 1.2292508422495157e-08,
"logits/chosen": -0.8576679229736328,
"logits/rejected": -0.8467893600463867,
"logps/chosen": -386.19427490234375,
"logps/ref_chosen": -260.7384033203125,
"logps/ref_rejected": -248.5688018798828,
"logps/rejected": -444.45123291015625,
"loss": 2.0355,
"margin_dpo/margin_mean": 70.42656707763672,
"margin_dpo/margin_std": 99.70752716064453,
"step": 435
},
{
"epoch": 0.9130890052356021,
"fcm_dpo/beta": 0.00876124482601881,
"fcm_dpo/delta": 0.09012450277805328,
"fcm_dpo/margin": 58.72356033325195,
"fcm_dpo/q_t": 0.3964012861251831,
"grad_norm": 63.34469223022461,
"learning_rate": 1.1731874863145142e-08,
"logits/chosen": -0.8322401642799377,
"logits/rejected": -0.8262742757797241,
"logps/chosen": -454.0661315917969,
"logps/ref_chosen": -319.322509765625,
"logps/ref_rejected": -299.30450439453125,
"logps/rejected": -492.77166748046875,
"loss": 2.2295,
"margin_dpo/margin_mean": 58.72356033325195,
"margin_dpo/margin_std": 104.90573120117188,
"step": 436
},
{
"epoch": 0.9151832460732985,
"fcm_dpo/beta": 0.008693304844200611,
"fcm_dpo/delta": -0.07784850895404816,
"fcm_dpo/margin": 77.36897277832031,
"fcm_dpo/q_t": 0.3631066679954529,
"grad_norm": 45.64623260498047,
"learning_rate": 1.118401890024001e-08,
"logits/chosen": -0.8639684915542603,
"logits/rejected": -0.853186309337616,
"logps/chosen": -403.3670349121094,
"logps/ref_chosen": -278.8287658691406,
"logps/ref_rejected": -272.5431213378906,
"logps/rejected": -474.4503479003906,
"loss": 2.0081,
"margin_dpo/margin_mean": 77.36898040771484,
"margin_dpo/margin_std": 105.77632141113281,
"step": 437
},
{
"epoch": 0.9172774869109948,
"fcm_dpo/beta": 0.008693304844200611,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 41.027130126953125,
"fcm_dpo/q_t": 0.4252595901489258,
"grad_norm": 48.469844818115234,
"learning_rate": 1.06489699136324e-08,
"logits/chosen": -0.8260249495506287,
"logits/rejected": -0.8411048650741577,
"logps/chosen": -390.3609619140625,
"logps/ref_chosen": -259.3190612792969,
"logps/ref_rejected": -241.00140380859375,
"logps/rejected": -413.0704040527344,
"loss": 2.4809,
"margin_dpo/margin_mean": 41.027130126953125,
"margin_dpo/margin_std": 101.3244400024414,
"step": 438
},
{
"epoch": 0.9193717277486911,
"fcm_dpo/beta": 0.008708984591066837,
"fcm_dpo/delta": 0.018020011484622955,
"fcm_dpo/margin": 66.94577026367188,
"fcm_dpo/q_t": 0.37868958711624146,
"grad_norm": 45.784629821777344,
"learning_rate": 1.0126756596375685e-08,
"logits/chosen": -0.8205324411392212,
"logits/rejected": -0.827317476272583,
"logps/chosen": -388.9625244140625,
"logps/ref_chosen": -257.1243896484375,
"logps/ref_rejected": -243.20416259765625,
"logps/rejected": -441.988037109375,
"loss": 2.0946,
"margin_dpo/margin_mean": 66.94577026367188,
"margin_dpo/margin_std": 98.32827758789062,
"step": 439
},
{
"epoch": 0.9214659685863874,
"fcm_dpo/beta": 0.00882963091135025,
"fcm_dpo/delta": 0.13757984340190887,
"fcm_dpo/margin": 53.09690856933594,
"fcm_dpo/q_t": 0.3998752236366272,
"grad_norm": 41.94895935058594,
"learning_rate": 9.617406953185136e-09,
"logits/chosen": -0.8745940327644348,
"logits/rejected": -0.869692862033844,
"logps/chosen": -446.4129333496094,
"logps/ref_chosen": -307.5315246582031,
"logps/ref_rejected": -264.35064697265625,
"logps/rejected": -456.3289794921875,
"loss": 2.233,
"margin_dpo/margin_mean": 53.09690856933594,
"margin_dpo/margin_std": 88.46170043945312,
"step": 440
},
{
"epoch": 0.9235602094240838,
"fcm_dpo/beta": 0.008752807974815369,
"fcm_dpo/delta": -0.08738674968481064,
"fcm_dpo/margin": 77.8499984741211,
"fcm_dpo/q_t": 0.3549300730228424,
"grad_norm": 40.734012603759766,
"learning_rate": 9.12094829893642e-09,
"logits/chosen": -0.823627233505249,
"logits/rejected": -0.8097070455551147,
"logps/chosen": -435.1826477050781,
"logps/ref_chosen": -309.98199462890625,
"logps/ref_rejected": -297.49810791015625,
"logps/rejected": -500.5487976074219,
"loss": 1.9098,
"margin_dpo/margin_mean": 77.8499984741211,
"margin_dpo/margin_std": 91.17788696289062,
"step": 441
},
{
"epoch": 0.9256544502617801,
"fcm_dpo/beta": 0.008837632834911346,
"fcm_dpo/delta": 0.09644545614719391,
"fcm_dpo/margin": 57.5306282043457,
"fcm_dpo/q_t": 0.39469730854034424,
"grad_norm": 40.97517776489258,
"learning_rate": 8.637407257200496e-09,
"logits/chosen": -0.8937469124794006,
"logits/rejected": -0.8575794696807861,
"logps/chosen": -419.60052490234375,
"logps/ref_chosen": -278.9791564941406,
"logps/ref_rejected": -242.87661743164062,
"logps/rejected": -441.02862548828125,
"loss": 2.2569,
"margin_dpo/margin_mean": 57.53062438964844,
"margin_dpo/margin_std": 102.94535064697266,
"step": 442
},
{
"epoch": 0.9277486910994764,
"fcm_dpo/beta": 0.008864030241966248,
"fcm_dpo/delta": 0.029824750497937202,
"fcm_dpo/margin": 64.51673889160156,
"fcm_dpo/q_t": 0.37917160987854004,
"grad_norm": 44.149391174316406,
"learning_rate": 8.166809758815895e-09,
"logits/chosen": -0.8136034607887268,
"logits/rejected": -0.8338358998298645,
"logps/chosen": -402.213623046875,
"logps/ref_chosen": -273.55902099609375,
"logps/ref_rejected": -264.0243835449219,
"logps/rejected": -457.1957092285156,
"loss": 2.1235,
"margin_dpo/margin_mean": 64.51673889160156,
"margin_dpo/margin_std": 97.72688293457031,
"step": 443
},
{
"epoch": 0.9298429319371728,
"fcm_dpo/beta": 0.008833720348775387,
"fcm_dpo/delta": -0.034253042191267014,
"fcm_dpo/margin": 71.55357360839844,
"fcm_dpo/q_t": 0.3740062713623047,
"grad_norm": 44.894142150878906,
"learning_rate": 7.709181040498253e-09,
"logits/chosen": -0.8303347826004028,
"logits/rejected": -0.8188279271125793,
"logps/chosen": -426.9786376953125,
"logps/ref_chosen": -298.1441650390625,
"logps/ref_rejected": -268.0523681640625,
"logps/rejected": -468.4404296875,
"loss": 2.1326,
"margin_dpo/margin_mean": 71.55357360839844,
"margin_dpo/margin_std": 113.65848541259766,
"step": 444
},
{
"epoch": 0.9319371727748691,
"fcm_dpo/beta": 0.008890610188245773,
"fcm_dpo/delta": 0.06419478356838226,
"fcm_dpo/margin": 60.654537200927734,
"fcm_dpo/q_t": 0.3909081518650055,
"grad_norm": 41.07414245605469,
"learning_rate": 7.2645456434869965e-09,
"logits/chosen": -0.8898538947105408,
"logits/rejected": -0.903307318687439,
"logps/chosen": -383.76568603515625,
"logps/ref_chosen": -254.54071044921875,
"logps/ref_rejected": -264.2383728027344,
"logps/rejected": -454.117919921875,
"loss": 2.2104,
"margin_dpo/margin_mean": 60.654537200927734,
"margin_dpo/margin_std": 105.50547790527344,
"step": 445
},
{
"epoch": 0.9340314136125655,
"fcm_dpo/beta": 0.008875961415469646,
"fcm_dpo/delta": -0.016490750014781952,
"fcm_dpo/margin": 69.34178161621094,
"fcm_dpo/q_t": 0.3712490200996399,
"grad_norm": 42.30801010131836,
"learning_rate": 6.832927412229017e-09,
"logits/chosen": -0.8205384612083435,
"logits/rejected": -0.8248943090438843,
"logps/chosen": -432.05029296875,
"logps/ref_chosen": -306.7224426269531,
"logps/ref_rejected": -266.3731689453125,
"logps/rejected": -461.04278564453125,
"loss": 2.0884,
"margin_dpo/margin_mean": 69.34178161621094,
"margin_dpo/margin_std": 102.84835815429688,
"step": 446
},
{
"epoch": 0.9361256544502617,
"fcm_dpo/beta": 0.008795022964477539,
"fcm_dpo/delta": -0.09160717576742172,
"fcm_dpo/margin": 77.91912841796875,
"fcm_dpo/q_t": 0.3552432656288147,
"grad_norm": 37.48695755004883,
"learning_rate": 6.414349493100129e-09,
"logits/chosen": -0.8199048638343811,
"logits/rejected": -0.8173272609710693,
"logps/chosen": -382.1482238769531,
"logps/ref_chosen": -260.5173034667969,
"logps/ref_rejected": -236.46876525878906,
"logps/rejected": -436.018798828125,
"loss": 1.9044,
"margin_dpo/margin_mean": 77.91912841796875,
"margin_dpo/margin_std": 91.65718078613281,
"step": 447
},
{
"epoch": 0.9382198952879581,
"fcm_dpo/beta": 0.008778293617069721,
"fcm_dpo/delta": -0.019039252772927284,
"fcm_dpo/margin": 70.38517761230469,
"fcm_dpo/q_t": 0.37277084589004517,
"grad_norm": 51.16434860229492,
"learning_rate": 6.0088343331638756e-09,
"logits/chosen": -0.8248725533485413,
"logits/rejected": -0.8216261863708496,
"logps/chosen": -401.23046875,
"logps/ref_chosen": -268.78704833984375,
"logps/ref_rejected": -262.1793212890625,
"logps/rejected": -465.0079040527344,
"loss": 2.0479,
"margin_dpo/margin_mean": 70.38518524169922,
"margin_dpo/margin_std": 98.59706115722656,
"step": 448
},
{
"epoch": 0.9403141361256544,
"fcm_dpo/beta": 0.008752609603106976,
"fcm_dpo/delta": -0.029301224276423454,
"fcm_dpo/margin": 71.6883316040039,
"fcm_dpo/q_t": 0.3656477928161621,
"grad_norm": 51.05447769165039,
"learning_rate": 5.616403678967624e-09,
"logits/chosen": -0.9058056473731995,
"logits/rejected": -0.8970423936843872,
"logps/chosen": -450.223388671875,
"logps/ref_chosen": -330.9514465332031,
"logps/ref_rejected": -239.7663116455078,
"logps/rejected": -430.7265625,
"loss": 2.0485,
"margin_dpo/margin_mean": 71.6883316040039,
"margin_dpo/margin_std": 100.07246398925781,
"step": 449
},
{
"epoch": 0.9424083769633508,
"fcm_dpo/beta": 0.008786321617662907,
"fcm_dpo/delta": 0.03844215348362923,
"fcm_dpo/margin": 64.15890502929688,
"fcm_dpo/q_t": 0.3801519274711609,
"grad_norm": 42.531436920166016,
"learning_rate": 5.2370785753763356e-09,
"logits/chosen": -0.8197349309921265,
"logits/rejected": -0.8217321634292603,
"logps/chosen": -420.3738098144531,
"logps/ref_chosen": -284.2654113769531,
"logps/ref_rejected": -250.54696655273438,
"logps/rejected": -450.81427001953125,
"loss": 2.0634,
"margin_dpo/margin_mean": 64.15890502929688,
"margin_dpo/margin_std": 85.9537124633789,
"step": 450
},
{
"epoch": 0.9445026178010472,
"fcm_dpo/beta": 0.008835596963763237,
"fcm_dpo/delta": 0.05592557415366173,
"fcm_dpo/margin": 61.922889709472656,
"fcm_dpo/q_t": 0.3888331353664398,
"grad_norm": 51.49987030029297,
"learning_rate": 4.8708793644441086e-09,
"logits/chosen": -0.8302224278450012,
"logits/rejected": -0.8115442991256714,
"logps/chosen": -444.387451171875,
"logps/ref_chosen": -302.3209228515625,
"logps/ref_rejected": -254.0835418701172,
"logps/rejected": -458.07293701171875,
"loss": 2.1937,
"margin_dpo/margin_mean": 61.922889709472656,
"margin_dpo/margin_std": 103.0977783203125,
"step": 451
},
{
"epoch": 0.9465968586387434,
"fcm_dpo/beta": 0.008819738402962685,
"fcm_dpo/delta": -0.017964571714401245,
"fcm_dpo/margin": 69.9403305053711,
"fcm_dpo/q_t": 0.3747006058692932,
"grad_norm": 56.56977844238281,
"learning_rate": 4.517825684323323e-09,
"logits/chosen": -0.877912700176239,
"logits/rejected": -0.8617779016494751,
"logps/chosen": -424.0169677734375,
"logps/ref_chosen": -299.39215087890625,
"logps/ref_rejected": -284.34954833984375,
"logps/rejected": -478.9146728515625,
"loss": 2.0809,
"margin_dpo/margin_mean": 69.9403305053711,
"margin_dpo/margin_std": 100.61822509765625,
"step": 452
},
{
"epoch": 0.9486910994764398,
"fcm_dpo/beta": 0.008782096207141876,
"fcm_dpo/delta": -0.04277113825082779,
"fcm_dpo/margin": 72.87870788574219,
"fcm_dpo/q_t": 0.36750224232673645,
"grad_norm": 43.342140197753906,
"learning_rate": 4.1779364682113794e-09,
"logits/chosen": -0.8177959322929382,
"logits/rejected": -0.8149423003196716,
"logps/chosen": -457.3665771484375,
"logps/ref_chosen": -324.65167236328125,
"logps/ref_rejected": -304.15191650390625,
"logps/rejected": -509.7455139160156,
"loss": 2.0149,
"margin_dpo/margin_mean": 72.87870788574219,
"margin_dpo/margin_std": 100.32862854003906,
"step": 453
},
{
"epoch": 0.9507853403141361,
"fcm_dpo/beta": 0.00875700730830431,
"fcm_dpo/delta": -0.0286086592823267,
"fcm_dpo/margin": 71.57843017578125,
"fcm_dpo/q_t": 0.36884886026382446,
"grad_norm": 38.213104248046875,
"learning_rate": 3.851229943335393e-09,
"logits/chosen": -0.8717144131660461,
"logits/rejected": -0.8810984492301941,
"logps/chosen": -429.33184814453125,
"logps/ref_chosen": -299.61175537109375,
"logps/ref_rejected": -303.73187255859375,
"logps/rejected": -505.0303955078125,
"loss": 2.0718,
"margin_dpo/margin_mean": 71.57843017578125,
"margin_dpo/margin_std": 103.91864776611328,
"step": 454
},
{
"epoch": 0.9528795811518325,
"fcm_dpo/beta": 0.00875700730830431,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 54.69949722290039,
"fcm_dpo/q_t": 0.4014514982700348,
"grad_norm": 40.12241744995117,
"learning_rate": 3.5377236299748147e-09,
"logits/chosen": -0.8415601253509521,
"logits/rejected": -0.8486207127571106,
"logps/chosen": -400.47161865234375,
"logps/ref_chosen": -273.6116943359375,
"logps/ref_rejected": -274.45379638671875,
"logps/rejected": -456.01324462890625,
"loss": 2.2778,
"margin_dpo/margin_mean": 54.69949722290039,
"margin_dpo/margin_std": 102.8629150390625,
"step": 455
},
{
"epoch": 0.9549738219895288,
"fcm_dpo/beta": 0.008689460344612598,
"fcm_dpo/delta": -0.07743367552757263,
"fcm_dpo/margin": 77.35903930664062,
"fcm_dpo/q_t": 0.3737419843673706,
"grad_norm": 43.01519775390625,
"learning_rate": 3.2374343405217884e-09,
"logits/chosen": -0.7665938138961792,
"logits/rejected": -0.7807924747467041,
"logps/chosen": -465.2822265625,
"logps/ref_chosen": -322.17193603515625,
"logps/ref_rejected": -294.54461669921875,
"logps/rejected": -515.0139770507812,
"loss": 2.143,
"margin_dpo/margin_mean": 77.35903930664062,
"margin_dpo/margin_std": 131.53829956054688,
"step": 456
},
{
"epoch": 0.9570680628272251,
"fcm_dpo/beta": 0.008638950996100903,
"fcm_dpo/delta": -0.05829649791121483,
"fcm_dpo/margin": 75.75804138183594,
"fcm_dpo/q_t": 0.358904629945755,
"grad_norm": 39.71597671508789,
"learning_rate": 2.9503781785795713e-09,
"logits/chosen": -0.8269126415252686,
"logits/rejected": -0.831961452960968,
"logps/chosen": -444.3221435546875,
"logps/ref_chosen": -307.7962341308594,
"logps/ref_rejected": -274.5443420410156,
"logps/rejected": -486.8282775878906,
"loss": 2.0587,
"margin_dpo/margin_mean": 75.75804138183594,
"margin_dpo/margin_std": 109.29644012451172,
"step": 457
},
{
"epoch": 0.9591623036649215,
"fcm_dpo/beta": 0.008662523701786995,
"fcm_dpo/delta": 0.027249781414866447,
"fcm_dpo/margin": 66.298583984375,
"fcm_dpo/q_t": 0.3828786313533783,
"grad_norm": 37.38143539428711,
"learning_rate": 2.6765705380989432e-09,
"logits/chosen": -0.8471473455429077,
"logits/rejected": -0.8355039358139038,
"logps/chosen": -431.59747314453125,
"logps/ref_chosen": -297.0316467285156,
"logps/ref_rejected": -276.110107421875,
"logps/rejected": -476.9745178222656,
"loss": 2.1579,
"margin_dpo/margin_mean": 66.298583984375,
"margin_dpo/margin_std": 106.61160278320312,
"step": 458
},
{
"epoch": 0.9612565445026178,
"fcm_dpo/beta": 0.008732744492590427,
"fcm_dpo/delta": 0.08073625713586807,
"fcm_dpo/margin": 59.943702697753906,
"fcm_dpo/q_t": 0.3928934633731842,
"grad_norm": 42.83574676513672,
"learning_rate": 2.416026102552732e-09,
"logits/chosen": -0.8932129144668579,
"logits/rejected": -0.8888850212097168,
"logps/chosen": -421.65960693359375,
"logps/ref_chosen": -293.5252990722656,
"logps/ref_rejected": -289.30279541015625,
"logps/rejected": -477.38079833984375,
"loss": 2.2107,
"margin_dpo/margin_mean": 59.94369888305664,
"margin_dpo/margin_std": 101.44248962402344,
"step": 459
},
{
"epoch": 0.9633507853403142,
"fcm_dpo/beta": 0.008813105523586273,
"fcm_dpo/delta": 0.09160228818655014,
"fcm_dpo/margin": 58.217403411865234,
"fcm_dpo/q_t": 0.3912573456764221,
"grad_norm": 42.28700637817383,
"learning_rate": 2.168758844148272e-09,
"logits/chosen": -0.861108124256134,
"logits/rejected": -0.8648127317428589,
"logps/chosen": -449.7640075683594,
"logps/ref_chosen": -318.7803649902344,
"logps/ref_rejected": -258.8020935058594,
"logps/rejected": -448.0031433105469,
"loss": 2.1995,
"margin_dpo/margin_mean": 58.217403411865234,
"margin_dpo/margin_std": 95.59834289550781,
"step": 460
},
{
"epoch": 0.9654450261780104,
"fcm_dpo/beta": 0.00883092824369669,
"fcm_dpo/delta": 0.02020292542874813,
"fcm_dpo/margin": 65.78805541992188,
"fcm_dpo/q_t": 0.3831380009651184,
"grad_norm": 43.58109664916992,
"learning_rate": 1.9347820230782295e-09,
"logits/chosen": -0.8504255414009094,
"logits/rejected": -0.8694344162940979,
"logps/chosen": -373.186767578125,
"logps/ref_chosen": -243.90997314453125,
"logps/ref_rejected": -232.6463623046875,
"logps/rejected": -427.711181640625,
"loss": 2.1791,
"margin_dpo/margin_mean": 65.78805541992188,
"margin_dpo/margin_std": 109.15921020507812,
"step": 461
},
{
"epoch": 0.9675392670157068,
"fcm_dpo/beta": 0.008751820772886276,
"fcm_dpo/delta": -0.08998344093561172,
"fcm_dpo/margin": 78.13260650634766,
"fcm_dpo/q_t": 0.3626604676246643,
"grad_norm": 45.95448684692383,
"learning_rate": 1.7141081868094209e-09,
"logits/chosen": -0.8504431247711182,
"logits/rejected": -0.8138877153396606,
"logps/chosen": -474.6099853515625,
"logps/ref_chosen": -344.0909729003906,
"logps/ref_rejected": -252.44119262695312,
"logps/rejected": -461.0928039550781,
"loss": 2.0458,
"margin_dpo/margin_mean": 78.13260650634766,
"margin_dpo/margin_std": 113.3060302734375,
"step": 462
},
{
"epoch": 0.9696335078534032,
"fcm_dpo/beta": 0.008806941099464893,
"fcm_dpo/delta": 0.06278365105390549,
"fcm_dpo/margin": 61.38338088989258,
"fcm_dpo/q_t": 0.38775062561035156,
"grad_norm": 42.34247970581055,
"learning_rate": 1.5067491694100153e-09,
"logits/chosen": -0.8756191730499268,
"logits/rejected": -0.8500838279724121,
"logps/chosen": -425.34808349609375,
"logps/ref_chosen": -297.1424560546875,
"logps/ref_rejected": -234.0263671875,
"logps/rejected": -423.6153869628906,
"loss": 2.2023,
"margin_dpo/margin_mean": 61.38337707519531,
"margin_dpo/margin_std": 104.2444076538086,
"step": 463
},
{
"epoch": 0.9717277486910995,
"fcm_dpo/beta": 0.008874843828380108,
"fcm_dpo/delta": 0.0768059715628624,
"fcm_dpo/margin": 59.40700912475586,
"fcm_dpo/q_t": 0.39218664169311523,
"grad_norm": 57.075843811035156,
"learning_rate": 1.3127160909147672e-09,
"logits/chosen": -0.8419668078422546,
"logits/rejected": -0.8657316565513611,
"logps/chosen": -406.98358154296875,
"logps/ref_chosen": -265.71075439453125,
"logps/ref_rejected": -256.4163818359375,
"logps/rejected": -457.0962219238281,
"loss": 2.2274,
"margin_dpo/margin_mean": 59.407012939453125,
"margin_dpo/margin_std": 103.13267517089844,
"step": 464
},
{
"epoch": 0.9738219895287958,
"fcm_dpo/beta": 0.008788841776549816,
"fcm_dpo/delta": -0.09737826883792877,
"fcm_dpo/margin": 78.57921600341797,
"fcm_dpo/q_t": 0.359138160943985,
"grad_norm": 36.587318420410156,
"learning_rate": 1.1320193567288527e-09,
"logits/chosen": -0.9030634760856628,
"logits/rejected": -0.8796818256378174,
"logps/chosen": -416.7887268066406,
"logps/ref_chosen": -293.1527404785156,
"logps/ref_rejected": -293.7060852050781,
"logps/rejected": -495.92132568359375,
"loss": 1.9913,
"margin_dpo/margin_mean": 78.57921600341797,
"margin_dpo/margin_std": 99.85714721679688,
"step": 465
},
{
"epoch": 0.9759162303664921,
"fcm_dpo/beta": 0.008672168478369713,
"fcm_dpo/delta": -0.13364030420780182,
"fcm_dpo/margin": 83.47406005859375,
"fcm_dpo/q_t": 0.3470858335494995,
"grad_norm": 36.336116790771484,
"learning_rate": 9.64668657069706e-10,
"logits/chosen": -0.8266316652297974,
"logits/rejected": -0.7887781858444214,
"logps/chosen": -378.3794250488281,
"logps/ref_chosen": -261.4775695800781,
"logps/ref_rejected": -248.36630249023438,
"logps/rejected": -448.7422180175781,
"loss": 1.8533,
"margin_dpo/margin_mean": 83.47406005859375,
"margin_dpo/margin_std": 92.80216217041016,
"step": 466
},
{
"epoch": 0.9780104712041885,
"fcm_dpo/beta": 0.008739949204027653,
"fcm_dpo/delta": 0.07785549014806747,
"fcm_dpo/margin": 60.20922088623047,
"fcm_dpo/q_t": 0.39485564827919006,
"grad_norm": 47.46133804321289,
"learning_rate": 8.106729664475176e-10,
"logits/chosen": -0.8089423179626465,
"logits/rejected": -0.8069463968276978,
"logps/chosen": -399.79339599609375,
"logps/ref_chosen": -266.354248046875,
"logps/ref_rejected": -277.7558288574219,
"logps/rejected": -471.4041748046875,
"loss": 2.2684,
"margin_dpo/margin_mean": 60.20921325683594,
"margin_dpo/margin_std": 112.47489929199219,
"step": 467
},
{
"epoch": 0.9801047120418848,
"fcm_dpo/beta": 0.008801182731986046,
"fcm_dpo/delta": 0.06981690973043442,
"fcm_dpo/margin": 60.66203308105469,
"fcm_dpo/q_t": 0.3883158266544342,
"grad_norm": 43.128841400146484,
"learning_rate": 6.700405431837585e-10,
"logits/chosen": -0.8879643678665161,
"logits/rejected": -0.8662184476852417,
"logps/chosen": -446.94769287109375,
"logps/ref_chosen": -317.9631652832031,
"logps/ref_rejected": -261.8691101074219,
"logps/rejected": -451.51568603515625,
"loss": 2.1956,
"margin_dpo/margin_mean": 60.66202926635742,
"margin_dpo/margin_std": 101.31952667236328,
"step": 468
},
{
"epoch": 0.9821989528795811,
"fcm_dpo/beta": 0.008728330954909325,
"fcm_dpo/delta": -0.08311955630779266,
"fcm_dpo/margin": 77.61679077148438,
"fcm_dpo/q_t": 0.3604065179824829,
"grad_norm": 40.32331466674805,
"learning_rate": 5.427789289685347e-10,
"logits/chosen": -0.8244286775588989,
"logits/rejected": -0.8153553009033203,
"logps/chosen": -448.32989501953125,
"logps/ref_chosen": -324.8868103027344,
"logps/ref_rejected": -264.0347595214844,
"logps/rejected": -465.09466552734375,
"loss": 2.0197,
"margin_dpo/margin_mean": 77.61678314208984,
"margin_dpo/margin_std": 108.5995864868164,
"step": 469
},
{
"epoch": 0.9842931937172775,
"fcm_dpo/beta": 0.008674891665577888,
"fcm_dpo/delta": -0.06141306459903717,
"fcm_dpo/margin": 75.77772521972656,
"fcm_dpo/q_t": 0.365027517080307,
"grad_norm": 36.93330764770508,
"learning_rate": 4.288949484559934e-10,
"logits/chosen": -0.8245395421981812,
"logits/rejected": -0.8235185146331787,
"logps/chosen": -435.5399475097656,
"logps/ref_chosen": -314.7042236328125,
"logps/ref_rejected": -259.2235107421875,
"logps/rejected": -455.8369140625,
"loss": 2.0079,
"margin_dpo/margin_mean": 75.77772521972656,
"margin_dpo/margin_std": 102.35045623779297,
"step": 470
},
{
"epoch": 0.9863874345549738,
"fcm_dpo/beta": 0.008701599203050137,
"fcm_dpo/delta": 0.030739355832338333,
"fcm_dpo/margin": 65.62164306640625,
"fcm_dpo/q_t": 0.3876665234565735,
"grad_norm": 44.551334381103516,
"learning_rate": 3.2839470889836627e-10,
"logits/chosen": -0.8671724796295166,
"logits/rejected": -0.8626248240470886,
"logps/chosen": -428.4151306152344,
"logps/ref_chosen": -292.5748291015625,
"logps/ref_rejected": -298.7525329589844,
"logps/rejected": -500.2144775390625,
"loss": 2.166,
"margin_dpo/margin_mean": 65.62164306640625,
"margin_dpo/margin_std": 108.62025451660156,
"step": 471
},
{
"epoch": 0.9884816753926702,
"fcm_dpo/beta": 0.008667578920722008,
"fcm_dpo/delta": -0.03917299956083298,
"fcm_dpo/margin": 73.45465850830078,
"fcm_dpo/q_t": 0.36951494216918945,
"grad_norm": 36.73457336425781,
"learning_rate": 2.412835998185092e-10,
"logits/chosen": -0.8567590117454529,
"logits/rejected": -0.8757361173629761,
"logps/chosen": -363.5979919433594,
"logps/ref_chosen": -243.3738250732422,
"logps/ref_rejected": -251.12542724609375,
"logps/rejected": -444.8042297363281,
"loss": 1.998,
"margin_dpo/margin_mean": 73.45465850830078,
"margin_dpo/margin_std": 97.12403106689453,
"step": 472
},
{
"epoch": 0.9905759162303664,
"fcm_dpo/beta": 0.00859910249710083,
"fcm_dpo/delta": -0.07931700348854065,
"fcm_dpo/margin": 78.37448120117188,
"fcm_dpo/q_t": 0.35735899209976196,
"grad_norm": 47.21487808227539,
"learning_rate": 1.6756629272085544e-10,
"logits/chosen": -0.8321959972381592,
"logits/rejected": -0.8360787034034729,
"logps/chosen": -412.3969421386719,
"logps/ref_chosen": -286.3286437988281,
"logps/ref_rejected": -258.65032958984375,
"logps/rejected": -463.0931396484375,
"loss": 1.9668,
"margin_dpo/margin_mean": 78.37447357177734,
"margin_dpo/margin_std": 100.45634460449219,
"step": 473
},
{
"epoch": 0.9926701570680628,
"fcm_dpo/beta": 0.008656758815050125,
"fcm_dpo/delta": 0.06682530045509338,
"fcm_dpo/margin": 62.0035285949707,
"fcm_dpo/q_t": 0.385443776845932,
"grad_norm": 48.544525146484375,
"learning_rate": 1.072467408408384e-10,
"logits/chosen": -0.8695976138114929,
"logits/rejected": -0.8695297837257385,
"logps/chosen": -419.8274841308594,
"logps/ref_chosen": -288.08966064453125,
"logps/ref_rejected": -266.700439453125,
"logps/rejected": -460.44183349609375,
"loss": 2.1319,
"margin_dpo/margin_mean": 62.00352478027344,
"margin_dpo/margin_std": 90.4610595703125,
"step": 474
},
{
"epoch": 0.9947643979057592,
"fcm_dpo/beta": 0.00870420504361391,
"fcm_dpo/delta": 0.05465860292315483,
"fcm_dpo/margin": 62.99602508544922,
"fcm_dpo/q_t": 0.3868858218193054,
"grad_norm": 47.711673736572266,
"learning_rate": 6.032817893297793e-11,
"logits/chosen": -0.8438542485237122,
"logits/rejected": -0.8592179417610168,
"logps/chosen": -377.9834289550781,
"logps/ref_chosen": -256.0030517578125,
"logps/ref_rejected": -244.49240112304688,
"logps/rejected": -429.46881103515625,
"loss": 2.1336,
"margin_dpo/margin_mean": 62.99601745605469,
"margin_dpo/margin_std": 95.35174560546875,
"step": 475
},
{
"epoch": 0.9968586387434555,
"fcm_dpo/beta": 0.008708557114005089,
"fcm_dpo/delta": 0.004998601041734219,
"fcm_dpo/margin": 68.35792541503906,
"fcm_dpo/q_t": 0.38140690326690674,
"grad_norm": 43.18914794921875,
"learning_rate": 2.6813123097352287e-11,
"logits/chosen": -0.9018619060516357,
"logits/rejected": -0.8689190149307251,
"logps/chosen": -440.60888671875,
"logps/ref_chosen": -321.4674987792969,
"logps/ref_rejected": -295.05810546875,
"logps/rejected": -482.55743408203125,
"loss": 2.1453,
"margin_dpo/margin_mean": 68.35792541503906,
"margin_dpo/margin_std": 109.48751068115234,
"step": 476
},
{
"epoch": 0.9989528795811519,
"fcm_dpo/beta": 0.008701276034116745,
"fcm_dpo/delta": -0.008364125154912472,
"fcm_dpo/margin": 69.85820007324219,
"fcm_dpo/q_t": 0.37900084257125854,
"grad_norm": 46.994117736816406,
"learning_rate": 6.7033706447061635e-12,
"logits/chosen": -0.8022258281707764,
"logits/rejected": -0.8117117881774902,
"logps/chosen": -411.8818664550781,
"logps/ref_chosen": -276.7939758300781,
"logps/ref_rejected": -244.83456420898438,
"logps/rejected": -449.7806091308594,
"loss": 2.1897,
"margin_dpo/margin_mean": 69.85820007324219,
"margin_dpo/margin_std": 114.87376403808594,
"step": 477
},
{
"epoch": 0.9989528795811519,
"step": 477,
"total_flos": 0.0,
"train_loss": 2.2929351502244577,
"train_runtime": 6534.1855,
"train_samples_per_second": 9.356,
"train_steps_per_second": 0.073
}
],
"logging_steps": 1,
"max_steps": 477,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}