Files
llama3-8b-base-new-method-q…/trainer_state.json
ModelHub XC 5155db5af1 初始化项目,由ModelHub XC社区提供模型
Model: W-61/llama3-8b-base-new-method-q_t-0.4-s_star0.6
Source: Original Platform
2026-05-13 08:00:38 +08:00

9107 lines
333 KiB
JSON

{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9989528795811519,
"eval_steps": 500,
"global_step": 477,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0020942408376963353,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.02793481945991516,
"fcm_dpo/q_t": 0.500069797039032,
"grad_norm": 28.588409423828125,
"learning_rate": 0.0,
"logits/chosen": -0.5906078815460205,
"logits/rejected": -0.6050581932067871,
"logps/chosen": -275.48590087890625,
"logps/ref_chosen": -275.43902587890625,
"logps/ref_rejected": -223.14576721191406,
"logps/rejected": -223.16473388671875,
"loss": 5.5463,
"margin_dpo/margin_mean": -0.02793477475643158,
"margin_dpo/margin_std": 0.5724214911460876,
"step": 1
},
{
"epoch": 0.004188481675392671,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.014312177896499634,
"fcm_dpo/q_t": 0.4999642074108124,
"grad_norm": 27.878114700317383,
"learning_rate": 1.0416666666666666e-08,
"logits/chosen": -0.6574729681015015,
"logits/rejected": -0.6464410424232483,
"logps/chosen": -264.7165222167969,
"logps/ref_chosen": -264.7611083984375,
"logps/ref_rejected": -242.5597686767578,
"logps/rejected": -242.52951049804688,
"loss": 5.5446,
"margin_dpo/margin_mean": 0.014312252402305603,
"margin_dpo/margin_std": 0.6423971652984619,
"step": 2
},
{
"epoch": 0.0062827225130890054,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.004927471280097961,
"fcm_dpo/q_t": 0.4999876916408539,
"grad_norm": 25.813234329223633,
"learning_rate": 2.083333333333333e-08,
"logits/chosen": -0.6840659379959106,
"logits/rejected": -0.7352093458175659,
"logps/chosen": -274.1263122558594,
"logps/ref_chosen": -274.1018981933594,
"logps/ref_rejected": -286.5882568359375,
"logps/rejected": -286.61761474609375,
"loss": 5.545,
"margin_dpo/margin_mean": 0.0049266517162323,
"margin_dpo/margin_std": 0.6733812093734741,
"step": 3
},
{
"epoch": 0.008376963350785341,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.13576093316078186,
"fcm_dpo/q_t": 0.49966058135032654,
"grad_norm": 31.608015060424805,
"learning_rate": 3.125e-08,
"logits/chosen": -0.61723792552948,
"logits/rejected": -0.6116781234741211,
"logps/chosen": -329.80804443359375,
"logps/ref_chosen": -329.8382568359375,
"logps/ref_rejected": -303.2850646972656,
"logps/rejected": -303.39056396484375,
"loss": 5.5398,
"margin_dpo/margin_mean": 0.13576152920722961,
"margin_dpo/margin_std": 0.8702787756919861,
"step": 4
},
{
"epoch": 0.010471204188481676,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.02006945013999939,
"fcm_dpo/q_t": 0.4999498128890991,
"grad_norm": 29.561357498168945,
"learning_rate": 4.166666666666666e-08,
"logits/chosen": -0.5704169273376465,
"logits/rejected": -0.5865122675895691,
"logps/chosen": -301.6563720703125,
"logps/ref_chosen": -301.7389221191406,
"logps/ref_rejected": -274.7654724121094,
"logps/rejected": -274.7029724121094,
"loss": 5.5444,
"margin_dpo/margin_mean": 0.020069316029548645,
"margin_dpo/margin_std": 0.6975337862968445,
"step": 5
},
{
"epoch": 0.012565445026178011,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.07302632927894592,
"fcm_dpo/q_t": 0.499817430973053,
"grad_norm": 28.17999267578125,
"learning_rate": 5.208333333333333e-08,
"logits/chosen": -0.6796520352363586,
"logits/rejected": -0.6424388885498047,
"logps/chosen": -285.6215515136719,
"logps/ref_chosen": -285.6946716308594,
"logps/ref_rejected": -245.8200225830078,
"logps/rejected": -245.81993103027344,
"loss": 5.5423,
"margin_dpo/margin_mean": 0.07302609086036682,
"margin_dpo/margin_std": 0.7085909247398376,
"step": 6
},
{
"epoch": 0.014659685863874346,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.1700635403394699,
"fcm_dpo/q_t": 0.5004251003265381,
"grad_norm": 28.65284538269043,
"learning_rate": 6.25e-08,
"logits/chosen": -0.5784342288970947,
"logits/rejected": -0.611269474029541,
"logps/chosen": -264.7759704589844,
"logps/ref_chosen": -264.65545654296875,
"logps/ref_rejected": -253.10305786132812,
"logps/rejected": -253.05352783203125,
"loss": 5.552,
"margin_dpo/margin_mean": -0.17006349563598633,
"margin_dpo/margin_std": 0.7042044401168823,
"step": 7
},
{
"epoch": 0.016753926701570682,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.024522915482521057,
"fcm_dpo/q_t": 0.4999386966228485,
"grad_norm": 30.742538452148438,
"learning_rate": 7.291666666666667e-08,
"logits/chosen": -0.6803320646286011,
"logits/rejected": -0.6875563859939575,
"logps/chosen": -354.2235412597656,
"logps/ref_chosen": -354.1887512207031,
"logps/ref_rejected": -282.9112243652344,
"logps/rejected": -282.97052001953125,
"loss": 5.5443,
"margin_dpo/margin_mean": 0.0245237797498703,
"margin_dpo/margin_std": 0.7568092346191406,
"step": 8
},
{
"epoch": 0.018848167539267015,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.012213125824928284,
"fcm_dpo/q_t": 0.4999694526195526,
"grad_norm": 27.8194637298584,
"learning_rate": 8.333333333333333e-08,
"logits/chosen": -0.6387220025062561,
"logits/rejected": -0.6569886207580566,
"logps/chosen": -285.7725524902344,
"logps/ref_chosen": -285.76055908203125,
"logps/ref_rejected": -268.0285339355469,
"logps/rejected": -268.052734375,
"loss": 5.5448,
"margin_dpo/margin_mean": 0.012212991714477539,
"margin_dpo/margin_std": 0.738137423992157,
"step": 9
},
{
"epoch": 0.020942408376963352,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.07377111911773682,
"fcm_dpo/q_t": 0.49981561303138733,
"grad_norm": 26.536937713623047,
"learning_rate": 9.375e-08,
"logits/chosen": -0.6875832676887512,
"logits/rejected": -0.6820325255393982,
"logps/chosen": -251.8724822998047,
"logps/ref_chosen": -251.91238403320312,
"logps/ref_rejected": -226.45260620117188,
"logps/rejected": -226.48646545410156,
"loss": 5.5423,
"margin_dpo/margin_mean": 0.07377050817012787,
"margin_dpo/margin_std": 0.6598670482635498,
"step": 10
},
{
"epoch": 0.023036649214659685,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.01063111424446106,
"fcm_dpo/q_t": 0.4999734163284302,
"grad_norm": 29.03658676147461,
"learning_rate": 1.0416666666666667e-07,
"logits/chosen": -0.5948761701583862,
"logits/rejected": -0.6499456763267517,
"logps/chosen": -301.04718017578125,
"logps/ref_chosen": -301.08343505859375,
"logps/ref_rejected": -259.546630859375,
"logps/rejected": -259.52099609375,
"loss": 5.5448,
"margin_dpo/margin_mean": 0.01063111424446106,
"margin_dpo/margin_std": 0.7354652881622314,
"step": 11
},
{
"epoch": 0.025130890052356022,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.13213083148002625,
"fcm_dpo/q_t": 0.4996696412563324,
"grad_norm": 30.0300350189209,
"learning_rate": 1.1458333333333332e-07,
"logits/chosen": -0.5773444175720215,
"logits/rejected": -0.5371226668357849,
"logps/chosen": -287.5074462890625,
"logps/ref_chosen": -287.548095703125,
"logps/ref_rejected": -277.37945556640625,
"logps/rejected": -277.470947265625,
"loss": 5.54,
"margin_dpo/margin_mean": 0.1321302056312561,
"margin_dpo/margin_std": 0.7726021409034729,
"step": 12
},
{
"epoch": 0.027225130890052355,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.13757160305976868,
"fcm_dpo/q_t": 0.4996561110019684,
"grad_norm": 27.360511779785156,
"learning_rate": 1.25e-07,
"logits/chosen": -0.660868227481842,
"logits/rejected": -0.6686940789222717,
"logps/chosen": -270.62811279296875,
"logps/ref_chosen": -270.6664123535156,
"logps/ref_rejected": -274.6546936035156,
"logps/rejected": -274.7539978027344,
"loss": 5.5397,
"margin_dpo/margin_mean": 0.13757173717021942,
"margin_dpo/margin_std": 0.6561607718467712,
"step": 13
},
{
"epoch": 0.02931937172774869,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.05934774875640869,
"fcm_dpo/q_t": 0.49985164403915405,
"grad_norm": 28.293212890625,
"learning_rate": 1.3541666666666666e-07,
"logits/chosen": -0.6239809393882751,
"logits/rejected": -0.6529561281204224,
"logps/chosen": -281.5748596191406,
"logps/ref_chosen": -281.59320068359375,
"logps/ref_rejected": -263.52215576171875,
"logps/rejected": -263.5631408691406,
"loss": 5.5429,
"margin_dpo/margin_mean": 0.0593467652797699,
"margin_dpo/margin_std": 0.7482225894927979,
"step": 14
},
{
"epoch": 0.031413612565445025,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.039270758628845215,
"fcm_dpo/q_t": 0.4999018609523773,
"grad_norm": 30.250518798828125,
"learning_rate": 1.4583333333333335e-07,
"logits/chosen": -0.6411060094833374,
"logits/rejected": -0.6542255878448486,
"logps/chosen": -298.3626403808594,
"logps/ref_chosen": -298.4093322753906,
"logps/ref_rejected": -227.5626983642578,
"logps/rejected": -227.5552978515625,
"loss": 5.5437,
"margin_dpo/margin_mean": 0.03926950693130493,
"margin_dpo/margin_std": 0.6303021907806396,
"step": 15
},
{
"epoch": 0.033507853403141365,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.07741273194551468,
"fcm_dpo/q_t": 0.4998064637184143,
"grad_norm": 30.171316146850586,
"learning_rate": 1.5624999999999999e-07,
"logits/chosen": -0.6012479066848755,
"logits/rejected": -0.5982969999313354,
"logps/chosen": -293.8901062011719,
"logps/ref_chosen": -293.96661376953125,
"logps/ref_rejected": -250.78443908691406,
"logps/rejected": -250.78536987304688,
"loss": 5.5421,
"margin_dpo/margin_mean": 0.07741250097751617,
"margin_dpo/margin_std": 0.7642932534217834,
"step": 16
},
{
"epoch": 0.0356020942408377,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.07924064993858337,
"fcm_dpo/q_t": 0.4998018741607666,
"grad_norm": 27.790224075317383,
"learning_rate": 1.6666666666666665e-07,
"logits/chosen": -0.5656470656394958,
"logits/rejected": -0.5915025472640991,
"logps/chosen": -262.3228759765625,
"logps/ref_chosen": -262.39398193359375,
"logps/ref_rejected": -248.500244140625,
"logps/rejected": -248.5083465576172,
"loss": 5.5421,
"margin_dpo/margin_mean": 0.07924069464206696,
"margin_dpo/margin_std": 0.7225322723388672,
"step": 17
},
{
"epoch": 0.03769633507853403,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.0872071236371994,
"fcm_dpo/q_t": 0.499781996011734,
"grad_norm": 29.743511199951172,
"learning_rate": 1.7708333333333334e-07,
"logits/chosen": -0.6092166304588318,
"logits/rejected": -0.6151822805404663,
"logps/chosen": -293.66082763671875,
"logps/ref_chosen": -293.709228515625,
"logps/ref_rejected": -274.5875244140625,
"logps/rejected": -274.62628173828125,
"loss": 5.5417,
"margin_dpo/margin_mean": 0.08720706403255463,
"margin_dpo/margin_std": 0.7599306702613831,
"step": 18
},
{
"epoch": 0.039790575916230364,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.015616178512573242,
"fcm_dpo/q_t": 0.4999609589576721,
"grad_norm": 28.129470825195312,
"learning_rate": 1.875e-07,
"logits/chosen": -0.6186746954917908,
"logits/rejected": -0.6140046715736389,
"logps/chosen": -280.1886901855469,
"logps/ref_chosen": -280.26568603515625,
"logps/ref_rejected": -259.9742736816406,
"logps/rejected": -259.91290283203125,
"loss": 5.5446,
"margin_dpo/margin_mean": 0.015616029500961304,
"margin_dpo/margin_std": 0.7585482001304626,
"step": 19
},
{
"epoch": 0.041884816753926704,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.12203510105609894,
"fcm_dpo/q_t": 0.49969494342803955,
"grad_norm": 29.671567916870117,
"learning_rate": 1.9791666666666664e-07,
"logits/chosen": -0.6235091090202332,
"logits/rejected": -0.6559134125709534,
"logps/chosen": -303.7472229003906,
"logps/ref_chosen": -303.8954162597656,
"logps/ref_rejected": -260.214599609375,
"logps/rejected": -260.1884765625,
"loss": 5.5404,
"margin_dpo/margin_mean": 0.12203498184680939,
"margin_dpo/margin_std": 0.7693343162536621,
"step": 20
},
{
"epoch": 0.04397905759162304,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.07917946577072144,
"fcm_dpo/q_t": 0.4998020529747009,
"grad_norm": 35.06045913696289,
"learning_rate": 2.0833333333333333e-07,
"logits/chosen": -0.651059627532959,
"logits/rejected": -0.6805247664451599,
"logps/chosen": -301.47650146484375,
"logps/ref_chosen": -301.5334777832031,
"logps/ref_rejected": -280.28900146484375,
"logps/rejected": -280.3111572265625,
"loss": 5.5421,
"margin_dpo/margin_mean": 0.07917973399162292,
"margin_dpo/margin_std": 0.7579631805419922,
"step": 21
},
{
"epoch": 0.04607329842931937,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": -0.07917653024196625,
"fcm_dpo/q_t": 0.5001979470252991,
"grad_norm": 25.275522232055664,
"learning_rate": 2.1875e-07,
"logits/chosen": -0.6546105742454529,
"logits/rejected": -0.6562420129776001,
"logps/chosen": -259.986083984375,
"logps/ref_chosen": -259.9951477050781,
"logps/ref_rejected": -243.0721435546875,
"logps/rejected": -242.98394775390625,
"loss": 5.5484,
"margin_dpo/margin_mean": -0.0791759043931961,
"margin_dpo/margin_std": 0.7844414710998535,
"step": 22
},
{
"epoch": 0.048167539267015703,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.07579465210437775,
"fcm_dpo/q_t": 0.49981051683425903,
"grad_norm": 27.832441329956055,
"learning_rate": 2.2916666666666663e-07,
"logits/chosen": -0.6248008012771606,
"logits/rejected": -0.6592731475830078,
"logps/chosen": -282.06793212890625,
"logps/ref_chosen": -282.1807556152344,
"logps/ref_rejected": -265.0758056640625,
"logps/rejected": -265.0387878417969,
"loss": 5.5422,
"margin_dpo/margin_mean": 0.07579512894153595,
"margin_dpo/margin_std": 0.7000916600227356,
"step": 23
},
{
"epoch": 0.050261780104712044,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.3224449157714844,
"fcm_dpo/q_t": 0.49919387698173523,
"grad_norm": 29.807321548461914,
"learning_rate": 2.3958333333333335e-07,
"logits/chosen": -0.656727135181427,
"logits/rejected": -0.568871021270752,
"logps/chosen": -300.95025634765625,
"logps/ref_chosen": -301.17962646484375,
"logps/ref_rejected": -302.12786865234375,
"logps/rejected": -302.2209167480469,
"loss": 5.5324,
"margin_dpo/margin_mean": 0.32244449853897095,
"margin_dpo/margin_std": 0.8477628827095032,
"step": 24
},
{
"epoch": 0.05235602094240838,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.05949197709560394,
"fcm_dpo/q_t": 0.4998512864112854,
"grad_norm": 26.471641540527344,
"learning_rate": 2.5e-07,
"logits/chosen": -0.5939292311668396,
"logits/rejected": -0.6018354296684265,
"logps/chosen": -246.61839294433594,
"logps/ref_chosen": -246.74649047851562,
"logps/ref_rejected": -235.55638122558594,
"logps/rejected": -235.48777770996094,
"loss": 5.5429,
"margin_dpo/margin_mean": 0.05949154496192932,
"margin_dpo/margin_std": 0.7037143707275391,
"step": 25
},
{
"epoch": 0.05445026178010471,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.1720740646123886,
"fcm_dpo/q_t": 0.4995698034763336,
"grad_norm": 28.6167049407959,
"learning_rate": 2.604166666666667e-07,
"logits/chosen": -0.6558055877685547,
"logits/rejected": -0.6704913377761841,
"logps/chosen": -281.93994140625,
"logps/ref_chosen": -282.1955871582031,
"logps/ref_rejected": -235.3135528564453,
"logps/rejected": -235.22994995117188,
"loss": 5.5384,
"margin_dpo/margin_mean": 0.17207396030426025,
"margin_dpo/margin_std": 0.7740581035614014,
"step": 26
},
{
"epoch": 0.05654450261780105,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.17665477097034454,
"fcm_dpo/q_t": 0.49955838918685913,
"grad_norm": 27.75247573852539,
"learning_rate": 2.708333333333333e-07,
"logits/chosen": -0.652806282043457,
"logits/rejected": -0.6721222400665283,
"logps/chosen": -323.57098388671875,
"logps/ref_chosen": -323.8563537597656,
"logps/ref_rejected": -245.968017578125,
"logps/rejected": -245.85931396484375,
"loss": 5.5382,
"margin_dpo/margin_mean": 0.17665448784828186,
"margin_dpo/margin_std": 0.9216269850730896,
"step": 27
},
{
"epoch": 0.05863874345549738,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.18093740940093994,
"fcm_dpo/q_t": 0.49954766035079956,
"grad_norm": 26.308048248291016,
"learning_rate": 2.8125e-07,
"logits/chosen": -0.6253893971443176,
"logits/rejected": -0.6348061561584473,
"logps/chosen": -247.98081970214844,
"logps/ref_chosen": -248.24673461914062,
"logps/ref_rejected": -240.0382080078125,
"logps/rejected": -239.95323181152344,
"loss": 5.538,
"margin_dpo/margin_mean": 0.18093715608119965,
"margin_dpo/margin_std": 0.8086836934089661,
"step": 28
},
{
"epoch": 0.060732984293193716,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.2044949233531952,
"fcm_dpo/q_t": 0.4994887411594391,
"grad_norm": 29.518325805664062,
"learning_rate": 2.916666666666667e-07,
"logits/chosen": -0.5903065204620361,
"logits/rejected": -0.6122087240219116,
"logps/chosen": -317.87603759765625,
"logps/ref_chosen": -318.2564392089844,
"logps/ref_rejected": -286.75848388671875,
"logps/rejected": -286.5826110839844,
"loss": 5.5371,
"margin_dpo/margin_mean": 0.20449501276016235,
"margin_dpo/margin_std": 0.8431642055511475,
"step": 29
},
{
"epoch": 0.06282722513089005,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.26426297426223755,
"fcm_dpo/q_t": 0.49933937191963196,
"grad_norm": 28.8969783782959,
"learning_rate": 3.020833333333333e-07,
"logits/chosen": -0.6008409261703491,
"logits/rejected": -0.6181896328926086,
"logps/chosen": -252.7128143310547,
"logps/ref_chosen": -253.0491485595703,
"logps/ref_rejected": -261.30029296875,
"logps/rejected": -261.2282409667969,
"loss": 5.5347,
"margin_dpo/margin_mean": 0.26426294445991516,
"margin_dpo/margin_std": 0.8474119901657104,
"step": 30
},
{
"epoch": 0.06492146596858639,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.2159474790096283,
"fcm_dpo/q_t": 0.4994601905345917,
"grad_norm": 25.033519744873047,
"learning_rate": 3.1249999999999997e-07,
"logits/chosen": -0.6617273092269897,
"logits/rejected": -0.697493314743042,
"logps/chosen": -247.74754333496094,
"logps/ref_chosen": -248.15301513671875,
"logps/ref_rejected": -203.17703247070312,
"logps/rejected": -202.98751831054688,
"loss": 5.5367,
"margin_dpo/margin_mean": 0.2159472405910492,
"margin_dpo/margin_std": 0.9926141500473022,
"step": 31
},
{
"epoch": 0.06701570680628273,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.2026405930519104,
"fcm_dpo/q_t": 0.49949339032173157,
"grad_norm": 29.645679473876953,
"learning_rate": 3.2291666666666666e-07,
"logits/chosen": -0.6060912013053894,
"logits/rejected": -0.6101662516593933,
"logps/chosen": -305.0863037109375,
"logps/ref_chosen": -305.5399475097656,
"logps/ref_rejected": -267.6527099609375,
"logps/rejected": -267.4017028808594,
"loss": 5.5372,
"margin_dpo/margin_mean": 0.2026415467262268,
"margin_dpo/margin_std": 0.9702023267745972,
"step": 32
},
{
"epoch": 0.06910994764397906,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.37219977378845215,
"fcm_dpo/q_t": 0.49906954169273376,
"grad_norm": 28.309568405151367,
"learning_rate": 3.333333333333333e-07,
"logits/chosen": -0.6386057138442993,
"logits/rejected": -0.6507879495620728,
"logps/chosen": -285.6277160644531,
"logps/ref_chosen": -286.2335205078125,
"logps/ref_rejected": -255.38748168945312,
"logps/rejected": -255.15390014648438,
"loss": 5.5304,
"margin_dpo/margin_mean": 0.37220001220703125,
"margin_dpo/margin_std": 1.079951524734497,
"step": 33
},
{
"epoch": 0.0712041884816754,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.7043960094451904,
"fcm_dpo/q_t": 0.49823909997940063,
"grad_norm": 31.112960815429688,
"learning_rate": 3.4375e-07,
"logits/chosen": -0.6270374059677124,
"logits/rejected": -0.6368086338043213,
"logps/chosen": -340.82989501953125,
"logps/ref_chosen": -341.5920104980469,
"logps/ref_rejected": -278.8866882324219,
"logps/rejected": -278.8289794921875,
"loss": 5.5172,
"margin_dpo/margin_mean": 0.7043963074684143,
"margin_dpo/margin_std": 1.175834059715271,
"step": 34
},
{
"epoch": 0.07329842931937172,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.28337526321411133,
"fcm_dpo/q_t": 0.4992915987968445,
"grad_norm": 26.421070098876953,
"learning_rate": 3.541666666666667e-07,
"logits/chosen": -0.6235166192054749,
"logits/rejected": -0.6431545615196228,
"logps/chosen": -264.448974609375,
"logps/ref_chosen": -265.0795593261719,
"logps/ref_rejected": -264.4876708984375,
"logps/rejected": -264.1404724121094,
"loss": 5.534,
"margin_dpo/margin_mean": 0.2833753824234009,
"margin_dpo/margin_std": 1.2927016019821167,
"step": 35
},
{
"epoch": 0.07539267015706806,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.6523094177246094,
"fcm_dpo/q_t": 0.49836936593055725,
"grad_norm": 31.853910446166992,
"learning_rate": 3.645833333333333e-07,
"logits/chosen": -0.5965672731399536,
"logits/rejected": -0.6139001250267029,
"logps/chosen": -296.49725341796875,
"logps/ref_chosen": -297.3261413574219,
"logps/ref_rejected": -282.09515380859375,
"logps/rejected": -281.9185791015625,
"loss": 5.5193,
"margin_dpo/margin_mean": 0.6523087024688721,
"margin_dpo/margin_std": 1.4231526851654053,
"step": 36
},
{
"epoch": 0.0774869109947644,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.5245670080184937,
"fcm_dpo/q_t": 0.4986886978149414,
"grad_norm": 30.910381317138672,
"learning_rate": 3.75e-07,
"logits/chosen": -0.6018107533454895,
"logits/rejected": -0.6163386106491089,
"logps/chosen": -313.3153381347656,
"logps/ref_chosen": -314.0340270996094,
"logps/ref_rejected": -299.3437805175781,
"logps/rejected": -299.149658203125,
"loss": 5.5245,
"margin_dpo/margin_mean": 0.5245668888092041,
"margin_dpo/margin_std": 1.4947643280029297,
"step": 37
},
{
"epoch": 0.07958115183246073,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.6186456680297852,
"fcm_dpo/q_t": 0.4984534978866577,
"grad_norm": 28.512378692626953,
"learning_rate": 3.8541666666666665e-07,
"logits/chosen": -0.635643720626831,
"logits/rejected": -0.6476578712463379,
"logps/chosen": -281.5082092285156,
"logps/ref_chosen": -282.54119873046875,
"logps/ref_rejected": -269.7773132324219,
"logps/rejected": -269.3629150390625,
"loss": 5.5207,
"margin_dpo/margin_mean": 0.6186456680297852,
"margin_dpo/margin_std": 1.486997127532959,
"step": 38
},
{
"epoch": 0.08167539267015707,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.244322657585144,
"fcm_dpo/q_t": 0.49688953161239624,
"grad_norm": 29.5240478515625,
"learning_rate": 3.958333333333333e-07,
"logits/chosen": -0.6167585253715515,
"logits/rejected": -0.6307709217071533,
"logps/chosen": -275.46820068359375,
"logps/ref_chosen": -276.7729187011719,
"logps/ref_rejected": -249.95889282226562,
"logps/rejected": -249.89846801757812,
"loss": 5.4959,
"margin_dpo/margin_mean": 1.2443227767944336,
"margin_dpo/margin_std": 1.6552257537841797,
"step": 39
},
{
"epoch": 0.08376963350785341,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.6932114362716675,
"fcm_dpo/q_t": 0.498267263174057,
"grad_norm": 27.426746368408203,
"learning_rate": 4.0625e-07,
"logits/chosen": -0.6174054741859436,
"logits/rejected": -0.652121901512146,
"logps/chosen": -283.2413330078125,
"logps/ref_chosen": -284.30706787109375,
"logps/ref_rejected": -244.4459991455078,
"logps/rejected": -244.0734405517578,
"loss": 5.5179,
"margin_dpo/margin_mean": 0.6932120323181152,
"margin_dpo/margin_std": 1.8467386960983276,
"step": 40
},
{
"epoch": 0.08586387434554973,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.7606411576271057,
"fcm_dpo/q_t": 0.49809861183166504,
"grad_norm": 30.642261505126953,
"learning_rate": 4.1666666666666667e-07,
"logits/chosen": -0.6371512413024902,
"logits/rejected": -0.6642083525657654,
"logps/chosen": -292.70098876953125,
"logps/ref_chosen": -293.8151550292969,
"logps/ref_rejected": -252.16815185546875,
"logps/rejected": -251.8146209716797,
"loss": 5.5151,
"margin_dpo/margin_mean": 0.7606427669525146,
"margin_dpo/margin_std": 1.701622486114502,
"step": 41
},
{
"epoch": 0.08795811518324607,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 0.9576462507247925,
"fcm_dpo/q_t": 0.4976062476634979,
"grad_norm": 27.611026763916016,
"learning_rate": 4.270833333333333e-07,
"logits/chosen": -0.6295742988586426,
"logits/rejected": -0.6472880244255066,
"logps/chosen": -251.46585083007812,
"logps/ref_chosen": -252.76023864746094,
"logps/ref_rejected": -261.0414733886719,
"logps/rejected": -260.7047424316406,
"loss": 5.5075,
"margin_dpo/margin_mean": 0.9576468467712402,
"margin_dpo/margin_std": 2.2051548957824707,
"step": 42
},
{
"epoch": 0.09005235602094241,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.2415517568588257,
"fcm_dpo/q_t": 0.49689653515815735,
"grad_norm": 29.821718215942383,
"learning_rate": 4.375e-07,
"logits/chosen": -0.5814259648323059,
"logits/rejected": -0.5952868461608887,
"logps/chosen": -315.51239013671875,
"logps/ref_chosen": -316.8347473144531,
"logps/ref_rejected": -273.7649230957031,
"logps/rejected": -273.68414306640625,
"loss": 5.4962,
"margin_dpo/margin_mean": 1.241552472114563,
"margin_dpo/margin_std": 2.2551767826080322,
"step": 43
},
{
"epoch": 0.09214659685863874,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.6491384506225586,
"fcm_dpo/q_t": 0.4958779215812683,
"grad_norm": 30.916332244873047,
"learning_rate": 4.479166666666667e-07,
"logits/chosen": -0.5960883498191833,
"logits/rejected": -0.594748854637146,
"logps/chosen": -285.3397216796875,
"logps/ref_chosen": -286.8757019042969,
"logps/ref_rejected": -282.4681396484375,
"logps/rejected": -282.581298828125,
"loss": 5.4805,
"margin_dpo/margin_mean": 1.649139165878296,
"margin_dpo/margin_std": 3.022979974746704,
"step": 44
},
{
"epoch": 0.09424083769633508,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.249646544456482,
"fcm_dpo/q_t": 0.49687686562538147,
"grad_norm": 28.89425277709961,
"learning_rate": 4.5833333333333327e-07,
"logits/chosen": -0.6914635300636292,
"logits/rejected": -0.7155641913414001,
"logps/chosen": -322.6188659667969,
"logps/ref_chosen": -324.2633972167969,
"logps/ref_rejected": -293.09466552734375,
"logps/rejected": -292.6997985839844,
"loss": 5.4962,
"margin_dpo/margin_mean": 1.2496464252471924,
"margin_dpo/margin_std": 2.7630484104156494,
"step": 45
},
{
"epoch": 0.09633507853403141,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.5387152433395386,
"fcm_dpo/q_t": 0.49615412950515747,
"grad_norm": 30.371707916259766,
"learning_rate": 4.6874999999999996e-07,
"logits/chosen": -0.6212559938430786,
"logits/rejected": -0.6344550251960754,
"logps/chosen": -296.5879211425781,
"logps/ref_chosen": -298.3357238769531,
"logps/ref_rejected": -267.66204833984375,
"logps/rejected": -267.45294189453125,
"loss": 5.4846,
"margin_dpo/margin_mean": 1.5387158393859863,
"margin_dpo/margin_std": 2.5809476375579834,
"step": 46
},
{
"epoch": 0.09842931937172775,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.1460494995117188,
"fcm_dpo/q_t": 0.4971354603767395,
"grad_norm": 26.331024169921875,
"learning_rate": 4.791666666666667e-07,
"logits/chosen": -0.6050630211830139,
"logits/rejected": -0.6257311701774597,
"logps/chosen": -260.9723815917969,
"logps/ref_chosen": -262.5669250488281,
"logps/ref_rejected": -258.70989990234375,
"logps/rejected": -258.2613525390625,
"loss": 5.5007,
"margin_dpo/margin_mean": 1.1460487842559814,
"margin_dpo/margin_std": 3.3671884536743164,
"step": 47
},
{
"epoch": 0.10052356020942409,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.543501853942871,
"fcm_dpo/q_t": 0.49614325165748596,
"grad_norm": 27.530820846557617,
"learning_rate": 4.895833333333333e-07,
"logits/chosen": -0.6108264923095703,
"logits/rejected": -0.6356756687164307,
"logps/chosen": -267.622802734375,
"logps/ref_chosen": -269.4932556152344,
"logps/ref_rejected": -241.888916015625,
"logps/rejected": -241.56198120117188,
"loss": 5.4847,
"margin_dpo/margin_mean": 1.543501853942871,
"margin_dpo/margin_std": 2.9233288764953613,
"step": 48
},
{
"epoch": 0.10261780104712041,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.6295912265777588,
"fcm_dpo/q_t": 0.4959271550178528,
"grad_norm": 27.69369125366211,
"learning_rate": 5e-07,
"logits/chosen": -0.6768261194229126,
"logits/rejected": -0.6610736846923828,
"logps/chosen": -255.700439453125,
"logps/ref_chosen": -257.8844909667969,
"logps/ref_rejected": -256.8912048339844,
"logps/rejected": -256.3367614746094,
"loss": 5.4815,
"margin_dpo/margin_mean": 1.6295921802520752,
"margin_dpo/margin_std": 3.415902614593506,
"step": 49
},
{
"epoch": 0.10471204188481675,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.2272062301635742,
"fcm_dpo/q_t": 0.49693384766578674,
"grad_norm": 27.84197235107422,
"learning_rate": 4.999932966293553e-07,
"logits/chosen": -0.6295742392539978,
"logits/rejected": -0.6513818502426147,
"logps/chosen": -299.98370361328125,
"logps/ref_chosen": -302.1083679199219,
"logps/ref_rejected": -298.355224609375,
"logps/rejected": -297.457763671875,
"loss": 5.4979,
"margin_dpo/margin_mean": 1.227207064628601,
"margin_dpo/margin_std": 3.8781399726867676,
"step": 50
},
{
"epoch": 0.1068062827225131,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.9203577041625977,
"fcm_dpo/q_t": 0.4952022135257721,
"grad_norm": 29.167591094970703,
"learning_rate": 4.999731868769026e-07,
"logits/chosen": -0.6100184321403503,
"logits/rejected": -0.6041375398635864,
"logps/chosen": -267.2272644042969,
"logps/ref_chosen": -269.37237548828125,
"logps/ref_rejected": -297.0167541503906,
"logps/rejected": -296.7919616699219,
"loss": 5.4712,
"margin_dpo/margin_mean": 1.920358419418335,
"margin_dpo/margin_std": 4.5612592697143555,
"step": 51
},
{
"epoch": 0.10890052356020942,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 3.1528501510620117,
"fcm_dpo/q_t": 0.49212491512298584,
"grad_norm": 30.217905044555664,
"learning_rate": 4.99939671821067e-07,
"logits/chosen": -0.6610238552093506,
"logits/rejected": -0.6679620146751404,
"logps/chosen": -304.05975341796875,
"logps/ref_chosen": -306.9028015136719,
"logps/ref_rejected": -281.24737548828125,
"logps/rejected": -281.55718994140625,
"loss": 5.4226,
"margin_dpo/margin_mean": 3.15285062789917,
"margin_dpo/margin_std": 4.807487487792969,
"step": 52
},
{
"epoch": 0.11099476439790576,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 2.2223706245422363,
"fcm_dpo/q_t": 0.4944484829902649,
"grad_norm": 30.019371032714844,
"learning_rate": 4.998927532591591e-07,
"logits/chosen": -0.6413898468017578,
"logits/rejected": -0.6815477609634399,
"logps/chosen": -283.0915222167969,
"logps/ref_chosen": -285.9759521484375,
"logps/ref_rejected": -273.9073486328125,
"logps/rejected": -273.2453308105469,
"loss": 5.46,
"margin_dpo/margin_mean": 2.2223708629608154,
"margin_dpo/margin_std": 5.145662784576416,
"step": 53
},
{
"epoch": 0.1130890052356021,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.779584527015686,
"fcm_dpo/q_t": 0.49555593729019165,
"grad_norm": 26.53990936279297,
"learning_rate": 4.998324337072792e-07,
"logits/chosen": -0.6968706846237183,
"logits/rejected": -0.7060681581497192,
"logps/chosen": -303.8675842285156,
"logps/ref_chosen": -306.504638671875,
"logps/ref_rejected": -272.67431640625,
"logps/rejected": -271.81689453125,
"loss": 5.4775,
"margin_dpo/margin_mean": 1.7795841693878174,
"margin_dpo/margin_std": 5.366870403289795,
"step": 54
},
{
"epoch": 0.11518324607329843,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 2.3014349937438965,
"fcm_dpo/q_t": 0.49425217509269714,
"grad_norm": 24.823610305786133,
"learning_rate": 4.997587164001815e-07,
"logits/chosen": -0.6394699215888977,
"logits/rejected": -0.6436302065849304,
"logps/chosen": -220.45968627929688,
"logps/ref_chosen": -222.33013916015625,
"logps/ref_rejected": -206.59571838378906,
"logps/rejected": -207.02670288085938,
"loss": 5.4561,
"margin_dpo/margin_mean": 2.301435708999634,
"margin_dpo/margin_std": 4.904862880706787,
"step": 55
},
{
"epoch": 0.11727748691099477,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 3.1400392055511475,
"fcm_dpo/q_t": 0.49216216802597046,
"grad_norm": 27.470352172851562,
"learning_rate": 4.996716052911017e-07,
"logits/chosen": -0.6028516888618469,
"logits/rejected": -0.6167591214179993,
"logps/chosen": -247.6485595703125,
"logps/ref_chosen": -250.47816467285156,
"logps/ref_rejected": -228.25848388671875,
"logps/rejected": -228.5689239501953,
"loss": 5.4249,
"margin_dpo/margin_mean": 3.140040159225464,
"margin_dpo/margin_std": 5.96918249130249,
"step": 56
},
{
"epoch": 0.1193717277486911,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 4.32672643661499,
"fcm_dpo/q_t": 0.489196240901947,
"grad_norm": 30.81308937072754,
"learning_rate": 4.99571105051544e-07,
"logits/chosen": -0.6932777166366577,
"logits/rejected": -0.6637296080589294,
"logps/chosen": -311.116455078125,
"logps/ref_chosen": -315.1195373535156,
"logps/ref_rejected": -272.755615234375,
"logps/rejected": -273.0793151855469,
"loss": 5.3776,
"margin_dpo/margin_mean": 4.32672643661499,
"margin_dpo/margin_std": 5.751393795013428,
"step": 57
},
{
"epoch": 0.12146596858638743,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 2.327610969543457,
"fcm_dpo/q_t": 0.494184285402298,
"grad_norm": 27.799428939819336,
"learning_rate": 4.994572210710314e-07,
"logits/chosen": -0.6185472011566162,
"logits/rejected": -0.642967939376831,
"logps/chosen": -262.7143859863281,
"logps/ref_chosen": -265.1816711425781,
"logps/ref_rejected": -268.2203369140625,
"logps/rejected": -268.08062744140625,
"loss": 5.4561,
"margin_dpo/margin_mean": 2.3276116847991943,
"margin_dpo/margin_std": 5.6259846687316895,
"step": 58
},
{
"epoch": 0.12356020942408377,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 1.62693190574646,
"fcm_dpo/q_t": 0.4959397315979004,
"grad_norm": 29.78545379638672,
"learning_rate": 4.993299594568162e-07,
"logits/chosen": -0.6041760444641113,
"logits/rejected": -0.5971446633338928,
"logps/chosen": -284.1904602050781,
"logps/ref_chosen": -286.35394287109375,
"logps/ref_rejected": -260.6757507324219,
"logps/rejected": -260.13916015625,
"loss": 5.4861,
"margin_dpo/margin_mean": 1.626932978630066,
"margin_dpo/margin_std": 7.043335437774658,
"step": 59
},
{
"epoch": 0.1256544502617801,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 3.401254177093506,
"fcm_dpo/q_t": 0.49150803685188293,
"grad_norm": 28.24305534362793,
"learning_rate": 4.991893270335525e-07,
"logits/chosen": -0.6704109311103821,
"logits/rejected": -0.6943408846855164,
"logps/chosen": -256.01605224609375,
"logps/ref_chosen": -258.9134521484375,
"logps/ref_rejected": -255.21377563476562,
"logps/rejected": -255.71762084960938,
"loss": 5.4159,
"margin_dpo/margin_mean": 3.4012551307678223,
"margin_dpo/margin_std": 7.249381065368652,
"step": 60
},
{
"epoch": 0.12774869109947645,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 3.159944772720337,
"fcm_dpo/q_t": 0.4921136796474457,
"grad_norm": 29.98958969116211,
"learning_rate": 4.990353313429303e-07,
"logits/chosen": -0.6577328443527222,
"logits/rejected": -0.675537645816803,
"logps/chosen": -275.4341125488281,
"logps/ref_chosen": -278.4678955078125,
"logps/ref_rejected": -252.02720642089844,
"logps/rejected": -252.15333557128906,
"loss": 5.4264,
"margin_dpo/margin_mean": 3.159944534301758,
"margin_dpo/margin_std": 7.637033462524414,
"step": 61
},
{
"epoch": 0.12984293193717278,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 3.3599321842193604,
"fcm_dpo/q_t": 0.4916197657585144,
"grad_norm": 26.689373016357422,
"learning_rate": 4.988679806432711e-07,
"logits/chosen": -0.6122528314590454,
"logits/rejected": -0.6542145609855652,
"logps/chosen": -268.97198486328125,
"logps/ref_chosen": -272.92431640625,
"logps/ref_rejected": -260.7935485839844,
"logps/rejected": -260.201171875,
"loss": 5.4177,
"margin_dpo/margin_mean": 3.3599326610565186,
"margin_dpo/margin_std": 7.437721252441406,
"step": 62
},
{
"epoch": 0.1319371727748691,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 3.375985860824585,
"fcm_dpo/q_t": 0.4915734529495239,
"grad_norm": 28.36103630065918,
"learning_rate": 4.986872839090852e-07,
"logits/chosen": -0.6654269695281982,
"logits/rejected": -0.672168493270874,
"logps/chosen": -273.79058837890625,
"logps/ref_chosen": -277.0889892578125,
"logps/ref_rejected": -273.3413391113281,
"logps/rejected": -273.4189147949219,
"loss": 5.4169,
"margin_dpo/margin_mean": 3.3759853839874268,
"margin_dpo/margin_std": 7.225174903869629,
"step": 63
},
{
"epoch": 0.13403141361256546,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 4.306244373321533,
"fcm_dpo/q_t": 0.48925700783729553,
"grad_norm": 28.335159301757812,
"learning_rate": 4.9849325083059e-07,
"logits/chosen": -0.6384666562080383,
"logits/rejected": -0.634242057800293,
"logps/chosen": -279.6446533203125,
"logps/ref_chosen": -283.8244934082031,
"logps/ref_rejected": -263.29351806640625,
"logps/rejected": -263.419921875,
"loss": 5.3821,
"margin_dpo/margin_mean": 4.306243419647217,
"margin_dpo/margin_std": 8.264389038085938,
"step": 64
},
{
"epoch": 0.13612565445026178,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 3.298241376876831,
"fcm_dpo/q_t": 0.49176740646362305,
"grad_norm": 27.84617805480957,
"learning_rate": 4.982858918131906e-07,
"logits/chosen": -0.7026023268699646,
"logits/rejected": -0.6706424951553345,
"logps/chosen": -261.4404296875,
"logps/ref_chosen": -264.8699645996094,
"logps/ref_rejected": -268.5076904296875,
"logps/rejected": -268.3764343261719,
"loss": 5.4202,
"margin_dpo/margin_mean": 3.2982404232025146,
"margin_dpo/margin_std": 7.425678253173828,
"step": 65
},
{
"epoch": 0.1382198952879581,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 4.103024005889893,
"fcm_dpo/q_t": 0.48977720737457275,
"grad_norm": 27.68032455444336,
"learning_rate": 4.980652179769217e-07,
"logits/chosen": -0.6742034554481506,
"logits/rejected": -0.6938581466674805,
"logps/chosen": -269.83514404296875,
"logps/ref_chosen": -272.9283142089844,
"logps/ref_rejected": -280.94696044921875,
"logps/rejected": -281.956787109375,
"loss": 5.3933,
"margin_dpo/margin_mean": 4.103023529052734,
"margin_dpo/margin_std": 10.111289024353027,
"step": 66
},
{
"epoch": 0.14031413612565444,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 3.6750328540802,
"fcm_dpo/q_t": 0.49083197116851807,
"grad_norm": 25.625337600708008,
"learning_rate": 4.978312411558517e-07,
"logits/chosen": -0.6939666867256165,
"logits/rejected": -0.7270027995109558,
"logps/chosen": -262.1410217285156,
"logps/ref_chosen": -266.18695068359375,
"logps/ref_rejected": -250.17405700683594,
"logps/rejected": -249.80316162109375,
"loss": 5.4077,
"margin_dpo/margin_mean": 3.675032615661621,
"margin_dpo/margin_std": 8.745926856994629,
"step": 67
},
{
"epoch": 0.1424083769633508,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 4.620594501495361,
"fcm_dpo/q_t": 0.48850664496421814,
"grad_norm": 28.196027755737305,
"learning_rate": 4.975839738974473e-07,
"logits/chosen": -0.6843511462211609,
"logits/rejected": -0.6975783109664917,
"logps/chosen": -294.89398193359375,
"logps/ref_chosen": -297.9385986328125,
"logps/ref_rejected": -261.5141296386719,
"logps/rejected": -263.0901184082031,
"loss": 5.3746,
"margin_dpo/margin_mean": 4.6205949783325195,
"margin_dpo/margin_std": 10.609207153320312,
"step": 68
},
{
"epoch": 0.14450261780104712,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 5.947337627410889,
"fcm_dpo/q_t": 0.48517727851867676,
"grad_norm": 28.755943298339844,
"learning_rate": 4.97323429461901e-07,
"logits/chosen": -0.6901842355728149,
"logits/rejected": -0.7216166257858276,
"logps/chosen": -261.7662658691406,
"logps/ref_chosen": -265.6175231933594,
"logps/ref_rejected": -236.8287353515625,
"logps/rejected": -238.92481994628906,
"loss": 5.3208,
"margin_dpo/margin_mean": 5.947338581085205,
"margin_dpo/margin_std": 9.681401252746582,
"step": 69
},
{
"epoch": 0.14659685863874344,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 5.84010124206543,
"fcm_dpo/q_t": 0.48545634746551514,
"grad_norm": 28.748836517333984,
"learning_rate": 4.970496218214204e-07,
"logits/chosen": -0.6725472211837769,
"logits/rejected": -0.7054740786552429,
"logps/chosen": -291.8865051269531,
"logps/ref_chosen": -296.2259216308594,
"logps/ref_rejected": -254.68496704101562,
"logps/rejected": -256.18560791015625,
"loss": 5.328,
"margin_dpo/margin_mean": 5.840100288391113,
"margin_dpo/margin_std": 10.971734046936035,
"step": 70
},
{
"epoch": 0.1486910994764398,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 5.157320499420166,
"fcm_dpo/q_t": 0.4871465563774109,
"grad_norm": 28.44356346130371,
"learning_rate": 4.967625656594781e-07,
"logits/chosen": -0.6519126892089844,
"logits/rejected": -0.6409755945205688,
"logps/chosen": -283.77288818359375,
"logps/ref_chosen": -288.92724609375,
"logps/ref_rejected": -278.6405334472656,
"logps/rejected": -278.64349365234375,
"loss": 5.3581,
"margin_dpo/margin_mean": 5.15731954574585,
"margin_dpo/margin_std": 12.306236267089844,
"step": 71
},
{
"epoch": 0.15078534031413612,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 5.081698894500732,
"fcm_dpo/q_t": 0.4873269498348236,
"grad_norm": 28.21834945678711,
"learning_rate": 4.964622763700252e-07,
"logits/chosen": -0.700300931930542,
"logits/rejected": -0.7120264172554016,
"logps/chosen": -233.72628784179688,
"logps/ref_chosen": -237.0452880859375,
"logps/ref_rejected": -252.7946319580078,
"logps/rejected": -254.55735778808594,
"loss": 5.3569,
"margin_dpo/margin_mean": 5.081700325012207,
"margin_dpo/margin_std": 10.372089385986328,
"step": 72
},
{
"epoch": 0.15287958115183245,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 4.796858787536621,
"fcm_dpo/q_t": 0.48803800344467163,
"grad_norm": 27.81236457824707,
"learning_rate": 4.961487700566646e-07,
"logits/chosen": -0.660097599029541,
"logits/rejected": -0.6779041886329651,
"logps/chosen": -268.6611022949219,
"logps/ref_chosen": -273.0531005859375,
"logps/ref_rejected": -246.8330841064453,
"logps/rejected": -247.23794555664062,
"loss": 5.372,
"margin_dpo/margin_mean": 4.796858787536621,
"margin_dpo/margin_std": 12.272148132324219,
"step": 73
},
{
"epoch": 0.1549738219895288,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 4.374041557312012,
"fcm_dpo/q_t": 0.48911044001579285,
"grad_norm": 30.324485778808594,
"learning_rate": 4.958220635317885e-07,
"logits/chosen": -0.7277234196662903,
"logits/rejected": -0.7060559391975403,
"logps/chosen": -338.9614562988281,
"logps/ref_chosen": -342.2818908691406,
"logps/ref_rejected": -330.0293884277344,
"logps/rejected": -331.08294677734375,
"loss": 5.3871,
"margin_dpo/margin_mean": 4.374040603637695,
"margin_dpo/margin_std": 11.71304702758789,
"step": 74
},
{
"epoch": 0.15706806282722513,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 6.654738426208496,
"fcm_dpo/q_t": 0.48341798782348633,
"grad_norm": 29.471088409423828,
"learning_rate": 4.954821743156767e-07,
"logits/chosen": -0.6373202800750732,
"logits/rejected": -0.6386787295341492,
"logps/chosen": -262.1964111328125,
"logps/ref_chosen": -266.8641662597656,
"logps/ref_rejected": -276.8699951171875,
"logps/rejected": -278.8570251464844,
"loss": 5.2965,
"margin_dpo/margin_mean": 6.6547393798828125,
"margin_dpo/margin_std": 10.852510452270508,
"step": 75
},
{
"epoch": 0.15916230366492146,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 7.250955104827881,
"fcm_dpo/q_t": 0.4819841682910919,
"grad_norm": 29.12325096130371,
"learning_rate": 4.951291206355559e-07,
"logits/chosen": -0.7122775316238403,
"logits/rejected": -0.7196100950241089,
"logps/chosen": -276.97625732421875,
"logps/ref_chosen": -281.174560546875,
"logps/ref_rejected": -263.6067199707031,
"logps/rejected": -266.6593017578125,
"loss": 5.2782,
"margin_dpo/margin_mean": 7.250955581665039,
"margin_dpo/margin_std": 12.860380172729492,
"step": 76
},
{
"epoch": 0.1612565445026178,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 5.727426052093506,
"fcm_dpo/q_t": 0.48577675223350525,
"grad_norm": 33.081321716308594,
"learning_rate": 4.947629214246236e-07,
"logits/chosen": -0.5670685172080994,
"logits/rejected": -0.5766149759292603,
"logps/chosen": -302.4000549316406,
"logps/ref_chosen": -306.09527587890625,
"logps/ref_rejected": -253.49569702148438,
"logps/rejected": -255.5279083251953,
"loss": 5.3404,
"margin_dpo/margin_mean": 5.727425575256348,
"margin_dpo/margin_std": 13.957700729370117,
"step": 77
},
{
"epoch": 0.16335078534031414,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 8.765287399291992,
"fcm_dpo/q_t": 0.4782516360282898,
"grad_norm": 29.579288482666016,
"learning_rate": 4.943835963210323e-07,
"logits/chosen": -0.6769639253616333,
"logits/rejected": -0.6716817617416382,
"logps/chosen": -253.07315063476562,
"logps/ref_chosen": -256.9934997558594,
"logps/ref_rejected": -211.74012756347656,
"logps/rejected": -216.5850830078125,
"loss": 5.2239,
"margin_dpo/margin_mean": 8.765288352966309,
"margin_dpo/margin_std": 14.442641258239746,
"step": 78
},
{
"epoch": 0.16544502617801046,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 8.160972595214844,
"fcm_dpo/q_t": 0.479748010635376,
"grad_norm": 29.708646774291992,
"learning_rate": 4.939911656668361e-07,
"logits/chosen": -0.6571123003959656,
"logits/rejected": -0.6778618693351746,
"logps/chosen": -263.1961669921875,
"logps/ref_chosen": -266.2735595703125,
"logps/ref_rejected": -251.57257080078125,
"logps/rejected": -256.6561584472656,
"loss": 5.2494,
"margin_dpo/margin_mean": 8.160972595214844,
"margin_dpo/margin_std": 14.984216690063477,
"step": 79
},
{
"epoch": 0.16753926701570682,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 6.6066484451293945,
"fcm_dpo/q_t": 0.4836036264896393,
"grad_norm": 28.88882064819336,
"learning_rate": 4.935856505068998e-07,
"logits/chosen": -0.6748917698860168,
"logits/rejected": -0.7046967148780823,
"logps/chosen": -285.9798278808594,
"logps/ref_chosen": -287.8509826660156,
"logps/ref_rejected": -256.0766296386719,
"logps/rejected": -260.81207275390625,
"loss": 5.3052,
"margin_dpo/margin_mean": 6.606649398803711,
"margin_dpo/margin_std": 13.269367218017578,
"step": 80
},
{
"epoch": 0.16963350785340314,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 7.190183639526367,
"fcm_dpo/q_t": 0.4821716547012329,
"grad_norm": 28.030818939208984,
"learning_rate": 4.93167072587771e-07,
"logits/chosen": -0.6425015926361084,
"logits/rejected": -0.641758143901825,
"logps/chosen": -266.0821228027344,
"logps/ref_chosen": -268.5232238769531,
"logps/ref_rejected": -237.81137084960938,
"logps/rejected": -242.56045532226562,
"loss": 5.2924,
"margin_dpo/margin_mean": 7.190183639526367,
"margin_dpo/margin_std": 16.7568302154541,
"step": 81
},
{
"epoch": 0.17172774869109947,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 7.836269378662109,
"fcm_dpo/q_t": 0.48062148690223694,
"grad_norm": 27.81950569152832,
"learning_rate": 4.92735454356513e-07,
"logits/chosen": -0.7289373874664307,
"logits/rejected": -0.7366238236427307,
"logps/chosen": -276.76800537109375,
"logps/ref_chosen": -279.24798583984375,
"logps/ref_rejected": -236.6510772705078,
"logps/rejected": -242.00738525390625,
"loss": 5.2665,
"margin_dpo/margin_mean": 7.836269378662109,
"margin_dpo/margin_std": 15.824460983276367,
"step": 82
},
{
"epoch": 0.17382198952879582,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 8.676000595092773,
"fcm_dpo/q_t": 0.47846055030822754,
"grad_norm": 30.727758407592773,
"learning_rate": 4.922908189595017e-07,
"logits/chosen": -0.6865428686141968,
"logits/rejected": -0.6698246002197266,
"logps/chosen": -273.9609069824219,
"logps/ref_chosen": -274.21923828125,
"logps/ref_rejected": -276.2212219238281,
"logps/rejected": -284.638916015625,
"loss": 5.2418,
"margin_dpo/margin_mean": 8.676000595092773,
"margin_dpo/margin_std": 18.0338077545166,
"step": 83
},
{
"epoch": 0.17591623036649215,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 7.394294738769531,
"fcm_dpo/q_t": 0.4816589951515198,
"grad_norm": 29.849651336669922,
"learning_rate": 4.918331902411841e-07,
"logits/chosen": -0.7308733463287354,
"logits/rejected": -0.7451096177101135,
"logps/chosen": -293.73614501953125,
"logps/ref_chosen": -294.3975524902344,
"logps/ref_rejected": -279.81884765625,
"logps/rejected": -286.5517272949219,
"loss": 5.2856,
"margin_dpo/margin_mean": 7.3942952156066895,
"margin_dpo/margin_std": 16.78285789489746,
"step": 84
},
{
"epoch": 0.17801047120418848,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 5.794508934020996,
"fcm_dpo/q_t": 0.4856225848197937,
"grad_norm": 29.354572296142578,
"learning_rate": 4.913625927427995e-07,
"logits/chosen": -0.6477065086364746,
"logits/rejected": -0.6560468077659607,
"logps/chosen": -245.17652893066406,
"logps/ref_chosen": -243.66220092773438,
"logps/ref_rejected": -263.9421691894531,
"logps/rejected": -271.2510070800781,
"loss": 5.3435,
"margin_dpo/margin_mean": 5.794508934020996,
"margin_dpo/margin_std": 15.385719299316406,
"step": 85
},
{
"epoch": 0.18010471204188483,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 8.944154739379883,
"fcm_dpo/q_t": 0.4777962565422058,
"grad_norm": 34.90058517456055,
"learning_rate": 4.908790517010636e-07,
"logits/chosen": -0.6895659565925598,
"logits/rejected": -0.6828902363777161,
"logps/chosen": -308.1940612792969,
"logps/ref_chosen": -309.4306945800781,
"logps/ref_rejected": -290.91278076171875,
"logps/rejected": -298.62030029296875,
"loss": 5.227,
"margin_dpo/margin_mean": 8.944153785705566,
"margin_dpo/margin_std": 17.328086853027344,
"step": 86
},
{
"epoch": 0.18219895287958116,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 10.260643005371094,
"fcm_dpo/q_t": 0.4746631383895874,
"grad_norm": 29.66830825805664,
"learning_rate": 4.903825930468148e-07,
"logits/chosen": -0.7512010931968689,
"logits/rejected": -0.7451151013374329,
"logps/chosen": -278.11773681640625,
"logps/ref_chosen": -278.0277099609375,
"logps/ref_rejected": -245.70123291015625,
"logps/rejected": -256.0518493652344,
"loss": 5.1839,
"margin_dpo/margin_mean": 10.260643005371094,
"margin_dpo/margin_std": 18.95460319519043,
"step": 87
},
{
"epoch": 0.18429319371727748,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 8.927164077758789,
"fcm_dpo/q_t": 0.477934330701828,
"grad_norm": 28.764787673950195,
"learning_rate": 4.898732434036243e-07,
"logits/chosen": -0.7751933336257935,
"logits/rejected": -0.7918457984924316,
"logps/chosen": -268.5487365722656,
"logps/ref_chosen": -266.5148010253906,
"logps/ref_rejected": -265.90081787109375,
"logps/rejected": -276.8619384765625,
"loss": 5.2339,
"margin_dpo/margin_mean": 8.927164077758789,
"margin_dpo/margin_std": 19.03485107421875,
"step": 88
},
{
"epoch": 0.18638743455497384,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 9.322531700134277,
"fcm_dpo/q_t": 0.4769115746021271,
"grad_norm": 30.534467697143555,
"learning_rate": 4.893510300863676e-07,
"logits/chosen": -0.7388455271720886,
"logits/rejected": -0.7291704416275024,
"logps/chosen": -265.65667724609375,
"logps/ref_chosen": -265.6893005371094,
"logps/ref_rejected": -251.49314880371094,
"logps/rejected": -260.7830810546875,
"loss": 5.2181,
"margin_dpo/margin_mean": 9.322531700134277,
"margin_dpo/margin_std": 18.186668395996094,
"step": 89
},
{
"epoch": 0.18848167539267016,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 8.266681671142578,
"fcm_dpo/q_t": 0.47950226068496704,
"grad_norm": 29.96919822692871,
"learning_rate": 4.8881598109976e-07,
"logits/chosen": -0.7426354885101318,
"logits/rejected": -0.7515090703964233,
"logps/chosen": -308.5796813964844,
"logps/ref_chosen": -307.4250183105469,
"logps/ref_rejected": -265.7172546386719,
"logps/rejected": -275.13861083984375,
"loss": 5.2548,
"margin_dpo/margin_mean": 8.266682624816895,
"margin_dpo/margin_std": 17.510297775268555,
"step": 90
},
{
"epoch": 0.1905759162303665,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 9.559464454650879,
"fcm_dpo/q_t": 0.47642382979393005,
"grad_norm": 32.85371017456055,
"learning_rate": 4.882681251368548e-07,
"logits/chosen": -0.6758638620376587,
"logits/rejected": -0.6926856637001038,
"logps/chosen": -237.77833557128906,
"logps/ref_chosen": -235.74098205566406,
"logps/ref_rejected": -226.6428985595703,
"logps/rejected": -238.23971557617188,
"loss": 5.214,
"margin_dpo/margin_mean": 9.559464454650879,
"margin_dpo/margin_std": 19.182907104492188,
"step": 91
},
{
"epoch": 0.19267015706806281,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 10.123077392578125,
"fcm_dpo/q_t": 0.47499004006385803,
"grad_norm": 33.156288146972656,
"learning_rate": 4.877074915775048e-07,
"logits/chosen": -0.7403147220611572,
"logits/rejected": -0.72395920753479,
"logps/chosen": -286.5037841796875,
"logps/ref_chosen": -283.4475402832031,
"logps/ref_rejected": -273.134033203125,
"logps/rejected": -286.3133544921875,
"loss": 5.1979,
"margin_dpo/margin_mean": 10.123077392578125,
"margin_dpo/margin_std": 21.20536231994629,
"step": 92
},
{
"epoch": 0.19476439790575917,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 9.612565994262695,
"fcm_dpo/q_t": 0.4762864410877228,
"grad_norm": 28.68975257873535,
"learning_rate": 4.871341104867864e-07,
"logits/chosen": -0.7337056994438171,
"logits/rejected": -0.7575539350509644,
"logps/chosen": -235.6643524169922,
"logps/ref_chosen": -233.33714294433594,
"logps/ref_rejected": -230.54273986816406,
"logps/rejected": -242.48248291015625,
"loss": 5.2109,
"margin_dpo/margin_mean": 9.612567901611328,
"margin_dpo/margin_std": 19.466064453125,
"step": 93
},
{
"epoch": 0.1968586387434555,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 9.34221076965332,
"fcm_dpo/q_t": 0.47688379883766174,
"grad_norm": 31.385103225708008,
"learning_rate": 4.865480126133871e-07,
"logits/chosen": -0.6867436766624451,
"logits/rejected": -0.7081081867218018,
"logps/chosen": -296.996826171875,
"logps/ref_chosen": -294.6528015136719,
"logps/ref_rejected": -283.657958984375,
"logps/rejected": -295.34423828125,
"loss": 5.2315,
"margin_dpo/margin_mean": 9.342211723327637,
"margin_dpo/margin_std": 21.86980628967285,
"step": 94
},
{
"epoch": 0.19895287958115182,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 10.031109809875488,
"fcm_dpo/q_t": 0.47530385851860046,
"grad_norm": 33.378883361816406,
"learning_rate": 4.859492293879573e-07,
"logits/chosen": -0.7343789339065552,
"logits/rejected": -0.756463885307312,
"logps/chosen": -315.03533935546875,
"logps/ref_chosen": -311.6697082519531,
"logps/ref_rejected": -262.7471923828125,
"logps/rejected": -276.1439208984375,
"loss": 5.2116,
"margin_dpo/margin_mean": 10.031108856201172,
"margin_dpo/margin_std": 22.576051712036133,
"step": 95
},
{
"epoch": 0.20104712041884817,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 10.250564575195312,
"fcm_dpo/q_t": 0.4747813940048218,
"grad_norm": 37.031070709228516,
"learning_rate": 4.853377929214243e-07,
"logits/chosen": -0.7045353651046753,
"logits/rejected": -0.7171480655670166,
"logps/chosen": -287.286376953125,
"logps/ref_chosen": -282.55596923828125,
"logps/ref_rejected": -242.71588134765625,
"logps/rejected": -257.6968078613281,
"loss": 5.204,
"margin_dpo/margin_mean": 10.250566482543945,
"margin_dpo/margin_std": 23.351776123046875,
"step": 96
},
{
"epoch": 0.2031413612565445,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 12.032766342163086,
"fcm_dpo/q_t": 0.47035449743270874,
"grad_norm": 32.03152847290039,
"learning_rate": 4.847137360032699e-07,
"logits/chosen": -0.7473950386047363,
"logits/rejected": -0.7346963286399841,
"logps/chosen": -307.8915100097656,
"logps/ref_chosen": -303.57781982421875,
"logps/ref_rejected": -264.22491455078125,
"logps/rejected": -280.5714416503906,
"loss": 5.1312,
"margin_dpo/margin_mean": 12.032766342163086,
"margin_dpo/margin_std": 22.341087341308594,
"step": 97
},
{
"epoch": 0.20523560209424083,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 12.27221965789795,
"fcm_dpo/q_t": 0.4699545204639435,
"grad_norm": 34.470611572265625,
"learning_rate": 4.84077092099773e-07,
"logits/chosen": -0.7738041877746582,
"logits/rejected": -0.7862895131111145,
"logps/chosen": -291.651611328125,
"logps/ref_chosen": -286.8303527832031,
"logps/ref_rejected": -278.08331298828125,
"logps/rejected": -295.1767883300781,
"loss": 5.1343,
"margin_dpo/margin_mean": 12.272220611572266,
"margin_dpo/margin_std": 22.819379806518555,
"step": 98
},
{
"epoch": 0.20732984293193718,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 12.655092239379883,
"fcm_dpo/q_t": 0.46898141503334045,
"grad_norm": 32.86789321899414,
"learning_rate": 4.834278953522137e-07,
"logits/chosen": -0.736225962638855,
"logits/rejected": -0.7492181062698364,
"logps/chosen": -285.2147521972656,
"logps/ref_chosen": -279.92120361328125,
"logps/ref_rejected": -250.3365478515625,
"logps/rejected": -268.28515625,
"loss": 5.1341,
"margin_dpo/margin_mean": 12.65509033203125,
"margin_dpo/margin_std": 27.144683837890625,
"step": 99
},
{
"epoch": 0.2094240837696335,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 12.444181442260742,
"fcm_dpo/q_t": 0.4695214033126831,
"grad_norm": 35.53663635253906,
"learning_rate": 4.827661805750437e-07,
"logits/chosen": -0.7796453833580017,
"logits/rejected": -0.7926532030105591,
"logps/chosen": -304.6813659667969,
"logps/ref_chosen": -296.8276672363281,
"logps/ref_rejected": -275.56146240234375,
"logps/rejected": -295.8592529296875,
"loss": 5.1268,
"margin_dpo/margin_mean": 12.444183349609375,
"margin_dpo/margin_std": 24.066898345947266,
"step": 100
},
{
"epoch": 0.21151832460732983,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 15.036481857299805,
"fcm_dpo/q_t": 0.4632215201854706,
"grad_norm": 32.41525650024414,
"learning_rate": 4.820919832540181e-07,
"logits/chosen": -0.7710584998130798,
"logits/rejected": -0.7849109172821045,
"logps/chosen": -257.8660888671875,
"logps/ref_chosen": -252.74203491210938,
"logps/ref_rejected": -276.4185485839844,
"logps/rejected": -296.5791015625,
"loss": 5.0411,
"margin_dpo/margin_mean": 15.036481857299805,
"margin_dpo/margin_std": 26.31357765197754,
"step": 101
},
{
"epoch": 0.2136125654450262,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 14.796443939208984,
"fcm_dpo/q_t": 0.4635191857814789,
"grad_norm": 32.229835510253906,
"learning_rate": 4.814053395442932e-07,
"logits/chosen": -0.747922420501709,
"logits/rejected": -0.7440513372421265,
"logps/chosen": -224.68930053710938,
"logps/ref_chosen": -219.5537109375,
"logps/ref_rejected": -231.90853881835938,
"logps/rejected": -251.84056091308594,
"loss": 5.0434,
"margin_dpo/margin_mean": 14.7964448928833,
"margin_dpo/margin_std": 24.94526481628418,
"step": 102
},
{
"epoch": 0.2157068062827225,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 13.613631248474121,
"fcm_dpo/q_t": 0.46684983372688293,
"grad_norm": 32.68932342529297,
"learning_rate": 4.807062862684873e-07,
"logits/chosen": -0.7766976356506348,
"logits/rejected": -0.7736947536468506,
"logps/chosen": -264.4947509765625,
"logps/ref_chosen": -259.6750793457031,
"logps/ref_rejected": -278.7400817871094,
"logps/rejected": -297.17340087890625,
"loss": 5.0901,
"margin_dpo/margin_mean": 13.613631248474121,
"margin_dpo/margin_std": 25.650850296020508,
"step": 103
},
{
"epoch": 0.21780104712041884,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 10.063506126403809,
"fcm_dpo/q_t": 0.47534891963005066,
"grad_norm": 33.11854934692383,
"learning_rate": 4.799948609147061e-07,
"logits/chosen": -0.7753955125808716,
"logits/rejected": -0.7825241088867188,
"logps/chosen": -277.03887939453125,
"logps/ref_chosen": -267.9741516113281,
"logps/ref_rejected": -230.5306396484375,
"logps/rejected": -249.6588134765625,
"loss": 5.225,
"margin_dpo/margin_mean": 10.063505172729492,
"margin_dpo/margin_std": 26.37413215637207,
"step": 104
},
{
"epoch": 0.2198952879581152,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 20.57976531982422,
"fcm_dpo/q_t": 0.4495754837989807,
"grad_norm": 34.676841735839844,
"learning_rate": 4.792711016345321e-07,
"logits/chosen": -0.7640881538391113,
"logits/rejected": -0.7759814858436584,
"logps/chosen": -327.5960998535156,
"logps/ref_chosen": -322.25482177734375,
"logps/ref_rejected": -279.02978515625,
"logps/rejected": -304.9508361816406,
"loss": 4.8454,
"margin_dpo/margin_mean": 20.579763412475586,
"margin_dpo/margin_std": 26.948394775390625,
"step": 105
},
{
"epoch": 0.22198952879581152,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 12.518043518066406,
"fcm_dpo/q_t": 0.4693966507911682,
"grad_norm": 38.22789764404297,
"learning_rate": 4.785350472409791e-07,
"logits/chosen": -0.7372999787330627,
"logits/rejected": -0.7724757194519043,
"logps/chosen": -308.64599609375,
"logps/ref_chosen": -296.15777587890625,
"logps/ref_rejected": -266.2691650390625,
"logps/rejected": -291.275390625,
"loss": 5.1512,
"margin_dpo/margin_mean": 12.518043518066406,
"margin_dpo/margin_std": 29.34693145751953,
"step": 106
},
{
"epoch": 0.22408376963350785,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 19.722835540771484,
"fcm_dpo/q_t": 0.4517911970615387,
"grad_norm": 38.28390121459961,
"learning_rate": 4.777867372064105e-07,
"logits/chosen": -0.7788010239601135,
"logits/rejected": -0.77190101146698,
"logps/chosen": -311.3397216796875,
"logps/ref_chosen": -306.996337890625,
"logps/ref_rejected": -296.79412841796875,
"logps/rejected": -320.8603515625,
"loss": 4.877,
"margin_dpo/margin_mean": 19.722835540771484,
"margin_dpo/margin_std": 27.75455665588379,
"step": 107
},
{
"epoch": 0.2261780104712042,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 18.265289306640625,
"fcm_dpo/q_t": 0.4557679295539856,
"grad_norm": 100.8703842163086,
"learning_rate": 4.770262116604223e-07,
"logits/chosen": -0.7604493498802185,
"logits/rejected": -0.7712941765785217,
"logps/chosen": -300.7414245605469,
"logps/ref_chosen": -295.1526794433594,
"logps/ref_rejected": -235.974853515625,
"logps/rejected": -259.8288879394531,
"loss": 4.9518,
"margin_dpo/margin_mean": 18.265289306640625,
"margin_dpo/margin_std": 30.369789123535156,
"step": 108
},
{
"epoch": 0.22827225130890053,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 20.25125503540039,
"fcm_dpo/q_t": 0.45110049843788147,
"grad_norm": 38.23166275024414,
"learning_rate": 4.7625351138769166e-07,
"logits/chosen": -0.7942591309547424,
"logits/rejected": -0.791488528251648,
"logps/chosen": -334.3830261230469,
"logps/ref_chosen": -325.9248046875,
"logps/ref_rejected": -279.15423583984375,
"logps/rejected": -307.8637390136719,
"loss": 4.8796,
"margin_dpo/margin_mean": 20.25125503540039,
"margin_dpo/margin_std": 31.70002555847168,
"step": 109
},
{
"epoch": 0.23036649214659685,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 19.04351806640625,
"fcm_dpo/q_t": 0.4539196491241455,
"grad_norm": 34.719669342041016,
"learning_rate": 4.75468677825789e-07,
"logits/chosen": -0.8007383346557617,
"logits/rejected": -0.7890827059745789,
"logps/chosen": -283.4249572753906,
"logps/ref_chosen": -274.439208984375,
"logps/ref_rejected": -260.0552062988281,
"logps/rejected": -288.0845031738281,
"loss": 4.9361,
"margin_dpo/margin_mean": 19.043519973754883,
"margin_dpo/margin_std": 33.801780700683594,
"step": 110
},
{
"epoch": 0.2324607329842932,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 20.36928939819336,
"fcm_dpo/q_t": 0.450971394777298,
"grad_norm": 38.119789123535156,
"learning_rate": 4.7467175306295647e-07,
"logits/chosen": -0.8362029194831848,
"logits/rejected": -0.8157572150230408,
"logps/chosen": -338.9130859375,
"logps/ref_chosen": -329.2361755371094,
"logps/ref_rejected": -287.82830810546875,
"logps/rejected": -317.8745422363281,
"loss": 4.8982,
"margin_dpo/margin_mean": 20.369291305541992,
"margin_dpo/margin_std": 34.47408676147461,
"step": 111
},
{
"epoch": 0.23455497382198953,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 13.103754043579102,
"fcm_dpo/q_t": 0.4685131311416626,
"grad_norm": 39.488136291503906,
"learning_rate": 4.7386277983585053e-07,
"logits/chosen": -0.7342914342880249,
"logits/rejected": -0.7637506127357483,
"logps/chosen": -272.73883056640625,
"logps/ref_chosen": -257.0593566894531,
"logps/ref_rejected": -272.9595031738281,
"logps/rejected": -301.7427062988281,
"loss": 5.1873,
"margin_dpo/margin_mean": 13.103754997253418,
"margin_dpo/margin_std": 36.33610153198242,
"step": 112
},
{
"epoch": 0.23664921465968586,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 24.700986862182617,
"fcm_dpo/q_t": 0.44143223762512207,
"grad_norm": 41.771854400634766,
"learning_rate": 4.7304180152725024e-07,
"logits/chosen": -0.8089713454246521,
"logits/rejected": -0.8124600648880005,
"logps/chosen": -298.964599609375,
"logps/ref_chosen": -286.0416564941406,
"logps/ref_rejected": -270.374267578125,
"logps/rejected": -307.9981994628906,
"loss": 4.777,
"margin_dpo/margin_mean": 24.700986862182617,
"margin_dpo/margin_std": 39.2578125,
"step": 113
},
{
"epoch": 0.2387434554973822,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 13.720559120178223,
"fcm_dpo/q_t": 0.4666045606136322,
"grad_norm": 40.004310607910156,
"learning_rate": 4.7220886216373085e-07,
"logits/chosen": -0.8449599742889404,
"logits/rejected": -0.8461405038833618,
"logps/chosen": -276.9638671875,
"logps/ref_chosen": -260.0084533691406,
"logps/ref_rejected": -246.67190551757812,
"logps/rejected": -277.3478698730469,
"loss": 5.1508,
"margin_dpo/margin_mean": 13.720561027526855,
"margin_dpo/margin_std": 35.51994323730469,
"step": 114
},
{
"epoch": 0.24083769633507854,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 14.000336647033691,
"fcm_dpo/q_t": 0.46712926030158997,
"grad_norm": 43.63201904296875,
"learning_rate": 4.7136400641330245e-07,
"logits/chosen": -0.8578217029571533,
"logits/rejected": -0.8179551959037781,
"logps/chosen": -318.00872802734375,
"logps/ref_chosen": -298.8608093261719,
"logps/ref_rejected": -272.1927795410156,
"logps/rejected": -305.341064453125,
"loss": 5.164,
"margin_dpo/margin_mean": 14.000338554382324,
"margin_dpo/margin_std": 38.821632385253906,
"step": 115
},
{
"epoch": 0.24293193717277486,
"fcm_dpo/beta": 0.009999998845160007,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 18.14166259765625,
"fcm_dpo/q_t": 0.4560409188270569,
"grad_norm": 38.731719970703125,
"learning_rate": 4.70507279583015e-07,
"logits/chosen": -0.8598328828811646,
"logits/rejected": -0.8213679790496826,
"logps/chosen": -294.76617431640625,
"logps/ref_chosen": -279.263916015625,
"logps/ref_rejected": -253.6192169189453,
"logps/rejected": -287.2631530761719,
"loss": 4.9813,
"margin_dpo/margin_mean": 18.14166259765625,
"margin_dpo/margin_std": 35.22654724121094,
"step": 116
},
{
"epoch": 0.2450261780104712,
"fcm_dpo/beta": 0.010019151493906975,
"fcm_dpo/delta": 0.019006887450814247,
"fcm_dpo/margin": 20.834585189819336,
"fcm_dpo/q_t": 0.44975975155830383,
"grad_norm": 42.065223693847656,
"learning_rate": 4.6963872761652834e-07,
"logits/chosen": -0.8122572898864746,
"logits/rejected": -0.8118118643760681,
"logps/chosen": -278.41455078125,
"logps/ref_chosen": -259.2248840332031,
"logps/ref_rejected": -229.3042755126953,
"logps/rejected": -269.32855224609375,
"loss": 4.8801,
"margin_dpo/margin_mean": 20.834585189819336,
"margin_dpo/margin_std": 32.034950256347656,
"step": 117
},
{
"epoch": 0.24712041884816754,
"fcm_dpo/beta": 0.010153218172490597,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 23.139522552490234,
"fcm_dpo/q_t": 0.4447304904460907,
"grad_norm": 46.219722747802734,
"learning_rate": 4.687583970916486e-07,
"logits/chosen": -0.8006891012191772,
"logits/rejected": -0.7873902320861816,
"logps/chosen": -292.9374694824219,
"logps/ref_chosen": -267.0707092285156,
"logps/ref_rejected": -272.7322082519531,
"logps/rejected": -321.73846435546875,
"loss": 4.8612,
"margin_dpo/margin_mean": 23.139522552490234,
"margin_dpo/margin_std": 43.533145904541016,
"step": 118
},
{
"epoch": 0.24921465968586387,
"fcm_dpo/beta": 0.010153218172490597,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 16.957921981811523,
"fcm_dpo/q_t": 0.45950642228126526,
"grad_norm": 46.27934646606445,
"learning_rate": 4.6786633521783005e-07,
"logits/chosen": -0.8989782333374023,
"logits/rejected": -0.8991548418998718,
"logps/chosen": -356.221923828125,
"logps/ref_chosen": -324.6766357421875,
"logps/ref_rejected": -306.0322265625,
"logps/rejected": -354.5354309082031,
"loss": 5.083,
"margin_dpo/margin_mean": 16.95792007446289,
"margin_dpo/margin_std": 41.61006164550781,
"step": 119
},
{
"epoch": 0.2513089005235602,
"fcm_dpo/beta": 0.010216230526566505,
"fcm_dpo/delta": 0.020517978817224503,
"fcm_dpo/margin": 19.49388885498047,
"fcm_dpo/q_t": 0.4531494379043579,
"grad_norm": 42.6641731262207,
"learning_rate": 4.669625898336438e-07,
"logits/chosen": -0.8377653360366821,
"logits/rejected": -0.8475413918495178,
"logps/chosen": -343.8332214355469,
"logps/ref_chosen": -315.2617492675781,
"logps/ref_rejected": -265.32501220703125,
"logps/rejected": -313.390380859375,
"loss": 4.9857,
"margin_dpo/margin_mean": 19.49388885498047,
"margin_dpo/margin_std": 41.39332580566406,
"step": 120
},
{
"epoch": 0.2534031413612565,
"fcm_dpo/beta": 0.01032125111669302,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 13.927159309387207,
"fcm_dpo/q_t": 0.4664355218410492,
"grad_norm": 60.31532287597656,
"learning_rate": 4.6604720940421207e-07,
"logits/chosen": -0.8654758930206299,
"logits/rejected": -0.8799676299095154,
"logps/chosen": -256.4559631347656,
"logps/ref_chosen": -222.99609375,
"logps/ref_rejected": -226.92860412597656,
"logps/rejected": -274.31561279296875,
"loss": 5.1715,
"margin_dpo/margin_mean": 13.92716121673584,
"margin_dpo/margin_std": 40.628849029541016,
"step": 121
},
{
"epoch": 0.2554973821989529,
"fcm_dpo/beta": 0.01032125111669302,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 19.776992797851562,
"fcm_dpo/q_t": 0.45205867290496826,
"grad_norm": 57.31346130371094,
"learning_rate": 4.651202430186092e-07,
"logits/chosen": -0.9148901700973511,
"logits/rejected": -0.8764075636863708,
"logps/chosen": -309.6811828613281,
"logps/ref_chosen": -276.02630615234375,
"logps/ref_rejected": -277.97418212890625,
"logps/rejected": -331.4060363769531,
"loss": 5.0087,
"margin_dpo/margin_mean": 19.776994705200195,
"margin_dpo/margin_std": 46.8244514465332,
"step": 122
},
{
"epoch": 0.25759162303664923,
"fcm_dpo/beta": 0.01032125111669302,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 25.753007888793945,
"fcm_dpo/q_t": 0.43692460656166077,
"grad_norm": 50.204429626464844,
"learning_rate": 4.6418174038722924e-07,
"logits/chosen": -0.8277499079704285,
"logits/rejected": -0.8173301219940186,
"logps/chosen": -354.5268249511719,
"logps/ref_chosen": -328.1546325683594,
"logps/ref_rejected": -280.6911315917969,
"logps/rejected": -332.8163146972656,
"loss": 4.7536,
"margin_dpo/margin_mean": 25.753005981445312,
"margin_dpo/margin_std": 42.5488166809082,
"step": 123
},
{
"epoch": 0.25968586387434556,
"fcm_dpo/beta": 0.01032125111669302,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 22.0130558013916,
"fcm_dpo/q_t": 0.4456826448440552,
"grad_norm": 54.57142639160156,
"learning_rate": 4.6323175183912023e-07,
"logits/chosen": -0.8554036021232605,
"logits/rejected": -0.8191466927528381,
"logps/chosen": -302.9939270019531,
"logps/ref_chosen": -275.6961975097656,
"logps/ref_rejected": -225.361572265625,
"logps/rejected": -274.6723327636719,
"loss": 4.8715,
"margin_dpo/margin_mean": 22.01305389404297,
"margin_dpo/margin_std": 39.297996520996094,
"step": 124
},
{
"epoch": 0.2617801047120419,
"fcm_dpo/beta": 0.01032125111669302,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 19.9759578704834,
"fcm_dpo/q_t": 0.4523986577987671,
"grad_norm": 53.71831130981445,
"learning_rate": 4.6227032831928483e-07,
"logits/chosen": -0.8214420080184937,
"logits/rejected": -0.7753271460533142,
"logps/chosen": -306.7501220703125,
"logps/ref_chosen": -278.06976318359375,
"logps/ref_rejected": -265.63873291015625,
"logps/rejected": -314.2950439453125,
"loss": 5.0425,
"margin_dpo/margin_mean": 19.9759578704834,
"margin_dpo/margin_std": 50.73390579223633,
"step": 125
},
{
"epoch": 0.2638743455497382,
"fcm_dpo/beta": 0.01032125111669302,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 22.57097816467285,
"fcm_dpo/q_t": 0.4450061023235321,
"grad_norm": 51.29899215698242,
"learning_rate": 4.612975213859487e-07,
"logits/chosen": -0.8229495286941528,
"logits/rejected": -0.8314183354377747,
"logps/chosen": -346.1187438964844,
"logps/ref_chosen": -321.3960876464844,
"logps/ref_rejected": -285.37664794921875,
"logps/rejected": -332.6702880859375,
"loss": 4.8884,
"margin_dpo/margin_mean": 22.57097625732422,
"margin_dpo/margin_std": 45.24233627319336,
"step": 126
},
{
"epoch": 0.26596858638743454,
"fcm_dpo/beta": 0.010337848216295242,
"fcm_dpo/delta": 0.008014491759240627,
"fcm_dpo/margin": 26.70491600036621,
"fcm_dpo/q_t": 0.4356614053249359,
"grad_norm": 50.95025634765625,
"learning_rate": 4.603133832077953e-07,
"logits/chosen": -0.9044252634048462,
"logits/rejected": -0.8535300493240356,
"logps/chosen": -330.0174255371094,
"logps/ref_chosen": -306.55877685546875,
"logps/ref_rejected": -274.8651428222656,
"logps/rejected": -325.02874755859375,
"loss": 4.7648,
"margin_dpo/margin_mean": 26.70491600036621,
"margin_dpo/margin_std": 45.92063903808594,
"step": 127
},
{
"epoch": 0.2680628272251309,
"fcm_dpo/beta": 0.01038763951510191,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 30.74336051940918,
"fcm_dpo/q_t": 0.4257518947124481,
"grad_norm": 50.56816864013672,
"learning_rate": 4.5931796656116837e-07,
"logits/chosen": -0.7912300825119019,
"logits/rejected": -0.7867921590805054,
"logps/chosen": -283.9368591308594,
"logps/ref_chosen": -265.3973693847656,
"logps/ref_rejected": -250.9737548828125,
"logps/rejected": -300.25665283203125,
"loss": 4.6379,
"margin_dpo/margin_mean": 30.743362426757812,
"margin_dpo/margin_std": 49.90855407714844,
"step": 128
},
{
"epoch": 0.27015706806282724,
"fcm_dpo/beta": 0.010399233549833298,
"fcm_dpo/delta": 0.011111855506896973,
"fcm_dpo/margin": 27.626813888549805,
"fcm_dpo/q_t": 0.4336443245410919,
"grad_norm": 46.69161605834961,
"learning_rate": 4.5831132482724193e-07,
"logits/chosen": -0.8296740055084229,
"logits/rejected": -0.8278064131736755,
"logps/chosen": -323.13433837890625,
"logps/ref_chosen": -303.158447265625,
"logps/ref_rejected": -275.9891052246094,
"logps/rejected": -323.5918273925781,
"loss": 4.7427,
"margin_dpo/margin_mean": 27.626811981201172,
"margin_dpo/margin_std": 49.01125717163086,
"step": 129
},
{
"epoch": 0.27225130890052357,
"fcm_dpo/beta": 0.010586390271782875,
"fcm_dpo/delta": 0.01436567772179842,
"fcm_dpo/margin": 27.690763473510742,
"fcm_dpo/q_t": 0.43227171897888184,
"grad_norm": 59.677452087402344,
"learning_rate": 4.5729351198915705e-07,
"logits/chosen": -0.807654619216919,
"logits/rejected": -0.8454784154891968,
"logps/chosen": -309.1056213378906,
"logps/ref_chosen": -286.4073486328125,
"logps/ref_rejected": -294.38665771484375,
"logps/rejected": -344.77569580078125,
"loss": 4.7332,
"margin_dpo/margin_mean": 27.690763473510742,
"margin_dpo/margin_std": 49.714717864990234,
"step": 130
},
{
"epoch": 0.2743455497382199,
"fcm_dpo/beta": 0.010601533576846123,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 22.352327346801758,
"fcm_dpo/q_t": 0.4453392028808594,
"grad_norm": 63.08754348754883,
"learning_rate": 4.5626458262912735e-07,
"logits/chosen": -0.8775216937065125,
"logits/rejected": -0.8249009251594543,
"logps/chosen": -338.8774719238281,
"logps/ref_chosen": -311.5650634765625,
"logps/ref_rejected": -291.62432861328125,
"logps/rejected": -341.2890625,
"loss": 4.9797,
"margin_dpo/margin_mean": 22.352325439453125,
"margin_dpo/margin_std": 53.10045623779297,
"step": 131
},
{
"epoch": 0.2764397905759162,
"fcm_dpo/beta": 0.010651674121618271,
"fcm_dpo/delta": 0.0169695895165205,
"fcm_dpo/margin": 31.339889526367188,
"fcm_dpo/q_t": 0.42420607805252075,
"grad_norm": 66.45543670654297,
"learning_rate": 4.5522459192551166e-07,
"logits/chosen": -0.837442934513092,
"logits/rejected": -0.8209645748138428,
"logps/chosen": -293.2777099609375,
"logps/ref_chosen": -270.0818176269531,
"logps/ref_rejected": -284.3084411621094,
"logps/rejected": -338.8442687988281,
"loss": 4.6632,
"margin_dpo/margin_mean": 31.339889526367188,
"margin_dpo/margin_std": 54.162715911865234,
"step": 132
},
{
"epoch": 0.27853403141361255,
"fcm_dpo/beta": 0.010769927874207497,
"fcm_dpo/delta": 0.010881779715418816,
"fcm_dpo/margin": 28.75752830505371,
"fcm_dpo/q_t": 0.42886942625045776,
"grad_norm": 47.24089431762695,
"learning_rate": 4.541735956498554e-07,
"logits/chosen": -0.8648256063461304,
"logits/rejected": -0.8635565042495728,
"logps/chosen": -312.2882385253906,
"logps/ref_chosen": -285.6213684082031,
"logps/ref_rejected": -251.19386291503906,
"logps/rejected": -306.6182556152344,
"loss": 4.7005,
"margin_dpo/margin_mean": 28.757530212402344,
"margin_dpo/margin_std": 50.636070251464844,
"step": 133
},
{
"epoch": 0.2806282722513089,
"fcm_dpo/beta": 0.01087371539324522,
"fcm_dpo/delta": 0.0076601761393249035,
"fcm_dpo/margin": 21.52016830444336,
"fcm_dpo/q_t": 0.44513970613479614,
"grad_norm": 55.61819076538086,
"learning_rate": 4.5311165016389914e-07,
"logits/chosen": -0.8549675345420837,
"logits/rejected": -0.8537189364433289,
"logps/chosen": -358.1379699707031,
"logps/ref_chosen": -318.92083740234375,
"logps/ref_rejected": -293.1894836425781,
"logps/rejected": -353.92681884765625,
"loss": 4.9157,
"margin_dpo/margin_mean": 21.520170211791992,
"margin_dpo/margin_std": 45.02650451660156,
"step": 134
},
{
"epoch": 0.28272251308900526,
"fcm_dpo/beta": 0.010914881713688374,
"fcm_dpo/delta": 0.003592526540160179,
"fcm_dpo/margin": 25.296375274658203,
"fcm_dpo/q_t": 0.43536409735679626,
"grad_norm": 66.84896087646484,
"learning_rate": 4.520388124165564e-07,
"logits/chosen": -0.7576621174812317,
"logits/rejected": -0.7966057062149048,
"logps/chosen": -329.4492492675781,
"logps/ref_chosen": -292.8217468261719,
"logps/ref_rejected": -269.2896728515625,
"logps/rejected": -331.21356201171875,
"loss": 4.8392,
"margin_dpo/margin_mean": 25.296375274658203,
"margin_dpo/margin_std": 49.8359489440918,
"step": 135
},
{
"epoch": 0.2848167539267016,
"fcm_dpo/beta": 0.010963549837470055,
"fcm_dpo/delta": 0.007633054628968239,
"fcm_dpo/margin": 28.468870162963867,
"fcm_dpo/q_t": 0.4295894503593445,
"grad_norm": 77.23846435546875,
"learning_rate": 4.5095513994085974e-07,
"logits/chosen": -0.8170275688171387,
"logits/rejected": -0.8116145133972168,
"logps/chosen": -312.12353515625,
"logps/ref_chosen": -272.8525390625,
"logps/ref_rejected": -252.68202209472656,
"logps/rejected": -320.42193603515625,
"loss": 4.7672,
"margin_dpo/margin_mean": 28.468868255615234,
"margin_dpo/margin_std": 55.25246047973633,
"step": 136
},
{
"epoch": 0.2869109947643979,
"fcm_dpo/beta": 0.011102970689535141,
"fcm_dpo/delta": 0.01760217919945717,
"fcm_dpo/margin": 25.322219848632812,
"fcm_dpo/q_t": 0.43689489364624023,
"grad_norm": 71.75211334228516,
"learning_rate": 4.498606908508753e-07,
"logits/chosen": -0.8611711859703064,
"logits/rejected": -0.8475285172462463,
"logps/chosen": -344.4405517578125,
"logps/ref_chosen": -300.7522277832031,
"logps/ref_rejected": -286.1935119628906,
"logps/rejected": -355.20404052734375,
"loss": 4.8397,
"margin_dpo/margin_mean": 25.322223663330078,
"margin_dpo/margin_std": 52.45671081542969,
"step": 137
},
{
"epoch": 0.28900523560209423,
"fcm_dpo/beta": 0.011161497794091702,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 29.333019256591797,
"fcm_dpo/q_t": 0.4278327226638794,
"grad_norm": 65.46996307373047,
"learning_rate": 4.487555238385862e-07,
"logits/chosen": -0.7847152352333069,
"logits/rejected": -0.7637529969215393,
"logps/chosen": -330.0584716796875,
"logps/ref_chosen": -288.89056396484375,
"logps/ref_rejected": -263.1719055175781,
"logps/rejected": -333.67279052734375,
"loss": 4.8012,
"margin_dpo/margin_mean": 29.333017349243164,
"margin_dpo/margin_std": 61.842342376708984,
"step": 138
},
{
"epoch": 0.29109947643979056,
"fcm_dpo/beta": 0.011205606162548065,
"fcm_dpo/delta": 0.007879176177084446,
"fcm_dpo/margin": 20.918292999267578,
"fcm_dpo/q_t": 0.4472315013408661,
"grad_norm": 71.34123992919922,
"learning_rate": 4.476396981707453e-07,
"logits/chosen": -0.8044672012329102,
"logits/rejected": -0.8337982892990112,
"logps/chosen": -307.34002685546875,
"logps/ref_chosen": -270.0443115234375,
"logps/ref_rejected": -267.3226013183594,
"logps/rejected": -325.53662109375,
"loss": 5.0205,
"margin_dpo/margin_mean": 20.918291091918945,
"margin_dpo/margin_std": 52.10359573364258,
"step": 139
},
{
"epoch": 0.2931937172774869,
"fcm_dpo/beta": 0.011320183053612709,
"fcm_dpo/delta": 0.019457083195447922,
"fcm_dpo/margin": 31.68231201171875,
"fcm_dpo/q_t": 0.41780370473861694,
"grad_norm": 61.65796661376953,
"learning_rate": 4.4651327368569684e-07,
"logits/chosen": -0.8818329572677612,
"logits/rejected": -0.8469699621200562,
"logps/chosen": -317.78314208984375,
"logps/ref_chosen": -282.9555969238281,
"logps/ref_rejected": -251.17181396484375,
"logps/rejected": -317.681640625,
"loss": 4.5506,
"margin_dpo/margin_mean": 31.68231201171875,
"margin_dpo/margin_std": 48.60394287109375,
"step": 140
},
{
"epoch": 0.29528795811518327,
"fcm_dpo/beta": 0.011689888313412666,
"fcm_dpo/delta": 0.0463109090924263,
"fcm_dpo/margin": 32.73537826538086,
"fcm_dpo/q_t": 0.4133494198322296,
"grad_norm": 74.72412872314453,
"learning_rate": 4.453763107901675e-07,
"logits/chosen": -0.8145374059677124,
"logits/rejected": -0.8087294697761536,
"logps/chosen": -329.2580261230469,
"logps/ref_chosen": -296.3001708984375,
"logps/ref_rejected": -279.8486633300781,
"logps/rejected": -345.5418701171875,
"loss": 4.6074,
"margin_dpo/margin_mean": 32.73537826538086,
"margin_dpo/margin_std": 57.37196350097656,
"step": 141
},
{
"epoch": 0.2973821989528796,
"fcm_dpo/beta": 0.01217513345181942,
"fcm_dpo/delta": 0.061236005276441574,
"fcm_dpo/margin": 26.921472549438477,
"fcm_dpo/q_t": 0.42784056067466736,
"grad_norm": 71.6540756225586,
"learning_rate": 4.4422887045602674e-07,
"logits/chosen": -0.834761381149292,
"logits/rejected": -0.8356201648712158,
"logps/chosen": -333.8131103515625,
"logps/ref_chosen": -300.56585693359375,
"logps/ref_rejected": -231.43316650390625,
"logps/rejected": -291.6018981933594,
"loss": 4.8387,
"margin_dpo/margin_mean": 26.92147445678711,
"margin_dpo/margin_std": 57.958465576171875,
"step": 142
},
{
"epoch": 0.2994764397905759,
"fcm_dpo/beta": 0.012588088400661945,
"fcm_dpo/delta": 0.025997720658779144,
"fcm_dpo/margin": 32.42500305175781,
"fcm_dpo/q_t": 0.4098896384239197,
"grad_norm": 71.97573852539062,
"learning_rate": 4.4307101421701755e-07,
"logits/chosen": -0.8194795846939087,
"logits/rejected": -0.7966126799583435,
"logps/chosen": -329.8268737792969,
"logps/ref_chosen": -296.73236083984375,
"logps/ref_rejected": -266.45257568359375,
"logps/rejected": -331.97210693359375,
"loss": 4.4955,
"margin_dpo/margin_mean": 32.42500305175781,
"margin_dpo/margin_std": 51.72855758666992,
"step": 143
},
{
"epoch": 0.30157068062827225,
"fcm_dpo/beta": 0.012575407512485981,
"fcm_dpo/delta": -0.008530584163963795,
"fcm_dpo/margin": 26.383615493774414,
"fcm_dpo/q_t": 0.4281023442745209,
"grad_norm": 65.61817932128906,
"learning_rate": 4.419028041654559e-07,
"logits/chosen": -0.8793942332267761,
"logits/rejected": -0.8645679950714111,
"logps/chosen": -331.48211669921875,
"logps/ref_chosen": -298.843994140625,
"logps/ref_rejected": -266.120849609375,
"logps/rejected": -325.142578125,
"loss": 4.8261,
"margin_dpo/margin_mean": 26.38361358642578,
"margin_dpo/margin_std": 56.853328704833984,
"step": 144
},
{
"epoch": 0.3036649214659686,
"fcm_dpo/beta": 0.01254544872790575,
"fcm_dpo/delta": -0.01076475065201521,
"fcm_dpo/margin": 34.03130340576172,
"fcm_dpo/q_t": 0.4056648015975952,
"grad_norm": 67.97920989990234,
"learning_rate": 4.4072430294890166e-07,
"logits/chosen": -0.8655514717102051,
"logits/rejected": -0.867131769657135,
"logps/chosen": -303.7411193847656,
"logps/ref_chosen": -275.7528381347656,
"logps/ref_rejected": -214.74807739257812,
"logps/rejected": -276.76763916015625,
"loss": 4.4575,
"margin_dpo/margin_mean": 34.031307220458984,
"margin_dpo/margin_std": 52.632320404052734,
"step": 145
},
{
"epoch": 0.3057591623036649,
"fcm_dpo/beta": 0.012334452010691166,
"fcm_dpo/delta": -0.015375002287328243,
"fcm_dpo/margin": 33.436424255371094,
"fcm_dpo/q_t": 0.4091810882091522,
"grad_norm": 65.9619369506836,
"learning_rate": 4.395355737667985e-07,
"logits/chosen": -0.8543013334274292,
"logits/rejected": -0.8488306999206543,
"logps/chosen": -313.3991394042969,
"logps/ref_chosen": -277.09820556640625,
"logps/ref_rejected": -265.41046142578125,
"logps/rejected": -335.14776611328125,
"loss": 4.5041,
"margin_dpo/margin_mean": 33.436424255371094,
"margin_dpo/margin_std": 52.72343826293945,
"step": 146
},
{
"epoch": 0.3078534031413613,
"fcm_dpo/beta": 0.012323684990406036,
"fcm_dpo/delta": -0.014296751469373703,
"fcm_dpo/margin": 25.813636779785156,
"fcm_dpo/q_t": 0.42911848425865173,
"grad_norm": 70.41072845458984,
"learning_rate": 4.3833668036708483e-07,
"logits/chosen": -0.845245897769928,
"logits/rejected": -0.8413334488868713,
"logps/chosen": -329.39447021484375,
"logps/ref_chosen": -291.4185791015625,
"logps/ref_rejected": -253.43051147460938,
"logps/rejected": -317.2200927734375,
"loss": 4.8645,
"margin_dpo/margin_mean": 25.813636779785156,
"margin_dpo/margin_std": 54.92702865600586,
"step": 147
},
{
"epoch": 0.3099476439790576,
"fcm_dpo/beta": 0.012206897139549255,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 28.223648071289062,
"fcm_dpo/q_t": 0.4271428883075714,
"grad_norm": 66.38739776611328,
"learning_rate": 4.3712768704277524e-07,
"logits/chosen": -0.8933055400848389,
"logits/rejected": -0.8955690860748291,
"logps/chosen": -270.71759033203125,
"logps/ref_chosen": -236.74850463867188,
"logps/ref_rejected": -231.4674072265625,
"logps/rejected": -293.66009521484375,
"loss": 4.8075,
"margin_dpo/margin_mean": 28.223648071289062,
"margin_dpo/margin_std": 61.39948654174805,
"step": 148
},
{
"epoch": 0.31204188481675393,
"fcm_dpo/beta": 0.012314035557210445,
"fcm_dpo/delta": 0.01721823401749134,
"fcm_dpo/margin": 36.42420196533203,
"fcm_dpo/q_t": 0.4006516933441162,
"grad_norm": 77.8093032836914,
"learning_rate": 4.3590865862851263e-07,
"logits/chosen": -0.8710360527038574,
"logits/rejected": -0.8562425374984741,
"logps/chosen": -360.1137390136719,
"logps/ref_chosen": -319.9284973144531,
"logps/ref_rejected": -308.20233154296875,
"logps/rejected": -384.81182861328125,
"loss": 4.3377,
"margin_dpo/margin_mean": 36.4242057800293,
"margin_dpo/margin_std": 52.034080505371094,
"step": 149
},
{
"epoch": 0.31413612565445026,
"fcm_dpo/beta": 0.012715589255094528,
"fcm_dpo/delta": 0.053375184535980225,
"fcm_dpo/margin": 30.288711547851562,
"fcm_dpo/q_t": 0.4150834083557129,
"grad_norm": 72.03448486328125,
"learning_rate": 4.346796604970912e-07,
"logits/chosen": -0.8501912951469421,
"logits/rejected": -0.8354445099830627,
"logps/chosen": -321.476806640625,
"logps/ref_chosen": -276.3182373046875,
"logps/ref_rejected": -273.02215576171875,
"logps/rejected": -348.46942138671875,
"loss": 4.6202,
"margin_dpo/margin_mean": 30.288713455200195,
"margin_dpo/margin_std": 54.6740837097168,
"step": 150
},
{
"epoch": 0.3162303664921466,
"fcm_dpo/beta": 0.012854784727096558,
"fcm_dpo/delta": -0.019951222464442253,
"fcm_dpo/margin": 44.77876281738281,
"fcm_dpo/q_t": 0.37585607171058655,
"grad_norm": 79.77096557617188,
"learning_rate": 4.3344075855595097e-07,
"logits/chosen": -0.871157169342041,
"logits/rejected": -0.8656657338142395,
"logps/chosen": -339.890869140625,
"logps/ref_chosen": -297.31280517578125,
"logps/ref_rejected": -266.1003723144531,
"logps/rejected": -353.45721435546875,
"loss": 4.0884,
"margin_dpo/margin_mean": 44.77876663208008,
"margin_dpo/margin_std": 57.61820983886719,
"step": 151
},
{
"epoch": 0.3183246073298429,
"fcm_dpo/beta": 0.012488273903727531,
"fcm_dpo/delta": -0.028484076261520386,
"fcm_dpo/margin": 37.6032829284668,
"fcm_dpo/q_t": 0.4018944799900055,
"grad_norm": 67.45524597167969,
"learning_rate": 4.3219201924364323e-07,
"logits/chosen": -0.8691319823265076,
"logits/rejected": -0.8660374879837036,
"logps/chosen": -307.91668701171875,
"logps/ref_chosen": -270.2470397949219,
"logps/ref_rejected": -269.7749328613281,
"logps/rejected": -345.04791259765625,
"loss": 4.4857,
"margin_dpo/margin_mean": 37.60328674316406,
"margin_dpo/margin_std": 61.99601745605469,
"step": 152
},
{
"epoch": 0.3204188481675393,
"fcm_dpo/beta": 0.01196226291358471,
"fcm_dpo/delta": -0.05861516296863556,
"fcm_dpo/margin": 52.48561096191406,
"fcm_dpo/q_t": 0.3647117614746094,
"grad_norm": 73.9798355102539,
"learning_rate": 4.309335095262675e-07,
"logits/chosen": -0.8589173555374146,
"logits/rejected": -0.8468472957611084,
"logps/chosen": -319.50592041015625,
"logps/ref_chosen": -273.779052734375,
"logps/ref_rejected": -280.9530944824219,
"logps/rejected": -379.16558837890625,
"loss": 3.8701,
"margin_dpo/margin_mean": 52.48561096191406,
"margin_dpo/margin_std": 58.62868881225586,
"step": 153
},
{
"epoch": 0.3225130890052356,
"fcm_dpo/beta": 0.011798365972936153,
"fcm_dpo/delta": -0.018483448773622513,
"fcm_dpo/margin": 41.51868438720703,
"fcm_dpo/q_t": 0.3961712121963501,
"grad_norm": 70.80081176757812,
"learning_rate": 4.2966529689388064e-07,
"logits/chosen": -0.9012446403503418,
"logits/rejected": -0.8823704719543457,
"logps/chosen": -336.96453857421875,
"logps/ref_chosen": -289.9031982421875,
"logps/ref_rejected": -261.5166320800781,
"logps/rejected": -350.0966491699219,
"loss": 4.4483,
"margin_dpo/margin_mean": 41.518680572509766,
"margin_dpo/margin_std": 68.92434692382812,
"step": 154
},
{
"epoch": 0.32460732984293195,
"fcm_dpo/beta": 0.01188894733786583,
"fcm_dpo/delta": 0.020381543785333633,
"fcm_dpo/margin": 35.36058044433594,
"fcm_dpo/q_t": 0.41230636835098267,
"grad_norm": 93.07943725585938,
"learning_rate": 4.2838744935687716e-07,
"logits/chosen": -0.8364645838737488,
"logits/rejected": -0.8337617516517639,
"logps/chosen": -339.5706481933594,
"logps/ref_chosen": -285.8612060546875,
"logps/ref_rejected": -300.1272888183594,
"logps/rejected": -389.19732666015625,
"loss": 4.5817,
"margin_dpo/margin_mean": 35.36058044433594,
"margin_dpo/margin_std": 63.83776092529297,
"step": 155
},
{
"epoch": 0.3267015706806283,
"fcm_dpo/beta": 0.011719970963895321,
"fcm_dpo/delta": -0.07219862937927246,
"fcm_dpo/margin": 50.530967712402344,
"fcm_dpo/q_t": 0.375670850276947,
"grad_norm": 78.01712036132812,
"learning_rate": 4.271000354423425e-07,
"logits/chosen": -0.8607072830200195,
"logits/rejected": -0.8579990863800049,
"logps/chosen": -327.7187805175781,
"logps/ref_chosen": -279.0354919433594,
"logps/ref_rejected": -244.2198486328125,
"logps/rejected": -343.43408203125,
"loss": 4.2327,
"margin_dpo/margin_mean": 50.530967712402344,
"margin_dpo/margin_std": 74.52775573730469,
"step": 156
},
{
"epoch": 0.3287958115183246,
"fcm_dpo/beta": 0.010926080867648125,
"fcm_dpo/delta": -0.044517070055007935,
"fcm_dpo/margin": 38.28815460205078,
"fcm_dpo/q_t": 0.40745672583580017,
"grad_norm": 64.37937927246094,
"learning_rate": 4.258031241903777e-07,
"logits/chosen": -0.9258842468261719,
"logits/rejected": -0.9248091578483582,
"logps/chosen": -324.15484619140625,
"logps/ref_chosen": -270.830322265625,
"logps/ref_rejected": -259.08319091796875,
"logps/rejected": -350.69586181640625,
"loss": 4.5384,
"margin_dpo/margin_mean": 38.28815460205078,
"margin_dpo/margin_std": 64.0045166015625,
"step": 157
},
{
"epoch": 0.3308900523560209,
"fcm_dpo/beta": 0.010922886431217194,
"fcm_dpo/delta": 0.0029906341806054115,
"fcm_dpo/margin": 44.55851364135742,
"fcm_dpo/q_t": 0.39252322912216187,
"grad_norm": 67.0842056274414,
"learning_rate": 4.2449678515039743e-07,
"logits/chosen": -0.8688482642173767,
"logits/rejected": -0.8544604778289795,
"logps/chosen": -343.98077392578125,
"logps/ref_chosen": -290.381103515625,
"logps/ref_rejected": -271.95166015625,
"logps/rejected": -370.1098937988281,
"loss": 4.3355,
"margin_dpo/margin_mean": 44.558509826660156,
"margin_dpo/margin_std": 64.2014389038086,
"step": 158
},
{
"epoch": 0.33298429319371725,
"fcm_dpo/beta": 0.010863966308534145,
"fcm_dpo/delta": 0.010356229729950428,
"fcm_dpo/margin": 28.11869239807129,
"fcm_dpo/q_t": 0.4351333975791931,
"grad_norm": 98.55948638916016,
"learning_rate": 4.2318108837739986e-07,
"logits/chosen": -0.9458408355712891,
"logits/rejected": -0.8985559344291687,
"logps/chosen": -377.7143859863281,
"logps/ref_chosen": -321.37835693359375,
"logps/ref_rejected": -250.45652770996094,
"logps/rejected": -334.9112854003906,
"loss": 4.9886,
"margin_dpo/margin_mean": 28.118694305419922,
"margin_dpo/margin_std": 68.85266876220703,
"step": 159
},
{
"epoch": 0.33507853403141363,
"fcm_dpo/beta": 0.010856934823095798,
"fcm_dpo/delta": -0.0305030420422554,
"fcm_dpo/margin": 48.76349639892578,
"fcm_dpo/q_t": 0.38204342126846313,
"grad_norm": 76.18362426757812,
"learning_rate": 4.218561044282098e-07,
"logits/chosen": -0.8730629682540894,
"logits/rejected": -0.8760570287704468,
"logps/chosen": -323.17864990234375,
"logps/ref_chosen": -276.28350830078125,
"logps/ref_rejected": -262.7477722167969,
"logps/rejected": -358.40643310546875,
"loss": 4.0662,
"margin_dpo/margin_mean": 48.76349639892578,
"margin_dpo/margin_std": 56.23298645019531,
"step": 160
},
{
"epoch": 0.33717277486910996,
"fcm_dpo/beta": 0.010607090778648853,
"fcm_dpo/delta": -0.010924622416496277,
"fcm_dpo/margin": 43.944374084472656,
"fcm_dpo/q_t": 0.4002479612827301,
"grad_norm": 78.79258728027344,
"learning_rate": 4.2052190435769554e-07,
"logits/chosen": -0.8944586515426636,
"logits/rejected": -0.8787456750869751,
"logps/chosen": -366.509521484375,
"logps/ref_chosen": -310.4927978515625,
"logps/ref_rejected": -250.25347900390625,
"logps/rejected": -350.2145690917969,
"loss": 4.4434,
"margin_dpo/margin_mean": 43.944374084472656,
"margin_dpo/margin_std": 71.29467010498047,
"step": 161
},
{
"epoch": 0.3392670157068063,
"fcm_dpo/beta": 0.010460903868079185,
"fcm_dpo/delta": 0.0181996151804924,
"fcm_dpo/margin": 39.56256866455078,
"fcm_dpo/q_t": 0.4082155227661133,
"grad_norm": 61.922889709472656,
"learning_rate": 4.1917855971495763e-07,
"logits/chosen": -0.8656594157218933,
"logits/rejected": -0.8580671548843384,
"logps/chosen": -347.6031494140625,
"logps/ref_chosen": -296.1105041503906,
"logps/ref_rejected": -253.4247589111328,
"logps/rejected": -344.4800109863281,
"loss": 4.5132,
"margin_dpo/margin_mean": 39.562564849853516,
"margin_dpo/margin_std": 63.643367767333984,
"step": 162
},
{
"epoch": 0.3413612565445026,
"fcm_dpo/beta": 0.01069792453199625,
"fcm_dpo/delta": -0.004520459100604057,
"fcm_dpo/margin": 44.61610412597656,
"fcm_dpo/q_t": 0.3934933543205261,
"grad_norm": 85.54950714111328,
"learning_rate": 4.1782614253949255e-07,
"logits/chosen": -0.9105485677719116,
"logits/rejected": -0.9078636765480042,
"logps/chosen": -348.55645751953125,
"logps/ref_chosen": -293.5898132324219,
"logps/ref_rejected": -266.951904296875,
"logps/rejected": -366.53466796875,
"loss": 4.2956,
"margin_dpo/margin_mean": 44.61610412597656,
"margin_dpo/margin_std": 61.28437042236328,
"step": 163
},
{
"epoch": 0.34345549738219894,
"fcm_dpo/beta": 0.010653373785316944,
"fcm_dpo/delta": -0.019431831315159798,
"fcm_dpo/margin": 40.82433319091797,
"fcm_dpo/q_t": 0.4064997732639313,
"grad_norm": 79.59967803955078,
"learning_rate": 4.164647253573289e-07,
"logits/chosen": -0.8544159531593323,
"logits/rejected": -0.8663524389266968,
"logps/chosen": -331.69659423828125,
"logps/ref_chosen": -267.04949951171875,
"logps/ref_rejected": -215.9768829345703,
"logps/rejected": -321.44830322265625,
"loss": 4.5291,
"margin_dpo/margin_mean": 40.82433319091797,
"margin_dpo/margin_std": 69.7838134765625,
"step": 164
},
{
"epoch": 0.34554973821989526,
"fcm_dpo/beta": 0.01054457575082779,
"fcm_dpo/delta": 0.002462883247062564,
"fcm_dpo/margin": 29.31476593017578,
"fcm_dpo/q_t": 0.4311188757419586,
"grad_norm": 81.46530151367188,
"learning_rate": 4.1509438117713863e-07,
"logits/chosen": -0.8851940035820007,
"logits/rejected": -0.8585687875747681,
"logps/chosen": -329.74737548828125,
"logps/ref_chosen": -278.06146240234375,
"logps/ref_rejected": -260.4288635253906,
"logps/rejected": -341.4295349121094,
"loss": 4.7755,
"margin_dpo/margin_mean": 29.31476593017578,
"margin_dpo/margin_std": 56.65957260131836,
"step": 165
},
{
"epoch": 0.34764397905759165,
"fcm_dpo/beta": 0.010434024967253208,
"fcm_dpo/delta": -0.013516011647880077,
"fcm_dpo/margin": 36.80669403076172,
"fcm_dpo/q_t": 0.4170481562614441,
"grad_norm": 88.08253479003906,
"learning_rate": 4.137151834863213e-07,
"logits/chosen": -0.8408401012420654,
"logits/rejected": -0.8059453964233398,
"logps/chosen": -321.4761962890625,
"logps/ref_chosen": -275.6466369628906,
"logps/ref_rejected": -232.37017822265625,
"logps/rejected": -315.0064697265625,
"loss": 4.621,
"margin_dpo/margin_mean": 36.80669403076172,
"margin_dpo/margin_std": 67.22309875488281,
"step": 166
},
{
"epoch": 0.34973821989528797,
"fcm_dpo/beta": 0.010469815693795681,
"fcm_dpo/delta": -0.009936400689184666,
"fcm_dpo/margin": 41.52671813964844,
"fcm_dpo/q_t": 0.40366828441619873,
"grad_norm": 92.14634704589844,
"learning_rate": 4.123272062470633e-07,
"logits/chosen": -0.8315152525901794,
"logits/rejected": -0.8175035715103149,
"logps/chosen": -333.81878662109375,
"logps/ref_chosen": -280.5514221191406,
"logps/ref_rejected": -255.2896728515625,
"logps/rejected": -350.083740234375,
"loss": 4.5055,
"margin_dpo/margin_mean": 41.52671813964844,
"margin_dpo/margin_std": 69.44818115234375,
"step": 167
},
{
"epoch": 0.3518324607329843,
"fcm_dpo/beta": 0.01042957603931427,
"fcm_dpo/delta": 0.012083848938345909,
"fcm_dpo/margin": 46.28468322753906,
"fcm_dpo/q_t": 0.39214834570884705,
"grad_norm": 171.32440185546875,
"learning_rate": 4.1093052389237174e-07,
"logits/chosen": -0.8135068416595459,
"logits/rejected": -0.7858311533927917,
"logps/chosen": -371.6254577636719,
"logps/ref_chosen": -315.7982177734375,
"logps/ref_rejected": -291.48406982421875,
"logps/rejected": -393.59600830078125,
"loss": 4.3379,
"margin_dpo/margin_mean": 46.28468322753906,
"margin_dpo/margin_std": 68.01168823242188,
"step": 168
},
{
"epoch": 0.3539267015706806,
"fcm_dpo/beta": 0.010296836495399475,
"fcm_dpo/delta": -0.0430966354906559,
"fcm_dpo/margin": 54.97410202026367,
"fcm_dpo/q_t": 0.3766815960407257,
"grad_norm": 106.70867919921875,
"learning_rate": 4.0952521132208267e-07,
"logits/chosen": -0.811809778213501,
"logits/rejected": -0.8225773572921753,
"logps/chosen": -314.03985595703125,
"logps/ref_chosen": -261.06427001953125,
"logps/ref_rejected": -235.40663146972656,
"logps/rejected": -343.35638427734375,
"loss": 4.0653,
"margin_dpo/margin_mean": 54.97410202026367,
"margin_dpo/margin_std": 67.30137634277344,
"step": 169
},
{
"epoch": 0.35602094240837695,
"fcm_dpo/beta": 0.010233273729681969,
"fcm_dpo/delta": 0.03896753489971161,
"fcm_dpo/margin": 45.84815216064453,
"fcm_dpo/q_t": 0.4011499881744385,
"grad_norm": 97.2695083618164,
"learning_rate": 4.081113438988443e-07,
"logits/chosen": -0.7859560251235962,
"logits/rejected": -0.7877269983291626,
"logps/chosen": -361.7164001464844,
"logps/ref_chosen": -308.96722412109375,
"logps/ref_rejected": -263.8466796875,
"logps/rejected": -362.4439697265625,
"loss": 4.4936,
"margin_dpo/margin_mean": 45.8481559753418,
"margin_dpo/margin_std": 80.94108581542969,
"step": 170
},
{
"epoch": 0.3581151832460733,
"fcm_dpo/beta": 0.010340461507439613,
"fcm_dpo/delta": -0.0003459630534052849,
"fcm_dpo/margin": 52.07539367675781,
"fcm_dpo/q_t": 0.3825134336948395,
"grad_norm": 103.40943908691406,
"learning_rate": 4.0668899744407567e-07,
"logits/chosen": -0.8086336255073547,
"logits/rejected": -0.8238397240638733,
"logps/chosen": -304.28411865234375,
"logps/ref_chosen": -258.8890380859375,
"logps/ref_rejected": -262.19140625,
"logps/rejected": -359.661865234375,
"loss": 4.1402,
"margin_dpo/margin_mean": 52.07539367675781,
"margin_dpo/margin_std": 66.66648864746094,
"step": 171
},
{
"epoch": 0.36020942408376966,
"fcm_dpo/beta": 0.010584648698568344,
"fcm_dpo/delta": 0.031594403088092804,
"fcm_dpo/margin": 30.508852005004883,
"fcm_dpo/q_t": 0.4273463487625122,
"grad_norm": 99.63431549072266,
"learning_rate": 4.0525824823390043e-07,
"logits/chosen": -0.823786199092865,
"logits/rejected": -0.8405004739761353,
"logps/chosen": -386.2387390136719,
"logps/ref_chosen": -339.0223388671875,
"logps/ref_rejected": -295.78759765625,
"logps/rejected": -373.5128479003906,
"loss": 4.8523,
"margin_dpo/margin_mean": 30.508852005004883,
"margin_dpo/margin_std": 67.79949951171875,
"step": 172
},
{
"epoch": 0.362303664921466,
"fcm_dpo/beta": 0.010532171465456486,
"fcm_dpo/delta": -0.022472519427537918,
"fcm_dpo/margin": 38.833194732666016,
"fcm_dpo/q_t": 0.41293755173683167,
"grad_norm": 131.93397521972656,
"learning_rate": 4.0381917299505686e-07,
"logits/chosen": -0.8346858024597168,
"logits/rejected": -0.8341192603111267,
"logps/chosen": -344.9473876953125,
"logps/ref_chosen": -300.1114501953125,
"logps/ref_rejected": -273.78460693359375,
"logps/rejected": -357.4537353515625,
"loss": 4.5961,
"margin_dpo/margin_mean": 38.833194732666016,
"margin_dpo/margin_std": 68.92523956298828,
"step": 173
},
{
"epoch": 0.3643979057591623,
"fcm_dpo/beta": 0.010570104233920574,
"fcm_dpo/delta": 0.02033688873052597,
"fcm_dpo/margin": 47.59809494018555,
"fcm_dpo/q_t": 0.38671064376831055,
"grad_norm": 136.58636474609375,
"learning_rate": 4.0237184890078243e-07,
"logits/chosen": -0.8063141107559204,
"logits/rejected": -0.7906365394592285,
"logps/chosen": -379.1315002441406,
"logps/ref_chosen": -335.0538635253906,
"logps/ref_rejected": -257.4646911621094,
"logps/rejected": -349.1404113769531,
"loss": 4.1945,
"margin_dpo/margin_mean": 47.59809494018555,
"margin_dpo/margin_std": 62.02534484863281,
"step": 174
},
{
"epoch": 0.36649214659685864,
"fcm_dpo/beta": 0.010354937054216862,
"fcm_dpo/delta": -0.06971834599971771,
"fcm_dpo/margin": 44.109615325927734,
"fcm_dpo/q_t": 0.4042738974094391,
"grad_norm": 81.95703887939453,
"learning_rate": 4.00916353566676e-07,
"logits/chosen": -0.8228567242622375,
"logits/rejected": -0.8251509070396423,
"logps/chosen": -340.4371337890625,
"logps/ref_chosen": -284.39556884765625,
"logps/ref_rejected": -283.3876647949219,
"logps/rejected": -383.538818359375,
"loss": 4.5043,
"margin_dpo/margin_mean": 44.109615325927734,
"margin_dpo/margin_std": 72.40750122070312,
"step": 175
},
{
"epoch": 0.36858638743455496,
"fcm_dpo/beta": 0.01005008164793253,
"fcm_dpo/delta": -0.004386642947793007,
"fcm_dpo/margin": 34.69060134887695,
"fcm_dpo/q_t": 0.4250302314758301,
"grad_norm": 107.35453796386719,
"learning_rate": 3.994527650465352e-07,
"logits/chosen": -0.7942756414413452,
"logits/rejected": -0.8046650886535645,
"logps/chosen": -307.6514892578125,
"logps/ref_chosen": -251.81280517578125,
"logps/ref_rejected": -242.05328369140625,
"logps/rejected": -332.58258056640625,
"loss": 4.8619,
"margin_dpo/margin_mean": 34.69059753417969,
"margin_dpo/margin_std": 77.51087951660156,
"step": 176
},
{
"epoch": 0.3706806282722513,
"fcm_dpo/beta": 0.010259328410029411,
"fcm_dpo/delta": 0.03945356607437134,
"fcm_dpo/margin": 35.12948226928711,
"fcm_dpo/q_t": 0.4213239550590515,
"grad_norm": 85.04834747314453,
"learning_rate": 3.979811618281705e-07,
"logits/chosen": -0.8936614394187927,
"logits/rejected": -0.8652552366256714,
"logps/chosen": -361.48468017578125,
"logps/ref_chosen": -298.6463928222656,
"logps/ref_rejected": -295.66534423828125,
"logps/rejected": -393.63311767578125,
"loss": 4.8502,
"margin_dpo/margin_mean": 35.129478454589844,
"margin_dpo/margin_std": 74.60753631591797,
"step": 177
},
{
"epoch": 0.37277486910994767,
"fcm_dpo/beta": 0.010234692133963108,
"fcm_dpo/delta": -0.013511069118976593,
"fcm_dpo/margin": 51.200801849365234,
"fcm_dpo/q_t": 0.38916733860969543,
"grad_norm": 118.27215576171875,
"learning_rate": 3.9650162282919654e-07,
"logits/chosen": -0.8079323768615723,
"logits/rejected": -0.8033620119094849,
"logps/chosen": -339.77313232421875,
"logps/ref_chosen": -286.2576599121094,
"logps/ref_rejected": -243.97491455078125,
"logps/rejected": -348.6911926269531,
"loss": 4.2924,
"margin_dpo/margin_mean": 51.200801849365234,
"margin_dpo/margin_std": 77.08502960205078,
"step": 178
},
{
"epoch": 0.374869109947644,
"fcm_dpo/beta": 0.010111997835338116,
"fcm_dpo/delta": -0.024984102696180344,
"fcm_dpo/margin": 42.48678970336914,
"fcm_dpo/q_t": 0.4064781665802002,
"grad_norm": 117.5345458984375,
"learning_rate": 3.9501422739279953e-07,
"logits/chosen": -0.7860456109046936,
"logits/rejected": -0.7883044481277466,
"logps/chosen": -317.0030212402344,
"logps/ref_chosen": -259.737060546875,
"logps/ref_rejected": -277.8813171386719,
"logps/rejected": -377.634033203125,
"loss": 4.5748,
"margin_dpo/margin_mean": 42.486785888671875,
"margin_dpo/margin_std": 74.06698608398438,
"step": 179
},
{
"epoch": 0.3769633507853403,
"fcm_dpo/beta": 0.010198577307164669,
"fcm_dpo/delta": 0.030198615044355392,
"fcm_dpo/margin": 49.38009262084961,
"fcm_dpo/q_t": 0.3938713073730469,
"grad_norm": 118.17071533203125,
"learning_rate": 3.935190552834828e-07,
"logits/chosen": -0.8322474956512451,
"logits/rejected": -0.8610942959785461,
"logps/chosen": -325.9746398925781,
"logps/ref_chosen": -267.30889892578125,
"logps/ref_rejected": -230.4376983642578,
"logps/rejected": -338.4835205078125,
"loss": 4.3078,
"margin_dpo/margin_mean": 49.380088806152344,
"margin_dpo/margin_std": 73.99922180175781,
"step": 180
},
{
"epoch": 0.37905759162303665,
"fcm_dpo/beta": 0.010158884339034557,
"fcm_dpo/delta": -0.006093205884099007,
"fcm_dpo/margin": 40.51472854614258,
"fcm_dpo/q_t": 0.4114302098751068,
"grad_norm": 144.84808349609375,
"learning_rate": 3.920161866827889e-07,
"logits/chosen": -0.8205707669258118,
"logits/rejected": -0.8252695202827454,
"logps/chosen": -367.8943176269531,
"logps/ref_chosen": -300.49139404296875,
"logps/ref_rejected": -278.98284912109375,
"logps/rejected": -386.9005126953125,
"loss": 4.6462,
"margin_dpo/margin_mean": 40.51472854614258,
"margin_dpo/margin_std": 76.76387786865234,
"step": 181
},
{
"epoch": 0.381151832460733,
"fcm_dpo/beta": 0.01002179179340601,
"fcm_dpo/delta": -0.06329777836799622,
"fcm_dpo/margin": 57.60490798950195,
"fcm_dpo/q_t": 0.3754042387008667,
"grad_norm": 79.3833999633789,
"learning_rate": 3.90505702185e-07,
"logits/chosen": -0.8123592138290405,
"logits/rejected": -0.8395570516586304,
"logps/chosen": -344.8331298828125,
"logps/ref_chosen": -279.4981689453125,
"logps/ref_rejected": -263.6926574707031,
"logps/rejected": -386.632568359375,
"loss": 4.0947,
"margin_dpo/margin_mean": 57.60490798950195,
"margin_dpo/margin_std": 74.24735260009766,
"step": 182
},
{
"epoch": 0.3832460732984293,
"fcm_dpo/beta": 0.009926295839250088,
"fcm_dpo/delta": 0.04236668348312378,
"fcm_dpo/margin": 51.719627380371094,
"fcm_dpo/q_t": 0.3916701674461365,
"grad_norm": 69.92806243896484,
"learning_rate": 3.889876827928156e-07,
"logits/chosen": -0.8526961803436279,
"logits/rejected": -0.8551933169364929,
"logps/chosen": -336.1162414550781,
"logps/ref_chosen": -270.8456726074219,
"logps/ref_rejected": -244.1910400390625,
"logps/rejected": -361.18121337890625,
"loss": 4.3359,
"margin_dpo/margin_mean": 51.719627380371094,
"margin_dpo/margin_std": 80.76448822021484,
"step": 183
},
{
"epoch": 0.38534031413612563,
"fcm_dpo/beta": 0.009438715875148773,
"fcm_dpo/delta": -0.10046012699604034,
"fcm_dpo/margin": 69.27982330322266,
"fcm_dpo/q_t": 0.3597652316093445,
"grad_norm": 82.657958984375,
"learning_rate": 3.874622099130087e-07,
"logits/chosen": -0.8982101082801819,
"logits/rejected": -0.8769604563713074,
"logps/chosen": -382.0798645019531,
"logps/ref_chosen": -318.4457702636719,
"logps/ref_rejected": -266.640869140625,
"logps/rejected": -399.55474853515625,
"loss": 3.9421,
"margin_dpo/margin_mean": 69.27982330322266,
"margin_dpo/margin_std": 85.03633880615234,
"step": 184
},
{
"epoch": 0.387434554973822,
"fcm_dpo/beta": 0.009186076931655407,
"fcm_dpo/delta": -0.023270942270755768,
"fcm_dpo/margin": 48.90029525756836,
"fcm_dpo/q_t": 0.40330770611763,
"grad_norm": 90.22915649414062,
"learning_rate": 3.859293653520604e-07,
"logits/chosen": -0.8841208219528198,
"logits/rejected": -0.8796699643135071,
"logps/chosen": -349.35546875,
"logps/ref_chosen": -274.308837890625,
"logps/ref_rejected": -260.7274169921875,
"logps/rejected": -384.67437744140625,
"loss": 4.5001,
"margin_dpo/margin_mean": 48.900299072265625,
"margin_dpo/margin_std": 82.05442810058594,
"step": 185
},
{
"epoch": 0.38952879581151834,
"fcm_dpo/beta": 0.009153323248028755,
"fcm_dpo/delta": -0.0018929075449705124,
"fcm_dpo/margin": 46.96990203857422,
"fcm_dpo/q_t": 0.4052902162075043,
"grad_norm": 95.6862564086914,
"learning_rate": 3.8438923131177237e-07,
"logits/chosen": -0.8866674304008484,
"logits/rejected": -0.8885794878005981,
"logps/chosen": -373.1971130371094,
"logps/ref_chosen": -299.00537109375,
"logps/ref_rejected": -274.4014587402344,
"logps/rejected": -395.5630798339844,
"loss": 4.4928,
"margin_dpo/margin_mean": 46.96990203857422,
"margin_dpo/margin_std": 75.92547607421875,
"step": 186
},
{
"epoch": 0.39162303664921466,
"fcm_dpo/beta": 0.009052860550582409,
"fcm_dpo/delta": -0.005804085172712803,
"fcm_dpo/margin": 43.057518005371094,
"fcm_dpo/q_t": 0.4116409420967102,
"grad_norm": 123.71559143066406,
"learning_rate": 3.828418903848593e-07,
"logits/chosen": -0.8360170722007751,
"logits/rejected": -0.8213926553726196,
"logps/chosen": -412.3048095703125,
"logps/ref_chosen": -329.8253173828125,
"logps/ref_rejected": -263.73175048828125,
"logps/rejected": -389.26885986328125,
"loss": 4.7538,
"margin_dpo/margin_mean": 43.057518005371094,
"margin_dpo/margin_std": 86.98685455322266,
"step": 187
},
{
"epoch": 0.393717277486911,
"fcm_dpo/beta": 0.009060696698725224,
"fcm_dpo/delta": -0.0018207728862762451,
"fcm_dpo/margin": 48.631187438964844,
"fcm_dpo/q_t": 0.4070327579975128,
"grad_norm": 76.27377319335938,
"learning_rate": 3.812874255505191e-07,
"logits/chosen": -0.8485522270202637,
"logits/rejected": -0.8396183848381042,
"logps/chosen": -338.3583679199219,
"logps/ref_chosen": -263.005615234375,
"logps/ref_rejected": -247.08668518066406,
"logps/rejected": -371.07061767578125,
"loss": 4.6237,
"margin_dpo/margin_mean": 48.63118362426758,
"margin_dpo/margin_std": 90.66398620605469,
"step": 188
},
{
"epoch": 0.3958115183246073,
"fcm_dpo/beta": 0.008920717053115368,
"fcm_dpo/delta": -0.02324105054140091,
"fcm_dpo/margin": 59.242305755615234,
"fcm_dpo/q_t": 0.3857063353061676,
"grad_norm": 112.7400894165039,
"learning_rate": 3.797259201699833e-07,
"logits/chosen": -0.876733660697937,
"logits/rejected": -0.8841363787651062,
"logps/chosen": -335.7334289550781,
"logps/ref_chosen": -272.96038818359375,
"logps/ref_rejected": -275.13238525390625,
"logps/rejected": -397.14776611328125,
"loss": 4.1634,
"margin_dpo/margin_mean": 59.24230194091797,
"margin_dpo/margin_std": 76.81705474853516,
"step": 189
},
{
"epoch": 0.39790575916230364,
"fcm_dpo/beta": 0.0087926359847188,
"fcm_dpo/delta": -0.005176635459065437,
"fcm_dpo/margin": 52.0158805847168,
"fcm_dpo/q_t": 0.3995116353034973,
"grad_norm": 87.53999328613281,
"learning_rate": 3.781574579820464e-07,
"logits/chosen": -0.8722460865974426,
"logits/rejected": -0.8310267329216003,
"logps/chosen": -322.8664855957031,
"logps/ref_chosen": -257.79754638671875,
"logps/ref_rejected": -225.2164306640625,
"logps/rejected": -342.3012390136719,
"loss": 4.3655,
"margin_dpo/margin_mean": 52.01588821411133,
"margin_dpo/margin_std": 76.1653823852539,
"step": 190
},
{
"epoch": 0.4,
"fcm_dpo/beta": 0.008890870027244091,
"fcm_dpo/delta": 0.013219781219959259,
"fcm_dpo/margin": 52.1411247253418,
"fcm_dpo/q_t": 0.3991526961326599,
"grad_norm": 74.29290008544922,
"learning_rate": 3.765821230985757e-07,
"logits/chosen": -0.9063390493392944,
"logits/rejected": -0.9074532985687256,
"logps/chosen": -303.6692199707031,
"logps/ref_chosen": -243.8585205078125,
"logps/ref_rejected": -245.12136840820312,
"logps/rejected": -357.0732116699219,
"loss": 4.4241,
"margin_dpo/margin_mean": 52.1411247253418,
"margin_dpo/margin_std": 83.34513092041016,
"step": 191
},
{
"epoch": 0.40209424083769635,
"fcm_dpo/beta": 0.009091068990528584,
"fcm_dpo/delta": 0.029404528439044952,
"fcm_dpo/margin": 40.7011833190918,
"fcm_dpo/q_t": 0.4178800880908966,
"grad_norm": 74.3216552734375,
"learning_rate": 3.75e-07,
"logits/chosen": -0.8373446464538574,
"logits/rejected": -0.8207670450210571,
"logps/chosen": -337.1842346191406,
"logps/ref_chosen": -266.9799499511719,
"logps/ref_rejected": -260.1697082519531,
"logps/rejected": -371.0751953125,
"loss": 4.6603,
"margin_dpo/margin_mean": 40.7011833190918,
"margin_dpo/margin_std": 74.03218841552734,
"step": 192
},
{
"epoch": 0.4041884816753927,
"fcm_dpo/beta": 0.009255967102944851,
"fcm_dpo/delta": 0.02564959228038788,
"fcm_dpo/margin": 50.345237731933594,
"fcm_dpo/q_t": 0.39963340759277344,
"grad_norm": 78.43025970458984,
"learning_rate": 3.734111735307796e-07,
"logits/chosen": -0.8874344229698181,
"logits/rejected": -0.8585877418518066,
"logps/chosen": -360.2878723144531,
"logps/ref_chosen": -280.25323486328125,
"logps/ref_rejected": -291.0348815917969,
"logps/rejected": -421.4147644042969,
"loss": 4.4668,
"margin_dpo/margin_mean": 50.345237731933594,
"margin_dpo/margin_std": 81.91041564941406,
"step": 193
},
{
"epoch": 0.406282722513089,
"fcm_dpo/beta": 0.009216805920004845,
"fcm_dpo/delta": -0.022374922409653664,
"fcm_dpo/margin": 39.54142379760742,
"fcm_dpo/q_t": 0.42112547159194946,
"grad_norm": 116.94824981689453,
"learning_rate": 3.7181572889485623e-07,
"logits/chosen": -0.8712892532348633,
"logits/rejected": -0.8622381091117859,
"logps/chosen": -370.13507080078125,
"logps/ref_chosen": -288.4075927734375,
"logps/ref_rejected": -251.57994079589844,
"logps/rejected": -372.848876953125,
"loss": 4.7329,
"margin_dpo/margin_mean": 39.54142761230469,
"margin_dpo/margin_std": 77.82317352294922,
"step": 194
},
{
"epoch": 0.4083769633507853,
"fcm_dpo/beta": 0.009137854911386967,
"fcm_dpo/delta": -0.01154034398496151,
"fcm_dpo/margin": 37.4809455871582,
"fcm_dpo/q_t": 0.4273977279663086,
"grad_norm": 94.07014465332031,
"learning_rate": 3.7021375165108377e-07,
"logits/chosen": -0.8473997116088867,
"logits/rejected": -0.8510259985923767,
"logps/chosen": -356.85504150390625,
"logps/ref_chosen": -274.0006408691406,
"logps/ref_rejected": -280.22723388671875,
"logps/rejected": -400.5626220703125,
"loss": 4.7994,
"margin_dpo/margin_mean": 37.4809455871582,
"margin_dpo/margin_std": 78.94889831542969,
"step": 195
},
{
"epoch": 0.41047120418848165,
"fcm_dpo/beta": 0.009222757071256638,
"fcm_dpo/delta": 0.01410084217786789,
"fcm_dpo/margin": 49.72914123535156,
"fcm_dpo/q_t": 0.40148258209228516,
"grad_norm": 82.66631317138672,
"learning_rate": 3.6860532770864005e-07,
"logits/chosen": -0.8405277132987976,
"logits/rejected": -0.8463934659957886,
"logps/chosen": -343.28216552734375,
"logps/ref_chosen": -274.90069580078125,
"logps/ref_rejected": -248.7281951904297,
"logps/rejected": -366.8388366699219,
"loss": 4.4567,
"margin_dpo/margin_mean": 49.72914123535156,
"margin_dpo/margin_std": 82.37889862060547,
"step": 196
},
{
"epoch": 0.41256544502617803,
"fcm_dpo/beta": 0.009043055586516857,
"fcm_dpo/delta": -0.04608980193734169,
"fcm_dpo/margin": 59.12012481689453,
"fcm_dpo/q_t": 0.38405507802963257,
"grad_norm": 109.01643371582031,
"learning_rate": 3.6699054332241985e-07,
"logits/chosen": -0.8721888661384583,
"logits/rejected": -0.8570114374160767,
"logps/chosen": -382.73541259765625,
"logps/ref_chosen": -309.5348205566406,
"logps/ref_rejected": -264.3179931640625,
"logps/rejected": -396.638671875,
"loss": 4.1834,
"margin_dpo/margin_mean": 59.1201171875,
"margin_dpo/margin_std": 76.47515106201172,
"step": 197
},
{
"epoch": 0.41465968586387436,
"fcm_dpo/beta": 0.008807329460978508,
"fcm_dpo/delta": -0.0007833493873476982,
"fcm_dpo/margin": 58.406890869140625,
"fcm_dpo/q_t": 0.3895660638809204,
"grad_norm": 90.92684936523438,
"learning_rate": 3.653694850884091e-07,
"logits/chosen": -0.874003529548645,
"logits/rejected": -0.8490350842475891,
"logps/chosen": -369.50592041015625,
"logps/ref_chosen": -301.0134582519531,
"logps/ref_rejected": -292.84185791015625,
"logps/rejected": -419.7412109375,
"loss": 4.2752,
"margin_dpo/margin_mean": 58.40689468383789,
"margin_dpo/margin_std": 84.98297119140625,
"step": 198
},
{
"epoch": 0.4167539267015707,
"fcm_dpo/beta": 0.009006940759718418,
"fcm_dpo/delta": 0.032412342727184296,
"fcm_dpo/margin": 52.57870864868164,
"fcm_dpo/q_t": 0.39799413084983826,
"grad_norm": 107.88542938232422,
"learning_rate": 3.6374223993904124e-07,
"logits/chosen": -0.8546149730682373,
"logits/rejected": -0.8131856918334961,
"logps/chosen": -339.248291015625,
"logps/ref_chosen": -264.6058654785156,
"logps/ref_rejected": -214.9014892578125,
"logps/rejected": -342.1226806640625,
"loss": 4.3915,
"margin_dpo/margin_mean": 52.57870864868164,
"margin_dpo/margin_std": 82.29186248779297,
"step": 199
},
{
"epoch": 0.418848167539267,
"fcm_dpo/beta": 0.008955798111855984,
"fcm_dpo/delta": -0.03517484664916992,
"fcm_dpo/margin": 43.835540771484375,
"fcm_dpo/q_t": 0.4171965718269348,
"grad_norm": 166.77725219726562,
"learning_rate": 3.621088951385353e-07,
"logits/chosen": -0.9009106159210205,
"logits/rejected": -0.8782452344894409,
"logps/chosen": -407.1960754394531,
"logps/ref_chosen": -324.1588134765625,
"logps/ref_rejected": -277.80218505859375,
"logps/rejected": -404.67498779296875,
"loss": 4.8277,
"margin_dpo/margin_mean": 43.835540771484375,
"margin_dpo/margin_std": 94.47071075439453,
"step": 200
},
{
"epoch": 0.42094240837696334,
"fcm_dpo/beta": 0.008827144280076027,
"fcm_dpo/delta": -0.013941485434770584,
"fcm_dpo/margin": 48.97315216064453,
"fcm_dpo/q_t": 0.40686681866645813,
"grad_norm": 110.23241424560547,
"learning_rate": 3.604695382782159e-07,
"logits/chosen": -0.8766977190971375,
"logits/rejected": -0.8654013276100159,
"logps/chosen": -361.5707092285156,
"logps/ref_chosen": -271.49566650390625,
"logps/ref_rejected": -245.71414184570312,
"logps/rejected": -384.7623291015625,
"loss": 4.5951,
"margin_dpo/margin_mean": 48.97315216064453,
"margin_dpo/margin_std": 88.29933166503906,
"step": 201
},
{
"epoch": 0.42303664921465967,
"fcm_dpo/beta": 0.008783853612840176,
"fcm_dpo/delta": 0.012613123282790184,
"fcm_dpo/margin": 49.992881774902344,
"fcm_dpo/q_t": 0.40671300888061523,
"grad_norm": 97.78633117675781,
"learning_rate": 3.588242572718162e-07,
"logits/chosen": -0.8849156498908997,
"logits/rejected": -0.8734662532806396,
"logps/chosen": -359.2434387207031,
"logps/ref_chosen": -272.0979309082031,
"logps/ref_rejected": -235.94805908203125,
"logps/rejected": -373.08648681640625,
"loss": 4.5931,
"margin_dpo/margin_mean": 49.99287796020508,
"margin_dpo/margin_std": 91.31502532958984,
"step": 202
},
{
"epoch": 0.42513089005235605,
"fcm_dpo/beta": 0.009002954699099064,
"fcm_dpo/delta": 0.018306914716959,
"fcm_dpo/margin": 37.65985870361328,
"fcm_dpo/q_t": 0.42487096786499023,
"grad_norm": 88.12516021728516,
"learning_rate": 3.571731403507635e-07,
"logits/chosen": -0.8546892404556274,
"logits/rejected": -0.862351655960083,
"logps/chosen": -375.03314208984375,
"logps/ref_chosen": -280.2221374511719,
"logps/ref_rejected": -251.79798889160156,
"logps/rejected": -384.26885986328125,
"loss": 4.8132,
"margin_dpo/margin_mean": 37.65985870361328,
"margin_dpo/margin_std": 79.15308380126953,
"step": 203
},
{
"epoch": 0.4272251308900524,
"fcm_dpo/beta": 0.009114697575569153,
"fcm_dpo/delta": 0.0330776572227478,
"fcm_dpo/margin": 56.529239654541016,
"fcm_dpo/q_t": 0.3886546194553375,
"grad_norm": 108.35669708251953,
"learning_rate": 3.5551627605944746e-07,
"logits/chosen": -0.9038645625114441,
"logits/rejected": -0.8774609565734863,
"logps/chosen": -403.1597900390625,
"logps/ref_chosen": -318.7960510253906,
"logps/ref_rejected": -269.69921875,
"logps/rejected": -410.59222412109375,
"loss": 4.2776,
"margin_dpo/margin_mean": 56.529239654541016,
"margin_dpo/margin_std": 82.768798828125,
"step": 204
},
{
"epoch": 0.4293193717277487,
"fcm_dpo/beta": 0.009092997759580612,
"fcm_dpo/delta": -0.04461819678544998,
"fcm_dpo/margin": 64.81192779541016,
"fcm_dpo/q_t": 0.3769741952419281,
"grad_norm": 94.5504379272461,
"learning_rate": 3.5385375325047163e-07,
"logits/chosen": -0.8502748012542725,
"logits/rejected": -0.8185191750526428,
"logps/chosen": -365.49652099609375,
"logps/ref_chosen": -283.7620544433594,
"logps/ref_rejected": -297.69439697265625,
"logps/rejected": -444.24078369140625,
"loss": 4.1451,
"margin_dpo/margin_mean": 64.81192779541016,
"margin_dpo/margin_std": 90.32294464111328,
"step": 205
},
{
"epoch": 0.431413612565445,
"fcm_dpo/beta": 0.008791347965598106,
"fcm_dpo/delta": -0.023827582597732544,
"fcm_dpo/margin": 51.878639221191406,
"fcm_dpo/q_t": 0.4022713601589203,
"grad_norm": 111.39179992675781,
"learning_rate": 3.5218566107988867e-07,
"logits/chosen": -0.8792861104011536,
"logits/rejected": -0.901611864566803,
"logps/chosen": -378.02874755859375,
"logps/ref_chosen": -293.66387939453125,
"logps/ref_rejected": -291.3056640625,
"logps/rejected": -427.5492248535156,
"loss": 4.557,
"margin_dpo/margin_mean": 51.878639221191406,
"margin_dpo/margin_std": 91.19255065917969,
"step": 206
},
{
"epoch": 0.43350785340314135,
"fcm_dpo/beta": 0.008793886750936508,
"fcm_dpo/delta": 0.03257821500301361,
"fcm_dpo/margin": 48.695133209228516,
"fcm_dpo/q_t": 0.4082852602005005,
"grad_norm": 128.17391967773438,
"learning_rate": 3.505120890024195e-07,
"logits/chosen": -0.8236503005027771,
"logits/rejected": -0.8320968747138977,
"logps/chosen": -345.8304138183594,
"logps/ref_chosen": -270.5350646972656,
"logps/ref_rejected": -278.7747497558594,
"logps/rejected": -402.7652282714844,
"loss": 4.7188,
"margin_dpo/margin_mean": 48.695133209228516,
"margin_dpo/margin_std": 98.31978607177734,
"step": 207
},
{
"epoch": 0.4356020942408377,
"fcm_dpo/beta": 0.008899745531380177,
"fcm_dpo/delta": -0.023412320762872696,
"fcm_dpo/margin": 61.56591796875,
"fcm_dpo/q_t": 0.38415008783340454,
"grad_norm": 79.3017807006836,
"learning_rate": 3.4883312676665534e-07,
"logits/chosen": -0.8775085806846619,
"logits/rejected": -0.8293582201004028,
"logps/chosen": -359.858642578125,
"logps/ref_chosen": -279.582763671875,
"logps/ref_rejected": -290.041015625,
"logps/rejected": -431.8828125,
"loss": 4.1762,
"margin_dpo/margin_mean": 61.565914154052734,
"margin_dpo/margin_std": 86.33106994628906,
"step": 208
},
{
"epoch": 0.437696335078534,
"fcm_dpo/beta": 0.00878224615007639,
"fcm_dpo/delta": 0.0022584167309105396,
"fcm_dpo/margin": 38.80632781982422,
"fcm_dpo/q_t": 0.42601528763771057,
"grad_norm": 98.5056381225586,
"learning_rate": 3.4714886441024573e-07,
"logits/chosen": -0.7886694669723511,
"logits/rejected": -0.7872456312179565,
"logps/chosen": -404.982177734375,
"logps/ref_chosen": -318.8725280761719,
"logps/ref_rejected": -270.64324951171875,
"logps/rejected": -395.55926513671875,
"loss": 4.8535,
"margin_dpo/margin_mean": 38.80632781982422,
"margin_dpo/margin_std": 85.54448699951172,
"step": 209
},
{
"epoch": 0.4397905759162304,
"fcm_dpo/beta": 0.008630942553281784,
"fcm_dpo/delta": -0.02474762126803398,
"fcm_dpo/margin": 52.63178253173828,
"fcm_dpo/q_t": 0.4012606739997864,
"grad_norm": 83.42438507080078,
"learning_rate": 3.454593922550693e-07,
"logits/chosen": -0.8290054798126221,
"logits/rejected": -0.8178330659866333,
"logps/chosen": -358.53643798828125,
"logps/ref_chosen": -283.14031982421875,
"logps/ref_rejected": -287.2986755371094,
"logps/rejected": -415.3265686035156,
"loss": 4.4553,
"margin_dpo/margin_mean": 52.63178253173828,
"margin_dpo/margin_std": 86.18547058105469,
"step": 210
},
{
"epoch": 0.4418848167539267,
"fcm_dpo/beta": 0.008338711224496365,
"fcm_dpo/delta": -0.06341198086738586,
"fcm_dpo/margin": 64.17230987548828,
"fcm_dpo/q_t": 0.3805708587169647,
"grad_norm": 77.78020477294922,
"learning_rate": 3.4376480090239047e-07,
"logits/chosen": -0.8488391041755676,
"logits/rejected": -0.8283941149711609,
"logps/chosen": -342.4434509277344,
"logps/ref_chosen": -276.4228515625,
"logps/ref_rejected": -252.40603637695312,
"logps/rejected": -382.59893798828125,
"loss": 4.086,
"margin_dpo/margin_mean": 64.17230224609375,
"margin_dpo/margin_std": 74.36666107177734,
"step": 211
},
{
"epoch": 0.44397905759162304,
"fcm_dpo/beta": 0.008269790560007095,
"fcm_dpo/delta": 0.03665412217378616,
"fcm_dpo/margin": 43.63727951049805,
"fcm_dpo/q_t": 0.41719359159469604,
"grad_norm": 91.59362030029297,
"learning_rate": 3.4206518122800055e-07,
"logits/chosen": -0.8333749771118164,
"logits/rejected": -0.8325668573379517,
"logps/chosen": -348.72747802734375,
"logps/ref_chosen": -271.7055358886719,
"logps/ref_rejected": -241.18511962890625,
"logps/rejected": -361.84442138671875,
"loss": 4.6904,
"margin_dpo/margin_mean": 43.63727951049805,
"margin_dpo/margin_std": 80.35758209228516,
"step": 212
},
{
"epoch": 0.44607329842931936,
"fcm_dpo/beta": 0.008552048355340958,
"fcm_dpo/delta": 0.023182792589068413,
"fcm_dpo/margin": 47.81290817260742,
"fcm_dpo/q_t": 0.41327401995658875,
"grad_norm": 95.78131866455078,
"learning_rate": 3.403606243773448e-07,
"logits/chosen": -0.8298773765563965,
"logits/rejected": -0.8453171849250793,
"logps/chosen": -382.4115295410156,
"logps/ref_chosen": -302.2976379394531,
"logps/ref_rejected": -303.6202087402344,
"logps/rejected": -431.5469970703125,
"loss": 4.6235,
"margin_dpo/margin_mean": 47.81290054321289,
"margin_dpo/margin_std": 89.57734680175781,
"step": 213
},
{
"epoch": 0.4481675392670157,
"fcm_dpo/beta": 0.008502138778567314,
"fcm_dpo/delta": -0.031188862398266792,
"fcm_dpo/margin": 54.75181579589844,
"fcm_dpo/q_t": 0.39819973707199097,
"grad_norm": 102.20772552490234,
"learning_rate": 3.3865122176063385e-07,
"logits/chosen": -0.8368493318557739,
"logits/rejected": -0.8382536172866821,
"logps/chosen": -367.3687438964844,
"logps/ref_chosen": -272.13262939453125,
"logps/ref_rejected": -294.82354736328125,
"logps/rejected": -444.81146240234375,
"loss": 4.3495,
"margin_dpo/margin_mean": 54.75181579589844,
"margin_dpo/margin_std": 77.65438842773438,
"step": 214
},
{
"epoch": 0.450261780104712,
"fcm_dpo/beta": 0.008280987851321697,
"fcm_dpo/delta": -0.03984394669532776,
"fcm_dpo/margin": 41.743927001953125,
"fcm_dpo/q_t": 0.4267158508300781,
"grad_norm": 97.85367584228516,
"learning_rate": 3.3693706504794243e-07,
"logits/chosen": -0.8805428743362427,
"logits/rejected": -0.8621854186058044,
"logps/chosen": -387.81982421875,
"logps/ref_chosen": -291.3782958984375,
"logps/ref_rejected": -261.05792236328125,
"logps/rejected": -399.2433166503906,
"loss": 4.8072,
"margin_dpo/margin_mean": 41.74393081665039,
"margin_dpo/margin_std": 86.34469604492188,
"step": 215
},
{
"epoch": 0.4523560209424084,
"fcm_dpo/beta": 0.008214929141104221,
"fcm_dpo/delta": 0.013393443077802658,
"fcm_dpo/margin": 53.72854995727539,
"fcm_dpo/q_t": 0.40446725487709045,
"grad_norm": 105.10110473632812,
"learning_rate": 3.3521824616429284e-07,
"logits/chosen": -0.8951680660247803,
"logits/rejected": -0.8882848024368286,
"logps/chosen": -429.6124572753906,
"logps/ref_chosen": -338.50543212890625,
"logps/ref_rejected": -305.76104736328125,
"logps/rejected": -450.5965881347656,
"loss": 4.5592,
"margin_dpo/margin_mean": 53.728546142578125,
"margin_dpo/margin_std": 95.15604400634766,
"step": 216
},
{
"epoch": 0.4544502617801047,
"fcm_dpo/beta": 0.008185570128262043,
"fcm_dpo/delta": 0.013848704285919666,
"fcm_dpo/margin": 65.41835021972656,
"fcm_dpo/q_t": 0.3857192099094391,
"grad_norm": 82.3330307006836,
"learning_rate": 3.334948572847253e-07,
"logits/chosen": -0.8043022751808167,
"logits/rejected": -0.7672190070152283,
"logps/chosen": -390.0213928222656,
"logps/ref_chosen": -293.5498046875,
"logps/ref_rejected": -256.7830810546875,
"logps/rejected": -418.6730651855469,
"loss": 4.2157,
"margin_dpo/margin_mean": 65.41835021972656,
"margin_dpo/margin_std": 92.30606079101562,
"step": 217
},
{
"epoch": 0.45654450261780105,
"fcm_dpo/beta": 0.008206600323319435,
"fcm_dpo/delta": 0.0029644761234521866,
"fcm_dpo/margin": 62.74656295776367,
"fcm_dpo/q_t": 0.38812729716300964,
"grad_norm": 92.55133819580078,
"learning_rate": 3.317669908293554e-07,
"logits/chosen": -0.8422492742538452,
"logits/rejected": -0.8610657453536987,
"logps/chosen": -415.7991027832031,
"logps/ref_chosen": -320.579345703125,
"logps/ref_rejected": -294.0381164550781,
"logps/rejected": -452.00445556640625,
"loss": 4.2441,
"margin_dpo/margin_mean": 62.74656295776367,
"margin_dpo/margin_std": 89.6981201171875,
"step": 218
},
{
"epoch": 0.4586387434554974,
"fcm_dpo/beta": 0.008203094825148582,
"fcm_dpo/delta": -0.02315128594636917,
"fcm_dpo/margin": 59.49258041381836,
"fcm_dpo/q_t": 0.39354074001312256,
"grad_norm": 100.95577239990234,
"learning_rate": 3.300347394584172e-07,
"logits/chosen": -0.8339366316795349,
"logits/rejected": -0.8600754141807556,
"logps/chosen": -366.7186279296875,
"logps/ref_chosen": -268.4186096191406,
"logps/ref_rejected": -265.7808837890625,
"logps/rejected": -423.57342529296875,
"loss": 4.3875,
"margin_dpo/margin_mean": 59.49258041381836,
"margin_dpo/margin_std": 91.78353118896484,
"step": 219
},
{
"epoch": 0.4607329842931937,
"fcm_dpo/beta": 0.008344794623553753,
"fcm_dpo/delta": 0.034630343317985535,
"fcm_dpo/margin": 63.56485366821289,
"fcm_dpo/q_t": 0.38438913226127625,
"grad_norm": 139.32931518554688,
"learning_rate": 3.2829819606729477e-07,
"logits/chosen": -0.8834062218666077,
"logits/rejected": -0.8546550869941711,
"logps/chosen": -409.75164794921875,
"logps/ref_chosen": -312.8864440917969,
"logps/ref_rejected": -259.5191955566406,
"logps/rejected": -419.9492492675781,
"loss": 4.2254,
"margin_dpo/margin_mean": 63.56485366821289,
"margin_dpo/margin_std": 89.70089721679688,
"step": 220
},
{
"epoch": 0.46282722513089003,
"fcm_dpo/beta": 0.008402268402278423,
"fcm_dpo/delta": -0.024994712322950363,
"fcm_dpo/margin": 48.53934860229492,
"fcm_dpo/q_t": 0.4136922061443329,
"grad_norm": 94.52365112304688,
"learning_rate": 3.265574537815398e-07,
"logits/chosen": -0.8016759157180786,
"logits/rejected": -0.81261146068573,
"logps/chosen": -406.11309814453125,
"logps/ref_chosen": -300.32586669921875,
"logps/ref_rejected": -286.312255859375,
"logps/rejected": -440.63885498046875,
"loss": 4.67,
"margin_dpo/margin_mean": 48.53934860229492,
"margin_dpo/margin_std": 92.02408599853516,
"step": 221
},
{
"epoch": 0.4649214659685864,
"fcm_dpo/beta": 0.008166534826159477,
"fcm_dpo/delta": -0.01898489147424698,
"fcm_dpo/margin": 52.806861877441406,
"fcm_dpo/q_t": 0.40435272455215454,
"grad_norm": 104.70159149169922,
"learning_rate": 3.248126059518784e-07,
"logits/chosen": -0.8911526203155518,
"logits/rejected": -0.8636682629585266,
"logps/chosen": -396.1815490722656,
"logps/ref_chosen": -297.1113586425781,
"logps/ref_rejected": -235.53146362304688,
"logps/rejected": -387.40850830078125,
"loss": 4.4779,
"margin_dpo/margin_mean": 52.806861877441406,
"margin_dpo/margin_std": 84.82585144042969,
"step": 222
},
{
"epoch": 0.46701570680628274,
"fcm_dpo/beta": 0.008165441453456879,
"fcm_dpo/delta": 0.014042757451534271,
"fcm_dpo/margin": 55.89503479003906,
"fcm_dpo/q_t": 0.3992462754249573,
"grad_norm": 84.36212921142578,
"learning_rate": 3.230637461492043e-07,
"logits/chosen": -0.8363898992538452,
"logits/rejected": -0.8041598200798035,
"logps/chosen": -383.9892272949219,
"logps/ref_chosen": -286.41510009765625,
"logps/ref_rejected": -241.1181640625,
"logps/rejected": -394.58734130859375,
"loss": 4.39,
"margin_dpo/margin_mean": 55.89503860473633,
"margin_dpo/margin_std": 85.06916046142578,
"step": 223
},
{
"epoch": 0.46910994764397906,
"fcm_dpo/beta": 0.008199742995202541,
"fcm_dpo/delta": -0.024815764278173447,
"fcm_dpo/margin": 61.84333038330078,
"fcm_dpo/q_t": 0.389165461063385,
"grad_norm": 104.28144073486328,
"learning_rate": 3.213109681595612e-07,
"logits/chosen": -0.802398145198822,
"logits/rejected": -0.8188230395317078,
"logps/chosen": -336.8028259277344,
"logps/ref_chosen": -249.49234008789062,
"logps/ref_rejected": -233.10752868652344,
"logps/rejected": -382.2613525390625,
"loss": 4.2198,
"margin_dpo/margin_mean": 61.84333038330078,
"margin_dpo/margin_std": 82.82916259765625,
"step": 224
},
{
"epoch": 0.4712041884816754,
"fcm_dpo/beta": 0.008005239069461823,
"fcm_dpo/delta": -0.003811831586062908,
"fcm_dpo/margin": 54.48173904418945,
"fcm_dpo/q_t": 0.40706250071525574,
"grad_norm": 104.64070129394531,
"learning_rate": 3.1955436597911315e-07,
"logits/chosen": -0.8437448143959045,
"logits/rejected": -0.819525957107544,
"logps/chosen": -413.5852966308594,
"logps/ref_chosen": -311.8583679199219,
"logps/ref_rejected": -336.8523864746094,
"logps/rejected": -493.06103515625,
"loss": 4.5478,
"margin_dpo/margin_mean": 54.48173904418945,
"margin_dpo/margin_std": 95.53211975097656,
"step": 225
},
{
"epoch": 0.4732984293193717,
"fcm_dpo/beta": 0.007992172613739967,
"fcm_dpo/delta": -0.002601095475256443,
"fcm_dpo/margin": 51.423744201660156,
"fcm_dpo/q_t": 0.41092240810394287,
"grad_norm": 82.05426788330078,
"learning_rate": 3.1779403380910425e-07,
"logits/chosen": -0.8683302402496338,
"logits/rejected": -0.8574090003967285,
"logps/chosen": -337.87457275390625,
"logps/ref_chosen": -252.20123291015625,
"logps/ref_rejected": -254.41162109375,
"logps/rejected": -391.5086975097656,
"loss": 4.5394,
"margin_dpo/margin_mean": 51.423744201660156,
"margin_dpo/margin_std": 88.69334411621094,
"step": 226
},
{
"epoch": 0.47539267015706804,
"fcm_dpo/beta": 0.007951832376420498,
"fcm_dpo/delta": -0.0021827910095453262,
"fcm_dpo/margin": 58.96784973144531,
"fcm_dpo/q_t": 0.3964974880218506,
"grad_norm": 76.2507553100586,
"learning_rate": 3.160300660508064e-07,
"logits/chosen": -0.8247069120407104,
"logits/rejected": -0.8209613561630249,
"logps/chosen": -369.0705871582031,
"logps/ref_chosen": -285.25946044921875,
"logps/ref_rejected": -261.3220520019531,
"logps/rejected": -404.10101318359375,
"loss": 4.3982,
"margin_dpo/margin_mean": 58.96784973144531,
"margin_dpo/margin_std": 91.02167510986328,
"step": 227
},
{
"epoch": 0.4774869109947644,
"fcm_dpo/beta": 0.007981422357261181,
"fcm_dpo/delta": -0.030416294932365417,
"fcm_dpo/margin": 59.90766143798828,
"fcm_dpo/q_t": 0.39538905024528503,
"grad_norm": 87.2895736694336,
"learning_rate": 3.1426255730045695e-07,
"logits/chosen": -0.844511091709137,
"logits/rejected": -0.8116718530654907,
"logps/chosen": -387.2881164550781,
"logps/ref_chosen": -313.81878662109375,
"logps/ref_rejected": -258.07061767578125,
"logps/rejected": -391.4476013183594,
"loss": 4.2818,
"margin_dpo/margin_mean": 59.90766143798828,
"margin_dpo/margin_std": 81.06597137451172,
"step": 228
},
{
"epoch": 0.47958115183246075,
"fcm_dpo/beta": 0.007695622276514769,
"fcm_dpo/delta": -0.013841855339705944,
"fcm_dpo/margin": 64.4743881225586,
"fcm_dpo/q_t": 0.3899773061275482,
"grad_norm": 108.06454467773438,
"learning_rate": 3.1249160234418644e-07,
"logits/chosen": -0.8274182677268982,
"logits/rejected": -0.8372348546981812,
"logps/chosen": -377.1920166015625,
"logps/ref_chosen": -291.9707946777344,
"logps/ref_rejected": -263.42059326171875,
"logps/rejected": -413.1162414550781,
"loss": 4.2288,
"margin_dpo/margin_mean": 64.4743881225586,
"margin_dpo/margin_std": 85.52989196777344,
"step": 229
},
{
"epoch": 0.4816753926701571,
"fcm_dpo/beta": 0.007751506753265858,
"fcm_dpo/delta": 0.03374722972512245,
"fcm_dpo/margin": 60.86343765258789,
"fcm_dpo/q_t": 0.3971996009349823,
"grad_norm": 71.40199279785156,
"learning_rate": 3.1071729615293424e-07,
"logits/chosen": -0.8689441680908203,
"logits/rejected": -0.8685297966003418,
"logps/chosen": -308.78057861328125,
"logps/ref_chosen": -233.2601318359375,
"logps/ref_rejected": -238.922119140625,
"logps/rejected": -375.3060302734375,
"loss": 4.3431,
"margin_dpo/margin_mean": 60.863441467285156,
"margin_dpo/margin_std": 91.31082153320312,
"step": 230
},
{
"epoch": 0.4837696335078534,
"fcm_dpo/beta": 0.007850628346204758,
"fcm_dpo/delta": -0.01595788449048996,
"fcm_dpo/margin": 49.51227569580078,
"fcm_dpo/q_t": 0.41266465187072754,
"grad_norm": 78.52806091308594,
"learning_rate": 3.0893973387735683e-07,
"logits/chosen": -0.8488875031471252,
"logits/rejected": -0.8367761969566345,
"logps/chosen": -408.9139099121094,
"logps/ref_chosen": -322.1551818847656,
"logps/ref_rejected": -280.97613525390625,
"logps/rejected": -417.24713134765625,
"loss": 4.5434,
"margin_dpo/margin_mean": 49.51227569580078,
"margin_dpo/margin_std": 79.67231750488281,
"step": 231
},
{
"epoch": 0.48586387434554973,
"fcm_dpo/beta": 0.0078697195276618,
"fcm_dpo/delta": -0.003923982381820679,
"fcm_dpo/margin": 52.60862350463867,
"fcm_dpo/q_t": 0.4088551998138428,
"grad_norm": 140.39244079589844,
"learning_rate": 3.071590108427243e-07,
"logits/chosen": -0.8175272941589355,
"logits/rejected": -0.7981524467468262,
"logps/chosen": -354.7743225097656,
"logps/ref_chosen": -271.7437744140625,
"logps/ref_rejected": -249.94981384277344,
"logps/rejected": -385.58905029296875,
"loss": 4.5563,
"margin_dpo/margin_mean": 52.608619689941406,
"margin_dpo/margin_std": 87.00882720947266,
"step": 232
},
{
"epoch": 0.48795811518324606,
"fcm_dpo/beta": 0.007669942919164896,
"fcm_dpo/delta": -0.040378112345933914,
"fcm_dpo/margin": 58.76781463623047,
"fcm_dpo/q_t": 0.40030309557914734,
"grad_norm": 71.58476257324219,
"learning_rate": 3.05375222543809e-07,
"logits/chosen": -0.8641107678413391,
"logits/rejected": -0.8540716767311096,
"logps/chosen": -365.4216003417969,
"logps/ref_chosen": -285.3423156738281,
"logps/ref_rejected": -266.34320068359375,
"logps/rejected": -405.1903076171875,
"loss": 4.3662,
"margin_dpo/margin_mean": 58.7678108215332,
"margin_dpo/margin_std": 83.56101989746094,
"step": 233
},
{
"epoch": 0.4900523560209424,
"fcm_dpo/beta": 0.007635914720594883,
"fcm_dpo/delta": 0.028283506631851196,
"fcm_dpo/margin": 50.3465461730957,
"fcm_dpo/q_t": 0.41592836380004883,
"grad_norm": 68.07914733886719,
"learning_rate": 3.035884646397637e-07,
"logits/chosen": -0.8356366753578186,
"logits/rejected": -0.8209684491157532,
"logps/chosen": -379.02130126953125,
"logps/ref_chosen": -294.9057312011719,
"logps/ref_rejected": -299.37054443359375,
"logps/rejected": -433.8326416015625,
"loss": 4.6745,
"margin_dpo/margin_mean": 50.3465461730957,
"margin_dpo/margin_std": 95.68152618408203,
"step": 234
},
{
"epoch": 0.49214659685863876,
"fcm_dpo/beta": 0.007906999439001083,
"fcm_dpo/delta": 0.039126671850681305,
"fcm_dpo/margin": 60.62733840942383,
"fcm_dpo/q_t": 0.39487558603286743,
"grad_norm": 72.7540512084961,
"learning_rate": 3.017988329489923e-07,
"logits/chosen": -0.8502811789512634,
"logits/rejected": -0.8423137068748474,
"logps/chosen": -369.0730285644531,
"logps/ref_chosen": -289.49755859375,
"logps/ref_rejected": -247.55076599121094,
"logps/rejected": -387.7535400390625,
"loss": 4.3199,
"margin_dpo/margin_mean": 60.627342224121094,
"margin_dpo/margin_std": 89.68913269042969,
"step": 235
},
{
"epoch": 0.4942408376963351,
"fcm_dpo/beta": 0.008040757849812508,
"fcm_dpo/delta": 0.023043854162096977,
"fcm_dpo/margin": 54.43937301635742,
"fcm_dpo/q_t": 0.40379512310028076,
"grad_norm": 90.92185974121094,
"learning_rate": 3.000064234440111e-07,
"logits/chosen": -0.8824329376220703,
"logits/rejected": -0.8828592896461487,
"logps/chosen": -365.1532287597656,
"logps/ref_chosen": -288.8846435546875,
"logps/ref_rejected": -242.0452880859375,
"logps/rejected": -372.7532043457031,
"loss": 4.433,
"margin_dpo/margin_mean": 54.43937301635742,
"margin_dpo/margin_std": 85.27285766601562,
"step": 236
},
{
"epoch": 0.4963350785340314,
"fcm_dpo/beta": 0.008117102086544037,
"fcm_dpo/delta": -0.0058396486565470695,
"fcm_dpo/margin": 55.81697082519531,
"fcm_dpo/q_t": 0.40049877762794495,
"grad_norm": 84.8370361328125,
"learning_rate": 2.9821133224630223e-07,
"logits/chosen": -0.8333731293678284,
"logits/rejected": -0.8125811815261841,
"logps/chosen": -347.8975524902344,
"logps/ref_chosen": -265.47869873046875,
"logps/ref_rejected": -267.9891357421875,
"logps/rejected": -406.22491455078125,
"loss": 4.3497,
"margin_dpo/margin_mean": 55.81697082519531,
"margin_dpo/margin_std": 81.0350112915039,
"step": 237
},
{
"epoch": 0.49842931937172774,
"fcm_dpo/beta": 0.008078444749116898,
"fcm_dpo/delta": 0.00038408301770687103,
"fcm_dpo/margin": 53.928340911865234,
"fcm_dpo/q_t": 0.40683451294898987,
"grad_norm": 89.60171508789062,
"learning_rate": 2.964136556211588e-07,
"logits/chosen": -0.8460046052932739,
"logits/rejected": -0.8157504200935364,
"logps/chosen": -401.5897521972656,
"logps/ref_chosen": -312.0026550292969,
"logps/ref_rejected": -270.0257263183594,
"logps/rejected": -413.5411376953125,
"loss": 4.4321,
"margin_dpo/margin_mean": 53.928340911865234,
"margin_dpo/margin_std": 84.81220245361328,
"step": 238
},
{
"epoch": 0.5005235602094241,
"fcm_dpo/beta": 0.008109199814498425,
"fcm_dpo/delta": 0.005069888196885586,
"fcm_dpo/margin": 51.8012580871582,
"fcm_dpo/q_t": 0.4107738733291626,
"grad_norm": 78.47299194335938,
"learning_rate": 2.946134899725226e-07,
"logits/chosen": -0.829999566078186,
"logits/rejected": -0.8703840374946594,
"logps/chosen": -344.9981994628906,
"logps/ref_chosen": -266.9936218261719,
"logps/ref_rejected": -276.13525390625,
"logps/rejected": -405.9410705566406,
"loss": 4.6567,
"margin_dpo/margin_mean": 51.80126190185547,
"margin_dpo/margin_std": 100.10111999511719,
"step": 239
},
{
"epoch": 0.5026178010471204,
"fcm_dpo/beta": 0.008296947926282883,
"fcm_dpo/delta": 0.03266207128763199,
"fcm_dpo/margin": 59.96100616455078,
"fcm_dpo/q_t": 0.3940165638923645,
"grad_norm": 78.5995864868164,
"learning_rate": 2.9281093183781403e-07,
"logits/chosen": -0.9000136852264404,
"logits/rejected": -0.8934633135795593,
"logps/chosen": -367.1950988769531,
"logps/ref_chosen": -286.0997619628906,
"logps/ref_rejected": -256.9459533691406,
"logps/rejected": -398.0023193359375,
"loss": 4.3154,
"margin_dpo/margin_mean": 59.96100616455078,
"margin_dpo/margin_std": 90.11138153076172,
"step": 240
},
{
"epoch": 0.5047120418848168,
"fcm_dpo/beta": 0.008294462226331234,
"fcm_dpo/delta": -0.001803908497095108,
"fcm_dpo/margin": 46.42206573486328,
"fcm_dpo/q_t": 0.4186059236526489,
"grad_norm": 87.16766357421875,
"learning_rate": 2.910060778827554e-07,
"logits/chosen": -0.8022783994674683,
"logits/rejected": -0.7795644998550415,
"logps/chosen": -354.74188232421875,
"logps/ref_chosen": -260.6881408691406,
"logps/ref_rejected": -250.02915954589844,
"logps/rejected": -390.5049743652344,
"loss": 4.6915,
"margin_dpo/margin_mean": 46.422061920166016,
"margin_dpo/margin_std": 88.32010650634766,
"step": 241
},
{
"epoch": 0.506806282722513,
"fcm_dpo/beta": 0.008265901356935501,
"fcm_dpo/delta": 0.011965340934693813,
"fcm_dpo/margin": 57.2177619934082,
"fcm_dpo/q_t": 0.3987177908420563,
"grad_norm": 129.31790161132812,
"learning_rate": 2.891990248961871e-07,
"logits/chosen": -0.8726097345352173,
"logits/rejected": -0.8508012890815735,
"logps/chosen": -352.75592041015625,
"logps/ref_chosen": -270.51397705078125,
"logps/ref_rejected": -244.8560791015625,
"logps/rejected": -384.3157653808594,
"loss": 4.3126,
"margin_dpo/margin_mean": 57.2177619934082,
"margin_dpo/margin_std": 83.8380126953125,
"step": 242
},
{
"epoch": 0.5089005235602094,
"fcm_dpo/beta": 0.008414202369749546,
"fcm_dpo/delta": -0.02754206582903862,
"fcm_dpo/margin": 64.92880249023438,
"fcm_dpo/q_t": 0.3845221698284149,
"grad_norm": 101.96147918701172,
"learning_rate": 2.873898697848762e-07,
"logits/chosen": -0.8713305592536926,
"logits/rejected": -0.8565847873687744,
"logps/chosen": -403.90106201171875,
"logps/ref_chosen": -324.68206787109375,
"logps/ref_rejected": -307.1111755371094,
"logps/rejected": -451.2590026855469,
"loss": 4.1495,
"margin_dpo/margin_mean": 64.92879486083984,
"margin_dpo/margin_std": 87.35710906982422,
"step": 243
},
{
"epoch": 0.5109947643979058,
"fcm_dpo/beta": 0.008167761377990246,
"fcm_dpo/delta": -0.007273124065250158,
"fcm_dpo/margin": 65.28921508789062,
"fcm_dpo/q_t": 0.38221222162246704,
"grad_norm": 113.77725219726562,
"learning_rate": 2.8557870956832133e-07,
"logits/chosen": -0.8651580810546875,
"logits/rejected": -0.8182386755943298,
"logps/chosen": -400.8844299316406,
"logps/ref_chosen": -318.979248046875,
"logps/ref_rejected": -269.67572021484375,
"logps/rejected": -416.8700866699219,
"loss": 4.1858,
"margin_dpo/margin_mean": 65.28921508789062,
"margin_dpo/margin_std": 85.85536193847656,
"step": 244
},
{
"epoch": 0.5130890052356021,
"fcm_dpo/beta": 0.008117292076349258,
"fcm_dpo/delta": -0.022571483626961708,
"fcm_dpo/margin": 65.36834716796875,
"fcm_dpo/q_t": 0.3831850290298462,
"grad_norm": 71.29036712646484,
"learning_rate": 2.837656413735479e-07,
"logits/chosen": -0.8694513440132141,
"logits/rejected": -0.8749232292175293,
"logps/chosen": -376.11077880859375,
"logps/ref_chosen": -294.8980712890625,
"logps/ref_rejected": -239.8111114501953,
"logps/rejected": -386.3921813964844,
"loss": 4.1545,
"margin_dpo/margin_mean": 65.36834716796875,
"margin_dpo/margin_std": 83.06272888183594,
"step": 245
},
{
"epoch": 0.5151832460732985,
"fcm_dpo/beta": 0.007969960570335388,
"fcm_dpo/delta": -0.008928188122808933,
"fcm_dpo/margin": 44.71929931640625,
"fcm_dpo/q_t": 0.4241969585418701,
"grad_norm": 86.78738403320312,
"learning_rate": 2.8195076242990116e-07,
"logits/chosen": -0.8513779044151306,
"logits/rejected": -0.8571193218231201,
"logps/chosen": -380.00701904296875,
"logps/ref_chosen": -280.6854248046875,
"logps/ref_rejected": -253.65382385253906,
"logps/rejected": -397.6946716308594,
"loss": 4.8125,
"margin_dpo/margin_mean": 44.71929931640625,
"margin_dpo/margin_std": 96.33696746826172,
"step": 246
},
{
"epoch": 0.5172774869109947,
"fcm_dpo/beta": 0.007945312187075615,
"fcm_dpo/delta": -0.0081523098051548,
"fcm_dpo/margin": 59.00965881347656,
"fcm_dpo/q_t": 0.3986579179763794,
"grad_norm": 63.86665725708008,
"learning_rate": 2.801341700638307e-07,
"logits/chosen": -0.8496901392936707,
"logits/rejected": -0.8478038311004639,
"logps/chosen": -373.80157470703125,
"logps/ref_chosen": -281.1091003417969,
"logps/ref_rejected": -260.3700866699219,
"logps/rejected": -412.0722351074219,
"loss": 4.3814,
"margin_dpo/margin_mean": 59.0096549987793,
"margin_dpo/margin_std": 89.11226654052734,
"step": 247
},
{
"epoch": 0.5193717277486911,
"fcm_dpo/beta": 0.007763488218188286,
"fcm_dpo/delta": -0.02389615960419178,
"fcm_dpo/margin": 55.78809356689453,
"fcm_dpo/q_t": 0.4041425585746765,
"grad_norm": 117.25294494628906,
"learning_rate": 2.7831596169367227e-07,
"logits/chosen": -0.8269961476325989,
"logits/rejected": -0.8366529941558838,
"logps/chosen": -363.4480895996094,
"logps/ref_chosen": -270.318359375,
"logps/ref_rejected": -233.46778869628906,
"logps/rejected": -382.3856201171875,
"loss": 4.4813,
"margin_dpo/margin_mean": 55.78809356689453,
"margin_dpo/margin_std": 88.58773803710938,
"step": 248
},
{
"epoch": 0.5214659685863874,
"fcm_dpo/beta": 0.00790868978947401,
"fcm_dpo/delta": 0.03692461922764778,
"fcm_dpo/margin": 48.326904296875,
"fcm_dpo/q_t": 0.4165218472480774,
"grad_norm": 106.95769500732422,
"learning_rate": 2.7649623482442274e-07,
"logits/chosen": -0.8559276461601257,
"logits/rejected": -0.8305518627166748,
"logps/chosen": -385.6744689941406,
"logps/ref_chosen": -275.8088684082031,
"logps/ref_rejected": -243.45138549804688,
"logps/rejected": -401.6439514160156,
"loss": 4.7438,
"margin_dpo/margin_mean": 48.326904296875,
"margin_dpo/margin_std": 99.37535095214844,
"step": 249
},
{
"epoch": 0.5235602094240838,
"fcm_dpo/beta": 0.00772194704040885,
"fcm_dpo/delta": -0.05862649157643318,
"fcm_dpo/margin": 67.97274017333984,
"fcm_dpo/q_t": 0.38753461837768555,
"grad_norm": 97.94071197509766,
"learning_rate": 2.7467508704251135e-07,
"logits/chosen": -0.8564193248748779,
"logits/rejected": -0.8508210182189941,
"logps/chosen": -403.1372375488281,
"logps/ref_chosen": -291.68524169921875,
"logps/ref_rejected": -284.5358581542969,
"logps/rejected": -463.9606018066406,
"loss": 4.3415,
"margin_dpo/margin_mean": 67.97273254394531,
"margin_dpo/margin_std": 104.22421264648438,
"step": 250
},
{
"epoch": 0.5256544502617801,
"fcm_dpo/beta": 0.007520149927586317,
"fcm_dpo/delta": -0.030201872810721397,
"fcm_dpo/margin": 60.210289001464844,
"fcm_dpo/q_t": 0.40624505281448364,
"grad_norm": 90.0475082397461,
"learning_rate": 2.7285261601056697e-07,
"logits/chosen": -0.8678263425827026,
"logits/rejected": -0.8515715003013611,
"logps/chosen": -383.1903991699219,
"logps/ref_chosen": -281.736572265625,
"logps/ref_rejected": -255.9419708251953,
"logps/rejected": -417.6060485839844,
"loss": 4.4574,
"margin_dpo/margin_mean": 60.210289001464844,
"margin_dpo/margin_std": 97.2637939453125,
"step": 251
},
{
"epoch": 0.5277486910994764,
"fcm_dpo/beta": 0.007303354796022177,
"fcm_dpo/delta": -0.024126823991537094,
"fcm_dpo/margin": 57.908607482910156,
"fcm_dpo/q_t": 0.40556401014328003,
"grad_norm": 121.3109130859375,
"learning_rate": 2.7102891946217994e-07,
"logits/chosen": -0.906019926071167,
"logits/rejected": -0.8782291412353516,
"logps/chosen": -409.6369323730469,
"logps/ref_chosen": -295.9674072265625,
"logps/ref_rejected": -280.111572265625,
"logps/rejected": -451.689697265625,
"loss": 4.5379,
"margin_dpo/margin_mean": 57.90860366821289,
"margin_dpo/margin_std": 97.96647644042969,
"step": 252
},
{
"epoch": 0.5298429319371728,
"fcm_dpo/beta": 0.007306728512048721,
"fcm_dpo/delta": 0.008086594752967358,
"fcm_dpo/margin": 57.006065368652344,
"fcm_dpo/q_t": 0.4107561707496643,
"grad_norm": 95.82548522949219,
"learning_rate": 2.692040951966617e-07,
"logits/chosen": -0.8742294311523438,
"logits/rejected": -0.8646455407142639,
"logps/chosen": -400.19940185546875,
"logps/ref_chosen": -277.072265625,
"logps/ref_rejected": -247.31643676757812,
"logps/rejected": -427.44964599609375,
"loss": 4.5858,
"margin_dpo/margin_mean": 57.006065368652344,
"margin_dpo/margin_std": 100.61224365234375,
"step": 253
},
{
"epoch": 0.5319371727748691,
"fcm_dpo/beta": 0.007321351673454046,
"fcm_dpo/delta": -0.030212795361876488,
"fcm_dpo/margin": 61.80467224121094,
"fcm_dpo/q_t": 0.4016120135784149,
"grad_norm": 80.6700439453125,
"learning_rate": 2.6737824107379947e-07,
"logits/chosen": -0.8140766620635986,
"logits/rejected": -0.7952826619148254,
"logps/chosen": -390.226806640625,
"logps/ref_chosen": -269.9478454589844,
"logps/ref_rejected": -249.45005798339844,
"logps/rejected": -431.5336608886719,
"loss": 4.4121,
"margin_dpo/margin_mean": 61.8046760559082,
"margin_dpo/margin_std": 94.67742919921875,
"step": 254
},
{
"epoch": 0.5340314136125655,
"fcm_dpo/beta": 0.007317520212382078,
"fcm_dpo/delta": 0.02305128611624241,
"fcm_dpo/margin": 71.82103729248047,
"fcm_dpo/q_t": 0.38811203837394714,
"grad_norm": 86.81169128417969,
"learning_rate": 2.655514550086086e-07,
"logits/chosen": -0.8274251222610474,
"logits/rejected": -0.78985595703125,
"logps/chosen": -420.36663818359375,
"logps/ref_chosen": -306.6552734375,
"logps/ref_rejected": -254.47528076171875,
"logps/rejected": -440.00762939453125,
"loss": 4.333,
"margin_dpo/margin_mean": 71.82103729248047,
"margin_dpo/margin_std": 111.82829284667969,
"step": 255
},
{
"epoch": 0.5361256544502618,
"fcm_dpo/beta": 0.007091246545314789,
"fcm_dpo/delta": -0.07956443727016449,
"fcm_dpo/margin": 65.31961822509766,
"fcm_dpo/q_t": 0.3946439325809479,
"grad_norm": 143.94879150390625,
"learning_rate": 2.6372383496608186e-07,
"logits/chosen": -0.8587058782577515,
"logits/rejected": -0.8423352837562561,
"logps/chosen": -442.55224609375,
"logps/ref_chosen": -323.7181701660156,
"logps/ref_rejected": -254.1871337890625,
"logps/rejected": -438.3408203125,
"loss": 4.5335,
"margin_dpo/margin_mean": 65.31961059570312,
"margin_dpo/margin_std": 108.02703094482422,
"step": 256
},
{
"epoch": 0.5382198952879581,
"fcm_dpo/beta": 0.007004152052104473,
"fcm_dpo/delta": 0.047338955104351044,
"fcm_dpo/margin": 67.73786926269531,
"fcm_dpo/q_t": 0.39716964960098267,
"grad_norm": 107.15489959716797,
"learning_rate": 2.618954789559356e-07,
"logits/chosen": -0.8520160913467407,
"logits/rejected": -0.8423279523849487,
"logps/chosen": -389.7987365722656,
"logps/ref_chosen": -267.21209716796875,
"logps/ref_rejected": -249.12579345703125,
"logps/rejected": -439.4503173828125,
"loss": 4.3327,
"margin_dpo/margin_mean": 67.73786926269531,
"margin_dpo/margin_std": 101.2127456665039,
"step": 257
},
{
"epoch": 0.5403141361256545,
"fcm_dpo/beta": 0.007009489927440882,
"fcm_dpo/delta": -0.022928498685359955,
"fcm_dpo/margin": 67.92435455322266,
"fcm_dpo/q_t": 0.39396271109580994,
"grad_norm": 94.51057434082031,
"learning_rate": 2.600664850273538e-07,
"logits/chosen": -0.8741627335548401,
"logits/rejected": -0.8431136608123779,
"logps/chosen": -408.9303894042969,
"logps/ref_chosen": -277.6827392578125,
"logps/ref_rejected": -250.73385620117188,
"logps/rejected": -449.90582275390625,
"loss": 4.3035,
"margin_dpo/margin_mean": 67.92435455322266,
"margin_dpo/margin_std": 93.82572937011719,
"step": 258
},
{
"epoch": 0.5424083769633508,
"fcm_dpo/beta": 0.007051707711070776,
"fcm_dpo/delta": 0.012171324342489243,
"fcm_dpo/margin": 62.30729675292969,
"fcm_dpo/q_t": 0.4041876196861267,
"grad_norm": 89.43299102783203,
"learning_rate": 2.582369512637302e-07,
"logits/chosen": -0.8937543630599976,
"logits/rejected": -0.8885373473167419,
"logps/chosen": -417.1524658203125,
"logps/ref_chosen": -294.6099853515625,
"logps/ref_rejected": -272.2725830078125,
"logps/rejected": -457.1224060058594,
"loss": 4.4307,
"margin_dpo/margin_mean": 62.30729675292969,
"margin_dpo/margin_std": 97.35728454589844,
"step": 259
},
{
"epoch": 0.5445026178010471,
"fcm_dpo/beta": 0.0070616197772324085,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 30.28175163269043,
"fcm_dpo/q_t": 0.45381462574005127,
"grad_norm": 80.50341796875,
"learning_rate": 2.5640697577740815e-07,
"logits/chosen": -0.8794345259666443,
"logits/rejected": -0.8733927011489868,
"logps/chosen": -418.9019775390625,
"logps/ref_chosen": -290.85711669921875,
"logps/ref_rejected": -277.5970153808594,
"logps/rejected": -435.9236755371094,
"loss": 5.1902,
"margin_dpo/margin_mean": 30.28175163269043,
"margin_dpo/margin_std": 95.94783782958984,
"step": 260
},
{
"epoch": 0.5465968586387434,
"fcm_dpo/beta": 0.0070557305589318275,
"fcm_dpo/delta": -0.003128012176603079,
"fcm_dpo/margin": 55.079566955566406,
"fcm_dpo/q_t": 0.41656798124313354,
"grad_norm": 77.27826690673828,
"learning_rate": 2.5457665670441937e-07,
"logits/chosen": -0.7756448984146118,
"logits/rejected": -0.7851826548576355,
"logps/chosen": -384.649169921875,
"logps/ref_chosen": -251.13223266601562,
"logps/ref_rejected": -244.76016235351562,
"logps/rejected": -433.3566589355469,
"loss": 4.6971,
"margin_dpo/margin_mean": 55.079559326171875,
"margin_dpo/margin_std": 107.6162109375,
"step": 261
},
{
"epoch": 0.5486910994764398,
"fcm_dpo/beta": 0.007122871000319719,
"fcm_dpo/delta": -0.004592832177877426,
"fcm_dpo/margin": 71.38398742675781,
"fcm_dpo/q_t": 0.3885217308998108,
"grad_norm": 100.85935974121094,
"learning_rate": 2.527460921992209e-07,
"logits/chosen": -0.8093084096908569,
"logits/rejected": -0.7977215051651001,
"logps/chosen": -422.3883972167969,
"logps/ref_chosen": -299.7217712402344,
"logps/ref_rejected": -277.0969543457031,
"logps/rejected": -471.1475524902344,
"loss": 4.2029,
"margin_dpo/margin_mean": 71.38399505615234,
"margin_dpo/margin_std": 94.25498962402344,
"step": 262
},
{
"epoch": 0.5507853403141362,
"fcm_dpo/beta": 0.0070396289229393005,
"fcm_dpo/delta": 0.003985295072197914,
"fcm_dpo/margin": 61.07171630859375,
"fcm_dpo/q_t": 0.4044322073459625,
"grad_norm": 107.648681640625,
"learning_rate": 2.509153804294318e-07,
"logits/chosen": -0.792377769947052,
"logits/rejected": -0.7732011079788208,
"logps/chosen": -410.0021667480469,
"logps/ref_chosen": -280.1349792480469,
"logps/ref_rejected": -256.7151184082031,
"logps/rejected": -447.654052734375,
"loss": 4.5065,
"margin_dpo/margin_mean": 61.07171630859375,
"margin_dpo/margin_std": 97.67864227294922,
"step": 263
},
{
"epoch": 0.5528795811518324,
"fcm_dpo/beta": 0.006962607614696026,
"fcm_dpo/delta": -0.005255230236798525,
"fcm_dpo/margin": 73.86559295654297,
"fcm_dpo/q_t": 0.388028621673584,
"grad_norm": 77.60498046875,
"learning_rate": 2.4908461957056825e-07,
"logits/chosen": -0.8271849155426025,
"logits/rejected": -0.821992039680481,
"logps/chosen": -383.2393798828125,
"logps/ref_chosen": -260.53509521484375,
"logps/ref_rejected": -255.53799438476562,
"logps/rejected": -452.1078796386719,
"loss": 4.2021,
"margin_dpo/margin_mean": 73.86559295654297,
"margin_dpo/margin_std": 98.95625305175781,
"step": 264
},
{
"epoch": 0.5549738219895288,
"fcm_dpo/beta": 0.006922694388777018,
"fcm_dpo/delta": -0.033377017825841904,
"fcm_dpo/margin": 74.40999603271484,
"fcm_dpo/q_t": 0.38931435346603394,
"grad_norm": 80.55166625976562,
"learning_rate": 2.4725390780077905e-07,
"logits/chosen": -0.8924263119697571,
"logits/rejected": -0.8955501317977905,
"logps/chosen": -409.32769775390625,
"logps/ref_chosen": -283.7130432128906,
"logps/ref_rejected": -270.3209533691406,
"logps/rejected": -470.3455505371094,
"loss": 4.2945,
"margin_dpo/margin_mean": 74.40999603271484,
"margin_dpo/margin_std": 109.31571960449219,
"step": 265
},
{
"epoch": 0.5570680628272251,
"fcm_dpo/beta": 0.006731396075338125,
"fcm_dpo/delta": -0.037259288132190704,
"fcm_dpo/margin": 73.79714965820312,
"fcm_dpo/q_t": 0.38967522978782654,
"grad_norm": 68.00286865234375,
"learning_rate": 2.454233432955807e-07,
"logits/chosen": -0.9042876362800598,
"logits/rejected": -0.8750791549682617,
"logps/chosen": -390.12872314453125,
"logps/ref_chosen": -278.09930419921875,
"logps/ref_rejected": -260.6734619140625,
"logps/rejected": -446.5,
"loss": 4.1997,
"margin_dpo/margin_mean": 73.79714965820312,
"margin_dpo/margin_std": 93.62163543701172,
"step": 266
},
{
"epoch": 0.5591623036649215,
"fcm_dpo/beta": 0.006746482569724321,
"fcm_dpo/delta": 0.04334060102701187,
"fcm_dpo/margin": 60.28854751586914,
"fcm_dpo/q_t": 0.40969720482826233,
"grad_norm": 72.49388885498047,
"learning_rate": 2.435930242225919e-07,
"logits/chosen": -0.8423041701316833,
"logits/rejected": -0.8496881127357483,
"logps/chosen": -408.0243225097656,
"logps/ref_chosen": -280.33319091796875,
"logps/ref_rejected": -247.78099060058594,
"logps/rejected": -435.7605895996094,
"loss": 4.5149,
"margin_dpo/margin_mean": 60.28854751586914,
"margin_dpo/margin_std": 97.47137451171875,
"step": 267
},
{
"epoch": 0.5612565445026177,
"fcm_dpo/beta": 0.006985923275351524,
"fcm_dpo/delta": 0.02039477974176407,
"fcm_dpo/margin": 73.69112396240234,
"fcm_dpo/q_t": 0.38772106170654297,
"grad_norm": 83.59169006347656,
"learning_rate": 2.4176304873626984e-07,
"logits/chosen": -0.8112634420394897,
"logits/rejected": -0.7900372743606567,
"logps/chosen": -424.6039733886719,
"logps/ref_chosen": -304.1787109375,
"logps/ref_rejected": -272.80316162109375,
"logps/rejected": -466.9195556640625,
"loss": 4.2079,
"margin_dpo/margin_mean": 73.69112396240234,
"margin_dpo/margin_std": 99.36200714111328,
"step": 268
},
{
"epoch": 0.5633507853403141,
"fcm_dpo/beta": 0.007065373472869396,
"fcm_dpo/delta": 0.04509638249874115,
"fcm_dpo/margin": 58.13600158691406,
"fcm_dpo/q_t": 0.4082724452018738,
"grad_norm": 88.63798522949219,
"learning_rate": 2.399335149726463e-07,
"logits/chosen": -0.8427159190177917,
"logits/rejected": -0.8367486000061035,
"logps/chosen": -372.8246154785156,
"logps/ref_chosen": -249.84512329101562,
"logps/ref_rejected": -223.37356567382812,
"logps/rejected": -404.4891052246094,
"loss": 4.5646,
"margin_dpo/margin_mean": 58.13600158691406,
"margin_dpo/margin_std": 101.92466735839844,
"step": 269
},
{
"epoch": 0.5654450261780105,
"fcm_dpo/beta": 0.007279932964593172,
"fcm_dpo/delta": 0.024306561797857285,
"fcm_dpo/margin": 65.43903350830078,
"fcm_dpo/q_t": 0.3980584740638733,
"grad_norm": 96.99080657958984,
"learning_rate": 2.381045210440644e-07,
"logits/chosen": -0.9254723787307739,
"logits/rejected": -0.9346519708633423,
"logps/chosen": -450.68603515625,
"logps/ref_chosen": -318.5623779296875,
"logps/ref_rejected": -281.1880798339844,
"logps/rejected": -478.7507629394531,
"loss": 4.4724,
"margin_dpo/margin_mean": 65.43903350830078,
"margin_dpo/margin_std": 112.69806671142578,
"step": 270
},
{
"epoch": 0.5675392670157068,
"fcm_dpo/beta": 0.0073170713149011135,
"fcm_dpo/delta": -0.024393264204263687,
"fcm_dpo/margin": 55.638763427734375,
"fcm_dpo/q_t": 0.41326209902763367,
"grad_norm": 92.29756164550781,
"learning_rate": 2.3627616503391812e-07,
"logits/chosen": -0.7708844542503357,
"logits/rejected": -0.7646578550338745,
"logps/chosen": -415.815673828125,
"logps/ref_chosen": -284.104736328125,
"logps/ref_rejected": -253.9580535888672,
"logps/rejected": -441.3077697753906,
"loss": 4.5874,
"margin_dpo/margin_mean": 55.638763427734375,
"margin_dpo/margin_std": 99.02946472167969,
"step": 271
},
{
"epoch": 0.5696335078534032,
"fcm_dpo/beta": 0.007198906969279051,
"fcm_dpo/delta": -0.004485009238123894,
"fcm_dpo/margin": 64.96874237060547,
"fcm_dpo/q_t": 0.39990508556365967,
"grad_norm": 78.74405670166016,
"learning_rate": 2.344485449913914e-07,
"logits/chosen": -0.8817507028579712,
"logits/rejected": -0.8673537969589233,
"logps/chosen": -423.4478759765625,
"logps/ref_chosen": -297.3590087890625,
"logps/ref_rejected": -279.20196533203125,
"logps/rejected": -470.2595520019531,
"loss": 4.5042,
"margin_dpo/margin_mean": 64.96874237060547,
"margin_dpo/margin_std": 112.7622299194336,
"step": 272
},
{
"epoch": 0.5717277486910994,
"fcm_dpo/beta": 0.007184017449617386,
"fcm_dpo/delta": 0.005371536128222942,
"fcm_dpo/margin": 68.41497039794922,
"fcm_dpo/q_t": 0.3951926827430725,
"grad_norm": 102.56245422363281,
"learning_rate": 2.3262175892620062e-07,
"logits/chosen": -0.86268150806427,
"logits/rejected": -0.8752706050872803,
"logps/chosen": -420.91461181640625,
"logps/ref_chosen": -293.20574951171875,
"logps/ref_rejected": -274.7646789550781,
"logps/rejected": -470.88848876953125,
"loss": 4.4026,
"margin_dpo/margin_mean": 68.41497039794922,
"margin_dpo/margin_std": 110.33782958984375,
"step": 273
},
{
"epoch": 0.5738219895287958,
"fcm_dpo/beta": 0.007152612321078777,
"fcm_dpo/delta": -0.028561905026435852,
"fcm_dpo/margin": 83.62451934814453,
"fcm_dpo/q_t": 0.37009555101394653,
"grad_norm": 108.42113494873047,
"learning_rate": 2.3079590480333827e-07,
"logits/chosen": -0.8165264129638672,
"logits/rejected": -0.7849279046058655,
"logps/chosen": -393.9966125488281,
"logps/ref_chosen": -270.55865478515625,
"logps/ref_rejected": -239.47048950195312,
"logps/rejected": -446.5329284667969,
"loss": 3.9903,
"margin_dpo/margin_mean": 83.62451934814453,
"margin_dpo/margin_std": 102.3349838256836,
"step": 274
},
{
"epoch": 0.5759162303664922,
"fcm_dpo/beta": 0.006889094598591328,
"fcm_dpo/delta": -0.037462376058101654,
"fcm_dpo/margin": 80.98619842529297,
"fcm_dpo/q_t": 0.379713773727417,
"grad_norm": 81.02855682373047,
"learning_rate": 2.2897108053782e-07,
"logits/chosen": -0.864743709564209,
"logits/rejected": -0.8470298647880554,
"logps/chosen": -368.32879638671875,
"logps/ref_chosen": -250.4369354248047,
"logps/ref_rejected": -249.5605926513672,
"logps/rejected": -448.4385986328125,
"loss": 4.0944,
"margin_dpo/margin_mean": 80.98619842529297,
"margin_dpo/margin_std": 103.47311401367188,
"step": 275
},
{
"epoch": 0.5780104712041885,
"fcm_dpo/beta": 0.006674672476947308,
"fcm_dpo/delta": -0.04028826206922531,
"fcm_dpo/margin": 74.76463317871094,
"fcm_dpo/q_t": 0.39007264375686646,
"grad_norm": 83.3016128540039,
"learning_rate": 2.2714738398943308e-07,
"logits/chosen": -0.9309563636779785,
"logits/rejected": -0.9080483913421631,
"logps/chosen": -423.72235107421875,
"logps/ref_chosen": -297.8566589355469,
"logps/ref_rejected": -295.5954895019531,
"logps/rejected": -496.225830078125,
"loss": 4.2971,
"margin_dpo/margin_mean": 74.76463317871094,
"margin_dpo/margin_std": 108.16291809082031,
"step": 276
},
{
"epoch": 0.5801047120418849,
"fcm_dpo/beta": 0.0066186352632939816,
"fcm_dpo/delta": -0.004491167608648539,
"fcm_dpo/margin": 54.66672897338867,
"fcm_dpo/q_t": 0.42031899094581604,
"grad_norm": 104.00203704833984,
"learning_rate": 2.2532491295748865e-07,
"logits/chosen": -0.8646161556243896,
"logits/rejected": -0.8646143078804016,
"logps/chosen": -405.98822021484375,
"logps/ref_chosen": -266.3604736328125,
"logps/ref_rejected": -253.36767578125,
"logps/rejected": -447.662109375,
"loss": 4.7764,
"margin_dpo/margin_mean": 54.66672897338867,
"margin_dpo/margin_std": 113.69486999511719,
"step": 277
},
{
"epoch": 0.5821989528795811,
"fcm_dpo/beta": 0.006715740542858839,
"fcm_dpo/delta": 0.02268362231552601,
"fcm_dpo/margin": 46.8961181640625,
"fcm_dpo/q_t": 0.4369484484195709,
"grad_norm": 140.3434295654297,
"learning_rate": 2.2350376517557726e-07,
"logits/chosen": -0.899573802947998,
"logits/rejected": -0.8620951771736145,
"logps/chosen": -416.90313720703125,
"logps/ref_chosen": -267.40728759765625,
"logps/ref_rejected": -229.5758514404297,
"logps/rejected": -425.96783447265625,
"loss": 5.0277,
"margin_dpo/margin_mean": 46.8961181640625,
"margin_dpo/margin_std": 120.51719665527344,
"step": 278
},
{
"epoch": 0.5842931937172775,
"fcm_dpo/beta": 0.006686557084321976,
"fcm_dpo/delta": -0.02701484225690365,
"fcm_dpo/margin": 83.11595153808594,
"fcm_dpo/q_t": 0.38234850764274597,
"grad_norm": 93.99402618408203,
"learning_rate": 2.2168403830632769e-07,
"logits/chosen": -0.809998095035553,
"logits/rejected": -0.7943370342254639,
"logps/chosen": -445.050537109375,
"logps/ref_chosen": -313.3677978515625,
"logps/ref_rejected": -299.1744384765625,
"logps/rejected": -513.97314453125,
"loss": 4.1802,
"margin_dpo/margin_mean": 83.11595153808594,
"margin_dpo/margin_std": 115.72819519042969,
"step": 279
},
{
"epoch": 0.5863874345549738,
"fcm_dpo/beta": 0.006766310427337885,
"fcm_dpo/delta": 0.05007310211658478,
"fcm_dpo/margin": 66.72462463378906,
"fcm_dpo/q_t": 0.40197527408599854,
"grad_norm": 77.47269439697266,
"learning_rate": 2.1986582993616925e-07,
"logits/chosen": -0.870246410369873,
"logits/rejected": -0.8812520503997803,
"logps/chosen": -391.3334045410156,
"logps/ref_chosen": -265.5558166503906,
"logps/ref_rejected": -247.1573944091797,
"logps/rejected": -439.6595764160156,
"loss": 4.4456,
"margin_dpo/margin_mean": 66.72462463378906,
"margin_dpo/margin_std": 109.50012969970703,
"step": 280
},
{
"epoch": 0.5884816753926702,
"fcm_dpo/beta": 0.006822681520134211,
"fcm_dpo/delta": -0.020259760320186615,
"fcm_dpo/margin": 71.36731719970703,
"fcm_dpo/q_t": 0.3979014456272125,
"grad_norm": 104.89105224609375,
"learning_rate": 2.1804923757009882e-07,
"logits/chosen": -0.855800986289978,
"logits/rejected": -0.8650408983230591,
"logps/chosen": -444.5128479003906,
"logps/ref_chosen": -295.2995910644531,
"logps/ref_rejected": -293.80877685546875,
"logps/rejected": -514.3893432617188,
"loss": 4.403,
"margin_dpo/margin_mean": 71.36732482910156,
"margin_dpo/margin_std": 113.76239776611328,
"step": 281
},
{
"epoch": 0.5905759162303665,
"fcm_dpo/beta": 0.006766719743609428,
"fcm_dpo/delta": -0.0025003692135214806,
"fcm_dpo/margin": 69.98826599121094,
"fcm_dpo/q_t": 0.3973071575164795,
"grad_norm": 71.5643539428711,
"learning_rate": 2.1623435862645205e-07,
"logits/chosen": -0.8500492572784424,
"logits/rejected": -0.8468190431594849,
"logps/chosen": -454.17938232421875,
"logps/ref_chosen": -318.63714599609375,
"logps/ref_rejected": -273.5943603515625,
"logps/rejected": -479.1248779296875,
"loss": 4.3813,
"margin_dpo/margin_mean": 69.98826599121094,
"margin_dpo/margin_std": 107.60038757324219,
"step": 282
},
{
"epoch": 0.5926701570680628,
"fcm_dpo/beta": 0.006712625734508038,
"fcm_dpo/delta": -0.015935653820633888,
"fcm_dpo/margin": 65.22550201416016,
"fcm_dpo/q_t": 0.40675878524780273,
"grad_norm": 72.51644134521484,
"learning_rate": 2.1442129043167873e-07,
"logits/chosen": -0.8635268807411194,
"logits/rejected": -0.8540130853652954,
"logps/chosen": -392.47882080078125,
"logps/ref_chosen": -255.11477661132812,
"logps/ref_rejected": -236.97372436523438,
"logps/rejected": -439.5632629394531,
"loss": 4.4794,
"margin_dpo/margin_mean": 65.22550201416016,
"margin_dpo/margin_std": 107.16011810302734,
"step": 283
},
{
"epoch": 0.5947643979057592,
"fcm_dpo/beta": 0.0064870212227106094,
"fcm_dpo/delta": -0.039267394691705704,
"fcm_dpo/margin": 74.81352996826172,
"fcm_dpo/q_t": 0.3955889046192169,
"grad_norm": 100.19154357910156,
"learning_rate": 2.1261013021512378e-07,
"logits/chosen": -0.8406773209571838,
"logits/rejected": -0.8125337958335876,
"logps/chosen": -415.146728515625,
"logps/ref_chosen": -273.355224609375,
"logps/ref_rejected": -259.84759521484375,
"logps/rejected": -476.45263671875,
"loss": 4.3948,
"margin_dpo/margin_mean": 74.81352996826172,
"margin_dpo/margin_std": 116.77448272705078,
"step": 284
},
{
"epoch": 0.5968586387434555,
"fcm_dpo/beta": 0.006448796950280666,
"fcm_dpo/delta": 0.005233362317085266,
"fcm_dpo/margin": 48.80956268310547,
"fcm_dpo/q_t": 0.4320613145828247,
"grad_norm": 115.82734680175781,
"learning_rate": 2.1080097510381294e-07,
"logits/chosen": -0.8477023839950562,
"logits/rejected": -0.8431457281112671,
"logps/chosen": -455.9388732910156,
"logps/ref_chosen": -309.8022155761719,
"logps/ref_rejected": -279.11846923828125,
"logps/rejected": -474.064697265625,
"loss": 4.9064,
"margin_dpo/margin_mean": 48.80955505371094,
"margin_dpo/margin_std": 111.24368286132812,
"step": 285
},
{
"epoch": 0.5989528795811518,
"fcm_dpo/beta": 0.006532514467835426,
"fcm_dpo/delta": 0.0254356786608696,
"fcm_dpo/margin": 65.45335388183594,
"fcm_dpo/q_t": 0.4098204970359802,
"grad_norm": 80.22543334960938,
"learning_rate": 2.089939221172446e-07,
"logits/chosen": -0.8206965923309326,
"logits/rejected": -0.8110418915748596,
"logps/chosen": -408.1368408203125,
"logps/ref_chosen": -271.4655456542969,
"logps/ref_rejected": -279.531494140625,
"logps/rejected": -481.6561279296875,
"loss": 4.5244,
"margin_dpo/margin_mean": 65.45335388183594,
"margin_dpo/margin_std": 113.59163665771484,
"step": 286
},
{
"epoch": 0.6010471204188481,
"fcm_dpo/beta": 0.00667279027402401,
"fcm_dpo/delta": 0.019707411527633667,
"fcm_dpo/margin": 64.98822021484375,
"fcm_dpo/q_t": 0.40575066208839417,
"grad_norm": 79.20893096923828,
"learning_rate": 2.0718906816218595e-07,
"logits/chosen": -0.856654942035675,
"logits/rejected": -0.841168224811554,
"logps/chosen": -412.86700439453125,
"logps/ref_chosen": -277.0932312011719,
"logps/ref_rejected": -233.55599975585938,
"logps/rejected": -434.3179931640625,
"loss": 4.5597,
"margin_dpo/margin_mean": 64.98822021484375,
"margin_dpo/margin_std": 114.76084899902344,
"step": 287
},
{
"epoch": 0.6031413612565445,
"fcm_dpo/beta": 0.006710154935717583,
"fcm_dpo/delta": -0.014013386331498623,
"fcm_dpo/margin": 61.08485412597656,
"fcm_dpo/q_t": 0.4106292426586151,
"grad_norm": 92.16407775878906,
"learning_rate": 2.053865100274774e-07,
"logits/chosen": -0.85230553150177,
"logits/rejected": -0.8594391942024231,
"logps/chosen": -425.701171875,
"logps/ref_chosen": -293.1681823730469,
"logps/ref_rejected": -263.4059143066406,
"logps/rejected": -457.0237731933594,
"loss": 4.584,
"margin_dpo/margin_mean": 61.08485794067383,
"margin_dpo/margin_std": 109.07479858398438,
"step": 288
},
{
"epoch": 0.6052356020942409,
"fcm_dpo/beta": 0.006692454218864441,
"fcm_dpo/delta": 0.01522915530949831,
"fcm_dpo/margin": 40.210289001464844,
"fcm_dpo/q_t": 0.4406478703022003,
"grad_norm": 97.82292938232422,
"learning_rate": 2.035863443788411e-07,
"logits/chosen": -0.849497377872467,
"logits/rejected": -0.8317880034446716,
"logps/chosen": -478.8475341796875,
"logps/ref_chosen": -329.9574279785156,
"logps/ref_rejected": -276.7565002441406,
"logps/rejected": -465.85687255859375,
"loss": 5.023,
"margin_dpo/margin_mean": 40.210289001464844,
"margin_dpo/margin_std": 104.00145721435547,
"step": 289
},
{
"epoch": 0.6073298429319371,
"fcm_dpo/beta": 0.0065245069563388824,
"fcm_dpo/delta": -0.04471207410097122,
"fcm_dpo/margin": 57.04399108886719,
"fcm_dpo/q_t": 0.4190434217453003,
"grad_norm": 83.78768157958984,
"learning_rate": 2.0178866775369774e-07,
"logits/chosen": -0.8456165194511414,
"logits/rejected": -0.7818999886512756,
"logps/chosen": -460.7046203613281,
"logps/ref_chosen": -324.6690673828125,
"logps/ref_rejected": -311.8439636230469,
"logps/rejected": -504.9234924316406,
"loss": 4.7357,
"margin_dpo/margin_mean": 57.043983459472656,
"margin_dpo/margin_std": 112.49860382080078,
"step": 290
},
{
"epoch": 0.6094240837696335,
"fcm_dpo/beta": 0.006406103260815144,
"fcm_dpo/delta": -0.016831081360578537,
"fcm_dpo/margin": 75.2476806640625,
"fcm_dpo/q_t": 0.3942800760269165,
"grad_norm": 80.28817749023438,
"learning_rate": 1.9999357655598891e-07,
"logits/chosen": -0.831132173538208,
"logits/rejected": -0.8223029971122742,
"logps/chosen": -400.536376953125,
"logps/ref_chosen": -275.1535949707031,
"logps/ref_rejected": -278.1832580566406,
"logps/rejected": -478.81365966796875,
"loss": 4.2825,
"margin_dpo/margin_mean": 75.24768829345703,
"margin_dpo/margin_std": 103.58267211914062,
"step": 291
},
{
"epoch": 0.6115183246073298,
"fcm_dpo/beta": 0.006372286006808281,
"fcm_dpo/delta": -0.007237437646836042,
"fcm_dpo/margin": 56.694847106933594,
"fcm_dpo/q_t": 0.4195069968700409,
"grad_norm": 73.26972198486328,
"learning_rate": 1.9820116705100775e-07,
"logits/chosen": -0.8270624876022339,
"logits/rejected": -0.8235753774642944,
"logps/chosen": -377.44439697265625,
"logps/ref_chosen": -259.3636779785156,
"logps/ref_rejected": -279.30218505859375,
"logps/rejected": -454.0777282714844,
"loss": 4.6624,
"margin_dpo/margin_mean": 56.694847106933594,
"margin_dpo/margin_std": 104.66506958007812,
"step": 292
},
{
"epoch": 0.6136125654450262,
"fcm_dpo/beta": 0.0063700140453875065,
"fcm_dpo/delta": 0.003185997251421213,
"fcm_dpo/margin": 70.93012237548828,
"fcm_dpo/q_t": 0.40013647079467773,
"grad_norm": 87.3719711303711,
"learning_rate": 1.9641153536023642e-07,
"logits/chosen": -0.9205597639083862,
"logits/rejected": -0.8796571493148804,
"logps/chosen": -435.2584228515625,
"logps/ref_chosen": -303.77081298828125,
"logps/ref_rejected": -270.07513427734375,
"logps/rejected": -472.4928283691406,
"loss": 4.3374,
"margin_dpo/margin_mean": 70.93012237548828,
"margin_dpo/margin_std": 98.5417251586914,
"step": 293
},
{
"epoch": 0.6157068062827226,
"fcm_dpo/beta": 0.00646553561091423,
"fcm_dpo/delta": 0.018955595791339874,
"fcm_dpo/margin": 65.13241577148438,
"fcm_dpo/q_t": 0.4103822112083435,
"grad_norm": 82.27020263671875,
"learning_rate": 1.9462477745619106e-07,
"logits/chosen": -0.8257191181182861,
"logits/rejected": -0.8322641849517822,
"logps/chosen": -361.6537170410156,
"logps/ref_chosen": -240.23831176757812,
"logps/ref_rejected": -229.187744140625,
"logps/rejected": -415.735595703125,
"loss": 4.5112,
"margin_dpo/margin_mean": 65.1324234008789,
"margin_dpo/margin_std": 109.38378143310547,
"step": 294
},
{
"epoch": 0.6178010471204188,
"fcm_dpo/beta": 0.006473129615187645,
"fcm_dpo/delta": 0.013521851040422916,
"fcm_dpo/margin": 63.43254852294922,
"fcm_dpo/q_t": 0.40921661257743835,
"grad_norm": 76.50547790527344,
"learning_rate": 1.928409891572757e-07,
"logits/chosen": -0.8179617524147034,
"logits/rejected": -0.8252490758895874,
"logps/chosen": -374.97149658203125,
"logps/ref_chosen": -251.00970458984375,
"logps/ref_rejected": -244.15142822265625,
"logps/rejected": -431.5457763671875,
"loss": 4.4736,
"margin_dpo/margin_mean": 63.432552337646484,
"margin_dpo/margin_std": 99.88431549072266,
"step": 295
},
{
"epoch": 0.6198952879581152,
"fcm_dpo/beta": 0.006484742276370525,
"fcm_dpo/delta": -0.02761462889611721,
"fcm_dpo/margin": 84.66267395019531,
"fcm_dpo/q_t": 0.3797982931137085,
"grad_norm": 83.58622741699219,
"learning_rate": 1.9106026612264315e-07,
"logits/chosen": -0.8179198503494263,
"logits/rejected": -0.7856135964393616,
"logps/chosen": -420.0888977050781,
"logps/ref_chosen": -293.880615234375,
"logps/ref_rejected": -283.4175720214844,
"logps/rejected": -494.28857421875,
"loss": 4.1604,
"margin_dpo/margin_mean": 84.66267395019531,
"margin_dpo/margin_std": 113.16719818115234,
"step": 296
},
{
"epoch": 0.6219895287958115,
"fcm_dpo/beta": 0.0063199070282280445,
"fcm_dpo/delta": -0.019577497616410255,
"fcm_dpo/margin": 59.113037109375,
"fcm_dpo/q_t": 0.41643232107162476,
"grad_norm": 80.03433227539062,
"learning_rate": 1.8928270384706582e-07,
"logits/chosen": -0.9052919745445251,
"logits/rejected": -0.8989733457565308,
"logps/chosen": -414.87054443359375,
"logps/ref_chosen": -289.4600830078125,
"logps/ref_rejected": -283.69110107421875,
"logps/rejected": -468.21453857421875,
"loss": 4.5989,
"margin_dpo/margin_mean": 59.113037109375,
"margin_dpo/margin_std": 100.17891693115234,
"step": 297
},
{
"epoch": 0.6240837696335079,
"fcm_dpo/beta": 0.006384821608662605,
"fcm_dpo/delta": 0.010201474651694298,
"fcm_dpo/margin": 74.46007537841797,
"fcm_dpo/q_t": 0.3973884880542755,
"grad_norm": 99.93316650390625,
"learning_rate": 1.875083976558136e-07,
"logits/chosen": -0.8152309060096741,
"logits/rejected": -0.8023860454559326,
"logps/chosen": -425.2400207519531,
"logps/ref_chosen": -306.5150146484375,
"logps/ref_rejected": -280.6969909667969,
"logps/rejected": -473.88201904296875,
"loss": 4.4122,
"margin_dpo/margin_mean": 74.46007537841797,
"margin_dpo/margin_std": 115.8766098022461,
"step": 298
},
{
"epoch": 0.6261780104712041,
"fcm_dpo/beta": 0.006350814364850521,
"fcm_dpo/delta": -0.002623580861836672,
"fcm_dpo/margin": 60.436031341552734,
"fcm_dpo/q_t": 0.41395753622055054,
"grad_norm": 88.62031555175781,
"learning_rate": 1.8573744269954297e-07,
"logits/chosen": -0.8021990060806274,
"logits/rejected": -0.7875962257385254,
"logps/chosen": -420.3141174316406,
"logps/ref_chosen": -281.36376953125,
"logps/ref_rejected": -270.39508056640625,
"logps/rejected": -469.7814636230469,
"loss": 4.5633,
"margin_dpo/margin_mean": 60.43603515625,
"margin_dpo/margin_std": 96.89127349853516,
"step": 299
},
{
"epoch": 0.6282722513089005,
"fcm_dpo/beta": 0.006370040588080883,
"fcm_dpo/delta": 0.02115996927022934,
"fcm_dpo/margin": 56.68406677246094,
"fcm_dpo/q_t": 0.42014020681381226,
"grad_norm": 143.75216674804688,
"learning_rate": 1.839699339491937e-07,
"logits/chosen": -0.8434377908706665,
"logits/rejected": -0.8197212219238281,
"logps/chosen": -457.9049377441406,
"logps/ref_chosen": -314.923095703125,
"logps/ref_rejected": -269.2027893066406,
"logps/rejected": -468.86871337890625,
"loss": 4.731,
"margin_dpo/margin_mean": 56.6840705871582,
"margin_dpo/margin_std": 112.98627471923828,
"step": 300
},
{
"epoch": 0.6303664921465969,
"fcm_dpo/beta": 0.006618153303861618,
"fcm_dpo/delta": 0.040116339921951294,
"fcm_dpo/margin": 62.580421447753906,
"fcm_dpo/q_t": 0.4097692668437958,
"grad_norm": 93.548828125,
"learning_rate": 1.8220596619089573e-07,
"logits/chosen": -0.857657253742218,
"logits/rejected": -0.857261061668396,
"logps/chosen": -413.1051330566406,
"logps/ref_chosen": -279.89453125,
"logps/ref_rejected": -271.6694641113281,
"logps/rejected": -467.4604797363281,
"loss": 4.5268,
"margin_dpo/margin_mean": 62.58042526245117,
"margin_dpo/margin_std": 104.89457702636719,
"step": 301
},
{
"epoch": 0.6324607329842932,
"fcm_dpo/beta": 0.006750874686986208,
"fcm_dpo/delta": 0.022394752129912376,
"fcm_dpo/margin": 75.39173889160156,
"fcm_dpo/q_t": 0.39047718048095703,
"grad_norm": 96.45508575439453,
"learning_rate": 1.8044563402088682e-07,
"logits/chosen": -0.8146281838417053,
"logits/rejected": -0.8003526926040649,
"logps/chosen": -402.07379150390625,
"logps/ref_chosen": -271.3318176269531,
"logps/ref_rejected": -256.5587158203125,
"logps/rejected": -462.6923828125,
"loss": 4.2878,
"margin_dpo/margin_mean": 75.39173889160156,
"margin_dpo/margin_std": 110.79779052734375,
"step": 302
},
{
"epoch": 0.6345549738219896,
"fcm_dpo/beta": 0.0067290207371115685,
"fcm_dpo/delta": -0.012645013630390167,
"fcm_dpo/margin": 69.51010131835938,
"fcm_dpo/q_t": 0.3968094289302826,
"grad_norm": 89.24269104003906,
"learning_rate": 1.7868903184043885e-07,
"logits/chosen": -0.8077495098114014,
"logits/rejected": -0.787011444568634,
"logps/chosen": -441.51629638671875,
"logps/ref_chosen": -304.88104248046875,
"logps/ref_rejected": -269.063720703125,
"logps/rejected": -475.2090759277344,
"loss": 4.4047,
"margin_dpo/margin_mean": 69.51010131835938,
"margin_dpo/margin_std": 107.004638671875,
"step": 303
},
{
"epoch": 0.6366492146596858,
"fcm_dpo/beta": 0.006841976661235094,
"fcm_dpo/delta": 0.033334143459796906,
"fcm_dpo/margin": 76.11368560791016,
"fcm_dpo/q_t": 0.3914816379547119,
"grad_norm": 96.88390350341797,
"learning_rate": 1.7693625385079574e-07,
"logits/chosen": -0.8111523389816284,
"logits/rejected": -0.8187903165817261,
"logps/chosen": -438.7404479980469,
"logps/ref_chosen": -290.7109680175781,
"logps/ref_rejected": -237.6885986328125,
"logps/rejected": -461.8317565917969,
"loss": 4.2888,
"margin_dpo/margin_mean": 76.11369323730469,
"margin_dpo/margin_std": 116.99392700195312,
"step": 304
},
{
"epoch": 0.6387434554973822,
"fcm_dpo/beta": 0.006396348122507334,
"fcm_dpo/delta": -0.1703343540430069,
"fcm_dpo/margin": 107.9814224243164,
"fcm_dpo/q_t": 0.3516058921813965,
"grad_norm": 75.3594741821289,
"learning_rate": 1.7518739404812155e-07,
"logits/chosen": -0.8580024242401123,
"logits/rejected": -0.8278228640556335,
"logps/chosen": -383.34393310546875,
"logps/ref_chosen": -256.4839782714844,
"logps/ref_rejected": -266.4063415527344,
"logps/rejected": -501.24761962890625,
"loss": 3.8056,
"margin_dpo/margin_mean": 107.9814224243164,
"margin_dpo/margin_std": 118.7427749633789,
"step": 305
},
{
"epoch": 0.6408376963350786,
"fcm_dpo/beta": 0.006036079488694668,
"fcm_dpo/delta": -0.0010613007470965385,
"fcm_dpo/margin": 65.81829833984375,
"fcm_dpo/q_t": 0.4118325114250183,
"grad_norm": 83.1937484741211,
"learning_rate": 1.7344254621846017e-07,
"logits/chosen": -0.8624471426010132,
"logits/rejected": -0.8450292944908142,
"logps/chosen": -458.81719970703125,
"logps/ref_chosen": -320.6492004394531,
"logps/ref_rejected": -273.36773681640625,
"logps/rejected": -477.35400390625,
"loss": 4.5335,
"margin_dpo/margin_mean": 65.81829833984375,
"margin_dpo/margin_std": 105.20728302001953,
"step": 306
},
{
"epoch": 0.6429319371727749,
"fcm_dpo/beta": 0.006111084017902613,
"fcm_dpo/delta": 0.03404655680060387,
"fcm_dpo/margin": 74.80149841308594,
"fcm_dpo/q_t": 0.39704886078834534,
"grad_norm": 86.36273956298828,
"learning_rate": 1.717018039327053e-07,
"logits/chosen": -0.794308602809906,
"logits/rejected": -0.8210631608963013,
"logps/chosen": -440.82342529296875,
"logps/ref_chosen": -279.4541931152344,
"logps/ref_rejected": -240.3796844482422,
"logps/rejected": -476.5504455566406,
"loss": 4.322,
"margin_dpo/margin_mean": 74.80149841308594,
"margin_dpo/margin_std": 101.4282455444336,
"step": 307
},
{
"epoch": 0.6450261780104712,
"fcm_dpo/beta": 0.006177757866680622,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 47.84044647216797,
"fcm_dpo/q_t": 0.4335402250289917,
"grad_norm": 89.86914825439453,
"learning_rate": 1.699652605415828e-07,
"logits/chosen": -0.8320671319961548,
"logits/rejected": -0.8438513278961182,
"logps/chosen": -472.46478271484375,
"logps/ref_chosen": -297.068359375,
"logps/ref_rejected": -258.83856201171875,
"logps/rejected": -482.0754089355469,
"loss": 4.8917,
"margin_dpo/margin_mean": 47.84044647216797,
"margin_dpo/margin_std": 108.76823425292969,
"step": 308
},
{
"epoch": 0.6471204188481675,
"fcm_dpo/beta": 0.006233610212802887,
"fcm_dpo/delta": 0.030657585710287094,
"fcm_dpo/margin": 79.33876037597656,
"fcm_dpo/q_t": 0.3936896026134491,
"grad_norm": 99.26750946044922,
"learning_rate": 1.6823300917064458e-07,
"logits/chosen": -0.8398002982139587,
"logits/rejected": -0.8489018678665161,
"logps/chosen": -445.3302001953125,
"logps/ref_chosen": -281.3881530761719,
"logps/ref_rejected": -262.458740234375,
"logps/rejected": -505.739501953125,
"loss": 4.2964,
"margin_dpo/margin_mean": 79.33875274658203,
"margin_dpo/margin_std": 117.96406555175781,
"step": 309
},
{
"epoch": 0.6492146596858639,
"fcm_dpo/beta": 0.0062689767219126225,
"fcm_dpo/delta": -0.023925358429551125,
"fcm_dpo/margin": 78.43304443359375,
"fcm_dpo/q_t": 0.39273297786712646,
"grad_norm": 157.27012634277344,
"learning_rate": 1.6650514271527465e-07,
"logits/chosen": -0.8444918394088745,
"logits/rejected": -0.8194589614868164,
"logps/chosen": -438.931884765625,
"logps/ref_chosen": -279.1872863769531,
"logps/ref_rejected": -261.8279724121094,
"logps/rejected": -500.0056457519531,
"loss": 4.2968,
"margin_dpo/margin_mean": 78.43304443359375,
"margin_dpo/margin_std": 111.59870910644531,
"step": 310
},
{
"epoch": 0.6513089005235602,
"fcm_dpo/beta": 0.006130394991487265,
"fcm_dpo/delta": -0.007703306153416634,
"fcm_dpo/margin": 75.38703918457031,
"fcm_dpo/q_t": 0.39765244722366333,
"grad_norm": 146.17376708984375,
"learning_rate": 1.647817538357072e-07,
"logits/chosen": -0.8305755853652954,
"logits/rejected": -0.8122124075889587,
"logps/chosen": -437.87200927734375,
"logps/ref_chosen": -271.39813232421875,
"logps/ref_rejected": -266.12701416015625,
"logps/rejected": -507.98797607421875,
"loss": 4.4273,
"margin_dpo/margin_mean": 75.38703155517578,
"margin_dpo/margin_std": 117.2852783203125,
"step": 311
},
{
"epoch": 0.6534031413612565,
"fcm_dpo/beta": 0.00600817333906889,
"fcm_dpo/delta": -0.0545642226934433,
"fcm_dpo/margin": 68.5428466796875,
"fcm_dpo/q_t": 0.4122428297996521,
"grad_norm": 104.90550231933594,
"learning_rate": 1.6306293495205755e-07,
"logits/chosen": -0.8464758396148682,
"logits/rejected": -0.8214608430862427,
"logps/chosen": -447.5267639160156,
"logps/ref_chosen": -282.3850402832031,
"logps/ref_rejected": -246.35389709472656,
"logps/rejected": -480.0384826660156,
"loss": 4.7234,
"margin_dpo/margin_mean": 68.5428466796875,
"margin_dpo/margin_std": 135.85427856445312,
"step": 312
},
{
"epoch": 0.6554973821989529,
"fcm_dpo/beta": 0.005932308733463287,
"fcm_dpo/delta": -0.02101931907236576,
"fcm_dpo/margin": 71.42918395996094,
"fcm_dpo/q_t": 0.41074472665786743,
"grad_norm": 75.15242004394531,
"learning_rate": 1.6134877823936607e-07,
"logits/chosen": -0.8891011476516724,
"logits/rejected": -0.8756020069122314,
"logps/chosen": -469.3253479003906,
"logps/ref_chosen": -303.630859375,
"logps/ref_rejected": -273.1156921386719,
"logps/rejected": -510.2393798828125,
"loss": 4.5956,
"margin_dpo/margin_mean": 71.42918395996094,
"margin_dpo/margin_std": 126.27899169921875,
"step": 313
},
{
"epoch": 0.6575916230366492,
"fcm_dpo/beta": 0.005879377480596304,
"fcm_dpo/delta": 0.035980284214019775,
"fcm_dpo/margin": 71.69800567626953,
"fcm_dpo/q_t": 0.40531933307647705,
"grad_norm": 102.78753662109375,
"learning_rate": 1.5963937562265522e-07,
"logits/chosen": -0.9064289331436157,
"logits/rejected": -0.8947293758392334,
"logps/chosen": -461.6485595703125,
"logps/ref_chosen": -302.3042907714844,
"logps/ref_rejected": -273.6416015625,
"logps/rejected": -504.6839294433594,
"loss": 4.4426,
"margin_dpo/margin_mean": 71.69800567626953,
"margin_dpo/margin_std": 109.42501831054688,
"step": 314
},
{
"epoch": 0.6596858638743456,
"fcm_dpo/beta": 0.005956702399998903,
"fcm_dpo/delta": -0.0020787278190255165,
"fcm_dpo/margin": 85.90567016601562,
"fcm_dpo/q_t": 0.3879070580005646,
"grad_norm": 84.1939697265625,
"learning_rate": 1.5793481877199943e-07,
"logits/chosen": -0.8775259256362915,
"logits/rejected": -0.860569953918457,
"logps/chosen": -459.0497741699219,
"logps/ref_chosen": -302.729248046875,
"logps/ref_rejected": -270.26910400390625,
"logps/rejected": -512.4953002929688,
"loss": 4.1999,
"margin_dpo/margin_mean": 85.90567016601562,
"margin_dpo/margin_std": 114.59932708740234,
"step": 315
},
{
"epoch": 0.6617801047120419,
"fcm_dpo/beta": 0.00605932530015707,
"fcm_dpo/delta": 0.009984731674194336,
"fcm_dpo/margin": 77.9808349609375,
"fcm_dpo/q_t": 0.39844048023223877,
"grad_norm": 106.31928253173828,
"learning_rate": 1.562351990976095e-07,
"logits/chosen": -0.8906347155570984,
"logits/rejected": -0.8755995035171509,
"logps/chosen": -471.20452880859375,
"logps/ref_chosen": -310.5706481933594,
"logps/ref_rejected": -272.9354553222656,
"logps/rejected": -511.5502014160156,
"loss": 4.3628,
"margin_dpo/margin_mean": 77.9808349609375,
"margin_dpo/margin_std": 119.30810546875,
"step": 316
},
{
"epoch": 0.6638743455497382,
"fcm_dpo/beta": 0.006037175189703703,
"fcm_dpo/delta": 0.010911202058196068,
"fcm_dpo/margin": 74.88856506347656,
"fcm_dpo/q_t": 0.39838525652885437,
"grad_norm": 84.3582992553711,
"learning_rate": 1.5454060774493065e-07,
"logits/chosen": -0.8910847902297974,
"logits/rejected": -0.860565185546875,
"logps/chosen": -394.9084777832031,
"logps/ref_chosen": -253.90036010742188,
"logps/ref_rejected": -218.74078369140625,
"logps/rejected": -434.6374206542969,
"loss": 4.3347,
"margin_dpo/margin_mean": 74.88856506347656,
"margin_dpo/margin_std": 103.74834442138672,
"step": 317
},
{
"epoch": 0.6659685863874345,
"fcm_dpo/beta": 0.0059028794057667255,
"fcm_dpo/delta": -0.04113336279988289,
"fcm_dpo/margin": 88.18386840820312,
"fcm_dpo/q_t": 0.3841785490512848,
"grad_norm": 70.52163696289062,
"learning_rate": 1.5285113558975427e-07,
"logits/chosen": -0.9013999700546265,
"logits/rejected": -0.8701552152633667,
"logps/chosen": -417.7796936035156,
"logps/ref_chosen": -270.8228759765625,
"logps/ref_rejected": -255.30972290039062,
"logps/rejected": -490.450439453125,
"loss": 4.153,
"margin_dpo/margin_mean": 88.18386840820312,
"margin_dpo/margin_std": 111.3894271850586,
"step": 318
},
{
"epoch": 0.6680628272251309,
"fcm_dpo/beta": 0.006000366993248463,
"fcm_dpo/delta": 0.0448153130710125,
"fcm_dpo/margin": 82.24055480957031,
"fcm_dpo/q_t": 0.38862344622612,
"grad_norm": 94.62660217285156,
"learning_rate": 1.5116687323334464e-07,
"logits/chosen": -0.8874606490135193,
"logits/rejected": -0.857952356338501,
"logps/chosen": -455.48486328125,
"logps/ref_chosen": -301.0028076171875,
"logps/ref_rejected": -242.39002990722656,
"logps/rejected": -479.1126708984375,
"loss": 4.1591,
"margin_dpo/margin_mean": 82.24055480957031,
"margin_dpo/margin_std": 99.47880554199219,
"step": 319
},
{
"epoch": 0.6701570680628273,
"fcm_dpo/beta": 0.00606071762740612,
"fcm_dpo/delta": -0.0073157744482159615,
"fcm_dpo/margin": 77.76814270019531,
"fcm_dpo/q_t": 0.398094117641449,
"grad_norm": 91.96961212158203,
"learning_rate": 1.4948791099758052e-07,
"logits/chosen": -0.8550815582275391,
"logits/rejected": -0.8503654599189758,
"logps/chosen": -452.3647155761719,
"logps/ref_chosen": -303.6225891113281,
"logps/ref_rejected": -280.85174560546875,
"logps/rejected": -507.362060546875,
"loss": 4.4294,
"margin_dpo/margin_mean": 77.76814270019531,
"margin_dpo/margin_std": 126.88796997070312,
"step": 320
},
{
"epoch": 0.6722513089005235,
"fcm_dpo/beta": 0.0060967146418988705,
"fcm_dpo/delta": 0.02567823976278305,
"fcm_dpo/margin": 54.03432083129883,
"fcm_dpo/q_t": 0.4288126230239868,
"grad_norm": 95.56733703613281,
"learning_rate": 1.478143389201113e-07,
"logits/chosen": -0.8824321031570435,
"logits/rejected": -0.847492516040802,
"logps/chosen": -452.32916259765625,
"logps/ref_chosen": -288.98583984375,
"logps/ref_rejected": -241.1822052001953,
"logps/rejected": -458.5599060058594,
"loss": 4.8267,
"margin_dpo/margin_mean": 54.03431701660156,
"margin_dpo/margin_std": 116.12944030761719,
"step": 321
},
{
"epoch": 0.6743455497382199,
"fcm_dpo/beta": 0.0063505470752716064,
"fcm_dpo/delta": 0.060941774398088455,
"fcm_dpo/margin": 70.42938995361328,
"fcm_dpo/q_t": 0.4040513336658478,
"grad_norm": 84.9507064819336,
"learning_rate": 1.461462467495284e-07,
"logits/chosen": -0.935436487197876,
"logits/rejected": -0.8963932394981384,
"logps/chosen": -473.46697998046875,
"logps/ref_chosen": -308.54345703125,
"logps/ref_rejected": -269.7995910644531,
"logps/rejected": -505.15252685546875,
"loss": 4.4664,
"margin_dpo/margin_mean": 70.42938995361328,
"margin_dpo/margin_std": 118.01498413085938,
"step": 322
},
{
"epoch": 0.6764397905759162,
"fcm_dpo/beta": 0.006490709725767374,
"fcm_dpo/delta": 0.0008104295702651143,
"fcm_dpo/margin": 55.20634841918945,
"fcm_dpo/q_t": 0.423994779586792,
"grad_norm": 96.36703491210938,
"learning_rate": 1.4448372394055246e-07,
"logits/chosen": -0.8880312442779541,
"logits/rejected": -0.8804765343666077,
"logps/chosen": -436.9565124511719,
"logps/ref_chosen": -279.49371337890625,
"logps/ref_rejected": -228.15521240234375,
"logps/rejected": -440.8243713378906,
"loss": 4.8032,
"margin_dpo/margin_mean": 55.20635223388672,
"margin_dpo/margin_std": 118.82220458984375,
"step": 323
},
{
"epoch": 0.6785340314136126,
"fcm_dpo/beta": 0.006320840213447809,
"fcm_dpo/delta": -0.018460873514413834,
"fcm_dpo/margin": 86.409912109375,
"fcm_dpo/q_t": 0.37887102365493774,
"grad_norm": 97.08597564697266,
"learning_rate": 1.428268596492364e-07,
"logits/chosen": -0.8197450041770935,
"logits/rejected": -0.8136438727378845,
"logps/chosen": -376.65020751953125,
"logps/ref_chosen": -239.33836364746094,
"logps/ref_rejected": -230.53775024414062,
"logps/rejected": -454.25946044921875,
"loss": 4.0627,
"margin_dpo/margin_mean": 86.409912109375,
"margin_dpo/margin_std": 101.99807739257812,
"step": 324
},
{
"epoch": 0.680628272251309,
"fcm_dpo/beta": 0.006354123819619417,
"fcm_dpo/delta": -0.0036203861236572266,
"fcm_dpo/margin": 73.38639068603516,
"fcm_dpo/q_t": 0.4013862907886505,
"grad_norm": 98.75489807128906,
"learning_rate": 1.4117574272818386e-07,
"logits/chosen": -0.8287428617477417,
"logits/rejected": -0.812412679195404,
"logps/chosen": -435.7538757324219,
"logps/ref_chosen": -280.62896728515625,
"logps/ref_rejected": -270.5085754394531,
"logps/rejected": -499.0198974609375,
"loss": 4.5095,
"margin_dpo/margin_mean": 73.38638305664062,
"margin_dpo/margin_std": 126.34809112548828,
"step": 325
},
{
"epoch": 0.6827225130890052,
"fcm_dpo/beta": 0.006422973703593016,
"fcm_dpo/delta": 0.002800529822707176,
"fcm_dpo/margin": 72.96824645996094,
"fcm_dpo/q_t": 0.39816251397132874,
"grad_norm": 101.16968536376953,
"learning_rate": 1.3953046172178413e-07,
"logits/chosen": -0.9406434297561646,
"logits/rejected": -0.9313357472419739,
"logps/chosen": -385.07867431640625,
"logps/ref_chosen": -240.9871368408203,
"logps/ref_rejected": -261.0238342285156,
"logps/rejected": -478.0836181640625,
"loss": 4.4055,
"margin_dpo/margin_mean": 72.96824645996094,
"margin_dpo/margin_std": 114.49446105957031,
"step": 326
},
{
"epoch": 0.6848167539267016,
"fcm_dpo/beta": 0.006266596727073193,
"fcm_dpo/delta": -0.023223016411066055,
"fcm_dpo/margin": 80.68010711669922,
"fcm_dpo/q_t": 0.38984158635139465,
"grad_norm": 122.5355224609375,
"learning_rate": 1.3789110486146468e-07,
"logits/chosen": -0.8904516696929932,
"logits/rejected": -0.8670768737792969,
"logps/chosen": -414.452880859375,
"logps/ref_chosen": -279.6148986816406,
"logps/ref_rejected": -269.76934814453125,
"logps/rejected": -485.2874755859375,
"loss": 4.2333,
"margin_dpo/margin_mean": 80.68010711669922,
"margin_dpo/margin_std": 110.3857421875,
"step": 327
},
{
"epoch": 0.6869109947643979,
"fcm_dpo/beta": 0.006192709319293499,
"fcm_dpo/delta": -0.014546114951372147,
"fcm_dpo/margin": 73.90266418457031,
"fcm_dpo/q_t": 0.39877283573150635,
"grad_norm": 123.79401397705078,
"learning_rate": 1.362577600609588e-07,
"logits/chosen": -0.8520888090133667,
"logits/rejected": -0.852110743522644,
"logps/chosen": -450.4490051269531,
"logps/ref_chosen": -301.033447265625,
"logps/ref_rejected": -284.2101135253906,
"logps/rejected": -507.5283203125,
"loss": 4.3332,
"margin_dpo/margin_mean": 73.90266418457031,
"margin_dpo/margin_std": 103.64661407470703,
"step": 328
},
{
"epoch": 0.6890052356020943,
"fcm_dpo/beta": 0.0062096007168293,
"fcm_dpo/delta": 0.012995388358831406,
"fcm_dpo/margin": 81.40901184082031,
"fcm_dpo/q_t": 0.39801162481307983,
"grad_norm": 98.26327514648438,
"learning_rate": 1.3463051491159093e-07,
"logits/chosen": -0.8586837649345398,
"logits/rejected": -0.8311326503753662,
"logps/chosen": -477.521484375,
"logps/ref_chosen": -319.9888610839844,
"logps/ref_rejected": -307.5588684082031,
"logps/rejected": -546.50048828125,
"loss": 4.4147,
"margin_dpo/margin_mean": 81.40901184082031,
"margin_dpo/margin_std": 134.6064453125,
"step": 329
},
{
"epoch": 0.6910994764397905,
"fcm_dpo/beta": 0.006210251711308956,
"fcm_dpo/delta": 0.0007845014333724976,
"fcm_dpo/margin": 74.684326171875,
"fcm_dpo/q_t": 0.3985758423805237,
"grad_norm": 118.70218658447266,
"learning_rate": 1.3300945667758012e-07,
"logits/chosen": -0.8644099831581116,
"logits/rejected": -0.8749092817306519,
"logps/chosen": -453.7188720703125,
"logps/ref_chosen": -301.11474609375,
"logps/ref_rejected": -299.673095703125,
"logps/rejected": -526.9615478515625,
"loss": 4.3426,
"margin_dpo/margin_mean": 74.684326171875,
"margin_dpo/margin_std": 108.2922592163086,
"step": 330
},
{
"epoch": 0.6931937172774869,
"fcm_dpo/beta": 0.0062918756157159805,
"fcm_dpo/delta": 0.02173735201358795,
"fcm_dpo/margin": 72.32933044433594,
"fcm_dpo/q_t": 0.4040902853012085,
"grad_norm": 102.2640151977539,
"learning_rate": 1.3139467229135998e-07,
"logits/chosen": -0.8764195442199707,
"logits/rejected": -0.8572993278503418,
"logps/chosen": -423.5179443359375,
"logps/ref_chosen": -277.59149169921875,
"logps/ref_rejected": -256.025634765625,
"logps/rejected": -474.2813720703125,
"loss": 4.5118,
"margin_dpo/margin_mean": 72.32933807373047,
"margin_dpo/margin_std": 127.8876953125,
"step": 331
},
{
"epoch": 0.6952879581151833,
"fcm_dpo/beta": 0.006289025768637657,
"fcm_dpo/delta": -0.021090295165777206,
"fcm_dpo/margin": 77.75308227539062,
"fcm_dpo/q_t": 0.3964523673057556,
"grad_norm": 79.96570587158203,
"learning_rate": 1.2978624834891626e-07,
"logits/chosen": -0.877373993396759,
"logits/rejected": -0.8500229716300964,
"logps/chosen": -421.5205993652344,
"logps/ref_chosen": -269.97369384765625,
"logps/ref_rejected": -235.03164672851562,
"logps/rejected": -464.3316345214844,
"loss": 4.4297,
"margin_dpo/margin_mean": 77.75308990478516,
"margin_dpo/margin_std": 127.36727142333984,
"step": 332
},
{
"epoch": 0.6973821989528796,
"fcm_dpo/beta": 0.006327156908810139,
"fcm_dpo/delta": 0.02856810763478279,
"fcm_dpo/margin": 65.4626693725586,
"fcm_dpo/q_t": 0.41039058566093445,
"grad_norm": 89.18975830078125,
"learning_rate": 1.281842711051438e-07,
"logits/chosen": -0.950103223323822,
"logits/rejected": -0.9125382304191589,
"logps/chosen": -451.1890869140625,
"logps/ref_chosen": -296.76300048828125,
"logps/ref_rejected": -265.97991943359375,
"logps/rejected": -485.86871337890625,
"loss": 4.5003,
"margin_dpo/margin_mean": 65.46266174316406,
"margin_dpo/margin_std": 108.16644287109375,
"step": 333
},
{
"epoch": 0.6994764397905759,
"fcm_dpo/beta": 0.006423701532185078,
"fcm_dpo/delta": -0.02974826470017433,
"fcm_dpo/margin": 74.7381362915039,
"fcm_dpo/q_t": 0.3955420255661011,
"grad_norm": 111.00814819335938,
"learning_rate": 1.2658882646922033e-07,
"logits/chosen": -0.8517099618911743,
"logits/rejected": -0.824093222618103,
"logps/chosen": -447.6474609375,
"logps/ref_chosen": -301.0367431640625,
"logps/ref_rejected": -268.87652587890625,
"logps/rejected": -490.225341796875,
"loss": 4.3765,
"margin_dpo/margin_mean": 74.7381362915039,
"margin_dpo/margin_std": 112.17715454101562,
"step": 334
},
{
"epoch": 0.7015706806282722,
"fcm_dpo/beta": 0.00610282551497221,
"fcm_dpo/delta": -0.03819268196821213,
"fcm_dpo/margin": 75.7378158569336,
"fcm_dpo/q_t": 0.3999744653701782,
"grad_norm": 98.96734619140625,
"learning_rate": 1.2500000000000005e-07,
"logits/chosen": -0.8547537922859192,
"logits/rejected": -0.8491874933242798,
"logps/chosen": -438.50799560546875,
"logps/ref_chosen": -276.13275146484375,
"logps/ref_rejected": -243.44203186035156,
"logps/rejected": -481.5550231933594,
"loss": 4.4533,
"margin_dpo/margin_mean": 75.7378158569336,
"margin_dpo/margin_std": 121.23136138916016,
"step": 335
},
{
"epoch": 0.7036649214659686,
"fcm_dpo/beta": 0.006105098873376846,
"fcm_dpo/delta": 0.011495206505060196,
"fcm_dpo/margin": 71.84475708007812,
"fcm_dpo/q_t": 0.41034749150276184,
"grad_norm": 88.5465316772461,
"learning_rate": 1.2341787690142435e-07,
"logits/chosen": -0.8739686012268066,
"logits/rejected": -0.8107472658157349,
"logps/chosen": -407.6484375,
"logps/ref_chosen": -246.2626495361328,
"logps/ref_rejected": -261.0617980957031,
"logps/rejected": -494.29241943359375,
"loss": 4.5309,
"margin_dpo/margin_mean": 71.8447494506836,
"margin_dpo/margin_std": 125.27745819091797,
"step": 336
},
{
"epoch": 0.7057591623036649,
"fcm_dpo/beta": 0.006118074059486389,
"fcm_dpo/delta": -0.026346998289227486,
"fcm_dpo/margin": 82.11372375488281,
"fcm_dpo/q_t": 0.39139044284820557,
"grad_norm": 101.76982879638672,
"learning_rate": 1.2184254201795363e-07,
"logits/chosen": -0.8743100166320801,
"logits/rejected": -0.8459343314170837,
"logps/chosen": -416.9528503417969,
"logps/ref_chosen": -266.9937744140625,
"logps/ref_rejected": -253.015625,
"logps/rejected": -485.08837890625,
"loss": 4.2292,
"margin_dpo/margin_mean": 82.11372375488281,
"margin_dpo/margin_std": 111.41446685791016,
"step": 337
},
{
"epoch": 0.7078534031413612,
"fcm_dpo/beta": 0.006007419899106026,
"fcm_dpo/delta": -0.009809102863073349,
"fcm_dpo/margin": 79.27864074707031,
"fcm_dpo/q_t": 0.39599183201789856,
"grad_norm": 140.00198364257812,
"learning_rate": 1.202740798300168e-07,
"logits/chosen": -0.9043620228767395,
"logits/rejected": -0.8862846493721008,
"logps/chosen": -422.32366943359375,
"logps/ref_chosen": -276.5925598144531,
"logps/ref_rejected": -233.979248046875,
"logps/rejected": -458.989013671875,
"loss": 4.3198,
"margin_dpo/margin_mean": 79.27864074707031,
"margin_dpo/margin_std": 114.81280517578125,
"step": 338
},
{
"epoch": 0.7099476439790576,
"fcm_dpo/beta": 0.00596853019669652,
"fcm_dpo/delta": -0.022641818970441818,
"fcm_dpo/margin": 87.88375854492188,
"fcm_dpo/q_t": 0.38571611046791077,
"grad_norm": 83.07054138183594,
"learning_rate": 1.1871257444948096e-07,
"logits/chosen": -0.9114202260971069,
"logits/rejected": -0.8988155722618103,
"logps/chosen": -456.2464599609375,
"logps/ref_chosen": -303.5277404785156,
"logps/ref_rejected": -283.11676025390625,
"logps/rejected": -523.71923828125,
"loss": 4.234,
"margin_dpo/margin_mean": 87.88375091552734,
"margin_dpo/margin_std": 123.4928970336914,
"step": 339
},
{
"epoch": 0.7120418848167539,
"fcm_dpo/beta": 0.0059357453137636185,
"fcm_dpo/delta": 0.029740605503320694,
"fcm_dpo/margin": 70.70509338378906,
"fcm_dpo/q_t": 0.4103008508682251,
"grad_norm": 124.46795654296875,
"learning_rate": 1.1715810961514072e-07,
"logits/chosen": -0.8569005131721497,
"logits/rejected": -0.8529561758041382,
"logps/chosen": -426.36224365234375,
"logps/ref_chosen": -261.773681640625,
"logps/ref_rejected": -259.6319580078125,
"logps/rejected": -494.9256286621094,
"loss": 4.6878,
"margin_dpo/margin_mean": 70.70509338378906,
"margin_dpo/margin_std": 138.91661071777344,
"step": 340
},
{
"epoch": 0.7141361256544503,
"fcm_dpo/beta": 0.006035798694938421,
"fcm_dpo/delta": -0.00834234245121479,
"fcm_dpo/margin": 62.31619644165039,
"fcm_dpo/q_t": 0.42033177614212036,
"grad_norm": 96.36731719970703,
"learning_rate": 1.1561076868822755e-07,
"logits/chosen": -0.8783236145973206,
"logits/rejected": -0.8495619297027588,
"logps/chosen": -501.4289245605469,
"logps/ref_chosen": -315.903564453125,
"logps/ref_rejected": -308.02392578125,
"logps/rejected": -555.865478515625,
"loss": 4.8825,
"margin_dpo/margin_mean": 62.31619644165039,
"margin_dpo/margin_std": 138.55262756347656,
"step": 341
},
{
"epoch": 0.7162303664921466,
"fcm_dpo/beta": 0.006027575582265854,
"fcm_dpo/delta": 0.009746946394443512,
"fcm_dpo/margin": 73.49700164794922,
"fcm_dpo/q_t": 0.401203453540802,
"grad_norm": 96.99530792236328,
"learning_rate": 1.1407063464793965e-07,
"logits/chosen": -0.8633178472518921,
"logits/rejected": -0.8587174415588379,
"logps/chosen": -425.6961669921875,
"logps/ref_chosen": -269.17864990234375,
"logps/ref_rejected": -260.8977355957031,
"logps/rejected": -490.9122619628906,
"loss": 4.3999,
"margin_dpo/margin_mean": 73.49700164794922,
"margin_dpo/margin_std": 109.63931274414062,
"step": 342
},
{
"epoch": 0.7183246073298429,
"fcm_dpo/beta": 0.0060882181860506535,
"fcm_dpo/delta": 0.041927412152290344,
"fcm_dpo/margin": 67.39126586914062,
"fcm_dpo/q_t": 0.4095425009727478,
"grad_norm": 78.4207763671875,
"learning_rate": 1.125377900869913e-07,
"logits/chosen": -0.8656107783317566,
"logits/rejected": -0.8401020169258118,
"logps/chosen": -472.4183654785156,
"logps/ref_chosen": -310.719970703125,
"logps/ref_rejected": -263.5224914550781,
"logps/rejected": -492.6121520996094,
"loss": 4.4919,
"margin_dpo/margin_mean": 67.3912582397461,
"margin_dpo/margin_std": 109.82903289794922,
"step": 343
},
{
"epoch": 0.7204188481675393,
"fcm_dpo/beta": 0.006175879389047623,
"fcm_dpo/delta": -0.02153255045413971,
"fcm_dpo/margin": 69.64826202392578,
"fcm_dpo/q_t": 0.40481624007225037,
"grad_norm": 87.6580581665039,
"learning_rate": 1.110123172071844e-07,
"logits/chosen": -0.8676539063453674,
"logits/rejected": -0.8484990000724792,
"logps/chosen": -464.42755126953125,
"logps/ref_chosen": -301.7999267578125,
"logps/ref_rejected": -257.9061584472656,
"logps/rejected": -490.1820373535156,
"loss": 4.5216,
"margin_dpo/margin_mean": 69.64826202392578,
"margin_dpo/margin_std": 116.88276672363281,
"step": 344
},
{
"epoch": 0.7225130890052356,
"fcm_dpo/beta": 0.006050711497664452,
"fcm_dpo/delta": -0.012871744111180305,
"fcm_dpo/margin": 62.111324310302734,
"fcm_dpo/q_t": 0.41702836751937866,
"grad_norm": 118.44066619873047,
"learning_rate": 1.09494297815e-07,
"logits/chosen": -0.87298983335495,
"logits/rejected": -0.8727879524230957,
"logps/chosen": -442.3889465332031,
"logps/ref_chosen": -283.0184326171875,
"logps/ref_rejected": -266.8457336425781,
"logps/rejected": -488.3275451660156,
"loss": 4.5952,
"margin_dpo/margin_mean": 62.111324310302734,
"margin_dpo/margin_std": 101.83731842041016,
"step": 345
},
{
"epoch": 0.724607329842932,
"fcm_dpo/beta": 0.0059198313392698765,
"fcm_dpo/delta": -0.045941807329654694,
"fcm_dpo/margin": 80.39420318603516,
"fcm_dpo/q_t": 0.3944595456123352,
"grad_norm": 89.89678955078125,
"learning_rate": 1.0798381331721107e-07,
"logits/chosen": -0.9523677825927734,
"logits/rejected": -0.9006531238555908,
"logps/chosen": -438.8220520019531,
"logps/ref_chosen": -268.44122314453125,
"logps/ref_rejected": -227.8225860595703,
"logps/rejected": -478.5976257324219,
"loss": 4.3652,
"margin_dpo/margin_mean": 80.39420318603516,
"margin_dpo/margin_std": 115.65848541259766,
"step": 346
},
{
"epoch": 0.7267015706806282,
"fcm_dpo/beta": 0.0058544836938381195,
"fcm_dpo/delta": 0.014904415234923363,
"fcm_dpo/margin": 64.3581314086914,
"fcm_dpo/q_t": 0.41530972719192505,
"grad_norm": 110.02278137207031,
"learning_rate": 1.0648094471651722e-07,
"logits/chosen": -0.8056429028511047,
"logits/rejected": -0.8316211104393005,
"logps/chosen": -434.23370361328125,
"logps/ref_chosen": -273.80743408203125,
"logps/ref_rejected": -243.77993774414062,
"logps/rejected": -468.5643310546875,
"loss": 4.5818,
"margin_dpo/margin_mean": 64.3581314086914,
"margin_dpo/margin_std": 106.8345947265625,
"step": 347
},
{
"epoch": 0.7287958115183246,
"fcm_dpo/beta": 0.005897823721170425,
"fcm_dpo/delta": 0.0033234686125069857,
"fcm_dpo/margin": 60.73371124267578,
"fcm_dpo/q_t": 0.42289185523986816,
"grad_norm": 77.89971923828125,
"learning_rate": 1.0498577260720048e-07,
"logits/chosen": -0.8974190950393677,
"logits/rejected": -0.8794293403625488,
"logps/chosen": -443.1033935546875,
"logps/ref_chosen": -285.64141845703125,
"logps/ref_rejected": -265.6270446777344,
"logps/rejected": -483.8227233886719,
"loss": 4.6911,
"margin_dpo/margin_mean": 60.73371505737305,
"margin_dpo/margin_std": 115.34396362304688,
"step": 348
},
{
"epoch": 0.7308900523560209,
"fcm_dpo/beta": 0.00578670809045434,
"fcm_dpo/delta": -0.043646618723869324,
"fcm_dpo/margin": 87.91434478759766,
"fcm_dpo/q_t": 0.387836754322052,
"grad_norm": 110.71875,
"learning_rate": 1.0349837717080347e-07,
"logits/chosen": -0.8430861234664917,
"logits/rejected": -0.8314425349235535,
"logps/chosen": -489.0180969238281,
"logps/ref_chosen": -328.3175048828125,
"logps/ref_rejected": -292.37872314453125,
"logps/rejected": -540.99365234375,
"loss": 4.2708,
"margin_dpo/margin_mean": 87.91434478759766,
"margin_dpo/margin_std": 124.25060272216797,
"step": 349
},
{
"epoch": 0.7329842931937173,
"fcm_dpo/beta": 0.005712728016078472,
"fcm_dpo/delta": 0.016727229580283165,
"fcm_dpo/margin": 73.64863586425781,
"fcm_dpo/q_t": 0.4075002670288086,
"grad_norm": 79.8434829711914,
"learning_rate": 1.0201883817182949e-07,
"logits/chosen": -0.8432740569114685,
"logits/rejected": -0.8538658618927002,
"logps/chosen": -465.37115478515625,
"logps/ref_chosen": -292.8046569824219,
"logps/ref_rejected": -250.35504150390625,
"logps/rejected": -496.5701904296875,
"loss": 4.5371,
"margin_dpo/margin_mean": 73.64864349365234,
"margin_dpo/margin_std": 125.77432250976562,
"step": 350
},
{
"epoch": 0.7350785340314137,
"fcm_dpo/beta": 0.0057805683463811874,
"fcm_dpo/delta": 0.0012550692772492766,
"fcm_dpo/margin": 47.94782257080078,
"fcm_dpo/q_t": 0.43993788957595825,
"grad_norm": 80.92696380615234,
"learning_rate": 1.0054723495346482e-07,
"logits/chosen": -0.9094070196151733,
"logits/rejected": -0.8915246725082397,
"logps/chosen": -473.46112060546875,
"logps/ref_chosen": -311.8890380859375,
"logps/ref_rejected": -263.59033203125,
"logps/rejected": -473.1102294921875,
"loss": 5.0341,
"margin_dpo/margin_mean": 47.94782257080078,
"margin_dpo/margin_std": 125.9432601928711,
"step": 351
},
{
"epoch": 0.7371727748691099,
"fcm_dpo/beta": 0.005751358810812235,
"fcm_dpo/delta": -0.01616637036204338,
"fcm_dpo/margin": 87.51070404052734,
"fcm_dpo/q_t": 0.3887549042701721,
"grad_norm": 91.40033721923828,
"learning_rate": 9.908364643332398e-08,
"logits/chosen": -0.8355797529220581,
"logits/rejected": -0.8050141334533691,
"logps/chosen": -406.9665222167969,
"logps/ref_chosen": -254.9078826904297,
"logps/ref_rejected": -257.1688232421875,
"logps/rejected": -496.73822021484375,
"loss": 4.2611,
"margin_dpo/margin_mean": 87.51070404052734,
"margin_dpo/margin_std": 120.50152587890625,
"step": 352
},
{
"epoch": 0.7392670157068063,
"fcm_dpo/beta": 0.005663630552589893,
"fcm_dpo/delta": -0.017772413790225983,
"fcm_dpo/margin": 75.58662414550781,
"fcm_dpo/q_t": 0.4062352180480957,
"grad_norm": 95.64911651611328,
"learning_rate": 9.76281510992176e-08,
"logits/chosen": -0.8585754036903381,
"logits/rejected": -0.8523797988891602,
"logps/chosen": -433.67724609375,
"logps/ref_chosen": -270.3760681152344,
"logps/ref_rejected": -264.65234375,
"logps/rejected": -503.5401306152344,
"loss": 4.4941,
"margin_dpo/margin_mean": 75.58662414550781,
"margin_dpo/margin_std": 123.0322265625,
"step": 353
},
{
"epoch": 0.7413612565445026,
"fcm_dpo/beta": 0.005698430351912975,
"fcm_dpo/delta": 0.01707335188984871,
"fcm_dpo/margin": 56.518348693847656,
"fcm_dpo/q_t": 0.4294819235801697,
"grad_norm": 100.97528076171875,
"learning_rate": 9.618082700494318e-08,
"logits/chosen": -0.851939857006073,
"logits/rejected": -0.8844606876373291,
"logps/chosen": -422.6122741699219,
"logps/ref_chosen": -257.6485595703125,
"logps/ref_rejected": -246.94203186035156,
"logps/rejected": -468.4241027832031,
"loss": 4.8345,
"margin_dpo/margin_mean": 56.518348693847656,
"margin_dpo/margin_std": 121.96803283691406,
"step": 354
},
{
"epoch": 0.743455497382199,
"fcm_dpo/beta": 0.005763496737927198,
"fcm_dpo/delta": 0.02075032889842987,
"fcm_dpo/margin": 81.39944458007812,
"fcm_dpo/q_t": 0.3979400098323822,
"grad_norm": 102.7159423828125,
"learning_rate": 9.474175176609956e-08,
"logits/chosen": -0.9034559726715088,
"logits/rejected": -0.9011656641960144,
"logps/chosen": -457.8494567871094,
"logps/ref_chosen": -293.7086181640625,
"logps/ref_rejected": -275.7286682128906,
"logps/rejected": -521.2689208984375,
"loss": 4.467,
"margin_dpo/margin_mean": 81.39944458007812,
"margin_dpo/margin_std": 131.8922882080078,
"step": 355
},
{
"epoch": 0.7455497382198953,
"fcm_dpo/beta": 0.005846232175827026,
"fcm_dpo/delta": 0.005677876062691212,
"fcm_dpo/margin": 55.80099105834961,
"fcm_dpo/q_t": 0.426545649766922,
"grad_norm": 99.32606506347656,
"learning_rate": 9.331100255592436e-08,
"logits/chosen": -0.8223292231559753,
"logits/rejected": -0.8494030833244324,
"logps/chosen": -359.28802490234375,
"logps/ref_chosen": -204.25550842285156,
"logps/ref_rejected": -213.467529296875,
"logps/rejected": -424.301025390625,
"loss": 4.7212,
"margin_dpo/margin_mean": 55.80099105834961,
"margin_dpo/margin_std": 101.92318725585938,
"step": 356
},
{
"epoch": 0.7476439790575916,
"fcm_dpo/beta": 0.005853200796991587,
"fcm_dpo/delta": 0.01576152630150318,
"fcm_dpo/margin": 78.51569366455078,
"fcm_dpo/q_t": 0.40189990401268005,
"grad_norm": 88.03360748291016,
"learning_rate": 9.18886561011557e-08,
"logits/chosen": -0.7967926263809204,
"logits/rejected": -0.7948128581047058,
"logps/chosen": -430.37744140625,
"logps/ref_chosen": -266.3705749511719,
"logps/ref_rejected": -239.04490661621094,
"logps/rejected": -481.5674743652344,
"loss": 4.4341,
"margin_dpo/margin_mean": 78.51569366455078,
"margin_dpo/margin_std": 123.16831970214844,
"step": 357
},
{
"epoch": 0.749738219895288,
"fcm_dpo/beta": 0.005892930086702108,
"fcm_dpo/delta": -0.016687767580151558,
"fcm_dpo/margin": 88.47492980957031,
"fcm_dpo/q_t": 0.3852734863758087,
"grad_norm": 81.83648681640625,
"learning_rate": 9.047478867791731e-08,
"logits/chosen": -0.8925428986549377,
"logits/rejected": -0.8706269860267639,
"logps/chosen": -445.3536376953125,
"logps/ref_chosen": -299.1474609375,
"logps/ref_rejected": -257.2531433105469,
"logps/rejected": -491.93426513671875,
"loss": 4.2216,
"margin_dpo/margin_mean": 88.47492980957031,
"margin_dpo/margin_std": 120.04918670654297,
"step": 358
},
{
"epoch": 0.7518324607329843,
"fcm_dpo/beta": 0.005809912458062172,
"fcm_dpo/delta": -0.005308025516569614,
"fcm_dpo/margin": 76.4288101196289,
"fcm_dpo/q_t": 0.4017740488052368,
"grad_norm": 101.37205505371094,
"learning_rate": 8.906947610762825e-08,
"logits/chosen": -0.8550885915756226,
"logits/rejected": -0.8663427233695984,
"logps/chosen": -455.0647888183594,
"logps/ref_chosen": -302.99786376953125,
"logps/ref_rejected": -260.4137268066406,
"logps/rejected": -488.90948486328125,
"loss": 4.3942,
"margin_dpo/margin_mean": 76.42880249023438,
"margin_dpo/margin_std": 113.6546401977539,
"step": 359
},
{
"epoch": 0.7539267015706806,
"fcm_dpo/beta": 0.005714688450098038,
"fcm_dpo/delta": -0.03394751250743866,
"fcm_dpo/margin": 67.75115966796875,
"fcm_dpo/q_t": 0.41331568360328674,
"grad_norm": 96.14535522460938,
"learning_rate": 8.76727937529367e-08,
"logits/chosen": -0.8793231248855591,
"logits/rejected": -0.8659530878067017,
"logps/chosen": -471.56915283203125,
"logps/ref_chosen": -309.6114501953125,
"logps/ref_rejected": -256.64031982421875,
"logps/rejected": -486.34912109375,
"loss": 4.6733,
"margin_dpo/margin_mean": 67.75115966796875,
"margin_dpo/margin_std": 122.61837005615234,
"step": 360
},
{
"epoch": 0.7560209424083769,
"fcm_dpo/beta": 0.005799026228487492,
"fcm_dpo/delta": 0.03264398127794266,
"fcm_dpo/margin": 92.61182403564453,
"fcm_dpo/q_t": 0.38507190346717834,
"grad_norm": 77.22454071044922,
"learning_rate": 8.628481651367875e-08,
"logits/chosen": -0.8238570690155029,
"logits/rejected": -0.803720235824585,
"logps/chosen": -393.7602233886719,
"logps/ref_chosen": -263.3797607421875,
"logps/ref_rejected": -271.18157958984375,
"logps/rejected": -494.173828125,
"loss": 4.2488,
"margin_dpo/margin_mean": 92.61182403564453,
"margin_dpo/margin_std": 135.1205291748047,
"step": 361
},
{
"epoch": 0.7581151832460733,
"fcm_dpo/beta": 0.005834028124809265,
"fcm_dpo/delta": 0.024925608187913895,
"fcm_dpo/margin": 76.73521423339844,
"fcm_dpo/q_t": 0.40017956495285034,
"grad_norm": 77.04792022705078,
"learning_rate": 8.490561882286135e-08,
"logits/chosen": -0.8460214138031006,
"logits/rejected": -0.8339633345603943,
"logps/chosen": -447.5461730957031,
"logps/ref_chosen": -303.2583923339844,
"logps/ref_rejected": -243.22891235351562,
"logps/rejected": -464.2519226074219,
"loss": 4.3232,
"margin_dpo/margin_mean": 76.73521423339844,
"margin_dpo/margin_std": 105.83525848388672,
"step": 362
},
{
"epoch": 0.7602094240837697,
"fcm_dpo/beta": 0.005913248751312494,
"fcm_dpo/delta": -0.024684742093086243,
"fcm_dpo/margin": 77.5684814453125,
"fcm_dpo/q_t": 0.4012209177017212,
"grad_norm": 112.09042358398438,
"learning_rate": 8.353527464267104e-08,
"logits/chosen": -0.8637977838516235,
"logits/rejected": -0.8185281157493591,
"logps/chosen": -452.3515319824219,
"logps/ref_chosen": -303.34722900390625,
"logps/ref_rejected": -262.05419921875,
"logps/rejected": -488.62701416015625,
"loss": 4.4309,
"margin_dpo/margin_mean": 77.56847381591797,
"margin_dpo/margin_std": 123.60404205322266,
"step": 363
},
{
"epoch": 0.762303664921466,
"fcm_dpo/beta": 0.0058737266808748245,
"fcm_dpo/delta": 0.01749351993203163,
"fcm_dpo/margin": 67.9317855834961,
"fcm_dpo/q_t": 0.4149549603462219,
"grad_norm": 70.62501525878906,
"learning_rate": 8.217385746050742e-08,
"logits/chosen": -0.8313828706741333,
"logits/rejected": -0.8386605978012085,
"logps/chosen": -457.17083740234375,
"logps/ref_chosen": -285.54376220703125,
"logps/ref_rejected": -284.84619140625,
"logps/rejected": -524.405029296875,
"loss": 4.6634,
"margin_dpo/margin_mean": 67.93179321289062,
"margin_dpo/margin_std": 130.92474365234375,
"step": 364
},
{
"epoch": 0.7643979057591623,
"fcm_dpo/beta": 0.005943778902292252,
"fcm_dpo/delta": 0.03518182039260864,
"fcm_dpo/margin": 68.78251647949219,
"fcm_dpo/q_t": 0.4121231138706207,
"grad_norm": 114.49729919433594,
"learning_rate": 8.082144028504231e-08,
"logits/chosen": -0.8508257865905762,
"logits/rejected": -0.8534409403800964,
"logps/chosen": -434.00701904296875,
"logps/ref_chosen": -274.7878112792969,
"logps/ref_rejected": -256.5738220214844,
"logps/rejected": -484.5755310058594,
"loss": 4.575,
"margin_dpo/margin_mean": 68.78252410888672,
"margin_dpo/margin_std": 120.49612426757812,
"step": 365
},
{
"epoch": 0.7664921465968586,
"fcm_dpo/beta": 0.006143275648355484,
"fcm_dpo/delta": 0.024667471647262573,
"fcm_dpo/margin": 77.7815933227539,
"fcm_dpo/q_t": 0.3957550525665283,
"grad_norm": 73.5907974243164,
"learning_rate": 7.947809564230445e-08,
"logits/chosen": -0.8157333731651306,
"logits/rejected": -0.8252131342887878,
"logps/chosen": -433.6657409667969,
"logps/ref_chosen": -286.6496276855469,
"logps/ref_rejected": -251.97140502929688,
"logps/rejected": -476.7691650390625,
"loss": 4.3843,
"margin_dpo/margin_mean": 77.7815933227539,
"margin_dpo/margin_std": 122.67338562011719,
"step": 366
},
{
"epoch": 0.768586387434555,
"fcm_dpo/beta": 0.006196199916303158,
"fcm_dpo/delta": 0.016968997195363045,
"fcm_dpo/margin": 81.11161041259766,
"fcm_dpo/q_t": 0.391402930021286,
"grad_norm": 110.20975494384766,
"learning_rate": 7.814389557179016e-08,
"logits/chosen": -0.8344327211380005,
"logits/rejected": -0.8122835159301758,
"logps/chosen": -446.1802673339844,
"logps/ref_chosen": -301.9449768066406,
"logps/ref_rejected": -265.5677185058594,
"logps/rejected": -490.9145812988281,
"loss": 4.2666,
"margin_dpo/margin_mean": 81.11161041259766,
"margin_dpo/margin_std": 115.35000610351562,
"step": 367
},
{
"epoch": 0.7706806282722513,
"fcm_dpo/beta": 0.006140113342553377,
"fcm_dpo/delta": -0.05516147240996361,
"fcm_dpo/margin": 98.54963684082031,
"fcm_dpo/q_t": 0.3665163218975067,
"grad_norm": 78.63714599609375,
"learning_rate": 7.681891162260015e-08,
"logits/chosen": -0.8067573308944702,
"logits/rejected": -0.8146823644638062,
"logps/chosen": -430.4259948730469,
"logps/ref_chosen": -294.62652587890625,
"logps/ref_rejected": -258.7628479003906,
"logps/rejected": -493.1119689941406,
"loss": 3.8877,
"margin_dpo/margin_mean": 98.54963684082031,
"margin_dpo/margin_std": 106.2259521484375,
"step": 368
},
{
"epoch": 0.7727748691099476,
"fcm_dpo/beta": 0.005970560014247894,
"fcm_dpo/delta": -0.0030122532043606043,
"fcm_dpo/margin": 69.67950439453125,
"fcm_dpo/q_t": 0.4068659245967865,
"grad_norm": 79.01779174804688,
"learning_rate": 7.550321484960251e-08,
"logits/chosen": -0.8858903050422668,
"logits/rejected": -0.8695452213287354,
"logps/chosen": -428.8231506347656,
"logps/ref_chosen": -282.5057373046875,
"logps/ref_rejected": -266.41607666015625,
"logps/rejected": -482.4130554199219,
"loss": 4.4581,
"margin_dpo/margin_mean": 69.67951202392578,
"margin_dpo/margin_std": 107.40840911865234,
"step": 369
},
{
"epoch": 0.774869109947644,
"fcm_dpo/beta": 0.005926723126322031,
"fcm_dpo/delta": -0.02430885285139084,
"fcm_dpo/margin": 87.5254898071289,
"fcm_dpo/q_t": 0.3869887888431549,
"grad_norm": 82.53268432617188,
"learning_rate": 7.419687580962222e-08,
"logits/chosen": -0.8693802952766418,
"logits/rejected": -0.8931019306182861,
"logps/chosen": -385.6020202636719,
"logps/ref_chosen": -251.00640869140625,
"logps/ref_rejected": -238.12542724609375,
"logps/rejected": -460.2464599609375,
"loss": 4.2537,
"margin_dpo/margin_mean": 87.52548217773438,
"margin_dpo/margin_std": 125.15676879882812,
"step": 370
},
{
"epoch": 0.7769633507853403,
"fcm_dpo/beta": 0.005934232845902443,
"fcm_dpo/delta": 0.0160065945237875,
"fcm_dpo/margin": 63.61900329589844,
"fcm_dpo/q_t": 0.4160739779472351,
"grad_norm": 117.99787139892578,
"learning_rate": 7.289996455765748e-08,
"logits/chosen": -0.822093665599823,
"logits/rejected": -0.8111148476600647,
"logps/chosen": -452.1481018066406,
"logps/ref_chosen": -296.6591491699219,
"logps/ref_rejected": -251.14675903320312,
"logps/rejected": -470.25469970703125,
"loss": 4.5981,
"margin_dpo/margin_mean": 63.61901092529297,
"margin_dpo/margin_std": 109.16148376464844,
"step": 371
},
{
"epoch": 0.7790575916230367,
"fcm_dpo/beta": 0.005838276818394661,
"fcm_dpo/delta": -0.018924139440059662,
"fcm_dpo/margin": 88.01292419433594,
"fcm_dpo/q_t": 0.3878030478954315,
"grad_norm": 87.69620513916016,
"learning_rate": 7.161255064312283e-08,
"logits/chosen": -0.7913342714309692,
"logits/rejected": -0.7832822799682617,
"logps/chosen": -480.2196044921875,
"logps/ref_chosen": -331.3714599609375,
"logps/ref_rejected": -285.56805419921875,
"logps/rejected": -522.4291381835938,
"loss": 4.2501,
"margin_dpo/margin_mean": 88.01290893554688,
"margin_dpo/margin_std": 120.81631469726562,
"step": 372
},
{
"epoch": 0.7811518324607329,
"fcm_dpo/beta": 0.005902654957026243,
"fcm_dpo/delta": 0.03373875096440315,
"fcm_dpo/margin": 82.58007049560547,
"fcm_dpo/q_t": 0.3900327980518341,
"grad_norm": 96.37030792236328,
"learning_rate": 7.033470310611945e-08,
"logits/chosen": -0.8866556286811829,
"logits/rejected": -0.8576078414916992,
"logps/chosen": -457.91162109375,
"logps/ref_chosen": -321.9429931640625,
"logps/ref_rejected": -271.2288513183594,
"logps/rejected": -489.7775573730469,
"loss": 4.2322,
"margin_dpo/margin_mean": 82.58007049560547,
"margin_dpo/margin_std": 107.1668930053711,
"step": 373
},
{
"epoch": 0.7832460732984293,
"fcm_dpo/beta": 0.005965717602521181,
"fcm_dpo/delta": -0.008612215518951416,
"fcm_dpo/margin": 69.785400390625,
"fcm_dpo/q_t": 0.40951642394065857,
"grad_norm": 70.30329132080078,
"learning_rate": 6.906649047373245e-08,
"logits/chosen": -0.8783119916915894,
"logits/rejected": -0.8763049840927124,
"logps/chosen": -463.5076599121094,
"logps/ref_chosen": -318.8375244140625,
"logps/ref_rejected": -285.1805419921875,
"logps/rejected": -499.6360168457031,
"loss": 4.5067,
"margin_dpo/margin_mean": 69.78540802001953,
"margin_dpo/margin_std": 115.1856689453125,
"step": 374
},
{
"epoch": 0.7853403141361257,
"fcm_dpo/beta": 0.0060208821669220924,
"fcm_dpo/delta": 0.0241708941757679,
"fcm_dpo/margin": 61.874324798583984,
"fcm_dpo/q_t": 0.4182923436164856,
"grad_norm": 84.8660659790039,
"learning_rate": 6.780798075635675e-08,
"logits/chosen": -0.8697792887687683,
"logits/rejected": -0.8496103286743164,
"logps/chosen": -464.14794921875,
"logps/ref_chosen": -314.87579345703125,
"logps/ref_rejected": -259.1965026855469,
"logps/rejected": -470.3429870605469,
"loss": 4.6508,
"margin_dpo/margin_mean": 61.87432098388672,
"margin_dpo/margin_std": 113.47924041748047,
"step": 375
},
{
"epoch": 0.787434554973822,
"fcm_dpo/beta": 0.006095539778470993,
"fcm_dpo/delta": -0.01614905148744583,
"fcm_dpo/margin": 75.52915954589844,
"fcm_dpo/q_t": 0.4003352224826813,
"grad_norm": 118.24581146240234,
"learning_rate": 6.655924144404906e-08,
"logits/chosen": -0.8553508520126343,
"logits/rejected": -0.8609263896942139,
"logps/chosen": -435.73291015625,
"logps/ref_chosen": -287.6732482910156,
"logps/ref_rejected": -256.6697082519531,
"logps/rejected": -480.258544921875,
"loss": 4.4422,
"margin_dpo/margin_mean": 75.5291519165039,
"margin_dpo/margin_std": 121.42930603027344,
"step": 376
},
{
"epoch": 0.7895287958115184,
"fcm_dpo/beta": 0.0060347155667841434,
"fcm_dpo/delta": 0.02518528327345848,
"fcm_dpo/margin": 52.56871795654297,
"fcm_dpo/q_t": 0.4289829730987549,
"grad_norm": 77.70852661132812,
"learning_rate": 6.532033950290885e-08,
"logits/chosen": -0.8347393274307251,
"logits/rejected": -0.8331432342529297,
"logps/chosen": -464.60101318359375,
"logps/ref_chosen": -305.261474609375,
"logps/ref_rejected": -271.8887023925781,
"logps/rejected": -483.7969970703125,
"loss": 4.8715,
"margin_dpo/margin_mean": 52.5687141418457,
"margin_dpo/margin_std": 117.13998413085938,
"step": 377
},
{
"epoch": 0.7916230366492146,
"fcm_dpo/beta": 0.006152212154120207,
"fcm_dpo/delta": 0.01931355521082878,
"fcm_dpo/margin": 62.3286247253418,
"fcm_dpo/q_t": 0.4168659448623657,
"grad_norm": 114.11940002441406,
"learning_rate": 6.409134137148736e-08,
"logits/chosen": -0.8285514116287231,
"logits/rejected": -0.8158466219902039,
"logps/chosen": -428.2237243652344,
"logps/ref_chosen": -281.5295715332031,
"logps/ref_rejected": -296.980224609375,
"logps/rejected": -506.0030517578125,
"loss": 4.6407,
"margin_dpo/margin_mean": 62.3286247253418,
"margin_dpo/margin_std": 114.72845458984375,
"step": 378
},
{
"epoch": 0.793717277486911,
"fcm_dpo/beta": 0.006289042532444,
"fcm_dpo/delta": 0.027656404301524162,
"fcm_dpo/margin": 69.89701080322266,
"fcm_dpo/q_t": 0.4024941325187683,
"grad_norm": 115.24781036376953,
"learning_rate": 6.28723129572247e-08,
"logits/chosen": -0.8913782238960266,
"logits/rejected": -0.8727085590362549,
"logps/chosen": -402.0198059082031,
"logps/ref_chosen": -265.0807800292969,
"logps/ref_rejected": -230.58932495117188,
"logps/rejected": -437.42535400390625,
"loss": 4.4366,
"margin_dpo/margin_mean": 69.89701080322266,
"margin_dpo/margin_std": 110.67437744140625,
"step": 379
},
{
"epoch": 0.7958115183246073,
"fcm_dpo/beta": 0.0064195310696959496,
"fcm_dpo/delta": 0.005187598988413811,
"fcm_dpo/margin": 72.04703521728516,
"fcm_dpo/q_t": 0.40023064613342285,
"grad_norm": 91.46406555175781,
"learning_rate": 6.166331963291519e-08,
"logits/chosen": -0.8595871329307556,
"logits/rejected": -0.8388068079948425,
"logps/chosen": -452.0315856933594,
"logps/ref_chosen": -305.90838623046875,
"logps/ref_rejected": -286.5906677246094,
"logps/rejected": -504.7608337402344,
"loss": 4.4181,
"margin_dpo/margin_mean": 72.04704284667969,
"margin_dpo/margin_std": 111.72415161132812,
"step": 380
},
{
"epoch": 0.7979057591623037,
"fcm_dpo/beta": 0.006307562813162804,
"fcm_dpo/delta": -0.03759654238820076,
"fcm_dpo/margin": 80.26667785644531,
"fcm_dpo/q_t": 0.39190673828125,
"grad_norm": 93.41769409179688,
"learning_rate": 6.046442623320145e-08,
"logits/chosen": -0.8274192810058594,
"logits/rejected": -0.792305052280426,
"logps/chosen": -390.3536376953125,
"logps/ref_chosen": -248.75901794433594,
"logps/ref_rejected": -261.37420654296875,
"logps/rejected": -483.2354431152344,
"loss": 4.3242,
"margin_dpo/margin_mean": 80.26667785644531,
"margin_dpo/margin_std": 119.75177764892578,
"step": 381
},
{
"epoch": 0.8,
"fcm_dpo/beta": 0.006153635680675507,
"fcm_dpo/delta": -0.02411348558962345,
"fcm_dpo/margin": 90.74658203125,
"fcm_dpo/q_t": 0.3765316903591156,
"grad_norm": 71.57942962646484,
"learning_rate": 5.9275697051098275e-08,
"logits/chosen": -0.8713455200195312,
"logits/rejected": -0.8670474290847778,
"logps/chosen": -423.5090026855469,
"logps/ref_chosen": -289.2114562988281,
"logps/ref_rejected": -278.45751953125,
"logps/rejected": -503.5016784667969,
"loss": 4.0686,
"margin_dpo/margin_mean": 90.74658203125,
"margin_dpo/margin_std": 110.36607360839844,
"step": 382
},
{
"epoch": 0.8020942408376963,
"fcm_dpo/beta": 0.006209728308022022,
"fcm_dpo/delta": 0.009757298976182938,
"fcm_dpo/margin": 81.08769989013672,
"fcm_dpo/q_t": 0.3904225826263428,
"grad_norm": 114.92029571533203,
"learning_rate": 5.809719583454414e-08,
"logits/chosen": -0.8584976196289062,
"logits/rejected": -0.8407590389251709,
"logps/chosen": -407.2940673828125,
"logps/ref_chosen": -273.630859375,
"logps/ref_rejected": -261.44024658203125,
"logps/rejected": -476.191162109375,
"loss": 4.2888,
"margin_dpo/margin_mean": 81.08769989013672,
"margin_dpo/margin_std": 116.51924133300781,
"step": 383
},
{
"epoch": 0.8041884816753927,
"fcm_dpo/beta": 0.006165705155581236,
"fcm_dpo/delta": 0.015231862664222717,
"fcm_dpo/margin": 67.98896026611328,
"fcm_dpo/q_t": 0.40863093733787537,
"grad_norm": 102.76622772216797,
"learning_rate": 5.6928985782982524e-08,
"logits/chosen": -0.8647336959838867,
"logits/rejected": -0.8628965616226196,
"logps/chosen": -417.2807922363281,
"logps/ref_chosen": -274.5699462890625,
"logps/ref_rejected": -285.8253479003906,
"logps/rejected": -496.5251159667969,
"loss": 4.514,
"margin_dpo/margin_mean": 67.98896026611328,
"margin_dpo/margin_std": 113.87785339355469,
"step": 384
},
{
"epoch": 0.806282722513089,
"fcm_dpo/beta": 0.0063270702958106995,
"fcm_dpo/delta": 0.029631979763507843,
"fcm_dpo/margin": 67.26148223876953,
"fcm_dpo/q_t": 0.4064520299434662,
"grad_norm": 106.2359390258789,
"learning_rate": 5.57711295439732e-08,
"logits/chosen": -0.8074496984481812,
"logits/rejected": -0.8046512603759766,
"logps/chosen": -428.5892639160156,
"logps/ref_chosen": -284.150634765625,
"logps/ref_rejected": -244.87921142578125,
"logps/rejected": -456.5793762207031,
"loss": 4.4353,
"margin_dpo/margin_mean": 67.26148223876953,
"margin_dpo/margin_std": 103.14485931396484,
"step": 385
},
{
"epoch": 0.8083769633507853,
"fcm_dpo/beta": 0.006050161086022854,
"fcm_dpo/delta": -0.05498097091913223,
"fcm_dpo/margin": 88.26424407958984,
"fcm_dpo/q_t": 0.3840131163597107,
"grad_norm": 84.46400451660156,
"learning_rate": 5.4623689209832484e-08,
"logits/chosen": -0.7946534752845764,
"logits/rejected": -0.7936133742332458,
"logps/chosen": -453.0300598144531,
"logps/ref_chosen": -320.1762390136719,
"logps/ref_rejected": -302.05023193359375,
"logps/rejected": -523.1682739257812,
"loss": 4.1745,
"margin_dpo/margin_mean": 88.26424407958984,
"margin_dpo/margin_std": 112.27763366699219,
"step": 386
},
{
"epoch": 0.8104712041884817,
"fcm_dpo/beta": 0.006075785029679537,
"fcm_dpo/delta": -0.00033976510167121887,
"fcm_dpo/margin": 71.9666976928711,
"fcm_dpo/q_t": 0.4021925926208496,
"grad_norm": 81.50077056884766,
"learning_rate": 5.3486726314303175e-08,
"logits/chosen": -0.843817412853241,
"logits/rejected": -0.8498228788375854,
"logps/chosen": -417.2613220214844,
"logps/ref_chosen": -272.2801513671875,
"logps/ref_rejected": -265.1615905761719,
"logps/rejected": -482.10943603515625,
"loss": 4.4821,
"margin_dpo/margin_mean": 71.9666976928711,
"margin_dpo/margin_std": 115.36253356933594,
"step": 387
},
{
"epoch": 0.812565445026178,
"fcm_dpo/beta": 0.0059943245723843575,
"fcm_dpo/delta": -0.017121536657214165,
"fcm_dpo/margin": 49.354312896728516,
"fcm_dpo/q_t": 0.43480098247528076,
"grad_norm": 98.12901306152344,
"learning_rate": 5.2360301829254745e-08,
"logits/chosen": -0.827545166015625,
"logits/rejected": -0.8192716240882874,
"logps/chosen": -427.61083984375,
"logps/ref_chosen": -272.5313415527344,
"logps/ref_rejected": -239.55735778808594,
"logps/rejected": -443.9912414550781,
"loss": 4.9501,
"margin_dpo/margin_mean": 49.354312896728516,
"margin_dpo/margin_std": 115.72721862792969,
"step": 388
},
{
"epoch": 0.8146596858638744,
"fcm_dpo/beta": 0.005862588062882423,
"fcm_dpo/delta": -0.04299803823232651,
"fcm_dpo/margin": 72.5375747680664,
"fcm_dpo/q_t": 0.4077499210834503,
"grad_norm": 74.54901123046875,
"learning_rate": 5.1244476161413806e-08,
"logits/chosen": -0.8496405482292175,
"logits/rejected": -0.8481156229972839,
"logps/chosen": -430.34722900390625,
"logps/ref_chosen": -281.0892639160156,
"logps/ref_rejected": -246.50045776367188,
"logps/rejected": -468.2959899902344,
"loss": 4.5301,
"margin_dpo/margin_mean": 72.53758239746094,
"margin_dpo/margin_std": 120.5777816772461,
"step": 389
},
{
"epoch": 0.8167539267015707,
"fcm_dpo/beta": 0.005800171289592981,
"fcm_dpo/delta": -0.005835860967636108,
"fcm_dpo/margin": 74.20028686523438,
"fcm_dpo/q_t": 0.4045044183731079,
"grad_norm": 61.78060531616211,
"learning_rate": 5.013930914912476e-08,
"logits/chosen": -0.8756927847862244,
"logits/rejected": -0.8838696479797363,
"logps/chosen": -430.1648864746094,
"logps/ref_chosen": -283.98748779296875,
"logps/ref_rejected": -283.465087890625,
"logps/rejected": -503.8427734375,
"loss": 4.429,
"margin_dpo/margin_mean": 74.20028686523438,
"margin_dpo/margin_std": 112.28407287597656,
"step": 390
},
{
"epoch": 0.818848167539267,
"fcm_dpo/beta": 0.005761809181421995,
"fcm_dpo/delta": -0.0012128003872931004,
"fcm_dpo/margin": 78.95933532714844,
"fcm_dpo/q_t": 0.3996528089046478,
"grad_norm": 95.56779479980469,
"learning_rate": 4.904486005914027e-08,
"logits/chosen": -0.8164564967155457,
"logits/rejected": -0.8071346282958984,
"logps/chosen": -439.491455078125,
"logps/ref_chosen": -283.86138916015625,
"logps/ref_rejected": -263.5093688964844,
"logps/rejected": -498.0987854003906,
"loss": 4.3854,
"margin_dpo/margin_mean": 78.95933532714844,
"margin_dpo/margin_std": 116.82786560058594,
"step": 391
},
{
"epoch": 0.8209424083769633,
"fcm_dpo/beta": 0.005556129384785891,
"fcm_dpo/delta": -0.04375196620821953,
"fcm_dpo/margin": 92.68234252929688,
"fcm_dpo/q_t": 0.3841486871242523,
"grad_norm": 77.97855377197266,
"learning_rate": 4.796118758344353e-08,
"logits/chosen": -0.8111199140548706,
"logits/rejected": -0.8326938152313232,
"logps/chosen": -452.4772033691406,
"logps/ref_chosen": -310.070068359375,
"logps/ref_rejected": -252.89817810058594,
"logps/rejected": -487.9876403808594,
"loss": 4.1345,
"margin_dpo/margin_mean": 92.68234252929688,
"margin_dpo/margin_std": 109.37548828125,
"step": 392
},
{
"epoch": 0.8230366492146597,
"fcm_dpo/beta": 0.005607847589999437,
"fcm_dpo/delta": 0.028703555464744568,
"fcm_dpo/margin": 70.10714721679688,
"fcm_dpo/q_t": 0.4122408926486969,
"grad_norm": 52.80439376831055,
"learning_rate": 4.688834983610082e-08,
"logits/chosen": -0.8504996299743652,
"logits/rejected": -0.8361021876335144,
"logps/chosen": -427.104248046875,
"logps/ref_chosen": -286.7156677246094,
"logps/ref_rejected": -230.00357055664062,
"logps/rejected": -440.49932861328125,
"loss": 4.5192,
"margin_dpo/margin_mean": 70.10714721679688,
"margin_dpo/margin_std": 114.96930694580078,
"step": 393
},
{
"epoch": 0.8251308900523561,
"fcm_dpo/beta": 0.00568231288343668,
"fcm_dpo/delta": 0.0005726986564695835,
"fcm_dpo/margin": 64.0665283203125,
"fcm_dpo/q_t": 0.42188864946365356,
"grad_norm": 75.28536987304688,
"learning_rate": 4.582640435014459e-08,
"logits/chosen": -0.8926426768302917,
"logits/rejected": -0.8892621397972107,
"logps/chosen": -470.1836242675781,
"logps/ref_chosen": -325.9934387207031,
"logps/ref_rejected": -317.42706298828125,
"logps/rejected": -525.6837158203125,
"loss": 4.6822,
"margin_dpo/margin_mean": 64.0665283203125,
"margin_dpo/margin_std": 119.64508819580078,
"step": 394
},
{
"epoch": 0.8272251308900523,
"fcm_dpo/beta": 0.0057205078192055225,
"fcm_dpo/delta": 0.006010397337377071,
"fcm_dpo/margin": 84.71833801269531,
"fcm_dpo/q_t": 0.396056205034256,
"grad_norm": 66.78388214111328,
"learning_rate": 4.477540807448832e-08,
"logits/chosen": -0.8380643129348755,
"logits/rejected": -0.8483308553695679,
"logps/chosen": -412.0726318359375,
"logps/ref_chosen": -268.90081787109375,
"logps/ref_rejected": -272.85809326171875,
"logps/rejected": -500.748291015625,
"loss": 4.2831,
"margin_dpo/margin_mean": 84.71833801269531,
"margin_dpo/margin_std": 120.70704650878906,
"step": 395
},
{
"epoch": 0.8293193717277487,
"fcm_dpo/beta": 0.005744542460888624,
"fcm_dpo/delta": 0.01056294422596693,
"fcm_dpo/margin": 74.62256622314453,
"fcm_dpo/q_t": 0.4045211672782898,
"grad_norm": 91.99522399902344,
"learning_rate": 4.373541737087263e-08,
"logits/chosen": -0.8419132232666016,
"logits/rejected": -0.8268328905105591,
"logps/chosen": -434.677978515625,
"logps/ref_chosen": -291.19830322265625,
"logps/ref_rejected": -253.2803955078125,
"logps/rejected": -471.3826599121094,
"loss": 4.4236,
"margin_dpo/margin_mean": 74.62256622314453,
"margin_dpo/margin_std": 110.49628448486328,
"step": 396
},
{
"epoch": 0.831413612565445,
"fcm_dpo/beta": 0.005709501449018717,
"fcm_dpo/delta": -0.01539832167327404,
"fcm_dpo/margin": 63.596290588378906,
"fcm_dpo/q_t": 0.42058008909225464,
"grad_norm": 97.39371490478516,
"learning_rate": 4.270648801084295e-08,
"logits/chosen": -0.8566058278083801,
"logits/rejected": -0.8354380130767822,
"logps/chosen": -454.8165588378906,
"logps/ref_chosen": -309.8224182128906,
"logps/ref_rejected": -291.9057922363281,
"logps/rejected": -500.4962158203125,
"loss": 4.7185,
"margin_dpo/margin_mean": 63.596290588378906,
"margin_dpo/margin_std": 120.06228637695312,
"step": 397
},
{
"epoch": 0.8335078534031414,
"fcm_dpo/beta": 0.005609571933746338,
"fcm_dpo/delta": -0.022501792758703232,
"fcm_dpo/margin": 61.537353515625,
"fcm_dpo/q_t": 0.4217448830604553,
"grad_norm": 94.82504272460938,
"learning_rate": 4.168867517275806e-08,
"logits/chosen": -0.758474588394165,
"logits/rejected": -0.7971174120903015,
"logps/chosen": -458.55206298828125,
"logps/ref_chosen": -297.8135070800781,
"logps/ref_rejected": -270.5025634765625,
"logps/rejected": -492.7784423828125,
"loss": 4.8741,
"margin_dpo/margin_mean": 61.53734588623047,
"margin_dpo/margin_std": 134.36575317382812,
"step": 398
},
{
"epoch": 0.8356020942408376,
"fcm_dpo/beta": 0.005676961503922939,
"fcm_dpo/delta": 0.020742880180478096,
"fcm_dpo/margin": 69.29886627197266,
"fcm_dpo/q_t": 0.4137263298034668,
"grad_norm": 95.31922912597656,
"learning_rate": 4.0682033438831584e-08,
"logits/chosen": -0.8655831813812256,
"logits/rejected": -0.8285009860992432,
"logps/chosen": -449.01116943359375,
"logps/ref_chosen": -292.8467712402344,
"logps/ref_rejected": -268.3638916015625,
"logps/rejected": -493.8271484375,
"loss": 4.5865,
"margin_dpo/margin_mean": 69.29886627197266,
"margin_dpo/margin_std": 120.91732025146484,
"step": 399
},
{
"epoch": 0.837696335078534,
"fcm_dpo/beta": 0.005642024800181389,
"fcm_dpo/delta": -0.0023907367140054703,
"fcm_dpo/margin": 66.65235900878906,
"fcm_dpo/q_t": 0.4158845841884613,
"grad_norm": 62.70810317993164,
"learning_rate": 3.968661679220467e-08,
"logits/chosen": -0.9007169604301453,
"logits/rejected": -0.9017296433448792,
"logps/chosen": -413.6676330566406,
"logps/ref_chosen": -263.6763916015625,
"logps/ref_rejected": -258.67266845703125,
"logps/rejected": -475.3162536621094,
"loss": 4.6554,
"margin_dpo/margin_mean": 66.65235900878906,
"margin_dpo/margin_std": 119.08416748046875,
"step": 400
},
{
"epoch": 0.8397905759162304,
"fcm_dpo/beta": 0.0058155423030257225,
"fcm_dpo/delta": 0.041426703333854675,
"fcm_dpo/margin": 74.96790313720703,
"fcm_dpo/q_t": 0.4043424427509308,
"grad_norm": 95.43059539794922,
"learning_rate": 3.8702478614051345e-08,
"logits/chosen": -0.8293420672416687,
"logits/rejected": -0.8301993608474731,
"logps/chosen": -465.02734375,
"logps/ref_chosen": -318.2853088378906,
"logps/ref_rejected": -293.75225830078125,
"logps/rejected": -515.462158203125,
"loss": 4.4232,
"margin_dpo/margin_mean": 74.96790313720703,
"margin_dpo/margin_std": 115.87003326416016,
"step": 401
},
{
"epoch": 0.8418848167539267,
"fcm_dpo/beta": 0.005902654491364956,
"fcm_dpo/delta": 0.020538516342639923,
"fcm_dpo/margin": 74.3623046875,
"fcm_dpo/q_t": 0.40442952513694763,
"grad_norm": 93.4496841430664,
"learning_rate": 3.772967168071517e-08,
"logits/chosen": -0.8920119404792786,
"logits/rejected": -0.8689513802528381,
"logps/chosen": -452.31707763671875,
"logps/ref_chosen": -309.4278564453125,
"logps/ref_rejected": -282.0279846191406,
"logps/rejected": -499.279541015625,
"loss": 4.404,
"margin_dpo/margin_mean": 74.3623046875,
"margin_dpo/margin_std": 115.68778228759766,
"step": 402
},
{
"epoch": 0.8439790575916231,
"fcm_dpo/beta": 0.005882907193154097,
"fcm_dpo/delta": -0.04950367659330368,
"fcm_dpo/margin": 99.05455017089844,
"fcm_dpo/q_t": 0.3726005554199219,
"grad_norm": 70.34110260009766,
"learning_rate": 3.676824816087978e-08,
"logits/chosen": -0.8733730912208557,
"logits/rejected": -0.8539401292800903,
"logps/chosen": -452.7962646484375,
"logps/ref_chosen": -309.1670837402344,
"logps/ref_rejected": -273.0928955078125,
"logps/rejected": -515.776611328125,
"loss": 3.9753,
"margin_dpo/margin_mean": 99.05455017089844,
"margin_dpo/margin_std": 114.16398620605469,
"step": 403
},
{
"epoch": 0.8460732984293193,
"fcm_dpo/beta": 0.005841795355081558,
"fcm_dpo/delta": 0.036075517535209656,
"fcm_dpo/margin": 75.77478790283203,
"fcm_dpo/q_t": 0.40231138467788696,
"grad_norm": 118.02269744873047,
"learning_rate": 3.581825961277074e-08,
"logits/chosen": -0.8921858072280884,
"logits/rejected": -0.8689145445823669,
"logps/chosen": -454.9412536621094,
"logps/ref_chosen": -297.5953674316406,
"logps/ref_rejected": -257.24658203125,
"logps/rejected": -490.36724853515625,
"loss": 4.4651,
"margin_dpo/margin_mean": 75.77479553222656,
"margin_dpo/margin_std": 121.71118927001953,
"step": 404
},
{
"epoch": 0.8481675392670157,
"fcm_dpo/beta": 0.00599704822525382,
"fcm_dpo/delta": 0.032708872109651566,
"fcm_dpo/margin": 80.49796295166016,
"fcm_dpo/q_t": 0.39463940262794495,
"grad_norm": 58.484832763671875,
"learning_rate": 3.487975698139084e-08,
"logits/chosen": -0.816624641418457,
"logits/rejected": -0.8253002166748047,
"logps/chosen": -406.7665100097656,
"logps/ref_chosen": -257.96533203125,
"logps/ref_rejected": -255.811279296875,
"logps/rejected": -485.1104431152344,
"loss": 4.2978,
"margin_dpo/margin_mean": 80.49796295166016,
"margin_dpo/margin_std": 116.48614501953125,
"step": 405
},
{
"epoch": 0.8502617801047121,
"fcm_dpo/beta": 0.006106458138674498,
"fcm_dpo/delta": 0.007878802716732025,
"fcm_dpo/margin": 54.28831481933594,
"fcm_dpo/q_t": 0.42538759112358093,
"grad_norm": 88.17230224609375,
"learning_rate": 3.3952790595787986e-08,
"logits/chosen": -0.8533459901809692,
"logits/rejected": -0.8310421705245972,
"logps/chosen": -448.6619873046875,
"logps/ref_chosen": -285.1810607910156,
"logps/ref_rejected": -264.41351318359375,
"logps/rejected": -482.18280029296875,
"loss": 4.789,
"margin_dpo/margin_mean": 54.28831481933594,
"margin_dpo/margin_std": 110.01734924316406,
"step": 406
},
{
"epoch": 0.8523560209424084,
"fcm_dpo/beta": 0.006114899180829525,
"fcm_dpo/delta": 0.004650698509067297,
"fcm_dpo/margin": 79.76924133300781,
"fcm_dpo/q_t": 0.39609432220458984,
"grad_norm": 83.25770568847656,
"learning_rate": 3.303741016635614e-08,
"logits/chosen": -0.8408970832824707,
"logits/rejected": -0.8644695281982422,
"logps/chosen": -431.0948486328125,
"logps/ref_chosen": -265.23809814453125,
"logps/ref_rejected": -219.0631561279297,
"logps/rejected": -464.6891174316406,
"loss": 4.3823,
"margin_dpo/margin_mean": 79.76924133300781,
"margin_dpo/margin_std": 124.53511810302734,
"step": 407
},
{
"epoch": 0.8544502617801047,
"fcm_dpo/beta": 0.006033940240740776,
"fcm_dpo/delta": -0.04740475118160248,
"fcm_dpo/margin": 78.37755584716797,
"fcm_dpo/q_t": 0.39640355110168457,
"grad_norm": 73.92862701416016,
"learning_rate": 3.2133664782169944e-08,
"logits/chosen": -0.8809211254119873,
"logits/rejected": -0.8750734329223633,
"logps/chosen": -442.30926513671875,
"logps/ref_chosen": -296.9726257324219,
"logps/ref_rejected": -295.4786376953125,
"logps/rejected": -519.1928100585938,
"loss": 4.3317,
"margin_dpo/margin_mean": 78.37754821777344,
"margin_dpo/margin_std": 111.70768737792969,
"step": 408
},
{
"epoch": 0.856544502617801,
"fcm_dpo/beta": 0.005910936277359724,
"fcm_dpo/delta": 0.0008720820769667625,
"fcm_dpo/margin": 76.07402038574219,
"fcm_dpo/q_t": 0.40171951055526733,
"grad_norm": 87.3036880493164,
"learning_rate": 3.12416029083514e-08,
"logits/chosen": -0.8479756712913513,
"logits/rejected": -0.8337869048118591,
"logps/chosen": -443.5372619628906,
"logps/ref_chosen": -287.37933349609375,
"logps/ref_rejected": -275.80291748046875,
"logps/rejected": -508.03485107421875,
"loss": 4.5197,
"margin_dpo/margin_mean": 76.07402038574219,
"margin_dpo/margin_std": 132.66152954101562,
"step": 409
},
{
"epoch": 0.8586387434554974,
"fcm_dpo/beta": 0.00603690929710865,
"fcm_dpo/delta": 0.05478723347187042,
"fcm_dpo/margin": 70.18323516845703,
"fcm_dpo/q_t": 0.4090788662433624,
"grad_norm": 101.47988891601562,
"learning_rate": 3.036127238347164e-08,
"logits/chosen": -0.8647603392601013,
"logits/rejected": -0.8730304837226868,
"logps/chosen": -432.797607421875,
"logps/ref_chosen": -281.7801818847656,
"logps/ref_rejected": -266.7550354003906,
"logps/rejected": -487.9557189941406,
"loss": 4.5527,
"margin_dpo/margin_mean": 70.18323516845703,
"margin_dpo/margin_std": 122.88518524169922,
"step": 410
},
{
"epoch": 0.8607329842931937,
"fcm_dpo/beta": 0.006130387540906668,
"fcm_dpo/delta": -0.022047296166419983,
"fcm_dpo/margin": 84.9005355834961,
"fcm_dpo/q_t": 0.38678428530693054,
"grad_norm": 81.07767486572266,
"learning_rate": 2.9492720416985e-08,
"logits/chosen": -0.8631331920623779,
"logits/rejected": -0.821528434753418,
"logps/chosen": -429.8334045410156,
"logps/ref_chosen": -281.5872497558594,
"logps/ref_rejected": -254.78916931152344,
"logps/rejected": -487.9358825683594,
"loss": 4.1487,
"margin_dpo/margin_mean": 84.9005355834961,
"margin_dpo/margin_std": 108.43609619140625,
"step": 411
},
{
"epoch": 0.86282722513089,
"fcm_dpo/beta": 0.005866607651114464,
"fcm_dpo/delta": -0.03245619311928749,
"fcm_dpo/margin": 67.33855438232422,
"fcm_dpo/q_t": 0.41442614793777466,
"grad_norm": 112.37825775146484,
"learning_rate": 2.863599358669755e-08,
"logits/chosen": -0.8319679498672485,
"logits/rejected": -0.8427531719207764,
"logps/chosen": -442.21954345703125,
"logps/ref_chosen": -276.796142578125,
"logps/ref_rejected": -274.1370849609375,
"logps/rejected": -506.8990783691406,
"loss": 4.5832,
"margin_dpo/margin_mean": 67.33855438232422,
"margin_dpo/margin_std": 114.56013488769531,
"step": 412
},
{
"epoch": 0.8649214659685864,
"fcm_dpo/beta": 0.005889165215194225,
"fcm_dpo/delta": 0.0,
"fcm_dpo/margin": 62.966705322265625,
"fcm_dpo/q_t": 0.41903725266456604,
"grad_norm": 77.6297836303711,
"learning_rate": 2.7791137836269158e-08,
"logits/chosen": -0.8528549671173096,
"logits/rejected": -0.8511483669281006,
"logps/chosen": -427.43853759765625,
"logps/ref_chosen": -271.2745666503906,
"logps/ref_rejected": -270.16912841796875,
"logps/rejected": -489.2998046875,
"loss": 4.6339,
"margin_dpo/margin_mean": 62.96670913696289,
"margin_dpo/margin_std": 115.2607421875,
"step": 413
},
{
"epoch": 0.8670157068062827,
"fcm_dpo/beta": 0.005872922483831644,
"fcm_dpo/delta": -0.018332332372665405,
"fcm_dpo/margin": 81.90978240966797,
"fcm_dpo/q_t": 0.39579349756240845,
"grad_norm": 96.03504180908203,
"learning_rate": 2.6958198472749717e-08,
"logits/chosen": -0.8964706659317017,
"logits/rejected": -0.8990840911865234,
"logps/chosen": -453.42193603515625,
"logps/ref_chosen": -297.11505126953125,
"logps/ref_rejected": -271.7034606933594,
"logps/rejected": -509.920166015625,
"loss": 4.3142,
"margin_dpo/margin_mean": 81.90979766845703,
"margin_dpo/margin_std": 116.18328094482422,
"step": 414
},
{
"epoch": 0.8691099476439791,
"fcm_dpo/beta": 0.005942903459072113,
"fcm_dpo/delta": 0.03745156526565552,
"fcm_dpo/margin": 75.0562744140625,
"fcm_dpo/q_t": 0.39994585514068604,
"grad_norm": 75.44696044921875,
"learning_rate": 2.613722016414943e-08,
"logits/chosen": -0.8856995701789856,
"logits/rejected": -0.8719730377197266,
"logps/chosen": -445.2117919921875,
"logps/ref_chosen": -297.6926574707031,
"logps/ref_rejected": -279.0503234863281,
"logps/rejected": -501.625732421875,
"loss": 4.3258,
"margin_dpo/margin_mean": 75.0562744140625,
"margin_dpo/margin_std": 104.50729370117188,
"step": 415
},
{
"epoch": 0.8712041884816754,
"fcm_dpo/beta": 0.005803161766380072,
"fcm_dpo/delta": -0.027818644419312477,
"fcm_dpo/margin": 88.9339370727539,
"fcm_dpo/q_t": 0.38566797971725464,
"grad_norm": 57.11982345581055,
"learning_rate": 2.5328246937043525e-08,
"logits/chosen": -0.8839541077613831,
"logits/rejected": -0.8885830640792847,
"logps/chosen": -454.14947509765625,
"logps/ref_chosen": -311.8255615234375,
"logps/ref_rejected": -268.6170654296875,
"logps/rejected": -499.87493896484375,
"loss": 4.2069,
"margin_dpo/margin_mean": 88.9339370727539,
"margin_dpo/margin_std": 118.29706573486328,
"step": 416
},
{
"epoch": 0.8732984293193717,
"fcm_dpo/beta": 0.00576009601354599,
"fcm_dpo/delta": -0.03987088054418564,
"fcm_dpo/margin": 76.869140625,
"fcm_dpo/q_t": 0.4039611518383026,
"grad_norm": 79.72920227050781,
"learning_rate": 2.4531322174210973e-08,
"logits/chosen": -0.8273904919624329,
"logits/rejected": -0.8230270743370056,
"logps/chosen": -464.0906066894531,
"logps/ref_chosen": -310.43682861328125,
"logps/ref_rejected": -277.15283203125,
"logps/rejected": -507.67578125,
"loss": 4.4403,
"margin_dpo/margin_mean": 76.869140625,
"margin_dpo/margin_std": 118.56803131103516,
"step": 417
},
{
"epoch": 0.875392670157068,
"fcm_dpo/beta": 0.005557649303227663,
"fcm_dpo/delta": -0.03773919492959976,
"fcm_dpo/margin": 73.4759750366211,
"fcm_dpo/q_t": 0.4096967577934265,
"grad_norm": 82.97373962402344,
"learning_rate": 2.3746488612308295e-08,
"logits/chosen": -0.8288162350654602,
"logits/rejected": -0.8059218525886536,
"logps/chosen": -443.56036376953125,
"logps/ref_chosen": -278.49591064453125,
"logps/ref_rejected": -276.56671142578125,
"logps/rejected": -515.107177734375,
"loss": 4.5563,
"margin_dpo/margin_mean": 73.4759750366211,
"margin_dpo/margin_std": 122.90873718261719,
"step": 418
},
{
"epoch": 0.8774869109947644,
"fcm_dpo/beta": 0.005459555424749851,
"fcm_dpo/delta": -0.02270214632153511,
"fcm_dpo/margin": 82.70216369628906,
"fcm_dpo/q_t": 0.39849433302879333,
"grad_norm": 79.49519348144531,
"learning_rate": 2.297378833957761e-08,
"logits/chosen": -0.8849822282791138,
"logits/rejected": -0.8598195910453796,
"logps/chosen": -464.46832275390625,
"logps/ref_chosen": -298.9002380371094,
"logps/ref_rejected": -246.1540985107422,
"logps/rejected": -494.4243469238281,
"loss": 4.4405,
"margin_dpo/margin_mean": 82.70216369628906,
"margin_dpo/margin_std": 130.42945861816406,
"step": 419
},
{
"epoch": 0.8795811518324608,
"fcm_dpo/beta": 0.005352628417313099,
"fcm_dpo/delta": -0.010891912505030632,
"fcm_dpo/margin": 91.52397918701172,
"fcm_dpo/q_t": 0.39347517490386963,
"grad_norm": 85.73942565917969,
"learning_rate": 2.2213262793589482e-08,
"logits/chosen": -0.8193755745887756,
"logits/rejected": -0.7928704619407654,
"logps/chosen": -428.12677001953125,
"logps/ref_chosen": -264.5608825683594,
"logps/ref_rejected": -245.67031860351562,
"logps/rejected": -500.7602233886719,
"loss": 4.3604,
"margin_dpo/margin_mean": 91.52397918701172,
"margin_dpo/margin_std": 141.74795532226562,
"step": 420
},
{
"epoch": 0.881675392670157,
"fcm_dpo/beta": 0.005432287231087685,
"fcm_dpo/delta": 0.027727685868740082,
"fcm_dpo/margin": 79.07569885253906,
"fcm_dpo/q_t": 0.40264689922332764,
"grad_norm": 86.56868743896484,
"learning_rate": 2.1464952759020856e-08,
"logits/chosen": -0.9046144485473633,
"logits/rejected": -0.8843198418617249,
"logps/chosen": -444.3966064453125,
"logps/ref_chosen": -297.70501708984375,
"logps/ref_rejected": -243.74771118164062,
"logps/rejected": -469.5150146484375,
"loss": 4.3809,
"margin_dpo/margin_mean": 79.07569122314453,
"margin_dpo/margin_std": 113.20210266113281,
"step": 421
},
{
"epoch": 0.8837696335078534,
"fcm_dpo/beta": 0.005514299962669611,
"fcm_dpo/delta": 0.024063242599368095,
"fcm_dpo/margin": 79.2686767578125,
"fcm_dpo/q_t": 0.40496283769607544,
"grad_norm": 80.32569122314453,
"learning_rate": 2.07288983654679e-08,
"logits/chosen": -0.7527928352355957,
"logits/rejected": -0.7986171245574951,
"logps/chosen": -443.7966613769531,
"logps/ref_chosen": -288.3587646484375,
"logps/ref_rejected": -256.4377746582031,
"logps/rejected": -491.14434814453125,
"loss": 4.4651,
"margin_dpo/margin_mean": 79.2686767578125,
"margin_dpo/margin_std": 129.04014587402344,
"step": 422
},
{
"epoch": 0.8858638743455497,
"fcm_dpo/beta": 0.0055565787479281425,
"fcm_dpo/delta": -0.0007310956716537476,
"fcm_dpo/margin": 89.85076904296875,
"fcm_dpo/q_t": 0.3894519507884979,
"grad_norm": 83.02074432373047,
"learning_rate": 2.0005139085293942e-08,
"logits/chosen": -0.8701252937316895,
"logits/rejected": -0.8482787013053894,
"logps/chosen": -452.765380859375,
"logps/ref_chosen": -296.00701904296875,
"logps/ref_rejected": -261.3480529785156,
"logps/rejected": -507.9571838378906,
"loss": 4.2225,
"margin_dpo/margin_mean": 89.85076904296875,
"margin_dpo/margin_std": 120.07376861572266,
"step": 423
},
{
"epoch": 0.8879581151832461,
"fcm_dpo/beta": 0.0054782391525805,
"fcm_dpo/delta": -0.01255854032933712,
"fcm_dpo/margin": 79.7599105834961,
"fcm_dpo/q_t": 0.401252418756485,
"grad_norm": 85.05402374267578,
"learning_rate": 1.9293713731512673e-08,
"logits/chosen": -0.8636439442634583,
"logits/rejected": -0.8568890690803528,
"logps/chosen": -457.044189453125,
"logps/ref_chosen": -309.421875,
"logps/ref_rejected": -249.14886474609375,
"logps/rejected": -476.53106689453125,
"loss": 4.3359,
"margin_dpo/margin_mean": 79.75990295410156,
"margin_dpo/margin_std": 107.50566101074219,
"step": 424
},
{
"epoch": 0.8900523560209425,
"fcm_dpo/beta": 0.005506892688572407,
"fcm_dpo/delta": -0.0035199569538235664,
"fcm_dpo/margin": 69.21192169189453,
"fcm_dpo/q_t": 0.41614991426467896,
"grad_norm": 69.28353118896484,
"learning_rate": 1.8594660455706763e-08,
"logits/chosen": -0.8524129390716553,
"logits/rejected": -0.8573122024536133,
"logps/chosen": -438.5583801269531,
"logps/ref_chosen": -280.50909423828125,
"logps/ref_rejected": -276.8252258300781,
"logps/rejected": -504.08642578125,
"loss": 4.6056,
"margin_dpo/margin_mean": 69.21192169189453,
"margin_dpo/margin_std": 119.23878479003906,
"step": 425
},
{
"epoch": 0.8921465968586387,
"fcm_dpo/beta": 0.0055983117781579494,
"fcm_dpo/delta": 0.03057074546813965,
"fcm_dpo/margin": 78.6160888671875,
"fcm_dpo/q_t": 0.4026082456111908,
"grad_norm": 109.25310516357422,
"learning_rate": 1.7908016745981856e-08,
"logits/chosen": -0.8885151147842407,
"logits/rejected": -0.8731377124786377,
"logps/chosen": -450.5845947265625,
"logps/ref_chosen": -292.78521728515625,
"logps/ref_rejected": -255.62698364257812,
"logps/rejected": -492.04241943359375,
"loss": 4.3977,
"margin_dpo/margin_mean": 78.6160888671875,
"margin_dpo/margin_std": 116.64258575439453,
"step": 426
},
{
"epoch": 0.8942408376963351,
"fcm_dpo/beta": 0.005566183011978865,
"fcm_dpo/delta": -0.07854758203029633,
"fcm_dpo/margin": 106.50798034667969,
"fcm_dpo/q_t": 0.3709147274494171,
"grad_norm": 92.78337860107422,
"learning_rate": 1.7233819424956247e-08,
"logits/chosen": -0.857439398765564,
"logits/rejected": -0.8251030445098877,
"logps/chosen": -443.70599365234375,
"logps/ref_chosen": -288.7687072753906,
"logps/ref_rejected": -268.4986572265625,
"logps/rejected": -529.9439697265625,
"loss": 4.0525,
"margin_dpo/margin_mean": 106.50798797607422,
"margin_dpo/margin_std": 131.25039672851562,
"step": 427
},
{
"epoch": 0.8963350785340314,
"fcm_dpo/beta": 0.005344281904399395,
"fcm_dpo/delta": 0.004657023120671511,
"fcm_dpo/margin": 94.15774536132812,
"fcm_dpo/q_t": 0.38818326592445374,
"grad_norm": 79.40316009521484,
"learning_rate": 1.6572104647786245e-08,
"logits/chosen": -0.807416558265686,
"logits/rejected": -0.8362680673599243,
"logps/chosen": -468.56304931640625,
"logps/ref_chosen": -295.5209655761719,
"logps/ref_rejected": -275.71026611328125,
"logps/rejected": -542.91015625,
"loss": 4.2235,
"margin_dpo/margin_mean": 94.15774536132812,
"margin_dpo/margin_std": 125.85404205322266,
"step": 428
},
{
"epoch": 0.8984293193717278,
"fcm_dpo/beta": 0.005322734825313091,
"fcm_dpo/delta": 0.021079566329717636,
"fcm_dpo/margin": 85.7845458984375,
"fcm_dpo/q_t": 0.39704862236976624,
"grad_norm": 104.11244201660156,
"learning_rate": 1.5922907900227017e-08,
"logits/chosen": -0.8256397247314453,
"logits/rejected": -0.8333263993263245,
"logps/chosen": -432.5317077636719,
"logps/ref_chosen": -274.392333984375,
"logps/ref_rejected": -258.574462890625,
"logps/rejected": -502.49835205078125,
"loss": 4.4423,
"margin_dpo/margin_mean": 85.7845458984375,
"margin_dpo/margin_std": 134.6868896484375,
"step": 429
},
{
"epoch": 0.900523560209424,
"fcm_dpo/beta": 0.0054636141285300255,
"fcm_dpo/delta": 0.007338759955018759,
"fcm_dpo/margin": 73.55387115478516,
"fcm_dpo/q_t": 0.41170477867126465,
"grad_norm": 77.30048370361328,
"learning_rate": 1.5286263996730026e-08,
"logits/chosen": -0.9002894163131714,
"logits/rejected": -0.8716924786567688,
"logps/chosen": -446.356689453125,
"logps/ref_chosen": -288.7391357421875,
"logps/ref_rejected": -268.6106262207031,
"logps/rejected": -499.7821044921875,
"loss": 4.534,
"margin_dpo/margin_mean": 73.55387115478516,
"margin_dpo/margin_std": 121.59858703613281,
"step": 430
},
{
"epoch": 0.9026178010471204,
"fcm_dpo/beta": 0.005443257745355368,
"fcm_dpo/delta": -0.012642772868275642,
"fcm_dpo/margin": 62.72542190551758,
"fcm_dpo/q_t": 0.4242577850818634,
"grad_norm": 79.91255187988281,
"learning_rate": 1.4662207078575684e-08,
"logits/chosen": -0.8643919229507446,
"logits/rejected": -0.8308559060096741,
"logps/chosen": -431.7356262207031,
"logps/ref_chosen": -273.8291931152344,
"logps/ref_rejected": -269.02239990234375,
"logps/rejected": -489.6541748046875,
"loss": 4.6841,
"margin_dpo/margin_mean": 62.72542190551758,
"margin_dpo/margin_std": 112.40259552001953,
"step": 431
},
{
"epoch": 0.9047120418848168,
"fcm_dpo/beta": 0.00547941867262125,
"fcm_dpo/delta": 0.029989372938871384,
"fcm_dpo/margin": 82.27677917480469,
"fcm_dpo/q_t": 0.40229594707489014,
"grad_norm": 54.76456069946289,
"learning_rate": 1.40507706120426e-08,
"logits/chosen": -0.8918091654777527,
"logits/rejected": -0.8755742311477661,
"logps/chosen": -438.60479736328125,
"logps/ref_chosen": -291.42010498046875,
"logps/ref_rejected": -255.48202514648438,
"logps/rejected": -484.9435119628906,
"loss": 4.3868,
"margin_dpo/margin_mean": 82.27677917480469,
"margin_dpo/margin_std": 124.5217514038086,
"step": 432
},
{
"epoch": 0.9068062827225131,
"fcm_dpo/beta": 0.005607725586742163,
"fcm_dpo/delta": 0.02624966762959957,
"fcm_dpo/margin": 74.57537841796875,
"fcm_dpo/q_t": 0.40625956654548645,
"grad_norm": 72.38121032714844,
"learning_rate": 1.345198738661285e-08,
"logits/chosen": -0.8581191301345825,
"logits/rejected": -0.8533951044082642,
"logps/chosen": -411.82525634765625,
"logps/ref_chosen": -246.2268829345703,
"logps/ref_rejected": -253.65924072265625,
"logps/rejected": -493.8329772949219,
"loss": 4.4275,
"margin_dpo/margin_mean": 74.57537841796875,
"margin_dpo/margin_std": 113.67277526855469,
"step": 433
},
{
"epoch": 0.9089005235602095,
"fcm_dpo/beta": 0.005738706793636084,
"fcm_dpo/delta": 0.04150499403476715,
"fcm_dpo/margin": 73.76260375976562,
"fcm_dpo/q_t": 0.40676653385162354,
"grad_norm": 81.86857604980469,
"learning_rate": 1.2865889513213628e-08,
"logits/chosen": -0.8482558727264404,
"logits/rejected": -0.8541163206100464,
"logps/chosen": -465.7991027832031,
"logps/ref_chosen": -295.4618225097656,
"logps/ref_rejected": -256.2254333496094,
"logps/rejected": -500.3253479003906,
"loss": 4.4495,
"margin_dpo/margin_mean": 73.76260375976562,
"margin_dpo/margin_std": 114.90216064453125,
"step": 434
},
{
"epoch": 0.9109947643979057,
"fcm_dpo/beta": 0.005786753259599209,
"fcm_dpo/delta": -0.025829218327999115,
"fcm_dpo/margin": 83.76541900634766,
"fcm_dpo/q_t": 0.39663389325141907,
"grad_norm": 120.20237731933594,
"learning_rate": 1.2292508422495157e-08,
"logits/chosen": -0.8453208208084106,
"logits/rejected": -0.8324633240699768,
"logps/chosen": -415.5836486816406,
"logps/ref_chosen": -260.7384033203125,
"logps/ref_rejected": -248.5688018798828,
"logps/rejected": -487.179443359375,
"loss": 4.2876,
"margin_dpo/margin_mean": 83.76541900634766,
"margin_dpo/margin_std": 117.57093811035156,
"step": 435
},
{
"epoch": 0.9130890052356021,
"fcm_dpo/beta": 0.005721217952668667,
"fcm_dpo/delta": 0.00584397790953517,
"fcm_dpo/margin": 65.6273193359375,
"fcm_dpo/q_t": 0.4201821982860565,
"grad_norm": 95.52420806884766,
"learning_rate": 1.1731874863145142e-08,
"logits/chosen": -0.8257200717926025,
"logits/rejected": -0.822697103023529,
"logps/chosen": -488.3257141113281,
"logps/ref_chosen": -319.3224792480469,
"logps/ref_rejected": -299.30322265625,
"logps/rejected": -533.9337768554688,
"loss": 4.6615,
"margin_dpo/margin_mean": 65.62731170654297,
"margin_dpo/margin_std": 127.28849029541016,
"step": 436
},
{
"epoch": 0.9151832460732985,
"fcm_dpo/beta": 0.005732518620789051,
"fcm_dpo/delta": -0.007303288206458092,
"fcm_dpo/margin": 88.15834045410156,
"fcm_dpo/q_t": 0.3912045359611511,
"grad_norm": 96.03433990478516,
"learning_rate": 1.118401890024001e-08,
"logits/chosen": -0.8685740232467651,
"logits/rejected": -0.8529913425445557,
"logps/chosen": -437.06005859375,
"logps/ref_chosen": -279.1155700683594,
"logps/ref_rejected": -272.904052734375,
"logps/rejected": -519.0068969726562,
"loss": 4.2895,
"margin_dpo/margin_mean": 88.15834045410156,
"margin_dpo/margin_std": 126.89192199707031,
"step": 437
},
{
"epoch": 0.9172774869109948,
"fcm_dpo/beta": 0.005765823647379875,
"fcm_dpo/delta": 0.013268672861158848,
"fcm_dpo/margin": 45.000396728515625,
"fcm_dpo/q_t": 0.4435799717903137,
"grad_norm": 86.30032348632812,
"learning_rate": 1.06489699136324e-08,
"logits/chosen": -0.8477824330329895,
"logits/rejected": -0.8642206788063049,
"logps/chosen": -424.9432373046875,
"logps/ref_chosen": -259.53076171875,
"logps/ref_rejected": -241.20753479003906,
"logps/rejected": -451.620361328125,
"loss": 5.072,
"margin_dpo/margin_mean": 45.00039291381836,
"margin_dpo/margin_std": 120.71588897705078,
"step": 438
},
{
"epoch": 0.9193717277486911,
"fcm_dpo/beta": 0.005863718222826719,
"fcm_dpo/delta": 0.02462560310959816,
"fcm_dpo/margin": 81.05281066894531,
"fcm_dpo/q_t": 0.3949528932571411,
"grad_norm": 99.85994720458984,
"learning_rate": 1.0126756596375685e-08,
"logits/chosen": -0.8318859934806824,
"logits/rejected": -0.8341456055641174,
"logps/chosen": -419.4014587402344,
"logps/ref_chosen": -257.1243896484375,
"logps/ref_rejected": -243.20416259765625,
"logps/rejected": -486.5340270996094,
"loss": 4.293,
"margin_dpo/margin_mean": 81.05281066894531,
"margin_dpo/margin_std": 114.34336853027344,
"step": 439
},
{
"epoch": 0.9214659685863874,
"fcm_dpo/beta": 0.005901531782001257,
"fcm_dpo/delta": 0.015944896265864372,
"fcm_dpo/margin": 57.71433639526367,
"fcm_dpo/q_t": 0.4226870834827423,
"grad_norm": 78.55369567871094,
"learning_rate": 9.617406953185136e-09,
"logits/chosen": -0.8871990442276001,
"logits/rejected": -0.8788408041000366,
"logps/chosen": -482.9383239746094,
"logps/ref_chosen": -307.680419921875,
"logps/ref_rejected": -264.5030212402344,
"logps/rejected": -497.47528076171875,
"loss": 4.7105,
"margin_dpo/margin_mean": 57.71433639526367,
"margin_dpo/margin_std": 106.74054718017578,
"step": 440
},
{
"epoch": 0.9235602094240838,
"fcm_dpo/beta": 0.005923756398260593,
"fcm_dpo/delta": -0.023813921958208084,
"fcm_dpo/margin": 84.93556213378906,
"fcm_dpo/q_t": 0.38803669810295105,
"grad_norm": 86.6318588256836,
"learning_rate": 9.12094829893642e-09,
"logits/chosen": -0.8496291637420654,
"logits/rejected": -0.8375406265258789,
"logps/chosen": -470.83203125,
"logps/ref_chosen": -309.9819641113281,
"logps/ref_rejected": -297.4968566894531,
"logps/rejected": -543.282470703125,
"loss": 4.1966,
"margin_dpo/margin_mean": 84.93556213378906,
"margin_dpo/margin_std": 108.48258972167969,
"step": 441
},
{
"epoch": 0.9256544502617801,
"fcm_dpo/beta": 0.005848567001521587,
"fcm_dpo/delta": 0.002810728969052434,
"fcm_dpo/margin": 65.81173706054688,
"fcm_dpo/q_t": 0.4157796800136566,
"grad_norm": 85.53507232666016,
"learning_rate": 8.637407257200496e-09,
"logits/chosen": -0.9164412021636963,
"logits/rejected": -0.8720095157623291,
"logps/chosen": -456.07440185546875,
"logps/ref_chosen": -278.9791564941406,
"logps/ref_rejected": -242.87310791015625,
"logps/rejected": -485.7801513671875,
"loss": 4.6629,
"margin_dpo/margin_mean": 65.81172943115234,
"margin_dpo/margin_std": 123.5169677734375,
"step": 442
},
{
"epoch": 0.9277486910994764,
"fcm_dpo/beta": 0.005795356351882219,
"fcm_dpo/delta": -0.04416951909661293,
"fcm_dpo/margin": 73.87689208984375,
"fcm_dpo/q_t": 0.40389981865882874,
"grad_norm": 84.2241439819336,
"learning_rate": 8.166809758815895e-09,
"logits/chosen": -0.806608259677887,
"logits/rejected": -0.8269810676574707,
"logps/chosen": -433.94921875,
"logps/ref_chosen": -273.5590515136719,
"logps/ref_rejected": -264.0199279785156,
"logps/rejected": -498.2870178222656,
"loss": 4.4568,
"margin_dpo/margin_mean": 73.87689208984375,
"margin_dpo/margin_std": 113.74835205078125,
"step": 443
},
{
"epoch": 0.9298429319371728,
"fcm_dpo/beta": 0.005656501278281212,
"fcm_dpo/delta": 0.010248812846839428,
"fcm_dpo/margin": 81.75801849365234,
"fcm_dpo/q_t": 0.4004564583301544,
"grad_norm": 87.11282348632812,
"learning_rate": 7.709181040498253e-09,
"logits/chosen": -0.8332200646400452,
"logits/rejected": -0.8207956552505493,
"logps/chosen": -460.2959899902344,
"logps/ref_chosen": -298.1441955566406,
"logps/ref_rejected": -268.0572814941406,
"logps/rejected": -511.96710205078125,
"loss": 4.4441,
"margin_dpo/margin_mean": 81.75801849365234,
"margin_dpo/margin_std": 133.82241821289062,
"step": 444
},
{
"epoch": 0.9319371727748691,
"fcm_dpo/beta": 0.005604690872132778,
"fcm_dpo/delta": -0.05251846835017204,
"fcm_dpo/margin": 70.86494445800781,
"fcm_dpo/q_t": 0.41341015696525574,
"grad_norm": 77.97962188720703,
"learning_rate": 7.2645456434869965e-09,
"logits/chosen": -0.9123114943504333,
"logits/rejected": -0.9230950474739075,
"logps/chosen": -417.5129699707031,
"logps/ref_chosen": -254.54067993164062,
"logps/ref_rejected": -264.2445983886719,
"logps/rejected": -498.081787109375,
"loss": 4.6085,
"margin_dpo/margin_mean": 70.86495208740234,
"margin_dpo/margin_std": 120.35411071777344,
"step": 445
},
{
"epoch": 0.9340314136125655,
"fcm_dpo/beta": 0.005446064751595259,
"fcm_dpo/delta": 0.013901110738515854,
"fcm_dpo/margin": 83.37535858154297,
"fcm_dpo/q_t": 0.39877498149871826,
"grad_norm": 58.67851257324219,
"learning_rate": 6.832927412229017e-09,
"logits/chosen": -0.8186119794845581,
"logits/rejected": -0.8207546472549438,
"logps/chosen": -461.3026428222656,
"logps/ref_chosen": -306.72247314453125,
"logps/ref_rejected": -266.3735656738281,
"logps/rejected": -504.3291320800781,
"loss": 4.3583,
"margin_dpo/margin_mean": 83.37535858154297,
"margin_dpo/margin_std": 121.05807495117188,
"step": 446
},
{
"epoch": 0.9361256544502617,
"fcm_dpo/beta": 0.00545801455155015,
"fcm_dpo/delta": 0.007384308613836765,
"fcm_dpo/margin": 91.6766357421875,
"fcm_dpo/q_t": 0.3885115385055542,
"grad_norm": 80.14990997314453,
"learning_rate": 6.414349493100129e-09,
"logits/chosen": -0.8291323781013489,
"logits/rejected": -0.8268994688987732,
"logps/chosen": -409.84698486328125,
"logps/ref_chosen": -260.51727294921875,
"logps/ref_rejected": -236.47061157226562,
"logps/rejected": -477.4768981933594,
"loss": 4.153,
"margin_dpo/margin_mean": 91.6766357421875,
"margin_dpo/margin_std": 111.279052734375,
"step": 447
},
{
"epoch": 0.9382198952879581,
"fcm_dpo/beta": 0.005555163137614727,
"fcm_dpo/delta": 0.01007094793021679,
"fcm_dpo/margin": 82.22817993164062,
"fcm_dpo/q_t": 0.3993835747241974,
"grad_norm": 96.70314025878906,
"learning_rate": 6.0088343331638756e-09,
"logits/chosen": -0.8319501280784607,
"logits/rejected": -0.8259201049804688,
"logps/chosen": -435.1070861816406,
"logps/ref_chosen": -268.78704833984375,
"logps/ref_rejected": -262.1703796386719,
"logps/rejected": -510.7186279296875,
"loss": 4.3331,
"margin_dpo/margin_mean": 82.22817993164062,
"margin_dpo/margin_std": 115.585205078125,
"step": 448
},
{
"epoch": 0.9403141361256544,
"fcm_dpo/beta": 0.005662827752530575,
"fcm_dpo/delta": 0.016118278726935387,
"fcm_dpo/margin": 84.56497955322266,
"fcm_dpo/q_t": 0.39151531457901,
"grad_norm": 94.81627655029297,
"learning_rate": 5.616403678967624e-09,
"logits/chosen": -0.9148420691490173,
"logits/rejected": -0.8991859555244446,
"logps/chosen": -485.2103271484375,
"logps/ref_chosen": -331.58074951171875,
"logps/ref_rejected": -240.3651123046875,
"logps/rejected": -478.5596923828125,
"loss": 4.3273,
"margin_dpo/margin_mean": 84.56498718261719,
"margin_dpo/margin_std": 122.7528076171875,
"step": 449
},
{
"epoch": 0.9424083769633508,
"fcm_dpo/beta": 0.005679248366504908,
"fcm_dpo/delta": 0.003446461632847786,
"fcm_dpo/margin": 75.41486358642578,
"fcm_dpo/q_t": 0.40379881858825684,
"grad_norm": 96.63995361328125,
"learning_rate": 5.2370785753763356e-09,
"logits/chosen": -0.813703179359436,
"logits/rejected": -0.8123511672019958,
"logps/chosen": -454.12786865234375,
"logps/ref_chosen": -284.26544189453125,
"logps/ref_rejected": -250.5401611328125,
"logps/rejected": -495.81744384765625,
"loss": 4.3804,
"margin_dpo/margin_mean": 75.41486358642578,
"margin_dpo/margin_std": 105.50797271728516,
"step": 450
},
{
"epoch": 0.9445026178010472,
"fcm_dpo/beta": 0.0055923121981322765,
"fcm_dpo/delta": -0.017466533929109573,
"fcm_dpo/margin": 70.07624053955078,
"fcm_dpo/q_t": 0.4131358861923218,
"grad_norm": 89.1207046508789,
"learning_rate": 4.8708793644441086e-09,
"logits/chosen": -0.8348339200019836,
"logits/rejected": -0.8052613735198975,
"logps/chosen": -477.8414306640625,
"logps/ref_chosen": -302.3209228515625,
"logps/ref_rejected": -254.09747314453125,
"logps/rejected": -499.6942138671875,
"loss": 4.5781,
"margin_dpo/margin_mean": 70.07624816894531,
"margin_dpo/margin_std": 117.26962280273438,
"step": 451
},
{
"epoch": 0.9465968586387434,
"fcm_dpo/beta": 0.005450280383229256,
"fcm_dpo/delta": -0.03073035180568695,
"fcm_dpo/margin": 87.17440795898438,
"fcm_dpo/q_t": 0.397165447473526,
"grad_norm": 85.69123840332031,
"learning_rate": 4.517825684323323e-09,
"logits/chosen": -0.8877136707305908,
"logits/rejected": -0.8643620014190674,
"logps/chosen": -459.5303039550781,
"logps/ref_chosen": -299.39215087890625,
"logps/ref_rejected": -284.3475036621094,
"logps/rejected": -531.6600952148438,
"loss": 4.3552,
"margin_dpo/margin_mean": 87.17440795898438,
"margin_dpo/margin_std": 129.93690490722656,
"step": 452
},
{
"epoch": 0.9486910994764398,
"fcm_dpo/beta": 0.005511339753866196,
"fcm_dpo/delta": 0.02019287645816803,
"fcm_dpo/margin": 80.06608581542969,
"fcm_dpo/q_t": 0.4024481177330017,
"grad_norm": 73.6377182006836,
"learning_rate": 4.1779364682113794e-09,
"logits/chosen": -0.8337778449058533,
"logits/rejected": -0.8293156027793884,
"logps/chosen": -494.59014892578125,
"logps/ref_chosen": -324.6517028808594,
"logps/ref_rejected": -304.1527099609375,
"logps/rejected": -554.1572875976562,
"loss": 4.3921,
"margin_dpo/margin_mean": 80.06608581542969,
"margin_dpo/margin_std": 122.65001678466797,
"step": 453
},
{
"epoch": 0.9507853403141361,
"fcm_dpo/beta": 0.005554089788347483,
"fcm_dpo/delta": -0.007839905098080635,
"fcm_dpo/margin": 85.82544708251953,
"fcm_dpo/q_t": 0.3937579393386841,
"grad_norm": 79.51781463623047,
"learning_rate": 3.851229943335393e-09,
"logits/chosen": -0.8730612993240356,
"logits/rejected": -0.8847228288650513,
"logps/chosen": -465.4644775390625,
"logps/ref_chosen": -299.6117248535156,
"logps/ref_rejected": -303.74224853515625,
"logps/rejected": -555.42041015625,
"loss": 4.3296,
"margin_dpo/margin_mean": 85.82544708251953,
"margin_dpo/margin_std": 124.97549438476562,
"step": 454
},
{
"epoch": 0.9528795811518325,
"fcm_dpo/beta": 0.005550594534724951,
"fcm_dpo/delta": 0.004189205355942249,
"fcm_dpo/margin": 62.6799430847168,
"fcm_dpo/q_t": 0.4252288043498993,
"grad_norm": 80.37173461914062,
"learning_rate": 3.5377236299748147e-09,
"logits/chosen": -0.8488650321960449,
"logits/rejected": -0.8594434261322021,
"logps/chosen": -433.9133605957031,
"logps/ref_chosen": -273.6116943359375,
"logps/ref_rejected": -274.4293518066406,
"logps/rejected": -497.4110412597656,
"loss": 4.7381,
"margin_dpo/margin_mean": 62.67994689941406,
"margin_dpo/margin_std": 124.70183563232422,
"step": 455
},
{
"epoch": 0.9549738219895288,
"fcm_dpo/beta": 0.005470286589115858,
"fcm_dpo/delta": -0.042232729494571686,
"fcm_dpo/margin": 87.12472534179688,
"fcm_dpo/q_t": 0.40261968970298767,
"grad_norm": 79.4292221069336,
"learning_rate": 3.2374343405217884e-09,
"logits/chosen": -0.7552270889282227,
"logits/rejected": -0.7663687467575073,
"logps/chosen": -502.1330261230469,
"logps/ref_chosen": -322.17193603515625,
"logps/ref_rejected": -294.54461669921875,
"logps/rejected": -561.6304321289062,
"loss": 4.4833,
"margin_dpo/margin_mean": 87.12472534179688,
"margin_dpo/margin_std": 152.3446044921875,
"step": 456
},
{
"epoch": 0.9570680628272251,
"fcm_dpo/beta": 0.005494946148246527,
"fcm_dpo/delta": 0.039688169956207275,
"fcm_dpo/margin": 90.5218505859375,
"fcm_dpo/q_t": 0.38801220059394836,
"grad_norm": 77.78425598144531,
"learning_rate": 2.9503781785795713e-09,
"logits/chosen": -0.831157386302948,
"logits/rejected": -0.8342105150222778,
"logps/chosen": -477.7151794433594,
"logps/ref_chosen": -307.7962341308594,
"logps/ref_rejected": -274.5501403808594,
"logps/rejected": -534.9909057617188,
"loss": 4.3304,
"margin_dpo/margin_mean": 90.5218505859375,
"margin_dpo/margin_std": 134.86016845703125,
"step": 457
},
{
"epoch": 0.9591623036649215,
"fcm_dpo/beta": 0.005554028321057558,
"fcm_dpo/delta": 0.004314765799790621,
"fcm_dpo/margin": 78.83430480957031,
"fcm_dpo/q_t": 0.40472283959388733,
"grad_norm": 69.29220581054688,
"learning_rate": 2.6765705380989432e-09,
"logits/chosen": -0.8536108136177063,
"logits/rejected": -0.8355450630187988,
"logps/chosen": -466.06524658203125,
"logps/ref_chosen": -297.0316467285156,
"logps/ref_rejected": -276.1112365722656,
"logps/rejected": -523.9791259765625,
"loss": 4.4573,
"margin_dpo/margin_mean": 78.83430480957031,
"margin_dpo/margin_std": 125.95325469970703,
"step": 458
},
{
"epoch": 0.9612565445026178,
"fcm_dpo/beta": 0.005654921289533377,
"fcm_dpo/delta": 0.04975789785385132,
"fcm_dpo/margin": 67.66356658935547,
"fcm_dpo/q_t": 0.4175841510295868,
"grad_norm": 79.01788330078125,
"learning_rate": 2.416026102552732e-09,
"logits/chosen": -0.9122970104217529,
"logits/rejected": -0.9040276408195496,
"logps/chosen": -451.9207763671875,
"logps/ref_chosen": -293.5252990722656,
"logps/ref_rejected": -289.30126953125,
"logps/rejected": -515.3603515625,
"loss": 4.6228,
"margin_dpo/margin_mean": 67.66356658935547,
"margin_dpo/margin_std": 121.35086059570312,
"step": 459
},
{
"epoch": 0.9633507853403142,
"fcm_dpo/beta": 0.005886501632630825,
"fcm_dpo/delta": 0.050066620111465454,
"fcm_dpo/margin": 66.52505493164062,
"fcm_dpo/q_t": 0.4135185778141022,
"grad_norm": 120.4760513305664,
"learning_rate": 2.168758844148272e-09,
"logits/chosen": -0.8763177990913391,
"logits/rejected": -0.8776261806488037,
"logps/chosen": -480.5386962890625,
"logps/ref_chosen": -318.7803649902344,
"logps/ref_rejected": -258.7906799316406,
"logps/rejected": -487.07403564453125,
"loss": 4.5718,
"margin_dpo/margin_mean": 66.52505493164062,
"margin_dpo/margin_std": 115.43054962158203,
"step": 460
},
{
"epoch": 0.9654450261780104,
"fcm_dpo/beta": 0.006130027584731579,
"fcm_dpo/delta": 0.02878350019454956,
"fcm_dpo/margin": 76.16785430908203,
"fcm_dpo/q_t": 0.4008653461933136,
"grad_norm": 97.33731079101562,
"learning_rate": 1.9347820230782295e-09,
"logits/chosen": -0.8445146083831787,
"logits/rejected": -0.8692530989646912,
"logps/chosen": -407.8790588378906,
"logps/ref_chosen": -243.9099884033203,
"logps/ref_rejected": -232.6382293701172,
"logps/rejected": -472.7751159667969,
"loss": 4.4989,
"margin_dpo/margin_mean": 76.16785430908203,
"margin_dpo/margin_std": 129.78131103515625,
"step": 461
},
{
"epoch": 0.9675392670157068,
"fcm_dpo/beta": 0.00608763936907053,
"fcm_dpo/delta": -0.02736498787999153,
"fcm_dpo/margin": 90.83987426757812,
"fcm_dpo/q_t": 0.38287225365638733,
"grad_norm": 91.66661834716797,
"learning_rate": 1.7141081868094209e-09,
"logits/chosen": -0.8614755868911743,
"logits/rejected": -0.8124670386314392,
"logps/chosen": -508.40203857421875,
"logps/ref_chosen": -344.09100341796875,
"logps/ref_rejected": -252.45037841796875,
"logps/rejected": -507.601318359375,
"loss": 4.2288,
"margin_dpo/margin_mean": 90.83987426757812,
"margin_dpo/margin_std": 130.77877807617188,
"step": 462
},
{
"epoch": 0.9696335078534032,
"fcm_dpo/beta": 0.005978195928037167,
"fcm_dpo/delta": -0.015503959730267525,
"fcm_dpo/margin": 72.70486450195312,
"fcm_dpo/q_t": 0.40621206164360046,
"grad_norm": 88.91027069091797,
"learning_rate": 1.5067491694100153e-09,
"logits/chosen": -0.870310366153717,
"logits/rejected": -0.8297045230865479,
"logps/chosen": -459.2799072265625,
"logps/ref_chosen": -297.3134460449219,
"logps/ref_rejected": -234.3878936767578,
"logps/rejected": -469.05926513671875,
"loss": 4.5566,
"margin_dpo/margin_mean": 72.70486450195312,
"margin_dpo/margin_std": 128.3613739013672,
"step": 463
},
{
"epoch": 0.9717277486910995,
"fcm_dpo/beta": 0.005981272552162409,
"fcm_dpo/delta": 0.0026608407497406006,
"fcm_dpo/margin": 69.68535614013672,
"fcm_dpo/q_t": 0.4095439314842224,
"grad_norm": 143.6160888671875,
"learning_rate": 1.3127160909147672e-09,
"logits/chosen": -0.8514159917831421,
"logits/rejected": -0.8731627464294434,
"logps/chosen": -441.0743408203125,
"logps/ref_chosen": -265.71075439453125,
"logps/ref_rejected": -256.4108581542969,
"logps/rejected": -501.45977783203125,
"loss": 4.5617,
"margin_dpo/margin_mean": 69.68534851074219,
"margin_dpo/margin_std": 122.15727233886719,
"step": 464
},
{
"epoch": 0.9738219895287958,
"fcm_dpo/beta": 0.00563573744148016,
"fcm_dpo/delta": -0.0926453098654747,
"fcm_dpo/margin": 94.17436218261719,
"fcm_dpo/q_t": 0.3835006058216095,
"grad_norm": 84.5207290649414,
"learning_rate": 1.1320193567288527e-09,
"logits/chosen": -0.9116008877754211,
"logits/rejected": -0.8782393336296082,
"logps/chosen": -449.47894287109375,
"logps/ref_chosen": -293.1527404785156,
"logps/ref_rejected": -293.70947265625,
"logps/rejected": -544.2099609375,
"loss": 4.196,
"margin_dpo/margin_mean": 94.17436218261719,
"margin_dpo/margin_std": 121.221923828125,
"step": 465
},
{
"epoch": 0.9759162303664921,
"fcm_dpo/beta": 0.005492227151989937,
"fcm_dpo/delta": -0.01386126596480608,
"fcm_dpo/margin": 96.84224700927734,
"fcm_dpo/q_t": 0.38248857855796814,
"grad_norm": 73.73484802246094,
"learning_rate": 9.64668657069706e-10,
"logits/chosen": -0.8305215835571289,
"logits/rejected": -0.7785975337028503,
"logps/chosen": -410.4551086425781,
"logps/ref_chosen": -261.4775695800781,
"logps/ref_rejected": -248.36282348632812,
"logps/rejected": -494.18255615234375,
"loss": 4.0916,
"margin_dpo/margin_mean": 96.84223175048828,
"margin_dpo/margin_std": 117.41493225097656,
"step": 466
},
{
"epoch": 0.9780104712041885,
"fcm_dpo/beta": 0.005417585372924805,
"fcm_dpo/delta": -0.010228976607322693,
"fcm_dpo/margin": 71.93285369873047,
"fcm_dpo/q_t": 0.4153047800064087,
"grad_norm": 81.48310852050781,
"learning_rate": 8.106729664475176e-10,
"logits/chosen": -0.8108528852462769,
"logits/rejected": -0.8057974576950073,
"logps/chosen": -431.22412109375,
"logps/ref_chosen": -266.354248046875,
"logps/ref_rejected": -277.76324462890625,
"logps/rejected": -514.5659790039062,
"loss": 4.6387,
"margin_dpo/margin_mean": 71.93285369873047,
"margin_dpo/margin_std": 132.09213256835938,
"step": 467
},
{
"epoch": 0.9801047120418848,
"fcm_dpo/beta": 0.00549811776727438,
"fcm_dpo/delta": 0.021085752174258232,
"fcm_dpo/margin": 67.23999786376953,
"fcm_dpo/q_t": 0.41613680124282837,
"grad_norm": 82.15486145019531,
"learning_rate": 6.700405431837585e-10,
"logits/chosen": -0.9056313037872314,
"logits/rejected": -0.8760198354721069,
"logps/chosen": -480.73162841796875,
"logps/ref_chosen": -317.9631652832031,
"logps/ref_rejected": -261.8744201660156,
"logps/rejected": -491.88287353515625,
"loss": 4.6372,
"margin_dpo/margin_mean": 67.23999786376953,
"margin_dpo/margin_std": 119.53501892089844,
"step": 468
},
{
"epoch": 0.9821989528795811,
"fcm_dpo/beta": 0.005514613352715969,
"fcm_dpo/delta": -0.004768936894834042,
"fcm_dpo/margin": 91.36453247070312,
"fcm_dpo/q_t": 0.3884834349155426,
"grad_norm": 64.80612182617188,
"learning_rate": 5.427789289685347e-10,
"logits/chosen": -0.8382606506347656,
"logits/rejected": -0.8244605660438538,
"logps/chosen": -479.63787841796875,
"logps/ref_chosen": -324.8868103027344,
"logps/ref_rejected": -264.0421447753906,
"logps/rejected": -510.1578063964844,
"loss": 4.2394,
"margin_dpo/margin_mean": 91.36453247070312,
"margin_dpo/margin_std": 125.39714050292969,
"step": 469
},
{
"epoch": 0.9842931937172775,
"fcm_dpo/beta": 0.005484522320330143,
"fcm_dpo/delta": -0.013818852603435516,
"fcm_dpo/margin": 84.76747131347656,
"fcm_dpo/q_t": 0.39746540784835815,
"grad_norm": 83.80744171142578,
"learning_rate": 4.288949484559934e-10,
"logits/chosen": -0.8254011869430542,
"logits/rejected": -0.8231028318405151,
"logps/chosen": -470.1563720703125,
"logps/ref_chosen": -314.7042541503906,
"logps/ref_rejected": -259.2276611328125,
"logps/rejected": -499.447265625,
"loss": 4.3306,
"margin_dpo/margin_mean": 84.76747131347656,
"margin_dpo/margin_std": 120.29107666015625,
"step": 470
},
{
"epoch": 0.9863874345549738,
"fcm_dpo/beta": 0.0054491800256073475,
"fcm_dpo/delta": -0.004639061167836189,
"fcm_dpo/margin": 78.18816375732422,
"fcm_dpo/q_t": 0.41097328066825867,
"grad_norm": 89.04488372802734,
"learning_rate": 3.2839470889836627e-10,
"logits/chosen": -0.8858464360237122,
"logits/rejected": -0.8748361468315125,
"logps/chosen": -462.7659912109375,
"logps/ref_chosen": -292.801513671875,
"logps/ref_rejected": -298.979248046875,
"logps/rejected": -547.1318359375,
"loss": 4.5156,
"margin_dpo/margin_mean": 78.18816375732422,
"margin_dpo/margin_std": 133.43814086914062,
"step": 471
},
{
"epoch": 0.9884816753926702,
"fcm_dpo/beta": 0.005437402054667473,
"fcm_dpo/delta": 0.0063332486897706985,
"fcm_dpo/margin": 88.26492309570312,
"fcm_dpo/q_t": 0.3961615264415741,
"grad_norm": 73.95541381835938,
"learning_rate": 2.412835998185092e-10,
"logits/chosen": -0.8828275203704834,
"logits/rejected": -0.8989793658256531,
"logps/chosen": -395.50311279296875,
"logps/ref_chosen": -243.37380981445312,
"logps/ref_rejected": -251.12109375,
"logps/rejected": -491.51531982421875,
"loss": 4.277,
"margin_dpo/margin_mean": 88.26492309570312,
"margin_dpo/margin_std": 122.50725555419922,
"step": 472
},
{
"epoch": 0.9905759162303664,
"fcm_dpo/beta": 0.005580813158303499,
"fcm_dpo/delta": 0.042260996997356415,
"fcm_dpo/margin": 91.16032409667969,
"fcm_dpo/q_t": 0.3865862488746643,
"grad_norm": 75.50499725341797,
"learning_rate": 1.6756629272085544e-10,
"logits/chosen": -0.8339266180992126,
"logits/rejected": -0.8364192247390747,
"logps/chosen": -444.529052734375,
"logps/ref_chosen": -286.3286437988281,
"logps/ref_rejected": -258.6535339355469,
"logps/rejected": -508.0143127441406,
"loss": 4.1634,
"margin_dpo/margin_mean": 91.16032409667969,
"margin_dpo/margin_std": 114.12368774414062,
"step": 473
},
{
"epoch": 0.9926701570680628,
"fcm_dpo/beta": 0.005527782253921032,
"fcm_dpo/delta": -0.028659962117671967,
"fcm_dpo/margin": 75.26751708984375,
"fcm_dpo/q_t": 0.40586668252944946,
"grad_norm": 91.60271453857422,
"learning_rate": 1.072467408408384e-10,
"logits/chosen": -0.8723991513252258,
"logits/rejected": -0.8708733916282654,
"logps/chosen": -451.0887756347656,
"logps/ref_chosen": -288.08966064453125,
"logps/ref_rejected": -266.69696044921875,
"logps/rejected": -504.9636535644531,
"loss": 4.4347,
"margin_dpo/margin_mean": 75.26751708984375,
"margin_dpo/margin_std": 108.9689712524414,
"step": 474
},
{
"epoch": 0.9947643979057592,
"fcm_dpo/beta": 0.0054468982852995396,
"fcm_dpo/delta": -0.02301694266498089,
"fcm_dpo/margin": 70.21611022949219,
"fcm_dpo/q_t": 0.41601884365081787,
"grad_norm": 91.83534240722656,
"learning_rate": 6.032817893297793e-11,
"logits/chosen": -0.8479362726211548,
"logits/rejected": -0.8634592890739441,
"logps/chosen": -409.83917236328125,
"logps/ref_chosen": -256.0030517578125,
"logps/ref_rejected": -244.50660705566406,
"logps/rejected": -468.5588073730469,
"loss": 4.5513,
"margin_dpo/margin_mean": 70.21611022949219,
"margin_dpo/margin_std": 114.1260986328125,
"step": 475
},
{
"epoch": 0.9968586387434555,
"fcm_dpo/beta": 0.005282857920974493,
"fcm_dpo/delta": -0.0387037992477417,
"fcm_dpo/margin": 80.51129913330078,
"fcm_dpo/q_t": 0.40867236256599426,
"grad_norm": 65.54981994628906,
"learning_rate": 2.6813123097352287e-11,
"logits/chosen": -0.9229732751846313,
"logits/rejected": -0.8801470398902893,
"logps/chosen": -470.89788818359375,
"logps/ref_chosen": -321.467529296875,
"logps/ref_rejected": -295.0592956542969,
"logps/rejected": -525.0009765625,
"loss": 4.4865,
"margin_dpo/margin_mean": 80.51129150390625,
"margin_dpo/margin_std": 127.75419616699219,
"step": 476
},
{
"epoch": 0.9989528795811519,
"fcm_dpo/beta": 0.005212708842009306,
"fcm_dpo/delta": -0.02850748598575592,
"fcm_dpo/margin": 78.61067199707031,
"fcm_dpo/q_t": 0.4106035530567169,
"grad_norm": 77.4634780883789,
"learning_rate": 6.7033706447061635e-12,
"logits/chosen": -0.8011319041252136,
"logits/rejected": -0.8102338314056396,
"logps/chosen": -445.9722900390625,
"logps/ref_chosen": -277.4477233886719,
"logps/ref_rejected": -244.70004272460938,
"logps/rejected": -491.83526611328125,
"loss": 4.6089,
"margin_dpo/margin_mean": 78.61067199707031,
"margin_dpo/margin_std": 140.86875915527344,
"step": 477
},
{
"epoch": 0.9989528795811519,
"step": 477,
"total_flos": 0.0,
"train_loss": 4.696330676288725,
"train_runtime": 6071.6812,
"train_samples_per_second": 10.069,
"train_steps_per_second": 0.079
}
],
"logging_steps": 1,
"max_steps": 477,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}