15707 lines
589 KiB
JSON
15707 lines
589 KiB
JSON
{
|
|
"best_global_step": null,
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 1.0,
|
|
"eval_steps": 200,
|
|
"global_step": 681,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"KL/chosen_KL_mean": 0.00527191162109375,
|
|
"KL/mean": 0.016706019639968872,
|
|
"KL/rejected_KL_mean": 0.028141021728515625,
|
|
"KL/std": 0.272699236869812,
|
|
"epoch": 0.0014684287812041115,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.02287006378173828,
|
|
"fcm_dpo/q_t": 0.5000571608543396,
|
|
"grad_norm": 8.340126991271973,
|
|
"learning_rate": 0.0,
|
|
"logits/chosen": -0.4974287748336792,
|
|
"logits/rejected": -0.43299180269241333,
|
|
"logps/chosen": -50.1435661315918,
|
|
"logps/ref_chosen": -50.14883804321289,
|
|
"logps/ref_rejected": -74.1280517578125,
|
|
"logps/rejected": -74.09991455078125,
|
|
"loss": 1.3865,
|
|
"margin_dpo/margin_mean": -0.02287048101425171,
|
|
"margin_dpo/margin_std": 0.41920793056488037,
|
|
"step": 1
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.03498649597167969,
|
|
"KL/mean": -0.00212840735912323,
|
|
"KL/rejected_KL_mean": 0.030735015869140625,
|
|
"KL/std": 0.24797174334526062,
|
|
"epoch": 0.002936857562408223,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.06572261452674866,
|
|
"fcm_dpo/q_t": 0.500164270401001,
|
|
"grad_norm": 7.205794811248779,
|
|
"learning_rate": 7.246376811594203e-09,
|
|
"logits/chosen": -0.49536412954330444,
|
|
"logits/rejected": -0.4594460427761078,
|
|
"logps/chosen": -52.65568923950195,
|
|
"logps/ref_chosen": -52.620704650878906,
|
|
"logps/ref_rejected": -75.30413818359375,
|
|
"logps/rejected": -75.27340698242188,
|
|
"loss": 1.387,
|
|
"margin_dpo/margin_mean": -0.06572240591049194,
|
|
"margin_dpo/margin_std": 0.35048407316207886,
|
|
"step": 2
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": 0.028177261352539062,
|
|
"KL/mean": 0.011634737253189087,
|
|
"KL/rejected_KL_mean": -0.00490570068359375,
|
|
"KL/std": 0.2545679211616516,
|
|
"epoch": 0.004405286343612335,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.03308027982711792,
|
|
"fcm_dpo/q_t": 0.49991729855537415,
|
|
"grad_norm": 7.091545581817627,
|
|
"learning_rate": 1.4492753623188406e-08,
|
|
"logits/chosen": -0.4817052185535431,
|
|
"logits/rejected": -0.44228988885879517,
|
|
"logps/chosen": -60.95341873168945,
|
|
"logps/ref_chosen": -60.981597900390625,
|
|
"logps/ref_rejected": -68.67259216308594,
|
|
"logps/rejected": -68.67750549316406,
|
|
"loss": 1.386,
|
|
"margin_dpo/margin_mean": 0.03308090567588806,
|
|
"margin_dpo/margin_std": 0.3488999903202057,
|
|
"step": 3
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": 0.006183624267578125,
|
|
"KL/mean": 0.019635915756225586,
|
|
"KL/rejected_KL_mean": 0.03308868408203125,
|
|
"KL/std": 0.28558221459388733,
|
|
"epoch": 0.005873715124816446,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.026903212070465088,
|
|
"fcm_dpo/q_t": 0.5000672340393066,
|
|
"grad_norm": 7.214421272277832,
|
|
"learning_rate": 2.1739130434782606e-08,
|
|
"logits/chosen": -0.468106746673584,
|
|
"logits/rejected": -0.44051337242126465,
|
|
"logps/chosen": -56.76152801513672,
|
|
"logps/ref_chosen": -56.7677116394043,
|
|
"logps/ref_rejected": -86.64710998535156,
|
|
"logps/rejected": -86.61402130126953,
|
|
"loss": 1.3866,
|
|
"margin_dpo/margin_mean": -0.026903808116912842,
|
|
"margin_dpo/margin_std": 0.39421218633651733,
|
|
"step": 4
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": 0.01943206787109375,
|
|
"KL/mean": 0.002883225679397583,
|
|
"KL/rejected_KL_mean": -0.013660430908203125,
|
|
"KL/std": 0.2767731547355652,
|
|
"epoch": 0.007342143906020558,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.03309446573257446,
|
|
"fcm_dpo/q_t": 0.49991726875305176,
|
|
"grad_norm": 8.964797019958496,
|
|
"learning_rate": 2.898550724637681e-08,
|
|
"logits/chosen": -0.5146475434303284,
|
|
"logits/rejected": -0.47093117237091064,
|
|
"logps/chosen": -53.839942932128906,
|
|
"logps/ref_chosen": -53.859375,
|
|
"logps/ref_rejected": -84.14918518066406,
|
|
"logps/rejected": -84.162841796875,
|
|
"loss": 1.386,
|
|
"margin_dpo/margin_mean": 0.033094823360443115,
|
|
"margin_dpo/margin_std": 0.38494962453842163,
|
|
"step": 5
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.009487152099609375,
|
|
"KL/mean": -0.01324455440044403,
|
|
"KL/rejected_KL_mean": -0.01700592041015625,
|
|
"KL/std": 0.27032917737960815,
|
|
"epoch": 0.00881057268722467,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.007514864206314087,
|
|
"fcm_dpo/q_t": 0.49998119473457336,
|
|
"grad_norm": 9.190613746643066,
|
|
"learning_rate": 3.6231884057971014e-08,
|
|
"logits/chosen": -0.5035334825515747,
|
|
"logits/rejected": -0.46098393201828003,
|
|
"logps/chosen": -63.016971588134766,
|
|
"logps/ref_chosen": -63.007484436035156,
|
|
"logps/ref_rejected": -92.64534759521484,
|
|
"logps/rejected": -92.662353515625,
|
|
"loss": 1.3862,
|
|
"margin_dpo/margin_mean": 0.007514625787734985,
|
|
"margin_dpo/margin_std": 0.3818962574005127,
|
|
"step": 6
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": 0.06007957458496094,
|
|
"KL/mean": 0.0322260856628418,
|
|
"KL/rejected_KL_mean": 0.004375457763671875,
|
|
"KL/std": 0.2890022397041321,
|
|
"epoch": 0.010279001468428781,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.05570727586746216,
|
|
"fcm_dpo/q_t": 0.4998607337474823,
|
|
"grad_norm": 8.227945327758789,
|
|
"learning_rate": 4.347826086956521e-08,
|
|
"logits/chosen": -0.5052176713943481,
|
|
"logits/rejected": -0.47141021490097046,
|
|
"logps/chosen": -57.71474075317383,
|
|
"logps/ref_chosen": -57.774818420410156,
|
|
"logps/ref_rejected": -103.92059326171875,
|
|
"logps/rejected": -103.91621398925781,
|
|
"loss": 1.3857,
|
|
"margin_dpo/margin_mean": 0.055707335472106934,
|
|
"margin_dpo/margin_std": 0.38999414443969727,
|
|
"step": 7
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": 0.039844512939453125,
|
|
"KL/mean": 0.04513771831989288,
|
|
"KL/rejected_KL_mean": 0.050434112548828125,
|
|
"KL/std": 0.3095516264438629,
|
|
"epoch": 0.011747430249632892,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.010594159364700317,
|
|
"fcm_dpo/q_t": 0.5000264644622803,
|
|
"grad_norm": 7.855659008026123,
|
|
"learning_rate": 5.0724637681159424e-08,
|
|
"logits/chosen": -0.5170360803604126,
|
|
"logits/rejected": -0.492270290851593,
|
|
"logps/chosen": -58.67619323730469,
|
|
"logps/ref_chosen": -58.716033935546875,
|
|
"logps/ref_rejected": -79.3114242553711,
|
|
"logps/rejected": -79.260986328125,
|
|
"loss": 1.3864,
|
|
"margin_dpo/margin_mean": -0.010594218969345093,
|
|
"margin_dpo/margin_std": 0.42736732959747314,
|
|
"step": 8
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": 0.0255889892578125,
|
|
"KL/mean": 0.0015124678611755371,
|
|
"KL/rejected_KL_mean": -0.0225677490234375,
|
|
"KL/std": 0.2851980924606323,
|
|
"epoch": 0.013215859030837005,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.048153460025787354,
|
|
"fcm_dpo/q_t": 0.4998795986175537,
|
|
"grad_norm": 8.50635814666748,
|
|
"learning_rate": 5.797101449275362e-08,
|
|
"logits/chosen": -0.4870206117630005,
|
|
"logits/rejected": -0.4398488402366638,
|
|
"logps/chosen": -69.84125518798828,
|
|
"logps/ref_chosen": -69.8668441772461,
|
|
"logps/ref_rejected": -99.6026611328125,
|
|
"logps/rejected": -99.62522888183594,
|
|
"loss": 1.3858,
|
|
"margin_dpo/margin_mean": 0.04815271496772766,
|
|
"margin_dpo/margin_std": 0.38030916452407837,
|
|
"step": 9
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.014789581298828125,
|
|
"KL/mean": -0.01405847817659378,
|
|
"KL/rejected_KL_mean": -0.0133209228515625,
|
|
"KL/std": 0.2681947946548462,
|
|
"epoch": 0.014684287812041116,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.0014634579420089722,
|
|
"fcm_dpo/q_t": 0.5000036358833313,
|
|
"grad_norm": 7.091888427734375,
|
|
"learning_rate": 6.521739130434782e-08,
|
|
"logits/chosen": -0.4998844861984253,
|
|
"logits/rejected": -0.45695722103118896,
|
|
"logps/chosen": -48.372474670410156,
|
|
"logps/ref_chosen": -48.35768508911133,
|
|
"logps/ref_rejected": -80.37206268310547,
|
|
"logps/rejected": -80.38538360595703,
|
|
"loss": 1.3863,
|
|
"margin_dpo/margin_mean": -0.001463174819946289,
|
|
"margin_dpo/margin_std": 0.3855435252189636,
|
|
"step": 10
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.007877349853515625,
|
|
"KL/mean": -0.010366648435592651,
|
|
"KL/rejected_KL_mean": -0.01285552978515625,
|
|
"KL/std": 0.2346026599407196,
|
|
"epoch": 0.016152716593245228,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.004973113536834717,
|
|
"fcm_dpo/q_t": 0.49998754262924194,
|
|
"grad_norm": 6.8613715171813965,
|
|
"learning_rate": 7.246376811594203e-08,
|
|
"logits/chosen": -0.46066391468048096,
|
|
"logits/rejected": -0.4356629252433777,
|
|
"logps/chosen": -53.02473449707031,
|
|
"logps/ref_chosen": -53.01685333251953,
|
|
"logps/ref_rejected": -87.78038024902344,
|
|
"logps/rejected": -87.79322814941406,
|
|
"loss": 1.3862,
|
|
"margin_dpo/margin_mean": 0.0049735307693481445,
|
|
"margin_dpo/margin_std": 0.2909265458583832,
|
|
"step": 11
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": 0.006072998046875,
|
|
"KL/mean": -0.024121850728988647,
|
|
"KL/rejected_KL_mean": -0.054309844970703125,
|
|
"KL/std": 0.3304472863674164,
|
|
"epoch": 0.01762114537444934,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.06038558483123779,
|
|
"fcm_dpo/q_t": 0.49984902143478394,
|
|
"grad_norm": 9.009154319763184,
|
|
"learning_rate": 7.971014492753623e-08,
|
|
"logits/chosen": -0.5414502620697021,
|
|
"logits/rejected": -0.5054250359535217,
|
|
"logps/chosen": -61.79936218261719,
|
|
"logps/ref_chosen": -61.80543518066406,
|
|
"logps/ref_rejected": -104.8582763671875,
|
|
"logps/rejected": -104.91258239746094,
|
|
"loss": 1.3857,
|
|
"margin_dpo/margin_mean": 0.0603850781917572,
|
|
"margin_dpo/margin_std": 0.43303191661834717,
|
|
"step": 12
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.0059814453125,
|
|
"KL/mean": 0.00032275915145874023,
|
|
"KL/rejected_KL_mean": 0.00662994384765625,
|
|
"KL/std": 0.2694360017776489,
|
|
"epoch": 0.01908957415565345,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.012606263160705566,
|
|
"fcm_dpo/q_t": 0.5000314712524414,
|
|
"grad_norm": 7.9163641929626465,
|
|
"learning_rate": 8.695652173913042e-08,
|
|
"logits/chosen": -0.49102455377578735,
|
|
"logits/rejected": -0.46374207735061646,
|
|
"logps/chosen": -64.2663345336914,
|
|
"logps/ref_chosen": -64.2603530883789,
|
|
"logps/ref_rejected": -87.20307922363281,
|
|
"logps/rejected": -87.19645690917969,
|
|
"loss": 1.3864,
|
|
"margin_dpo/margin_mean": -0.012606710195541382,
|
|
"margin_dpo/margin_std": 0.3794170618057251,
|
|
"step": 13
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.03643989562988281,
|
|
"KL/mean": -0.018927976489067078,
|
|
"KL/rejected_KL_mean": -0.0014190673828125,
|
|
"KL/std": 0.2660324275493622,
|
|
"epoch": 0.020558002936857563,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.03501877188682556,
|
|
"fcm_dpo/q_t": 0.5000874996185303,
|
|
"grad_norm": 8.576128005981445,
|
|
"learning_rate": 9.420289855072464e-08,
|
|
"logits/chosen": -0.49155694246292114,
|
|
"logits/rejected": -0.4527207314968109,
|
|
"logps/chosen": -58.14665222167969,
|
|
"logps/ref_chosen": -58.11021041870117,
|
|
"logps/ref_rejected": -104.04708099365234,
|
|
"logps/rejected": -104.04850006103516,
|
|
"loss": 1.3866,
|
|
"margin_dpo/margin_mean": -0.03501787781715393,
|
|
"margin_dpo/margin_std": 0.4262927174568176,
|
|
"step": 14
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.028324127197265625,
|
|
"KL/mean": -0.051674991846084595,
|
|
"KL/rejected_KL_mean": -0.07502365112304688,
|
|
"KL/std": 0.23149462044239044,
|
|
"epoch": 0.022026431718061675,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.04670119285583496,
|
|
"fcm_dpo/q_t": 0.499883234500885,
|
|
"grad_norm": 6.426931858062744,
|
|
"learning_rate": 1.0144927536231885e-07,
|
|
"logits/chosen": -0.5326635837554932,
|
|
"logits/rejected": -0.5161415338516235,
|
|
"logps/chosen": -56.99523162841797,
|
|
"logps/ref_chosen": -56.96691131591797,
|
|
"logps/ref_rejected": -80.80863952636719,
|
|
"logps/rejected": -80.8836669921875,
|
|
"loss": 1.3858,
|
|
"margin_dpo/margin_mean": 0.04670119285583496,
|
|
"margin_dpo/margin_std": 0.32319122552871704,
|
|
"step": 15
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.04820060729980469,
|
|
"KL/mean": -0.04530364274978638,
|
|
"KL/rejected_KL_mean": -0.0424041748046875,
|
|
"KL/std": 0.29397979378700256,
|
|
"epoch": 0.023494860499265784,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.005795121192932129,
|
|
"fcm_dpo/q_t": 0.5000145435333252,
|
|
"grad_norm": 8.416418075561523,
|
|
"learning_rate": 1.0869565217391303e-07,
|
|
"logits/chosen": -0.5461217164993286,
|
|
"logits/rejected": -0.5072727203369141,
|
|
"logps/chosen": -61.78809356689453,
|
|
"logps/ref_chosen": -61.739891052246094,
|
|
"logps/ref_rejected": -84.36947631835938,
|
|
"logps/rejected": -84.41188049316406,
|
|
"loss": 1.3864,
|
|
"margin_dpo/margin_mean": -0.005795121192932129,
|
|
"margin_dpo/margin_std": 0.3890739381313324,
|
|
"step": 16
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": 0.046901702880859375,
|
|
"KL/mean": -0.010359078645706177,
|
|
"KL/rejected_KL_mean": -0.06762313842773438,
|
|
"KL/std": 0.27913013100624084,
|
|
"epoch": 0.024963289280469897,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.11452382802963257,
|
|
"fcm_dpo/q_t": 0.4997136890888214,
|
|
"grad_norm": 7.9028801918029785,
|
|
"learning_rate": 1.1594202898550725e-07,
|
|
"logits/chosen": -0.5041570067405701,
|
|
"logits/rejected": -0.4673753082752228,
|
|
"logps/chosen": -67.66343688964844,
|
|
"logps/ref_chosen": -67.71033477783203,
|
|
"logps/ref_rejected": -85.37865447998047,
|
|
"logps/rejected": -85.44627380371094,
|
|
"loss": 1.3852,
|
|
"margin_dpo/margin_mean": 0.11452355980873108,
|
|
"margin_dpo/margin_std": 0.37491074204444885,
|
|
"step": 17
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.0292816162109375,
|
|
"KL/mean": -0.03873269259929657,
|
|
"KL/rejected_KL_mean": -0.048187255859375,
|
|
"KL/std": 0.23902130126953125,
|
|
"epoch": 0.02643171806167401,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.018904417753219604,
|
|
"fcm_dpo/q_t": 0.4999527335166931,
|
|
"grad_norm": 8.233268737792969,
|
|
"learning_rate": 1.2318840579710146e-07,
|
|
"logits/chosen": -0.5057047009468079,
|
|
"logits/rejected": -0.45175978541374207,
|
|
"logps/chosen": -47.76877212524414,
|
|
"logps/ref_chosen": -47.7394905090332,
|
|
"logps/ref_rejected": -75.4722900390625,
|
|
"logps/rejected": -75.52047729492188,
|
|
"loss": 1.3861,
|
|
"margin_dpo/margin_mean": 0.01890420913696289,
|
|
"margin_dpo/margin_std": 0.31775712966918945,
|
|
"step": 18
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": 0.03473663330078125,
|
|
"KL/mean": -0.004338964819908142,
|
|
"KL/rejected_KL_mean": -0.0434112548828125,
|
|
"KL/std": 0.24089065194129944,
|
|
"epoch": 0.027900146842878122,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.07813850045204163,
|
|
"fcm_dpo/q_t": 0.49980464577674866,
|
|
"grad_norm": 7.455746173858643,
|
|
"learning_rate": 1.3043478260869563e-07,
|
|
"logits/chosen": -0.5036299228668213,
|
|
"logits/rejected": -0.45466092228889465,
|
|
"logps/chosen": -70.17062377929688,
|
|
"logps/ref_chosen": -70.20536041259766,
|
|
"logps/ref_rejected": -89.7575912475586,
|
|
"logps/rejected": -89.8010025024414,
|
|
"loss": 1.3855,
|
|
"margin_dpo/margin_mean": 0.07813867926597595,
|
|
"margin_dpo/margin_std": 0.3746962547302246,
|
|
"step": 19
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.049213409423828125,
|
|
"KL/mean": -0.0454762727022171,
|
|
"KL/rejected_KL_mean": -0.041736602783203125,
|
|
"KL/std": 0.29056376218795776,
|
|
"epoch": 0.02936857562408223,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": -0.007474333047866821,
|
|
"fcm_dpo/q_t": 0.5000186562538147,
|
|
"grad_norm": 7.4274702072143555,
|
|
"learning_rate": 1.3768115942028986e-07,
|
|
"logits/chosen": -0.5517487525939941,
|
|
"logits/rejected": -0.49535927176475525,
|
|
"logps/chosen": -50.852455139160156,
|
|
"logps/ref_chosen": -50.80324172973633,
|
|
"logps/ref_rejected": -78.82334899902344,
|
|
"logps/rejected": -78.86508178710938,
|
|
"loss": 1.3864,
|
|
"margin_dpo/margin_mean": -0.007474362850189209,
|
|
"margin_dpo/margin_std": 0.37508344650268555,
|
|
"step": 20
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.0056247711181640625,
|
|
"KL/mean": -0.04150792211294174,
|
|
"KL/rejected_KL_mean": -0.077392578125,
|
|
"KL/std": 0.30757784843444824,
|
|
"epoch": 0.030837004405286344,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.07176293432712555,
|
|
"fcm_dpo/q_t": 0.4998205900192261,
|
|
"grad_norm": 7.820558547973633,
|
|
"learning_rate": 1.4492753623188405e-07,
|
|
"logits/chosen": -0.5255781412124634,
|
|
"logits/rejected": -0.5039485096931458,
|
|
"logps/chosen": -50.068641662597656,
|
|
"logps/ref_chosen": -50.063018798828125,
|
|
"logps/ref_rejected": -77.86878967285156,
|
|
"logps/rejected": -77.94618225097656,
|
|
"loss": 1.3856,
|
|
"margin_dpo/margin_mean": 0.07176269590854645,
|
|
"margin_dpo/margin_std": 0.43745559453964233,
|
|
"step": 21
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.027448654174804688,
|
|
"KL/mean": -0.0883231908082962,
|
|
"KL/rejected_KL_mean": -0.14919662475585938,
|
|
"KL/std": 0.2602458596229553,
|
|
"epoch": 0.032305433186490456,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.12175038456916809,
|
|
"fcm_dpo/q_t": 0.49969562888145447,
|
|
"grad_norm": 8.56733512878418,
|
|
"learning_rate": 1.5217391304347825e-07,
|
|
"logits/chosen": -0.4898416996002197,
|
|
"logits/rejected": -0.44627994298934937,
|
|
"logps/chosen": -59.0850830078125,
|
|
"logps/ref_chosen": -59.05763626098633,
|
|
"logps/ref_rejected": -97.50466918945312,
|
|
"logps/rejected": -97.65386962890625,
|
|
"loss": 1.3851,
|
|
"margin_dpo/margin_mean": 0.12175118923187256,
|
|
"margin_dpo/margin_std": 0.3743841052055359,
|
|
"step": 22
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": 0.06468772888183594,
|
|
"KL/mean": -0.022989824414253235,
|
|
"KL/rejected_KL_mean": -0.11066055297851562,
|
|
"KL/std": 0.3429142236709595,
|
|
"epoch": 0.033773861967694566,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.17535313963890076,
|
|
"fcm_dpo/q_t": 0.499561607837677,
|
|
"grad_norm": 8.251264572143555,
|
|
"learning_rate": 1.5942028985507245e-07,
|
|
"logits/chosen": -0.4786554276943207,
|
|
"logits/rejected": -0.4556560516357422,
|
|
"logps/chosen": -60.01300811767578,
|
|
"logps/ref_chosen": -60.07769775390625,
|
|
"logps/ref_rejected": -81.13955688476562,
|
|
"logps/rejected": -81.25021362304688,
|
|
"loss": 1.3845,
|
|
"margin_dpo/margin_mean": 0.17535346746444702,
|
|
"margin_dpo/margin_std": 0.47338640689849854,
|
|
"step": 23
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": 0.06081390380859375,
|
|
"KL/mean": -0.07897857576608658,
|
|
"KL/rejected_KL_mean": -0.21877288818359375,
|
|
"KL/std": 0.30757251381874084,
|
|
"epoch": 0.03524229074889868,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.2795853614807129,
|
|
"fcm_dpo/q_t": 0.499301016330719,
|
|
"grad_norm": 8.768179893493652,
|
|
"learning_rate": 1.6666666666666665e-07,
|
|
"logits/chosen": -0.51224684715271,
|
|
"logits/rejected": -0.4956665635108948,
|
|
"logps/chosen": -44.230220794677734,
|
|
"logps/ref_chosen": -44.29103469848633,
|
|
"logps/ref_rejected": -99.12521362304688,
|
|
"logps/rejected": -99.34398651123047,
|
|
"loss": 1.3835,
|
|
"margin_dpo/margin_mean": 0.2795855402946472,
|
|
"margin_dpo/margin_std": 0.3744848668575287,
|
|
"step": 24
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": 0.04968070983886719,
|
|
"KL/mean": -0.09518682956695557,
|
|
"KL/rejected_KL_mean": -0.24005126953125,
|
|
"KL/std": 0.369601845741272,
|
|
"epoch": 0.03671071953010279,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.2897287607192993,
|
|
"fcm_dpo/q_t": 0.49927568435668945,
|
|
"grad_norm": 7.525589942932129,
|
|
"learning_rate": 1.7391304347826085e-07,
|
|
"logits/chosen": -0.4925091564655304,
|
|
"logits/rejected": -0.4624241888523102,
|
|
"logps/chosen": -52.48737335205078,
|
|
"logps/ref_chosen": -52.537052154541016,
|
|
"logps/ref_rejected": -89.34219360351562,
|
|
"logps/rejected": -89.58224487304688,
|
|
"loss": 1.3834,
|
|
"margin_dpo/margin_mean": 0.28972867131233215,
|
|
"margin_dpo/margin_std": 0.42933177947998047,
|
|
"step": 25
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": 0.09215354919433594,
|
|
"KL/mean": -0.1101590245962143,
|
|
"KL/rejected_KL_mean": -0.3124732971191406,
|
|
"KL/std": 0.4453110992908478,
|
|
"epoch": 0.0381791483113069,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.4046301543712616,
|
|
"fcm_dpo/q_t": 0.49898844957351685,
|
|
"grad_norm": 9.038716316223145,
|
|
"learning_rate": 1.8115942028985507e-07,
|
|
"logits/chosen": -0.5335673689842224,
|
|
"logits/rejected": -0.5019059777259827,
|
|
"logps/chosen": -53.83065414428711,
|
|
"logps/ref_chosen": -53.92280578613281,
|
|
"logps/ref_rejected": -103.35971069335938,
|
|
"logps/rejected": -103.67218780517578,
|
|
"loss": 1.3823,
|
|
"margin_dpo/margin_mean": 0.40463075041770935,
|
|
"margin_dpo/margin_std": 0.5417345762252808,
|
|
"step": 26
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": 0.1369171142578125,
|
|
"KL/mean": -0.1361747682094574,
|
|
"KL/rejected_KL_mean": -0.4092674255371094,
|
|
"KL/std": 0.48576533794403076,
|
|
"epoch": 0.039647577092511016,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.5461834073066711,
|
|
"fcm_dpo/q_t": 0.49863457679748535,
|
|
"grad_norm": 9.623809814453125,
|
|
"learning_rate": 1.8840579710144927e-07,
|
|
"logits/chosen": -0.5602696537971497,
|
|
"logits/rejected": -0.5244206190109253,
|
|
"logps/chosen": -42.76161193847656,
|
|
"logps/ref_chosen": -42.898529052734375,
|
|
"logps/ref_rejected": -98.72419738769531,
|
|
"logps/rejected": -99.13346862792969,
|
|
"loss": 1.3809,
|
|
"margin_dpo/margin_mean": 0.5461829900741577,
|
|
"margin_dpo/margin_std": 0.6316946744918823,
|
|
"step": 27
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": 0.014486312866210938,
|
|
"KL/mean": -0.14598755538463593,
|
|
"KL/rejected_KL_mean": -0.3064613342285156,
|
|
"KL/std": 0.3845537304878235,
|
|
"epoch": 0.041116005873715125,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.32094791531562805,
|
|
"fcm_dpo/q_t": 0.4991976320743561,
|
|
"grad_norm": 7.836782932281494,
|
|
"learning_rate": 1.9565217391304347e-07,
|
|
"logits/chosen": -0.502698540687561,
|
|
"logits/rejected": -0.4471771717071533,
|
|
"logps/chosen": -60.542015075683594,
|
|
"logps/ref_chosen": -60.55650329589844,
|
|
"logps/ref_rejected": -91.40111541748047,
|
|
"logps/rejected": -91.70758056640625,
|
|
"loss": 1.3831,
|
|
"margin_dpo/margin_mean": 0.3209477663040161,
|
|
"margin_dpo/margin_std": 0.5258319973945618,
|
|
"step": 28
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": 0.07568168640136719,
|
|
"KL/mean": -0.15161648392677307,
|
|
"KL/rejected_KL_mean": -0.37891387939453125,
|
|
"KL/std": 0.46041831374168396,
|
|
"epoch": 0.042584434654919234,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.4545966386795044,
|
|
"fcm_dpo/q_t": 0.4988635182380676,
|
|
"grad_norm": 9.418075561523438,
|
|
"learning_rate": 2.028985507246377e-07,
|
|
"logits/chosen": -0.5607113242149353,
|
|
"logits/rejected": -0.5150310397148132,
|
|
"logps/chosen": -57.73210144042969,
|
|
"logps/ref_chosen": -57.80778503417969,
|
|
"logps/ref_rejected": -97.39434814453125,
|
|
"logps/rejected": -97.77326965332031,
|
|
"loss": 1.3818,
|
|
"margin_dpo/margin_mean": 0.4545968770980835,
|
|
"margin_dpo/margin_std": 0.5804776549339294,
|
|
"step": 29
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": 0.11934471130371094,
|
|
"KL/mean": -0.19414639472961426,
|
|
"KL/rejected_KL_mean": -0.5076408386230469,
|
|
"KL/std": 0.5560356974601746,
|
|
"epoch": 0.04405286343612335,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.6269863843917847,
|
|
"fcm_dpo/q_t": 0.49843254685401917,
|
|
"grad_norm": 9.180720329284668,
|
|
"learning_rate": 2.1014492753623187e-07,
|
|
"logits/chosen": -0.46149182319641113,
|
|
"logits/rejected": -0.42938873171806335,
|
|
"logps/chosen": -52.45802688598633,
|
|
"logps/ref_chosen": -52.577369689941406,
|
|
"logps/ref_rejected": -98.48920440673828,
|
|
"logps/rejected": -98.99684143066406,
|
|
"loss": 1.38,
|
|
"margin_dpo/margin_mean": 0.6269862651824951,
|
|
"margin_dpo/margin_std": 0.565685510635376,
|
|
"step": 30
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": 0.07823753356933594,
|
|
"KL/mean": -0.14964136481285095,
|
|
"KL/rejected_KL_mean": -0.3775215148925781,
|
|
"KL/std": 0.5351479649543762,
|
|
"epoch": 0.04552129221732746,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.4557562470436096,
|
|
"fcm_dpo/q_t": 0.498860627412796,
|
|
"grad_norm": 7.051517009735107,
|
|
"learning_rate": 2.1739130434782607e-07,
|
|
"logits/chosen": -0.4864119291305542,
|
|
"logits/rejected": -0.43947017192840576,
|
|
"logps/chosen": -63.72868347167969,
|
|
"logps/ref_chosen": -63.806922912597656,
|
|
"logps/ref_rejected": -72.89400482177734,
|
|
"logps/rejected": -73.27153015136719,
|
|
"loss": 1.3818,
|
|
"margin_dpo/margin_mean": 0.455756276845932,
|
|
"margin_dpo/margin_std": 0.6158726215362549,
|
|
"step": 31
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": 0.21595001220703125,
|
|
"KL/mean": -0.17387576401233673,
|
|
"KL/rejected_KL_mean": -0.5637054443359375,
|
|
"KL/std": 0.7160457968711853,
|
|
"epoch": 0.04698972099853157,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.7796535491943359,
|
|
"fcm_dpo/q_t": 0.4980509281158447,
|
|
"grad_norm": 8.872539520263672,
|
|
"learning_rate": 2.2463768115942027e-07,
|
|
"logits/chosen": -0.5233839750289917,
|
|
"logits/rejected": -0.48274725675582886,
|
|
"logps/chosen": -62.52357482910156,
|
|
"logps/ref_chosen": -62.739524841308594,
|
|
"logps/ref_rejected": -89.3175048828125,
|
|
"logps/rejected": -89.88121032714844,
|
|
"loss": 1.3785,
|
|
"margin_dpo/margin_mean": 0.7796535491943359,
|
|
"margin_dpo/margin_std": 0.9119139909744263,
|
|
"step": 32
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": 0.10995101928710938,
|
|
"KL/mean": -0.19211336970329285,
|
|
"KL/rejected_KL_mean": -0.49417877197265625,
|
|
"KL/std": 0.5824633836746216,
|
|
"epoch": 0.048458149779735685,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.6041242480278015,
|
|
"fcm_dpo/q_t": 0.49848970770835876,
|
|
"grad_norm": 7.534836292266846,
|
|
"learning_rate": 2.318840579710145e-07,
|
|
"logits/chosen": -0.49200475215911865,
|
|
"logits/rejected": -0.465828537940979,
|
|
"logps/chosen": -53.151023864746094,
|
|
"logps/ref_chosen": -53.26097106933594,
|
|
"logps/ref_rejected": -87.8851318359375,
|
|
"logps/rejected": -88.37931823730469,
|
|
"loss": 1.3803,
|
|
"margin_dpo/margin_mean": 0.6041243076324463,
|
|
"margin_dpo/margin_std": 0.696311354637146,
|
|
"step": 33
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": 0.05680084228515625,
|
|
"KL/mean": -0.3211583197116852,
|
|
"KL/rejected_KL_mean": -0.6991157531738281,
|
|
"KL/std": 0.7343294620513916,
|
|
"epoch": 0.049926578560939794,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 0.7559173107147217,
|
|
"fcm_dpo/q_t": 0.49811026453971863,
|
|
"grad_norm": 8.2849702835083,
|
|
"learning_rate": 2.391304347826087e-07,
|
|
"logits/chosen": -0.4856771230697632,
|
|
"logits/rejected": -0.4683513939380646,
|
|
"logps/chosen": -50.760528564453125,
|
|
"logps/ref_chosen": -50.81732940673828,
|
|
"logps/ref_rejected": -101.92184448242188,
|
|
"logps/rejected": -102.62095642089844,
|
|
"loss": 1.3788,
|
|
"margin_dpo/margin_mean": 0.7559161186218262,
|
|
"margin_dpo/margin_std": 0.89031583070755,
|
|
"step": 34
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": 0.1258563995361328,
|
|
"KL/mean": -0.46609964966773987,
|
|
"KL/rejected_KL_mean": -1.0580558776855469,
|
|
"KL/std": 0.997234582901001,
|
|
"epoch": 0.0513950073421439,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.1839112043380737,
|
|
"fcm_dpo/q_t": 0.49704039096832275,
|
|
"grad_norm": 9.21121597290039,
|
|
"learning_rate": 2.463768115942029e-07,
|
|
"logits/chosen": -0.5175144672393799,
|
|
"logits/rejected": -0.48064374923706055,
|
|
"logps/chosen": -50.89863586425781,
|
|
"logps/ref_chosen": -51.02449035644531,
|
|
"logps/ref_rejected": -106.82443237304688,
|
|
"logps/rejected": -107.88248443603516,
|
|
"loss": 1.3745,
|
|
"margin_dpo/margin_mean": 1.1839113235473633,
|
|
"margin_dpo/margin_std": 1.1529996395111084,
|
|
"step": 35
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": 0.013143539428710938,
|
|
"KL/mean": -0.5748996138572693,
|
|
"KL/rejected_KL_mean": -1.1629409790039062,
|
|
"KL/std": 1.143606424331665,
|
|
"epoch": 0.05286343612334802,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.1760886907577515,
|
|
"fcm_dpo/q_t": 0.4970599412918091,
|
|
"grad_norm": 8.075494766235352,
|
|
"learning_rate": 2.536231884057971e-07,
|
|
"logits/chosen": -0.569900393486023,
|
|
"logits/rejected": -0.5340551733970642,
|
|
"logps/chosen": -51.97834777832031,
|
|
"logps/ref_chosen": -51.991493225097656,
|
|
"logps/ref_rejected": -86.0406265258789,
|
|
"logps/rejected": -87.20356750488281,
|
|
"loss": 1.3746,
|
|
"margin_dpo/margin_mean": 1.1760879755020142,
|
|
"margin_dpo/margin_std": 1.2999153137207031,
|
|
"step": 36
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": 0.00957489013671875,
|
|
"KL/mean": -0.5088434219360352,
|
|
"KL/rejected_KL_mean": -1.0272636413574219,
|
|
"KL/std": 1.0523037910461426,
|
|
"epoch": 0.05433186490455213,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.0368335247039795,
|
|
"fcm_dpo/q_t": 0.4974081218242645,
|
|
"grad_norm": 6.84469747543335,
|
|
"learning_rate": 2.6086956521739126e-07,
|
|
"logits/chosen": -0.5325401425361633,
|
|
"logits/rejected": -0.49065572023391724,
|
|
"logps/chosen": -62.79753112792969,
|
|
"logps/ref_chosen": -62.807106018066406,
|
|
"logps/ref_rejected": -77.89507293701172,
|
|
"logps/rejected": -78.92233276367188,
|
|
"loss": 1.376,
|
|
"margin_dpo/margin_mean": 1.0368335247039795,
|
|
"margin_dpo/margin_std": 1.3373100757598877,
|
|
"step": 37
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": 0.1680927276611328,
|
|
"KL/mean": -0.552198052406311,
|
|
"KL/rejected_KL_mean": -1.272491455078125,
|
|
"KL/std": 1.348757266998291,
|
|
"epoch": 0.055800293685756244,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.4405823945999146,
|
|
"fcm_dpo/q_t": 0.4963989853858948,
|
|
"grad_norm": 7.924060821533203,
|
|
"learning_rate": 2.681159420289855e-07,
|
|
"logits/chosen": -0.5047751665115356,
|
|
"logits/rejected": -0.47182124853134155,
|
|
"logps/chosen": -48.22242736816406,
|
|
"logps/ref_chosen": -48.39051818847656,
|
|
"logps/ref_rejected": -97.91244506835938,
|
|
"logps/rejected": -99.1849365234375,
|
|
"loss": 1.372,
|
|
"margin_dpo/margin_mean": 1.440582513809204,
|
|
"margin_dpo/margin_std": 1.5998311042785645,
|
|
"step": 38
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": 0.09676551818847656,
|
|
"KL/mean": -0.7393452525138855,
|
|
"KL/rejected_KL_mean": -1.5754547119140625,
|
|
"KL/std": 1.302678108215332,
|
|
"epoch": 0.05726872246696035,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.6722157001495361,
|
|
"fcm_dpo/q_t": 0.4958198070526123,
|
|
"grad_norm": 8.258176803588867,
|
|
"learning_rate": 2.753623188405797e-07,
|
|
"logits/chosen": -0.5640593767166138,
|
|
"logits/rejected": -0.5244793891906738,
|
|
"logps/chosen": -50.653709411621094,
|
|
"logps/ref_chosen": -50.75047302246094,
|
|
"logps/ref_rejected": -78.56951141357422,
|
|
"logps/rejected": -80.14496612548828,
|
|
"loss": 1.3697,
|
|
"margin_dpo/margin_mean": 1.6722155809402466,
|
|
"margin_dpo/margin_std": 1.39006769657135,
|
|
"step": 39
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": 0.2056598663330078,
|
|
"KL/mean": -0.5616458654403687,
|
|
"KL/rejected_KL_mean": -1.3289527893066406,
|
|
"KL/std": 1.4022493362426758,
|
|
"epoch": 0.05873715124816446,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.5346159934997559,
|
|
"fcm_dpo/q_t": 0.4961639642715454,
|
|
"grad_norm": 6.980234146118164,
|
|
"learning_rate": 2.8260869565217386e-07,
|
|
"logits/chosen": -0.5176148414611816,
|
|
"logits/rejected": -0.4874315857887268,
|
|
"logps/chosen": -57.7794075012207,
|
|
"logps/ref_chosen": -57.985069274902344,
|
|
"logps/ref_rejected": -74.3000717163086,
|
|
"logps/rejected": -75.6290283203125,
|
|
"loss": 1.3711,
|
|
"margin_dpo/margin_mean": 1.5346163511276245,
|
|
"margin_dpo/margin_std": 1.6990015506744385,
|
|
"step": 40
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": 0.01158905029296875,
|
|
"KL/mean": -0.9059728980064392,
|
|
"KL/rejected_KL_mean": -1.8235282897949219,
|
|
"KL/std": 1.8433566093444824,
|
|
"epoch": 0.06020558002936858,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 1.8351185321807861,
|
|
"fcm_dpo/q_t": 0.49541300535202026,
|
|
"grad_norm": 8.068608283996582,
|
|
"learning_rate": 2.898550724637681e-07,
|
|
"logits/chosen": -0.5168710350990295,
|
|
"logits/rejected": -0.4790883958339691,
|
|
"logps/chosen": -62.684226989746094,
|
|
"logps/ref_chosen": -62.69581604003906,
|
|
"logps/ref_rejected": -97.02352905273438,
|
|
"logps/rejected": -98.84706115722656,
|
|
"loss": 1.3681,
|
|
"margin_dpo/margin_mean": 1.8351190090179443,
|
|
"margin_dpo/margin_std": 2.0257954597473145,
|
|
"step": 41
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": 0.2347736358642578,
|
|
"KL/mean": -1.0914355516433716,
|
|
"KL/rejected_KL_mean": -2.4176406860351562,
|
|
"KL/std": 2.2188539505004883,
|
|
"epoch": 0.06167400881057269,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 2.6524176597595215,
|
|
"fcm_dpo/q_t": 0.49337083101272583,
|
|
"grad_norm": 9.999738693237305,
|
|
"learning_rate": 2.971014492753623e-07,
|
|
"logits/chosen": -0.5359020233154297,
|
|
"logits/rejected": -0.4893391728401184,
|
|
"logps/chosen": -58.731651306152344,
|
|
"logps/ref_chosen": -58.966426849365234,
|
|
"logps/ref_rejected": -109.90837097167969,
|
|
"logps/rejected": -112.32601928710938,
|
|
"loss": 1.3601,
|
|
"margin_dpo/margin_mean": 2.6524174213409424,
|
|
"margin_dpo/margin_std": 2.480203628540039,
|
|
"step": 42
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": 0.5463447570800781,
|
|
"KL/mean": -0.7033693790435791,
|
|
"KL/rejected_KL_mean": -1.9530906677246094,
|
|
"KL/std": 1.851230263710022,
|
|
"epoch": 0.0631424375917768,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 2.4994335174560547,
|
|
"fcm_dpo/q_t": 0.4937525987625122,
|
|
"grad_norm": 8.732246398925781,
|
|
"learning_rate": 3.043478260869565e-07,
|
|
"logits/chosen": -0.5335399508476257,
|
|
"logits/rejected": -0.5083379745483398,
|
|
"logps/chosen": -53.609649658203125,
|
|
"logps/ref_chosen": -54.15599822998047,
|
|
"logps/ref_rejected": -96.48019409179688,
|
|
"logps/rejected": -98.43328094482422,
|
|
"loss": 1.3616,
|
|
"margin_dpo/margin_mean": 2.4994330406188965,
|
|
"margin_dpo/margin_std": 2.0141167640686035,
|
|
"step": 43
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": 0.25655555725097656,
|
|
"KL/mean": -1.1926369667053223,
|
|
"KL/rejected_KL_mean": -2.641826629638672,
|
|
"KL/std": 2.3078997135162354,
|
|
"epoch": 0.06461086637298091,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 2.8983845710754395,
|
|
"fcm_dpo/q_t": 0.4927557110786438,
|
|
"grad_norm": 9.835100173950195,
|
|
"learning_rate": 3.115942028985507e-07,
|
|
"logits/chosen": -0.48626744747161865,
|
|
"logits/rejected": -0.465964674949646,
|
|
"logps/chosen": -49.82194519042969,
|
|
"logps/ref_chosen": -50.07849884033203,
|
|
"logps/ref_rejected": -108.78376007080078,
|
|
"logps/rejected": -111.42558288574219,
|
|
"loss": 1.3577,
|
|
"margin_dpo/margin_mean": 2.8983850479125977,
|
|
"margin_dpo/margin_std": 2.2746810913085938,
|
|
"step": 44
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": 0.12450027465820312,
|
|
"KL/mean": -1.0119799375534058,
|
|
"KL/rejected_KL_mean": -2.1484642028808594,
|
|
"KL/std": 2.0461864471435547,
|
|
"epoch": 0.06607929515418502,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 2.2729620933532715,
|
|
"fcm_dpo/q_t": 0.49431926012039185,
|
|
"grad_norm": 7.852822303771973,
|
|
"learning_rate": 3.188405797101449e-07,
|
|
"logits/chosen": -0.457671582698822,
|
|
"logits/rejected": -0.4445871412754059,
|
|
"logps/chosen": -48.290428161621094,
|
|
"logps/ref_chosen": -48.4149284362793,
|
|
"logps/ref_rejected": -77.93643188476562,
|
|
"logps/rejected": -80.08489227294922,
|
|
"loss": 1.3639,
|
|
"margin_dpo/margin_mean": 2.2729620933532715,
|
|
"margin_dpo/margin_std": 2.53743839263916,
|
|
"step": 45
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": 0.15254783630371094,
|
|
"KL/mean": -1.3116981983184814,
|
|
"KL/rejected_KL_mean": -2.7759437561035156,
|
|
"KL/std": 2.635380744934082,
|
|
"epoch": 0.06754772393538913,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 2.92849063873291,
|
|
"fcm_dpo/q_t": 0.4926820993423462,
|
|
"grad_norm": 9.287505149841309,
|
|
"learning_rate": 3.260869565217391e-07,
|
|
"logits/chosen": -0.5358284115791321,
|
|
"logits/rejected": -0.48519566655158997,
|
|
"logps/chosen": -55.84687805175781,
|
|
"logps/ref_chosen": -55.999427795410156,
|
|
"logps/ref_rejected": -95.652587890625,
|
|
"logps/rejected": -98.42852783203125,
|
|
"loss": 1.3575,
|
|
"margin_dpo/margin_mean": 2.92849063873291,
|
|
"margin_dpo/margin_std": 3.251277446746826,
|
|
"step": 46
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": 0.4180793762207031,
|
|
"KL/mean": -1.0824042558670044,
|
|
"KL/rejected_KL_mean": -2.5828895568847656,
|
|
"KL/std": 2.6021361351013184,
|
|
"epoch": 0.06901615271659324,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 3.0009684562683105,
|
|
"fcm_dpo/q_t": 0.49249979853630066,
|
|
"grad_norm": 8.359269142150879,
|
|
"learning_rate": 3.333333333333333e-07,
|
|
"logits/chosen": -0.583840012550354,
|
|
"logits/rejected": -0.531823992729187,
|
|
"logps/chosen": -57.507999420166016,
|
|
"logps/ref_chosen": -57.92607879638672,
|
|
"logps/ref_rejected": -94.67920684814453,
|
|
"logps/rejected": -97.26210021972656,
|
|
"loss": 1.3567,
|
|
"margin_dpo/margin_mean": 3.000969409942627,
|
|
"margin_dpo/margin_std": 2.6266069412231445,
|
|
"step": 47
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": 0.00414276123046875,
|
|
"KL/mean": -1.5863243341445923,
|
|
"KL/rejected_KL_mean": -3.176788330078125,
|
|
"KL/std": 2.768789768218994,
|
|
"epoch": 0.07048458149779736,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 3.1809349060058594,
|
|
"fcm_dpo/q_t": 0.4920506179332733,
|
|
"grad_norm": 9.244763374328613,
|
|
"learning_rate": 3.4057971014492755e-07,
|
|
"logits/chosen": -0.590816855430603,
|
|
"logits/rejected": -0.532641589641571,
|
|
"logps/chosen": -57.183929443359375,
|
|
"logps/ref_chosen": -57.188072204589844,
|
|
"logps/ref_rejected": -88.0166015625,
|
|
"logps/rejected": -91.19338989257812,
|
|
"loss": 1.355,
|
|
"margin_dpo/margin_mean": 3.1809351444244385,
|
|
"margin_dpo/margin_std": 3.039764881134033,
|
|
"step": 48
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": 0.3029060363769531,
|
|
"KL/mean": -1.659820556640625,
|
|
"KL/rejected_KL_mean": -3.622547149658203,
|
|
"KL/std": 3.5545148849487305,
|
|
"epoch": 0.07195301027900147,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 3.9254517555236816,
|
|
"fcm_dpo/q_t": 0.49019408226013184,
|
|
"grad_norm": 8.854732513427734,
|
|
"learning_rate": 3.478260869565217e-07,
|
|
"logits/chosen": -0.5587940812110901,
|
|
"logits/rejected": -0.5012864470481873,
|
|
"logps/chosen": -61.382362365722656,
|
|
"logps/ref_chosen": -61.685272216796875,
|
|
"logps/ref_rejected": -83.76747131347656,
|
|
"logps/rejected": -87.39002227783203,
|
|
"loss": 1.3479,
|
|
"margin_dpo/margin_mean": 3.9254512786865234,
|
|
"margin_dpo/margin_std": 4.098909378051758,
|
|
"step": 49
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.2061939239501953,
|
|
"KL/mean": -2.284212827682495,
|
|
"KL/rejected_KL_mean": -4.3622283935546875,
|
|
"KL/std": 3.580059051513672,
|
|
"epoch": 0.07342143906020558,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 4.156033515930176,
|
|
"fcm_dpo/q_t": 0.4896165132522583,
|
|
"grad_norm": 8.716004371643066,
|
|
"learning_rate": 3.5507246376811595e-07,
|
|
"logits/chosen": -0.5688312649726868,
|
|
"logits/rejected": -0.5336655378341675,
|
|
"logps/chosen": -58.93033218383789,
|
|
"logps/ref_chosen": -58.72413635253906,
|
|
"logps/ref_rejected": -96.35814666748047,
|
|
"logps/rejected": -100.72037506103516,
|
|
"loss": 1.3456,
|
|
"margin_dpo/margin_mean": 4.156033515930176,
|
|
"margin_dpo/margin_std": 4.06223201751709,
|
|
"step": 50
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.30522727966308594,
|
|
"KL/mean": -2.3866991996765137,
|
|
"KL/rejected_KL_mean": -4.468173980712891,
|
|
"KL/std": 4.316704750061035,
|
|
"epoch": 0.07488986784140969,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 4.162949562072754,
|
|
"fcm_dpo/q_t": 0.4896053671836853,
|
|
"grad_norm": 8.142417907714844,
|
|
"learning_rate": 3.6231884057971015e-07,
|
|
"logits/chosen": -0.5444722175598145,
|
|
"logits/rejected": -0.5125424861907959,
|
|
"logps/chosen": -61.67889404296875,
|
|
"logps/ref_chosen": -61.3736686706543,
|
|
"logps/ref_rejected": -76.00199890136719,
|
|
"logps/rejected": -80.47017669677734,
|
|
"loss": 1.3458,
|
|
"margin_dpo/margin_mean": 4.162949562072754,
|
|
"margin_dpo/margin_std": 5.265970706939697,
|
|
"step": 51
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": 0.31786346435546875,
|
|
"KL/mean": -2.8440990447998047,
|
|
"KL/rejected_KL_mean": -6.006065368652344,
|
|
"KL/std": 4.961765289306641,
|
|
"epoch": 0.0763582966226138,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 6.323929309844971,
|
|
"fcm_dpo/q_t": 0.48420995473861694,
|
|
"grad_norm": 9.986041069030762,
|
|
"learning_rate": 3.695652173913043e-07,
|
|
"logits/chosen": -0.5617812275886536,
|
|
"logits/rejected": -0.5068017840385437,
|
|
"logps/chosen": -52.019493103027344,
|
|
"logps/ref_chosen": -52.33735656738281,
|
|
"logps/ref_rejected": -79.97391510009766,
|
|
"logps/rejected": -85.97998046875,
|
|
"loss": 1.3248,
|
|
"margin_dpo/margin_mean": 6.323929309844971,
|
|
"margin_dpo/margin_std": 5.412091255187988,
|
|
"step": 52
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.24696731567382812,
|
|
"KL/mean": -3.4364819526672363,
|
|
"KL/rejected_KL_mean": -6.6259918212890625,
|
|
"KL/std": 5.564968109130859,
|
|
"epoch": 0.07782672540381791,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 6.37903356552124,
|
|
"fcm_dpo/q_t": 0.4840887486934662,
|
|
"grad_norm": 10.334386825561523,
|
|
"learning_rate": 3.7681159420289855e-07,
|
|
"logits/chosen": -0.6039080619812012,
|
|
"logits/rejected": -0.5819511413574219,
|
|
"logps/chosen": -53.56161880493164,
|
|
"logps/ref_chosen": -53.31465148925781,
|
|
"logps/ref_rejected": -91.78359985351562,
|
|
"logps/rejected": -98.40959167480469,
|
|
"loss": 1.3248,
|
|
"margin_dpo/margin_mean": 6.379033088684082,
|
|
"margin_dpo/margin_std": 6.412992477416992,
|
|
"step": 53
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.4557170867919922,
|
|
"KL/mean": -3.2610464096069336,
|
|
"KL/rejected_KL_mean": -6.066375732421875,
|
|
"KL/std": 5.338939189910889,
|
|
"epoch": 0.07929515418502203,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 5.610658645629883,
|
|
"fcm_dpo/q_t": 0.4859907031059265,
|
|
"grad_norm": 8.843002319335938,
|
|
"learning_rate": 3.8405797101449274e-07,
|
|
"logits/chosen": -0.6185827255249023,
|
|
"logits/rejected": -0.566498339176178,
|
|
"logps/chosen": -51.144378662109375,
|
|
"logps/ref_chosen": -50.68865966796875,
|
|
"logps/ref_rejected": -91.71539306640625,
|
|
"logps/rejected": -97.78176879882812,
|
|
"loss": 1.3317,
|
|
"margin_dpo/margin_mean": 5.610658168792725,
|
|
"margin_dpo/margin_std": 5.301271438598633,
|
|
"step": 54
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -1.0718555450439453,
|
|
"KL/mean": -4.378843307495117,
|
|
"KL/rejected_KL_mean": -7.685829162597656,
|
|
"KL/std": 6.400544166564941,
|
|
"epoch": 0.08076358296622614,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 6.613970756530762,
|
|
"fcm_dpo/q_t": 0.4835028052330017,
|
|
"grad_norm": 9.306718826293945,
|
|
"learning_rate": 3.9130434782608694e-07,
|
|
"logits/chosen": -0.6270061731338501,
|
|
"logits/rejected": -0.5628513097763062,
|
|
"logps/chosen": -63.68708801269531,
|
|
"logps/ref_chosen": -62.615234375,
|
|
"logps/ref_rejected": -88.99349975585938,
|
|
"logps/rejected": -96.67933654785156,
|
|
"loss": 1.3228,
|
|
"margin_dpo/margin_mean": 6.6139702796936035,
|
|
"margin_dpo/margin_std": 7.726709365844727,
|
|
"step": 55
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.8420734405517578,
|
|
"KL/mean": -4.043349266052246,
|
|
"KL/rejected_KL_mean": -7.24462890625,
|
|
"KL/std": 6.373098850250244,
|
|
"epoch": 0.08223201174743025,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 6.402560234069824,
|
|
"fcm_dpo/q_t": 0.4840297996997833,
|
|
"grad_norm": 8.995559692382812,
|
|
"learning_rate": 3.9855072463768114e-07,
|
|
"logits/chosen": -0.6062008142471313,
|
|
"logits/rejected": -0.563714861869812,
|
|
"logps/chosen": -58.774803161621094,
|
|
"logps/ref_chosen": -57.9327278137207,
|
|
"logps/ref_rejected": -94.1744384765625,
|
|
"logps/rejected": -101.4190673828125,
|
|
"loss": 1.3248,
|
|
"margin_dpo/margin_mean": 6.402560234069824,
|
|
"margin_dpo/margin_std": 7.6934638023376465,
|
|
"step": 56
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -0.9112758636474609,
|
|
"KL/mean": -4.525204181671143,
|
|
"KL/rejected_KL_mean": -8.13912582397461,
|
|
"KL/std": 6.3168745040893555,
|
|
"epoch": 0.08370044052863436,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 7.227848052978516,
|
|
"fcm_dpo/q_t": 0.48196300864219666,
|
|
"grad_norm": 9.872321128845215,
|
|
"learning_rate": 4.057971014492754e-07,
|
|
"logits/chosen": -0.5837876200675964,
|
|
"logits/rejected": -0.5559124946594238,
|
|
"logps/chosen": -71.40656280517578,
|
|
"logps/ref_chosen": -70.49528503417969,
|
|
"logps/ref_rejected": -95.56546020507812,
|
|
"logps/rejected": -103.70458221435547,
|
|
"loss": 1.3166,
|
|
"margin_dpo/margin_mean": 7.227848052978516,
|
|
"margin_dpo/margin_std": 7.028669834136963,
|
|
"step": 57
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -1.2650394439697266,
|
|
"KL/mean": -5.307687759399414,
|
|
"KL/rejected_KL_mean": -9.350334167480469,
|
|
"KL/std": 7.259403228759766,
|
|
"epoch": 0.08516886930983847,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 8.08529281616211,
|
|
"fcm_dpo/q_t": 0.4798462390899658,
|
|
"grad_norm": 10.019336700439453,
|
|
"learning_rate": 4.1304347826086954e-07,
|
|
"logits/chosen": -0.5869804620742798,
|
|
"logits/rejected": -0.5100945830345154,
|
|
"logps/chosen": -63.397979736328125,
|
|
"logps/ref_chosen": -62.13294219970703,
|
|
"logps/ref_rejected": -84.61729431152344,
|
|
"logps/rejected": -93.9676284790039,
|
|
"loss": 1.309,
|
|
"margin_dpo/margin_mean": 8.08529281616211,
|
|
"margin_dpo/margin_std": 8.671724319458008,
|
|
"step": 58
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -1.8158931732177734,
|
|
"KL/mean": -6.279197692871094,
|
|
"KL/rejected_KL_mean": -10.742504119873047,
|
|
"KL/std": 8.032249450683594,
|
|
"epoch": 0.08663729809104258,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 8.926612854003906,
|
|
"fcm_dpo/q_t": 0.4777594804763794,
|
|
"grad_norm": 11.210515975952148,
|
|
"learning_rate": 4.2028985507246374e-07,
|
|
"logits/chosen": -0.6375648379325867,
|
|
"logits/rejected": -0.5986994504928589,
|
|
"logps/chosen": -53.74842071533203,
|
|
"logps/ref_chosen": -51.932525634765625,
|
|
"logps/ref_rejected": -88.88520050048828,
|
|
"logps/rejected": -99.62770080566406,
|
|
"loss": 1.3012,
|
|
"margin_dpo/margin_mean": 8.926612854003906,
|
|
"margin_dpo/margin_std": 9.161856651306152,
|
|
"step": 59
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -3.208467483520508,
|
|
"KL/mean": -6.759184837341309,
|
|
"KL/rejected_KL_mean": -10.309898376464844,
|
|
"KL/std": 7.358757972717285,
|
|
"epoch": 0.0881057268722467,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 7.1014251708984375,
|
|
"fcm_dpo/q_t": 0.4822811782360077,
|
|
"grad_norm": 9.682544708251953,
|
|
"learning_rate": 4.2753623188405794e-07,
|
|
"logits/chosen": -0.6127077341079712,
|
|
"logits/rejected": -0.5527953505516052,
|
|
"logps/chosen": -64.1506576538086,
|
|
"logps/ref_chosen": -60.94218826293945,
|
|
"logps/ref_rejected": -85.39340209960938,
|
|
"logps/rejected": -95.70329284667969,
|
|
"loss": 1.3183,
|
|
"margin_dpo/margin_mean": 7.101426124572754,
|
|
"margin_dpo/margin_std": 8.231400489807129,
|
|
"step": 60
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -1.9628067016601562,
|
|
"KL/mean": -6.415502548217773,
|
|
"KL/rejected_KL_mean": -10.86819839477539,
|
|
"KL/std": 9.966720581054688,
|
|
"epoch": 0.08957415565345081,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 8.9053955078125,
|
|
"fcm_dpo/q_t": 0.47791624069213867,
|
|
"grad_norm": 10.344194412231445,
|
|
"learning_rate": 4.3478260869565214e-07,
|
|
"logits/chosen": -0.624599814414978,
|
|
"logits/rejected": -0.5915525555610657,
|
|
"logps/chosen": -62.59632873535156,
|
|
"logps/ref_chosen": -60.633522033691406,
|
|
"logps/ref_rejected": -89.85249328613281,
|
|
"logps/rejected": -100.72069549560547,
|
|
"loss": 1.3032,
|
|
"margin_dpo/margin_mean": 8.9053955078125,
|
|
"margin_dpo/margin_std": 12.48222541809082,
|
|
"step": 61
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -2.1025753021240234,
|
|
"KL/mean": -5.457090854644775,
|
|
"KL/rejected_KL_mean": -8.811607360839844,
|
|
"KL/std": 7.392644882202148,
|
|
"epoch": 0.09104258443465492,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 6.709033012390137,
|
|
"fcm_dpo/q_t": 0.4832811653614044,
|
|
"grad_norm": 8.395082473754883,
|
|
"learning_rate": 4.420289855072464e-07,
|
|
"logits/chosen": -0.6115210056304932,
|
|
"logits/rejected": -0.5771872401237488,
|
|
"logps/chosen": -58.25334930419922,
|
|
"logps/ref_chosen": -56.15077209472656,
|
|
"logps/ref_rejected": -75.56619262695312,
|
|
"logps/rejected": -84.3778076171875,
|
|
"loss": 1.3225,
|
|
"margin_dpo/margin_mean": 6.709033012390137,
|
|
"margin_dpo/margin_std": 9.149477005004883,
|
|
"step": 62
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -3.5704593658447266,
|
|
"KL/mean": -8.119380950927734,
|
|
"KL/rejected_KL_mean": -12.66830062866211,
|
|
"KL/std": 9.557559967041016,
|
|
"epoch": 0.09251101321585903,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 9.097840309143066,
|
|
"fcm_dpo/q_t": 0.4773363471031189,
|
|
"grad_norm": 10.433484077453613,
|
|
"learning_rate": 4.4927536231884053e-07,
|
|
"logits/chosen": -0.6092942953109741,
|
|
"logits/rejected": -0.5663588047027588,
|
|
"logps/chosen": -76.71784973144531,
|
|
"logps/ref_chosen": -73.14739227294922,
|
|
"logps/ref_rejected": -97.61006164550781,
|
|
"logps/rejected": -110.27836608886719,
|
|
"loss": 1.3,
|
|
"margin_dpo/margin_mean": 9.09783935546875,
|
|
"margin_dpo/margin_std": 10.06234359741211,
|
|
"step": 63
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -1.6353092193603516,
|
|
"KL/mean": -7.144941329956055,
|
|
"KL/rejected_KL_mean": -12.65457534790039,
|
|
"KL/std": 10.44306755065918,
|
|
"epoch": 0.09397944199706314,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 11.019262313842773,
|
|
"fcm_dpo/q_t": 0.4726361632347107,
|
|
"grad_norm": 11.34101390838623,
|
|
"learning_rate": 4.5652173913043473e-07,
|
|
"logits/chosen": -0.5932717323303223,
|
|
"logits/rejected": -0.5615238547325134,
|
|
"logps/chosen": -55.6339111328125,
|
|
"logps/ref_chosen": -53.998600006103516,
|
|
"logps/ref_rejected": -93.53019714355469,
|
|
"logps/rejected": -106.18477630615234,
|
|
"loss": 1.2835,
|
|
"margin_dpo/margin_mean": 11.019262313842773,
|
|
"margin_dpo/margin_std": 12.875155448913574,
|
|
"step": 64
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -4.444938659667969,
|
|
"KL/mean": -10.106355667114258,
|
|
"KL/rejected_KL_mean": -15.767776489257812,
|
|
"KL/std": 11.982595443725586,
|
|
"epoch": 0.09544787077826726,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 11.32283878326416,
|
|
"fcm_dpo/q_t": 0.47184616327285767,
|
|
"grad_norm": 11.773619651794434,
|
|
"learning_rate": 4.63768115942029e-07,
|
|
"logits/chosen": -0.6665968298912048,
|
|
"logits/rejected": -0.6556574106216431,
|
|
"logps/chosen": -69.28094482421875,
|
|
"logps/ref_chosen": -64.83599853515625,
|
|
"logps/ref_rejected": -109.94645690917969,
|
|
"logps/rejected": -125.7142333984375,
|
|
"loss": 1.2808,
|
|
"margin_dpo/margin_mean": 11.322837829589844,
|
|
"margin_dpo/margin_std": 13.313655853271484,
|
|
"step": 65
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -4.254911422729492,
|
|
"KL/mean": -9.470987319946289,
|
|
"KL/rejected_KL_mean": -14.687057495117188,
|
|
"KL/std": 10.89914321899414,
|
|
"epoch": 0.09691629955947137,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 10.432148933410645,
|
|
"fcm_dpo/q_t": 0.4741200804710388,
|
|
"grad_norm": 10.362818717956543,
|
|
"learning_rate": 4.7101449275362313e-07,
|
|
"logits/chosen": -0.6421518325805664,
|
|
"logits/rejected": -0.6104958057403564,
|
|
"logps/chosen": -55.69843673706055,
|
|
"logps/ref_chosen": -51.44352722167969,
|
|
"logps/ref_rejected": -75.63629913330078,
|
|
"logps/rejected": -90.32335662841797,
|
|
"loss": 1.2892,
|
|
"margin_dpo/margin_mean": 10.432148933410645,
|
|
"margin_dpo/margin_std": 13.316844940185547,
|
|
"step": 66
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -4.397365570068359,
|
|
"KL/mean": -9.999573707580566,
|
|
"KL/rejected_KL_mean": -15.601787567138672,
|
|
"KL/std": 12.074445724487305,
|
|
"epoch": 0.09838472834067548,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 11.204422950744629,
|
|
"fcm_dpo/q_t": 0.4722447097301483,
|
|
"grad_norm": 10.62942886352539,
|
|
"learning_rate": 4.782608695652174e-07,
|
|
"logits/chosen": -0.5891748070716858,
|
|
"logits/rejected": -0.5434067249298096,
|
|
"logps/chosen": -63.7381706237793,
|
|
"logps/ref_chosen": -59.34080505371094,
|
|
"logps/ref_rejected": -72.78728485107422,
|
|
"logps/rejected": -88.38906860351562,
|
|
"loss": 1.2828,
|
|
"margin_dpo/margin_mean": 11.204421997070312,
|
|
"margin_dpo/margin_std": 14.561704635620117,
|
|
"step": 67
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -6.524868011474609,
|
|
"KL/mean": -11.753357887268066,
|
|
"KL/rejected_KL_mean": -16.981849670410156,
|
|
"KL/std": 11.021953582763672,
|
|
"epoch": 0.09985315712187959,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 10.456976890563965,
|
|
"fcm_dpo/q_t": 0.47402510046958923,
|
|
"grad_norm": 9.816877365112305,
|
|
"learning_rate": 4.855072463768116e-07,
|
|
"logits/chosen": -0.6517592668533325,
|
|
"logits/rejected": -0.586235761642456,
|
|
"logps/chosen": -71.73069763183594,
|
|
"logps/ref_chosen": -65.2058334350586,
|
|
"logps/ref_rejected": -77.20724487304688,
|
|
"logps/rejected": -94.18910217285156,
|
|
"loss": 1.2886,
|
|
"margin_dpo/margin_mean": 10.456975936889648,
|
|
"margin_dpo/margin_std": 12.803793907165527,
|
|
"step": 68
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -8.00680160522461,
|
|
"KL/mean": -15.686670303344727,
|
|
"KL/rejected_KL_mean": -23.366546630859375,
|
|
"KL/std": 15.096254348754883,
|
|
"epoch": 0.1013215859030837,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 15.35973834991455,
|
|
"fcm_dpo/q_t": 0.46209076046943665,
|
|
"grad_norm": 13.197538375854492,
|
|
"learning_rate": 4.927536231884058e-07,
|
|
"logits/chosen": -0.6669565439224243,
|
|
"logits/rejected": -0.6464905738830566,
|
|
"logps/chosen": -67.82604217529297,
|
|
"logps/ref_chosen": -59.81924057006836,
|
|
"logps/ref_rejected": -103.38886260986328,
|
|
"logps/rejected": -126.75540924072266,
|
|
"loss": 1.2463,
|
|
"margin_dpo/margin_mean": 15.359739303588867,
|
|
"margin_dpo/margin_std": 17.261123657226562,
|
|
"step": 69
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -10.887544631958008,
|
|
"KL/mean": -18.979265213012695,
|
|
"KL/rejected_KL_mean": -27.07099151611328,
|
|
"KL/std": 19.12816619873047,
|
|
"epoch": 0.1027900146842878,
|
|
"fcm_dpo/beta": 0.009999999776482582,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 16.18343734741211,
|
|
"fcm_dpo/q_t": 0.46015501022338867,
|
|
"grad_norm": 12.805341720581055,
|
|
"learning_rate": 5e-07,
|
|
"logits/chosen": -0.629044771194458,
|
|
"logits/rejected": -0.5975900888442993,
|
|
"logps/chosen": -72.81818389892578,
|
|
"logps/ref_chosen": -61.930641174316406,
|
|
"logps/ref_rejected": -91.06078338623047,
|
|
"logps/rejected": -118.13177490234375,
|
|
"loss": 1.2415,
|
|
"margin_dpo/margin_mean": 16.183441162109375,
|
|
"margin_dpo/margin_std": 20.474119186401367,
|
|
"step": 70
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -11.302818298339844,
|
|
"KL/mean": -21.22766876220703,
|
|
"KL/rejected_KL_mean": -31.152530670166016,
|
|
"KL/std": 20.86431121826172,
|
|
"epoch": 0.10425844346549193,
|
|
"fcm_dpo/beta": 0.010172600857913494,
|
|
"fcm_dpo/delta": 0.08556444197893143,
|
|
"fcm_dpo/margin": 19.849708557128906,
|
|
"fcm_dpo/q_t": 0.45123565196990967,
|
|
"grad_norm": 14.385030746459961,
|
|
"learning_rate": 4.999967061337492e-07,
|
|
"logits/chosen": -0.7001615762710571,
|
|
"logits/rejected": -0.6704069972038269,
|
|
"logps/chosen": -73.05315399169922,
|
|
"logps/ref_chosen": -61.750335693359375,
|
|
"logps/ref_rejected": -97.33662414550781,
|
|
"logps/rejected": -128.48915100097656,
|
|
"loss": 1.2088,
|
|
"margin_dpo/margin_mean": 19.849708557128906,
|
|
"margin_dpo/margin_std": 23.028926849365234,
|
|
"step": 71
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -13.121393203735352,
|
|
"KL/mean": -22.910423278808594,
|
|
"KL/rejected_KL_mean": -32.6994514465332,
|
|
"KL/std": 21.885107040405273,
|
|
"epoch": 0.10572687224669604,
|
|
"fcm_dpo/beta": 0.010172600857913494,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 19.57806396484375,
|
|
"fcm_dpo/q_t": 0.451375812292099,
|
|
"grad_norm": 13.355144500732422,
|
|
"learning_rate": 4.999868246217933e-07,
|
|
"logits/chosen": -0.6597040891647339,
|
|
"logits/rejected": -0.6241432428359985,
|
|
"logps/chosen": -79.1748046875,
|
|
"logps/ref_chosen": -66.05341339111328,
|
|
"logps/ref_rejected": -95.2869873046875,
|
|
"logps/rejected": -127.98643493652344,
|
|
"loss": 1.2156,
|
|
"margin_dpo/margin_mean": 19.57806396484375,
|
|
"margin_dpo/margin_std": 27.155168533325195,
|
|
"step": 72
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -15.171804428100586,
|
|
"KL/mean": -25.175312042236328,
|
|
"KL/rejected_KL_mean": -35.178829193115234,
|
|
"KL/std": 27.357677459716797,
|
|
"epoch": 0.10719530102790015,
|
|
"fcm_dpo/beta": 0.010345780290663242,
|
|
"fcm_dpo/delta": 0.08440417796373367,
|
|
"fcm_dpo/margin": 20.00701904296875,
|
|
"fcm_dpo/q_t": 0.4510188698768616,
|
|
"grad_norm": 14.032218933105469,
|
|
"learning_rate": 4.999703557245192e-07,
|
|
"logits/chosen": -0.6953517198562622,
|
|
"logits/rejected": -0.6581678986549377,
|
|
"logps/chosen": -81.4280776977539,
|
|
"logps/ref_chosen": -66.25627136230469,
|
|
"logps/ref_rejected": -90.45613098144531,
|
|
"logps/rejected": -125.63496398925781,
|
|
"loss": 1.2222,
|
|
"margin_dpo/margin_mean": 20.00701904296875,
|
|
"margin_dpo/margin_std": 35.20468521118164,
|
|
"step": 73
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -15.799406051635742,
|
|
"KL/mean": -27.360076904296875,
|
|
"KL/rejected_KL_mean": -38.92074203491211,
|
|
"KL/std": 30.7703857421875,
|
|
"epoch": 0.10866372980910426,
|
|
"fcm_dpo/beta": 0.010593706741929054,
|
|
"fcm_dpo/delta": 0.15896809101104736,
|
|
"fcm_dpo/margin": 23.121337890625,
|
|
"fcm_dpo/q_t": 0.4444906413555145,
|
|
"grad_norm": 15.119664192199707,
|
|
"learning_rate": 4.999472998758977e-07,
|
|
"logits/chosen": -0.6417176723480225,
|
|
"logits/rejected": -0.6399871706962585,
|
|
"logps/chosen": -69.22428894042969,
|
|
"logps/ref_chosen": -53.42488098144531,
|
|
"logps/ref_rejected": -95.94693756103516,
|
|
"logps/rejected": -134.86767578125,
|
|
"loss": 1.1959,
|
|
"margin_dpo/margin_mean": 23.121337890625,
|
|
"margin_dpo/margin_std": 39.1754150390625,
|
|
"step": 74
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -15.983125686645508,
|
|
"KL/mean": -32.20029067993164,
|
|
"KL/rejected_KL_mean": -48.417449951171875,
|
|
"KL/std": 31.59003448486328,
|
|
"epoch": 0.11013215859030837,
|
|
"fcm_dpo/beta": 0.010802132077515125,
|
|
"fcm_dpo/delta": 0.050960563123226166,
|
|
"fcm_dpo/margin": 32.434326171875,
|
|
"fcm_dpo/q_t": 0.41791272163391113,
|
|
"grad_norm": 17.095163345336914,
|
|
"learning_rate": 4.999176576834721e-07,
|
|
"logits/chosen": -0.6758487224578857,
|
|
"logits/rejected": -0.6743229627609253,
|
|
"logps/chosen": -67.84478759765625,
|
|
"logps/ref_chosen": -51.861663818359375,
|
|
"logps/ref_rejected": -111.25398254394531,
|
|
"logps/rejected": -159.6714324951172,
|
|
"loss": 1.1055,
|
|
"margin_dpo/margin_mean": 32.434326171875,
|
|
"margin_dpo/margin_std": 37.082366943359375,
|
|
"step": 75
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -18.9597225189209,
|
|
"KL/mean": -28.698415756225586,
|
|
"KL/rejected_KL_mean": -38.43710708618164,
|
|
"KL/std": 25.665380477905273,
|
|
"epoch": 0.11160058737151249,
|
|
"fcm_dpo/beta": 0.010884184390306473,
|
|
"fcm_dpo/delta": 0.08708269149065018,
|
|
"fcm_dpo/margin": 19.47739028930664,
|
|
"fcm_dpo/q_t": 0.4486614465713501,
|
|
"grad_norm": 15.436707496643066,
|
|
"learning_rate": 4.998814299283415e-07,
|
|
"logits/chosen": -0.6917558908462524,
|
|
"logits/rejected": -0.6552442312240601,
|
|
"logps/chosen": -72.22576141357422,
|
|
"logps/ref_chosen": -53.26603698730469,
|
|
"logps/ref_rejected": -78.21662902832031,
|
|
"logps/rejected": -116.65373229980469,
|
|
"loss": 1.2096,
|
|
"margin_dpo/margin_mean": 19.47739028930664,
|
|
"margin_dpo/margin_std": 28.66197395324707,
|
|
"step": 76
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -17.53479766845703,
|
|
"KL/mean": -34.09250259399414,
|
|
"KL/rejected_KL_mean": -50.65019989013672,
|
|
"KL/std": 34.816802978515625,
|
|
"epoch": 0.1130690161527166,
|
|
"fcm_dpo/beta": 0.011013032868504524,
|
|
"fcm_dpo/delta": 0.036607466638088226,
|
|
"fcm_dpo/margin": 33.11540603637695,
|
|
"fcm_dpo/q_t": 0.4138905107975006,
|
|
"grad_norm": 17.87863540649414,
|
|
"learning_rate": 4.998386175651409e-07,
|
|
"logits/chosen": -0.6697767376899719,
|
|
"logits/rejected": -0.637236475944519,
|
|
"logps/chosen": -75.63147735595703,
|
|
"logps/ref_chosen": -58.0966796875,
|
|
"logps/ref_rejected": -93.77361297607422,
|
|
"logps/rejected": -144.42381286621094,
|
|
"loss": 1.1015,
|
|
"margin_dpo/margin_mean": 33.11540603637695,
|
|
"margin_dpo/margin_std": 40.183067321777344,
|
|
"step": 77
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -18.032699584960938,
|
|
"KL/mean": -29.887676239013672,
|
|
"KL/rejected_KL_mean": -41.74265670776367,
|
|
"KL/std": 29.635108947753906,
|
|
"epoch": 0.1145374449339207,
|
|
"fcm_dpo/beta": 0.011299570091068745,
|
|
"fcm_dpo/delta": 0.1355305016040802,
|
|
"fcm_dpo/margin": 23.709951400756836,
|
|
"fcm_dpo/q_t": 0.43751174211502075,
|
|
"grad_norm": 15.848010063171387,
|
|
"learning_rate": 4.997892217220159e-07,
|
|
"logits/chosen": -0.6312749981880188,
|
|
"logits/rejected": -0.6117902994155884,
|
|
"logps/chosen": -73.646484375,
|
|
"logps/ref_chosen": -55.61378479003906,
|
|
"logps/ref_rejected": -84.93436431884766,
|
|
"logps/rejected": -126.67701721191406,
|
|
"loss": 1.1749,
|
|
"margin_dpo/margin_mean": 23.709949493408203,
|
|
"margin_dpo/margin_std": 35.13151168823242,
|
|
"step": 78
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -19.16200828552246,
|
|
"KL/mean": -31.912139892578125,
|
|
"KL/rejected_KL_mean": -44.66227340698242,
|
|
"KL/std": 31.9647274017334,
|
|
"epoch": 0.11600587371512482,
|
|
"fcm_dpo/beta": 0.01154954545199871,
|
|
"fcm_dpo/delta": 0.10868433862924576,
|
|
"fcm_dpo/margin": 25.500267028808594,
|
|
"fcm_dpo/q_t": 0.4317125082015991,
|
|
"grad_norm": 15.949761390686035,
|
|
"learning_rate": 4.997332437005931e-07,
|
|
"logits/chosen": -0.6368188858032227,
|
|
"logits/rejected": -0.6110581755638123,
|
|
"logps/chosen": -74.61249542236328,
|
|
"logps/ref_chosen": -55.45048522949219,
|
|
"logps/ref_rejected": -87.64756774902344,
|
|
"logps/rejected": -132.30984497070312,
|
|
"loss": 1.1611,
|
|
"margin_dpo/margin_mean": 25.500267028808594,
|
|
"margin_dpo/margin_std": 38.79698181152344,
|
|
"step": 79
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -23.745559692382812,
|
|
"KL/mean": -37.06023406982422,
|
|
"KL/rejected_KL_mean": -50.374908447265625,
|
|
"KL/std": 37.243316650390625,
|
|
"epoch": 0.11747430249632893,
|
|
"fcm_dpo/beta": 0.011725610122084618,
|
|
"fcm_dpo/delta": 0.09062545001506805,
|
|
"fcm_dpo/margin": 26.62934112548828,
|
|
"fcm_dpo/q_t": 0.43057841062545776,
|
|
"grad_norm": 17.33467674255371,
|
|
"learning_rate": 4.996706849759452e-07,
|
|
"logits/chosen": -0.7032785415649414,
|
|
"logits/rejected": -0.6706737279891968,
|
|
"logps/chosen": -82.26484680175781,
|
|
"logps/ref_chosen": -58.519290924072266,
|
|
"logps/ref_rejected": -87.54750061035156,
|
|
"logps/rejected": -137.9224090576172,
|
|
"loss": 1.163,
|
|
"margin_dpo/margin_mean": 26.62934112548828,
|
|
"margin_dpo/margin_std": 43.98291778564453,
|
|
"step": 80
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -23.4375,
|
|
"KL/mean": -41.379188537597656,
|
|
"KL/rejected_KL_mean": -59.32087707519531,
|
|
"KL/std": 43.66413497924805,
|
|
"epoch": 0.11894273127753303,
|
|
"fcm_dpo/beta": 0.011759042739868164,
|
|
"fcm_dpo/delta": -0.023072410374879837,
|
|
"fcm_dpo/margin": 35.88337326049805,
|
|
"fcm_dpo/q_t": 0.4062455892562866,
|
|
"grad_norm": 18.646865844726562,
|
|
"learning_rate": 4.996015471965529e-07,
|
|
"logits/chosen": -0.6688940525054932,
|
|
"logits/rejected": -0.6473867893218994,
|
|
"logps/chosen": -89.88636779785156,
|
|
"logps/ref_chosen": -66.44886779785156,
|
|
"logps/ref_rejected": -129.66270446777344,
|
|
"logps/rejected": -188.98358154296875,
|
|
"loss": 1.0918,
|
|
"margin_dpo/margin_mean": 35.88337326049805,
|
|
"margin_dpo/margin_std": 52.36148452758789,
|
|
"step": 81
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -26.48457145690918,
|
|
"KL/mean": -40.68313217163086,
|
|
"KL/rejected_KL_mean": -54.88169479370117,
|
|
"KL/std": 39.783546447753906,
|
|
"epoch": 0.12041116005873716,
|
|
"fcm_dpo/beta": 0.011723190546035767,
|
|
"fcm_dpo/delta": -0.040955908596515656,
|
|
"fcm_dpo/margin": 28.397132873535156,
|
|
"fcm_dpo/q_t": 0.42619970440864563,
|
|
"grad_norm": 18.904956817626953,
|
|
"learning_rate": 4.995258321842611e-07,
|
|
"logits/chosen": -0.6247996091842651,
|
|
"logits/rejected": -0.6217905282974243,
|
|
"logps/chosen": -78.71695709228516,
|
|
"logps/ref_chosen": -52.232383728027344,
|
|
"logps/ref_rejected": -90.74325561523438,
|
|
"logps/rejected": -145.62493896484375,
|
|
"loss": 1.1808,
|
|
"margin_dpo/margin_mean": 28.397132873535156,
|
|
"margin_dpo/margin_std": 55.747474670410156,
|
|
"step": 82
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -27.066598892211914,
|
|
"KL/mean": -45.09041213989258,
|
|
"KL/rejected_KL_mean": -63.11423110961914,
|
|
"KL/std": 41.50777816772461,
|
|
"epoch": 0.12187958883994127,
|
|
"fcm_dpo/beta": 0.011612952686846256,
|
|
"fcm_dpo/delta": -0.019600681960582733,
|
|
"fcm_dpo/margin": 36.04762649536133,
|
|
"fcm_dpo/q_t": 0.4054431617259979,
|
|
"grad_norm": 19.01769256591797,
|
|
"learning_rate": 4.994435419342304e-07,
|
|
"logits/chosen": -0.6414648294448853,
|
|
"logits/rejected": -0.6217591762542725,
|
|
"logps/chosen": -82.89398193359375,
|
|
"logps/ref_chosen": -55.82738494873047,
|
|
"logps/ref_rejected": -103.71589660644531,
|
|
"logps/rejected": -166.83013916015625,
|
|
"loss": 1.089,
|
|
"margin_dpo/margin_mean": 36.04762268066406,
|
|
"margin_dpo/margin_std": 50.104408264160156,
|
|
"step": 83
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -28.04334831237793,
|
|
"KL/mean": -41.49994659423828,
|
|
"KL/rejected_KL_mean": -54.956539154052734,
|
|
"KL/std": 35.679141998291016,
|
|
"epoch": 0.12334801762114538,
|
|
"fcm_dpo/beta": 0.011577482335269451,
|
|
"fcm_dpo/delta": -0.022414665669202805,
|
|
"fcm_dpo/margin": 26.913192749023438,
|
|
"fcm_dpo/q_t": 0.4257325530052185,
|
|
"grad_norm": 18.095876693725586,
|
|
"learning_rate": 4.993546786148857e-07,
|
|
"logits/chosen": -0.6499658823013306,
|
|
"logits/rejected": -0.6158007383346558,
|
|
"logps/chosen": -95.21951293945312,
|
|
"logps/ref_chosen": -67.1761703491211,
|
|
"logps/ref_rejected": -87.29859924316406,
|
|
"logps/rejected": -142.25514221191406,
|
|
"loss": 1.1425,
|
|
"margin_dpo/margin_mean": 26.913192749023438,
|
|
"margin_dpo/margin_std": 35.351402282714844,
|
|
"step": 84
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -27.43400764465332,
|
|
"KL/mean": -41.591861724853516,
|
|
"KL/rejected_KL_mean": -55.74971389770508,
|
|
"KL/std": 35.18168258666992,
|
|
"epoch": 0.12481644640234948,
|
|
"fcm_dpo/beta": 0.011758394539356232,
|
|
"fcm_dpo/delta": 0.06842872500419617,
|
|
"fcm_dpo/margin": 28.315706253051758,
|
|
"fcm_dpo/q_t": 0.42318564653396606,
|
|
"grad_norm": 18.253501892089844,
|
|
"learning_rate": 4.992592445678582e-07,
|
|
"logits/chosen": -0.6255546808242798,
|
|
"logits/rejected": -0.5947661995887756,
|
|
"logps/chosen": -85.84062194824219,
|
|
"logps/ref_chosen": -58.4066162109375,
|
|
"logps/ref_rejected": -78.63880157470703,
|
|
"logps/rejected": -134.38851928710938,
|
|
"loss": 1.1441,
|
|
"margin_dpo/margin_mean": 28.31570816040039,
|
|
"margin_dpo/margin_std": 43.07608413696289,
|
|
"step": 85
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -36.229835510253906,
|
|
"KL/mean": -51.37822723388672,
|
|
"KL/rejected_KL_mean": -66.526611328125,
|
|
"KL/std": 52.038185119628906,
|
|
"epoch": 0.1262848751835536,
|
|
"fcm_dpo/beta": 0.011856161057949066,
|
|
"fcm_dpo/delta": 0.041851602494716644,
|
|
"fcm_dpo/margin": 30.296781539916992,
|
|
"fcm_dpo/q_t": 0.42889153957366943,
|
|
"grad_norm": 25.211519241333008,
|
|
"learning_rate": 4.991572423079235e-07,
|
|
"logits/chosen": -0.645729124546051,
|
|
"logits/rejected": -0.6413381099700928,
|
|
"logps/chosen": -92.36730194091797,
|
|
"logps/ref_chosen": -56.13746643066406,
|
|
"logps/ref_rejected": -88.12165069580078,
|
|
"logps/rejected": -154.64825439453125,
|
|
"loss": 1.2181,
|
|
"margin_dpo/margin_mean": 30.29677963256836,
|
|
"margin_dpo/margin_std": 73.2314453125,
|
|
"step": 86
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -35.271644592285156,
|
|
"KL/mean": -54.79254913330078,
|
|
"KL/rejected_KL_mean": -74.31346130371094,
|
|
"KL/std": 52.29801940917969,
|
|
"epoch": 0.1277533039647577,
|
|
"fcm_dpo/beta": 0.011678045615553856,
|
|
"fcm_dpo/delta": -0.059305619448423386,
|
|
"fcm_dpo/margin": 39.041812896728516,
|
|
"fcm_dpo/q_t": 0.39871087670326233,
|
|
"grad_norm": 21.151241302490234,
|
|
"learning_rate": 4.990486745229364e-07,
|
|
"logits/chosen": -0.6645753383636475,
|
|
"logits/rejected": -0.6533565521240234,
|
|
"logps/chosen": -90.9077377319336,
|
|
"logps/ref_chosen": -55.63609313964844,
|
|
"logps/ref_rejected": -95.46757507324219,
|
|
"logps/rejected": -169.78103637695312,
|
|
"loss": 1.1169,
|
|
"margin_dpo/margin_mean": 39.041812896728516,
|
|
"margin_dpo/margin_std": 65.63897705078125,
|
|
"step": 87
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -44.63642883300781,
|
|
"KL/mean": -59.331417083740234,
|
|
"KL/rejected_KL_mean": -74.02639770507812,
|
|
"KL/std": 56.308712005615234,
|
|
"epoch": 0.12922173274596183,
|
|
"fcm_dpo/beta": 0.011739738285541534,
|
|
"fcm_dpo/delta": 0.05674154311418533,
|
|
"fcm_dpo/margin": 29.389965057373047,
|
|
"fcm_dpo/q_t": 0.42796188592910767,
|
|
"grad_norm": 22.641603469848633,
|
|
"learning_rate": 4.989335440737586e-07,
|
|
"logits/chosen": -0.6030087471008301,
|
|
"logits/rejected": -0.6048742532730103,
|
|
"logps/chosen": -118.30757904052734,
|
|
"logps/ref_chosen": -73.67115020751953,
|
|
"logps/ref_rejected": -106.70849609375,
|
|
"logps/rejected": -180.73489379882812,
|
|
"loss": 1.2051,
|
|
"margin_dpo/margin_mean": 29.389965057373047,
|
|
"margin_dpo/margin_std": 62.56239318847656,
|
|
"step": 88
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -29.5759334564209,
|
|
"KL/mean": -45.80217742919922,
|
|
"KL/rejected_KL_mean": -62.02842712402344,
|
|
"KL/std": 47.946502685546875,
|
|
"epoch": 0.13069016152716592,
|
|
"fcm_dpo/beta": 0.011899597942829132,
|
|
"fcm_dpo/delta": 0.014035461470484734,
|
|
"fcm_dpo/margin": 32.45248794555664,
|
|
"fcm_dpo/q_t": 0.4130520224571228,
|
|
"grad_norm": 20.236740112304688,
|
|
"learning_rate": 4.988118539941847e-07,
|
|
"logits/chosen": -0.6709840297698975,
|
|
"logits/rejected": -0.6467102766036987,
|
|
"logps/chosen": -90.20085144042969,
|
|
"logps/ref_chosen": -60.624916076660156,
|
|
"logps/ref_rejected": -82.08354949951172,
|
|
"logps/rejected": -144.11196899414062,
|
|
"loss": 1.1371,
|
|
"margin_dpo/margin_mean": 32.452484130859375,
|
|
"margin_dpo/margin_std": 56.894569396972656,
|
|
"step": 89
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -33.55852508544922,
|
|
"KL/mean": -58.233924865722656,
|
|
"KL/rejected_KL_mean": -82.90933227539062,
|
|
"KL/std": 63.781768798828125,
|
|
"epoch": 0.13215859030837004,
|
|
"fcm_dpo/beta": 0.0116573516279459,
|
|
"fcm_dpo/delta": -0.18619467318058014,
|
|
"fcm_dpo/margin": 49.350799560546875,
|
|
"fcm_dpo/q_t": 0.3860289454460144,
|
|
"grad_norm": 20.391284942626953,
|
|
"learning_rate": 4.986836074908615e-07,
|
|
"logits/chosen": -0.5895090103149414,
|
|
"logits/rejected": -0.614214301109314,
|
|
"logps/chosen": -86.84383392333984,
|
|
"logps/ref_chosen": -53.285308837890625,
|
|
"logps/ref_rejected": -111.54470825195312,
|
|
"logps/rejected": -194.45404052734375,
|
|
"loss": 1.0805,
|
|
"margin_dpo/margin_mean": 49.350799560546875,
|
|
"margin_dpo/margin_std": 80.73963928222656,
|
|
"step": 90
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -36.51939392089844,
|
|
"KL/mean": -53.32404327392578,
|
|
"KL/rejected_KL_mean": -70.12869262695312,
|
|
"KL/std": 47.886573791503906,
|
|
"epoch": 0.13362701908957417,
|
|
"fcm_dpo/beta": 0.011482559144496918,
|
|
"fcm_dpo/delta": 0.014481155201792717,
|
|
"fcm_dpo/margin": 33.60929870605469,
|
|
"fcm_dpo/q_t": 0.41819268465042114,
|
|
"grad_norm": 21.17986488342285,
|
|
"learning_rate": 4.985488079432037e-07,
|
|
"logits/chosen": -0.6286755800247192,
|
|
"logits/rejected": -0.6033735275268555,
|
|
"logps/chosen": -98.32235717773438,
|
|
"logps/ref_chosen": -61.802955627441406,
|
|
"logps/ref_rejected": -87.87395477294922,
|
|
"logps/rejected": -158.00265502929688,
|
|
"loss": 1.1619,
|
|
"margin_dpo/margin_mean": 33.60929870605469,
|
|
"margin_dpo/margin_std": 65.38414764404297,
|
|
"step": 91
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -30.792192459106445,
|
|
"KL/mean": -48.84156036376953,
|
|
"KL/rejected_KL_mean": -66.89093017578125,
|
|
"KL/std": 49.2903938293457,
|
|
"epoch": 0.13509544787077826,
|
|
"fcm_dpo/beta": 0.011489994823932648,
|
|
"fcm_dpo/delta": -0.015798617154359818,
|
|
"fcm_dpo/margin": 36.09873580932617,
|
|
"fcm_dpo/q_t": 0.4099717140197754,
|
|
"grad_norm": 19.466535568237305,
|
|
"learning_rate": 4.984074589033043e-07,
|
|
"logits/chosen": -0.6480433344841003,
|
|
"logits/rejected": -0.6293787360191345,
|
|
"logps/chosen": -82.4329605102539,
|
|
"logps/ref_chosen": -51.640769958496094,
|
|
"logps/ref_rejected": -77.88117980957031,
|
|
"logps/rejected": -144.77210998535156,
|
|
"loss": 1.1196,
|
|
"margin_dpo/margin_mean": 36.09873580932617,
|
|
"margin_dpo/margin_std": 59.55793762207031,
|
|
"step": 92
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -34.3369140625,
|
|
"KL/mean": -51.498043060302734,
|
|
"KL/rejected_KL_mean": -68.65916442871094,
|
|
"KL/std": 43.49789810180664,
|
|
"epoch": 0.13656387665198239,
|
|
"fcm_dpo/beta": 0.01147179864346981,
|
|
"fcm_dpo/delta": 0.006311129778623581,
|
|
"fcm_dpo/margin": 34.32225799560547,
|
|
"fcm_dpo/q_t": 0.41234347224235535,
|
|
"grad_norm": 21.00588607788086,
|
|
"learning_rate": 4.982595640958425e-07,
|
|
"logits/chosen": -0.668390154838562,
|
|
"logits/rejected": -0.632922887802124,
|
|
"logps/chosen": -86.86614990234375,
|
|
"logps/ref_chosen": -52.529239654541016,
|
|
"logps/ref_rejected": -77.16075134277344,
|
|
"logps/rejected": -145.81991577148438,
|
|
"loss": 1.1109,
|
|
"margin_dpo/margin_mean": 34.32225799560547,
|
|
"margin_dpo/margin_std": 52.0517578125,
|
|
"step": 93
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -36.46757507324219,
|
|
"KL/mean": -57.51295471191406,
|
|
"KL/rejected_KL_mean": -78.55833435058594,
|
|
"KL/std": 52.84346008300781,
|
|
"epoch": 0.13803230543318648,
|
|
"fcm_dpo/beta": 0.011251532472670078,
|
|
"fcm_dpo/delta": -0.07815787196159363,
|
|
"fcm_dpo/margin": 42.09075164794922,
|
|
"fcm_dpo/q_t": 0.3950212597846985,
|
|
"grad_norm": 19.056589126586914,
|
|
"learning_rate": 4.98105127417984e-07,
|
|
"logits/chosen": -0.6087779998779297,
|
|
"logits/rejected": -0.6042633056640625,
|
|
"logps/chosen": -97.690185546875,
|
|
"logps/ref_chosen": -61.22261047363281,
|
|
"logps/ref_rejected": -99.59902954101562,
|
|
"logps/rejected": -178.15737915039062,
|
|
"loss": 1.0606,
|
|
"margin_dpo/margin_mean": 42.09075164794922,
|
|
"margin_dpo/margin_std": 56.86457061767578,
|
|
"step": 94
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -35.560211181640625,
|
|
"KL/mean": -52.13538360595703,
|
|
"KL/rejected_KL_mean": -68.71056365966797,
|
|
"KL/std": 53.81283187866211,
|
|
"epoch": 0.1395007342143906,
|
|
"fcm_dpo/beta": 0.011258168146014214,
|
|
"fcm_dpo/delta": 0.02752673253417015,
|
|
"fcm_dpo/margin": 33.15034484863281,
|
|
"fcm_dpo/q_t": 0.41526269912719727,
|
|
"grad_norm": 19.388639450073242,
|
|
"learning_rate": 4.979441529392784e-07,
|
|
"logits/chosen": -0.6361397504806519,
|
|
"logits/rejected": -0.6155867576599121,
|
|
"logps/chosen": -88.08385467529297,
|
|
"logps/ref_chosen": -52.523643493652344,
|
|
"logps/ref_rejected": -75.8803482055664,
|
|
"logps/rejected": -144.59091186523438,
|
|
"loss": 1.1443,
|
|
"margin_dpo/margin_mean": 33.15034484863281,
|
|
"margin_dpo/margin_std": 56.809608459472656,
|
|
"step": 95
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -35.88848876953125,
|
|
"KL/mean": -58.62786865234375,
|
|
"KL/rejected_KL_mean": -81.36726379394531,
|
|
"KL/std": 55.729679107666016,
|
|
"epoch": 0.14096916299559473,
|
|
"fcm_dpo/beta": 0.011056499555706978,
|
|
"fcm_dpo/delta": -0.11031479388475418,
|
|
"fcm_dpo/margin": 45.47876739501953,
|
|
"fcm_dpo/q_t": 0.3921157121658325,
|
|
"grad_norm": 19.741348266601562,
|
|
"learning_rate": 4.977766449015534e-07,
|
|
"logits/chosen": -0.629560112953186,
|
|
"logits/rejected": -0.6098443269729614,
|
|
"logps/chosen": -98.04545593261719,
|
|
"logps/ref_chosen": -62.15697479248047,
|
|
"logps/ref_rejected": -96.59601593017578,
|
|
"logps/rejected": -177.96328735351562,
|
|
"loss": 1.0532,
|
|
"margin_dpo/margin_mean": 45.4787712097168,
|
|
"margin_dpo/margin_std": 63.79114532470703,
|
|
"step": 96
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -36.149559020996094,
|
|
"KL/mean": -53.12095642089844,
|
|
"KL/rejected_KL_mean": -70.09235382080078,
|
|
"KL/std": 48.205299377441406,
|
|
"epoch": 0.14243759177679882,
|
|
"fcm_dpo/beta": 0.01119668036699295,
|
|
"fcm_dpo/delta": 0.019375190138816833,
|
|
"fcm_dpo/margin": 33.94279861450195,
|
|
"fcm_dpo/q_t": 0.41301482915878296,
|
|
"grad_norm": 20.09777069091797,
|
|
"learning_rate": 4.976026077188012e-07,
|
|
"logits/chosen": -0.5712728500366211,
|
|
"logits/rejected": -0.539288341999054,
|
|
"logps/chosen": -90.79592895507812,
|
|
"logps/ref_chosen": -54.646366119384766,
|
|
"logps/ref_rejected": -76.96475219726562,
|
|
"logps/rejected": -147.05709838867188,
|
|
"loss": 1.116,
|
|
"margin_dpo/margin_mean": 33.94279479980469,
|
|
"margin_dpo/margin_std": 48.3692626953125,
|
|
"step": 97
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -46.89853286743164,
|
|
"KL/mean": -66.17620849609375,
|
|
"KL/rejected_KL_mean": -85.4538803100586,
|
|
"KL/std": 56.045082092285156,
|
|
"epoch": 0.14390602055800295,
|
|
"fcm_dpo/beta": 0.01105651818215847,
|
|
"fcm_dpo/delta": -0.02756763994693756,
|
|
"fcm_dpo/margin": 38.55535125732422,
|
|
"fcm_dpo/q_t": 0.4013606309890747,
|
|
"grad_norm": 22.645526885986328,
|
|
"learning_rate": 4.974220459770639e-07,
|
|
"logits/chosen": -0.6146658658981323,
|
|
"logits/rejected": -0.6087555885314941,
|
|
"logps/chosen": -112.15716552734375,
|
|
"logps/ref_chosen": -65.25862884521484,
|
|
"logps/ref_rejected": -96.5274887084961,
|
|
"logps/rejected": -181.9813690185547,
|
|
"loss": 1.1381,
|
|
"margin_dpo/margin_mean": 38.55535125732422,
|
|
"margin_dpo/margin_std": 68.4720687866211,
|
|
"step": 98
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -34.3526611328125,
|
|
"KL/mean": -59.61415481567383,
|
|
"KL/rejected_KL_mean": -84.87564086914062,
|
|
"KL/std": 57.15583038330078,
|
|
"epoch": 0.14537444933920704,
|
|
"fcm_dpo/beta": 0.010768149048089981,
|
|
"fcm_dpo/delta": -0.1532631367444992,
|
|
"fcm_dpo/margin": 50.522987365722656,
|
|
"fcm_dpo/q_t": 0.38575196266174316,
|
|
"grad_norm": 18.111738204956055,
|
|
"learning_rate": 4.972349644343108e-07,
|
|
"logits/chosen": -0.5844358205795288,
|
|
"logits/rejected": -0.5914009213447571,
|
|
"logps/chosen": -79.99114990234375,
|
|
"logps/ref_chosen": -45.638484954833984,
|
|
"logps/ref_rejected": -86.43793487548828,
|
|
"logps/rejected": -171.31356811523438,
|
|
"loss": 1.0399,
|
|
"margin_dpo/margin_mean": 50.522987365722656,
|
|
"margin_dpo/margin_std": 72.46966552734375,
|
|
"step": 99
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -39.64739990234375,
|
|
"KL/mean": -51.475440979003906,
|
|
"KL/rejected_KL_mean": -63.3034782409668,
|
|
"KL/std": 47.796791076660156,
|
|
"epoch": 0.14684287812041116,
|
|
"fcm_dpo/beta": 0.010749414563179016,
|
|
"fcm_dpo/delta": 0.02157057449221611,
|
|
"fcm_dpo/margin": 23.65607452392578,
|
|
"fcm_dpo/q_t": 0.44148170948028564,
|
|
"grad_norm": 22.503297805786133,
|
|
"learning_rate": 4.970413680203148e-07,
|
|
"logits/chosen": -0.5854922533035278,
|
|
"logits/rejected": -0.5520744323730469,
|
|
"logps/chosen": -97.24137878417969,
|
|
"logps/ref_chosen": -57.59397888183594,
|
|
"logps/ref_rejected": -74.06021118164062,
|
|
"logps/rejected": -137.3636932373047,
|
|
"loss": 1.2548,
|
|
"margin_dpo/margin_mean": 23.65607452392578,
|
|
"margin_dpo/margin_std": 61.108909606933594,
|
|
"step": 100
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -47.286502838134766,
|
|
"KL/mean": -63.186546325683594,
|
|
"KL/rejected_KL_mean": -79.08659362792969,
|
|
"KL/std": 57.57587432861328,
|
|
"epoch": 0.14831130690161526,
|
|
"fcm_dpo/beta": 0.010717286728322506,
|
|
"fcm_dpo/delta": -0.025740258395671844,
|
|
"fcm_dpo/margin": 31.800077438354492,
|
|
"fcm_dpo/q_t": 0.4272317886352539,
|
|
"grad_norm": 21.163375854492188,
|
|
"learning_rate": 4.968412618365215e-07,
|
|
"logits/chosen": -0.5871464014053345,
|
|
"logits/rejected": -0.5648493766784668,
|
|
"logps/chosen": -108.93535614013672,
|
|
"logps/ref_chosen": -61.64885330200195,
|
|
"logps/ref_rejected": -83.18968200683594,
|
|
"logps/rejected": -162.27627563476562,
|
|
"loss": 1.1985,
|
|
"margin_dpo/margin_mean": 31.800079345703125,
|
|
"margin_dpo/margin_std": 68.4903793334961,
|
|
"step": 101
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -49.380165100097656,
|
|
"KL/mean": -60.16246795654297,
|
|
"KL/rejected_KL_mean": -70.94477844238281,
|
|
"KL/std": 52.73678970336914,
|
|
"epoch": 0.14977973568281938,
|
|
"fcm_dpo/beta": 0.010729561559855938,
|
|
"fcm_dpo/delta": 0.011440283618867397,
|
|
"fcm_dpo/margin": 21.564611434936523,
|
|
"fcm_dpo/q_t": 0.44714266061782837,
|
|
"grad_norm": 24.271780014038086,
|
|
"learning_rate": 4.966346511559149e-07,
|
|
"logits/chosen": -0.6152628660202026,
|
|
"logits/rejected": -0.5813932418823242,
|
|
"logps/chosen": -113.45904541015625,
|
|
"logps/ref_chosen": -64.0788803100586,
|
|
"logps/ref_rejected": -68.18707275390625,
|
|
"logps/rejected": -139.13185119628906,
|
|
"loss": 1.2855,
|
|
"margin_dpo/margin_mean": 21.564613342285156,
|
|
"margin_dpo/margin_std": 64.52105712890625,
|
|
"step": 102
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -41.929290771484375,
|
|
"KL/mean": -65.89997100830078,
|
|
"KL/rejected_KL_mean": -89.87065124511719,
|
|
"KL/std": 59.839744567871094,
|
|
"epoch": 0.1512481644640235,
|
|
"fcm_dpo/beta": 0.010541867464780807,
|
|
"fcm_dpo/delta": -0.11135346442461014,
|
|
"fcm_dpo/margin": 47.94136047363281,
|
|
"fcm_dpo/q_t": 0.39261913299560547,
|
|
"grad_norm": 21.387969970703125,
|
|
"learning_rate": 4.964215414228785e-07,
|
|
"logits/chosen": -0.5887047052383423,
|
|
"logits/rejected": -0.5597223043441772,
|
|
"logps/chosen": -103.22856903076172,
|
|
"logps/ref_chosen": -61.299278259277344,
|
|
"logps/ref_rejected": -93.57270812988281,
|
|
"logps/rejected": -183.443359375,
|
|
"loss": 1.0745,
|
|
"margin_dpo/margin_mean": 47.94136047363281,
|
|
"margin_dpo/margin_std": 75.64704895019531,
|
|
"step": 103
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -41.508384704589844,
|
|
"KL/mean": -64.53752899169922,
|
|
"KL/rejected_KL_mean": -87.56666564941406,
|
|
"KL/std": 62.41368103027344,
|
|
"epoch": 0.1527165932452276,
|
|
"fcm_dpo/beta": 0.010378319770097733,
|
|
"fcm_dpo/delta": -0.08192168176174164,
|
|
"fcm_dpo/margin": 46.05828094482422,
|
|
"fcm_dpo/q_t": 0.4016958773136139,
|
|
"grad_norm": 19.554399490356445,
|
|
"learning_rate": 4.96201938253052e-07,
|
|
"logits/chosen": -0.6282894015312195,
|
|
"logits/rejected": -0.6099402904510498,
|
|
"logps/chosen": -95.88116455078125,
|
|
"logps/ref_chosen": -54.372772216796875,
|
|
"logps/ref_rejected": -89.5647201538086,
|
|
"logps/rejected": -177.13137817382812,
|
|
"loss": 1.104,
|
|
"margin_dpo/margin_mean": 46.05828094482422,
|
|
"margin_dpo/margin_std": 78.06277465820312,
|
|
"step": 104
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -38.417266845703125,
|
|
"KL/mean": -71.04873657226562,
|
|
"KL/rejected_KL_mean": -103.68021392822266,
|
|
"KL/std": 57.34044647216797,
|
|
"epoch": 0.15418502202643172,
|
|
"fcm_dpo/beta": 0.009946699254214764,
|
|
"fcm_dpo/delta": -0.26617431640625,
|
|
"fcm_dpo/margin": 65.26294708251953,
|
|
"fcm_dpo/q_t": 0.3527500033378601,
|
|
"grad_norm": 18.77589988708496,
|
|
"learning_rate": 4.959758474331832e-07,
|
|
"logits/chosen": -0.5998860597610474,
|
|
"logits/rejected": -0.5840677618980408,
|
|
"logps/chosen": -93.05621337890625,
|
|
"logps/ref_chosen": -54.638946533203125,
|
|
"logps/ref_rejected": -97.97351837158203,
|
|
"logps/rejected": -201.6537322998047,
|
|
"loss": 0.9308,
|
|
"margin_dpo/margin_mean": 65.26294708251953,
|
|
"margin_dpo/margin_std": 64.84896087646484,
|
|
"step": 105
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -43.48663330078125,
|
|
"KL/mean": -63.503273010253906,
|
|
"KL/rejected_KL_mean": -83.5199203491211,
|
|
"KL/std": 52.4485969543457,
|
|
"epoch": 0.15565345080763582,
|
|
"fcm_dpo/beta": 0.00979258120059967,
|
|
"fcm_dpo/delta": 0.008240575902163982,
|
|
"fcm_dpo/margin": 40.03329086303711,
|
|
"fcm_dpo/q_t": 0.4101172089576721,
|
|
"grad_norm": 18.440258026123047,
|
|
"learning_rate": 4.957432749209755e-07,
|
|
"logits/chosen": -0.5603185296058655,
|
|
"logits/rejected": -0.5446274280548096,
|
|
"logps/chosen": -98.31952667236328,
|
|
"logps/ref_chosen": -54.83289337158203,
|
|
"logps/ref_rejected": -85.22461700439453,
|
|
"logps/rejected": -168.74453735351562,
|
|
"loss": 1.1055,
|
|
"margin_dpo/margin_mean": 40.033287048339844,
|
|
"margin_dpo/margin_std": 56.37229919433594,
|
|
"step": 106
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -51.00328063964844,
|
|
"KL/mean": -73.30986022949219,
|
|
"KL/rejected_KL_mean": -95.61644744873047,
|
|
"KL/std": 63.23443603515625,
|
|
"epoch": 0.15712187958883994,
|
|
"fcm_dpo/beta": 0.009722733870148659,
|
|
"fcm_dpo/delta": -0.0357857272028923,
|
|
"fcm_dpo/margin": 44.61316680908203,
|
|
"fcm_dpo/q_t": 0.40253520011901855,
|
|
"grad_norm": 19.489307403564453,
|
|
"learning_rate": 4.955042268449307e-07,
|
|
"logits/chosen": -0.5885684490203857,
|
|
"logits/rejected": -0.549545168876648,
|
|
"logps/chosen": -120.71109008789062,
|
|
"logps/ref_chosen": -69.70780944824219,
|
|
"logps/ref_rejected": -94.73950958251953,
|
|
"logps/rejected": -190.35595703125,
|
|
"loss": 1.0986,
|
|
"margin_dpo/margin_mean": 44.61316680908203,
|
|
"margin_dpo/margin_std": 67.18595886230469,
|
|
"step": 107
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -43.193382263183594,
|
|
"KL/mean": -71.44862365722656,
|
|
"KL/rejected_KL_mean": -99.703857421875,
|
|
"KL/std": 69.26988220214844,
|
|
"epoch": 0.15859030837004406,
|
|
"fcm_dpo/beta": 0.009545030072331429,
|
|
"fcm_dpo/delta": -0.14720328152179718,
|
|
"fcm_dpo/margin": 56.510475158691406,
|
|
"fcm_dpo/q_t": 0.3897445499897003,
|
|
"grad_norm": 17.88391876220703,
|
|
"learning_rate": 4.952587095041881e-07,
|
|
"logits/chosen": -0.5926969051361084,
|
|
"logits/rejected": -0.5816439390182495,
|
|
"logps/chosen": -99.2032699584961,
|
|
"logps/ref_chosen": -56.0098876953125,
|
|
"logps/ref_rejected": -95.79601287841797,
|
|
"logps/rejected": -195.4998779296875,
|
|
"loss": 1.0727,
|
|
"margin_dpo/margin_mean": 56.510475158691406,
|
|
"margin_dpo/margin_std": 89.7379150390625,
|
|
"step": 108
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -40.947486877441406,
|
|
"KL/mean": -69.35021209716797,
|
|
"KL/rejected_KL_mean": -97.7529296875,
|
|
"KL/std": 64.73497772216797,
|
|
"epoch": 0.16005873715124816,
|
|
"fcm_dpo/beta": 0.009237101301550865,
|
|
"fcm_dpo/delta": -0.13259612023830414,
|
|
"fcm_dpo/margin": 56.80544662475586,
|
|
"fcm_dpo/q_t": 0.3831191956996918,
|
|
"grad_norm": 20.214975357055664,
|
|
"learning_rate": 4.95006729368358e-07,
|
|
"logits/chosen": -0.5185987949371338,
|
|
"logits/rejected": -0.4973178505897522,
|
|
"logps/chosen": -103.83297729492188,
|
|
"logps/ref_chosen": -62.88549041748047,
|
|
"logps/ref_rejected": -98.68573760986328,
|
|
"logps/rejected": -196.43865966796875,
|
|
"loss": 1.0587,
|
|
"margin_dpo/margin_mean": 56.805450439453125,
|
|
"margin_dpo/margin_std": 82.919921875,
|
|
"step": 109
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -41.9679069519043,
|
|
"KL/mean": -67.5267333984375,
|
|
"KL/rejected_KL_mean": -93.08556365966797,
|
|
"KL/std": 64.46952819824219,
|
|
"epoch": 0.16152716593245228,
|
|
"fcm_dpo/beta": 0.009010251611471176,
|
|
"fcm_dpo/delta": -0.06750426441431046,
|
|
"fcm_dpo/margin": 51.11765670776367,
|
|
"fcm_dpo/q_t": 0.39977186918258667,
|
|
"grad_norm": 16.951026916503906,
|
|
"learning_rate": 4.947482930773511e-07,
|
|
"logits/chosen": -0.5459502339363098,
|
|
"logits/rejected": -0.5163878798484802,
|
|
"logps/chosen": -100.72158813476562,
|
|
"logps/ref_chosen": -58.753684997558594,
|
|
"logps/ref_rejected": -79.75001525878906,
|
|
"logps/rejected": -172.8355712890625,
|
|
"loss": 1.1026,
|
|
"margin_dpo/margin_mean": 51.11766052246094,
|
|
"margin_dpo/margin_std": 78.87454223632812,
|
|
"step": 110
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -46.687034606933594,
|
|
"KL/mean": -75.8902816772461,
|
|
"KL/rejected_KL_mean": -105.0935287475586,
|
|
"KL/std": 71.61009979248047,
|
|
"epoch": 0.16299559471365638,
|
|
"fcm_dpo/beta": 0.008878624066710472,
|
|
"fcm_dpo/delta": -0.12616059184074402,
|
|
"fcm_dpo/margin": 58.406497955322266,
|
|
"fcm_dpo/q_t": 0.3876160979270935,
|
|
"grad_norm": 19.642478942871094,
|
|
"learning_rate": 4.944834074412042e-07,
|
|
"logits/chosen": -0.5734955072402954,
|
|
"logits/rejected": -0.5520858764648438,
|
|
"logps/chosen": -115.31114196777344,
|
|
"logps/ref_chosen": -68.62410736083984,
|
|
"logps/ref_rejected": -98.42886352539062,
|
|
"logps/rejected": -203.52239990234375,
|
|
"loss": 1.0866,
|
|
"margin_dpo/margin_mean": 58.40650177001953,
|
|
"margin_dpo/margin_std": 92.70616149902344,
|
|
"step": 111
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -41.41010284423828,
|
|
"KL/mean": -58.663936614990234,
|
|
"KL/rejected_KL_mean": -75.91777038574219,
|
|
"KL/std": 46.183799743652344,
|
|
"epoch": 0.1644640234948605,
|
|
"fcm_dpo/beta": 0.008953899145126343,
|
|
"fcm_dpo/delta": 0.09397280961275101,
|
|
"fcm_dpo/margin": 34.507667541503906,
|
|
"fcm_dpo/q_t": 0.42883390188217163,
|
|
"grad_norm": 16.9028263092041,
|
|
"learning_rate": 4.942120794399002e-07,
|
|
"logits/chosen": -0.5514860153198242,
|
|
"logits/rejected": -0.5197386741638184,
|
|
"logps/chosen": -91.65974426269531,
|
|
"logps/ref_chosen": -50.24964141845703,
|
|
"logps/ref_rejected": -64.77442932128906,
|
|
"logps/rejected": -140.69219970703125,
|
|
"loss": 1.1682,
|
|
"margin_dpo/margin_mean": 34.507667541503906,
|
|
"margin_dpo/margin_std": 58.28282165527344,
|
|
"step": 112
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -53.2155647277832,
|
|
"KL/mean": -70.94862365722656,
|
|
"KL/rejected_KL_mean": -88.68169403076172,
|
|
"KL/std": 53.065284729003906,
|
|
"epoch": 0.16593245227606462,
|
|
"fcm_dpo/beta": 0.009143839590251446,
|
|
"fcm_dpo/delta": 0.07819047570228577,
|
|
"fcm_dpo/margin": 35.46611785888672,
|
|
"fcm_dpo/q_t": 0.42639607191085815,
|
|
"grad_norm": 22.58910369873047,
|
|
"learning_rate": 4.939343162231841e-07,
|
|
"logits/chosen": -0.5312271118164062,
|
|
"logits/rejected": -0.4909241497516632,
|
|
"logps/chosen": -119.92851257324219,
|
|
"logps/ref_chosen": -66.71295166015625,
|
|
"logps/ref_rejected": -77.96870422363281,
|
|
"logps/rejected": -166.650390625,
|
|
"loss": 1.1542,
|
|
"margin_dpo/margin_mean": 35.466121673583984,
|
|
"margin_dpo/margin_std": 58.21109390258789,
|
|
"step": 113
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -46.58165740966797,
|
|
"KL/mean": -75.94132232666016,
|
|
"KL/rejected_KL_mean": -105.30099487304688,
|
|
"KL/std": 72.10044860839844,
|
|
"epoch": 0.16740088105726872,
|
|
"fcm_dpo/beta": 0.008939421735703945,
|
|
"fcm_dpo/delta": -0.13344621658325195,
|
|
"fcm_dpo/margin": 58.719337463378906,
|
|
"fcm_dpo/q_t": 0.3906566798686981,
|
|
"grad_norm": 19.71776580810547,
|
|
"learning_rate": 4.936501251103751e-07,
|
|
"logits/chosen": -0.5248334407806396,
|
|
"logits/rejected": -0.4965624213218689,
|
|
"logps/chosen": -104.36673736572266,
|
|
"logps/ref_chosen": -57.78507995605469,
|
|
"logps/ref_rejected": -87.10966491699219,
|
|
"logps/rejected": -192.41064453125,
|
|
"loss": 1.0516,
|
|
"margin_dpo/margin_mean": 58.719337463378906,
|
|
"margin_dpo/margin_std": 87.8689956665039,
|
|
"step": 114
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -59.11194610595703,
|
|
"KL/mean": -79.4608154296875,
|
|
"KL/rejected_KL_mean": -99.8096923828125,
|
|
"KL/std": 69.4343490600586,
|
|
"epoch": 0.16886930983847284,
|
|
"fcm_dpo/beta": 0.009012718684971333,
|
|
"fcm_dpo/delta": 0.034050408750772476,
|
|
"fcm_dpo/margin": 40.69775390625,
|
|
"fcm_dpo/q_t": 0.424325168132782,
|
|
"grad_norm": 25.762571334838867,
|
|
"learning_rate": 4.933595135901732e-07,
|
|
"logits/chosen": -0.5764279961585999,
|
|
"logits/rejected": -0.5661026239395142,
|
|
"logps/chosen": -124.69458770751953,
|
|
"logps/ref_chosen": -65.5826416015625,
|
|
"logps/ref_rejected": -98.56552124023438,
|
|
"logps/rejected": -198.37521362304688,
|
|
"loss": 1.2073,
|
|
"margin_dpo/margin_mean": 40.69775390625,
|
|
"margin_dpo/margin_std": 93.00070190429688,
|
|
"step": 115
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -42.108062744140625,
|
|
"KL/mean": -64.91938018798828,
|
|
"KL/rejected_KL_mean": -87.73070526123047,
|
|
"KL/std": 55.80364990234375,
|
|
"epoch": 0.17033773861967694,
|
|
"fcm_dpo/beta": 0.00902644731104374,
|
|
"fcm_dpo/delta": -0.012853723019361496,
|
|
"fcm_dpo/margin": 45.62263488769531,
|
|
"fcm_dpo/q_t": 0.40703320503234863,
|
|
"grad_norm": 18.27412223815918,
|
|
"learning_rate": 4.930624893204624e-07,
|
|
"logits/chosen": -0.5399957895278931,
|
|
"logits/rejected": -0.5367946624755859,
|
|
"logps/chosen": -93.50837707519531,
|
|
"logps/ref_chosen": -51.40031433105469,
|
|
"logps/ref_rejected": -80.5218505859375,
|
|
"logps/rejected": -168.2525634765625,
|
|
"loss": 1.0915,
|
|
"margin_dpo/margin_mean": 45.62263870239258,
|
|
"margin_dpo/margin_std": 62.58723831176758,
|
|
"step": 116
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -55.41862487792969,
|
|
"KL/mean": -72.41690826416016,
|
|
"KL/rejected_KL_mean": -89.41517639160156,
|
|
"KL/std": 57.73213195800781,
|
|
"epoch": 0.17180616740088106,
|
|
"fcm_dpo/beta": 0.009095819666981697,
|
|
"fcm_dpo/delta": 0.09368915110826492,
|
|
"fcm_dpo/margin": 33.996551513671875,
|
|
"fcm_dpo/q_t": 0.430633544921875,
|
|
"grad_norm": 24.457244873046875,
|
|
"learning_rate": 4.927590601281083e-07,
|
|
"logits/chosen": -0.527985692024231,
|
|
"logits/rejected": -0.4939156174659729,
|
|
"logps/chosen": -124.71703338623047,
|
|
"logps/ref_chosen": -69.29840850830078,
|
|
"logps/ref_rejected": -66.583984375,
|
|
"logps/rejected": -155.99917602539062,
|
|
"loss": 1.1898,
|
|
"margin_dpo/margin_mean": 33.996551513671875,
|
|
"margin_dpo/margin_std": 68.07157897949219,
|
|
"step": 117
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -43.04719161987305,
|
|
"KL/mean": -64.5184326171875,
|
|
"KL/rejected_KL_mean": -85.98967742919922,
|
|
"KL/std": 53.621681213378906,
|
|
"epoch": 0.17327459618208516,
|
|
"fcm_dpo/beta": 0.009164330549538136,
|
|
"fcm_dpo/delta": 0.006666237488389015,
|
|
"fcm_dpo/margin": 42.942481994628906,
|
|
"fcm_dpo/q_t": 0.4111817479133606,
|
|
"grad_norm": 18.125078201293945,
|
|
"learning_rate": 4.924492340087524e-07,
|
|
"logits/chosen": -0.5757678151130676,
|
|
"logits/rejected": -0.558840274810791,
|
|
"logps/chosen": -98.68817138671875,
|
|
"logps/ref_chosen": -55.6409797668457,
|
|
"logps/ref_rejected": -75.66905975341797,
|
|
"logps/rejected": -161.6587371826172,
|
|
"loss": 1.1042,
|
|
"margin_dpo/margin_mean": 42.942481994628906,
|
|
"margin_dpo/margin_std": 61.708221435546875,
|
|
"step": 118
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -58.032772064208984,
|
|
"KL/mean": -79.35901641845703,
|
|
"KL/rejected_KL_mean": -100.68525695800781,
|
|
"KL/std": 59.09315490722656,
|
|
"epoch": 0.17474302496328928,
|
|
"fcm_dpo/beta": 0.009107567369937897,
|
|
"fcm_dpo/delta": 0.011166650801897049,
|
|
"fcm_dpo/margin": 42.652496337890625,
|
|
"fcm_dpo/q_t": 0.4156750738620758,
|
|
"grad_norm": 19.924360275268555,
|
|
"learning_rate": 4.92133019126601e-07,
|
|
"logits/chosen": -0.5483442544937134,
|
|
"logits/rejected": -0.5380154848098755,
|
|
"logps/chosen": -131.54296875,
|
|
"logps/ref_chosen": -73.51019287109375,
|
|
"logps/ref_rejected": -102.977294921875,
|
|
"logps/rejected": -203.6625518798828,
|
|
"loss": 1.1422,
|
|
"margin_dpo/margin_mean": 42.652496337890625,
|
|
"margin_dpo/margin_std": 74.453125,
|
|
"step": 119
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -57.64821243286133,
|
|
"KL/mean": -87.497314453125,
|
|
"KL/rejected_KL_mean": -117.34640502929688,
|
|
"KL/std": 71.73361206054688,
|
|
"epoch": 0.1762114537444934,
|
|
"fcm_dpo/beta": 0.008971385657787323,
|
|
"fcm_dpo/delta": -0.14341270923614502,
|
|
"fcm_dpo/margin": 59.69819641113281,
|
|
"fcm_dpo/q_t": 0.381797730922699,
|
|
"grad_norm": 20.973718643188477,
|
|
"learning_rate": 4.918104238142103e-07,
|
|
"logits/chosen": -0.5744304656982422,
|
|
"logits/rejected": -0.5464004278182983,
|
|
"logps/chosen": -134.42904663085938,
|
|
"logps/ref_chosen": -76.78083801269531,
|
|
"logps/ref_rejected": -108.02374267578125,
|
|
"logps/rejected": -225.37014770507812,
|
|
"loss": 1.0263,
|
|
"margin_dpo/margin_mean": 59.69819641113281,
|
|
"margin_dpo/margin_std": 77.37503051757812,
|
|
"step": 120
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -53.727439880371094,
|
|
"KL/mean": -87.06233215332031,
|
|
"KL/rejected_KL_mean": -120.39723205566406,
|
|
"KL/std": 69.44754028320312,
|
|
"epoch": 0.1776798825256975,
|
|
"fcm_dpo/beta": 0.008634019643068314,
|
|
"fcm_dpo/delta": -0.187477245926857,
|
|
"fcm_dpo/margin": 66.66979217529297,
|
|
"fcm_dpo/q_t": 0.37362366914749146,
|
|
"grad_norm": 19.9356689453125,
|
|
"learning_rate": 4.91481456572267e-07,
|
|
"logits/chosen": -0.5384774208068848,
|
|
"logits/rejected": -0.5364508628845215,
|
|
"logps/chosen": -115.517333984375,
|
|
"logps/ref_chosen": -61.789894104003906,
|
|
"logps/ref_rejected": -109.99456787109375,
|
|
"logps/rejected": -230.3917999267578,
|
|
"loss": 1.0099,
|
|
"margin_dpo/margin_mean": 66.66979217529297,
|
|
"margin_dpo/margin_std": 85.21018981933594,
|
|
"step": 121
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -47.43219757080078,
|
|
"KL/mean": -87.10603332519531,
|
|
"KL/rejected_KL_mean": -126.77987670898438,
|
|
"KL/std": 75.40602111816406,
|
|
"epoch": 0.17914831130690162,
|
|
"fcm_dpo/beta": 0.008252674713730812,
|
|
"fcm_dpo/delta": -0.27251002192497253,
|
|
"fcm_dpo/margin": 79.34767150878906,
|
|
"fcm_dpo/q_t": 0.35448387265205383,
|
|
"grad_norm": 20.8872013092041,
|
|
"learning_rate": 4.911461260693638e-07,
|
|
"logits/chosen": -0.4834294021129608,
|
|
"logits/rejected": -0.5015791058540344,
|
|
"logps/chosen": -94.33441162109375,
|
|
"logps/ref_chosen": -46.9022102355957,
|
|
"logps/ref_rejected": -106.71418762207031,
|
|
"logps/rejected": -233.4940643310547,
|
|
"loss": 0.9294,
|
|
"margin_dpo/margin_mean": 79.34767150878906,
|
|
"margin_dpo/margin_std": 80.3590316772461,
|
|
"step": 122
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -53.51214599609375,
|
|
"KL/mean": -79.38796997070312,
|
|
"KL/rejected_KL_mean": -105.2637939453125,
|
|
"KL/std": 65.03099060058594,
|
|
"epoch": 0.18061674008810572,
|
|
"fcm_dpo/beta": 0.008060860447585583,
|
|
"fcm_dpo/delta": -0.018722567707300186,
|
|
"fcm_dpo/margin": 51.75164031982422,
|
|
"fcm_dpo/q_t": 0.4093893766403198,
|
|
"grad_norm": 21.224868774414062,
|
|
"learning_rate": 4.908044411417711e-07,
|
|
"logits/chosen": -0.504252552986145,
|
|
"logits/rejected": -0.48894137144088745,
|
|
"logps/chosen": -114.85078430175781,
|
|
"logps/ref_chosen": -61.33863830566406,
|
|
"logps/ref_rejected": -87.775390625,
|
|
"logps/rejected": -193.0391845703125,
|
|
"loss": 1.1298,
|
|
"margin_dpo/margin_mean": 51.75164031982422,
|
|
"margin_dpo/margin_std": 88.38829040527344,
|
|
"step": 123
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -62.671470642089844,
|
|
"KL/mean": -101.19732666015625,
|
|
"KL/rejected_KL_mean": -139.72317504882812,
|
|
"KL/std": 90.27295684814453,
|
|
"epoch": 0.18208516886930984,
|
|
"fcm_dpo/beta": 0.007825289852917194,
|
|
"fcm_dpo/delta": -0.21662405133247375,
|
|
"fcm_dpo/margin": 77.05169677734375,
|
|
"fcm_dpo/q_t": 0.37590959668159485,
|
|
"grad_norm": 21.635025024414062,
|
|
"learning_rate": 4.904564107932048e-07,
|
|
"logits/chosen": -0.4845294654369354,
|
|
"logits/rejected": -0.488964319229126,
|
|
"logps/chosen": -134.11981201171875,
|
|
"logps/ref_chosen": -71.44833374023438,
|
|
"logps/ref_rejected": -117.58056640625,
|
|
"logps/rejected": -257.3037414550781,
|
|
"loss": 1.0383,
|
|
"margin_dpo/margin_mean": 77.05169677734375,
|
|
"margin_dpo/margin_std": 113.15299987792969,
|
|
"step": 124
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -49.68274688720703,
|
|
"KL/mean": -82.18218231201172,
|
|
"KL/rejected_KL_mean": -114.68162536621094,
|
|
"KL/std": 74.36106872558594,
|
|
"epoch": 0.18355359765051396,
|
|
"fcm_dpo/beta": 0.007650085724890232,
|
|
"fcm_dpo/delta": -0.10223683714866638,
|
|
"fcm_dpo/margin": 64.99887084960938,
|
|
"fcm_dpo/q_t": 0.3918633460998535,
|
|
"grad_norm": 18.291349411010742,
|
|
"learning_rate": 4.90102044194588e-07,
|
|
"logits/chosen": -0.4274330139160156,
|
|
"logits/rejected": -0.4299238622188568,
|
|
"logps/chosen": -99.81968688964844,
|
|
"logps/ref_chosen": -50.136940002441406,
|
|
"logps/ref_rejected": -83.98861694335938,
|
|
"logps/rejected": -198.67022705078125,
|
|
"loss": 1.0649,
|
|
"margin_dpo/margin_mean": 64.99887084960938,
|
|
"margin_dpo/margin_std": 93.5775375366211,
|
|
"step": 125
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -53.004913330078125,
|
|
"KL/mean": -84.8802719116211,
|
|
"KL/rejected_KL_mean": -116.75562286376953,
|
|
"KL/std": 69.43212890625,
|
|
"epoch": 0.18502202643171806,
|
|
"fcm_dpo/beta": 0.007492750883102417,
|
|
"fcm_dpo/delta": -0.08181630074977875,
|
|
"fcm_dpo/margin": 63.75071716308594,
|
|
"fcm_dpo/q_t": 0.3952334523200989,
|
|
"grad_norm": 18.59366226196289,
|
|
"learning_rate": 4.897413506838102e-07,
|
|
"logits/chosen": -0.4854479432106018,
|
|
"logits/rejected": -0.4832964539527893,
|
|
"logps/chosen": -108.67198181152344,
|
|
"logps/ref_chosen": -55.66706848144531,
|
|
"logps/ref_rejected": -98.1297607421875,
|
|
"logps/rejected": -214.8853759765625,
|
|
"loss": 1.0617,
|
|
"margin_dpo/margin_mean": 63.75071716308594,
|
|
"margin_dpo/margin_std": 88.2365493774414,
|
|
"step": 126
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -51.491268157958984,
|
|
"KL/mean": -73.90081787109375,
|
|
"KL/rejected_KL_mean": -96.31036376953125,
|
|
"KL/std": 60.034873962402344,
|
|
"epoch": 0.18649045521292218,
|
|
"fcm_dpo/beta": 0.007554207928478718,
|
|
"fcm_dpo/delta": 0.06351131945848465,
|
|
"fcm_dpo/margin": 44.81909942626953,
|
|
"fcm_dpo/q_t": 0.4214317202568054,
|
|
"grad_norm": 17.630146026611328,
|
|
"learning_rate": 4.89374339765481e-07,
|
|
"logits/chosen": -0.4814761281013489,
|
|
"logits/rejected": -0.46358734369277954,
|
|
"logps/chosen": -108.04594421386719,
|
|
"logps/ref_chosen": -56.55467987060547,
|
|
"logps/ref_rejected": -76.7957763671875,
|
|
"logps/rejected": -173.10614013671875,
|
|
"loss": 1.152,
|
|
"margin_dpo/margin_mean": 44.81909942626953,
|
|
"margin_dpo/margin_std": 74.9388198852539,
|
|
"step": 127
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -52.88978958129883,
|
|
"KL/mean": -76.76133728027344,
|
|
"KL/rejected_KL_mean": -100.63288879394531,
|
|
"KL/std": 68.35536193847656,
|
|
"epoch": 0.18795888399412627,
|
|
"fcm_dpo/beta": 0.007643786258995533,
|
|
"fcm_dpo/delta": 0.03590956702828407,
|
|
"fcm_dpo/margin": 47.74309539794922,
|
|
"fcm_dpo/q_t": 0.4184267520904541,
|
|
"grad_norm": 19.399093627929688,
|
|
"learning_rate": 4.890010211106795e-07,
|
|
"logits/chosen": -0.5004081726074219,
|
|
"logits/rejected": -0.4844392240047455,
|
|
"logps/chosen": -111.0107421875,
|
|
"logps/ref_chosen": -58.12095642089844,
|
|
"logps/ref_rejected": -76.43896484375,
|
|
"logps/rejected": -177.0718536376953,
|
|
"loss": 1.1543,
|
|
"margin_dpo/margin_mean": 47.74309539794922,
|
|
"margin_dpo/margin_std": 85.46942138671875,
|
|
"step": 128
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -65.713623046875,
|
|
"KL/mean": -90.0938720703125,
|
|
"KL/rejected_KL_mean": -114.47410583496094,
|
|
"KL/std": 78.53107452392578,
|
|
"epoch": 0.1894273127753304,
|
|
"fcm_dpo/beta": 0.007660663686692715,
|
|
"fcm_dpo/delta": 0.027477692812681198,
|
|
"fcm_dpo/margin": 48.760475158691406,
|
|
"fcm_dpo/q_t": 0.42177292704582214,
|
|
"grad_norm": 19.868791580200195,
|
|
"learning_rate": 4.88621404556699e-07,
|
|
"logits/chosen": -0.5160699486732483,
|
|
"logits/rejected": -0.5076286792755127,
|
|
"logps/chosen": -132.6300048828125,
|
|
"logps/ref_chosen": -66.91637420654297,
|
|
"logps/ref_rejected": -96.6422119140625,
|
|
"logps/rejected": -211.11631774902344,
|
|
"loss": 1.1857,
|
|
"margin_dpo/margin_mean": 48.760475158691406,
|
|
"margin_dpo/margin_std": 104.92391967773438,
|
|
"step": 129
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -47.17679977416992,
|
|
"KL/mean": -84.11569213867188,
|
|
"KL/rejected_KL_mean": -121.05458068847656,
|
|
"KL/std": 75.62738037109375,
|
|
"epoch": 0.19089574155653452,
|
|
"fcm_dpo/beta": 0.0075556435622274876,
|
|
"fcm_dpo/delta": -0.16790251433849335,
|
|
"fcm_dpo/margin": 73.87777709960938,
|
|
"fcm_dpo/q_t": 0.37920111417770386,
|
|
"grad_norm": 16.111379623413086,
|
|
"learning_rate": 4.882355001067891e-07,
|
|
"logits/chosen": -0.4705796241760254,
|
|
"logits/rejected": -0.46549493074417114,
|
|
"logps/chosen": -91.8436508178711,
|
|
"logps/ref_chosen": -44.66685104370117,
|
|
"logps/ref_rejected": -82.78165435791016,
|
|
"logps/rejected": -203.83624267578125,
|
|
"loss": 1.0229,
|
|
"margin_dpo/margin_mean": 73.87777709960938,
|
|
"margin_dpo/margin_std": 93.94640350341797,
|
|
"step": 130
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -43.336204528808594,
|
|
"KL/mean": -80.46978759765625,
|
|
"KL/rejected_KL_mean": -117.60336303710938,
|
|
"KL/std": 78.42054748535156,
|
|
"epoch": 0.19236417033773862,
|
|
"fcm_dpo/beta": 0.007246783934533596,
|
|
"fcm_dpo/delta": -0.1462840735912323,
|
|
"fcm_dpo/margin": 74.26715087890625,
|
|
"fcm_dpo/q_t": 0.3767462372779846,
|
|
"grad_norm": 20.645126342773438,
|
|
"learning_rate": 4.878433179298909e-07,
|
|
"logits/chosen": -0.4658737778663635,
|
|
"logits/rejected": -0.47121483087539673,
|
|
"logps/chosen": -88.26079559326172,
|
|
"logps/ref_chosen": -44.924591064453125,
|
|
"logps/ref_rejected": -88.44401550292969,
|
|
"logps/rejected": -206.04736328125,
|
|
"loss": 1.0082,
|
|
"margin_dpo/margin_mean": 74.26715850830078,
|
|
"margin_dpo/margin_std": 86.19850158691406,
|
|
"step": 131
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -56.63502502441406,
|
|
"KL/mean": -87.46903228759766,
|
|
"KL/rejected_KL_mean": -118.30303955078125,
|
|
"KL/std": 77.27864074707031,
|
|
"epoch": 0.19383259911894274,
|
|
"fcm_dpo/beta": 0.007140764966607094,
|
|
"fcm_dpo/delta": -0.04240689054131508,
|
|
"fcm_dpo/margin": 61.66801452636719,
|
|
"fcm_dpo/q_t": 0.4060777425765991,
|
|
"grad_norm": 18.950071334838867,
|
|
"learning_rate": 4.874448683603694e-07,
|
|
"logits/chosen": -0.5243598222732544,
|
|
"logits/rejected": -0.525653600692749,
|
|
"logps/chosen": -115.6361083984375,
|
|
"logps/ref_chosen": -59.00108337402344,
|
|
"logps/ref_rejected": -87.89215087890625,
|
|
"logps/rejected": -206.1951904296875,
|
|
"loss": 1.1026,
|
|
"margin_dpo/margin_mean": 61.66801452636719,
|
|
"margin_dpo/margin_std": 99.96194458007812,
|
|
"step": 132
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -67.54967498779297,
|
|
"KL/mean": -93.98008728027344,
|
|
"KL/rejected_KL_mean": -120.41049194335938,
|
|
"KL/std": 68.62925720214844,
|
|
"epoch": 0.19530102790014683,
|
|
"fcm_dpo/beta": 0.007148602977395058,
|
|
"fcm_dpo/delta": 0.022928927093744278,
|
|
"fcm_dpo/margin": 52.86082458496094,
|
|
"fcm_dpo/q_t": 0.4168737530708313,
|
|
"grad_norm": 20.872831344604492,
|
|
"learning_rate": 4.870401618977415e-07,
|
|
"logits/chosen": -0.479339599609375,
|
|
"logits/rejected": -0.46433907747268677,
|
|
"logps/chosen": -134.1541748046875,
|
|
"logps/ref_chosen": -66.60449981689453,
|
|
"logps/ref_rejected": -96.33355712890625,
|
|
"logps/rejected": -216.74404907226562,
|
|
"loss": 1.1346,
|
|
"margin_dpo/margin_mean": 52.86082458496094,
|
|
"margin_dpo/margin_std": 88.52780151367188,
|
|
"step": 133
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -51.839813232421875,
|
|
"KL/mean": -80.39920043945312,
|
|
"KL/rejected_KL_mean": -108.95858001708984,
|
|
"KL/std": 64.22247314453125,
|
|
"epoch": 0.19676945668135096,
|
|
"fcm_dpo/beta": 0.0071844616904854774,
|
|
"fcm_dpo/delta": -0.010975977405905724,
|
|
"fcm_dpo/margin": 57.118778228759766,
|
|
"fcm_dpo/q_t": 0.40691226720809937,
|
|
"grad_norm": 16.936574935913086,
|
|
"learning_rate": 4.866292092063986e-07,
|
|
"logits/chosen": -0.4439271092414856,
|
|
"logits/rejected": -0.429843544960022,
|
|
"logps/chosen": -103.90907287597656,
|
|
"logps/ref_chosen": -52.06925582885742,
|
|
"logps/ref_rejected": -87.6545181274414,
|
|
"logps/rejected": -196.61309814453125,
|
|
"loss": 1.0822,
|
|
"margin_dpo/margin_mean": 57.118778228759766,
|
|
"margin_dpo/margin_std": 74.62931060791016,
|
|
"step": 134
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -57.692352294921875,
|
|
"KL/mean": -100.01170349121094,
|
|
"KL/rejected_KL_mean": -142.3310546875,
|
|
"KL/std": 90.0093002319336,
|
|
"epoch": 0.19823788546255505,
|
|
"fcm_dpo/beta": 0.006944045424461365,
|
|
"fcm_dpo/delta": -0.1996196210384369,
|
|
"fcm_dpo/margin": 84.63871765136719,
|
|
"fcm_dpo/q_t": 0.3745703101158142,
|
|
"grad_norm": 18.334857940673828,
|
|
"learning_rate": 4.862120211153265e-07,
|
|
"logits/chosen": -0.4614737033843994,
|
|
"logits/rejected": -0.49732786417007446,
|
|
"logps/chosen": -108.04621887207031,
|
|
"logps/ref_chosen": -50.353858947753906,
|
|
"logps/ref_rejected": -115.97975158691406,
|
|
"logps/rejected": -258.310791015625,
|
|
"loss": 1.009,
|
|
"margin_dpo/margin_mean": 84.63871765136719,
|
|
"margin_dpo/margin_std": 110.02676391601562,
|
|
"step": 135
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -69.69944763183594,
|
|
"KL/mean": -98.3271484375,
|
|
"KL/rejected_KL_mean": -126.95484924316406,
|
|
"KL/std": 84.19580078125,
|
|
"epoch": 0.19970631424375918,
|
|
"fcm_dpo/beta": 0.006846585310995579,
|
|
"fcm_dpo/delta": 0.007697347551584244,
|
|
"fcm_dpo/margin": 57.25541305541992,
|
|
"fcm_dpo/q_t": 0.4213330149650574,
|
|
"grad_norm": 19.6416015625,
|
|
"learning_rate": 4.857886086178193e-07,
|
|
"logits/chosen": -0.4833676218986511,
|
|
"logits/rejected": -0.4771433472633362,
|
|
"logps/chosen": -134.77195739746094,
|
|
"logps/ref_chosen": -65.072509765625,
|
|
"logps/ref_rejected": -96.32122802734375,
|
|
"logps/rejected": -223.27609252929688,
|
|
"loss": 1.1537,
|
|
"margin_dpo/margin_mean": 57.255409240722656,
|
|
"margin_dpo/margin_std": 109.88507080078125,
|
|
"step": 136
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -64.6696548461914,
|
|
"KL/mean": -109.20771789550781,
|
|
"KL/rejected_KL_mean": -153.74578857421875,
|
|
"KL/std": 107.58679962158203,
|
|
"epoch": 0.2011747430249633,
|
|
"fcm_dpo/beta": 0.006671931594610214,
|
|
"fcm_dpo/delta": -0.20688247680664062,
|
|
"fcm_dpo/margin": 89.07612609863281,
|
|
"fcm_dpo/q_t": 0.3791872262954712,
|
|
"grad_norm": 16.7557373046875,
|
|
"learning_rate": 4.853589828711902e-07,
|
|
"logits/chosen": -0.419033944606781,
|
|
"logits/rejected": -0.4432998299598694,
|
|
"logps/chosen": -113.42877197265625,
|
|
"logps/ref_chosen": -48.759117126464844,
|
|
"logps/ref_rejected": -113.86376953125,
|
|
"logps/rejected": -267.60955810546875,
|
|
"loss": 1.0331,
|
|
"margin_dpo/margin_mean": 89.07611846923828,
|
|
"margin_dpo/margin_std": 130.33053588867188,
|
|
"step": 137
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -69.46388244628906,
|
|
"KL/mean": -103.4703369140625,
|
|
"KL/rejected_KL_mean": -137.47679138183594,
|
|
"KL/std": 83.18930053710938,
|
|
"epoch": 0.2026431718061674,
|
|
"fcm_dpo/beta": 0.006574639119207859,
|
|
"fcm_dpo/delta": -0.04935392364859581,
|
|
"fcm_dpo/margin": 68.0129165649414,
|
|
"fcm_dpo/q_t": 0.39788979291915894,
|
|
"grad_norm": 18.415468215942383,
|
|
"learning_rate": 4.849231551964771e-07,
|
|
"logits/chosen": -0.40760838985443115,
|
|
"logits/rejected": -0.3944551348686218,
|
|
"logps/chosen": -129.9835205078125,
|
|
"logps/ref_chosen": -60.519649505615234,
|
|
"logps/ref_rejected": -93.19694519042969,
|
|
"logps/rejected": -230.67373657226562,
|
|
"loss": 1.0613,
|
|
"margin_dpo/margin_mean": 68.0129165649414,
|
|
"margin_dpo/margin_std": 85.65242004394531,
|
|
"step": 138
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -58.71821975708008,
|
|
"KL/mean": -98.14753723144531,
|
|
"KL/rejected_KL_mean": -137.57684326171875,
|
|
"KL/std": 77.90769958496094,
|
|
"epoch": 0.20411160058737152,
|
|
"fcm_dpo/beta": 0.006447950378060341,
|
|
"fcm_dpo/delta": -0.11413857340812683,
|
|
"fcm_dpo/margin": 78.85862731933594,
|
|
"fcm_dpo/q_t": 0.38780367374420166,
|
|
"grad_norm": 17.002193450927734,
|
|
"learning_rate": 4.844811370781446e-07,
|
|
"logits/chosen": -0.40922728180885315,
|
|
"logits/rejected": -0.39873528480529785,
|
|
"logps/chosen": -105.60960388183594,
|
|
"logps/ref_chosen": -46.89138412475586,
|
|
"logps/ref_rejected": -79.72798156738281,
|
|
"logps/rejected": -217.30484008789062,
|
|
"loss": 1.0361,
|
|
"margin_dpo/margin_mean": 78.85863494873047,
|
|
"margin_dpo/margin_std": 103.01472473144531,
|
|
"step": 139
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -71.46368408203125,
|
|
"KL/mean": -106.60093688964844,
|
|
"KL/rejected_KL_mean": -141.73822021484375,
|
|
"KL/std": 86.1796875,
|
|
"epoch": 0.2055800293685756,
|
|
"fcm_dpo/beta": 0.006347954273223877,
|
|
"fcm_dpo/delta": -0.04827836528420448,
|
|
"fcm_dpo/margin": 70.2745361328125,
|
|
"fcm_dpo/q_t": 0.4006612300872803,
|
|
"grad_norm": 19.857847213745117,
|
|
"learning_rate": 4.840329401637809e-07,
|
|
"logits/chosen": -0.3932759761810303,
|
|
"logits/rejected": -0.37940922379493713,
|
|
"logps/chosen": -130.43838500976562,
|
|
"logps/ref_chosen": -58.97471618652344,
|
|
"logps/ref_rejected": -83.28410339355469,
|
|
"logps/rejected": -225.02232360839844,
|
|
"loss": 1.0896,
|
|
"margin_dpo/margin_mean": 70.2745361328125,
|
|
"margin_dpo/margin_std": 105.37348937988281,
|
|
"step": 140
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -81.21903991699219,
|
|
"KL/mean": -114.55131530761719,
|
|
"KL/rejected_KL_mean": -147.88356018066406,
|
|
"KL/std": 97.91165161132812,
|
|
"epoch": 0.20704845814977973,
|
|
"fcm_dpo/beta": 0.006319768726825714,
|
|
"fcm_dpo/delta": -0.022237718105316162,
|
|
"fcm_dpo/margin": 66.66454315185547,
|
|
"fcm_dpo/q_t": 0.4057791233062744,
|
|
"grad_norm": 25.337135314941406,
|
|
"learning_rate": 4.83578576263792e-07,
|
|
"logits/chosen": -0.4274560213088989,
|
|
"logits/rejected": -0.41715872287750244,
|
|
"logps/chosen": -156.29470825195312,
|
|
"logps/ref_chosen": -75.07566833496094,
|
|
"logps/ref_rejected": -98.1922607421875,
|
|
"logps/rejected": -246.07583618164062,
|
|
"loss": 1.1267,
|
|
"margin_dpo/margin_mean": 66.66454315185547,
|
|
"margin_dpo/margin_std": 114.31845092773438,
|
|
"step": 141
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -79.89231872558594,
|
|
"KL/mean": -118.12299346923828,
|
|
"KL/rejected_KL_mean": -156.35366821289062,
|
|
"KL/std": 101.84807586669922,
|
|
"epoch": 0.20851688693098386,
|
|
"fcm_dpo/beta": 0.006258774548768997,
|
|
"fcm_dpo/delta": -0.08255193382501602,
|
|
"fcm_dpo/margin": 76.46134185791016,
|
|
"fcm_dpo/q_t": 0.39626699686050415,
|
|
"grad_norm": 26.112525939941406,
|
|
"learning_rate": 4.83118057351089e-07,
|
|
"logits/chosen": -0.4015616774559021,
|
|
"logits/rejected": -0.4008292555809021,
|
|
"logps/chosen": -137.92025756835938,
|
|
"logps/ref_chosen": -58.027931213378906,
|
|
"logps/ref_rejected": -94.58222961425781,
|
|
"logps/rejected": -250.93588256835938,
|
|
"loss": 1.095,
|
|
"margin_dpo/margin_mean": 76.46134185791016,
|
|
"margin_dpo/margin_std": 119.62361145019531,
|
|
"step": 142
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -84.15359497070312,
|
|
"KL/mean": -108.9317626953125,
|
|
"KL/rejected_KL_mean": -133.70993041992188,
|
|
"KL/std": 89.8084945678711,
|
|
"epoch": 0.20998531571218795,
|
|
"fcm_dpo/beta": 0.006263419054448605,
|
|
"fcm_dpo/delta": 0.09250222891569138,
|
|
"fcm_dpo/margin": 49.55633544921875,
|
|
"fcm_dpo/q_t": 0.4340221583843231,
|
|
"grad_norm": 23.864248275756836,
|
|
"learning_rate": 4.826513955607734e-07,
|
|
"logits/chosen": -0.3417607545852661,
|
|
"logits/rejected": -0.33318936824798584,
|
|
"logps/chosen": -141.7500457763672,
|
|
"logps/ref_chosen": -57.59645080566406,
|
|
"logps/ref_rejected": -78.99957275390625,
|
|
"logps/rejected": -212.70950317382812,
|
|
"loss": 1.2025,
|
|
"margin_dpo/margin_mean": 49.55633544921875,
|
|
"margin_dpo/margin_std": 106.16546630859375,
|
|
"step": 143
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -76.2918701171875,
|
|
"KL/mean": -105.60356140136719,
|
|
"KL/rejected_KL_mean": -134.91525268554688,
|
|
"KL/std": 77.67918395996094,
|
|
"epoch": 0.21145374449339208,
|
|
"fcm_dpo/beta": 0.006343062035739422,
|
|
"fcm_dpo/delta": 0.029245702549815178,
|
|
"fcm_dpo/margin": 58.62339782714844,
|
|
"fcm_dpo/q_t": 0.41614866256713867,
|
|
"grad_norm": 21.587350845336914,
|
|
"learning_rate": 4.821786031898176e-07,
|
|
"logits/chosen": -0.37479305267333984,
|
|
"logits/rejected": -0.3632616400718689,
|
|
"logps/chosen": -136.1982421875,
|
|
"logps/ref_chosen": -59.90636444091797,
|
|
"logps/ref_rejected": -82.00025939941406,
|
|
"logps/rejected": -216.91552734375,
|
|
"loss": 1.1285,
|
|
"margin_dpo/margin_mean": 58.62339782714844,
|
|
"margin_dpo/margin_std": 91.50389099121094,
|
|
"step": 144
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -73.16571044921875,
|
|
"KL/mean": -104.94743347167969,
|
|
"KL/rejected_KL_mean": -136.72915649414062,
|
|
"KL/std": 75.64851379394531,
|
|
"epoch": 0.21292217327459617,
|
|
"fcm_dpo/beta": 0.006349918898195028,
|
|
"fcm_dpo/delta": -0.0037822211161255836,
|
|
"fcm_dpo/margin": 63.56344985961914,
|
|
"fcm_dpo/q_t": 0.4085870385169983,
|
|
"grad_norm": 22.88797950744629,
|
|
"learning_rate": 4.816996926967401e-07,
|
|
"logits/chosen": -0.40823960304260254,
|
|
"logits/rejected": -0.3923068642616272,
|
|
"logps/chosen": -129.76637268066406,
|
|
"logps/ref_chosen": -56.60066604614258,
|
|
"logps/ref_rejected": -77.86631774902344,
|
|
"logps/rejected": -214.595458984375,
|
|
"loss": 1.1044,
|
|
"margin_dpo/margin_mean": 63.56344985961914,
|
|
"margin_dpo/margin_std": 93.52117919921875,
|
|
"step": 145
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -96.4278793334961,
|
|
"KL/mean": -121.21267700195312,
|
|
"KL/rejected_KL_mean": -145.9974822998047,
|
|
"KL/std": 80.74990844726562,
|
|
"epoch": 0.2143906020558003,
|
|
"fcm_dpo/beta": 0.006410893052816391,
|
|
"fcm_dpo/delta": 0.08496344089508057,
|
|
"fcm_dpo/margin": 49.569610595703125,
|
|
"fcm_dpo/q_t": 0.427983820438385,
|
|
"grad_norm": 26.765342712402344,
|
|
"learning_rate": 4.812146767012779e-07,
|
|
"logits/chosen": -0.3858996033668518,
|
|
"logits/rejected": -0.3604584336280823,
|
|
"logps/chosen": -162.42832946777344,
|
|
"logps/ref_chosen": -66.00045013427734,
|
|
"logps/ref_rejected": -81.70278930664062,
|
|
"logps/rejected": -227.7002716064453,
|
|
"loss": 1.1884,
|
|
"margin_dpo/margin_mean": 49.569610595703125,
|
|
"margin_dpo/margin_std": 97.2584228515625,
|
|
"step": 146
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -70.62712097167969,
|
|
"KL/mean": -103.94308471679688,
|
|
"KL/rejected_KL_mean": -137.25904846191406,
|
|
"KL/std": 82.13803100585938,
|
|
"epoch": 0.21585903083700442,
|
|
"fcm_dpo/beta": 0.006426135078072548,
|
|
"fcm_dpo/delta": -0.029495222494006157,
|
|
"fcm_dpo/margin": 66.63192749023438,
|
|
"fcm_dpo/q_t": 0.40504029393196106,
|
|
"grad_norm": 20.992704391479492,
|
|
"learning_rate": 4.807235679840536e-07,
|
|
"logits/chosen": -0.41126739978790283,
|
|
"logits/rejected": -0.39271873235702515,
|
|
"logps/chosen": -124.03260803222656,
|
|
"logps/ref_chosen": -53.405487060546875,
|
|
"logps/ref_rejected": -71.39060974121094,
|
|
"logps/rejected": -208.649658203125,
|
|
"loss": 1.1021,
|
|
"margin_dpo/margin_mean": 66.63192749023438,
|
|
"margin_dpo/margin_std": 102.30528259277344,
|
|
"step": 147
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -70.48782348632812,
|
|
"KL/mean": -99.92208862304688,
|
|
"KL/rejected_KL_mean": -129.35635375976562,
|
|
"KL/std": 84.81340026855469,
|
|
"epoch": 0.2173274596182085,
|
|
"fcm_dpo/beta": 0.0063689956441521645,
|
|
"fcm_dpo/delta": -0.08440735191106796,
|
|
"fcm_dpo/margin": 58.8685302734375,
|
|
"fcm_dpo/q_t": 0.41790372133255005,
|
|
"grad_norm": 18.33550262451172,
|
|
"learning_rate": 4.802263794862384e-07,
|
|
"logits/chosen": -0.44826143980026245,
|
|
"logits/rejected": -0.4405372738838196,
|
|
"logps/chosen": -135.42489624023438,
|
|
"logps/ref_chosen": -64.93708038330078,
|
|
"logps/ref_rejected": -103.09384155273438,
|
|
"logps/rejected": -232.4501953125,
|
|
"loss": 1.1302,
|
|
"margin_dpo/margin_mean": 58.868534088134766,
|
|
"margin_dpo/margin_std": 89.78065490722656,
|
|
"step": 148
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -66.89385986328125,
|
|
"KL/mean": -102.25575256347656,
|
|
"KL/rejected_KL_mean": -137.61764526367188,
|
|
"KL/std": 72.74675750732422,
|
|
"epoch": 0.21879588839941264,
|
|
"fcm_dpo/beta": 0.006224669516086578,
|
|
"fcm_dpo/delta": -0.043726127594709396,
|
|
"fcm_dpo/margin": 70.72378540039062,
|
|
"fcm_dpo/q_t": 0.39915308356285095,
|
|
"grad_norm": 17.068838119506836,
|
|
"learning_rate": 4.797231243092118e-07,
|
|
"logits/chosen": -0.4722484350204468,
|
|
"logits/rejected": -0.4587002694606781,
|
|
"logps/chosen": -125.36762237548828,
|
|
"logps/ref_chosen": -58.47376251220703,
|
|
"logps/ref_rejected": -99.31474304199219,
|
|
"logps/rejected": -236.93238830566406,
|
|
"loss": 1.0654,
|
|
"margin_dpo/margin_mean": 70.72378540039062,
|
|
"margin_dpo/margin_std": 85.64163208007812,
|
|
"step": 149
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -58.0627555847168,
|
|
"KL/mean": -93.5211181640625,
|
|
"KL/rejected_KL_mean": -128.97947692871094,
|
|
"KL/std": 83.53118896484375,
|
|
"epoch": 0.22026431718061673,
|
|
"fcm_dpo/beta": 0.006181714590638876,
|
|
"fcm_dpo/delta": -0.04127602279186249,
|
|
"fcm_dpo/margin": 70.91671752929688,
|
|
"fcm_dpo/q_t": 0.4037661552429199,
|
|
"grad_norm": 17.37567710876465,
|
|
"learning_rate": 4.792138157142157e-07,
|
|
"logits/chosen": -0.4367871582508087,
|
|
"logits/rejected": -0.43975830078125,
|
|
"logps/chosen": -103.76856994628906,
|
|
"logps/ref_chosen": -45.705810546875,
|
|
"logps/ref_rejected": -83.34759521484375,
|
|
"logps/rejected": -212.3270721435547,
|
|
"loss": 1.0768,
|
|
"margin_dpo/margin_mean": 70.91671752929688,
|
|
"margin_dpo/margin_std": 98.11479187011719,
|
|
"step": 150
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -73.78369140625,
|
|
"KL/mean": -109.70790100097656,
|
|
"KL/rejected_KL_mean": -145.63211059570312,
|
|
"KL/std": 83.37910461425781,
|
|
"epoch": 0.22173274596182085,
|
|
"fcm_dpo/beta": 0.006163077428936958,
|
|
"fcm_dpo/delta": -0.044833216816186905,
|
|
"fcm_dpo/margin": 71.84840393066406,
|
|
"fcm_dpo/q_t": 0.3990030288696289,
|
|
"grad_norm": 21.97403907775879,
|
|
"learning_rate": 4.786984671220053e-07,
|
|
"logits/chosen": -0.5252622365951538,
|
|
"logits/rejected": -0.49835896492004395,
|
|
"logps/chosen": -144.35452270507812,
|
|
"logps/ref_chosen": -70.57083129882812,
|
|
"logps/ref_rejected": -100.46382141113281,
|
|
"logps/rejected": -246.09591674804688,
|
|
"loss": 1.065,
|
|
"margin_dpo/margin_mean": 71.84840393066406,
|
|
"margin_dpo/margin_std": 91.86006164550781,
|
|
"step": 151
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -64.33494567871094,
|
|
"KL/mean": -107.04579162597656,
|
|
"KL/rejected_KL_mean": -149.75662231445312,
|
|
"KL/std": 83.78707122802734,
|
|
"epoch": 0.22320117474302498,
|
|
"fcm_dpo/beta": 0.0060538845136761665,
|
|
"fcm_dpo/delta": -0.12335566431283951,
|
|
"fcm_dpo/margin": 85.42166900634766,
|
|
"fcm_dpo/q_t": 0.3849901556968689,
|
|
"grad_norm": 20.60307502746582,
|
|
"learning_rate": 4.78177092112495e-07,
|
|
"logits/chosen": -0.47641807794570923,
|
|
"logits/rejected": -0.47393327951431274,
|
|
"logps/chosen": -124.49932861328125,
|
|
"logps/ref_chosen": -60.16438674926758,
|
|
"logps/ref_rejected": -106.14045715332031,
|
|
"logps/rejected": -255.89707946777344,
|
|
"loss": 1.0264,
|
|
"margin_dpo/margin_mean": 85.42166900634766,
|
|
"margin_dpo/margin_std": 103.75975799560547,
|
|
"step": 152
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -62.89659881591797,
|
|
"KL/mean": -99.18505859375,
|
|
"KL/rejected_KL_mean": -135.47352600097656,
|
|
"KL/std": 90.631591796875,
|
|
"epoch": 0.22466960352422907,
|
|
"fcm_dpo/beta": 0.005982040427625179,
|
|
"fcm_dpo/delta": -0.03570834919810295,
|
|
"fcm_dpo/margin": 72.57691955566406,
|
|
"fcm_dpo/q_t": 0.40594881772994995,
|
|
"grad_norm": 14.930242538452148,
|
|
"learning_rate": 4.776497044244016e-07,
|
|
"logits/chosen": -0.4422386884689331,
|
|
"logits/rejected": -0.43338215351104736,
|
|
"logps/chosen": -119.21187591552734,
|
|
"logps/ref_chosen": -56.315277099609375,
|
|
"logps/ref_rejected": -85.65583801269531,
|
|
"logps/rejected": -221.12936401367188,
|
|
"loss": 1.0966,
|
|
"margin_dpo/margin_mean": 72.57691955566406,
|
|
"margin_dpo/margin_std": 111.9495849609375,
|
|
"step": 153
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -76.06448364257812,
|
|
"KL/mean": -110.12120056152344,
|
|
"KL/rejected_KL_mean": -144.17791748046875,
|
|
"KL/std": 89.9405517578125,
|
|
"epoch": 0.2261380323054332,
|
|
"fcm_dpo/beta": 0.00597739452496171,
|
|
"fcm_dpo/delta": -0.007730741053819656,
|
|
"fcm_dpo/margin": 68.11343383789062,
|
|
"fcm_dpo/q_t": 0.4101860225200653,
|
|
"grad_norm": 17.922649383544922,
|
|
"learning_rate": 4.771163179548808e-07,
|
|
"logits/chosen": -0.49747714400291443,
|
|
"logits/rejected": -0.5010119676589966,
|
|
"logps/chosen": -138.8070526123047,
|
|
"logps/ref_chosen": -62.74256896972656,
|
|
"logps/ref_rejected": -104.24420166015625,
|
|
"logps/rejected": -248.422119140625,
|
|
"loss": 1.1354,
|
|
"margin_dpo/margin_mean": 68.11343383789062,
|
|
"margin_dpo/margin_std": 118.15486145019531,
|
|
"step": 154
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -68.90711975097656,
|
|
"KL/mean": -102.35006713867188,
|
|
"KL/rejected_KL_mean": -135.7930145263672,
|
|
"KL/std": 81.32989501953125,
|
|
"epoch": 0.2276064610866373,
|
|
"fcm_dpo/beta": 0.0059681423008441925,
|
|
"fcm_dpo/delta": 0.0006999801844358444,
|
|
"fcm_dpo/margin": 66.88587951660156,
|
|
"fcm_dpo/q_t": 0.4097937345504761,
|
|
"grad_norm": 18.32874298095703,
|
|
"learning_rate": 4.7657694675916247e-07,
|
|
"logits/chosen": -0.48518913984298706,
|
|
"logits/rejected": -0.4643056392669678,
|
|
"logps/chosen": -129.560302734375,
|
|
"logps/ref_chosen": -60.65318298339844,
|
|
"logps/ref_rejected": -77.49220275878906,
|
|
"logps/rejected": -213.28521728515625,
|
|
"loss": 1.1141,
|
|
"margin_dpo/margin_mean": 66.88587951660156,
|
|
"margin_dpo/margin_std": 103.57026672363281,
|
|
"step": 155
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -92.63871765136719,
|
|
"KL/mean": -112.63226318359375,
|
|
"KL/rejected_KL_mean": -132.6258087158203,
|
|
"KL/std": 85.67424011230469,
|
|
"epoch": 0.2290748898678414,
|
|
"fcm_dpo/beta": 0.006016138941049576,
|
|
"fcm_dpo/delta": 0.05333181843161583,
|
|
"fcm_dpo/margin": 39.98707962036133,
|
|
"fcm_dpo/q_t": 0.444929301738739,
|
|
"grad_norm": 26.82451629638672,
|
|
"learning_rate": 4.7603160505017893e-07,
|
|
"logits/chosen": -0.4589994549751282,
|
|
"logits/rejected": -0.4519059658050537,
|
|
"logps/chosen": -162.13059997558594,
|
|
"logps/ref_chosen": -69.49188232421875,
|
|
"logps/ref_rejected": -77.16929626464844,
|
|
"logps/rejected": -209.79510498046875,
|
|
"loss": 1.2724,
|
|
"margin_dpo/margin_mean": 39.98707580566406,
|
|
"margin_dpo/margin_std": 114.85380554199219,
|
|
"step": 156
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -82.70185089111328,
|
|
"KL/mean": -126.093505859375,
|
|
"KL/rejected_KL_mean": -169.48513793945312,
|
|
"KL/std": 92.4810791015625,
|
|
"epoch": 0.2305433186490455,
|
|
"fcm_dpo/beta": 0.005881883203983307,
|
|
"fcm_dpo/delta": -0.11752188205718994,
|
|
"fcm_dpo/margin": 86.78329467773438,
|
|
"fcm_dpo/q_t": 0.38307642936706543,
|
|
"grad_norm": 21.786405563354492,
|
|
"learning_rate": 4.7548030719819154e-07,
|
|
"logits/chosen": -0.4166560769081116,
|
|
"logits/rejected": -0.42006832361221313,
|
|
"logps/chosen": -144.07028198242188,
|
|
"logps/ref_chosen": -61.368438720703125,
|
|
"logps/ref_rejected": -107.64636993408203,
|
|
"logps/rejected": -277.13153076171875,
|
|
"loss": 1.0327,
|
|
"margin_dpo/margin_mean": 86.78329467773438,
|
|
"margin_dpo/margin_std": 104.9678955078125,
|
|
"step": 157
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -84.41264343261719,
|
|
"KL/mean": -131.40390014648438,
|
|
"KL/rejected_KL_mean": -178.39517211914062,
|
|
"KL/std": 117.00088500976562,
|
|
"epoch": 0.23201174743024963,
|
|
"fcm_dpo/beta": 0.0057478612288832664,
|
|
"fcm_dpo/delta": -0.14822149276733398,
|
|
"fcm_dpo/margin": 93.98252868652344,
|
|
"fcm_dpo/q_t": 0.38809406757354736,
|
|
"grad_norm": 17.88888931274414,
|
|
"learning_rate": 4.7492306773041136e-07,
|
|
"logits/chosen": -0.3769122362136841,
|
|
"logits/rejected": -0.3925984501838684,
|
|
"logps/chosen": -142.02557373046875,
|
|
"logps/ref_chosen": -57.612918853759766,
|
|
"logps/ref_rejected": -113.6946792602539,
|
|
"logps/rejected": -292.08984375,
|
|
"loss": 1.0569,
|
|
"margin_dpo/margin_mean": 93.98252868652344,
|
|
"margin_dpo/margin_std": 142.27569580078125,
|
|
"step": 158
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -95.15133666992188,
|
|
"KL/mean": -126.28166198730469,
|
|
"KL/rejected_KL_mean": -157.41195678710938,
|
|
"KL/std": 101.34651184082031,
|
|
"epoch": 0.23348017621145375,
|
|
"fcm_dpo/beta": 0.00576662877574563,
|
|
"fcm_dpo/delta": 0.04188086465001106,
|
|
"fcm_dpo/margin": 62.26060485839844,
|
|
"fcm_dpo/q_t": 0.41926220059394836,
|
|
"grad_norm": 26.526140213012695,
|
|
"learning_rate": 4.743599013306165e-07,
|
|
"logits/chosen": -0.4394528865814209,
|
|
"logits/rejected": -0.4065374433994293,
|
|
"logps/chosen": -176.71168518066406,
|
|
"logps/ref_chosen": -81.56034851074219,
|
|
"logps/ref_rejected": -88.89871215820312,
|
|
"logps/rejected": -246.3106689453125,
|
|
"loss": 1.1579,
|
|
"margin_dpo/margin_mean": 62.26060485839844,
|
|
"margin_dpo/margin_std": 111.88368225097656,
|
|
"step": 159
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -97.3049087524414,
|
|
"KL/mean": -140.1513671875,
|
|
"KL/rejected_KL_mean": -182.99783325195312,
|
|
"KL/std": 107.30947875976562,
|
|
"epoch": 0.23494860499265785,
|
|
"fcm_dpo/beta": 0.005649491213262081,
|
|
"fcm_dpo/delta": -0.08930858224630356,
|
|
"fcm_dpo/margin": 85.69293212890625,
|
|
"fcm_dpo/q_t": 0.3967619240283966,
|
|
"grad_norm": 20.257490158081055,
|
|
"learning_rate": 4.737908228387656e-07,
|
|
"logits/chosen": -0.38310354948043823,
|
|
"logits/rejected": -0.37118303775787354,
|
|
"logps/chosen": -163.03579711914062,
|
|
"logps/ref_chosen": -65.73088073730469,
|
|
"logps/ref_rejected": -97.21781921386719,
|
|
"logps/rejected": -280.21563720703125,
|
|
"loss": 1.0875,
|
|
"margin_dpo/margin_mean": 85.69292449951172,
|
|
"margin_dpo/margin_std": 134.3516387939453,
|
|
"step": 160
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -83.62994384765625,
|
|
"KL/mean": -121.65289306640625,
|
|
"KL/rejected_KL_mean": -159.6758575439453,
|
|
"KL/std": 88.20482635498047,
|
|
"epoch": 0.23641703377386197,
|
|
"fcm_dpo/beta": 0.005624156445264816,
|
|
"fcm_dpo/delta": -0.028947748243808746,
|
|
"fcm_dpo/margin": 76.04591369628906,
|
|
"fcm_dpo/q_t": 0.40480250120162964,
|
|
"grad_norm": 17.573366165161133,
|
|
"learning_rate": 4.7321584725060594e-07,
|
|
"logits/chosen": -0.4291399121284485,
|
|
"logits/rejected": -0.4256962835788727,
|
|
"logps/chosen": -136.06640625,
|
|
"logps/ref_chosen": -52.43647003173828,
|
|
"logps/ref_rejected": -83.43095397949219,
|
|
"logps/rejected": -243.1068115234375,
|
|
"loss": 1.0919,
|
|
"margin_dpo/margin_mean": 76.04591369628906,
|
|
"margin_dpo/margin_std": 109.95503234863281,
|
|
"step": 161
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -82.52705383300781,
|
|
"KL/mean": -119.3626708984375,
|
|
"KL/rejected_KL_mean": -156.19830322265625,
|
|
"KL/std": 96.73245239257812,
|
|
"epoch": 0.23788546255506607,
|
|
"fcm_dpo/beta": 0.005556900054216385,
|
|
"fcm_dpo/delta": -0.011652916669845581,
|
|
"fcm_dpo/margin": 73.67125701904297,
|
|
"fcm_dpo/q_t": 0.40978431701660156,
|
|
"grad_norm": 20.28093147277832,
|
|
"learning_rate": 4.7263498971727905e-07,
|
|
"logits/chosen": -0.44169116020202637,
|
|
"logits/rejected": -0.42343568801879883,
|
|
"logps/chosen": -145.13763427734375,
|
|
"logps/ref_chosen": -62.6105842590332,
|
|
"logps/ref_rejected": -89.39057922363281,
|
|
"logps/rejected": -245.58888244628906,
|
|
"loss": 1.1189,
|
|
"margin_dpo/margin_mean": 73.67125701904297,
|
|
"margin_dpo/margin_std": 116.21994018554688,
|
|
"step": 162
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -91.48970794677734,
|
|
"KL/mean": -127.75386810302734,
|
|
"KL/rejected_KL_mean": -164.01803588867188,
|
|
"KL/std": 96.89871215820312,
|
|
"epoch": 0.2393538913362702,
|
|
"fcm_dpo/beta": 0.005600422620773315,
|
|
"fcm_dpo/delta": -0.006450829096138477,
|
|
"fcm_dpo/margin": 72.5283203125,
|
|
"fcm_dpo/q_t": 0.4107089638710022,
|
|
"grad_norm": 19.475297927856445,
|
|
"learning_rate": 4.720482655449212e-07,
|
|
"logits/chosen": -0.38897454738616943,
|
|
"logits/rejected": -0.3698977828025818,
|
|
"logps/chosen": -146.51133728027344,
|
|
"logps/ref_chosen": -55.021629333496094,
|
|
"logps/ref_rejected": -75.418212890625,
|
|
"logps/rejected": -239.43624877929688,
|
|
"loss": 1.1174,
|
|
"margin_dpo/margin_mean": 72.5283203125,
|
|
"margin_dpo/margin_std": 116.68798828125,
|
|
"step": 163
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -84.55402374267578,
|
|
"KL/mean": -128.27662658691406,
|
|
"KL/rejected_KL_mean": -171.9992218017578,
|
|
"KL/std": 93.24118041992188,
|
|
"epoch": 0.24082232011747431,
|
|
"fcm_dpo/beta": 0.00548876728862524,
|
|
"fcm_dpo/delta": -0.08558943122625351,
|
|
"fcm_dpo/margin": 87.4451904296875,
|
|
"fcm_dpo/q_t": 0.39110738039016724,
|
|
"grad_norm": 19.807645797729492,
|
|
"learning_rate": 4.714556901942599e-07,
|
|
"logits/chosen": -0.36497557163238525,
|
|
"logits/rejected": -0.34768325090408325,
|
|
"logps/chosen": -140.1947021484375,
|
|
"logps/ref_chosen": -55.64066696166992,
|
|
"logps/ref_rejected": -79.66463470458984,
|
|
"logps/rejected": -251.66384887695312,
|
|
"loss": 1.0448,
|
|
"margin_dpo/margin_mean": 87.44519805908203,
|
|
"margin_dpo/margin_std": 107.89261627197266,
|
|
"step": 164
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -92.47479248046875,
|
|
"KL/mean": -121.87619018554688,
|
|
"KL/rejected_KL_mean": -151.27755737304688,
|
|
"KL/std": 80.65908813476562,
|
|
"epoch": 0.2422907488986784,
|
|
"fcm_dpo/beta": 0.005550094414502382,
|
|
"fcm_dpo/delta": 0.07617515325546265,
|
|
"fcm_dpo/margin": 58.802772521972656,
|
|
"fcm_dpo/q_t": 0.4268398880958557,
|
|
"grad_norm": 22.88750457763672,
|
|
"learning_rate": 4.708572792802069e-07,
|
|
"logits/chosen": -0.39553022384643555,
|
|
"logits/rejected": -0.36636269092559814,
|
|
"logps/chosen": -153.78549194335938,
|
|
"logps/ref_chosen": -61.310691833496094,
|
|
"logps/ref_rejected": -73.67060852050781,
|
|
"logps/rejected": -224.9481658935547,
|
|
"loss": 1.1709,
|
|
"margin_dpo/margin_mean": 58.802772521972656,
|
|
"margin_dpo/margin_std": 107.60116577148438,
|
|
"step": 165
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -83.616455078125,
|
|
"KL/mean": -137.0757293701172,
|
|
"KL/rejected_KL_mean": -190.53501892089844,
|
|
"KL/std": 119.21273803710938,
|
|
"epoch": 0.24375917767988253,
|
|
"fcm_dpo/beta": 0.005407451651990414,
|
|
"fcm_dpo/delta": -0.1903567910194397,
|
|
"fcm_dpo/margin": 106.9185562133789,
|
|
"fcm_dpo/q_t": 0.38125768303871155,
|
|
"grad_norm": 17.94371223449707,
|
|
"learning_rate": 4.702530485714461e-07,
|
|
"logits/chosen": -0.3321695327758789,
|
|
"logits/rejected": -0.3424978256225586,
|
|
"logps/chosen": -134.6000518798828,
|
|
"logps/ref_chosen": -50.98360061645508,
|
|
"logps/ref_rejected": -98.09512329101562,
|
|
"logps/rejected": -288.630126953125,
|
|
"loss": 1.0224,
|
|
"margin_dpo/margin_mean": 106.91854858398438,
|
|
"margin_dpo/margin_std": 150.0036163330078,
|
|
"step": 166
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -84.20343780517578,
|
|
"KL/mean": -139.89492797851562,
|
|
"KL/rejected_KL_mean": -195.58641052246094,
|
|
"KL/std": 104.5910873413086,
|
|
"epoch": 0.24522760646108663,
|
|
"fcm_dpo/beta": 0.0052237361669540405,
|
|
"fcm_dpo/delta": -0.19318926334381104,
|
|
"fcm_dpo/margin": 111.38297271728516,
|
|
"fcm_dpo/q_t": 0.36922937631607056,
|
|
"grad_norm": 17.214645385742188,
|
|
"learning_rate": 4.6964301399001877e-07,
|
|
"logits/chosen": -0.3567941188812256,
|
|
"logits/rejected": -0.36129891872406006,
|
|
"logps/chosen": -134.62753295898438,
|
|
"logps/ref_chosen": -50.424095153808594,
|
|
"logps/ref_rejected": -96.03042602539062,
|
|
"logps/rejected": -291.6168212890625,
|
|
"loss": 0.9761,
|
|
"margin_dpo/margin_mean": 111.38297271728516,
|
|
"margin_dpo/margin_std": 121.13790893554688,
|
|
"step": 167
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -92.93460083007812,
|
|
"KL/mean": -133.62405395507812,
|
|
"KL/rejected_KL_mean": -174.31350708007812,
|
|
"KL/std": 100.2225341796875,
|
|
"epoch": 0.24669603524229075,
|
|
"fcm_dpo/beta": 0.005147742573171854,
|
|
"fcm_dpo/delta": -0.019897453486919403,
|
|
"fcm_dpo/margin": 81.37892150878906,
|
|
"fcm_dpo/q_t": 0.40599554777145386,
|
|
"grad_norm": 20.31831932067871,
|
|
"learning_rate": 4.690271916109034e-07,
|
|
"logits/chosen": -0.3324066996574402,
|
|
"logits/rejected": -0.32228702306747437,
|
|
"logps/chosen": -142.39743041992188,
|
|
"logps/ref_chosen": -49.462825775146484,
|
|
"logps/ref_rejected": -75.30855560302734,
|
|
"logps/rejected": -249.6220703125,
|
|
"loss": 1.0879,
|
|
"margin_dpo/margin_mean": 81.37892150878906,
|
|
"margin_dpo/margin_std": 113.19637298583984,
|
|
"step": 168
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -97.10243225097656,
|
|
"KL/mean": -133.62738037109375,
|
|
"KL/rejected_KL_mean": -170.15234375,
|
|
"KL/std": 102.53475952148438,
|
|
"epoch": 0.24816446402349487,
|
|
"fcm_dpo/beta": 0.005072026047855616,
|
|
"fcm_dpo/delta": -0.08044641464948654,
|
|
"fcm_dpo/margin": 73.04991149902344,
|
|
"fcm_dpo/q_t": 0.42030519247055054,
|
|
"grad_norm": 20.161535263061523,
|
|
"learning_rate": 4.6840559766159235e-07,
|
|
"logits/chosen": -0.3249385356903076,
|
|
"logits/rejected": -0.3059506416320801,
|
|
"logps/chosen": -156.9058837890625,
|
|
"logps/ref_chosen": -59.803443908691406,
|
|
"logps/ref_rejected": -83.34574890136719,
|
|
"logps/rejected": -253.49807739257812,
|
|
"loss": 1.168,
|
|
"margin_dpo/margin_mean": 73.04991149902344,
|
|
"margin_dpo/margin_std": 141.12490844726562,
|
|
"step": 169
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -85.55194091796875,
|
|
"KL/mean": -127.24555206298828,
|
|
"KL/rejected_KL_mean": -168.9391632080078,
|
|
"KL/std": 97.04989624023438,
|
|
"epoch": 0.24963289280469897,
|
|
"fcm_dpo/beta": 0.005024762358516455,
|
|
"fcm_dpo/delta": -0.02076905593276024,
|
|
"fcm_dpo/margin": 83.38722229003906,
|
|
"fcm_dpo/q_t": 0.40413689613342285,
|
|
"grad_norm": 17.05228042602539,
|
|
"learning_rate": 4.6777824852166437e-07,
|
|
"logits/chosen": -0.3223455548286438,
|
|
"logits/rejected": -0.3122260272502899,
|
|
"logps/chosen": -135.02371215820312,
|
|
"logps/ref_chosen": -49.471771240234375,
|
|
"logps/ref_rejected": -75.91734313964844,
|
|
"logps/rejected": -244.85650634765625,
|
|
"loss": 1.0942,
|
|
"margin_dpo/margin_mean": 83.38722229003906,
|
|
"margin_dpo/margin_std": 117.27194213867188,
|
|
"step": 170
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -121.26353454589844,
|
|
"KL/mean": -156.89358520507812,
|
|
"KL/rejected_KL_mean": -192.5236358642578,
|
|
"KL/std": 110.09349060058594,
|
|
"epoch": 0.2511013215859031,
|
|
"fcm_dpo/beta": 0.00508046243339777,
|
|
"fcm_dpo/delta": 0.039394039660692215,
|
|
"fcm_dpo/margin": 71.26011657714844,
|
|
"fcm_dpo/q_t": 0.42305469512939453,
|
|
"grad_norm": 28.202070236206055,
|
|
"learning_rate": 4.6714516072235273e-07,
|
|
"logits/chosen": -0.31354865431785583,
|
|
"logits/rejected": -0.29599112272262573,
|
|
"logps/chosen": -205.76284790039062,
|
|
"logps/ref_chosen": -84.49931335449219,
|
|
"logps/ref_rejected": -109.38209533691406,
|
|
"logps/rejected": -301.9057312011719,
|
|
"loss": 1.1705,
|
|
"margin_dpo/margin_mean": 71.26011657714844,
|
|
"margin_dpo/margin_std": 142.1697235107422,
|
|
"step": 171
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -104.97297668457031,
|
|
"KL/mean": -141.43255615234375,
|
|
"KL/rejected_KL_mean": -177.89212036132812,
|
|
"KL/std": 106.18240356445312,
|
|
"epoch": 0.2525697503671072,
|
|
"fcm_dpo/beta": 0.005108260549604893,
|
|
"fcm_dpo/delta": 0.028583845123648643,
|
|
"fcm_dpo/margin": 72.91913604736328,
|
|
"fcm_dpo/q_t": 0.41656991839408875,
|
|
"grad_norm": 19.106325149536133,
|
|
"learning_rate": 4.6650635094610966e-07,
|
|
"logits/chosen": -0.38692790269851685,
|
|
"logits/rejected": -0.36780279874801636,
|
|
"logps/chosen": -173.62689208984375,
|
|
"logps/ref_chosen": -68.65391540527344,
|
|
"logps/ref_rejected": -85.43667602539062,
|
|
"logps/rejected": -263.32879638671875,
|
|
"loss": 1.1391,
|
|
"margin_dpo/margin_mean": 72.91913604736328,
|
|
"margin_dpo/margin_std": 123.32398986816406,
|
|
"step": 172
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -95.9285659790039,
|
|
"KL/mean": -132.39529418945312,
|
|
"KL/rejected_KL_mean": -168.86203002929688,
|
|
"KL/std": 101.62166595458984,
|
|
"epoch": 0.2540381791483113,
|
|
"fcm_dpo/beta": 0.005157306790351868,
|
|
"fcm_dpo/delta": 0.0244424007833004,
|
|
"fcm_dpo/margin": 72.93345642089844,
|
|
"fcm_dpo/q_t": 0.4147951602935791,
|
|
"grad_norm": 20.805395126342773,
|
|
"learning_rate": 4.6586183602616687e-07,
|
|
"logits/chosen": -0.38531604409217834,
|
|
"logits/rejected": -0.3533366620540619,
|
|
"logps/chosen": -158.9794464111328,
|
|
"logps/ref_chosen": -63.050880432128906,
|
|
"logps/ref_rejected": -78.68392181396484,
|
|
"logps/rejected": -247.5459442138672,
|
|
"loss": 1.1146,
|
|
"margin_dpo/margin_mean": 72.93345642089844,
|
|
"margin_dpo/margin_std": 106.28599548339844,
|
|
"step": 173
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -89.95355224609375,
|
|
"KL/mean": -134.23170471191406,
|
|
"KL/rejected_KL_mean": -178.50985717773438,
|
|
"KL/std": 104.74340057373047,
|
|
"epoch": 0.2555066079295154,
|
|
"fcm_dpo/beta": 0.005131464451551437,
|
|
"fcm_dpo/delta": -0.05737413465976715,
|
|
"fcm_dpo/margin": 88.55628967285156,
|
|
"fcm_dpo/q_t": 0.3998154103755951,
|
|
"grad_norm": 24.817533493041992,
|
|
"learning_rate": 4.652116329460919e-07,
|
|
"logits/chosen": -0.3375644087791443,
|
|
"logits/rejected": -0.35590213537216187,
|
|
"logps/chosen": -143.3165283203125,
|
|
"logps/ref_chosen": -53.36296844482422,
|
|
"logps/ref_rejected": -101.91120910644531,
|
|
"logps/rejected": -280.42108154296875,
|
|
"loss": 1.0809,
|
|
"margin_dpo/margin_mean": 88.55628967285156,
|
|
"margin_dpo/margin_std": 126.68205261230469,
|
|
"step": 174
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -84.00241088867188,
|
|
"KL/mean": -142.2693328857422,
|
|
"KL/rejected_KL_mean": -200.5362548828125,
|
|
"KL/std": 111.97515869140625,
|
|
"epoch": 0.25697503671071953,
|
|
"fcm_dpo/beta": 0.004940693732351065,
|
|
"fcm_dpo/delta": -0.18679270148277283,
|
|
"fcm_dpo/margin": 116.53382873535156,
|
|
"fcm_dpo/q_t": 0.36849379539489746,
|
|
"grad_norm": 25.079553604125977,
|
|
"learning_rate": 4.645557588393406e-07,
|
|
"logits/chosen": -0.3228433430194855,
|
|
"logits/rejected": -0.3084716796875,
|
|
"logps/chosen": -129.42018127441406,
|
|
"logps/ref_chosen": -45.417762756347656,
|
|
"logps/ref_rejected": -89.50579833984375,
|
|
"logps/rejected": -290.04205322265625,
|
|
"loss": 0.9676,
|
|
"margin_dpo/margin_mean": 116.53382873535156,
|
|
"margin_dpo/margin_std": 118.0002212524414,
|
|
"step": 175
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -88.73968505859375,
|
|
"KL/mean": -139.49606323242188,
|
|
"KL/rejected_KL_mean": -190.25241088867188,
|
|
"KL/std": 112.8240966796875,
|
|
"epoch": 0.25844346549192365,
|
|
"fcm_dpo/beta": 0.004837565589696169,
|
|
"fcm_dpo/delta": -0.09568466246128082,
|
|
"fcm_dpo/margin": 101.51274108886719,
|
|
"fcm_dpo/q_t": 0.392004132270813,
|
|
"grad_norm": 18.32849884033203,
|
|
"learning_rate": 4.638942309888058e-07,
|
|
"logits/chosen": -0.3147198557853699,
|
|
"logits/rejected": -0.332048237323761,
|
|
"logps/chosen": -139.19252014160156,
|
|
"logps/ref_chosen": -50.452842712402344,
|
|
"logps/ref_rejected": -95.5589599609375,
|
|
"logps/rejected": -285.8114013671875,
|
|
"loss": 1.0415,
|
|
"margin_dpo/margin_mean": 101.51274108886719,
|
|
"margin_dpo/margin_std": 129.95030212402344,
|
|
"step": 176
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -102.09382629394531,
|
|
"KL/mean": -152.91012573242188,
|
|
"KL/rejected_KL_mean": -203.72640991210938,
|
|
"KL/std": 122.18125915527344,
|
|
"epoch": 0.2599118942731278,
|
|
"fcm_dpo/beta": 0.004758263938128948,
|
|
"fcm_dpo/delta": -0.0877579003572464,
|
|
"fcm_dpo/margin": 101.63257598876953,
|
|
"fcm_dpo/q_t": 0.3923775553703308,
|
|
"grad_norm": 19.02363395690918,
|
|
"learning_rate": 4.6322706682636137e-07,
|
|
"logits/chosen": -0.3660031855106354,
|
|
"logits/rejected": -0.3579842448234558,
|
|
"logps/chosen": -163.310302734375,
|
|
"logps/ref_chosen": -61.216468811035156,
|
|
"logps/ref_rejected": -95.89378356933594,
|
|
"logps/rejected": -299.62017822265625,
|
|
"loss": 1.0435,
|
|
"margin_dpo/margin_mean": 101.63257598876953,
|
|
"margin_dpo/margin_std": 129.4840545654297,
|
|
"step": 177
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -114.02023315429688,
|
|
"KL/mean": -175.93092346191406,
|
|
"KL/rejected_KL_mean": -237.8416290283203,
|
|
"KL/std": 142.59483337402344,
|
|
"epoch": 0.26138032305433184,
|
|
"fcm_dpo/beta": 0.004583236761391163,
|
|
"fcm_dpo/delta": -0.17850404977798462,
|
|
"fcm_dpo/margin": 123.82139587402344,
|
|
"fcm_dpo/q_t": 0.3769300878047943,
|
|
"grad_norm": 22.4151554107666,
|
|
"learning_rate": 4.6255428393240354e-07,
|
|
"logits/chosen": -0.2533833086490631,
|
|
"logits/rejected": -0.24754983186721802,
|
|
"logps/chosen": -172.28501892089844,
|
|
"logps/ref_chosen": -58.26478958129883,
|
|
"logps/ref_rejected": -105.3653335571289,
|
|
"logps/rejected": -343.20697021484375,
|
|
"loss": 1.0058,
|
|
"margin_dpo/margin_mean": 123.82139587402344,
|
|
"margin_dpo/margin_std": 155.78990173339844,
|
|
"step": 178
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -119.15005493164062,
|
|
"KL/mean": -162.2534637451172,
|
|
"KL/rejected_KL_mean": -205.35687255859375,
|
|
"KL/std": 122.278076171875,
|
|
"epoch": 0.26284875183553597,
|
|
"fcm_dpo/beta": 0.004542763344943523,
|
|
"fcm_dpo/delta": 0.008336875587701797,
|
|
"fcm_dpo/margin": 86.20682525634766,
|
|
"fcm_dpo/q_t": 0.4141519069671631,
|
|
"grad_norm": 29.362442016601562,
|
|
"learning_rate": 4.6187590003538724e-07,
|
|
"logits/chosen": -0.287581205368042,
|
|
"logits/rejected": -0.29421767592430115,
|
|
"logps/chosen": -180.2083740234375,
|
|
"logps/ref_chosen": -61.05832290649414,
|
|
"logps/ref_rejected": -90.52782440185547,
|
|
"logps/rejected": -295.88470458984375,
|
|
"loss": 1.146,
|
|
"margin_dpo/margin_mean": 86.20682525634766,
|
|
"margin_dpo/margin_std": 153.45443725585938,
|
|
"step": 179
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -102.34881591796875,
|
|
"KL/mean": -159.50070190429688,
|
|
"KL/rejected_KL_mean": -216.65260314941406,
|
|
"KL/std": 110.17591857910156,
|
|
"epoch": 0.2643171806167401,
|
|
"fcm_dpo/beta": 0.004480388015508652,
|
|
"fcm_dpo/delta": -0.11835239082574844,
|
|
"fcm_dpo/margin": 114.30378723144531,
|
|
"fcm_dpo/q_t": 0.38382506370544434,
|
|
"grad_norm": 20.13519859313965,
|
|
"learning_rate": 4.611919330113591e-07,
|
|
"logits/chosen": -0.27647683024406433,
|
|
"logits/rejected": -0.27314233779907227,
|
|
"logps/chosen": -156.6915283203125,
|
|
"logps/ref_chosen": -54.34272003173828,
|
|
"logps/ref_rejected": -98.21183776855469,
|
|
"logps/rejected": -314.86444091796875,
|
|
"loss": 1.0244,
|
|
"margin_dpo/margin_mean": 114.30378723144531,
|
|
"margin_dpo/margin_std": 138.1670379638672,
|
|
"step": 180
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -88.83747100830078,
|
|
"KL/mean": -122.73734283447266,
|
|
"KL/rejected_KL_mean": -156.63722229003906,
|
|
"KL/std": 99.75308990478516,
|
|
"epoch": 0.2657856093979442,
|
|
"fcm_dpo/beta": 0.004530083388090134,
|
|
"fcm_dpo/delta": 0.0955948680639267,
|
|
"fcm_dpo/margin": 67.79975128173828,
|
|
"fcm_dpo/q_t": 0.4301533102989197,
|
|
"grad_norm": 19.06928825378418,
|
|
"learning_rate": 4.605024008834863e-07,
|
|
"logits/chosen": -0.30789846181869507,
|
|
"logits/rejected": -0.2823750972747803,
|
|
"logps/chosen": -143.83792114257812,
|
|
"logps/ref_chosen": -55.000457763671875,
|
|
"logps/ref_rejected": -61.656166076660156,
|
|
"logps/rejected": -218.29339599609375,
|
|
"loss": 1.1817,
|
|
"margin_dpo/margin_mean": 67.79975128173828,
|
|
"margin_dpo/margin_std": 127.82617950439453,
|
|
"step": 181
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -82.40182495117188,
|
|
"KL/mean": -144.41375732421875,
|
|
"KL/rejected_KL_mean": -206.42568969726562,
|
|
"KL/std": 120.55620574951172,
|
|
"epoch": 0.26725403817914833,
|
|
"fcm_dpo/beta": 0.0044230264611542225,
|
|
"fcm_dpo/delta": -0.1580391675233841,
|
|
"fcm_dpo/margin": 124.02388000488281,
|
|
"fcm_dpo/q_t": 0.37559401988983154,
|
|
"grad_norm": 17.178207397460938,
|
|
"learning_rate": 4.598073218215817e-07,
|
|
"logits/chosen": -0.27274084091186523,
|
|
"logits/rejected": -0.28051310777664185,
|
|
"logps/chosen": -123.50967407226562,
|
|
"logps/ref_chosen": -41.107852935791016,
|
|
"logps/ref_rejected": -89.5215835571289,
|
|
"logps/rejected": -295.947265625,
|
|
"loss": 1.0052,
|
|
"margin_dpo/margin_mean": 124.02387237548828,
|
|
"margin_dpo/margin_std": 144.7704620361328,
|
|
"step": 182
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -124.9429931640625,
|
|
"KL/mean": -156.95785522460938,
|
|
"KL/rejected_KL_mean": -188.97271728515625,
|
|
"KL/std": 100.42144012451172,
|
|
"epoch": 0.2687224669603524,
|
|
"fcm_dpo/beta": 0.004367251414805651,
|
|
"fcm_dpo/delta": -0.04111050069332123,
|
|
"fcm_dpo/margin": 64.02972412109375,
|
|
"fcm_dpo/q_t": 0.4345667362213135,
|
|
"grad_norm": 21.047754287719727,
|
|
"learning_rate": 4.5910671414162484e-07,
|
|
"logits/chosen": -0.3011692762374878,
|
|
"logits/rejected": -0.2902328372001648,
|
|
"logps/chosen": -182.46755981445312,
|
|
"logps/ref_chosen": -57.52456283569336,
|
|
"logps/ref_rejected": -75.97572326660156,
|
|
"logps/rejected": -264.9484558105469,
|
|
"loss": 1.1896,
|
|
"margin_dpo/margin_mean": 64.02973175048828,
|
|
"margin_dpo/margin_std": 109.3995132446289,
|
|
"step": 183
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -106.86518859863281,
|
|
"KL/mean": -141.30413818359375,
|
|
"KL/rejected_KL_mean": -175.74305725097656,
|
|
"KL/std": 97.58000183105469,
|
|
"epoch": 0.2701908957415565,
|
|
"fcm_dpo/beta": 0.00436544232070446,
|
|
"fcm_dpo/delta": -0.0041433474980294704,
|
|
"fcm_dpo/margin": 68.87787628173828,
|
|
"fcm_dpo/q_t": 0.4317898750305176,
|
|
"grad_norm": 17.791168212890625,
|
|
"learning_rate": 4.5840059630527985e-07,
|
|
"logits/chosen": -0.33284837007522583,
|
|
"logits/rejected": -0.32246139645576477,
|
|
"logps/chosen": -165.41015625,
|
|
"logps/ref_chosen": -58.544952392578125,
|
|
"logps/ref_rejected": -76.63406372070312,
|
|
"logps/rejected": -252.37713623046875,
|
|
"loss": 1.1784,
|
|
"margin_dpo/margin_mean": 68.87787628173828,
|
|
"margin_dpo/margin_std": 122.99388122558594,
|
|
"step": 184
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -114.82501983642578,
|
|
"KL/mean": -141.30067443847656,
|
|
"KL/rejected_KL_mean": -167.77633666992188,
|
|
"KL/std": 109.46660614013672,
|
|
"epoch": 0.27165932452276065,
|
|
"fcm_dpo/beta": 0.004431641660630703,
|
|
"fcm_dpo/delta": 0.0773247703909874,
|
|
"fcm_dpo/margin": 52.95130157470703,
|
|
"fcm_dpo/q_t": 0.44948315620422363,
|
|
"grad_norm": 19.48412322998047,
|
|
"learning_rate": 4.5768898691940836e-07,
|
|
"logits/chosen": -0.2981659173965454,
|
|
"logits/rejected": -0.2731373608112335,
|
|
"logps/chosen": -176.85086059570312,
|
|
"logps/ref_chosen": -62.025848388671875,
|
|
"logps/ref_rejected": -73.7625961303711,
|
|
"logps/rejected": -241.5389404296875,
|
|
"loss": 1.2441,
|
|
"margin_dpo/margin_mean": 52.9513053894043,
|
|
"margin_dpo/margin_std": 132.49623107910156,
|
|
"step": 185
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -105.06900787353516,
|
|
"KL/mean": -157.23483276367188,
|
|
"KL/rejected_KL_mean": -209.400634765625,
|
|
"KL/std": 109.59912109375,
|
|
"epoch": 0.27312775330396477,
|
|
"fcm_dpo/beta": 0.004385577980428934,
|
|
"fcm_dpo/delta": -0.06044544652104378,
|
|
"fcm_dpo/margin": 104.33164978027344,
|
|
"fcm_dpo/q_t": 0.39578211307525635,
|
|
"grad_norm": 20.40852928161621,
|
|
"learning_rate": 4.5697190473557947e-07,
|
|
"logits/chosen": -0.3577519655227661,
|
|
"logits/rejected": -0.3329923450946808,
|
|
"logps/chosen": -174.4224853515625,
|
|
"logps/ref_chosen": -69.35346984863281,
|
|
"logps/ref_rejected": -88.07244873046875,
|
|
"logps/rejected": -297.47308349609375,
|
|
"loss": 1.0496,
|
|
"margin_dpo/margin_mean": 104.33164978027344,
|
|
"margin_dpo/margin_std": 125.84209442138672,
|
|
"step": 186
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -99.39851379394531,
|
|
"KL/mean": -143.77651977539062,
|
|
"KL/rejected_KL_mean": -188.154541015625,
|
|
"KL/std": 107.75646209716797,
|
|
"epoch": 0.2745961820851689,
|
|
"fcm_dpo/beta": 0.004401649348437786,
|
|
"fcm_dpo/delta": 0.00935973972082138,
|
|
"fcm_dpo/margin": 88.75602722167969,
|
|
"fcm_dpo/q_t": 0.4099273979663849,
|
|
"grad_norm": 21.53246307373047,
|
|
"learning_rate": 4.5624936864957555e-07,
|
|
"logits/chosen": -0.3320094645023346,
|
|
"logits/rejected": -0.32622426748275757,
|
|
"logps/chosen": -152.15496826171875,
|
|
"logps/ref_chosen": -52.7564582824707,
|
|
"logps/ref_rejected": -81.96910095214844,
|
|
"logps/rejected": -270.1236267089844,
|
|
"loss": 1.0945,
|
|
"margin_dpo/margin_mean": 88.75602722167969,
|
|
"margin_dpo/margin_std": 114.76980590820312,
|
|
"step": 187
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -93.59693908691406,
|
|
"KL/mean": -147.21905517578125,
|
|
"KL/rejected_KL_mean": -200.8411865234375,
|
|
"KL/std": 118.67021179199219,
|
|
"epoch": 0.27606461086637296,
|
|
"fcm_dpo/beta": 0.004345991648733616,
|
|
"fcm_dpo/delta": -0.0692645013332367,
|
|
"fcm_dpo/margin": 107.2442626953125,
|
|
"fcm_dpo/q_t": 0.39470282196998596,
|
|
"grad_norm": 31.087356567382812,
|
|
"learning_rate": 4.5552139770089454e-07,
|
|
"logits/chosen": -0.328615665435791,
|
|
"logits/rejected": -0.3348105847835541,
|
|
"logps/chosen": -143.01242065429688,
|
|
"logps/ref_chosen": -49.415489196777344,
|
|
"logps/ref_rejected": -89.54043579101562,
|
|
"logps/rejected": -290.3816223144531,
|
|
"loss": 1.0482,
|
|
"margin_dpo/margin_mean": 107.2442626953125,
|
|
"margin_dpo/margin_std": 131.91983032226562,
|
|
"step": 188
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -109.96575927734375,
|
|
"KL/mean": -154.14344787597656,
|
|
"KL/rejected_KL_mean": -198.32113647460938,
|
|
"KL/std": 121.00015258789062,
|
|
"epoch": 0.2775330396475771,
|
|
"fcm_dpo/beta": 0.0043410686776041985,
|
|
"fcm_dpo/delta": 0.01706843078136444,
|
|
"fcm_dpo/margin": 88.35537719726562,
|
|
"fcm_dpo/q_t": 0.41580936312675476,
|
|
"grad_norm": 24.516483306884766,
|
|
"learning_rate": 4.5478801107224794e-07,
|
|
"logits/chosen": -0.31679028272628784,
|
|
"logits/rejected": -0.29732340574264526,
|
|
"logps/chosen": -162.36471557617188,
|
|
"logps/ref_chosen": -52.39896011352539,
|
|
"logps/ref_rejected": -72.16735076904297,
|
|
"logps/rejected": -270.4884948730469,
|
|
"loss": 1.1315,
|
|
"margin_dpo/margin_mean": 88.35537719726562,
|
|
"margin_dpo/margin_std": 149.4475860595703,
|
|
"step": 189
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -119.08251953125,
|
|
"KL/mean": -171.51473999023438,
|
|
"KL/rejected_KL_mean": -223.94699096679688,
|
|
"KL/std": 130.3875732421875,
|
|
"epoch": 0.2790014684287812,
|
|
"fcm_dpo/beta": 0.004339671693742275,
|
|
"fcm_dpo/delta": -0.058640651404857635,
|
|
"fcm_dpo/margin": 104.86446380615234,
|
|
"fcm_dpo/q_t": 0.39942899346351624,
|
|
"grad_norm": 18.538650512695312,
|
|
"learning_rate": 4.5404922808905543e-07,
|
|
"logits/chosen": -0.3367846608161926,
|
|
"logits/rejected": -0.32651811838150024,
|
|
"logps/chosen": -183.76559448242188,
|
|
"logps/ref_chosen": -64.68305969238281,
|
|
"logps/ref_rejected": -102.55052185058594,
|
|
"logps/rejected": -326.49749755859375,
|
|
"loss": 1.0841,
|
|
"margin_dpo/margin_mean": 104.86446380615234,
|
|
"margin_dpo/margin_std": 149.67710876464844,
|
|
"step": 190
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -109.22671508789062,
|
|
"KL/mean": -184.44924926757812,
|
|
"KL/rejected_KL_mean": -259.6717834472656,
|
|
"KL/std": 148.47653198242188,
|
|
"epoch": 0.28046989720998533,
|
|
"fcm_dpo/beta": 0.004123254679143429,
|
|
"fcm_dpo/delta": -0.2364530861377716,
|
|
"fcm_dpo/margin": 150.44508361816406,
|
|
"fcm_dpo/q_t": 0.36313188076019287,
|
|
"grad_norm": 19.302614212036133,
|
|
"learning_rate": 4.5330506821893565e-07,
|
|
"logits/chosen": -0.31450676918029785,
|
|
"logits/rejected": -0.2896798849105835,
|
|
"logps/chosen": -177.88558959960938,
|
|
"logps/ref_chosen": -68.65887451171875,
|
|
"logps/ref_rejected": -110.1396713256836,
|
|
"logps/rejected": -369.81146240234375,
|
|
"loss": 0.9577,
|
|
"margin_dpo/margin_mean": 150.44508361816406,
|
|
"margin_dpo/margin_std": 164.7996826171875,
|
|
"step": 191
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -138.94046020507812,
|
|
"KL/mean": -188.46331787109375,
|
|
"KL/rejected_KL_mean": -237.9861602783203,
|
|
"KL/std": 130.4176025390625,
|
|
"epoch": 0.28193832599118945,
|
|
"fcm_dpo/beta": 0.004099044483155012,
|
|
"fcm_dpo/delta": -0.0063680801540613174,
|
|
"fcm_dpo/margin": 99.04568481445312,
|
|
"fcm_dpo/q_t": 0.4107716679573059,
|
|
"grad_norm": 21.972942352294922,
|
|
"learning_rate": 4.5255555107119336e-07,
|
|
"logits/chosen": -0.28979045152664185,
|
|
"logits/rejected": -0.2879485487937927,
|
|
"logps/chosen": -208.66738891601562,
|
|
"logps/ref_chosen": -69.72691345214844,
|
|
"logps/ref_rejected": -103.32135009765625,
|
|
"logps/rejected": -341.3074951171875,
|
|
"loss": 1.1173,
|
|
"margin_dpo/margin_mean": 99.04568481445312,
|
|
"margin_dpo/margin_std": 159.619140625,
|
|
"step": 192
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -135.95660400390625,
|
|
"KL/mean": -166.56283569335938,
|
|
"KL/rejected_KL_mean": -197.16908264160156,
|
|
"KL/std": 121.18572998046875,
|
|
"epoch": 0.2834067547723935,
|
|
"fcm_dpo/beta": 0.004099993035197258,
|
|
"fcm_dpo/delta": 0.03158862516283989,
|
|
"fcm_dpo/margin": 61.212486267089844,
|
|
"fcm_dpo/q_t": 0.4411364793777466,
|
|
"grad_norm": 28.599891662597656,
|
|
"learning_rate": 4.5180069639630236e-07,
|
|
"logits/chosen": -0.3054850101470947,
|
|
"logits/rejected": -0.2954953908920288,
|
|
"logps/chosen": -196.1470947265625,
|
|
"logps/ref_chosen": -60.19049835205078,
|
|
"logps/ref_rejected": -76.40755462646484,
|
|
"logps/rejected": -273.5766296386719,
|
|
"loss": 1.2463,
|
|
"margin_dpo/margin_mean": 61.21249008178711,
|
|
"margin_dpo/margin_std": 154.33920288085938,
|
|
"step": 193
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -86.49054718017578,
|
|
"KL/mean": -132.54513549804688,
|
|
"KL/rejected_KL_mean": -178.5997314453125,
|
|
"KL/std": 100.00372314453125,
|
|
"epoch": 0.28487518355359764,
|
|
"fcm_dpo/beta": 0.004112754482775927,
|
|
"fcm_dpo/delta": 0.02185986563563347,
|
|
"fcm_dpo/margin": 92.10918426513672,
|
|
"fcm_dpo/q_t": 0.4111691117286682,
|
|
"grad_norm": 17.3194637298584,
|
|
"learning_rate": 4.510405240853854e-07,
|
|
"logits/chosen": -0.22373536229133606,
|
|
"logits/rejected": -0.20780496299266815,
|
|
"logps/chosen": -124.33091735839844,
|
|
"logps/ref_chosen": -37.84037399291992,
|
|
"logps/ref_rejected": -60.684783935546875,
|
|
"logps/rejected": -239.28451538085938,
|
|
"loss": 1.0929,
|
|
"margin_dpo/margin_mean": 92.10918426513672,
|
|
"margin_dpo/margin_std": 109.87422943115234,
|
|
"step": 194
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -134.20848083496094,
|
|
"KL/mean": -185.5911865234375,
|
|
"KL/rejected_KL_mean": -236.97390747070312,
|
|
"KL/std": 123.36296081542969,
|
|
"epoch": 0.28634361233480177,
|
|
"fcm_dpo/beta": 0.004115342628210783,
|
|
"fcm_dpo/delta": -0.023933224380016327,
|
|
"fcm_dpo/margin": 102.76541900634766,
|
|
"fcm_dpo/q_t": 0.4027373194694519,
|
|
"grad_norm": 20.05968475341797,
|
|
"learning_rate": 4.5027505416968985e-07,
|
|
"logits/chosen": -0.2823333740234375,
|
|
"logits/rejected": -0.29932117462158203,
|
|
"logps/chosen": -189.1000518798828,
|
|
"logps/ref_chosen": -54.891571044921875,
|
|
"logps/ref_rejected": -96.77095794677734,
|
|
"logps/rejected": -333.744873046875,
|
|
"loss": 1.0699,
|
|
"margin_dpo/margin_mean": 102.76541137695312,
|
|
"margin_dpo/margin_std": 126.29493713378906,
|
|
"step": 195
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -106.2418212890625,
|
|
"KL/mean": -163.61598205566406,
|
|
"KL/rejected_KL_mean": -220.99012756347656,
|
|
"KL/std": 124.76324462890625,
|
|
"epoch": 0.2878120411160059,
|
|
"fcm_dpo/beta": 0.004045085981488228,
|
|
"fcm_dpo/delta": -0.06845034658908844,
|
|
"fcm_dpo/margin": 114.74829864501953,
|
|
"fcm_dpo/q_t": 0.39634737372398376,
|
|
"grad_norm": 16.996543884277344,
|
|
"learning_rate": 4.495043068200599e-07,
|
|
"logits/chosen": -0.3161476254463196,
|
|
"logits/rejected": -0.30336615443229675,
|
|
"logps/chosen": -159.487060546875,
|
|
"logps/ref_chosen": -53.245243072509766,
|
|
"logps/ref_rejected": -76.05294799804688,
|
|
"logps/rejected": -297.0430603027344,
|
|
"loss": 1.0639,
|
|
"margin_dpo/margin_mean": 114.74829864501953,
|
|
"margin_dpo/margin_std": 149.60369873046875,
|
|
"step": 196
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -109.41055297851562,
|
|
"KL/mean": -152.39816284179688,
|
|
"KL/rejected_KL_mean": -195.3857879638672,
|
|
"KL/std": 105.53937530517578,
|
|
"epoch": 0.28928046989721,
|
|
"fcm_dpo/beta": 0.004100443329662085,
|
|
"fcm_dpo/delta": 0.048719555139541626,
|
|
"fcm_dpo/margin": 85.9752197265625,
|
|
"fcm_dpo/q_t": 0.4195671081542969,
|
|
"grad_norm": 18.929014205932617,
|
|
"learning_rate": 4.4872830234640493e-07,
|
|
"logits/chosen": -0.2705003023147583,
|
|
"logits/rejected": -0.264984130859375,
|
|
"logps/chosen": -169.8308868408203,
|
|
"logps/ref_chosen": -60.42033386230469,
|
|
"logps/ref_rejected": -77.20890808105469,
|
|
"logps/rejected": -272.5946960449219,
|
|
"loss": 1.1245,
|
|
"margin_dpo/margin_mean": 85.9752197265625,
|
|
"margin_dpo/margin_std": 122.52574920654297,
|
|
"step": 197
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -121.74513244628906,
|
|
"KL/mean": -178.72122192382812,
|
|
"KL/rejected_KL_mean": -235.69732666015625,
|
|
"KL/std": 134.29644775390625,
|
|
"epoch": 0.2907488986784141,
|
|
"fcm_dpo/beta": 0.0040567112155258656,
|
|
"fcm_dpo/delta": -0.06527149677276611,
|
|
"fcm_dpo/margin": 113.95219421386719,
|
|
"fcm_dpo/q_t": 0.39734193682670593,
|
|
"grad_norm": 20.2078914642334,
|
|
"learning_rate": 4.479470611971645e-07,
|
|
"logits/chosen": -0.3216399848461151,
|
|
"logits/rejected": -0.3219534158706665,
|
|
"logps/chosen": -176.78131103515625,
|
|
"logps/ref_chosen": -55.03618621826172,
|
|
"logps/ref_rejected": -97.24325561523438,
|
|
"logps/rejected": -332.9405822753906,
|
|
"loss": 1.0587,
|
|
"margin_dpo/margin_mean": 113.95220184326172,
|
|
"margin_dpo/margin_std": 150.6449737548828,
|
|
"step": 198
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -117.42064666748047,
|
|
"KL/mean": -173.71978759765625,
|
|
"KL/rejected_KL_mean": -230.0189666748047,
|
|
"KL/std": 121.98558044433594,
|
|
"epoch": 0.2922173274596182,
|
|
"fcm_dpo/beta": 0.003988361917436123,
|
|
"fcm_dpo/delta": -0.05231431871652603,
|
|
"fcm_dpo/margin": 112.59829711914062,
|
|
"fcm_dpo/q_t": 0.3982279896736145,
|
|
"grad_norm": 19.83755111694336,
|
|
"learning_rate": 4.471606039587695e-07,
|
|
"logits/chosen": -0.33298349380493164,
|
|
"logits/rejected": -0.3157057762145996,
|
|
"logps/chosen": -174.24948120117188,
|
|
"logps/ref_chosen": -56.828826904296875,
|
|
"logps/ref_rejected": -84.64820861816406,
|
|
"logps/rejected": -314.66717529296875,
|
|
"loss": 1.0711,
|
|
"margin_dpo/margin_mean": 112.59829711914062,
|
|
"margin_dpo/margin_std": 149.75167846679688,
|
|
"step": 199
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -118.09941101074219,
|
|
"KL/mean": -175.55503845214844,
|
|
"KL/rejected_KL_mean": -233.01065063476562,
|
|
"KL/std": 135.21307373046875,
|
|
"epoch": 0.2936857562408223,
|
|
"fcm_dpo/beta": 0.003952971659600735,
|
|
"fcm_dpo/delta": -0.05722919851541519,
|
|
"fcm_dpo/margin": 114.91127014160156,
|
|
"fcm_dpo/q_t": 0.4002299904823303,
|
|
"grad_norm": 27.9632625579834,
|
|
"learning_rate": 4.4636895135509966e-07,
|
|
"logits/chosen": -0.2579571604728699,
|
|
"logits/rejected": -0.24183320999145508,
|
|
"logps/chosen": -171.16647338867188,
|
|
"logps/ref_chosen": -53.06706237792969,
|
|
"logps/ref_rejected": -80.60843658447266,
|
|
"logps/rejected": -313.61907958984375,
|
|
"loss": 1.0904,
|
|
"margin_dpo/margin_mean": 114.9112548828125,
|
|
"margin_dpo/margin_std": 175.34649658203125,
|
|
"step": 200
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -124.49310302734375,
|
|
"KL/mean": -181.124267578125,
|
|
"KL/rejected_KL_mean": -237.75543212890625,
|
|
"KL/std": 141.97161865234375,
|
|
"epoch": 0.29515418502202645,
|
|
"fcm_dpo/beta": 0.003938804380595684,
|
|
"fcm_dpo/delta": -0.04832981526851654,
|
|
"fcm_dpo/margin": 113.26231384277344,
|
|
"fcm_dpo/q_t": 0.399710088968277,
|
|
"grad_norm": 20.296661376953125,
|
|
"learning_rate": 4.455721242469372e-07,
|
|
"logits/chosen": -0.35240548849105835,
|
|
"logits/rejected": -0.35075196623802185,
|
|
"logps/chosen": -199.89532470703125,
|
|
"logps/ref_chosen": -75.4022216796875,
|
|
"logps/ref_rejected": -114.80821990966797,
|
|
"logps/rejected": -352.56365966796875,
|
|
"loss": 1.079,
|
|
"margin_dpo/margin_mean": 113.26231384277344,
|
|
"margin_dpo/margin_std": 158.4004669189453,
|
|
"step": 201
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -125.97883605957031,
|
|
"KL/mean": -165.76266479492188,
|
|
"KL/rejected_KL_mean": -205.54647827148438,
|
|
"KL/std": 121.41524505615234,
|
|
"epoch": 0.2966226138032305,
|
|
"fcm_dpo/beta": 0.003970341291278601,
|
|
"fcm_dpo/delta": 0.08675570785999298,
|
|
"fcm_dpo/margin": 79.567626953125,
|
|
"fcm_dpo/q_t": 0.4300415515899658,
|
|
"grad_norm": 21.754104614257812,
|
|
"learning_rate": 4.4477014363141755e-07,
|
|
"logits/chosen": -0.2775609493255615,
|
|
"logits/rejected": -0.29300734400749207,
|
|
"logps/chosen": -176.0801544189453,
|
|
"logps/ref_chosen": -50.101318359375,
|
|
"logps/ref_rejected": -86.98503112792969,
|
|
"logps/rejected": -292.531494140625,
|
|
"loss": 1.1859,
|
|
"margin_dpo/margin_mean": 79.567626953125,
|
|
"margin_dpo/margin_std": 157.5809326171875,
|
|
"step": 202
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -122.95875549316406,
|
|
"KL/mean": -171.21636962890625,
|
|
"KL/rejected_KL_mean": -219.47396850585938,
|
|
"KL/std": 122.28076934814453,
|
|
"epoch": 0.29809104258443464,
|
|
"fcm_dpo/beta": 0.003991924226284027,
|
|
"fcm_dpo/delta": 0.01531082671135664,
|
|
"fcm_dpo/margin": 96.51522827148438,
|
|
"fcm_dpo/q_t": 0.41198647022247314,
|
|
"grad_norm": 18.4498233795166,
|
|
"learning_rate": 4.439630306414758e-07,
|
|
"logits/chosen": -0.30969899892807007,
|
|
"logits/rejected": -0.2990788519382477,
|
|
"logps/chosen": -183.56845092773438,
|
|
"logps/ref_chosen": -60.60969543457031,
|
|
"logps/ref_rejected": -85.89596557617188,
|
|
"logps/rejected": -305.36993408203125,
|
|
"loss": 1.105,
|
|
"margin_dpo/margin_mean": 96.51522827148438,
|
|
"margin_dpo/margin_std": 135.06640625,
|
|
"step": 203
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -139.44410705566406,
|
|
"KL/mean": -183.3643341064453,
|
|
"KL/rejected_KL_mean": -227.28453063964844,
|
|
"KL/std": 135.77395629882812,
|
|
"epoch": 0.29955947136563876,
|
|
"fcm_dpo/beta": 0.00402648001909256,
|
|
"fcm_dpo/delta": 0.04800150915980339,
|
|
"fcm_dpo/margin": 87.84043884277344,
|
|
"fcm_dpo/q_t": 0.42272064089775085,
|
|
"grad_norm": 23.171846389770508,
|
|
"learning_rate": 4.431508065452897e-07,
|
|
"logits/chosen": -0.40281885862350464,
|
|
"logits/rejected": -0.3679213225841522,
|
|
"logps/chosen": -219.60906982421875,
|
|
"logps/ref_chosen": -80.16496276855469,
|
|
"logps/ref_rejected": -87.69590759277344,
|
|
"logps/rejected": -314.9804382324219,
|
|
"loss": 1.1632,
|
|
"margin_dpo/margin_mean": 87.84043884277344,
|
|
"margin_dpo/margin_std": 163.03756713867188,
|
|
"step": 204
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -131.06268310546875,
|
|
"KL/mean": -188.67852783203125,
|
|
"KL/rejected_KL_mean": -246.2943572998047,
|
|
"KL/std": 136.5657958984375,
|
|
"epoch": 0.3010279001468429,
|
|
"fcm_dpo/beta": 0.003965743817389011,
|
|
"fcm_dpo/delta": -0.061633773148059845,
|
|
"fcm_dpo/margin": 115.23167419433594,
|
|
"fcm_dpo/q_t": 0.3951718211174011,
|
|
"grad_norm": 19.90717124938965,
|
|
"learning_rate": 4.4233349274571974e-07,
|
|
"logits/chosen": -0.30089646577835083,
|
|
"logits/rejected": -0.27246442437171936,
|
|
"logps/chosen": -190.44741821289062,
|
|
"logps/ref_chosen": -59.384735107421875,
|
|
"logps/ref_rejected": -85.12505340576172,
|
|
"logps/rejected": -331.4194030761719,
|
|
"loss": 1.0691,
|
|
"margin_dpo/margin_mean": 115.23167419433594,
|
|
"margin_dpo/margin_std": 149.4026641845703,
|
|
"step": 205
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -116.30944061279297,
|
|
"KL/mean": -179.20240783691406,
|
|
"KL/rejected_KL_mean": -242.0953826904297,
|
|
"KL/std": 123.43933868408203,
|
|
"epoch": 0.302496328928047,
|
|
"fcm_dpo/beta": 0.0039128996431827545,
|
|
"fcm_dpo/delta": -0.0976862832903862,
|
|
"fcm_dpo/margin": 125.78593444824219,
|
|
"fcm_dpo/q_t": 0.38541656732559204,
|
|
"grad_norm": 26.1035099029541,
|
|
"learning_rate": 4.415111107797445e-07,
|
|
"logits/chosen": -0.2364203929901123,
|
|
"logits/rejected": -0.2409205287694931,
|
|
"logps/chosen": -163.27394104003906,
|
|
"logps/ref_chosen": -46.964500427246094,
|
|
"logps/ref_rejected": -98.9534912109375,
|
|
"logps/rejected": -341.04888916015625,
|
|
"loss": 1.0192,
|
|
"margin_dpo/margin_mean": 125.78593444824219,
|
|
"margin_dpo/margin_std": 134.7174072265625,
|
|
"step": 206
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -106.32139587402344,
|
|
"KL/mean": -176.8770751953125,
|
|
"KL/rejected_KL_mean": -247.43272399902344,
|
|
"KL/std": 138.58750915527344,
|
|
"epoch": 0.3039647577092511,
|
|
"fcm_dpo/beta": 0.0038270847871899605,
|
|
"fcm_dpo/delta": -0.14793969690799713,
|
|
"fcm_dpo/margin": 141.11134338378906,
|
|
"fcm_dpo/q_t": 0.37947410345077515,
|
|
"grad_norm": 31.017250061035156,
|
|
"learning_rate": 4.4068368231789365e-07,
|
|
"logits/chosen": -0.3106921911239624,
|
|
"logits/rejected": -0.2881418466567993,
|
|
"logps/chosen": -162.37765502929688,
|
|
"logps/ref_chosen": -56.05625915527344,
|
|
"logps/ref_rejected": -84.44779968261719,
|
|
"logps/rejected": -331.8805236816406,
|
|
"loss": 0.9999,
|
|
"margin_dpo/margin_mean": 141.111328125,
|
|
"margin_dpo/margin_std": 160.7344970703125,
|
|
"step": 207
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -168.3114776611328,
|
|
"KL/mean": -226.00588989257812,
|
|
"KL/rejected_KL_mean": -283.7003173828125,
|
|
"KL/std": 139.61465454101562,
|
|
"epoch": 0.3054331864904552,
|
|
"fcm_dpo/beta": 0.0037661269307136536,
|
|
"fcm_dpo/delta": -0.03632538765668869,
|
|
"fcm_dpo/margin": 115.3888168334961,
|
|
"fcm_dpo/q_t": 0.4015238881111145,
|
|
"grad_norm": 28.11204719543457,
|
|
"learning_rate": 4.398512291636768e-07,
|
|
"logits/chosen": -0.34051740169525146,
|
|
"logits/rejected": -0.32537776231765747,
|
|
"logps/chosen": -235.37908935546875,
|
|
"logps/ref_chosen": -67.06761169433594,
|
|
"logps/ref_rejected": -94.28689575195312,
|
|
"logps/rejected": -377.9872131347656,
|
|
"loss": 1.0934,
|
|
"margin_dpo/margin_mean": 115.3888168334961,
|
|
"margin_dpo/margin_std": 170.67803955078125,
|
|
"step": 208
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -137.9810028076172,
|
|
"KL/mean": -188.3424835205078,
|
|
"KL/rejected_KL_mean": -238.7039794921875,
|
|
"KL/std": 125.35417175292969,
|
|
"epoch": 0.3069016152716593,
|
|
"fcm_dpo/beta": 0.003780151717364788,
|
|
"fcm_dpo/delta": 0.020004911348223686,
|
|
"fcm_dpo/margin": 100.72296142578125,
|
|
"fcm_dpo/q_t": 0.41364553570747375,
|
|
"grad_norm": 24.225013732910156,
|
|
"learning_rate": 4.3901377325300857e-07,
|
|
"logits/chosen": -0.23914602398872375,
|
|
"logits/rejected": -0.22716867923736572,
|
|
"logps/chosen": -194.16268920898438,
|
|
"logps/ref_chosen": -56.18169403076172,
|
|
"logps/ref_rejected": -80.94152069091797,
|
|
"logps/rejected": -319.6455078125,
|
|
"loss": 1.1282,
|
|
"margin_dpo/margin_mean": 100.72296142578125,
|
|
"margin_dpo/margin_std": 158.71670532226562,
|
|
"step": 209
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -121.80764770507812,
|
|
"KL/mean": -179.4827880859375,
|
|
"KL/rejected_KL_mean": -237.15792846679688,
|
|
"KL/std": 125.80329895019531,
|
|
"epoch": 0.30837004405286345,
|
|
"fcm_dpo/beta": 0.003775266231968999,
|
|
"fcm_dpo/delta": -0.03721902519464493,
|
|
"fcm_dpo/margin": 115.35026550292969,
|
|
"fcm_dpo/q_t": 0.40104708075523376,
|
|
"grad_norm": 22.758333206176758,
|
|
"learning_rate": 4.381713366536311e-07,
|
|
"logits/chosen": -0.27275052666664124,
|
|
"logits/rejected": -0.2652055025100708,
|
|
"logps/chosen": -168.17947387695312,
|
|
"logps/ref_chosen": -46.371822357177734,
|
|
"logps/ref_rejected": -76.68162536621094,
|
|
"logps/rejected": -313.83953857421875,
|
|
"loss": 1.0734,
|
|
"margin_dpo/margin_mean": 115.35028076171875,
|
|
"margin_dpo/margin_std": 150.81455993652344,
|
|
"step": 210
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -178.0186767578125,
|
|
"KL/mean": -228.12051391601562,
|
|
"KL/rejected_KL_mean": -278.22235107421875,
|
|
"KL/std": 146.26031494140625,
|
|
"epoch": 0.30983847283406757,
|
|
"fcm_dpo/beta": 0.00377118238247931,
|
|
"fcm_dpo/delta": 0.02294088713824749,
|
|
"fcm_dpo/margin": 100.20367431640625,
|
|
"fcm_dpo/q_t": 0.41908180713653564,
|
|
"grad_norm": 27.190279006958008,
|
|
"learning_rate": 4.373239415645323e-07,
|
|
"logits/chosen": -0.3228422999382019,
|
|
"logits/rejected": -0.2874525487422943,
|
|
"logps/chosen": -256.9510192871094,
|
|
"logps/ref_chosen": -78.93235778808594,
|
|
"logps/ref_rejected": -86.82098388671875,
|
|
"logps/rejected": -365.0433349609375,
|
|
"loss": 1.1417,
|
|
"margin_dpo/margin_mean": 100.20367431640625,
|
|
"margin_dpo/margin_std": 171.17764282226562,
|
|
"step": 211
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -148.94281005859375,
|
|
"KL/mean": -219.05404663085938,
|
|
"KL/rejected_KL_mean": -289.165283203125,
|
|
"KL/std": 156.24212646484375,
|
|
"epoch": 0.31130690161527164,
|
|
"fcm_dpo/beta": 0.0036756170447915792,
|
|
"fcm_dpo/delta": -0.12374652922153473,
|
|
"fcm_dpo/margin": 140.2224884033203,
|
|
"fcm_dpo/q_t": 0.3847534656524658,
|
|
"grad_norm": 24.610458374023438,
|
|
"learning_rate": 4.3647161031536086e-07,
|
|
"logits/chosen": -0.29684221744537354,
|
|
"logits/rejected": -0.29030919075012207,
|
|
"logps/chosen": -207.1398162841797,
|
|
"logps/ref_chosen": -58.19701385498047,
|
|
"logps/ref_rejected": -103.05785369873047,
|
|
"logps/rejected": -392.2231140136719,
|
|
"loss": 1.0371,
|
|
"margin_dpo/margin_mean": 140.22247314453125,
|
|
"margin_dpo/margin_std": 175.03460693359375,
|
|
"step": 212
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -138.5974884033203,
|
|
"KL/mean": -207.8812713623047,
|
|
"KL/rejected_KL_mean": -277.1650390625,
|
|
"KL/std": 138.962158203125,
|
|
"epoch": 0.31277533039647576,
|
|
"fcm_dpo/beta": 0.003611032385379076,
|
|
"fcm_dpo/delta": -0.1061759814620018,
|
|
"fcm_dpo/margin": 138.56756591796875,
|
|
"fcm_dpo/q_t": 0.3859563171863556,
|
|
"grad_norm": 24.44261360168457,
|
|
"learning_rate": 4.3561436536583774e-07,
|
|
"logits/chosen": -0.3207147717475891,
|
|
"logits/rejected": -0.29644298553466797,
|
|
"logps/chosen": -206.11019897460938,
|
|
"logps/ref_chosen": -67.51271057128906,
|
|
"logps/ref_rejected": -93.91471862792969,
|
|
"logps/rejected": -371.07977294921875,
|
|
"loss": 1.0262,
|
|
"margin_dpo/margin_mean": 138.56756591796875,
|
|
"margin_dpo/margin_std": 162.03538513183594,
|
|
"step": 213
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -118.24832916259766,
|
|
"KL/mean": -180.09129333496094,
|
|
"KL/rejected_KL_mean": -241.93423461914062,
|
|
"KL/std": 136.1957550048828,
|
|
"epoch": 0.3142437591776799,
|
|
"fcm_dpo/beta": 0.003579269163310528,
|
|
"fcm_dpo/delta": -0.04467523843050003,
|
|
"fcm_dpo/margin": 123.68592071533203,
|
|
"fcm_dpo/q_t": 0.4001598358154297,
|
|
"grad_norm": 21.233543395996094,
|
|
"learning_rate": 4.3475222930516473e-07,
|
|
"logits/chosen": -0.27026987075805664,
|
|
"logits/rejected": -0.2760501205921173,
|
|
"logps/chosen": -159.85321044921875,
|
|
"logps/ref_chosen": -41.604888916015625,
|
|
"logps/ref_rejected": -77.51741027832031,
|
|
"logps/rejected": -319.45166015625,
|
|
"loss": 1.0675,
|
|
"margin_dpo/margin_mean": 123.68592834472656,
|
|
"margin_dpo/margin_std": 161.08143615722656,
|
|
"step": 214
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -144.5403289794922,
|
|
"KL/mean": -206.9398193359375,
|
|
"KL/rejected_KL_mean": -269.33929443359375,
|
|
"KL/std": 141.94100952148438,
|
|
"epoch": 0.315712187958884,
|
|
"fcm_dpo/beta": 0.003539241384714842,
|
|
"fcm_dpo/delta": -0.043820615857839584,
|
|
"fcm_dpo/margin": 124.79898071289062,
|
|
"fcm_dpo/q_t": 0.3984990119934082,
|
|
"grad_norm": 22.27779769897461,
|
|
"learning_rate": 4.3388522485142885e-07,
|
|
"logits/chosen": -0.30568164587020874,
|
|
"logits/rejected": -0.3003992736339569,
|
|
"logps/chosen": -197.81959533691406,
|
|
"logps/ref_chosen": -53.279266357421875,
|
|
"logps/ref_rejected": -89.96464538574219,
|
|
"logps/rejected": -359.303955078125,
|
|
"loss": 1.0582,
|
|
"margin_dpo/margin_mean": 124.79898071289062,
|
|
"margin_dpo/margin_std": 149.58108520507812,
|
|
"step": 215
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -145.63059997558594,
|
|
"KL/mean": -207.81692504882812,
|
|
"KL/rejected_KL_mean": -270.00323486328125,
|
|
"KL/std": 146.82232666015625,
|
|
"epoch": 0.31718061674008813,
|
|
"fcm_dpo/beta": 0.003528446890413761,
|
|
"fcm_dpo/delta": -0.04068659618496895,
|
|
"fcm_dpo/margin": 124.37261199951172,
|
|
"fcm_dpo/q_t": 0.40117913484573364,
|
|
"grad_norm": 22.973649978637695,
|
|
"learning_rate": 4.330133748510036e-07,
|
|
"logits/chosen": -0.3099059760570526,
|
|
"logits/rejected": -0.29590481519699097,
|
|
"logps/chosen": -194.51840209960938,
|
|
"logps/ref_chosen": -48.887794494628906,
|
|
"logps/ref_rejected": -77.19892883300781,
|
|
"logps/rejected": -347.2021484375,
|
|
"loss": 1.0879,
|
|
"margin_dpo/margin_mean": 124.37261199951172,
|
|
"margin_dpo/margin_std": 179.5612030029297,
|
|
"step": 216
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -145.95330810546875,
|
|
"KL/mean": -222.22024536132812,
|
|
"KL/rejected_KL_mean": -298.4871826171875,
|
|
"KL/std": 154.306396484375,
|
|
"epoch": 0.3186490455212922,
|
|
"fcm_dpo/beta": 0.0034370056819170713,
|
|
"fcm_dpo/delta": -0.13140688836574554,
|
|
"fcm_dpo/margin": 152.53384399414062,
|
|
"fcm_dpo/q_t": 0.3825801610946655,
|
|
"grad_norm": 19.35890007019043,
|
|
"learning_rate": 4.3213670227794757e-07,
|
|
"logits/chosen": -0.2936730980873108,
|
|
"logits/rejected": -0.2913660407066345,
|
|
"logps/chosen": -195.79861450195312,
|
|
"logps/ref_chosen": -49.845306396484375,
|
|
"logps/ref_rejected": -100.07832336425781,
|
|
"logps/rejected": -398.56549072265625,
|
|
"loss": 1.0126,
|
|
"margin_dpo/margin_mean": 152.53384399414062,
|
|
"margin_dpo/margin_std": 178.2061767578125,
|
|
"step": 217
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -150.1802215576172,
|
|
"KL/mean": -208.71240234375,
|
|
"KL/rejected_KL_mean": -267.24462890625,
|
|
"KL/std": 148.29302978515625,
|
|
"epoch": 0.3201174743024963,
|
|
"fcm_dpo/beta": 0.003414642531424761,
|
|
"fcm_dpo/delta": 0.00023527629673480988,
|
|
"fcm_dpo/margin": 117.06442260742188,
|
|
"fcm_dpo/q_t": 0.4108254313468933,
|
|
"grad_norm": 20.17465591430664,
|
|
"learning_rate": 4.3125523023339815e-07,
|
|
"logits/chosen": -0.304365873336792,
|
|
"logits/rejected": -0.3002937436103821,
|
|
"logps/chosen": -208.75689697265625,
|
|
"logps/ref_chosen": -58.576683044433594,
|
|
"logps/ref_rejected": -87.84639739990234,
|
|
"logps/rejected": -355.09100341796875,
|
|
"loss": 1.1119,
|
|
"margin_dpo/margin_mean": 117.06441497802734,
|
|
"margin_dpo/margin_std": 178.00685119628906,
|
|
"step": 218
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -161.31483459472656,
|
|
"KL/mean": -210.52394104003906,
|
|
"KL/rejected_KL_mean": -259.7330627441406,
|
|
"KL/std": 153.1935577392578,
|
|
"epoch": 0.32158590308370044,
|
|
"fcm_dpo/beta": 0.0034687574952840805,
|
|
"fcm_dpo/delta": 0.059782225638628006,
|
|
"fcm_dpo/margin": 98.41824340820312,
|
|
"fcm_dpo/q_t": 0.42399221658706665,
|
|
"grad_norm": 28.482004165649414,
|
|
"learning_rate": 4.303689819449636e-07,
|
|
"logits/chosen": -0.3322892487049103,
|
|
"logits/rejected": -0.3280831575393677,
|
|
"logps/chosen": -222.39869689941406,
|
|
"logps/ref_chosen": -61.083858489990234,
|
|
"logps/ref_rejected": -85.83042907714844,
|
|
"logps/rejected": -345.5634765625,
|
|
"loss": 1.1775,
|
|
"margin_dpo/margin_mean": 98.4182357788086,
|
|
"margin_dpo/margin_std": 191.77099609375,
|
|
"step": 219
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -178.02310180664062,
|
|
"KL/mean": -218.76882934570312,
|
|
"KL/rejected_KL_mean": -259.5145568847656,
|
|
"KL/std": 128.1570587158203,
|
|
"epoch": 0.32305433186490456,
|
|
"fcm_dpo/beta": 0.0035150342155247927,
|
|
"fcm_dpo/delta": 0.11705435812473297,
|
|
"fcm_dpo/margin": 81.49143981933594,
|
|
"fcm_dpo/q_t": 0.43284928798675537,
|
|
"grad_norm": 23.411617279052734,
|
|
"learning_rate": 4.2947798076611047e-07,
|
|
"logits/chosen": -0.34668925404548645,
|
|
"logits/rejected": -0.32663899660110474,
|
|
"logps/chosen": -248.0543975830078,
|
|
"logps/ref_chosen": -70.03128051757812,
|
|
"logps/ref_rejected": -87.68551635742188,
|
|
"logps/rejected": -347.2000732421875,
|
|
"loss": 1.1759,
|
|
"margin_dpo/margin_mean": 81.49143981933594,
|
|
"margin_dpo/margin_std": 136.2982177734375,
|
|
"step": 220
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -148.138427734375,
|
|
"KL/mean": -238.5886993408203,
|
|
"KL/rejected_KL_mean": -329.03900146484375,
|
|
"KL/std": 168.93533325195312,
|
|
"epoch": 0.3245227606461087,
|
|
"fcm_dpo/beta": 0.0034304747823625803,
|
|
"fcm_dpo/delta": -0.23470783233642578,
|
|
"fcm_dpo/margin": 180.9005584716797,
|
|
"fcm_dpo/q_t": 0.3586632013320923,
|
|
"grad_norm": 30.417999267578125,
|
|
"learning_rate": 4.285822501755485e-07,
|
|
"logits/chosen": -0.3302137851715088,
|
|
"logits/rejected": -0.3399258852005005,
|
|
"logps/chosen": -200.29312133789062,
|
|
"logps/ref_chosen": -52.15470886230469,
|
|
"logps/ref_rejected": -106.46768188476562,
|
|
"logps/rejected": -435.50665283203125,
|
|
"loss": 0.9405,
|
|
"margin_dpo/margin_mean": 180.9005584716797,
|
|
"margin_dpo/margin_std": 174.17965698242188,
|
|
"step": 221
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -150.40621948242188,
|
|
"KL/mean": -219.40045166015625,
|
|
"KL/rejected_KL_mean": -288.39471435546875,
|
|
"KL/std": 150.12142944335938,
|
|
"epoch": 0.32599118942731276,
|
|
"fcm_dpo/beta": 0.0033583808690309525,
|
|
"fcm_dpo/delta": -0.06650819629430771,
|
|
"fcm_dpo/margin": 137.9884796142578,
|
|
"fcm_dpo/q_t": 0.39468562602996826,
|
|
"grad_norm": 26.266162872314453,
|
|
"learning_rate": 4.276818137766118e-07,
|
|
"logits/chosen": -0.3900914788246155,
|
|
"logits/rejected": -0.39828717708587646,
|
|
"logps/chosen": -211.3773193359375,
|
|
"logps/ref_chosen": -60.971099853515625,
|
|
"logps/ref_rejected": -100.00115203857422,
|
|
"logps/rejected": -388.3958740234375,
|
|
"loss": 1.0513,
|
|
"margin_dpo/margin_mean": 137.9884796142578,
|
|
"margin_dpo/margin_std": 170.7541046142578,
|
|
"step": 222
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -167.1275634765625,
|
|
"KL/mean": -225.02340698242188,
|
|
"KL/rejected_KL_mean": -282.91925048828125,
|
|
"KL/std": 149.61036682128906,
|
|
"epoch": 0.3274596182085169,
|
|
"fcm_dpo/beta": 0.0033374596387147903,
|
|
"fcm_dpo/delta": 0.014067416079342365,
|
|
"fcm_dpo/margin": 115.79170227050781,
|
|
"fcm_dpo/q_t": 0.413519948720932,
|
|
"grad_norm": 27.220115661621094,
|
|
"learning_rate": 4.2677669529663686e-07,
|
|
"logits/chosen": -0.28457504510879517,
|
|
"logits/rejected": -0.28229665756225586,
|
|
"logps/chosen": -219.76812744140625,
|
|
"logps/ref_chosen": -52.64057540893555,
|
|
"logps/ref_rejected": -82.82502746582031,
|
|
"logps/rejected": -365.7442626953125,
|
|
"loss": 1.1331,
|
|
"margin_dpo/margin_mean": 115.79170227050781,
|
|
"margin_dpo/margin_std": 194.0624542236328,
|
|
"step": 223
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -151.59030151367188,
|
|
"KL/mean": -218.800048828125,
|
|
"KL/rejected_KL_mean": -286.0097961425781,
|
|
"KL/std": 170.38592529296875,
|
|
"epoch": 0.328928046989721,
|
|
"fcm_dpo/beta": 0.003296963172033429,
|
|
"fcm_dpo/delta": -0.046691399067640305,
|
|
"fcm_dpo/margin": 134.4194793701172,
|
|
"fcm_dpo/q_t": 0.4030272364616394,
|
|
"grad_norm": 22.146337509155273,
|
|
"learning_rate": 4.2586691858633747e-07,
|
|
"logits/chosen": -0.3159272074699402,
|
|
"logits/rejected": -0.29905927181243896,
|
|
"logps/chosen": -200.1857147216797,
|
|
"logps/ref_chosen": -48.59541320800781,
|
|
"logps/ref_rejected": -77.11648559570312,
|
|
"logps/rejected": -363.12628173828125,
|
|
"loss": 1.0896,
|
|
"margin_dpo/margin_mean": 134.4194793701172,
|
|
"margin_dpo/margin_std": 198.03443908691406,
|
|
"step": 224
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -178.26751708984375,
|
|
"KL/mean": -257.4982604980469,
|
|
"KL/rejected_KL_mean": -336.72900390625,
|
|
"KL/std": 167.69284057617188,
|
|
"epoch": 0.3303964757709251,
|
|
"fcm_dpo/beta": 0.00325207132846117,
|
|
"fcm_dpo/delta": -0.12169913947582245,
|
|
"fcm_dpo/margin": 158.46151733398438,
|
|
"fcm_dpo/q_t": 0.3860987424850464,
|
|
"grad_norm": 20.99376678466797,
|
|
"learning_rate": 4.249525076191759e-07,
|
|
"logits/chosen": -0.31401747465133667,
|
|
"logits/rejected": -0.3065524101257324,
|
|
"logps/chosen": -236.26797485351562,
|
|
"logps/ref_chosen": -58.000465393066406,
|
|
"logps/ref_rejected": -99.90291595458984,
|
|
"logps/rejected": -436.6319274902344,
|
|
"loss": 1.0399,
|
|
"margin_dpo/margin_mean": 158.4615020751953,
|
|
"margin_dpo/margin_std": 211.03097534179688,
|
|
"step": 225
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -154.25083923339844,
|
|
"KL/mean": -218.34536743164062,
|
|
"KL/rejected_KL_mean": -282.43994140625,
|
|
"KL/std": 166.46963500976562,
|
|
"epoch": 0.33186490455212925,
|
|
"fcm_dpo/beta": 0.003208290785551071,
|
|
"fcm_dpo/delta": -0.01261284202337265,
|
|
"fcm_dpo/margin": 128.18907165527344,
|
|
"fcm_dpo/q_t": 0.41000163555145264,
|
|
"grad_norm": 24.45271873474121,
|
|
"learning_rate": 4.2403348649073167e-07,
|
|
"logits/chosen": -0.3766024708747864,
|
|
"logits/rejected": -0.3424544930458069,
|
|
"logps/chosen": -213.14964294433594,
|
|
"logps/ref_chosen": -58.898799896240234,
|
|
"logps/ref_rejected": -78.68775939941406,
|
|
"logps/rejected": -361.127685546875,
|
|
"loss": 1.1111,
|
|
"margin_dpo/margin_mean": 128.18907165527344,
|
|
"margin_dpo/margin_std": 198.9245147705078,
|
|
"step": 226
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -181.13844299316406,
|
|
"KL/mean": -261.814453125,
|
|
"KL/rejected_KL_mean": -342.49041748046875,
|
|
"KL/std": 189.39837646484375,
|
|
"epoch": 0.3333333333333333,
|
|
"fcm_dpo/beta": 0.003157012164592743,
|
|
"fcm_dpo/delta": -0.11616270244121552,
|
|
"fcm_dpo/margin": 161.3520050048828,
|
|
"fcm_dpo/q_t": 0.38600099086761475,
|
|
"grad_norm": 20.904441833496094,
|
|
"learning_rate": 4.2310987941806615e-07,
|
|
"logits/chosen": -0.34949034452438354,
|
|
"logits/rejected": -0.3386501669883728,
|
|
"logps/chosen": -240.2106170654297,
|
|
"logps/ref_chosen": -59.072181701660156,
|
|
"logps/ref_rejected": -99.41236877441406,
|
|
"logps/rejected": -441.9028015136719,
|
|
"loss": 1.0312,
|
|
"margin_dpo/margin_mean": 161.35202026367188,
|
|
"margin_dpo/margin_std": 202.92633056640625,
|
|
"step": 227
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -182.08627319335938,
|
|
"KL/mean": -236.57989501953125,
|
|
"KL/rejected_KL_mean": -291.0735168457031,
|
|
"KL/std": 154.74215698242188,
|
|
"epoch": 0.33480176211453744,
|
|
"fcm_dpo/beta": 0.0031847129575908184,
|
|
"fcm_dpo/delta": 0.05459333956241608,
|
|
"fcm_dpo/margin": 108.98725128173828,
|
|
"fcm_dpo/q_t": 0.42142853140830994,
|
|
"grad_norm": 23.708755493164062,
|
|
"learning_rate": 4.2218171073908463e-07,
|
|
"logits/chosen": -0.33513695001602173,
|
|
"logits/rejected": -0.31932687759399414,
|
|
"logps/chosen": -247.97756958007812,
|
|
"logps/ref_chosen": -65.89128875732422,
|
|
"logps/ref_rejected": -91.04875183105469,
|
|
"logps/rejected": -382.12225341796875,
|
|
"loss": 1.1621,
|
|
"margin_dpo/margin_mean": 108.98725128173828,
|
|
"margin_dpo/margin_std": 198.34036254882812,
|
|
"step": 228
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -189.10377502441406,
|
|
"KL/mean": -245.99990844726562,
|
|
"KL/rejected_KL_mean": -302.89605712890625,
|
|
"KL/std": 178.9686279296875,
|
|
"epoch": 0.33627019089574156,
|
|
"fcm_dpo/beta": 0.003205793909728527,
|
|
"fcm_dpo/delta": 0.0365116223692894,
|
|
"fcm_dpo/margin": 113.79228210449219,
|
|
"fcm_dpo/q_t": 0.41705501079559326,
|
|
"grad_norm": 32.29511642456055,
|
|
"learning_rate": 4.212490049118951e-07,
|
|
"logits/chosen": -0.4073641300201416,
|
|
"logits/rejected": -0.37577980756759644,
|
|
"logps/chosen": -259.8101501464844,
|
|
"logps/ref_chosen": -70.70637512207031,
|
|
"logps/ref_rejected": -84.52741241455078,
|
|
"logps/rejected": -387.4234619140625,
|
|
"loss": 1.1379,
|
|
"margin_dpo/margin_mean": 113.79228210449219,
|
|
"margin_dpo/margin_std": 187.65060424804688,
|
|
"step": 229
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -134.7830810546875,
|
|
"KL/mean": -224.5379638671875,
|
|
"KL/rejected_KL_mean": -314.2928466796875,
|
|
"KL/std": 161.85427856445312,
|
|
"epoch": 0.3377386196769457,
|
|
"fcm_dpo/beta": 0.0031274245120584965,
|
|
"fcm_dpo/delta": -0.171125590801239,
|
|
"fcm_dpo/margin": 179.509765625,
|
|
"fcm_dpo/q_t": 0.37051764130592346,
|
|
"grad_norm": 29.884607315063477,
|
|
"learning_rate": 4.203117865141635e-07,
|
|
"logits/chosen": -0.33008766174316406,
|
|
"logits/rejected": -0.3357307016849518,
|
|
"logps/chosen": -174.06509399414062,
|
|
"logps/ref_chosen": -39.282005310058594,
|
|
"logps/ref_rejected": -85.62191009521484,
|
|
"logps/rejected": -399.9147644042969,
|
|
"loss": 0.9731,
|
|
"margin_dpo/margin_mean": 179.509765625,
|
|
"margin_dpo/margin_std": 179.34201049804688,
|
|
"step": 230
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -152.9336395263672,
|
|
"KL/mean": -212.84384155273438,
|
|
"KL/rejected_KL_mean": -272.7540283203125,
|
|
"KL/std": 138.79013061523438,
|
|
"epoch": 0.3392070484581498,
|
|
"fcm_dpo/beta": 0.0031172512099146843,
|
|
"fcm_dpo/delta": 0.027481382712721825,
|
|
"fcm_dpo/margin": 119.82038116455078,
|
|
"fcm_dpo/q_t": 0.4157181978225708,
|
|
"grad_norm": 23.125385284423828,
|
|
"learning_rate": 4.1937008024246625e-07,
|
|
"logits/chosen": -0.38110148906707764,
|
|
"logits/rejected": -0.35203638672828674,
|
|
"logps/chosen": -216.2100830078125,
|
|
"logps/ref_chosen": -63.27644348144531,
|
|
"logps/ref_rejected": -74.1239013671875,
|
|
"logps/rejected": -346.8779296875,
|
|
"loss": 1.1104,
|
|
"margin_dpo/margin_mean": 119.82037353515625,
|
|
"margin_dpo/margin_std": 167.3756866455078,
|
|
"step": 231
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -184.20339965820312,
|
|
"KL/mean": -231.1331024169922,
|
|
"KL/rejected_KL_mean": -278.06280517578125,
|
|
"KL/std": 157.26010131835938,
|
|
"epoch": 0.3406754772393539,
|
|
"fcm_dpo/beta": 0.003163769142702222,
|
|
"fcm_dpo/delta": 0.10632483661174774,
|
|
"fcm_dpo/margin": 93.8593978881836,
|
|
"fcm_dpo/q_t": 0.4341059923171997,
|
|
"grad_norm": 21.933931350708008,
|
|
"learning_rate": 4.1842391091163933e-07,
|
|
"logits/chosen": -0.397582471370697,
|
|
"logits/rejected": -0.37435561418533325,
|
|
"logps/chosen": -254.9521484375,
|
|
"logps/ref_chosen": -70.74876403808594,
|
|
"logps/ref_rejected": -83.97706604003906,
|
|
"logps/rejected": -362.03985595703125,
|
|
"loss": 1.1763,
|
|
"margin_dpo/margin_mean": 93.8593978881836,
|
|
"margin_dpo/margin_std": 168.48648071289062,
|
|
"step": 232
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -160.23126220703125,
|
|
"KL/mean": -240.1412353515625,
|
|
"KL/rejected_KL_mean": -320.05120849609375,
|
|
"KL/std": 174.00173950195312,
|
|
"epoch": 0.342143906020558,
|
|
"fcm_dpo/beta": 0.0031418418511748314,
|
|
"fcm_dpo/delta": -0.1073966920375824,
|
|
"fcm_dpo/margin": 159.8199462890625,
|
|
"fcm_dpo/q_t": 0.39174312353134155,
|
|
"grad_norm": 22.029088973999023,
|
|
"learning_rate": 4.174733034541245e-07,
|
|
"logits/chosen": -0.3995114266872406,
|
|
"logits/rejected": -0.40224993228912354,
|
|
"logps/chosen": -215.11419677734375,
|
|
"logps/ref_chosen": -54.8829345703125,
|
|
"logps/ref_rejected": -107.4800796508789,
|
|
"logps/rejected": -427.5312805175781,
|
|
"loss": 1.0642,
|
|
"margin_dpo/margin_mean": 159.8199462890625,
|
|
"margin_dpo/margin_std": 231.9136199951172,
|
|
"step": 233
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -149.30445861816406,
|
|
"KL/mean": -230.3643798828125,
|
|
"KL/rejected_KL_mean": -311.4242858886719,
|
|
"KL/std": 154.63232421875,
|
|
"epoch": 0.3436123348017621,
|
|
"fcm_dpo/beta": 0.0030462380964308977,
|
|
"fcm_dpo/delta": -0.10102301090955734,
|
|
"fcm_dpo/margin": 162.1198272705078,
|
|
"fcm_dpo/q_t": 0.38654690980911255,
|
|
"grad_norm": 25.708904266357422,
|
|
"learning_rate": 4.165182829193126e-07,
|
|
"logits/chosen": -0.3387085795402527,
|
|
"logits/rejected": -0.36640793085098267,
|
|
"logps/chosen": -193.39898681640625,
|
|
"logps/ref_chosen": -44.094520568847656,
|
|
"logps/ref_rejected": -100.00663757324219,
|
|
"logps/rejected": -411.430908203125,
|
|
"loss": 1.0221,
|
|
"margin_dpo/margin_mean": 162.1198272705078,
|
|
"margin_dpo/margin_std": 174.42822265625,
|
|
"step": 234
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -196.90953063964844,
|
|
"KL/mean": -248.29786682128906,
|
|
"KL/rejected_KL_mean": -299.68621826171875,
|
|
"KL/std": 150.12139892578125,
|
|
"epoch": 0.34508076358296624,
|
|
"fcm_dpo/beta": 0.0031026601791381836,
|
|
"fcm_dpo/delta": 0.08341732621192932,
|
|
"fcm_dpo/margin": 102.77667236328125,
|
|
"fcm_dpo/q_t": 0.42741718888282776,
|
|
"grad_norm": 29.176280975341797,
|
|
"learning_rate": 4.1555887447288255e-07,
|
|
"logits/chosen": -0.4230844974517822,
|
|
"logits/rejected": -0.4051060080528259,
|
|
"logps/chosen": -259.1474609375,
|
|
"logps/ref_chosen": -62.237911224365234,
|
|
"logps/ref_rejected": -90.39506530761719,
|
|
"logps/rejected": -390.0812683105469,
|
|
"loss": 1.1737,
|
|
"margin_dpo/margin_mean": 102.77667236328125,
|
|
"margin_dpo/margin_std": 188.78956604003906,
|
|
"step": 235
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -145.384765625,
|
|
"KL/mean": -228.32327270507812,
|
|
"KL/rejected_KL_mean": -311.2618103027344,
|
|
"KL/std": 162.75277709960938,
|
|
"epoch": 0.3465491923641703,
|
|
"fcm_dpo/beta": 0.0030574114061892033,
|
|
"fcm_dpo/delta": -0.11282503604888916,
|
|
"fcm_dpo/margin": 165.8770294189453,
|
|
"fcm_dpo/q_t": 0.3818013072013855,
|
|
"grad_norm": 40.81807327270508,
|
|
"learning_rate": 4.1459510339613946e-07,
|
|
"logits/chosen": -0.37076273560523987,
|
|
"logits/rejected": -0.37188154458999634,
|
|
"logps/chosen": -194.72613525390625,
|
|
"logps/ref_chosen": -49.34136199951172,
|
|
"logps/ref_rejected": -103.51162719726562,
|
|
"logps/rejected": -414.7734375,
|
|
"loss": 0.9972,
|
|
"margin_dpo/margin_mean": 165.87701416015625,
|
|
"margin_dpo/margin_std": 158.9945526123047,
|
|
"step": 236
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -204.69064331054688,
|
|
"KL/mean": -265.85699462890625,
|
|
"KL/rejected_KL_mean": -327.0233459472656,
|
|
"KL/std": 163.26248168945312,
|
|
"epoch": 0.34801762114537443,
|
|
"fcm_dpo/beta": 0.0030537089332938194,
|
|
"fcm_dpo/delta": 0.02740669995546341,
|
|
"fcm_dpo/margin": 122.33269500732422,
|
|
"fcm_dpo/q_t": 0.41523268818855286,
|
|
"grad_norm": 31.793489456176758,
|
|
"learning_rate": 4.136269950853473e-07,
|
|
"logits/chosen": -0.3925231993198395,
|
|
"logits/rejected": -0.38862764835357666,
|
|
"logps/chosen": -258.8587646484375,
|
|
"logps/ref_chosen": -54.168121337890625,
|
|
"logps/ref_rejected": -94.78036499023438,
|
|
"logps/rejected": -421.8037109375,
|
|
"loss": 1.1268,
|
|
"margin_dpo/margin_mean": 122.33268737792969,
|
|
"margin_dpo/margin_std": 192.3961639404297,
|
|
"step": 237
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -185.77113342285156,
|
|
"KL/mean": -250.8236083984375,
|
|
"KL/rejected_KL_mean": -315.8760986328125,
|
|
"KL/std": 168.3731689453125,
|
|
"epoch": 0.34948604992657856,
|
|
"fcm_dpo/beta": 0.0030407910235226154,
|
|
"fcm_dpo/delta": 0.004025213420391083,
|
|
"fcm_dpo/margin": 130.10498046875,
|
|
"fcm_dpo/q_t": 0.4116755723953247,
|
|
"grad_norm": 23.8784236907959,
|
|
"learning_rate": 4.126545750510605e-07,
|
|
"logits/chosen": -0.3695657551288605,
|
|
"logits/rejected": -0.3817945122718811,
|
|
"logps/chosen": -239.7442626953125,
|
|
"logps/ref_chosen": -53.973121643066406,
|
|
"logps/ref_rejected": -89.41795349121094,
|
|
"logps/rejected": -405.2940673828125,
|
|
"loss": 1.1026,
|
|
"margin_dpo/margin_mean": 130.10496520996094,
|
|
"margin_dpo/margin_std": 184.20098876953125,
|
|
"step": 238
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -210.7151641845703,
|
|
"KL/mean": -281.1242980957031,
|
|
"KL/rejected_KL_mean": -351.5334167480469,
|
|
"KL/std": 166.0899200439453,
|
|
"epoch": 0.3509544787077827,
|
|
"fcm_dpo/beta": 0.003023794386535883,
|
|
"fcm_dpo/delta": -0.028100494295358658,
|
|
"fcm_dpo/margin": 140.81825256347656,
|
|
"fcm_dpo/q_t": 0.40190714597702026,
|
|
"grad_norm": 22.221601486206055,
|
|
"learning_rate": 4.116778689174514e-07,
|
|
"logits/chosen": -0.368292897939682,
|
|
"logits/rejected": -0.36200201511383057,
|
|
"logps/chosen": -268.81298828125,
|
|
"logps/ref_chosen": -58.09782409667969,
|
|
"logps/ref_rejected": -93.59294128417969,
|
|
"logps/rejected": -445.1263732910156,
|
|
"loss": 1.0808,
|
|
"margin_dpo/margin_mean": 140.81825256347656,
|
|
"margin_dpo/margin_std": 182.08518981933594,
|
|
"step": 239
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -216.9973602294922,
|
|
"KL/mean": -274.34576416015625,
|
|
"KL/rejected_KL_mean": -331.69415283203125,
|
|
"KL/std": 175.84886169433594,
|
|
"epoch": 0.3524229074889868,
|
|
"fcm_dpo/beta": 0.0030523231253027916,
|
|
"fcm_dpo/delta": 0.05162970349192619,
|
|
"fcm_dpo/margin": 114.69682312011719,
|
|
"fcm_dpo/q_t": 0.42092815041542053,
|
|
"grad_norm": 35.80266571044922,
|
|
"learning_rate": 4.106969024216348e-07,
|
|
"logits/chosen": -0.38743269443511963,
|
|
"logits/rejected": -0.3659111261367798,
|
|
"logps/chosen": -277.6118469238281,
|
|
"logps/ref_chosen": -60.6144905090332,
|
|
"logps/ref_rejected": -74.1185302734375,
|
|
"logps/rejected": -405.81268310546875,
|
|
"loss": 1.1633,
|
|
"margin_dpo/margin_mean": 114.69682312011719,
|
|
"margin_dpo/margin_std": 208.11935424804688,
|
|
"step": 240
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -181.38380432128906,
|
|
"KL/mean": -275.91607666015625,
|
|
"KL/rejected_KL_mean": -370.4483337402344,
|
|
"KL/std": 190.72769165039062,
|
|
"epoch": 0.35389133627019087,
|
|
"fcm_dpo/beta": 0.002998801413923502,
|
|
"fcm_dpo/delta": -0.17671522498130798,
|
|
"fcm_dpo/margin": 189.06454467773438,
|
|
"fcm_dpo/q_t": 0.37420031428337097,
|
|
"grad_norm": 22.811471939086914,
|
|
"learning_rate": 4.097117014129903e-07,
|
|
"logits/chosen": -0.37639686465263367,
|
|
"logits/rejected": -0.3528909683227539,
|
|
"logps/chosen": -247.47486877441406,
|
|
"logps/ref_chosen": -66.091064453125,
|
|
"logps/ref_rejected": -88.06088256835938,
|
|
"logps/rejected": -458.50921630859375,
|
|
"loss": 0.9919,
|
|
"margin_dpo/margin_mean": 189.06454467773438,
|
|
"margin_dpo/margin_std": 214.1299591064453,
|
|
"step": 241
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -193.56138610839844,
|
|
"KL/mean": -256.479736328125,
|
|
"KL/rejected_KL_mean": -319.3980407714844,
|
|
"KL/std": 160.45431518554688,
|
|
"epoch": 0.355359765051395,
|
|
"fcm_dpo/beta": 0.0029919487424194813,
|
|
"fcm_dpo/delta": 0.023674048483371735,
|
|
"fcm_dpo/margin": 125.83666229248047,
|
|
"fcm_dpo/q_t": 0.41582486033439636,
|
|
"grad_norm": 26.62445068359375,
|
|
"learning_rate": 4.087222918524807e-07,
|
|
"logits/chosen": -0.3719189763069153,
|
|
"logits/rejected": -0.35311925411224365,
|
|
"logps/chosen": -261.4253234863281,
|
|
"logps/ref_chosen": -67.86392974853516,
|
|
"logps/ref_rejected": -83.36033630371094,
|
|
"logps/rejected": -402.75836181640625,
|
|
"loss": 1.1239,
|
|
"margin_dpo/margin_mean": 125.83665466308594,
|
|
"margin_dpo/margin_std": 194.09152221679688,
|
|
"step": 242
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -182.10948181152344,
|
|
"KL/mean": -261.4007568359375,
|
|
"KL/rejected_KL_mean": -340.6920166015625,
|
|
"KL/std": 171.34756469726562,
|
|
"epoch": 0.3568281938325991,
|
|
"fcm_dpo/beta": 0.0029446138069033623,
|
|
"fcm_dpo/delta": -0.07035504281520844,
|
|
"fcm_dpo/margin": 158.58251953125,
|
|
"fcm_dpo/q_t": 0.39357781410217285,
|
|
"grad_norm": 31.950265884399414,
|
|
"learning_rate": 4.07728699811968e-07,
|
|
"logits/chosen": -0.374033123254776,
|
|
"logits/rejected": -0.34913793206214905,
|
|
"logps/chosen": -245.1937255859375,
|
|
"logps/ref_chosen": -63.0842399597168,
|
|
"logps/ref_rejected": -76.33563232421875,
|
|
"logps/rejected": -417.02764892578125,
|
|
"loss": 1.0455,
|
|
"margin_dpo/margin_mean": 158.58251953125,
|
|
"margin_dpo/margin_std": 192.53515625,
|
|
"step": 243
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -152.93548583984375,
|
|
"KL/mean": -233.27462768554688,
|
|
"KL/rejected_KL_mean": -313.61376953125,
|
|
"KL/std": 160.87197875976562,
|
|
"epoch": 0.35829662261380324,
|
|
"fcm_dpo/beta": 0.0029027406126260757,
|
|
"fcm_dpo/delta": -0.06980758905410767,
|
|
"fcm_dpo/margin": 160.67831420898438,
|
|
"fcm_dpo/q_t": 0.392170250415802,
|
|
"grad_norm": 34.01569747924805,
|
|
"learning_rate": 4.067309514735267e-07,
|
|
"logits/chosen": -0.4261099100112915,
|
|
"logits/rejected": -0.42256179451942444,
|
|
"logps/chosen": -214.076171875,
|
|
"logps/ref_chosen": -61.140689849853516,
|
|
"logps/ref_rejected": -94.89193725585938,
|
|
"logps/rejected": -408.5057067871094,
|
|
"loss": 1.0296,
|
|
"margin_dpo/margin_mean": 160.67831420898438,
|
|
"margin_dpo/margin_std": 168.09088134765625,
|
|
"step": 244
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -182.1680450439453,
|
|
"KL/mean": -244.11676025390625,
|
|
"KL/rejected_KL_mean": -306.0654602050781,
|
|
"KL/std": 164.1088104248047,
|
|
"epoch": 0.35976505139500736,
|
|
"fcm_dpo/beta": 0.0028605135157704353,
|
|
"fcm_dpo/delta": -0.06291086226701736,
|
|
"fcm_dpo/margin": 123.89741516113281,
|
|
"fcm_dpo/q_t": 0.4174633324146271,
|
|
"grad_norm": 24.750608444213867,
|
|
"learning_rate": 4.057290731287531e-07,
|
|
"logits/chosen": -0.4355185627937317,
|
|
"logits/rejected": -0.4130878448486328,
|
|
"logps/chosen": -249.43032836914062,
|
|
"logps/ref_chosen": -67.26228332519531,
|
|
"logps/ref_rejected": -87.64010620117188,
|
|
"logps/rejected": -393.70556640625,
|
|
"loss": 1.1342,
|
|
"margin_dpo/margin_mean": 123.89741516113281,
|
|
"margin_dpo/margin_std": 185.7630615234375,
|
|
"step": 245
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -177.64511108398438,
|
|
"KL/mean": -240.8006134033203,
|
|
"KL/rejected_KL_mean": -303.9561462402344,
|
|
"KL/std": 180.6883544921875,
|
|
"epoch": 0.36123348017621143,
|
|
"fcm_dpo/beta": 0.0028710057958960533,
|
|
"fcm_dpo/delta": 0.03875650092959404,
|
|
"fcm_dpo/margin": 126.31101989746094,
|
|
"fcm_dpo/q_t": 0.4186369776725769,
|
|
"grad_norm": 21.476974487304688,
|
|
"learning_rate": 4.047230911780736e-07,
|
|
"logits/chosen": -0.45945310592651367,
|
|
"logits/rejected": -0.42501145601272583,
|
|
"logps/chosen": -244.34207153320312,
|
|
"logps/ref_chosen": -66.69696807861328,
|
|
"logps/ref_rejected": -84.34634399414062,
|
|
"logps/rejected": -388.302490234375,
|
|
"loss": 1.1283,
|
|
"margin_dpo/margin_mean": 126.31101989746094,
|
|
"margin_dpo/margin_std": 195.61752319335938,
|
|
"step": 246
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -199.306884765625,
|
|
"KL/mean": -292.37335205078125,
|
|
"KL/rejected_KL_mean": -385.4398193359375,
|
|
"KL/std": 187.60211181640625,
|
|
"epoch": 0.36270190895741555,
|
|
"fcm_dpo/beta": 0.002821533940732479,
|
|
"fcm_dpo/delta": -0.13242369890213013,
|
|
"fcm_dpo/margin": 186.1329345703125,
|
|
"fcm_dpo/q_t": 0.3798220753669739,
|
|
"grad_norm": 30.762731552124023,
|
|
"learning_rate": 4.0371303213004814e-07,
|
|
"logits/chosen": -0.3767867386341095,
|
|
"logits/rejected": -0.37550073862075806,
|
|
"logps/chosen": -255.9122314453125,
|
|
"logps/ref_chosen": -56.6053466796875,
|
|
"logps/ref_rejected": -106.29326629638672,
|
|
"logps/rejected": -491.73309326171875,
|
|
"loss": 1.002,
|
|
"margin_dpo/margin_mean": 186.1329345703125,
|
|
"margin_dpo/margin_std": 199.94444274902344,
|
|
"step": 247
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -168.62863159179688,
|
|
"KL/mean": -249.12957763671875,
|
|
"KL/rejected_KL_mean": -329.6305236816406,
|
|
"KL/std": 152.61151123046875,
|
|
"epoch": 0.3641703377386197,
|
|
"fcm_dpo/beta": 0.002776243956759572,
|
|
"fcm_dpo/delta": -0.04965835064649582,
|
|
"fcm_dpo/margin": 161.00189208984375,
|
|
"fcm_dpo/q_t": 0.3941725492477417,
|
|
"grad_norm": 29.54060935974121,
|
|
"learning_rate": 4.0269892260067197e-07,
|
|
"logits/chosen": -0.39201515913009644,
|
|
"logits/rejected": -0.411907821893692,
|
|
"logps/chosen": -212.67184448242188,
|
|
"logps/ref_chosen": -44.043216705322266,
|
|
"logps/ref_rejected": -91.85687255859375,
|
|
"logps/rejected": -421.4873962402344,
|
|
"loss": 1.0304,
|
|
"margin_dpo/margin_mean": 161.00189208984375,
|
|
"margin_dpo/margin_std": 149.979248046875,
|
|
"step": 248
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -237.52285766601562,
|
|
"KL/mean": -280.86602783203125,
|
|
"KL/rejected_KL_mean": -324.20916748046875,
|
|
"KL/std": 169.23114013671875,
|
|
"epoch": 0.3656387665198238,
|
|
"fcm_dpo/beta": 0.002819925779476762,
|
|
"fcm_dpo/delta": 0.07187280803918839,
|
|
"fcm_dpo/margin": 86.68631744384766,
|
|
"fcm_dpo/q_t": 0.44473278522491455,
|
|
"grad_norm": 31.686059951782227,
|
|
"learning_rate": 4.0168078931267426e-07,
|
|
"logits/chosen": -0.4031098484992981,
|
|
"logits/rejected": -0.3796127140522003,
|
|
"logps/chosen": -299.9652099609375,
|
|
"logps/ref_chosen": -62.442352294921875,
|
|
"logps/ref_rejected": -80.46806335449219,
|
|
"logps/rejected": -404.6772155761719,
|
|
"loss": 1.2357,
|
|
"margin_dpo/margin_mean": 86.68630981445312,
|
|
"margin_dpo/margin_std": 204.52139282226562,
|
|
"step": 249
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -216.50027465820312,
|
|
"KL/mean": -296.10235595703125,
|
|
"KL/rejected_KL_mean": -375.7043762207031,
|
|
"KL/std": 172.25009155273438,
|
|
"epoch": 0.3671071953010279,
|
|
"fcm_dpo/beta": 0.002813429571688175,
|
|
"fcm_dpo/delta": -0.050442732870578766,
|
|
"fcm_dpo/margin": 159.2041015625,
|
|
"fcm_dpo/q_t": 0.396010160446167,
|
|
"grad_norm": 39.18155288696289,
|
|
"learning_rate": 4.006586590948141e-07,
|
|
"logits/chosen": -0.4070359468460083,
|
|
"logits/rejected": -0.35379326343536377,
|
|
"logps/chosen": -282.136962890625,
|
|
"logps/ref_chosen": -65.63668823242188,
|
|
"logps/ref_rejected": -73.87184143066406,
|
|
"logps/rejected": -449.57623291015625,
|
|
"loss": 1.048,
|
|
"margin_dpo/margin_mean": 159.2041015625,
|
|
"margin_dpo/margin_std": 176.7471923828125,
|
|
"step": 250
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -232.60723876953125,
|
|
"KL/mean": -285.5682678222656,
|
|
"KL/rejected_KL_mean": -338.529296875,
|
|
"KL/std": 180.64505004882812,
|
|
"epoch": 0.368575624082232,
|
|
"fcm_dpo/beta": 0.002828112803399563,
|
|
"fcm_dpo/delta": 0.10367438197135925,
|
|
"fcm_dpo/margin": 105.92207336425781,
|
|
"fcm_dpo/q_t": 0.4322122633457184,
|
|
"grad_norm": 36.757362365722656,
|
|
"learning_rate": 3.9963255888117325e-07,
|
|
"logits/chosen": -0.4116126298904419,
|
|
"logits/rejected": -0.37989452481269836,
|
|
"logps/chosen": -289.7899475097656,
|
|
"logps/ref_chosen": -57.182716369628906,
|
|
"logps/ref_rejected": -77.66343688964844,
|
|
"logps/rejected": -416.1927490234375,
|
|
"loss": 1.1861,
|
|
"margin_dpo/margin_mean": 105.92207336425781,
|
|
"margin_dpo/margin_std": 196.50746154785156,
|
|
"step": 251
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -235.21749877929688,
|
|
"KL/mean": -305.7660217285156,
|
|
"KL/rejected_KL_mean": -376.3145446777344,
|
|
"KL/std": 156.46197509765625,
|
|
"epoch": 0.3700440528634361,
|
|
"fcm_dpo/beta": 0.0028432621620595455,
|
|
"fcm_dpo/delta": -0.0013277605175971985,
|
|
"fcm_dpo/margin": 141.09707641601562,
|
|
"fcm_dpo/q_t": 0.4044671356678009,
|
|
"grad_norm": 32.75727081298828,
|
|
"learning_rate": 3.9860251571044666e-07,
|
|
"logits/chosen": -0.4887322187423706,
|
|
"logits/rejected": -0.4495335817337036,
|
|
"logps/chosen": -306.90313720703125,
|
|
"logps/ref_chosen": -71.68563842773438,
|
|
"logps/ref_rejected": -84.75799560546875,
|
|
"logps/rejected": -461.0725402832031,
|
|
"loss": 1.0723,
|
|
"margin_dpo/margin_mean": 141.0970916748047,
|
|
"margin_dpo/margin_std": 154.25804138183594,
|
|
"step": 252
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -212.88052368164062,
|
|
"KL/mean": -288.3155517578125,
|
|
"KL/rejected_KL_mean": -363.75054931640625,
|
|
"KL/std": 172.37203979492188,
|
|
"epoch": 0.37151248164464024,
|
|
"fcm_dpo/beta": 0.002854670397937298,
|
|
"fcm_dpo/delta": -0.03276565670967102,
|
|
"fcm_dpo/margin": 150.87002563476562,
|
|
"fcm_dpo/q_t": 0.40049469470977783,
|
|
"grad_norm": 30.010822296142578,
|
|
"learning_rate": 3.9756855672522986e-07,
|
|
"logits/chosen": -0.4635479152202606,
|
|
"logits/rejected": -0.4555034637451172,
|
|
"logps/chosen": -282.01446533203125,
|
|
"logps/ref_chosen": -69.1339340209961,
|
|
"logps/ref_rejected": -98.70252990722656,
|
|
"logps/rejected": -462.4530944824219,
|
|
"loss": 1.0725,
|
|
"margin_dpo/margin_mean": 150.87002563476562,
|
|
"margin_dpo/margin_std": 187.1278533935547,
|
|
"step": 253
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -218.92982482910156,
|
|
"KL/mean": -283.40130615234375,
|
|
"KL/rejected_KL_mean": -347.8727111816406,
|
|
"KL/std": 189.0523223876953,
|
|
"epoch": 0.37298091042584436,
|
|
"fcm_dpo/beta": 0.0028438782319426537,
|
|
"fcm_dpo/delta": 0.03458229452371597,
|
|
"fcm_dpo/margin": 128.94290161132812,
|
|
"fcm_dpo/q_t": 0.42061156034469604,
|
|
"grad_norm": 27.448017120361328,
|
|
"learning_rate": 3.965307091713037e-07,
|
|
"logits/chosen": -0.42898088693618774,
|
|
"logits/rejected": -0.412333220243454,
|
|
"logps/chosen": -273.0848388671875,
|
|
"logps/ref_chosen": -54.154998779296875,
|
|
"logps/ref_rejected": -90.30764770507812,
|
|
"logps/rejected": -438.18035888671875,
|
|
"loss": 1.1518,
|
|
"margin_dpo/margin_mean": 128.94290161132812,
|
|
"margin_dpo/margin_std": 234.24417114257812,
|
|
"step": 254
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -221.4287109375,
|
|
"KL/mean": -289.2665710449219,
|
|
"KL/rejected_KL_mean": -357.1044006347656,
|
|
"KL/std": 166.446533203125,
|
|
"epoch": 0.3744493392070485,
|
|
"fcm_dpo/beta": 0.0028422600589692593,
|
|
"fcm_dpo/delta": 0.014506950974464417,
|
|
"fcm_dpo/margin": 135.67572021484375,
|
|
"fcm_dpo/q_t": 0.4121626913547516,
|
|
"grad_norm": 25.36671257019043,
|
|
"learning_rate": 3.954890003969163e-07,
|
|
"logits/chosen": -0.39531874656677246,
|
|
"logits/rejected": -0.3849487900733948,
|
|
"logps/chosen": -278.57037353515625,
|
|
"logps/ref_chosen": -57.14167022705078,
|
|
"logps/ref_rejected": -90.2085952758789,
|
|
"logps/rejected": -447.31298828125,
|
|
"loss": 1.1249,
|
|
"margin_dpo/margin_mean": 135.67572021484375,
|
|
"margin_dpo/margin_std": 212.42404174804688,
|
|
"step": 255
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -182.2279052734375,
|
|
"KL/mean": -258.11773681640625,
|
|
"KL/rejected_KL_mean": -334.0075988769531,
|
|
"KL/std": 175.14785766601562,
|
|
"epoch": 0.37591776798825255,
|
|
"fcm_dpo/beta": 0.0028450002428144217,
|
|
"fcm_dpo/delta": -0.03324428200721741,
|
|
"fcm_dpo/margin": 151.77967834472656,
|
|
"fcm_dpo/q_t": 0.40111541748046875,
|
|
"grad_norm": 22.90846061706543,
|
|
"learning_rate": 3.944434578520628e-07,
|
|
"logits/chosen": -0.37317514419555664,
|
|
"logits/rejected": -0.38296449184417725,
|
|
"logps/chosen": -237.39138793945312,
|
|
"logps/ref_chosen": -55.163490295410156,
|
|
"logps/ref_rejected": -92.56291961669922,
|
|
"logps/rejected": -426.57049560546875,
|
|
"loss": 1.0753,
|
|
"margin_dpo/margin_mean": 151.77967834472656,
|
|
"margin_dpo/margin_std": 200.09576416015625,
|
|
"step": 256
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -171.60227966308594,
|
|
"KL/mean": -247.82455444335938,
|
|
"KL/rejected_KL_mean": -324.04681396484375,
|
|
"KL/std": 171.75497436523438,
|
|
"epoch": 0.37738619676945667,
|
|
"fcm_dpo/beta": 0.0028111585415899754,
|
|
"fcm_dpo/delta": -0.03062574565410614,
|
|
"fcm_dpo/margin": 152.44454956054688,
|
|
"fcm_dpo/q_t": 0.40255630016326904,
|
|
"grad_norm": 23.42417335510254,
|
|
"learning_rate": 3.933941090877615e-07,
|
|
"logits/chosen": -0.37235432863235474,
|
|
"logits/rejected": -0.3574731945991516,
|
|
"logps/chosen": -221.02597045898438,
|
|
"logps/ref_chosen": -49.42369842529297,
|
|
"logps/ref_rejected": -79.53791809082031,
|
|
"logps/rejected": -403.5847473144531,
|
|
"loss": 1.079,
|
|
"margin_dpo/margin_mean": 152.4445343017578,
|
|
"margin_dpo/margin_std": 199.14427185058594,
|
|
"step": 257
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -208.58660888671875,
|
|
"KL/mean": -283.39227294921875,
|
|
"KL/rejected_KL_mean": -358.19793701171875,
|
|
"KL/std": 177.68453979492188,
|
|
"epoch": 0.3788546255506608,
|
|
"fcm_dpo/beta": 0.0028177620843052864,
|
|
"fcm_dpo/delta": -0.022539909929037094,
|
|
"fcm_dpo/margin": 149.61135864257812,
|
|
"fcm_dpo/q_t": 0.40266942977905273,
|
|
"grad_norm": 21.441373825073242,
|
|
"learning_rate": 3.923409817553284e-07,
|
|
"logits/chosen": -0.38592028617858887,
|
|
"logits/rejected": -0.38714897632598877,
|
|
"logps/chosen": -267.9707336425781,
|
|
"logps/ref_chosen": -59.384124755859375,
|
|
"logps/ref_rejected": -95.99010467529297,
|
|
"logps/rejected": -454.18804931640625,
|
|
"loss": 1.0923,
|
|
"margin_dpo/margin_mean": 149.61135864257812,
|
|
"margin_dpo/margin_std": 213.09808349609375,
|
|
"step": 258
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -190.80014038085938,
|
|
"KL/mean": -251.88589477539062,
|
|
"KL/rejected_KL_mean": -312.9716491699219,
|
|
"KL/std": 164.8191375732422,
|
|
"epoch": 0.3803230543318649,
|
|
"fcm_dpo/beta": 0.0028243116103112698,
|
|
"fcm_dpo/delta": 0.056878622621297836,
|
|
"fcm_dpo/margin": 122.1714859008789,
|
|
"fcm_dpo/q_t": 0.4213777184486389,
|
|
"grad_norm": 20.104839324951172,
|
|
"learning_rate": 3.9128410360564793e-07,
|
|
"logits/chosen": -0.43218350410461426,
|
|
"logits/rejected": -0.43397071957588196,
|
|
"logps/chosen": -243.62847900390625,
|
|
"logps/ref_chosen": -52.828346252441406,
|
|
"logps/ref_rejected": -89.191650390625,
|
|
"logps/rejected": -402.1632995605469,
|
|
"loss": 1.1409,
|
|
"margin_dpo/margin_mean": 122.17149353027344,
|
|
"margin_dpo/margin_std": 192.09332275390625,
|
|
"step": 259
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -179.60235595703125,
|
|
"KL/mean": -260.7071838378906,
|
|
"KL/rejected_KL_mean": -341.81207275390625,
|
|
"KL/std": 161.13522338867188,
|
|
"epoch": 0.38179148311306904,
|
|
"fcm_dpo/beta": 0.0028252771589905024,
|
|
"fcm_dpo/delta": -0.06110315024852753,
|
|
"fcm_dpo/margin": 162.20968627929688,
|
|
"fcm_dpo/q_t": 0.39523473381996155,
|
|
"grad_norm": 28.360965728759766,
|
|
"learning_rate": 3.9022350248844246e-07,
|
|
"logits/chosen": -0.3964114189147949,
|
|
"logits/rejected": -0.41389453411102295,
|
|
"logps/chosen": -227.02001953125,
|
|
"logps/ref_chosen": -47.41767501831055,
|
|
"logps/ref_rejected": -95.08978271484375,
|
|
"logps/rejected": -436.90185546875,
|
|
"loss": 1.0368,
|
|
"margin_dpo/margin_mean": 162.20970153808594,
|
|
"margin_dpo/margin_std": 178.4059295654297,
|
|
"step": 260
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -177.91055297851562,
|
|
"KL/mean": -253.4554443359375,
|
|
"KL/rejected_KL_mean": -329.0003356933594,
|
|
"KL/std": 176.36572265625,
|
|
"epoch": 0.3832599118942731,
|
|
"fcm_dpo/beta": 0.0027923104353249073,
|
|
"fcm_dpo/delta": -0.022975105792284012,
|
|
"fcm_dpo/margin": 151.08978271484375,
|
|
"fcm_dpo/q_t": 0.404338002204895,
|
|
"grad_norm": 19.33860969543457,
|
|
"learning_rate": 3.891592063515376e-07,
|
|
"logits/chosen": -0.344787061214447,
|
|
"logits/rejected": -0.3448353409767151,
|
|
"logps/chosen": -230.94192504882812,
|
|
"logps/ref_chosen": -53.03137969970703,
|
|
"logps/ref_rejected": -88.51494598388672,
|
|
"logps/rejected": -417.5152893066406,
|
|
"loss": 1.0807,
|
|
"margin_dpo/margin_mean": 151.08978271484375,
|
|
"margin_dpo/margin_std": 202.36895751953125,
|
|
"step": 261
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -209.18212890625,
|
|
"KL/mean": -271.7855224609375,
|
|
"KL/rejected_KL_mean": -334.3888854980469,
|
|
"KL/std": 161.00527954101562,
|
|
"epoch": 0.38472834067547723,
|
|
"fcm_dpo/beta": 0.002820716006681323,
|
|
"fcm_dpo/delta": 0.04834378883242607,
|
|
"fcm_dpo/margin": 125.2067642211914,
|
|
"fcm_dpo/q_t": 0.41728508472442627,
|
|
"grad_norm": 22.37173843383789,
|
|
"learning_rate": 3.880912432401264e-07,
|
|
"logits/chosen": -0.37253305315971375,
|
|
"logits/rejected": -0.349983811378479,
|
|
"logps/chosen": -268.8022766113281,
|
|
"logps/ref_chosen": -59.620140075683594,
|
|
"logps/ref_rejected": -86.41853332519531,
|
|
"logps/rejected": -420.80743408203125,
|
|
"loss": 1.1113,
|
|
"margin_dpo/margin_mean": 125.2067642211914,
|
|
"margin_dpo/margin_std": 159.09869384765625,
|
|
"step": 262
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -196.1129150390625,
|
|
"KL/mean": -283.8154296875,
|
|
"KL/rejected_KL_mean": -371.51788330078125,
|
|
"KL/std": 189.73565673828125,
|
|
"epoch": 0.38619676945668135,
|
|
"fcm_dpo/beta": 0.0027740350924432278,
|
|
"fcm_dpo/delta": -0.09155195951461792,
|
|
"fcm_dpo/margin": 175.40493774414062,
|
|
"fcm_dpo/q_t": 0.3883536458015442,
|
|
"grad_norm": 20.149484634399414,
|
|
"learning_rate": 3.870196412960302e-07,
|
|
"logits/chosen": -0.38992154598236084,
|
|
"logits/rejected": -0.36442500352859497,
|
|
"logps/chosen": -255.53387451171875,
|
|
"logps/ref_chosen": -59.42094421386719,
|
|
"logps/ref_rejected": -96.85720825195312,
|
|
"logps/rejected": -468.3750915527344,
|
|
"loss": 1.0312,
|
|
"margin_dpo/margin_mean": 175.40493774414062,
|
|
"margin_dpo/margin_std": 203.16934204101562,
|
|
"step": 263
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -215.1387939453125,
|
|
"KL/mean": -291.82952880859375,
|
|
"KL/rejected_KL_mean": -368.520263671875,
|
|
"KL/std": 174.2871856689453,
|
|
"epoch": 0.3876651982378855,
|
|
"fcm_dpo/beta": 0.0027424870058894157,
|
|
"fcm_dpo/delta": -0.022643636912107468,
|
|
"fcm_dpo/margin": 153.38145446777344,
|
|
"fcm_dpo/q_t": 0.4036809206008911,
|
|
"grad_norm": 21.269851684570312,
|
|
"learning_rate": 3.8594442875695665e-07,
|
|
"logits/chosen": -0.4510270953178406,
|
|
"logits/rejected": -0.44331079721450806,
|
|
"logps/chosen": -277.86090087890625,
|
|
"logps/ref_chosen": -62.722084045410156,
|
|
"logps/ref_rejected": -93.85620880126953,
|
|
"logps/rejected": -462.37646484375,
|
|
"loss": 1.0828,
|
|
"margin_dpo/margin_mean": 153.38145446777344,
|
|
"margin_dpo/margin_std": 199.32289123535156,
|
|
"step": 264
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -248.67318725585938,
|
|
"KL/mean": -317.910888671875,
|
|
"KL/rejected_KL_mean": -387.1485900878906,
|
|
"KL/std": 200.890380859375,
|
|
"epoch": 0.3891336270190896,
|
|
"fcm_dpo/beta": 0.0027675144374370575,
|
|
"fcm_dpo/delta": 0.01740371063351631,
|
|
"fcm_dpo/margin": 138.47540283203125,
|
|
"fcm_dpo/q_t": 0.4150318503379822,
|
|
"grad_norm": 26.573469161987305,
|
|
"learning_rate": 3.848656339557562e-07,
|
|
"logits/chosen": -0.3928653597831726,
|
|
"logits/rejected": -0.37691766023635864,
|
|
"logps/chosen": -310.6446533203125,
|
|
"logps/ref_chosen": -61.971466064453125,
|
|
"logps/ref_rejected": -88.02059936523438,
|
|
"logps/rejected": -475.169189453125,
|
|
"loss": 1.1456,
|
|
"margin_dpo/margin_mean": 138.47540283203125,
|
|
"margin_dpo/margin_std": 247.60830688476562,
|
|
"step": 265
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -252.51040649414062,
|
|
"KL/mean": -309.49322509765625,
|
|
"KL/rejected_KL_mean": -366.4759826660156,
|
|
"KL/std": 169.7954864501953,
|
|
"epoch": 0.39060205580029367,
|
|
"fcm_dpo/beta": 0.0028024273924529552,
|
|
"fcm_dpo/delta": 0.08327002823352814,
|
|
"fcm_dpo/margin": 113.96560668945312,
|
|
"fcm_dpo/q_t": 0.4282206892967224,
|
|
"grad_norm": 47.3087272644043,
|
|
"learning_rate": 3.8378328531967507e-07,
|
|
"logits/chosen": -0.4444202184677124,
|
|
"logits/rejected": -0.40275368094444275,
|
|
"logps/chosen": -319.6100769042969,
|
|
"logps/ref_chosen": -67.09967041015625,
|
|
"logps/ref_rejected": -67.97122192382812,
|
|
"logps/rejected": -434.44720458984375,
|
|
"loss": 1.1671,
|
|
"margin_dpo/margin_mean": 113.96561431884766,
|
|
"margin_dpo/margin_std": 202.73809814453125,
|
|
"step": 266
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -230.05838012695312,
|
|
"KL/mean": -302.7050476074219,
|
|
"KL/rejected_KL_mean": -375.3517150878906,
|
|
"KL/std": 186.0106964111328,
|
|
"epoch": 0.3920704845814978,
|
|
"fcm_dpo/beta": 0.002801567316055298,
|
|
"fcm_dpo/delta": -0.007594583556056023,
|
|
"fcm_dpo/margin": 145.2933349609375,
|
|
"fcm_dpo/q_t": 0.4081183075904846,
|
|
"grad_norm": 42.9015007019043,
|
|
"learning_rate": 3.8269741136960646e-07,
|
|
"logits/chosen": -0.45946192741394043,
|
|
"logits/rejected": -0.4249149560928345,
|
|
"logps/chosen": -299.02911376953125,
|
|
"logps/ref_chosen": -68.97075653076172,
|
|
"logps/ref_rejected": -90.16844940185547,
|
|
"logps/rejected": -465.5201416015625,
|
|
"loss": 1.1036,
|
|
"margin_dpo/margin_mean": 145.2933349609375,
|
|
"margin_dpo/margin_std": 216.03964233398438,
|
|
"step": 267
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -240.4680938720703,
|
|
"KL/mean": -314.457763671875,
|
|
"KL/rejected_KL_mean": -388.44744873046875,
|
|
"KL/std": 178.60980224609375,
|
|
"epoch": 0.3935389133627019,
|
|
"fcm_dpo/beta": 0.002796788001433015,
|
|
"fcm_dpo/delta": -0.014621859416365623,
|
|
"fcm_dpo/margin": 147.9793243408203,
|
|
"fcm_dpo/q_t": 0.40683937072753906,
|
|
"grad_norm": 40.570281982421875,
|
|
"learning_rate": 3.8160804071933894e-07,
|
|
"logits/chosen": -0.4454725980758667,
|
|
"logits/rejected": -0.4506447911262512,
|
|
"logps/chosen": -296.368408203125,
|
|
"logps/ref_chosen": -55.90031051635742,
|
|
"logps/ref_rejected": -101.64763641357422,
|
|
"logps/rejected": -490.0950927734375,
|
|
"loss": 1.0956,
|
|
"margin_dpo/margin_mean": 147.9793243408203,
|
|
"margin_dpo/margin_std": 214.23509216308594,
|
|
"step": 268
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -267.2579345703125,
|
|
"KL/mean": -353.1320495605469,
|
|
"KL/rejected_KL_mean": -439.00616455078125,
|
|
"KL/std": 187.59320068359375,
|
|
"epoch": 0.39500734214390604,
|
|
"fcm_dpo/beta": 0.0027706455439329147,
|
|
"fcm_dpo/delta": -0.07959494739770889,
|
|
"fcm_dpo/margin": 171.74826049804688,
|
|
"fcm_dpo/q_t": 0.39228904247283936,
|
|
"grad_norm": 46.9615364074707,
|
|
"learning_rate": 3.8051520207480204e-07,
|
|
"logits/chosen": -0.4752381443977356,
|
|
"logits/rejected": -0.4565969407558441,
|
|
"logps/chosen": -337.2974853515625,
|
|
"logps/ref_chosen": -70.03955841064453,
|
|
"logps/ref_rejected": -107.34937286376953,
|
|
"logps/rejected": -546.3555297851562,
|
|
"loss": 1.0666,
|
|
"margin_dpo/margin_mean": 171.74826049804688,
|
|
"margin_dpo/margin_std": 238.62860107421875,
|
|
"step": 269
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -247.1910400390625,
|
|
"KL/mean": -316.6631774902344,
|
|
"KL/rejected_KL_mean": -386.13531494140625,
|
|
"KL/std": 175.74114990234375,
|
|
"epoch": 0.3964757709251101,
|
|
"fcm_dpo/beta": 0.0027725521940737963,
|
|
"fcm_dpo/delta": 0.015092555433511734,
|
|
"fcm_dpo/margin": 138.9442901611328,
|
|
"fcm_dpo/q_t": 0.41288816928863525,
|
|
"grad_norm": 37.03840255737305,
|
|
"learning_rate": 3.794189242333106e-07,
|
|
"logits/chosen": -0.49404847621917725,
|
|
"logits/rejected": -0.48589587211608887,
|
|
"logps/chosen": -316.7245178222656,
|
|
"logps/ref_chosen": -69.53347778320312,
|
|
"logps/ref_rejected": -109.92864990234375,
|
|
"logps/rejected": -496.06396484375,
|
|
"loss": 1.1284,
|
|
"margin_dpo/margin_mean": 138.94430541992188,
|
|
"margin_dpo/margin_std": 224.98004150390625,
|
|
"step": 270
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -232.8036346435547,
|
|
"KL/mean": -320.9297790527344,
|
|
"KL/rejected_KL_mean": -409.055908203125,
|
|
"KL/std": 184.5279541015625,
|
|
"epoch": 0.39794419970631423,
|
|
"fcm_dpo/beta": 0.002722542965784669,
|
|
"fcm_dpo/delta": -0.08434660732746124,
|
|
"fcm_dpo/margin": 176.25228881835938,
|
|
"fcm_dpo/q_t": 0.3910220265388489,
|
|
"grad_norm": 28.625146865844727,
|
|
"learning_rate": 3.7831923608280514e-07,
|
|
"logits/chosen": -0.412489652633667,
|
|
"logits/rejected": -0.39746084809303284,
|
|
"logps/chosen": -289.5682067871094,
|
|
"logps/ref_chosen": -56.76456832885742,
|
|
"logps/ref_rejected": -92.51383972167969,
|
|
"logps/rejected": -501.56976318359375,
|
|
"loss": 1.036,
|
|
"margin_dpo/margin_mean": 176.25230407714844,
|
|
"margin_dpo/margin_std": 207.7333526611328,
|
|
"step": 271
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -228.11370849609375,
|
|
"KL/mean": -338.98406982421875,
|
|
"KL/rejected_KL_mean": -449.85443115234375,
|
|
"KL/std": 208.9947052001953,
|
|
"epoch": 0.39941262848751835,
|
|
"fcm_dpo/beta": 0.002650283742696047,
|
|
"fcm_dpo/delta": -0.1989383101463318,
|
|
"fcm_dpo/margin": 221.74072265625,
|
|
"fcm_dpo/q_t": 0.3656819462776184,
|
|
"grad_norm": 28.886754989624023,
|
|
"learning_rate": 3.772161666010912e-07,
|
|
"logits/chosen": -0.3776736557483673,
|
|
"logits/rejected": -0.39134037494659424,
|
|
"logps/chosen": -277.6108703613281,
|
|
"logps/ref_chosen": -49.497154235839844,
|
|
"logps/ref_rejected": -105.54279327392578,
|
|
"logps/rejected": -555.397216796875,
|
|
"loss": 0.9588,
|
|
"margin_dpo/margin_mean": 221.74072265625,
|
|
"margin_dpo/margin_std": 217.42919921875,
|
|
"step": 272
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -259.71624755859375,
|
|
"KL/mean": -362.0477294921875,
|
|
"KL/rejected_KL_mean": -464.37921142578125,
|
|
"KL/std": 212.40911865234375,
|
|
"epoch": 0.4008810572687225,
|
|
"fcm_dpo/beta": 0.0025578399654477835,
|
|
"fcm_dpo/delta": -0.130665123462677,
|
|
"fcm_dpo/margin": 204.66293334960938,
|
|
"fcm_dpo/q_t": 0.38167691230773926,
|
|
"grad_norm": 22.654264450073242,
|
|
"learning_rate": 3.761097448550755e-07,
|
|
"logits/chosen": -0.3508095145225525,
|
|
"logits/rejected": -0.33492064476013184,
|
|
"logps/chosen": -322.691650390625,
|
|
"logps/ref_chosen": -62.97539520263672,
|
|
"logps/ref_rejected": -92.49858093261719,
|
|
"logps/rejected": -556.8778076171875,
|
|
"loss": 1.0197,
|
|
"margin_dpo/margin_mean": 204.6629638671875,
|
|
"margin_dpo/margin_std": 246.62765502929688,
|
|
"step": 273
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -288.61114501953125,
|
|
"KL/mean": -363.21929931640625,
|
|
"KL/rejected_KL_mean": -437.827392578125,
|
|
"KL/std": 188.80880737304688,
|
|
"epoch": 0.4023494860499266,
|
|
"fcm_dpo/beta": 0.002562238136306405,
|
|
"fcm_dpo/delta": 0.018024658784270287,
|
|
"fcm_dpo/margin": 149.21621704101562,
|
|
"fcm_dpo/q_t": 0.4136476218700409,
|
|
"grad_norm": 26.790353775024414,
|
|
"learning_rate": 3.75e-07,
|
|
"logits/chosen": -0.31436455249786377,
|
|
"logits/rejected": -0.299676775932312,
|
|
"logps/chosen": -344.27886962890625,
|
|
"logps/ref_chosen": -55.66770935058594,
|
|
"logps/ref_rejected": -77.33308410644531,
|
|
"logps/rejected": -515.1604614257812,
|
|
"loss": 1.1192,
|
|
"margin_dpo/margin_mean": 149.21621704101562,
|
|
"margin_dpo/margin_std": 225.90634155273438,
|
|
"step": 274
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -218.06088256835938,
|
|
"KL/mean": -301.337890625,
|
|
"KL/rejected_KL_mean": -384.61492919921875,
|
|
"KL/std": 183.7950439453125,
|
|
"epoch": 0.40381791483113066,
|
|
"fcm_dpo/beta": 0.0025526927784085274,
|
|
"fcm_dpo/delta": -0.026383230462670326,
|
|
"fcm_dpo/margin": 166.55404663085938,
|
|
"fcm_dpo/q_t": 0.40220552682876587,
|
|
"grad_norm": 23.10381507873535,
|
|
"learning_rate": 3.738869612786737e-07,
|
|
"logits/chosen": -0.3793821334838867,
|
|
"logits/rejected": -0.38762110471725464,
|
|
"logps/chosen": -266.65557861328125,
|
|
"logps/ref_chosen": -48.594703674316406,
|
|
"logps/ref_rejected": -93.30369567871094,
|
|
"logps/rejected": -477.91864013671875,
|
|
"loss": 1.0756,
|
|
"margin_dpo/margin_mean": 166.5540313720703,
|
|
"margin_dpo/margin_std": 212.90811157226562,
|
|
"step": 275
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -227.9523468017578,
|
|
"KL/mean": -303.5640869140625,
|
|
"KL/rejected_KL_mean": -379.1758117675781,
|
|
"KL/std": 192.03842163085938,
|
|
"epoch": 0.4052863436123348,
|
|
"fcm_dpo/beta": 0.00254382798448205,
|
|
"fcm_dpo/delta": 0.015904389321804047,
|
|
"fcm_dpo/margin": 151.22344970703125,
|
|
"fcm_dpo/q_t": 0.4147305190563202,
|
|
"grad_norm": 25.779754638671875,
|
|
"learning_rate": 3.7277065802070204e-07,
|
|
"logits/chosen": -0.369443416595459,
|
|
"logits/rejected": -0.34958142042160034,
|
|
"logps/chosen": -284.5297546386719,
|
|
"logps/ref_chosen": -56.57740783691406,
|
|
"logps/ref_rejected": -70.36566925048828,
|
|
"logps/rejected": -449.5414733886719,
|
|
"loss": 1.1294,
|
|
"margin_dpo/margin_mean": 151.22344970703125,
|
|
"margin_dpo/margin_std": 247.93980407714844,
|
|
"step": 276
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -234.90208435058594,
|
|
"KL/mean": -317.98382568359375,
|
|
"KL/rejected_KL_mean": -401.0655517578125,
|
|
"KL/std": 185.35687255859375,
|
|
"epoch": 0.4067547723935389,
|
|
"fcm_dpo/beta": 0.002545831026509404,
|
|
"fcm_dpo/delta": -0.024078505113720894,
|
|
"fcm_dpo/margin": 166.16348266601562,
|
|
"fcm_dpo/q_t": 0.4027344584465027,
|
|
"grad_norm": 28.24149513244629,
|
|
"learning_rate": 3.71651119641714e-07,
|
|
"logits/chosen": -0.3797181248664856,
|
|
"logits/rejected": -0.36499595642089844,
|
|
"logps/chosen": -291.17364501953125,
|
|
"logps/ref_chosen": -56.27156066894531,
|
|
"logps/ref_rejected": -92.88127136230469,
|
|
"logps/rejected": -493.94683837890625,
|
|
"loss": 1.0789,
|
|
"margin_dpo/margin_mean": 166.16348266601562,
|
|
"margin_dpo/margin_std": 216.53277587890625,
|
|
"step": 277
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -200.89450073242188,
|
|
"KL/mean": -295.7635803222656,
|
|
"KL/rejected_KL_mean": -390.6326904296875,
|
|
"KL/std": 192.54470825195312,
|
|
"epoch": 0.40822320117474303,
|
|
"fcm_dpo/beta": 0.0024995177518576384,
|
|
"fcm_dpo/delta": -0.07845177501440048,
|
|
"fcm_dpo/margin": 189.73814392089844,
|
|
"fcm_dpo/q_t": 0.3918403387069702,
|
|
"grad_norm": 28.204530715942383,
|
|
"learning_rate": 3.705283756425872e-07,
|
|
"logits/chosen": -0.3621584475040436,
|
|
"logits/rejected": -0.36707815527915955,
|
|
"logps/chosen": -253.83644104003906,
|
|
"logps/ref_chosen": -52.94194030761719,
|
|
"logps/ref_rejected": -91.25357818603516,
|
|
"logps/rejected": -481.8862609863281,
|
|
"loss": 1.0374,
|
|
"margin_dpo/margin_mean": 189.73814392089844,
|
|
"margin_dpo/margin_std": 220.7503662109375,
|
|
"step": 278
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -234.5529022216797,
|
|
"KL/mean": -331.0247802734375,
|
|
"KL/rejected_KL_mean": -427.4967041015625,
|
|
"KL/std": 201.0699462890625,
|
|
"epoch": 0.40969162995594716,
|
|
"fcm_dpo/beta": 0.002455736044794321,
|
|
"fcm_dpo/delta": -0.07850091904401779,
|
|
"fcm_dpo/margin": 192.94378662109375,
|
|
"fcm_dpo/q_t": 0.3932103216648102,
|
|
"grad_norm": 28.575786590576172,
|
|
"learning_rate": 3.6940245560867e-07,
|
|
"logits/chosen": -0.30353468656539917,
|
|
"logits/rejected": -0.3015468418598175,
|
|
"logps/chosen": -283.1942138671875,
|
|
"logps/ref_chosen": -48.641319274902344,
|
|
"logps/ref_rejected": -87.8514404296875,
|
|
"logps/rejected": -515.34814453125,
|
|
"loss": 1.0546,
|
|
"margin_dpo/margin_mean": 192.94378662109375,
|
|
"margin_dpo/margin_std": 249.06761169433594,
|
|
"step": 279
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -236.78497314453125,
|
|
"KL/mean": -333.14849853515625,
|
|
"KL/rejected_KL_mean": -429.51202392578125,
|
|
"KL/std": 184.6090087890625,
|
|
"epoch": 0.4111600587371512,
|
|
"fcm_dpo/beta": 0.002430729568004608,
|
|
"fcm_dpo/delta": -0.07191157341003418,
|
|
"fcm_dpo/margin": 192.72705078125,
|
|
"fcm_dpo/q_t": 0.39016252756118774,
|
|
"grad_norm": 25.460493087768555,
|
|
"learning_rate": 3.6827338920900253e-07,
|
|
"logits/chosen": -0.33522510528564453,
|
|
"logits/rejected": -0.34023696184158325,
|
|
"logps/chosen": -295.58209228515625,
|
|
"logps/ref_chosen": -58.797122955322266,
|
|
"logps/ref_rejected": -98.61885070800781,
|
|
"logps/rejected": -528.130859375,
|
|
"loss": 1.0293,
|
|
"margin_dpo/margin_mean": 192.72706604003906,
|
|
"margin_dpo/margin_std": 203.84701538085938,
|
|
"step": 280
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -226.8874969482422,
|
|
"KL/mean": -314.1214599609375,
|
|
"KL/rejected_KL_mean": -401.35540771484375,
|
|
"KL/std": 185.2099151611328,
|
|
"epoch": 0.41262848751835535,
|
|
"fcm_dpo/beta": 0.002424264792352915,
|
|
"fcm_dpo/delta": -0.024164361879229546,
|
|
"fcm_dpo/margin": 174.46792602539062,
|
|
"fcm_dpo/q_t": 0.3996432423591614,
|
|
"grad_norm": 25.438335418701172,
|
|
"learning_rate": 3.6714120619553435e-07,
|
|
"logits/chosen": -0.3693084716796875,
|
|
"logits/rejected": -0.34565502405166626,
|
|
"logps/chosen": -282.3760070800781,
|
|
"logps/ref_chosen": -55.488521575927734,
|
|
"logps/ref_rejected": -80.88258361816406,
|
|
"logps/rejected": -482.23797607421875,
|
|
"loss": 1.0649,
|
|
"margin_dpo/margin_mean": 174.46792602539062,
|
|
"margin_dpo/margin_std": 200.58334350585938,
|
|
"step": 281
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -254.75025939941406,
|
|
"KL/mean": -321.8013610839844,
|
|
"KL/rejected_KL_mean": -388.85247802734375,
|
|
"KL/std": 179.27874755859375,
|
|
"epoch": 0.41409691629955947,
|
|
"fcm_dpo/beta": 0.0024102902971208096,
|
|
"fcm_dpo/delta": -0.00805886834859848,
|
|
"fcm_dpo/margin": 134.1021728515625,
|
|
"fcm_dpo/q_t": 0.42627450823783875,
|
|
"grad_norm": 21.817731857299805,
|
|
"learning_rate": 3.660059364023408e-07,
|
|
"logits/chosen": -0.4306085705757141,
|
|
"logits/rejected": -0.4117346405982971,
|
|
"logps/chosen": -327.8204040527344,
|
|
"logps/ref_chosen": -73.07014465332031,
|
|
"logps/ref_rejected": -95.35098266601562,
|
|
"logps/rejected": -484.20343017578125,
|
|
"loss": 1.1493,
|
|
"margin_dpo/margin_mean": 134.1021728515625,
|
|
"margin_dpo/margin_std": 210.12014770507812,
|
|
"step": 282
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -255.66551208496094,
|
|
"KL/mean": -355.66534423828125,
|
|
"KL/rejected_KL_mean": -455.6651611328125,
|
|
"KL/std": 223.33087158203125,
|
|
"epoch": 0.4155653450807636,
|
|
"fcm_dpo/beta": 0.002389241009950638,
|
|
"fcm_dpo/delta": -0.08176899701356888,
|
|
"fcm_dpo/margin": 199.99966430664062,
|
|
"fcm_dpo/q_t": 0.3880097270011902,
|
|
"grad_norm": 38.63431167602539,
|
|
"learning_rate": 3.6486760974483685e-07,
|
|
"logits/chosen": -0.38895383477211,
|
|
"logits/rejected": -0.3880043029785156,
|
|
"logps/chosen": -317.56396484375,
|
|
"logps/ref_chosen": -61.89844512939453,
|
|
"logps/ref_rejected": -96.98655700683594,
|
|
"logps/rejected": -552.6517333984375,
|
|
"loss": 1.025,
|
|
"margin_dpo/margin_mean": 199.99964904785156,
|
|
"margin_dpo/margin_std": 207.48794555664062,
|
|
"step": 283
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -252.18701171875,
|
|
"KL/mean": -347.4981689453125,
|
|
"KL/rejected_KL_mean": -442.809326171875,
|
|
"KL/std": 203.94338989257812,
|
|
"epoch": 0.4170337738619677,
|
|
"fcm_dpo/beta": 0.00234918761998415,
|
|
"fcm_dpo/delta": -0.050124749541282654,
|
|
"fcm_dpo/margin": 190.62234497070312,
|
|
"fcm_dpo/q_t": 0.3954547643661499,
|
|
"grad_norm": 31.127235412597656,
|
|
"learning_rate": 3.6372625621898863e-07,
|
|
"logits/chosen": -0.4325849413871765,
|
|
"logits/rejected": -0.4140619933605194,
|
|
"logps/chosen": -310.62255859375,
|
|
"logps/ref_chosen": -58.4355354309082,
|
|
"logps/ref_rejected": -93.46926879882812,
|
|
"logps/rejected": -536.278564453125,
|
|
"loss": 1.0387,
|
|
"margin_dpo/margin_mean": 190.62232971191406,
|
|
"margin_dpo/margin_std": 199.58392333984375,
|
|
"step": 284
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -275.643310546875,
|
|
"KL/mean": -361.4195556640625,
|
|
"KL/rejected_KL_mean": -447.19573974609375,
|
|
"KL/std": 183.0438232421875,
|
|
"epoch": 0.4185022026431718,
|
|
"fcm_dpo/beta": 0.0023565019946545362,
|
|
"fcm_dpo/delta": -0.005000069737434387,
|
|
"fcm_dpo/margin": 171.5524444580078,
|
|
"fcm_dpo/q_t": 0.40418434143066406,
|
|
"grad_norm": 24.421079635620117,
|
|
"learning_rate": 3.625819059005228e-07,
|
|
"logits/chosen": -0.40086328983306885,
|
|
"logits/rejected": -0.38890522718429565,
|
|
"logps/chosen": -341.8755187988281,
|
|
"logps/ref_chosen": -66.23219299316406,
|
|
"logps/ref_rejected": -99.1268310546875,
|
|
"logps/rejected": -546.3225708007812,
|
|
"loss": 1.0734,
|
|
"margin_dpo/margin_mean": 171.5524444580078,
|
|
"margin_dpo/margin_std": 191.03448486328125,
|
|
"step": 285
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -279.2388000488281,
|
|
"KL/mean": -373.7999572753906,
|
|
"KL/rejected_KL_mean": -468.3611145019531,
|
|
"KL/std": 205.68328857421875,
|
|
"epoch": 0.4199706314243759,
|
|
"fcm_dpo/beta": 0.002331117633730173,
|
|
"fcm_dpo/delta": -0.042728979140520096,
|
|
"fcm_dpo/margin": 189.12229919433594,
|
|
"fcm_dpo/q_t": 0.397432416677475,
|
|
"grad_norm": 21.488588333129883,
|
|
"learning_rate": 3.614345889441346e-07,
|
|
"logits/chosen": -0.39442330598831177,
|
|
"logits/rejected": -0.37771064043045044,
|
|
"logps/chosen": -352.1898193359375,
|
|
"logps/ref_chosen": -72.95100402832031,
|
|
"logps/ref_rejected": -88.58845520019531,
|
|
"logps/rejected": -556.9495849609375,
|
|
"loss": 1.0568,
|
|
"margin_dpo/margin_mean": 189.12228393554688,
|
|
"margin_dpo/margin_std": 223.15341186523438,
|
|
"step": 286
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -267.8426513671875,
|
|
"KL/mean": -340.4630126953125,
|
|
"KL/rejected_KL_mean": -413.0833740234375,
|
|
"KL/std": 175.79714965820312,
|
|
"epoch": 0.42143906020558003,
|
|
"fcm_dpo/beta": 0.0023477966897189617,
|
|
"fcm_dpo/delta": 0.060965895652770996,
|
|
"fcm_dpo/margin": 145.24075317382812,
|
|
"fcm_dpo/q_t": 0.42086952924728394,
|
|
"grad_norm": 31.77181625366211,
|
|
"learning_rate": 3.6028433558269275e-07,
|
|
"logits/chosen": -0.3862367272377014,
|
|
"logits/rejected": -0.3612860441207886,
|
|
"logps/chosen": -329.3837890625,
|
|
"logps/ref_chosen": -61.54115295410156,
|
|
"logps/ref_rejected": -77.69607543945312,
|
|
"logps/rejected": -490.7794494628906,
|
|
"loss": 1.1282,
|
|
"margin_dpo/margin_mean": 145.24075317382812,
|
|
"margin_dpo/margin_std": 204.16952514648438,
|
|
"step": 287
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -258.47845458984375,
|
|
"KL/mean": -353.82647705078125,
|
|
"KL/rejected_KL_mean": -449.17449951171875,
|
|
"KL/std": 181.2964630126953,
|
|
"epoch": 0.42290748898678415,
|
|
"fcm_dpo/beta": 0.0023200467694550753,
|
|
"fcm_dpo/delta": -0.04565563425421715,
|
|
"fcm_dpo/margin": 190.696044921875,
|
|
"fcm_dpo/q_t": 0.39606454968452454,
|
|
"grad_norm": 21.244749069213867,
|
|
"learning_rate": 3.5913117612644327e-07,
|
|
"logits/chosen": -0.4210980534553528,
|
|
"logits/rejected": -0.4073001742362976,
|
|
"logps/chosen": -315.1396484375,
|
|
"logps/ref_chosen": -56.661224365234375,
|
|
"logps/ref_rejected": -87.33570098876953,
|
|
"logps/rejected": -536.5101928710938,
|
|
"loss": 1.0438,
|
|
"margin_dpo/margin_mean": 190.696044921875,
|
|
"margin_dpo/margin_std": 194.03762817382812,
|
|
"step": 288
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -238.14132690429688,
|
|
"KL/mean": -345.27081298828125,
|
|
"KL/rejected_KL_mean": -452.4002380371094,
|
|
"KL/std": 204.56678771972656,
|
|
"epoch": 0.4243759177679883,
|
|
"fcm_dpo/beta": 0.0023045637644827366,
|
|
"fcm_dpo/delta": -0.09854762256145477,
|
|
"fcm_dpo/margin": 214.25892639160156,
|
|
"fcm_dpo/q_t": 0.38599973917007446,
|
|
"grad_norm": 25.482728958129883,
|
|
"learning_rate": 3.5797514096221024e-07,
|
|
"logits/chosen": -0.34231024980545044,
|
|
"logits/rejected": -0.3436782956123352,
|
|
"logps/chosen": -283.3717346191406,
|
|
"logps/ref_chosen": -45.23039245605469,
|
|
"logps/ref_rejected": -87.64266967773438,
|
|
"logps/rejected": -540.0429077148438,
|
|
"loss": 1.0151,
|
|
"margin_dpo/margin_mean": 214.25892639160156,
|
|
"margin_dpo/margin_std": 223.70123291015625,
|
|
"step": 289
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -261.7184143066406,
|
|
"KL/mean": -369.31634521484375,
|
|
"KL/rejected_KL_mean": -476.91424560546875,
|
|
"KL/std": 220.097900390625,
|
|
"epoch": 0.42584434654919234,
|
|
"fcm_dpo/beta": 0.002251718658953905,
|
|
"fcm_dpo/delta": -0.08903662860393524,
|
|
"fcm_dpo/margin": 215.1958465576172,
|
|
"fcm_dpo/q_t": 0.39020198583602905,
|
|
"grad_norm": 21.68331527709961,
|
|
"learning_rate": 3.568162605525952e-07,
|
|
"logits/chosen": -0.3297966718673706,
|
|
"logits/rejected": -0.35255828499794006,
|
|
"logps/chosen": -317.1899108886719,
|
|
"logps/ref_chosen": -55.47149658203125,
|
|
"logps/ref_rejected": -116.70857238769531,
|
|
"logps/rejected": -593.622802734375,
|
|
"loss": 1.0375,
|
|
"margin_dpo/margin_mean": 215.1958465576172,
|
|
"margin_dpo/margin_std": 261.9568786621094,
|
|
"step": 290
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -228.8701171875,
|
|
"KL/mean": -324.2679138183594,
|
|
"KL/rejected_KL_mean": -419.6656799316406,
|
|
"KL/std": 185.85340881347656,
|
|
"epoch": 0.42731277533039647,
|
|
"fcm_dpo/beta": 0.002243693685159087,
|
|
"fcm_dpo/delta": -0.02954481914639473,
|
|
"fcm_dpo/margin": 190.7955780029297,
|
|
"fcm_dpo/q_t": 0.400044322013855,
|
|
"grad_norm": 27.926626205444336,
|
|
"learning_rate": 3.5565456543517485e-07,
|
|
"logits/chosen": -0.3716287612915039,
|
|
"logits/rejected": -0.3628491163253784,
|
|
"logps/chosen": -292.1304931640625,
|
|
"logps/ref_chosen": -63.26036834716797,
|
|
"logps/ref_rejected": -89.29708862304688,
|
|
"logps/rejected": -508.9627685546875,
|
|
"loss": 1.0652,
|
|
"margin_dpo/margin_mean": 190.79556274414062,
|
|
"margin_dpo/margin_std": 225.35430908203125,
|
|
"step": 291
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -256.63824462890625,
|
|
"KL/mean": -359.156494140625,
|
|
"KL/rejected_KL_mean": -461.67474365234375,
|
|
"KL/std": 220.2518768310547,
|
|
"epoch": 0.4287812041116006,
|
|
"fcm_dpo/beta": 0.002220253925770521,
|
|
"fcm_dpo/delta": -0.057921458035707474,
|
|
"fcm_dpo/margin": 205.0364990234375,
|
|
"fcm_dpo/q_t": 0.39462825655937195,
|
|
"grad_norm": 22.149768829345703,
|
|
"learning_rate": 3.5449008622169583e-07,
|
|
"logits/chosen": -0.381770521402359,
|
|
"logits/rejected": -0.3677070736885071,
|
|
"logps/chosen": -310.5567626953125,
|
|
"logps/ref_chosen": -53.91852951049805,
|
|
"logps/ref_rejected": -89.96138000488281,
|
|
"logps/rejected": -551.6361083984375,
|
|
"loss": 1.0614,
|
|
"margin_dpo/margin_mean": 205.0364990234375,
|
|
"margin_dpo/margin_std": 262.11480712890625,
|
|
"step": 292
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -260.9930419921875,
|
|
"KL/mean": -341.6339416503906,
|
|
"KL/rejected_KL_mean": -422.27484130859375,
|
|
"KL/std": 220.97372436523438,
|
|
"epoch": 0.4302496328928047,
|
|
"fcm_dpo/beta": 0.002223607152700424,
|
|
"fcm_dpo/delta": 0.04278174415230751,
|
|
"fcm_dpo/margin": 161.2817840576172,
|
|
"fcm_dpo/q_t": 0.4189508557319641,
|
|
"grad_norm": 25.350297927856445,
|
|
"learning_rate": 3.5332285359726846e-07,
|
|
"logits/chosen": -0.37213313579559326,
|
|
"logits/rejected": -0.3637707829475403,
|
|
"logps/chosen": -321.36907958984375,
|
|
"logps/ref_chosen": -60.376033782958984,
|
|
"logps/ref_rejected": -77.85244750976562,
|
|
"logps/rejected": -500.12725830078125,
|
|
"loss": 1.1311,
|
|
"margin_dpo/margin_mean": 161.2817840576172,
|
|
"margin_dpo/margin_std": 250.83364868164062,
|
|
"step": 293
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -240.05978393554688,
|
|
"KL/mean": -329.448974609375,
|
|
"KL/rejected_KL_mean": -418.83819580078125,
|
|
"KL/std": 193.92105102539062,
|
|
"epoch": 0.43171806167400884,
|
|
"fcm_dpo/beta": 0.002221715170890093,
|
|
"fcm_dpo/delta": 0.002862263470888138,
|
|
"fcm_dpo/margin": 178.77838134765625,
|
|
"fcm_dpo/q_t": 0.4095104932785034,
|
|
"grad_norm": 21.2161808013916,
|
|
"learning_rate": 3.5215289831955786e-07,
|
|
"logits/chosen": -0.4091721773147583,
|
|
"logits/rejected": -0.4162572920322418,
|
|
"logps/chosen": -288.1473083496094,
|
|
"logps/ref_chosen": -48.0875358581543,
|
|
"logps/ref_rejected": -81.89698791503906,
|
|
"logps/rejected": -500.73516845703125,
|
|
"loss": 1.0956,
|
|
"margin_dpo/margin_mean": 178.77838134765625,
|
|
"margin_dpo/margin_std": 243.22738647460938,
|
|
"step": 294
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -289.834228515625,
|
|
"KL/mean": -390.12548828125,
|
|
"KL/rejected_KL_mean": -490.416748046875,
|
|
"KL/std": 226.3201904296875,
|
|
"epoch": 0.4331864904552129,
|
|
"fcm_dpo/beta": 0.0022136676125228405,
|
|
"fcm_dpo/delta": -0.04604334011673927,
|
|
"fcm_dpo/margin": 200.58251953125,
|
|
"fcm_dpo/q_t": 0.39920759201049805,
|
|
"grad_norm": 28.84412384033203,
|
|
"learning_rate": 3.509802512179737e-07,
|
|
"logits/chosen": -0.4074261784553528,
|
|
"logits/rejected": -0.409212589263916,
|
|
"logps/chosen": -339.7589111328125,
|
|
"logps/ref_chosen": -49.92467498779297,
|
|
"logps/ref_rejected": -87.45632934570312,
|
|
"logps/rejected": -577.873046875,
|
|
"loss": 1.0697,
|
|
"margin_dpo/margin_mean": 200.58251953125,
|
|
"margin_dpo/margin_std": 263.1807861328125,
|
|
"step": 295
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -358.89825439453125,
|
|
"KL/mean": -427.7105712890625,
|
|
"KL/rejected_KL_mean": -496.5228576660156,
|
|
"KL/std": 208.72183227539062,
|
|
"epoch": 0.434654919236417,
|
|
"fcm_dpo/beta": 0.002201622352004051,
|
|
"fcm_dpo/delta": -0.008969346061348915,
|
|
"fcm_dpo/margin": 137.62461853027344,
|
|
"fcm_dpo/q_t": 0.4300941824913025,
|
|
"grad_norm": 32.84729766845703,
|
|
"learning_rate": 3.498049431928577e-07,
|
|
"logits/chosen": -0.4741077721118927,
|
|
"logits/rejected": -0.45637625455856323,
|
|
"logps/chosen": -424.3894958496094,
|
|
"logps/ref_chosen": -65.49124145507812,
|
|
"logps/ref_rejected": -93.08908081054688,
|
|
"logps/rejected": -589.6119384765625,
|
|
"loss": 1.1886,
|
|
"margin_dpo/margin_mean": 137.62461853027344,
|
|
"margin_dpo/margin_std": 264.7969665527344,
|
|
"step": 296
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -307.435546875,
|
|
"KL/mean": -389.3873291015625,
|
|
"KL/rejected_KL_mean": -471.3391418457031,
|
|
"KL/std": 198.08912658691406,
|
|
"epoch": 0.43612334801762115,
|
|
"fcm_dpo/beta": 0.002210353035479784,
|
|
"fcm_dpo/delta": 0.03913431614637375,
|
|
"fcm_dpo/margin": 163.90359497070312,
|
|
"fcm_dpo/q_t": 0.4156332015991211,
|
|
"grad_norm": 34.864501953125,
|
|
"learning_rate": 3.486270052146694e-07,
|
|
"logits/chosen": -0.4226665794849396,
|
|
"logits/rejected": -0.42391157150268555,
|
|
"logps/chosen": -363.9125061035156,
|
|
"logps/ref_chosen": -56.476951599121094,
|
|
"logps/ref_rejected": -95.1385498046875,
|
|
"logps/rejected": -566.4776611328125,
|
|
"loss": 1.1084,
|
|
"margin_dpo/margin_mean": 163.90359497070312,
|
|
"margin_dpo/margin_std": 211.23016357421875,
|
|
"step": 297
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -320.65106201171875,
|
|
"KL/mean": -428.41290283203125,
|
|
"KL/rejected_KL_mean": -536.1747436523438,
|
|
"KL/std": 268.907470703125,
|
|
"epoch": 0.43759177679882527,
|
|
"fcm_dpo/beta": 0.002200640505179763,
|
|
"fcm_dpo/delta": -0.07795488834381104,
|
|
"fcm_dpo/margin": 215.52365112304688,
|
|
"fcm_dpo/q_t": 0.3964436948299408,
|
|
"grad_norm": 34.56257247924805,
|
|
"learning_rate": 3.474464683231698e-07,
|
|
"logits/chosen": -0.4743010401725769,
|
|
"logits/rejected": -0.49710047245025635,
|
|
"logps/chosen": -387.97625732421875,
|
|
"logps/ref_chosen": -67.32516479492188,
|
|
"logps/ref_rejected": -116.66217041015625,
|
|
"logps/rejected": -652.8369140625,
|
|
"loss": 1.0729,
|
|
"margin_dpo/margin_mean": 215.52365112304688,
|
|
"margin_dpo/margin_std": 318.0645751953125,
|
|
"step": 298
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -266.99639892578125,
|
|
"KL/mean": -358.8790283203125,
|
|
"KL/rejected_KL_mean": -450.76171875,
|
|
"KL/std": 197.55548095703125,
|
|
"epoch": 0.4390602055800294,
|
|
"fcm_dpo/beta": 0.00219709612429142,
|
|
"fcm_dpo/delta": -0.004710428416728973,
|
|
"fcm_dpo/margin": 183.76527404785156,
|
|
"fcm_dpo/q_t": 0.40845823287963867,
|
|
"grad_norm": 38.42890930175781,
|
|
"learning_rate": 3.462633636266041e-07,
|
|
"logits/chosen": -0.42935478687286377,
|
|
"logits/rejected": -0.4295846223831177,
|
|
"logps/chosen": -315.95849609375,
|
|
"logps/ref_chosen": -48.96209716796875,
|
|
"logps/ref_rejected": -84.32823944091797,
|
|
"logps/rejected": -535.0899658203125,
|
|
"loss": 1.0958,
|
|
"margin_dpo/margin_mean": 183.76528930664062,
|
|
"margin_dpo/margin_std": 249.51800537109375,
|
|
"step": 299
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -330.11163330078125,
|
|
"KL/mean": -438.03948974609375,
|
|
"KL/rejected_KL_mean": -545.9674072265625,
|
|
"KL/std": 232.30531311035156,
|
|
"epoch": 0.44052863436123346,
|
|
"fcm_dpo/beta": 0.0021653189323842525,
|
|
"fcm_dpo/delta": -0.07065486907958984,
|
|
"fcm_dpo/margin": 215.855712890625,
|
|
"fcm_dpo/q_t": 0.3930676281452179,
|
|
"grad_norm": 31.024282455444336,
|
|
"learning_rate": 3.4507772230088147e-07,
|
|
"logits/chosen": -0.4451923668384552,
|
|
"logits/rejected": -0.4505726099014282,
|
|
"logps/chosen": -389.18536376953125,
|
|
"logps/ref_chosen": -59.07371139526367,
|
|
"logps/ref_rejected": -95.9664535522461,
|
|
"logps/rejected": -641.933837890625,
|
|
"loss": 1.0619,
|
|
"margin_dpo/margin_mean": 215.855712890625,
|
|
"margin_dpo/margin_std": 283.98614501953125,
|
|
"step": 300
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -276.162353515625,
|
|
"KL/mean": -384.3056640625,
|
|
"KL/rejected_KL_mean": -492.4489440917969,
|
|
"KL/std": 221.1756591796875,
|
|
"epoch": 0.4419970631424376,
|
|
"fcm_dpo/beta": 0.002126394771039486,
|
|
"fcm_dpo/delta": -0.0630912333726883,
|
|
"fcm_dpo/margin": 216.28659057617188,
|
|
"fcm_dpo/q_t": 0.3957647681236267,
|
|
"grad_norm": 25.04326629638672,
|
|
"learning_rate": 3.4388957558875316e-07,
|
|
"logits/chosen": -0.45365726947784424,
|
|
"logits/rejected": -0.45016711950302124,
|
|
"logps/chosen": -333.4117431640625,
|
|
"logps/ref_chosen": -57.249366760253906,
|
|
"logps/ref_rejected": -92.35354614257812,
|
|
"logps/rejected": -584.802490234375,
|
|
"loss": 1.048,
|
|
"margin_dpo/margin_mean": 216.28659057617188,
|
|
"margin_dpo/margin_std": 261.11767578125,
|
|
"step": 301
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -260.8786315917969,
|
|
"KL/mean": -353.64752197265625,
|
|
"KL/rejected_KL_mean": -446.4163818359375,
|
|
"KL/std": 209.54000854492188,
|
|
"epoch": 0.4434654919236417,
|
|
"fcm_dpo/beta": 0.002119125798344612,
|
|
"fcm_dpo/delta": 0.006838139146566391,
|
|
"fcm_dpo/margin": 185.53775024414062,
|
|
"fcm_dpo/q_t": 0.41012802720069885,
|
|
"grad_norm": 35.94118118286133,
|
|
"learning_rate": 3.426989547989902e-07,
|
|
"logits/chosen": -0.4112318456172943,
|
|
"logits/rejected": -0.41728314757347107,
|
|
"logps/chosen": -312.0766296386719,
|
|
"logps/ref_chosen": -51.197994232177734,
|
|
"logps/ref_rejected": -97.22636413574219,
|
|
"logps/rejected": -543.6427612304688,
|
|
"loss": 1.095,
|
|
"margin_dpo/margin_mean": 185.53775024414062,
|
|
"margin_dpo/margin_std": 245.56341552734375,
|
|
"step": 302
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -266.37298583984375,
|
|
"KL/mean": -351.4562683105469,
|
|
"KL/rejected_KL_mean": -436.53955078125,
|
|
"KL/std": 212.11721801757812,
|
|
"epoch": 0.44493392070484583,
|
|
"fcm_dpo/beta": 0.0021344092674553394,
|
|
"fcm_dpo/delta": 0.03814225643873215,
|
|
"fcm_dpo/margin": 170.1665496826172,
|
|
"fcm_dpo/q_t": 0.4166523218154907,
|
|
"grad_norm": 29.625904083251953,
|
|
"learning_rate": 3.4150589130555773e-07,
|
|
"logits/chosen": -0.4059848487377167,
|
|
"logits/rejected": -0.3904208242893219,
|
|
"logps/chosen": -333.0869445800781,
|
|
"logps/ref_chosen": -66.71394348144531,
|
|
"logps/ref_rejected": -86.94542694091797,
|
|
"logps/rejected": -523.4849853515625,
|
|
"loss": 1.1261,
|
|
"margin_dpo/margin_mean": 170.1665496826172,
|
|
"margin_dpo/margin_std": 254.2838897705078,
|
|
"step": 303
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -242.7084503173828,
|
|
"KL/mean": -338.2328796386719,
|
|
"KL/rejected_KL_mean": -433.75732421875,
|
|
"KL/std": 192.66885375976562,
|
|
"epoch": 0.44640234948604995,
|
|
"fcm_dpo/beta": 0.0021487209014594555,
|
|
"fcm_dpo/delta": -0.011212758719921112,
|
|
"fcm_dpo/margin": 191.04885864257812,
|
|
"fcm_dpo/q_t": 0.40207067131996155,
|
|
"grad_norm": 24.7471981048584,
|
|
"learning_rate": 3.403104165467883e-07,
|
|
"logits/chosen": -0.44146013259887695,
|
|
"logits/rejected": -0.4333987236022949,
|
|
"logps/chosen": -314.6591491699219,
|
|
"logps/ref_chosen": -71.95069885253906,
|
|
"logps/ref_rejected": -90.47203063964844,
|
|
"logps/rejected": -524.2293701171875,
|
|
"loss": 1.0533,
|
|
"margin_dpo/margin_mean": 191.04885864257812,
|
|
"margin_dpo/margin_std": 175.52703857421875,
|
|
"step": 304
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -261.6916809082031,
|
|
"KL/mean": -349.76568603515625,
|
|
"KL/rejected_KL_mean": -437.8396911621094,
|
|
"KL/std": 222.32864379882812,
|
|
"epoch": 0.447870778267254,
|
|
"fcm_dpo/beta": 0.0021360788960009813,
|
|
"fcm_dpo/delta": 0.02424509823322296,
|
|
"fcm_dpo/margin": 176.14801025390625,
|
|
"fcm_dpo/q_t": 0.41488200426101685,
|
|
"grad_norm": 25.216215133666992,
|
|
"learning_rate": 3.391125620245535e-07,
|
|
"logits/chosen": -0.4338444471359253,
|
|
"logits/rejected": -0.4185817837715149,
|
|
"logps/chosen": -328.4869079589844,
|
|
"logps/ref_chosen": -66.79523468017578,
|
|
"logps/ref_rejected": -92.75459289550781,
|
|
"logps/rejected": -530.5942993164062,
|
|
"loss": 1.1129,
|
|
"margin_dpo/margin_mean": 176.1479949951172,
|
|
"margin_dpo/margin_std": 249.10455322265625,
|
|
"step": 305
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -267.495361328125,
|
|
"KL/mean": -352.1854248046875,
|
|
"KL/rejected_KL_mean": -436.8755798339844,
|
|
"KL/std": 201.3372802734375,
|
|
"epoch": 0.44933920704845814,
|
|
"fcm_dpo/beta": 0.0021633305586874485,
|
|
"fcm_dpo/delta": 0.03478704392910004,
|
|
"fcm_dpo/margin": 169.38018798828125,
|
|
"fcm_dpo/q_t": 0.415255069732666,
|
|
"grad_norm": 25.18739891052246,
|
|
"learning_rate": 3.3791235930343417e-07,
|
|
"logits/chosen": -0.40468522906303406,
|
|
"logits/rejected": -0.37944549322128296,
|
|
"logps/chosen": -337.17926025390625,
|
|
"logps/ref_chosen": -69.68389892578125,
|
|
"logps/ref_rejected": -85.15919494628906,
|
|
"logps/rejected": -522.0347900390625,
|
|
"loss": 1.1056,
|
|
"margin_dpo/margin_mean": 169.38018798828125,
|
|
"margin_dpo/margin_std": 220.4286651611328,
|
|
"step": 306
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -242.18780517578125,
|
|
"KL/mean": -331.582275390625,
|
|
"KL/rejected_KL_mean": -420.9767150878906,
|
|
"KL/std": 187.04690551757812,
|
|
"epoch": 0.45080763582966227,
|
|
"fcm_dpo/beta": 0.002166555728763342,
|
|
"fcm_dpo/delta": 0.01314252894371748,
|
|
"fcm_dpo/margin": 178.78890991210938,
|
|
"fcm_dpo/q_t": 0.4101484417915344,
|
|
"grad_norm": 25.99106788635254,
|
|
"learning_rate": 3.367098400098881e-07,
|
|
"logits/chosen": -0.4158622920513153,
|
|
"logits/rejected": -0.40115827322006226,
|
|
"logps/chosen": -312.35321044921875,
|
|
"logps/ref_chosen": -70.16542053222656,
|
|
"logps/ref_rejected": -86.97230529785156,
|
|
"logps/rejected": -507.94903564453125,
|
|
"loss": 1.0951,
|
|
"margin_dpo/margin_mean": 178.78890991210938,
|
|
"margin_dpo/margin_std": 231.8785400390625,
|
|
"step": 307
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -253.13291931152344,
|
|
"KL/mean": -351.1971740722656,
|
|
"KL/rejected_KL_mean": -449.2614440917969,
|
|
"KL/std": 208.60134887695312,
|
|
"epoch": 0.4522760646108664,
|
|
"fcm_dpo/beta": 0.0021638874895870686,
|
|
"fcm_dpo/delta": -0.02547409199178219,
|
|
"fcm_dpo/margin": 196.12857055664062,
|
|
"fcm_dpo/q_t": 0.39992159605026245,
|
|
"grad_norm": 31.040889739990234,
|
|
"learning_rate": 3.355050358314172e-07,
|
|
"logits/chosen": -0.35027140378952026,
|
|
"logits/rejected": -0.3394496440887451,
|
|
"logps/chosen": -308.3779296875,
|
|
"logps/ref_chosen": -55.2449951171875,
|
|
"logps/ref_rejected": -79.37226104736328,
|
|
"logps/rejected": -528.6337280273438,
|
|
"loss": 1.0528,
|
|
"margin_dpo/margin_mean": 196.12855529785156,
|
|
"margin_dpo/margin_std": 203.5819091796875,
|
|
"step": 308
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -259.13006591796875,
|
|
"KL/mean": -356.8582763671875,
|
|
"KL/rejected_KL_mean": -454.58648681640625,
|
|
"KL/std": 223.6021270751953,
|
|
"epoch": 0.45374449339207046,
|
|
"fcm_dpo/beta": 0.002161671407520771,
|
|
"fcm_dpo/delta": -0.02382533997297287,
|
|
"fcm_dpo/margin": 195.45639038085938,
|
|
"fcm_dpo/q_t": 0.40092766284942627,
|
|
"grad_norm": 28.08131217956543,
|
|
"learning_rate": 3.3429797851573183e-07,
|
|
"logits/chosen": -0.36709824204444885,
|
|
"logits/rejected": -0.3614857792854309,
|
|
"logps/chosen": -308.08917236328125,
|
|
"logps/ref_chosen": -48.959083557128906,
|
|
"logps/ref_rejected": -82.34072875976562,
|
|
"logps/rejected": -536.92724609375,
|
|
"loss": 1.0705,
|
|
"margin_dpo/margin_mean": 195.45639038085938,
|
|
"margin_dpo/margin_std": 234.07315063476562,
|
|
"step": 309
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -300.7269287109375,
|
|
"KL/mean": -390.6546630859375,
|
|
"KL/rejected_KL_mean": -480.58233642578125,
|
|
"KL/std": 196.88262939453125,
|
|
"epoch": 0.4552129221732746,
|
|
"fcm_dpo/beta": 0.0021615237928926945,
|
|
"fcm_dpo/delta": 0.011311601847410202,
|
|
"fcm_dpo/margin": 179.8553924560547,
|
|
"fcm_dpo/q_t": 0.40847909450531006,
|
|
"grad_norm": 24.29796600341797,
|
|
"learning_rate": 3.3308869986991487e-07,
|
|
"logits/chosen": -0.41836071014404297,
|
|
"logits/rejected": -0.4063529372215271,
|
|
"logps/chosen": -363.46875,
|
|
"logps/ref_chosen": -62.74177932739258,
|
|
"logps/ref_rejected": -79.9302978515625,
|
|
"logps/rejected": -560.5126342773438,
|
|
"loss": 1.0811,
|
|
"margin_dpo/margin_mean": 179.85537719726562,
|
|
"margin_dpo/margin_std": 198.85214233398438,
|
|
"step": 310
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -314.01068115234375,
|
|
"KL/mean": -412.86981201171875,
|
|
"KL/rejected_KL_mean": -511.7289123535156,
|
|
"KL/std": 246.66839599609375,
|
|
"epoch": 0.4566813509544787,
|
|
"fcm_dpo/beta": 0.0021447776816785336,
|
|
"fcm_dpo/delta": -0.025147156789898872,
|
|
"fcm_dpo/margin": 197.71823120117188,
|
|
"fcm_dpo/q_t": 0.40380626916885376,
|
|
"grad_norm": 25.550621032714844,
|
|
"learning_rate": 3.3187723175958346e-07,
|
|
"logits/chosen": -0.38108277320861816,
|
|
"logits/rejected": -0.356780469417572,
|
|
"logps/chosen": -367.0386657714844,
|
|
"logps/ref_chosen": -53.02798080444336,
|
|
"logps/ref_rejected": -77.43820190429688,
|
|
"logps/rejected": -589.1671142578125,
|
|
"loss": 1.0811,
|
|
"margin_dpo/margin_mean": 197.71823120117188,
|
|
"margin_dpo/margin_std": 265.0999450683594,
|
|
"step": 311
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -291.0233154296875,
|
|
"KL/mean": -382.343017578125,
|
|
"KL/rejected_KL_mean": -473.6627197265625,
|
|
"KL/std": 216.25013732910156,
|
|
"epoch": 0.4581497797356828,
|
|
"fcm_dpo/beta": 0.002153100911527872,
|
|
"fcm_dpo/delta": 0.00674719363451004,
|
|
"fcm_dpo/margin": 182.63937377929688,
|
|
"fcm_dpo/q_t": 0.4111559987068176,
|
|
"grad_norm": 22.847942352294922,
|
|
"learning_rate": 3.306636061080487e-07,
|
|
"logits/chosen": -0.38960134983062744,
|
|
"logits/rejected": -0.3816367983818054,
|
|
"logps/chosen": -340.41552734375,
|
|
"logps/ref_chosen": -49.39221954345703,
|
|
"logps/ref_rejected": -75.79280853271484,
|
|
"logps/rejected": -549.4555053710938,
|
|
"loss": 1.1007,
|
|
"margin_dpo/margin_mean": 182.63937377929688,
|
|
"margin_dpo/margin_std": 256.60113525390625,
|
|
"step": 312
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -274.72918701171875,
|
|
"KL/mean": -370.8074951171875,
|
|
"KL/rejected_KL_mean": -466.88580322265625,
|
|
"KL/std": 231.34361267089844,
|
|
"epoch": 0.45961820851688695,
|
|
"fcm_dpo/beta": 0.002157143084332347,
|
|
"fcm_dpo/delta": -0.016167178750038147,
|
|
"fcm_dpo/margin": 192.15664672851562,
|
|
"fcm_dpo/q_t": 0.4041683077812195,
|
|
"grad_norm": 31.167348861694336,
|
|
"learning_rate": 3.2944785489547537e-07,
|
|
"logits/chosen": -0.46091365814208984,
|
|
"logits/rejected": -0.4598471522331238,
|
|
"logps/chosen": -324.88189697265625,
|
|
"logps/ref_chosen": -50.152740478515625,
|
|
"logps/ref_rejected": -86.40620422363281,
|
|
"logps/rejected": -553.2919921875,
|
|
"loss": 1.0858,
|
|
"margin_dpo/margin_mean": 192.15664672851562,
|
|
"margin_dpo/margin_std": 247.33575439453125,
|
|
"step": 313
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -303.0827941894531,
|
|
"KL/mean": -394.4405517578125,
|
|
"KL/rejected_KL_mean": -485.79833984375,
|
|
"KL/std": 226.08700561523438,
|
|
"epoch": 0.461086637298091,
|
|
"fcm_dpo/beta": 0.0021345026325434446,
|
|
"fcm_dpo/delta": 0.010156366974115372,
|
|
"fcm_dpo/margin": 182.71548461914062,
|
|
"fcm_dpo/q_t": 0.4131431579589844,
|
|
"grad_norm": 20.98644256591797,
|
|
"learning_rate": 3.2823001015803857e-07,
|
|
"logits/chosen": -0.4531956911087036,
|
|
"logits/rejected": -0.45664170384407043,
|
|
"logps/chosen": -360.32037353515625,
|
|
"logps/ref_chosen": -57.237579345703125,
|
|
"logps/ref_rejected": -97.5965347290039,
|
|
"logps/rejected": -583.3948974609375,
|
|
"loss": 1.1244,
|
|
"margin_dpo/margin_mean": 182.71548461914062,
|
|
"margin_dpo/margin_std": 292.40838623046875,
|
|
"step": 314
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -276.57342529296875,
|
|
"KL/mean": -360.9225769042969,
|
|
"KL/rejected_KL_mean": -445.271728515625,
|
|
"KL/std": 206.61561584472656,
|
|
"epoch": 0.46255506607929514,
|
|
"fcm_dpo/beta": 0.002153775654733181,
|
|
"fcm_dpo/delta": 0.038061730563640594,
|
|
"fcm_dpo/margin": 168.69830322265625,
|
|
"fcm_dpo/q_t": 0.41625896096229553,
|
|
"grad_norm": 22.165285110473633,
|
|
"learning_rate": 3.270101039870797e-07,
|
|
"logits/chosen": -0.36769017577171326,
|
|
"logits/rejected": -0.37278226017951965,
|
|
"logps/chosen": -325.64300537109375,
|
|
"logps/ref_chosen": -49.06958770751953,
|
|
"logps/ref_rejected": -85.68087768554688,
|
|
"logps/rejected": -530.95263671875,
|
|
"loss": 1.1123,
|
|
"margin_dpo/margin_mean": 168.69830322265625,
|
|
"margin_dpo/margin_std": 228.2129364013672,
|
|
"step": 315
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -269.3196105957031,
|
|
"KL/mean": -389.4281921386719,
|
|
"KL/rejected_KL_mean": -509.5367736816406,
|
|
"KL/std": 240.0266571044922,
|
|
"epoch": 0.46402349486049926,
|
|
"fcm_dpo/beta": 0.0021110116504132748,
|
|
"fcm_dpo/delta": -0.11421408504247665,
|
|
"fcm_dpo/margin": 240.21713256835938,
|
|
"fcm_dpo/q_t": 0.38528791069984436,
|
|
"grad_norm": 26.478769302368164,
|
|
"learning_rate": 3.2578816852826086e-07,
|
|
"logits/chosen": -0.4204370975494385,
|
|
"logits/rejected": -0.42546719312667847,
|
|
"logps/chosen": -323.58038330078125,
|
|
"logps/ref_chosen": -54.26074981689453,
|
|
"logps/ref_rejected": -101.2814712524414,
|
|
"logps/rejected": -610.8182373046875,
|
|
"loss": 1.0169,
|
|
"margin_dpo/margin_mean": 240.21714782714844,
|
|
"margin_dpo/margin_std": 269.4544677734375,
|
|
"step": 316
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -275.29345703125,
|
|
"KL/mean": -406.8150939941406,
|
|
"KL/rejected_KL_mean": -538.336669921875,
|
|
"KL/std": 220.98388671875,
|
|
"epoch": 0.4654919236417034,
|
|
"fcm_dpo/beta": 0.0020721801556646824,
|
|
"fcm_dpo/delta": -0.153153657913208,
|
|
"fcm_dpo/margin": 263.04327392578125,
|
|
"fcm_dpo/q_t": 0.37356036901474,
|
|
"grad_norm": 29.60879898071289,
|
|
"learning_rate": 3.2456423598071783e-07,
|
|
"logits/chosen": -0.4123254120349884,
|
|
"logits/rejected": -0.4049742817878723,
|
|
"logps/chosen": -331.3876953125,
|
|
"logps/ref_chosen": -56.094207763671875,
|
|
"logps/ref_rejected": -100.69905090332031,
|
|
"logps/rejected": -639.0357666015625,
|
|
"loss": 0.9783,
|
|
"margin_dpo/margin_mean": 263.04327392578125,
|
|
"margin_dpo/margin_std": 248.7153778076172,
|
|
"step": 317
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -307.56475830078125,
|
|
"KL/mean": -399.586669921875,
|
|
"KL/rejected_KL_mean": -491.6085205078125,
|
|
"KL/std": 220.44793701171875,
|
|
"epoch": 0.4669603524229075,
|
|
"fcm_dpo/beta": 0.0020595774985849857,
|
|
"fcm_dpo/delta": 0.021566076204180717,
|
|
"fcm_dpo/margin": 184.04380798339844,
|
|
"fcm_dpo/q_t": 0.4124523401260376,
|
|
"grad_norm": 24.619739532470703,
|
|
"learning_rate": 3.233383385962115e-07,
|
|
"logits/chosen": -0.458835244178772,
|
|
"logits/rejected": -0.4265810549259186,
|
|
"logps/chosen": -372.21044921875,
|
|
"logps/ref_chosen": -64.64569854736328,
|
|
"logps/ref_rejected": -82.76425170898438,
|
|
"logps/rejected": -574.372802734375,
|
|
"loss": 1.1006,
|
|
"margin_dpo/margin_mean": 184.0438232421875,
|
|
"margin_dpo/margin_std": 241.61331176757812,
|
|
"step": 318
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -269.0120849609375,
|
|
"KL/mean": -385.52276611328125,
|
|
"KL/rejected_KL_mean": -502.0334777832031,
|
|
"KL/std": 247.28811645507812,
|
|
"epoch": 0.4684287812041116,
|
|
"fcm_dpo/beta": 0.0020323502831161022,
|
|
"fcm_dpo/delta": -0.07725685834884644,
|
|
"fcm_dpo/margin": 233.02137756347656,
|
|
"fcm_dpo/q_t": 0.3905225992202759,
|
|
"grad_norm": 23.96953010559082,
|
|
"learning_rate": 3.2211050867827805e-07,
|
|
"logits/chosen": -0.3532301187515259,
|
|
"logits/rejected": -0.3711482882499695,
|
|
"logps/chosen": -318.3958435058594,
|
|
"logps/ref_chosen": -49.383758544921875,
|
|
"logps/ref_rejected": -113.90650939941406,
|
|
"logps/rejected": -615.93994140625,
|
|
"loss": 1.035,
|
|
"margin_dpo/margin_mean": 233.0213623046875,
|
|
"margin_dpo/margin_std": 266.6269226074219,
|
|
"step": 319
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -284.35699462890625,
|
|
"KL/mean": -408.48895263671875,
|
|
"KL/rejected_KL_mean": -532.6209106445312,
|
|
"KL/std": 253.5668487548828,
|
|
"epoch": 0.4698972099853157,
|
|
"fcm_dpo/beta": 0.0019884873181581497,
|
|
"fcm_dpo/delta": -0.09909307956695557,
|
|
"fcm_dpo/margin": 248.263916015625,
|
|
"fcm_dpo/q_t": 0.3870677351951599,
|
|
"grad_norm": 23.290699005126953,
|
|
"learning_rate": 3.208807785813777e-07,
|
|
"logits/chosen": -0.41272127628326416,
|
|
"logits/rejected": -0.4184020161628723,
|
|
"logps/chosen": -343.86187744140625,
|
|
"logps/ref_chosen": -59.50489044189453,
|
|
"logps/ref_rejected": -97.66717529296875,
|
|
"logps/rejected": -630.2880859375,
|
|
"loss": 1.0227,
|
|
"margin_dpo/margin_mean": 248.263916015625,
|
|
"margin_dpo/margin_std": 279.31939697265625,
|
|
"step": 320
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -348.3397216796875,
|
|
"KL/mean": -455.6825256347656,
|
|
"KL/rejected_KL_mean": -563.0252685546875,
|
|
"KL/std": 267.32122802734375,
|
|
"epoch": 0.4713656387665198,
|
|
"fcm_dpo/beta": 0.0019679851830005646,
|
|
"fcm_dpo/delta": -0.024129830300807953,
|
|
"fcm_dpo/margin": 214.68557739257812,
|
|
"fcm_dpo/q_t": 0.4036102294921875,
|
|
"grad_norm": 23.15062713623047,
|
|
"learning_rate": 3.1964918071004217e-07,
|
|
"logits/chosen": -0.40483784675598145,
|
|
"logits/rejected": -0.3935700058937073,
|
|
"logps/chosen": -409.8883972167969,
|
|
"logps/ref_chosen": -61.548683166503906,
|
|
"logps/ref_rejected": -91.64103698730469,
|
|
"logps/rejected": -654.6663818359375,
|
|
"loss": 1.082,
|
|
"margin_dpo/margin_mean": 214.68557739257812,
|
|
"margin_dpo/margin_std": 282.3053283691406,
|
|
"step": 321
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -306.1932373046875,
|
|
"KL/mean": -422.8832702636719,
|
|
"KL/rejected_KL_mean": -539.5733642578125,
|
|
"KL/std": 236.69369506835938,
|
|
"epoch": 0.47283406754772395,
|
|
"fcm_dpo/beta": 0.0019517629407346249,
|
|
"fcm_dpo/delta": -0.05857790261507034,
|
|
"fcm_dpo/margin": 233.38006591796875,
|
|
"fcm_dpo/q_t": 0.39376458525657654,
|
|
"grad_norm": 24.584089279174805,
|
|
"learning_rate": 3.184157475180207e-07,
|
|
"logits/chosen": -0.3697229027748108,
|
|
"logits/rejected": -0.36216434836387634,
|
|
"logps/chosen": -363.4832763671875,
|
|
"logps/ref_chosen": -57.29003143310547,
|
|
"logps/ref_rejected": -95.74992370605469,
|
|
"logps/rejected": -635.3232421875,
|
|
"loss": 1.0394,
|
|
"margin_dpo/margin_mean": 233.3800811767578,
|
|
"margin_dpo/margin_std": 251.07766723632812,
|
|
"step": 322
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -326.294189453125,
|
|
"KL/mean": -422.7344970703125,
|
|
"KL/rejected_KL_mean": -519.1747436523438,
|
|
"KL/std": 234.59567260742188,
|
|
"epoch": 0.47430249632892807,
|
|
"fcm_dpo/beta": 0.0019555268809199333,
|
|
"fcm_dpo/delta": 0.02367909625172615,
|
|
"fcm_dpo/margin": 192.8805694580078,
|
|
"fcm_dpo/q_t": 0.41228896379470825,
|
|
"grad_norm": 30.523874282836914,
|
|
"learning_rate": 3.171805115074251e-07,
|
|
"logits/chosen": -0.3859459161758423,
|
|
"logits/rejected": -0.37858086824417114,
|
|
"logps/chosen": -377.5281677246094,
|
|
"logps/ref_chosen": -51.23395919799805,
|
|
"logps/ref_rejected": -75.06192016601562,
|
|
"logps/rejected": -594.2366943359375,
|
|
"loss": 1.1079,
|
|
"margin_dpo/margin_mean": 192.88055419921875,
|
|
"margin_dpo/margin_std": 258.48101806640625,
|
|
"step": 323
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -374.3531494140625,
|
|
"KL/mean": -466.55242919921875,
|
|
"KL/rejected_KL_mean": -558.751708984375,
|
|
"KL/std": 251.7170867919922,
|
|
"epoch": 0.47577092511013214,
|
|
"fcm_dpo/beta": 0.001949124038219452,
|
|
"fcm_dpo/delta": -0.06465040892362595,
|
|
"fcm_dpo/margin": 184.39852905273438,
|
|
"fcm_dpo/q_t": 0.41932082176208496,
|
|
"grad_norm": 36.11357498168945,
|
|
"learning_rate": 3.1594350522787295e-07,
|
|
"logits/chosen": -0.4411655068397522,
|
|
"logits/rejected": -0.42205148935317993,
|
|
"logps/chosen": -439.48834228515625,
|
|
"logps/ref_chosen": -65.13516998291016,
|
|
"logps/ref_rejected": -86.47750854492188,
|
|
"logps/rejected": -645.229248046875,
|
|
"loss": 1.1457,
|
|
"margin_dpo/margin_mean": 184.39854431152344,
|
|
"margin_dpo/margin_std": 294.26849365234375,
|
|
"step": 324
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -294.81378173828125,
|
|
"KL/mean": -377.78643798828125,
|
|
"KL/rejected_KL_mean": -460.75909423828125,
|
|
"KL/std": 227.09103393554688,
|
|
"epoch": 0.47723935389133626,
|
|
"fcm_dpo/beta": 0.0019613862968981266,
|
|
"fcm_dpo/delta": 0.07689196616411209,
|
|
"fcm_dpo/margin": 165.94529724121094,
|
|
"fcm_dpo/q_t": 0.42428165674209595,
|
|
"grad_norm": 28.9737548828125,
|
|
"learning_rate": 3.147047612756302e-07,
|
|
"logits/chosen": -0.4612424969673157,
|
|
"logits/rejected": -0.4356522858142853,
|
|
"logps/chosen": -351.0293884277344,
|
|
"logps/ref_chosen": -56.215599060058594,
|
|
"logps/ref_rejected": -70.08592987060547,
|
|
"logps/rejected": -530.844970703125,
|
|
"loss": 1.1393,
|
|
"margin_dpo/margin_mean": 165.9453125,
|
|
"margin_dpo/margin_std": 232.77090454101562,
|
|
"step": 325
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -328.23577880859375,
|
|
"KL/mean": -415.43890380859375,
|
|
"KL/rejected_KL_mean": -502.64208984375,
|
|
"KL/std": 211.7913818359375,
|
|
"epoch": 0.4787077826725404,
|
|
"fcm_dpo/beta": 0.001981673063710332,
|
|
"fcm_dpo/delta": 0.05636116489768028,
|
|
"fcm_dpo/margin": 174.4063262939453,
|
|
"fcm_dpo/q_t": 0.4185304045677185,
|
|
"grad_norm": 27.75748634338379,
|
|
"learning_rate": 3.134643122927519e-07,
|
|
"logits/chosen": -0.4789705276489258,
|
|
"logits/rejected": -0.4462633728981018,
|
|
"logps/chosen": -400.960693359375,
|
|
"logps/ref_chosen": -72.72496032714844,
|
|
"logps/ref_rejected": -79.8467788696289,
|
|
"logps/rejected": -582.4888916015625,
|
|
"loss": 1.1104,
|
|
"margin_dpo/margin_mean": 174.4063262939453,
|
|
"margin_dpo/margin_std": 205.46969604492188,
|
|
"step": 326
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -288.17822265625,
|
|
"KL/mean": -407.74261474609375,
|
|
"KL/rejected_KL_mean": -527.3070068359375,
|
|
"KL/std": 220.7349090576172,
|
|
"epoch": 0.4801762114537445,
|
|
"fcm_dpo/beta": 0.001960520865395665,
|
|
"fcm_dpo/delta": -0.07271062582731247,
|
|
"fcm_dpo/margin": 239.12875366210938,
|
|
"fcm_dpo/q_t": 0.390238493680954,
|
|
"grad_norm": 28.559314727783203,
|
|
"learning_rate": 3.1222219096622264e-07,
|
|
"logits/chosen": -0.42903268337249756,
|
|
"logits/rejected": -0.41195765137672424,
|
|
"logps/chosen": -357.3126525878906,
|
|
"logps/ref_chosen": -69.13441467285156,
|
|
"logps/ref_rejected": -111.93377685546875,
|
|
"logps/rejected": -639.2407836914062,
|
|
"loss": 1.0326,
|
|
"margin_dpo/margin_mean": 239.12875366210938,
|
|
"margin_dpo/margin_std": 257.76385498046875,
|
|
"step": 327
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -292.6146240234375,
|
|
"KL/mean": -400.96087646484375,
|
|
"KL/rejected_KL_mean": -509.3071594238281,
|
|
"KL/std": 242.0338134765625,
|
|
"epoch": 0.48164464023494863,
|
|
"fcm_dpo/beta": 0.001954542938619852,
|
|
"fcm_dpo/delta": -0.024569327011704445,
|
|
"fcm_dpo/margin": 216.6925811767578,
|
|
"fcm_dpo/q_t": 0.40333792567253113,
|
|
"grad_norm": 22.35950469970703,
|
|
"learning_rate": 3.1097843002709427e-07,
|
|
"logits/chosen": -0.4434688091278076,
|
|
"logits/rejected": -0.4467797577381134,
|
|
"logps/chosen": -352.30181884765625,
|
|
"logps/ref_chosen": -59.68719482421875,
|
|
"logps/ref_rejected": -90.85499572753906,
|
|
"logps/rejected": -600.1621704101562,
|
|
"loss": 1.0738,
|
|
"margin_dpo/margin_mean": 216.69256591796875,
|
|
"margin_dpo/margin_std": 276.7907409667969,
|
|
"step": 328
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -322.8833923339844,
|
|
"KL/mean": -433.7863464355469,
|
|
"KL/rejected_KL_mean": -544.6892700195312,
|
|
"KL/std": 254.3605194091797,
|
|
"epoch": 0.4831130690161527,
|
|
"fcm_dpo/beta": 0.0019329939968883991,
|
|
"fcm_dpo/delta": -0.030582299456000328,
|
|
"fcm_dpo/margin": 221.80587768554688,
|
|
"fcm_dpo/q_t": 0.4004287123680115,
|
|
"grad_norm": 30.763805389404297,
|
|
"learning_rate": 3.0973306224962437e-07,
|
|
"logits/chosen": -0.42895740270614624,
|
|
"logits/rejected": -0.42029309272766113,
|
|
"logps/chosen": -388.12957763671875,
|
|
"logps/ref_chosen": -65.2461929321289,
|
|
"logps/ref_rejected": -100.69770812988281,
|
|
"logps/rejected": -645.386962890625,
|
|
"loss": 1.0718,
|
|
"margin_dpo/margin_mean": 221.80587768554688,
|
|
"margin_dpo/margin_std": 272.5767517089844,
|
|
"step": 329
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -270.37603759765625,
|
|
"KL/mean": -380.49884033203125,
|
|
"KL/rejected_KL_mean": -490.62164306640625,
|
|
"KL/std": 230.03123474121094,
|
|
"epoch": 0.4845814977973568,
|
|
"fcm_dpo/beta": 0.001930012134835124,
|
|
"fcm_dpo/delta": -0.026210233569145203,
|
|
"fcm_dpo/margin": 220.24560546875,
|
|
"fcm_dpo/q_t": 0.40108194947242737,
|
|
"grad_norm": 25.122922897338867,
|
|
"learning_rate": 3.084861204504122e-07,
|
|
"logits/chosen": -0.3836897611618042,
|
|
"logits/rejected": -0.38447412848472595,
|
|
"logps/chosen": -317.3743591308594,
|
|
"logps/ref_chosen": -46.998348236083984,
|
|
"logps/ref_rejected": -86.87684631347656,
|
|
"logps/rejected": -577.4984741210938,
|
|
"loss": 1.0643,
|
|
"margin_dpo/margin_mean": 220.24560546875,
|
|
"margin_dpo/margin_std": 259.4215087890625,
|
|
"step": 330
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -291.59906005859375,
|
|
"KL/mean": -409.332275390625,
|
|
"KL/rejected_KL_mean": -527.0654907226562,
|
|
"KL/std": 194.7655029296875,
|
|
"epoch": 0.48604992657856094,
|
|
"fcm_dpo/beta": 0.0019126099068671465,
|
|
"fcm_dpo/delta": -0.05275537818670273,
|
|
"fcm_dpo/margin": 235.46646118164062,
|
|
"fcm_dpo/q_t": 0.39248865842819214,
|
|
"grad_norm": 23.949661254882812,
|
|
"learning_rate": 3.072376374875335e-07,
|
|
"logits/chosen": -0.44039618968963623,
|
|
"logits/rejected": -0.4362325668334961,
|
|
"logps/chosen": -342.123291015625,
|
|
"logps/ref_chosen": -50.52424621582031,
|
|
"logps/ref_rejected": -89.01544189453125,
|
|
"logps/rejected": -616.0809326171875,
|
|
"loss": 1.0157,
|
|
"margin_dpo/margin_mean": 235.46646118164062,
|
|
"margin_dpo/margin_std": 187.62429809570312,
|
|
"step": 331
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -296.7283935546875,
|
|
"KL/mean": -378.4895935058594,
|
|
"KL/rejected_KL_mean": -460.2508544921875,
|
|
"KL/std": 197.91973876953125,
|
|
"epoch": 0.48751835535976507,
|
|
"fcm_dpo/beta": 0.0019352274248376489,
|
|
"fcm_dpo/delta": 0.0861460417509079,
|
|
"fcm_dpo/margin": 163.52247619628906,
|
|
"fcm_dpo/q_t": 0.4261719584465027,
|
|
"grad_norm": 22.072265625,
|
|
"learning_rate": 3.059876462596758e-07,
|
|
"logits/chosen": -0.43224036693573,
|
|
"logits/rejected": -0.41115298867225647,
|
|
"logps/chosen": -345.9086608886719,
|
|
"logps/ref_chosen": -49.18028259277344,
|
|
"logps/ref_rejected": -76.48515319824219,
|
|
"logps/rejected": -536.7359619140625,
|
|
"loss": 1.1374,
|
|
"margin_dpo/margin_mean": 163.52249145507812,
|
|
"margin_dpo/margin_std": 219.849853515625,
|
|
"step": 332
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -331.73468017578125,
|
|
"KL/mean": -443.3945617675781,
|
|
"KL/rejected_KL_mean": -555.054443359375,
|
|
"KL/std": 243.6097412109375,
|
|
"epoch": 0.4889867841409692,
|
|
"fcm_dpo/beta": 0.0019211724866181612,
|
|
"fcm_dpo/delta": -0.03121526539325714,
|
|
"fcm_dpo/margin": 223.31976318359375,
|
|
"fcm_dpo/q_t": 0.40155458450317383,
|
|
"grad_norm": 22.271997451782227,
|
|
"learning_rate": 3.0473617970527015e-07,
|
|
"logits/chosen": -0.44901585578918457,
|
|
"logits/rejected": -0.4433661699295044,
|
|
"logps/chosen": -395.4904479980469,
|
|
"logps/ref_chosen": -63.75574493408203,
|
|
"logps/ref_rejected": -95.04411315917969,
|
|
"logps/rejected": -650.0985717773438,
|
|
"loss": 1.0795,
|
|
"margin_dpo/margin_mean": 223.31976318359375,
|
|
"margin_dpo/margin_std": 292.02545166015625,
|
|
"step": 333
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -331.0152587890625,
|
|
"KL/mean": -430.8121643066406,
|
|
"KL/rejected_KL_mean": -530.609130859375,
|
|
"KL/std": 269.2216796875,
|
|
"epoch": 0.49045521292217326,
|
|
"fcm_dpo/beta": 0.001930908882059157,
|
|
"fcm_dpo/delta": 0.015167435631155968,
|
|
"fcm_dpo/margin": 199.5938720703125,
|
|
"fcm_dpo/q_t": 0.4123017191886902,
|
|
"grad_norm": 26.76993179321289,
|
|
"learning_rate": 3.034832708016243e-07,
|
|
"logits/chosen": -0.44665104150772095,
|
|
"logits/rejected": -0.44349536299705505,
|
|
"logps/chosen": -397.9949951171875,
|
|
"logps/ref_chosen": -66.97975158691406,
|
|
"logps/ref_rejected": -95.31692504882812,
|
|
"logps/rejected": -625.926025390625,
|
|
"loss": 1.1182,
|
|
"margin_dpo/margin_mean": 199.5938720703125,
|
|
"margin_dpo/margin_std": 301.0901794433594,
|
|
"step": 334
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -356.3892517089844,
|
|
"KL/mean": -430.55914306640625,
|
|
"KL/rejected_KL_mean": -504.72900390625,
|
|
"KL/std": 242.3770294189453,
|
|
"epoch": 0.4919236417033774,
|
|
"fcm_dpo/beta": 0.001974080689251423,
|
|
"fcm_dpo/delta": 0.10976044833660126,
|
|
"fcm_dpo/margin": 148.3397216796875,
|
|
"fcm_dpo/q_t": 0.4333241581916809,
|
|
"grad_norm": 35.3482666015625,
|
|
"learning_rate": 3.022289525640531e-07,
|
|
"logits/chosen": -0.482355535030365,
|
|
"logits/rejected": -0.4589642584323883,
|
|
"logps/chosen": -418.9317321777344,
|
|
"logps/ref_chosen": -62.54248046875,
|
|
"logps/ref_rejected": -87.61770629882812,
|
|
"logps/rejected": -592.3466796875,
|
|
"loss": 1.1826,
|
|
"margin_dpo/margin_mean": 148.3397216796875,
|
|
"margin_dpo/margin_std": 268.9454650878906,
|
|
"step": 335
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -359.19427490234375,
|
|
"KL/mean": -484.33697509765625,
|
|
"KL/rejected_KL_mean": -609.4796752929688,
|
|
"KL/std": 294.24444580078125,
|
|
"epoch": 0.4933920704845815,
|
|
"fcm_dpo/beta": 0.0019574996549636126,
|
|
"fcm_dpo/delta": -0.09448903799057007,
|
|
"fcm_dpo/margin": 250.28536987304688,
|
|
"fcm_dpo/q_t": 0.39117854833602905,
|
|
"grad_norm": 29.434301376342773,
|
|
"learning_rate": 3.009732580450086e-07,
|
|
"logits/chosen": -0.4395965039730072,
|
|
"logits/rejected": -0.44075945019721985,
|
|
"logps/chosen": -413.72540283203125,
|
|
"logps/ref_chosen": -54.53115463256836,
|
|
"logps/ref_rejected": -104.40424346923828,
|
|
"logps/rejected": -713.8839111328125,
|
|
"loss": 1.0614,
|
|
"margin_dpo/margin_mean": 250.285400390625,
|
|
"margin_dpo/margin_std": 347.0870666503906,
|
|
"step": 336
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -325.9806213378906,
|
|
"KL/mean": -445.74078369140625,
|
|
"KL/rejected_KL_mean": -565.5008544921875,
|
|
"KL/std": 236.7762908935547,
|
|
"epoch": 0.4948604992657856,
|
|
"fcm_dpo/beta": 0.001918459078297019,
|
|
"fcm_dpo/delta": -0.06264565885066986,
|
|
"fcm_dpo/margin": 239.52027893066406,
|
|
"fcm_dpo/q_t": 0.3944718539714813,
|
|
"grad_norm": 29.495193481445312,
|
|
"learning_rate": 2.9971622033320914e-07,
|
|
"logits/chosen": -0.48390763998031616,
|
|
"logits/rejected": -0.4708746075630188,
|
|
"logps/chosen": -391.10931396484375,
|
|
"logps/ref_chosen": -65.12869262695312,
|
|
"logps/ref_rejected": -101.72701263427734,
|
|
"logps/rejected": -667.2279052734375,
|
|
"loss": 1.0447,
|
|
"margin_dpo/margin_mean": 239.52027893066406,
|
|
"margin_dpo/margin_std": 279.1695556640625,
|
|
"step": 337
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -295.68658447265625,
|
|
"KL/mean": -416.0144958496094,
|
|
"KL/rejected_KL_mean": -536.3424072265625,
|
|
"KL/std": 231.92388916015625,
|
|
"epoch": 0.49632892804698975,
|
|
"fcm_dpo/beta": 0.0018986309878528118,
|
|
"fcm_dpo/delta": -0.059694305062294006,
|
|
"fcm_dpo/margin": 240.65579223632812,
|
|
"fcm_dpo/q_t": 0.392913818359375,
|
|
"grad_norm": 25.081783294677734,
|
|
"learning_rate": 2.984578725527675e-07,
|
|
"logits/chosen": -0.46351104974746704,
|
|
"logits/rejected": -0.4610709249973297,
|
|
"logps/chosen": -354.10931396484375,
|
|
"logps/ref_chosen": -58.422706604003906,
|
|
"logps/ref_rejected": -89.06854248046875,
|
|
"logps/rejected": -625.4109497070312,
|
|
"loss": 1.0302,
|
|
"margin_dpo/margin_mean": 240.65579223632812,
|
|
"margin_dpo/margin_std": 242.48886108398438,
|
|
"step": 338
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -302.7047424316406,
|
|
"KL/mean": -416.17254638671875,
|
|
"KL/rejected_KL_mean": -529.640380859375,
|
|
"KL/std": 236.2411346435547,
|
|
"epoch": 0.4977973568281938,
|
|
"fcm_dpo/beta": 0.0018999692983925343,
|
|
"fcm_dpo/delta": -0.03361833840608597,
|
|
"fcm_dpo/margin": 226.93565368652344,
|
|
"fcm_dpo/q_t": 0.3996594548225403,
|
|
"grad_norm": 26.538564682006836,
|
|
"learning_rate": 2.9719824786231796e-07,
|
|
"logits/chosen": -0.5198140740394592,
|
|
"logits/rejected": -0.5054018497467041,
|
|
"logps/chosen": -362.7000732421875,
|
|
"logps/ref_chosen": -59.99531555175781,
|
|
"logps/ref_rejected": -103.9109115600586,
|
|
"logps/rejected": -633.55126953125,
|
|
"loss": 1.0607,
|
|
"margin_dpo/margin_mean": 226.93565368652344,
|
|
"margin_dpo/margin_std": 250.84112548828125,
|
|
"step": 339
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -318.005859375,
|
|
"KL/mean": -415.7840270996094,
|
|
"KL/rejected_KL_mean": -513.5621948242188,
|
|
"KL/std": 228.09695434570312,
|
|
"epoch": 0.49926578560939794,
|
|
"fcm_dpo/beta": 0.0018835279624909163,
|
|
"fcm_dpo/delta": 0.03270437568426132,
|
|
"fcm_dpo/margin": 195.55636596679688,
|
|
"fcm_dpo/q_t": 0.4155174195766449,
|
|
"grad_norm": 23.04448890686035,
|
|
"learning_rate": 2.959373794541426e-07,
|
|
"logits/chosen": -0.39705830812454224,
|
|
"logits/rejected": -0.3717266917228699,
|
|
"logps/chosen": -370.8360595703125,
|
|
"logps/ref_chosen": -52.83022689819336,
|
|
"logps/ref_rejected": -73.10723114013672,
|
|
"logps/rejected": -586.66943359375,
|
|
"loss": 1.1182,
|
|
"margin_dpo/margin_mean": 195.55636596679688,
|
|
"margin_dpo/margin_std": 281.4355163574219,
|
|
"step": 340
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -308.8104248046875,
|
|
"KL/mean": -426.990478515625,
|
|
"KL/rejected_KL_mean": -545.1705322265625,
|
|
"KL/std": 248.2613983154297,
|
|
"epoch": 0.5007342143906021,
|
|
"fcm_dpo/beta": 0.0018748041475191712,
|
|
"fcm_dpo/delta": -0.04550610110163689,
|
|
"fcm_dpo/margin": 236.36013793945312,
|
|
"fcm_dpo/q_t": 0.39694035053253174,
|
|
"grad_norm": 26.15273094177246,
|
|
"learning_rate": 2.946753005532965e-07,
|
|
"logits/chosen": -0.42927074432373047,
|
|
"logits/rejected": -0.42871958017349243,
|
|
"logps/chosen": -356.7102355957031,
|
|
"logps/ref_chosen": -47.899803161621094,
|
|
"logps/ref_rejected": -101.80987548828125,
|
|
"logps/rejected": -646.98046875,
|
|
"loss": 1.0485,
|
|
"margin_dpo/margin_mean": 236.36013793945312,
|
|
"margin_dpo/margin_std": 258.4355163574219,
|
|
"step": 341
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -302.05859375,
|
|
"KL/mean": -402.9062805175781,
|
|
"KL/rejected_KL_mean": -503.75396728515625,
|
|
"KL/std": 237.40493774414062,
|
|
"epoch": 0.5022026431718062,
|
|
"fcm_dpo/beta": 0.0018906050827354193,
|
|
"fcm_dpo/delta": 0.01890111342072487,
|
|
"fcm_dpo/margin": 201.69537353515625,
|
|
"fcm_dpo/q_t": 0.41175463795661926,
|
|
"grad_norm": 23.40822410583496,
|
|
"learning_rate": 2.934120444167326e-07,
|
|
"logits/chosen": -0.47432941198349,
|
|
"logits/rejected": -0.442875474691391,
|
|
"logps/chosen": -374.05523681640625,
|
|
"logps/ref_chosen": -71.99664306640625,
|
|
"logps/ref_rejected": -92.58959197998047,
|
|
"logps/rejected": -596.3435668945312,
|
|
"loss": 1.111,
|
|
"margin_dpo/margin_mean": 201.69537353515625,
|
|
"margin_dpo/margin_std": 286.6202392578125,
|
|
"step": 342
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -271.26263427734375,
|
|
"KL/mean": -396.2135314941406,
|
|
"KL/rejected_KL_mean": -521.1644287109375,
|
|
"KL/std": 233.09645080566406,
|
|
"epoch": 0.5036710719530103,
|
|
"fcm_dpo/beta": 0.0018635441083461046,
|
|
"fcm_dpo/delta": -0.06895594298839569,
|
|
"fcm_dpo/margin": 249.90182495117188,
|
|
"fcm_dpo/q_t": 0.3894881308078766,
|
|
"grad_norm": 26.693635940551758,
|
|
"learning_rate": 2.9214764433242476e-07,
|
|
"logits/chosen": -0.4619428813457489,
|
|
"logits/rejected": -0.4649538993835449,
|
|
"logps/chosen": -325.6682434082031,
|
|
"logps/ref_chosen": -54.405616760253906,
|
|
"logps/ref_rejected": -111.04142761230469,
|
|
"logps/rejected": -632.2058715820312,
|
|
"loss": 1.0107,
|
|
"margin_dpo/margin_mean": 249.90182495117188,
|
|
"margin_dpo/margin_std": 208.7919158935547,
|
|
"step": 343
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -278.57086181640625,
|
|
"KL/mean": -390.90570068359375,
|
|
"KL/rejected_KL_mean": -503.2406005859375,
|
|
"KL/std": 266.9576416015625,
|
|
"epoch": 0.5051395007342144,
|
|
"fcm_dpo/beta": 0.0018696986371651292,
|
|
"fcm_dpo/delta": -0.022609613835811615,
|
|
"fcm_dpo/margin": 224.6697235107422,
|
|
"fcm_dpo/q_t": 0.40508079528808594,
|
|
"grad_norm": 26.9531192779541,
|
|
"learning_rate": 2.9088213361849126e-07,
|
|
"logits/chosen": -0.4527415633201599,
|
|
"logits/rejected": -0.44961199164390564,
|
|
"logps/chosen": -332.5355224609375,
|
|
"logps/ref_chosen": -53.96466827392578,
|
|
"logps/ref_rejected": -90.62336730957031,
|
|
"logps/rejected": -593.8639526367188,
|
|
"loss": 1.0759,
|
|
"margin_dpo/margin_mean": 224.6697235107422,
|
|
"margin_dpo/margin_std": 269.2288818359375,
|
|
"step": 344
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -335.9206237792969,
|
|
"KL/mean": -457.4391174316406,
|
|
"KL/rejected_KL_mean": -578.9576416015625,
|
|
"KL/std": 244.952392578125,
|
|
"epoch": 0.5066079295154186,
|
|
"fcm_dpo/beta": 0.0018422373104840517,
|
|
"fcm_dpo/delta": -0.0500393845140934,
|
|
"fcm_dpo/margin": 243.03695678710938,
|
|
"fcm_dpo/q_t": 0.395746111869812,
|
|
"grad_norm": 25.29005241394043,
|
|
"learning_rate": 2.896155456223163e-07,
|
|
"logits/chosen": -0.4590086340904236,
|
|
"logits/rejected": -0.4529004395008087,
|
|
"logps/chosen": -397.6063232421875,
|
|
"logps/ref_chosen": -61.685699462890625,
|
|
"logps/ref_rejected": -99.49041748046875,
|
|
"logps/rejected": -678.447998046875,
|
|
"loss": 1.0482,
|
|
"margin_dpo/margin_mean": 243.03695678710938,
|
|
"margin_dpo/margin_std": 273.3504638671875,
|
|
"step": 345
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -342.43505859375,
|
|
"KL/mean": -449.899658203125,
|
|
"KL/rejected_KL_mean": -557.3642578125,
|
|
"KL/std": 243.40853881835938,
|
|
"epoch": 0.5080763582966226,
|
|
"fcm_dpo/beta": 0.0018314840272068977,
|
|
"fcm_dpo/delta": 0.0066130333580076694,
|
|
"fcm_dpo/margin": 214.92916870117188,
|
|
"fcm_dpo/q_t": 0.4076474905014038,
|
|
"grad_norm": 26.261621475219727,
|
|
"learning_rate": 2.883479137196714e-07,
|
|
"logits/chosen": -0.42875561118125916,
|
|
"logits/rejected": -0.41599297523498535,
|
|
"logps/chosen": -397.69134521484375,
|
|
"logps/ref_chosen": -55.256263732910156,
|
|
"logps/ref_rejected": -77.41532135009766,
|
|
"logps/rejected": -634.779541015625,
|
|
"loss": 1.0887,
|
|
"margin_dpo/margin_mean": 214.92916870117188,
|
|
"margin_dpo/margin_std": 269.51617431640625,
|
|
"step": 346
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -348.1890563964844,
|
|
"KL/mean": -457.7886047363281,
|
|
"KL/rejected_KL_mean": -567.38818359375,
|
|
"KL/std": 259.5652160644531,
|
|
"epoch": 0.5095447870778267,
|
|
"fcm_dpo/beta": 0.0018333385232836008,
|
|
"fcm_dpo/delta": -0.0019443881465122104,
|
|
"fcm_dpo/margin": 219.19906616210938,
|
|
"fcm_dpo/q_t": 0.4073731303215027,
|
|
"grad_norm": 21.977977752685547,
|
|
"learning_rate": 2.8707927131383614e-07,
|
|
"logits/chosen": -0.423196017742157,
|
|
"logits/rejected": -0.4180574417114258,
|
|
"logps/chosen": -405.75531005859375,
|
|
"logps/ref_chosen": -57.56623840332031,
|
|
"logps/ref_rejected": -92.35509490966797,
|
|
"logps/rejected": -659.7432861328125,
|
|
"loss": 1.0904,
|
|
"margin_dpo/margin_mean": 219.19906616210938,
|
|
"margin_dpo/margin_std": 289.9748229980469,
|
|
"step": 347
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -307.3543701171875,
|
|
"KL/mean": -404.4385070800781,
|
|
"KL/rejected_KL_mean": -501.52264404296875,
|
|
"KL/std": 220.54977416992188,
|
|
"epoch": 0.5110132158590308,
|
|
"fcm_dpo/beta": 0.0018353135092183948,
|
|
"fcm_dpo/delta": 0.04490099474787712,
|
|
"fcm_dpo/margin": 194.16824340820312,
|
|
"fcm_dpo/q_t": 0.4174761176109314,
|
|
"grad_norm": 25.500268936157227,
|
|
"learning_rate": 2.858096518347179e-07,
|
|
"logits/chosen": -0.44019395112991333,
|
|
"logits/rejected": -0.43998709321022034,
|
|
"logps/chosen": -363.67205810546875,
|
|
"logps/ref_chosen": -56.31770324707031,
|
|
"logps/ref_rejected": -89.13836669921875,
|
|
"logps/rejected": -590.6610107421875,
|
|
"loss": 1.1141,
|
|
"margin_dpo/margin_mean": 194.16824340820312,
|
|
"margin_dpo/margin_std": 251.42027282714844,
|
|
"step": 348
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -298.98748779296875,
|
|
"KL/mean": -411.78753662109375,
|
|
"KL/rejected_KL_mean": -524.587646484375,
|
|
"KL/std": 259.8543395996094,
|
|
"epoch": 0.5124816446402349,
|
|
"fcm_dpo/beta": 0.0018510316731408238,
|
|
"fcm_dpo/delta": -0.018560701981186867,
|
|
"fcm_dpo/margin": 225.6001739501953,
|
|
"fcm_dpo/q_t": 0.40646952390670776,
|
|
"grad_norm": 21.230770111083984,
|
|
"learning_rate": 2.845390887379706e-07,
|
|
"logits/chosen": -0.41702014207839966,
|
|
"logits/rejected": -0.41563892364501953,
|
|
"logps/chosen": -357.01300048828125,
|
|
"logps/ref_chosen": -58.025516510009766,
|
|
"logps/ref_rejected": -97.50515747070312,
|
|
"logps/rejected": -622.0927734375,
|
|
"loss": 1.0942,
|
|
"margin_dpo/margin_mean": 225.60018920898438,
|
|
"margin_dpo/margin_std": 322.47943115234375,
|
|
"step": 349
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -325.2149353027344,
|
|
"KL/mean": -437.28228759765625,
|
|
"KL/rejected_KL_mean": -549.349609375,
|
|
"KL/std": 247.470703125,
|
|
"epoch": 0.5139500734214391,
|
|
"fcm_dpo/beta": 0.0018313410691916943,
|
|
"fcm_dpo/delta": -0.011365924030542374,
|
|
"fcm_dpo/margin": 224.13467407226562,
|
|
"fcm_dpo/q_t": 0.40537285804748535,
|
|
"grad_norm": 27.918197631835938,
|
|
"learning_rate": 2.8326761550411346e-07,
|
|
"logits/chosen": -0.47454479336738586,
|
|
"logits/rejected": -0.477811336517334,
|
|
"logps/chosen": -389.54541015625,
|
|
"logps/ref_chosen": -64.33049011230469,
|
|
"logps/ref_rejected": -89.87164306640625,
|
|
"logps/rejected": -639.2212524414062,
|
|
"loss": 1.0923,
|
|
"margin_dpo/margin_mean": 224.13467407226562,
|
|
"margin_dpo/margin_std": 306.51544189453125,
|
|
"step": 350
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -301.2596130371094,
|
|
"KL/mean": -436.33697509765625,
|
|
"KL/rejected_KL_mean": -571.4143676757812,
|
|
"KL/std": 292.23785400390625,
|
|
"epoch": 0.5154185022026432,
|
|
"fcm_dpo/beta": 0.0018058628775179386,
|
|
"fcm_dpo/delta": -0.09307081252336502,
|
|
"fcm_dpo/margin": 270.1547546386719,
|
|
"fcm_dpo/q_t": 0.3909297585487366,
|
|
"grad_norm": 29.366575241088867,
|
|
"learning_rate": 2.819952656376487e-07,
|
|
"logits/chosen": -0.44908708333969116,
|
|
"logits/rejected": -0.44883760809898376,
|
|
"logps/chosen": -361.9317321777344,
|
|
"logps/ref_chosen": -60.6721305847168,
|
|
"logps/ref_rejected": -101.5654296875,
|
|
"logps/rejected": -672.9797973632812,
|
|
"loss": 1.0433,
|
|
"margin_dpo/margin_mean": 270.15478515625,
|
|
"margin_dpo/margin_std": 341.22308349609375,
|
|
"step": 351
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -359.5778503417969,
|
|
"KL/mean": -438.5601806640625,
|
|
"KL/rejected_KL_mean": -517.5425415039062,
|
|
"KL/std": 265.6382141113281,
|
|
"epoch": 0.5168869309838473,
|
|
"fcm_dpo/beta": 0.0018303534016013145,
|
|
"fcm_dpo/delta": 0.11433063447475433,
|
|
"fcm_dpo/margin": 157.96470642089844,
|
|
"fcm_dpo/q_t": 0.4339728057384491,
|
|
"grad_norm": 27.643993377685547,
|
|
"learning_rate": 2.8072207266617854e-07,
|
|
"logits/chosen": -0.47322726249694824,
|
|
"logits/rejected": -0.4383177161216736,
|
|
"logps/chosen": -430.52130126953125,
|
|
"logps/ref_chosen": -70.9434585571289,
|
|
"logps/ref_rejected": -76.6419677734375,
|
|
"logps/rejected": -594.1845092773438,
|
|
"loss": 1.1878,
|
|
"margin_dpo/margin_mean": 157.96470642089844,
|
|
"margin_dpo/margin_std": 294.22454833984375,
|
|
"step": 352
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -324.626953125,
|
|
"KL/mean": -434.9208068847656,
|
|
"KL/rejected_KL_mean": -545.214599609375,
|
|
"KL/std": 259.8445739746094,
|
|
"epoch": 0.5183553597650514,
|
|
"fcm_dpo/beta": 0.001833123154938221,
|
|
"fcm_dpo/delta": -0.005401637405157089,
|
|
"fcm_dpo/margin": 220.58767700195312,
|
|
"fcm_dpo/q_t": 0.4085083305835724,
|
|
"grad_norm": 36.342987060546875,
|
|
"learning_rate": 2.794480701395219e-07,
|
|
"logits/chosen": -0.48048973083496094,
|
|
"logits/rejected": -0.46663162112236023,
|
|
"logps/chosen": -383.02227783203125,
|
|
"logps/ref_chosen": -58.39533996582031,
|
|
"logps/ref_rejected": -80.33553314208984,
|
|
"logps/rejected": -625.5501708984375,
|
|
"loss": 1.0973,
|
|
"margin_dpo/margin_mean": 220.58767700195312,
|
|
"margin_dpo/margin_std": 301.33837890625,
|
|
"step": 353
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -281.23583984375,
|
|
"KL/mean": -396.7293701171875,
|
|
"KL/rejected_KL_mean": -512.222900390625,
|
|
"KL/std": 240.41439819335938,
|
|
"epoch": 0.5198237885462555,
|
|
"fcm_dpo/beta": 0.001836308278143406,
|
|
"fcm_dpo/delta": -0.025253523141145706,
|
|
"fcm_dpo/margin": 230.987060546875,
|
|
"fcm_dpo/q_t": 0.4003763198852539,
|
|
"grad_norm": 22.05979347229004,
|
|
"learning_rate": 2.781732916288303e-07,
|
|
"logits/chosen": -0.4675145745277405,
|
|
"logits/rejected": -0.4558253884315491,
|
|
"logps/chosen": -341.038818359375,
|
|
"logps/ref_chosen": -59.80299377441406,
|
|
"logps/ref_rejected": -88.75750732421875,
|
|
"logps/rejected": -600.9804077148438,
|
|
"loss": 1.0521,
|
|
"margin_dpo/margin_mean": 230.987060546875,
|
|
"margin_dpo/margin_std": 238.68994140625,
|
|
"step": 354
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -278.87738037109375,
|
|
"KL/mean": -394.18597412109375,
|
|
"KL/rejected_KL_mean": -509.4945983886719,
|
|
"KL/std": 236.02166748046875,
|
|
"epoch": 0.5212922173274597,
|
|
"fcm_dpo/beta": 0.0018241136567667127,
|
|
"fcm_dpo/delta": -0.021784018725156784,
|
|
"fcm_dpo/margin": 230.61721801757812,
|
|
"fcm_dpo/q_t": 0.4008174538612366,
|
|
"grad_norm": 39.719356536865234,
|
|
"learning_rate": 2.7689777072570284e-07,
|
|
"logits/chosen": -0.5206550359725952,
|
|
"logits/rejected": -0.5103884339332581,
|
|
"logps/chosen": -333.0058898925781,
|
|
"logps/ref_chosen": -54.12849807739258,
|
|
"logps/ref_rejected": -82.40606689453125,
|
|
"logps/rejected": -591.900634765625,
|
|
"loss": 1.0547,
|
|
"margin_dpo/margin_mean": 230.6171875,
|
|
"margin_dpo/margin_std": 236.90528869628906,
|
|
"step": 355
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -327.414794921875,
|
|
"KL/mean": -388.6732177734375,
|
|
"KL/rejected_KL_mean": -449.9316101074219,
|
|
"KL/std": 244.87353515625,
|
|
"epoch": 0.5227606461086637,
|
|
"fcm_dpo/beta": 0.0018336132634431124,
|
|
"fcm_dpo/delta": 0.03628718480467796,
|
|
"fcm_dpo/margin": 122.51679992675781,
|
|
"fcm_dpo/q_t": 0.4491380453109741,
|
|
"grad_norm": 27.5300350189209,
|
|
"learning_rate": 2.7562154104130176e-07,
|
|
"logits/chosen": -0.4934132695198059,
|
|
"logits/rejected": -0.46816959977149963,
|
|
"logps/chosen": -392.088623046875,
|
|
"logps/ref_chosen": -64.6738052368164,
|
|
"logps/ref_rejected": -75.89926147460938,
|
|
"logps/rejected": -525.8308715820312,
|
|
"loss": 1.2489,
|
|
"margin_dpo/margin_mean": 122.51680755615234,
|
|
"margin_dpo/margin_std": 300.3589782714844,
|
|
"step": 356
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -283.6260681152344,
|
|
"KL/mean": -386.2947082519531,
|
|
"KL/rejected_KL_mean": -488.9633483886719,
|
|
"KL/std": 233.4674835205078,
|
|
"epoch": 0.5242290748898678,
|
|
"fcm_dpo/beta": 0.0018397256499156356,
|
|
"fcm_dpo/delta": 0.022911615669727325,
|
|
"fcm_dpo/margin": 205.3372802734375,
|
|
"fcm_dpo/q_t": 0.41156771779060364,
|
|
"grad_norm": 27.42888641357422,
|
|
"learning_rate": 2.7434463620546594e-07,
|
|
"logits/chosen": -0.4698370695114136,
|
|
"logits/rejected": -0.4574124217033386,
|
|
"logps/chosen": -336.35186767578125,
|
|
"logps/ref_chosen": -52.725799560546875,
|
|
"logps/ref_rejected": -86.84115600585938,
|
|
"logps/rejected": -575.8045043945312,
|
|
"loss": 1.0915,
|
|
"margin_dpo/margin_mean": 205.3372802734375,
|
|
"margin_dpo/margin_std": 241.0042724609375,
|
|
"step": 357
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -264.0908508300781,
|
|
"KL/mean": -358.65875244140625,
|
|
"KL/rejected_KL_mean": -453.22662353515625,
|
|
"KL/std": 234.24258422851562,
|
|
"epoch": 0.5256975036710719,
|
|
"fcm_dpo/beta": 0.001860738848336041,
|
|
"fcm_dpo/delta": 0.04985009878873825,
|
|
"fcm_dpo/margin": 189.13580322265625,
|
|
"fcm_dpo/q_t": 0.41889050602912903,
|
|
"grad_norm": 24.31972312927246,
|
|
"learning_rate": 2.730670898658255e-07,
|
|
"logits/chosen": -0.48346900939941406,
|
|
"logits/rejected": -0.4679170846939087,
|
|
"logps/chosen": -327.2962646484375,
|
|
"logps/ref_chosen": -63.20543670654297,
|
|
"logps/ref_rejected": -88.373291015625,
|
|
"logps/rejected": -541.5999755859375,
|
|
"loss": 1.1185,
|
|
"margin_dpo/margin_mean": 189.13580322265625,
|
|
"margin_dpo/margin_std": 257.4248352050781,
|
|
"step": 358
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -305.0037841796875,
|
|
"KL/mean": -414.48199462890625,
|
|
"KL/rejected_KL_mean": -523.960205078125,
|
|
"KL/std": 237.02291870117188,
|
|
"epoch": 0.527165932452276,
|
|
"fcm_dpo/beta": 0.0018613252323120832,
|
|
"fcm_dpo/delta": -0.007966313511133194,
|
|
"fcm_dpo/margin": 218.9563446044922,
|
|
"fcm_dpo/q_t": 0.4068043828010559,
|
|
"grad_norm": 26.327983856201172,
|
|
"learning_rate": 2.717889356869146e-07,
|
|
"logits/chosen": -0.43576061725616455,
|
|
"logits/rejected": -0.42485448718070984,
|
|
"logps/chosen": -361.3740234375,
|
|
"logps/ref_chosen": -56.370216369628906,
|
|
"logps/ref_rejected": -82.17375183105469,
|
|
"logps/rejected": -606.1339111328125,
|
|
"loss": 1.0842,
|
|
"margin_dpo/margin_mean": 218.95635986328125,
|
|
"margin_dpo/margin_std": 277.9966735839844,
|
|
"step": 359
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -296.8334045410156,
|
|
"KL/mean": -387.6656188964844,
|
|
"KL/rejected_KL_mean": -478.4978332519531,
|
|
"KL/std": 208.1656951904297,
|
|
"epoch": 0.5286343612334802,
|
|
"fcm_dpo/beta": 0.001883307471871376,
|
|
"fcm_dpo/delta": 0.05981479212641716,
|
|
"fcm_dpo/margin": 181.6644287109375,
|
|
"fcm_dpo/q_t": 0.41872933506965637,
|
|
"grad_norm": 31.305709838867188,
|
|
"learning_rate": 2.7051020734928443e-07,
|
|
"logits/chosen": -0.4398846924304962,
|
|
"logits/rejected": -0.425040602684021,
|
|
"logps/chosen": -348.2937927246094,
|
|
"logps/ref_chosen": -51.460384368896484,
|
|
"logps/ref_rejected": -69.83892059326172,
|
|
"logps/rejected": -548.3367919921875,
|
|
"loss": 1.1061,
|
|
"margin_dpo/margin_mean": 181.6644287109375,
|
|
"margin_dpo/margin_std": 195.6152801513672,
|
|
"step": 360
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -330.93896484375,
|
|
"KL/mean": -420.14593505859375,
|
|
"KL/rejected_KL_mean": -509.3529052734375,
|
|
"KL/std": 238.32623291015625,
|
|
"epoch": 0.5301027900146843,
|
|
"fcm_dpo/beta": 0.0019093567971140146,
|
|
"fcm_dpo/delta": 0.061102624982595444,
|
|
"fcm_dpo/margin": 178.4139404296875,
|
|
"fcm_dpo/q_t": 0.4211677312850952,
|
|
"grad_norm": 29.791786193847656,
|
|
"learning_rate": 2.6923093854861593e-07,
|
|
"logits/chosen": -0.4700263738632202,
|
|
"logits/rejected": -0.4652746319770813,
|
|
"logps/chosen": -384.8084716796875,
|
|
"logps/ref_chosen": -53.86951446533203,
|
|
"logps/ref_rejected": -90.7692642211914,
|
|
"logps/rejected": -600.1221923828125,
|
|
"loss": 1.1334,
|
|
"margin_dpo/margin_mean": 178.41392517089844,
|
|
"margin_dpo/margin_std": 260.59844970703125,
|
|
"step": 361
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -293.47247314453125,
|
|
"KL/mean": -433.2166442871094,
|
|
"KL/rejected_KL_mean": -572.9608154296875,
|
|
"KL/std": 255.72384643554688,
|
|
"epoch": 0.5315712187958884,
|
|
"fcm_dpo/beta": 0.0018680819775909185,
|
|
"fcm_dpo/delta": -0.129384845495224,
|
|
"fcm_dpo/margin": 279.48834228515625,
|
|
"fcm_dpo/q_t": 0.37981897592544556,
|
|
"grad_norm": 21.00020408630371,
|
|
"learning_rate": 2.679511629948319e-07,
|
|
"logits/chosen": -0.4529603123664856,
|
|
"logits/rejected": -0.46245017647743225,
|
|
"logps/chosen": -352.1115417480469,
|
|
"logps/ref_chosen": -58.639060974121094,
|
|
"logps/ref_rejected": -105.58195495605469,
|
|
"logps/rejected": -678.542724609375,
|
|
"loss": 0.9909,
|
|
"margin_dpo/margin_mean": 279.48834228515625,
|
|
"margin_dpo/margin_std": 277.12896728515625,
|
|
"step": 362
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -272.2425537109375,
|
|
"KL/mean": -413.42852783203125,
|
|
"KL/rejected_KL_mean": -554.614501953125,
|
|
"KL/std": 253.0455322265625,
|
|
"epoch": 0.5330396475770925,
|
|
"fcm_dpo/beta": 0.0018322591204196215,
|
|
"fcm_dpo/delta": -0.12359863519668579,
|
|
"fcm_dpo/margin": 282.3719482421875,
|
|
"fcm_dpo/q_t": 0.380690336227417,
|
|
"grad_norm": 26.93399429321289,
|
|
"learning_rate": 2.6667091441120816e-07,
|
|
"logits/chosen": -0.4361415505409241,
|
|
"logits/rejected": -0.42436856031417847,
|
|
"logps/chosen": -316.80096435546875,
|
|
"logps/ref_chosen": -44.558380126953125,
|
|
"logps/ref_rejected": -74.69496154785156,
|
|
"logps/rejected": -629.3094482421875,
|
|
"loss": 0.9956,
|
|
"margin_dpo/margin_mean": 282.3719482421875,
|
|
"margin_dpo/margin_std": 281.8260803222656,
|
|
"step": 363
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -291.7043151855469,
|
|
"KL/mean": -398.51031494140625,
|
|
"KL/rejected_KL_mean": -505.3163757324219,
|
|
"KL/std": 243.93292236328125,
|
|
"epoch": 0.5345080763582967,
|
|
"fcm_dpo/beta": 0.001825526007451117,
|
|
"fcm_dpo/delta": 0.010103408247232437,
|
|
"fcm_dpo/margin": 213.612060546875,
|
|
"fcm_dpo/q_t": 0.40954408049583435,
|
|
"grad_norm": 27.668123245239258,
|
|
"learning_rate": 2.6539022653348575e-07,
|
|
"logits/chosen": -0.4502606987953186,
|
|
"logits/rejected": -0.46004268527030945,
|
|
"logps/chosen": -340.59893798828125,
|
|
"logps/ref_chosen": -48.894622802734375,
|
|
"logps/ref_rejected": -91.395751953125,
|
|
"logps/rejected": -596.712158203125,
|
|
"loss": 1.0951,
|
|
"margin_dpo/margin_mean": 213.612060546875,
|
|
"margin_dpo/margin_std": 277.07574462890625,
|
|
"step": 364
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -293.99603271484375,
|
|
"KL/mean": -406.5279541015625,
|
|
"KL/rejected_KL_mean": -519.0599365234375,
|
|
"KL/std": 261.5389099121094,
|
|
"epoch": 0.5359765051395007,
|
|
"fcm_dpo/beta": 0.0018164238426834345,
|
|
"fcm_dpo/delta": -0.009190201759338379,
|
|
"fcm_dpo/margin": 225.06381225585938,
|
|
"fcm_dpo/q_t": 0.4062727391719818,
|
|
"grad_norm": 21.68266487121582,
|
|
"learning_rate": 2.641091331089811e-07,
|
|
"logits/chosen": -0.4506559371948242,
|
|
"logits/rejected": -0.46002912521362305,
|
|
"logps/chosen": -345.48876953125,
|
|
"logps/ref_chosen": -51.49274444580078,
|
|
"logps/ref_rejected": -92.70166778564453,
|
|
"logps/rejected": -611.7615966796875,
|
|
"loss": 1.074,
|
|
"margin_dpo/margin_mean": 225.06381225585938,
|
|
"margin_dpo/margin_std": 269.61572265625,
|
|
"step": 365
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -268.130615234375,
|
|
"KL/mean": -385.05804443359375,
|
|
"KL/rejected_KL_mean": -501.9854736328125,
|
|
"KL/std": 256.3507385253906,
|
|
"epoch": 0.5374449339207048,
|
|
"fcm_dpo/beta": 0.0018009209306910634,
|
|
"fcm_dpo/delta": -0.022701263427734375,
|
|
"fcm_dpo/margin": 233.8548583984375,
|
|
"fcm_dpo/q_t": 0.40347927808761597,
|
|
"grad_norm": 23.559268951416016,
|
|
"learning_rate": 2.6282766789569736e-07,
|
|
"logits/chosen": -0.4541221857070923,
|
|
"logits/rejected": -0.46955257654190063,
|
|
"logps/chosen": -312.8511962890625,
|
|
"logps/ref_chosen": -44.7205696105957,
|
|
"logps/ref_rejected": -83.31040954589844,
|
|
"logps/rejected": -585.2958984375,
|
|
"loss": 1.0802,
|
|
"margin_dpo/margin_mean": 233.85484313964844,
|
|
"margin_dpo/margin_std": 303.1095275878906,
|
|
"step": 366
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -283.7847900390625,
|
|
"KL/mean": -377.00250244140625,
|
|
"KL/rejected_KL_mean": -470.22015380859375,
|
|
"KL/std": 222.3704376220703,
|
|
"epoch": 0.5389133627019089,
|
|
"fcm_dpo/beta": 0.0018272295128554106,
|
|
"fcm_dpo/delta": 0.06128734350204468,
|
|
"fcm_dpo/margin": 186.4353485107422,
|
|
"fcm_dpo/q_t": 0.419744074344635,
|
|
"grad_norm": 18.683847427368164,
|
|
"learning_rate": 2.615458646614349e-07,
|
|
"logits/chosen": -0.49926167726516724,
|
|
"logits/rejected": -0.4835873246192932,
|
|
"logps/chosen": -342.1902160644531,
|
|
"logps/ref_chosen": -58.405418395996094,
|
|
"logps/ref_rejected": -76.75132751464844,
|
|
"logps/rejected": -546.971435546875,
|
|
"loss": 1.1238,
|
|
"margin_dpo/margin_mean": 186.4353485107422,
|
|
"margin_dpo/margin_std": 248.96206665039062,
|
|
"step": 367
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -259.47998046875,
|
|
"KL/mean": -409.32464599609375,
|
|
"KL/rejected_KL_mean": -559.1693115234375,
|
|
"KL/std": 250.1630859375,
|
|
"epoch": 0.540381791483113,
|
|
"fcm_dpo/beta": 0.0017914584605023265,
|
|
"fcm_dpo/delta": -0.14462688565254211,
|
|
"fcm_dpo/margin": 299.6893310546875,
|
|
"fcm_dpo/q_t": 0.37201637029647827,
|
|
"grad_norm": 36.82114791870117,
|
|
"learning_rate": 2.6026375718290083e-07,
|
|
"logits/chosen": -0.4651143252849579,
|
|
"logits/rejected": -0.4765470325946808,
|
|
"logps/chosen": -303.9324951171875,
|
|
"logps/ref_chosen": -44.452518463134766,
|
|
"logps/ref_rejected": -98.55526733398438,
|
|
"logps/rejected": -657.724609375,
|
|
"loss": 0.9563,
|
|
"margin_dpo/margin_mean": 299.6893310546875,
|
|
"margin_dpo/margin_std": 219.16549682617188,
|
|
"step": 368
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -349.58837890625,
|
|
"KL/mean": -428.125244140625,
|
|
"KL/rejected_KL_mean": -506.66204833984375,
|
|
"KL/std": 251.02548217773438,
|
|
"epoch": 0.5418502202643172,
|
|
"fcm_dpo/beta": 0.0018156407168135047,
|
|
"fcm_dpo/delta": 0.11753154546022415,
|
|
"fcm_dpo/margin": 157.07363891601562,
|
|
"fcm_dpo/q_t": 0.43435177206993103,
|
|
"grad_norm": 28.619403839111328,
|
|
"learning_rate": 2.589813792448196e-07,
|
|
"logits/chosen": -0.47754406929016113,
|
|
"logits/rejected": -0.460124135017395,
|
|
"logps/chosen": -420.96990966796875,
|
|
"logps/ref_chosen": -71.38150024414062,
|
|
"logps/ref_rejected": -91.29582214355469,
|
|
"logps/rejected": -597.9578857421875,
|
|
"loss": 1.1905,
|
|
"margin_dpo/margin_mean": 157.07363891601562,
|
|
"margin_dpo/margin_std": 296.42974853515625,
|
|
"step": 369
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -352.63336181640625,
|
|
"KL/mean": -427.6588134765625,
|
|
"KL/rejected_KL_mean": -502.6842346191406,
|
|
"KL/std": 246.27041625976562,
|
|
"epoch": 0.5433186490455213,
|
|
"fcm_dpo/beta": 0.0018587787635624409,
|
|
"fcm_dpo/delta": 0.12413851916790009,
|
|
"fcm_dpo/margin": 150.05091857910156,
|
|
"fcm_dpo/q_t": 0.43549519777297974,
|
|
"grad_norm": 25.48780059814453,
|
|
"learning_rate": 2.5769876463904263e-07,
|
|
"logits/chosen": -0.47866642475128174,
|
|
"logits/rejected": -0.47251564264297485,
|
|
"logps/chosen": -424.2408447265625,
|
|
"logps/ref_chosen": -71.60749816894531,
|
|
"logps/ref_rejected": -97.25978088378906,
|
|
"logps/rejected": -599.9440307617188,
|
|
"loss": 1.1907,
|
|
"margin_dpo/margin_mean": 150.05091857910156,
|
|
"margin_dpo/margin_std": 276.411376953125,
|
|
"step": 370
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -345.16607666015625,
|
|
"KL/mean": -453.348876953125,
|
|
"KL/rejected_KL_mean": -561.5316772460938,
|
|
"KL/std": 264.92401123046875,
|
|
"epoch": 0.5447870778267254,
|
|
"fcm_dpo/beta": 0.001868913066573441,
|
|
"fcm_dpo/delta": -0.004648314788937569,
|
|
"fcm_dpo/margin": 216.36563110351562,
|
|
"fcm_dpo/q_t": 0.40844932198524475,
|
|
"grad_norm": 24.080177307128906,
|
|
"learning_rate": 2.5641594716365744e-07,
|
|
"logits/chosen": -0.5107867121696472,
|
|
"logits/rejected": -0.49860259890556335,
|
|
"logps/chosen": -414.58056640625,
|
|
"logps/ref_chosen": -69.41448974609375,
|
|
"logps/ref_rejected": -99.17217254638672,
|
|
"logps/rejected": -660.703857421875,
|
|
"loss": 1.1036,
|
|
"margin_dpo/margin_mean": 216.36563110351562,
|
|
"margin_dpo/margin_std": 315.74822998046875,
|
|
"step": 371
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -327.27838134765625,
|
|
"KL/mean": -456.94696044921875,
|
|
"KL/rejected_KL_mean": -586.6155395507812,
|
|
"KL/std": 292.30975341796875,
|
|
"epoch": 0.5462555066079295,
|
|
"fcm_dpo/beta": 0.001838641008362174,
|
|
"fcm_dpo/delta": -0.08088327199220657,
|
|
"fcm_dpo/margin": 259.337158203125,
|
|
"fcm_dpo/q_t": 0.3929249942302704,
|
|
"grad_norm": 23.18116569519043,
|
|
"learning_rate": 2.551329606220976e-07,
|
|
"logits/chosen": -0.47245320677757263,
|
|
"logits/rejected": -0.44926324486732483,
|
|
"logps/chosen": -389.09637451171875,
|
|
"logps/ref_chosen": -61.8179931640625,
|
|
"logps/ref_rejected": -78.53948974609375,
|
|
"logps/rejected": -665.155029296875,
|
|
"loss": 1.0477,
|
|
"margin_dpo/margin_mean": 259.3371887207031,
|
|
"margin_dpo/margin_std": 330.2596435546875,
|
|
"step": 372
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -352.394775390625,
|
|
"KL/mean": -474.00799560546875,
|
|
"KL/rejected_KL_mean": -595.6212158203125,
|
|
"KL/std": 281.63482666015625,
|
|
"epoch": 0.5477239353891337,
|
|
"fcm_dpo/beta": 0.0018302889075130224,
|
|
"fcm_dpo/delta": -0.04753255099058151,
|
|
"fcm_dpo/margin": 243.22650146484375,
|
|
"fcm_dpo/q_t": 0.39616119861602783,
|
|
"grad_norm": 29.970626831054688,
|
|
"learning_rate": 2.538498388222517e-07,
|
|
"logits/chosen": -0.4819701910018921,
|
|
"logits/rejected": -0.45813024044036865,
|
|
"logps/chosen": -416.6119079589844,
|
|
"logps/ref_chosen": -64.21713256835938,
|
|
"logps/ref_rejected": -85.95960998535156,
|
|
"logps/rejected": -681.580810546875,
|
|
"loss": 1.0512,
|
|
"margin_dpo/margin_mean": 243.22650146484375,
|
|
"margin_dpo/margin_std": 269.6577453613281,
|
|
"step": 373
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -333.73046875,
|
|
"KL/mean": -441.07891845703125,
|
|
"KL/rejected_KL_mean": -548.4273681640625,
|
|
"KL/std": 305.963623046875,
|
|
"epoch": 0.5491923641703378,
|
|
"fcm_dpo/beta": 0.0018100242596119642,
|
|
"fcm_dpo/delta": 0.011296160519123077,
|
|
"fcm_dpo/margin": 214.69683837890625,
|
|
"fcm_dpo/q_t": 0.4148157835006714,
|
|
"grad_norm": 37.50387954711914,
|
|
"learning_rate": 2.525666155755725e-07,
|
|
"logits/chosen": -0.5606328845024109,
|
|
"logits/rejected": -0.542881429195404,
|
|
"logps/chosen": -404.38067626953125,
|
|
"logps/ref_chosen": -70.65018463134766,
|
|
"logps/ref_rejected": -93.64016723632812,
|
|
"logps/rejected": -642.0675048828125,
|
|
"loss": 1.128,
|
|
"margin_dpo/margin_mean": 214.6968536376953,
|
|
"margin_dpo/margin_std": 351.6225891113281,
|
|
"step": 374
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -341.535888671875,
|
|
"KL/mean": -445.7072448730469,
|
|
"KL/rejected_KL_mean": -549.8786010742188,
|
|
"KL/std": 251.57171630859375,
|
|
"epoch": 0.5506607929515418,
|
|
"fcm_dpo/beta": 0.0018158955499529839,
|
|
"fcm_dpo/delta": 0.021893244236707687,
|
|
"fcm_dpo/margin": 208.34274291992188,
|
|
"fcm_dpo/q_t": 0.4129091203212738,
|
|
"grad_norm": 48.105567932128906,
|
|
"learning_rate": 2.512833246961859e-07,
|
|
"logits/chosen": -0.49703970551490784,
|
|
"logits/rejected": -0.4937781095504761,
|
|
"logps/chosen": -401.6160888671875,
|
|
"logps/ref_chosen": -60.080223083496094,
|
|
"logps/ref_rejected": -88.93830871582031,
|
|
"logps/rejected": -638.81689453125,
|
|
"loss": 1.1173,
|
|
"margin_dpo/margin_mean": 208.3427276611328,
|
|
"margin_dpo/margin_std": 301.77508544921875,
|
|
"step": 375
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -335.3367919921875,
|
|
"KL/mean": -466.28643798828125,
|
|
"KL/rejected_KL_mean": -597.236083984375,
|
|
"KL/std": 274.84326171875,
|
|
"epoch": 0.5521292217327459,
|
|
"fcm_dpo/beta": 0.0018059706781059504,
|
|
"fcm_dpo/delta": -0.07673737406730652,
|
|
"fcm_dpo/margin": 261.89935302734375,
|
|
"fcm_dpo/q_t": 0.39158695936203003,
|
|
"grad_norm": 26.295751571655273,
|
|
"learning_rate": 2.5e-07,
|
|
"logits/chosen": -0.5031468272209167,
|
|
"logits/rejected": -0.49027374386787415,
|
|
"logps/chosen": -397.9971008300781,
|
|
"logps/ref_chosen": -62.660308837890625,
|
|
"logps/ref_rejected": -105.52660369873047,
|
|
"logps/rejected": -702.7626953125,
|
|
"loss": 1.0434,
|
|
"margin_dpo/margin_mean": 261.89935302734375,
|
|
"margin_dpo/margin_std": 315.4093322753906,
|
|
"step": 376
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -340.5239562988281,
|
|
"KL/mean": -469.12371826171875,
|
|
"KL/rejected_KL_mean": -597.7235107421875,
|
|
"KL/std": 295.64276123046875,
|
|
"epoch": 0.55359765051395,
|
|
"fcm_dpo/beta": 0.0017904455307871103,
|
|
"fcm_dpo/delta": -0.06344657391309738,
|
|
"fcm_dpo/margin": 257.19952392578125,
|
|
"fcm_dpo/q_t": 0.39518678188323975,
|
|
"grad_norm": 22.948881149291992,
|
|
"learning_rate": 2.487166753038141e-07,
|
|
"logits/chosen": -0.4441227614879608,
|
|
"logits/rejected": -0.44588595628738403,
|
|
"logps/chosen": -395.002685546875,
|
|
"logps/ref_chosen": -54.478736877441406,
|
|
"logps/ref_rejected": -98.70335388183594,
|
|
"logps/rejected": -696.4268188476562,
|
|
"loss": 1.0531,
|
|
"margin_dpo/margin_mean": 257.19952392578125,
|
|
"margin_dpo/margin_std": 316.891357421875,
|
|
"step": 377
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -318.0654296875,
|
|
"KL/mean": -454.87060546875,
|
|
"KL/rejected_KL_mean": -591.67578125,
|
|
"KL/std": 273.9302978515625,
|
|
"epoch": 0.5550660792951542,
|
|
"fcm_dpo/beta": 0.0017552496865391731,
|
|
"fcm_dpo/delta": -0.08436104655265808,
|
|
"fcm_dpo/margin": 273.6103820800781,
|
|
"fcm_dpo/q_t": 0.38744112849235535,
|
|
"grad_norm": 28.960853576660156,
|
|
"learning_rate": 2.4743338442442754e-07,
|
|
"logits/chosen": -0.4610844552516937,
|
|
"logits/rejected": -0.47619086503982544,
|
|
"logps/chosen": -363.0859680175781,
|
|
"logps/ref_chosen": -45.02053451538086,
|
|
"logps/ref_rejected": -88.0469741821289,
|
|
"logps/rejected": -679.7227783203125,
|
|
"loss": 1.0243,
|
|
"margin_dpo/margin_mean": 273.6103515625,
|
|
"margin_dpo/margin_std": 292.38726806640625,
|
|
"step": 378
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -354.04534912109375,
|
|
"KL/mean": -489.762939453125,
|
|
"KL/rejected_KL_mean": -625.4805908203125,
|
|
"KL/std": 277.69482421875,
|
|
"epoch": 0.5565345080763583,
|
|
"fcm_dpo/beta": 0.001722155138850212,
|
|
"fcm_dpo/delta": -0.07153955847024918,
|
|
"fcm_dpo/margin": 271.4352722167969,
|
|
"fcm_dpo/q_t": 0.3932555019855499,
|
|
"grad_norm": 25.876663208007812,
|
|
"learning_rate": 2.461501611777483e-07,
|
|
"logits/chosen": -0.43229052424430847,
|
|
"logits/rejected": -0.4548417925834656,
|
|
"logps/chosen": -407.2274475097656,
|
|
"logps/ref_chosen": -53.182098388671875,
|
|
"logps/ref_rejected": -114.3001708984375,
|
|
"logps/rejected": -739.78076171875,
|
|
"loss": 1.0471,
|
|
"margin_dpo/margin_mean": 271.4352722167969,
|
|
"margin_dpo/margin_std": 328.153076171875,
|
|
"step": 379
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -363.6824645996094,
|
|
"KL/mean": -511.7635192871094,
|
|
"KL/rejected_KL_mean": -659.8446044921875,
|
|
"KL/std": 315.0853576660156,
|
|
"epoch": 0.5580029368575624,
|
|
"fcm_dpo/beta": 0.0016977135092020035,
|
|
"fcm_dpo/delta": -0.10816927254199982,
|
|
"fcm_dpo/margin": 296.16217041015625,
|
|
"fcm_dpo/q_t": 0.3837231993675232,
|
|
"grad_norm": 24.72132682800293,
|
|
"learning_rate": 2.4486703937790243e-07,
|
|
"logits/chosen": -0.4478057622909546,
|
|
"logits/rejected": -0.47531557083129883,
|
|
"logps/chosen": -415.0354919433594,
|
|
"logps/ref_chosen": -51.3530387878418,
|
|
"logps/ref_rejected": -104.19169616699219,
|
|
"logps/rejected": -764.0363159179688,
|
|
"loss": 1.0225,
|
|
"margin_dpo/margin_mean": 296.16217041015625,
|
|
"margin_dpo/margin_std": 341.88568115234375,
|
|
"step": 380
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -377.09307861328125,
|
|
"KL/mean": -475.0587158203125,
|
|
"KL/rejected_KL_mean": -573.0244140625,
|
|
"KL/std": 262.92779541015625,
|
|
"epoch": 0.5594713656387665,
|
|
"fcm_dpo/beta": 0.0017013371689245105,
|
|
"fcm_dpo/delta": 0.06901153177022934,
|
|
"fcm_dpo/margin": 195.93124389648438,
|
|
"fcm_dpo/q_t": 0.4243543744087219,
|
|
"grad_norm": 35.74467849731445,
|
|
"learning_rate": 2.435840528363426e-07,
|
|
"logits/chosen": -0.4533649682998657,
|
|
"logits/rejected": -0.4306221902370453,
|
|
"logps/chosen": -434.89617919921875,
|
|
"logps/ref_chosen": -57.80306625366211,
|
|
"logps/ref_rejected": -79.21940612792969,
|
|
"logps/rejected": -652.2437744140625,
|
|
"loss": 1.169,
|
|
"margin_dpo/margin_mean": 195.93124389648438,
|
|
"margin_dpo/margin_std": 360.15875244140625,
|
|
"step": 381
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -365.1787109375,
|
|
"KL/mean": -495.492431640625,
|
|
"KL/rejected_KL_mean": -625.8062133789062,
|
|
"KL/std": 256.8338623046875,
|
|
"epoch": 0.5609397944199707,
|
|
"fcm_dpo/beta": 0.0017002095701172948,
|
|
"fcm_dpo/delta": -0.0450989231467247,
|
|
"fcm_dpo/margin": 260.62750244140625,
|
|
"fcm_dpo/q_t": 0.3970971703529358,
|
|
"grad_norm": 24.592235565185547,
|
|
"learning_rate": 2.4230123536095745e-07,
|
|
"logits/chosen": -0.49676984548568726,
|
|
"logits/rejected": -0.5024675130844116,
|
|
"logps/chosen": -431.1990051269531,
|
|
"logps/ref_chosen": -66.02030181884766,
|
|
"logps/ref_rejected": -110.71016693115234,
|
|
"logps/rejected": -736.516357421875,
|
|
"loss": 1.0485,
|
|
"margin_dpo/margin_mean": 260.62750244140625,
|
|
"margin_dpo/margin_std": 289.3924255371094,
|
|
"step": 382
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -379.60955810546875,
|
|
"KL/mean": -507.6312255859375,
|
|
"KL/rejected_KL_mean": -635.6529541015625,
|
|
"KL/std": 288.3782958984375,
|
|
"epoch": 0.5624082232011748,
|
|
"fcm_dpo/beta": 0.0016838510055094957,
|
|
"fcm_dpo/delta": -0.03266420215368271,
|
|
"fcm_dpo/margin": 256.0433349609375,
|
|
"fcm_dpo/q_t": 0.40153148770332336,
|
|
"grad_norm": 29.20414924621582,
|
|
"learning_rate": 2.4101862075518037e-07,
|
|
"logits/chosen": -0.46060335636138916,
|
|
"logits/rejected": -0.4702298641204834,
|
|
"logps/chosen": -430.00103759765625,
|
|
"logps/ref_chosen": -50.39148712158203,
|
|
"logps/ref_rejected": -93.71589660644531,
|
|
"logps/rejected": -729.3687744140625,
|
|
"loss": 1.0922,
|
|
"margin_dpo/margin_mean": 256.0433654785156,
|
|
"margin_dpo/margin_std": 373.36480712890625,
|
|
"step": 383
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -376.22772216796875,
|
|
"KL/mean": -477.7603759765625,
|
|
"KL/rejected_KL_mean": -579.2930908203125,
|
|
"KL/std": 248.6445770263672,
|
|
"epoch": 0.5638766519823789,
|
|
"fcm_dpo/beta": 0.001703817630186677,
|
|
"fcm_dpo/delta": 0.05552485212683678,
|
|
"fcm_dpo/margin": 203.06533813476562,
|
|
"fcm_dpo/q_t": 0.41818147897720337,
|
|
"grad_norm": 22.670204162597656,
|
|
"learning_rate": 2.397362428170992e-07,
|
|
"logits/chosen": -0.5022902488708496,
|
|
"logits/rejected": -0.49495917558670044,
|
|
"logps/chosen": -428.2738037109375,
|
|
"logps/ref_chosen": -52.046104431152344,
|
|
"logps/ref_rejected": -85.76089477539062,
|
|
"logps/rejected": -665.053955078125,
|
|
"loss": 1.1105,
|
|
"margin_dpo/margin_mean": 203.0653533935547,
|
|
"margin_dpo/margin_std": 239.64828491210938,
|
|
"step": 384
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -350.5365295410156,
|
|
"KL/mean": -468.0144958496094,
|
|
"KL/rejected_KL_mean": -585.492431640625,
|
|
"KL/std": 216.46690368652344,
|
|
"epoch": 0.5653450807635829,
|
|
"fcm_dpo/beta": 0.0017044099513441324,
|
|
"fcm_dpo/delta": -0.0005003036931157112,
|
|
"fcm_dpo/margin": 234.95591735839844,
|
|
"fcm_dpo/q_t": 0.4049755930900574,
|
|
"grad_norm": 28.35698127746582,
|
|
"learning_rate": 2.3845413533856514e-07,
|
|
"logits/chosen": -0.5131600499153137,
|
|
"logits/rejected": -0.4851377606391907,
|
|
"logps/chosen": -416.08868408203125,
|
|
"logps/ref_chosen": -65.55215454101562,
|
|
"logps/ref_rejected": -77.82792663574219,
|
|
"logps/rejected": -663.3204345703125,
|
|
"loss": 1.0662,
|
|
"margin_dpo/margin_mean": 234.95591735839844,
|
|
"margin_dpo/margin_std": 241.16229248046875,
|
|
"step": 385
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -364.15606689453125,
|
|
"KL/mean": -490.9974365234375,
|
|
"KL/rejected_KL_mean": -617.8388061523438,
|
|
"KL/std": 270.4039611816406,
|
|
"epoch": 0.566813509544787,
|
|
"fcm_dpo/beta": 0.0016963122179731727,
|
|
"fcm_dpo/delta": -0.03167739138007164,
|
|
"fcm_dpo/margin": 253.68276977539062,
|
|
"fcm_dpo/q_t": 0.40111613273620605,
|
|
"grad_norm": 32.36057662963867,
|
|
"learning_rate": 2.3717233210430254e-07,
|
|
"logits/chosen": -0.48662200570106506,
|
|
"logits/rejected": -0.48291075229644775,
|
|
"logps/chosen": -422.3779296875,
|
|
"logps/ref_chosen": -58.22185516357422,
|
|
"logps/ref_rejected": -92.32742309570312,
|
|
"logps/rejected": -710.166259765625,
|
|
"loss": 1.0717,
|
|
"margin_dpo/margin_mean": 253.68276977539062,
|
|
"margin_dpo/margin_std": 324.97998046875,
|
|
"step": 386
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -387.12628173828125,
|
|
"KL/mean": -492.91485595703125,
|
|
"KL/rejected_KL_mean": -598.7034301757812,
|
|
"KL/std": 246.6182403564453,
|
|
"epoch": 0.5682819383259912,
|
|
"fcm_dpo/beta": 0.0016972242156043649,
|
|
"fcm_dpo/delta": 0.042357347905635834,
|
|
"fcm_dpo/margin": 211.57708740234375,
|
|
"fcm_dpo/q_t": 0.4156672954559326,
|
|
"grad_norm": 27.685876846313477,
|
|
"learning_rate": 2.3589086689101889e-07,
|
|
"logits/chosen": -0.542807936668396,
|
|
"logits/rejected": -0.5214394330978394,
|
|
"logps/chosen": -453.5457458496094,
|
|
"logps/ref_chosen": -66.41944885253906,
|
|
"logps/ref_rejected": -92.16915893554688,
|
|
"logps/rejected": -690.87255859375,
|
|
"loss": 1.1055,
|
|
"margin_dpo/margin_mean": 211.57708740234375,
|
|
"margin_dpo/margin_std": 257.301513671875,
|
|
"step": 387
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -357.0862731933594,
|
|
"KL/mean": -501.005126953125,
|
|
"KL/rejected_KL_mean": -644.924072265625,
|
|
"KL/std": 291.4306335449219,
|
|
"epoch": 0.5697503671071953,
|
|
"fcm_dpo/beta": 0.001679969485849142,
|
|
"fcm_dpo/delta": -0.08814238011837006,
|
|
"fcm_dpo/margin": 287.8377380371094,
|
|
"fcm_dpo/q_t": 0.38981735706329346,
|
|
"grad_norm": 29.01787757873535,
|
|
"learning_rate": 2.3460977346651428e-07,
|
|
"logits/chosen": -0.4653438925743103,
|
|
"logits/rejected": -0.4758313298225403,
|
|
"logps/chosen": -407.2157287597656,
|
|
"logps/ref_chosen": -50.129459381103516,
|
|
"logps/ref_rejected": -104.43305969238281,
|
|
"logps/rejected": -749.3570556640625,
|
|
"loss": 1.0296,
|
|
"margin_dpo/margin_mean": 287.8377380371094,
|
|
"margin_dpo/margin_std": 330.7196044921875,
|
|
"step": 388
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -387.08868408203125,
|
|
"KL/mean": -507.94390869140625,
|
|
"KL/rejected_KL_mean": -628.7991333007812,
|
|
"KL/std": 287.0469970703125,
|
|
"epoch": 0.5712187958883994,
|
|
"fcm_dpo/beta": 0.001672594342380762,
|
|
"fcm_dpo/delta": -0.004527151584625244,
|
|
"fcm_dpo/margin": 241.71041870117188,
|
|
"fcm_dpo/q_t": 0.4067634344100952,
|
|
"grad_norm": 32.89521789550781,
|
|
"learning_rate": 2.3332908558879177e-07,
|
|
"logits/chosen": -0.5016822218894958,
|
|
"logits/rejected": -0.49213531613349915,
|
|
"logps/chosen": -444.99530029296875,
|
|
"logps/ref_chosen": -57.906593322753906,
|
|
"logps/ref_rejected": -77.91454315185547,
|
|
"logps/rejected": -706.7136840820312,
|
|
"loss": 1.0873,
|
|
"margin_dpo/margin_mean": 241.71041870117188,
|
|
"margin_dpo/margin_std": 314.6967468261719,
|
|
"step": 389
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -378.8408508300781,
|
|
"KL/mean": -499.02130126953125,
|
|
"KL/rejected_KL_mean": -619.20166015625,
|
|
"KL/std": 283.59832763671875,
|
|
"epoch": 0.5726872246696035,
|
|
"fcm_dpo/beta": 0.0016685773152858019,
|
|
"fcm_dpo/delta": -0.001378379762172699,
|
|
"fcm_dpo/margin": 240.36087036132812,
|
|
"fcm_dpo/q_t": 0.41037964820861816,
|
|
"grad_norm": 22.919740676879883,
|
|
"learning_rate": 2.320488370051681e-07,
|
|
"logits/chosen": -0.4558466672897339,
|
|
"logits/rejected": -0.45187222957611084,
|
|
"logps/chosen": -428.0667724609375,
|
|
"logps/ref_chosen": -49.22591781616211,
|
|
"logps/ref_rejected": -85.5281982421875,
|
|
"logps/rejected": -704.7298583984375,
|
|
"loss": 1.1064,
|
|
"margin_dpo/margin_mean": 240.36087036132812,
|
|
"margin_dpo/margin_std": 358.0471496582031,
|
|
"step": 390
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -380.08941650390625,
|
|
"KL/mean": -455.04193115234375,
|
|
"KL/rejected_KL_mean": -529.9944458007812,
|
|
"KL/std": 265.760986328125,
|
|
"epoch": 0.5741556534508077,
|
|
"fcm_dpo/beta": 0.0017130144406110048,
|
|
"fcm_dpo/delta": 0.1468581259250641,
|
|
"fcm_dpo/margin": 149.905029296875,
|
|
"fcm_dpo/q_t": 0.4409900903701782,
|
|
"grad_norm": 36.1956672668457,
|
|
"learning_rate": 2.3076906145138405e-07,
|
|
"logits/chosen": -0.4843197762966156,
|
|
"logits/rejected": -0.47525227069854736,
|
|
"logps/chosen": -444.4190673828125,
|
|
"logps/ref_chosen": -64.32965087890625,
|
|
"logps/ref_rejected": -86.73820495605469,
|
|
"logps/rejected": -616.732666015625,
|
|
"loss": 1.203,
|
|
"margin_dpo/margin_mean": 149.90504455566406,
|
|
"margin_dpo/margin_std": 278.86932373046875,
|
|
"step": 391
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -321.9182434082031,
|
|
"KL/mean": -463.30865478515625,
|
|
"KL/rejected_KL_mean": -604.6990966796875,
|
|
"KL/std": 282.4703674316406,
|
|
"epoch": 0.5756240822320118,
|
|
"fcm_dpo/beta": 0.001707045128569007,
|
|
"fcm_dpo/delta": -0.08683174103498459,
|
|
"fcm_dpo/margin": 282.7807922363281,
|
|
"fcm_dpo/q_t": 0.3881346583366394,
|
|
"grad_norm": 23.744003295898438,
|
|
"learning_rate": 2.294897926507156e-07,
|
|
"logits/chosen": -0.46998441219329834,
|
|
"logits/rejected": -0.4667205512523651,
|
|
"logps/chosen": -375.4222412109375,
|
|
"logps/ref_chosen": -53.50397872924805,
|
|
"logps/ref_rejected": -102.34584045410156,
|
|
"logps/rejected": -707.044921875,
|
|
"loss": 1.0159,
|
|
"margin_dpo/margin_mean": 282.78076171875,
|
|
"margin_dpo/margin_std": 284.8000183105469,
|
|
"step": 392
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -327.07318115234375,
|
|
"KL/mean": -438.6499938964844,
|
|
"KL/rejected_KL_mean": -550.226806640625,
|
|
"KL/std": 282.72235107421875,
|
|
"epoch": 0.5770925110132159,
|
|
"fcm_dpo/beta": 0.0016989409923553467,
|
|
"fcm_dpo/delta": 0.021702561527490616,
|
|
"fcm_dpo/margin": 223.1536102294922,
|
|
"fcm_dpo/q_t": 0.4159389138221741,
|
|
"grad_norm": 22.198171615600586,
|
|
"learning_rate": 2.2821106431308543e-07,
|
|
"logits/chosen": -0.4430672526359558,
|
|
"logits/rejected": -0.43764716386795044,
|
|
"logps/chosen": -373.547119140625,
|
|
"logps/ref_chosen": -46.473915100097656,
|
|
"logps/ref_rejected": -71.96885681152344,
|
|
"logps/rejected": -622.1956787109375,
|
|
"loss": 1.1229,
|
|
"margin_dpo/margin_mean": 223.1536102294922,
|
|
"margin_dpo/margin_std": 354.7700500488281,
|
|
"step": 393
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -356.56494140625,
|
|
"KL/mean": -474.52728271484375,
|
|
"KL/rejected_KL_mean": -592.4896240234375,
|
|
"KL/std": 290.612548828125,
|
|
"epoch": 0.57856093979442,
|
|
"fcm_dpo/beta": 0.0017010483425110579,
|
|
"fcm_dpo/delta": -0.001378481974825263,
|
|
"fcm_dpo/margin": 235.9246368408203,
|
|
"fcm_dpo/q_t": 0.40806007385253906,
|
|
"grad_norm": 21.11285972595215,
|
|
"learning_rate": 2.2693291013417452e-07,
|
|
"logits/chosen": -0.45615267753601074,
|
|
"logits/rejected": -0.45724251866340637,
|
|
"logps/chosen": -409.47650146484375,
|
|
"logps/ref_chosen": -52.91154861450195,
|
|
"logps/ref_rejected": -90.8226318359375,
|
|
"logps/rejected": -683.312255859375,
|
|
"loss": 1.0882,
|
|
"margin_dpo/margin_mean": 235.92465209960938,
|
|
"margin_dpo/margin_std": 308.0911865234375,
|
|
"step": 394
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -360.2952880859375,
|
|
"KL/mean": -485.2971496582031,
|
|
"KL/rejected_KL_mean": -610.299072265625,
|
|
"KL/std": 286.58233642578125,
|
|
"epoch": 0.580029368575624,
|
|
"fcm_dpo/beta": 0.0016905716620385647,
|
|
"fcm_dpo/delta": -0.023899473249912262,
|
|
"fcm_dpo/margin": 250.00375366210938,
|
|
"fcm_dpo/q_t": 0.4038696587085724,
|
|
"grad_norm": 23.830419540405273,
|
|
"learning_rate": 2.2565536379453404e-07,
|
|
"logits/chosen": -0.5101211667060852,
|
|
"logits/rejected": -0.5033398270606995,
|
|
"logps/chosen": -422.8414001464844,
|
|
"logps/ref_chosen": -62.546112060546875,
|
|
"logps/ref_rejected": -83.78262329101562,
|
|
"logps/rejected": -694.0816650390625,
|
|
"loss": 1.0818,
|
|
"margin_dpo/margin_mean": 250.00375366210938,
|
|
"margin_dpo/margin_std": 334.2568359375,
|
|
"step": 395
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -359.490966796875,
|
|
"KL/mean": -474.9879150390625,
|
|
"KL/rejected_KL_mean": -590.48486328125,
|
|
"KL/std": 282.67681884765625,
|
|
"epoch": 0.5814977973568282,
|
|
"fcm_dpo/beta": 0.0016928238328546286,
|
|
"fcm_dpo/delta": 0.009283696301281452,
|
|
"fcm_dpo/margin": 230.993896484375,
|
|
"fcm_dpo/q_t": 0.40855342149734497,
|
|
"grad_norm": 24.08172607421875,
|
|
"learning_rate": 2.2437845895869825e-07,
|
|
"logits/chosen": -0.4725229740142822,
|
|
"logits/rejected": -0.4491950571537018,
|
|
"logps/chosen": -428.4869079589844,
|
|
"logps/ref_chosen": -68.99594116210938,
|
|
"logps/ref_rejected": -88.64665985107422,
|
|
"logps/rejected": -679.1314697265625,
|
|
"loss": 1.0811,
|
|
"margin_dpo/margin_mean": 230.993896484375,
|
|
"margin_dpo/margin_std": 268.80023193359375,
|
|
"step": 396
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -343.18365478515625,
|
|
"KL/mean": -490.54986572265625,
|
|
"KL/rejected_KL_mean": -637.9160766601562,
|
|
"KL/std": 279.0823974609375,
|
|
"epoch": 0.5829662261380323,
|
|
"fcm_dpo/beta": 0.0016665621660649776,
|
|
"fcm_dpo/delta": -0.09650179743766785,
|
|
"fcm_dpo/margin": 294.732421875,
|
|
"fcm_dpo/q_t": 0.38623836636543274,
|
|
"grad_norm": 34.25861358642578,
|
|
"learning_rate": 2.2310222927429716e-07,
|
|
"logits/chosen": -0.46848201751708984,
|
|
"logits/rejected": -0.47495004534721375,
|
|
"logps/chosen": -404.4608154296875,
|
|
"logps/ref_chosen": -61.27716827392578,
|
|
"logps/ref_rejected": -103.11612701416016,
|
|
"logps/rejected": -741.0322265625,
|
|
"loss": 1.0128,
|
|
"margin_dpo/margin_mean": 294.732421875,
|
|
"margin_dpo/margin_std": 302.2978515625,
|
|
"step": 397
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -366.3542175292969,
|
|
"KL/mean": -501.455322265625,
|
|
"KL/rejected_KL_mean": -636.5565185546875,
|
|
"KL/std": 288.9761962890625,
|
|
"epoch": 0.5844346549192364,
|
|
"fcm_dpo/beta": 0.0016521359793841839,
|
|
"fcm_dpo/delta": -0.04858284816145897,
|
|
"fcm_dpo/margin": 270.20220947265625,
|
|
"fcm_dpo/q_t": 0.3986930847167969,
|
|
"grad_norm": 28.262405395507812,
|
|
"learning_rate": 2.2182670837116972e-07,
|
|
"logits/chosen": -0.5342578887939453,
|
|
"logits/rejected": -0.53399258852005,
|
|
"logps/chosen": -434.5057678222656,
|
|
"logps/ref_chosen": -68.15155029296875,
|
|
"logps/ref_rejected": -108.52360534667969,
|
|
"logps/rejected": -745.080078125,
|
|
"loss": 1.0596,
|
|
"margin_dpo/margin_mean": 270.20220947265625,
|
|
"margin_dpo/margin_std": 338.95098876953125,
|
|
"step": 398
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -335.1020202636719,
|
|
"KL/mean": -451.53240966796875,
|
|
"KL/rejected_KL_mean": -567.9627685546875,
|
|
"KL/std": 267.4539489746094,
|
|
"epoch": 0.5859030837004405,
|
|
"fcm_dpo/beta": 0.0016457277815788984,
|
|
"fcm_dpo/delta": 0.01717957854270935,
|
|
"fcm_dpo/margin": 232.86073303222656,
|
|
"fcm_dpo/q_t": 0.41211044788360596,
|
|
"grad_norm": 28.764699935913086,
|
|
"learning_rate": 2.2055192986047804e-07,
|
|
"logits/chosen": -0.5000342130661011,
|
|
"logits/rejected": -0.4618859887123108,
|
|
"logps/chosen": -395.9918212890625,
|
|
"logps/ref_chosen": -60.889801025390625,
|
|
"logps/ref_rejected": -77.965576171875,
|
|
"logps/rejected": -645.9283447265625,
|
|
"loss": 1.1164,
|
|
"margin_dpo/margin_mean": 232.8607177734375,
|
|
"margin_dpo/margin_std": 344.62420654296875,
|
|
"step": 399
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -311.3885192871094,
|
|
"KL/mean": -480.91925048828125,
|
|
"KL/rejected_KL_mean": -650.449951171875,
|
|
"KL/std": 279.1466064453125,
|
|
"epoch": 0.5873715124816447,
|
|
"fcm_dpo/beta": 0.0016120923683047295,
|
|
"fcm_dpo/delta": -0.15557917952537537,
|
|
"fcm_dpo/margin": 339.0614013671875,
|
|
"fcm_dpo/q_t": 0.37171751260757446,
|
|
"grad_norm": 27.097814559936523,
|
|
"learning_rate": 2.192779273338215e-07,
|
|
"logits/chosen": -0.4866938591003418,
|
|
"logits/rejected": -0.4817845821380615,
|
|
"logps/chosen": -375.0321044921875,
|
|
"logps/ref_chosen": -63.64359664916992,
|
|
"logps/ref_rejected": -105.252685546875,
|
|
"logps/rejected": -755.70263671875,
|
|
"loss": 0.968,
|
|
"margin_dpo/margin_mean": 339.0614013671875,
|
|
"margin_dpo/margin_std": 303.73828125,
|
|
"step": 400
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -366.85516357421875,
|
|
"KL/mean": -460.4599304199219,
|
|
"KL/rejected_KL_mean": -554.064697265625,
|
|
"KL/std": 292.738037109375,
|
|
"epoch": 0.5888399412628488,
|
|
"fcm_dpo/beta": 0.0016232456546276808,
|
|
"fcm_dpo/delta": 0.09924636781215668,
|
|
"fcm_dpo/margin": 187.20950317382812,
|
|
"fcm_dpo/q_t": 0.4327518343925476,
|
|
"grad_norm": 30.14374351501465,
|
|
"learning_rate": 2.1800473436235136e-07,
|
|
"logits/chosen": -0.4865230619907379,
|
|
"logits/rejected": -0.4814421534538269,
|
|
"logps/chosen": -424.0182189941406,
|
|
"logps/ref_chosen": -57.16303253173828,
|
|
"logps/ref_rejected": -83.79249572753906,
|
|
"logps/rejected": -637.857177734375,
|
|
"loss": 1.2006,
|
|
"margin_dpo/margin_mean": 187.20950317382812,
|
|
"margin_dpo/margin_std": 391.4587707519531,
|
|
"step": 401
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -272.751953125,
|
|
"KL/mean": -456.13507080078125,
|
|
"KL/rejected_KL_mean": -639.5181884765625,
|
|
"KL/std": 318.4268798828125,
|
|
"epoch": 0.5903083700440529,
|
|
"fcm_dpo/beta": 0.0015893438830971718,
|
|
"fcm_dpo/delta": -0.19418612122535706,
|
|
"fcm_dpo/margin": 366.76617431640625,
|
|
"fcm_dpo/q_t": 0.3651409447193146,
|
|
"grad_norm": 27.241565704345703,
|
|
"learning_rate": 2.1673238449588665e-07,
|
|
"logits/chosen": -0.5259881019592285,
|
|
"logits/rejected": -0.5160728693008423,
|
|
"logps/chosen": -323.4923400878906,
|
|
"logps/ref_chosen": -50.74037170410156,
|
|
"logps/ref_rejected": -81.0460433959961,
|
|
"logps/rejected": -720.564208984375,
|
|
"loss": 0.9443,
|
|
"margin_dpo/margin_mean": 366.76617431640625,
|
|
"margin_dpo/margin_std": 324.19622802734375,
|
|
"step": 402
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -300.929443359375,
|
|
"KL/mean": -437.60577392578125,
|
|
"KL/rejected_KL_mean": -574.2821044921875,
|
|
"KL/std": 293.6529541015625,
|
|
"epoch": 0.591776798825257,
|
|
"fcm_dpo/beta": 0.0015664222883060575,
|
|
"fcm_dpo/delta": -0.029441511258482933,
|
|
"fcm_dpo/margin": 273.35260009765625,
|
|
"fcm_dpo/q_t": 0.40054211020469666,
|
|
"grad_norm": 27.655994415283203,
|
|
"learning_rate": 2.154609112620295e-07,
|
|
"logits/chosen": -0.5488017797470093,
|
|
"logits/rejected": -0.542914867401123,
|
|
"logps/chosen": -348.0767822265625,
|
|
"logps/ref_chosen": -47.14731216430664,
|
|
"logps/ref_rejected": -77.2666015625,
|
|
"logps/rejected": -651.5487060546875,
|
|
"loss": 1.057,
|
|
"margin_dpo/margin_mean": 273.35260009765625,
|
|
"margin_dpo/margin_std": 303.58697509765625,
|
|
"step": 403
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -353.74798583984375,
|
|
"KL/mean": -488.7012939453125,
|
|
"KL/rejected_KL_mean": -623.654541015625,
|
|
"KL/std": 293.87213134765625,
|
|
"epoch": 0.593245227606461,
|
|
"fcm_dpo/beta": 0.001556064235046506,
|
|
"fcm_dpo/delta": -0.020973514765501022,
|
|
"fcm_dpo/margin": 269.9066162109375,
|
|
"fcm_dpo/q_t": 0.40406206250190735,
|
|
"grad_norm": 33.138004302978516,
|
|
"learning_rate": 2.1419034816528218e-07,
|
|
"logits/chosen": -0.5281866788864136,
|
|
"logits/rejected": -0.5200468301773071,
|
|
"logps/chosen": -401.62322998046875,
|
|
"logps/ref_chosen": -47.875274658203125,
|
|
"logps/ref_rejected": -77.15499877929688,
|
|
"logps/rejected": -700.8095703125,
|
|
"loss": 1.091,
|
|
"margin_dpo/margin_mean": 269.9066162109375,
|
|
"margin_dpo/margin_std": 377.742919921875,
|
|
"step": 404
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -396.12872314453125,
|
|
"KL/mean": -505.8334045410156,
|
|
"KL/rejected_KL_mean": -615.5380859375,
|
|
"KL/std": 310.9896240234375,
|
|
"epoch": 0.5947136563876652,
|
|
"fcm_dpo/beta": 0.0015513843391090631,
|
|
"fcm_dpo/delta": -0.03758659213781357,
|
|
"fcm_dpo/margin": 219.4093475341797,
|
|
"fcm_dpo/q_t": 0.4234468638896942,
|
|
"grad_norm": 35.584747314453125,
|
|
"learning_rate": 2.129207286861638e-07,
|
|
"logits/chosen": -0.5194091796875,
|
|
"logits/rejected": -0.5121314525604248,
|
|
"logps/chosen": -461.2916259765625,
|
|
"logps/ref_chosen": -65.16290283203125,
|
|
"logps/ref_rejected": -87.18678283691406,
|
|
"logps/rejected": -702.724853515625,
|
|
"loss": 1.1657,
|
|
"margin_dpo/margin_mean": 219.4093475341797,
|
|
"margin_dpo/margin_std": 391.95977783203125,
|
|
"step": 405
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -364.4059753417969,
|
|
"KL/mean": -506.44384765625,
|
|
"KL/rejected_KL_mean": -648.481689453125,
|
|
"KL/std": 309.79296875,
|
|
"epoch": 0.5961820851688693,
|
|
"fcm_dpo/beta": 0.0015432301443070173,
|
|
"fcm_dpo/delta": -0.0403851754963398,
|
|
"fcm_dpo/margin": 284.0757141113281,
|
|
"fcm_dpo/q_t": 0.39986056089401245,
|
|
"grad_norm": 25.82929801940918,
|
|
"learning_rate": 2.1165208628032861e-07,
|
|
"logits/chosen": -0.54176926612854,
|
|
"logits/rejected": -0.5563890933990479,
|
|
"logps/chosen": -414.14678955078125,
|
|
"logps/ref_chosen": -49.740814208984375,
|
|
"logps/ref_rejected": -92.07862854003906,
|
|
"logps/rejected": -740.560302734375,
|
|
"loss": 1.0647,
|
|
"margin_dpo/margin_mean": 284.07568359375,
|
|
"margin_dpo/margin_std": 352.7003173828125,
|
|
"step": 406
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -376.4976806640625,
|
|
"KL/mean": -473.91802978515625,
|
|
"KL/rejected_KL_mean": -571.33837890625,
|
|
"KL/std": 257.8326110839844,
|
|
"epoch": 0.5976505139500734,
|
|
"fcm_dpo/beta": 0.0015315297059714794,
|
|
"fcm_dpo/delta": -0.01038383599370718,
|
|
"fcm_dpo/margin": 194.84068298339844,
|
|
"fcm_dpo/q_t": 0.42981648445129395,
|
|
"grad_norm": 35.46054458618164,
|
|
"learning_rate": 2.1038445437768375e-07,
|
|
"logits/chosen": -0.5773499011993408,
|
|
"logits/rejected": -0.5510052442550659,
|
|
"logps/chosen": -432.8283996582031,
|
|
"logps/ref_chosen": -56.33069610595703,
|
|
"logps/ref_rejected": -77.51209259033203,
|
|
"logps/rejected": -648.8504638671875,
|
|
"loss": 1.1752,
|
|
"margin_dpo/margin_mean": 194.8406982421875,
|
|
"margin_dpo/margin_std": 331.57635498046875,
|
|
"step": 407
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -382.6614990234375,
|
|
"KL/mean": -483.43701171875,
|
|
"KL/rejected_KL_mean": -584.2125244140625,
|
|
"KL/std": 246.0572967529297,
|
|
"epoch": 0.5991189427312775,
|
|
"fcm_dpo/beta": 0.0015524220652878284,
|
|
"fcm_dpo/delta": 0.08978626132011414,
|
|
"fcm_dpo/margin": 201.551025390625,
|
|
"fcm_dpo/q_t": 0.4264683127403259,
|
|
"grad_norm": 37.8366813659668,
|
|
"learning_rate": 2.0911786638150872e-07,
|
|
"logits/chosen": -0.57146817445755,
|
|
"logits/rejected": -0.5515158176422119,
|
|
"logps/chosen": -452.4508056640625,
|
|
"logps/ref_chosen": -69.789306640625,
|
|
"logps/ref_rejected": -90.09693908691406,
|
|
"logps/rejected": -674.3094482421875,
|
|
"loss": 1.1414,
|
|
"margin_dpo/margin_mean": 201.551025390625,
|
|
"margin_dpo/margin_std": 274.039306640625,
|
|
"step": 408
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -374.21478271484375,
|
|
"KL/mean": -472.4864501953125,
|
|
"KL/rejected_KL_mean": -570.7581787109375,
|
|
"KL/std": 268.3485107421875,
|
|
"epoch": 0.6005873715124816,
|
|
"fcm_dpo/beta": 0.0015834926161915064,
|
|
"fcm_dpo/delta": 0.09120422601699829,
|
|
"fcm_dpo/margin": 196.54339599609375,
|
|
"fcm_dpo/q_t": 0.4273938536643982,
|
|
"grad_norm": 39.993656158447266,
|
|
"learning_rate": 2.0785235566757517e-07,
|
|
"logits/chosen": -0.5402634143829346,
|
|
"logits/rejected": -0.5223067998886108,
|
|
"logps/chosen": -441.5321960449219,
|
|
"logps/ref_chosen": -67.31744384765625,
|
|
"logps/ref_rejected": -84.904296875,
|
|
"logps/rejected": -655.6624755859375,
|
|
"loss": 1.158,
|
|
"margin_dpo/margin_mean": 196.5434112548828,
|
|
"margin_dpo/margin_std": 309.50628662109375,
|
|
"step": 409
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -335.39337158203125,
|
|
"KL/mean": -453.332763671875,
|
|
"KL/rejected_KL_mean": -571.2720947265625,
|
|
"KL/std": 258.4756774902344,
|
|
"epoch": 0.6020558002936858,
|
|
"fcm_dpo/beta": 0.0015911536756902933,
|
|
"fcm_dpo/delta": 0.025650672614574432,
|
|
"fcm_dpo/margin": 235.87872314453125,
|
|
"fcm_dpo/q_t": 0.41136714816093445,
|
|
"grad_norm": 29.003385543823242,
|
|
"learning_rate": 2.065879555832674e-07,
|
|
"logits/chosen": -0.5437331795692444,
|
|
"logits/rejected": -0.5466384887695312,
|
|
"logps/chosen": -386.8587341308594,
|
|
"logps/ref_chosen": -51.465354919433594,
|
|
"logps/ref_rejected": -83.198974609375,
|
|
"logps/rejected": -654.4710693359375,
|
|
"loss": 1.0969,
|
|
"margin_dpo/margin_mean": 235.8787384033203,
|
|
"margin_dpo/margin_std": 290.5599365234375,
|
|
"step": 410
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -340.03350830078125,
|
|
"KL/mean": -455.3880615234375,
|
|
"KL/rejected_KL_mean": -570.7425537109375,
|
|
"KL/std": 282.7340393066406,
|
|
"epoch": 0.6035242290748899,
|
|
"fcm_dpo/beta": 0.001590752974152565,
|
|
"fcm_dpo/delta": 0.033395539969205856,
|
|
"fcm_dpo/margin": 230.70909118652344,
|
|
"fcm_dpo/q_t": 0.41678670048713684,
|
|
"grad_norm": 26.219621658325195,
|
|
"learning_rate": 2.0532469944670343e-07,
|
|
"logits/chosen": -0.520312488079071,
|
|
"logits/rejected": -0.5262941718101501,
|
|
"logps/chosen": -392.34075927734375,
|
|
"logps/ref_chosen": -52.30727005004883,
|
|
"logps/ref_rejected": -80.69495391845703,
|
|
"logps/rejected": -651.4375,
|
|
"loss": 1.1209,
|
|
"margin_dpo/margin_mean": 230.70907592773438,
|
|
"margin_dpo/margin_std": 329.6683349609375,
|
|
"step": 411
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -355.45513916015625,
|
|
"KL/mean": -482.0203857421875,
|
|
"KL/rejected_KL_mean": -608.5856323242188,
|
|
"KL/std": 281.10675048828125,
|
|
"epoch": 0.604992657856094,
|
|
"fcm_dpo/beta": 0.0016053288709372282,
|
|
"fcm_dpo/delta": -0.006643663160502911,
|
|
"fcm_dpo/margin": 253.1304931640625,
|
|
"fcm_dpo/q_t": 0.4056549072265625,
|
|
"grad_norm": 40.66035079956055,
|
|
"learning_rate": 2.0406262054585738e-07,
|
|
"logits/chosen": -0.5641697645187378,
|
|
"logits/rejected": -0.5924566984176636,
|
|
"logps/chosen": -408.5992736816406,
|
|
"logps/ref_chosen": -53.144126892089844,
|
|
"logps/ref_rejected": -100.0608139038086,
|
|
"logps/rejected": -708.6464233398438,
|
|
"loss": 1.0857,
|
|
"margin_dpo/margin_mean": 253.13047790527344,
|
|
"margin_dpo/margin_std": 327.401611328125,
|
|
"step": 412
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -375.2008056640625,
|
|
"KL/mean": -497.345703125,
|
|
"KL/rejected_KL_mean": -619.4906005859375,
|
|
"KL/std": 274.7577819824219,
|
|
"epoch": 0.6064610866372981,
|
|
"fcm_dpo/beta": 0.001608746824786067,
|
|
"fcm_dpo/delta": 0.007142549380660057,
|
|
"fcm_dpo/margin": 244.28973388671875,
|
|
"fcm_dpo/q_t": 0.40697982907295227,
|
|
"grad_norm": 26.795982360839844,
|
|
"learning_rate": 2.0280175213768205e-07,
|
|
"logits/chosen": -0.4796082675457001,
|
|
"logits/rejected": -0.4848223328590393,
|
|
"logps/chosen": -436.78277587890625,
|
|
"logps/ref_chosen": -61.58196258544922,
|
|
"logps/ref_rejected": -99.47340393066406,
|
|
"logps/rejected": -718.9639892578125,
|
|
"loss": 1.0903,
|
|
"margin_dpo/margin_mean": 244.28973388671875,
|
|
"margin_dpo/margin_std": 309.2333984375,
|
|
"step": 413
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -338.37677001953125,
|
|
"KL/mean": -473.0335998535156,
|
|
"KL/rejected_KL_mean": -607.6904296875,
|
|
"KL/std": 263.765625,
|
|
"epoch": 0.6079295154185022,
|
|
"fcm_dpo/beta": 0.0016081007197499275,
|
|
"fcm_dpo/delta": -0.035320840775966644,
|
|
"fcm_dpo/margin": 269.31365966796875,
|
|
"fcm_dpo/q_t": 0.3990900218486786,
|
|
"grad_norm": 38.35430908203125,
|
|
"learning_rate": 2.0154212744723247e-07,
|
|
"logits/chosen": -0.4838346838951111,
|
|
"logits/rejected": -0.47299548983573914,
|
|
"logps/chosen": -385.00823974609375,
|
|
"logps/ref_chosen": -46.63148498535156,
|
|
"logps/ref_rejected": -87.64653015136719,
|
|
"logps/rejected": -695.3369750976562,
|
|
"loss": 1.0622,
|
|
"margin_dpo/margin_mean": 269.3136901855469,
|
|
"margin_dpo/margin_std": 310.88043212890625,
|
|
"step": 414
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -378.6455993652344,
|
|
"KL/mean": -483.13079833984375,
|
|
"KL/rejected_KL_mean": -587.6160278320312,
|
|
"KL/std": 267.6329345703125,
|
|
"epoch": 0.6093979441997063,
|
|
"fcm_dpo/beta": 0.0016053159488365054,
|
|
"fcm_dpo/delta": 0.06679742783308029,
|
|
"fcm_dpo/margin": 208.97039794921875,
|
|
"fcm_dpo/q_t": 0.4219440221786499,
|
|
"grad_norm": 31.549434661865234,
|
|
"learning_rate": 2.002837796667909e-07,
|
|
"logits/chosen": -0.5333505868911743,
|
|
"logits/rejected": -0.5307386517524719,
|
|
"logps/chosen": -457.2638854980469,
|
|
"logps/ref_chosen": -78.6182861328125,
|
|
"logps/ref_rejected": -100.47752380371094,
|
|
"logps/rejected": -688.093505859375,
|
|
"loss": 1.1386,
|
|
"margin_dpo/margin_mean": 208.97039794921875,
|
|
"margin_dpo/margin_std": 311.5289306640625,
|
|
"step": 415
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -346.6520690917969,
|
|
"KL/mean": -505.2399597167969,
|
|
"KL/rejected_KL_mean": -663.827880859375,
|
|
"KL/std": 294.3641357421875,
|
|
"epoch": 0.6108663729809104,
|
|
"fcm_dpo/beta": 0.0015930493827909231,
|
|
"fcm_dpo/delta": -0.11073094606399536,
|
|
"fcm_dpo/margin": 317.17578125,
|
|
"fcm_dpo/q_t": 0.38171255588531494,
|
|
"grad_norm": 32.9141731262207,
|
|
"learning_rate": 1.990267419549914e-07,
|
|
"logits/chosen": -0.5502901077270508,
|
|
"logits/rejected": -0.5549454092979431,
|
|
"logps/chosen": -404.93121337890625,
|
|
"logps/ref_chosen": -58.27912521362305,
|
|
"logps/ref_rejected": -90.56871795654297,
|
|
"logps/rejected": -754.3966064453125,
|
|
"loss": 0.9955,
|
|
"margin_dpo/margin_mean": 317.17578125,
|
|
"margin_dpo/margin_std": 294.7266845703125,
|
|
"step": 416
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -341.5537109375,
|
|
"KL/mean": -474.1524658203125,
|
|
"KL/rejected_KL_mean": -606.7512817382812,
|
|
"KL/std": 268.6434631347656,
|
|
"epoch": 0.6123348017621145,
|
|
"fcm_dpo/beta": 0.0015767996665090322,
|
|
"fcm_dpo/delta": -0.01895320415496826,
|
|
"fcm_dpo/margin": 265.19757080078125,
|
|
"fcm_dpo/q_t": 0.40123608708381653,
|
|
"grad_norm": 31.803016662597656,
|
|
"learning_rate": 1.9777104743594686e-07,
|
|
"logits/chosen": -0.4980270266532898,
|
|
"logits/rejected": -0.47427403926849365,
|
|
"logps/chosen": -391.7524108886719,
|
|
"logps/ref_chosen": -50.1987190246582,
|
|
"logps/ref_rejected": -68.15184020996094,
|
|
"logps/rejected": -674.903076171875,
|
|
"loss": 1.0551,
|
|
"margin_dpo/margin_mean": 265.19757080078125,
|
|
"margin_dpo/margin_std": 272.559814453125,
|
|
"step": 417
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -383.3123779296875,
|
|
"KL/mean": -517.654541015625,
|
|
"KL/rejected_KL_mean": -651.9967041015625,
|
|
"KL/std": 306.8751220703125,
|
|
"epoch": 0.6138032305433186,
|
|
"fcm_dpo/beta": 0.0015800942201167345,
|
|
"fcm_dpo/delta": -0.026646777987480164,
|
|
"fcm_dpo/margin": 268.684326171875,
|
|
"fcm_dpo/q_t": 0.40411561727523804,
|
|
"grad_norm": 25.314533233642578,
|
|
"learning_rate": 1.965167291983757e-07,
|
|
"logits/chosen": -0.5755934715270996,
|
|
"logits/rejected": -0.5533599853515625,
|
|
"logps/chosen": -465.2908630371094,
|
|
"logps/ref_chosen": -81.97846984863281,
|
|
"logps/ref_rejected": -104.69148254394531,
|
|
"logps/rejected": -756.688232421875,
|
|
"loss": 1.0853,
|
|
"margin_dpo/margin_mean": 268.684326171875,
|
|
"margin_dpo/margin_std": 358.3873291015625,
|
|
"step": 418
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -348.782470703125,
|
|
"KL/mean": -498.6832275390625,
|
|
"KL/rejected_KL_mean": -648.583984375,
|
|
"KL/std": 267.0831298828125,
|
|
"epoch": 0.6152716593245228,
|
|
"fcm_dpo/beta": 0.0015506461495533586,
|
|
"fcm_dpo/delta": -0.06802451610565186,
|
|
"fcm_dpo/margin": 299.801513671875,
|
|
"fcm_dpo/q_t": 0.3909485936164856,
|
|
"grad_norm": 24.278188705444336,
|
|
"learning_rate": 1.9526382029472988e-07,
|
|
"logits/chosen": -0.5322695970535278,
|
|
"logits/rejected": -0.5325411558151245,
|
|
"logps/chosen": -401.7311096191406,
|
|
"logps/ref_chosen": -52.948646545410156,
|
|
"logps/ref_rejected": -91.58309936523438,
|
|
"logps/rejected": -740.1671142578125,
|
|
"loss": 1.0295,
|
|
"margin_dpo/margin_mean": 299.801513671875,
|
|
"margin_dpo/margin_std": 311.72760009765625,
|
|
"step": 419
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -444.0596923828125,
|
|
"KL/mean": -532.8899536132812,
|
|
"KL/rejected_KL_mean": -621.72021484375,
|
|
"KL/std": 289.5302734375,
|
|
"epoch": 0.6167400881057269,
|
|
"fcm_dpo/beta": 0.001575858099386096,
|
|
"fcm_dpo/delta": 0.12323421239852905,
|
|
"fcm_dpo/margin": 177.66058349609375,
|
|
"fcm_dpo/q_t": 0.4371680021286011,
|
|
"grad_norm": 60.59740447998047,
|
|
"learning_rate": 1.9401235374032425e-07,
|
|
"logits/chosen": -0.5358173847198486,
|
|
"logits/rejected": -0.4956286549568176,
|
|
"logps/chosen": -521.82958984375,
|
|
"logps/ref_chosen": -77.7699203491211,
|
|
"logps/ref_rejected": -69.31985473632812,
|
|
"logps/rejected": -691.0401000976562,
|
|
"loss": 1.2169,
|
|
"margin_dpo/margin_mean": 177.66058349609375,
|
|
"margin_dpo/margin_std": 394.1659240722656,
|
|
"step": 420
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -365.69305419921875,
|
|
"KL/mean": -470.6966552734375,
|
|
"KL/rejected_KL_mean": -575.7002563476562,
|
|
"KL/std": 274.4378967285156,
|
|
"epoch": 0.618208516886931,
|
|
"fcm_dpo/beta": 0.001605308847501874,
|
|
"fcm_dpo/delta": 0.06441329419612885,
|
|
"fcm_dpo/margin": 210.0072021484375,
|
|
"fcm_dpo/q_t": 0.4202464818954468,
|
|
"grad_norm": 29.370344161987305,
|
|
"learning_rate": 1.9276236251246653e-07,
|
|
"logits/chosen": -0.5225532054901123,
|
|
"logits/rejected": -0.5048704147338867,
|
|
"logps/chosen": -419.45892333984375,
|
|
"logps/ref_chosen": -53.765865325927734,
|
|
"logps/ref_rejected": -89.28144836425781,
|
|
"logps/rejected": -664.981689453125,
|
|
"loss": 1.1354,
|
|
"margin_dpo/margin_mean": 210.0072021484375,
|
|
"margin_dpo/margin_std": 298.9598388671875,
|
|
"step": 421
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -418.38153076171875,
|
|
"KL/mean": -540.8232421875,
|
|
"KL/rejected_KL_mean": -663.264892578125,
|
|
"KL/std": 289.33966064453125,
|
|
"epoch": 0.6196769456681351,
|
|
"fcm_dpo/beta": 0.0016051906859502196,
|
|
"fcm_dpo/delta": 0.007199084386229515,
|
|
"fcm_dpo/margin": 244.88333129882812,
|
|
"fcm_dpo/q_t": 0.40865635871887207,
|
|
"grad_norm": 34.15210723876953,
|
|
"learning_rate": 1.9151387954958792e-07,
|
|
"logits/chosen": -0.555054783821106,
|
|
"logits/rejected": -0.5556162595748901,
|
|
"logps/chosen": -487.01531982421875,
|
|
"logps/ref_chosen": -68.6337661743164,
|
|
"logps/ref_rejected": -87.86351013183594,
|
|
"logps/rejected": -751.12841796875,
|
|
"loss": 1.102,
|
|
"margin_dpo/margin_mean": 244.8833465576172,
|
|
"margin_dpo/margin_std": 341.8487548828125,
|
|
"step": 422
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -407.53509521484375,
|
|
"KL/mean": -542.244140625,
|
|
"KL/rejected_KL_mean": -676.9532470703125,
|
|
"KL/std": 281.966796875,
|
|
"epoch": 0.6211453744493393,
|
|
"fcm_dpo/beta": 0.0015967879444360733,
|
|
"fcm_dpo/delta": -0.0316154807806015,
|
|
"fcm_dpo/margin": 269.41815185546875,
|
|
"fcm_dpo/q_t": 0.39945095777511597,
|
|
"grad_norm": 28.743013381958008,
|
|
"learning_rate": 1.902669377503756e-07,
|
|
"logits/chosen": -0.5464938879013062,
|
|
"logits/rejected": -0.5520018339157104,
|
|
"logps/chosen": -462.525390625,
|
|
"logps/ref_chosen": -54.99030303955078,
|
|
"logps/ref_rejected": -86.30654907226562,
|
|
"logps/rejected": -763.2598266601562,
|
|
"loss": 1.0566,
|
|
"margin_dpo/margin_mean": 269.41815185546875,
|
|
"margin_dpo/margin_std": 303.0052795410156,
|
|
"step": 423
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -378.3030090332031,
|
|
"KL/mean": -499.42327880859375,
|
|
"KL/rejected_KL_mean": -620.5435791015625,
|
|
"KL/std": 280.09075927734375,
|
|
"epoch": 0.6226138032305433,
|
|
"fcm_dpo/beta": 0.0015932518290355802,
|
|
"fcm_dpo/delta": 0.014376441016793251,
|
|
"fcm_dpo/margin": 242.24057006835938,
|
|
"fcm_dpo/q_t": 0.4120855927467346,
|
|
"grad_norm": 30.807126998901367,
|
|
"learning_rate": 1.890215699729057e-07,
|
|
"logits/chosen": -0.5540552139282227,
|
|
"logits/rejected": -0.5233687162399292,
|
|
"logps/chosen": -434.31494140625,
|
|
"logps/ref_chosen": -56.01192092895508,
|
|
"logps/ref_rejected": -66.47896575927734,
|
|
"logps/rejected": -687.0225830078125,
|
|
"loss": 1.1061,
|
|
"margin_dpo/margin_mean": 242.24058532714844,
|
|
"margin_dpo/margin_std": 339.5845947265625,
|
|
"step": 424
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -409.63446044921875,
|
|
"KL/mean": -513.9957885742188,
|
|
"KL/rejected_KL_mean": -618.3570556640625,
|
|
"KL/std": 262.7354736328125,
|
|
"epoch": 0.6240822320117474,
|
|
"fcm_dpo/beta": 0.00161844864487648,
|
|
"fcm_dpo/delta": 0.06421151012182236,
|
|
"fcm_dpo/margin": 208.72262573242188,
|
|
"fcm_dpo/q_t": 0.42051440477371216,
|
|
"grad_norm": 38.50931930541992,
|
|
"learning_rate": 1.8777780903377732e-07,
|
|
"logits/chosen": -0.5310732126235962,
|
|
"logits/rejected": -0.5318828225135803,
|
|
"logps/chosen": -456.50347900390625,
|
|
"logps/ref_chosen": -46.86899948120117,
|
|
"logps/ref_rejected": -95.92545318603516,
|
|
"logps/rejected": -714.2825317382812,
|
|
"loss": 1.1387,
|
|
"margin_dpo/margin_mean": 208.72262573242188,
|
|
"margin_dpo/margin_std": 312.8076171875,
|
|
"step": 425
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -386.08856201171875,
|
|
"KL/mean": -505.2015686035156,
|
|
"KL/rejected_KL_mean": -624.3145751953125,
|
|
"KL/std": 267.33624267578125,
|
|
"epoch": 0.6255506607929515,
|
|
"fcm_dpo/beta": 0.0016317331464961171,
|
|
"fcm_dpo/delta": 0.011202432215213776,
|
|
"fcm_dpo/margin": 238.22601318359375,
|
|
"fcm_dpo/q_t": 0.409078449010849,
|
|
"grad_norm": 31.958324432373047,
|
|
"learning_rate": 1.8653568770724803e-07,
|
|
"logits/chosen": -0.5409977436065674,
|
|
"logits/rejected": -0.501447319984436,
|
|
"logps/chosen": -462.672119140625,
|
|
"logps/ref_chosen": -76.58354187011719,
|
|
"logps/ref_rejected": -81.26658630371094,
|
|
"logps/rejected": -705.5811767578125,
|
|
"loss": 1.0974,
|
|
"margin_dpo/margin_mean": 238.22601318359375,
|
|
"margin_dpo/margin_std": 305.9199523925781,
|
|
"step": 426
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -357.8903503417969,
|
|
"KL/mean": -448.9057312011719,
|
|
"KL/rejected_KL_mean": -539.921142578125,
|
|
"KL/std": 236.3748779296875,
|
|
"epoch": 0.6270190895741556,
|
|
"fcm_dpo/beta": 0.0016468719113618135,
|
|
"fcm_dpo/delta": 0.10344026982784271,
|
|
"fcm_dpo/margin": 182.03077697753906,
|
|
"fcm_dpo/q_t": 0.4312647581100464,
|
|
"grad_norm": 26.211519241333008,
|
|
"learning_rate": 1.8529523872436977e-07,
|
|
"logits/chosen": -0.5992106199264526,
|
|
"logits/rejected": -0.5759471654891968,
|
|
"logps/chosen": -422.7442321777344,
|
|
"logps/ref_chosen": -64.8538818359375,
|
|
"logps/ref_rejected": -78.5660171508789,
|
|
"logps/rejected": -618.4871215820312,
|
|
"loss": 1.1615,
|
|
"margin_dpo/margin_mean": 182.03079223632812,
|
|
"margin_dpo/margin_std": 283.958251953125,
|
|
"step": 427
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -429.00396728515625,
|
|
"KL/mean": -561.623291015625,
|
|
"KL/rejected_KL_mean": -694.2426147460938,
|
|
"KL/std": 311.591064453125,
|
|
"epoch": 0.6284875183553598,
|
|
"fcm_dpo/beta": 0.001645084354095161,
|
|
"fcm_dpo/delta": -0.038231100887060165,
|
|
"fcm_dpo/margin": 265.2386779785156,
|
|
"fcm_dpo/q_t": 0.4009571671485901,
|
|
"grad_norm": 35.00596237182617,
|
|
"learning_rate": 1.8405649477212697e-07,
|
|
"logits/chosen": -0.5608881711959839,
|
|
"logits/rejected": -0.559416651725769,
|
|
"logps/chosen": -491.640625,
|
|
"logps/ref_chosen": -62.63666534423828,
|
|
"logps/ref_rejected": -103.28181457519531,
|
|
"logps/rejected": -797.5244140625,
|
|
"loss": 1.0869,
|
|
"margin_dpo/margin_mean": 265.2386474609375,
|
|
"margin_dpo/margin_std": 379.43597412109375,
|
|
"step": 428
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -423.6542053222656,
|
|
"KL/mean": -521.1947631835938,
|
|
"KL/rejected_KL_mean": -618.7353515625,
|
|
"KL/std": 257.50677490234375,
|
|
"epoch": 0.6299559471365639,
|
|
"fcm_dpo/beta": 0.0016439331229776144,
|
|
"fcm_dpo/delta": -0.013148479163646698,
|
|
"fcm_dpo/margin": 195.0811767578125,
|
|
"fcm_dpo/q_t": 0.42502105236053467,
|
|
"grad_norm": 28.7783145904541,
|
|
"learning_rate": 1.828194884925749e-07,
|
|
"logits/chosen": -0.5880295634269714,
|
|
"logits/rejected": -0.5610803961753845,
|
|
"logps/chosen": -504.8882141113281,
|
|
"logps/ref_chosen": -81.23401641845703,
|
|
"logps/ref_rejected": -91.79493713378906,
|
|
"logps/rejected": -710.5302734375,
|
|
"loss": 1.1646,
|
|
"margin_dpo/margin_mean": 195.08114624023438,
|
|
"margin_dpo/margin_std": 329.076171875,
|
|
"step": 429
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -349.86883544921875,
|
|
"KL/mean": -457.08282470703125,
|
|
"KL/rejected_KL_mean": -564.2967529296875,
|
|
"KL/std": 251.49932861328125,
|
|
"epoch": 0.631424375917768,
|
|
"fcm_dpo/beta": 0.0016547690611332655,
|
|
"fcm_dpo/delta": 0.04677361994981766,
|
|
"fcm_dpo/margin": 214.4279022216797,
|
|
"fcm_dpo/q_t": 0.4177946150302887,
|
|
"grad_norm": 30.429576873779297,
|
|
"learning_rate": 1.8158425248197928e-07,
|
|
"logits/chosen": -0.5739535093307495,
|
|
"logits/rejected": -0.5771204233169556,
|
|
"logps/chosen": -410.7891845703125,
|
|
"logps/ref_chosen": -60.920326232910156,
|
|
"logps/ref_rejected": -104.42280578613281,
|
|
"logps/rejected": -668.7196044921875,
|
|
"loss": 1.1134,
|
|
"margin_dpo/margin_mean": 214.42791748046875,
|
|
"margin_dpo/margin_std": 282.10595703125,
|
|
"step": 430
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -334.3349609375,
|
|
"KL/mean": -474.6339111328125,
|
|
"KL/rejected_KL_mean": -614.932861328125,
|
|
"KL/std": 273.7649230957031,
|
|
"epoch": 0.6328928046989721,
|
|
"fcm_dpo/beta": 0.0016404774505645037,
|
|
"fcm_dpo/delta": -0.06328192353248596,
|
|
"fcm_dpo/margin": 280.59796142578125,
|
|
"fcm_dpo/q_t": 0.39278194308280945,
|
|
"grad_norm": 32.40290451049805,
|
|
"learning_rate": 1.8035081928995788e-07,
|
|
"logits/chosen": -0.5994788408279419,
|
|
"logits/rejected": -0.5967893600463867,
|
|
"logps/chosen": -391.6836853027344,
|
|
"logps/ref_chosen": -57.34874725341797,
|
|
"logps/ref_rejected": -92.84022521972656,
|
|
"logps/rejected": -707.7731323242188,
|
|
"loss": 1.0381,
|
|
"margin_dpo/margin_mean": 280.59796142578125,
|
|
"margin_dpo/margin_std": 304.55780029296875,
|
|
"step": 431
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -322.6483154296875,
|
|
"KL/mean": -469.07745361328125,
|
|
"KL/rejected_KL_mean": -615.506591796875,
|
|
"KL/std": 274.3560485839844,
|
|
"epoch": 0.6343612334801763,
|
|
"fcm_dpo/beta": 0.0016270647756755352,
|
|
"fcm_dpo/delta": -0.08062286674976349,
|
|
"fcm_dpo/margin": 292.85833740234375,
|
|
"fcm_dpo/q_t": 0.38900789618492126,
|
|
"grad_norm": 35.32616424560547,
|
|
"learning_rate": 1.791192214186223e-07,
|
|
"logits/chosen": -0.5596065521240234,
|
|
"logits/rejected": -0.5433411598205566,
|
|
"logps/chosen": -393.72308349609375,
|
|
"logps/ref_chosen": -71.07479095458984,
|
|
"logps/ref_rejected": -98.57952880859375,
|
|
"logps/rejected": -714.086181640625,
|
|
"loss": 1.0171,
|
|
"margin_dpo/margin_mean": 292.85833740234375,
|
|
"margin_dpo/margin_std": 278.27447509765625,
|
|
"step": 432
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -416.74896240234375,
|
|
"KL/mean": -512.5089721679688,
|
|
"KL/rejected_KL_mean": -608.2689208984375,
|
|
"KL/std": 277.8785095214844,
|
|
"epoch": 0.6358296622613803,
|
|
"fcm_dpo/beta": 0.001634822110645473,
|
|
"fcm_dpo/delta": 0.08947563171386719,
|
|
"fcm_dpo/margin": 191.52005004882812,
|
|
"fcm_dpo/q_t": 0.425261914730072,
|
|
"grad_norm": 36.68017578125,
|
|
"learning_rate": 1.7788949132172193e-07,
|
|
"logits/chosen": -0.6115612983703613,
|
|
"logits/rejected": -0.5973314046859741,
|
|
"logps/chosen": -475.02215576171875,
|
|
"logps/ref_chosen": -58.273193359375,
|
|
"logps/ref_rejected": -95.95089721679688,
|
|
"logps/rejected": -704.2198486328125,
|
|
"loss": 1.1663,
|
|
"margin_dpo/margin_mean": 191.52005004882812,
|
|
"margin_dpo/margin_std": 325.86212158203125,
|
|
"step": 433
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -380.5859375,
|
|
"KL/mean": -486.9596252441406,
|
|
"KL/rejected_KL_mean": -593.333251953125,
|
|
"KL/std": 263.0496520996094,
|
|
"epoch": 0.6372980910425844,
|
|
"fcm_dpo/beta": 0.0016470999689772725,
|
|
"fcm_dpo/delta": 0.05136201158165932,
|
|
"fcm_dpo/margin": 212.747314453125,
|
|
"fcm_dpo/q_t": 0.4220370948314667,
|
|
"grad_norm": 33.74725341796875,
|
|
"learning_rate": 1.7666166140378853e-07,
|
|
"logits/chosen": -0.6359285116195679,
|
|
"logits/rejected": -0.6292107105255127,
|
|
"logps/chosen": -442.5596618652344,
|
|
"logps/ref_chosen": -61.97370147705078,
|
|
"logps/ref_rejected": -78.49861145019531,
|
|
"logps/rejected": -671.8319091796875,
|
|
"loss": 1.1326,
|
|
"margin_dpo/margin_mean": 212.747314453125,
|
|
"margin_dpo/margin_std": 328.4437561035156,
|
|
"step": 434
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -346.1165771484375,
|
|
"KL/mean": -471.15478515625,
|
|
"KL/rejected_KL_mean": -596.1929931640625,
|
|
"KL/std": 277.8624267578125,
|
|
"epoch": 0.6387665198237885,
|
|
"fcm_dpo/beta": 0.0016516190953552723,
|
|
"fcm_dpo/delta": -0.0136133236810565,
|
|
"fcm_dpo/margin": 250.07640075683594,
|
|
"fcm_dpo/q_t": 0.4048606753349304,
|
|
"grad_norm": 35.001678466796875,
|
|
"learning_rate": 1.7543576401928218e-07,
|
|
"logits/chosen": -0.643917441368103,
|
|
"logits/rejected": -0.6357216835021973,
|
|
"logps/chosen": -397.61865234375,
|
|
"logps/ref_chosen": -51.502052307128906,
|
|
"logps/ref_rejected": -87.56689453125,
|
|
"logps/rejected": -683.7598876953125,
|
|
"loss": 1.0826,
|
|
"margin_dpo/margin_mean": 250.076416015625,
|
|
"margin_dpo/margin_std": 318.9538269042969,
|
|
"step": 435
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -359.42626953125,
|
|
"KL/mean": -470.8343505859375,
|
|
"KL/rejected_KL_mean": -582.242431640625,
|
|
"KL/std": 251.36630249023438,
|
|
"epoch": 0.6402349486049926,
|
|
"fcm_dpo/beta": 0.0016546837287023664,
|
|
"fcm_dpo/delta": 0.032419584691524506,
|
|
"fcm_dpo/margin": 222.81613159179688,
|
|
"fcm_dpo/q_t": 0.41425737738609314,
|
|
"grad_norm": 35.268802642822266,
|
|
"learning_rate": 1.742118314717391e-07,
|
|
"logits/chosen": -0.6128599643707275,
|
|
"logits/rejected": -0.5752372741699219,
|
|
"logps/chosen": -430.83001708984375,
|
|
"logps/ref_chosen": -71.40371704101562,
|
|
"logps/ref_rejected": -82.72775268554688,
|
|
"logps/rejected": -664.97021484375,
|
|
"loss": 1.1098,
|
|
"margin_dpo/margin_mean": 222.81613159179688,
|
|
"margin_dpo/margin_std": 298.9530029296875,
|
|
"step": 436
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -371.99591064453125,
|
|
"KL/mean": -481.1207275390625,
|
|
"KL/rejected_KL_mean": -590.2454833984375,
|
|
"KL/std": 237.38775634765625,
|
|
"epoch": 0.6417033773861968,
|
|
"fcm_dpo/beta": 0.001670231344178319,
|
|
"fcm_dpo/delta": 0.036831051111221313,
|
|
"fcm_dpo/margin": 218.2496337890625,
|
|
"fcm_dpo/q_t": 0.41457653045654297,
|
|
"grad_norm": 27.045368194580078,
|
|
"learning_rate": 1.7298989601292036e-07,
|
|
"logits/chosen": -0.6260372400283813,
|
|
"logits/rejected": -0.5966402292251587,
|
|
"logps/chosen": -436.74017333984375,
|
|
"logps/ref_chosen": -64.7442626953125,
|
|
"logps/ref_rejected": -82.04356384277344,
|
|
"logps/rejected": -672.2890625,
|
|
"loss": 1.1091,
|
|
"margin_dpo/margin_mean": 218.2496337890625,
|
|
"margin_dpo/margin_std": 285.473388671875,
|
|
"step": 437
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -393.12811279296875,
|
|
"KL/mean": -519.9140014648438,
|
|
"KL/rejected_KL_mean": -646.6998901367188,
|
|
"KL/std": 283.584716796875,
|
|
"epoch": 0.6431718061674009,
|
|
"fcm_dpo/beta": 0.0016619900707155466,
|
|
"fcm_dpo/delta": -0.022839529439806938,
|
|
"fcm_dpo/margin": 253.57174682617188,
|
|
"fcm_dpo/q_t": 0.40174010396003723,
|
|
"grad_norm": 36.76564025878906,
|
|
"learning_rate": 1.7176998984196144e-07,
|
|
"logits/chosen": -0.6138747930526733,
|
|
"logits/rejected": -0.5866237878799438,
|
|
"logps/chosen": -452.14678955078125,
|
|
"logps/ref_chosen": -59.0186653137207,
|
|
"logps/ref_rejected": -83.07682800292969,
|
|
"logps/rejected": -729.7767333984375,
|
|
"loss": 1.0735,
|
|
"margin_dpo/margin_mean": 253.57174682617188,
|
|
"margin_dpo/margin_std": 311.7683410644531,
|
|
"step": 438
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -400.92803955078125,
|
|
"KL/mean": -509.7080993652344,
|
|
"KL/rejected_KL_mean": -618.4881591796875,
|
|
"KL/std": 285.30078125,
|
|
"epoch": 0.644640234948605,
|
|
"fcm_dpo/beta": 0.0016392945544794202,
|
|
"fcm_dpo/delta": -0.08405376225709915,
|
|
"fcm_dpo/margin": 217.56015014648438,
|
|
"fcm_dpo/q_t": 0.4188630282878876,
|
|
"grad_norm": 30.52996253967285,
|
|
"learning_rate": 1.7055214510452458e-07,
|
|
"logits/chosen": -0.6197365522384644,
|
|
"logits/rejected": -0.6234545707702637,
|
|
"logps/chosen": -454.71209716796875,
|
|
"logps/ref_chosen": -53.78407669067383,
|
|
"logps/ref_rejected": -83.98545837402344,
|
|
"logps/rejected": -702.4736328125,
|
|
"loss": 1.1405,
|
|
"margin_dpo/margin_mean": 217.56015014648438,
|
|
"margin_dpo/margin_std": 336.02734375,
|
|
"step": 439
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -425.029541015625,
|
|
"KL/mean": -547.2294921875,
|
|
"KL/rejected_KL_mean": -669.429443359375,
|
|
"KL/std": 345.5404968261719,
|
|
"epoch": 0.6461086637298091,
|
|
"fcm_dpo/beta": 0.001642939867451787,
|
|
"fcm_dpo/delta": -0.0017192382365465164,
|
|
"fcm_dpo/margin": 244.39990234375,
|
|
"fcm_dpo/q_t": 0.4110341966152191,
|
|
"grad_norm": 46.26620101928711,
|
|
"learning_rate": 1.6933639389195134e-07,
|
|
"logits/chosen": -0.6939189434051514,
|
|
"logits/rejected": -0.6905303597450256,
|
|
"logps/chosen": -503.59625244140625,
|
|
"logps/ref_chosen": -78.56671905517578,
|
|
"logps/ref_rejected": -96.49775695800781,
|
|
"logps/rejected": -765.9271850585938,
|
|
"loss": 1.1009,
|
|
"margin_dpo/margin_mean": 244.39990234375,
|
|
"margin_dpo/margin_std": 356.36102294921875,
|
|
"step": 440
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -480.99591064453125,
|
|
"KL/mean": -598.77001953125,
|
|
"KL/rejected_KL_mean": -716.5440673828125,
|
|
"KL/std": 346.46795654296875,
|
|
"epoch": 0.6475770925110133,
|
|
"fcm_dpo/beta": 0.001648401957936585,
|
|
"fcm_dpo/delta": 0.011800557374954224,
|
|
"fcm_dpo/margin": 235.54818725585938,
|
|
"fcm_dpo/q_t": 0.41518956422805786,
|
|
"grad_norm": 47.700626373291016,
|
|
"learning_rate": 1.681227682404166e-07,
|
|
"logits/chosen": -0.6772704720497131,
|
|
"logits/rejected": -0.661035418510437,
|
|
"logps/chosen": -541.8203125,
|
|
"logps/ref_chosen": -60.824440002441406,
|
|
"logps/ref_rejected": -96.47080993652344,
|
|
"logps/rejected": -813.014892578125,
|
|
"loss": 1.1481,
|
|
"margin_dpo/margin_mean": 235.54818725585938,
|
|
"margin_dpo/margin_std": 421.46875,
|
|
"step": 441
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -414.614501953125,
|
|
"KL/mean": -552.625244140625,
|
|
"KL/rejected_KL_mean": -690.6361083984375,
|
|
"KL/std": 340.30548095703125,
|
|
"epoch": 0.6490455212922174,
|
|
"fcm_dpo/beta": 0.0016397257568314672,
|
|
"fcm_dpo/delta": -0.055591996759176254,
|
|
"fcm_dpo/margin": 276.02154541015625,
|
|
"fcm_dpo/q_t": 0.4000922739505768,
|
|
"grad_norm": 32.922607421875,
|
|
"learning_rate": 1.669113001300851e-07,
|
|
"logits/chosen": -0.6050703525543213,
|
|
"logits/rejected": -0.5892840623855591,
|
|
"logps/chosen": -461.625732421875,
|
|
"logps/ref_chosen": -47.01121520996094,
|
|
"logps/ref_rejected": -76.53926086425781,
|
|
"logps/rejected": -767.17529296875,
|
|
"loss": 1.0748,
|
|
"margin_dpo/margin_mean": 276.021484375,
|
|
"margin_dpo/margin_std": 379.1744384765625,
|
|
"step": 442
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -455.964111328125,
|
|
"KL/mean": -543.126953125,
|
|
"KL/rejected_KL_mean": -630.2898559570312,
|
|
"KL/std": 322.6165466308594,
|
|
"epoch": 0.6505139500734214,
|
|
"fcm_dpo/beta": 0.0016242916462942958,
|
|
"fcm_dpo/delta": -0.0004603892157319933,
|
|
"fcm_dpo/margin": 174.32577514648438,
|
|
"fcm_dpo/q_t": 0.4363713562488556,
|
|
"grad_norm": 49.77903366088867,
|
|
"learning_rate": 1.6570202148426815e-07,
|
|
"logits/chosen": -0.6197609901428223,
|
|
"logits/rejected": -0.5921432375907898,
|
|
"logps/chosen": -527.2371215820312,
|
|
"logps/ref_chosen": -71.27301788330078,
|
|
"logps/ref_rejected": -86.679931640625,
|
|
"logps/rejected": -716.9697875976562,
|
|
"loss": 1.2244,
|
|
"margin_dpo/margin_mean": 174.32577514648438,
|
|
"margin_dpo/margin_std": 398.736572265625,
|
|
"step": 443
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -431.3829040527344,
|
|
"KL/mean": -579.9524536132812,
|
|
"KL/rejected_KL_mean": -728.5220336914062,
|
|
"KL/std": 343.83526611328125,
|
|
"epoch": 0.6519823788546255,
|
|
"fcm_dpo/beta": 0.0016027928795665503,
|
|
"fcm_dpo/delta": -0.0802171379327774,
|
|
"fcm_dpo/margin": 297.1391296386719,
|
|
"fcm_dpo/q_t": 0.39284807443618774,
|
|
"grad_norm": 47.931034088134766,
|
|
"learning_rate": 1.6449496416858282e-07,
|
|
"logits/chosen": -0.5499997138977051,
|
|
"logits/rejected": -0.5509436726570129,
|
|
"logps/chosen": -488.59661865234375,
|
|
"logps/ref_chosen": -57.213706970214844,
|
|
"logps/ref_rejected": -97.25489807128906,
|
|
"logps/rejected": -825.7769165039062,
|
|
"loss": 1.0468,
|
|
"margin_dpo/margin_mean": 297.1391296386719,
|
|
"margin_dpo/margin_std": 377.43927001953125,
|
|
"step": 444
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -386.8304443359375,
|
|
"KL/mean": -519.5946655273438,
|
|
"KL/rejected_KL_mean": -652.35888671875,
|
|
"KL/std": 278.63726806640625,
|
|
"epoch": 0.6534508076358296,
|
|
"fcm_dpo/beta": 0.0015939505537971854,
|
|
"fcm_dpo/delta": -0.02425987273454666,
|
|
"fcm_dpo/margin": 265.5284118652344,
|
|
"fcm_dpo/q_t": 0.4040035307407379,
|
|
"grad_norm": 33.41164779663086,
|
|
"learning_rate": 1.6329015999011182e-07,
|
|
"logits/chosen": -0.6030969619750977,
|
|
"logits/rejected": -0.587662398815155,
|
|
"logps/chosen": -454.1302490234375,
|
|
"logps/ref_chosen": -67.29979705810547,
|
|
"logps/ref_rejected": -92.68267059326172,
|
|
"logps/rejected": -745.0415649414062,
|
|
"loss": 1.0847,
|
|
"margin_dpo/margin_mean": 265.5284118652344,
|
|
"margin_dpo/margin_std": 356.4762878417969,
|
|
"step": 445
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -347.9998779296875,
|
|
"KL/mean": -496.1356201171875,
|
|
"KL/rejected_KL_mean": -644.271484375,
|
|
"KL/std": 297.07550048828125,
|
|
"epoch": 0.6549192364170338,
|
|
"fcm_dpo/beta": 0.001583605189807713,
|
|
"fcm_dpo/delta": -0.07294195890426636,
|
|
"fcm_dpo/margin": 296.2715759277344,
|
|
"fcm_dpo/q_t": 0.3905741572380066,
|
|
"grad_norm": 32.357421875,
|
|
"learning_rate": 1.6208764069656578e-07,
|
|
"logits/chosen": -0.593482255935669,
|
|
"logits/rejected": -0.6047611236572266,
|
|
"logps/chosen": -407.0983581542969,
|
|
"logps/ref_chosen": -59.098487854003906,
|
|
"logps/ref_rejected": -101.26419067382812,
|
|
"logps/rejected": -745.53564453125,
|
|
"loss": 1.0313,
|
|
"margin_dpo/margin_mean": 296.2715759277344,
|
|
"margin_dpo/margin_std": 308.3010559082031,
|
|
"step": 446
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -356.1015625,
|
|
"KL/mean": -507.9754333496094,
|
|
"KL/rejected_KL_mean": -659.849365234375,
|
|
"KL/std": 338.80889892578125,
|
|
"epoch": 0.6563876651982379,
|
|
"fcm_dpo/beta": 0.001546173356473446,
|
|
"fcm_dpo/delta": -0.07345931977033615,
|
|
"fcm_dpo/margin": 303.7477722167969,
|
|
"fcm_dpo/q_t": 0.39456889033317566,
|
|
"grad_norm": 34.99128341674805,
|
|
"learning_rate": 1.608874379754465e-07,
|
|
"logits/chosen": -0.6641237735748291,
|
|
"logits/rejected": -0.6772187948226929,
|
|
"logps/chosen": -412.1768798828125,
|
|
"logps/ref_chosen": -56.07533264160156,
|
|
"logps/ref_rejected": -98.69475555419922,
|
|
"logps/rejected": -758.5440673828125,
|
|
"loss": 1.0466,
|
|
"margin_dpo/margin_mean": 303.7477722167969,
|
|
"margin_dpo/margin_std": 382.21380615234375,
|
|
"step": 447
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -386.7417297363281,
|
|
"KL/mean": -532.7618408203125,
|
|
"KL/rejected_KL_mean": -678.7818603515625,
|
|
"KL/std": 287.2745056152344,
|
|
"epoch": 0.657856093979442,
|
|
"fcm_dpo/beta": 0.0015382280107587576,
|
|
"fcm_dpo/delta": -0.051600463688373566,
|
|
"fcm_dpo/margin": 292.04010009765625,
|
|
"fcm_dpo/q_t": 0.3958974778652191,
|
|
"grad_norm": 37.12090301513672,
|
|
"learning_rate": 1.5968958345321177e-07,
|
|
"logits/chosen": -0.5725095272064209,
|
|
"logits/rejected": -0.5768595933914185,
|
|
"logps/chosen": -446.7455749511719,
|
|
"logps/ref_chosen": -60.00384521484375,
|
|
"logps/ref_rejected": -102.26465606689453,
|
|
"logps/rejected": -781.0465087890625,
|
|
"loss": 1.044,
|
|
"margin_dpo/margin_mean": 292.0401306152344,
|
|
"margin_dpo/margin_std": 318.9796142578125,
|
|
"step": 448
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -391.531982421875,
|
|
"KL/mean": -530.09326171875,
|
|
"KL/rejected_KL_mean": -668.654541015625,
|
|
"KL/std": 336.97320556640625,
|
|
"epoch": 0.6593245227606461,
|
|
"fcm_dpo/beta": 0.001521222060546279,
|
|
"fcm_dpo/delta": -0.02261107787489891,
|
|
"fcm_dpo/margin": 277.12255859375,
|
|
"fcm_dpo/q_t": 0.4064374566078186,
|
|
"grad_norm": 35.58975601196289,
|
|
"learning_rate": 1.584941086944423e-07,
|
|
"logits/chosen": -0.6065933704376221,
|
|
"logits/rejected": -0.5960414409637451,
|
|
"logps/chosen": -459.05859375,
|
|
"logps/ref_chosen": -67.52661895751953,
|
|
"logps/ref_rejected": -88.59690856933594,
|
|
"logps/rejected": -757.25146484375,
|
|
"loss": 1.1009,
|
|
"margin_dpo/margin_mean": 277.12255859375,
|
|
"margin_dpo/margin_std": 424.0272216796875,
|
|
"step": 449
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -325.14208984375,
|
|
"KL/mean": -486.7525634765625,
|
|
"KL/rejected_KL_mean": -648.3630981445312,
|
|
"KL/std": 311.44439697265625,
|
|
"epoch": 0.6607929515418502,
|
|
"fcm_dpo/beta": 0.0015039572026580572,
|
|
"fcm_dpo/delta": -0.09042147547006607,
|
|
"fcm_dpo/margin": 323.22100830078125,
|
|
"fcm_dpo/q_t": 0.3854549527168274,
|
|
"grad_norm": 38.07679748535156,
|
|
"learning_rate": 1.573010452010098e-07,
|
|
"logits/chosen": -0.6078156232833862,
|
|
"logits/rejected": -0.6151422262191772,
|
|
"logps/chosen": -382.2502136230469,
|
|
"logps/ref_chosen": -57.10811996459961,
|
|
"logps/ref_rejected": -102.75494384765625,
|
|
"logps/rejected": -751.1180419921875,
|
|
"loss": 1.0075,
|
|
"margin_dpo/margin_mean": 323.2209777832031,
|
|
"margin_dpo/margin_std": 305.29486083984375,
|
|
"step": 450
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -423.325439453125,
|
|
"KL/mean": -537.2266845703125,
|
|
"KL/rejected_KL_mean": -651.1279907226562,
|
|
"KL/std": 339.7525634765625,
|
|
"epoch": 0.6622613803230544,
|
|
"fcm_dpo/beta": 0.001510746544227004,
|
|
"fcm_dpo/delta": 0.057636506855487823,
|
|
"fcm_dpo/margin": 227.80252075195312,
|
|
"fcm_dpo/q_t": 0.41878455877304077,
|
|
"grad_norm": 28.49110221862793,
|
|
"learning_rate": 1.5611042441124687e-07,
|
|
"logits/chosen": -0.650975227355957,
|
|
"logits/rejected": -0.6234632134437561,
|
|
"logps/chosen": -481.79425048828125,
|
|
"logps/ref_chosen": -58.46883010864258,
|
|
"logps/ref_rejected": -72.92941284179688,
|
|
"logps/rejected": -724.057373046875,
|
|
"loss": 1.1516,
|
|
"margin_dpo/margin_mean": 227.80252075195312,
|
|
"margin_dpo/margin_std": 386.4510498046875,
|
|
"step": 451
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -296.03009033203125,
|
|
"KL/mean": -442.83074951171875,
|
|
"KL/rejected_KL_mean": -589.63134765625,
|
|
"KL/std": 274.8699951171875,
|
|
"epoch": 0.6637298091042585,
|
|
"fcm_dpo/beta": 0.0014999432023614645,
|
|
"fcm_dpo/delta": -0.04238360375165939,
|
|
"fcm_dpo/margin": 293.60125732421875,
|
|
"fcm_dpo/q_t": 0.3958936929702759,
|
|
"grad_norm": 21.779226303100586,
|
|
"learning_rate": 1.549222776991186e-07,
|
|
"logits/chosen": -0.5605393648147583,
|
|
"logits/rejected": -0.5794718265533447,
|
|
"logps/chosen": -346.420654296875,
|
|
"logps/ref_chosen": -50.39055252075195,
|
|
"logps/ref_rejected": -97.77142333984375,
|
|
"logps/rejected": -687.40283203125,
|
|
"loss": 1.0375,
|
|
"margin_dpo/margin_mean": 293.60125732421875,
|
|
"margin_dpo/margin_std": 286.99456787109375,
|
|
"step": 452
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -361.25189208984375,
|
|
"KL/mean": -487.23480224609375,
|
|
"KL/rejected_KL_mean": -613.2176513671875,
|
|
"KL/std": 274.94085693359375,
|
|
"epoch": 0.6651982378854625,
|
|
"fcm_dpo/beta": 0.0014997010584920645,
|
|
"fcm_dpo/delta": 0.022862950339913368,
|
|
"fcm_dpo/margin": 251.9658203125,
|
|
"fcm_dpo/q_t": 0.4132787585258484,
|
|
"grad_norm": 23.549057006835938,
|
|
"learning_rate": 1.5373663637339584e-07,
|
|
"logits/chosen": -0.6005634069442749,
|
|
"logits/rejected": -0.576606273651123,
|
|
"logps/chosen": -418.96673583984375,
|
|
"logps/ref_chosen": -57.71485137939453,
|
|
"logps/ref_rejected": -82.20741271972656,
|
|
"logps/rejected": -695.4251098632812,
|
|
"loss": 1.1003,
|
|
"margin_dpo/margin_mean": 251.9658203125,
|
|
"margin_dpo/margin_std": 328.4573974609375,
|
|
"step": 453
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -429.2816162109375,
|
|
"KL/mean": -578.0045166015625,
|
|
"KL/rejected_KL_mean": -726.7275390625,
|
|
"KL/std": 324.15545654296875,
|
|
"epoch": 0.6666666666666666,
|
|
"fcm_dpo/beta": 0.0014922961127012968,
|
|
"fcm_dpo/delta": -0.04614517092704773,
|
|
"fcm_dpo/margin": 297.4459228515625,
|
|
"fcm_dpo/q_t": 0.3982745409011841,
|
|
"grad_norm": 23.49666404724121,
|
|
"learning_rate": 1.5255353167683017e-07,
|
|
"logits/chosen": -0.6576023101806641,
|
|
"logits/rejected": -0.6443264484405518,
|
|
"logps/chosen": -490.2272644042969,
|
|
"logps/ref_chosen": -60.945648193359375,
|
|
"logps/ref_rejected": -84.95079040527344,
|
|
"logps/rejected": -811.6783447265625,
|
|
"loss": 1.0595,
|
|
"margin_dpo/margin_mean": 297.4459228515625,
|
|
"margin_dpo/margin_std": 368.7091369628906,
|
|
"step": 454
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -380.530517578125,
|
|
"KL/mean": -561.5405883789062,
|
|
"KL/rejected_KL_mean": -742.5506591796875,
|
|
"KL/std": 345.6868896484375,
|
|
"epoch": 0.6681350954478708,
|
|
"fcm_dpo/beta": 0.0014687062939628959,
|
|
"fcm_dpo/delta": -0.13889265060424805,
|
|
"fcm_dpo/margin": 362.0201416015625,
|
|
"fcm_dpo/q_t": 0.37912923097610474,
|
|
"grad_norm": 33.2188606262207,
|
|
"learning_rate": 1.5137299478533064e-07,
|
|
"logits/chosen": -0.6506750583648682,
|
|
"logits/rejected": -0.6731724739074707,
|
|
"logps/chosen": -425.417236328125,
|
|
"logps/ref_chosen": -44.88671112060547,
|
|
"logps/ref_rejected": -115.30147552490234,
|
|
"logps/rejected": -857.8521728515625,
|
|
"loss": 1.0019,
|
|
"margin_dpo/margin_mean": 362.0201416015625,
|
|
"margin_dpo/margin_std": 392.91925048828125,
|
|
"step": 455
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -401.8112487792969,
|
|
"KL/mean": -575.7777099609375,
|
|
"KL/rejected_KL_mean": -749.744140625,
|
|
"KL/std": 341.2975158691406,
|
|
"epoch": 0.6696035242290749,
|
|
"fcm_dpo/beta": 0.0014300058828666806,
|
|
"fcm_dpo/delta": -0.10262109339237213,
|
|
"fcm_dpo/margin": 347.93292236328125,
|
|
"fcm_dpo/q_t": 0.38482022285461426,
|
|
"grad_norm": 32.33451843261719,
|
|
"learning_rate": 1.5019505680714232e-07,
|
|
"logits/chosen": -0.612378716468811,
|
|
"logits/rejected": -0.632037878036499,
|
|
"logps/chosen": -458.8480224609375,
|
|
"logps/ref_chosen": -57.036781311035156,
|
|
"logps/ref_rejected": -105.21784210205078,
|
|
"logps/rejected": -854.9620361328125,
|
|
"loss": 1.0025,
|
|
"margin_dpo/margin_mean": 347.9329528808594,
|
|
"margin_dpo/margin_std": 340.260986328125,
|
|
"step": 456
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -395.79827880859375,
|
|
"KL/mean": -566.0772705078125,
|
|
"KL/rejected_KL_mean": -736.3562622070312,
|
|
"KL/std": 335.4608459472656,
|
|
"epoch": 0.671071953010279,
|
|
"fcm_dpo/beta": 0.0014014223124831915,
|
|
"fcm_dpo/delta": -0.08146154880523682,
|
|
"fcm_dpo/margin": 340.5579833984375,
|
|
"fcm_dpo/q_t": 0.3878782391548157,
|
|
"grad_norm": 27.79351043701172,
|
|
"learning_rate": 1.4901974878202627e-07,
|
|
"logits/chosen": -0.6594676971435547,
|
|
"logits/rejected": -0.6568940877914429,
|
|
"logps/chosen": -450.0408020019531,
|
|
"logps/ref_chosen": -54.24253845214844,
|
|
"logps/ref_rejected": -85.10956573486328,
|
|
"logps/rejected": -821.4658203125,
|
|
"loss": 1.0157,
|
|
"margin_dpo/margin_mean": 340.5579833984375,
|
|
"margin_dpo/margin_std": 328.9454345703125,
|
|
"step": 457
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -425.1473693847656,
|
|
"KL/mean": -578.1829833984375,
|
|
"KL/rejected_KL_mean": -731.2185668945312,
|
|
"KL/std": 312.2036437988281,
|
|
"epoch": 0.6725403817914831,
|
|
"fcm_dpo/beta": 0.001388939330354333,
|
|
"fcm_dpo/delta": -0.0265361275523901,
|
|
"fcm_dpo/margin": 306.07122802734375,
|
|
"fcm_dpo/q_t": 0.4020830988883972,
|
|
"grad_norm": 24.5087890625,
|
|
"learning_rate": 1.4784710168044212e-07,
|
|
"logits/chosen": -0.6767433881759644,
|
|
"logits/rejected": -0.6698124408721924,
|
|
"logps/chosen": -480.5562438964844,
|
|
"logps/ref_chosen": -55.40888214111328,
|
|
"logps/ref_rejected": -97.68325805664062,
|
|
"logps/rejected": -828.90185546875,
|
|
"loss": 1.0668,
|
|
"margin_dpo/margin_mean": 306.07122802734375,
|
|
"margin_dpo/margin_std": 367.548095703125,
|
|
"step": 458
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -477.1727294921875,
|
|
"KL/mean": -637.5369873046875,
|
|
"KL/rejected_KL_mean": -797.9011840820312,
|
|
"KL/std": 362.09600830078125,
|
|
"epoch": 0.6740088105726872,
|
|
"fcm_dpo/beta": 0.0013780685840174556,
|
|
"fcm_dpo/delta": -0.04427627474069595,
|
|
"fcm_dpo/margin": 320.7283935546875,
|
|
"fcm_dpo/q_t": 0.3988497853279114,
|
|
"grad_norm": 37.77009201049805,
|
|
"learning_rate": 1.466771464027316e-07,
|
|
"logits/chosen": -0.6691190004348755,
|
|
"logits/rejected": -0.6827735304832458,
|
|
"logps/chosen": -523.730224609375,
|
|
"logps/ref_chosen": -46.55748748779297,
|
|
"logps/ref_rejected": -86.16854095458984,
|
|
"logps/rejected": -884.0697021484375,
|
|
"loss": 1.0698,
|
|
"margin_dpo/margin_mean": 320.7284240722656,
|
|
"margin_dpo/margin_std": 416.4349365234375,
|
|
"step": 459
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -524.5628051757812,
|
|
"KL/mean": -702.454833984375,
|
|
"KL/rejected_KL_mean": -880.3468627929688,
|
|
"KL/std": 362.19757080078125,
|
|
"epoch": 0.6754772393538914,
|
|
"fcm_dpo/beta": 0.0013614799827337265,
|
|
"fcm_dpo/delta": -0.08867627382278442,
|
|
"fcm_dpo/margin": 355.7840576171875,
|
|
"fcm_dpo/q_t": 0.38881251215934753,
|
|
"grad_norm": 38.9324836730957,
|
|
"learning_rate": 1.4550991377830423e-07,
|
|
"logits/chosen": -0.7337905168533325,
|
|
"logits/rejected": -0.7658564448356628,
|
|
"logps/chosen": -576.19775390625,
|
|
"logps/ref_chosen": -51.63489532470703,
|
|
"logps/ref_rejected": -104.11935424804688,
|
|
"logps/rejected": -984.4661865234375,
|
|
"loss": 1.0251,
|
|
"margin_dpo/margin_mean": 355.7840576171875,
|
|
"margin_dpo/margin_std": 394.8114013671875,
|
|
"step": 460
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -551.4806518554688,
|
|
"KL/mean": -691.9464111328125,
|
|
"KL/rejected_KL_mean": -832.4122314453125,
|
|
"KL/std": 369.3660888671875,
|
|
"epoch": 0.6769456681350955,
|
|
"fcm_dpo/beta": 0.001360948197543621,
|
|
"fcm_dpo/delta": 0.018288645893335342,
|
|
"fcm_dpo/margin": 280.931640625,
|
|
"fcm_dpo/q_t": 0.41348153352737427,
|
|
"grad_norm": 25.64112663269043,
|
|
"learning_rate": 1.4434543456482518e-07,
|
|
"logits/chosen": -0.7380908727645874,
|
|
"logits/rejected": -0.750026524066925,
|
|
"logps/chosen": -606.66259765625,
|
|
"logps/ref_chosen": -55.18195724487305,
|
|
"logps/ref_rejected": -86.47689819335938,
|
|
"logps/rejected": -918.88916015625,
|
|
"loss": 1.114,
|
|
"margin_dpo/margin_mean": 280.931640625,
|
|
"margin_dpo/margin_std": 416.9804992675781,
|
|
"step": 461
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -591.0242919921875,
|
|
"KL/mean": -702.4153442382812,
|
|
"KL/rejected_KL_mean": -813.806396484375,
|
|
"KL/std": 374.5951232910156,
|
|
"epoch": 0.6784140969162996,
|
|
"fcm_dpo/beta": 0.0013791057281196117,
|
|
"fcm_dpo/delta": 0.09573453664779663,
|
|
"fcm_dpo/margin": 222.78219604492188,
|
|
"fcm_dpo/q_t": 0.4319148361682892,
|
|
"grad_norm": 43.11104965209961,
|
|
"learning_rate": 1.4318373944740484e-07,
|
|
"logits/chosen": -0.8458345532417297,
|
|
"logits/rejected": -0.829505205154419,
|
|
"logps/chosen": -660.9522705078125,
|
|
"logps/ref_chosen": -69.92803192138672,
|
|
"logps/ref_rejected": -78.84111022949219,
|
|
"logps/rejected": -892.6475830078125,
|
|
"loss": 1.1826,
|
|
"margin_dpo/margin_mean": 222.78216552734375,
|
|
"margin_dpo/margin_std": 426.85430908203125,
|
|
"step": 462
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -574.5623779296875,
|
|
"KL/mean": -721.1395263671875,
|
|
"KL/rejected_KL_mean": -867.7166748046875,
|
|
"KL/std": 383.65875244140625,
|
|
"epoch": 0.6798825256975036,
|
|
"fcm_dpo/beta": 0.0013890512054786086,
|
|
"fcm_dpo/delta": -0.007682671770453453,
|
|
"fcm_dpo/margin": 293.15423583984375,
|
|
"fcm_dpo/q_t": 0.40834498405456543,
|
|
"grad_norm": 39.33317565917969,
|
|
"learning_rate": 1.4202485903778976e-07,
|
|
"logits/chosen": -0.8075680732727051,
|
|
"logits/rejected": -0.8106831312179565,
|
|
"logps/chosen": -629.8367919921875,
|
|
"logps/ref_chosen": -55.27437210083008,
|
|
"logps/ref_rejected": -89.02497863769531,
|
|
"logps/rejected": -956.7415771484375,
|
|
"loss": 1.1057,
|
|
"margin_dpo/margin_mean": 293.15423583984375,
|
|
"margin_dpo/margin_std": 435.8871765136719,
|
|
"step": 463
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -575.095458984375,
|
|
"KL/mean": -810.3673095703125,
|
|
"KL/rejected_KL_mean": -1045.63916015625,
|
|
"KL/std": 450.2059631347656,
|
|
"epoch": 0.6813509544787077,
|
|
"fcm_dpo/beta": 0.0013278971891850233,
|
|
"fcm_dpo/delta": -0.24185608327388763,
|
|
"fcm_dpo/margin": 470.54376220703125,
|
|
"fcm_dpo/q_t": 0.35767611861228943,
|
|
"grad_norm": 36.297725677490234,
|
|
"learning_rate": 1.4086882387355658e-07,
|
|
"logits/chosen": -0.7917243242263794,
|
|
"logits/rejected": -0.8560171127319336,
|
|
"logps/chosen": -626.0076904296875,
|
|
"logps/ref_chosen": -50.91230010986328,
|
|
"logps/ref_rejected": -102.4893798828125,
|
|
"logps/rejected": -1148.1285400390625,
|
|
"loss": 0.937,
|
|
"margin_dpo/margin_mean": 470.5437316894531,
|
|
"margin_dpo/margin_std": 456.50927734375,
|
|
"step": 464
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -572.2319946289062,
|
|
"KL/mean": -770.283447265625,
|
|
"KL/rejected_KL_mean": -968.3349609375,
|
|
"KL/std": 461.42987060546875,
|
|
"epoch": 0.6828193832599119,
|
|
"fcm_dpo/beta": 0.0012997114099562168,
|
|
"fcm_dpo/delta": -0.12087617814540863,
|
|
"fcm_dpo/margin": 396.10296630859375,
|
|
"fcm_dpo/q_t": 0.38136354088783264,
|
|
"grad_norm": 36.88157653808594,
|
|
"learning_rate": 1.3971566441730714e-07,
|
|
"logits/chosen": -0.7635716199874878,
|
|
"logits/rejected": -0.7767517566680908,
|
|
"logps/chosen": -632.348876953125,
|
|
"logps/ref_chosen": -60.116851806640625,
|
|
"logps/ref_rejected": -113.94602966308594,
|
|
"logps/rejected": -1082.281005859375,
|
|
"loss": 1.0321,
|
|
"margin_dpo/margin_mean": 396.10296630859375,
|
|
"margin_dpo/margin_std": 493.5492858886719,
|
|
"step": 465
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -633.3134765625,
|
|
"KL/mean": -802.6712646484375,
|
|
"KL/rejected_KL_mean": -972.0289306640625,
|
|
"KL/std": 447.70947265625,
|
|
"epoch": 0.684287812041116,
|
|
"fcm_dpo/beta": 0.001274168025702238,
|
|
"fcm_dpo/delta": -0.03375307843089104,
|
|
"fcm_dpo/margin": 338.7154541015625,
|
|
"fcm_dpo/q_t": 0.4008180499076843,
|
|
"grad_norm": 33.14835739135742,
|
|
"learning_rate": 1.3856541105586545e-07,
|
|
"logits/chosen": -0.8126999139785767,
|
|
"logits/rejected": -0.8147940635681152,
|
|
"logps/chosen": -686.234375,
|
|
"logps/ref_chosen": -52.920921325683594,
|
|
"logps/ref_rejected": -90.3154296875,
|
|
"logps/rejected": -1062.3443603515625,
|
|
"loss": 1.0895,
|
|
"margin_dpo/margin_mean": 338.7154541015625,
|
|
"margin_dpo/margin_std": 476.6962890625,
|
|
"step": 466
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -806.89892578125,
|
|
"KL/mean": -979.3716430664062,
|
|
"KL/rejected_KL_mean": -1151.8443603515625,
|
|
"KL/std": 577.361572265625,
|
|
"epoch": 0.6857562408223201,
|
|
"fcm_dpo/beta": 0.001259978162124753,
|
|
"fcm_dpo/delta": -0.03823067247867584,
|
|
"fcm_dpo/margin": 344.94549560546875,
|
|
"fcm_dpo/q_t": 0.40663182735443115,
|
|
"grad_norm": 69.0197525024414,
|
|
"learning_rate": 1.3741809409947729e-07,
|
|
"logits/chosen": -0.920897901058197,
|
|
"logits/rejected": -0.8992031812667847,
|
|
"logps/chosen": -885.61474609375,
|
|
"logps/ref_chosen": -78.7158203125,
|
|
"logps/ref_rejected": -102.86019897460938,
|
|
"logps/rejected": -1254.70458984375,
|
|
"loss": 1.1674,
|
|
"margin_dpo/margin_mean": 344.94549560546875,
|
|
"margin_dpo/margin_std": 677.0098876953125,
|
|
"step": 467
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -625.35986328125,
|
|
"KL/mean": -837.527099609375,
|
|
"KL/rejected_KL_mean": -1049.6942138671875,
|
|
"KL/std": 500.7874755859375,
|
|
"epoch": 0.6872246696035242,
|
|
"fcm_dpo/beta": 0.0012417640537023544,
|
|
"fcm_dpo/delta": -0.13427412509918213,
|
|
"fcm_dpo/margin": 424.33441162109375,
|
|
"fcm_dpo/q_t": 0.3849431276321411,
|
|
"grad_norm": 38.766334533691406,
|
|
"learning_rate": 1.362737437810114e-07,
|
|
"logits/chosen": -0.869031548500061,
|
|
"logits/rejected": -0.8773350715637207,
|
|
"logps/chosen": -695.2952270507812,
|
|
"logps/ref_chosen": -69.93536376953125,
|
|
"logps/ref_rejected": -101.02880859375,
|
|
"logps/rejected": -1150.7230224609375,
|
|
"loss": 1.028,
|
|
"margin_dpo/margin_mean": 424.33441162109375,
|
|
"margin_dpo/margin_std": 557.9710693359375,
|
|
"step": 468
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -647.9295654296875,
|
|
"KL/mean": -849.9874267578125,
|
|
"KL/rejected_KL_mean": -1052.0452880859375,
|
|
"KL/std": 426.935791015625,
|
|
"epoch": 0.6886930983847284,
|
|
"fcm_dpo/beta": 0.0012096271384507418,
|
|
"fcm_dpo/delta": -0.09502536803483963,
|
|
"fcm_dpo/margin": 404.11572265625,
|
|
"fcm_dpo/q_t": 0.38767051696777344,
|
|
"grad_norm": 37.197357177734375,
|
|
"learning_rate": 1.351323902551631e-07,
|
|
"logits/chosen": -0.8826281428337097,
|
|
"logits/rejected": -0.8875974416732788,
|
|
"logps/chosen": -716.05419921875,
|
|
"logps/ref_chosen": -68.12469482421875,
|
|
"logps/ref_rejected": -104.78640747070312,
|
|
"logps/rejected": -1156.8316650390625,
|
|
"loss": 1.0321,
|
|
"margin_dpo/margin_mean": 404.11572265625,
|
|
"margin_dpo/margin_std": 464.22857666015625,
|
|
"step": 469
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -522.9632568359375,
|
|
"KL/mean": -709.4610595703125,
|
|
"KL/rejected_KL_mean": -895.9589233398438,
|
|
"KL/std": 429.58807373046875,
|
|
"epoch": 0.6901615271659325,
|
|
"fcm_dpo/beta": 0.001205753069370985,
|
|
"fcm_dpo/delta": -0.052081190049648285,
|
|
"fcm_dpo/margin": 372.99566650390625,
|
|
"fcm_dpo/q_t": 0.3957051932811737,
|
|
"grad_norm": 26.235424041748047,
|
|
"learning_rate": 1.339940635976592e-07,
|
|
"logits/chosen": -0.8188216686248779,
|
|
"logits/rejected": -0.8245443105697632,
|
|
"logps/chosen": -566.755126953125,
|
|
"logps/ref_chosen": -43.791927337646484,
|
|
"logps/ref_rejected": -82.70285034179688,
|
|
"logps/rejected": -978.6617431640625,
|
|
"loss": 1.0611,
|
|
"margin_dpo/margin_mean": 372.99566650390625,
|
|
"margin_dpo/margin_std": 466.9468078613281,
|
|
"step": 470
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -658.70751953125,
|
|
"KL/mean": -822.0084228515625,
|
|
"KL/rejected_KL_mean": -985.309326171875,
|
|
"KL/std": 467.14373779296875,
|
|
"epoch": 0.6916299559471366,
|
|
"fcm_dpo/beta": 0.0011935688089579344,
|
|
"fcm_dpo/delta": 0.009803693741559982,
|
|
"fcm_dpo/margin": 326.6017761230469,
|
|
"fcm_dpo/q_t": 0.41308581829071045,
|
|
"grad_norm": 32.16667938232422,
|
|
"learning_rate": 1.3285879380446563e-07,
|
|
"logits/chosen": -0.90375816822052,
|
|
"logits/rejected": -0.9042317271232605,
|
|
"logps/chosen": -722.0469970703125,
|
|
"logps/ref_chosen": -63.33952331542969,
|
|
"logps/ref_rejected": -83.61048126220703,
|
|
"logps/rejected": -1068.9197998046875,
|
|
"loss": 1.1159,
|
|
"margin_dpo/margin_mean": 326.601806640625,
|
|
"margin_dpo/margin_std": 494.537353515625,
|
|
"step": 471
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -641.023681640625,
|
|
"KL/mean": -837.814697265625,
|
|
"KL/rejected_KL_mean": -1034.605712890625,
|
|
"KL/std": 537.2503662109375,
|
|
"epoch": 0.6930983847283406,
|
|
"fcm_dpo/beta": 0.0011852658353745937,
|
|
"fcm_dpo/delta": -0.07024183124303818,
|
|
"fcm_dpo/margin": 393.58203125,
|
|
"fcm_dpo/q_t": 0.3995856046676636,
|
|
"grad_norm": 28.73564338684082,
|
|
"learning_rate": 1.317266107909975e-07,
|
|
"logits/chosen": -0.8875927925109863,
|
|
"logits/rejected": -0.8611509203910828,
|
|
"logps/chosen": -724.6898193359375,
|
|
"logps/ref_chosen": -83.66610717773438,
|
|
"logps/ref_rejected": -117.20919799804688,
|
|
"logps/rejected": -1151.81494140625,
|
|
"loss": 1.08,
|
|
"margin_dpo/margin_mean": 393.58203125,
|
|
"margin_dpo/margin_std": 579.6232299804688,
|
|
"step": 472
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -757.5256958007812,
|
|
"KL/mean": -852.3858642578125,
|
|
"KL/rejected_KL_mean": -947.24609375,
|
|
"KL/std": 554.10888671875,
|
|
"epoch": 0.6945668135095447,
|
|
"fcm_dpo/beta": 0.0012000746792182326,
|
|
"fcm_dpo/delta": 0.06275806576013565,
|
|
"fcm_dpo/margin": 189.72039794921875,
|
|
"fcm_dpo/q_t": 0.4505438506603241,
|
|
"grad_norm": 99.1730728149414,
|
|
"learning_rate": 1.3059754439133002e-07,
|
|
"logits/chosen": -0.8745533227920532,
|
|
"logits/rejected": -0.8379828929901123,
|
|
"logps/chosen": -821.022705078125,
|
|
"logps/ref_chosen": -63.49696731567383,
|
|
"logps/ref_rejected": -81.14657592773438,
|
|
"logps/rejected": -1028.3927001953125,
|
|
"loss": 1.3228,
|
|
"margin_dpo/margin_mean": 189.72039794921875,
|
|
"margin_dpo/margin_std": 697.6124267578125,
|
|
"step": 473
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -628.0360107421875,
|
|
"KL/mean": -785.1285400390625,
|
|
"KL/rejected_KL_mean": -942.2210693359375,
|
|
"KL/std": 488.5404052734375,
|
|
"epoch": 0.6960352422907489,
|
|
"fcm_dpo/beta": 0.0011911317706108093,
|
|
"fcm_dpo/delta": -0.0750809758901596,
|
|
"fcm_dpo/margin": 314.18511962890625,
|
|
"fcm_dpo/q_t": 0.41488319635391235,
|
|
"grad_norm": 34.411014556884766,
|
|
"learning_rate": 1.2947162435741277e-07,
|
|
"logits/chosen": -0.820152223110199,
|
|
"logits/rejected": -0.8232827186584473,
|
|
"logps/chosen": -680.64794921875,
|
|
"logps/ref_chosen": -52.6119384765625,
|
|
"logps/ref_rejected": -90.08041381835938,
|
|
"logps/rejected": -1032.301513671875,
|
|
"loss": 1.1519,
|
|
"margin_dpo/margin_mean": 314.1850891113281,
|
|
"margin_dpo/margin_std": 546.5555419921875,
|
|
"step": 474
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -469.183837890625,
|
|
"KL/mean": -671.3321533203125,
|
|
"KL/rejected_KL_mean": -873.4803466796875,
|
|
"KL/std": 405.08099365234375,
|
|
"epoch": 0.697503671071953,
|
|
"fcm_dpo/beta": 0.0011684303171932697,
|
|
"fcm_dpo/delta": -0.07601547241210938,
|
|
"fcm_dpo/margin": 404.2965087890625,
|
|
"fcm_dpo/q_t": 0.39084818959236145,
|
|
"grad_norm": 29.762432098388672,
|
|
"learning_rate": 1.2834888035828596e-07,
|
|
"logits/chosen": -0.8730387687683105,
|
|
"logits/rejected": -0.896369993686676,
|
|
"logps/chosen": -511.6790466308594,
|
|
"logps/ref_chosen": -42.49519348144531,
|
|
"logps/ref_rejected": -90.06294250488281,
|
|
"logps/rejected": -963.5432739257812,
|
|
"loss": 1.0303,
|
|
"margin_dpo/margin_mean": 404.2965087890625,
|
|
"margin_dpo/margin_std": 443.20880126953125,
|
|
"step": 475
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -576.46826171875,
|
|
"KL/mean": -734.1749267578125,
|
|
"KL/rejected_KL_mean": -891.881591796875,
|
|
"KL/std": 429.33795166015625,
|
|
"epoch": 0.6989720998531571,
|
|
"fcm_dpo/beta": 0.0011697396403178573,
|
|
"fcm_dpo/delta": 0.03224332630634308,
|
|
"fcm_dpo/margin": 315.4133605957031,
|
|
"fcm_dpo/q_t": 0.41562318801879883,
|
|
"grad_norm": 34.20625686645508,
|
|
"learning_rate": 1.2722934197929802e-07,
|
|
"logits/chosen": -0.8082433342933655,
|
|
"logits/rejected": -0.8174213171005249,
|
|
"logps/chosen": -619.4176025390625,
|
|
"logps/ref_chosen": -42.94938278198242,
|
|
"logps/ref_rejected": -73.71023559570312,
|
|
"logps/rejected": -965.591796875,
|
|
"loss": 1.1123,
|
|
"margin_dpo/margin_mean": 315.4133605957031,
|
|
"margin_dpo/margin_std": 440.5125732421875,
|
|
"step": 476
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -598.4927978515625,
|
|
"KL/mean": -759.2872314453125,
|
|
"KL/rejected_KL_mean": -920.0816650390625,
|
|
"KL/std": 456.7637939453125,
|
|
"epoch": 0.7004405286343612,
|
|
"fcm_dpo/beta": 0.0011787796393036842,
|
|
"fcm_dpo/delta": 0.02154078520834446,
|
|
"fcm_dpo/margin": 321.58892822265625,
|
|
"fcm_dpo/q_t": 0.4136529564857483,
|
|
"grad_norm": 26.809673309326172,
|
|
"learning_rate": 1.2611303872132631e-07,
|
|
"logits/chosen": -0.860885739326477,
|
|
"logits/rejected": -0.8121699094772339,
|
|
"logps/chosen": -669.265380859375,
|
|
"logps/ref_chosen": -70.77261352539062,
|
|
"logps/ref_rejected": -76.13737487792969,
|
|
"logps/rejected": -996.2190551757812,
|
|
"loss": 1.1364,
|
|
"margin_dpo/margin_mean": 321.5888977050781,
|
|
"margin_dpo/margin_std": 541.0950927734375,
|
|
"step": 477
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -491.0763854980469,
|
|
"KL/mean": -672.4014892578125,
|
|
"KL/rejected_KL_mean": -853.7266845703125,
|
|
"KL/std": 389.57879638671875,
|
|
"epoch": 0.7019089574155654,
|
|
"fcm_dpo/beta": 0.0011755165178328753,
|
|
"fcm_dpo/delta": -0.02756289392709732,
|
|
"fcm_dpo/margin": 362.6502685546875,
|
|
"fcm_dpo/q_t": 0.4015880227088928,
|
|
"grad_norm": 28.476211547851562,
|
|
"learning_rate": 1.2500000000000005e-07,
|
|
"logits/chosen": -0.7467737197875977,
|
|
"logits/rejected": -0.7583505511283875,
|
|
"logps/chosen": -532.5169067382812,
|
|
"logps/ref_chosen": -41.440513610839844,
|
|
"logps/ref_rejected": -85.36196899414062,
|
|
"logps/rejected": -939.088623046875,
|
|
"loss": 1.0715,
|
|
"margin_dpo/margin_mean": 362.6502685546875,
|
|
"margin_dpo/margin_std": 451.751708984375,
|
|
"step": 478
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -616.0745849609375,
|
|
"KL/mean": -788.5822143554688,
|
|
"KL/rejected_KL_mean": -961.08984375,
|
|
"KL/std": 471.95684814453125,
|
|
"epoch": 0.7033773861967695,
|
|
"fcm_dpo/beta": 0.001176186604425311,
|
|
"fcm_dpo/delta": -0.0067070163786411285,
|
|
"fcm_dpo/margin": 345.01519775390625,
|
|
"fcm_dpo/q_t": 0.40878403186798096,
|
|
"grad_norm": 28.581802368164062,
|
|
"learning_rate": 1.2389025514492456e-07,
|
|
"logits/chosen": -0.7714790105819702,
|
|
"logits/rejected": -0.794031023979187,
|
|
"logps/chosen": -669.9825439453125,
|
|
"logps/ref_chosen": -53.907920837402344,
|
|
"logps/ref_rejected": -95.1163330078125,
|
|
"logps/rejected": -1056.2061767578125,
|
|
"loss": 1.1094,
|
|
"margin_dpo/margin_mean": 345.01519775390625,
|
|
"margin_dpo/margin_std": 522.553955078125,
|
|
"step": 479
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -697.255126953125,
|
|
"KL/mean": -839.0286865234375,
|
|
"KL/rejected_KL_mean": -980.8023071289062,
|
|
"KL/std": 442.7588806152344,
|
|
"epoch": 0.7048458149779736,
|
|
"fcm_dpo/beta": 0.00116480584256351,
|
|
"fcm_dpo/delta": -0.03519085794687271,
|
|
"fcm_dpo/margin": 283.5472106933594,
|
|
"fcm_dpo/q_t": 0.42370662093162537,
|
|
"grad_norm": 48.71305847167969,
|
|
"learning_rate": 1.227838333989088e-07,
|
|
"logits/chosen": -0.7846644520759583,
|
|
"logits/rejected": -0.7700395584106445,
|
|
"logps/chosen": -755.9378051757812,
|
|
"logps/ref_chosen": -58.682701110839844,
|
|
"logps/ref_rejected": -82.93248748779297,
|
|
"logps/rejected": -1063.73486328125,
|
|
"loss": 1.1653,
|
|
"margin_dpo/margin_mean": 283.5472412109375,
|
|
"margin_dpo/margin_std": 481.92913818359375,
|
|
"step": 480
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -606.09716796875,
|
|
"KL/mean": -816.431640625,
|
|
"KL/rejected_KL_mean": -1026.76611328125,
|
|
"KL/std": 473.70269775390625,
|
|
"epoch": 0.7063142437591777,
|
|
"fcm_dpo/beta": 0.0011470152530819178,
|
|
"fcm_dpo/delta": -0.08663056790828705,
|
|
"fcm_dpo/margin": 420.6690368652344,
|
|
"fcm_dpo/q_t": 0.39048588275909424,
|
|
"grad_norm": 37.28335189819336,
|
|
"learning_rate": 1.2168076391719489e-07,
|
|
"logits/chosen": -0.8456419110298157,
|
|
"logits/rejected": -0.8666043281555176,
|
|
"logps/chosen": -661.0614013671875,
|
|
"logps/ref_chosen": -54.964271545410156,
|
|
"logps/ref_rejected": -92.42044067382812,
|
|
"logps/rejected": -1119.1866455078125,
|
|
"loss": 1.0382,
|
|
"margin_dpo/margin_mean": 420.6690368652344,
|
|
"margin_dpo/margin_std": 509.59088134765625,
|
|
"step": 481
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -669.8154907226562,
|
|
"KL/mean": -776.3551025390625,
|
|
"KL/rejected_KL_mean": -882.894775390625,
|
|
"KL/std": 481.4432373046875,
|
|
"epoch": 0.7077826725403817,
|
|
"fcm_dpo/beta": 0.0011683362536132336,
|
|
"fcm_dpo/delta": 0.15478384494781494,
|
|
"fcm_dpo/margin": 213.07933044433594,
|
|
"fcm_dpo/q_t": 0.4412637948989868,
|
|
"grad_norm": 46.77339172363281,
|
|
"learning_rate": 1.2058107576668938e-07,
|
|
"logits/chosen": -0.7833594083786011,
|
|
"logits/rejected": -0.7691007852554321,
|
|
"logps/chosen": -737.368896484375,
|
|
"logps/ref_chosen": -67.553466796875,
|
|
"logps/ref_rejected": -87.58953857421875,
|
|
"logps/rejected": -970.4843139648438,
|
|
"loss": 1.2584,
|
|
"margin_dpo/margin_mean": 213.07931518554688,
|
|
"margin_dpo/margin_std": 576.9671630859375,
|
|
"step": 482
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -593.7992553710938,
|
|
"KL/mean": -821.1121826171875,
|
|
"KL/rejected_KL_mean": -1048.425048828125,
|
|
"KL/std": 477.17486572265625,
|
|
"epoch": 0.7092511013215859,
|
|
"fcm_dpo/beta": 0.0011535545345395803,
|
|
"fcm_dpo/delta": -0.13136449456214905,
|
|
"fcm_dpo/margin": 454.625732421875,
|
|
"fcm_dpo/q_t": 0.3826148509979248,
|
|
"grad_norm": 31.144506454467773,
|
|
"learning_rate": 1.194847979251979e-07,
|
|
"logits/chosen": -0.871976375579834,
|
|
"logits/rejected": -0.878372311592102,
|
|
"logps/chosen": -657.1290893554688,
|
|
"logps/ref_chosen": -63.32981872558594,
|
|
"logps/ref_rejected": -95.78697204589844,
|
|
"logps/rejected": -1144.2119140625,
|
|
"loss": 1.0182,
|
|
"margin_dpo/margin_mean": 454.6257629394531,
|
|
"margin_dpo/margin_std": 549.6389770507812,
|
|
"step": 483
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -519.2157592773438,
|
|
"KL/mean": -720.3311767578125,
|
|
"KL/rejected_KL_mean": -921.4464721679688,
|
|
"KL/std": 480.59307861328125,
|
|
"epoch": 0.71071953010279,
|
|
"fcm_dpo/beta": 0.0011427226709201932,
|
|
"fcm_dpo/delta": -0.06293704360723495,
|
|
"fcm_dpo/margin": 402.230712890625,
|
|
"fcm_dpo/q_t": 0.39542460441589355,
|
|
"grad_norm": 40.899009704589844,
|
|
"learning_rate": 1.1839195928066101e-07,
|
|
"logits/chosen": -0.8334769010543823,
|
|
"logits/rejected": -0.8463296890258789,
|
|
"logps/chosen": -578.3538818359375,
|
|
"logps/ref_chosen": -59.13812255859375,
|
|
"logps/ref_rejected": -84.37144470214844,
|
|
"logps/rejected": -1005.8179321289062,
|
|
"loss": 1.0499,
|
|
"margin_dpo/margin_mean": 402.230712890625,
|
|
"margin_dpo/margin_std": 477.49810791015625,
|
|
"step": 484
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -550.48193359375,
|
|
"KL/mean": -742.0205078125,
|
|
"KL/rejected_KL_mean": -933.5590209960938,
|
|
"KL/std": 464.6655578613281,
|
|
"epoch": 0.7121879588839941,
|
|
"fcm_dpo/beta": 0.0011293399147689342,
|
|
"fcm_dpo/delta": -0.03417329490184784,
|
|
"fcm_dpo/margin": 383.07708740234375,
|
|
"fcm_dpo/q_t": 0.40266501903533936,
|
|
"grad_norm": 32.848182678222656,
|
|
"learning_rate": 1.1730258863039347e-07,
|
|
"logits/chosen": -0.8008699417114258,
|
|
"logits/rejected": -0.8162240386009216,
|
|
"logps/chosen": -609.33154296875,
|
|
"logps/ref_chosen": -58.849571228027344,
|
|
"logps/ref_rejected": -103.36408233642578,
|
|
"logps/rejected": -1036.923095703125,
|
|
"loss": 1.0859,
|
|
"margin_dpo/margin_mean": 383.07708740234375,
|
|
"margin_dpo/margin_std": 541.0684814453125,
|
|
"step": 485
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -621.676513671875,
|
|
"KL/mean": -835.73876953125,
|
|
"KL/rejected_KL_mean": -1049.801025390625,
|
|
"KL/std": 494.6927185058594,
|
|
"epoch": 0.7136563876651982,
|
|
"fcm_dpo/beta": 0.0011101996060460806,
|
|
"fcm_dpo/delta": -0.079122394323349,
|
|
"fcm_dpo/margin": 428.12445068359375,
|
|
"fcm_dpo/q_t": 0.39419782161712646,
|
|
"grad_norm": 31.21572494506836,
|
|
"learning_rate": 1.1621671468032493e-07,
|
|
"logits/chosen": -0.908329963684082,
|
|
"logits/rejected": -0.9156872034072876,
|
|
"logps/chosen": -676.9362182617188,
|
|
"logps/ref_chosen": -55.25966262817383,
|
|
"logps/ref_rejected": -92.13936614990234,
|
|
"logps/rejected": -1141.9404296875,
|
|
"loss": 1.072,
|
|
"margin_dpo/margin_mean": 428.12445068359375,
|
|
"margin_dpo/margin_std": 619.9473876953125,
|
|
"step": 486
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -667.73486328125,
|
|
"KL/mean": -831.6156616210938,
|
|
"KL/rejected_KL_mean": -995.4964599609375,
|
|
"KL/std": 467.8983154296875,
|
|
"epoch": 0.7151248164464024,
|
|
"fcm_dpo/beta": 0.0011161823058500886,
|
|
"fcm_dpo/delta": 0.0350751131772995,
|
|
"fcm_dpo/margin": 327.7616271972656,
|
|
"fcm_dpo/q_t": 0.41450557112693787,
|
|
"grad_norm": 42.0443229675293,
|
|
"learning_rate": 1.1513436604424378e-07,
|
|
"logits/chosen": -0.8875995874404907,
|
|
"logits/rejected": -0.891254186630249,
|
|
"logps/chosen": -720.7981567382812,
|
|
"logps/ref_chosen": -53.06330871582031,
|
|
"logps/ref_rejected": -92.41883087158203,
|
|
"logps/rejected": -1087.915283203125,
|
|
"loss": 1.125,
|
|
"margin_dpo/margin_mean": 327.7616271972656,
|
|
"margin_dpo/margin_std": 490.9307861328125,
|
|
"step": 487
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -553.8161010742188,
|
|
"KL/mean": -718.7296142578125,
|
|
"KL/rejected_KL_mean": -883.6431884765625,
|
|
"KL/std": 415.6312255859375,
|
|
"epoch": 0.7165932452276065,
|
|
"fcm_dpo/beta": 0.0011240593157708645,
|
|
"fcm_dpo/delta": 0.02986850030720234,
|
|
"fcm_dpo/margin": 329.82708740234375,
|
|
"fcm_dpo/q_t": 0.4142517149448395,
|
|
"grad_norm": 32.32245635986328,
|
|
"learning_rate": 1.1405557124304335e-07,
|
|
"logits/chosen": -0.8679848313331604,
|
|
"logits/rejected": -0.8698484301567078,
|
|
"logps/chosen": -606.0442504882812,
|
|
"logps/ref_chosen": -52.22815704345703,
|
|
"logps/ref_rejected": -84.00656127929688,
|
|
"logps/rejected": -967.6497802734375,
|
|
"loss": 1.1052,
|
|
"margin_dpo/margin_mean": 329.82708740234375,
|
|
"margin_dpo/margin_std": 427.99810791015625,
|
|
"step": 488
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -516.3485107421875,
|
|
"KL/mean": -680.156982421875,
|
|
"KL/rejected_KL_mean": -843.9655151367188,
|
|
"KL/std": 421.83648681640625,
|
|
"epoch": 0.7180616740088106,
|
|
"fcm_dpo/beta": 0.001128336414694786,
|
|
"fcm_dpo/delta": 0.031364768743515015,
|
|
"fcm_dpo/margin": 327.616943359375,
|
|
"fcm_dpo/q_t": 0.4163089990615845,
|
|
"grad_norm": 32.1641960144043,
|
|
"learning_rate": 1.1298035870396985e-07,
|
|
"logits/chosen": -0.9042928218841553,
|
|
"logits/rejected": -0.8924728035926819,
|
|
"logps/chosen": -572.338134765625,
|
|
"logps/ref_chosen": -55.989627838134766,
|
|
"logps/ref_rejected": -79.39812469482422,
|
|
"logps/rejected": -923.3636474609375,
|
|
"loss": 1.1145,
|
|
"margin_dpo/margin_mean": 327.616943359375,
|
|
"margin_dpo/margin_std": 469.63079833984375,
|
|
"step": 489
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -605.282958984375,
|
|
"KL/mean": -779.2673950195312,
|
|
"KL/rejected_KL_mean": -953.2518310546875,
|
|
"KL/std": 508.89208984375,
|
|
"epoch": 0.7195301027900147,
|
|
"fcm_dpo/beta": 0.001132056349888444,
|
|
"fcm_dpo/delta": 0.006162045523524284,
|
|
"fcm_dpo/margin": 347.96881103515625,
|
|
"fcm_dpo/q_t": 0.4120814800262451,
|
|
"grad_norm": 57.53245544433594,
|
|
"learning_rate": 1.1190875675987355e-07,
|
|
"logits/chosen": -0.9138531684875488,
|
|
"logits/rejected": -0.9565045833587646,
|
|
"logps/chosen": -657.6494140625,
|
|
"logps/ref_chosen": -52.36639404296875,
|
|
"logps/ref_rejected": -110.4090576171875,
|
|
"logps/rejected": -1063.660888671875,
|
|
"loss": 1.1397,
|
|
"margin_dpo/margin_mean": 347.96881103515625,
|
|
"margin_dpo/margin_std": 604.8751220703125,
|
|
"step": 490
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -560.5130615234375,
|
|
"KL/mean": -677.8720703125,
|
|
"KL/rejected_KL_mean": -795.2310791015625,
|
|
"KL/std": 434.89801025390625,
|
|
"epoch": 0.7209985315712188,
|
|
"fcm_dpo/beta": 0.0011535290395841002,
|
|
"fcm_dpo/delta": 0.1326218992471695,
|
|
"fcm_dpo/margin": 234.71804809570312,
|
|
"fcm_dpo/q_t": 0.438266396522522,
|
|
"grad_norm": 30.0527400970459,
|
|
"learning_rate": 1.1084079364846241e-07,
|
|
"logits/chosen": -0.887237548828125,
|
|
"logits/rejected": -0.8714909553527832,
|
|
"logps/chosen": -620.6292724609375,
|
|
"logps/ref_chosen": -60.11626434326172,
|
|
"logps/ref_rejected": -73.27278900146484,
|
|
"logps/rejected": -868.50390625,
|
|
"loss": 1.195,
|
|
"margin_dpo/margin_mean": 234.7180633544922,
|
|
"margin_dpo/margin_std": 441.885009765625,
|
|
"step": 491
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -571.1317138671875,
|
|
"KL/mean": -690.6876220703125,
|
|
"KL/rejected_KL_mean": -810.2435302734375,
|
|
"KL/std": 437.35736083984375,
|
|
"epoch": 0.7224669603524229,
|
|
"fcm_dpo/beta": 0.0011779199121519923,
|
|
"fcm_dpo/delta": 0.121956005692482,
|
|
"fcm_dpo/margin": 239.11178588867188,
|
|
"fcm_dpo/q_t": 0.4370569884777069,
|
|
"grad_norm": 29.874557495117188,
|
|
"learning_rate": 1.097764975115576e-07,
|
|
"logits/chosen": -0.9431591033935547,
|
|
"logits/rejected": -0.9226012229919434,
|
|
"logps/chosen": -625.1259155273438,
|
|
"logps/ref_chosen": -53.994178771972656,
|
|
"logps/ref_rejected": -72.65962219238281,
|
|
"logps/rejected": -882.9031372070312,
|
|
"loss": 1.2094,
|
|
"margin_dpo/margin_mean": 239.1117706298828,
|
|
"margin_dpo/margin_std": 507.32025146484375,
|
|
"step": 492
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -595.4638061523438,
|
|
"KL/mean": -726.3221435546875,
|
|
"KL/rejected_KL_mean": -857.1805419921875,
|
|
"KL/std": 469.4154968261719,
|
|
"epoch": 0.723935389133627,
|
|
"fcm_dpo/beta": 0.0011845249682664871,
|
|
"fcm_dpo/delta": -0.016040312126278877,
|
|
"fcm_dpo/margin": 261.7166748046875,
|
|
"fcm_dpo/q_t": 0.42683732509613037,
|
|
"grad_norm": 33.20534896850586,
|
|
"learning_rate": 1.0871589639435203e-07,
|
|
"logits/chosen": -0.9668236970901489,
|
|
"logits/rejected": -0.9259661436080933,
|
|
"logps/chosen": -670.9610595703125,
|
|
"logps/ref_chosen": -75.49723815917969,
|
|
"logps/ref_rejected": -87.32301330566406,
|
|
"logps/rejected": -944.5035400390625,
|
|
"loss": 1.1687,
|
|
"margin_dpo/margin_mean": 261.7166748046875,
|
|
"margin_dpo/margin_std": 446.52606201171875,
|
|
"step": 493
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -478.02276611328125,
|
|
"KL/mean": -682.6138916015625,
|
|
"KL/rejected_KL_mean": -887.2049560546875,
|
|
"KL/std": 429.03424072265625,
|
|
"epoch": 0.7254038179148311,
|
|
"fcm_dpo/beta": 0.0011693753767758608,
|
|
"fcm_dpo/delta": -0.08248934149742126,
|
|
"fcm_dpo/margin": 409.18218994140625,
|
|
"fcm_dpo/q_t": 0.38879674673080444,
|
|
"grad_norm": 43.84877395629883,
|
|
"learning_rate": 1.0765901824467166e-07,
|
|
"logits/chosen": -0.7634121179580688,
|
|
"logits/rejected": -0.7962794303894043,
|
|
"logps/chosen": -519.382080078125,
|
|
"logps/ref_chosen": -41.35926818847656,
|
|
"logps/ref_rejected": -86.09136962890625,
|
|
"logps/rejected": -973.29638671875,
|
|
"loss": 1.0237,
|
|
"margin_dpo/margin_mean": 409.18218994140625,
|
|
"margin_dpo/margin_std": 432.674072265625,
|
|
"step": 494
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -547.5703125,
|
|
"KL/mean": -727.9459228515625,
|
|
"KL/rejected_KL_mean": -908.321533203125,
|
|
"KL/std": 453.545654296875,
|
|
"epoch": 0.7268722466960352,
|
|
"fcm_dpo/beta": 0.0011636005947366357,
|
|
"fcm_dpo/delta": -0.020662881433963776,
|
|
"fcm_dpo/margin": 360.75115966796875,
|
|
"fcm_dpo/q_t": 0.40708619356155396,
|
|
"grad_norm": 42.19923782348633,
|
|
"learning_rate": 1.0660589091223854e-07,
|
|
"logits/chosen": -0.9473215937614441,
|
|
"logits/rejected": -0.9521135091781616,
|
|
"logps/chosen": -611.1054077148438,
|
|
"logps/ref_chosen": -63.53507995605469,
|
|
"logps/ref_rejected": -91.42443084716797,
|
|
"logps/rejected": -999.7459716796875,
|
|
"loss": 1.0991,
|
|
"margin_dpo/margin_mean": 360.7511901855469,
|
|
"margin_dpo/margin_std": 544.379638671875,
|
|
"step": 495
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -643.5244750976562,
|
|
"KL/mean": -745.2659912109375,
|
|
"KL/rejected_KL_mean": -847.007568359375,
|
|
"KL/std": 351.7397766113281,
|
|
"epoch": 0.7283406754772394,
|
|
"fcm_dpo/beta": 0.0011893340852111578,
|
|
"fcm_dpo/delta": 0.1618269979953766,
|
|
"fcm_dpo/margin": 203.4831085205078,
|
|
"fcm_dpo/q_t": 0.44347870349884033,
|
|
"grad_norm": 57.562477111816406,
|
|
"learning_rate": 1.0555654214793722e-07,
|
|
"logits/chosen": -0.9014628529548645,
|
|
"logits/rejected": -0.8666530251502991,
|
|
"logps/chosen": -716.1163940429688,
|
|
"logps/ref_chosen": -72.5919189453125,
|
|
"logps/ref_rejected": -84.32933807373047,
|
|
"logps/rejected": -931.3369140625,
|
|
"loss": 1.2108,
|
|
"margin_dpo/margin_mean": 203.48312377929688,
|
|
"margin_dpo/margin_std": 389.7113037109375,
|
|
"step": 496
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -629.3239135742188,
|
|
"KL/mean": -727.7587890625,
|
|
"KL/rejected_KL_mean": -826.1937255859375,
|
|
"KL/std": 427.0351867675781,
|
|
"epoch": 0.7298091042584435,
|
|
"fcm_dpo/beta": 0.001203190186060965,
|
|
"fcm_dpo/delta": 0.019497813656926155,
|
|
"fcm_dpo/margin": 196.86973571777344,
|
|
"fcm_dpo/q_t": 0.4448572099208832,
|
|
"grad_norm": 40.526710510253906,
|
|
"learning_rate": 1.0451099960308374e-07,
|
|
"logits/chosen": -0.879808783531189,
|
|
"logits/rejected": -0.8550307154655457,
|
|
"logps/chosen": -687.9178466796875,
|
|
"logps/ref_chosen": -58.59397506713867,
|
|
"logps/ref_rejected": -76.28836822509766,
|
|
"logps/rejected": -902.4820556640625,
|
|
"loss": 1.2269,
|
|
"margin_dpo/margin_mean": 196.86973571777344,
|
|
"margin_dpo/margin_std": 415.5872802734375,
|
|
"step": 497
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -624.457275390625,
|
|
"KL/mean": -773.5231323242188,
|
|
"KL/rejected_KL_mean": -922.5890502929688,
|
|
"KL/std": 460.3760986328125,
|
|
"epoch": 0.7312775330396476,
|
|
"fcm_dpo/beta": 0.00121046113781631,
|
|
"fcm_dpo/delta": 0.04058893769979477,
|
|
"fcm_dpo/margin": 298.13177490234375,
|
|
"fcm_dpo/q_t": 0.416517436504364,
|
|
"grad_norm": 28.048709869384766,
|
|
"learning_rate": 1.0346929082869641e-07,
|
|
"logits/chosen": -0.8946305513381958,
|
|
"logits/rejected": -0.874454140663147,
|
|
"logps/chosen": -695.6629028320312,
|
|
"logps/ref_chosen": -71.20565795898438,
|
|
"logps/ref_rejected": -83.95803833007812,
|
|
"logps/rejected": -1006.547119140625,
|
|
"loss": 1.1448,
|
|
"margin_dpo/margin_mean": 298.1318054199219,
|
|
"margin_dpo/margin_std": 504.37261962890625,
|
|
"step": 498
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -535.3038940429688,
|
|
"KL/mean": -735.150146484375,
|
|
"KL/rejected_KL_mean": -934.9964599609375,
|
|
"KL/std": 470.1824951171875,
|
|
"epoch": 0.7327459618208517,
|
|
"fcm_dpo/beta": 0.0011967134196311235,
|
|
"fcm_dpo/delta": -0.08238838613033295,
|
|
"fcm_dpo/margin": 399.6925964355469,
|
|
"fcm_dpo/q_t": 0.39139658212661743,
|
|
"grad_norm": 45.138729095458984,
|
|
"learning_rate": 1.0243144327477013e-07,
|
|
"logits/chosen": -0.8791499733924866,
|
|
"logits/rejected": -0.9143052697181702,
|
|
"logps/chosen": -586.55908203125,
|
|
"logps/ref_chosen": -51.25519561767578,
|
|
"logps/ref_rejected": -101.07870483398438,
|
|
"logps/rejected": -1036.0751953125,
|
|
"loss": 1.0497,
|
|
"margin_dpo/margin_mean": 399.692626953125,
|
|
"margin_dpo/margin_std": 514.5656127929688,
|
|
"step": 499
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -659.9070434570312,
|
|
"KL/mean": -845.4681396484375,
|
|
"KL/rejected_KL_mean": -1031.0291748046875,
|
|
"KL/std": 449.5928039550781,
|
|
"epoch": 0.7342143906020558,
|
|
"fcm_dpo/beta": 0.0011844468535855412,
|
|
"fcm_dpo/delta": -0.04147026687860489,
|
|
"fcm_dpo/margin": 371.12213134765625,
|
|
"fcm_dpo/q_t": 0.4010527729988098,
|
|
"grad_norm": 30.78356170654297,
|
|
"learning_rate": 1.0139748428955333e-07,
|
|
"logits/chosen": -0.8759046792984009,
|
|
"logits/rejected": -0.9078898429870605,
|
|
"logps/chosen": -716.9344482421875,
|
|
"logps/ref_chosen": -57.027442932128906,
|
|
"logps/ref_rejected": -93.93421173095703,
|
|
"logps/rejected": -1124.96337890625,
|
|
"loss": 1.0946,
|
|
"margin_dpo/margin_mean": 371.12213134765625,
|
|
"margin_dpo/margin_std": 553.8121337890625,
|
|
"step": 500
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -582.7872924804688,
|
|
"KL/mean": -753.5026245117188,
|
|
"KL/rejected_KL_mean": -924.218017578125,
|
|
"KL/std": 446.186767578125,
|
|
"epoch": 0.73568281938326,
|
|
"fcm_dpo/beta": 0.001185835339128971,
|
|
"fcm_dpo/delta": -0.005243198946118355,
|
|
"fcm_dpo/margin": 341.43072509765625,
|
|
"fcm_dpo/q_t": 0.4099680185317993,
|
|
"grad_norm": 37.05495834350586,
|
|
"learning_rate": 1.0036744111882672e-07,
|
|
"logits/chosen": -0.8284963369369507,
|
|
"logits/rejected": -0.8059309720993042,
|
|
"logps/chosen": -637.1468505859375,
|
|
"logps/ref_chosen": -54.359527587890625,
|
|
"logps/ref_rejected": -80.15670013427734,
|
|
"logps/rejected": -1004.3746948242188,
|
|
"loss": 1.1294,
|
|
"margin_dpo/margin_mean": 341.4306640625,
|
|
"margin_dpo/margin_std": 574.979248046875,
|
|
"step": 501
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -551.6671142578125,
|
|
"KL/mean": -723.5264282226562,
|
|
"KL/rejected_KL_mean": -895.3857421875,
|
|
"KL/std": 402.4056091308594,
|
|
"epoch": 0.737151248164464,
|
|
"fcm_dpo/beta": 0.0011830935254693031,
|
|
"fcm_dpo/delta": -0.006993459537625313,
|
|
"fcm_dpo/margin": 343.71856689453125,
|
|
"fcm_dpo/q_t": 0.40695464611053467,
|
|
"grad_norm": 26.800430297851562,
|
|
"learning_rate": 9.934134090518592e-08,
|
|
"logits/chosen": -0.8308136463165283,
|
|
"logits/rejected": -0.8038866519927979,
|
|
"logps/chosen": -619.2677001953125,
|
|
"logps/ref_chosen": -67.60050964355469,
|
|
"logps/ref_rejected": -82.94876098632812,
|
|
"logps/rejected": -978.33447265625,
|
|
"loss": 1.0823,
|
|
"margin_dpo/margin_mean": 343.71856689453125,
|
|
"margin_dpo/margin_std": 436.3647155761719,
|
|
"step": 502
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -547.663330078125,
|
|
"KL/mean": -710.1171875,
|
|
"KL/rejected_KL_mean": -872.571044921875,
|
|
"KL/std": 404.2541809082031,
|
|
"epoch": 0.7386196769456681,
|
|
"fcm_dpo/beta": 0.001180183608084917,
|
|
"fcm_dpo/delta": 0.017100892961025238,
|
|
"fcm_dpo/margin": 324.90771484375,
|
|
"fcm_dpo/q_t": 0.41350266337394714,
|
|
"grad_norm": 32.493919372558594,
|
|
"learning_rate": 9.831921068732571e-08,
|
|
"logits/chosen": -0.7998204231262207,
|
|
"logits/rejected": -0.7804523706436157,
|
|
"logps/chosen": -602.74169921875,
|
|
"logps/ref_chosen": -55.078407287597656,
|
|
"logps/ref_rejected": -82.50544738769531,
|
|
"logps/rejected": -955.0765380859375,
|
|
"loss": 1.1058,
|
|
"margin_dpo/margin_mean": 324.90771484375,
|
|
"margin_dpo/margin_std": 457.2688293457031,
|
|
"step": 503
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -594.047607421875,
|
|
"KL/mean": -796.29931640625,
|
|
"KL/rejected_KL_mean": -998.551025390625,
|
|
"KL/std": 500.2449645996094,
|
|
"epoch": 0.7400881057268722,
|
|
"fcm_dpo/beta": 0.0011717536253854632,
|
|
"fcm_dpo/delta": -0.07761284708976746,
|
|
"fcm_dpo/margin": 404.5033874511719,
|
|
"fcm_dpo/q_t": 0.39360561966896057,
|
|
"grad_norm": 34.34120178222656,
|
|
"learning_rate": 9.730107739932805e-08,
|
|
"logits/chosen": -0.849440336227417,
|
|
"logits/rejected": -0.8721122741699219,
|
|
"logps/chosen": -654.0133666992188,
|
|
"logps/ref_chosen": -59.96575164794922,
|
|
"logps/ref_rejected": -103.76212310791016,
|
|
"logps/rejected": -1102.3131103515625,
|
|
"loss": 1.0663,
|
|
"margin_dpo/margin_mean": 404.50335693359375,
|
|
"margin_dpo/margin_std": 555.4103393554688,
|
|
"step": 504
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -663.4384155273438,
|
|
"KL/mean": -768.9776611328125,
|
|
"KL/rejected_KL_mean": -874.516845703125,
|
|
"KL/std": 459.5237731933594,
|
|
"epoch": 0.7415565345080763,
|
|
"fcm_dpo/beta": 0.0011943629942834377,
|
|
"fcm_dpo/delta": 0.15151187777519226,
|
|
"fcm_dpo/margin": 211.07833862304688,
|
|
"fcm_dpo/q_t": 0.4423941969871521,
|
|
"grad_norm": 45.66891098022461,
|
|
"learning_rate": 9.628696786995188e-08,
|
|
"logits/chosen": -0.8771257400512695,
|
|
"logits/rejected": -0.8454539775848389,
|
|
"logps/chosen": -739.5933227539062,
|
|
"logps/ref_chosen": -76.1549072265625,
|
|
"logps/ref_rejected": -88.58537292480469,
|
|
"logps/rejected": -963.1021728515625,
|
|
"loss": 1.2136,
|
|
"margin_dpo/margin_mean": 211.07835388183594,
|
|
"margin_dpo/margin_std": 428.6888427734375,
|
|
"step": 505
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -519.6373291015625,
|
|
"KL/mean": -697.4900512695312,
|
|
"KL/rejected_KL_mean": -875.3427734375,
|
|
"KL/std": 440.137939453125,
|
|
"epoch": 0.7430249632892805,
|
|
"fcm_dpo/beta": 0.00119347358122468,
|
|
"fcm_dpo/delta": -0.025928327813744545,
|
|
"fcm_dpo/margin": 355.7054748535156,
|
|
"fcm_dpo/q_t": 0.40311557054519653,
|
|
"grad_norm": 24.074623107910156,
|
|
"learning_rate": 9.527690882192635e-08,
|
|
"logits/chosen": -0.8389706611633301,
|
|
"logits/rejected": -0.8414362668991089,
|
|
"logps/chosen": -568.5978393554688,
|
|
"logps/ref_chosen": -48.96050262451172,
|
|
"logps/ref_rejected": -78.41505432128906,
|
|
"logps/rejected": -953.7578125,
|
|
"loss": 1.0838,
|
|
"margin_dpo/margin_mean": 355.7054748535156,
|
|
"margin_dpo/margin_std": 483.68572998046875,
|
|
"step": 506
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -560.86669921875,
|
|
"KL/mean": -723.412841796875,
|
|
"KL/rejected_KL_mean": -885.958984375,
|
|
"KL/std": 489.5423583984375,
|
|
"epoch": 0.7444933920704846,
|
|
"fcm_dpo/beta": 0.001195873599499464,
|
|
"fcm_dpo/delta": 0.011642876081168652,
|
|
"fcm_dpo/margin": 325.09228515625,
|
|
"fcm_dpo/q_t": 0.4149119257926941,
|
|
"grad_norm": 34.941505432128906,
|
|
"learning_rate": 9.427092687124691e-08,
|
|
"logits/chosen": -0.8597081899642944,
|
|
"logits/rejected": -0.8631129264831543,
|
|
"logps/chosen": -627.6681518554688,
|
|
"logps/ref_chosen": -66.80149841308594,
|
|
"logps/ref_rejected": -95.37289428710938,
|
|
"logps/rejected": -981.3319091796875,
|
|
"loss": 1.1265,
|
|
"margin_dpo/margin_mean": 325.09228515625,
|
|
"margin_dpo/margin_std": 534.9813232421875,
|
|
"step": 507
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -605.2239990234375,
|
|
"KL/mean": -735.3968505859375,
|
|
"KL/rejected_KL_mean": -865.5697021484375,
|
|
"KL/std": 466.6279296875,
|
|
"epoch": 0.7459618208516887,
|
|
"fcm_dpo/beta": 0.0012166362721472979,
|
|
"fcm_dpo/delta": 0.08571073412895203,
|
|
"fcm_dpo/margin": 260.345703125,
|
|
"fcm_dpo/q_t": 0.4304507374763489,
|
|
"grad_norm": 38.527687072753906,
|
|
"learning_rate": 9.326904852647344e-08,
|
|
"logits/chosen": -0.8243334889411926,
|
|
"logits/rejected": -0.8081272840499878,
|
|
"logps/chosen": -676.5274658203125,
|
|
"logps/ref_chosen": -71.303466796875,
|
|
"logps/ref_rejected": -95.6275405883789,
|
|
"logps/rejected": -961.197265625,
|
|
"loss": 1.2015,
|
|
"margin_dpo/margin_mean": 260.345703125,
|
|
"margin_dpo/margin_std": 558.1546630859375,
|
|
"step": 508
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -494.65545654296875,
|
|
"KL/mean": -636.5025024414062,
|
|
"KL/rejected_KL_mean": -778.3495483398438,
|
|
"KL/std": 360.523193359375,
|
|
"epoch": 0.7474302496328928,
|
|
"fcm_dpo/beta": 0.001233469694852829,
|
|
"fcm_dpo/delta": 0.05139687657356262,
|
|
"fcm_dpo/margin": 283.694091796875,
|
|
"fcm_dpo/q_t": 0.4210923910140991,
|
|
"grad_norm": 32.919189453125,
|
|
"learning_rate": 9.227130018803195e-08,
|
|
"logits/chosen": -0.7569400668144226,
|
|
"logits/rejected": -0.7431646585464478,
|
|
"logps/chosen": -558.4744262695312,
|
|
"logps/ref_chosen": -63.81895065307617,
|
|
"logps/ref_rejected": -83.25643920898438,
|
|
"logps/rejected": -861.60595703125,
|
|
"loss": 1.1423,
|
|
"margin_dpo/margin_mean": 283.6940612792969,
|
|
"margin_dpo/margin_std": 457.988525390625,
|
|
"step": 509
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -584.29296875,
|
|
"KL/mean": -770.234375,
|
|
"KL/rejected_KL_mean": -956.17578125,
|
|
"KL/std": 406.34039306640625,
|
|
"epoch": 0.748898678414097,
|
|
"fcm_dpo/beta": 0.001223585568368435,
|
|
"fcm_dpo/delta": -0.05761527270078659,
|
|
"fcm_dpo/margin": 371.8828125,
|
|
"fcm_dpo/q_t": 0.39347004890441895,
|
|
"grad_norm": 34.82098388671875,
|
|
"learning_rate": 9.127770814751932e-08,
|
|
"logits/chosen": -0.7776767611503601,
|
|
"logits/rejected": -0.7976189851760864,
|
|
"logps/chosen": -636.17138671875,
|
|
"logps/ref_chosen": -51.878448486328125,
|
|
"logps/ref_rejected": -102.7651596069336,
|
|
"logps/rejected": -1058.94091796875,
|
|
"loss": 1.0368,
|
|
"margin_dpo/margin_mean": 371.8828125,
|
|
"margin_dpo/margin_std": 393.40496826171875,
|
|
"step": 510
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -544.2560424804688,
|
|
"KL/mean": -698.630615234375,
|
|
"KL/rejected_KL_mean": -853.0051879882812,
|
|
"KL/std": 448.10504150390625,
|
|
"epoch": 0.750367107195301,
|
|
"fcm_dpo/beta": 0.0012225550599396229,
|
|
"fcm_dpo/delta": 0.0234109815210104,
|
|
"fcm_dpo/margin": 308.7491455078125,
|
|
"fcm_dpo/q_t": 0.41429954767227173,
|
|
"grad_norm": 30.70000648498535,
|
|
"learning_rate": 9.028829858700973e-08,
|
|
"logits/chosen": -0.8361300230026245,
|
|
"logits/rejected": -0.8324530124664307,
|
|
"logps/chosen": -604.494140625,
|
|
"logps/ref_chosen": -60.23811721801758,
|
|
"logps/ref_rejected": -92.85676574707031,
|
|
"logps/rejected": -945.8619384765625,
|
|
"loss": 1.1419,
|
|
"margin_dpo/margin_mean": 308.7491455078125,
|
|
"margin_dpo/margin_std": 537.5291748046875,
|
|
"step": 511
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -450.8485107421875,
|
|
"KL/mean": -653.24609375,
|
|
"KL/rejected_KL_mean": -855.6436157226562,
|
|
"KL/std": 413.8804931640625,
|
|
"epoch": 0.7518355359765051,
|
|
"fcm_dpo/beta": 0.0012053523678332567,
|
|
"fcm_dpo/delta": -0.09257997572422028,
|
|
"fcm_dpo/margin": 404.79510498046875,
|
|
"fcm_dpo/q_t": 0.3863917589187622,
|
|
"grad_norm": 36.188262939453125,
|
|
"learning_rate": 8.930309757836516e-08,
|
|
"logits/chosen": -0.8050196170806885,
|
|
"logits/rejected": -0.8159193396568298,
|
|
"logps/chosen": -505.7539978027344,
|
|
"logps/ref_chosen": -54.905494689941406,
|
|
"logps/ref_rejected": -81.87586975097656,
|
|
"logps/rejected": -937.51953125,
|
|
"loss": 1.0147,
|
|
"margin_dpo/margin_mean": 404.79510498046875,
|
|
"margin_dpo/margin_std": 416.42584228515625,
|
|
"step": 512
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -559.5546875,
|
|
"KL/mean": -700.3875732421875,
|
|
"KL/rejected_KL_mean": -841.2203979492188,
|
|
"KL/std": 382.072021484375,
|
|
"epoch": 0.7533039647577092,
|
|
"fcm_dpo/beta": 0.0011912956833839417,
|
|
"fcm_dpo/delta": -0.04120471701025963,
|
|
"fcm_dpo/margin": 281.6656799316406,
|
|
"fcm_dpo/q_t": 0.42267322540283203,
|
|
"grad_norm": 37.652217864990234,
|
|
"learning_rate": 8.832213108254863e-08,
|
|
"logits/chosen": -0.8725820183753967,
|
|
"logits/rejected": -0.8468344211578369,
|
|
"logps/chosen": -624.47119140625,
|
|
"logps/ref_chosen": -64.91644287109375,
|
|
"logps/ref_rejected": -76.06245422363281,
|
|
"logps/rejected": -917.2828369140625,
|
|
"loss": 1.1508,
|
|
"margin_dpo/margin_mean": 281.6656494140625,
|
|
"margin_dpo/margin_std": 448.5667724609375,
|
|
"step": 513
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -589.4393920898438,
|
|
"KL/mean": -719.176513671875,
|
|
"KL/rejected_KL_mean": -848.9135131835938,
|
|
"KL/std": 411.193359375,
|
|
"epoch": 0.7547723935389133,
|
|
"fcm_dpo/beta": 0.0012082626344636083,
|
|
"fcm_dpo/delta": 0.08919873833656311,
|
|
"fcm_dpo/margin": 259.47412109375,
|
|
"fcm_dpo/q_t": 0.4297820031642914,
|
|
"grad_norm": 31.276504516601562,
|
|
"learning_rate": 8.734542494893954e-08,
|
|
"logits/chosen": -0.8355896472930908,
|
|
"logits/rejected": -0.809748649597168,
|
|
"logps/chosen": -663.6689453125,
|
|
"logps/ref_chosen": -74.22957611083984,
|
|
"logps/ref_rejected": -78.945556640625,
|
|
"logps/rejected": -927.8590698242188,
|
|
"loss": 1.1703,
|
|
"margin_dpo/margin_mean": 259.4741516113281,
|
|
"margin_dpo/margin_std": 465.32330322265625,
|
|
"step": 514
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -501.87353515625,
|
|
"KL/mean": -626.3300170898438,
|
|
"KL/rejected_KL_mean": -750.7864990234375,
|
|
"KL/std": 381.322021484375,
|
|
"epoch": 0.7562408223201175,
|
|
"fcm_dpo/beta": 0.0012302729301154613,
|
|
"fcm_dpo/delta": 0.09674014896154404,
|
|
"fcm_dpo/margin": 248.9129638671875,
|
|
"fcm_dpo/q_t": 0.4292389154434204,
|
|
"grad_norm": 41.97957992553711,
|
|
"learning_rate": 8.637300491465272e-08,
|
|
"logits/chosen": -0.8100356459617615,
|
|
"logits/rejected": -0.8176305294036865,
|
|
"logps/chosen": -552.275146484375,
|
|
"logps/ref_chosen": -50.40156555175781,
|
|
"logps/ref_rejected": -87.09774780273438,
|
|
"logps/rejected": -837.88427734375,
|
|
"loss": 1.1746,
|
|
"margin_dpo/margin_mean": 248.9129638671875,
|
|
"margin_dpo/margin_std": 451.3743896484375,
|
|
"step": 515
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -512.7012939453125,
|
|
"KL/mean": -686.7001953125,
|
|
"KL/rejected_KL_mean": -860.69921875,
|
|
"KL/std": 412.843017578125,
|
|
"epoch": 0.7577092511013216,
|
|
"fcm_dpo/beta": 0.0012345185969024897,
|
|
"fcm_dpo/delta": -0.031035784631967545,
|
|
"fcm_dpo/margin": 347.9979248046875,
|
|
"fcm_dpo/q_t": 0.3999173939228058,
|
|
"grad_norm": 31.350547790527344,
|
|
"learning_rate": 8.540489660386064e-08,
|
|
"logits/chosen": -0.883063018321991,
|
|
"logits/rejected": -0.9014164209365845,
|
|
"logps/chosen": -577.350830078125,
|
|
"logps/ref_chosen": -64.64956665039062,
|
|
"logps/ref_rejected": -111.72237396240234,
|
|
"logps/rejected": -972.4215698242188,
|
|
"loss": 1.0631,
|
|
"margin_dpo/margin_mean": 347.9979553222656,
|
|
"margin_dpo/margin_std": 409.755126953125,
|
|
"step": 516
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -538.5610961914062,
|
|
"KL/mean": -739.44091796875,
|
|
"KL/rejected_KL_mean": -940.3209228515625,
|
|
"KL/std": 451.02313232421875,
|
|
"epoch": 0.7591776798825257,
|
|
"fcm_dpo/beta": 0.0012100373860448599,
|
|
"fcm_dpo/delta": -0.09078750759363174,
|
|
"fcm_dpo/margin": 401.75970458984375,
|
|
"fcm_dpo/q_t": 0.39081257581710815,
|
|
"grad_norm": 25.801740646362305,
|
|
"learning_rate": 8.444112552711752e-08,
|
|
"logits/chosen": -0.8275068402290344,
|
|
"logits/rejected": -0.8201382160186768,
|
|
"logps/chosen": -599.474609375,
|
|
"logps/ref_chosen": -60.913551330566406,
|
|
"logps/ref_rejected": -89.08308410644531,
|
|
"logps/rejected": -1029.4039306640625,
|
|
"loss": 1.0379,
|
|
"margin_dpo/margin_mean": 401.759765625,
|
|
"margin_dpo/margin_std": 496.04901123046875,
|
|
"step": 517
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -519.912353515625,
|
|
"KL/mean": -685.43994140625,
|
|
"KL/rejected_KL_mean": -850.967529296875,
|
|
"KL/std": 387.7268371582031,
|
|
"epoch": 0.7606461086637298,
|
|
"fcm_dpo/beta": 0.0012026941403746605,
|
|
"fcm_dpo/delta": 0.0016661733388900757,
|
|
"fcm_dpo/margin": 331.0551452636719,
|
|
"fcm_dpo/q_t": 0.4073007106781006,
|
|
"grad_norm": 30.128890991210938,
|
|
"learning_rate": 8.348171708068747e-08,
|
|
"logits/chosen": -0.8605848550796509,
|
|
"logits/rejected": -0.8738881945610046,
|
|
"logps/chosen": -577.3682861328125,
|
|
"logps/ref_chosen": -57.45589065551758,
|
|
"logps/ref_rejected": -85.31269836425781,
|
|
"logps/rejected": -936.2802124023438,
|
|
"loss": 1.0897,
|
|
"margin_dpo/margin_mean": 331.05511474609375,
|
|
"margin_dpo/margin_std": 424.0645751953125,
|
|
"step": 518
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -521.1378784179688,
|
|
"KL/mean": -639.3558349609375,
|
|
"KL/rejected_KL_mean": -757.57373046875,
|
|
"KL/std": 362.4400939941406,
|
|
"epoch": 0.762114537444934,
|
|
"fcm_dpo/beta": 0.0012314484920352697,
|
|
"fcm_dpo/delta": 0.1115046888589859,
|
|
"fcm_dpo/margin": 236.4358367919922,
|
|
"fcm_dpo/q_t": 0.4326602518558502,
|
|
"grad_norm": 43.568424224853516,
|
|
"learning_rate": 8.25266965458755e-08,
|
|
"logits/chosen": -0.8680436015129089,
|
|
"logits/rejected": -0.847466230392456,
|
|
"logps/chosen": -595.201171875,
|
|
"logps/ref_chosen": -74.06331634521484,
|
|
"logps/ref_rejected": -104.44416809082031,
|
|
"logps/rejected": -862.0178833007812,
|
|
"loss": 1.193,
|
|
"margin_dpo/margin_mean": 236.4358367919922,
|
|
"margin_dpo/margin_std": 456.2001953125,
|
|
"step": 519
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -569.1138916015625,
|
|
"KL/mean": -716.5203857421875,
|
|
"KL/rejected_KL_mean": -863.9268188476562,
|
|
"KL/std": 410.4988098144531,
|
|
"epoch": 0.7635829662261381,
|
|
"fcm_dpo/beta": 0.001238158904016018,
|
|
"fcm_dpo/delta": 0.03627597913146019,
|
|
"fcm_dpo/margin": 294.8129577636719,
|
|
"fcm_dpo/q_t": 0.41828474402427673,
|
|
"grad_norm": 27.024534225463867,
|
|
"learning_rate": 8.15760890883607e-08,
|
|
"logits/chosen": -0.779923677444458,
|
|
"logits/rejected": -0.7774548530578613,
|
|
"logps/chosen": -639.4136962890625,
|
|
"logps/ref_chosen": -70.2998275756836,
|
|
"logps/ref_rejected": -99.98133850097656,
|
|
"logps/rejected": -963.908203125,
|
|
"loss": 1.1323,
|
|
"margin_dpo/margin_mean": 294.8129577636719,
|
|
"margin_dpo/margin_std": 456.40606689453125,
|
|
"step": 520
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -496.0313415527344,
|
|
"KL/mean": -657.4324951171875,
|
|
"KL/rejected_KL_mean": -818.833740234375,
|
|
"KL/std": 417.3710021972656,
|
|
"epoch": 0.7650513950073421,
|
|
"fcm_dpo/beta": 0.0012491261586546898,
|
|
"fcm_dpo/delta": -0.0038999132812023163,
|
|
"fcm_dpo/margin": 322.8023681640625,
|
|
"fcm_dpo/q_t": 0.40750136971473694,
|
|
"grad_norm": 31.882793426513672,
|
|
"learning_rate": 8.062991975753378e-08,
|
|
"logits/chosen": -0.8779969215393066,
|
|
"logits/rejected": -0.8690969944000244,
|
|
"logps/chosen": -554.1742553710938,
|
|
"logps/ref_chosen": -58.14292526245117,
|
|
"logps/ref_rejected": -83.28060913085938,
|
|
"logps/rejected": -902.1143188476562,
|
|
"loss": 1.089,
|
|
"margin_dpo/margin_mean": 322.8023681640625,
|
|
"margin_dpo/margin_std": 418.15899658203125,
|
|
"step": 521
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -583.5753784179688,
|
|
"KL/mean": -740.7099609375,
|
|
"KL/rejected_KL_mean": -897.844482421875,
|
|
"KL/std": 467.26300048828125,
|
|
"epoch": 0.7665198237885462,
|
|
"fcm_dpo/beta": 0.001243784325197339,
|
|
"fcm_dpo/delta": 0.009488995186984539,
|
|
"fcm_dpo/margin": 314.26910400390625,
|
|
"fcm_dpo/q_t": 0.4114811420440674,
|
|
"grad_norm": 31.201757431030273,
|
|
"learning_rate": 7.968821348583643e-08,
|
|
"logits/chosen": -0.8858053088188171,
|
|
"logits/rejected": -0.8832094669342041,
|
|
"logps/chosen": -630.123046875,
|
|
"logps/ref_chosen": -46.54766845703125,
|
|
"logps/ref_rejected": -66.01388549804688,
|
|
"logps/rejected": -963.8583984375,
|
|
"loss": 1.1203,
|
|
"margin_dpo/margin_mean": 314.26910400390625,
|
|
"margin_dpo/margin_std": 489.2512512207031,
|
|
"step": 522
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -587.31884765625,
|
|
"KL/mean": -753.39404296875,
|
|
"KL/rejected_KL_mean": -919.4691772460938,
|
|
"KL/std": 486.9326477050781,
|
|
"epoch": 0.7679882525697503,
|
|
"fcm_dpo/beta": 0.0012416969984769821,
|
|
"fcm_dpo/delta": -0.012975066900253296,
|
|
"fcm_dpo/margin": 332.15032958984375,
|
|
"fcm_dpo/q_t": 0.40742552280426025,
|
|
"grad_norm": 33.407474517822266,
|
|
"learning_rate": 7.875099508810484e-08,
|
|
"logits/chosen": -0.9253371953964233,
|
|
"logits/rejected": -0.9181835055351257,
|
|
"logps/chosen": -649.0885009765625,
|
|
"logps/ref_chosen": -61.76960372924805,
|
|
"logps/ref_rejected": -83.76141357421875,
|
|
"logps/rejected": -1003.2305908203125,
|
|
"loss": 1.1124,
|
|
"margin_dpo/margin_mean": 332.15032958984375,
|
|
"margin_dpo/margin_std": 522.383056640625,
|
|
"step": 523
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -593.376953125,
|
|
"KL/mean": -751.2468872070312,
|
|
"KL/rejected_KL_mean": -909.1168212890625,
|
|
"KL/std": 469.7861633300781,
|
|
"epoch": 0.7694566813509545,
|
|
"fcm_dpo/beta": 0.0012377724051475525,
|
|
"fcm_dpo/delta": 0.009213726967573166,
|
|
"fcm_dpo/margin": 315.73992919921875,
|
|
"fcm_dpo/q_t": 0.40860164165496826,
|
|
"grad_norm": 39.42531967163086,
|
|
"learning_rate": 7.781828926091535e-08,
|
|
"logits/chosen": -0.9645393490791321,
|
|
"logits/rejected": -0.9483359456062317,
|
|
"logps/chosen": -671.448974609375,
|
|
"logps/ref_chosen": -78.0720443725586,
|
|
"logps/ref_rejected": -81.30198669433594,
|
|
"logps/rejected": -990.4188232421875,
|
|
"loss": 1.114,
|
|
"margin_dpo/margin_mean": 315.7398986816406,
|
|
"margin_dpo/margin_std": 464.941162109375,
|
|
"step": 524
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -537.3798828125,
|
|
"KL/mean": -752.6138305664062,
|
|
"KL/rejected_KL_mean": -967.847900390625,
|
|
"KL/std": 474.93743896484375,
|
|
"epoch": 0.7709251101321586,
|
|
"fcm_dpo/beta": 0.0012156711891293526,
|
|
"fcm_dpo/delta": -0.13079476356506348,
|
|
"fcm_dpo/margin": 430.468017578125,
|
|
"fcm_dpo/q_t": 0.3830450177192688,
|
|
"grad_norm": 28.412078857421875,
|
|
"learning_rate": 7.689012058193384e-08,
|
|
"logits/chosen": -0.8482464551925659,
|
|
"logits/rejected": -0.8838850259780884,
|
|
"logps/chosen": -588.2077026367188,
|
|
"logps/ref_chosen": -50.827857971191406,
|
|
"logps/ref_rejected": -100.05294036865234,
|
|
"logps/rejected": -1067.90087890625,
|
|
"loss": 1.0139,
|
|
"margin_dpo/margin_mean": 430.468017578125,
|
|
"margin_dpo/margin_std": 507.2789306640625,
|
|
"step": 525
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -593.8212890625,
|
|
"KL/mean": -805.8401489257812,
|
|
"KL/rejected_KL_mean": -1017.8590087890625,
|
|
"KL/std": 471.7684020996094,
|
|
"epoch": 0.7723935389133627,
|
|
"fcm_dpo/beta": 0.0011959581170231104,
|
|
"fcm_dpo/delta": -0.11271873861551285,
|
|
"fcm_dpo/margin": 424.0378112792969,
|
|
"fcm_dpo/q_t": 0.384555459022522,
|
|
"grad_norm": 30.65778923034668,
|
|
"learning_rate": 7.596651350926836e-08,
|
|
"logits/chosen": -0.906538724899292,
|
|
"logits/rejected": -0.895421028137207,
|
|
"logps/chosen": -656.988525390625,
|
|
"logps/ref_chosen": -63.167236328125,
|
|
"logps/ref_rejected": -86.30934143066406,
|
|
"logps/rejected": -1104.16845703125,
|
|
"loss": 1.0352,
|
|
"margin_dpo/margin_mean": 424.0378112792969,
|
|
"margin_dpo/margin_std": 526.3082885742188,
|
|
"step": 526
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -620.7032470703125,
|
|
"KL/mean": -759.3160400390625,
|
|
"KL/rejected_KL_mean": -897.9288330078125,
|
|
"KL/std": 481.2168273925781,
|
|
"epoch": 0.7738619676945668,
|
|
"fcm_dpo/beta": 0.0011956689413636923,
|
|
"fcm_dpo/delta": 0.07093732804059982,
|
|
"fcm_dpo/margin": 277.2255859375,
|
|
"fcm_dpo/q_t": 0.4224342703819275,
|
|
"grad_norm": 31.139324188232422,
|
|
"learning_rate": 7.504749238082414e-08,
|
|
"logits/chosen": -1.0832974910736084,
|
|
"logits/rejected": -1.045976996421814,
|
|
"logps/chosen": -691.8319702148438,
|
|
"logps/ref_chosen": -71.12867736816406,
|
|
"logps/ref_rejected": -78.3425521850586,
|
|
"logps/rejected": -976.2713623046875,
|
|
"loss": 1.138,
|
|
"margin_dpo/margin_mean": 277.2255859375,
|
|
"margin_dpo/margin_std": 403.221435546875,
|
|
"step": 527
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -624.593505859375,
|
|
"KL/mean": -788.0255126953125,
|
|
"KL/rejected_KL_mean": -951.45751953125,
|
|
"KL/std": 467.02947998046875,
|
|
"epoch": 0.775330396475771,
|
|
"fcm_dpo/beta": 0.0012017192784696817,
|
|
"fcm_dpo/delta": 0.007483818102627993,
|
|
"fcm_dpo/margin": 326.864013671875,
|
|
"fcm_dpo/q_t": 0.41412389278411865,
|
|
"grad_norm": 40.499786376953125,
|
|
"learning_rate": 7.413308141366254e-08,
|
|
"logits/chosen": -0.9295456409454346,
|
|
"logits/rejected": -0.9090088605880737,
|
|
"logps/chosen": -692.6829833984375,
|
|
"logps/ref_chosen": -68.0894546508789,
|
|
"logps/ref_rejected": -93.91006469726562,
|
|
"logps/rejected": -1045.3675537109375,
|
|
"loss": 1.1349,
|
|
"margin_dpo/margin_mean": 326.864013671875,
|
|
"margin_dpo/margin_std": 558.260986328125,
|
|
"step": 528
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -722.912109375,
|
|
"KL/mean": -839.7539672851562,
|
|
"KL/rejected_KL_mean": -956.5958251953125,
|
|
"KL/std": 438.76708984375,
|
|
"epoch": 0.7767988252569751,
|
|
"fcm_dpo/beta": 0.0012046921765431762,
|
|
"fcm_dpo/delta": 0.005615768022835255,
|
|
"fcm_dpo/margin": 233.68368530273438,
|
|
"fcm_dpo/q_t": 0.43421024084091187,
|
|
"grad_norm": 66.41038513183594,
|
|
"learning_rate": 7.322330470336313e-08,
|
|
"logits/chosen": -0.9495022296905518,
|
|
"logits/rejected": -0.9608061909675598,
|
|
"logps/chosen": -778.487060546875,
|
|
"logps/ref_chosen": -55.57495880126953,
|
|
"logps/ref_rejected": -89.20909118652344,
|
|
"logps/rejected": -1045.804931640625,
|
|
"loss": 1.2262,
|
|
"margin_dpo/margin_mean": 233.68368530273438,
|
|
"margin_dpo/margin_std": 538.73583984375,
|
|
"step": 529
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -588.6814575195312,
|
|
"KL/mean": -773.9688720703125,
|
|
"KL/rejected_KL_mean": -959.25634765625,
|
|
"KL/std": 510.5151672363281,
|
|
"epoch": 0.7782672540381792,
|
|
"fcm_dpo/beta": 0.001198010751977563,
|
|
"fcm_dpo/delta": -0.04596946761012077,
|
|
"fcm_dpo/margin": 370.57501220703125,
|
|
"fcm_dpo/q_t": 0.40217673778533936,
|
|
"grad_norm": 55.660064697265625,
|
|
"learning_rate": 7.231818622338822e-08,
|
|
"logits/chosen": -0.8760310411453247,
|
|
"logits/rejected": -0.8702448606491089,
|
|
"logps/chosen": -636.2828369140625,
|
|
"logps/ref_chosen": -47.601417541503906,
|
|
"logps/ref_rejected": -87.2845230102539,
|
|
"logps/rejected": -1046.5408935546875,
|
|
"loss": 1.1169,
|
|
"margin_dpo/margin_mean": 370.574951171875,
|
|
"margin_dpo/margin_std": 630.6913452148438,
|
|
"step": 530
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -657.83935546875,
|
|
"KL/mean": -809.4407958984375,
|
|
"KL/rejected_KL_mean": -961.042236328125,
|
|
"KL/std": 517.9159545898438,
|
|
"epoch": 0.7797356828193832,
|
|
"fcm_dpo/beta": 0.0012029436184093356,
|
|
"fcm_dpo/delta": 0.03640556335449219,
|
|
"fcm_dpo/margin": 303.2028503417969,
|
|
"fcm_dpo/q_t": 0.4182535409927368,
|
|
"grad_norm": 38.57360076904297,
|
|
"learning_rate": 7.141774982445147e-08,
|
|
"logits/chosen": -0.9681419134140015,
|
|
"logits/rejected": -0.9443149566650391,
|
|
"logps/chosen": -713.08544921875,
|
|
"logps/ref_chosen": -55.246063232421875,
|
|
"logps/ref_rejected": -70.60598754882812,
|
|
"logps/rejected": -1031.648193359375,
|
|
"loss": 1.1404,
|
|
"margin_dpo/margin_mean": 303.2028503417969,
|
|
"margin_dpo/margin_std": 503.85516357421875,
|
|
"step": 531
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -635.9500732421875,
|
|
"KL/mean": -814.24462890625,
|
|
"KL/rejected_KL_mean": -992.5391845703125,
|
|
"KL/std": 492.651123046875,
|
|
"epoch": 0.7812041116005873,
|
|
"fcm_dpo/beta": 0.0011894925264641643,
|
|
"fcm_dpo/delta": -0.026376843452453613,
|
|
"fcm_dpo/margin": 356.58917236328125,
|
|
"fcm_dpo/q_t": 0.405168741941452,
|
|
"grad_norm": 54.94654846191406,
|
|
"learning_rate": 7.052201923388953e-08,
|
|
"logits/chosen": -0.8919358849525452,
|
|
"logits/rejected": -0.8668221235275269,
|
|
"logps/chosen": -706.236083984375,
|
|
"logps/ref_chosen": -70.28601837158203,
|
|
"logps/ref_rejected": -86.5913314819336,
|
|
"logps/rejected": -1079.1304931640625,
|
|
"loss": 1.12,
|
|
"margin_dpo/margin_mean": 356.58917236328125,
|
|
"margin_dpo/margin_std": 579.14501953125,
|
|
"step": 532
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -578.204345703125,
|
|
"KL/mean": -705.219970703125,
|
|
"KL/rejected_KL_mean": -832.2355346679688,
|
|
"KL/std": 433.0811767578125,
|
|
"epoch": 0.7826725403817915,
|
|
"fcm_dpo/beta": 0.0012182076461613178,
|
|
"fcm_dpo/delta": 0.09259242564439774,
|
|
"fcm_dpo/margin": 254.03118896484375,
|
|
"fcm_dpo/q_t": 0.4302961230278015,
|
|
"grad_norm": 44.284828186035156,
|
|
"learning_rate": 6.963101805503646e-08,
|
|
"logits/chosen": -0.8935759663581848,
|
|
"logits/rejected": -0.8628044128417969,
|
|
"logps/chosen": -643.0594482421875,
|
|
"logps/ref_chosen": -64.8551025390625,
|
|
"logps/ref_rejected": -76.58805847167969,
|
|
"logps/rejected": -908.8236083984375,
|
|
"loss": 1.1921,
|
|
"margin_dpo/margin_mean": 254.03115844726562,
|
|
"margin_dpo/margin_std": 512.8229370117188,
|
|
"step": 533
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -593.3036499023438,
|
|
"KL/mean": -759.54248046875,
|
|
"KL/rejected_KL_mean": -925.7813110351562,
|
|
"KL/std": 448.52166748046875,
|
|
"epoch": 0.7841409691629956,
|
|
"fcm_dpo/beta": 0.0012101430911570787,
|
|
"fcm_dpo/delta": -0.0032868273556232452,
|
|
"fcm_dpo/margin": 332.4776611328125,
|
|
"fcm_dpo/q_t": 0.4076574742794037,
|
|
"grad_norm": 33.67475509643555,
|
|
"learning_rate": 6.874476976660184e-08,
|
|
"logits/chosen": -0.8921518325805664,
|
|
"logits/rejected": -0.8880842328071594,
|
|
"logps/chosen": -653.4230346679688,
|
|
"logps/ref_chosen": -60.119388580322266,
|
|
"logps/ref_rejected": -78.54347229003906,
|
|
"logps/rejected": -1004.3247680664062,
|
|
"loss": 1.0979,
|
|
"margin_dpo/margin_mean": 332.4776611328125,
|
|
"margin_dpo/margin_std": 454.71746826171875,
|
|
"step": 534
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -496.10723876953125,
|
|
"KL/mean": -687.46923828125,
|
|
"KL/rejected_KL_mean": -878.831298828125,
|
|
"KL/std": 442.2324523925781,
|
|
"epoch": 0.7856093979441997,
|
|
"fcm_dpo/beta": 0.0012131070252507925,
|
|
"fcm_dpo/delta": -0.06780680269002914,
|
|
"fcm_dpo/margin": 382.72406005859375,
|
|
"fcm_dpo/q_t": 0.3943653106689453,
|
|
"grad_norm": 31.497711181640625,
|
|
"learning_rate": 6.786329772205246e-08,
|
|
"logits/chosen": -0.8254178166389465,
|
|
"logits/rejected": -0.8272514343261719,
|
|
"logps/chosen": -550.4375,
|
|
"logps/ref_chosen": -54.330238342285156,
|
|
"logps/ref_rejected": -96.30763244628906,
|
|
"logps/rejected": -975.138916015625,
|
|
"loss": 1.0524,
|
|
"margin_dpo/margin_mean": 382.72406005859375,
|
|
"margin_dpo/margin_std": 464.69305419921875,
|
|
"step": 535
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -460.1872863769531,
|
|
"KL/mean": -684.6822509765625,
|
|
"KL/rejected_KL_mean": -909.1771240234375,
|
|
"KL/std": 509.9305114746094,
|
|
"epoch": 0.7870778267254038,
|
|
"fcm_dpo/beta": 0.001176601741462946,
|
|
"fcm_dpo/delta": -0.13552269339561462,
|
|
"fcm_dpo/margin": 448.9898681640625,
|
|
"fcm_dpo/q_t": 0.3865072429180145,
|
|
"grad_norm": 41.1716194152832,
|
|
"learning_rate": 6.698662514899638e-08,
|
|
"logits/chosen": -0.8181933164596558,
|
|
"logits/rejected": -0.8456603288650513,
|
|
"logps/chosen": -507.267822265625,
|
|
"logps/ref_chosen": -47.08053207397461,
|
|
"logps/ref_rejected": -89.09783935546875,
|
|
"logps/rejected": -998.2750244140625,
|
|
"loss": 1.0291,
|
|
"margin_dpo/margin_mean": 448.9898376464844,
|
|
"margin_dpo/margin_std": 610.5261840820312,
|
|
"step": 536
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -449.18890380859375,
|
|
"KL/mean": -613.4636840820312,
|
|
"KL/rejected_KL_mean": -777.7384033203125,
|
|
"KL/std": 419.79339599609375,
|
|
"epoch": 0.788546255506608,
|
|
"fcm_dpo/beta": 0.0011767192045226693,
|
|
"fcm_dpo/delta": 0.013456817716360092,
|
|
"fcm_dpo/margin": 328.549560546875,
|
|
"fcm_dpo/q_t": 0.4111158847808838,
|
|
"grad_norm": 32.44279861450195,
|
|
"learning_rate": 6.611477514857114e-08,
|
|
"logits/chosen": -0.8254159688949585,
|
|
"logits/rejected": -0.7971071004867554,
|
|
"logps/chosen": -506.9363708496094,
|
|
"logps/ref_chosen": -57.747467041015625,
|
|
"logps/ref_rejected": -70.43838500976562,
|
|
"logps/rejected": -848.1768188476562,
|
|
"loss": 1.1176,
|
|
"margin_dpo/margin_mean": 328.5495910644531,
|
|
"margin_dpo/margin_std": 493.1300048828125,
|
|
"step": 537
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -591.84228515625,
|
|
"KL/mean": -767.7392578125,
|
|
"KL/rejected_KL_mean": -943.63623046875,
|
|
"KL/std": 430.4447021484375,
|
|
"epoch": 0.7900146842878121,
|
|
"fcm_dpo/beta": 0.0011678216978907585,
|
|
"fcm_dpo/delta": -0.011409275233745575,
|
|
"fcm_dpo/margin": 351.79400634765625,
|
|
"fcm_dpo/q_t": 0.4062436521053314,
|
|
"grad_norm": 33.99517059326172,
|
|
"learning_rate": 6.524777069483525e-08,
|
|
"logits/chosen": -0.8461936712265015,
|
|
"logits/rejected": -0.826158881187439,
|
|
"logps/chosen": -658.2581787109375,
|
|
"logps/ref_chosen": -66.41594696044922,
|
|
"logps/ref_rejected": -84.22808837890625,
|
|
"logps/rejected": -1027.8642578125,
|
|
"loss": 1.0812,
|
|
"margin_dpo/margin_mean": 351.7939758300781,
|
|
"margin_dpo/margin_std": 454.0805358886719,
|
|
"step": 538
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -514.8009643554688,
|
|
"KL/mean": -671.9696655273438,
|
|
"KL/rejected_KL_mean": -829.1383666992188,
|
|
"KL/std": 363.4195861816406,
|
|
"epoch": 0.7914831130690162,
|
|
"fcm_dpo/beta": 0.0011770533164963126,
|
|
"fcm_dpo/delta": 0.031044667586684227,
|
|
"fcm_dpo/margin": 314.33740234375,
|
|
"fcm_dpo/q_t": 0.41412806510925293,
|
|
"grad_norm": 38.721561431884766,
|
|
"learning_rate": 6.438563463416221e-08,
|
|
"logits/chosen": -0.9151267409324646,
|
|
"logits/rejected": -0.8990967273712158,
|
|
"logps/chosen": -573.2938232421875,
|
|
"logps/ref_chosen": -58.492855072021484,
|
|
"logps/ref_rejected": -91.85395050048828,
|
|
"logps/rejected": -920.9923095703125,
|
|
"loss": 1.1041,
|
|
"margin_dpo/margin_mean": 314.33740234375,
|
|
"margin_dpo/margin_std": 406.0032958984375,
|
|
"step": 539
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -513.6804809570312,
|
|
"KL/mean": -730.3675537109375,
|
|
"KL/rejected_KL_mean": -947.0548095703125,
|
|
"KL/std": 456.95489501953125,
|
|
"epoch": 0.7929515418502202,
|
|
"fcm_dpo/beta": 0.0011629726504907012,
|
|
"fcm_dpo/delta": -0.10943492501974106,
|
|
"fcm_dpo/margin": 433.374267578125,
|
|
"fcm_dpo/q_t": 0.3882465958595276,
|
|
"grad_norm": 41.09721374511719,
|
|
"learning_rate": 6.352838968463919e-08,
|
|
"logits/chosen": -0.8337998390197754,
|
|
"logits/rejected": -0.8539774417877197,
|
|
"logps/chosen": -577.1629638671875,
|
|
"logps/ref_chosen": -63.482513427734375,
|
|
"logps/ref_rejected": -116.42999267578125,
|
|
"logps/rejected": -1063.4847412109375,
|
|
"loss": 1.0355,
|
|
"margin_dpo/margin_mean": 433.374267578125,
|
|
"margin_dpo/margin_std": 532.08251953125,
|
|
"step": 540
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -621.1068725585938,
|
|
"KL/mean": -744.4969482421875,
|
|
"KL/rejected_KL_mean": -867.8870849609375,
|
|
"KL/std": 408.0279846191406,
|
|
"epoch": 0.7944199706314243,
|
|
"fcm_dpo/beta": 0.0011470350436866283,
|
|
"fcm_dpo/delta": -0.01891069859266281,
|
|
"fcm_dpo/margin": 246.7801513671875,
|
|
"fcm_dpo/q_t": 0.43513962626457214,
|
|
"grad_norm": 53.4821662902832,
|
|
"learning_rate": 6.267605843546767e-08,
|
|
"logits/chosen": -0.9593532085418701,
|
|
"logits/rejected": -0.9463214874267578,
|
|
"logps/chosen": -699.38720703125,
|
|
"logps/ref_chosen": -78.28036499023438,
|
|
"logps/ref_rejected": -103.273681640625,
|
|
"logps/rejected": -971.1607666015625,
|
|
"loss": 1.206,
|
|
"margin_dpo/margin_mean": 246.7801513671875,
|
|
"margin_dpo/margin_std": 501.5079040527344,
|
|
"step": 541
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -533.010498046875,
|
|
"KL/mean": -752.8260498046875,
|
|
"KL/rejected_KL_mean": -972.6415405273438,
|
|
"KL/std": 486.2320556640625,
|
|
"epoch": 0.7958883994126285,
|
|
"fcm_dpo/beta": 0.0011208573123440146,
|
|
"fcm_dpo/delta": -0.09979057312011719,
|
|
"fcm_dpo/margin": 439.63116455078125,
|
|
"fcm_dpo/q_t": 0.38958051800727844,
|
|
"grad_norm": 61.64540100097656,
|
|
"learning_rate": 6.182866334636888e-08,
|
|
"logits/chosen": -0.9522601962089539,
|
|
"logits/rejected": -0.9825873374938965,
|
|
"logps/chosen": -590.4954223632812,
|
|
"logps/ref_chosen": -57.48497009277344,
|
|
"logps/ref_rejected": -96.47506713867188,
|
|
"logps/rejected": -1069.11669921875,
|
|
"loss": 1.0499,
|
|
"margin_dpo/margin_mean": 439.631103515625,
|
|
"margin_dpo/margin_std": 572.5598754882812,
|
|
"step": 542
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -616.3714599609375,
|
|
"KL/mean": -763.2518310546875,
|
|
"KL/rejected_KL_mean": -910.132080078125,
|
|
"KL/std": 572.7112426757812,
|
|
"epoch": 0.7973568281938326,
|
|
"fcm_dpo/beta": 0.001132916659116745,
|
|
"fcm_dpo/delta": 0.06954119354486465,
|
|
"fcm_dpo/margin": 293.7606506347656,
|
|
"fcm_dpo/q_t": 0.4342951774597168,
|
|
"grad_norm": 34.3123893737793,
|
|
"learning_rate": 6.098622674699147e-08,
|
|
"logits/chosen": -0.8914676904678345,
|
|
"logits/rejected": -0.9207860231399536,
|
|
"logps/chosen": -676.989013671875,
|
|
"logps/ref_chosen": -60.61750793457031,
|
|
"logps/ref_rejected": -105.59896850585938,
|
|
"logps/rejected": -1015.7310791015625,
|
|
"loss": 1.207,
|
|
"margin_dpo/margin_mean": 293.7606506347656,
|
|
"margin_dpo/margin_std": 677.75341796875,
|
|
"step": 543
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -620.8755493164062,
|
|
"KL/mean": -799.795654296875,
|
|
"KL/rejected_KL_mean": -978.7156372070312,
|
|
"KL/std": 466.3764343261719,
|
|
"epoch": 0.7988252569750367,
|
|
"fcm_dpo/beta": 0.0011378147173672915,
|
|
"fcm_dpo/delta": -0.007483053486794233,
|
|
"fcm_dpo/margin": 357.8401184082031,
|
|
"fcm_dpo/q_t": 0.40781134366989136,
|
|
"grad_norm": 31.13963508605957,
|
|
"learning_rate": 6.01487708363232e-08,
|
|
"logits/chosen": -0.8915605545043945,
|
|
"logits/rejected": -0.9122974872589111,
|
|
"logps/chosen": -680.517822265625,
|
|
"logps/ref_chosen": -59.642303466796875,
|
|
"logps/ref_rejected": -100.95469665527344,
|
|
"logps/rejected": -1079.67041015625,
|
|
"loss": 1.1041,
|
|
"margin_dpo/margin_mean": 357.8401184082031,
|
|
"margin_dpo/margin_std": 532.9642333984375,
|
|
"step": 544
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -593.229736328125,
|
|
"KL/mean": -805.3840942382812,
|
|
"KL/rejected_KL_mean": -1017.53857421875,
|
|
"KL/std": 472.28790283203125,
|
|
"epoch": 0.8002936857562408,
|
|
"fcm_dpo/beta": 0.0011245384812355042,
|
|
"fcm_dpo/delta": -0.08103551715612411,
|
|
"fcm_dpo/margin": 424.308837890625,
|
|
"fcm_dpo/q_t": 0.39294663071632385,
|
|
"grad_norm": 26.347267150878906,
|
|
"learning_rate": 5.9316317682106294e-08,
|
|
"logits/chosen": -0.8452168703079224,
|
|
"logits/rejected": -0.8719925284385681,
|
|
"logps/chosen": -660.8782958984375,
|
|
"logps/ref_chosen": -67.64859771728516,
|
|
"logps/ref_rejected": -95.90800476074219,
|
|
"logps/rejected": -1113.446533203125,
|
|
"loss": 1.0526,
|
|
"margin_dpo/margin_mean": 424.308837890625,
|
|
"margin_dpo/margin_std": 552.90673828125,
|
|
"step": 545
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -566.79052734375,
|
|
"KL/mean": -708.225830078125,
|
|
"KL/rejected_KL_mean": -849.6610717773438,
|
|
"KL/std": 417.33697509765625,
|
|
"epoch": 0.801762114537445,
|
|
"fcm_dpo/beta": 0.0011334663722664118,
|
|
"fcm_dpo/delta": 0.08203422278165817,
|
|
"fcm_dpo/margin": 282.87054443359375,
|
|
"fcm_dpo/q_t": 0.4246765971183777,
|
|
"grad_norm": 36.055816650390625,
|
|
"learning_rate": 5.848888922025552e-08,
|
|
"logits/chosen": -0.8745754957199097,
|
|
"logits/rejected": -0.8525873422622681,
|
|
"logps/chosen": -617.5347900390625,
|
|
"logps/ref_chosen": -50.744232177734375,
|
|
"logps/ref_rejected": -81.86622619628906,
|
|
"logps/rejected": -931.52734375,
|
|
"loss": 1.1546,
|
|
"margin_dpo/margin_mean": 282.87054443359375,
|
|
"margin_dpo/margin_std": 451.3182373046875,
|
|
"step": 546
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -571.3265380859375,
|
|
"KL/mean": -752.8795166015625,
|
|
"KL/rejected_KL_mean": -934.4325561523438,
|
|
"KL/std": 474.61773681640625,
|
|
"epoch": 0.8032305433186491,
|
|
"fcm_dpo/beta": 0.0011374622117727995,
|
|
"fcm_dpo/delta": -0.013586894609034061,
|
|
"fcm_dpo/margin": 363.1061096191406,
|
|
"fcm_dpo/q_t": 0.4062024652957916,
|
|
"grad_norm": 41.295894622802734,
|
|
"learning_rate": 5.7666507254280265e-08,
|
|
"logits/chosen": -0.863783597946167,
|
|
"logits/rejected": -0.8649648427963257,
|
|
"logps/chosen": -645.0142211914062,
|
|
"logps/ref_chosen": -73.6877212524414,
|
|
"logps/ref_rejected": -90.76136779785156,
|
|
"logps/rejected": -1025.19384765625,
|
|
"loss": 1.0906,
|
|
"margin_dpo/margin_mean": 363.1061096191406,
|
|
"margin_dpo/margin_std": 503.5409240722656,
|
|
"step": 547
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -572.3947143554688,
|
|
"KL/mean": -745.8720703125,
|
|
"KL/rejected_KL_mean": -919.3494873046875,
|
|
"KL/std": 484.0094909667969,
|
|
"epoch": 0.8046989720998532,
|
|
"fcm_dpo/beta": 0.0011355069000273943,
|
|
"fcm_dpo/delta": 0.006264576222747564,
|
|
"fcm_dpo/margin": 346.9548034667969,
|
|
"fcm_dpo/q_t": 0.41354212164878845,
|
|
"grad_norm": 29.72757339477539,
|
|
"learning_rate": 5.684919345471029e-08,
|
|
"logits/chosen": -0.9228535294532776,
|
|
"logits/rejected": -0.922650933265686,
|
|
"logps/chosen": -637.6410522460938,
|
|
"logps/ref_chosen": -65.24634552001953,
|
|
"logps/ref_rejected": -94.11807250976562,
|
|
"logps/rejected": -1013.467529296875,
|
|
"loss": 1.1081,
|
|
"margin_dpo/margin_mean": 346.9548034667969,
|
|
"margin_dpo/margin_std": 528.34912109375,
|
|
"step": 548
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -588.3477783203125,
|
|
"KL/mean": -716.7921142578125,
|
|
"KL/rejected_KL_mean": -845.236572265625,
|
|
"KL/std": 395.0334777832031,
|
|
"epoch": 0.8061674008810573,
|
|
"fcm_dpo/beta": 0.0011380038922652602,
|
|
"fcm_dpo/delta": 0.00906070601195097,
|
|
"fcm_dpo/margin": 256.88873291015625,
|
|
"fcm_dpo/q_t": 0.4329938292503357,
|
|
"grad_norm": 34.956844329833984,
|
|
"learning_rate": 5.603696935852426e-08,
|
|
"logits/chosen": -0.9303746223449707,
|
|
"logits/rejected": -0.9102168083190918,
|
|
"logps/chosen": -637.5601806640625,
|
|
"logps/ref_chosen": -49.21235656738281,
|
|
"logps/ref_rejected": -73.91031646728516,
|
|
"logps/rejected": -919.1468505859375,
|
|
"loss": 1.1825,
|
|
"margin_dpo/margin_mean": 256.88873291015625,
|
|
"margin_dpo/margin_std": 461.2471008300781,
|
|
"step": 549
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -585.5970458984375,
|
|
"KL/mean": -743.171630859375,
|
|
"KL/rejected_KL_mean": -900.746337890625,
|
|
"KL/std": 431.36700439453125,
|
|
"epoch": 0.8076358296622613,
|
|
"fcm_dpo/beta": 0.001144929206930101,
|
|
"fcm_dpo/delta": 0.04066295921802521,
|
|
"fcm_dpo/margin": 315.1492919921875,
|
|
"fcm_dpo/q_t": 0.41719305515289307,
|
|
"grad_norm": 33.49467468261719,
|
|
"learning_rate": 5.5229856368582376e-08,
|
|
"logits/chosen": -0.8681415319442749,
|
|
"logits/rejected": -0.8837727308273315,
|
|
"logps/chosen": -642.4039306640625,
|
|
"logps/ref_chosen": -56.80695343017578,
|
|
"logps/ref_rejected": -95.12580871582031,
|
|
"logps/rejected": -995.8721313476562,
|
|
"loss": 1.1245,
|
|
"margin_dpo/margin_mean": 315.1493225097656,
|
|
"margin_dpo/margin_std": 469.3822021484375,
|
|
"step": 550
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -528.0404663085938,
|
|
"KL/mean": -771.3831787109375,
|
|
"KL/rejected_KL_mean": -1014.7258911132812,
|
|
"KL/std": 475.05487060546875,
|
|
"epoch": 0.8091042584434655,
|
|
"fcm_dpo/beta": 0.0011208320502191782,
|
|
"fcm_dpo/delta": -0.15413454174995422,
|
|
"fcm_dpo/margin": 486.6854248046875,
|
|
"fcm_dpo/q_t": 0.37404024600982666,
|
|
"grad_norm": 60.243988037109375,
|
|
"learning_rate": 5.4427875753062734e-08,
|
|
"logits/chosen": -0.8686560392379761,
|
|
"logits/rejected": -0.9161897301673889,
|
|
"logps/chosen": -587.1467895507812,
|
|
"logps/ref_chosen": -59.10633087158203,
|
|
"logps/ref_rejected": -111.67280578613281,
|
|
"logps/rejected": -1126.398681640625,
|
|
"loss": 0.9776,
|
|
"margin_dpo/margin_mean": 486.6854248046875,
|
|
"margin_dpo/margin_std": 475.30499267578125,
|
|
"step": 551
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -493.5096435546875,
|
|
"KL/mean": -766.2874145507812,
|
|
"KL/rejected_KL_mean": -1039.065185546875,
|
|
"KL/std": 549.2742919921875,
|
|
"epoch": 0.8105726872246696,
|
|
"fcm_dpo/beta": 0.0010729696368798614,
|
|
"fcm_dpo/delta": -0.1995118260383606,
|
|
"fcm_dpo/margin": 545.5556030273438,
|
|
"fcm_dpo/q_t": 0.37025463581085205,
|
|
"grad_norm": 43.864139556884766,
|
|
"learning_rate": 5.363104864490034e-08,
|
|
"logits/chosen": -0.917883574962616,
|
|
"logits/rejected": -0.9444681406021118,
|
|
"logps/chosen": -555.8642578125,
|
|
"logps/ref_chosen": -62.35459899902344,
|
|
"logps/ref_rejected": -104.56210327148438,
|
|
"logps/rejected": -1143.6273193359375,
|
|
"loss": 0.9757,
|
|
"margin_dpo/margin_mean": 545.5556030273438,
|
|
"margin_dpo/margin_std": 603.31982421875,
|
|
"step": 552
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -577.623046875,
|
|
"KL/mean": -726.396484375,
|
|
"KL/rejected_KL_mean": -875.169921875,
|
|
"KL/std": 449.4180603027344,
|
|
"epoch": 0.8120411160058737,
|
|
"fcm_dpo/beta": 0.0010809717932716012,
|
|
"fcm_dpo/delta": 0.0810445249080658,
|
|
"fcm_dpo/margin": 297.5469055175781,
|
|
"fcm_dpo/q_t": 0.4275718331336975,
|
|
"grad_norm": 31.865938186645508,
|
|
"learning_rate": 5.2839396041230415e-08,
|
|
"logits/chosen": -0.9152529835700989,
|
|
"logits/rejected": -0.9069106578826904,
|
|
"logps/chosen": -645.8818359375,
|
|
"logps/ref_chosen": -68.25881958007812,
|
|
"logps/ref_rejected": -98.0971450805664,
|
|
"logps/rejected": -973.26708984375,
|
|
"loss": 1.1534,
|
|
"margin_dpo/margin_mean": 297.54693603515625,
|
|
"margin_dpo/margin_std": 481.7157897949219,
|
|
"step": 553
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -595.7544555664062,
|
|
"KL/mean": -803.332275390625,
|
|
"KL/rejected_KL_mean": -1010.9100341796875,
|
|
"KL/std": 496.8535461425781,
|
|
"epoch": 0.8135095447870778,
|
|
"fcm_dpo/beta": 0.001084424089640379,
|
|
"fcm_dpo/delta": -0.05283275246620178,
|
|
"fcm_dpo/margin": 415.1555480957031,
|
|
"fcm_dpo/q_t": 0.4000872075557709,
|
|
"grad_norm": 33.93936538696289,
|
|
"learning_rate": 5.205293880283551e-08,
|
|
"logits/chosen": -0.8999141454696655,
|
|
"logits/rejected": -0.862054705619812,
|
|
"logps/chosen": -663.7021484375,
|
|
"logps/ref_chosen": -67.94767761230469,
|
|
"logps/ref_rejected": -89.78272247314453,
|
|
"logps/rejected": -1100.6927490234375,
|
|
"loss": 1.0927,
|
|
"margin_dpo/margin_mean": 415.1555480957031,
|
|
"margin_dpo/margin_std": 624.768798828125,
|
|
"step": 554
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -615.0294189453125,
|
|
"KL/mean": -845.592041015625,
|
|
"KL/rejected_KL_mean": -1076.154541015625,
|
|
"KL/std": 520.6309814453125,
|
|
"epoch": 0.8149779735682819,
|
|
"fcm_dpo/beta": 0.0010615733917802572,
|
|
"fcm_dpo/delta": -0.09408356249332428,
|
|
"fcm_dpo/margin": 461.125244140625,
|
|
"fcm_dpo/q_t": 0.3936702311038971,
|
|
"grad_norm": 35.487144470214844,
|
|
"learning_rate": 5.127169765359515e-08,
|
|
"logits/chosen": -0.9481945037841797,
|
|
"logits/rejected": -0.9943474531173706,
|
|
"logps/chosen": -668.35986328125,
|
|
"logps/ref_chosen": -53.33049011230469,
|
|
"logps/ref_rejected": -108.47937774658203,
|
|
"logps/rejected": -1184.634033203125,
|
|
"loss": 1.0645,
|
|
"margin_dpo/margin_mean": 461.125244140625,
|
|
"margin_dpo/margin_std": 665.6109008789062,
|
|
"step": 555
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -569.9747314453125,
|
|
"KL/mean": -710.6820678710938,
|
|
"KL/rejected_KL_mean": -851.389404296875,
|
|
"KL/std": 405.1685791015625,
|
|
"epoch": 0.8164464023494861,
|
|
"fcm_dpo/beta": 0.0010712645016610622,
|
|
"fcm_dpo/delta": 0.10165860503911972,
|
|
"fcm_dpo/margin": 281.4146728515625,
|
|
"fcm_dpo/q_t": 0.43035784363746643,
|
|
"grad_norm": 31.20359230041504,
|
|
"learning_rate": 5.049569317994012e-08,
|
|
"logits/chosen": -0.9110164642333984,
|
|
"logits/rejected": -0.8961154222488403,
|
|
"logps/chosen": -628.6192016601562,
|
|
"logps/ref_chosen": -58.64447021484375,
|
|
"logps/ref_rejected": -101.34040832519531,
|
|
"logps/rejected": -952.7298583984375,
|
|
"loss": 1.1538,
|
|
"margin_dpo/margin_mean": 281.4146728515625,
|
|
"margin_dpo/margin_std": 414.3664245605469,
|
|
"step": 556
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -631.4099731445312,
|
|
"KL/mean": -833.5421142578125,
|
|
"KL/rejected_KL_mean": -1035.67431640625,
|
|
"KL/std": 523.793701171875,
|
|
"epoch": 0.8179148311306902,
|
|
"fcm_dpo/beta": 0.0010692158248275518,
|
|
"fcm_dpo/delta": -0.033952295780181885,
|
|
"fcm_dpo/margin": 404.26434326171875,
|
|
"fcm_dpo/q_t": 0.40289121866226196,
|
|
"grad_norm": 51.41856384277344,
|
|
"learning_rate": 4.9724945830310144e-08,
|
|
"logits/chosen": -0.9860169887542725,
|
|
"logits/rejected": -1.0132906436920166,
|
|
"logps/chosen": -699.2506103515625,
|
|
"logps/ref_chosen": -67.84066009521484,
|
|
"logps/ref_rejected": -109.93965911865234,
|
|
"logps/rejected": -1145.614013671875,
|
|
"loss": 1.0935,
|
|
"margin_dpo/margin_mean": 404.2643127441406,
|
|
"margin_dpo/margin_std": 592.6957397460938,
|
|
"step": 557
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -567.9088745117188,
|
|
"KL/mean": -840.1057739257812,
|
|
"KL/rejected_KL_mean": -1112.302490234375,
|
|
"KL/std": 511.2911376953125,
|
|
"epoch": 0.8193832599118943,
|
|
"fcm_dpo/beta": 0.0010408093221485615,
|
|
"fcm_dpo/delta": -0.17697550356388092,
|
|
"fcm_dpo/margin": 544.393798828125,
|
|
"fcm_dpo/q_t": 0.3689645528793335,
|
|
"grad_norm": 24.727500915527344,
|
|
"learning_rate": 4.8959475914614554e-08,
|
|
"logits/chosen": -1.0073204040527344,
|
|
"logits/rejected": -1.016093373298645,
|
|
"logps/chosen": -630.277099609375,
|
|
"logps/ref_chosen": -62.36824035644531,
|
|
"logps/ref_rejected": -102.16102600097656,
|
|
"logps/rejected": -1214.463623046875,
|
|
"loss": 0.9804,
|
|
"margin_dpo/margin_mean": 544.393798828125,
|
|
"margin_dpo/margin_std": 575.1395874023438,
|
|
"step": 558
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -625.84619140625,
|
|
"KL/mean": -851.332275390625,
|
|
"KL/rejected_KL_mean": -1076.818359375,
|
|
"KL/std": 508.06561279296875,
|
|
"epoch": 0.8208516886930984,
|
|
"fcm_dpo/beta": 0.0010242098942399025,
|
|
"fcm_dpo/delta": -0.06484264880418777,
|
|
"fcm_dpo/margin": 450.97210693359375,
|
|
"fcm_dpo/q_t": 0.3939628005027771,
|
|
"grad_norm": 27.729312896728516,
|
|
"learning_rate": 4.8199303603697614e-08,
|
|
"logits/chosen": -1.085421085357666,
|
|
"logits/rejected": -1.071927547454834,
|
|
"logps/chosen": -686.5985717773438,
|
|
"logps/ref_chosen": -60.752323150634766,
|
|
"logps/ref_rejected": -93.44229125976562,
|
|
"logps/rejected": -1170.2607421875,
|
|
"loss": 1.0467,
|
|
"margin_dpo/margin_mean": 450.9721374511719,
|
|
"margin_dpo/margin_std": 538.5919799804688,
|
|
"step": 559
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -600.4629516601562,
|
|
"KL/mean": -748.181884765625,
|
|
"KL/rejected_KL_mean": -895.9007568359375,
|
|
"KL/std": 419.6651306152344,
|
|
"epoch": 0.8223201174743024,
|
|
"fcm_dpo/beta": 0.0010198511881753802,
|
|
"fcm_dpo/delta": 0.00034468769445084035,
|
|
"fcm_dpo/margin": 295.4377746582031,
|
|
"fcm_dpo/q_t": 0.42941996455192566,
|
|
"grad_norm": 32.959224700927734,
|
|
"learning_rate": 4.7444448928806615e-08,
|
|
"logits/chosen": -0.8413786888122559,
|
|
"logits/rejected": -0.8068987131118774,
|
|
"logps/chosen": -658.5667724609375,
|
|
"logps/ref_chosen": -58.10382080078125,
|
|
"logps/ref_rejected": -79.99122619628906,
|
|
"logps/rejected": -975.8919677734375,
|
|
"loss": 1.165,
|
|
"margin_dpo/margin_mean": 295.4377746582031,
|
|
"margin_dpo/margin_std": 473.3067321777344,
|
|
"step": 560
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -676.5592651367188,
|
|
"KL/mean": -825.4444580078125,
|
|
"KL/rejected_KL_mean": -974.32958984375,
|
|
"KL/std": 466.03192138671875,
|
|
"epoch": 0.8237885462555066,
|
|
"fcm_dpo/beta": 0.001038446556776762,
|
|
"fcm_dpo/delta": 0.09292855858802795,
|
|
"fcm_dpo/margin": 297.7703857421875,
|
|
"fcm_dpo/q_t": 0.4279605746269226,
|
|
"grad_norm": 33.256561279296875,
|
|
"learning_rate": 4.669493178106432e-08,
|
|
"logits/chosen": -0.957642674446106,
|
|
"logits/rejected": -0.9741103649139404,
|
|
"logps/chosen": -727.47216796875,
|
|
"logps/ref_chosen": -50.912879943847656,
|
|
"logps/ref_rejected": -99.06856536865234,
|
|
"logps/rejected": -1073.398193359375,
|
|
"loss": 1.1824,
|
|
"margin_dpo/margin_mean": 297.7703857421875,
|
|
"margin_dpo/margin_std": 566.9799194335938,
|
|
"step": 561
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -637.7071533203125,
|
|
"KL/mean": -828.40576171875,
|
|
"KL/rejected_KL_mean": -1019.1043701171875,
|
|
"KL/std": 509.77227783203125,
|
|
"epoch": 0.8252569750367107,
|
|
"fcm_dpo/beta": 0.0010351063683629036,
|
|
"fcm_dpo/delta": 0.005036838352680206,
|
|
"fcm_dpo/margin": 381.3973388671875,
|
|
"fcm_dpo/q_t": 0.4100903272628784,
|
|
"grad_norm": 27.82461929321289,
|
|
"learning_rate": 4.5950771910944596e-08,
|
|
"logits/chosen": -0.9611387848854065,
|
|
"logits/rejected": -0.9594268798828125,
|
|
"logps/chosen": -697.1715087890625,
|
|
"logps/ref_chosen": -59.46440124511719,
|
|
"logps/ref_rejected": -96.54266357421875,
|
|
"logps/rejected": -1115.64697265625,
|
|
"loss": 1.1057,
|
|
"margin_dpo/margin_mean": 381.3973083496094,
|
|
"margin_dpo/margin_std": 549.9033203125,
|
|
"step": 562
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -682.5115966796875,
|
|
"KL/mean": -834.73291015625,
|
|
"KL/rejected_KL_mean": -986.954345703125,
|
|
"KL/std": 534.4413452148438,
|
|
"epoch": 0.8267254038179148,
|
|
"fcm_dpo/beta": 0.0010367175564169884,
|
|
"fcm_dpo/delta": -0.031953129917383194,
|
|
"fcm_dpo/margin": 304.4427490234375,
|
|
"fcm_dpo/q_t": 0.4237147569656372,
|
|
"grad_norm": 37.00297164916992,
|
|
"learning_rate": 4.521198892775202e-08,
|
|
"logits/chosen": -0.9149300456047058,
|
|
"logits/rejected": -0.9164028167724609,
|
|
"logps/chosen": -743.1197509765625,
|
|
"logps/ref_chosen": -60.60819625854492,
|
|
"logps/ref_rejected": -94.56770324707031,
|
|
"logps/rejected": -1081.52197265625,
|
|
"loss": 1.2024,
|
|
"margin_dpo/margin_mean": 304.4427490234375,
|
|
"margin_dpo/margin_std": 631.2129516601562,
|
|
"step": 563
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -653.9114990234375,
|
|
"KL/mean": -828.191650390625,
|
|
"KL/rejected_KL_mean": -1002.471923828125,
|
|
"KL/std": 475.3956298828125,
|
|
"epoch": 0.8281938325991189,
|
|
"fcm_dpo/beta": 0.0010415834840387106,
|
|
"fcm_dpo/delta": 0.03815501928329468,
|
|
"fcm_dpo/margin": 348.56048583984375,
|
|
"fcm_dpo/q_t": 0.41654127836227417,
|
|
"grad_norm": 37.090126037597656,
|
|
"learning_rate": 4.447860229910544e-08,
|
|
"logits/chosen": -1.0703301429748535,
|
|
"logits/rejected": -1.0528302192687988,
|
|
"logps/chosen": -728.1798095703125,
|
|
"logps/ref_chosen": -74.26837921142578,
|
|
"logps/ref_rejected": -93.23818969726562,
|
|
"logps/rejected": -1095.7100830078125,
|
|
"loss": 1.1179,
|
|
"margin_dpo/margin_mean": 348.56048583984375,
|
|
"margin_dpo/margin_std": 479.8955078125,
|
|
"step": 564
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -655.407470703125,
|
|
"KL/mean": -850.3612060546875,
|
|
"KL/rejected_KL_mean": -1045.3148193359375,
|
|
"KL/std": 540.4480590820312,
|
|
"epoch": 0.8296622613803231,
|
|
"fcm_dpo/beta": 0.0010392372496426105,
|
|
"fcm_dpo/delta": -0.005461537279188633,
|
|
"fcm_dpo/margin": 389.9073486328125,
|
|
"fcm_dpo/q_t": 0.4108823537826538,
|
|
"grad_norm": 30.26292610168457,
|
|
"learning_rate": 4.375063135042445e-08,
|
|
"logits/chosen": -0.9375953674316406,
|
|
"logits/rejected": -0.9292128086090088,
|
|
"logps/chosen": -724.4274291992188,
|
|
"logps/ref_chosen": -69.0199203491211,
|
|
"logps/ref_rejected": -85.7789306640625,
|
|
"logps/rejected": -1131.09375,
|
|
"loss": 1.1223,
|
|
"margin_dpo/margin_mean": 389.9073486328125,
|
|
"margin_dpo/margin_std": 643.8388671875,
|
|
"step": 565
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -634.423095703125,
|
|
"KL/mean": -859.91552734375,
|
|
"KL/rejected_KL_mean": -1085.407958984375,
|
|
"KL/std": 575.044189453125,
|
|
"epoch": 0.8311306901615272,
|
|
"fcm_dpo/beta": 0.0010359040461480618,
|
|
"fcm_dpo/delta": -0.0708489641547203,
|
|
"fcm_dpo/margin": 450.98480224609375,
|
|
"fcm_dpo/q_t": 0.3965566158294678,
|
|
"grad_norm": 29.393285751342773,
|
|
"learning_rate": 4.3028095264420525e-08,
|
|
"logits/chosen": -0.9996108412742615,
|
|
"logits/rejected": -1.016195297241211,
|
|
"logps/chosen": -700.9684448242188,
|
|
"logps/ref_chosen": -66.5453109741211,
|
|
"logps/ref_rejected": -103.86932373046875,
|
|
"logps/rejected": -1189.2772216796875,
|
|
"loss": 1.094,
|
|
"margin_dpo/margin_mean": 450.98480224609375,
|
|
"margin_dpo/margin_std": 689.2914428710938,
|
|
"step": 566
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -630.6632690429688,
|
|
"KL/mean": -796.9432373046875,
|
|
"KL/rejected_KL_mean": -963.22314453125,
|
|
"KL/std": 397.6266174316406,
|
|
"epoch": 0.8325991189427313,
|
|
"fcm_dpo/beta": 0.0010342567693442106,
|
|
"fcm_dpo/delta": 0.05805526301264763,
|
|
"fcm_dpo/margin": 332.55987548828125,
|
|
"fcm_dpo/q_t": 0.41991090774536133,
|
|
"grad_norm": 36.57158660888672,
|
|
"learning_rate": 4.231101308059165e-08,
|
|
"logits/chosen": -1.0474796295166016,
|
|
"logits/rejected": -1.0378050804138184,
|
|
"logps/chosen": -683.5215454101562,
|
|
"logps/ref_chosen": -52.85829544067383,
|
|
"logps/ref_rejected": -85.37095642089844,
|
|
"logps/rejected": -1048.5941162109375,
|
|
"loss": 1.1274,
|
|
"margin_dpo/margin_mean": 332.55987548828125,
|
|
"margin_dpo/margin_std": 469.06707763671875,
|
|
"step": 567
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -598.9827880859375,
|
|
"KL/mean": -826.0681762695312,
|
|
"KL/rejected_KL_mean": -1053.153564453125,
|
|
"KL/std": 466.5391845703125,
|
|
"epoch": 0.8340675477239354,
|
|
"fcm_dpo/beta": 0.0010257186368107796,
|
|
"fcm_dpo/delta": -0.06917618960142136,
|
|
"fcm_dpo/margin": 454.1707763671875,
|
|
"fcm_dpo/q_t": 0.39098745584487915,
|
|
"grad_norm": 33.18759536743164,
|
|
"learning_rate": 4.1599403694720145e-08,
|
|
"logits/chosen": -0.9316244125366211,
|
|
"logits/rejected": -0.962453305721283,
|
|
"logps/chosen": -644.1751708984375,
|
|
"logps/ref_chosen": -45.1923828125,
|
|
"logps/ref_rejected": -89.09236907958984,
|
|
"logps/rejected": -1142.2459716796875,
|
|
"loss": 1.0315,
|
|
"margin_dpo/margin_mean": 454.1707763671875,
|
|
"margin_dpo/margin_std": 485.6314697265625,
|
|
"step": 568
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -648.8056640625,
|
|
"KL/mean": -841.3399658203125,
|
|
"KL/rejected_KL_mean": -1033.874267578125,
|
|
"KL/std": 573.2427978515625,
|
|
"epoch": 0.8355359765051396,
|
|
"fcm_dpo/beta": 0.0010291270446032286,
|
|
"fcm_dpo/delta": 0.003299180418252945,
|
|
"fcm_dpo/margin": 385.0684814453125,
|
|
"fcm_dpo/q_t": 0.41084566712379456,
|
|
"grad_norm": 40.002628326416016,
|
|
"learning_rate": 4.089328585837512e-08,
|
|
"logits/chosen": -0.9903547763824463,
|
|
"logits/rejected": -0.9887925982475281,
|
|
"logps/chosen": -712.5262451171875,
|
|
"logps/ref_chosen": -63.72056198120117,
|
|
"logps/ref_rejected": -79.10325622558594,
|
|
"logps/rejected": -1112.9775390625,
|
|
"loss": 1.1275,
|
|
"margin_dpo/margin_mean": 385.0685119628906,
|
|
"margin_dpo/margin_std": 624.4798583984375,
|
|
"step": 569
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -634.85205078125,
|
|
"KL/mean": -803.91943359375,
|
|
"KL/rejected_KL_mean": -972.98681640625,
|
|
"KL/std": 460.9179992675781,
|
|
"epoch": 0.8370044052863436,
|
|
"fcm_dpo/beta": 0.0010310852667316794,
|
|
"fcm_dpo/delta": 0.053242240101099014,
|
|
"fcm_dpo/margin": 338.1347351074219,
|
|
"fcm_dpo/q_t": 0.41983291506767273,
|
|
"grad_norm": 34.861881256103516,
|
|
"learning_rate": 4.019267817841834e-08,
|
|
"logits/chosen": -1.04459547996521,
|
|
"logits/rejected": -1.023921251296997,
|
|
"logps/chosen": -696.466552734375,
|
|
"logps/ref_chosen": -61.61454391479492,
|
|
"logps/ref_rejected": -82.14186096191406,
|
|
"logps/rejected": -1055.128662109375,
|
|
"loss": 1.1299,
|
|
"margin_dpo/margin_mean": 338.134765625,
|
|
"margin_dpo/margin_std": 491.9954833984375,
|
|
"step": 570
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -630.3326416015625,
|
|
"KL/mean": -840.3262939453125,
|
|
"KL/rejected_KL_mean": -1050.320068359375,
|
|
"KL/std": 493.2159423828125,
|
|
"epoch": 0.8384728340675477,
|
|
"fcm_dpo/beta": 0.0010264207376167178,
|
|
"fcm_dpo/delta": -0.03282318636775017,
|
|
"fcm_dpo/margin": 419.9873046875,
|
|
"fcm_dpo/q_t": 0.4029013514518738,
|
|
"grad_norm": 31.51184844970703,
|
|
"learning_rate": 3.9497599116513705e-08,
|
|
"logits/chosen": -0.9481862187385559,
|
|
"logits/rejected": -0.9578366279602051,
|
|
"logps/chosen": -683.38671875,
|
|
"logps/ref_chosen": -53.05406188964844,
|
|
"logps/ref_rejected": -91.33682250976562,
|
|
"logps/rejected": -1141.6568603515625,
|
|
"loss": 1.0896,
|
|
"margin_dpo/margin_mean": 419.9873352050781,
|
|
"margin_dpo/margin_std": 602.700439453125,
|
|
"step": 571
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -692.636962890625,
|
|
"KL/mean": -900.9115600585938,
|
|
"KL/rejected_KL_mean": -1109.18603515625,
|
|
"KL/std": 563.3121337890625,
|
|
"epoch": 0.8399412628487518,
|
|
"fcm_dpo/beta": 0.0010215662186965346,
|
|
"fcm_dpo/delta": -0.026851139962673187,
|
|
"fcm_dpo/margin": 416.5491943359375,
|
|
"fcm_dpo/q_t": 0.40701764822006226,
|
|
"grad_norm": 28.058713912963867,
|
|
"learning_rate": 3.880806698864086e-08,
|
|
"logits/chosen": -0.9600076675415039,
|
|
"logits/rejected": -0.9866325855255127,
|
|
"logps/chosen": -741.0962524414062,
|
|
"logps/ref_chosen": -48.45928955078125,
|
|
"logps/ref_rejected": -83.55703735351562,
|
|
"logps/rejected": -1192.7431640625,
|
|
"loss": 1.1143,
|
|
"margin_dpo/margin_mean": 416.5491943359375,
|
|
"margin_dpo/margin_std": 682.91455078125,
|
|
"step": 572
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -665.21142578125,
|
|
"KL/mean": -851.5628662109375,
|
|
"KL/rejected_KL_mean": -1037.914306640625,
|
|
"KL/std": 504.3909606933594,
|
|
"epoch": 0.8414096916299559,
|
|
"fcm_dpo/beta": 0.0010271357605233788,
|
|
"fcm_dpo/delta": 0.01777409017086029,
|
|
"fcm_dpo/margin": 372.70294189453125,
|
|
"fcm_dpo/q_t": 0.4132155776023865,
|
|
"grad_norm": 24.622867584228516,
|
|
"learning_rate": 3.812409996461275e-08,
|
|
"logits/chosen": -1.040392518043518,
|
|
"logits/rejected": -1.0433319807052612,
|
|
"logps/chosen": -716.833984375,
|
|
"logps/ref_chosen": -51.62262725830078,
|
|
"logps/ref_rejected": -85.32499694824219,
|
|
"logps/rejected": -1123.2392578125,
|
|
"loss": 1.1042,
|
|
"margin_dpo/margin_mean": 372.70294189453125,
|
|
"margin_dpo/margin_std": 519.7007446289062,
|
|
"step": 573
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -621.5488891601562,
|
|
"KL/mean": -825.0750732421875,
|
|
"KL/rejected_KL_mean": -1028.601318359375,
|
|
"KL/std": 480.80010986328125,
|
|
"epoch": 0.8428781204111601,
|
|
"fcm_dpo/beta": 0.0010248222388327122,
|
|
"fcm_dpo/delta": -0.017912685871124268,
|
|
"fcm_dpo/margin": 407.05230712890625,
|
|
"fcm_dpo/q_t": 0.40481486916542053,
|
|
"grad_norm": 31.015390396118164,
|
|
"learning_rate": 3.74457160675965e-08,
|
|
"logits/chosen": -1.0330562591552734,
|
|
"logits/rejected": -1.0494093894958496,
|
|
"logps/chosen": -672.5933837890625,
|
|
"logps/ref_chosen": -51.04446029663086,
|
|
"logps/ref_rejected": -92.80640411376953,
|
|
"logps/rejected": -1121.40771484375,
|
|
"loss": 1.0857,
|
|
"margin_dpo/margin_mean": 407.05230712890625,
|
|
"margin_dpo/margin_std": 548.0139770507812,
|
|
"step": 574
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -679.0296020507812,
|
|
"KL/mean": -858.723876953125,
|
|
"KL/rejected_KL_mean": -1038.418212890625,
|
|
"KL/std": 490.94537353515625,
|
|
"epoch": 0.8443465491923642,
|
|
"fcm_dpo/beta": 0.0010078808991238475,
|
|
"fcm_dpo/delta": -0.0724976509809494,
|
|
"fcm_dpo/margin": 359.38861083984375,
|
|
"fcm_dpo/q_t": 0.41705384850502014,
|
|
"grad_norm": 37.037967681884766,
|
|
"learning_rate": 3.677293317363864e-08,
|
|
"logits/chosen": -0.8891516923904419,
|
|
"logits/rejected": -0.8854223489761353,
|
|
"logps/chosen": -750.8197021484375,
|
|
"logps/ref_chosen": -71.7901382446289,
|
|
"logps/ref_rejected": -95.38619995117188,
|
|
"logps/rejected": -1133.804443359375,
|
|
"loss": 1.1541,
|
|
"margin_dpo/margin_mean": 359.38861083984375,
|
|
"margin_dpo/margin_std": 618.3295288085938,
|
|
"step": 575
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -629.9134521484375,
|
|
"KL/mean": -782.214111328125,
|
|
"KL/rejected_KL_mean": -934.5147705078125,
|
|
"KL/std": 449.57354736328125,
|
|
"epoch": 0.8458149779735683,
|
|
"fcm_dpo/beta": 0.0010182232363149524,
|
|
"fcm_dpo/delta": 0.09279034286737442,
|
|
"fcm_dpo/margin": 304.60137939453125,
|
|
"fcm_dpo/q_t": 0.4291686415672302,
|
|
"grad_norm": 26.034320831298828,
|
|
"learning_rate": 3.6105769011194224e-08,
|
|
"logits/chosen": -0.9969866275787354,
|
|
"logits/rejected": -1.025536298751831,
|
|
"logps/chosen": -684.1763916015625,
|
|
"logps/ref_chosen": -54.262962341308594,
|
|
"logps/ref_rejected": -100.75428009033203,
|
|
"logps/rejected": -1035.26904296875,
|
|
"loss": 1.1671,
|
|
"margin_dpo/margin_mean": 304.60137939453125,
|
|
"margin_dpo/margin_std": 517.416748046875,
|
|
"step": 576
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -572.9135131835938,
|
|
"KL/mean": -760.6309814453125,
|
|
"KL/rejected_KL_mean": -948.348388671875,
|
|
"KL/std": 488.9225158691406,
|
|
"epoch": 0.8472834067547724,
|
|
"fcm_dpo/beta": 0.001027634833008051,
|
|
"fcm_dpo/delta": 0.014742329716682434,
|
|
"fcm_dpo/margin": 375.43487548828125,
|
|
"fcm_dpo/q_t": 0.4120475649833679,
|
|
"grad_norm": 29.45041275024414,
|
|
"learning_rate": 3.5444241160659304e-08,
|
|
"logits/chosen": -0.9848508834838867,
|
|
"logits/rejected": -0.9585464000701904,
|
|
"logps/chosen": -634.8232421875,
|
|
"logps/ref_chosen": -61.909706115722656,
|
|
"logps/ref_rejected": -84.07069396972656,
|
|
"logps/rejected": -1032.419189453125,
|
|
"loss": 1.1134,
|
|
"margin_dpo/margin_mean": 375.43487548828125,
|
|
"margin_dpo/margin_std": 524.5450439453125,
|
|
"step": 577
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -563.70556640625,
|
|
"KL/mean": -769.3616943359375,
|
|
"KL/rejected_KL_mean": -975.017822265625,
|
|
"KL/std": 475.5242919921875,
|
|
"epoch": 0.8487518355359766,
|
|
"fcm_dpo/beta": 0.001021248521283269,
|
|
"fcm_dpo/delta": -0.021578624844551086,
|
|
"fcm_dpo/margin": 411.31219482421875,
|
|
"fcm_dpo/q_t": 0.40228772163391113,
|
|
"grad_norm": 34.02935791015625,
|
|
"learning_rate": 3.478836705390808e-08,
|
|
"logits/chosen": -0.8643592596054077,
|
|
"logits/rejected": -0.8935543894767761,
|
|
"logps/chosen": -612.96923828125,
|
|
"logps/ref_chosen": -49.26368713378906,
|
|
"logps/ref_rejected": -83.4362564086914,
|
|
"logps/rejected": -1058.4541015625,
|
|
"loss": 1.0665,
|
|
"margin_dpo/margin_mean": 411.31219482421875,
|
|
"margin_dpo/margin_std": 471.1173095703125,
|
|
"step": 578
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -619.7525634765625,
|
|
"KL/mean": -754.7780151367188,
|
|
"KL/rejected_KL_mean": -889.803466796875,
|
|
"KL/std": 469.8000793457031,
|
|
"epoch": 0.8502202643171806,
|
|
"fcm_dpo/beta": 0.0010419844184070826,
|
|
"fcm_dpo/delta": 0.12221585214138031,
|
|
"fcm_dpo/margin": 270.0508728027344,
|
|
"fcm_dpo/q_t": 0.43572354316711426,
|
|
"grad_norm": 47.86115646362305,
|
|
"learning_rate": 3.41381639738331e-08,
|
|
"logits/chosen": -0.9247668981552124,
|
|
"logits/rejected": -0.918264627456665,
|
|
"logps/chosen": -678.638427734375,
|
|
"logps/ref_chosen": -58.88581848144531,
|
|
"logps/ref_rejected": -94.78762817382812,
|
|
"logps/rejected": -984.591064453125,
|
|
"loss": 1.194,
|
|
"margin_dpo/margin_mean": 270.0508728027344,
|
|
"margin_dpo/margin_std": 516.8199462890625,
|
|
"step": 579
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -498.0091857910156,
|
|
"KL/mean": -721.5133056640625,
|
|
"KL/rejected_KL_mean": -945.0174560546875,
|
|
"KL/std": 533.5467529296875,
|
|
"epoch": 0.8516886930983847,
|
|
"fcm_dpo/beta": 0.0010376223362982273,
|
|
"fcm_dpo/delta": -0.06723435968160629,
|
|
"fcm_dpo/margin": 447.0083312988281,
|
|
"fcm_dpo/q_t": 0.3978680372238159,
|
|
"grad_norm": 30.88446617126465,
|
|
"learning_rate": 3.349364905389032e-08,
|
|
"logits/chosen": -0.8265971541404724,
|
|
"logits/rejected": -0.8474031686782837,
|
|
"logps/chosen": -546.7160034179688,
|
|
"logps/ref_chosen": -48.70683670043945,
|
|
"logps/ref_rejected": -81.7583999633789,
|
|
"logps/rejected": -1026.77587890625,
|
|
"loss": 1.0653,
|
|
"margin_dpo/margin_mean": 447.0083312988281,
|
|
"margin_dpo/margin_std": 623.1065673828125,
|
|
"step": 580
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -619.2047729492188,
|
|
"KL/mean": -788.866943359375,
|
|
"KL/rejected_KL_mean": -958.529052734375,
|
|
"KL/std": 473.9400634765625,
|
|
"epoch": 0.8531571218795888,
|
|
"fcm_dpo/beta": 0.001045089797116816,
|
|
"fcm_dpo/delta": 0.046971119940280914,
|
|
"fcm_dpo/margin": 339.32427978515625,
|
|
"fcm_dpo/q_t": 0.4190484881401062,
|
|
"grad_norm": 28.153030395507812,
|
|
"learning_rate": 3.285483927764726e-08,
|
|
"logits/chosen": -1.0129151344299316,
|
|
"logits/rejected": -1.0199846029281616,
|
|
"logps/chosen": -681.4271240234375,
|
|
"logps/ref_chosen": -62.22235107421875,
|
|
"logps/ref_rejected": -91.73568725585938,
|
|
"logps/rejected": -1050.2647705078125,
|
|
"loss": 1.133,
|
|
"margin_dpo/margin_mean": 339.3243103027344,
|
|
"margin_dpo/margin_std": 528.5376586914062,
|
|
"step": 581
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -569.8297119140625,
|
|
"KL/mean": -768.3109130859375,
|
|
"KL/rejected_KL_mean": -966.7920532226562,
|
|
"KL/std": 444.11328125,
|
|
"epoch": 0.8546255506607929,
|
|
"fcm_dpo/beta": 0.001052438747137785,
|
|
"fcm_dpo/delta": -0.019892334938049316,
|
|
"fcm_dpo/margin": 396.9622802734375,
|
|
"fcm_dpo/q_t": 0.40300631523132324,
|
|
"grad_norm": 30.396282196044922,
|
|
"learning_rate": 3.222175147833556e-08,
|
|
"logits/chosen": -0.9671785831451416,
|
|
"logits/rejected": -0.9903292059898376,
|
|
"logps/chosen": -628.058349609375,
|
|
"logps/ref_chosen": -58.228660583496094,
|
|
"logps/ref_rejected": -110.06959533691406,
|
|
"logps/rejected": -1076.861572265625,
|
|
"loss": 1.0821,
|
|
"margin_dpo/margin_mean": 396.96234130859375,
|
|
"margin_dpo/margin_std": 493.5679931640625,
|
|
"step": 582
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -644.336181640625,
|
|
"KL/mean": -767.6728515625,
|
|
"KL/rejected_KL_mean": -891.0096435546875,
|
|
"KL/std": 475.7613830566406,
|
|
"epoch": 0.856093979441997,
|
|
"fcm_dpo/beta": 0.0010401608888059855,
|
|
"fcm_dpo/delta": -0.01038757897913456,
|
|
"fcm_dpo/margin": 246.67337036132812,
|
|
"fcm_dpo/q_t": 0.4429190754890442,
|
|
"grad_norm": 31.7283878326416,
|
|
"learning_rate": 3.159440233840763e-08,
|
|
"logits/chosen": -0.9231404066085815,
|
|
"logits/rejected": -0.9159576892852783,
|
|
"logps/chosen": -701.1990966796875,
|
|
"logps/ref_chosen": -56.86286163330078,
|
|
"logps/ref_rejected": -88.4039306640625,
|
|
"logps/rejected": -979.41357421875,
|
|
"loss": 1.2385,
|
|
"margin_dpo/margin_mean": 246.6733856201172,
|
|
"margin_dpo/margin_std": 584.5450439453125,
|
|
"step": 583
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -548.4625244140625,
|
|
"KL/mean": -786.724853515625,
|
|
"KL/rejected_KL_mean": -1024.9871826171875,
|
|
"KL/std": 508.10198974609375,
|
|
"epoch": 0.8575624082232012,
|
|
"fcm_dpo/beta": 0.001025655074045062,
|
|
"fcm_dpo/delta": -0.0933271199464798,
|
|
"fcm_dpo/margin": 476.524658203125,
|
|
"fcm_dpo/q_t": 0.3882429897785187,
|
|
"grad_norm": 27.133150100708008,
|
|
"learning_rate": 3.0972808389096635e-08,
|
|
"logits/chosen": -0.9409053921699524,
|
|
"logits/rejected": -0.9479919672012329,
|
|
"logps/chosen": -605.3631591796875,
|
|
"logps/ref_chosen": -56.90068054199219,
|
|
"logps/ref_rejected": -97.63606262207031,
|
|
"logps/rejected": -1122.623291015625,
|
|
"loss": 1.0207,
|
|
"margin_dpo/margin_mean": 476.524658203125,
|
|
"margin_dpo/margin_std": 514.8095703125,
|
|
"step": 584
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -656.8305053710938,
|
|
"KL/mean": -868.0594482421875,
|
|
"KL/rejected_KL_mean": -1079.288330078125,
|
|
"KL/std": 551.96435546875,
|
|
"epoch": 0.8590308370044053,
|
|
"fcm_dpo/beta": 0.001014050329104066,
|
|
"fcm_dpo/delta": -0.029842915013432503,
|
|
"fcm_dpo/margin": 422.4579162597656,
|
|
"fcm_dpo/q_t": 0.40475040674209595,
|
|
"grad_norm": 28.67152976989746,
|
|
"learning_rate": 3.035698600998121e-08,
|
|
"logits/chosen": -0.9976698756217957,
|
|
"logits/rejected": -1.021203875541687,
|
|
"logps/chosen": -717.804443359375,
|
|
"logps/ref_chosen": -60.973968505859375,
|
|
"logps/ref_rejected": -84.16952514648438,
|
|
"logps/rejected": -1163.4578857421875,
|
|
"loss": 1.1049,
|
|
"margin_dpo/margin_mean": 422.4578857421875,
|
|
"margin_dpo/margin_std": 662.7896728515625,
|
|
"step": 585
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -663.6984252929688,
|
|
"KL/mean": -804.322021484375,
|
|
"KL/rejected_KL_mean": -944.9456176757812,
|
|
"KL/std": 479.632568359375,
|
|
"epoch": 0.8604992657856094,
|
|
"fcm_dpo/beta": 0.0010314470855519176,
|
|
"fcm_dpo/delta": 0.11326177418231964,
|
|
"fcm_dpo/margin": 281.2471618652344,
|
|
"fcm_dpo/q_t": 0.433902382850647,
|
|
"grad_norm": 29.24315071105957,
|
|
"learning_rate": 2.974695142855388e-08,
|
|
"logits/chosen": -0.9781264066696167,
|
|
"logits/rejected": -0.9948530197143555,
|
|
"logps/chosen": -720.5540161132812,
|
|
"logps/ref_chosen": -56.85559844970703,
|
|
"logps/ref_rejected": -91.80261993408203,
|
|
"logps/rejected": -1036.748291015625,
|
|
"loss": 1.1906,
|
|
"margin_dpo/margin_mean": 281.2471618652344,
|
|
"margin_dpo/margin_std": 540.845458984375,
|
|
"step": 586
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -457.7816162109375,
|
|
"KL/mean": -663.854736328125,
|
|
"KL/rejected_KL_mean": -869.9278564453125,
|
|
"KL/std": 509.3709411621094,
|
|
"epoch": 0.8619676945668135,
|
|
"fcm_dpo/beta": 0.0010362120810896158,
|
|
"fcm_dpo/delta": -0.02832759916782379,
|
|
"fcm_dpo/margin": 412.146240234375,
|
|
"fcm_dpo/q_t": 0.40200504660606384,
|
|
"grad_norm": 31.405168533325195,
|
|
"learning_rate": 2.9142720719793122e-08,
|
|
"logits/chosen": -1.012029767036438,
|
|
"logits/rejected": -1.0366127490997314,
|
|
"logps/chosen": -502.47320556640625,
|
|
"logps/ref_chosen": -44.69159698486328,
|
|
"logps/ref_rejected": -82.62385559082031,
|
|
"logps/rejected": -952.5516967773438,
|
|
"loss": 1.0723,
|
|
"margin_dpo/margin_mean": 412.146240234375,
|
|
"margin_dpo/margin_std": 520.8572998046875,
|
|
"step": 587
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -634.4208984375,
|
|
"KL/mean": -803.1209716796875,
|
|
"KL/rejected_KL_mean": -971.821044921875,
|
|
"KL/std": 458.4549560546875,
|
|
"epoch": 0.8634361233480177,
|
|
"fcm_dpo/beta": 0.001035462599247694,
|
|
"fcm_dpo/delta": 0.05226360261440277,
|
|
"fcm_dpo/margin": 337.400146484375,
|
|
"fcm_dpo/q_t": 0.4189639091491699,
|
|
"grad_norm": 30.63210678100586,
|
|
"learning_rate": 2.8544309805740018e-08,
|
|
"logits/chosen": -0.9633903503417969,
|
|
"logits/rejected": -0.9861800670623779,
|
|
"logps/chosen": -684.7158203125,
|
|
"logps/ref_chosen": -50.29494857788086,
|
|
"logps/ref_rejected": -107.36988067626953,
|
|
"logps/rejected": -1079.19091796875,
|
|
"loss": 1.1261,
|
|
"margin_dpo/margin_mean": 337.4001770019531,
|
|
"margin_dpo/margin_std": 472.3351745605469,
|
|
"step": 588
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -631.3834838867188,
|
|
"KL/mean": -850.8163452148438,
|
|
"KL/rejected_KL_mean": -1070.249267578125,
|
|
"KL/std": 498.56488037109375,
|
|
"epoch": 0.8649045521292217,
|
|
"fcm_dpo/beta": 0.001036192523315549,
|
|
"fcm_dpo/delta": -0.057322654873132706,
|
|
"fcm_dpo/margin": 438.8658142089844,
|
|
"fcm_dpo/q_t": 0.39515334367752075,
|
|
"grad_norm": 27.6535587310791,
|
|
"learning_rate": 2.7951734455078786e-08,
|
|
"logits/chosen": -0.9325675964355469,
|
|
"logits/rejected": -0.9372642636299133,
|
|
"logps/chosen": -691.3133544921875,
|
|
"logps/ref_chosen": -59.929908752441406,
|
|
"logps/ref_rejected": -111.65534973144531,
|
|
"logps/rejected": -1181.904541015625,
|
|
"loss": 1.0514,
|
|
"margin_dpo/margin_mean": 438.8658142089844,
|
|
"margin_dpo/margin_std": 524.8668823242188,
|
|
"step": 589
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -563.392333984375,
|
|
"KL/mean": -777.1893310546875,
|
|
"KL/rejected_KL_mean": -990.9863891601562,
|
|
"KL/std": 485.23968505859375,
|
|
"epoch": 0.8663729809104258,
|
|
"fcm_dpo/beta": 0.0010251689236611128,
|
|
"fcm_dpo/delta": -0.04011045768857002,
|
|
"fcm_dpo/margin": 427.59405517578125,
|
|
"fcm_dpo/q_t": 0.40004873275756836,
|
|
"grad_norm": 29.574190139770508,
|
|
"learning_rate": 2.736501028272095e-08,
|
|
"logits/chosen": -0.9315773248672485,
|
|
"logits/rejected": -0.9519675970077515,
|
|
"logps/chosen": -619.2021484375,
|
|
"logps/ref_chosen": -55.80979537963867,
|
|
"logps/ref_rejected": -106.06282043457031,
|
|
"logps/rejected": -1097.0491943359375,
|
|
"loss": 1.0636,
|
|
"margin_dpo/margin_mean": 427.59405517578125,
|
|
"margin_dpo/margin_std": 530.9572143554688,
|
|
"step": 590
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -604.170654296875,
|
|
"KL/mean": -798.3487548828125,
|
|
"KL/rejected_KL_mean": -992.52685546875,
|
|
"KL/std": 467.26275634765625,
|
|
"epoch": 0.8678414096916299,
|
|
"fcm_dpo/beta": 0.0010215968359261751,
|
|
"fcm_dpo/delta": 0.0033291950821876526,
|
|
"fcm_dpo/margin": 388.35614013671875,
|
|
"fcm_dpo/q_t": 0.40842732787132263,
|
|
"grad_norm": 27.988250732421875,
|
|
"learning_rate": 2.678415274939408e-08,
|
|
"logits/chosen": -0.9853817224502563,
|
|
"logits/rejected": -0.9650702476501465,
|
|
"logps/chosen": -660.4113159179688,
|
|
"logps/ref_chosen": -56.24061965942383,
|
|
"logps/ref_rejected": -83.78629302978516,
|
|
"logps/rejected": -1076.3131103515625,
|
|
"loss": 1.0985,
|
|
"margin_dpo/margin_mean": 388.35614013671875,
|
|
"margin_dpo/margin_std": 533.7264404296875,
|
|
"step": 591
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -623.6812133789062,
|
|
"KL/mean": -797.793212890625,
|
|
"KL/rejected_KL_mean": -971.9052734375,
|
|
"KL/std": 479.56719970703125,
|
|
"epoch": 0.869309838472834,
|
|
"fcm_dpo/beta": 0.0010291507933288813,
|
|
"fcm_dpo/delta": 0.04319499433040619,
|
|
"fcm_dpo/margin": 348.22406005859375,
|
|
"fcm_dpo/q_t": 0.4191494584083557,
|
|
"grad_norm": 25.876665115356445,
|
|
"learning_rate": 2.6209177161234442e-08,
|
|
"logits/chosen": -0.9463398456573486,
|
|
"logits/rejected": -0.9439194798469543,
|
|
"logps/chosen": -671.6214599609375,
|
|
"logps/ref_chosen": -47.94025421142578,
|
|
"logps/ref_rejected": -75.73287963867188,
|
|
"logps/rejected": -1047.63818359375,
|
|
"loss": 1.1678,
|
|
"margin_dpo/margin_mean": 348.22406005859375,
|
|
"margin_dpo/margin_std": 647.0997314453125,
|
|
"step": 592
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -612.51904296875,
|
|
"KL/mean": -746.0147705078125,
|
|
"KL/rejected_KL_mean": -879.5105590820312,
|
|
"KL/std": 498.0760498046875,
|
|
"epoch": 0.8707782672540382,
|
|
"fcm_dpo/beta": 0.0010393188567832112,
|
|
"fcm_dpo/delta": 0.03283300623297691,
|
|
"fcm_dpo/margin": 266.99151611328125,
|
|
"fcm_dpo/q_t": 0.43733033537864685,
|
|
"grad_norm": 34.479095458984375,
|
|
"learning_rate": 2.564009866938349e-08,
|
|
"logits/chosen": -0.8614813089370728,
|
|
"logits/rejected": -0.849102258682251,
|
|
"logps/chosen": -661.2098388671875,
|
|
"logps/ref_chosen": -48.690757751464844,
|
|
"logps/ref_rejected": -60.90800094604492,
|
|
"logps/rejected": -940.4185791015625,
|
|
"loss": 1.2072,
|
|
"margin_dpo/margin_mean": 266.99151611328125,
|
|
"margin_dpo/margin_std": 553.5676879882812,
|
|
"step": 593
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -567.38037109375,
|
|
"KL/mean": -739.0123291015625,
|
|
"KL/rejected_KL_mean": -910.6441650390625,
|
|
"KL/std": 496.5671691894531,
|
|
"epoch": 0.8722466960352423,
|
|
"fcm_dpo/beta": 0.0010341550223529339,
|
|
"fcm_dpo/delta": -0.04993312805891037,
|
|
"fcm_dpo/margin": 343.2637939453125,
|
|
"fcm_dpo/q_t": 0.4182147681713104,
|
|
"grad_norm": 31.0626220703125,
|
|
"learning_rate": 2.5076932269588708e-08,
|
|
"logits/chosen": -0.9724768400192261,
|
|
"logits/rejected": -0.9574602246284485,
|
|
"logps/chosen": -622.3153076171875,
|
|
"logps/ref_chosen": -54.93488693237305,
|
|
"logps/ref_rejected": -86.09967803955078,
|
|
"logps/rejected": -996.743896484375,
|
|
"loss": 1.1371,
|
|
"margin_dpo/margin_mean": 343.2638244628906,
|
|
"margin_dpo/margin_std": 524.0340576171875,
|
|
"step": 594
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -574.682861328125,
|
|
"KL/mean": -762.4730224609375,
|
|
"KL/rejected_KL_mean": -950.2633056640625,
|
|
"KL/std": 482.28497314453125,
|
|
"epoch": 0.8737151248164464,
|
|
"fcm_dpo/beta": 0.0010293896775692701,
|
|
"fcm_dpo/delta": 0.013885049149394035,
|
|
"fcm_dpo/margin": 375.58038330078125,
|
|
"fcm_dpo/q_t": 0.41420266032218933,
|
|
"grad_norm": 31.87458610534668,
|
|
"learning_rate": 2.451969280180849e-08,
|
|
"logits/chosen": -0.928321361541748,
|
|
"logits/rejected": -0.9355098009109497,
|
|
"logps/chosen": -624.103271484375,
|
|
"logps/ref_chosen": -49.4204216003418,
|
|
"logps/ref_rejected": -80.62731170654297,
|
|
"logps/rejected": -1030.890625,
|
|
"loss": 1.1078,
|
|
"margin_dpo/margin_mean": 375.58038330078125,
|
|
"margin_dpo/margin_std": 548.4652099609375,
|
|
"step": 595
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -637.4048461914062,
|
|
"KL/mean": -785.274658203125,
|
|
"KL/rejected_KL_mean": -933.1444091796875,
|
|
"KL/std": 496.126708984375,
|
|
"epoch": 0.8751835535976505,
|
|
"fcm_dpo/beta": 0.0010479073971509933,
|
|
"fcm_dpo/delta": 0.09277448803186417,
|
|
"fcm_dpo/margin": 295.7396240234375,
|
|
"fcm_dpo/q_t": 0.431826651096344,
|
|
"grad_norm": 49.97145080566406,
|
|
"learning_rate": 2.396839494982103e-08,
|
|
"logits/chosen": -0.942278265953064,
|
|
"logits/rejected": -0.9016916155815125,
|
|
"logps/chosen": -697.196533203125,
|
|
"logps/ref_chosen": -59.791683197021484,
|
|
"logps/ref_rejected": -80.09111785888672,
|
|
"logps/rejected": -1013.235595703125,
|
|
"loss": 1.1853,
|
|
"margin_dpo/margin_mean": 295.7396240234375,
|
|
"margin_dpo/margin_std": 582.1591796875,
|
|
"step": 596
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -593.11328125,
|
|
"KL/mean": -829.9117431640625,
|
|
"KL/rejected_KL_mean": -1066.7103271484375,
|
|
"KL/std": 540.2069091796875,
|
|
"epoch": 0.8766519823788547,
|
|
"fcm_dpo/beta": 0.0010269451886415482,
|
|
"fcm_dpo/delta": -0.09347677230834961,
|
|
"fcm_dpo/margin": 473.59698486328125,
|
|
"fcm_dpo/q_t": 0.39144447445869446,
|
|
"grad_norm": 33.3725471496582,
|
|
"learning_rate": 2.3423053240837514e-08,
|
|
"logits/chosen": -0.8926633596420288,
|
|
"logits/rejected": -0.9352051019668579,
|
|
"logps/chosen": -650.3740844726562,
|
|
"logps/ref_chosen": -57.26078796386719,
|
|
"logps/ref_rejected": -100.6937255859375,
|
|
"logps/rejected": -1167.404052734375,
|
|
"loss": 1.0513,
|
|
"margin_dpo/margin_mean": 473.59698486328125,
|
|
"margin_dpo/margin_std": 604.17333984375,
|
|
"step": 597
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -608.8829345703125,
|
|
"KL/mean": -787.3731689453125,
|
|
"KL/rejected_KL_mean": -965.8634033203125,
|
|
"KL/std": 469.7054138183594,
|
|
"epoch": 0.8781204111600588,
|
|
"fcm_dpo/beta": 0.0010294051608070731,
|
|
"fcm_dpo/delta": 0.03294781595468521,
|
|
"fcm_dpo/margin": 356.9803771972656,
|
|
"fcm_dpo/q_t": 0.4142112135887146,
|
|
"grad_norm": 26.78837013244629,
|
|
"learning_rate": 2.2883682045119062e-08,
|
|
"logits/chosen": -0.9758745431900024,
|
|
"logits/rejected": -0.9783375859260559,
|
|
"logps/chosen": -661.4014892578125,
|
|
"logps/ref_chosen": -52.51850509643555,
|
|
"logps/ref_rejected": -89.44385528564453,
|
|
"logps/rejected": -1055.3072509765625,
|
|
"loss": 1.1243,
|
|
"margin_dpo/margin_mean": 356.98040771484375,
|
|
"margin_dpo/margin_std": 516.56591796875,
|
|
"step": 598
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -612.8923950195312,
|
|
"KL/mean": -774.1793823242188,
|
|
"KL/rejected_KL_mean": -935.4663696289062,
|
|
"KL/std": 445.364501953125,
|
|
"epoch": 0.8795888399412628,
|
|
"fcm_dpo/beta": 0.0010303169256076217,
|
|
"fcm_dpo/delta": -0.039691608399152756,
|
|
"fcm_dpo/margin": 322.57403564453125,
|
|
"fcm_dpo/q_t": 0.4193369150161743,
|
|
"grad_norm": 28.11864471435547,
|
|
"learning_rate": 2.2350295575598367e-08,
|
|
"logits/chosen": -0.9431838989257812,
|
|
"logits/rejected": -0.9493868350982666,
|
|
"logps/chosen": -662.695068359375,
|
|
"logps/ref_chosen": -49.802677154541016,
|
|
"logps/ref_rejected": -82.978515625,
|
|
"logps/rejected": -1018.4449462890625,
|
|
"loss": 1.129,
|
|
"margin_dpo/margin_mean": 322.57403564453125,
|
|
"margin_dpo/margin_std": 408.63031005859375,
|
|
"step": 599
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -657.6517333984375,
|
|
"KL/mean": -805.3268432617188,
|
|
"KL/rejected_KL_mean": -953.001953125,
|
|
"KL/std": 460.2043151855469,
|
|
"epoch": 0.8810572687224669,
|
|
"fcm_dpo/beta": 0.001043910626322031,
|
|
"fcm_dpo/delta": 0.09467366337776184,
|
|
"fcm_dpo/margin": 295.3502197265625,
|
|
"fcm_dpo/q_t": 0.4305538535118103,
|
|
"grad_norm": 35.54128646850586,
|
|
"learning_rate": 2.1822907887504932e-08,
|
|
"logits/chosen": -1.0306495428085327,
|
|
"logits/rejected": -1.0190434455871582,
|
|
"logps/chosen": -724.086669921875,
|
|
"logps/ref_chosen": -66.43487548828125,
|
|
"logps/ref_rejected": -85.45649719238281,
|
|
"logps/rejected": -1038.45849609375,
|
|
"loss": 1.1772,
|
|
"margin_dpo/margin_mean": 295.3502197265625,
|
|
"margin_dpo/margin_std": 545.6597290039062,
|
|
"step": 600
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -669.33251953125,
|
|
"KL/mean": -859.396240234375,
|
|
"KL/rejected_KL_mean": -1049.4599609375,
|
|
"KL/std": 509.2434997558594,
|
|
"epoch": 0.882525697503671,
|
|
"fcm_dpo/beta": 0.0010490333661437035,
|
|
"fcm_dpo/delta": 0.0012606056407094002,
|
|
"fcm_dpo/margin": 380.12744140625,
|
|
"fcm_dpo/q_t": 0.40675991773605347,
|
|
"grad_norm": 29.669403076171875,
|
|
"learning_rate": 2.1301532877994742e-08,
|
|
"logits/chosen": -0.9606672525405884,
|
|
"logits/rejected": -0.9689534306526184,
|
|
"logps/chosen": -728.4661254882812,
|
|
"logps/ref_chosen": -59.13361358642578,
|
|
"logps/ref_rejected": -94.69093322753906,
|
|
"logps/rejected": -1144.15087890625,
|
|
"loss": 1.088,
|
|
"margin_dpo/margin_mean": 380.12744140625,
|
|
"margin_dpo/margin_std": 485.5259704589844,
|
|
"step": 601
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -479.5604248046875,
|
|
"KL/mean": -705.6307373046875,
|
|
"KL/rejected_KL_mean": -931.701171875,
|
|
"KL/std": 458.986328125,
|
|
"epoch": 0.8839941262848752,
|
|
"fcm_dpo/beta": 0.001045595621690154,
|
|
"fcm_dpo/delta": -0.07681306451559067,
|
|
"fcm_dpo/margin": 452.1407470703125,
|
|
"fcm_dpo/q_t": 0.39179527759552,
|
|
"grad_norm": 48.49148178100586,
|
|
"learning_rate": 2.0786184285784298e-08,
|
|
"logits/chosen": -0.9890528917312622,
|
|
"logits/rejected": -1.0126900672912598,
|
|
"logps/chosen": -528.1539306640625,
|
|
"logps/ref_chosen": -48.59352111816406,
|
|
"logps/ref_rejected": -87.6685562133789,
|
|
"logps/rejected": -1019.3697509765625,
|
|
"loss": 1.0357,
|
|
"margin_dpo/margin_mean": 452.1407470703125,
|
|
"margin_dpo/margin_std": 503.97027587890625,
|
|
"step": 602
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -562.6729125976562,
|
|
"KL/mean": -779.9182739257812,
|
|
"KL/rejected_KL_mean": -997.16357421875,
|
|
"KL/std": 511.3848876953125,
|
|
"epoch": 0.8854625550660793,
|
|
"fcm_dpo/beta": 0.0010269982740283012,
|
|
"fcm_dpo/delta": -0.04837334156036377,
|
|
"fcm_dpo/margin": 434.49066162109375,
|
|
"fcm_dpo/q_t": 0.40095192193984985,
|
|
"grad_norm": 47.26500701904297,
|
|
"learning_rate": 2.0276875690788204e-08,
|
|
"logits/chosen": -0.9793489575386047,
|
|
"logits/rejected": -0.9629038572311401,
|
|
"logps/chosen": -633.0875244140625,
|
|
"logps/ref_chosen": -70.41461944580078,
|
|
"logps/ref_rejected": -100.32559967041016,
|
|
"logps/rejected": -1097.4892578125,
|
|
"loss": 1.0773,
|
|
"margin_dpo/margin_mean": 434.49066162109375,
|
|
"margin_dpo/margin_std": 614.062744140625,
|
|
"step": 603
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -594.250244140625,
|
|
"KL/mean": -817.974853515625,
|
|
"KL/rejected_KL_mean": -1041.699462890625,
|
|
"KL/std": 504.0755615234375,
|
|
"epoch": 0.8869309838472834,
|
|
"fcm_dpo/beta": 0.0010149029549211264,
|
|
"fcm_dpo/delta": -0.056748565286397934,
|
|
"fcm_dpo/margin": 447.44915771484375,
|
|
"fcm_dpo/q_t": 0.39813223481178284,
|
|
"grad_norm": 33.27207565307617,
|
|
"learning_rate": 1.977362051376158e-08,
|
|
"logits/chosen": -0.9312797784805298,
|
|
"logits/rejected": -0.957409143447876,
|
|
"logps/chosen": -640.7083129882812,
|
|
"logps/ref_chosen": -46.45808029174805,
|
|
"logps/ref_rejected": -91.8544921875,
|
|
"logps/rejected": -1133.553955078125,
|
|
"loss": 1.075,
|
|
"margin_dpo/margin_mean": 447.44915771484375,
|
|
"margin_dpo/margin_std": 625.2024536132812,
|
|
"step": 604
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -613.604248046875,
|
|
"KL/mean": -800.700927734375,
|
|
"KL/rejected_KL_mean": -987.7977294921875,
|
|
"KL/std": 475.6051025390625,
|
|
"epoch": 0.8883994126284875,
|
|
"fcm_dpo/beta": 0.0010130970040336251,
|
|
"fcm_dpo/delta": 0.021622149273753166,
|
|
"fcm_dpo/margin": 374.1934509277344,
|
|
"fcm_dpo/q_t": 0.4142388701438904,
|
|
"grad_norm": 35.06957244873047,
|
|
"learning_rate": 1.9276432015946446e-08,
|
|
"logits/chosen": -0.926941990852356,
|
|
"logits/rejected": -0.9346826672554016,
|
|
"logps/chosen": -679.8536376953125,
|
|
"logps/ref_chosen": -66.24933624267578,
|
|
"logps/ref_rejected": -102.30496978759766,
|
|
"logps/rejected": -1090.1026611328125,
|
|
"loss": 1.1146,
|
|
"margin_dpo/margin_mean": 374.19342041015625,
|
|
"margin_dpo/margin_std": 554.1260986328125,
|
|
"step": 605
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -615.2284545898438,
|
|
"KL/mean": -821.0089111328125,
|
|
"KL/rejected_KL_mean": -1026.7894287109375,
|
|
"KL/std": 506.0755615234375,
|
|
"epoch": 0.8898678414096917,
|
|
"fcm_dpo/beta": 0.001017784932628274,
|
|
"fcm_dpo/delta": -0.01989796943962574,
|
|
"fcm_dpo/margin": 411.5608825683594,
|
|
"fcm_dpo/q_t": 0.4054357707500458,
|
|
"grad_norm": 25.421344757080078,
|
|
"learning_rate": 1.8785323298722093e-08,
|
|
"logits/chosen": -0.9341834783554077,
|
|
"logits/rejected": -0.9450877904891968,
|
|
"logps/chosen": -670.047607421875,
|
|
"logps/ref_chosen": -54.819122314453125,
|
|
"logps/ref_rejected": -98.37146759033203,
|
|
"logps/rejected": -1125.160888671875,
|
|
"loss": 1.0817,
|
|
"margin_dpo/margin_mean": 411.5608825683594,
|
|
"margin_dpo/margin_std": 548.4053955078125,
|
|
"step": 606
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -652.0457763671875,
|
|
"KL/mean": -803.5496826171875,
|
|
"KL/rejected_KL_mean": -955.053466796875,
|
|
"KL/std": 466.5636291503906,
|
|
"epoch": 0.8913362701908958,
|
|
"fcm_dpo/beta": 0.0010304426541551948,
|
|
"fcm_dpo/delta": 0.09002204239368439,
|
|
"fcm_dpo/margin": 303.0076904296875,
|
|
"fcm_dpo/q_t": 0.4281555414199829,
|
|
"grad_norm": 29.748842239379883,
|
|
"learning_rate": 1.8300307303259904e-08,
|
|
"logits/chosen": -0.9252548217773438,
|
|
"logits/rejected": -0.9004162549972534,
|
|
"logps/chosen": -710.1298828125,
|
|
"logps/ref_chosen": -58.08403778076172,
|
|
"logps/ref_rejected": -79.777099609375,
|
|
"logps/rejected": -1034.83056640625,
|
|
"loss": 1.1683,
|
|
"margin_dpo/margin_mean": 303.0076904296875,
|
|
"margin_dpo/margin_std": 525.4556274414062,
|
|
"step": 607
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -589.3780517578125,
|
|
"KL/mean": -781.2340698242188,
|
|
"KL/rejected_KL_mean": -973.090087890625,
|
|
"KL/std": 448.39385986328125,
|
|
"epoch": 0.8928046989720999,
|
|
"fcm_dpo/beta": 0.001034360844641924,
|
|
"fcm_dpo/delta": 0.0031163040548563004,
|
|
"fcm_dpo/margin": 383.7120056152344,
|
|
"fcm_dpo/q_t": 0.4076157808303833,
|
|
"grad_norm": 38.95619583129883,
|
|
"learning_rate": 1.7821396810182437e-08,
|
|
"logits/chosen": -0.9842853546142578,
|
|
"logits/rejected": -0.9917802810668945,
|
|
"logps/chosen": -646.8289184570312,
|
|
"logps/ref_chosen": -57.450836181640625,
|
|
"logps/ref_rejected": -94.77339172363281,
|
|
"logps/rejected": -1067.863525390625,
|
|
"loss": 1.0818,
|
|
"margin_dpo/margin_mean": 383.71197509765625,
|
|
"margin_dpo/margin_std": 460.55084228515625,
|
|
"step": 608
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -609.8814697265625,
|
|
"KL/mean": -836.5782470703125,
|
|
"KL/rejected_KL_mean": -1063.2750244140625,
|
|
"KL/std": 583.9754638671875,
|
|
"epoch": 0.8942731277533039,
|
|
"fcm_dpo/beta": 0.0010232683271169662,
|
|
"fcm_dpo/delta": -0.06700144708156586,
|
|
"fcm_dpo/margin": 453.39361572265625,
|
|
"fcm_dpo/q_t": 0.3996211588382721,
|
|
"grad_norm": 29.039478302001953,
|
|
"learning_rate": 1.7348604439226617e-08,
|
|
"logits/chosen": -1.0189104080200195,
|
|
"logits/rejected": -1.0311899185180664,
|
|
"logps/chosen": -668.6868896484375,
|
|
"logps/ref_chosen": -58.805355072021484,
|
|
"logps/ref_rejected": -88.81600952148438,
|
|
"logps/rejected": -1152.091064453125,
|
|
"loss": 1.0751,
|
|
"margin_dpo/margin_mean": 453.39361572265625,
|
|
"margin_dpo/margin_std": 680.756103515625,
|
|
"step": 609
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -615.740966796875,
|
|
"KL/mean": -764.8184814453125,
|
|
"KL/rejected_KL_mean": -913.8959350585938,
|
|
"KL/std": 459.981201171875,
|
|
"epoch": 0.895741556534508,
|
|
"fcm_dpo/beta": 0.0010335487313568592,
|
|
"fcm_dpo/delta": 0.09469583630561829,
|
|
"fcm_dpo/margin": 298.15496826171875,
|
|
"fcm_dpo/q_t": 0.42828184366226196,
|
|
"grad_norm": 47.39312744140625,
|
|
"learning_rate": 1.6881942648911074e-08,
|
|
"logits/chosen": -0.9286304712295532,
|
|
"logits/rejected": -0.8889775276184082,
|
|
"logps/chosen": -681.43603515625,
|
|
"logps/ref_chosen": -65.69503784179688,
|
|
"logps/ref_rejected": -83.40538787841797,
|
|
"logps/rejected": -997.3013305664062,
|
|
"loss": 1.1705,
|
|
"margin_dpo/margin_mean": 298.15496826171875,
|
|
"margin_dpo/margin_std": 523.1708984375,
|
|
"step": 610
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -617.3861694335938,
|
|
"KL/mean": -879.7110595703125,
|
|
"KL/rejected_KL_mean": -1142.035888671875,
|
|
"KL/std": 586.503173828125,
|
|
"epoch": 0.8972099853157122,
|
|
"fcm_dpo/beta": 0.001017481554299593,
|
|
"fcm_dpo/delta": -0.14123646914958954,
|
|
"fcm_dpo/margin": 524.6498413085938,
|
|
"fcm_dpo/q_t": 0.38256320357322693,
|
|
"grad_norm": 33.80018615722656,
|
|
"learning_rate": 1.6421423736208e-08,
|
|
"logits/chosen": -0.9945396184921265,
|
|
"logits/rejected": -1.0387227535247803,
|
|
"logps/chosen": -669.985595703125,
|
|
"logps/ref_chosen": -52.59946823120117,
|
|
"logps/ref_rejected": -86.33099365234375,
|
|
"logps/rejected": -1228.366943359375,
|
|
"loss": 1.026,
|
|
"margin_dpo/margin_mean": 524.6498413085938,
|
|
"margin_dpo/margin_std": 674.8773193359375,
|
|
"step": 611
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -683.014892578125,
|
|
"KL/mean": -888.0888671875,
|
|
"KL/rejected_KL_mean": -1093.162841796875,
|
|
"KL/std": 499.14178466796875,
|
|
"epoch": 0.8986784140969163,
|
|
"fcm_dpo/beta": 0.001010039821267128,
|
|
"fcm_dpo/delta": -0.015054378658533096,
|
|
"fcm_dpo/margin": 410.1479797363281,
|
|
"fcm_dpo/q_t": 0.40539655089378357,
|
|
"grad_norm": 24.813846588134766,
|
|
"learning_rate": 1.5967059836219042e-08,
|
|
"logits/chosen": -1.0098485946655273,
|
|
"logits/rejected": -1.000733733177185,
|
|
"logps/chosen": -742.338623046875,
|
|
"logps/ref_chosen": -59.32372283935547,
|
|
"logps/ref_rejected": -88.31239318847656,
|
|
"logps/rejected": -1181.475341796875,
|
|
"loss": 1.0831,
|
|
"margin_dpo/margin_mean": 410.1479797363281,
|
|
"margin_dpo/margin_std": 539.570556640625,
|
|
"step": 612
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -591.58203125,
|
|
"KL/mean": -833.6363525390625,
|
|
"KL/rejected_KL_mean": -1075.690673828125,
|
|
"KL/std": 527.3922119140625,
|
|
"epoch": 0.9001468428781204,
|
|
"fcm_dpo/beta": 0.0009931058157235384,
|
|
"fcm_dpo/delta": -0.08487021923065186,
|
|
"fcm_dpo/margin": 484.1087646484375,
|
|
"fcm_dpo/q_t": 0.38921403884887695,
|
|
"grad_norm": 35.546573638916016,
|
|
"learning_rate": 1.551886292185553e-08,
|
|
"logits/chosen": -1.0186982154846191,
|
|
"logits/rejected": -1.0692498683929443,
|
|
"logps/chosen": -651.31201171875,
|
|
"logps/ref_chosen": -59.72996520996094,
|
|
"logps/ref_rejected": -105.10752868652344,
|
|
"logps/rejected": -1180.79833984375,
|
|
"loss": 1.0269,
|
|
"margin_dpo/margin_mean": 484.1087646484375,
|
|
"margin_dpo/margin_std": 536.681884765625,
|
|
"step": 613
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -657.602294921875,
|
|
"KL/mean": -874.024169921875,
|
|
"KL/rejected_KL_mean": -1090.4461669921875,
|
|
"KL/std": 522.123046875,
|
|
"epoch": 0.9016152716593245,
|
|
"fcm_dpo/beta": 0.0009876348776742816,
|
|
"fcm_dpo/delta": -0.028807081282138824,
|
|
"fcm_dpo/margin": 432.84381103515625,
|
|
"fcm_dpo/q_t": 0.4031580984592438,
|
|
"grad_norm": 34.93425750732422,
|
|
"learning_rate": 1.507684480352292e-08,
|
|
"logits/chosen": -0.9571743011474609,
|
|
"logits/rejected": -1.0247644186019897,
|
|
"logps/chosen": -710.541259765625,
|
|
"logps/ref_chosen": -52.93898010253906,
|
|
"logps/ref_rejected": -104.67938232421875,
|
|
"logps/rejected": -1195.12548828125,
|
|
"loss": 1.0948,
|
|
"margin_dpo/margin_mean": 432.84381103515625,
|
|
"margin_dpo/margin_std": 632.5867919921875,
|
|
"step": 614
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -625.9893798828125,
|
|
"KL/mean": -813.37548828125,
|
|
"KL/rejected_KL_mean": -1000.7615966796875,
|
|
"KL/std": 563.71142578125,
|
|
"epoch": 0.9030837004405287,
|
|
"fcm_dpo/beta": 0.000991692766547203,
|
|
"fcm_dpo/delta": 0.02896309643983841,
|
|
"fcm_dpo/margin": 374.772216796875,
|
|
"fcm_dpo/q_t": 0.41585174202919006,
|
|
"grad_norm": 25.01183319091797,
|
|
"learning_rate": 1.4641017128809801e-08,
|
|
"logits/chosen": -0.9686431884765625,
|
|
"logits/rejected": -0.9801833033561707,
|
|
"logps/chosen": -691.8067016601562,
|
|
"logps/ref_chosen": -65.81727600097656,
|
|
"logps/ref_rejected": -95.17749786376953,
|
|
"logps/rejected": -1095.939208984375,
|
|
"loss": 1.1292,
|
|
"margin_dpo/margin_mean": 374.772216796875,
|
|
"margin_dpo/margin_std": 597.46826171875,
|
|
"step": 615
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -744.90234375,
|
|
"KL/mean": -902.1414184570312,
|
|
"KL/rejected_KL_mean": -1059.3804931640625,
|
|
"KL/std": 491.1352233886719,
|
|
"epoch": 0.9045521292217328,
|
|
"fcm_dpo/beta": 0.0010039603803306818,
|
|
"fcm_dpo/delta": 0.08676433563232422,
|
|
"fcm_dpo/margin": 314.47821044921875,
|
|
"fcm_dpo/q_t": 0.42835602164268494,
|
|
"grad_norm": 39.332359313964844,
|
|
"learning_rate": 1.4211391382180637e-08,
|
|
"logits/chosen": -1.029462218284607,
|
|
"logits/rejected": -1.003951907157898,
|
|
"logps/chosen": -810.03515625,
|
|
"logps/ref_chosen": -65.13285827636719,
|
|
"logps/ref_rejected": -74.70050048828125,
|
|
"logps/rejected": -1134.0810546875,
|
|
"loss": 1.1674,
|
|
"margin_dpo/margin_mean": 314.478271484375,
|
|
"margin_dpo/margin_std": 547.7131958007812,
|
|
"step": 616
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -681.607666015625,
|
|
"KL/mean": -797.3641967773438,
|
|
"KL/rejected_KL_mean": -913.1207275390625,
|
|
"KL/std": 436.149169921875,
|
|
"epoch": 0.9060205580029369,
|
|
"fcm_dpo/beta": 0.001032671658322215,
|
|
"fcm_dpo/delta": 0.16463825106620789,
|
|
"fcm_dpo/margin": 231.5129852294922,
|
|
"fcm_dpo/q_t": 0.44537049531936646,
|
|
"grad_norm": 54.0463981628418,
|
|
"learning_rate": 1.378797888467345e-08,
|
|
"logits/chosen": -0.9412636756896973,
|
|
"logits/rejected": -0.8970128297805786,
|
|
"logps/chosen": -744.61328125,
|
|
"logps/ref_chosen": -63.005550384521484,
|
|
"logps/ref_rejected": -64.234130859375,
|
|
"logps/rejected": -977.3547973632812,
|
|
"loss": 1.2256,
|
|
"margin_dpo/margin_mean": 231.51300048828125,
|
|
"margin_dpo/margin_std": 500.28045654296875,
|
|
"step": 617
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -718.72119140625,
|
|
"KL/mean": -963.6566772460938,
|
|
"KL/rejected_KL_mean": -1208.59228515625,
|
|
"KL/std": 597.5335693359375,
|
|
"epoch": 0.9074889867841409,
|
|
"fcm_dpo/beta": 0.0010278007248416543,
|
|
"fcm_dpo/delta": -0.1088884249329567,
|
|
"fcm_dpo/margin": 489.87103271484375,
|
|
"fcm_dpo/q_t": 0.39031103253364563,
|
|
"grad_norm": 36.161380767822266,
|
|
"learning_rate": 1.3370790793601371e-08,
|
|
"logits/chosen": -0.9905341267585754,
|
|
"logits/rejected": -1.0219985246658325,
|
|
"logps/chosen": -785.822509765625,
|
|
"logps/ref_chosen": -67.10134887695312,
|
|
"logps/ref_rejected": -92.15340423583984,
|
|
"logps/rejected": -1300.74560546875,
|
|
"loss": 1.0778,
|
|
"margin_dpo/margin_mean": 489.87103271484375,
|
|
"margin_dpo/margin_std": 753.9830322265625,
|
|
"step": 618
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -694.73486328125,
|
|
"KL/mean": -886.2698974609375,
|
|
"KL/rejected_KL_mean": -1077.804931640625,
|
|
"KL/std": 564.9559326171875,
|
|
"epoch": 0.908957415565345,
|
|
"fcm_dpo/beta": 0.0010191791225224733,
|
|
"fcm_dpo/delta": 0.00997202843427658,
|
|
"fcm_dpo/margin": 383.0700378417969,
|
|
"fcm_dpo/q_t": 0.4178283214569092,
|
|
"grad_norm": 38.189334869384766,
|
|
"learning_rate": 1.2959838102258535e-08,
|
|
"logits/chosen": -0.9747885465621948,
|
|
"logits/rejected": -0.9790507555007935,
|
|
"logps/chosen": -750.713134765625,
|
|
"logps/ref_chosen": -55.978233337402344,
|
|
"logps/ref_rejected": -93.1854019165039,
|
|
"logps/rejected": -1170.990234375,
|
|
"loss": 1.1572,
|
|
"margin_dpo/margin_mean": 383.070068359375,
|
|
"margin_dpo/margin_std": 730.9571533203125,
|
|
"step": 619
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -647.696044921875,
|
|
"KL/mean": -829.687255859375,
|
|
"KL/rejected_KL_mean": -1011.6784057617188,
|
|
"KL/std": 487.70135498046875,
|
|
"epoch": 0.9104258443465492,
|
|
"fcm_dpo/beta": 0.0010204799473285675,
|
|
"fcm_dpo/delta": 0.029505692422389984,
|
|
"fcm_dpo/margin": 363.9824523925781,
|
|
"fcm_dpo/q_t": 0.41603296995162964,
|
|
"grad_norm": 30.40621566772461,
|
|
"learning_rate": 1.2555131639630567e-08,
|
|
"logits/chosen": -1.0277998447418213,
|
|
"logits/rejected": -1.0267902612686157,
|
|
"logps/chosen": -707.4935302734375,
|
|
"logps/ref_chosen": -59.79750061035156,
|
|
"logps/ref_rejected": -78.41075134277344,
|
|
"logps/rejected": -1090.089111328125,
|
|
"loss": 1.1238,
|
|
"margin_dpo/margin_mean": 363.982421875,
|
|
"margin_dpo/margin_std": 553.198974609375,
|
|
"step": 620
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -630.0430908203125,
|
|
"KL/mean": -906.111083984375,
|
|
"KL/rejected_KL_mean": -1182.178955078125,
|
|
"KL/std": 607.4686279296875,
|
|
"epoch": 0.9118942731277533,
|
|
"fcm_dpo/beta": 0.001008342718705535,
|
|
"fcm_dpo/delta": -0.16588960587978363,
|
|
"fcm_dpo/margin": 552.135986328125,
|
|
"fcm_dpo/q_t": 0.37482962012290955,
|
|
"grad_norm": 46.220027923583984,
|
|
"learning_rate": 1.2156682070109086e-08,
|
|
"logits/chosen": -1.044553518295288,
|
|
"logits/rejected": -1.0921435356140137,
|
|
"logps/chosen": -683.976806640625,
|
|
"logps/ref_chosen": -53.93375778198242,
|
|
"logps/ref_rejected": -88.36951446533203,
|
|
"logps/rejected": -1270.548583984375,
|
|
"loss": 1.0065,
|
|
"margin_dpo/margin_mean": 552.135986328125,
|
|
"margin_dpo/margin_std": 646.8200073242188,
|
|
"step": 621
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -638.1873779296875,
|
|
"KL/mean": -831.6390380859375,
|
|
"KL/rejected_KL_mean": -1025.090576171875,
|
|
"KL/std": 474.5973815917969,
|
|
"epoch": 0.9133627019089574,
|
|
"fcm_dpo/beta": 0.0009906619088724256,
|
|
"fcm_dpo/delta": 0.01709701120853424,
|
|
"fcm_dpo/margin": 386.9031677246094,
|
|
"fcm_dpo/q_t": 0.4140721559524536,
|
|
"grad_norm": 29.612194061279297,
|
|
"learning_rate": 1.1764499893210878e-08,
|
|
"logits/chosen": -0.9218890070915222,
|
|
"logits/rejected": -0.8999383449554443,
|
|
"logps/chosen": -698.4732666015625,
|
|
"logps/ref_chosen": -60.28582000732422,
|
|
"logps/ref_rejected": -85.51873779296875,
|
|
"logps/rejected": -1110.609375,
|
|
"loss": 1.1149,
|
|
"margin_dpo/margin_mean": 386.90313720703125,
|
|
"margin_dpo/margin_std": 587.5882568359375,
|
|
"step": 622
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -709.4207153320312,
|
|
"KL/mean": -861.94287109375,
|
|
"KL/rejected_KL_mean": -1014.465087890625,
|
|
"KL/std": 493.4505920410156,
|
|
"epoch": 0.9148311306901615,
|
|
"fcm_dpo/beta": 0.0010092295706272125,
|
|
"fcm_dpo/delta": 0.09511934220790863,
|
|
"fcm_dpo/margin": 305.0443420410156,
|
|
"fcm_dpo/q_t": 0.431568443775177,
|
|
"grad_norm": 41.82392883300781,
|
|
"learning_rate": 1.1378595443300998e-08,
|
|
"logits/chosen": -1.078977346420288,
|
|
"logits/rejected": -1.0663626194000244,
|
|
"logps/chosen": -773.57763671875,
|
|
"logps/ref_chosen": -64.1569595336914,
|
|
"logps/ref_rejected": -85.08304595947266,
|
|
"logps/rejected": -1099.548095703125,
|
|
"loss": 1.1834,
|
|
"margin_dpo/margin_mean": 305.0443115234375,
|
|
"margin_dpo/margin_std": 590.980224609375,
|
|
"step": 623
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -691.134033203125,
|
|
"KL/mean": -919.0125122070312,
|
|
"KL/rejected_KL_mean": -1146.890869140625,
|
|
"KL/std": 508.752685546875,
|
|
"epoch": 0.9162995594713657,
|
|
"fcm_dpo/beta": 0.0010058375773951411,
|
|
"fcm_dpo/delta": -0.06120828539133072,
|
|
"fcm_dpo/margin": 455.75689697265625,
|
|
"fcm_dpo/q_t": 0.39310479164123535,
|
|
"grad_norm": 32.26824951171875,
|
|
"learning_rate": 1.0998978889320582e-08,
|
|
"logits/chosen": -1.066502332687378,
|
|
"logits/rejected": -1.0555529594421387,
|
|
"logps/chosen": -763.052734375,
|
|
"logps/ref_chosen": -71.91862487792969,
|
|
"logps/ref_rejected": -97.13203430175781,
|
|
"logps/rejected": -1244.02294921875,
|
|
"loss": 1.0517,
|
|
"margin_dpo/margin_mean": 455.75689697265625,
|
|
"margin_dpo/margin_std": 550.7293701171875,
|
|
"step": 624
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -651.8470458984375,
|
|
"KL/mean": -884.4716796875,
|
|
"KL/rejected_KL_mean": -1117.0963134765625,
|
|
"KL/std": 530.2442626953125,
|
|
"epoch": 0.9177679882525698,
|
|
"fcm_dpo/beta": 0.0009930902160704136,
|
|
"fcm_dpo/delta": -0.06502003967761993,
|
|
"fcm_dpo/margin": 465.24932861328125,
|
|
"fcm_dpo/q_t": 0.394126296043396,
|
|
"grad_norm": 47.13850402832031,
|
|
"learning_rate": 1.0625660234518913e-08,
|
|
"logits/chosen": -0.9659937024116516,
|
|
"logits/rejected": -0.9809095859527588,
|
|
"logps/chosen": -710.1890869140625,
|
|
"logps/ref_chosen": -58.342071533203125,
|
|
"logps/ref_rejected": -86.09038543701172,
|
|
"logps/rejected": -1203.186767578125,
|
|
"loss": 1.0347,
|
|
"margin_dpo/margin_mean": 465.24932861328125,
|
|
"margin_dpo/margin_std": 516.6107788085938,
|
|
"step": 625
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -775.7965087890625,
|
|
"KL/mean": -922.9110107421875,
|
|
"KL/rejected_KL_mean": -1070.0255126953125,
|
|
"KL/std": 596.8397216796875,
|
|
"epoch": 0.9192364170337739,
|
|
"fcm_dpo/beta": 0.0010072626173496246,
|
|
"fcm_dpo/delta": 0.10651648044586182,
|
|
"fcm_dpo/margin": 294.22900390625,
|
|
"fcm_dpo/q_t": 0.4323081970214844,
|
|
"grad_norm": 27.3458251953125,
|
|
"learning_rate": 1.0258649316189721e-08,
|
|
"logits/chosen": -0.9275539517402649,
|
|
"logits/rejected": -0.9108865261077881,
|
|
"logps/chosen": -850.9091796875,
|
|
"logps/ref_chosen": -75.11260986328125,
|
|
"logps/ref_rejected": -99.188720703125,
|
|
"logps/rejected": -1169.2142333984375,
|
|
"loss": 1.1941,
|
|
"margin_dpo/margin_mean": 294.22900390625,
|
|
"margin_dpo/margin_std": 575.285400390625,
|
|
"step": 626
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -557.2504272460938,
|
|
"KL/mean": -842.9886474609375,
|
|
"KL/rejected_KL_mean": -1128.726806640625,
|
|
"KL/std": 656.2432250976562,
|
|
"epoch": 0.920704845814978,
|
|
"fcm_dpo/beta": 0.0009910902008414268,
|
|
"fcm_dpo/delta": -0.176089346408844,
|
|
"fcm_dpo/margin": 571.476318359375,
|
|
"fcm_dpo/q_t": 0.3810996115207672,
|
|
"grad_norm": 26.04566192626953,
|
|
"learning_rate": 9.897955805412e-09,
|
|
"logits/chosen": -0.8594233989715576,
|
|
"logits/rejected": -0.9377764463424683,
|
|
"logps/chosen": -604.9935913085938,
|
|
"logps/ref_chosen": -47.74314880371094,
|
|
"logps/ref_rejected": -106.75448608398438,
|
|
"logps/rejected": -1235.481201171875,
|
|
"loss": 1.0171,
|
|
"margin_dpo/margin_mean": 571.476318359375,
|
|
"margin_dpo/margin_std": 751.080810546875,
|
|
"step": 627
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -722.8740844726562,
|
|
"KL/mean": -924.2254638671875,
|
|
"KL/rejected_KL_mean": -1125.5770263671875,
|
|
"KL/std": 539.3365478515625,
|
|
"epoch": 0.922173274596182,
|
|
"fcm_dpo/beta": 0.0009758264059200883,
|
|
"fcm_dpo/delta": 0.007305025588721037,
|
|
"fcm_dpo/margin": 402.702880859375,
|
|
"fcm_dpo/q_t": 0.41142743825912476,
|
|
"grad_norm": 27.978281021118164,
|
|
"learning_rate": 9.543589206795238e-09,
|
|
"logits/chosen": -1.021456003189087,
|
|
"logits/rejected": -1.0348306894302368,
|
|
"logps/chosen": -783.0570068359375,
|
|
"logps/ref_chosen": -60.182945251464844,
|
|
"logps/ref_rejected": -101.55467224121094,
|
|
"logps/rejected": -1227.131591796875,
|
|
"loss": 1.113,
|
|
"margin_dpo/margin_mean": 402.702880859375,
|
|
"margin_dpo/margin_std": 607.0352783203125,
|
|
"step": 628
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -697.99755859375,
|
|
"KL/mean": -897.359375,
|
|
"KL/rejected_KL_mean": -1096.72119140625,
|
|
"KL/std": 534.4760131835938,
|
|
"epoch": 0.9236417033773862,
|
|
"fcm_dpo/beta": 0.0009785511065274477,
|
|
"fcm_dpo/delta": 0.010228663682937622,
|
|
"fcm_dpo/margin": 398.7236328125,
|
|
"fcm_dpo/q_t": 0.4093359112739563,
|
|
"grad_norm": 31.393468856811523,
|
|
"learning_rate": 9.19555885822887e-09,
|
|
"logits/chosen": -1.0401864051818848,
|
|
"logits/rejected": -1.0507943630218506,
|
|
"logps/chosen": -762.2110595703125,
|
|
"logps/ref_chosen": -64.21354675292969,
|
|
"logps/ref_rejected": -91.65367126464844,
|
|
"logps/rejected": -1188.374755859375,
|
|
"loss": 1.0973,
|
|
"margin_dpo/margin_mean": 398.7236328125,
|
|
"margin_dpo/margin_std": 530.4059448242188,
|
|
"step": 629
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -634.3704833984375,
|
|
"KL/mean": -745.990478515625,
|
|
"KL/rejected_KL_mean": -857.6103515625,
|
|
"KL/std": 511.8554382324219,
|
|
"epoch": 0.9251101321585903,
|
|
"fcm_dpo/beta": 0.000984629150480032,
|
|
"fcm_dpo/delta": 0.05507725104689598,
|
|
"fcm_dpo/margin": 223.2398681640625,
|
|
"fcm_dpo/q_t": 0.4526089131832123,
|
|
"grad_norm": 44.8847541809082,
|
|
"learning_rate": 8.85387393063622e-09,
|
|
"logits/chosen": -0.9998958706855774,
|
|
"logits/rejected": -0.9623087644577026,
|
|
"logps/chosen": -693.6614990234375,
|
|
"logps/ref_chosen": -59.29100036621094,
|
|
"logps/ref_rejected": -83.59829711914062,
|
|
"logps/rejected": -941.2086791992188,
|
|
"loss": 1.2595,
|
|
"margin_dpo/margin_mean": 223.2398681640625,
|
|
"margin_dpo/margin_std": 596.5005493164062,
|
|
"step": 630
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -742.057861328125,
|
|
"KL/mean": -920.331298828125,
|
|
"KL/rejected_KL_mean": -1098.604736328125,
|
|
"KL/std": 551.6812133789062,
|
|
"epoch": 0.9265785609397944,
|
|
"fcm_dpo/beta": 0.0009971531108021736,
|
|
"fcm_dpo/delta": 0.04609350860118866,
|
|
"fcm_dpo/margin": 356.5467529296875,
|
|
"fcm_dpo/q_t": 0.4193703532218933,
|
|
"grad_norm": 27.521265029907227,
|
|
"learning_rate": 8.518543427732949e-09,
|
|
"logits/chosen": -1.0666249990463257,
|
|
"logits/rejected": -1.0676807165145874,
|
|
"logps/chosen": -801.511474609375,
|
|
"logps/ref_chosen": -59.45360565185547,
|
|
"logps/ref_rejected": -80.95156860351562,
|
|
"logps/rejected": -1179.5562744140625,
|
|
"loss": 1.1545,
|
|
"margin_dpo/margin_mean": 356.5467529296875,
|
|
"margin_dpo/margin_std": 633.4856567382812,
|
|
"step": 631
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -661.1845092773438,
|
|
"KL/mean": -845.2674560546875,
|
|
"KL/rejected_KL_mean": -1029.350341796875,
|
|
"KL/std": 465.203369140625,
|
|
"epoch": 0.9280469897209985,
|
|
"fcm_dpo/beta": 0.0009988134261220694,
|
|
"fcm_dpo/delta": 0.03309358283877373,
|
|
"fcm_dpo/margin": 368.1659240722656,
|
|
"fcm_dpo/q_t": 0.4161521792411804,
|
|
"grad_norm": 31.457181930541992,
|
|
"learning_rate": 8.189576185789637e-09,
|
|
"logits/chosen": -1.0178093910217285,
|
|
"logits/rejected": -1.0131831169128418,
|
|
"logps/chosen": -722.5360717773438,
|
|
"logps/ref_chosen": -61.35155487060547,
|
|
"logps/ref_rejected": -86.16017150878906,
|
|
"logps/rejected": -1115.510498046875,
|
|
"loss": 1.1364,
|
|
"margin_dpo/margin_mean": 368.16595458984375,
|
|
"margin_dpo/margin_std": 587.306884765625,
|
|
"step": 632
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -722.002685546875,
|
|
"KL/mean": -844.985107421875,
|
|
"KL/rejected_KL_mean": -967.967529296875,
|
|
"KL/std": 474.93060302734375,
|
|
"epoch": 0.9295154185022027,
|
|
"fcm_dpo/beta": 0.0010103812674060464,
|
|
"fcm_dpo/delta": 0.04477893188595772,
|
|
"fcm_dpo/margin": 245.96484375,
|
|
"fcm_dpo/q_t": 0.44230562448501587,
|
|
"grad_norm": 33.3397102355957,
|
|
"learning_rate": 7.866980873399015e-09,
|
|
"logits/chosen": -1.0611484050750732,
|
|
"logits/rejected": -1.0687685012817383,
|
|
"logps/chosen": -779.2808837890625,
|
|
"logps/ref_chosen": -57.27816390991211,
|
|
"logps/ref_rejected": -91.58395385742188,
|
|
"logps/rejected": -1059.551513671875,
|
|
"loss": 1.2266,
|
|
"margin_dpo/margin_mean": 245.96484375,
|
|
"margin_dpo/margin_std": 546.748046875,
|
|
"step": 633
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -794.5701904296875,
|
|
"KL/mean": -933.633544921875,
|
|
"KL/rejected_KL_mean": -1072.697021484375,
|
|
"KL/std": 556.5794067382812,
|
|
"epoch": 0.9309838472834068,
|
|
"fcm_dpo/beta": 0.0010201697004958987,
|
|
"fcm_dpo/delta": 0.025866778567433357,
|
|
"fcm_dpo/margin": 278.1268615722656,
|
|
"fcm_dpo/q_t": 0.43637216091156006,
|
|
"grad_norm": 29.091527938842773,
|
|
"learning_rate": 7.550765991247654e-09,
|
|
"logits/chosen": -0.9369779825210571,
|
|
"logits/rejected": -0.932574987411499,
|
|
"logps/chosen": -861.1891479492188,
|
|
"logps/ref_chosen": -66.61896514892578,
|
|
"logps/ref_rejected": -107.12564849853516,
|
|
"logps/rejected": -1179.82275390625,
|
|
"loss": 1.2001,
|
|
"margin_dpo/margin_mean": 278.1268310546875,
|
|
"margin_dpo/margin_std": 560.8475952148438,
|
|
"step": 634
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -698.1052856445312,
|
|
"KL/mean": -869.6240844726562,
|
|
"KL/rejected_KL_mean": -1041.142822265625,
|
|
"KL/std": 603.190185546875,
|
|
"epoch": 0.9324522760646109,
|
|
"fcm_dpo/beta": 0.0010265845339745283,
|
|
"fcm_dpo/delta": 0.04961933195590973,
|
|
"fcm_dpo/margin": 343.0375671386719,
|
|
"fcm_dpo/q_t": 0.421801894903183,
|
|
"grad_norm": 36.472694396972656,
|
|
"learning_rate": 7.240939871891699e-09,
|
|
"logits/chosen": -1.003667950630188,
|
|
"logits/rejected": -0.9820040464401245,
|
|
"logps/chosen": -772.060791015625,
|
|
"logps/ref_chosen": -73.95551300048828,
|
|
"logps/ref_rejected": -82.50045776367188,
|
|
"logps/rejected": -1123.643310546875,
|
|
"loss": 1.1452,
|
|
"margin_dpo/margin_mean": 343.0375671386719,
|
|
"margin_dpo/margin_std": 582.0855712890625,
|
|
"step": 635
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -670.5150146484375,
|
|
"KL/mean": -865.4879760742188,
|
|
"KL/rejected_KL_mean": -1060.4609375,
|
|
"KL/std": 573.9064331054688,
|
|
"epoch": 0.933920704845815,
|
|
"fcm_dpo/beta": 0.0010364481713622808,
|
|
"fcm_dpo/delta": -0.0052045732736587524,
|
|
"fcm_dpo/margin": 389.94598388671875,
|
|
"fcm_dpo/q_t": 0.4110804796218872,
|
|
"grad_norm": 28.409608840942383,
|
|
"learning_rate": 6.937510679537628e-09,
|
|
"logits/chosen": -0.9490704536437988,
|
|
"logits/rejected": -0.9512150287628174,
|
|
"logps/chosen": -730.1439208984375,
|
|
"logps/ref_chosen": -59.628910064697266,
|
|
"logps/ref_rejected": -81.97883605957031,
|
|
"logps/rejected": -1142.4398193359375,
|
|
"loss": 1.1069,
|
|
"margin_dpo/margin_mean": 389.9460144042969,
|
|
"margin_dpo/margin_std": 601.882568359375,
|
|
"step": 636
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -652.6944580078125,
|
|
"KL/mean": -868.58056640625,
|
|
"KL/rejected_KL_mean": -1084.4666748046875,
|
|
"KL/std": 552.986572265625,
|
|
"epoch": 0.9353891336270191,
|
|
"fcm_dpo/beta": 0.0010184976272284985,
|
|
"fcm_dpo/delta": -0.04213680326938629,
|
|
"fcm_dpo/margin": 431.7720947265625,
|
|
"fcm_dpo/q_t": 0.4004812240600586,
|
|
"grad_norm": 30.13751792907715,
|
|
"learning_rate": 6.640486409826785e-09,
|
|
"logits/chosen": -1.028292179107666,
|
|
"logits/rejected": -1.0698986053466797,
|
|
"logps/chosen": -702.34716796875,
|
|
"logps/ref_chosen": -49.652687072753906,
|
|
"logps/ref_rejected": -98.40513610839844,
|
|
"logps/rejected": -1182.871826171875,
|
|
"loss": 1.0734,
|
|
"margin_dpo/margin_mean": 431.7720947265625,
|
|
"margin_dpo/margin_std": 576.40869140625,
|
|
"step": 637
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -654.815185546875,
|
|
"KL/mean": -831.240234375,
|
|
"KL/rejected_KL_mean": -1007.665283203125,
|
|
"KL/std": 540.849609375,
|
|
"epoch": 0.9368575624082232,
|
|
"fcm_dpo/beta": 0.001035545952618122,
|
|
"fcm_dpo/delta": 0.03378577530384064,
|
|
"fcm_dpo/margin": 352.8500671386719,
|
|
"fcm_dpo/q_t": 0.41145235300064087,
|
|
"grad_norm": 40.42203140258789,
|
|
"learning_rate": 6.349874889624962e-09,
|
|
"logits/chosen": -0.9185539484024048,
|
|
"logits/rejected": -0.8953431248664856,
|
|
"logps/chosen": -712.9718017578125,
|
|
"logps/ref_chosen": -58.156639099121094,
|
|
"logps/ref_rejected": -79.3014907836914,
|
|
"logps/rejected": -1086.966796875,
|
|
"loss": 1.1535,
|
|
"margin_dpo/margin_mean": 352.8500671386719,
|
|
"margin_dpo/margin_std": 612.448974609375,
|
|
"step": 638
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -840.7874755859375,
|
|
"KL/mean": -925.3656005859375,
|
|
"KL/rejected_KL_mean": -1009.9437866210938,
|
|
"KL/std": 470.619140625,
|
|
"epoch": 0.9383259911894273,
|
|
"fcm_dpo/beta": 0.0010275545064359903,
|
|
"fcm_dpo/delta": 0.0,
|
|
"fcm_dpo/margin": 169.1563262939453,
|
|
"fcm_dpo/q_t": 0.46009236574172974,
|
|
"grad_norm": 89.78936004638672,
|
|
"learning_rate": 6.065683776815933e-09,
|
|
"logits/chosen": -0.9101927876472473,
|
|
"logits/rejected": -0.8411852121353149,
|
|
"logps/chosen": -913.1106567382812,
|
|
"logps/ref_chosen": -72.32319641113281,
|
|
"logps/ref_rejected": -74.2749252319336,
|
|
"logps/rejected": -1084.21875,
|
|
"loss": 1.3033,
|
|
"margin_dpo/margin_mean": 169.15634155273438,
|
|
"margin_dpo/margin_std": 571.2896728515625,
|
|
"step": 639
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -661.3975830078125,
|
|
"KL/mean": -938.8929443359375,
|
|
"KL/rejected_KL_mean": -1216.3883056640625,
|
|
"KL/std": 608.6758422851562,
|
|
"epoch": 0.9397944199706314,
|
|
"fcm_dpo/beta": 0.0010085678659379482,
|
|
"fcm_dpo/delta": -0.16897350549697876,
|
|
"fcm_dpo/margin": 554.99072265625,
|
|
"fcm_dpo/q_t": 0.379363477230072,
|
|
"grad_norm": 40.64341354370117,
|
|
"learning_rate": 5.7879205600998296e-09,
|
|
"logits/chosen": -0.9130121469497681,
|
|
"logits/rejected": -0.9402400255203247,
|
|
"logps/chosen": -717.5319213867188,
|
|
"logps/ref_chosen": -56.13436508178711,
|
|
"logps/ref_rejected": -108.60014343261719,
|
|
"logps/rejected": -1324.9884033203125,
|
|
"loss": 1.0178,
|
|
"margin_dpo/margin_mean": 554.99072265625,
|
|
"margin_dpo/margin_std": 715.995361328125,
|
|
"step": 640
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -758.479736328125,
|
|
"KL/mean": -919.178466796875,
|
|
"KL/rejected_KL_mean": -1079.877197265625,
|
|
"KL/std": 485.34954833984375,
|
|
"epoch": 0.9412628487518355,
|
|
"fcm_dpo/beta": 0.0010051288409158587,
|
|
"fcm_dpo/delta": 0.07953417301177979,
|
|
"fcm_dpo/margin": 321.3974609375,
|
|
"fcm_dpo/q_t": 0.42806270718574524,
|
|
"grad_norm": 30.03923797607422,
|
|
"learning_rate": 5.516592558795746e-09,
|
|
"logits/chosen": -0.9936619997024536,
|
|
"logits/rejected": -0.9894883036613464,
|
|
"logps/chosen": -823.4765625,
|
|
"logps/ref_chosen": -64.99689483642578,
|
|
"logps/ref_rejected": -86.99232482910156,
|
|
"logps/rejected": -1166.8695068359375,
|
|
"loss": 1.1793,
|
|
"margin_dpo/margin_mean": 321.3974609375,
|
|
"margin_dpo/margin_std": 621.0615234375,
|
|
"step": 641
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -754.40771484375,
|
|
"KL/mean": -960.8053588867188,
|
|
"KL/rejected_KL_mean": -1167.2030029296875,
|
|
"KL/std": 651.447021484375,
|
|
"epoch": 0.9427312775330396,
|
|
"fcm_dpo/beta": 0.0010101549560204148,
|
|
"fcm_dpo/delta": -0.017909951508045197,
|
|
"fcm_dpo/margin": 412.79522705078125,
|
|
"fcm_dpo/q_t": 0.4139317274093628,
|
|
"grad_norm": 40.49140167236328,
|
|
"learning_rate": 5.251706922648868e-09,
|
|
"logits/chosen": -0.9240103960037231,
|
|
"logits/rejected": -0.9492435455322266,
|
|
"logps/chosen": -820.0969848632812,
|
|
"logps/ref_chosen": -65.68924713134766,
|
|
"logps/ref_rejected": -110.24205017089844,
|
|
"logps/rejected": -1277.445068359375,
|
|
"loss": 1.1415,
|
|
"margin_dpo/margin_mean": 412.7952575683594,
|
|
"margin_dpo/margin_std": 782.796142578125,
|
|
"step": 642
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -682.908935546875,
|
|
"KL/mean": -835.4664306640625,
|
|
"KL/rejected_KL_mean": -988.0238037109375,
|
|
"KL/std": 504.5992431640625,
|
|
"epoch": 0.9441997063142438,
|
|
"fcm_dpo/beta": 0.0010031081037595868,
|
|
"fcm_dpo/delta": -0.013042459264397621,
|
|
"fcm_dpo/margin": 305.1148986816406,
|
|
"fcm_dpo/q_t": 0.42987653613090515,
|
|
"grad_norm": 39.44825744628906,
|
|
"learning_rate": 4.993270631642038e-09,
|
|
"logits/chosen": -1.0819464921951294,
|
|
"logits/rejected": -1.0728490352630615,
|
|
"logps/chosen": -734.8589477539062,
|
|
"logps/ref_chosen": -51.94999694824219,
|
|
"logps/ref_rejected": -87.46833801269531,
|
|
"logps/rejected": -1075.4921875,
|
|
"loss": 1.1687,
|
|
"margin_dpo/margin_mean": 305.1148986816406,
|
|
"margin_dpo/margin_std": 513.481689453125,
|
|
"step": 643
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -659.0570068359375,
|
|
"KL/mean": -824.5692138671875,
|
|
"KL/rejected_KL_mean": -990.0814208984375,
|
|
"KL/std": 571.162841796875,
|
|
"epoch": 0.9456681350954479,
|
|
"fcm_dpo/beta": 0.0010127369314432144,
|
|
"fcm_dpo/delta": 0.06703174114227295,
|
|
"fcm_dpo/margin": 331.0244140625,
|
|
"fcm_dpo/q_t": 0.4254780113697052,
|
|
"grad_norm": 36.88336181640625,
|
|
"learning_rate": 4.741290495811873e-09,
|
|
"logits/chosen": -0.9800692796707153,
|
|
"logits/rejected": -0.9834997653961182,
|
|
"logps/chosen": -718.07470703125,
|
|
"logps/ref_chosen": -59.017662048339844,
|
|
"logps/ref_rejected": -87.13668823242188,
|
|
"logps/rejected": -1077.2181396484375,
|
|
"loss": 1.1787,
|
|
"margin_dpo/margin_mean": 331.0244140625,
|
|
"margin_dpo/margin_std": 643.7864990234375,
|
|
"step": 644
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -694.98095703125,
|
|
"KL/mean": -790.7579345703125,
|
|
"KL/rejected_KL_mean": -886.534912109375,
|
|
"KL/std": 483.12469482421875,
|
|
"epoch": 0.947136563876652,
|
|
"fcm_dpo/beta": 0.0010233320062980056,
|
|
"fcm_dpo/delta": 0.032772209495306015,
|
|
"fcm_dpo/margin": 191.55389404296875,
|
|
"fcm_dpo/q_t": 0.4574551284313202,
|
|
"grad_norm": 85.91677856445312,
|
|
"learning_rate": 4.495773155069299e-09,
|
|
"logits/chosen": -0.9518178105354309,
|
|
"logits/rejected": -0.940590500831604,
|
|
"logps/chosen": -750.8569946289062,
|
|
"logps/ref_chosen": -55.87602233886719,
|
|
"logps/ref_rejected": -97.78080749511719,
|
|
"logps/rejected": -984.315673828125,
|
|
"loss": 1.3102,
|
|
"margin_dpo/margin_mean": 191.55389404296875,
|
|
"margin_dpo/margin_std": 646.7799072265625,
|
|
"step": 645
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -637.5367431640625,
|
|
"KL/mean": -772.5591430664062,
|
|
"KL/rejected_KL_mean": -907.58154296875,
|
|
"KL/std": 423.0977783203125,
|
|
"epoch": 0.9486049926578561,
|
|
"fcm_dpo/beta": 0.0010392502881586552,
|
|
"fcm_dpo/delta": 0.12298852950334549,
|
|
"fcm_dpo/margin": 270.04473876953125,
|
|
"fcm_dpo/q_t": 0.4354844391345978,
|
|
"grad_norm": 37.95448684692383,
|
|
"learning_rate": 4.256725079024553e-09,
|
|
"logits/chosen": -0.9876176118850708,
|
|
"logits/rejected": -0.9597277641296387,
|
|
"logps/chosen": -698.8125610351562,
|
|
"logps/ref_chosen": -61.275787353515625,
|
|
"logps/ref_rejected": -77.50580596923828,
|
|
"logps/rejected": -985.0872802734375,
|
|
"loss": 1.1861,
|
|
"margin_dpo/margin_mean": 270.04473876953125,
|
|
"margin_dpo/margin_std": 478.6196594238281,
|
|
"step": 646
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -578.6500854492188,
|
|
"KL/mean": -764.76220703125,
|
|
"KL/rejected_KL_mean": -950.8743896484375,
|
|
"KL/std": 502.62335205078125,
|
|
"epoch": 0.9500734214390602,
|
|
"fcm_dpo/beta": 0.001051081228069961,
|
|
"fcm_dpo/delta": 0.009098398499190807,
|
|
"fcm_dpo/margin": 372.2242736816406,
|
|
"fcm_dpo/q_t": 0.40922337770462036,
|
|
"grad_norm": 39.47010040283203,
|
|
"learning_rate": 4.024152566816791e-09,
|
|
"logits/chosen": -0.8613879680633545,
|
|
"logits/rejected": -0.8856191039085388,
|
|
"logps/chosen": -633.5025024414062,
|
|
"logps/ref_chosen": -54.8524169921875,
|
|
"logps/ref_rejected": -93.5194091796875,
|
|
"logps/rejected": -1044.393798828125,
|
|
"loss": 1.0951,
|
|
"margin_dpo/margin_mean": 372.22430419921875,
|
|
"margin_dpo/margin_std": 483.755615234375,
|
|
"step": 647
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -618.8386840820312,
|
|
"KL/mean": -882.6348266601562,
|
|
"KL/rejected_KL_mean": -1146.430908203125,
|
|
"KL/std": 597.5460815429688,
|
|
"epoch": 0.9515418502202643,
|
|
"fcm_dpo/beta": 0.0010257565882056952,
|
|
"fcm_dpo/delta": -0.14960268139839172,
|
|
"fcm_dpo/margin": 527.59228515625,
|
|
"fcm_dpo/q_t": 0.38258910179138184,
|
|
"grad_norm": 26.944942474365234,
|
|
"learning_rate": 3.798061746947995e-09,
|
|
"logits/chosen": -1.024482011795044,
|
|
"logits/rejected": -1.0812674760818481,
|
|
"logps/chosen": -673.0101318359375,
|
|
"logps/ref_chosen": -54.17146682739258,
|
|
"logps/ref_rejected": -98.7127914428711,
|
|
"logps/rejected": -1245.143798828125,
|
|
"loss": 1.0229,
|
|
"margin_dpo/margin_mean": 527.59228515625,
|
|
"margin_dpo/margin_std": 698.5792846679688,
|
|
"step": 648
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -641.0887451171875,
|
|
"KL/mean": -776.3487548828125,
|
|
"KL/rejected_KL_mean": -911.6088256835938,
|
|
"KL/std": 461.4584045410156,
|
|
"epoch": 0.9530102790014684,
|
|
"fcm_dpo/beta": 0.00104125018697232,
|
|
"fcm_dpo/delta": 0.12126278877258301,
|
|
"fcm_dpo/margin": 270.52008056640625,
|
|
"fcm_dpo/q_t": 0.43895599246025085,
|
|
"grad_norm": 37.985782623291016,
|
|
"learning_rate": 3.5784585771215235e-09,
|
|
"logits/chosen": -1.062050223350525,
|
|
"logits/rejected": -1.049740195274353,
|
|
"logps/chosen": -703.569091796875,
|
|
"logps/ref_chosen": -62.480350494384766,
|
|
"logps/ref_rejected": -80.07717895507812,
|
|
"logps/rejected": -991.68603515625,
|
|
"loss": 1.2132,
|
|
"margin_dpo/margin_mean": 270.5200500488281,
|
|
"margin_dpo/margin_std": 580.6653442382812,
|
|
"step": 649
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -681.760498046875,
|
|
"KL/mean": -890.84912109375,
|
|
"KL/rejected_KL_mean": -1099.9375,
|
|
"KL/std": 568.1502685546875,
|
|
"epoch": 0.9544787077826725,
|
|
"fcm_dpo/beta": 0.0010373436380177736,
|
|
"fcm_dpo/delta": -0.035463616251945496,
|
|
"fcm_dpo/margin": 418.1771545410156,
|
|
"fcm_dpo/q_t": 0.40345823764801025,
|
|
"grad_norm": 34.3553352355957,
|
|
"learning_rate": 3.3653488440851253e-09,
|
|
"logits/chosen": -0.9683902859687805,
|
|
"logits/rejected": -0.9844076037406921,
|
|
"logps/chosen": -737.853271484375,
|
|
"logps/ref_chosen": -56.09281921386719,
|
|
"logps/ref_rejected": -98.26483917236328,
|
|
"logps/rejected": -1198.202392578125,
|
|
"loss": 1.1016,
|
|
"margin_dpo/margin_mean": 418.1771545410156,
|
|
"margin_dpo/margin_std": 649.6616821289062,
|
|
"step": 650
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -489.3945007324219,
|
|
"KL/mean": -743.3966674804688,
|
|
"KL/rejected_KL_mean": -997.3988037109375,
|
|
"KL/std": 551.2980346679688,
|
|
"epoch": 0.9559471365638766,
|
|
"fcm_dpo/beta": 0.0010176938958466053,
|
|
"fcm_dpo/delta": -0.12348881363868713,
|
|
"fcm_dpo/margin": 508.0043640136719,
|
|
"fcm_dpo/q_t": 0.38280242681503296,
|
|
"grad_norm": 31.118337631225586,
|
|
"learning_rate": 3.158738163478475e-09,
|
|
"logits/chosen": -1.0014972686767578,
|
|
"logits/rejected": -1.0536954402923584,
|
|
"logps/chosen": -532.8199462890625,
|
|
"logps/ref_chosen": -43.42544937133789,
|
|
"logps/ref_rejected": -99.95791625976562,
|
|
"logps/rejected": -1097.356689453125,
|
|
"loss": 1.0078,
|
|
"margin_dpo/margin_mean": 508.0043640136719,
|
|
"margin_dpo/margin_std": 563.4285888671875,
|
|
"step": 651
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -616.5572509765625,
|
|
"KL/mean": -798.780029296875,
|
|
"KL/rejected_KL_mean": -981.0028076171875,
|
|
"KL/std": 537.9312744140625,
|
|
"epoch": 0.9574155653450808,
|
|
"fcm_dpo/beta": 0.001018517417833209,
|
|
"fcm_dpo/delta": 0.029787715524435043,
|
|
"fcm_dpo/margin": 364.4455871582031,
|
|
"fcm_dpo/q_t": 0.41681456565856934,
|
|
"grad_norm": 38.289588928222656,
|
|
"learning_rate": 2.9586319796851555e-09,
|
|
"logits/chosen": -1.037517786026001,
|
|
"logits/rejected": -1.0513508319854736,
|
|
"logps/chosen": -679.134033203125,
|
|
"logps/ref_chosen": -62.57680892944336,
|
|
"logps/ref_rejected": -111.76779174804688,
|
|
"logps/rejected": -1092.7706298828125,
|
|
"loss": 1.1381,
|
|
"margin_dpo/margin_mean": 364.44561767578125,
|
|
"margin_dpo/margin_std": 614.3720703125,
|
|
"step": 652
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -721.114013671875,
|
|
"KL/mean": -896.546630859375,
|
|
"KL/rejected_KL_mean": -1071.979248046875,
|
|
"KL/std": 555.6015625,
|
|
"epoch": 0.9588839941262849,
|
|
"fcm_dpo/beta": 0.0010254649678245187,
|
|
"fcm_dpo/delta": 0.04164200276136398,
|
|
"fcm_dpo/margin": 350.8651123046875,
|
|
"fcm_dpo/q_t": 0.4201071858406067,
|
|
"grad_norm": 32.48366165161133,
|
|
"learning_rate": 2.7650355656892166e-09,
|
|
"logits/chosen": -1.0504939556121826,
|
|
"logits/rejected": -1.0696086883544922,
|
|
"logps/chosen": -782.2269897460938,
|
|
"logps/ref_chosen": -61.11295700073242,
|
|
"logps/ref_rejected": -103.24960327148438,
|
|
"logps/rejected": -1175.228759765625,
|
|
"loss": 1.1448,
|
|
"margin_dpo/margin_mean": 350.8651123046875,
|
|
"margin_dpo/margin_std": 597.3900146484375,
|
|
"step": 653
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -651.263427734375,
|
|
"KL/mean": -819.9118041992188,
|
|
"KL/rejected_KL_mean": -988.5601196289062,
|
|
"KL/std": 463.3123779296875,
|
|
"epoch": 0.960352422907489,
|
|
"fcm_dpo/beta": 0.0010335429105907679,
|
|
"fcm_dpo/delta": 0.053280387073755264,
|
|
"fcm_dpo/margin": 337.2967224121094,
|
|
"fcm_dpo/q_t": 0.4215894043445587,
|
|
"grad_norm": 29.535350799560547,
|
|
"learning_rate": 2.577954022936174e-09,
|
|
"logits/chosen": -1.0323097705841064,
|
|
"logits/rejected": -1.0458433628082275,
|
|
"logps/chosen": -712.9915771484375,
|
|
"logps/ref_chosen": -61.7281379699707,
|
|
"logps/ref_rejected": -98.7738037109375,
|
|
"logps/rejected": -1087.333984375,
|
|
"loss": 1.1394,
|
|
"margin_dpo/margin_mean": 337.2967224121094,
|
|
"margin_dpo/margin_std": 535.9071044921875,
|
|
"step": 654
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -600.1435546875,
|
|
"KL/mean": -785.5983276367188,
|
|
"KL/rejected_KL_mean": -971.0531005859375,
|
|
"KL/std": 495.2772216796875,
|
|
"epoch": 0.9618208516886931,
|
|
"fcm_dpo/beta": 0.0010393604170531034,
|
|
"fcm_dpo/delta": 0.01507401093840599,
|
|
"fcm_dpo/margin": 370.9095458984375,
|
|
"fcm_dpo/q_t": 0.4142192006111145,
|
|
"grad_norm": 32.12811279296875,
|
|
"learning_rate": 2.397392281198729e-09,
|
|
"logits/chosen": -0.987531304359436,
|
|
"logits/rejected": -1.030656337738037,
|
|
"logps/chosen": -649.7203369140625,
|
|
"logps/ref_chosen": -49.576812744140625,
|
|
"logps/ref_rejected": -98.29183197021484,
|
|
"logps/rejected": -1069.344970703125,
|
|
"loss": 1.1228,
|
|
"margin_dpo/margin_mean": 370.9095458984375,
|
|
"margin_dpo/margin_std": 588.18408203125,
|
|
"step": 655
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -635.6533203125,
|
|
"KL/mean": -928.177734375,
|
|
"KL/rejected_KL_mean": -1220.7020263671875,
|
|
"KL/std": 598.5955810546875,
|
|
"epoch": 0.9632892804698973,
|
|
"fcm_dpo/beta": 0.0010131911840289831,
|
|
"fcm_dpo/delta": -0.20445646345615387,
|
|
"fcm_dpo/margin": 585.0487060546875,
|
|
"fcm_dpo/q_t": 0.3669106960296631,
|
|
"grad_norm": 47.409523010253906,
|
|
"learning_rate": 2.223355098446622e-09,
|
|
"logits/chosen": -0.9014885425567627,
|
|
"logits/rejected": -0.9727605581283569,
|
|
"logps/chosen": -688.2027587890625,
|
|
"logps/ref_chosen": -52.54943084716797,
|
|
"logps/ref_rejected": -113.67464447021484,
|
|
"logps/rejected": -1334.376708984375,
|
|
"loss": 0.9602,
|
|
"margin_dpo/margin_mean": 585.0487060546875,
|
|
"margin_dpo/margin_std": 599.3031005859375,
|
|
"step": 656
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -615.9948120117188,
|
|
"KL/mean": -864.385498046875,
|
|
"KL/rejected_KL_mean": -1112.776123046875,
|
|
"KL/std": 595.528564453125,
|
|
"epoch": 0.9647577092511013,
|
|
"fcm_dpo/beta": 0.0009820859413594007,
|
|
"fcm_dpo/delta": -0.09292930364608765,
|
|
"fcm_dpo/margin": 496.7813720703125,
|
|
"fcm_dpo/q_t": 0.39006322622299194,
|
|
"grad_norm": 31.6498966217041,
|
|
"learning_rate": 2.055847060721566e-09,
|
|
"logits/chosen": -1.0363855361938477,
|
|
"logits/rejected": -1.0796374082565308,
|
|
"logps/chosen": -662.6953125,
|
|
"logps/ref_chosen": -46.700538635253906,
|
|
"logps/ref_rejected": -97.91487121582031,
|
|
"logps/rejected": -1210.6910400390625,
|
|
"loss": 1.0397,
|
|
"margin_dpo/margin_mean": 496.7813720703125,
|
|
"margin_dpo/margin_std": 621.57470703125,
|
|
"step": 657
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -664.8219604492188,
|
|
"KL/mean": -847.6753540039062,
|
|
"KL/rejected_KL_mean": -1030.5286865234375,
|
|
"KL/std": 466.9921875,
|
|
"epoch": 0.9662261380323054,
|
|
"fcm_dpo/beta": 0.00098237837664783,
|
|
"fcm_dpo/delta": 0.04202239215373993,
|
|
"fcm_dpo/margin": 365.70672607421875,
|
|
"fcm_dpo/q_t": 0.4168873727321625,
|
|
"grad_norm": 35.535884857177734,
|
|
"learning_rate": 1.8948725820160662e-09,
|
|
"logits/chosen": -0.9736270904541016,
|
|
"logits/rejected": -0.9889022707939148,
|
|
"logps/chosen": -725.7801513671875,
|
|
"logps/ref_chosen": -60.95820999145508,
|
|
"logps/ref_rejected": -95.93949127197266,
|
|
"logps/rejected": -1126.46826171875,
|
|
"loss": 1.124,
|
|
"margin_dpo/margin_mean": 365.70672607421875,
|
|
"margin_dpo/margin_std": 524.6923217773438,
|
|
"step": 658
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -604.6826782226562,
|
|
"KL/mean": -799.8530883789062,
|
|
"KL/rejected_KL_mean": -995.0235595703125,
|
|
"KL/std": 473.0037841796875,
|
|
"epoch": 0.9676945668135095,
|
|
"fcm_dpo/beta": 0.0009883574675768614,
|
|
"fcm_dpo/delta": 0.014654016122221947,
|
|
"fcm_dpo/margin": 390.34075927734375,
|
|
"fcm_dpo/q_t": 0.41146624088287354,
|
|
"grad_norm": 25.766517639160156,
|
|
"learning_rate": 1.7404359041573723e-09,
|
|
"logits/chosen": -0.9188249111175537,
|
|
"logits/rejected": -0.8752338886260986,
|
|
"logps/chosen": -681.4256591796875,
|
|
"logps/ref_chosen": -76.74298095703125,
|
|
"logps/ref_rejected": -87.4709701538086,
|
|
"logps/rejected": -1082.4945068359375,
|
|
"loss": 1.0978,
|
|
"margin_dpo/margin_mean": 390.3408203125,
|
|
"margin_dpo/margin_std": 514.568359375,
|
|
"step": 659
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -646.2470703125,
|
|
"KL/mean": -875.0294189453125,
|
|
"KL/rejected_KL_mean": -1103.811767578125,
|
|
"KL/std": 534.911865234375,
|
|
"epoch": 0.9691629955947136,
|
|
"fcm_dpo/beta": 0.000986184342764318,
|
|
"fcm_dpo/delta": -0.0536465048789978,
|
|
"fcm_dpo/margin": 457.5646057128906,
|
|
"fcm_dpo/q_t": 0.39679035544395447,
|
|
"grad_norm": 49.228233337402344,
|
|
"learning_rate": 1.592541096695571e-09,
|
|
"logits/chosen": -0.9931929111480713,
|
|
"logits/rejected": -0.995282769203186,
|
|
"logps/chosen": -705.294921875,
|
|
"logps/ref_chosen": -59.04788589477539,
|
|
"logps/ref_rejected": -75.96005249023438,
|
|
"logps/rejected": -1179.771728515625,
|
|
"loss": 1.0585,
|
|
"margin_dpo/margin_mean": 457.5645751953125,
|
|
"margin_dpo/margin_std": 571.7623291015625,
|
|
"step": 660
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -567.4749145507812,
|
|
"KL/mean": -781.959716796875,
|
|
"KL/rejected_KL_mean": -996.4445190429688,
|
|
"KL/std": 581.6163330078125,
|
|
"epoch": 0.9706314243759178,
|
|
"fcm_dpo/beta": 0.0009824851294979453,
|
|
"fcm_dpo/delta": -0.022726453840732574,
|
|
"fcm_dpo/margin": 428.9695739746094,
|
|
"fcm_dpo/q_t": 0.40450412034988403,
|
|
"grad_norm": 37.587955474853516,
|
|
"learning_rate": 1.4511920567963908e-09,
|
|
"logits/chosen": -1.0141196250915527,
|
|
"logits/rejected": -1.0162596702575684,
|
|
"logps/chosen": -618.14892578125,
|
|
"logps/ref_chosen": -50.673973083496094,
|
|
"logps/ref_rejected": -86.00569152832031,
|
|
"logps/rejected": -1082.4501953125,
|
|
"loss": 1.0753,
|
|
"margin_dpo/margin_mean": 428.96954345703125,
|
|
"margin_dpo/margin_std": 554.69189453125,
|
|
"step": 661
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -687.85302734375,
|
|
"KL/mean": -843.4173583984375,
|
|
"KL/rejected_KL_mean": -998.9815673828125,
|
|
"KL/std": 508.81829833984375,
|
|
"epoch": 0.9720998531571219,
|
|
"fcm_dpo/beta": 0.0009899393189698458,
|
|
"fcm_dpo/delta": 0.09499240666627884,
|
|
"fcm_dpo/margin": 311.1284484863281,
|
|
"fcm_dpo/q_t": 0.4304784834384918,
|
|
"grad_norm": 34.92011260986328,
|
|
"learning_rate": 1.3163925091384532e-09,
|
|
"logits/chosen": -0.9697600603103638,
|
|
"logits/rejected": -0.9555808901786804,
|
|
"logps/chosen": -757.1141357421875,
|
|
"logps/ref_chosen": -69.26106262207031,
|
|
"logps/ref_rejected": -89.05593872070312,
|
|
"logps/rejected": -1088.0374755859375,
|
|
"loss": 1.1943,
|
|
"margin_dpo/margin_mean": 311.1284484863281,
|
|
"margin_dpo/margin_std": 634.947998046875,
|
|
"step": 662
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -626.7081298828125,
|
|
"KL/mean": -834.0655517578125,
|
|
"KL/rejected_KL_mean": -1041.4229736328125,
|
|
"KL/std": 581.6314697265625,
|
|
"epoch": 0.973568281938326,
|
|
"fcm_dpo/beta": 0.000994151458144188,
|
|
"fcm_dpo/delta": -0.01281630527228117,
|
|
"fcm_dpo/margin": 414.7147521972656,
|
|
"fcm_dpo/q_t": 0.40818944573402405,
|
|
"grad_norm": 28.98455047607422,
|
|
"learning_rate": 1.1881460058152382e-09,
|
|
"logits/chosen": -1.0159096717834473,
|
|
"logits/rejected": -1.0367473363876343,
|
|
"logps/chosen": -691.5870361328125,
|
|
"logps/ref_chosen": -64.87890625,
|
|
"logps/ref_rejected": -113.92536926269531,
|
|
"logps/rejected": -1155.348388671875,
|
|
"loss": 1.1103,
|
|
"margin_dpo/margin_mean": 414.71478271484375,
|
|
"margin_dpo/margin_std": 667.8819580078125,
|
|
"step": 663
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -645.205322265625,
|
|
"KL/mean": -881.735107421875,
|
|
"KL/rejected_KL_mean": -1118.2647705078125,
|
|
"KL/std": 575.0771484375,
|
|
"epoch": 0.9750367107195301,
|
|
"fcm_dpo/beta": 0.000979449599981308,
|
|
"fcm_dpo/delta": -0.06696485728025436,
|
|
"fcm_dpo/margin": 473.0594482421875,
|
|
"fcm_dpo/q_t": 0.3954671621322632,
|
|
"grad_norm": 26.692996978759766,
|
|
"learning_rate": 1.066455926241383e-09,
|
|
"logits/chosen": -0.9881083965301514,
|
|
"logits/rejected": -1.0177645683288574,
|
|
"logps/chosen": -706.0938110351562,
|
|
"logps/ref_chosen": -60.88847351074219,
|
|
"logps/ref_rejected": -105.521728515625,
|
|
"logps/rejected": -1223.7864990234375,
|
|
"loss": 1.0597,
|
|
"margin_dpo/margin_mean": 473.0594177246094,
|
|
"margin_dpo/margin_std": 619.4970703125,
|
|
"step": 664
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -595.4884033203125,
|
|
"KL/mean": -785.0396728515625,
|
|
"KL/rejected_KL_mean": -974.5908813476562,
|
|
"KL/std": 466.5865478515625,
|
|
"epoch": 0.9765051395007343,
|
|
"fcm_dpo/beta": 0.0009838908445090055,
|
|
"fcm_dpo/delta": 0.028051599860191345,
|
|
"fcm_dpo/margin": 379.10247802734375,
|
|
"fcm_dpo/q_t": 0.41350919008255005,
|
|
"grad_norm": 33.21379089355469,
|
|
"learning_rate": 9.513254770636137e-10,
|
|
"logits/chosen": -1.076425313949585,
|
|
"logits/rejected": -1.0883920192718506,
|
|
"logps/chosen": -656.0525512695312,
|
|
"logps/ref_chosen": -60.56413269042969,
|
|
"logps/ref_rejected": -84.80882263183594,
|
|
"logps/rejected": -1059.399658203125,
|
|
"loss": 1.0975,
|
|
"margin_dpo/margin_mean": 379.10247802734375,
|
|
"margin_dpo/margin_std": 466.126220703125,
|
|
"step": 665
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -640.5621337890625,
|
|
"KL/mean": -844.2626953125,
|
|
"KL/rejected_KL_mean": -1047.96337890625,
|
|
"KL/std": 506.4635314941406,
|
|
"epoch": 0.9779735682819384,
|
|
"fcm_dpo/beta": 0.00098421610891819,
|
|
"fcm_dpo/delta": -0.001031767576932907,
|
|
"fcm_dpo/margin": 407.4012451171875,
|
|
"fcm_dpo/q_t": 0.4083176255226135,
|
|
"grad_norm": 28.340333938598633,
|
|
"learning_rate": 8.427576920763956e-10,
|
|
"logits/chosen": -0.9047819375991821,
|
|
"logits/rejected": -0.9091357588768005,
|
|
"logps/chosen": -704.9820556640625,
|
|
"logps/ref_chosen": -64.41996002197266,
|
|
"logps/ref_rejected": -95.8916244506836,
|
|
"logps/rejected": -1143.85498046875,
|
|
"loss": 1.0931,
|
|
"margin_dpo/margin_mean": 407.4012451171875,
|
|
"margin_dpo/margin_std": 544.3861083984375,
|
|
"step": 666
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -708.0230712890625,
|
|
"KL/mean": -926.270751953125,
|
|
"KL/rejected_KL_mean": -1144.5185546875,
|
|
"KL/std": 533.2125244140625,
|
|
"epoch": 0.9794419970631424,
|
|
"fcm_dpo/beta": 0.0009798547253012657,
|
|
"fcm_dpo/delta": -0.028992321342229843,
|
|
"fcm_dpo/margin": 436.495361328125,
|
|
"fcm_dpo/q_t": 0.40200570225715637,
|
|
"grad_norm": 37.009464263916016,
|
|
"learning_rate": 7.407554321417764e-10,
|
|
"logits/chosen": -0.9339680671691895,
|
|
"logits/rejected": -0.918968677520752,
|
|
"logps/chosen": -777.3001098632812,
|
|
"logps/ref_chosen": -69.27702331542969,
|
|
"logps/ref_rejected": -87.83549499511719,
|
|
"logps/rejected": -1232.35400390625,
|
|
"loss": 1.0786,
|
|
"margin_dpo/margin_mean": 436.495361328125,
|
|
"margin_dpo/margin_std": 579.8436889648438,
|
|
"step": 667
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -741.93408203125,
|
|
"KL/mean": -907.15625,
|
|
"KL/rejected_KL_mean": -1072.37841796875,
|
|
"KL/std": 566.6580810546875,
|
|
"epoch": 0.9809104258443465,
|
|
"fcm_dpo/beta": 0.0009952853433787823,
|
|
"fcm_dpo/delta": 0.07259482145309448,
|
|
"fcm_dpo/margin": 330.4443359375,
|
|
"fcm_dpo/q_t": 0.42794138193130493,
|
|
"grad_norm": 33.102317810058594,
|
|
"learning_rate": 6.453213851142225e-10,
|
|
"logits/chosen": -1.0173263549804688,
|
|
"logits/rejected": -1.014156460762024,
|
|
"logps/chosen": -814.5380859375,
|
|
"logps/ref_chosen": -72.60400390625,
|
|
"logps/ref_rejected": -103.73905944824219,
|
|
"logps/rejected": -1176.117431640625,
|
|
"loss": 1.1857,
|
|
"margin_dpo/margin_mean": 330.4443359375,
|
|
"margin_dpo/margin_std": 662.1550903320312,
|
|
"step": 668
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -590.9251708984375,
|
|
"KL/mean": -818.6317749023438,
|
|
"KL/rejected_KL_mean": -1046.33837890625,
|
|
"KL/std": 550.6939697265625,
|
|
"epoch": 0.9823788546255506,
|
|
"fcm_dpo/beta": 0.000986847560852766,
|
|
"fcm_dpo/delta": -0.051722507923841476,
|
|
"fcm_dpo/margin": 455.413330078125,
|
|
"fcm_dpo/q_t": 0.3974974751472473,
|
|
"grad_norm": 25.456424713134766,
|
|
"learning_rate": 5.564580657695939e-10,
|
|
"logits/chosen": -0.9681833982467651,
|
|
"logits/rejected": -0.9636249542236328,
|
|
"logps/chosen": -637.0415649414062,
|
|
"logps/ref_chosen": -46.116416931152344,
|
|
"logps/ref_rejected": -77.92434692382812,
|
|
"logps/rejected": -1124.2626953125,
|
|
"loss": 1.0695,
|
|
"margin_dpo/margin_mean": 455.41326904296875,
|
|
"margin_dpo/margin_std": 612.3760986328125,
|
|
"step": 669
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -551.4520263671875,
|
|
"KL/mean": -797.196533203125,
|
|
"KL/rejected_KL_mean": -1042.94091796875,
|
|
"KL/std": 524.746826171875,
|
|
"epoch": 0.9838472834067548,
|
|
"fcm_dpo/beta": 0.000975792994722724,
|
|
"fcm_dpo/delta": -0.08364107459783554,
|
|
"fcm_dpo/margin": 491.48895263671875,
|
|
"fcm_dpo/q_t": 0.39131563901901245,
|
|
"grad_norm": 32.25609588623047,
|
|
"learning_rate": 4.741678157389739e-10,
|
|
"logits/chosen": -0.9351658225059509,
|
|
"logits/rejected": -0.9491223096847534,
|
|
"logps/chosen": -613.7977905273438,
|
|
"logps/ref_chosen": -62.34575271606445,
|
|
"logps/ref_rejected": -96.9405517578125,
|
|
"logps/rejected": -1139.881591796875,
|
|
"loss": 1.049,
|
|
"margin_dpo/margin_mean": 491.48895263671875,
|
|
"margin_dpo/margin_std": 618.3614501953125,
|
|
"step": 670
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -666.8336181640625,
|
|
"KL/mean": -867.621337890625,
|
|
"KL/rejected_KL_mean": -1068.4091796875,
|
|
"KL/std": 490.7064514160156,
|
|
"epoch": 0.9853157121879589,
|
|
"fcm_dpo/beta": 0.0009718855144456029,
|
|
"fcm_dpo/delta": 0.009846452623605728,
|
|
"fcm_dpo/margin": 401.5755310058594,
|
|
"fcm_dpo/q_t": 0.4098934829235077,
|
|
"grad_norm": 28.569211959838867,
|
|
"learning_rate": 3.9845280344705245e-10,
|
|
"logits/chosen": -0.9902809858322144,
|
|
"logits/rejected": -1.0138908624649048,
|
|
"logps/chosen": -714.833740234375,
|
|
"logps/ref_chosen": -48.00010681152344,
|
|
"logps/ref_rejected": -83.81932067871094,
|
|
"logps/rejected": -1152.228515625,
|
|
"loss": 1.1089,
|
|
"margin_dpo/margin_mean": 401.5755310058594,
|
|
"margin_dpo/margin_std": 580.24072265625,
|
|
"step": 671
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -731.397216796875,
|
|
"KL/mean": -927.7864990234375,
|
|
"KL/rejected_KL_mean": -1124.175537109375,
|
|
"KL/std": 593.037109375,
|
|
"epoch": 0.986784140969163,
|
|
"fcm_dpo/beta": 0.0009703624527901411,
|
|
"fcm_dpo/delta": 0.019587505608797073,
|
|
"fcm_dpo/margin": 392.7784423828125,
|
|
"fcm_dpo/q_t": 0.41550886631011963,
|
|
"grad_norm": 36.741268157958984,
|
|
"learning_rate": 3.293150240547549e-10,
|
|
"logits/chosen": -1.065507411956787,
|
|
"logits/rejected": -1.0642685890197754,
|
|
"logps/chosen": -789.9805297851562,
|
|
"logps/ref_chosen": -58.58328628540039,
|
|
"logps/ref_rejected": -93.14015197753906,
|
|
"logps/rejected": -1217.3157958984375,
|
|
"loss": 1.1383,
|
|
"margin_dpo/margin_mean": 392.7784118652344,
|
|
"margin_dpo/margin_std": 660.264892578125,
|
|
"step": 672
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -674.35888671875,
|
|
"KL/mean": -860.500732421875,
|
|
"KL/rejected_KL_mean": -1046.642578125,
|
|
"KL/std": 500.42138671875,
|
|
"epoch": 0.9882525697503671,
|
|
"fcm_dpo/beta": 0.0009781282860785723,
|
|
"fcm_dpo/delta": 0.03722069412469864,
|
|
"fcm_dpo/margin": 372.2838134765625,
|
|
"fcm_dpo/q_t": 0.41715848445892334,
|
|
"grad_norm": 30.6710262298584,
|
|
"learning_rate": 2.6675629940689504e-10,
|
|
"logits/chosen": -1.0316221714019775,
|
|
"logits/rejected": -1.03529691696167,
|
|
"logps/chosen": -721.08203125,
|
|
"logps/ref_chosen": -46.72320556640625,
|
|
"logps/ref_rejected": -85.29623413085938,
|
|
"logps/rejected": -1131.9388427734375,
|
|
"loss": 1.1215,
|
|
"margin_dpo/margin_mean": 372.2838134765625,
|
|
"margin_dpo/margin_std": 559.671142578125,
|
|
"step": 673
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -570.3616943359375,
|
|
"KL/mean": -808.6674194335938,
|
|
"KL/rejected_KL_mean": -1046.97314453125,
|
|
"KL/std": 530.7771606445312,
|
|
"epoch": 0.9897209985315712,
|
|
"fcm_dpo/beta": 0.0009717537323012948,
|
|
"fcm_dpo/delta": -0.06616582721471786,
|
|
"fcm_dpo/margin": 476.61138916015625,
|
|
"fcm_dpo/q_t": 0.39771580696105957,
|
|
"grad_norm": 31.129558563232422,
|
|
"learning_rate": 2.1077827798404725e-10,
|
|
"logits/chosen": -0.929929792881012,
|
|
"logits/rejected": -0.9406229257583618,
|
|
"logps/chosen": -615.8072509765625,
|
|
"logps/ref_chosen": -45.445526123046875,
|
|
"logps/ref_rejected": -70.04593658447266,
|
|
"logps/rejected": -1117.01904296875,
|
|
"loss": 1.0577,
|
|
"margin_dpo/margin_mean": 476.61138916015625,
|
|
"margin_dpo/margin_std": 633.5640258789062,
|
|
"step": 674
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -638.90673828125,
|
|
"KL/mean": -887.892578125,
|
|
"KL/rejected_KL_mean": -1136.87841796875,
|
|
"KL/std": 577.7835693359375,
|
|
"epoch": 0.9911894273127754,
|
|
"fcm_dpo/beta": 0.0009487034403719008,
|
|
"fcm_dpo/delta": -0.07788591086864471,
|
|
"fcm_dpo/margin": 497.97161865234375,
|
|
"fcm_dpo/q_t": 0.3958815634250641,
|
|
"grad_norm": 26.884960174560547,
|
|
"learning_rate": 1.6138243485910863e-10,
|
|
"logits/chosen": -0.9694858193397522,
|
|
"logits/rejected": -0.984051525592804,
|
|
"logps/chosen": -683.0830688476562,
|
|
"logps/ref_chosen": -44.17628479003906,
|
|
"logps/ref_rejected": -74.09197998046875,
|
|
"logps/rejected": -1210.970458984375,
|
|
"loss": 1.0522,
|
|
"margin_dpo/margin_mean": 497.97161865234375,
|
|
"margin_dpo/margin_std": 613.9585571289062,
|
|
"step": 675
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -707.06591796875,
|
|
"KL/mean": -924.9794921875,
|
|
"KL/rejected_KL_mean": -1142.89306640625,
|
|
"KL/std": 531.7125244140625,
|
|
"epoch": 0.9926578560939795,
|
|
"fcm_dpo/beta": 0.0009498898871243,
|
|
"fcm_dpo/delta": -0.01460132747888565,
|
|
"fcm_dpo/margin": 435.82708740234375,
|
|
"fcm_dpo/q_t": 0.4043177366256714,
|
|
"grad_norm": 30.11288833618164,
|
|
"learning_rate": 1.1857007165852472e-10,
|
|
"logits/chosen": -0.9417062997817993,
|
|
"logits/rejected": -0.9432613849639893,
|
|
"logps/chosen": -778.4644775390625,
|
|
"logps/ref_chosen": -71.39852905273438,
|
|
"logps/ref_rejected": -88.3587646484375,
|
|
"logps/rejected": -1231.2518310546875,
|
|
"loss": 1.074,
|
|
"margin_dpo/margin_mean": 435.8271179199219,
|
|
"margin_dpo/margin_std": 524.7921142578125,
|
|
"step": 676
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -676.025634765625,
|
|
"KL/mean": -884.4794921875,
|
|
"KL/rejected_KL_mean": -1092.933349609375,
|
|
"KL/std": 490.99835205078125,
|
|
"epoch": 0.9941262848751835,
|
|
"fcm_dpo/beta": 0.0009504948975518346,
|
|
"fcm_dpo/delta": 0.0038806493394076824,
|
|
"fcm_dpo/margin": 416.90771484375,
|
|
"fcm_dpo/q_t": 0.4103269577026367,
|
|
"grad_norm": 33.28916931152344,
|
|
"learning_rate": 8.23423165278725e-11,
|
|
"logits/chosen": -1.00029718875885,
|
|
"logits/rejected": -0.9821897745132446,
|
|
"logps/chosen": -732.5531005859375,
|
|
"logps/ref_chosen": -56.527435302734375,
|
|
"logps/ref_rejected": -78.22654724121094,
|
|
"logps/rejected": -1171.159912109375,
|
|
"loss": 1.0959,
|
|
"margin_dpo/margin_mean": 416.90771484375,
|
|
"margin_dpo/margin_std": 574.4068603515625,
|
|
"step": 677
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -570.7870483398438,
|
|
"KL/mean": -824.8927612304688,
|
|
"KL/rejected_KL_mean": -1078.99853515625,
|
|
"KL/std": 591.4881591796875,
|
|
"epoch": 0.9955947136563876,
|
|
"fcm_dpo/beta": 0.00094210309907794,
|
|
"fcm_dpo/delta": -0.08271745592355728,
|
|
"fcm_dpo/margin": 508.2114562988281,
|
|
"fcm_dpo/q_t": 0.39141643047332764,
|
|
"grad_norm": 30.350744247436523,
|
|
"learning_rate": 5.270012410216185e-11,
|
|
"logits/chosen": -0.9413450956344604,
|
|
"logits/rejected": -0.971659243106842,
|
|
"logps/chosen": -616.9215087890625,
|
|
"logps/ref_chosen": -46.13447570800781,
|
|
"logps/ref_rejected": -80.60462951660156,
|
|
"logps/rejected": -1159.6031494140625,
|
|
"loss": 1.0481,
|
|
"margin_dpo/margin_mean": 508.21142578125,
|
|
"margin_dpo/margin_std": 649.2615966796875,
|
|
"step": 678
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -671.6044921875,
|
|
"KL/mean": -847.1055908203125,
|
|
"KL/rejected_KL_mean": -1022.606689453125,
|
|
"KL/std": 466.68133544921875,
|
|
"epoch": 0.9970631424375918,
|
|
"fcm_dpo/beta": 0.0009439511341042817,
|
|
"fcm_dpo/delta": 0.07106737792491913,
|
|
"fcm_dpo/margin": 351.002197265625,
|
|
"fcm_dpo/q_t": 0.42417770624160767,
|
|
"grad_norm": 31.743593215942383,
|
|
"learning_rate": 2.9644275480772416e-11,
|
|
"logits/chosen": -0.9747291803359985,
|
|
"logits/rejected": -0.9602512717247009,
|
|
"logps/chosen": -721.8994140625,
|
|
"logps/ref_chosen": -50.294921875,
|
|
"logps/ref_rejected": -76.59813690185547,
|
|
"logps/rejected": -1099.204833984375,
|
|
"loss": 1.1447,
|
|
"margin_dpo/margin_mean": 351.002197265625,
|
|
"margin_dpo/margin_std": 545.5799560546875,
|
|
"step": 679
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -670.5235595703125,
|
|
"KL/mean": -899.030517578125,
|
|
"KL/rejected_KL_mean": -1127.53759765625,
|
|
"KL/std": 594.6282958984375,
|
|
"epoch": 0.9985315712187959,
|
|
"fcm_dpo/beta": 0.0009394378867000341,
|
|
"fcm_dpo/delta": -0.031213950365781784,
|
|
"fcm_dpo/margin": 457.013916015625,
|
|
"fcm_dpo/q_t": 0.4019904136657715,
|
|
"grad_norm": 32.0710334777832,
|
|
"learning_rate": 1.31753782067201e-11,
|
|
"logits/chosen": -0.9574640989303589,
|
|
"logits/rejected": -0.9788249731063843,
|
|
"logps/chosen": -747.4393310546875,
|
|
"logps/ref_chosen": -76.91569519042969,
|
|
"logps/ref_rejected": -112.384765625,
|
|
"logps/rejected": -1239.92236328125,
|
|
"loss": 1.097,
|
|
"margin_dpo/margin_mean": 457.013916015625,
|
|
"margin_dpo/margin_std": 679.540283203125,
|
|
"step": 680
|
|
},
|
|
{
|
|
"KL/chosen_KL_mean": -644.1123046875,
|
|
"KL/mean": -826.009765625,
|
|
"KL/rejected_KL_mean": -1007.9073486328125,
|
|
"KL/std": 493.124755859375,
|
|
"epoch": 1.0,
|
|
"fcm_dpo/beta": 0.0009560231701470912,
|
|
"fcm_dpo/delta": 0.05282256752252579,
|
|
"fcm_dpo/margin": 363.79510498046875,
|
|
"fcm_dpo/q_t": 0.42025691270828247,
|
|
"grad_norm": 24.496997833251953,
|
|
"learning_rate": 3.2938662507808745e-12,
|
|
"logits/chosen": -1.0301257371902466,
|
|
"logits/rejected": -1.0402805805206299,
|
|
"logps/chosen": -705.069580078125,
|
|
"logps/ref_chosen": -60.957279205322266,
|
|
"logps/ref_rejected": -88.55797576904297,
|
|
"logps/rejected": -1096.46533203125,
|
|
"loss": 1.1386,
|
|
"margin_dpo/margin_mean": 363.79510498046875,
|
|
"margin_dpo/margin_std": 546.2330932617188,
|
|
"step": 681
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"step": 681,
|
|
"total_flos": 0.0,
|
|
"train_loss": 1.126299982641587,
|
|
"train_runtime": 1736.7793,
|
|
"train_samples_per_second": 25.103,
|
|
"train_steps_per_second": 0.392
|
|
}
|
|
],
|
|
"logging_steps": 1,
|
|
"max_steps": 681,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 50,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": false,
|
|
"should_training_stop": false
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 8,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|