3323 lines
113 KiB
JSON
3323 lines
113 KiB
JSON
{
|
|
"best_metric": 0.675000011920929,
|
|
"best_model_checkpoint": "./outputs/tinyllama-1.1b-dpo-pku-saferlhf/checkpoint-1200",
|
|
"epoch": 0.9997600191984641,
|
|
"eval_steps": 200,
|
|
"global_step": 2083,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.004799616030717543,
|
|
"grad_norm": 57.0,
|
|
"learning_rate": 2.3923444976076555e-08,
|
|
"logits/chosen": -2.688718318939209,
|
|
"logits/rejected": -2.5538744926452637,
|
|
"logps/chosen": -212.6398162841797,
|
|
"logps/rejected": -186.61505126953125,
|
|
"loss": 0.6966,
|
|
"rewards/accuracies": 0.35624998807907104,
|
|
"rewards/chosen": -0.005152043420821428,
|
|
"rewards/margins": -0.006588623858988285,
|
|
"rewards/rejected": 0.0014365792740136385,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.009599232061435085,
|
|
"grad_norm": 59.75,
|
|
"learning_rate": 4.784688995215311e-08,
|
|
"logits/chosen": -2.728940486907959,
|
|
"logits/rejected": -2.616565227508545,
|
|
"logps/chosen": -223.5636749267578,
|
|
"logps/rejected": -203.41867065429688,
|
|
"loss": 0.6899,
|
|
"rewards/accuracies": 0.5874999761581421,
|
|
"rewards/chosen": 0.002993463072925806,
|
|
"rewards/margins": 0.006973244249820709,
|
|
"rewards/rejected": -0.003979781176894903,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.014398848092152628,
|
|
"grad_norm": 50.75,
|
|
"learning_rate": 7.177033492822967e-08,
|
|
"logits/chosen": -2.716870069503784,
|
|
"logits/rejected": -2.6400887966156006,
|
|
"logps/chosen": -237.99618530273438,
|
|
"logps/rejected": -219.1649627685547,
|
|
"loss": 0.6938,
|
|
"rewards/accuracies": 0.40625,
|
|
"rewards/chosen": -0.0003619740600697696,
|
|
"rewards/margins": -0.0006011867080815136,
|
|
"rewards/rejected": 0.00023921244428493083,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.01919846412287017,
|
|
"grad_norm": 57.25,
|
|
"learning_rate": 9.569377990430622e-08,
|
|
"logits/chosen": -2.7444612979888916,
|
|
"logits/rejected": -2.5919036865234375,
|
|
"logps/chosen": -251.178466796875,
|
|
"logps/rejected": -196.35256958007812,
|
|
"loss": 0.6926,
|
|
"rewards/accuracies": 0.574999988079071,
|
|
"rewards/chosen": -0.0019455172587186098,
|
|
"rewards/margins": 0.001769614638760686,
|
|
"rewards/rejected": -0.0037151314318180084,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.023998080153587713,
|
|
"grad_norm": 48.25,
|
|
"learning_rate": 1.1961722488038278e-07,
|
|
"logits/chosen": -2.6667444705963135,
|
|
"logits/rejected": -2.6048452854156494,
|
|
"logps/chosen": -234.8287353515625,
|
|
"logps/rejected": -199.68902587890625,
|
|
"loss": 0.6949,
|
|
"rewards/accuracies": 0.45625001192092896,
|
|
"rewards/chosen": -0.0009633477893657982,
|
|
"rewards/margins": -0.002945653162896633,
|
|
"rewards/rejected": 0.001982305431738496,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.028797696184305256,
|
|
"grad_norm": 63.5,
|
|
"learning_rate": 1.4354066985645933e-07,
|
|
"logits/chosen": -2.705540180206299,
|
|
"logits/rejected": -2.599553346633911,
|
|
"logps/chosen": -223.2880859375,
|
|
"logps/rejected": -215.03759765625,
|
|
"loss": 0.6932,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.002534763887524605,
|
|
"rewards/margins": 0.00042680976912379265,
|
|
"rewards/rejected": -0.002961573889479041,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.033597312215022795,
|
|
"grad_norm": 51.25,
|
|
"learning_rate": 1.6746411483253589e-07,
|
|
"logits/chosen": -2.726954698562622,
|
|
"logits/rejected": -2.5858073234558105,
|
|
"logps/chosen": -245.69790649414062,
|
|
"logps/rejected": -205.90469360351562,
|
|
"loss": 0.6925,
|
|
"rewards/accuracies": 0.48124998807907104,
|
|
"rewards/chosen": -0.001567687257193029,
|
|
"rewards/margins": 0.0018620832124724984,
|
|
"rewards/rejected": -0.0034297697711735964,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.03839692824574034,
|
|
"grad_norm": 52.25,
|
|
"learning_rate": 1.9138755980861244e-07,
|
|
"logits/chosen": -2.72399640083313,
|
|
"logits/rejected": -2.6148579120635986,
|
|
"logps/chosen": -235.31991577148438,
|
|
"logps/rejected": -201.12049865722656,
|
|
"loss": 0.6933,
|
|
"rewards/accuracies": 0.518750011920929,
|
|
"rewards/chosen": 0.0001770513626979664,
|
|
"rewards/margins": 0.0003844931779894978,
|
|
"rewards/rejected": -0.00020744054927490652,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.04319654427645788,
|
|
"grad_norm": 53.0,
|
|
"learning_rate": 2.15311004784689e-07,
|
|
"logits/chosen": -2.7387874126434326,
|
|
"logits/rejected": -2.5575790405273438,
|
|
"logps/chosen": -251.2541961669922,
|
|
"logps/rejected": -189.22152709960938,
|
|
"loss": 0.6945,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.001186860492452979,
|
|
"rewards/margins": -0.002072554547339678,
|
|
"rewards/rejected": 0.0008856941130943596,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.04799616030717543,
|
|
"grad_norm": 50.25,
|
|
"learning_rate": 2.3923444976076555e-07,
|
|
"logits/chosen": -2.707674503326416,
|
|
"logits/rejected": -2.5784828662872314,
|
|
"logps/chosen": -228.6029510498047,
|
|
"logps/rejected": -206.3776092529297,
|
|
"loss": 0.6919,
|
|
"rewards/accuracies": 0.543749988079071,
|
|
"rewards/chosen": 0.0017306295922026038,
|
|
"rewards/margins": 0.0031413964461535215,
|
|
"rewards/rejected": -0.001410767319612205,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.052795776337892966,
|
|
"grad_norm": 52.25,
|
|
"learning_rate": 2.631578947368421e-07,
|
|
"logits/chosen": -2.7312660217285156,
|
|
"logits/rejected": -2.5520169734954834,
|
|
"logps/chosen": -237.2713165283203,
|
|
"logps/rejected": -178.83563232421875,
|
|
"loss": 0.6954,
|
|
"rewards/accuracies": 0.45625001192092896,
|
|
"rewards/chosen": 8.01489659352228e-05,
|
|
"rewards/margins": -0.004063433036208153,
|
|
"rewards/rejected": 0.004143581725656986,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.05759539236861051,
|
|
"grad_norm": 54.75,
|
|
"learning_rate": 2.8708133971291866e-07,
|
|
"logits/chosen": -2.7264270782470703,
|
|
"logits/rejected": -2.602839946746826,
|
|
"logps/chosen": -234.1689910888672,
|
|
"logps/rejected": -204.75241088867188,
|
|
"loss": 0.6945,
|
|
"rewards/accuracies": 0.4749999940395355,
|
|
"rewards/chosen": -0.0038609784096479416,
|
|
"rewards/margins": -0.0021540005691349506,
|
|
"rewards/rejected": -0.0017069776076823473,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.06239500839932805,
|
|
"grad_norm": 60.25,
|
|
"learning_rate": 3.110047846889952e-07,
|
|
"logits/chosen": -2.71527361869812,
|
|
"logits/rejected": -2.5875349044799805,
|
|
"logps/chosen": -252.4326934814453,
|
|
"logps/rejected": -210.2029266357422,
|
|
"loss": 0.6909,
|
|
"rewards/accuracies": 0.543749988079071,
|
|
"rewards/chosen": 0.0007449972326867282,
|
|
"rewards/margins": 0.005101869348436594,
|
|
"rewards/rejected": -0.004356871824711561,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.06719462443004559,
|
|
"grad_norm": 49.75,
|
|
"learning_rate": 3.3492822966507177e-07,
|
|
"logits/chosen": -2.6849746704101562,
|
|
"logits/rejected": -2.6195476055145264,
|
|
"logps/chosen": -235.63906860351562,
|
|
"logps/rejected": -218.46603393554688,
|
|
"loss": 0.6935,
|
|
"rewards/accuracies": 0.512499988079071,
|
|
"rewards/chosen": 0.0014313453575596213,
|
|
"rewards/margins": -0.00015073138638399541,
|
|
"rewards/rejected": 0.0015820765402168036,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.07199424046076314,
|
|
"grad_norm": 51.25,
|
|
"learning_rate": 3.588516746411483e-07,
|
|
"logits/chosen": -2.702357769012451,
|
|
"logits/rejected": -2.6205286979675293,
|
|
"logps/chosen": -232.359619140625,
|
|
"logps/rejected": -245.7252197265625,
|
|
"loss": 0.6947,
|
|
"rewards/accuracies": 0.48124998807907104,
|
|
"rewards/chosen": 0.002868877723813057,
|
|
"rewards/margins": -0.0025208499282598495,
|
|
"rewards/rejected": 0.005389728117734194,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.07679385649148068,
|
|
"grad_norm": 69.0,
|
|
"learning_rate": 3.827751196172249e-07,
|
|
"logits/chosen": -2.6653263568878174,
|
|
"logits/rejected": -2.5433461666107178,
|
|
"logps/chosen": -243.0439453125,
|
|
"logps/rejected": -200.38685607910156,
|
|
"loss": 0.6959,
|
|
"rewards/accuracies": 0.48124998807907104,
|
|
"rewards/chosen": -0.004858389962464571,
|
|
"rewards/margins": -0.004746051970869303,
|
|
"rewards/rejected": -0.00011233799159526825,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.08159347252219823,
|
|
"grad_norm": 54.0,
|
|
"learning_rate": 4.066985645933014e-07,
|
|
"logits/chosen": -2.7417194843292236,
|
|
"logits/rejected": -2.5782225131988525,
|
|
"logps/chosen": -256.54278564453125,
|
|
"logps/rejected": -199.0166473388672,
|
|
"loss": 0.6918,
|
|
"rewards/accuracies": 0.543749988079071,
|
|
"rewards/chosen": 0.0022998370695859194,
|
|
"rewards/margins": 0.003148593008518219,
|
|
"rewards/rejected": -0.0008487561717629433,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.08639308855291576,
|
|
"grad_norm": 53.5,
|
|
"learning_rate": 4.30622009569378e-07,
|
|
"logits/chosen": -2.7641491889953613,
|
|
"logits/rejected": -2.6625149250030518,
|
|
"logps/chosen": -242.5579071044922,
|
|
"logps/rejected": -201.333251953125,
|
|
"loss": 0.6908,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": -0.000439296942204237,
|
|
"rewards/margins": 0.00535095389932394,
|
|
"rewards/rejected": -0.005790251307189465,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.09119270458363331,
|
|
"grad_norm": 56.0,
|
|
"learning_rate": 4.545454545454545e-07,
|
|
"logits/chosen": -2.691822052001953,
|
|
"logits/rejected": -2.587761640548706,
|
|
"logps/chosen": -240.88916015625,
|
|
"logps/rejected": -198.99119567871094,
|
|
"loss": 0.6915,
|
|
"rewards/accuracies": 0.518750011920929,
|
|
"rewards/chosen": 0.002601384650915861,
|
|
"rewards/margins": 0.003762087319046259,
|
|
"rewards/rejected": -0.0011607027845457196,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.09599232061435085,
|
|
"grad_norm": 45.25,
|
|
"learning_rate": 4.784688995215311e-07,
|
|
"logits/chosen": -2.7268319129943848,
|
|
"logits/rejected": -2.6072897911071777,
|
|
"logps/chosen": -228.60128784179688,
|
|
"logps/rejected": -190.16201782226562,
|
|
"loss": 0.6899,
|
|
"rewards/accuracies": 0.574999988079071,
|
|
"rewards/chosen": 0.0014531847555190325,
|
|
"rewards/margins": 0.00695295725017786,
|
|
"rewards/rejected": -0.005499773193150759,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.09599232061435085,
|
|
"eval_logits/chosen": -2.7156012058258057,
|
|
"eval_logits/rejected": -2.595405340194702,
|
|
"eval_logps/chosen": -233.07533264160156,
|
|
"eval_logps/rejected": -205.5474853515625,
|
|
"eval_loss": 0.6897569298744202,
|
|
"eval_rewards/accuracies": 0.5680000185966492,
|
|
"eval_rewards/chosen": 0.005765980575233698,
|
|
"eval_rewards/margins": 0.007436447311192751,
|
|
"eval_rewards/rejected": -0.0016704658046364784,
|
|
"eval_runtime": 21.4199,
|
|
"eval_samples_per_second": 46.686,
|
|
"eval_steps_per_second": 11.671,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.1007919366450684,
|
|
"grad_norm": 50.0,
|
|
"learning_rate": 4.999996487062011e-07,
|
|
"logits/chosen": -2.6748883724212646,
|
|
"logits/rejected": -2.5881714820861816,
|
|
"logps/chosen": -242.6162872314453,
|
|
"logps/rejected": -214.3368377685547,
|
|
"loss": 0.6931,
|
|
"rewards/accuracies": 0.4937500059604645,
|
|
"rewards/chosen": 0.0026280046440660954,
|
|
"rewards/margins": 0.000607747002504766,
|
|
"rewards/rejected": 0.002020257292315364,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.10559155267578593,
|
|
"grad_norm": 49.5,
|
|
"learning_rate": 4.999574946449064e-07,
|
|
"logits/chosen": -2.7096612453460693,
|
|
"logits/rejected": -2.5823137760162354,
|
|
"logps/chosen": -226.0032196044922,
|
|
"logps/rejected": -185.87234497070312,
|
|
"loss": 0.6908,
|
|
"rewards/accuracies": 0.5375000238418579,
|
|
"rewards/chosen": 0.007502657826989889,
|
|
"rewards/margins": 0.005469072610139847,
|
|
"rewards/rejected": 0.002033584751188755,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.11039116870650348,
|
|
"grad_norm": 53.25,
|
|
"learning_rate": 4.998450953980164e-07,
|
|
"logits/chosen": -2.674795389175415,
|
|
"logits/rejected": -2.562544345855713,
|
|
"logps/chosen": -231.25247192382812,
|
|
"logps/rejected": -223.4167938232422,
|
|
"loss": 0.6922,
|
|
"rewards/accuracies": 0.5249999761581421,
|
|
"rewards/chosen": 0.005688765086233616,
|
|
"rewards/margins": 0.0028063193894922733,
|
|
"rewards/rejected": 0.0028824463952332735,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.11519078473722102,
|
|
"grad_norm": 49.5,
|
|
"learning_rate": 4.996624825529257e-07,
|
|
"logits/chosen": -2.752612590789795,
|
|
"logits/rejected": -2.641317367553711,
|
|
"logps/chosen": -216.5712432861328,
|
|
"logps/rejected": -192.3323211669922,
|
|
"loss": 0.69,
|
|
"rewards/accuracies": 0.53125,
|
|
"rewards/chosen": 0.01013671699911356,
|
|
"rewards/margins": 0.006918230559676886,
|
|
"rewards/rejected": 0.003218486439436674,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.11999040076793857,
|
|
"grad_norm": 51.5,
|
|
"learning_rate": 4.994097074290524e-07,
|
|
"logits/chosen": -2.7131876945495605,
|
|
"logits/rejected": -2.591782331466675,
|
|
"logps/chosen": -228.76925659179688,
|
|
"logps/rejected": -200.34799194335938,
|
|
"loss": 0.6902,
|
|
"rewards/accuracies": 0.550000011920929,
|
|
"rewards/chosen": 0.00667478796094656,
|
|
"rewards/margins": 0.00645422050729394,
|
|
"rewards/rejected": 0.00022056761372368783,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.1247900167986561,
|
|
"grad_norm": 53.0,
|
|
"learning_rate": 4.990868410634162e-07,
|
|
"logits/chosen": -2.7187414169311523,
|
|
"logits/rejected": -2.6327602863311768,
|
|
"logps/chosen": -225.66043090820312,
|
|
"logps/rejected": -192.34942626953125,
|
|
"loss": 0.6881,
|
|
"rewards/accuracies": 0.574999988079071,
|
|
"rewards/chosen": 0.009334566071629524,
|
|
"rewards/margins": 0.010765586979687214,
|
|
"rewards/rejected": -0.0014310205588117242,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.12958963282937366,
|
|
"grad_norm": 55.0,
|
|
"learning_rate": 4.986939741906753e-07,
|
|
"logits/chosen": -2.7310328483581543,
|
|
"logits/rejected": -2.6244540214538574,
|
|
"logps/chosen": -214.0838623046875,
|
|
"logps/rejected": -191.58615112304688,
|
|
"loss": 0.685,
|
|
"rewards/accuracies": 0.637499988079071,
|
|
"rewards/chosen": 0.012120475992560387,
|
|
"rewards/margins": 0.016924794763326645,
|
|
"rewards/rejected": -0.004804318305104971,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.13438924886009118,
|
|
"grad_norm": 61.75,
|
|
"learning_rate": 4.982312172176264e-07,
|
|
"logits/chosen": -2.7920923233032227,
|
|
"logits/rejected": -2.5907645225524902,
|
|
"logps/chosen": -273.3122253417969,
|
|
"logps/rejected": -205.17733764648438,
|
|
"loss": 0.6891,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": 0.02231917902827263,
|
|
"rewards/margins": 0.008881422691047192,
|
|
"rewards/rejected": 0.013437752611935139,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.13918886489080873,
|
|
"grad_norm": 57.25,
|
|
"learning_rate": 4.976987001921786e-07,
|
|
"logits/chosen": -2.710538625717163,
|
|
"logits/rejected": -2.5878021717071533,
|
|
"logps/chosen": -235.57437133789062,
|
|
"logps/rejected": -204.8675537109375,
|
|
"loss": 0.6868,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": 0.013490339741110802,
|
|
"rewards/margins": 0.01348956674337387,
|
|
"rewards/rejected": 7.734633982181549e-07,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.14398848092152627,
|
|
"grad_norm": 52.75,
|
|
"learning_rate": 4.97096572766805e-07,
|
|
"logits/chosen": -2.727212905883789,
|
|
"logits/rejected": -2.582718849182129,
|
|
"logps/chosen": -240.856201171875,
|
|
"logps/rejected": -190.96470642089844,
|
|
"loss": 0.6864,
|
|
"rewards/accuracies": 0.5687500238418579,
|
|
"rewards/chosen": 0.01539058517664671,
|
|
"rewards/margins": 0.014497148804366589,
|
|
"rewards/rejected": 0.0008934367215260863,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.14878809695224382,
|
|
"grad_norm": 49.0,
|
|
"learning_rate": 4.964250041564868e-07,
|
|
"logits/chosen": -2.7062602043151855,
|
|
"logits/rejected": -2.5759198665618896,
|
|
"logps/chosen": -232.9503173828125,
|
|
"logps/rejected": -198.68533325195312,
|
|
"loss": 0.6863,
|
|
"rewards/accuracies": 0.6000000238418579,
|
|
"rewards/chosen": 0.010409911163151264,
|
|
"rewards/margins": 0.01460896898061037,
|
|
"rewards/rejected": -0.0041990578174591064,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.15358771298296137,
|
|
"grad_norm": 48.0,
|
|
"learning_rate": 4.956841830911587e-07,
|
|
"logits/chosen": -2.688969850540161,
|
|
"logits/rejected": -2.5635781288146973,
|
|
"logps/chosen": -244.30337524414062,
|
|
"logps/rejected": -198.82345581054688,
|
|
"loss": 0.6871,
|
|
"rewards/accuracies": 0.606249988079071,
|
|
"rewards/chosen": 0.01625187136232853,
|
|
"rewards/margins": 0.013046267442405224,
|
|
"rewards/rejected": 0.0032056006602942944,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.1583873290136789,
|
|
"grad_norm": 56.25,
|
|
"learning_rate": 4.948743177626708e-07,
|
|
"logits/chosen": -2.708862543106079,
|
|
"logits/rejected": -2.5964908599853516,
|
|
"logps/chosen": -218.1621856689453,
|
|
"logps/rejected": -196.46609497070312,
|
|
"loss": 0.691,
|
|
"rewards/accuracies": 0.5562499761581421,
|
|
"rewards/chosen": 0.015645433217287064,
|
|
"rewards/margins": 0.005278537981212139,
|
|
"rewards/rejected": 0.010366896167397499,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.16318694504439646,
|
|
"grad_norm": 53.5,
|
|
"learning_rate": 4.939956357662805e-07,
|
|
"logits/chosen": -2.664097309112549,
|
|
"logits/rejected": -2.504223108291626,
|
|
"logps/chosen": -233.2242431640625,
|
|
"logps/rejected": -178.89642333984375,
|
|
"loss": 0.6869,
|
|
"rewards/accuracies": 0.6000000238418579,
|
|
"rewards/chosen": 0.010349711403250694,
|
|
"rewards/margins": 0.013576941564679146,
|
|
"rewards/rejected": -0.0032272294629365206,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.16798656107511398,
|
|
"grad_norm": 53.25,
|
|
"learning_rate": 4.930483840366915e-07,
|
|
"logits/chosen": -2.6505606174468994,
|
|
"logits/rejected": -2.508861541748047,
|
|
"logps/chosen": -253.9485321044922,
|
|
"logps/rejected": -195.91908264160156,
|
|
"loss": 0.6851,
|
|
"rewards/accuracies": 0.606249988079071,
|
|
"rewards/chosen": 0.024759415537118912,
|
|
"rewards/margins": 0.017002228647470474,
|
|
"rewards/rejected": 0.0077571868896484375,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.17278617710583152,
|
|
"grad_norm": 46.75,
|
|
"learning_rate": 4.920328287786586e-07,
|
|
"logits/chosen": -2.661841869354248,
|
|
"logits/rejected": -2.5565028190612793,
|
|
"logps/chosen": -229.78305053710938,
|
|
"logps/rejected": -194.48904418945312,
|
|
"loss": 0.6866,
|
|
"rewards/accuracies": 0.6499999761581421,
|
|
"rewards/chosen": 0.021085303276777267,
|
|
"rewards/margins": 0.014185063540935516,
|
|
"rewards/rejected": 0.006900241132825613,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.17758579313654907,
|
|
"grad_norm": 45.5,
|
|
"learning_rate": 4.90949255392176e-07,
|
|
"logits/chosen": -2.70994234085083,
|
|
"logits/rejected": -2.547222852706909,
|
|
"logps/chosen": -244.82565307617188,
|
|
"logps/rejected": -202.6025390625,
|
|
"loss": 0.6828,
|
|
"rewards/accuracies": 0.6625000238418579,
|
|
"rewards/chosen": 0.022651832550764084,
|
|
"rewards/margins": 0.021922901272773743,
|
|
"rewards/rejected": 0.0007289334898814559,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.18238540916726662,
|
|
"grad_norm": 48.75,
|
|
"learning_rate": 4.897979683922727e-07,
|
|
"logits/chosen": -2.733055591583252,
|
|
"logits/rejected": -2.6267523765563965,
|
|
"logps/chosen": -218.7659149169922,
|
|
"logps/rejected": -180.62173461914062,
|
|
"loss": 0.6845,
|
|
"rewards/accuracies": 0.5562499761581421,
|
|
"rewards/chosen": 0.01779359206557274,
|
|
"rewards/margins": 0.01852073147892952,
|
|
"rewards/rejected": -0.0007271372596733272,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.18718502519798416,
|
|
"grad_norm": 48.75,
|
|
"learning_rate": 4.885792913234339e-07,
|
|
"logits/chosen": -2.6706607341766357,
|
|
"logits/rejected": -2.6117610931396484,
|
|
"logps/chosen": -223.50021362304688,
|
|
"logps/rejected": -208.219482421875,
|
|
"loss": 0.6879,
|
|
"rewards/accuracies": 0.59375,
|
|
"rewards/chosen": 0.025855297222733498,
|
|
"rewards/margins": 0.011695639230310917,
|
|
"rewards/rejected": 0.014159657061100006,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.1919846412287017,
|
|
"grad_norm": 49.5,
|
|
"learning_rate": 4.872935666686766e-07,
|
|
"logits/chosen": -2.6978952884674072,
|
|
"logits/rejected": -2.5849671363830566,
|
|
"logps/chosen": -233.3878631591797,
|
|
"logps/rejected": -212.00723266601562,
|
|
"loss": 0.6868,
|
|
"rewards/accuracies": 0.59375,
|
|
"rewards/chosen": 0.025096680968999863,
|
|
"rewards/margins": 0.013743218965828419,
|
|
"rewards/rejected": 0.01135346107184887,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.1919846412287017,
|
|
"eval_logits/chosen": -2.7157444953918457,
|
|
"eval_logits/rejected": -2.595482587814331,
|
|
"eval_logps/chosen": -232.87948608398438,
|
|
"eval_logps/rejected": -205.45095825195312,
|
|
"eval_loss": 0.6850579977035522,
|
|
"eval_rewards/accuracies": 0.6299999952316284,
|
|
"eval_rewards/chosen": 0.025349698960781097,
|
|
"eval_rewards/margins": 0.017367491498589516,
|
|
"eval_rewards/rejected": 0.007982207462191582,
|
|
"eval_runtime": 21.4159,
|
|
"eval_samples_per_second": 46.694,
|
|
"eval_steps_per_second": 11.674,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.19678425725941925,
|
|
"grad_norm": 50.75,
|
|
"learning_rate": 4.859411557533018e-07,
|
|
"logits/chosen": -2.7110087871551514,
|
|
"logits/rejected": -2.599547863006592,
|
|
"logps/chosen": -229.89578247070312,
|
|
"logps/rejected": -196.5421905517578,
|
|
"loss": 0.685,
|
|
"rewards/accuracies": 0.5687500238418579,
|
|
"rewards/chosen": 0.023681003600358963,
|
|
"rewards/margins": 0.01766652800142765,
|
|
"rewards/rejected": 0.006014474667608738,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.2015838732901368,
|
|
"grad_norm": 47.75,
|
|
"learning_rate": 4.845224386433521e-07,
|
|
"logits/chosen": -2.6937224864959717,
|
|
"logits/rejected": -2.616425037384033,
|
|
"logps/chosen": -207.7985382080078,
|
|
"logps/rejected": -210.020751953125,
|
|
"loss": 0.6884,
|
|
"rewards/accuracies": 0.512499988079071,
|
|
"rewards/chosen": 0.02355768159031868,
|
|
"rewards/margins": 0.01056084968149662,
|
|
"rewards/rejected": 0.012996832840144634,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.20638348932085432,
|
|
"grad_norm": 42.75,
|
|
"learning_rate": 4.830378140388015e-07,
|
|
"logits/chosen": -2.802743434906006,
|
|
"logits/rejected": -2.6532254219055176,
|
|
"logps/chosen": -238.71115112304688,
|
|
"logps/rejected": -192.9730987548828,
|
|
"loss": 0.6816,
|
|
"rewards/accuracies": 0.668749988079071,
|
|
"rewards/chosen": 0.025352749973535538,
|
|
"rewards/margins": 0.02443523518741131,
|
|
"rewards/rejected": 0.0009175121667794883,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.21118310535157186,
|
|
"grad_norm": 52.5,
|
|
"learning_rate": 4.814876991615104e-07,
|
|
"logits/chosen": -2.682868719100952,
|
|
"logits/rejected": -2.5881507396698,
|
|
"logps/chosen": -226.88131713867188,
|
|
"logps/rejected": -197.5751495361328,
|
|
"loss": 0.6858,
|
|
"rewards/accuracies": 0.5687500238418579,
|
|
"rewards/chosen": 0.024431098252534866,
|
|
"rewards/margins": 0.016206270083785057,
|
|
"rewards/rejected": 0.008224830962717533,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.2159827213822894,
|
|
"grad_norm": 55.0,
|
|
"learning_rate": 4.798725296379735e-07,
|
|
"logits/chosen": -2.711108684539795,
|
|
"logits/rejected": -2.626420497894287,
|
|
"logps/chosen": -221.24533081054688,
|
|
"logps/rejected": -193.32937622070312,
|
|
"loss": 0.6844,
|
|
"rewards/accuracies": 0.6187499761581421,
|
|
"rewards/chosen": 0.031023338437080383,
|
|
"rewards/margins": 0.01875515654683113,
|
|
"rewards/rejected": 0.012268180958926678,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.22078233741300696,
|
|
"grad_norm": 48.5,
|
|
"learning_rate": 4.781927593768969e-07,
|
|
"logits/chosen": -2.7570741176605225,
|
|
"logits/rejected": -2.6272220611572266,
|
|
"logps/chosen": -232.4735107421875,
|
|
"logps/rejected": -199.38890075683594,
|
|
"loss": 0.6839,
|
|
"rewards/accuracies": 0.5874999761581421,
|
|
"rewards/chosen": 0.026766937226057053,
|
|
"rewards/margins": 0.019803114235401154,
|
|
"rewards/rejected": 0.006963823921978474,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.2255819534437245,
|
|
"grad_norm": 46.5,
|
|
"learning_rate": 4.764488604416364e-07,
|
|
"logits/chosen": -2.7485814094543457,
|
|
"logits/rejected": -2.579071044921875,
|
|
"logps/chosen": -257.55096435546875,
|
|
"logps/rejected": -215.48782348632812,
|
|
"loss": 0.6799,
|
|
"rewards/accuracies": 0.637499988079071,
|
|
"rewards/chosen": 0.03846278041601181,
|
|
"rewards/margins": 0.02814416028559208,
|
|
"rewards/rejected": 0.010318620130419731,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.23038156947444205,
|
|
"grad_norm": 49.5,
|
|
"learning_rate": 4.7464132291753457e-07,
|
|
"logits/chosen": -2.693459987640381,
|
|
"logits/rejected": -2.601459503173828,
|
|
"logps/chosen": -213.8591766357422,
|
|
"logps/rejected": -188.70779418945312,
|
|
"loss": 0.6871,
|
|
"rewards/accuracies": 0.5687500238418579,
|
|
"rewards/chosen": 0.02118738368153572,
|
|
"rewards/margins": 0.013606322929263115,
|
|
"rewards/rejected": 0.007581062614917755,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.2351811855051596,
|
|
"grad_norm": 50.0,
|
|
"learning_rate": 4.7277065477419236e-07,
|
|
"logits/chosen": -2.6836752891540527,
|
|
"logits/rejected": -2.5498709678649902,
|
|
"logps/chosen": -229.8844451904297,
|
|
"logps/rejected": -181.79513549804688,
|
|
"loss": 0.68,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": 0.02958494983613491,
|
|
"rewards/margins": 0.027864400297403336,
|
|
"rewards/rejected": 0.0017205558251589537,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.23998080153587714,
|
|
"grad_norm": 50.0,
|
|
"learning_rate": 4.7083738172271575e-07,
|
|
"logits/chosen": -2.6776702404022217,
|
|
"logits/rejected": -2.5478641986846924,
|
|
"logps/chosen": -241.678466796875,
|
|
"logps/rejected": -201.00245666503906,
|
|
"loss": 0.6835,
|
|
"rewards/accuracies": 0.6312500238418579,
|
|
"rewards/chosen": 0.03041999600827694,
|
|
"rewards/margins": 0.020813625305891037,
|
|
"rewards/rejected": 0.009606371633708477,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.24478041756659466,
|
|
"grad_norm": 47.5,
|
|
"learning_rate": 4.6884204706797537e-07,
|
|
"logits/chosen": -2.67305850982666,
|
|
"logits/rejected": -2.5395994186401367,
|
|
"logps/chosen": -246.15304565429688,
|
|
"logps/rejected": -191.6173553466797,
|
|
"loss": 0.6802,
|
|
"rewards/accuracies": 0.637499988079071,
|
|
"rewards/chosen": 0.03808588907122612,
|
|
"rewards/margins": 0.027713218703866005,
|
|
"rewards/rejected": 0.010372666642069817,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 0.2495800335973122,
|
|
"grad_norm": 56.25,
|
|
"learning_rate": 4.6678521155592266e-07,
|
|
"logits/chosen": -2.715430498123169,
|
|
"logits/rejected": -2.5766196250915527,
|
|
"logps/chosen": -257.3648681640625,
|
|
"logps/rejected": -218.2578125,
|
|
"loss": 0.6819,
|
|
"rewards/accuracies": 0.6499999761581421,
|
|
"rewards/chosen": 0.03974943235516548,
|
|
"rewards/margins": 0.0246734581887722,
|
|
"rewards/rejected": 0.01507597416639328,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 0.2543796496280298,
|
|
"grad_norm": 42.75,
|
|
"learning_rate": 4.646674532160041e-07,
|
|
"logits/chosen": -2.7444615364074707,
|
|
"logits/rejected": -2.642268180847168,
|
|
"logps/chosen": -234.83529663085938,
|
|
"logps/rejected": -207.768798828125,
|
|
"loss": 0.6895,
|
|
"rewards/accuracies": 0.518750011920929,
|
|
"rewards/chosen": 0.02697952464222908,
|
|
"rewards/margins": 0.008664881810545921,
|
|
"rewards/rejected": 0.018314644694328308,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 0.2591792656587473,
|
|
"grad_norm": 52.5,
|
|
"learning_rate": 4.624893671987185e-07,
|
|
"logits/chosen": -2.710597515106201,
|
|
"logits/rejected": -2.618180513381958,
|
|
"logps/chosen": -220.21383666992188,
|
|
"logps/rejected": -187.06417846679688,
|
|
"loss": 0.6782,
|
|
"rewards/accuracies": 0.612500011920929,
|
|
"rewards/chosen": 0.030816808342933655,
|
|
"rewards/margins": 0.031680621206760406,
|
|
"rewards/rejected": -0.0008638119325041771,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 0.2639788816894648,
|
|
"grad_norm": 51.5,
|
|
"learning_rate": 4.602515656083629e-07,
|
|
"logits/chosen": -2.7750511169433594,
|
|
"logits/rejected": -2.689318895339966,
|
|
"logps/chosen": -234.1312255859375,
|
|
"logps/rejected": -218.6779327392578,
|
|
"loss": 0.682,
|
|
"rewards/accuracies": 0.612500011920929,
|
|
"rewards/chosen": 0.032838549464941025,
|
|
"rewards/margins": 0.02399415522813797,
|
|
"rewards/rejected": 0.008844394236803055,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.26877849772018236,
|
|
"grad_norm": 48.5,
|
|
"learning_rate": 4.5795467733101356e-07,
|
|
"logits/chosen": -2.716984510421753,
|
|
"logits/rejected": -2.536345958709717,
|
|
"logps/chosen": -238.9961395263672,
|
|
"logps/rejected": -209.2313690185547,
|
|
"loss": 0.6839,
|
|
"rewards/accuracies": 0.5687500238418579,
|
|
"rewards/chosen": 0.029773468151688576,
|
|
"rewards/margins": 0.020234117284417152,
|
|
"rewards/rejected": 0.009539352729916573,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 0.2735781137508999,
|
|
"grad_norm": 45.0,
|
|
"learning_rate": 4.555993478577911e-07,
|
|
"logits/chosen": -2.7671806812286377,
|
|
"logits/rejected": -2.5658230781555176,
|
|
"logps/chosen": -245.57693481445312,
|
|
"logps/rejected": -186.72381591796875,
|
|
"loss": 0.6743,
|
|
"rewards/accuracies": 0.6875,
|
|
"rewards/chosen": 0.04541153460741043,
|
|
"rewards/margins": 0.03957425057888031,
|
|
"rewards/rejected": 0.0058372789062559605,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 0.27837772978161746,
|
|
"grad_norm": 46.75,
|
|
"learning_rate": 4.531862391034591e-07,
|
|
"logits/chosen": -2.6841483116149902,
|
|
"logits/rejected": -2.5884292125701904,
|
|
"logps/chosen": -234.3633270263672,
|
|
"logps/rejected": -198.0540008544922,
|
|
"loss": 0.6779,
|
|
"rewards/accuracies": 0.6812499761581421,
|
|
"rewards/chosen": 0.03814179450273514,
|
|
"rewards/margins": 0.03233181685209274,
|
|
"rewards/rejected": 0.005809984169900417,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 0.283177345812335,
|
|
"grad_norm": 48.5,
|
|
"learning_rate": 4.5071602922040734e-07,
|
|
"logits/chosen": -2.762327194213867,
|
|
"logits/rejected": -2.6249194145202637,
|
|
"logps/chosen": -237.1390380859375,
|
|
"logps/rejected": -201.12574768066406,
|
|
"loss": 0.6779,
|
|
"rewards/accuracies": 0.6312500238418579,
|
|
"rewards/chosen": 0.0438932366669178,
|
|
"rewards/margins": 0.032640643417835236,
|
|
"rewards/rejected": 0.011252591386437416,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 0.28797696184305255,
|
|
"grad_norm": 51.25,
|
|
"learning_rate": 4.4818941240807133e-07,
|
|
"logits/chosen": -2.751591920852661,
|
|
"logits/rejected": -2.6312174797058105,
|
|
"logps/chosen": -235.29855346679688,
|
|
"logps/rejected": -211.7731475830078,
|
|
"loss": 0.6741,
|
|
"rewards/accuracies": 0.6937500238418579,
|
|
"rewards/chosen": 0.043742720037698746,
|
|
"rewards/margins": 0.040415119379758835,
|
|
"rewards/rejected": 0.0033275973983108997,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.28797696184305255,
|
|
"eval_logits/chosen": -2.7159337997436523,
|
|
"eval_logits/rejected": -2.5955567359924316,
|
|
"eval_logps/chosen": -232.7647247314453,
|
|
"eval_logps/rejected": -205.43447875976562,
|
|
"eval_loss": 0.6805809140205383,
|
|
"eval_rewards/accuracies": 0.640999972820282,
|
|
"eval_rewards/chosen": 0.03682754188776016,
|
|
"eval_rewards/margins": 0.02719729021191597,
|
|
"eval_rewards/rejected": 0.009630252607166767,
|
|
"eval_runtime": 21.3954,
|
|
"eval_samples_per_second": 46.739,
|
|
"eval_steps_per_second": 11.685,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.2927765778737701,
|
|
"grad_norm": 46.75,
|
|
"learning_rate": 4.456070987178426e-07,
|
|
"logits/chosen": -2.7190473079681396,
|
|
"logits/rejected": -2.549043893814087,
|
|
"logps/chosen": -218.71493530273438,
|
|
"logps/rejected": -175.1071014404297,
|
|
"loss": 0.6771,
|
|
"rewards/accuracies": 0.6312500238418579,
|
|
"rewards/chosen": 0.029196638613939285,
|
|
"rewards/margins": 0.034055858850479126,
|
|
"rewards/rejected": -0.004859219305217266,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 0.29757619390448764,
|
|
"grad_norm": 43.5,
|
|
"learning_rate": 4.429698138535241e-07,
|
|
"logits/chosen": -2.689408779144287,
|
|
"logits/rejected": -2.5913164615631104,
|
|
"logps/chosen": -238.9366912841797,
|
|
"logps/rejected": -217.3396453857422,
|
|
"loss": 0.6825,
|
|
"rewards/accuracies": 0.612500011920929,
|
|
"rewards/chosen": 0.04081985726952553,
|
|
"rewards/margins": 0.02339627407491207,
|
|
"rewards/rejected": 0.01742357760667801,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 0.3023758099352052,
|
|
"grad_norm": 45.25,
|
|
"learning_rate": 4.402782989673867e-07,
|
|
"logits/chosen": -2.7332329750061035,
|
|
"logits/rejected": -2.5742244720458984,
|
|
"logps/chosen": -241.5056610107422,
|
|
"logps/rejected": -199.6383514404297,
|
|
"loss": 0.6788,
|
|
"rewards/accuracies": 0.6625000238418579,
|
|
"rewards/chosen": 0.044023603200912476,
|
|
"rewards/margins": 0.03084336593747139,
|
|
"rewards/rejected": 0.013180236332118511,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 0.30717542596592273,
|
|
"grad_norm": 48.5,
|
|
"learning_rate": 4.3753331045188415e-07,
|
|
"logits/chosen": -2.651803970336914,
|
|
"logits/rejected": -2.60718035697937,
|
|
"logps/chosen": -223.8908233642578,
|
|
"logps/rejected": -215.56411743164062,
|
|
"loss": 0.6881,
|
|
"rewards/accuracies": 0.5375000238418579,
|
|
"rewards/chosen": 0.027814963832497597,
|
|
"rewards/margins": 0.012350986711680889,
|
|
"rewards/rejected": 0.015463980846107006,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 0.3119750419966403,
|
|
"grad_norm": 47.75,
|
|
"learning_rate": 4.3473561972708517e-07,
|
|
"logits/chosen": -2.7187139987945557,
|
|
"logits/rejected": -2.5601890087127686,
|
|
"logps/chosen": -232.9247283935547,
|
|
"logps/rejected": -204.0401611328125,
|
|
"loss": 0.6835,
|
|
"rewards/accuracies": 0.637499988079071,
|
|
"rewards/chosen": 0.030253728851675987,
|
|
"rewards/margins": 0.021437767893075943,
|
|
"rewards/rejected": 0.008815961889922619,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.3167746580273578,
|
|
"grad_norm": 45.75,
|
|
"learning_rate": 4.3188601302388276e-07,
|
|
"logits/chosen": -2.6520533561706543,
|
|
"logits/rejected": -2.577056407928467,
|
|
"logps/chosen": -217.80337524414062,
|
|
"logps/rejected": -217.4649658203125,
|
|
"loss": 0.6781,
|
|
"rewards/accuracies": 0.6875,
|
|
"rewards/chosen": 0.043406371027231216,
|
|
"rewards/margins": 0.03220932558178902,
|
|
"rewards/rejected": 0.011197047308087349,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 0.32157427405807537,
|
|
"grad_norm": 45.0,
|
|
"learning_rate": 4.289852911630406e-07,
|
|
"logits/chosen": -2.746192455291748,
|
|
"logits/rejected": -2.5919899940490723,
|
|
"logps/chosen": -261.55328369140625,
|
|
"logps/rejected": -208.4563446044922,
|
|
"loss": 0.6746,
|
|
"rewards/accuracies": 0.6937500238418579,
|
|
"rewards/chosen": 0.05559698864817619,
|
|
"rewards/margins": 0.039354514330625534,
|
|
"rewards/rejected": 0.016242478042840958,
|
|
"step": 670
|
|
},
|
|
{
|
|
"epoch": 0.3263738900887929,
|
|
"grad_norm": 56.5,
|
|
"learning_rate": 4.2603426933013955e-07,
|
|
"logits/chosen": -2.7136101722717285,
|
|
"logits/rejected": -2.5737733840942383,
|
|
"logps/chosen": -235.10751342773438,
|
|
"logps/rejected": -190.7623291015625,
|
|
"loss": 0.6826,
|
|
"rewards/accuracies": 0.6000000238418579,
|
|
"rewards/chosen": 0.03037920594215393,
|
|
"rewards/margins": 0.02374129556119442,
|
|
"rewards/rejected": 0.0066379099152982235,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 0.33117350611951046,
|
|
"grad_norm": 49.75,
|
|
"learning_rate": 4.2303377684648734e-07,
|
|
"logits/chosen": -2.693387746810913,
|
|
"logits/rejected": -2.6150875091552734,
|
|
"logps/chosen": -229.60073852539062,
|
|
"logps/rejected": -227.20849609375,
|
|
"loss": 0.6781,
|
|
"rewards/accuracies": 0.668749988079071,
|
|
"rewards/chosen": 0.047377780079841614,
|
|
"rewards/margins": 0.03262994438409805,
|
|
"rewards/rejected": 0.014747830107808113,
|
|
"step": 690
|
|
},
|
|
{
|
|
"epoch": 0.33597312215022795,
|
|
"grad_norm": 58.25,
|
|
"learning_rate": 4.199846569360557e-07,
|
|
"logits/chosen": -2.7111198902130127,
|
|
"logits/rejected": -2.6025779247283936,
|
|
"logps/chosen": -236.203857421875,
|
|
"logps/rejected": -210.2013702392578,
|
|
"loss": 0.6878,
|
|
"rewards/accuracies": 0.574999988079071,
|
|
"rewards/chosen": 0.03395792096853256,
|
|
"rewards/margins": 0.013354765251278877,
|
|
"rewards/rejected": 0.020603153854608536,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 0.3407727381809455,
|
|
"grad_norm": 51.0,
|
|
"learning_rate": 4.1688776648851034e-07,
|
|
"logits/chosen": -2.7076306343078613,
|
|
"logits/rejected": -2.5523858070373535,
|
|
"logps/chosen": -228.8372039794922,
|
|
"logps/rejected": -181.23193359375,
|
|
"loss": 0.6765,
|
|
"rewards/accuracies": 0.6499999761581421,
|
|
"rewards/chosen": 0.03608817234635353,
|
|
"rewards/margins": 0.03536154329776764,
|
|
"rewards/rejected": 0.0007266284665092826,
|
|
"step": 710
|
|
},
|
|
{
|
|
"epoch": 0.34557235421166305,
|
|
"grad_norm": 49.25,
|
|
"learning_rate": 4.1374397581840034e-07,
|
|
"logits/chosen": -2.7360334396362305,
|
|
"logits/rejected": -2.5981593132019043,
|
|
"logps/chosen": -227.6597137451172,
|
|
"logps/rejected": -183.78594970703125,
|
|
"loss": 0.676,
|
|
"rewards/accuracies": 0.6625000238418579,
|
|
"rewards/chosen": 0.040428828448057175,
|
|
"rewards/margins": 0.03674127534031868,
|
|
"rewards/rejected": 0.0036875568330287933,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 0.3503719702423806,
|
|
"grad_norm": 53.5,
|
|
"learning_rate": 4.105541684205751e-07,
|
|
"logits/chosen": -2.6906344890594482,
|
|
"logits/rejected": -2.5774295330047607,
|
|
"logps/chosen": -218.3501434326172,
|
|
"logps/rejected": -195.3230743408203,
|
|
"loss": 0.6776,
|
|
"rewards/accuracies": 0.6312500238418579,
|
|
"rewards/chosen": 0.037394892424345016,
|
|
"rewards/margins": 0.03364209085702896,
|
|
"rewards/rejected": 0.0037527973763644695,
|
|
"step": 730
|
|
},
|
|
{
|
|
"epoch": 0.35517158627309814,
|
|
"grad_norm": 49.25,
|
|
"learning_rate": 4.073192407218971e-07,
|
|
"logits/chosen": -2.7351787090301514,
|
|
"logits/rejected": -2.5966103076934814,
|
|
"logps/chosen": -241.0888214111328,
|
|
"logps/rejected": -191.10189819335938,
|
|
"loss": 0.6704,
|
|
"rewards/accuracies": 0.6625000238418579,
|
|
"rewards/chosen": 0.0579073540866375,
|
|
"rewards/margins": 0.04853241890668869,
|
|
"rewards/rejected": 0.009374936111271381,
|
|
"step": 740
|
|
},
|
|
{
|
|
"epoch": 0.3599712023038157,
|
|
"grad_norm": 50.5,
|
|
"learning_rate": 4.040401018293204e-07,
|
|
"logits/chosen": -2.664130687713623,
|
|
"logits/rejected": -2.594024181365967,
|
|
"logps/chosen": -221.1615753173828,
|
|
"logps/rejected": -228.0982666015625,
|
|
"loss": 0.6793,
|
|
"rewards/accuracies": 0.637499988079071,
|
|
"rewards/chosen": 0.0396769680082798,
|
|
"rewards/margins": 0.03049297071993351,
|
|
"rewards/rejected": 0.009183998219668865,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 0.36477081833453323,
|
|
"grad_norm": 52.25,
|
|
"learning_rate": 4.0071767327440536e-07,
|
|
"logits/chosen": -2.6687798500061035,
|
|
"logits/rejected": -2.6370534896850586,
|
|
"logps/chosen": -235.759521484375,
|
|
"logps/rejected": -229.422119140625,
|
|
"loss": 0.6776,
|
|
"rewards/accuracies": 0.668749988079071,
|
|
"rewards/chosen": 0.05410134792327881,
|
|
"rewards/margins": 0.034135472029447556,
|
|
"rewards/rejected": 0.019965868443250656,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 0.3695704343652508,
|
|
"grad_norm": 47.25,
|
|
"learning_rate": 3.9735288875434254e-07,
|
|
"logits/chosen": -2.741582155227661,
|
|
"logits/rejected": -2.554959774017334,
|
|
"logps/chosen": -242.64480590820312,
|
|
"logps/rejected": -185.625244140625,
|
|
"loss": 0.6808,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": 0.034398965537548065,
|
|
"rewards/margins": 0.02690746821463108,
|
|
"rewards/rejected": 0.007491500116884708,
|
|
"step": 770
|
|
},
|
|
{
|
|
"epoch": 0.3743700503959683,
|
|
"grad_norm": 55.0,
|
|
"learning_rate": 3.939466938695565e-07,
|
|
"logits/chosen": -2.660132884979248,
|
|
"logits/rejected": -2.5671591758728027,
|
|
"logps/chosen": -253.7039337158203,
|
|
"logps/rejected": -221.8881072998047,
|
|
"loss": 0.6792,
|
|
"rewards/accuracies": 0.5874999761581421,
|
|
"rewards/chosen": 0.04830535501241684,
|
|
"rewards/margins": 0.031025772914290428,
|
|
"rewards/rejected": 0.01727958396077156,
|
|
"step": 780
|
|
},
|
|
{
|
|
"epoch": 0.37916966642668587,
|
|
"grad_norm": 46.25,
|
|
"learning_rate": 3.905000458579657e-07,
|
|
"logits/chosen": -2.672783851623535,
|
|
"logits/rejected": -2.598494291305542,
|
|
"logps/chosen": -210.40975952148438,
|
|
"logps/rejected": -224.6177978515625,
|
|
"loss": 0.6821,
|
|
"rewards/accuracies": 0.6187499761581421,
|
|
"rewards/chosen": 0.03782086446881294,
|
|
"rewards/margins": 0.02480388432741165,
|
|
"rewards/rejected": 0.01301698386669159,
|
|
"step": 790
|
|
},
|
|
{
|
|
"epoch": 0.3839692824574034,
|
|
"grad_norm": 50.75,
|
|
"learning_rate": 3.870139133259709e-07,
|
|
"logits/chosen": -2.6891722679138184,
|
|
"logits/rejected": -2.5445141792297363,
|
|
"logps/chosen": -259.342529296875,
|
|
"logps/rejected": -206.988525390625,
|
|
"loss": 0.6767,
|
|
"rewards/accuracies": 0.6625000238418579,
|
|
"rewards/chosen": 0.05699441581964493,
|
|
"rewards/margins": 0.03583725541830063,
|
|
"rewards/rejected": 0.0211571604013443,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 0.3839692824574034,
|
|
"eval_logits/chosen": -2.7158119678497314,
|
|
"eval_logits/rejected": -2.595388650894165,
|
|
"eval_logps/chosen": -232.6337127685547,
|
|
"eval_logps/rejected": -205.41590881347656,
|
|
"eval_loss": 0.6753210425376892,
|
|
"eval_rewards/accuracies": 0.6669999957084656,
|
|
"eval_rewards/chosen": 0.04992655664682388,
|
|
"eval_rewards/margins": 0.03843830153346062,
|
|
"eval_rewards/rejected": 0.01148825604468584,
|
|
"eval_runtime": 21.4126,
|
|
"eval_samples_per_second": 46.702,
|
|
"eval_steps_per_second": 11.675,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 0.38876889848812096,
|
|
"grad_norm": 55.5,
|
|
"learning_rate": 3.8348927597624964e-07,
|
|
"logits/chosen": -2.740044116973877,
|
|
"logits/rejected": -2.6301164627075195,
|
|
"logps/chosen": -231.58377075195312,
|
|
"logps/rejected": -210.11886596679688,
|
|
"loss": 0.6786,
|
|
"rewards/accuracies": 0.606249988079071,
|
|
"rewards/chosen": 0.052413731813430786,
|
|
"rewards/margins": 0.031818680465221405,
|
|
"rewards/rejected": 0.020595049485564232,
|
|
"step": 810
|
|
},
|
|
{
|
|
"epoch": 0.3935685145188385,
|
|
"grad_norm": 49.25,
|
|
"learning_rate": 3.7992712433243114e-07,
|
|
"logits/chosen": -2.717849016189575,
|
|
"logits/rejected": -2.5538547039031982,
|
|
"logps/chosen": -233.3022003173828,
|
|
"logps/rejected": -178.08187866210938,
|
|
"loss": 0.6776,
|
|
"rewards/accuracies": 0.6625000238418579,
|
|
"rewards/chosen": 0.034468021243810654,
|
|
"rewards/margins": 0.03307095915079117,
|
|
"rewards/rejected": 0.001397057669237256,
|
|
"step": 820
|
|
},
|
|
{
|
|
"epoch": 0.39836813054955605,
|
|
"grad_norm": 47.5,
|
|
"learning_rate": 3.7632845946073135e-07,
|
|
"logits/chosen": -2.7453646659851074,
|
|
"logits/rejected": -2.5826191902160645,
|
|
"logps/chosen": -246.8603515625,
|
|
"logps/rejected": -179.30404663085938,
|
|
"loss": 0.6707,
|
|
"rewards/accuracies": 0.6875,
|
|
"rewards/chosen": 0.045634619891643524,
|
|
"rewards/margins": 0.047946564853191376,
|
|
"rewards/rejected": -0.002311945194378495,
|
|
"step": 830
|
|
},
|
|
{
|
|
"epoch": 0.4031677465802736,
|
|
"grad_norm": 48.0,
|
|
"learning_rate": 3.7269429268862507e-07,
|
|
"logits/chosen": -2.710023880004883,
|
|
"logits/rejected": -2.6359734535217285,
|
|
"logps/chosen": -208.82150268554688,
|
|
"logps/rejected": -196.3325653076172,
|
|
"loss": 0.6799,
|
|
"rewards/accuracies": 0.637499988079071,
|
|
"rewards/chosen": 0.04252176731824875,
|
|
"rewards/margins": 0.02919645607471466,
|
|
"rewards/rejected": 0.01332530565559864,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 0.40796736261099115,
|
|
"grad_norm": 57.0,
|
|
"learning_rate": 3.6902564532063336e-07,
|
|
"logits/chosen": -2.7001442909240723,
|
|
"logits/rejected": -2.6420705318450928,
|
|
"logps/chosen": -214.80667114257812,
|
|
"logps/rejected": -198.60533142089844,
|
|
"loss": 0.6792,
|
|
"rewards/accuracies": 0.6187499761581421,
|
|
"rewards/chosen": 0.04096178710460663,
|
|
"rewards/margins": 0.03064887225627899,
|
|
"rewards/rejected": 0.010312914848327637,
|
|
"step": 850
|
|
},
|
|
{
|
|
"epoch": 0.41276697864170864,
|
|
"grad_norm": 50.0,
|
|
"learning_rate": 3.653235483513084e-07,
|
|
"logits/chosen": -2.736861228942871,
|
|
"logits/rejected": -2.618833541870117,
|
|
"logps/chosen": -248.7901153564453,
|
|
"logps/rejected": -216.1083526611328,
|
|
"loss": 0.6743,
|
|
"rewards/accuracies": 0.6312500238418579,
|
|
"rewards/chosen": 0.056495171040296555,
|
|
"rewards/margins": 0.041087765246629715,
|
|
"rewards/rejected": 0.015407413244247437,
|
|
"step": 860
|
|
},
|
|
{
|
|
"epoch": 0.4175665946724262,
|
|
"grad_norm": 48.0,
|
|
"learning_rate": 3.615890421754944e-07,
|
|
"logits/chosen": -2.724944591522217,
|
|
"logits/rejected": -2.6574723720550537,
|
|
"logps/chosen": -223.84408569335938,
|
|
"logps/rejected": -194.44735717773438,
|
|
"loss": 0.6796,
|
|
"rewards/accuracies": 0.5874999761581421,
|
|
"rewards/chosen": 0.042622704058885574,
|
|
"rewards/margins": 0.02971811592578888,
|
|
"rewards/rejected": 0.012904593721032143,
|
|
"step": 870
|
|
},
|
|
{
|
|
"epoch": 0.42236621070314373,
|
|
"grad_norm": 61.0,
|
|
"learning_rate": 3.5782317629594706e-07,
|
|
"logits/chosen": -2.706808567047119,
|
|
"logits/rejected": -2.6081411838531494,
|
|
"logps/chosen": -241.17495727539062,
|
|
"logps/rejected": -212.4481658935547,
|
|
"loss": 0.6819,
|
|
"rewards/accuracies": 0.550000011920929,
|
|
"rewards/chosen": 0.047704242169857025,
|
|
"rewards/margins": 0.025270383805036545,
|
|
"rewards/rejected": 0.022433852776885033,
|
|
"step": 880
|
|
},
|
|
{
|
|
"epoch": 0.4271658267338613,
|
|
"grad_norm": 47.5,
|
|
"learning_rate": 3.5402700902839313e-07,
|
|
"logits/chosen": -2.6064233779907227,
|
|
"logits/rejected": -2.556283473968506,
|
|
"logps/chosen": -206.00344848632812,
|
|
"logps/rejected": -208.06936645507812,
|
|
"loss": 0.6791,
|
|
"rewards/accuracies": 0.606249988079071,
|
|
"rewards/chosen": 0.036233410239219666,
|
|
"rewards/margins": 0.030693132430315018,
|
|
"rewards/rejected": 0.005540275014936924,
|
|
"step": 890
|
|
},
|
|
{
|
|
"epoch": 0.4319654427645788,
|
|
"grad_norm": 46.75,
|
|
"learning_rate": 3.5020160720411403e-07,
|
|
"logits/chosen": -2.722177267074585,
|
|
"logits/rejected": -2.592517137527466,
|
|
"logps/chosen": -234.11703491210938,
|
|
"logps/rejected": -216.63766479492188,
|
|
"loss": 0.6759,
|
|
"rewards/accuracies": 0.643750011920929,
|
|
"rewards/chosen": 0.05363321304321289,
|
|
"rewards/margins": 0.03814633563160896,
|
|
"rewards/rejected": 0.015486878342926502,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 0.43676505879529637,
|
|
"grad_norm": 52.75,
|
|
"learning_rate": 3.46348045870135e-07,
|
|
"logits/chosen": -2.6586787700653076,
|
|
"logits/rejected": -2.601860523223877,
|
|
"logps/chosen": -215.1962432861328,
|
|
"logps/rejected": -208.45068359375,
|
|
"loss": 0.6834,
|
|
"rewards/accuracies": 0.5687500238418579,
|
|
"rewards/chosen": 0.04313874989748001,
|
|
"rewards/margins": 0.022245222702622414,
|
|
"rewards/rejected": 0.020893529057502747,
|
|
"step": 910
|
|
},
|
|
{
|
|
"epoch": 0.4415646748260139,
|
|
"grad_norm": 45.75,
|
|
"learning_rate": 3.4246740798710725e-07,
|
|
"logits/chosen": -2.672468900680542,
|
|
"logits/rejected": -2.5783610343933105,
|
|
"logps/chosen": -216.0347442626953,
|
|
"logps/rejected": -195.738037109375,
|
|
"loss": 0.6819,
|
|
"rewards/accuracies": 0.6499999761581421,
|
|
"rewards/chosen": 0.03864717856049538,
|
|
"rewards/margins": 0.02574675716459751,
|
|
"rewards/rejected": 0.01290042232722044,
|
|
"step": 920
|
|
},
|
|
{
|
|
"epoch": 0.44636429085673146,
|
|
"grad_norm": 50.25,
|
|
"learning_rate": 3.3856078412496417e-07,
|
|
"logits/chosen": -2.729473829269409,
|
|
"logits/rejected": -2.588343858718872,
|
|
"logps/chosen": -238.8496551513672,
|
|
"logps/rejected": -190.5778045654297,
|
|
"loss": 0.6685,
|
|
"rewards/accuracies": 0.706250011920929,
|
|
"rewards/chosen": 0.048866622149944305,
|
|
"rewards/margins": 0.05237164348363876,
|
|
"rewards/rejected": -0.003505019936710596,
|
|
"step": 930
|
|
},
|
|
{
|
|
"epoch": 0.451163906887449,
|
|
"grad_norm": 49.5,
|
|
"learning_rate": 3.3462927215644066e-07,
|
|
"logits/chosen": -2.747483968734741,
|
|
"logits/rejected": -2.640693187713623,
|
|
"logps/chosen": -262.37213134765625,
|
|
"logps/rejected": -216.82742309570312,
|
|
"loss": 0.6748,
|
|
"rewards/accuracies": 0.668749988079071,
|
|
"rewards/chosen": 0.060678768903017044,
|
|
"rewards/margins": 0.04027427360415459,
|
|
"rewards/rejected": 0.020404506474733353,
|
|
"step": 940
|
|
},
|
|
{
|
|
"epoch": 0.45596352291816655,
|
|
"grad_norm": 42.75,
|
|
"learning_rate": 3.3067397694853937e-07,
|
|
"logits/chosen": -2.6840896606445312,
|
|
"logits/rejected": -2.554112434387207,
|
|
"logps/chosen": -233.49899291992188,
|
|
"logps/rejected": -191.6746063232422,
|
|
"loss": 0.6713,
|
|
"rewards/accuracies": 0.6812499761581421,
|
|
"rewards/chosen": 0.051722604781389236,
|
|
"rewards/margins": 0.04748953878879547,
|
|
"rewards/rejected": 0.0042330720461905,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 0.4607631389488841,
|
|
"grad_norm": 46.0,
|
|
"learning_rate": 3.2669601005203155e-07,
|
|
"logits/chosen": -2.717355251312256,
|
|
"logits/rejected": -2.615908145904541,
|
|
"logps/chosen": -204.6543426513672,
|
|
"logps/rejected": -185.7799072265625,
|
|
"loss": 0.6734,
|
|
"rewards/accuracies": 0.65625,
|
|
"rewards/chosen": 0.04618459939956665,
|
|
"rewards/margins": 0.042539265006780624,
|
|
"rewards/rejected": 0.003645337652415037,
|
|
"step": 960
|
|
},
|
|
{
|
|
"epoch": 0.46556275497960165,
|
|
"grad_norm": 48.5,
|
|
"learning_rate": 3.2269648938907973e-07,
|
|
"logits/chosen": -2.6776490211486816,
|
|
"logits/rejected": -2.560394287109375,
|
|
"logps/chosen": -214.96142578125,
|
|
"logps/rejected": -182.30978393554688,
|
|
"loss": 0.6781,
|
|
"rewards/accuracies": 0.65625,
|
|
"rewards/chosen": 0.0470888614654541,
|
|
"rewards/margins": 0.033395569771528244,
|
|
"rewards/rejected": 0.013693295419216156,
|
|
"step": 970
|
|
},
|
|
{
|
|
"epoch": 0.4703623710103192,
|
|
"grad_norm": 49.0,
|
|
"learning_rate": 3.186765389390695e-07,
|
|
"logits/chosen": -2.7659125328063965,
|
|
"logits/rejected": -2.6198360919952393,
|
|
"logps/chosen": -251.896240234375,
|
|
"logps/rejected": -194.74826049804688,
|
|
"loss": 0.6773,
|
|
"rewards/accuracies": 0.643750011920929,
|
|
"rewards/chosen": 0.043909598141908646,
|
|
"rewards/margins": 0.03449582681059837,
|
|
"rewards/rejected": 0.009413773193955421,
|
|
"step": 980
|
|
},
|
|
{
|
|
"epoch": 0.47516198704103674,
|
|
"grad_norm": 50.25,
|
|
"learning_rate": 3.146372884227393e-07,
|
|
"logits/chosen": -2.7383854389190674,
|
|
"logits/rejected": -2.633877992630005,
|
|
"logps/chosen": -249.55557250976562,
|
|
"logps/rejected": -215.5314178466797,
|
|
"loss": 0.6781,
|
|
"rewards/accuracies": 0.612500011920929,
|
|
"rewards/chosen": 0.05108872801065445,
|
|
"rewards/margins": 0.03366169333457947,
|
|
"rewards/rejected": 0.01742703653872013,
|
|
"step": 990
|
|
},
|
|
{
|
|
"epoch": 0.4799616030717543,
|
|
"grad_norm": 45.75,
|
|
"learning_rate": 3.105798729846969e-07,
|
|
"logits/chosen": -2.6620967388153076,
|
|
"logits/rejected": -2.5416641235351562,
|
|
"logps/chosen": -214.88015747070312,
|
|
"logps/rejected": -182.47698974609375,
|
|
"loss": 0.676,
|
|
"rewards/accuracies": 0.675000011920929,
|
|
"rewards/chosen": 0.04558812081813812,
|
|
"rewards/margins": 0.0367765799164772,
|
|
"rewards/rejected": 0.00881153903901577,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 0.4799616030717543,
|
|
"eval_logits/chosen": -2.716266393661499,
|
|
"eval_logits/rejected": -2.595984935760498,
|
|
"eval_logps/chosen": -232.63925170898438,
|
|
"eval_logps/rejected": -205.39816284179688,
|
|
"eval_loss": 0.6766188740730286,
|
|
"eval_rewards/accuracies": 0.6570000052452087,
|
|
"eval_rewards/chosen": 0.04937145859003067,
|
|
"eval_rewards/margins": 0.03610716760158539,
|
|
"eval_rewards/rejected": 0.013264299370348454,
|
|
"eval_runtime": 21.4065,
|
|
"eval_samples_per_second": 46.715,
|
|
"eval_steps_per_second": 11.679,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 0.48476121910247183,
|
|
"grad_norm": 47.0,
|
|
"learning_rate": 3.065054328744109e-07,
|
|
"logits/chosen": -2.6782500743865967,
|
|
"logits/rejected": -2.5327606201171875,
|
|
"logps/chosen": -249.7314910888672,
|
|
"logps/rejected": -209.0973663330078,
|
|
"loss": 0.6751,
|
|
"rewards/accuracies": 0.6812499761581421,
|
|
"rewards/chosen": 0.05049954727292061,
|
|
"rewards/margins": 0.03927897661924362,
|
|
"rewards/rejected": 0.011220571584999561,
|
|
"step": 1010
|
|
},
|
|
{
|
|
"epoch": 0.4895608351331893,
|
|
"grad_norm": 51.75,
|
|
"learning_rate": 3.024151131257687e-07,
|
|
"logits/chosen": -2.7015366554260254,
|
|
"logits/rejected": -2.5806756019592285,
|
|
"logps/chosen": -245.3987274169922,
|
|
"logps/rejected": -191.32785034179688,
|
|
"loss": 0.6728,
|
|
"rewards/accuracies": 0.675000011920929,
|
|
"rewards/chosen": 0.04835500195622444,
|
|
"rewards/margins": 0.04381892830133438,
|
|
"rewards/rejected": 0.00453607365489006,
|
|
"step": 1020
|
|
},
|
|
{
|
|
"epoch": 0.49436045116390687,
|
|
"grad_norm": 50.75,
|
|
"learning_rate": 2.9831006323528886e-07,
|
|
"logits/chosen": -2.7741270065307617,
|
|
"logits/rejected": -2.5906481742858887,
|
|
"logps/chosen": -254.17239379882812,
|
|
"logps/rejected": -197.8710479736328,
|
|
"loss": 0.673,
|
|
"rewards/accuracies": 0.6937500238418579,
|
|
"rewards/chosen": 0.059526920318603516,
|
|
"rewards/margins": 0.04361771419644356,
|
|
"rewards/rejected": 0.01590920425951481,
|
|
"step": 1030
|
|
},
|
|
{
|
|
"epoch": 0.4991600671946244,
|
|
"grad_norm": 48.0,
|
|
"learning_rate": 2.941914368390798e-07,
|
|
"logits/chosen": -2.692235231399536,
|
|
"logits/rejected": -2.610217332839966,
|
|
"logps/chosen": -218.3246307373047,
|
|
"logps/rejected": -205.70993041992188,
|
|
"loss": 0.6816,
|
|
"rewards/accuracies": 0.6000000238418579,
|
|
"rewards/chosen": 0.032597918063402176,
|
|
"rewards/margins": 0.025554979220032692,
|
|
"rewards/rejected": 0.007042936980724335,
|
|
"step": 1040
|
|
},
|
|
{
|
|
"epoch": 0.503959683225342,
|
|
"grad_norm": 56.75,
|
|
"learning_rate": 2.900603913886357e-07,
|
|
"logits/chosen": -2.672635555267334,
|
|
"logits/rejected": -2.5501255989074707,
|
|
"logps/chosen": -244.4874267578125,
|
|
"logps/rejected": -211.55068969726562,
|
|
"loss": 0.6746,
|
|
"rewards/accuracies": 0.6625000238418579,
|
|
"rewards/chosen": 0.04713388532400131,
|
|
"rewards/margins": 0.04026245325803757,
|
|
"rewards/rejected": 0.006871436722576618,
|
|
"step": 1050
|
|
},
|
|
{
|
|
"epoch": 0.5087592992560596,
|
|
"grad_norm": 49.25,
|
|
"learning_rate": 2.859180878255588e-07,
|
|
"logits/chosen": -2.682440996170044,
|
|
"logits/rejected": -2.611323833465576,
|
|
"logps/chosen": -232.0714874267578,
|
|
"logps/rejected": -215.4655303955078,
|
|
"loss": 0.6815,
|
|
"rewards/accuracies": 0.5562499761581421,
|
|
"rewards/chosen": 0.04437769576907158,
|
|
"rewards/margins": 0.02631448581814766,
|
|
"rewards/rejected": 0.018063215538859367,
|
|
"step": 1060
|
|
},
|
|
{
|
|
"epoch": 0.5135589152867771,
|
|
"grad_norm": 50.25,
|
|
"learning_rate": 2.8176569025530234e-07,
|
|
"logits/chosen": -2.7059943675994873,
|
|
"logits/rejected": -2.602865219116211,
|
|
"logps/chosen": -232.48147583007812,
|
|
"logps/rejected": -204.5135040283203,
|
|
"loss": 0.6773,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": 0.05438702180981636,
|
|
"rewards/margins": 0.034470170736312866,
|
|
"rewards/rejected": 0.019916851073503494,
|
|
"step": 1070
|
|
},
|
|
{
|
|
"epoch": 0.5183585313174947,
|
|
"grad_norm": 50.25,
|
|
"learning_rate": 2.7760436562002346e-07,
|
|
"logits/chosen": -2.6945126056671143,
|
|
"logits/rejected": -2.516050338745117,
|
|
"logps/chosen": -265.76055908203125,
|
|
"logps/rejected": -181.6714324951172,
|
|
"loss": 0.6745,
|
|
"rewards/accuracies": 0.6625000238418579,
|
|
"rewards/chosen": 0.05421316623687744,
|
|
"rewards/margins": 0.04068455100059509,
|
|
"rewards/rejected": 0.0135286133736372,
|
|
"step": 1080
|
|
},
|
|
{
|
|
"epoch": 0.5231581473482121,
|
|
"grad_norm": 53.5,
|
|
"learning_rate": 2.734352833706392e-07,
|
|
"logits/chosen": -2.7844748497009277,
|
|
"logits/rejected": -2.654388904571533,
|
|
"logps/chosen": -249.6466827392578,
|
|
"logps/rejected": -214.0853271484375,
|
|
"loss": 0.6748,
|
|
"rewards/accuracies": 0.606249988079071,
|
|
"rewards/chosen": 0.06085364893078804,
|
|
"rewards/margins": 0.04021080583333969,
|
|
"rewards/rejected": 0.020642835646867752,
|
|
"step": 1090
|
|
},
|
|
{
|
|
"epoch": 0.5279577633789296,
|
|
"grad_norm": 47.75,
|
|
"learning_rate": 2.6925961513817733e-07,
|
|
"logits/chosen": -2.6918578147888184,
|
|
"logits/rejected": -2.627488374710083,
|
|
"logps/chosen": -199.40310668945312,
|
|
"logps/rejected": -200.98233032226562,
|
|
"loss": 0.676,
|
|
"rewards/accuracies": 0.637499988079071,
|
|
"rewards/chosen": 0.043318361043930054,
|
|
"rewards/margins": 0.03646283596754074,
|
|
"rewards/rejected": 0.006855523679405451,
|
|
"step": 1100
|
|
},
|
|
{
|
|
"epoch": 0.5327573794096472,
|
|
"grad_norm": 50.0,
|
|
"learning_rate": 2.6507853440451484e-07,
|
|
"logits/chosen": -2.7055044174194336,
|
|
"logits/rejected": -2.61013126373291,
|
|
"logps/chosen": -227.26321411132812,
|
|
"logps/rejected": -205.27490234375,
|
|
"loss": 0.6744,
|
|
"rewards/accuracies": 0.637499988079071,
|
|
"rewards/chosen": 0.04602568596601486,
|
|
"rewards/margins": 0.04149339720606804,
|
|
"rewards/rejected": 0.00453228922560811,
|
|
"step": 1110
|
|
},
|
|
{
|
|
"epoch": 0.5375569954403647,
|
|
"grad_norm": 42.0,
|
|
"learning_rate": 2.608932161725958e-07,
|
|
"logits/chosen": -2.7049965858459473,
|
|
"logits/rejected": -2.570584774017334,
|
|
"logps/chosen": -232.04818725585938,
|
|
"logps/rejected": -203.71127319335938,
|
|
"loss": 0.6741,
|
|
"rewards/accuracies": 0.637499988079071,
|
|
"rewards/chosen": 0.04708060622215271,
|
|
"rewards/margins": 0.04201260581612587,
|
|
"rewards/rejected": 0.005067999474704266,
|
|
"step": 1120
|
|
},
|
|
{
|
|
"epoch": 0.5423566114710823,
|
|
"grad_norm": 56.5,
|
|
"learning_rate": 2.5670483663622247e-07,
|
|
"logits/chosen": -2.6920132637023926,
|
|
"logits/rejected": -2.583217144012451,
|
|
"logps/chosen": -239.03427124023438,
|
|
"logps/rejected": -200.62376403808594,
|
|
"loss": 0.6751,
|
|
"rewards/accuracies": 0.637499988079071,
|
|
"rewards/chosen": 0.04974411427974701,
|
|
"rewards/margins": 0.039146848022937775,
|
|
"rewards/rejected": 0.010597268119454384,
|
|
"step": 1130
|
|
},
|
|
{
|
|
"epoch": 0.5471562275017998,
|
|
"grad_norm": 43.5,
|
|
"learning_rate": 2.5251457284951056e-07,
|
|
"logits/chosen": -2.709200859069824,
|
|
"logits/rejected": -2.6131153106689453,
|
|
"logps/chosen": -227.12826538085938,
|
|
"logps/rejected": -191.06222534179688,
|
|
"loss": 0.6735,
|
|
"rewards/accuracies": 0.7124999761581421,
|
|
"rewards/chosen": 0.051633380353450775,
|
|
"rewards/margins": 0.04308091849088669,
|
|
"rewards/rejected": 0.008552461862564087,
|
|
"step": 1140
|
|
},
|
|
{
|
|
"epoch": 0.5519558435325174,
|
|
"grad_norm": 45.5,
|
|
"learning_rate": 2.4832360239610414e-07,
|
|
"logits/chosen": -2.702821731567383,
|
|
"logits/rejected": -2.5874671936035156,
|
|
"logps/chosen": -228.0370635986328,
|
|
"logps/rejected": -201.68345642089844,
|
|
"loss": 0.6721,
|
|
"rewards/accuracies": 0.65625,
|
|
"rewards/chosen": 0.052544206380844116,
|
|
"rewards/margins": 0.04553115367889404,
|
|
"rewards/rejected": 0.007013053633272648,
|
|
"step": 1150
|
|
},
|
|
{
|
|
"epoch": 0.5567554595632349,
|
|
"grad_norm": 52.25,
|
|
"learning_rate": 2.441331030582407e-07,
|
|
"logits/chosen": -2.720001697540283,
|
|
"logits/rejected": -2.630744457244873,
|
|
"logps/chosen": -223.72116088867188,
|
|
"logps/rejected": -205.20474243164062,
|
|
"loss": 0.6789,
|
|
"rewards/accuracies": 0.6499999761581421,
|
|
"rewards/chosen": 0.05077257752418518,
|
|
"rewards/margins": 0.030812978744506836,
|
|
"rewards/rejected": 0.019959593191742897,
|
|
"step": 1160
|
|
},
|
|
{
|
|
"epoch": 0.5615550755939525,
|
|
"grad_norm": 48.5,
|
|
"learning_rate": 2.39944252485761e-07,
|
|
"logits/chosen": -2.7418465614318848,
|
|
"logits/rejected": -2.5958893299102783,
|
|
"logps/chosen": -245.22238159179688,
|
|
"logps/rejected": -192.0289764404297,
|
|
"loss": 0.6723,
|
|
"rewards/accuracies": 0.637499988079071,
|
|
"rewards/chosen": 0.05651768296957016,
|
|
"rewards/margins": 0.04533126950263977,
|
|
"rewards/rejected": 0.011186418123543262,
|
|
"step": 1170
|
|
},
|
|
{
|
|
"epoch": 0.56635469162467,
|
|
"grad_norm": 53.25,
|
|
"learning_rate": 2.3575822786515529e-07,
|
|
"logits/chosen": -2.6802361011505127,
|
|
"logits/rejected": -2.5656845569610596,
|
|
"logps/chosen": -231.7133026123047,
|
|
"logps/rejected": -209.87765502929688,
|
|
"loss": 0.6792,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": 0.04587788134813309,
|
|
"rewards/margins": 0.03062388300895691,
|
|
"rewards/rejected": 0.015253995545208454,
|
|
"step": 1180
|
|
},
|
|
{
|
|
"epoch": 0.5711543076553875,
|
|
"grad_norm": 54.0,
|
|
"learning_rate": 2.3157620558874106e-07,
|
|
"logits/chosen": -2.723170757293701,
|
|
"logits/rejected": -2.5895724296569824,
|
|
"logps/chosen": -242.99923706054688,
|
|
"logps/rejected": -193.60450744628906,
|
|
"loss": 0.6776,
|
|
"rewards/accuracies": 0.612500011920929,
|
|
"rewards/chosen": 0.052822746336460114,
|
|
"rewards/margins": 0.03466617316007614,
|
|
"rewards/rejected": 0.018156569451093674,
|
|
"step": 1190
|
|
},
|
|
{
|
|
"epoch": 0.5759539236861051,
|
|
"grad_norm": 42.75,
|
|
"learning_rate": 2.2739936092406286e-07,
|
|
"logits/chosen": -2.674161434173584,
|
|
"logits/rejected": -2.576936721801758,
|
|
"logps/chosen": -226.0552215576172,
|
|
"logps/rejected": -213.71524047851562,
|
|
"loss": 0.6774,
|
|
"rewards/accuracies": 0.65625,
|
|
"rewards/chosen": 0.05344771221280098,
|
|
"rewards/margins": 0.034632958471775055,
|
|
"rewards/rejected": 0.018814753741025925,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.5759539236861051,
|
|
"eval_logits/chosen": -2.71634578704834,
|
|
"eval_logits/rejected": -2.595935583114624,
|
|
"eval_logps/chosen": -232.62538146972656,
|
|
"eval_logps/rejected": -205.43319702148438,
|
|
"eval_loss": 0.6742354035377502,
|
|
"eval_rewards/accuracies": 0.675000011920929,
|
|
"eval_rewards/chosen": 0.050759363919496536,
|
|
"eval_rewards/margins": 0.041003111749887466,
|
|
"eval_rewards/rejected": 0.009756244719028473,
|
|
"eval_runtime": 21.4174,
|
|
"eval_samples_per_second": 46.691,
|
|
"eval_steps_per_second": 11.673,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.5807535397168226,
|
|
"grad_norm": 48.25,
|
|
"learning_rate": 2.232288676836087e-07,
|
|
"logits/chosen": -2.617983341217041,
|
|
"logits/rejected": -2.5485970973968506,
|
|
"logps/chosen": -238.41134643554688,
|
|
"logps/rejected": -203.52255249023438,
|
|
"loss": 0.6683,
|
|
"rewards/accuracies": 0.7124999761581421,
|
|
"rewards/chosen": 0.06037604808807373,
|
|
"rewards/margins": 0.05402814596891403,
|
|
"rewards/rejected": 0.006347896996885538,
|
|
"step": 1210
|
|
},
|
|
{
|
|
"epoch": 0.5855531557475402,
|
|
"grad_norm": 50.25,
|
|
"learning_rate": 2.1906589789493518e-07,
|
|
"logits/chosen": -2.710653066635132,
|
|
"logits/rejected": -2.5681469440460205,
|
|
"logps/chosen": -217.65231323242188,
|
|
"logps/rejected": -183.86453247070312,
|
|
"loss": 0.6765,
|
|
"rewards/accuracies": 0.6312500238418579,
|
|
"rewards/chosen": 0.04624713212251663,
|
|
"rewards/margins": 0.036256637424230576,
|
|
"rewards/rejected": 0.009990494698286057,
|
|
"step": 1220
|
|
},
|
|
{
|
|
"epoch": 0.5903527717782577,
|
|
"grad_norm": 50.25,
|
|
"learning_rate": 2.1491162147129428e-07,
|
|
"logits/chosen": -2.71733021736145,
|
|
"logits/rejected": -2.6050782203674316,
|
|
"logps/chosen": -232.63601684570312,
|
|
"logps/rejected": -208.39132690429688,
|
|
"loss": 0.6763,
|
|
"rewards/accuracies": 0.643750011920929,
|
|
"rewards/chosen": 0.051035962998867035,
|
|
"rewards/margins": 0.036614201962947845,
|
|
"rewards/rejected": 0.014421762898564339,
|
|
"step": 1230
|
|
},
|
|
{
|
|
"epoch": 0.5951523878089753,
|
|
"grad_norm": 48.0,
|
|
"learning_rate": 2.107672058828544e-07,
|
|
"logits/chosen": -2.722160816192627,
|
|
"logits/rejected": -2.608168363571167,
|
|
"logps/chosen": -226.43807983398438,
|
|
"logps/rejected": -192.34970092773438,
|
|
"loss": 0.6717,
|
|
"rewards/accuracies": 0.675000011920929,
|
|
"rewards/chosen": 0.05640612170100212,
|
|
"rewards/margins": 0.0462309755384922,
|
|
"rewards/rejected": 0.010175148025155067,
|
|
"step": 1240
|
|
},
|
|
{
|
|
"epoch": 0.5999520038396928,
|
|
"grad_norm": 54.5,
|
|
"learning_rate": 2.0663381582860825e-07,
|
|
"logits/chosen": -2.7216320037841797,
|
|
"logits/rejected": -2.643075942993164,
|
|
"logps/chosen": -226.637451171875,
|
|
"logps/rejected": -209.5499725341797,
|
|
"loss": 0.6759,
|
|
"rewards/accuracies": 0.643750011920929,
|
|
"rewards/chosen": 0.046819452196359634,
|
|
"rewards/margins": 0.03698267415165901,
|
|
"rewards/rejected": 0.00983678363263607,
|
|
"step": 1250
|
|
},
|
|
{
|
|
"epoch": 0.6047516198704104,
|
|
"grad_norm": 45.25,
|
|
"learning_rate": 2.025126129090588e-07,
|
|
"logits/chosen": -2.776801586151123,
|
|
"logits/rejected": -2.626488447189331,
|
|
"logps/chosen": -221.3492431640625,
|
|
"logps/rejected": -179.89920043945312,
|
|
"loss": 0.6709,
|
|
"rewards/accuracies": 0.6937500238418579,
|
|
"rewards/chosen": 0.051989030092954636,
|
|
"rewards/margins": 0.04751256853342056,
|
|
"rewards/rejected": 0.004476464353501797,
|
|
"step": 1260
|
|
},
|
|
{
|
|
"epoch": 0.6095512359011279,
|
|
"grad_norm": 43.5,
|
|
"learning_rate": 1.9840475529977655e-07,
|
|
"logits/chosen": -2.71726655960083,
|
|
"logits/rejected": -2.6046361923217773,
|
|
"logps/chosen": -227.9778594970703,
|
|
"logps/rejected": -195.9775390625,
|
|
"loss": 0.6739,
|
|
"rewards/accuracies": 0.643750011920929,
|
|
"rewards/chosen": 0.044441260397434235,
|
|
"rewards/margins": 0.04231487214565277,
|
|
"rewards/rejected": 0.0021263775415718555,
|
|
"step": 1270
|
|
},
|
|
{
|
|
"epoch": 0.6143508519318455,
|
|
"grad_norm": 49.75,
|
|
"learning_rate": 1.9431139742591896e-07,
|
|
"logits/chosen": -2.7021281719207764,
|
|
"logits/rejected": -2.58604097366333,
|
|
"logps/chosen": -207.78173828125,
|
|
"logps/rejected": -187.71017456054688,
|
|
"loss": 0.6787,
|
|
"rewards/accuracies": 0.6812499761581421,
|
|
"rewards/chosen": 0.03924870118498802,
|
|
"rewards/margins": 0.031981147825717926,
|
|
"rewards/rejected": 0.0072675542905926704,
|
|
"step": 1280
|
|
},
|
|
{
|
|
"epoch": 0.619150467962563,
|
|
"grad_norm": 52.0,
|
|
"learning_rate": 1.9023368963780455e-07,
|
|
"logits/chosen": -2.721538543701172,
|
|
"logits/rejected": -2.6105265617370605,
|
|
"logps/chosen": -232.59326171875,
|
|
"logps/rejected": -196.3831024169922,
|
|
"loss": 0.6749,
|
|
"rewards/accuracies": 0.6312500238418579,
|
|
"rewards/chosen": 0.04439551383256912,
|
|
"rewards/margins": 0.03949584811925888,
|
|
"rewards/rejected": 0.004899662919342518,
|
|
"step": 1290
|
|
},
|
|
{
|
|
"epoch": 0.6239500839932806,
|
|
"grad_norm": 47.25,
|
|
"learning_rate": 1.861727778876314e-07,
|
|
"logits/chosen": -2.7027573585510254,
|
|
"logits/rejected": -2.5897445678710938,
|
|
"logps/chosen": -207.4355926513672,
|
|
"logps/rejected": -173.4372100830078,
|
|
"loss": 0.6804,
|
|
"rewards/accuracies": 0.637499988079071,
|
|
"rewards/chosen": 0.030175339430570602,
|
|
"rewards/margins": 0.028225919231772423,
|
|
"rewards/rejected": 0.001949421362951398,
|
|
"step": 1300
|
|
},
|
|
{
|
|
"epoch": 0.6287497000239981,
|
|
"grad_norm": 46.5,
|
|
"learning_rate": 1.821298034074315e-07,
|
|
"logits/chosen": -2.7313363552093506,
|
|
"logits/rejected": -2.6595630645751953,
|
|
"logps/chosen": -222.908447265625,
|
|
"logps/rejected": -203.55274963378906,
|
|
"loss": 0.685,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": 0.03836590051651001,
|
|
"rewards/margins": 0.019602758809924126,
|
|
"rewards/rejected": 0.018763139843940735,
|
|
"step": 1310
|
|
},
|
|
{
|
|
"epoch": 0.6335493160547156,
|
|
"grad_norm": 48.5,
|
|
"learning_rate": 1.7810590238835276e-07,
|
|
"logits/chosen": -2.6614937782287598,
|
|
"logits/rejected": -2.6302168369293213,
|
|
"logps/chosen": -224.0082244873047,
|
|
"logps/rejected": -239.5669708251953,
|
|
"loss": 0.6785,
|
|
"rewards/accuracies": 0.59375,
|
|
"rewards/chosen": 0.049693018198013306,
|
|
"rewards/margins": 0.03309093788266182,
|
|
"rewards/rejected": 0.016602078452706337,
|
|
"step": 1320
|
|
},
|
|
{
|
|
"epoch": 0.6383489320854332,
|
|
"grad_norm": 48.75,
|
|
"learning_rate": 1.7410220566135603e-07,
|
|
"logits/chosen": -2.733497142791748,
|
|
"logits/rejected": -2.613424777984619,
|
|
"logps/chosen": -227.0560302734375,
|
|
"logps/rejected": -196.37181091308594,
|
|
"loss": 0.6739,
|
|
"rewards/accuracies": 0.675000011920929,
|
|
"rewards/chosen": 0.05607549101114273,
|
|
"rewards/margins": 0.041366271674633026,
|
|
"rewards/rejected": 0.014709214679896832,
|
|
"step": 1330
|
|
},
|
|
{
|
|
"epoch": 0.6431485481161507,
|
|
"grad_norm": 46.75,
|
|
"learning_rate": 1.7011983837942021e-07,
|
|
"logits/chosen": -2.7072107791900635,
|
|
"logits/rejected": -2.5902278423309326,
|
|
"logps/chosen": -233.81179809570312,
|
|
"logps/rejected": -206.1332244873047,
|
|
"loss": 0.6712,
|
|
"rewards/accuracies": 0.65625,
|
|
"rewards/chosen": 0.0620468370616436,
|
|
"rewards/margins": 0.04747987538576126,
|
|
"rewards/rejected": 0.014566963538527489,
|
|
"step": 1340
|
|
},
|
|
{
|
|
"epoch": 0.6479481641468683,
|
|
"grad_norm": 45.5,
|
|
"learning_rate": 1.6615991970134158e-07,
|
|
"logits/chosen": -2.741150379180908,
|
|
"logits/rejected": -2.6298651695251465,
|
|
"logps/chosen": -221.12841796875,
|
|
"logps/rejected": -191.54258728027344,
|
|
"loss": 0.6773,
|
|
"rewards/accuracies": 0.675000011920929,
|
|
"rewards/chosen": 0.04875689372420311,
|
|
"rewards/margins": 0.03473493829369545,
|
|
"rewards/rejected": 0.014021962881088257,
|
|
"step": 1350
|
|
},
|
|
{
|
|
"epoch": 0.6527477801775858,
|
|
"grad_norm": 46.75,
|
|
"learning_rate": 1.622235624772183e-07,
|
|
"logits/chosen": -2.6976001262664795,
|
|
"logits/rejected": -2.5869300365448,
|
|
"logps/chosen": -232.46533203125,
|
|
"logps/rejected": -209.5470428466797,
|
|
"loss": 0.6755,
|
|
"rewards/accuracies": 0.6875,
|
|
"rewards/chosen": 0.05371447280049324,
|
|
"rewards/margins": 0.0387248769402504,
|
|
"rewards/rejected": 0.014989593997597694,
|
|
"step": 1360
|
|
},
|
|
{
|
|
"epoch": 0.6575473962083034,
|
|
"grad_norm": 46.5,
|
|
"learning_rate": 1.5831187293570825e-07,
|
|
"logits/chosen": -2.722553014755249,
|
|
"logits/rejected": -2.602963924407959,
|
|
"logps/chosen": -272.6893615722656,
|
|
"logps/rejected": -217.13632202148438,
|
|
"loss": 0.6784,
|
|
"rewards/accuracies": 0.5874999761581421,
|
|
"rewards/chosen": 0.06017666310071945,
|
|
"rewards/margins": 0.03393205627799034,
|
|
"rewards/rejected": 0.026244616135954857,
|
|
"step": 1370
|
|
},
|
|
{
|
|
"epoch": 0.6623470122390209,
|
|
"grad_norm": 50.25,
|
|
"learning_rate": 1.5442595037314648e-07,
|
|
"logits/chosen": -2.7165400981903076,
|
|
"logits/rejected": -2.5861897468566895,
|
|
"logps/chosen": -240.0535888671875,
|
|
"logps/rejected": -186.63296508789062,
|
|
"loss": 0.6701,
|
|
"rewards/accuracies": 0.6937500238418579,
|
|
"rewards/chosen": 0.06224694103002548,
|
|
"rewards/margins": 0.049013856798410416,
|
|
"rewards/rejected": 0.013233085162937641,
|
|
"step": 1380
|
|
},
|
|
{
|
|
"epoch": 0.6671466282697385,
|
|
"grad_norm": 48.25,
|
|
"learning_rate": 1.5056688684461232e-07,
|
|
"logits/chosen": -2.7177727222442627,
|
|
"logits/rejected": -2.5875582695007324,
|
|
"logps/chosen": -241.77590942382812,
|
|
"logps/rejected": -202.8583221435547,
|
|
"loss": 0.6715,
|
|
"rewards/accuracies": 0.71875,
|
|
"rewards/chosen": 0.05555950850248337,
|
|
"rewards/margins": 0.04663746803998947,
|
|
"rewards/rejected": 0.008922042325139046,
|
|
"step": 1390
|
|
},
|
|
{
|
|
"epoch": 0.6719462443004559,
|
|
"grad_norm": 52.0,
|
|
"learning_rate": 1.4673576685703026e-07,
|
|
"logits/chosen": -2.71079158782959,
|
|
"logits/rejected": -2.6190669536590576,
|
|
"logps/chosen": -240.28317260742188,
|
|
"logps/rejected": -208.4333953857422,
|
|
"loss": 0.6746,
|
|
"rewards/accuracies": 0.675000011920929,
|
|
"rewards/chosen": 0.05780891329050064,
|
|
"rewards/margins": 0.04101189970970154,
|
|
"rewards/rejected": 0.016797009855508804,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.6719462443004559,
|
|
"eval_logits/chosen": -2.7162351608276367,
|
|
"eval_logits/rejected": -2.595787763595581,
|
|
"eval_logps/chosen": -232.603759765625,
|
|
"eval_logps/rejected": -205.423583984375,
|
|
"eval_loss": 0.6737259030342102,
|
|
"eval_rewards/accuracies": 0.6549999713897705,
|
|
"eval_rewards/chosen": 0.052920494228601456,
|
|
"eval_rewards/margins": 0.04220106825232506,
|
|
"eval_rewards/rejected": 0.010719424113631248,
|
|
"eval_runtime": 21.4584,
|
|
"eval_samples_per_second": 46.602,
|
|
"eval_steps_per_second": 11.65,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.6767458603311735,
|
|
"grad_norm": 45.25,
|
|
"learning_rate": 1.429336670643929e-07,
|
|
"logits/chosen": -2.6878199577331543,
|
|
"logits/rejected": -2.5576183795928955,
|
|
"logps/chosen": -217.6654052734375,
|
|
"logps/rejected": -194.15072631835938,
|
|
"loss": 0.6747,
|
|
"rewards/accuracies": 0.65625,
|
|
"rewards/chosen": 0.04295631870627403,
|
|
"rewards/margins": 0.04018958657979965,
|
|
"rewards/rejected": 0.0027667314279824495,
|
|
"step": 1410
|
|
},
|
|
{
|
|
"epoch": 0.681545476361891,
|
|
"grad_norm": 44.25,
|
|
"learning_rate": 1.3916165596519013e-07,
|
|
"logits/chosen": -2.721832036972046,
|
|
"logits/rejected": -2.5464541912078857,
|
|
"logps/chosen": -230.22433471679688,
|
|
"logps/rejected": -185.80426025390625,
|
|
"loss": 0.673,
|
|
"rewards/accuracies": 0.6937500238418579,
|
|
"rewards/chosen": 0.0356689877808094,
|
|
"rewards/margins": 0.04347275570034981,
|
|
"rewards/rejected": -0.007803765125572681,
|
|
"step": 1420
|
|
},
|
|
{
|
|
"epoch": 0.6863450923926085,
|
|
"grad_norm": 50.75,
|
|
"learning_rate": 1.354207936021309e-07,
|
|
"logits/chosen": -2.7223222255706787,
|
|
"logits/rejected": -2.565199375152588,
|
|
"logps/chosen": -222.4684295654297,
|
|
"logps/rejected": -181.96670532226562,
|
|
"loss": 0.6756,
|
|
"rewards/accuracies": 0.65625,
|
|
"rewards/chosen": 0.0483887605369091,
|
|
"rewards/margins": 0.0376238189637661,
|
|
"rewards/rejected": 0.010764943435788155,
|
|
"step": 1430
|
|
},
|
|
{
|
|
"epoch": 0.6911447084233261,
|
|
"grad_norm": 56.0,
|
|
"learning_rate": 1.317121312642406e-07,
|
|
"logits/chosen": -2.712290048599243,
|
|
"logits/rejected": -2.5553765296936035,
|
|
"logps/chosen": -231.23538208007812,
|
|
"logps/rejected": -199.30477905273438,
|
|
"loss": 0.6737,
|
|
"rewards/accuracies": 0.675000011920929,
|
|
"rewards/chosen": 0.05653299763798714,
|
|
"rewards/margins": 0.04253407567739487,
|
|
"rewards/rejected": 0.013998927548527718,
|
|
"step": 1440
|
|
},
|
|
{
|
|
"epoch": 0.6959443244540436,
|
|
"grad_norm": 54.0,
|
|
"learning_rate": 1.280367111914195e-07,
|
|
"logits/chosen": -2.635277509689331,
|
|
"logits/rejected": -2.543097972869873,
|
|
"logps/chosen": -245.3563690185547,
|
|
"logps/rejected": -227.46142578125,
|
|
"loss": 0.6802,
|
|
"rewards/accuracies": 0.6312500238418579,
|
|
"rewards/chosen": 0.055252768099308014,
|
|
"rewards/margins": 0.029850680381059647,
|
|
"rewards/rejected": 0.02540207840502262,
|
|
"step": 1450
|
|
},
|
|
{
|
|
"epoch": 0.7007439404847612,
|
|
"grad_norm": 58.75,
|
|
"learning_rate": 1.243955662815429e-07,
|
|
"logits/chosen": -2.7177271842956543,
|
|
"logits/rejected": -2.5459141731262207,
|
|
"logps/chosen": -247.24038696289062,
|
|
"logps/rejected": -206.8587188720703,
|
|
"loss": 0.6777,
|
|
"rewards/accuracies": 0.668749988079071,
|
|
"rewards/chosen": 0.05328672379255295,
|
|
"rewards/margins": 0.03413508087396622,
|
|
"rewards/rejected": 0.01915164105594158,
|
|
"step": 1460
|
|
},
|
|
{
|
|
"epoch": 0.7055435565154787,
|
|
"grad_norm": 52.25,
|
|
"learning_rate": 1.207897198001878e-07,
|
|
"logits/chosen": -2.747087001800537,
|
|
"logits/rejected": -2.646921396255493,
|
|
"logps/chosen": -230.12109375,
|
|
"logps/rejected": -196.12423706054688,
|
|
"loss": 0.6732,
|
|
"rewards/accuracies": 0.6625000238418579,
|
|
"rewards/chosen": 0.050879109650850296,
|
|
"rewards/margins": 0.04289903864264488,
|
|
"rewards/rejected": 0.007980065420269966,
|
|
"step": 1470
|
|
},
|
|
{
|
|
"epoch": 0.7103431725461963,
|
|
"grad_norm": 50.0,
|
|
"learning_rate": 1.1722018509306586e-07,
|
|
"logits/chosen": -2.708061456680298,
|
|
"logits/rejected": -2.556723117828369,
|
|
"logps/chosen": -247.2672119140625,
|
|
"logps/rejected": -188.83200073242188,
|
|
"loss": 0.6704,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": 0.04851624369621277,
|
|
"rewards/margins": 0.04957341402769089,
|
|
"rewards/rejected": -0.001057169632986188,
|
|
"step": 1480
|
|
},
|
|
{
|
|
"epoch": 0.7151427885769138,
|
|
"grad_norm": 46.5,
|
|
"learning_rate": 1.1368796530124442e-07,
|
|
"logits/chosen": -2.671211004257202,
|
|
"logits/rejected": -2.543172597885132,
|
|
"logps/chosen": -249.50686645507812,
|
|
"logps/rejected": -195.772216796875,
|
|
"loss": 0.6652,
|
|
"rewards/accuracies": 0.731249988079071,
|
|
"rewards/chosen": 0.06298129260540009,
|
|
"rewards/margins": 0.05997220426797867,
|
|
"rewards/rejected": 0.0030090927612036467,
|
|
"step": 1490
|
|
},
|
|
{
|
|
"epoch": 0.7199424046076314,
|
|
"grad_norm": 48.25,
|
|
"learning_rate": 1.1019405307923557e-07,
|
|
"logits/chosen": -2.719313144683838,
|
|
"logits/rejected": -2.598017454147339,
|
|
"logps/chosen": -244.72067260742188,
|
|
"logps/rejected": -204.39401245117188,
|
|
"loss": 0.6727,
|
|
"rewards/accuracies": 0.675000011920929,
|
|
"rewards/chosen": 0.06084643676877022,
|
|
"rewards/margins": 0.04462161287665367,
|
|
"rewards/rejected": 0.016224823892116547,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 0.7247420206383489,
|
|
"grad_norm": 52.0,
|
|
"learning_rate": 1.0673943031603133e-07,
|
|
"logits/chosen": -2.7169058322906494,
|
|
"logits/rejected": -2.6187710762023926,
|
|
"logps/chosen": -221.7633819580078,
|
|
"logps/rejected": -206.36257934570312,
|
|
"loss": 0.6743,
|
|
"rewards/accuracies": 0.6625000238418579,
|
|
"rewards/chosen": 0.04818148910999298,
|
|
"rewards/margins": 0.04159141331911087,
|
|
"rewards/rejected": 0.006590074393898249,
|
|
"step": 1510
|
|
},
|
|
{
|
|
"epoch": 0.7295416366690665,
|
|
"grad_norm": 51.0,
|
|
"learning_rate": 1.0332506785916522e-07,
|
|
"logits/chosen": -2.690253496170044,
|
|
"logits/rejected": -2.5843067169189453,
|
|
"logps/chosen": -238.80850219726562,
|
|
"logps/rejected": -208.9747314453125,
|
|
"loss": 0.6821,
|
|
"rewards/accuracies": 0.6187499761581421,
|
|
"rewards/chosen": 0.04689895361661911,
|
|
"rewards/margins": 0.025392215698957443,
|
|
"rewards/rejected": 0.021506736055016518,
|
|
"step": 1520
|
|
},
|
|
{
|
|
"epoch": 0.734341252699784,
|
|
"grad_norm": 45.75,
|
|
"learning_rate": 9.995192524187637e-08,
|
|
"logits/chosen": -2.633424997329712,
|
|
"logits/rejected": -2.576991081237793,
|
|
"logps/chosen": -220.3133087158203,
|
|
"logps/rejected": -207.2361297607422,
|
|
"loss": 0.6806,
|
|
"rewards/accuracies": 0.6000000238418579,
|
|
"rewards/chosen": 0.04313893988728523,
|
|
"rewards/margins": 0.028983239084482193,
|
|
"rewards/rejected": 0.01415570080280304,
|
|
"step": 1530
|
|
},
|
|
{
|
|
"epoch": 0.7391408687305016,
|
|
"grad_norm": 47.25,
|
|
"learning_rate": 9.662095041345317e-08,
|
|
"logits/chosen": -2.6786739826202393,
|
|
"logits/rejected": -2.547990322113037,
|
|
"logps/chosen": -245.1093292236328,
|
|
"logps/rejected": -216.21823120117188,
|
|
"loss": 0.6736,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": 0.05186639353632927,
|
|
"rewards/margins": 0.042230743914842606,
|
|
"rewards/rejected": 0.00963564682751894,
|
|
"step": 1540
|
|
},
|
|
{
|
|
"epoch": 0.7439404847612191,
|
|
"grad_norm": 52.25,
|
|
"learning_rate": 9.333307947283256e-08,
|
|
"logits/chosen": -2.7363951206207275,
|
|
"logits/rejected": -2.621778964996338,
|
|
"logps/chosen": -240.5101318359375,
|
|
"logps/rejected": -212.27685546875,
|
|
"loss": 0.677,
|
|
"rewards/accuracies": 0.643750011920929,
|
|
"rewards/chosen": 0.04980180412530899,
|
|
"rewards/margins": 0.035508893430233,
|
|
"rewards/rejected": 0.014292912557721138,
|
|
"step": 1550
|
|
},
|
|
{
|
|
"epoch": 0.7487401007919366,
|
|
"grad_norm": 45.75,
|
|
"learning_rate": 9.008923640552978e-08,
|
|
"logits/chosen": -2.688732147216797,
|
|
"logits/rejected": -2.5987465381622314,
|
|
"logps/chosen": -210.93844604492188,
|
|
"logps/rejected": -178.03103637695312,
|
|
"loss": 0.6756,
|
|
"rewards/accuracies": 0.637499988079071,
|
|
"rewards/chosen": 0.03177894279360771,
|
|
"rewards/margins": 0.03770405799150467,
|
|
"rewards/rejected": -0.005925112869590521,
|
|
"step": 1560
|
|
},
|
|
{
|
|
"epoch": 0.7535397168226542,
|
|
"grad_norm": 49.25,
|
|
"learning_rate": 8.689033282397165e-08,
|
|
"logits/chosen": -2.717036485671997,
|
|
"logits/rejected": -2.59865140914917,
|
|
"logps/chosen": -228.39285278320312,
|
|
"logps/rejected": -202.19210815429688,
|
|
"loss": 0.6765,
|
|
"rewards/accuracies": 0.643750011920929,
|
|
"rewards/chosen": 0.04258617386221886,
|
|
"rewards/margins": 0.03610239177942276,
|
|
"rewards/rejected": 0.006483784876763821,
|
|
"step": 1570
|
|
},
|
|
{
|
|
"epoch": 0.7583393328533717,
|
|
"grad_norm": 44.25,
|
|
"learning_rate": 8.373726771130768e-08,
|
|
"logits/chosen": -2.7102208137512207,
|
|
"logits/rejected": -2.585137128829956,
|
|
"logps/chosen": -240.1363983154297,
|
|
"logps/rejected": -198.0478057861328,
|
|
"loss": 0.6694,
|
|
"rewards/accuracies": 0.6937500238418579,
|
|
"rewards/chosen": 0.058793772011995316,
|
|
"rewards/margins": 0.051263321191072464,
|
|
"rewards/rejected": 0.007530451752245426,
|
|
"step": 1580
|
|
},
|
|
{
|
|
"epoch": 0.7631389488840893,
|
|
"grad_norm": 54.5,
|
|
"learning_rate": 8.063092716877015e-08,
|
|
"logits/chosen": -2.654996871948242,
|
|
"logits/rejected": -2.5496888160705566,
|
|
"logps/chosen": -253.6233673095703,
|
|
"logps/rejected": -211.02017211914062,
|
|
"loss": 0.6746,
|
|
"rewards/accuracies": 0.6625000238418579,
|
|
"rewards/chosen": 0.05824859067797661,
|
|
"rewards/margins": 0.04030389338731766,
|
|
"rewards/rejected": 0.0179446954280138,
|
|
"step": 1590
|
|
},
|
|
{
|
|
"epoch": 0.7679385649148068,
|
|
"grad_norm": 46.75,
|
|
"learning_rate": 7.757218416665445e-08,
|
|
"logits/chosen": -2.745260715484619,
|
|
"logits/rejected": -2.5834543704986572,
|
|
"logps/chosen": -229.595703125,
|
|
"logps/rejected": -188.58816528320312,
|
|
"loss": 0.6678,
|
|
"rewards/accuracies": 0.6875,
|
|
"rewards/chosen": 0.05159440636634827,
|
|
"rewards/margins": 0.054445721209049225,
|
|
"rewards/rejected": -0.00285131623968482,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.7679385649148068,
|
|
"eval_logits/chosen": -2.7164273262023926,
|
|
"eval_logits/rejected": -2.5960192680358887,
|
|
"eval_logps/chosen": -232.62025451660156,
|
|
"eval_logps/rejected": -205.42721557617188,
|
|
"eval_loss": 0.6742997169494629,
|
|
"eval_rewards/accuracies": 0.6629999876022339,
|
|
"eval_rewards/chosen": 0.051273249089717865,
|
|
"eval_rewards/margins": 0.04091595113277435,
|
|
"eval_rewards/rejected": 0.010357297956943512,
|
|
"eval_runtime": 21.4386,
|
|
"eval_samples_per_second": 46.645,
|
|
"eval_steps_per_second": 11.661,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.7727381809455244,
|
|
"grad_norm": 53.75,
|
|
"learning_rate": 7.456189829898954e-08,
|
|
"logits/chosen": -2.722618818283081,
|
|
"logits/rejected": -2.5636143684387207,
|
|
"logps/chosen": -236.95559692382812,
|
|
"logps/rejected": -187.6465301513672,
|
|
"loss": 0.6711,
|
|
"rewards/accuracies": 0.6875,
|
|
"rewards/chosen": 0.05168156698346138,
|
|
"rewards/margins": 0.04733755439519882,
|
|
"rewards/rejected": 0.00434401398524642,
|
|
"step": 1610
|
|
},
|
|
{
|
|
"epoch": 0.7775377969762419,
|
|
"grad_norm": 47.5,
|
|
"learning_rate": 7.160091554196731e-08,
|
|
"logits/chosen": -2.7647414207458496,
|
|
"logits/rejected": -2.6293978691101074,
|
|
"logps/chosen": -234.0639190673828,
|
|
"logps/rejected": -196.3838653564453,
|
|
"loss": 0.673,
|
|
"rewards/accuracies": 0.5874999761581421,
|
|
"rewards/chosen": 0.051941949874162674,
|
|
"rewards/margins": 0.043656349182128906,
|
|
"rewards/rejected": 0.008285606279969215,
|
|
"step": 1620
|
|
},
|
|
{
|
|
"epoch": 0.7823374130069595,
|
|
"grad_norm": 46.75,
|
|
"learning_rate": 6.86900680161994e-08,
|
|
"logits/chosen": -2.6866321563720703,
|
|
"logits/rejected": -2.612730026245117,
|
|
"logps/chosen": -240.7451171875,
|
|
"logps/rejected": -228.06771850585938,
|
|
"loss": 0.6801,
|
|
"rewards/accuracies": 0.612500011920929,
|
|
"rewards/chosen": 0.05514489486813545,
|
|
"rewards/margins": 0.029478853568434715,
|
|
"rewards/rejected": 0.025666039437055588,
|
|
"step": 1630
|
|
},
|
|
{
|
|
"epoch": 0.787137029037677,
|
|
"grad_norm": 41.75,
|
|
"learning_rate": 6.583017375286726e-08,
|
|
"logits/chosen": -2.695338726043701,
|
|
"logits/rejected": -2.5727577209472656,
|
|
"logps/chosen": -229.021728515625,
|
|
"logps/rejected": -195.28890991210938,
|
|
"loss": 0.6761,
|
|
"rewards/accuracies": 0.6499999761581421,
|
|
"rewards/chosen": 0.050636857748031616,
|
|
"rewards/margins": 0.03745696693658829,
|
|
"rewards/rejected": 0.01317988894879818,
|
|
"step": 1640
|
|
},
|
|
{
|
|
"epoch": 0.7919366450683946,
|
|
"grad_norm": 48.0,
|
|
"learning_rate": 6.302203646383239e-08,
|
|
"logits/chosen": -2.71480131149292,
|
|
"logits/rejected": -2.6168365478515625,
|
|
"logps/chosen": -241.9342041015625,
|
|
"logps/rejected": -196.26498413085938,
|
|
"loss": 0.6745,
|
|
"rewards/accuracies": 0.612500011920929,
|
|
"rewards/chosen": 0.047247517853975296,
|
|
"rewards/margins": 0.04049244523048401,
|
|
"rewards/rejected": 0.006755062844604254,
|
|
"step": 1650
|
|
},
|
|
{
|
|
"epoch": 0.7967362610991121,
|
|
"grad_norm": 50.0,
|
|
"learning_rate": 6.02664453157703e-08,
|
|
"logits/chosen": -2.764697790145874,
|
|
"logits/rejected": -2.6510090827941895,
|
|
"logps/chosen": -232.6202850341797,
|
|
"logps/rejected": -214.68887329101562,
|
|
"loss": 0.6789,
|
|
"rewards/accuracies": 0.643750011920929,
|
|
"rewards/chosen": 0.04804060235619545,
|
|
"rewards/margins": 0.031984902918338776,
|
|
"rewards/rejected": 0.016055695712566376,
|
|
"step": 1660
|
|
},
|
|
{
|
|
"epoch": 0.8015358771298297,
|
|
"grad_norm": 39.25,
|
|
"learning_rate": 5.756417470839195e-08,
|
|
"logits/chosen": -2.7477545738220215,
|
|
"logits/rejected": -2.6470861434936523,
|
|
"logps/chosen": -226.798828125,
|
|
"logps/rejected": -196.84617614746094,
|
|
"loss": 0.6756,
|
|
"rewards/accuracies": 0.6499999761581421,
|
|
"rewards/chosen": 0.04336618259549141,
|
|
"rewards/margins": 0.03806891292333603,
|
|
"rewards/rejected": 0.005297265015542507,
|
|
"step": 1670
|
|
},
|
|
{
|
|
"epoch": 0.8063354931605472,
|
|
"grad_norm": 46.0,
|
|
"learning_rate": 5.491598405681558e-08,
|
|
"logits/chosen": -2.7781832218170166,
|
|
"logits/rejected": -2.594255208969116,
|
|
"logps/chosen": -244.9146728515625,
|
|
"logps/rejected": -191.92153930664062,
|
|
"loss": 0.6696,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": 0.05770384520292282,
|
|
"rewards/margins": 0.05054632946848869,
|
|
"rewards/rejected": 0.007157514337450266,
|
|
"step": 1680
|
|
},
|
|
{
|
|
"epoch": 0.8111351091912647,
|
|
"grad_norm": 46.25,
|
|
"learning_rate": 5.232261757814924e-08,
|
|
"logits/chosen": -2.676637649536133,
|
|
"logits/rejected": -2.5312628746032715,
|
|
"logps/chosen": -239.4225616455078,
|
|
"logps/rejected": -201.6903076171875,
|
|
"loss": 0.6665,
|
|
"rewards/accuracies": 0.706250011920929,
|
|
"rewards/chosen": 0.06439922004938126,
|
|
"rewards/margins": 0.0572139136493206,
|
|
"rewards/rejected": 0.007185307331383228,
|
|
"step": 1690
|
|
},
|
|
{
|
|
"epoch": 0.8159347252219823,
|
|
"grad_norm": 48.0,
|
|
"learning_rate": 4.978480408234465e-08,
|
|
"logits/chosen": -2.6256635189056396,
|
|
"logits/rejected": -2.5944604873657227,
|
|
"logps/chosen": -213.68728637695312,
|
|
"logps/rejected": -203.4552459716797,
|
|
"loss": 0.6819,
|
|
"rewards/accuracies": 0.5625,
|
|
"rewards/chosen": 0.035785894840955734,
|
|
"rewards/margins": 0.025840366259217262,
|
|
"rewards/rejected": 0.00994553230702877,
|
|
"step": 1700
|
|
},
|
|
{
|
|
"epoch": 0.8207343412526998,
|
|
"grad_norm": 49.0,
|
|
"learning_rate": 4.730325676738089e-08,
|
|
"logits/chosen": -2.7068257331848145,
|
|
"logits/rejected": -2.5982439517974854,
|
|
"logps/chosen": -227.2494354248047,
|
|
"logps/rejected": -194.36875915527344,
|
|
"loss": 0.6773,
|
|
"rewards/accuracies": 0.6312500238418579,
|
|
"rewards/chosen": 0.041812531650066376,
|
|
"rewards/margins": 0.0348488949239254,
|
|
"rewards/rejected": 0.006963637657463551,
|
|
"step": 1710
|
|
},
|
|
{
|
|
"epoch": 0.8255339572834173,
|
|
"grad_norm": 47.0,
|
|
"learning_rate": 4.487867301883527e-08,
|
|
"logits/chosen": -2.6358511447906494,
|
|
"logits/rejected": -2.5312399864196777,
|
|
"logps/chosen": -222.84445190429688,
|
|
"logps/rejected": -203.24679565429688,
|
|
"loss": 0.6697,
|
|
"rewards/accuracies": 0.668749988079071,
|
|
"rewards/chosen": 0.05580927059054375,
|
|
"rewards/margins": 0.04995386302471161,
|
|
"rewards/rejected": 0.005855409428477287,
|
|
"step": 1720
|
|
},
|
|
{
|
|
"epoch": 0.8303335733141348,
|
|
"grad_norm": 45.75,
|
|
"learning_rate": 4.2511734213898085e-08,
|
|
"logits/chosen": -2.7530338764190674,
|
|
"logits/rejected": -2.6062283515930176,
|
|
"logps/chosen": -251.0270233154297,
|
|
"logps/rejected": -206.8340301513672,
|
|
"loss": 0.6759,
|
|
"rewards/accuracies": 0.6187499761581421,
|
|
"rewards/chosen": 0.0477396696805954,
|
|
"rewards/margins": 0.038779519498348236,
|
|
"rewards/rejected": 0.008960146456956863,
|
|
"step": 1730
|
|
},
|
|
{
|
|
"epoch": 0.8351331893448524,
|
|
"grad_norm": 59.75,
|
|
"learning_rate": 4.020310552988632e-08,
|
|
"logits/chosen": -2.726264476776123,
|
|
"logits/rejected": -2.5524630546569824,
|
|
"logps/chosen": -247.4801788330078,
|
|
"logps/rejected": -204.47012329101562,
|
|
"loss": 0.67,
|
|
"rewards/accuracies": 0.65625,
|
|
"rewards/chosen": 0.0596492774784565,
|
|
"rewards/margins": 0.04989113658666611,
|
|
"rewards/rejected": 0.009758138097822666,
|
|
"step": 1740
|
|
},
|
|
{
|
|
"epoch": 0.8399328053755699,
|
|
"grad_norm": 48.5,
|
|
"learning_rate": 3.795343575730975e-08,
|
|
"logits/chosen": -2.7325968742370605,
|
|
"logits/rejected": -2.602531671524048,
|
|
"logps/chosen": -251.66342163085938,
|
|
"logps/rejected": -216.50717163085938,
|
|
"loss": 0.6805,
|
|
"rewards/accuracies": 0.637499988079071,
|
|
"rewards/chosen": 0.052908021956682205,
|
|
"rewards/margins": 0.029805365949869156,
|
|
"rewards/rejected": 0.0231026578694582,
|
|
"step": 1750
|
|
},
|
|
{
|
|
"epoch": 0.8447324214062875,
|
|
"grad_norm": 50.75,
|
|
"learning_rate": 3.576335711754236e-08,
|
|
"logits/chosen": -2.7325785160064697,
|
|
"logits/rejected": -2.6831870079040527,
|
|
"logps/chosen": -234.35336303710938,
|
|
"logps/rejected": -216.2118682861328,
|
|
"loss": 0.6801,
|
|
"rewards/accuracies": 0.606249988079071,
|
|
"rewards/chosen": 0.04432320594787598,
|
|
"rewards/margins": 0.028889168053865433,
|
|
"rewards/rejected": 0.015434036031365395,
|
|
"step": 1760
|
|
},
|
|
{
|
|
"epoch": 0.849532037437005,
|
|
"grad_norm": 52.25,
|
|
"learning_rate": 3.363348508515015e-08,
|
|
"logits/chosen": -2.7496337890625,
|
|
"logits/rejected": -2.6104989051818848,
|
|
"logps/chosen": -236.09640502929688,
|
|
"logps/rejected": -209.546875,
|
|
"loss": 0.6741,
|
|
"rewards/accuracies": 0.6499999761581421,
|
|
"rewards/chosen": 0.05187439173460007,
|
|
"rewards/margins": 0.041690729558467865,
|
|
"rewards/rejected": 0.010183664970099926,
|
|
"step": 1770
|
|
},
|
|
{
|
|
"epoch": 0.8543316534677226,
|
|
"grad_norm": 52.0,
|
|
"learning_rate": 3.156441821492506e-08,
|
|
"logits/chosen": -2.709050416946411,
|
|
"logits/rejected": -2.584873676300049,
|
|
"logps/chosen": -234.66232299804688,
|
|
"logps/rejected": -206.11624145507812,
|
|
"loss": 0.6738,
|
|
"rewards/accuracies": 0.637499988079071,
|
|
"rewards/chosen": 0.05997660011053085,
|
|
"rewards/margins": 0.04241427406668663,
|
|
"rewards/rejected": 0.017562326043844223,
|
|
"step": 1780
|
|
},
|
|
{
|
|
"epoch": 0.8591312694984401,
|
|
"grad_norm": 48.0,
|
|
"learning_rate": 2.955673797367411e-08,
|
|
"logits/chosen": -2.7106432914733887,
|
|
"logits/rejected": -2.567945718765259,
|
|
"logps/chosen": -238.9900665283203,
|
|
"logps/rejected": -189.52700805664062,
|
|
"loss": 0.6738,
|
|
"rewards/accuracies": 0.6625000238418579,
|
|
"rewards/chosen": 0.052786171436309814,
|
|
"rewards/margins": 0.04187396913766861,
|
|
"rewards/rejected": 0.01091220136731863,
|
|
"step": 1790
|
|
},
|
|
{
|
|
"epoch": 0.8639308855291576,
|
|
"grad_norm": 49.75,
|
|
"learning_rate": 2.7611008576810674e-08,
|
|
"logits/chosen": -2.724682331085205,
|
|
"logits/rejected": -2.603818416595459,
|
|
"logps/chosen": -215.18359375,
|
|
"logps/rejected": -187.88693237304688,
|
|
"loss": 0.6833,
|
|
"rewards/accuracies": 0.606249988079071,
|
|
"rewards/chosen": 0.03471002355217934,
|
|
"rewards/margins": 0.02330438420176506,
|
|
"rewards/rejected": 0.011405635625123978,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.8639308855291576,
|
|
"eval_logits/chosen": -2.716348886489868,
|
|
"eval_logits/rejected": -2.595895290374756,
|
|
"eval_logps/chosen": -232.60520935058594,
|
|
"eval_logps/rejected": -205.4226837158203,
|
|
"eval_loss": 0.673865795135498,
|
|
"eval_rewards/accuracies": 0.6620000004768372,
|
|
"eval_rewards/chosen": 0.05277761444449425,
|
|
"eval_rewards/margins": 0.041967809200286865,
|
|
"eval_rewards/rejected": 0.010809808038175106,
|
|
"eval_runtime": 21.4412,
|
|
"eval_samples_per_second": 46.639,
|
|
"eval_steps_per_second": 11.66,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.8687305015598752,
|
|
"grad_norm": 53.5,
|
|
"learning_rate": 2.5727776829793767e-08,
|
|
"logits/chosen": -2.740374803543091,
|
|
"logits/rejected": -2.5743534564971924,
|
|
"logps/chosen": -236.7235565185547,
|
|
"logps/rejected": -174.8079833984375,
|
|
"loss": 0.6725,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": 0.045521851629018784,
|
|
"rewards/margins": 0.04434143006801605,
|
|
"rewards/rejected": 0.0011804220266640186,
|
|
"step": 1810
|
|
},
|
|
{
|
|
"epoch": 0.8735301175905927,
|
|
"grad_norm": 52.75,
|
|
"learning_rate": 2.390757197446025e-08,
|
|
"logits/chosen": -2.721191883087158,
|
|
"logits/rejected": -2.5652270317077637,
|
|
"logps/chosen": -240.0341339111328,
|
|
"logps/rejected": -187.92848205566406,
|
|
"loss": 0.6739,
|
|
"rewards/accuracies": 0.668749988079071,
|
|
"rewards/chosen": 0.05156964808702469,
|
|
"rewards/margins": 0.041832335293293,
|
|
"rewards/rejected": 0.009737305343151093,
|
|
"step": 1820
|
|
},
|
|
{
|
|
"epoch": 0.8783297336213103,
|
|
"grad_norm": 50.0,
|
|
"learning_rate": 2.2150905540292585e-08,
|
|
"logits/chosen": -2.736666679382324,
|
|
"logits/rejected": -2.5938286781311035,
|
|
"logps/chosen": -227.4267578125,
|
|
"logps/rejected": -202.85935974121094,
|
|
"loss": 0.6757,
|
|
"rewards/accuracies": 0.612500011920929,
|
|
"rewards/chosen": 0.054040707647800446,
|
|
"rewards/margins": 0.03809930384159088,
|
|
"rewards/rejected": 0.015941400080919266,
|
|
"step": 1830
|
|
},
|
|
{
|
|
"epoch": 0.8831293496520278,
|
|
"grad_norm": 46.25,
|
|
"learning_rate": 2.0458271200664624e-08,
|
|
"logits/chosen": -2.6549439430236816,
|
|
"logits/rejected": -2.612755537033081,
|
|
"logps/chosen": -210.38265991210938,
|
|
"logps/rejected": -196.44735717773438,
|
|
"loss": 0.6762,
|
|
"rewards/accuracies": 0.6312500238418579,
|
|
"rewards/chosen": 0.04778756946325302,
|
|
"rewards/margins": 0.0375523678958416,
|
|
"rewards/rejected": 0.01023520715534687,
|
|
"step": 1840
|
|
},
|
|
{
|
|
"epoch": 0.8879289656827454,
|
|
"grad_norm": 48.0,
|
|
"learning_rate": 1.8830144634105206e-08,
|
|
"logits/chosen": -2.7017998695373535,
|
|
"logits/rejected": -2.5428473949432373,
|
|
"logps/chosen": -245.57290649414062,
|
|
"logps/rejected": -186.67721557617188,
|
|
"loss": 0.6703,
|
|
"rewards/accuracies": 0.699999988079071,
|
|
"rewards/chosen": 0.061512064188718796,
|
|
"rewards/margins": 0.04928315803408623,
|
|
"rewards/rejected": 0.012228906154632568,
|
|
"step": 1850
|
|
},
|
|
{
|
|
"epoch": 0.8927285817134629,
|
|
"grad_norm": 46.5,
|
|
"learning_rate": 1.7266983390618994e-08,
|
|
"logits/chosen": -2.6695878505706787,
|
|
"logits/rejected": -2.551301956176758,
|
|
"logps/chosen": -227.3214111328125,
|
|
"logps/rejected": -186.639404296875,
|
|
"loss": 0.6687,
|
|
"rewards/accuracies": 0.6499999761581421,
|
|
"rewards/chosen": 0.0619601309299469,
|
|
"rewards/margins": 0.05267205834388733,
|
|
"rewards/rejected": 0.009288066066801548,
|
|
"step": 1860
|
|
},
|
|
{
|
|
"epoch": 0.8975281977441805,
|
|
"grad_norm": 44.5,
|
|
"learning_rate": 1.5769226763101885e-08,
|
|
"logits/chosen": -2.60475492477417,
|
|
"logits/rejected": -2.5645339488983154,
|
|
"logps/chosen": -228.18612670898438,
|
|
"logps/rejected": -207.1147003173828,
|
|
"loss": 0.6772,
|
|
"rewards/accuracies": 0.6000000238418579,
|
|
"rewards/chosen": 0.038809359073638916,
|
|
"rewards/margins": 0.03571794182062149,
|
|
"rewards/rejected": 0.0030914172530174255,
|
|
"step": 1870
|
|
},
|
|
{
|
|
"epoch": 0.902327813774898,
|
|
"grad_norm": 47.0,
|
|
"learning_rate": 1.4337295663887084e-08,
|
|
"logits/chosen": -2.763521194458008,
|
|
"logits/rejected": -2.614365339279175,
|
|
"logps/chosen": -238.0912628173828,
|
|
"logps/rejected": -187.44529724121094,
|
|
"loss": 0.6705,
|
|
"rewards/accuracies": 0.7124999761581421,
|
|
"rewards/chosen": 0.06117742136120796,
|
|
"rewards/margins": 0.04880157858133316,
|
|
"rewards/rejected": 0.012375839985907078,
|
|
"step": 1880
|
|
},
|
|
{
|
|
"epoch": 0.9071274298056156,
|
|
"grad_norm": 52.75,
|
|
"learning_rate": 1.2971592506456796e-08,
|
|
"logits/chosen": -2.6662535667419434,
|
|
"logits/rejected": -2.585869550704956,
|
|
"logps/chosen": -203.06375122070312,
|
|
"logps/rejected": -187.27978515625,
|
|
"loss": 0.6761,
|
|
"rewards/accuracies": 0.6625000238418579,
|
|
"rewards/chosen": 0.04065801948308945,
|
|
"rewards/margins": 0.03694169595837593,
|
|
"rewards/rejected": 0.0037163265515118837,
|
|
"step": 1890
|
|
},
|
|
{
|
|
"epoch": 0.9119270458363331,
|
|
"grad_norm": 46.75,
|
|
"learning_rate": 1.1672501092352544e-08,
|
|
"logits/chosen": -2.7174623012542725,
|
|
"logits/rejected": -2.5767555236816406,
|
|
"logps/chosen": -239.62747192382812,
|
|
"logps/rejected": -203.18661499023438,
|
|
"loss": 0.6694,
|
|
"rewards/accuracies": 0.668749988079071,
|
|
"rewards/chosen": 0.05886067822575569,
|
|
"rewards/margins": 0.05083751678466797,
|
|
"rewards/rejected": 0.008023159578442574,
|
|
"step": 1900
|
|
},
|
|
{
|
|
"epoch": 0.9167266618670507,
|
|
"grad_norm": 50.5,
|
|
"learning_rate": 1.0440386503315967e-08,
|
|
"logits/chosen": -2.638658046722412,
|
|
"logits/rejected": -2.565709114074707,
|
|
"logps/chosen": -222.6656494140625,
|
|
"logps/rejected": -242.17562866210938,
|
|
"loss": 0.6788,
|
|
"rewards/accuracies": 0.6312500238418579,
|
|
"rewards/chosen": 0.049609534442424774,
|
|
"rewards/margins": 0.0319090262055397,
|
|
"rewards/rejected": 0.01770050823688507,
|
|
"step": 1910
|
|
},
|
|
{
|
|
"epoch": 0.9215262778977682,
|
|
"grad_norm": 49.25,
|
|
"learning_rate": 9.275594998690573e-09,
|
|
"logits/chosen": -2.688535690307617,
|
|
"logits/rejected": -2.516364574432373,
|
|
"logps/chosen": -248.7972412109375,
|
|
"logps/rejected": -191.66940307617188,
|
|
"loss": 0.6685,
|
|
"rewards/accuracies": 0.7250000238418579,
|
|
"rewards/chosen": 0.0669298768043518,
|
|
"rewards/margins": 0.052894193679094315,
|
|
"rewards/rejected": 0.014035684056580067,
|
|
"step": 1920
|
|
},
|
|
{
|
|
"epoch": 0.9263258939284857,
|
|
"grad_norm": 50.75,
|
|
"learning_rate": 8.178453918112782e-09,
|
|
"logits/chosen": -2.695676326751709,
|
|
"logits/rejected": -2.564342737197876,
|
|
"logps/chosen": -223.0631561279297,
|
|
"logps/rejected": -179.12435913085938,
|
|
"loss": 0.6699,
|
|
"rewards/accuracies": 0.6812499761581421,
|
|
"rewards/chosen": 0.04940425604581833,
|
|
"rewards/margins": 0.049749527126550674,
|
|
"rewards/rejected": -0.00034527387470006943,
|
|
"step": 1930
|
|
},
|
|
{
|
|
"epoch": 0.9311255099592033,
|
|
"grad_norm": 64.5,
|
|
"learning_rate": 7.149271589520167e-09,
|
|
"logits/chosen": -2.655266523361206,
|
|
"logits/rejected": -2.529818058013916,
|
|
"logps/chosen": -209.69650268554688,
|
|
"logps/rejected": -191.54139709472656,
|
|
"loss": 0.6801,
|
|
"rewards/accuracies": 0.643750011920929,
|
|
"rewards/chosen": 0.03373004496097565,
|
|
"rewards/margins": 0.029740754514932632,
|
|
"rewards/rejected": 0.0039892946369946,
|
|
"step": 1940
|
|
},
|
|
{
|
|
"epoch": 0.9359251259899208,
|
|
"grad_norm": 40.75,
|
|
"learning_rate": 6.188337242502784e-09,
|
|
"logits/chosen": -2.6980903148651123,
|
|
"logits/rejected": -2.5514559745788574,
|
|
"logps/chosen": -236.17300415039062,
|
|
"logps/rejected": -191.43814086914062,
|
|
"loss": 0.6731,
|
|
"rewards/accuracies": 0.6812499761581421,
|
|
"rewards/chosen": 0.051222801208496094,
|
|
"rewards/margins": 0.04344618320465088,
|
|
"rewards/rejected": 0.007776615209877491,
|
|
"step": 1950
|
|
},
|
|
{
|
|
"epoch": 0.9407247420206384,
|
|
"grad_norm": 43.5,
|
|
"learning_rate": 5.295920927021108e-09,
|
|
"logits/chosen": -2.7303788661956787,
|
|
"logits/rejected": -2.624377727508545,
|
|
"logps/chosen": -233.98922729492188,
|
|
"logps/rejected": -197.11575317382812,
|
|
"loss": 0.6741,
|
|
"rewards/accuracies": 0.6812499761581421,
|
|
"rewards/chosen": 0.05416171997785568,
|
|
"rewards/margins": 0.04171646013855934,
|
|
"rewards/rejected": 0.01244526170194149,
|
|
"step": 1960
|
|
},
|
|
{
|
|
"epoch": 0.9455243580513559,
|
|
"grad_norm": 53.0,
|
|
"learning_rate": 4.472273437514357e-09,
|
|
"logits/chosen": -2.7538888454437256,
|
|
"logits/rejected": -2.6263771057128906,
|
|
"logps/chosen": -257.5274353027344,
|
|
"logps/rejected": -210.43649291992188,
|
|
"loss": 0.6698,
|
|
"rewards/accuracies": 0.699999988079071,
|
|
"rewards/chosen": 0.0656302273273468,
|
|
"rewards/margins": 0.050034552812576294,
|
|
"rewards/rejected": 0.015595669858157635,
|
|
"step": 1970
|
|
},
|
|
{
|
|
"epoch": 0.9503239740820735,
|
|
"grad_norm": 46.75,
|
|
"learning_rate": 3.7176262424202522e-09,
|
|
"logits/chosen": -2.710458278656006,
|
|
"logits/rejected": -2.611675262451172,
|
|
"logps/chosen": -221.28195190429688,
|
|
"logps/rejected": -202.12786865234375,
|
|
"loss": 0.6825,
|
|
"rewards/accuracies": 0.6187499761581421,
|
|
"rewards/chosen": 0.03624237701296806,
|
|
"rewards/margins": 0.023813677951693535,
|
|
"rewards/rejected": 0.012428699992597103,
|
|
"step": 1980
|
|
},
|
|
{
|
|
"epoch": 0.955123590112791,
|
|
"grad_norm": 51.25,
|
|
"learning_rate": 3.0321914191255292e-09,
|
|
"logits/chosen": -2.684296131134033,
|
|
"logits/rejected": -2.57779598236084,
|
|
"logps/chosen": -241.6301727294922,
|
|
"logps/rejected": -214.57138061523438,
|
|
"loss": 0.6789,
|
|
"rewards/accuracies": 0.6312500238418579,
|
|
"rewards/chosen": 0.044890034943819046,
|
|
"rewards/margins": 0.03138625621795654,
|
|
"rewards/rejected": 0.013503775000572205,
|
|
"step": 1990
|
|
},
|
|
{
|
|
"epoch": 0.9599232061435086,
|
|
"grad_norm": 45.25,
|
|
"learning_rate": 2.416161594366417e-09,
|
|
"logits/chosen": -2.744062900543213,
|
|
"logits/rejected": -2.644768238067627,
|
|
"logps/chosen": -218.0794677734375,
|
|
"logps/rejected": -204.07879638671875,
|
|
"loss": 0.6743,
|
|
"rewards/accuracies": 0.6625000238418579,
|
|
"rewards/chosen": 0.05610308051109314,
|
|
"rewards/margins": 0.04027719795703888,
|
|
"rewards/rejected": 0.015825878828763962,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.9599232061435086,
|
|
"eval_logits/chosen": -2.716303825378418,
|
|
"eval_logits/rejected": -2.5958354473114014,
|
|
"eval_logps/chosen": -232.603271484375,
|
|
"eval_logps/rejected": -205.41600036621094,
|
|
"eval_loss": 0.6740667223930359,
|
|
"eval_rewards/accuracies": 0.6449999809265137,
|
|
"eval_rewards/chosen": 0.05297102406620979,
|
|
"eval_rewards/margins": 0.04149361699819565,
|
|
"eval_rewards/rejected": 0.011477403342723846,
|
|
"eval_runtime": 21.4581,
|
|
"eval_samples_per_second": 46.602,
|
|
"eval_steps_per_second": 11.651,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.9647228221742261,
|
|
"grad_norm": 46.75,
|
|
"learning_rate": 1.8697098900948283e-09,
|
|
"logits/chosen": -2.670266628265381,
|
|
"logits/rejected": -2.5685760974884033,
|
|
"logps/chosen": -225.6419219970703,
|
|
"logps/rejected": -218.05624389648438,
|
|
"loss": 0.6796,
|
|
"rewards/accuracies": 0.606249988079071,
|
|
"rewards/chosen": 0.04892081022262573,
|
|
"rewards/margins": 0.0300876684486866,
|
|
"rewards/rejected": 0.01883314736187458,
|
|
"step": 2010
|
|
},
|
|
{
|
|
"epoch": 0.9695224382049437,
|
|
"grad_norm": 46.5,
|
|
"learning_rate": 1.3929898748261948e-09,
|
|
"logits/chosen": -2.751359224319458,
|
|
"logits/rejected": -2.6107022762298584,
|
|
"logps/chosen": -228.06881713867188,
|
|
"logps/rejected": -205.7585906982422,
|
|
"loss": 0.6756,
|
|
"rewards/accuracies": 0.643750011920929,
|
|
"rewards/chosen": 0.042551226913928986,
|
|
"rewards/margins": 0.03797770291566849,
|
|
"rewards/rejected": 0.004573523066937923,
|
|
"step": 2020
|
|
},
|
|
{
|
|
"epoch": 0.9743220542356611,
|
|
"grad_norm": 50.0,
|
|
"learning_rate": 9.861355204825172e-10,
|
|
"logits/chosen": -2.7200140953063965,
|
|
"logits/rejected": -2.597716808319092,
|
|
"logps/chosen": -256.5636291503906,
|
|
"logps/rejected": -198.85813903808594,
|
|
"loss": 0.6759,
|
|
"rewards/accuracies": 0.6000000238418579,
|
|
"rewards/chosen": 0.05483978986740112,
|
|
"rewards/margins": 0.03850018233060837,
|
|
"rewards/rejected": 0.016339603811502457,
|
|
"step": 2030
|
|
},
|
|
{
|
|
"epoch": 0.9791216702663786,
|
|
"grad_norm": 47.25,
|
|
"learning_rate": 6.492611647420932e-10,
|
|
"logits/chosen": -2.6937568187713623,
|
|
"logits/rejected": -2.550854206085205,
|
|
"logps/chosen": -227.3085479736328,
|
|
"logps/rejected": -193.7042694091797,
|
|
"loss": 0.6746,
|
|
"rewards/accuracies": 0.6499999761581421,
|
|
"rewards/chosen": 0.0502045638859272,
|
|
"rewards/margins": 0.040242839604616165,
|
|
"rewards/rejected": 0.009961729869246483,
|
|
"step": 2040
|
|
},
|
|
{
|
|
"epoch": 0.9839212862970962,
|
|
"grad_norm": 50.0,
|
|
"learning_rate": 3.8246147890763636e-10,
|
|
"logits/chosen": -2.7479116916656494,
|
|
"logits/rejected": -2.6015403270721436,
|
|
"logps/chosen": -243.7687530517578,
|
|
"logps/rejected": -200.82359313964844,
|
|
"loss": 0.6735,
|
|
"rewards/accuracies": 0.668749988079071,
|
|
"rewards/chosen": 0.05925138667225838,
|
|
"rewards/margins": 0.042620036751031876,
|
|
"rewards/rejected": 0.016631346195936203,
|
|
"step": 2050
|
|
},
|
|
{
|
|
"epoch": 0.9887209023278137,
|
|
"grad_norm": 44.25,
|
|
"learning_rate": 1.8581144130089266e-10,
|
|
"logits/chosen": -2.674731731414795,
|
|
"logits/rejected": -2.6031594276428223,
|
|
"logps/chosen": -220.9689483642578,
|
|
"logps/rejected": -206.5128173828125,
|
|
"loss": 0.6804,
|
|
"rewards/accuracies": 0.637499988079071,
|
|
"rewards/chosen": 0.046965621411800385,
|
|
"rewards/margins": 0.028399232774972916,
|
|
"rewards/rejected": 0.018566394224762917,
|
|
"step": 2060
|
|
},
|
|
{
|
|
"epoch": 0.9935205183585313,
|
|
"grad_norm": 45.25,
|
|
"learning_rate": 5.936631619152255e-11,
|
|
"logits/chosen": -2.718005895614624,
|
|
"logits/rejected": -2.6375200748443604,
|
|
"logps/chosen": -232.222900390625,
|
|
"logps/rejected": -203.7240753173828,
|
|
"loss": 0.6763,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": 0.05353207513689995,
|
|
"rewards/margins": 0.03654414042830467,
|
|
"rewards/rejected": 0.016987936571240425,
|
|
"step": 2070
|
|
},
|
|
{
|
|
"epoch": 0.9983201343892488,
|
|
"grad_norm": 49.25,
|
|
"learning_rate": 3.1616382663024467e-12,
|
|
"logits/chosen": -2.7572877407073975,
|
|
"logits/rejected": -2.611027956008911,
|
|
"logps/chosen": -232.747314453125,
|
|
"logps/rejected": -208.55935668945312,
|
|
"loss": 0.6791,
|
|
"rewards/accuracies": 0.6312500238418579,
|
|
"rewards/chosen": 0.04861464723944664,
|
|
"rewards/margins": 0.03146715834736824,
|
|
"rewards/rejected": 0.0171474888920784,
|
|
"step": 2080
|
|
},
|
|
{
|
|
"epoch": 0.9997600191984641,
|
|
"step": 2083,
|
|
"total_flos": 0.0,
|
|
"train_loss": 0.6792905164692074,
|
|
"train_runtime": 2163.7863,
|
|
"train_samples_per_second": 15.405,
|
|
"train_steps_per_second": 0.963
|
|
}
|
|
],
|
|
"logging_steps": 10,
|
|
"max_steps": 2083,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 200,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 4,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|