3049 lines
102 KiB
JSON
3049 lines
102 KiB
JSON
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 1.0,
|
|
"eval_steps": 100,
|
|
"global_step": 1937,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0,
|
|
"learning_rate": 2.5773195876288657e-09,
|
|
"logits/chosen": -2.3950748443603516,
|
|
"logits/rejected": -2.440203905105591,
|
|
"logps/chosen": -134.41867065429688,
|
|
"logps/rejected": -130.99615478515625,
|
|
"loss": 0.6931,
|
|
"rewards/accuracies": 0.0,
|
|
"rewards/chosen": 0.0,
|
|
"rewards/margins": 0.0,
|
|
"rewards/rejected": 0.0,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 2.5773195876288656e-08,
|
|
"logits/chosen": -2.905548095703125,
|
|
"logits/rejected": -2.900480031967163,
|
|
"logps/chosen": -333.7513427734375,
|
|
"logps/rejected": -256.0840148925781,
|
|
"loss": 0.693,
|
|
"rewards/accuracies": 0.4444444477558136,
|
|
"rewards/chosen": 0.0011086573358625174,
|
|
"rewards/margins": 0.0019541841465979815,
|
|
"rewards/rejected": -0.0008455271599814296,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.01,
|
|
"learning_rate": 5.154639175257731e-08,
|
|
"logits/chosen": -2.7942252159118652,
|
|
"logits/rejected": -2.5809876918792725,
|
|
"logps/chosen": -318.5552673339844,
|
|
"logps/rejected": -244.302734375,
|
|
"loss": 0.6874,
|
|
"rewards/accuracies": 0.6000000238418579,
|
|
"rewards/chosen": 0.007939901202917099,
|
|
"rewards/margins": 0.012212857604026794,
|
|
"rewards/rejected": -0.004272956866770983,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 7.731958762886598e-08,
|
|
"logits/chosen": -2.95878267288208,
|
|
"logits/rejected": -2.914288282394409,
|
|
"logps/chosen": -328.8043518066406,
|
|
"logps/rejected": -237.27151489257812,
|
|
"loss": 0.6807,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": 0.042353544384241104,
|
|
"rewards/margins": 0.013048592023551464,
|
|
"rewards/rejected": 0.029304955154657364,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.02,
|
|
"learning_rate": 1.0309278350515462e-07,
|
|
"logits/chosen": -2.766305685043335,
|
|
"logits/rejected": -2.8168842792510986,
|
|
"logps/chosen": -298.6869812011719,
|
|
"logps/rejected": -283.5751037597656,
|
|
"loss": 0.6611,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": 0.053902607411146164,
|
|
"rewards/margins": 0.043827421963214874,
|
|
"rewards/rejected": 0.010075189173221588,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.2886597938144328e-07,
|
|
"logits/chosen": -2.940128803253174,
|
|
"logits/rejected": -2.9153294563293457,
|
|
"logps/chosen": -353.18408203125,
|
|
"logps/rejected": -278.1434326171875,
|
|
"loss": 0.6488,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": 0.10765328258275986,
|
|
"rewards/margins": 0.15659113228321075,
|
|
"rewards/rejected": -0.0489378497004509,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.03,
|
|
"learning_rate": 1.5463917525773197e-07,
|
|
"logits/chosen": -2.9204277992248535,
|
|
"logits/rejected": -2.847609281539917,
|
|
"logps/chosen": -337.5159912109375,
|
|
"logps/rejected": -260.6918029785156,
|
|
"loss": 0.6253,
|
|
"rewards/accuracies": 0.8999999761581421,
|
|
"rewards/chosen": 0.17934152483940125,
|
|
"rewards/margins": 0.402275413274765,
|
|
"rewards/rejected": -0.22293388843536377,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 1.804123711340206e-07,
|
|
"logits/chosen": -2.9181199073791504,
|
|
"logits/rejected": -2.883514881134033,
|
|
"logps/chosen": -402.8895568847656,
|
|
"logps/rejected": -262.24609375,
|
|
"loss": 0.6168,
|
|
"rewards/accuracies": 0.8500000238418579,
|
|
"rewards/chosen": 0.14267697930335999,
|
|
"rewards/margins": 0.45374640822410583,
|
|
"rewards/rejected": -0.3110693693161011,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.04,
|
|
"learning_rate": 2.0618556701030925e-07,
|
|
"logits/chosen": -3.0056729316711426,
|
|
"logits/rejected": -2.903585433959961,
|
|
"logps/chosen": -391.81524658203125,
|
|
"logps/rejected": -263.700439453125,
|
|
"loss": 0.5578,
|
|
"rewards/accuracies": 0.699999988079071,
|
|
"rewards/chosen": -0.3499735891819,
|
|
"rewards/margins": 0.6209262609481812,
|
|
"rewards/rejected": -0.970899760723114,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 2.3195876288659794e-07,
|
|
"logits/chosen": -2.8997607231140137,
|
|
"logits/rejected": -2.787290573120117,
|
|
"logps/chosen": -333.6437072753906,
|
|
"logps/rejected": -248.12149047851562,
|
|
"loss": 0.5714,
|
|
"rewards/accuracies": 0.8500000238418579,
|
|
"rewards/chosen": -0.30834370851516724,
|
|
"rewards/margins": 0.7723485231399536,
|
|
"rewards/rejected": -1.0806924104690552,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"learning_rate": 2.5773195876288655e-07,
|
|
"logits/chosen": -2.729923725128174,
|
|
"logits/rejected": -2.668921947479248,
|
|
"logps/chosen": -299.4058837890625,
|
|
"logps/rejected": -299.69464111328125,
|
|
"loss": 0.5602,
|
|
"rewards/accuracies": 0.699999988079071,
|
|
"rewards/chosen": -0.3236086368560791,
|
|
"rewards/margins": 0.48320120573043823,
|
|
"rewards/rejected": -0.8068099021911621,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.05,
|
|
"eval_logits/chosen": -2.679702043533325,
|
|
"eval_logits/rejected": -2.655423879623413,
|
|
"eval_logps/chosen": -293.716064453125,
|
|
"eval_logps/rejected": -306.2606506347656,
|
|
"eval_loss": 0.5588741302490234,
|
|
"eval_rewards/accuracies": 0.71875,
|
|
"eval_rewards/chosen": -0.3358975350856781,
|
|
"eval_rewards/margins": 0.48091739416122437,
|
|
"eval_rewards/rejected": -0.8168148994445801,
|
|
"eval_runtime": 59.9141,
|
|
"eval_samples_per_second": 16.691,
|
|
"eval_steps_per_second": 0.267,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 2.835051546391752e-07,
|
|
"logits/chosen": -2.91627836227417,
|
|
"logits/rejected": -2.970510482788086,
|
|
"logps/chosen": -383.3172302246094,
|
|
"logps/rejected": -347.4027404785156,
|
|
"loss": 0.5764,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": -0.4358394742012024,
|
|
"rewards/margins": 0.6925853490829468,
|
|
"rewards/rejected": -1.128424882888794,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.06,
|
|
"learning_rate": 3.0927835051546394e-07,
|
|
"logits/chosen": -2.760704517364502,
|
|
"logits/rejected": -2.690364360809326,
|
|
"logps/chosen": -230.55453491210938,
|
|
"logps/rejected": -258.10711669921875,
|
|
"loss": 0.5352,
|
|
"rewards/accuracies": 0.550000011920929,
|
|
"rewards/chosen": -0.5467896461486816,
|
|
"rewards/margins": 0.3793262541294098,
|
|
"rewards/rejected": -0.926115870475769,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 3.3505154639175255e-07,
|
|
"logits/chosen": -2.9248669147491455,
|
|
"logits/rejected": -2.743758201599121,
|
|
"logps/chosen": -375.25665283203125,
|
|
"logps/rejected": -237.9368438720703,
|
|
"loss": 0.5478,
|
|
"rewards/accuracies": 0.6499999761581421,
|
|
"rewards/chosen": -0.2538636326789856,
|
|
"rewards/margins": 0.8244791030883789,
|
|
"rewards/rejected": -1.0783426761627197,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.07,
|
|
"learning_rate": 3.608247422680412e-07,
|
|
"logits/chosen": -2.789468765258789,
|
|
"logits/rejected": -2.6087424755096436,
|
|
"logps/chosen": -341.793212890625,
|
|
"logps/rejected": -303.3952941894531,
|
|
"loss": 0.531,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.5787044763565063,
|
|
"rewards/margins": 0.7447447776794434,
|
|
"rewards/rejected": -1.3234491348266602,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 3.865979381443299e-07,
|
|
"logits/chosen": -3.055696964263916,
|
|
"logits/rejected": -2.8961217403411865,
|
|
"logps/chosen": -342.10162353515625,
|
|
"logps/rejected": -362.9927978515625,
|
|
"loss": 0.5117,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": -0.22076380252838135,
|
|
"rewards/margins": 1.0446679592132568,
|
|
"rewards/rejected": -1.2654317617416382,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.08,
|
|
"learning_rate": 4.123711340206185e-07,
|
|
"logits/chosen": -2.769010066986084,
|
|
"logits/rejected": -2.677729368209839,
|
|
"logps/chosen": -316.1387939453125,
|
|
"logps/rejected": -257.9635009765625,
|
|
"loss": 0.5264,
|
|
"rewards/accuracies": 0.6499999761581421,
|
|
"rewards/chosen": -0.5055423378944397,
|
|
"rewards/margins": 0.9351354837417603,
|
|
"rewards/rejected": -1.4406778812408447,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 4.381443298969072e-07,
|
|
"logits/chosen": -2.7085089683532715,
|
|
"logits/rejected": -2.600250005722046,
|
|
"logps/chosen": -276.8943176269531,
|
|
"logps/rejected": -258.60809326171875,
|
|
"loss": 0.478,
|
|
"rewards/accuracies": 0.8999999761581421,
|
|
"rewards/chosen": -0.2844000458717346,
|
|
"rewards/margins": 1.338111162185669,
|
|
"rewards/rejected": -1.6225111484527588,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.09,
|
|
"learning_rate": 4.639175257731959e-07,
|
|
"logits/chosen": -2.8041911125183105,
|
|
"logits/rejected": -2.6761889457702637,
|
|
"logps/chosen": -272.099365234375,
|
|
"logps/rejected": -236.39675903320312,
|
|
"loss": 0.4904,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": -0.0498843789100647,
|
|
"rewards/margins": 1.0812078714370728,
|
|
"rewards/rejected": -1.1310923099517822,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 4.896907216494845e-07,
|
|
"logits/chosen": -2.7893078327178955,
|
|
"logits/rejected": -2.787668228149414,
|
|
"logps/chosen": -315.68365478515625,
|
|
"logps/rejected": -278.1927490234375,
|
|
"loss": 0.5809,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.05461850017309189,
|
|
"rewards/margins": 0.6613677740097046,
|
|
"rewards/rejected": -0.7159863114356995,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"learning_rate": 4.982788296041308e-07,
|
|
"logits/chosen": -2.7293038368225098,
|
|
"logits/rejected": -2.7431674003601074,
|
|
"logps/chosen": -319.84307861328125,
|
|
"logps/rejected": -285.5979309082031,
|
|
"loss": 0.4852,
|
|
"rewards/accuracies": 0.8500000238418579,
|
|
"rewards/chosen": -0.25821852684020996,
|
|
"rewards/margins": 0.9920754432678223,
|
|
"rewards/rejected": -1.2502939701080322,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.1,
|
|
"eval_logits/chosen": -2.5956599712371826,
|
|
"eval_logits/rejected": -2.576249122619629,
|
|
"eval_logps/chosen": -297.6180725097656,
|
|
"eval_logps/rejected": -319.912353515625,
|
|
"eval_loss": 0.513595700263977,
|
|
"eval_rewards/accuracies": 0.8125,
|
|
"eval_rewards/chosen": -0.5309990048408508,
|
|
"eval_rewards/margins": 0.9684009552001953,
|
|
"eval_rewards/rejected": -1.4993999004364014,
|
|
"eval_runtime": 58.9562,
|
|
"eval_samples_per_second": 16.962,
|
|
"eval_steps_per_second": 0.271,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 4.954102122776821e-07,
|
|
"logits/chosen": -2.4963221549987793,
|
|
"logits/rejected": -2.2522215843200684,
|
|
"logps/chosen": -330.4200134277344,
|
|
"logps/rejected": -269.993896484375,
|
|
"loss": 0.5766,
|
|
"rewards/accuracies": 0.8500000238418579,
|
|
"rewards/chosen": -0.39517518877983093,
|
|
"rewards/margins": 1.0865150690078735,
|
|
"rewards/rejected": -1.4816901683807373,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.11,
|
|
"learning_rate": 4.925415949512335e-07,
|
|
"logits/chosen": -2.709627628326416,
|
|
"logits/rejected": -2.6324925422668457,
|
|
"logps/chosen": -355.61602783203125,
|
|
"logps/rejected": -297.36346435546875,
|
|
"loss": 0.5109,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.4435829222202301,
|
|
"rewards/margins": 0.6400429606437683,
|
|
"rewards/rejected": -1.0836259126663208,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 4.896729776247848e-07,
|
|
"logits/chosen": -2.7627291679382324,
|
|
"logits/rejected": -2.728057384490967,
|
|
"logps/chosen": -326.62017822265625,
|
|
"logps/rejected": -297.73150634765625,
|
|
"loss": 0.4948,
|
|
"rewards/accuracies": 0.6499999761581421,
|
|
"rewards/chosen": -0.0938631147146225,
|
|
"rewards/margins": 0.8200471997261047,
|
|
"rewards/rejected": -0.9139102697372437,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.12,
|
|
"learning_rate": 4.868043602983362e-07,
|
|
"logits/chosen": -2.758730173110962,
|
|
"logits/rejected": -2.7346835136413574,
|
|
"logps/chosen": -399.9264221191406,
|
|
"logps/rejected": -287.3780212402344,
|
|
"loss": 0.5489,
|
|
"rewards/accuracies": 0.550000011920929,
|
|
"rewards/chosen": -0.5322622656822205,
|
|
"rewards/margins": 0.36636096239089966,
|
|
"rewards/rejected": -0.8986232876777649,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 4.839357429718875e-07,
|
|
"logits/chosen": -2.922417163848877,
|
|
"logits/rejected": -2.7935752868652344,
|
|
"logps/chosen": -267.0278015136719,
|
|
"logps/rejected": -201.1547088623047,
|
|
"loss": 0.509,
|
|
"rewards/accuracies": 0.949999988079071,
|
|
"rewards/chosen": 0.024714648723602295,
|
|
"rewards/margins": 1.111724615097046,
|
|
"rewards/rejected": -1.087010145187378,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.13,
|
|
"learning_rate": 4.810671256454389e-07,
|
|
"logits/chosen": -2.9726548194885254,
|
|
"logits/rejected": -2.8518033027648926,
|
|
"logps/chosen": -345.22869873046875,
|
|
"logps/rejected": -239.1555938720703,
|
|
"loss": 0.5219,
|
|
"rewards/accuracies": 0.8500000238418579,
|
|
"rewards/chosen": 0.062345243990421295,
|
|
"rewards/margins": 1.2734425067901611,
|
|
"rewards/rejected": -1.211097240447998,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 4.781985083189902e-07,
|
|
"logits/chosen": -2.840888500213623,
|
|
"logits/rejected": -2.7617764472961426,
|
|
"logps/chosen": -343.1612243652344,
|
|
"logps/rejected": -278.6701354980469,
|
|
"loss": 0.4955,
|
|
"rewards/accuracies": 0.6000000238418579,
|
|
"rewards/chosen": -0.15016205608844757,
|
|
"rewards/margins": 0.7697200179100037,
|
|
"rewards/rejected": -0.9198821187019348,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.14,
|
|
"learning_rate": 4.7532989099254154e-07,
|
|
"logits/chosen": -2.9551005363464355,
|
|
"logits/rejected": -2.8918609619140625,
|
|
"logps/chosen": -391.0706787109375,
|
|
"logps/rejected": -303.4861755371094,
|
|
"loss": 0.5101,
|
|
"rewards/accuracies": 0.699999988079071,
|
|
"rewards/chosen": -0.2836604118347168,
|
|
"rewards/margins": 0.7845603227615356,
|
|
"rewards/rejected": -1.068220853805542,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 4.724612736660929e-07,
|
|
"logits/chosen": -2.664578437805176,
|
|
"logits/rejected": -2.801819086074829,
|
|
"logps/chosen": -272.6911315917969,
|
|
"logps/rejected": -268.9104919433594,
|
|
"loss": 0.4815,
|
|
"rewards/accuracies": 0.6000000238418579,
|
|
"rewards/chosen": 0.15687711536884308,
|
|
"rewards/margins": 0.9703904986381531,
|
|
"rewards/rejected": -0.813513457775116,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"learning_rate": 4.695926563396443e-07,
|
|
"logits/chosen": -2.7159955501556396,
|
|
"logits/rejected": -2.7721734046936035,
|
|
"logps/chosen": -444.1826171875,
|
|
"logps/rejected": -277.5564270019531,
|
|
"loss": 0.5212,
|
|
"rewards/accuracies": 0.8500000238418579,
|
|
"rewards/chosen": 0.18774226307868958,
|
|
"rewards/margins": 1.6489717960357666,
|
|
"rewards/rejected": -1.4612294435501099,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.15,
|
|
"eval_logits/chosen": -2.7125377655029297,
|
|
"eval_logits/rejected": -2.6865103244781494,
|
|
"eval_logps/chosen": -290.3699035644531,
|
|
"eval_logps/rejected": -313.44439697265625,
|
|
"eval_loss": 0.5167616009712219,
|
|
"eval_rewards/accuracies": 0.78125,
|
|
"eval_rewards/chosen": -0.1685914546251297,
|
|
"eval_rewards/margins": 1.0074106454849243,
|
|
"eval_rewards/rejected": -1.1760022640228271,
|
|
"eval_runtime": 57.2558,
|
|
"eval_samples_per_second": 17.465,
|
|
"eval_steps_per_second": 0.279,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.16,
|
|
"learning_rate": 4.6672403901319564e-07,
|
|
"logits/chosen": -2.9332168102264404,
|
|
"logits/rejected": -2.83400297164917,
|
|
"logps/chosen": -351.634033203125,
|
|
"logps/rejected": -267.8584899902344,
|
|
"loss": 0.5018,
|
|
"rewards/accuracies": 0.8999999761581421,
|
|
"rewards/chosen": -0.10320504754781723,
|
|
"rewards/margins": 1.208617091178894,
|
|
"rewards/rejected": -1.3118221759796143,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 4.63855421686747e-07,
|
|
"logits/chosen": -2.86641001701355,
|
|
"logits/rejected": -2.9064583778381348,
|
|
"logps/chosen": -401.264404296875,
|
|
"logps/rejected": -330.1752624511719,
|
|
"loss": 0.4492,
|
|
"rewards/accuracies": 0.8500000238418579,
|
|
"rewards/chosen": -0.5078498125076294,
|
|
"rewards/margins": 1.2765679359436035,
|
|
"rewards/rejected": -1.784417748451233,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 4.609868043602983e-07,
|
|
"logits/chosen": -2.7677345275878906,
|
|
"logits/rejected": -2.808917760848999,
|
|
"logps/chosen": -373.0283203125,
|
|
"logps/rejected": -278.2051086425781,
|
|
"loss": 0.4576,
|
|
"rewards/accuracies": 0.8500000238418579,
|
|
"rewards/chosen": -0.5256485939025879,
|
|
"rewards/margins": 1.5822279453277588,
|
|
"rewards/rejected": -2.1078765392303467,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 4.581181870338497e-07,
|
|
"logits/chosen": -2.5862860679626465,
|
|
"logits/rejected": -2.6729650497436523,
|
|
"logps/chosen": -264.1101989746094,
|
|
"logps/rejected": -302.21832275390625,
|
|
"loss": 0.5585,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.3558864891529083,
|
|
"rewards/margins": 0.7631327509880066,
|
|
"rewards/rejected": -1.1190193891525269,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.18,
|
|
"learning_rate": 4.55249569707401e-07,
|
|
"logits/chosen": -2.9297871589660645,
|
|
"logits/rejected": -2.8774681091308594,
|
|
"logps/chosen": -199.7515106201172,
|
|
"logps/rejected": -261.4060363769531,
|
|
"loss": 0.4773,
|
|
"rewards/accuracies": 0.6499999761581421,
|
|
"rewards/chosen": -0.46969500184059143,
|
|
"rewards/margins": 0.5962409973144531,
|
|
"rewards/rejected": -1.0659358501434326,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 4.5238095238095237e-07,
|
|
"logits/chosen": -2.9737091064453125,
|
|
"logits/rejected": -3.0415940284729004,
|
|
"logps/chosen": -417.215087890625,
|
|
"logps/rejected": -402.1128845214844,
|
|
"loss": 0.4545,
|
|
"rewards/accuracies": 0.8500000238418579,
|
|
"rewards/chosen": 0.06597085297107697,
|
|
"rewards/margins": 1.6805864572525024,
|
|
"rewards/rejected": -1.6146154403686523,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.19,
|
|
"learning_rate": 4.495123350545037e-07,
|
|
"logits/chosen": -2.9340670108795166,
|
|
"logits/rejected": -2.8952622413635254,
|
|
"logps/chosen": -200.85787963867188,
|
|
"logps/rejected": -190.8145751953125,
|
|
"loss": 0.462,
|
|
"rewards/accuracies": 0.8500000238418579,
|
|
"rewards/chosen": -0.39831048250198364,
|
|
"rewards/margins": 1.7207963466644287,
|
|
"rewards/rejected": -2.1191065311431885,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 4.46643717728055e-07,
|
|
"logits/chosen": -2.7802371978759766,
|
|
"logits/rejected": -2.692594051361084,
|
|
"logps/chosen": -342.89556884765625,
|
|
"logps/rejected": -360.488037109375,
|
|
"loss": 0.5404,
|
|
"rewards/accuracies": 0.6000000238418579,
|
|
"rewards/chosen": -0.2285180538892746,
|
|
"rewards/margins": 0.9076235890388489,
|
|
"rewards/rejected": -1.1361417770385742,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.2,
|
|
"learning_rate": 4.437751004016064e-07,
|
|
"logits/chosen": -2.8958873748779297,
|
|
"logits/rejected": -2.8253509998321533,
|
|
"logps/chosen": -338.5411071777344,
|
|
"logps/rejected": -302.31536865234375,
|
|
"loss": 0.4479,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.5085374712944031,
|
|
"rewards/margins": 1.4444196224212646,
|
|
"rewards/rejected": -1.9529569149017334,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 4.4090648307515776e-07,
|
|
"logits/chosen": -3.072361469268799,
|
|
"logits/rejected": -3.0403876304626465,
|
|
"logps/chosen": -298.0294189453125,
|
|
"logps/rejected": -269.53759765625,
|
|
"loss": 0.5496,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.7352712154388428,
|
|
"rewards/margins": 0.4947175979614258,
|
|
"rewards/rejected": -1.229988694190979,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"eval_logits/chosen": -2.8217618465423584,
|
|
"eval_logits/rejected": -2.7946977615356445,
|
|
"eval_logps/chosen": -290.2325744628906,
|
|
"eval_logps/rejected": -324.2634582519531,
|
|
"eval_loss": 0.4835220277309418,
|
|
"eval_rewards/accuracies": 0.828125,
|
|
"eval_rewards/chosen": -0.16172367334365845,
|
|
"eval_rewards/margins": 1.5552303791046143,
|
|
"eval_rewards/rejected": -1.716953992843628,
|
|
"eval_runtime": 58.8529,
|
|
"eval_samples_per_second": 16.992,
|
|
"eval_steps_per_second": 0.272,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.21,
|
|
"learning_rate": 4.380378657487091e-07,
|
|
"logits/chosen": -2.878788948059082,
|
|
"logits/rejected": -2.7508370876312256,
|
|
"logps/chosen": -361.15850830078125,
|
|
"logps/rejected": -244.7436065673828,
|
|
"loss": 0.4209,
|
|
"rewards/accuracies": 0.6499999761581421,
|
|
"rewards/chosen": -0.3598092496395111,
|
|
"rewards/margins": 0.8073941469192505,
|
|
"rewards/rejected": -1.1672031879425049,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 4.3516924842226045e-07,
|
|
"logits/chosen": -2.940977096557617,
|
|
"logits/rejected": -2.811171531677246,
|
|
"logps/chosen": -342.15802001953125,
|
|
"logps/rejected": -299.4014587402344,
|
|
"loss": 0.4994,
|
|
"rewards/accuracies": 0.550000011920929,
|
|
"rewards/chosen": -0.5423839092254639,
|
|
"rewards/margins": 1.0953930616378784,
|
|
"rewards/rejected": -1.6377769708633423,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.22,
|
|
"learning_rate": 4.323006310958118e-07,
|
|
"logits/chosen": -2.928069591522217,
|
|
"logits/rejected": -2.839228391647339,
|
|
"logps/chosen": -246.8507080078125,
|
|
"logps/rejected": -235.521728515625,
|
|
"loss": 0.4726,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.7062541246414185,
|
|
"rewards/margins": 0.9924257397651672,
|
|
"rewards/rejected": -1.6986801624298096,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 4.2943201376936315e-07,
|
|
"logits/chosen": -2.8603832721710205,
|
|
"logits/rejected": -2.7824721336364746,
|
|
"logps/chosen": -376.1601257324219,
|
|
"logps/rejected": -356.33905029296875,
|
|
"loss": 0.5524,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.40359026193618774,
|
|
"rewards/margins": 0.40071097016334534,
|
|
"rewards/rejected": -0.8043011426925659,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.23,
|
|
"learning_rate": 4.265633964429145e-07,
|
|
"logits/chosen": -2.9410994052886963,
|
|
"logits/rejected": -2.9663491249084473,
|
|
"logps/chosen": -346.1313171386719,
|
|
"logps/rejected": -281.6514587402344,
|
|
"loss": 0.469,
|
|
"rewards/accuracies": 0.6000000238418579,
|
|
"rewards/chosen": -0.5330409407615662,
|
|
"rewards/margins": 0.553443968296051,
|
|
"rewards/rejected": -1.0864850282669067,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 4.2369477911646584e-07,
|
|
"logits/chosen": -2.9534189701080322,
|
|
"logits/rejected": -2.9417405128479004,
|
|
"logps/chosen": -362.89105224609375,
|
|
"logps/rejected": -356.94598388671875,
|
|
"loss": 0.546,
|
|
"rewards/accuracies": 0.8999999761581421,
|
|
"rewards/chosen": -0.10002334415912628,
|
|
"rewards/margins": 1.6824315786361694,
|
|
"rewards/rejected": -1.7824550867080688,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.24,
|
|
"learning_rate": 4.208261617900172e-07,
|
|
"logits/chosen": -2.8067574501037598,
|
|
"logits/rejected": -2.815100908279419,
|
|
"logps/chosen": -359.10791015625,
|
|
"logps/rejected": -276.3143615722656,
|
|
"loss": 0.4473,
|
|
"rewards/accuracies": 0.6499999761581421,
|
|
"rewards/chosen": -0.5546383857727051,
|
|
"rewards/margins": 0.8130642175674438,
|
|
"rewards/rejected": -1.367702603340149,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 4.179575444635686e-07,
|
|
"logits/chosen": -3.0283010005950928,
|
|
"logits/rejected": -3.0092482566833496,
|
|
"logps/chosen": -320.7592468261719,
|
|
"logps/rejected": -327.0148010253906,
|
|
"loss": 0.4589,
|
|
"rewards/accuracies": 0.6000000238418579,
|
|
"rewards/chosen": -0.023906176909804344,
|
|
"rewards/margins": 0.516281008720398,
|
|
"rewards/rejected": -0.5401870608329773,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"learning_rate": 4.150889271371199e-07,
|
|
"logits/chosen": -2.9509072303771973,
|
|
"logits/rejected": -2.9088292121887207,
|
|
"logps/chosen": -359.8896789550781,
|
|
"logps/rejected": -298.53167724609375,
|
|
"loss": 0.4272,
|
|
"rewards/accuracies": 0.6499999761581421,
|
|
"rewards/chosen": -0.9165481328964233,
|
|
"rewards/margins": 0.8448396921157837,
|
|
"rewards/rejected": -1.761387586593628,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 4.1222030981067123e-07,
|
|
"logits/chosen": -3.1632120609283447,
|
|
"logits/rejected": -2.959442138671875,
|
|
"logps/chosen": -348.33709716796875,
|
|
"logps/rejected": -335.841552734375,
|
|
"loss": 0.5209,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": 0.01871591992676258,
|
|
"rewards/margins": 1.6875536441802979,
|
|
"rewards/rejected": -1.668837547302246,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"eval_logits/chosen": -2.8667073249816895,
|
|
"eval_logits/rejected": -2.8388283252716064,
|
|
"eval_logps/chosen": -296.5545654296875,
|
|
"eval_logps/rejected": -323.13250732421875,
|
|
"eval_loss": 0.5053695440292358,
|
|
"eval_rewards/accuracies": 0.734375,
|
|
"eval_rewards/chosen": -0.4778231978416443,
|
|
"eval_rewards/margins": 1.1825826168060303,
|
|
"eval_rewards/rejected": -1.6604057550430298,
|
|
"eval_runtime": 53.6768,
|
|
"eval_samples_per_second": 18.63,
|
|
"eval_steps_per_second": 0.298,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.26,
|
|
"learning_rate": 4.093516924842226e-07,
|
|
"logits/chosen": -2.5999233722686768,
|
|
"logits/rejected": -2.6488568782806396,
|
|
"logps/chosen": -279.40753173828125,
|
|
"logps/rejected": -288.4382019042969,
|
|
"loss": 0.5262,
|
|
"rewards/accuracies": 0.6499999761581421,
|
|
"rewards/chosen": -0.03970176726579666,
|
|
"rewards/margins": 0.8601824641227722,
|
|
"rewards/rejected": -0.8998842239379883,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 4.064830751577739e-07,
|
|
"logits/chosen": -3.004120349884033,
|
|
"logits/rejected": -2.983384132385254,
|
|
"logps/chosen": -223.1768035888672,
|
|
"logps/rejected": -233.73312377929688,
|
|
"loss": 0.5426,
|
|
"rewards/accuracies": 0.699999988079071,
|
|
"rewards/chosen": -0.22979409992694855,
|
|
"rewards/margins": 1.1553027629852295,
|
|
"rewards/rejected": -1.3850969076156616,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 0.27,
|
|
"learning_rate": 4.036144578313253e-07,
|
|
"logits/chosen": -3.057387351989746,
|
|
"logits/rejected": -2.9704792499542236,
|
|
"logps/chosen": -325.57025146484375,
|
|
"logps/rejected": -283.2383117675781,
|
|
"loss": 0.5058,
|
|
"rewards/accuracies": 0.949999988079071,
|
|
"rewards/chosen": 0.17162951827049255,
|
|
"rewards/margins": 1.4068710803985596,
|
|
"rewards/rejected": -1.2352415323257446,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 4.007458405048766e-07,
|
|
"logits/chosen": -2.940493583679199,
|
|
"logits/rejected": -2.9570419788360596,
|
|
"logps/chosen": -227.3856964111328,
|
|
"logps/rejected": -289.257080078125,
|
|
"loss": 0.5339,
|
|
"rewards/accuracies": 0.6499999761581421,
|
|
"rewards/chosen": -0.835346519947052,
|
|
"rewards/margins": 0.5295913219451904,
|
|
"rewards/rejected": -1.3649379014968872,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 0.28,
|
|
"learning_rate": 3.9787722317842796e-07,
|
|
"logits/chosen": -3.0390350818634033,
|
|
"logits/rejected": -3.0303795337677,
|
|
"logps/chosen": -389.0104675292969,
|
|
"logps/rejected": -340.8678283691406,
|
|
"loss": 0.5671,
|
|
"rewards/accuracies": 0.8500000238418579,
|
|
"rewards/chosen": -0.15655937790870667,
|
|
"rewards/margins": 1.444120168685913,
|
|
"rewards/rejected": -1.6006797552108765,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 3.950086058519793e-07,
|
|
"logits/chosen": -3.060770034790039,
|
|
"logits/rejected": -3.0684587955474854,
|
|
"logps/chosen": -366.5193786621094,
|
|
"logps/rejected": -359.5283203125,
|
|
"loss": 0.501,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": -0.8856328129768372,
|
|
"rewards/margins": 1.0001723766326904,
|
|
"rewards/rejected": -1.8858048915863037,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 0.29,
|
|
"learning_rate": 3.9213998852553066e-07,
|
|
"logits/chosen": -3.022430896759033,
|
|
"logits/rejected": -2.831993341445923,
|
|
"logps/chosen": -379.4962463378906,
|
|
"logps/rejected": -278.2923889160156,
|
|
"loss": 0.4947,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.7191627621650696,
|
|
"rewards/margins": 1.4330142736434937,
|
|
"rewards/rejected": -2.152177095413208,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 3.8927137119908206e-07,
|
|
"logits/chosen": -2.922940731048584,
|
|
"logits/rejected": -2.9428744316101074,
|
|
"logps/chosen": -361.6253356933594,
|
|
"logps/rejected": -278.06280517578125,
|
|
"loss": 0.5375,
|
|
"rewards/accuracies": 0.6499999761581421,
|
|
"rewards/chosen": -0.573477029800415,
|
|
"rewards/margins": 0.7330626249313354,
|
|
"rewards/rejected": -1.30653977394104,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 0.3,
|
|
"learning_rate": 3.864027538726334e-07,
|
|
"logits/chosen": -2.9140264987945557,
|
|
"logits/rejected": -2.889644145965576,
|
|
"logps/chosen": -289.79058837890625,
|
|
"logps/rejected": -292.6978759765625,
|
|
"loss": 0.4726,
|
|
"rewards/accuracies": 0.8500000238418579,
|
|
"rewards/chosen": -0.1533357799053192,
|
|
"rewards/margins": 1.4288156032562256,
|
|
"rewards/rejected": -1.582151174545288,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 3.835341365461847e-07,
|
|
"logits/chosen": -2.869101047515869,
|
|
"logits/rejected": -2.88112735748291,
|
|
"logps/chosen": -243.238037109375,
|
|
"logps/rejected": -313.36260986328125,
|
|
"loss": 0.4617,
|
|
"rewards/accuracies": 0.8500000238418579,
|
|
"rewards/chosen": -0.5780342221260071,
|
|
"rewards/margins": 1.605724573135376,
|
|
"rewards/rejected": -2.1837587356567383,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"eval_logits/chosen": -2.8521082401275635,
|
|
"eval_logits/rejected": -2.823408603668213,
|
|
"eval_logps/chosen": -294.47406005859375,
|
|
"eval_logps/rejected": -320.2847900390625,
|
|
"eval_loss": 0.4909786283969879,
|
|
"eval_rewards/accuracies": 0.765625,
|
|
"eval_rewards/chosen": -0.373797744512558,
|
|
"eval_rewards/margins": 1.1442232131958008,
|
|
"eval_rewards/rejected": -1.5180209875106812,
|
|
"eval_runtime": 59.0534,
|
|
"eval_samples_per_second": 16.934,
|
|
"eval_steps_per_second": 0.271,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.31,
|
|
"learning_rate": 3.8066551921973605e-07,
|
|
"logits/chosen": -2.9201841354370117,
|
|
"logits/rejected": -2.845341205596924,
|
|
"logps/chosen": -338.87127685546875,
|
|
"logps/rejected": -337.95928955078125,
|
|
"loss": 0.5093,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.7563498020172119,
|
|
"rewards/margins": 1.1776630878448486,
|
|
"rewards/rejected": -1.934012770652771,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 0.32,
|
|
"learning_rate": 3.7779690189328745e-07,
|
|
"logits/chosen": -3.034162759780884,
|
|
"logits/rejected": -3.027251720428467,
|
|
"logps/chosen": -379.36590576171875,
|
|
"logps/rejected": -306.5363464355469,
|
|
"loss": 0.5269,
|
|
"rewards/accuracies": 0.8500000238418579,
|
|
"rewards/chosen": -0.6596435308456421,
|
|
"rewards/margins": 0.7433546185493469,
|
|
"rewards/rejected": -1.4029979705810547,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 3.749282845668388e-07,
|
|
"logits/chosen": -2.87540864944458,
|
|
"logits/rejected": -2.834453821182251,
|
|
"logps/chosen": -252.4665069580078,
|
|
"logps/rejected": -234.0683135986328,
|
|
"loss": 0.4806,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -1.2540767192840576,
|
|
"rewards/margins": 1.1702220439910889,
|
|
"rewards/rejected": -2.4242987632751465,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 3.7205966724039014e-07,
|
|
"logits/chosen": -2.9034817218780518,
|
|
"logits/rejected": -2.8846733570098877,
|
|
"logps/chosen": -308.4912109375,
|
|
"logps/rejected": -294.0049743652344,
|
|
"loss": 0.5071,
|
|
"rewards/accuracies": 0.8999999761581421,
|
|
"rewards/chosen": -0.7412096261978149,
|
|
"rewards/margins": 1.7391561269760132,
|
|
"rewards/rejected": -2.480365753173828,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 3.6919104991394144e-07,
|
|
"logits/chosen": -2.931398868560791,
|
|
"logits/rejected": -2.982588291168213,
|
|
"logps/chosen": -284.8676452636719,
|
|
"logps/rejected": -328.3984069824219,
|
|
"loss": 0.5295,
|
|
"rewards/accuracies": 0.8500000238418579,
|
|
"rewards/chosen": -0.8696074485778809,
|
|
"rewards/margins": 1.0539840459823608,
|
|
"rewards/rejected": -1.9235913753509521,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.34,
|
|
"learning_rate": 3.663224325874928e-07,
|
|
"logits/chosen": -3.033780097961426,
|
|
"logits/rejected": -2.9609603881835938,
|
|
"logps/chosen": -372.281005859375,
|
|
"logps/rejected": -324.37640380859375,
|
|
"loss": 0.4882,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.7286888957023621,
|
|
"rewards/margins": 1.0642763376235962,
|
|
"rewards/rejected": -1.792965292930603,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 3.634538152610442e-07,
|
|
"logits/chosen": -2.93717622756958,
|
|
"logits/rejected": -2.9276621341705322,
|
|
"logps/chosen": -240.49267578125,
|
|
"logps/rejected": -235.82901000976562,
|
|
"loss": 0.5725,
|
|
"rewards/accuracies": 0.6000000238418579,
|
|
"rewards/chosen": -0.5567636489868164,
|
|
"rewards/margins": 0.7625681161880493,
|
|
"rewards/rejected": -1.3193317651748657,
|
|
"step": 670
|
|
},
|
|
{
|
|
"epoch": 0.35,
|
|
"learning_rate": 3.6058519793459553e-07,
|
|
"logits/chosen": -2.834033727645874,
|
|
"logits/rejected": -2.778200387954712,
|
|
"logps/chosen": -311.96209716796875,
|
|
"logps/rejected": -279.34210205078125,
|
|
"loss": 0.4695,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.3849760890007019,
|
|
"rewards/margins": 1.1402137279510498,
|
|
"rewards/rejected": -1.5251898765563965,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 3.577165806081469e-07,
|
|
"logits/chosen": -3.0405967235565186,
|
|
"logits/rejected": -2.836169481277466,
|
|
"logps/chosen": -377.798828125,
|
|
"logps/rejected": -225.7850799560547,
|
|
"loss": 0.4235,
|
|
"rewards/accuracies": 0.8500000238418579,
|
|
"rewards/chosen": 0.16026358306407928,
|
|
"rewards/margins": 1.4788005352020264,
|
|
"rewards/rejected": -1.3185369968414307,
|
|
"step": 690
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"learning_rate": 3.5484796328169817e-07,
|
|
"logits/chosen": -2.914154052734375,
|
|
"logits/rejected": -2.837174415588379,
|
|
"logps/chosen": -486.6206970214844,
|
|
"logps/rejected": -388.7563171386719,
|
|
"loss": 0.4452,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": 0.3467257618904114,
|
|
"rewards/margins": 1.8637710809707642,
|
|
"rewards/rejected": -1.517045021057129,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 0.36,
|
|
"eval_logits/chosen": -2.7652909755706787,
|
|
"eval_logits/rejected": -2.7400858402252197,
|
|
"eval_logps/chosen": -296.1795959472656,
|
|
"eval_logps/rejected": -323.0770263671875,
|
|
"eval_loss": 0.48382142186164856,
|
|
"eval_rewards/accuracies": 0.703125,
|
|
"eval_rewards/chosen": -0.45907530188560486,
|
|
"eval_rewards/margins": 1.19855797290802,
|
|
"eval_rewards/rejected": -1.6576331853866577,
|
|
"eval_runtime": 56.4234,
|
|
"eval_samples_per_second": 17.723,
|
|
"eval_steps_per_second": 0.284,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 3.519793459552495e-07,
|
|
"logits/chosen": -2.81813645362854,
|
|
"logits/rejected": -2.891169309616089,
|
|
"logps/chosen": -304.12921142578125,
|
|
"logps/rejected": -300.01275634765625,
|
|
"loss": 0.4796,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": -1.1196739673614502,
|
|
"rewards/margins": 0.9914043545722961,
|
|
"rewards/rejected": -2.1110780239105225,
|
|
"step": 710
|
|
},
|
|
{
|
|
"epoch": 0.37,
|
|
"learning_rate": 3.491107286288009e-07,
|
|
"logits/chosen": -2.8922622203826904,
|
|
"logits/rejected": -2.8977553844451904,
|
|
"logps/chosen": -317.526123046875,
|
|
"logps/rejected": -434.422119140625,
|
|
"loss": 0.4804,
|
|
"rewards/accuracies": 1.0,
|
|
"rewards/chosen": -0.36432236433029175,
|
|
"rewards/margins": 2.1755287647247314,
|
|
"rewards/rejected": -2.539850950241089,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 3.4624211130235227e-07,
|
|
"logits/chosen": -2.897742509841919,
|
|
"logits/rejected": -2.849132537841797,
|
|
"logps/chosen": -359.33770751953125,
|
|
"logps/rejected": -375.489990234375,
|
|
"loss": 0.4515,
|
|
"rewards/accuracies": 0.6499999761581421,
|
|
"rewards/chosen": -0.7771750688552856,
|
|
"rewards/margins": 0.7097036838531494,
|
|
"rewards/rejected": -1.4868788719177246,
|
|
"step": 730
|
|
},
|
|
{
|
|
"epoch": 0.38,
|
|
"learning_rate": 3.433734939759036e-07,
|
|
"logits/chosen": -2.8762059211730957,
|
|
"logits/rejected": -2.845869302749634,
|
|
"logps/chosen": -367.5733642578125,
|
|
"logps/rejected": -284.77886962890625,
|
|
"loss": 0.5371,
|
|
"rewards/accuracies": 0.8500000238418579,
|
|
"rewards/chosen": -0.7336939573287964,
|
|
"rewards/margins": 1.311545968055725,
|
|
"rewards/rejected": -2.0452399253845215,
|
|
"step": 740
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 3.405048766494549e-07,
|
|
"logits/chosen": -2.8201470375061035,
|
|
"logits/rejected": -2.8354251384735107,
|
|
"logps/chosen": -340.94549560546875,
|
|
"logps/rejected": -293.8914794921875,
|
|
"loss": 0.4984,
|
|
"rewards/accuracies": 0.8500000238418579,
|
|
"rewards/chosen": -0.7219395637512207,
|
|
"rewards/margins": 1.4890177249908447,
|
|
"rewards/rejected": -2.2109572887420654,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 0.39,
|
|
"learning_rate": 3.376362593230063e-07,
|
|
"logits/chosen": -2.7780914306640625,
|
|
"logits/rejected": -2.7091243267059326,
|
|
"logps/chosen": -291.79864501953125,
|
|
"logps/rejected": -293.62762451171875,
|
|
"loss": 0.5131,
|
|
"rewards/accuracies": 0.8500000238418579,
|
|
"rewards/chosen": -0.4624660611152649,
|
|
"rewards/margins": 1.375497579574585,
|
|
"rewards/rejected": -1.8379634618759155,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 3.3476764199655765e-07,
|
|
"logits/chosen": -2.9393422603607178,
|
|
"logits/rejected": -2.913353681564331,
|
|
"logps/chosen": -266.66033935546875,
|
|
"logps/rejected": -310.0787658691406,
|
|
"loss": 0.5009,
|
|
"rewards/accuracies": 0.699999988079071,
|
|
"rewards/chosen": -0.27197906374931335,
|
|
"rewards/margins": 1.5637094974517822,
|
|
"rewards/rejected": -1.835688591003418,
|
|
"step": 770
|
|
},
|
|
{
|
|
"epoch": 0.4,
|
|
"learning_rate": 3.31899024670109e-07,
|
|
"logits/chosen": -2.799229621887207,
|
|
"logits/rejected": -2.7572007179260254,
|
|
"logps/chosen": -322.958251953125,
|
|
"logps/rejected": -250.6745147705078,
|
|
"loss": 0.462,
|
|
"rewards/accuracies": 0.8999999761581421,
|
|
"rewards/chosen": -0.1813792735338211,
|
|
"rewards/margins": 1.8009974956512451,
|
|
"rewards/rejected": -1.9823768138885498,
|
|
"step": 780
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 3.2903040734366035e-07,
|
|
"logits/chosen": -2.9396896362304688,
|
|
"logits/rejected": -2.8256068229675293,
|
|
"logps/chosen": -336.33111572265625,
|
|
"logps/rejected": -305.9268493652344,
|
|
"loss": 0.4724,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.5670480728149414,
|
|
"rewards/margins": 1.4373046159744263,
|
|
"rewards/rejected": -2.004352569580078,
|
|
"step": 790
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"learning_rate": 3.261617900172117e-07,
|
|
"logits/chosen": -2.8622994422912598,
|
|
"logits/rejected": -2.8278136253356934,
|
|
"logps/chosen": -284.0804443359375,
|
|
"logps/rejected": -313.81939697265625,
|
|
"loss": 0.4674,
|
|
"rewards/accuracies": 0.8999999761581421,
|
|
"rewards/chosen": 0.13986745476722717,
|
|
"rewards/margins": 1.9817402362823486,
|
|
"rewards/rejected": -1.8418725728988647,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 0.41,
|
|
"eval_logits/chosen": -2.6945066452026367,
|
|
"eval_logits/rejected": -2.6739954948425293,
|
|
"eval_logps/chosen": -298.38177490234375,
|
|
"eval_logps/rejected": -327.2416076660156,
|
|
"eval_loss": 0.5077354907989502,
|
|
"eval_rewards/accuracies": 0.765625,
|
|
"eval_rewards/chosen": -0.5691820383071899,
|
|
"eval_rewards/margins": 1.2966790199279785,
|
|
"eval_rewards/rejected": -1.865861177444458,
|
|
"eval_runtime": 55.4449,
|
|
"eval_samples_per_second": 18.036,
|
|
"eval_steps_per_second": 0.289,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 3.2329317269076304e-07,
|
|
"logits/chosen": -2.8082878589630127,
|
|
"logits/rejected": -2.783658981323242,
|
|
"logps/chosen": -226.2019500732422,
|
|
"logps/rejected": -294.66326904296875,
|
|
"loss": 0.5152,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": -0.869047999382019,
|
|
"rewards/margins": 1.269092321395874,
|
|
"rewards/rejected": -2.1381404399871826,
|
|
"step": 810
|
|
},
|
|
{
|
|
"epoch": 0.42,
|
|
"learning_rate": 3.204245553643144e-07,
|
|
"logits/chosen": -2.9127354621887207,
|
|
"logits/rejected": -2.894219160079956,
|
|
"logps/chosen": -230.79513549804688,
|
|
"logps/rejected": -305.60113525390625,
|
|
"loss": 0.5279,
|
|
"rewards/accuracies": 0.3499999940395355,
|
|
"rewards/chosen": -0.7637732028961182,
|
|
"rewards/margins": -0.17934365570545197,
|
|
"rewards/rejected": -0.5844296216964722,
|
|
"step": 820
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 3.1755593803786574e-07,
|
|
"logits/chosen": -2.9059672355651855,
|
|
"logits/rejected": -2.9442248344421387,
|
|
"logps/chosen": -238.79248046875,
|
|
"logps/rejected": -285.3636474609375,
|
|
"loss": 0.5044,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": -0.5089541673660278,
|
|
"rewards/margins": 1.2097771167755127,
|
|
"rewards/rejected": -1.718731164932251,
|
|
"step": 830
|
|
},
|
|
{
|
|
"epoch": 0.43,
|
|
"learning_rate": 3.146873207114171e-07,
|
|
"logits/chosen": -2.953200340270996,
|
|
"logits/rejected": -2.8728442192077637,
|
|
"logps/chosen": -345.51593017578125,
|
|
"logps/rejected": -307.09002685546875,
|
|
"loss": 0.5092,
|
|
"rewards/accuracies": 0.550000011920929,
|
|
"rewards/chosen": -1.007508397102356,
|
|
"rewards/margins": 0.5991470813751221,
|
|
"rewards/rejected": -1.6066553592681885,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 3.1181870338496843e-07,
|
|
"logits/chosen": -2.8444714546203613,
|
|
"logits/rejected": -2.791147470474243,
|
|
"logps/chosen": -296.8919677734375,
|
|
"logps/rejected": -274.53082275390625,
|
|
"loss": 0.4805,
|
|
"rewards/accuracies": 0.8999999761581421,
|
|
"rewards/chosen": -0.953098475933075,
|
|
"rewards/margins": 1.4887616634368896,
|
|
"rewards/rejected": -2.4418601989746094,
|
|
"step": 850
|
|
},
|
|
{
|
|
"epoch": 0.44,
|
|
"learning_rate": 3.089500860585198e-07,
|
|
"logits/chosen": -2.6704187393188477,
|
|
"logits/rejected": -2.6690378189086914,
|
|
"logps/chosen": -249.43881225585938,
|
|
"logps/rejected": -302.4588928222656,
|
|
"loss": 0.5111,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": -1.3775177001953125,
|
|
"rewards/margins": 1.329897403717041,
|
|
"rewards/rejected": -2.7074151039123535,
|
|
"step": 860
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 3.060814687320711e-07,
|
|
"logits/chosen": -2.839586019515991,
|
|
"logits/rejected": -2.834083318710327,
|
|
"logps/chosen": -304.1976318359375,
|
|
"logps/rejected": -256.19378662109375,
|
|
"loss": 0.4354,
|
|
"rewards/accuracies": 0.8999999761581421,
|
|
"rewards/chosen": -0.47585782408714294,
|
|
"rewards/margins": 2.021141529083252,
|
|
"rewards/rejected": -2.4969992637634277,
|
|
"step": 870
|
|
},
|
|
{
|
|
"epoch": 0.45,
|
|
"learning_rate": 3.0321285140562247e-07,
|
|
"logits/chosen": -2.7889621257781982,
|
|
"logits/rejected": -2.7733607292175293,
|
|
"logps/chosen": -280.20977783203125,
|
|
"logps/rejected": -347.7413635253906,
|
|
"loss": 0.4604,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": -0.9127300977706909,
|
|
"rewards/margins": 0.9622424244880676,
|
|
"rewards/rejected": -1.8749723434448242,
|
|
"step": 880
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 3.003442340791738e-07,
|
|
"logits/chosen": -2.904717206954956,
|
|
"logits/rejected": -2.835303783416748,
|
|
"logps/chosen": -336.64776611328125,
|
|
"logps/rejected": -316.78045654296875,
|
|
"loss": 0.5003,
|
|
"rewards/accuracies": 0.699999988079071,
|
|
"rewards/chosen": -0.2770691215991974,
|
|
"rewards/margins": 1.1059465408325195,
|
|
"rewards/rejected": -1.383015751838684,
|
|
"step": 890
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"learning_rate": 2.974756167527252e-07,
|
|
"logits/chosen": -3.022934913635254,
|
|
"logits/rejected": -2.956709146499634,
|
|
"logps/chosen": -286.189208984375,
|
|
"logps/rejected": -264.89776611328125,
|
|
"loss": 0.4656,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.7135367393493652,
|
|
"rewards/margins": 1.1268901824951172,
|
|
"rewards/rejected": -1.8404268026351929,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 0.46,
|
|
"eval_logits/chosen": -2.8014752864837646,
|
|
"eval_logits/rejected": -2.781651258468628,
|
|
"eval_logps/chosen": -297.5553283691406,
|
|
"eval_logps/rejected": -323.15179443359375,
|
|
"eval_loss": 0.4927058219909668,
|
|
"eval_rewards/accuracies": 0.765625,
|
|
"eval_rewards/chosen": -0.5278611183166504,
|
|
"eval_rewards/margins": 1.1335095167160034,
|
|
"eval_rewards/rejected": -1.661370873451233,
|
|
"eval_runtime": 57.0998,
|
|
"eval_samples_per_second": 17.513,
|
|
"eval_steps_per_second": 0.28,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 2.946069994262765e-07,
|
|
"logits/chosen": -2.978717803955078,
|
|
"logits/rejected": -2.8918721675872803,
|
|
"logps/chosen": -336.64495849609375,
|
|
"logps/rejected": -247.2961883544922,
|
|
"loss": 0.4344,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": -0.6280319094657898,
|
|
"rewards/margins": 1.4678236246109009,
|
|
"rewards/rejected": -2.095855712890625,
|
|
"step": 910
|
|
},
|
|
{
|
|
"epoch": 0.47,
|
|
"learning_rate": 2.9173838209982786e-07,
|
|
"logits/chosen": -2.786372423171997,
|
|
"logits/rejected": -2.8410544395446777,
|
|
"logps/chosen": -298.88360595703125,
|
|
"logps/rejected": -302.8692626953125,
|
|
"loss": 0.4686,
|
|
"rewards/accuracies": 0.6499999761581421,
|
|
"rewards/chosen": -0.6036109924316406,
|
|
"rewards/margins": 0.7661906480789185,
|
|
"rewards/rejected": -1.369801640510559,
|
|
"step": 920
|
|
},
|
|
{
|
|
"epoch": 0.48,
|
|
"learning_rate": 2.888697647733792e-07,
|
|
"logits/chosen": -2.915067195892334,
|
|
"logits/rejected": -2.732910633087158,
|
|
"logps/chosen": -356.8029479980469,
|
|
"logps/rejected": -226.57974243164062,
|
|
"loss": 0.4128,
|
|
"rewards/accuracies": 0.8999999761581421,
|
|
"rewards/chosen": -0.25983041524887085,
|
|
"rewards/margins": 2.1089906692504883,
|
|
"rewards/rejected": -2.368821144104004,
|
|
"step": 930
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 2.8600114744693055e-07,
|
|
"logits/chosen": -2.872128486633301,
|
|
"logits/rejected": -2.8349528312683105,
|
|
"logps/chosen": -276.8409729003906,
|
|
"logps/rejected": -234.0821990966797,
|
|
"loss": 0.4633,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.33096224069595337,
|
|
"rewards/margins": 1.0401618480682373,
|
|
"rewards/rejected": -1.371124029159546,
|
|
"step": 940
|
|
},
|
|
{
|
|
"epoch": 0.49,
|
|
"learning_rate": 2.8313253012048195e-07,
|
|
"logits/chosen": -2.768357276916504,
|
|
"logits/rejected": -2.8650612831115723,
|
|
"logps/chosen": -286.5350341796875,
|
|
"logps/rejected": -295.7557678222656,
|
|
"loss": 0.4694,
|
|
"rewards/accuracies": 1.0,
|
|
"rewards/chosen": -0.42378902435302734,
|
|
"rewards/margins": 2.2293660640716553,
|
|
"rewards/rejected": -2.6531550884246826,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 2.802639127940333e-07,
|
|
"logits/chosen": -2.8344969749450684,
|
|
"logits/rejected": -2.8180437088012695,
|
|
"logps/chosen": -312.8253173828125,
|
|
"logps/rejected": -299.6221618652344,
|
|
"loss": 0.4998,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.2208956480026245,
|
|
"rewards/margins": 1.7145334482192993,
|
|
"rewards/rejected": -1.9354288578033447,
|
|
"step": 960
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 2.773952954675846e-07,
|
|
"logits/chosen": -2.809140682220459,
|
|
"logits/rejected": -2.699197292327881,
|
|
"logps/chosen": -369.69854736328125,
|
|
"logps/rejected": -293.45477294921875,
|
|
"loss": 0.4849,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": -0.28183019161224365,
|
|
"rewards/margins": 1.6505457162857056,
|
|
"rewards/rejected": -1.9323756694793701,
|
|
"step": 970
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 2.7452667814113594e-07,
|
|
"logits/chosen": -2.8047561645507812,
|
|
"logits/rejected": -2.777113199234009,
|
|
"logps/chosen": -326.3648376464844,
|
|
"logps/rejected": -289.0523986816406,
|
|
"loss": 0.4543,
|
|
"rewards/accuracies": 0.949999988079071,
|
|
"rewards/chosen": -0.42743197083473206,
|
|
"rewards/margins": 1.8371082544326782,
|
|
"rewards/rejected": -2.264540195465088,
|
|
"step": 980
|
|
},
|
|
{
|
|
"epoch": 0.51,
|
|
"learning_rate": 2.716580608146873e-07,
|
|
"logits/chosen": -2.8819570541381836,
|
|
"logits/rejected": -2.8494954109191895,
|
|
"logps/chosen": -318.66204833984375,
|
|
"logps/rejected": -345.1630859375,
|
|
"loss": 0.4534,
|
|
"rewards/accuracies": 0.8500000238418579,
|
|
"rewards/chosen": -0.4583119750022888,
|
|
"rewards/margins": 1.3975725173950195,
|
|
"rewards/rejected": -1.8558847904205322,
|
|
"step": 990
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 2.687894434882387e-07,
|
|
"logits/chosen": -2.8820109367370605,
|
|
"logits/rejected": -2.9153826236724854,
|
|
"logps/chosen": -251.1084747314453,
|
|
"logps/rejected": -264.5345153808594,
|
|
"loss": 0.4102,
|
|
"rewards/accuracies": 0.6499999761581421,
|
|
"rewards/chosen": -0.5280589461326599,
|
|
"rewards/margins": 1.0574977397918701,
|
|
"rewards/rejected": -1.5855568647384644,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"eval_logits/chosen": -2.745532512664795,
|
|
"eval_logits/rejected": -2.7159857749938965,
|
|
"eval_logps/chosen": -298.5311279296875,
|
|
"eval_logps/rejected": -331.2577819824219,
|
|
"eval_loss": 0.4771742522716522,
|
|
"eval_rewards/accuracies": 0.765625,
|
|
"eval_rewards/chosen": -0.5766510367393494,
|
|
"eval_rewards/margins": 1.4900195598602295,
|
|
"eval_rewards/rejected": -2.0666706562042236,
|
|
"eval_runtime": 60.3269,
|
|
"eval_samples_per_second": 16.576,
|
|
"eval_steps_per_second": 0.265,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 0.52,
|
|
"learning_rate": 2.6592082616179004e-07,
|
|
"logits/chosen": -2.753818988800049,
|
|
"logits/rejected": -2.6555442810058594,
|
|
"logps/chosen": -421.75408935546875,
|
|
"logps/rejected": -326.7854919433594,
|
|
"loss": 0.4969,
|
|
"rewards/accuracies": 0.8500000238418579,
|
|
"rewards/chosen": -0.7776181101799011,
|
|
"rewards/margins": 1.5239381790161133,
|
|
"rewards/rejected": -2.30155611038208,
|
|
"step": 1010
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 2.6305220883534133e-07,
|
|
"logits/chosen": -2.795933961868286,
|
|
"logits/rejected": -2.875976085662842,
|
|
"logps/chosen": -359.01318359375,
|
|
"logps/rejected": -325.6358642578125,
|
|
"loss": 0.4814,
|
|
"rewards/accuracies": 0.699999988079071,
|
|
"rewards/chosen": -0.12685959041118622,
|
|
"rewards/margins": 1.6275579929351807,
|
|
"rewards/rejected": -1.7544174194335938,
|
|
"step": 1020
|
|
},
|
|
{
|
|
"epoch": 0.53,
|
|
"learning_rate": 2.601835915088927e-07,
|
|
"logits/chosen": -2.7979979515075684,
|
|
"logits/rejected": -2.7110390663146973,
|
|
"logps/chosen": -307.5633239746094,
|
|
"logps/rejected": -272.8973388671875,
|
|
"loss": 0.4354,
|
|
"rewards/accuracies": 0.8999999761581421,
|
|
"rewards/chosen": -0.4669399857521057,
|
|
"rewards/margins": 1.9002759456634521,
|
|
"rewards/rejected": -2.367216110229492,
|
|
"step": 1030
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 2.573149741824441e-07,
|
|
"logits/chosen": -2.864424467086792,
|
|
"logits/rejected": -2.7874035835266113,
|
|
"logps/chosen": -301.7801513671875,
|
|
"logps/rejected": -366.8116149902344,
|
|
"loss": 0.4799,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": -0.38910773396492004,
|
|
"rewards/margins": 2.2144553661346436,
|
|
"rewards/rejected": -2.6035633087158203,
|
|
"step": 1040
|
|
},
|
|
{
|
|
"epoch": 0.54,
|
|
"learning_rate": 2.544463568559954e-07,
|
|
"logits/chosen": -2.8506453037261963,
|
|
"logits/rejected": -2.7827541828155518,
|
|
"logps/chosen": -332.9029846191406,
|
|
"logps/rejected": -288.8146667480469,
|
|
"loss": 0.4728,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -1.1103570461273193,
|
|
"rewards/margins": 1.3038640022277832,
|
|
"rewards/rejected": -2.4142210483551025,
|
|
"step": 1050
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 2.5157773952954677e-07,
|
|
"logits/chosen": -2.746716022491455,
|
|
"logits/rejected": -2.738701105117798,
|
|
"logps/chosen": -362.2853698730469,
|
|
"logps/rejected": -263.8562316894531,
|
|
"loss": 0.5158,
|
|
"rewards/accuracies": 0.8500000238418579,
|
|
"rewards/chosen": -0.4159720540046692,
|
|
"rewards/margins": 1.3381980657577515,
|
|
"rewards/rejected": -1.7541700601577759,
|
|
"step": 1060
|
|
},
|
|
{
|
|
"epoch": 0.55,
|
|
"learning_rate": 2.4870912220309807e-07,
|
|
"logits/chosen": -2.7854630947113037,
|
|
"logits/rejected": -2.815742015838623,
|
|
"logps/chosen": -434.0101623535156,
|
|
"logps/rejected": -322.96990966796875,
|
|
"loss": 0.5047,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": -0.38989701867103577,
|
|
"rewards/margins": 1.555858850479126,
|
|
"rewards/rejected": -1.9457557201385498,
|
|
"step": 1070
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 2.4584050487664947e-07,
|
|
"logits/chosen": -2.9198765754699707,
|
|
"logits/rejected": -2.8616695404052734,
|
|
"logps/chosen": -290.16680908203125,
|
|
"logps/rejected": -285.9183349609375,
|
|
"loss": 0.4499,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": -0.7213519811630249,
|
|
"rewards/margins": 1.0302088260650635,
|
|
"rewards/rejected": -1.751560926437378,
|
|
"step": 1080
|
|
},
|
|
{
|
|
"epoch": 0.56,
|
|
"learning_rate": 2.429718875502008e-07,
|
|
"logits/chosen": -2.9124810695648193,
|
|
"logits/rejected": -2.816594362258911,
|
|
"logps/chosen": -285.74139404296875,
|
|
"logps/rejected": -237.6147003173828,
|
|
"loss": 0.4673,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": -0.599921464920044,
|
|
"rewards/margins": 1.2972980737686157,
|
|
"rewards/rejected": -1.8972194194793701,
|
|
"step": 1090
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 2.4010327022375216e-07,
|
|
"logits/chosen": -2.8846845626831055,
|
|
"logits/rejected": -2.914504289627075,
|
|
"logps/chosen": -373.78887939453125,
|
|
"logps/rejected": -349.1485900878906,
|
|
"loss": 0.4663,
|
|
"rewards/accuracies": 0.8999999761581421,
|
|
"rewards/chosen": -0.4652225971221924,
|
|
"rewards/margins": 0.824032187461853,
|
|
"rewards/rejected": -1.289254903793335,
|
|
"step": 1100
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"eval_logits/chosen": -2.725741386413574,
|
|
"eval_logits/rejected": -2.6993935108184814,
|
|
"eval_logps/chosen": -303.0741271972656,
|
|
"eval_logps/rejected": -331.96044921875,
|
|
"eval_loss": 0.4740181565284729,
|
|
"eval_rewards/accuracies": 0.765625,
|
|
"eval_rewards/chosen": -0.803801417350769,
|
|
"eval_rewards/margins": 1.2980027198791504,
|
|
"eval_rewards/rejected": -2.10180401802063,
|
|
"eval_runtime": 57.8167,
|
|
"eval_samples_per_second": 17.296,
|
|
"eval_steps_per_second": 0.277,
|
|
"step": 1100
|
|
},
|
|
{
|
|
"epoch": 0.57,
|
|
"learning_rate": 2.3723465289730348e-07,
|
|
"logits/chosen": -2.9184155464172363,
|
|
"logits/rejected": -2.913644552230835,
|
|
"logps/chosen": -325.4616394042969,
|
|
"logps/rejected": -290.3975830078125,
|
|
"loss": 0.4519,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.7317468523979187,
|
|
"rewards/margins": 0.9042754173278809,
|
|
"rewards/rejected": -1.6360222101211548,
|
|
"step": 1110
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 2.3436603557085483e-07,
|
|
"logits/chosen": -2.845045328140259,
|
|
"logits/rejected": -2.6523425579071045,
|
|
"logps/chosen": -370.0478820800781,
|
|
"logps/rejected": -286.46112060546875,
|
|
"loss": 0.5856,
|
|
"rewards/accuracies": 0.699999988079071,
|
|
"rewards/chosen": -0.8687052726745605,
|
|
"rewards/margins": 1.1778913736343384,
|
|
"rewards/rejected": -2.0465967655181885,
|
|
"step": 1120
|
|
},
|
|
{
|
|
"epoch": 0.58,
|
|
"learning_rate": 2.314974182444062e-07,
|
|
"logits/chosen": -2.8266937732696533,
|
|
"logits/rejected": -2.6822288036346436,
|
|
"logps/chosen": -350.9194641113281,
|
|
"logps/rejected": -341.3634033203125,
|
|
"loss": 0.4118,
|
|
"rewards/accuracies": 0.8500000238418579,
|
|
"rewards/chosen": -1.1032984256744385,
|
|
"rewards/margins": 1.7717195749282837,
|
|
"rewards/rejected": -2.8750178813934326,
|
|
"step": 1130
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 2.2862880091795752e-07,
|
|
"logits/chosen": -2.71614146232605,
|
|
"logits/rejected": -2.7234530448913574,
|
|
"logps/chosen": -332.0517272949219,
|
|
"logps/rejected": -334.45977783203125,
|
|
"loss": 0.5011,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -1.2248785495758057,
|
|
"rewards/margins": 1.2211261987686157,
|
|
"rewards/rejected": -2.4460043907165527,
|
|
"step": 1140
|
|
},
|
|
{
|
|
"epoch": 0.59,
|
|
"learning_rate": 2.257601835915089e-07,
|
|
"logits/chosen": -2.746645450592041,
|
|
"logits/rejected": -2.7202837467193604,
|
|
"logps/chosen": -285.22576904296875,
|
|
"logps/rejected": -271.9557800292969,
|
|
"loss": 0.4743,
|
|
"rewards/accuracies": 0.699999988079071,
|
|
"rewards/chosen": -0.9407691955566406,
|
|
"rewards/margins": 1.4142510890960693,
|
|
"rewards/rejected": -2.3550198078155518,
|
|
"step": 1150
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 2.2289156626506022e-07,
|
|
"logits/chosen": -2.6213831901550293,
|
|
"logits/rejected": -2.5203697681427,
|
|
"logps/chosen": -377.27337646484375,
|
|
"logps/rejected": -397.376953125,
|
|
"loss": 0.4551,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": -0.6566792726516724,
|
|
"rewards/margins": 1.3486577272415161,
|
|
"rewards/rejected": -2.0053369998931885,
|
|
"step": 1160
|
|
},
|
|
{
|
|
"epoch": 0.6,
|
|
"learning_rate": 2.200229489386116e-07,
|
|
"logits/chosen": -2.8519699573516846,
|
|
"logits/rejected": -2.9094433784484863,
|
|
"logps/chosen": -343.9521789550781,
|
|
"logps/rejected": -308.2449645996094,
|
|
"loss": 0.4565,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.4127805829048157,
|
|
"rewards/margins": 0.8223036527633667,
|
|
"rewards/rejected": -1.2350841760635376,
|
|
"step": 1170
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 2.1715433161216294e-07,
|
|
"logits/chosen": -2.8739943504333496,
|
|
"logits/rejected": -2.8903346061706543,
|
|
"logps/chosen": -301.22967529296875,
|
|
"logps/rejected": -251.42855834960938,
|
|
"loss": 0.4426,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": -0.35094451904296875,
|
|
"rewards/margins": 1.0077975988388062,
|
|
"rewards/rejected": -1.3587422370910645,
|
|
"step": 1180
|
|
},
|
|
{
|
|
"epoch": 0.61,
|
|
"learning_rate": 2.1428571428571426e-07,
|
|
"logits/chosen": -2.892425060272217,
|
|
"logits/rejected": -2.8500430583953857,
|
|
"logps/chosen": -328.62652587890625,
|
|
"logps/rejected": -307.386474609375,
|
|
"loss": 0.4722,
|
|
"rewards/accuracies": 0.699999988079071,
|
|
"rewards/chosen": -0.8684075474739075,
|
|
"rewards/margins": 1.0192383527755737,
|
|
"rewards/rejected": -1.887645959854126,
|
|
"step": 1190
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 2.1141709695926563e-07,
|
|
"logits/chosen": -2.910890817642212,
|
|
"logits/rejected": -2.8874032497406006,
|
|
"logps/chosen": -265.87310791015625,
|
|
"logps/rejected": -210.3219451904297,
|
|
"loss": 0.4737,
|
|
"rewards/accuracies": 0.699999988079071,
|
|
"rewards/chosen": -0.5260100960731506,
|
|
"rewards/margins": 0.7490276098251343,
|
|
"rewards/rejected": -1.2750377655029297,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"eval_logits/chosen": -2.713484764099121,
|
|
"eval_logits/rejected": -2.6841983795166016,
|
|
"eval_logps/chosen": -294.5633850097656,
|
|
"eval_logps/rejected": -323.9544677734375,
|
|
"eval_loss": 0.47160398960113525,
|
|
"eval_rewards/accuracies": 0.796875,
|
|
"eval_rewards/chosen": -0.3782654404640198,
|
|
"eval_rewards/margins": 1.3232390880584717,
|
|
"eval_rewards/rejected": -1.7015043497085571,
|
|
"eval_runtime": 57.3455,
|
|
"eval_samples_per_second": 17.438,
|
|
"eval_steps_per_second": 0.279,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 0.62,
|
|
"learning_rate": 2.0854847963281698e-07,
|
|
"logits/chosen": -2.970987319946289,
|
|
"logits/rejected": -2.867969512939453,
|
|
"logps/chosen": -368.6965026855469,
|
|
"logps/rejected": -353.63604736328125,
|
|
"loss": 0.4535,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.4251924455165863,
|
|
"rewards/margins": 1.3024253845214844,
|
|
"rewards/rejected": -1.727617859840393,
|
|
"step": 1210
|
|
},
|
|
{
|
|
"epoch": 0.63,
|
|
"learning_rate": 2.0567986230636832e-07,
|
|
"logits/chosen": -2.964089870452881,
|
|
"logits/rejected": -2.933443784713745,
|
|
"logps/chosen": -293.6631164550781,
|
|
"logps/rejected": -330.43878173828125,
|
|
"loss": 0.4637,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.6487616300582886,
|
|
"rewards/margins": 1.531836986541748,
|
|
"rewards/rejected": -2.180598735809326,
|
|
"step": 1220
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 2.0281124497991967e-07,
|
|
"logits/chosen": -2.9999537467956543,
|
|
"logits/rejected": -2.9158272743225098,
|
|
"logps/chosen": -342.1163330078125,
|
|
"logps/rejected": -396.15350341796875,
|
|
"loss": 0.4611,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": -0.16167068481445312,
|
|
"rewards/margins": 1.715951919555664,
|
|
"rewards/rejected": -1.8776228427886963,
|
|
"step": 1230
|
|
},
|
|
{
|
|
"epoch": 0.64,
|
|
"learning_rate": 1.9994262765347102e-07,
|
|
"logits/chosen": -2.9242331981658936,
|
|
"logits/rejected": -2.841064929962158,
|
|
"logps/chosen": -337.76983642578125,
|
|
"logps/rejected": -282.3847351074219,
|
|
"loss": 0.4917,
|
|
"rewards/accuracies": 0.6499999761581421,
|
|
"rewards/chosen": -1.049020528793335,
|
|
"rewards/margins": 0.9485428929328918,
|
|
"rewards/rejected": -1.9975636005401611,
|
|
"step": 1240
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 1.9707401032702237e-07,
|
|
"logits/chosen": -2.7807137966156006,
|
|
"logits/rejected": -2.775256872177124,
|
|
"logps/chosen": -282.36151123046875,
|
|
"logps/rejected": -339.3646545410156,
|
|
"loss": 0.4765,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": -0.4363161623477936,
|
|
"rewards/margins": 1.3644092082977295,
|
|
"rewards/rejected": -1.8007253408432007,
|
|
"step": 1250
|
|
},
|
|
{
|
|
"epoch": 0.65,
|
|
"learning_rate": 1.942053930005737e-07,
|
|
"logits/chosen": -2.850567579269409,
|
|
"logits/rejected": -2.8491926193237305,
|
|
"logps/chosen": -355.41064453125,
|
|
"logps/rejected": -349.8357238769531,
|
|
"loss": 0.4739,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.5196908712387085,
|
|
"rewards/margins": 1.9353296756744385,
|
|
"rewards/rejected": -2.4550204277038574,
|
|
"step": 1260
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 1.9133677567412506e-07,
|
|
"logits/chosen": -2.6965737342834473,
|
|
"logits/rejected": -2.7178375720977783,
|
|
"logps/chosen": -253.52798461914062,
|
|
"logps/rejected": -266.63330078125,
|
|
"loss": 0.4285,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.8542221784591675,
|
|
"rewards/margins": 1.580542802810669,
|
|
"rewards/rejected": -2.434765338897705,
|
|
"step": 1270
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 1.884681583476764e-07,
|
|
"logits/chosen": -2.8671460151672363,
|
|
"logits/rejected": -2.8770196437835693,
|
|
"logps/chosen": -395.8647155761719,
|
|
"logps/rejected": -322.73565673828125,
|
|
"loss": 0.5017,
|
|
"rewards/accuracies": 0.699999988079071,
|
|
"rewards/chosen": -0.8471239805221558,
|
|
"rewards/margins": 0.8821918368339539,
|
|
"rewards/rejected": -1.729315996170044,
|
|
"step": 1280
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 1.8559954102122778e-07,
|
|
"logits/chosen": -2.8296945095062256,
|
|
"logits/rejected": -2.8752951622009277,
|
|
"logps/chosen": -439.872314453125,
|
|
"logps/rejected": -365.8454284667969,
|
|
"loss": 0.4554,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.5656521320343018,
|
|
"rewards/margins": 1.5323896408081055,
|
|
"rewards/rejected": -2.0980417728424072,
|
|
"step": 1290
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"learning_rate": 1.827309236947791e-07,
|
|
"logits/chosen": -2.946869134902954,
|
|
"logits/rejected": -2.8591227531433105,
|
|
"logps/chosen": -316.06512451171875,
|
|
"logps/rejected": -311.39892578125,
|
|
"loss": 0.4259,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.757400631904602,
|
|
"rewards/margins": 1.6462675333023071,
|
|
"rewards/rejected": -2.40366792678833,
|
|
"step": 1300
|
|
},
|
|
{
|
|
"epoch": 0.67,
|
|
"eval_logits/chosen": -2.735586166381836,
|
|
"eval_logits/rejected": -2.7046470642089844,
|
|
"eval_logps/chosen": -299.4761047363281,
|
|
"eval_logps/rejected": -329.3312072753906,
|
|
"eval_loss": 0.486631840467453,
|
|
"eval_rewards/accuracies": 0.78125,
|
|
"eval_rewards/chosen": -0.6239006519317627,
|
|
"eval_rewards/margins": 1.346441626548767,
|
|
"eval_rewards/rejected": -1.9703422784805298,
|
|
"eval_runtime": 57.6013,
|
|
"eval_samples_per_second": 17.361,
|
|
"eval_steps_per_second": 0.278,
|
|
"step": 1300
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 1.7986230636833047e-07,
|
|
"logits/chosen": -2.9283134937286377,
|
|
"logits/rejected": -2.797074556350708,
|
|
"logps/chosen": -277.39044189453125,
|
|
"logps/rejected": -298.46978759765625,
|
|
"loss": 0.4063,
|
|
"rewards/accuracies": 0.550000011920929,
|
|
"rewards/chosen": -0.8742395639419556,
|
|
"rewards/margins": 0.7818448543548584,
|
|
"rewards/rejected": -1.656084418296814,
|
|
"step": 1310
|
|
},
|
|
{
|
|
"epoch": 0.68,
|
|
"learning_rate": 1.769936890418818e-07,
|
|
"logits/chosen": -2.8751561641693115,
|
|
"logits/rejected": -2.8942153453826904,
|
|
"logps/chosen": -327.19158935546875,
|
|
"logps/rejected": -371.1598205566406,
|
|
"loss": 0.4662,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": -0.4851578176021576,
|
|
"rewards/margins": 1.8855422735214233,
|
|
"rewards/rejected": -2.3707003593444824,
|
|
"step": 1320
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 1.7412507171543314e-07,
|
|
"logits/chosen": -2.869723081588745,
|
|
"logits/rejected": -2.791024684906006,
|
|
"logps/chosen": -299.9261474609375,
|
|
"logps/rejected": -240.87722778320312,
|
|
"loss": 0.5043,
|
|
"rewards/accuracies": 0.6499999761581421,
|
|
"rewards/chosen": -1.252393364906311,
|
|
"rewards/margins": 0.8984333872795105,
|
|
"rewards/rejected": -2.1508266925811768,
|
|
"step": 1330
|
|
},
|
|
{
|
|
"epoch": 0.69,
|
|
"learning_rate": 1.7125645438898452e-07,
|
|
"logits/chosen": -2.8324294090270996,
|
|
"logits/rejected": -2.8103926181793213,
|
|
"logps/chosen": -360.5182800292969,
|
|
"logps/rejected": -295.58953857421875,
|
|
"loss": 0.4544,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.9667569994926453,
|
|
"rewards/margins": 0.8512316942214966,
|
|
"rewards/rejected": -1.8179887533187866,
|
|
"step": 1340
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 1.6838783706253584e-07,
|
|
"logits/chosen": -2.848896026611328,
|
|
"logits/rejected": -2.7923531532287598,
|
|
"logps/chosen": -372.48175048828125,
|
|
"logps/rejected": -274.67169189453125,
|
|
"loss": 0.5062,
|
|
"rewards/accuracies": 0.699999988079071,
|
|
"rewards/chosen": -0.6907297372817993,
|
|
"rewards/margins": 1.647875189781189,
|
|
"rewards/rejected": -2.3386049270629883,
|
|
"step": 1350
|
|
},
|
|
{
|
|
"epoch": 0.7,
|
|
"learning_rate": 1.655192197360872e-07,
|
|
"logits/chosen": -3.0092597007751465,
|
|
"logits/rejected": -3.0115504264831543,
|
|
"logps/chosen": -414.8208923339844,
|
|
"logps/rejected": -411.04327392578125,
|
|
"loss": 0.5338,
|
|
"rewards/accuracies": 0.699999988079071,
|
|
"rewards/chosen": -0.2658500671386719,
|
|
"rewards/margins": 1.0859276056289673,
|
|
"rewards/rejected": -1.3517776727676392,
|
|
"step": 1360
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 1.6265060240963853e-07,
|
|
"logits/chosen": -2.947781801223755,
|
|
"logits/rejected": -2.9552831649780273,
|
|
"logps/chosen": -383.8572082519531,
|
|
"logps/rejected": -299.138427734375,
|
|
"loss": 0.5202,
|
|
"rewards/accuracies": 0.699999988079071,
|
|
"rewards/chosen": -0.4735872149467468,
|
|
"rewards/margins": 1.4772822856903076,
|
|
"rewards/rejected": -1.9508693218231201,
|
|
"step": 1370
|
|
},
|
|
{
|
|
"epoch": 0.71,
|
|
"learning_rate": 1.597819850831899e-07,
|
|
"logits/chosen": -2.812499523162842,
|
|
"logits/rejected": -2.7452359199523926,
|
|
"logps/chosen": -339.11981201171875,
|
|
"logps/rejected": -291.80035400390625,
|
|
"loss": 0.513,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.7339299917221069,
|
|
"rewards/margins": 1.1406757831573486,
|
|
"rewards/rejected": -1.8746061325073242,
|
|
"step": 1380
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 1.5691336775674125e-07,
|
|
"logits/chosen": -2.7743217945098877,
|
|
"logits/rejected": -2.7072505950927734,
|
|
"logps/chosen": -318.3100891113281,
|
|
"logps/rejected": -287.02581787109375,
|
|
"loss": 0.5028,
|
|
"rewards/accuracies": 0.699999988079071,
|
|
"rewards/chosen": -0.30296844244003296,
|
|
"rewards/margins": 1.0583657026290894,
|
|
"rewards/rejected": -1.3613340854644775,
|
|
"step": 1390
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"learning_rate": 1.5404475043029257e-07,
|
|
"logits/chosen": -2.930776834487915,
|
|
"logits/rejected": -2.942199468612671,
|
|
"logps/chosen": -239.4253387451172,
|
|
"logps/rejected": -327.1017150878906,
|
|
"loss": 0.4935,
|
|
"rewards/accuracies": 0.699999988079071,
|
|
"rewards/chosen": -0.503673255443573,
|
|
"rewards/margins": 0.9641939401626587,
|
|
"rewards/rejected": -1.4678672552108765,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.72,
|
|
"eval_logits/chosen": -2.7443745136260986,
|
|
"eval_logits/rejected": -2.71527099609375,
|
|
"eval_logps/chosen": -298.24908447265625,
|
|
"eval_logps/rejected": -325.1242980957031,
|
|
"eval_loss": 0.4747045338153839,
|
|
"eval_rewards/accuracies": 0.78125,
|
|
"eval_rewards/chosen": -0.5625503063201904,
|
|
"eval_rewards/margins": 1.1974470615386963,
|
|
"eval_rewards/rejected": -1.7599973678588867,
|
|
"eval_runtime": 59.3107,
|
|
"eval_samples_per_second": 16.86,
|
|
"eval_steps_per_second": 0.27,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 1.5117613310384395e-07,
|
|
"logits/chosen": -2.92527437210083,
|
|
"logits/rejected": -3.0139377117156982,
|
|
"logps/chosen": -237.60580444335938,
|
|
"logps/rejected": -268.05389404296875,
|
|
"loss": 0.4175,
|
|
"rewards/accuracies": 0.8500000238418579,
|
|
"rewards/chosen": -0.6132401823997498,
|
|
"rewards/margins": 1.2935967445373535,
|
|
"rewards/rejected": -1.9068371057510376,
|
|
"step": 1410
|
|
},
|
|
{
|
|
"epoch": 0.73,
|
|
"learning_rate": 1.483075157773953e-07,
|
|
"logits/chosen": -2.7750067710876465,
|
|
"logits/rejected": -2.552576780319214,
|
|
"logps/chosen": -268.2431945800781,
|
|
"logps/rejected": -310.37359619140625,
|
|
"loss": 0.531,
|
|
"rewards/accuracies": 0.699999988079071,
|
|
"rewards/chosen": -0.5963152647018433,
|
|
"rewards/margins": 0.873083770275116,
|
|
"rewards/rejected": -1.469399094581604,
|
|
"step": 1420
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 1.4543889845094664e-07,
|
|
"logits/chosen": -2.900043249130249,
|
|
"logits/rejected": -2.9251770973205566,
|
|
"logps/chosen": -294.197998046875,
|
|
"logps/rejected": -420.86981201171875,
|
|
"loss": 0.5,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.7220600247383118,
|
|
"rewards/margins": 1.1596812009811401,
|
|
"rewards/rejected": -1.8817412853240967,
|
|
"step": 1430
|
|
},
|
|
{
|
|
"epoch": 0.74,
|
|
"learning_rate": 1.42570281124498e-07,
|
|
"logits/chosen": -2.8284263610839844,
|
|
"logits/rejected": -2.861384630203247,
|
|
"logps/chosen": -330.3293762207031,
|
|
"logps/rejected": -352.83013916015625,
|
|
"loss": 0.5019,
|
|
"rewards/accuracies": 0.8500000238418579,
|
|
"rewards/chosen": -0.5237518548965454,
|
|
"rewards/margins": 1.484616756439209,
|
|
"rewards/rejected": -2.008368730545044,
|
|
"step": 1440
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 1.3970166379804933e-07,
|
|
"logits/chosen": -2.9035301208496094,
|
|
"logits/rejected": -2.877016544342041,
|
|
"logps/chosen": -330.01885986328125,
|
|
"logps/rejected": -309.932373046875,
|
|
"loss": 0.4207,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.6725677847862244,
|
|
"rewards/margins": 1.2416926622390747,
|
|
"rewards/rejected": -1.9142605066299438,
|
|
"step": 1450
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"learning_rate": 1.3683304647160068e-07,
|
|
"logits/chosen": -2.8522450923919678,
|
|
"logits/rejected": -2.804635763168335,
|
|
"logps/chosen": -315.32598876953125,
|
|
"logps/rejected": -319.85528564453125,
|
|
"loss": 0.484,
|
|
"rewards/accuracies": 0.8500000238418579,
|
|
"rewards/chosen": -0.6710207462310791,
|
|
"rewards/margins": 2.302441120147705,
|
|
"rewards/rejected": -2.973461627960205,
|
|
"step": 1460
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 1.3396442914515203e-07,
|
|
"logits/chosen": -2.795161008834839,
|
|
"logits/rejected": -2.835954189300537,
|
|
"logps/chosen": -301.64306640625,
|
|
"logps/rejected": -345.019287109375,
|
|
"loss": 0.5058,
|
|
"rewards/accuracies": 0.8500000238418579,
|
|
"rewards/chosen": -0.31656789779663086,
|
|
"rewards/margins": 1.5656077861785889,
|
|
"rewards/rejected": -1.8821756839752197,
|
|
"step": 1470
|
|
},
|
|
{
|
|
"epoch": 0.76,
|
|
"learning_rate": 1.3109581181870338e-07,
|
|
"logits/chosen": -2.8382556438446045,
|
|
"logits/rejected": -2.7719860076904297,
|
|
"logps/chosen": -416.19622802734375,
|
|
"logps/rejected": -278.07989501953125,
|
|
"loss": 0.3959,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.7636359930038452,
|
|
"rewards/margins": 2.003495216369629,
|
|
"rewards/rejected": -2.7671313285827637,
|
|
"step": 1480
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 1.2822719449225472e-07,
|
|
"logits/chosen": -2.7587904930114746,
|
|
"logits/rejected": -2.747542381286621,
|
|
"logps/chosen": -255.3273468017578,
|
|
"logps/rejected": -239.77218627929688,
|
|
"loss": 0.4899,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": -0.0010371461976319551,
|
|
"rewards/margins": 2.0150508880615234,
|
|
"rewards/rejected": -2.0160880088806152,
|
|
"step": 1490
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"learning_rate": 1.253585771658061e-07,
|
|
"logits/chosen": -2.7033538818359375,
|
|
"logits/rejected": -2.7172279357910156,
|
|
"logps/chosen": -317.5029296875,
|
|
"logps/rejected": -332.539794921875,
|
|
"loss": 0.4211,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": -0.8115976452827454,
|
|
"rewards/margins": 1.1709970235824585,
|
|
"rewards/rejected": -1.9825948476791382,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 0.77,
|
|
"eval_logits/chosen": -2.7236168384552,
|
|
"eval_logits/rejected": -2.6944026947021484,
|
|
"eval_logps/chosen": -299.1958923339844,
|
|
"eval_logps/rejected": -329.9108581542969,
|
|
"eval_loss": 0.4644867181777954,
|
|
"eval_rewards/accuracies": 0.765625,
|
|
"eval_rewards/chosen": -0.6098894476890564,
|
|
"eval_rewards/margins": 1.389434814453125,
|
|
"eval_rewards/rejected": -1.9993242025375366,
|
|
"eval_runtime": 57.1288,
|
|
"eval_samples_per_second": 17.504,
|
|
"eval_steps_per_second": 0.28,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 1.2248995983935742e-07,
|
|
"logits/chosen": -2.7994678020477295,
|
|
"logits/rejected": -2.593963384628296,
|
|
"logps/chosen": -300.65960693359375,
|
|
"logps/rejected": -338.9661560058594,
|
|
"loss": 0.4655,
|
|
"rewards/accuracies": 0.8999999761581421,
|
|
"rewards/chosen": -0.44014763832092285,
|
|
"rewards/margins": 2.050450325012207,
|
|
"rewards/rejected": -2.49059796333313,
|
|
"step": 1510
|
|
},
|
|
{
|
|
"epoch": 0.78,
|
|
"learning_rate": 1.1962134251290876e-07,
|
|
"logits/chosen": -2.8634955883026123,
|
|
"logits/rejected": -2.9231014251708984,
|
|
"logps/chosen": -350.2730407714844,
|
|
"logps/rejected": -394.0359191894531,
|
|
"loss": 0.4687,
|
|
"rewards/accuracies": 0.6000000238418579,
|
|
"rewards/chosen": -0.7544436454772949,
|
|
"rewards/margins": 0.6226231455802917,
|
|
"rewards/rejected": -1.377066731452942,
|
|
"step": 1520
|
|
},
|
|
{
|
|
"epoch": 0.79,
|
|
"learning_rate": 1.1675272518646012e-07,
|
|
"logits/chosen": -2.714735984802246,
|
|
"logits/rejected": -2.6250107288360596,
|
|
"logps/chosen": -271.64691162109375,
|
|
"logps/rejected": -272.4801330566406,
|
|
"loss": 0.3846,
|
|
"rewards/accuracies": 0.8999999761581421,
|
|
"rewards/chosen": 0.06765889376401901,
|
|
"rewards/margins": 1.8679250478744507,
|
|
"rewards/rejected": -1.800265908241272,
|
|
"step": 1530
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.1388410786001147e-07,
|
|
"logits/chosen": -2.952043294906616,
|
|
"logits/rejected": -2.9354848861694336,
|
|
"logps/chosen": -237.10208129882812,
|
|
"logps/rejected": -248.58023071289062,
|
|
"loss": 0.4791,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": -0.8253666162490845,
|
|
"rewards/margins": 0.6920875906944275,
|
|
"rewards/rejected": -1.5174543857574463,
|
|
"step": 1540
|
|
},
|
|
{
|
|
"epoch": 0.8,
|
|
"learning_rate": 1.1101549053356282e-07,
|
|
"logits/chosen": -2.839200735092163,
|
|
"logits/rejected": -2.827392101287842,
|
|
"logps/chosen": -347.25390625,
|
|
"logps/rejected": -284.7652893066406,
|
|
"loss": 0.4351,
|
|
"rewards/accuracies": 0.8999999761581421,
|
|
"rewards/chosen": -0.3293851315975189,
|
|
"rewards/margins": 2.0348496437072754,
|
|
"rewards/rejected": -2.3642349243164062,
|
|
"step": 1550
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.0814687320711418e-07,
|
|
"logits/chosen": -2.853760242462158,
|
|
"logits/rejected": -2.728318214416504,
|
|
"logps/chosen": -224.41683959960938,
|
|
"logps/rejected": -228.2812042236328,
|
|
"loss": 0.4528,
|
|
"rewards/accuracies": 0.8999999761581421,
|
|
"rewards/chosen": -0.3215061128139496,
|
|
"rewards/margins": 1.6524702310562134,
|
|
"rewards/rejected": -1.9739763736724854,
|
|
"step": 1560
|
|
},
|
|
{
|
|
"epoch": 0.81,
|
|
"learning_rate": 1.0527825588066551e-07,
|
|
"logits/chosen": -2.870434284210205,
|
|
"logits/rejected": -2.793577194213867,
|
|
"logps/chosen": -282.13787841796875,
|
|
"logps/rejected": -271.94915771484375,
|
|
"loss": 0.4144,
|
|
"rewards/accuracies": 0.949999988079071,
|
|
"rewards/chosen": -0.5694266557693481,
|
|
"rewards/margins": 1.3067474365234375,
|
|
"rewards/rejected": -1.8761742115020752,
|
|
"step": 1570
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 1.0240963855421686e-07,
|
|
"logits/chosen": -2.73341703414917,
|
|
"logits/rejected": -2.7930026054382324,
|
|
"logps/chosen": -214.6037139892578,
|
|
"logps/rejected": -285.880859375,
|
|
"loss": 0.4287,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.6492952108383179,
|
|
"rewards/margins": 1.3862565755844116,
|
|
"rewards/rejected": -2.0355517864227295,
|
|
"step": 1580
|
|
},
|
|
{
|
|
"epoch": 0.82,
|
|
"learning_rate": 9.95410212277682e-08,
|
|
"logits/chosen": -2.885023593902588,
|
|
"logits/rejected": -2.929999351501465,
|
|
"logps/chosen": -294.29364013671875,
|
|
"logps/rejected": -273.53729248046875,
|
|
"loss": 0.4548,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.3889610171318054,
|
|
"rewards/margins": 1.3147281408309937,
|
|
"rewards/rejected": -1.7036889791488647,
|
|
"step": 1590
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 9.667240390131957e-08,
|
|
"logits/chosen": -2.8551316261291504,
|
|
"logits/rejected": -2.913240432739258,
|
|
"logps/chosen": -301.7610168457031,
|
|
"logps/rejected": -309.9018249511719,
|
|
"loss": 0.4931,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": -0.7070005536079407,
|
|
"rewards/margins": 1.0328757762908936,
|
|
"rewards/rejected": -1.7398761510849,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"eval_logits/chosen": -2.7304749488830566,
|
|
"eval_logits/rejected": -2.7005884647369385,
|
|
"eval_logps/chosen": -300.5933532714844,
|
|
"eval_logps/rejected": -332.0889892578125,
|
|
"eval_loss": 0.4683745205402374,
|
|
"eval_rewards/accuracies": 0.765625,
|
|
"eval_rewards/chosen": -0.6797637343406677,
|
|
"eval_rewards/margins": 1.4284672737121582,
|
|
"eval_rewards/rejected": -2.1082310676574707,
|
|
"eval_runtime": 58.1263,
|
|
"eval_samples_per_second": 17.204,
|
|
"eval_steps_per_second": 0.275,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 9.380378657487091e-08,
|
|
"logits/chosen": -2.786533832550049,
|
|
"logits/rejected": -2.8000881671905518,
|
|
"logps/chosen": -252.6224822998047,
|
|
"logps/rejected": -307.4824523925781,
|
|
"loss": 0.4282,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.869725227355957,
|
|
"rewards/margins": 1.490140438079834,
|
|
"rewards/rejected": -2.359865665435791,
|
|
"step": 1610
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 9.093516924842226e-08,
|
|
"logits/chosen": -2.9314494132995605,
|
|
"logits/rejected": -2.888124942779541,
|
|
"logps/chosen": -322.18988037109375,
|
|
"logps/rejected": -340.00958251953125,
|
|
"loss": 0.4095,
|
|
"rewards/accuracies": 0.8500000238418579,
|
|
"rewards/chosen": -0.7641150951385498,
|
|
"rewards/margins": 1.8909038305282593,
|
|
"rewards/rejected": -2.6550190448760986,
|
|
"step": 1620
|
|
},
|
|
{
|
|
"epoch": 0.84,
|
|
"learning_rate": 8.806655192197361e-08,
|
|
"logits/chosen": -2.91792368888855,
|
|
"logits/rejected": -2.8502402305603027,
|
|
"logps/chosen": -327.46807861328125,
|
|
"logps/rejected": -310.23199462890625,
|
|
"loss": 0.4625,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": -0.5610017776489258,
|
|
"rewards/margins": 1.5278574228286743,
|
|
"rewards/rejected": -2.0888590812683105,
|
|
"step": 1630
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 8.519793459552494e-08,
|
|
"logits/chosen": -2.8847482204437256,
|
|
"logits/rejected": -2.8378400802612305,
|
|
"logps/chosen": -315.1554870605469,
|
|
"logps/rejected": -282.12652587890625,
|
|
"loss": 0.418,
|
|
"rewards/accuracies": 0.8999999761581421,
|
|
"rewards/chosen": -0.6100664138793945,
|
|
"rewards/margins": 1.6232362985610962,
|
|
"rewards/rejected": -2.233302593231201,
|
|
"step": 1640
|
|
},
|
|
{
|
|
"epoch": 0.85,
|
|
"learning_rate": 8.23293172690763e-08,
|
|
"logits/chosen": -2.943779230117798,
|
|
"logits/rejected": -2.894235372543335,
|
|
"logps/chosen": -380.72894287109375,
|
|
"logps/rejected": -279.9917907714844,
|
|
"loss": 0.4489,
|
|
"rewards/accuracies": 0.8999999761581421,
|
|
"rewards/chosen": 0.3063332736492157,
|
|
"rewards/margins": 2.2862744331359863,
|
|
"rewards/rejected": -1.9799413681030273,
|
|
"step": 1650
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 7.946069994262765e-08,
|
|
"logits/chosen": -2.738778829574585,
|
|
"logits/rejected": -2.805850028991699,
|
|
"logps/chosen": -221.29507446289062,
|
|
"logps/rejected": -314.97076416015625,
|
|
"loss": 0.4268,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": -0.5129801034927368,
|
|
"rewards/margins": 1.8285270929336548,
|
|
"rewards/rejected": -2.3415069580078125,
|
|
"step": 1660
|
|
},
|
|
{
|
|
"epoch": 0.86,
|
|
"learning_rate": 7.6592082616179e-08,
|
|
"logits/chosen": -2.73237681388855,
|
|
"logits/rejected": -2.7697629928588867,
|
|
"logps/chosen": -295.9866638183594,
|
|
"logps/rejected": -282.50189208984375,
|
|
"loss": 0.5111,
|
|
"rewards/accuracies": 0.8500000238418579,
|
|
"rewards/chosen": -0.5220045447349548,
|
|
"rewards/margins": 1.3860810995101929,
|
|
"rewards/rejected": -1.908085823059082,
|
|
"step": 1670
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 7.372346528973034e-08,
|
|
"logits/chosen": -2.9239017963409424,
|
|
"logits/rejected": -2.8694822788238525,
|
|
"logps/chosen": -266.6614074707031,
|
|
"logps/rejected": -303.3201599121094,
|
|
"loss": 0.5028,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": -0.5870487689971924,
|
|
"rewards/margins": 0.9326974749565125,
|
|
"rewards/rejected": -1.5197464227676392,
|
|
"step": 1680
|
|
},
|
|
{
|
|
"epoch": 0.87,
|
|
"learning_rate": 7.08548479632817e-08,
|
|
"logits/chosen": -2.8852458000183105,
|
|
"logits/rejected": -2.9049694538116455,
|
|
"logps/chosen": -256.247802734375,
|
|
"logps/rejected": -329.19305419921875,
|
|
"loss": 0.4537,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": -0.6385833024978638,
|
|
"rewards/margins": 1.4285590648651123,
|
|
"rewards/rejected": -2.0671424865722656,
|
|
"step": 1690
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 6.798623063683305e-08,
|
|
"logits/chosen": -2.8753390312194824,
|
|
"logits/rejected": -2.7723491191864014,
|
|
"logps/chosen": -285.18365478515625,
|
|
"logps/rejected": -267.1637878417969,
|
|
"loss": 0.5029,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": -0.7313726544380188,
|
|
"rewards/margins": 1.1651675701141357,
|
|
"rewards/rejected": -1.8965400457382202,
|
|
"step": 1700
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"eval_logits/chosen": -2.740252733230591,
|
|
"eval_logits/rejected": -2.710822343826294,
|
|
"eval_logps/chosen": -297.1233215332031,
|
|
"eval_logps/rejected": -327.8266906738281,
|
|
"eval_loss": 0.4595467746257782,
|
|
"eval_rewards/accuracies": 0.78125,
|
|
"eval_rewards/chosen": -0.5062620639801025,
|
|
"eval_rewards/margins": 1.3888535499572754,
|
|
"eval_rewards/rejected": -1.895115852355957,
|
|
"eval_runtime": 55.2023,
|
|
"eval_samples_per_second": 18.115,
|
|
"eval_steps_per_second": 0.29,
|
|
"step": 1700
|
|
},
|
|
{
|
|
"epoch": 0.88,
|
|
"learning_rate": 6.511761331038438e-08,
|
|
"logits/chosen": -2.895554542541504,
|
|
"logits/rejected": -2.7735402584075928,
|
|
"logps/chosen": -375.2083435058594,
|
|
"logps/rejected": -386.50689697265625,
|
|
"loss": 0.4635,
|
|
"rewards/accuracies": 0.8500000238418579,
|
|
"rewards/chosen": -0.7999193072319031,
|
|
"rewards/margins": 1.7455612421035767,
|
|
"rewards/rejected": -2.545480728149414,
|
|
"step": 1710
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 6.224899598393573e-08,
|
|
"logits/chosen": -2.897815704345703,
|
|
"logits/rejected": -2.833980083465576,
|
|
"logps/chosen": -349.2824401855469,
|
|
"logps/rejected": -320.26080322265625,
|
|
"loss": 0.5059,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": -0.8347634077072144,
|
|
"rewards/margins": 1.1928595304489136,
|
|
"rewards/rejected": -2.027622938156128,
|
|
"step": 1720
|
|
},
|
|
{
|
|
"epoch": 0.89,
|
|
"learning_rate": 5.9380378657487085e-08,
|
|
"logits/chosen": -2.747729539871216,
|
|
"logits/rejected": -2.749624490737915,
|
|
"logps/chosen": -389.26104736328125,
|
|
"logps/rejected": -379.7428283691406,
|
|
"loss": 0.4561,
|
|
"rewards/accuracies": 0.550000011920929,
|
|
"rewards/chosen": -0.46041926741600037,
|
|
"rewards/margins": 1.0110886096954346,
|
|
"rewards/rejected": -1.4715079069137573,
|
|
"step": 1730
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 5.651176133103844e-08,
|
|
"logits/chosen": -2.9265472888946533,
|
|
"logits/rejected": -2.7790768146514893,
|
|
"logps/chosen": -344.9275817871094,
|
|
"logps/rejected": -267.4143371582031,
|
|
"loss": 0.4472,
|
|
"rewards/accuracies": 0.949999988079071,
|
|
"rewards/chosen": -0.3255007266998291,
|
|
"rewards/margins": 2.0611414909362793,
|
|
"rewards/rejected": -2.3866424560546875,
|
|
"step": 1740
|
|
},
|
|
{
|
|
"epoch": 0.9,
|
|
"learning_rate": 5.3643144004589786e-08,
|
|
"logits/chosen": -2.8731534481048584,
|
|
"logits/rejected": -2.7964863777160645,
|
|
"logps/chosen": -293.29681396484375,
|
|
"logps/rejected": -305.7369079589844,
|
|
"loss": 0.4481,
|
|
"rewards/accuracies": 0.8500000238418579,
|
|
"rewards/chosen": -0.3717958629131317,
|
|
"rewards/margins": 1.912921667098999,
|
|
"rewards/rejected": -2.2847177982330322,
|
|
"step": 1750
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 5.077452667814113e-08,
|
|
"logits/chosen": -2.771488666534424,
|
|
"logits/rejected": -2.865757465362549,
|
|
"logps/chosen": -309.177978515625,
|
|
"logps/rejected": -285.9333801269531,
|
|
"loss": 0.4892,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": -0.7683748602867126,
|
|
"rewards/margins": 1.0694787502288818,
|
|
"rewards/rejected": -1.8378536701202393,
|
|
"step": 1760
|
|
},
|
|
{
|
|
"epoch": 0.91,
|
|
"learning_rate": 4.790590935169248e-08,
|
|
"logits/chosen": -2.8640894889831543,
|
|
"logits/rejected": -2.8574938774108887,
|
|
"logps/chosen": -270.6001281738281,
|
|
"logps/rejected": -282.6536865234375,
|
|
"loss": 0.4509,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.5418082475662231,
|
|
"rewards/margins": 1.5019443035125732,
|
|
"rewards/rejected": -2.043752670288086,
|
|
"step": 1770
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 4.5037292025243834e-08,
|
|
"logits/chosen": -2.916822910308838,
|
|
"logits/rejected": -2.945218086242676,
|
|
"logps/chosen": -330.62493896484375,
|
|
"logps/rejected": -340.20599365234375,
|
|
"loss": 0.4988,
|
|
"rewards/accuracies": 0.8500000238418579,
|
|
"rewards/chosen": -0.980034351348877,
|
|
"rewards/margins": 1.0700609683990479,
|
|
"rewards/rejected": -2.0500950813293457,
|
|
"step": 1780
|
|
},
|
|
{
|
|
"epoch": 0.92,
|
|
"learning_rate": 4.2168674698795174e-08,
|
|
"logits/chosen": -2.9247031211853027,
|
|
"logits/rejected": -2.9124653339385986,
|
|
"logps/chosen": -289.2491149902344,
|
|
"logps/rejected": -264.84283447265625,
|
|
"loss": 0.4577,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": -0.3735916018486023,
|
|
"rewards/margins": 0.9222102165222168,
|
|
"rewards/rejected": -1.2958018779754639,
|
|
"step": 1790
|
|
},
|
|
{
|
|
"epoch": 0.93,
|
|
"learning_rate": 3.930005737234653e-08,
|
|
"logits/chosen": -2.9057695865631104,
|
|
"logits/rejected": -2.819000005722046,
|
|
"logps/chosen": -337.3312683105469,
|
|
"logps/rejected": -276.67010498046875,
|
|
"loss": 0.4965,
|
|
"rewards/accuracies": 0.8500000238418579,
|
|
"rewards/chosen": -0.23481640219688416,
|
|
"rewards/margins": 1.076974868774414,
|
|
"rewards/rejected": -1.3117913007736206,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.93,
|
|
"eval_logits/chosen": -2.752323627471924,
|
|
"eval_logits/rejected": -2.7225875854492188,
|
|
"eval_logps/chosen": -298.1203308105469,
|
|
"eval_logps/rejected": -328.0830993652344,
|
|
"eval_loss": 0.46130359172821045,
|
|
"eval_rewards/accuracies": 0.78125,
|
|
"eval_rewards/chosen": -0.5561107397079468,
|
|
"eval_rewards/margins": 1.3518264293670654,
|
|
"eval_rewards/rejected": -1.9079371690750122,
|
|
"eval_runtime": 57.0782,
|
|
"eval_samples_per_second": 17.52,
|
|
"eval_steps_per_second": 0.28,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 0.93,
|
|
"learning_rate": 3.6431440045897875e-08,
|
|
"logits/chosen": -2.8899993896484375,
|
|
"logits/rejected": -2.886475086212158,
|
|
"logps/chosen": -212.6807861328125,
|
|
"logps/rejected": -243.3516082763672,
|
|
"loss": 0.4966,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.7232971787452698,
|
|
"rewards/margins": 1.2528116703033447,
|
|
"rewards/rejected": -1.9761087894439697,
|
|
"step": 1810
|
|
},
|
|
{
|
|
"epoch": 0.94,
|
|
"learning_rate": 3.356282271944923e-08,
|
|
"logits/chosen": -2.936781406402588,
|
|
"logits/rejected": -2.913865566253662,
|
|
"logps/chosen": -259.8666076660156,
|
|
"logps/rejected": -268.0596008300781,
|
|
"loss": 0.4455,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": -0.7119141221046448,
|
|
"rewards/margins": 1.462996244430542,
|
|
"rewards/rejected": -2.174910306930542,
|
|
"step": 1820
|
|
},
|
|
{
|
|
"epoch": 0.94,
|
|
"learning_rate": 3.0694205393000576e-08,
|
|
"logits/chosen": -2.8601667881011963,
|
|
"logits/rejected": -2.8055875301361084,
|
|
"logps/chosen": -324.9699401855469,
|
|
"logps/rejected": -261.33355712890625,
|
|
"loss": 0.4484,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": -0.6698704361915588,
|
|
"rewards/margins": 1.4717018604278564,
|
|
"rewards/rejected": -2.1415722370147705,
|
|
"step": 1830
|
|
},
|
|
{
|
|
"epoch": 0.95,
|
|
"learning_rate": 2.782558806655192e-08,
|
|
"logits/chosen": -2.9527735710144043,
|
|
"logits/rejected": -2.9381892681121826,
|
|
"logps/chosen": -338.42889404296875,
|
|
"logps/rejected": -293.7242126464844,
|
|
"loss": 0.4437,
|
|
"rewards/accuracies": 0.6000000238418579,
|
|
"rewards/chosen": -0.5508059859275818,
|
|
"rewards/margins": 0.5796544551849365,
|
|
"rewards/rejected": -1.130460500717163,
|
|
"step": 1840
|
|
},
|
|
{
|
|
"epoch": 0.96,
|
|
"learning_rate": 2.4956970740103267e-08,
|
|
"logits/chosen": -2.8355095386505127,
|
|
"logits/rejected": -2.847282648086548,
|
|
"logps/chosen": -303.50482177734375,
|
|
"logps/rejected": -281.2980041503906,
|
|
"loss": 0.4709,
|
|
"rewards/accuracies": 0.949999988079071,
|
|
"rewards/chosen": -0.2886872887611389,
|
|
"rewards/margins": 2.1929566860198975,
|
|
"rewards/rejected": -2.4816439151763916,
|
|
"step": 1850
|
|
},
|
|
{
|
|
"epoch": 0.96,
|
|
"learning_rate": 2.2088353413654617e-08,
|
|
"logits/chosen": -2.8765408992767334,
|
|
"logits/rejected": -2.8786520957946777,
|
|
"logps/chosen": -321.27203369140625,
|
|
"logps/rejected": -308.62628173828125,
|
|
"loss": 0.4371,
|
|
"rewards/accuracies": 0.800000011920929,
|
|
"rewards/chosen": -0.6084128618240356,
|
|
"rewards/margins": 1.1289150714874268,
|
|
"rewards/rejected": -1.7373279333114624,
|
|
"step": 1860
|
|
},
|
|
{
|
|
"epoch": 0.97,
|
|
"learning_rate": 1.9219736087205964e-08,
|
|
"logits/chosen": -2.8658809661865234,
|
|
"logits/rejected": -2.86934232711792,
|
|
"logps/chosen": -271.0024719238281,
|
|
"logps/rejected": -249.09765625,
|
|
"loss": 0.4678,
|
|
"rewards/accuracies": 0.6499999761581421,
|
|
"rewards/chosen": -0.7377957105636597,
|
|
"rewards/margins": 0.9156107902526855,
|
|
"rewards/rejected": -1.6534065008163452,
|
|
"step": 1870
|
|
},
|
|
{
|
|
"epoch": 0.97,
|
|
"learning_rate": 1.6351118760757314e-08,
|
|
"logits/chosen": -2.7913639545440674,
|
|
"logits/rejected": -2.7344272136688232,
|
|
"logps/chosen": -299.87823486328125,
|
|
"logps/rejected": -340.60467529296875,
|
|
"loss": 0.4679,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.9796409606933594,
|
|
"rewards/margins": 1.4006431102752686,
|
|
"rewards/rejected": -2.380284070968628,
|
|
"step": 1880
|
|
},
|
|
{
|
|
"epoch": 0.98,
|
|
"learning_rate": 1.3482501434308661e-08,
|
|
"logits/chosen": -2.9909162521362305,
|
|
"logits/rejected": -2.8788750171661377,
|
|
"logps/chosen": -316.1296081542969,
|
|
"logps/rejected": -329.0151062011719,
|
|
"loss": 0.4584,
|
|
"rewards/accuracies": 0.8500000238418579,
|
|
"rewards/chosen": -0.6042786836624146,
|
|
"rewards/margins": 1.349906325340271,
|
|
"rewards/rejected": -1.954184889793396,
|
|
"step": 1890
|
|
},
|
|
{
|
|
"epoch": 0.98,
|
|
"learning_rate": 1.061388410786001e-08,
|
|
"logits/chosen": -2.840143918991089,
|
|
"logits/rejected": -2.8358423709869385,
|
|
"logps/chosen": -347.96185302734375,
|
|
"logps/rejected": -336.83831787109375,
|
|
"loss": 0.4337,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.25721627473831177,
|
|
"rewards/margins": 1.7318767309188843,
|
|
"rewards/rejected": -1.9890931844711304,
|
|
"step": 1900
|
|
},
|
|
{
|
|
"epoch": 0.98,
|
|
"eval_logits/chosen": -2.746859073638916,
|
|
"eval_logits/rejected": -2.717475175857544,
|
|
"eval_logps/chosen": -297.12957763671875,
|
|
"eval_logps/rejected": -327.35992431640625,
|
|
"eval_loss": 0.4607921838760376,
|
|
"eval_rewards/accuracies": 0.765625,
|
|
"eval_rewards/chosen": -0.5065746307373047,
|
|
"eval_rewards/margins": 1.365204095840454,
|
|
"eval_rewards/rejected": -1.871778964996338,
|
|
"eval_runtime": 59.1698,
|
|
"eval_samples_per_second": 16.901,
|
|
"eval_steps_per_second": 0.27,
|
|
"step": 1900
|
|
},
|
|
{
|
|
"epoch": 0.99,
|
|
"learning_rate": 7.745266781411359e-09,
|
|
"logits/chosen": -2.944617986679077,
|
|
"logits/rejected": -2.859666347503662,
|
|
"logps/chosen": -312.51654052734375,
|
|
"logps/rejected": -263.56494140625,
|
|
"loss": 0.4129,
|
|
"rewards/accuracies": 0.550000011920929,
|
|
"rewards/chosen": -0.29929789900779724,
|
|
"rewards/margins": 0.7224828004837036,
|
|
"rewards/rejected": -1.0217806100845337,
|
|
"step": 1910
|
|
},
|
|
{
|
|
"epoch": 0.99,
|
|
"learning_rate": 4.8766494549627085e-09,
|
|
"logits/chosen": -2.8194212913513184,
|
|
"logits/rejected": -2.776564598083496,
|
|
"logps/chosen": -363.00396728515625,
|
|
"logps/rejected": -299.03338623046875,
|
|
"loss": 0.4647,
|
|
"rewards/accuracies": 0.550000011920929,
|
|
"rewards/chosen": -0.38584208488464355,
|
|
"rewards/margins": 0.8067569732666016,
|
|
"rewards/rejected": -1.1925990581512451,
|
|
"step": 1920
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"learning_rate": 2.008032128514056e-09,
|
|
"logits/chosen": -2.8939270973205566,
|
|
"logits/rejected": -2.778672933578491,
|
|
"logps/chosen": -323.1568298339844,
|
|
"logps/rejected": -284.89361572265625,
|
|
"loss": 0.4376,
|
|
"rewards/accuracies": 0.8500000238418579,
|
|
"rewards/chosen": -0.581506609916687,
|
|
"rewards/margins": 1.8644784688949585,
|
|
"rewards/rejected": -2.4459850788116455,
|
|
"step": 1930
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"step": 1937,
|
|
"total_flos": 0.0,
|
|
"train_loss": 0.48803097129668166,
|
|
"train_runtime": 7971.1784,
|
|
"train_samples_per_second": 7.774,
|
|
"train_steps_per_second": 0.243
|
|
}
|
|
],
|
|
"logging_steps": 10,
|
|
"max_steps": 1937,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 500,
|
|
"total_flos": 0.0,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|