13884 lines
414 KiB
JSON
13884 lines
414 KiB
JSON
{
|
|
"best_global_step": 850,
|
|
"best_metric": 0.31901347637176514,
|
|
"best_model_checkpoint": "/experiment_results/dpo/A-vibe_OPEN_SOURCE_checkpoint-1600_dpo_chosen_OUR_super_unsafe_from_PR_x15_NEW_CORRECT_04_10_25_v9/checkpoint-850",
|
|
"epoch": 1.0,
|
|
"eval_steps": 50,
|
|
"global_step": 904,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.0011061946902654867,
|
|
"grad_norm": 21.33904266357422,
|
|
"learning_rate": 0.0,
|
|
"logits/chosen": -1.55078125,
|
|
"logits/rejected": -1.46875,
|
|
"logps/chosen": -288.0,
|
|
"logps/rejected": -235.5,
|
|
"loss": 0.7017,
|
|
"rewards/accuracies": 0.078125,
|
|
"rewards/chosen": -0.007916450500488281,
|
|
"rewards/margins": -0.0164794921875,
|
|
"rewards/rejected": 0.00848388671875,
|
|
"step": 1
|
|
},
|
|
{
|
|
"epoch": 0.0022123893805309734,
|
|
"grad_norm": 19.44487762451172,
|
|
"learning_rate": 1.7857142857142856e-08,
|
|
"logits/chosen": -1.5,
|
|
"logits/rejected": -1.43359375,
|
|
"logps/chosen": -259.0,
|
|
"logps/rejected": -226.0,
|
|
"loss": 0.6987,
|
|
"rewards/accuracies": 0.171875,
|
|
"rewards/chosen": -0.006072998046875,
|
|
"rewards/margins": -0.007415771484375,
|
|
"rewards/rejected": 0.0013580322265625,
|
|
"step": 2
|
|
},
|
|
{
|
|
"epoch": 0.00331858407079646,
|
|
"grad_norm": 21.772796630859375,
|
|
"learning_rate": 3.571428571428571e-08,
|
|
"logits/chosen": -1.58984375,
|
|
"logits/rejected": -1.54296875,
|
|
"logps/chosen": -288.0,
|
|
"logps/rejected": -286.0,
|
|
"loss": 0.6943,
|
|
"rewards/accuracies": 0.296875,
|
|
"rewards/chosen": -0.0041046142578125,
|
|
"rewards/margins": 0.002166748046875,
|
|
"rewards/rejected": -0.0062713623046875,
|
|
"step": 3
|
|
},
|
|
{
|
|
"epoch": 0.004424778761061947,
|
|
"grad_norm": 20.7520751953125,
|
|
"learning_rate": 5.3571428571428564e-08,
|
|
"logits/chosen": -1.65625,
|
|
"logits/rejected": -1.6015625,
|
|
"logps/chosen": -257.5,
|
|
"logps/rejected": -243.0,
|
|
"loss": 0.6858,
|
|
"rewards/accuracies": 0.328125,
|
|
"rewards/chosen": 0.0084075927734375,
|
|
"rewards/margins": 0.0184326171875,
|
|
"rewards/rejected": -0.009979248046875,
|
|
"step": 4
|
|
},
|
|
{
|
|
"epoch": 0.0055309734513274336,
|
|
"grad_norm": 22.113121032714844,
|
|
"learning_rate": 7.142857142857142e-08,
|
|
"logits/chosen": -1.5234375,
|
|
"logits/rejected": -1.53515625,
|
|
"logps/chosen": -263.0,
|
|
"logps/rejected": -262.5,
|
|
"loss": 0.6965,
|
|
"rewards/accuracies": 0.28125,
|
|
"rewards/chosen": -0.008130073547363281,
|
|
"rewards/margins": -0.003143310546875,
|
|
"rewards/rejected": -0.0049991607666015625,
|
|
"step": 5
|
|
},
|
|
{
|
|
"epoch": 0.00663716814159292,
|
|
"grad_norm": 22.67697525024414,
|
|
"learning_rate": 8.928571428571429e-08,
|
|
"logits/chosen": -1.4609375,
|
|
"logits/rejected": -1.62109375,
|
|
"logps/chosen": -252.5,
|
|
"logps/rejected": -259.5,
|
|
"loss": 0.6851,
|
|
"rewards/accuracies": 0.3046875,
|
|
"rewards/chosen": 0.00469970703125,
|
|
"rewards/margins": 0.013885498046875,
|
|
"rewards/rejected": -0.009189605712890625,
|
|
"step": 6
|
|
},
|
|
{
|
|
"epoch": 0.007743362831858407,
|
|
"grad_norm": 23.316373825073242,
|
|
"learning_rate": 1.0714285714285713e-07,
|
|
"logits/chosen": -1.46484375,
|
|
"logits/rejected": -1.3984375,
|
|
"logps/chosen": -279.0,
|
|
"logps/rejected": -271.0,
|
|
"loss": 0.698,
|
|
"rewards/accuracies": 0.3125,
|
|
"rewards/chosen": 0.00156402587890625,
|
|
"rewards/margins": -0.0079498291015625,
|
|
"rewards/rejected": 0.00946044921875,
|
|
"step": 7
|
|
},
|
|
{
|
|
"epoch": 0.008849557522123894,
|
|
"grad_norm": 24.865726470947266,
|
|
"learning_rate": 1.25e-07,
|
|
"logits/chosen": -1.43359375,
|
|
"logits/rejected": -1.546875,
|
|
"logps/chosen": -275.0,
|
|
"logps/rejected": -292.0,
|
|
"loss": 0.7039,
|
|
"rewards/accuracies": 0.203125,
|
|
"rewards/chosen": -0.006072998046875,
|
|
"rewards/margins": -0.01904296875,
|
|
"rewards/rejected": 0.01300048828125,
|
|
"step": 8
|
|
},
|
|
{
|
|
"epoch": 0.00995575221238938,
|
|
"grad_norm": 20.924415588378906,
|
|
"learning_rate": 1.4285714285714285e-07,
|
|
"logits/chosen": -1.55859375,
|
|
"logits/rejected": -1.51953125,
|
|
"logps/chosen": -238.5,
|
|
"logps/rejected": -238.5,
|
|
"loss": 0.6892,
|
|
"rewards/accuracies": 0.296875,
|
|
"rewards/chosen": 0.0125274658203125,
|
|
"rewards/margins": 0.0072021484375,
|
|
"rewards/rejected": 0.0052642822265625,
|
|
"step": 9
|
|
},
|
|
{
|
|
"epoch": 0.011061946902654867,
|
|
"grad_norm": 19.864246368408203,
|
|
"learning_rate": 1.6071428571428573e-07,
|
|
"logits/chosen": -1.56640625,
|
|
"logits/rejected": -1.48046875,
|
|
"logps/chosen": -249.0,
|
|
"logps/rejected": -230.0,
|
|
"loss": 0.6956,
|
|
"rewards/accuracies": 0.3046875,
|
|
"rewards/chosen": 0.0086822509765625,
|
|
"rewards/margins": 0.00128173828125,
|
|
"rewards/rejected": 0.0074615478515625,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.012168141592920354,
|
|
"grad_norm": 22.528316497802734,
|
|
"learning_rate": 1.7857142857142858e-07,
|
|
"logits/chosen": -1.59375,
|
|
"logits/rejected": -1.5,
|
|
"logps/chosen": -272.0,
|
|
"logps/rejected": -290.0,
|
|
"loss": 0.6936,
|
|
"rewards/accuracies": 0.3359375,
|
|
"rewards/chosen": -0.0045032501220703125,
|
|
"rewards/margins": 0.0057544708251953125,
|
|
"rewards/rejected": -0.01029062271118164,
|
|
"step": 11
|
|
},
|
|
{
|
|
"epoch": 0.01327433628318584,
|
|
"grad_norm": 21.385112762451172,
|
|
"learning_rate": 1.964285714285714e-07,
|
|
"logits/chosen": -1.43359375,
|
|
"logits/rejected": -1.38671875,
|
|
"logps/chosen": -270.0,
|
|
"logps/rejected": -281.0,
|
|
"loss": 0.6895,
|
|
"rewards/accuracies": 0.3359375,
|
|
"rewards/chosen": 0.001373291015625,
|
|
"rewards/margins": 0.0108795166015625,
|
|
"rewards/rejected": -0.009471893310546875,
|
|
"step": 12
|
|
},
|
|
{
|
|
"epoch": 0.014380530973451327,
|
|
"grad_norm": 21.703392028808594,
|
|
"learning_rate": 2.1428571428571426e-07,
|
|
"logits/chosen": -1.51953125,
|
|
"logits/rejected": -1.35546875,
|
|
"logps/chosen": -258.0,
|
|
"logps/rejected": -263.0,
|
|
"loss": 0.7104,
|
|
"rewards/accuracies": 0.2421875,
|
|
"rewards/chosen": -0.0135040283203125,
|
|
"rewards/margins": -0.0324249267578125,
|
|
"rewards/rejected": 0.01898193359375,
|
|
"step": 13
|
|
},
|
|
{
|
|
"epoch": 0.015486725663716814,
|
|
"grad_norm": 19.697071075439453,
|
|
"learning_rate": 2.3214285714285714e-07,
|
|
"logits/chosen": -1.4140625,
|
|
"logits/rejected": -1.56640625,
|
|
"logps/chosen": -248.0,
|
|
"logps/rejected": -233.5,
|
|
"loss": 0.6953,
|
|
"rewards/accuracies": 0.3125,
|
|
"rewards/chosen": 0.005401611328125,
|
|
"rewards/margins": -0.001678466796875,
|
|
"rewards/rejected": 0.007049560546875,
|
|
"step": 14
|
|
},
|
|
{
|
|
"epoch": 0.016592920353982302,
|
|
"grad_norm": 21.335206985473633,
|
|
"learning_rate": 2.5e-07,
|
|
"logits/chosen": -1.56640625,
|
|
"logits/rejected": -1.51953125,
|
|
"logps/chosen": -272.0,
|
|
"logps/rejected": -270.0,
|
|
"loss": 0.6838,
|
|
"rewards/accuracies": 0.34375,
|
|
"rewards/chosen": 0.0098724365234375,
|
|
"rewards/margins": 0.0190277099609375,
|
|
"rewards/rejected": -0.009204864501953125,
|
|
"step": 15
|
|
},
|
|
{
|
|
"epoch": 0.017699115044247787,
|
|
"grad_norm": 21.42949867248535,
|
|
"learning_rate": 2.6785714285714284e-07,
|
|
"logits/chosen": -1.515625,
|
|
"logits/rejected": -1.6328125,
|
|
"logps/chosen": -248.5,
|
|
"logps/rejected": -244.5,
|
|
"loss": 0.6785,
|
|
"rewards/accuracies": 0.34375,
|
|
"rewards/chosen": 0.00927734375,
|
|
"rewards/margins": 0.0289306640625,
|
|
"rewards/rejected": -0.0196533203125,
|
|
"step": 16
|
|
},
|
|
{
|
|
"epoch": 0.018805309734513276,
|
|
"grad_norm": 20.796878814697266,
|
|
"learning_rate": 2.857142857142857e-07,
|
|
"logits/chosen": -1.60546875,
|
|
"logits/rejected": -1.625,
|
|
"logps/chosen": -231.5,
|
|
"logps/rejected": -231.5,
|
|
"loss": 0.6899,
|
|
"rewards/accuracies": 0.34375,
|
|
"rewards/chosen": 0.00724029541015625,
|
|
"rewards/margins": 0.011138916015625,
|
|
"rewards/rejected": -0.00391387939453125,
|
|
"step": 17
|
|
},
|
|
{
|
|
"epoch": 0.01991150442477876,
|
|
"grad_norm": 20.082786560058594,
|
|
"learning_rate": 3.0357142857142855e-07,
|
|
"logits/chosen": -1.46875,
|
|
"logits/rejected": -1.40625,
|
|
"logps/chosen": -251.0,
|
|
"logps/rejected": -248.5,
|
|
"loss": 0.688,
|
|
"rewards/accuracies": 0.40625,
|
|
"rewards/chosen": 0.016357421875,
|
|
"rewards/margins": 0.0147705078125,
|
|
"rewards/rejected": 0.0015716552734375,
|
|
"step": 18
|
|
},
|
|
{
|
|
"epoch": 0.02101769911504425,
|
|
"grad_norm": 21.640682220458984,
|
|
"learning_rate": 3.2142857142857145e-07,
|
|
"logits/chosen": -1.59765625,
|
|
"logits/rejected": -1.3515625,
|
|
"logps/chosen": -264.0,
|
|
"logps/rejected": -262.0,
|
|
"loss": 0.6912,
|
|
"rewards/accuracies": 0.3359375,
|
|
"rewards/chosen": 0.01506805419921875,
|
|
"rewards/margins": 0.005462646484375,
|
|
"rewards/rejected": 0.00958251953125,
|
|
"step": 19
|
|
},
|
|
{
|
|
"epoch": 0.022123893805309734,
|
|
"grad_norm": 22.128896713256836,
|
|
"learning_rate": 3.392857142857143e-07,
|
|
"logits/chosen": -1.57421875,
|
|
"logits/rejected": -1.47265625,
|
|
"logps/chosen": -267.5,
|
|
"logps/rejected": -267.0,
|
|
"loss": 0.6917,
|
|
"rewards/accuracies": 0.3515625,
|
|
"rewards/chosen": 0.01458740234375,
|
|
"rewards/margins": 0.0075225830078125,
|
|
"rewards/rejected": 0.007049560546875,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.023230088495575223,
|
|
"grad_norm": 20.139122009277344,
|
|
"learning_rate": 3.5714285714285716e-07,
|
|
"logits/chosen": -1.58203125,
|
|
"logits/rejected": -1.46484375,
|
|
"logps/chosen": -251.5,
|
|
"logps/rejected": -251.0,
|
|
"loss": 0.699,
|
|
"rewards/accuracies": 0.3203125,
|
|
"rewards/chosen": 0.003238677978515625,
|
|
"rewards/margins": -0.0052642822265625,
|
|
"rewards/rejected": 0.008502960205078125,
|
|
"step": 21
|
|
},
|
|
{
|
|
"epoch": 0.024336283185840708,
|
|
"grad_norm": 20.964323043823242,
|
|
"learning_rate": 3.75e-07,
|
|
"logits/chosen": -1.55859375,
|
|
"logits/rejected": -1.49609375,
|
|
"logps/chosen": -236.0,
|
|
"logps/rejected": -260.5,
|
|
"loss": 0.6882,
|
|
"rewards/accuracies": 0.3984375,
|
|
"rewards/chosen": 0.02685546875,
|
|
"rewards/margins": 0.013885498046875,
|
|
"rewards/rejected": 0.01297760009765625,
|
|
"step": 22
|
|
},
|
|
{
|
|
"epoch": 0.025442477876106196,
|
|
"grad_norm": 19.556018829345703,
|
|
"learning_rate": 3.928571428571428e-07,
|
|
"logits/chosen": -1.6015625,
|
|
"logits/rejected": -1.4296875,
|
|
"logps/chosen": -234.0,
|
|
"logps/rejected": -210.0,
|
|
"loss": 0.6941,
|
|
"rewards/accuracies": 0.359375,
|
|
"rewards/chosen": 0.01114654541015625,
|
|
"rewards/margins": 0.00146484375,
|
|
"rewards/rejected": 0.0096893310546875,
|
|
"step": 23
|
|
},
|
|
{
|
|
"epoch": 0.02654867256637168,
|
|
"grad_norm": 195.61749267578125,
|
|
"learning_rate": 4.1071428571428566e-07,
|
|
"logits/chosen": -1.59375,
|
|
"logits/rejected": -1.328125,
|
|
"logps/chosen": -264.0,
|
|
"logps/rejected": -329.5,
|
|
"loss": 0.676,
|
|
"rewards/accuracies": 0.4921875,
|
|
"rewards/chosen": 0.0570068359375,
|
|
"rewards/margins": -0.006103515625,
|
|
"rewards/rejected": 0.0631866455078125,
|
|
"step": 24
|
|
},
|
|
{
|
|
"epoch": 0.02765486725663717,
|
|
"grad_norm": 21.722719192504883,
|
|
"learning_rate": 4.285714285714285e-07,
|
|
"logits/chosen": -1.4375,
|
|
"logits/rejected": -1.546875,
|
|
"logps/chosen": -259.0,
|
|
"logps/rejected": -269.0,
|
|
"loss": 0.6887,
|
|
"rewards/accuracies": 0.453125,
|
|
"rewards/chosen": 0.0538330078125,
|
|
"rewards/margins": 0.0157470703125,
|
|
"rewards/rejected": 0.03802490234375,
|
|
"step": 25
|
|
},
|
|
{
|
|
"epoch": 0.028761061946902654,
|
|
"grad_norm": 22.364490509033203,
|
|
"learning_rate": 4.464285714285714e-07,
|
|
"logits/chosen": -1.41015625,
|
|
"logits/rejected": -1.36328125,
|
|
"logps/chosen": -296.0,
|
|
"logps/rejected": -305.0,
|
|
"loss": 0.6882,
|
|
"rewards/accuracies": 0.4375,
|
|
"rewards/chosen": 0.0601806640625,
|
|
"rewards/margins": 0.01422119140625,
|
|
"rewards/rejected": 0.0460205078125,
|
|
"step": 26
|
|
},
|
|
{
|
|
"epoch": 0.029867256637168143,
|
|
"grad_norm": 20.38817024230957,
|
|
"learning_rate": 4.6428571428571427e-07,
|
|
"logits/chosen": -1.44140625,
|
|
"logits/rejected": -1.390625,
|
|
"logps/chosen": -280.0,
|
|
"logps/rejected": -265.0,
|
|
"loss": 0.6743,
|
|
"rewards/accuracies": 0.4609375,
|
|
"rewards/chosen": 0.0726318359375,
|
|
"rewards/margins": 0.0447998046875,
|
|
"rewards/rejected": 0.02783203125,
|
|
"step": 27
|
|
},
|
|
{
|
|
"epoch": 0.030973451327433628,
|
|
"grad_norm": 21.340524673461914,
|
|
"learning_rate": 4.821428571428571e-07,
|
|
"logits/chosen": -1.4609375,
|
|
"logits/rejected": -1.49609375,
|
|
"logps/chosen": -263.0,
|
|
"logps/rejected": -233.5,
|
|
"loss": 0.6704,
|
|
"rewards/accuracies": 0.4921875,
|
|
"rewards/chosen": 0.0859375,
|
|
"rewards/margins": 0.05419921875,
|
|
"rewards/rejected": 0.03167724609375,
|
|
"step": 28
|
|
},
|
|
{
|
|
"epoch": 0.032079646017699116,
|
|
"grad_norm": 22.794097900390625,
|
|
"learning_rate": 5e-07,
|
|
"logits/chosen": -1.47265625,
|
|
"logits/rejected": -1.52734375,
|
|
"logps/chosen": -251.5,
|
|
"logps/rejected": -277.0,
|
|
"loss": 0.6665,
|
|
"rewards/accuracies": 0.5,
|
|
"rewards/chosen": 0.100341796875,
|
|
"rewards/margins": 0.0615234375,
|
|
"rewards/rejected": 0.03863525390625,
|
|
"step": 29
|
|
},
|
|
{
|
|
"epoch": 0.033185840707964605,
|
|
"grad_norm": 21.916282653808594,
|
|
"learning_rate": 4.999983923145526e-07,
|
|
"logits/chosen": -1.45703125,
|
|
"logits/rejected": -1.44140625,
|
|
"logps/chosen": -268.0,
|
|
"logps/rejected": -271.0,
|
|
"loss": 0.6672,
|
|
"rewards/accuracies": 0.5078125,
|
|
"rewards/chosen": 0.087158203125,
|
|
"rewards/margins": 0.052978515625,
|
|
"rewards/rejected": 0.03411865234375,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.034292035398230086,
|
|
"grad_norm": 20.50246810913086,
|
|
"learning_rate": 4.999935692788877e-07,
|
|
"logits/chosen": -1.44140625,
|
|
"logits/rejected": -1.42578125,
|
|
"logps/chosen": -263.0,
|
|
"logps/rejected": -280.0,
|
|
"loss": 0.6626,
|
|
"rewards/accuracies": 0.4921875,
|
|
"rewards/chosen": 0.1103515625,
|
|
"rewards/margins": 0.071533203125,
|
|
"rewards/rejected": 0.03887939453125,
|
|
"step": 31
|
|
},
|
|
{
|
|
"epoch": 0.035398230088495575,
|
|
"grad_norm": 21.142545700073242,
|
|
"learning_rate": 4.999855309550366e-07,
|
|
"logits/chosen": -1.54296875,
|
|
"logits/rejected": -1.5859375,
|
|
"logps/chosen": -291.0,
|
|
"logps/rejected": -268.0,
|
|
"loss": 0.6704,
|
|
"rewards/accuracies": 0.5390625,
|
|
"rewards/chosen": 0.091552734375,
|
|
"rewards/margins": 0.0552978515625,
|
|
"rewards/rejected": 0.03607177734375,
|
|
"step": 32
|
|
},
|
|
{
|
|
"epoch": 0.03650442477876106,
|
|
"grad_norm": 20.50800895690918,
|
|
"learning_rate": 4.999742774463842e-07,
|
|
"logits/chosen": -1.4375,
|
|
"logits/rejected": -1.40234375,
|
|
"logps/chosen": -256.5,
|
|
"logps/rejected": -270.0,
|
|
"loss": 0.6494,
|
|
"rewards/accuracies": 0.6171875,
|
|
"rewards/chosen": 0.1484375,
|
|
"rewards/margins": 0.092041015625,
|
|
"rewards/rejected": 0.0565185546875,
|
|
"step": 33
|
|
},
|
|
{
|
|
"epoch": 0.03761061946902655,
|
|
"grad_norm": 19.532590866088867,
|
|
"learning_rate": 4.999598088976672e-07,
|
|
"logits/chosen": -1.49609375,
|
|
"logits/rejected": -1.4765625,
|
|
"logps/chosen": -250.0,
|
|
"logps/rejected": -260.0,
|
|
"loss": 0.6445,
|
|
"rewards/accuracies": 0.59375,
|
|
"rewards/chosen": 0.177734375,
|
|
"rewards/margins": 0.105224609375,
|
|
"rewards/rejected": 0.072509765625,
|
|
"step": 34
|
|
},
|
|
{
|
|
"epoch": 0.03871681415929203,
|
|
"grad_norm": 20.883621215820312,
|
|
"learning_rate": 4.999421254949727e-07,
|
|
"logits/chosen": -1.5390625,
|
|
"logits/rejected": -1.4140625,
|
|
"logps/chosen": -271.0,
|
|
"logps/rejected": -270.0,
|
|
"loss": 0.6501,
|
|
"rewards/accuracies": 0.5234375,
|
|
"rewards/chosen": 0.16943359375,
|
|
"rewards/margins": 0.103271484375,
|
|
"rewards/rejected": 0.0655517578125,
|
|
"step": 35
|
|
},
|
|
{
|
|
"epoch": 0.03982300884955752,
|
|
"grad_norm": 20.3232479095459,
|
|
"learning_rate": 4.999212274657353e-07,
|
|
"logits/chosen": -1.51953125,
|
|
"logits/rejected": -1.46484375,
|
|
"logps/chosen": -257.0,
|
|
"logps/rejected": -255.5,
|
|
"loss": 0.6428,
|
|
"rewards/accuracies": 0.6015625,
|
|
"rewards/chosen": 0.2080078125,
|
|
"rewards/margins": 0.114013671875,
|
|
"rewards/rejected": 0.09375,
|
|
"step": 36
|
|
},
|
|
{
|
|
"epoch": 0.04092920353982301,
|
|
"grad_norm": 21.007495880126953,
|
|
"learning_rate": 4.99897115078735e-07,
|
|
"logits/chosen": -1.4609375,
|
|
"logits/rejected": -1.58984375,
|
|
"logps/chosen": -259.5,
|
|
"logps/rejected": -253.0,
|
|
"loss": 0.636,
|
|
"rewards/accuracies": 0.5859375,
|
|
"rewards/chosen": 0.24609375,
|
|
"rewards/margins": 0.13525390625,
|
|
"rewards/rejected": 0.110595703125,
|
|
"step": 37
|
|
},
|
|
{
|
|
"epoch": 0.0420353982300885,
|
|
"grad_norm": 18.463993072509766,
|
|
"learning_rate": 4.998697886440926e-07,
|
|
"logits/chosen": -1.5078125,
|
|
"logits/rejected": -1.4375,
|
|
"logps/chosen": -242.0,
|
|
"logps/rejected": -246.0,
|
|
"loss": 0.6384,
|
|
"rewards/accuracies": 0.5234375,
|
|
"rewards/chosen": 0.23681640625,
|
|
"rewards/margins": 0.130859375,
|
|
"rewards/rejected": 0.106201171875,
|
|
"step": 38
|
|
},
|
|
{
|
|
"epoch": 0.04314159292035398,
|
|
"grad_norm": 20.67741584777832,
|
|
"learning_rate": 4.998392485132666e-07,
|
|
"logits/chosen": -1.49609375,
|
|
"logits/rejected": -1.375,
|
|
"logps/chosen": -267.0,
|
|
"logps/rejected": -275.0,
|
|
"loss": 0.6331,
|
|
"rewards/accuracies": 0.5546875,
|
|
"rewards/chosen": 0.275390625,
|
|
"rewards/margins": 0.14990234375,
|
|
"rewards/rejected": 0.12548828125,
|
|
"step": 39
|
|
},
|
|
{
|
|
"epoch": 0.04424778761061947,
|
|
"grad_norm": 20.392776489257812,
|
|
"learning_rate": 4.998054950790485e-07,
|
|
"logits/chosen": -1.359375,
|
|
"logits/rejected": -1.4609375,
|
|
"logps/chosen": -275.0,
|
|
"logps/rejected": -285.0,
|
|
"loss": 0.6218,
|
|
"rewards/accuracies": 0.59375,
|
|
"rewards/chosen": 0.2841796875,
|
|
"rewards/margins": 0.1650390625,
|
|
"rewards/rejected": 0.11865234375,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.04535398230088496,
|
|
"grad_norm": 19.42493438720703,
|
|
"learning_rate": 4.997685287755575e-07,
|
|
"logits/chosen": -1.515625,
|
|
"logits/rejected": -1.3828125,
|
|
"logps/chosen": -271.0,
|
|
"logps/rejected": -262.5,
|
|
"loss": 0.6274,
|
|
"rewards/accuracies": 0.5703125,
|
|
"rewards/chosen": 0.2744140625,
|
|
"rewards/margins": 0.15234375,
|
|
"rewards/rejected": 0.12158203125,
|
|
"step": 41
|
|
},
|
|
{
|
|
"epoch": 0.046460176991150445,
|
|
"grad_norm": 19.07245635986328,
|
|
"learning_rate": 4.99728350078235e-07,
|
|
"logits/chosen": -1.53515625,
|
|
"logits/rejected": -1.4453125,
|
|
"logps/chosen": -274.0,
|
|
"logps/rejected": -251.5,
|
|
"loss": 0.6108,
|
|
"rewards/accuracies": 0.5859375,
|
|
"rewards/chosen": 0.3173828125,
|
|
"rewards/margins": 0.18896484375,
|
|
"rewards/rejected": 0.127685546875,
|
|
"step": 42
|
|
},
|
|
{
|
|
"epoch": 0.04756637168141593,
|
|
"grad_norm": 19.7177677154541,
|
|
"learning_rate": 4.996849595038388e-07,
|
|
"logits/chosen": -1.515625,
|
|
"logits/rejected": -1.49609375,
|
|
"logps/chosen": -273.5,
|
|
"logps/rejected": -281.0,
|
|
"loss": 0.6208,
|
|
"rewards/accuracies": 0.578125,
|
|
"rewards/chosen": 0.3310546875,
|
|
"rewards/margins": 0.17529296875,
|
|
"rewards/rejected": 0.15625,
|
|
"step": 43
|
|
},
|
|
{
|
|
"epoch": 0.048672566371681415,
|
|
"grad_norm": 19.820003509521484,
|
|
"learning_rate": 4.996383576104361e-07,
|
|
"logits/chosen": -1.5234375,
|
|
"logits/rejected": -1.421875,
|
|
"logps/chosen": -261.0,
|
|
"logps/rejected": -263.5,
|
|
"loss": 0.6196,
|
|
"rewards/accuracies": 0.5859375,
|
|
"rewards/chosen": 0.330078125,
|
|
"rewards/margins": 0.1796875,
|
|
"rewards/rejected": 0.15087890625,
|
|
"step": 44
|
|
},
|
|
{
|
|
"epoch": 0.049778761061946904,
|
|
"grad_norm": 20.092729568481445,
|
|
"learning_rate": 4.995885449973962e-07,
|
|
"logits/chosen": -1.36328125,
|
|
"logits/rejected": -1.39453125,
|
|
"logps/chosen": -293.0,
|
|
"logps/rejected": -295.0,
|
|
"loss": 0.6111,
|
|
"rewards/accuracies": 0.5859375,
|
|
"rewards/chosen": 0.3408203125,
|
|
"rewards/margins": 0.20068359375,
|
|
"rewards/rejected": 0.140380859375,
|
|
"step": 45
|
|
},
|
|
{
|
|
"epoch": 0.05088495575221239,
|
|
"grad_norm": 18.567899703979492,
|
|
"learning_rate": 4.995355223053834e-07,
|
|
"logits/chosen": -1.5,
|
|
"logits/rejected": -1.44921875,
|
|
"logps/chosen": -260.5,
|
|
"logps/rejected": -255.5,
|
|
"loss": 0.6146,
|
|
"rewards/accuracies": 0.59375,
|
|
"rewards/chosen": 0.3359375,
|
|
"rewards/margins": 0.1982421875,
|
|
"rewards/rejected": 0.13720703125,
|
|
"step": 46
|
|
},
|
|
{
|
|
"epoch": 0.051991150442477874,
|
|
"grad_norm": 20.356060028076172,
|
|
"learning_rate": 4.994792902163481e-07,
|
|
"logits/chosen": -1.45703125,
|
|
"logits/rejected": -1.29296875,
|
|
"logps/chosen": -280.0,
|
|
"logps/rejected": -260.0,
|
|
"loss": 0.627,
|
|
"rewards/accuracies": 0.5859375,
|
|
"rewards/chosen": 0.357421875,
|
|
"rewards/margins": 0.1787109375,
|
|
"rewards/rejected": 0.1787109375,
|
|
"step": 47
|
|
},
|
|
{
|
|
"epoch": 0.05309734513274336,
|
|
"grad_norm": 20.717748641967773,
|
|
"learning_rate": 4.994198494535182e-07,
|
|
"logits/chosen": -1.4765625,
|
|
"logits/rejected": -1.41796875,
|
|
"logps/chosen": -280.0,
|
|
"logps/rejected": -281.0,
|
|
"loss": 0.5881,
|
|
"rewards/accuracies": 0.6171875,
|
|
"rewards/chosen": 0.4091796875,
|
|
"rewards/margins": 0.25390625,
|
|
"rewards/rejected": 0.1552734375,
|
|
"step": 48
|
|
},
|
|
{
|
|
"epoch": 0.05420353982300885,
|
|
"grad_norm": 19.05792236328125,
|
|
"learning_rate": 4.993572007813904e-07,
|
|
"logits/chosen": -1.390625,
|
|
"logits/rejected": -1.35546875,
|
|
"logps/chosen": -251.5,
|
|
"logps/rejected": -277.0,
|
|
"loss": 0.5889,
|
|
"rewards/accuracies": 0.6015625,
|
|
"rewards/chosen": 0.41015625,
|
|
"rewards/margins": 0.263671875,
|
|
"rewards/rejected": 0.14599609375,
|
|
"step": 49
|
|
},
|
|
{
|
|
"epoch": 0.05530973451327434,
|
|
"grad_norm": 17.29762840270996,
|
|
"learning_rate": 4.992913450057195e-07,
|
|
"logits/chosen": -1.41796875,
|
|
"logits/rejected": -1.35546875,
|
|
"logps/chosen": -237.0,
|
|
"logps/rejected": -224.5,
|
|
"loss": 0.5867,
|
|
"rewards/accuracies": 0.6015625,
|
|
"rewards/chosen": 0.4501953125,
|
|
"rewards/margins": 0.26953125,
|
|
"rewards/rejected": 0.1796875,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.05530973451327434,
|
|
"eval_logits/chosen": -1.4197372198104858,
|
|
"eval_logits/rejected": -1.4136348962783813,
|
|
"eval_logps/chosen": -255.96517944335938,
|
|
"eval_logps/rejected": -257.37811279296875,
|
|
"eval_loss": 0.5778365731239319,
|
|
"eval_rewards/accuracies": 0.6217424273490906,
|
|
"eval_rewards/chosen": 0.4856770932674408,
|
|
"eval_rewards/margins": 0.2985657751560211,
|
|
"eval_rewards/rejected": 0.1872473508119583,
|
|
"eval_runtime": 210.1095,
|
|
"eval_samples_per_second": 61.173,
|
|
"eval_steps_per_second": 0.957,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 0.05641592920353982,
|
|
"grad_norm": 18.575069427490234,
|
|
"learning_rate": 4.992222829735082e-07,
|
|
"logits/chosen": -1.5078125,
|
|
"logits/rejected": -1.421875,
|
|
"logps/chosen": -260.0,
|
|
"logps/rejected": -259.0,
|
|
"loss": 0.5874,
|
|
"rewards/accuracies": 0.609375,
|
|
"rewards/chosen": 0.48046875,
|
|
"rewards/margins": 0.27734375,
|
|
"rewards/rejected": 0.203125,
|
|
"step": 51
|
|
},
|
|
{
|
|
"epoch": 0.05752212389380531,
|
|
"grad_norm": 17.976177215576172,
|
|
"learning_rate": 4.991500155729971e-07,
|
|
"logits/chosen": -1.42578125,
|
|
"logits/rejected": -1.43359375,
|
|
"logps/chosen": -252.5,
|
|
"logps/rejected": -256.0,
|
|
"loss": 0.575,
|
|
"rewards/accuracies": 0.6640625,
|
|
"rewards/chosen": 0.50390625,
|
|
"rewards/margins": 0.30078125,
|
|
"rewards/rejected": 0.20458984375,
|
|
"step": 52
|
|
},
|
|
{
|
|
"epoch": 0.0586283185840708,
|
|
"grad_norm": 17.979496002197266,
|
|
"learning_rate": 4.99074543733652e-07,
|
|
"logits/chosen": -1.4453125,
|
|
"logits/rejected": -1.46875,
|
|
"logps/chosen": -265.0,
|
|
"logps/rejected": -267.0,
|
|
"loss": 0.5444,
|
|
"rewards/accuracies": 0.7109375,
|
|
"rewards/chosen": 0.55859375,
|
|
"rewards/margins": 0.3984375,
|
|
"rewards/rejected": 0.1611328125,
|
|
"step": 53
|
|
},
|
|
{
|
|
"epoch": 0.059734513274336286,
|
|
"grad_norm": 17.736249923706055,
|
|
"learning_rate": 4.989958684261526e-07,
|
|
"logits/chosen": -1.31640625,
|
|
"logits/rejected": -1.375,
|
|
"logps/chosen": -255.5,
|
|
"logps/rejected": -292.0,
|
|
"loss": 0.5529,
|
|
"rewards/accuracies": 0.59375,
|
|
"rewards/chosen": 0.568359375,
|
|
"rewards/margins": 0.39453125,
|
|
"rewards/rejected": 0.17333984375,
|
|
"step": 54
|
|
},
|
|
{
|
|
"epoch": 0.06084070796460177,
|
|
"grad_norm": 17.284420013427734,
|
|
"learning_rate": 4.989139906623802e-07,
|
|
"logits/chosen": -1.44140625,
|
|
"logits/rejected": -1.42578125,
|
|
"logps/chosen": -253.0,
|
|
"logps/rejected": -256.5,
|
|
"loss": 0.5522,
|
|
"rewards/accuracies": 0.6953125,
|
|
"rewards/chosen": 0.599609375,
|
|
"rewards/margins": 0.373046875,
|
|
"rewards/rejected": 0.2255859375,
|
|
"step": 55
|
|
},
|
|
{
|
|
"epoch": 0.061946902654867256,
|
|
"grad_norm": 17.704713821411133,
|
|
"learning_rate": 4.988289114954044e-07,
|
|
"logits/chosen": -1.3671875,
|
|
"logits/rejected": -1.35546875,
|
|
"logps/chosen": -237.5,
|
|
"logps/rejected": -260.0,
|
|
"loss": 0.5504,
|
|
"rewards/accuracies": 0.609375,
|
|
"rewards/chosen": 0.654296875,
|
|
"rewards/margins": 0.3896484375,
|
|
"rewards/rejected": 0.263671875,
|
|
"step": 56
|
|
},
|
|
{
|
|
"epoch": 0.06305309734513274,
|
|
"grad_norm": 17.688888549804688,
|
|
"learning_rate": 4.987406320194694e-07,
|
|
"logits/chosen": -1.453125,
|
|
"logits/rejected": -1.3359375,
|
|
"logps/chosen": -242.5,
|
|
"logps/rejected": -247.5,
|
|
"loss": 0.5537,
|
|
"rewards/accuracies": 0.65625,
|
|
"rewards/chosen": 0.69140625,
|
|
"rewards/margins": 0.3857421875,
|
|
"rewards/rejected": 0.3046875,
|
|
"step": 57
|
|
},
|
|
{
|
|
"epoch": 0.06415929203539823,
|
|
"grad_norm": 17.646419525146484,
|
|
"learning_rate": 4.986491533699802e-07,
|
|
"logits/chosen": -1.41015625,
|
|
"logits/rejected": -1.37109375,
|
|
"logps/chosen": -256.5,
|
|
"logps/rejected": -281.0,
|
|
"loss": 0.5431,
|
|
"rewards/accuracies": 0.59375,
|
|
"rewards/chosen": 0.71484375,
|
|
"rewards/margins": 0.439453125,
|
|
"rewards/rejected": 0.27490234375,
|
|
"step": 58
|
|
},
|
|
{
|
|
"epoch": 0.06526548672566372,
|
|
"grad_norm": 17.19894027709961,
|
|
"learning_rate": 4.985544767234879e-07,
|
|
"logits/chosen": -1.41796875,
|
|
"logits/rejected": -1.4453125,
|
|
"logps/chosen": -245.5,
|
|
"logps/rejected": -250.5,
|
|
"loss": 0.5403,
|
|
"rewards/accuracies": 0.609375,
|
|
"rewards/chosen": 0.7421875,
|
|
"rewards/margins": 0.4541015625,
|
|
"rewards/rejected": 0.287109375,
|
|
"step": 59
|
|
},
|
|
{
|
|
"epoch": 0.06637168141592921,
|
|
"grad_norm": 16.630441665649414,
|
|
"learning_rate": 4.984566032976749e-07,
|
|
"logits/chosen": -1.390625,
|
|
"logits/rejected": -1.33984375,
|
|
"logps/chosen": -248.0,
|
|
"logps/rejected": -254.5,
|
|
"loss": 0.5386,
|
|
"rewards/accuracies": 0.6640625,
|
|
"rewards/chosen": 0.828125,
|
|
"rewards/margins": 0.466796875,
|
|
"rewards/rejected": 0.361328125,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 0.06747787610619468,
|
|
"grad_norm": 17.252979278564453,
|
|
"learning_rate": 4.983555343513384e-07,
|
|
"logits/chosen": -1.41796875,
|
|
"logits/rejected": -1.4375,
|
|
"logps/chosen": -250.5,
|
|
"logps/rejected": -275.5,
|
|
"loss": 0.5028,
|
|
"rewards/accuracies": 0.6875,
|
|
"rewards/chosen": 0.8515625,
|
|
"rewards/margins": 0.578125,
|
|
"rewards/rejected": 0.271484375,
|
|
"step": 61
|
|
},
|
|
{
|
|
"epoch": 0.06858407079646017,
|
|
"grad_norm": 17.014602661132812,
|
|
"learning_rate": 4.982512711843752e-07,
|
|
"logits/chosen": -1.32421875,
|
|
"logits/rejected": -1.30078125,
|
|
"logps/chosen": -242.0,
|
|
"logps/rejected": -243.5,
|
|
"loss": 0.5159,
|
|
"rewards/accuracies": 0.671875,
|
|
"rewards/chosen": 0.94140625,
|
|
"rewards/margins": 0.5458984375,
|
|
"rewards/rejected": 0.39453125,
|
|
"step": 62
|
|
},
|
|
{
|
|
"epoch": 0.06969026548672566,
|
|
"grad_norm": 16.877351760864258,
|
|
"learning_rate": 4.98143815137764e-07,
|
|
"logits/chosen": -1.40234375,
|
|
"logits/rejected": -1.36328125,
|
|
"logps/chosen": -264.0,
|
|
"logps/rejected": -285.0,
|
|
"loss": 0.5343,
|
|
"rewards/accuracies": 0.6328125,
|
|
"rewards/chosen": 0.857421875,
|
|
"rewards/margins": 0.4853515625,
|
|
"rewards/rejected": 0.373046875,
|
|
"step": 63
|
|
},
|
|
{
|
|
"epoch": 0.07079646017699115,
|
|
"grad_norm": 17.047161102294922,
|
|
"learning_rate": 4.980331675935493e-07,
|
|
"logits/chosen": -1.33203125,
|
|
"logits/rejected": -1.34375,
|
|
"logps/chosen": -254.5,
|
|
"logps/rejected": -291.0,
|
|
"loss": 0.5211,
|
|
"rewards/accuracies": 0.6484375,
|
|
"rewards/chosen": 0.962890625,
|
|
"rewards/margins": 0.533203125,
|
|
"rewards/rejected": 0.4296875,
|
|
"step": 64
|
|
},
|
|
{
|
|
"epoch": 0.07190265486725664,
|
|
"grad_norm": 17.61670684814453,
|
|
"learning_rate": 4.979193299748224e-07,
|
|
"logits/chosen": -1.2890625,
|
|
"logits/rejected": -1.3828125,
|
|
"logps/chosen": -265.0,
|
|
"logps/rejected": -278.0,
|
|
"loss": 0.4878,
|
|
"rewards/accuracies": 0.7109375,
|
|
"rewards/chosen": 0.96484375,
|
|
"rewards/margins": 0.6640625,
|
|
"rewards/rejected": 0.2998046875,
|
|
"step": 65
|
|
},
|
|
{
|
|
"epoch": 0.07300884955752213,
|
|
"grad_norm": 17.460668563842773,
|
|
"learning_rate": 4.978023037457043e-07,
|
|
"logits/chosen": -1.44921875,
|
|
"logits/rejected": -1.3515625,
|
|
"logps/chosen": -267.0,
|
|
"logps/rejected": -279.0,
|
|
"loss": 0.536,
|
|
"rewards/accuracies": 0.5703125,
|
|
"rewards/chosen": 0.95703125,
|
|
"rewards/margins": 0.5390625,
|
|
"rewards/rejected": 0.416015625,
|
|
"step": 66
|
|
},
|
|
{
|
|
"epoch": 0.07411504424778761,
|
|
"grad_norm": 181.168212890625,
|
|
"learning_rate": 4.976820904113256e-07,
|
|
"logits/chosen": -1.36328125,
|
|
"logits/rejected": -1.30859375,
|
|
"logps/chosen": -233.5,
|
|
"logps/rejected": -340.0,
|
|
"loss": 0.489,
|
|
"rewards/accuracies": 0.734375,
|
|
"rewards/chosen": 1.05859375,
|
|
"rewards/margins": 0.646484375,
|
|
"rewards/rejected": 0.41015625,
|
|
"step": 67
|
|
},
|
|
{
|
|
"epoch": 0.0752212389380531,
|
|
"grad_norm": 15.64547061920166,
|
|
"learning_rate": 4.975586915178084e-07,
|
|
"logits/chosen": -1.3671875,
|
|
"logits/rejected": -1.40234375,
|
|
"logps/chosen": -241.5,
|
|
"logps/rejected": -256.0,
|
|
"loss": 0.4702,
|
|
"rewards/accuracies": 0.6953125,
|
|
"rewards/chosen": 1.041015625,
|
|
"rewards/margins": 0.716796875,
|
|
"rewards/rejected": 0.32421875,
|
|
"step": 68
|
|
},
|
|
{
|
|
"epoch": 0.07632743362831858,
|
|
"grad_norm": 16.069597244262695,
|
|
"learning_rate": 4.974321086522452e-07,
|
|
"logits/chosen": -1.3046875,
|
|
"logits/rejected": -1.234375,
|
|
"logps/chosen": -256.5,
|
|
"logps/rejected": -251.5,
|
|
"loss": 0.5188,
|
|
"rewards/accuracies": 0.6328125,
|
|
"rewards/chosen": 0.986328125,
|
|
"rewards/margins": 0.568359375,
|
|
"rewards/rejected": 0.4208984375,
|
|
"step": 69
|
|
},
|
|
{
|
|
"epoch": 0.07743362831858407,
|
|
"grad_norm": 149.1916046142578,
|
|
"learning_rate": 4.973023434426798e-07,
|
|
"logits/chosen": -1.4140625,
|
|
"logits/rejected": -1.421875,
|
|
"logps/chosen": -248.5,
|
|
"logps/rejected": -247.0,
|
|
"loss": 0.5642,
|
|
"rewards/accuracies": 0.6875,
|
|
"rewards/chosen": 0.96875,
|
|
"rewards/margins": 0.5107421875,
|
|
"rewards/rejected": 0.458984375,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 0.07853982300884955,
|
|
"grad_norm": 16.17060089111328,
|
|
"learning_rate": 4.971693975580851e-07,
|
|
"logits/chosen": -1.390625,
|
|
"logits/rejected": -1.28515625,
|
|
"logps/chosen": -232.0,
|
|
"logps/rejected": -241.0,
|
|
"loss": 0.5079,
|
|
"rewards/accuracies": 0.6328125,
|
|
"rewards/chosen": 1.025390625,
|
|
"rewards/margins": 0.61328125,
|
|
"rewards/rejected": 0.4140625,
|
|
"step": 71
|
|
},
|
|
{
|
|
"epoch": 0.07964601769911504,
|
|
"grad_norm": 17.631471633911133,
|
|
"learning_rate": 4.970332727083425e-07,
|
|
"logits/chosen": -1.36328125,
|
|
"logits/rejected": -1.35546875,
|
|
"logps/chosen": -271.5,
|
|
"logps/rejected": -283.0,
|
|
"loss": 0.5212,
|
|
"rewards/accuracies": 0.625,
|
|
"rewards/chosen": 1.1328125,
|
|
"rewards/margins": 0.619140625,
|
|
"rewards/rejected": 0.5166015625,
|
|
"step": 72
|
|
},
|
|
{
|
|
"epoch": 0.08075221238938053,
|
|
"grad_norm": 17.61063003540039,
|
|
"learning_rate": 4.968939706442195e-07,
|
|
"logits/chosen": -1.39453125,
|
|
"logits/rejected": -1.203125,
|
|
"logps/chosen": -275.0,
|
|
"logps/rejected": -255.5,
|
|
"loss": 0.5211,
|
|
"rewards/accuracies": 0.640625,
|
|
"rewards/chosen": 0.99609375,
|
|
"rewards/margins": 0.583984375,
|
|
"rewards/rejected": 0.4130859375,
|
|
"step": 73
|
|
},
|
|
{
|
|
"epoch": 0.08185840707964602,
|
|
"grad_norm": 16.28321075439453,
|
|
"learning_rate": 4.967514931573472e-07,
|
|
"logits/chosen": -1.26953125,
|
|
"logits/rejected": -1.41015625,
|
|
"logps/chosen": -243.0,
|
|
"logps/rejected": -258.0,
|
|
"loss": 0.4977,
|
|
"rewards/accuracies": 0.6640625,
|
|
"rewards/chosen": 1.20703125,
|
|
"rewards/margins": 0.689453125,
|
|
"rewards/rejected": 0.5146484375,
|
|
"step": 74
|
|
},
|
|
{
|
|
"epoch": 0.08296460176991151,
|
|
"grad_norm": 16.079599380493164,
|
|
"learning_rate": 4.966058420801977e-07,
|
|
"logits/chosen": -1.34765625,
|
|
"logits/rejected": -1.35546875,
|
|
"logps/chosen": -259.5,
|
|
"logps/rejected": -244.0,
|
|
"loss": 0.4648,
|
|
"rewards/accuracies": 0.6796875,
|
|
"rewards/chosen": 1.13671875,
|
|
"rewards/margins": 0.779296875,
|
|
"rewards/rejected": 0.3583984375,
|
|
"step": 75
|
|
},
|
|
{
|
|
"epoch": 0.084070796460177,
|
|
"grad_norm": 18.725271224975586,
|
|
"learning_rate": 4.964570192860596e-07,
|
|
"logits/chosen": -1.359375,
|
|
"logits/rejected": -1.36328125,
|
|
"logps/chosen": -287.0,
|
|
"logps/rejected": -250.5,
|
|
"loss": 0.5436,
|
|
"rewards/accuracies": 0.5859375,
|
|
"rewards/chosen": 1.109375,
|
|
"rewards/margins": 0.5234375,
|
|
"rewards/rejected": 0.583984375,
|
|
"step": 76
|
|
},
|
|
{
|
|
"epoch": 0.08517699115044247,
|
|
"grad_norm": 15.678024291992188,
|
|
"learning_rate": 4.963050266890152e-07,
|
|
"logits/chosen": -1.328125,
|
|
"logits/rejected": -1.4609375,
|
|
"logps/chosen": -253.0,
|
|
"logps/rejected": -246.0,
|
|
"loss": 0.4833,
|
|
"rewards/accuracies": 0.703125,
|
|
"rewards/chosen": 1.15625,
|
|
"rewards/margins": 0.767578125,
|
|
"rewards/rejected": 0.390625,
|
|
"step": 77
|
|
},
|
|
{
|
|
"epoch": 0.08628318584070796,
|
|
"grad_norm": 15.54704761505127,
|
|
"learning_rate": 4.961498662439145e-07,
|
|
"logits/chosen": -1.32421875,
|
|
"logits/rejected": -1.359375,
|
|
"logps/chosen": -230.5,
|
|
"logps/rejected": -249.0,
|
|
"loss": 0.4718,
|
|
"rewards/accuracies": 0.7109375,
|
|
"rewards/chosen": 1.3046875,
|
|
"rewards/margins": 0.82421875,
|
|
"rewards/rejected": 0.48046875,
|
|
"step": 78
|
|
},
|
|
{
|
|
"epoch": 0.08738938053097345,
|
|
"grad_norm": 16.073156356811523,
|
|
"learning_rate": 4.959915399463512e-07,
|
|
"logits/chosen": -1.2890625,
|
|
"logits/rejected": -1.27734375,
|
|
"logps/chosen": -246.0,
|
|
"logps/rejected": -259.5,
|
|
"loss": 0.4602,
|
|
"rewards/accuracies": 0.671875,
|
|
"rewards/chosen": 1.27734375,
|
|
"rewards/margins": 0.85546875,
|
|
"rewards/rejected": 0.423828125,
|
|
"step": 79
|
|
},
|
|
{
|
|
"epoch": 0.08849557522123894,
|
|
"grad_norm": 14.94100284576416,
|
|
"learning_rate": 4.958300498326362e-07,
|
|
"logits/chosen": -1.26953125,
|
|
"logits/rejected": -1.40234375,
|
|
"logps/chosen": -231.0,
|
|
"logps/rejected": -264.5,
|
|
"loss": 0.4397,
|
|
"rewards/accuracies": 0.7421875,
|
|
"rewards/chosen": 1.30078125,
|
|
"rewards/margins": 0.953125,
|
|
"rewards/rejected": 0.345703125,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 0.08960176991150443,
|
|
"grad_norm": 17.28353500366211,
|
|
"learning_rate": 4.956653979797721e-07,
|
|
"logits/chosen": -1.3671875,
|
|
"logits/rejected": -1.3828125,
|
|
"logps/chosen": -280.0,
|
|
"logps/rejected": -259.5,
|
|
"loss": 0.5153,
|
|
"rewards/accuracies": 0.609375,
|
|
"rewards/chosen": 1.18359375,
|
|
"rewards/margins": 0.71875,
|
|
"rewards/rejected": 0.4658203125,
|
|
"step": 81
|
|
},
|
|
{
|
|
"epoch": 0.09070796460176991,
|
|
"grad_norm": 15.823220252990723,
|
|
"learning_rate": 4.954975865054259e-07,
|
|
"logits/chosen": -1.3671875,
|
|
"logits/rejected": -1.32421875,
|
|
"logps/chosen": -255.5,
|
|
"logps/rejected": -256.5,
|
|
"loss": 0.4614,
|
|
"rewards/accuracies": 0.6875,
|
|
"rewards/chosen": 1.34375,
|
|
"rewards/margins": 0.87109375,
|
|
"rewards/rejected": 0.474609375,
|
|
"step": 82
|
|
},
|
|
{
|
|
"epoch": 0.0918141592920354,
|
|
"grad_norm": 14.873113632202148,
|
|
"learning_rate": 4.953266175679023e-07,
|
|
"logits/chosen": -1.2890625,
|
|
"logits/rejected": -1.3125,
|
|
"logps/chosen": -236.5,
|
|
"logps/rejected": -241.5,
|
|
"loss": 0.4586,
|
|
"rewards/accuracies": 0.703125,
|
|
"rewards/chosen": 1.33984375,
|
|
"rewards/margins": 0.869140625,
|
|
"rewards/rejected": 0.47265625,
|
|
"step": 83
|
|
},
|
|
{
|
|
"epoch": 0.09292035398230089,
|
|
"grad_norm": 16.7225341796875,
|
|
"learning_rate": 4.951524933661154e-07,
|
|
"logits/chosen": -1.328125,
|
|
"logits/rejected": -1.30078125,
|
|
"logps/chosen": -256.0,
|
|
"logps/rejected": -237.0,
|
|
"loss": 0.5129,
|
|
"rewards/accuracies": 0.640625,
|
|
"rewards/chosen": 1.1953125,
|
|
"rewards/margins": 0.6953125,
|
|
"rewards/rejected": 0.5009765625,
|
|
"step": 84
|
|
},
|
|
{
|
|
"epoch": 0.09402654867256637,
|
|
"grad_norm": 15.362020492553711,
|
|
"learning_rate": 4.949752161395605e-07,
|
|
"logits/chosen": -1.3046875,
|
|
"logits/rejected": -1.24609375,
|
|
"logps/chosen": -257.0,
|
|
"logps/rejected": -252.0,
|
|
"loss": 0.4339,
|
|
"rewards/accuracies": 0.703125,
|
|
"rewards/chosen": 1.2890625,
|
|
"rewards/margins": 0.990234375,
|
|
"rewards/rejected": 0.30078125,
|
|
"step": 85
|
|
},
|
|
{
|
|
"epoch": 0.09513274336283185,
|
|
"grad_norm": 15.91197681427002,
|
|
"learning_rate": 4.94794788168286e-07,
|
|
"logits/chosen": -1.32421875,
|
|
"logits/rejected": -1.24609375,
|
|
"logps/chosen": -229.5,
|
|
"logps/rejected": -255.5,
|
|
"loss": 0.4675,
|
|
"rewards/accuracies": 0.6875,
|
|
"rewards/chosen": 1.3203125,
|
|
"rewards/margins": 0.85546875,
|
|
"rewards/rejected": 0.4658203125,
|
|
"step": 86
|
|
},
|
|
{
|
|
"epoch": 0.09623893805309734,
|
|
"grad_norm": 15.647570610046387,
|
|
"learning_rate": 4.946112117728634e-07,
|
|
"logits/chosen": -1.3671875,
|
|
"logits/rejected": -1.3359375,
|
|
"logps/chosen": -243.0,
|
|
"logps/rejected": -235.5,
|
|
"loss": 0.4574,
|
|
"rewards/accuracies": 0.671875,
|
|
"rewards/chosen": 1.28515625,
|
|
"rewards/margins": 0.865234375,
|
|
"rewards/rejected": 0.41796875,
|
|
"step": 87
|
|
},
|
|
{
|
|
"epoch": 0.09734513274336283,
|
|
"grad_norm": 17.21525001525879,
|
|
"learning_rate": 4.944244893143572e-07,
|
|
"logits/chosen": -1.3515625,
|
|
"logits/rejected": -1.26953125,
|
|
"logps/chosen": -268.0,
|
|
"logps/rejected": -264.0,
|
|
"loss": 0.4832,
|
|
"rewards/accuracies": 0.671875,
|
|
"rewards/chosen": 1.28515625,
|
|
"rewards/margins": 0.826171875,
|
|
"rewards/rejected": 0.4580078125,
|
|
"step": 88
|
|
},
|
|
{
|
|
"epoch": 0.09845132743362832,
|
|
"grad_norm": 16.55433464050293,
|
|
"learning_rate": 4.942346231942955e-07,
|
|
"logits/chosen": -1.3671875,
|
|
"logits/rejected": -1.3828125,
|
|
"logps/chosen": -255.0,
|
|
"logps/rejected": -258.5,
|
|
"loss": 0.4967,
|
|
"rewards/accuracies": 0.609375,
|
|
"rewards/chosen": 1.3359375,
|
|
"rewards/margins": 0.80078125,
|
|
"rewards/rejected": 0.53515625,
|
|
"step": 89
|
|
},
|
|
{
|
|
"epoch": 0.09955752212389381,
|
|
"grad_norm": 15.434545516967773,
|
|
"learning_rate": 4.94041615854638e-07,
|
|
"logits/chosen": -1.3671875,
|
|
"logits/rejected": -1.28515625,
|
|
"logps/chosen": -265.0,
|
|
"logps/rejected": -262.5,
|
|
"loss": 0.4258,
|
|
"rewards/accuracies": 0.6640625,
|
|
"rewards/chosen": 1.42578125,
|
|
"rewards/margins": 1.0390625,
|
|
"rewards/rejected": 0.3876953125,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 0.1006637168141593,
|
|
"grad_norm": 16.600032806396484,
|
|
"learning_rate": 4.938454697777457e-07,
|
|
"logits/chosen": -1.234375,
|
|
"logits/rejected": -1.1953125,
|
|
"logps/chosen": -279.0,
|
|
"logps/rejected": -278.0,
|
|
"loss": 0.473,
|
|
"rewards/accuracies": 0.671875,
|
|
"rewards/chosen": 1.265625,
|
|
"rewards/margins": 0.8828125,
|
|
"rewards/rejected": 0.3818359375,
|
|
"step": 91
|
|
},
|
|
{
|
|
"epoch": 0.10176991150442478,
|
|
"grad_norm": 17.01357078552246,
|
|
"learning_rate": 4.936461874863479e-07,
|
|
"logits/chosen": -1.3828125,
|
|
"logits/rejected": -1.3828125,
|
|
"logps/chosen": -255.5,
|
|
"logps/rejected": -286.0,
|
|
"loss": 0.495,
|
|
"rewards/accuracies": 0.640625,
|
|
"rewards/chosen": 1.3125,
|
|
"rewards/margins": 0.833984375,
|
|
"rewards/rejected": 0.4765625,
|
|
"step": 92
|
|
},
|
|
{
|
|
"epoch": 0.10287610619469026,
|
|
"grad_norm": 15.263469696044922,
|
|
"learning_rate": 4.934437715435107e-07,
|
|
"logits/chosen": -1.27734375,
|
|
"logits/rejected": -1.33203125,
|
|
"logps/chosen": -244.5,
|
|
"logps/rejected": -245.0,
|
|
"loss": 0.4545,
|
|
"rewards/accuracies": 0.6875,
|
|
"rewards/chosen": 1.328125,
|
|
"rewards/margins": 0.955078125,
|
|
"rewards/rejected": 0.3759765625,
|
|
"step": 93
|
|
},
|
|
{
|
|
"epoch": 0.10398230088495575,
|
|
"grad_norm": 15.016999244689941,
|
|
"learning_rate": 4.932382245526034e-07,
|
|
"logits/chosen": -1.23828125,
|
|
"logits/rejected": -1.21875,
|
|
"logps/chosen": -245.0,
|
|
"logps/rejected": -256.5,
|
|
"loss": 0.4381,
|
|
"rewards/accuracies": 0.7265625,
|
|
"rewards/chosen": 1.34375,
|
|
"rewards/margins": 0.970703125,
|
|
"rewards/rejected": 0.3740234375,
|
|
"step": 94
|
|
},
|
|
{
|
|
"epoch": 0.10508849557522124,
|
|
"grad_norm": 16.34784507751465,
|
|
"learning_rate": 4.930295491572653e-07,
|
|
"logits/chosen": -1.37109375,
|
|
"logits/rejected": -1.3203125,
|
|
"logps/chosen": -247.0,
|
|
"logps/rejected": -260.0,
|
|
"loss": 0.4766,
|
|
"rewards/accuracies": 0.640625,
|
|
"rewards/chosen": 1.34765625,
|
|
"rewards/margins": 0.87890625,
|
|
"rewards/rejected": 0.46875,
|
|
"step": 95
|
|
},
|
|
{
|
|
"epoch": 0.10619469026548672,
|
|
"grad_norm": 15.943212509155273,
|
|
"learning_rate": 4.928177480413714e-07,
|
|
"logits/chosen": -1.3046875,
|
|
"logits/rejected": -1.28125,
|
|
"logps/chosen": -259.0,
|
|
"logps/rejected": -270.5,
|
|
"loss": 0.4727,
|
|
"rewards/accuracies": 0.6484375,
|
|
"rewards/chosen": 1.3515625,
|
|
"rewards/margins": 0.955078125,
|
|
"rewards/rejected": 0.3955078125,
|
|
"step": 96
|
|
},
|
|
{
|
|
"epoch": 0.10730088495575221,
|
|
"grad_norm": 16.164276123046875,
|
|
"learning_rate": 4.926028239289984e-07,
|
|
"logits/chosen": -1.2421875,
|
|
"logits/rejected": -1.38671875,
|
|
"logps/chosen": -273.5,
|
|
"logps/rejected": -268.5,
|
|
"loss": 0.4556,
|
|
"rewards/accuracies": 0.6640625,
|
|
"rewards/chosen": 1.37890625,
|
|
"rewards/margins": 0.97265625,
|
|
"rewards/rejected": 0.4072265625,
|
|
"step": 97
|
|
},
|
|
{
|
|
"epoch": 0.1084070796460177,
|
|
"grad_norm": 16.16509437561035,
|
|
"learning_rate": 4.923847795843893e-07,
|
|
"logits/chosen": -1.359375,
|
|
"logits/rejected": -1.2265625,
|
|
"logps/chosen": -270.0,
|
|
"logps/rejected": -277.0,
|
|
"loss": 0.4657,
|
|
"rewards/accuracies": 0.6875,
|
|
"rewards/chosen": 1.265625,
|
|
"rewards/margins": 0.947265625,
|
|
"rewards/rejected": 0.3173828125,
|
|
"step": 98
|
|
},
|
|
{
|
|
"epoch": 0.10951327433628319,
|
|
"grad_norm": 16.42505645751953,
|
|
"learning_rate": 4.921636178119177e-07,
|
|
"logits/chosen": -1.47265625,
|
|
"logits/rejected": -1.16796875,
|
|
"logps/chosen": -251.0,
|
|
"logps/rejected": -232.5,
|
|
"loss": 0.4747,
|
|
"rewards/accuracies": 0.6328125,
|
|
"rewards/chosen": 1.296875,
|
|
"rewards/margins": 0.8671875,
|
|
"rewards/rejected": 0.4287109375,
|
|
"step": 99
|
|
},
|
|
{
|
|
"epoch": 0.11061946902654868,
|
|
"grad_norm": 23.872831344604492,
|
|
"learning_rate": 4.919393414560522e-07,
|
|
"logits/chosen": -1.39453125,
|
|
"logits/rejected": -1.3125,
|
|
"logps/chosen": -246.0,
|
|
"logps/rejected": -249.5,
|
|
"loss": 0.4521,
|
|
"rewards/accuracies": 0.6875,
|
|
"rewards/chosen": 1.26171875,
|
|
"rewards/margins": 0.93359375,
|
|
"rewards/rejected": 0.328125,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.11061946902654868,
|
|
"eval_logits/chosen": -1.329796314239502,
|
|
"eval_logits/rejected": -1.3044931888580322,
|
|
"eval_logps/chosen": -247.93531799316406,
|
|
"eval_logps/rejected": -256.3333435058594,
|
|
"eval_loss": 0.4438014328479767,
|
|
"eval_rewards/accuracies": 0.7061508893966675,
|
|
"eval_rewards/chosen": 1.2971081733703613,
|
|
"eval_rewards/margins": 1.0102806091308594,
|
|
"eval_rewards/rejected": 0.2868901491165161,
|
|
"eval_runtime": 193.1281,
|
|
"eval_samples_per_second": 66.552,
|
|
"eval_steps_per_second": 1.041,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 0.11172566371681415,
|
|
"grad_norm": 13.894512176513672,
|
|
"learning_rate": 4.917119534013193e-07,
|
|
"logits/chosen": -1.26953125,
|
|
"logits/rejected": -1.234375,
|
|
"logps/chosen": -233.0,
|
|
"logps/rejected": -233.0,
|
|
"loss": 0.418,
|
|
"rewards/accuracies": 0.71875,
|
|
"rewards/chosen": 1.25390625,
|
|
"rewards/margins": 1.0546875,
|
|
"rewards/rejected": 0.1982421875,
|
|
"step": 101
|
|
},
|
|
{
|
|
"epoch": 0.11283185840707964,
|
|
"grad_norm": 15.946537017822266,
|
|
"learning_rate": 4.91481456572267e-07,
|
|
"logits/chosen": -1.359375,
|
|
"logits/rejected": -1.23828125,
|
|
"logps/chosen": -251.5,
|
|
"logps/rejected": -249.5,
|
|
"loss": 0.4642,
|
|
"rewards/accuracies": 0.6953125,
|
|
"rewards/chosen": 1.22265625,
|
|
"rewards/margins": 0.91796875,
|
|
"rewards/rejected": 0.30517578125,
|
|
"step": 102
|
|
},
|
|
{
|
|
"epoch": 0.11393805309734513,
|
|
"grad_norm": 13.790292739868164,
|
|
"learning_rate": 4.912478539334264e-07,
|
|
"logits/chosen": -1.27734375,
|
|
"logits/rejected": -1.35546875,
|
|
"logps/chosen": -223.5,
|
|
"logps/rejected": -241.0,
|
|
"loss": 0.3972,
|
|
"rewards/accuracies": 0.7421875,
|
|
"rewards/chosen": 1.40625,
|
|
"rewards/margins": 1.14453125,
|
|
"rewards/rejected": 0.25634765625,
|
|
"step": 103
|
|
},
|
|
{
|
|
"epoch": 0.11504424778761062,
|
|
"grad_norm": 14.399200439453125,
|
|
"learning_rate": 4.910111484892739e-07,
|
|
"logits/chosen": -1.296875,
|
|
"logits/rejected": -1.26171875,
|
|
"logps/chosen": -240.5,
|
|
"logps/rejected": -260.5,
|
|
"loss": 0.3929,
|
|
"rewards/accuracies": 0.7734375,
|
|
"rewards/chosen": 1.36328125,
|
|
"rewards/margins": 1.23046875,
|
|
"rewards/rejected": 0.1336669921875,
|
|
"step": 104
|
|
},
|
|
{
|
|
"epoch": 0.1161504424778761,
|
|
"grad_norm": 16.46123504638672,
|
|
"learning_rate": 4.907713432841928e-07,
|
|
"logits/chosen": -1.24609375,
|
|
"logits/rejected": -1.14453125,
|
|
"logps/chosen": -255.0,
|
|
"logps/rejected": -229.0,
|
|
"loss": 0.5001,
|
|
"rewards/accuracies": 0.6171875,
|
|
"rewards/chosen": 1.0625,
|
|
"rewards/margins": 0.828125,
|
|
"rewards/rejected": 0.23486328125,
|
|
"step": 105
|
|
},
|
|
{
|
|
"epoch": 0.1172566371681416,
|
|
"grad_norm": 16.047285079956055,
|
|
"learning_rate": 4.905284414024337e-07,
|
|
"logits/chosen": -1.23828125,
|
|
"logits/rejected": -1.40234375,
|
|
"logps/chosen": -242.5,
|
|
"logps/rejected": -284.0,
|
|
"loss": 0.4525,
|
|
"rewards/accuracies": 0.7109375,
|
|
"rewards/chosen": 1.22265625,
|
|
"rewards/margins": 0.986328125,
|
|
"rewards/rejected": 0.23388671875,
|
|
"step": 106
|
|
},
|
|
{
|
|
"epoch": 0.11836283185840708,
|
|
"grad_norm": 16.925933837890625,
|
|
"learning_rate": 4.902824459680752e-07,
|
|
"logits/chosen": -1.2578125,
|
|
"logits/rejected": -1.3125,
|
|
"logps/chosen": -265.0,
|
|
"logps/rejected": -266.0,
|
|
"loss": 0.46,
|
|
"rewards/accuracies": 0.7421875,
|
|
"rewards/chosen": 1.19140625,
|
|
"rewards/margins": 0.978515625,
|
|
"rewards/rejected": 0.209228515625,
|
|
"step": 107
|
|
},
|
|
{
|
|
"epoch": 0.11946902654867257,
|
|
"grad_norm": 15.083377838134766,
|
|
"learning_rate": 4.900333601449835e-07,
|
|
"logits/chosen": -1.265625,
|
|
"logits/rejected": -1.33203125,
|
|
"logps/chosen": -266.0,
|
|
"logps/rejected": -265.0,
|
|
"loss": 0.4376,
|
|
"rewards/accuracies": 0.6875,
|
|
"rewards/chosen": 1.21484375,
|
|
"rewards/margins": 1.05078125,
|
|
"rewards/rejected": 0.162109375,
|
|
"step": 108
|
|
},
|
|
{
|
|
"epoch": 0.12057522123893805,
|
|
"grad_norm": 17.161651611328125,
|
|
"learning_rate": 4.89781187136772e-07,
|
|
"logits/chosen": -1.25,
|
|
"logits/rejected": -1.27734375,
|
|
"logps/chosen": -254.0,
|
|
"logps/rejected": -276.0,
|
|
"loss": 0.4388,
|
|
"rewards/accuracies": 0.71875,
|
|
"rewards/chosen": 1.26171875,
|
|
"rewards/margins": 1.09375,
|
|
"rewards/rejected": 0.16796875,
|
|
"step": 109
|
|
},
|
|
{
|
|
"epoch": 0.12168141592920353,
|
|
"grad_norm": 15.041230201721191,
|
|
"learning_rate": 4.895259301867595e-07,
|
|
"logits/chosen": -1.23828125,
|
|
"logits/rejected": -1.328125,
|
|
"logps/chosen": -246.0,
|
|
"logps/rejected": -280.0,
|
|
"loss": 0.4269,
|
|
"rewards/accuracies": 0.7109375,
|
|
"rewards/chosen": 1.3046875,
|
|
"rewards/margins": 1.1171875,
|
|
"rewards/rejected": 0.18603515625,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 0.12278761061946902,
|
|
"grad_norm": 13.183340072631836,
|
|
"learning_rate": 4.892675925779292e-07,
|
|
"logits/chosen": -1.3359375,
|
|
"logits/rejected": -1.36328125,
|
|
"logps/chosen": -207.5,
|
|
"logps/rejected": -250.0,
|
|
"loss": 0.4122,
|
|
"rewards/accuracies": 0.734375,
|
|
"rewards/chosen": 1.328125,
|
|
"rewards/margins": 1.26171875,
|
|
"rewards/rejected": 0.0699310302734375,
|
|
"step": 111
|
|
},
|
|
{
|
|
"epoch": 0.12389380530973451,
|
|
"grad_norm": 15.673736572265625,
|
|
"learning_rate": 4.89006177632886e-07,
|
|
"logits/chosen": -1.375,
|
|
"logits/rejected": -1.37109375,
|
|
"logps/chosen": -263.0,
|
|
"logps/rejected": -272.0,
|
|
"loss": 0.4412,
|
|
"rewards/accuracies": 0.765625,
|
|
"rewards/chosen": 1.24609375,
|
|
"rewards/margins": 1.08203125,
|
|
"rewards/rejected": 0.1640625,
|
|
"step": 112
|
|
},
|
|
{
|
|
"epoch": 0.125,
|
|
"grad_norm": 14.536067008972168,
|
|
"learning_rate": 4.887416887138138e-07,
|
|
"logits/chosen": -1.1875,
|
|
"logits/rejected": -1.15625,
|
|
"logps/chosen": -257.5,
|
|
"logps/rejected": -270.0,
|
|
"loss": 0.4604,
|
|
"rewards/accuracies": 0.671875,
|
|
"rewards/chosen": 1.1953125,
|
|
"rewards/margins": 1.09375,
|
|
"rewards/rejected": 0.10595703125,
|
|
"step": 113
|
|
},
|
|
{
|
|
"epoch": 0.1261061946902655,
|
|
"grad_norm": 14.573882102966309,
|
|
"learning_rate": 4.884741292224326e-07,
|
|
"logits/chosen": -1.296875,
|
|
"logits/rejected": -1.34765625,
|
|
"logps/chosen": -240.0,
|
|
"logps/rejected": -268.5,
|
|
"loss": 0.4091,
|
|
"rewards/accuracies": 0.734375,
|
|
"rewards/chosen": 1.265625,
|
|
"rewards/margins": 1.16796875,
|
|
"rewards/rejected": 0.09814453125,
|
|
"step": 114
|
|
},
|
|
{
|
|
"epoch": 0.12721238938053098,
|
|
"grad_norm": 15.746649742126465,
|
|
"learning_rate": 4.882035025999544e-07,
|
|
"logits/chosen": -1.22265625,
|
|
"logits/rejected": -1.1875,
|
|
"logps/chosen": -273.5,
|
|
"logps/rejected": -270.5,
|
|
"loss": 0.4313,
|
|
"rewards/accuracies": 0.71875,
|
|
"rewards/chosen": 1.15234375,
|
|
"rewards/margins": 1.1640625,
|
|
"rewards/rejected": -0.0108642578125,
|
|
"step": 115
|
|
},
|
|
{
|
|
"epoch": 0.12831858407079647,
|
|
"grad_norm": 15.159698486328125,
|
|
"learning_rate": 4.879298123270391e-07,
|
|
"logits/chosen": -1.32421875,
|
|
"logits/rejected": -1.36328125,
|
|
"logps/chosen": -244.5,
|
|
"logps/rejected": -256.0,
|
|
"loss": 0.4331,
|
|
"rewards/accuracies": 0.6796875,
|
|
"rewards/chosen": 1.23046875,
|
|
"rewards/margins": 1.171875,
|
|
"rewards/rejected": 0.0592041015625,
|
|
"step": 116
|
|
},
|
|
{
|
|
"epoch": 0.12942477876106195,
|
|
"grad_norm": 14.491214752197266,
|
|
"learning_rate": 4.876530619237495e-07,
|
|
"logits/chosen": -1.20703125,
|
|
"logits/rejected": -1.2578125,
|
|
"logps/chosen": -235.5,
|
|
"logps/rejected": -233.5,
|
|
"loss": 0.4267,
|
|
"rewards/accuracies": 0.6953125,
|
|
"rewards/chosen": 1.19140625,
|
|
"rewards/margins": 1.20703125,
|
|
"rewards/rejected": -0.018310546875,
|
|
"step": 117
|
|
},
|
|
{
|
|
"epoch": 0.13053097345132744,
|
|
"grad_norm": 15.388319969177246,
|
|
"learning_rate": 4.873732549495065e-07,
|
|
"logits/chosen": -1.3203125,
|
|
"logits/rejected": -1.25,
|
|
"logps/chosen": -263.0,
|
|
"logps/rejected": -254.5,
|
|
"loss": 0.4351,
|
|
"rewards/accuracies": 0.7109375,
|
|
"rewards/chosen": 1.16796875,
|
|
"rewards/margins": 1.1171875,
|
|
"rewards/rejected": 0.05218505859375,
|
|
"step": 118
|
|
},
|
|
{
|
|
"epoch": 0.13163716814159293,
|
|
"grad_norm": 14.733474731445312,
|
|
"learning_rate": 4.870903950030428e-07,
|
|
"logits/chosen": -1.35546875,
|
|
"logits/rejected": -1.31640625,
|
|
"logps/chosen": -241.5,
|
|
"logps/rejected": -274.0,
|
|
"loss": 0.376,
|
|
"rewards/accuracies": 0.765625,
|
|
"rewards/chosen": 1.390625,
|
|
"rewards/margins": 1.30859375,
|
|
"rewards/rejected": 0.086669921875,
|
|
"step": 119
|
|
},
|
|
{
|
|
"epoch": 0.13274336283185842,
|
|
"grad_norm": 14.26261043548584,
|
|
"learning_rate": 4.868044857223571e-07,
|
|
"logits/chosen": -1.2890625,
|
|
"logits/rejected": -1.29296875,
|
|
"logps/chosen": -248.5,
|
|
"logps/rejected": -281.0,
|
|
"loss": 0.3815,
|
|
"rewards/accuracies": 0.7734375,
|
|
"rewards/chosen": 1.296875,
|
|
"rewards/margins": 1.3046875,
|
|
"rewards/rejected": -0.008544921875,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 0.1338495575221239,
|
|
"grad_norm": 15.668647766113281,
|
|
"learning_rate": 4.865155307846669e-07,
|
|
"logits/chosen": -1.3828125,
|
|
"logits/rejected": -1.4375,
|
|
"logps/chosen": -232.0,
|
|
"logps/rejected": -240.0,
|
|
"loss": 0.4114,
|
|
"rewards/accuracies": 0.7109375,
|
|
"rewards/chosen": 1.2734375,
|
|
"rewards/margins": 1.2421875,
|
|
"rewards/rejected": 0.0323486328125,
|
|
"step": 121
|
|
},
|
|
{
|
|
"epoch": 0.13495575221238937,
|
|
"grad_norm": 16.117341995239258,
|
|
"learning_rate": 4.862235339063613e-07,
|
|
"logits/chosen": -1.35546875,
|
|
"logits/rejected": -1.26171875,
|
|
"logps/chosen": -252.5,
|
|
"logps/rejected": -268.0,
|
|
"loss": 0.4789,
|
|
"rewards/accuracies": 0.6640625,
|
|
"rewards/chosen": 1.10546875,
|
|
"rewards/margins": 0.9375,
|
|
"rewards/rejected": 0.169189453125,
|
|
"step": 122
|
|
},
|
|
{
|
|
"epoch": 0.13606194690265486,
|
|
"grad_norm": 15.315237998962402,
|
|
"learning_rate": 4.859284988429533e-07,
|
|
"logits/chosen": -1.390625,
|
|
"logits/rejected": -1.37109375,
|
|
"logps/chosen": -264.0,
|
|
"logps/rejected": -302.0,
|
|
"loss": 0.4574,
|
|
"rewards/accuracies": 0.6796875,
|
|
"rewards/chosen": 1.09375,
|
|
"rewards/margins": 1.0234375,
|
|
"rewards/rejected": 0.0693359375,
|
|
"step": 123
|
|
},
|
|
{
|
|
"epoch": 0.13716814159292035,
|
|
"grad_norm": 13.38134765625,
|
|
"learning_rate": 4.856304293890317e-07,
|
|
"logits/chosen": -1.25,
|
|
"logits/rejected": -1.18359375,
|
|
"logps/chosen": -255.0,
|
|
"logps/rejected": -253.5,
|
|
"loss": 0.3681,
|
|
"rewards/accuracies": 0.765625,
|
|
"rewards/chosen": 1.3984375,
|
|
"rewards/margins": 1.484375,
|
|
"rewards/rejected": -0.08575439453125,
|
|
"step": 124
|
|
},
|
|
{
|
|
"epoch": 0.13827433628318583,
|
|
"grad_norm": 17.225801467895508,
|
|
"learning_rate": 4.853293293782118e-07,
|
|
"logits/chosen": -1.39453125,
|
|
"logits/rejected": -1.4140625,
|
|
"logps/chosen": -276.0,
|
|
"logps/rejected": -280.0,
|
|
"loss": 0.458,
|
|
"rewards/accuracies": 0.6796875,
|
|
"rewards/chosen": 1.1796875,
|
|
"rewards/margins": 1.08203125,
|
|
"rewards/rejected": 0.09991455078125,
|
|
"step": 125
|
|
},
|
|
{
|
|
"epoch": 0.13938053097345132,
|
|
"grad_norm": 14.186132431030273,
|
|
"learning_rate": 4.850252026830863e-07,
|
|
"logits/chosen": -1.359375,
|
|
"logits/rejected": -1.26953125,
|
|
"logps/chosen": -234.5,
|
|
"logps/rejected": -252.5,
|
|
"loss": 0.4436,
|
|
"rewards/accuracies": 0.734375,
|
|
"rewards/chosen": 1.25390625,
|
|
"rewards/margins": 1.123046875,
|
|
"rewards/rejected": 0.1328125,
|
|
"step": 126
|
|
},
|
|
{
|
|
"epoch": 0.1404867256637168,
|
|
"grad_norm": 14.477481842041016,
|
|
"learning_rate": 4.84718053215176e-07,
|
|
"logits/chosen": -1.25390625,
|
|
"logits/rejected": -1.23046875,
|
|
"logps/chosen": -249.5,
|
|
"logps/rejected": -256.0,
|
|
"loss": 0.4314,
|
|
"rewards/accuracies": 0.703125,
|
|
"rewards/chosen": 1.296875,
|
|
"rewards/margins": 1.140625,
|
|
"rewards/rejected": 0.1552734375,
|
|
"step": 127
|
|
},
|
|
{
|
|
"epoch": 0.1415929203539823,
|
|
"grad_norm": 15.153040885925293,
|
|
"learning_rate": 4.844078849248785e-07,
|
|
"logits/chosen": -1.30859375,
|
|
"logits/rejected": -1.3125,
|
|
"logps/chosen": -260.0,
|
|
"logps/rejected": -292.0,
|
|
"loss": 0.3964,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": 1.421875,
|
|
"rewards/margins": 1.37890625,
|
|
"rewards/rejected": 0.0396728515625,
|
|
"step": 128
|
|
},
|
|
{
|
|
"epoch": 0.1426991150442478,
|
|
"grad_norm": 14.35177230834961,
|
|
"learning_rate": 4.840947018014182e-07,
|
|
"logits/chosen": -1.23828125,
|
|
"logits/rejected": -1.19140625,
|
|
"logps/chosen": -256.5,
|
|
"logps/rejected": -251.5,
|
|
"loss": 0.4107,
|
|
"rewards/accuracies": 0.7578125,
|
|
"rewards/chosen": 1.33203125,
|
|
"rewards/margins": 1.2734375,
|
|
"rewards/rejected": 0.060028076171875,
|
|
"step": 129
|
|
},
|
|
{
|
|
"epoch": 0.14380530973451328,
|
|
"grad_norm": 14.168734550476074,
|
|
"learning_rate": 4.837785078727948e-07,
|
|
"logits/chosen": -1.25,
|
|
"logits/rejected": -1.19140625,
|
|
"logps/chosen": -248.0,
|
|
"logps/rejected": -284.0,
|
|
"loss": 0.3812,
|
|
"rewards/accuracies": 0.7578125,
|
|
"rewards/chosen": 1.3984375,
|
|
"rewards/margins": 1.390625,
|
|
"rewards/rejected": 0.00927734375,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 0.14491150442477876,
|
|
"grad_norm": 15.743026733398438,
|
|
"learning_rate": 4.834593072057313e-07,
|
|
"logits/chosen": -1.28125,
|
|
"logits/rejected": -1.30078125,
|
|
"logps/chosen": -246.0,
|
|
"logps/rejected": -265.0,
|
|
"loss": 0.4586,
|
|
"rewards/accuracies": 0.6875,
|
|
"rewards/chosen": 1.31640625,
|
|
"rewards/margins": 1.08984375,
|
|
"rewards/rejected": 0.2255859375,
|
|
"step": 131
|
|
},
|
|
{
|
|
"epoch": 0.14601769911504425,
|
|
"grad_norm": 16.969074249267578,
|
|
"learning_rate": 4.831371039056217e-07,
|
|
"logits/chosen": -1.20703125,
|
|
"logits/rejected": -1.1484375,
|
|
"logps/chosen": -275.0,
|
|
"logps/rejected": -296.0,
|
|
"loss": 0.4373,
|
|
"rewards/accuracies": 0.7265625,
|
|
"rewards/chosen": 1.2109375,
|
|
"rewards/margins": 1.19921875,
|
|
"rewards/rejected": 0.0108642578125,
|
|
"step": 132
|
|
},
|
|
{
|
|
"epoch": 0.14712389380530974,
|
|
"grad_norm": 14.101773262023926,
|
|
"learning_rate": 4.828119021164786e-07,
|
|
"logits/chosen": -1.25390625,
|
|
"logits/rejected": -1.296875,
|
|
"logps/chosen": -246.5,
|
|
"logps/rejected": -277.0,
|
|
"loss": 0.3919,
|
|
"rewards/accuracies": 0.7578125,
|
|
"rewards/chosen": 1.359375,
|
|
"rewards/margins": 1.43359375,
|
|
"rewards/rejected": -0.07568359375,
|
|
"step": 133
|
|
},
|
|
{
|
|
"epoch": 0.14823008849557523,
|
|
"grad_norm": 15.83488941192627,
|
|
"learning_rate": 4.824837060208795e-07,
|
|
"logits/chosen": -1.3046875,
|
|
"logits/rejected": -1.265625,
|
|
"logps/chosen": -275.0,
|
|
"logps/rejected": -268.5,
|
|
"loss": 0.4578,
|
|
"rewards/accuracies": 0.703125,
|
|
"rewards/chosen": 1.1953125,
|
|
"rewards/margins": 0.998046875,
|
|
"rewards/rejected": 0.193359375,
|
|
"step": 134
|
|
},
|
|
{
|
|
"epoch": 0.14933628318584072,
|
|
"grad_norm": 13.669934272766113,
|
|
"learning_rate": 4.82152519839913e-07,
|
|
"logits/chosen": -1.390625,
|
|
"logits/rejected": -1.2578125,
|
|
"logps/chosen": -241.5,
|
|
"logps/rejected": -243.5,
|
|
"loss": 0.3765,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 1.4375,
|
|
"rewards/margins": 1.40625,
|
|
"rewards/rejected": 0.0296630859375,
|
|
"step": 135
|
|
},
|
|
{
|
|
"epoch": 0.1504424778761062,
|
|
"grad_norm": 16.85657501220703,
|
|
"learning_rate": 4.818183478331247e-07,
|
|
"logits/chosen": -1.13671875,
|
|
"logits/rejected": -1.25390625,
|
|
"logps/chosen": -257.5,
|
|
"logps/rejected": -277.5,
|
|
"loss": 0.4258,
|
|
"rewards/accuracies": 0.6953125,
|
|
"rewards/chosen": 1.390625,
|
|
"rewards/margins": 1.3125,
|
|
"rewards/rejected": 0.0772705078125,
|
|
"step": 136
|
|
},
|
|
{
|
|
"epoch": 0.1515486725663717,
|
|
"grad_norm": 15.21373462677002,
|
|
"learning_rate": 4.814811942984625e-07,
|
|
"logits/chosen": -1.29296875,
|
|
"logits/rejected": -1.1953125,
|
|
"logps/chosen": -256.5,
|
|
"logps/rejected": -240.0,
|
|
"loss": 0.4232,
|
|
"rewards/accuracies": 0.78125,
|
|
"rewards/chosen": 1.19921875,
|
|
"rewards/margins": 1.12890625,
|
|
"rewards/rejected": 0.0693359375,
|
|
"step": 137
|
|
},
|
|
{
|
|
"epoch": 0.15265486725663716,
|
|
"grad_norm": 13.69796085357666,
|
|
"learning_rate": 4.811410635722209e-07,
|
|
"logits/chosen": -1.2890625,
|
|
"logits/rejected": -1.21875,
|
|
"logps/chosen": -236.5,
|
|
"logps/rejected": -257.0,
|
|
"loss": 0.3722,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 1.4453125,
|
|
"rewards/margins": 1.55078125,
|
|
"rewards/rejected": -0.10595703125,
|
|
"step": 138
|
|
},
|
|
{
|
|
"epoch": 0.15376106194690264,
|
|
"grad_norm": 15.000753402709961,
|
|
"learning_rate": 4.807979600289857e-07,
|
|
"logits/chosen": -1.21875,
|
|
"logits/rejected": -1.27734375,
|
|
"logps/chosen": -274.0,
|
|
"logps/rejected": -297.0,
|
|
"loss": 0.3709,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": 1.30078125,
|
|
"rewards/margins": 1.515625,
|
|
"rewards/rejected": -0.21240234375,
|
|
"step": 139
|
|
},
|
|
{
|
|
"epoch": 0.15486725663716813,
|
|
"grad_norm": 13.44487476348877,
|
|
"learning_rate": 4.804518880815776e-07,
|
|
"logits/chosen": -1.15625,
|
|
"logits/rejected": -1.27734375,
|
|
"logps/chosen": -248.5,
|
|
"logps/rejected": -267.5,
|
|
"loss": 0.3818,
|
|
"rewards/accuracies": 0.7265625,
|
|
"rewards/chosen": 1.37109375,
|
|
"rewards/margins": 1.515625,
|
|
"rewards/rejected": -0.144775390625,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 0.15597345132743362,
|
|
"grad_norm": 15.1209135055542,
|
|
"learning_rate": 4.801028521809951e-07,
|
|
"logits/chosen": -1.21875,
|
|
"logits/rejected": -1.16796875,
|
|
"logps/chosen": -273.0,
|
|
"logps/rejected": -271.5,
|
|
"loss": 0.4027,
|
|
"rewards/accuracies": 0.7578125,
|
|
"rewards/chosen": 1.19140625,
|
|
"rewards/margins": 1.3125,
|
|
"rewards/rejected": -0.122802734375,
|
|
"step": 141
|
|
},
|
|
{
|
|
"epoch": 0.1570796460176991,
|
|
"grad_norm": 16.363567352294922,
|
|
"learning_rate": 4.797508568163578e-07,
|
|
"logits/chosen": -1.33203125,
|
|
"logits/rejected": -1.2109375,
|
|
"logps/chosen": -262.0,
|
|
"logps/rejected": -269.0,
|
|
"loss": 0.4581,
|
|
"rewards/accuracies": 0.7265625,
|
|
"rewards/chosen": 1.072265625,
|
|
"rewards/margins": 1.169921875,
|
|
"rewards/rejected": -0.097412109375,
|
|
"step": 142
|
|
},
|
|
{
|
|
"epoch": 0.1581858407079646,
|
|
"grad_norm": 13.670063972473145,
|
|
"learning_rate": 4.793959065148484e-07,
|
|
"logits/chosen": -1.2734375,
|
|
"logits/rejected": -1.2421875,
|
|
"logps/chosen": -240.0,
|
|
"logps/rejected": -254.5,
|
|
"loss": 0.3719,
|
|
"rewards/accuracies": 0.7734375,
|
|
"rewards/chosen": 1.296875,
|
|
"rewards/margins": 1.4765625,
|
|
"rewards/rejected": -0.179443359375,
|
|
"step": 143
|
|
},
|
|
{
|
|
"epoch": 0.1592920353982301,
|
|
"grad_norm": 14.17078971862793,
|
|
"learning_rate": 4.790380058416542e-07,
|
|
"logits/chosen": -1.26953125,
|
|
"logits/rejected": -1.23046875,
|
|
"logps/chosen": -240.0,
|
|
"logps/rejected": -259.5,
|
|
"loss": 0.3726,
|
|
"rewards/accuracies": 0.7734375,
|
|
"rewards/chosen": 1.35546875,
|
|
"rewards/margins": 1.625,
|
|
"rewards/rejected": -0.2666015625,
|
|
"step": 144
|
|
},
|
|
{
|
|
"epoch": 0.16039823008849557,
|
|
"grad_norm": 13.858586311340332,
|
|
"learning_rate": 4.786771593999089e-07,
|
|
"logits/chosen": -1.25390625,
|
|
"logits/rejected": -1.25,
|
|
"logps/chosen": -242.5,
|
|
"logps/rejected": -251.5,
|
|
"loss": 0.377,
|
|
"rewards/accuracies": 0.7734375,
|
|
"rewards/chosen": 1.3515625,
|
|
"rewards/margins": 1.5078125,
|
|
"rewards/rejected": -0.154296875,
|
|
"step": 145
|
|
},
|
|
{
|
|
"epoch": 0.16150442477876106,
|
|
"grad_norm": 15.108954429626465,
|
|
"learning_rate": 4.783133718306331e-07,
|
|
"logits/chosen": -1.21875,
|
|
"logits/rejected": -1.28125,
|
|
"logps/chosen": -266.0,
|
|
"logps/rejected": -305.0,
|
|
"loss": 0.4185,
|
|
"rewards/accuracies": 0.765625,
|
|
"rewards/chosen": 1.21875,
|
|
"rewards/margins": 1.37109375,
|
|
"rewards/rejected": -0.15087890625,
|
|
"step": 146
|
|
},
|
|
{
|
|
"epoch": 0.16261061946902655,
|
|
"grad_norm": 14.861040115356445,
|
|
"learning_rate": 4.779466478126746e-07,
|
|
"logits/chosen": -1.3828125,
|
|
"logits/rejected": -1.30078125,
|
|
"logps/chosen": -242.0,
|
|
"logps/rejected": -239.5,
|
|
"loss": 0.3849,
|
|
"rewards/accuracies": 0.765625,
|
|
"rewards/chosen": 1.140625,
|
|
"rewards/margins": 1.4296875,
|
|
"rewards/rejected": -0.2919921875,
|
|
"step": 147
|
|
},
|
|
{
|
|
"epoch": 0.16371681415929204,
|
|
"grad_norm": 14.671175956726074,
|
|
"learning_rate": 4.775769920626483e-07,
|
|
"logits/chosen": -1.37890625,
|
|
"logits/rejected": -1.27734375,
|
|
"logps/chosen": -238.5,
|
|
"logps/rejected": -251.0,
|
|
"loss": 0.4109,
|
|
"rewards/accuracies": 0.71875,
|
|
"rewards/chosen": 1.1484375,
|
|
"rewards/margins": 1.27734375,
|
|
"rewards/rejected": -0.129150390625,
|
|
"step": 148
|
|
},
|
|
{
|
|
"epoch": 0.16482300884955753,
|
|
"grad_norm": 13.885614395141602,
|
|
"learning_rate": 4.772044093348757e-07,
|
|
"logits/chosen": -1.24609375,
|
|
"logits/rejected": -1.23828125,
|
|
"logps/chosen": -245.5,
|
|
"logps/rejected": -247.0,
|
|
"loss": 0.4042,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": 1.162109375,
|
|
"rewards/margins": 1.361328125,
|
|
"rewards/rejected": -0.19720458984375,
|
|
"step": 149
|
|
},
|
|
{
|
|
"epoch": 0.16592920353982302,
|
|
"grad_norm": 15.551752090454102,
|
|
"learning_rate": 4.7682890442132336e-07,
|
|
"logits/chosen": -1.3671875,
|
|
"logits/rejected": -1.2265625,
|
|
"logps/chosen": -255.0,
|
|
"logps/rejected": -252.0,
|
|
"loss": 0.415,
|
|
"rewards/accuracies": 0.71875,
|
|
"rewards/chosen": 1.095703125,
|
|
"rewards/margins": 1.37890625,
|
|
"rewards/rejected": -0.28369140625,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.16592920353982302,
|
|
"eval_logits/chosen": -1.2870413064956665,
|
|
"eval_logits/rejected": -1.2431591749191284,
|
|
"eval_logps/chosen": -248.97512817382812,
|
|
"eval_logps/rejected": -261.86566162109375,
|
|
"eval_loss": 0.39721065759658813,
|
|
"eval_rewards/accuracies": 0.7473672032356262,
|
|
"eval_rewards/chosen": 1.184818148612976,
|
|
"eval_rewards/margins": 1.4427666664123535,
|
|
"eval_rewards/rejected": -0.25743111968040466,
|
|
"eval_runtime": 193.0648,
|
|
"eval_samples_per_second": 66.573,
|
|
"eval_steps_per_second": 1.041,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 0.1670353982300885,
|
|
"grad_norm": 15.598936080932617,
|
|
"learning_rate": 4.7645048215154156e-07,
|
|
"logits/chosen": -1.3046875,
|
|
"logits/rejected": -1.2890625,
|
|
"logps/chosen": -242.0,
|
|
"logps/rejected": -260.0,
|
|
"loss": 0.4404,
|
|
"rewards/accuracies": 0.6875,
|
|
"rewards/chosen": 1.146484375,
|
|
"rewards/margins": 1.39453125,
|
|
"rewards/rejected": -0.24755859375,
|
|
"step": 151
|
|
},
|
|
{
|
|
"epoch": 0.168141592920354,
|
|
"grad_norm": 13.759398460388184,
|
|
"learning_rate": 4.760691473926021e-07,
|
|
"logits/chosen": -1.24609375,
|
|
"logits/rejected": -1.234375,
|
|
"logps/chosen": -248.5,
|
|
"logps/rejected": -269.0,
|
|
"loss": 0.3753,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": 1.2890625,
|
|
"rewards/margins": 1.55859375,
|
|
"rewards/rejected": -0.265625,
|
|
"step": 152
|
|
},
|
|
{
|
|
"epoch": 0.16924778761061948,
|
|
"grad_norm": 17.32530975341797,
|
|
"learning_rate": 4.756849050490357e-07,
|
|
"logits/chosen": -1.26953125,
|
|
"logits/rejected": -1.1640625,
|
|
"logps/chosen": -287.0,
|
|
"logps/rejected": -302.0,
|
|
"loss": 0.4487,
|
|
"rewards/accuracies": 0.7265625,
|
|
"rewards/chosen": 1.01953125,
|
|
"rewards/margins": 1.23828125,
|
|
"rewards/rejected": -0.21826171875,
|
|
"step": 153
|
|
},
|
|
{
|
|
"epoch": 0.17035398230088494,
|
|
"grad_norm": 16.289810180664062,
|
|
"learning_rate": 4.75297760062769e-07,
|
|
"logits/chosen": -1.36328125,
|
|
"logits/rejected": -1.296875,
|
|
"logps/chosen": -271.0,
|
|
"logps/rejected": -266.5,
|
|
"loss": 0.4189,
|
|
"rewards/accuracies": 0.703125,
|
|
"rewards/chosen": 1.0703125,
|
|
"rewards/margins": 1.375,
|
|
"rewards/rejected": -0.30419921875,
|
|
"step": 154
|
|
},
|
|
{
|
|
"epoch": 0.17146017699115043,
|
|
"grad_norm": 15.245888710021973,
|
|
"learning_rate": 4.749077174130608e-07,
|
|
"logits/chosen": -1.3203125,
|
|
"logits/rejected": -1.23828125,
|
|
"logps/chosen": -264.0,
|
|
"logps/rejected": -282.0,
|
|
"loss": 0.4183,
|
|
"rewards/accuracies": 0.71875,
|
|
"rewards/chosen": 1.16015625,
|
|
"rewards/margins": 1.40234375,
|
|
"rewards/rejected": -0.240234375,
|
|
"step": 155
|
|
},
|
|
{
|
|
"epoch": 0.17256637168141592,
|
|
"grad_norm": 14.452110290527344,
|
|
"learning_rate": 4.7451478211643835e-07,
|
|
"logits/chosen": -1.39453125,
|
|
"logits/rejected": -1.30859375,
|
|
"logps/chosen": -253.0,
|
|
"logps/rejected": -256.0,
|
|
"loss": 0.3993,
|
|
"rewards/accuracies": 0.7421875,
|
|
"rewards/chosen": 1.2421875,
|
|
"rewards/margins": 1.44921875,
|
|
"rewards/rejected": -0.20654296875,
|
|
"step": 156
|
|
},
|
|
{
|
|
"epoch": 0.1736725663716814,
|
|
"grad_norm": 14.378584861755371,
|
|
"learning_rate": 4.741189592266325e-07,
|
|
"logits/chosen": -1.29296875,
|
|
"logits/rejected": -1.26171875,
|
|
"logps/chosen": -231.5,
|
|
"logps/rejected": -273.5,
|
|
"loss": 0.3664,
|
|
"rewards/accuracies": 0.7734375,
|
|
"rewards/chosen": 1.3125,
|
|
"rewards/margins": 1.70703125,
|
|
"rewards/rejected": -0.39453125,
|
|
"step": 157
|
|
},
|
|
{
|
|
"epoch": 0.1747787610619469,
|
|
"grad_norm": 13.193842887878418,
|
|
"learning_rate": 4.7372025383451274e-07,
|
|
"logits/chosen": -1.12109375,
|
|
"logits/rejected": -1.203125,
|
|
"logps/chosen": -240.0,
|
|
"logps/rejected": -260.0,
|
|
"loss": 0.3485,
|
|
"rewards/accuracies": 0.78125,
|
|
"rewards/chosen": 1.12109375,
|
|
"rewards/margins": 1.6171875,
|
|
"rewards/rejected": -0.4912109375,
|
|
"step": 158
|
|
},
|
|
{
|
|
"epoch": 0.17588495575221239,
|
|
"grad_norm": 13.745351791381836,
|
|
"learning_rate": 4.7331867106802204e-07,
|
|
"logits/chosen": -1.2421875,
|
|
"logits/rejected": -1.1875,
|
|
"logps/chosen": -258.5,
|
|
"logps/rejected": -265.0,
|
|
"loss": 0.3891,
|
|
"rewards/accuracies": 0.7109375,
|
|
"rewards/chosen": 1.23046875,
|
|
"rewards/margins": 1.5859375,
|
|
"rewards/rejected": -0.35546875,
|
|
"step": 159
|
|
},
|
|
{
|
|
"epoch": 0.17699115044247787,
|
|
"grad_norm": 14.0711669921875,
|
|
"learning_rate": 4.7291421609211045e-07,
|
|
"logits/chosen": -1.31640625,
|
|
"logits/rejected": -1.24609375,
|
|
"logps/chosen": -251.5,
|
|
"logps/rejected": -282.0,
|
|
"loss": 0.3999,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": 1.1875,
|
|
"rewards/margins": 1.3984375,
|
|
"rewards/rejected": -0.20654296875,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 0.17809734513274336,
|
|
"grad_norm": 13.304108619689941,
|
|
"learning_rate": 4.725068941086692e-07,
|
|
"logits/chosen": -1.37109375,
|
|
"logits/rejected": -1.1953125,
|
|
"logps/chosen": -255.5,
|
|
"logps/rejected": -262.0,
|
|
"loss": 0.3558,
|
|
"rewards/accuracies": 0.7734375,
|
|
"rewards/chosen": 1.2265625,
|
|
"rewards/margins": 1.63671875,
|
|
"rewards/rejected": -0.4140625,
|
|
"step": 161
|
|
},
|
|
{
|
|
"epoch": 0.17920353982300885,
|
|
"grad_norm": 13.896252632141113,
|
|
"learning_rate": 4.7209671035646304e-07,
|
|
"logits/chosen": -1.3125,
|
|
"logits/rejected": -1.2265625,
|
|
"logps/chosen": -248.5,
|
|
"logps/rejected": -264.0,
|
|
"loss": 0.3942,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": 1.15234375,
|
|
"rewards/margins": 1.42578125,
|
|
"rewards/rejected": -0.27294921875,
|
|
"step": 162
|
|
},
|
|
{
|
|
"epoch": 0.18030973451327434,
|
|
"grad_norm": 14.796649932861328,
|
|
"learning_rate": 4.7168367011106367e-07,
|
|
"logits/chosen": -1.30859375,
|
|
"logits/rejected": -1.30078125,
|
|
"logps/chosen": -245.0,
|
|
"logps/rejected": -262.5,
|
|
"loss": 0.3799,
|
|
"rewards/accuracies": 0.7578125,
|
|
"rewards/chosen": 1.13671875,
|
|
"rewards/margins": 1.5703125,
|
|
"rewards/rejected": -0.431640625,
|
|
"step": 163
|
|
},
|
|
{
|
|
"epoch": 0.18141592920353983,
|
|
"grad_norm": 16.078460693359375,
|
|
"learning_rate": 4.712677786847814e-07,
|
|
"logits/chosen": -1.44140625,
|
|
"logits/rejected": -1.1875,
|
|
"logps/chosen": -243.5,
|
|
"logps/rejected": -250.0,
|
|
"loss": 0.4507,
|
|
"rewards/accuracies": 0.703125,
|
|
"rewards/chosen": 1.044921875,
|
|
"rewards/margins": 1.2265625,
|
|
"rewards/rejected": -0.18359375,
|
|
"step": 164
|
|
},
|
|
{
|
|
"epoch": 0.18252212389380532,
|
|
"grad_norm": 13.583531379699707,
|
|
"learning_rate": 4.708490414265971e-07,
|
|
"logits/chosen": -1.375,
|
|
"logits/rejected": -1.1796875,
|
|
"logps/chosen": -262.0,
|
|
"logps/rejected": -272.5,
|
|
"loss": 0.3486,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 1.18359375,
|
|
"rewards/margins": 1.63671875,
|
|
"rewards/rejected": -0.4521484375,
|
|
"step": 165
|
|
},
|
|
{
|
|
"epoch": 0.1836283185840708,
|
|
"grad_norm": 14.29465389251709,
|
|
"learning_rate": 4.7042746372209296e-07,
|
|
"logits/chosen": -1.25,
|
|
"logits/rejected": -1.32421875,
|
|
"logps/chosen": -249.5,
|
|
"logps/rejected": -278.0,
|
|
"loss": 0.357,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 1.3046875,
|
|
"rewards/margins": 1.71875,
|
|
"rewards/rejected": -0.416015625,
|
|
"step": 166
|
|
},
|
|
{
|
|
"epoch": 0.1847345132743363,
|
|
"grad_norm": 14.11926555633545,
|
|
"learning_rate": 4.700030509933839e-07,
|
|
"logits/chosen": -1.12890625,
|
|
"logits/rejected": -1.1484375,
|
|
"logps/chosen": -235.5,
|
|
"logps/rejected": -273.0,
|
|
"loss": 0.3775,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 1.33984375,
|
|
"rewards/margins": 1.60546875,
|
|
"rewards/rejected": -0.2646484375,
|
|
"step": 167
|
|
},
|
|
{
|
|
"epoch": 0.18584070796460178,
|
|
"grad_norm": 13.987667083740234,
|
|
"learning_rate": 4.6957580869904707e-07,
|
|
"logits/chosen": -1.234375,
|
|
"logits/rejected": -1.140625,
|
|
"logps/chosen": -266.0,
|
|
"logps/rejected": -280.0,
|
|
"loss": 0.3593,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 1.15625,
|
|
"rewards/margins": 1.55078125,
|
|
"rewards/rejected": -0.3935546875,
|
|
"step": 168
|
|
},
|
|
{
|
|
"epoch": 0.18694690265486727,
|
|
"grad_norm": 14.725763320922852,
|
|
"learning_rate": 4.691457423340524e-07,
|
|
"logits/chosen": -1.2265625,
|
|
"logits/rejected": -1.076171875,
|
|
"logps/chosen": -261.0,
|
|
"logps/rejected": -248.5,
|
|
"loss": 0.3935,
|
|
"rewards/accuracies": 0.7421875,
|
|
"rewards/chosen": 0.953125,
|
|
"rewards/margins": 1.39453125,
|
|
"rewards/rejected": -0.439453125,
|
|
"step": 169
|
|
},
|
|
{
|
|
"epoch": 0.18805309734513273,
|
|
"grad_norm": 15.593293190002441,
|
|
"learning_rate": 4.6871285742969114e-07,
|
|
"logits/chosen": -1.171875,
|
|
"logits/rejected": -1.21875,
|
|
"logps/chosen": -267.0,
|
|
"logps/rejected": -278.0,
|
|
"loss": 0.4233,
|
|
"rewards/accuracies": 0.6875,
|
|
"rewards/chosen": 1.23828125,
|
|
"rewards/margins": 1.5234375,
|
|
"rewards/rejected": -0.279296875,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 0.18915929203539822,
|
|
"grad_norm": 13.978684425354004,
|
|
"learning_rate": 4.682771595535056e-07,
|
|
"logits/chosen": -1.2890625,
|
|
"logits/rejected": -1.3046875,
|
|
"logps/chosen": -244.5,
|
|
"logps/rejected": -274.0,
|
|
"loss": 0.3605,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": 1.41796875,
|
|
"rewards/margins": 1.6171875,
|
|
"rewards/rejected": -0.19677734375,
|
|
"step": 171
|
|
},
|
|
{
|
|
"epoch": 0.1902654867256637,
|
|
"grad_norm": 12.64192008972168,
|
|
"learning_rate": 4.678386543092168e-07,
|
|
"logits/chosen": -1.1953125,
|
|
"logits/rejected": -1.16015625,
|
|
"logps/chosen": -243.5,
|
|
"logps/rejected": -267.0,
|
|
"loss": 0.35,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 1.32421875,
|
|
"rewards/margins": 1.8515625,
|
|
"rewards/rejected": -0.52734375,
|
|
"step": 172
|
|
},
|
|
{
|
|
"epoch": 0.1913716814159292,
|
|
"grad_norm": 15.251437187194824,
|
|
"learning_rate": 4.673973473366527e-07,
|
|
"logits/chosen": -1.25390625,
|
|
"logits/rejected": -1.2578125,
|
|
"logps/chosen": -252.5,
|
|
"logps/rejected": -270.5,
|
|
"loss": 0.386,
|
|
"rewards/accuracies": 0.71875,
|
|
"rewards/chosen": 1.33984375,
|
|
"rewards/margins": 1.71875,
|
|
"rewards/rejected": -0.3818359375,
|
|
"step": 173
|
|
},
|
|
{
|
|
"epoch": 0.19247787610619468,
|
|
"grad_norm": 11.346704483032227,
|
|
"learning_rate": 4.669532443116757e-07,
|
|
"logits/chosen": -1.2890625,
|
|
"logits/rejected": -1.20703125,
|
|
"logps/chosen": -227.0,
|
|
"logps/rejected": -244.5,
|
|
"loss": 0.2852,
|
|
"rewards/accuracies": 0.84375,
|
|
"rewards/chosen": 1.46484375,
|
|
"rewards/margins": 2.0625,
|
|
"rewards/rejected": -0.59765625,
|
|
"step": 174
|
|
},
|
|
{
|
|
"epoch": 0.19358407079646017,
|
|
"grad_norm": 17.457523345947266,
|
|
"learning_rate": 4.6650635094610966e-07,
|
|
"logits/chosen": -1.34375,
|
|
"logits/rejected": -1.12109375,
|
|
"logps/chosen": -280.0,
|
|
"logps/rejected": -277.0,
|
|
"loss": 0.4692,
|
|
"rewards/accuracies": 0.6953125,
|
|
"rewards/chosen": 1.05859375,
|
|
"rewards/margins": 1.22265625,
|
|
"rewards/rejected": -0.166748046875,
|
|
"step": 175
|
|
},
|
|
{
|
|
"epoch": 0.19469026548672566,
|
|
"grad_norm": 14.530098915100098,
|
|
"learning_rate": 4.6605667298766607e-07,
|
|
"logits/chosen": -1.30078125,
|
|
"logits/rejected": -1.21875,
|
|
"logps/chosen": -241.5,
|
|
"logps/rejected": -260.0,
|
|
"loss": 0.3907,
|
|
"rewards/accuracies": 0.71875,
|
|
"rewards/chosen": 1.17578125,
|
|
"rewards/margins": 1.67578125,
|
|
"rewards/rejected": -0.501953125,
|
|
"step": 176
|
|
},
|
|
{
|
|
"epoch": 0.19579646017699115,
|
|
"grad_norm": 15.266855239868164,
|
|
"learning_rate": 4.656042162198708e-07,
|
|
"logits/chosen": -1.43359375,
|
|
"logits/rejected": -1.3046875,
|
|
"logps/chosen": -235.0,
|
|
"logps/rejected": -265.0,
|
|
"loss": 0.4364,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": 1.0703125,
|
|
"rewards/margins": 1.4296875,
|
|
"rewards/rejected": -0.357421875,
|
|
"step": 177
|
|
},
|
|
{
|
|
"epoch": 0.19690265486725664,
|
|
"grad_norm": 12.054651260375977,
|
|
"learning_rate": 4.6514898646198896e-07,
|
|
"logits/chosen": -1.3203125,
|
|
"logits/rejected": -1.26171875,
|
|
"logps/chosen": -257.0,
|
|
"logps/rejected": -271.0,
|
|
"loss": 0.3194,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 1.2734375,
|
|
"rewards/margins": 1.91015625,
|
|
"rewards/rejected": -0.6328125,
|
|
"step": 178
|
|
},
|
|
{
|
|
"epoch": 0.19800884955752213,
|
|
"grad_norm": 15.28715705871582,
|
|
"learning_rate": 4.6469098956895076e-07,
|
|
"logits/chosen": -1.3125,
|
|
"logits/rejected": -1.22265625,
|
|
"logps/chosen": -265.5,
|
|
"logps/rejected": -277.0,
|
|
"loss": 0.3848,
|
|
"rewards/accuracies": 0.765625,
|
|
"rewards/chosen": 1.28125,
|
|
"rewards/margins": 1.65234375,
|
|
"rewards/rejected": -0.373046875,
|
|
"step": 179
|
|
},
|
|
{
|
|
"epoch": 0.19911504424778761,
|
|
"grad_norm": 14.788736343383789,
|
|
"learning_rate": 4.6423023143127557e-07,
|
|
"logits/chosen": -1.2265625,
|
|
"logits/rejected": -1.3984375,
|
|
"logps/chosen": -252.0,
|
|
"logps/rejected": -272.0,
|
|
"loss": 0.3994,
|
|
"rewards/accuracies": 0.7734375,
|
|
"rewards/chosen": 1.0859375,
|
|
"rewards/margins": 1.5,
|
|
"rewards/rejected": -0.4150390625,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 0.2002212389380531,
|
|
"grad_norm": 14.42548942565918,
|
|
"learning_rate": 4.637667179749968e-07,
|
|
"logits/chosen": -1.23046875,
|
|
"logits/rejected": -1.21484375,
|
|
"logps/chosen": -272.5,
|
|
"logps/rejected": -274.5,
|
|
"loss": 0.3871,
|
|
"rewards/accuracies": 0.6875,
|
|
"rewards/chosen": 1.0390625,
|
|
"rewards/margins": 1.515625,
|
|
"rewards/rejected": -0.48046875,
|
|
"step": 181
|
|
},
|
|
{
|
|
"epoch": 0.2013274336283186,
|
|
"grad_norm": 13.830480575561523,
|
|
"learning_rate": 4.63300455161585e-07,
|
|
"logits/chosen": -1.29296875,
|
|
"logits/rejected": -1.19140625,
|
|
"logps/chosen": -250.0,
|
|
"logps/rejected": -248.0,
|
|
"loss": 0.3167,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 1.34375,
|
|
"rewards/margins": 1.92578125,
|
|
"rewards/rejected": -0.5859375,
|
|
"step": 182
|
|
},
|
|
{
|
|
"epoch": 0.20243362831858408,
|
|
"grad_norm": 14.639776229858398,
|
|
"learning_rate": 4.6283144898787174e-07,
|
|
"logits/chosen": -1.32421875,
|
|
"logits/rejected": -1.20703125,
|
|
"logps/chosen": -247.5,
|
|
"logps/rejected": -279.0,
|
|
"loss": 0.3672,
|
|
"rewards/accuracies": 0.78125,
|
|
"rewards/chosen": 1.32421875,
|
|
"rewards/margins": 1.79296875,
|
|
"rewards/rejected": -0.46484375,
|
|
"step": 183
|
|
},
|
|
{
|
|
"epoch": 0.20353982300884957,
|
|
"grad_norm": 13.662202835083008,
|
|
"learning_rate": 4.6235970548597224e-07,
|
|
"logits/chosen": -1.26953125,
|
|
"logits/rejected": -1.234375,
|
|
"logps/chosen": -231.0,
|
|
"logps/rejected": -240.0,
|
|
"loss": 0.3531,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": 1.28125,
|
|
"rewards/margins": 1.78515625,
|
|
"rewards/rejected": -0.505859375,
|
|
"step": 184
|
|
},
|
|
{
|
|
"epoch": 0.20464601769911506,
|
|
"grad_norm": 13.101706504821777,
|
|
"learning_rate": 4.6188523072320777e-07,
|
|
"logits/chosen": -1.2421875,
|
|
"logits/rejected": -1.14453125,
|
|
"logps/chosen": -253.0,
|
|
"logps/rejected": -273.0,
|
|
"loss": 0.3276,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 1.2421875,
|
|
"rewards/margins": 1.82421875,
|
|
"rewards/rejected": -0.5830078125,
|
|
"step": 185
|
|
},
|
|
{
|
|
"epoch": 0.20575221238938052,
|
|
"grad_norm": 16.33759307861328,
|
|
"learning_rate": 4.614080308020277e-07,
|
|
"logits/chosen": -1.25,
|
|
"logits/rejected": -1.2265625,
|
|
"logps/chosen": -258.0,
|
|
"logps/rejected": -290.0,
|
|
"loss": 0.3694,
|
|
"rewards/accuracies": 0.78125,
|
|
"rewards/chosen": 1.203125,
|
|
"rewards/margins": 1.69140625,
|
|
"rewards/rejected": -0.48828125,
|
|
"step": 186
|
|
},
|
|
{
|
|
"epoch": 0.206858407079646,
|
|
"grad_norm": 13.627776145935059,
|
|
"learning_rate": 4.609281118599311e-07,
|
|
"logits/chosen": -1.3046875,
|
|
"logits/rejected": -1.21875,
|
|
"logps/chosen": -238.5,
|
|
"logps/rejected": -239.0,
|
|
"loss": 0.4007,
|
|
"rewards/accuracies": 0.7265625,
|
|
"rewards/chosen": 1.056640625,
|
|
"rewards/margins": 1.55078125,
|
|
"rewards/rejected": -0.4931640625,
|
|
"step": 187
|
|
},
|
|
{
|
|
"epoch": 0.2079646017699115,
|
|
"grad_norm": 13.673922538757324,
|
|
"learning_rate": 4.6044548006938734e-07,
|
|
"logits/chosen": -1.3671875,
|
|
"logits/rejected": -1.1796875,
|
|
"logps/chosen": -247.5,
|
|
"logps/rejected": -254.5,
|
|
"loss": 0.3592,
|
|
"rewards/accuracies": 0.78125,
|
|
"rewards/chosen": 1.29296875,
|
|
"rewards/margins": 1.7265625,
|
|
"rewards/rejected": -0.4375,
|
|
"step": 188
|
|
},
|
|
{
|
|
"epoch": 0.20907079646017698,
|
|
"grad_norm": 14.20157527923584,
|
|
"learning_rate": 4.5996014163775745e-07,
|
|
"logits/chosen": -1.30078125,
|
|
"logits/rejected": -1.26953125,
|
|
"logps/chosen": -268.5,
|
|
"logps/rejected": -272.0,
|
|
"loss": 0.3429,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 1.32421875,
|
|
"rewards/margins": 1.84765625,
|
|
"rewards/rejected": -0.5234375,
|
|
"step": 189
|
|
},
|
|
{
|
|
"epoch": 0.21017699115044247,
|
|
"grad_norm": 14.90439510345459,
|
|
"learning_rate": 4.5947210280721353e-07,
|
|
"logits/chosen": -1.34375,
|
|
"logits/rejected": -1.2421875,
|
|
"logps/chosen": -248.0,
|
|
"logps/rejected": -285.0,
|
|
"loss": 0.373,
|
|
"rewards/accuracies": 0.7265625,
|
|
"rewards/chosen": 1.203125,
|
|
"rewards/margins": 1.765625,
|
|
"rewards/rejected": -0.5625,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 0.21128318584070796,
|
|
"grad_norm": 14.063448905944824,
|
|
"learning_rate": 4.589813698546592e-07,
|
|
"logits/chosen": -1.34375,
|
|
"logits/rejected": -1.1328125,
|
|
"logps/chosen": -256.0,
|
|
"logps/rejected": -274.0,
|
|
"loss": 0.3471,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": 1.21875,
|
|
"rewards/margins": 1.828125,
|
|
"rewards/rejected": -0.607421875,
|
|
"step": 191
|
|
},
|
|
{
|
|
"epoch": 0.21238938053097345,
|
|
"grad_norm": 13.391234397888184,
|
|
"learning_rate": 4.584879490916481e-07,
|
|
"logits/chosen": -1.30078125,
|
|
"logits/rejected": -1.234375,
|
|
"logps/chosen": -247.5,
|
|
"logps/rejected": -241.5,
|
|
"loss": 0.356,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 1.0390625,
|
|
"rewards/margins": 1.9296875,
|
|
"rewards/rejected": -0.892578125,
|
|
"step": 192
|
|
},
|
|
{
|
|
"epoch": 0.21349557522123894,
|
|
"grad_norm": 13.415105819702148,
|
|
"learning_rate": 4.5799184686430343e-07,
|
|
"logits/chosen": -1.23046875,
|
|
"logits/rejected": -1.09375,
|
|
"logps/chosen": -251.0,
|
|
"logps/rejected": -257.5,
|
|
"loss": 0.34,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": 1.1875,
|
|
"rewards/margins": 1.87109375,
|
|
"rewards/rejected": -0.689453125,
|
|
"step": 193
|
|
},
|
|
{
|
|
"epoch": 0.21460176991150443,
|
|
"grad_norm": 13.00170612335205,
|
|
"learning_rate": 4.574930695532356e-07,
|
|
"logits/chosen": -1.30078125,
|
|
"logits/rejected": -1.32421875,
|
|
"logps/chosen": -257.0,
|
|
"logps/rejected": -273.0,
|
|
"loss": 0.3455,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 1.203125,
|
|
"rewards/margins": 1.82421875,
|
|
"rewards/rejected": -0.623046875,
|
|
"step": 194
|
|
},
|
|
{
|
|
"epoch": 0.2157079646017699,
|
|
"grad_norm": 13.366878509521484,
|
|
"learning_rate": 4.569916235734611e-07,
|
|
"logits/chosen": -1.2890625,
|
|
"logits/rejected": -1.19921875,
|
|
"logps/chosen": -240.5,
|
|
"logps/rejected": -272.0,
|
|
"loss": 0.3792,
|
|
"rewards/accuracies": 0.7109375,
|
|
"rewards/chosen": 1.2109375,
|
|
"rewards/margins": 1.7578125,
|
|
"rewards/rejected": -0.544921875,
|
|
"step": 195
|
|
},
|
|
{
|
|
"epoch": 0.2168141592920354,
|
|
"grad_norm": 14.402266502380371,
|
|
"learning_rate": 4.5648751537431897e-07,
|
|
"logits/chosen": -1.21484375,
|
|
"logits/rejected": -1.171875,
|
|
"logps/chosen": -250.5,
|
|
"logps/rejected": -286.0,
|
|
"loss": 0.428,
|
|
"rewards/accuracies": 0.71875,
|
|
"rewards/chosen": 0.9453125,
|
|
"rewards/margins": 1.3984375,
|
|
"rewards/rejected": -0.4560546875,
|
|
"step": 196
|
|
},
|
|
{
|
|
"epoch": 0.2179203539823009,
|
|
"grad_norm": 15.003867149353027,
|
|
"learning_rate": 4.559807514393885e-07,
|
|
"logits/chosen": -1.30859375,
|
|
"logits/rejected": -1.2421875,
|
|
"logps/chosen": -276.5,
|
|
"logps/rejected": -286.0,
|
|
"loss": 0.35,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": 1.296875,
|
|
"rewards/margins": 1.87109375,
|
|
"rewards/rejected": -0.5810546875,
|
|
"step": 197
|
|
},
|
|
{
|
|
"epoch": 0.21902654867256638,
|
|
"grad_norm": 14.217790603637695,
|
|
"learning_rate": 4.5547133828640595e-07,
|
|
"logits/chosen": -1.25,
|
|
"logits/rejected": -1.24609375,
|
|
"logps/chosen": -267.0,
|
|
"logps/rejected": -266.0,
|
|
"loss": 0.3393,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 1.27734375,
|
|
"rewards/margins": 1.83203125,
|
|
"rewards/rejected": -0.5556640625,
|
|
"step": 198
|
|
},
|
|
{
|
|
"epoch": 0.22013274336283187,
|
|
"grad_norm": 13.773700714111328,
|
|
"learning_rate": 4.5495928246717995e-07,
|
|
"logits/chosen": -1.2421875,
|
|
"logits/rejected": -1.28125,
|
|
"logps/chosen": -265.0,
|
|
"logps/rejected": -292.0,
|
|
"loss": 0.3351,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 1.375,
|
|
"rewards/margins": 2.0546875,
|
|
"rewards/rejected": -0.67578125,
|
|
"step": 199
|
|
},
|
|
{
|
|
"epoch": 0.22123893805309736,
|
|
"grad_norm": 14.733463287353516,
|
|
"learning_rate": 4.544445905675081e-07,
|
|
"logits/chosen": -1.265625,
|
|
"logits/rejected": -1.18359375,
|
|
"logps/chosen": -266.0,
|
|
"logps/rejected": -281.5,
|
|
"loss": 0.3673,
|
|
"rewards/accuracies": 0.78125,
|
|
"rewards/chosen": 1.0859375,
|
|
"rewards/margins": 1.7265625,
|
|
"rewards/rejected": -0.642578125,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.22123893805309736,
|
|
"eval_logits/chosen": -1.276119351387024,
|
|
"eval_logits/rejected": -1.2190414667129517,
|
|
"eval_logps/chosen": -249.2039794921875,
|
|
"eval_logps/rejected": -265.0248718261719,
|
|
"eval_loss": 0.37490636110305786,
|
|
"eval_rewards/accuracies": 0.7651365399360657,
|
|
"eval_rewards/chosen": 1.1634211540222168,
|
|
"eval_rewards/margins": 1.7447527647018433,
|
|
"eval_rewards/rejected": -0.5807631611824036,
|
|
"eval_runtime": 192.9266,
|
|
"eval_samples_per_second": 66.621,
|
|
"eval_steps_per_second": 1.042,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 0.22234513274336284,
|
|
"grad_norm": 14.183818817138672,
|
|
"learning_rate": 4.539272692070919e-07,
|
|
"logits/chosen": -1.2890625,
|
|
"logits/rejected": -1.203125,
|
|
"logps/chosen": -270.0,
|
|
"logps/rejected": -238.5,
|
|
"loss": 0.3398,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 1.2421875,
|
|
"rewards/margins": 1.84375,
|
|
"rewards/rejected": -0.6015625,
|
|
"step": 201
|
|
},
|
|
{
|
|
"epoch": 0.2234513274336283,
|
|
"grad_norm": 14.671875953674316,
|
|
"learning_rate": 4.534073250394515e-07,
|
|
"logits/chosen": -1.40625,
|
|
"logits/rejected": -1.25390625,
|
|
"logps/chosen": -245.5,
|
|
"logps/rejected": -261.5,
|
|
"loss": 0.4247,
|
|
"rewards/accuracies": 0.6796875,
|
|
"rewards/chosen": 0.904296875,
|
|
"rewards/margins": 1.375,
|
|
"rewards/rejected": -0.47265625,
|
|
"step": 202
|
|
},
|
|
{
|
|
"epoch": 0.2245575221238938,
|
|
"grad_norm": 14.409346580505371,
|
|
"learning_rate": 4.5288476475184025e-07,
|
|
"logits/chosen": -1.23828125,
|
|
"logits/rejected": -1.15234375,
|
|
"logps/chosen": -251.5,
|
|
"logps/rejected": -259.0,
|
|
"loss": 0.3738,
|
|
"rewards/accuracies": 0.7578125,
|
|
"rewards/chosen": 1.10546875,
|
|
"rewards/margins": 1.73046875,
|
|
"rewards/rejected": -0.623046875,
|
|
"step": 203
|
|
},
|
|
{
|
|
"epoch": 0.22566371681415928,
|
|
"grad_norm": 16.879392623901367,
|
|
"learning_rate": 4.523595950651587e-07,
|
|
"logits/chosen": -1.21484375,
|
|
"logits/rejected": -1.25390625,
|
|
"logps/chosen": -272.0,
|
|
"logps/rejected": -281.0,
|
|
"loss": 0.4152,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": 1.0546875,
|
|
"rewards/margins": 1.640625,
|
|
"rewards/rejected": -0.58203125,
|
|
"step": 204
|
|
},
|
|
{
|
|
"epoch": 0.22676991150442477,
|
|
"grad_norm": 13.093546867370605,
|
|
"learning_rate": 4.518318227338681e-07,
|
|
"logits/chosen": -1.20703125,
|
|
"logits/rejected": -1.1953125,
|
|
"logps/chosen": -272.0,
|
|
"logps/rejected": -275.0,
|
|
"loss": 0.3398,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 1.07421875,
|
|
"rewards/margins": 1.71484375,
|
|
"rewards/rejected": -0.640625,
|
|
"step": 205
|
|
},
|
|
{
|
|
"epoch": 0.22787610619469026,
|
|
"grad_norm": 14.883780479431152,
|
|
"learning_rate": 4.5130145454590374e-07,
|
|
"logits/chosen": -1.28515625,
|
|
"logits/rejected": -1.140625,
|
|
"logps/chosen": -247.0,
|
|
"logps/rejected": -279.0,
|
|
"loss": 0.3714,
|
|
"rewards/accuracies": 0.7734375,
|
|
"rewards/chosen": 1.12890625,
|
|
"rewards/margins": 1.84375,
|
|
"rewards/rejected": -0.71875,
|
|
"step": 206
|
|
},
|
|
{
|
|
"epoch": 0.22898230088495575,
|
|
"grad_norm": 13.462334632873535,
|
|
"learning_rate": 4.5076849732258737e-07,
|
|
"logits/chosen": -1.30078125,
|
|
"logits/rejected": -1.1953125,
|
|
"logps/chosen": -233.0,
|
|
"logps/rejected": -231.5,
|
|
"loss": 0.3624,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": 1.2578125,
|
|
"rewards/margins": 1.75,
|
|
"rewards/rejected": -0.4931640625,
|
|
"step": 207
|
|
},
|
|
{
|
|
"epoch": 0.23008849557522124,
|
|
"grad_norm": 13.485892295837402,
|
|
"learning_rate": 4.5023295791853937e-07,
|
|
"logits/chosen": -1.30859375,
|
|
"logits/rejected": -1.25390625,
|
|
"logps/chosen": -243.0,
|
|
"logps/rejected": -284.0,
|
|
"loss": 0.3465,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 1.36328125,
|
|
"rewards/margins": 1.86328125,
|
|
"rewards/rejected": -0.4951171875,
|
|
"step": 208
|
|
},
|
|
{
|
|
"epoch": 0.23119469026548672,
|
|
"grad_norm": 13.468306541442871,
|
|
"learning_rate": 4.496948432215912e-07,
|
|
"logits/chosen": -1.2734375,
|
|
"logits/rejected": -1.14453125,
|
|
"logps/chosen": -239.0,
|
|
"logps/rejected": -231.5,
|
|
"loss": 0.3881,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 1.07421875,
|
|
"rewards/margins": 1.6484375,
|
|
"rewards/rejected": -0.576171875,
|
|
"step": 209
|
|
},
|
|
{
|
|
"epoch": 0.2323008849557522,
|
|
"grad_norm": 14.274983406066895,
|
|
"learning_rate": 4.4915416015269614e-07,
|
|
"logits/chosen": -1.30078125,
|
|
"logits/rejected": -1.265625,
|
|
"logps/chosen": -271.0,
|
|
"logps/rejected": -279.5,
|
|
"loss": 0.3449,
|
|
"rewards/accuracies": 0.7734375,
|
|
"rewards/chosen": 1.24609375,
|
|
"rewards/margins": 1.875,
|
|
"rewards/rejected": -0.6298828125,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 0.2334070796460177,
|
|
"grad_norm": 14.726081848144531,
|
|
"learning_rate": 4.486109156658405e-07,
|
|
"logits/chosen": -1.25390625,
|
|
"logits/rejected": -1.30078125,
|
|
"logps/chosen": -223.0,
|
|
"logps/rejected": -258.0,
|
|
"loss": 0.3548,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 1.26953125,
|
|
"rewards/margins": 1.83203125,
|
|
"rewards/rejected": -0.5556640625,
|
|
"step": 211
|
|
},
|
|
{
|
|
"epoch": 0.2345132743362832,
|
|
"grad_norm": 14.424053192138672,
|
|
"learning_rate": 4.480651167479544e-07,
|
|
"logits/chosen": -1.328125,
|
|
"logits/rejected": -1.18359375,
|
|
"logps/chosen": -235.5,
|
|
"logps/rejected": -251.0,
|
|
"loss": 0.3725,
|
|
"rewards/accuracies": 0.7578125,
|
|
"rewards/chosen": 1.3125,
|
|
"rewards/margins": 1.7109375,
|
|
"rewards/rejected": -0.3974609375,
|
|
"step": 212
|
|
},
|
|
{
|
|
"epoch": 0.23561946902654868,
|
|
"grad_norm": 16.50137710571289,
|
|
"learning_rate": 4.475167704188218e-07,
|
|
"logits/chosen": -1.28515625,
|
|
"logits/rejected": -1.33203125,
|
|
"logps/chosen": -261.0,
|
|
"logps/rejected": -274.0,
|
|
"loss": 0.4309,
|
|
"rewards/accuracies": 0.765625,
|
|
"rewards/chosen": 1.0625,
|
|
"rewards/margins": 1.57421875,
|
|
"rewards/rejected": -0.513671875,
|
|
"step": 213
|
|
},
|
|
{
|
|
"epoch": 0.23672566371681417,
|
|
"grad_norm": 13.223847389221191,
|
|
"learning_rate": 4.4696588373098973e-07,
|
|
"logits/chosen": -1.203125,
|
|
"logits/rejected": -1.26953125,
|
|
"logps/chosen": -246.0,
|
|
"logps/rejected": -262.5,
|
|
"loss": 0.3152,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 1.40234375,
|
|
"rewards/margins": 2.12109375,
|
|
"rewards/rejected": -0.7177734375,
|
|
"step": 214
|
|
},
|
|
{
|
|
"epoch": 0.23783185840707965,
|
|
"grad_norm": 15.553281784057617,
|
|
"learning_rate": 4.4641246376967854e-07,
|
|
"logits/chosen": -1.19140625,
|
|
"logits/rejected": -1.1640625,
|
|
"logps/chosen": -256.5,
|
|
"logps/rejected": -271.0,
|
|
"loss": 0.3849,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 1.203125,
|
|
"rewards/margins": 1.73046875,
|
|
"rewards/rejected": -0.52734375,
|
|
"step": 215
|
|
},
|
|
{
|
|
"epoch": 0.23893805309734514,
|
|
"grad_norm": 14.652776718139648,
|
|
"learning_rate": 4.4585651765268983e-07,
|
|
"logits/chosen": -1.21484375,
|
|
"logits/rejected": -1.16796875,
|
|
"logps/chosen": -249.0,
|
|
"logps/rejected": -240.0,
|
|
"loss": 0.394,
|
|
"rewards/accuracies": 0.7421875,
|
|
"rewards/chosen": 1.15625,
|
|
"rewards/margins": 1.73828125,
|
|
"rewards/rejected": -0.5859375,
|
|
"step": 216
|
|
},
|
|
{
|
|
"epoch": 0.24004424778761063,
|
|
"grad_norm": 15.165270805358887,
|
|
"learning_rate": 4.452980525303155e-07,
|
|
"logits/chosen": -1.29296875,
|
|
"logits/rejected": -1.22265625,
|
|
"logps/chosen": -272.5,
|
|
"logps/rejected": -269.0,
|
|
"loss": 0.3583,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": 1.25,
|
|
"rewards/margins": 1.81640625,
|
|
"rewards/rejected": -0.56640625,
|
|
"step": 217
|
|
},
|
|
{
|
|
"epoch": 0.2411504424778761,
|
|
"grad_norm": 13.010436058044434,
|
|
"learning_rate": 4.4473707558524553e-07,
|
|
"logits/chosen": -1.41796875,
|
|
"logits/rejected": -1.1640625,
|
|
"logps/chosen": -248.5,
|
|
"logps/rejected": -276.0,
|
|
"loss": 0.3244,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 1.30859375,
|
|
"rewards/margins": 2.05859375,
|
|
"rewards/rejected": -0.751953125,
|
|
"step": 218
|
|
},
|
|
{
|
|
"epoch": 0.24225663716814158,
|
|
"grad_norm": 14.902968406677246,
|
|
"learning_rate": 4.4417359403247567e-07,
|
|
"logits/chosen": -1.2890625,
|
|
"logits/rejected": -1.10546875,
|
|
"logps/chosen": -255.0,
|
|
"logps/rejected": -276.0,
|
|
"loss": 0.3569,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": 1.453125,
|
|
"rewards/margins": 2.0234375,
|
|
"rewards/rejected": -0.568359375,
|
|
"step": 219
|
|
},
|
|
{
|
|
"epoch": 0.24336283185840707,
|
|
"grad_norm": 13.878840446472168,
|
|
"learning_rate": 4.436076151192146e-07,
|
|
"logits/chosen": -1.33203125,
|
|
"logits/rejected": -1.26953125,
|
|
"logps/chosen": -218.0,
|
|
"logps/rejected": -246.5,
|
|
"loss": 0.3976,
|
|
"rewards/accuracies": 0.703125,
|
|
"rewards/chosen": 1.1484375,
|
|
"rewards/margins": 1.8046875,
|
|
"rewards/rejected": -0.654296875,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 0.24446902654867256,
|
|
"grad_norm": 13.981918334960938,
|
|
"learning_rate": 4.4303914612479104e-07,
|
|
"logits/chosen": -1.32421875,
|
|
"logits/rejected": -1.25,
|
|
"logps/chosen": -237.0,
|
|
"logps/rejected": -273.0,
|
|
"loss": 0.3427,
|
|
"rewards/accuracies": 0.78125,
|
|
"rewards/chosen": 1.36328125,
|
|
"rewards/margins": 2.0390625,
|
|
"rewards/rejected": -0.673828125,
|
|
"step": 221
|
|
},
|
|
{
|
|
"epoch": 0.24557522123893805,
|
|
"grad_norm": 12.754227638244629,
|
|
"learning_rate": 4.4246819436055946e-07,
|
|
"logits/chosen": -1.234375,
|
|
"logits/rejected": -1.140625,
|
|
"logps/chosen": -248.0,
|
|
"logps/rejected": -250.0,
|
|
"loss": 0.3383,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 1.109375,
|
|
"rewards/margins": 1.87890625,
|
|
"rewards/rejected": -0.76171875,
|
|
"step": 222
|
|
},
|
|
{
|
|
"epoch": 0.24668141592920353,
|
|
"grad_norm": 14.764009475708008,
|
|
"learning_rate": 4.418947671698066e-07,
|
|
"logits/chosen": -1.34375,
|
|
"logits/rejected": -1.234375,
|
|
"logps/chosen": -250.5,
|
|
"logps/rejected": -266.0,
|
|
"loss": 0.3845,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": 1.060546875,
|
|
"rewards/margins": 1.7265625,
|
|
"rewards/rejected": -0.66015625,
|
|
"step": 223
|
|
},
|
|
{
|
|
"epoch": 0.24778761061946902,
|
|
"grad_norm": 15.158235549926758,
|
|
"learning_rate": 4.4131887192765684e-07,
|
|
"logits/chosen": -1.28515625,
|
|
"logits/rejected": -1.26953125,
|
|
"logps/chosen": -244.0,
|
|
"logps/rejected": -265.0,
|
|
"loss": 0.3368,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 1.19140625,
|
|
"rewards/margins": 2.09375,
|
|
"rewards/rejected": -0.904296875,
|
|
"step": 224
|
|
},
|
|
{
|
|
"epoch": 0.2488938053097345,
|
|
"grad_norm": 13.44605827331543,
|
|
"learning_rate": 4.4074051604097753e-07,
|
|
"logits/chosen": -1.265625,
|
|
"logits/rejected": -1.26171875,
|
|
"logps/chosen": -248.0,
|
|
"logps/rejected": -269.0,
|
|
"loss": 0.3464,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 1.28515625,
|
|
"rewards/margins": 2.03125,
|
|
"rewards/rejected": -0.744140625,
|
|
"step": 225
|
|
},
|
|
{
|
|
"epoch": 0.25,
|
|
"grad_norm": 15.778076171875,
|
|
"learning_rate": 4.401597069482832e-07,
|
|
"logits/chosen": -1.32421875,
|
|
"logits/rejected": -1.26171875,
|
|
"logps/chosen": -248.5,
|
|
"logps/rejected": -265.0,
|
|
"loss": 0.4139,
|
|
"rewards/accuracies": 0.7265625,
|
|
"rewards/chosen": 0.962890625,
|
|
"rewards/margins": 1.66015625,
|
|
"rewards/rejected": -0.697265625,
|
|
"step": 226
|
|
},
|
|
{
|
|
"epoch": 0.25110619469026546,
|
|
"grad_norm": 13.870752334594727,
|
|
"learning_rate": 4.395764521196406e-07,
|
|
"logits/chosen": -1.26171875,
|
|
"logits/rejected": -1.2265625,
|
|
"logps/chosen": -234.5,
|
|
"logps/rejected": -281.0,
|
|
"loss": 0.3158,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 1.3984375,
|
|
"rewards/margins": 2.2734375,
|
|
"rewards/rejected": -0.87109375,
|
|
"step": 227
|
|
},
|
|
{
|
|
"epoch": 0.252212389380531,
|
|
"grad_norm": 13.615601539611816,
|
|
"learning_rate": 4.389907590565721e-07,
|
|
"logits/chosen": -1.1796875,
|
|
"logits/rejected": -1.15234375,
|
|
"logps/chosen": -268.0,
|
|
"logps/rejected": -290.0,
|
|
"loss": 0.3724,
|
|
"rewards/accuracies": 0.7421875,
|
|
"rewards/chosen": 1.087890625,
|
|
"rewards/margins": 1.91796875,
|
|
"rewards/rejected": -0.830078125,
|
|
"step": 228
|
|
},
|
|
{
|
|
"epoch": 0.25331858407079644,
|
|
"grad_norm": 14.186120986938477,
|
|
"learning_rate": 4.3840263529195943e-07,
|
|
"logits/chosen": -1.2421875,
|
|
"logits/rejected": -1.2109375,
|
|
"logps/chosen": -248.5,
|
|
"logps/rejected": -262.0,
|
|
"loss": 0.3415,
|
|
"rewards/accuracies": 0.7734375,
|
|
"rewards/chosen": 1.072265625,
|
|
"rewards/margins": 1.9375,
|
|
"rewards/rejected": -0.865234375,
|
|
"step": 229
|
|
},
|
|
{
|
|
"epoch": 0.25442477876106195,
|
|
"grad_norm": 12.267884254455566,
|
|
"learning_rate": 4.3781208838994663e-07,
|
|
"logits/chosen": -1.296875,
|
|
"logits/rejected": -1.23828125,
|
|
"logps/chosen": -246.0,
|
|
"logps/rejected": -257.5,
|
|
"loss": 0.3271,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 1.15234375,
|
|
"rewards/margins": 1.97265625,
|
|
"rewards/rejected": -0.8203125,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 0.2555309734513274,
|
|
"grad_norm": 14.3861722946167,
|
|
"learning_rate": 4.372191259458432e-07,
|
|
"logits/chosen": -1.3515625,
|
|
"logits/rejected": -1.21875,
|
|
"logps/chosen": -234.5,
|
|
"logps/rejected": -251.0,
|
|
"loss": 0.3735,
|
|
"rewards/accuracies": 0.765625,
|
|
"rewards/chosen": 1.111328125,
|
|
"rewards/margins": 1.92578125,
|
|
"rewards/rejected": -0.81640625,
|
|
"step": 231
|
|
},
|
|
{
|
|
"epoch": 0.25663716814159293,
|
|
"grad_norm": 13.046867370605469,
|
|
"learning_rate": 4.366237555860256e-07,
|
|
"logits/chosen": -1.35546875,
|
|
"logits/rejected": -1.2109375,
|
|
"logps/chosen": -246.0,
|
|
"logps/rejected": -270.0,
|
|
"loss": 0.3317,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 1.16015625,
|
|
"rewards/margins": 2.109375,
|
|
"rewards/rejected": -0.9453125,
|
|
"step": 232
|
|
},
|
|
{
|
|
"epoch": 0.2577433628318584,
|
|
"grad_norm": 15.247108459472656,
|
|
"learning_rate": 4.3602598496784013e-07,
|
|
"logits/chosen": -1.2578125,
|
|
"logits/rejected": -1.140625,
|
|
"logps/chosen": -272.0,
|
|
"logps/rejected": -268.0,
|
|
"loss": 0.3798,
|
|
"rewards/accuracies": 0.7734375,
|
|
"rewards/chosen": 0.9765625,
|
|
"rewards/margins": 1.875,
|
|
"rewards/rejected": -0.896484375,
|
|
"step": 233
|
|
},
|
|
{
|
|
"epoch": 0.2588495575221239,
|
|
"grad_norm": 13.2136812210083,
|
|
"learning_rate": 4.3542582177950373e-07,
|
|
"logits/chosen": -1.21484375,
|
|
"logits/rejected": -1.1484375,
|
|
"logps/chosen": -227.5,
|
|
"logps/rejected": -262.5,
|
|
"loss": 0.3171,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 1.26953125,
|
|
"rewards/margins": 2.1015625,
|
|
"rewards/rejected": -0.822265625,
|
|
"step": 234
|
|
},
|
|
{
|
|
"epoch": 0.25995575221238937,
|
|
"grad_norm": 13.574021339416504,
|
|
"learning_rate": 4.348232737400054e-07,
|
|
"logits/chosen": -1.13671875,
|
|
"logits/rejected": -1.171875,
|
|
"logps/chosen": -239.0,
|
|
"logps/rejected": -267.0,
|
|
"loss": 0.3749,
|
|
"rewards/accuracies": 0.7734375,
|
|
"rewards/chosen": 1.05859375,
|
|
"rewards/margins": 1.78125,
|
|
"rewards/rejected": -0.720703125,
|
|
"step": 235
|
|
},
|
|
{
|
|
"epoch": 0.2610619469026549,
|
|
"grad_norm": 13.393758773803711,
|
|
"learning_rate": 4.3421834859900685e-07,
|
|
"logits/chosen": -1.265625,
|
|
"logits/rejected": -1.140625,
|
|
"logps/chosen": -236.5,
|
|
"logps/rejected": -255.0,
|
|
"loss": 0.3454,
|
|
"rewards/accuracies": 0.78125,
|
|
"rewards/chosen": 1.1796875,
|
|
"rewards/margins": 2.140625,
|
|
"rewards/rejected": -0.962890625,
|
|
"step": 236
|
|
},
|
|
{
|
|
"epoch": 0.26216814159292035,
|
|
"grad_norm": 17.910938262939453,
|
|
"learning_rate": 4.336110541367428e-07,
|
|
"logits/chosen": -1.25390625,
|
|
"logits/rejected": -1.19921875,
|
|
"logps/chosen": -245.5,
|
|
"logps/rejected": -272.0,
|
|
"loss": 0.4424,
|
|
"rewards/accuracies": 0.6875,
|
|
"rewards/chosen": 0.943359375,
|
|
"rewards/margins": 1.58984375,
|
|
"rewards/rejected": -0.646484375,
|
|
"step": 237
|
|
},
|
|
{
|
|
"epoch": 0.26327433628318586,
|
|
"grad_norm": 14.35909366607666,
|
|
"learning_rate": 4.33001398163921e-07,
|
|
"logits/chosen": -1.25,
|
|
"logits/rejected": -1.2265625,
|
|
"logps/chosen": -243.5,
|
|
"logps/rejected": -260.5,
|
|
"loss": 0.3525,
|
|
"rewards/accuracies": 0.78125,
|
|
"rewards/chosen": 1.30078125,
|
|
"rewards/margins": 2.2109375,
|
|
"rewards/rejected": -0.91015625,
|
|
"step": 238
|
|
},
|
|
{
|
|
"epoch": 0.2643805309734513,
|
|
"grad_norm": 15.5848970413208,
|
|
"learning_rate": 4.3238938852162187e-07,
|
|
"logits/chosen": -1.1875,
|
|
"logits/rejected": -1.3046875,
|
|
"logps/chosen": -250.5,
|
|
"logps/rejected": -273.0,
|
|
"loss": 0.3839,
|
|
"rewards/accuracies": 0.7265625,
|
|
"rewards/chosen": 1.1640625,
|
|
"rewards/margins": 2.03125,
|
|
"rewards/rejected": -0.861328125,
|
|
"step": 239
|
|
},
|
|
{
|
|
"epoch": 0.26548672566371684,
|
|
"grad_norm": 13.962175369262695,
|
|
"learning_rate": 4.317750330811972e-07,
|
|
"logits/chosen": -1.3671875,
|
|
"logits/rejected": -1.328125,
|
|
"logps/chosen": -250.5,
|
|
"logps/rejected": -275.0,
|
|
"loss": 0.3394,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 1.1875,
|
|
"rewards/margins": 1.91015625,
|
|
"rewards/rejected": -0.72265625,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 0.2665929203539823,
|
|
"grad_norm": 13.130892753601074,
|
|
"learning_rate": 4.311583397441696e-07,
|
|
"logits/chosen": -1.18359375,
|
|
"logits/rejected": -1.26171875,
|
|
"logps/chosen": -240.0,
|
|
"logps/rejected": -255.5,
|
|
"loss": 0.3364,
|
|
"rewards/accuracies": 0.765625,
|
|
"rewards/chosen": 1.25,
|
|
"rewards/margins": 2.203125,
|
|
"rewards/rejected": -0.9453125,
|
|
"step": 241
|
|
},
|
|
{
|
|
"epoch": 0.2676991150442478,
|
|
"grad_norm": 15.227952003479004,
|
|
"learning_rate": 4.3053931644213e-07,
|
|
"logits/chosen": -1.23046875,
|
|
"logits/rejected": -1.1640625,
|
|
"logps/chosen": -261.0,
|
|
"logps/rejected": -269.5,
|
|
"loss": 0.4343,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": 1.15234375,
|
|
"rewards/margins": 1.65234375,
|
|
"rewards/rejected": -0.4970703125,
|
|
"step": 242
|
|
},
|
|
{
|
|
"epoch": 0.2688053097345133,
|
|
"grad_norm": 11.86292552947998,
|
|
"learning_rate": 4.2991797113663676e-07,
|
|
"logits/chosen": -1.24609375,
|
|
"logits/rejected": -1.1875,
|
|
"logps/chosen": -239.5,
|
|
"logps/rejected": -268.0,
|
|
"loss": 0.2865,
|
|
"rewards/accuracies": 0.8515625,
|
|
"rewards/chosen": 1.3125,
|
|
"rewards/margins": 2.296875,
|
|
"rewards/rejected": -0.98046875,
|
|
"step": 243
|
|
},
|
|
{
|
|
"epoch": 0.26991150442477874,
|
|
"grad_norm": 12.915170669555664,
|
|
"learning_rate": 4.292943118191121e-07,
|
|
"logits/chosen": -1.19140625,
|
|
"logits/rejected": -1.23828125,
|
|
"logps/chosen": -243.0,
|
|
"logps/rejected": -257.0,
|
|
"loss": 0.3192,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 1.26953125,
|
|
"rewards/margins": 2.1328125,
|
|
"rewards/rejected": -0.869140625,
|
|
"step": 244
|
|
},
|
|
{
|
|
"epoch": 0.27101769911504425,
|
|
"grad_norm": 16.35938262939453,
|
|
"learning_rate": 4.2866834651074024e-07,
|
|
"logits/chosen": -1.16015625,
|
|
"logits/rejected": -1.12890625,
|
|
"logps/chosen": -283.0,
|
|
"logps/rejected": -308.0,
|
|
"loss": 0.3896,
|
|
"rewards/accuracies": 0.734375,
|
|
"rewards/chosen": 1.19921875,
|
|
"rewards/margins": 1.8046875,
|
|
"rewards/rejected": -0.607421875,
|
|
"step": 245
|
|
},
|
|
{
|
|
"epoch": 0.2721238938053097,
|
|
"grad_norm": 14.654645919799805,
|
|
"learning_rate": 4.280400832623636e-07,
|
|
"logits/chosen": -1.25,
|
|
"logits/rejected": -1.10546875,
|
|
"logps/chosen": -269.5,
|
|
"logps/rejected": -273.0,
|
|
"loss": 0.3785,
|
|
"rewards/accuracies": 0.7265625,
|
|
"rewards/chosen": 1.234375,
|
|
"rewards/margins": 1.95703125,
|
|
"rewards/rejected": -0.71484375,
|
|
"step": 246
|
|
},
|
|
{
|
|
"epoch": 0.27323008849557523,
|
|
"grad_norm": 12.577658653259277,
|
|
"learning_rate": 4.274095301543796e-07,
|
|
"logits/chosen": -1.4140625,
|
|
"logits/rejected": -1.234375,
|
|
"logps/chosen": -222.5,
|
|
"logps/rejected": -252.0,
|
|
"loss": 0.3402,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 1.33984375,
|
|
"rewards/margins": 2.06640625,
|
|
"rewards/rejected": -0.73046875,
|
|
"step": 247
|
|
},
|
|
{
|
|
"epoch": 0.2743362831858407,
|
|
"grad_norm": 13.634322166442871,
|
|
"learning_rate": 4.2677669529663686e-07,
|
|
"logits/chosen": -1.2578125,
|
|
"logits/rejected": -1.1640625,
|
|
"logps/chosen": -266.0,
|
|
"logps/rejected": -267.5,
|
|
"loss": 0.3221,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 1.41015625,
|
|
"rewards/margins": 2.2734375,
|
|
"rewards/rejected": -0.87109375,
|
|
"step": 248
|
|
},
|
|
{
|
|
"epoch": 0.2754424778761062,
|
|
"grad_norm": 14.120111465454102,
|
|
"learning_rate": 4.2614158682833037e-07,
|
|
"logits/chosen": -1.26953125,
|
|
"logits/rejected": -1.1328125,
|
|
"logps/chosen": -251.0,
|
|
"logps/rejected": -281.0,
|
|
"loss": 0.3739,
|
|
"rewards/accuracies": 0.765625,
|
|
"rewards/chosen": 1.32421875,
|
|
"rewards/margins": 1.90625,
|
|
"rewards/rejected": -0.5791015625,
|
|
"step": 249
|
|
},
|
|
{
|
|
"epoch": 0.27654867256637167,
|
|
"grad_norm": 14.189047813415527,
|
|
"learning_rate": 4.255042129178973e-07,
|
|
"logits/chosen": -1.27734375,
|
|
"logits/rejected": -1.21484375,
|
|
"logps/chosen": -237.0,
|
|
"logps/rejected": -268.0,
|
|
"loss": 0.3868,
|
|
"rewards/accuracies": 0.7421875,
|
|
"rewards/chosen": 1.14453125,
|
|
"rewards/margins": 1.9375,
|
|
"rewards/rejected": -0.794921875,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.27654867256637167,
|
|
"eval_logits/chosen": -1.2774020433425903,
|
|
"eval_logits/rejected": -1.207769751548767,
|
|
"eval_logps/chosen": -248.95523071289062,
|
|
"eval_logps/rejected": -267.1990051269531,
|
|
"eval_loss": 0.36062541604042053,
|
|
"eval_rewards/accuracies": 0.7771241068840027,
|
|
"eval_rewards/chosen": 1.1926889419555664,
|
|
"eval_rewards/margins": 1.988767147064209,
|
|
"eval_rewards/rejected": -0.7957963943481445,
|
|
"eval_runtime": 193.0793,
|
|
"eval_samples_per_second": 66.568,
|
|
"eval_steps_per_second": 1.041,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 0.2776548672566372,
|
|
"grad_norm": 14.157464981079102,
|
|
"learning_rate": 4.248645817629117e-07,
|
|
"logits/chosen": -1.40625,
|
|
"logits/rejected": -1.26171875,
|
|
"logps/chosen": -262.0,
|
|
"logps/rejected": -279.0,
|
|
"loss": 0.3588,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 1.033203125,
|
|
"rewards/margins": 1.91796875,
|
|
"rewards/rejected": -0.88671875,
|
|
"step": 251
|
|
},
|
|
{
|
|
"epoch": 0.27876106194690264,
|
|
"grad_norm": 12.822221755981445,
|
|
"learning_rate": 4.242227015899793e-07,
|
|
"logits/chosen": -1.2890625,
|
|
"logits/rejected": -1.19140625,
|
|
"logps/chosen": -245.5,
|
|
"logps/rejected": -273.0,
|
|
"loss": 0.3323,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 1.41796875,
|
|
"rewards/margins": 2.2734375,
|
|
"rewards/rejected": -0.853515625,
|
|
"step": 252
|
|
},
|
|
{
|
|
"epoch": 0.27986725663716816,
|
|
"grad_norm": 15.107699394226074,
|
|
"learning_rate": 4.2357858065463124e-07,
|
|
"logits/chosen": -1.3046875,
|
|
"logits/rejected": -1.13671875,
|
|
"logps/chosen": -243.5,
|
|
"logps/rejected": -275.0,
|
|
"loss": 0.4063,
|
|
"rewards/accuracies": 0.7421875,
|
|
"rewards/chosen": 1.2421875,
|
|
"rewards/margins": 1.88671875,
|
|
"rewards/rejected": -0.642578125,
|
|
"step": 253
|
|
},
|
|
{
|
|
"epoch": 0.2809734513274336,
|
|
"grad_norm": 14.644704818725586,
|
|
"learning_rate": 4.229322272412185e-07,
|
|
"logits/chosen": -1.2421875,
|
|
"logits/rejected": -1.203125,
|
|
"logps/chosen": -274.0,
|
|
"logps/rejected": -289.0,
|
|
"loss": 0.3511,
|
|
"rewards/accuracies": 0.78125,
|
|
"rewards/chosen": 1.009765625,
|
|
"rewards/margins": 1.98046875,
|
|
"rewards/rejected": -0.970703125,
|
|
"step": 254
|
|
},
|
|
{
|
|
"epoch": 0.28207964601769914,
|
|
"grad_norm": 14.453044891357422,
|
|
"learning_rate": 4.222836496628047e-07,
|
|
"logits/chosen": -1.31640625,
|
|
"logits/rejected": -1.21875,
|
|
"logps/chosen": -264.0,
|
|
"logps/rejected": -286.0,
|
|
"loss": 0.3342,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 1.21484375,
|
|
"rewards/margins": 1.9375,
|
|
"rewards/rejected": -0.72265625,
|
|
"step": 255
|
|
},
|
|
{
|
|
"epoch": 0.2831858407079646,
|
|
"grad_norm": 12.731569290161133,
|
|
"learning_rate": 4.216328562610599e-07,
|
|
"logits/chosen": -1.33203125,
|
|
"logits/rejected": -1.2109375,
|
|
"logps/chosen": -231.5,
|
|
"logps/rejected": -262.5,
|
|
"loss": 0.3542,
|
|
"rewards/accuracies": 0.78125,
|
|
"rewards/chosen": 1.06640625,
|
|
"rewards/margins": 2.12890625,
|
|
"rewards/rejected": -1.064453125,
|
|
"step": 256
|
|
},
|
|
{
|
|
"epoch": 0.2842920353982301,
|
|
"grad_norm": 14.310720443725586,
|
|
"learning_rate": 4.209798554061527e-07,
|
|
"logits/chosen": -1.2578125,
|
|
"logits/rejected": -1.21484375,
|
|
"logps/chosen": -258.0,
|
|
"logps/rejected": -282.0,
|
|
"loss": 0.3884,
|
|
"rewards/accuracies": 0.7578125,
|
|
"rewards/chosen": 0.966796875,
|
|
"rewards/margins": 1.828125,
|
|
"rewards/rejected": -0.861328125,
|
|
"step": 257
|
|
},
|
|
{
|
|
"epoch": 0.2853982300884956,
|
|
"grad_norm": 14.716500282287598,
|
|
"learning_rate": 4.203246554966428e-07,
|
|
"logits/chosen": -1.1796875,
|
|
"logits/rejected": -1.3046875,
|
|
"logps/chosen": -243.0,
|
|
"logps/rejected": -253.0,
|
|
"loss": 0.4139,
|
|
"rewards/accuracies": 0.71875,
|
|
"rewards/chosen": 0.962890625,
|
|
"rewards/margins": 1.71484375,
|
|
"rewards/rejected": -0.75,
|
|
"step": 258
|
|
},
|
|
{
|
|
"epoch": 0.28650442477876104,
|
|
"grad_norm": 14.436864852905273,
|
|
"learning_rate": 4.1966726495937305e-07,
|
|
"logits/chosen": -1.42578125,
|
|
"logits/rejected": -1.20703125,
|
|
"logps/chosen": -252.5,
|
|
"logps/rejected": -273.0,
|
|
"loss": 0.3439,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 1.08984375,
|
|
"rewards/margins": 1.95703125,
|
|
"rewards/rejected": -0.86328125,
|
|
"step": 259
|
|
},
|
|
{
|
|
"epoch": 0.28761061946902655,
|
|
"grad_norm": 15.182847023010254,
|
|
"learning_rate": 4.1900769224936124e-07,
|
|
"logits/chosen": -1.1953125,
|
|
"logits/rejected": -1.19140625,
|
|
"logps/chosen": -286.0,
|
|
"logps/rejected": -310.0,
|
|
"loss": 0.3774,
|
|
"rewards/accuracies": 0.7109375,
|
|
"rewards/chosen": 0.96875,
|
|
"rewards/margins": 1.99609375,
|
|
"rewards/rejected": -1.03125,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 0.288716814159292,
|
|
"grad_norm": 13.360356330871582,
|
|
"learning_rate": 4.1834594584969077e-07,
|
|
"logits/chosen": -1.25390625,
|
|
"logits/rejected": -1.2109375,
|
|
"logps/chosen": -248.5,
|
|
"logps/rejected": -266.0,
|
|
"loss": 0.3638,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 1.115234375,
|
|
"rewards/margins": 1.9140625,
|
|
"rewards/rejected": -0.802734375,
|
|
"step": 261
|
|
},
|
|
{
|
|
"epoch": 0.28982300884955753,
|
|
"grad_norm": 13.982027053833008,
|
|
"learning_rate": 4.176820342714022e-07,
|
|
"logits/chosen": -1.39453125,
|
|
"logits/rejected": -1.26171875,
|
|
"logps/chosen": -259.0,
|
|
"logps/rejected": -281.0,
|
|
"loss": 0.3449,
|
|
"rewards/accuracies": 0.7578125,
|
|
"rewards/chosen": 1.068359375,
|
|
"rewards/margins": 1.99609375,
|
|
"rewards/rejected": -0.921875,
|
|
"step": 262
|
|
},
|
|
{
|
|
"epoch": 0.290929203539823,
|
|
"grad_norm": 13.159867286682129,
|
|
"learning_rate": 4.1701596605338334e-07,
|
|
"logits/chosen": -1.40234375,
|
|
"logits/rejected": -1.234375,
|
|
"logps/chosen": -242.5,
|
|
"logps/rejected": -271.0,
|
|
"loss": 0.3395,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 1.25,
|
|
"rewards/margins": 1.99609375,
|
|
"rewards/rejected": -0.751953125,
|
|
"step": 263
|
|
},
|
|
{
|
|
"epoch": 0.2920353982300885,
|
|
"grad_norm": 12.9893798828125,
|
|
"learning_rate": 4.1634774976225965e-07,
|
|
"logits/chosen": -1.35546875,
|
|
"logits/rejected": -1.2109375,
|
|
"logps/chosen": -234.5,
|
|
"logps/rejected": -277.0,
|
|
"loss": 0.3156,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": 1.171875,
|
|
"rewards/margins": 2.2421875,
|
|
"rewards/rejected": -1.0703125,
|
|
"step": 264
|
|
},
|
|
{
|
|
"epoch": 0.29314159292035397,
|
|
"grad_norm": 13.78116226196289,
|
|
"learning_rate": 4.15677393992284e-07,
|
|
"logits/chosen": -1.2890625,
|
|
"logits/rejected": -1.2578125,
|
|
"logps/chosen": -253.5,
|
|
"logps/rejected": -279.0,
|
|
"loss": 0.3418,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 1.20703125,
|
|
"rewards/margins": 2.171875,
|
|
"rewards/rejected": -0.966796875,
|
|
"step": 265
|
|
},
|
|
{
|
|
"epoch": 0.2942477876106195,
|
|
"grad_norm": 14.935332298278809,
|
|
"learning_rate": 4.150049073652261e-07,
|
|
"logits/chosen": -1.31640625,
|
|
"logits/rejected": -1.15625,
|
|
"logps/chosen": -265.0,
|
|
"logps/rejected": -291.0,
|
|
"loss": 0.3503,
|
|
"rewards/accuracies": 0.765625,
|
|
"rewards/chosen": 1.21875,
|
|
"rewards/margins": 2.1640625,
|
|
"rewards/rejected": -0.94140625,
|
|
"step": 266
|
|
},
|
|
{
|
|
"epoch": 0.29535398230088494,
|
|
"grad_norm": 15.937322616577148,
|
|
"learning_rate": 4.1433029853026163e-07,
|
|
"logits/chosen": -1.31640625,
|
|
"logits/rejected": -1.203125,
|
|
"logps/chosen": -245.5,
|
|
"logps/rejected": -294.0,
|
|
"loss": 0.3923,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 1.0625,
|
|
"rewards/margins": 1.91796875,
|
|
"rewards/rejected": -0.85546875,
|
|
"step": 267
|
|
},
|
|
{
|
|
"epoch": 0.29646017699115046,
|
|
"grad_norm": 14.759867668151855,
|
|
"learning_rate": 4.136535761638611e-07,
|
|
"logits/chosen": -1.2890625,
|
|
"logits/rejected": -1.09375,
|
|
"logps/chosen": -276.0,
|
|
"logps/rejected": -295.0,
|
|
"loss": 0.356,
|
|
"rewards/accuracies": 0.7578125,
|
|
"rewards/chosen": 0.98828125,
|
|
"rewards/margins": 1.98828125,
|
|
"rewards/rejected": -1.001953125,
|
|
"step": 268
|
|
},
|
|
{
|
|
"epoch": 0.2975663716814159,
|
|
"grad_norm": 13.723134994506836,
|
|
"learning_rate": 4.129747489696781e-07,
|
|
"logits/chosen": -1.23828125,
|
|
"logits/rejected": -1.12890625,
|
|
"logps/chosen": -252.5,
|
|
"logps/rejected": -246.5,
|
|
"loss": 0.3215,
|
|
"rewards/accuracies": 0.78125,
|
|
"rewards/chosen": 1.06640625,
|
|
"rewards/margins": 2.3671875,
|
|
"rewards/rejected": -1.29296875,
|
|
"step": 269
|
|
},
|
|
{
|
|
"epoch": 0.29867256637168144,
|
|
"grad_norm": 12.263731002807617,
|
|
"learning_rate": 4.122938256784374e-07,
|
|
"logits/chosen": -1.3671875,
|
|
"logits/rejected": -1.30859375,
|
|
"logps/chosen": -216.5,
|
|
"logps/rejected": -275.0,
|
|
"loss": 0.3189,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": 1.16796875,
|
|
"rewards/margins": 2.265625,
|
|
"rewards/rejected": -1.09375,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 0.2997787610619469,
|
|
"grad_norm": 15.063496589660645,
|
|
"learning_rate": 4.116108150478228e-07,
|
|
"logits/chosen": -1.1875,
|
|
"logits/rejected": -1.125,
|
|
"logps/chosen": -255.5,
|
|
"logps/rejected": -256.5,
|
|
"loss": 0.3799,
|
|
"rewards/accuracies": 0.7421875,
|
|
"rewards/chosen": 0.818359375,
|
|
"rewards/margins": 1.7890625,
|
|
"rewards/rejected": -0.974609375,
|
|
"step": 271
|
|
},
|
|
{
|
|
"epoch": 0.3008849557522124,
|
|
"grad_norm": 15.24313735961914,
|
|
"learning_rate": 4.109257258623643e-07,
|
|
"logits/chosen": -1.2265625,
|
|
"logits/rejected": -1.09765625,
|
|
"logps/chosen": -238.5,
|
|
"logps/rejected": -274.0,
|
|
"loss": 0.3779,
|
|
"rewards/accuracies": 0.7578125,
|
|
"rewards/chosen": 1.15625,
|
|
"rewards/margins": 2.1875,
|
|
"rewards/rejected": -1.03125,
|
|
"step": 272
|
|
},
|
|
{
|
|
"epoch": 0.3019911504424779,
|
|
"grad_norm": 14.098630905151367,
|
|
"learning_rate": 4.1023856693332516e-07,
|
|
"logits/chosen": -1.3203125,
|
|
"logits/rejected": -1.17578125,
|
|
"logps/chosen": -248.0,
|
|
"logps/rejected": -272.0,
|
|
"loss": 0.3197,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 0.982421875,
|
|
"rewards/margins": 2.25,
|
|
"rewards/rejected": -1.26953125,
|
|
"step": 273
|
|
},
|
|
{
|
|
"epoch": 0.3030973451327434,
|
|
"grad_norm": 13.230525970458984,
|
|
"learning_rate": 4.0954934709858857e-07,
|
|
"logits/chosen": -1.234375,
|
|
"logits/rejected": -1.16796875,
|
|
"logps/chosen": -268.0,
|
|
"logps/rejected": -287.0,
|
|
"loss": 0.3215,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 1.15234375,
|
|
"rewards/margins": 2.125,
|
|
"rewards/rejected": -0.97265625,
|
|
"step": 274
|
|
},
|
|
{
|
|
"epoch": 0.30420353982300885,
|
|
"grad_norm": 12.722634315490723,
|
|
"learning_rate": 4.0885807522254433e-07,
|
|
"logits/chosen": -1.375,
|
|
"logits/rejected": -1.25390625,
|
|
"logps/chosen": -256.5,
|
|
"logps/rejected": -319.0,
|
|
"loss": 0.3175,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": 1.37890625,
|
|
"rewards/margins": 2.3515625,
|
|
"rewards/rejected": -0.96875,
|
|
"step": 275
|
|
},
|
|
{
|
|
"epoch": 0.3053097345132743,
|
|
"grad_norm": 12.688482284545898,
|
|
"learning_rate": 4.0816476019597423e-07,
|
|
"logits/chosen": -1.41015625,
|
|
"logits/rejected": -1.2890625,
|
|
"logps/chosen": -235.5,
|
|
"logps/rejected": -256.0,
|
|
"loss": 0.3222,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 1.28515625,
|
|
"rewards/margins": 2.20703125,
|
|
"rewards/rejected": -0.92578125,
|
|
"step": 276
|
|
},
|
|
{
|
|
"epoch": 0.3064159292035398,
|
|
"grad_norm": 14.044715881347656,
|
|
"learning_rate": 4.0746941093593807e-07,
|
|
"logits/chosen": -1.2578125,
|
|
"logits/rejected": -1.27734375,
|
|
"logps/chosen": -249.0,
|
|
"logps/rejected": -295.0,
|
|
"loss": 0.2954,
|
|
"rewards/accuracies": 0.8515625,
|
|
"rewards/chosen": 1.359375,
|
|
"rewards/margins": 2.3671875,
|
|
"rewards/rejected": -1.009765625,
|
|
"step": 277
|
|
},
|
|
{
|
|
"epoch": 0.3075221238938053,
|
|
"grad_norm": 15.867609024047852,
|
|
"learning_rate": 4.0677203638565893e-07,
|
|
"logits/chosen": -1.36328125,
|
|
"logits/rejected": -1.25390625,
|
|
"logps/chosen": -260.0,
|
|
"logps/rejected": -275.0,
|
|
"loss": 0.3278,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 1.19921875,
|
|
"rewards/margins": 2.296875,
|
|
"rewards/rejected": -1.10546875,
|
|
"step": 278
|
|
},
|
|
{
|
|
"epoch": 0.3086283185840708,
|
|
"grad_norm": 16.124387741088867,
|
|
"learning_rate": 4.060726455144082e-07,
|
|
"logits/chosen": -1.25390625,
|
|
"logits/rejected": -1.16015625,
|
|
"logps/chosen": -240.5,
|
|
"logps/rejected": -281.0,
|
|
"loss": 0.3936,
|
|
"rewards/accuracies": 0.71875,
|
|
"rewards/chosen": 1.025390625,
|
|
"rewards/margins": 1.96484375,
|
|
"rewards/rejected": -0.9375,
|
|
"step": 279
|
|
},
|
|
{
|
|
"epoch": 0.30973451327433627,
|
|
"grad_norm": 14.164496421813965,
|
|
"learning_rate": 4.0537124731739003e-07,
|
|
"logits/chosen": -1.34765625,
|
|
"logits/rejected": -1.1953125,
|
|
"logps/chosen": -250.0,
|
|
"logps/rejected": -270.0,
|
|
"loss": 0.3594,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 1.005859375,
|
|
"rewards/margins": 2.02734375,
|
|
"rewards/rejected": -1.02734375,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 0.3108407079646018,
|
|
"grad_norm": 14.754056930541992,
|
|
"learning_rate": 4.0466785081562583e-07,
|
|
"logits/chosen": -1.234375,
|
|
"logits/rejected": -1.2265625,
|
|
"logps/chosen": -258.5,
|
|
"logps/rejected": -247.0,
|
|
"loss": 0.3625,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": 1.095703125,
|
|
"rewards/margins": 2.06640625,
|
|
"rewards/rejected": -0.970703125,
|
|
"step": 281
|
|
},
|
|
{
|
|
"epoch": 0.31194690265486724,
|
|
"grad_norm": 14.682291030883789,
|
|
"learning_rate": 4.039624650558382e-07,
|
|
"logits/chosen": -1.13671875,
|
|
"logits/rejected": -1.21484375,
|
|
"logps/chosen": -239.0,
|
|
"logps/rejected": -265.5,
|
|
"loss": 0.3439,
|
|
"rewards/accuracies": 0.765625,
|
|
"rewards/chosen": 1.28125,
|
|
"rewards/margins": 2.2890625,
|
|
"rewards/rejected": -1.009765625,
|
|
"step": 282
|
|
},
|
|
{
|
|
"epoch": 0.31305309734513276,
|
|
"grad_norm": 13.215510368347168,
|
|
"learning_rate": 4.032550991103344e-07,
|
|
"logits/chosen": -1.3984375,
|
|
"logits/rejected": -1.30859375,
|
|
"logps/chosen": -218.5,
|
|
"logps/rejected": -263.5,
|
|
"loss": 0.3302,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 1.0078125,
|
|
"rewards/margins": 2.03515625,
|
|
"rewards/rejected": -1.029296875,
|
|
"step": 283
|
|
},
|
|
{
|
|
"epoch": 0.3141592920353982,
|
|
"grad_norm": 14.175792694091797,
|
|
"learning_rate": 4.0254576207689004e-07,
|
|
"logits/chosen": -1.23046875,
|
|
"logits/rejected": -1.21484375,
|
|
"logps/chosen": -268.0,
|
|
"logps/rejected": -307.0,
|
|
"loss": 0.3466,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 1.083984375,
|
|
"rewards/margins": 2.0703125,
|
|
"rewards/rejected": -0.986328125,
|
|
"step": 284
|
|
},
|
|
{
|
|
"epoch": 0.31526548672566373,
|
|
"grad_norm": 15.70940113067627,
|
|
"learning_rate": 4.0183446307863174e-07,
|
|
"logits/chosen": -1.359375,
|
|
"logits/rejected": -1.20703125,
|
|
"logps/chosen": -249.0,
|
|
"logps/rejected": -281.0,
|
|
"loss": 0.3759,
|
|
"rewards/accuracies": 0.765625,
|
|
"rewards/chosen": 0.927734375,
|
|
"rewards/margins": 1.94921875,
|
|
"rewards/rejected": -1.015625,
|
|
"step": 285
|
|
},
|
|
{
|
|
"epoch": 0.3163716814159292,
|
|
"grad_norm": 14.439340591430664,
|
|
"learning_rate": 4.0112121126391967e-07,
|
|
"logits/chosen": -1.26953125,
|
|
"logits/rejected": -1.26171875,
|
|
"logps/chosen": -278.0,
|
|
"logps/rejected": -298.0,
|
|
"loss": 0.3487,
|
|
"rewards/accuracies": 0.7734375,
|
|
"rewards/chosen": 1.2421875,
|
|
"rewards/margins": 2.3515625,
|
|
"rewards/rejected": -1.109375,
|
|
"step": 286
|
|
},
|
|
{
|
|
"epoch": 0.3174778761061947,
|
|
"grad_norm": 13.696681022644043,
|
|
"learning_rate": 4.0040601580623054e-07,
|
|
"logits/chosen": -1.3125,
|
|
"logits/rejected": -1.3515625,
|
|
"logps/chosen": -236.0,
|
|
"logps/rejected": -246.0,
|
|
"loss": 0.3344,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 1.181640625,
|
|
"rewards/margins": 2.3515625,
|
|
"rewards/rejected": -1.16796875,
|
|
"step": 287
|
|
},
|
|
{
|
|
"epoch": 0.3185840707964602,
|
|
"grad_norm": 13.659770011901855,
|
|
"learning_rate": 3.9968888590403904e-07,
|
|
"logits/chosen": -1.28125,
|
|
"logits/rejected": -1.3828125,
|
|
"logps/chosen": -248.5,
|
|
"logps/rejected": -280.0,
|
|
"loss": 0.3278,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 1.33203125,
|
|
"rewards/margins": 2.375,
|
|
"rewards/rejected": -1.046875,
|
|
"step": 288
|
|
},
|
|
{
|
|
"epoch": 0.3196902654867257,
|
|
"grad_norm": 12.041626930236816,
|
|
"learning_rate": 3.9896983078069947e-07,
|
|
"logits/chosen": -1.3203125,
|
|
"logits/rejected": -1.16796875,
|
|
"logps/chosen": -245.0,
|
|
"logps/rejected": -273.5,
|
|
"loss": 0.3141,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 1.22265625,
|
|
"rewards/margins": 2.296875,
|
|
"rewards/rejected": -1.078125,
|
|
"step": 289
|
|
},
|
|
{
|
|
"epoch": 0.32079646017699115,
|
|
"grad_norm": 14.61534595489502,
|
|
"learning_rate": 3.9824885968432755e-07,
|
|
"logits/chosen": -1.31640625,
|
|
"logits/rejected": -1.1875,
|
|
"logps/chosen": -241.5,
|
|
"logps/rejected": -251.0,
|
|
"loss": 0.3742,
|
|
"rewards/accuracies": 0.7421875,
|
|
"rewards/chosen": 1.05859375,
|
|
"rewards/margins": 2.0859375,
|
|
"rewards/rejected": -1.025390625,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 0.3219026548672566,
|
|
"grad_norm": 13.926555633544922,
|
|
"learning_rate": 3.975259818876811e-07,
|
|
"logits/chosen": -1.31640625,
|
|
"logits/rejected": -1.2578125,
|
|
"logps/chosen": -262.0,
|
|
"logps/rejected": -259.0,
|
|
"loss": 0.298,
|
|
"rewards/accuracies": 0.84375,
|
|
"rewards/chosen": 1.2265625,
|
|
"rewards/margins": 2.328125,
|
|
"rewards/rejected": -1.1015625,
|
|
"step": 291
|
|
},
|
|
{
|
|
"epoch": 0.3230088495575221,
|
|
"grad_norm": 12.315802574157715,
|
|
"learning_rate": 3.968012066880412e-07,
|
|
"logits/chosen": -1.27734375,
|
|
"logits/rejected": -1.2265625,
|
|
"logps/chosen": -259.0,
|
|
"logps/rejected": -267.0,
|
|
"loss": 0.3022,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 1.2109375,
|
|
"rewards/margins": 2.515625,
|
|
"rewards/rejected": -1.30078125,
|
|
"step": 292
|
|
},
|
|
{
|
|
"epoch": 0.3241150442477876,
|
|
"grad_norm": 12.437846183776855,
|
|
"learning_rate": 3.960745434070921e-07,
|
|
"logits/chosen": -1.19921875,
|
|
"logits/rejected": -1.06640625,
|
|
"logps/chosen": -256.5,
|
|
"logps/rejected": -281.0,
|
|
"loss": 0.3422,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 0.744140625,
|
|
"rewards/margins": 1.89453125,
|
|
"rewards/rejected": -1.1484375,
|
|
"step": 293
|
|
},
|
|
{
|
|
"epoch": 0.3252212389380531,
|
|
"grad_norm": 13.978434562683105,
|
|
"learning_rate": 3.9534600139080163e-07,
|
|
"logits/chosen": -1.2109375,
|
|
"logits/rejected": -1.140625,
|
|
"logps/chosen": -237.0,
|
|
"logps/rejected": -274.0,
|
|
"loss": 0.366,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 0.888671875,
|
|
"rewards/margins": 2.2109375,
|
|
"rewards/rejected": -1.32421875,
|
|
"step": 294
|
|
},
|
|
{
|
|
"epoch": 0.32632743362831856,
|
|
"grad_norm": 17.65538787841797,
|
|
"learning_rate": 3.94615590009301e-07,
|
|
"logits/chosen": -1.2578125,
|
|
"logits/rejected": -1.25390625,
|
|
"logps/chosen": -264.0,
|
|
"logps/rejected": -285.0,
|
|
"loss": 0.4392,
|
|
"rewards/accuracies": 0.7109375,
|
|
"rewards/chosen": 0.931640625,
|
|
"rewards/margins": 1.875,
|
|
"rewards/rejected": -0.947265625,
|
|
"step": 295
|
|
},
|
|
{
|
|
"epoch": 0.3274336283185841,
|
|
"grad_norm": 12.776206016540527,
|
|
"learning_rate": 3.9388331865676425e-07,
|
|
"logits/chosen": -1.1796875,
|
|
"logits/rejected": -1.234375,
|
|
"logps/chosen": -246.5,
|
|
"logps/rejected": -260.0,
|
|
"loss": 0.2823,
|
|
"rewards/accuracies": 0.875,
|
|
"rewards/chosen": 1.0234375,
|
|
"rewards/margins": 2.453125,
|
|
"rewards/rejected": -1.42578125,
|
|
"step": 296
|
|
},
|
|
{
|
|
"epoch": 0.32853982300884954,
|
|
"grad_norm": 15.579447746276855,
|
|
"learning_rate": 3.931491967512872e-07,
|
|
"logits/chosen": -1.3984375,
|
|
"logits/rejected": -1.2734375,
|
|
"logps/chosen": -252.5,
|
|
"logps/rejected": -283.0,
|
|
"loss": 0.3896,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 0.94140625,
|
|
"rewards/margins": 1.859375,
|
|
"rewards/rejected": -0.91796875,
|
|
"step": 297
|
|
},
|
|
{
|
|
"epoch": 0.32964601769911506,
|
|
"grad_norm": 13.556859970092773,
|
|
"learning_rate": 3.9241323373476686e-07,
|
|
"logits/chosen": -1.16015625,
|
|
"logits/rejected": -1.125,
|
|
"logps/chosen": -258.0,
|
|
"logps/rejected": -265.0,
|
|
"loss": 0.3322,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 1.1015625,
|
|
"rewards/margins": 2.296875,
|
|
"rewards/rejected": -1.1953125,
|
|
"step": 298
|
|
},
|
|
{
|
|
"epoch": 0.3307522123893805,
|
|
"grad_norm": 12.573343276977539,
|
|
"learning_rate": 3.916754390727794e-07,
|
|
"logits/chosen": -1.203125,
|
|
"logits/rejected": -1.15234375,
|
|
"logps/chosen": -251.0,
|
|
"logps/rejected": -285.0,
|
|
"loss": 0.2524,
|
|
"rewards/accuracies": 0.875,
|
|
"rewards/chosen": 1.375,
|
|
"rewards/margins": 2.71875,
|
|
"rewards/rejected": -1.34765625,
|
|
"step": 299
|
|
},
|
|
{
|
|
"epoch": 0.33185840707964603,
|
|
"grad_norm": 13.832542419433594,
|
|
"learning_rate": 3.9093582225445877e-07,
|
|
"logits/chosen": -1.28125,
|
|
"logits/rejected": -1.1953125,
|
|
"logps/chosen": -263.0,
|
|
"logps/rejected": -283.5,
|
|
"loss": 0.3695,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 0.689453125,
|
|
"rewards/margins": 1.85546875,
|
|
"rewards/rejected": -1.1640625,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.33185840707964603,
|
|
"eval_logits/chosen": -1.2725435495376587,
|
|
"eval_logits/rejected": -1.1930581331253052,
|
|
"eval_logps/chosen": -251.15921020507812,
|
|
"eval_logps/rejected": -271.43780517578125,
|
|
"eval_loss": 0.35059425234794617,
|
|
"eval_rewards/accuracies": 0.7869349718093872,
|
|
"eval_rewards/chosen": 0.9655628204345703,
|
|
"eval_rewards/margins": 2.1816697120666504,
|
|
"eval_rewards/rejected": -1.2169232368469238,
|
|
"eval_runtime": 193.0334,
|
|
"eval_samples_per_second": 66.584,
|
|
"eval_steps_per_second": 1.041,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 0.3329646017699115,
|
|
"grad_norm": 13.506171226501465,
|
|
"learning_rate": 3.901943927923744e-07,
|
|
"logits/chosen": -1.2578125,
|
|
"logits/rejected": -1.15234375,
|
|
"logps/chosen": -258.5,
|
|
"logps/rejected": -279.0,
|
|
"loss": 0.3567,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": 0.908203125,
|
|
"rewards/margins": 2.03125,
|
|
"rewards/rejected": -1.119140625,
|
|
"step": 301
|
|
},
|
|
{
|
|
"epoch": 0.334070796460177,
|
|
"grad_norm": 14.112154960632324,
|
|
"learning_rate": 3.8945116022240937e-07,
|
|
"logits/chosen": -1.18359375,
|
|
"logits/rejected": -1.08203125,
|
|
"logps/chosen": -268.0,
|
|
"logps/rejected": -313.0,
|
|
"loss": 0.3424,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 0.9765625,
|
|
"rewards/margins": 2.2109375,
|
|
"rewards/rejected": -1.23046875,
|
|
"step": 302
|
|
},
|
|
{
|
|
"epoch": 0.33517699115044247,
|
|
"grad_norm": 13.437678337097168,
|
|
"learning_rate": 3.8870613410363707e-07,
|
|
"logits/chosen": -1.26171875,
|
|
"logits/rejected": -1.18359375,
|
|
"logps/chosen": -269.0,
|
|
"logps/rejected": -273.5,
|
|
"loss": 0.361,
|
|
"rewards/accuracies": 0.8359375,
|
|
"rewards/chosen": 0.830078125,
|
|
"rewards/margins": 1.97265625,
|
|
"rewards/rejected": -1.14453125,
|
|
"step": 303
|
|
},
|
|
{
|
|
"epoch": 0.336283185840708,
|
|
"grad_norm": 14.396577835083008,
|
|
"learning_rate": 3.8795932401819863e-07,
|
|
"logits/chosen": -1.27734375,
|
|
"logits/rejected": -1.1171875,
|
|
"logps/chosen": -272.0,
|
|
"logps/rejected": -286.0,
|
|
"loss": 0.3308,
|
|
"rewards/accuracies": 0.8359375,
|
|
"rewards/chosen": 0.861328125,
|
|
"rewards/margins": 2.1796875,
|
|
"rewards/rejected": -1.31640625,
|
|
"step": 304
|
|
},
|
|
{
|
|
"epoch": 0.33738938053097345,
|
|
"grad_norm": 15.585335731506348,
|
|
"learning_rate": 3.872107395711798e-07,
|
|
"logits/chosen": -1.30859375,
|
|
"logits/rejected": -1.08984375,
|
|
"logps/chosen": -280.0,
|
|
"logps/rejected": -337.0,
|
|
"loss": 0.369,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": 0.9921875,
|
|
"rewards/margins": 1.98046875,
|
|
"rewards/rejected": -0.984375,
|
|
"step": 305
|
|
},
|
|
{
|
|
"epoch": 0.33849557522123896,
|
|
"grad_norm": 16.997081756591797,
|
|
"learning_rate": 3.864603903904871e-07,
|
|
"logits/chosen": -1.27734375,
|
|
"logits/rejected": -1.171875,
|
|
"logps/chosen": -283.0,
|
|
"logps/rejected": -291.0,
|
|
"loss": 0.3989,
|
|
"rewards/accuracies": 0.734375,
|
|
"rewards/chosen": 0.982421875,
|
|
"rewards/margins": 2.2421875,
|
|
"rewards/rejected": -1.2578125,
|
|
"step": 306
|
|
},
|
|
{
|
|
"epoch": 0.3396017699115044,
|
|
"grad_norm": 16.18097496032715,
|
|
"learning_rate": 3.857082861267242e-07,
|
|
"logits/chosen": -1.265625,
|
|
"logits/rejected": -1.23828125,
|
|
"logps/chosen": -249.0,
|
|
"logps/rejected": -266.0,
|
|
"loss": 0.402,
|
|
"rewards/accuracies": 0.765625,
|
|
"rewards/chosen": 0.83984375,
|
|
"rewards/margins": 1.9921875,
|
|
"rewards/rejected": -1.15234375,
|
|
"step": 307
|
|
},
|
|
{
|
|
"epoch": 0.3407079646017699,
|
|
"grad_norm": 13.513619422912598,
|
|
"learning_rate": 3.849544364530677e-07,
|
|
"logits/chosen": -1.32421875,
|
|
"logits/rejected": -1.2421875,
|
|
"logps/chosen": -264.5,
|
|
"logps/rejected": -273.0,
|
|
"loss": 0.2981,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 0.98046875,
|
|
"rewards/margins": 2.4921875,
|
|
"rewards/rejected": -1.515625,
|
|
"step": 308
|
|
},
|
|
{
|
|
"epoch": 0.3418141592920354,
|
|
"grad_norm": 15.130499839782715,
|
|
"learning_rate": 3.8419885106514295e-07,
|
|
"logits/chosen": -1.23828125,
|
|
"logits/rejected": -1.18359375,
|
|
"logps/chosen": -271.5,
|
|
"logps/rejected": -284.0,
|
|
"loss": 0.3542,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 0.962890625,
|
|
"rewards/margins": 2.1484375,
|
|
"rewards/rejected": -1.1875,
|
|
"step": 309
|
|
},
|
|
{
|
|
"epoch": 0.34292035398230086,
|
|
"grad_norm": 18.18540382385254,
|
|
"learning_rate": 3.834415396808988e-07,
|
|
"logits/chosen": -1.27734375,
|
|
"logits/rejected": -1.1640625,
|
|
"logps/chosen": -251.0,
|
|
"logps/rejected": -289.0,
|
|
"loss": 0.3976,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 0.78515625,
|
|
"rewards/margins": 2.03515625,
|
|
"rewards/rejected": -1.24609375,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 0.3440265486725664,
|
|
"grad_norm": 13.235279083251953,
|
|
"learning_rate": 3.826825120404833e-07,
|
|
"logits/chosen": -1.14453125,
|
|
"logits/rejected": -1.18359375,
|
|
"logps/chosen": -265.0,
|
|
"logps/rejected": -271.0,
|
|
"loss": 0.3018,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 0.990234375,
|
|
"rewards/margins": 2.40625,
|
|
"rewards/rejected": -1.4140625,
|
|
"step": 311
|
|
},
|
|
{
|
|
"epoch": 0.34513274336283184,
|
|
"grad_norm": 14.219352722167969,
|
|
"learning_rate": 3.81921777906118e-07,
|
|
"logits/chosen": -1.34375,
|
|
"logits/rejected": -1.16796875,
|
|
"logps/chosen": -243.0,
|
|
"logps/rejected": -265.0,
|
|
"loss": 0.3433,
|
|
"rewards/accuracies": 0.78125,
|
|
"rewards/chosen": 1.10546875,
|
|
"rewards/margins": 2.3203125,
|
|
"rewards/rejected": -1.2109375,
|
|
"step": 312
|
|
},
|
|
{
|
|
"epoch": 0.34623893805309736,
|
|
"grad_norm": 14.085458755493164,
|
|
"learning_rate": 3.8115934706197244e-07,
|
|
"logits/chosen": -1.3046875,
|
|
"logits/rejected": -1.234375,
|
|
"logps/chosen": -263.0,
|
|
"logps/rejected": -260.0,
|
|
"loss": 0.3526,
|
|
"rewards/accuracies": 0.7421875,
|
|
"rewards/chosen": 1.12890625,
|
|
"rewards/margins": 2.17578125,
|
|
"rewards/rejected": -1.046875,
|
|
"step": 313
|
|
},
|
|
{
|
|
"epoch": 0.3473451327433628,
|
|
"grad_norm": 13.884740829467773,
|
|
"learning_rate": 3.8039522931403847e-07,
|
|
"logits/chosen": -1.4375,
|
|
"logits/rejected": -1.22265625,
|
|
"logps/chosen": -257.0,
|
|
"logps/rejected": -274.5,
|
|
"loss": 0.3197,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 1.1328125,
|
|
"rewards/margins": 2.29296875,
|
|
"rewards/rejected": -1.15625,
|
|
"step": 314
|
|
},
|
|
{
|
|
"epoch": 0.34845132743362833,
|
|
"grad_norm": 15.969679832458496,
|
|
"learning_rate": 3.7962943449000377e-07,
|
|
"logits/chosen": -1.1640625,
|
|
"logits/rejected": -1.1484375,
|
|
"logps/chosen": -260.0,
|
|
"logps/rejected": -283.0,
|
|
"loss": 0.4191,
|
|
"rewards/accuracies": 0.71875,
|
|
"rewards/chosen": 0.90625,
|
|
"rewards/margins": 1.7890625,
|
|
"rewards/rejected": -0.880859375,
|
|
"step": 315
|
|
},
|
|
{
|
|
"epoch": 0.3495575221238938,
|
|
"grad_norm": 14.207815170288086,
|
|
"learning_rate": 3.7886197243912607e-07,
|
|
"logits/chosen": -1.22265625,
|
|
"logits/rejected": -1.1015625,
|
|
"logps/chosen": -256.0,
|
|
"logps/rejected": -279.0,
|
|
"loss": 0.3409,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 1.19140625,
|
|
"rewards/margins": 2.34375,
|
|
"rewards/rejected": -1.15625,
|
|
"step": 316
|
|
},
|
|
{
|
|
"epoch": 0.3506637168141593,
|
|
"grad_norm": 14.867213249206543,
|
|
"learning_rate": 3.7809285303210593e-07,
|
|
"logits/chosen": -1.26953125,
|
|
"logits/rejected": -1.09375,
|
|
"logps/chosen": -248.5,
|
|
"logps/rejected": -246.0,
|
|
"loss": 0.3668,
|
|
"rewards/accuracies": 0.7421875,
|
|
"rewards/chosen": 1.06640625,
|
|
"rewards/margins": 2.0,
|
|
"rewards/rejected": -0.93359375,
|
|
"step": 317
|
|
},
|
|
{
|
|
"epoch": 0.35176991150442477,
|
|
"grad_norm": 13.388772964477539,
|
|
"learning_rate": 3.7732208616095986e-07,
|
|
"logits/chosen": -1.12890625,
|
|
"logits/rejected": -1.1484375,
|
|
"logps/chosen": -249.0,
|
|
"logps/rejected": -276.0,
|
|
"loss": 0.3055,
|
|
"rewards/accuracies": 0.8359375,
|
|
"rewards/chosen": 1.44140625,
|
|
"rewards/margins": 2.578125,
|
|
"rewards/rejected": -1.13671875,
|
|
"step": 318
|
|
},
|
|
{
|
|
"epoch": 0.3528761061946903,
|
|
"grad_norm": 13.385259628295898,
|
|
"learning_rate": 3.7654968173889334e-07,
|
|
"logits/chosen": -1.34375,
|
|
"logits/rejected": -1.14453125,
|
|
"logps/chosen": -240.5,
|
|
"logps/rejected": -279.0,
|
|
"loss": 0.3375,
|
|
"rewards/accuracies": 0.78125,
|
|
"rewards/chosen": 1.28125,
|
|
"rewards/margins": 2.4765625,
|
|
"rewards/rejected": -1.19140625,
|
|
"step": 319
|
|
},
|
|
{
|
|
"epoch": 0.35398230088495575,
|
|
"grad_norm": 13.495279312133789,
|
|
"learning_rate": 3.7577564970017336e-07,
|
|
"logits/chosen": -1.3203125,
|
|
"logits/rejected": -1.12890625,
|
|
"logps/chosen": -237.5,
|
|
"logps/rejected": -251.0,
|
|
"loss": 0.3125,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 1.16796875,
|
|
"rewards/margins": 2.546875,
|
|
"rewards/rejected": -1.37890625,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 0.35508849557522126,
|
|
"grad_norm": 14.151552200317383,
|
|
"learning_rate": 3.75e-07,
|
|
"logits/chosen": -1.28515625,
|
|
"logits/rejected": -1.1171875,
|
|
"logps/chosen": -251.0,
|
|
"logps/rejected": -281.0,
|
|
"loss": 0.3316,
|
|
"rewards/accuracies": 0.765625,
|
|
"rewards/chosen": 1.12109375,
|
|
"rewards/margins": 2.3125,
|
|
"rewards/rejected": -1.19140625,
|
|
"step": 321
|
|
},
|
|
{
|
|
"epoch": 0.3561946902654867,
|
|
"grad_norm": 13.408705711364746,
|
|
"learning_rate": 3.742227426143793e-07,
|
|
"logits/chosen": -1.23046875,
|
|
"logits/rejected": -1.12890625,
|
|
"logps/chosen": -229.5,
|
|
"logps/rejected": -235.0,
|
|
"loss": 0.3559,
|
|
"rewards/accuracies": 0.7578125,
|
|
"rewards/chosen": 0.94140625,
|
|
"rewards/margins": 2.0625,
|
|
"rewards/rejected": -1.1171875,
|
|
"step": 322
|
|
},
|
|
{
|
|
"epoch": 0.3573008849557522,
|
|
"grad_norm": 14.06219482421875,
|
|
"learning_rate": 3.734438875399943e-07,
|
|
"logits/chosen": -1.29296875,
|
|
"logits/rejected": -1.22265625,
|
|
"logps/chosen": -270.0,
|
|
"logps/rejected": -296.0,
|
|
"loss": 0.3082,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 1.3046875,
|
|
"rewards/margins": 2.5,
|
|
"rewards/rejected": -1.19921875,
|
|
"step": 323
|
|
},
|
|
{
|
|
"epoch": 0.3584070796460177,
|
|
"grad_norm": 13.949469566345215,
|
|
"learning_rate": 3.726634447940768e-07,
|
|
"logits/chosen": -1.3359375,
|
|
"logits/rejected": -1.17578125,
|
|
"logps/chosen": -277.0,
|
|
"logps/rejected": -297.0,
|
|
"loss": 0.3666,
|
|
"rewards/accuracies": 0.78125,
|
|
"rewards/chosen": 1.04296875,
|
|
"rewards/margins": 1.87109375,
|
|
"rewards/rejected": -0.83203125,
|
|
"step": 324
|
|
},
|
|
{
|
|
"epoch": 0.35951327433628316,
|
|
"grad_norm": 14.00977897644043,
|
|
"learning_rate": 3.7188142441427836e-07,
|
|
"logits/chosen": -1.1484375,
|
|
"logits/rejected": -1.1640625,
|
|
"logps/chosen": -237.5,
|
|
"logps/rejected": -263.0,
|
|
"loss": 0.3086,
|
|
"rewards/accuracies": 0.8359375,
|
|
"rewards/chosen": 1.328125,
|
|
"rewards/margins": 2.53125,
|
|
"rewards/rejected": -1.203125,
|
|
"step": 325
|
|
},
|
|
{
|
|
"epoch": 0.3606194690265487,
|
|
"grad_norm": 17.24125099182129,
|
|
"learning_rate": 3.710978364585411e-07,
|
|
"logits/chosen": -1.25,
|
|
"logits/rejected": -1.23046875,
|
|
"logps/chosen": -265.5,
|
|
"logps/rejected": -274.0,
|
|
"loss": 0.4063,
|
|
"rewards/accuracies": 0.7421875,
|
|
"rewards/chosen": 1.005859375,
|
|
"rewards/margins": 2.12890625,
|
|
"rewards/rejected": -1.126953125,
|
|
"step": 326
|
|
},
|
|
{
|
|
"epoch": 0.36172566371681414,
|
|
"grad_norm": 13.335806846618652,
|
|
"learning_rate": 3.7031269100496897e-07,
|
|
"logits/chosen": -1.3203125,
|
|
"logits/rejected": -1.1328125,
|
|
"logps/chosen": -246.0,
|
|
"logps/rejected": -255.5,
|
|
"loss": 0.3012,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 1.142578125,
|
|
"rewards/margins": 2.578125,
|
|
"rewards/rejected": -1.42578125,
|
|
"step": 327
|
|
},
|
|
{
|
|
"epoch": 0.36283185840707965,
|
|
"grad_norm": 14.329955101013184,
|
|
"learning_rate": 3.69525998151697e-07,
|
|
"logits/chosen": -1.20703125,
|
|
"logits/rejected": -1.140625,
|
|
"logps/chosen": -260.0,
|
|
"logps/rejected": -284.0,
|
|
"loss": 0.3322,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": 1.23046875,
|
|
"rewards/margins": 2.453125,
|
|
"rewards/rejected": -1.22265625,
|
|
"step": 328
|
|
},
|
|
{
|
|
"epoch": 0.3639380530973451,
|
|
"grad_norm": 14.133960723876953,
|
|
"learning_rate": 3.687377680167626e-07,
|
|
"logits/chosen": -1.23828125,
|
|
"logits/rejected": -1.125,
|
|
"logps/chosen": -253.0,
|
|
"logps/rejected": -271.0,
|
|
"loss": 0.3381,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 1.23046875,
|
|
"rewards/margins": 2.5390625,
|
|
"rewards/rejected": -1.3125,
|
|
"step": 329
|
|
},
|
|
{
|
|
"epoch": 0.36504424778761063,
|
|
"grad_norm": 15.067541122436523,
|
|
"learning_rate": 3.6794801073797453e-07,
|
|
"logits/chosen": -1.2265625,
|
|
"logits/rejected": -1.15234375,
|
|
"logps/chosen": -262.0,
|
|
"logps/rejected": -284.0,
|
|
"loss": 0.3784,
|
|
"rewards/accuracies": 0.7734375,
|
|
"rewards/chosen": 1.06640625,
|
|
"rewards/margins": 2.1640625,
|
|
"rewards/rejected": -1.09375,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 0.3661504424778761,
|
|
"grad_norm": 14.064321517944336,
|
|
"learning_rate": 3.671567364727833e-07,
|
|
"logits/chosen": -1.29296875,
|
|
"logits/rejected": -1.140625,
|
|
"logps/chosen": -234.5,
|
|
"logps/rejected": -260.0,
|
|
"loss": 0.3866,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 1.0234375,
|
|
"rewards/margins": 2.15625,
|
|
"rewards/rejected": -1.12890625,
|
|
"step": 331
|
|
},
|
|
{
|
|
"epoch": 0.3672566371681416,
|
|
"grad_norm": 14.258491516113281,
|
|
"learning_rate": 3.663639553981497e-07,
|
|
"logits/chosen": -1.2890625,
|
|
"logits/rejected": -1.2421875,
|
|
"logps/chosen": -236.0,
|
|
"logps/rejected": -253.5,
|
|
"loss": 0.3042,
|
|
"rewards/accuracies": 0.84375,
|
|
"rewards/chosen": 1.4140625,
|
|
"rewards/margins": 2.6640625,
|
|
"rewards/rejected": -1.25,
|
|
"step": 332
|
|
},
|
|
{
|
|
"epoch": 0.36836283185840707,
|
|
"grad_norm": 14.841512680053711,
|
|
"learning_rate": 3.655696777104146e-07,
|
|
"logits/chosen": -1.25390625,
|
|
"logits/rejected": -1.21875,
|
|
"logps/chosen": -251.5,
|
|
"logps/rejected": -276.0,
|
|
"loss": 0.338,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 1.28515625,
|
|
"rewards/margins": 2.53125,
|
|
"rewards/rejected": -1.24609375,
|
|
"step": 333
|
|
},
|
|
{
|
|
"epoch": 0.3694690265486726,
|
|
"grad_norm": 13.853322982788086,
|
|
"learning_rate": 3.647739136251673e-07,
|
|
"logits/chosen": -1.1953125,
|
|
"logits/rejected": -1.20703125,
|
|
"logps/chosen": -265.0,
|
|
"logps/rejected": -308.0,
|
|
"loss": 0.3455,
|
|
"rewards/accuracies": 0.765625,
|
|
"rewards/chosen": 1.1796875,
|
|
"rewards/margins": 2.234375,
|
|
"rewards/rejected": -1.0546875,
|
|
"step": 334
|
|
},
|
|
{
|
|
"epoch": 0.37057522123893805,
|
|
"grad_norm": 15.147529602050781,
|
|
"learning_rate": 3.639766733771147e-07,
|
|
"logits/chosen": -1.34375,
|
|
"logits/rejected": -1.25,
|
|
"logps/chosen": -250.5,
|
|
"logps/rejected": -287.0,
|
|
"loss": 0.3692,
|
|
"rewards/accuracies": 0.78125,
|
|
"rewards/chosen": 1.10546875,
|
|
"rewards/margins": 2.26171875,
|
|
"rewards/rejected": -1.15625,
|
|
"step": 335
|
|
},
|
|
{
|
|
"epoch": 0.37168141592920356,
|
|
"grad_norm": 13.71894359588623,
|
|
"learning_rate": 3.6317796721994903e-07,
|
|
"logits/chosen": -1.30859375,
|
|
"logits/rejected": -1.1953125,
|
|
"logps/chosen": -272.0,
|
|
"logps/rejected": -268.0,
|
|
"loss": 0.3311,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 1.083984375,
|
|
"rewards/margins": 2.33984375,
|
|
"rewards/rejected": -1.25390625,
|
|
"step": 336
|
|
},
|
|
{
|
|
"epoch": 0.372787610619469,
|
|
"grad_norm": 12.683527946472168,
|
|
"learning_rate": 3.623778054262164e-07,
|
|
"logits/chosen": -1.30078125,
|
|
"logits/rejected": -1.23046875,
|
|
"logps/chosen": -268.0,
|
|
"logps/rejected": -275.0,
|
|
"loss": 0.302,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": 1.26171875,
|
|
"rewards/margins": 2.3515625,
|
|
"rewards/rejected": -1.09375,
|
|
"step": 337
|
|
},
|
|
{
|
|
"epoch": 0.37389380530973454,
|
|
"grad_norm": 12.87300968170166,
|
|
"learning_rate": 3.6157619828718473e-07,
|
|
"logits/chosen": -1.21875,
|
|
"logits/rejected": -1.140625,
|
|
"logps/chosen": -248.0,
|
|
"logps/rejected": -249.5,
|
|
"loss": 0.3173,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 0.939453125,
|
|
"rewards/margins": 2.3046875,
|
|
"rewards/rejected": -1.36328125,
|
|
"step": 338
|
|
},
|
|
{
|
|
"epoch": 0.375,
|
|
"grad_norm": 13.38857650756836,
|
|
"learning_rate": 3.6077315611271095e-07,
|
|
"logits/chosen": -1.359375,
|
|
"logits/rejected": -1.28125,
|
|
"logps/chosen": -247.5,
|
|
"logps/rejected": -256.0,
|
|
"loss": 0.3028,
|
|
"rewards/accuracies": 0.84375,
|
|
"rewards/chosen": 1.03125,
|
|
"rewards/margins": 2.3671875,
|
|
"rewards/rejected": -1.33984375,
|
|
"step": 339
|
|
},
|
|
{
|
|
"epoch": 0.37610619469026546,
|
|
"grad_norm": 13.413825988769531,
|
|
"learning_rate": 3.5996868923110883e-07,
|
|
"logits/chosen": -1.30859375,
|
|
"logits/rejected": -1.1796875,
|
|
"logps/chosen": -229.0,
|
|
"logps/rejected": -270.5,
|
|
"loss": 0.3433,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 1.015625,
|
|
"rewards/margins": 2.2421875,
|
|
"rewards/rejected": -1.2265625,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 0.377212389380531,
|
|
"grad_norm": 14.234886169433594,
|
|
"learning_rate": 3.59162807989016e-07,
|
|
"logits/chosen": -1.26953125,
|
|
"logits/rejected": -1.13671875,
|
|
"logps/chosen": -248.5,
|
|
"logps/rejected": -233.5,
|
|
"loss": 0.3125,
|
|
"rewards/accuracies": 0.78125,
|
|
"rewards/chosen": 1.125,
|
|
"rewards/margins": 2.6796875,
|
|
"rewards/rejected": -1.55078125,
|
|
"step": 341
|
|
},
|
|
{
|
|
"epoch": 0.37831858407079644,
|
|
"grad_norm": 14.096612930297852,
|
|
"learning_rate": 3.583555227512607e-07,
|
|
"logits/chosen": -1.3515625,
|
|
"logits/rejected": -1.12109375,
|
|
"logps/chosen": -238.5,
|
|
"logps/rejected": -265.0,
|
|
"loss": 0.356,
|
|
"rewards/accuracies": 0.765625,
|
|
"rewards/chosen": 1.1875,
|
|
"rewards/margins": 2.65625,
|
|
"rewards/rejected": -1.47265625,
|
|
"step": 342
|
|
},
|
|
{
|
|
"epoch": 0.37942477876106195,
|
|
"grad_norm": 13.053836822509766,
|
|
"learning_rate": 3.5754684390072886e-07,
|
|
"logits/chosen": -1.16796875,
|
|
"logits/rejected": -1.0625,
|
|
"logps/chosen": -241.0,
|
|
"logps/rejected": -280.0,
|
|
"loss": 0.3579,
|
|
"rewards/accuracies": 0.7578125,
|
|
"rewards/chosen": 1.033203125,
|
|
"rewards/margins": 2.10546875,
|
|
"rewards/rejected": -1.07421875,
|
|
"step": 343
|
|
},
|
|
{
|
|
"epoch": 0.3805309734513274,
|
|
"grad_norm": 13.854188919067383,
|
|
"learning_rate": 3.5673678183823024e-07,
|
|
"logits/chosen": -1.13671875,
|
|
"logits/rejected": -1.0546875,
|
|
"logps/chosen": -284.0,
|
|
"logps/rejected": -302.0,
|
|
"loss": 0.3128,
|
|
"rewards/accuracies": 0.8359375,
|
|
"rewards/chosen": 1.25390625,
|
|
"rewards/margins": 2.5078125,
|
|
"rewards/rejected": -1.2578125,
|
|
"step": 344
|
|
},
|
|
{
|
|
"epoch": 0.38163716814159293,
|
|
"grad_norm": 15.331599235534668,
|
|
"learning_rate": 3.559253469823647e-07,
|
|
"logits/chosen": -1.09375,
|
|
"logits/rejected": -1.048828125,
|
|
"logps/chosen": -250.5,
|
|
"logps/rejected": -278.0,
|
|
"loss": 0.3974,
|
|
"rewards/accuracies": 0.7109375,
|
|
"rewards/chosen": 1.05078125,
|
|
"rewards/margins": 2.3203125,
|
|
"rewards/rejected": -1.2734375,
|
|
"step": 345
|
|
},
|
|
{
|
|
"epoch": 0.3827433628318584,
|
|
"grad_norm": 13.891538619995117,
|
|
"learning_rate": 3.5511254976938834e-07,
|
|
"logits/chosen": -1.2890625,
|
|
"logits/rejected": -1.1484375,
|
|
"logps/chosen": -269.0,
|
|
"logps/rejected": -274.0,
|
|
"loss": 0.3552,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 1.0703125,
|
|
"rewards/margins": 2.125,
|
|
"rewards/rejected": -1.05078125,
|
|
"step": 346
|
|
},
|
|
{
|
|
"epoch": 0.3838495575221239,
|
|
"grad_norm": 14.387117385864258,
|
|
"learning_rate": 3.542984006530792e-07,
|
|
"logits/chosen": -1.36328125,
|
|
"logits/rejected": -1.16015625,
|
|
"logps/chosen": -237.0,
|
|
"logps/rejected": -272.0,
|
|
"loss": 0.3257,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 1.044921875,
|
|
"rewards/margins": 2.546875,
|
|
"rewards/rejected": -1.5078125,
|
|
"step": 347
|
|
},
|
|
{
|
|
"epoch": 0.38495575221238937,
|
|
"grad_norm": 14.693575859069824,
|
|
"learning_rate": 3.534829101046027e-07,
|
|
"logits/chosen": -1.26953125,
|
|
"logits/rejected": -1.19921875,
|
|
"logps/chosen": -253.0,
|
|
"logps/rejected": -271.5,
|
|
"loss": 0.4006,
|
|
"rewards/accuracies": 0.7578125,
|
|
"rewards/chosen": 0.859375,
|
|
"rewards/margins": 1.90234375,
|
|
"rewards/rejected": -1.046875,
|
|
"step": 348
|
|
},
|
|
{
|
|
"epoch": 0.3860619469026549,
|
|
"grad_norm": 14.893670082092285,
|
|
"learning_rate": 3.5266608861237723e-07,
|
|
"logits/chosen": -1.30859375,
|
|
"logits/rejected": -1.16015625,
|
|
"logps/chosen": -257.0,
|
|
"logps/rejected": -273.0,
|
|
"loss": 0.3469,
|
|
"rewards/accuracies": 0.78125,
|
|
"rewards/chosen": 0.845703125,
|
|
"rewards/margins": 2.171875,
|
|
"rewards/rejected": -1.32421875,
|
|
"step": 349
|
|
},
|
|
{
|
|
"epoch": 0.38716814159292035,
|
|
"grad_norm": 13.093351364135742,
|
|
"learning_rate": 3.518479466819389e-07,
|
|
"logits/chosen": -1.1484375,
|
|
"logits/rejected": -1.20703125,
|
|
"logps/chosen": -251.0,
|
|
"logps/rejected": -290.0,
|
|
"loss": 0.3118,
|
|
"rewards/accuracies": 0.8359375,
|
|
"rewards/chosen": 1.056640625,
|
|
"rewards/margins": 2.5546875,
|
|
"rewards/rejected": -1.5,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.38716814159292035,
|
|
"eval_logits/chosen": -1.260883092880249,
|
|
"eval_logits/rejected": -1.1730799674987793,
|
|
"eval_logps/chosen": -250.82586669921875,
|
|
"eval_logps/rejected": -272.52239990234375,
|
|
"eval_loss": 0.3436649739742279,
|
|
"eval_rewards/accuracies": 0.7924543023109436,
|
|
"eval_rewards/chosen": 1.0035176277160645,
|
|
"eval_rewards/margins": 2.3350045680999756,
|
|
"eval_rewards/rejected": -1.3310012817382812,
|
|
"eval_runtime": 192.8803,
|
|
"eval_samples_per_second": 66.637,
|
|
"eval_steps_per_second": 1.042,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 0.38827433628318586,
|
|
"grad_norm": 17.082809448242188,
|
|
"learning_rate": 3.510284948358068e-07,
|
|
"logits/chosen": -1.2734375,
|
|
"logits/rejected": -1.1328125,
|
|
"logps/chosen": -264.0,
|
|
"logps/rejected": -286.0,
|
|
"loss": 0.4283,
|
|
"rewards/accuracies": 0.703125,
|
|
"rewards/chosen": 0.78125,
|
|
"rewards/margins": 2.02734375,
|
|
"rewards/rejected": -1.25,
|
|
"step": 351
|
|
},
|
|
{
|
|
"epoch": 0.3893805309734513,
|
|
"grad_norm": 14.743417739868164,
|
|
"learning_rate": 3.5020774361334744e-07,
|
|
"logits/chosen": -1.27734375,
|
|
"logits/rejected": -1.28125,
|
|
"logps/chosen": -235.0,
|
|
"logps/rejected": -291.0,
|
|
"loss": 0.3538,
|
|
"rewards/accuracies": 0.765625,
|
|
"rewards/chosen": 1.1015625,
|
|
"rewards/margins": 2.375,
|
|
"rewards/rejected": -1.26953125,
|
|
"step": 352
|
|
},
|
|
{
|
|
"epoch": 0.39048672566371684,
|
|
"grad_norm": 12.676244735717773,
|
|
"learning_rate": 3.49385703570639e-07,
|
|
"logits/chosen": -1.35546875,
|
|
"logits/rejected": -1.19921875,
|
|
"logps/chosen": -243.5,
|
|
"logps/rejected": -254.0,
|
|
"loss": 0.2715,
|
|
"rewards/accuracies": 0.8515625,
|
|
"rewards/chosen": 1.2578125,
|
|
"rewards/margins": 2.7578125,
|
|
"rewards/rejected": -1.5,
|
|
"step": 353
|
|
},
|
|
{
|
|
"epoch": 0.3915929203539823,
|
|
"grad_norm": 13.607057571411133,
|
|
"learning_rate": 3.485623852803361e-07,
|
|
"logits/chosen": -1.1953125,
|
|
"logits/rejected": -1.12890625,
|
|
"logps/chosen": -248.0,
|
|
"logps/rejected": -263.5,
|
|
"loss": 0.3456,
|
|
"rewards/accuracies": 0.7578125,
|
|
"rewards/chosen": 0.962890625,
|
|
"rewards/margins": 2.46875,
|
|
"rewards/rejected": -1.5,
|
|
"step": 354
|
|
},
|
|
{
|
|
"epoch": 0.3926991150442478,
|
|
"grad_norm": 12.650440216064453,
|
|
"learning_rate": 3.4773779933153343e-07,
|
|
"logits/chosen": -1.296875,
|
|
"logits/rejected": -1.2578125,
|
|
"logps/chosen": -222.5,
|
|
"logps/rejected": -240.0,
|
|
"loss": 0.3298,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 1.037109375,
|
|
"rewards/margins": 2.265625,
|
|
"rewards/rejected": -1.2265625,
|
|
"step": 355
|
|
},
|
|
{
|
|
"epoch": 0.3938053097345133,
|
|
"grad_norm": 13.600220680236816,
|
|
"learning_rate": 3.4691195632962957e-07,
|
|
"logits/chosen": -1.41015625,
|
|
"logits/rejected": -1.1796875,
|
|
"logps/chosen": -225.0,
|
|
"logps/rejected": -251.0,
|
|
"loss": 0.3439,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 0.978515625,
|
|
"rewards/margins": 2.3515625,
|
|
"rewards/rejected": -1.375,
|
|
"step": 356
|
|
},
|
|
{
|
|
"epoch": 0.39491150442477874,
|
|
"grad_norm": 14.466880798339844,
|
|
"learning_rate": 3.4608486689619083e-07,
|
|
"logits/chosen": -1.1640625,
|
|
"logits/rejected": -1.171875,
|
|
"logps/chosen": -252.0,
|
|
"logps/rejected": -254.0,
|
|
"loss": 0.3437,
|
|
"rewards/accuracies": 0.78125,
|
|
"rewards/chosen": 0.904296875,
|
|
"rewards/margins": 2.4375,
|
|
"rewards/rejected": -1.53125,
|
|
"step": 357
|
|
},
|
|
{
|
|
"epoch": 0.39601769911504425,
|
|
"grad_norm": 13.527942657470703,
|
|
"learning_rate": 3.4525654166881426e-07,
|
|
"logits/chosen": -1.34375,
|
|
"logits/rejected": -1.19140625,
|
|
"logps/chosen": -256.5,
|
|
"logps/rejected": -281.0,
|
|
"loss": 0.3267,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 0.962890625,
|
|
"rewards/margins": 2.40625,
|
|
"rewards/rejected": -1.44140625,
|
|
"step": 358
|
|
},
|
|
{
|
|
"epoch": 0.3971238938053097,
|
|
"grad_norm": 13.268882751464844,
|
|
"learning_rate": 3.4442699130099116e-07,
|
|
"logits/chosen": -1.26171875,
|
|
"logits/rejected": -1.1328125,
|
|
"logps/chosen": -268.0,
|
|
"logps/rejected": -297.0,
|
|
"loss": 0.293,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 0.990234375,
|
|
"rewards/margins": 2.6171875,
|
|
"rewards/rejected": -1.6328125,
|
|
"step": 359
|
|
},
|
|
{
|
|
"epoch": 0.39823008849557523,
|
|
"grad_norm": 14.783308982849121,
|
|
"learning_rate": 3.435962264619702e-07,
|
|
"logits/chosen": -1.1796875,
|
|
"logits/rejected": -1.12109375,
|
|
"logps/chosen": -239.5,
|
|
"logps/rejected": -278.0,
|
|
"loss": 0.3438,
|
|
"rewards/accuracies": 0.765625,
|
|
"rewards/chosen": 0.765625,
|
|
"rewards/margins": 2.25390625,
|
|
"rewards/rejected": -1.484375,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 0.3993362831858407,
|
|
"grad_norm": 14.32043743133545,
|
|
"learning_rate": 3.427642578366194e-07,
|
|
"logits/chosen": -1.2578125,
|
|
"logits/rejected": -1.19140625,
|
|
"logps/chosen": -256.5,
|
|
"logps/rejected": -278.0,
|
|
"loss": 0.3622,
|
|
"rewards/accuracies": 0.7734375,
|
|
"rewards/chosen": 0.9375,
|
|
"rewards/margins": 2.359375,
|
|
"rewards/rejected": -1.421875,
|
|
"step": 361
|
|
},
|
|
{
|
|
"epoch": 0.4004424778761062,
|
|
"grad_norm": 175.1194305419922,
|
|
"learning_rate": 3.419310961252897e-07,
|
|
"logits/chosen": -1.2109375,
|
|
"logits/rejected": -1.10546875,
|
|
"logps/chosen": -234.5,
|
|
"logps/rejected": -354.0,
|
|
"loss": 0.3211,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 1.06640625,
|
|
"rewards/margins": 2.6015625,
|
|
"rewards/rejected": -1.53515625,
|
|
"step": 362
|
|
},
|
|
{
|
|
"epoch": 0.40154867256637167,
|
|
"grad_norm": 13.051432609558105,
|
|
"learning_rate": 3.4109675204367686e-07,
|
|
"logits/chosen": -1.1953125,
|
|
"logits/rejected": -1.06640625,
|
|
"logps/chosen": -269.0,
|
|
"logps/rejected": -313.0,
|
|
"loss": 0.3161,
|
|
"rewards/accuracies": 0.8359375,
|
|
"rewards/chosen": 1.140625,
|
|
"rewards/margins": 2.5703125,
|
|
"rewards/rejected": -1.4296875,
|
|
"step": 363
|
|
},
|
|
{
|
|
"epoch": 0.4026548672566372,
|
|
"grad_norm": 12.166739463806152,
|
|
"learning_rate": 3.4026123632268354e-07,
|
|
"logits/chosen": -1.25390625,
|
|
"logits/rejected": -1.09375,
|
|
"logps/chosen": -235.0,
|
|
"logps/rejected": -244.0,
|
|
"loss": 0.3185,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 0.6875,
|
|
"rewards/margins": 2.1953125,
|
|
"rewards/rejected": -1.50390625,
|
|
"step": 364
|
|
},
|
|
{
|
|
"epoch": 0.40376106194690264,
|
|
"grad_norm": 13.57703971862793,
|
|
"learning_rate": 3.3942455970828146e-07,
|
|
"logits/chosen": -1.25390625,
|
|
"logits/rejected": -1.11328125,
|
|
"logps/chosen": -249.5,
|
|
"logps/rejected": -265.0,
|
|
"loss": 0.3227,
|
|
"rewards/accuracies": 0.8515625,
|
|
"rewards/chosen": 1.013671875,
|
|
"rewards/margins": 2.546875,
|
|
"rewards/rejected": -1.52734375,
|
|
"step": 365
|
|
},
|
|
{
|
|
"epoch": 0.40486725663716816,
|
|
"grad_norm": 12.446300506591797,
|
|
"learning_rate": 3.38586732961373e-07,
|
|
"logits/chosen": -1.3828125,
|
|
"logits/rejected": -1.21875,
|
|
"logps/chosen": -232.5,
|
|
"logps/rejected": -237.5,
|
|
"loss": 0.3575,
|
|
"rewards/accuracies": 0.7578125,
|
|
"rewards/chosen": 0.560546875,
|
|
"rewards/margins": 2.234375,
|
|
"rewards/rejected": -1.6796875,
|
|
"step": 366
|
|
},
|
|
{
|
|
"epoch": 0.4059734513274336,
|
|
"grad_norm": 13.510902404785156,
|
|
"learning_rate": 3.3774776685765327e-07,
|
|
"logits/chosen": -1.3125,
|
|
"logits/rejected": -1.265625,
|
|
"logps/chosen": -242.5,
|
|
"logps/rejected": -264.5,
|
|
"loss": 0.3312,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 0.79296875,
|
|
"rewards/margins": 2.3671875,
|
|
"rewards/rejected": -1.57421875,
|
|
"step": 367
|
|
},
|
|
{
|
|
"epoch": 0.40707964601769914,
|
|
"grad_norm": 12.729259490966797,
|
|
"learning_rate": 3.3690767218747104e-07,
|
|
"logits/chosen": -1.1015625,
|
|
"logits/rejected": -1.1328125,
|
|
"logps/chosen": -243.5,
|
|
"logps/rejected": -272.0,
|
|
"loss": 0.2871,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 1.12109375,
|
|
"rewards/margins": 2.7734375,
|
|
"rewards/rejected": -1.6484375,
|
|
"step": 368
|
|
},
|
|
{
|
|
"epoch": 0.4081858407079646,
|
|
"grad_norm": 14.687115669250488,
|
|
"learning_rate": 3.3606645975569e-07,
|
|
"logits/chosen": -1.34765625,
|
|
"logits/rejected": -1.078125,
|
|
"logps/chosen": -248.5,
|
|
"logps/rejected": -254.5,
|
|
"loss": 0.3694,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": 0.65625,
|
|
"rewards/margins": 2.125,
|
|
"rewards/rejected": -1.47265625,
|
|
"step": 369
|
|
},
|
|
{
|
|
"epoch": 0.4092920353982301,
|
|
"grad_norm": 11.254127502441406,
|
|
"learning_rate": 3.3522414038155016e-07,
|
|
"logits/chosen": -1.22265625,
|
|
"logits/rejected": -1.12109375,
|
|
"logps/chosen": -225.0,
|
|
"logps/rejected": -259.5,
|
|
"loss": 0.2835,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 0.970703125,
|
|
"rewards/margins": 2.65625,
|
|
"rewards/rejected": -1.68359375,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 0.4103982300884956,
|
|
"grad_norm": 15.060591697692871,
|
|
"learning_rate": 3.343807248985283e-07,
|
|
"logits/chosen": -1.2890625,
|
|
"logits/rejected": -1.16015625,
|
|
"logps/chosen": -240.0,
|
|
"logps/rejected": -265.5,
|
|
"loss": 0.3759,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 0.689453125,
|
|
"rewards/margins": 2.2109375,
|
|
"rewards/rejected": -1.5234375,
|
|
"step": 371
|
|
},
|
|
{
|
|
"epoch": 0.41150442477876104,
|
|
"grad_norm": 11.981417655944824,
|
|
"learning_rate": 3.335362241541988e-07,
|
|
"logits/chosen": -1.171875,
|
|
"logits/rejected": -1.09375,
|
|
"logps/chosen": -260.0,
|
|
"logps/rejected": -280.0,
|
|
"loss": 0.3012,
|
|
"rewards/accuracies": 0.8515625,
|
|
"rewards/chosen": 0.732421875,
|
|
"rewards/margins": 2.3828125,
|
|
"rewards/rejected": -1.64453125,
|
|
"step": 372
|
|
},
|
|
{
|
|
"epoch": 0.41261061946902655,
|
|
"grad_norm": 13.114727020263672,
|
|
"learning_rate": 3.32690649010094e-07,
|
|
"logits/chosen": -1.265625,
|
|
"logits/rejected": -1.16796875,
|
|
"logps/chosen": -244.5,
|
|
"logps/rejected": -267.5,
|
|
"loss": 0.2875,
|
|
"rewards/accuracies": 0.8828125,
|
|
"rewards/chosen": 0.927734375,
|
|
"rewards/margins": 2.8125,
|
|
"rewards/rejected": -1.8828125,
|
|
"step": 373
|
|
},
|
|
{
|
|
"epoch": 0.413716814159292,
|
|
"grad_norm": 13.435688972473145,
|
|
"learning_rate": 3.3184401034156484e-07,
|
|
"logits/chosen": -1.16796875,
|
|
"logits/rejected": -1.015625,
|
|
"logps/chosen": -263.0,
|
|
"logps/rejected": -271.0,
|
|
"loss": 0.344,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 0.658203125,
|
|
"rewards/margins": 2.2890625,
|
|
"rewards/rejected": -1.625,
|
|
"step": 374
|
|
},
|
|
{
|
|
"epoch": 0.41482300884955753,
|
|
"grad_norm": 16.25585174560547,
|
|
"learning_rate": 3.3099631903764064e-07,
|
|
"logits/chosen": -1.26171875,
|
|
"logits/rejected": -1.125,
|
|
"logps/chosen": -270.0,
|
|
"logps/rejected": -291.0,
|
|
"loss": 0.4301,
|
|
"rewards/accuracies": 0.71875,
|
|
"rewards/chosen": 0.572265625,
|
|
"rewards/margins": 1.765625,
|
|
"rewards/rejected": -1.1953125,
|
|
"step": 375
|
|
},
|
|
{
|
|
"epoch": 0.415929203539823,
|
|
"grad_norm": 13.53650188446045,
|
|
"learning_rate": 3.3014758600088923e-07,
|
|
"logits/chosen": -1.2890625,
|
|
"logits/rejected": -1.2265625,
|
|
"logps/chosen": -232.0,
|
|
"logps/rejected": -266.5,
|
|
"loss": 0.3326,
|
|
"rewards/accuracies": 0.8515625,
|
|
"rewards/chosen": 0.755859375,
|
|
"rewards/margins": 2.234375,
|
|
"rewards/rejected": -1.48046875,
|
|
"step": 376
|
|
},
|
|
{
|
|
"epoch": 0.4170353982300885,
|
|
"grad_norm": 12.601165771484375,
|
|
"learning_rate": 3.2929782214727653e-07,
|
|
"logits/chosen": -1.29296875,
|
|
"logits/rejected": -1.18359375,
|
|
"logps/chosen": -246.5,
|
|
"logps/rejected": -263.5,
|
|
"loss": 0.3436,
|
|
"rewards/accuracies": 0.7734375,
|
|
"rewards/chosen": 0.771484375,
|
|
"rewards/margins": 2.515625,
|
|
"rewards/rejected": -1.7421875,
|
|
"step": 377
|
|
},
|
|
{
|
|
"epoch": 0.41814159292035397,
|
|
"grad_norm": 13.9395112991333,
|
|
"learning_rate": 3.2844703840602636e-07,
|
|
"logits/chosen": -1.30078125,
|
|
"logits/rejected": -1.30859375,
|
|
"logps/chosen": -243.5,
|
|
"logps/rejected": -262.5,
|
|
"loss": 0.3515,
|
|
"rewards/accuracies": 0.765625,
|
|
"rewards/chosen": 0.92578125,
|
|
"rewards/margins": 2.390625,
|
|
"rewards/rejected": -1.46484375,
|
|
"step": 378
|
|
},
|
|
{
|
|
"epoch": 0.4192477876106195,
|
|
"grad_norm": 13.737992286682129,
|
|
"learning_rate": 3.2759524571948e-07,
|
|
"logits/chosen": -1.3125,
|
|
"logits/rejected": -1.11328125,
|
|
"logps/chosen": -258.0,
|
|
"logps/rejected": -293.0,
|
|
"loss": 0.2964,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 0.970703125,
|
|
"rewards/margins": 2.5859375,
|
|
"rewards/rejected": -1.61328125,
|
|
"step": 379
|
|
},
|
|
{
|
|
"epoch": 0.42035398230088494,
|
|
"grad_norm": 14.736825942993164,
|
|
"learning_rate": 3.26742455042955e-07,
|
|
"logits/chosen": -1.3515625,
|
|
"logits/rejected": -1.2109375,
|
|
"logps/chosen": -247.5,
|
|
"logps/rejected": -238.0,
|
|
"loss": 0.3616,
|
|
"rewards/accuracies": 0.78125,
|
|
"rewards/chosen": 0.634765625,
|
|
"rewards/margins": 2.1875,
|
|
"rewards/rejected": -1.55078125,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 0.42146017699115046,
|
|
"grad_norm": 14.469903945922852,
|
|
"learning_rate": 3.2588867734460464e-07,
|
|
"logits/chosen": -1.1796875,
|
|
"logits/rejected": -1.15234375,
|
|
"logps/chosen": -252.0,
|
|
"logps/rejected": -260.5,
|
|
"loss": 0.355,
|
|
"rewards/accuracies": 0.78125,
|
|
"rewards/chosen": 0.9296875,
|
|
"rewards/margins": 2.5546875,
|
|
"rewards/rejected": -1.62109375,
|
|
"step": 381
|
|
},
|
|
{
|
|
"epoch": 0.4225663716814159,
|
|
"grad_norm": 14.506092071533203,
|
|
"learning_rate": 3.250339236052767e-07,
|
|
"logits/chosen": -1.140625,
|
|
"logits/rejected": -1.18359375,
|
|
"logps/chosen": -261.0,
|
|
"logps/rejected": -289.0,
|
|
"loss": 0.3673,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": 0.951171875,
|
|
"rewards/margins": 2.3359375,
|
|
"rewards/rejected": -1.37890625,
|
|
"step": 382
|
|
},
|
|
{
|
|
"epoch": 0.42367256637168144,
|
|
"grad_norm": 13.806063652038574,
|
|
"learning_rate": 3.2417820481837256e-07,
|
|
"logits/chosen": -1.3671875,
|
|
"logits/rejected": -1.16015625,
|
|
"logps/chosen": -254.0,
|
|
"logps/rejected": -274.0,
|
|
"loss": 0.3272,
|
|
"rewards/accuracies": 0.8359375,
|
|
"rewards/chosen": 0.712890625,
|
|
"rewards/margins": 2.40625,
|
|
"rewards/rejected": -1.69140625,
|
|
"step": 383
|
|
},
|
|
{
|
|
"epoch": 0.4247787610619469,
|
|
"grad_norm": 13.990365982055664,
|
|
"learning_rate": 3.2332153198970517e-07,
|
|
"logits/chosen": -1.28125,
|
|
"logits/rejected": -1.2109375,
|
|
"logps/chosen": -250.5,
|
|
"logps/rejected": -279.0,
|
|
"loss": 0.334,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": 0.66796875,
|
|
"rewards/margins": 2.1640625,
|
|
"rewards/rejected": -1.49609375,
|
|
"step": 384
|
|
},
|
|
{
|
|
"epoch": 0.4258849557522124,
|
|
"grad_norm": 15.526028633117676,
|
|
"learning_rate": 3.2246391613735815e-07,
|
|
"logits/chosen": -1.390625,
|
|
"logits/rejected": -1.1953125,
|
|
"logps/chosen": -253.0,
|
|
"logps/rejected": -258.5,
|
|
"loss": 0.3283,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 0.92578125,
|
|
"rewards/margins": 2.421875,
|
|
"rewards/rejected": -1.49609375,
|
|
"step": 385
|
|
},
|
|
{
|
|
"epoch": 0.4269911504424779,
|
|
"grad_norm": 13.068387985229492,
|
|
"learning_rate": 3.2160536829154356e-07,
|
|
"logits/chosen": -1.27734375,
|
|
"logits/rejected": -1.23046875,
|
|
"logps/chosen": -248.5,
|
|
"logps/rejected": -286.0,
|
|
"loss": 0.2813,
|
|
"rewards/accuracies": 0.859375,
|
|
"rewards/chosen": 1.46875,
|
|
"rewards/margins": 3.0078125,
|
|
"rewards/rejected": -1.54296875,
|
|
"step": 386
|
|
},
|
|
{
|
|
"epoch": 0.4280973451327434,
|
|
"grad_norm": 14.58014965057373,
|
|
"learning_rate": 3.207458994944606e-07,
|
|
"logits/chosen": -1.2578125,
|
|
"logits/rejected": -1.16796875,
|
|
"logps/chosen": -261.0,
|
|
"logps/rejected": -269.0,
|
|
"loss": 0.3732,
|
|
"rewards/accuracies": 0.7578125,
|
|
"rewards/chosen": 0.6640625,
|
|
"rewards/margins": 2.0546875,
|
|
"rewards/rejected": -1.390625,
|
|
"step": 387
|
|
},
|
|
{
|
|
"epoch": 0.42920353982300885,
|
|
"grad_norm": 13.451835632324219,
|
|
"learning_rate": 3.1988552080015294e-07,
|
|
"logits/chosen": -1.33984375,
|
|
"logits/rejected": -1.15234375,
|
|
"logps/chosen": -256.5,
|
|
"logps/rejected": -264.0,
|
|
"loss": 0.3112,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 1.11328125,
|
|
"rewards/margins": 2.515625,
|
|
"rewards/rejected": -1.40234375,
|
|
"step": 388
|
|
},
|
|
{
|
|
"epoch": 0.4303097345132743,
|
|
"grad_norm": 15.350495338439941,
|
|
"learning_rate": 3.1902424327436725e-07,
|
|
"logits/chosen": -1.23828125,
|
|
"logits/rejected": -1.0625,
|
|
"logps/chosen": -273.0,
|
|
"logps/rejected": -264.0,
|
|
"loss": 0.3406,
|
|
"rewards/accuracies": 0.8359375,
|
|
"rewards/chosen": 0.712890625,
|
|
"rewards/margins": 2.1484375,
|
|
"rewards/rejected": -1.44140625,
|
|
"step": 389
|
|
},
|
|
{
|
|
"epoch": 0.4314159292035398,
|
|
"grad_norm": 17.061744689941406,
|
|
"learning_rate": 3.1816207799440996e-07,
|
|
"logits/chosen": -1.265625,
|
|
"logits/rejected": -1.12890625,
|
|
"logps/chosen": -278.5,
|
|
"logps/rejected": -318.0,
|
|
"loss": 0.3654,
|
|
"rewards/accuracies": 0.7734375,
|
|
"rewards/chosen": 1.16015625,
|
|
"rewards/margins": 2.4375,
|
|
"rewards/rejected": -1.28125,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 0.4325221238938053,
|
|
"grad_norm": 16.2224063873291,
|
|
"learning_rate": 3.1729903604900595e-07,
|
|
"logits/chosen": -1.25390625,
|
|
"logits/rejected": -1.08203125,
|
|
"logps/chosen": -244.0,
|
|
"logps/rejected": -281.0,
|
|
"loss": 0.3328,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 0.923828125,
|
|
"rewards/margins": 2.5234375,
|
|
"rewards/rejected": -1.59375,
|
|
"step": 391
|
|
},
|
|
{
|
|
"epoch": 0.4336283185840708,
|
|
"grad_norm": 13.922826766967773,
|
|
"learning_rate": 3.1643512853815487e-07,
|
|
"logits/chosen": -1.26171875,
|
|
"logits/rejected": -1.25390625,
|
|
"logps/chosen": -249.0,
|
|
"logps/rejected": -280.0,
|
|
"loss": 0.3626,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 0.89453125,
|
|
"rewards/margins": 2.12890625,
|
|
"rewards/rejected": -1.2421875,
|
|
"step": 392
|
|
},
|
|
{
|
|
"epoch": 0.43473451327433627,
|
|
"grad_norm": 14.233848571777344,
|
|
"learning_rate": 3.15570366572989e-07,
|
|
"logits/chosen": -1.3203125,
|
|
"logits/rejected": -1.18359375,
|
|
"logps/chosen": -246.5,
|
|
"logps/rejected": -255.5,
|
|
"loss": 0.33,
|
|
"rewards/accuracies": 0.78125,
|
|
"rewards/chosen": 0.875,
|
|
"rewards/margins": 2.2421875,
|
|
"rewards/rejected": -1.3671875,
|
|
"step": 393
|
|
},
|
|
{
|
|
"epoch": 0.4358407079646018,
|
|
"grad_norm": 15.168607711791992,
|
|
"learning_rate": 3.147047612756302e-07,
|
|
"logits/chosen": -1.21875,
|
|
"logits/rejected": -1.17578125,
|
|
"logps/chosen": -281.0,
|
|
"logps/rejected": -290.0,
|
|
"loss": 0.345,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 0.892578125,
|
|
"rewards/margins": 2.3125,
|
|
"rewards/rejected": -1.421875,
|
|
"step": 394
|
|
},
|
|
{
|
|
"epoch": 0.43694690265486724,
|
|
"grad_norm": 14.825115203857422,
|
|
"learning_rate": 3.138383237790467e-07,
|
|
"logits/chosen": -1.203125,
|
|
"logits/rejected": -1.1484375,
|
|
"logps/chosen": -250.5,
|
|
"logps/rejected": -272.0,
|
|
"loss": 0.3428,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 0.98828125,
|
|
"rewards/margins": 2.4765625,
|
|
"rewards/rejected": -1.484375,
|
|
"step": 395
|
|
},
|
|
{
|
|
"epoch": 0.43805309734513276,
|
|
"grad_norm": 12.729918479919434,
|
|
"learning_rate": 3.129710652269103e-07,
|
|
"logits/chosen": -1.30859375,
|
|
"logits/rejected": -1.1171875,
|
|
"logps/chosen": -230.0,
|
|
"logps/rejected": -255.5,
|
|
"loss": 0.2864,
|
|
"rewards/accuracies": 0.8515625,
|
|
"rewards/chosen": 1.20703125,
|
|
"rewards/margins": 2.875,
|
|
"rewards/rejected": -1.66796875,
|
|
"step": 396
|
|
},
|
|
{
|
|
"epoch": 0.4391592920353982,
|
|
"grad_norm": 12.986177444458008,
|
|
"learning_rate": 3.1210299677345253e-07,
|
|
"logits/chosen": -1.17578125,
|
|
"logits/rejected": -1.140625,
|
|
"logps/chosen": -257.0,
|
|
"logps/rejected": -279.0,
|
|
"loss": 0.3394,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 1.013671875,
|
|
"rewards/margins": 2.46875,
|
|
"rewards/rejected": -1.45703125,
|
|
"step": 397
|
|
},
|
|
{
|
|
"epoch": 0.44026548672566373,
|
|
"grad_norm": 15.79924488067627,
|
|
"learning_rate": 3.1123412958332153e-07,
|
|
"logits/chosen": -1.30859375,
|
|
"logits/rejected": -1.15234375,
|
|
"logps/chosen": -248.0,
|
|
"logps/rejected": -275.0,
|
|
"loss": 0.3804,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": 0.84375,
|
|
"rewards/margins": 2.34375,
|
|
"rewards/rejected": -1.5,
|
|
"step": 398
|
|
},
|
|
{
|
|
"epoch": 0.4413716814159292,
|
|
"grad_norm": 14.248608589172363,
|
|
"learning_rate": 3.1036447483143834e-07,
|
|
"logits/chosen": -1.2578125,
|
|
"logits/rejected": -1.22265625,
|
|
"logps/chosen": -261.5,
|
|
"logps/rejected": -275.0,
|
|
"loss": 0.3299,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 0.99609375,
|
|
"rewards/margins": 2.6015625,
|
|
"rewards/rejected": -1.609375,
|
|
"step": 399
|
|
},
|
|
{
|
|
"epoch": 0.4424778761061947,
|
|
"grad_norm": 14.430908203125,
|
|
"learning_rate": 3.094940437028535e-07,
|
|
"logits/chosen": -1.1171875,
|
|
"logits/rejected": -1.125,
|
|
"logps/chosen": -250.5,
|
|
"logps/rejected": -251.5,
|
|
"loss": 0.3726,
|
|
"rewards/accuracies": 0.765625,
|
|
"rewards/chosen": 0.953125,
|
|
"rewards/margins": 2.26953125,
|
|
"rewards/rejected": -1.3203125,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.4424778761061947,
|
|
"eval_logits/chosen": -1.2572294473648071,
|
|
"eval_logits/rejected": -1.1652674674987793,
|
|
"eval_logps/chosen": -250.63681030273438,
|
|
"eval_logps/rejected": -273.5472717285156,
|
|
"eval_loss": 0.3369702994823456,
|
|
"eval_rewards/accuracies": 0.7978180646896362,
|
|
"eval_rewards/chosen": 1.024176001548767,
|
|
"eval_rewards/margins": 2.449626922607422,
|
|
"eval_rewards/rejected": -1.4251010417938232,
|
|
"eval_runtime": 193.115,
|
|
"eval_samples_per_second": 66.556,
|
|
"eval_steps_per_second": 1.041,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 0.4435840707964602,
|
|
"grad_norm": 14.084267616271973,
|
|
"learning_rate": 3.086228473926024e-07,
|
|
"logits/chosen": -1.23828125,
|
|
"logits/rejected": -1.26171875,
|
|
"logps/chosen": -242.5,
|
|
"logps/rejected": -257.0,
|
|
"loss": 0.3172,
|
|
"rewards/accuracies": 0.8359375,
|
|
"rewards/chosen": 1.12109375,
|
|
"rewards/margins": 2.5390625,
|
|
"rewards/rejected": -1.4140625,
|
|
"step": 401
|
|
},
|
|
{
|
|
"epoch": 0.4446902654867257,
|
|
"grad_norm": 13.272000312805176,
|
|
"learning_rate": 3.077508971055623e-07,
|
|
"logits/chosen": -1.1015625,
|
|
"logits/rejected": -1.171875,
|
|
"logps/chosen": -246.5,
|
|
"logps/rejected": -295.0,
|
|
"loss": 0.2771,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 1.2265625,
|
|
"rewards/margins": 2.90625,
|
|
"rewards/rejected": -1.6796875,
|
|
"step": 402
|
|
},
|
|
{
|
|
"epoch": 0.44579646017699115,
|
|
"grad_norm": 13.017451286315918,
|
|
"learning_rate": 3.0687820405630736e-07,
|
|
"logits/chosen": -1.31640625,
|
|
"logits/rejected": -1.2265625,
|
|
"logps/chosen": -258.5,
|
|
"logps/rejected": -286.0,
|
|
"loss": 0.2997,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 1.3125,
|
|
"rewards/margins": 2.8125,
|
|
"rewards/rejected": -1.5,
|
|
"step": 403
|
|
},
|
|
{
|
|
"epoch": 0.4469026548672566,
|
|
"grad_norm": 11.719470024108887,
|
|
"learning_rate": 3.060047794689649e-07,
|
|
"logits/chosen": -1.24609375,
|
|
"logits/rejected": -1.171875,
|
|
"logps/chosen": -246.0,
|
|
"logps/rejected": -252.0,
|
|
"loss": 0.273,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 1.154296875,
|
|
"rewards/margins": 2.7109375,
|
|
"rewards/rejected": -1.55078125,
|
|
"step": 404
|
|
},
|
|
{
|
|
"epoch": 0.4480088495575221,
|
|
"grad_norm": 12.74482250213623,
|
|
"learning_rate": 3.0513063457707106e-07,
|
|
"logits/chosen": -1.32421875,
|
|
"logits/rejected": -1.17578125,
|
|
"logps/chosen": -238.5,
|
|
"logps/rejected": -227.0,
|
|
"loss": 0.3567,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 0.908203125,
|
|
"rewards/margins": 2.234375,
|
|
"rewards/rejected": -1.32421875,
|
|
"step": 405
|
|
},
|
|
{
|
|
"epoch": 0.4491150442477876,
|
|
"grad_norm": 14.130414962768555,
|
|
"learning_rate": 3.0425578062342577e-07,
|
|
"logits/chosen": -1.1484375,
|
|
"logits/rejected": -1.18359375,
|
|
"logps/chosen": -241.5,
|
|
"logps/rejected": -268.0,
|
|
"loss": 0.3743,
|
|
"rewards/accuracies": 0.71875,
|
|
"rewards/chosen": 1.017578125,
|
|
"rewards/margins": 2.203125,
|
|
"rewards/rejected": -1.18359375,
|
|
"step": 406
|
|
},
|
|
{
|
|
"epoch": 0.4502212389380531,
|
|
"grad_norm": 15.725412368774414,
|
|
"learning_rate": 3.03380228859949e-07,
|
|
"logits/chosen": -1.29296875,
|
|
"logits/rejected": -1.13671875,
|
|
"logps/chosen": -271.5,
|
|
"logps/rejected": -291.0,
|
|
"loss": 0.3421,
|
|
"rewards/accuracies": 0.765625,
|
|
"rewards/chosen": 1.140625,
|
|
"rewards/margins": 2.4140625,
|
|
"rewards/rejected": -1.26953125,
|
|
"step": 407
|
|
},
|
|
{
|
|
"epoch": 0.45132743362831856,
|
|
"grad_norm": 13.073963165283203,
|
|
"learning_rate": 3.0250399054753526e-07,
|
|
"logits/chosen": -1.19921875,
|
|
"logits/rejected": -1.1328125,
|
|
"logps/chosen": -271.0,
|
|
"logps/rejected": -265.0,
|
|
"loss": 0.3024,
|
|
"rewards/accuracies": 0.8359375,
|
|
"rewards/chosen": 1.078125,
|
|
"rewards/margins": 2.515625,
|
|
"rewards/rejected": -1.4375,
|
|
"step": 408
|
|
},
|
|
{
|
|
"epoch": 0.4524336283185841,
|
|
"grad_norm": 12.850948333740234,
|
|
"learning_rate": 3.016270769559093e-07,
|
|
"logits/chosen": -1.203125,
|
|
"logits/rejected": -1.06640625,
|
|
"logps/chosen": -258.0,
|
|
"logps/rejected": -275.0,
|
|
"loss": 0.3189,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 1.041015625,
|
|
"rewards/margins": 2.234375,
|
|
"rewards/rejected": -1.1953125,
|
|
"step": 409
|
|
},
|
|
{
|
|
"epoch": 0.45353982300884954,
|
|
"grad_norm": 13.47533130645752,
|
|
"learning_rate": 3.007494993634808e-07,
|
|
"logits/chosen": -1.2578125,
|
|
"logits/rejected": -1.16015625,
|
|
"logps/chosen": -259.0,
|
|
"logps/rejected": -269.0,
|
|
"loss": 0.3222,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 1.125,
|
|
"rewards/margins": 2.53125,
|
|
"rewards/rejected": -1.41015625,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 0.45464601769911506,
|
|
"grad_norm": 13.043135643005371,
|
|
"learning_rate": 2.9987126905719965e-07,
|
|
"logits/chosen": -1.20703125,
|
|
"logits/rejected": -1.1484375,
|
|
"logps/chosen": -265.5,
|
|
"logps/rejected": -272.5,
|
|
"loss": 0.3374,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": 0.9140625,
|
|
"rewards/margins": 2.3125,
|
|
"rewards/rejected": -1.40234375,
|
|
"step": 411
|
|
},
|
|
{
|
|
"epoch": 0.4557522123893805,
|
|
"grad_norm": 14.49729061126709,
|
|
"learning_rate": 2.989923973324105e-07,
|
|
"logits/chosen": -1.20703125,
|
|
"logits/rejected": -1.08984375,
|
|
"logps/chosen": -252.5,
|
|
"logps/rejected": -281.0,
|
|
"loss": 0.3668,
|
|
"rewards/accuracies": 0.765625,
|
|
"rewards/chosen": 1.09765625,
|
|
"rewards/margins": 2.265625,
|
|
"rewards/rejected": -1.1640625,
|
|
"step": 412
|
|
},
|
|
{
|
|
"epoch": 0.45685840707964603,
|
|
"grad_norm": 15.509222030639648,
|
|
"learning_rate": 2.9811289549270745e-07,
|
|
"logits/chosen": -1.33203125,
|
|
"logits/rejected": -1.2109375,
|
|
"logps/chosen": -250.5,
|
|
"logps/rejected": -286.0,
|
|
"loss": 0.3665,
|
|
"rewards/accuracies": 0.7421875,
|
|
"rewards/chosen": 1.203125,
|
|
"rewards/margins": 2.5,
|
|
"rewards/rejected": -1.2890625,
|
|
"step": 413
|
|
},
|
|
{
|
|
"epoch": 0.4579646017699115,
|
|
"grad_norm": 13.909936904907227,
|
|
"learning_rate": 2.9723277484978917e-07,
|
|
"logits/chosen": -1.1796875,
|
|
"logits/rejected": -1.2109375,
|
|
"logps/chosen": -270.0,
|
|
"logps/rejected": -291.0,
|
|
"loss": 0.2915,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 1.17578125,
|
|
"rewards/margins": 2.765625,
|
|
"rewards/rejected": -1.58984375,
|
|
"step": 414
|
|
},
|
|
{
|
|
"epoch": 0.459070796460177,
|
|
"grad_norm": 13.325665473937988,
|
|
"learning_rate": 2.963520467233127e-07,
|
|
"logits/chosen": -1.45703125,
|
|
"logits/rejected": -1.19140625,
|
|
"logps/chosen": -252.0,
|
|
"logps/rejected": -262.5,
|
|
"loss": 0.3212,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 1.07421875,
|
|
"rewards/margins": 2.4140625,
|
|
"rewards/rejected": -1.34375,
|
|
"step": 415
|
|
},
|
|
{
|
|
"epoch": 0.46017699115044247,
|
|
"grad_norm": 15.239510536193848,
|
|
"learning_rate": 2.954707224407485e-07,
|
|
"logits/chosen": -1.30078125,
|
|
"logits/rejected": -1.24609375,
|
|
"logps/chosen": -261.5,
|
|
"logps/rejected": -285.0,
|
|
"loss": 0.3534,
|
|
"rewards/accuracies": 0.78125,
|
|
"rewards/chosen": 1.0625,
|
|
"rewards/margins": 2.3046875,
|
|
"rewards/rejected": -1.2421875,
|
|
"step": 416
|
|
},
|
|
{
|
|
"epoch": 0.461283185840708,
|
|
"grad_norm": 15.735715866088867,
|
|
"learning_rate": 2.945888133372343e-07,
|
|
"logits/chosen": -1.21484375,
|
|
"logits/rejected": -1.12890625,
|
|
"logps/chosen": -287.0,
|
|
"logps/rejected": -288.0,
|
|
"loss": 0.3967,
|
|
"rewards/accuracies": 0.7265625,
|
|
"rewards/chosen": 0.88671875,
|
|
"rewards/margins": 2.125,
|
|
"rewards/rejected": -1.23828125,
|
|
"step": 417
|
|
},
|
|
{
|
|
"epoch": 0.46238938053097345,
|
|
"grad_norm": 15.247976303100586,
|
|
"learning_rate": 2.937063307554295e-07,
|
|
"logits/chosen": -1.32421875,
|
|
"logits/rejected": -1.21484375,
|
|
"logps/chosen": -226.0,
|
|
"logps/rejected": -250.0,
|
|
"loss": 0.3726,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 1.12890625,
|
|
"rewards/margins": 2.375,
|
|
"rewards/rejected": -1.24609375,
|
|
"step": 418
|
|
},
|
|
{
|
|
"epoch": 0.46349557522123896,
|
|
"grad_norm": 12.819127082824707,
|
|
"learning_rate": 2.9282328604536937e-07,
|
|
"logits/chosen": -1.26953125,
|
|
"logits/rejected": -1.2109375,
|
|
"logps/chosen": -249.5,
|
|
"logps/rejected": -271.0,
|
|
"loss": 0.3065,
|
|
"rewards/accuracies": 0.8359375,
|
|
"rewards/chosen": 1.14453125,
|
|
"rewards/margins": 2.578125,
|
|
"rewards/rejected": -1.4296875,
|
|
"step": 419
|
|
},
|
|
{
|
|
"epoch": 0.4646017699115044,
|
|
"grad_norm": 13.217004776000977,
|
|
"learning_rate": 2.9193969056431907e-07,
|
|
"logits/chosen": -1.19921875,
|
|
"logits/rejected": -1.15234375,
|
|
"logps/chosen": -254.5,
|
|
"logps/rejected": -270.0,
|
|
"loss": 0.3139,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 1.09765625,
|
|
"rewards/margins": 2.78125,
|
|
"rewards/rejected": -1.68359375,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 0.4657079646017699,
|
|
"grad_norm": 14.80079174041748,
|
|
"learning_rate": 2.910555556766272e-07,
|
|
"logits/chosen": -1.4375,
|
|
"logits/rejected": -1.234375,
|
|
"logps/chosen": -226.5,
|
|
"logps/rejected": -263.0,
|
|
"loss": 0.3987,
|
|
"rewards/accuracies": 0.7265625,
|
|
"rewards/chosen": 0.94140625,
|
|
"rewards/margins": 2.12109375,
|
|
"rewards/rejected": -1.17578125,
|
|
"step": 421
|
|
},
|
|
{
|
|
"epoch": 0.4668141592920354,
|
|
"grad_norm": 13.704545974731445,
|
|
"learning_rate": 2.9017089275358014e-07,
|
|
"logits/chosen": -1.1953125,
|
|
"logits/rejected": -1.1328125,
|
|
"logps/chosen": -271.0,
|
|
"logps/rejected": -287.0,
|
|
"loss": 0.3016,
|
|
"rewards/accuracies": 0.84375,
|
|
"rewards/chosen": 0.9609375,
|
|
"rewards/margins": 2.546875,
|
|
"rewards/rejected": -1.5859375,
|
|
"step": 422
|
|
},
|
|
{
|
|
"epoch": 0.46792035398230086,
|
|
"grad_norm": 14.536904335021973,
|
|
"learning_rate": 2.8928571317325564e-07,
|
|
"logits/chosen": -1.25,
|
|
"logits/rejected": -1.0703125,
|
|
"logps/chosen": -279.0,
|
|
"logps/rejected": -291.0,
|
|
"loss": 0.3234,
|
|
"rewards/accuracies": 0.8359375,
|
|
"rewards/chosen": 1.19140625,
|
|
"rewards/margins": 2.6171875,
|
|
"rewards/rejected": -1.4296875,
|
|
"step": 423
|
|
},
|
|
{
|
|
"epoch": 0.4690265486725664,
|
|
"grad_norm": 15.45283317565918,
|
|
"learning_rate": 2.8840002832037625e-07,
|
|
"logits/chosen": -1.28515625,
|
|
"logits/rejected": -1.1875,
|
|
"logps/chosen": -261.0,
|
|
"logps/rejected": -283.0,
|
|
"loss": 0.365,
|
|
"rewards/accuracies": 0.78125,
|
|
"rewards/chosen": 1.171875,
|
|
"rewards/margins": 2.234375,
|
|
"rewards/rejected": -1.0703125,
|
|
"step": 424
|
|
},
|
|
{
|
|
"epoch": 0.47013274336283184,
|
|
"grad_norm": 14.761160850524902,
|
|
"learning_rate": 2.8751384958616316e-07,
|
|
"logits/chosen": -1.20703125,
|
|
"logits/rejected": -1.1484375,
|
|
"logps/chosen": -257.0,
|
|
"logps/rejected": -285.0,
|
|
"loss": 0.3295,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 1.2578125,
|
|
"rewards/margins": 2.9296875,
|
|
"rewards/rejected": -1.671875,
|
|
"step": 425
|
|
},
|
|
{
|
|
"epoch": 0.47123893805309736,
|
|
"grad_norm": 14.022229194641113,
|
|
"learning_rate": 2.8662718836818964e-07,
|
|
"logits/chosen": -1.3359375,
|
|
"logits/rejected": -1.1796875,
|
|
"logps/chosen": -249.5,
|
|
"logps/rejected": -275.0,
|
|
"loss": 0.3165,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": 0.984375,
|
|
"rewards/margins": 2.53125,
|
|
"rewards/rejected": -1.546875,
|
|
"step": 426
|
|
},
|
|
{
|
|
"epoch": 0.4723451327433628,
|
|
"grad_norm": 13.972189903259277,
|
|
"learning_rate": 2.8574005607023444e-07,
|
|
"logits/chosen": -1.328125,
|
|
"logits/rejected": -1.15234375,
|
|
"logps/chosen": -253.0,
|
|
"logps/rejected": -286.0,
|
|
"loss": 0.3595,
|
|
"rewards/accuracies": 0.765625,
|
|
"rewards/chosen": 1.17578125,
|
|
"rewards/margins": 2.3359375,
|
|
"rewards/rejected": -1.16015625,
|
|
"step": 427
|
|
},
|
|
{
|
|
"epoch": 0.47345132743362833,
|
|
"grad_norm": 13.316848754882812,
|
|
"learning_rate": 2.848524641021349e-07,
|
|
"logits/chosen": -1.2734375,
|
|
"logits/rejected": -1.15625,
|
|
"logps/chosen": -279.0,
|
|
"logps/rejected": -304.0,
|
|
"loss": 0.2876,
|
|
"rewards/accuracies": 0.8828125,
|
|
"rewards/chosen": 1.14453125,
|
|
"rewards/margins": 2.734375,
|
|
"rewards/rejected": -1.5859375,
|
|
"step": 428
|
|
},
|
|
{
|
|
"epoch": 0.4745575221238938,
|
|
"grad_norm": 15.443883895874023,
|
|
"learning_rate": 2.839644238796407e-07,
|
|
"logits/chosen": -1.24609375,
|
|
"logits/rejected": -1.14453125,
|
|
"logps/chosen": -279.0,
|
|
"logps/rejected": -291.0,
|
|
"loss": 0.3446,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 1.017578125,
|
|
"rewards/margins": 2.453125,
|
|
"rewards/rejected": -1.4375,
|
|
"step": 429
|
|
},
|
|
{
|
|
"epoch": 0.4756637168141593,
|
|
"grad_norm": 14.372305870056152,
|
|
"learning_rate": 2.8307594682426637e-07,
|
|
"logits/chosen": -1.2109375,
|
|
"logits/rejected": -1.1484375,
|
|
"logps/chosen": -260.5,
|
|
"logps/rejected": -309.0,
|
|
"loss": 0.2813,
|
|
"rewards/accuracies": 0.84375,
|
|
"rewards/chosen": 1.0,
|
|
"rewards/margins": 2.9609375,
|
|
"rewards/rejected": -1.95703125,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 0.47676991150442477,
|
|
"grad_norm": 13.688916206359863,
|
|
"learning_rate": 2.8218704436314524e-07,
|
|
"logits/chosen": -1.46875,
|
|
"logits/rejected": -1.22265625,
|
|
"logps/chosen": -253.5,
|
|
"logps/rejected": -276.0,
|
|
"loss": 0.341,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 0.798828125,
|
|
"rewards/margins": 2.171875,
|
|
"rewards/rejected": -1.37109375,
|
|
"step": 431
|
|
},
|
|
{
|
|
"epoch": 0.4778761061946903,
|
|
"grad_norm": 12.372815132141113,
|
|
"learning_rate": 2.8129772792888145e-07,
|
|
"logits/chosen": -1.29296875,
|
|
"logits/rejected": -1.1171875,
|
|
"logps/chosen": -235.0,
|
|
"logps/rejected": -281.0,
|
|
"loss": 0.2966,
|
|
"rewards/accuracies": 0.8359375,
|
|
"rewards/chosen": 0.869140625,
|
|
"rewards/margins": 2.6328125,
|
|
"rewards/rejected": -1.76171875,
|
|
"step": 432
|
|
},
|
|
{
|
|
"epoch": 0.47898230088495575,
|
|
"grad_norm": 15.202491760253906,
|
|
"learning_rate": 2.804080089594039e-07,
|
|
"logits/chosen": -1.26953125,
|
|
"logits/rejected": -1.109375,
|
|
"logps/chosen": -260.0,
|
|
"logps/rejected": -263.0,
|
|
"loss": 0.3812,
|
|
"rewards/accuracies": 0.765625,
|
|
"rewards/chosen": 0.490234375,
|
|
"rewards/margins": 1.98046875,
|
|
"rewards/rejected": -1.484375,
|
|
"step": 433
|
|
},
|
|
{
|
|
"epoch": 0.48008849557522126,
|
|
"grad_norm": 15.252046585083008,
|
|
"learning_rate": 2.7951789889781845e-07,
|
|
"logits/chosen": -1.25,
|
|
"logits/rejected": -1.125,
|
|
"logps/chosen": -261.0,
|
|
"logps/rejected": -299.0,
|
|
"loss": 0.3649,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 0.919921875,
|
|
"rewards/margins": 2.328125,
|
|
"rewards/rejected": -1.40625,
|
|
"step": 434
|
|
},
|
|
{
|
|
"epoch": 0.4811946902654867,
|
|
"grad_norm": 11.937089920043945,
|
|
"learning_rate": 2.786274091922611e-07,
|
|
"logits/chosen": -1.296875,
|
|
"logits/rejected": -1.1484375,
|
|
"logps/chosen": -257.0,
|
|
"logps/rejected": -279.0,
|
|
"loss": 0.2799,
|
|
"rewards/accuracies": 0.84375,
|
|
"rewards/chosen": 0.9375,
|
|
"rewards/margins": 2.703125,
|
|
"rewards/rejected": -1.76953125,
|
|
"step": 435
|
|
},
|
|
{
|
|
"epoch": 0.4823008849557522,
|
|
"grad_norm": 12.86253833770752,
|
|
"learning_rate": 2.7773655129575043e-07,
|
|
"logits/chosen": -1.25,
|
|
"logits/rejected": -1.11328125,
|
|
"logps/chosen": -237.5,
|
|
"logps/rejected": -266.5,
|
|
"loss": 0.3076,
|
|
"rewards/accuracies": 0.8359375,
|
|
"rewards/chosen": 0.802734375,
|
|
"rewards/margins": 2.75,
|
|
"rewards/rejected": -1.953125,
|
|
"step": 436
|
|
},
|
|
{
|
|
"epoch": 0.4834070796460177,
|
|
"grad_norm": 12.546619415283203,
|
|
"learning_rate": 2.7684533666604076e-07,
|
|
"logits/chosen": -1.3828125,
|
|
"logits/rejected": -1.05078125,
|
|
"logps/chosen": -253.5,
|
|
"logps/rejected": -257.0,
|
|
"loss": 0.3184,
|
|
"rewards/accuracies": 0.8515625,
|
|
"rewards/chosen": 0.689453125,
|
|
"rewards/margins": 2.3984375,
|
|
"rewards/rejected": -1.71484375,
|
|
"step": 437
|
|
},
|
|
{
|
|
"epoch": 0.48451327433628316,
|
|
"grad_norm": 18.16977310180664,
|
|
"learning_rate": 2.759537767654744e-07,
|
|
"logits/chosen": -1.1796875,
|
|
"logits/rejected": -1.1796875,
|
|
"logps/chosen": -274.0,
|
|
"logps/rejected": -294.0,
|
|
"loss": 0.387,
|
|
"rewards/accuracies": 0.734375,
|
|
"rewards/chosen": 0.615234375,
|
|
"rewards/margins": 2.3203125,
|
|
"rewards/rejected": -1.703125,
|
|
"step": 438
|
|
},
|
|
{
|
|
"epoch": 0.4856194690265487,
|
|
"grad_norm": 12.303600311279297,
|
|
"learning_rate": 2.750618830608343e-07,
|
|
"logits/chosen": -1.3203125,
|
|
"logits/rejected": -1.06640625,
|
|
"logps/chosen": -235.0,
|
|
"logps/rejected": -242.5,
|
|
"loss": 0.2887,
|
|
"rewards/accuracies": 0.859375,
|
|
"rewards/chosen": 0.74609375,
|
|
"rewards/margins": 2.5546875,
|
|
"rewards/rejected": -1.8125,
|
|
"step": 439
|
|
},
|
|
{
|
|
"epoch": 0.48672566371681414,
|
|
"grad_norm": 13.665416717529297,
|
|
"learning_rate": 2.7416966702319683e-07,
|
|
"logits/chosen": -1.203125,
|
|
"logits/rejected": -1.1171875,
|
|
"logps/chosen": -283.5,
|
|
"logps/rejected": -304.0,
|
|
"loss": 0.2974,
|
|
"rewards/accuracies": 0.8515625,
|
|
"rewards/chosen": 0.880859375,
|
|
"rewards/margins": 2.6328125,
|
|
"rewards/rejected": -1.75390625,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 0.48783185840707965,
|
|
"grad_norm": 14.621048927307129,
|
|
"learning_rate": 2.732771401277838e-07,
|
|
"logits/chosen": -1.21484375,
|
|
"logits/rejected": -1.109375,
|
|
"logps/chosen": -266.5,
|
|
"logps/rejected": -264.5,
|
|
"loss": 0.3651,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 0.4580078125,
|
|
"rewards/margins": 2.140625,
|
|
"rewards/rejected": -1.68359375,
|
|
"step": 441
|
|
},
|
|
{
|
|
"epoch": 0.4889380530973451,
|
|
"grad_norm": 12.872169494628906,
|
|
"learning_rate": 2.7238431385381523e-07,
|
|
"logits/chosen": -1.25,
|
|
"logits/rejected": -1.1484375,
|
|
"logps/chosen": -245.5,
|
|
"logps/rejected": -279.0,
|
|
"loss": 0.3244,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 0.765625,
|
|
"rewards/margins": 2.640625,
|
|
"rewards/rejected": -1.875,
|
|
"step": 442
|
|
},
|
|
{
|
|
"epoch": 0.49004424778761063,
|
|
"grad_norm": 13.446981430053711,
|
|
"learning_rate": 2.714911996843616e-07,
|
|
"logits/chosen": -1.23828125,
|
|
"logits/rejected": -1.14453125,
|
|
"logps/chosen": -263.0,
|
|
"logps/rejected": -300.0,
|
|
"loss": 0.3075,
|
|
"rewards/accuracies": 0.8359375,
|
|
"rewards/chosen": 0.671875,
|
|
"rewards/margins": 2.5703125,
|
|
"rewards/rejected": -1.90234375,
|
|
"step": 443
|
|
},
|
|
{
|
|
"epoch": 0.4911504424778761,
|
|
"grad_norm": 14.347339630126953,
|
|
"learning_rate": 2.7059780910619617e-07,
|
|
"logits/chosen": -1.26953125,
|
|
"logits/rejected": -1.08203125,
|
|
"logps/chosen": -275.0,
|
|
"logps/rejected": -310.0,
|
|
"loss": 0.3042,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 1.0625,
|
|
"rewards/margins": 2.9140625,
|
|
"rewards/rejected": -1.84765625,
|
|
"step": 444
|
|
},
|
|
{
|
|
"epoch": 0.4922566371681416,
|
|
"grad_norm": 14.27387523651123,
|
|
"learning_rate": 2.6970415360964716e-07,
|
|
"logits/chosen": -1.27734375,
|
|
"logits/rejected": -1.2109375,
|
|
"logps/chosen": -237.0,
|
|
"logps/rejected": -258.0,
|
|
"loss": 0.3354,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 0.98046875,
|
|
"rewards/margins": 2.7421875,
|
|
"rewards/rejected": -1.76171875,
|
|
"step": 445
|
|
},
|
|
{
|
|
"epoch": 0.49336283185840707,
|
|
"grad_norm": 13.943259239196777,
|
|
"learning_rate": 2.6881024468845e-07,
|
|
"logits/chosen": -1.15234375,
|
|
"logits/rejected": -1.16796875,
|
|
"logps/chosen": -247.5,
|
|
"logps/rejected": -275.5,
|
|
"loss": 0.3356,
|
|
"rewards/accuracies": 0.7734375,
|
|
"rewards/chosen": 0.69140625,
|
|
"rewards/margins": 2.734375,
|
|
"rewards/rejected": -2.046875,
|
|
"step": 446
|
|
},
|
|
{
|
|
"epoch": 0.4944690265486726,
|
|
"grad_norm": 15.396841049194336,
|
|
"learning_rate": 2.679160938395997e-07,
|
|
"logits/chosen": -1.28515625,
|
|
"logits/rejected": -1.203125,
|
|
"logps/chosen": -251.0,
|
|
"logps/rejected": -283.0,
|
|
"loss": 0.3342,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 0.9921875,
|
|
"rewards/margins": 2.6796875,
|
|
"rewards/rejected": -1.68359375,
|
|
"step": 447
|
|
},
|
|
{
|
|
"epoch": 0.49557522123893805,
|
|
"grad_norm": 12.922630310058594,
|
|
"learning_rate": 2.670217125632027e-07,
|
|
"logits/chosen": -1.21875,
|
|
"logits/rejected": -1.17578125,
|
|
"logps/chosen": -248.5,
|
|
"logps/rejected": -258.5,
|
|
"loss": 0.3361,
|
|
"rewards/accuracies": 0.78125,
|
|
"rewards/chosen": 0.529296875,
|
|
"rewards/margins": 2.1875,
|
|
"rewards/rejected": -1.66015625,
|
|
"step": 448
|
|
},
|
|
{
|
|
"epoch": 0.49668141592920356,
|
|
"grad_norm": 18.911989212036133,
|
|
"learning_rate": 2.661271123623291e-07,
|
|
"logits/chosen": -1.3125,
|
|
"logits/rejected": -1.19921875,
|
|
"logps/chosen": -288.0,
|
|
"logps/rejected": -278.0,
|
|
"loss": 0.4185,
|
|
"rewards/accuracies": 0.765625,
|
|
"rewards/chosen": 0.599609375,
|
|
"rewards/margins": 2.078125,
|
|
"rewards/rejected": -1.4765625,
|
|
"step": 449
|
|
},
|
|
{
|
|
"epoch": 0.497787610619469,
|
|
"grad_norm": 16.120946884155273,
|
|
"learning_rate": 2.652323047428646e-07,
|
|
"logits/chosen": -1.21484375,
|
|
"logits/rejected": -1.109375,
|
|
"logps/chosen": -279.0,
|
|
"logps/rejected": -303.0,
|
|
"loss": 0.363,
|
|
"rewards/accuracies": 0.7734375,
|
|
"rewards/chosen": 0.9609375,
|
|
"rewards/margins": 2.609375,
|
|
"rewards/rejected": -1.65625,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.497787610619469,
|
|
"eval_logits/chosen": -1.2554415464401245,
|
|
"eval_logits/rejected": -1.1580379009246826,
|
|
"eval_logps/chosen": -252.30845642089844,
|
|
"eval_logps/rejected": -276.3631896972656,
|
|
"eval_loss": 0.3314497768878937,
|
|
"eval_rewards/accuracies": 0.8052030205726624,
|
|
"eval_rewards/chosen": 0.85384601354599,
|
|
"eval_rewards/margins": 2.567397356033325,
|
|
"eval_rewards/rejected": -1.7136777639389038,
|
|
"eval_runtime": 193.0141,
|
|
"eval_samples_per_second": 66.591,
|
|
"eval_steps_per_second": 1.041,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 0.49889380530973454,
|
|
"grad_norm": 13.481291770935059,
|
|
"learning_rate": 2.6433730121336283e-07,
|
|
"logits/chosen": -1.3671875,
|
|
"logits/rejected": -1.234375,
|
|
"logps/chosen": -241.5,
|
|
"logps/rejected": -278.0,
|
|
"loss": 0.3044,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": 0.791015625,
|
|
"rewards/margins": 2.7578125,
|
|
"rewards/rejected": -1.9765625,
|
|
"step": 451
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"grad_norm": 15.87637996673584,
|
|
"learning_rate": 2.6344211328489696e-07,
|
|
"logits/chosen": -1.2734375,
|
|
"logits/rejected": -1.1953125,
|
|
"logps/chosen": -269.0,
|
|
"logps/rejected": -291.0,
|
|
"loss": 0.3646,
|
|
"rewards/accuracies": 0.765625,
|
|
"rewards/chosen": 0.68359375,
|
|
"rewards/margins": 2.28515625,
|
|
"rewards/rejected": -1.6015625,
|
|
"step": 452
|
|
},
|
|
{
|
|
"epoch": 0.5011061946902655,
|
|
"grad_norm": 12.020176887512207,
|
|
"learning_rate": 2.625467524709118e-07,
|
|
"logits/chosen": -1.2265625,
|
|
"logits/rejected": -1.10546875,
|
|
"logps/chosen": -255.0,
|
|
"logps/rejected": -283.0,
|
|
"loss": 0.2739,
|
|
"rewards/accuracies": 0.84375,
|
|
"rewards/chosen": 0.91796875,
|
|
"rewards/margins": 2.8828125,
|
|
"rewards/rejected": -1.96484375,
|
|
"step": 453
|
|
},
|
|
{
|
|
"epoch": 0.5022123893805309,
|
|
"grad_norm": 13.10580825805664,
|
|
"learning_rate": 2.616512302870757e-07,
|
|
"logits/chosen": -1.23046875,
|
|
"logits/rejected": -1.109375,
|
|
"logps/chosen": -280.0,
|
|
"logps/rejected": -286.0,
|
|
"loss": 0.33,
|
|
"rewards/accuracies": 0.8359375,
|
|
"rewards/chosen": 0.7685546875,
|
|
"rewards/margins": 2.3203125,
|
|
"rewards/rejected": -1.55078125,
|
|
"step": 454
|
|
},
|
|
{
|
|
"epoch": 0.5033185840707964,
|
|
"grad_norm": 15.729186058044434,
|
|
"learning_rate": 2.607555582511326e-07,
|
|
"logits/chosen": -1.3828125,
|
|
"logits/rejected": -1.140625,
|
|
"logps/chosen": -289.0,
|
|
"logps/rejected": -285.0,
|
|
"loss": 0.3862,
|
|
"rewards/accuracies": 0.734375,
|
|
"rewards/chosen": 0.671875,
|
|
"rewards/margins": 2.1796875,
|
|
"rewards/rejected": -1.50390625,
|
|
"step": 455
|
|
},
|
|
{
|
|
"epoch": 0.504424778761062,
|
|
"grad_norm": 13.986113548278809,
|
|
"learning_rate": 2.5985974788275374e-07,
|
|
"logits/chosen": -1.3828125,
|
|
"logits/rejected": -1.203125,
|
|
"logps/chosen": -230.5,
|
|
"logps/rejected": -267.0,
|
|
"loss": 0.3423,
|
|
"rewards/accuracies": 0.765625,
|
|
"rewards/chosen": 1.1640625,
|
|
"rewards/margins": 2.8515625,
|
|
"rewards/rejected": -1.6875,
|
|
"step": 456
|
|
},
|
|
{
|
|
"epoch": 0.5055309734513275,
|
|
"grad_norm": 14.71418285369873,
|
|
"learning_rate": 2.5896381070338933e-07,
|
|
"logits/chosen": -1.2890625,
|
|
"logits/rejected": -1.16796875,
|
|
"logps/chosen": -274.0,
|
|
"logps/rejected": -273.0,
|
|
"loss": 0.3394,
|
|
"rewards/accuracies": 0.765625,
|
|
"rewards/chosen": 0.775390625,
|
|
"rewards/margins": 2.19921875,
|
|
"rewards/rejected": -1.421875,
|
|
"step": 457
|
|
},
|
|
{
|
|
"epoch": 0.5066371681415929,
|
|
"grad_norm": 13.852214813232422,
|
|
"learning_rate": 2.5806775823612076e-07,
|
|
"logits/chosen": -1.34375,
|
|
"logits/rejected": -1.203125,
|
|
"logps/chosen": -244.0,
|
|
"logps/rejected": -284.0,
|
|
"loss": 0.3206,
|
|
"rewards/accuracies": 0.8359375,
|
|
"rewards/chosen": 0.888671875,
|
|
"rewards/margins": 2.5078125,
|
|
"rewards/rejected": -1.62109375,
|
|
"step": 458
|
|
},
|
|
{
|
|
"epoch": 0.5077433628318584,
|
|
"grad_norm": 13.243755340576172,
|
|
"learning_rate": 2.5717160200551213e-07,
|
|
"logits/chosen": -1.2421875,
|
|
"logits/rejected": -1.0546875,
|
|
"logps/chosen": -242.0,
|
|
"logps/rejected": -268.0,
|
|
"loss": 0.3353,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 0.626953125,
|
|
"rewards/margins": 2.453125,
|
|
"rewards/rejected": -1.828125,
|
|
"step": 459
|
|
},
|
|
{
|
|
"epoch": 0.5088495575221239,
|
|
"grad_norm": 13.583708763122559,
|
|
"learning_rate": 2.562753535374621e-07,
|
|
"logits/chosen": -1.25,
|
|
"logits/rejected": -1.1015625,
|
|
"logps/chosen": -244.5,
|
|
"logps/rejected": -266.5,
|
|
"loss": 0.3068,
|
|
"rewards/accuracies": 0.84375,
|
|
"rewards/chosen": 0.912109375,
|
|
"rewards/margins": 2.515625,
|
|
"rewards/rejected": -1.6015625,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 0.5099557522123894,
|
|
"grad_norm": 14.627636909484863,
|
|
"learning_rate": 2.553790243590556e-07,
|
|
"logits/chosen": -1.2109375,
|
|
"logits/rejected": -1.21875,
|
|
"logps/chosen": -230.0,
|
|
"logps/rejected": -265.0,
|
|
"loss": 0.3564,
|
|
"rewards/accuracies": 0.78125,
|
|
"rewards/chosen": 0.998046875,
|
|
"rewards/margins": 2.3984375,
|
|
"rewards/rejected": -1.40234375,
|
|
"step": 461
|
|
},
|
|
{
|
|
"epoch": 0.5110619469026548,
|
|
"grad_norm": 16.162841796875,
|
|
"learning_rate": 2.5448262599841556e-07,
|
|
"logits/chosen": -1.3984375,
|
|
"logits/rejected": -1.23046875,
|
|
"logps/chosen": -256.5,
|
|
"logps/rejected": -280.0,
|
|
"loss": 0.3297,
|
|
"rewards/accuracies": 0.84375,
|
|
"rewards/chosen": 0.83984375,
|
|
"rewards/margins": 2.453125,
|
|
"rewards/rejected": -1.6171875,
|
|
"step": 462
|
|
},
|
|
{
|
|
"epoch": 0.5121681415929203,
|
|
"grad_norm": 13.07744026184082,
|
|
"learning_rate": 2.535861699845549e-07,
|
|
"logits/chosen": -1.203125,
|
|
"logits/rejected": -1.1796875,
|
|
"logps/chosen": -244.5,
|
|
"logps/rejected": -279.0,
|
|
"loss": 0.3268,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": 0.818359375,
|
|
"rewards/margins": 2.5,
|
|
"rewards/rejected": -1.68359375,
|
|
"step": 463
|
|
},
|
|
{
|
|
"epoch": 0.5132743362831859,
|
|
"grad_norm": 13.961365699768066,
|
|
"learning_rate": 2.526896678472279e-07,
|
|
"logits/chosen": -1.265625,
|
|
"logits/rejected": -1.12109375,
|
|
"logps/chosen": -267.0,
|
|
"logps/rejected": -273.0,
|
|
"loss": 0.3112,
|
|
"rewards/accuracies": 0.8359375,
|
|
"rewards/chosen": 0.732421875,
|
|
"rewards/margins": 2.671875,
|
|
"rewards/rejected": -1.94140625,
|
|
"step": 464
|
|
},
|
|
{
|
|
"epoch": 0.5143805309734514,
|
|
"grad_norm": 12.809479713439941,
|
|
"learning_rate": 2.51793131116782e-07,
|
|
"logits/chosen": -1.328125,
|
|
"logits/rejected": -1.15625,
|
|
"logps/chosen": -223.0,
|
|
"logps/rejected": -244.5,
|
|
"loss": 0.3162,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 0.744140625,
|
|
"rewards/margins": 2.7109375,
|
|
"rewards/rejected": -1.9765625,
|
|
"step": 465
|
|
},
|
|
{
|
|
"epoch": 0.5154867256637168,
|
|
"grad_norm": 12.547861099243164,
|
|
"learning_rate": 2.5089657132400964e-07,
|
|
"logits/chosen": -1.14453125,
|
|
"logits/rejected": -1.08984375,
|
|
"logps/chosen": -261.0,
|
|
"logps/rejected": -273.0,
|
|
"loss": 0.2895,
|
|
"rewards/accuracies": 0.84375,
|
|
"rewards/chosen": 0.85546875,
|
|
"rewards/margins": 2.6796875,
|
|
"rewards/rejected": -1.828125,
|
|
"step": 466
|
|
},
|
|
{
|
|
"epoch": 0.5165929203539823,
|
|
"grad_norm": 13.852119445800781,
|
|
"learning_rate": 2.5e-07,
|
|
"logits/chosen": -1.22265625,
|
|
"logits/rejected": -1.18359375,
|
|
"logps/chosen": -256.5,
|
|
"logps/rejected": -294.0,
|
|
"loss": 0.3104,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 0.8046875,
|
|
"rewards/margins": 2.6875,
|
|
"rewards/rejected": -1.8828125,
|
|
"step": 467
|
|
},
|
|
{
|
|
"epoch": 0.5176991150442478,
|
|
"grad_norm": 12.272007942199707,
|
|
"learning_rate": 2.491034286759903e-07,
|
|
"logits/chosen": -1.26953125,
|
|
"logits/rejected": -1.14453125,
|
|
"logps/chosen": -254.0,
|
|
"logps/rejected": -284.0,
|
|
"loss": 0.2965,
|
|
"rewards/accuracies": 0.890625,
|
|
"rewards/chosen": 0.87109375,
|
|
"rewards/margins": 2.75,
|
|
"rewards/rejected": -1.87109375,
|
|
"step": 468
|
|
},
|
|
{
|
|
"epoch": 0.5188053097345132,
|
|
"grad_norm": 13.289767265319824,
|
|
"learning_rate": 2.482068688832181e-07,
|
|
"logits/chosen": -1.26171875,
|
|
"logits/rejected": -1.13671875,
|
|
"logps/chosen": -236.5,
|
|
"logps/rejected": -258.0,
|
|
"loss": 0.3045,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 0.90234375,
|
|
"rewards/margins": 2.734375,
|
|
"rewards/rejected": -1.82421875,
|
|
"step": 469
|
|
},
|
|
{
|
|
"epoch": 0.5199115044247787,
|
|
"grad_norm": 13.52807903289795,
|
|
"learning_rate": 2.4731033215277213e-07,
|
|
"logits/chosen": -1.2578125,
|
|
"logits/rejected": -1.17578125,
|
|
"logps/chosen": -251.5,
|
|
"logps/rejected": -285.0,
|
|
"loss": 0.3189,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 0.828125,
|
|
"rewards/margins": 2.640625,
|
|
"rewards/rejected": -1.8203125,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 0.5210176991150443,
|
|
"grad_norm": 14.39229679107666,
|
|
"learning_rate": 2.464138300154451e-07,
|
|
"logits/chosen": -1.3515625,
|
|
"logits/rejected": -1.1640625,
|
|
"logps/chosen": -254.5,
|
|
"logps/rejected": -280.0,
|
|
"loss": 0.3246,
|
|
"rewards/accuracies": 0.8515625,
|
|
"rewards/chosen": 0.787109375,
|
|
"rewards/margins": 2.6015625,
|
|
"rewards/rejected": -1.81640625,
|
|
"step": 471
|
|
},
|
|
{
|
|
"epoch": 0.5221238938053098,
|
|
"grad_norm": 14.51116943359375,
|
|
"learning_rate": 2.455173740015845e-07,
|
|
"logits/chosen": -1.30859375,
|
|
"logits/rejected": -1.12109375,
|
|
"logps/chosen": -246.0,
|
|
"logps/rejected": -269.5,
|
|
"loss": 0.3957,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": 0.65234375,
|
|
"rewards/margins": 2.35546875,
|
|
"rewards/rejected": -1.70703125,
|
|
"step": 472
|
|
},
|
|
{
|
|
"epoch": 0.5232300884955752,
|
|
"grad_norm": 14.468950271606445,
|
|
"learning_rate": 2.4462097564094445e-07,
|
|
"logits/chosen": -1.3125,
|
|
"logits/rejected": -1.2421875,
|
|
"logps/chosen": -250.0,
|
|
"logps/rejected": -296.0,
|
|
"loss": 0.3396,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 0.703125,
|
|
"rewards/margins": 2.546875,
|
|
"rewards/rejected": -1.83984375,
|
|
"step": 473
|
|
},
|
|
{
|
|
"epoch": 0.5243362831858407,
|
|
"grad_norm": 12.03284740447998,
|
|
"learning_rate": 2.4372464646253794e-07,
|
|
"logits/chosen": -1.1953125,
|
|
"logits/rejected": -1.09765625,
|
|
"logps/chosen": -255.0,
|
|
"logps/rejected": -277.0,
|
|
"loss": 0.266,
|
|
"rewards/accuracies": 0.859375,
|
|
"rewards/chosen": 1.017578125,
|
|
"rewards/margins": 2.96875,
|
|
"rewards/rejected": -1.9453125,
|
|
"step": 474
|
|
},
|
|
{
|
|
"epoch": 0.5254424778761062,
|
|
"grad_norm": 19.379676818847656,
|
|
"learning_rate": 2.4282839799448785e-07,
|
|
"logits/chosen": -1.22265625,
|
|
"logits/rejected": -1.17578125,
|
|
"logps/chosen": -277.0,
|
|
"logps/rejected": -316.0,
|
|
"loss": 0.3512,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 0.763671875,
|
|
"rewards/margins": 2.7578125,
|
|
"rewards/rejected": -1.99609375,
|
|
"step": 475
|
|
},
|
|
{
|
|
"epoch": 0.5265486725663717,
|
|
"grad_norm": 13.170727729797363,
|
|
"learning_rate": 2.419322417638792e-07,
|
|
"logits/chosen": -1.27734375,
|
|
"logits/rejected": -1.1171875,
|
|
"logps/chosen": -255.5,
|
|
"logps/rejected": -274.0,
|
|
"loss": 0.3296,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 0.4990234375,
|
|
"rewards/margins": 2.421875,
|
|
"rewards/rejected": -1.9296875,
|
|
"step": 476
|
|
},
|
|
{
|
|
"epoch": 0.5276548672566371,
|
|
"grad_norm": 10.943991661071777,
|
|
"learning_rate": 2.410361892966107e-07,
|
|
"logits/chosen": -1.3046875,
|
|
"logits/rejected": -1.11328125,
|
|
"logps/chosen": -223.5,
|
|
"logps/rejected": -244.0,
|
|
"loss": 0.2629,
|
|
"rewards/accuracies": 0.84375,
|
|
"rewards/chosen": 0.806640625,
|
|
"rewards/margins": 3.1171875,
|
|
"rewards/rejected": -2.3125,
|
|
"step": 477
|
|
},
|
|
{
|
|
"epoch": 0.5287610619469026,
|
|
"grad_norm": 14.212424278259277,
|
|
"learning_rate": 2.401402521172463e-07,
|
|
"logits/chosen": -1.20703125,
|
|
"logits/rejected": -1.2109375,
|
|
"logps/chosen": -249.5,
|
|
"logps/rejected": -274.0,
|
|
"loss": 0.3509,
|
|
"rewards/accuracies": 0.84375,
|
|
"rewards/chosen": 0.65625,
|
|
"rewards/margins": 2.4140625,
|
|
"rewards/rejected": -1.75,
|
|
"step": 478
|
|
},
|
|
{
|
|
"epoch": 0.5298672566371682,
|
|
"grad_norm": 11.761503219604492,
|
|
"learning_rate": 2.392444417488673e-07,
|
|
"logits/chosen": -1.41796875,
|
|
"logits/rejected": -1.234375,
|
|
"logps/chosen": -234.5,
|
|
"logps/rejected": -278.0,
|
|
"loss": 0.2504,
|
|
"rewards/accuracies": 0.8671875,
|
|
"rewards/chosen": 1.00390625,
|
|
"rewards/margins": 2.953125,
|
|
"rewards/rejected": -1.953125,
|
|
"step": 479
|
|
},
|
|
{
|
|
"epoch": 0.5309734513274337,
|
|
"grad_norm": 15.108154296875,
|
|
"learning_rate": 2.3834876971292433e-07,
|
|
"logits/chosen": -1.2421875,
|
|
"logits/rejected": -1.2265625,
|
|
"logps/chosen": -285.0,
|
|
"logps/rejected": -303.0,
|
|
"loss": 0.3124,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 0.97265625,
|
|
"rewards/margins": 3.0546875,
|
|
"rewards/rejected": -2.0859375,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 0.5320796460176991,
|
|
"grad_norm": 13.407039642333984,
|
|
"learning_rate": 2.3745324752908822e-07,
|
|
"logits/chosen": -1.20703125,
|
|
"logits/rejected": -1.1796875,
|
|
"logps/chosen": -253.0,
|
|
"logps/rejected": -283.0,
|
|
"loss": 0.2827,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 0.830078125,
|
|
"rewards/margins": 2.71875,
|
|
"rewards/rejected": -1.890625,
|
|
"step": 481
|
|
},
|
|
{
|
|
"epoch": 0.5331858407079646,
|
|
"grad_norm": 13.764703750610352,
|
|
"learning_rate": 2.365578867151031e-07,
|
|
"logits/chosen": -1.27734375,
|
|
"logits/rejected": -1.1640625,
|
|
"logps/chosen": -249.0,
|
|
"logps/rejected": -260.0,
|
|
"loss": 0.3393,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 0.607421875,
|
|
"rewards/margins": 2.8359375,
|
|
"rewards/rejected": -2.2265625,
|
|
"step": 482
|
|
},
|
|
{
|
|
"epoch": 0.5342920353982301,
|
|
"grad_norm": 14.601144790649414,
|
|
"learning_rate": 2.3566269878663714e-07,
|
|
"logits/chosen": -1.30078125,
|
|
"logits/rejected": -1.2109375,
|
|
"logps/chosen": -264.5,
|
|
"logps/rejected": -291.0,
|
|
"loss": 0.3486,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": 0.587890625,
|
|
"rewards/margins": 2.3046875,
|
|
"rewards/rejected": -1.71484375,
|
|
"step": 483
|
|
},
|
|
{
|
|
"epoch": 0.5353982300884956,
|
|
"grad_norm": 13.12956714630127,
|
|
"learning_rate": 2.347676952571354e-07,
|
|
"logits/chosen": -1.26953125,
|
|
"logits/rejected": -1.16015625,
|
|
"logps/chosen": -218.0,
|
|
"logps/rejected": -243.0,
|
|
"loss": 0.3522,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 0.751953125,
|
|
"rewards/margins": 2.7265625,
|
|
"rewards/rejected": -1.9765625,
|
|
"step": 484
|
|
},
|
|
{
|
|
"epoch": 0.536504424778761,
|
|
"grad_norm": 13.433536529541016,
|
|
"learning_rate": 2.3387288763767095e-07,
|
|
"logits/chosen": -1.22265625,
|
|
"logits/rejected": -1.0703125,
|
|
"logps/chosen": -266.0,
|
|
"logps/rejected": -267.0,
|
|
"loss": 0.3058,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 0.6484375,
|
|
"rewards/margins": 2.7734375,
|
|
"rewards/rejected": -2.125,
|
|
"step": 485
|
|
},
|
|
{
|
|
"epoch": 0.5376106194690266,
|
|
"grad_norm": 13.798343658447266,
|
|
"learning_rate": 2.329782874367973e-07,
|
|
"logits/chosen": -1.25,
|
|
"logits/rejected": -1.125,
|
|
"logps/chosen": -250.5,
|
|
"logps/rejected": -260.0,
|
|
"loss": 0.2997,
|
|
"rewards/accuracies": 0.8359375,
|
|
"rewards/chosen": 0.568359375,
|
|
"rewards/margins": 2.703125,
|
|
"rewards/rejected": -2.125,
|
|
"step": 486
|
|
},
|
|
{
|
|
"epoch": 0.5387168141592921,
|
|
"grad_norm": 13.606473922729492,
|
|
"learning_rate": 2.3208390616040025e-07,
|
|
"logits/chosen": -1.234375,
|
|
"logits/rejected": -1.20703125,
|
|
"logps/chosen": -265.5,
|
|
"logps/rejected": -323.0,
|
|
"loss": 0.3473,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": 0.7421875,
|
|
"rewards/margins": 2.4609375,
|
|
"rewards/rejected": -1.71875,
|
|
"step": 487
|
|
},
|
|
{
|
|
"epoch": 0.5398230088495575,
|
|
"grad_norm": 14.441394805908203,
|
|
"learning_rate": 2.3118975531155003e-07,
|
|
"logits/chosen": -1.22265625,
|
|
"logits/rejected": -1.14453125,
|
|
"logps/chosen": -257.5,
|
|
"logps/rejected": -281.0,
|
|
"loss": 0.3566,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": 0.501953125,
|
|
"rewards/margins": 2.21875,
|
|
"rewards/rejected": -1.71875,
|
|
"step": 488
|
|
},
|
|
{
|
|
"epoch": 0.540929203539823,
|
|
"grad_norm": 13.99435043334961,
|
|
"learning_rate": 2.3029584639035284e-07,
|
|
"logits/chosen": -1.24609375,
|
|
"logits/rejected": -1.16796875,
|
|
"logps/chosen": -251.0,
|
|
"logps/rejected": -293.0,
|
|
"loss": 0.3419,
|
|
"rewards/accuracies": 0.765625,
|
|
"rewards/chosen": 0.701171875,
|
|
"rewards/margins": 2.5234375,
|
|
"rewards/rejected": -1.828125,
|
|
"step": 489
|
|
},
|
|
{
|
|
"epoch": 0.5420353982300885,
|
|
"grad_norm": 12.558284759521484,
|
|
"learning_rate": 2.294021908938039e-07,
|
|
"logits/chosen": -1.26953125,
|
|
"logits/rejected": -1.05859375,
|
|
"logps/chosen": -243.0,
|
|
"logps/rejected": -249.5,
|
|
"loss": 0.2931,
|
|
"rewards/accuracies": 0.859375,
|
|
"rewards/chosen": 0.7470703125,
|
|
"rewards/margins": 2.9140625,
|
|
"rewards/rejected": -2.1640625,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 0.543141592920354,
|
|
"grad_norm": 13.195667266845703,
|
|
"learning_rate": 2.285088003156384e-07,
|
|
"logits/chosen": -1.20703125,
|
|
"logits/rejected": -1.0625,
|
|
"logps/chosen": -268.0,
|
|
"logps/rejected": -308.0,
|
|
"loss": 0.33,
|
|
"rewards/accuracies": 0.84375,
|
|
"rewards/chosen": 0.669921875,
|
|
"rewards/margins": 2.5234375,
|
|
"rewards/rejected": -1.8515625,
|
|
"step": 491
|
|
},
|
|
{
|
|
"epoch": 0.5442477876106194,
|
|
"grad_norm": 13.966536521911621,
|
|
"learning_rate": 2.2761568614618472e-07,
|
|
"logits/chosen": -1.33203125,
|
|
"logits/rejected": -1.3203125,
|
|
"logps/chosen": -250.5,
|
|
"logps/rejected": -266.0,
|
|
"loss": 0.3732,
|
|
"rewards/accuracies": 0.7734375,
|
|
"rewards/chosen": 0.763671875,
|
|
"rewards/margins": 2.3203125,
|
|
"rewards/rejected": -1.5546875,
|
|
"step": 492
|
|
},
|
|
{
|
|
"epoch": 0.5453539823008849,
|
|
"grad_norm": 13.840253829956055,
|
|
"learning_rate": 2.2672285987221625e-07,
|
|
"logits/chosen": -1.3359375,
|
|
"logits/rejected": -1.16015625,
|
|
"logps/chosen": -263.0,
|
|
"logps/rejected": -279.5,
|
|
"loss": 0.3326,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": 0.62109375,
|
|
"rewards/margins": 2.4140625,
|
|
"rewards/rejected": -1.7890625,
|
|
"step": 493
|
|
},
|
|
{
|
|
"epoch": 0.5464601769911505,
|
|
"grad_norm": 13.471453666687012,
|
|
"learning_rate": 2.2583033297680315e-07,
|
|
"logits/chosen": -1.203125,
|
|
"logits/rejected": -1.06640625,
|
|
"logps/chosen": -274.0,
|
|
"logps/rejected": -307.0,
|
|
"loss": 0.3214,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": 0.6025390625,
|
|
"rewards/margins": 2.6953125,
|
|
"rewards/rejected": -2.08984375,
|
|
"step": 494
|
|
},
|
|
{
|
|
"epoch": 0.547566371681416,
|
|
"grad_norm": 13.170902252197266,
|
|
"learning_rate": 2.2493811693916567e-07,
|
|
"logits/chosen": -1.3984375,
|
|
"logits/rejected": -1.13671875,
|
|
"logps/chosen": -255.5,
|
|
"logps/rejected": -285.0,
|
|
"loss": 0.2704,
|
|
"rewards/accuracies": 0.8515625,
|
|
"rewards/chosen": 0.822265625,
|
|
"rewards/margins": 2.7421875,
|
|
"rewards/rejected": -1.91796875,
|
|
"step": 495
|
|
},
|
|
{
|
|
"epoch": 0.5486725663716814,
|
|
"grad_norm": 14.177343368530273,
|
|
"learning_rate": 2.2404622323452562e-07,
|
|
"logits/chosen": -1.30078125,
|
|
"logits/rejected": -1.171875,
|
|
"logps/chosen": -238.0,
|
|
"logps/rejected": -284.0,
|
|
"loss": 0.3352,
|
|
"rewards/accuracies": 0.8359375,
|
|
"rewards/chosen": 0.916015625,
|
|
"rewards/margins": 2.515625,
|
|
"rewards/rejected": -1.59765625,
|
|
"step": 496
|
|
},
|
|
{
|
|
"epoch": 0.5497787610619469,
|
|
"grad_norm": 10.760467529296875,
|
|
"learning_rate": 2.2315466333395924e-07,
|
|
"logits/chosen": -1.21484375,
|
|
"logits/rejected": -1.19921875,
|
|
"logps/chosen": -222.5,
|
|
"logps/rejected": -285.0,
|
|
"loss": 0.2274,
|
|
"rewards/accuracies": 0.8828125,
|
|
"rewards/chosen": 1.1875,
|
|
"rewards/margins": 3.3515625,
|
|
"rewards/rejected": -2.1640625,
|
|
"step": 497
|
|
},
|
|
{
|
|
"epoch": 0.5508849557522124,
|
|
"grad_norm": 15.023584365844727,
|
|
"learning_rate": 2.222634487042496e-07,
|
|
"logits/chosen": -1.296875,
|
|
"logits/rejected": -1.2109375,
|
|
"logps/chosen": -256.5,
|
|
"logps/rejected": -279.5,
|
|
"loss": 0.3246,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 0.76953125,
|
|
"rewards/margins": 2.515625,
|
|
"rewards/rejected": -1.75,
|
|
"step": 498
|
|
},
|
|
{
|
|
"epoch": 0.5519911504424779,
|
|
"grad_norm": 12.802349090576172,
|
|
"learning_rate": 2.2137259080773896e-07,
|
|
"logits/chosen": -1.28515625,
|
|
"logits/rejected": -1.21875,
|
|
"logps/chosen": -246.5,
|
|
"logps/rejected": -256.0,
|
|
"loss": 0.2918,
|
|
"rewards/accuracies": 0.8828125,
|
|
"rewards/chosen": 0.83203125,
|
|
"rewards/margins": 2.8203125,
|
|
"rewards/rejected": -1.98828125,
|
|
"step": 499
|
|
},
|
|
{
|
|
"epoch": 0.5530973451327433,
|
|
"grad_norm": 13.720869064331055,
|
|
"learning_rate": 2.204821011021815e-07,
|
|
"logits/chosen": -1.19921875,
|
|
"logits/rejected": -1.09765625,
|
|
"logps/chosen": -242.0,
|
|
"logps/rejected": -278.5,
|
|
"loss": 0.3394,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 0.6025390625,
|
|
"rewards/margins": 2.28125,
|
|
"rewards/rejected": -1.6796875,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.5530973451327433,
|
|
"eval_logits/chosen": -1.2517879009246826,
|
|
"eval_logits/rejected": -1.1513915061950684,
|
|
"eval_logps/chosen": -252.49253845214844,
|
|
"eval_logps/rejected": -277.5074768066406,
|
|
"eval_loss": 0.3277411162853241,
|
|
"eval_rewards/accuracies": 0.8062752485275269,
|
|
"eval_rewards/chosen": 0.83104008436203,
|
|
"eval_rewards/margins": 2.661613702774048,
|
|
"eval_rewards/rejected": -1.8305736780166626,
|
|
"eval_runtime": 193.0734,
|
|
"eval_samples_per_second": 66.571,
|
|
"eval_steps_per_second": 1.041,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 0.5542035398230089,
|
|
"grad_norm": 13.983589172363281,
|
|
"learning_rate": 2.195919910405961e-07,
|
|
"logits/chosen": -1.1875,
|
|
"logits/rejected": -1.0546875,
|
|
"logps/chosen": -243.5,
|
|
"logps/rejected": -268.0,
|
|
"loss": 0.3415,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 0.810546875,
|
|
"rewards/margins": 2.7109375,
|
|
"rewards/rejected": -1.89453125,
|
|
"step": 501
|
|
},
|
|
{
|
|
"epoch": 0.5553097345132744,
|
|
"grad_norm": 13.887030601501465,
|
|
"learning_rate": 2.1870227207111853e-07,
|
|
"logits/chosen": -1.2578125,
|
|
"logits/rejected": -1.0703125,
|
|
"logps/chosen": -271.0,
|
|
"logps/rejected": -282.0,
|
|
"loss": 0.3074,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 0.68359375,
|
|
"rewards/margins": 2.65625,
|
|
"rewards/rejected": -1.9765625,
|
|
"step": 502
|
|
},
|
|
{
|
|
"epoch": 0.5564159292035398,
|
|
"grad_norm": 13.680130004882812,
|
|
"learning_rate": 2.1781295563685476e-07,
|
|
"logits/chosen": -1.15625,
|
|
"logits/rejected": -1.009765625,
|
|
"logps/chosen": -280.0,
|
|
"logps/rejected": -288.0,
|
|
"loss": 0.3024,
|
|
"rewards/accuracies": 0.84375,
|
|
"rewards/chosen": 0.8046875,
|
|
"rewards/margins": 2.8828125,
|
|
"rewards/rejected": -2.0859375,
|
|
"step": 503
|
|
},
|
|
{
|
|
"epoch": 0.5575221238938053,
|
|
"grad_norm": 14.351888656616211,
|
|
"learning_rate": 2.1692405317573366e-07,
|
|
"logits/chosen": -1.28515625,
|
|
"logits/rejected": -1.16796875,
|
|
"logps/chosen": -257.0,
|
|
"logps/rejected": -259.5,
|
|
"loss": 0.3655,
|
|
"rewards/accuracies": 0.7734375,
|
|
"rewards/chosen": 0.689453125,
|
|
"rewards/margins": 2.515625,
|
|
"rewards/rejected": -1.82421875,
|
|
"step": 504
|
|
},
|
|
{
|
|
"epoch": 0.5586283185840708,
|
|
"grad_norm": 13.825343132019043,
|
|
"learning_rate": 2.1603557612035932e-07,
|
|
"logits/chosen": -1.1796875,
|
|
"logits/rejected": -1.1875,
|
|
"logps/chosen": -274.0,
|
|
"logps/rejected": -302.0,
|
|
"loss": 0.2957,
|
|
"rewards/accuracies": 0.8515625,
|
|
"rewards/chosen": 1.046875,
|
|
"rewards/margins": 2.78125,
|
|
"rewards/rejected": -1.73828125,
|
|
"step": 505
|
|
},
|
|
{
|
|
"epoch": 0.5597345132743363,
|
|
"grad_norm": 15.617433547973633,
|
|
"learning_rate": 2.1514753589786516e-07,
|
|
"logits/chosen": -1.25,
|
|
"logits/rejected": -1.1484375,
|
|
"logps/chosen": -257.5,
|
|
"logps/rejected": -283.0,
|
|
"loss": 0.3659,
|
|
"rewards/accuracies": 0.8359375,
|
|
"rewards/chosen": 0.775390625,
|
|
"rewards/margins": 2.359375,
|
|
"rewards/rejected": -1.578125,
|
|
"step": 506
|
|
},
|
|
{
|
|
"epoch": 0.5608407079646017,
|
|
"grad_norm": 12.907238960266113,
|
|
"learning_rate": 2.1425994392976559e-07,
|
|
"logits/chosen": -1.22265625,
|
|
"logits/rejected": -1.1875,
|
|
"logps/chosen": -253.5,
|
|
"logps/rejected": -284.0,
|
|
"loss": 0.3268,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 0.8828125,
|
|
"rewards/margins": 2.875,
|
|
"rewards/rejected": -1.98828125,
|
|
"step": 507
|
|
},
|
|
{
|
|
"epoch": 0.5619469026548672,
|
|
"grad_norm": 12.990286827087402,
|
|
"learning_rate": 2.1337281163181034e-07,
|
|
"logits/chosen": -1.31640625,
|
|
"logits/rejected": -1.16015625,
|
|
"logps/chosen": -280.0,
|
|
"logps/rejected": -278.0,
|
|
"loss": 0.2744,
|
|
"rewards/accuracies": 0.859375,
|
|
"rewards/chosen": 0.853515625,
|
|
"rewards/margins": 2.7265625,
|
|
"rewards/rejected": -1.875,
|
|
"step": 508
|
|
},
|
|
{
|
|
"epoch": 0.5630530973451328,
|
|
"grad_norm": 10.47897720336914,
|
|
"learning_rate": 2.1248615041383682e-07,
|
|
"logits/chosen": -1.2421875,
|
|
"logits/rejected": -1.1328125,
|
|
"logps/chosen": -234.5,
|
|
"logps/rejected": -279.5,
|
|
"loss": 0.2341,
|
|
"rewards/accuracies": 0.875,
|
|
"rewards/chosen": 0.822265625,
|
|
"rewards/margins": 3.046875,
|
|
"rewards/rejected": -2.21875,
|
|
"step": 509
|
|
},
|
|
{
|
|
"epoch": 0.5641592920353983,
|
|
"grad_norm": 12.620576858520508,
|
|
"learning_rate": 2.1159997167962378e-07,
|
|
"logits/chosen": -1.2421875,
|
|
"logits/rejected": -1.078125,
|
|
"logps/chosen": -229.5,
|
|
"logps/rejected": -265.0,
|
|
"loss": 0.3217,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 0.779296875,
|
|
"rewards/margins": 2.5625,
|
|
"rewards/rejected": -1.78515625,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 0.5652654867256637,
|
|
"grad_norm": 14.882222175598145,
|
|
"learning_rate": 2.1071428682674436e-07,
|
|
"logits/chosen": -1.234375,
|
|
"logits/rejected": -1.140625,
|
|
"logps/chosen": -258.0,
|
|
"logps/rejected": -293.0,
|
|
"loss": 0.3461,
|
|
"rewards/accuracies": 0.7734375,
|
|
"rewards/chosen": 1.03125,
|
|
"rewards/margins": 2.65625,
|
|
"rewards/rejected": -1.625,
|
|
"step": 511
|
|
},
|
|
{
|
|
"epoch": 0.5663716814159292,
|
|
"grad_norm": 14.687088966369629,
|
|
"learning_rate": 2.098291072464199e-07,
|
|
"logits/chosen": -1.234375,
|
|
"logits/rejected": -1.21875,
|
|
"logps/chosen": -250.5,
|
|
"logps/rejected": -307.0,
|
|
"loss": 0.3347,
|
|
"rewards/accuracies": 0.734375,
|
|
"rewards/chosen": 1.025390625,
|
|
"rewards/margins": 2.8203125,
|
|
"rewards/rejected": -1.796875,
|
|
"step": 512
|
|
},
|
|
{
|
|
"epoch": 0.5674778761061947,
|
|
"grad_norm": 13.261859893798828,
|
|
"learning_rate": 2.0894444432337282e-07,
|
|
"logits/chosen": -1.41796875,
|
|
"logits/rejected": -1.18359375,
|
|
"logps/chosen": -252.5,
|
|
"logps/rejected": -263.0,
|
|
"loss": 0.2804,
|
|
"rewards/accuracies": 0.8515625,
|
|
"rewards/chosen": 0.830078125,
|
|
"rewards/margins": 2.796875,
|
|
"rewards/rejected": -1.96875,
|
|
"step": 513
|
|
},
|
|
{
|
|
"epoch": 0.5685840707964602,
|
|
"grad_norm": 15.263360977172852,
|
|
"learning_rate": 2.08060309435681e-07,
|
|
"logits/chosen": -1.359375,
|
|
"logits/rejected": -1.21484375,
|
|
"logps/chosen": -267.0,
|
|
"logps/rejected": -302.0,
|
|
"loss": 0.3238,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 0.931640625,
|
|
"rewards/margins": 2.7734375,
|
|
"rewards/rejected": -1.84375,
|
|
"step": 514
|
|
},
|
|
{
|
|
"epoch": 0.5696902654867256,
|
|
"grad_norm": 13.30583667755127,
|
|
"learning_rate": 2.071767139546306e-07,
|
|
"logits/chosen": -1.27734375,
|
|
"logits/rejected": -1.09375,
|
|
"logps/chosen": -253.5,
|
|
"logps/rejected": -301.0,
|
|
"loss": 0.3201,
|
|
"rewards/accuracies": 0.78125,
|
|
"rewards/chosen": 0.810546875,
|
|
"rewards/margins": 2.609375,
|
|
"rewards/rejected": -1.796875,
|
|
"step": 515
|
|
},
|
|
{
|
|
"epoch": 0.5707964601769911,
|
|
"grad_norm": 13.454291343688965,
|
|
"learning_rate": 2.062936692445705e-07,
|
|
"logits/chosen": -1.2578125,
|
|
"logits/rejected": -1.125,
|
|
"logps/chosen": -244.0,
|
|
"logps/rejected": -285.0,
|
|
"loss": 0.3014,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 0.8046875,
|
|
"rewards/margins": 2.6953125,
|
|
"rewards/rejected": -1.8984375,
|
|
"step": 516
|
|
},
|
|
{
|
|
"epoch": 0.5719026548672567,
|
|
"grad_norm": 14.328228950500488,
|
|
"learning_rate": 2.0541118666276577e-07,
|
|
"logits/chosen": -1.25,
|
|
"logits/rejected": -1.17578125,
|
|
"logps/chosen": -261.0,
|
|
"logps/rejected": -314.0,
|
|
"loss": 0.331,
|
|
"rewards/accuracies": 0.78125,
|
|
"rewards/chosen": 0.90625,
|
|
"rewards/margins": 2.609375,
|
|
"rewards/rejected": -1.70703125,
|
|
"step": 517
|
|
},
|
|
{
|
|
"epoch": 0.5730088495575221,
|
|
"grad_norm": 15.810956001281738,
|
|
"learning_rate": 2.045292775592515e-07,
|
|
"logits/chosen": -1.25,
|
|
"logits/rejected": -1.1875,
|
|
"logps/chosen": -258.5,
|
|
"logps/rejected": -287.0,
|
|
"loss": 0.3654,
|
|
"rewards/accuracies": 0.78125,
|
|
"rewards/chosen": 0.828125,
|
|
"rewards/margins": 2.765625,
|
|
"rewards/rejected": -1.9375,
|
|
"step": 518
|
|
},
|
|
{
|
|
"epoch": 0.5741150442477876,
|
|
"grad_norm": 14.267369270324707,
|
|
"learning_rate": 2.0364795327668722e-07,
|
|
"logits/chosen": -1.27734375,
|
|
"logits/rejected": -1.078125,
|
|
"logps/chosen": -290.0,
|
|
"logps/rejected": -282.0,
|
|
"loss": 0.3298,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 0.5419921875,
|
|
"rewards/margins": 2.453125,
|
|
"rewards/rejected": -1.9140625,
|
|
"step": 519
|
|
},
|
|
{
|
|
"epoch": 0.5752212389380531,
|
|
"grad_norm": 17.044023513793945,
|
|
"learning_rate": 2.0276722515021084e-07,
|
|
"logits/chosen": -1.3125,
|
|
"logits/rejected": -1.15625,
|
|
"logps/chosen": -254.5,
|
|
"logps/rejected": -288.0,
|
|
"loss": 0.4207,
|
|
"rewards/accuracies": 0.7421875,
|
|
"rewards/chosen": 0.66015625,
|
|
"rewards/margins": 2.12890625,
|
|
"rewards/rejected": -1.46875,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 0.5763274336283186,
|
|
"grad_norm": 14.049108505249023,
|
|
"learning_rate": 2.0188710450729253e-07,
|
|
"logits/chosen": -1.19140625,
|
|
"logits/rejected": -1.21875,
|
|
"logps/chosen": -235.5,
|
|
"logps/rejected": -283.0,
|
|
"loss": 0.3075,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": 1.232421875,
|
|
"rewards/margins": 3.1640625,
|
|
"rewards/rejected": -1.9375,
|
|
"step": 521
|
|
},
|
|
{
|
|
"epoch": 0.577433628318584,
|
|
"grad_norm": 14.81714153289795,
|
|
"learning_rate": 2.0100760266758953e-07,
|
|
"logits/chosen": -1.26953125,
|
|
"logits/rejected": -1.08984375,
|
|
"logps/chosen": -253.5,
|
|
"logps/rejected": -244.5,
|
|
"loss": 0.3601,
|
|
"rewards/accuracies": 0.78125,
|
|
"rewards/chosen": 0.3740234375,
|
|
"rewards/margins": 2.3671875,
|
|
"rewards/rejected": -2.0,
|
|
"step": 522
|
|
},
|
|
{
|
|
"epoch": 0.5785398230088495,
|
|
"grad_norm": 12.647814750671387,
|
|
"learning_rate": 2.0012873094280032e-07,
|
|
"logits/chosen": -1.2421875,
|
|
"logits/rejected": -1.140625,
|
|
"logps/chosen": -254.5,
|
|
"logps/rejected": -297.0,
|
|
"loss": 0.2831,
|
|
"rewards/accuracies": 0.875,
|
|
"rewards/chosen": 1.060546875,
|
|
"rewards/margins": 3.125,
|
|
"rewards/rejected": -2.0625,
|
|
"step": 523
|
|
},
|
|
{
|
|
"epoch": 0.5796460176991151,
|
|
"grad_norm": 13.28409481048584,
|
|
"learning_rate": 1.992505006365191e-07,
|
|
"logits/chosen": -1.1484375,
|
|
"logits/rejected": -1.11328125,
|
|
"logps/chosen": -268.0,
|
|
"logps/rejected": -295.0,
|
|
"loss": 0.2986,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 0.8828125,
|
|
"rewards/margins": 2.859375,
|
|
"rewards/rejected": -1.96875,
|
|
"step": 524
|
|
},
|
|
{
|
|
"epoch": 0.5807522123893806,
|
|
"grad_norm": 12.828714370727539,
|
|
"learning_rate": 1.983729230440907e-07,
|
|
"logits/chosen": -1.25390625,
|
|
"logits/rejected": -1.26171875,
|
|
"logps/chosen": -241.5,
|
|
"logps/rejected": -292.0,
|
|
"loss": 0.2867,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 1.021484375,
|
|
"rewards/margins": 3.0703125,
|
|
"rewards/rejected": -2.0546875,
|
|
"step": 525
|
|
},
|
|
{
|
|
"epoch": 0.581858407079646,
|
|
"grad_norm": 13.270194053649902,
|
|
"learning_rate": 1.974960094524647e-07,
|
|
"logits/chosen": -1.1875,
|
|
"logits/rejected": -1.13671875,
|
|
"logps/chosen": -252.5,
|
|
"logps/rejected": -284.0,
|
|
"loss": 0.3036,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": 0.837890625,
|
|
"rewards/margins": 2.71875,
|
|
"rewards/rejected": -1.87890625,
|
|
"step": 526
|
|
},
|
|
{
|
|
"epoch": 0.5829646017699115,
|
|
"grad_norm": 14.76196575164795,
|
|
"learning_rate": 1.9661977114005095e-07,
|
|
"logits/chosen": -1.2109375,
|
|
"logits/rejected": -1.09375,
|
|
"logps/chosen": -266.0,
|
|
"logps/rejected": -282.0,
|
|
"loss": 0.3643,
|
|
"rewards/accuracies": 0.7265625,
|
|
"rewards/chosen": 0.83984375,
|
|
"rewards/margins": 2.65625,
|
|
"rewards/rejected": -1.81640625,
|
|
"step": 527
|
|
},
|
|
{
|
|
"epoch": 0.584070796460177,
|
|
"grad_norm": 13.863215446472168,
|
|
"learning_rate": 1.9574421937657423e-07,
|
|
"logits/chosen": -1.2265625,
|
|
"logits/rejected": -1.171875,
|
|
"logps/chosen": -261.0,
|
|
"logps/rejected": -295.0,
|
|
"loss": 0.2729,
|
|
"rewards/accuracies": 0.84375,
|
|
"rewards/chosen": 1.02734375,
|
|
"rewards/margins": 3.078125,
|
|
"rewards/rejected": -2.0546875,
|
|
"step": 528
|
|
},
|
|
{
|
|
"epoch": 0.5851769911504425,
|
|
"grad_norm": 17.09484100341797,
|
|
"learning_rate": 1.9486936542292897e-07,
|
|
"logits/chosen": -1.2734375,
|
|
"logits/rejected": -1.1484375,
|
|
"logps/chosen": -281.0,
|
|
"logps/rejected": -284.0,
|
|
"loss": 0.4144,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": 0.681640625,
|
|
"rewards/margins": 2.203125,
|
|
"rewards/rejected": -1.5234375,
|
|
"step": 529
|
|
},
|
|
{
|
|
"epoch": 0.5862831858407079,
|
|
"grad_norm": 13.073324203491211,
|
|
"learning_rate": 1.9399522053103512e-07,
|
|
"logits/chosen": -1.22265625,
|
|
"logits/rejected": -1.16015625,
|
|
"logps/chosen": -256.5,
|
|
"logps/rejected": -272.5,
|
|
"loss": 0.3109,
|
|
"rewards/accuracies": 0.8359375,
|
|
"rewards/chosen": 0.90234375,
|
|
"rewards/margins": 2.734375,
|
|
"rewards/rejected": -1.83984375,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 0.5873893805309734,
|
|
"grad_norm": 16.40873908996582,
|
|
"learning_rate": 1.9312179594369267e-07,
|
|
"logits/chosen": -1.171875,
|
|
"logits/rejected": -1.1015625,
|
|
"logps/chosen": -270.0,
|
|
"logps/rejected": -298.0,
|
|
"loss": 0.3547,
|
|
"rewards/accuracies": 0.765625,
|
|
"rewards/chosen": 0.759765625,
|
|
"rewards/margins": 2.5078125,
|
|
"rewards/rejected": -1.75,
|
|
"step": 531
|
|
},
|
|
{
|
|
"epoch": 0.588495575221239,
|
|
"grad_norm": 13.864864349365234,
|
|
"learning_rate": 1.9224910289443766e-07,
|
|
"logits/chosen": -1.27734375,
|
|
"logits/rejected": -1.13671875,
|
|
"logps/chosen": -233.5,
|
|
"logps/rejected": -259.0,
|
|
"loss": 0.367,
|
|
"rewards/accuracies": 0.78125,
|
|
"rewards/chosen": 0.880859375,
|
|
"rewards/margins": 2.4765625,
|
|
"rewards/rejected": -1.6015625,
|
|
"step": 532
|
|
},
|
|
{
|
|
"epoch": 0.5896017699115044,
|
|
"grad_norm": 11.59555721282959,
|
|
"learning_rate": 1.913771526073976e-07,
|
|
"logits/chosen": -1.3203125,
|
|
"logits/rejected": -1.12890625,
|
|
"logps/chosen": -254.0,
|
|
"logps/rejected": -295.0,
|
|
"loss": 0.2546,
|
|
"rewards/accuracies": 0.875,
|
|
"rewards/chosen": 1.154296875,
|
|
"rewards/margins": 2.9140625,
|
|
"rewards/rejected": -1.75390625,
|
|
"step": 533
|
|
},
|
|
{
|
|
"epoch": 0.5907079646017699,
|
|
"grad_norm": 559.9500122070312,
|
|
"learning_rate": 1.9050595629714654e-07,
|
|
"logits/chosen": -1.2265625,
|
|
"logits/rejected": -0.99609375,
|
|
"logps/chosen": -270.0,
|
|
"logps/rejected": -344.0,
|
|
"loss": 0.3657,
|
|
"rewards/accuracies": 0.78125,
|
|
"rewards/chosen": 0.978515625,
|
|
"rewards/margins": 2.65625,
|
|
"rewards/rejected": -1.671875,
|
|
"step": 534
|
|
},
|
|
{
|
|
"epoch": 0.5918141592920354,
|
|
"grad_norm": 12.260162353515625,
|
|
"learning_rate": 1.8963552516856158e-07,
|
|
"logits/chosen": -1.2734375,
|
|
"logits/rejected": -1.16015625,
|
|
"logps/chosen": -242.0,
|
|
"logps/rejected": -265.5,
|
|
"loss": 0.2995,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 1.09375,
|
|
"rewards/margins": 3.015625,
|
|
"rewards/rejected": -1.921875,
|
|
"step": 535
|
|
},
|
|
{
|
|
"epoch": 0.5929203539823009,
|
|
"grad_norm": 15.339058876037598,
|
|
"learning_rate": 1.8876587041667852e-07,
|
|
"logits/chosen": -1.27734375,
|
|
"logits/rejected": -1.20703125,
|
|
"logps/chosen": -241.0,
|
|
"logps/rejected": -265.0,
|
|
"loss": 0.3577,
|
|
"rewards/accuracies": 0.7734375,
|
|
"rewards/chosen": 0.865234375,
|
|
"rewards/margins": 2.3984375,
|
|
"rewards/rejected": -1.52734375,
|
|
"step": 536
|
|
},
|
|
{
|
|
"epoch": 0.5940265486725663,
|
|
"grad_norm": 16.724506378173828,
|
|
"learning_rate": 1.8789700322654747e-07,
|
|
"logits/chosen": -1.23828125,
|
|
"logits/rejected": -1.046875,
|
|
"logps/chosen": -251.0,
|
|
"logps/rejected": -289.0,
|
|
"loss": 0.2921,
|
|
"rewards/accuracies": 0.859375,
|
|
"rewards/chosen": 0.802734375,
|
|
"rewards/margins": 2.90625,
|
|
"rewards/rejected": -2.1015625,
|
|
"step": 537
|
|
},
|
|
{
|
|
"epoch": 0.5951327433628318,
|
|
"grad_norm": 14.673587799072266,
|
|
"learning_rate": 1.8702893477308972e-07,
|
|
"logits/chosen": -1.2265625,
|
|
"logits/rejected": -1.078125,
|
|
"logps/chosen": -253.0,
|
|
"logps/rejected": -261.0,
|
|
"loss": 0.3511,
|
|
"rewards/accuracies": 0.7734375,
|
|
"rewards/chosen": 0.869140625,
|
|
"rewards/margins": 2.671875,
|
|
"rewards/rejected": -1.8046875,
|
|
"step": 538
|
|
},
|
|
{
|
|
"epoch": 0.5962389380530974,
|
|
"grad_norm": 15.79550552368164,
|
|
"learning_rate": 1.8616167622095324e-07,
|
|
"logits/chosen": -1.203125,
|
|
"logits/rejected": -1.12109375,
|
|
"logps/chosen": -267.0,
|
|
"logps/rejected": -313.0,
|
|
"loss": 0.3384,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 1.021484375,
|
|
"rewards/margins": 2.671875,
|
|
"rewards/rejected": -1.6484375,
|
|
"step": 539
|
|
},
|
|
{
|
|
"epoch": 0.5973451327433629,
|
|
"grad_norm": 14.441572189331055,
|
|
"learning_rate": 1.8529523872436977e-07,
|
|
"logits/chosen": -1.37890625,
|
|
"logits/rejected": -1.1640625,
|
|
"logps/chosen": -249.5,
|
|
"logps/rejected": -272.0,
|
|
"loss": 0.3185,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": 0.921875,
|
|
"rewards/margins": 2.671875,
|
|
"rewards/rejected": -1.75,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 0.5984513274336283,
|
|
"grad_norm": 13.769876480102539,
|
|
"learning_rate": 1.8442963342701105e-07,
|
|
"logits/chosen": -1.14453125,
|
|
"logits/rejected": -1.1875,
|
|
"logps/chosen": -277.0,
|
|
"logps/rejected": -275.0,
|
|
"loss": 0.2794,
|
|
"rewards/accuracies": 0.8515625,
|
|
"rewards/chosen": 0.994140625,
|
|
"rewards/margins": 2.9375,
|
|
"rewards/rejected": -1.9453125,
|
|
"step": 541
|
|
},
|
|
{
|
|
"epoch": 0.5995575221238938,
|
|
"grad_norm": 14.039227485656738,
|
|
"learning_rate": 1.8356487146184516e-07,
|
|
"logits/chosen": -1.26953125,
|
|
"logits/rejected": -1.20703125,
|
|
"logps/chosen": -234.0,
|
|
"logps/rejected": -252.5,
|
|
"loss": 0.3448,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 1.119140625,
|
|
"rewards/margins": 2.78125,
|
|
"rewards/rejected": -1.6640625,
|
|
"step": 542
|
|
},
|
|
{
|
|
"epoch": 0.6006637168141593,
|
|
"grad_norm": 12.930294036865234,
|
|
"learning_rate": 1.8270096395099403e-07,
|
|
"logits/chosen": -1.31640625,
|
|
"logits/rejected": -1.171875,
|
|
"logps/chosen": -242.5,
|
|
"logps/rejected": -273.0,
|
|
"loss": 0.298,
|
|
"rewards/accuracies": 0.859375,
|
|
"rewards/chosen": 1.0859375,
|
|
"rewards/margins": 2.703125,
|
|
"rewards/rejected": -1.6171875,
|
|
"step": 543
|
|
},
|
|
{
|
|
"epoch": 0.6017699115044248,
|
|
"grad_norm": 13.82011890411377,
|
|
"learning_rate": 1.8183792200559e-07,
|
|
"logits/chosen": -1.2890625,
|
|
"logits/rejected": -1.1640625,
|
|
"logps/chosen": -256.0,
|
|
"logps/rejected": -287.0,
|
|
"loss": 0.3418,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 0.943359375,
|
|
"rewards/margins": 2.359375,
|
|
"rewards/rejected": -1.4140625,
|
|
"step": 544
|
|
},
|
|
{
|
|
"epoch": 0.6028761061946902,
|
|
"grad_norm": 15.775805473327637,
|
|
"learning_rate": 1.8097575672563275e-07,
|
|
"logits/chosen": -1.34375,
|
|
"logits/rejected": -1.15625,
|
|
"logps/chosen": -249.0,
|
|
"logps/rejected": -275.0,
|
|
"loss": 0.2854,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 1.1875,
|
|
"rewards/margins": 3.1171875,
|
|
"rewards/rejected": -1.92578125,
|
|
"step": 545
|
|
},
|
|
{
|
|
"epoch": 0.6039823008849557,
|
|
"grad_norm": 12.980159759521484,
|
|
"learning_rate": 1.80114479199847e-07,
|
|
"logits/chosen": -1.1171875,
|
|
"logits/rejected": -1.140625,
|
|
"logps/chosen": -263.0,
|
|
"logps/rejected": -277.0,
|
|
"loss": 0.2444,
|
|
"rewards/accuracies": 0.875,
|
|
"rewards/chosen": 1.23046875,
|
|
"rewards/margins": 3.28125,
|
|
"rewards/rejected": -2.0546875,
|
|
"step": 546
|
|
},
|
|
{
|
|
"epoch": 0.6050884955752213,
|
|
"grad_norm": 13.192628860473633,
|
|
"learning_rate": 1.792541005055394e-07,
|
|
"logits/chosen": -1.2578125,
|
|
"logits/rejected": -1.22265625,
|
|
"logps/chosen": -254.0,
|
|
"logps/rejected": -286.0,
|
|
"loss": 0.3065,
|
|
"rewards/accuracies": 0.8359375,
|
|
"rewards/chosen": 1.138671875,
|
|
"rewards/margins": 2.8984375,
|
|
"rewards/rejected": -1.7578125,
|
|
"step": 547
|
|
},
|
|
{
|
|
"epoch": 0.6061946902654868,
|
|
"grad_norm": 13.52103328704834,
|
|
"learning_rate": 1.783946317084564e-07,
|
|
"logits/chosen": -1.328125,
|
|
"logits/rejected": -1.08984375,
|
|
"logps/chosen": -253.0,
|
|
"logps/rejected": -269.0,
|
|
"loss": 0.2691,
|
|
"rewards/accuracies": 0.8671875,
|
|
"rewards/chosen": 1.01953125,
|
|
"rewards/margins": 2.9765625,
|
|
"rewards/rejected": -1.953125,
|
|
"step": 548
|
|
},
|
|
{
|
|
"epoch": 0.6073008849557522,
|
|
"grad_norm": 12.777830123901367,
|
|
"learning_rate": 1.7753608386264193e-07,
|
|
"logits/chosen": -1.19921875,
|
|
"logits/rejected": -1.20703125,
|
|
"logps/chosen": -225.0,
|
|
"logps/rejected": -274.0,
|
|
"loss": 0.3203,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 1.09765625,
|
|
"rewards/margins": 2.7578125,
|
|
"rewards/rejected": -1.66796875,
|
|
"step": 549
|
|
},
|
|
{
|
|
"epoch": 0.6084070796460177,
|
|
"grad_norm": 13.757856369018555,
|
|
"learning_rate": 1.7667846801029486e-07,
|
|
"logits/chosen": -1.32421875,
|
|
"logits/rejected": -1.13671875,
|
|
"logps/chosen": -264.0,
|
|
"logps/rejected": -278.0,
|
|
"loss": 0.2789,
|
|
"rewards/accuracies": 0.8671875,
|
|
"rewards/chosen": 1.126953125,
|
|
"rewards/margins": 2.640625,
|
|
"rewards/rejected": -1.515625,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.6084070796460177,
|
|
"eval_logits/chosen": -1.262554407119751,
|
|
"eval_logits/rejected": -1.1598647832870483,
|
|
"eval_logps/chosen": -250.73133850097656,
|
|
"eval_logps/rejected": -276.39801025390625,
|
|
"eval_loss": 0.32494351267814636,
|
|
"eval_rewards/accuracies": 0.8066800236701965,
|
|
"eval_rewards/chosen": 1.011232852935791,
|
|
"eval_rewards/margins": 2.7289724349975586,
|
|
"eval_rewards/rejected": -1.717836618423462,
|
|
"eval_runtime": 193.0762,
|
|
"eval_samples_per_second": 66.57,
|
|
"eval_steps_per_second": 1.041,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 0.6095132743362832,
|
|
"grad_norm": 16.458438873291016,
|
|
"learning_rate": 1.758217951816274e-07,
|
|
"logits/chosen": -1.25390625,
|
|
"logits/rejected": -1.15625,
|
|
"logps/chosen": -289.0,
|
|
"logps/rejected": -310.0,
|
|
"loss": 0.3871,
|
|
"rewards/accuracies": 0.7578125,
|
|
"rewards/chosen": 0.57421875,
|
|
"rewards/margins": 2.15625,
|
|
"rewards/rejected": -1.5859375,
|
|
"step": 551
|
|
},
|
|
{
|
|
"epoch": 0.6106194690265486,
|
|
"grad_norm": 15.513572692871094,
|
|
"learning_rate": 1.7496607639472327e-07,
|
|
"logits/chosen": -1.2265625,
|
|
"logits/rejected": -1.19921875,
|
|
"logps/chosen": -242.0,
|
|
"logps/rejected": -270.0,
|
|
"loss": 0.33,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 1.00390625,
|
|
"rewards/margins": 2.9375,
|
|
"rewards/rejected": -1.9375,
|
|
"step": 552
|
|
},
|
|
{
|
|
"epoch": 0.6117256637168141,
|
|
"grad_norm": 15.365250587463379,
|
|
"learning_rate": 1.7411132265539536e-07,
|
|
"logits/chosen": -1.203125,
|
|
"logits/rejected": -1.0625,
|
|
"logps/chosen": -250.0,
|
|
"logps/rejected": -297.0,
|
|
"loss": 0.3456,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 0.931640625,
|
|
"rewards/margins": 2.546875,
|
|
"rewards/rejected": -1.6171875,
|
|
"step": 553
|
|
},
|
|
{
|
|
"epoch": 0.6128318584070797,
|
|
"grad_norm": 14.896991729736328,
|
|
"learning_rate": 1.7325754495704507e-07,
|
|
"logits/chosen": -1.25390625,
|
|
"logits/rejected": -1.1875,
|
|
"logps/chosen": -267.5,
|
|
"logps/rejected": -315.0,
|
|
"loss": 0.3605,
|
|
"rewards/accuracies": 0.78125,
|
|
"rewards/chosen": 0.787109375,
|
|
"rewards/margins": 2.3671875,
|
|
"rewards/rejected": -1.578125,
|
|
"step": 554
|
|
},
|
|
{
|
|
"epoch": 0.6139380530973452,
|
|
"grad_norm": 14.317460060119629,
|
|
"learning_rate": 1.7240475428051997e-07,
|
|
"logits/chosen": -1.34765625,
|
|
"logits/rejected": -1.1328125,
|
|
"logps/chosen": -247.0,
|
|
"logps/rejected": -268.0,
|
|
"loss": 0.3123,
|
|
"rewards/accuracies": 0.8515625,
|
|
"rewards/chosen": 0.96875,
|
|
"rewards/margins": 2.75,
|
|
"rewards/rejected": -1.78515625,
|
|
"step": 555
|
|
},
|
|
{
|
|
"epoch": 0.6150442477876106,
|
|
"grad_norm": 13.919463157653809,
|
|
"learning_rate": 1.7155296159397356e-07,
|
|
"logits/chosen": -1.22265625,
|
|
"logits/rejected": -1.19140625,
|
|
"logps/chosen": -261.5,
|
|
"logps/rejected": -304.0,
|
|
"loss": 0.3188,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 0.88671875,
|
|
"rewards/margins": 2.7734375,
|
|
"rewards/rejected": -1.890625,
|
|
"step": 556
|
|
},
|
|
{
|
|
"epoch": 0.6161504424778761,
|
|
"grad_norm": 15.724615097045898,
|
|
"learning_rate": 1.707021778527235e-07,
|
|
"logits/chosen": -1.34765625,
|
|
"logits/rejected": -1.1953125,
|
|
"logps/chosen": -278.0,
|
|
"logps/rejected": -298.0,
|
|
"loss": 0.3411,
|
|
"rewards/accuracies": 0.78125,
|
|
"rewards/chosen": 1.0625,
|
|
"rewards/margins": 2.5234375,
|
|
"rewards/rejected": -1.4609375,
|
|
"step": 557
|
|
},
|
|
{
|
|
"epoch": 0.6172566371681416,
|
|
"grad_norm": 11.178709983825684,
|
|
"learning_rate": 1.6985241399911082e-07,
|
|
"logits/chosen": -1.41796875,
|
|
"logits/rejected": -1.21484375,
|
|
"logps/chosen": -234.5,
|
|
"logps/rejected": -259.0,
|
|
"loss": 0.2349,
|
|
"rewards/accuracies": 0.859375,
|
|
"rewards/chosen": 0.91015625,
|
|
"rewards/margins": 3.2265625,
|
|
"rewards/rejected": -2.3125,
|
|
"step": 558
|
|
},
|
|
{
|
|
"epoch": 0.6183628318584071,
|
|
"grad_norm": 13.019828796386719,
|
|
"learning_rate": 1.6900368096235931e-07,
|
|
"logits/chosen": -1.328125,
|
|
"logits/rejected": -1.1953125,
|
|
"logps/chosen": -227.5,
|
|
"logps/rejected": -288.0,
|
|
"loss": 0.3063,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": 1.208984375,
|
|
"rewards/margins": 3.265625,
|
|
"rewards/rejected": -2.05078125,
|
|
"step": 559
|
|
},
|
|
{
|
|
"epoch": 0.6194690265486725,
|
|
"grad_norm": 15.409528732299805,
|
|
"learning_rate": 1.6815598965843519e-07,
|
|
"logits/chosen": -1.30078125,
|
|
"logits/rejected": -1.15625,
|
|
"logps/chosen": -266.0,
|
|
"logps/rejected": -326.0,
|
|
"loss": 0.2972,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 1.12109375,
|
|
"rewards/margins": 3.1875,
|
|
"rewards/rejected": -2.0703125,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 0.620575221238938,
|
|
"grad_norm": 13.341466903686523,
|
|
"learning_rate": 1.67309350989906e-07,
|
|
"logits/chosen": -1.37109375,
|
|
"logits/rejected": -1.09765625,
|
|
"logps/chosen": -257.0,
|
|
"logps/rejected": -259.5,
|
|
"loss": 0.3021,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 0.78125,
|
|
"rewards/margins": 2.65625,
|
|
"rewards/rejected": -1.875,
|
|
"step": 561
|
|
},
|
|
{
|
|
"epoch": 0.6216814159292036,
|
|
"grad_norm": 13.608122825622559,
|
|
"learning_rate": 1.664637758458013e-07,
|
|
"logits/chosen": -1.3828125,
|
|
"logits/rejected": -1.078125,
|
|
"logps/chosen": -248.5,
|
|
"logps/rejected": -238.5,
|
|
"loss": 0.3346,
|
|
"rewards/accuracies": 0.78125,
|
|
"rewards/chosen": 0.4716796875,
|
|
"rewards/margins": 2.3359375,
|
|
"rewards/rejected": -1.86328125,
|
|
"step": 562
|
|
},
|
|
{
|
|
"epoch": 0.6227876106194691,
|
|
"grad_norm": 14.688355445861816,
|
|
"learning_rate": 1.656192751014717e-07,
|
|
"logits/chosen": -1.2578125,
|
|
"logits/rejected": -1.13671875,
|
|
"logps/chosen": -266.0,
|
|
"logps/rejected": -298.0,
|
|
"loss": 0.3522,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 0.90234375,
|
|
"rewards/margins": 2.578125,
|
|
"rewards/rejected": -1.67578125,
|
|
"step": 563
|
|
},
|
|
{
|
|
"epoch": 0.6238938053097345,
|
|
"grad_norm": 13.295293807983398,
|
|
"learning_rate": 1.647758596184498e-07,
|
|
"logits/chosen": -1.1875,
|
|
"logits/rejected": -1.17578125,
|
|
"logps/chosen": -262.5,
|
|
"logps/rejected": -287.0,
|
|
"loss": 0.3039,
|
|
"rewards/accuracies": 0.8359375,
|
|
"rewards/chosen": 0.849609375,
|
|
"rewards/margins": 2.4921875,
|
|
"rewards/rejected": -1.64453125,
|
|
"step": 564
|
|
},
|
|
{
|
|
"epoch": 0.625,
|
|
"grad_norm": 12.697134017944336,
|
|
"learning_rate": 1.6393354024431e-07,
|
|
"logits/chosen": -1.2421875,
|
|
"logits/rejected": -1.05078125,
|
|
"logps/chosen": -257.5,
|
|
"logps/rejected": -265.0,
|
|
"loss": 0.2807,
|
|
"rewards/accuracies": 0.8515625,
|
|
"rewards/chosen": 0.87109375,
|
|
"rewards/margins": 3.015625,
|
|
"rewards/rejected": -2.14453125,
|
|
"step": 565
|
|
},
|
|
{
|
|
"epoch": 0.6261061946902655,
|
|
"grad_norm": 23.94145965576172,
|
|
"learning_rate": 1.63092327812529e-07,
|
|
"logits/chosen": -1.34375,
|
|
"logits/rejected": -1.2734375,
|
|
"logps/chosen": -246.5,
|
|
"logps/rejected": -232.5,
|
|
"loss": 0.3647,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": 0.576171875,
|
|
"rewards/margins": 2.421875,
|
|
"rewards/rejected": -1.84375,
|
|
"step": 566
|
|
},
|
|
{
|
|
"epoch": 0.6272123893805309,
|
|
"grad_norm": 13.088120460510254,
|
|
"learning_rate": 1.622522331423467e-07,
|
|
"logits/chosen": -1.2109375,
|
|
"logits/rejected": -1.1640625,
|
|
"logps/chosen": -261.0,
|
|
"logps/rejected": -307.0,
|
|
"loss": 0.307,
|
|
"rewards/accuracies": 0.84375,
|
|
"rewards/chosen": 0.875,
|
|
"rewards/margins": 2.8046875,
|
|
"rewards/rejected": -1.92578125,
|
|
"step": 567
|
|
},
|
|
{
|
|
"epoch": 0.6283185840707964,
|
|
"grad_norm": 16.868358612060547,
|
|
"learning_rate": 1.6141326703862706e-07,
|
|
"logits/chosen": -1.2734375,
|
|
"logits/rejected": -1.24609375,
|
|
"logps/chosen": -260.0,
|
|
"logps/rejected": -292.0,
|
|
"loss": 0.4,
|
|
"rewards/accuracies": 0.7734375,
|
|
"rewards/chosen": 0.720703125,
|
|
"rewards/margins": 2.5703125,
|
|
"rewards/rejected": -1.8515625,
|
|
"step": 568
|
|
},
|
|
{
|
|
"epoch": 0.629424778761062,
|
|
"grad_norm": 13.88227653503418,
|
|
"learning_rate": 1.605754402917186e-07,
|
|
"logits/chosen": -1.43359375,
|
|
"logits/rejected": -1.28125,
|
|
"logps/chosen": -245.0,
|
|
"logps/rejected": -266.5,
|
|
"loss": 0.2863,
|
|
"rewards/accuracies": 0.8359375,
|
|
"rewards/chosen": 0.96484375,
|
|
"rewards/margins": 2.9296875,
|
|
"rewards/rejected": -1.96875,
|
|
"step": 569
|
|
},
|
|
{
|
|
"epoch": 0.6305309734513275,
|
|
"grad_norm": 13.3289794921875,
|
|
"learning_rate": 1.5973876367731651e-07,
|
|
"logits/chosen": -1.35546875,
|
|
"logits/rejected": -1.08203125,
|
|
"logps/chosen": -280.0,
|
|
"logps/rejected": -306.0,
|
|
"loss": 0.2719,
|
|
"rewards/accuracies": 0.84375,
|
|
"rewards/chosen": 1.14453125,
|
|
"rewards/margins": 3.078125,
|
|
"rewards/rejected": -1.93359375,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 0.6316371681415929,
|
|
"grad_norm": 13.53636360168457,
|
|
"learning_rate": 1.5890324795632315e-07,
|
|
"logits/chosen": -1.3671875,
|
|
"logits/rejected": -1.28515625,
|
|
"logps/chosen": -223.0,
|
|
"logps/rejected": -262.0,
|
|
"loss": 0.3,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": 1.017578125,
|
|
"rewards/margins": 2.90625,
|
|
"rewards/rejected": -1.89453125,
|
|
"step": 571
|
|
},
|
|
{
|
|
"epoch": 0.6327433628318584,
|
|
"grad_norm": 14.343642234802246,
|
|
"learning_rate": 1.5806890387471023e-07,
|
|
"logits/chosen": -1.3359375,
|
|
"logits/rejected": -1.12109375,
|
|
"logps/chosen": -267.0,
|
|
"logps/rejected": -291.0,
|
|
"loss": 0.2824,
|
|
"rewards/accuracies": 0.84375,
|
|
"rewards/chosen": 0.9453125,
|
|
"rewards/margins": 2.984375,
|
|
"rewards/rejected": -2.04296875,
|
|
"step": 572
|
|
},
|
|
{
|
|
"epoch": 0.6338495575221239,
|
|
"grad_norm": 13.577574729919434,
|
|
"learning_rate": 1.5723574216338065e-07,
|
|
"logits/chosen": -1.26171875,
|
|
"logits/rejected": -1.17578125,
|
|
"logps/chosen": -272.0,
|
|
"logps/rejected": -276.0,
|
|
"loss": 0.2799,
|
|
"rewards/accuracies": 0.84375,
|
|
"rewards/chosen": 0.9296875,
|
|
"rewards/margins": 2.8125,
|
|
"rewards/rejected": -1.8828125,
|
|
"step": 573
|
|
},
|
|
{
|
|
"epoch": 0.6349557522123894,
|
|
"grad_norm": 17.00868797302246,
|
|
"learning_rate": 1.5640377353802985e-07,
|
|
"logits/chosen": -1.3125,
|
|
"logits/rejected": -1.025390625,
|
|
"logps/chosen": -286.0,
|
|
"logps/rejected": -286.0,
|
|
"loss": 0.3574,
|
|
"rewards/accuracies": 0.7734375,
|
|
"rewards/chosen": 0.73828125,
|
|
"rewards/margins": 2.6015625,
|
|
"rewards/rejected": -1.859375,
|
|
"step": 574
|
|
},
|
|
{
|
|
"epoch": 0.6360619469026548,
|
|
"grad_norm": 14.934076309204102,
|
|
"learning_rate": 1.5557300869900874e-07,
|
|
"logits/chosen": -1.2578125,
|
|
"logits/rejected": -1.09375,
|
|
"logps/chosen": -281.5,
|
|
"logps/rejected": -320.0,
|
|
"loss": 0.347,
|
|
"rewards/accuracies": 0.7734375,
|
|
"rewards/chosen": 0.7509765625,
|
|
"rewards/margins": 2.47265625,
|
|
"rewards/rejected": -1.7265625,
|
|
"step": 575
|
|
},
|
|
{
|
|
"epoch": 0.6371681415929203,
|
|
"grad_norm": 14.064445495605469,
|
|
"learning_rate": 1.547434583311858e-07,
|
|
"logits/chosen": -1.234375,
|
|
"logits/rejected": -1.06640625,
|
|
"logps/chosen": -262.0,
|
|
"logps/rejected": -262.0,
|
|
"loss": 0.374,
|
|
"rewards/accuracies": 0.7734375,
|
|
"rewards/chosen": 0.5078125,
|
|
"rewards/margins": 2.16796875,
|
|
"rewards/rejected": -1.65625,
|
|
"step": 576
|
|
},
|
|
{
|
|
"epoch": 0.6382743362831859,
|
|
"grad_norm": 14.291051864624023,
|
|
"learning_rate": 1.5391513310380923e-07,
|
|
"logits/chosen": -1.17578125,
|
|
"logits/rejected": -1.1484375,
|
|
"logps/chosen": -264.5,
|
|
"logps/rejected": -322.0,
|
|
"loss": 0.2885,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 0.8671875,
|
|
"rewards/margins": 2.8359375,
|
|
"rewards/rejected": -1.96484375,
|
|
"step": 577
|
|
},
|
|
{
|
|
"epoch": 0.6393805309734514,
|
|
"grad_norm": 15.690069198608398,
|
|
"learning_rate": 1.5308804367037049e-07,
|
|
"logits/chosen": -1.37890625,
|
|
"logits/rejected": -1.12109375,
|
|
"logps/chosen": -271.0,
|
|
"logps/rejected": -313.0,
|
|
"loss": 0.3193,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 0.921875,
|
|
"rewards/margins": 2.8125,
|
|
"rewards/rejected": -1.88671875,
|
|
"step": 578
|
|
},
|
|
{
|
|
"epoch": 0.6404867256637168,
|
|
"grad_norm": 14.940479278564453,
|
|
"learning_rate": 1.5226220066846662e-07,
|
|
"logits/chosen": -1.20703125,
|
|
"logits/rejected": -1.12109375,
|
|
"logps/chosen": -277.0,
|
|
"logps/rejected": -313.0,
|
|
"loss": 0.317,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 0.912109375,
|
|
"rewards/margins": 2.7421875,
|
|
"rewards/rejected": -1.828125,
|
|
"step": 579
|
|
},
|
|
{
|
|
"epoch": 0.6415929203539823,
|
|
"grad_norm": 14.204512596130371,
|
|
"learning_rate": 1.5143761471966387e-07,
|
|
"logits/chosen": -1.25,
|
|
"logits/rejected": -1.1953125,
|
|
"logps/chosen": -267.0,
|
|
"logps/rejected": -296.0,
|
|
"loss": 0.2923,
|
|
"rewards/accuracies": 0.8359375,
|
|
"rewards/chosen": 0.935546875,
|
|
"rewards/margins": 2.9453125,
|
|
"rewards/rejected": -2.01953125,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 0.6426991150442478,
|
|
"grad_norm": 12.345739364624023,
|
|
"learning_rate": 1.5061429642936104e-07,
|
|
"logits/chosen": -1.2734375,
|
|
"logits/rejected": -1.15625,
|
|
"logps/chosen": -238.5,
|
|
"logps/rejected": -271.0,
|
|
"loss": 0.2898,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 1.078125,
|
|
"rewards/margins": 3.0390625,
|
|
"rewards/rejected": -1.9609375,
|
|
"step": 581
|
|
},
|
|
{
|
|
"epoch": 0.6438053097345132,
|
|
"grad_norm": 14.108118057250977,
|
|
"learning_rate": 1.497922563866526e-07,
|
|
"logits/chosen": -1.25390625,
|
|
"logits/rejected": -1.25390625,
|
|
"logps/chosen": -225.5,
|
|
"logps/rejected": -276.0,
|
|
"loss": 0.3588,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 1.01171875,
|
|
"rewards/margins": 2.5078125,
|
|
"rewards/rejected": -1.4921875,
|
|
"step": 582
|
|
},
|
|
{
|
|
"epoch": 0.6449115044247787,
|
|
"grad_norm": 15.642598152160645,
|
|
"learning_rate": 1.4897150516419315e-07,
|
|
"logits/chosen": -1.33984375,
|
|
"logits/rejected": -1.09765625,
|
|
"logps/chosen": -262.5,
|
|
"logps/rejected": -281.0,
|
|
"loss": 0.3357,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 0.962890625,
|
|
"rewards/margins": 2.671875,
|
|
"rewards/rejected": -1.70703125,
|
|
"step": 583
|
|
},
|
|
{
|
|
"epoch": 0.6460176991150443,
|
|
"grad_norm": 13.628485679626465,
|
|
"learning_rate": 1.481520533180611e-07,
|
|
"logits/chosen": -1.2734375,
|
|
"logits/rejected": -1.16796875,
|
|
"logps/chosen": -245.0,
|
|
"logps/rejected": -250.0,
|
|
"loss": 0.2903,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": 0.8515625,
|
|
"rewards/margins": 2.8359375,
|
|
"rewards/rejected": -1.9921875,
|
|
"step": 584
|
|
},
|
|
{
|
|
"epoch": 0.6471238938053098,
|
|
"grad_norm": 12.115748405456543,
|
|
"learning_rate": 1.4733391138762275e-07,
|
|
"logits/chosen": -1.265625,
|
|
"logits/rejected": -1.21484375,
|
|
"logps/chosen": -237.0,
|
|
"logps/rejected": -255.5,
|
|
"loss": 0.2511,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 1.16796875,
|
|
"rewards/margins": 3.484375,
|
|
"rewards/rejected": -2.3125,
|
|
"step": 585
|
|
},
|
|
{
|
|
"epoch": 0.6482300884955752,
|
|
"grad_norm": 12.073527336120605,
|
|
"learning_rate": 1.4651708989539733e-07,
|
|
"logits/chosen": -1.3203125,
|
|
"logits/rejected": -1.2265625,
|
|
"logps/chosen": -255.0,
|
|
"logps/rejected": -251.5,
|
|
"loss": 0.27,
|
|
"rewards/accuracies": 0.84375,
|
|
"rewards/chosen": 1.025390625,
|
|
"rewards/margins": 2.8671875,
|
|
"rewards/rejected": -1.84375,
|
|
"step": 586
|
|
},
|
|
{
|
|
"epoch": 0.6493362831858407,
|
|
"grad_norm": 15.416234016418457,
|
|
"learning_rate": 1.4570159934692084e-07,
|
|
"logits/chosen": -1.2265625,
|
|
"logits/rejected": -1.234375,
|
|
"logps/chosen": -264.0,
|
|
"logps/rejected": -290.0,
|
|
"loss": 0.4144,
|
|
"rewards/accuracies": 0.78125,
|
|
"rewards/chosen": 0.69921875,
|
|
"rewards/margins": 2.203125,
|
|
"rewards/rejected": -1.50390625,
|
|
"step": 587
|
|
},
|
|
{
|
|
"epoch": 0.6504424778761062,
|
|
"grad_norm": 14.182881355285645,
|
|
"learning_rate": 1.448874502306116e-07,
|
|
"logits/chosen": -1.21875,
|
|
"logits/rejected": -1.0859375,
|
|
"logps/chosen": -262.5,
|
|
"logps/rejected": -280.0,
|
|
"loss": 0.3193,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 0.9921875,
|
|
"rewards/margins": 2.7421875,
|
|
"rewards/rejected": -1.75390625,
|
|
"step": 588
|
|
},
|
|
{
|
|
"epoch": 0.6515486725663717,
|
|
"grad_norm": 14.108832359313965,
|
|
"learning_rate": 1.4407465301763532e-07,
|
|
"logits/chosen": -1.37890625,
|
|
"logits/rejected": -1.21875,
|
|
"logps/chosen": -249.0,
|
|
"logps/rejected": -257.5,
|
|
"loss": 0.355,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 0.75,
|
|
"rewards/margins": 2.734375,
|
|
"rewards/rejected": -1.98046875,
|
|
"step": 589
|
|
},
|
|
{
|
|
"epoch": 0.6526548672566371,
|
|
"grad_norm": 14.618428230285645,
|
|
"learning_rate": 1.432632181617698e-07,
|
|
"logits/chosen": -1.24609375,
|
|
"logits/rejected": -1.02734375,
|
|
"logps/chosen": -243.0,
|
|
"logps/rejected": -281.0,
|
|
"loss": 0.313,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": 1.0078125,
|
|
"rewards/margins": 2.9296875,
|
|
"rewards/rejected": -1.92578125,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 0.6537610619469026,
|
|
"grad_norm": 15.182010650634766,
|
|
"learning_rate": 1.4245315609927112e-07,
|
|
"logits/chosen": -1.2578125,
|
|
"logits/rejected": -1.14453125,
|
|
"logps/chosen": -262.0,
|
|
"logps/rejected": -269.0,
|
|
"loss": 0.3443,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 0.732421875,
|
|
"rewards/margins": 2.6484375,
|
|
"rewards/rejected": -1.91015625,
|
|
"step": 591
|
|
},
|
|
{
|
|
"epoch": 0.6548672566371682,
|
|
"grad_norm": 14.359109878540039,
|
|
"learning_rate": 1.4164447724873933e-07,
|
|
"logits/chosen": -1.19140625,
|
|
"logits/rejected": -1.16015625,
|
|
"logps/chosen": -253.5,
|
|
"logps/rejected": -288.0,
|
|
"loss": 0.3191,
|
|
"rewards/accuracies": 0.8359375,
|
|
"rewards/chosen": 0.93359375,
|
|
"rewards/margins": 2.6171875,
|
|
"rewards/rejected": -1.6875,
|
|
"step": 592
|
|
},
|
|
{
|
|
"epoch": 0.6559734513274337,
|
|
"grad_norm": 14.253448486328125,
|
|
"learning_rate": 1.4083719201098402e-07,
|
|
"logits/chosen": -1.3359375,
|
|
"logits/rejected": -1.2421875,
|
|
"logps/chosen": -251.5,
|
|
"logps/rejected": -288.0,
|
|
"loss": 0.3304,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 0.8203125,
|
|
"rewards/margins": 2.4765625,
|
|
"rewards/rejected": -1.65625,
|
|
"step": 593
|
|
},
|
|
{
|
|
"epoch": 0.6570796460176991,
|
|
"grad_norm": 14.521297454833984,
|
|
"learning_rate": 1.400313107688912e-07,
|
|
"logits/chosen": -1.37109375,
|
|
"logits/rejected": -1.19140625,
|
|
"logps/chosen": -250.0,
|
|
"logps/rejected": -260.0,
|
|
"loss": 0.3297,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": 0.982421875,
|
|
"rewards/margins": 2.859375,
|
|
"rewards/rejected": -1.875,
|
|
"step": 594
|
|
},
|
|
{
|
|
"epoch": 0.6581858407079646,
|
|
"grad_norm": 13.660658836364746,
|
|
"learning_rate": 1.39226843887289e-07,
|
|
"logits/chosen": -1.26171875,
|
|
"logits/rejected": -1.19140625,
|
|
"logps/chosen": -235.0,
|
|
"logps/rejected": -296.0,
|
|
"loss": 0.3347,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 0.96875,
|
|
"rewards/margins": 2.9609375,
|
|
"rewards/rejected": -1.9921875,
|
|
"step": 595
|
|
},
|
|
{
|
|
"epoch": 0.6592920353982301,
|
|
"grad_norm": 15.24756908416748,
|
|
"learning_rate": 1.384238017128152e-07,
|
|
"logits/chosen": -1.2734375,
|
|
"logits/rejected": -1.12109375,
|
|
"logps/chosen": -241.5,
|
|
"logps/rejected": -274.0,
|
|
"loss": 0.3958,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 0.884765625,
|
|
"rewards/margins": 2.6875,
|
|
"rewards/rejected": -1.80859375,
|
|
"step": 596
|
|
},
|
|
{
|
|
"epoch": 0.6603982300884956,
|
|
"grad_norm": 14.213970184326172,
|
|
"learning_rate": 1.3762219457378354e-07,
|
|
"logits/chosen": -1.26953125,
|
|
"logits/rejected": -1.12109375,
|
|
"logps/chosen": -240.0,
|
|
"logps/rejected": -288.0,
|
|
"loss": 0.2724,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 1.1484375,
|
|
"rewards/margins": 2.84375,
|
|
"rewards/rejected": -1.6953125,
|
|
"step": 597
|
|
},
|
|
{
|
|
"epoch": 0.661504424778761,
|
|
"grad_norm": 13.295817375183105,
|
|
"learning_rate": 1.3682203278005095e-07,
|
|
"logits/chosen": -1.203125,
|
|
"logits/rejected": -1.11328125,
|
|
"logps/chosen": -267.0,
|
|
"logps/rejected": -287.0,
|
|
"loss": 0.2403,
|
|
"rewards/accuracies": 0.890625,
|
|
"rewards/chosen": 1.296875,
|
|
"rewards/margins": 3.078125,
|
|
"rewards/rejected": -1.77734375,
|
|
"step": 598
|
|
},
|
|
{
|
|
"epoch": 0.6626106194690266,
|
|
"grad_norm": 13.04489517211914,
|
|
"learning_rate": 1.3602332662288534e-07,
|
|
"logits/chosen": -1.2578125,
|
|
"logits/rejected": -1.078125,
|
|
"logps/chosen": -262.0,
|
|
"logps/rejected": -269.0,
|
|
"loss": 0.2891,
|
|
"rewards/accuracies": 0.8671875,
|
|
"rewards/chosen": 0.86328125,
|
|
"rewards/margins": 2.71875,
|
|
"rewards/rejected": -1.859375,
|
|
"step": 599
|
|
},
|
|
{
|
|
"epoch": 0.6637168141592921,
|
|
"grad_norm": 15.907817840576172,
|
|
"learning_rate": 1.3522608637483266e-07,
|
|
"logits/chosen": -1.359375,
|
|
"logits/rejected": -1.203125,
|
|
"logps/chosen": -241.5,
|
|
"logps/rejected": -275.0,
|
|
"loss": 0.3724,
|
|
"rewards/accuracies": 0.734375,
|
|
"rewards/chosen": 0.548828125,
|
|
"rewards/margins": 2.2421875,
|
|
"rewards/rejected": -1.6953125,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.6637168141592921,
|
|
"eval_logits/chosen": -1.2614272832870483,
|
|
"eval_logits/rejected": -1.1564831733703613,
|
|
"eval_logps/chosen": -251.43780517578125,
|
|
"eval_logps/rejected": -277.5970153808594,
|
|
"eval_loss": 0.3234591782093048,
|
|
"eval_rewards/accuracies": 0.809928834438324,
|
|
"eval_rewards/chosen": 0.9355371594429016,
|
|
"eval_rewards/margins": 2.773709535598755,
|
|
"eval_rewards/rejected": -1.8374922275543213,
|
|
"eval_runtime": 193.0898,
|
|
"eval_samples_per_second": 66.565,
|
|
"eval_steps_per_second": 1.041,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 0.6648230088495575,
|
|
"grad_norm": 13.516127586364746,
|
|
"learning_rate": 1.3443032228958545e-07,
|
|
"logits/chosen": -1.2109375,
|
|
"logits/rejected": -1.1015625,
|
|
"logps/chosen": -252.0,
|
|
"logps/rejected": -284.0,
|
|
"loss": 0.3214,
|
|
"rewards/accuracies": 0.8359375,
|
|
"rewards/chosen": 0.916015625,
|
|
"rewards/margins": 2.734375,
|
|
"rewards/rejected": -1.8203125,
|
|
"step": 601
|
|
},
|
|
{
|
|
"epoch": 0.665929203539823,
|
|
"grad_norm": 13.290761947631836,
|
|
"learning_rate": 1.336360446018503e-07,
|
|
"logits/chosen": -1.37890625,
|
|
"logits/rejected": -1.1953125,
|
|
"logps/chosen": -240.5,
|
|
"logps/rejected": -248.5,
|
|
"loss": 0.3253,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 1.048828125,
|
|
"rewards/margins": 2.65625,
|
|
"rewards/rejected": -1.60546875,
|
|
"step": 602
|
|
},
|
|
{
|
|
"epoch": 0.6670353982300885,
|
|
"grad_norm": 13.730428695678711,
|
|
"learning_rate": 1.3284326352721675e-07,
|
|
"logits/chosen": -1.24609375,
|
|
"logits/rejected": -1.15625,
|
|
"logps/chosen": -237.0,
|
|
"logps/rejected": -265.0,
|
|
"loss": 0.3161,
|
|
"rewards/accuracies": 0.8671875,
|
|
"rewards/chosen": 0.96875,
|
|
"rewards/margins": 3.171875,
|
|
"rewards/rejected": -2.1953125,
|
|
"step": 603
|
|
},
|
|
{
|
|
"epoch": 0.668141592920354,
|
|
"grad_norm": 13.699116706848145,
|
|
"learning_rate": 1.3205198926202544e-07,
|
|
"logits/chosen": -1.25,
|
|
"logits/rejected": -1.125,
|
|
"logps/chosen": -254.5,
|
|
"logps/rejected": -295.0,
|
|
"loss": 0.3262,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 1.0390625,
|
|
"rewards/margins": 2.7265625,
|
|
"rewards/rejected": -1.6875,
|
|
"step": 604
|
|
},
|
|
{
|
|
"epoch": 0.6692477876106194,
|
|
"grad_norm": 15.250642776489258,
|
|
"learning_rate": 1.312622319832375e-07,
|
|
"logits/chosen": -1.234375,
|
|
"logits/rejected": -1.13671875,
|
|
"logps/chosen": -262.0,
|
|
"logps/rejected": -275.0,
|
|
"loss": 0.3704,
|
|
"rewards/accuracies": 0.7734375,
|
|
"rewards/chosen": 0.822265625,
|
|
"rewards/margins": 2.515625,
|
|
"rewards/rejected": -1.69140625,
|
|
"step": 605
|
|
},
|
|
{
|
|
"epoch": 0.6703539823008849,
|
|
"grad_norm": 14.902206420898438,
|
|
"learning_rate": 1.3047400184830303e-07,
|
|
"logits/chosen": -1.18359375,
|
|
"logits/rejected": -1.064453125,
|
|
"logps/chosen": -248.5,
|
|
"logps/rejected": -277.0,
|
|
"loss": 0.3634,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 0.76953125,
|
|
"rewards/margins": 2.3984375,
|
|
"rewards/rejected": -1.625,
|
|
"step": 606
|
|
},
|
|
{
|
|
"epoch": 0.6714601769911505,
|
|
"grad_norm": 15.015229225158691,
|
|
"learning_rate": 1.2968730899503106e-07,
|
|
"logits/chosen": -1.39453125,
|
|
"logits/rejected": -1.265625,
|
|
"logps/chosen": -254.5,
|
|
"logps/rejected": -271.0,
|
|
"loss": 0.3279,
|
|
"rewards/accuracies": 0.8515625,
|
|
"rewards/chosen": 0.943359375,
|
|
"rewards/margins": 2.625,
|
|
"rewards/rejected": -1.6796875,
|
|
"step": 607
|
|
},
|
|
{
|
|
"epoch": 0.672566371681416,
|
|
"grad_norm": 14.369139671325684,
|
|
"learning_rate": 1.2890216354145888e-07,
|
|
"logits/chosen": -1.28515625,
|
|
"logits/rejected": -1.1796875,
|
|
"logps/chosen": -241.0,
|
|
"logps/rejected": -251.0,
|
|
"loss": 0.3464,
|
|
"rewards/accuracies": 0.765625,
|
|
"rewards/chosen": 0.953125,
|
|
"rewards/margins": 2.625,
|
|
"rewards/rejected": -1.671875,
|
|
"step": 608
|
|
},
|
|
{
|
|
"epoch": 0.6736725663716814,
|
|
"grad_norm": 13.686366081237793,
|
|
"learning_rate": 1.2811857558572167e-07,
|
|
"logits/chosen": -1.1953125,
|
|
"logits/rejected": -1.056640625,
|
|
"logps/chosen": -259.0,
|
|
"logps/rejected": -262.0,
|
|
"loss": 0.3501,
|
|
"rewards/accuracies": 0.8359375,
|
|
"rewards/chosen": 0.763671875,
|
|
"rewards/margins": 2.7421875,
|
|
"rewards/rejected": -1.9765625,
|
|
"step": 609
|
|
},
|
|
{
|
|
"epoch": 0.6747787610619469,
|
|
"grad_norm": 13.95426082611084,
|
|
"learning_rate": 1.2733655520592326e-07,
|
|
"logits/chosen": -1.30078125,
|
|
"logits/rejected": -1.1640625,
|
|
"logps/chosen": -257.5,
|
|
"logps/rejected": -308.0,
|
|
"loss": 0.2923,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 1.10546875,
|
|
"rewards/margins": 2.9765625,
|
|
"rewards/rejected": -1.875,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 0.6758849557522124,
|
|
"grad_norm": 13.288588523864746,
|
|
"learning_rate": 1.265561124600057e-07,
|
|
"logits/chosen": -1.1875,
|
|
"logits/rejected": -1.0703125,
|
|
"logps/chosen": -250.0,
|
|
"logps/rejected": -277.0,
|
|
"loss": 0.3124,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 0.875,
|
|
"rewards/margins": 2.765625,
|
|
"rewards/rejected": -1.890625,
|
|
"step": 611
|
|
},
|
|
{
|
|
"epoch": 0.6769911504424779,
|
|
"grad_norm": 14.142792701721191,
|
|
"learning_rate": 1.2577725738562068e-07,
|
|
"logits/chosen": -1.30078125,
|
|
"logits/rejected": -1.18359375,
|
|
"logps/chosen": -244.5,
|
|
"logps/rejected": -249.0,
|
|
"loss": 0.3795,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": 0.533203125,
|
|
"rewards/margins": 2.19921875,
|
|
"rewards/rejected": -1.66796875,
|
|
"step": 612
|
|
},
|
|
{
|
|
"epoch": 0.6780973451327433,
|
|
"grad_norm": 12.952162742614746,
|
|
"learning_rate": 1.2500000000000005e-07,
|
|
"logits/chosen": -1.08984375,
|
|
"logits/rejected": -1.109375,
|
|
"logps/chosen": -259.5,
|
|
"logps/rejected": -315.0,
|
|
"loss": 0.2972,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 0.9453125,
|
|
"rewards/margins": 2.984375,
|
|
"rewards/rejected": -2.03515625,
|
|
"step": 613
|
|
},
|
|
{
|
|
"epoch": 0.6792035398230089,
|
|
"grad_norm": 12.563096046447754,
|
|
"learning_rate": 1.2422435029982667e-07,
|
|
"logits/chosen": -1.33203125,
|
|
"logits/rejected": -1.2109375,
|
|
"logps/chosen": -251.0,
|
|
"logps/rejected": -277.0,
|
|
"loss": 0.2854,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 1.1640625,
|
|
"rewards/margins": 2.9921875,
|
|
"rewards/rejected": -1.828125,
|
|
"step": 614
|
|
},
|
|
{
|
|
"epoch": 0.6803097345132744,
|
|
"grad_norm": 15.870928764343262,
|
|
"learning_rate": 1.234503182611066e-07,
|
|
"logits/chosen": -1.38671875,
|
|
"logits/rejected": -1.22265625,
|
|
"logps/chosen": -271.5,
|
|
"logps/rejected": -310.0,
|
|
"loss": 0.3588,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": 0.6884765625,
|
|
"rewards/margins": 2.390625,
|
|
"rewards/rejected": -1.70703125,
|
|
"step": 615
|
|
},
|
|
{
|
|
"epoch": 0.6814159292035398,
|
|
"grad_norm": 12.973315238952637,
|
|
"learning_rate": 1.2267791383904017e-07,
|
|
"logits/chosen": -1.23828125,
|
|
"logits/rejected": -1.12890625,
|
|
"logps/chosen": -229.0,
|
|
"logps/rejected": -272.5,
|
|
"loss": 0.2962,
|
|
"rewards/accuracies": 0.859375,
|
|
"rewards/chosen": 1.130859375,
|
|
"rewards/margins": 3.2109375,
|
|
"rewards/rejected": -2.09375,
|
|
"step": 616
|
|
},
|
|
{
|
|
"epoch": 0.6825221238938053,
|
|
"grad_norm": 15.841652870178223,
|
|
"learning_rate": 1.2190714696789407e-07,
|
|
"logits/chosen": -1.20703125,
|
|
"logits/rejected": -1.11328125,
|
|
"logps/chosen": -266.0,
|
|
"logps/rejected": -282.0,
|
|
"loss": 0.3952,
|
|
"rewards/accuracies": 0.6953125,
|
|
"rewards/chosen": 0.6005859375,
|
|
"rewards/margins": 2.2421875,
|
|
"rewards/rejected": -1.640625,
|
|
"step": 617
|
|
},
|
|
{
|
|
"epoch": 0.6836283185840708,
|
|
"grad_norm": 14.495512008666992,
|
|
"learning_rate": 1.2113802756087396e-07,
|
|
"logits/chosen": -1.2421875,
|
|
"logits/rejected": -1.15625,
|
|
"logps/chosen": -251.5,
|
|
"logps/rejected": -270.5,
|
|
"loss": 0.3808,
|
|
"rewards/accuracies": 0.78125,
|
|
"rewards/chosen": 0.5576171875,
|
|
"rewards/margins": 2.3359375,
|
|
"rewards/rejected": -1.77734375,
|
|
"step": 618
|
|
},
|
|
{
|
|
"epoch": 0.6847345132743363,
|
|
"grad_norm": 13.138040542602539,
|
|
"learning_rate": 1.2037056550999623e-07,
|
|
"logits/chosen": -1.08984375,
|
|
"logits/rejected": -1.046875,
|
|
"logps/chosen": -261.0,
|
|
"logps/rejected": -308.0,
|
|
"loss": 0.3147,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 0.82421875,
|
|
"rewards/margins": 2.8203125,
|
|
"rewards/rejected": -1.99609375,
|
|
"step": 619
|
|
},
|
|
{
|
|
"epoch": 0.6858407079646017,
|
|
"grad_norm": 15.598456382751465,
|
|
"learning_rate": 1.1960477068596154e-07,
|
|
"logits/chosen": -1.36328125,
|
|
"logits/rejected": -1.08984375,
|
|
"logps/chosen": -266.0,
|
|
"logps/rejected": -286.0,
|
|
"loss": 0.3759,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": 0.62890625,
|
|
"rewards/margins": 2.1875,
|
|
"rewards/rejected": -1.55859375,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 0.6869469026548672,
|
|
"grad_norm": 13.848457336425781,
|
|
"learning_rate": 1.1884065293802756e-07,
|
|
"logits/chosen": -1.1640625,
|
|
"logits/rejected": -1.18359375,
|
|
"logps/chosen": -244.5,
|
|
"logps/rejected": -257.0,
|
|
"loss": 0.3068,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 0.958984375,
|
|
"rewards/margins": 2.8828125,
|
|
"rewards/rejected": -1.92578125,
|
|
"step": 621
|
|
},
|
|
{
|
|
"epoch": 0.6880530973451328,
|
|
"grad_norm": 12.871940612792969,
|
|
"learning_rate": 1.1807822209388196e-07,
|
|
"logits/chosen": -1.2890625,
|
|
"logits/rejected": -1.2109375,
|
|
"logps/chosen": -239.0,
|
|
"logps/rejected": -281.0,
|
|
"loss": 0.2818,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 1.08984375,
|
|
"rewards/margins": 3.171875,
|
|
"rewards/rejected": -2.0859375,
|
|
"step": 622
|
|
},
|
|
{
|
|
"epoch": 0.6891592920353983,
|
|
"grad_norm": 13.695356369018555,
|
|
"learning_rate": 1.173174879595166e-07,
|
|
"logits/chosen": -1.32421875,
|
|
"logits/rejected": -1.1484375,
|
|
"logps/chosen": -244.5,
|
|
"logps/rejected": -276.0,
|
|
"loss": 0.3137,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 0.9609375,
|
|
"rewards/margins": 2.875,
|
|
"rewards/rejected": -1.91796875,
|
|
"step": 623
|
|
},
|
|
{
|
|
"epoch": 0.6902654867256637,
|
|
"grad_norm": 16.23243522644043,
|
|
"learning_rate": 1.1655846031910119e-07,
|
|
"logits/chosen": -1.359375,
|
|
"logits/rejected": -1.21484375,
|
|
"logps/chosen": -253.0,
|
|
"logps/rejected": -301.0,
|
|
"loss": 0.3016,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": 0.837890625,
|
|
"rewards/margins": 3.0625,
|
|
"rewards/rejected": -2.2265625,
|
|
"step": 624
|
|
},
|
|
{
|
|
"epoch": 0.6913716814159292,
|
|
"grad_norm": 14.047713279724121,
|
|
"learning_rate": 1.1580114893485712e-07,
|
|
"logits/chosen": -1.203125,
|
|
"logits/rejected": -1.18359375,
|
|
"logps/chosen": -241.0,
|
|
"logps/rejected": -286.0,
|
|
"loss": 0.2963,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 1.08203125,
|
|
"rewards/margins": 3.265625,
|
|
"rewards/rejected": -2.1796875,
|
|
"step": 625
|
|
},
|
|
{
|
|
"epoch": 0.6924778761061947,
|
|
"grad_norm": 13.80639934539795,
|
|
"learning_rate": 1.1504556354693226e-07,
|
|
"logits/chosen": -1.3359375,
|
|
"logits/rejected": -1.15625,
|
|
"logps/chosen": -248.5,
|
|
"logps/rejected": -274.0,
|
|
"loss": 0.317,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 0.9296875,
|
|
"rewards/margins": 2.84375,
|
|
"rewards/rejected": -1.91796875,
|
|
"step": 626
|
|
},
|
|
{
|
|
"epoch": 0.6935840707964602,
|
|
"grad_norm": 13.272629737854004,
|
|
"learning_rate": 1.1429171387327585e-07,
|
|
"logits/chosen": -1.34375,
|
|
"logits/rejected": -1.2109375,
|
|
"logps/chosen": -238.5,
|
|
"logps/rejected": -286.0,
|
|
"loss": 0.2575,
|
|
"rewards/accuracies": 0.875,
|
|
"rewards/chosen": 1.07421875,
|
|
"rewards/margins": 3.328125,
|
|
"rewards/rejected": -2.25,
|
|
"step": 627
|
|
},
|
|
{
|
|
"epoch": 0.6946902654867256,
|
|
"grad_norm": 15.396360397338867,
|
|
"learning_rate": 1.1353960960951293e-07,
|
|
"logits/chosen": -1.2421875,
|
|
"logits/rejected": -1.15625,
|
|
"logps/chosen": -276.0,
|
|
"logps/rejected": -276.0,
|
|
"loss": 0.3754,
|
|
"rewards/accuracies": 0.7734375,
|
|
"rewards/chosen": 0.822265625,
|
|
"rewards/margins": 2.4921875,
|
|
"rewards/rejected": -1.671875,
|
|
"step": 628
|
|
},
|
|
{
|
|
"epoch": 0.6957964601769911,
|
|
"grad_norm": 13.207889556884766,
|
|
"learning_rate": 1.1278926042882026e-07,
|
|
"logits/chosen": -1.29296875,
|
|
"logits/rejected": -1.1484375,
|
|
"logps/chosen": -249.5,
|
|
"logps/rejected": -302.0,
|
|
"loss": 0.3109,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 0.80078125,
|
|
"rewards/margins": 2.9140625,
|
|
"rewards/rejected": -2.11328125,
|
|
"step": 629
|
|
},
|
|
{
|
|
"epoch": 0.6969026548672567,
|
|
"grad_norm": 13.04702091217041,
|
|
"learning_rate": 1.120406759818014e-07,
|
|
"logits/chosen": -1.29296875,
|
|
"logits/rejected": -1.1640625,
|
|
"logps/chosen": -236.5,
|
|
"logps/rejected": -270.0,
|
|
"loss": 0.3229,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 0.900390625,
|
|
"rewards/margins": 2.6640625,
|
|
"rewards/rejected": -1.76953125,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 0.6980088495575221,
|
|
"grad_norm": 14.894906997680664,
|
|
"learning_rate": 1.1129386589636292e-07,
|
|
"logits/chosen": -1.3125,
|
|
"logits/rejected": -1.18359375,
|
|
"logps/chosen": -280.0,
|
|
"logps/rejected": -280.5,
|
|
"loss": 0.316,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 0.83203125,
|
|
"rewards/margins": 2.828125,
|
|
"rewards/rejected": -2.0,
|
|
"step": 631
|
|
},
|
|
{
|
|
"epoch": 0.6991150442477876,
|
|
"grad_norm": 16.062137603759766,
|
|
"learning_rate": 1.1054883977759066e-07,
|
|
"logits/chosen": -1.26953125,
|
|
"logits/rejected": -1.140625,
|
|
"logps/chosen": -275.0,
|
|
"logps/rejected": -277.0,
|
|
"loss": 0.3502,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 0.69140625,
|
|
"rewards/margins": 2.71875,
|
|
"rewards/rejected": -2.0234375,
|
|
"step": 632
|
|
},
|
|
{
|
|
"epoch": 0.7002212389380531,
|
|
"grad_norm": 14.050618171691895,
|
|
"learning_rate": 1.0980560720762555e-07,
|
|
"logits/chosen": -1.19921875,
|
|
"logits/rejected": -1.1484375,
|
|
"logps/chosen": -248.0,
|
|
"logps/rejected": -288.0,
|
|
"loss": 0.3215,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 0.7216796875,
|
|
"rewards/margins": 2.8125,
|
|
"rewards/rejected": -2.0859375,
|
|
"step": 633
|
|
},
|
|
{
|
|
"epoch": 0.7013274336283186,
|
|
"grad_norm": 11.265563011169434,
|
|
"learning_rate": 1.0906417774554132e-07,
|
|
"logits/chosen": -1.1953125,
|
|
"logits/rejected": -1.21484375,
|
|
"logps/chosen": -234.0,
|
|
"logps/rejected": -249.5,
|
|
"loss": 0.2667,
|
|
"rewards/accuracies": 0.8671875,
|
|
"rewards/chosen": 1.05859375,
|
|
"rewards/margins": 3.203125,
|
|
"rewards/rejected": -2.140625,
|
|
"step": 634
|
|
},
|
|
{
|
|
"epoch": 0.702433628318584,
|
|
"grad_norm": 13.785270690917969,
|
|
"learning_rate": 1.0832456092722062e-07,
|
|
"logits/chosen": -1.3046875,
|
|
"logits/rejected": -1.18359375,
|
|
"logps/chosen": -268.0,
|
|
"logps/rejected": -271.0,
|
|
"loss": 0.3269,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 0.63671875,
|
|
"rewards/margins": 2.421875,
|
|
"rewards/rejected": -1.7890625,
|
|
"step": 635
|
|
},
|
|
{
|
|
"epoch": 0.7035398230088495,
|
|
"grad_norm": 14.249685287475586,
|
|
"learning_rate": 1.0758676626523311e-07,
|
|
"logits/chosen": -1.32421875,
|
|
"logits/rejected": -1.1796875,
|
|
"logps/chosen": -265.0,
|
|
"logps/rejected": -286.0,
|
|
"loss": 0.314,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 0.705078125,
|
|
"rewards/margins": 2.75,
|
|
"rewards/rejected": -2.046875,
|
|
"step": 636
|
|
},
|
|
{
|
|
"epoch": 0.7046460176991151,
|
|
"grad_norm": 12.366557121276855,
|
|
"learning_rate": 1.0685080324871278e-07,
|
|
"logits/chosen": -1.2421875,
|
|
"logits/rejected": -1.021484375,
|
|
"logps/chosen": -256.0,
|
|
"logps/rejected": -298.0,
|
|
"loss": 0.27,
|
|
"rewards/accuracies": 0.859375,
|
|
"rewards/chosen": 0.75390625,
|
|
"rewards/margins": 2.65625,
|
|
"rewards/rejected": -1.90234375,
|
|
"step": 637
|
|
},
|
|
{
|
|
"epoch": 0.7057522123893806,
|
|
"grad_norm": 16.30191421508789,
|
|
"learning_rate": 1.0611668134323575e-07,
|
|
"logits/chosen": -1.30078125,
|
|
"logits/rejected": -1.1484375,
|
|
"logps/chosen": -282.0,
|
|
"logps/rejected": -299.0,
|
|
"loss": 0.3438,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": 0.4140625,
|
|
"rewards/margins": 2.54296875,
|
|
"rewards/rejected": -2.1328125,
|
|
"step": 638
|
|
},
|
|
{
|
|
"epoch": 0.706858407079646,
|
|
"grad_norm": 14.99670696258545,
|
|
"learning_rate": 1.0538440999069895e-07,
|
|
"logits/chosen": -1.30859375,
|
|
"logits/rejected": -1.19921875,
|
|
"logps/chosen": -255.5,
|
|
"logps/rejected": -298.0,
|
|
"loss": 0.3104,
|
|
"rewards/accuracies": 0.8515625,
|
|
"rewards/chosen": 0.896484375,
|
|
"rewards/margins": 2.8671875,
|
|
"rewards/rejected": -1.96875,
|
|
"step": 639
|
|
},
|
|
{
|
|
"epoch": 0.7079646017699115,
|
|
"grad_norm": 12.429228782653809,
|
|
"learning_rate": 1.0465399860919838e-07,
|
|
"logits/chosen": -1.2265625,
|
|
"logits/rejected": -1.1953125,
|
|
"logps/chosen": -255.5,
|
|
"logps/rejected": -273.0,
|
|
"loss": 0.2869,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 1.080078125,
|
|
"rewards/margins": 3.0859375,
|
|
"rewards/rejected": -2.00390625,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 0.709070796460177,
|
|
"grad_norm": 12.204998970031738,
|
|
"learning_rate": 1.0392545659290788e-07,
|
|
"logits/chosen": -1.2578125,
|
|
"logits/rejected": -1.21875,
|
|
"logps/chosen": -260.5,
|
|
"logps/rejected": -274.0,
|
|
"loss": 0.2817,
|
|
"rewards/accuracies": 0.8671875,
|
|
"rewards/chosen": 0.810546875,
|
|
"rewards/margins": 2.9921875,
|
|
"rewards/rejected": -2.1875,
|
|
"step": 641
|
|
},
|
|
{
|
|
"epoch": 0.7101769911504425,
|
|
"grad_norm": 14.068879127502441,
|
|
"learning_rate": 1.0319879331195882e-07,
|
|
"logits/chosen": -1.21484375,
|
|
"logits/rejected": -1.0703125,
|
|
"logps/chosen": -254.0,
|
|
"logps/rejected": -272.0,
|
|
"loss": 0.3538,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 0.591796875,
|
|
"rewards/margins": 2.5859375,
|
|
"rewards/rejected": -2.0,
|
|
"step": 642
|
|
},
|
|
{
|
|
"epoch": 0.7112831858407079,
|
|
"grad_norm": 12.932374954223633,
|
|
"learning_rate": 1.0247401811231887e-07,
|
|
"logits/chosen": -1.390625,
|
|
"logits/rejected": -1.18359375,
|
|
"logps/chosen": -233.0,
|
|
"logps/rejected": -259.0,
|
|
"loss": 0.2886,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 0.841796875,
|
|
"rewards/margins": 2.9375,
|
|
"rewards/rejected": -2.09375,
|
|
"step": 643
|
|
},
|
|
{
|
|
"epoch": 0.7123893805309734,
|
|
"grad_norm": 12.754419326782227,
|
|
"learning_rate": 1.0175114031567245e-07,
|
|
"logits/chosen": -1.27734375,
|
|
"logits/rejected": -1.2109375,
|
|
"logps/chosen": -253.0,
|
|
"logps/rejected": -288.0,
|
|
"loss": 0.2941,
|
|
"rewards/accuracies": 0.8515625,
|
|
"rewards/chosen": 0.798828125,
|
|
"rewards/margins": 2.671875,
|
|
"rewards/rejected": -1.875,
|
|
"step": 644
|
|
},
|
|
{
|
|
"epoch": 0.713495575221239,
|
|
"grad_norm": 13.075281143188477,
|
|
"learning_rate": 1.0103016921930055e-07,
|
|
"logits/chosen": -1.3046875,
|
|
"logits/rejected": -1.18359375,
|
|
"logps/chosen": -247.5,
|
|
"logps/rejected": -276.0,
|
|
"loss": 0.324,
|
|
"rewards/accuracies": 0.8359375,
|
|
"rewards/chosen": 0.845703125,
|
|
"rewards/margins": 2.5546875,
|
|
"rewards/rejected": -1.70703125,
|
|
"step": 645
|
|
},
|
|
{
|
|
"epoch": 0.7146017699115044,
|
|
"grad_norm": 15.02340030670166,
|
|
"learning_rate": 1.0031111409596091e-07,
|
|
"logits/chosen": -1.15625,
|
|
"logits/rejected": -1.2109375,
|
|
"logps/chosen": -246.5,
|
|
"logps/rejected": -258.0,
|
|
"loss": 0.3851,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 0.669921875,
|
|
"rewards/margins": 2.4140625,
|
|
"rewards/rejected": -1.7421875,
|
|
"step": 646
|
|
},
|
|
{
|
|
"epoch": 0.7157079646017699,
|
|
"grad_norm": 12.193872451782227,
|
|
"learning_rate": 9.95939841937693e-08,
|
|
"logits/chosen": -1.30859375,
|
|
"logits/rejected": -1.2578125,
|
|
"logps/chosen": -259.5,
|
|
"logps/rejected": -265.5,
|
|
"loss": 0.2392,
|
|
"rewards/accuracies": 0.890625,
|
|
"rewards/chosen": 0.86328125,
|
|
"rewards/margins": 3.2734375,
|
|
"rewards/rejected": -2.4140625,
|
|
"step": 647
|
|
},
|
|
{
|
|
"epoch": 0.7168141592920354,
|
|
"grad_norm": 13.42468547821045,
|
|
"learning_rate": 9.887878873608027e-08,
|
|
"logits/chosen": -1.16015625,
|
|
"logits/rejected": -1.11328125,
|
|
"logps/chosen": -263.5,
|
|
"logps/rejected": -290.0,
|
|
"loss": 0.3087,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 0.791015625,
|
|
"rewards/margins": 2.875,
|
|
"rewards/rejected": -2.08203125,
|
|
"step": 648
|
|
},
|
|
{
|
|
"epoch": 0.7179203539823009,
|
|
"grad_norm": 13.621614456176758,
|
|
"learning_rate": 9.816553692136834e-08,
|
|
"logits/chosen": -1.17578125,
|
|
"logits/rejected": -1.109375,
|
|
"logps/chosen": -256.5,
|
|
"logps/rejected": -282.0,
|
|
"loss": 0.2806,
|
|
"rewards/accuracies": 0.84375,
|
|
"rewards/chosen": 1.1484375,
|
|
"rewards/margins": 3.046875,
|
|
"rewards/rejected": -1.89453125,
|
|
"step": 649
|
|
},
|
|
{
|
|
"epoch": 0.7190265486725663,
|
|
"grad_norm": 13.231938362121582,
|
|
"learning_rate": 9.745423792310995e-08,
|
|
"logits/chosen": -1.29296875,
|
|
"logits/rejected": -1.12890625,
|
|
"logps/chosen": -243.0,
|
|
"logps/rejected": -258.5,
|
|
"loss": 0.2872,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 0.7265625,
|
|
"rewards/margins": 2.90625,
|
|
"rewards/rejected": -2.1796875,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.7190265486725663,
|
|
"eval_logits/chosen": -1.2634872198104858,
|
|
"eval_logits/rejected": -1.1566191911697388,
|
|
"eval_logps/chosen": -252.76119995117188,
|
|
"eval_logps/rejected": -279.27362060546875,
|
|
"eval_loss": 0.3217768967151642,
|
|
"eval_rewards/accuracies": 0.8134269714355469,
|
|
"eval_rewards/chosen": 0.8057758212089539,
|
|
"eval_rewards/margins": 2.8031716346740723,
|
|
"eval_rewards/rejected": -1.996579647064209,
|
|
"eval_runtime": 193.0564,
|
|
"eval_samples_per_second": 66.576,
|
|
"eval_steps_per_second": 1.041,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 0.7201327433628318,
|
|
"grad_norm": 13.893576622009277,
|
|
"learning_rate": 9.674490088966562e-08,
|
|
"logits/chosen": -1.2734375,
|
|
"logits/rejected": -1.12109375,
|
|
"logps/chosen": -262.5,
|
|
"logps/rejected": -296.0,
|
|
"loss": 0.2924,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 0.919921875,
|
|
"rewards/margins": 3.0859375,
|
|
"rewards/rejected": -2.1640625,
|
|
"step": 651
|
|
},
|
|
{
|
|
"epoch": 0.7212389380530974,
|
|
"grad_norm": 13.017692565917969,
|
|
"learning_rate": 9.603753494416184e-08,
|
|
"logits/chosen": -1.328125,
|
|
"logits/rejected": -1.28515625,
|
|
"logps/chosen": -242.0,
|
|
"logps/rejected": -248.0,
|
|
"loss": 0.2897,
|
|
"rewards/accuracies": 0.8515625,
|
|
"rewards/chosen": 0.791015625,
|
|
"rewards/margins": 2.7109375,
|
|
"rewards/rejected": -1.9140625,
|
|
"step": 652
|
|
},
|
|
{
|
|
"epoch": 0.7223451327433629,
|
|
"grad_norm": 17.18537712097168,
|
|
"learning_rate": 9.533214918437421e-08,
|
|
"logits/chosen": -1.25390625,
|
|
"logits/rejected": -1.23828125,
|
|
"logps/chosen": -283.0,
|
|
"logps/rejected": -287.0,
|
|
"loss": 0.402,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 0.509765625,
|
|
"rewards/margins": 2.1640625,
|
|
"rewards/rejected": -1.65625,
|
|
"step": 653
|
|
},
|
|
{
|
|
"epoch": 0.7234513274336283,
|
|
"grad_norm": 17.223974227905273,
|
|
"learning_rate": 9.462875268261e-08,
|
|
"logits/chosen": -1.19140625,
|
|
"logits/rejected": -1.2578125,
|
|
"logps/chosen": -297.0,
|
|
"logps/rejected": -311.0,
|
|
"loss": 0.3244,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 0.814453125,
|
|
"rewards/margins": 2.859375,
|
|
"rewards/rejected": -2.046875,
|
|
"step": 654
|
|
},
|
|
{
|
|
"epoch": 0.7245575221238938,
|
|
"grad_norm": 14.414237976074219,
|
|
"learning_rate": 9.39273544855918e-08,
|
|
"logits/chosen": -1.24609375,
|
|
"logits/rejected": -1.10546875,
|
|
"logps/chosen": -259.0,
|
|
"logps/rejected": -303.0,
|
|
"loss": 0.3058,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 0.888671875,
|
|
"rewards/margins": 3.140625,
|
|
"rewards/rejected": -2.25,
|
|
"step": 655
|
|
},
|
|
{
|
|
"epoch": 0.7256637168141593,
|
|
"grad_norm": 13.708487510681152,
|
|
"learning_rate": 9.32279636143411e-08,
|
|
"logits/chosen": -1.359375,
|
|
"logits/rejected": -1.16015625,
|
|
"logps/chosen": -271.0,
|
|
"logps/rejected": -277.5,
|
|
"loss": 0.3109,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 0.89453125,
|
|
"rewards/margins": 2.640625,
|
|
"rewards/rejected": -1.7421875,
|
|
"step": 656
|
|
},
|
|
{
|
|
"epoch": 0.7267699115044248,
|
|
"grad_norm": 14.687643051147461,
|
|
"learning_rate": 9.253058906406194e-08,
|
|
"logits/chosen": -1.203125,
|
|
"logits/rejected": -1.0625,
|
|
"logps/chosen": -280.0,
|
|
"logps/rejected": -306.0,
|
|
"loss": 0.314,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 0.810546875,
|
|
"rewards/margins": 2.7734375,
|
|
"rewards/rejected": -1.96875,
|
|
"step": 657
|
|
},
|
|
{
|
|
"epoch": 0.7278761061946902,
|
|
"grad_norm": 13.893321990966797,
|
|
"learning_rate": 9.183523980402582e-08,
|
|
"logits/chosen": -1.21484375,
|
|
"logits/rejected": -1.203125,
|
|
"logps/chosen": -240.0,
|
|
"logps/rejected": -291.0,
|
|
"loss": 0.3241,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 0.82421875,
|
|
"rewards/margins": 2.6875,
|
|
"rewards/rejected": -1.87109375,
|
|
"step": 658
|
|
},
|
|
{
|
|
"epoch": 0.7289823008849557,
|
|
"grad_norm": 13.528450965881348,
|
|
"learning_rate": 9.114192477745566e-08,
|
|
"logits/chosen": -1.3359375,
|
|
"logits/rejected": -1.1484375,
|
|
"logps/chosen": -262.0,
|
|
"logps/rejected": -278.0,
|
|
"loss": 0.3098,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 0.6640625,
|
|
"rewards/margins": 2.6953125,
|
|
"rewards/rejected": -2.03515625,
|
|
"step": 659
|
|
},
|
|
{
|
|
"epoch": 0.7300884955752213,
|
|
"grad_norm": 15.182424545288086,
|
|
"learning_rate": 9.045065290141138e-08,
|
|
"logits/chosen": -1.22265625,
|
|
"logits/rejected": -1.1015625,
|
|
"logps/chosen": -275.0,
|
|
"logps/rejected": -305.0,
|
|
"loss": 0.3081,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 0.822265625,
|
|
"rewards/margins": 2.8046875,
|
|
"rewards/rejected": -1.984375,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 0.7311946902654868,
|
|
"grad_norm": 14.025420188903809,
|
|
"learning_rate": 8.976143306667491e-08,
|
|
"logits/chosen": -1.21484375,
|
|
"logits/rejected": -1.16015625,
|
|
"logps/chosen": -255.5,
|
|
"logps/rejected": -290.0,
|
|
"loss": 0.2861,
|
|
"rewards/accuracies": 0.8359375,
|
|
"rewards/chosen": 0.857421875,
|
|
"rewards/margins": 2.9765625,
|
|
"rewards/rejected": -2.1171875,
|
|
"step": 661
|
|
},
|
|
{
|
|
"epoch": 0.7323008849557522,
|
|
"grad_norm": 12.591769218444824,
|
|
"learning_rate": 8.907427413763572e-08,
|
|
"logits/chosen": -1.32421875,
|
|
"logits/rejected": -1.1171875,
|
|
"logps/chosen": -268.0,
|
|
"logps/rejected": -275.5,
|
|
"loss": 0.2648,
|
|
"rewards/accuracies": 0.8828125,
|
|
"rewards/chosen": 0.677734375,
|
|
"rewards/margins": 2.9453125,
|
|
"rewards/rejected": -2.265625,
|
|
"step": 662
|
|
},
|
|
{
|
|
"epoch": 0.7334070796460177,
|
|
"grad_norm": 15.431063652038574,
|
|
"learning_rate": 8.838918495217712e-08,
|
|
"logits/chosen": -1.25390625,
|
|
"logits/rejected": -1.14453125,
|
|
"logps/chosen": -269.5,
|
|
"logps/rejected": -304.0,
|
|
"loss": 0.3575,
|
|
"rewards/accuracies": 0.7578125,
|
|
"rewards/chosen": 0.787109375,
|
|
"rewards/margins": 2.640625,
|
|
"rewards/rejected": -1.84765625,
|
|
"step": 663
|
|
},
|
|
{
|
|
"epoch": 0.7345132743362832,
|
|
"grad_norm": 14.970857620239258,
|
|
"learning_rate": 8.770617432156257e-08,
|
|
"logits/chosen": -1.28515625,
|
|
"logits/rejected": -1.05859375,
|
|
"logps/chosen": -268.0,
|
|
"logps/rejected": -298.0,
|
|
"loss": 0.3506,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 0.708984375,
|
|
"rewards/margins": 2.4921875,
|
|
"rewards/rejected": -1.78125,
|
|
"step": 664
|
|
},
|
|
{
|
|
"epoch": 0.7356194690265486,
|
|
"grad_norm": 15.439310073852539,
|
|
"learning_rate": 8.702525103032184e-08,
|
|
"logits/chosen": -1.2421875,
|
|
"logits/rejected": -1.07421875,
|
|
"logps/chosen": -248.0,
|
|
"logps/rejected": -280.5,
|
|
"loss": 0.3629,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 0.7626953125,
|
|
"rewards/margins": 2.59375,
|
|
"rewards/rejected": -1.828125,
|
|
"step": 665
|
|
},
|
|
{
|
|
"epoch": 0.7367256637168141,
|
|
"grad_norm": 13.227315902709961,
|
|
"learning_rate": 8.634642383613891e-08,
|
|
"logits/chosen": -1.21875,
|
|
"logits/rejected": -1.11328125,
|
|
"logps/chosen": -254.5,
|
|
"logps/rejected": -285.0,
|
|
"loss": 0.3095,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 0.8984375,
|
|
"rewards/margins": 2.875,
|
|
"rewards/rejected": -1.97265625,
|
|
"step": 666
|
|
},
|
|
{
|
|
"epoch": 0.7378318584070797,
|
|
"grad_norm": 12.241044044494629,
|
|
"learning_rate": 8.566970146973835e-08,
|
|
"logits/chosen": -1.30859375,
|
|
"logits/rejected": -1.125,
|
|
"logps/chosen": -257.0,
|
|
"logps/rejected": -293.0,
|
|
"loss": 0.2911,
|
|
"rewards/accuracies": 0.8359375,
|
|
"rewards/chosen": 0.849609375,
|
|
"rewards/margins": 3.0,
|
|
"rewards/rejected": -2.15234375,
|
|
"step": 667
|
|
},
|
|
{
|
|
"epoch": 0.7389380530973452,
|
|
"grad_norm": 12.409917831420898,
|
|
"learning_rate": 8.499509263477387e-08,
|
|
"logits/chosen": -1.375,
|
|
"logits/rejected": -1.15234375,
|
|
"logps/chosen": -222.0,
|
|
"logps/rejected": -269.0,
|
|
"loss": 0.285,
|
|
"rewards/accuracies": 0.84375,
|
|
"rewards/chosen": 1.044921875,
|
|
"rewards/margins": 3.109375,
|
|
"rewards/rejected": -2.05859375,
|
|
"step": 668
|
|
},
|
|
{
|
|
"epoch": 0.7400442477876106,
|
|
"grad_norm": 16.232877731323242,
|
|
"learning_rate": 8.432260600771599e-08,
|
|
"logits/chosen": -1.31640625,
|
|
"logits/rejected": -1.171875,
|
|
"logps/chosen": -278.0,
|
|
"logps/rejected": -274.0,
|
|
"loss": 0.3434,
|
|
"rewards/accuracies": 0.78125,
|
|
"rewards/chosen": 0.6806640625,
|
|
"rewards/margins": 2.796875,
|
|
"rewards/rejected": -2.11328125,
|
|
"step": 669
|
|
},
|
|
{
|
|
"epoch": 0.7411504424778761,
|
|
"grad_norm": 12.330305099487305,
|
|
"learning_rate": 8.36522502377403e-08,
|
|
"logits/chosen": -1.34375,
|
|
"logits/rejected": -1.1640625,
|
|
"logps/chosen": -239.5,
|
|
"logps/rejected": -292.0,
|
|
"loss": 0.2725,
|
|
"rewards/accuracies": 0.84375,
|
|
"rewards/chosen": 0.86328125,
|
|
"rewards/margins": 2.7734375,
|
|
"rewards/rejected": -1.9140625,
|
|
"step": 670
|
|
},
|
|
{
|
|
"epoch": 0.7422566371681416,
|
|
"grad_norm": 15.042512893676758,
|
|
"learning_rate": 8.298403394661657e-08,
|
|
"logits/chosen": -1.24609375,
|
|
"logits/rejected": -1.18359375,
|
|
"logps/chosen": -278.0,
|
|
"logps/rejected": -262.0,
|
|
"loss": 0.3643,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 0.53515625,
|
|
"rewards/margins": 2.4609375,
|
|
"rewards/rejected": -1.92578125,
|
|
"step": 671
|
|
},
|
|
{
|
|
"epoch": 0.7433628318584071,
|
|
"grad_norm": 15.917474746704102,
|
|
"learning_rate": 8.231796572859778e-08,
|
|
"logits/chosen": -1.09765625,
|
|
"logits/rejected": -1.12109375,
|
|
"logps/chosen": -250.5,
|
|
"logps/rejected": -301.0,
|
|
"loss": 0.2963,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 1.0,
|
|
"rewards/margins": 3.1796875,
|
|
"rewards/rejected": -2.1796875,
|
|
"step": 672
|
|
},
|
|
{
|
|
"epoch": 0.7444690265486725,
|
|
"grad_norm": 13.662030220031738,
|
|
"learning_rate": 8.165405415030915e-08,
|
|
"logits/chosen": -1.35546875,
|
|
"logits/rejected": -1.14453125,
|
|
"logps/chosen": -288.0,
|
|
"logps/rejected": -279.0,
|
|
"loss": 0.2763,
|
|
"rewards/accuracies": 0.859375,
|
|
"rewards/chosen": 0.91015625,
|
|
"rewards/margins": 2.953125,
|
|
"rewards/rejected": -2.0390625,
|
|
"step": 673
|
|
},
|
|
{
|
|
"epoch": 0.745575221238938,
|
|
"grad_norm": 14.487608909606934,
|
|
"learning_rate": 8.099230775063879e-08,
|
|
"logits/chosen": -1.2890625,
|
|
"logits/rejected": -1.140625,
|
|
"logps/chosen": -261.0,
|
|
"logps/rejected": -277.0,
|
|
"loss": 0.319,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 0.75390625,
|
|
"rewards/margins": 2.7109375,
|
|
"rewards/rejected": -1.953125,
|
|
"step": 674
|
|
},
|
|
{
|
|
"epoch": 0.7466814159292036,
|
|
"grad_norm": 15.394200325012207,
|
|
"learning_rate": 8.033273504062698e-08,
|
|
"logits/chosen": -1.12109375,
|
|
"logits/rejected": -1.11328125,
|
|
"logps/chosen": -267.0,
|
|
"logps/rejected": -314.0,
|
|
"loss": 0.3292,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 0.70703125,
|
|
"rewards/margins": 2.84375,
|
|
"rewards/rejected": -2.1328125,
|
|
"step": 675
|
|
},
|
|
{
|
|
"epoch": 0.7477876106194691,
|
|
"grad_norm": 16.063007354736328,
|
|
"learning_rate": 7.967534450335728e-08,
|
|
"logits/chosen": -1.32421875,
|
|
"logits/rejected": -1.19140625,
|
|
"logps/chosen": -253.5,
|
|
"logps/rejected": -266.5,
|
|
"loss": 0.3824,
|
|
"rewards/accuracies": 0.78125,
|
|
"rewards/chosen": 0.556640625,
|
|
"rewards/margins": 2.4453125,
|
|
"rewards/rejected": -1.88671875,
|
|
"step": 676
|
|
},
|
|
{
|
|
"epoch": 0.7488938053097345,
|
|
"grad_norm": 15.266008377075195,
|
|
"learning_rate": 7.902014459384742e-08,
|
|
"logits/chosen": -1.21875,
|
|
"logits/rejected": -1.03515625,
|
|
"logps/chosen": -259.0,
|
|
"logps/rejected": -301.0,
|
|
"loss": 0.3159,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 0.875,
|
|
"rewards/margins": 3.1015625,
|
|
"rewards/rejected": -2.2265625,
|
|
"step": 677
|
|
},
|
|
{
|
|
"epoch": 0.75,
|
|
"grad_norm": 11.596745491027832,
|
|
"learning_rate": 7.836714373894015e-08,
|
|
"logits/chosen": -1.1484375,
|
|
"logits/rejected": -1.05078125,
|
|
"logps/chosen": -248.5,
|
|
"logps/rejected": -268.5,
|
|
"loss": 0.224,
|
|
"rewards/accuracies": 0.8828125,
|
|
"rewards/chosen": 1.046875,
|
|
"rewards/margins": 3.390625,
|
|
"rewards/rejected": -2.3359375,
|
|
"step": 678
|
|
},
|
|
{
|
|
"epoch": 0.7511061946902655,
|
|
"grad_norm": 12.86449909210205,
|
|
"learning_rate": 7.771635033719528e-08,
|
|
"logits/chosen": -1.26171875,
|
|
"logits/rejected": -1.09375,
|
|
"logps/chosen": -271.0,
|
|
"logps/rejected": -258.5,
|
|
"loss": 0.2782,
|
|
"rewards/accuracies": 0.875,
|
|
"rewards/chosen": 0.80078125,
|
|
"rewards/margins": 2.8359375,
|
|
"rewards/rejected": -2.03125,
|
|
"step": 679
|
|
},
|
|
{
|
|
"epoch": 0.7522123893805309,
|
|
"grad_norm": 12.727359771728516,
|
|
"learning_rate": 7.70677727587816e-08,
|
|
"logits/chosen": -1.2578125,
|
|
"logits/rejected": -1.0859375,
|
|
"logps/chosen": -257.0,
|
|
"logps/rejected": -295.0,
|
|
"loss": 0.2793,
|
|
"rewards/accuracies": 0.8359375,
|
|
"rewards/chosen": 0.70703125,
|
|
"rewards/margins": 2.734375,
|
|
"rewards/rejected": -2.0234375,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 0.7533185840707964,
|
|
"grad_norm": 12.862136840820312,
|
|
"learning_rate": 7.642141934536874e-08,
|
|
"logits/chosen": -1.3203125,
|
|
"logits/rejected": -1.1953125,
|
|
"logps/chosen": -242.5,
|
|
"logps/rejected": -268.0,
|
|
"loss": 0.2937,
|
|
"rewards/accuracies": 0.859375,
|
|
"rewards/chosen": 0.720703125,
|
|
"rewards/margins": 3.0546875,
|
|
"rewards/rejected": -2.328125,
|
|
"step": 681
|
|
},
|
|
{
|
|
"epoch": 0.754424778761062,
|
|
"grad_norm": 13.950096130371094,
|
|
"learning_rate": 7.577729841002075e-08,
|
|
"logits/chosen": -1.17578125,
|
|
"logits/rejected": -1.12890625,
|
|
"logps/chosen": -279.0,
|
|
"logps/rejected": -308.0,
|
|
"loss": 0.2855,
|
|
"rewards/accuracies": 0.859375,
|
|
"rewards/chosen": 0.703125,
|
|
"rewards/margins": 2.875,
|
|
"rewards/rejected": -2.171875,
|
|
"step": 682
|
|
},
|
|
{
|
|
"epoch": 0.7555309734513275,
|
|
"grad_norm": 15.024590492248535,
|
|
"learning_rate": 7.513541823708827e-08,
|
|
"logits/chosen": -1.26953125,
|
|
"logits/rejected": -1.140625,
|
|
"logps/chosen": -251.0,
|
|
"logps/rejected": -292.0,
|
|
"loss": 0.3303,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 0.697265625,
|
|
"rewards/margins": 2.75,
|
|
"rewards/rejected": -2.05078125,
|
|
"step": 683
|
|
},
|
|
{
|
|
"epoch": 0.7566371681415929,
|
|
"grad_norm": 14.337872505187988,
|
|
"learning_rate": 7.449578708210267e-08,
|
|
"logits/chosen": -1.26953125,
|
|
"logits/rejected": -1.21484375,
|
|
"logps/chosen": -283.0,
|
|
"logps/rejected": -276.0,
|
|
"loss": 0.3292,
|
|
"rewards/accuracies": 0.8359375,
|
|
"rewards/chosen": 0.71484375,
|
|
"rewards/margins": 2.8515625,
|
|
"rewards/rejected": -2.14453125,
|
|
"step": 684
|
|
},
|
|
{
|
|
"epoch": 0.7577433628318584,
|
|
"grad_norm": 13.712812423706055,
|
|
"learning_rate": 7.385841317166966e-08,
|
|
"logits/chosen": -1.30078125,
|
|
"logits/rejected": -1.1953125,
|
|
"logps/chosen": -250.5,
|
|
"logps/rejected": -283.0,
|
|
"loss": 0.309,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 0.654296875,
|
|
"rewards/margins": 2.8515625,
|
|
"rewards/rejected": -2.1953125,
|
|
"step": 685
|
|
},
|
|
{
|
|
"epoch": 0.7588495575221239,
|
|
"grad_norm": 11.522303581237793,
|
|
"learning_rate": 7.322330470336313e-08,
|
|
"logits/chosen": -1.43359375,
|
|
"logits/rejected": -1.15625,
|
|
"logps/chosen": -247.0,
|
|
"logps/rejected": -287.0,
|
|
"loss": 0.2535,
|
|
"rewards/accuracies": 0.890625,
|
|
"rewards/chosen": 0.541015625,
|
|
"rewards/margins": 2.796875,
|
|
"rewards/rejected": -2.265625,
|
|
"step": 686
|
|
},
|
|
{
|
|
"epoch": 0.7599557522123894,
|
|
"grad_norm": 16.617996215820312,
|
|
"learning_rate": 7.25904698456203e-08,
|
|
"logits/chosen": -1.21484375,
|
|
"logits/rejected": -1.0859375,
|
|
"logps/chosen": -283.0,
|
|
"logps/rejected": -304.0,
|
|
"loss": 0.3768,
|
|
"rewards/accuracies": 0.765625,
|
|
"rewards/chosen": 0.59375,
|
|
"rewards/margins": 2.578125,
|
|
"rewards/rejected": -1.98828125,
|
|
"step": 687
|
|
},
|
|
{
|
|
"epoch": 0.7610619469026548,
|
|
"grad_norm": 13.16273021697998,
|
|
"learning_rate": 7.195991673763644e-08,
|
|
"logits/chosen": -1.2578125,
|
|
"logits/rejected": -1.140625,
|
|
"logps/chosen": -256.0,
|
|
"logps/rejected": -261.5,
|
|
"loss": 0.342,
|
|
"rewards/accuracies": 0.8359375,
|
|
"rewards/chosen": 0.5322265625,
|
|
"rewards/margins": 2.6015625,
|
|
"rewards/rejected": -2.078125,
|
|
"step": 688
|
|
},
|
|
{
|
|
"epoch": 0.7621681415929203,
|
|
"grad_norm": 14.337390899658203,
|
|
"learning_rate": 7.133165348925976e-08,
|
|
"logits/chosen": -1.25,
|
|
"logits/rejected": -1.2734375,
|
|
"logps/chosen": -253.5,
|
|
"logps/rejected": -281.0,
|
|
"loss": 0.3474,
|
|
"rewards/accuracies": 0.7578125,
|
|
"rewards/chosen": 1.0078125,
|
|
"rewards/margins": 2.8359375,
|
|
"rewards/rejected": -1.83203125,
|
|
"step": 689
|
|
},
|
|
{
|
|
"epoch": 0.7632743362831859,
|
|
"grad_norm": 13.05460262298584,
|
|
"learning_rate": 7.070568818088782e-08,
|
|
"logits/chosen": -1.29296875,
|
|
"logits/rejected": -1.06640625,
|
|
"logps/chosen": -266.5,
|
|
"logps/rejected": -289.5,
|
|
"loss": 0.3306,
|
|
"rewards/accuracies": 0.8515625,
|
|
"rewards/chosen": 0.76171875,
|
|
"rewards/margins": 2.6875,
|
|
"rewards/rejected": -1.921875,
|
|
"step": 690
|
|
},
|
|
{
|
|
"epoch": 0.7643805309734514,
|
|
"grad_norm": 13.12061595916748,
|
|
"learning_rate": 7.008202886336323e-08,
|
|
"logits/chosen": -1.296875,
|
|
"logits/rejected": -1.11328125,
|
|
"logps/chosen": -252.0,
|
|
"logps/rejected": -294.0,
|
|
"loss": 0.3064,
|
|
"rewards/accuracies": 0.84375,
|
|
"rewards/chosen": 0.6943359375,
|
|
"rewards/margins": 2.96875,
|
|
"rewards/rejected": -2.2734375,
|
|
"step": 691
|
|
},
|
|
{
|
|
"epoch": 0.7654867256637168,
|
|
"grad_norm": 15.881913185119629,
|
|
"learning_rate": 6.94606835578699e-08,
|
|
"logits/chosen": -1.2734375,
|
|
"logits/rejected": -1.09375,
|
|
"logps/chosen": -267.5,
|
|
"logps/rejected": -279.0,
|
|
"loss": 0.39,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 0.5244140625,
|
|
"rewards/margins": 2.265625,
|
|
"rewards/rejected": -1.734375,
|
|
"step": 692
|
|
},
|
|
{
|
|
"epoch": 0.7665929203539823,
|
|
"grad_norm": 18.117141723632812,
|
|
"learning_rate": 6.884166025583043e-08,
|
|
"logits/chosen": -1.19140625,
|
|
"logits/rejected": -1.13671875,
|
|
"logps/chosen": -289.0,
|
|
"logps/rejected": -318.0,
|
|
"loss": 0.3893,
|
|
"rewards/accuracies": 0.734375,
|
|
"rewards/chosen": 0.591796875,
|
|
"rewards/margins": 2.71875,
|
|
"rewards/rejected": -2.1328125,
|
|
"step": 693
|
|
},
|
|
{
|
|
"epoch": 0.7676991150442478,
|
|
"grad_norm": 14.054704666137695,
|
|
"learning_rate": 6.822496691880275e-08,
|
|
"logits/chosen": -1.34765625,
|
|
"logits/rejected": -1.18359375,
|
|
"logps/chosen": -250.5,
|
|
"logps/rejected": -272.0,
|
|
"loss": 0.3268,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 0.708984375,
|
|
"rewards/margins": 2.6875,
|
|
"rewards/rejected": -1.9765625,
|
|
"step": 694
|
|
},
|
|
{
|
|
"epoch": 0.7688053097345132,
|
|
"grad_norm": 11.454045295715332,
|
|
"learning_rate": 6.761061147837807e-08,
|
|
"logits/chosen": -1.41796875,
|
|
"logits/rejected": -1.12890625,
|
|
"logps/chosen": -254.5,
|
|
"logps/rejected": -293.0,
|
|
"loss": 0.2523,
|
|
"rewards/accuracies": 0.84375,
|
|
"rewards/chosen": 0.861328125,
|
|
"rewards/margins": 3.1484375,
|
|
"rewards/rejected": -2.28125,
|
|
"step": 695
|
|
},
|
|
{
|
|
"epoch": 0.7699115044247787,
|
|
"grad_norm": 13.245506286621094,
|
|
"learning_rate": 6.699860183607894e-08,
|
|
"logits/chosen": -1.359375,
|
|
"logits/rejected": -1.140625,
|
|
"logps/chosen": -275.0,
|
|
"logps/rejected": -273.0,
|
|
"loss": 0.3098,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": 0.4716796875,
|
|
"rewards/margins": 2.71875,
|
|
"rewards/rejected": -2.25,
|
|
"step": 696
|
|
},
|
|
{
|
|
"epoch": 0.7710176991150443,
|
|
"grad_norm": 13.23479175567627,
|
|
"learning_rate": 6.638894586325719e-08,
|
|
"logits/chosen": -1.2890625,
|
|
"logits/rejected": -1.19140625,
|
|
"logps/chosen": -246.0,
|
|
"logps/rejected": -288.0,
|
|
"loss": 0.2909,
|
|
"rewards/accuracies": 0.8671875,
|
|
"rewards/chosen": 0.5712890625,
|
|
"rewards/margins": 2.859375,
|
|
"rewards/rejected": -2.296875,
|
|
"step": 697
|
|
},
|
|
{
|
|
"epoch": 0.7721238938053098,
|
|
"grad_norm": 14.469060897827148,
|
|
"learning_rate": 6.578165140099317e-08,
|
|
"logits/chosen": -1.26953125,
|
|
"logits/rejected": -1.20703125,
|
|
"logps/chosen": -252.5,
|
|
"logps/rejected": -277.0,
|
|
"loss": 0.3493,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 0.79296875,
|
|
"rewards/margins": 2.8359375,
|
|
"rewards/rejected": -2.04296875,
|
|
"step": 698
|
|
},
|
|
{
|
|
"epoch": 0.7732300884955752,
|
|
"grad_norm": 15.471959114074707,
|
|
"learning_rate": 6.517672625999465e-08,
|
|
"logits/chosen": -1.20703125,
|
|
"logits/rejected": -1.09375,
|
|
"logps/chosen": -254.0,
|
|
"logps/rejected": -284.0,
|
|
"loss": 0.3456,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 0.5849609375,
|
|
"rewards/margins": 2.78125,
|
|
"rewards/rejected": -2.203125,
|
|
"step": 699
|
|
},
|
|
{
|
|
"epoch": 0.7743362831858407,
|
|
"grad_norm": 13.424947738647461,
|
|
"learning_rate": 6.457417822049627e-08,
|
|
"logits/chosen": -1.35546875,
|
|
"logits/rejected": -1.13671875,
|
|
"logps/chosen": -260.0,
|
|
"logps/rejected": -280.0,
|
|
"loss": 0.3278,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 0.681640625,
|
|
"rewards/margins": 2.5703125,
|
|
"rewards/rejected": -1.890625,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 0.7743362831858407,
|
|
"eval_logits/chosen": -1.267957091331482,
|
|
"eval_logits/rejected": -1.1595537662506104,
|
|
"eval_logps/chosen": -253.52735900878906,
|
|
"eval_logps/rejected": -279.9950256347656,
|
|
"eval_loss": 0.3205508887767792,
|
|
"eval_rewards/accuracies": 0.8137379288673401,
|
|
"eval_rewards/chosen": 0.73013836145401,
|
|
"eval_rewards/margins": 2.814093589782715,
|
|
"eval_rewards/rejected": -2.0833332538604736,
|
|
"eval_runtime": 193.1227,
|
|
"eval_samples_per_second": 66.554,
|
|
"eval_steps_per_second": 1.041,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 0.7754424778761062,
|
|
"grad_norm": 14.452095985412598,
|
|
"learning_rate": 6.397401503215991e-08,
|
|
"logits/chosen": -1.2578125,
|
|
"logits/rejected": -1.125,
|
|
"logps/chosen": -268.0,
|
|
"logps/rejected": -296.0,
|
|
"loss": 0.3012,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 0.92578125,
|
|
"rewards/margins": 3.0703125,
|
|
"rewards/rejected": -2.14453125,
|
|
"step": 701
|
|
},
|
|
{
|
|
"epoch": 0.7765486725663717,
|
|
"grad_norm": 13.497214317321777,
|
|
"learning_rate": 6.33762444139744e-08,
|
|
"logits/chosen": -1.53515625,
|
|
"logits/rejected": -1.15625,
|
|
"logps/chosen": -244.5,
|
|
"logps/rejected": -292.0,
|
|
"loss": 0.3147,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 0.78515625,
|
|
"rewards/margins": 2.78125,
|
|
"rewards/rejected": -2.0,
|
|
"step": 702
|
|
},
|
|
{
|
|
"epoch": 0.7776548672566371,
|
|
"grad_norm": 13.972166061401367,
|
|
"learning_rate": 6.278087405415683e-08,
|
|
"logits/chosen": -1.3125,
|
|
"logits/rejected": -1.140625,
|
|
"logps/chosen": -258.0,
|
|
"logps/rejected": -260.0,
|
|
"loss": 0.2868,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 0.8046875,
|
|
"rewards/margins": 3.03125,
|
|
"rewards/rejected": -2.2265625,
|
|
"step": 703
|
|
},
|
|
{
|
|
"epoch": 0.7787610619469026,
|
|
"grad_norm": 13.861830711364746,
|
|
"learning_rate": 6.218791161005335e-08,
|
|
"logits/chosen": -1.2265625,
|
|
"logits/rejected": -1.09765625,
|
|
"logps/chosen": -238.5,
|
|
"logps/rejected": -299.0,
|
|
"loss": 0.2945,
|
|
"rewards/accuracies": 0.8671875,
|
|
"rewards/chosen": 0.76953125,
|
|
"rewards/margins": 3.0,
|
|
"rewards/rejected": -2.234375,
|
|
"step": 704
|
|
},
|
|
{
|
|
"epoch": 0.7798672566371682,
|
|
"grad_norm": 15.766735076904297,
|
|
"learning_rate": 6.159736470804059e-08,
|
|
"logits/chosen": -1.3046875,
|
|
"logits/rejected": -1.2421875,
|
|
"logps/chosen": -250.5,
|
|
"logps/rejected": -261.0,
|
|
"loss": 0.3834,
|
|
"rewards/accuracies": 0.7734375,
|
|
"rewards/chosen": 0.6416015625,
|
|
"rewards/margins": 2.21875,
|
|
"rewards/rejected": -1.57421875,
|
|
"step": 705
|
|
},
|
|
{
|
|
"epoch": 0.7809734513274337,
|
|
"grad_norm": 13.845602989196777,
|
|
"learning_rate": 6.100924094342785e-08,
|
|
"logits/chosen": -1.4140625,
|
|
"logits/rejected": -1.19921875,
|
|
"logps/chosen": -230.5,
|
|
"logps/rejected": -236.0,
|
|
"loss": 0.3024,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 0.705078125,
|
|
"rewards/margins": 2.9609375,
|
|
"rewards/rejected": -2.2578125,
|
|
"step": 706
|
|
},
|
|
{
|
|
"epoch": 0.7820796460176991,
|
|
"grad_norm": 14.929936408996582,
|
|
"learning_rate": 6.042354788035942e-08,
|
|
"logits/chosen": -1.18359375,
|
|
"logits/rejected": -1.04296875,
|
|
"logps/chosen": -269.0,
|
|
"logps/rejected": -291.0,
|
|
"loss": 0.3403,
|
|
"rewards/accuracies": 0.7578125,
|
|
"rewards/chosen": 0.6162109375,
|
|
"rewards/margins": 2.7109375,
|
|
"rewards/rejected": -2.09765625,
|
|
"step": 707
|
|
},
|
|
{
|
|
"epoch": 0.7831858407079646,
|
|
"grad_norm": 13.97938346862793,
|
|
"learning_rate": 5.984029305171678e-08,
|
|
"logits/chosen": -1.26953125,
|
|
"logits/rejected": -1.2734375,
|
|
"logps/chosen": -245.0,
|
|
"logps/rejected": -287.0,
|
|
"loss": 0.2896,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 1.0,
|
|
"rewards/margins": 3.21875,
|
|
"rewards/rejected": -2.21875,
|
|
"step": 708
|
|
},
|
|
{
|
|
"epoch": 0.7842920353982301,
|
|
"grad_norm": 14.008685111999512,
|
|
"learning_rate": 5.925948395902253e-08,
|
|
"logits/chosen": -1.32421875,
|
|
"logits/rejected": -1.21875,
|
|
"logps/chosen": -272.0,
|
|
"logps/rejected": -313.0,
|
|
"loss": 0.3008,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 0.884765625,
|
|
"rewards/margins": 2.9765625,
|
|
"rewards/rejected": -2.0859375,
|
|
"step": 709
|
|
},
|
|
{
|
|
"epoch": 0.7853982300884956,
|
|
"grad_norm": 12.459348678588867,
|
|
"learning_rate": 5.868112807234313e-08,
|
|
"logits/chosen": -1.29296875,
|
|
"logits/rejected": -1.140625,
|
|
"logps/chosen": -269.0,
|
|
"logps/rejected": -371.0,
|
|
"loss": 0.262,
|
|
"rewards/accuracies": 0.875,
|
|
"rewards/chosen": 0.994140625,
|
|
"rewards/margins": 3.0390625,
|
|
"rewards/rejected": -2.046875,
|
|
"step": 710
|
|
},
|
|
{
|
|
"epoch": 0.786504424778761,
|
|
"grad_norm": 14.358124732971191,
|
|
"learning_rate": 5.810523283019339e-08,
|
|
"logits/chosen": -1.3046875,
|
|
"logits/rejected": -1.1875,
|
|
"logps/chosen": -282.0,
|
|
"logps/rejected": -273.0,
|
|
"loss": 0.3423,
|
|
"rewards/accuracies": 0.859375,
|
|
"rewards/chosen": 0.484375,
|
|
"rewards/margins": 2.484375,
|
|
"rewards/rejected": -2.00390625,
|
|
"step": 711
|
|
},
|
|
{
|
|
"epoch": 0.7876106194690266,
|
|
"grad_norm": 12.388589859008789,
|
|
"learning_rate": 5.753180563944057e-08,
|
|
"logits/chosen": -1.37109375,
|
|
"logits/rejected": -1.09375,
|
|
"logps/chosen": -232.0,
|
|
"logps/rejected": -247.5,
|
|
"loss": 0.2437,
|
|
"rewards/accuracies": 0.859375,
|
|
"rewards/chosen": 0.931640625,
|
|
"rewards/margins": 3.46875,
|
|
"rewards/rejected": -2.5390625,
|
|
"step": 712
|
|
},
|
|
{
|
|
"epoch": 0.7887168141592921,
|
|
"grad_norm": 12.301764488220215,
|
|
"learning_rate": 5.6960853875208935e-08,
|
|
"logits/chosen": -1.19921875,
|
|
"logits/rejected": -1.1640625,
|
|
"logps/chosen": -252.0,
|
|
"logps/rejected": -267.0,
|
|
"loss": 0.3027,
|
|
"rewards/accuracies": 0.8359375,
|
|
"rewards/chosen": 0.671875,
|
|
"rewards/margins": 2.8125,
|
|
"rewards/rejected": -2.1328125,
|
|
"step": 713
|
|
},
|
|
{
|
|
"epoch": 0.7898230088495575,
|
|
"grad_norm": 14.501238822937012,
|
|
"learning_rate": 5.6392384880785294e-08,
|
|
"logits/chosen": -1.37109375,
|
|
"logits/rejected": -1.2265625,
|
|
"logps/chosen": -276.0,
|
|
"logps/rejected": -285.0,
|
|
"loss": 0.3198,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": 0.501953125,
|
|
"rewards/margins": 2.6328125,
|
|
"rewards/rejected": -2.125,
|
|
"step": 714
|
|
},
|
|
{
|
|
"epoch": 0.790929203539823,
|
|
"grad_norm": 12.956294059753418,
|
|
"learning_rate": 5.5826405967524357e-08,
|
|
"logits/chosen": -1.1484375,
|
|
"logits/rejected": -1.0859375,
|
|
"logps/chosen": -255.0,
|
|
"logps/rejected": -297.0,
|
|
"loss": 0.272,
|
|
"rewards/accuracies": 0.859375,
|
|
"rewards/chosen": 0.779296875,
|
|
"rewards/margins": 3.203125,
|
|
"rewards/rejected": -2.421875,
|
|
"step": 715
|
|
},
|
|
{
|
|
"epoch": 0.7920353982300885,
|
|
"grad_norm": 14.246673583984375,
|
|
"learning_rate": 5.526292441475447e-08,
|
|
"logits/chosen": -1.32421875,
|
|
"logits/rejected": -1.140625,
|
|
"logps/chosen": -269.0,
|
|
"logps/rejected": -308.0,
|
|
"loss": 0.2897,
|
|
"rewards/accuracies": 0.875,
|
|
"rewards/chosen": 0.818359375,
|
|
"rewards/margins": 2.765625,
|
|
"rewards/rejected": -1.94921875,
|
|
"step": 716
|
|
},
|
|
{
|
|
"epoch": 0.793141592920354,
|
|
"grad_norm": 14.141976356506348,
|
|
"learning_rate": 5.470194746968451e-08,
|
|
"logits/chosen": -1.265625,
|
|
"logits/rejected": -1.2265625,
|
|
"logps/chosen": -246.0,
|
|
"logps/rejected": -288.0,
|
|
"loss": 0.3056,
|
|
"rewards/accuracies": 0.78125,
|
|
"rewards/chosen": 0.7265625,
|
|
"rewards/margins": 3.0859375,
|
|
"rewards/rejected": -2.359375,
|
|
"step": 717
|
|
},
|
|
{
|
|
"epoch": 0.7942477876106194,
|
|
"grad_norm": 13.89908218383789,
|
|
"learning_rate": 5.4143482347310116e-08,
|
|
"logits/chosen": -1.3046875,
|
|
"logits/rejected": -1.10546875,
|
|
"logps/chosen": -273.0,
|
|
"logps/rejected": -295.0,
|
|
"loss": 0.3041,
|
|
"rewards/accuracies": 0.84375,
|
|
"rewards/chosen": 0.87890625,
|
|
"rewards/margins": 2.828125,
|
|
"rewards/rejected": -1.94921875,
|
|
"step": 718
|
|
},
|
|
{
|
|
"epoch": 0.7953539823008849,
|
|
"grad_norm": 11.489982604980469,
|
|
"learning_rate": 5.358753623032136e-08,
|
|
"logits/chosen": -1.359375,
|
|
"logits/rejected": -1.12109375,
|
|
"logps/chosen": -228.0,
|
|
"logps/rejected": -249.0,
|
|
"loss": 0.2602,
|
|
"rewards/accuracies": 0.859375,
|
|
"rewards/chosen": 0.88671875,
|
|
"rewards/margins": 3.09375,
|
|
"rewards/rejected": -2.2109375,
|
|
"step": 719
|
|
},
|
|
{
|
|
"epoch": 0.7964601769911505,
|
|
"grad_norm": 13.466360092163086,
|
|
"learning_rate": 5.3034116269010194e-08,
|
|
"logits/chosen": -1.453125,
|
|
"logits/rejected": -1.234375,
|
|
"logps/chosen": -269.0,
|
|
"logps/rejected": -295.0,
|
|
"loss": 0.3119,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 0.59765625,
|
|
"rewards/margins": 2.4921875,
|
|
"rewards/rejected": -1.890625,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 0.797566371681416,
|
|
"grad_norm": 15.516824722290039,
|
|
"learning_rate": 5.248322958117815e-08,
|
|
"logits/chosen": -1.1875,
|
|
"logits/rejected": -1.15625,
|
|
"logps/chosen": -261.0,
|
|
"logps/rejected": -273.0,
|
|
"loss": 0.361,
|
|
"rewards/accuracies": 0.765625,
|
|
"rewards/chosen": 0.91015625,
|
|
"rewards/margins": 2.6875,
|
|
"rewards/rejected": -1.78125,
|
|
"step": 721
|
|
},
|
|
{
|
|
"epoch": 0.7986725663716814,
|
|
"grad_norm": 14.861969947814941,
|
|
"learning_rate": 5.1934883252045507e-08,
|
|
"logits/chosen": -1.234375,
|
|
"logits/rejected": -1.16796875,
|
|
"logps/chosen": -249.5,
|
|
"logps/rejected": -279.0,
|
|
"loss": 0.3549,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": 0.65625,
|
|
"rewards/margins": 2.8046875,
|
|
"rewards/rejected": -2.1484375,
|
|
"step": 722
|
|
},
|
|
{
|
|
"epoch": 0.7997787610619469,
|
|
"grad_norm": 14.74849796295166,
|
|
"learning_rate": 5.138908433415945e-08,
|
|
"logits/chosen": -1.28125,
|
|
"logits/rejected": -1.23828125,
|
|
"logps/chosen": -271.0,
|
|
"logps/rejected": -311.0,
|
|
"loss": 0.2943,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 0.8203125,
|
|
"rewards/margins": 3.1171875,
|
|
"rewards/rejected": -2.3046875,
|
|
"step": 723
|
|
},
|
|
{
|
|
"epoch": 0.8008849557522124,
|
|
"grad_norm": 13.291254043579102,
|
|
"learning_rate": 5.0845839847303894e-08,
|
|
"logits/chosen": -1.25390625,
|
|
"logits/rejected": -1.11328125,
|
|
"logps/chosen": -244.5,
|
|
"logps/rejected": -257.0,
|
|
"loss": 0.3242,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 0.666015625,
|
|
"rewards/margins": 2.8671875,
|
|
"rewards/rejected": -2.1953125,
|
|
"step": 724
|
|
},
|
|
{
|
|
"epoch": 0.8019911504424779,
|
|
"grad_norm": 12.395694732666016,
|
|
"learning_rate": 5.030515677840882e-08,
|
|
"logits/chosen": -1.1875,
|
|
"logits/rejected": -1.1171875,
|
|
"logps/chosen": -240.5,
|
|
"logps/rejected": -276.0,
|
|
"loss": 0.3041,
|
|
"rewards/accuracies": 0.8359375,
|
|
"rewards/chosen": 0.7890625,
|
|
"rewards/margins": 3.03125,
|
|
"rewards/rejected": -2.2421875,
|
|
"step": 725
|
|
},
|
|
{
|
|
"epoch": 0.8030973451327433,
|
|
"grad_norm": 13.156864166259766,
|
|
"learning_rate": 4.9767042081460626e-08,
|
|
"logits/chosen": -1.421875,
|
|
"logits/rejected": -1.12109375,
|
|
"logps/chosen": -253.5,
|
|
"logps/rejected": -286.0,
|
|
"loss": 0.2806,
|
|
"rewards/accuracies": 0.8359375,
|
|
"rewards/chosen": 0.701171875,
|
|
"rewards/margins": 2.8359375,
|
|
"rewards/rejected": -2.1328125,
|
|
"step": 726
|
|
},
|
|
{
|
|
"epoch": 0.8042035398230089,
|
|
"grad_norm": 13.708073616027832,
|
|
"learning_rate": 4.923150267741266e-08,
|
|
"logits/chosen": -1.1953125,
|
|
"logits/rejected": -1.13671875,
|
|
"logps/chosen": -272.0,
|
|
"logps/rejected": -310.0,
|
|
"loss": 0.2606,
|
|
"rewards/accuracies": 0.8671875,
|
|
"rewards/chosen": 0.796875,
|
|
"rewards/margins": 3.2734375,
|
|
"rewards/rejected": -2.4765625,
|
|
"step": 727
|
|
},
|
|
{
|
|
"epoch": 0.8053097345132744,
|
|
"grad_norm": 13.454339981079102,
|
|
"learning_rate": 4.869854545409627e-08,
|
|
"logits/chosen": -1.2578125,
|
|
"logits/rejected": -1.2109375,
|
|
"logps/chosen": -243.0,
|
|
"logps/rejected": -296.0,
|
|
"loss": 0.2951,
|
|
"rewards/accuracies": 0.84375,
|
|
"rewards/chosen": 0.841796875,
|
|
"rewards/margins": 2.8671875,
|
|
"rewards/rejected": -2.02734375,
|
|
"step": 728
|
|
},
|
|
{
|
|
"epoch": 0.8064159292035398,
|
|
"grad_norm": 13.385002136230469,
|
|
"learning_rate": 4.816817726613187e-08,
|
|
"logits/chosen": -1.30078125,
|
|
"logits/rejected": -1.20703125,
|
|
"logps/chosen": -255.5,
|
|
"logps/rejected": -267.0,
|
|
"loss": 0.3009,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 0.5869140625,
|
|
"rewards/margins": 2.875,
|
|
"rewards/rejected": -2.2890625,
|
|
"step": 729
|
|
},
|
|
{
|
|
"epoch": 0.8075221238938053,
|
|
"grad_norm": 11.77560806274414,
|
|
"learning_rate": 4.7640404934841284e-08,
|
|
"logits/chosen": -1.25390625,
|
|
"logits/rejected": -1.1171875,
|
|
"logps/chosen": -239.5,
|
|
"logps/rejected": -256.5,
|
|
"loss": 0.2937,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 0.65234375,
|
|
"rewards/margins": 2.7734375,
|
|
"rewards/rejected": -2.12109375,
|
|
"step": 730
|
|
},
|
|
{
|
|
"epoch": 0.8086283185840708,
|
|
"grad_norm": 14.025035858154297,
|
|
"learning_rate": 4.7115235248159776e-08,
|
|
"logits/chosen": -1.3359375,
|
|
"logits/rejected": -1.14453125,
|
|
"logps/chosen": -283.0,
|
|
"logps/rejected": -303.0,
|
|
"loss": 0.2726,
|
|
"rewards/accuracies": 0.8359375,
|
|
"rewards/chosen": 1.037109375,
|
|
"rewards/margins": 3.1328125,
|
|
"rewards/rejected": -2.10546875,
|
|
"step": 731
|
|
},
|
|
{
|
|
"epoch": 0.8097345132743363,
|
|
"grad_norm": 13.514138221740723,
|
|
"learning_rate": 4.659267496054847e-08,
|
|
"logits/chosen": -1.2890625,
|
|
"logits/rejected": -1.091796875,
|
|
"logps/chosen": -248.5,
|
|
"logps/rejected": -266.5,
|
|
"loss": 0.2988,
|
|
"rewards/accuracies": 0.84375,
|
|
"rewards/chosen": 0.646484375,
|
|
"rewards/margins": 2.84375,
|
|
"rewards/rejected": -2.1953125,
|
|
"step": 732
|
|
},
|
|
{
|
|
"epoch": 0.8108407079646017,
|
|
"grad_norm": 15.020828247070312,
|
|
"learning_rate": 4.60727307929081e-08,
|
|
"logits/chosen": -1.28125,
|
|
"logits/rejected": -1.06640625,
|
|
"logps/chosen": -258.5,
|
|
"logps/rejected": -275.0,
|
|
"loss": 0.3037,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": 0.75390625,
|
|
"rewards/margins": 3.09375,
|
|
"rewards/rejected": -2.34375,
|
|
"step": 733
|
|
},
|
|
{
|
|
"epoch": 0.8119469026548672,
|
|
"grad_norm": 14.957762718200684,
|
|
"learning_rate": 4.555540943249187e-08,
|
|
"logits/chosen": -1.3515625,
|
|
"logits/rejected": -1.19921875,
|
|
"logps/chosen": -248.5,
|
|
"logps/rejected": -304.0,
|
|
"loss": 0.3,
|
|
"rewards/accuracies": 0.8515625,
|
|
"rewards/chosen": 0.705078125,
|
|
"rewards/margins": 2.8515625,
|
|
"rewards/rejected": -2.1484375,
|
|
"step": 734
|
|
},
|
|
{
|
|
"epoch": 0.8130530973451328,
|
|
"grad_norm": 12.412934303283691,
|
|
"learning_rate": 4.5040717532820046e-08,
|
|
"logits/chosen": -1.28125,
|
|
"logits/rejected": -1.15234375,
|
|
"logps/chosen": -257.5,
|
|
"logps/rejected": -296.0,
|
|
"loss": 0.282,
|
|
"rewards/accuracies": 0.8671875,
|
|
"rewards/chosen": 0.7607421875,
|
|
"rewards/margins": 2.953125,
|
|
"rewards/rejected": -2.1953125,
|
|
"step": 735
|
|
},
|
|
{
|
|
"epoch": 0.8141592920353983,
|
|
"grad_norm": 15.76734733581543,
|
|
"learning_rate": 4.4528661713594125e-08,
|
|
"logits/chosen": -1.3515625,
|
|
"logits/rejected": -1.1796875,
|
|
"logps/chosen": -238.5,
|
|
"logps/rejected": -262.0,
|
|
"loss": 0.3355,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": 0.8427734375,
|
|
"rewards/margins": 2.96875,
|
|
"rewards/rejected": -2.125,
|
|
"step": 736
|
|
},
|
|
{
|
|
"epoch": 0.8152654867256637,
|
|
"grad_norm": 16.009498596191406,
|
|
"learning_rate": 4.4019248560611454e-08,
|
|
"logits/chosen": -1.1953125,
|
|
"logits/rejected": -1.16796875,
|
|
"logps/chosen": -271.0,
|
|
"logps/rejected": -292.0,
|
|
"loss": 0.34,
|
|
"rewards/accuracies": 0.78125,
|
|
"rewards/chosen": 0.8203125,
|
|
"rewards/margins": 2.7578125,
|
|
"rewards/rejected": -1.9375,
|
|
"step": 737
|
|
},
|
|
{
|
|
"epoch": 0.8163716814159292,
|
|
"grad_norm": 12.171030044555664,
|
|
"learning_rate": 4.3512484625681e-08,
|
|
"logits/chosen": -1.29296875,
|
|
"logits/rejected": -1.046875,
|
|
"logps/chosen": -257.0,
|
|
"logps/rejected": -285.0,
|
|
"loss": 0.2528,
|
|
"rewards/accuracies": 0.8515625,
|
|
"rewards/chosen": 0.6953125,
|
|
"rewards/margins": 2.9921875,
|
|
"rewards/rejected": -2.2890625,
|
|
"step": 738
|
|
},
|
|
{
|
|
"epoch": 0.8174778761061947,
|
|
"grad_norm": 14.278532981872559,
|
|
"learning_rate": 4.3008376426538903e-08,
|
|
"logits/chosen": -1.31640625,
|
|
"logits/rejected": -1.2265625,
|
|
"logps/chosen": -250.5,
|
|
"logps/rejected": -258.5,
|
|
"loss": 0.3722,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 0.5029296875,
|
|
"rewards/margins": 2.4921875,
|
|
"rewards/rejected": -1.9921875,
|
|
"step": 739
|
|
},
|
|
{
|
|
"epoch": 0.8185840707964602,
|
|
"grad_norm": 12.9563570022583,
|
|
"learning_rate": 4.250693044676429e-08,
|
|
"logits/chosen": -1.2734375,
|
|
"logits/rejected": -1.17578125,
|
|
"logps/chosen": -270.0,
|
|
"logps/rejected": -287.0,
|
|
"loss": 0.2685,
|
|
"rewards/accuracies": 0.8671875,
|
|
"rewards/chosen": 0.685546875,
|
|
"rewards/margins": 3.1640625,
|
|
"rewards/rejected": -2.4765625,
|
|
"step": 740
|
|
},
|
|
{
|
|
"epoch": 0.8196902654867256,
|
|
"grad_norm": 13.755107879638672,
|
|
"learning_rate": 4.2008153135696584e-08,
|
|
"logits/chosen": -1.21484375,
|
|
"logits/rejected": -1.09375,
|
|
"logps/chosen": -246.5,
|
|
"logps/rejected": -285.0,
|
|
"loss": 0.3042,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": 0.736328125,
|
|
"rewards/margins": 2.9765625,
|
|
"rewards/rejected": -2.2421875,
|
|
"step": 741
|
|
},
|
|
{
|
|
"epoch": 0.8207964601769911,
|
|
"grad_norm": 12.855173110961914,
|
|
"learning_rate": 4.151205090835183e-08,
|
|
"logits/chosen": -1.2734375,
|
|
"logits/rejected": -1.10546875,
|
|
"logps/chosen": -243.5,
|
|
"logps/rejected": -283.0,
|
|
"loss": 0.2732,
|
|
"rewards/accuracies": 0.875,
|
|
"rewards/chosen": 0.916015625,
|
|
"rewards/margins": 3.0859375,
|
|
"rewards/rejected": -2.1640625,
|
|
"step": 742
|
|
},
|
|
{
|
|
"epoch": 0.8219026548672567,
|
|
"grad_norm": 15.404345512390137,
|
|
"learning_rate": 4.1018630145340735e-08,
|
|
"logits/chosen": -1.29296875,
|
|
"logits/rejected": -1.34375,
|
|
"logps/chosen": -255.0,
|
|
"logps/rejected": -262.5,
|
|
"loss": 0.2993,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 0.810546875,
|
|
"rewards/margins": 3.09375,
|
|
"rewards/rejected": -2.28125,
|
|
"step": 743
|
|
},
|
|
{
|
|
"epoch": 0.8230088495575221,
|
|
"grad_norm": 13.697175979614258,
|
|
"learning_rate": 4.0527897192786433e-08,
|
|
"logits/chosen": -1.2421875,
|
|
"logits/rejected": -1.171875,
|
|
"logps/chosen": -279.0,
|
|
"logps/rejected": -289.0,
|
|
"loss": 0.2732,
|
|
"rewards/accuracies": 0.84375,
|
|
"rewards/chosen": 0.83984375,
|
|
"rewards/margins": 3.046875,
|
|
"rewards/rejected": -2.19921875,
|
|
"step": 744
|
|
},
|
|
{
|
|
"epoch": 0.8241150442477876,
|
|
"grad_norm": 14.881061553955078,
|
|
"learning_rate": 4.003985836224255e-08,
|
|
"logits/chosen": -1.296875,
|
|
"logits/rejected": -1.30859375,
|
|
"logps/chosen": -256.5,
|
|
"logps/rejected": -284.0,
|
|
"loss": 0.3474,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 0.5068359375,
|
|
"rewards/margins": 2.375,
|
|
"rewards/rejected": -1.87109375,
|
|
"step": 745
|
|
},
|
|
{
|
|
"epoch": 0.8252212389380531,
|
|
"grad_norm": 13.085796356201172,
|
|
"learning_rate": 3.955451993061268e-08,
|
|
"logits/chosen": -1.33984375,
|
|
"logits/rejected": -1.12890625,
|
|
"logps/chosen": -258.0,
|
|
"logps/rejected": -292.0,
|
|
"loss": 0.2616,
|
|
"rewards/accuracies": 0.875,
|
|
"rewards/chosen": 0.91796875,
|
|
"rewards/margins": 3.09375,
|
|
"rewards/rejected": -2.1796875,
|
|
"step": 746
|
|
},
|
|
{
|
|
"epoch": 0.8263274336283186,
|
|
"grad_norm": 13.392922401428223,
|
|
"learning_rate": 3.9071888140068926e-08,
|
|
"logits/chosen": -1.2109375,
|
|
"logits/rejected": -1.16796875,
|
|
"logps/chosen": -256.0,
|
|
"logps/rejected": -316.0,
|
|
"loss": 0.2815,
|
|
"rewards/accuracies": 0.859375,
|
|
"rewards/chosen": 0.986328125,
|
|
"rewards/margins": 3.1171875,
|
|
"rewards/rejected": -2.1328125,
|
|
"step": 747
|
|
},
|
|
{
|
|
"epoch": 0.827433628318584,
|
|
"grad_norm": 12.065234184265137,
|
|
"learning_rate": 3.859196919797228e-08,
|
|
"logits/chosen": -1.3359375,
|
|
"logits/rejected": -1.12109375,
|
|
"logps/chosen": -247.5,
|
|
"logps/rejected": -264.0,
|
|
"loss": 0.3147,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 0.7265625,
|
|
"rewards/margins": 2.9609375,
|
|
"rewards/rejected": -2.2421875,
|
|
"step": 748
|
|
},
|
|
{
|
|
"epoch": 0.8285398230088495,
|
|
"grad_norm": 14.233034133911133,
|
|
"learning_rate": 3.811476927679227e-08,
|
|
"logits/chosen": -1.16015625,
|
|
"logits/rejected": -1.1640625,
|
|
"logps/chosen": -265.0,
|
|
"logps/rejected": -300.0,
|
|
"loss": 0.3261,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 0.6884765625,
|
|
"rewards/margins": 2.75,
|
|
"rewards/rejected": -2.0703125,
|
|
"step": 749
|
|
},
|
|
{
|
|
"epoch": 0.8296460176991151,
|
|
"grad_norm": 14.785301208496094,
|
|
"learning_rate": 3.764029451402778e-08,
|
|
"logits/chosen": -1.265625,
|
|
"logits/rejected": -1.19140625,
|
|
"logps/chosen": -236.0,
|
|
"logps/rejected": -282.0,
|
|
"loss": 0.297,
|
|
"rewards/accuracies": 0.84375,
|
|
"rewards/chosen": 0.791015625,
|
|
"rewards/margins": 3.1640625,
|
|
"rewards/rejected": -2.375,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 0.8296460176991151,
|
|
"eval_logits/chosen": -1.270017147064209,
|
|
"eval_logits/rejected": -1.1617498397827148,
|
|
"eval_logps/chosen": -253.53233337402344,
|
|
"eval_logps/rejected": -280.19403076171875,
|
|
"eval_loss": 0.31980380415916443,
|
|
"eval_rewards/accuracies": 0.8147646188735962,
|
|
"eval_rewards/chosen": 0.72982257604599,
|
|
"eval_rewards/margins": 2.827347755432129,
|
|
"eval_rewards/rejected": -2.09759783744812,
|
|
"eval_runtime": 193.0983,
|
|
"eval_samples_per_second": 66.562,
|
|
"eval_steps_per_second": 1.041,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 0.8307522123893806,
|
|
"grad_norm": 13.682051658630371,
|
|
"learning_rate": 3.716855101212826e-08,
|
|
"logits/chosen": -1.26953125,
|
|
"logits/rejected": -1.1953125,
|
|
"logps/chosen": -270.0,
|
|
"logps/rejected": -284.5,
|
|
"loss": 0.3091,
|
|
"rewards/accuracies": 0.8359375,
|
|
"rewards/chosen": 0.740234375,
|
|
"rewards/margins": 2.8125,
|
|
"rewards/rejected": -2.0703125,
|
|
"step": 751
|
|
},
|
|
{
|
|
"epoch": 0.831858407079646,
|
|
"grad_norm": 14.422385215759277,
|
|
"learning_rate": 3.6699544838415034e-08,
|
|
"logits/chosen": -1.328125,
|
|
"logits/rejected": -1.1015625,
|
|
"logps/chosen": -268.0,
|
|
"logps/rejected": -266.0,
|
|
"loss": 0.3043,
|
|
"rewards/accuracies": 0.8515625,
|
|
"rewards/chosen": 0.724609375,
|
|
"rewards/margins": 2.875,
|
|
"rewards/rejected": -2.15234375,
|
|
"step": 752
|
|
},
|
|
{
|
|
"epoch": 0.8329646017699115,
|
|
"grad_norm": 14.678279876708984,
|
|
"learning_rate": 3.623328202500322e-08,
|
|
"logits/chosen": -1.21484375,
|
|
"logits/rejected": -1.13671875,
|
|
"logps/chosen": -280.0,
|
|
"logps/rejected": -305.0,
|
|
"loss": 0.3304,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 0.6796875,
|
|
"rewards/margins": 2.4765625,
|
|
"rewards/rejected": -1.796875,
|
|
"step": 753
|
|
},
|
|
{
|
|
"epoch": 0.834070796460177,
|
|
"grad_norm": 12.621984481811523,
|
|
"learning_rate": 3.576976856872438e-08,
|
|
"logits/chosen": -1.421875,
|
|
"logits/rejected": -1.0859375,
|
|
"logps/chosen": -252.0,
|
|
"logps/rejected": -276.0,
|
|
"loss": 0.294,
|
|
"rewards/accuracies": 0.84375,
|
|
"rewards/chosen": 0.73046875,
|
|
"rewards/margins": 2.8515625,
|
|
"rewards/rejected": -2.1171875,
|
|
"step": 754
|
|
},
|
|
{
|
|
"epoch": 0.8351769911504425,
|
|
"grad_norm": 11.676498413085938,
|
|
"learning_rate": 3.530901043104928e-08,
|
|
"logits/chosen": -1.26171875,
|
|
"logits/rejected": -1.1953125,
|
|
"logps/chosen": -227.0,
|
|
"logps/rejected": -266.0,
|
|
"loss": 0.2778,
|
|
"rewards/accuracies": 0.8828125,
|
|
"rewards/chosen": 1.048828125,
|
|
"rewards/margins": 3.109375,
|
|
"rewards/rejected": -2.0625,
|
|
"step": 755
|
|
},
|
|
{
|
|
"epoch": 0.8362831858407079,
|
|
"grad_norm": 12.77115249633789,
|
|
"learning_rate": 3.4851013538011035e-08,
|
|
"logits/chosen": -1.3125,
|
|
"logits/rejected": -1.17578125,
|
|
"logps/chosen": -252.5,
|
|
"logps/rejected": -290.0,
|
|
"loss": 0.2771,
|
|
"rewards/accuracies": 0.8515625,
|
|
"rewards/chosen": 0.7578125,
|
|
"rewards/margins": 3.0078125,
|
|
"rewards/rejected": -2.2578125,
|
|
"step": 756
|
|
},
|
|
{
|
|
"epoch": 0.8373893805309734,
|
|
"grad_norm": 13.537567138671875,
|
|
"learning_rate": 3.439578378012925e-08,
|
|
"logits/chosen": -1.34765625,
|
|
"logits/rejected": -1.17578125,
|
|
"logps/chosen": -251.0,
|
|
"logps/rejected": -285.5,
|
|
"loss": 0.2978,
|
|
"rewards/accuracies": 0.84375,
|
|
"rewards/chosen": 0.9921875,
|
|
"rewards/margins": 3.140625,
|
|
"rewards/rejected": -2.1484375,
|
|
"step": 757
|
|
},
|
|
{
|
|
"epoch": 0.838495575221239,
|
|
"grad_norm": 12.62022590637207,
|
|
"learning_rate": 3.394332701233391e-08,
|
|
"logits/chosen": -1.265625,
|
|
"logits/rejected": -1.1796875,
|
|
"logps/chosen": -242.5,
|
|
"logps/rejected": -261.0,
|
|
"loss": 0.2755,
|
|
"rewards/accuracies": 0.8671875,
|
|
"rewards/chosen": 0.8359375,
|
|
"rewards/margins": 3.0078125,
|
|
"rewards/rejected": -2.1640625,
|
|
"step": 758
|
|
},
|
|
{
|
|
"epoch": 0.8396017699115044,
|
|
"grad_norm": 14.283227920532227,
|
|
"learning_rate": 3.349364905389032e-08,
|
|
"logits/chosen": -1.15234375,
|
|
"logits/rejected": -1.158203125,
|
|
"logps/chosen": -269.0,
|
|
"logps/rejected": -290.0,
|
|
"loss": 0.3305,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 0.658203125,
|
|
"rewards/margins": 2.8828125,
|
|
"rewards/rejected": -2.2265625,
|
|
"step": 759
|
|
},
|
|
{
|
|
"epoch": 0.8407079646017699,
|
|
"grad_norm": 12.961087226867676,
|
|
"learning_rate": 3.304675568832427e-08,
|
|
"logits/chosen": -1.265625,
|
|
"logits/rejected": -1.109375,
|
|
"logps/chosen": -263.0,
|
|
"logps/rejected": -279.5,
|
|
"loss": 0.3033,
|
|
"rewards/accuracies": 0.8359375,
|
|
"rewards/chosen": 0.755859375,
|
|
"rewards/margins": 2.5078125,
|
|
"rewards/rejected": -1.75,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 0.8418141592920354,
|
|
"grad_norm": 14.768875122070312,
|
|
"learning_rate": 3.260265266334725e-08,
|
|
"logits/chosen": -1.21484375,
|
|
"logits/rejected": -1.125,
|
|
"logps/chosen": -256.0,
|
|
"logps/rejected": -282.0,
|
|
"loss": 0.382,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 0.765625,
|
|
"rewards/margins": 2.40234375,
|
|
"rewards/rejected": -1.63671875,
|
|
"step": 761
|
|
},
|
|
{
|
|
"epoch": 0.8429203539823009,
|
|
"grad_norm": 16.72699546813965,
|
|
"learning_rate": 3.216134569078316e-08,
|
|
"logits/chosen": -1.25,
|
|
"logits/rejected": -1.22265625,
|
|
"logps/chosen": -266.0,
|
|
"logps/rejected": -300.0,
|
|
"loss": 0.3642,
|
|
"rewards/accuracies": 0.765625,
|
|
"rewards/chosen": 0.90625,
|
|
"rewards/margins": 2.7421875,
|
|
"rewards/rejected": -1.83203125,
|
|
"step": 762
|
|
},
|
|
{
|
|
"epoch": 0.8440265486725663,
|
|
"grad_norm": 12.911907196044922,
|
|
"learning_rate": 3.172284044649437e-08,
|
|
"logits/chosen": -1.265625,
|
|
"logits/rejected": -1.1171875,
|
|
"logps/chosen": -260.5,
|
|
"logps/rejected": -308.0,
|
|
"loss": 0.3017,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 0.6640625,
|
|
"rewards/margins": 2.6640625,
|
|
"rewards/rejected": -2.0,
|
|
"step": 763
|
|
},
|
|
{
|
|
"epoch": 0.8451327433628318,
|
|
"grad_norm": 15.997196197509766,
|
|
"learning_rate": 3.128714257030882e-08,
|
|
"logits/chosen": -1.27734375,
|
|
"logits/rejected": -1.15625,
|
|
"logps/chosen": -284.0,
|
|
"logps/rejected": -301.0,
|
|
"loss": 0.3964,
|
|
"rewards/accuracies": 0.7421875,
|
|
"rewards/chosen": 0.607421875,
|
|
"rewards/margins": 2.3203125,
|
|
"rewards/rejected": -1.71875,
|
|
"step": 764
|
|
},
|
|
{
|
|
"epoch": 0.8462389380530974,
|
|
"grad_norm": 14.732622146606445,
|
|
"learning_rate": 3.085425766594768e-08,
|
|
"logits/chosen": -1.2265625,
|
|
"logits/rejected": -1.23828125,
|
|
"logps/chosen": -262.0,
|
|
"logps/rejected": -245.0,
|
|
"loss": 0.3107,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 0.724609375,
|
|
"rewards/margins": 2.9375,
|
|
"rewards/rejected": -2.21875,
|
|
"step": 765
|
|
},
|
|
{
|
|
"epoch": 0.8473451327433629,
|
|
"grad_norm": 14.123418807983398,
|
|
"learning_rate": 3.042419130095292e-08,
|
|
"logits/chosen": -1.26953125,
|
|
"logits/rejected": -1.19140625,
|
|
"logps/chosen": -249.0,
|
|
"logps/rejected": -303.0,
|
|
"loss": 0.2951,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 0.73828125,
|
|
"rewards/margins": 3.2109375,
|
|
"rewards/rejected": -2.4765625,
|
|
"step": 766
|
|
},
|
|
{
|
|
"epoch": 0.8484513274336283,
|
|
"grad_norm": 15.25007438659668,
|
|
"learning_rate": 2.999694900661609e-08,
|
|
"logits/chosen": -1.2578125,
|
|
"logits/rejected": -1.12109375,
|
|
"logps/chosen": -280.0,
|
|
"logps/rejected": -286.0,
|
|
"loss": 0.3976,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": 0.296875,
|
|
"rewards/margins": 1.96484375,
|
|
"rewards/rejected": -1.66796875,
|
|
"step": 767
|
|
},
|
|
{
|
|
"epoch": 0.8495575221238938,
|
|
"grad_norm": 14.552936553955078,
|
|
"learning_rate": 2.9572536277906984e-08,
|
|
"logits/chosen": -1.2421875,
|
|
"logits/rejected": -1.15234375,
|
|
"logps/chosen": -251.5,
|
|
"logps/rejected": -294.0,
|
|
"loss": 0.3292,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 0.5849609375,
|
|
"rewards/margins": 2.734375,
|
|
"rewards/rejected": -2.1484375,
|
|
"step": 768
|
|
},
|
|
{
|
|
"epoch": 0.8506637168141593,
|
|
"grad_norm": 12.925614356994629,
|
|
"learning_rate": 2.9150958573402885e-08,
|
|
"logits/chosen": -1.3828125,
|
|
"logits/rejected": -1.171875,
|
|
"logps/chosen": -267.0,
|
|
"logps/rejected": -305.0,
|
|
"loss": 0.2762,
|
|
"rewards/accuracies": 0.8515625,
|
|
"rewards/chosen": 0.775390625,
|
|
"rewards/margins": 2.8984375,
|
|
"rewards/rejected": -2.125,
|
|
"step": 769
|
|
},
|
|
{
|
|
"epoch": 0.8517699115044248,
|
|
"grad_norm": 14.300766944885254,
|
|
"learning_rate": 2.8732221315218573e-08,
|
|
"logits/chosen": -1.18359375,
|
|
"logits/rejected": -1.12109375,
|
|
"logps/chosen": -257.0,
|
|
"logps/rejected": -279.0,
|
|
"loss": 0.344,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 0.52587890625,
|
|
"rewards/margins": 2.484375,
|
|
"rewards/rejected": -1.953125,
|
|
"step": 770
|
|
},
|
|
{
|
|
"epoch": 0.8528761061946902,
|
|
"grad_norm": 13.133272171020508,
|
|
"learning_rate": 2.8316329888936315e-08,
|
|
"logits/chosen": -1.2578125,
|
|
"logits/rejected": -1.1015625,
|
|
"logps/chosen": -250.5,
|
|
"logps/rejected": -269.0,
|
|
"loss": 0.2487,
|
|
"rewards/accuracies": 0.8359375,
|
|
"rewards/chosen": 0.927734375,
|
|
"rewards/margins": 3.046875,
|
|
"rewards/rejected": -2.12109375,
|
|
"step": 771
|
|
},
|
|
{
|
|
"epoch": 0.8539823008849557,
|
|
"grad_norm": 12.045042991638184,
|
|
"learning_rate": 2.7903289643537e-08,
|
|
"logits/chosen": -1.34375,
|
|
"logits/rejected": -1.1171875,
|
|
"logps/chosen": -256.5,
|
|
"logps/rejected": -267.5,
|
|
"loss": 0.2765,
|
|
"rewards/accuracies": 0.859375,
|
|
"rewards/chosen": 0.76171875,
|
|
"rewards/margins": 2.96875,
|
|
"rewards/rejected": -2.2109375,
|
|
"step": 772
|
|
},
|
|
{
|
|
"epoch": 0.8550884955752213,
|
|
"grad_norm": 12.052350044250488,
|
|
"learning_rate": 2.7493105891330832e-08,
|
|
"logits/chosen": -1.28125,
|
|
"logits/rejected": -1.16015625,
|
|
"logps/chosen": -240.0,
|
|
"logps/rejected": -274.0,
|
|
"loss": 0.2838,
|
|
"rewards/accuracies": 0.84375,
|
|
"rewards/chosen": 0.81640625,
|
|
"rewards/margins": 3.1171875,
|
|
"rewards/rejected": -2.296875,
|
|
"step": 773
|
|
},
|
|
{
|
|
"epoch": 0.8561946902654868,
|
|
"grad_norm": 12.869089126586914,
|
|
"learning_rate": 2.7085783907889514e-08,
|
|
"logits/chosen": -1.26953125,
|
|
"logits/rejected": -1.1796875,
|
|
"logps/chosen": -260.0,
|
|
"logps/rejected": -274.0,
|
|
"loss": 0.3115,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 0.6953125,
|
|
"rewards/margins": 2.546875,
|
|
"rewards/rejected": -1.8515625,
|
|
"step": 774
|
|
},
|
|
{
|
|
"epoch": 0.8573008849557522,
|
|
"grad_norm": 13.210247993469238,
|
|
"learning_rate": 2.6681328931977942e-08,
|
|
"logits/chosen": -1.2578125,
|
|
"logits/rejected": -1.03125,
|
|
"logps/chosen": -247.5,
|
|
"logps/rejected": -286.0,
|
|
"loss": 0.2939,
|
|
"rewards/accuracies": 0.8359375,
|
|
"rewards/chosen": 0.896484375,
|
|
"rewards/margins": 2.9765625,
|
|
"rewards/rejected": -2.078125,
|
|
"step": 775
|
|
},
|
|
{
|
|
"epoch": 0.8584070796460177,
|
|
"grad_norm": 13.413789749145508,
|
|
"learning_rate": 2.6279746165487255e-08,
|
|
"logits/chosen": -1.26171875,
|
|
"logits/rejected": -1.171875,
|
|
"logps/chosen": -267.0,
|
|
"logps/rejected": -282.0,
|
|
"loss": 0.3004,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 0.658203125,
|
|
"rewards/margins": 2.6015625,
|
|
"rewards/rejected": -1.94921875,
|
|
"step": 776
|
|
},
|
|
{
|
|
"epoch": 0.8595132743362832,
|
|
"grad_norm": 13.01457691192627,
|
|
"learning_rate": 2.5881040773367502e-08,
|
|
"logits/chosen": -1.1875,
|
|
"logits/rejected": -1.05859375,
|
|
"logps/chosen": -240.0,
|
|
"logps/rejected": -257.0,
|
|
"loss": 0.3088,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": 0.716796875,
|
|
"rewards/margins": 3.03125,
|
|
"rewards/rejected": -2.31640625,
|
|
"step": 777
|
|
},
|
|
{
|
|
"epoch": 0.8606194690265486,
|
|
"grad_norm": 12.700637817382812,
|
|
"learning_rate": 2.5485217883561616e-08,
|
|
"logits/chosen": -1.30859375,
|
|
"logits/rejected": -1.1484375,
|
|
"logps/chosen": -248.5,
|
|
"logps/rejected": -279.0,
|
|
"loss": 0.2977,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 0.826171875,
|
|
"rewards/margins": 2.9375,
|
|
"rewards/rejected": -2.11328125,
|
|
"step": 778
|
|
},
|
|
{
|
|
"epoch": 0.8617256637168141,
|
|
"grad_norm": 13.09081745147705,
|
|
"learning_rate": 2.5092282586939183e-08,
|
|
"logits/chosen": -1.3828125,
|
|
"logits/rejected": -1.1328125,
|
|
"logps/chosen": -272.0,
|
|
"logps/rejected": -284.5,
|
|
"loss": 0.2959,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 0.755859375,
|
|
"rewards/margins": 2.7265625,
|
|
"rewards/rejected": -1.96875,
|
|
"step": 779
|
|
},
|
|
{
|
|
"epoch": 0.8628318584070797,
|
|
"grad_norm": 12.912965774536133,
|
|
"learning_rate": 2.470223993723103e-08,
|
|
"logits/chosen": -1.171875,
|
|
"logits/rejected": -1.10546875,
|
|
"logps/chosen": -259.0,
|
|
"logps/rejected": -283.5,
|
|
"loss": 0.304,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 0.73828125,
|
|
"rewards/margins": 2.9609375,
|
|
"rewards/rejected": -2.21875,
|
|
"step": 780
|
|
},
|
|
{
|
|
"epoch": 0.8639380530973452,
|
|
"grad_norm": 13.398490905761719,
|
|
"learning_rate": 2.4315094950964343e-08,
|
|
"logits/chosen": -1.375,
|
|
"logits/rejected": -1.1953125,
|
|
"logps/chosen": -272.5,
|
|
"logps/rejected": -278.5,
|
|
"loss": 0.3286,
|
|
"rewards/accuracies": 0.8515625,
|
|
"rewards/chosen": 0.61328125,
|
|
"rewards/margins": 2.4140625,
|
|
"rewards/rejected": -1.796875,
|
|
"step": 781
|
|
},
|
|
{
|
|
"epoch": 0.8650442477876106,
|
|
"grad_norm": 13.045671463012695,
|
|
"learning_rate": 2.393085260739794e-08,
|
|
"logits/chosen": -1.36328125,
|
|
"logits/rejected": -1.15625,
|
|
"logps/chosen": -242.0,
|
|
"logps/rejected": -263.5,
|
|
"loss": 0.3228,
|
|
"rewards/accuracies": 0.78125,
|
|
"rewards/chosen": 0.80859375,
|
|
"rewards/margins": 2.7421875,
|
|
"rewards/rejected": -1.94140625,
|
|
"step": 782
|
|
},
|
|
{
|
|
"epoch": 0.8661504424778761,
|
|
"grad_norm": 15.309684753417969,
|
|
"learning_rate": 2.3549517848458435e-08,
|
|
"logits/chosen": -1.26171875,
|
|
"logits/rejected": -1.1328125,
|
|
"logps/chosen": -279.0,
|
|
"logps/rejected": -301.0,
|
|
"loss": 0.3618,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": 0.724609375,
|
|
"rewards/margins": 2.4765625,
|
|
"rewards/rejected": -1.75,
|
|
"step": 783
|
|
},
|
|
{
|
|
"epoch": 0.8672566371681416,
|
|
"grad_norm": 12.972829818725586,
|
|
"learning_rate": 2.3171095578676637e-08,
|
|
"logits/chosen": -1.2578125,
|
|
"logits/rejected": -1.1171875,
|
|
"logps/chosen": -255.5,
|
|
"logps/rejected": -305.0,
|
|
"loss": 0.2948,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 0.84375,
|
|
"rewards/margins": 2.9140625,
|
|
"rewards/rejected": -2.078125,
|
|
"step": 784
|
|
},
|
|
{
|
|
"epoch": 0.8683628318584071,
|
|
"grad_norm": 12.639619827270508,
|
|
"learning_rate": 2.2795590665124263e-08,
|
|
"logits/chosen": -1.2421875,
|
|
"logits/rejected": -1.09375,
|
|
"logps/chosen": -235.0,
|
|
"logps/rejected": -269.5,
|
|
"loss": 0.2619,
|
|
"rewards/accuracies": 0.8828125,
|
|
"rewards/chosen": 0.7578125,
|
|
"rewards/margins": 3.171875,
|
|
"rewards/rejected": -2.421875,
|
|
"step": 785
|
|
},
|
|
{
|
|
"epoch": 0.8694690265486725,
|
|
"grad_norm": 12.34381103515625,
|
|
"learning_rate": 2.2423007937351634e-08,
|
|
"logits/chosen": -1.1953125,
|
|
"logits/rejected": -1.13671875,
|
|
"logps/chosen": -254.0,
|
|
"logps/rejected": -264.0,
|
|
"loss": 0.2839,
|
|
"rewards/accuracies": 0.84375,
|
|
"rewards/chosen": 0.521484375,
|
|
"rewards/margins": 2.8828125,
|
|
"rewards/rejected": -2.359375,
|
|
"step": 786
|
|
},
|
|
{
|
|
"epoch": 0.870575221238938,
|
|
"grad_norm": 13.31490707397461,
|
|
"learning_rate": 2.205335218732543e-08,
|
|
"logits/chosen": -1.2734375,
|
|
"logits/rejected": -1.21875,
|
|
"logps/chosen": -259.0,
|
|
"logps/rejected": -280.0,
|
|
"loss": 0.3176,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 0.666015625,
|
|
"rewards/margins": 2.65625,
|
|
"rewards/rejected": -1.9921875,
|
|
"step": 787
|
|
},
|
|
{
|
|
"epoch": 0.8716814159292036,
|
|
"grad_norm": 14.77593994140625,
|
|
"learning_rate": 2.1686628169366923e-08,
|
|
"logits/chosen": -1.109375,
|
|
"logits/rejected": -1.1171875,
|
|
"logps/chosen": -266.0,
|
|
"logps/rejected": -297.0,
|
|
"loss": 0.3291,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 0.771484375,
|
|
"rewards/margins": 2.8671875,
|
|
"rewards/rejected": -2.1015625,
|
|
"step": 788
|
|
},
|
|
{
|
|
"epoch": 0.8727876106194691,
|
|
"grad_norm": 12.58286190032959,
|
|
"learning_rate": 2.1322840600091096e-08,
|
|
"logits/chosen": -1.265625,
|
|
"logits/rejected": -1.15625,
|
|
"logps/chosen": -249.5,
|
|
"logps/rejected": -260.0,
|
|
"loss": 0.2995,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 0.3896484375,
|
|
"rewards/margins": 2.6328125,
|
|
"rewards/rejected": -2.2421875,
|
|
"step": 789
|
|
},
|
|
{
|
|
"epoch": 0.8738938053097345,
|
|
"grad_norm": 13.99928092956543,
|
|
"learning_rate": 2.0961994158345763e-08,
|
|
"logits/chosen": -1.34765625,
|
|
"logits/rejected": -1.0859375,
|
|
"logps/chosen": -254.5,
|
|
"logps/rejected": -263.5,
|
|
"loss": 0.2972,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 0.662109375,
|
|
"rewards/margins": 2.8359375,
|
|
"rewards/rejected": -2.16796875,
|
|
"step": 790
|
|
},
|
|
{
|
|
"epoch": 0.875,
|
|
"grad_norm": 11.941873550415039,
|
|
"learning_rate": 2.0604093485151548e-08,
|
|
"logits/chosen": -1.31640625,
|
|
"logits/rejected": -1.1171875,
|
|
"logps/chosen": -261.0,
|
|
"logps/rejected": -270.0,
|
|
"loss": 0.2886,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 0.818359375,
|
|
"rewards/margins": 3.15625,
|
|
"rewards/rejected": -2.3359375,
|
|
"step": 791
|
|
},
|
|
{
|
|
"epoch": 0.8761061946902655,
|
|
"grad_norm": 17.870344161987305,
|
|
"learning_rate": 2.0249143183642097e-08,
|
|
"logits/chosen": -1.3984375,
|
|
"logits/rejected": -1.1953125,
|
|
"logps/chosen": -244.0,
|
|
"logps/rejected": -268.0,
|
|
"loss": 0.4293,
|
|
"rewards/accuracies": 0.765625,
|
|
"rewards/chosen": 0.4189453125,
|
|
"rewards/margins": 2.26171875,
|
|
"rewards/rejected": -1.83984375,
|
|
"step": 792
|
|
},
|
|
{
|
|
"epoch": 0.8772123893805309,
|
|
"grad_norm": 12.3770112991333,
|
|
"learning_rate": 1.989714781900484e-08,
|
|
"logits/chosen": -1.3671875,
|
|
"logits/rejected": -1.14453125,
|
|
"logps/chosen": -264.0,
|
|
"logps/rejected": -284.0,
|
|
"loss": 0.2621,
|
|
"rewards/accuracies": 0.8671875,
|
|
"rewards/chosen": 0.869140625,
|
|
"rewards/margins": 3.03125,
|
|
"rewards/rejected": -2.1640625,
|
|
"step": 793
|
|
},
|
|
{
|
|
"epoch": 0.8783185840707964,
|
|
"grad_norm": 14.804219245910645,
|
|
"learning_rate": 1.95481119184224e-08,
|
|
"logits/chosen": -1.1953125,
|
|
"logits/rejected": -1.140625,
|
|
"logps/chosen": -245.0,
"logps/rejected": -302.0,
"loss": 0.3552,
"rewards/accuracies": 0.78125,
"rewards/chosen": 0.638671875,
"rewards/margins": 2.6875,
"rewards/rejected": -2.04296875,
"step": 794
},
{
"epoch": 0.879424778761062,
"grad_norm": 13.518996238708496,
"learning_rate": 1.9202039971014243e-08,
"logits/chosen": -1.375,
"logits/rejected": -1.20703125,
"logps/chosen": -241.5,
"logps/rejected": -263.0,
"loss": 0.3375,
"rewards/accuracies": 0.78125,
"rewards/chosen": 0.69921875,
"rewards/margins": 2.734375,
"rewards/rejected": -2.03125,
"step": 795
},
{
"epoch": 0.8805309734513275,
"grad_norm": 13.753449440002441,
"learning_rate": 1.8858936427779137e-08,
"logits/chosen": -1.21875,
"logits/rejected": -1.12109375,
"logps/chosen": -262.5,
"logps/rejected": -289.0,
"loss": 0.2857,
"rewards/accuracies": 0.8046875,
"rewards/chosen": 0.75,
"rewards/margins": 2.8203125,
"rewards/rejected": -2.0703125,
"step": 796
},
{
"epoch": 0.8816371681415929,
"grad_norm": 15.924559593200684,
"learning_rate": 1.8518805701537548e-08,
"logits/chosen": -1.25,
"logits/rejected": -1.09765625,
"logps/chosen": -253.5,
"logps/rejected": -264.5,
"loss": 0.3678,
"rewards/accuracies": 0.75,
"rewards/chosen": 0.671875,
"rewards/margins": 2.640625,
"rewards/rejected": -1.96484375,
"step": 797
},
{
"epoch": 0.8827433628318584,
"grad_norm": 12.930898666381836,
"learning_rate": 1.818165216687531e-08,
"logits/chosen": -1.26171875,
"logits/rejected": -1.1640625,
"logps/chosen": -248.5,
"logps/rejected": -258.0,
"loss": 0.2994,
"rewards/accuracies": 0.859375,
"rewards/chosen": 0.732421875,
"rewards/margins": 2.9296875,
"rewards/rejected": -2.1953125,
"step": 798
},
{
"epoch": 0.8838495575221239,
"grad_norm": 14.774980545043945,
"learning_rate": 1.7847480160087025e-08,
"logits/chosen": -1.26953125,
"logits/rejected": -1.125,
"logps/chosen": -250.5,
"logps/rejected": -290.0,
"loss": 0.3111,
"rewards/accuracies": 0.828125,
"rewards/chosen": 0.724609375,
"rewards/margins": 2.953125,
"rewards/rejected": -2.234375,
"step": 799
},
{
"epoch": 0.8849557522123894,
"grad_norm": 13.716545104980469,
"learning_rate": 1.7516293979120523e-08,
"logits/chosen": -1.2578125,
"logits/rejected": -1.1328125,
"logps/chosen": -262.5,
"logps/rejected": -267.0,
"loss": 0.3317,
"rewards/accuracies": 0.796875,
"rewards/chosen": 0.5703125,
"rewards/margins": 2.640625,
"rewards/rejected": -2.0625,
"step": 800
},
{
"epoch": 0.8849557522123894,
"eval_logits/chosen": -1.2699394226074219,
"eval_logits/rejected": -1.161244511604309,
"eval_logps/chosen": -253.7014923095703,
"eval_logps/rejected": -280.33831787109375,
"eval_loss": 0.31928393244743347,
"eval_rewards/accuracies": 0.8145930171012878,
"eval_rewards/chosen": 0.7206642627716064,
"eval_rewards/margins": 2.836987018585205,
"eval_rewards/rejected": -2.1172263622283936,
"eval_runtime": 193.0847,
"eval_samples_per_second": 66.567,
"eval_steps_per_second": 1.041,
"step": 800
},
{
|
|
"epoch": 0.8860619469026548,
|
|
"grad_norm": 14.249678611755371,
|
|
"learning_rate": 1.7188097883521352e-08,
|
|
"logits/chosen": -1.28515625,
|
|
"logits/rejected": -1.171875,
|
|
"logps/chosen": -248.5,
|
|
"logps/rejected": -251.0,
|
|
"loss": 0.2843,
|
|
"rewards/accuracies": 0.84375,
|
|
"rewards/chosen": 0.751953125,
|
|
"rewards/margins": 2.984375,
|
|
"rewards/rejected": -2.234375,
|
|
"step": 801
|
|
},
|
|
{
|
|
"epoch": 0.8871681415929203,
|
|
"grad_norm": 12.044215202331543,
|
|
"learning_rate": 1.6862896094378244e-08,
|
|
"logits/chosen": -1.296875,
|
|
"logits/rejected": -1.2578125,
|
|
"logps/chosen": -236.0,
|
|
"logps/rejected": -261.5,
|
|
"loss": 0.2971,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 0.8515625,
|
|
"rewards/margins": 3.1171875,
|
|
"rewards/rejected": -2.265625,
|
|
"step": 802
|
|
},
|
|
{
|
|
"epoch": 0.8882743362831859,
|
|
"grad_norm": 13.170328140258789,
|
|
"learning_rate": 1.654069279426873e-08,
|
|
"logits/chosen": -1.21484375,
|
|
"logits/rejected": -1.125,
|
|
"logps/chosen": -255.5,
|
|
"logps/rejected": -300.0,
|
|
"loss": 0.2789,
|
|
"rewards/accuracies": 0.8359375,
|
|
"rewards/chosen": 1.017578125,
|
|
"rewards/margins": 3.1328125,
|
|
"rewards/rejected": -2.1171875,
|
|
"step": 803
|
|
},
|
|
{
|
|
"epoch": 0.8893805309734514,
|
|
"grad_norm": 13.188612937927246,
|
|
"learning_rate": 1.6221492127205166e-08,
|
|
"logits/chosen": -1.26953125,
|
|
"logits/rejected": -1.24609375,
|
|
"logps/chosen": -269.0,
|
|
"logps/rejected": -293.0,
|
|
"loss": 0.2959,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 0.900390625,
|
|
"rewards/margins": 2.8125,
|
|
"rewards/rejected": -1.921875,
|
|
"step": 804
|
|
},
|
|
{
|
|
"epoch": 0.8904867256637168,
|
|
"grad_norm": 17.259361267089844,
|
|
"learning_rate": 1.5905298198581774e-08,
|
|
"logits/chosen": -1.25,
|
|
"logits/rejected": -1.171875,
|
|
"logps/chosen": -271.0,
|
|
"logps/rejected": -301.0,
|
|
"loss": 0.3979,
|
|
"rewards/accuracies": 0.7421875,
|
|
"rewards/chosen": 0.814453125,
|
|
"rewards/margins": 2.4921875,
|
|
"rewards/rejected": -1.6796875,
|
|
"step": 805
|
|
},
|
|
{
|
|
"epoch": 0.8915929203539823,
|
|
"grad_norm": 13.314188957214355,
|
|
"learning_rate": 1.5592115075121508e-08,
|
|
"logits/chosen": -1.3203125,
|
|
"logits/rejected": -1.15234375,
|
|
"logps/chosen": -250.5,
|
|
"logps/rejected": -294.0,
|
|
"loss": 0.3297,
|
|
"rewards/accuracies": 0.8359375,
|
|
"rewards/chosen": 0.822265625,
|
|
"rewards/margins": 2.546875,
|
|
"rewards/rejected": -1.7265625,
|
|
"step": 806
|
|
},
|
|
{
|
|
"epoch": 0.8926991150442478,
|
|
"grad_norm": 12.087140083312988,
|
|
"learning_rate": 1.5281946784824002e-08,
|
|
"logits/chosen": -1.33203125,
|
|
"logits/rejected": -1.15625,
|
|
"logps/chosen": -250.0,
|
|
"logps/rejected": -297.0,
|
|
"loss": 0.2368,
|
|
"rewards/accuracies": 0.8828125,
|
|
"rewards/chosen": 0.66015625,
|
|
"rewards/margins": 3.1953125,
|
|
"rewards/rejected": -2.5234375,
|
|
"step": 807
|
|
},
|
|
{
|
|
"epoch": 0.8938053097345132,
|
|
"grad_norm": 15.906932830810547,
|
|
"learning_rate": 1.4974797316913673e-08,
|
|
"logits/chosen": -1.28125,
|
|
"logits/rejected": -1.15625,
|
|
"logps/chosen": -287.0,
|
|
"logps/rejected": -302.0,
|
|
"loss": 0.3282,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 0.671875,
|
|
"rewards/margins": 2.625,
|
|
"rewards/rejected": -1.9609375,
|
|
"step": 808
|
|
},
|
|
{
|
|
"epoch": 0.8949115044247787,
|
|
"grad_norm": 13.53934383392334,
|
|
"learning_rate": 1.4670670621788229e-08,
|
|
"logits/chosen": -1.1328125,
|
|
"logits/rejected": -1.1015625,
|
|
"logps/chosen": -266.0,
|
|
"logps/rejected": -293.0,
|
|
"loss": 0.2885,
|
|
"rewards/accuracies": 0.8515625,
|
|
"rewards/chosen": 1.05859375,
|
|
"rewards/margins": 3.359375,
|
|
"rewards/rejected": -2.3046875,
|
|
"step": 809
|
|
},
|
|
{
|
|
"epoch": 0.8960176991150443,
|
|
"grad_norm": 13.705190658569336,
|
|
"learning_rate": 1.4369570610968274e-08,
|
|
"logits/chosen": -1.3515625,
|
|
"logits/rejected": -1.17578125,
|
|
"logps/chosen": -250.0,
|
|
"logps/rejected": -271.0,
|
|
"loss": 0.346,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 0.693359375,
|
|
"rewards/margins": 2.6171875,
|
|
"rewards/rejected": -1.92578125,
|
|
"step": 810
|
|
},
|
|
{
|
|
"epoch": 0.8971238938053098,
|
|
"grad_norm": 12.113191604614258,
|
|
"learning_rate": 1.4071501157046666e-08,
|
|
"logits/chosen": -1.19921875,
|
|
"logits/rejected": -1.1328125,
|
|
"logps/chosen": -256.0,
|
|
"logps/rejected": -274.5,
|
|
"loss": 0.2523,
|
|
"rewards/accuracies": 0.8671875,
|
|
"rewards/chosen": 0.84375,
|
|
"rewards/margins": 3.1171875,
|
|
"rewards/rejected": -2.27734375,
|
|
"step": 811
|
|
},
|
|
{
|
|
"epoch": 0.8982300884955752,
|
|
"grad_norm": 13.494206428527832,
|
|
"learning_rate": 1.3776466093638695e-08,
|
|
"logits/chosen": -1.234375,
|
|
"logits/rejected": -1.0546875,
|
|
"logps/chosen": -241.0,
|
|
"logps/rejected": -272.0,
|
|
"loss": 0.2704,
|
|
"rewards/accuracies": 0.875,
|
|
"rewards/chosen": 0.990234375,
|
|
"rewards/margins": 3.4140625,
|
|
"rewards/rejected": -2.4296875,
|
|
"step": 812
|
|
},
|
|
{
|
|
"epoch": 0.8993362831858407,
|
|
"grad_norm": 11.447568893432617,
|
|
"learning_rate": 1.3484469215333082e-08,
|
|
"logits/chosen": -1.34765625,
|
|
"logits/rejected": -1.19921875,
|
|
"logps/chosen": -252.5,
|
|
"logps/rejected": -244.0,
|
|
"loss": 0.257,
|
|
"rewards/accuracies": 0.875,
|
|
"rewards/chosen": 0.6953125,
|
|
"rewards/margins": 3.109375,
|
|
"rewards/rejected": -2.4140625,
|
|
"step": 813
|
|
},
|
|
{
|
|
"epoch": 0.9004424778761062,
|
|
"grad_norm": 17.08716583251953,
|
|
"learning_rate": 1.3195514277642817e-08,
|
|
"logits/chosen": -1.41015625,
|
|
"logits/rejected": -1.3125,
|
|
"logps/chosen": -264.0,
|
|
"logps/rejected": -257.5,
|
|
"loss": 0.4052,
|
|
"rewards/accuracies": 0.7265625,
|
|
"rewards/chosen": 0.4658203125,
|
|
"rewards/margins": 2.4140625,
|
|
"rewards/rejected": -1.94921875,
|
|
"step": 814
|
|
},
|
|
{
|
|
"epoch": 0.9015486725663717,
|
|
"grad_norm": 12.849235534667969,
|
|
"learning_rate": 1.2909604996957091e-08,
|
|
"logits/chosen": -1.2890625,
|
|
"logits/rejected": -1.14453125,
|
|
"logps/chosen": -252.5,
|
|
"logps/rejected": -279.0,
|
|
"loss": 0.2986,
|
|
"rewards/accuracies": 0.8515625,
|
|
"rewards/chosen": 0.75,
|
|
"rewards/margins": 2.734375,
|
|
"rewards/rejected": -1.98046875,
|
|
"step": 815
|
|
},
|
|
{
|
|
"epoch": 0.9026548672566371,
|
|
"grad_norm": 16.0147647857666,
|
|
"learning_rate": 1.2626745050493493e-08,
|
|
"logits/chosen": -1.36328125,
|
|
"logits/rejected": -1.23828125,
|
|
"logps/chosen": -245.5,
|
|
"logps/rejected": -296.0,
|
|
"loss": 0.3544,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 0.650390625,
|
|
"rewards/margins": 2.578125,
|
|
"rewards/rejected": -1.93359375,
|
|
"step": 816
|
|
},
|
|
{
|
|
"epoch": 0.9037610619469026,
|
|
"grad_norm": 12.192747116088867,
|
|
"learning_rate": 1.234693807625048e-08,
|
|
"logits/chosen": -1.234375,
|
|
"logits/rejected": -1.21875,
|
|
"logps/chosen": -256.0,
|
|
"logps/rejected": -277.0,
|
|
"loss": 0.2743,
|
|
"rewards/accuracies": 0.859375,
|
|
"rewards/chosen": 0.802734375,
|
|
"rewards/margins": 3.2421875,
|
|
"rewards/rejected": -2.4296875,
|
|
"step": 817
|
|
},
|
|
{
|
|
"epoch": 0.9048672566371682,
|
|
"grad_norm": 19.156158447265625,
|
|
"learning_rate": 1.2070187672960947e-08,
|
|
"logits/chosen": -1.4296875,
|
|
"logits/rejected": -1.125,
|
|
"logps/chosen": -261.0,
|
|
"logps/rejected": -283.0,
|
|
"loss": 0.4435,
|
|
"rewards/accuracies": 0.765625,
|
|
"rewards/chosen": 0.70703125,
|
|
"rewards/margins": 2.515625,
|
|
"rewards/rejected": -1.8125,
|
|
"step": 818
|
|
},
|
|
{
|
|
"epoch": 0.9059734513274337,
|
|
"grad_norm": 14.084782600402832,
|
|
"learning_rate": 1.179649740004557e-08,
|
|
"logits/chosen": -1.24609375,
|
|
"logits/rejected": -1.0703125,
|
|
"logps/chosen": -273.0,
|
|
"logps/rejected": -272.5,
|
|
"loss": 0.2877,
|
|
"rewards/accuracies": 0.84375,
|
|
"rewards/chosen": 0.4453125,
|
|
"rewards/margins": 2.546875,
|
|
"rewards/rejected": -2.1015625,
|
|
"step": 819
|
|
},
|
|
{
|
|
"epoch": 0.9070796460176991,
|
|
"grad_norm": 14.487624168395996,
|
|
"learning_rate": 1.1525870777567393e-08,
|
|
"logits/chosen": -1.234375,
|
|
"logits/rejected": -1.16015625,
|
|
"logps/chosen": -273.0,
|
|
"logps/rejected": -278.0,
|
|
"loss": 0.3505,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 0.5,
|
|
"rewards/margins": 2.34375,
|
|
"rewards/rejected": -1.83984375,
|
|
"step": 820
|
|
},
|
|
{
|
|
"epoch": 0.9081858407079646,
|
|
"grad_norm": 13.851645469665527,
|
|
"learning_rate": 1.1258311286186207e-08,
|
|
"logits/chosen": -1.28125,
|
|
"logits/rejected": -1.1015625,
|
|
"logps/chosen": -244.0,
|
|
"logps/rejected": -292.0,
|
|
"loss": 0.2884,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 0.9921875,
|
|
"rewards/margins": 3.2578125,
|
|
"rewards/rejected": -2.265625,
|
|
"step": 821
|
|
},
|
|
{
|
|
"epoch": 0.9092920353982301,
|
|
"grad_norm": 13.431646347045898,
|
|
"learning_rate": 1.0993822367114047e-08,
|
|
"logits/chosen": -1.359375,
|
|
"logits/rejected": -1.1640625,
|
|
"logps/chosen": -285.0,
|
|
"logps/rejected": -291.0,
|
|
"loss": 0.2858,
|
|
"rewards/accuracies": 0.84375,
|
|
"rewards/chosen": 0.703125,
|
|
"rewards/margins": 2.6796875,
|
|
"rewards/rejected": -1.96875,
|
|
"step": 822
|
|
},
|
|
{
|
|
"epoch": 0.9103982300884956,
|
|
"grad_norm": 15.518174171447754,
|
|
"learning_rate": 1.0732407422070794e-08,
|
|
"logits/chosen": -1.3359375,
|
|
"logits/rejected": -1.19140625,
|
|
"logps/chosen": -230.5,
|
|
"logps/rejected": -270.0,
|
|
"loss": 0.3882,
|
|
"rewards/accuracies": 0.765625,
|
|
"rewards/chosen": 0.5517578125,
|
|
"rewards/margins": 2.5546875,
|
|
"rewards/rejected": -1.99609375,
|
|
"step": 823
|
|
},
|
|
{
|
|
"epoch": 0.911504424778761,
|
|
"grad_norm": 12.571428298950195,
|
|
"learning_rate": 1.0474069813240505e-08,
|
|
"logits/chosen": -1.23046875,
|
|
"logits/rejected": -1.1328125,
|
|
"logps/chosen": -241.0,
|
|
"logps/rejected": -290.0,
|
|
"loss": 0.3193,
|
|
"rewards/accuracies": 0.84375,
|
|
"rewards/chosen": 0.53515625,
|
|
"rewards/margins": 2.59375,
|
|
"rewards/rejected": -2.0546875,
|
|
"step": 824
|
|
},
|
|
{
|
|
"epoch": 0.9126106194690266,
|
|
"grad_norm": 14.974266052246094,
|
|
"learning_rate": 1.021881286322801e-08,
|
|
"logits/chosen": -1.2421875,
|
|
"logits/rejected": -1.109375,
|
|
"logps/chosen": -264.5,
|
|
"logps/rejected": -283.0,
|
|
"loss": 0.3549,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 0.5546875,
|
|
"rewards/margins": 2.23828125,
|
|
"rewards/rejected": -1.6796875,
|
|
"step": 825
|
|
},
|
|
{
|
|
"epoch": 0.9137168141592921,
|
|
"grad_norm": 12.049909591674805,
|
|
"learning_rate": 9.966639855016446e-09,
|
|
"logits/chosen": -1.3984375,
|
|
"logits/rejected": -1.203125,
|
|
"logps/chosen": -238.0,
|
|
"logps/rejected": -257.0,
|
|
"loss": 0.2548,
|
|
"rewards/accuracies": 0.890625,
|
|
"rewards/chosen": 0.775390625,
|
|
"rewards/margins": 3.203125,
|
|
"rewards/rejected": -2.4296875,
|
|
"step": 826
|
|
},
|
|
{
|
|
"epoch": 0.9148230088495575,
|
|
"grad_norm": 16.12934112548828,
|
|
"learning_rate": 9.71755403192484e-09,
|
|
"logits/chosen": -1.27734375,
|
|
"logits/rejected": -1.12890625,
|
|
"logps/chosen": -274.0,
|
|
"logps/rejected": -281.0,
|
|
"loss": 0.3717,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 0.437744140625,
|
|
"rewards/margins": 2.4609375,
|
|
"rewards/rejected": -2.0234375,
|
|
"step": 827
|
|
},
|
|
{
|
|
"epoch": 0.915929203539823,
|
|
"grad_norm": 15.575227737426758,
|
|
"learning_rate": 9.47155859756632e-09,
|
|
"logits/chosen": -1.34765625,
|
|
"logits/rejected": -1.2265625,
|
|
"logps/chosen": -244.0,
|
|
"logps/rejected": -277.0,
|
|
"loss": 0.3755,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 0.701171875,
|
|
"rewards/margins": 2.8046875,
|
|
"rewards/rejected": -2.109375,
|
|
"step": 828
|
|
},
|
|
{
|
|
"epoch": 0.9170353982300885,
|
|
"grad_norm": 13.580742835998535,
|
|
"learning_rate": 9.228656715807249e-09,
|
|
"logits/chosen": -1.2265625,
|
|
"logits/rejected": -1.125,
|
|
"logps/chosen": -264.0,
|
|
"logps/rejected": -301.0,
|
|
"loss": 0.2762,
|
|
"rewards/accuracies": 0.8515625,
|
|
"rewards/chosen": 0.828125,
|
|
"rewards/margins": 3.140625,
|
|
"rewards/rejected": -2.3125,
|
|
"step": 829
|
|
},
|
|
{
|
|
"epoch": 0.918141592920354,
|
|
"grad_norm": 14.229433059692383,
|
|
"learning_rate": 8.988851510726092e-09,
|
|
"logits/chosen": -1.375,
|
|
"logits/rejected": -1.109375,
|
|
"logps/chosen": -269.0,
|
|
"logps/rejected": -276.0,
|
|
"loss": 0.2769,
|
|
"rewards/accuracies": 0.8515625,
|
|
"rewards/chosen": 0.5478515625,
|
|
"rewards/margins": 2.8828125,
|
|
"rewards/rejected": -2.3359375,
|
|
"step": 830
|
|
},
|
|
{
|
|
"epoch": 0.9192477876106194,
|
|
"grad_norm": 12.20298957824707,
|
|
"learning_rate": 8.752146066573597e-09,
|
|
"logits/chosen": -1.171875,
|
|
"logits/rejected": -1.1484375,
|
|
"logps/chosen": -254.0,
|
|
"logps/rejected": -291.0,
|
|
"loss": 0.2699,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 0.734375,
|
|
"rewards/margins": 2.8125,
|
|
"rewards/rejected": -2.08203125,
|
|
"step": 831
|
|
},
|
|
{
|
|
"epoch": 0.9203539823008849,
|
|
"grad_norm": 14.036704063415527,
|
|
"learning_rate": 8.518543427732949e-09,
|
|
"logits/chosen": -1.296875,
|
|
"logits/rejected": -1.1171875,
|
|
"logps/chosen": -265.0,
|
|
"logps/rejected": -267.0,
|
|
"loss": 0.3249,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 0.578125,
|
|
"rewards/margins": 2.5625,
|
|
"rewards/rejected": -1.98046875,
|
|
"step": 832
|
|
},
|
|
{
|
|
"epoch": 0.9214601769911505,
|
|
"grad_norm": 12.48025131225586,
|
|
"learning_rate": 8.288046598680627e-09,
|
|
"logits/chosen": -1.234375,
|
|
"logits/rejected": -1.12109375,
|
|
"logps/chosen": -260.0,
|
|
"logps/rejected": -268.0,
|
|
"loss": 0.2814,
|
|
"rewards/accuracies": 0.8515625,
|
|
"rewards/chosen": 0.66796875,
|
|
"rewards/margins": 3.0859375,
|
|
"rewards/rejected": -2.421875,
|
|
"step": 833
|
|
},
|
|
{
|
|
"epoch": 0.922566371681416,
|
|
"grad_norm": 12.8703031539917,
|
|
"learning_rate": 8.060658543947829e-09,
|
|
"logits/chosen": -1.2890625,
|
|
"logits/rejected": -1.30078125,
|
|
"logps/chosen": -223.0,
|
|
"logps/rejected": -258.5,
|
|
"loss": 0.2808,
|
|
"rewards/accuracies": 0.84375,
|
|
"rewards/chosen": 0.8125,
|
|
"rewards/margins": 2.953125,
|
|
"rewards/rejected": -2.1484375,
|
|
"step": 834
|
|
},
|
|
{
|
|
"epoch": 0.9236725663716814,
|
|
"grad_norm": 13.693394660949707,
|
|
"learning_rate": 7.836382188082302e-09,
|
|
"logits/chosen": -1.234375,
|
|
"logits/rejected": -1.203125,
|
|
"logps/chosen": -264.0,
|
|
"logps/rejected": -289.0,
|
|
"loss": 0.2979,
|
|
"rewards/accuracies": 0.8359375,
|
|
"rewards/chosen": 0.48828125,
|
|
"rewards/margins": 2.7421875,
|
|
"rewards/rejected": -2.2421875,
|
|
"step": 835
|
|
},
|
|
{
|
|
"epoch": 0.9247787610619469,
|
|
"grad_norm": 12.683737754821777,
|
|
"learning_rate": 7.61522041561069e-09,
|
|
"logits/chosen": -1.30859375,
|
|
"logits/rejected": -1.11328125,
|
|
"logps/chosen": -246.0,
|
|
"logps/rejected": -265.0,
|
|
"loss": 0.2762,
|
|
"rewards/accuracies": 0.84375,
|
|
"rewards/chosen": 0.81640625,
|
|
"rewards/margins": 2.859375,
|
|
"rewards/rejected": -2.046875,
|
|
"step": 836
|
|
},
|
|
{
|
|
"epoch": 0.9258849557522124,
|
|
"grad_norm": 15.07400894165039,
|
|
"learning_rate": 7.397176071001543e-09,
|
|
"logits/chosen": -1.35546875,
|
|
"logits/rejected": -1.1796875,
|
|
"logps/chosen": -251.0,
|
|
"logps/rejected": -267.0,
|
|
"loss": 0.3266,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": 0.708984375,
|
|
"rewards/margins": 2.6171875,
|
|
"rewards/rejected": -1.90625,
|
|
"step": 837
|
|
},
|
|
{
|
|
"epoch": 0.9269911504424779,
|
|
"grad_norm": 12.571556091308594,
|
|
"learning_rate": 7.182251958628538e-09,
|
|
"logits/chosen": -1.33984375,
|
|
"logits/rejected": -1.21875,
|
|
"logps/chosen": -236.5,
|
|
"logps/rejected": -259.0,
|
|
"loss": 0.2943,
|
|
"rewards/accuracies": 0.8359375,
|
|
"rewards/chosen": 0.5966796875,
|
|
"rewards/margins": 2.7109375,
|
|
"rewards/rejected": -2.12109375,
|
|
"step": 838
|
|
},
|
|
{
|
|
"epoch": 0.9280973451327433,
|
|
"grad_norm": 12.665489196777344,
|
|
"learning_rate": 6.970450842734649e-09,
|
|
"logits/chosen": -1.375,
|
|
"logits/rejected": -1.171875,
|
|
"logps/chosen": -260.0,
|
|
"logps/rejected": -276.0,
|
|
"loss": 0.2713,
|
|
"rewards/accuracies": 0.875,
|
|
"rewards/chosen": 0.7734375,
|
|
"rewards/margins": 3.0078125,
|
|
"rewards/rejected": -2.234375,
|
|
"step": 839
|
|
},
|
|
{
|
|
"epoch": 0.9292035398230089,
|
|
"grad_norm": 15.426192283630371,
|
|
"learning_rate": 6.761775447396506e-09,
|
|
"logits/chosen": -1.26171875,
|
|
"logits/rejected": -1.203125,
|
|
"logps/chosen": -244.0,
|
|
"logps/rejected": -297.0,
|
|
"loss": 0.3234,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 0.791015625,
|
|
"rewards/margins": 3.015625,
|
|
"rewards/rejected": -2.234375,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 0.9303097345132744,
|
|
"grad_norm": 12.329756736755371,
|
|
"learning_rate": 6.556228456489232e-09,
|
|
"logits/chosen": -1.1875,
|
|
"logits/rejected": -1.0859375,
|
|
"logps/chosen": -253.5,
|
|
"logps/rejected": -280.0,
|
|
"loss": 0.2926,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 0.6484375,
|
|
"rewards/margins": 2.9453125,
|
|
"rewards/rejected": -2.296875,
|
|
"step": 841
|
|
},
|
|
{
|
|
"epoch": 0.9314159292035398,
|
|
"grad_norm": 16.28516387939453,
|
|
"learning_rate": 6.353812513652052e-09,
|
|
"logits/chosen": -1.2109375,
|
|
"logits/rejected": -1.10546875,
|
|
"logps/chosen": -260.0,
|
|
"logps/rejected": -282.0,
|
|
"loss": 0.3844,
|
|
"rewards/accuracies": 0.7578125,
|
|
"rewards/chosen": 0.529296875,
|
|
"rewards/margins": 2.4453125,
|
|
"rewards/rejected": -1.9140625,
|
|
"step": 842
|
|
},
|
|
{
|
|
"epoch": 0.9325221238938053,
|
|
"grad_norm": 16.69934844970703,
|
|
"learning_rate": 6.154530222254372e-09,
|
|
"logits/chosen": -1.25390625,
|
|
"logits/rejected": -1.21875,
|
|
"logps/chosen": -245.5,
|
|
"logps/rejected": -280.0,
|
|
"loss": 0.3776,
|
|
"rewards/accuracies": 0.7734375,
|
|
"rewards/chosen": 0.6630859375,
|
|
"rewards/margins": 2.53125,
|
|
"rewards/rejected": -1.8671875,
|
|
"step": 843
|
|
},
|
|
{
|
|
"epoch": 0.9336283185840708,
|
|
"grad_norm": 15.312355995178223,
|
|
"learning_rate": 5.958384145362038e-09,
|
|
"logits/chosen": -1.27734375,
|
|
"logits/rejected": -1.1796875,
|
|
"logps/chosen": -267.5,
|
|
"logps/rejected": -304.0,
|
|
"loss": 0.3446,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 0.619140625,
|
|
"rewards/margins": 2.7265625,
|
|
"rewards/rejected": -2.109375,
|
|
"step": 844
|
|
},
|
|
{
|
|
"epoch": 0.9347345132743363,
|
|
"grad_norm": 13.851134300231934,
|
|
"learning_rate": 5.765376805704575e-09,
|
|
"logits/chosen": -1.296875,
|
|
"logits/rejected": -1.16015625,
|
|
"logps/chosen": -242.5,
|
|
"logps/rejected": -286.0,
|
|
"loss": 0.312,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": 0.826171875,
|
|
"rewards/margins": 2.78125,
|
|
"rewards/rejected": -1.953125,
|
|
"step": 845
|
|
},
|
|
{
|
|
"epoch": 0.9358407079646017,
|
|
"grad_norm": 12.364534378051758,
|
|
"learning_rate": 5.575510685642798e-09,
|
|
"logits/chosen": -1.1328125,
|
|
"logits/rejected": -1.1875,
|
|
"logps/chosen": -265.0,
|
|
"logps/rejected": -298.0,
|
|
"loss": 0.2532,
|
|
"rewards/accuracies": 0.8671875,
|
|
"rewards/chosen": 1.015625,
|
|
"rewards/margins": 3.3671875,
|
|
"rewards/rejected": -2.3515625,
|
|
"step": 846
|
|
},
|
|
{
|
|
"epoch": 0.9369469026548672,
|
|
"grad_norm": 15.209588050842285,
|
|
"learning_rate": 5.38878822713662e-09,
|
|
"logits/chosen": -1.25390625,
|
|
"logits/rejected": -1.10546875,
|
|
"logps/chosen": -279.0,
|
|
"logps/rejected": -300.0,
|
|
"loss": 0.3528,
|
|
"rewards/accuracies": 0.7734375,
|
|
"rewards/chosen": 0.505859375,
|
|
"rewards/margins": 2.6796875,
|
|
"rewards/rejected": -2.1796875,
|
|
"step": 847
|
|
},
|
|
{
|
|
"epoch": 0.9380530973451328,
|
|
"grad_norm": 13.730789184570312,
|
|
"learning_rate": 5.205211831713935e-09,
|
|
"logits/chosen": -1.37109375,
|
|
"logits/rejected": -1.125,
|
|
"logps/chosen": -239.5,
|
|
"logps/rejected": -240.0,
|
|
"loss": 0.3282,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 0.6494140625,
|
|
"rewards/margins": 2.921875,
|
|
"rewards/rejected": -2.2734375,
|
|
"step": 848
|
|
},
|
|
{
|
|
"epoch": 0.9391592920353983,
|
|
"grad_norm": 13.921919822692871,
|
|
"learning_rate": 5.024783860439474e-09,
|
|
"logits/chosen": -1.28125,
|
|
"logits/rejected": -1.08203125,
|
|
"logps/chosen": -228.0,
|
|
"logps/rejected": -262.0,
|
|
"loss": 0.3565,
|
|
"rewards/accuracies": 0.7421875,
|
|
"rewards/chosen": 0.517578125,
|
|
"rewards/margins": 2.5703125,
|
|
"rewards/rejected": -2.046875,
|
|
"step": 849
|
|
},
|
|
{
|
|
"epoch": 0.9402654867256637,
|
|
"grad_norm": 15.472764015197754,
|
|
"learning_rate": 4.8475066338846685e-09,
|
|
"logits/chosen": -1.3515625,
|
|
"logits/rejected": -1.15234375,
|
|
"logps/chosen": -252.0,
|
|
"logps/rejected": -282.0,
|
|
"loss": 0.3386,
|
|
"rewards/accuracies": 0.78125,
|
|
"rewards/chosen": 0.658203125,
|
|
"rewards/margins": 2.8203125,
|
|
"rewards/rejected": -2.1640625,
|
|
"step": 850
|
|
},
{
"epoch": 0.9402654867256637,
"eval_logits/chosen": -1.2672574520111084,
"eval_logits/rejected": -1.1583489179611206,
"eval_logps/chosen": -253.592041015625,
"eval_logps/rejected": -280.3034973144531,
"eval_loss": 0.31901347637176514,
"eval_rewards/accuracies": 0.8145152926445007,
"eval_rewards/chosen": 0.7268248796463013,
"eval_rewards/margins": 2.8418843746185303,
"eval_rewards/rejected": -2.1149721145629883,
"eval_runtime": 192.9475,
"eval_samples_per_second": 66.614,
"eval_steps_per_second": 1.042,
"step": 850
},
{
|
|
"epoch": 0.9413716814159292,
|
|
"grad_norm": 13.594935417175293,
|
|
"learning_rate": 4.673382432097667e-09,
|
|
"logits/chosen": -1.3515625,
|
|
"logits/rejected": -1.23046875,
|
|
"logps/chosen": -256.0,
|
|
"logps/rejected": -263.0,
|
|
"loss": 0.3324,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 0.6171875,
|
|
"rewards/margins": 2.7265625,
|
|
"rewards/rejected": -2.1171875,
|
|
"step": 851
|
|
},
|
|
{
|
|
"epoch": 0.9424778761061947,
|
|
"grad_norm": 14.526602745056152,
|
|
"learning_rate": 4.5024134945740036e-09,
|
|
"logits/chosen": -1.3203125,
|
|
"logits/rejected": -1.22265625,
|
|
"logps/chosen": -229.5,
|
|
"logps/rejected": -244.5,
|
|
"loss": 0.3492,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 0.546875,
|
|
"rewards/margins": 2.6640625,
|
|
"rewards/rejected": -2.109375,
|
|
"step": 852
|
|
},
|
|
{
|
|
"epoch": 0.9435840707964602,
|
|
"grad_norm": 16.662525177001953,
|
|
"learning_rate": 4.334602020227867e-09,
|
|
"logits/chosen": -1.34375,
|
|
"logits/rejected": -1.15625,
|
|
"logps/chosen": -285.0,
|
|
"logps/rejected": -290.0,
|
|
"loss": 0.3672,
|
|
"rewards/accuracies": 0.78125,
|
|
"rewards/chosen": 0.4638671875,
|
|
"rewards/margins": 2.3984375,
|
|
"rewards/rejected": -1.94140625,
|
|
"step": 853
|
|
},
|
|
{
|
|
"epoch": 0.9446902654867256,
|
|
"grad_norm": 14.094331741333008,
|
|
"learning_rate": 4.169950167363767e-09,
|
|
"logits/chosen": -1.265625,
|
|
"logits/rejected": -1.0625,
|
|
"logps/chosen": -263.0,
|
|
"logps/rejected": -297.0,
|
|
"loss": 0.3088,
|
|
"rewards/accuracies": 0.78125,
|
|
"rewards/chosen": 0.84765625,
|
|
"rewards/margins": 2.9375,
|
|
"rewards/rejected": -2.09375,
|
|
"step": 854
|
|
},
|
|
{
|
|
"epoch": 0.9457964601769911,
|
|
"grad_norm": 14.868205070495605,
|
|
"learning_rate": 4.0084600536488265e-09,
|
|
"logits/chosen": -1.38671875,
|
|
"logits/rejected": -1.17578125,
|
|
"logps/chosen": -238.0,
|
|
"logps/rejected": -290.0,
|
|
"loss": 0.3156,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 0.8359375,
|
|
"rewards/margins": 2.8125,
|
|
"rewards/rejected": -1.9765625,
|
|
"step": 855
|
|
},
|
|
{
|
|
"epoch": 0.9469026548672567,
|
|
"grad_norm": 13.155553817749023,
|
|
"learning_rate": 3.850133756085505e-09,
|
|
"logits/chosen": -1.31640625,
|
|
"logits/rejected": -1.15234375,
|
|
"logps/chosen": -270.0,
|
|
"logps/rejected": -290.0,
|
|
"loss": 0.3135,
|
|
"rewards/accuracies": 0.8359375,
|
|
"rewards/chosen": 0.7265625,
|
|
"rewards/margins": 2.7890625,
|
|
"rewards/rejected": -2.0625,
|
|
"step": 856
|
|
},
|
|
{
|
|
"epoch": 0.9480088495575221,
|
|
"grad_norm": 13.842921257019043,
|
|
"learning_rate": 3.694973310984839e-09,
|
|
"logits/chosen": -1.359375,
|
|
"logits/rejected": -1.15625,
|
|
"logps/chosen": -258.0,
|
|
"logps/rejected": -281.0,
|
|
"loss": 0.3115,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 0.759765625,
|
|
"rewards/margins": 2.6171875,
|
|
"rewards/rejected": -1.859375,
|
|
"step": 857
|
|
},
|
|
{
|
|
"epoch": 0.9491150442477876,
|
|
"grad_norm": 13.213567733764648,
|
|
"learning_rate": 3.5429807139403524e-09,
|
|
"logits/chosen": -1.22265625,
|
|
"logits/rejected": -1.048828125,
|
|
"logps/chosen": -243.0,
|
|
"logps/rejected": -300.0,
|
|
"loss": 0.2749,
|
|
"rewards/accuracies": 0.859375,
|
|
"rewards/chosen": 0.87890625,
|
|
"rewards/margins": 3.2109375,
|
|
"rewards/rejected": -2.328125,
|
|
"step": 858
|
|
},
|
|
{
|
|
"epoch": 0.9502212389380531,
|
|
"grad_norm": 11.955760955810547,
|
|
"learning_rate": 3.3941579198023816e-09,
|
|
"logits/chosen": -1.484375,
|
|
"logits/rejected": -1.13671875,
|
|
"logps/chosen": -218.0,
|
|
"logps/rejected": -260.0,
|
|
"loss": 0.2961,
|
|
"rewards/accuracies": 0.8515625,
|
|
"rewards/chosen": 0.8046875,
|
|
"rewards/margins": 2.7421875,
|
|
"rewards/rejected": -1.93359375,
|
|
"step": 859
|
|
},
|
|
{
|
|
"epoch": 0.9513274336283186,
|
|
"grad_norm": 13.337422370910645,
|
|
"learning_rate": 3.248506842652793e-09,
|
|
"logits/chosen": -1.2578125,
|
|
"logits/rejected": -1.12109375,
|
|
"logps/chosen": -249.0,
|
|
"logps/rejected": -309.0,
|
|
"loss": 0.2853,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 0.8349609375,
|
|
"rewards/margins": 3.078125,
|
|
"rewards/rejected": -2.25,
|
|
"step": 860
|
|
},
|
|
{
|
|
"epoch": 0.952433628318584,
|
|
"grad_norm": 12.912832260131836,
|
|
"learning_rate": 3.106029355780582e-09,
|
|
"logits/chosen": -1.234375,
|
|
"logits/rejected": -1.15234375,
|
|
"logps/chosen": -271.0,
|
|
"logps/rejected": -282.0,
|
|
"loss": 0.3052,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 0.447265625,
|
|
"rewards/margins": 2.65625,
|
|
"rewards/rejected": -2.20703125,
|
|
"step": 861
|
|
},
|
|
{
|
|
"epoch": 0.9535398230088495,
|
|
"grad_norm": 14.942134857177734,
|
|
"learning_rate": 2.9667272916575337e-09,
|
|
"logits/chosen": -1.20703125,
|
|
"logits/rejected": -1.04296875,
|
|
"logps/chosen": -247.5,
|
|
"logps/rejected": -279.0,
|
|
"loss": 0.3356,
|
|
"rewards/accuracies": 0.734375,
|
|
"rewards/chosen": 0.69921875,
|
|
"rewards/margins": 2.8671875,
|
|
"rewards/rejected": -2.1640625,
|
|
"step": 862
|
|
},
|
|
{
|
|
"epoch": 0.9546460176991151,
|
|
"grad_norm": 11.314682960510254,
|
|
"learning_rate": 2.830602441914881e-09,
|
|
"logits/chosen": -1.23828125,
|
|
"logits/rejected": -1.1875,
|
|
"logps/chosen": -263.0,
|
|
"logps/rejected": -277.0,
|
|
"loss": 0.2615,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 0.80859375,
|
|
"rewards/margins": 3.1953125,
|
|
"rewards/rejected": -2.390625,
|
|
"step": 863
|
|
},
|
|
{
|
|
"epoch": 0.9557522123893806,
|
|
"grad_norm": 13.024490356445312,
|
|
"learning_rate": 2.6976565573202102e-09,
|
|
"logits/chosen": -1.23828125,
|
|
"logits/rejected": -1.23046875,
|
|
"logps/chosen": -249.0,
|
|
"logps/rejected": -275.0,
|
|
"loss": 0.2961,
|
|
"rewards/accuracies": 0.8515625,
|
|
"rewards/chosen": 0.734375,
|
|
"rewards/margins": 2.7890625,
|
|
"rewards/rejected": -2.0546875,
|
|
"step": 864
|
|
},
|
|
{
|
|
"epoch": 0.956858407079646,
|
|
"grad_norm": 13.718770980834961,
|
|
"learning_rate": 2.5678913477547302e-09,
|
|
"logits/chosen": -1.39453125,
|
|
"logits/rejected": -1.1796875,
|
|
"logps/chosen": -274.0,
|
|
"logps/rejected": -312.0,
|
|
"loss": 0.2869,
|
|
"rewards/accuracies": 0.875,
|
|
"rewards/chosen": 0.853515625,
|
|
"rewards/margins": 2.7890625,
|
|
"rewards/rejected": -1.9296875,
|
|
"step": 865
|
|
},
|
|
{
|
|
"epoch": 0.9579646017699115,
|
|
"grad_norm": 13.562867164611816,
|
|
"learning_rate": 2.441308482191623e-09,
|
|
"logits/chosen": -1.12890625,
|
|
"logits/rejected": -1.0078125,
|
|
"logps/chosen": -252.5,
|
|
"logps/rejected": -291.0,
|
|
"loss": 0.3117,
|
|
"rewards/accuracies": 0.8359375,
|
|
"rewards/chosen": 0.845703125,
|
|
"rewards/margins": 2.8359375,
|
|
"rewards/rejected": -1.98828125,
|
|
"step": 866
|
|
},
|
|
{
|
|
"epoch": 0.959070796460177,
|
|
"grad_norm": 13.698179244995117,
|
|
"learning_rate": 2.3179095886743384e-09,
|
|
"logits/chosen": -1.2890625,
|
|
"logits/rejected": -1.234375,
|
|
"logps/chosen": -230.5,
|
|
"logps/rejected": -266.5,
|
|
"loss": 0.3103,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 0.9375,
|
|
"rewards/margins": 3.1484375,
|
|
"rewards/rejected": -2.20703125,
|
|
"step": 867
|
|
},
|
|
{
|
|
"epoch": 0.9601769911504425,
|
|
"grad_norm": 13.498557090759277,
|
|
"learning_rate": 2.1976962542956945e-09,
|
|
"logits/chosen": -1.2890625,
|
|
"logits/rejected": -1.22265625,
|
|
"logps/chosen": -245.5,
|
|
"logps/rejected": -281.0,
|
|
"loss": 0.3036,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 0.935546875,
|
|
"rewards/margins": 2.9453125,
|
|
"rewards/rejected": -2.0,
|
|
"step": 868
|
|
},
|
|
{
|
|
"epoch": 0.9612831858407079,
|
|
"grad_norm": 12.500775337219238,
|
|
"learning_rate": 2.0806700251775055e-09,
|
|
"logits/chosen": -1.296875,
|
|
"logits/rejected": -1.171875,
|
|
"logps/chosen": -232.5,
|
|
"logps/rejected": -262.0,
|
|
"loss": 0.2973,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": 0.724609375,
|
|
"rewards/margins": 2.953125,
|
|
"rewards/rejected": -2.2265625,
|
|
"step": 869
|
|
},
|
|
{
|
|
"epoch": 0.9623893805309734,
|
|
"grad_norm": 13.277873992919922,
|
|
"learning_rate": 1.966832406450708e-09,
|
|
"logits/chosen": -1.3828125,
|
|
"logits/rejected": -1.15234375,
|
|
"logps/chosen": -232.0,
|
|
"logps/rejected": -260.0,
|
|
"loss": 0.3434,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 0.5185546875,
|
|
"rewards/margins": 2.3984375,
|
|
"rewards/rejected": -1.87890625,
|
|
"step": 870
|
|
},
|
|
{
|
|
"epoch": 0.963495575221239,
|
|
"grad_norm": 14.751419067382812,
|
|
"learning_rate": 1.85618486223596e-09,
|
|
"logits/chosen": -1.234375,
|
|
"logits/rejected": -1.1484375,
|
|
"logps/chosen": -269.0,
|
|
"logps/rejected": -291.0,
|
|
"loss": 0.3578,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 0.4072265625,
|
|
"rewards/margins": 2.484375,
|
|
"rewards/rejected": -2.07421875,
|
|
"step": 871
|
|
},
|
|
{
|
|
"epoch": 0.9646017699115044,
|
|
"grad_norm": 16.29852294921875,
|
|
"learning_rate": 1.748728815624878e-09,
|
|
"logits/chosen": -1.328125,
|
|
"logits/rejected": -1.109375,
|
|
"logps/chosen": -273.0,
|
|
"logps/rejected": -264.0,
|
|
"loss": 0.3518,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 0.50927734375,
|
|
"rewards/margins": 2.734375,
|
|
"rewards/rejected": -2.2265625,
|
|
"step": 872
|
|
},
|
|
{
|
|
"epoch": 0.9657079646017699,
|
|
"grad_norm": 16.783334732055664,
|
|
"learning_rate": 1.6444656486615805e-09,
|
|
"logits/chosen": -1.1328125,
|
|
"logits/rejected": -1.07421875,
|
|
"logps/chosen": -287.0,
|
|
"logps/rejected": -307.0,
|
|
"loss": 0.3656,
|
|
"rewards/accuracies": 0.7734375,
|
|
"rewards/chosen": 0.478515625,
|
|
"rewards/margins": 2.4765625,
|
|
"rewards/rejected": -2.00390625,
|
|
"step": 873
|
|
},
|
|
{
|
|
"epoch": 0.9668141592920354,
|
|
"grad_norm": 16.244199752807617,
|
|
"learning_rate": 1.5433967023250894e-09,
|
|
"logits/chosen": -1.37109375,
|
|
"logits/rejected": -1.1015625,
|
|
"logps/chosen": -275.0,
|
|
"logps/rejected": -317.0,
|
|
"loss": 0.3542,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 0.7353515625,
|
|
"rewards/margins": 2.8359375,
|
|
"rewards/rejected": -2.09375,
|
|
"step": 874
|
|
},
|
|
{
|
|
"epoch": 0.9679203539823009,
|
|
"grad_norm": 13.663660049438477,
|
|
"learning_rate": 1.4455232765120396e-09,
|
|
"logits/chosen": -1.3359375,
|
|
"logits/rejected": -1.22265625,
|
|
"logps/chosen": -244.5,
|
|
"logps/rejected": -268.0,
|
|
"loss": 0.3567,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": 0.701171875,
|
|
"rewards/margins": 2.625,
|
|
"rewards/rejected": -1.921875,
|
|
"step": 875
|
|
},
|
|
{
|
|
"epoch": 0.9690265486725663,
|
|
"grad_norm": 12.790926933288574,
|
|
"learning_rate": 1.3508466300198306e-09,
|
|
"logits/chosen": -1.4296875,
|
|
"logits/rejected": -1.21875,
|
|
"logps/chosen": -232.5,
|
|
"logps/rejected": -262.0,
|
|
"loss": 0.3053,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 0.685546875,
|
|
"rewards/margins": 2.921875,
|
|
"rewards/rejected": -2.234375,
|
|
"step": 876
|
|
},
|
|
{
|
|
"epoch": 0.9701327433628318,
|
|
"grad_norm": 15.329063415527344,
|
|
"learning_rate": 1.2593679805306401e-09,
|
|
"logits/chosen": -1.20703125,
|
|
"logits/rejected": -1.20703125,
|
|
"logps/chosen": -254.5,
|
|
"logps/rejected": -278.0,
|
|
"loss": 0.3161,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 0.87890625,
|
|
"rewards/margins": 2.9609375,
|
|
"rewards/rejected": -2.07421875,
|
|
"step": 877
|
|
},
|
|
{
|
|
"epoch": 0.9712389380530974,
|
|
"grad_norm": 15.826077461242676,
|
|
"learning_rate": 1.1710885045956021e-09,
|
|
"logits/chosen": -1.41015625,
|
|
"logits/rejected": -1.26953125,
|
|
"logps/chosen": -257.5,
|
|
"logps/rejected": -281.0,
|
|
"loss": 0.3719,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 0.478515625,
|
|
"rewards/margins": 2.2734375,
|
|
"rewards/rejected": -1.79296875,
|
|
"step": 878
|
|
},
|
|
{
|
|
"epoch": 0.9723451327433629,
|
|
"grad_norm": 15.952284812927246,
|
|
"learning_rate": 1.0860093376197642e-09,
|
|
"logits/chosen": -1.28125,
|
|
"logits/rejected": -1.05078125,
|
|
"logps/chosen": -260.0,
|
|
"logps/rejected": -289.0,
|
|
"loss": 0.3437,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 0.73828125,
|
|
"rewards/margins": 2.859375,
|
|
"rewards/rejected": -2.1171875,
|
|
"step": 879
|
|
},
|
|
{
|
|
"epoch": 0.9734513274336283,
|
|
"grad_norm": 13.334358215332031,
|
|
"learning_rate": 1.0041315738474055e-09,
|
|
"logits/chosen": -1.203125,
|
|
"logits/rejected": -1.0859375,
|
|
"logps/chosen": -261.5,
|
|
"logps/rejected": -312.0,
|
|
"loss": 0.2845,
|
|
"rewards/accuracies": 0.8359375,
|
|
"rewards/chosen": 0.869140625,
|
|
"rewards/margins": 3.125,
|
|
"rewards/rejected": -2.265625,
|
|
"step": 880
|
|
},
|
|
{
|
|
"epoch": 0.9745575221238938,
|
|
"grad_norm": 11.215107917785645,
|
|
"learning_rate": 9.254562663480458e-10,
|
|
"logits/chosen": -1.3125,
|
|
"logits/rejected": -1.2265625,
|
|
"logps/chosen": -241.5,
|
|
"logps/rejected": -287.0,
|
|
"loss": 0.2595,
|
|
"rewards/accuracies": 0.8828125,
|
|
"rewards/chosen": 1.03125,
|
|
"rewards/margins": 3.1484375,
|
|
"rewards/rejected": -2.125,
|
|
"step": 881
|
|
},
|
|
{
|
|
"epoch": 0.9756637168141593,
|
|
"grad_norm": 13.879293441772461,
|
|
"learning_rate": 8.499844270028755e-10,
|
|
"logits/chosen": -1.3046875,
|
|
"logits/rejected": -1.10546875,
|
|
"logps/chosen": -250.5,
|
|
"logps/rejected": -267.5,
|
|
"loss": 0.3143,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 0.494140625,
|
|
"rewards/margins": 2.84375,
|
|
"rewards/rejected": -2.3515625,
|
|
"step": 882
|
|
},
|
|
{
|
|
"epoch": 0.9767699115044248,
|
|
"grad_norm": 425.59161376953125,
|
|
"learning_rate": 7.777170264917365e-10,
|
|
"logits/chosen": -1.2421875,
|
|
"logits/rejected": -1.0703125,
|
|
"logps/chosen": -260.0,
|
|
"logps/rejected": -347.0,
|
|
"loss": 0.4257,
|
|
"rewards/accuracies": 0.84375,
|
|
"rewards/chosen": 0.646484375,
|
|
"rewards/margins": 2.890625,
|
|
"rewards/rejected": -2.2421875,
|
|
"step": 883
|
|
},
|
|
{
|
|
"epoch": 0.9778761061946902,
|
|
"grad_norm": 25.028003692626953,
|
|
"learning_rate": 7.086549942805498e-10,
|
|
"logits/chosen": -1.19921875,
|
|
"logits/rejected": -1.11328125,
|
|
"logps/chosen": -285.0,
|
|
"logps/rejected": -282.0,
|
|
"loss": 0.3772,
|
|
"rewards/accuracies": 0.7734375,
|
|
"rewards/chosen": 0.56640625,
|
|
"rewards/margins": 2.359375,
|
|
"rewards/rejected": -1.79296875,
|
|
"step": 884
|
|
},
|
|
{
|
|
"epoch": 0.9789823008849557,
|
|
"grad_norm": 15.059175491333008,
|
|
"learning_rate": 6.427992186095744e-10,
|
|
"logits/chosen": -1.28515625,
|
|
"logits/rejected": -1.19140625,
|
|
"logps/chosen": -228.0,
|
|
"logps/rejected": -271.0,
|
|
"loss": 0.3026,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": 0.94921875,
|
|
"rewards/margins": 3.140625,
|
|
"rewards/rejected": -2.1953125,
|
|
"step": 885
|
|
},
|
|
{
|
|
"epoch": 0.9800884955752213,
|
|
"grad_norm": 12.722869873046875,
|
|
"learning_rate": 5.801505464817502e-10,
|
|
"logits/chosen": -1.171875,
|
|
"logits/rejected": -1.08984375,
|
|
"logps/chosen": -244.0,
|
|
"logps/rejected": -279.0,
|
|
"loss": 0.3066,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": 0.61328125,
|
|
"rewards/margins": 2.6640625,
|
|
"rewards/rejected": -2.0546875,
|
|
"step": 886
|
|
},
|
|
{
|
|
"epoch": 0.9811946902654868,
|
|
"grad_norm": 13.964948654174805,
|
|
"learning_rate": 5.207097836519569e-10,
|
|
"logits/chosen": -1.2421875,
|
|
"logits/rejected": -1.1171875,
|
|
"logps/chosen": -249.5,
|
|
"logps/rejected": -287.0,
|
|
"loss": 0.3159,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": 0.8046875,
|
|
"rewards/margins": 2.953125,
|
|
"rewards/rejected": -2.1484375,
|
|
"step": 887
|
|
},
|
|
{
|
|
"epoch": 0.9823008849557522,
|
|
"grad_norm": 13.418638229370117,
|
|
"learning_rate": 4.644776946165774e-10,
|
|
"logits/chosen": -1.2734375,
|
|
"logits/rejected": -1.1484375,
|
|
"logps/chosen": -246.0,
|
|
"logps/rejected": -253.5,
|
|
"loss": 0.3351,
|
|
"rewards/accuracies": 0.8671875,
|
|
"rewards/chosen": 0.580078125,
|
|
"rewards/margins": 2.7734375,
|
|
"rewards/rejected": -2.1953125,
|
|
"step": 888
|
|
},
|
|
{
|
|
"epoch": 0.9834070796460177,
|
|
"grad_norm": 11.94414234161377,
|
|
"learning_rate": 4.114550026037278e-10,
|
|
"logits/chosen": -1.30078125,
|
|
"logits/rejected": -1.10546875,
|
|
"logps/chosen": -237.0,
|
|
"logps/rejected": -285.0,
|
|
"loss": 0.2559,
|
|
"rewards/accuracies": 0.8671875,
|
|
"rewards/chosen": 0.701171875,
|
|
"rewards/margins": 3.1484375,
|
|
"rewards/rejected": -2.453125,
|
|
"step": 889
|
|
},
|
|
{
|
|
"epoch": 0.9845132743362832,
|
|
"grad_norm": 14.505678176879883,
|
|
"learning_rate": 3.6164238956384876e-10,
|
|
"logits/chosen": -1.21484375,
|
|
"logits/rejected": -1.26953125,
|
|
"logps/chosen": -248.5,
|
|
"logps/rejected": -281.0,
|
|
"loss": 0.2998,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 0.876953125,
|
|
"rewards/margins": 2.875,
|
|
"rewards/rejected": -2.00390625,
|
|
"step": 890
|
|
},
|
|
{
|
|
"epoch": 0.9856194690265486,
|
|
"grad_norm": 12.155240058898926,
|
|
"learning_rate": 3.150404961611008e-10,
|
|
"logits/chosen": -1.234375,
|
|
"logits/rejected": -1.140625,
|
|
"logps/chosen": -240.0,
|
|
"logps/rejected": -276.0,
|
|
"loss": 0.2918,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 0.685546875,
|
|
"rewards/margins": 3.0078125,
|
|
"rewards/rejected": -2.3203125,
|
|
"step": 891
|
|
},
|
|
{
|
|
"epoch": 0.9867256637168141,
|
|
"grad_norm": 13.752731323242188,
|
|
"learning_rate": 2.716499217649271e-10,
|
|
"logits/chosen": -1.2109375,
|
|
"logits/rejected": -1.1640625,
|
|
"logps/chosen": -241.5,
|
|
"logps/rejected": -277.0,
|
|
"loss": 0.3461,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 0.638671875,
|
|
"rewards/margins": 2.6015625,
|
|
"rewards/rejected": -1.9609375,
|
|
"step": 892
|
|
},
|
|
{
|
|
"epoch": 0.9878318584070797,
|
|
"grad_norm": 14.712821960449219,
|
|
"learning_rate": 2.3147122444250323e-10,
|
|
"logits/chosen": -1.2265625,
|
|
"logits/rejected": -1.15234375,
|
|
"logps/chosen": -242.0,
|
|
"logps/rejected": -274.0,
|
|
"loss": 0.3957,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 0.4921875,
|
|
"rewards/margins": 2.5234375,
|
|
"rewards/rejected": -2.03125,
|
|
"step": 893
|
|
},
|
|
{
|
|
"epoch": 0.9889380530973452,
|
|
"grad_norm": 13.806950569152832,
|
|
"learning_rate": 1.9450492095149373e-10,
|
|
"logits/chosen": -1.27734375,
|
|
"logits/rejected": -1.06640625,
|
|
"logps/chosen": -251.0,
|
|
"logps/rejected": -282.0,
|
|
"loss": 0.3152,
|
|
"rewards/accuracies": 0.8203125,
|
|
"rewards/chosen": 0.84375,
|
|
"rewards/margins": 2.9921875,
|
|
"rewards/rejected": -2.1484375,
|
|
"step": 894
|
|
},
|
|
{
|
|
"epoch": 0.9900442477876106,
|
|
"grad_norm": 13.336440086364746,
|
|
"learning_rate": 1.607514867333626e-10,
|
|
"logits/chosen": -1.17578125,
|
|
"logits/rejected": -1.0546875,
|
|
"logps/chosen": -273.5,
|
|
"logps/rejected": -280.0,
|
|
"loss": 0.3012,
|
|
"rewards/accuracies": 0.8515625,
|
|
"rewards/chosen": 0.646484375,
|
|
"rewards/margins": 2.7578125,
|
|
"rewards/rejected": -2.1015625,
|
|
"step": 895
|
|
},
|
|
{
|
|
"epoch": 0.9911504424778761,
|
|
"grad_norm": 14.591585159301758,
|
|
"learning_rate": 1.3021135590740583e-10,
|
|
"logits/chosen": -1.30078125,
|
|
"logits/rejected": -1.10546875,
|
|
"logps/chosen": -255.0,
|
|
"logps/rejected": -281.0,
|
|
"loss": 0.356,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 0.66015625,
|
|
"rewards/margins": 2.4921875,
|
|
"rewards/rejected": -1.83203125,
|
|
"step": 896
|
|
},
|
|
{
|
|
"epoch": 0.9922566371681416,
|
|
"grad_norm": 14.33768367767334,
|
|
"learning_rate": 1.028849212649785e-10,
|
|
"logits/chosen": -1.33203125,
|
|
"logits/rejected": -1.1875,
|
|
"logps/chosen": -272.0,
|
|
"logps/rejected": -264.0,
|
|
"loss": 0.3197,
|
|
"rewards/accuracies": 0.8046875,
|
|
"rewards/chosen": 0.716796875,
|
|
"rewards/margins": 2.765625,
|
|
"rewards/rejected": -2.046875,
|
|
"step": 897
|
|
},
|
|
{
|
|
"epoch": 0.9933628318584071,
|
|
"grad_norm": 14.789177894592285,
|
|
"learning_rate": 7.877253426458175e-11,
|
|
"logits/chosen": -1.2890625,
|
|
"logits/rejected": -1.125,
|
|
"logps/chosen": -253.5,
|
|
"logps/rejected": -296.0,
|
|
"loss": 0.3679,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": 0.57421875,
|
|
"rewards/margins": 2.3984375,
|
|
"rewards/rejected": -1.82421875,
|
|
"step": 898
|
|
},
|
|
{
|
|
"epoch": 0.9944690265486725,
|
|
"grad_norm": 14.226619720458984,
|
|
"learning_rate": 5.7874505027283304e-11,
|
|
"logits/chosen": -1.265625,
|
|
"logits/rejected": -1.12890625,
|
|
"logps/chosen": -256.0,
|
|
"logps/rejected": -263.5,
|
|
"loss": 0.3177,
|
|
"rewards/accuracies": 0.8515625,
|
|
"rewards/chosen": 0.6015625,
|
|
"rewards/margins": 2.6796875,
|
|
"rewards/rejected": -2.0703125,
|
|
"step": 899
|
|
},
|
|
{
|
|
"epoch": 0.995575221238938,
|
|
"grad_norm": 14.491003036499023,
|
|
"learning_rate": 4.0191102332748364e-11,
|
|
"logits/chosen": -1.37109375,
|
|
"logits/rejected": -1.2421875,
|
|
"logps/chosen": -261.0,
|
|
"logps/rejected": -300.0,
|
|
"loss": 0.2955,
|
|
"rewards/accuracies": 0.8125,
|
|
"rewards/chosen": 0.892578125,
|
|
"rewards/margins": 2.9140625,
|
|
"rewards/rejected": -2.0234375,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 0.995575221238938,
|
|
"eval_logits/chosen": -1.2664412260055542,
|
|
"eval_logits/rejected": -1.1579796075820923,
|
|
"eval_logps/chosen": -253.59701538085938,
|
|
"eval_logps/rejected": -280.3333435058594,
|
|
"eval_loss": 0.31904885172843933,
|
|
"eval_rewards/accuracies": 0.8163970708847046,
|
|
"eval_rewards/chosen": 0.7264896035194397,
|
|
"eval_rewards/margins": 2.841573476791382,
|
|
"eval_rewards/rejected": -2.1163711547851562,
|
|
"eval_runtime": 193.0253,
|
|
"eval_samples_per_second": 66.587,
|
|
"eval_steps_per_second": 1.041,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 0.9966814159292036,
|
|
"grad_norm": 11.916271209716797,
|
|
"learning_rate": 2.5722553615770137e-11,
|
|
"logits/chosen": -1.31640625,
|
|
"logits/rejected": -1.11328125,
|
|
"logps/chosen": -246.0,
|
|
"logps/rejected": -269.5,
|
|
"loss": 0.2564,
|
|
"rewards/accuracies": 0.859375,
|
|
"rewards/chosen": 0.962890625,
|
|
"rewards/margins": 3.4609375,
|
|
"rewards/rejected": -2.4921875,
|
|
"step": 901
|
|
},
|
|
{
|
|
"epoch": 0.9977876106194691,
|
|
"grad_norm": 15.206621170043945,
|
|
"learning_rate": 1.4469044963355547e-11,
|
|
"logits/chosen": -1.2109375,
|
|
"logits/rejected": -1.08984375,
|
|
"logps/chosen": -250.5,
|
|
"logps/rejected": -298.0,
|
|
"loss": 0.3246,
|
|
"rewards/accuracies": 0.7890625,
|
|
"rewards/chosen": 0.677734375,
|
|
"rewards/margins": 2.6953125,
|
|
"rewards/rejected": -2.015625,
|
|
"step": 902
|
|
},
|
|
{
|
|
"epoch": 0.9988938053097345,
|
|
"grad_norm": 16.438447952270508,
|
|
"learning_rate": 6.430721112282711e-12,
|
|
"logits/chosen": -1.265625,
|
|
"logits/rejected": -1.19921875,
|
|
"logps/chosen": -255.5,
|
|
"logps/rejected": -294.0,
|
|
"loss": 0.4007,
|
|
"rewards/accuracies": 0.78125,
|
|
"rewards/chosen": 0.6181640625,
|
|
"rewards/margins": 2.5703125,
|
|
"rewards/rejected": -1.953125,
|
|
"step": 903
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"grad_norm": 13.59802532196045,
|
|
"learning_rate": 1.6076854473801027e-12,
|
|
"logits/chosen": -1.359375,
|
|
"logits/rejected": -1.16796875,
|
|
"logps/chosen": -269.0,
|
|
"logps/rejected": -293.0,
|
|
"loss": 0.2951,
|
|
"rewards/accuracies": 0.875,
|
|
"rewards/chosen": 0.6875,
|
|
"rewards/margins": 2.75,
|
|
"rewards/rejected": -2.0625,
|
|
"step": 904
|
|
}
|
|
],
|
|
"logging_steps": 1,
|
|
"max_steps": 904,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 1,
|
|
"save_steps": 50,
|
|
"stateful_callbacks": {
|
|
"TrainerControl": {
|
|
"args": {
|
|
"should_epoch_stop": false,
|
|
"should_evaluate": false,
|
|
"should_log": false,
|
|
"should_save": true,
|
|
"should_training_stop": true
|
|
},
|
|
"attributes": {}
|
|
}
|
|
},
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 8,
|
|
"train_dataloader_state_dict": null,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|