3885 lines
112 KiB
JSON
3885 lines
112 KiB
JSON
|
|
{
|
||
|
|
"best_metric": null,
|
||
|
|
"best_model_checkpoint": null,
|
||
|
|
"epoch": 1.0,
|
||
|
|
"eval_steps": 500,
|
||
|
|
"global_step": 226,
|
||
|
|
"is_hyper_param_search": false,
|
||
|
|
"is_local_process_zero": true,
|
||
|
|
"is_world_process_zero": true,
|
||
|
|
"log_history": [
|
||
|
|
{
|
||
|
|
"epoch": 0.004424778761061947,
|
||
|
|
"grad_norm": 2.562241554260254,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.2119140625,
|
||
|
|
"logits/rejected": -0.1328125,
|
||
|
|
"logps/chosen": -242.0,
|
||
|
|
"logps/rejected": -178.0,
|
||
|
|
"loss": 0.6914,
|
||
|
|
"loss/chosen-sft": 1.0,
|
||
|
|
"loss/dpo": 0.69140625,
|
||
|
|
"rewards/accuracies": 0.0,
|
||
|
|
"rewards/chosen": 0.0,
|
||
|
|
"rewards/margins": 0.0,
|
||
|
|
"rewards/rejected": 0.0,
|
||
|
|
"step": 1
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.008849557522123894,
|
||
|
|
"grad_norm": 2.733123302459717,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.263671875,
|
||
|
|
"logits/rejected": -0.19140625,
|
||
|
|
"logps/chosen": -225.0,
|
||
|
|
"logps/rejected": -175.0,
|
||
|
|
"loss": 0.6929,
|
||
|
|
"loss/chosen-sft": 1.09375,
|
||
|
|
"loss/dpo": 0.69140625,
|
||
|
|
"rewards/accuracies": 0.3125,
|
||
|
|
"rewards/chosen": 0.00019550323486328125,
|
||
|
|
"rewards/margins": -0.00183868408203125,
|
||
|
|
"rewards/rejected": 0.0020294189453125,
|
||
|
|
"step": 2
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.01327433628318584,
|
||
|
|
"grad_norm": 5.071370601654053,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.287109375,
|
||
|
|
"logits/rejected": -0.1796875,
|
||
|
|
"logps/chosen": -258.0,
|
||
|
|
"logps/rejected": -195.0,
|
||
|
|
"loss": 0.6914,
|
||
|
|
"loss/chosen-sft": 1.1171875,
|
||
|
|
"loss/dpo": 0.69140625,
|
||
|
|
"rewards/accuracies": 0.21875,
|
||
|
|
"rewards/chosen": -0.000743865966796875,
|
||
|
|
"rewards/margins": -3.910064697265625e-05,
|
||
|
|
"rewards/rejected": -0.000701904296875,
|
||
|
|
"step": 3
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.017699115044247787,
|
||
|
|
"grad_norm": 9.871452331542969,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.294921875,
|
||
|
|
"logits/rejected": -0.310546875,
|
||
|
|
"logps/chosen": -270.0,
|
||
|
|
"logps/rejected": -236.0,
|
||
|
|
"loss": 0.6914,
|
||
|
|
"loss/chosen-sft": 1.09375,
|
||
|
|
"loss/dpo": 0.69140625,
|
||
|
|
"rewards/accuracies": 0.375,
|
||
|
|
"rewards/chosen": -0.0017242431640625,
|
||
|
|
"rewards/margins": 0.002197265625,
|
||
|
|
"rewards/rejected": -0.00390625,
|
||
|
|
"step": 4
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.022123893805309734,
|
||
|
|
"grad_norm": 8.816597938537598,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.1328125,
|
||
|
|
"logits/rejected": -0.2421875,
|
||
|
|
"logps/chosen": -280.0,
|
||
|
|
"logps/rejected": -249.0,
|
||
|
|
"loss": 0.6914,
|
||
|
|
"loss/chosen-sft": 1.1484375,
|
||
|
|
"loss/dpo": 0.6953125,
|
||
|
|
"rewards/accuracies": 0.3125,
|
||
|
|
"rewards/chosen": -0.00156402587890625,
|
||
|
|
"rewards/margins": -0.0022735595703125,
|
||
|
|
"rewards/rejected": 0.000705718994140625,
|
||
|
|
"step": 5
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.02654867256637168,
|
||
|
|
"grad_norm": 8.637555122375488,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.12890625,
|
||
|
|
"logits/rejected": -0.1611328125,
|
||
|
|
"logps/chosen": -223.0,
|
||
|
|
"logps/rejected": -172.0,
|
||
|
|
"loss": 0.6895,
|
||
|
|
"loss/chosen-sft": 1.0,
|
||
|
|
"loss/dpo": 0.69140625,
|
||
|
|
"rewards/accuracies": 0.625,
|
||
|
|
"rewards/chosen": 0.005889892578125,
|
||
|
|
"rewards/margins": 0.006195068359375,
|
||
|
|
"rewards/rejected": -0.00031280517578125,
|
||
|
|
"step": 6
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.030973451327433628,
|
||
|
|
"grad_norm": 4.954357147216797,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.27734375,
|
||
|
|
"logits/rejected": -0.125,
|
||
|
|
"logps/chosen": -312.0,
|
||
|
|
"logps/rejected": -228.0,
|
||
|
|
"loss": 0.6899,
|
||
|
|
"loss/chosen-sft": 1.078125,
|
||
|
|
"loss/dpo": 0.69140625,
|
||
|
|
"rewards/accuracies": 0.40625,
|
||
|
|
"rewards/chosen": 0.00113677978515625,
|
||
|
|
"rewards/margins": 0.0034027099609375,
|
||
|
|
"rewards/rejected": -0.0022735595703125,
|
||
|
|
"step": 7
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.035398230088495575,
|
||
|
|
"grad_norm": 7.5598249435424805,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.095703125,
|
||
|
|
"logits/rejected": -0.1416015625,
|
||
|
|
"logps/chosen": -240.0,
|
||
|
|
"logps/rejected": -233.0,
|
||
|
|
"loss": 0.6875,
|
||
|
|
"loss/chosen-sft": 1.0546875,
|
||
|
|
"loss/dpo": 0.6875,
|
||
|
|
"rewards/accuracies": 0.625,
|
||
|
|
"rewards/chosen": 0.006988525390625,
|
||
|
|
"rewards/margins": 0.01007080078125,
|
||
|
|
"rewards/rejected": -0.003082275390625,
|
||
|
|
"step": 8
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.03982300884955752,
|
||
|
|
"grad_norm": 2.6033551692962646,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.294921875,
|
||
|
|
"logits/rejected": -0.28125,
|
||
|
|
"logps/chosen": -292.0,
|
||
|
|
"logps/rejected": -212.0,
|
||
|
|
"loss": 0.6885,
|
||
|
|
"loss/chosen-sft": 1.15625,
|
||
|
|
"loss/dpo": 0.6875,
|
||
|
|
"rewards/accuracies": 0.5625,
|
||
|
|
"rewards/chosen": 0.00665283203125,
|
||
|
|
"rewards/margins": 0.0135498046875,
|
||
|
|
"rewards/rejected": -0.00689697265625,
|
||
|
|
"step": 9
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.04424778761061947,
|
||
|
|
"grad_norm": 10.546786308288574,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.267578125,
|
||
|
|
"logits/rejected": -0.050537109375,
|
||
|
|
"logps/chosen": -308.0,
|
||
|
|
"logps/rejected": -210.0,
|
||
|
|
"loss": 0.687,
|
||
|
|
"loss/chosen-sft": 1.078125,
|
||
|
|
"loss/dpo": 0.68359375,
|
||
|
|
"rewards/accuracies": 0.6875,
|
||
|
|
"rewards/chosen": 0.0142822265625,
|
||
|
|
"rewards/margins": 0.02099609375,
|
||
|
|
"rewards/rejected": -0.006683349609375,
|
||
|
|
"step": 10
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.048672566371681415,
|
||
|
|
"grad_norm": 9.844188690185547,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.3046875,
|
||
|
|
"logits/rejected": -0.177734375,
|
||
|
|
"logps/chosen": -243.0,
|
||
|
|
"logps/rejected": -234.0,
|
||
|
|
"loss": 0.687,
|
||
|
|
"loss/chosen-sft": 1.1796875,
|
||
|
|
"loss/dpo": 0.68359375,
|
||
|
|
"rewards/accuracies": 0.75,
|
||
|
|
"rewards/chosen": 0.01025390625,
|
||
|
|
"rewards/margins": 0.016357421875,
|
||
|
|
"rewards/rejected": -0.006103515625,
|
||
|
|
"step": 11
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.05309734513274336,
|
||
|
|
"grad_norm": 12.557535171508789,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.2265625,
|
||
|
|
"logits/rejected": -0.2275390625,
|
||
|
|
"logps/chosen": -244.0,
|
||
|
|
"logps/rejected": -219.0,
|
||
|
|
"loss": 0.686,
|
||
|
|
"loss/chosen-sft": 1.078125,
|
||
|
|
"loss/dpo": 0.6875,
|
||
|
|
"rewards/accuracies": 0.5625,
|
||
|
|
"rewards/chosen": 0.0025177001953125,
|
||
|
|
"rewards/margins": 0.0113525390625,
|
||
|
|
"rewards/rejected": -0.0087890625,
|
||
|
|
"step": 12
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.05752212389380531,
|
||
|
|
"grad_norm": 8.109821319580078,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.1953125,
|
||
|
|
"logits/rejected": -0.224609375,
|
||
|
|
"logps/chosen": -272.0,
|
||
|
|
"logps/rejected": -217.0,
|
||
|
|
"loss": 0.6836,
|
||
|
|
"loss/chosen-sft": 1.03125,
|
||
|
|
"loss/dpo": 0.68359375,
|
||
|
|
"rewards/accuracies": 0.65625,
|
||
|
|
"rewards/chosen": 0.00726318359375,
|
||
|
|
"rewards/margins": 0.0164794921875,
|
||
|
|
"rewards/rejected": -0.00921630859375,
|
||
|
|
"step": 13
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.061946902654867256,
|
||
|
|
"grad_norm": 8.9277982711792,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.19140625,
|
||
|
|
"logits/rejected": -0.2412109375,
|
||
|
|
"logps/chosen": -294.0,
|
||
|
|
"logps/rejected": -198.0,
|
||
|
|
"loss": 0.6816,
|
||
|
|
"loss/chosen-sft": 1.109375,
|
||
|
|
"loss/dpo": 0.6796875,
|
||
|
|
"rewards/accuracies": 0.75,
|
||
|
|
"rewards/chosen": 0.007171630859375,
|
||
|
|
"rewards/margins": 0.025146484375,
|
||
|
|
"rewards/rejected": -0.0179443359375,
|
||
|
|
"step": 14
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.06637168141592921,
|
||
|
|
"grad_norm": 3.4456562995910645,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.07470703125,
|
||
|
|
"logits/rejected": -0.0712890625,
|
||
|
|
"logps/chosen": -239.0,
|
||
|
|
"logps/rejected": -227.0,
|
||
|
|
"loss": 0.6826,
|
||
|
|
"loss/chosen-sft": 0.93359375,
|
||
|
|
"loss/dpo": 0.6796875,
|
||
|
|
"rewards/accuracies": 0.59375,
|
||
|
|
"rewards/chosen": 0.00970458984375,
|
||
|
|
"rewards/margins": 0.0260009765625,
|
||
|
|
"rewards/rejected": -0.016357421875,
|
||
|
|
"step": 15
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.07079646017699115,
|
||
|
|
"grad_norm": 3.63268780708313,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.244140625,
|
||
|
|
"logits/rejected": -0.255859375,
|
||
|
|
"logps/chosen": -264.0,
|
||
|
|
"logps/rejected": -208.0,
|
||
|
|
"loss": 0.6826,
|
||
|
|
"loss/chosen-sft": 1.0,
|
||
|
|
"loss/dpo": 0.6796875,
|
||
|
|
"rewards/accuracies": 0.53125,
|
||
|
|
"rewards/chosen": 0.007537841796875,
|
||
|
|
"rewards/margins": 0.03173828125,
|
||
|
|
"rewards/rejected": -0.0242919921875,
|
||
|
|
"step": 16
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.0752212389380531,
|
||
|
|
"grad_norm": 14.086406707763672,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.30078125,
|
||
|
|
"logits/rejected": -0.271484375,
|
||
|
|
"logps/chosen": -214.0,
|
||
|
|
"logps/rejected": -197.0,
|
||
|
|
"loss": 0.6807,
|
||
|
|
"loss/chosen-sft": 0.9296875,
|
||
|
|
"loss/dpo": 0.6875,
|
||
|
|
"rewards/accuracies": 0.53125,
|
||
|
|
"rewards/chosen": -0.004791259765625,
|
||
|
|
"rewards/margins": 0.01422119140625,
|
||
|
|
"rewards/rejected": -0.0189208984375,
|
||
|
|
"step": 17
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.07964601769911504,
|
||
|
|
"grad_norm": 3.81648850440979,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.1435546875,
|
||
|
|
"logits/rejected": -0.1494140625,
|
||
|
|
"logps/chosen": -268.0,
|
||
|
|
"logps/rejected": -223.0,
|
||
|
|
"loss": 0.6797,
|
||
|
|
"loss/chosen-sft": 1.0,
|
||
|
|
"loss/dpo": 0.6796875,
|
||
|
|
"rewards/accuracies": 0.65625,
|
||
|
|
"rewards/chosen": 0.0057373046875,
|
||
|
|
"rewards/margins": 0.02734375,
|
||
|
|
"rewards/rejected": -0.0216064453125,
|
||
|
|
"step": 18
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.084070796460177,
|
||
|
|
"grad_norm": 3.1519317626953125,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.2109375,
|
||
|
|
"logits/rejected": -0.1220703125,
|
||
|
|
"logps/chosen": -247.0,
|
||
|
|
"logps/rejected": -236.0,
|
||
|
|
"loss": 0.6748,
|
||
|
|
"loss/chosen-sft": 1.0625,
|
||
|
|
"loss/dpo": 0.67578125,
|
||
|
|
"rewards/accuracies": 0.65625,
|
||
|
|
"rewards/chosen": 0.004730224609375,
|
||
|
|
"rewards/margins": 0.04052734375,
|
||
|
|
"rewards/rejected": -0.035888671875,
|
||
|
|
"step": 19
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.08849557522123894,
|
||
|
|
"grad_norm": 8.66562271118164,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.1982421875,
|
||
|
|
"logits/rejected": -0.1904296875,
|
||
|
|
"logps/chosen": -304.0,
|
||
|
|
"logps/rejected": -245.0,
|
||
|
|
"loss": 0.6733,
|
||
|
|
"loss/chosen-sft": 0.99609375,
|
||
|
|
"loss/dpo": 0.671875,
|
||
|
|
"rewards/accuracies": 0.65625,
|
||
|
|
"rewards/chosen": 0.0106201171875,
|
||
|
|
"rewards/margins": 0.0400390625,
|
||
|
|
"rewards/rejected": -0.029296875,
|
||
|
|
"step": 20
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.09292035398230089,
|
||
|
|
"grad_norm": 3.798952579498291,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.2578125,
|
||
|
|
"logits/rejected": -0.0281982421875,
|
||
|
|
"logps/chosen": -296.0,
|
||
|
|
"logps/rejected": -190.0,
|
||
|
|
"loss": 0.6782,
|
||
|
|
"loss/chosen-sft": 1.125,
|
||
|
|
"loss/dpo": 0.68359375,
|
||
|
|
"rewards/accuracies": 0.65625,
|
||
|
|
"rewards/chosen": -0.006195068359375,
|
||
|
|
"rewards/margins": 0.0238037109375,
|
||
|
|
"rewards/rejected": -0.030029296875,
|
||
|
|
"step": 21
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.09734513274336283,
|
||
|
|
"grad_norm": 6.044543743133545,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.1728515625,
|
||
|
|
"logits/rejected": -0.263671875,
|
||
|
|
"logps/chosen": -238.0,
|
||
|
|
"logps/rejected": -225.0,
|
||
|
|
"loss": 0.6743,
|
||
|
|
"loss/chosen-sft": 0.98828125,
|
||
|
|
"loss/dpo": 0.68359375,
|
||
|
|
"rewards/accuracies": 0.59375,
|
||
|
|
"rewards/chosen": -0.004638671875,
|
||
|
|
"rewards/margins": 0.01611328125,
|
||
|
|
"rewards/rejected": -0.020751953125,
|
||
|
|
"step": 22
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.10176991150442478,
|
||
|
|
"grad_norm": 6.223972797393799,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.36328125,
|
||
|
|
"logits/rejected": -0.37890625,
|
||
|
|
"logps/chosen": -256.0,
|
||
|
|
"logps/rejected": -235.0,
|
||
|
|
"loss": 0.6768,
|
||
|
|
"loss/chosen-sft": 1.09375,
|
||
|
|
"loss/dpo": 0.6796875,
|
||
|
|
"rewards/accuracies": 0.625,
|
||
|
|
"rewards/chosen": -0.0225830078125,
|
||
|
|
"rewards/margins": 0.03173828125,
|
||
|
|
"rewards/rejected": -0.05419921875,
|
||
|
|
"step": 23
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.10619469026548672,
|
||
|
|
"grad_norm": 2.817391872406006,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.2353515625,
|
||
|
|
"logits/rejected": -0.27734375,
|
||
|
|
"logps/chosen": -258.0,
|
||
|
|
"logps/rejected": -196.0,
|
||
|
|
"loss": 0.6699,
|
||
|
|
"loss/chosen-sft": 1.09375,
|
||
|
|
"loss/dpo": 0.67578125,
|
||
|
|
"rewards/accuracies": 0.71875,
|
||
|
|
"rewards/chosen": -0.00830078125,
|
||
|
|
"rewards/margins": 0.03515625,
|
||
|
|
"rewards/rejected": -0.043212890625,
|
||
|
|
"step": 24
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.11061946902654868,
|
||
|
|
"grad_norm": 5.743912220001221,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.12890625,
|
||
|
|
"logits/rejected": -0.07861328125,
|
||
|
|
"logps/chosen": -268.0,
|
||
|
|
"logps/rejected": -231.0,
|
||
|
|
"loss": 0.6685,
|
||
|
|
"loss/chosen-sft": 0.9140625,
|
||
|
|
"loss/dpo": 0.66796875,
|
||
|
|
"rewards/accuracies": 0.71875,
|
||
|
|
"rewards/chosen": -0.0189208984375,
|
||
|
|
"rewards/margins": 0.050537109375,
|
||
|
|
"rewards/rejected": -0.0693359375,
|
||
|
|
"step": 25
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.11504424778761062,
|
||
|
|
"grad_norm": 3.4631690979003906,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": 0.046142578125,
|
||
|
|
"logits/rejected": 0.018798828125,
|
||
|
|
"logps/chosen": -213.0,
|
||
|
|
"logps/rejected": -252.0,
|
||
|
|
"loss": 0.6699,
|
||
|
|
"loss/chosen-sft": 0.87109375,
|
||
|
|
"loss/dpo": 0.65625,
|
||
|
|
"rewards/accuracies": 0.84375,
|
||
|
|
"rewards/chosen": 0.0076904296875,
|
||
|
|
"rewards/margins": 0.07470703125,
|
||
|
|
"rewards/rejected": -0.06689453125,
|
||
|
|
"step": 26
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.11946902654867257,
|
||
|
|
"grad_norm": 2.302494525909424,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.2734375,
|
||
|
|
"logits/rejected": -0.296875,
|
||
|
|
"logps/chosen": -294.0,
|
||
|
|
"logps/rejected": -219.0,
|
||
|
|
"loss": 0.6733,
|
||
|
|
"loss/chosen-sft": 1.1796875,
|
||
|
|
"loss/dpo": 0.6640625,
|
||
|
|
"rewards/accuracies": 0.71875,
|
||
|
|
"rewards/chosen": -0.0025177001953125,
|
||
|
|
"rewards/margins": 0.058837890625,
|
||
|
|
"rewards/rejected": -0.061279296875,
|
||
|
|
"step": 27
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.12389380530973451,
|
||
|
|
"grad_norm": 15.041751861572266,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.326171875,
|
||
|
|
"logits/rejected": -0.2255859375,
|
||
|
|
"logps/chosen": -324.0,
|
||
|
|
"logps/rejected": -215.0,
|
||
|
|
"loss": 0.6597,
|
||
|
|
"loss/chosen-sft": 1.1328125,
|
||
|
|
"loss/dpo": 0.65625,
|
||
|
|
"rewards/accuracies": 0.78125,
|
||
|
|
"rewards/chosen": 0.0016632080078125,
|
||
|
|
"rewards/margins": 0.076171875,
|
||
|
|
"rewards/rejected": -0.07470703125,
|
||
|
|
"step": 28
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.12831858407079647,
|
||
|
|
"grad_norm": 10.651082992553711,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.055908203125,
|
||
|
|
"logits/rejected": -0.04638671875,
|
||
|
|
"logps/chosen": -264.0,
|
||
|
|
"logps/rejected": -233.0,
|
||
|
|
"loss": 0.6704,
|
||
|
|
"loss/chosen-sft": 0.9140625,
|
||
|
|
"loss/dpo": 0.65625,
|
||
|
|
"rewards/accuracies": 0.8125,
|
||
|
|
"rewards/chosen": -0.0001506805419921875,
|
||
|
|
"rewards/margins": 0.0751953125,
|
||
|
|
"rewards/rejected": -0.0751953125,
|
||
|
|
"step": 29
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.13274336283185842,
|
||
|
|
"grad_norm": 17.013574600219727,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.21484375,
|
||
|
|
"logits/rejected": -0.138671875,
|
||
|
|
"logps/chosen": -225.0,
|
||
|
|
"logps/rejected": -202.0,
|
||
|
|
"loss": 0.6685,
|
||
|
|
"loss/chosen-sft": 0.984375,
|
||
|
|
"loss/dpo": 0.67578125,
|
||
|
|
"rewards/accuracies": 0.53125,
|
||
|
|
"rewards/chosen": -0.0390625,
|
||
|
|
"rewards/margins": 0.038818359375,
|
||
|
|
"rewards/rejected": -0.07763671875,
|
||
|
|
"step": 30
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.13716814159292035,
|
||
|
|
"grad_norm": 3.365304708480835,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.267578125,
|
||
|
|
"logits/rejected": -0.25390625,
|
||
|
|
"logps/chosen": -296.0,
|
||
|
|
"logps/rejected": -262.0,
|
||
|
|
"loss": 0.6641,
|
||
|
|
"loss/chosen-sft": 1.1640625,
|
||
|
|
"loss/dpo": 0.6640625,
|
||
|
|
"rewards/accuracies": 0.71875,
|
||
|
|
"rewards/chosen": -0.0126953125,
|
||
|
|
"rewards/margins": 0.06689453125,
|
||
|
|
"rewards/rejected": -0.07958984375,
|
||
|
|
"step": 31
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1415929203539823,
|
||
|
|
"grad_norm": 5.34974479675293,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.263671875,
|
||
|
|
"logits/rejected": -0.294921875,
|
||
|
|
"logps/chosen": -234.0,
|
||
|
|
"logps/rejected": -255.0,
|
||
|
|
"loss": 0.668,
|
||
|
|
"loss/chosen-sft": 1.0546875,
|
||
|
|
"loss/dpo": 0.65625,
|
||
|
|
"rewards/accuracies": 0.8125,
|
||
|
|
"rewards/chosen": 0.0081787109375,
|
||
|
|
"rewards/margins": 0.0830078125,
|
||
|
|
"rewards/rejected": -0.07470703125,
|
||
|
|
"step": 32
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.14601769911504425,
|
||
|
|
"grad_norm": 5.2689948081970215,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.255859375,
|
||
|
|
"logits/rejected": -0.38671875,
|
||
|
|
"logps/chosen": -221.0,
|
||
|
|
"logps/rejected": -318.0,
|
||
|
|
"loss": 0.6582,
|
||
|
|
"loss/chosen-sft": 1.0390625,
|
||
|
|
"loss/dpo": 0.6484375,
|
||
|
|
"rewards/accuracies": 0.8125,
|
||
|
|
"rewards/chosen": -0.0267333984375,
|
||
|
|
"rewards/margins": 0.09814453125,
|
||
|
|
"rewards/rejected": -0.12451171875,
|
||
|
|
"step": 33
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1504424778761062,
|
||
|
|
"grad_norm": 13.799201011657715,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.275390625,
|
||
|
|
"logits/rejected": -0.21875,
|
||
|
|
"logps/chosen": -238.0,
|
||
|
|
"logps/rejected": -208.0,
|
||
|
|
"loss": 0.6733,
|
||
|
|
"loss/chosen-sft": 1.046875,
|
||
|
|
"loss/dpo": 0.6484375,
|
||
|
|
"rewards/accuracies": 0.6875,
|
||
|
|
"rewards/chosen": -0.03662109375,
|
||
|
|
"rewards/margins": 0.09765625,
|
||
|
|
"rewards/rejected": -0.1337890625,
|
||
|
|
"step": 34
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.15486725663716813,
|
||
|
|
"grad_norm": 13.07458782196045,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.369140625,
|
||
|
|
"logits/rejected": -0.24609375,
|
||
|
|
"logps/chosen": -280.0,
|
||
|
|
"logps/rejected": -255.0,
|
||
|
|
"loss": 0.6631,
|
||
|
|
"loss/chosen-sft": 1.1015625,
|
||
|
|
"loss/dpo": 0.66015625,
|
||
|
|
"rewards/accuracies": 0.5625,
|
||
|
|
"rewards/chosen": -0.078125,
|
||
|
|
"rewards/margins": 0.07470703125,
|
||
|
|
"rewards/rejected": -0.15234375,
|
||
|
|
"step": 35
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1592920353982301,
|
||
|
|
"grad_norm": 2.256340742111206,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.287109375,
|
||
|
|
"logits/rejected": -0.28125,
|
||
|
|
"logps/chosen": -226.0,
|
||
|
|
"logps/rejected": -240.0,
|
||
|
|
"loss": 0.6543,
|
||
|
|
"loss/chosen-sft": 1.015625,
|
||
|
|
"loss/dpo": 0.6484375,
|
||
|
|
"rewards/accuracies": 0.75,
|
||
|
|
"rewards/chosen": -0.049072265625,
|
||
|
|
"rewards/margins": 0.095703125,
|
||
|
|
"rewards/rejected": -0.1455078125,
|
||
|
|
"step": 36
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.16371681415929204,
|
||
|
|
"grad_norm": 8.795002937316895,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.2294921875,
|
||
|
|
"logits/rejected": -0.21484375,
|
||
|
|
"logps/chosen": -225.0,
|
||
|
|
"logps/rejected": -216.0,
|
||
|
|
"loss": 0.6523,
|
||
|
|
"loss/chosen-sft": 0.9765625,
|
||
|
|
"loss/dpo": 0.671875,
|
||
|
|
"rewards/accuracies": 0.59375,
|
||
|
|
"rewards/chosen": -0.06787109375,
|
||
|
|
"rewards/margins": 0.047607421875,
|
||
|
|
"rewards/rejected": -0.115234375,
|
||
|
|
"step": 37
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.168141592920354,
|
||
|
|
"grad_norm": 2.735612154006958,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.271484375,
|
||
|
|
"logits/rejected": -0.26171875,
|
||
|
|
"logps/chosen": -322.0,
|
||
|
|
"logps/rejected": -236.0,
|
||
|
|
"loss": 0.6455,
|
||
|
|
"loss/chosen-sft": 1.234375,
|
||
|
|
"loss/dpo": 0.625,
|
||
|
|
"rewards/accuracies": 0.84375,
|
||
|
|
"rewards/chosen": -0.029052734375,
|
||
|
|
"rewards/margins": 0.1474609375,
|
||
|
|
"rewards/rejected": -0.1767578125,
|
||
|
|
"step": 38
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.17256637168141592,
|
||
|
|
"grad_norm": 17.598873138427734,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.23046875,
|
||
|
|
"logits/rejected": -0.357421875,
|
||
|
|
"logps/chosen": -237.0,
|
||
|
|
"logps/rejected": -253.0,
|
||
|
|
"loss": 0.6367,
|
||
|
|
"loss/chosen-sft": 1.0625,
|
||
|
|
"loss/dpo": 0.64453125,
|
||
|
|
"rewards/accuracies": 0.71875,
|
||
|
|
"rewards/chosen": -0.053955078125,
|
||
|
|
"rewards/margins": 0.10888671875,
|
||
|
|
"rewards/rejected": -0.1630859375,
|
||
|
|
"step": 39
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.17699115044247787,
|
||
|
|
"grad_norm": 4.538353443145752,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.1953125,
|
||
|
|
"logits/rejected": -0.263671875,
|
||
|
|
"logps/chosen": -240.0,
|
||
|
|
"logps/rejected": -290.0,
|
||
|
|
"loss": 0.6475,
|
||
|
|
"loss/chosen-sft": 0.9296875,
|
||
|
|
"loss/dpo": 0.6328125,
|
||
|
|
"rewards/accuracies": 0.71875,
|
||
|
|
"rewards/chosen": -0.0625,
|
||
|
|
"rewards/margins": 0.13671875,
|
||
|
|
"rewards/rejected": -0.19921875,
|
||
|
|
"step": 40
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.18141592920353983,
|
||
|
|
"grad_norm": 17.900062561035156,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.349609375,
|
||
|
|
"logits/rejected": -0.306640625,
|
||
|
|
"logps/chosen": -260.0,
|
||
|
|
"logps/rejected": -284.0,
|
||
|
|
"loss": 0.6538,
|
||
|
|
"loss/chosen-sft": 1.03125,
|
||
|
|
"loss/dpo": 0.65625,
|
||
|
|
"rewards/accuracies": 0.5625,
|
||
|
|
"rewards/chosen": -0.09912109375,
|
||
|
|
"rewards/margins": 0.08154296875,
|
||
|
|
"rewards/rejected": -0.1806640625,
|
||
|
|
"step": 41
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.18584070796460178,
|
||
|
|
"grad_norm": 2.5172457695007324,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.40625,
|
||
|
|
"logits/rejected": -0.38671875,
|
||
|
|
"logps/chosen": -272.0,
|
||
|
|
"logps/rejected": -216.0,
|
||
|
|
"loss": 0.6426,
|
||
|
|
"loss/chosen-sft": 1.234375,
|
||
|
|
"loss/dpo": 0.625,
|
||
|
|
"rewards/accuracies": 0.75,
|
||
|
|
"rewards/chosen": -0.07080078125,
|
||
|
|
"rewards/margins": 0.1708984375,
|
||
|
|
"rewards/rejected": -0.2421875,
|
||
|
|
"step": 42
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.1902654867256637,
|
||
|
|
"grad_norm": 10.608353614807129,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.400390625,
|
||
|
|
"logits/rejected": -0.353515625,
|
||
|
|
"logps/chosen": -240.0,
|
||
|
|
"logps/rejected": -224.0,
|
||
|
|
"loss": 0.6455,
|
||
|
|
"loss/chosen-sft": 1.140625,
|
||
|
|
"loss/dpo": 0.6328125,
|
||
|
|
"rewards/accuracies": 0.71875,
|
||
|
|
"rewards/chosen": -0.0888671875,
|
||
|
|
"rewards/margins": 0.1357421875,
|
||
|
|
"rewards/rejected": -0.224609375,
|
||
|
|
"step": 43
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.19469026548672566,
|
||
|
|
"grad_norm": 17.277069091796875,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.33984375,
|
||
|
|
"logits/rejected": -0.37109375,
|
||
|
|
"logps/chosen": -300.0,
|
||
|
|
"logps/rejected": -215.0,
|
||
|
|
"loss": 0.6396,
|
||
|
|
"loss/chosen-sft": 1.1640625,
|
||
|
|
"loss/dpo": 0.60546875,
|
||
|
|
"rewards/accuracies": 0.75,
|
||
|
|
"rewards/chosen": -0.06005859375,
|
||
|
|
"rewards/margins": 0.2041015625,
|
||
|
|
"rewards/rejected": -0.263671875,
|
||
|
|
"step": 44
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.19911504424778761,
|
||
|
|
"grad_norm": 18.799257278442383,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.0888671875,
|
||
|
|
"logits/rejected": -0.171875,
|
||
|
|
"logps/chosen": -234.0,
|
||
|
|
"logps/rejected": -260.0,
|
||
|
|
"loss": 0.6382,
|
||
|
|
"loss/chosen-sft": 0.890625,
|
||
|
|
"loss/dpo": 0.60546875,
|
||
|
|
"rewards/accuracies": 0.75,
|
||
|
|
"rewards/chosen": -0.08203125,
|
||
|
|
"rewards/margins": 0.2197265625,
|
||
|
|
"rewards/rejected": -0.302734375,
|
||
|
|
"step": 45
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.20353982300884957,
|
||
|
|
"grad_norm": 34.8527717590332,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.3828125,
|
||
|
|
"logits/rejected": -0.46875,
|
||
|
|
"logps/chosen": -246.0,
|
||
|
|
"logps/rejected": -255.0,
|
||
|
|
"loss": 0.6538,
|
||
|
|
"loss/chosen-sft": 1.109375,
|
||
|
|
"loss/dpo": 0.71875,
|
||
|
|
"rewards/accuracies": 0.46875,
|
||
|
|
"rewards/chosen": -0.240234375,
|
||
|
|
"rewards/margins": -0.01708984375,
|
||
|
|
"rewards/rejected": -0.22265625,
|
||
|
|
"step": 46
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2079646017699115,
|
||
|
|
"grad_norm": 6.215828895568848,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.578125,
|
||
|
|
"logits/rejected": -0.49609375,
|
||
|
|
"logps/chosen": -288.0,
|
||
|
|
"logps/rejected": -284.0,
|
||
|
|
"loss": 0.6431,
|
||
|
|
"loss/chosen-sft": 1.25,
|
||
|
|
"loss/dpo": 0.61328125,
|
||
|
|
"rewards/accuracies": 0.75,
|
||
|
|
"rewards/chosen": -0.0625,
|
||
|
|
"rewards/margins": 0.18359375,
|
||
|
|
"rewards/rejected": -0.2470703125,
|
||
|
|
"step": 47
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.21238938053097345,
|
||
|
|
"grad_norm": 2.940314531326294,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.361328125,
|
||
|
|
"logits/rejected": -0.400390625,
|
||
|
|
"logps/chosen": -288.0,
|
||
|
|
"logps/rejected": -214.0,
|
||
|
|
"loss": 0.627,
|
||
|
|
"loss/chosen-sft": 1.171875,
|
||
|
|
"loss/dpo": 0.62109375,
|
||
|
|
"rewards/accuracies": 0.625,
|
||
|
|
"rewards/chosen": -0.1357421875,
|
||
|
|
"rewards/margins": 0.1748046875,
|
||
|
|
"rewards/rejected": -0.310546875,
|
||
|
|
"step": 48
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2168141592920354,
|
||
|
|
"grad_norm": 124.2203598022461,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.2255859375,
|
||
|
|
"logits/rejected": -0.27734375,
|
||
|
|
"logps/chosen": -276.0,
|
||
|
|
"logps/rejected": -280.0,
|
||
|
|
"loss": 0.6323,
|
||
|
|
"loss/chosen-sft": 1.015625,
|
||
|
|
"loss/dpo": 0.61328125,
|
||
|
|
"rewards/accuracies": 0.78125,
|
||
|
|
"rewards/chosen": -0.076171875,
|
||
|
|
"rewards/margins": 0.177734375,
|
||
|
|
"rewards/rejected": -0.25390625,
|
||
|
|
"step": 49
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.22123893805309736,
|
||
|
|
"grad_norm": 49.72818374633789,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.5859375,
|
||
|
|
"logits/rejected": -0.55859375,
|
||
|
|
"logps/chosen": -312.0,
|
||
|
|
"logps/rejected": -266.0,
|
||
|
|
"loss": 0.6343,
|
||
|
|
"loss/chosen-sft": 1.1796875,
|
||
|
|
"loss/dpo": 0.609375,
|
||
|
|
"rewards/accuracies": 0.65625,
|
||
|
|
"rewards/chosen": -0.10791015625,
|
||
|
|
"rewards/margins": 0.2021484375,
|
||
|
|
"rewards/rejected": -0.310546875,
|
||
|
|
"step": 50
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.22566371681415928,
|
||
|
|
"grad_norm": 110.1352310180664,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.44921875,
|
||
|
|
"logits/rejected": -0.431640625,
|
||
|
|
"logps/chosen": -274.0,
|
||
|
|
"logps/rejected": -264.0,
|
||
|
|
"loss": 0.6245,
|
||
|
|
"loss/chosen-sft": 1.171875,
|
||
|
|
"loss/dpo": 0.61328125,
|
||
|
|
"rewards/accuracies": 0.71875,
|
||
|
|
"rewards/chosen": -0.1162109375,
|
||
|
|
"rewards/margins": 0.208984375,
|
||
|
|
"rewards/rejected": -0.32421875,
|
||
|
|
"step": 51
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.23008849557522124,
|
||
|
|
"grad_norm": 14.234803199768066,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.40625,
|
||
|
|
"logits/rejected": -0.28515625,
|
||
|
|
"logps/chosen": -268.0,
|
||
|
|
"logps/rejected": -256.0,
|
||
|
|
"loss": 0.627,
|
||
|
|
"loss/chosen-sft": 1.046875,
|
||
|
|
"loss/dpo": 0.66796875,
|
||
|
|
"rewards/accuracies": 0.5625,
|
||
|
|
"rewards/chosen": -0.220703125,
|
||
|
|
"rewards/margins": 0.0849609375,
|
||
|
|
"rewards/rejected": -0.306640625,
|
||
|
|
"step": 52
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2345132743362832,
|
||
|
|
"grad_norm": 25.839595794677734,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.61328125,
|
||
|
|
"logits/rejected": -0.5390625,
|
||
|
|
"logps/chosen": -302.0,
|
||
|
|
"logps/rejected": -251.0,
|
||
|
|
"loss": 0.6152,
|
||
|
|
"loss/chosen-sft": 1.3203125,
|
||
|
|
"loss/dpo": 0.58203125,
|
||
|
|
"rewards/accuracies": 0.875,
|
||
|
|
"rewards/chosen": -0.0849609375,
|
||
|
|
"rewards/margins": 0.2578125,
|
||
|
|
"rewards/rejected": -0.34375,
|
||
|
|
"step": 53
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.23893805309734514,
|
||
|
|
"grad_norm": 17.5559024810791,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.32421875,
|
||
|
|
"logits/rejected": -0.4140625,
|
||
|
|
"logps/chosen": -241.0,
|
||
|
|
"logps/rejected": -272.0,
|
||
|
|
"loss": 0.6279,
|
||
|
|
"loss/chosen-sft": 1.109375,
|
||
|
|
"loss/dpo": 0.61328125,
|
||
|
|
"rewards/accuracies": 0.6875,
|
||
|
|
"rewards/chosen": -0.173828125,
|
||
|
|
"rewards/margins": 0.21875,
|
||
|
|
"rewards/rejected": -0.392578125,
|
||
|
|
"step": 54
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.24336283185840707,
|
||
|
|
"grad_norm": 4.360561847686768,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.2353515625,
|
||
|
|
"logits/rejected": -0.10107421875,
|
||
|
|
"logps/chosen": -284.0,
|
||
|
|
"logps/rejected": -237.0,
|
||
|
|
"loss": 0.605,
|
||
|
|
"loss/chosen-sft": 1.0546875,
|
||
|
|
"loss/dpo": 0.6015625,
|
||
|
|
"rewards/accuracies": 0.6875,
|
||
|
|
"rewards/chosen": -0.1689453125,
|
||
|
|
"rewards/margins": 0.2353515625,
|
||
|
|
"rewards/rejected": -0.404296875,
|
||
|
|
"step": 55
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.24778761061946902,
|
||
|
|
"grad_norm": 16.581727981567383,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.431640625,
|
||
|
|
"logits/rejected": -0.39453125,
|
||
|
|
"logps/chosen": -340.0,
|
||
|
|
"logps/rejected": -294.0,
|
||
|
|
"loss": 0.6133,
|
||
|
|
"loss/chosen-sft": 1.25,
|
||
|
|
"loss/dpo": 0.6484375,
|
||
|
|
"rewards/accuracies": 0.71875,
|
||
|
|
"rewards/chosen": -0.2177734375,
|
||
|
|
"rewards/margins": 0.1396484375,
|
||
|
|
"rewards/rejected": -0.357421875,
|
||
|
|
"step": 56
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.252212389380531,
|
||
|
|
"grad_norm": 49.88325119018555,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.396484375,
|
||
|
|
"logits/rejected": -0.173828125,
|
||
|
|
"logps/chosen": -226.0,
|
||
|
|
"logps/rejected": -191.0,
|
||
|
|
"loss": 0.6279,
|
||
|
|
"loss/chosen-sft": 1.0546875,
|
||
|
|
"loss/dpo": 0.68359375,
|
||
|
|
"rewards/accuracies": 0.53125,
|
||
|
|
"rewards/chosen": -0.1884765625,
|
||
|
|
"rewards/margins": 0.036865234375,
|
||
|
|
"rewards/rejected": -0.2255859375,
|
||
|
|
"step": 57
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.25663716814159293,
|
||
|
|
"grad_norm": 24.68882179260254,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.37890625,
|
||
|
|
"logits/rejected": -0.28515625,
|
||
|
|
"logps/chosen": -244.0,
|
||
|
|
"logps/rejected": -260.0,
|
||
|
|
"loss": 0.6133,
|
||
|
|
"loss/chosen-sft": 1.078125,
|
||
|
|
"loss/dpo": 0.60546875,
|
||
|
|
"rewards/accuracies": 0.84375,
|
||
|
|
"rewards/chosen": -0.23828125,
|
||
|
|
"rewards/margins": 0.2255859375,
|
||
|
|
"rewards/rejected": -0.46484375,
|
||
|
|
"step": 58
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2610619469026549,
|
||
|
|
"grad_norm": 15.78062915802002,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.294921875,
|
||
|
|
"logits/rejected": -0.375,
|
||
|
|
"logps/chosen": -300.0,
|
||
|
|
"logps/rejected": -255.0,
|
||
|
|
"loss": 0.6138,
|
||
|
|
"loss/chosen-sft": 1.1796875,
|
||
|
|
"loss/dpo": 0.59375,
|
||
|
|
"rewards/accuracies": 0.75,
|
||
|
|
"rewards/chosen": -0.154296875,
|
||
|
|
"rewards/margins": 0.2421875,
|
||
|
|
"rewards/rejected": -0.396484375,
|
||
|
|
"step": 59
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.26548672566371684,
|
||
|
|
"grad_norm": 20.29654884338379,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.318359375,
|
||
|
|
"logits/rejected": -0.376953125,
|
||
|
|
"logps/chosen": -280.0,
|
||
|
|
"logps/rejected": -300.0,
|
||
|
|
"loss": 0.6206,
|
||
|
|
"loss/chosen-sft": 1.2265625,
|
||
|
|
"loss/dpo": 0.6484375,
|
||
|
|
"rewards/accuracies": 0.625,
|
||
|
|
"rewards/chosen": -0.2265625,
|
||
|
|
"rewards/margins": 0.1357421875,
|
||
|
|
"rewards/rejected": -0.361328125,
|
||
|
|
"step": 60
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.26991150442477874,
|
||
|
|
"grad_norm": 43.65996551513672,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.36328125,
|
||
|
|
"logits/rejected": -0.3046875,
|
||
|
|
"logps/chosen": -238.0,
|
||
|
|
"logps/rejected": -278.0,
|
||
|
|
"loss": 0.6255,
|
||
|
|
"loss/chosen-sft": 1.25,
|
||
|
|
"loss/dpo": 0.59375,
|
||
|
|
"rewards/accuracies": 0.71875,
|
||
|
|
"rewards/chosen": -0.1572265625,
|
||
|
|
"rewards/margins": 0.2578125,
|
||
|
|
"rewards/rejected": -0.416015625,
|
||
|
|
"step": 61
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2743362831858407,
|
||
|
|
"grad_norm": 72.5498046875,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.359375,
|
||
|
|
"logits/rejected": -0.373046875,
|
||
|
|
"logps/chosen": -318.0,
|
||
|
|
"logps/rejected": -270.0,
|
||
|
|
"loss": 0.6245,
|
||
|
|
"loss/chosen-sft": 1.1015625,
|
||
|
|
"loss/dpo": 0.59375,
|
||
|
|
"rewards/accuracies": 0.65625,
|
||
|
|
"rewards/chosen": -0.1884765625,
|
||
|
|
"rewards/margins": 0.275390625,
|
||
|
|
"rewards/rejected": -0.46484375,
|
||
|
|
"step": 62
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.27876106194690264,
|
||
|
|
"grad_norm": 99.62593078613281,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.265625,
|
||
|
|
"logits/rejected": -0.33984375,
|
||
|
|
"logps/chosen": -294.0,
|
||
|
|
"logps/rejected": -258.0,
|
||
|
|
"loss": 0.627,
|
||
|
|
"loss/chosen-sft": 1.0703125,
|
||
|
|
"loss/dpo": 0.65234375,
|
||
|
|
"rewards/accuracies": 0.53125,
|
||
|
|
"rewards/chosen": -0.267578125,
|
||
|
|
"rewards/margins": 0.1328125,
|
||
|
|
"rewards/rejected": -0.400390625,
|
||
|
|
"step": 63
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2831858407079646,
|
||
|
|
"grad_norm": 71.84874725341797,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.359375,
|
||
|
|
"logits/rejected": -0.392578125,
|
||
|
|
"logps/chosen": -260.0,
|
||
|
|
"logps/rejected": -219.0,
|
||
|
|
"loss": 0.6138,
|
||
|
|
"loss/chosen-sft": 1.171875,
|
||
|
|
"loss/dpo": 0.671875,
|
||
|
|
"rewards/accuracies": 0.5,
|
||
|
|
"rewards/chosen": -0.248046875,
|
||
|
|
"rewards/margins": 0.087890625,
|
||
|
|
"rewards/rejected": -0.3359375,
|
||
|
|
"step": 64
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.28761061946902655,
|
||
|
|
"grad_norm": 43.867332458496094,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.494140625,
|
||
|
|
"logits/rejected": -0.38671875,
|
||
|
|
"logps/chosen": -298.0,
|
||
|
|
"logps/rejected": -276.0,
|
||
|
|
"loss": 0.6162,
|
||
|
|
"loss/chosen-sft": 1.234375,
|
||
|
|
"loss/dpo": 0.6171875,
|
||
|
|
"rewards/accuracies": 0.65625,
|
||
|
|
"rewards/chosen": -0.251953125,
|
||
|
|
"rewards/margins": 0.2021484375,
|
||
|
|
"rewards/rejected": -0.455078125,
|
||
|
|
"step": 65
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.2920353982300885,
|
||
|
|
"grad_norm": 15.392335891723633,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.275390625,
|
||
|
|
"logits/rejected": -0.1953125,
|
||
|
|
"logps/chosen": -241.0,
|
||
|
|
"logps/rejected": -234.0,
|
||
|
|
"loss": 0.6211,
|
||
|
|
"loss/chosen-sft": 1.1953125,
|
||
|
|
"loss/dpo": 0.703125,
|
||
|
|
"rewards/accuracies": 0.5625,
|
||
|
|
"rewards/chosen": -0.251953125,
|
||
|
|
"rewards/margins": 0.017578125,
|
||
|
|
"rewards/rejected": -0.26953125,
|
||
|
|
"step": 66
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.29646017699115046,
|
||
|
|
"grad_norm": 17.315120697021484,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.271484375,
|
||
|
|
"logits/rejected": -0.3828125,
|
||
|
|
"logps/chosen": -268.0,
|
||
|
|
"logps/rejected": -292.0,
|
||
|
|
"loss": 0.6226,
|
||
|
|
"loss/chosen-sft": 1.0078125,
|
||
|
|
"loss/dpo": 0.6171875,
|
||
|
|
"rewards/accuracies": 0.6875,
|
||
|
|
"rewards/chosen": -0.30078125,
|
||
|
|
"rewards/margins": 0.203125,
|
||
|
|
"rewards/rejected": -0.50390625,
|
||
|
|
"step": 67
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3008849557522124,
|
||
|
|
"grad_norm": 17.68255615234375,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.369140625,
|
||
|
|
"logits/rejected": -0.50390625,
|
||
|
|
"logps/chosen": -266.0,
|
||
|
|
"logps/rejected": -248.0,
|
||
|
|
"loss": 0.6143,
|
||
|
|
"loss/chosen-sft": 1.109375,
|
||
|
|
"loss/dpo": 0.58203125,
|
||
|
|
"rewards/accuracies": 0.71875,
|
||
|
|
"rewards/chosen": -0.138671875,
|
||
|
|
"rewards/margins": 0.3046875,
|
||
|
|
"rewards/rejected": -0.443359375,
|
||
|
|
"step": 68
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3053097345132743,
|
||
|
|
"grad_norm": 31.16883087158203,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.373046875,
|
||
|
|
"logits/rejected": -0.369140625,
|
||
|
|
"logps/chosen": -256.0,
|
||
|
|
"logps/rejected": -235.0,
|
||
|
|
"loss": 0.6191,
|
||
|
|
"loss/chosen-sft": 1.15625,
|
||
|
|
"loss/dpo": 0.59765625,
|
||
|
|
"rewards/accuracies": 0.65625,
|
||
|
|
"rewards/chosen": -0.1884765625,
|
||
|
|
"rewards/margins": 0.279296875,
|
||
|
|
"rewards/rejected": -0.46875,
|
||
|
|
"step": 69
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.30973451327433627,
|
||
|
|
"grad_norm": 65.01067352294922,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.423828125,
|
||
|
|
"logits/rejected": -0.5,
|
||
|
|
"logps/chosen": -272.0,
|
||
|
|
"logps/rejected": -244.0,
|
||
|
|
"loss": 0.5884,
|
||
|
|
"loss/chosen-sft": 1.21875,
|
||
|
|
"loss/dpo": 0.6015625,
|
||
|
|
"rewards/accuracies": 0.625,
|
||
|
|
"rewards/chosen": -0.1787109375,
|
||
|
|
"rewards/margins": 0.251953125,
|
||
|
|
"rewards/rejected": -0.431640625,
|
||
|
|
"step": 70
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3141592920353982,
|
||
|
|
"grad_norm": 9.247684478759766,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.412109375,
|
||
|
|
"logits/rejected": -0.421875,
|
||
|
|
"logps/chosen": -284.0,
|
||
|
|
"logps/rejected": -245.0,
|
||
|
|
"loss": 0.605,
|
||
|
|
"loss/chosen-sft": 1.203125,
|
||
|
|
"loss/dpo": 0.671875,
|
||
|
|
"rewards/accuracies": 0.5625,
|
||
|
|
"rewards/chosen": -0.2216796875,
|
||
|
|
"rewards/margins": 0.1396484375,
|
||
|
|
"rewards/rejected": -0.361328125,
|
||
|
|
"step": 71
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3185840707964602,
|
||
|
|
"grad_norm": 29.392963409423828,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.46875,
|
||
|
|
"logits/rejected": -0.462890625,
|
||
|
|
"logps/chosen": -346.0,
|
||
|
|
"logps/rejected": -292.0,
|
||
|
|
"loss": 0.5928,
|
||
|
|
"loss/chosen-sft": 1.25,
|
||
|
|
"loss/dpo": 0.64453125,
|
||
|
|
"rewards/accuracies": 0.65625,
|
||
|
|
"rewards/chosen": -0.30078125,
|
||
|
|
"rewards/margins": 0.1650390625,
|
||
|
|
"rewards/rejected": -0.46484375,
|
||
|
|
"step": 72
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3230088495575221,
|
||
|
|
"grad_norm": 12.59981918334961,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.6171875,
|
||
|
|
"logits/rejected": -0.41796875,
|
||
|
|
"logps/chosen": -296.0,
|
||
|
|
"logps/rejected": -253.0,
|
||
|
|
"loss": 0.6255,
|
||
|
|
"loss/chosen-sft": 1.2734375,
|
||
|
|
"loss/dpo": 0.62109375,
|
||
|
|
"rewards/accuracies": 0.8125,
|
||
|
|
"rewards/chosen": -0.228515625,
|
||
|
|
"rewards/margins": 0.1884765625,
|
||
|
|
"rewards/rejected": -0.416015625,
|
||
|
|
"step": 73
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3274336283185841,
|
||
|
|
"grad_norm": 27.949209213256836,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.5,
|
||
|
|
"logits/rejected": -0.4921875,
|
||
|
|
"logps/chosen": -272.0,
|
||
|
|
"logps/rejected": -218.0,
|
||
|
|
"loss": 0.5723,
|
||
|
|
"loss/chosen-sft": 1.1171875,
|
||
|
|
"loss/dpo": 0.62109375,
|
||
|
|
"rewards/accuracies": 0.75,
|
||
|
|
"rewards/chosen": -0.25390625,
|
||
|
|
"rewards/margins": 0.1806640625,
|
||
|
|
"rewards/rejected": -0.435546875,
|
||
|
|
"step": 74
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.33185840707964603,
|
||
|
|
"grad_norm": 42.84572219848633,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.54296875,
|
||
|
|
"logits/rejected": -0.5546875,
|
||
|
|
"logps/chosen": -268.0,
|
||
|
|
"logps/rejected": -222.0,
|
||
|
|
"loss": 0.6099,
|
||
|
|
"loss/chosen-sft": 1.265625,
|
||
|
|
"loss/dpo": 0.6015625,
|
||
|
|
"rewards/accuracies": 0.625,
|
||
|
|
"rewards/chosen": -0.1572265625,
|
||
|
|
"rewards/margins": 0.2392578125,
|
||
|
|
"rewards/rejected": -0.396484375,
|
||
|
|
"step": 75
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.336283185840708,
|
||
|
|
"grad_norm": 58.05986404418945,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.5390625,
|
||
|
|
"logits/rejected": -0.51171875,
|
||
|
|
"logps/chosen": -206.0,
|
||
|
|
"logps/rejected": -262.0,
|
||
|
|
"loss": 0.6021,
|
||
|
|
"loss/chosen-sft": 1.0546875,
|
||
|
|
"loss/dpo": 0.58984375,
|
||
|
|
"rewards/accuracies": 0.8125,
|
||
|
|
"rewards/chosen": -0.1728515625,
|
||
|
|
"rewards/margins": 0.24609375,
|
||
|
|
"rewards/rejected": -0.41796875,
|
||
|
|
"step": 76
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3407079646017699,
|
||
|
|
"grad_norm": 31.465473175048828,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.423828125,
|
||
|
|
"logits/rejected": -0.1796875,
|
||
|
|
"logps/chosen": -264.0,
|
||
|
|
"logps/rejected": -286.0,
|
||
|
|
"loss": 0.5967,
|
||
|
|
"loss/chosen-sft": 1.0703125,
|
||
|
|
"loss/dpo": 0.62890625,
|
||
|
|
"rewards/accuracies": 0.65625,
|
||
|
|
"rewards/chosen": -0.31640625,
|
||
|
|
"rewards/margins": 0.19921875,
|
||
|
|
"rewards/rejected": -0.515625,
|
||
|
|
"step": 77
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.34513274336283184,
|
||
|
|
"grad_norm": 57.08030700683594,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.578125,
|
||
|
|
"logits/rejected": -0.326171875,
|
||
|
|
"logps/chosen": -304.0,
|
||
|
|
"logps/rejected": -240.0,
|
||
|
|
"loss": 0.6187,
|
||
|
|
"loss/chosen-sft": 1.328125,
|
||
|
|
"loss/dpo": 0.59375,
|
||
|
|
"rewards/accuracies": 0.8125,
|
||
|
|
"rewards/chosen": -0.22265625,
|
||
|
|
"rewards/margins": 0.267578125,
|
||
|
|
"rewards/rejected": -0.490234375,
|
||
|
|
"step": 78
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3495575221238938,
|
||
|
|
"grad_norm": 29.671070098876953,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.48046875,
|
||
|
|
"logits/rejected": -0.59765625,
|
||
|
|
"logps/chosen": -292.0,
|
||
|
|
"logps/rejected": -256.0,
|
||
|
|
"loss": 0.6147,
|
||
|
|
"loss/chosen-sft": 1.2265625,
|
||
|
|
"loss/dpo": 0.55859375,
|
||
|
|
"rewards/accuracies": 0.78125,
|
||
|
|
"rewards/chosen": -0.1416015625,
|
||
|
|
"rewards/margins": 0.33984375,
|
||
|
|
"rewards/rejected": -0.48046875,
|
||
|
|
"step": 79
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.35398230088495575,
|
||
|
|
"grad_norm": 48.64991760253906,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.59375,
|
||
|
|
"logits/rejected": -0.6328125,
|
||
|
|
"logps/chosen": -288.0,
|
||
|
|
"logps/rejected": -264.0,
|
||
|
|
"loss": 0.6309,
|
||
|
|
"loss/chosen-sft": 1.2578125,
|
||
|
|
"loss/dpo": 0.6484375,
|
||
|
|
"rewards/accuracies": 0.65625,
|
||
|
|
"rewards/chosen": -0.33984375,
|
||
|
|
"rewards/margins": 0.162109375,
|
||
|
|
"rewards/rejected": -0.50390625,
|
||
|
|
"step": 80
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3584070796460177,
|
||
|
|
"grad_norm": 44.117034912109375,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.49609375,
|
||
|
|
"logits/rejected": -0.51953125,
|
||
|
|
"logps/chosen": -252.0,
|
||
|
|
"logps/rejected": -262.0,
|
||
|
|
"loss": 0.5845,
|
||
|
|
"loss/chosen-sft": 1.0859375,
|
||
|
|
"loss/dpo": 0.55078125,
|
||
|
|
"rewards/accuracies": 0.78125,
|
||
|
|
"rewards/chosen": -0.2412109375,
|
||
|
|
"rewards/margins": 0.3828125,
|
||
|
|
"rewards/rejected": -0.625,
|
||
|
|
"step": 81
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.36283185840707965,
|
||
|
|
"grad_norm": 45.00334167480469,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.486328125,
|
||
|
|
"logits/rejected": -0.5390625,
|
||
|
|
"logps/chosen": -294.0,
|
||
|
|
"logps/rejected": -288.0,
|
||
|
|
"loss": 0.6045,
|
||
|
|
"loss/chosen-sft": 1.21875,
|
||
|
|
"loss/dpo": 0.578125,
|
||
|
|
"rewards/accuracies": 0.71875,
|
||
|
|
"rewards/chosen": -0.287109375,
|
||
|
|
"rewards/margins": 0.328125,
|
||
|
|
"rewards/rejected": -0.6171875,
|
||
|
|
"step": 82
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3672566371681416,
|
||
|
|
"grad_norm": 6.153012752532959,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.427734375,
|
||
|
|
"logits/rejected": -0.427734375,
|
||
|
|
"logps/chosen": -272.0,
|
||
|
|
"logps/rejected": -278.0,
|
||
|
|
"loss": 0.582,
|
||
|
|
"loss/chosen-sft": 1.0546875,
|
||
|
|
"loss/dpo": 0.5625,
|
||
|
|
"rewards/accuracies": 0.78125,
|
||
|
|
"rewards/chosen": -0.1689453125,
|
||
|
|
"rewards/margins": 0.357421875,
|
||
|
|
"rewards/rejected": -0.52734375,
|
||
|
|
"step": 83
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.37168141592920356,
|
||
|
|
"grad_norm": 4.050904273986816,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.349609375,
|
||
|
|
"logits/rejected": -0.3984375,
|
||
|
|
"logps/chosen": -288.0,
|
||
|
|
"logps/rejected": -274.0,
|
||
|
|
"loss": 0.603,
|
||
|
|
"loss/chosen-sft": 1.1796875,
|
||
|
|
"loss/dpo": 0.62109375,
|
||
|
|
"rewards/accuracies": 0.65625,
|
||
|
|
"rewards/chosen": -0.28515625,
|
||
|
|
"rewards/margins": 0.2333984375,
|
||
|
|
"rewards/rejected": -0.51953125,
|
||
|
|
"step": 84
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.37610619469026546,
|
||
|
|
"grad_norm": 53.353515625,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.455078125,
|
||
|
|
"logits/rejected": -0.462890625,
|
||
|
|
"logps/chosen": -278.0,
|
||
|
|
"logps/rejected": -227.0,
|
||
|
|
"loss": 0.6323,
|
||
|
|
"loss/chosen-sft": 1.109375,
|
||
|
|
"loss/dpo": 0.609375,
|
||
|
|
"rewards/accuracies": 0.6875,
|
||
|
|
"rewards/chosen": -0.24609375,
|
||
|
|
"rewards/margins": 0.2373046875,
|
||
|
|
"rewards/rejected": -0.482421875,
|
||
|
|
"step": 85
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3805309734513274,
|
||
|
|
"grad_norm": 63.98937225341797,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.400390625,
|
||
|
|
"logits/rejected": -0.4609375,
|
||
|
|
"logps/chosen": -300.0,
|
||
|
|
"logps/rejected": -340.0,
|
||
|
|
"loss": 0.6182,
|
||
|
|
"loss/chosen-sft": 1.046875,
|
||
|
|
"loss/dpo": 0.62890625,
|
||
|
|
"rewards/accuracies": 0.65625,
|
||
|
|
"rewards/chosen": -0.349609375,
|
||
|
|
"rewards/margins": 0.1904296875,
|
||
|
|
"rewards/rejected": -0.5390625,
|
||
|
|
"step": 86
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.38495575221238937,
|
||
|
|
"grad_norm": 22.751070022583008,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.416015625,
|
||
|
|
"logits/rejected": -0.47265625,
|
||
|
|
"logps/chosen": -270.0,
|
||
|
|
"logps/rejected": -234.0,
|
||
|
|
"loss": 0.5938,
|
||
|
|
"loss/chosen-sft": 1.109375,
|
||
|
|
"loss/dpo": 0.6015625,
|
||
|
|
"rewards/accuracies": 0.78125,
|
||
|
|
"rewards/chosen": -0.203125,
|
||
|
|
"rewards/margins": 0.2734375,
|
||
|
|
"rewards/rejected": -0.4765625,
|
||
|
|
"step": 87
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3893805309734513,
|
||
|
|
"grad_norm": 15.500909805297852,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.42578125,
|
||
|
|
"logits/rejected": -0.51171875,
|
||
|
|
"logps/chosen": -220.0,
|
||
|
|
"logps/rejected": -222.0,
|
||
|
|
"loss": 0.5884,
|
||
|
|
"loss/chosen-sft": 1.3828125,
|
||
|
|
"loss/dpo": 0.69140625,
|
||
|
|
"rewards/accuracies": 0.625,
|
||
|
|
"rewards/chosen": -0.33203125,
|
||
|
|
"rewards/margins": 0.0634765625,
|
||
|
|
"rewards/rejected": -0.396484375,
|
||
|
|
"step": 88
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.3938053097345133,
|
||
|
|
"grad_norm": 21.14480209350586,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.50390625,
|
||
|
|
"logits/rejected": -0.4921875,
|
||
|
|
"logps/chosen": -282.0,
|
||
|
|
"logps/rejected": -223.0,
|
||
|
|
"loss": 0.584,
|
||
|
|
"loss/chosen-sft": 1.296875,
|
||
|
|
"loss/dpo": 0.65234375,
|
||
|
|
"rewards/accuracies": 0.5625,
|
||
|
|
"rewards/chosen": -0.27734375,
|
||
|
|
"rewards/margins": 0.1875,
|
||
|
|
"rewards/rejected": -0.466796875,
|
||
|
|
"step": 89
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.39823008849557523,
|
||
|
|
"grad_norm": 14.2146635055542,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.447265625,
|
||
|
|
"logits/rejected": -0.392578125,
|
||
|
|
"logps/chosen": -268.0,
|
||
|
|
"logps/rejected": -215.0,
|
||
|
|
"loss": 0.6104,
|
||
|
|
"loss/chosen-sft": 1.09375,
|
||
|
|
"loss/dpo": 0.65625,
|
||
|
|
"rewards/accuracies": 0.53125,
|
||
|
|
"rewards/chosen": -0.359375,
|
||
|
|
"rewards/margins": 0.1923828125,
|
||
|
|
"rewards/rejected": -0.55078125,
|
||
|
|
"step": 90
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4026548672566372,
|
||
|
|
"grad_norm": 76.13188171386719,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.478515625,
|
||
|
|
"logits/rejected": -0.462890625,
|
||
|
|
"logps/chosen": -270.0,
|
||
|
|
"logps/rejected": -280.0,
|
||
|
|
"loss": 0.5781,
|
||
|
|
"loss/chosen-sft": 1.2109375,
|
||
|
|
"loss/dpo": 0.59765625,
|
||
|
|
"rewards/accuracies": 0.65625,
|
||
|
|
"rewards/chosen": -0.2421875,
|
||
|
|
"rewards/margins": 0.2431640625,
|
||
|
|
"rewards/rejected": -0.486328125,
|
||
|
|
"step": 91
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.40707964601769914,
|
||
|
|
"grad_norm": 34.39772033691406,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.5625,
|
||
|
|
"logits/rejected": -0.4921875,
|
||
|
|
"logps/chosen": -308.0,
|
||
|
|
"logps/rejected": -294.0,
|
||
|
|
"loss": 0.5698,
|
||
|
|
"loss/chosen-sft": 1.3203125,
|
||
|
|
"loss/dpo": 0.52734375,
|
||
|
|
"rewards/accuracies": 0.75,
|
||
|
|
"rewards/chosen": -0.2275390625,
|
||
|
|
"rewards/margins": 0.49609375,
|
||
|
|
"rewards/rejected": -0.7265625,
|
||
|
|
"step": 92
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.41150442477876104,
|
||
|
|
"grad_norm": 36.51502227783203,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.4296875,
|
||
|
|
"logits/rejected": -0.51953125,
|
||
|
|
"logps/chosen": -278.0,
|
||
|
|
"logps/rejected": -336.0,
|
||
|
|
"loss": 0.5869,
|
||
|
|
"loss/chosen-sft": 1.15625,
|
||
|
|
"loss/dpo": 0.57421875,
|
||
|
|
"rewards/accuracies": 0.78125,
|
||
|
|
"rewards/chosen": -0.29296875,
|
||
|
|
"rewards/margins": 0.314453125,
|
||
|
|
"rewards/rejected": -0.60546875,
|
||
|
|
"step": 93
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.415929203539823,
|
||
|
|
"grad_norm": 41.33882141113281,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.291015625,
|
||
|
|
"logits/rejected": -0.26171875,
|
||
|
|
"logps/chosen": -255.0,
|
||
|
|
"logps/rejected": -251.0,
|
||
|
|
"loss": 0.5908,
|
||
|
|
"loss/chosen-sft": 1.171875,
|
||
|
|
"loss/dpo": 0.6171875,
|
||
|
|
"rewards/accuracies": 0.65625,
|
||
|
|
"rewards/chosen": -0.310546875,
|
||
|
|
"rewards/margins": 0.232421875,
|
||
|
|
"rewards/rejected": -0.54296875,
|
||
|
|
"step": 94
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.42035398230088494,
|
||
|
|
"grad_norm": 25.335350036621094,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.2578125,
|
||
|
|
"logits/rejected": -0.26953125,
|
||
|
|
"logps/chosen": -234.0,
|
||
|
|
"logps/rejected": -312.0,
|
||
|
|
"loss": 0.5811,
|
||
|
|
"loss/chosen-sft": 1.03125,
|
||
|
|
"loss/dpo": 0.5546875,
|
||
|
|
"rewards/accuracies": 0.8125,
|
||
|
|
"rewards/chosen": -0.298828125,
|
||
|
|
"rewards/margins": 0.41796875,
|
||
|
|
"rewards/rejected": -0.71875,
|
||
|
|
"step": 95
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4247787610619469,
|
||
|
|
"grad_norm": 14.678760528564453,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.46484375,
|
||
|
|
"logits/rejected": -0.439453125,
|
||
|
|
"logps/chosen": -304.0,
|
||
|
|
"logps/rejected": -248.0,
|
||
|
|
"loss": 0.6094,
|
||
|
|
"loss/chosen-sft": 1.2265625,
|
||
|
|
"loss/dpo": 0.6484375,
|
||
|
|
"rewards/accuracies": 0.625,
|
||
|
|
"rewards/chosen": -0.287109375,
|
||
|
|
"rewards/margins": 0.1328125,
|
||
|
|
"rewards/rejected": -0.41796875,
|
||
|
|
"step": 96
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.42920353982300885,
|
||
|
|
"grad_norm": 12.859259605407715,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.50390625,
|
||
|
|
"logits/rejected": -0.4765625,
|
||
|
|
"logps/chosen": -255.0,
|
||
|
|
"logps/rejected": -226.0,
|
||
|
|
"loss": 0.5938,
|
||
|
|
"loss/chosen-sft": 1.1953125,
|
||
|
|
"loss/dpo": 0.64453125,
|
||
|
|
"rewards/accuracies": 0.625,
|
||
|
|
"rewards/chosen": -0.353515625,
|
||
|
|
"rewards/margins": 0.15625,
|
||
|
|
"rewards/rejected": -0.51171875,
|
||
|
|
"step": 97
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4336283185840708,
|
||
|
|
"grad_norm": 9.326794624328613,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.4765625,
|
||
|
|
"logits/rejected": -0.447265625,
|
||
|
|
"logps/chosen": -272.0,
|
||
|
|
"logps/rejected": -268.0,
|
||
|
|
"loss": 0.5752,
|
||
|
|
"loss/chosen-sft": 1.1640625,
|
||
|
|
"loss/dpo": 0.54296875,
|
||
|
|
"rewards/accuracies": 0.71875,
|
||
|
|
"rewards/chosen": -0.291015625,
|
||
|
|
"rewards/margins": 0.4765625,
|
||
|
|
"rewards/rejected": -0.76953125,
|
||
|
|
"step": 98
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.43805309734513276,
|
||
|
|
"grad_norm": 10.561424255371094,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.5625,
|
||
|
|
"logits/rejected": -0.5234375,
|
||
|
|
"logps/chosen": -304.0,
|
||
|
|
"logps/rejected": -282.0,
|
||
|
|
"loss": 0.5786,
|
||
|
|
"loss/chosen-sft": 1.1640625,
|
||
|
|
"loss/dpo": 0.578125,
|
||
|
|
"rewards/accuracies": 0.78125,
|
||
|
|
"rewards/chosen": -0.390625,
|
||
|
|
"rewards/margins": 0.353515625,
|
||
|
|
"rewards/rejected": -0.7421875,
|
||
|
|
"step": 99
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4424778761061947,
|
||
|
|
"grad_norm": 102.05797576904297,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.44921875,
|
||
|
|
"logits/rejected": -0.423828125,
|
||
|
|
"logps/chosen": -235.0,
|
||
|
|
"logps/rejected": -284.0,
|
||
|
|
"loss": 0.6079,
|
||
|
|
"loss/chosen-sft": 1.1171875,
|
||
|
|
"loss/dpo": 0.65234375,
|
||
|
|
"rewards/accuracies": 0.75,
|
||
|
|
"rewards/chosen": -0.423828125,
|
||
|
|
"rewards/margins": 0.236328125,
|
||
|
|
"rewards/rejected": -0.66015625,
|
||
|
|
"step": 100
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4469026548672566,
|
||
|
|
"grad_norm": 10.166366577148438,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.51953125,
|
||
|
|
"logits/rejected": -0.431640625,
|
||
|
|
"logps/chosen": -342.0,
|
||
|
|
"logps/rejected": -302.0,
|
||
|
|
"loss": 0.5864,
|
||
|
|
"loss/chosen-sft": 1.1953125,
|
||
|
|
"loss/dpo": 0.53515625,
|
||
|
|
"rewards/accuracies": 0.8125,
|
||
|
|
"rewards/chosen": -0.333984375,
|
||
|
|
"rewards/margins": 0.5,
|
||
|
|
"rewards/rejected": -0.8359375,
|
||
|
|
"step": 101
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.45132743362831856,
|
||
|
|
"grad_norm": 9.208560943603516,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.373046875,
|
||
|
|
"logits/rejected": -0.3359375,
|
||
|
|
"logps/chosen": -332.0,
|
||
|
|
"logps/rejected": -326.0,
|
||
|
|
"loss": 0.584,
|
||
|
|
"loss/chosen-sft": 1.1640625,
|
||
|
|
"loss/dpo": 0.5390625,
|
||
|
|
"rewards/accuracies": 0.75,
|
||
|
|
"rewards/chosen": -0.30078125,
|
||
|
|
"rewards/margins": 0.435546875,
|
||
|
|
"rewards/rejected": -0.73828125,
|
||
|
|
"step": 102
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4557522123893805,
|
||
|
|
"grad_norm": 58.12112045288086,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.46484375,
|
||
|
|
"logits/rejected": -0.486328125,
|
||
|
|
"logps/chosen": -298.0,
|
||
|
|
"logps/rejected": -300.0,
|
||
|
|
"loss": 0.6191,
|
||
|
|
"loss/chosen-sft": 1.1640625,
|
||
|
|
"loss/dpo": 0.58984375,
|
||
|
|
"rewards/accuracies": 0.6875,
|
||
|
|
"rewards/chosen": -0.365234375,
|
||
|
|
"rewards/margins": 0.40625,
|
||
|
|
"rewards/rejected": -0.76953125,
|
||
|
|
"step": 103
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.46017699115044247,
|
||
|
|
"grad_norm": 32.06050491333008,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.51953125,
|
||
|
|
"logits/rejected": -0.49609375,
|
||
|
|
"logps/chosen": -376.0,
|
||
|
|
"logps/rejected": -346.0,
|
||
|
|
"loss": 0.5581,
|
||
|
|
"loss/chosen-sft": 1.2265625,
|
||
|
|
"loss/dpo": 0.55078125,
|
||
|
|
"rewards/accuracies": 0.8125,
|
||
|
|
"rewards/chosen": -0.35546875,
|
||
|
|
"rewards/margins": 0.46875,
|
||
|
|
"rewards/rejected": -0.82421875,
|
||
|
|
"step": 104
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4646017699115044,
|
||
|
|
"grad_norm": 66.27430725097656,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.578125,
|
||
|
|
"logits/rejected": -0.53125,
|
||
|
|
"logps/chosen": -340.0,
|
||
|
|
"logps/rejected": -284.0,
|
||
|
|
"loss": 0.5674,
|
||
|
|
"loss/chosen-sft": 1.2734375,
|
||
|
|
"loss/dpo": 0.5390625,
|
||
|
|
"rewards/accuracies": 0.78125,
|
||
|
|
"rewards/chosen": -0.37890625,
|
||
|
|
"rewards/margins": 0.427734375,
|
||
|
|
"rewards/rejected": -0.80859375,
|
||
|
|
"step": 105
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4690265486725664,
|
||
|
|
"grad_norm": 43.450740814208984,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.369140625,
|
||
|
|
"logits/rejected": -0.443359375,
|
||
|
|
"logps/chosen": -328.0,
|
||
|
|
"logps/rejected": -310.0,
|
||
|
|
"loss": 0.5688,
|
||
|
|
"loss/chosen-sft": 1.1640625,
|
||
|
|
"loss/dpo": 0.59765625,
|
||
|
|
"rewards/accuracies": 0.625,
|
||
|
|
"rewards/chosen": -0.404296875,
|
||
|
|
"rewards/margins": 0.318359375,
|
||
|
|
"rewards/rejected": -0.72265625,
|
||
|
|
"step": 106
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.47345132743362833,
|
||
|
|
"grad_norm": 48.25244903564453,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.4765625,
|
||
|
|
"logits/rejected": -0.43359375,
|
||
|
|
"logps/chosen": -320.0,
|
||
|
|
"logps/rejected": -260.0,
|
||
|
|
"loss": 0.5781,
|
||
|
|
"loss/chosen-sft": 1.2421875,
|
||
|
|
"loss/dpo": 0.62890625,
|
||
|
|
"rewards/accuracies": 0.625,
|
||
|
|
"rewards/chosen": -0.5234375,
|
||
|
|
"rewards/margins": 0.283203125,
|
||
|
|
"rewards/rejected": -0.8046875,
|
||
|
|
"step": 107
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4778761061946903,
|
||
|
|
"grad_norm": 26.64389419555664,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.427734375,
|
||
|
|
"logits/rejected": -0.41796875,
|
||
|
|
"logps/chosen": -262.0,
|
||
|
|
"logps/rejected": -240.0,
|
||
|
|
"loss": 0.564,
|
||
|
|
"loss/chosen-sft": 1.1015625,
|
||
|
|
"loss/dpo": 0.625,
|
||
|
|
"rewards/accuracies": 0.625,
|
||
|
|
"rewards/chosen": -0.357421875,
|
||
|
|
"rewards/margins": 0.216796875,
|
||
|
|
"rewards/rejected": -0.57421875,
|
||
|
|
"step": 108
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4823008849557522,
|
||
|
|
"grad_norm": 51.666202545166016,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.671875,
|
||
|
|
"logits/rejected": -0.734375,
|
||
|
|
"logps/chosen": -372.0,
|
||
|
|
"logps/rejected": -340.0,
|
||
|
|
"loss": 0.564,
|
||
|
|
"loss/chosen-sft": 1.3125,
|
||
|
|
"loss/dpo": 0.5234375,
|
||
|
|
"rewards/accuracies": 0.71875,
|
||
|
|
"rewards/chosen": -0.35546875,
|
||
|
|
"rewards/margins": 0.5859375,
|
||
|
|
"rewards/rejected": -0.94140625,
|
||
|
|
"step": 109
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.48672566371681414,
|
||
|
|
"grad_norm": 14.793038368225098,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.490234375,
|
||
|
|
"logits/rejected": -0.57421875,
|
||
|
|
"logps/chosen": -288.0,
|
||
|
|
"logps/rejected": -262.0,
|
||
|
|
"loss": 0.564,
|
||
|
|
"loss/chosen-sft": 1.2890625,
|
||
|
|
"loss/dpo": 0.63671875,
|
||
|
|
"rewards/accuracies": 0.6875,
|
||
|
|
"rewards/chosen": -0.53125,
|
||
|
|
"rewards/margins": 0.232421875,
|
||
|
|
"rewards/rejected": -0.765625,
|
||
|
|
"step": 110
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.4911504424778761,
|
||
|
|
"grad_norm": 54.62705993652344,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.51171875,
|
||
|
|
"logits/rejected": -0.59765625,
|
||
|
|
"logps/chosen": -312.0,
|
||
|
|
"logps/rejected": -286.0,
|
||
|
|
"loss": 0.5803,
|
||
|
|
"loss/chosen-sft": 1.21875,
|
||
|
|
"loss/dpo": 0.53125,
|
||
|
|
"rewards/accuracies": 0.71875,
|
||
|
|
"rewards/chosen": -0.466796875,
|
||
|
|
"rewards/margins": 0.515625,
|
||
|
|
"rewards/rejected": -0.98046875,
|
||
|
|
"step": 111
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.49557522123893805,
|
||
|
|
"grad_norm": 45.08600997924805,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.53125,
|
||
|
|
"logits/rejected": -0.65625,
|
||
|
|
"logps/chosen": -306.0,
|
||
|
|
"logps/rejected": -314.0,
|
||
|
|
"loss": 0.5442,
|
||
|
|
"loss/chosen-sft": 1.1953125,
|
||
|
|
"loss/dpo": 0.57421875,
|
||
|
|
"rewards/accuracies": 0.75,
|
||
|
|
"rewards/chosen": -0.435546875,
|
||
|
|
"rewards/margins": 0.34375,
|
||
|
|
"rewards/rejected": -0.78125,
|
||
|
|
"step": 112
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5,
|
||
|
|
"grad_norm": 29.89005470275879,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.5234375,
|
||
|
|
"logits/rejected": -0.5078125,
|
||
|
|
"logps/chosen": -282.0,
|
||
|
|
"logps/rejected": -242.0,
|
||
|
|
"loss": 0.5535,
|
||
|
|
"loss/chosen-sft": 1.1015625,
|
||
|
|
"loss/dpo": 0.63671875,
|
||
|
|
"rewards/accuracies": 0.59375,
|
||
|
|
"rewards/chosen": -0.43359375,
|
||
|
|
"rewards/margins": 0.21484375,
|
||
|
|
"rewards/rejected": -0.6484375,
|
||
|
|
"step": 113
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.504424778761062,
|
||
|
|
"grad_norm": 5.516697406768799,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.376953125,
|
||
|
|
"logits/rejected": -0.423828125,
|
||
|
|
"logps/chosen": -249.0,
|
||
|
|
"logps/rejected": -298.0,
|
||
|
|
"loss": 0.5713,
|
||
|
|
"loss/chosen-sft": 1.125,
|
||
|
|
"loss/dpo": 0.51953125,
|
||
|
|
"rewards/accuracies": 0.75,
|
||
|
|
"rewards/chosen": -0.298828125,
|
||
|
|
"rewards/margins": 0.48046875,
|
||
|
|
"rewards/rejected": -0.77734375,
|
||
|
|
"step": 114
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5088495575221239,
|
||
|
|
"grad_norm": 42.4152946472168,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.5625,
|
||
|
|
"logits/rejected": -0.48046875,
|
||
|
|
"logps/chosen": -316.0,
|
||
|
|
"logps/rejected": -312.0,
|
||
|
|
"loss": 0.5811,
|
||
|
|
"loss/chosen-sft": 1.2265625,
|
||
|
|
"loss/dpo": 0.60546875,
|
||
|
|
"rewards/accuracies": 0.59375,
|
||
|
|
"rewards/chosen": -0.59765625,
|
||
|
|
"rewards/margins": 0.29296875,
|
||
|
|
"rewards/rejected": -0.890625,
|
||
|
|
"step": 115
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5132743362831859,
|
||
|
|
"grad_norm": 43.45073699951172,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.51171875,
|
||
|
|
"logits/rejected": -0.49609375,
|
||
|
|
"logps/chosen": -320.0,
|
||
|
|
"logps/rejected": -368.0,
|
||
|
|
"loss": 0.5562,
|
||
|
|
"loss/chosen-sft": 1.203125,
|
||
|
|
"loss/dpo": 0.58203125,
|
||
|
|
"rewards/accuracies": 0.6875,
|
||
|
|
"rewards/chosen": -0.51953125,
|
||
|
|
"rewards/margins": 0.431640625,
|
||
|
|
"rewards/rejected": -0.94921875,
|
||
|
|
"step": 116
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5176991150442478,
|
||
|
|
"grad_norm": 13.291467666625977,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.392578125,
|
||
|
|
"logits/rejected": -0.375,
|
||
|
|
"logps/chosen": -300.0,
|
||
|
|
"logps/rejected": -316.0,
|
||
|
|
"loss": 0.5491,
|
||
|
|
"loss/chosen-sft": 1.2734375,
|
||
|
|
"loss/dpo": 0.58984375,
|
||
|
|
"rewards/accuracies": 0.71875,
|
||
|
|
"rewards/chosen": -0.5078125,
|
||
|
|
"rewards/margins": 0.3515625,
|
||
|
|
"rewards/rejected": -0.859375,
|
||
|
|
"step": 117
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5221238938053098,
|
||
|
|
"grad_norm": 16.5191707611084,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.330078125,
|
||
|
|
"logits/rejected": -0.240234375,
|
||
|
|
"logps/chosen": -292.0,
|
||
|
|
"logps/rejected": -292.0,
|
||
|
|
"loss": 0.5605,
|
||
|
|
"loss/chosen-sft": 1.2421875,
|
||
|
|
"loss/dpo": 0.57421875,
|
||
|
|
"rewards/accuracies": 0.71875,
|
||
|
|
"rewards/chosen": -0.484375,
|
||
|
|
"rewards/margins": 0.34375,
|
||
|
|
"rewards/rejected": -0.828125,
|
||
|
|
"step": 118
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5265486725663717,
|
||
|
|
"grad_norm": 55.267738342285156,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.58984375,
|
||
|
|
"logits/rejected": -0.62890625,
|
||
|
|
"logps/chosen": -316.0,
|
||
|
|
"logps/rejected": -318.0,
|
||
|
|
"loss": 0.5439,
|
||
|
|
"loss/chosen-sft": 1.1484375,
|
||
|
|
"loss/dpo": 0.53515625,
|
||
|
|
"rewards/accuracies": 0.71875,
|
||
|
|
"rewards/chosen": -0.412109375,
|
||
|
|
"rewards/margins": 0.482421875,
|
||
|
|
"rewards/rejected": -0.89453125,
|
||
|
|
"step": 119
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5309734513274337,
|
||
|
|
"grad_norm": 52.895042419433594,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.28515625,
|
||
|
|
"logits/rejected": -0.380859375,
|
||
|
|
"logps/chosen": -252.0,
|
||
|
|
"logps/rejected": -278.0,
|
||
|
|
"loss": 0.5623,
|
||
|
|
"loss/chosen-sft": 0.96484375,
|
||
|
|
"loss/dpo": 0.50390625,
|
||
|
|
"rewards/accuracies": 0.71875,
|
||
|
|
"rewards/chosen": -0.359375,
|
||
|
|
"rewards/margins": 0.609375,
|
||
|
|
"rewards/rejected": -0.96875,
|
||
|
|
"step": 120
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5353982300884956,
|
||
|
|
"grad_norm": 33.5416374206543,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.365234375,
|
||
|
|
"logits/rejected": -0.45703125,
|
||
|
|
"logps/chosen": -282.0,
|
||
|
|
"logps/rejected": -262.0,
|
||
|
|
"loss": 0.5391,
|
||
|
|
"loss/chosen-sft": 1.28125,
|
||
|
|
"loss/dpo": 0.546875,
|
||
|
|
"rewards/accuracies": 0.8125,
|
||
|
|
"rewards/chosen": -0.4296875,
|
||
|
|
"rewards/margins": 0.4609375,
|
||
|
|
"rewards/rejected": -0.890625,
|
||
|
|
"step": 121
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5398230088495575,
|
||
|
|
"grad_norm": 55.33546447753906,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.53515625,
|
||
|
|
"logits/rejected": -0.5703125,
|
||
|
|
"logps/chosen": -334.0,
|
||
|
|
"logps/rejected": -340.0,
|
||
|
|
"loss": 0.5278,
|
||
|
|
"loss/chosen-sft": 1.2578125,
|
||
|
|
"loss/dpo": 0.53515625,
|
||
|
|
"rewards/accuracies": 0.71875,
|
||
|
|
"rewards/chosen": -0.45703125,
|
||
|
|
"rewards/margins": 0.609375,
|
||
|
|
"rewards/rejected": -1.0703125,
|
||
|
|
"step": 122
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5442477876106194,
|
||
|
|
"grad_norm": 46.70622253417969,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.53515625,
|
||
|
|
"logits/rejected": -0.59765625,
|
||
|
|
"logps/chosen": -308.0,
|
||
|
|
"logps/rejected": -274.0,
|
||
|
|
"loss": 0.55,
|
||
|
|
"loss/chosen-sft": 1.34375,
|
||
|
|
"loss/dpo": 0.5703125,
|
||
|
|
"rewards/accuracies": 0.71875,
|
||
|
|
"rewards/chosen": -0.5390625,
|
||
|
|
"rewards/margins": 0.3671875,
|
||
|
|
"rewards/rejected": -0.90625,
|
||
|
|
"step": 123
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5486725663716814,
|
||
|
|
"grad_norm": 48.83370590209961,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.5234375,
|
||
|
|
"logits/rejected": -0.60546875,
|
||
|
|
"logps/chosen": -322.0,
|
||
|
|
"logps/rejected": -318.0,
|
||
|
|
"loss": 0.5825,
|
||
|
|
"loss/chosen-sft": 1.2734375,
|
||
|
|
"loss/dpo": 0.59375,
|
||
|
|
"rewards/accuracies": 0.6875,
|
||
|
|
"rewards/chosen": -0.625,
|
||
|
|
"rewards/margins": 0.361328125,
|
||
|
|
"rewards/rejected": -0.984375,
|
||
|
|
"step": 124
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5530973451327433,
|
||
|
|
"grad_norm": 25.2650089263916,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.3203125,
|
||
|
|
"logits/rejected": -0.25390625,
|
||
|
|
"logps/chosen": -244.0,
|
||
|
|
"logps/rejected": -274.0,
|
||
|
|
"loss": 0.5889,
|
||
|
|
"loss/chosen-sft": 1.1484375,
|
||
|
|
"loss/dpo": 0.55859375,
|
||
|
|
"rewards/accuracies": 0.65625,
|
||
|
|
"rewards/chosen": -0.439453125,
|
||
|
|
"rewards/margins": 0.41796875,
|
||
|
|
"rewards/rejected": -0.859375,
|
||
|
|
"step": 125
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5575221238938053,
|
||
|
|
"grad_norm": 36.186500549316406,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.58984375,
|
||
|
|
"logits/rejected": -0.6640625,
|
||
|
|
"logps/chosen": -324.0,
|
||
|
|
"logps/rejected": -358.0,
|
||
|
|
"loss": 0.585,
|
||
|
|
"loss/chosen-sft": 1.2890625,
|
||
|
|
"loss/dpo": 0.54296875,
|
||
|
|
"rewards/accuracies": 0.8125,
|
||
|
|
"rewards/chosen": -0.6484375,
|
||
|
|
"rewards/margins": 0.50390625,
|
||
|
|
"rewards/rejected": -1.15625,
|
||
|
|
"step": 126
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5619469026548672,
|
||
|
|
"grad_norm": 13.623043060302734,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.640625,
|
||
|
|
"logits/rejected": -0.66796875,
|
||
|
|
"logps/chosen": -312.0,
|
||
|
|
"logps/rejected": -332.0,
|
||
|
|
"loss": 0.5789,
|
||
|
|
"loss/chosen-sft": 1.2109375,
|
||
|
|
"loss/dpo": 0.5546875,
|
||
|
|
"rewards/accuracies": 0.75,
|
||
|
|
"rewards/chosen": -0.38671875,
|
||
|
|
"rewards/margins": 0.42578125,
|
||
|
|
"rewards/rejected": -0.8125,
|
||
|
|
"step": 127
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5663716814159292,
|
||
|
|
"grad_norm": 9.796255111694336,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.494140625,
|
||
|
|
"logits/rejected": -0.6015625,
|
||
|
|
"logps/chosen": -290.0,
|
||
|
|
"logps/rejected": -346.0,
|
||
|
|
"loss": 0.5703,
|
||
|
|
"loss/chosen-sft": 1.3203125,
|
||
|
|
"loss/dpo": 0.609375,
|
||
|
|
"rewards/accuracies": 0.625,
|
||
|
|
"rewards/chosen": -0.65234375,
|
||
|
|
"rewards/margins": 0.359375,
|
||
|
|
"rewards/rejected": -1.015625,
|
||
|
|
"step": 128
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5707964601769911,
|
||
|
|
"grad_norm": 70.11766052246094,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.345703125,
|
||
|
|
"logits/rejected": -0.353515625,
|
||
|
|
"logps/chosen": -255.0,
|
||
|
|
"logps/rejected": -296.0,
|
||
|
|
"loss": 0.5767,
|
||
|
|
"loss/chosen-sft": 1.15625,
|
||
|
|
"loss/dpo": 0.48046875,
|
||
|
|
"rewards/accuracies": 0.78125,
|
||
|
|
"rewards/chosen": -0.3046875,
|
||
|
|
"rewards/margins": 0.75,
|
||
|
|
"rewards/rejected": -1.0546875,
|
||
|
|
"step": 129
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5752212389380531,
|
||
|
|
"grad_norm": 137.03662109375,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.095703125,
|
||
|
|
"logits/rejected": -0.38671875,
|
||
|
|
"logps/chosen": -212.0,
|
||
|
|
"logps/rejected": -262.0,
|
||
|
|
"loss": 0.5471,
|
||
|
|
"loss/chosen-sft": 1.125,
|
||
|
|
"loss/dpo": 0.515625,
|
||
|
|
"rewards/accuracies": 0.75,
|
||
|
|
"rewards/chosen": -0.328125,
|
||
|
|
"rewards/margins": 0.65234375,
|
||
|
|
"rewards/rejected": -0.98046875,
|
||
|
|
"step": 130
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5796460176991151,
|
||
|
|
"grad_norm": 148.0476531982422,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.5625,
|
||
|
|
"logits/rejected": -0.59375,
|
||
|
|
"logps/chosen": -270.0,
|
||
|
|
"logps/rejected": -284.0,
|
||
|
|
"loss": 0.5403,
|
||
|
|
"loss/chosen-sft": 1.3046875,
|
||
|
|
"loss/dpo": 0.6796875,
|
||
|
|
"rewards/accuracies": 0.6875,
|
||
|
|
"rewards/chosen": -0.54296875,
|
||
|
|
"rewards/margins": 0.134765625,
|
||
|
|
"rewards/rejected": -0.6796875,
|
||
|
|
"step": 131
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.584070796460177,
|
||
|
|
"grad_norm": 93.40387725830078,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.7109375,
|
||
|
|
"logits/rejected": -0.66015625,
|
||
|
|
"logps/chosen": -340.0,
|
||
|
|
"logps/rejected": -350.0,
|
||
|
|
"loss": 0.561,
|
||
|
|
"loss/chosen-sft": 1.390625,
|
||
|
|
"loss/dpo": 0.5078125,
|
||
|
|
"rewards/accuracies": 0.78125,
|
||
|
|
"rewards/chosen": -0.51953125,
|
||
|
|
"rewards/margins": 0.69921875,
|
||
|
|
"rewards/rejected": -1.21875,
|
||
|
|
"step": 132
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.588495575221239,
|
||
|
|
"grad_norm": 55.550758361816406,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.42578125,
|
||
|
|
"logits/rejected": -0.55859375,
|
||
|
|
"logps/chosen": -294.0,
|
||
|
|
"logps/rejected": -296.0,
|
||
|
|
"loss": 0.5627,
|
||
|
|
"loss/chosen-sft": 1.140625,
|
||
|
|
"loss/dpo": 0.6015625,
|
||
|
|
"rewards/accuracies": 0.75,
|
||
|
|
"rewards/chosen": -0.453125,
|
||
|
|
"rewards/margins": 0.33203125,
|
||
|
|
"rewards/rejected": -0.78515625,
|
||
|
|
"step": 133
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5929203539823009,
|
||
|
|
"grad_norm": 80.4654541015625,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.43359375,
|
||
|
|
"logits/rejected": -0.470703125,
|
||
|
|
"logps/chosen": -308.0,
|
||
|
|
"logps/rejected": -338.0,
|
||
|
|
"loss": 0.5481,
|
||
|
|
"loss/chosen-sft": 1.1640625,
|
||
|
|
"loss/dpo": 0.4765625,
|
||
|
|
"rewards/accuracies": 0.78125,
|
||
|
|
"rewards/chosen": -0.3984375,
|
||
|
|
"rewards/margins": 0.67578125,
|
||
|
|
"rewards/rejected": -1.0703125,
|
||
|
|
"step": 134
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.5973451327433629,
|
||
|
|
"grad_norm": 109.14212036132812,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.470703125,
|
||
|
|
"logits/rejected": -0.50390625,
|
||
|
|
"logps/chosen": -334.0,
|
||
|
|
"logps/rejected": -304.0,
|
||
|
|
"loss": 0.542,
|
||
|
|
"loss/chosen-sft": 1.171875,
|
||
|
|
"loss/dpo": 0.6171875,
|
||
|
|
"rewards/accuracies": 0.59375,
|
||
|
|
"rewards/chosen": -0.54296875,
|
||
|
|
"rewards/margins": 0.2578125,
|
||
|
|
"rewards/rejected": -0.80078125,
|
||
|
|
"step": 135
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6017699115044248,
|
||
|
|
"grad_norm": 37.90422821044922,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.421875,
|
||
|
|
"logits/rejected": -0.427734375,
|
||
|
|
"logps/chosen": -372.0,
|
||
|
|
"logps/rejected": -298.0,
|
||
|
|
"loss": 0.5493,
|
||
|
|
"loss/chosen-sft": 1.3203125,
|
||
|
|
"loss/dpo": 0.546875,
|
||
|
|
"rewards/accuracies": 0.71875,
|
||
|
|
"rewards/chosen": -0.50390625,
|
||
|
|
"rewards/margins": 0.474609375,
|
||
|
|
"rewards/rejected": -0.9765625,
|
||
|
|
"step": 136
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6061946902654868,
|
||
|
|
"grad_norm": 118.1666030883789,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.46875,
|
||
|
|
"logits/rejected": -0.515625,
|
||
|
|
"logps/chosen": -326.0,
|
||
|
|
"logps/rejected": -394.0,
|
||
|
|
"loss": 0.5112,
|
||
|
|
"loss/chosen-sft": 1.234375,
|
||
|
|
"loss/dpo": 0.5390625,
|
||
|
|
"rewards/accuracies": 0.6875,
|
||
|
|
"rewards/chosen": -0.419921875,
|
||
|
|
"rewards/margins": 0.5703125,
|
||
|
|
"rewards/rejected": -0.9921875,
|
||
|
|
"step": 137
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6106194690265486,
|
||
|
|
"grad_norm": 99.32813262939453,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.427734375,
|
||
|
|
"logits/rejected": -0.50390625,
|
||
|
|
"logps/chosen": -288.0,
|
||
|
|
"logps/rejected": -268.0,
|
||
|
|
"loss": 0.5508,
|
||
|
|
"loss/chosen-sft": 1.3984375,
|
||
|
|
"loss/dpo": 0.61328125,
|
||
|
|
"rewards/accuracies": 0.65625,
|
||
|
|
"rewards/chosen": -0.5625,
|
||
|
|
"rewards/margins": 0.353515625,
|
||
|
|
"rewards/rejected": -0.9140625,
|
||
|
|
"step": 138
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6150442477876106,
|
||
|
|
"grad_norm": 17.352619171142578,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.48046875,
|
||
|
|
"logits/rejected": -0.546875,
|
||
|
|
"logps/chosen": -282.0,
|
||
|
|
"logps/rejected": -272.0,
|
||
|
|
"loss": 0.5352,
|
||
|
|
"loss/chosen-sft": 1.1640625,
|
||
|
|
"loss/dpo": 0.51953125,
|
||
|
|
"rewards/accuracies": 0.78125,
|
||
|
|
"rewards/chosen": -0.466796875,
|
||
|
|
"rewards/margins": 0.5078125,
|
||
|
|
"rewards/rejected": -0.97265625,
|
||
|
|
"step": 139
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6194690265486725,
|
||
|
|
"grad_norm": 59.95145797729492,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.6171875,
|
||
|
|
"logits/rejected": -0.6875,
|
||
|
|
"logps/chosen": -302.0,
|
||
|
|
"logps/rejected": -336.0,
|
||
|
|
"loss": 0.5435,
|
||
|
|
"loss/chosen-sft": 1.265625,
|
||
|
|
"loss/dpo": 0.51953125,
|
||
|
|
"rewards/accuracies": 0.84375,
|
||
|
|
"rewards/chosen": -0.4765625,
|
||
|
|
"rewards/margins": 0.55078125,
|
||
|
|
"rewards/rejected": -1.0234375,
|
||
|
|
"step": 140
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6238938053097345,
|
||
|
|
"grad_norm": 55.3637580871582,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.51953125,
|
||
|
|
"logits/rejected": -0.56640625,
|
||
|
|
"logps/chosen": -342.0,
|
||
|
|
"logps/rejected": -382.0,
|
||
|
|
"loss": 0.5469,
|
||
|
|
"loss/chosen-sft": 1.3046875,
|
||
|
|
"loss/dpo": 0.55078125,
|
||
|
|
"rewards/accuracies": 0.78125,
|
||
|
|
"rewards/chosen": -0.5546875,
|
||
|
|
"rewards/margins": 0.65625,
|
||
|
|
"rewards/rejected": -1.2109375,
|
||
|
|
"step": 141
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6283185840707964,
|
||
|
|
"grad_norm": 146.2696075439453,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.55859375,
|
||
|
|
"logits/rejected": -0.62890625,
|
||
|
|
"logps/chosen": -352.0,
|
||
|
|
"logps/rejected": -318.0,
|
||
|
|
"loss": 0.5806,
|
||
|
|
"loss/chosen-sft": 1.3515625,
|
||
|
|
"loss/dpo": 0.7578125,
|
||
|
|
"rewards/accuracies": 0.59375,
|
||
|
|
"rewards/chosen": -0.703125,
|
||
|
|
"rewards/margins": 0.07275390625,
|
||
|
|
"rewards/rejected": -0.77734375,
|
||
|
|
"step": 142
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6327433628318584,
|
||
|
|
"grad_norm": 46.21394729614258,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.578125,
|
||
|
|
"logits/rejected": -0.5859375,
|
||
|
|
"logps/chosen": -268.0,
|
||
|
|
"logps/rejected": -296.0,
|
||
|
|
"loss": 0.5393,
|
||
|
|
"loss/chosen-sft": 1.265625,
|
||
|
|
"loss/dpo": 0.54296875,
|
||
|
|
"rewards/accuracies": 0.71875,
|
||
|
|
"rewards/chosen": -0.341796875,
|
||
|
|
"rewards/margins": 0.48046875,
|
||
|
|
"rewards/rejected": -0.82421875,
|
||
|
|
"step": 143
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6371681415929203,
|
||
|
|
"grad_norm": 38.909610748291016,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.60546875,
|
||
|
|
"logits/rejected": -0.73828125,
|
||
|
|
"logps/chosen": -320.0,
|
||
|
|
"logps/rejected": -334.0,
|
||
|
|
"loss": 0.543,
|
||
|
|
"loss/chosen-sft": 1.4375,
|
||
|
|
"loss/dpo": 0.55859375,
|
||
|
|
"rewards/accuracies": 0.59375,
|
||
|
|
"rewards/chosen": -0.6796875,
|
||
|
|
"rewards/margins": 0.5234375,
|
||
|
|
"rewards/rejected": -1.203125,
|
||
|
|
"step": 144
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6415929203539823,
|
||
|
|
"grad_norm": 137.8043975830078,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.53515625,
|
||
|
|
"logits/rejected": -0.58203125,
|
||
|
|
"logps/chosen": -340.0,
|
||
|
|
"logps/rejected": -290.0,
|
||
|
|
"loss": 0.5903,
|
||
|
|
"loss/chosen-sft": 1.328125,
|
||
|
|
"loss/dpo": 0.65234375,
|
||
|
|
"rewards/accuracies": 0.71875,
|
||
|
|
"rewards/chosen": -0.7109375,
|
||
|
|
"rewards/margins": 0.333984375,
|
||
|
|
"rewards/rejected": -1.046875,
|
||
|
|
"step": 145
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6460176991150443,
|
||
|
|
"grad_norm": 36.96350860595703,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.6953125,
|
||
|
|
"logits/rejected": -0.66796875,
|
||
|
|
"logps/chosen": -398.0,
|
||
|
|
"logps/rejected": -382.0,
|
||
|
|
"loss": 0.5288,
|
||
|
|
"loss/chosen-sft": 1.34375,
|
||
|
|
"loss/dpo": 0.546875,
|
||
|
|
"rewards/accuracies": 0.75,
|
||
|
|
"rewards/chosen": -0.84375,
|
||
|
|
"rewards/margins": 0.486328125,
|
||
|
|
"rewards/rejected": -1.328125,
|
||
|
|
"step": 146
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6504424778761062,
|
||
|
|
"grad_norm": 31.765138626098633,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.6796875,
|
||
|
|
"logits/rejected": -0.66015625,
|
||
|
|
"logps/chosen": -354.0,
|
||
|
|
"logps/rejected": -326.0,
|
||
|
|
"loss": 0.5535,
|
||
|
|
"loss/chosen-sft": 1.4375,
|
||
|
|
"loss/dpo": 0.5078125,
|
||
|
|
"rewards/accuracies": 0.8125,
|
||
|
|
"rewards/chosen": -0.63671875,
|
||
|
|
"rewards/margins": 0.5546875,
|
||
|
|
"rewards/rejected": -1.1953125,
|
||
|
|
"step": 147
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6548672566371682,
|
||
|
|
"grad_norm": 73.44290924072266,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.609375,
|
||
|
|
"logits/rejected": -0.69921875,
|
||
|
|
"logps/chosen": -356.0,
|
||
|
|
"logps/rejected": -350.0,
|
||
|
|
"loss": 0.5137,
|
||
|
|
"loss/chosen-sft": 1.3046875,
|
||
|
|
"loss/dpo": 0.474609375,
|
||
|
|
"rewards/accuracies": 0.75,
|
||
|
|
"rewards/chosen": -0.5234375,
|
||
|
|
"rewards/margins": 0.67578125,
|
||
|
|
"rewards/rejected": -1.203125,
|
||
|
|
"step": 148
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6592920353982301,
|
||
|
|
"grad_norm": 14.243433952331543,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.50390625,
|
||
|
|
"logits/rejected": -0.55078125,
|
||
|
|
"logps/chosen": -326.0,
|
||
|
|
"logps/rejected": -316.0,
|
||
|
|
"loss": 0.5225,
|
||
|
|
"loss/chosen-sft": 1.2578125,
|
||
|
|
"loss/dpo": 0.48046875,
|
||
|
|
"rewards/accuracies": 0.78125,
|
||
|
|
"rewards/chosen": -0.61328125,
|
||
|
|
"rewards/margins": 0.765625,
|
||
|
|
"rewards/rejected": -1.3828125,
|
||
|
|
"step": 149
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6637168141592921,
|
||
|
|
"grad_norm": 21.770702362060547,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.55078125,
|
||
|
|
"logits/rejected": -0.546875,
|
||
|
|
"logps/chosen": -326.0,
|
||
|
|
"logps/rejected": -330.0,
|
||
|
|
"loss": 0.5234,
|
||
|
|
"loss/chosen-sft": 1.296875,
|
||
|
|
"loss/dpo": 0.5,
|
||
|
|
"rewards/accuracies": 0.78125,
|
||
|
|
"rewards/chosen": -0.4609375,
|
||
|
|
"rewards/margins": 0.60546875,
|
||
|
|
"rewards/rejected": -1.0703125,
|
||
|
|
"step": 150
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.668141592920354,
|
||
|
|
"grad_norm": 77.819091796875,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.7109375,
|
||
|
|
"logits/rejected": -0.6171875,
|
||
|
|
"logps/chosen": -404.0,
|
||
|
|
"logps/rejected": -442.0,
|
||
|
|
"loss": 0.5,
|
||
|
|
"loss/chosen-sft": 1.3125,
|
||
|
|
"loss/dpo": 0.5078125,
|
||
|
|
"rewards/accuracies": 0.78125,
|
||
|
|
"rewards/chosen": -0.7734375,
|
||
|
|
"rewards/margins": 0.6875,
|
||
|
|
"rewards/rejected": -1.4609375,
|
||
|
|
"step": 151
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.672566371681416,
|
||
|
|
"grad_norm": 37.56740951538086,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.3828125,
|
||
|
|
"logits/rejected": -0.2275390625,
|
||
|
|
"logps/chosen": -294.0,
|
||
|
|
"logps/rejected": -324.0,
|
||
|
|
"loss": 0.5325,
|
||
|
|
"loss/chosen-sft": 1.3984375,
|
||
|
|
"loss/dpo": 0.59375,
|
||
|
|
"rewards/accuracies": 0.71875,
|
||
|
|
"rewards/chosen": -0.73828125,
|
||
|
|
"rewards/margins": 0.52734375,
|
||
|
|
"rewards/rejected": -1.265625,
|
||
|
|
"step": 152
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6769911504424779,
|
||
|
|
"grad_norm": 105.4240951538086,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.423828125,
|
||
|
|
"logits/rejected": -0.54296875,
|
||
|
|
"logps/chosen": -368.0,
|
||
|
|
"logps/rejected": -390.0,
|
||
|
|
"loss": 0.5544,
|
||
|
|
"loss/chosen-sft": 1.28125,
|
||
|
|
"loss/dpo": 0.46875,
|
||
|
|
"rewards/accuracies": 0.78125,
|
||
|
|
"rewards/chosen": -0.77734375,
|
||
|
|
"rewards/margins": 0.7734375,
|
||
|
|
"rewards/rejected": -1.546875,
|
||
|
|
"step": 153
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6814159292035398,
|
||
|
|
"grad_norm": 56.64170837402344,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.5390625,
|
||
|
|
"logits/rejected": -0.6640625,
|
||
|
|
"logps/chosen": -358.0,
|
||
|
|
"logps/rejected": -320.0,
|
||
|
|
"loss": 0.5625,
|
||
|
|
"loss/chosen-sft": 1.3984375,
|
||
|
|
"loss/dpo": 0.5859375,
|
||
|
|
"rewards/accuracies": 0.6875,
|
||
|
|
"rewards/chosen": -0.7578125,
|
||
|
|
"rewards/margins": 0.453125,
|
||
|
|
"rewards/rejected": -1.2109375,
|
||
|
|
"step": 154
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6858407079646017,
|
||
|
|
"grad_norm": 22.23441505432129,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.61328125,
|
||
|
|
"logits/rejected": -0.5703125,
|
||
|
|
"logps/chosen": -336.0,
|
||
|
|
"logps/rejected": -314.0,
|
||
|
|
"loss": 0.5115,
|
||
|
|
"loss/chosen-sft": 1.453125,
|
||
|
|
"loss/dpo": 0.52734375,
|
||
|
|
"rewards/accuracies": 0.71875,
|
||
|
|
"rewards/chosen": -0.58984375,
|
||
|
|
"rewards/margins": 0.5625,
|
||
|
|
"rewards/rejected": -1.15625,
|
||
|
|
"step": 155
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6902654867256637,
|
||
|
|
"grad_norm": 71.9916000366211,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.69921875,
|
||
|
|
"logits/rejected": -0.7109375,
|
||
|
|
"logps/chosen": -354.0,
|
||
|
|
"logps/rejected": -338.0,
|
||
|
|
"loss": 0.5227,
|
||
|
|
"loss/chosen-sft": 1.40625,
|
||
|
|
"loss/dpo": 0.5546875,
|
||
|
|
"rewards/accuracies": 0.65625,
|
||
|
|
"rewards/chosen": -0.59375,
|
||
|
|
"rewards/margins": 0.6171875,
|
||
|
|
"rewards/rejected": -1.2109375,
|
||
|
|
"step": 156
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6946902654867256,
|
||
|
|
"grad_norm": 11.088499069213867,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.48046875,
|
||
|
|
"logits/rejected": -0.51171875,
|
||
|
|
"logps/chosen": -328.0,
|
||
|
|
"logps/rejected": -358.0,
|
||
|
|
"loss": 0.5259,
|
||
|
|
"loss/chosen-sft": 1.328125,
|
||
|
|
"loss/dpo": 0.60546875,
|
||
|
|
"rewards/accuracies": 0.71875,
|
||
|
|
"rewards/chosen": -0.84765625,
|
||
|
|
"rewards/margins": 0.4609375,
|
||
|
|
"rewards/rejected": -1.3125,
|
||
|
|
"step": 157
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.6991150442477876,
|
||
|
|
"grad_norm": 104.77384185791016,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.55859375,
|
||
|
|
"logits/rejected": -0.7265625,
|
||
|
|
"logps/chosen": -330.0,
|
||
|
|
"logps/rejected": -340.0,
|
||
|
|
"loss": 0.5566,
|
||
|
|
"loss/chosen-sft": 1.4296875,
|
||
|
|
"loss/dpo": 0.61328125,
|
||
|
|
"rewards/accuracies": 0.5625,
|
||
|
|
"rewards/chosen": -0.640625,
|
||
|
|
"rewards/margins": 0.5390625,
|
||
|
|
"rewards/rejected": -1.1796875,
|
||
|
|
"step": 158
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7035398230088495,
|
||
|
|
"grad_norm": 87.36003875732422,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.53515625,
|
||
|
|
"logits/rejected": -0.55078125,
|
||
|
|
"logps/chosen": -296.0,
|
||
|
|
"logps/rejected": -342.0,
|
||
|
|
"loss": 0.5449,
|
||
|
|
"loss/chosen-sft": 1.359375,
|
||
|
|
"loss/dpo": 0.58203125,
|
||
|
|
"rewards/accuracies": 0.6875,
|
||
|
|
"rewards/chosen": -0.8125,
|
||
|
|
"rewards/margins": 0.65625,
|
||
|
|
"rewards/rejected": -1.46875,
|
||
|
|
"step": 159
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7079646017699115,
|
||
|
|
"grad_norm": 37.620750427246094,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.455078125,
|
||
|
|
"logits/rejected": -0.50390625,
|
||
|
|
"logps/chosen": -376.0,
|
||
|
|
"logps/rejected": -444.0,
|
||
|
|
"loss": 0.5303,
|
||
|
|
"loss/chosen-sft": 1.265625,
|
||
|
|
"loss/dpo": 0.54296875,
|
||
|
|
"rewards/accuracies": 0.71875,
|
||
|
|
"rewards/chosen": -0.87890625,
|
||
|
|
"rewards/margins": 0.62109375,
|
||
|
|
"rewards/rejected": -1.5,
|
||
|
|
"step": 160
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7123893805309734,
|
||
|
|
"grad_norm": 75.54209899902344,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.546875,
|
||
|
|
"logits/rejected": -0.64453125,
|
||
|
|
"logps/chosen": -348.0,
|
||
|
|
"logps/rejected": -368.0,
|
||
|
|
"loss": 0.5203,
|
||
|
|
"loss/chosen-sft": 1.3125,
|
||
|
|
"loss/dpo": 0.54296875,
|
||
|
|
"rewards/accuracies": 0.6875,
|
||
|
|
"rewards/chosen": -0.86328125,
|
||
|
|
"rewards/margins": 0.4921875,
|
||
|
|
"rewards/rejected": -1.359375,
|
||
|
|
"step": 161
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7168141592920354,
|
||
|
|
"grad_norm": 54.434139251708984,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.60546875,
|
||
|
|
"logits/rejected": -0.74609375,
|
||
|
|
"logps/chosen": -298.0,
|
||
|
|
"logps/rejected": -352.0,
|
||
|
|
"loss": 0.511,
|
||
|
|
"loss/chosen-sft": 1.2109375,
|
||
|
|
"loss/dpo": 0.466796875,
|
||
|
|
"rewards/accuracies": 0.90625,
|
||
|
|
"rewards/chosen": -0.58984375,
|
||
|
|
"rewards/margins": 0.67578125,
|
||
|
|
"rewards/rejected": -1.265625,
|
||
|
|
"step": 162
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7212389380530974,
|
||
|
|
"grad_norm": 10.78385066986084,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.6875,
|
||
|
|
"logits/rejected": -0.5859375,
|
||
|
|
"logps/chosen": -288.0,
|
||
|
|
"logps/rejected": -306.0,
|
||
|
|
"loss": 0.5369,
|
||
|
|
"loss/chosen-sft": 1.2734375,
|
||
|
|
"loss/dpo": 0.51953125,
|
||
|
|
"rewards/accuracies": 0.75,
|
||
|
|
"rewards/chosen": -0.72265625,
|
||
|
|
"rewards/margins": 0.65234375,
|
||
|
|
"rewards/rejected": -1.375,
|
||
|
|
"step": 163
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7256637168141593,
|
||
|
|
"grad_norm": 46.15651321411133,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.70703125,
|
||
|
|
"logits/rejected": -0.765625,
|
||
|
|
"logps/chosen": -364.0,
|
||
|
|
"logps/rejected": -338.0,
|
||
|
|
"loss": 0.533,
|
||
|
|
"loss/chosen-sft": 1.3828125,
|
||
|
|
"loss/dpo": 0.50390625,
|
||
|
|
"rewards/accuracies": 0.78125,
|
||
|
|
"rewards/chosen": -0.5703125,
|
||
|
|
"rewards/margins": 0.6015625,
|
||
|
|
"rewards/rejected": -1.171875,
|
||
|
|
"step": 164
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7300884955752213,
|
||
|
|
"grad_norm": 76.59629821777344,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.57421875,
|
||
|
|
"logits/rejected": -0.64453125,
|
||
|
|
"logps/chosen": -258.0,
|
||
|
|
"logps/rejected": -324.0,
|
||
|
|
"loss": 0.5183,
|
||
|
|
"loss/chosen-sft": 1.1953125,
|
||
|
|
"loss/dpo": 0.52734375,
|
||
|
|
"rewards/accuracies": 0.6875,
|
||
|
|
"rewards/chosen": -0.58203125,
|
||
|
|
"rewards/margins": 0.5390625,
|
||
|
|
"rewards/rejected": -1.1171875,
|
||
|
|
"step": 165
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7345132743362832,
|
||
|
|
"grad_norm": 73.99260711669922,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.390625,
|
||
|
|
"logits/rejected": -0.578125,
|
||
|
|
"logps/chosen": -316.0,
|
||
|
|
"logps/rejected": -372.0,
|
||
|
|
"loss": 0.5208,
|
||
|
|
"loss/chosen-sft": 1.2109375,
|
||
|
|
"loss/dpo": 0.5,
|
||
|
|
"rewards/accuracies": 0.84375,
|
||
|
|
"rewards/chosen": -0.8359375,
|
||
|
|
"rewards/margins": 1.0,
|
||
|
|
"rewards/rejected": -1.8359375,
|
||
|
|
"step": 166
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7389380530973452,
|
||
|
|
"grad_norm": 47.95753479003906,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.5625,
|
||
|
|
"logits/rejected": -0.72265625,
|
||
|
|
"logps/chosen": -356.0,
|
||
|
|
"logps/rejected": -354.0,
|
||
|
|
"loss": 0.4956,
|
||
|
|
"loss/chosen-sft": 1.2890625,
|
||
|
|
"loss/dpo": 0.6171875,
|
||
|
|
"rewards/accuracies": 0.65625,
|
||
|
|
"rewards/chosen": -0.66015625,
|
||
|
|
"rewards/margins": 0.455078125,
|
||
|
|
"rewards/rejected": -1.1171875,
|
||
|
|
"step": 167
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7433628318584071,
|
||
|
|
"grad_norm": 27.858415603637695,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.7109375,
|
||
|
|
"logits/rejected": -0.64453125,
|
||
|
|
"logps/chosen": -336.0,
|
||
|
|
"logps/rejected": -332.0,
|
||
|
|
"loss": 0.4971,
|
||
|
|
"loss/chosen-sft": 1.4375,
|
||
|
|
"loss/dpo": 0.59375,
|
||
|
|
"rewards/accuracies": 0.65625,
|
||
|
|
"rewards/chosen": -0.74609375,
|
||
|
|
"rewards/margins": 0.3515625,
|
||
|
|
"rewards/rejected": -1.09375,
|
||
|
|
"step": 168
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7477876106194691,
|
||
|
|
"grad_norm": 113.69762420654297,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.79296875,
|
||
|
|
"logits/rejected": -0.79296875,
|
||
|
|
"logps/chosen": -420.0,
|
||
|
|
"logps/rejected": -446.0,
|
||
|
|
"loss": 0.4646,
|
||
|
|
"loss/chosen-sft": 1.4453125,
|
||
|
|
"loss/dpo": 0.439453125,
|
||
|
|
"rewards/accuracies": 0.75,
|
||
|
|
"rewards/chosen": -0.90625,
|
||
|
|
"rewards/margins": 0.92578125,
|
||
|
|
"rewards/rejected": -1.828125,
|
||
|
|
"step": 169
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7522123893805309,
|
||
|
|
"grad_norm": 92.540283203125,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.53125,
|
||
|
|
"logits/rejected": -0.60546875,
|
||
|
|
"logps/chosen": -356.0,
|
||
|
|
"logps/rejected": -412.0,
|
||
|
|
"loss": 0.5298,
|
||
|
|
"loss/chosen-sft": 1.3828125,
|
||
|
|
"loss/dpo": 0.51171875,
|
||
|
|
"rewards/accuracies": 0.78125,
|
||
|
|
"rewards/chosen": -0.9453125,
|
||
|
|
"rewards/margins": 0.64453125,
|
||
|
|
"rewards/rejected": -1.5859375,
|
||
|
|
"step": 170
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7566371681415929,
|
||
|
|
"grad_norm": 34.24614334106445,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.515625,
|
||
|
|
"logits/rejected": -0.62109375,
|
||
|
|
"logps/chosen": -336.0,
|
||
|
|
"logps/rejected": -426.0,
|
||
|
|
"loss": 0.4983,
|
||
|
|
"loss/chosen-sft": 1.28125,
|
||
|
|
"loss/dpo": 0.40625,
|
||
|
|
"rewards/accuracies": 0.84375,
|
||
|
|
"rewards/chosen": -0.7734375,
|
||
|
|
"rewards/margins": 1.1328125,
|
||
|
|
"rewards/rejected": -1.90625,
|
||
|
|
"step": 171
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7610619469026548,
|
||
|
|
"grad_norm": 26.35588264465332,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.7734375,
|
||
|
|
"logits/rejected": -0.79296875,
|
||
|
|
"logps/chosen": -358.0,
|
||
|
|
"logps/rejected": -358.0,
|
||
|
|
"loss": 0.52,
|
||
|
|
"loss/chosen-sft": 1.4453125,
|
||
|
|
"loss/dpo": 0.435546875,
|
||
|
|
"rewards/accuracies": 0.84375,
|
||
|
|
"rewards/chosen": -0.6875,
|
||
|
|
"rewards/margins": 0.92578125,
|
||
|
|
"rewards/rejected": -1.6171875,
|
||
|
|
"step": 172
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7654867256637168,
|
||
|
|
"grad_norm": 70.59893798828125,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.578125,
|
||
|
|
"logits/rejected": -0.63671875,
|
||
|
|
"logps/chosen": -336.0,
|
||
|
|
"logps/rejected": -406.0,
|
||
|
|
"loss": 0.4802,
|
||
|
|
"loss/chosen-sft": 1.4609375,
|
||
|
|
"loss/dpo": 0.451171875,
|
||
|
|
"rewards/accuracies": 0.78125,
|
||
|
|
"rewards/chosen": -0.83984375,
|
||
|
|
"rewards/margins": 1.078125,
|
||
|
|
"rewards/rejected": -1.921875,
|
||
|
|
"step": 173
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7699115044247787,
|
||
|
|
"grad_norm": 35.492210388183594,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.7109375,
|
||
|
|
"logits/rejected": -0.63671875,
|
||
|
|
"logps/chosen": -326.0,
|
||
|
|
"logps/rejected": -396.0,
|
||
|
|
"loss": 0.4731,
|
||
|
|
"loss/chosen-sft": 1.4140625,
|
||
|
|
"loss/dpo": 0.490234375,
|
||
|
|
"rewards/accuracies": 0.71875,
|
||
|
|
"rewards/chosen": -0.69140625,
|
||
|
|
"rewards/margins": 1.109375,
|
||
|
|
"rewards/rejected": -1.8046875,
|
||
|
|
"step": 174
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7743362831858407,
|
||
|
|
"grad_norm": 52.300148010253906,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.294921875,
|
||
|
|
"logits/rejected": -0.431640625,
|
||
|
|
"logps/chosen": -298.0,
|
||
|
|
"logps/rejected": -330.0,
|
||
|
|
"loss": 0.5232,
|
||
|
|
"loss/chosen-sft": 1.3984375,
|
||
|
|
"loss/dpo": 0.6796875,
|
||
|
|
"rewards/accuracies": 0.625,
|
||
|
|
"rewards/chosen": -0.9453125,
|
||
|
|
"rewards/margins": 0.330078125,
|
||
|
|
"rewards/rejected": -1.2734375,
|
||
|
|
"step": 175
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7787610619469026,
|
||
|
|
"grad_norm": 133.416748046875,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.59375,
|
||
|
|
"logits/rejected": -0.50390625,
|
||
|
|
"logps/chosen": -330.0,
|
||
|
|
"logps/rejected": -342.0,
|
||
|
|
"loss": 0.499,
|
||
|
|
"loss/chosen-sft": 1.6796875,
|
||
|
|
"loss/dpo": 0.609375,
|
||
|
|
"rewards/accuracies": 0.71875,
|
||
|
|
"rewards/chosen": -0.8828125,
|
||
|
|
"rewards/margins": 0.33203125,
|
||
|
|
"rewards/rejected": -1.21875,
|
||
|
|
"step": 176
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7831858407079646,
|
||
|
|
"grad_norm": 23.086772918701172,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.5625,
|
||
|
|
"logits/rejected": -0.50390625,
|
||
|
|
"logps/chosen": -376.0,
|
||
|
|
"logps/rejected": -506.0,
|
||
|
|
"loss": 0.4585,
|
||
|
|
"loss/chosen-sft": 1.421875,
|
||
|
|
"loss/dpo": 0.392578125,
|
||
|
|
"rewards/accuracies": 0.90625,
|
||
|
|
"rewards/chosen": -0.84375,
|
||
|
|
"rewards/margins": 1.34375,
|
||
|
|
"rewards/rejected": -2.1875,
|
||
|
|
"step": 177
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7876106194690266,
|
||
|
|
"grad_norm": 206.20298767089844,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.53125,
|
||
|
|
"logits/rejected": -0.470703125,
|
||
|
|
"logps/chosen": -276.0,
|
||
|
|
"logps/rejected": -296.0,
|
||
|
|
"loss": 0.5459,
|
||
|
|
"loss/chosen-sft": 1.2890625,
|
||
|
|
"loss/dpo": 0.5625,
|
||
|
|
"rewards/accuracies": 0.65625,
|
||
|
|
"rewards/chosen": -0.62890625,
|
||
|
|
"rewards/margins": 0.58203125,
|
||
|
|
"rewards/rejected": -1.2109375,
|
||
|
|
"step": 178
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7920353982300885,
|
||
|
|
"grad_norm": 138.48703002929688,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.4375,
|
||
|
|
"logits/rejected": -0.51171875,
|
||
|
|
"logps/chosen": -294.0,
|
||
|
|
"logps/rejected": -364.0,
|
||
|
|
"loss": 0.4871,
|
||
|
|
"loss/chosen-sft": 1.328125,
|
||
|
|
"loss/dpo": 0.470703125,
|
||
|
|
"rewards/accuracies": 0.78125,
|
||
|
|
"rewards/chosen": -0.76171875,
|
||
|
|
"rewards/margins": 0.7109375,
|
||
|
|
"rewards/rejected": -1.46875,
|
||
|
|
"step": 179
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.7964601769911505,
|
||
|
|
"grad_norm": 7.023651599884033,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.65625,
|
||
|
|
"logits/rejected": -0.8359375,
|
||
|
|
"logps/chosen": -338.0,
|
||
|
|
"logps/rejected": -370.0,
|
||
|
|
"loss": 0.4651,
|
||
|
|
"loss/chosen-sft": 1.453125,
|
||
|
|
"loss/dpo": 0.431640625,
|
||
|
|
"rewards/accuracies": 0.8125,
|
||
|
|
"rewards/chosen": -0.8203125,
|
||
|
|
"rewards/margins": 1.015625,
|
||
|
|
"rewards/rejected": -1.828125,
|
||
|
|
"step": 180
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8008849557522124,
|
||
|
|
"grad_norm": 101.0399169921875,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.51953125,
|
||
|
|
"logits/rejected": -0.578125,
|
||
|
|
"logps/chosen": -418.0,
|
||
|
|
"logps/rejected": -480.0,
|
||
|
|
"loss": 0.4773,
|
||
|
|
"loss/chosen-sft": 1.5,
|
||
|
|
"loss/dpo": 0.431640625,
|
||
|
|
"rewards/accuracies": 0.8125,
|
||
|
|
"rewards/chosen": -1.0625,
|
||
|
|
"rewards/margins": 1.21875,
|
||
|
|
"rewards/rejected": -2.28125,
|
||
|
|
"step": 181
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8053097345132744,
|
||
|
|
"grad_norm": 42.745174407958984,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.6484375,
|
||
|
|
"logits/rejected": -0.5859375,
|
||
|
|
"logps/chosen": -376.0,
|
||
|
|
"logps/rejected": -418.0,
|
||
|
|
"loss": 0.4871,
|
||
|
|
"loss/chosen-sft": 1.5625,
|
||
|
|
"loss/dpo": 0.45703125,
|
||
|
|
"rewards/accuracies": 0.78125,
|
||
|
|
"rewards/chosen": -0.98828125,
|
||
|
|
"rewards/margins": 1.046875,
|
||
|
|
"rewards/rejected": -2.03125,
|
||
|
|
"step": 182
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8097345132743363,
|
||
|
|
"grad_norm": 65.14553833007812,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.6484375,
|
||
|
|
"logits/rejected": -0.6953125,
|
||
|
|
"logps/chosen": -298.0,
|
||
|
|
"logps/rejected": -312.0,
|
||
|
|
"loss": 0.4883,
|
||
|
|
"loss/chosen-sft": 1.4921875,
|
||
|
|
"loss/dpo": 0.5234375,
|
||
|
|
"rewards/accuracies": 0.78125,
|
||
|
|
"rewards/chosen": -0.8828125,
|
||
|
|
"rewards/margins": 0.640625,
|
||
|
|
"rewards/rejected": -1.5234375,
|
||
|
|
"step": 183
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8141592920353983,
|
||
|
|
"grad_norm": 306.6606750488281,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.451171875,
|
||
|
|
"logits/rejected": -0.5703125,
|
||
|
|
"logps/chosen": -320.0,
|
||
|
|
"logps/rejected": -420.0,
|
||
|
|
"loss": 0.5232,
|
||
|
|
"loss/chosen-sft": 1.3203125,
|
||
|
|
"loss/dpo": 0.482421875,
|
||
|
|
"rewards/accuracies": 0.78125,
|
||
|
|
"rewards/chosen": -0.79296875,
|
||
|
|
"rewards/margins": 1.2421875,
|
||
|
|
"rewards/rejected": -2.046875,
|
||
|
|
"step": 184
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8185840707964602,
|
||
|
|
"grad_norm": 20.622095108032227,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.6171875,
|
||
|
|
"logits/rejected": -0.72265625,
|
||
|
|
"logps/chosen": -368.0,
|
||
|
|
"logps/rejected": -396.0,
|
||
|
|
"loss": 0.5178,
|
||
|
|
"loss/chosen-sft": 1.59375,
|
||
|
|
"loss/dpo": 0.490234375,
|
||
|
|
"rewards/accuracies": 0.75,
|
||
|
|
"rewards/chosen": -0.76953125,
|
||
|
|
"rewards/margins": 0.90625,
|
||
|
|
"rewards/rejected": -1.671875,
|
||
|
|
"step": 185
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8230088495575221,
|
||
|
|
"grad_norm": 15.8814115524292,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.640625,
|
||
|
|
"logits/rejected": -0.81640625,
|
||
|
|
"logps/chosen": -350.0,
|
||
|
|
"logps/rejected": -382.0,
|
||
|
|
"loss": 0.5017,
|
||
|
|
"loss/chosen-sft": 1.484375,
|
||
|
|
"loss/dpo": 0.53515625,
|
||
|
|
"rewards/accuracies": 0.8125,
|
||
|
|
"rewards/chosen": -0.83203125,
|
||
|
|
"rewards/margins": 0.8125,
|
||
|
|
"rewards/rejected": -1.6484375,
|
||
|
|
"step": 186
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.827433628318584,
|
||
|
|
"grad_norm": 45.224002838134766,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.478515625,
|
||
|
|
"logits/rejected": -0.404296875,
|
||
|
|
"logps/chosen": -294.0,
|
||
|
|
"logps/rejected": -386.0,
|
||
|
|
"loss": 0.5002,
|
||
|
|
"loss/chosen-sft": 1.3046875,
|
||
|
|
"loss/dpo": 0.453125,
|
||
|
|
"rewards/accuracies": 0.8125,
|
||
|
|
"rewards/chosen": -0.72265625,
|
||
|
|
"rewards/margins": 0.83984375,
|
||
|
|
"rewards/rejected": -1.5625,
|
||
|
|
"step": 187
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.831858407079646,
|
||
|
|
"grad_norm": 32.42481994628906,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.38671875,
|
||
|
|
"logits/rejected": -0.43359375,
|
||
|
|
"logps/chosen": -336.0,
|
||
|
|
"logps/rejected": -340.0,
|
||
|
|
"loss": 0.5051,
|
||
|
|
"loss/chosen-sft": 1.3359375,
|
||
|
|
"loss/dpo": 0.5390625,
|
||
|
|
"rewards/accuracies": 0.75,
|
||
|
|
"rewards/chosen": -0.84765625,
|
||
|
|
"rewards/margins": 0.5703125,
|
||
|
|
"rewards/rejected": -1.4140625,
|
||
|
|
"step": 188
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8362831858407079,
|
||
|
|
"grad_norm": 54.047035217285156,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.5234375,
|
||
|
|
"logits/rejected": -0.37890625,
|
||
|
|
"logps/chosen": -290.0,
|
||
|
|
"logps/rejected": -366.0,
|
||
|
|
"loss": 0.4817,
|
||
|
|
"loss/chosen-sft": 1.3671875,
|
||
|
|
"loss/dpo": 0.54296875,
|
||
|
|
"rewards/accuracies": 0.8125,
|
||
|
|
"rewards/chosen": -0.90234375,
|
||
|
|
"rewards/margins": 0.8125,
|
||
|
|
"rewards/rejected": -1.71875,
|
||
|
|
"step": 189
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8407079646017699,
|
||
|
|
"grad_norm": 10.179818153381348,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.41796875,
|
||
|
|
"logits/rejected": -0.4296875,
|
||
|
|
"logps/chosen": -296.0,
|
||
|
|
"logps/rejected": -302.0,
|
||
|
|
"loss": 0.5049,
|
||
|
|
"loss/chosen-sft": 1.3984375,
|
||
|
|
"loss/dpo": 0.55859375,
|
||
|
|
"rewards/accuracies": 0.8125,
|
||
|
|
"rewards/chosen": -0.796875,
|
||
|
|
"rewards/margins": 0.482421875,
|
||
|
|
"rewards/rejected": -1.28125,
|
||
|
|
"step": 190
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8451327433628318,
|
||
|
|
"grad_norm": 63.52503204345703,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.76953125,
|
||
|
|
"logits/rejected": -0.69921875,
|
||
|
|
"logps/chosen": -356.0,
|
||
|
|
"logps/rejected": -416.0,
|
||
|
|
"loss": 0.5083,
|
||
|
|
"loss/chosen-sft": 1.46875,
|
||
|
|
"loss/dpo": 0.486328125,
|
||
|
|
"rewards/accuracies": 0.84375,
|
||
|
|
"rewards/chosen": -0.93359375,
|
||
|
|
"rewards/margins": 0.859375,
|
||
|
|
"rewards/rejected": -1.7890625,
|
||
|
|
"step": 191
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8495575221238938,
|
||
|
|
"grad_norm": 64.59293365478516,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.4609375,
|
||
|
|
"logits/rejected": -0.474609375,
|
||
|
|
"logps/chosen": -378.0,
|
||
|
|
"logps/rejected": -392.0,
|
||
|
|
"loss": 0.4697,
|
||
|
|
"loss/chosen-sft": 1.3203125,
|
||
|
|
"loss/dpo": 0.47265625,
|
||
|
|
"rewards/accuracies": 0.84375,
|
||
|
|
"rewards/chosen": -0.80078125,
|
||
|
|
"rewards/margins": 0.87109375,
|
||
|
|
"rewards/rejected": -1.671875,
|
||
|
|
"step": 192
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8539823008849557,
|
||
|
|
"grad_norm": 48.203311920166016,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.3046875,
|
||
|
|
"logits/rejected": -0.31640625,
|
||
|
|
"logps/chosen": -308.0,
|
||
|
|
"logps/rejected": -390.0,
|
||
|
|
"loss": 0.5132,
|
||
|
|
"loss/chosen-sft": 1.171875,
|
||
|
|
"loss/dpo": 0.4375,
|
||
|
|
"rewards/accuracies": 0.84375,
|
||
|
|
"rewards/chosen": -0.6796875,
|
||
|
|
"rewards/margins": 0.9140625,
|
||
|
|
"rewards/rejected": -1.59375,
|
||
|
|
"step": 193
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8584070796460177,
|
||
|
|
"grad_norm": 86.9441909790039,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.435546875,
|
||
|
|
"logits/rejected": -0.4609375,
|
||
|
|
"logps/chosen": -296.0,
|
||
|
|
"logps/rejected": -358.0,
|
||
|
|
"loss": 0.4749,
|
||
|
|
"loss/chosen-sft": 1.2734375,
|
||
|
|
"loss/dpo": 0.44140625,
|
||
|
|
"rewards/accuracies": 0.8125,
|
||
|
|
"rewards/chosen": -0.6484375,
|
||
|
|
"rewards/margins": 0.94921875,
|
||
|
|
"rewards/rejected": -1.59375,
|
||
|
|
"step": 194
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8628318584070797,
|
||
|
|
"grad_norm": 108.06549835205078,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.4453125,
|
||
|
|
"logits/rejected": -0.5234375,
|
||
|
|
"logps/chosen": -342.0,
|
||
|
|
"logps/rejected": -404.0,
|
||
|
|
"loss": 0.4771,
|
||
|
|
"loss/chosen-sft": 1.4453125,
|
||
|
|
"loss/dpo": 0.65234375,
|
||
|
|
"rewards/accuracies": 0.78125,
|
||
|
|
"rewards/chosen": -0.9921875,
|
||
|
|
"rewards/margins": 0.64453125,
|
||
|
|
"rewards/rejected": -1.640625,
|
||
|
|
"step": 195
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8672566371681416,
|
||
|
|
"grad_norm": 173.70831298828125,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.58984375,
|
||
|
|
"logits/rejected": -0.62890625,
|
||
|
|
"logps/chosen": -368.0,
|
||
|
|
"logps/rejected": -400.0,
|
||
|
|
"loss": 0.4434,
|
||
|
|
"loss/chosen-sft": 1.46875,
|
||
|
|
"loss/dpo": 0.44140625,
|
||
|
|
"rewards/accuracies": 0.75,
|
||
|
|
"rewards/chosen": -0.9453125,
|
||
|
|
"rewards/margins": 1.046875,
|
||
|
|
"rewards/rejected": -1.984375,
|
||
|
|
"step": 196
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8716814159292036,
|
||
|
|
"grad_norm": 41.173553466796875,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.60546875,
|
||
|
|
"logits/rejected": -0.671875,
|
||
|
|
"logps/chosen": -392.0,
|
||
|
|
"logps/rejected": -470.0,
|
||
|
|
"loss": 0.4729,
|
||
|
|
"loss/chosen-sft": 1.5,
|
||
|
|
"loss/dpo": 0.44140625,
|
||
|
|
"rewards/accuracies": 0.84375,
|
||
|
|
"rewards/chosen": -1.140625,
|
||
|
|
"rewards/margins": 1.1015625,
|
||
|
|
"rewards/rejected": -2.25,
|
||
|
|
"step": 197
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8761061946902655,
|
||
|
|
"grad_norm": 14.614328384399414,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.68359375,
|
||
|
|
"logits/rejected": -0.76171875,
|
||
|
|
"logps/chosen": -360.0,
|
||
|
|
"logps/rejected": -384.0,
|
||
|
|
"loss": 0.5215,
|
||
|
|
"loss/chosen-sft": 1.484375,
|
||
|
|
"loss/dpo": 0.48828125,
|
||
|
|
"rewards/accuracies": 0.78125,
|
||
|
|
"rewards/chosen": -1.046875,
|
||
|
|
"rewards/margins": 0.91015625,
|
||
|
|
"rewards/rejected": -1.953125,
|
||
|
|
"step": 198
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8805309734513275,
|
||
|
|
"grad_norm": 86.90143585205078,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.8046875,
|
||
|
|
"logits/rejected": -0.81640625,
|
||
|
|
"logps/chosen": -454.0,
|
||
|
|
"logps/rejected": -482.0,
|
||
|
|
"loss": 0.541,
|
||
|
|
"loss/chosen-sft": 1.7578125,
|
||
|
|
"loss/dpo": 0.55859375,
|
||
|
|
"rewards/accuracies": 0.78125,
|
||
|
|
"rewards/chosen": -1.3828125,
|
||
|
|
"rewards/margins": 1.0625,
|
||
|
|
"rewards/rejected": -2.4375,
|
||
|
|
"step": 199
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8849557522123894,
|
||
|
|
"grad_norm": 24.392606735229492,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.6171875,
|
||
|
|
"logits/rejected": -0.73828125,
|
||
|
|
"logps/chosen": -326.0,
|
||
|
|
"logps/rejected": -384.0,
|
||
|
|
"loss": 0.4583,
|
||
|
|
"loss/chosen-sft": 1.59375,
|
||
|
|
"loss/dpo": 0.50390625,
|
||
|
|
"rewards/accuracies": 0.71875,
|
||
|
|
"rewards/chosen": -0.953125,
|
||
|
|
"rewards/margins": 0.99609375,
|
||
|
|
"rewards/rejected": -1.953125,
|
||
|
|
"step": 200
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8893805309734514,
|
||
|
|
"grad_norm": 67.55127716064453,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.5078125,
|
||
|
|
"logits/rejected": -0.5546875,
|
||
|
|
"logps/chosen": -408.0,
|
||
|
|
"logps/rejected": -484.0,
|
||
|
|
"loss": 0.5017,
|
||
|
|
"loss/chosen-sft": 1.5625,
|
||
|
|
"loss/dpo": 0.494140625,
|
||
|
|
"rewards/accuracies": 0.8125,
|
||
|
|
"rewards/chosen": -1.1328125,
|
||
|
|
"rewards/margins": 1.25,
|
||
|
|
"rewards/rejected": -2.375,
|
||
|
|
"step": 201
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8938053097345132,
|
||
|
|
"grad_norm": 35.01057434082031,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.69921875,
|
||
|
|
"logits/rejected": -0.640625,
|
||
|
|
"logps/chosen": -336.0,
|
||
|
|
"logps/rejected": -458.0,
|
||
|
|
"loss": 0.5005,
|
||
|
|
"loss/chosen-sft": 1.4609375,
|
||
|
|
"loss/dpo": 0.478515625,
|
||
|
|
"rewards/accuracies": 0.875,
|
||
|
|
"rewards/chosen": -0.75390625,
|
||
|
|
"rewards/margins": 1.3046875,
|
||
|
|
"rewards/rejected": -2.0625,
|
||
|
|
"step": 202
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.8982300884955752,
|
||
|
|
"grad_norm": 24.339378356933594,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.408203125,
|
||
|
|
"logits/rejected": -0.435546875,
|
||
|
|
"logps/chosen": -310.0,
|
||
|
|
"logps/rejected": -408.0,
|
||
|
|
"loss": 0.4685,
|
||
|
|
"loss/chosen-sft": 1.28125,
|
||
|
|
"loss/dpo": 0.455078125,
|
||
|
|
"rewards/accuracies": 0.75,
|
||
|
|
"rewards/chosen": -0.73828125,
|
||
|
|
"rewards/margins": 1.234375,
|
||
|
|
"rewards/rejected": -1.9765625,
|
||
|
|
"step": 203
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9026548672566371,
|
||
|
|
"grad_norm": 135.99609375,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.498046875,
|
||
|
|
"logits/rejected": -0.5625,
|
||
|
|
"logps/chosen": -314.0,
|
||
|
|
"logps/rejected": -396.0,
|
||
|
|
"loss": 0.4561,
|
||
|
|
"loss/chosen-sft": 1.3125,
|
||
|
|
"loss/dpo": 0.47265625,
|
||
|
|
"rewards/accuracies": 0.78125,
|
||
|
|
"rewards/chosen": -0.62109375,
|
||
|
|
"rewards/margins": 0.9140625,
|
||
|
|
"rewards/rejected": -1.53125,
|
||
|
|
"step": 204
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9070796460176991,
|
||
|
|
"grad_norm": 137.96900939941406,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.50390625,
|
||
|
|
"logits/rejected": -0.7109375,
|
||
|
|
"logps/chosen": -330.0,
|
||
|
|
"logps/rejected": -432.0,
|
||
|
|
"loss": 0.5061,
|
||
|
|
"loss/chosen-sft": 1.3359375,
|
||
|
|
"loss/dpo": 0.470703125,
|
||
|
|
"rewards/accuracies": 0.78125,
|
||
|
|
"rewards/chosen": -0.7109375,
|
||
|
|
"rewards/margins": 0.953125,
|
||
|
|
"rewards/rejected": -1.6640625,
|
||
|
|
"step": 205
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.911504424778761,
|
||
|
|
"grad_norm": 165.43182373046875,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.4609375,
|
||
|
|
"logits/rejected": -0.3828125,
|
||
|
|
"logps/chosen": -260.0,
|
||
|
|
"logps/rejected": -288.0,
|
||
|
|
"loss": 0.5105,
|
||
|
|
"loss/chosen-sft": 1.265625,
|
||
|
|
"loss/dpo": 0.5625,
|
||
|
|
"rewards/accuracies": 0.625,
|
||
|
|
"rewards/chosen": -0.609375,
|
||
|
|
"rewards/margins": 0.494140625,
|
||
|
|
"rewards/rejected": -1.109375,
|
||
|
|
"step": 206
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.915929203539823,
|
||
|
|
"grad_norm": 216.99935913085938,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.43359375,
|
||
|
|
"logits/rejected": -0.59375,
|
||
|
|
"logps/chosen": -308.0,
|
||
|
|
"logps/rejected": -344.0,
|
||
|
|
"loss": 0.4929,
|
||
|
|
"loss/chosen-sft": 1.3125,
|
||
|
|
"loss/dpo": 0.466796875,
|
||
|
|
"rewards/accuracies": 0.78125,
|
||
|
|
"rewards/chosen": -0.57421875,
|
||
|
|
"rewards/margins": 0.75390625,
|
||
|
|
"rewards/rejected": -1.328125,
|
||
|
|
"step": 207
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9203539823008849,
|
||
|
|
"grad_norm": 51.624351501464844,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.5234375,
|
||
|
|
"logits/rejected": -0.6328125,
|
||
|
|
"logps/chosen": -304.0,
|
||
|
|
"logps/rejected": -286.0,
|
||
|
|
"loss": 0.5669,
|
||
|
|
"loss/chosen-sft": 1.4140625,
|
||
|
|
"loss/dpo": 0.59375,
|
||
|
|
"rewards/accuracies": 0.65625,
|
||
|
|
"rewards/chosen": -0.60546875,
|
||
|
|
"rewards/margins": 0.314453125,
|
||
|
|
"rewards/rejected": -0.91796875,
|
||
|
|
"step": 208
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9247787610619469,
|
||
|
|
"grad_norm": 29.490407943725586,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.55859375,
|
||
|
|
"logits/rejected": -0.5703125,
|
||
|
|
"logps/chosen": -366.0,
|
||
|
|
"logps/rejected": -350.0,
|
||
|
|
"loss": 0.5042,
|
||
|
|
"loss/chosen-sft": 1.234375,
|
||
|
|
"loss/dpo": 0.46484375,
|
||
|
|
"rewards/accuracies": 0.875,
|
||
|
|
"rewards/chosen": -0.451171875,
|
||
|
|
"rewards/margins": 0.796875,
|
||
|
|
"rewards/rejected": -1.25,
|
||
|
|
"step": 209
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9292035398230089,
|
||
|
|
"grad_norm": 159.65997314453125,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.67578125,
|
||
|
|
"logits/rejected": -0.55859375,
|
||
|
|
"logps/chosen": -340.0,
|
||
|
|
"logps/rejected": -346.0,
|
||
|
|
"loss": 0.5374,
|
||
|
|
"loss/chosen-sft": 1.3125,
|
||
|
|
"loss/dpo": 0.474609375,
|
||
|
|
"rewards/accuracies": 0.71875,
|
||
|
|
"rewards/chosen": -0.6328125,
|
||
|
|
"rewards/margins": 0.83984375,
|
||
|
|
"rewards/rejected": -1.46875,
|
||
|
|
"step": 210
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9336283185840708,
|
||
|
|
"grad_norm": 22.276451110839844,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.41015625,
|
||
|
|
"logits/rejected": -0.46875,
|
||
|
|
"logps/chosen": -308.0,
|
||
|
|
"logps/rejected": -324.0,
|
||
|
|
"loss": 0.5066,
|
||
|
|
"loss/chosen-sft": 1.359375,
|
||
|
|
"loss/dpo": 0.57421875,
|
||
|
|
"rewards/accuracies": 0.75,
|
||
|
|
"rewards/chosen": -0.78515625,
|
||
|
|
"rewards/margins": 0.53515625,
|
||
|
|
"rewards/rejected": -1.3203125,
|
||
|
|
"step": 211
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9380530973451328,
|
||
|
|
"grad_norm": 62.60710525512695,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.68359375,
|
||
|
|
"logits/rejected": -0.76171875,
|
||
|
|
"logps/chosen": -378.0,
|
||
|
|
"logps/rejected": -396.0,
|
||
|
|
"loss": 0.4917,
|
||
|
|
"loss/chosen-sft": 1.5,
|
||
|
|
"loss/dpo": 0.578125,
|
||
|
|
"rewards/accuracies": 0.71875,
|
||
|
|
"rewards/chosen": -0.94921875,
|
||
|
|
"rewards/margins": 0.8984375,
|
||
|
|
"rewards/rejected": -1.8515625,
|
||
|
|
"step": 212
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9424778761061947,
|
||
|
|
"grad_norm": 183.3363037109375,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.62109375,
|
||
|
|
"logits/rejected": -0.71875,
|
||
|
|
"logps/chosen": -328.0,
|
||
|
|
"logps/rejected": -326.0,
|
||
|
|
"loss": 0.426,
|
||
|
|
"loss/chosen-sft": 1.375,
|
||
|
|
"loss/dpo": 0.47265625,
|
||
|
|
"rewards/accuracies": 0.6875,
|
||
|
|
"rewards/chosen": -0.80078125,
|
||
|
|
"rewards/margins": 0.8046875,
|
||
|
|
"rewards/rejected": -1.609375,
|
||
|
|
"step": 213
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9469026548672567,
|
||
|
|
"grad_norm": 182.4455108642578,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.62890625,
|
||
|
|
"logits/rejected": -0.57421875,
|
||
|
|
"logps/chosen": -358.0,
|
||
|
|
"logps/rejected": -480.0,
|
||
|
|
"loss": 0.5305,
|
||
|
|
"loss/chosen-sft": 1.4765625,
|
||
|
|
"loss/dpo": 0.5234375,
|
||
|
|
"rewards/accuracies": 0.8125,
|
||
|
|
"rewards/chosen": -0.80078125,
|
||
|
|
"rewards/margins": 1.609375,
|
||
|
|
"rewards/rejected": -2.40625,
|
||
|
|
"step": 214
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9513274336283186,
|
||
|
|
"grad_norm": 112.33076477050781,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.52734375,
|
||
|
|
"logits/rejected": -0.58203125,
|
||
|
|
"logps/chosen": -344.0,
|
||
|
|
"logps/rejected": -448.0,
|
||
|
|
"loss": 0.4475,
|
||
|
|
"loss/chosen-sft": 1.3359375,
|
||
|
|
"loss/dpo": 0.3359375,
|
||
|
|
"rewards/accuracies": 0.90625,
|
||
|
|
"rewards/chosen": -0.61328125,
|
||
|
|
"rewards/margins": 1.4453125,
|
||
|
|
"rewards/rejected": -2.0625,
|
||
|
|
"step": 215
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9557522123893806,
|
||
|
|
"grad_norm": 125.87716674804688,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.4296875,
|
||
|
|
"logits/rejected": -0.47265625,
|
||
|
|
"logps/chosen": -314.0,
|
||
|
|
"logps/rejected": -462.0,
|
||
|
|
"loss": 0.4885,
|
||
|
|
"loss/chosen-sft": 1.5078125,
|
||
|
|
"loss/dpo": 0.365234375,
|
||
|
|
"rewards/accuracies": 0.875,
|
||
|
|
"rewards/chosen": -0.84375,
|
||
|
|
"rewards/margins": 1.5546875,
|
||
|
|
"rewards/rejected": -2.390625,
|
||
|
|
"step": 216
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9601769911504425,
|
||
|
|
"grad_norm": 159.3242950439453,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.53515625,
|
||
|
|
"logits/rejected": -0.53125,
|
||
|
|
"logps/chosen": -330.0,
|
||
|
|
"logps/rejected": -398.0,
|
||
|
|
"loss": 0.4597,
|
||
|
|
"loss/chosen-sft": 1.34375,
|
||
|
|
"loss/dpo": 0.4140625,
|
||
|
|
"rewards/accuracies": 0.8125,
|
||
|
|
"rewards/chosen": -0.765625,
|
||
|
|
"rewards/margins": 1.171875,
|
||
|
|
"rewards/rejected": -1.9453125,
|
||
|
|
"step": 217
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9646017699115044,
|
||
|
|
"grad_norm": 36.168601989746094,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.451171875,
|
||
|
|
"logits/rejected": -0.44140625,
|
||
|
|
"logps/chosen": -326.0,
|
||
|
|
"logps/rejected": -366.0,
|
||
|
|
"loss": 0.4858,
|
||
|
|
"loss/chosen-sft": 1.3203125,
|
||
|
|
"loss/dpo": 0.52734375,
|
||
|
|
"rewards/accuracies": 0.78125,
|
||
|
|
"rewards/chosen": -0.953125,
|
||
|
|
"rewards/margins": 0.94921875,
|
||
|
|
"rewards/rejected": -1.8984375,
|
||
|
|
"step": 218
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9690265486725663,
|
||
|
|
"grad_norm": 19.605365753173828,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.4921875,
|
||
|
|
"logits/rejected": -0.55859375,
|
||
|
|
"logps/chosen": -284.0,
|
||
|
|
"logps/rejected": -370.0,
|
||
|
|
"loss": 0.4822,
|
||
|
|
"loss/chosen-sft": 1.3046875,
|
||
|
|
"loss/dpo": 0.40234375,
|
||
|
|
"rewards/accuracies": 0.84375,
|
||
|
|
"rewards/chosen": -0.90234375,
|
||
|
|
"rewards/margins": 1.046875,
|
||
|
|
"rewards/rejected": -1.9453125,
|
||
|
|
"step": 219
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9734513274336283,
|
||
|
|
"grad_norm": 143.3219451904297,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.37890625,
|
||
|
|
"logits/rejected": -0.4765625,
|
||
|
|
"logps/chosen": -366.0,
|
||
|
|
"logps/rejected": -510.0,
|
||
|
|
"loss": 0.509,
|
||
|
|
"loss/chosen-sft": 1.515625,
|
||
|
|
"loss/dpo": 0.54296875,
|
||
|
|
"rewards/accuracies": 0.6875,
|
||
|
|
"rewards/chosen": -1.2734375,
|
||
|
|
"rewards/margins": 1.03125,
|
||
|
|
"rewards/rejected": -2.296875,
|
||
|
|
"step": 220
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9778761061946902,
|
||
|
|
"grad_norm": 268.8928527832031,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.392578125,
|
||
|
|
"logits/rejected": -0.30859375,
|
||
|
|
"logps/chosen": -249.0,
|
||
|
|
"logps/rejected": -414.0,
|
||
|
|
"loss": 0.5212,
|
||
|
|
"loss/chosen-sft": 1.3984375,
|
||
|
|
"loss/dpo": 0.404296875,
|
||
|
|
"rewards/accuracies": 0.875,
|
||
|
|
"rewards/chosen": -0.78125,
|
||
|
|
"rewards/margins": 1.53125,
|
||
|
|
"rewards/rejected": -2.3125,
|
||
|
|
"step": 221
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9823008849557522,
|
||
|
|
"grad_norm": 295.1648864746094,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.56640625,
|
||
|
|
"logits/rejected": -0.6328125,
|
||
|
|
"logps/chosen": -368.0,
|
||
|
|
"logps/rejected": -496.0,
|
||
|
|
"loss": 0.5183,
|
||
|
|
"loss/chosen-sft": 1.5,
|
||
|
|
"loss/dpo": 0.5,
|
||
|
|
"rewards/accuracies": 0.75,
|
||
|
|
"rewards/chosen": -1.265625,
|
||
|
|
"rewards/margins": 0.96875,
|
||
|
|
"rewards/rejected": -2.234375,
|
||
|
|
"step": 222
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9867256637168141,
|
||
|
|
"grad_norm": 21.13129425048828,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.498046875,
|
||
|
|
"logits/rejected": -0.6328125,
|
||
|
|
"logps/chosen": -346.0,
|
||
|
|
"logps/rejected": -374.0,
|
||
|
|
"loss": 0.4485,
|
||
|
|
"loss/chosen-sft": 1.5234375,
|
||
|
|
"loss/dpo": 0.5234375,
|
||
|
|
"rewards/accuracies": 0.75,
|
||
|
|
"rewards/chosen": -1.0390625,
|
||
|
|
"rewards/margins": 0.984375,
|
||
|
|
"rewards/rejected": -2.03125,
|
||
|
|
"step": 223
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.9911504424778761,
|
||
|
|
"grad_norm": 199.18679809570312,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.62890625,
|
||
|
|
"logits/rejected": -0.625,
|
||
|
|
"logps/chosen": -338.0,
|
||
|
|
"logps/rejected": -406.0,
|
||
|
|
"loss": 0.4338,
|
||
|
|
"loss/chosen-sft": 1.484375,
|
||
|
|
"loss/dpo": 0.5,
|
||
|
|
"rewards/accuracies": 0.75,
|
||
|
|
"rewards/chosen": -1.109375,
|
||
|
|
"rewards/margins": 0.94140625,
|
||
|
|
"rewards/rejected": -2.046875,
|
||
|
|
"step": 224
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 0.995575221238938,
|
||
|
|
"grad_norm": 238.7730255126953,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.478515625,
|
||
|
|
"logits/rejected": -0.30078125,
|
||
|
|
"logps/chosen": -300.0,
|
||
|
|
"logps/rejected": -416.0,
|
||
|
|
"loss": 0.4624,
|
||
|
|
"loss/chosen-sft": 1.4765625,
|
||
|
|
"loss/dpo": 0.451171875,
|
||
|
|
"rewards/accuracies": 0.78125,
|
||
|
|
"rewards/chosen": -0.9609375,
|
||
|
|
"rewards/margins": 1.2890625,
|
||
|
|
"rewards/rejected": -2.25,
|
||
|
|
"step": 225
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0,
|
||
|
|
"grad_norm": 94.32278442382812,
|
||
|
|
"learning_rate": 7e-07,
|
||
|
|
"logits/chosen": -0.482421875,
|
||
|
|
"logits/rejected": -0.55078125,
|
||
|
|
"logps/chosen": -312.0,
|
||
|
|
"logps/rejected": -424.0,
|
||
|
|
"loss": 0.4558,
|
||
|
|
"loss/chosen-sft": 1.359375,
|
||
|
|
"loss/dpo": 0.478515625,
|
||
|
|
"rewards/accuracies": 0.75,
|
||
|
|
"rewards/chosen": -0.828125,
|
||
|
|
"rewards/margins": 1.109375,
|
||
|
|
"rewards/rejected": -1.9296875,
|
||
|
|
"step": 226
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"epoch": 1.0,
|
||
|
|
"step": 226,
|
||
|
|
"total_flos": 0.0,
|
||
|
|
"train_loss": 0.5733826223727876,
|
||
|
|
"train_runtime": 2164.3688,
|
||
|
|
"train_samples_per_second": 26.647,
|
||
|
|
"train_steps_per_second": 0.104
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"logging_steps": 1,
|
||
|
|
"max_steps": 226,
|
||
|
|
"num_input_tokens_seen": 0,
|
||
|
|
"num_train_epochs": 1,
|
||
|
|
"save_steps": 1000,
|
||
|
|
"stateful_callbacks": {
|
||
|
|
"TrainerControl": {
|
||
|
|
"args": {
|
||
|
|
"should_epoch_stop": false,
|
||
|
|
"should_evaluate": false,
|
||
|
|
"should_log": false,
|
||
|
|
"should_save": true,
|
||
|
|
"should_training_stop": true
|
||
|
|
},
|
||
|
|
"attributes": {}
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"total_flos": 0.0,
|
||
|
|
"train_batch_size": 32,
|
||
|
|
"trial_name": null,
|
||
|
|
"trial_params": null
|
||
|
|
}
|