32 lines
1.3 KiB
JSON
32 lines
1.3 KiB
JSON
|
|
{
|
||
|
|
"epoch": 1.0,
|
||
|
|
"eval_epsilon_dpo/beta": 0.00137312151491642,
|
||
|
|
"eval_epsilon_dpo/beta_margin_grad_mean": -0.457266241312027,
|
||
|
|
"eval_epsilon_dpo/beta_margin_grad_std": 0.07400882244110107,
|
||
|
|
"eval_epsilon_dpo/beta_margin_mean": 0.17634743452072144,
|
||
|
|
"eval_epsilon_dpo/beta_margin_std": 0.3081098198890686,
|
||
|
|
"eval_epsilon_dpo/loss_margin_mean": 129.92251586914062,
|
||
|
|
"eval_kl/n_epsilon_steps": 0.3146404027938843,
|
||
|
|
"eval_kl/p_epsilon_steps": 0.6840753555297852,
|
||
|
|
"eval_logits/chosen": 1.162959337234497,
|
||
|
|
"eval_logits/rejected": 1.6319952011108398,
|
||
|
|
"eval_logps/chosen": -537.3916015625,
|
||
|
|
"eval_logps/ref_chosen": -79.05104064941406,
|
||
|
|
"eval_logps/ref_rejected": -86.79793548583984,
|
||
|
|
"eval_logps/rejected": -675.0609741210938,
|
||
|
|
"eval_loss": 0.621041476726532,
|
||
|
|
"eval_rewards/accuracies": 0.712756872177124,
|
||
|
|
"eval_rewards/chosen": -0.6303684711456299,
|
||
|
|
"eval_rewards/margins": 0.17634743452072144,
|
||
|
|
"eval_rewards/rejected": -0.8067159056663513,
|
||
|
|
"eval_runtime": 43.1088,
|
||
|
|
"eval_samples": 2339,
|
||
|
|
"eval_samples_per_second": 54.258,
|
||
|
|
"eval_steps_per_second": 1.717,
|
||
|
|
"total_flos": 0.0,
|
||
|
|
"train_loss": 0.8920053139482126,
|
||
|
|
"train_runtime": 3235.1211,
|
||
|
|
"train_samples": 43598,
|
||
|
|
"train_samples_per_second": 13.476,
|
||
|
|
"train_steps_per_second": 0.211
|
||
|
|
}
|