Model: jackf857/qwen3-8b-base-epsilon-dpo-ultrafeedback-4xh200-batch-128 Source: Original Platform
26 lines
1.1 KiB
JSON
26 lines
1.1 KiB
JSON
{
|
|
"epoch": 0.9989528795811519,
|
|
"eval_epsilon_dpo/beta": 0.001474155462346971,
|
|
"eval_epsilon_dpo/beta_margin_grad_mean": -0.47817176580429077,
|
|
"eval_epsilon_dpo/beta_margin_grad_std": 0.03784441202878952,
|
|
"eval_epsilon_dpo/beta_margin_mean": 0.08784138411283493,
|
|
"eval_epsilon_dpo/beta_margin_std": 0.15235216915607452,
|
|
"eval_epsilon_dpo/loss_margin_mean": 60.283775329589844,
|
|
"eval_kl/n_epsilon_steps": 0.28600001335144043,
|
|
"eval_kl/p_epsilon_steps": 0.7074999809265137,
|
|
"eval_logits/chosen": 1.5632102489471436,
|
|
"eval_logits/rejected": 1.9477818012237549,
|
|
"eval_logps/chosen": -340.5993957519531,
|
|
"eval_logps/ref_chosen": -280.4282531738281,
|
|
"eval_logps/ref_rejected": -264.7044677734375,
|
|
"eval_logps/rejected": -385.1593933105469,
|
|
"eval_loss": 0.653335452079773,
|
|
"eval_rewards/accuracies": 0.7164999842643738,
|
|
"eval_rewards/chosen": -0.08922950178384781,
|
|
"eval_rewards/margins": 0.08784138411283493,
|
|
"eval_rewards/rejected": -0.17707087099552155,
|
|
"eval_runtime": 103.2584,
|
|
"eval_samples": 2000,
|
|
"eval_samples_per_second": 19.369,
|
|
"eval_steps_per_second": 1.211
|
|
} |