Files
qwen3-8b-base-epsilon-dpo-u…/eval_results.json
ModelHub XC 163929230f 初始化项目,由ModelHub XC社区提供模型
Model: jackf857/qwen3-8b-base-epsilon-dpo-ultrafeedback-4xh200-batch-128
Source: Original Platform
2026-06-12 14:32:41 +08:00

26 lines
1.1 KiB
JSON

{
"epoch": 0.9989528795811519,
"eval_epsilon_dpo/beta": 0.001474155462346971,
"eval_epsilon_dpo/beta_margin_grad_mean": -0.47817176580429077,
"eval_epsilon_dpo/beta_margin_grad_std": 0.03784441202878952,
"eval_epsilon_dpo/beta_margin_mean": 0.08784138411283493,
"eval_epsilon_dpo/beta_margin_std": 0.15235216915607452,
"eval_epsilon_dpo/loss_margin_mean": 60.283775329589844,
"eval_kl/n_epsilon_steps": 0.28600001335144043,
"eval_kl/p_epsilon_steps": 0.7074999809265137,
"eval_logits/chosen": 1.5632102489471436,
"eval_logits/rejected": 1.9477818012237549,
"eval_logps/chosen": -340.5993957519531,
"eval_logps/ref_chosen": -280.4282531738281,
"eval_logps/ref_rejected": -264.7044677734375,
"eval_logps/rejected": -385.1593933105469,
"eval_loss": 0.653335452079773,
"eval_rewards/accuracies": 0.7164999842643738,
"eval_rewards/chosen": -0.08922950178384781,
"eval_rewards/margins": 0.08784138411283493,
"eval_rewards/rejected": -0.17707087099552155,
"eval_runtime": 103.2584,
"eval_samples": 2000,
"eval_samples_per_second": 19.369,
"eval_steps_per_second": 1.211
}