Files
qwen3-8b-base-epsilon-dpo-h…/eval_results.json
ModelHub XC 3b8f28089f 初始化项目,由ModelHub XC社区提供模型
Model: jackf857/qwen3-8b-base-epsilon-dpo-hh-helpful-4xh200-batch-64-20260424-040306
Source: Original Platform
2026-05-14 01:00:02 +08:00

20 lines
772 B
JSON

{
"epoch": 1.0,
"eval_kl/n_epsilon_steps": 0.2517123222351074,
"eval_kl/p_epsilon_steps": 0.7478595972061157,
"eval_logits/chosen": -7.606959342956543,
"eval_logits/rejected": -7.186291217803955,
"eval_logps/chosen": -252.70166015625,
"eval_logps/ref_chosen": -100.49356842041016,
"eval_logps/ref_rejected": -94.06775665283203,
"eval_logps/rejected": -314.7497253417969,
"eval_loss": 0.6180921196937561,
"eval_rewards/accuracies": 0.7577054500579834,
"eval_rewards/chosen": -0.3851844370365143,
"eval_rewards/margins": 0.17150068283081055,
"eval_rewards/rejected": -0.5566850900650024,
"eval_runtime": 48.3953,
"eval_samples": 2339,
"eval_samples_per_second": 48.331,
"eval_steps_per_second": 1.529
}