Files
qwen3-8b-base-epsilon-dpo-h…/all_results.json
ModelHub XC 42e9e465a7 初始化项目,由ModelHub XC社区提供模型
Model: jackf857/qwen3-8b-base-epsilon-dpo-hh-harmless-4xh200-batch-64-20260424-040415
Source: Original Platform
2026-05-14 00:59:58 +08:00

26 lines
987 B
JSON

{
"epoch": 0.999244142101285,
"eval_kl/n_epsilon_steps": 0.26716548204421997,
"eval_kl/p_epsilon_steps": 0.7293133735656738,
"eval_logits/chosen": -1.3011770248413086,
"eval_logits/rejected": -1.4259974956512451,
"eval_logps/chosen": -150.5532684326172,
"eval_logps/ref_chosen": -86.90177917480469,
"eval_logps/ref_rejected": -96.69639587402344,
"eval_logps/rejected": -199.7850799560547,
"eval_loss": 0.5832020044326782,
"eval_rewards/accuracies": 0.7341549396514893,
"eval_rewards/chosen": -0.49299976229667664,
"eval_rewards/margins": 0.3015401065349579,
"eval_rewards/rejected": -0.7945398688316345,
"eval_runtime": 46.7622,
"eval_samples": 2303,
"eval_samples_per_second": 49.249,
"eval_steps_per_second": 1.54,
"total_flos": 0.0,
"train_loss": 1.1575256359739492,
"train_runtime": 3253.0214,
"train_samples": 42336,
"train_samples_per_second": 13.014,
"train_steps_per_second": 0.203
}