Files
qwen3-8b-base-new-dpo-hh-ha…/eval_results.json

17 lines
635 B
JSON
Raw Normal View History

{
"epoch": 0.999244142101285,
"eval_fcm_dpo/beta": 0.5098052024841309,
"eval_logits/chosen": 1.7105576992034912,
"eval_logits/rejected": 1.603922724723816,
"eval_logps/chosen": -82.0059585571289,
"eval_logps/ref_chosen": -86.90177917480469,
"eval_logps/ref_rejected": -96.69639587402344,
"eval_logps/rejected": -93.3553695678711,
"eval_loss": 0.54054856300354,
"eval_margin_dpo/margin_mean": 1.554817795753479,
"eval_margin_dpo/margin_std": 2.5823159217834473,
"eval_runtime": 42.2442,
"eval_samples": 2303,
"eval_samples_per_second": 54.516,
"eval_steps_per_second": 1.704
}