{ "epoch": 0.9989528795811519, "eval_log_odds_chosen": 0.2855910658836365, "eval_log_odds_ratio": -0.6565974354743958, "eval_logits/chosen": -0.49791988730430603, "eval_logits/rejected": -0.48265013098716736, "eval_logps/chosen": -0.8755427598953247, "eval_logps/rejected": -1.060484766960144, "eval_loss": 1.2236413955688477, "eval_nll_loss": 1.2166308164596558, "eval_rewards/accuracies": 0.600806474685669, "eval_rewards/chosen": -0.008755426853895187, "eval_rewards/margins": 0.0018494179239496589, "eval_rewards/rejected": -0.010604845359921455, "eval_runtime": 44.1184, "eval_samples": 2000, "eval_samples_per_second": 45.333, "eval_steps_per_second": 1.428, "total_flos": 0.0, "train_loss": 5.24161350602124, "train_runtime": 5082.9537, "train_samples": 61135, "train_samples_per_second": 12.027, "train_steps_per_second": 0.094 }