Qwen3-8B-SOCIALIQA-DPO/dpo_run_config.json

{
  "model_name_or_path": "Qwen/Qwen3-8B",
  "ref_model_name_or_path": "Qwen/Qwen3-8B",
  "train_dataset_path": "../data/SOCIALIQA/train_dpo.jsonl",
  "eval_dataset_path": "",
  "split_mode": "train_only(val_ratio=0.0500)",
  "val_ratio": 0.05,
  "learning_rate": 5e-07,
  "beta": 0.1,
  "epochs": 1.0,
  "warmup_steps": 150,
  "max_length": 512,
  "max_prompt_length": 384,
  "global_batch_size": 16,
  "per_device_train_batch_size": 1,
  "gradient_accumulation_steps": 16,
  "optimizer": "rmsprop",
  "freeze": {
    "stack_name": "model.layers",
    "num_layers": 36,
    "selected_layer_indices_0based": [
      8,
      9,
      10,
      11,
      12,
      13,
      14,
      15,
      16,
      17,
      18,
      19,
      20,
      21,
      22,
      23,
      24,
      25,
      26,
      27,
      28,
      29,
      30,
      31,
      32,
      33,
      34,
      35
    ],
    "selected_layer_indices_1based": [
      9,
      10,
      11,
      12,
      13,
      14,
      15,
      16,
      17,
      18,
      19,
      20,
      21,
      22,
      23,
      24,
      25,
      26,
      27,
      28,
      29,
      30,
      31,
      32,
      33,
      34,
      35,
      36
    ],
    "final_norm_modules": [],
    "lm_head_enabled": false,
    "trainable_params": 5402500096,
    "total_params": 8190735360,
    "trainable_ratio": 0.6595866986966112
  },
  "num_train_examples": 31740,
  "num_eval_examples": 1670,
  "precompute_ref_log_probs": true,
  "precompute_ref_batch_size": 1,
  "use_logits_to_keep": true,
  "torch_empty_cache_steps": 0
}