opd_gsm8k_S-Qwen2-0.5B-Inst…/run_config.json

{
  "student_model": "Qwen/Qwen2-0.5B-Instruct",
  "teacher_model": "Qwen/Qwen2-7B-Instruct",
  "dataset": "gsm8k/main",
  "train_samples": 2000,
  "eval_samples": 256,
  "opd_mode": "expectation",
  "trust_region": true,
  "ppo_clip_eps": 0.2,
  "use_correction": false,
  "correction_alpha": 0.2,
  "correction_lr": 0.001,
  "num_inner_steps": 10,
  "replay_buffer_size": 10,
  "max_steps": 800,
  "num_epochs": 1.0,
  "lr": 2e-06,
  "batch_size": 1,
  "grad_accum": 8,
  "output_dir": "opd_gsm8k_S-Qwen2-0.5B-Instruct_T-Qwen2-7B-Instruct_expectation_tr_L10_buf10_20260420_154428",
  "use_lora": false,
  "lora_r": null,
  "lora_alpha": null,
  "lora_dropout": null,
  "lora_target_modules": null
}