{ "method": "opd", "student_model": "Qwen/Qwen2-0.5B-Instruct", "teacher_model": "Qwen/Qwen2-7B-Instruct", "train_dataset": "EleutherAI/hendrycks_math/all", "eval_dataset": "HuggingFaceH4/MATH-500", "train_samples": 7500, "eval_samples": 500, "opd_mode": "expectation", "trust_region": true, "ppo_clip_eps": 0.2, "use_correction": false, "correction_alpha": 0.2, "correction_lr": 0.001, "num_inner_steps": 10, "replay_buffer_size": 10, "max_steps": 800, "num_epochs": 1.0, "lr": 2e-06, "batch_size": 1, "grad_accum": 8, "output_dir": "opd_math500_S-Qwen2-0.5B-Instruct_T-Qwen2-7B-Instruct_expectation_tr_L10_buf10_20260420_154437", "use_lora": false, "lora_r": null, "lora_alpha": null, "lora_dropout": null, "lora_target_modules": null }