{ "method": "gkd", "student_model": "Qwen/Qwen2-1.5B-Instruct", "teacher_model": "Qwen/Qwen2-7B-Instruct", "dataset": "gsm8k/main", "train_samples": 2000, "eval_samples": 256, "beta": 0.5, "max_steps": 800, "num_epochs": 1.0, "lr": 2e-06, "batch_size": 1, "grad_accum": 8, "output_dir": "gkd_gsm8k_S-Qwen2-1.5B-Instruct_T-Qwen2-7B-Instruct_20260420_154424", "use_lora": false, "lora_r": null, "lora_alpha": null, "lora_dropout": null, "lora_target_modules": null }