初始化项目,由ModelHub XC社区提供模型
Model: QpiEImitation/gkd_gsm8k_S-Qwen2.5-3B-Instruct_T-Qwen2-7B-Instruct Source: Original Platform
This commit is contained in:
20
run_config.json
Normal file
20
run_config.json
Normal file
@@ -0,0 +1,20 @@
|
||||
{
|
||||
"method": "gkd",
|
||||
"student_model": "Qwen/Qwen2.5-3B-Instruct",
|
||||
"teacher_model": "Qwen/Qwen2-7B-Instruct",
|
||||
"dataset": "gsm8k/main",
|
||||
"train_samples": 2000,
|
||||
"eval_samples": 256,
|
||||
"beta": 0.5,
|
||||
"max_steps": 800,
|
||||
"num_epochs": 1.0,
|
||||
"lr": 2e-06,
|
||||
"batch_size": 1,
|
||||
"grad_accum": 8,
|
||||
"output_dir": "gkd_gsm8k_S-Qwen2.5-3B-Instruct_T-Qwen2-7B-Instruct_20260420_154428",
|
||||
"use_lora": false,
|
||||
"lora_r": null,
|
||||
"lora_alpha": null,
|
||||
"lora_dropout": null,
|
||||
"lora_target_modules": null
|
||||
}
|
||||
Reference in New Issue
Block a user