Model: QpiEImitation/opd_gsm8k_S-Qwen2-0.5B-Instruct_T-Qwen2-7B-Instruct Source: Original Platform
26 lines
700 B
JSON
{
  "student_model": "Qwen/Qwen2-0.5B-Instruct",
  "teacher_model": "Qwen/Qwen2-7B-Instruct",
  "dataset": "gsm8k/main",
  "train_samples": 2000,
  "eval_samples": 256,
  "opd_mode": "expectation",
  "trust_region": true,
  "ppo_clip_eps": 0.2,
  "use_correction": false,
  "correction_alpha": 0.2,
  "correction_lr": 0.001,
  "num_inner_steps": 10,
  "replay_buffer_size": 10,
  "max_steps": 800,
  "num_epochs": 1.0,
  "lr": 2e-06,
  "batch_size": 1,
  "grad_accum": 8,
  "output_dir": "opd_gsm8k_S-Qwen2-0.5B-Instruct_T-Qwen2-7B-Instruct_expectation_tr_L10_buf10_20260420_154428",
  "use_lora": false,
  "lora_r": null,
  "lora_alpha": null,
  "lora_dropout": null,
  "lora_target_modules": null
}