Files
Qwen3-8B-SOCIALIQA-DPO/dpo_run_config.json
ModelHub XC 43a49d684d 初始化项目,由ModelHub XC社区提供模型
Model: Jihyung803/Qwen3-8B-SOCIALIQA-DPO
Source: Original Platform
2026-05-29 14:20:50 +08:00

94 lines
1.5 KiB
JSON

{
"model_name_or_path": "Qwen/Qwen3-8B",
"ref_model_name_or_path": "Qwen/Qwen3-8B",
"train_dataset_path": "../data/SOCIALIQA/train_dpo.jsonl",
"eval_dataset_path": "",
"split_mode": "train_only(val_ratio=0.0500)",
"val_ratio": 0.05,
"learning_rate": 5e-07,
"beta": 0.1,
"epochs": 1.0,
"warmup_steps": 150,
"max_length": 512,
"max_prompt_length": 384,
"global_batch_size": 16,
"per_device_train_batch_size": 1,
"gradient_accumulation_steps": 16,
"optimizer": "rmsprop",
"freeze": {
"stack_name": "model.layers",
"num_layers": 36,
"selected_layer_indices_0based": [
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31,
32,
33,
34,
35
],
"selected_layer_indices_1based": [
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31,
32,
33,
34,
35,
36
],
"final_norm_modules": [],
"lm_head_enabled": false,
"trainable_params": 5402500096,
"total_params": 8190735360,
"trainable_ratio": 0.6595866986966112
},
"num_train_examples": 31740,
"num_eval_examples": 1670,
"precompute_ref_log_probs": true,
"precompute_ref_batch_size": 1,
"use_logits_to_keep": true,
"torch_empty_cache_steps": 0
}