初始化项目,由ModelHub XC社区提供模型
Model: Jihyung803/Qwen3-8B-SOCIALIQA-DPO Source: Original Platform
This commit is contained in:
93
dpo_run_config.json
Normal file
93
dpo_run_config.json
Normal file
@@ -0,0 +1,93 @@
|
||||
{
|
||||
"model_name_or_path": "Qwen/Qwen3-8B",
|
||||
"ref_model_name_or_path": "Qwen/Qwen3-8B",
|
||||
"train_dataset_path": "../data/SOCIALIQA/train_dpo.jsonl",
|
||||
"eval_dataset_path": "",
|
||||
"split_mode": "train_only(val_ratio=0.0500)",
|
||||
"val_ratio": 0.05,
|
||||
"learning_rate": 5e-07,
|
||||
"beta": 0.1,
|
||||
"epochs": 1.0,
|
||||
"warmup_steps": 150,
|
||||
"max_length": 512,
|
||||
"max_prompt_length": 384,
|
||||
"global_batch_size": 16,
|
||||
"per_device_train_batch_size": 1,
|
||||
"gradient_accumulation_steps": 16,
|
||||
"optimizer": "rmsprop",
|
||||
"freeze": {
|
||||
"stack_name": "model.layers",
|
||||
"num_layers": 36,
|
||||
"selected_layer_indices_0based": [
|
||||
8,
|
||||
9,
|
||||
10,
|
||||
11,
|
||||
12,
|
||||
13,
|
||||
14,
|
||||
15,
|
||||
16,
|
||||
17,
|
||||
18,
|
||||
19,
|
||||
20,
|
||||
21,
|
||||
22,
|
||||
23,
|
||||
24,
|
||||
25,
|
||||
26,
|
||||
27,
|
||||
28,
|
||||
29,
|
||||
30,
|
||||
31,
|
||||
32,
|
||||
33,
|
||||
34,
|
||||
35
|
||||
],
|
||||
"selected_layer_indices_1based": [
|
||||
9,
|
||||
10,
|
||||
11,
|
||||
12,
|
||||
13,
|
||||
14,
|
||||
15,
|
||||
16,
|
||||
17,
|
||||
18,
|
||||
19,
|
||||
20,
|
||||
21,
|
||||
22,
|
||||
23,
|
||||
24,
|
||||
25,
|
||||
26,
|
||||
27,
|
||||
28,
|
||||
29,
|
||||
30,
|
||||
31,
|
||||
32,
|
||||
33,
|
||||
34,
|
||||
35,
|
||||
36
|
||||
],
|
||||
"final_norm_modules": [],
|
||||
"lm_head_enabled": false,
|
||||
"trainable_params": 5402500096,
|
||||
"total_params": 8190735360,
|
||||
"trainable_ratio": 0.6595866986966112
|
||||
},
|
||||
"num_train_examples": 31740,
|
||||
"num_eval_examples": 1670,
|
||||
"precompute_ref_log_probs": true,
|
||||
"precompute_ref_batch_size": 1,
|
||||
"use_logits_to_keep": true,
|
||||
"torch_empty_cache_steps": 0
|
||||
}
|
||||
Reference in New Issue
Block a user