初始化项目,由ModelHub XC社区提供模型
Model: sonthenguyen/OpenHermes-2.5-Mistral-7B-mt-bench-DPO Source: Original Platform
This commit is contained in:
26
README.md
Normal file
26
README.md
Normal file
@@ -0,0 +1,26 @@
---
license: apache-2.0
---

Training hyperparameters

LoRA:
r=16
lora_alpha=16
lora_dropout=0.05
bias="none"
task_type="CAUSAL_LM"
target_modules=['k_proj', 'gate_proj', 'v_proj', 'up_proj', 'q_proj', 'o_proj', 'down_proj']

Training arguments:
auto_find_batch_size=True
gradient_checkpointing=True
learning_rate=5e-7
lr_scheduler_type="cosine"
max_steps=3922
optim="paged_adamw_32bit"
warmup_steps=100

DPOTrainer:
beta=0.1
max_prompt_length=1024
max_length=1536
Reference in New Issue
Block a user