初始化项目,由ModelHub XC社区提供模型
Model: ali-elganzory/1.7b-Comma0.1-300BT-longsft_16k-DPO-Tulu3-decontaminated Source: Original Platform
This commit is contained in:
52
config.yaml
Normal file
52
config.yaml
Normal file
@@ -0,0 +1,52 @@
# Training configuration recovered from the commit diff. Keys and values are
# unchanged; only diff-table residue was removed and indentation restored.

# --- Model -------------------------------------------------------------------
model_name_or_path: ali-elganzory/1.7b-Comma0.1-300BT-longsft_16k-SFT-Tulu3-decontaminated
dtype: bfloat16
attn_implementation: flash_attention_2
trust_remote_code: true

# --- Chat template -----------------------------------------------------------
# Kept as a double-quoted scalar on purpose: the value begins with a newline
# and contains tab and newline escapes, so the escape sequences and the
# backslash line continuations are reproduced verbatim.
chat_template: "\n{%- for message in messages -%}\n\t{%- if message[\"role\"] == \"\
  system\" -%}\n\t\t{{- \"<|system|>\n\" + message[\"content\"] + \"\n\" -}}\n\t{%-\
  \ elif message[\"role\"] == \"user\" -%}\n\t\t{{- \"<|user|>\n\" + message[\"content\"\
  ] + \"\n\" -}}\n\t{%- elif message[\"role\"] == \"assistant\" -%}\n\t\t{%- if not\
  \ loop.last -%}\n\t\t\t{{- \"<|assistant|>\n\" + message[\"content\"] + eos_token\
  \ + \"\n\" -}}\n\t\t{%- else -%}\n\t\t\t{{- \"<|assistant|>\n\" + message[\"content\"\
  ] + eos_token -}}\n\t\t{%- endif -%}\n\t{%- endif -%}\n\t{%- if loop.last and add_generation_prompt\
  \ -%}\n\t\t{{- \"<|assistant|>\n\" -}}\n\t{%- endif -%}\n{%- endfor -%}\n"
# Role markers emitted by the chat template above.
additional_special_tokens:
  - <|system|>
  - <|user|>
  - <|assistant|>

# --- Dataset -----------------------------------------------------------------
# NOTE(review): the nesting below was reconstructed from key order because the
# original indentation was lost; confirm it matches the consumer's schema.
dataset_mixture:
  datasets:
    - id: ali-elganzory/llama-3.1-tulu-3-8b-preference-mixture-decontaminated
      config: default
      split: train
      columns:
        - chosen
        - rejected
dataset_num_proc: 32
remove_unused_columns: true
# Quoted so the value stays the string 'no' instead of a YAML 1.1 boolean.
eval_strategy: 'no'

# --- Output and checkpointing ------------------------------------------------
output_dir: /e/project1/reformo/ali/alignment-handbook/results/mv_exp/dpo/ali-elganzory-1.7b-Comma0.1-300BT-longsft_16k-SFT-Tulu3-decontaminated_tulu-3-8b-preference-mixture-decontaminated_GH200
overwrite_output_dir: false
save_strategy: steps
save_steps: 200
save_total_limit: 2

# --- Logging -----------------------------------------------------------------
log_level: info
report_to:
  - wandb
logging_steps: 10
logging_strategy: steps

# --- Optimisation ------------------------------------------------------------
seed: 42
bf16: true
gradient_checkpointing: true
gradient_checkpointing_kwargs:
  use_reentrant: false
learning_rate: 5.0e-07
lr_scheduler_type: linear
max_length: 2048
# NOTE(review): a beta of 5 is unusually large and no loss_type is set in this
# file; confirm it is intended for the loss the trainer actually uses.
beta: 5
warmup_ratio: 0.1
per_device_train_batch_size: 32
gradient_accumulation_steps: 1
num_train_epochs: 1

# --- Hub upload --------------------------------------------------------------
hub_model_id: ali-elganzory/1.7b-Comma0.1-300BT-longsft_16k-DPO-Tulu3-decontaminated
hub_strategy: every_save
Reference in New Issue
Block a user