Files
1.7b-Comma0.1-300BT-longsft…/config.yaml
ModelHub XC 2be9b9147a 初始化项目,由ModelHub XC社区提供模型
Model: ali-elganzory/1.7b-Comma0.1-300BT-longsft_16k-DPO-Tulu3-decontaminated
Source: Original Platform
2026-04-27 17:30:08 +08:00

53 lines
1.9 KiB
YAML

# Starting checkpoint and how it is loaded.
# The path names the "-SFT-" checkpoint, while hub_model_id further down
# names the "-DPO-" one.
model_name_or_path: 'ali-elganzory/1.7b-Comma0.1-300BT-longsft_16k-SFT-Tulu3-decontaminated'
# NOTE(review): presumably lets code shipped in the model repo run at load
# time - confirm the repo is trusted.
trust_remote_code: true
attn_implementation: 'flash_attention_2'
dtype: 'bfloat16'
# Jinja chat template, stored as one double-quoted scalar.
# Per message it emits "<|system|>\n", "<|user|>\n" or "<|assistant|>\n"
# followed by the message content. Assistant turns are terminated with
# eos_token, plus a newline unless it is the last message. When
# add_generation_prompt is set, a trailing "<|assistant|>\n" is appended
# after the last message.
# Continuation lines are indented so the scalar is valid for strict YAML
# parsers. Leading whitespace on continuation lines is folded away, so the
# string value is unchanged; a leading "\ " keeps one literal space.
chat_template: "\n{%- for message in messages -%}\n\t{%- if message[\"role\"] == \"\
  system\" -%}\n\t\t{{- \"<|system|>\n\" + message[\"content\"] + \"\n\" -}}\n\t{%-\
  \ elif message[\"role\"] == \"user\" -%}\n\t\t{{- \"<|user|>\n\" + message[\"content\"\
  ] + \"\n\" -}}\n\t{%- elif message[\"role\"] == \"assistant\" -%}\n\t\t{%- if not\
  \ loop.last -%}\n\t\t\t{{- \"<|assistant|>\n\" + message[\"content\"] + eos_token\
  \ + \"\n\" -}}\n\t\t{%- else -%}\n\t\t\t{{- \"<|assistant|>\n\" + message[\"content\"\
  ] + eos_token -}}\n\t\t{%- endif -%}\n\t{%- endif -%}\n\t{%- if loop.last and add_generation_prompt\
  \ -%}\n\t\t{{- \"<|assistant|>\n\" -}}\n\t{%- endif -%}\n{%- endfor -%}\n"
# Role markers used by chat_template. Quoted so the "<", "|" and ">"
# characters can never be read as YAML syntax.
additional_special_tokens:
- '<|system|>'
- '<|user|>'
- '<|assistant|>'
# Training data for the run.
# The nesting is significant: `datasets` is a list inside `dataset_mixture`,
# and `config`, `split` and `columns` belong to the single list entry.
# With the indentation flattened, `dataset_mixture` parses as null and the
# other keys become unrelated top-level options.
dataset_mixture:
  datasets:
  - id: ali-elganzory/llama-3.1-tulu-3-8b-preference-mixture-decontaminated
    config: default
    split: train
    # Columns taken from the dataset; presumably the preferred and
    # dispreferred completions of each pair - confirm against the dataset.
    columns:
    - chosen
    - rejected
# --- Dataset preprocessing ---
remove_unused_columns: true
dataset_num_proc: 32

# --- Reproducibility ---
seed: 42

# --- Evaluation ---
# Kept quoted: a bare `no` is read as boolean false by YAML 1.1 parsers.
eval_strategy: "no"

# --- Checkpointing ---
output_dir: '/e/project1/reformo/ali/alignment-handbook/results/mv_exp/dpo/ali-elganzory-1.7b-Comma0.1-300BT-longsft_16k-SFT-Tulu3-decontaminated_tulu-3-8b-preference-mixture-decontaminated_GH200'
overwrite_output_dir: false
save_strategy: 'steps'
save_steps: 200
save_total_limit: 2

# --- Logging ---
log_level: 'info'
logging_strategy: 'steps'
logging_steps: 10
report_to: [wandb]
# Precision and memory settings.
bf16: true
gradient_checkpointing: true
# `use_reentrant` must be a child of `gradient_checkpointing_kwargs`.
# Left at column 0 it becomes an unrelated top-level key and the kwargs
# mapping itself parses as null.
gradient_checkpointing_kwargs:
  use_reentrant: false
# --- Training length and batch shape ---
num_train_epochs: 1
per_device_train_batch_size: 32
gradient_accumulation_steps: 1
max_length: 2048

# --- Learning-rate schedule ---
# Written with a decimal point and signed exponent so YAML 1.1 loaders
# read it as a float rather than a string.
learning_rate: 5.0e-07
lr_scheduler_type: 'linear'
warmup_ratio: 0.1

# --- Preference-optimisation strength ---
# NOTE(review): presumably the DPO beta; 5 is far above the commonly used
# 0.1 - confirm it matches the loss variant the trainer is configured for.
beta: 5
# Hub destination for the trained model (the "-DPO-" repository).
hub_strategy: 'every_save'
hub_model_id: 'ali-elganzory/1.7b-Comma0.1-300BT-longsft_16k-DPO-Tulu3-decontaminated'