---
# DPO training configuration (alignment-handbook / TRL) for the 1.7B
# Comma0.1-300BT long-SFT checkpoint, trained on the decontaminated
# Tulu-3 8B preference mixture.

# --- Model ---
model_name_or_path: ali-elganzory/1.7b-Comma0.1-300BT-longsft_16k-SFT-Tulu3-decontaminated
dtype: bfloat16
attn_implementation: flash_attention_2
trust_remote_code: true

# Jinja chat template. The \n / \t escapes below are expanded by the YAML
# loader into real newlines/tabs inside the template string. Note the
# last assistant turn appends eos_token with no trailing newline, while
# earlier assistant turns append eos_token + "\n".
chat_template: "\n{%- for message in messages -%}\n\t{%- if message[\"role\"] == \"system\" -%}\n\t\t{{- \"<|system|>\n\" + message[\"content\"] + \"\n\" -}}\n\t{%- elif message[\"role\"] == \"user\" -%}\n\t\t{{- \"<|user|>\n\" + message[\"content\"] + \"\n\" -}}\n\t{%- elif message[\"role\"] == \"assistant\" -%}\n\t\t{%- if not loop.last -%}\n\t\t\t{{- \"<|assistant|>\n\" + message[\"content\"] + eos_token + \"\n\" -}}\n\t\t{%- else -%}\n\t\t\t{{- \"<|assistant|>\n\" + message[\"content\"] + eos_token -}}\n\t\t{%- endif -%}\n\t{%- endif -%}\n\t{%- if loop.last and add_generation_prompt -%}\n\t\t{{- \"<|assistant|>\n\" -}}\n\t{%- endif -%}\n{%- endfor -%}\n"

# Role-marker tokens used by the chat template; quoted so the leading
# '<|' can never be misread by a stricter parser.
additional_special_tokens:
  - "<|system|>"
  - "<|user|>"
  - "<|assistant|>"

# --- Data ---
dataset_mixture:
  datasets:
    - id: ali-elganzory/llama-3.1-tulu-3-8b-preference-mixture-decontaminated
      config: default
      split: train
      columns:
        - chosen
        - rejected
dataset_num_proc: 32
remove_unused_columns: true

# --- Evaluation / checkpointing / logging ---
# 'no' must stay quoted: a bare `no` parses as boolean false in YAML 1.1.
eval_strategy: 'no'
output_dir: /e/project1/reformo/ali/alignment-handbook/results/mv_exp/dpo/ali-elganzory-1.7b-Comma0.1-300BT-longsft_16k-SFT-Tulu3-decontaminated_tulu-3-8b-preference-mixture-decontaminated_GH200
overwrite_output_dir: false
save_strategy: steps
save_steps: 200
save_total_limit: 2
log_level: info
report_to:
  - wandb
logging_steps: 10
logging_strategy: steps

# --- Training hyperparameters ---
seed: 42
bf16: true
gradient_checkpointing: true
gradient_checkpointing_kwargs:
  use_reentrant: false
learning_rate: 5.0e-07
lr_scheduler_type: linear
max_length: 2048
# DPO beta (implicit-reward / KL strength), written as a float to match
# the consumer's float-typed field. NOTE(review): 5.0 is far above the
# usual 0.01-0.5 range for DPO — confirm this is intentional.
beta: 5.0
warmup_ratio: 0.1
per_device_train_batch_size: 32
gradient_accumulation_steps: 1
num_train_epochs: 1

# --- Hub upload ---
hub_model_id: ali-elganzory/1.7b-Comma0.1-300BT-longsft_16k-DPO-Tulu3-decontaminated
hub_strategy: every_save