# SFT config: PleIAs/Baguettotron on the decontaminated Tulu-3 SFT mixture (GH200 run).

# Model arguments
model_name_or_path: PleIAs/Baguettotron
dtype: bfloat16
attn_implementation: flash_attention_2
trust_remote_code: true

# Chat template (Jinja). All tags use `{%-`/`-%}` whitespace control, so the
# indentation below is stripped at render time; `\n` inside the quoted strings
# is a Jinja string escape that renders as a newline.
# Non-final assistant turns are terminated with eos_token + "\n"; the final
# assistant turn ends with eos_token only. When add_generation_prompt is set,
# a trailing "<|assistant|>\n" is appended after the last message.
chat_template: |-
  {%- for message in messages -%}
    {%- if message["role"] == "system" -%}
      {{- "<|system|>\n" + message["content"] + "\n" -}}
    {%- elif message["role"] == "user" -%}
      {{- "<|user|>\n" + message["content"] + "\n" -}}
    {%- elif message["role"] == "assistant" -%}
      {%- if not loop.last -%}
        {{- "<|assistant|>\n" + message["content"] + eos_token + "\n" -}}
      {%- else -%}
        {{- "<|assistant|>\n" + message["content"] + eos_token -}}
      {%- endif -%}
    {%- endif -%}
    {%- if loop.last and add_generation_prompt -%}
      {{- "<|assistant|>\n" -}}
    {%- endif -%}
  {%- endfor -%}
# Role markers used by the chat template; registered as special tokens so the
# tokenizer never splits them. Quoted because they start with YAML-special chars.
additional_special_tokens:
  - "<|system|>"
  - "<|user|>"
  - "<|assistant|>"

# Data arguments
dataset_mixture:
  datasets:
    - id: ali-elganzory/tulu-3-sft-mixture-decontaminated
      config: default
      split: train
      columns:
        - messages
dataset_num_proc: 32
eval_strategy: 'no'  # quoted: bare `no` parses as boolean false in YAML 1.1 loaders
remove_unused_columns: true
dataset_kwargs:
  add_special_tokens: false
  append_concat_token: false

# Output / checkpointing
output_dir: /e/project1/reformo/ali/alignment-handbook/results/mv_exp/sft/PleIAs-Baguettotron_tulu-3-sft-mixture-decontaminated_GH200
overwrite_output_dir: false
save_strategy: steps
save_steps: 200
save_total_limit: 2
push_to_hub: false

# Logging
log_level: info
report_to:
  - wandb
logging_steps: 10
logging_strategy: steps
seed: 42

# Training hyperparameters
bf16: true
gradient_checkpointing: true
learning_rate: 5.0e-06
lr_scheduler_type: linear
max_length: 4096
warmup_ratio: 0.03
num_train_epochs: 2
per_device_train_batch_size: 16
gradient_accumulation_steps: 1

# Hub (push_to_hub is false; these only take effect if pushing is enabled)
hub_model_id: ali-elganzory/Baguettotron-SFT-Tulu3-decontaminated
hub_strategy: every_save