From 6a4a7ba9da32e2961c10372e21cbcb91daaa6235 Mon Sep 17 00:00:00 2001
From: Huseyin ABANOZ
Date: Wed, 22 Nov 2023 16:38:10 +0000
Subject: [PATCH] Create README.md

---
 README.md | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)
 create mode 100644 README.md

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..eccb5a8
--- /dev/null
+++ b/README.md
@@ -0,0 +1,62 @@
+---
+license: apache-2.0
+datasets:
+- habanoz/airoboros-3.1-no-mathjson-max-1k
+language:
+- en
+pipeline_tag: text-generation
+---
+
+TinyLlama-1.1B-intermediate-step-715k-1.5T fine-tuned on the airoboros-3.1-no-mathjson-max-1k dataset.
+
+QLoRA was used for fine-tuning, and the resulting adapter is merged into the model.
+
+SFT code:
+https://github.com/habanoz/qlora.git
+
+Command used:
+```bash
+accelerate launch $BASE_DIR/qlora/train.py \
+    --model_name_or_path $BASE_MODEL \
+    --working_dir $BASE_DIR/$OUTPUT_NAME-checkpoints \
+    --output_dir $BASE_DIR/$OUTPUT_NAME-peft \
+    --merged_output_dir $BASE_DIR/$OUTPUT_NAME \
+    --final_output_dir $BASE_DIR/$OUTPUT_NAME-final \
+    --num_train_epochs 1 \
+    --logging_steps 1 \
+    --save_strategy steps \
+    --save_steps 120 \
+    --save_total_limit 2 \
+    --data_seed 11422 \
+    --evaluation_strategy steps \
+    --per_device_eval_batch_size 4 \
+    --eval_dataset_size 0.01 \
+    --eval_steps 120 \
+    --max_new_tokens 1024 \
+    --dataloader_num_workers 3 \
+    --logging_strategy steps \
+    --do_train \
+    --do_eval \
+    --lora_r 64 \
+    --lora_alpha 16 \
+    --lora_modules all \
+    --bits 4 \
+    --double_quant \
+    --quant_type nf4 \
+    --lr_scheduler_type constant \
+    --dataset habanoz/airoboros-3.1-no-mathjson-max-1k \
+    --dataset_format airoboros_chat \
+    --model_max_len 1024 \
+    --per_device_train_batch_size 1 \
+    --gradient_accumulation_steps 16 \
+    --learning_rate 1e-5 \
+    --adam_beta2 0.999 \
+    --max_grad_norm 0.3 \
+    --lora_dropout 0.0 \
+    --weight_decay 0.0 \
+    --seed 11422 \
+    --gradient_checkpointing False \
+    --use_flash_attention_2 \
+    --ddp_find_unused_parameters False \
+    --trust_remote_code True
+```
\ No newline at end of file