diff --git a/README.md b/README.md index eccb5a8..63d2f38 100644 --- a/README.md +++ b/README.md @@ -25,13 +25,13 @@ accelerate launch $BASE_DIR/qlora/train.py \ --num_train_epochs 1 \ --logging_steps 1 \ --save_strategy steps \ - --save_steps 120 \ + --save_steps 75 \ --save_total_limit 2 \ --data_seed 11422 \ --evaluation_strategy steps \ --per_device_eval_batch_size 4 \ --eval_dataset_size 0.01 \ - --eval_steps 120 \ + --eval_steps 75 \ --max_new_tokens 1024 \ --dataloader_num_workers 3 \ --logging_strategy steps \ @@ -47,16 +47,15 @@ accelerate launch $BASE_DIR/qlora/train.py \ --dataset habanoz/airoboros-3.1-no-mathjson-max-1k \ --dataset_format airoboros_chat \ --model_max_len 1024 \ - --per_device_train_batch_size 1 \ - --gradient_accumulation_steps 16 \ + --per_device_train_batch_size 4 \ + --gradient_accumulation_steps 4 \ --learning_rate 1e-5 \ --adam_beta2 0.999 \ --max_grad_norm 0.3 \ --lora_dropout 0.0 \ --weight_decay 0.0 \ --seed 11422 \ - --gradient_checkpointing False \ + --gradient_checkpointing \ --use_flash_attention_2 \ - --ddp_find_unused_parameters False \ - --trust_remote_code True + --ddp_find_unused_parameters False ``` \ No newline at end of file