初始化项目,由ModelHub XC社区提供模型
Model: openbmb/BitCPM-CANN-3B-unquantized Source: Original Platform
This commit is contained in:
38
example/run.sh
Normal file
38
example/run.sh
Normal file
@@ -0,0 +1,38 @@
|
||||
#!/bin/bash
#
# Example launcher configuration: paths, batch/sequence settings, and device
# visibility for a torchrun + DeepSpeed run of train.py.
#
# The path and hyperparameter settings below are defaults. Export a variable
# of the same name before invoking the script to override it, e.g.
#   MODEL_PATH=/models/my-model OUTPUT_DIR=/tmp/out bash run.sh
#
# Relative paths (OUTPUT_DIR, DS_CONFIG) are resolved against the current
# working directory, not against the location of this script.

# Stop on command failure, on use of an unset variable, and on a failed
# pipeline stage, so a typo cannot silently become an empty argument.
set -euo pipefail

# --- Paths -------------------------------------------------------------------
# NOTE(review): this default names a 1B checkpoint, while the commit that adds
# this script is labelled for openbmb/BitCPM-CANN-3B-unquantized — confirm
# which model is intended.
MODEL_PATH="${MODEL_PATH:-/model/BitCPM-CANN-1B-unquantized}"
DATA_PATH="${DATA_PATH:-/dataset/c4-pro/data/000_1_7.parquet}"
OUTPUT_DIR="${OUTPUT_DIR:-./output}"
# DeepSpeed JSON config; presumably a ZeRO stage 2 setup given the file name
# — verify against the file itself.
DS_CONFIG="${DS_CONFIG:-./ds_config_z2.json}"

# --- Parallelism and batch shape ---------------------------------------------
# NUM_GPUS is the number of worker processes torchrun starts on this node.
NUM_GPUS="${NUM_GPUS:-8}"
BATCH_SIZE_PER_GPU="${BATCH_SIZE_PER_GPU:-8}"
GRAD_ACCUM_STEPS="${GRAD_ACCUM_STEPS:-8}"
MAX_SEQ_LENGTH="${MAX_SEQ_LENGTH:-1024}"

# --- Device visibility -------------------------------------------------------
# These are set unconditionally (any inherited value is replaced), exactly as
# the hard-coded lists dictate. Edit them here to use different devices.
export ASCEND_RT_VISIBLE_DEVICES=8,9,10,11,12,13,14,15
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
# Presumably tells DeepSpeed to skip its CUDA version consistency check —
# confirm against the DeepSpeed documentation for the installed version.
export DS_SKIP_CUDA_CHECK=1

# --- Sanity checks -----------------------------------------------------------
# Fail early with a readable message instead of letting a worker crash later.
if ! [[ "$NUM_GPUS" =~ ^[1-9][0-9]*$ ]]; then
  printf 'run.sh: NUM_GPUS must be a positive integer, got: %s\n' \
    "$NUM_GPUS" >&2
  exit 1
fi

# One process is started per NUM_GPUS, so each visible-device list must hold
# at least that many entries.
for device_list in "$ASCEND_RT_VISIBLE_DEVICES" "$CUDA_VISIBLE_DEVICES"; do
  IFS=',' read -r -a visible_devices <<< "$device_list"
  if (( NUM_GPUS > ${#visible_devices[@]} )); then
    printf 'run.sh: NUM_GPUS=%s exceeds the %s device(s) listed in "%s"\n' \
      "$NUM_GPUS" "${#visible_devices[@]}" "$device_list" >&2
    exit 1
  fi
done
unset device_list visible_devices
|
||||
# Launch train.py under torchrun, one worker process per NUM_GPUS.
#
# The arguments are collected in an array so that every value is passed as
# exactly one word (paths containing spaces or glob characters stay intact)
# and so that each option can carry its own comment.
train_args=(
  # Model, data, and output locations.
  --model_name_or_path "$MODEL_PATH"
  --data_path "$DATA_PATH"
  --max_seq_length "$MAX_SEQ_LENGTH"
  --output_dir "$OUTPUT_DIR"

  # Batch shape.
  --per_device_train_batch_size "$BATCH_SIZE_PER_GPU"
  --gradient_accumulation_steps "$GRAD_ACCUM_STEPS"

  # Optimisation schedule.
  --max_steps 100
  --learning_rate 4e-5
  --lr_scheduler_type cosine
  --warmup_ratio 0.1
  --weight_decay 1e-2

  # Logging and checkpointing.
  # NOTE(review): save_steps (500) is larger than max_steps (100), so no
  # step-interval checkpoint would be triggered during this run — confirm
  # that train.py saves the final model itself, or lower save_steps.
  --logging_steps 2
  --save_steps 500
  --save_total_limit 3

  # Precision, DeepSpeed, and memory settings.
  --bf16
  --deepspeed "$DS_CONFIG"
  --gradient_checkpointing

  # Reproducibility and data loading.
  --seed 42
  --dataloader_num_workers 4

  # Metrics reporting.
  --report_to tensorboard
  --logging_dir /data/tensorboard/pretrain

  # JSON passed through verbatim as a single argument.
  --gradient_checkpointing_kwargs '{"use_reentrant": false}'
)

torchrun --nproc_per_node="$NUM_GPUS" train.py "${train_args[@]}"
|
||||
Reference in New Issue
Block a user