Files
xc-llm-ascend/examples/run_dp_server.sh

30 lines
789 B
Bash
Raw Normal View History

export HCCL_IF_IP=2.0.0.0
export GLOO_SOCKET_IFNAME="enp189s0f0"
export TP_SOCKET_IFNAME="enp189s0f0"
export HCCL_SOCKET_IFNAME="enp189s0f0"
export OMP_PROC_BIND=false
export OMP_NUM_THREADS=100
export VLLM_USE_V1=0
export ASCEND_RT_VISIBLE_DEVICES=0,1
export VLLM_DP_SIZE=2
export VLLM_DP_RANK=0
export VLLM_DP_MASTER_IP="2.0.0.0"
export VLLM_DP_MASTER_PORT=40001
export VLLM_DP_PROXY_IP="2.0.0.0"
export VLLM_DP_PROXY_PORT=30002
export VLLM_DP_MONITOR_PORT=30003
export VLLM_HTTP_PORT=20001
vllm serve /data/weights/Qwen2.5-0.5B-Instruct \
--host 0.0.0.0 \
--port 20001 \
--tensor-parallel-size 1 \
--seed 1024 \
--served-model-name Qwen \
--max-model-len 2000 \
--max-num-batched-tokens 2000 \
--trust-remote-code \
--gpu-memory-utilization 0.9 \