#!/usr/bin/env bash
#
# Exports the communication/runtime environment variables below and then
# starts `vllm serve` for deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B with a
# KV-transfer configuration whose role is "kv_consumer".
#
# NOTE(review): the addresses 2.0.0.0, 3.0.0.0 and 1.2.3.x used in this file
# look like placeholders — confirm and replace them with the real values for
# the target deployment before running.

# --- Network interface selection -------------------------------------------
# NOTE(review): presumably the local IP used for HCCL traffic — confirm.
export HCCL_IF_IP=2.0.0.0
# Gloo, TP and HCCL sockets are all pinned to the same network interface.
export GLOO_SOCKET_IFNAME="enp189s0f0"
export TP_SOCKET_IFNAME="enp189s0f0"
export HCCL_SOCKET_IFNAME="enp189s0f0"

# --- OpenMP threading -------------------------------------------------------
export OMP_PROC_BIND=false
export OMP_NUM_THREADS=100

# --- vLLM engine selection --------------------------------------------------
# NOTE(review): presumably disables the V1 engine in favour of the legacy
# one — confirm against the vLLM version in use.
export VLLM_USE_V1=0

# --- Device visibility ------------------------------------------------------
# Eight device ids (0-7); this count matches --tensor-parallel-size 8 and
# "kv_parallel_size": 8 below, so keep the three in sync when changing any.
export ASCEND_RT_VISIBLE_DEVICES=0,1,2,3,4,5,6,7

# --- KV-transfer configuration ---------------------------------------------
# JSON document passed verbatim to --kv-transfer-config. It is single-quoted
# so the shell performs no expansion inside it. "prompt_device_ips" and
# "decode_device_ips" each list eight addresses.
kv_transfer_config='{"kv_connector": "AscendSimpleConnector",
"kv_buffer_device": "npu",
"kv_role": "kv_consumer",
"kv_parallel_size": 8,
"kv_port":"21001",
"kv_connector_extra_config":
{"prompt_device_ips": ["1.2.3.1", "1.2.3.2", "1.2.3.3", "1.2.3.4", "1.2.3.5", "1.2.3.6", "1.2.3.7", "1.2.3.8"],
"decode_device_ips": ["1.2.3.9", "1.2.3.10", "1.2.3.11", "1.2.3.12", "1.2.3.13", "1.2.3.14", "1.2.3.15", "1.2.3.16"],
"llmdatadist_comm_port": 26000,
"proxy_ip":"3.0.0.0",
"proxy_port":"30001",
"http_port": 10002}
}'

# --- Server launch ----------------------------------------------------------
# Listens on 0.0.0.0:20002 and exposes the model under the name "deepseek".
# The variable is double-quoted so the multi-line JSON stays a single argument.
vllm serve deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B \
  --host 0.0.0.0 \
  --port 20002 \
  --tensor-parallel-size 8 \
  --seed 1024 \
  --served-model-name deepseek \
  --max-model-len 2000 \
  --max-num-batched-tokens 2000 \
  --trust-remote-code \
  --gpu-memory-utilization 0.9 \
  --kv-transfer-config "$kv_transfer_config"