diff --git a/docs/source/tutorials/DeepSeek-V3.1.md b/docs/source/tutorials/DeepSeek-V3.1.md index c1573189..fd172408 100644 --- a/docs/source/tutorials/DeepSeek-V3.1.md +++ b/docs/source/tutorials/DeepSeek-V3.1.md @@ -105,7 +105,7 @@ export GLOO_SOCKET_IFNAME=$nic_name export TP_SOCKET_IFNAME=$nic_name export HCCL_SOCKET_IFNAME=$nic_name export OMP_PROC_BIND=false -export OMP_NUM_THREADS=100 +export OMP_NUM_THREADS=10 export VLLM_USE_V1=1 export HCCL_BUFFSIZE=200 export VLLM_ASCEND_ENABLE_MLAPO=1 @@ -164,7 +164,7 @@ export GLOO_SOCKET_IFNAME=$nic_name export TP_SOCKET_IFNAME=$nic_name export HCCL_SOCKET_IFNAME=$nic_name export OMP_PROC_BIND=false -export OMP_NUM_THREADS=100 +export OMP_NUM_THREADS=10 export VLLM_USE_V1=1 export HCCL_BUFFSIZE=200 export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True @@ -220,7 +220,7 @@ export GLOO_SOCKET_IFNAME=$nic_name export TP_SOCKET_IFNAME=$nic_name export HCCL_SOCKET_IFNAME=$nic_name export OMP_PROC_BIND=false -export OMP_NUM_THREADS=100 +export OMP_NUM_THREADS=10 export HCCL_BUFFSIZE=200 export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True export VLLM_ASCEND_ENABLE_MLAPO=1 diff --git a/docs/source/tutorials/DeepSeek-V3.2-Exp.md b/docs/source/tutorials/DeepSeek-V3.2-Exp.md index 84a5863f..79518dc4 100644 --- a/docs/source/tutorials/DeepSeek-V3.2-Exp.md +++ b/docs/source/tutorials/DeepSeek-V3.2-Exp.md @@ -206,7 +206,7 @@ export GLOO_SOCKET_IFNAME=$nic_name export TP_SOCKET_IFNAME=$nic_name export HCCL_SOCKET_IFNAME=$nic_name export OMP_PROC_BIND=false -export OMP_NUM_THREADS=100 +export OMP_NUM_THREADS=10 export HCCL_BUFFSIZE=1024 vllm serve /root/.cache/Modelers_Park/DeepSeek-V3.2-Exp \ @@ -248,7 +248,7 @@ export GLOO_SOCKET_IFNAME=$nic_name export TP_SOCKET_IFNAME=$nic_name export HCCL_SOCKET_IFNAME=$nic_name export OMP_PROC_BIND=false -export OMP_NUM_THREADS=100 +export OMP_NUM_THREADS=10 export HCCL_BUFFSIZE=1024 vllm serve /root/.cache/Modelers_Park/DeepSeek-V3.2-Exp \ @@ -295,9 +295,8 @@ export GLOO_SOCKET_IFNAME=$nic_name export TP_SOCKET_IFNAME=$nic_name export HCCL_SOCKET_IFNAME=$nic_name export OMP_PROC_BIND=false -export OMP_NUM_THREADS=100 +export OMP_NUM_THREADS=10 export HCCL_BUFFSIZE=1024 -export HCCL_OP_EXPANSION_MODE="AIV" export PYTORCH_NPU_ALLOC_CONF="expandable_segments:True" vllm serve vllm-ascend/DeepSeek-V3.2-Exp-W8A8 \ @@ -340,9 +339,8 @@ export GLOO_SOCKET_IFNAME=$nic_name export TP_SOCKET_IFNAME=$nic_name export HCCL_SOCKET_IFNAME=$nic_name export OMP_PROC_BIND=false -export OMP_NUM_THREADS=100 +export OMP_NUM_THREADS=10 export HCCL_BUFFSIZE=1024 -export HCCL_OP_EXPANSION_MODE="AIV" export PYTORCH_NPU_ALLOC_CONF="expandable_segments:True" vllm serve vllm-ascend/DeepSeek-V3.2-Exp-W8A8 \ diff --git a/docs/source/tutorials/multi_node.md b/docs/source/tutorials/multi_node.md index d04fa090..fbfbbd74 100644 --- a/docs/source/tutorials/multi_node.md +++ b/docs/source/tutorials/multi_node.md @@ -114,7 +114,7 @@ export GLOO_SOCKET_IFNAME=$nic_name export TP_SOCKET_IFNAME=$nic_name export HCCL_SOCKET_IFNAME=$nic_name export OMP_PROC_BIND=false -export OMP_NUM_THREADS=100 +export OMP_NUM_THREADS=10 export HCCL_BUFFSIZE=1024 # The w8a8 weight can be obtained from https://www.modelscope.cn/models/vllm-ascend/DeepSeek-V3.1-W8A8 @@ -159,7 +159,7 @@ export GLOO_SOCKET_IFNAME=$nic_name export TP_SOCKET_IFNAME=$nic_name export HCCL_SOCKET_IFNAME=$nic_name export OMP_PROC_BIND=false -export OMP_NUM_THREADS=100 +export OMP_NUM_THREADS=10 export HCCL_BUFFSIZE=1024 vllm serve vllm-ascend/DeepSeek-V3.1-W8A8 \ diff --git a/docs/source/tutorials/multi_node_kimi.md b/docs/source/tutorials/multi_node_kimi.md index 84840cdf..d53ef09d 100644 --- a/docs/source/tutorials/multi_node_kimi.md +++ b/docs/source/tutorials/multi_node_kimi.md @@ -69,7 +69,7 @@ export GLOO_SOCKET_IFNAME=$nic_name export TP_SOCKET_IFNAME=$nic_name export HCCL_SOCKET_IFNAME=$nic_name export OMP_PROC_BIND=false -export OMP_NUM_THREADS=100 +export OMP_NUM_THREADS=10 export HCCL_BUFFSIZE=1024 # The w8a8 weight can be obtained from https://www.modelscope.cn/models/vllm-ascend/Kimi-K2-Instruct-W8A8 @@ -114,7 +114,7 @@ export GLOO_SOCKET_IFNAME=$nic_name export TP_SOCKET_IFNAME=$nic_name export HCCL_SOCKET_IFNAME=$nic_name export OMP_PROC_BIND=false -export OMP_NUM_THREADS=100 +export OMP_NUM_THREADS=10 export HCCL_BUFFSIZE=1024 vllm serve /home/cache/weights/Kimi-K2-Instruct-W8A8 \ diff --git a/docs/source/tutorials/multi_node_qwen3vl.md b/docs/source/tutorials/multi_node_qwen3vl.md index 033bc6dc..48763b70 100644 --- a/docs/source/tutorials/multi_node_qwen3vl.md +++ b/docs/source/tutorials/multi_node_qwen3vl.md @@ -69,7 +69,7 @@ export GLOO_SOCKET_IFNAME=$nic_name export TP_SOCKET_IFNAME=$nic_name export HCCL_SOCKET_IFNAME=$nic_name export OMP_PROC_BIND=false -export OMP_NUM_THREADS=100 +export OMP_NUM_THREADS=10 export HCCL_BUFFSIZE=1024 vllm serve Qwen/Qwen3-VL-235B-A22B-Instruct \ @@ -110,7 +110,7 @@ export GLOO_SOCKET_IFNAME=$nic_name export TP_SOCKET_IFNAME=$nic_name export HCCL_SOCKET_IFNAME=$nic_name export OMP_PROC_BIND=false -export OMP_NUM_THREADS=100 +export OMP_NUM_THREADS=10 export HCCL_BUFFSIZE=1024 vllm serve Qwen/Qwen3-VL-235B-A22B-Instruct \ diff --git a/examples/disaggregated_prefill_v1/README.md b/examples/disaggregated_prefill_v1/README.md index 83f5565e..c3724676 100644 --- a/examples/disaggregated_prefill_v1/README.md +++ b/examples/disaggregated_prefill_v1/README.md @@ -40,7 +40,7 @@ export TP_SOCKET_IFNAME="eth0" export HCCL_SOCKET_IFNAME="eth0" export DISAGGREGATED_PREFILL_RANK_TABLE_PATH=/vllm-workspace/vllm-ascend/examples/disaggregated_prefill_v1/ranktable.json export OMP_PROC_BIND=false -export OMP_NUM_THREADS=100 +export OMP_NUM_THREADS=10 export VLLM_ASCEND_LLMDD_RPC_PORT=5559 vllm serve /models/deepseek_r1_w8a8 \ @@ -80,7 +80,7 @@ export TP_SOCKET_IFNAME="eth0" export HCCL_SOCKET_IFNAME="eth0" export DISAGGREGATED_PREFILL_RANK_TABLE_PATH=/vllm-workspace/vllm-ascend/examples/disaggregated_prefill_v1/ranktable.json export OMP_PROC_BIND=false -export OMP_NUM_THREADS=100 +export OMP_NUM_THREADS=10 export VLLM_ASCEND_LLMDD_RPC_PORT=5659 vllm serve /models/deepseek_r1_w8a8 \ @@ -123,7 +123,7 @@ export TP_SOCKET_IFNAME="eth0" export HCCL_SOCKET_IFNAME="eth0" export DISAGGREGATED_PREFILL_RANK_TABLE_PATH=/vllm-workspace/vllm-ascend/examples/disaggregated_prefill_v1/ranktable.json export OMP_PROC_BIND=false -export OMP_NUM_THREADS=100 +export OMP_NUM_THREADS=10 export VLLM_ASCEND_LLMDD_RPC_PORT=5759 vllm serve /models/deepseek_r1_w8a8 \ @@ -164,7 +164,7 @@ export TP_SOCKET_IFNAME="eth0" export HCCL_SOCKET_IFNAME="eth0" export DISAGGREGATED_PREFILL_RANK_TABLE_PATH=/vllm-workspace/vllm-ascend/examples/disaggregated_prefill_v1/ranktable.json export OMP_PROC_BIND=false -export OMP_NUM_THREADS=100 +export OMP_NUM_THREADS=10 export VLLM_ASCEND_LLMDD_RPC_PORT=5859 vllm serve /models/deepseek_r1_w8a8 \ diff --git a/examples/disaggregated_prefill_v1/run_server.sh b/examples/disaggregated_prefill_v1/run_server.sh index 37833909..6d18f403 100644 --- a/examples/disaggregated_prefill_v1/run_server.sh +++ b/examples/disaggregated_prefill_v1/run_server.sh @@ -5,7 +5,7 @@ export HCCL_SOCKET_IFNAME="enp48s3u1u1" export DISAGGREGATED_PREFILL_RANK_TABLE_PATH=path-to-rank-table export OMP_PROC_BIND=false -export OMP_NUM_THREADS=100 +export OMP_NUM_THREADS=10 vllm serve model_path \ --host 0.0.0.0 \ diff --git a/examples/run_dp_server.sh b/examples/run_dp_server.sh index ec0cb686..c6ff7aa6 100644 --- a/examples/run_dp_server.sh +++ b/examples/run_dp_server.sh @@ -5,7 +5,7 @@ export TP_SOCKET_IFNAME="eth0" export HCCL_SOCKET_IFNAME="eth0" export OMP_PROC_BIND=false -export OMP_NUM_THREADS=100 +export OMP_NUM_THREADS=10 export VLLM_USE_MODELSCOPE=true