[Doc] Upgrade env VLLM_ASCEND_ENABLE_FUSED_MC2 used in nightly test and tutorials (#8441)

### What this PR does / why we need it?
The env `VLLM_ASCEND_ENABLE_FUSED_MC2` should only be enabled on the
decoder node in the Prefill-Decode Disaggregation scenario.

---------

Signed-off-by: wangli <wangli858794774@gmail.com>
This commit is contained in:
Li Wang
2026-04-20 22:39:23 +08:00
committed by GitHub
parent 3db5048d74
commit 36a0470de1
9 changed files with 10 additions and 21 deletions

View File

@@ -10,7 +10,6 @@ env_common:
OMP_PROC_BIND: false
OMP_NUM_THREADS: 1
VLLM_ASCEND_ENABLE_FLASHCOMM1: 1
VLLM_ASCEND_ENABLE_FUSED_MC2: 2
TASK_QUEUE_ENABLE: 1
SERVER_PORT: 8080
@@ -21,6 +20,9 @@ disaggregated_prefill:
deployment:
-
envs:
# should disable this in the prefiller node
VLLM_ASCEND_ENABLE_FUSED_MC2: 0
server_cmd: >
vllm serve "Qwen/Qwen3-235B-A22B"
--host 0.0.0.0
@@ -57,6 +59,8 @@ deployment:
}'
-
envs:
VLLM_ASCEND_ENABLE_FUSED_MC2: 2
server_cmd: >
vllm serve "Qwen/Qwen3-235B-A22B"
--host 0.0.0.0

View File

@@ -12,7 +12,6 @@ _envs: &envs
VLLM_ASCEND_BALANCE_SCHEDULING: "1"
VLLM_ASCEND_ENABLE_TOPK_OPTIMIZE: "1"
VLLM_ASCEND_ENABLE_FLASHCOMM1: "1"
VLLM_ASCEND_ENABLE_FUSED_MC2: "1"
_server_cmd: &server_cmd
- "--enable-expert-parallel"

View File

@@ -10,7 +10,6 @@ _envs: &envs
HCCL_BUFFSIZE: "1536"
PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
VLLM_ASCEND_ENABLE_FLASHCOMM1: "1"
VLLM_ASCEND_ENABLE_FUSED_MC2: "1"
VLLM_ASCEND_ENABLE_NZ: "2"
VLLM_ASCEND_BALANCE_SCHEDULING: "1"
SERVER_PORT: "DEFAULT_PORT"

View File

@@ -13,7 +13,6 @@ test_cases:
OMP_NUM_THREADS: "1"
TASK_QUEUE_ENABLE: "1"
SERVER_PORT: "DEFAULT_PORT"
VLLM_ASCEND_ENABLE_FUSED_MC2: "1"
VLLM_ASCEND_ENABLE_FLASHCOMM1: "1"
server_cmd:
- "--tensor-parallel-size"