[Doc] Upgrade env VLLM_ASCEND_ENABLE_FUSED_MC2 used in nightly test and tutorials (#8441)

### What this PR does / why we need it?
The env `VLLM_ASCEND_ENABLE_FUSED_MC2` should only be enabled on the
decoder node in the Prefill-Decode disaggregation scenario

---------

Signed-off-by: wangli <wangli858794774@gmail.com>
This commit is contained in:
Li Wang
2026-04-20 22:39:23 +08:00
committed by GitHub
parent 3db5048d74
commit 36a0470de1
9 changed files with 10 additions and 21 deletions

View File

@@ -12,7 +12,6 @@ _envs: &envs
VLLM_ASCEND_BALANCE_SCHEDULING: "1"
VLLM_ASCEND_ENABLE_TOPK_OPTIMIZE: "1"
VLLM_ASCEND_ENABLE_FLASHCOMM1: "1"
VLLM_ASCEND_ENABLE_FUSED_MC2: "1"
_server_cmd: &server_cmd
- "--enable-expert-parallel"

View File

@@ -10,7 +10,6 @@ _envs: &envs
HCCL_BUFFSIZE: "1536"
PYTORCH_NPU_ALLOC_CONF: "expandable_segments:True"
VLLM_ASCEND_ENABLE_FLASHCOMM1: "1"
VLLM_ASCEND_ENABLE_FUSED_MC2: "1"
VLLM_ASCEND_ENABLE_NZ: "2"
VLLM_ASCEND_BALANCE_SCHEDULING: "1"
SERVER_PORT: "DEFAULT_PORT"

View File

@@ -13,7 +13,6 @@ test_cases:
OMP_NUM_THREADS: "1"
TASK_QUEUE_ENABLE: "1"
SERVER_PORT: "DEFAULT_PORT"
VLLM_ASCEND_ENABLE_FUSED_MC2: "1"
VLLM_ASCEND_ENABLE_FLASHCOMM1: "1"
server_cmd:
- "--tensor-parallel-size"