[Doc] Upgrade env VLLM_ASCEND_ENABLE_FUSED_MC2 used in nightly test and tutorials (#8441)

### What this PR does / why we need it?
The env `VLLM_ASCEND_ENABLE_FUSED_MC2` should only enabled in the
decoder node during Prefill-Decode Disaggregation scenario

---------

Signed-off-by: wangli <wangli858794774@gmail.com>
This commit is contained in:
Li Wang
2026-04-20 22:39:23 +08:00
committed by GitHub
parent 3db5048d74
commit 36a0470de1
9 changed files with 10 additions and 21 deletions

View File

@@ -146,7 +146,6 @@ export HCCL_OP_EXPANSION_MODE=AIV
export VLLM_ASCEND_BALANCE_SCHEDULING=1
export VLLM_ASCEND_ENABLE_TOPK_OPTIMIZE=1
export VLLM_ASCEND_ENABLE_FLASHCOMM1=1
export VLLM_ASCEND_ENABLE_FUSED_MC2=1
vllm serve Eco-Tech/GLM-4.7-W8A8-floatmtp \
--data-parallel-size 2 \
@@ -198,7 +197,6 @@ export HCCL_OP_EXPANSION_MODE=AIV
export VLLM_ASCEND_BALANCE_SCHEDULING=1
export VLLM_ASCEND_ENABLE_TOPK_OPTIMIZE=1
export VLLM_ASCEND_ENABLE_FLASHCOMM1=1
export VLLM_ASCEND_ENABLE_FUSED_MC2=1
vllm serve Eco-Tech/GLM-4.7-W8A8-floatmtp \
--host 0.0.0.0 \
@@ -250,7 +248,6 @@ export HCCL_OP_EXPANSION_MODE=AIV
export VLLM_ASCEND_BALANCE_SCHEDULING=1
export VLLM_ASCEND_ENABLE_TOPK_OPTIMIZE=1
export VLLM_ASCEND_ENABLE_FLASHCOMM1=1
export VLLM_ASCEND_ENABLE_FUSED_MC2=1
vllm serve Eco-Tech/GLM-4.7-W8A8-floatmtp \
--host 0.0.0.0 \
@@ -413,7 +410,6 @@ Before you start, please
export VLLM_ASCEND_BALANCE_SCHEDULING=1
export VLLM_ASCEND_ENABLE_TOPK_OPTIMIZE=1
export VLLM_ASCEND_ENABLE_FLASHCOMM1=1
export VLLM_ASCEND_ENABLE_FUSED_MC2=1
export ASCEND_RT_VISIBLE_DEVICES=$1
export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages/mooncake:$LD_LIBRARY_PATH
@@ -479,7 +475,6 @@ Before you start, please
export VLLM_ASCEND_BALANCE_SCHEDULING=1
export VLLM_ASCEND_ENABLE_TOPK_OPTIMIZE=1
export VLLM_ASCEND_ENABLE_FLASHCOMM1=1
export VLLM_ASCEND_ENABLE_FUSED_MC2=1
export ASCEND_RT_VISIBLE_DEVICES=$1
export LD_LIBRARY_PATH=/usr/local/Ascend/ascend-toolkit/latest/python/site-packages/mooncake:$LD_LIBRARY_PATH