[Bugfix] Add constraints for sequence parallelism (#4014)
### What this PR does / why we need it?
Add constraints for sequence parallelism, rejecting unsupported scenarios:
1. `tp_size` must be greater than 1.
2. `enable_expert_parallel` must be True for MoE models.
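A minimal sketch of how such preconditions could be validated up front (illustrative only: `check_sp_constraints` and its parameter names are hypothetical, not the actual vllm-ascend API):

```python
def check_sp_constraints(tp_size: int, is_moe: bool,
                         enable_expert_parallel: bool) -> None:
    """Raise if sequence parallelism is requested in an unsupported setup.

    Hypothetical helper mirroring the two constraints listed above.
    """
    if tp_size <= 1:
        raise ValueError("sequence parallelism requires tp_size > 1")
    if is_moe and not enable_expert_parallel:
        raise ValueError(
            "MoE models require enable_expert_parallel=True "
            "when sequence parallelism is enabled")
```

Validating eagerly at config time gives users a clear error instead of a silent fallback or a failure deep inside the forward pass.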
### Does this PR introduce _any_ user-facing change?
### How was this patch tested?
- vLLM version: v0.11.0
- vLLM main:
83f478bb19
---------
Signed-off-by: realliujiaxu <realliujiaxu@163.com>
```diff
@@ -115,12 +115,10 @@ def set_ascend_forward_context(
     # the performance may degrade due to the switching of communication methods.
     mmrs_fusion = True
     if is_moe_model(vllm_config):
-        sp_enabled = enable_sp(vllm_config) and \
-            tp_world_size > 1 and num_tokens is not None
+        sp_enabled = enable_sp(vllm_config) and num_tokens is not None
         mmrs_fusion = False
     else:
         sp_enabled = enable_sp(vllm_config) and \
             tp_world_size > 1 and \
             num_tokens is not None and num_tokens > 1000
     forward_context.mmrs_fusion = mmrs_fusion
```
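The branching in the hunk above can be restated as a pure function for clarity. This is an illustrative restatement under assumed semantics, not a helper that exists in vllm-ascend; `sp_requested` stands in for the result of `enable_sp(vllm_config)` and `is_moe` for `is_moe_model(vllm_config)`:

```python
from typing import Optional, Tuple

def compute_sp_flags(sp_requested: bool, is_moe: bool,
                     tp_world_size: int,
                     num_tokens: Optional[int]) -> Tuple[bool, bool]:
    """Return (sp_enabled, mmrs_fusion) following the patched control flow."""
    mmrs_fusion = True
    if is_moe:
        # MoE path: no token-count threshold, but MM-RS fusion is disabled.
        sp_enabled = sp_requested and num_tokens is not None
        mmrs_fusion = False
    else:
        # Dense path: SP only pays off for large batches (> 1000 tokens here).
        sp_enabled = (sp_requested and tp_world_size > 1
                      and num_tokens is not None and num_tokens > 1000)
    return sp_enabled, mmrs_fusion
```

Note the asymmetry: the dense branch keeps the `tp_world_size > 1` and token-count guards inline, while the MoE branch relies on the new config-time constraints and unconditionally disables `mmrs_fusion`.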