[Bugfix] Add constraints for sequence parallelism (#4014)

### What this PR does / why we need it? Add constraints for sequence parallelism so that unsupported scenarios are rejected. Sequence parallelism now requires: 1. tp_size > 1 2. enable_expert_parallel must be True for MoE models ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.11.0 - vLLM main: 83f478bb19 --------- Signed-off-by: realliujiaxu <realliujiaxu@163.com>
2025-11-06 20:02:03 +08:00
parent 259eb25f88
commit 22005c64c1
2 changed files with 12 additions and 3 deletions
--- a/vllm_ascend/utils.py
+++ b/vllm_ascend/utils.py
@@ -659,6 +659,17 @@ def enable_sp(vllm_config=None) -> bool:
            # We retain the env VLLM_ASCEND_ENABLE_FLASHCOMM here for backward compatibility.
            or bool(int(os.getenv("VLLM_ASCEND_ENABLE_FLASHCOMM", '0'))))

+        if not _ENABLE_SP:
+            return _ENABLE_SP
+
+        assert vllm_config.parallel_config.tensor_parallel_size > 1, \
+            "Flash Comm v1 (Sequence Parallelism) is only supported when tp_size > 1."
+
+        assert (
+            not is_moe_model(vllm_config)
+            or vllm_config.parallel_config.enable_expert_parallel
+        ), "Flash Comm v1 (Sequence Parallelism) requires enable_expert_parallel=True for MoE models."
+
    return _ENABLE_SP