[Feat] Flash comm allgher ep (#3334)

Support flash comm v1(Sequence Parallelism) for Allgather EP. - vLLM version: v0.11.0rc3 - vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0 --------- Signed-off-by: realliujiaxu <realliujiaxu@163.com> Co-authored-by: zhaozx-cn <zhaozx2116@163.com>
2025-10-15 19:36:32 +08:00
parent 8abe517870
commit f69a83b7ba
15 changed files with 283 additions and 78 deletions
--- a/tests/e2e/multicard/test_offline_inference_distributed.py
+++ b/tests/e2e/multicard/test_offline_inference_distributed.py
@@ -166,7 +166,7 @@ def test_sp_for_qwen3_moe() -> None:
@pytest.mark.parametrize("enforce_eager", [True, False])
@pytest.mark.parametrize("model", QWEN_DENSE_MODELS)
@patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_DENSE_OPTIMIZE": "1"})
-@patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_FLASHCOMM": "1"})
+@patch.dict(os.environ, {"VLLM_ASCEND_ENABLE_FLASHCOMM1": "1"})
 def test_models_distributed_Qwen_Dense_with_flashcomm_v1(model, enforce_eager):
    example_prompts = [
        "Hello, my name is",