From 9e65da990ece406e23bffb64150b432b3a99073a Mon Sep 17 00:00:00 2001 From: 22dimensions Date: Fri, 1 Aug 2025 09:06:09 +0800 Subject: [PATCH] [Misc] Add warning for incompatible Ray backend with ACL Graph mode (#2132) ### What this PR does / why we need it? Cherry-pick #1501 from 0.9.1-dev to main. Currently, Ray is not compatible with ACL Graph, so when the Ray distributed executor backend is selected we log a warning and fall back to eager mode by setting the compilation level to NO_COMPILATION. Co-authored-by: Yizhou Liu - vLLM version: v0.10.0 - vLLM main: https://github.com/vllm-project/vllm/commit/2836dd73f13015ee386c544760ca0d16888203f3 Signed-off-by: 22dimensions --- tests/e2e/singlecard/test_aclgraph.py | 8 ++++++++ vllm_ascend/platform.py | 5 +++++ 2 files changed, 13 insertions(+) diff --git a/tests/e2e/singlecard/test_aclgraph.py b/tests/e2e/singlecard/test_aclgraph.py index 2a03744..5b150e7 100644 --- a/tests/e2e/singlecard/test_aclgraph.py +++ b/tests/e2e/singlecard/test_aclgraph.py @@ -84,3 +84,11 @@ def test_deepseek_raises_error(monkeypatch: pytest.MonkeyPatch) -> None: max_model_len=1024, enforce_eager=False) assert "ACL Graph does not support deepseek" in str(excinfo.value) + + +@pytest.mark.parametrize("model", MODELS) +def test_ray_backend_sets_no_compilation(model: str) -> None: + runner = VllmRunner(model, + enforce_eager=False, + distributed_executor_backend="ray") + assert runner.model.llm_engine.vllm_config.compilation_config.level == 0 diff --git a/vllm_ascend/platform.py b/vllm_ascend/platform.py index 8f8b2b4..2d3b819 100644 --- a/vllm_ascend/platform.py +++ b/vllm_ascend/platform.py @@ -153,6 +153,11 @@ class NPUPlatform(Platform): "Torchair compilation enabled on NPU. Setting level to NO_COMPILATION" ) compilation_config.level = CompilationLevel.NO_COMPILATION + elif parallel_config.distributed_executor_backend == "ray": + logger.warning( + "Ray distributed executor backend is not compatible with ACL Graph mode " + "right now. 
Setting level to NO_COMPILATION") + compilation_config.level = CompilationLevel.NO_COMPILATION else: logger.info( "PIECEWISE compilation enabled on NPU. use_inductor not supported - "