[Misc] Add warning for incompatible Ray backend with ACL Graph mode (#2132)

### What this PR does / why we need it?

cherry-pick #1501 from 0.9.1-dev to main

Currently, Ray is not compatible with ACL Graph, so we need to fall back
to eager mode when using the Ray backend.

Co-authored-by: Yizhou Liu <liu_yizhou@outlook.com>

- vLLM version: v0.10.0
- vLLM main: 2836dd73f1

Signed-off-by: 22dimensions <waitingwind@foxmail.com>
This commit is contained in:
22dimensions
2025-08-01 09:06:09 +08:00
committed by GitHub
parent 99fa0ac882
commit 9e65da990e
2 changed files with 13 additions and 0 deletions

View File

@@ -84,3 +84,11 @@ def test_deepseek_raises_error(monkeypatch: pytest.MonkeyPatch) -> None:
max_model_len=1024,
enforce_eager=False)
assert "ACL Graph does not support deepseek" in str(excinfo.value)
@pytest.mark.parametrize("model", MODELS)
def test_ray_backend_sets_no_compilation(model: str) -> None:
    """Verify that choosing the Ray executor backend turns compilation off.

    The runner is created with ``enforce_eager=False`` so that eager mode is
    not requested explicitly; the test then checks that the resulting
    compilation level is 0 (presumably ``CompilationLevel.NO_COMPILATION``,
    which the platform code sets for the Ray backend).
    """
    ray_runner = VllmRunner(
        model,
        enforce_eager=False,
        distributed_executor_backend="ray",
    )
    # Walk down to the engine's effective config rather than the arguments
    # passed in, so the check reflects what the platform actually applied.
    engine_config = ray_runner.model.llm_engine.vllm_config
    assert engine_config.compilation_config.level == 0

View File

@@ -153,6 +153,11 @@ class NPUPlatform(Platform):
"Torchair compilation enabled on NPU. Setting level to NO_COMPILATION"
)
compilation_config.level = CompilationLevel.NO_COMPILATION
elif parallel_config.distributed_executor_backend == "ray":
logger.warning(
"Ray distributed executor backend is not compatible with ACL Graph mode "
"right now. Setting level to NO_COMPILATION")
compilation_config.level = CompilationLevel.NO_COMPILATION
else:
logger.info(
"PIECEWISE compilation enabled on NPU. use_inductor not supported - "