[Misc] Add warning for incompatible Ray backend with ACL Graph mode (#2132)
### What this PR does / why we need it?
Cherry-pick of #1501 from the 0.9.1-dev branch to main.
Currently, Ray is not compatible with ACL Graph, so we need to fall back
to eager mode when using the Ray backend.
Co-authored-by: Yizhou Liu <liu_yizhou@outlook.com>
- vLLM version: v0.10.0
- vLLM main: 2836dd73f1
Signed-off-by: 22dimensions <waitingwind@foxmail.com>
This commit is contained in:
@@ -84,3 +84,11 @@ def test_deepseek_raises_error(monkeypatch: pytest.MonkeyPatch) -> None:
|
||||
max_model_len=1024,
|
||||
enforce_eager=False)
|
||||
assert "ACL Graph does not support deepseek" in str(excinfo.value)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("model", MODELS)
def test_ray_backend_sets_no_compilation(model: str) -> None:
    """Check that the Ray executor backend ends up with compilation level 0.

    The runner is created with ``enforce_eager=False`` and the ``"ray"``
    distributed executor backend; the resulting engine config is then
    expected to report a compilation level of 0.
    """
    # NOTE(review): 0 is presumably CompilationLevel.NO_COMPILATION, the
    # level the platform falls back to for the Ray backend - confirm.
    ray_runner = VllmRunner(
        model,
        enforce_eager=False,
        distributed_executor_backend="ray",
    )
    engine_config = ray_runner.model.llm_engine.vllm_config
    assert engine_config.compilation_config.level == 0
|
||||
|
||||
@@ -153,6 +153,11 @@ class NPUPlatform(Platform):
|
||||
"Torchair compilation enabled on NPU. Setting level to NO_COMPILATION"
|
||||
)
|
||||
compilation_config.level = CompilationLevel.NO_COMPILATION
|
||||
elif parallel_config.distributed_executor_backend == "ray":
|
||||
logger.warning(
|
||||
"Ray distributed executor backend is not compatible with ACL Graph mode "
|
||||
"right now. Setting level to NO_COMPILATION")
|
||||
compilation_config.level = CompilationLevel.NO_COMPILATION
|
||||
else:
|
||||
logger.info(
|
||||
"PIECEWISE compilation enabled on NPU. use_inductor not supported - "
|
||||
|
||||
Reference in New Issue
Block a user