[Feat] allow using aclgraph in ray backend (#2589)
### What this PR does / why we need it?
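Allow ACL Graph (aclgraph) to be used with the Ray distributed executor
backend, so that TP + PP + ACL Graph can run across multiple machines.
This removes the check that forced CUDAGraphMode to NONE whenever the Ray
backend was selected, together with the unit test that covered it.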
### Does this PR introduce _any_ user-facing change?
### How was this patch tested?
- vLLM version: v0.10.1.1
- vLLM main:
4ba0c587ba
Signed-off-by: withHades <244036962@qq.com>
This commit is contained in:
@@ -363,36 +363,6 @@ class TestNPUPlatform(TestBase):
|
||||
CUDAGraphMode.NONE,
|
||||
)
|
||||
|
||||
@patch("vllm_ascend.utils.is_310p", return_value=False)
|
||||
@patch("vllm_ascend.ascend_config.check_ascend_config")
|
||||
@patch("vllm_ascend.ascend_config.init_ascend_config")
|
||||
def test_check_and_update_config_disable_aclgraph_when_ray_enabled(
|
||||
self, mock_init_ascend, mock_check_ascend, mock_is_310p):
|
||||
mock_init_ascend.return_value = TestNPUPlatform.mock_vllm_ascend_config(
|
||||
)
|
||||
vllm_config = TestNPUPlatform.mock_vllm_config()
|
||||
vllm_config.model_config.enforce_eager = False
|
||||
vllm_config.compilation_config.level = CompilationLevel.PIECEWISE
|
||||
vllm_config.parallel_config.distributed_executor_backend = "ray"
|
||||
|
||||
with self.assertLogs(logger="vllm", level="WARNING") as cm:
|
||||
from vllm_ascend import platform
|
||||
|
||||
importlib.reload(platform)
|
||||
self.platform.check_and_update_config(vllm_config)
|
||||
print(30 * "=", f"cm.output: {cm.output}")
|
||||
self.assertTrue(
|
||||
"Ray distributed executor backend is not compatible with ACL Graph mode"
|
||||
in cm.output[0])
|
||||
self.assertEqual(
|
||||
vllm_config.compilation_config.level,
|
||||
CompilationLevel.NO_COMPILATION,
|
||||
)
|
||||
self.assertEqual(
|
||||
vllm_config.compilation_config.cudagraph_mode,
|
||||
CUDAGraphMode.NONE,
|
||||
)
|
||||
|
||||
@patch("vllm_ascend.utils.is_310p", return_value=False)
|
||||
@patch("vllm_ascend.ascend_config.check_ascend_config")
|
||||
@patch("vllm_ascend.ascend_config.init_ascend_config")
|
||||
|
||||
@@ -185,12 +185,6 @@ class NPUPlatform(Platform):
|
||||
"and use_cached_kv_cache_bytes in torchair_graph_config.")
|
||||
delete_torchair_cache_file()
|
||||
|
||||
if parallel_config.distributed_executor_backend == "ray":
|
||||
logger.warning(
|
||||
"Ray distributed executor backend is not compatible with ACL Graph mode "
|
||||
"right now. Setting CUDAGraphMode to NONE")
|
||||
compilation_config.cudagraph_mode = CUDAGraphMode.NONE
|
||||
|
||||
# set cudagraph sizes before extending `compilation_config.splitting_ops`
|
||||
vllm_config._set_cudagraph_sizes()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user