From 0c0789be7442122eb1203abbf89a9592648922e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=97=A0=E8=84=B8=E7=94=B7?= <244036962@qq.com> Date: Thu, 4 Sep 2025 11:45:56 +0800 Subject: [PATCH] [Feat] allow using aclgraph in ray backend (#2589) ### What this PR does / why we need it? Allow using aclgraph in ray backend, for tp + pp + aclgraph in multi machine ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.10.1.1 - vLLM main: https://github.com/vllm-project/vllm/commit/4ba0c587ba3ad2ab419ba6f43a2d52946c58d530 Signed-off-by: withHades <244036962@qq.com> --- tests/ut/test_platform.py | 30 ------------------------------ vllm_ascend/platform.py | 6 ------ 2 files changed, 36 deletions(-) diff --git a/tests/ut/test_platform.py b/tests/ut/test_platform.py index de8b9be..2170a9a 100644 --- a/tests/ut/test_platform.py +++ b/tests/ut/test_platform.py @@ -363,36 +363,6 @@ class TestNPUPlatform(TestBase): CUDAGraphMode.NONE, ) - @patch("vllm_ascend.utils.is_310p", return_value=False) - @patch("vllm_ascend.ascend_config.check_ascend_config") - @patch("vllm_ascend.ascend_config.init_ascend_config") - def test_check_and_update_config_disable_aclgraph_when_ray_enabled( - self, mock_init_ascend, mock_check_ascend, mock_is_310p): - mock_init_ascend.return_value = TestNPUPlatform.mock_vllm_ascend_config( - ) - vllm_config = TestNPUPlatform.mock_vllm_config() - vllm_config.model_config.enforce_eager = False - vllm_config.compilation_config.level = CompilationLevel.PIECEWISE - vllm_config.parallel_config.distributed_executor_backend = "ray" - - with self.assertLogs(logger="vllm", level="WARNING") as cm: - from vllm_ascend import platform - - importlib.reload(platform) - self.platform.check_and_update_config(vllm_config) - print(30 * "=", f"cm.output: {cm.output}") - self.assertTrue( - "Ray distributed executor backend is not compatible with ACL Graph mode" - in cm.output[0]) - self.assertEqual( - vllm_config.compilation_config.level, - CompilationLevel.NO_COMPILATION, - ) - self.assertEqual( - vllm_config.compilation_config.cudagraph_mode, - CUDAGraphMode.NONE, - ) - @patch("vllm_ascend.utils.is_310p", return_value=False) @patch("vllm_ascend.ascend_config.check_ascend_config") @patch("vllm_ascend.ascend_config.init_ascend_config") diff --git a/vllm_ascend/platform.py b/vllm_ascend/platform.py index 57ace2b..94ec99a 100644 --- a/vllm_ascend/platform.py +++ b/vllm_ascend/platform.py @@ -185,12 +185,6 @@ class NPUPlatform(Platform): "and use_cached_kv_cache_bytes in torchair_graph_config.") delete_torchair_cache_file() - if parallel_config.distributed_executor_backend == "ray": - logger.warning( - "Ray distributed executor backend is not compatible with ACL Graph mode " - "right now. Setting CUDAGraphMode to NONE") - compilation_config.cudagraph_mode = CUDAGraphMode.NONE - # set cudaprah sizes before extending `compilation_config.splitting_ops` vllm_config._set_cudagraph_sizes()