From 0c0789be7442122eb1203abbf89a9592648922e0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=97=A0=E8=84=B8=E7=94=B7?= <244036962@qq.com>
Date: Thu, 4 Sep 2025 11:45:56 +0800
Subject: [PATCH] [Feat] allow using aclgraph in ray backend (#2589)

### What this PR does / why we need it?

Allow ACL Graph (aclgraph) to be used with the Ray distributed executor
backend, so that tp + pp + aclgraph works in multi-machine deployments.

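### Does this PR introduce _any_ user-facing change?

Yes. When `distributed_executor_backend` is "ray", the platform no longer
logs the "not compatible with ACL Graph mode" warning and no longer forces
`cudagraph_mode` to `CUDAGraphMode.NONE`.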

### How was this patch tested?

- vLLM version: v0.10.1.1
- vLLM main:
https://github.com/vllm-project/vllm/commit/4ba0c587ba3ad2ab419ba6f43a2d52946c58d530

Signed-off-by: withHades <244036962@qq.com>
---
 tests/ut/test_platform.py | 30 ------------------------------
 vllm_ascend/platform.py   |  6 ------
 2 files changed, 36 deletions(-)

diff --git a/tests/ut/test_platform.py b/tests/ut/test_platform.py
index de8b9be..2170a9a 100644
--- a/tests/ut/test_platform.py
+++ b/tests/ut/test_platform.py
@@ -363,36 +363,6 @@ class TestNPUPlatform(TestBase):
                 CUDAGraphMode.NONE,
             )
 
-    @patch("vllm_ascend.utils.is_310p", return_value=False)
-    @patch("vllm_ascend.ascend_config.check_ascend_config")
-    @patch("vllm_ascend.ascend_config.init_ascend_config")
-    def test_check_and_update_config_disable_aclgraph_when_ray_enabled(
-            self, mock_init_ascend, mock_check_ascend, mock_is_310p):
-        mock_init_ascend.return_value = TestNPUPlatform.mock_vllm_ascend_config(
-        )
-        vllm_config = TestNPUPlatform.mock_vllm_config()
-        vllm_config.model_config.enforce_eager = False
-        vllm_config.compilation_config.level = CompilationLevel.PIECEWISE
-        vllm_config.parallel_config.distributed_executor_backend = "ray"
-
-        with self.assertLogs(logger="vllm", level="WARNING") as cm:
-            from vllm_ascend import platform
-
-            importlib.reload(platform)
-            self.platform.check_and_update_config(vllm_config)
-            print(30 * "=", f"cm.output: {cm.output}")
-            self.assertTrue(
-                "Ray distributed executor backend is not compatible with ACL Graph mode"
-                in cm.output[0])
-            self.assertEqual(
-                vllm_config.compilation_config.level,
-                CompilationLevel.NO_COMPILATION,
-            )
-            self.assertEqual(
-                vllm_config.compilation_config.cudagraph_mode,
-                CUDAGraphMode.NONE,
-            )
-
     @patch("vllm_ascend.utils.is_310p", return_value=False)
     @patch("vllm_ascend.ascend_config.check_ascend_config")
     @patch("vllm_ascend.ascend_config.init_ascend_config")
diff --git a/vllm_ascend/platform.py b/vllm_ascend/platform.py
index 57ace2b..94ec99a 100644
--- a/vllm_ascend/platform.py
+++ b/vllm_ascend/platform.py
@@ -185,12 +185,6 @@ class NPUPlatform(Platform):
                     "and use_cached_kv_cache_bytes in torchair_graph_config.")
                 delete_torchair_cache_file()
 
-        if parallel_config.distributed_executor_backend == "ray":
-            logger.warning(
-                "Ray distributed executor backend is not compatible with ACL Graph mode "
-                "right now. Setting CUDAGraphMode to NONE")
-            compilation_config.cudagraph_mode = CUDAGraphMode.NONE
-
         # set cudaprah sizes before extending `compilation_config.splitting_ops`
         vllm_config._set_cudagraph_sizes()