[Aclgraph] Update compilation config in check_and_update_config (#2540)
### What this PR does / why we need it?
This PR updates the compilation config in `check_and_update_config`: we use
`compilation_config.level` to update `compilation_config.cudagraph_mode`
so that the config is correct.
Adds `compilation_config.cudagraph_num_of_warmups = 1` when V1 is
enabled, because this is also used in torchair graph mode; this fixes
https://github.com/vllm-project/vllm-ascend/issues/2523
It also fixes the bug where the `aclgraphmode` was always `NONE` while
running forward in aclgraph mode.
### How was this patch tested?
CI passed with new added/existing test.
- vLLM version: v0.10.1.1
- vLLM main:
f58675bfb3
---------
Signed-off-by: MengqingCao <cmq0113@163.com>
This commit is contained in:
@@ -3,11 +3,11 @@ import unittest
|
||||
from datetime import timedelta
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
import torch
|
||||
from torch.distributed import ProcessGroup
|
||||
from torch.distributed.distributed_c10d import PrefixStore
|
||||
from vllm.config import CompilationLevel
|
||||
from vllm.config.compilation import CUDAGraphMode
|
||||
from vllm.platforms import PlatformEnum
|
||||
|
||||
from tests.ut.base import TestBase
|
||||
@@ -28,6 +28,7 @@ class TestNPUPlatform(TestBase):
|
||||
self.mock_vllm_config.scheduler_config = MagicMock()
|
||||
self.mock_vllm_config.speculative_config = None
|
||||
self.mock_vllm_config.compilation_config.pass_config.enable_sequence_parallelism = False
|
||||
self.mock_vllm_config.compilation_config.cudagraph_mode = None
|
||||
|
||||
self.mock_ascend_config = MagicMock()
|
||||
self.mock_ascend_config.torchair_graph_config.enabled = False
|
||||
@@ -269,8 +270,6 @@ class TestNPUPlatform(TestBase):
|
||||
self.platform.check_and_update_config(self.mock_vllm_config)
|
||||
self.assertTrue("Model config is missing" in cm.output[0])
|
||||
|
||||
@pytest.mark.skip(
|
||||
reason="TODO: revert me when the occasional failed is fixed")
|
||||
@patch("vllm_ascend.utils.is_310p", return_value=False)
|
||||
@patch("vllm_ascend.ascend_config.check_ascend_config")
|
||||
@patch("vllm_ascend.ascend_config.init_ascend_config")
|
||||
@@ -290,6 +289,10 @@ class TestNPUPlatform(TestBase):
|
||||
self.mock_vllm_config.compilation_config.level,
|
||||
CompilationLevel.NO_COMPILATION,
|
||||
)
|
||||
self.assertEqual(
|
||||
self.mock_vllm_config.compilation_config.cudagraph_mode,
|
||||
CUDAGraphMode.NONE,
|
||||
)
|
||||
|
||||
@patch("vllm_ascend.utils.is_310p", return_value=False)
|
||||
@patch("vllm_ascend.ascend_config.check_ascend_config")
|
||||
@@ -310,6 +313,64 @@ class TestNPUPlatform(TestBase):
|
||||
self.mock_vllm_config.compilation_config.level,
|
||||
CompilationLevel.NO_COMPILATION,
|
||||
)
|
||||
self.assertEqual(
|
||||
self.mock_vllm_config.compilation_config.cudagraph_mode,
|
||||
CUDAGraphMode.NONE,
|
||||
)
|
||||
|
||||
@patch("vllm_ascend.utils.is_310p", return_value=False)
@patch("vllm_ascend.ascend_config.check_ascend_config")
@patch("vllm_ascend.ascend_config.init_ascend_config")
def test_check_and_update_config_unsupported_cudagraph_mode(
        self, mock_init_ascend, mock_check_ascend, mock_is_310p):
    """An unsupported cudagraph mode (FULL) must fall back to NONE.

    check_and_update_config should log the fallback and leave both the
    compilation level and the cudagraph mode disabled.
    """
    mock_init_ascend.return_value = self.mock_ascend_config
    compilation_config = self.mock_vllm_config.compilation_config
    self.mock_vllm_config.model_config.enforce_eager = False
    # FULL capture is the unsupported mode under test; the platform is
    # expected to downgrade it rather than fail.
    compilation_config.cudagraph_mode = CUDAGraphMode.FULL

    with self.assertLogs(logger="vllm", level="INFO") as cm:
        from vllm_ascend import platform

        importlib.reload(platform)
        self.platform.check_and_update_config(self.mock_vllm_config)

    self.assertTrue(
        "cudagraph_mode is not support on NPU. falling back to NONE" in
        cm.output[0])
    self.assertEqual(compilation_config.level,
                     CompilationLevel.NO_COMPILATION)
    self.assertEqual(compilation_config.cudagraph_mode, CUDAGraphMode.NONE)
|
||||
|
||||
@patch("vllm_ascend.utils.is_310p", return_value=False)
@patch("vllm_ascend.ascend_config.check_ascend_config")
@patch("vllm_ascend.ascend_config.init_ascend_config")
def test_check_and_update_config_disable_aclgraph_when_ray_enabled(
        self, mock_init_ascend, mock_check_ascend, mock_is_310p):
    """ACL graph mode is disabled when the Ray executor backend is used.

    Ray is not compatible with ACL Graph mode, so check_and_update_config
    must emit a warning and force NO_COMPILATION / CUDAGraphMode.NONE.
    """
    mock_init_ascend.return_value = self.mock_ascend_config
    self.mock_vllm_config.model_config.enforce_eager = False
    self.mock_vllm_config.compilation_config.level = CompilationLevel.PIECEWISE
    self.mock_vllm_config.parallel_config.distributed_executor_backend = "ray"

    with self.assertLogs(logger="vllm", level="WARNING") as cm:
        from vllm_ascend import platform

        importlib.reload(platform)
        self.platform.check_and_update_config(self.mock_vllm_config)
    # Removed stray debug print of cm.output: assertion failures already
    # show the captured log records.
    self.assertTrue(
        "Ray distributed executor backend is not compatible with ACL Graph mode"
        in cm.output[0])
    self.assertEqual(
        self.mock_vllm_config.compilation_config.level,
        CompilationLevel.NO_COMPILATION,
    )
    self.assertEqual(
        self.mock_vllm_config.compilation_config.cudagraph_mode,
        CUDAGraphMode.NONE,
    )
|
||||
|
||||
@patch("vllm_ascend.utils.is_310p", return_value=False)
|
||||
@patch("vllm_ascend.ascend_config.check_ascend_config")
|
||||
@@ -331,6 +392,10 @@ class TestNPUPlatform(TestBase):
|
||||
self.mock_vllm_config.compilation_config.level,
|
||||
CompilationLevel.NO_COMPILATION,
|
||||
)
|
||||
self.assertEqual(
|
||||
self.mock_vllm_config.compilation_config.cudagraph_mode,
|
||||
CUDAGraphMode.NONE,
|
||||
)
|
||||
|
||||
@patch("vllm_ascend.utils.is_310p", return_value=False)
|
||||
@patch("vllm_ascend.ascend_config.check_ascend_config")
|
||||
|
||||
Reference in New Issue
Block a user