[Aclgraph] Update compilation config in check_and_update_config (#2540)
### What this PR does / why we need it?
This PR updates the compilation config in `check_and_update_config`: we use
`compilation_config.level` to update `compilation_config.cudagraph_mode`
so that the config is correct.
Adds `compilation_config.cudagraph_num_of_warmups = 1` when V1 is
enabled, because this is also used in torchair graph mode; this fixes
https://github.com/vllm-project/vllm-ascend/issues/2523
It also fixes the bug where the `aclgraphmode` was always `NONE` while
running forward in aclgraph mode.
### How was this patch tested?
CI passed with new added/existing test.
- vLLM version: v0.10.1.1
- vLLM main:
f58675bfb3
---------
Signed-off-by: MengqingCao <cmq0113@163.com>
This commit is contained in:
@@ -3,11 +3,11 @@ import unittest
|
||||
from datetime import timedelta
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
import torch
|
||||
from torch.distributed import ProcessGroup
|
||||
from torch.distributed.distributed_c10d import PrefixStore
|
||||
from vllm.config import CompilationLevel
|
||||
from vllm.config.compilation import CUDAGraphMode
|
||||
from vllm.platforms import PlatformEnum
|
||||
|
||||
from tests.ut.base import TestBase
|
||||
@@ -28,6 +28,7 @@ class TestNPUPlatform(TestBase):
|
||||
self.mock_vllm_config.scheduler_config = MagicMock()
|
||||
self.mock_vllm_config.speculative_config = None
|
||||
self.mock_vllm_config.compilation_config.pass_config.enable_sequence_parallelism = False
|
||||
self.mock_vllm_config.compilation_config.cudagraph_mode = None
|
||||
|
||||
self.mock_ascend_config = MagicMock()
|
||||
self.mock_ascend_config.torchair_graph_config.enabled = False
|
||||
@@ -269,8 +270,6 @@ class TestNPUPlatform(TestBase):
|
||||
self.platform.check_and_update_config(self.mock_vllm_config)
|
||||
self.assertTrue("Model config is missing" in cm.output[0])
|
||||
|
||||
@pytest.mark.skip(
|
||||
reason="TODO: revert me when the occasional failed is fixed")
|
||||
@patch("vllm_ascend.utils.is_310p", return_value=False)
|
||||
@patch("vllm_ascend.ascend_config.check_ascend_config")
|
||||
@patch("vllm_ascend.ascend_config.init_ascend_config")
|
||||
@@ -290,6 +289,10 @@ class TestNPUPlatform(TestBase):
|
||||
self.mock_vllm_config.compilation_config.level,
|
||||
CompilationLevel.NO_COMPILATION,
|
||||
)
|
||||
self.assertEqual(
|
||||
self.mock_vllm_config.compilation_config.cudagraph_mode,
|
||||
CUDAGraphMode.NONE,
|
||||
)
|
||||
|
||||
@patch("vllm_ascend.utils.is_310p", return_value=False)
|
||||
@patch("vllm_ascend.ascend_config.check_ascend_config")
|
||||
@@ -310,6 +313,64 @@ class TestNPUPlatform(TestBase):
|
||||
self.mock_vllm_config.compilation_config.level,
|
||||
CompilationLevel.NO_COMPILATION,
|
||||
)
|
||||
self.assertEqual(
|
||||
self.mock_vllm_config.compilation_config.cudagraph_mode,
|
||||
CUDAGraphMode.NONE,
|
||||
)
|
||||
|
||||
@patch("vllm_ascend.utils.is_310p", return_value=False)
@patch("vllm_ascend.ascend_config.check_ascend_config")
@patch("vllm_ascend.ascend_config.init_ascend_config")
def test_check_and_update_config_unsupported_cudagraph_mode(
        self, mock_init_ascend, mock_check_ascend, mock_is_310p):
    """An unsupported cudagraph mode (FULL) must fall back to NONE.

    check_and_update_config should log the fallback and leave both the
    compilation level and the cudagraph mode disabled.
    """
    mock_init_ascend.return_value = self.mock_ascend_config
    compilation_config = self.mock_vllm_config.compilation_config
    self.mock_vllm_config.model_config.enforce_eager = False
    # FULL capture is the unsupported mode under test; the platform is
    # expected to downgrade it rather than fail.
    compilation_config.cudagraph_mode = CUDAGraphMode.FULL

    with self.assertLogs(logger="vllm", level="INFO") as cm:
        from vllm_ascend import platform

        importlib.reload(platform)
        self.platform.check_and_update_config(self.mock_vllm_config)

    self.assertTrue(
        "cudagraph_mode is not support on NPU. falling back to NONE" in
        cm.output[0])
    self.assertEqual(compilation_config.level,
                     CompilationLevel.NO_COMPILATION)
    self.assertEqual(compilation_config.cudagraph_mode, CUDAGraphMode.NONE)
|
||||
|
||||
@patch("vllm_ascend.utils.is_310p", return_value=False)
@patch("vllm_ascend.ascend_config.check_ascend_config")
@patch("vllm_ascend.ascend_config.init_ascend_config")
def test_check_and_update_config_disable_aclgraph_when_ray_enabled(
        self, mock_init_ascend, mock_check_ascend, mock_is_310p):
    """ACL graph mode is disabled when the Ray executor backend is used.

    Ray is not compatible with ACL Graph mode, so check_and_update_config
    must emit a warning and force NO_COMPILATION / CUDAGraphMode.NONE.
    """
    mock_init_ascend.return_value = self.mock_ascend_config
    self.mock_vllm_config.model_config.enforce_eager = False
    self.mock_vllm_config.compilation_config.level = CompilationLevel.PIECEWISE
    self.mock_vllm_config.parallel_config.distributed_executor_backend = "ray"

    with self.assertLogs(logger="vllm", level="WARNING") as cm:
        from vllm_ascend import platform

        importlib.reload(platform)
        self.platform.check_and_update_config(self.mock_vllm_config)
    # Removed stray debug print of cm.output: assertion failures already
    # show the captured log records.
    self.assertTrue(
        "Ray distributed executor backend is not compatible with ACL Graph mode"
        in cm.output[0])
    self.assertEqual(
        self.mock_vllm_config.compilation_config.level,
        CompilationLevel.NO_COMPILATION,
    )
    self.assertEqual(
        self.mock_vllm_config.compilation_config.cudagraph_mode,
        CUDAGraphMode.NONE,
    )
|
||||
|
||||
@patch("vllm_ascend.utils.is_310p", return_value=False)
|
||||
@patch("vllm_ascend.ascend_config.check_ascend_config")
|
||||
@@ -331,6 +392,10 @@ class TestNPUPlatform(TestBase):
|
||||
self.mock_vllm_config.compilation_config.level,
|
||||
CompilationLevel.NO_COMPILATION,
|
||||
)
|
||||
self.assertEqual(
|
||||
self.mock_vllm_config.compilation_config.cudagraph_mode,
|
||||
CUDAGraphMode.NONE,
|
||||
)
|
||||
|
||||
@patch("vllm_ascend.utils.is_310p", return_value=False)
|
||||
@patch("vllm_ascend.ascend_config.check_ascend_config")
|
||||
|
||||
Reference in New Issue
Block a user