[Bugfix] Fix aclgraph not enabled by default (#2590)

### What this PR does / why we need it?
As vllm will set `cudagraph_mode` to `NONE` before
`check_and_update_config` in post init of `VllmConfig`
(5da4f5d857/vllm/config/__init__.py (L3630)),
we always have `cudagraph_mode` isn't `None`, thus we must remove this
check and add it when the related adaption in vllm is done.

part of https://github.com/vllm-project/vllm-ascend/pull/2577, will add
the e2e test on applying reply after the CI refactor is done

### How was this patch tested?
CI passed with existing test.

- vLLM version: v0.10.1.1
- vLLM main:
f48a9af892

Signed-off-by: MengqingCao <cmq0113@163.com>
This commit is contained in:
Mengqing Cao
2025-08-28 14:08:31 +08:00
committed by GitHub
parent cf96366a39
commit 6c973361fc
3 changed files with 22 additions and 20 deletions

View File

@@ -3,6 +3,7 @@ import unittest
from datetime import timedelta from datetime import timedelta
from unittest.mock import MagicMock, patch from unittest.mock import MagicMock, patch
import pytest
import torch import torch
from torch.distributed import ProcessGroup from torch.distributed import ProcessGroup
from torch.distributed.distributed_c10d import PrefixStore from torch.distributed.distributed_c10d import PrefixStore
@@ -318,6 +319,8 @@ class TestNPUPlatform(TestBase):
CUDAGraphMode.NONE, CUDAGraphMode.NONE,
) )
@pytest.mark.skip(
"Revert me when vllm support setting cudagraph_mode on oot platform")
@patch("vllm_ascend.utils.is_310p", return_value=False) @patch("vllm_ascend.utils.is_310p", return_value=False)
@patch("vllm_ascend.ascend_config.check_ascend_config") @patch("vllm_ascend.ascend_config.check_ascend_config")
@patch("vllm_ascend.ascend_config.init_ascend_config") @patch("vllm_ascend.ascend_config.init_ascend_config")

View File

@@ -13,12 +13,10 @@ from vllm.compilation.cuda_graph import CUDAGraphOptions
from vllm.compilation.monitor import validate_cudagraph_capturing_enabled from vllm.compilation.monitor import validate_cudagraph_capturing_enabled
from vllm.config import CUDAGraphMode, VllmConfig from vllm.config import CUDAGraphMode, VllmConfig
from vllm.forward_context import BatchDescriptor, get_forward_context from vllm.forward_context import BatchDescriptor, get_forward_context
from vllm.logger import init_logger from vllm.logger import logger
from vllm.platforms import current_platform from vllm.platforms import current_platform
from vllm.utils import weak_ref_tensors from vllm.utils import weak_ref_tensors
logger = init_logger(__name__)
@dataclasses.dataclass @dataclasses.dataclass
class ACLGraphEntry: class ACLGraphEntry:
@@ -182,5 +180,6 @@ class ACLGraphWrapper:
f"during replay. Expected {entry.input_addresses}, " f"during replay. Expected {entry.input_addresses}, "
f"got {new_input_addresses}") f"got {new_input_addresses}")
logger.info_once("Replaying aclgraph")
entry.aclgraph.replay() entry.aclgraph.replay()
return entry.output return entry.output

View File

@@ -146,23 +146,23 @@ class NPUPlatform(Platform):
compilation_config.cudagraph_num_of_warmups = 1 compilation_config.cudagraph_num_of_warmups = 1
if compilation_config.cudagraph_mode is None: # TODO: make vllm support oot platform to set `compilation_config.cudagraph_mode`
# if cudagraph_mode is not explicitly set by users, set default value # if cudagraph_mode is not explicitly set by users, set default value
if compilation_config.level == CompilationLevel.PIECEWISE: if compilation_config.level == CompilationLevel.PIECEWISE:
compilation_config.cudagraph_mode = \ compilation_config.cudagraph_mode = \
CUDAGraphMode.PIECEWISE CUDAGraphMode.PIECEWISE
elif compilation_config.level not in [ elif compilation_config.level not in [
CompilationLevel.NO_COMPILATION, CompilationLevel.PIECEWISE CompilationLevel.NO_COMPILATION, CompilationLevel.PIECEWISE
]: ]:
logger.warning( logger.warning(
"NPU does not support %s compilation level. Setting CUDAGraphMode to NONE", "NPU does not support %s compilation level. Setting CUDAGraphMode to NONE",
compilation_config.level) compilation_config.level)
compilation_config.cudagraph_mode = CUDAGraphMode.NONE compilation_config.cudagraph_mode = CUDAGraphMode.NONE
else: else:
logger.warning( logger.warning(
"compilation_config.level = CompilationLevel.NO_COMPILATION is set, Setting CUDAGraphMode to NONE" "compilation_config.level = CompilationLevel.NO_COMPILATION is set, Setting CUDAGraphMode to NONE"
) )
compilation_config.cudagraph_mode = CUDAGraphMode.NONE compilation_config.cudagraph_mode = CUDAGraphMode.NONE
# set CUDAGraphMode to None when torchair is enabled, no mather what compilation_config.level is. # set CUDAGraphMode to None when torchair is enabled, no mather what compilation_config.level is.
if ascend_config.torchair_graph_config.enabled: if ascend_config.torchair_graph_config.enabled: