diff --git a/vllm_ascend/compilation/compiler_interface.py b/vllm_ascend/compilation/compiler_interface.py
index 8d29316d..8a39e404 100644
--- a/vllm_ascend/compilation/compiler_interface.py
+++ b/vllm_ascend/compilation/compiler_interface.py
@@ -17,7 +17,6 @@
 #
 import copy
 import functools
-import logging
 from collections.abc import Callable
 from typing import Any
 
@@ -30,12 +29,11 @@ from torch.fx import GraphModule
 from vllm.compilation.compiler_interface import CompilerInterface
 from vllm.config import VllmConfig
 from vllm.config.utils import Range
+from vllm.logger import logger
 
 from vllm_ascend.ascend_config import AscendCompilationConfig, get_ascend_config
 from vllm_ascend.utils import COMPILATION_PASS_KEY
 
-logger = logging.getLogger(__name__)
-
 
 def compile_fx(graph: GraphModule, example_inputs: list, inner_compile: Callable, decompositions: dict) -> Callable:
     recursive_compile_fx = functools.partial(compile_fx, inner_compile=inner_compile, decompositions=decompositions)
@@ -91,6 +89,9 @@ def npugraph_ex_compile(
         # and cause copy_between_host_and_device error.
         config.debug.aclgraph.disable_reinplace_inplaceable_ops_pass = True
     if ascend_compilation_config.enable_static_kernel:
+        logger.info(
+            "enable_static_kernel is enabled; static-shape kernels will be used to accelerate aclgraph execution."
+        )
         config.experimental_config.aclgraph._aclnn_static_shape_kernel = True
     # According to the cudagraph_capture_size configuration, set the shapes
     # that can trigger the compilation of static kernel. If this configuration is
@@ -158,6 +159,7 @@ class AscendCompiler(CompilerInterface):
         ascend_compilation_config = get_ascend_config().ascend_compilation_config
 
         if ascend_compilation_config.enable_npugraph_ex:
+            logger.info("enable_npugraph_ex is enabled; graph compilation optimizations will be applied.")
            assert hasattr(self, "vllm_config")
            return npugraph_ex_compile(
                graph, example_inputs, compiler_config, self.vllm_config, ascend_compilation_config, compile_range, key
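
For reviewers who want to exercise both new log lines, here is a minimal usage sketch. The flag names `enable_npugraph_ex` and `enable_static_kernel` come from the diff itself; the exact `additional_config` key layout and the model name are assumptions, not something this patch confirms.

```python
# Hedged usage sketch: toggling the two flags referenced in this patch
# from user code. The nesting under "ascend_compilation_config" is an
# assumption that mirrors get_ascend_config().ascend_compilation_config.
from vllm import LLM

llm = LLM(
    model="Qwen/Qwen2.5-7B-Instruct",  # hypothetical model choice
    additional_config={
        "ascend_compilation_config": {
            # Routes compilation through npugraph_ex_compile (logged at INFO).
            "enable_npugraph_ex": True,
            # Uses static-shape kernels to accelerate aclgraph execution.
            "enable_static_kernel": True,
        },
    },
)
```

With both flags set, each branch now emits an INFO message, so operators can confirm from startup logs that the npugraph_ex path and static-shape kernels actually took effect.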