#
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
# This file is a part of the vllm-ascend project.
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from torch import fx as fx
from vllm.config import VllmConfig

from vllm_ascend.ascend_config import get_ascend_config
from vllm_ascend.utils import vllm_version_is

# The pass base classes live in a different module path depending on the
# installed vllm version, so pick the import location accordingly.
if vllm_version_is("0.15.0"):
    from vllm.compilation.inductor_pass import get_pass_context  # type: ignore
    from vllm.compilation.vllm_inductor_pass import VllmInductorPass  # type: ignore
else:
    from vllm.compilation.passes.inductor_pass import get_pass_context
    from vllm.compilation.passes.vllm_inductor_pass import VllmInductorPass


class NpuGraphEXPassManager:
    """
    A pass manager for npu_graph ex fusion passes.
    It handles the configuration and execution of passes.
    The counterpart in vllm is PostGradPassManager. Since torch_npu
    does not support triton for now, we define our own pass manager.

    Typical usage: call ``configure`` once to register the fusion passes
    enabled in the ascend config, then invoke the manager on a graph to
    run every registered pass that applies to the current compile range.
    """

    def __init__(self):
        """Create a manager with no registered passes."""
        self.passes: list[VllmInductorPass] = []

    def __call__(self, graph: fx.Graph) -> fx.Graph:
        """Run all applicable passes on ``graph`` in place and return it.

        Args:
            graph: The graph to transform. NOTE(review): ``recompile()`` is a
                ``torch.fx.GraphModule`` method, so the object received here
                is presumably a GraphModule despite the ``fx.Graph``
                annotation -- confirm against the caller.

        Returns:
            The same ``graph`` object after the passes have mutated it.
        """
        compile_range = get_pass_context().compile_range

        for pass_ in self.passes:
            # Passes may opt out for compile ranges they cannot handle.
            if pass_.is_applicable_for_range(compile_range):
                pass_(graph)

        # Regenerate the module's code from the rewritten graph. The method is
        # `recompile`; the former `recompiler` does not exist on fx objects
        # and raised AttributeError on every call.
        graph.recompile()
        return graph

    def add(self, pass_: VllmInductorPass):
        """Append ``pass_`` to the list of passes to run.

        Args:
            pass_: The pass to register; must be a ``VllmInductorPass``.
        """
        assert isinstance(pass_, VllmInductorPass)
        self.passes.append(pass_)

    def configure(self, config: VllmConfig):
        """Register the fusion passes enabled in the ascend npugraph_ex config.

        Args:
            config: The vllm configuration handed to each pass constructor.
        """
        self.npugraph_ex_config = get_ascend_config().npugraph_ex_config

        # Each pass module is imported only when its flag is set, so disabled
        # passes are never imported.
        if self.npugraph_ex_config.fuse_norm_quant:
            from .npugraph_ex_passes.graphex_norm_quant_fusion_pass import GraphEXAddRMSNormFusionPass

            self.passes.append(GraphEXAddRMSNormFusionPass(config))

        if self.npugraph_ex_config.fuse_qknorm_rope:
            from .npugraph_ex_passes.graphex_qknorm_rope_fusion_pass import GraphEXQKNormRopeFusionPass

            self.passes.append(GraphEXQKNormRopeFusionPass(config))

        if self.npugraph_ex_config.fuse_allreduce_rms:
            from .npugraph_ex_passes.graphex_allreduce_rmsnorm_fusion_pass import GraphEXMatmulAllReduceAddRMSNormPass

            self.passes.append(GraphEXMatmulAllReduceAddRMSNormPass(config))