[Kernel] add custom op GmmSwigluQuantWeightNzTensorList (#3804)

### What this PR does / why we need it?

This PR introduces support for adding custom CANN `aclnn` ops to
`vllm-ascend`, allowing users to define and use their own custom
operators.

Key changes include:
- Building and installing custom ops into the `vllm-ascend`-specified
directory
- Binding the `aclnn` op interface to the `torch.ops._C_ascend` module
- Enabling invocation of these ops within `vllm-ascend`

This PR includes a sample custom op:
`aclnnGroupedMatmulSwigluQuantWeightNzTensorList`, which is adapted from
the CANN operator
[`aclnnGroupedMatmulSwigluQuantWeightNZ`](https://www.hiascend.com/document/detail/zh/canncommercial/83RC1/API/aolapi/context/aclnnGroupedMatmulSwigluQuantWeightNZ.md).
Its input parameters `weight` and `weight_scale` now accept
`list[torch.Tensor]` (i.e., `at::TensorList`).

### Does this PR introduce _any_ user-facing change?

No.


- vLLM version: v0.11.2

---------

Signed-off-by: QianChenxi <chenxi.qian.cq@outlook.com>
This commit is contained in:
Chenxi Qian
2025-11-28 18:06:39 +08:00
committed by GitHub
parent 3199fe8350
commit 554f16ae1f
50 changed files with 6934 additions and 7 deletions

View File

@@ -25,7 +25,7 @@ import sys
from sysconfig import get_paths
from typing import Dict, List
from setuptools import Extension, find_packages, setup
from setuptools import Command, Extension, find_packages, setup
from setuptools.command.build_ext import build_ext
from setuptools.command.build_py import build_py
from setuptools.command.develop import develop
@@ -199,6 +199,27 @@ class custom_build_info(build_py):
super().run()
class build_and_install_aclnn(Command):
description = "Build and install AclNN by running build_aclnn.sh"
user_options = []
def initialize_options(self):
pass
def finalize_options(self):
pass
def run(self):
try:
print("Running bash build_aclnn.sh ...")
subprocess.check_call(
["bash", "csrc/build_aclnn.sh", ROOT_DIR, envs.SOC_VERSION])
print("buid_aclnn.sh executed successfully!")
except subprocess.CalledProcessError as e:
print(f"Error running build_aclnn.sh: {e}")
raise SystemExit(e.returncode)
class cmake_build_ext(build_ext):
# A dict of extension directories that have been configured.
did_config: Dict[str, bool] = {}
@@ -385,8 +406,22 @@ class cmake_build_ext(build_ext):
shutil.copy(src_path, dst_path)
print(f"Copy: {src_path} -> {dst_path}")
# copy back _cann_ops_custom directory
src_cann_ops_custom = os.path.join(ROOT_DIR, "vllm_ascend",
"_cann_ops_custom")
dst_cann_ops_custom = os.path.join(self.build_lib, "vllm_ascend",
"_cann_ops_custom")
if os.path.exists(src_cann_ops_custom):
import shutil
if os.path.exists(dst_cann_ops_custom):
shutil.rmtree(dst_cann_ops_custom)
shutil.copytree(src_cann_ops_custom, dst_cann_ops_custom)
print(f"Copy: {src_cann_ops_custom} -> {dst_cann_ops_custom}")
def run(self):
# First, run the standard build_ext command to compile the extensions
# First, ensure ACLNN custom-ops is built and installed.
self.run_command("build_aclnn")
# Then, run the standard build_ext command to compile the extensions
super().run()
@@ -450,6 +485,7 @@ def get_requirements() -> List[str]:
cmdclass = {
"develop": custom_develop,
"build_py": custom_build_info,
"build_aclnn": build_and_install_aclnn,
"build_ext": cmake_build_ext,
"install": custom_install
}