[Kernel] add custom op GmmSwigluQuantWeightNzTensorList (#3804)
### What this PR does / why we need it? This PR introduces support for adding custom CANN `aclnn` ops to `vllm-ascend`, allowing users to define and use their own custom operators. Key changes include: - Building and installing custom ops into the `vllm-ascend`-specified directory - Binding the `aclnn` op interface to the `torch.ops._C_ascend` module - Enabling invocation of these ops within `vllm-ascend` This PR includes a sample custom op: `aclnnGroupedMatmulSwigluQuantWeightNzTensorList`, which is adapted from the CANN operator [`aclnnGroupedMatmulSwigluQuantWeightNZ`](https://www.hiascend.com/document/detail/zh/canncommercial/83RC1/API/aolapi/context/aclnnGroupedMatmulSwigluQuantWeightNZ.md). Its input parameters `weight` and `weight_scale` now accept `list[torch.Tensor]` (i.e., `at::TensorList`). ### Does this PR introduce _any_ user-facing change? No. - vLLM version: v0.11.2 --------- Signed-off-by: QianChenxi <chenxi.qian.cq@outlook.com>
This commit is contained in:
40
setup.py
40
setup.py
@@ -25,7 +25,7 @@ import sys
|
||||
from sysconfig import get_paths
|
||||
from typing import Dict, List
|
||||
|
||||
from setuptools import Extension, find_packages, setup
|
||||
from setuptools import Command, Extension, find_packages, setup
|
||||
from setuptools.command.build_ext import build_ext
|
||||
from setuptools.command.build_py import build_py
|
||||
from setuptools.command.develop import develop
|
||||
@@ -199,6 +199,27 @@ class custom_build_info(build_py):
|
||||
super().run()
|
||||
|
||||
|
||||
class build_and_install_aclnn(Command):
|
||||
description = "Build and install AclNN by running build_aclnn.sh"
|
||||
user_options = []
|
||||
|
||||
def initialize_options(self):
|
||||
pass
|
||||
|
||||
def finalize_options(self):
|
||||
pass
|
||||
|
||||
def run(self):
|
||||
try:
|
||||
print("Running bash build_aclnn.sh ...")
|
||||
subprocess.check_call(
|
||||
["bash", "csrc/build_aclnn.sh", ROOT_DIR, envs.SOC_VERSION])
|
||||
print("buid_aclnn.sh executed successfully!")
|
||||
except subprocess.CalledProcessError as e:
|
||||
print(f"Error running build_aclnn.sh: {e}")
|
||||
raise SystemExit(e.returncode)
|
||||
|
||||
|
||||
class cmake_build_ext(build_ext):
|
||||
# A dict of extension directories that have been configured.
|
||||
did_config: Dict[str, bool] = {}
|
||||
@@ -385,8 +406,22 @@ class cmake_build_ext(build_ext):
|
||||
shutil.copy(src_path, dst_path)
|
||||
print(f"Copy: {src_path} -> {dst_path}")
|
||||
|
||||
# copy back _cann_ops_custom directory
|
||||
src_cann_ops_custom = os.path.join(ROOT_DIR, "vllm_ascend",
|
||||
"_cann_ops_custom")
|
||||
dst_cann_ops_custom = os.path.join(self.build_lib, "vllm_ascend",
|
||||
"_cann_ops_custom")
|
||||
if os.path.exists(src_cann_ops_custom):
|
||||
import shutil
|
||||
if os.path.exists(dst_cann_ops_custom):
|
||||
shutil.rmtree(dst_cann_ops_custom)
|
||||
shutil.copytree(src_cann_ops_custom, dst_cann_ops_custom)
|
||||
print(f"Copy: {src_cann_ops_custom} -> {dst_cann_ops_custom}")
|
||||
|
||||
def run(self):
|
||||
# First, run the standard build_ext command to compile the extensions
|
||||
# First, ensure ACLNN custom-ops is built and installed.
|
||||
self.run_command("build_aclnn")
|
||||
# Then, run the standard build_ext command to compile the extensions
|
||||
super().run()
|
||||
|
||||
|
||||
@@ -450,6 +485,7 @@ def get_requirements() -> List[str]:
|
||||
cmdclass = {
|
||||
"develop": custom_develop,
|
||||
"build_py": custom_build_info,
|
||||
"build_aclnn": build_and_install_aclnn,
|
||||
"build_ext": cmake_build_ext,
|
||||
"install": custom_install
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user