[MOE] commit GMM custom operator (#7010)
### What this PR does / why we need it?
Commits a GMM custom operator that is optimized for small-batch scenarios.
### How was this patch tested?
This patch only submits the GMM custom operator; it will be integrated
into the MOE process in a subsequent change.
- vLLM version: v0.16.0
- vLLM main: 15d76f74e2
---------
Signed-off-by: chenxi-hh <chen464822955@163.com>
Signed-off-by: chenxi-hh <32731611+chenxi-hh@users.noreply.github.com>
This commit is contained in:
6
setup.py
6
setup.py
@@ -405,8 +405,10 @@ class cmake_build_ext(build_ext):
|
||||
print(f"Copy: {src_cann_ops_custom} -> {dst_cann_ops_custom}")
|
||||
|
||||
def run(self):
|
||||
# First, ensure ACLNN custom-ops is built and installed.
|
||||
self.run_command("build_aclnn")
|
||||
if envs.COMPILE_CUSTOM_KERNELS:
|
||||
# First, ensure ACLNN custom-ops is built and installed.
|
||||
self.run_command("build_aclnn")
|
||||
|
||||
# Then, run the standard build_ext command to compile the extensions
|
||||
super().run()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user