[MOE] commit GMM custom operator (#7010)
### What this PR does / why we need it?
This PR adds a GMM (grouped matmul) custom operator that is optimized for small-batch scenarios.
### How was this patch tested?
This PR submits the GMM custom operator so that it can subsequently be
integrated into the MoE process.
- vLLM version: v0.16.0
- vLLM main:
15d76f74e2
---------
Signed-off-by: chenxi-hh <chen464822955@163.com>
Signed-off-by: chenxi-hh <32731611+chenxi-hh@users.noreply.github.com>
This commit is contained in:
71
csrc/moe_grouped_matmul/op_host/CMakeLists.txt
Normal file
71
csrc/moe_grouped_matmul/op_host/CMakeLists.txt
Normal file
@@ -0,0 +1,71 @@
|
||||
# Copyright (c) 2026 Huawei Technologies Co., Ltd.
|
||||
# This file is a part of the CANN Open Software.
|
||||
# Licensed under CANN Open Software License Agreement Version 1.0 (the "License").
|
||||
# Please refer to the License for details. You may not use this file except in compliance with the License.
|
||||
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED,
|
||||
# INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE.
|
||||
# See LICENSE in the root of the software repository for the full text of the License.
|
||||
# ======================================================================================================================
|
||||
|
||||
# Register the compile options for the MoeGroupedMatmulCustom operator via the
# add_ops_compile_options() helper (defined outside this file).
# NOTE(review): -Werror is passed unconditionally, so any new compiler warning
# fails the build for this operator — confirm this is intended for all toolchains.
add_ops_compile_options(
  OP_NAME MoeGroupedMatmulCustom
  OPTIONS
    --cce-auto-sync=off
    -Wno-deprecated-declarations
    -Werror
)
|
||||
|
||||
# Add this operator's source file to the existing `optiling` target.
target_sources(optiling
  PRIVATE
    moe_grouped_matmul_cpu.cpp
)

# Header search paths for `optiling`: this directory first, then the CANN
# package include tree rooted at ASCEND_CANN_PACKAGE_PATH. Order is preserved
# because it determines header lookup precedence.
target_include_directories(optiling
  PRIVATE
    "${CMAKE_CURRENT_SOURCE_DIR}"
    "${ASCEND_CANN_PACKAGE_PATH}/include"
    "${ASCEND_CANN_PACKAGE_PATH}/include/external"
    "${ASCEND_CANN_PACKAGE_PATH}/include/experiment"
    "${ASCEND_CANN_PACKAGE_PATH}/include/experiment/platform"
    "${ASCEND_CANN_PACKAGE_PATH}/include/experiment/metadef"
    "${ASCEND_CANN_PACKAGE_PATH}/include/experiment/runtime"
    "${ASCEND_CANN_PACKAGE_PATH}/include/experiment/msprof"
)
|
||||
|
||||
# Add this operator's shape-inference source file to the existing `opsproto` target.
target_sources(opsproto
  PRIVATE
    moe_grouped_matmul_infershape.cpp
)

# Header search paths for `opsproto`: this directory first, then the CANN
# package include tree rooted at ASCEND_CANN_PACKAGE_PATH. Order is preserved
# because it determines header lookup precedence.
target_include_directories(opsproto
  PRIVATE
    "${CMAKE_CURRENT_SOURCE_DIR}"
    "${ASCEND_CANN_PACKAGE_PATH}/include"
    "${ASCEND_CANN_PACKAGE_PATH}/include/external"
    "${ASCEND_CANN_PACKAGE_PATH}/include/experiment"
    "${ASCEND_CANN_PACKAGE_PATH}/include/experiment/platform"
    "${ASCEND_CANN_PACKAGE_PATH}/include/experiment/metadef"
    "${ASCEND_CANN_PACKAGE_PATH}/include/experiment/runtime"
    "${ASCEND_CANN_PACKAGE_PATH}/include/experiment/msprof"
)
|
||||
|
||||
# Add this operator's definition source file to the existing
# `op_host_aclnnInner` target.
target_sources(op_host_aclnnInner
  PRIVATE
    moe_grouped_matmul_def.cpp
)

# Header search paths for `op_host_aclnnInner`: this directory first, then the
# CANN package include tree rooted at ASCEND_CANN_PACKAGE_PATH. Order is
# preserved because it determines header lookup precedence.
target_include_directories(op_host_aclnnInner
  PRIVATE
    "${CMAKE_CURRENT_SOURCE_DIR}"
    "${ASCEND_CANN_PACKAGE_PATH}/include"
    "${ASCEND_CANN_PACKAGE_PATH}/include/external"
    "${ASCEND_CANN_PACKAGE_PATH}/include/experiment"
    "${ASCEND_CANN_PACKAGE_PATH}/include/experiment/platform"
    "${ASCEND_CANN_PACKAGE_PATH}/include/experiment/metadef"
    "${ASCEND_CANN_PACKAGE_PATH}/include/experiment/runtime"
    "${ASCEND_CANN_PACKAGE_PATH}/include/experiment/msprof"
)
|
||||
|
||||
# Add the L0 and aclnn API source files of this operator to the existing
# `opapi` target. No include directories are added for `opapi` in this file.
target_sources(opapi
  PRIVATE
    moe_grouped_matmul_l0.cpp
    aclnn_moe_grouped_matmul.cpp
)
|
||||
|
||||
# Install the aclnn headers of this operator into ACLNN_INC_INSTALL_DIR.
# OPTIONAL means a header that does not exist at install time is skipped
# instead of failing the install step.
install(
  FILES "${CMAKE_CURRENT_SOURCE_DIR}/aclnn_moe_grouped_matmul.h"
  DESTINATION ${ACLNN_INC_INSTALL_DIR}
  OPTIONAL
)

install(
  FILES "${CMAKE_CURRENT_SOURCE_DIR}/aclnn_moe_grouped_matmul_weight_nz.h"
  DESTINATION ${ACLNN_INC_INSTALL_DIR}
  OPTIONAL
)
|
||||
Reference in New Issue
Block a user