[OPS] add bmm_transpose ops (#3990)
### What this PR does / why we need it? Add a new fused op to custom_op that combines torch.bmm() and transpose to achieve better performance. This op is used in mla_v1 to replace the separate bmm and transpose calls. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - vLLM version: v0.11.2 --------- Signed-off-by: hust17yixuan <303660421@qq.com>
This commit is contained in:
@@ -55,16 +55,36 @@ include(${ASCENDC_CMAKE_DIR}/ascendc.cmake)
|
||||
file(GLOB KERNEL_FILES
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/csrc/kernels/*.cpp)
|
||||
|
||||
ascendc_library(vllm_ascend_kernels SHARED
|
||||
set(VLLM_ASCEND_CUSTOM_OP
|
||||
${KERNEL_FILES}
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/csrc/mla_preprocess/op_kernel/mla_preprocess_kernel.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/csrc/batch_matmul_transpose/op_kernel/batch_matmul_transpose_kernel.cpp
|
||||
)
|
||||
|
||||
set(VLLM_ASCEND_CUSTOM_OP_EXCLUDE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/csrc/batch_matmul_transpose/op_kernel/batch_matmul_transpose_kernel.cpp
|
||||
)
|
||||
|
||||
if(SOC_VERSION STREQUAL "ASCEND310P3")
|
||||
list(REMOVE_ITEM VLLM_ASCEND_CUSTOM_OP ${VLLM_ASCEND_CUSTOM_OP_EXCLUDE})
|
||||
endif()
|
||||
|
||||
ascendc_library(vllm_ascend_kernels SHARED
|
||||
${VLLM_ASCEND_CUSTOM_OP}
|
||||
)
|
||||
|
||||
message("TORCH_NPU_PATH is ${TORCH_NPU_PATH}")
|
||||
|
||||
file(GLOB VLLM_ASCEND_SRC
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/csrc/*.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/csrc/aclnn_torch_adapter/*.cpp)
|
||||
if(SOC_VERSION STREQUAL "ASCEND310P3")
|
||||
file(GLOB VLLM_ASCEND_SRC
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/csrc/*.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/csrc/aclnn_torch_adapter/*.cpp)
|
||||
else()
|
||||
file(GLOB VLLM_ASCEND_SRC
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/csrc/*.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/csrc/aclnn_torch_adapter/*.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/csrc/batch_matmul_transpose/op_host/tiling/tiling_data.cpp)
|
||||
endif()
|
||||
|
||||
include_directories(
|
||||
${pybind11_INCLUDE_DIRS}
|
||||
@@ -74,6 +94,7 @@ include_directories(
|
||||
${ASCEND_HOME_PATH}/include
|
||||
${ASCEND_HOME_PATH}/aarch64-linux/include/experiment/platform
|
||||
${ASCEND_HOME_PATH}/x86_64-linux/include/experiment/platform
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/csrc/batch_matmul_transpose/op_host
|
||||
)
|
||||
|
||||
set(
|
||||
|
||||
Reference in New Issue
Block a user