[Feature]: Support running Qwen2.5/3 dense and Qwen2.5-VL models on 310P devices (#5776)
### What this PR does / why we need it?
Add basic 310P support. For now, only dense models are supported, and they run in eager mode.
- vLLM version: v0.13.0
- vLLM main: 2f4e6548ef
---------
Signed-off-by: Tflowers-0129 <2906339855@qq.com>
Signed-off-by: Shaoxu Cheng <2906339855@qq.com>
This commit is contained in:
@@ -62,14 +62,17 @@ set(VLLM_ASCEND_CUSTOM_OP
|
||||
)
|
||||
|
||||
set(VLLM_ASCEND_CUSTOM_OP_EXCLUDE
|
||||
${KERNEL_FILES}/bgmv_expand.cpp
|
||||
${KERNEL_FILES}/bgmv_shrink.cpp
|
||||
${KERNEL_FILES}/sgmv_expand.cpp
|
||||
${KERNEL_FILES}/sgmv_shrink.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/csrc/kernels/bgmv_expand.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/csrc/kernels/bgmv_shrink.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/csrc/kernels/sgmv_expand.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/csrc/kernels/sgmv_shrink.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/csrc/mla_preprocess/op_kernel/mla_preprocess_kernel.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/csrc/batch_matmul_transpose/op_kernel/batch_matmul_transpose_kernel.cpp
|
||||
)
|
||||
|
||||
if(SOC_VERSION STREQUAL "ASCEND310P3")
|
||||
if(SOC_VERSION STREQUAL "ascend310p3")
|
||||
message(STATUS "310P hardware detected: disabling MLAPO operators")
|
||||
message(STATUS "310P hardware detected: excluding batch_matmul_transpose operators")
|
||||
list(REMOVE_ITEM VLLM_ASCEND_CUSTOM_OP ${VLLM_ASCEND_CUSTOM_OP_EXCLUDE})
|
||||
endif()
|
||||
|
||||
@@ -79,7 +82,7 @@ ascendc_library(vllm_ascend_kernels SHARED
|
||||
|
||||
message("TORCH_NPU_PATH is ${TORCH_NPU_PATH}")
|
||||
|
||||
if(SOC_VERSION STREQUAL "ASCEND310P3")
|
||||
if(SOC_VERSION STREQUAL "ascend310p3")
|
||||
file(GLOB VLLM_ASCEND_SRC
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/csrc/*.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/csrc/aclnn_torch_adapter/*.cpp)
|
||||
|
||||
Reference in New Issue
Block a user