diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8868c5f5..b72a7eb0 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -131,6 +131,6 @@ target_link_libraries(
     opapi
 )
 
-target_link_options(vllm_ascend_C PRIVATE "-Wl,-rpath,$ORIGIN:$ORIGIN/lib")
+target_link_options(vllm_ascend_C PRIVATE "-Wl,-rpath,$ORIGIN:$ORIGIN/lib:$ORIGIN/_cann_ops_custom/vendors/vllm-ascend/op_api/lib")
 
 install(TARGETS vllm_ascend_C vllm_ascend_kernels DESTINATION ${VLLM_ASCEND_INSTALL_PATH})
diff --git a/csrc/CMakeLists.txt b/csrc/CMakeLists.txt
index dab92509..29c654c0 100644
--- a/csrc/CMakeLists.txt
+++ b/csrc/CMakeLists.txt
@@ -15,7 +15,7 @@ option(BUILD_OPEN_PROJECT "Build open ascend ops project." ON)
 option(ENABLE_CCACHE "Enable ccache capability" ON)
 set(ASCEND_COMPUTE_UNIT "ascend910b" CACHE STRING "soc that need to be compiled")
 set(ASCEND_OP_NAME "ALL" CACHE STRING "operators that need to be compiled")
-set(VENDOR_NAME "customize" CACHE STRING "vendor name")
+set(VENDOR_NAME "vllm-ascend" CACHE STRING "vendor name")
 
 include(cmake/config.cmake)
 include(cmake/func.cmake)
diff --git a/csrc/build_aclnn.sh b/csrc/build_aclnn.sh
index 9dba287e..8a282bef 100644
--- a/csrc/build_aclnn.sh
+++ b/csrc/build_aclnn.sh
@@ -31,4 +31,3 @@ bash build.sh -n $CUSTOM_OPS -c $SOC_ARG
 
 # install custom ops to vllm_ascend/_cann_ops_custom
 ./output/CANN-custom_ops*.run --install-path=$ROOT_DIR/vllm_ascend/_cann_ops_custom
-source $ROOT_DIR/vllm_ascend/_cann_ops_custom/vendors/customize/bin/set_env.bash
diff --git a/csrc/torch_binding.cpp b/csrc/torch_binding.cpp
index b596337d..9ef0cfbb 100644
--- a/csrc/torch_binding.cpp
+++ b/csrc/torch_binding.cpp
@@ -568,9 +568,9 @@ std::tuple grouped_matmul_swiglu_quant_weigh
     int m = x_size[0];
     int k = x_size[1];
 
-    at::Tensor output = at::zeros({m, n/2}, x.options().dtype(at::kChar));
-    at::Tensor output_scale = at::zeros({m}, x.options().dtype(at::kFloat));
-    at::Tensor output_offset = at::zeros({m}, x.options().dtype(at::kFloat));
+    at::Tensor output = at::empty({m, n/2}, x.options().dtype(at::kChar));
+    at::Tensor output_scale = at::empty({m}, x.options().dtype(at::kFloat));
+    at::Tensor output_offset = at::empty({m}, x.options().dtype(at::kFloat));
 
     EXEC_NPU_CMD(
         aclnnGroupedMatmulSwigluQuantWeightNzTensorList,
diff --git a/vllm_ascend/platform.py b/vllm_ascend/platform.py
index 5ff66926..728c56c3 100644
--- a/vllm_ascend/platform.py
+++ b/vllm_ascend/platform.py
@@ -38,27 +38,6 @@ from vllm_ascend.utils import (
     prefill_context_parallel_enable, update_aclgraph_sizes,
     update_cudagraph_capture_sizes, update_default_aclgraph_sizes)
 
-# set custom ops path
-CUR_DIR = os.path.dirname(os.path.realpath(__file__))
-CUSTOM_OPP_PATH = os.path.join(CUR_DIR, "vllm_ascend", "_cann_ops_custom",
-                               "vendors", "customize")
-CUSTOM_LIB_PATH = os.path.join(CUSTOM_OPP_PATH, "op_api", "lib")
-
-if os.path.exists(CUSTOM_OPP_PATH):
-    current_cust_opp_path = os.environ.get("ASCEND_CUSTOM_OPP_PATH", "")
-    if current_cust_opp_path:
-        os.environ[
-            "ASCEND_CUSTOM_OPP_PATH"] = f"{CUSTOM_OPP_PATH}:{current_cust_opp_path}"
-    else:
-        os.environ["ASCEND_CUSTOM_OPP_PATH"] = CUSTOM_OPP_PATH
-
-if os.path.exists(CUSTOM_LIB_PATH):
-    current_lib_path = os.environ.get("LD_LIBRARY_PATH", "")
-    if current_lib_path:
-        os.environ["LD_LIBRARY_PATH"] = f"{CUSTOM_LIB_PATH}:{current_lib_path}"
-    else:
-        os.environ["LD_LIBRARY_PATH"] = CUSTOM_LIB_PATH
-
 if TYPE_CHECKING:
     from vllm.config import ModelConfig, VllmConfig
     from vllm.utils import FlexibleArgumentParser
diff --git a/vllm_ascend/utils.py b/vllm_ascend/utils.py
index e9441e28..c1adb0c0 100644
--- a/vllm_ascend/utils.py
+++ b/vllm_ascend/utils.py
@@ -247,6 +247,19 @@ def enable_custom_op():
     Ensure that ASCEND_RT_VISIBLE_DEVICES can be dynamically modified before torch.npu.set_device().
     """
     global _CUSTOM_OP_ENABLED
+
+    # set custom ops path
+    CUR_DIR = os.path.dirname(os.path.realpath(__file__))
+    CUSTOM_OPP_PATH = os.path.join(CUR_DIR, "_cann_ops_custom", "vendors",
+                                   "vllm-ascend")
+    if os.path.exists(CUSTOM_OPP_PATH):
+        current_cust_opp_path = os.environ.get("ASCEND_CUSTOM_OPP_PATH", "")
+        if current_cust_opp_path:
+            os.environ[
+                "ASCEND_CUSTOM_OPP_PATH"] = f"{CUSTOM_OPP_PATH}:{current_cust_opp_path}"
+        else:
+            os.environ["ASCEND_CUSTOM_OPP_PATH"] = CUSTOM_OPP_PATH
+
     if _CUSTOM_OP_ENABLED is not None:
         return _CUSTOM_OP_ENABLED
     try: