refactor: 统一硬件相关头文件引用
将分散在各文件中的CUDA/HIP/MUSA硬件相关头文件引用统一到vendors目录下的对应头文件中,提高代码可维护性。移除重复的头文件引用,优化构建配置。
This commit is contained in:
146
cmake/hip.cmake
Normal file
146
cmake/hip.cmake
Normal file
@@ -0,0 +1,146 @@
|
||||
#
|
||||
# HIP/ROCm-specific configuration for vLLM
|
||||
#
|
||||
|
||||
set(VLLM_GPU_LANG "HIP")
|
||||
|
||||
# Set the supported torch version for ROCm
|
||||
set(TORCH_SUPPORTED_VERSION_ROCM "2.9.0")
|
||||
|
||||
# Warn if the torch version doesn't match what we expect
|
||||
if (ROCM_VERSION_DEV_MAJOR GREATER_EQUAL 5 AND
|
||||
Torch_VERSION VERSION_LESS ${TORCH_SUPPORTED_VERSION_ROCM})
|
||||
message(WARNING "Pytorch version >= ${TORCH_SUPPORTED_VERSION_ROCM} "
|
||||
"expected for ROCm build, saw ${Torch_VERSION} instead.")
|
||||
endif()
|
||||
|
||||
# Enable HIP language support
|
||||
# Importing torch recognizes and sets up some HIP/ROCm configuration but does
|
||||
# not let cmake recognize .hip files. In order to get cmake to understand the
|
||||
# .hip extension automatically, HIP must be enabled explicitly.
|
||||
enable_language(HIP)
|
||||
|
||||
# Supported AMD GPU architectures
|
||||
set(HIP_SUPPORTED_ARCHS "gfx906;gfx908;gfx90a;gfx942;gfx950;gfx1030;gfx1100;gfx1101;gfx1200;gfx1201;gfx1150;gfx1151")
|
||||
|
||||
# Override GPU architectures detected by cmake/torch and filter by supported versions
|
||||
override_gpu_arches(VLLM_GPU_ARCHES
|
||||
${VLLM_GPU_LANG}
|
||||
"${${VLLM_GPU_LANG}_SUPPORTED_ARCHS}")
|
||||
|
||||
# Query torch for additional GPU compilation flags
|
||||
get_torch_gpu_compiler_flags(VLLM_GPU_FLAGS ${VLLM_GPU_LANG})
|
||||
|
||||
# Overriding the default -O set up by cmake, adding ggdb3 for the most verbose debug info
|
||||
set(CMAKE_${VLLM_GPU_LANG}_FLAGS_DEBUG "${CMAKE_${VLLM_GPU_LANG}_FLAGS_DEBUG} -O0 -ggdb3")
|
||||
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -ggdb3")
|
||||
|
||||
# Certain HIP functions are marked as [[nodiscard]], yet vllm ignores the result which generates
|
||||
# a lot of warnings that always mask real issues. Suppressing until this is properly addressed.
|
||||
set(CMAKE_${VLLM_GPU_LANG}_FLAGS "${CMAKE_${VLLM_GPU_LANG}_FLAGS} -Wno-unused-result")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-result")
|
||||
|
||||
# Set up CUTLASS for HIP builds
|
||||
SET(CUTLASS_ENABLE_HEADERS_ONLY ON CACHE BOOL "Enable only the header library")
|
||||
|
||||
# Set CUTLASS_REVISION. Used for FetchContent. Also fixes some bogus messages when building.
|
||||
set(CUTLASS_REVISION "v4.2.1")
|
||||
|
||||
# Use the specified CUTLASS source directory for compilation if VLLM_CUTLASS_SRC_DIR is provided
|
||||
if (DEFINED ENV{VLLM_CUTLASS_SRC_DIR})
|
||||
set(VLLM_CUTLASS_SRC_DIR $ENV{VLLM_CUTLASS_SRC_DIR})
|
||||
endif()
|
||||
|
||||
if(VLLM_CUTLASS_SRC_DIR)
|
||||
if(NOT IS_ABSOLUTE VLLM_CUTLASS_SRC_DIR)
|
||||
get_filename_component(VLLM_CUTLASS_SRC_DIR "${VLLM_CUTLASS_SRC_DIR}" ABSOLUTE)
|
||||
endif()
|
||||
message(STATUS "The VLLM_CUTLASS_SRC_DIR is set, using ${VLLM_CUTLASS_SRC_DIR} for compilation")
|
||||
FetchContent_Declare(cutlass SOURCE_DIR ${VLLM_CUTLASS_SRC_DIR})
|
||||
else()
|
||||
FetchContent_Declare(
|
||||
cutlass
|
||||
GIT_REPOSITORY https://github.com/nvidia/cutlass.git
|
||||
# Please keep this in sync with CUTLASS_REVISION line above.
|
||||
GIT_TAG ${CUTLASS_REVISION}
|
||||
GIT_PROGRESS TRUE
|
||||
|
||||
# Speed up CUTLASS download by retrieving only the specified GIT_TAG instead of the history.
|
||||
# Important: If GIT_SHALLOW is enabled then GIT_TAG works only with branch names and tags.
|
||||
# So if the GIT_TAG above is updated to a commit hash, GIT_SHALLOW must be set to FALSE
|
||||
GIT_SHALLOW TRUE
|
||||
)
|
||||
endif()
|
||||
FetchContent_MakeAvailable(cutlass)
|
||||
|
||||
# Set HIP extension sources
|
||||
set(VLLM_EXT_SRC
|
||||
"csrc/mamba/mamba_ssm/selective_scan_fwd.cu"
|
||||
"csrc/cache_kernels.cu"
|
||||
"csrc/attention/paged_attention_v1.cu"
|
||||
"csrc/attention/paged_attention_v2.cu"
|
||||
"csrc/attention/merge_attn_states.cu"
|
||||
"csrc/attention/vertical_slash_index.cu"
|
||||
"csrc/pos_encoding_kernels.cu"
|
||||
"csrc/activation_kernels.cu"
|
||||
"csrc/layernorm_kernels.cu"
|
||||
"csrc/fused_qknorm_rope_kernel.cu"
|
||||
"csrc/layernorm_quant_kernels.cu"
|
||||
"csrc/sampler.cu"
|
||||
"csrc/cuda_view.cu" # Note: Keeping this name for compatibility
|
||||
"csrc/quantization/gptq/q_gemm.cu"
|
||||
"csrc/quantization/w8a8/int8/scaled_quant.cu"
|
||||
"csrc/quantization/w8a8/fp8/common.cu"
|
||||
"csrc/quantization/fused_kernels/fused_layernorm_dynamic_per_token_quant.cu"
|
||||
"csrc/quantization/gguf/gguf_kernel.cu"
|
||||
"csrc/quantization/activation_kernels.cu"
|
||||
"csrc/cuda_utils_kernels.cu" # Note: Keeping this name for compatibility
|
||||
"csrc/custom_all_reduce.cu"
|
||||
"csrc/torch_bindings.cpp")
|
||||
|
||||
# Add QuickReduce kernels for ROCm
|
||||
list(APPEND VLLM_EXT_SRC
|
||||
"csrc/custom_quickreduce.cu"
|
||||
)
|
||||
|
||||
# MOE extension sources for ROCm
|
||||
set(VLLM_MOE_EXT_SRC
|
||||
"csrc/moe/torch_bindings.cpp"
|
||||
"csrc/moe/moe_align_sum_kernels.cu"
|
||||
"csrc/moe/topk_softmax_kernels.cu")
|
||||
|
||||
# Cumem allocator for ROCm
|
||||
set(VLLM_CUMEM_EXT_SRC
|
||||
"csrc/cumem_allocator.cpp")
|
||||
|
||||
# Link against rocm driver library for cumem
|
||||
# Prefer an absolute path to libamdhip64.so inside ${ROCM_PATH}/lib if available,
|
||||
# otherwise fall back to linking by name "amdhip64".
|
||||
find_library(AMDHIP64_LIB
|
||||
NAMES amdhip64 libamdhip64.so
|
||||
PATHS ${ROCM_PATH}/lib
|
||||
NO_DEFAULT_PATH)
|
||||
if(AMDHIP64_LIB)
|
||||
message(STATUS "Found libamdhip64 at ${AMDHIP64_LIB}")
|
||||
list(APPEND CUMEM_LIBS ${AMDHIP64_LIB})
|
||||
else()
|
||||
message(WARNING "libamdhip64 not found in ${ROCM_PATH}/lib; falling back to linking 'amdhip64' by name")
|
||||
list(APPEND CUMEM_LIBS amdhip64)
|
||||
endif()
|
||||
|
||||
# ROCm-specific extension sources
|
||||
set(VLLM_ROCM_EXT_SRC
|
||||
"csrc/rocm/torch_bindings.cpp"
|
||||
"csrc/rocm/skinny_gemms.cu"
|
||||
"csrc/rocm/attention.cu")
|
||||
|
||||
# Define ROCm-specific extension target
|
||||
define_extension_target(
|
||||
_rocm_C
|
||||
DESTINATION vllm
|
||||
LANGUAGE ${VLLM_GPU_LANG}
|
||||
SOURCES ${VLLM_ROCM_EXT_SRC}
|
||||
COMPILE_FLAGS ${VLLM_GPU_FLAGS}
|
||||
ARCHITECTURES ${VLLM_GPU_ARCHES}
|
||||
USE_SABI 3
|
||||
WITH_SOABI)
|
||||
Reference in New Issue
Block a user