#
# HIP/ROCm-specific configuration for vLLM
#

set(VLLM_GPU_LANG "HIP")

# Torch release that the ROCm build is expected to be compiled against.
set(TORCH_SUPPORTED_VERSION_ROCM "2.9.0")

# Emit a non-fatal warning when the detected torch is older than expected.
if(ROCM_VERSION_DEV_MAJOR GREATER_EQUAL 5
   AND Torch_VERSION VERSION_LESS ${TORCH_SUPPORTED_VERSION_ROCM})
  message(WARNING
    "Pytorch version >= ${TORCH_SUPPORTED_VERSION_ROCM} "
    "expected for ROCm build, saw ${Torch_VERSION} instead.")
endif()

# Enable HIP language support.
# Importing torch recognizes and sets up some HIP/ROCm configuration, but it
# does not make cmake recognize .hip files. For cmake to understand the .hip
# extension automatically, HIP has to be enabled explicitly.
enable_language(HIP)

# AMD GPU architectures supported by this build (stored as a cmake list).
set(HIP_SUPPORTED_ARCHS
  gfx906
  gfx908
  gfx90a
  gfx942
  gfx950
  gfx1030
  gfx1100
  gfx1101
  gfx1200
  gfx1201
  gfx1150
  gfx1151)

# Override the GPU architectures detected by cmake/torch and filter them by
# the supported list above. The list is passed as one quoted argument.
override_gpu_arches(
  VLLM_GPU_ARCHES
  ${VLLM_GPU_LANG}
  "${${VLLM_GPU_LANG}_SUPPORTED_ARCHS}")

# Ask torch for additional GPU compilation flags.
get_torch_gpu_compiler_flags(VLLM_GPU_FLAGS ${VLLM_GPU_LANG})

# Debug builds: override cmake's default -O level and request -ggdb3 for the
# most verbose debug info, for both the GPU language and host C++.
string(APPEND CMAKE_${VLLM_GPU_LANG}_FLAGS_DEBUG " -O0 -ggdb3")
string(APPEND CMAKE_CXX_FLAGS_DEBUG " -O0 -ggdb3")

# Certain HIP functions are marked [[nodiscard]], yet vllm ignores their
# results. That produces a large number of warnings which mask real issues,
# so the warning is suppressed until this is properly addressed.
string(APPEND CMAKE_${VLLM_GPU_LANG}_FLAGS " -Wno-unused-result")
string(APPEND CMAKE_CXX_FLAGS " -Wno-unused-result")

# Set up CUTLASS for HIP builds: only the header library is enabled.
set(CUTLASS_ENABLE_HEADERS_ONLY ON CACHE BOOL "Enable only the header library")

# Set CUTLASS_REVISION. Used for FetchContent. Also fixes some bogus messages when building.
set(CUTLASS_REVISION "v4.2.1")

# A local CUTLASS checkout can be supplied through the VLLM_CUTLASS_SRC_DIR
# environment variable; when it is defined, its value is copied into the
# VLLM_CUTLASS_SRC_DIR cmake variable.
if(DEFINED ENV{VLLM_CUTLASS_SRC_DIR})
  set(VLLM_CUTLASS_SRC_DIR $ENV{VLLM_CUTLASS_SRC_DIR})
endif()

if(VLLM_CUTLASS_SRC_DIR)
  # IS_ABSOLUTE tests the path string it is handed and does not dereference a
  # variable name, so the value must be expanded explicitly. Passing the bare
  # name would test the literal text "VLLM_CUTLASS_SRC_DIR", which is never an
  # absolute path.
  if(NOT IS_ABSOLUTE "${VLLM_CUTLASS_SRC_DIR}")
    get_filename_component(VLLM_CUTLASS_SRC_DIR "${VLLM_CUTLASS_SRC_DIR}" ABSOLUTE)
  endif()
  message(STATUS "The VLLM_CUTLASS_SRC_DIR is set, using ${VLLM_CUTLASS_SRC_DIR} for compilation")
  FetchContent_Declare(cutlass SOURCE_DIR ${VLLM_CUTLASS_SRC_DIR})
else()
  FetchContent_Declare(
    cutlass
    GIT_REPOSITORY https://github.com/nvidia/cutlass.git
    # The tag is taken from CUTLASS_REVISION, set at the top of this section.
    GIT_TAG ${CUTLASS_REVISION}
    GIT_PROGRESS TRUE

    # Speed up the CUTLASS download by retrieving only the specified GIT_TAG
    # instead of the full history.
    # Important: with GIT_SHALLOW enabled, GIT_TAG works only with branch
    # names and tags. If CUTLASS_REVISION is ever changed to a commit hash,
    # GIT_SHALLOW must be set to FALSE.
    GIT_SHALLOW TRUE
  )
endif()
FetchContent_MakeAvailable(cutlass)

# Sources compiled into the main HIP extension.
set(VLLM_EXT_SRC
  "csrc/mamba/mamba_ssm/selective_scan_fwd.cu"
  "csrc/cache_kernels.cu"
  "csrc/attention/paged_attention_v1.cu"
  "csrc/attention/paged_attention_v2.cu"
  "csrc/attention/merge_attn_states.cu"
  "csrc/attention/vertical_slash_index.cu"
  "csrc/pos_encoding_kernels.cu"
  "csrc/activation_kernels.cu"
  "csrc/layernorm_kernels.cu"
  "csrc/fused_qknorm_rope_kernel.cu"
  "csrc/layernorm_quant_kernels.cu"
  "csrc/sampler.cu"
  "csrc/cuda_view.cu"  # Note: Keeping this name for compatibility
  "csrc/quantization/gptq/q_gemm.cu"
  "csrc/quantization/w8a8/int8/scaled_quant.cu"
  "csrc/quantization/w8a8/fp8/common.cu"
  "csrc/quantization/fused_kernels/fused_layernorm_dynamic_per_token_quant.cu"
  "csrc/quantization/gguf/gguf_kernel.cu"
  "csrc/quantization/activation_kernels.cu"
  "csrc/cuda_utils_kernels.cu"  # Note: Keeping this name for compatibility
  "csrc/custom_all_reduce.cu"
  "csrc/torch_bindings.cpp")

# Add
# QuickReduce kernels for ROCm (appended to the extension source list)
list(APPEND VLLM_EXT_SRC "csrc/custom_quickreduce.cu")

# Sources for the MOE extension on ROCm.
set(VLLM_MOE_EXT_SRC
  "csrc/moe/torch_bindings.cpp"
  "csrc/moe/moe_align_sum_kernels.cu"
  "csrc/moe/topk_softmax_kernels.cu")

# Source for the cumem allocator on ROCm.
set(VLLM_CUMEM_EXT_SRC "csrc/cumem_allocator.cpp")

# The cumem allocator links against the ROCm driver library.
# Only ${ROCM_PATH}/lib is searched (NO_DEFAULT_PATH). When the library is
# found there, its full path is linked; otherwise the bare name "amdhip64" is
# used as a fallback.
find_library(AMDHIP64_LIB
  NAMES amdhip64 libamdhip64.so
  PATHS ${ROCM_PATH}/lib
  NO_DEFAULT_PATH)
if(NOT AMDHIP64_LIB)
  message(WARNING "libamdhip64 not found in ${ROCM_PATH}/lib; falling back to linking 'amdhip64' by name")
  list(APPEND CUMEM_LIBS amdhip64)
else()
  message(STATUS "Found libamdhip64 at ${AMDHIP64_LIB}")
  list(APPEND CUMEM_LIBS ${AMDHIP64_LIB})
endif()

# Sources that exist only for the ROCm-specific extension.
set(VLLM_ROCM_EXT_SRC
  "csrc/rocm/torch_bindings.cpp"
  "csrc/rocm/skinny_gemms.cu"
  "csrc/rocm/attention.cu")

# Declare the ROCm-specific extension target (_rocm_C), using the GPU flags
# and architectures stored in VLLM_GPU_FLAGS / VLLM_GPU_ARCHES.
define_extension_target(
  _rocm_C
  DESTINATION vllm
  LANGUAGE ${VLLM_GPU_LANG}
  SOURCES ${VLLM_ROCM_EXT_SRC}
  COMPILE_FLAGS ${VLLM_GPU_FLAGS}
  ARCHITECTURES ${VLLM_GPU_ARCHES}
  USE_SABI 3
  WITH_SOABI)