# FetchContent is used further down to obtain third-party sources.
include(FetchContent)

# Build as C++17 (mandatory) with compiler extensions enabled.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS ON)

# Emit compile_commands.json for tooling.
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# Flag macOS hosts; later sections branch on MACOSX_FOUND.
# The variable name is passed (not its expansion) so an empty value cannot
# produce a malformed condition.
if(CMAKE_SYSTEM_NAME MATCHES "Darwin")
    set(MACOSX_FOUND TRUE)
endif()
#
# Define environment variables for special configurations
#
# Each ENABLE_* value is taken from the environment at configure time and can
# force the corresponding ISA extension on when it is not found in the local
# CPU flags (see the cross-compilation hints in the warnings further down).
set(ENABLE_AVX512BF16 $ENV{VLLM_CPU_AVX512BF16})
set(ENABLE_AVX512VNNI $ENV{VLLM_CPU_AVX512VNNI})
set(ENABLE_AMXBF16 $ENV{VLLM_CPU_AMXBF16})

include_directories("${CMAKE_SOURCE_DIR}/csrc")

# NUMA support is on by default; it is switched off for Apple Silicon below.
set(ENABLE_NUMA TRUE)
#
# Check the compile flags
#
# F16C conversions are enabled on every x86_64 target.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64")
    list(APPEND CXX_COMPILE_FLAGS "-mf16c")
endif()

# OpenMP is requested everywhere except macOS; the extension marker define is
# always added (after -fopenmp, matching the original flag order).
if(NOT MACOSX_FOUND)
    list(APPEND CXX_COMPILE_FLAGS "-fopenmp")
endif()
list(APPEND CXX_COMPILE_FLAGS "-DVLLM_CPU_EXTENSION")
# On non-macOS hosts, capture /proc/cpuinfo into CPUINFO for the ISA probes
# below. Configuration aborts when the file cannot be read.
if(NOT MACOSX_FOUND)
    execute_process(
        COMMAND cat /proc/cpuinfo
        RESULT_VARIABLE CPUINFO_RET
        OUTPUT_VARIABLE CPUINFO)
    if(NOT CPUINFO_RET EQUAL 0)
        message(FATAL_ERROR "Failed to check CPU features via /proc/cpuinfo")
    endif()
endif()
# find_isa(<cpuinfo-text> <needle> <out-var>)
#
# Sets <out-var> in the caller's scope to ON when <needle> occurs anywhere in
# <cpuinfo-text> (plain substring search), otherwise to OFF.
function(find_isa CPUINFO TARGET OUT)
    # Quoted so that empty text or text containing ';' stays a single argument.
    string(FIND "${CPUINFO}" "${TARGET}" ISA_FOUND)
    if(ISA_FOUND EQUAL -1)
        set(${OUT} OFF PARENT_SCOPE)
    else()
        set(${OUT} ON PARENT_SCOPE)
    endif()
endfunction()
# check_sysctl(<key> <out-var>)
#
# Runs `sysctl -n <key>` and sets <out-var> in the caller's scope to ON when
# the command succeeds and prints "1" or a value greater than 0; otherwise
# (including when the command fails) <out-var> is set to OFF.
function(check_sysctl TARGET OUT)
    execute_process(
        COMMAND sysctl -n "${TARGET}"
        RESULT_VARIABLE SYSCTL_RET
        OUTPUT_VARIABLE SYSCTL_INFO
        ERROR_QUIET
        OUTPUT_STRIP_TRAILING_WHITESPACE)
    if(SYSCTL_RET EQUAL 0 AND
       (SYSCTL_INFO STREQUAL "1" OR SYSCTL_INFO GREATER 0))
        set(${OUT} ON PARENT_SCOPE)
    else()
        set(${OUT} OFF PARENT_SCOPE)
    endif()
endfunction()
# is_avx512_disabled(<out-var>)
#
# Sets <out-var> in the caller's scope to ON when the environment variable
# VLLM_CPU_DISABLE_AVX512 is exactly "true" (case-sensitive), otherwise OFF.
function(is_avx512_disabled OUT)
    # Quoted so an unset/empty environment variable yields a defined, empty
    # local value instead of leaving DISABLE_AVX512 undefined in this scope.
    set(DISABLE_AVX512 "$ENV{VLLM_CPU_DISABLE_AVX512}")
    if(DISABLE_AVX512 STREQUAL "true")
        set(${OUT} ON PARENT_SCOPE)
    else()
        set(${OUT} OFF PARENT_SCOPE)
    endif()
endfunction()
is_avx512_disabled(AVX512_DISABLED)

# Probe the host for ISA features: sysctl on Apple Silicon, substring matches
# against the captured /proc/cpuinfo text everywhere else.
if(MACOSX_FOUND AND CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64")
    message(STATUS "Apple Silicon Detected")
    set(APPLE_SILICON_FOUND TRUE)
    set(ENABLE_NUMA OFF)
    check_sysctl(hw.optional.neon ASIMD_FOUND)
    check_sysctl(hw.optional.arm.FEAT_BF16 ARM_BF16_FOUND)
else()
    # CPUINFO is quoted so it is always passed as exactly one argument, even
    # when it is undefined (e.g. macOS on x86_64, where /proc/cpuinfo is not
    # read) or contains ';'.
    find_isa("${CPUINFO}" "avx2" AVX2_FOUND)
    find_isa("${CPUINFO}" "avx512f" AVX512_FOUND)
    find_isa("${CPUINFO}" "Power11" POWER11_FOUND)
    find_isa("${CPUINFO}" "POWER10" POWER10_FOUND)
    find_isa("${CPUINFO}" "POWER9" POWER9_FOUND)
    find_isa("${CPUINFO}" "asimd" ASIMD_FOUND) # Check for ARM NEON support
    find_isa("${CPUINFO}" "bf16" ARM_BF16_FOUND) # Check for ARM BF16 support
    find_isa("${CPUINFO}" "S390" S390_FOUND)
    # NOTE(review): a substring search for "v" matches almost any cpuinfo text
    # (including the "rv64" prefix of a RISC-V isa string), so RVV_FOUND is
    # effectively always ON here - confirm the intended probe.
    find_isa("${CPUINFO}" "v" RVV_FOUND) # Check for RISC-V RVV support
endif()
# Select ISA-specific compile flags from the detected CPU features. The first
# matching family wins: AVX512 (unless disabled), AVX2, PowerPC, ARM ASIMD,
# S390, then RISC-V; any other host aborts configuration.
if(AVX512_FOUND AND NOT AVX512_DISABLED)
    list(APPEND CXX_COMPILE_FLAGS
        "-mavx512f"
        "-mavx512vl"
        "-mavx512bw"
        "-mavx512dq")

    # The optional AVX512/AMX extensions below are only enabled with
    # GCC >= 12.3; evaluate that requirement once.
    if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND
       CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 12.3)
        set(_vllm_gcc_ge_12_3 TRUE)
    else()
        set(_vllm_gcc_ge_12_3 FALSE)
    endif()

    # AVX512-BF16: enabled when found locally or forced via the environment.
    find_isa("${CPUINFO}" "avx512_bf16" AVX512BF16_FOUND)
    if(AVX512BF16_FOUND OR ENABLE_AVX512BF16)
        if(_vllm_gcc_ge_12_3)
            list(APPEND CXX_COMPILE_FLAGS "-mavx512bf16")
            set(ENABLE_AVX512BF16 ON)
        else()
            set(ENABLE_AVX512BF16 OFF)
            message(WARNING "Disable AVX512-BF16 ISA support, requires gcc/g++ >= 12.3")
        endif()
    else()
        set(ENABLE_AVX512BF16 OFF)
        message(WARNING "Disable AVX512-BF16 ISA support, no avx512_bf16 found in local CPU flags." " If cross-compilation is required, please set env VLLM_CPU_AVX512BF16=1.")
    endif()

    # AVX512-VNNI: same policy as BF16.
    find_isa("${CPUINFO}" "avx512_vnni" AVX512VNNI_FOUND)
    if(AVX512VNNI_FOUND OR ENABLE_AVX512VNNI)
        if(_vllm_gcc_ge_12_3)
            list(APPEND CXX_COMPILE_FLAGS "-mavx512vnni")
            set(ENABLE_AVX512VNNI ON)
        else()
            set(ENABLE_AVX512VNNI OFF)
            message(WARNING "Disable AVX512-VNNI ISA support, requires gcc/g++ >= 12.3")
        endif()
    else()
        set(ENABLE_AVX512VNNI OFF)
        message(WARNING "Disable AVX512-VNNI ISA support, no avx512_vnni found in local CPU flags." " If cross-compilation is required, please set env VLLM_CPU_AVX512VNNI=1.")
    endif()

    # AMX-BF16: same policy; additionally exposes CPU_CAPABILITY_AMXBF16.
    find_isa("${CPUINFO}" "amx_bf16" AMXBF16_FOUND)
    if(AMXBF16_FOUND OR ENABLE_AMXBF16)
        if(_vllm_gcc_ge_12_3)
            list(APPEND CXX_COMPILE_FLAGS "-mamx-bf16" "-mamx-tile")
            set(ENABLE_AMXBF16 ON)
            # add_compile_definitions() takes bare NAME[=value] items; a
            # leading -D is not stripped by older CMake releases.
            add_compile_definitions(CPU_CAPABILITY_AMXBF16)
        else()
            set(ENABLE_AMXBF16 OFF)
            message(WARNING "Disable AMX_BF16 ISA support, requires gcc/g++ >= 12.3")
        endif()
    else()
        set(ENABLE_AMXBF16 OFF)
        message(WARNING "Disable AMX_BF16 ISA support, no amx_bf16 found in local CPU flags." " If cross-compilation is required, please set env VLLM_CPU_AMXBF16=1.")
    endif()
elseif(AVX2_FOUND)
    list(APPEND CXX_COMPILE_FLAGS "-mavx2")
    message(WARNING "vLLM CPU backend using AVX2 ISA")
elseif(POWER9_FOUND OR POWER10_FOUND OR POWER11_FOUND)
    message(STATUS "PowerPC detected")
    if(POWER9_FOUND)
        list(APPEND CXX_COMPILE_FLAGS
            "-mvsx"
            "-mcpu=power9"
            "-mtune=power9")
    elseif(POWER10_FOUND OR POWER11_FOUND)
        # Power11 hosts are built with the power10 tuning flags.
        list(APPEND CXX_COMPILE_FLAGS
            "-mvsx"
            "-mcpu=power10"
            "-mtune=power10")
    endif()
elseif(ASIMD_FOUND)
    message(STATUS "ARMv8 or later architecture detected")
    if(ARM_BF16_FOUND)
        message(STATUS "BF16 extension detected")
        set(MARCH_FLAGS "-march=armv8.2-a+bf16+dotprod+fp16")
        add_compile_definitions(ARM_BF16_SUPPORT)
    else()
        message(WARNING "BF16 functionality is not available")
        set(MARCH_FLAGS "-march=armv8.2-a+dotprod+fp16")
    endif()
    list(APPEND CXX_COMPILE_FLAGS ${MARCH_FLAGS})
elseif(S390_FOUND)
    message(STATUS "S390 detected")
    # Check for S390 VXE support
    list(APPEND CXX_COMPILE_FLAGS
        "-mvx"
        "-mzvector"
        "-march=native"
        "-mtune=native")
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "riscv64")
    if(RVV_FOUND)
        # The original passed the invalid mode keyword FAIL_ERROR, which CMake
        # printed as message text without stopping. WARNING keeps configuration
        # going as before.
        # NOTE(review): FATAL_ERROR was probably intended, but RVV_FOUND comes
        # from a substring search for "v", which looks true on practically every
        # riscv64 host - fix that probe before making this fatal.
        message(WARNING "Can't support rvv now.")
    else()
        list(APPEND CXX_COMPILE_FLAGS "-march=rv64gc")
    endif()
else()
    message(FATAL_ERROR "vLLM CPU backend requires AVX512, AVX2, Power9+ ISA, S390X ISA, ARMv8 or RISC-V support.")
endif()
# Build oneDNN for GEMM kernels (only for x86-AVX512 /ARM platforms)
# Sets USE_ONEDNN to ON when oneDNN is built and OFF otherwise. On ASIMD
# hosts the Arm Compute Library (ACL) is configured and built first, at
# configure time, so oneDNN can use it as its AArch64 backend.
if((AVX512_FOUND AND NOT AVX512_DISABLED) OR (ASIMD_FOUND AND NOT APPLE_SILICON_FOUND) OR POWER9_FOUND OR POWER10_FOUND OR POWER11_FOUND)
    # Fetch and build Arm Compute Library (ACL) as oneDNN's backend for AArch64
    # TODO [fadara01]: remove this once ACL can be fetched and built automatically as a dependency of oneDNN
    set(ONEDNN_AARCH64_USE_ACL OFF CACHE BOOL "")
    if(ASIMD_FOUND)
        # Set number of parallel build processes
        include(ProcessorCount)
        ProcessorCount(NPROC)
        if(NOT NPROC)
            set(NPROC 4)
        endif()

        # locate PyTorch's libgomp (e.g. site-packages/torch.libs/libgomp-947d5fa1.so.1.0.0)
        # and create a local shim dir with it
        vllm_prepare_torch_gomp_shim(VLLM_TORCH_GOMP_SHIM_DIR)

        find_library(OPEN_MP
            NAMES gomp
            PATHS ${VLLM_TORCH_GOMP_SHIM_DIR}
            NO_DEFAULT_PATH
            REQUIRED
        )
        # Set LD_LIBRARY_PATH to include the shim dir at build time to use the same libgomp as PyTorch
        if(OPEN_MP)
            set(ENV{LD_LIBRARY_PATH} "${VLLM_TORCH_GOMP_SHIM_DIR}:$ENV{LD_LIBRARY_PATH}")
        endif()

        # Fetch and populate ACL
        if(DEFINED ENV{ACL_ROOT_DIR} AND IS_DIRECTORY "$ENV{ACL_ROOT_DIR}")
            message(STATUS "Using ACL from specified source directory: $ENV{ACL_ROOT_DIR}")
        else()
            message(STATUS "Downloading Arm Compute Library (ACL) from GitHub")
            FetchContent_Populate(arm_compute
                SUBBUILD_DIR "${FETCHCONTENT_BASE_DIR}/arm_compute-subbuild"
                SOURCE_DIR "${FETCHCONTENT_BASE_DIR}/arm_compute-src"
                GIT_REPOSITORY https://github.com/ARM-software/ComputeLibrary.git
                GIT_TAG v52.6.0
                GIT_SHALLOW TRUE
                GIT_PROGRESS TRUE
            )
            set(ENV{ACL_ROOT_DIR} "${arm_compute_SOURCE_DIR}")
            set(ACL_LIB_DIR "$ENV{ACL_ROOT_DIR}/build")
        endif()

        # Build ACL with CMake
        set(_cmake_config_cmd
            ${CMAKE_COMMAND} -G Ninja -B build
            -DARM_COMPUTE_BUILD_SHARED_LIB=OFF
            -DCMAKE_BUILD_TYPE=Release
            -DARM_COMPUTE_ARCH=armv8.2-a
            -DARM_COMPUTE_ENABLE_ASSERTS=OFF
            -DARM_COMPUTE_ENABLE_CPPTHREADS=OFF
            -DARM_COMPUTE_ENABLE_OPENMP=ON
            -DARM_COMPUTE_ENABLE_WERROR=OFF
            -DARM_COMPUTE_BUILD_EXAMPLES=OFF
            -DARM_COMPUTE_BUILD_TESTING=OFF)
        set(_cmake_build_cmd
            ${CMAKE_COMMAND} --build build -- -j${NPROC}
        )

        # Stop at the first failing step so a broken configure is reported as
        # such instead of surfacing later as a confusing build failure.
        execute_process(
            COMMAND ${_cmake_config_cmd}
            WORKING_DIRECTORY "$ENV{ACL_ROOT_DIR}"
            RESULT_VARIABLE _acl_config_rc
        )
        if(NOT _acl_config_rc EQUAL 0)
            message(FATAL_ERROR "ACL CMake configure failed (exit ${_acl_config_rc}).")
        endif()

        execute_process(
            COMMAND ${_cmake_build_cmd}
            WORKING_DIRECTORY "$ENV{ACL_ROOT_DIR}"
            RESULT_VARIABLE _acl_rc
        )
        if(NOT _acl_rc EQUAL 0)
            message(FATAL_ERROR "ACL CMake build failed (exit ${_acl_rc}).")
        endif()
        message(STATUS "Arm Compute Library (ACL) built successfully.")

        # VLLM/oneDNN settings for ACL
        set(ONEDNN_AARCH64_USE_ACL ON CACHE BOOL "" FORCE)
        add_compile_definitions(VLLM_USE_ACL)
    endif()

    # A local oneDNN checkout can be supplied through the environment variable
    # FETCHCONTENT_SOURCE_DIR_ONEDNN; otherwise oneDNN is cloned from GitHub.
    set(FETCHCONTENT_SOURCE_DIR_ONEDNN "$ENV{FETCHCONTENT_SOURCE_DIR_ONEDNN}" CACHE PATH "Path to a local oneDNN source directory.")

    if(FETCHCONTENT_SOURCE_DIR_ONEDNN)
        message(STATUS "Using oneDNN from specified source directory: ${FETCHCONTENT_SOURCE_DIR_ONEDNN}")
        FetchContent_Declare(
            oneDNN
            SOURCE_DIR ${FETCHCONTENT_SOURCE_DIR_ONEDNN}
        )
    else()
        message(STATUS "Downloading oneDNN from GitHub")
        FetchContent_Declare(
            oneDNN
            GIT_REPOSITORY https://github.com/oneapi-src/oneDNN.git
            GIT_TAG v3.10
            GIT_PROGRESS TRUE
            GIT_SHALLOW TRUE
        )
    endif()

    # oneDNN build options, set as plain variables before the sub-project is
    # added.
    set(ONEDNN_LIBRARY_TYPE "STATIC")
    set(ONEDNN_BUILD_DOC "OFF")
    set(ONEDNN_BUILD_EXAMPLES "OFF")
    set(ONEDNN_BUILD_TESTS "OFF")
    set(ONEDNN_ENABLE_WORKLOAD "INFERENCE")
    set(ONEDNN_ENABLE_PRIMITIVE "MATMUL;REORDER")
    set(ONEDNN_BUILD_GRAPH "OFF")
    set(ONEDNN_ENABLE_JIT_PROFILING "OFF")
    set(ONEDNN_ENABLE_ITT_TASKS "OFF")
    set(ONEDNN_ENABLE_MAX_CPU_ISA "OFF")
    set(ONEDNN_ENABLE_CPU_ISA_HINTS "OFF")
    set(ONEDNN_VERBOSE "OFF")
    # CMP0077 NEW: option() in the sub-project honours the normal variables
    # set above.
    set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)

    # Temporarily switch to Release while oneDNN is added, then restore the
    # caller's build type.
    set(VLLM_BUILD_TYPE ${CMAKE_BUILD_TYPE})
    set(CMAKE_BUILD_TYPE "Release") # remove oneDNN debug symbols to reduce size
    FetchContent_MakeAvailable(oneDNN)
    set(CMAKE_BUILD_TYPE ${VLLM_BUILD_TYPE})

    # Helper object library that wraps oneDNN for the extension.
    add_library(dnnl_ext OBJECT "csrc/cpu/dnnl_helper.cpp")
    target_include_directories(
        dnnl_ext
        PUBLIC ${oneDNN_SOURCE_DIR}/include
        PUBLIC ${oneDNN_BINARY_DIR}/include
        PRIVATE ${oneDNN_SOURCE_DIR}/src
    )
    target_link_libraries(dnnl_ext dnnl)
    target_compile_options(dnnl_ext PRIVATE ${CXX_COMPILE_FLAGS} -fPIC)
    list(APPEND LIBS dnnl_ext)
    set(USE_ONEDNN ON)
else()
    set(USE_ONEDNN OFF)
endif()
message(STATUS "CPU extension compile flags: ${CXX_COMPILE_FLAGS}")

# Link against libnuma when NUMA support is enabled; otherwise define
# VLLM_NUMA_DISABLED for the sources.
if(ENABLE_NUMA)
    list(APPEND LIBS numa)
else()
    message(STATUS "NUMA is disabled")
    # add_compile_definitions() takes bare NAME[=value] items; a leading -D is
    # not stripped by older CMake releases.
    add_compile_definitions(VLLM_NUMA_DISABLED)
endif()
#
# _C extension
#
# Sources that are always part of the extension.
set(VLLM_EXT_SRC
    "csrc/cpu/activation.cpp"
    "csrc/cpu/utils.cpp"
    "csrc/cpu/layernorm.cpp"
    "csrc/cpu/mla_decode.cpp"
    "csrc/cpu/pos_encoding.cpp"
    "csrc/moe/dynamic_4bit_int_moe_cpu.cpp"
    "csrc/cpu/cpu_attn.cpp"
    "csrc/cpu/scratchpad_manager.cpp"
    "csrc/cpu/torch_bindings.cpp")

# Optional sources are inserted at the front of the list, which keeps the
# resulting order identical to the original prepend-by-reassignment.
if(AVX512_FOUND AND NOT AVX512_DISABLED)
    list(INSERT VLLM_EXT_SRC 0
        "csrc/cpu/shm.cpp"
        "csrc/cpu/cpu_wna16.cpp")
    # The sgl-kernels sources are only built when both BF16 and VNNI are on.
    if(ENABLE_AVX512BF16 AND ENABLE_AVX512VNNI)
        list(INSERT VLLM_EXT_SRC 0
            "csrc/cpu/sgl-kernels/gemm.cpp"
            "csrc/cpu/sgl-kernels/gemm_int8.cpp"
            "csrc/cpu/sgl-kernels/gemm_fp8.cpp"
            "csrc/cpu/sgl-kernels/moe.cpp"
            "csrc/cpu/sgl-kernels/moe_int8.cpp"
            "csrc/cpu/sgl-kernels/moe_fp8.cpp")
        # add_compile_definitions() takes bare NAME[=value] items; a leading
        # -D is not stripped by older CMake releases.
        add_compile_definitions(CPU_CAPABILITY_AVX512)
    endif()
endif()

if(USE_ONEDNN)
    list(INSERT VLLM_EXT_SRC 0 "csrc/cpu/dnnl_kernels.cpp")
endif()

message(STATUS "CPU extension source files: ${VLLM_EXT_SRC}")
#
# Define extension targets
#
# Creates the `_C` extension from the collected sources, libraries and
# compile flags via the project helper define_extension_target (defined
# outside this file).
define_extension_target(
    _C
    DESTINATION vllm
    LANGUAGE CXX
    SOURCES ${VLLM_EXT_SRC}
    LIBRARIES ${LIBS}
    COMPILE_FLAGS ${CXX_COMPILE_FLAGS}
    USE_SABI 3
    WITH_SOABI
)

message(STATUS "Enabling C extension.")