[minor] cleanup cmakelists.txt (#5420)
This commit is contained in:
2
.github/workflows/pr-test.yml
vendored
2
.github/workflows/pr-test.yml
vendored
@@ -187,8 +187,6 @@ jobs:
|
||||
timeout-minutes: 10
|
||||
run: |
|
||||
cd test/srt
|
||||
USE_VLLM_CUSTOM_ALLREDUCE=1 python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_tp2_bs1
|
||||
|
||||
python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_tp2_bs1
|
||||
|
||||
- name: Benchmark single latency + torch.compile (TP=2)
|
||||
|
||||
@@ -4,6 +4,10 @@ project(sgl-kernel LANGUAGES CXX CUDA)
|
||||
# CMake
|
||||
cmake_policy(SET CMP0169 OLD)
|
||||
include(${CMAKE_CURRENT_LIST_DIR}/cmake/utils.cmake)
|
||||
set(CMAKE_COLOR_DIAGNOSTICS ON)
|
||||
set(CMAKE_VERBOSE_MAKEFILE ON CACHE BOOL "ON")
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
set(CMAKE_SHARED_LIBRARY_PREFIX "")
|
||||
|
||||
# Python
|
||||
find_package(Python COMPONENTS Interpreter Development.Module ${SKBUILD_SABI_COMPONENT} REQUIRED)
|
||||
@@ -82,8 +86,6 @@ include_directories(
|
||||
${PROJECT_SOURCE_DIR}/csrc
|
||||
${repo-cutlass_SOURCE_DIR}/include
|
||||
${repo-cutlass_SOURCE_DIR}/tools/util/include
|
||||
${repo-cutlass_SOURCE_DIR}/examples/77_blackwell_fmha
|
||||
${repo-cutlass_SOURCE_DIR}/examples/common
|
||||
${repo-flashinfer_SOURCE_DIR}/include
|
||||
${repo-flashinfer_SOURCE_DIR}/csrc
|
||||
)
|
||||
@@ -109,6 +111,8 @@ set(SGL_KERNEL_CUDA_FLAGS
|
||||
"--expt-relaxed-constexpr"
|
||||
"--expt-extended-lambda"
|
||||
"--threads=32"
|
||||
|
||||
# Suppress warnings
|
||||
"-Xcompiler=-Wconversion"
|
||||
"-Xcompiler=-fno-strict-aliasing"
|
||||
|
||||
@@ -209,17 +213,19 @@ Python_add_library(common_ops MODULE USE_SABI ${SKBUILD_SABI_VERSION} WITH_SOABI
|
||||
|
||||
target_compile_options(common_ops PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:${SGL_KERNEL_CUDA_FLAGS}>)
|
||||
target_include_directories(common_ops PRIVATE
|
||||
${TORCH_INCLUDE_DIRS}
|
||||
${repo-flash-attention_SOURCE_DIR}/csrc/flash_attn/src)
|
||||
${repo-cutlass_SOURCE_DIR}/examples/77_blackwell_fmha
|
||||
${repo-cutlass_SOURCE_DIR}/examples/common
|
||||
${repo-flash-attention_SOURCE_DIR}/csrc/flash_attn/src
|
||||
)
|
||||
target_link_libraries(common_ops PRIVATE ${TORCH_LIBRARIES} c10 cuda cublas cublasLt)
|
||||
|
||||
target_compile_definitions(common_ops PRIVATE
|
||||
FLASHATTENTION_DISABLE_BACKWARD
|
||||
FLASHATTENTION_DISABLE_DROPOUT
|
||||
FLASHATTENTION_DISABLE_UNEVEN_K
|
||||
)
|
||||
FLASHATTENTION_DISABLE_BACKWARD
|
||||
FLASHATTENTION_DISABLE_DROPOUT
|
||||
FLASHATTENTION_DISABLE_UNEVEN_K
|
||||
)
|
||||
|
||||
install(TARGETS common_ops LIBRARY DESTINATION "sgl_kernel")
|
||||
install(TARGETS common_ops LIBRARY DESTINATION sgl_kernel)
|
||||
|
||||
# ============================ Optional Install ============================= #
|
||||
# set flash-attention sources file
|
||||
@@ -280,8 +286,8 @@ if (SGL_KERNEL_ENABLE_FA3)
|
||||
|
||||
target_compile_options(flash_ops PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:${SGL_FLASH_KERNEL_CUDA_FLAGS}>)
|
||||
target_include_directories(flash_ops PRIVATE
|
||||
${TORCH_INCLUDE_DIRS}
|
||||
${repo-flash-attention_SOURCE_DIR}/hopper)
|
||||
${repo-flash-attention_SOURCE_DIR}/hopper
|
||||
)
|
||||
target_link_libraries(flash_ops PRIVATE ${TORCH_LIBRARIES} c10 cuda)
|
||||
|
||||
install(TARGETS flash_ops LIBRARY DESTINATION "sgl_kernel")
|
||||
|
||||
@@ -35,6 +35,8 @@ docker run --rm \
|
||||
${PYTHON_ROOT_PATH}/bin/pip install --no-cache-dir ninja setuptools==75.0.0 wheel==0.41.0 numpy uv scikit-build-core && \
|
||||
export TORCH_CUDA_ARCH_LIST='7.5 8.0 8.9 9.0+PTX' && \
|
||||
export CUDA_VERSION=${CUDA_VERSION} && \
|
||||
export CMAKE_BUILD_PARALLEL_LEVEL=96
|
||||
export MAX_JOBS=96
|
||||
mkdir -p /usr/lib/x86_64-linux-gnu/ && \
|
||||
ln -s /usr/local/cuda-${CUDA_VERSION}/targets/x86_64-linux/lib/stubs/libcuda.so /usr/lib/x86_64-linux-gnu/libcuda.so && \
|
||||
cd /sgl-kernel && \
|
||||
|
||||
Reference in New Issue
Block a user