[minor] cleanup cmakelists.txt (#5420)

2025-04-15 07:07:07 -07:00
parent f1b3b75fc6
commit 838fa0f218
3 changed files with 19 additions and 13 deletions
--- a/.github/workflows/pr-test.yml
+++ b/.github/workflows/pr-test.yml
@@ -187,8 +187,6 @@ jobs:
        timeout-minutes: 10
        run: |
          cd test/srt
-          USE_VLLM_CUSTOM_ALLREDUCE=1 python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_tp2_bs1
-
          python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_tp2_bs1

      - name: Benchmark single latency + torch.compile (TP=2)
--- a/sgl-kernel/CMakeLists.txt
+++ b/sgl-kernel/CMakeLists.txt
@@ -4,6 +4,10 @@ project(sgl-kernel LANGUAGES CXX CUDA)
 # CMake
 cmake_policy(SET CMP0169 OLD)
 include(${CMAKE_CURRENT_LIST_DIR}/cmake/utils.cmake)
+set(CMAKE_COLOR_DIAGNOSTICS ON)
+set(CMAKE_VERBOSE_MAKEFILE ON CACHE BOOL "ON")
+set(CMAKE_POSITION_INDEPENDENT_CODE ON)
+set(CMAKE_SHARED_LIBRARY_PREFIX "")

 # Python
 find_package(Python COMPONENTS Interpreter Development.Module ${SKBUILD_SABI_COMPONENT} REQUIRED)
@@ -82,8 +86,6 @@ include_directories(
    ${PROJECT_SOURCE_DIR}/csrc
    ${repo-cutlass_SOURCE_DIR}/include
    ${repo-cutlass_SOURCE_DIR}/tools/util/include
-    ${repo-cutlass_SOURCE_DIR}/examples/77_blackwell_fmha
-    ${repo-cutlass_SOURCE_DIR}/examples/common
    ${repo-flashinfer_SOURCE_DIR}/include
    ${repo-flashinfer_SOURCE_DIR}/csrc
 )
@@ -109,6 +111,8 @@ set(SGL_KERNEL_CUDA_FLAGS
    "--expt-relaxed-constexpr"
    "--expt-extended-lambda"
    "--threads=32"
+
+    # Suppress warnings
    "-Xcompiler=-Wconversion"
    "-Xcompiler=-fno-strict-aliasing"

@@ -209,17 +213,19 @@ Python_add_library(common_ops MODULE USE_SABI ${SKBUILD_SABI_VERSION} WITH_SOABI

 target_compile_options(common_ops PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:${SGL_KERNEL_CUDA_FLAGS}>)
 target_include_directories(common_ops PRIVATE
-        ${TORCH_INCLUDE_DIRS}
-        ${repo-flash-attention_SOURCE_DIR}/csrc/flash_attn/src)
+    ${repo-cutlass_SOURCE_DIR}/examples/77_blackwell_fmha
+    ${repo-cutlass_SOURCE_DIR}/examples/common
+    ${repo-flash-attention_SOURCE_DIR}/csrc/flash_attn/src
+)
 target_link_libraries(common_ops PRIVATE ${TORCH_LIBRARIES} c10 cuda cublas cublasLt)

 target_compile_definitions(common_ops PRIVATE
-         FLASHATTENTION_DISABLE_BACKWARD
-         FLASHATTENTION_DISABLE_DROPOUT
-         FLASHATTENTION_DISABLE_UNEVEN_K
-    )
+    FLASHATTENTION_DISABLE_BACKWARD
+    FLASHATTENTION_DISABLE_DROPOUT
+    FLASHATTENTION_DISABLE_UNEVEN_K
+)

-install(TARGETS common_ops LIBRARY DESTINATION "sgl_kernel")
+install(TARGETS common_ops LIBRARY DESTINATION sgl_kernel)

 # ============================ Optional Install ============================= #
 # set flash-attention sources file
@@ -280,8 +286,8 @@ if (SGL_KERNEL_ENABLE_FA3)

    target_compile_options(flash_ops PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:${SGL_FLASH_KERNEL_CUDA_FLAGS}>)
    target_include_directories(flash_ops PRIVATE
-        ${TORCH_INCLUDE_DIRS}
-        ${repo-flash-attention_SOURCE_DIR}/hopper)
+        ${repo-flash-attention_SOURCE_DIR}/hopper
+    )
    target_link_libraries(flash_ops PRIVATE ${TORCH_LIBRARIES} c10 cuda)

    install(TARGETS flash_ops LIBRARY DESTINATION "sgl_kernel")
--- a/sgl-kernel/build.sh
+++ b/sgl-kernel/build.sh
@@ -35,6 +35,8 @@ docker run --rm \
   ${PYTHON_ROOT_PATH}/bin/pip install --no-cache-dir ninja setuptools==75.0.0 wheel==0.41.0 numpy uv scikit-build-core && \
   export TORCH_CUDA_ARCH_LIST='7.5 8.0 8.9 9.0+PTX' && \
   export CUDA_VERSION=${CUDA_VERSION} && \
+   export CMAKE_BUILD_PARALLEL_LEVEL=96 && \
+   export MAX_JOBS=96 && \
   mkdir -p /usr/lib/x86_64-linux-gnu/ && \
   ln -s /usr/local/cuda-${CUDA_VERSION}/targets/x86_64-linux/lib/stubs/libcuda.so /usr/lib/x86_64-linux-gnu/libcuda.so && \
   cd /sgl-kernel && \