move compile threads to an option to avoid OOM on low memory host (#10123)
This commit is contained in:
@@ -148,7 +148,10 @@ set(SGL_KERNEL_CUDA_FLAGS
|
|||||||
"-DCUTLASS_DEBUG_TRACE_LEVEL=0"
|
"-DCUTLASS_DEBUG_TRACE_LEVEL=0"
|
||||||
"--expt-relaxed-constexpr"
|
"--expt-relaxed-constexpr"
|
||||||
"--expt-extended-lambda"
|
"--expt-extended-lambda"
|
||||||
"--threads=32"
|
# Hard-coding the flag below interacts badly with CMAKE_BUILD_PARALLEL_LEVEL and
|
||||||
|
# can trigger OOM on low-memory hosts. The thread count is therefore taken from
|
||||||
|
# the SGL_KERNEL_COMPILE_THREADS option (default: 32).
|
||||||
|
# "--threads=32"
|
||||||
|
|
||||||
# Suppress warnings
|
# Suppress warnings
|
||||||
"-Xcompiler=-Wno-clang-format-violations"
|
"-Xcompiler=-Wno-clang-format-violations"
|
||||||
@@ -164,6 +167,20 @@ set(SGL_KERNEL_CUDA_FLAGS
|
|||||||
# "--ptxas-options=--verbose,--register-usage-level=10,--warn-on-local-memory-usage"
|
# "--ptxas-options=--verbose,--register-usage-level=10,--warn-on-local-memory-usage"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
set(SGL_KERNEL_COMPILE_THREADS 32 CACHE STRING "Set compilation threads, default 32")
|
||||||
|
|
||||||
|
# Reject non-integer values of SGL_KERNEL_COMPILE_THREADS; clamp values less than 1 to 1
|
||||||
|
if (NOT SGL_KERNEL_COMPILE_THREADS MATCHES "^[0-9]+$")
|
||||||
|
message(FATAL_ERROR "SGL_KERNEL_COMPILE_THREADS must be an integer, but was set to '${SGL_KERNEL_COMPILE_THREADS}'.")
|
||||||
|
elseif (SGL_KERNEL_COMPILE_THREADS LESS 1)
|
||||||
|
message(STATUS "SGL_KERNEL_COMPILE_THREADS was set to a value less than 1. Using 1 instead.")
|
||||||
|
set(SGL_KERNEL_COMPILE_THREADS 1)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
list(APPEND SGL_KERNEL_CUDA_FLAGS
|
||||||
|
"--threads=${SGL_KERNEL_COMPILE_THREADS}"
|
||||||
|
)
|
||||||
|
|
||||||
option(SGL_KERNEL_ENABLE_BF16 "Enable BF16" ON)
|
option(SGL_KERNEL_ENABLE_BF16 "Enable BF16" ON)
|
||||||
option(SGL_KERNEL_ENABLE_FP8 "Enable FP8" ON)
|
option(SGL_KERNEL_ENABLE_FP8 "Enable FP8" ON)
|
||||||
option(SGL_KERNEL_ENABLE_FP4 "Enable FP4" OFF)
|
option(SGL_KERNEL_ENABLE_FP4 "Enable FP4" OFF)
|
||||||
|
|||||||
@@ -52,10 +52,12 @@ See CMakeLists.txt for more options.
|
|||||||
### Parallel Build
|
### Parallel Build
|
||||||
|
|
||||||
We highly recommend you build sgl-kernel with Ninja. Ninja can automatically build sgl-kernel in parallel.
|
We highly recommend you build sgl-kernel with Ninja. Ninja can automatically build sgl-kernel in parallel.
|
||||||
And if you build the sgl-kernel with cmake, you need to add `CMAKE_BUILD_PARALLEL_LEVEL` for parallel build like:
|
If you build sgl-kernel with cmake, set `CMAKE_BUILD_PARALLEL_LEVEL` for a parallel build and limit
|
||||||
|
nvcc to a single thread by setting `SGL_KERNEL_COMPILE_THREADS=1`, for example:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
CMAKE_BUILD_PARALLEL_LEVEL=$(nproc) python -m uv build --wheel -Cbuild-dir=build --color=always .
|
CMAKE_BUILD_PARALLEL_LEVEL=$(nproc) python -m uv build --wheel -Cbuild-dir=build \
|
||||||
|
-Ccmake.define.SGL_KERNEL_COMPILE_THREADS=1 --color=always .
|
||||||
```
|
```
|
||||||
|
|
||||||
### ⚠️ Compilation Issue with `sgl-kernel` and CUDA 12.6
|
### ⚠️ Compilation Issue with `sgl-kernel` and CUDA 12.6
|
||||||
|
|||||||
Reference in New Issue
Block a user