diff --git a/sgl-kernel/build.sh b/sgl-kernel/build.sh index 0bf5a07ed..fa21b125e 100755 --- a/sgl-kernel/build.sh +++ b/sgl-kernel/build.sh @@ -31,11 +31,35 @@ else TORCH_INSTALL="pip install --no-cache-dir torch==2.8.0 --index-url https://download.pytorch.org/whl/cu126" fi +# Create cache directories for persistent build artifacts in home directory +# Using home directory to persist across workspace cleanups/checkouts +CACHE_DIR="${HOME}/.cache/sgl-kernel" +CMAKE_DOWNLOAD_CACHE="${CACHE_DIR}/cmake-downloads" +CCACHE_DIR="${CACHE_DIR}/ccache" + +mkdir -p "${CMAKE_DOWNLOAD_CACHE}" +mkdir -p "${CCACHE_DIR}" + +echo "===================================" +echo "Cache Configuration" +echo "===================================" +echo "CMake download cache: ${CMAKE_DOWNLOAD_CACHE}" +echo "ccache directory: ${CCACHE_DIR}" +echo "" + docker run --rm \ -v $(pwd):/sgl-kernel \ + -v ${CMAKE_DOWNLOAD_CACHE}:/cmake-downloads \ + -v ${CCACHE_DIR}:/ccache \ + -e ENABLE_CMAKE_PROFILE="${ENABLE_CMAKE_PROFILE:-}" \ + -e ENABLE_BUILD_PROFILE="${ENABLE_BUILD_PROFILE:-}" \ ${DOCKER_IMAGE} \ bash -c " - # Install CMake (version >= 3.26) - Robust Installation + set -e + # Install CMake (version >= 3.26) - Robust Installation with caching + echo \"==================================\" + echo \"Installing CMake\" + echo \"==================================\" export CMAKE_VERSION_MAJOR=3.31 export CMAKE_VERSION_MINOR=1 # Setting these flags to reduce OOM chance only on ARM @@ -45,10 +69,23 @@ docker run --rm \ export MAKEFLAGS='-j2' export CMAKE_BUILD_PARALLEL_LEVEL=2 export NINJAFLAGS='-j2' + echo \"ARM detected: Using extra conservative settings (2 parallel jobs)\" fi - echo \"Downloading CMake from: https://cmake.org/files/v\${CMAKE_VERSION_MAJOR}/cmake-\${CMAKE_VERSION_MAJOR}.\${CMAKE_VERSION_MINOR}-linux-${ARCH}.tar.gz\" - wget https://cmake.org/files/v\${CMAKE_VERSION_MAJOR}/cmake-\${CMAKE_VERSION_MAJOR}.\${CMAKE_VERSION_MINOR}-linux-${ARCH}.tar.gz - tar -xzf 
cmake-\${CMAKE_VERSION_MAJOR}.\${CMAKE_VERSION_MINOR}-linux-${ARCH}.tar.gz + + CMAKE_TARBALL=\"cmake-\${CMAKE_VERSION_MAJOR}.\${CMAKE_VERSION_MINOR}-linux-${ARCH}.tar.gz\" + + # Check if CMake is already cached + if [ -f \"/cmake-downloads/\${CMAKE_TARBALL}\" ]; then + echo \"Using cached CMake from /cmake-downloads/\${CMAKE_TARBALL}\" + cp /cmake-downloads/\${CMAKE_TARBALL} . + else + echo \"Downloading CMake from: https://cmake.org/files/v\${CMAKE_VERSION_MAJOR}/\${CMAKE_TARBALL}\" + wget https://cmake.org/files/v\${CMAKE_VERSION_MAJOR}/\${CMAKE_TARBALL} + # Cache the downloaded file + cp \${CMAKE_TARBALL} /cmake-downloads/ + fi + + tar -xzf \${CMAKE_TARBALL} mv cmake-\${CMAKE_VERSION_MAJOR}.\${CMAKE_VERSION_MINOR}-linux-${ARCH} /opt/cmake export PATH=/opt/cmake/bin:\$PATH export LD_LIBRARY_PATH=/lib64:\$LD_LIBRARY_PATH @@ -58,6 +95,50 @@ docker run --rm \ which cmake cmake --version + echo \"==================================\" + echo \"Installing and configuring ccache\" + echo \"==================================\" + + # Install ccache 4.12.1 from source for CUDA support (yum provides old 3.7.7) + echo \"Installing ccache 4.12.1 from source...\" + + # Install build dependencies + yum install -y gcc gcc-c++ make wget tar + + # Download and build ccache 4.12.1 + cd /tmp + wget -q https://github.com/ccache/ccache/releases/download/v4.12.1/ccache-4.12.1.tar.xz + tar -xf ccache-4.12.1.tar.xz + cd ccache-4.12.1 + + # Build and install (uses already-installed CMake 3.31) + mkdir build && cd build + /opt/cmake/bin/cmake -D CMAKE_BUILD_TYPE=Release -D CMAKE_INSTALL_PREFIX=/usr .. 
>/dev/null + make -j\$(nproc) >/dev/null + make install >/dev/null + + # Verify installation + ccache --version + echo \"ccache 4.12.1 installed successfully\" + cd /sgl-kernel + + # Configure ccache + export CCACHE_DIR=/ccache + export CCACHE_BASEDIR=/sgl-kernel + export CCACHE_MAXSIZE=10G + export CCACHE_COMPILERCHECK=content + export CCACHE_COMPRESS=true + export CCACHE_SLOPPINESS=file_macro,time_macros,include_file_mtime,include_file_ctime + + # Set up ccache as compiler launcher (don't use PATH to avoid -ccbin conflicts) + export CMAKE_C_COMPILER_LAUNCHER=ccache + export CMAKE_CXX_COMPILER_LAUNCHER=ccache + export CMAKE_CUDA_COMPILER_LAUNCHER=ccache + + # Show ccache stats before build + ccache -sV || true + echo \"\" + yum install numactl-devel -y && \ yum install libibverbs -y --nogpgcheck && \ ln -sv /usr/lib64/libibverbs.so.1 /usr/lib64/libibverbs.so && \ @@ -70,6 +151,77 @@ docker run --rm \ cd /sgl-kernel && \ ls -la ${PYTHON_ROOT_PATH}/lib/python${PYTHON_VERSION}/site-packages/wheel/ && \ + + # Enable CMake profiling if requested + if [ -n \"${ENABLE_CMAKE_PROFILE}\" ]; then + echo \"CMake profiling enabled - will save to /sgl-kernel/cmake-profile.json\" + export CMAKE_ARGS=\"--profiling-output=/sgl-kernel/cmake-profile.json --profiling-format=google-trace\" + fi + + export NINJA_STATUS=\"[%f/%t %es] \" + # Enable Ninja build profiling if requested + if [ -n \"${ENABLE_BUILD_PROFILE}\" ]; then + echo \"Ninja build profiling enabled - will save to /sgl-kernel/build-trace.json\" + fi + PYTHONPATH=${PYTHON_ROOT_PATH}/lib/python${PYTHON_VERSION}/site-packages ${PYTHON_ROOT_PATH}/bin/python -m uv build --wheel -Cbuild-dir=build . 
--color=always --no-build-isolation && \ ./rename_wheels.sh + + # Show profile location if profiling was enabled + if [ -n \"${ENABLE_CMAKE_PROFILE}\" ] && [ -f /sgl-kernel/cmake-profile.json ]; then + echo \"\" + echo \"==================================\" + echo \"CMake Profile Generated\" + echo \"==================================\" + echo \"Profile saved to: cmake-profile.json\" + echo \"View in browser: chrome://tracing or edge://tracing\" + echo \"\" + fi + + # Generate Ninja build trace if profiling enabled + if [ -n \"${ENABLE_BUILD_PROFILE}\" ] && [ -f /sgl-kernel/build/.ninja_log ]; then + echo \"\" + echo \"==================================\" + echo \"Generating Ninja Build Trace\" + echo \"==================================\" + + # Download ninjatracing script from GitHub (using PR #39 branch for ninja log v7 support) + wget -q https://raw.githubusercontent.com/cradleapps/ninjatracing/084212eaf68f25c70579958a2ed67fb4ec2a9ca4/ninjatracing -O /tmp/ninjatracing || echo \"Note: Failed to download ninjatracing, skipping build trace\" + + # Convert .ninja_log to Chrome trace (JSON format) + if [ -f /tmp/ninjatracing ]; then + ${PYTHON_ROOT_PATH}/bin/python /tmp/ninjatracing /sgl-kernel/build/.ninja_log > /sgl-kernel/build-trace.json || true + + if [ -f /sgl-kernel/build-trace.json ]; then + # Compress the trace for smaller file size and faster loading + gzip -9 -k /sgl-kernel/build-trace.json 2>/dev/null || true + + echo \"Build trace saved to: build-trace.json\" + if [ -f /sgl-kernel/build-trace.json.gz ]; then + ORIGINAL_SIZE=\$(stat -f%z /sgl-kernel/build-trace.json 2>/dev/null || stat -c%s /sgl-kernel/build-trace.json) + COMPRESSED_SIZE=\$(stat -f%z /sgl-kernel/build-trace.json.gz 2>/dev/null || stat -c%s /sgl-kernel/build-trace.json.gz) + echo \"Compressed to: build-trace.json.gz (\${ORIGINAL_SIZE} -> \${COMPRESSED_SIZE} bytes)\" + fi + echo \"\" + echo \"View in browser:\" + echo \" - chrome://tracing (load JSON file)\" + echo \" - ui.perfetto.dev (recommended, supports .gz
files)\" + echo \"\" + echo \"Shows:\" + echo \" - Compilation time per file\" + echo \" - Parallelism utilization\" + echo \" - Critical path (longest dependency chain)\" + echo \" - Where the 2-hour build time went\" + fi + fi + echo \"\" + fi + + # Show ccache statistics after build + echo \"\" + echo \"==================================\" + echo \"ccache Statistics\" + echo \"==================================\" + ccache -s + echo \"\" " diff --git a/sgl-kernel/kernel-runner-setup.sh b/sgl-kernel/kernel-runner-setup.sh new file mode 100755 index 000000000..d7411d1e2 --- /dev/null +++ b/sgl-kernel/kernel-runner-setup.sh @@ -0,0 +1,150 @@ +#!/bin/bash +set -e + +CUDA_VERSIONS="${1:-12-8,12-9}" + +echo "===================================" +echo "Installing Docker..." +echo "===================================" + +# Add Docker's official GPG key: +sudo apt-get update +sudo apt-get install -y ca-certificates curl +sudo install -m 0755 -d /etc/apt/keyrings +sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc +sudo chmod a+r /etc/apt/keyrings/docker.asc + +# Add the repository to Apt sources: +echo \ + "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \ + $(. /etc/os-release && echo "${UBUNTU_CODENAME:-$VERSION_CODENAME}") stable" | \ + sudo tee /etc/apt/sources.list.d/docker.list > /dev/null +sudo apt-get update + +sudo apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin + +# Add current user to docker group +sudo usermod -aG docker $USER + +echo "Docker installed successfully!" 
+echo "Note: You need to log out and log back in for docker group membership to take effect" +echo "" + +# Detect architecture for Docker image selection +ARCH=$(uname -m) + +if [ "$ARCH" = "x86_64" ]; then + BUILDER_NAME="pytorch/manylinux2_28-builder" +elif [ "$ARCH" = "aarch64" ]; then + BUILDER_NAME="pytorch/manylinuxaarch64-builder" +else + echo "Unsupported architecture: $ARCH" + exit 1 +fi + +# Pull Docker images for the specified CUDA versions +echo "===================================" +echo "Pulling Docker Images..." +echo "===================================" +echo "Architecture: ${ARCH}" +echo "Builder: ${BUILDER_NAME}" + +# Parse CUDA versions and pull corresponding Docker images +IFS=',' read -ra CUDA_VERSION_ARRAY <<< "$CUDA_VERSIONS" + +# Convert CUDA versions from format "12-8" to "12.8" and pull images +for CUDA_VERSION in "${CUDA_VERSION_ARRAY[@]}"; do + # Trim whitespace + CUDA_VERSION=$(echo "$CUDA_VERSION" | xargs) + + # Convert format: 12-8 -> 12.8 + CUDA_VERSION_DOTTED=$(echo "$CUDA_VERSION" | tr '-' '.') + + DOCKER_IMAGE="${BUILDER_NAME}:cuda${CUDA_VERSION_DOTTED}" + + echo "" + echo "Pulling ${DOCKER_IMAGE}..." + + # Use newgrp to ensure docker commands work (user was just added to docker group) + if sg docker -c "docker pull ${DOCKER_IMAGE}"; then + echo "✓ Successfully pulled ${DOCKER_IMAGE}" + else + echo "✗ Failed to pull ${DOCKER_IMAGE}" + echo " You may need to log out and log back in for docker group to take effect" + fi +done + +echo "" +echo "Docker images pulled successfully!" +echo "" + +# Auto-detect Ubuntu version +if command -v lsb_release &> /dev/null; then + UBUNTU_VERSION=$(lsb_release -rs | tr -d '.') +else + UBUNTU_VERSION=$(. 
/etc/os-release && echo "$VERSION_ID" | tr -d '.') +fi + +# Set CUDA architecture (ARCH already detected above for Docker images) +if [ "$ARCH" = "x86_64" ]; then + CUDA_ARCH="x86_64" +elif [ "$ARCH" = "aarch64" ]; then + CUDA_ARCH="sbsa" +else + echo "Unsupported architecture: $ARCH" + exit 1 +fi + +echo "===================================" +echo "System Information:" +echo "===================================" +echo "Ubuntu Version: ${UBUNTU_VERSION}" +echo "Architecture: ${ARCH}" +echo "CUDA Architecture: ${CUDA_ARCH}" +echo "" + +# Install CUDA keyring (only need to do this once) +echo "===================================" +echo "Installing CUDA keyring..." +echo "===================================" +KEYRING_URL="https://developer.download.nvidia.com/compute/cuda/repos/ubuntu${UBUNTU_VERSION}/${CUDA_ARCH}/cuda-keyring_1.1-1_all.deb" +wget -q "$KEYRING_URL" -O cuda-keyring.deb +sudo dpkg -i cuda-keyring.deb +sudo apt-get update +rm cuda-keyring.deb +echo "CUDA keyring installed successfully!" +echo "" + +# Split CUDA versions and install each one +IFS=',' read -ra CUDA_VERSION_ARRAY <<< "$CUDA_VERSIONS" + +echo "===================================" +echo "Installing CUDA Toolkits..." +echo "===================================" +echo "Versions to install: ${CUDA_VERSIONS}" +echo "" + +for CUDA_VERSION in "${CUDA_VERSION_ARRAY[@]}"; do + # Trim whitespace + CUDA_VERSION=$(echo "$CUDA_VERSION" | xargs) + + echo "-----------------------------------" + echo "Installing CUDA Toolkit ${CUDA_VERSION}..." + echo "-----------------------------------" + + if sudo apt-get install -y "cuda-toolkit-${CUDA_VERSION}"; then + echo "✓ CUDA Toolkit ${CUDA_VERSION} installed successfully!"
+ else + echo "✗ Failed to install CUDA Toolkit ${CUDA_VERSION}" + echo " This might be due to an invalid version or repository issue" + fi + echo "" +done + +echo "===================================" +echo "Installation Summary" +echo "===================================" +echo "Installed CUDA versions:" +ls -d /usr/local/cuda-* 2>/dev/null || echo "No CUDA installations found in /usr/local/" +echo "" +echo "Setup complete!"