From 533cb5b274246bd5eac2b9e4bb333e69147d2c90 Mon Sep 17 00:00:00 2001
From: Hongbo Xu <1320612015@qq.com>
Date: Fri, 1 Aug 2025 22:59:27 +0800
Subject: [PATCH] [DOC]Update sgl-kernel README (#8665)

---
 sgl-kernel/README.md | 51 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

diff --git a/sgl-kernel/README.md b/sgl-kernel/README.md
index a57ab9864..c71b92335 100644
--- a/sgl-kernel/README.md
+++ b/sgl-kernel/README.md
@@ -58,6 +58,57 @@ And if you build the sgl-kernel with cmake, you need to add `CMAKE_BUILD_PARALLE
 CMAKE_BUILD_PARALLEL_LEVEL=$(nproc) python -m uv build --wheel -Cbuild-dir=build --color=always .
 ```
 
+### FlashAttention on Hopper
+⚠️ **Note**: To ensure that FlashAttention compiles correctly on the Hopper GPU architecture (sm90), it is strongly [recommended](https://github.com/Dao-AILab/flash-attention/issues/1453) to use:
+- nvcc version: 12.6
+- ptxas version: 12.8
+
+**1. Check Current Versions**
+
+Before proceeding, verify your current CUDA tool versions:
+```bash
+nvcc --version
+ptxas --version
+```
+**2. Update ptxas to 12.8 (if needed)**
+
+1. Save the following script to a file (e.g., `update_ptxas.sh`).
+```bash
+#!/usr/bin/env bash
+# Adapted from: https://github.com/Dao-AILab/flash-attention/blob/7ff1b621112ba8b538e2fc6a316f2a6b6f22e518/hopper/setup.py#L404
+set -ex
+# Usage: sudo bash update_ptxas.sh <CUDA_VERSION>   (major.minor, e.g. 12.6)
+if [ -z "$1" ]; then
+    echo "Usage: $0 <CUDA_VERSION>"
+    exit 1
+fi
+CUDA_VERSION=$1
+# Only CUDA >= 12.6 and < 12.8 is patched; -v keeps the argument out of the awk program text, "+ 0" forces a numeric compare.
+if awk -v v="$CUDA_VERSION" 'BEGIN {exit !(v + 0 >= 12.6 && v + 0 < 12.8)}'; then
+    NVCC_ARCHIVE_VERSION="12.8.93"
+    NVCC_ARCHIVE_NAME="cuda_nvcc-linux-x86_64-${NVCC_ARCHIVE_VERSION}-archive"
+    NVCC_ARCHIVE_TAR="${NVCC_ARCHIVE_NAME}.tar.xz"
+    NVCC_ARCHIVE_URL="https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/linux-x86_64/${NVCC_ARCHIVE_TAR}"
+    # Download and unpack the archive in the current working directory.
+    wget "$NVCC_ARCHIVE_URL"
+    tar -xf "$NVCC_ARCHIVE_TAR"
+    # Install only the ptxas binary from the archive into /usr/local/cuda/bin.
+    mkdir -p /usr/local/cuda/bin
+    cp "${NVCC_ARCHIVE_NAME}/bin/ptxas" /usr/local/cuda/bin/
+    # Clean up temporary files
+    rm -f "${NVCC_ARCHIVE_TAR}"
+    rm -rf "${NVCC_ARCHIVE_NAME}"
+else
+    echo "CUDA ${CUDA_VERSION} is outside [12.6, 12.8); ptxas left unchanged."
+fi
+```
+2. Run the script with `sudo`, passing your CUDA version in `major.minor` form (as reported by `nvcc --version`) as the argument:
+```bash
+sudo bash update_ptxas.sh 12.6
+# Check the version
+ptxas --version
+```
+
 # Developer Guide
 
 ## Development Environment Setup