Rename runner labels (#11228)

2025-10-05 18:05:41 -07:00
parent 41763ba079
commit d645ae90a3
13 changed files with 48 additions and 277 deletions
--- a/scripts/ci/ci_install_dependency.sh
+++ b/scripts/ci/ci_install_dependency.sh
@@ -3,21 +3,16 @@
 set -euxo pipefail

 IS_BLACKWELL=${IS_BLACKWELL:-0}
-
-if [ "$IS_BLACKWELL" = "1" ]; then
-    CU_VERSION="cu129"
-else
-    CU_VERSION="cu126"
-fi
-
-# Clear torch compilation cache
-python3 -c 'import os, shutil, tempfile, getpass; cache_dir = os.environ.get("TORCHINDUCTOR_CACHE_DIR") or os.path.join(tempfile.gettempdir(), "torchinductor_" + getpass.getuser()); shutil.rmtree(cache_dir, ignore_errors=True)'
+CU_VERSION="cu128"

 # Kill existing processes
 SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
 bash "${SCRIPT_DIR}/../killall_sglang.sh"
 echo "CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-}"

+# Clear torch compilation cache
+python3 -c 'import os, shutil, tempfile, getpass; cache_dir = os.environ.get("TORCHINDUCTOR_CACHE_DIR") or os.path.join(tempfile.gettempdir(), "torchinductor_" + getpass.getuser()); shutil.rmtree(cache_dir, ignore_errors=True)'
+
 # Install apt packages
 apt install -y git libnuma-dev

@@ -29,7 +24,7 @@ if [ "$IS_BLACKWELL" = "1" ]; then
    PIP_INSTALL_SUFFIX="--break-system-packages"

    # Clean up existing installations
-    $PIP_CMD uninstall -y flashinfer_python sgl-kernel sglang vllm $PIP_INSTALL_SUFFIX || true
+    $PIP_CMD uninstall -y flashinfer_python sgl-kernel sglang vllm torch $PIP_INSTALL_SUFFIX || true
 else
    # In normal cases, we use uv, which is much faster than pip.
    pip install --upgrade pip
@@ -40,7 +35,7 @@ else
    PIP_INSTALL_SUFFIX="--index-strategy unsafe-best-match"

    # Clean up existing installations
-    $PIP_CMD uninstall flashinfer_python sgl-kernel sglang vllm || true
+    $PIP_CMD uninstall flashinfer_python sgl-kernel sglang vllm torch || true
 fi

 # Install the main package
@@ -49,26 +44,16 @@ $PIP_CMD install -e "python[dev]" --extra-index-url https://download.pytorch.org
 # Install router for pd-disagg test
 SGLANG_ROUTER_BUILD_NO_RUST=1 $PIP_CMD install -e "sgl-router" $PIP_INSTALL_SUFFIX

+# Install sgl-kernel
 SGL_KERNEL_VERSION_FROM_KERNEL=$(grep -Po '(?<=^version = ")[^"]*' sgl-kernel/pyproject.toml)
 SGL_KERNEL_VERSION_FROM_SRT=$(grep -Po -m1 '(?<=sgl-kernel==)[0-9A-Za-z\.\-]+' python/pyproject.toml)
 echo "SGL_KERNEL_VERSION_FROM_KERNEL=${SGL_KERNEL_VERSION_FROM_KERNEL} SGL_KERNEL_VERSION_FROM_SRT=${SGL_KERNEL_VERSION_FROM_SRT}"

-if [ "$IS_BLACKWELL" = "1" ]; then
-    SGL_KERNEL_CUDA_VERSION=cu128
-else
-    SGL_KERNEL_CUDA_VERSION=cu124
-fi
-
 if [ "${CUSTOM_BUILD_SGL_KERNEL:-}" = "true" ]; then
    ls -alh sgl-kernel/dist
-    WHEEL_FILE=$(ls sgl-kernel/dist/sgl_kernel-${SGL_KERNEL_VERSION_FROM_KERNEL}+${SGL_KERNEL_CUDA_VERSION}-cp310-abi3-manylinux2014_x86_64.whl 2>/dev/null || true)
-    if [ -f "$WHEEL_FILE" ]; then
-      $PIP_CMD install sgl-kernel/dist/sgl_kernel-${SGL_KERNEL_VERSION_FROM_KERNEL}+${SGL_KERNEL_CUDA_VERSION}-cp310-abi3-manylinux2014_x86_64.whl --force-reinstall $PIP_INSTALL_SUFFIX
-    else
-      $PIP_CMD install sgl-kernel/dist/sgl_kernel-${SGL_KERNEL_VERSION_FROM_KERNEL}-cp310-abi3-manylinux2014_x86_64.whl --force-reinstall $PIP_INSTALL_SUFFIX
-    fi
+    $PIP_CMD install sgl-kernel/dist/sgl_kernel-${SGL_KERNEL_VERSION_FROM_KERNEL}-cp310-abi3-manylinux2014_x86_64.whl --force-reinstall $PIP_INSTALL_SUFFIX
 else
-    $PIP_CMD install https://github.com/sgl-project/whl/releases/download/v${SGL_KERNEL_VERSION_FROM_SRT}/sgl_kernel-${SGL_KERNEL_VERSION_FROM_SRT}+${SGL_KERNEL_CUDA_VERSION}-cp310-abi3-manylinux2014_x86_64.whl --force-reinstall $PIP_INSTALL_SUFFIX
+    $PIP_CMD install sgl-kernel==${SGL_KERNEL_VERSION_FROM_SRT} --force-reinstall $PIP_INSTALL_SUFFIX
 fi

 # Show current packages
@@ -86,14 +71,6 @@ if [ "$IS_BLACKWELL" != "1" ]; then
    $PIP_CMD install xformers --index-url https://download.pytorch.org/whl/${CU_VERSION} --no-deps $PIP_INSTALL_SUFFIX
 fi

-# Install FlashMLA for attention backend tests
-# $PIP_CMD install git+https://github.com/deepseek-ai/FlashMLA.git $PIP_INSTALL_SUFFIX
-
 # Show current packages
 $PIP_CMD list
-
-
-if [ -n "${HF_TOKEN:-}" ]; then
-    $PIP_CMD install -U "huggingface_hub[cli]" $PIP_INSTALL_SUFFIX
-    hf auth login --token $HF_TOKEN
-fi
+python3 -c "import torch; print(torch.version.cuda)"