Rename runner labels (#11228)
This commit is contained in:
2
.github/workflows/pr-test-pd-router.yml
vendored
2
.github/workflows/pr-test-pd-router.yml
vendored
@@ -28,7 +28,7 @@ permissions:
|
||||
jobs:
|
||||
test-disaggregation:
|
||||
if: github.event_name != 'pull_request' || (contains(github.event.pull_request.labels.*.name, 'run-ci') && contains(github.event.pull_request.labels.*.name, 'router-benchmark'))
|
||||
runs-on: [h200]
|
||||
runs-on: [8-gpu-h200-oracle]
|
||||
timeout-minutes: 45
|
||||
|
||||
steps:
|
||||
|
||||
2
.github/workflows/pr-test-rust.yml
vendored
2
.github/workflows/pr-test-rust.yml
vendored
@@ -83,7 +83,7 @@ jobs:
|
||||
|
||||
pytest-rust:
|
||||
if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
|
||||
runs-on: BM.A10.4
|
||||
runs-on: 4-gpu-a10
|
||||
timeout-minutes: 25
|
||||
steps:
|
||||
- name: Checkout code
|
||||
|
||||
11
.github/workflows/pr-test.yml
vendored
11
.github/workflows/pr-test.yml
vendored
@@ -62,7 +62,7 @@ jobs:
|
||||
sgl-kernel-build-wheels:
|
||||
needs: [check-changes]
|
||||
if: needs.check-changes.outputs.sgl_kernel == 'true'
|
||||
runs-on: sgl-kernel-build-node
|
||||
runs-on: x64-kernel-build-node
|
||||
strategy:
|
||||
matrix:
|
||||
include:
|
||||
@@ -323,7 +323,7 @@ jobs:
|
||||
needs: [check-changes, unit-test-backend-2-gpu, sgl-kernel-build-wheels]
|
||||
if: always() && !failure() && !cancelled() &&
|
||||
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
|
||||
runs-on: 8-gpu-runner
|
||||
runs-on: 8-gpu-h200
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
@@ -641,7 +641,7 @@ jobs:
|
||||
needs: [check-changes, unit-test-backend-2-gpu, sgl-kernel-build-wheels]
|
||||
if: always() && !failure() && !cancelled() &&
|
||||
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
|
||||
runs-on: 8-gpu-runner
|
||||
runs-on: 8-gpu-h200
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
@@ -668,7 +668,7 @@ jobs:
|
||||
needs: [check-changes, unit-test-backend-2-gpu, sgl-kernel-build-wheels]
|
||||
if: always() && !failure() && !cancelled() &&
|
||||
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
|
||||
runs-on: 4-b200-runner
|
||||
runs-on: 4-gpu-b200
|
||||
strategy:
|
||||
fail-fast: false
|
||||
steps:
|
||||
@@ -702,7 +702,8 @@ jobs:
|
||||
|
||||
unit-test-frontend, unit-test-backend-1-gpu,
|
||||
unit-test-backend-2-gpu, unit-test-backend-4-gpu, unit-test-backend-8-gpu,
|
||||
performance-test-1-gpu-part-1, performance-test-1-gpu-part-2, performance-test-2-gpu,
|
||||
performance-test-1-gpu-part-1, performance-test-1-gpu-part-2, performance-test-1-gpu-part-3,
|
||||
performance-test-2-gpu,
|
||||
accuracy-test-1-gpu, accuracy-test-2-gpu,
|
||||
unit-test-deepep-4-gpu, unit-test-deepep-8-gpu,
|
||||
unit-test-backend-4-gpu-b200,
|
||||
|
||||
6
.github/workflows/release-docker-dev.yml
vendored
6
.github/workflows/release-docker-dev.yml
vendored
@@ -8,7 +8,7 @@ on:
|
||||
jobs:
|
||||
build-dev-x86:
|
||||
if: ${{ github.repository == 'sgl-project/sglang' }}
|
||||
runs-on: nvidia
|
||||
runs-on: x64-docker-build-node
|
||||
strategy:
|
||||
matrix:
|
||||
variant:
|
||||
@@ -48,12 +48,12 @@ jobs:
|
||||
|
||||
build-dev-arm:
|
||||
if: ${{ github.repository == 'sgl-project/sglang' }}
|
||||
runs-on: sgl-kernel-release-node-arm
|
||||
runs-on: arm-docker-build-node
|
||||
strategy:
|
||||
matrix:
|
||||
variant:
|
||||
- version: 12.9.1
|
||||
type: blackwell_aarch64
|
||||
type: all_aarch64
|
||||
tag: dev-arm64
|
||||
steps:
|
||||
- name: Delete huge unnecessary tools folder
|
||||
|
||||
14
.github/workflows/release-docker.yml
vendored
14
.github/workflows/release-docker.yml
vendored
@@ -14,13 +14,9 @@ jobs:
|
||||
strategy:
|
||||
matrix:
|
||||
variant:
|
||||
- cuda_version: "12.6.1"
|
||||
build_type: "all"
|
||||
- cuda_version: "12.8.1"
|
||||
build_type: "blackwell"
|
||||
- cuda_version: "12.9.1"
|
||||
build_type: "blackwell"
|
||||
runs-on: nvidia
|
||||
build_type: "all"
|
||||
runs-on: x64-docker-build-node
|
||||
steps:
|
||||
- name: Delete huge unnecessary tools folder
|
||||
run: rm -rf /opt/hostedtoolcache
|
||||
@@ -67,8 +63,6 @@ jobs:
|
||||
|
||||
if [ "${{ matrix.variant.build_type }}" = "all" ]; then
|
||||
tag_suffix=""
|
||||
elif [ "${{ matrix.variant.build_type }}" = "blackwell" ]; then
|
||||
tag_suffix="-b200"
|
||||
else
|
||||
echo "Unsupported build type"
|
||||
exit 1
|
||||
@@ -87,8 +81,8 @@ jobs:
|
||||
matrix:
|
||||
variant:
|
||||
- cuda_version: "12.9.1"
|
||||
build_type: "blackwell_aarch64"
|
||||
runs-on: sgl-kernel-release-node-arm
|
||||
build_type: "all_aarch64"
|
||||
runs-on: arm-docker-build-node
|
||||
steps:
|
||||
- name: Delete huge unnecessary tools folder
|
||||
run: rm -rf /opt/hostedtoolcache
|
||||
|
||||
92
.github/workflows/release-whl-kernel-cu118.yml
vendored
92
.github/workflows/release-whl-kernel-cu118.yml
vendored
@@ -1,92 +0,0 @@
|
||||
name: Release SGLang Kernel Wheel (cu118)
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
tag_name:
|
||||
type: string
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
paths:
|
||||
- sgl-kernel/python/sgl_kernel/version.py
|
||||
|
||||
jobs:
|
||||
build-wheels:
|
||||
if: github.repository == 'sgl-project/sglang'
|
||||
runs-on: sgl-kernel-release-node
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ["3.9"]
|
||||
cuda-version: ["11.8"]
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: "recursive"
|
||||
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- name: Build wheels for Python ${{ matrix.python-version }} and CUDA ${{ matrix.cuda-version }}
|
||||
run: |
|
||||
cd sgl-kernel
|
||||
chmod +x ./build.sh
|
||||
./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}"
|
||||
|
||||
- name: Upload artifacts
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}
|
||||
path: sgl-kernel/dist/*
|
||||
|
||||
release:
|
||||
needs: build-wheels
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Download artifacts
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
path: sgl-kernel/dist/
|
||||
merge-multiple: true
|
||||
pattern: wheel-*
|
||||
|
||||
- name: Set tag name
|
||||
id: set_tag_name
|
||||
run: |
|
||||
if [ -z "${{ inputs.tag_name }}" ]; then
|
||||
TAG_NAME="v$(cat sgl-kernel/python/sgl_kernel/version.py | cut -d'"' -f2)"
|
||||
echo "tag_name=$TAG_NAME" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "tag_name=${{ inputs.tag_name }}" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
- name: Release
|
||||
uses: softprops/action-gh-release@v2
|
||||
with:
|
||||
tag_name: ${{ steps.set_tag_name.outputs.tag_name }}
|
||||
repository: sgl-project/whl
|
||||
token: ${{ secrets.WHL_TOKEN }}
|
||||
files: |
|
||||
sgl-kernel/dist/*
|
||||
|
||||
- name: Clone wheel index
|
||||
run: git clone https://oauth2:${WHL_TOKEN}@github.com/sgl-project/whl.git sgl-whl
|
||||
env:
|
||||
WHL_TOKEN: ${{ secrets.WHL_TOKEN }}
|
||||
|
||||
- name: Update wheel index
|
||||
run: python3 scripts/update_kernel_whl_index.py
|
||||
|
||||
- name: Push wheel index
|
||||
run: |
|
||||
cd sgl-whl
|
||||
git config --local user.name "github-actions[bot]"
|
||||
git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
|
||||
git add -A
|
||||
git commit -m "update whl index"
|
||||
git push
|
||||
121
.github/workflows/release-whl-kernel.yml
vendored
121
.github/workflows/release-whl-kernel.yml
vendored
@@ -19,7 +19,7 @@ concurrency:
|
||||
jobs:
|
||||
build-cu129:
|
||||
if: github.repository == 'sgl-project/sglang'
|
||||
runs-on: sgl-kernel-release-node
|
||||
runs-on: x64-kernel-build-node
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ["3.10"]
|
||||
@@ -46,38 +46,14 @@ jobs:
|
||||
pip install twine
|
||||
python3 -m twine upload --skip-existing dist/* -u __token__ -p ${{ secrets.PYPI_TOKEN }}
|
||||
|
||||
build-cu124:
|
||||
if: github.repository == 'sgl-project/sglang'
|
||||
needs: build-cu129
|
||||
runs-on: sgl-kernel-release-node
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ["3.10"]
|
||||
cuda-version: ["12.4"]
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: "recursive"
|
||||
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- name: Build wheels
|
||||
run: |
|
||||
cd sgl-kernel
|
||||
chmod +x ./build.sh
|
||||
./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}"
|
||||
|
||||
- name: Upload artifacts
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}
|
||||
path: sgl-kernel/dist/*
|
||||
|
||||
release-cu124:
|
||||
needs: build-cu124
|
||||
release-cu129:
|
||||
needs: build-cu129
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
@@ -114,99 +90,20 @@ jobs:
|
||||
WHL_TOKEN: ${{ secrets.WHL_TOKEN }}
|
||||
|
||||
- name: Update wheel index
|
||||
run: python3 scripts/update_kernel_whl_index.py --cuda 124
|
||||
run: python3 scripts/update_kernel_whl_index.py --cuda 129
|
||||
|
||||
- name: Push wheel index
|
||||
run: |
|
||||
cd sgl-whl
|
||||
git config --local user.name "github-actions[bot]"
|
||||
git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
|
||||
git add -A
|
||||
git commit -m "update whl index"
|
||||
git push
|
||||
|
||||
build-cu128:
|
||||
if: github.repository == 'sgl-project/sglang'
|
||||
needs: build-cu129
|
||||
runs-on: sgl-kernel-release-node
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ["3.10"]
|
||||
cuda-version: ["12.8"]
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: "recursive"
|
||||
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- name: Build wheels
|
||||
run: |
|
||||
cd sgl-kernel
|
||||
chmod +x ./build.sh
|
||||
./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}"
|
||||
|
||||
- name: Upload artifacts
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}
|
||||
path: sgl-kernel/dist/*
|
||||
|
||||
release-cu128:
|
||||
needs: build-cu128
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Download artifacts
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
path: sgl-kernel/dist/
|
||||
merge-multiple: true
|
||||
pattern: wheel-*
|
||||
|
||||
- name: Set tag name
|
||||
id: set_tag_name
|
||||
run: |
|
||||
if [ -z "${{ inputs.tag_name }}" ]; then
|
||||
TAG_NAME="v$(cat sgl-kernel/python/sgl_kernel/version.py | cut -d'"' -f2)"
|
||||
echo "tag_name=$TAG_NAME" >> $GITHUB_OUTPUT
|
||||
else
|
||||
echo "tag_name=${{ inputs.tag_name }}" >> $GITHUB_OUTPUT
|
||||
fi
|
||||
|
||||
- name: Release
|
||||
uses: softprops/action-gh-release@v2
|
||||
with:
|
||||
tag_name: ${{ steps.set_tag_name.outputs.tag_name }}
|
||||
repository: sgl-project/whl
|
||||
token: ${{ secrets.WHL_TOKEN }}
|
||||
files: |
|
||||
sgl-kernel/dist/*
|
||||
|
||||
- name: Clone wheel index
|
||||
run: git clone https://oauth2:${WHL_TOKEN}@github.com/sgl-project/whl.git sgl-whl
|
||||
env:
|
||||
WHL_TOKEN: ${{ secrets.WHL_TOKEN }}
|
||||
|
||||
- name: Update wheel index
|
||||
run: python3 scripts/update_kernel_whl_index.py --cuda 128
|
||||
|
||||
- name: Push wheel index
|
||||
run: |
|
||||
cd sgl-whl
|
||||
git config --local user.name "github-actions[bot]"
|
||||
git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
|
||||
git config --local user.name "sglang-bot"
|
||||
git config --local user.email "sglangbot@gmail.com"
|
||||
git add -A
|
||||
git commit -m "update whl index"
|
||||
git push
|
||||
|
||||
build-cu129-aarch64:
|
||||
if: github.repository == 'sgl-project/sglang'
|
||||
runs-on: sgl-kernel-release-node-arm
|
||||
runs-on: arm-kernel-build-node
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ["3.10"]
|
||||
@@ -282,8 +179,8 @@ jobs:
|
||||
- name: Push wheel index
|
||||
run: |
|
||||
cd sgl-whl
|
||||
git config --local user.name "github-actions[bot]"
|
||||
git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
|
||||
git config --local user.name "sglang-bot"
|
||||
git config --local user.email "sglangbot@gmail.com"
|
||||
git add -A
|
||||
git commit -m "update whl index"
|
||||
git push
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
You can install SGLang using one of the methods below.
|
||||
|
||||
This page primarily applies to common NVIDIA GPU platforms.
|
||||
For other or newer platforms, please refer to the dedicated pages for [NVIDIA Blackwell GPUs](../platforms/blackwell_gpu.md), [AMD GPUs](../platforms/amd_gpu.md), [Intel Xeon CPUs](../platforms/cpu_server.md), [NVIDIA Jetson](../platforms/nvidia_jetson.md), [Ascend NPUs](../platforms/ascend_npu.md).
|
||||
For other or newer platforms, please refer to the dedicated pages for [AMD GPUs](../platforms/amd_gpu.md), [Intel Xeon CPUs](../platforms/cpu_server.md), [NVIDIA Jetson](../platforms/nvidia_jetson.md), [Ascend NPUs](../platforms/ascend_npu.md).
|
||||
|
||||
## Method 1: With pip or uv
|
||||
|
||||
|
||||
@@ -1,9 +0,0 @@
|
||||
# Blackwell GPUs
|
||||
|
||||
We will release the pre-built wheels soon. Before that, please try to compile from source or check the blackwell docker images from [lmsysorg/sglang](https://hub.docker.com/r/lmsysorg/sglang/tags).
|
||||
|
||||
## B200 with x86 CPUs
|
||||
TODO
|
||||
|
||||
## GB200/GB300 with ARM CPUs
|
||||
TODO
|
||||
@@ -91,9 +91,14 @@ tracing = [
|
||||
"opentelemetry-sdk",
|
||||
]
|
||||
all = ["sglang[test]", "sglang[decord]"]
|
||||
all_aarch64 = ["sglang[test]"]
|
||||
dev = ["sglang[test]", "sglang[decord]"]
|
||||
|
||||
|
||||
# The following will be deprecated in 2 weeks
|
||||
blackwell = ["sglang[test]", "sglang[decord]"]
|
||||
blackwell_aarch64 = ["sglang[test]"]
|
||||
dev = ["sglang[test]", "sglang[decord]"]
|
||||
|
||||
|
||||
[project.urls]
|
||||
"Homepage" = "https://github.com/sgl-project/sglang"
|
||||
|
||||
@@ -3,21 +3,16 @@
|
||||
set -euxo pipefail
|
||||
|
||||
IS_BLACKWELL=${IS_BLACKWELL:-0}
|
||||
|
||||
if [ "$IS_BLACKWELL" = "1" ]; then
|
||||
CU_VERSION="cu129"
|
||||
else
|
||||
CU_VERSION="cu126"
|
||||
fi
|
||||
|
||||
# Clear torch compilation cache
|
||||
python3 -c 'import os, shutil, tempfile, getpass; cache_dir = os.environ.get("TORCHINDUCTOR_CACHE_DIR") or os.path.join(tempfile.gettempdir(), "torchinductor_" + getpass.getuser()); shutil.rmtree(cache_dir, ignore_errors=True)'
|
||||
CU_VERSION="cu128"
|
||||
|
||||
# Kill existing processes
|
||||
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
|
||||
bash "${SCRIPT_DIR}/../killall_sglang.sh"
|
||||
echo "CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-}"
|
||||
|
||||
# Clear torch compilation cache
|
||||
python3 -c 'import os, shutil, tempfile, getpass; cache_dir = os.environ.get("TORCHINDUCTOR_CACHE_DIR") or os.path.join(tempfile.gettempdir(), "torchinductor_" + getpass.getuser()); shutil.rmtree(cache_dir, ignore_errors=True)'
|
||||
|
||||
# Install apt packages
|
||||
apt install -y git libnuma-dev
|
||||
|
||||
@@ -29,7 +24,7 @@ if [ "$IS_BLACKWELL" = "1" ]; then
|
||||
PIP_INSTALL_SUFFIX="--break-system-packages"
|
||||
|
||||
# Clean up existing installations
|
||||
$PIP_CMD uninstall -y flashinfer_python sgl-kernel sglang vllm $PIP_INSTALL_SUFFIX || true
|
||||
$PIP_CMD uninstall -y flashinfer_python sgl-kernel sglang vllm torch $PIP_INSTALL_SUFFIX || true
|
||||
else
|
||||
# In normal cases, we use uv, which is much faster than pip.
|
||||
pip install --upgrade pip
|
||||
@@ -40,7 +35,7 @@ else
|
||||
PIP_INSTALL_SUFFIX="--index-strategy unsafe-best-match"
|
||||
|
||||
# Clean up existing installations
|
||||
$PIP_CMD uninstall flashinfer_python sgl-kernel sglang vllm || true
|
||||
$PIP_CMD uninstall flashinfer_python sgl-kernel sglang vllm torch || true
|
||||
fi
|
||||
|
||||
# Install the main package
|
||||
@@ -49,26 +44,16 @@ $PIP_CMD install -e "python[dev]" --extra-index-url https://download.pytorch.org
|
||||
# Install router for pd-disagg test
|
||||
SGLANG_ROUTER_BUILD_NO_RUST=1 $PIP_CMD install -e "sgl-router" $PIP_INSTALL_SUFFIX
|
||||
|
||||
# Install sgl-kernel
|
||||
SGL_KERNEL_VERSION_FROM_KERNEL=$(grep -Po '(?<=^version = ")[^"]*' sgl-kernel/pyproject.toml)
|
||||
SGL_KERNEL_VERSION_FROM_SRT=$(grep -Po -m1 '(?<=sgl-kernel==)[0-9A-Za-z\.\-]+' python/pyproject.toml)
|
||||
echo "SGL_KERNEL_VERSION_FROM_KERNEL=${SGL_KERNEL_VERSION_FROM_KERNEL} SGL_KERNEL_VERSION_FROM_SRT=${SGL_KERNEL_VERSION_FROM_SRT}"
|
||||
|
||||
if [ "$IS_BLACKWELL" = "1" ]; then
|
||||
SGL_KERNEL_CUDA_VERSION=cu128
|
||||
else
|
||||
SGL_KERNEL_CUDA_VERSION=cu124
|
||||
fi
|
||||
|
||||
if [ "${CUSTOM_BUILD_SGL_KERNEL:-}" = "true" ]; then
|
||||
ls -alh sgl-kernel/dist
|
||||
WHEEL_FILE=$(ls sgl-kernel/dist/sgl_kernel-${SGL_KERNEL_VERSION_FROM_KERNEL}+${SGL_KERNEL_CUDA_VERSION}-cp310-abi3-manylinux2014_x86_64.whl 2>/dev/null || true)
|
||||
if [ -f "$WHEEL_FILE" ]; then
|
||||
$PIP_CMD install sgl-kernel/dist/sgl_kernel-${SGL_KERNEL_VERSION_FROM_KERNEL}+${SGL_KERNEL_CUDA_VERSION}-cp310-abi3-manylinux2014_x86_64.whl --force-reinstall $PIP_INSTALL_SUFFIX
|
||||
else
|
||||
$PIP_CMD install sgl-kernel/dist/sgl_kernel-${SGL_KERNEL_VERSION_FROM_KERNEL}-cp310-abi3-manylinux2014_x86_64.whl --force-reinstall $PIP_INSTALL_SUFFIX
|
||||
fi
|
||||
$PIP_CMD install sgl-kernel/dist/sgl_kernel-${SGL_KERNEL_VERSION_FROM_KERNEL}-cp310-abi3-manylinux2014_x86_64.whl --force-reinstall $PIP_INSTALL_SUFFIX
|
||||
else
|
||||
$PIP_CMD install https://github.com/sgl-project/whl/releases/download/v${SGL_KERNEL_VERSION_FROM_SRT}/sgl_kernel-${SGL_KERNEL_VERSION_FROM_SRT}+${SGL_KERNEL_CUDA_VERSION}-cp310-abi3-manylinux2014_x86_64.whl --force-reinstall $PIP_INSTALL_SUFFIX
|
||||
$PIP_CMD install sgl-kernel==${SGL_KERNEL_VERSION_FROM_SRT} --force-reinstall $PIP_INSTALL_SUFFIX
|
||||
fi
|
||||
|
||||
# Show current packages
|
||||
@@ -86,14 +71,6 @@ if [ "$IS_BLACKWELL" != "1" ]; then
|
||||
$PIP_CMD install xformers --index-url https://download.pytorch.org/whl/${CU_VERSION} --no-deps $PIP_INSTALL_SUFFIX
|
||||
fi
|
||||
|
||||
# Install FlashMLA for attention backend tests
|
||||
# $PIP_CMD install git+https://github.com/deepseek-ai/FlashMLA.git $PIP_INSTALL_SUFFIX
|
||||
|
||||
# Show current packages
|
||||
$PIP_CMD list
|
||||
|
||||
|
||||
if [ -n "${HF_TOKEN:-}" ]; then
|
||||
$PIP_CMD install -U "huggingface_hub[cli]" $PIP_INSTALL_SUFFIX
|
||||
hf auth login --token $HF_TOKEN
|
||||
fi
|
||||
python3 -c "import torch; print(torch.version.cuda)"
|
||||
|
||||
@@ -25,7 +25,6 @@ make build
|
||||
```
|
||||
|
||||
Note:
|
||||
|
||||
The `sgl-kernel` is rapidly evolving. If you experience a compilation failure, try using `make rebuild`.
|
||||
|
||||
### Build with [ccache](https://github.com/ccache/ccache)
|
||||
|
||||
@@ -88,7 +88,7 @@ suites = {
|
||||
TestFile("test_metrics.py", 32),
|
||||
TestFile("test_metrics_utils.py", 1),
|
||||
TestFile("test_mla.py", 167),
|
||||
TestFile("test_mla_deepseek_v3.py", 1420),
|
||||
TestFile("test_mla_deepseek_v3.py", 500),
|
||||
TestFile("test_mla_int8_deepseek_v3.py", 429),
|
||||
TestFile("test_mla_flashinfer.py", 302),
|
||||
TestFile("test_mla_fp8.py", 93),
|
||||
@@ -130,7 +130,7 @@ suites = {
|
||||
TestFile("lora/test_lora_tp.py", 116),
|
||||
TestFile("rl/test_update_weights_from_distributed.py", 103),
|
||||
TestFile("test_data_parallelism.py", 73),
|
||||
TestFile("test_dp_attention.py", 277),
|
||||
TestFile("test_dp_attention.py", 594),
|
||||
TestFile("test_load_weights_from_remote_instance.py", 72),
|
||||
TestFile("test_patch_torch.py", 19),
|
||||
TestFile("test_release_memory_occupation.py", 257),
|
||||
@@ -138,17 +138,16 @@ suites = {
|
||||
TestFile("hicache/test_hicache_storage_3fs_backend.py", 200),
|
||||
],
|
||||
"per-commit-4-gpu": [
|
||||
TestFile("test_gpt_oss_4gpu.py", 600),
|
||||
TestFile("test_local_attn.py", 250),
|
||||
TestFile("test_pp_single_node.py", 372),
|
||||
TestFile("models/test_qwen3_next_models.py", 200),
|
||||
TestFile("models/test_falcon_h1_models.py", 200),
|
||||
TestFile("test_gpt_oss_4gpu.py", 300),
|
||||
TestFile("test_local_attn.py", 411),
|
||||
TestFile("test_pp_single_node.py", 481),
|
||||
TestFile("models/test_qwen3_next_models.py", 291),
|
||||
TestFile("test_multi_instance_release_memory_occupation.py", 64),
|
||||
],
|
||||
"per-commit-8-gpu": [
|
||||
TestFile("hicache/test_hicache_storage_mooncake_backend.py", 400),
|
||||
TestFile("lora/test_lora_llama4.py", 400),
|
||||
TestFile("test_disaggregation.py", 600),
|
||||
TestFile("test_disaggregation.py", 499),
|
||||
TestFile("test_disaggregation_dp_attention.py", 155),
|
||||
TestFile("test_disaggregation_different_tp.py", 600),
|
||||
TestFile("test_disaggregation_pp.py", 140),
|
||||
|
||||
Reference in New Issue
Block a user