Rename runner labels (#11228)
This commit is contained in:
2
.github/workflows/pr-test-pd-router.yml
vendored
2
.github/workflows/pr-test-pd-router.yml
vendored
@@ -28,7 +28,7 @@ permissions:
|
|||||||
jobs:
|
jobs:
|
||||||
test-disaggregation:
|
test-disaggregation:
|
||||||
if: github.event_name != 'pull_request' || (contains(github.event.pull_request.labels.*.name, 'run-ci') && contains(github.event.pull_request.labels.*.name, 'router-benchmark'))
|
if: github.event_name != 'pull_request' || (contains(github.event.pull_request.labels.*.name, 'run-ci') && contains(github.event.pull_request.labels.*.name, 'router-benchmark'))
|
||||||
runs-on: [h200]
|
runs-on: [8-gpu-h200-oracle]
|
||||||
timeout-minutes: 45
|
timeout-minutes: 45
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
|
|||||||
2
.github/workflows/pr-test-rust.yml
vendored
2
.github/workflows/pr-test-rust.yml
vendored
@@ -83,7 +83,7 @@ jobs:
|
|||||||
|
|
||||||
pytest-rust:
|
pytest-rust:
|
||||||
if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
|
if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
|
||||||
runs-on: BM.A10.4
|
runs-on: 4-gpu-a10
|
||||||
timeout-minutes: 25
|
timeout-minutes: 25
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
|
|||||||
11
.github/workflows/pr-test.yml
vendored
11
.github/workflows/pr-test.yml
vendored
@@ -62,7 +62,7 @@ jobs:
|
|||||||
sgl-kernel-build-wheels:
|
sgl-kernel-build-wheels:
|
||||||
needs: [check-changes]
|
needs: [check-changes]
|
||||||
if: needs.check-changes.outputs.sgl_kernel == 'true'
|
if: needs.check-changes.outputs.sgl_kernel == 'true'
|
||||||
runs-on: sgl-kernel-build-node
|
runs-on: x64-kernel-build-node
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
include:
|
include:
|
||||||
@@ -323,7 +323,7 @@ jobs:
|
|||||||
needs: [check-changes, unit-test-backend-2-gpu, sgl-kernel-build-wheels]
|
needs: [check-changes, unit-test-backend-2-gpu, sgl-kernel-build-wheels]
|
||||||
if: always() && !failure() && !cancelled() &&
|
if: always() && !failure() && !cancelled() &&
|
||||||
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
|
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
|
||||||
runs-on: 8-gpu-runner
|
runs-on: 8-gpu-h200
|
||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
@@ -641,7 +641,7 @@ jobs:
|
|||||||
needs: [check-changes, unit-test-backend-2-gpu, sgl-kernel-build-wheels]
|
needs: [check-changes, unit-test-backend-2-gpu, sgl-kernel-build-wheels]
|
||||||
if: always() && !failure() && !cancelled() &&
|
if: always() && !failure() && !cancelled() &&
|
||||||
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
|
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
|
||||||
runs-on: 8-gpu-runner
|
runs-on: 8-gpu-h200
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
@@ -668,7 +668,7 @@ jobs:
|
|||||||
needs: [check-changes, unit-test-backend-2-gpu, sgl-kernel-build-wheels]
|
needs: [check-changes, unit-test-backend-2-gpu, sgl-kernel-build-wheels]
|
||||||
if: always() && !failure() && !cancelled() &&
|
if: always() && !failure() && !cancelled() &&
|
||||||
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
|
((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
|
||||||
runs-on: 4-b200-runner
|
runs-on: 4-gpu-b200
|
||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
steps:
|
steps:
|
||||||
@@ -702,7 +702,8 @@ jobs:
|
|||||||
|
|
||||||
unit-test-frontend, unit-test-backend-1-gpu,
|
unit-test-frontend, unit-test-backend-1-gpu,
|
||||||
unit-test-backend-2-gpu, unit-test-backend-4-gpu, unit-test-backend-8-gpu,
|
unit-test-backend-2-gpu, unit-test-backend-4-gpu, unit-test-backend-8-gpu,
|
||||||
performance-test-1-gpu-part-1, performance-test-1-gpu-part-2, performance-test-2-gpu,
|
performance-test-1-gpu-part-1, performance-test-1-gpu-part-2, performance-test-1-gpu-part-3,
|
||||||
|
performance-test-2-gpu,
|
||||||
accuracy-test-1-gpu, accuracy-test-2-gpu,
|
accuracy-test-1-gpu, accuracy-test-2-gpu,
|
||||||
unit-test-deepep-4-gpu, unit-test-deepep-8-gpu,
|
unit-test-deepep-4-gpu, unit-test-deepep-8-gpu,
|
||||||
unit-test-backend-4-gpu-b200,
|
unit-test-backend-4-gpu-b200,
|
||||||
|
|||||||
6
.github/workflows/release-docker-dev.yml
vendored
6
.github/workflows/release-docker-dev.yml
vendored
@@ -8,7 +8,7 @@ on:
|
|||||||
jobs:
|
jobs:
|
||||||
build-dev-x86:
|
build-dev-x86:
|
||||||
if: ${{ github.repository == 'sgl-project/sglang' }}
|
if: ${{ github.repository == 'sgl-project/sglang' }}
|
||||||
runs-on: nvidia
|
runs-on: x64-docker-build-node
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
variant:
|
variant:
|
||||||
@@ -48,12 +48,12 @@ jobs:
|
|||||||
|
|
||||||
build-dev-arm:
|
build-dev-arm:
|
||||||
if: ${{ github.repository == 'sgl-project/sglang' }}
|
if: ${{ github.repository == 'sgl-project/sglang' }}
|
||||||
runs-on: sgl-kernel-release-node-arm
|
runs-on: arm-docker-build-node
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
variant:
|
variant:
|
||||||
- version: 12.9.1
|
- version: 12.9.1
|
||||||
type: blackwell_aarch64
|
type: all_aarch64
|
||||||
tag: dev-arm64
|
tag: dev-arm64
|
||||||
steps:
|
steps:
|
||||||
- name: Delete huge unnecessary tools folder
|
- name: Delete huge unnecessary tools folder
|
||||||
|
|||||||
14
.github/workflows/release-docker.yml
vendored
14
.github/workflows/release-docker.yml
vendored
@@ -14,13 +14,9 @@ jobs:
|
|||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
variant:
|
variant:
|
||||||
- cuda_version: "12.6.1"
|
|
||||||
build_type: "all"
|
|
||||||
- cuda_version: "12.8.1"
|
|
||||||
build_type: "blackwell"
|
|
||||||
- cuda_version: "12.9.1"
|
- cuda_version: "12.9.1"
|
||||||
build_type: "blackwell"
|
build_type: "all"
|
||||||
runs-on: nvidia
|
runs-on: x64-docker-build-node
|
||||||
steps:
|
steps:
|
||||||
- name: Delete huge unnecessary tools folder
|
- name: Delete huge unnecessary tools folder
|
||||||
run: rm -rf /opt/hostedtoolcache
|
run: rm -rf /opt/hostedtoolcache
|
||||||
@@ -67,8 +63,6 @@ jobs:
|
|||||||
|
|
||||||
if [ "${{ matrix.variant.build_type }}" = "all" ]; then
|
if [ "${{ matrix.variant.build_type }}" = "all" ]; then
|
||||||
tag_suffix=""
|
tag_suffix=""
|
||||||
elif [ "${{ matrix.variant.build_type }}" = "blackwell" ]; then
|
|
||||||
tag_suffix="-b200"
|
|
||||||
else
|
else
|
||||||
echo "Unsupported build type"
|
echo "Unsupported build type"
|
||||||
exit 1
|
exit 1
|
||||||
@@ -87,8 +81,8 @@ jobs:
|
|||||||
matrix:
|
matrix:
|
||||||
variant:
|
variant:
|
||||||
- cuda_version: "12.9.1"
|
- cuda_version: "12.9.1"
|
||||||
build_type: "blackwell_aarch64"
|
build_type: "all_aarch64"
|
||||||
runs-on: sgl-kernel-release-node-arm
|
runs-on: arm-docker-build-node
|
||||||
steps:
|
steps:
|
||||||
- name: Delete huge unnecessary tools folder
|
- name: Delete huge unnecessary tools folder
|
||||||
run: rm -rf /opt/hostedtoolcache
|
run: rm -rf /opt/hostedtoolcache
|
||||||
|
|||||||
92
.github/workflows/release-whl-kernel-cu118.yml
vendored
92
.github/workflows/release-whl-kernel-cu118.yml
vendored
@@ -1,92 +0,0 @@
|
|||||||
name: Release SGLang Kernel Wheel (cu118)
|
|
||||||
|
|
||||||
on:
|
|
||||||
workflow_dispatch:
|
|
||||||
inputs:
|
|
||||||
tag_name:
|
|
||||||
type: string
|
|
||||||
push:
|
|
||||||
branches:
|
|
||||||
- main
|
|
||||||
paths:
|
|
||||||
- sgl-kernel/python/sgl_kernel/version.py
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
build-wheels:
|
|
||||||
if: github.repository == 'sgl-project/sglang'
|
|
||||||
runs-on: sgl-kernel-release-node
|
|
||||||
strategy:
|
|
||||||
matrix:
|
|
||||||
python-version: ["3.9"]
|
|
||||||
cuda-version: ["11.8"]
|
|
||||||
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
submodules: "recursive"
|
|
||||||
|
|
||||||
- name: Set up Python ${{ matrix.python-version }}
|
|
||||||
uses: actions/setup-python@v5
|
|
||||||
with:
|
|
||||||
python-version: ${{ matrix.python-version }}
|
|
||||||
|
|
||||||
- name: Build wheels for Python ${{ matrix.python-version }} and CUDA ${{ matrix.cuda-version }}
|
|
||||||
run: |
|
|
||||||
cd sgl-kernel
|
|
||||||
chmod +x ./build.sh
|
|
||||||
./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}"
|
|
||||||
|
|
||||||
- name: Upload artifacts
|
|
||||||
uses: actions/upload-artifact@v4
|
|
||||||
with:
|
|
||||||
name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}
|
|
||||||
path: sgl-kernel/dist/*
|
|
||||||
|
|
||||||
release:
|
|
||||||
needs: build-wheels
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v4
|
|
||||||
|
|
||||||
- name: Download artifacts
|
|
||||||
uses: actions/download-artifact@v4
|
|
||||||
with:
|
|
||||||
path: sgl-kernel/dist/
|
|
||||||
merge-multiple: true
|
|
||||||
pattern: wheel-*
|
|
||||||
|
|
||||||
- name: Set tag name
|
|
||||||
id: set_tag_name
|
|
||||||
run: |
|
|
||||||
if [ -z "${{ inputs.tag_name }}" ]; then
|
|
||||||
TAG_NAME="v$(cat sgl-kernel/python/sgl_kernel/version.py | cut -d'"' -f2)"
|
|
||||||
echo "tag_name=$TAG_NAME" >> $GITHUB_OUTPUT
|
|
||||||
else
|
|
||||||
echo "tag_name=${{ inputs.tag_name }}" >> $GITHUB_OUTPUT
|
|
||||||
fi
|
|
||||||
|
|
||||||
- name: Release
|
|
||||||
uses: softprops/action-gh-release@v2
|
|
||||||
with:
|
|
||||||
tag_name: ${{ steps.set_tag_name.outputs.tag_name }}
|
|
||||||
repository: sgl-project/whl
|
|
||||||
token: ${{ secrets.WHL_TOKEN }}
|
|
||||||
files: |
|
|
||||||
sgl-kernel/dist/*
|
|
||||||
|
|
||||||
- name: Clone wheel index
|
|
||||||
run: git clone https://oauth2:${WHL_TOKEN}@github.com/sgl-project/whl.git sgl-whl
|
|
||||||
env:
|
|
||||||
WHL_TOKEN: ${{ secrets.WHL_TOKEN }}
|
|
||||||
|
|
||||||
- name: Update wheel index
|
|
||||||
run: python3 scripts/update_kernel_whl_index.py
|
|
||||||
|
|
||||||
- name: Push wheel index
|
|
||||||
run: |
|
|
||||||
cd sgl-whl
|
|
||||||
git config --local user.name "github-actions[bot]"
|
|
||||||
git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
|
|
||||||
git add -A
|
|
||||||
git commit -m "update whl index"
|
|
||||||
git push
|
|
||||||
121
.github/workflows/release-whl-kernel.yml
vendored
121
.github/workflows/release-whl-kernel.yml
vendored
@@ -19,7 +19,7 @@ concurrency:
|
|||||||
jobs:
|
jobs:
|
||||||
build-cu129:
|
build-cu129:
|
||||||
if: github.repository == 'sgl-project/sglang'
|
if: github.repository == 'sgl-project/sglang'
|
||||||
runs-on: sgl-kernel-release-node
|
runs-on: x64-kernel-build-node
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
python-version: ["3.10"]
|
python-version: ["3.10"]
|
||||||
@@ -46,38 +46,14 @@ jobs:
|
|||||||
pip install twine
|
pip install twine
|
||||||
python3 -m twine upload --skip-existing dist/* -u __token__ -p ${{ secrets.PYPI_TOKEN }}
|
python3 -m twine upload --skip-existing dist/* -u __token__ -p ${{ secrets.PYPI_TOKEN }}
|
||||||
|
|
||||||
build-cu124:
|
|
||||||
if: github.repository == 'sgl-project/sglang'
|
|
||||||
needs: build-cu129
|
|
||||||
runs-on: sgl-kernel-release-node
|
|
||||||
strategy:
|
|
||||||
matrix:
|
|
||||||
python-version: ["3.10"]
|
|
||||||
cuda-version: ["12.4"]
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
submodules: "recursive"
|
|
||||||
|
|
||||||
- name: Set up Python ${{ matrix.python-version }}
|
|
||||||
uses: actions/setup-python@v5
|
|
||||||
with:
|
|
||||||
python-version: ${{ matrix.python-version }}
|
|
||||||
|
|
||||||
- name: Build wheels
|
|
||||||
run: |
|
|
||||||
cd sgl-kernel
|
|
||||||
chmod +x ./build.sh
|
|
||||||
./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}"
|
|
||||||
|
|
||||||
- name: Upload artifacts
|
- name: Upload artifacts
|
||||||
uses: actions/upload-artifact@v4
|
uses: actions/upload-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}
|
name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}
|
||||||
path: sgl-kernel/dist/*
|
path: sgl-kernel/dist/*
|
||||||
|
|
||||||
release-cu124:
|
release-cu129:
|
||||||
needs: build-cu124
|
needs: build-cu129
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
@@ -114,99 +90,20 @@ jobs:
|
|||||||
WHL_TOKEN: ${{ secrets.WHL_TOKEN }}
|
WHL_TOKEN: ${{ secrets.WHL_TOKEN }}
|
||||||
|
|
||||||
- name: Update wheel index
|
- name: Update wheel index
|
||||||
run: python3 scripts/update_kernel_whl_index.py --cuda 124
|
run: python3 scripts/update_kernel_whl_index.py --cuda 129
|
||||||
|
|
||||||
- name: Push wheel index
|
- name: Push wheel index
|
||||||
run: |
|
run: |
|
||||||
cd sgl-whl
|
cd sgl-whl
|
||||||
git config --local user.name "github-actions[bot]"
|
git config --local user.name "sglang-bot"
|
||||||
git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
|
git config --local user.email "sglangbot@gmail.com"
|
||||||
git add -A
|
|
||||||
git commit -m "update whl index"
|
|
||||||
git push
|
|
||||||
|
|
||||||
build-cu128:
|
|
||||||
if: github.repository == 'sgl-project/sglang'
|
|
||||||
needs: build-cu129
|
|
||||||
runs-on: sgl-kernel-release-node
|
|
||||||
strategy:
|
|
||||||
matrix:
|
|
||||||
python-version: ["3.10"]
|
|
||||||
cuda-version: ["12.8"]
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
submodules: "recursive"
|
|
||||||
|
|
||||||
- name: Set up Python ${{ matrix.python-version }}
|
|
||||||
uses: actions/setup-python@v5
|
|
||||||
with:
|
|
||||||
python-version: ${{ matrix.python-version }}
|
|
||||||
|
|
||||||
- name: Build wheels
|
|
||||||
run: |
|
|
||||||
cd sgl-kernel
|
|
||||||
chmod +x ./build.sh
|
|
||||||
./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}"
|
|
||||||
|
|
||||||
- name: Upload artifacts
|
|
||||||
uses: actions/upload-artifact@v4
|
|
||||||
with:
|
|
||||||
name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}
|
|
||||||
path: sgl-kernel/dist/*
|
|
||||||
|
|
||||||
release-cu128:
|
|
||||||
needs: build-cu128
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v4
|
|
||||||
|
|
||||||
- name: Download artifacts
|
|
||||||
uses: actions/download-artifact@v4
|
|
||||||
with:
|
|
||||||
path: sgl-kernel/dist/
|
|
||||||
merge-multiple: true
|
|
||||||
pattern: wheel-*
|
|
||||||
|
|
||||||
- name: Set tag name
|
|
||||||
id: set_tag_name
|
|
||||||
run: |
|
|
||||||
if [ -z "${{ inputs.tag_name }}" ]; then
|
|
||||||
TAG_NAME="v$(cat sgl-kernel/python/sgl_kernel/version.py | cut -d'"' -f2)"
|
|
||||||
echo "tag_name=$TAG_NAME" >> $GITHUB_OUTPUT
|
|
||||||
else
|
|
||||||
echo "tag_name=${{ inputs.tag_name }}" >> $GITHUB_OUTPUT
|
|
||||||
fi
|
|
||||||
|
|
||||||
- name: Release
|
|
||||||
uses: softprops/action-gh-release@v2
|
|
||||||
with:
|
|
||||||
tag_name: ${{ steps.set_tag_name.outputs.tag_name }}
|
|
||||||
repository: sgl-project/whl
|
|
||||||
token: ${{ secrets.WHL_TOKEN }}
|
|
||||||
files: |
|
|
||||||
sgl-kernel/dist/*
|
|
||||||
|
|
||||||
- name: Clone wheel index
|
|
||||||
run: git clone https://oauth2:${WHL_TOKEN}@github.com/sgl-project/whl.git sgl-whl
|
|
||||||
env:
|
|
||||||
WHL_TOKEN: ${{ secrets.WHL_TOKEN }}
|
|
||||||
|
|
||||||
- name: Update wheel index
|
|
||||||
run: python3 scripts/update_kernel_whl_index.py --cuda 128
|
|
||||||
|
|
||||||
- name: Push wheel index
|
|
||||||
run: |
|
|
||||||
cd sgl-whl
|
|
||||||
git config --local user.name "github-actions[bot]"
|
|
||||||
git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
|
|
||||||
git add -A
|
git add -A
|
||||||
git commit -m "update whl index"
|
git commit -m "update whl index"
|
||||||
git push
|
git push
|
||||||
|
|
||||||
build-cu129-aarch64:
|
build-cu129-aarch64:
|
||||||
if: github.repository == 'sgl-project/sglang'
|
if: github.repository == 'sgl-project/sglang'
|
||||||
runs-on: sgl-kernel-release-node-arm
|
runs-on: arm-kernel-build-node
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
python-version: ["3.10"]
|
python-version: ["3.10"]
|
||||||
@@ -282,8 +179,8 @@ jobs:
|
|||||||
- name: Push wheel index
|
- name: Push wheel index
|
||||||
run: |
|
run: |
|
||||||
cd sgl-whl
|
cd sgl-whl
|
||||||
git config --local user.name "github-actions[bot]"
|
git config --local user.name "sglang-bot"
|
||||||
git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
|
git config --local user.email "sglangbot@gmail.com"
|
||||||
git add -A
|
git add -A
|
||||||
git commit -m "update whl index"
|
git commit -m "update whl index"
|
||||||
git push
|
git push
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
You can install SGLang using one of the methods below.
|
You can install SGLang using one of the methods below.
|
||||||
|
|
||||||
This page primarily applies to common NVIDIA GPU platforms.
|
This page primarily applies to common NVIDIA GPU platforms.
|
||||||
For other or newer platforms, please refer to the dedicated pages for [NVIDIA Blackwell GPUs](../platforms/blackwell_gpu.md), [AMD GPUs](../platforms/amd_gpu.md), [Intel Xeon CPUs](../platforms/cpu_server.md), [NVIDIA Jetson](../platforms/nvidia_jetson.md), [Ascend NPUs](../platforms/ascend_npu.md).
|
For other or newer platforms, please refer to the dedicated pages for [AMD GPUs](../platforms/amd_gpu.md), [Intel Xeon CPUs](../platforms/cpu_server.md), [NVIDIA Jetson](../platforms/nvidia_jetson.md), [Ascend NPUs](../platforms/ascend_npu.md).
|
||||||
|
|
||||||
## Method 1: With pip or uv
|
## Method 1: With pip or uv
|
||||||
|
|
||||||
|
|||||||
@@ -1,9 +0,0 @@
|
|||||||
# Blackwell GPUs
|
|
||||||
|
|
||||||
We will release the pre-built wheels soon. Before that, please try to compile from source or check the blackwell docker images from [lmsysorg/sglang](https://hub.docker.com/r/lmsysorg/sglang/tags).
|
|
||||||
|
|
||||||
## B200 with x86 CPUs
|
|
||||||
TODO
|
|
||||||
|
|
||||||
## GB200/GB300 with ARM CPUs
|
|
||||||
TODO
|
|
||||||
@@ -91,9 +91,14 @@ tracing = [
|
|||||||
"opentelemetry-sdk",
|
"opentelemetry-sdk",
|
||||||
]
|
]
|
||||||
all = ["sglang[test]", "sglang[decord]"]
|
all = ["sglang[test]", "sglang[decord]"]
|
||||||
|
all_aarch64 = ["sglang[test]"]
|
||||||
|
dev = ["sglang[test]", "sglang[decord]"]
|
||||||
|
|
||||||
|
|
||||||
|
# The following will be deprecated in 2 weeks
|
||||||
blackwell = ["sglang[test]", "sglang[decord]"]
|
blackwell = ["sglang[test]", "sglang[decord]"]
|
||||||
blackwell_aarch64 = ["sglang[test]"]
|
blackwell_aarch64 = ["sglang[test]"]
|
||||||
dev = ["sglang[test]", "sglang[decord]"]
|
|
||||||
|
|
||||||
[project.urls]
|
[project.urls]
|
||||||
"Homepage" = "https://github.com/sgl-project/sglang"
|
"Homepage" = "https://github.com/sgl-project/sglang"
|
||||||
|
|||||||
@@ -3,21 +3,16 @@
|
|||||||
set -euxo pipefail
|
set -euxo pipefail
|
||||||
|
|
||||||
IS_BLACKWELL=${IS_BLACKWELL:-0}
|
IS_BLACKWELL=${IS_BLACKWELL:-0}
|
||||||
|
CU_VERSION="cu128"
|
||||||
if [ "$IS_BLACKWELL" = "1" ]; then
|
|
||||||
CU_VERSION="cu129"
|
|
||||||
else
|
|
||||||
CU_VERSION="cu126"
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Clear torch compilation cache
|
|
||||||
python3 -c 'import os, shutil, tempfile, getpass; cache_dir = os.environ.get("TORCHINDUCTOR_CACHE_DIR") or os.path.join(tempfile.gettempdir(), "torchinductor_" + getpass.getuser()); shutil.rmtree(cache_dir, ignore_errors=True)'
|
|
||||||
|
|
||||||
# Kill existing processes
|
# Kill existing processes
|
||||||
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
|
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
|
||||||
bash "${SCRIPT_DIR}/../killall_sglang.sh"
|
bash "${SCRIPT_DIR}/../killall_sglang.sh"
|
||||||
echo "CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-}"
|
echo "CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-}"
|
||||||
|
|
||||||
|
# Clear torch compilation cache
|
||||||
|
python3 -c 'import os, shutil, tempfile, getpass; cache_dir = os.environ.get("TORCHINDUCTOR_CACHE_DIR") or os.path.join(tempfile.gettempdir(), "torchinductor_" + getpass.getuser()); shutil.rmtree(cache_dir, ignore_errors=True)'
|
||||||
|
|
||||||
# Install apt packages
|
# Install apt packages
|
||||||
apt install -y git libnuma-dev
|
apt install -y git libnuma-dev
|
||||||
|
|
||||||
@@ -29,7 +24,7 @@ if [ "$IS_BLACKWELL" = "1" ]; then
|
|||||||
PIP_INSTALL_SUFFIX="--break-system-packages"
|
PIP_INSTALL_SUFFIX="--break-system-packages"
|
||||||
|
|
||||||
# Clean up existing installations
|
# Clean up existing installations
|
||||||
$PIP_CMD uninstall -y flashinfer_python sgl-kernel sglang vllm $PIP_INSTALL_SUFFIX || true
|
$PIP_CMD uninstall -y flashinfer_python sgl-kernel sglang vllm torch $PIP_INSTALL_SUFFIX || true
|
||||||
else
|
else
|
||||||
# In normal cases, we use uv, which is much faster than pip.
|
# In normal cases, we use uv, which is much faster than pip.
|
||||||
pip install --upgrade pip
|
pip install --upgrade pip
|
||||||
@@ -40,7 +35,7 @@ else
|
|||||||
PIP_INSTALL_SUFFIX="--index-strategy unsafe-best-match"
|
PIP_INSTALL_SUFFIX="--index-strategy unsafe-best-match"
|
||||||
|
|
||||||
# Clean up existing installations
|
# Clean up existing installations
|
||||||
$PIP_CMD uninstall flashinfer_python sgl-kernel sglang vllm || true
|
$PIP_CMD uninstall flashinfer_python sgl-kernel sglang vllm torch || true
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Install the main package
|
# Install the main package
|
||||||
@@ -49,26 +44,16 @@ $PIP_CMD install -e "python[dev]" --extra-index-url https://download.pytorch.org
|
|||||||
# Install router for pd-disagg test
|
# Install router for pd-disagg test
|
||||||
SGLANG_ROUTER_BUILD_NO_RUST=1 $PIP_CMD install -e "sgl-router" $PIP_INSTALL_SUFFIX
|
SGLANG_ROUTER_BUILD_NO_RUST=1 $PIP_CMD install -e "sgl-router" $PIP_INSTALL_SUFFIX
|
||||||
|
|
||||||
|
# Install sgl-kernel
|
||||||
SGL_KERNEL_VERSION_FROM_KERNEL=$(grep -Po '(?<=^version = ")[^"]*' sgl-kernel/pyproject.toml)
|
SGL_KERNEL_VERSION_FROM_KERNEL=$(grep -Po '(?<=^version = ")[^"]*' sgl-kernel/pyproject.toml)
|
||||||
SGL_KERNEL_VERSION_FROM_SRT=$(grep -Po -m1 '(?<=sgl-kernel==)[0-9A-Za-z\.\-]+' python/pyproject.toml)
|
SGL_KERNEL_VERSION_FROM_SRT=$(grep -Po -m1 '(?<=sgl-kernel==)[0-9A-Za-z\.\-]+' python/pyproject.toml)
|
||||||
echo "SGL_KERNEL_VERSION_FROM_KERNEL=${SGL_KERNEL_VERSION_FROM_KERNEL} SGL_KERNEL_VERSION_FROM_SRT=${SGL_KERNEL_VERSION_FROM_SRT}"
|
echo "SGL_KERNEL_VERSION_FROM_KERNEL=${SGL_KERNEL_VERSION_FROM_KERNEL} SGL_KERNEL_VERSION_FROM_SRT=${SGL_KERNEL_VERSION_FROM_SRT}"
|
||||||
|
|
||||||
if [ "$IS_BLACKWELL" = "1" ]; then
|
|
||||||
SGL_KERNEL_CUDA_VERSION=cu128
|
|
||||||
else
|
|
||||||
SGL_KERNEL_CUDA_VERSION=cu124
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ "${CUSTOM_BUILD_SGL_KERNEL:-}" = "true" ]; then
|
if [ "${CUSTOM_BUILD_SGL_KERNEL:-}" = "true" ]; then
|
||||||
ls -alh sgl-kernel/dist
|
ls -alh sgl-kernel/dist
|
||||||
WHEEL_FILE=$(ls sgl-kernel/dist/sgl_kernel-${SGL_KERNEL_VERSION_FROM_KERNEL}+${SGL_KERNEL_CUDA_VERSION}-cp310-abi3-manylinux2014_x86_64.whl 2>/dev/null || true)
|
$PIP_CMD install sgl-kernel/dist/sgl_kernel-${SGL_KERNEL_VERSION_FROM_KERNEL}-cp310-abi3-manylinux2014_x86_64.whl --force-reinstall $PIP_INSTALL_SUFFIX
|
||||||
if [ -f "$WHEEL_FILE" ]; then
|
|
||||||
$PIP_CMD install sgl-kernel/dist/sgl_kernel-${SGL_KERNEL_VERSION_FROM_KERNEL}+${SGL_KERNEL_CUDA_VERSION}-cp310-abi3-manylinux2014_x86_64.whl --force-reinstall $PIP_INSTALL_SUFFIX
|
|
||||||
else
|
|
||||||
$PIP_CMD install sgl-kernel/dist/sgl_kernel-${SGL_KERNEL_VERSION_FROM_KERNEL}-cp310-abi3-manylinux2014_x86_64.whl --force-reinstall $PIP_INSTALL_SUFFIX
|
|
||||||
fi
|
|
||||||
else
|
else
|
||||||
$PIP_CMD install https://github.com/sgl-project/whl/releases/download/v${SGL_KERNEL_VERSION_FROM_SRT}/sgl_kernel-${SGL_KERNEL_VERSION_FROM_SRT}+${SGL_KERNEL_CUDA_VERSION}-cp310-abi3-manylinux2014_x86_64.whl --force-reinstall $PIP_INSTALL_SUFFIX
|
$PIP_CMD install sgl-kernel==${SGL_KERNEL_VERSION_FROM_SRT} --force-reinstall $PIP_INSTALL_SUFFIX
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Show current packages
|
# Show current packages
|
||||||
@@ -86,14 +71,6 @@ if [ "$IS_BLACKWELL" != "1" ]; then
|
|||||||
$PIP_CMD install xformers --index-url https://download.pytorch.org/whl/${CU_VERSION} --no-deps $PIP_INSTALL_SUFFIX
|
$PIP_CMD install xformers --index-url https://download.pytorch.org/whl/${CU_VERSION} --no-deps $PIP_INSTALL_SUFFIX
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Install FlashMLA for attention backend tests
|
|
||||||
# $PIP_CMD install git+https://github.com/deepseek-ai/FlashMLA.git $PIP_INSTALL_SUFFIX
|
|
||||||
|
|
||||||
# Show current packages
|
# Show current packages
|
||||||
$PIP_CMD list
|
$PIP_CMD list
|
||||||
|
python3 -c "import torch; print(torch.version.cuda)"
|
||||||
|
|
||||||
if [ -n "${HF_TOKEN:-}" ]; then
|
|
||||||
$PIP_CMD install -U "huggingface_hub[cli]" $PIP_INSTALL_SUFFIX
|
|
||||||
hf auth login --token $HF_TOKEN
|
|
||||||
fi
|
|
||||||
|
|||||||
@@ -25,7 +25,6 @@ make build
|
|||||||
```
|
```
|
||||||
|
|
||||||
Note:
|
Note:
|
||||||
|
|
||||||
The `sgl-kernel` is rapidly evolving. If you experience a compilation failure, try using `make rebuild`.
|
The `sgl-kernel` is rapidly evolving. If you experience a compilation failure, try using `make rebuild`.
|
||||||
|
|
||||||
### Build with [ccache](https://github.com/ccache/ccache)
|
### Build with [ccache](https://github.com/ccache/ccache)
|
||||||
|
|||||||
@@ -88,7 +88,7 @@ suites = {
|
|||||||
TestFile("test_metrics.py", 32),
|
TestFile("test_metrics.py", 32),
|
||||||
TestFile("test_metrics_utils.py", 1),
|
TestFile("test_metrics_utils.py", 1),
|
||||||
TestFile("test_mla.py", 167),
|
TestFile("test_mla.py", 167),
|
||||||
TestFile("test_mla_deepseek_v3.py", 1420),
|
TestFile("test_mla_deepseek_v3.py", 500),
|
||||||
TestFile("test_mla_int8_deepseek_v3.py", 429),
|
TestFile("test_mla_int8_deepseek_v3.py", 429),
|
||||||
TestFile("test_mla_flashinfer.py", 302),
|
TestFile("test_mla_flashinfer.py", 302),
|
||||||
TestFile("test_mla_fp8.py", 93),
|
TestFile("test_mla_fp8.py", 93),
|
||||||
@@ -130,7 +130,7 @@ suites = {
|
|||||||
TestFile("lora/test_lora_tp.py", 116),
|
TestFile("lora/test_lora_tp.py", 116),
|
||||||
TestFile("rl/test_update_weights_from_distributed.py", 103),
|
TestFile("rl/test_update_weights_from_distributed.py", 103),
|
||||||
TestFile("test_data_parallelism.py", 73),
|
TestFile("test_data_parallelism.py", 73),
|
||||||
TestFile("test_dp_attention.py", 277),
|
TestFile("test_dp_attention.py", 594),
|
||||||
TestFile("test_load_weights_from_remote_instance.py", 72),
|
TestFile("test_load_weights_from_remote_instance.py", 72),
|
||||||
TestFile("test_patch_torch.py", 19),
|
TestFile("test_patch_torch.py", 19),
|
||||||
TestFile("test_release_memory_occupation.py", 257),
|
TestFile("test_release_memory_occupation.py", 257),
|
||||||
@@ -138,17 +138,16 @@ suites = {
|
|||||||
TestFile("hicache/test_hicache_storage_3fs_backend.py", 200),
|
TestFile("hicache/test_hicache_storage_3fs_backend.py", 200),
|
||||||
],
|
],
|
||||||
"per-commit-4-gpu": [
|
"per-commit-4-gpu": [
|
||||||
TestFile("test_gpt_oss_4gpu.py", 600),
|
TestFile("test_gpt_oss_4gpu.py", 300),
|
||||||
TestFile("test_local_attn.py", 250),
|
TestFile("test_local_attn.py", 411),
|
||||||
TestFile("test_pp_single_node.py", 372),
|
TestFile("test_pp_single_node.py", 481),
|
||||||
TestFile("models/test_qwen3_next_models.py", 200),
|
TestFile("models/test_qwen3_next_models.py", 291),
|
||||||
TestFile("models/test_falcon_h1_models.py", 200),
|
|
||||||
TestFile("test_multi_instance_release_memory_occupation.py", 64),
|
TestFile("test_multi_instance_release_memory_occupation.py", 64),
|
||||||
],
|
],
|
||||||
"per-commit-8-gpu": [
|
"per-commit-8-gpu": [
|
||||||
TestFile("hicache/test_hicache_storage_mooncake_backend.py", 400),
|
TestFile("hicache/test_hicache_storage_mooncake_backend.py", 400),
|
||||||
TestFile("lora/test_lora_llama4.py", 400),
|
TestFile("lora/test_lora_llama4.py", 400),
|
||||||
TestFile("test_disaggregation.py", 600),
|
TestFile("test_disaggregation.py", 499),
|
||||||
TestFile("test_disaggregation_dp_attention.py", 155),
|
TestFile("test_disaggregation_dp_attention.py", 155),
|
||||||
TestFile("test_disaggregation_different_tp.py", 600),
|
TestFile("test_disaggregation_different_tp.py", 600),
|
||||||
TestFile("test_disaggregation_pp.py", 140),
|
TestFile("test_disaggregation_pp.py", 140),
|
||||||
|
|||||||
Reference in New Issue
Block a user