sglang v0.5.2 & support Qwen3-Next-80B-A3B-Instruct
.github/workflows/cancel-all-pending-pr-test-runs.yml (new file, vendored, 45 lines)
@@ -0,0 +1,45 @@
name: Cancel All Pending PR Test Runs

on:
  workflow_dispatch:
    inputs:
      workflows:
        description: 'Space-separated list of workflow filenames to cancel'
        required: true
        type: string
        default: 'pr-test.yml pr-test-xeon.yml'

permissions:
  actions: write  # Needed to cancel runs
  contents: read  # Needed to read repo info

jobs:
  cancel-pending:
    runs-on: ubuntu-latest
    steps:
      - name: Install GitHub CLI
        run: sudo apt-get install -y gh jq

      - name: Cancel all pending/waiting runs for specified workflows
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          REPO: ${{ github.repository }}
        run: |
          # Read the space-separated string from the input into a bash array
          WORKFLOW_FILES=(${{ github.event.inputs.workflows }})

          echo "Targeting ${#WORKFLOW_FILES[@]} workflow(s): ${{ github.event.inputs.workflows }}"

          for workflow_file in "${WORKFLOW_FILES[@]}"; do
            echo "--- Checking workflow: $workflow_file ---"
            gh run list \
              --repo "$REPO" \
              --workflow "$workflow_file" \
              --json databaseId,status \
              --limit 1000 \
              | jq -r '.[] | select(.status=="queued" or .status=="in_progress") | .databaseId' \
              | while read run_id; do
                  echo "Cancelling run ID: $run_id for workflow: $workflow_file"
                  gh run cancel "$run_id" --repo "$REPO"
                done
          done
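Usage sketch: a workflow_dispatch run can be triggered from an authenticated gh CLI, with `-f` supplying the dispatch input; the value below is just the documented default:

    gh workflow run cancel-all-pending-pr-test-runs.yml \
      --repo sgl-project/sglang \
      -f workflows='pr-test.yml pr-test-xeon.yml'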
.github/workflows/cancel-pr-workflow-on-merge.yml (new file, vendored, 22 lines)
@@ -0,0 +1,22 @@
name: Cancel PR Workflows on Merge

on:
  pull_request_target:
    types:
      - closed

permissions:
  actions: write

jobs:
  cancel:
    if: github.event.pull_request.merged == true
    runs-on: ubuntu-latest
    steps:
      - name: Cancel Previous Runs
        uses: styfle/cancel-workflow-action@0.12.1
        with:
          workflow_id: all
          access_token: ${{ secrets.GITHUB_TOKEN }}
          ignore_sha: true
          pr_number: ${{ github.event.pull_request.number }}
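For reference, a rough manual equivalent of what this action does for a single PR branch, sketched with the gh CLI (the branch name is a placeholder):

    gh run list --repo sgl-project/sglang --branch <pr-branch> \
      --json databaseId,status \
      --jq '.[] | select(.status=="queued" or .status=="in_progress") | .databaseId' \
      | xargs -r -n1 gh run cancel --repo sgl-project/sglang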
.github/workflows/close-inactive-issues.yml (new file, vendored, 96 lines)
@@ -0,0 +1,96 @@
name: Close Inactive Issues

on:
  schedule:
    - cron: '0 0 * * *'
  workflow_dispatch:

permissions:
  issues: write
  contents: read

jobs:
  close-inactive-issues:
    if: github.repository == 'sgl-project/sglang'
    runs-on: ubuntu-latest
    steps:
      - name: Check and close inactive issues
        uses: actions/github-script@v6
        with:
          github-token: ${{secrets.GITHUB_TOKEN}}
          script: |
            const sixtyDaysAgo = new Date(Date.now() - 60 * 24 * 60 * 60 * 1000);

            const [owner, repo] = process.env.GITHUB_REPOSITORY.split('/');
            console.log(`Owner: ${owner}, Repo: ${repo}`);

            async function fetchIssues(page = 1) {
              console.log(`Fetching issues for ${owner}/${repo}, page ${page}`);
              return await github.rest.issues.listForRepo({
                owner,
                repo,
                state: 'open',
                sort: 'updated',
                direction: 'asc',
                per_page: 100,
                page: page
              });
            }

            async function processIssues() {
              console.log('Starting to process issues');
              console.log(`Repository: ${owner}/${repo}`);

              let page = 1;
              let hasMoreIssues = true;
              while (hasMoreIssues) {
                try {
                  const issues = await fetchIssues(page);
                  console.log(`Fetched ${issues.data.length} issues on page ${page}`);

                  if (issues.data.length === 0) {
                    hasMoreIssues = false;
                    break;
                  }

                  for (const issue of issues.data) {
                    // Skip if the issue has 'good first issue' label
                    if (issue.labels.some(label => label.name === 'good first issue')) {
                      console.log(`Skipping issue #${issue.number} as it's marked as 'good first issue'`);
                      continue;
                    }
                    if (new Date(issue.updated_at) < sixtyDaysAgo) {
                      try {
                        await github.rest.issues.update({
                          owner,
                          repo,
                          issue_number: issue.number,
                          state: 'closed',
                          labels: [...issue.labels.map(l => l.name), 'inactive']
                        });
                        await github.rest.issues.createComment({
                          owner,
                          repo,
                          issue_number: issue.number,
                          body: 'This issue has been automatically closed due to inactivity. Please feel free to reopen it if needed.'
                        });
                        console.log(`Closed issue #${issue.number} due to inactivity.`);
                      } catch (error) {
                        console.error(`Failed to close issue #${issue.number}: ${error.message}`);
                      }
                    } else {
                      console.log(`Issue #${issue.number} is still active. Stopping processing.`);
                      hasMoreIssues = false;
                      break;
                    }
                  }
                  page += 1;
                } catch (error) {
                  console.error(`Error fetching issues on page ${page}: ${error.message}`);
                  hasMoreIssues = false;
                }
              }
              console.log('Finished processing issues');
            }

            await processIssues();
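A quick way to preview which issues this cron job would treat as inactive, sketched with the gh CLI and GNU date (the search qualifier mirrors the 60-day cutoff computed in the script):

    gh issue list --repo sgl-project/sglang --state open \
      --search "updated:<$(date -d '60 days ago' +%Y-%m-%d)" --limit 100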
.github/workflows/execute-notebook.yml (new file, vendored, 60 lines)
@@ -0,0 +1,60 @@
name: Execute Notebooks

on:
  pull_request:
    branches: [ main ]
    paths:
      - "python/sglang/**"
      - "docs/**"
  workflow_dispatch:


concurrency:
  group: execute-notebook-${{ github.ref }}
  cancel-in-progress: true


jobs:
  run-all-notebooks:
    runs-on: 1-gpu-runner
    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install dependencies
        run: |
          bash scripts/ci/ci_install_dependency.sh
          pip install -r docs/requirements.txt
          apt-get update && apt-get install -y pandoc parallel retry
          ln -sf "$(which python3)" /usr/bin/python

      - name: Setup Jupyter Kernel
        run: |
          python -m ipykernel install --user --name python3 --display-name "Python 3"

      - name: Execute notebooks
        timeout-minutes: 40
        run: |
          cd docs
          make clean
          make compile


  notebook-finish:
    needs: [
      run-all-notebooks
    ]
    runs-on: ubuntu-latest
    steps:
      - name: Check all dependent job statuses
        run: |
          results=(${{ join(needs.*.result, ' ') }})
          for result in "${results[@]}"; do
            if [ "$result" = "failure" ] || [ "$result" = "cancelled" ]; then
              echo "Job failed with result: $result"
              exit 1
            fi
          done
          echo "All jobs completed successfully"
          exit 0
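To reproduce a notebook failure locally, one option is to execute a single notebook with nbconvert. This is a sketch under the assumption that `make compile` executes the docs notebooks (the authoritative commands live in docs/Makefile), and the notebook path here is hypothetical:

    pip install -r docs/requirements.txt
    python -m ipykernel install --user --name python3 --display-name "Python 3"
    # execute one notebook in place, using the kernel registered above
    jupyter nbconvert --to notebook --execute --inplace docs/some_notebook.ipynb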
.github/workflows/experiment-runner.yml (new file, vendored, 30 lines)
@@ -0,0 +1,30 @@
name: Experiment Runner

on:
  workflow_dispatch:
    inputs:
      script:
        description: "Experiment Runner Script"
        default: "configs/sharegpt_config.yaml"

concurrency:
  group: experiment-runner-${{ github.ref }}
  cancel-in-progress: true

jobs:
  experiment-runner-1-gpu:
    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
    runs-on: 1-gpu-runner
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install dependencies
        run: |
          bash scripts/ci/ci_install_dependency.sh

      - name: Test experiment runner
        timeout-minutes: 120
        run: |
          cd test/srt
          python3 experiment_runner.py --config ${{ inputs.script }}
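Dispatch sketch (gh CLI; the config path is the documented default input):

    gh workflow run experiment-runner.yml \
      --repo sgl-project/sglang \
      -f script=configs/sharegpt_config.yaml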
.github/workflows/lint.yml (new file, vendored, 22 lines)
@@ -0,0 +1,22 @@
name: Lint

on: [pull_request]

jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: "3.10"

      - name: Install pre-commit hook
        run: |
          python -m pip install pre-commit
          pre-commit install

      - name: Linting
        run: pre-commit run --all-files --show-diff-on-failure
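The same checks can be reproduced locally before pushing, using exactly the commands this workflow runs:

    python -m pip install pre-commit
    pre-commit install
    pre-commit run --all-files --show-diff-on-failure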
.github/workflows/nightly-test-amd.yml (new file, vendored, 41 lines)
@@ -0,0 +1,41 @@
name: Nightly Test (AMD)

on:
  schedule:
    - cron: '0 0 * * *'
  push:
    branches:
      - main
    paths:
      - "python/sglang/version.py"
  workflow_dispatch:

concurrency:
  group: nightly-test-amd-${{ github.ref }}
  cancel-in-progress: true

jobs:
  nightly-test:
    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
    strategy:
      matrix:
        runner: [linux-mi300-gpu-2, linux-mi325-gpu-2-nightly]
    runs-on: ${{matrix.runner}}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Setup docker
        run: |
          touch github_summary.md
          bash scripts/ci/amd_ci_start_container.sh
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}

      - name: Install dependencies
        run: bash scripts/ci/amd_ci_install_dependency.sh

      - name: Nightly Test
        run: |
          bash scripts/ci/amd_ci_exec.sh -e GITHUB_STEP_SUMMARY="/sglang-checkout/github_summary.md" python3 run_suite.py --suite nightly-amd --timeout-per-file 7200
          echo "$(<github_summary.md )" >> $GITHUB_STEP_SUMMARY
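A note on the final line: `$(<file)` is bash's built-in file-read expansion, so the echo appends the summary written inside the container to the job's step summary page. A plain `cat` is an equivalent and arguably clearer spelling:

    cat github_summary.md >> "$GITHUB_STEP_SUMMARY"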
.github/workflows/nightly-test.yml (new file, vendored, 33 lines)
@@ -0,0 +1,33 @@
name: Nightly Test

on:
  schedule:
    - cron: '0 0 * * *'
  push:
    branches:
      - main
    paths:
      - "python/sglang/version.py"
  workflow_dispatch:

concurrency:
  group: nightly-test-${{ github.ref }}
  cancel-in-progress: true

jobs:
  nightly-test:
    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
    runs-on: 2-gpu-runner
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install dependencies
        run: |
          bash scripts/ci/ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 120
        run: |
          cd test/srt
          python3 run_suite.py --suite nightly --timeout-per-file 3600
.github/workflows/open-pr-copy-from-oss.yml (new file, vendored, 28 lines)
@@ -0,0 +1,28 @@
name: Open A PR to Copy Code From OSS

on:
  workflow_dispatch:
  # schedule:
  #   - cron: '0 10 * * *'

permissions:
  contents: write

jobs:
  copy:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          ref: 'main'

      - name: Install GitHub CLI (if not present)
        run: |
          bash scripts/code_sync/install_github_cli.sh

      - name: Copy from OSS code
        env:
          GH_TOKEN: ${{ secrets.PAT_FOR_CODE_SYNC_FROM_LIANMIN }}
        run: |
          python3 scripts/code_sync/copy_from_oss.py
.github/workflows/open-pr-copy-to-oss.yml (new file, vendored, 31 lines)
@@ -0,0 +1,31 @@
name: Open A PR to Copy Diff To OSS

on:
  workflow_dispatch:
    inputs:
      commit_sha:
        description: 'The commit SHA to copy. Defaults to LAST to copy the latest commit.'
        required: false
        default: 'LAST'

permissions:
  contents: write

jobs:
  copy:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Install GitHub CLI (if not present)
        run: |
          bash scripts/code_sync/install_github_cli.sh

      - name: Copy to OSS code
        env:
          GH_TOKEN: ${{ secrets.PAT_FOR_CODE_SYNC_FROM_LIANMIN }}
        run: |
          python3 scripts/code_sync/copy_to_oss.py --commit ${{ github.event.inputs.commit_sha }}
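Dispatch sketch (gh CLI; 'LAST' is the documented default, or pass an explicit commit SHA instead):

    gh workflow run open-pr-copy-to-oss.yml \
      --repo sgl-project/sglang \
      -f commit_sha=LAST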
.github/workflows/pr-benchmark-rust.yml (new file, vendored, 306 lines)
@@ -0,0 +1,306 @@
name: PR Benchmark (Rust Router)

on:
  push:
    branches: [ main ]
    paths:
      - "sgl-router/**"
  pull_request:
    branches: [ main ]
    paths:
      - "sgl-router/**"
    types: [opened, synchronize, reopened, labeled]
  workflow_dispatch:

concurrency:
  group: pr-benchmark-rust-${{ github.ref }}
  cancel-in-progress: true
permissions:
  contents: read
  pull-requests: write
  issues: write
jobs:
  # Quick check job that always runs on PRs
  benchmark-compile-check:
    name: Benchmark Compilation Check
    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install dependencies
        run: |
          bash scripts/ci/ci_install_rust.sh

      - name: Setup sccache
        uses: mozilla-actions/sccache-action@v0.0.3
        continue-on-error: true

      - name: Rust cache
        uses: Swatinem/rust-cache@v2
        with:
          workspaces: sgl-router
          # Share cache across all benchmark jobs
          shared-key: "rust-cache"
          # Save cache even on failure
          save-if: true

      - name: Check benchmarks compile
        run: |
          source "$HOME/.cargo/env"
          cd sgl-router/
          # Try to use sccache, but disable if it fails
          if command -v sccache &> /dev/null; then
            echo "Testing sccache availability..."
            # Try to start sccache and check if it works
            export RUSTC_WRAPPER=sccache
            export SCCACHE_GHA_ENABLED="true"
            if sccache --start-server 2>/dev/null && sccache --show-stats 2>/dev/null; then
              echo "sccache is working, using it for compilation"
            else
              echo "sccache failed to start, falling back to regular cargo"
              unset RUSTC_WRAPPER
              unset SCCACHE_GHA_ENABLED
            fi
          else
            echo "sccache not available, using regular cargo"
          fi
          cargo check --benches

  # Full benchmark jobs that only run with label or on main branch
  benchmark-request-processing:
    name: Request Processing Benchmark
    if: |
      github.repository == 'sgl-project/sglang' &&
      (github.event_name == 'push' ||
       github.event_name == 'workflow_dispatch' ||
       contains(github.event.pull_request.labels.*.name, 'benchmark'))
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          # Fetch enough history for baseline comparison
          fetch-depth: 100

      - name: Install dependencies
        run: |
          bash scripts/ci/ci_install_rust.sh

      - name: Setup sccache
        uses: mozilla-actions/sccache-action@v0.0.3
        continue-on-error: true

      - name: Rust cache
        uses: Swatinem/rust-cache@v2
        with:
          workspaces: sgl-router
          # Share cache across all benchmark jobs
          shared-key: "rust-cache"
          # Save cache even on failure
          save-if: true

      - name: Run request processing benchmark
        timeout-minutes: 30
        run: |
          source "$HOME/.cargo/env"
          cd sgl-router/
          # Try to use sccache, but disable if it fails
          if command -v sccache &> /dev/null; then
            echo "Testing sccache availability..."
            # Try to start sccache and check if it works
            export RUSTC_WRAPPER=sccache
            export SCCACHE_GHA_ENABLED="true"
            if sccache --start-server 2>/dev/null && sccache --show-stats 2>/dev/null; then
              echo "sccache is working, using it for compilation"
            else
              echo "sccache failed to start, falling back to regular cargo"
              unset RUSTC_WRAPPER
              unset SCCACHE_GHA_ENABLED
            fi
          else
            echo "sccache not available, using regular cargo"
          fi
          # Run only the summary benchmark for quick validation in PRs
          cargo bench --bench request_processing -- benchmark_summary --exact

      - name: Upload benchmark results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: request-processing-results-${{ github.sha }}
          path: |
            sgl-router/target/criterion/benchmark_summary/
          retention-days: 30

  benchmark-tokenizer:
    name: Tokenizer Benchmark
    if: |
      github.repository == 'sgl-project/sglang' &&
      (github.event_name == 'push' ||
       github.event_name == 'workflow_dispatch' ||
       contains(github.event.pull_request.labels.*.name, 'benchmark'))
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 100

      - name: Install dependencies
        run: |
          bash scripts/ci/ci_install_rust.sh

      - name: Setup sccache
        uses: mozilla-actions/sccache-action@v0.0.3
        continue-on-error: true

      - name: Rust cache
        uses: Swatinem/rust-cache@v2
        with:
          workspaces: sgl-router
          # Share cache across all benchmark jobs
          shared-key: "rust-cache"
          # Save cache even on failure
          save-if: true

      - name: Run tokenizer benchmark
        timeout-minutes: 30
        run: |
          source "$HOME/.cargo/env"
          cd sgl-router/
          # Try to use sccache, but disable if it fails
          if command -v sccache &> /dev/null; then
            echo "Testing sccache availability..."
            # Try to start sccache and check if it works
            export RUSTC_WRAPPER=sccache
            export SCCACHE_GHA_ENABLED="true"
            if sccache --start-server 2>/dev/null && sccache --show-stats 2>/dev/null; then
              echo "sccache is working, using it for compilation"
            else
              echo "sccache failed to start, falling back to regular cargo"
              unset RUSTC_WRAPPER
              unset SCCACHE_GHA_ENABLED
            fi
          else
            echo "sccache not available, using regular cargo"
          fi
          cargo bench --bench tokenizer_benchmark

      - name: Upload benchmark results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: tokenizer-results-${{ github.sha }}
          path: |
            sgl-router/target/criterion/tokenizer*/
          retention-days: 30

  benchmark-tool-parser:
    name: Tool Parser Benchmark
    if: |
      github.repository == 'sgl-project/sglang' &&
      (github.event_name == 'push' ||
       github.event_name == 'workflow_dispatch' ||
       contains(github.event.pull_request.labels.*.name, 'benchmark'))
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 100

      - name: Install dependencies
        run: |
          bash scripts/ci/ci_install_rust.sh

      - name: Setup sccache
        uses: mozilla-actions/sccache-action@v0.0.3
        continue-on-error: true

      - name: Rust cache
        uses: Swatinem/rust-cache@v2
        with:
          workspaces: sgl-router
          # Share cache across all benchmark jobs
          shared-key: "rust-cache"
          # Save cache even on failure
          save-if: true

      - name: Run tool parser benchmark
        timeout-minutes: 30
        run: |
          source "$HOME/.cargo/env"
          cd sgl-router/
          # Try to use sccache, but disable if it fails
          if command -v sccache &> /dev/null; then
            echo "Testing sccache availability..."
            # Try to start sccache and check if it works
            export RUSTC_WRAPPER=sccache
            export SCCACHE_GHA_ENABLED="true"
            if sccache --start-server 2>/dev/null && sccache --show-stats 2>/dev/null; then
              echo "sccache is working, using it for compilation"
            else
              echo "sccache failed to start, falling back to regular cargo"
              unset RUSTC_WRAPPER
              unset SCCACHE_GHA_ENABLED
            fi
          else
            echo "sccache not available, using regular cargo"
          fi
          cargo bench --bench tool_parser_benchmark

      - name: Upload benchmark results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: tool-parser-results-${{ github.sha }}
          path: |
            sgl-router/target/criterion/tool_parser*/
          retention-days: 30

  benchmark-summary:
    name: Benchmark Summary
    needs: [benchmark-request-processing, benchmark-tokenizer, benchmark-tool-parser]
    if: always() && (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    runs-on: ubuntu-latest
    steps:
      - name: Download all benchmark results
        uses: actions/download-artifact@v4
        with:
          pattern: '*-results-${{ github.sha }}'
          path: benchmark-results

      - name: Generate summary
        run: |
          echo "## Benchmark Results Summary" > summary.md
          echo "" >> summary.md
          echo "### Request Processing" >> summary.md
          if [ -d "benchmark-results/request-processing-results-${{ github.sha }}" ]; then
            echo "✅ Completed" >> summary.md
          else
            echo "❌ Failed or skipped" >> summary.md
          fi
          echo "" >> summary.md
          echo "### Tokenizer" >> summary.md
          if [ -d "benchmark-results/tokenizer-results-${{ github.sha }}" ]; then
            echo "✅ Completed" >> summary.md
          else
            echo "❌ Failed or skipped" >> summary.md
          fi
          echo "" >> summary.md
          echo "### Tool Parser" >> summary.md
          if [ -d "benchmark-results/tool-parser-results-${{ github.sha }}" ]; then
            echo "✅ Completed" >> summary.md
          else
            echo "❌ Failed or skipped" >> summary.md
          fi
          cat summary.md

      - name: Upload summary
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-summary-${{ github.sha }}
          path: summary.md
          retention-days: 30
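The sccache probe-and-fallback block above is repeated verbatim in all four jobs. One way to de-duplicate it is a shared helper that each step sources; this is a sketch only (scripts/ci/setup_sccache.sh does not exist in this commit), preserving the same probe logic:

    # hypothetical scripts/ci/setup_sccache.sh -- source this, do not execute
    if command -v sccache &> /dev/null; then
      export RUSTC_WRAPPER=sccache
      export SCCACHE_GHA_ENABLED="true"
      # keep sccache only if the server starts and answers a stats query
      if ! (sccache --start-server 2>/dev/null && sccache --show-stats 2>/dev/null); then
        echo "sccache failed to start, falling back to regular cargo"
        unset RUSTC_WRAPPER SCCACHE_GHA_ENABLED
      fi
    else
      echo "sccache not available, using regular cargo"
    fi

Each job's run step would then begin with `source scripts/ci/setup_sccache.sh` instead of the inline block.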
.github/workflows/pr-test-amd.yml (new file, vendored, 377 lines)
@@ -0,0 +1,377 @@
name: PR Test (AMD)

on:
  push:
    branches: [ main ]
    paths:
      - "python/**"
      - "scripts/ci/**"
      - "test/**"
      - "sgl-kernel/**"
      - ".github/workflows/pr-test-amd.yml"
  pull_request:
    branches: [ main ]
    paths:
      - "python/**"
      - "scripts/ci/**"
      - "test/**"
      - "sgl-kernel/**"
      - ".github/workflows/pr-test-amd.yml"
  workflow_dispatch:

concurrency:
  group: pr-test-amd-${{ github.ref }}
  cancel-in-progress: true

jobs:
  accuracy-test-1-gpu-amd:
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
        github.event.pull_request.draft == false
    strategy:
      fail-fast: false
      matrix:
        runner: [linux-mi300-gpu-1, linux-mi325-gpu-1]
    runs-on: ${{matrix.runner}}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Start CI container
        run: bash scripts/ci/amd_ci_start_container.sh
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}

      - name: Install dependencies
        run: bash scripts/ci/amd_ci_install_dependency.sh

      - name: Evaluate Accuracy
        timeout-minutes: 30
        run: |
          bash scripts/ci/amd_ci_exec.sh -e SGLANG_USE_AITER=0 python3 test_eval_accuracy_large.py
          bash scripts/ci/amd_ci_exec.sh python3 test_eval_fp8_accuracy.py
          bash scripts/ci/amd_ci_exec.sh python3 models/test_qwen_models.py

  accuracy-test-2-gpu-amd:
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
        github.event.pull_request.draft == false
    strategy:
      fail-fast: false
      matrix:
        runner: [linux-mi300-gpu-2, linux-mi325-gpu-2, linux-mi35x-gpu-2]
    runs-on: ${{matrix.runner}}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Start CI container
        run: bash scripts/ci/amd_ci_start_container.sh
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}

      - name: Install dependencies
        run: bash scripts/ci/amd_ci_install_dependency.sh

      - name: Evaluate accuracy (TP=2)
        timeout-minutes: 60
        run: |
          bash scripts/ci/amd_ci_exec.sh -e SGLANG_USE_AITER=0 python3 test_moe_eval_accuracy_large.py

  mla-test-1-gpu-amd:
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
        github.event.pull_request.draft == false
    strategy:
      fail-fast: false
      matrix:
        runner: [linux-mi300-gpu-1, linux-mi325-gpu-1]
    runs-on: ${{matrix.runner}}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Start CI container
        run: bash scripts/ci/amd_ci_start_container.sh
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}

      - name: Install dependencies
        run: bash scripts/ci/amd_ci_install_dependency.sh

      - name: MLA TEST
        timeout-minutes: 30
        run: |
          bash scripts/ci/amd_ci_exec.sh python3 test_mla.py

  performance-test-1-gpu-part-1-amd:
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
        github.event.pull_request.draft == false
    strategy:
      fail-fast: false
      matrix:
        runner: [linux-mi300-gpu-1, linux-mi325-gpu-1]
    runs-on: ${{matrix.runner}}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Start CI container
        run: bash scripts/ci/amd_ci_start_container.sh
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}

      - name: Install dependencies
        run: bash scripts/ci/amd_ci_install_dependency.sh

      - name: Benchmark single latency
        timeout-minutes: 20
        run: |
          bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_bs1_small
          bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_bs1_default

      - name: Benchmark online latency
        timeout-minutes: 15
        run: |
          bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_online_latency_default

      - name: Benchmark offline throughput
        timeout-minutes: 15
        run: |
          bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_default

      - name: Benchmark offline throughput (Non-streaming, small batch size)
        timeout-minutes: 15
        run: |
          bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_non_stream_small_batch_size

  performance-test-1-gpu-part-2-amd:
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
        github.event.pull_request.draft == false
    strategy:
      fail-fast: false
      matrix:
        runner: [linux-mi300-gpu-1, linux-mi325-gpu-1]
    runs-on: ${{matrix.runner}}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Start CI container
        run: bash scripts/ci/amd_ci_start_container.sh
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}

      - name: Install dependencies
        run: bash scripts/ci/amd_ci_install_dependency.sh

      - name: Benchmark offline throughput (w/o RadixAttention)
        timeout-minutes: 15
        run: |
          bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_without_radix_cache

      - name: Benchmark offline throughput (w/ Triton)
        timeout-minutes: 15
        run: |
          bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_with_triton_attention_backend

      - name: Benchmark offline throughput (w/ FP8)
        timeout-minutes: 15
        run: |
          bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_default_fp8

  bench-test-2-gpu-amd:
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
        github.event.pull_request.draft == false
    strategy:
      fail-fast: false
      matrix:
        runner: [linux-mi300-gpu-2, linux-mi325-gpu-2]
    runs-on: ${{matrix.runner}}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Start CI container
        run: bash scripts/ci/amd_ci_start_container.sh
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}

      - name: Install dependencies
        run: bash scripts/ci/amd_ci_install_dependency.sh

      - name: Benchmark dummy grok (TP=2)
        timeout-minutes: 30
        run: |
          bash scripts/ci/amd_ci_exec.sh python3 models/test_dummy_grok_models.py

      - name: Benchmark single latency (TP=2)
        timeout-minutes: 25
        run: |
          bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_tp2_bs1

      - name: Benchmark single latency + torch.compile (TP=2)
        timeout-minutes: 25
        run: |
          bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_torch_compile_tp2_bs1

      - name: Benchmark offline throughput (TP=2)
        timeout-minutes: 25
        run: |
          bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_moe_offline_throughput_default

      - name: Benchmark offline throughput (w/o RadixAttention) (TP=2)
        timeout-minutes: 25
        run: |
          bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_moe_offline_throughput_without_radix_cache

  unit-test-backend-1-gpu-amd:
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
        github.event.pull_request.draft == false
    strategy:
      fail-fast: false
      matrix:
        runner: [linux-mi300-gpu-1, linux-mi325-gpu-1]
        part: [0, 1, 2, 3, 4, 5, 6, 7]
    runs-on: ${{matrix.runner}}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Start CI container
        run: bash scripts/ci/amd_ci_start_container.sh
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}

      - name: Install dependencies
        run: bash scripts/ci/amd_ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 50
        run: |
          bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 8

  unit-test-backend-1-gpu-amd-mi35x:
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
        github.event.pull_request.draft == false
    strategy:
      fail-fast: false
      matrix:
        runner: [linux-mi35x-gpu-1]
    runs-on: ${{matrix.runner}}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Start CI container
        run: bash scripts/ci/amd_ci_start_container.sh
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}

      - name: Install dependencies
        run: bash scripts/ci/amd_ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 50
        run: |
          bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-amd-mi35x

  unit-test-backend-2-gpu-amd:
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
        github.event.pull_request.draft == false
    strategy:
      fail-fast: false
      matrix:
        runner: [linux-mi300-gpu-2, linux-mi325-gpu-2]
    runs-on: ${{matrix.runner}}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Start CI container
        run: bash scripts/ci/amd_ci_start_container.sh
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}

      - name: Install dependencies
        run: bash scripts/ci/amd_ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 40
        run: |
          bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-2-gpu-amd

  unit-test-backend-8-gpu-amd:
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
        github.event.pull_request.draft == false
    strategy:
      fail-fast: false
      matrix:
        runner: [linux-mi300-gpu-8]
    runs-on: ${{matrix.runner}}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Start CI container
        run: bash scripts/ci/amd_ci_start_container.sh
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}

      - name: Install dependencies
        run: bash scripts/ci/amd_ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 60
        run: |
          bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-8-gpu-amd --timeout-per-file 3600

  unit-test-sgl-kernel-amd:
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
        github.event.pull_request.draft == false
    strategy:
      fail-fast: false
      matrix:
        runner: [linux-mi300-gpu-1, linux-mi325-gpu-1]
    runs-on: ${{matrix.runner}}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Start CI container
        run: bash scripts/ci/amd_ci_start_container.sh
        env:
          GITHUB_WORKSPACE: ${{ github.workspace }}

      - name: Install dependencies
        run: |
          bash scripts/ci/amd_ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 14
        run: |
          docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_moe_align.py
          docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_moe_topk_softmax.py
          docker exec -w /sglang-checkout/sgl-kernel/tests/speculative ci_sglang python3 -m pytest test_eagle_utils.py
          docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_apply_token_bitmask_inplace.py
          docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_activation.py
          docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_kvcacheio.py

  pr-test-amd-finish:
    if: always()
    needs: [
      accuracy-test-1-gpu-amd, mla-test-1-gpu-amd, bench-test-2-gpu-amd,
      accuracy-test-2-gpu-amd, performance-test-1-gpu-part-1-amd, performance-test-1-gpu-part-2-amd,
      unit-test-backend-1-gpu-amd, unit-test-backend-1-gpu-amd-mi35x, unit-test-backend-2-gpu-amd,
      unit-test-backend-8-gpu-amd, unit-test-sgl-kernel-amd
    ]
    runs-on: ubuntu-latest
    steps:
      - name: Check all dependent job statuses
        run: |
          results=(${{ join(needs.*.result, ' ') }})
          for result in "${results[@]}"; do
            if [ "$result" = "failure" ] || [ "$result" = "cancelled" ]; then
              echo "Job failed with result: $result"
              exit 1
            fi
          done
          echo "All jobs completed successfully"
          exit 0
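The `--auto-partition-id ${{ matrix.part }} --auto-partition-size 8` flags shard one suite across the eight `part` matrix entries. The selection presumably amounts to an even slice of the test list, along these lines (an illustrative sketch, not the actual run_suite.py implementation; TEST_FILES, PART_ID, and PART_SIZE are stand-ins):

    # take every PART_SIZE-th test file, offset by the partition id
    printf '%s\n' "${TEST_FILES[@]}" \
      | awk -v id="$PART_ID" -v size="$PART_SIZE" '(NR - 1) % size == id'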
.github/workflows/pr-test-h20.yml (new file, vendored, 81 lines)
@@ -0,0 +1,81 @@
name: PR Test (H20)

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]
  workflow_dispatch:
    inputs:
      version:
        required: true
        type: choice
        default: 'release'
        options:
          - 'release'
          - 'nightly'

concurrency:
  group: pr-test-h20-${{ github.ref }}
  cancel-in-progress: true

jobs:
  check-changes:
    runs-on: ubuntu-latest
    outputs:
      src: ${{ steps.filter.outputs.src }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Detect file changes
        id: filter
        uses: dorny/paths-filter@v3
        with:
          filters: |
            src:
              - "python/sglang/srt/models/deepseek*"
              - "python/sglang/srt/layers/moe/**"
              - ".github/workflows/pr-test-h20.yml"
              - "python/pyproject.toml"

  per-commit-8-gpu-h20:
    needs: [check-changes]
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
        github.event.pull_request.draft == false &&
        needs.check-changes.outputs.src == 'true'
    runs-on: 8-gpu-h20
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install dependencies
        run: |
          bash scripts/ci/ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 20

        run: |
          cd test/srt
          python3 run_suite.py --suite per-commit-8-gpu-h20

  pr-test-h20-finish:
    needs: [
      check-changes,
      per-commit-8-gpu-h20,
    ]
    if: needs.check-changes.outputs.src == 'true'
    runs-on: ubuntu-latest
    steps:
      - name: Check all dependent job statuses
        run: |
          results=(${{ join(needs.*.result, ' ') }})
          for result in "${results[@]}"; do
            if [ "$result" = "failure" ] || [ "$result" = "cancelled" ]; then
              echo "Job failed with result: $result"
              exit 1
            fi
          done
          echo "All jobs completed successfully"
          exit 0
||||
184
.github/workflows/pr-test-npu.yml
vendored
Normal file
184
.github/workflows/pr-test-npu.yml
vendored
Normal file
@@ -0,0 +1,184 @@
|
||||
name: PR Test (Ascend NPU)
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ main ]
|
||||
paths:
|
||||
- "python/**"
|
||||
- "scripts/ci/**"
|
||||
- "test/**"
|
||||
- ".github/workflows/pr-test-npu.yml"
|
||||
pull_request:
|
||||
branches: [ main ]
|
||||
paths:
|
||||
- "python/**"
|
||||
- "scripts/ci/**"
|
||||
- "test/**"
|
||||
- ".github/workflows/pr-test-npu.yml"
|
||||
workflow_dispatch:
|
||||
|
||||
concurrency:
|
||||
group: pr-test-npu-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
per-commit-1-ascend-npu:
|
||||
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
|
||||
github.event.pull_request.draft == false
|
||||
runs-on: linux-arm64-npu-1
|
||||
container:
|
||||
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
# speed up by using infra cache services
|
||||
CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
|
||||
sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
|
||||
pip config set global.index-url http://${CACHING_URL}/pypi/simple
|
||||
pip config set global.trusted-host ${CACHING_URL}
|
||||
|
||||
bash scripts/ci/npu_ci_install_dependency.sh
|
||||
# copy required file from our daily cache
|
||||
cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
|
||||
# copy download through proxy
|
||||
curl -o /tmp/test.jsonl -L https://gh-proxy.test.osinfra.cn/https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl
|
||||
|
||||
- name: Run test
|
||||
timeout-minutes: 60
|
||||
env:
|
||||
SGLANG_USE_MODELSCOPE: true
|
||||
SGLANG_IS_IN_CI: true
|
||||
HF_ENDPOINT: https://hf-mirror.com
|
||||
TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
|
||||
run: |
|
||||
cd test/srt
|
||||
python3 run_suite.py --suite per-commit-1-ascend-npu
|
||||
|
||||
per-commit-2-ascend-npu:
|
||||
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
|
||||
github.event.pull_request.draft == false
|
||||
runs-on: linux-arm64-npu-2
|
||||
container:
|
||||
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
# speed up by using infra cache services
|
||||
CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
|
||||
sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
|
||||
pip config set global.index-url http://${CACHING_URL}/pypi/simple
|
||||
pip config set global.trusted-host ${CACHING_URL}
|
||||
|
||||
bash scripts/ci/npu_ci_install_dependency.sh
|
||||
# copy required file from our daily cache
|
||||
cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
|
||||
# copy download through proxy
|
||||
curl -o /tmp/test.jsonl -L https://gh-proxy.test.osinfra.cn/https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl
|
||||
|
||||
- name: Run test
|
||||
timeout-minutes: 90
|
||||
env:
|
||||
SGLANG_USE_MODELSCOPE: true
|
||||
SGLANG_IS_IN_CI: true
|
||||
HF_ENDPOINT: https://hf-mirror.com
|
||||
TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
|
||||
run: |
|
||||
cd test/srt
|
||||
python3 run_suite.py --suite per-commit-2-ascend-npu
|
||||
|
||||
per-commit-4-ascend-npu:
|
||||
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
|
||||
github.event.pull_request.draft == false
|
||||
runs-on: linux-arm64-npu-4
|
||||
container:
|
||||
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
# speed up by using infra cache services
|
||||
CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
|
||||
sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
|
||||
pip config set global.index-url http://${CACHING_URL}/pypi/simple
|
||||
pip config set global.trusted-host ${CACHING_URL}
|
||||
|
||||
bash scripts/ci/npu_ci_install_dependency.sh
|
||||
# copy required file from our daily cache
|
||||
cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
|
||||
# copy download through proxy
|
||||
curl -o /tmp/test.jsonl -L https://gh-proxy.test.osinfra.cn/https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl
|
||||
|
||||
- name: Run test
|
||||
timeout-minutes: 120
|
||||
env:
|
||||
SGLANG_USE_MODELSCOPE: true
|
||||
SGLANG_IS_IN_CI: true
|
||||
HF_ENDPOINT: https://hf-mirror.com
|
||||
TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
|
||||
run: |
|
||||
cd test/srt
|
||||
python3 run_suite.py --suite per-commit-4-ascend-npu --timeout-per-file 3600
|
||||
|
||||
per-commit-16-ascend-a3:
|
||||
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
|
||||
github.event.pull_request.draft == false
|
||||
runs-on: linux-aarch64-a3-16
|
||||
container:
|
||||
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-a3-ubuntu22.04-py3.11
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
# speed up by using infra cache services
|
||||
CACHING_URL="cache-service.nginx-pypi-cache.svc.cluster.local"
|
||||
sed -Ei "s@(ports|archive).ubuntu.com@${CACHING_URL}:8081@g" /etc/apt/sources.list
|
||||
pip config set global.index-url http://${CACHING_URL}/pypi/simple
|
||||
pip config set global.trusted-host ${CACHING_URL}
|
||||
|
||||
bash scripts/ci/npu_ci_install_dependency.sh
|
||||
# copy required file from our daily cache
|
||||
cp ~/.cache/modelscope/hub/datasets/otavia/ShareGPT_Vicuna_unfiltered/ShareGPT_V3_unfiltered_cleaned_split.json /tmp
|
||||
# copy download through proxy
|
||||
curl -o /tmp/test.jsonl -L https://gh-proxy.test.osinfra.cn/https://raw.githubusercontent.com/openai/grade-school-math/master/grade_school_math/data/test.jsonl
|
||||
|
||||
- name: Run test
|
||||
timeout-minutes: 90
|
||||
env:
|
||||
SGLANG_USE_MODELSCOPE: true
|
||||
SGLANG_IS_IN_CI: true
|
||||
HF_ENDPOINT: https://hf-mirror.com
|
||||
TORCH_EXTENSIONS_DIR: /tmp/torch_extensions
|
||||
run: |
|
||||
cd test/srt
|
||||
python3 run_suite.py --suite per-commit-16-ascend-a3 --timeout-per-file 5400
|
||||
|
||||
pr-test-npu-finish:
|
||||
if: always()
|
||||
needs:
|
||||
- per-commit-1-ascend-npu
|
||||
- per-commit-2-ascend-npu
|
||||
- per-commit-4-ascend-npu
|
||||
- per-commit-16-ascend-a3
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Check all dependent job statuses
|
||||
run: |
|
||||
results=(${{ join(needs.*.result, ' ') }})
|
||||
for result in "${results[@]}"; do
|
||||
if [ "$result" = "failure" ] || [ "$result" = "cancelled" ]; then
|
||||
echo "Job failed with result: $result"
|
||||
exit 1
|
||||
fi
|
||||
done
|
||||
echo "All jobs completed successfully"
|
||||
exit 0
|
||||
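The sed line in each install step points apt at the in-cluster mirror by rewriting Ubuntu mirror hosts in /etc/apt/sources.list. For example (illustrative input line, same sed expression as above):

    echo "deb http://ports.ubuntu.com/ubuntu-ports jammy main" \
      | sed -E "s@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g"
    # -> deb http://cache-service.nginx-pypi-cache.svc.cluster.local:8081/ubuntu-ports jammy main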
599
.github/workflows/pr-test-pd-router.yml
vendored
Normal file
599
.github/workflows/pr-test-pd-router.yml
vendored
Normal file
@@ -0,0 +1,599 @@
|
||||
name: PR Test (PD Router)
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ main ]
|
||||
paths:
|
||||
- 'python/sglang/srt/disaggregation/**'
|
||||
- 'scripts/ci/ci_start_disaggregation_servers.sh'
|
||||
- 'sgl-router/**'
|
||||
pull_request:
|
||||
branches: [ main ]
|
||||
paths:
|
||||
- 'python/sglang/srt/disaggregation/**'
|
||||
- 'scripts/ci/ci_start_disaggregation_servers.sh'
|
||||
- 'sgl-router/**'
|
||||
workflow_dispatch:
|
||||
|
||||
concurrency:
|
||||
group: test-disaggregation-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
pull-requests: write
|
||||
issues: write
|
||||
|
||||
jobs:
|
||||
test-disaggregation:
|
||||
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
|
||||
github.event.pull_request.draft == false
|
||||
runs-on: [h200]
|
||||
timeout-minutes: 45
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 10
|
||||
|
||||
- name: Setup Python
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: '3.12'
|
||||
|
||||
- name: Setup Rust
|
||||
run: |
|
||||
bash scripts/ci/ci_install_rust.sh
|
||||
|
||||
- name: Cache Rust dependencies
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: |
|
||||
~/.cargo/bin/
|
||||
~/.cargo/registry/index/
|
||||
~/.cargo/registry/cache/
|
||||
~/.cargo/git/db/
|
||||
sgl-router/target/
|
||||
key: ${{ runner.os }}-cargo-${{ hashFiles('sgl-router/Cargo.lock') }}
|
||||
restore-keys: |
|
||||
${{ runner.os }}-cargo-
|
||||
|
||||
- name: Cache pip dependencies
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: ~/.cache/pip
|
||||
key: ${{ runner.os }}-pip-${{ hashFiles('python/pyproject.toml') }}
|
||||
restore-keys: |
|
||||
${{ runner.os }}-pip-
|
||||
|
||||
- name: Validate environment
|
||||
run: |
|
||||
echo "=== System Validation ==="
|
||||
nvidia-smi
|
||||
echo "GPU count: $(nvidia-smi -L | wc -l)"
|
||||
if [ $(nvidia-smi -L | wc -l) -lt 8 ]; then
|
||||
echo "Error: This test requires at least 8 GPUs"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "=== GPU Process Check ==="
|
||||
# Fail fast if any GPU compute processes are active
|
||||
if command -v nvidia-smi >/dev/null 2>&1; then
|
||||
# Try to query compute apps first (preferred and concise)
|
||||
gpu_procs=$(nvidia-smi --query-compute-apps=pid,process_name,gpu_uuid --format=csv,noheader 2>/dev/null | sed '/^$/d' || true)
|
||||
|
||||
# Fallback to detailed PIDS report if the query returns nothing but there might still be processes
|
||||
if [ -z "$gpu_procs" ]; then
|
||||
gpu_procs=$(nvidia-smi -q -d PIDS 2>/dev/null | awk '/Processes/{flag=1;next}/^$/{flag=0}flag' | sed '/^\s*Processes:/d' | sed '/^\s*$/d' || true)
|
||||
fi
|
||||
|
||||
if [ -n "$gpu_procs" ]; then
|
||||
echo "Error: Found active GPU processes using the device(s):"
|
||||
echo "$gpu_procs"
|
||||
exit 1
|
||||
else
|
||||
echo "No active GPU compute processes detected."
|
||||
fi
|
||||
else
|
||||
echo "Error: nvidia-smi not found; skipping GPU process check."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "=== RDMA Validation ==="
|
||||
if ! command -v ibv_devices >/dev/null 2>&1; then
|
||||
echo "Error: InfiniBand tools not found"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Check for active IB devices
|
||||
found_active_device=false
|
||||
for device in mlx5_{0..11}; do
|
||||
if ibv_devinfo $device >/dev/null 2>&1; then
|
||||
state=$(ibv_devinfo $device | grep "state:" | head -1 | awk '{print $2}')
|
||||
if [[ "$state" == "PORT_ACTIVE" ]]; then
|
||||
echo "✓ Found active device: $device"
|
||||
found_active_device=true
|
||||
break
|
||||
fi
|
||||
fi
|
||||
done
|
||||
|
||||
if [ "$found_active_device" = false ]; then
|
||||
echo "Error: No active IB devices found"
|
||||
echo "Available devices:"
|
||||
ibv_devices || true
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "=== Model Validation ==="
|
||||
if [ ! -d "/raid/models/meta-llama/Llama-3.1-8B-Instruct" ]; then
|
||||
echo "Error: Model not found"
|
||||
ls -la /raid/models/ || echo "No models directory"
|
||||
exit 1
|
||||
fi
|
||||
echo "✓ Model found"
|
||||
|
||||
- name: Install SGLang dependencies
|
||||
run: |
|
||||
echo "Installing SGLang with all extras..."
|
||||
python3 -m pip --no-cache-dir install --upgrade pip
|
||||
python3 -m pip --no-cache-dir install torch==2.8.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/test/cu126
|
||||
python3 -m pip --no-cache-dir install -e "python[all]" --break-system-packages
|
||||
python3 -m pip --no-cache-dir install mooncake-transfer-engine==0.3.5
|
||||
python3 -m pip --no-cache-dir install --user --force-reinstall genai-bench==0.0.2
|
||||
python3 -m pip --no-cache-dir install sgl-kernel==0.3.9.post2
|
||||
|
||||
- name: Build and install sgl-router
|
||||
run: |
|
||||
source "$HOME/.cargo/env"
|
||||
echo "Building sgl-router..."
|
||||
cd sgl-router
|
||||
cargo build && python3 -m build && pip install --force-reinstall dist/*.whl
|
||||
|
||||
- name: Start disaggregation servers
|
||||
id: start_servers
|
||||
run: |
|
||||
echo "Starting disaggregation servers..."
|
||||
bash scripts/ci/ci_start_disaggregation_servers.sh &
|
||||
SERVER_PID=$!
|
||||
echo "server_pid=$SERVER_PID" >> $GITHUB_OUTPUT
|
||||
|
||||
# Wait for all 8 servers to be healthy (script already does this)
|
||||
wait_count=0
|
||||
while [ $wait_count -lt 30 ]; do
|
||||
if ps -p $SERVER_PID > /dev/null; then
|
||||
# Check if the startup script printed success message
|
||||
sleep 2
|
||||
wait_count=$((wait_count + 1))
|
||||
else
|
||||
# Script exited - check if it was successful
|
||||
wait $SERVER_PID
|
||||
exit_code=$?
|
||||
if [ $exit_code -eq 0 ]; then
|
||||
echo "✓ All disaggregation servers are healthy"
|
||||
break
|
||||
else
|
||||
echo "Error: Server startup failed with code $exit_code"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
done
|
||||
|
||||
echo "✓ Servers started (PID: $SERVER_PID)"
|
||||
|
||||
- name: Test all policies sequentially
|
||||
timeout-minutes: 30
|
||||
run: |
|
||||
POLICIES=("random" "round_robin" "cache_aware" "power_of_two")
|
||||
BASE_URL="http://127.0.0.9:8000"
|
||||
|
||||
# Free commonly used ports for router and metrics
|
||||
echo "Freeing ports 29000 (metrics) and 8000 (API), if in use..."
|
||||
fuser -k -n tcp 29000 2>/dev/null || true
|
||||
fuser -k -n tcp 8000 2>/dev/null || true
|
||||
sleep 1
|
||||
|
||||
for policy in "${POLICIES[@]}"; do
|
||||
echo ""
|
||||
echo "=================================================="
|
||||
echo "Testing policy: $policy"
|
||||
echo "=================================================="
|
||||
|
||||
# Free ports before starting router
|
||||
fuser -k -n tcp 29000 2>/dev/null || true
|
||||
fuser -k -n tcp 8000 2>/dev/null || true
|
||||
|
||||
# Start router with the current policy
|
||||
echo "Starting router with policy: $policy..."
|
||||
RUST_BACKTRACE=1 python3 -m sglang_router.launch_router \
|
||||
--pd-disaggregation \
|
||||
--policy "$policy" \
|
||||
--prefill http://127.0.0.1:30001 9001 \
|
||||
--prefill http://127.0.0.2:30002 9002 \
|
||||
--prefill http://127.0.0.3:30003 9003 \
|
||||
--prefill http://127.0.0.4:30004 9004 \
|
||||
--decode http://127.0.0.5:30005 \
|
||||
--decode http://127.0.0.6:30006 \
|
||||
--decode http://127.0.0.7:30007 \
|
||||
--decode http://127.0.0.8:30008 \
|
||||
--host 127.0.0.9 \
|
||||
--port 8000 &
|
||||
ROUTER_PID=$!
|
||||
|
||||
# Wait for router to become healthy
|
||||
echo "Waiting for router to become healthy..."
|
||||
TIMEOUT=60
|
||||
ELAPSED=0
|
||||
while [ $ELAPSED -lt $TIMEOUT ]; do
|
||||
if curl --connect-timeout 5 --silent http://127.0.0.9:8000 > /dev/null 2>&1; then
|
||||
echo "✓ Router is reachable"
|
||||
break
|
||||
fi
|
||||
if ! ps -p $ROUTER_PID > /dev/null; then
|
||||
echo "Error: Router process died"
|
||||
exit 1
|
||||
fi
|
||||
sleep 5
|
||||
ELAPSED=$((ELAPSED + 5))
|
||||
done
|
||||
|
||||
if [ $ELAPSED -ge $TIMEOUT ]; then
|
||||
echo "Error: Router health check timeout"
|
||||
kill $ROUTER_PID 2>/dev/null || true
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Test API functionality
|
||||
echo "Testing API completions for $policy..."
|
||||
response=$(curl -s -X POST "$BASE_URL/v1/chat/completions" \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "Authorization: Bearer test-token" \
|
||||
-d '{
|
||||
"model": "/raid/models/meta-llama/Llama-3.1-8B-Instruct",
|
||||
"messages": [
|
||||
{"role": "user", "content": "Write a Python function to calculate fibonacci numbers recursively"}
|
||||
],
|
||||
"stream": false,
|
||||
"max_tokens": 100
|
||||
}')
|
||||
|
||||
if echo "$response" | jq -e '.choices[0].message.content' > /dev/null 2>&1; then
|
||||
echo "✓ API test passed for $policy"
|
||||
else
|
||||
echo "✗ API test failed for $policy: $response"
|
||||
kill $ROUTER_PID 2>/dev/null || true
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Test streaming
|
||||
echo "Testing streaming API for $policy..."
|
||||
stream_response=$(timeout 30 curl -s -X POST "$BASE_URL/v1/chat/completions" \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "Authorization: Bearer test-token" \
|
||||
-d '{
|
||||
"model": "/raid/models/meta-llama/Llama-3.1-8B-Instruct",
|
||||
"messages": [
|
||||
{"role": "user", "content": "Count from 1 to 5"}
|
||||
],
|
||||
"stream": true,
|
||||
"max_tokens": 50
|
||||
}')
|
||||
|
||||
if echo "$stream_response" | grep -q "data:"; then
|
||||
echo "✓ Streaming API test passed for $policy"
|
||||
else
|
||||
echo "✗ Streaming API test failed for $policy"
|
||||
kill $ROUTER_PID 2>/dev/null || true
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Run genai-bench benchmark
|
||||
echo "Running genai-bench for $policy..."
|
||||
            genai-bench benchmark \
              --api-backend openai \
              --api-base "http://127.0.0.9:8000" \
              --api-key "dummy-token" \
              --api-model-name "/raid/models/meta-llama/Llama-3.1-8B-Instruct" \
              --model-tokenizer /raid/models/meta-llama/Llama-3.1-8B-Instruct \
              --task text-to-text \
              --num-concurrency 64 \
              --traffic-scenario "D(8000,2000)" \
              --max-requests-per-run 640 \
              --max-time-per-run 2 \
              --experiment-folder-name "benchmark_${policy}" \
              --experiment-base-dir "."

            # Find the actual experiment folder
            actual_folder=$(find . -maxdepth 1 -name "benchmark_${policy}" -type d | head -1)

            if [ -n "$actual_folder" ]; then
              echo "Genai-bench results saved in: $actual_folder"

              # Extract mean values and validate performance thresholds
              echo "📊 Extracting performance metrics for $policy..."

              # Find JSON files excluding experiment metadata
              json_files=$(find "$actual_folder" -name "*.json" | grep -v experiment_metadata)

              if [ -n "$json_files" ]; then
                # Extract metrics using jq and validate against loose thresholds
                for json_file in $json_files; do
                  echo "Processing: $(basename "$json_file")"

                  # Extract mean values for performance validation
                  ttft_mean=$(jq -r '.aggregated_metrics.stats.ttft.mean' "$json_file")
                  e2e_latency_mean=$(jq -r '.aggregated_metrics.stats.e2e_latency.mean' "$json_file")
                  input_throughput_mean=$(jq -r '.aggregated_metrics.stats.input_throughput.mean' "$json_file")
                  output_throughput_mean=$(jq -r '.aggregated_metrics.stats.output_throughput.mean' "$json_file")

                  echo "  TTFT mean: ${ttft_mean}s"
                  echo "  E2E Latency mean: ${e2e_latency_mean}s"
                  echo "  Input Throughput mean: ${input_throughput_mean} tokens/s"
                  echo "  Output Throughput mean: ${output_throughput_mean} tokens/s"

                  # Set mean thresholds (allowing for reasonable variance)
                  # These can be adjusted based on your performance requirements
                  ttft_threshold=4.7 # Max 4.7 seconds for mean TTFT
                  e2e_latency_threshold=35.0 # Max 35.0 seconds for mean E2E latency
                  input_throughput_threshold=12000 # Min 12000 tokens/s for mean input throughput
                  output_throughput_threshold=68 # Min 68 tokens/s for mean output throughput

                  # Validate mean thresholds
                  validation_passed=true
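                  # bc -l prints 1 when the comparison holds and 0 otherwise;
                  # (( ... )) treats a non-zero result as true.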
if (( $(echo "$ttft_mean > $ttft_threshold" | bc -l) )); then
|
||||
echo "❌ TTFT validation failed: $ttft_mean > $ttft_threshold"
|
||||
validation_passed=false
|
||||
fi
|
||||
|
||||
if (( $(echo "$e2e_latency_mean > $e2e_latency_threshold" | bc -l) )); then
|
||||
echo "❌ E2E Latency validation failed: $e2e_latency_mean > $e2e_latency_threshold"
|
||||
validation_passed=false
|
||||
fi
|
||||
|
||||
if (( $(echo "$input_throughput_mean < $input_throughput_threshold" | bc -l) )); then
|
||||
echo "❌ Input Throughput validation failed: $input_throughput_mean < $input_throughput_threshold"
|
||||
validation_passed=false
|
||||
fi
|
||||
|
||||
if (( $(echo "$output_throughput_mean < $output_throughput_threshold" | bc -l) )); then
|
||||
echo "❌ Output Throughput validation failed: $output_throughput_mean < $output_throughput_threshold"
|
||||
validation_passed=false
|
||||
fi
|
||||
|
||||
if [ "$validation_passed" = true ]; then
|
||||
echo "✅ Performance validation passed for $policy"
|
||||
else
|
||||
echo "❌ Performance validation failed for $policy"
|
||||
kill $ROUTER_PID 2>/dev/null || true
|
||||
exit 1
|
||||
fi
|
||||
done
|
||||
|
||||
echo "✓ Genai-bench completed successfully for $policy"
|
||||
echo "📊 Detailed metrics and plots available in: $actual_folder"
|
||||
else
|
||||
echo "✗ Benchmark failed for $policy: No JSON results found"
|
||||
kill $ROUTER_PID 2>/dev/null || true
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
echo "✗ Benchmark failed for $policy: Experiment folder not found"
|
||||
kill $ROUTER_PID 2>/dev/null || true
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Stop router before testing next policy
|
||||
echo "Stopping router for $policy..."
|
||||
# First try graceful shutdown
|
||||
kill $ROUTER_PID 2>/dev/null || true
|
||||
|
||||
# Wait up to 5 seconds for graceful shutdown
|
||||
for i in {1..5}; do
|
||||
if ! ps -p $ROUTER_PID > /dev/null 2>&1; then
|
||||
echo "Router stopped gracefully"
|
||||
break
|
||||
fi
|
||||
sleep 1
|
||||
done
|
||||
|
||||
# Force kill if still running
|
||||
if ps -p $ROUTER_PID > /dev/null 2>&1; then
|
||||
echo "Force killing router..."
|
||||
kill -9 $ROUTER_PID 2>/dev/null || true
|
||||
fi
|
||||
|
||||
# Short delay to ensure port is released
|
||||
sleep 2
|
||||
|
||||
echo "✓ Completed testing for $policy"
|
||||
done
|
||||
|
||||
echo ""
|
||||
echo "✅ All policies tested successfully!"
|
||||
|
||||
|
||||
- name: Upload benchmark results
|
||||
if: success()
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: genai-bench-results-all-policies
|
||||
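          # Matches every benchmark_<policy> folder produced by the loop above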
          path: benchmark_**/

      - name: Cleanup servers
        if: always()
        run: |
          if [ -n "${{ steps.start_servers.outputs.server_pid }}" ]; then
            pkill -P ${{ steps.start_servers.outputs.server_pid }} || true
            kill ${{ steps.start_servers.outputs.server_pid }} || true
          fi
          pkill -f "sglang.launch_server" || true
          sleep 5
          remaining=$(ps aux | grep -c "sglang.launch_server" || echo "0")
          echo "Cleanup completed. Remaining processes: $remaining"

  summarize-benchmarks:
    needs: test-disaggregation
    runs-on: ubuntu-latest
    if: success()

    steps:
      - name: Install jq
        run: sudo apt-get update && sudo apt-get install -y jq bc

      - name: Download benchmark results
        uses: actions/download-artifact@v4
        with:
          name: genai-bench-results-all-policies

      - name: List downloaded contents
        run: |
          echo "Contents after download:"
          ls -la
          find . -name "benchmark_*" -type d
          echo "JSON files found:"
          find . -name "*.json" | head -10

      - name: Create benchmark summary
        run: |
          echo "=== DEBUG: Creating benchmark summary ==="
          echo "Available benchmark directories:"
          find . -name "benchmark_*" -type d
          echo "=========================================="

          echo "## PD Router Genai-Bench Results Summary" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "🚀 **Benchmarked with genai-bench for comprehensive LLM serving performance evaluation**" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "| Policy | Status | TTFT (s) | E2E Latency (s) | Input Throughput (tok/s) | Output Throughput (tok/s) |" >> $GITHUB_STEP_SUMMARY
          echo "|--------|--------|----------|-----------------|--------------------------|---------------------------|" >> $GITHUB_STEP_SUMMARY

          # First, complete the table with all policies
          for policy in random round_robin cache_aware power_of_two; do
            # Find genai-bench result folders for this policy (handle zip extraction structure)
            result_folder=$(find . -maxdepth 2 -name "benchmark_${policy}" -type d | head -1)
            if [ -z "$result_folder" ]; then
              # Try alternative patterns in case of different extraction structure
              result_folder=$(find . -maxdepth 3 -path "*benchmark_${policy}*" -type d | head -1)
            fi

            echo "DEBUG: Policy ${policy} -> Found folder: ${result_folder:-'NOT FOUND'}"

            if [ -n "$result_folder" ] && [ -d "$result_folder" ]; then
              # Find JSON file with metrics
              json_file=$(find "$result_folder" -name "*.json" -not -name "experiment_metadata.json" | head -1)

              if [ -n "$json_file" ] && [ -f "$json_file" ]; then
                # Extract performance metrics
                ttft_mean=$(jq -r '.aggregated_metrics.stats.ttft.mean // "N/A"' "$json_file" 2>/dev/null || echo "N/A")
                e2e_latency_mean=$(jq -r '.aggregated_metrics.stats.e2e_latency.mean // "N/A"' "$json_file" 2>/dev/null || echo "N/A")
                input_throughput_mean=$(jq -r '.aggregated_metrics.stats.input_throughput.mean // "N/A"' "$json_file" 2>/dev/null || echo "N/A")
                output_throughput_mean=$(jq -r '.aggregated_metrics.stats.output_throughput.mean // "N/A"' "$json_file" 2>/dev/null || echo "N/A")

                # Format numbers for display (2 decimal places)
                if [ "$ttft_mean" != "N/A" ] && [ "$ttft_mean" != "null" ]; then
                  ttft_display=$(printf "%.2f" "$ttft_mean" 2>/dev/null || echo "$ttft_mean")
                else
                  ttft_display="N/A"
                fi

                if [ "$e2e_latency_mean" != "N/A" ] && [ "$e2e_latency_mean" != "null" ]; then
                  e2e_display=$(printf "%.2f" "$e2e_latency_mean" 2>/dev/null || echo "$e2e_latency_mean")
                else
                  e2e_display="N/A"
                fi

                if [ "$input_throughput_mean" != "N/A" ] && [ "$input_throughput_mean" != "null" ]; then
                  input_display=$(printf "%.0f" "$input_throughput_mean" 2>/dev/null || echo "$input_throughput_mean")
                else
                  input_display="N/A"
                fi

                if [ "$output_throughput_mean" != "N/A" ] && [ "$output_throughput_mean" != "null" ]; then
                  output_display=$(printf "%.0f" "$output_throughput_mean" 2>/dev/null || echo "$output_throughput_mean")
                else
                  output_display="N/A"
                fi

                echo "| ${policy} | ✅ Success | $ttft_display | $e2e_display | $input_display | $output_display |" >> $GITHUB_STEP_SUMMARY
              else
                echo "| ${policy} | ❌ No Data | N/A | N/A | N/A | N/A |" >> $GITHUB_STEP_SUMMARY
              fi
            else
              echo "| ${policy} | ❌ Failed | N/A | N/A | N/A | N/A |" >> $GITHUB_STEP_SUMMARY
            fi
          done

          # Add performance validation summary
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "## 📊 Performance Validation" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "**Thresholds:** TTFT ≤ 2.0s | E2E Latency ≤ 24.0s | Input Throughput ≥ 10,000 tok/s | Output Throughput ≥ 90 tok/s" >> $GITHUB_STEP_SUMMARY
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
|
||||
validation_summary=""
|
||||
for policy in random round_robin cache_aware power_of_two; do
|
||||
# Use same robust path finding as above
|
||||
result_folder=$(find . -maxdepth 2 -name "benchmark_${policy}" -type d | head -1)
|
||||
if [ -z "$result_folder" ]; then
|
||||
result_folder=$(find . -maxdepth 3 -path "*benchmark_${policy}*" -type d | head -1)
|
||||
fi
|
||||
|
||||
if [ -n "$result_folder" ] && [ -d "$result_folder" ]; then
|
||||
json_file=$(find "$result_folder" -name "*.json" -not -name "experiment_metadata.json" | head -1)
|
||||
if [ -n "$json_file" ] && [ -f "$json_file" ]; then
|
||||
# Extract metrics for validation
|
||||
ttft=$(jq -r '.aggregated_metrics.stats.ttft.mean // "N/A"' "$json_file" 2>/dev/null || echo "N/A")
|
||||
e2e_latency=$(jq -r '.aggregated_metrics.stats.e2e_latency.mean // "N/A"' "$json_file" 2>/dev/null || echo "N/A")
|
||||
input_throughput=$(jq -r '.aggregated_metrics.stats.input_throughput.mean // "N/A"' "$json_file" 2>/dev/null || echo "N/A")
|
||||
output_throughput=$(jq -r '.aggregated_metrics.stats.output_throughput.mean // "N/A"' "$json_file" 2>/dev/null || echo "N/A")
|
||||
|
||||
# Check thresholds (using same values as in main workflow)
|
||||
validation_status="✅"
|
||||
if [ "$ttft" != "N/A" ] && [ "$ttft" != "null" ]; then
|
||||
if (( $(echo "$ttft > 2.0" | bc -l 2>/dev/null || echo "0") )); then
|
||||
validation_status="❌"
|
||||
fi
|
||||
fi
|
||||
if [ "$e2e_latency" != "N/A" ] && [ "$e2e_latency" != "null" ]; then
|
||||
if (( $(echo "$e2e_latency > 24.0" | bc -l 2>/dev/null || echo "0") )); then
|
||||
validation_status="❌"
|
||||
fi
|
||||
fi
|
||||
if [ "$input_throughput" != "N/A" ] && [ "$input_throughput" != "null" ]; then
|
||||
if (( $(echo "$input_throughput < 10000" | bc -l 2>/dev/null || echo "0") )); then
|
||||
validation_status="❌"
|
||||
fi
|
||||
fi
|
||||
if [ "$output_throughput" != "N/A" ] && [ "$output_throughput" != "null" ]; then
|
||||
if (( $(echo "$output_throughput < 90" | bc -l 2>/dev/null || echo "0") )); then
|
||||
validation_status="❌"
|
||||
fi
|
||||
fi
|
||||
|
||||
validation_summary="${validation_summary}- **${policy}**: $validation_status\n"
|
||||
else
|
||||
validation_summary="${validation_summary}- **${policy}**: ❌ No data\n"
|
||||
fi
|
||||
else
|
||||
validation_summary="${validation_summary}- **${policy}**: ❌ Failed\n"
|
||||
fi
|
||||
done
|
||||
|
||||
echo -e "$validation_summary" >> $GITHUB_STEP_SUMMARY
|
||||
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
echo "## 📊 Genai-Bench Features Used" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- **Token-level Performance**: TTFT, TPOT, End-to-End latency" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- **Throughput Analysis**: Input/Output/Total token throughput" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- **Statistical Analysis**: Percentiles, mean, std dev for all metrics" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- **Visual Reports**: Automated plots and Excel summaries" >> $GITHUB_STEP_SUMMARY
|
||||
echo "- **SGLang Backend**: Native integration with SGLang serving" >> $GITHUB_STEP_SUMMARY
|
||||
echo "" >> $GITHUB_STEP_SUMMARY
|
||||
echo "✅ All policies tested successfully with genai-bench!" >> $GITHUB_STEP_SUMMARY
|
||||
190
.github/workflows/pr-test-rust.yml
vendored
Normal file
@@ -0,0 +1,190 @@
name: PR Test (Rust)

on:
  push:
    branches: [ main ]
    paths:
      - "sgl-router/**"
  pull_request:
    branches: [ main ]
    paths:
      - "sgl-router/**"
  workflow_dispatch:

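# One run per ref: a new push to the same branch or PR cancels the in-flight run.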
concurrency:
  group: pr-test-rust-${{ github.ref }}
  cancel-in-progress: true

jobs:
  unit-test-rust:
    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install dependencies
        run: |
          bash scripts/ci/ci_install_rust.sh

      - name: Rust cache
        uses: Swatinem/rust-cache@v2
        with:
          workspaces: sgl-router

      - name: Run lint
        run: |
          source "$HOME/.cargo/env"
          cd sgl-router/
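          # -D warnings promotes every clippy warning to a hard error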
          cargo clippy --all-targets --all-features -- -D warnings

      - name: Run fmt
        run: |
          source "$HOME/.cargo/env"
          cd sgl-router/
          cargo fmt -- --check

      - name: Run Rust tests
        timeout-minutes: 20
        run: |
          source "$HOME/.cargo/env"
          cd sgl-router/
          cargo test

      - name: Check benchmark compilation
        run: |
          source "$HOME/.cargo/env"
          cd sgl-router/
          cargo check --benches

      - name: Quick benchmark sanity check
        timeout-minutes: 15
        run: |
          source "$HOME/.cargo/env"
          cd sgl-router/
          # Run quick benchmarks to ensure they work using Python script
          python3 scripts/run_benchmarks.py --quick

  pytest-rust:
    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
    runs-on: BM.A10.4
    timeout-minutes: 25
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install rust dependencies
        run: |
          bash scripts/ci/ci_install_rust.sh

      - name: Install SGLang dependencies
        run: |
          sudo bash scripts/ci/ci_install_dependency.sh

      - name: Build python binding
        run: |
          source "$HOME/.cargo/env"
          cd sgl-router
          pip install setuptools-rust wheel build
          python3 -m build
          pip install --force-reinstall dist/*.whl

      - name: Run Python unit tests
        run: |
          cd sgl-router
          source "$HOME/.cargo/env"
          pip install pytest pytest-cov pytest-xdist
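          # --cov-fail-under=80 fails the step if line coverage drops below 80%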
          pytest -q py_test/unit --cov=sglang_router --cov-report=term-missing --cov-fail-under=80

      - name: Run Python integration tests
        run: |
          cd sgl-router
          source "$HOME/.cargo/env"
          # Integration tests use FastAPI/uvicorn for mock workers
          pip install fastapi uvicorn orjson
          pytest -q -m integration

      - name: Run Python E2E tests
        run: |
          bash scripts/killall_sglang.sh "nuk_gpus"
          cd sgl-router
          python3 -m pip --no-cache-dir install --upgrade --ignore-installed blinker
          python3 -m pip --no-cache-dir install --upgrade --break-system-packages genai-bench==0.0.2
          pytest -m e2e -s -vv -o log_cli=true --log-cli-level=INFO

      - name: Upload benchmark results
        if: success()
        uses: actions/upload-artifact@v4
        with:
          name: genai-bench-results-all-policies
          path: sgl-router/benchmark_**/

  finish:
    needs: [unit-test-rust, pytest-rust]
    runs-on: ubuntu-latest
    steps:
      - name: Finish
        run: echo "This is an empty step to ensure that all jobs are completed."

  summarize-benchmarks:
    needs: pytest-rust
    runs-on: ubuntu-latest
    if: success()

    steps:
      - name: Install jq
        run: sudo apt-get update && sudo apt-get install -y jq bc

      - name: Download benchmark results
        uses: actions/download-artifact@v4
        with:
          name: genai-bench-results-all-policies

      - name: List downloaded contents
        run: |
          echo "Contents after download:"
          ls -la
          find . -name "benchmark_*" -type d
          echo "JSON files found:"
          find . -name "*.json" | head -10

      - name: Create benchmark summary
        run: |
          echo "=== DEBUG: Creating benchmark summary ==="
          echo "Available benchmark directories:"
          find . -name "benchmark_*" -type d || true
          echo "=========================================="

          echo "## Router E2E Genai-Bench Results Summary" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "Results captured from E2E tests for two scenarios: regular router (2 workers, dp=2) and PD router (2 prefill + 2 decode)." >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "| Scenario | Status | TTFT (s) | E2E Latency (s) | Input Throughput (tok/s) | Output Throughput (tok/s) |" >> $GITHUB_STEP_SUMMARY
          echo "|----------|--------|----------|-----------------|--------------------------|---------------------------|" >> $GITHUB_STEP_SUMMARY

          scenarios=$'Regular (dp=2, round_robin)|benchmark_round_robin_regular\nPD (2 prefill + 2 decode, round_robin)|benchmark_round_robin_pd'

          echo "$scenarios" | sed 's/^\s*//' | while IFS='|' read -r label pattern; do
            [ -z "$label" ] && continue
            # Find the result folder (handle different extraction layouts)
            result_folder=$(find . -maxdepth 3 \( -name "$pattern" -o -path "*${pattern}*" \) -type d | head -1)

            if [ -n "$result_folder" ] && [ -d "$result_folder" ]; then
              json_file=$(find "$result_folder" -name "*.json" -not -name "experiment_metadata.json" | head -1)

              if [ -n "$json_file" ] && [ -f "$json_file" ]; then
                ttft_mean=$(jq -r '.aggregated_metrics.stats.ttft.mean' "$json_file")
                e2e_latency_mean=$(jq -r '.aggregated_metrics.stats.e2e_latency.mean' "$json_file")
                input_throughput_mean=$(jq -r '.aggregated_metrics.stats.input_throughput.mean' "$json_file")
                output_throughput_mean=$(jq -r '.aggregated_metrics.stats.output_throughput.mean' "$json_file")

                ttft_display=$(printf "%.2f" "$ttft_mean" 2>/dev/null || echo "$ttft_mean")
                e2e_display=$(printf "%.2f" "$e2e_latency_mean" 2>/dev/null || echo "$e2e_latency_mean")
                input_display=$(printf "%.0f" "$input_throughput_mean" 2>/dev/null || echo "$input_throughput_mean")
                output_display=$(printf "%.0f" "$output_throughput_mean" 2>/dev/null || echo "$output_throughput_mean")

                echo "| ${label} | ✅ Success | $ttft_display | $e2e_display | $input_display | $output_display |" >> $GITHUB_STEP_SUMMARY
              fi
            fi
          done
151
.github/workflows/pr-test-sgl-kernel.yml
vendored
Normal file
@@ -0,0 +1,151 @@
name: PR Test (sgl-kernel)

on:
  push:
    branches: [main]
    paths:
      - "sgl-kernel/**"
  pull_request:
    branches: [main]
    paths:
      - "sgl-kernel/**"
  workflow_dispatch:

concurrency:
  group: pr-test-sgl-kernel-${{ github.ref }}
  cancel-in-progress: true

jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Check clang-format
        uses: DoozyX/clang-format-lint-action@v0.18.1
        with:
          source: sgl-kernel
          extensions: h,c,cpp,hpp,cu,cuh,cc
          clangFormatVersion: 18
          style: file

  build-wheels:
    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
    runs-on: sgl-kernel-build-node
    strategy:
      matrix:
        include:
          - python-version: "3.10"
            cuda-version: "12.4"
          - python-version: "3.10"
            cuda-version: "12.8"
          - python-version: "3.10"
            cuda-version: "12.9"
    name: Build Wheel (CUDA ${{ matrix.cuda-version }})
    steps:
      - name: Cleanup
        run: |
          sudo rm -rf $GITHUB_WORKSPACE/* || true

      - uses: actions/checkout@v4
        with:
          submodules: "recursive"

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Build wheel for Python ${{ matrix.python-version }} and CUDA ${{ matrix.cuda-version }}
        if: github.event_name != 'push' || (matrix.cuda-version != '12.4' && matrix.cuda-version != '12.8')
        run: |
          cd sgl-kernel
          chmod +x ./build.sh
          ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}"

      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}
          path: sgl-kernel/dist/*

  unit-test:
    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
    needs: build-wheels
    runs-on: 1-gpu-runner
    steps:
      - uses: actions/checkout@v4

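      # Pull only the CUDA 12.9 wheel built above into sgl-kernel/dist/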
      - name: Download artifacts
        uses: actions/download-artifact@v4
        with:
          path: sgl-kernel/dist/
          merge-multiple: true
          pattern: wheel-python3.10-cuda12.9

      - name: Install
        run: |
          bash scripts/ci/ci_install_dependency.sh
          pip3 install torch==2.8.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/test/cu126 && pip3 install pytest
          pip3 uninstall sgl-kernel -y || true
          pip3 install sgl-kernel/dist/*whl --force-reinstall --no-deps
          pip3 list | grep sgl-kernel

      - name: Run test
        timeout-minutes: 30
        run: |
          cd sgl-kernel
          pytest tests/

      - name: Uninstall dependencies
        run: |
          pip3 uninstall sgl-kernel -y

  mla-test:
    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
    needs: build-wheels
    runs-on: 1-gpu-runner
    steps:
      - uses: actions/checkout@v4

      - name: Download artifacts
        uses: actions/download-artifact@v4
        with:
          path: sgl-kernel/dist/
          merge-multiple: true
          pattern: wheel-python3.10-cuda12.9

      - name: Install
        run: |
          bash scripts/ci/ci_install_dependency.sh
          pip3 install torch==2.8.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/test/cu126
          pip3 uninstall sgl-kernel -y || true
          pip3 install sgl-kernel/dist/*whl --force-reinstall --no-deps
          pip3 list | grep sgl-kernel

      - name: Run test
        timeout-minutes: 30
        run: |
          cd test/srt
          python3 test_mla_deepseek_v3.py

      - name: Uninstall dependencies
        run: |
          pip3 uninstall sgl-kernel -y

  finish:
    needs: [unit-test, mla-test, lint, build-wheels]
    runs-on: ubuntu-latest
    steps:
      - name: Check all dependent job statuses
        run: |
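          # needs.*.result yields success/failure/cancelled/skipped per job;
          # join() flattens them into a space-separated bash array literal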
          results=(${{ join(needs.*.result, ' ') }})
          for result in "${results[@]}"; do
            if [ "$result" = "failure" ] || [ "$result" = "cancelled" ]; then
              echo "Job failed with result: $result"
              exit 1
            fi
          done
          echo "All jobs completed successfully"
          exit 0
106
.github/workflows/pr-test-xeon.yml
vendored
Normal file
@@ -0,0 +1,106 @@
name: PR Test (Xeon)

on:
  push:
    branches: [ main ]
    paths:
      - "python/**"
      - "scripts/ci/**"
      - "test/**"
      - "sgl-kernel/**"
      - ".github/workflows/pr-test-xeon.yml"
  pull_request:
    branches: [ main ]
    paths:
      - "python/**"
      - "scripts/ci/**"
      - "test/**"
      - "sgl-kernel/**"
      - ".github/workflows/pr-test-xeon.yml"
  workflow_dispatch:

concurrency:
  group: pr-test-xeon-${{ github.ref }}
  cancel-in-progress: false

jobs:
  build-test:
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      github.event.pull_request.draft == false
    runs-on: xeon-gnr
    env:
      HF_HOME: /home/sdp/.cache/huggingface
    strategy:
      matrix:
        build_type: ['all']
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Build and Push
        run: |
          version=$(cat python/sglang/version.py | cut -d'"' -f2)
          tag=v${version}-xeon

          docker build . -f docker/Dockerfile.xeon -t sglang_xeon --no-cache

      - name: Run container
        run: |
          docker run -dt \
            -v ${{ github.workspace }}:/sglang-checkout/ --ipc=host \
            -v ${HF_HOME}:/root/.cache/huggingface \
            --name ci_sglang_xeon \
            sglang_xeon

      - name: Install dependencies
        timeout-minutes: 20
        run: |
          docker exec ci_sglang_xeon bash -c "python3 -m pip install --upgrade pip"
          docker exec ci_sglang_xeon pip uninstall sgl-kernel -y || true
          docker exec -w /sglang-checkout/sgl-kernel ci_sglang_xeon bash -c "cp pyproject_cpu.toml pyproject.toml && pip install -v ."
          docker exec -w /sglang-checkout/ ci_sglang_xeon bash -c "pip install -e 'python[dev_cpu]'"

      - name: Check AMX support
        id: check_amx
        timeout-minutes: 5
        run: |
          docker exec -w /sglang-checkout/ ci_sglang_xeon \
            bash -c "python3 -c 'import torch; import sgl_kernel; assert torch._C._cpu._is_amx_tile_supported(); assert hasattr(torch.ops.sgl_kernel, \"convert_weight_packed\"); '"
        continue-on-error: true

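      # continue-on-error records outcome=failure without failing the job;
      # the unit-test step below is skipped when AMX is unsupported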
      - name: Run unit tests
        if: steps.check_amx.outcome == 'success'
        timeout-minutes: 36
        run: |
          docker exec -w /sglang-checkout/ ci_sglang_xeon \
            bash -c "cd ./test/srt && python3 run_suite.py --suite per-commit-cpu"

      - name: Change permission
        timeout-minutes: 2
        run: |
          docker exec -u root ci_sglang_xeon bash -c "
            rm -rf /tmp/ci-home &&
            chown -R $(id -u):$(id -g) /sglang-checkout/ 2>/dev/null || true
          "

      - name: Cleanup container
        if: always()
        run: |
          docker rm -f ci_sglang_xeon || true

  pr-test-xeon-finish:
    if: always()
    needs: [build-test]
    runs-on: ubuntu-latest
    steps:
      - name: Check all dependent job statuses
        run: |
          results=(${{ join(needs.*.result, ' ') }})
          for result in "${results[@]}"; do
            if [ "$result" = "failure" ] || [ "$result" = "cancelled" ]; then
              echo "Job failed with result: $result"
              exit 1
            fi
          done
          echo "All jobs completed successfully"
          exit 0
437
.github/workflows/pr-test.yml
vendored
Normal file
@@ -0,0 +1,437 @@
name: PR Test

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]
  workflow_dispatch:
    inputs:
      version:
        description: "FlashInfer version"
        required: true
        type: choice
        default: 'release'
        options:
          - 'release'
          - 'nightly'

concurrency:
  group: pr-test-${{ github.ref }}
  cancel-in-progress: true

jobs:
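  # Gate job: downstream jobs run only when the watched source paths changed
  # (outputs.src == 'true')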
  check-changes:
    runs-on: ubuntu-latest
    outputs:
      src: ${{ steps.filter.outputs.src }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Detect file changes
        id: filter
        uses: dorny/paths-filter@v3
        with:
          filters: |
            src:
              - "python/**"
              - "scripts/ci/**"
              - "test/**"
              - ".github/workflows/pr-test.yml"

  unit-test-frontend:
    needs: check-changes
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      github.event.pull_request.draft == false &&
      needs.check-changes.outputs.src == 'true'
    runs-on: 1-gpu-runner
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install dependencies
        run: |
          bash scripts/ci/ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 10
        run: |
          cd test/lang
          python3 run_suite.py --suite per-commit

  unit-test-backend-1-gpu:
    needs: [check-changes, unit-test-frontend]
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      github.event.pull_request.draft == false &&
      needs.check-changes.outputs.src == 'true'
    runs-on: 1-gpu-runner
    strategy:
      fail-fast: false
      matrix:
        part: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install dependencies
        run: |
          bash scripts/ci/ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 30
        run: |
          cd test/srt
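          # The per-commit suite is sharded 10 ways; this job runs shard ${{ matrix.part }}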
          python3 run_suite.py --suite per-commit --auto-partition-id ${{ matrix.part }} --auto-partition-size 10

  unit-test-backend-2-gpu:
    needs: [check-changes]
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      github.event.pull_request.draft == false &&
      needs.check-changes.outputs.src == 'true'
    runs-on: 2-gpu-runner
    strategy:
      fail-fast: false
      matrix:
        part: [0, 1]
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install dependencies
        run: |
          bash scripts/ci/ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 30
        run: |
          cd test/srt
          python3 run_suite.py --suite per-commit-2-gpu --auto-partition-id ${{ matrix.part }} --auto-partition-size 2

  unit-test-backend-4-gpu:
    needs: [check-changes, unit-test-backend-2-gpu]
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      github.event.pull_request.draft == false &&
      needs.check-changes.outputs.src == 'true'
    runs-on: 4-gpu-runner
    strategy:
      fail-fast: false
      matrix:
        part: [0, 1]
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install dependencies
        run: |
          bash scripts/ci/ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 20
        run: |
          cd test/srt
          python3 run_suite.py --suite per-commit-4-gpu --auto-partition-id ${{ matrix.part }} --auto-partition-size 2

  unit-test-backend-8-gpu:
    needs: [check-changes, unit-test-backend-2-gpu]
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      github.event.pull_request.draft == false &&
      needs.check-changes.outputs.src == 'true'
    runs-on: 8-gpu-runner
    strategy:
      fail-fast: false
      matrix:
        part: [0, 1]
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install dependencies
        run: |
          bash scripts/ci/ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 20
        run: |
          cd test/srt
          python3 run_suite.py --suite per-commit-8-gpu --auto-partition-id ${{ matrix.part }} --auto-partition-size 2

  performance-test-1-gpu-part-1:
    needs: check-changes
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      github.event.pull_request.draft == false &&
      needs.check-changes.outputs.src == 'true'
    runs-on: 1-gpu-runner
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install dependencies
        run: |
          bash scripts/ci/ci_install_dependency.sh

      - name: Benchmark single latency
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_bs1_small
          python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_bs1_default

      - name: Benchmark online latency
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 -m unittest test_bench_serving.TestBenchServing.test_online_latency_default

      - name: Benchmark offline throughput
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_default

      - name: Benchmark offline throughput (Non-streaming, small batch size)
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_non_stream_small_batch_size

      - name: Benchmark online latency (EAGLE)
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 -m unittest test_bench_serving.TestBenchServing.test_online_latency_eagle

      - name: Benchmark online latency (LoRA)
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 -m unittest test_bench_serving.TestBenchServing.test_lora_online_latency
          python3 -m unittest test_bench_serving.TestBenchServing.test_lora_online_latency_with_concurrent_adapter_updates

  performance-test-1-gpu-part-2:
    needs: check-changes
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      github.event.pull_request.draft == false &&
      needs.check-changes.outputs.src == 'true'
    runs-on: 1-gpu-runner
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install dependencies
        run: |
          bash scripts/ci/ci_install_dependency.sh

      - name: Benchmark offline throughput (w/o RadixAttention)
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_without_radix_cache

      - name: Benchmark offline throughput (w/ Triton)
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_with_triton_attention_backend

      - name: Benchmark offline throughput (w/ FP8)
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_default_fp8

      - name: Benchmark VLM offline throughput
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 -m unittest test_bench_serving.TestBenchServing.test_vlm_offline_throughput

      - name: Benchmark VLM online latency
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 -m unittest test_bench_serving.TestBenchServing.test_vlm_online_latency

  performance-test-2-gpu:
    needs: [check-changes, unit-test-backend-2-gpu]
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      github.event.pull_request.draft == false &&
      needs.check-changes.outputs.src == 'true'
    runs-on: 2-gpu-runner
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install dependencies
        run: |
          bash scripts/ci/ci_install_dependency.sh

      - name: Benchmark single latency (TP=2)
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_tp2_bs1

      - name: Benchmark single latency + torch.compile (TP=2)
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_torch_compile_tp2_bs1

      - name: Benchmark offline throughput (TP=2)
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 -m unittest test_bench_serving.TestBenchServing.test_moe_offline_throughput_default

      - name: Benchmark offline throughput (w/o RadixAttention) (TP=2)
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 -m unittest test_bench_serving.TestBenchServing.test_moe_offline_throughput_without_radix_cache

      - name: Benchmark offline PP decode throughput (PP=2)
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 -m unittest test_bench_serving.TestBenchServing.test_pp_offline_throughput_default_decode

      - name: Benchmark offline PP prefill throughput (PP=2)
        timeout-minutes: 10
        run: |
          cd test/srt
          python3 -m unittest test_bench_serving.TestBenchServing.test_pp_long_context_prefill

  accuracy-test-1-gpu:
    needs: check-changes
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      github.event.pull_request.draft == false &&
      needs.check-changes.outputs.src == 'true'
    runs-on: 1-gpu-runner
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install dependencies
        run: |
          bash scripts/ci/ci_install_dependency.sh
          git clone https://github.com/merrymercy/human-eval.git
          cd human-eval
          pip install -e .

      - name: Evaluate accuracy
        timeout-minutes: 20
        run: |
          cd test/srt
          python3 test_eval_accuracy_large.py

  accuracy-test-2-gpu:
    needs: [check-changes, accuracy-test-1-gpu]
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      github.event.pull_request.draft == false &&
      needs.check-changes.outputs.src == 'true'
    runs-on: 2-gpu-runner
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install dependencies
        run: |
          bash scripts/ci/ci_install_dependency.sh
          git clone https://github.com/merrymercy/human-eval.git
          cd human-eval
          pip install -e .

      - name: Evaluate accuracy (TP=2)
        timeout-minutes: 20
        run: |
          cd test/srt
          python3 test_moe_eval_accuracy_large.py

  unit-test-deepep-4-gpu:
    needs: [check-changes, unit-test-backend-2-gpu]
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      github.event.pull_request.draft == false &&
      needs.check-changes.outputs.src == 'true'
    runs-on: 4-gpu-runner
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install dependencies
        run: |
          bash scripts/ci/ci_install_deepep.sh

      - name: Run test
        timeout-minutes: 20
        run: |
          cd test/srt
          python3 run_suite.py --suite per-commit-4-gpu-deepep

  unit-test-deepep-8-gpu:
    needs: [check-changes, unit-test-backend-2-gpu]
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      github.event.pull_request.draft == false &&
      needs.check-changes.outputs.src == 'true'
    runs-on: 8-gpu-runner
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install dependencies
        run: |
          bash scripts/ci/ci_install_deepep.sh

      - name: Run test
        timeout-minutes: 20
        run: |
          cd test/srt
          python3 run_suite.py --suite per-commit-8-gpu-deepep

  unit-test-backend-8-gpu-b200:
    needs: [check-changes, unit-test-backend-2-gpu]
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      github.event.pull_request.draft == false &&
      needs.check-changes.outputs.src == 'true'
    runs-on: b200-runner
    strategy:
      fail-fast: false
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install dependencies
        run: |
          IS_BLACKWELL=1 bash scripts/ci/ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 20
        run: |
          cd test/srt
          python3 run_suite.py --suite per-commit-8-gpu-b200 --auto-partition-id 0 --auto-partition-size 1

  pr-test-finish:
    needs: [
      check-changes,
      unit-test-frontend, unit-test-backend-1-gpu,
      unit-test-backend-2-gpu, unit-test-backend-4-gpu, unit-test-backend-8-gpu,
      performance-test-1-gpu-part-1, performance-test-1-gpu-part-2, performance-test-2-gpu,
      accuracy-test-1-gpu, accuracy-test-2-gpu,
      unit-test-deepep-4-gpu, unit-test-deepep-8-gpu,
      unit-test-backend-8-gpu-b200,
    ]
    if: always()
    runs-on: ubuntu-latest
    steps:
      - name: Check all dependent job statuses
        run: |
          results=(${{ join(needs.*.result, ' ') }})
          for result in "${results[@]}"; do
            if [ "$result" = "failure" ] || [ "$result" = "cancelled" ]; then
              echo "Job failed with result: $result"
              exit 1
            fi
          done
          echo "All jobs completed successfully"
          exit 0
65
.github/workflows/release-docker-amd-nightly.yml
vendored
Normal file
@@ -0,0 +1,65 @@
name: Release Docker Images Nightly (AMD)
on:
  workflow_dispatch:
  schedule:
    - cron: '0 13 * * *'

concurrency:
  # A PR number if a pull request and otherwise the commit hash. This cancels
  # queued and in-progress runs for the same PR (presubmit) or commit
  # (postsubmit). The workflow name is prepended to avoid conflicts between
  # different workflows.
  group: ${{ github.workflow }}-${{ github.event.number || github.sha }}
  cancel-in-progress: true

jobs:
  publish:
    if: github.repository == 'sgl-project/sglang'
    runs-on: amd-docker-scale
    environment: 'prod'
    strategy:
      matrix:
        gpu_arch: ['gfx942', 'gfx942-rocm700', 'gfx950']
        build_type: ['all', 'srt']
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: "Set Date"
        run: |
          echo "DATE=$(date +%Y%m%d)" >> $GITHUB_ENV

      - name: Login to Docker Hub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_AMD_USERNAME }}
          password: ${{ secrets.DOCKERHUB_AMD_TOKEN }}

      - name: Build and Push
        run: |
          version=$(cat python/sglang/version.py | cut -d'"' -f2)

          if [ "${{ matrix.gpu_arch }}" = "gfx942" ]; then
            rocm_tag="rocm630-mi30x"
          elif [ "${{ matrix.gpu_arch }}" = "gfx942-rocm700" ]; then
            rocm_tag="rocm700-mi30x"
          elif [ "${{ matrix.gpu_arch }}" = "gfx950" ]; then
            rocm_tag="rocm700-mi35x"
          else
            echo "Unsupported gfx arch"
            exit 1
          fi

          tag=v${version}-${rocm_tag}

          if [ "${{ matrix.build_type }}" = "all" ]; then
            tag_suffix=""
          elif [ "${{ matrix.build_type }}" = "srt" ]; then
            tag_suffix="-srt"
          else
            echo "Unsupported build type"
            exit 1
          fi

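          # Resulting tag shape (date illustrative): rocm/sgl-dev:v0.5.2-rocm700-mi35x-20250101-srt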
          docker build . -f docker/Dockerfile.rocm --build-arg BUILD_TYPE=${{ matrix.build_type }} --build-arg GPU_ARCH=${{ matrix.gpu_arch }} -t rocm/sgl-dev:${tag}-${{ env.DATE }}${tag_suffix} --no-cache
          docker push rocm/sgl-dev:${tag}-${{ env.DATE }}${tag_suffix}
56
.github/workflows/release-docker-amd.yml
vendored
Normal file
@@ -0,0 +1,56 @@
name: Release Docker Images (AMD)
on:
  push:
    branches:
      - main
    paths:
      - "python/sglang/version.py"
  workflow_dispatch:

jobs:
  publish:
    if: github.repository == 'sgl-project/sglang'
    runs-on: amd-docker-scale
    environment: 'prod'
    strategy:
      matrix:
        gpu_arch: ['gfx942', 'gfx942-rocm700', 'gfx950']
        build_type: ['all', 'srt']
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Login to Docker Hub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Build and Push
        run: |
          version=$(cat python/sglang/version.py | cut -d'"' -f2)

          if [ "${{ matrix.gpu_arch }}" = "gfx942" ]; then
            rocm_tag="rocm630-mi30x"
          elif [ "${{ matrix.gpu_arch }}" = "gfx942-rocm700" ]; then
            rocm_tag="rocm700-mi30x"
          elif [ "${{ matrix.gpu_arch }}" = "gfx950" ]; then
            rocm_tag="rocm700-mi35x"
          else
            echo "Unsupported gfx arch"
            exit 1
          fi

          tag=v${version}-${rocm_tag}

          if [ "${{ matrix.build_type }}" = "all" ]; then
            tag_suffix=""
          elif [ "${{ matrix.build_type }}" = "srt" ]; then
            tag_suffix="-srt"
          else
            echo "Unsupported build type"
            exit 1
          fi

          docker build . -f docker/Dockerfile.rocm --build-arg BUILD_TYPE=${{ matrix.build_type }} --build-arg GPU_ARCH=${{ matrix.gpu_arch }} -t lmsysorg/sglang:${tag}${tag_suffix} --no-cache
          docker push lmsysorg/sglang:${tag}${tag_suffix}
49
.github/workflows/release-docker-dev.yml
vendored
Normal file
@@ -0,0 +1,49 @@
name: Build Development Docker Image

on:
  workflow_dispatch:
  schedule:
    - cron: '0 0 * * *'

jobs:
  build-dev:
    if: ${{ github.repository == 'sgl-project/sglang' }}
    runs-on: ubuntu-22.04
    strategy:
      matrix:
        variant:
          - version: 12.6.1
            type: all
            tag: dev
          - version: 12.8.1
            type: blackwell
            tag: blackwell
          - version: 12.9.1
            type: blackwell
            tag: b200-cu129

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Free disk space
        uses: jlumbroso/free-disk-space@main
        with:
          tool-cache: false
          docker-images: false
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          swap-storage: false

      - name: Login to Docker Hub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Build and Push Dev Image
        run: |
          docker buildx build --output type=image,compression=zstd . -f docker/Dockerfile --build-arg CUDA_VERSION=${{ matrix.variant.version }} --build-arg BUILD_TYPE=${{ matrix.variant.type }} --build-arg CMAKE_BUILD_PARALLEL_LEVEL=$(nproc) -t lmsysorg/sglang:${{ matrix.variant.tag }} --no-cache
          docker push lmsysorg/sglang:${{ matrix.variant.tag }}
36
.github/workflows/release-docker-gb200.yml
vendored
Normal file
@@ -0,0 +1,36 @@
name: Release Docker Images (GB200)
on:
  push:
    branches:
      - main
    paths:
      - "python/sglang/version.py"
  workflow_dispatch:

jobs:
  publish:
    if: github.repository == 'sgl-project/sglang'
    runs-on: ubuntu-22.04-arm
    environment: "prod"
    steps:
      - name: Delete huge unnecessary tools folder
        run: rm -rf /opt/hostedtoolcache

      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Build and Push
        run: |
          version=$(cat python/sglang/version.py | cut -d'"' -f2)
          tag=v${version}-cu129-gb200

          docker buildx build --platform linux/arm64 --push --output type=image -t lmsysorg/sglang:${tag} -f docker/Dockerfile.gb200 --build-arg CUDA_VERSION=12.9.1 --build-arg BUILD_TYPE=blackwell --no-cache .
78
.github/workflows/release-docker-npu-nightly.yml
vendored
Normal file
@@ -0,0 +1,78 @@
name: Release Docker Images Nightly (Ascend NPU)
on:
  pull_request:
    branches:
      - main
    paths:
      - ".github/workflows/release-docker-npu-nightly.yml"
      - "docker/Dockerfile.npu"
  workflow_dispatch:
  schedule:
    - cron: "0 0 * * *"

concurrency:
  group: ${{ github.workflow }}-${{ github.sha }}
  cancel-in-progress: true

jobs:
  build:
    runs-on: ubuntu-22.04-arm
    strategy:
      matrix:
        cann_version: ["8.2.rc1"]
        device_type: ["910b", "a3"]
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Free up disk space
        uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
        with:
          tool-cache: true
          docker-images: false

      - name: Setup Docker buildx
        uses: docker/setup-buildx-action@v3

      - name: Docker meta
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: |
            lmsysorg/sglang
          # push with schedule event
          # push with workflow_dispatch event
          tags: |
            type=ref,event=pr
            type=ref,event=branch
            type=schedule,pattern=main
          flavor: |
            latest=false
            suffix=-cann${{ matrix.cann_version }}-${{ matrix.device_type }},onlatest=true

      # Login against a Docker registry except on PR
      # https://github.com/docker/login-action
      - name: Log into docker hub
        uses: docker/login-action@v3
        if: ${{ github.repository == 'sgl-project/sglang' && github.event_name != 'pull_request' }}
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      # Build and push Docker image with Buildx (don't push on PR)
      # https://github.com/docker/build-push-action
      - name: Build and push Docker image
        id: build-and-push
        uses: docker/build-push-action@v6
        with:
          context: docker
          file: docker/Dockerfile.npu
          # TODO: add x86 platform support when memfabric is ready
          platforms: linux/arm64
          labels: ${{ steps.meta.outputs.labels }}
          tags: ${{ steps.meta.outputs.tags }}
          push: ${{ github.repository == 'sgl-project/sglang' && github.event_name != 'pull_request' }}
          provenance: false
          build-args: |
            SGLANG_KERNEL_NPU_TAG=20250901
            CANN_VERSION=${{ matrix.cann_version }}
            DEVICE_TYPE=${{ matrix.device_type }}
74
.github/workflows/release-docker-npu.yml
vendored
Normal file
@@ -0,0 +1,74 @@
name: Release Docker Images (Ascend NPU)
on:
  push:
    tags:
      - "*" # Trigger on all tags; filtered by PEP 440 later
  workflow_dispatch:
  pull_request:
    branches:
      - main
    paths:
      - ".github/workflows/release-docker-npu.yml"
      - "docker/Dockerfile.npu"

jobs:
  build:
    runs-on: ubuntu-22.04-arm
    strategy:
      matrix:
        cann_version: ["8.2.rc1"]
        device_type: ["910b", "a3"]
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Free up disk space
        uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
        with:
          tool-cache: true
          docker-images: false

      # push with tag
      - name: Docker meta
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: |
            lmsysorg/sglang
          tags: |
            type=ref,event=pr
            type=ref,event=tag,suffix=-cann${{ matrix.cann_version }}-${{ matrix.device_type }}
          flavor: |
            latest=false

      # Login against a Docker registry except on PR
      # https://github.com/docker/login-action
      - name: Login to Docker Hub
        uses: docker/login-action@v2
        if: ${{ github.repository == 'sgl-project/sglang' && github.event_name != 'pull_request' }}
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Get version
        id: get_version
        run: |
          version=$(cat python/sglang/version.py | cut -d'"' -f2)
          echo "TAG=lmsysorg/sglang:v$version-cann${{ matrix.cann_version }}-${{ matrix.device_type }}" >> $GITHUB_OUTPUT

      - name: Build and push Docker image
        id: build-and-push
        uses: docker/build-push-action@v6
        with:
          context: docker
          file: docker/Dockerfile.npu
          # TODO: add x86 platform support when memfabric is ready
          platforms: linux/arm64
          labels: ${{ steps.meta.outputs.labels }}
          tags: ${{ steps.meta.outputs.tags || steps.get_version.outputs.TAG }}
          push: ${{ github.repository == 'sgl-project/sglang' && github.event_name != 'pull_request' }}
          provenance: false
          build-args: |
            SGLANG_KERNEL_NPU_TAG=20250901
            CANN_VERSION=${{ matrix.cann_version }}
            DEVICE_TYPE=${{ matrix.device_type }}
30
.github/workflows/release-docker-router.yml
vendored
Normal file
@@ -0,0 +1,30 @@
name: Release SGLang Router Docker Image
on:
  push:
    branches:
      - main
    paths:
      - "sgl-router/py_src/sglang_router/version.py"
  workflow_dispatch:

jobs:
  publish:
    if: github.repository == 'sgl-project/sglang'
    runs-on: ubuntu-24.04
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Login to Docker Hub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Build and Push
        run: |
          version=$(cat sgl-router/py_src/sglang_router/version.py | cut -d'"' -f2)
          tag=v${version}

          docker build . -f docker/Dockerfile.router -t lmsysorg/sglang-router:${tag} --no-cache
          docker push lmsysorg/sglang-router:${tag}
35
.github/workflows/release-docker-xeon.yml
vendored
Normal file
@@ -0,0 +1,35 @@
name: Release Docker Xeon Images

on:
  push:
    branches:
      - main
    paths:
      - "python/sglang/version.py"
  workflow_dispatch:

jobs:
  publish:
    if: github.repository == 'sgl-project/sglang'
    runs-on: ubuntu-24.04
    environment: 'prod'
    strategy:
      matrix:
        build_type: ['all']
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Login to Docker Hub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Build and Push
        run: |
          version=$(cat python/sglang/version.py | cut -d'"' -f2)
          tag=v${version}-xeon

          docker build . -f docker/Dockerfile.xeon -t lmsysorg/sglang:${tag} --no-cache
          docker push lmsysorg/sglang:${tag}
97
.github/workflows/release-docker.yml
vendored
Normal file
@@ -0,0 +1,97 @@
name: Release Docker Images

on:
  push:
    branches:
      - main
    paths:
      - "python/sglang/version.py"
  workflow_dispatch:

jobs:
  publish:
    if: github.repository == 'sgl-project/sglang'
    runs-on: ubuntu-latest
    environment: 'prod'
    strategy:
      matrix:
        cuda_version: ['12.6.1', '12.8.1', '12.9.1']
        build_type: ['all', 'blackwell']
        exclude:
          - cuda_version: '12.6.1'
            build_type: 'blackwell'
          - cuda_version: '12.8.1'
            build_type: 'all'
          - cuda_version: '12.9.1'
            build_type: 'all'
    steps:
      - name: Delete huge unnecessary tools folder
        run: rm -rf /opt/hostedtoolcache

      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Free disk space
        uses: jlumbroso/free-disk-space@main
        with:
          tool-cache: false
          docker-images: false
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          swap-storage: false

      - name: Login to Docker Hub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Build and Push
        run: |
          version=$(cat python/sglang/version.py | cut -d'"' -f2)

          if [ "${{ matrix.cuda_version }}" = "11.8.0" ]; then
            cuda_tag="cu118"
          elif [ "${{ matrix.cuda_version }}" = "12.1.1" ]; then
            cuda_tag="cu121"
          elif [ "${{ matrix.cuda_version }}" = "12.4.1" ]; then
            cuda_tag="cu124"
          elif [ "${{ matrix.cuda_version }}" = "12.5.1" ]; then
            cuda_tag="cu125"
          elif [ "${{ matrix.cuda_version }}" = "12.6.1" ]; then
            cuda_tag="cu126"
          elif [ "${{ matrix.cuda_version }}" = "12.8.1" ]; then
            cuda_tag="cu128"
          elif [ "${{ matrix.cuda_version }}" = "12.9.1" ]; then
            cuda_tag="cu129"
          else
            echo "Unsupported CUDA version"
            exit 1
          fi

          tag=v${version}-${cuda_tag}

          if [ "${{ matrix.build_type }}" = "all" ]; then
            tag_suffix=""
          elif [ "${{ matrix.build_type }}" = "srt" ]; then
            tag_suffix="-srt"
          elif [ "${{ matrix.build_type }}" = "blackwell" ]; then
            tag_suffix="-b200"
          else
            echo "Unsupported build type"
            exit 1
          fi

          docker buildx build --output type=image,compression=zstd . -f docker/Dockerfile --build-arg CUDA_VERSION=${{ matrix.cuda_version }} --build-arg BUILD_TYPE=${{ matrix.build_type }} -t lmsysorg/sglang:${tag}${tag_suffix} --no-cache
          docker push lmsysorg/sglang:${tag}${tag_suffix}

          if [ "${{ matrix.cuda_version }}" = "12.6.1" ]; then
            docker tag lmsysorg/sglang:${tag}${tag_suffix} lmsysorg/sglang:latest${tag_suffix}
            docker push lmsysorg/sglang:latest${tag_suffix}
          fi

          if [ "${{ matrix.cuda_version }}" = "12.9.1" ]; then
            docker tag lmsysorg/sglang:${tag}${tag_suffix} lmsysorg/sglang:v${version}
            docker push lmsysorg/sglang:v${version}
          fi
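With the excludes applied, the matrix above resolves to exactly three build combinations. As a sketch of what each one publishes, assuming a hypothetical version 0.5.2:

# cuda_version 12.6.1 + build_type all       -> lmsysorg/sglang:v0.5.2-cu126       (also retagged lmsysorg/sglang:latest)
# cuda_version 12.8.1 + build_type blackwell -> lmsysorg/sglang:v0.5.2-cu128-b200
# cuda_version 12.9.1 + build_type blackwell -> lmsysorg/sglang:v0.5.2-cu129-b200  (also retagged lmsysorg/sglang:v0.5.2)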
65
.github/workflows/release-docs.yml
vendored
Normal file
@@ -0,0 +1,65 @@
name: Release Documentation

on:
  push:
    branches:
      - main
    paths:
      - "docs/**"
      - "python/sglang/version.py"
      - "python/sglang/**"
  workflow_dispatch:

concurrency:
  group: release-docs-${{ github.ref }}
  cancel-in-progress: true

jobs:
  execute-and-deploy:
    runs-on: 1-gpu-runner
    if: github.repository == 'sgl-project/sglang'
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install dependencies
        run: |
          bash scripts/ci/ci_install_dependency.sh
          pip install -r docs/requirements.txt
          apt-get update && apt-get install -y pandoc parallel retry
          ln -sf "$(which python3)" /usr/bin/python

      - name: Setup Jupyter Kernel
        run: |
          python -m ipykernel install --user --name python3 --display-name "Python 3"

      - name: Execute notebooks
        timeout-minutes: 40
        run: |
          cd docs
          make clean
          make compile

      - name: Push HTML to sgl-project.github.io
        timeout-minutes: 60
        env:
          GITHUB_TOKEN: ${{ secrets.DOCUMENTATION_PAT_TOKEN }}
        run: |
          cd docs
          make html
          python3 wrap_run_llm.py

          cd _build/html

          git clone https://$GITHUB_TOKEN@github.com/sgl-project/sgl-project.github.io.git ../sgl-project.github.io --depth 1
          find ../sgl-project.github.io/ -mindepth 1 -not -path "../sgl-project.github.io/.git*" -not -name CNAME -not -name ".jekyll" -not -name ".nojekyll" -delete
          cp -r * ../sgl-project.github.io
          cp ../../README.md ../sgl-project.github.io/README.md
          cd ../sgl-project.github.io
          git config user.name "zhaochenyang20"
          git config user.email "zhaochenyang20@gmail.com"
          git add .
          git commit -m "Update $(date +'%Y-%m-%d %H:%M:%S')"
          git push https://$GITHUB_TOKEN@github.com/sgl-project/sgl-project.github.io.git main
          cd ..
          rm -rf sgl-project.github.io
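The find invocation in the deploy step clears the previous site while keeping the git metadata and the CNAME, .jekyll, and .nojekyll markers. A minimal dry-run sketch of that behavior, assuming a pages checkout at ../sgl-project.github.io as above (the workflow's -delete is swapped for -print so nothing is removed):

# Lists what the deploy step would delete (inspection only).
find ../sgl-project.github.io/ -mindepth 1 \
  -not -path "../sgl-project.github.io/.git*" \
  -not -name CNAME -not -name ".jekyll" -not -name ".nojekyll" \
  -print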
35
.github/workflows/release-fake-tag.yml
vendored
Normal file
@@ -0,0 +1,35 @@
name: Release Fake Tag

on:
  push:
    branches:
      - main
    paths:
      - "python/sglang/version.py"
  workflow_dispatch:

permissions:
  contents: write

jobs:
  publish:
    if: github.repository == 'sgl-project/sglang'
    runs-on: ubuntu-latest
    environment: 'prod'
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Get version
        id: get_version
        run: |
          version=$(cat python/sglang/version.py | cut -d'"' -f2)
          echo "TAG=v$version" >> $GITHUB_OUTPUT

      - name: Create and push fake tag
        env:
          GITHUB_TOKEN: ${{ secrets.REPO_TOKEN }}
        run: |
          git config user.name zhyncs
          git config user.email me@zhyncs.com
          git checkout -b ${{ steps.get_version.outputs.TAG }}
          git push --set-upstream origin ${{ steps.get_version.outputs.TAG }}
119
.github/workflows/release-pypi-router.yml
vendored
Normal file
@@ -0,0 +1,119 @@
# Reference: https://github.com/openai/tiktoken/blob/63527649963def8c759b0f91f2eb69a40934e468/.github/workflows/build_wheels.yml#L1

name: Release SGLang Router to PyPI

on:
  push:
    branches:
      - main
    paths:
      - sgl-router/pyproject.toml
  workflow_dispatch:

jobs:
  build:
    name: Build on ${{ matrix.os }} (${{ matrix.target }})
    runs-on: ${{ matrix.os }}-latest
    strategy:
      fail-fast: false
      matrix:
        include:
          - os: ubuntu
            target: x86_64

    steps:
      - uses: actions/checkout@v4
        with:
          path: sglang-repo

      - name: Move sgl-router folder to root and delete sglang-repo
        run: |
          mv sglang-repo/sgl-router/* .
          rm -rf sglang-repo
          ls -alt

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Install build dependencies
        run: |
          python -m pip install -U pip
          python -m pip install build twine auditwheel

      - name: Build package
        uses: pypa/cibuildwheel@v2.21.3
        env:
          CIBW_BUILD: "cp38-manylinux_x86_64 cp39-manylinux_x86_64 cp310-manylinux_x86_64 cp311-manylinux_x86_64 cp312-manylinux_x86_64"
          CIBW_BEFORE_ALL: |
            yum update -y && yum install -y openssl-devel wget unzip && \
            # Install latest protoc (v32.0) that supports proto3
            cd /tmp && \
            wget https://github.com/protocolbuffers/protobuf/releases/download/v32.0/protoc-32.0-linux-x86_64.zip && \
            unzip protoc-32.0-linux-x86_64.zip -d /usr/local && \
            rm protoc-32.0-linux-x86_64.zip && \
            # Install Rust
            curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
          CIBW_ENVIRONMENT: "PATH=$HOME/.cargo/bin:$PATH"

      - name: List built packages
        run: ls -lh wheelhouse/

      - name: Check packages
        run: twine check --strict wheelhouse/*

      - uses: actions/upload-artifact@v4
        with:
          name: packages-${{ matrix.os }}-${{ matrix.target }}
          path: wheelhouse/

  build-sdist:
    name: Build SDist
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          path: sglang-repo

      - name: Move sgl-router folder to root, copy the license file, and delete sglang-repo
        run: |
          mv sglang-repo/sgl-router/* .
          mv sglang-repo/LICENSE .
          rm -rf sglang-repo
          ls -alt

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Build SDist
        run: |
          pip install build
          python -m pip install -U packaging
          python -m build --sdist

      - uses: actions/upload-artifact@v4
        with:
          name: sdist
          path: dist/*.tar.gz

  upload:
    name: Upload to PyPI
    if: github.repository == 'sgl-project/sglang' # Ensure this job only runs for the sgl-project/sglang repository
    needs: [build, build-sdist]
    runs-on: ubuntu-latest
    steps:
      - uses: actions/download-artifact@v4
        with:
          path: dist
          merge-multiple: true

      - name: Upload to PyPI
        env:
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN_ROUTER }}
        run: |
          pip install twine
          twine upload dist/* --verbose
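The router wheels are produced by cibuildwheel inside a manylinux container with Rust and protoc installed first. A hedged sketch of reproducing a single wheel locally, assuming Docker and the same environment variables as the step above (illustration only, not part of the repo):

# Hypothetical local reproduction of one router wheel; the workflow's
# CIBW_BEFORE_ALL (protoc + Rust install) must also be exported for the
# container build to succeed.
pip install cibuildwheel==2.21.3
cd sgl-router
CIBW_BUILD="cp311-manylinux_x86_64" \
CIBW_ENVIRONMENT='PATH=$HOME/.cargo/bin:$PATH' \
cibuildwheel --platform linux --output-dir wheelhouse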
31
.github/workflows/release-pypi.yml
vendored
Normal file
@@ -0,0 +1,31 @@
name: Release PyPI

on:
  push:
    branches:
      - main
    paths:
      - "python/sglang/version.py"
  workflow_dispatch:

jobs:
  publish:
    if: github.repository == 'sgl-project/sglang'
    runs-on: ubuntu-latest
    environment: "prod"
    steps:
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: "3.10"

      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Upload to pypi
        run: |
          cd python
          cp ../README.md ../LICENSE .
          pip install build
          python3 -m build
          pip install twine
          python3 -m twine upload dist/* -u __token__ -p ${{ secrets.PYPI_TOKEN }}
92
.github/workflows/release-whl-kernel-cu118.yml
vendored
Normal file
@@ -0,0 +1,92 @@
name: Release SGLang Kernel Wheel (cu118)

on:
  workflow_dispatch:
    inputs:
      tag_name:
        type: string
  push:
    branches:
      - main
    paths:
      - sgl-kernel/python/sgl_kernel/version.py

jobs:
  build-wheels:
    if: github.repository == 'sgl-project/sglang'
    runs-on: sgl-kernel-release-node
    strategy:
      matrix:
        python-version: ["3.9"]
        cuda-version: ["11.8"]

    steps:
      - uses: actions/checkout@v4
        with:
          submodules: "recursive"

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Build wheels for Python ${{ matrix.python-version }} and CUDA ${{ matrix.cuda-version }}
        run: |
          cd sgl-kernel
          chmod +x ./build.sh
          ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}"

      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}
          path: sgl-kernel/dist/*

  release:
    needs: build-wheels
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Download artifacts
        uses: actions/download-artifact@v4
        with:
          path: sgl-kernel/dist/
          merge-multiple: true
          pattern: wheel-*

      - name: Set tag name
        id: set_tag_name
        run: |
          if [ -z "${{ inputs.tag_name }}" ]; then
            TAG_NAME="v$(cat sgl-kernel/python/sgl_kernel/version.py | cut -d'"' -f2)"
            echo "tag_name=$TAG_NAME" >> $GITHUB_OUTPUT
          else
            echo "tag_name=${{ inputs.tag_name }}" >> $GITHUB_OUTPUT
          fi

      - name: Release
        uses: softprops/action-gh-release@v2
        with:
          tag_name: ${{ steps.set_tag_name.outputs.tag_name }}
          repository: sgl-project/whl
          token: ${{ secrets.WHL_TOKEN }}
          files: |
            sgl-kernel/dist/*

      - name: Clone wheel index
        run: git clone https://oauth2:${WHL_TOKEN}@github.com/sgl-project/whl.git sgl-whl
        env:
          WHL_TOKEN: ${{ secrets.WHL_TOKEN }}

      - name: Update wheel index
        run: python3 scripts/update_kernel_whl_index.py

      - name: Push wheel index
        run: |
          cd sgl-whl
          git config --local user.name "github-actions[bot]"
          git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
          git add -A
          git commit -m "update whl index"
          git push
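The tag-name fallback in the release job above recurs verbatim in each kernel release workflow: an explicit workflow_dispatch input wins, otherwise the version file decides. A minimal sketch of that shared logic as a shell function, assuming sgl-kernel/python/sgl_kernel/version.py holds a quoted version string (the function name is illustrative, not part of the repo):

# Hypothetical helper mirroring the repeated "Set tag name" step.
set_tag_name() {
  if [ -z "$1" ]; then
    echo "v$(cut -d'"' -f2 sgl-kernel/python/sgl_kernel/version.py)"
  else
    echo "$1"
  fi
}
set_tag_name ""         # falls back to v<version from version.py>
set_tag_name "v0.3.7"   # explicit input wins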
283
.github/workflows/release-whl-kernel.yml
vendored
Normal file
@@ -0,0 +1,283 @@
name: Release SGLang Kernels

on:
  push:
    branches:
      - main
    paths:
      - sgl-kernel/python/sgl_kernel/version.py
  workflow_dispatch:
    inputs:
      tag_name:
        type: string
        required: false

concurrency:
  group: release-sglang-kernels-${{ github.ref }}
  cancel-in-progress: true

jobs:
  build-cu129:
    if: github.repository == 'sgl-project/sglang'
    runs-on: sgl-kernel-release-node
    strategy:
      matrix:
        python-version: ["3.10"]
        cuda-version: ["12.9"]
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: "recursive"

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Build wheels
        run: |
          cd sgl-kernel
          chmod +x ./build.sh
          ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}"

      - name: Upload to PyPI
        working-directory: sgl-kernel
        run: |
          pip install twine
          python3 -m twine upload dist/* -u __token__ -p ${{ secrets.PYPI_TOKEN }}

  build-cu124:
    if: github.repository == 'sgl-project/sglang'
    needs: build-cu129
    runs-on: sgl-kernel-release-node
    strategy:
      matrix:
        python-version: ["3.10"]
        cuda-version: ["12.4"]
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: "recursive"

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Build wheels
        run: |
          cd sgl-kernel
          chmod +x ./build.sh
          ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}"

      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}
          path: sgl-kernel/dist/*

  release-cu124:
    needs: build-cu124
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Download artifacts
        uses: actions/download-artifact@v4
        with:
          path: sgl-kernel/dist/
          merge-multiple: true
          pattern: wheel-*

      - name: Set tag name
        id: set_tag_name
        run: |
          if [ -z "${{ inputs.tag_name }}" ]; then
            TAG_NAME="v$(cat sgl-kernel/python/sgl_kernel/version.py | cut -d'"' -f2)"
            echo "tag_name=$TAG_NAME" >> $GITHUB_OUTPUT
          else
            echo "tag_name=${{ inputs.tag_name }}" >> $GITHUB_OUTPUT
          fi

      - name: Release
        uses: softprops/action-gh-release@v2
        with:
          tag_name: ${{ steps.set_tag_name.outputs.tag_name }}
          repository: sgl-project/whl
          token: ${{ secrets.WHL_TOKEN }}
          files: |
            sgl-kernel/dist/*

      - name: Clone wheel index
        run: git clone https://oauth2:${WHL_TOKEN}@github.com/sgl-project/whl.git sgl-whl
        env:
          WHL_TOKEN: ${{ secrets.WHL_TOKEN }}

      - name: Update wheel index
        run: python3 scripts/update_kernel_whl_index.py --cuda 124

      - name: Push wheel index
        run: |
          cd sgl-whl
          git config --local user.name "github-actions[bot]"
          git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
          git add -A
          git commit -m "update whl index"
          git push

  build-cu128:
    if: github.repository == 'sgl-project/sglang'
    needs: build-cu129
    runs-on: sgl-kernel-release-node
    strategy:
      matrix:
        python-version: ["3.10"]
        cuda-version: ["12.8"]
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: "recursive"

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Build wheels
        run: |
          cd sgl-kernel
          chmod +x ./build.sh
          ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}"

      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}
          path: sgl-kernel/dist/*

  release-cu128:
    needs: build-cu128
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Download artifacts
        uses: actions/download-artifact@v4
        with:
          path: sgl-kernel/dist/
          merge-multiple: true
          pattern: wheel-*

      - name: Set tag name
        id: set_tag_name
        run: |
          if [ -z "${{ inputs.tag_name }}" ]; then
            TAG_NAME="v$(cat sgl-kernel/python/sgl_kernel/version.py | cut -d'"' -f2)"
            echo "tag_name=$TAG_NAME" >> $GITHUB_OUTPUT
          else
            echo "tag_name=${{ inputs.tag_name }}" >> $GITHUB_OUTPUT
          fi

      - name: Release
        uses: softprops/action-gh-release@v2
        with:
          tag_name: ${{ steps.set_tag_name.outputs.tag_name }}
          repository: sgl-project/whl
          token: ${{ secrets.WHL_TOKEN }}
          files: |
            sgl-kernel/dist/*

      - name: Clone wheel index
        run: git clone https://oauth2:${WHL_TOKEN}@github.com/sgl-project/whl.git sgl-whl
        env:
          WHL_TOKEN: ${{ secrets.WHL_TOKEN }}

      - name: Update wheel index
        run: python3 scripts/update_kernel_whl_index.py --cuda 128

      - name: Push wheel index
        run: |
          cd sgl-whl
          git config --local user.name "github-actions[bot]"
          git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
          git add -A
          git commit -m "update whl index"
          git push

  build-cu129-aarch64:
    if: github.repository == 'sgl-project/sglang'
    runs-on: sgl-kernel-release-node-arm
    strategy:
      matrix:
        python-version: ["3.10"]
        cuda-version: ["12.9"]
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: "recursive"

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Build wheels
        run: |
          cd sgl-kernel
          chmod +x ./build.sh
          ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}" aarch64

      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}-aarch64
          path: sgl-kernel/dist/*

  release-cu129-aarch64:
    needs: build-cu129-aarch64
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Download artifacts
        uses: actions/download-artifact@v4
        with:
          path: sgl-kernel/dist/
          merge-multiple: true
          pattern: wheel-*

      - name: Set tag name
        id: set_tag_name
        run: |
          if [ -z "${{ inputs.tag_name }}" ]; then
            TAG_NAME="v$(cat sgl-kernel/python/sgl_kernel/version.py | cut -d'"' -f2)"
            echo "tag_name=$TAG_NAME" >> $GITHUB_OUTPUT
          else
            echo "tag_name=${{ inputs.tag_name }}" >> $GITHUB_OUTPUT
          fi

      - name: Release
        uses: softprops/action-gh-release@v2
        with:
          tag_name: ${{ steps.set_tag_name.outputs.tag_name }}
          repository: sgl-project/whl
          token: ${{ secrets.WHL_TOKEN }}
          files: |
            sgl-kernel/dist/*

      - name: Clone wheel index
        run: git clone https://oauth2:${WHL_TOKEN}@github.com/sgl-project/whl.git sgl-whl
        env:
          WHL_TOKEN: ${{ secrets.WHL_TOKEN }}

      - name: Update wheel index
        run: python3 scripts/update_kernel_whl_index.py --cuda 129

      - name: Push wheel index
        run: |
          cd sgl-whl
          git config --local user.name "github-actions[bot]"
          git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
          git add -A
          git commit -m "update whl index"
          git push
43
.github/workflows/vllm-dependency-test.yml
vendored
Normal file
@@ -0,0 +1,43 @@
name: VLLM Dependency Test

on:
  push:
    branches: [ main ]
    paths:
      - "python/**"
      - "scripts/ci/**"
      - "test/**"
  pull_request:
    branches: [ main ]
    paths:
      - "python/**"
      - "scripts/ci/**"
      - "test/**"

concurrency:
  group: vllm-dependency-test-${{ github.ref }}
  cancel-in-progress: true

jobs:
  vllm-dependency-test:
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
        github.event.pull_request.draft == false
    runs-on: 1-gpu-runner
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install dependencies
        run: |
          bash scripts/ci/ci_install_dependency.sh
          pip install "bitsandbytes>=0.44.0"

          pip install "sgl-kernel==0.3.7"

      - name: Run vLLM dependency tests
        timeout-minutes: 60
        run: |
          export SGLANG_SKIP_SGL_KERNEL_VERSION_CHECK=1

          cd test/srt
          python3 run_suite.py --suite vllm_dependency_test --timeout-per-file 3600