[router] include rust benchmarks (#9932)
This commit is contained in:
257
.github/workflows/pr-benchmark-rust.yml
vendored
257
.github/workflows/pr-benchmark-rust.yml
vendored
@@ -9,6 +9,7 @@ on:
|
|||||||
branches: [ main ]
|
branches: [ main ]
|
||||||
paths:
|
paths:
|
||||||
- "sgl-router/**"
|
- "sgl-router/**"
|
||||||
|
types: [opened, synchronize, reopened, labeled]
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
|
|
||||||
concurrency:
|
concurrency:
|
||||||
@@ -19,9 +20,67 @@ permissions:
|
|||||||
pull-requests: write
|
pull-requests: write
|
||||||
issues: write
|
issues: write
|
||||||
jobs:
|
jobs:
|
||||||
benchmark-router:
|
# Quick check job that always runs on PRs
|
||||||
|
benchmark-compile-check:
|
||||||
|
name: Benchmark Compilation Check
|
||||||
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
|
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- name: Checkout code
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Install dependencies
|
||||||
|
run: |
|
||||||
|
bash scripts/ci/ci_install_rust.sh
|
||||||
|
|
||||||
|
- name: Setup sccache
|
||||||
|
uses: mozilla-actions/sccache-action@v0.0.3
|
||||||
|
continue-on-error: true
|
||||||
|
|
||||||
|
- name: Cache Rust dependencies
|
||||||
|
uses: actions/cache@v4
|
||||||
|
with:
|
||||||
|
path: |
|
||||||
|
~/.cargo/bin/
|
||||||
|
~/.cargo/registry/index/
|
||||||
|
~/.cargo/registry/cache/
|
||||||
|
~/.cargo/git/db/
|
||||||
|
sgl-router/target/
|
||||||
|
key: ${{ runner.os }}-cargo-${{ hashFiles('sgl-router/Cargo.lock') }}
|
||||||
|
restore-keys: |
|
||||||
|
${{ runner.os }}-cargo-
|
||||||
|
|
||||||
|
- name: Check benchmarks compile
|
||||||
|
run: |
|
||||||
|
source "$HOME/.cargo/env"
|
||||||
|
cd sgl-router/
|
||||||
|
# Try to use sccache, but disable if it fails
|
||||||
|
if command -v sccache &> /dev/null; then
|
||||||
|
echo "Testing sccache availability..."
|
||||||
|
# Try to start sccache and check if it works
|
||||||
|
export RUSTC_WRAPPER=sccache
|
||||||
|
export SCCACHE_GHA_ENABLED="true"
|
||||||
|
if sccache --start-server 2>/dev/null && sccache --show-stats 2>/dev/null; then
|
||||||
|
echo "sccache is working, using it for compilation"
|
||||||
|
else
|
||||||
|
echo "sccache failed to start, falling back to regular cargo"
|
||||||
|
unset RUSTC_WRAPPER
|
||||||
|
unset SCCACHE_GHA_ENABLED
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
echo "sccache not available, using regular cargo"
|
||||||
|
fi
|
||||||
|
cargo check --benches
|
||||||
|
|
||||||
|
# Full benchmark jobs that only run with label or on main branch
|
||||||
|
benchmark-request-processing:
|
||||||
|
name: Request Processing Benchmark
|
||||||
|
if: |
|
||||||
|
github.repository == 'sgl-project/sglang' &&
|
||||||
|
(github.event_name == 'push' ||
|
||||||
|
github.event_name == 'workflow_dispatch' ||
|
||||||
|
contains(github.event.pull_request.labels.*.name, 'benchmark'))
|
||||||
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
@@ -33,6 +92,10 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
bash scripts/ci/ci_install_rust.sh
|
bash scripts/ci/ci_install_rust.sh
|
||||||
|
|
||||||
|
- name: Setup sccache
|
||||||
|
uses: mozilla-actions/sccache-action@v0.0.3
|
||||||
|
continue-on-error: true
|
||||||
|
|
||||||
- name: Cache Rust dependencies
|
- name: Cache Rust dependencies
|
||||||
uses: actions/cache@v4
|
uses: actions/cache@v4
|
||||||
with:
|
with:
|
||||||
@@ -46,40 +109,61 @@ jobs:
|
|||||||
restore-keys: |
|
restore-keys: |
|
||||||
${{ runner.os }}-cargo-
|
${{ runner.os }}-cargo-
|
||||||
|
|
||||||
- name: Build router in release mode
|
- name: Run request processing benchmark
|
||||||
|
timeout-minutes: 30
|
||||||
run: |
|
run: |
|
||||||
source "$HOME/.cargo/env"
|
source "$HOME/.cargo/env"
|
||||||
cd sgl-router/
|
cd sgl-router/
|
||||||
cargo build --release
|
# Try to use sccache, but disable if it fails
|
||||||
|
if command -v sccache &> /dev/null; then
|
||||||
- name: Run quick benchmarks
|
echo "Testing sccache availability..."
|
||||||
timeout-minutes: 15
|
# Try to start sccache and check if it works
|
||||||
run: |
|
export RUSTC_WRAPPER=sccache
|
||||||
source "$HOME/.cargo/env"
|
export SCCACHE_GHA_ENABLED="true"
|
||||||
cd sgl-router/
|
if sccache --start-server 2>/dev/null && sccache --show-stats 2>/dev/null; then
|
||||||
# Run quick benchmarks for PR validation using Python script
|
echo "sccache is working, using it for compilation"
|
||||||
python3 scripts/run_benchmarks.py --quick --validate-thresholds --save-results
|
else
|
||||||
|
echo "sccache failed to start, falling back to regular cargo"
|
||||||
|
unset RUSTC_WRAPPER
|
||||||
|
unset SCCACHE_GHA_ENABLED
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
echo "sccache not available, using regular cargo"
|
||||||
|
fi
|
||||||
|
# Run only the summary benchmark for quick validation in PRs
|
||||||
|
cargo bench --bench request_processing -- benchmark_summary --exact
|
||||||
|
|
||||||
- name: Upload benchmark results
|
- name: Upload benchmark results
|
||||||
if: always()
|
if: always()
|
||||||
uses: actions/upload-artifact@v4
|
uses: actions/upload-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: benchmark-results-${{ github.sha }}
|
name: request-processing-results-${{ github.sha }}
|
||||||
path: |
|
path: |
|
||||||
sgl-router/target/criterion/
|
sgl-router/target/criterion/benchmark_summary/
|
||||||
retention-days: 30
|
retention-days: 30
|
||||||
|
|
||||||
benchmark-integration-test:
|
benchmark-tokenizer:
|
||||||
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
|
name: Tokenizer Benchmark
|
||||||
|
if: |
|
||||||
|
github.repository == 'sgl-project/sglang' &&
|
||||||
|
(github.event_name == 'push' ||
|
||||||
|
github.event_name == 'workflow_dispatch' ||
|
||||||
|
contains(github.event.pull_request.labels.*.name, 'benchmark'))
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
fetch-depth: 100
|
||||||
|
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: |
|
run: |
|
||||||
bash scripts/ci/ci_install_rust.sh
|
bash scripts/ci/ci_install_rust.sh
|
||||||
|
|
||||||
|
- name: Setup sccache
|
||||||
|
uses: mozilla-actions/sccache-action@v0.0.3
|
||||||
|
continue-on-error: true
|
||||||
|
|
||||||
- name: Cache Rust dependencies
|
- name: Cache Rust dependencies
|
||||||
uses: actions/cache@v4
|
uses: actions/cache@v4
|
||||||
with:
|
with:
|
||||||
@@ -93,17 +177,146 @@ jobs:
|
|||||||
restore-keys: |
|
restore-keys: |
|
||||||
${{ runner.os }}-cargo-
|
${{ runner.os }}-cargo-
|
||||||
|
|
||||||
- name: Run benchmark integration tests
|
- name: Run tokenizer benchmark
|
||||||
timeout-minutes: 10
|
timeout-minutes: 30
|
||||||
run: |
|
run: |
|
||||||
source "$HOME/.cargo/env"
|
source "$HOME/.cargo/env"
|
||||||
cd sgl-router/
|
cd sgl-router/
|
||||||
# Run integration tests to ensure benchmark code compiles and works
|
# Try to use sccache, but disable if it fails
|
||||||
cargo test --test benchmark_integration
|
if command -v sccache &> /dev/null; then
|
||||||
|
echo "Testing sccache availability..."
|
||||||
|
# Try to start sccache and check if it works
|
||||||
|
export RUSTC_WRAPPER=sccache
|
||||||
|
export SCCACHE_GHA_ENABLED="true"
|
||||||
|
if sccache --start-server 2>/dev/null && sccache --show-stats 2>/dev/null; then
|
||||||
|
echo "sccache is working, using it for compilation"
|
||||||
|
else
|
||||||
|
echo "sccache failed to start, falling back to regular cargo"
|
||||||
|
unset RUSTC_WRAPPER
|
||||||
|
unset SCCACHE_GHA_ENABLED
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
echo "sccache not available, using regular cargo"
|
||||||
|
fi
|
||||||
|
cargo bench --bench tokenizer_benchmark
|
||||||
|
|
||||||
- name: Verify benchmark compilation
|
- name: Upload benchmark results
|
||||||
|
if: always()
|
||||||
|
uses: actions/upload-artifact@v4
|
||||||
|
with:
|
||||||
|
name: tokenizer-results-${{ github.sha }}
|
||||||
|
path: |
|
||||||
|
sgl-router/target/criterion/tokenizer*/
|
||||||
|
retention-days: 30
|
||||||
|
|
||||||
|
benchmark-tool-parser:
|
||||||
|
name: Tool Parser Benchmark
|
||||||
|
if: |
|
||||||
|
github.repository == 'sgl-project/sglang' &&
|
||||||
|
(github.event_name == 'push' ||
|
||||||
|
github.event_name == 'workflow_dispatch' ||
|
||||||
|
contains(github.event.pull_request.labels.*.name, 'benchmark'))
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- name: Checkout code
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
with:
|
||||||
|
fetch-depth: 100
|
||||||
|
|
||||||
|
- name: Install dependencies
|
||||||
|
run: |
|
||||||
|
bash scripts/ci/ci_install_rust.sh
|
||||||
|
|
||||||
|
- name: Setup sccache
|
||||||
|
uses: mozilla-actions/sccache-action@v0.0.3
|
||||||
|
continue-on-error: true
|
||||||
|
|
||||||
|
- name: Cache Rust dependencies
|
||||||
|
uses: actions/cache@v4
|
||||||
|
with:
|
||||||
|
path: |
|
||||||
|
~/.cargo/bin/
|
||||||
|
~/.cargo/registry/index/
|
||||||
|
~/.cargo/registry/cache/
|
||||||
|
~/.cargo/git/db/
|
||||||
|
sgl-router/target/
|
||||||
|
key: ${{ runner.os }}-cargo-${{ hashFiles('sgl-router/Cargo.lock') }}
|
||||||
|
restore-keys: |
|
||||||
|
${{ runner.os }}-cargo-
|
||||||
|
|
||||||
|
- name: Run tool parser benchmark
|
||||||
|
timeout-minutes: 30
|
||||||
run: |
|
run: |
|
||||||
source "$HOME/.cargo/env"
|
source "$HOME/.cargo/env"
|
||||||
cd sgl-router/
|
cd sgl-router/
|
||||||
# Ensure all benchmarks compile without running them
|
# Try to use sccache, but disable if it fails
|
||||||
cargo check --benches
|
if command -v sccache &> /dev/null; then
|
||||||
|
echo "Testing sccache availability..."
|
||||||
|
# Try to start sccache and check if it works
|
||||||
|
export RUSTC_WRAPPER=sccache
|
||||||
|
export SCCACHE_GHA_ENABLED="true"
|
||||||
|
if sccache --start-server 2>/dev/null && sccache --show-stats 2>/dev/null; then
|
||||||
|
echo "sccache is working, using it for compilation"
|
||||||
|
else
|
||||||
|
echo "sccache failed to start, falling back to regular cargo"
|
||||||
|
unset RUSTC_WRAPPER
|
||||||
|
unset SCCACHE_GHA_ENABLED
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
echo "sccache not available, using regular cargo"
|
||||||
|
fi
|
||||||
|
cargo bench --bench tool_parser_benchmark
|
||||||
|
|
||||||
|
- name: Upload benchmark results
|
||||||
|
if: always()
|
||||||
|
uses: actions/upload-artifact@v4
|
||||||
|
with:
|
||||||
|
name: tool-parser-results-${{ github.sha }}
|
||||||
|
path: |
|
||||||
|
sgl-router/target/criterion/tool_parser*/
|
||||||
|
retention-days: 30
|
||||||
|
|
||||||
|
benchmark-summary:
|
||||||
|
name: Benchmark Summary
|
||||||
|
needs: [benchmark-request-processing, benchmark-tokenizer, benchmark-tool-parser]
|
||||||
|
if: always() && (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- name: Download all benchmark results
|
||||||
|
uses: actions/download-artifact@v4
|
||||||
|
with:
|
||||||
|
pattern: '*-results-${{ github.sha }}'
|
||||||
|
path: benchmark-results
|
||||||
|
|
||||||
|
- name: Generate summary
|
||||||
|
run: |
|
||||||
|
echo "## Benchmark Results Summary" > summary.md
|
||||||
|
echo "" >> summary.md
|
||||||
|
echo "### Request Processing" >> summary.md
|
||||||
|
if [ -d "benchmark-results/request-processing-results-${{ github.sha }}" ]; then
|
||||||
|
echo "✅ Completed" >> summary.md
|
||||||
|
else
|
||||||
|
echo "❌ Failed or skipped" >> summary.md
|
||||||
|
fi
|
||||||
|
echo "" >> summary.md
|
||||||
|
echo "### Tokenizer" >> summary.md
|
||||||
|
if [ -d "benchmark-results/tokenizer-results-${{ github.sha }}" ]; then
|
||||||
|
echo "✅ Completed" >> summary.md
|
||||||
|
else
|
||||||
|
echo "❌ Failed or skipped" >> summary.md
|
||||||
|
fi
|
||||||
|
echo "" >> summary.md
|
||||||
|
echo "### Tool Parser" >> summary.md
|
||||||
|
if [ -d "benchmark-results/tool-parser-results-${{ github.sha }}" ]; then
|
||||||
|
echo "✅ Completed" >> summary.md
|
||||||
|
else
|
||||||
|
echo "❌ Failed or skipped" >> summary.md
|
||||||
|
fi
|
||||||
|
cat summary.md
|
||||||
|
|
||||||
|
- name: Upload summary
|
||||||
|
uses: actions/upload-artifact@v4
|
||||||
|
with:
|
||||||
|
name: benchmark-summary-${{ github.sha }}
|
||||||
|
path: summary.md
|
||||||
|
retention-days: 30
|
||||||
|
|||||||
@@ -1,203 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
"""
|
|
||||||
GitHub PR Comment Poster for Benchmark Results
|
|
||||||
|
|
||||||
Posts benchmark results as comments on GitHub PRs with update capability.
|
|
||||||
Replaces JavaScript logic in GitHub Actions for better maintainability.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Dict, Optional
|
|
||||||
|
|
||||||
import requests
|
|
||||||
|
|
||||||
|
|
||||||
class GitHubCommentPoster:
|
|
||||||
"""Handles posting benchmark results as GitHub PR comments."""
|
|
||||||
|
|
||||||
def __init__(self, token: str, repo_owner: str, repo_name: str):
|
|
||||||
self.token = token
|
|
||||||
self.repo_owner = repo_owner
|
|
||||||
self.repo_name = repo_name
|
|
||||||
self.base_url = f"https://api.github.com/repos/{repo_owner}/{repo_name}"
|
|
||||||
self.headers = {
|
|
||||||
"Authorization": f"token {token}",
|
|
||||||
"Accept": "application/vnd.github.v3+json",
|
|
||||||
}
|
|
||||||
|
|
||||||
def read_benchmark_results(self, results_file: str) -> Dict[str, str]:
|
|
||||||
"""Read benchmark results from file."""
|
|
||||||
results = {}
|
|
||||||
filepath = Path(results_file)
|
|
||||||
|
|
||||||
if not filepath.exists():
|
|
||||||
print(f"Results file not found: {filepath}")
|
|
||||||
return {"error": "Results file not found"}
|
|
||||||
|
|
||||||
try:
|
|
||||||
with open(filepath, "r") as f:
|
|
||||||
for line in f:
|
|
||||||
line = line.strip()
|
|
||||||
if "=" in line:
|
|
||||||
key, value = line.split("=", 1)
|
|
||||||
results[key] = value
|
|
||||||
except Exception as e:
|
|
||||||
print(f"Error reading results file: {e}")
|
|
||||||
return {"error": str(e)}
|
|
||||||
|
|
||||||
return results
|
|
||||||
|
|
||||||
def format_benchmark_comment(
|
|
||||||
self, results: Dict[str, str], pr_number: int, commit_sha: str
|
|
||||||
) -> str:
|
|
||||||
"""Format benchmark results into a GitHub comment."""
|
|
||||||
serialization_time = results.get("serialization_time", "N/A")
|
|
||||||
deserialization_time = results.get("deserialization_time", "N/A")
|
|
||||||
adaptation_time = results.get("adaptation_time", "N/A")
|
|
||||||
total_time = results.get("total_time", "N/A")
|
|
||||||
|
|
||||||
comment = f"""
|
|
||||||
### SGLang Router Benchmark Results
|
|
||||||
|
|
||||||
**Performance Summary for PR #{pr_number}**
|
|
||||||
|
|
||||||
The router benchmarks have completed successfully!
|
|
||||||
|
|
||||||
**Performance Thresholds:** All passed
|
|
||||||
- Serialization: < 2μs
|
|
||||||
- Deserialization: < 2μs
|
|
||||||
- PD Adaptation: < 5μs
|
|
||||||
- Total Pipeline: < 10μs
|
|
||||||
|
|
||||||
**Measured Results:**
|
|
||||||
- Serialization: `{serialization_time}`ns
|
|
||||||
- Deserialization: `{deserialization_time}`ns
|
|
||||||
- PD Adaptation: `{adaptation_time}`ns
|
|
||||||
- Total Pipeline: `{total_time}`ns
|
|
||||||
|
|
||||||
**Detailed Reports:**
|
|
||||||
- Download the `benchmark-results-{commit_sha}` artifact for HTML reports
|
|
||||||
- Run `make bench` locally for detailed analysis
|
|
||||||
|
|
||||||
**Commit:** {commit_sha}
|
|
||||||
""".strip()
|
|
||||||
|
|
||||||
return comment
|
|
||||||
|
|
||||||
def find_existing_comment(self, pr_number: int) -> Optional[int]:
|
|
||||||
"""Find existing benchmark comment in the PR."""
|
|
||||||
url = f"{self.base_url}/issues/{pr_number}/comments"
|
|
||||||
|
|
||||||
try:
|
|
||||||
response = requests.get(url, headers=self.headers)
|
|
||||||
response.raise_for_status()
|
|
||||||
comments = response.json()
|
|
||||||
|
|
||||||
for comment in comments:
|
|
||||||
if comment.get("user", {}).get(
|
|
||||||
"login"
|
|
||||||
) == "github-actions[bot]" and "SGLang Router Benchmark Results" in comment.get(
|
|
||||||
"body", ""
|
|
||||||
):
|
|
||||||
return comment["id"]
|
|
||||||
|
|
||||||
except requests.RequestException as e:
|
|
||||||
print(f"Error fetching comments: {e}")
|
|
||||||
|
|
||||||
return None
|
|
||||||
|
|
||||||
def post_comment(self, pr_number: int, comment_body: str) -> bool:
|
|
||||||
"""Post a new comment on the PR."""
|
|
||||||
url = f"{self.base_url}/issues/{pr_number}/comments"
|
|
||||||
data = {"body": comment_body}
|
|
||||||
|
|
||||||
try:
|
|
||||||
response = requests.post(url, headers=self.headers, json=data)
|
|
||||||
response.raise_for_status()
|
|
||||||
print(f"Posted new benchmark comment on PR #{pr_number}")
|
|
||||||
return True
|
|
||||||
except requests.RequestException as e:
|
|
||||||
print(f"Error posting comment: {e}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
def update_comment(self, comment_id: int, comment_body: str) -> bool:
|
|
||||||
"""Update an existing comment."""
|
|
||||||
url = f"{self.base_url}/issues/comments/{comment_id}"
|
|
||||||
data = {"body": comment_body}
|
|
||||||
|
|
||||||
try:
|
|
||||||
response = requests.patch(url, headers=self.headers, json=data)
|
|
||||||
response.raise_for_status()
|
|
||||||
print(f"Updated existing benchmark comment (ID: {comment_id})")
|
|
||||||
return True
|
|
||||||
except requests.RequestException as e:
|
|
||||||
print(f"Error updating comment: {e}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
def post_or_update_comment(
|
|
||||||
self, pr_number: int, results_file: str, commit_sha: str
|
|
||||||
) -> bool:
|
|
||||||
"""Post or update benchmark results comment on PR."""
|
|
||||||
# Read benchmark results
|
|
||||||
results = self.read_benchmark_results(results_file)
|
|
||||||
if "error" in results:
|
|
||||||
print(f"Failed to read benchmark results: {results['error']}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
# Format comment
|
|
||||||
comment_body = self.format_benchmark_comment(results, pr_number, commit_sha)
|
|
||||||
|
|
||||||
# Check for existing comment
|
|
||||||
existing_comment_id = self.find_existing_comment(pr_number)
|
|
||||||
|
|
||||||
if existing_comment_id:
|
|
||||||
return self.update_comment(existing_comment_id, comment_body)
|
|
||||||
else:
|
|
||||||
return self.post_comment(pr_number, comment_body)
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
parser = argparse.ArgumentParser(description="Post benchmark results to GitHub PR")
|
|
||||||
parser.add_argument(
|
|
||||||
"--pr-number", type=int, required=True, help="Pull request number"
|
|
||||||
)
|
|
||||||
parser.add_argument("--commit-sha", type=str, required=True, help="Commit SHA")
|
|
||||||
parser.add_argument(
|
|
||||||
"--results-file",
|
|
||||||
type=str,
|
|
||||||
default="benchmark_results.env",
|
|
||||||
help="Path to benchmark results file",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--repo-owner", type=str, default="sgl-project", help="GitHub repository owner"
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--repo-name", type=str, default="sglang", help="GitHub repository name"
|
|
||||||
)
|
|
||||||
|
|
||||||
args = parser.parse_args()
|
|
||||||
|
|
||||||
# Get GitHub token from environment
|
|
||||||
github_token = os.environ.get("GITHUB_TOKEN")
|
|
||||||
if not github_token:
|
|
||||||
print("Error: GITHUB_TOKEN environment variable is required")
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
# Create poster and post comment
|
|
||||||
poster = GitHubCommentPoster(github_token, args.repo_owner, args.repo_name)
|
|
||||||
success = poster.post_or_update_comment(
|
|
||||||
args.pr_number, args.results_file, args.commit_sha
|
|
||||||
)
|
|
||||||
|
|
||||||
if not success:
|
|
||||||
print("Failed to post benchmark comment")
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
print("Benchmark comment posted successfully!")
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
@@ -1,228 +0,0 @@
|
|||||||
// Integration test to ensure benchmarks compile and basic functionality works
|
|
||||||
// This prevents benchmarks from breaking in CI
|
|
||||||
//
|
|
||||||
// UPDATED: Removed deprecated ToPdRequest usage, now uses direct JSON serialization
|
|
||||||
|
|
||||||
use serde_json::{from_str, to_string, to_value};
|
|
||||||
use sglang_router_rs::core::{BasicWorker, WorkerType};
|
|
||||||
use sglang_router_rs::protocols::spec::{
|
|
||||||
ChatCompletionRequest, ChatMessage, CompletionRequest, GenerateParameters, GenerateRequest,
|
|
||||||
SamplingParams, StringOrArray, UserMessageContent,
|
|
||||||
};
|
|
||||||
|
|
||||||
/// Create a default GenerateRequest for benchmarks with minimal fields set
|
|
||||||
fn default_generate_request() -> GenerateRequest {
|
|
||||||
GenerateRequest {
|
|
||||||
text: None,
|
|
||||||
prompt: None,
|
|
||||||
input_ids: None,
|
|
||||||
stream: false,
|
|
||||||
parameters: None,
|
|
||||||
sampling_params: None,
|
|
||||||
return_logprob: false,
|
|
||||||
// SGLang Extensions
|
|
||||||
lora_path: None,
|
|
||||||
session_params: None,
|
|
||||||
return_hidden_states: false,
|
|
||||||
rid: None,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Create a default ChatCompletionRequest for benchmarks with minimal fields set
|
|
||||||
fn default_chat_completion_request() -> ChatCompletionRequest {
|
|
||||||
ChatCompletionRequest {
|
|
||||||
model: String::new(),
|
|
||||||
messages: vec![],
|
|
||||||
max_tokens: None,
|
|
||||||
max_completion_tokens: None,
|
|
||||||
temperature: None,
|
|
||||||
top_p: None,
|
|
||||||
n: None,
|
|
||||||
stream: false,
|
|
||||||
stream_options: None,
|
|
||||||
stop: None,
|
|
||||||
presence_penalty: None,
|
|
||||||
frequency_penalty: None,
|
|
||||||
logit_bias: None,
|
|
||||||
logprobs: false,
|
|
||||||
top_logprobs: None,
|
|
||||||
user: None,
|
|
||||||
response_format: None,
|
|
||||||
seed: None,
|
|
||||||
tools: None,
|
|
||||||
tool_choice: None,
|
|
||||||
parallel_tool_calls: None,
|
|
||||||
function_call: None,
|
|
||||||
functions: None,
|
|
||||||
// SGLang Extensions
|
|
||||||
top_k: None,
|
|
||||||
min_p: None,
|
|
||||||
min_tokens: None,
|
|
||||||
repetition_penalty: None,
|
|
||||||
regex: None,
|
|
||||||
ebnf: None,
|
|
||||||
stop_token_ids: None,
|
|
||||||
no_stop_trim: false,
|
|
||||||
ignore_eos: false,
|
|
||||||
continue_final_message: false,
|
|
||||||
skip_special_tokens: true,
|
|
||||||
// SGLang Extensions
|
|
||||||
lora_path: None,
|
|
||||||
session_params: None,
|
|
||||||
separate_reasoning: true,
|
|
||||||
stream_reasoning: true,
|
|
||||||
return_hidden_states: false,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Create a default CompletionRequest for benchmarks with minimal fields set
|
|
||||||
fn default_completion_request() -> CompletionRequest {
|
|
||||||
CompletionRequest {
|
|
||||||
model: String::new(),
|
|
||||||
prompt: StringOrArray::String(String::new()),
|
|
||||||
suffix: None,
|
|
||||||
max_tokens: None,
|
|
||||||
temperature: None,
|
|
||||||
top_p: None,
|
|
||||||
n: None,
|
|
||||||
stream: false,
|
|
||||||
stream_options: None,
|
|
||||||
logprobs: None,
|
|
||||||
echo: false,
|
|
||||||
stop: None,
|
|
||||||
presence_penalty: None,
|
|
||||||
frequency_penalty: None,
|
|
||||||
best_of: None,
|
|
||||||
logit_bias: None,
|
|
||||||
user: None,
|
|
||||||
seed: None,
|
|
||||||
// SGLang Extensions
|
|
||||||
top_k: None,
|
|
||||||
min_p: None,
|
|
||||||
min_tokens: None,
|
|
||||||
repetition_penalty: None,
|
|
||||||
regex: None,
|
|
||||||
ebnf: None,
|
|
||||||
json_schema: None,
|
|
||||||
stop_token_ids: None,
|
|
||||||
no_stop_trim: false,
|
|
||||||
ignore_eos: false,
|
|
||||||
skip_special_tokens: true,
|
|
||||||
// SGLang Extensions
|
|
||||||
lora_path: None,
|
|
||||||
session_params: None,
|
|
||||||
return_hidden_states: false,
|
|
||||||
other: serde_json::Map::new(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[allow(dead_code)]
|
|
||||||
fn create_test_worker() -> BasicWorker {
|
|
||||||
BasicWorker::new(
|
|
||||||
"http://test-server:8000".to_string(),
|
|
||||||
WorkerType::Prefill {
|
|
||||||
bootstrap_port: Some(5678),
|
|
||||||
},
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_benchmark_request_creation() {
|
|
||||||
// Ensure all benchmark request types can be created without panicking
|
|
||||||
|
|
||||||
let generate_req = GenerateRequest {
|
|
||||||
text: Some("Test prompt".to_string()),
|
|
||||||
parameters: Some(GenerateParameters {
|
|
||||||
max_new_tokens: Some(100),
|
|
||||||
temperature: Some(0.8),
|
|
||||||
top_p: Some(0.9),
|
|
||||||
top_k: Some(50),
|
|
||||||
repetition_penalty: Some(1.0),
|
|
||||||
..Default::default()
|
|
||||||
}),
|
|
||||||
sampling_params: Some(SamplingParams {
|
|
||||||
temperature: Some(0.8),
|
|
||||||
top_p: Some(0.9),
|
|
||||||
top_k: Some(50),
|
|
||||||
frequency_penalty: Some(0.0),
|
|
||||||
presence_penalty: Some(0.0),
|
|
||||||
repetition_penalty: Some(1.0),
|
|
||||||
..Default::default()
|
|
||||||
}),
|
|
||||||
..default_generate_request()
|
|
||||||
};
|
|
||||||
|
|
||||||
let chat_req = ChatCompletionRequest {
|
|
||||||
model: "test-model".to_string(),
|
|
||||||
messages: vec![ChatMessage::User {
|
|
||||||
role: "user".to_string(),
|
|
||||||
content: UserMessageContent::Text("Test message".to_string()),
|
|
||||||
name: None,
|
|
||||||
}],
|
|
||||||
max_tokens: Some(150),
|
|
||||||
max_completion_tokens: Some(150),
|
|
||||||
temperature: Some(0.7),
|
|
||||||
top_p: Some(1.0),
|
|
||||||
n: Some(1),
|
|
||||||
presence_penalty: Some(0.0),
|
|
||||||
frequency_penalty: Some(0.0),
|
|
||||||
parallel_tool_calls: Some(true),
|
|
||||||
..default_chat_completion_request()
|
|
||||||
};
|
|
||||||
|
|
||||||
let completion_req = CompletionRequest {
|
|
||||||
model: "test-model".to_string(),
|
|
||||||
prompt: StringOrArray::String("Test prompt".to_string()),
|
|
||||||
max_tokens: Some(50),
|
|
||||||
temperature: Some(0.8),
|
|
||||||
top_p: Some(1.0),
|
|
||||||
n: Some(1),
|
|
||||||
presence_penalty: Some(0.0),
|
|
||||||
frequency_penalty: Some(0.0),
|
|
||||||
best_of: Some(1),
|
|
||||||
..default_completion_request()
|
|
||||||
};
|
|
||||||
|
|
||||||
// Test serialization works
|
|
||||||
assert!(to_string(&generate_req).is_ok());
|
|
||||||
assert!(to_string(&chat_req).is_ok());
|
|
||||||
assert!(to_string(&completion_req).is_ok());
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_benchmark_serialization_roundtrip() {
|
|
||||||
// Test serialization/deserialization roundtrip for benchmark types
|
|
||||||
|
|
||||||
let generate_req = GenerateRequest {
|
|
||||||
text: Some("Test prompt".to_string()),
|
|
||||||
..default_generate_request()
|
|
||||||
};
|
|
||||||
|
|
||||||
// Serialize and deserialize
|
|
||||||
let json = to_string(&generate_req).expect("Serialization should work");
|
|
||||||
let deserialized: GenerateRequest = from_str(&json).expect("Deserialization should work");
|
|
||||||
|
|
||||||
// Verify basic field equality
|
|
||||||
assert_eq!(generate_req.text, deserialized.text);
|
|
||||||
assert_eq!(generate_req.stream, deserialized.stream);
|
|
||||||
assert_eq!(generate_req.return_logprob, deserialized.return_logprob);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_benchmark_direct_json_routing() {
|
|
||||||
// Test direct JSON routing functionality for benchmark types (replaces regular routing)
|
|
||||||
|
|
||||||
let generate_req = GenerateRequest {
|
|
||||||
text: Some("Test prompt".to_string()),
|
|
||||||
..default_generate_request()
|
|
||||||
};
|
|
||||||
|
|
||||||
// Test direct JSON conversion (replaces regular routing methods)
|
|
||||||
let json = to_value(&generate_req).unwrap();
|
|
||||||
let json_string = to_string(&json).unwrap();
|
|
||||||
let bytes = json_string.as_bytes();
|
|
||||||
|
|
||||||
// Verify conversions work
|
|
||||||
assert!(!json_string.is_empty());
|
|
||||||
assert!(!bytes.is_empty());
|
|
||||||
}
|
|
||||||
Reference in New Issue
Block a user