diff --git a/.github/workflows/pr-benchmark-rust.yml b/.github/workflows/pr-benchmark-rust.yml index e34454c19..d01aadebd 100644 --- a/.github/workflows/pr-benchmark-rust.yml +++ b/.github/workflows/pr-benchmark-rust.yml @@ -9,6 +9,7 @@ on: branches: [ main ] paths: - "sgl-router/**" + types: [opened, synchronize, reopened, labeled] workflow_dispatch: concurrency: @@ -19,9 +20,67 @@ permissions: pull-requests: write issues: write jobs: - benchmark-router: + # Quick check job that always runs on PRs + benchmark-compile-check: + name: Benchmark Compilation Check if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Install dependencies + run: | + bash scripts/ci/ci_install_rust.sh + + - name: Setup sccache + uses: mozilla-actions/sccache-action@v0.0.3 + continue-on-error: true + + - name: Cache Rust dependencies + uses: actions/cache@v4 + with: + path: | + ~/.cargo/bin/ + ~/.cargo/registry/index/ + ~/.cargo/registry/cache/ + ~/.cargo/git/db/ + sgl-router/target/ + key: ${{ runner.os }}-cargo-${{ hashFiles('sgl-router/Cargo.lock') }} + restore-keys: | + ${{ runner.os }}-cargo- + + - name: Check benchmarks compile + run: | + source "$HOME/.cargo/env" + cd sgl-router/ + # Try to use sccache, but disable if it fails + if command -v sccache &> /dev/null; then + echo "Testing sccache availability..." 
+ # Try to start sccache and check if it works + export RUSTC_WRAPPER=sccache + export SCCACHE_GHA_ENABLED="true" + if sccache --start-server 2>/dev/null && sccache --show-stats 2>/dev/null; then + echo "sccache is working, using it for compilation" + else + echo "sccache failed to start, falling back to regular cargo" + unset RUSTC_WRAPPER + unset SCCACHE_GHA_ENABLED + fi + else + echo "sccache not available, using regular cargo" + fi + cargo check --benches + + # Full benchmark jobs: run only in sgl-project/sglang on push, workflow_dispatch, or PRs labeled 'benchmark' + benchmark-request-processing: + name: Request Processing Benchmark + if: | + github.repository == 'sgl-project/sglang' && + (github.event_name == 'push' || + github.event_name == 'workflow_dispatch' || + contains(github.event.pull_request.labels.*.name, 'benchmark')) + runs-on: ubuntu-latest steps: - name: Checkout code uses: actions/checkout@v4 @@ -33,6 +92,10 @@ jobs: run: | bash scripts/ci/ci_install_rust.sh + - name: Setup sccache + uses: mozilla-actions/sccache-action@v0.0.3 + continue-on-error: true + - name: Cache Rust dependencies uses: actions/cache@v4 with: @@ -46,40 +109,61 @@ jobs: restore-keys: | ${{ runner.os }}-cargo- - - name: Build router in release mode + - name: Run request processing benchmark + timeout-minutes: 30 run: | source "$HOME/.cargo/env" cd sgl-router/ - cargo build --release - - - name: Run quick benchmarks - timeout-minutes: 15 - run: | - source "$HOME/.cargo/env" - cd sgl-router/ - # Run quick benchmarks for PR validation using Python script - python3 scripts/run_benchmarks.py --quick --validate-thresholds --save-results + # Try to use sccache, but disable if it fails + if command -v sccache &> /dev/null; then + echo "Testing sccache availability..." 
+ # Try to start sccache and check if it works + export RUSTC_WRAPPER=sccache + export SCCACHE_GHA_ENABLED="true" + if sccache --start-server 2>/dev/null && sccache --show-stats 2>/dev/null; then + echo "sccache is working, using it for compilation" + else + echo "sccache failed to start, falling back to regular cargo" + unset RUSTC_WRAPPER + unset SCCACHE_GHA_ENABLED + fi + else + echo "sccache not available, using regular cargo" + fi + # Run only the benchmark matching benchmark_summary exactly, not the full request_processing suite + cargo bench --bench request_processing -- benchmark_summary --exact - name: Upload benchmark results if: always() uses: actions/upload-artifact@v4 with: - name: benchmark-results-${{ github.sha }} + name: request-processing-results-${{ github.sha }} path: | - sgl-router/target/criterion/ + sgl-router/target/criterion/benchmark_summary/ retention-days: 30 - benchmark-integration-test: - if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request' + benchmark-tokenizer: + name: Tokenizer Benchmark + if: | + github.repository == 'sgl-project/sglang' && + (github.event_name == 'push' || + github.event_name == 'workflow_dispatch' || + contains(github.event.pull_request.labels.*.name, 'benchmark')) + runs-on: ubuntu-latest steps: - name: Checkout code uses: actions/checkout@v4 + with: + fetch-depth: 100 - name: Install dependencies run: | bash scripts/ci/ci_install_rust.sh + - name: Setup sccache + uses: mozilla-actions/sccache-action@v0.0.3 + continue-on-error: true + - name: Cache Rust dependencies uses: actions/cache@v4 with: @@ -93,17 +177,146 @@ jobs: restore-keys: | ${{ runner.os }}-cargo- - - name: Run benchmark integration tests - timeout-minutes: 10 + - name: Run tokenizer benchmark + timeout-minutes: 30 run: | source "$HOME/.cargo/env" cd sgl-router/ - # Run integration tests to ensure benchmark code compiles and works - cargo test --test benchmark_integration + # Try to use sccache, but disable if it fails + if command -v sccache &> /dev/null; 
then + echo "Testing sccache availability..." + # Try to start sccache and check if it works + export RUSTC_WRAPPER=sccache + export SCCACHE_GHA_ENABLED="true" + if sccache --start-server 2>/dev/null && sccache --show-stats 2>/dev/null; then + echo "sccache is working, using it for compilation" + else + echo "sccache failed to start, falling back to regular cargo" + unset RUSTC_WRAPPER + unset SCCACHE_GHA_ENABLED + fi + else + echo "sccache not available, using regular cargo" + fi + cargo bench --bench tokenizer_benchmark - - name: Verify benchmark compilation + - name: Upload benchmark results + if: always() + uses: actions/upload-artifact@v4 + with: + name: tokenizer-results-${{ github.sha }} + path: | + sgl-router/target/criterion/tokenizer*/ + retention-days: 30 + + benchmark-tool-parser: + name: Tool Parser Benchmark + if: | + github.repository == 'sgl-project/sglang' && + (github.event_name == 'push' || + github.event_name == 'workflow_dispatch' || + contains(github.event.pull_request.labels.*.name, 'benchmark')) + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + fetch-depth: 100 + + - name: Install dependencies + run: | + bash scripts/ci/ci_install_rust.sh + + - name: Setup sccache + uses: mozilla-actions/sccache-action@v0.0.3 + continue-on-error: true + + - name: Cache Rust dependencies + uses: actions/cache@v4 + with: + path: | + ~/.cargo/bin/ + ~/.cargo/registry/index/ + ~/.cargo/registry/cache/ + ~/.cargo/git/db/ + sgl-router/target/ + key: ${{ runner.os }}-cargo-${{ hashFiles('sgl-router/Cargo.lock') }} + restore-keys: | + ${{ runner.os }}-cargo- + + - name: Run tool parser benchmark + timeout-minutes: 30 run: | source "$HOME/.cargo/env" cd sgl-router/ - # Ensure all benchmarks compile without running them - cargo check --benches + # Try to use sccache, but disable if it fails + if command -v sccache &> /dev/null; then + echo "Testing sccache availability..." 
+ # Try to start sccache and check if it works + export RUSTC_WRAPPER=sccache + export SCCACHE_GHA_ENABLED="true" + if sccache --start-server 2>/dev/null && sccache --show-stats 2>/dev/null; then + echo "sccache is working, using it for compilation" + else + echo "sccache failed to start, falling back to regular cargo" + unset RUSTC_WRAPPER + unset SCCACHE_GHA_ENABLED + fi + else + echo "sccache not available, using regular cargo" + fi + cargo bench --bench tool_parser_benchmark + + - name: Upload benchmark results + if: always() + uses: actions/upload-artifact@v4 + with: + name: tool-parser-results-${{ github.sha }} + path: | + sgl-router/target/criterion/tool_parser*/ + retention-days: 30 + + benchmark-summary: + name: Benchmark Summary + needs: [benchmark-request-processing, benchmark-tokenizer, benchmark-tool-parser] + if: always() && (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') + runs-on: ubuntu-latest + steps: + - name: Download all benchmark results + uses: actions/download-artifact@v4 + with: + pattern: '*-results-${{ github.sha }}' + path: benchmark-results + + - name: Generate summary + run: | + echo "## Benchmark Results Summary" > summary.md + echo "" >> summary.md + echo "### Request Processing" >> summary.md + if [ -d "benchmark-results/request-processing-results-${{ github.sha }}" ]; then + echo "✅ Completed" >> summary.md + else + echo "❌ Failed or skipped" >> summary.md + fi + echo "" >> summary.md + echo "### Tokenizer" >> summary.md + if [ -d "benchmark-results/tokenizer-results-${{ github.sha }}" ]; then + echo "✅ Completed" >> summary.md + else + echo "❌ Failed or skipped" >> summary.md + fi + echo "" >> summary.md + echo "### Tool Parser" >> summary.md + if [ -d "benchmark-results/tool-parser-results-${{ github.sha }}" ]; then + echo "✅ Completed" >> summary.md + else + echo "❌ Failed or skipped" >> summary.md + fi + cat summary.md + + - name: Upload summary + uses: actions/upload-artifact@v4 + with: + 
name: benchmark-summary-${{ github.sha }} + path: summary.md + retention-days: 30 diff --git a/sgl-router/scripts/post_benchmark_comment.py b/sgl-router/scripts/post_benchmark_comment.py deleted file mode 100755 index 402a0b5bf..000000000 --- a/sgl-router/scripts/post_benchmark_comment.py +++ /dev/null @@ -1,203 +0,0 @@ -#!/usr/bin/env python3 -""" -GitHub PR Comment Poster for Benchmark Results - -Posts benchmark results as comments on GitHub PRs with update capability. -Replaces JavaScript logic in GitHub Actions for better maintainability. -""" - -import argparse -import os -import sys -from pathlib import Path -from typing import Dict, Optional - -import requests - - -class GitHubCommentPoster: - """Handles posting benchmark results as GitHub PR comments.""" - - def __init__(self, token: str, repo_owner: str, repo_name: str): - self.token = token - self.repo_owner = repo_owner - self.repo_name = repo_name - self.base_url = f"https://api.github.com/repos/{repo_owner}/{repo_name}" - self.headers = { - "Authorization": f"token {token}", - "Accept": "application/vnd.github.v3+json", - } - - def read_benchmark_results(self, results_file: str) -> Dict[str, str]: - """Read benchmark results from file.""" - results = {} - filepath = Path(results_file) - - if not filepath.exists(): - print(f"Results file not found: {filepath}") - return {"error": "Results file not found"} - - try: - with open(filepath, "r") as f: - for line in f: - line = line.strip() - if "=" in line: - key, value = line.split("=", 1) - results[key] = value - except Exception as e: - print(f"Error reading results file: {e}") - return {"error": str(e)} - - return results - - def format_benchmark_comment( - self, results: Dict[str, str], pr_number: int, commit_sha: str - ) -> str: - """Format benchmark results into a GitHub comment.""" - serialization_time = results.get("serialization_time", "N/A") - deserialization_time = results.get("deserialization_time", "N/A") - adaptation_time = 
results.get("adaptation_time", "N/A") - total_time = results.get("total_time", "N/A") - - comment = f""" -### SGLang Router Benchmark Results - -**Performance Summary for PR #{pr_number}** - -The router benchmarks have completed successfully! - -**Performance Thresholds:** All passed -- Serialization: < 2μs -- Deserialization: < 2μs -- PD Adaptation: < 5μs -- Total Pipeline: < 10μs - -**Measured Results:** -- Serialization: `{serialization_time}`ns -- Deserialization: `{deserialization_time}`ns -- PD Adaptation: `{adaptation_time}`ns -- Total Pipeline: `{total_time}`ns - -**Detailed Reports:** -- Download the `benchmark-results-{commit_sha}` artifact for HTML reports -- Run `make bench` locally for detailed analysis - -**Commit:** {commit_sha} -""".strip() - - return comment - - def find_existing_comment(self, pr_number: int) -> Optional[int]: - """Find existing benchmark comment in the PR.""" - url = f"{self.base_url}/issues/{pr_number}/comments" - - try: - response = requests.get(url, headers=self.headers) - response.raise_for_status() - comments = response.json() - - for comment in comments: - if comment.get("user", {}).get( - "login" - ) == "github-actions[bot]" and "SGLang Router Benchmark Results" in comment.get( - "body", "" - ): - return comment["id"] - - except requests.RequestException as e: - print(f"Error fetching comments: {e}") - - return None - - def post_comment(self, pr_number: int, comment_body: str) -> bool: - """Post a new comment on the PR.""" - url = f"{self.base_url}/issues/{pr_number}/comments" - data = {"body": comment_body} - - try: - response = requests.post(url, headers=self.headers, json=data) - response.raise_for_status() - print(f"Posted new benchmark comment on PR #{pr_number}") - return True - except requests.RequestException as e: - print(f"Error posting comment: {e}") - return False - - def update_comment(self, comment_id: int, comment_body: str) -> bool: - """Update an existing comment.""" - url = 
f"{self.base_url}/issues/comments/{comment_id}" - data = {"body": comment_body} - - try: - response = requests.patch(url, headers=self.headers, json=data) - response.raise_for_status() - print(f"Updated existing benchmark comment (ID: {comment_id})") - return True - except requests.RequestException as e: - print(f"Error updating comment: {e}") - return False - - def post_or_update_comment( - self, pr_number: int, results_file: str, commit_sha: str - ) -> bool: - """Post or update benchmark results comment on PR.""" - # Read benchmark results - results = self.read_benchmark_results(results_file) - if "error" in results: - print(f"Failed to read benchmark results: {results['error']}") - return False - - # Format comment - comment_body = self.format_benchmark_comment(results, pr_number, commit_sha) - - # Check for existing comment - existing_comment_id = self.find_existing_comment(pr_number) - - if existing_comment_id: - return self.update_comment(existing_comment_id, comment_body) - else: - return self.post_comment(pr_number, comment_body) - - -def main(): - parser = argparse.ArgumentParser(description="Post benchmark results to GitHub PR") - parser.add_argument( - "--pr-number", type=int, required=True, help="Pull request number" - ) - parser.add_argument("--commit-sha", type=str, required=True, help="Commit SHA") - parser.add_argument( - "--results-file", - type=str, - default="benchmark_results.env", - help="Path to benchmark results file", - ) - parser.add_argument( - "--repo-owner", type=str, default="sgl-project", help="GitHub repository owner" - ) - parser.add_argument( - "--repo-name", type=str, default="sglang", help="GitHub repository name" - ) - - args = parser.parse_args() - - # Get GitHub token from environment - github_token = os.environ.get("GITHUB_TOKEN") - if not github_token: - print("Error: GITHUB_TOKEN environment variable is required") - sys.exit(1) - - # Create poster and post comment - poster = GitHubCommentPoster(github_token, args.repo_owner, 
args.repo_name) - success = poster.post_or_update_comment( - args.pr_number, args.results_file, args.commit_sha - ) - - if not success: - print("Failed to post benchmark comment") - sys.exit(1) - - print("Benchmark comment posted successfully!") - - -if __name__ == "__main__": - main() diff --git a/sgl-router/tests/benchmark_integration.rs b/sgl-router/tests/benchmark_integration.rs deleted file mode 100644 index e40ca08ab..000000000 --- a/sgl-router/tests/benchmark_integration.rs +++ /dev/null @@ -1,228 +0,0 @@ -// Integration test to ensure benchmarks compile and basic functionality works -// This prevents benchmarks from breaking in CI -// -// UPDATED: Removed deprecated ToPdRequest usage, now uses direct JSON serialization - -use serde_json::{from_str, to_string, to_value}; -use sglang_router_rs::core::{BasicWorker, WorkerType}; -use sglang_router_rs::protocols::spec::{ - ChatCompletionRequest, ChatMessage, CompletionRequest, GenerateParameters, GenerateRequest, - SamplingParams, StringOrArray, UserMessageContent, -}; - -/// Create a default GenerateRequest for benchmarks with minimal fields set -fn default_generate_request() -> GenerateRequest { - GenerateRequest { - text: None, - prompt: None, - input_ids: None, - stream: false, - parameters: None, - sampling_params: None, - return_logprob: false, - // SGLang Extensions - lora_path: None, - session_params: None, - return_hidden_states: false, - rid: None, - } -} - -/// Create a default ChatCompletionRequest for benchmarks with minimal fields set -fn default_chat_completion_request() -> ChatCompletionRequest { - ChatCompletionRequest { - model: String::new(), - messages: vec![], - max_tokens: None, - max_completion_tokens: None, - temperature: None, - top_p: None, - n: None, - stream: false, - stream_options: None, - stop: None, - presence_penalty: None, - frequency_penalty: None, - logit_bias: None, - logprobs: false, - top_logprobs: None, - user: None, - response_format: None, - seed: None, - tools: None, - 
tool_choice: None, - parallel_tool_calls: None, - function_call: None, - functions: None, - // SGLang Extensions - top_k: None, - min_p: None, - min_tokens: None, - repetition_penalty: None, - regex: None, - ebnf: None, - stop_token_ids: None, - no_stop_trim: false, - ignore_eos: false, - continue_final_message: false, - skip_special_tokens: true, - // SGLang Extensions - lora_path: None, - session_params: None, - separate_reasoning: true, - stream_reasoning: true, - return_hidden_states: false, - } -} - -/// Create a default CompletionRequest for benchmarks with minimal fields set -fn default_completion_request() -> CompletionRequest { - CompletionRequest { - model: String::new(), - prompt: StringOrArray::String(String::new()), - suffix: None, - max_tokens: None, - temperature: None, - top_p: None, - n: None, - stream: false, - stream_options: None, - logprobs: None, - echo: false, - stop: None, - presence_penalty: None, - frequency_penalty: None, - best_of: None, - logit_bias: None, - user: None, - seed: None, - // SGLang Extensions - top_k: None, - min_p: None, - min_tokens: None, - repetition_penalty: None, - regex: None, - ebnf: None, - json_schema: None, - stop_token_ids: None, - no_stop_trim: false, - ignore_eos: false, - skip_special_tokens: true, - // SGLang Extensions - lora_path: None, - session_params: None, - return_hidden_states: false, - other: serde_json::Map::new(), - } -} - -#[allow(dead_code)] -fn create_test_worker() -> BasicWorker { - BasicWorker::new( - "http://test-server:8000".to_string(), - WorkerType::Prefill { - bootstrap_port: Some(5678), - }, - ) -} - -#[test] -fn test_benchmark_request_creation() { - // Ensure all benchmark request types can be created without panicking - - let generate_req = GenerateRequest { - text: Some("Test prompt".to_string()), - parameters: Some(GenerateParameters { - max_new_tokens: Some(100), - temperature: Some(0.8), - top_p: Some(0.9), - top_k: Some(50), - repetition_penalty: Some(1.0), - 
..Default::default() - }), - sampling_params: Some(SamplingParams { - temperature: Some(0.8), - top_p: Some(0.9), - top_k: Some(50), - frequency_penalty: Some(0.0), - presence_penalty: Some(0.0), - repetition_penalty: Some(1.0), - ..Default::default() - }), - ..default_generate_request() - }; - - let chat_req = ChatCompletionRequest { - model: "test-model".to_string(), - messages: vec![ChatMessage::User { - role: "user".to_string(), - content: UserMessageContent::Text("Test message".to_string()), - name: None, - }], - max_tokens: Some(150), - max_completion_tokens: Some(150), - temperature: Some(0.7), - top_p: Some(1.0), - n: Some(1), - presence_penalty: Some(0.0), - frequency_penalty: Some(0.0), - parallel_tool_calls: Some(true), - ..default_chat_completion_request() - }; - - let completion_req = CompletionRequest { - model: "test-model".to_string(), - prompt: StringOrArray::String("Test prompt".to_string()), - max_tokens: Some(50), - temperature: Some(0.8), - top_p: Some(1.0), - n: Some(1), - presence_penalty: Some(0.0), - frequency_penalty: Some(0.0), - best_of: Some(1), - ..default_completion_request() - }; - - // Test serialization works - assert!(to_string(&generate_req).is_ok()); - assert!(to_string(&chat_req).is_ok()); - assert!(to_string(&completion_req).is_ok()); -} - -#[test] -fn test_benchmark_serialization_roundtrip() { - // Test serialization/deserialization roundtrip for benchmark types - - let generate_req = GenerateRequest { - text: Some("Test prompt".to_string()), - ..default_generate_request() - }; - - // Serialize and deserialize - let json = to_string(&generate_req).expect("Serialization should work"); - let deserialized: GenerateRequest = from_str(&json).expect("Deserialization should work"); - - // Verify basic field equality - assert_eq!(generate_req.text, deserialized.text); - assert_eq!(generate_req.stream, deserialized.stream); - assert_eq!(generate_req.return_logprob, deserialized.return_logprob); -} - -#[test] -fn 
test_benchmark_direct_json_routing() { - // Test direct JSON routing functionality for benchmark types (replaces regular routing) - - let generate_req = GenerateRequest { - text: Some("Test prompt".to_string()), - ..default_generate_request() - }; - - // Test direct JSON conversion (replaces regular routing methods) - let json = to_value(&generate_req).unwrap(); - let json_string = to_string(&json).unwrap(); - let bytes = json_string.as_bytes(); - - // Verify conversions work - assert!(!json_string.is_empty()); - assert!(!bytes.is_empty()); -}