[router] include rust benchmarks (#9932)

2025-09-02 09:32:09 -07:00
parent f64b8e3e4e
commit 9491d6e554
3 changed files with 235 additions and 453 deletions
--- a/.github/workflows/pr-benchmark-rust.yml
+++ b/.github/workflows/pr-benchmark-rust.yml
@@ -9,6 +9,7 @@ on:
    branches: [ main ]
    paths:
      - "sgl-router/**"
    types: [opened, synchronize, reopened, labeled]
  workflow_dispatch:
 concurrency:
@@ -19,9 +20,67 @@ permissions:
  pull-requests: write
  issues: write
 jobs:
-  benchmark-router:
+  # Quick check job that always runs on PRs
  # Compile-only gate: type-checks the benchmark targets with `cargo check --benches`
  # and never executes them.
  benchmark-compile-check:
    name: Benchmark Compilation Check
    # Runs in the upstream repository, and on any pull_request event regardless of repository.
    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Install dependencies
        run: |
          bash scripts/ci/ci_install_rust.sh
      # Best effort: continue-on-error keeps a failed sccache setup from failing the job.
      - name: Setup sccache
        uses: mozilla-actions/sccache-action@v0.0.3
        continue-on-error: true
      # Cache key is derived from sgl-router/Cargo.lock; restore-keys permits falling back
      # to any cache whose key starts with the OS-specific cargo prefix.
      - name: Cache Rust dependencies
        uses: actions/cache@v4
        with:
          path: |
            ~/.cargo/bin/
            ~/.cargo/registry/index/
            ~/.cargo/registry/cache/
            ~/.cargo/git/db/
            sgl-router/target/
          key: ${{ runner.os }}-cargo-${{ hashFiles('sgl-router/Cargo.lock') }}
          restore-keys: |
            ${{ runner.os }}-cargo-
      # Probes sccache before use; if the server cannot start or report stats, the
      # sccache environment variables are unset so cargo runs without the wrapper.
      - name: Check benchmarks compile
        run: |
          source "$HOME/.cargo/env"
          cd sgl-router/
          # Try to use sccache, but disable if it fails
          if command -v sccache &> /dev/null; then
            echo "Testing sccache availability..."
            # Try to start sccache and check if it works
            export RUSTC_WRAPPER=sccache
            export SCCACHE_GHA_ENABLED="true"
            if sccache --start-server 2>/dev/null && sccache --show-stats 2>/dev/null; then
              echo "sccache is working, using it for compilation"
            else
              echo "sccache failed to start, falling back to regular cargo"
              unset RUSTC_WRAPPER
              unset SCCACHE_GHA_ENABLED
            fi
          else
            echo "sccache not available, using regular cargo"
          fi
          cargo check --benches
  # Full benchmark jobs that only run with label or on main branch
  benchmark-request-processing:
    name: Request Processing Benchmark
    if: |
      github.repository == 'sgl-project/sglang' &&
      (github.event_name == 'push' ||
       github.event_name == 'workflow_dispatch' ||
       contains(github.event.pull_request.labels.*.name, 'benchmark'))
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
@@ -33,6 +92,10 @@ jobs:
        run: |
          bash scripts/ci/ci_install_rust.sh
      - name: Setup sccache
        uses: mozilla-actions/sccache-action@v0.0.3
        continue-on-error: true
      - name: Cache Rust dependencies
        uses: actions/cache@v4
        with:
@@ -46,40 +109,61 @@ jobs:
          restore-keys: |
            ${{ runner.os }}-cargo-
-      - name: Build router in release mode
+      - name: Run request processing benchmark
        timeout-minutes: 30
        run: |
          source "$HOME/.cargo/env"
          cd sgl-router/
-          cargo build --release
+          # Try to use sccache, but disable if it fails
-
+          if command -v sccache &> /dev/null; then
-      - name: Run quick benchmarks
+            echo "Testing sccache availability..."
-        timeout-minutes: 15
+            # Try to start sccache and check if it works
-        run: |
+            export RUSTC_WRAPPER=sccache
-          source "$HOME/.cargo/env"
+            export SCCACHE_GHA_ENABLED="true"
-          cd sgl-router/
+            if sccache --start-server 2>/dev/null && sccache --show-stats 2>/dev/null; then
-          # Run quick benchmarks for PR validation using Python script
+              echo "sccache is working, using it for compilation"
-          python3 scripts/run_benchmarks.py --quick --validate-thresholds --save-results
+            else
              echo "sccache failed to start, falling back to regular cargo"
              unset RUSTC_WRAPPER
              unset SCCACHE_GHA_ENABLED
            fi
          else
            echo "sccache not available, using regular cargo"
          fi
          # Run only the summary benchmark for quick validation in PRs
          cargo bench --bench request_processing -- benchmark_summary --exact
      - name: Upload benchmark results
        if: always()
        uses: actions/upload-artifact@v4
        with:
-          name: benchmark-results-${{ github.sha }}
+          name: request-processing-results-${{ github.sha }}
          path: |
-            sgl-router/target/criterion/
+            sgl-router/target/criterion/benchmark_summary/
          retention-days: 30
-  benchmark-integration-test:
+  benchmark-tokenizer:
-    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
+    name: Tokenizer Benchmark
    if: |
      github.repository == 'sgl-project/sglang' &&
      (github.event_name == 'push' ||
       github.event_name == 'workflow_dispatch' ||
       contains(github.event.pull_request.labels.*.name, 'benchmark'))
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 100
      - name: Install dependencies
        run: |
          bash scripts/ci/ci_install_rust.sh
      - name: Setup sccache
        uses: mozilla-actions/sccache-action@v0.0.3
        continue-on-error: true
      - name: Cache Rust dependencies
        uses: actions/cache@v4
        with:
@@ -93,17 +177,146 @@ jobs:
          restore-keys: |
            ${{ runner.os }}-cargo-
-      - name: Run benchmark integration tests
+      - name: Run tokenizer benchmark
-        timeout-minutes: 10
+        timeout-minutes: 30
        run: |
          source "$HOME/.cargo/env"
          cd sgl-router/
-          # Run integration tests to ensure benchmark code compiles and works
+          # Try to use sccache, but disable if it fails
-          cargo test --test benchmark_integration
+          if command -v sccache &> /dev/null; then
            echo "Testing sccache availability..."
            # Try to start sccache and check if it works
            export RUSTC_WRAPPER=sccache
            export SCCACHE_GHA_ENABLED="true"
            if sccache --start-server 2>/dev/null && sccache --show-stats 2>/dev/null; then
              echo "sccache is working, using it for compilation"
            else
              echo "sccache failed to start, falling back to regular cargo"
              unset RUSTC_WRAPPER
              unset SCCACHE_GHA_ENABLED
            fi
          else
            echo "sccache not available, using regular cargo"
          fi
          cargo bench --bench tokenizer_benchmark
-      - name: Verify benchmark compilation
+      - name: Upload benchmark results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: tokenizer-results-${{ github.sha }}
          path: |
            sgl-router/target/criterion/tokenizer*/
          retention-days: 30
  benchmark-tool-parser:
    name: Tool Parser Benchmark
    if: |
      github.repository == 'sgl-project/sglang' &&
      (github.event_name == 'push' ||
       github.event_name == 'workflow_dispatch' ||
       contains(github.event.pull_request.labels.*.name, 'benchmark'))
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 100
      - name: Install dependencies
        run: |
          bash scripts/ci/ci_install_rust.sh
      - name: Setup sccache
        uses: mozilla-actions/sccache-action@v0.0.3
        continue-on-error: true
      - name: Cache Rust dependencies
        uses: actions/cache@v4
        with:
          path: |
            ~/.cargo/bin/
            ~/.cargo/registry/index/
            ~/.cargo/registry/cache/
            ~/.cargo/git/db/
            sgl-router/target/
          key: ${{ runner.os }}-cargo-${{ hashFiles('sgl-router/Cargo.lock') }}
          restore-keys: |
            ${{ runner.os }}-cargo-
      - name: Run tool parser benchmark
        timeout-minutes: 30
        run: |
          source "$HOME/.cargo/env"
          cd sgl-router/
-          # Ensure all benchmarks compile without running them
+          # Try to use sccache, but disable if it fails
-          cargo check --benches
+          if command -v sccache &> /dev/null; then
            echo "Testing sccache availability..."
            # Try to start sccache and check if it works
            export RUSTC_WRAPPER=sccache
            export SCCACHE_GHA_ENABLED="true"
            if sccache --start-server 2>/dev/null && sccache --show-stats 2>/dev/null; then
              echo "sccache is working, using it for compilation"
            else
              echo "sccache failed to start, falling back to regular cargo"
              unset RUSTC_WRAPPER
              unset SCCACHE_GHA_ENABLED
            fi
          else
            echo "sccache not available, using regular cargo"
          fi
          cargo bench --bench tool_parser_benchmark
      - name: Upload benchmark results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: tool-parser-results-${{ github.sha }}
          path: |
            sgl-router/target/criterion/tool_parser*/
          retention-days: 30
  # Collects the artifacts uploaded by the three benchmark jobs and writes a
  # Markdown status summary, which is itself uploaded as an artifact.
  benchmark-summary:
    name: Benchmark Summary
    needs: [benchmark-request-processing, benchmark-tokenizer, benchmark-tool-parser]
    # always() lets this job run even when a job listed in `needs` failed or was skipped.
    if: always() && (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    runs-on: ubuntu-latest
    steps:
      # The pattern matches the `<benchmark>-results-<sha>` artifact names used by the
      # benchmark jobs; matches are placed under benchmark-results/.
      - name: Download all benchmark results
        uses: actions/download-artifact@v4
        with:
          pattern: '*-results-${{ github.sha }}'
          path: benchmark-results
      # Status is inferred only from whether each artifact directory exists, so a
      # failed benchmark and a skipped one are reported identically.
      - name: Generate summary
        run: |
          echo "## Benchmark Results Summary" > summary.md
          echo "" >> summary.md
          echo "### Request Processing" >> summary.md
          if [ -d "benchmark-results/request-processing-results-${{ github.sha }}" ]; then
            echo "✅ Completed" >> summary.md
          else
            echo "❌ Failed or skipped" >> summary.md
          fi
          echo "" >> summary.md
          echo "### Tokenizer" >> summary.md
          if [ -d "benchmark-results/tokenizer-results-${{ github.sha }}" ]; then
            echo "✅ Completed" >> summary.md
          else
            echo "❌ Failed or skipped" >> summary.md
          fi
          echo "" >> summary.md
          echo "### Tool Parser" >> summary.md
          if [ -d "benchmark-results/tool-parser-results-${{ github.sha }}" ]; then
            echo "✅ Completed" >> summary.md
          else
            echo "❌ Failed or skipped" >> summary.md
          fi
          cat summary.md
      - name: Upload summary
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-summary-${{ github.sha }}
          path: summary.md
          retention-days: 30
--- a/sgl-router/scripts/post_benchmark_comment.py
+++ b/sgl-router/scripts/post_benchmark_comment.py
@@ -1,203 +0,0 @@
 #!/usr/bin/env python3
 """
 GitHub PR Comment Poster for Benchmark Results
 Posts benchmark results as comments on GitHub PRs with update capability.
 Replaces JavaScript logic in GitHub Actions for better maintainability.
 """
 import argparse
 import os
 import sys
 from pathlib import Path
 from typing import Dict, Optional
 import requests
 class GitHubCommentPoster:
    """Post benchmark results to a GitHub pull request as one updatable comment.
    The comment is recognised by its author (``github-actions[bot]``) and by the
    heading text it contains, so repeated runs edit the existing comment
    instead of adding a new one each time.
    """
    # Seconds to wait on any single GitHub API call; without a timeout a
    # stalled connection would block the calling process indefinitely.
    REQUEST_TIMEOUT = 30
    # Page size requested when listing comments (the API default is smaller).
    COMMENTS_PER_PAGE = 100
    def __init__(self, token: str, repo_owner: str, repo_name: str):
        """Store credentials and derive the repository API base URL.
        Args:
            token: GitHub token sent in the ``Authorization`` header.
            repo_owner: Owner (user or organisation) of the repository.
            repo_name: Name of the repository.
        """
        self.token = token
        self.repo_owner = repo_owner
        self.repo_name = repo_name
        self.base_url = f"https://api.github.com/repos/{repo_owner}/{repo_name}"
        self.headers = {
            "Authorization": f"token {token}",
            "Accept": "application/vnd.github.v3+json",
        }
    def read_benchmark_results(self, results_file: str) -> Dict[str, str]:
        """Parse a ``key=value`` results file into a dictionary.
        Lines without ``=`` are ignored; only the first ``=`` on a line splits
        key from value.
        Args:
            results_file: Path of the file to read.
        Returns:
            The parsed pairs, or ``{"error": <message>}`` when the file is
            missing or cannot be read.
        """
        results = {}
        filepath = Path(results_file)
        if not filepath.exists():
            print(f"Results file not found: {filepath}")
            return {"error": "Results file not found"}
        try:
            with open(filepath, "r") as f:
                for line in f:
                    line = line.strip()
                    if "=" in line:
                        key, value = line.split("=", 1)
                        results[key] = value
        except Exception as e:
            print(f"Error reading results file: {e}")
            return {"error": str(e)}
        return results
    def format_benchmark_comment(
        self, results: Dict[str, str], pr_number: int, commit_sha: str
    ) -> str:
        """Render the Markdown body of the benchmark comment.
        Args:
            results: Parsed results; missing timings are shown as ``N/A``.
            pr_number: Pull request number shown in the summary line.
            commit_sha: Commit SHA shown in the artifact name and footer.
        Returns:
            The comment body with surrounding whitespace stripped.
        """
        serialization_time = results.get("serialization_time", "N/A")
        deserialization_time = results.get("deserialization_time", "N/A")
        adaptation_time = results.get("adaptation_time", "N/A")
        total_time = results.get("total_time", "N/A")
        comment = f"""
 ### SGLang Router Benchmark Results
 **Performance Summary for PR #{pr_number}**
 The router benchmarks have completed successfully!
 **Performance Thresholds:** All passed
 - Serialization: < 2μs
 - Deserialization: < 2μs
 - PD Adaptation: < 5μs
 - Total Pipeline: < 10μs
 **Measured Results:**
 - Serialization: `{serialization_time}`ns
 - Deserialization: `{deserialization_time}`ns
 - PD Adaptation: `{adaptation_time}`ns
 - Total Pipeline: `{total_time}`ns
 **Detailed Reports:**
 - Download the `benchmark-results-{commit_sha}` artifact for HTML reports
 - Run `make bench` locally for detailed analysis
 **Commit:** {commit_sha}
 """.strip()
        return comment
    def find_existing_comment(self, pr_number: int) -> Optional[int]:
        """Return the id of the existing benchmark comment on the PR, if any.
        Every page of the comment listing is inspected. Looking only at the
        first page would miss the bot comment on busy PRs and cause a
        duplicate to be posted.
        Args:
            pr_number: Pull request number whose comments are searched.
        Returns:
            The comment id, or ``None`` when no matching comment exists or the
            request fails.
        """
        url = f"{self.base_url}/issues/{pr_number}/comments"
        params = {"per_page": self.COMMENTS_PER_PAGE}
        try:
            while url:
                response = requests.get(
                    url,
                    headers=self.headers,
                    params=params,
                    timeout=self.REQUEST_TIMEOUT,
                )
                response.raise_for_status()
                for comment in response.json():
                    # "user" and "body" may be null in the API payload, so
                    # normalise them before inspecting.
                    author = (comment.get("user") or {}).get("login")
                    body = comment.get("body") or ""
                    if (
                        author == "github-actions[bot]"
                        and "SGLang Router Benchmark Results" in body
                    ):
                        return comment["id"]
                # The "next" link already carries the full query string.
                url = response.links.get("next", {}).get("url")
                params = None
        except requests.RequestException as e:
            print(f"Error fetching comments: {e}")
        return None
    def post_comment(self, pr_number: int, comment_body: str) -> bool:
        """Create a new comment on the PR.
        Returns:
            ``True`` on success, ``False`` when the request fails.
        """
        url = f"{self.base_url}/issues/{pr_number}/comments"
        data = {"body": comment_body}
        try:
            response = requests.post(
                url, headers=self.headers, json=data, timeout=self.REQUEST_TIMEOUT
            )
            response.raise_for_status()
            print(f"Posted new benchmark comment on PR #{pr_number}")
            return True
        except requests.RequestException as e:
            print(f"Error posting comment: {e}")
            return False
    def update_comment(self, comment_id: int, comment_body: str) -> bool:
        """Replace the body of an existing comment.
        Returns:
            ``True`` on success, ``False`` when the request fails.
        """
        url = f"{self.base_url}/issues/comments/{comment_id}"
        data = {"body": comment_body}
        try:
            response = requests.patch(
                url, headers=self.headers, json=data, timeout=self.REQUEST_TIMEOUT
            )
            response.raise_for_status()
            print(f"Updated existing benchmark comment (ID: {comment_id})")
            return True
        except requests.RequestException as e:
            print(f"Error updating comment: {e}")
            return False
    def post_or_update_comment(
        self, pr_number: int, results_file: str, commit_sha: str
    ) -> bool:
        """Read the results file and publish it, updating a prior comment if present.
        Args:
            pr_number: Pull request number to comment on.
            results_file: Path of the ``key=value`` results file.
            commit_sha: Commit SHA included in the comment.
        Returns:
            ``True`` when the comment was posted or updated, ``False`` when the
            results could not be read or the API call failed.
        """
        # Read benchmark results
        results = self.read_benchmark_results(results_file)
        if "error" in results:
            print(f"Failed to read benchmark results: {results['error']}")
            return False
        # Format comment
        comment_body = self.format_benchmark_comment(results, pr_number, commit_sha)
        # Check for existing comment
        existing_comment_id = self.find_existing_comment(pr_number)
        if existing_comment_id:
            return self.update_comment(existing_comment_id, comment_body)
        else:
            return self.post_comment(pr_number, comment_body)
 def main():
    """Command-line entry point: parse options, then post or update the PR comment.
    Exits with status 1 when ``GITHUB_TOKEN`` is not set or when the comment
    could not be posted.
    """
    # Options are declared as data and registered in order, so the generated
    # help text lists them in this sequence.
    option_specs = (
        ("--pr-number", {"type": int, "required": True, "help": "Pull request number"}),
        ("--commit-sha", {"type": str, "required": True, "help": "Commit SHA"}),
        (
            "--results-file",
            {
                "type": str,
                "default": "benchmark_results.env",
                "help": "Path to benchmark results file",
            },
        ),
        (
            "--repo-owner",
            {"type": str, "default": "sgl-project", "help": "GitHub repository owner"},
        ),
        (
            "--repo-name",
            {"type": str, "default": "sglang", "help": "GitHub repository name"},
        ),
    )
    cli = argparse.ArgumentParser(description="Post benchmark results to GitHub PR")
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)
    opts = cli.parse_args()
    # The token comes from the environment rather than from a flag.
    token = os.environ.get("GITHUB_TOKEN")
    if not token:
        print("Error: GITHUB_TOKEN environment variable is required")
        sys.exit(1)
    poster = GitHubCommentPoster(token, opts.repo_owner, opts.repo_name)
    posted = poster.post_or_update_comment(
        opts.pr_number, opts.results_file, opts.commit_sha
    )
    if not posted:
        print("Failed to post benchmark comment")
        sys.exit(1)
    print("Benchmark comment posted successfully!")
 if __name__ == "__main__":
    main()
--- a/sgl-router/tests/benchmark_integration.rs
+++ b/sgl-router/tests/benchmark_integration.rs
@@ -1,228 +0,0 @@
 // Integration test to ensure benchmarks compile and basic functionality works
 // This prevents benchmarks from breaking in CI
 //
 // UPDATED: Removed deprecated ToPdRequest usage, now uses direct JSON serialization
 use serde_json::{from_str, to_string, to_value};
 use sglang_router_rs::core::{BasicWorker, WorkerType};
 use sglang_router_rs::protocols::spec::{
    ChatCompletionRequest, ChatMessage, CompletionRequest, GenerateParameters, GenerateRequest,
    SamplingParams, StringOrArray, UserMessageContent,
 };
 /// Build the baseline `GenerateRequest` that the tests customise via struct-update syntax.
 ///
 /// Every optional field is `None` and every boolean flag is `false`.
 fn default_generate_request() -> GenerateRequest {
    GenerateRequest {
        // Input and sampling configuration, all unset
        text: None,
        prompt: None,
        input_ids: None,
        parameters: None,
        sampling_params: None,
        // Boolean switches, all disabled
        stream: false,
        return_logprob: false,
        return_hidden_states: false,
        // SGLang extensions, all unset
        lora_path: None,
        session_params: None,
        rid: None,
    }
 }
 /// Build the baseline `ChatCompletionRequest` that the tests customise via struct-update syntax.
 ///
 /// The model name and message list are empty, optional fields are `None`, and
 /// boolean flags are `false` except for the three listed as enabled below.
 fn default_chat_completion_request() -> ChatCompletionRequest {
    ChatCompletionRequest {
        // Required payload, left empty
        model: String::new(),
        messages: Vec::new(),
        // Flags enabled in the baseline
        skip_special_tokens: true,
        separate_reasoning: true,
        stream_reasoning: true,
        // Flags disabled in the baseline
        stream: false,
        logprobs: false,
        no_stop_trim: false,
        ignore_eos: false,
        continue_final_message: false,
        return_hidden_states: false,
        // Optional request fields, all unset
        max_tokens: None,
        max_completion_tokens: None,
        temperature: None,
        top_p: None,
        n: None,
        stream_options: None,
        stop: None,
        presence_penalty: None,
        frequency_penalty: None,
        logit_bias: None,
        top_logprobs: None,
        user: None,
        response_format: None,
        seed: None,
        tools: None,
        tool_choice: None,
        parallel_tool_calls: None,
        function_call: None,
        functions: None,
        // SGLang extensions, all unset
        top_k: None,
        min_p: None,
        min_tokens: None,
        repetition_penalty: None,
        regex: None,
        ebnf: None,
        stop_token_ids: None,
        lora_path: None,
        session_params: None,
    }
 }
 /// Build the baseline `CompletionRequest` that the tests customise via struct-update syntax.
 ///
 /// The model name and prompt are empty strings, the catch-all `other` map is
 /// empty, optional fields are `None`, and `skip_special_tokens` is the only
 /// flag that starts enabled.
 fn default_completion_request() -> CompletionRequest {
    CompletionRequest {
        // Required payload and catch-all map, left empty
        model: String::new(),
        prompt: StringOrArray::String(String::new()),
        other: serde_json::Map::new(),
        // Flag enabled in the baseline
        skip_special_tokens: true,
        // Flags disabled in the baseline
        stream: false,
        echo: false,
        no_stop_trim: false,
        ignore_eos: false,
        return_hidden_states: false,
        // Optional request fields, all unset
        suffix: None,
        max_tokens: None,
        temperature: None,
        top_p: None,
        n: None,
        stream_options: None,
        logprobs: None,
        stop: None,
        presence_penalty: None,
        frequency_penalty: None,
        best_of: None,
        logit_bias: None,
        user: None,
        seed: None,
        // SGLang extensions, all unset
        top_k: None,
        min_p: None,
        min_tokens: None,
        repetition_penalty: None,
        regex: None,
        ebnf: None,
        json_schema: None,
        stop_token_ids: None,
        lora_path: None,
        session_params: None,
    }
 }
 /// Construct a prefill-type `BasicWorker` pointing at a placeholder URL.
 // Not called by any test visible in this file, hence the lint allowance.
 #[allow(dead_code)]
 fn create_test_worker() -> BasicWorker {
    let url = String::from("http://test-server:8000");
    let worker_type = WorkerType::Prefill {
        bootstrap_port: Some(5678),
    };
    BasicWorker::new(url, worker_type)
 }
 /// Builds one populated request of each kind and checks that each serializes to JSON.
 #[test]
 fn test_benchmark_request_creation() {
    // Nested parameter structs are built first so the request literals stay short.
    let parameters = GenerateParameters {
        max_new_tokens: Some(100),
        temperature: Some(0.8),
        top_p: Some(0.9),
        top_k: Some(50),
        repetition_penalty: Some(1.0),
        ..Default::default()
    };
    let sampling_params = SamplingParams {
        temperature: Some(0.8),
        top_p: Some(0.9),
        top_k: Some(50),
        frequency_penalty: Some(0.0),
        presence_penalty: Some(0.0),
        repetition_penalty: Some(1.0),
        ..Default::default()
    };
    let generate_req = GenerateRequest {
        text: Some(String::from("Test prompt")),
        parameters: Some(parameters),
        sampling_params: Some(sampling_params),
        ..default_generate_request()
    };
    let user_message = ChatMessage::User {
        role: String::from("user"),
        content: UserMessageContent::Text(String::from("Test message")),
        name: None,
    };
    let chat_req = ChatCompletionRequest {
        model: String::from("test-model"),
        messages: vec![user_message],
        max_tokens: Some(150),
        max_completion_tokens: Some(150),
        temperature: Some(0.7),
        top_p: Some(1.0),
        n: Some(1),
        presence_penalty: Some(0.0),
        frequency_penalty: Some(0.0),
        parallel_tool_calls: Some(true),
        ..default_chat_completion_request()
    };
    let completion_req = CompletionRequest {
        model: String::from("test-model"),
        prompt: StringOrArray::String(String::from("Test prompt")),
        max_tokens: Some(50),
        temperature: Some(0.8),
        top_p: Some(1.0),
        n: Some(1),
        presence_penalty: Some(0.0),
        frequency_penalty: Some(0.0),
        best_of: Some(1),
        ..default_completion_request()
    };
    // Each request kind must serialize without error.
    let generate_json = to_string(&generate_req);
    assert!(generate_json.is_ok());
    let chat_json = to_string(&chat_req);
    assert!(chat_json.is_ok());
    let completion_json = to_string(&completion_req);
    assert!(completion_json.is_ok());
 }
 /// Serializes a `GenerateRequest` to JSON, parses it back, and compares selected fields.
 #[test]
 fn test_benchmark_serialization_roundtrip() {
    let original = GenerateRequest {
        text: Some(String::from("Test prompt")),
        ..default_generate_request()
    };
    // Encode, then decode the encoded text.
    let encoded = to_string(&original).expect("Serialization should work");
    let decoded: GenerateRequest = from_str(&encoded).expect("Deserialization should work");
    // Only these three fields are compared, one assertion per field.
    assert_eq!(original.text, decoded.text);
    assert_eq!(original.stream, decoded.stream);
    assert_eq!(original.return_logprob, decoded.return_logprob);
 }
 /// Converts a `GenerateRequest` to a JSON value, then to a string and bytes, and checks both are non-empty.
 #[test]
 fn test_benchmark_direct_json_routing() {
    let request = GenerateRequest {
        text: Some(String::from("Test prompt")),
        ..default_generate_request()
    };
    // Request -> JSON value -> string -> byte slice.
    let value = to_value(&request).unwrap();
    let rendered = to_string(&value).unwrap();
    let raw = rendered.as_bytes();
    // Neither representation may be empty.
    assert!(!rendered.is_empty());
    assert!(!raw.is_empty());
 }