[router] include rust benchmarks (#9932)
.github/workflows/pr-benchmark-rust.yml
@@ -9,6 +9,7 @@ on:
    branches: [ main ]
    paths:
      - "sgl-router/**"
    types: [opened, synchronize, reopened, labeled]
  workflow_dispatch:

concurrency:
@@ -19,9 +20,67 @@ permissions:
  pull-requests: write
  issues: write
jobs:
  benchmark-router:
  # Quick check job that always runs on PRs
  benchmark-compile-check:
    name: Benchmark Compilation Check
    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install dependencies
        run: |
          bash scripts/ci/ci_install_rust.sh

      - name: Setup sccache
        uses: mozilla-actions/sccache-action@v0.0.3
        continue-on-error: true
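        # Note: sccache is treated as best-effort; the run steps below probe the
        # server and fall back to plain cargo if it cannot start.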

      - name: Cache Rust dependencies
        uses: actions/cache@v4
        with:
          path: |
            ~/.cargo/bin/
            ~/.cargo/registry/index/
            ~/.cargo/registry/cache/
            ~/.cargo/git/db/
            sgl-router/target/
          key: ${{ runner.os }}-cargo-${{ hashFiles('sgl-router/Cargo.lock') }}
          restore-keys: |
            ${{ runner.os }}-cargo-
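          # The bare OS-scoped restore key lets a run reuse the most recent cache
          # when Cargo.lock has changed and the exact key above misses.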

      - name: Check benchmarks compile
        run: |
          source "$HOME/.cargo/env"
          cd sgl-router/
          # Try to use sccache, but disable if it fails
          if command -v sccache &> /dev/null; then
            echo "Testing sccache availability..."
            # Try to start sccache and check if it works
            export RUSTC_WRAPPER=sccache
            export SCCACHE_GHA_ENABLED="true"
            if sccache --start-server 2>/dev/null && sccache --show-stats 2>/dev/null; then
              echo "sccache is working, using it for compilation"
            else
              echo "sccache failed to start, falling back to regular cargo"
              unset RUSTC_WRAPPER
              unset SCCACHE_GHA_ENABLED
            fi
          else
            echo "sccache not available, using regular cargo"
          fi
          cargo check --benches

  # Full benchmark jobs that only run with label or on main branch
  benchmark-request-processing:
    name: Request Processing Benchmark
    if: |
      github.repository == 'sgl-project/sglang' &&
      (github.event_name == 'push' ||
      github.event_name == 'workflow_dispatch' ||
      contains(github.event.pull_request.labels.*.name, 'benchmark'))
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
@@ -33,6 +92,10 @@ jobs:
        run: |
          bash scripts/ci/ci_install_rust.sh

      - name: Setup sccache
        uses: mozilla-actions/sccache-action@v0.0.3
        continue-on-error: true

      - name: Cache Rust dependencies
        uses: actions/cache@v4
        with:
@@ -46,40 +109,61 @@ jobs:
          restore-keys: |
            ${{ runner.os }}-cargo-

      - name: Build router in release mode
      - name: Run request processing benchmark
        timeout-minutes: 30
        run: |
          source "$HOME/.cargo/env"
          cd sgl-router/
          cargo build --release

      - name: Run quick benchmarks
        timeout-minutes: 15
        run: |
          source "$HOME/.cargo/env"
          cd sgl-router/
          # Run quick benchmarks for PR validation using Python script
          python3 scripts/run_benchmarks.py --quick --validate-thresholds --save-results
          # Try to use sccache, but disable if it fails
          if command -v sccache &> /dev/null; then
            echo "Testing sccache availability..."
            # Try to start sccache and check if it works
            export RUSTC_WRAPPER=sccache
            export SCCACHE_GHA_ENABLED="true"
            if sccache --start-server 2>/dev/null && sccache --show-stats 2>/dev/null; then
              echo "sccache is working, using it for compilation"
            else
              echo "sccache failed to start, falling back to regular cargo"
              unset RUSTC_WRAPPER
              unset SCCACHE_GHA_ENABLED
            fi
          else
            echo "sccache not available, using regular cargo"
          fi
          # Run only the summary benchmark for quick validation in PRs
          cargo bench --bench request_processing -- benchmark_summary --exact

      - name: Upload benchmark results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-results-${{ github.sha }}
          name: request-processing-results-${{ github.sha }}
          path: |
            sgl-router/target/criterion/
            sgl-router/target/criterion/benchmark_summary/
          retention-days: 30

  benchmark-integration-test:
    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
  benchmark-tokenizer:
    name: Tokenizer Benchmark
    if: |
      github.repository == 'sgl-project/sglang' &&
      (github.event_name == 'push' ||
      github.event_name == 'workflow_dispatch' ||
      contains(github.event.pull_request.labels.*.name, 'benchmark'))
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 100
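          # Fetches extra history; presumably so benchmark results can be related
          # to recent baseline commits rather than only the PR head.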

      - name: Install dependencies
        run: |
          bash scripts/ci/ci_install_rust.sh

      - name: Setup sccache
        uses: mozilla-actions/sccache-action@v0.0.3
        continue-on-error: true

      - name: Cache Rust dependencies
        uses: actions/cache@v4
        with:
@@ -93,17 +177,146 @@ jobs:
          restore-keys: |
            ${{ runner.os }}-cargo-

      - name: Run benchmark integration tests
        timeout-minutes: 10
      - name: Run tokenizer benchmark
        timeout-minutes: 30
        run: |
          source "$HOME/.cargo/env"
          cd sgl-router/
          # Run integration tests to ensure benchmark code compiles and works
          cargo test --test benchmark_integration
          # Try to use sccache, but disable if it fails
          if command -v sccache &> /dev/null; then
            echo "Testing sccache availability..."
            # Try to start sccache and check if it works
            export RUSTC_WRAPPER=sccache
            export SCCACHE_GHA_ENABLED="true"
            if sccache --start-server 2>/dev/null && sccache --show-stats 2>/dev/null; then
              echo "sccache is working, using it for compilation"
            else
              echo "sccache failed to start, falling back to regular cargo"
              unset RUSTC_WRAPPER
              unset SCCACHE_GHA_ENABLED
            fi
          else
            echo "sccache not available, using regular cargo"
          fi
          cargo bench --bench tokenizer_benchmark

      - name: Verify benchmark compilation
      - name: Upload benchmark results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: tokenizer-results-${{ github.sha }}
          path: |
            sgl-router/target/criterion/tokenizer*/
          retention-days: 30

  benchmark-tool-parser:
    name: Tool Parser Benchmark
    if: |
      github.repository == 'sgl-project/sglang' &&
      (github.event_name == 'push' ||
      github.event_name == 'workflow_dispatch' ||
      contains(github.event.pull_request.labels.*.name, 'benchmark'))
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 100

      - name: Install dependencies
        run: |
          bash scripts/ci/ci_install_rust.sh

      - name: Setup sccache
        uses: mozilla-actions/sccache-action@v0.0.3
        continue-on-error: true

      - name: Cache Rust dependencies
        uses: actions/cache@v4
        with:
          path: |
            ~/.cargo/bin/
            ~/.cargo/registry/index/
            ~/.cargo/registry/cache/
            ~/.cargo/git/db/
            sgl-router/target/
          key: ${{ runner.os }}-cargo-${{ hashFiles('sgl-router/Cargo.lock') }}
          restore-keys: |
            ${{ runner.os }}-cargo-

      - name: Run tool parser benchmark
        timeout-minutes: 30
        run: |
          source "$HOME/.cargo/env"
          cd sgl-router/
          # Ensure all benchmarks compile without running them
          cargo check --benches
          # Try to use sccache, but disable if it fails
          if command -v sccache &> /dev/null; then
            echo "Testing sccache availability..."
            # Try to start sccache and check if it works
            export RUSTC_WRAPPER=sccache
            export SCCACHE_GHA_ENABLED="true"
            if sccache --start-server 2>/dev/null && sccache --show-stats 2>/dev/null; then
              echo "sccache is working, using it for compilation"
            else
              echo "sccache failed to start, falling back to regular cargo"
              unset RUSTC_WRAPPER
              unset SCCACHE_GHA_ENABLED
            fi
          else
            echo "sccache not available, using regular cargo"
          fi
          cargo bench --bench tool_parser_benchmark

      - name: Upload benchmark results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: tool-parser-results-${{ github.sha }}
          path: |
            sgl-router/target/criterion/tool_parser*/
          retention-days: 30

  benchmark-summary:
    name: Benchmark Summary
    needs: [benchmark-request-processing, benchmark-tokenizer, benchmark-tool-parser]
    if: always() && (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request')
    runs-on: ubuntu-latest
    steps:
      - name: Download all benchmark results
        uses: actions/download-artifact@v4
        with:
          pattern: '*-results-${{ github.sha }}'
          path: benchmark-results
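          # The pattern matches the request-processing-, tokenizer-, and
          # tool-parser-results artifacts uploaded by the jobs above.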

      - name: Generate summary
        run: |
          echo "## Benchmark Results Summary" > summary.md
          echo "" >> summary.md
          echo "### Request Processing" >> summary.md
          if [ -d "benchmark-results/request-processing-results-${{ github.sha }}" ]; then
            echo "✅ Completed" >> summary.md
          else
            echo "❌ Failed or skipped" >> summary.md
          fi
          echo "" >> summary.md
          echo "### Tokenizer" >> summary.md
          if [ -d "benchmark-results/tokenizer-results-${{ github.sha }}" ]; then
            echo "✅ Completed" >> summary.md
          else
            echo "❌ Failed or skipped" >> summary.md
          fi
          echo "" >> summary.md
          echo "### Tool Parser" >> summary.md
          if [ -d "benchmark-results/tool-parser-results-${{ github.sha }}" ]; then
            echo "✅ Completed" >> summary.md
          else
            echo "❌ Failed or skipped" >> summary.md
          fi
          cat summary.md

      - name: Upload summary
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-summary-${{ github.sha }}
          path: summary.md
          retention-days: 30

@@ -1,203 +0,0 @@
#!/usr/bin/env python3
"""
GitHub PR Comment Poster for Benchmark Results

Posts benchmark results as comments on GitHub PRs with update capability.
Replaces JavaScript logic in GitHub Actions for better maintainability.
"""

import argparse
import os
import sys
from pathlib import Path
from typing import Dict, Optional

import requests


class GitHubCommentPoster:
    """Handles posting benchmark results as GitHub PR comments."""

    def __init__(self, token: str, repo_owner: str, repo_name: str):
        self.token = token
        self.repo_owner = repo_owner
        self.repo_name = repo_name
        self.base_url = f"https://api.github.com/repos/{repo_owner}/{repo_name}"
        self.headers = {
            "Authorization": f"token {token}",
            "Accept": "application/vnd.github.v3+json",
        }

    def read_benchmark_results(self, results_file: str) -> Dict[str, str]:
        """Read benchmark results from file."""
        results = {}
        filepath = Path(results_file)

        if not filepath.exists():
            print(f"Results file not found: {filepath}")
            return {"error": "Results file not found"}

        try:
            with open(filepath, "r") as f:
                for line in f:
                    line = line.strip()
                    if "=" in line:
                        key, value = line.split("=", 1)
                        results[key] = value
        except Exception as e:
            print(f"Error reading results file: {e}")
            return {"error": str(e)}

        return results

    def format_benchmark_comment(
        self, results: Dict[str, str], pr_number: int, commit_sha: str
    ) -> str:
        """Format benchmark results into a GitHub comment."""
        serialization_time = results.get("serialization_time", "N/A")
        deserialization_time = results.get("deserialization_time", "N/A")
        adaptation_time = results.get("adaptation_time", "N/A")
        total_time = results.get("total_time", "N/A")

        comment = f"""
### SGLang Router Benchmark Results

**Performance Summary for PR #{pr_number}**

The router benchmarks have completed successfully!

**Performance Thresholds:** All passed
- Serialization: < 2μs
- Deserialization: < 2μs
- PD Adaptation: < 5μs
- Total Pipeline: < 10μs

**Measured Results:**
- Serialization: `{serialization_time}`ns
- Deserialization: `{deserialization_time}`ns
- PD Adaptation: `{adaptation_time}`ns
- Total Pipeline: `{total_time}`ns

**Detailed Reports:**
- Download the `benchmark-results-{commit_sha}` artifact for HTML reports
- Run `make bench` locally for detailed analysis

**Commit:** {commit_sha}
""".strip()

        return comment

    def find_existing_comment(self, pr_number: int) -> Optional[int]:
        """Find existing benchmark comment in the PR."""
        url = f"{self.base_url}/issues/{pr_number}/comments"

        try:
            response = requests.get(url, headers=self.headers)
            response.raise_for_status()
            comments = response.json()

            for comment in comments:
                if comment.get("user", {}).get(
                    "login"
                ) == "github-actions[bot]" and "SGLang Router Benchmark Results" in comment.get(
                    "body", ""
                ):
                    return comment["id"]

        except requests.RequestException as e:
            print(f"Error fetching comments: {e}")

        return None

    def post_comment(self, pr_number: int, comment_body: str) -> bool:
        """Post a new comment on the PR."""
        url = f"{self.base_url}/issues/{pr_number}/comments"
        data = {"body": comment_body}

        try:
            response = requests.post(url, headers=self.headers, json=data)
            response.raise_for_status()
            print(f"Posted new benchmark comment on PR #{pr_number}")
            return True
        except requests.RequestException as e:
            print(f"Error posting comment: {e}")
            return False

    def update_comment(self, comment_id: int, comment_body: str) -> bool:
        """Update an existing comment."""
        url = f"{self.base_url}/issues/comments/{comment_id}"
        data = {"body": comment_body}

        try:
            response = requests.patch(url, headers=self.headers, json=data)
            response.raise_for_status()
            print(f"Updated existing benchmark comment (ID: {comment_id})")
            return True
        except requests.RequestException as e:
            print(f"Error updating comment: {e}")
            return False

    def post_or_update_comment(
        self, pr_number: int, results_file: str, commit_sha: str
    ) -> bool:
        """Post or update benchmark results comment on PR."""
        # Read benchmark results
        results = self.read_benchmark_results(results_file)
        if "error" in results:
            print(f"Failed to read benchmark results: {results['error']}")
            return False

        # Format comment
        comment_body = self.format_benchmark_comment(results, pr_number, commit_sha)

        # Check for existing comment
        existing_comment_id = self.find_existing_comment(pr_number)

        if existing_comment_id:
            return self.update_comment(existing_comment_id, comment_body)
        else:
            return self.post_comment(pr_number, comment_body)


def main():
    parser = argparse.ArgumentParser(description="Post benchmark results to GitHub PR")
    parser.add_argument(
        "--pr-number", type=int, required=True, help="Pull request number"
    )
    parser.add_argument("--commit-sha", type=str, required=True, help="Commit SHA")
    parser.add_argument(
        "--results-file",
        type=str,
        default="benchmark_results.env",
        help="Path to benchmark results file",
    )
    parser.add_argument(
        "--repo-owner", type=str, default="sgl-project", help="GitHub repository owner"
    )
    parser.add_argument(
        "--repo-name", type=str, default="sglang", help="GitHub repository name"
    )

    args = parser.parse_args()

    # Get GitHub token from environment
    github_token = os.environ.get("GITHUB_TOKEN")
    if not github_token:
        print("Error: GITHUB_TOKEN environment variable is required")
        sys.exit(1)

    # Create poster and post comment
    poster = GitHubCommentPoster(github_token, args.repo_owner, args.repo_name)
    success = poster.post_or_update_comment(
        args.pr_number, args.results_file, args.commit_sha
    )

    if not success:
        print("Failed to post benchmark comment")
        sys.exit(1)

    print("Benchmark comment posted successfully!")


if __name__ == "__main__":
    main()
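
# Example invocation (script path illustrative; the file's real location is not
# shown in this diff):
#   GITHUB_TOKEN=<token> python3 post_benchmark_comment.py \
#       --pr-number <pr> --commit-sha <sha> --results-file benchmark_results.env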
@@ -1,228 +0,0 @@
// Integration test to ensure benchmarks compile and basic functionality works
// This prevents benchmarks from breaking in CI
//
// UPDATED: Removed deprecated ToPdRequest usage, now uses direct JSON serialization

use serde_json::{from_str, to_string, to_value};
use sglang_router_rs::core::{BasicWorker, WorkerType};
use sglang_router_rs::protocols::spec::{
    ChatCompletionRequest, ChatMessage, CompletionRequest, GenerateParameters, GenerateRequest,
    SamplingParams, StringOrArray, UserMessageContent,
};

/// Create a default GenerateRequest for benchmarks with minimal fields set
fn default_generate_request() -> GenerateRequest {
    GenerateRequest {
        text: None,
        prompt: None,
        input_ids: None,
        stream: false,
        parameters: None,
        sampling_params: None,
        return_logprob: false,
        // SGLang Extensions
        lora_path: None,
        session_params: None,
        return_hidden_states: false,
        rid: None,
    }
}

/// Create a default ChatCompletionRequest for benchmarks with minimal fields set
fn default_chat_completion_request() -> ChatCompletionRequest {
    ChatCompletionRequest {
        model: String::new(),
        messages: vec![],
        max_tokens: None,
        max_completion_tokens: None,
        temperature: None,
        top_p: None,
        n: None,
        stream: false,
        stream_options: None,
        stop: None,
        presence_penalty: None,
        frequency_penalty: None,
        logit_bias: None,
        logprobs: false,
        top_logprobs: None,
        user: None,
        response_format: None,
        seed: None,
        tools: None,
        tool_choice: None,
        parallel_tool_calls: None,
        function_call: None,
        functions: None,
        // SGLang Extensions
        top_k: None,
        min_p: None,
        min_tokens: None,
        repetition_penalty: None,
        regex: None,
        ebnf: None,
        stop_token_ids: None,
        no_stop_trim: false,
        ignore_eos: false,
        continue_final_message: false,
        skip_special_tokens: true,
        // SGLang Extensions
        lora_path: None,
        session_params: None,
        separate_reasoning: true,
        stream_reasoning: true,
        return_hidden_states: false,
    }
}

/// Create a default CompletionRequest for benchmarks with minimal fields set
fn default_completion_request() -> CompletionRequest {
    CompletionRequest {
        model: String::new(),
        prompt: StringOrArray::String(String::new()),
        suffix: None,
        max_tokens: None,
        temperature: None,
        top_p: None,
        n: None,
        stream: false,
        stream_options: None,
        logprobs: None,
        echo: false,
        stop: None,
        presence_penalty: None,
        frequency_penalty: None,
        best_of: None,
        logit_bias: None,
        user: None,
        seed: None,
        // SGLang Extensions
        top_k: None,
        min_p: None,
        min_tokens: None,
        repetition_penalty: None,
        regex: None,
        ebnf: None,
        json_schema: None,
        stop_token_ids: None,
        no_stop_trim: false,
        ignore_eos: false,
        skip_special_tokens: true,
        // SGLang Extensions
        lora_path: None,
        session_params: None,
        return_hidden_states: false,
        other: serde_json::Map::new(),
    }
}

#[allow(dead_code)]
fn create_test_worker() -> BasicWorker {
    BasicWorker::new(
        "http://test-server:8000".to_string(),
        WorkerType::Prefill {
            bootstrap_port: Some(5678),
        },
    )
}

#[test]
fn test_benchmark_request_creation() {
    // Ensure all benchmark request types can be created without panicking

    let generate_req = GenerateRequest {
        text: Some("Test prompt".to_string()),
        parameters: Some(GenerateParameters {
            max_new_tokens: Some(100),
            temperature: Some(0.8),
            top_p: Some(0.9),
            top_k: Some(50),
            repetition_penalty: Some(1.0),
            ..Default::default()
        }),
        sampling_params: Some(SamplingParams {
            temperature: Some(0.8),
            top_p: Some(0.9),
            top_k: Some(50),
            frequency_penalty: Some(0.0),
            presence_penalty: Some(0.0),
            repetition_penalty: Some(1.0),
            ..Default::default()
        }),
        ..default_generate_request()
    };

    let chat_req = ChatCompletionRequest {
        model: "test-model".to_string(),
        messages: vec![ChatMessage::User {
            role: "user".to_string(),
            content: UserMessageContent::Text("Test message".to_string()),
            name: None,
        }],
        max_tokens: Some(150),
        max_completion_tokens: Some(150),
        temperature: Some(0.7),
        top_p: Some(1.0),
        n: Some(1),
        presence_penalty: Some(0.0),
        frequency_penalty: Some(0.0),
        parallel_tool_calls: Some(true),
        ..default_chat_completion_request()
    };

    let completion_req = CompletionRequest {
        model: "test-model".to_string(),
        prompt: StringOrArray::String("Test prompt".to_string()),
        max_tokens: Some(50),
        temperature: Some(0.8),
        top_p: Some(1.0),
        n: Some(1),
        presence_penalty: Some(0.0),
        frequency_penalty: Some(0.0),
        best_of: Some(1),
        ..default_completion_request()
    };

    // Test serialization works
    assert!(to_string(&generate_req).is_ok());
    assert!(to_string(&chat_req).is_ok());
    assert!(to_string(&completion_req).is_ok());
}

#[test]
fn test_benchmark_serialization_roundtrip() {
    // Test serialization/deserialization roundtrip for benchmark types

    let generate_req = GenerateRequest {
        text: Some("Test prompt".to_string()),
        ..default_generate_request()
    };

    // Serialize and deserialize
    let json = to_string(&generate_req).expect("Serialization should work");
    let deserialized: GenerateRequest = from_str(&json).expect("Deserialization should work");

    // Verify basic field equality
    assert_eq!(generate_req.text, deserialized.text);
    assert_eq!(generate_req.stream, deserialized.stream);
    assert_eq!(generate_req.return_logprob, deserialized.return_logprob);
}

#[test]
fn test_benchmark_direct_json_routing() {
    // Test direct JSON routing functionality for benchmark types (replaces regular routing)

    let generate_req = GenerateRequest {
        text: Some("Test prompt".to_string()),
        ..default_generate_request()
    };

    // Test direct JSON conversion (replaces regular routing methods)
    let json = to_value(&generate_req).unwrap();
    let json_string = to_string(&json).unwrap();
    let bytes = json_string.as_bytes();

    // Verify conversions work
    assert!(!json_string.is_empty());
    assert!(!bytes.is_empty());
}
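
// These tests run as the `benchmark_integration` test target; locally, from
// sgl-router/, invoke them with: cargo test --test benchmark_integration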