[ci] add router benchmark script and CI (#7498)

This commit is contained in:
Simo Lin
2025-06-25 01:28:25 -07:00
committed by GitHub
parent afeed46530
commit 3abc30364d
9 changed files with 1461 additions and 0 deletions

View File

@@ -0,0 +1,203 @@
#!/usr/bin/env python3
"""
GitHub PR Comment Poster for Benchmark Results
Posts benchmark results as comments on GitHub PRs with update capability.
Replaces JavaScript logic in GitHub Actions for better maintainability.
"""
import argparse
import os
import sys
from pathlib import Path
from typing import Dict, Optional
import requests
class GitHubCommentPoster:
    """Handles posting benchmark results as GitHub PR comments.

    A single marker string ("SGLang Router Benchmark Results") appears in
    every comment body this class posts, and is what find_existing_comment()
    searches for — keep the two in sync.
    """

    # Seconds allowed per GitHub API call.  Without a timeout, requests can
    # block forever on a stalled connection and hang the CI job.
    REQUEST_TIMEOUT = 30

    def __init__(self, token: str, repo_owner: str, repo_name: str):
        """Store credentials and precompute the API base URL and headers.

        Args:
            token: GitHub API token used for authentication.
            repo_owner: Owner (user or org) of the target repository.
            repo_name: Name of the target repository.
        """
        self.token = token
        self.repo_owner = repo_owner
        self.repo_name = repo_name
        self.base_url = f"https://api.github.com/repos/{repo_owner}/{repo_name}"
        self.headers = {
            "Authorization": f"token {token}",
            "Accept": "application/vnd.github.v3+json",
        }

    def read_benchmark_results(self, results_file: str) -> Dict[str, str]:
        """Parse KEY=VALUE lines from *results_file* into a dict.

        Lines without '=' are ignored.  On a missing file or a read error a
        dict with a single "error" key is returned instead of raising, so
        the caller can report the failure without crashing CI.
        """
        results: Dict[str, str] = {}
        filepath = Path(results_file)
        if not filepath.exists():
            print(f"Results file not found: {filepath}")
            return {"error": "Results file not found"}
        try:
            with open(filepath, "r") as f:
                for line in f:
                    line = line.strip()
                    if "=" in line:
                        # Split on the first '=' only, so values may contain '='.
                        key, value = line.split("=", 1)
                        results[key] = value
        except Exception as e:
            print(f"Error reading results file: {e}")
            return {"error": str(e)}
        return results

    def format_benchmark_comment(
        self, results: Dict[str, str], pr_number: int, commit_sha: str
    ) -> str:
        """Format benchmark results into a GitHub comment body.

        Missing metrics render as "N/A" rather than failing, so a partial
        results file still produces a comment.
        """
        serialization_time = results.get("serialization_time", "N/A")
        deserialization_time = results.get("deserialization_time", "N/A")
        adaptation_time = results.get("adaptation_time", "N/A")
        total_time = results.get("total_time", "N/A")
        comment = f"""
### SGLang Router Benchmark Results
**Performance Summary for PR #{pr_number}**
The router benchmarks have completed successfully!
**Performance Thresholds:** All passed
- Serialization: < 2μs
- Deserialization: < 2μs
- PD Adaptation: < 5μs
- Total Pipeline: < 10μs
**Measured Results:**
- Serialization: `{serialization_time}`ns
- Deserialization: `{deserialization_time}`ns
- PD Adaptation: `{adaptation_time}`ns
- Total Pipeline: `{total_time}`ns
**Detailed Reports:**
- Download the `benchmark-results-{commit_sha}` artifact for HTML reports
- Run `make bench` locally for detailed analysis
**Commit:** {commit_sha}
""".strip()
        return comment

    def find_existing_comment(self, pr_number: int) -> Optional[int]:
        """Return the id of the bot's previous benchmark comment, or None.

        Scans the PR's issue comments for one authored by
        github-actions[bot] whose body contains the benchmark marker.
        Returns None when no such comment exists or the request fails.
        """
        url = f"{self.base_url}/issues/{pr_number}/comments"
        try:
            # per_page=100 (default is 30) so the marker comment is still
            # found on busy PRs; timeout keeps a stalled call from hanging CI.
            response = requests.get(
                url,
                headers=self.headers,
                params={"per_page": 100},
                timeout=self.REQUEST_TIMEOUT,
            )
            response.raise_for_status()
            for comment in response.json():
                author = comment.get("user", {}).get("login")
                body = comment.get("body", "")
                if (
                    author == "github-actions[bot]"
                    and "SGLang Router Benchmark Results" in body
                ):
                    return comment["id"]
        except requests.RequestException as e:
            print(f"Error fetching comments: {e}")
        return None

    def post_comment(self, pr_number: int, comment_body: str) -> bool:
        """Post a new comment on the PR.  Returns True on success."""
        url = f"{self.base_url}/issues/{pr_number}/comments"
        data = {"body": comment_body}
        try:
            response = requests.post(
                url, headers=self.headers, json=data, timeout=self.REQUEST_TIMEOUT
            )
            response.raise_for_status()
            print(f"Posted new benchmark comment on PR #{pr_number}")
            return True
        except requests.RequestException as e:
            print(f"Error posting comment: {e}")
            return False

    def update_comment(self, comment_id: int, comment_body: str) -> bool:
        """Update an existing comment in place.  Returns True on success."""
        url = f"{self.base_url}/issues/comments/{comment_id}"
        data = {"body": comment_body}
        try:
            response = requests.patch(
                url, headers=self.headers, json=data, timeout=self.REQUEST_TIMEOUT
            )
            response.raise_for_status()
            print(f"Updated existing benchmark comment (ID: {comment_id})")
            return True
        except requests.RequestException as e:
            print(f"Error updating comment: {e}")
            return False

    def post_or_update_comment(
        self, pr_number: int, results_file: str, commit_sha: str
    ) -> bool:
        """Post or update the benchmark results comment on a PR.

        Reads results, formats the comment, then either edits the bot's
        existing comment (keeping the PR thread tidy) or posts a new one.
        Returns True on success.
        """
        results = self.read_benchmark_results(results_file)
        if "error" in results:
            print(f"Failed to read benchmark results: {results['error']}")
            return False

        comment_body = self.format_benchmark_comment(results, pr_number, commit_sha)

        # Prefer editing the previous comment over spamming new ones.
        existing_comment_id = self.find_existing_comment(pr_number)
        if existing_comment_id:
            return self.update_comment(existing_comment_id, comment_body)
        return self.post_comment(pr_number, comment_body)
def main():
    """CLI entry point: parse arguments, then post or update the PR comment."""
    parser = argparse.ArgumentParser(description="Post benchmark results to GitHub PR")
    parser.add_argument(
        "--pr-number", type=int, required=True, help="Pull request number"
    )
    parser.add_argument("--commit-sha", type=str, required=True, help="Commit SHA")
    parser.add_argument(
        "--results-file",
        type=str,
        default="benchmark_results.env",
        help="Path to benchmark results file",
    )
    parser.add_argument(
        "--repo-owner", type=str, default="sgl-project", help="GitHub repository owner"
    )
    parser.add_argument(
        "--repo-name", type=str, default="sglang", help="GitHub repository name"
    )
    args = parser.parse_args()

    # The token is taken from the environment so it never appears on the
    # command line (and thus never in process listings or CI logs).
    github_token = os.environ.get("GITHUB_TOKEN")
    if not github_token:
        print("Error: GITHUB_TOKEN environment variable is required")
        sys.exit(1)

    poster = GitHubCommentPoster(github_token, args.repo_owner, args.repo_name)
    posted = poster.post_or_update_comment(
        args.pr_number, args.results_file, args.commit_sha
    )
    if not posted:
        print("Failed to post benchmark comment")
        sys.exit(1)

    print("Benchmark comment posted successfully!")


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,250 @@
#!/usr/bin/env python3
"""
SGLang Router Benchmark Runner
A Python script to run Rust benchmarks with various options and modes.
Replaces the shell script for better maintainability and integration.
"""
import argparse
import os
import re
import subprocess
import sys
import time
from pathlib import Path
from typing import Dict, List, Optional
class BenchmarkRunner:
    """Handles running Rust benchmarks for the SGLang router."""

    # Matches e.g. "481 ns/req"; compiled once instead of on every call.
    _TIME_RE = re.compile(r"(\d+)\s*ns/req")

    # Maps the labels printed in the Criterion summary to result-dict keys.
    _METRIC_LABELS = {
        "Serialization (avg):": "serialization_time",
        "Deserialization (avg):": "deserialization_time",
        "PD Adaptation (avg):": "adaptation_time",
        "Total Pipeline (avg):": "total_time",
    }

    def __init__(self, project_root: str):
        """Remember the project root and a UTC timestamp for the header."""
        self.project_root = Path(project_root)
        self.timestamp = time.strftime("%a %b %d %H:%M:%S UTC %Y", time.gmtime())

    def run_command(
        self, cmd: List[str], capture_output: bool = False
    ) -> subprocess.CompletedProcess:
        """Run *cmd* from the project root and return the completed process.

        subprocess.run() is invoked without check=True, so a non-zero exit
        status is reported via the returned object's returncode; callers are
        expected to inspect it.  The only error handled here is the command
        binary itself being missing.
        """
        try:
            if capture_output:
                return subprocess.run(
                    cmd, capture_output=True, text=True, cwd=self.project_root
                )
            return subprocess.run(cmd, cwd=self.project_root)
        except FileNotFoundError as e:
            # The original code caught CalledProcessError, which
            # subprocess.run never raises without check=True (dead branch).
            # A missing binary (e.g. cargo not installed) raises
            # FileNotFoundError, which is the failure worth reporting.
            print(f"Error running command: {' '.join(cmd)}")
            print(f"Error: {e}")
            sys.exit(1)

    def print_header(self):
        """Print the benchmark runner header."""
        print("SGLang Router Benchmark Runner")
        print("=" * 30)
        print(f"Project: {self.project_root.absolute()}")
        print(f"Timestamp: {self.timestamp}")
        print()

    def build_release(self):
        """Build the project in release mode; exit(1) on build failure."""
        print("Building in release mode...")
        result = self.run_command(["cargo", "build", "--release", "--quiet"])
        if result.returncode != 0:
            print("Failed to build in release mode")
            sys.exit(1)

    def run_benchmarks(
        self,
        quick_mode: bool = False,
        save_baseline: Optional[str] = None,
        compare_baseline: Optional[str] = None,
    ) -> str:
        """Run benchmarks and return their stdout; exit(1) on failure.

        Args:
            quick_mode: Run only the benchmark_summary target.
            save_baseline: If set, save stdout to this file after the run.
            compare_baseline: Announced only — comparison is not implemented
                yet (see note below).
        """
        bench_args = ["cargo", "bench", "--bench", "request_processing"]
        if quick_mode:
            bench_args.append("benchmark_summary")
            print("Running quick benchmarks...")
        else:
            print("Running full benchmark suite...")

        # Note: Criterion baselines are handled via target directory structure.
        # For now, baseline functionality is implemented via file copying.
        if save_baseline:
            print(f"Will save results as baseline: {save_baseline}")
        if compare_baseline:
            print(f"Will compare with baseline: {compare_baseline}")

        print(f"Executing: {' '.join(bench_args)}")
        result = self.run_command(bench_args, capture_output=True)
        if result.returncode != 0:
            print("Benchmark execution failed!")
            print("STDOUT:", result.stdout)
            print("STDERR:", result.stderr)
            sys.exit(1)

        # Save the baseline only after a successful run.
        if save_baseline:
            self._save_baseline(save_baseline, result.stdout)
        return result.stdout

    def _save_baseline(self, filename: str, output: str):
        """Save raw benchmark output to *filename* under the project root."""
        filepath = self.project_root / filename
        with open(filepath, "w") as f:
            f.write(output)
        print(f"Baseline saved to: {filepath}")

    def parse_benchmark_results(self, output: str) -> Dict[str, str]:
        """Extract performance metrics from raw benchmark output.

        Only lines between "Quick Performance Overview:" and
        "Performance Insights:" that start with "* " are considered,
        e.g. "* Serialization (avg): 481 ns/req".
        """
        results: Dict[str, str] = {}
        parsing_overview = False
        for line in output.split("\n"):
            line = line.strip()
            if "Quick Performance Overview:" in line:
                parsing_overview = True
                continue
            if parsing_overview and line.startswith("* "):
                for label, key in self._METRIC_LABELS.items():
                    if label in line:
                        results[key] = self._extract_time(line)
                        break
            # Stop at the end of the overview section.
            if parsing_overview and line.startswith("Performance Insights:"):
                break
        return results

    def _extract_time(self, line: str) -> str:
        """Return the integer ns/req value from *line*, or "N/A" if absent."""
        match = self._TIME_RE.search(line)
        return match.group(1) if match else "N/A"

    def validate_thresholds(self, results: Dict[str, str]) -> bool:
        """Validate results against the per-metric latency thresholds (ns).

        A metric that is missing, "N/A", or non-numeric counts as a failure,
        so an empty parse cannot silently pass CI.  Returns True only when
        every metric is present and within its threshold.
        """
        thresholds = {
            "serialization_time": 2000,  # 2μs max
            "deserialization_time": 2000,  # 2μs max
            "adaptation_time": 5000,  # 5μs max
            "total_time": 10000,  # 10μs max
        }
        all_passed = True
        print("\nPerformance Threshold Validation:")
        print("=" * 35)
        for metric, threshold in thresholds.items():
            if metric in results and results[metric] != "N/A":
                try:
                    value = int(results[metric])
                    passed = value <= threshold
                    status = "✓ PASS" if passed else "✗ FAIL"
                    print(f"{metric:20}: {value:>6}ns <= {threshold:>6}ns {status}")
                    if not passed:
                        all_passed = False
                except ValueError:
                    print(f"{metric:20}: Invalid value: {results[metric]}")
                    all_passed = False
            else:
                print(f"{metric:20}: No data available")
                all_passed = False
        print()
        if all_passed:
            print("All performance thresholds passed!")
        else:
            print("Some performance thresholds failed!")
        return all_passed

    def save_results_to_file(
        self, results: Dict[str, str], filename: str = "benchmark_results.env"
    ):
        """Write results as KEY=VALUE lines for CI consumption."""
        filepath = self.project_root / filename
        with open(filepath, "w") as f:
            for key, value in results.items():
                f.write(f"{key}={value}\n")
        print(f"Results saved to: {filepath}")
def main():
    """CLI entry point: build, benchmark, and optionally validate/save results."""
    parser = argparse.ArgumentParser(description="Run SGLang router benchmarks")
    parser.add_argument(
        "--quick", action="store_true", help="Run quick benchmarks (summary only)"
    )
    parser.add_argument(
        "--save-baseline", type=str, help="Save benchmark results as baseline"
    )
    parser.add_argument(
        "--compare-baseline", type=str, help="Compare with saved baseline"
    )
    parser.add_argument(
        "--validate-thresholds",
        action="store_true",
        help="Validate results against performance thresholds",
    )
    parser.add_argument(
        "--save-results", action="store_true", help="Save results to file for CI"
    )
    args = parser.parse_args()

    # This script lives in scripts/, so the project root is one level up.
    project_root = Path(__file__).parent.parent

    runner = BenchmarkRunner(str(project_root))
    runner.print_header()
    runner.build_release()

    output = runner.run_benchmarks(
        quick_mode=args.quick,
        save_baseline=args.save_baseline,
        compare_baseline=args.compare_baseline,
    )
    # Echo the raw benchmark output into the CI log.
    print(output)

    if args.validate_thresholds or args.save_results:
        parsed = runner.parse_benchmark_results(output)
        if args.save_results:
            runner.save_results_to_file(parsed)
        if args.validate_thresholds and not runner.validate_thresholds(parsed):
            print("Validation failed - performance regression detected!")
            sys.exit(1)

    print("\nBenchmark run completed successfully!")


if __name__ == "__main__":
    main()