[router] include rust benchmarks (#9932)

This commit is contained in:
Simo Lin
2025-09-02 09:32:09 -07:00
committed by GitHub
parent f64b8e3e4e
commit 9491d6e554
3 changed files with 235 additions and 453 deletions

View File

@@ -1,203 +0,0 @@
#!/usr/bin/env python3
"""
GitHub PR Comment Poster for Benchmark Results
Posts benchmark results as comments on GitHub PRs with update capability.
Replaces JavaScript logic in GitHub Actions for better maintainability.
"""
import argparse
import os
import sys
from pathlib import Path
from typing import Dict, Optional
import requests
class GitHubCommentPoster:
    """Post benchmark results as a PR comment, updating a previous one if found.

    The poster talks to the GitHub REST API for a single repository. A comment
    is considered "ours" when it was authored by ``github-actions[bot]`` and
    its body contains the benchmark heading text.
    """

    # Heading text that identifies a comment previously written by this tool.
    _COMMENT_MARKER = "SGLang Router Benchmark Results"
    # Login of the account whose comments are eligible for in-place updates.
    _BOT_LOGIN = "github-actions[bot]"
    # Seconds to wait on any single HTTP call so a stalled request cannot
    # hang the CI job indefinitely.
    _REQUEST_TIMEOUT = 30
    # Page size used when listing PR comments.
    _COMMENTS_PER_PAGE = 100

    def __init__(self, token: str, repo_owner: str, repo_name: str):
        """Store credentials and precompute the repository API base URL.

        Args:
            token: GitHub API token sent in the ``Authorization`` header.
            repo_owner: Owner (user or organisation) of the repository.
            repo_name: Name of the repository.
        """
        self.token = token
        self.repo_owner = repo_owner
        self.repo_name = repo_name
        self.base_url = f"https://api.github.com/repos/{repo_owner}/{repo_name}"
        self.headers = {
            "Authorization": f"token {token}",
            "Accept": "application/vnd.github.v3+json",
        }

    def read_benchmark_results(self, results_file: str) -> Dict[str, str]:
        """Parse a ``KEY=VALUE`` results file into a dictionary.

        Blank lines, lines starting with ``#`` and lines without ``=`` are
        skipped. Whitespace around keys and values is removed.

        Args:
            results_file: Path of the results file.

        Returns:
            The parsed key/value pairs, or ``{"error": <message>}`` when the
            file is missing or cannot be read.
        """
        results: Dict[str, str] = {}
        filepath = Path(results_file)
        if not filepath.exists():
            print(f"Results file not found: {filepath}")
            return {"error": "Results file not found"}
        try:
            with open(filepath, "r", encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if not line or line.startswith("#"):
                        continue
                    key, separator, value = line.partition("=")
                    if not separator:
                        continue
                    results[key.strip()] = value.strip()
        except (OSError, UnicodeError) as e:
            print(f"Error reading results file: {e}")
            return {"error": str(e)}
        return results

    @staticmethod
    def _format_ns(results: Dict[str, str], key: str) -> str:
        """Render one measurement, or a plain ``N/A`` when it is missing/empty."""
        value = results.get(key)
        if not value:
            # Avoid emitting a misleading "`N/A`ns" for absent measurements.
            return "N/A"
        return f"`{value}`ns"

    def format_benchmark_comment(
        self, results: Dict[str, str], pr_number: int, commit_sha: str
    ) -> str:
        """Build the Markdown body of the benchmark comment.

        Args:
            results: Parsed results; the keys ``serialization_time``,
                ``deserialization_time``, ``adaptation_time`` and
                ``total_time`` are read, each optional.
            pr_number: Pull request number shown in the summary line.
            commit_sha: Commit SHA shown in the artifact name and footer.

        Returns:
            The comment text with surrounding whitespace stripped.
        """
        # NOTE(review): the thresholds are written in microseconds while the
        # measurements are labelled nanoseconds, and "All passed" is emitted
        # unconditionally; nothing here compares results against thresholds.
        lines = [
            f"### {self._COMMENT_MARKER}",
            f"**Performance Summary for PR #{pr_number}**",
            "The router benchmarks have completed successfully!",
            "**Performance Thresholds:** All passed",
            "- Serialization: < 2μs",
            "- Deserialization: < 2μs",
            "- PD Adaptation: < 5μs",
            "- Total Pipeline: < 10μs",
            "**Measured Results:**",
            f"- Serialization: {self._format_ns(results, 'serialization_time')}",
            f"- Deserialization: {self._format_ns(results, 'deserialization_time')}",
            f"- PD Adaptation: {self._format_ns(results, 'adaptation_time')}",
            f"- Total Pipeline: {self._format_ns(results, 'total_time')}",
            "**Detailed Reports:**",
            f"- Download the `benchmark-results-{commit_sha}` artifact for HTML reports",
            "- Run `make bench` locally for detailed analysis",
            f"**Commit:** {commit_sha}",
        ]
        return "\n".join(lines).strip()

    def find_existing_comment(self, pr_number: int) -> Optional[int]:
        """Return the id of the bot's earlier benchmark comment, if any.

        All pages of the PR's comments are scanned; the listing endpoint is
        paginated, so reading only the first page would miss older comments
        on busy PRs and lead to duplicate posts.

        Args:
            pr_number: Pull request number to search.

        Returns:
            The comment id, or ``None`` when no matching comment exists or
            the request fails.
        """
        url = f"{self.base_url}/issues/{pr_number}/comments"
        page = 1
        try:
            while True:
                response = requests.get(
                    url,
                    headers=self.headers,
                    params={"per_page": self._COMMENTS_PER_PAGE, "page": page},
                    timeout=self._REQUEST_TIMEOUT,
                )
                response.raise_for_status()
                comments = response.json()
                for comment in comments:
                    # "user" and "body" may be null in API payloads.
                    author = (comment.get("user") or {}).get("login")
                    body = comment.get("body") or ""
                    if author == self._BOT_LOGIN and self._COMMENT_MARKER in body:
                        return comment["id"]
                # A short (or empty) page means there is nothing further.
                if len(comments) < self._COMMENTS_PER_PAGE:
                    break
                page += 1
        except requests.RequestException as e:
            print(f"Error fetching comments: {e}")
        return None

    def post_comment(self, pr_number: int, comment_body: str) -> bool:
        """Create a new comment on the PR.

        Args:
            pr_number: Pull request number to comment on.
            comment_body: Markdown text of the comment.

        Returns:
            ``True`` on success, ``False`` if the request failed.
        """
        url = f"{self.base_url}/issues/{pr_number}/comments"
        data = {"body": comment_body}
        try:
            response = requests.post(
                url, headers=self.headers, json=data, timeout=self._REQUEST_TIMEOUT
            )
            response.raise_for_status()
            print(f"Posted new benchmark comment on PR #{pr_number}")
            return True
        except requests.RequestException as e:
            print(f"Error posting comment: {e}")
            return False

    def update_comment(self, comment_id: int, comment_body: str) -> bool:
        """Replace the body of an existing comment.

        Args:
            comment_id: Id of the comment to overwrite.
            comment_body: New Markdown text of the comment.

        Returns:
            ``True`` on success, ``False`` if the request failed.
        """
        url = f"{self.base_url}/issues/comments/{comment_id}"
        data = {"body": comment_body}
        try:
            response = requests.patch(
                url, headers=self.headers, json=data, timeout=self._REQUEST_TIMEOUT
            )
            response.raise_for_status()
            print(f"Updated existing benchmark comment (ID: {comment_id})")
            return True
        except requests.RequestException as e:
            print(f"Error updating comment: {e}")
            return False

    def post_or_update_comment(
        self, pr_number: int, results_file: str, commit_sha: str
    ) -> bool:
        """Read the results file and publish it, reusing an earlier comment.

        Args:
            pr_number: Pull request number to comment on.
            results_file: Path of the ``KEY=VALUE`` results file.
            commit_sha: Commit SHA to mention in the comment.

        Returns:
            ``True`` when the comment was posted or updated, ``False`` when
            the results could not be read or the API call failed.
        """
        results = self.read_benchmark_results(results_file)
        if "error" in results:
            print(f"Failed to read benchmark results: {results['error']}")
            return False

        comment_body = self.format_benchmark_comment(results, pr_number, commit_sha)

        existing_comment_id = self.find_existing_comment(pr_number)
        if existing_comment_id is not None:
            return self.update_comment(existing_comment_id, comment_body)
        return self.post_comment(pr_number, comment_body)
def main():
    """Command-line entry point: parse options and publish the PR comment.

    Exits with status 1 when ``GITHUB_TOKEN`` is not set or when posting the
    comment fails.
    """
    cli = argparse.ArgumentParser(description="Post benchmark results to GitHub PR")
    # Declared as data so every option is registered the same way.
    option_specs = (
        ("--pr-number", {"type": int, "required": True, "help": "Pull request number"}),
        ("--commit-sha", {"type": str, "required": True, "help": "Commit SHA"}),
        (
            "--results-file",
            {
                "type": str,
                "default": "benchmark_results.env",
                "help": "Path to benchmark results file",
            },
        ),
        (
            "--repo-owner",
            {"type": str, "default": "sgl-project", "help": "GitHub repository owner"},
        ),
        (
            "--repo-name",
            {"type": str, "default": "sglang", "help": "GitHub repository name"},
        ),
    )
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)
    opts = cli.parse_args()

    # The API token is taken from the environment, never from the CLI.
    token = os.environ.get("GITHUB_TOKEN")
    if not token:
        print("Error: GITHUB_TOKEN environment variable is required")
        sys.exit(1)

    commenter = GitHubCommentPoster(token, opts.repo_owner, opts.repo_name)
    posted = commenter.post_or_update_comment(
        opts.pr_number, opts.results_file, opts.commit_sha
    )
    if not posted:
        print("Failed to post benchmark comment")
        sys.exit(1)

    print("Benchmark comment posted successfully!")


if __name__ == "__main__":
    main()

View File

@@ -1,228 +0,0 @@
// Integration test to ensure benchmarks compile and basic functionality works
// This prevents benchmarks from breaking in CI
//
// UPDATED: Removed deprecated ToPdRequest usage, now uses direct JSON serialization
use serde_json::{from_str, to_string, to_value};
use sglang_router_rs::core::{BasicWorker, WorkerType};
use sglang_router_rs::protocols::spec::{
ChatCompletionRequest, ChatMessage, CompletionRequest, GenerateParameters, GenerateRequest,
SamplingParams, StringOrArray, UserMessageContent,
};
/// Baseline `GenerateRequest` for the benchmark tests: every optional field
/// is `None` and every boolean flag is `false`. Tests override individual
/// fields with struct-update syntax.
fn default_generate_request() -> GenerateRequest {
    GenerateRequest {
        // Optional fields, all unset.
        text: None,
        prompt: None,
        input_ids: None,
        parameters: None,
        sampling_params: None,
        // Optional SGLang extension fields, also unset.
        lora_path: None,
        session_params: None,
        rid: None,
        // Boolean flags, all disabled.
        stream: false,
        return_logprob: false,
        return_hidden_states: false,
    }
}
/// Baseline `ChatCompletionRequest` for the benchmark tests: empty model and
/// message list, every optional field `None`, and fixed values for the
/// boolean flags. Tests override individual fields with struct-update syntax.
fn default_chat_completion_request() -> ChatCompletionRequest {
    ChatCompletionRequest {
        // Required fields, left empty.
        model: String::new(),
        messages: Vec::new(),
        // Optional fields, all unset.
        max_tokens: None,
        max_completion_tokens: None,
        temperature: None,
        top_p: None,
        n: None,
        stream_options: None,
        stop: None,
        presence_penalty: None,
        frequency_penalty: None,
        logit_bias: None,
        top_logprobs: None,
        user: None,
        response_format: None,
        seed: None,
        tools: None,
        tool_choice: None,
        parallel_tool_calls: None,
        function_call: None,
        functions: None,
        // Optional SGLang extension fields, also unset.
        top_k: None,
        min_p: None,
        min_tokens: None,
        repetition_penalty: None,
        regex: None,
        ebnf: None,
        stop_token_ids: None,
        lora_path: None,
        session_params: None,
        // Boolean flags that start disabled.
        stream: false,
        logprobs: false,
        no_stop_trim: false,
        ignore_eos: false,
        continue_final_message: false,
        return_hidden_states: false,
        // Boolean flags that start enabled.
        skip_special_tokens: true,
        separate_reasoning: true,
        stream_reasoning: true,
    }
}
/// Baseline `CompletionRequest` for the benchmark tests: empty model and
/// prompt, every optional field `None`, fixed values for the boolean flags,
/// and an empty `other` map. Tests override individual fields with
/// struct-update syntax.
fn default_completion_request() -> CompletionRequest {
    CompletionRequest {
        // Required fields, left empty.
        model: String::new(),
        prompt: StringOrArray::String(String::new()),
        // Optional fields, all unset.
        suffix: None,
        max_tokens: None,
        temperature: None,
        top_p: None,
        n: None,
        stream_options: None,
        logprobs: None,
        stop: None,
        presence_penalty: None,
        frequency_penalty: None,
        best_of: None,
        logit_bias: None,
        user: None,
        seed: None,
        // Optional SGLang extension fields, also unset.
        top_k: None,
        min_p: None,
        min_tokens: None,
        repetition_penalty: None,
        regex: None,
        ebnf: None,
        json_schema: None,
        stop_token_ids: None,
        lora_path: None,
        session_params: None,
        // Boolean flags that start disabled.
        stream: false,
        echo: false,
        no_stop_trim: false,
        ignore_eos: false,
        return_hidden_states: false,
        // Boolean flag that starts enabled.
        skip_special_tokens: true,
        // No additional pass-through entries.
        other: serde_json::Map::new(),
    }
}
/// Build a prefill-type `BasicWorker` with a placeholder URL and bootstrap
/// port 5678. Marked `dead_code`-tolerant because no test in view calls it.
#[allow(dead_code)]
fn create_test_worker() -> BasicWorker {
    let url = String::from("http://test-server:8000");
    let worker_type = WorkerType::Prefill {
        bootstrap_port: Some(5678),
    };
    BasicWorker::new(url, worker_type)
}
/// Smoke test: each benchmark request type can be constructed with
/// representative values and serialized to a JSON string.
#[test]
fn test_benchmark_request_creation() {
    // Generate request, assembled from its nested parts.
    let parameters = GenerateParameters {
        max_new_tokens: Some(100),
        temperature: Some(0.8),
        top_p: Some(0.9),
        top_k: Some(50),
        repetition_penalty: Some(1.0),
        ..Default::default()
    };
    let sampling = SamplingParams {
        temperature: Some(0.8),
        top_p: Some(0.9),
        top_k: Some(50),
        frequency_penalty: Some(0.0),
        presence_penalty: Some(0.0),
        repetition_penalty: Some(1.0),
        ..Default::default()
    };
    let generate = GenerateRequest {
        text: Some(String::from("Test prompt")),
        parameters: Some(parameters),
        sampling_params: Some(sampling),
        ..default_generate_request()
    };

    // Chat completion request with a single user message.
    let user_turn = ChatMessage::User {
        role: String::from("user"),
        content: UserMessageContent::Text(String::from("Test message")),
        name: None,
    };
    let chat = ChatCompletionRequest {
        model: String::from("test-model"),
        messages: vec![user_turn],
        max_tokens: Some(150),
        max_completion_tokens: Some(150),
        temperature: Some(0.7),
        top_p: Some(1.0),
        n: Some(1),
        presence_penalty: Some(0.0),
        frequency_penalty: Some(0.0),
        parallel_tool_calls: Some(true),
        ..default_chat_completion_request()
    };

    // Plain completion request.
    let completion = CompletionRequest {
        model: String::from("test-model"),
        prompt: StringOrArray::String(String::from("Test prompt")),
        max_tokens: Some(50),
        temperature: Some(0.8),
        top_p: Some(1.0),
        n: Some(1),
        presence_penalty: Some(0.0),
        frequency_penalty: Some(0.0),
        best_of: Some(1),
        ..default_completion_request()
    };

    // Each request must serialize without error.
    assert!(to_string(&generate).is_ok());
    assert!(to_string(&chat).is_ok());
    assert!(to_string(&completion).is_ok());
}
/// Round-trip a `GenerateRequest` through a JSON string and check that the
/// `text`, `stream` and `return_logprob` fields survive unchanged.
#[test]
fn test_benchmark_serialization_roundtrip() {
    let original = GenerateRequest {
        text: Some(String::from("Test prompt")),
        ..default_generate_request()
    };

    // Encode to JSON, then decode back into the same type.
    let encoded = to_string(&original).expect("Serialization should work");
    let decoded = from_str::<GenerateRequest>(&encoded).expect("Deserialization should work");

    // Only a few fields are compared, one assertion per field.
    assert_eq!(original.text, decoded.text);
    assert_eq!(original.stream, decoded.stream);
    assert_eq!(original.return_logprob, decoded.return_logprob);
}
/// Convert a `GenerateRequest` to a JSON value, render that value as a
/// string, and check that both the string and its bytes are non-empty.
#[test]
fn test_benchmark_direct_json_routing() {
    let request = GenerateRequest {
        text: Some(String::from("Test prompt")),
        ..default_generate_request()
    };

    // Request -> JSON value -> JSON string; either step failing panics.
    let rendered = to_value(&request)
        .and_then(|value| to_string(&value))
        .unwrap();
    let payload = rendered.as_bytes();

    assert!(!rendered.is_empty());
    assert!(!payload.is_empty());
}