[ci] add router benchmark script and CI (#7498)
This commit is contained in:
241
sgl-router/tests/benchmark_integration.rs
Normal file
241
sgl-router/tests/benchmark_integration.rs
Normal file
@@ -0,0 +1,241 @@
|
||||
// Integration test to ensure benchmarks compile and basic functionality works
|
||||
// This prevents benchmarks from breaking in CI
|
||||
|
||||
use serde_json::{from_str, to_string};
|
||||
use sglang_router_rs::openai_api_types::{
|
||||
ChatCompletionRequest, ChatMessage, CompletionRequest, GenerateParameters, GenerateRequest,
|
||||
SamplingParams, StringOrArray, UserMessageContent,
|
||||
};
|
||||
use sglang_router_rs::request_adapter::{RouteableRequest, ToPdRequest};
|
||||
|
||||
/// Smoke test: every request type exercised by the router benchmarks can be
/// constructed and serialized without panicking, so the benchmark suite
/// cannot silently rot in CI.
#[test]
fn test_benchmark_request_creation() {
    // Native /generate request with both the legacy `parameters` block and
    // the SGLang-style `sampling_params` populated.
    let generate_request = GenerateRequest {
        text: Some("Test prompt".to_string()),
        input_ids: None,
        prompt: None,
        stream: false,
        return_logprob: false,
        parameters: Some(GenerateParameters {
            max_new_tokens: Some(100),
            temperature: Some(0.8),
            top_p: Some(0.9),
            top_k: Some(50),
            repetition_penalty: Some(1.0),
            ..Default::default()
        }),
        sampling_params: Some(SamplingParams {
            temperature: Some(0.8),
            top_p: Some(0.9),
            top_k: Some(50),
            frequency_penalty: Some(0.0),
            presence_penalty: Some(0.0),
            repetition_penalty: Some(1.0),
            ..Default::default()
        }),
    };

    // OpenAI-style chat completion request carrying one user message.
    let chat_request = ChatCompletionRequest {
        model: "test-model".to_string(),
        messages: vec![ChatMessage::User {
            role: "user".to_string(),
            content: UserMessageContent::Text("Test message".to_string()),
            name: None,
        }],
        max_tokens: Some(150),
        max_completion_tokens: Some(150),
        temperature: Some(0.7),
        top_p: Some(1.0),
        n: Some(1),
        stream: false,
        stop: None,
        presence_penalty: Some(0.0),
        frequency_penalty: Some(0.0),
        logit_bias: None,
        logprobs: false,
        top_logprobs: None,
        user: None,
        response_format: None,
        seed: None,
        tools: None,
        tool_choice: None,
        parallel_tool_calls: Some(true),
        function_call: None,
        functions: None,
    };

    // OpenAI-style legacy text completion request.
    let completion_request = CompletionRequest {
        model: "test-model".to_string(),
        prompt: StringOrArray::String("Test prompt".to_string()),
        suffix: None,
        max_tokens: Some(50),
        temperature: Some(0.8),
        top_p: Some(1.0),
        n: Some(1),
        stream: false,
        logprobs: None,
        echo: false,
        stop: None,
        presence_penalty: Some(0.0),
        frequency_penalty: Some(0.0),
        best_of: Some(1),
        logit_bias: None,
        user: None,
        seed: None,
    };

    // All three shapes must serialize cleanly to JSON.
    assert!(to_string(&generate_request).is_ok());
    assert!(to_string(&chat_request).is_ok());
    assert!(to_string(&completion_request).is_ok());
}
|
||||
|
||||
/// Round-trip a minimal GenerateRequest through serde_json and verify the
/// fields that matter to routing survive unchanged.
#[test]
fn test_benchmark_serialization_roundtrip() {
    // Minimal request: text only, every optional knob left off.
    let original = GenerateRequest {
        text: Some("Test prompt".to_string()),
        input_ids: None,
        prompt: None,
        parameters: None,
        sampling_params: None,
        stream: false,
        return_logprob: false,
    };

    // Encode to JSON, then decode back into the same type.
    let encoded = to_string(&original).expect("Serialization should work");
    let decoded: GenerateRequest = from_str(&encoded).expect("Deserialization should work");

    // Spot-check the simple fields for equality after the round trip.
    assert_eq!(original.text, decoded.text);
    assert_eq!(original.stream, decoded.stream);
    assert_eq!(original.return_logprob, decoded.return_logprob);
}
|
||||
|
||||
/// Ensure PD (prefill/decode) request adaptation completes without panicking
/// for each of the three benchmark request shapes.
#[test]
fn test_benchmark_request_adaptation() {
    // Minimal native /generate request.
    let generate = GenerateRequest {
        text: Some("Test prompt".to_string()),
        input_ids: None,
        prompt: None,
        parameters: None,
        sampling_params: None,
        stream: false,
        return_logprob: false,
    };

    // Chat completion request with a single user message.
    let chat = ChatCompletionRequest {
        model: "test-model".to_string(),
        messages: vec![ChatMessage::User {
            role: "user".to_string(),
            content: UserMessageContent::Text("Test message".to_string()),
            name: None,
        }],
        max_tokens: Some(150),
        max_completion_tokens: Some(150),
        temperature: Some(0.7),
        top_p: Some(1.0),
        n: Some(1),
        stream: false,
        stop: None,
        presence_penalty: Some(0.0),
        frequency_penalty: Some(0.0),
        logit_bias: None,
        logprobs: false,
        top_logprobs: None,
        user: None,
        response_format: None,
        seed: None,
        tools: None,
        tool_choice: None,
        parallel_tool_calls: Some(true),
        function_call: None,
        functions: None,
    };

    // Legacy text completion request.
    let completion = CompletionRequest {
        model: "test-model".to_string(),
        prompt: StringOrArray::String("Test prompt".to_string()),
        suffix: None,
        max_tokens: Some(50),
        temperature: Some(0.8),
        top_p: Some(1.0),
        n: Some(1),
        stream: false,
        logprobs: None,
        echo: false,
        stop: None,
        presence_penalty: Some(0.0),
        frequency_penalty: Some(0.0),
        best_of: Some(1),
        logit_bias: None,
        user: None,
        seed: None,
    };

    // The adapted results are unused; this test only guards against panics
    // inside the PD conversion path.
    let _ = generate.to_pd_request();
    let _ = chat.to_pd_request();
    let _ = completion.to_pd_request();
}
|
||||
|
||||
/// Exercise the plain (non-PD) routing helpers on a minimal request.
#[test]
fn test_benchmark_regular_routing() {
    // Text-only request; all optional parameters disabled.
    let request = GenerateRequest {
        text: Some("Test prompt".to_string()),
        input_ids: None,
        prompt: None,
        parameters: None,
        sampling_params: None,
        stream: false,
        return_logprob: false,
    };

    // NOTE(review): this only verifies the calls do not panic; the returned
    // values are discarded. If these return Result, consider asserting
    // success as well — confirm against RouteableRequest's signatures.
    let _json = request.to_json();
    let _bytes = request.to_bytes();
}
|
||||
|
||||
/// Coarse performance sanity check: serializing and PD-adapting a tiny
/// request must complete well within an interactive budget.
///
/// The original version asserted a hard `< 1ms` wall-clock bound, which is
/// flaky in CI: unoptimized (debug) test builds, shared runners, and
/// scheduler jitter routinely push a first-call timing past 1ms. Use a
/// generous bound that is still orders of magnitude below anything a real
/// regression (accidental O(n^2) work, blocking I/O) would produce.
#[test]
fn test_benchmark_performance_baseline() {
    use std::time::{Duration, Instant};

    // Generous upper bound; expected cost is microseconds, so 50ms only
    // trips on gross regressions, not on CI noise.
    const BUDGET: Duration = Duration::from_millis(50);

    let generate_req = GenerateRequest {
        text: Some("Short test prompt".to_string()),
        input_ids: None,
        prompt: None,
        parameters: None,
        sampling_params: None,
        stream: false,
        return_logprob: false,
    };

    // JSON serialization of a tiny request should be near-instantaneous.
    let start = Instant::now();
    let _json = to_string(&generate_req).unwrap();
    let serialize_duration = start.elapsed();
    assert!(
        serialize_duration < BUDGET,
        "Serialization took too long: {:?}",
        serialize_duration
    );

    // PD adaptation is a pure in-memory transformation; same budget applies.
    let start = Instant::now();
    let _pd_req = generate_req.to_pd_request();
    let adapt_duration = start.elapsed();
    assert!(
        adapt_duration < BUDGET,
        "PD adaptation took too long: {:?}",
        adapt_duration
    );
}
|
||||
Reference in New Issue
Block a user