[router] PD Router Simplification and Reorganization (#8838)
This commit is contained in:
@@ -1,12 +1,15 @@
|
||||
// Integration test to ensure benchmarks compile and basic functionality works
|
||||
// This prevents benchmarks from breaking in CI
|
||||
//
|
||||
// UPDATED: Removed deprecated ToPdRequest usage, now uses direct JSON serialization
|
||||
|
||||
use serde_json::{from_str, to_string};
|
||||
use serde_json::{from_str, to_string, to_value};
|
||||
use sglang_router_rs::core::{BasicWorker, WorkerType};
|
||||
use sglang_router_rs::openai_api_types::{
|
||||
ChatCompletionRequest, ChatMessage, CompletionRequest, GenerateParameters, GenerateRequest,
|
||||
SamplingParams, StringOrArray, UserMessageContent,
|
||||
};
|
||||
use sglang_router_rs::routers::request_adapter::{RouteableRequest, ToPdRequest};
|
||||
use sglang_router_rs::routers::bootstrap_injector::inject_bootstrap_fields;
|
||||
|
||||
/// Create a default GenerateRequest for benchmarks with minimal fields set
|
||||
fn default_generate_request() -> GenerateRequest {
|
||||
@@ -114,6 +117,15 @@ fn default_completion_request() -> CompletionRequest {
|
||||
}
|
||||
}
|
||||
|
||||
fn create_test_worker() -> BasicWorker {
|
||||
BasicWorker::new(
|
||||
"http://test-server:8000".to_string(),
|
||||
WorkerType::Prefill {
|
||||
bootstrap_port: Some(5678),
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_benchmark_request_creation() {
|
||||
// Ensure all benchmark request types can be created without panicking
|
||||
@@ -197,8 +209,8 @@ fn test_benchmark_serialization_roundtrip() {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_benchmark_request_adaptation() {
|
||||
// Test that PD request adaptation works for benchmark types
|
||||
fn test_benchmark_bootstrap_injection() {
|
||||
// Test that bootstrap injection works for benchmark types (replaces PD request adaptation)
|
||||
|
||||
let generate_req = GenerateRequest {
|
||||
text: Some("Test prompt".to_string()),
|
||||
@@ -236,24 +248,40 @@ fn test_benchmark_request_adaptation() {
|
||||
..default_completion_request()
|
||||
};
|
||||
|
||||
// Test PD adaptation (should not panic)
|
||||
let _pd_generate = generate_req.to_pd_request();
|
||||
let _pd_chat = chat_req.to_pd_request();
|
||||
let _pd_completion = completion_req.to_pd_request();
|
||||
let worker = create_test_worker();
|
||||
|
||||
// Test bootstrap injection (should not panic)
|
||||
let mut generate_json = to_value(&generate_req).unwrap();
|
||||
let mut chat_json = to_value(&chat_req).unwrap();
|
||||
let mut completion_json = to_value(&completion_req).unwrap();
|
||||
|
||||
assert!(inject_bootstrap_fields(&mut generate_json, &worker).is_ok());
|
||||
assert!(inject_bootstrap_fields(&mut chat_json, &worker).is_ok());
|
||||
assert!(inject_bootstrap_fields(&mut completion_json, &worker).is_ok());
|
||||
|
||||
// Verify bootstrap fields were added
|
||||
assert!(generate_json.get("bootstrap_host").is_some());
|
||||
assert!(generate_json.get("bootstrap_port").is_some());
|
||||
assert!(generate_json.get("bootstrap_room").is_some());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_benchmark_regular_routing() {
|
||||
// Test regular routing functionality for benchmark types
|
||||
fn test_benchmark_direct_json_routing() {
|
||||
// Test direct JSON routing functionality for benchmark types (replaces regular routing)
|
||||
|
||||
let generate_req = GenerateRequest {
|
||||
text: Some("Test prompt".to_string()),
|
||||
..default_generate_request()
|
||||
};
|
||||
|
||||
// Test regular routing methods (should not panic)
|
||||
let _json = generate_req.to_json();
|
||||
let _bytes = generate_req.to_bytes();
|
||||
// Test direct JSON conversion (replaces regular routing methods)
|
||||
let json = to_value(&generate_req).unwrap();
|
||||
let json_string = to_string(&json).unwrap();
|
||||
let bytes = json_string.as_bytes();
|
||||
|
||||
// Verify conversions work
|
||||
assert!(!json_string.is_empty());
|
||||
assert!(!bytes.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -266,23 +294,36 @@ fn test_benchmark_performance_baseline() {
|
||||
..default_generate_request()
|
||||
};
|
||||
|
||||
// Serialization should be fast (< 1ms for simple requests)
|
||||
// Test the actual simplified pipeline: to_value + bootstrap injection
|
||||
let start = Instant::now();
|
||||
let _json = to_string(&generate_req).unwrap();
|
||||
let serialize_duration = start.elapsed();
|
||||
let worker = create_test_worker();
|
||||
|
||||
// This mirrors the actual router pipeline
|
||||
let mut json = to_value(&generate_req).unwrap();
|
||||
let _ = inject_bootstrap_fields(&mut json, &worker);
|
||||
|
||||
let total_duration = start.elapsed();
|
||||
assert!(
|
||||
serialize_duration.as_millis() < 1,
|
||||
"Serialization took too long: {:?}",
|
||||
serialize_duration
|
||||
total_duration.as_millis() < 5,
|
||||
"Simplified pipeline took too long: {:?} (should be faster than old adapter approach)",
|
||||
total_duration
|
||||
);
|
||||
|
||||
// PD adaptation should be very fast (< 1ms)
|
||||
// Individual components should also be fast
|
||||
let start = Instant::now();
|
||||
let _pd_req = generate_req.to_pd_request();
|
||||
let adapt_duration = start.elapsed();
|
||||
let _json = to_value(&generate_req).unwrap();
|
||||
let to_value_duration = start.elapsed();
|
||||
|
||||
let start = Instant::now();
|
||||
let mut json = to_value(&generate_req).unwrap();
|
||||
let _ = inject_bootstrap_fields(&mut json, &worker);
|
||||
let inject_duration = start.elapsed();
|
||||
|
||||
// Bootstrap injection should be faster than the JSON conversion
|
||||
assert!(
|
||||
adapt_duration.as_millis() < 1,
|
||||
"PD adaptation took too long: {:?}",
|
||||
adapt_duration
|
||||
inject_duration <= to_value_duration * 3,
|
||||
"Bootstrap injection ({:?}) should not be much slower than JSON conversion ({:?})",
|
||||
inject_duration,
|
||||
to_value_duration
|
||||
);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user