Files
sglang/sgl-router/src/config/validation.rs

497 lines
17 KiB
Rust
Raw Normal View History

use super::*;
/// Configuration validator
pub struct ConfigValidator;
impl ConfigValidator {
/// Validate a complete router configuration
pub fn validate(config: &RouterConfig) -> ConfigResult<()> {
// Check if service discovery is enabled
let has_service_discovery = config.discovery.as_ref().map_or(false, |d| d.enabled);
Self::validate_mode(&config.mode, has_service_discovery)?;
Self::validate_policy(&config.policy)?;
Self::validate_server_settings(config)?;
if let Some(discovery) = &config.discovery {
Self::validate_discovery(discovery, &config.mode)?;
}
if let Some(metrics) = &config.metrics {
Self::validate_metrics(metrics)?;
}
Self::validate_compatibility(config)?;
Ok(())
}
/// Validate routing mode configuration
fn validate_mode(mode: &RoutingMode, has_service_discovery: bool) -> ConfigResult<()> {
match mode {
RoutingMode::Regular { worker_urls } => {
// Validate URLs if any are provided
if !worker_urls.is_empty() {
Self::validate_urls(worker_urls)?;
}
// Note: We allow empty worker URLs even without service discovery
// to let the router start and fail at runtime when routing requests.
// This matches legacy behavior and test expectations.
}
RoutingMode::PrefillDecode {
prefill_urls,
decode_urls,
} => {
// Only require URLs if service discovery is disabled
if !has_service_discovery {
if prefill_urls.is_empty() {
return Err(ConfigError::ValidationFailed {
reason: "PD mode requires at least one prefill worker URL".to_string(),
});
}
if decode_urls.is_empty() {
return Err(ConfigError::ValidationFailed {
reason: "PD mode requires at least one decode worker URL".to_string(),
});
}
}
// Validate URLs if any are provided
if !prefill_urls.is_empty() {
let prefill_url_strings: Vec<String> =
prefill_urls.iter().map(|(url, _)| url.clone()).collect();
Self::validate_urls(&prefill_url_strings)?;
}
if !decode_urls.is_empty() {
Self::validate_urls(decode_urls)?;
}
// Validate bootstrap ports
for (_url, port) in prefill_urls {
if let Some(port) = port {
if *port == 0 {
return Err(ConfigError::InvalidValue {
field: "bootstrap_port".to_string(),
value: port.to_string(),
reason: "Port must be between 1 and 65535".to_string(),
});
}
}
}
}
}
Ok(())
}
/// Validate policy configuration
fn validate_policy(policy: &PolicyConfig) -> ConfigResult<()> {
match policy {
PolicyConfig::Random | PolicyConfig::RoundRobin => {
// No specific validation needed
}
PolicyConfig::CacheAware {
cache_threshold,
balance_abs_threshold: _,
balance_rel_threshold,
eviction_interval_secs,
max_tree_size,
} => {
if !(0.0..=1.0).contains(cache_threshold) {
return Err(ConfigError::InvalidValue {
field: "cache_threshold".to_string(),
value: cache_threshold.to_string(),
reason: "Must be between 0.0 and 1.0".to_string(),
});
}
if *balance_rel_threshold < 1.0 {
return Err(ConfigError::InvalidValue {
field: "balance_rel_threshold".to_string(),
value: balance_rel_threshold.to_string(),
reason: "Must be >= 1.0".to_string(),
});
}
if *eviction_interval_secs == 0 {
return Err(ConfigError::InvalidValue {
field: "eviction_interval_secs".to_string(),
value: eviction_interval_secs.to_string(),
reason: "Must be > 0".to_string(),
});
}
if *max_tree_size == 0 {
return Err(ConfigError::InvalidValue {
field: "max_tree_size".to_string(),
value: max_tree_size.to_string(),
reason: "Must be > 0".to_string(),
});
}
}
PolicyConfig::PowerOfTwo {
load_check_interval_secs,
} => {
if *load_check_interval_secs == 0 {
return Err(ConfigError::InvalidValue {
field: "load_check_interval_secs".to_string(),
value: load_check_interval_secs.to_string(),
reason: "Must be > 0".to_string(),
});
}
}
}
Ok(())
}
/// Validate server configuration
fn validate_server_settings(config: &RouterConfig) -> ConfigResult<()> {
if config.port == 0 {
return Err(ConfigError::InvalidValue {
field: "port".to_string(),
value: config.port.to_string(),
reason: "Port must be > 0".to_string(),
});
}
if config.max_payload_size == 0 {
return Err(ConfigError::InvalidValue {
field: "max_payload_size".to_string(),
value: config.max_payload_size.to_string(),
reason: "Must be > 0".to_string(),
});
}
if config.request_timeout_secs == 0 {
return Err(ConfigError::InvalidValue {
field: "request_timeout_secs".to_string(),
value: config.request_timeout_secs.to_string(),
reason: "Must be > 0".to_string(),
});
}
if config.worker_startup_timeout_secs == 0 {
return Err(ConfigError::InvalidValue {
field: "worker_startup_timeout_secs".to_string(),
value: config.worker_startup_timeout_secs.to_string(),
reason: "Must be > 0".to_string(),
});
}
if config.worker_startup_check_interval_secs == 0 {
return Err(ConfigError::InvalidValue {
field: "worker_startup_check_interval_secs".to_string(),
value: config.worker_startup_check_interval_secs.to_string(),
reason: "Must be > 0".to_string(),
});
}
Ok(())
}
/// Validate service discovery configuration
fn validate_discovery(discovery: &DiscoveryConfig, mode: &RoutingMode) -> ConfigResult<()> {
if !discovery.enabled {
return Ok(()); // No validation needed if disabled
}
if discovery.port == 0 {
return Err(ConfigError::InvalidValue {
field: "discovery.port".to_string(),
value: discovery.port.to_string(),
reason: "Port must be > 0".to_string(),
});
}
if discovery.check_interval_secs == 0 {
return Err(ConfigError::InvalidValue {
field: "discovery.check_interval_secs".to_string(),
value: discovery.check_interval_secs.to_string(),
reason: "Must be > 0".to_string(),
});
}
// Validate selectors based on mode
match mode {
RoutingMode::Regular { .. } => {
if discovery.selector.is_empty() {
return Err(ConfigError::ValidationFailed {
reason: "Regular mode with service discovery requires a non-empty selector"
.to_string(),
});
}
}
RoutingMode::PrefillDecode { .. } => {
if discovery.prefill_selector.is_empty() && discovery.decode_selector.is_empty() {
return Err(ConfigError::ValidationFailed {
reason: "PD mode with service discovery requires at least one non-empty selector (prefill or decode)".to_string(),
});
}
}
}
Ok(())
}
/// Validate metrics configuration
fn validate_metrics(metrics: &MetricsConfig) -> ConfigResult<()> {
if metrics.port == 0 {
return Err(ConfigError::InvalidValue {
field: "metrics.port".to_string(),
value: metrics.port.to_string(),
reason: "Port must be > 0".to_string(),
});
}
if metrics.host.is_empty() {
return Err(ConfigError::InvalidValue {
field: "metrics.host".to_string(),
value: metrics.host.clone(),
reason: "Host cannot be empty".to_string(),
});
}
Ok(())
}
/// Validate compatibility between different configuration sections
fn validate_compatibility(config: &RouterConfig) -> ConfigResult<()> {
// Check mode and policy compatibility
match (&config.mode, &config.policy) {
(RoutingMode::Regular { .. }, PolicyConfig::PowerOfTwo { .. }) => {
// PowerOfTwo is only supported in PD mode
return Err(ConfigError::IncompatibleConfig {
reason: "PowerOfTwo policy is only supported in PD disaggregated mode"
.to_string(),
});
}
(RoutingMode::PrefillDecode { .. }, PolicyConfig::RoundRobin) => {
return Err(ConfigError::IncompatibleConfig {
reason: "RoundRobin policy is not supported in PD disaggregated mode"
.to_string(),
});
}
_ => {}
}
// Check if service discovery is enabled for worker count validation
let has_service_discovery = config.discovery.as_ref().map_or(false, |d| d.enabled);
// Only validate worker counts if service discovery is disabled
if !has_service_discovery {
// Check if power-of-two policy makes sense with insufficient workers
if let PolicyConfig::PowerOfTwo { .. } = &config.policy {
let worker_count = config.mode.worker_count();
if worker_count < 2 {
return Err(ConfigError::IncompatibleConfig {
reason: "Power-of-two policy requires at least 2 workers".to_string(),
});
}
}
}
Ok(())
}
/// Validate URL format
fn validate_urls(urls: &[String]) -> ConfigResult<()> {
for url in urls {
if url.is_empty() {
return Err(ConfigError::InvalidValue {
field: "worker_url".to_string(),
value: url.clone(),
reason: "URL cannot be empty".to_string(),
});
}
if !url.starts_with("http://") && !url.starts_with("https://") {
return Err(ConfigError::InvalidValue {
field: "worker_url".to_string(),
value: url.clone(),
reason: "URL must start with http:// or https://".to_string(),
});
}
// Basic URL validation
match ::url::Url::parse(url) {
Ok(parsed) => {
// Additional validation
if parsed.host_str().is_none() {
return Err(ConfigError::InvalidValue {
field: "worker_url".to_string(),
value: url.clone(),
reason: "URL must have a valid host".to_string(),
});
}
}
Err(e) => {
return Err(ConfigError::InvalidValue {
field: "worker_url".to_string(),
value: url.clone(),
reason: format!("Invalid URL format: {}", e),
});
}
}
}
Ok(())
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_validate_regular_mode() {
let config = RouterConfig::new(
RoutingMode::Regular {
worker_urls: vec!["http://worker:8000".to_string()],
},
PolicyConfig::Random,
);
assert!(ConfigValidator::validate(&config).is_ok());
}
#[test]
fn test_validate_empty_worker_urls() {
let config = RouterConfig::new(
RoutingMode::Regular {
worker_urls: vec![],
},
PolicyConfig::Random,
);
// Empty worker URLs are now allowed to match legacy behavior
assert!(ConfigValidator::validate(&config).is_ok());
}
#[test]
fn test_validate_empty_worker_urls_with_service_discovery() {
let mut config = RouterConfig::new(
RoutingMode::Regular {
worker_urls: vec![],
},
PolicyConfig::Random,
);
// Enable service discovery
config.discovery = Some(DiscoveryConfig {
enabled: true,
selector: vec![("app".to_string(), "test".to_string())]
.into_iter()
.collect(),
..Default::default()
});
// Should pass validation since service discovery is enabled
assert!(ConfigValidator::validate(&config).is_ok());
}
#[test]
fn test_validate_invalid_urls() {
let config = RouterConfig::new(
RoutingMode::Regular {
worker_urls: vec!["invalid-url".to_string()],
},
PolicyConfig::Random,
);
assert!(ConfigValidator::validate(&config).is_err());
}
#[test]
fn test_validate_cache_aware_thresholds() {
let config = RouterConfig::new(
RoutingMode::Regular {
worker_urls: vec![
"http://worker1:8000".to_string(),
"http://worker2:8000".to_string(),
],
},
PolicyConfig::CacheAware {
cache_threshold: 1.5, // Invalid: > 1.0
balance_abs_threshold: 32,
balance_rel_threshold: 1.1,
eviction_interval_secs: 60,
max_tree_size: 1000,
},
);
assert!(ConfigValidator::validate(&config).is_err());
}
#[test]
fn test_validate_cache_aware_single_worker() {
// Cache-aware with single worker should be allowed (even if not optimal)
let config = RouterConfig::new(
RoutingMode::Regular {
worker_urls: vec!["http://worker1:8000".to_string()],
},
PolicyConfig::CacheAware {
cache_threshold: 0.5,
balance_abs_threshold: 32,
balance_rel_threshold: 1.1,
eviction_interval_secs: 60,
max_tree_size: 1000,
},
);
assert!(ConfigValidator::validate(&config).is_ok());
}
#[test]
fn test_validate_pd_mode() {
let config = RouterConfig::new(
RoutingMode::PrefillDecode {
prefill_urls: vec![("http://prefill:8000".to_string(), Some(8081))],
decode_urls: vec!["http://decode:8000".to_string()],
},
PolicyConfig::Random,
);
assert!(ConfigValidator::validate(&config).is_ok());
}
#[test]
fn test_validate_incompatible_policy() {
// RoundRobin with PD mode
let config = RouterConfig::new(
RoutingMode::PrefillDecode {
prefill_urls: vec![("http://prefill:8000".to_string(), None)],
decode_urls: vec!["http://decode:8000".to_string()],
},
PolicyConfig::RoundRobin,
);
let result = ConfigValidator::validate(&config);
assert!(result.is_err());
assert!(result
.unwrap_err()
.to_string()
.contains("RoundRobin policy is not supported in PD disaggregated mode"));
}
#[test]
fn test_validate_power_of_two_with_regular_mode() {
// PowerOfTwo with Regular mode should fail
let config = RouterConfig::new(
RoutingMode::Regular {
worker_urls: vec![
"http://worker1:8000".to_string(),
"http://worker2:8000".to_string(),
],
},
PolicyConfig::PowerOfTwo {
load_check_interval_secs: 60,
},
);
let result = ConfigValidator::validate(&config);
assert!(result.is_err());
assert!(result
.unwrap_err()
.to_string()
.contains("PowerOfTwo policy is only supported in PD disaggregated mode"));
}
}