[router] allow more health check configuration (#9198)

This commit is contained in:
Simo Lin
2025-08-15 08:07:45 -07:00
committed by GitHub
parent 0c8594e67d
commit 21b8846066
15 changed files with 398 additions and 166 deletions

View File

@@ -1,7 +1,7 @@
use clap::{ArgAction, Parser};
use sglang_router_rs::config::{
CircuitBreakerConfig, ConfigError, ConfigResult, DiscoveryConfig, MetricsConfig, PolicyConfig,
RetryConfig, RouterConfig, RoutingMode,
CircuitBreakerConfig, ConfigError, ConfigResult, DiscoveryConfig, HealthCheckConfig,
MetricsConfig, PolicyConfig, RetryConfig, RouterConfig, RoutingMode,
};
use sglang_router_rs::metrics::PrometheusConfig;
use sglang_router_rs::server::{self, ServerConfig};
@@ -105,35 +105,35 @@ struct CliArgs {
decode_policy: Option<String>,
/// Timeout in seconds for worker startup
#[arg(long, default_value_t = 300)]
#[arg(long, default_value_t = 600)]
worker_startup_timeout_secs: u64,
/// Interval in seconds between checks for worker startup
#[arg(long, default_value_t = 10)]
#[arg(long, default_value_t = 30)]
worker_startup_check_interval: u64,
/// Cache threshold (0.0-1.0) for cache-aware routing
#[arg(long, default_value_t = 0.5)]
#[arg(long, default_value_t = 0.3)]
cache_threshold: f32,
/// Absolute threshold for load balancing
#[arg(long, default_value_t = 32)]
#[arg(long, default_value_t = 64)]
balance_abs_threshold: usize,
/// Relative threshold for load balancing
#[arg(long, default_value_t = 1.0001)]
#[arg(long, default_value_t = 1.5)]
balance_rel_threshold: f32,
/// Interval in seconds between cache eviction operations
#[arg(long, default_value_t = 60)]
#[arg(long, default_value_t = 120)]
eviction_interval: u64,
/// Maximum size of the approximation tree for cache-aware routing
#[arg(long, default_value_t = 16777216)] // 2^24
#[arg(long, default_value_t = 67108864)] // 2^26
max_tree_size: usize,
/// Maximum payload size in bytes
#[arg(long, default_value_t = 268435456)] // 256MB
#[arg(long, default_value_t = 536870912)] // 512MB
max_payload_size: usize,
/// Enable data parallelism aware schedule
@@ -189,11 +189,11 @@ struct CliArgs {
request_id_headers: Vec<String>,
/// Request timeout in seconds
#[arg(long, default_value_t = 600)]
#[arg(long, default_value_t = 1800)]
request_timeout_secs: u64,
/// Maximum number of concurrent requests allowed
#[arg(long, default_value_t = 64)]
#[arg(long, default_value_t = 256)]
max_concurrent_requests: usize,
/// CORS allowed origins
@@ -202,23 +202,23 @@ struct CliArgs {
// Retry configuration
/// Maximum number of retries
#[arg(long, default_value_t = 3)]
#[arg(long, default_value_t = 5)]
retry_max_retries: u32,
/// Initial backoff in milliseconds for retries
#[arg(long, default_value_t = 100)]
#[arg(long, default_value_t = 50)]
retry_initial_backoff_ms: u64,
/// Maximum backoff in milliseconds for retries
#[arg(long, default_value_t = 10000)]
#[arg(long, default_value_t = 30000)]
retry_max_backoff_ms: u64,
/// Backoff multiplier for exponential backoff
#[arg(long, default_value_t = 2.0)]
#[arg(long, default_value_t = 1.5)]
retry_backoff_multiplier: f32,
/// Jitter factor for retry backoff
#[arg(long, default_value_t = 0.1)]
#[arg(long, default_value_t = 0.2)]
retry_jitter_factor: f32,
/// Disable retries
@@ -227,24 +227,45 @@ struct CliArgs {
// Circuit breaker configuration
/// Number of failures before circuit breaker opens
#[arg(long, default_value_t = 5)]
#[arg(long, default_value_t = 10)]
cb_failure_threshold: u32,
/// Number of successes before circuit breaker closes
#[arg(long, default_value_t = 2)]
#[arg(long, default_value_t = 3)]
cb_success_threshold: u32,
/// Timeout duration in seconds for circuit breaker
#[arg(long, default_value_t = 30)]
#[arg(long, default_value_t = 60)]
cb_timeout_duration_secs: u64,
/// Window duration in seconds for circuit breaker
#[arg(long, default_value_t = 60)]
#[arg(long, default_value_t = 120)]
cb_window_duration_secs: u64,
/// Disable circuit breaker
#[arg(long, default_value_t = false)]
disable_circuit_breaker: bool,
// Health check configuration
/// Number of consecutive health check failures before marking worker unhealthy
#[arg(long, default_value_t = 3)]
health_failure_threshold: u32,
/// Number of consecutive health check successes before marking worker healthy
#[arg(long, default_value_t = 2)]
health_success_threshold: u32,
/// Timeout in seconds for health check requests
#[arg(long, default_value_t = 5)]
health_check_timeout_secs: u64,
/// Interval in seconds between runtime health checks
#[arg(long, default_value_t = 60)]
health_check_interval_secs: u64,
/// Health check endpoint path
#[arg(long, default_value = "/health")]
health_check_endpoint: String,
}
impl CliArgs {
@@ -378,6 +399,13 @@ impl CliArgs {
},
disable_retries: self.disable_retries,
disable_circuit_breaker: self.disable_circuit_breaker,
health_check: HealthCheckConfig {
failure_threshold: self.health_failure_threshold,
success_threshold: self.health_success_threshold,
timeout_secs: self.health_check_timeout_secs,
check_interval_secs: self.health_check_interval_secs,
endpoint: self.health_check_endpoint.clone(),
},
})
}