[router] allow more health check configuration (#9198)
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
use clap::{ArgAction, Parser};
|
||||
use sglang_router_rs::config::{
|
||||
CircuitBreakerConfig, ConfigError, ConfigResult, DiscoveryConfig, MetricsConfig, PolicyConfig,
|
||||
RetryConfig, RouterConfig, RoutingMode,
|
||||
CircuitBreakerConfig, ConfigError, ConfigResult, DiscoveryConfig, HealthCheckConfig,
|
||||
MetricsConfig, PolicyConfig, RetryConfig, RouterConfig, RoutingMode,
|
||||
};
|
||||
use sglang_router_rs::metrics::PrometheusConfig;
|
||||
use sglang_router_rs::server::{self, ServerConfig};
|
||||
@@ -105,35 +105,35 @@ struct CliArgs {
|
||||
decode_policy: Option<String>,
|
||||
|
||||
/// Timeout in seconds for worker startup
|
||||
#[arg(long, default_value_t = 300)]
|
||||
#[arg(long, default_value_t = 600)]
|
||||
worker_startup_timeout_secs: u64,
|
||||
|
||||
/// Interval in seconds between checks for worker startup
|
||||
#[arg(long, default_value_t = 10)]
|
||||
#[arg(long, default_value_t = 30)]
|
||||
worker_startup_check_interval: u64,
|
||||
|
||||
/// Cache threshold (0.0-1.0) for cache-aware routing
|
||||
#[arg(long, default_value_t = 0.5)]
|
||||
#[arg(long, default_value_t = 0.3)]
|
||||
cache_threshold: f32,
|
||||
|
||||
/// Absolute threshold for load balancing
|
||||
#[arg(long, default_value_t = 32)]
|
||||
#[arg(long, default_value_t = 64)]
|
||||
balance_abs_threshold: usize,
|
||||
|
||||
/// Relative threshold for load balancing
|
||||
#[arg(long, default_value_t = 1.0001)]
|
||||
#[arg(long, default_value_t = 1.5)]
|
||||
balance_rel_threshold: f32,
|
||||
|
||||
/// Interval in seconds between cache eviction operations
|
||||
#[arg(long, default_value_t = 60)]
|
||||
#[arg(long, default_value_t = 120)]
|
||||
eviction_interval: u64,
|
||||
|
||||
/// Maximum size of the approximation tree for cache-aware routing
|
||||
#[arg(long, default_value_t = 16777216)] // 2^24
|
||||
#[arg(long, default_value_t = 67108864)] // 2^26
|
||||
max_tree_size: usize,
|
||||
|
||||
/// Maximum payload size in bytes
|
||||
#[arg(long, default_value_t = 268435456)] // 256MB
|
||||
#[arg(long, default_value_t = 536870912)] // 512MB
|
||||
max_payload_size: usize,
|
||||
|
||||
/// Enable data parallelism aware schedule
|
||||
@@ -189,11 +189,11 @@ struct CliArgs {
|
||||
request_id_headers: Vec<String>,
|
||||
|
||||
/// Request timeout in seconds
|
||||
#[arg(long, default_value_t = 600)]
|
||||
#[arg(long, default_value_t = 1800)]
|
||||
request_timeout_secs: u64,
|
||||
|
||||
/// Maximum number of concurrent requests allowed
|
||||
#[arg(long, default_value_t = 64)]
|
||||
#[arg(long, default_value_t = 256)]
|
||||
max_concurrent_requests: usize,
|
||||
|
||||
/// CORS allowed origins
|
||||
@@ -202,23 +202,23 @@ struct CliArgs {
|
||||
|
||||
// Retry configuration
|
||||
/// Maximum number of retries
|
||||
#[arg(long, default_value_t = 3)]
|
||||
#[arg(long, default_value_t = 5)]
|
||||
retry_max_retries: u32,
|
||||
|
||||
/// Initial backoff in milliseconds for retries
|
||||
#[arg(long, default_value_t = 100)]
|
||||
#[arg(long, default_value_t = 50)]
|
||||
retry_initial_backoff_ms: u64,
|
||||
|
||||
/// Maximum backoff in milliseconds for retries
|
||||
#[arg(long, default_value_t = 10000)]
|
||||
#[arg(long, default_value_t = 30000)]
|
||||
retry_max_backoff_ms: u64,
|
||||
|
||||
/// Backoff multiplier for exponential backoff
|
||||
#[arg(long, default_value_t = 2.0)]
|
||||
#[arg(long, default_value_t = 1.5)]
|
||||
retry_backoff_multiplier: f32,
|
||||
|
||||
/// Jitter factor for retry backoff
|
||||
#[arg(long, default_value_t = 0.1)]
|
||||
#[arg(long, default_value_t = 0.2)]
|
||||
retry_jitter_factor: f32,
|
||||
|
||||
/// Disable retries
|
||||
@@ -227,24 +227,45 @@ struct CliArgs {
|
||||
|
||||
// Circuit breaker configuration
|
||||
/// Number of failures before circuit breaker opens
|
||||
#[arg(long, default_value_t = 5)]
|
||||
#[arg(long, default_value_t = 10)]
|
||||
cb_failure_threshold: u32,
|
||||
|
||||
/// Number of successes before circuit breaker closes
|
||||
#[arg(long, default_value_t = 2)]
|
||||
#[arg(long, default_value_t = 3)]
|
||||
cb_success_threshold: u32,
|
||||
|
||||
/// Timeout duration in seconds for circuit breaker
|
||||
#[arg(long, default_value_t = 30)]
|
||||
#[arg(long, default_value_t = 60)]
|
||||
cb_timeout_duration_secs: u64,
|
||||
|
||||
/// Window duration in seconds for circuit breaker
|
||||
#[arg(long, default_value_t = 60)]
|
||||
#[arg(long, default_value_t = 120)]
|
||||
cb_window_duration_secs: u64,
|
||||
|
||||
/// Disable circuit breaker
|
||||
#[arg(long, default_value_t = false)]
|
||||
disable_circuit_breaker: bool,
|
||||
|
||||
// Health check configuration
|
||||
/// Number of consecutive health check failures before marking worker unhealthy
|
||||
#[arg(long, default_value_t = 3)]
|
||||
health_failure_threshold: u32,
|
||||
|
||||
/// Number of consecutive health check successes before marking worker healthy
|
||||
#[arg(long, default_value_t = 2)]
|
||||
health_success_threshold: u32,
|
||||
|
||||
/// Timeout in seconds for health check requests
|
||||
#[arg(long, default_value_t = 5)]
|
||||
health_check_timeout_secs: u64,
|
||||
|
||||
/// Interval in seconds between runtime health checks
|
||||
#[arg(long, default_value_t = 60)]
|
||||
health_check_interval_secs: u64,
|
||||
|
||||
/// Health check endpoint path
|
||||
#[arg(long, default_value = "/health")]
|
||||
health_check_endpoint: String,
|
||||
}
|
||||
|
||||
impl CliArgs {
|
||||
@@ -378,6 +399,13 @@ impl CliArgs {
|
||||
},
|
||||
disable_retries: self.disable_retries,
|
||||
disable_circuit_breaker: self.disable_circuit_breaker,
|
||||
health_check: HealthCheckConfig {
|
||||
failure_threshold: self.health_failure_threshold,
|
||||
success_threshold: self.health_success_threshold,
|
||||
timeout_secs: self.health_check_timeout_secs,
|
||||
check_interval_secs: self.health_check_interval_secs,
|
||||
endpoint: self.health_check_endpoint.clone(),
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user