[router] regular router circuit breaker (#8997)

This commit is contained in:
Simo Lin
2025-08-10 21:19:30 -07:00
committed by GitHub
parent 6beeff41c5
commit 067068f271
22 changed files with 967 additions and 112 deletions

View File

@@ -43,6 +43,12 @@ pub struct RouterConfig {
pub retry: RetryConfig,
/// Circuit breaker configuration
pub circuit_breaker: CircuitBreakerConfig,
/// Disable retries (overrides retry.max_retries to 1 when true)
#[serde(default)]
pub disable_retries: bool,
/// Disable circuit breaker (overrides circuit_breaker.failure_threshold to u32::MAX when true)
#[serde(default)]
pub disable_circuit_breaker: bool,
}
/// Routing mode configuration
@@ -197,6 +203,10 @@ pub struct RetryConfig {
pub max_backoff_ms: u64,
/// Backoff multiplier for exponential backoff
pub backoff_multiplier: f32,
/// Jitter factor applied to backoff (0.0 - 1.0)
/// Effective delay D' = D * (1 + U[-j, +j])
#[serde(default = "default_retry_jitter_factor")]
pub jitter_factor: f32,
}
impl Default for RetryConfig {
@@ -206,10 +216,15 @@ impl Default for RetryConfig {
initial_backoff_ms: 100,
max_backoff_ms: 10000,
backoff_multiplier: 2.0,
jitter_factor: 0.1,
}
}
}
/// Serde fallback for `RetryConfig::jitter_factor` when the field is absent
/// from the deserialized input. Matches the value used by `RetryConfig::default()`.
fn default_retry_jitter_factor() -> f32 {
    const DEFAULT_JITTER_FACTOR: f32 = 0.1;
    DEFAULT_JITTER_FACTOR
}
/// Circuit breaker configuration for worker reliability
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CircuitBreakerConfig {
@@ -276,6 +291,8 @@ impl Default for RouterConfig {
cors_allowed_origins: vec![],
retry: RetryConfig::default(),
circuit_breaker: CircuitBreakerConfig::default(),
disable_retries: false,
disable_circuit_breaker: false,
}
}
}
@@ -312,6 +329,24 @@ impl RouterConfig {
pub fn has_metrics(&self) -> bool {
self.metrics.is_some()
}
/// Compute the effective retry config considering disable flag
pub fn effective_retry_config(&self) -> RetryConfig {
let mut cfg = self.retry.clone();
if self.disable_retries {
cfg.max_retries = 1;
}
cfg
}
/// Compute the effective circuit breaker config considering disable flag
pub fn effective_circuit_breaker_config(&self) -> CircuitBreakerConfig {
let mut cfg = self.circuit_breaker.clone();
if self.disable_circuit_breaker {
cfg.failure_threshold = u32::MAX;
}
cfg
}
}
#[cfg(test)]
@@ -388,6 +423,8 @@ mod tests {
cors_allowed_origins: vec![],
retry: RetryConfig::default(),
circuit_breaker: CircuitBreakerConfig::default(),
disable_retries: false,
disable_circuit_breaker: false,
};
let json = serde_json::to_string(&config).unwrap();
@@ -817,6 +854,8 @@ mod tests {
cors_allowed_origins: vec![],
retry: RetryConfig::default(),
circuit_breaker: CircuitBreakerConfig::default(),
disable_retries: false,
disable_circuit_breaker: false,
};
assert!(config.mode.is_pd_mode());
@@ -870,6 +909,8 @@ mod tests {
cors_allowed_origins: vec![],
retry: RetryConfig::default(),
circuit_breaker: CircuitBreakerConfig::default(),
disable_retries: false,
disable_circuit_breaker: false,
};
assert!(!config.mode.is_pd_mode());
@@ -919,6 +960,8 @@ mod tests {
cors_allowed_origins: vec![],
retry: RetryConfig::default(),
circuit_breaker: CircuitBreakerConfig::default(),
disable_retries: false,
disable_circuit_breaker: false,
};
assert!(config.has_service_discovery());

View File

@@ -23,6 +23,12 @@ impl ConfigValidator {
Self::validate_compatibility(config)?;
// Validate effective retry/CB configs (respect disable flags)
let retry_cfg = config.effective_retry_config();
let cb_cfg = config.effective_circuit_breaker_config();
Self::validate_retry(&retry_cfg)?;
Self::validate_circuit_breaker(&cb_cfg)?;
Ok(())
}
@@ -263,6 +269,79 @@ impl ConfigValidator {
Ok(())
}
/// Validate retry configuration.
///
/// Checks run in the order below and the first violation is returned as
/// `ConfigError::InvalidValue` naming the offending field:
///
/// 1. `max_retries` must be >= 1
/// 2. `initial_backoff_ms` must be > 0
/// 3. `max_backoff_ms` must be >= `initial_backoff_ms`
/// 4. `backoff_multiplier` must be >= 1.0 (NaN is rejected)
/// 5. `jitter_factor` must lie in `0.0..=1.0` (NaN is rejected)
///
/// Returns `Ok(())` when every check passes.
fn validate_retry(retry: &RetryConfig) -> ConfigResult<()> {
    if retry.max_retries < 1 {
        return Err(ConfigError::InvalidValue {
            field: "retry.max_retries".to_string(),
            value: retry.max_retries.to_string(),
            reason: "Must be >= 1 (set to 1 to effectively disable retries)".to_string(),
        });
    }

    if retry.initial_backoff_ms == 0 {
        return Err(ConfigError::InvalidValue {
            field: "retry.initial_backoff_ms".to_string(),
            value: retry.initial_backoff_ms.to_string(),
            reason: "Must be > 0".to_string(),
        });
    }

    if retry.max_backoff_ms < retry.initial_backoff_ms {
        return Err(ConfigError::InvalidValue {
            field: "retry.max_backoff_ms".to_string(),
            value: retry.max_backoff_ms.to_string(),
            reason: "Must be >= initial_backoff_ms".to_string(),
        });
    }

    // NaN compares false against every bound, so `< 1.0` alone would silently
    // accept it; reject it explicitly, as the jitter range check below does.
    if retry.backoff_multiplier.is_nan() || retry.backoff_multiplier < 1.0 {
        return Err(ConfigError::InvalidValue {
            field: "retry.backoff_multiplier".to_string(),
            value: retry.backoff_multiplier.to_string(),
            reason: "Must be >= 1.0".to_string(),
        });
    }

    // `RangeInclusive::contains` is false for NaN, so NaN is rejected here too.
    if !(0.0..=1.0).contains(&retry.jitter_factor) {
        return Err(ConfigError::InvalidValue {
            field: "retry.jitter_factor".to_string(),
            value: retry.jitter_factor.to_string(),
            reason: "Must be between 0.0 and 1.0".to_string(),
        });
    }

    Ok(())
}
/// Validate circuit breaker configuration.
///
/// Checks run in order and the first violation is returned as
/// `ConfigError::InvalidValue`: `failure_threshold` and `success_threshold`
/// must each be at least 1, and `timeout_duration_secs` and
/// `window_duration_secs` must each be non-zero. Returns `Ok(())` otherwise.
fn validate_circuit_breaker(cb: &CircuitBreakerConfig) -> ConfigResult<()> {
    // Shared constructor for the rejection so each check stays a one-liner.
    let reject = |name: &str, shown: String, why: &str| -> ConfigResult<()> {
        Err(ConfigError::InvalidValue {
            field: name.to_string(),
            value: shown,
            reason: why.to_string(),
        })
    };

    if cb.failure_threshold < 1 {
        return reject(
            "circuit_breaker.failure_threshold",
            cb.failure_threshold.to_string(),
            "Must be >= 1 (set to u32::MAX to effectively disable CB)",
        );
    }
    if cb.success_threshold < 1 {
        return reject(
            "circuit_breaker.success_threshold",
            cb.success_threshold.to_string(),
            "Must be >= 1",
        );
    }
    if cb.timeout_duration_secs == 0 {
        return reject(
            "circuit_breaker.timeout_duration_secs",
            cb.timeout_duration_secs.to_string(),
            "Must be > 0",
        );
    }
    if cb.window_duration_secs == 0 {
        return reject(
            "circuit_breaker.window_duration_secs",
            cb.window_duration_secs.to_string(),
            "Must be > 0",
        );
    }

    Ok(())
}
/// Validate compatibility between different configuration sections
fn validate_compatibility(config: &RouterConfig) -> ConfigResult<()> {
// All policies are now supported for both router types thanks to the unified trait design