[router] regular router circuit breaker (#8997)
This commit is contained in:
@@ -43,6 +43,12 @@ pub struct RouterConfig {
|
||||
pub retry: RetryConfig,
|
||||
/// Circuit breaker configuration
|
||||
pub circuit_breaker: CircuitBreakerConfig,
|
||||
/// Disable retries (overrides retry.max_retries to 1 when true)
|
||||
#[serde(default)]
|
||||
pub disable_retries: bool,
|
||||
/// Disable circuit breaker (overrides circuit_breaker.failure_threshold to u32::MAX when true)
|
||||
#[serde(default)]
|
||||
pub disable_circuit_breaker: bool,
|
||||
}
|
||||
|
||||
/// Routing mode configuration
|
||||
@@ -197,6 +203,10 @@ pub struct RetryConfig {
|
||||
pub max_backoff_ms: u64,
|
||||
/// Backoff multiplier for exponential backoff
|
||||
pub backoff_multiplier: f32,
|
||||
/// Jitter factor applied to backoff (0.0 - 1.0)
|
||||
/// Effective delay D' = D * (1 + U[-j, +j])
|
||||
#[serde(default = "default_retry_jitter_factor")]
|
||||
pub jitter_factor: f32,
|
||||
}
|
||||
|
||||
impl Default for RetryConfig {
|
||||
@@ -206,10 +216,15 @@ impl Default for RetryConfig {
|
||||
initial_backoff_ms: 100,
|
||||
max_backoff_ms: 10000,
|
||||
backoff_multiplier: 2.0,
|
||||
jitter_factor: 0.1,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Serde fallback for `RetryConfig::jitter_factor`.
///
/// Referenced by the `#[serde(default = "default_retry_jitter_factor")]`
/// attribute on that field, so a config that omits `jitter_factor` gets this
/// value. It is the same `0.1` used by `RetryConfig::default()`.
fn default_retry_jitter_factor() -> f32 {
    const DEFAULT_JITTER_FACTOR: f32 = 0.1;
    DEFAULT_JITTER_FACTOR
}
|
||||
|
||||
/// Circuit breaker configuration for worker reliability
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct CircuitBreakerConfig {
|
||||
@@ -276,6 +291,8 @@ impl Default for RouterConfig {
|
||||
cors_allowed_origins: vec![],
|
||||
retry: RetryConfig::default(),
|
||||
circuit_breaker: CircuitBreakerConfig::default(),
|
||||
disable_retries: false,
|
||||
disable_circuit_breaker: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -312,6 +329,24 @@ impl RouterConfig {
|
||||
pub fn has_metrics(&self) -> bool {
|
||||
self.metrics.is_some()
|
||||
}
|
||||
|
||||
/// Compute the effective retry config considering disable flag
|
||||
pub fn effective_retry_config(&self) -> RetryConfig {
|
||||
let mut cfg = self.retry.clone();
|
||||
if self.disable_retries {
|
||||
cfg.max_retries = 1;
|
||||
}
|
||||
cfg
|
||||
}
|
||||
|
||||
/// Compute the effective circuit breaker config considering disable flag
|
||||
pub fn effective_circuit_breaker_config(&self) -> CircuitBreakerConfig {
|
||||
let mut cfg = self.circuit_breaker.clone();
|
||||
if self.disable_circuit_breaker {
|
||||
cfg.failure_threshold = u32::MAX;
|
||||
}
|
||||
cfg
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -388,6 +423,8 @@ mod tests {
|
||||
cors_allowed_origins: vec![],
|
||||
retry: RetryConfig::default(),
|
||||
circuit_breaker: CircuitBreakerConfig::default(),
|
||||
disable_retries: false,
|
||||
disable_circuit_breaker: false,
|
||||
};
|
||||
|
||||
let json = serde_json::to_string(&config).unwrap();
|
||||
@@ -817,6 +854,8 @@ mod tests {
|
||||
cors_allowed_origins: vec![],
|
||||
retry: RetryConfig::default(),
|
||||
circuit_breaker: CircuitBreakerConfig::default(),
|
||||
disable_retries: false,
|
||||
disable_circuit_breaker: false,
|
||||
};
|
||||
|
||||
assert!(config.mode.is_pd_mode());
|
||||
@@ -870,6 +909,8 @@ mod tests {
|
||||
cors_allowed_origins: vec![],
|
||||
retry: RetryConfig::default(),
|
||||
circuit_breaker: CircuitBreakerConfig::default(),
|
||||
disable_retries: false,
|
||||
disable_circuit_breaker: false,
|
||||
};
|
||||
|
||||
assert!(!config.mode.is_pd_mode());
|
||||
@@ -919,6 +960,8 @@ mod tests {
|
||||
cors_allowed_origins: vec![],
|
||||
retry: RetryConfig::default(),
|
||||
circuit_breaker: CircuitBreakerConfig::default(),
|
||||
disable_retries: false,
|
||||
disable_circuit_breaker: false,
|
||||
};
|
||||
|
||||
assert!(config.has_service_discovery());
|
||||
|
||||
@@ -23,6 +23,12 @@ impl ConfigValidator {
|
||||
|
||||
Self::validate_compatibility(config)?;
|
||||
|
||||
// Validate effective retry/CB configs (respect disable flags)
|
||||
let retry_cfg = config.effective_retry_config();
|
||||
let cb_cfg = config.effective_circuit_breaker_config();
|
||||
Self::validate_retry(&retry_cfg)?;
|
||||
Self::validate_circuit_breaker(&cb_cfg)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -263,6 +269,79 @@ impl ConfigValidator {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Validate retry configuration
|
||||
fn validate_retry(retry: &RetryConfig) -> ConfigResult<()> {
|
||||
if retry.max_retries < 1 {
|
||||
return Err(ConfigError::InvalidValue {
|
||||
field: "retry.max_retries".to_string(),
|
||||
value: retry.max_retries.to_string(),
|
||||
reason: "Must be >= 1 (set to 1 to effectively disable retries)".to_string(),
|
||||
});
|
||||
}
|
||||
if retry.initial_backoff_ms == 0 {
|
||||
return Err(ConfigError::InvalidValue {
|
||||
field: "retry.initial_backoff_ms".to_string(),
|
||||
value: retry.initial_backoff_ms.to_string(),
|
||||
reason: "Must be > 0".to_string(),
|
||||
});
|
||||
}
|
||||
if retry.max_backoff_ms < retry.initial_backoff_ms {
|
||||
return Err(ConfigError::InvalidValue {
|
||||
field: "retry.max_backoff_ms".to_string(),
|
||||
value: retry.max_backoff_ms.to_string(),
|
||||
reason: "Must be >= initial_backoff_ms".to_string(),
|
||||
});
|
||||
}
|
||||
if retry.backoff_multiplier < 1.0 {
|
||||
return Err(ConfigError::InvalidValue {
|
||||
field: "retry.backoff_multiplier".to_string(),
|
||||
value: retry.backoff_multiplier.to_string(),
|
||||
reason: "Must be >= 1.0".to_string(),
|
||||
});
|
||||
}
|
||||
if !(0.0..=1.0).contains(&retry.jitter_factor) {
|
||||
return Err(ConfigError::InvalidValue {
|
||||
field: "retry.jitter_factor".to_string(),
|
||||
value: retry.jitter_factor.to_string(),
|
||||
reason: "Must be between 0.0 and 1.0".to_string(),
|
||||
});
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Validate circuit breaker configuration
|
||||
fn validate_circuit_breaker(cb: &CircuitBreakerConfig) -> ConfigResult<()> {
|
||||
if cb.failure_threshold < 1 {
|
||||
return Err(ConfigError::InvalidValue {
|
||||
field: "circuit_breaker.failure_threshold".to_string(),
|
||||
value: cb.failure_threshold.to_string(),
|
||||
reason: "Must be >= 1 (set to u32::MAX to effectively disable CB)".to_string(),
|
||||
});
|
||||
}
|
||||
if cb.success_threshold < 1 {
|
||||
return Err(ConfigError::InvalidValue {
|
||||
field: "circuit_breaker.success_threshold".to_string(),
|
||||
value: cb.success_threshold.to_string(),
|
||||
reason: "Must be >= 1".to_string(),
|
||||
});
|
||||
}
|
||||
if cb.timeout_duration_secs == 0 {
|
||||
return Err(ConfigError::InvalidValue {
|
||||
field: "circuit_breaker.timeout_duration_secs".to_string(),
|
||||
value: cb.timeout_duration_secs.to_string(),
|
||||
reason: "Must be > 0".to_string(),
|
||||
});
|
||||
}
|
||||
if cb.window_duration_secs == 0 {
|
||||
return Err(ConfigError::InvalidValue {
|
||||
field: "circuit_breaker.window_duration_secs".to_string(),
|
||||
value: cb.window_duration_secs.to_string(),
|
||||
reason: "Must be > 0".to_string(),
|
||||
});
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Validate compatibility between different configuration sections
|
||||
fn validate_compatibility(config: &RouterConfig) -> ConfigResult<()> {
|
||||
// All policies are now supported for both router types thanks to the unified trait design
|
||||
|
||||
Reference in New Issue
Block a user