[router] router circuit breaker core (#8941)
This commit is contained in:
@@ -51,6 +51,7 @@ impl RouterFactory {
|
||||
ctx.router_config.dp_aware,
|
||||
ctx.router_config.api_key.clone(),
|
||||
ctx.router_config.retry.clone(),
|
||||
ctx.router_config.circuit_breaker.clone(),
|
||||
)?;
|
||||
|
||||
Ok(Box::new(router))
|
||||
@@ -81,6 +82,7 @@ impl RouterFactory {
|
||||
ctx.router_config.worker_startup_timeout_secs,
|
||||
ctx.router_config.worker_startup_check_interval_secs,
|
||||
ctx.router_config.retry.clone(),
|
||||
ctx.router_config.circuit_breaker.clone(),
|
||||
)?;
|
||||
|
||||
Ok(Box::new(router))
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
// PD (Prefill-Decode) Router Implementation
|
||||
// This module handles routing for disaggregated prefill-decode systems
|
||||
use super::pd_types::{api_path, PDRouterError};
|
||||
use crate::config::types::RetryConfig;
|
||||
use crate::core::{HealthChecker, Worker, WorkerFactory, WorkerLoadGuard};
|
||||
use crate::config::types::{CircuitBreakerConfig as ConfigCircuitBreakerConfig, RetryConfig};
|
||||
use crate::core::{CircuitBreakerConfig, HealthChecker, Worker, WorkerFactory, WorkerLoadGuard};
|
||||
use crate::metrics::RouterMetrics;
|
||||
use crate::openai_api_types::{ChatCompletionRequest, CompletionRequest, GenerateRequest};
|
||||
use crate::policies::LoadBalancingPolicy;
|
||||
@@ -41,6 +41,7 @@ pub struct PDRouter {
|
||||
// Dedicated client for prefill fire-and-forget (non-logprob) requests
|
||||
pub prefill_client: Client,
|
||||
pub retry_config: RetryConfig,
|
||||
pub circuit_breaker_config: CircuitBreakerConfig,
|
||||
_prefill_health_checker: Option<HealthChecker>,
|
||||
_decode_health_checker: Option<HealthChecker>,
|
||||
}
|
||||
@@ -68,8 +69,12 @@ impl PDRouter {
|
||||
// Wait for the new server to be healthy
|
||||
self.wait_for_server_health(&url).await?;
|
||||
|
||||
// Create Worker for the new prefill server
|
||||
let worker = WorkerFactory::create_prefill(url.clone(), bootstrap_port);
|
||||
// Create Worker for the new prefill server with circuit breaker configuration
|
||||
let worker = WorkerFactory::create_prefill_with_config(
|
||||
url.clone(),
|
||||
bootstrap_port,
|
||||
self.circuit_breaker_config.clone(),
|
||||
);
|
||||
|
||||
// Add to prefill workers list
|
||||
let mut workers = self
|
||||
@@ -99,8 +104,11 @@ impl PDRouter {
|
||||
// Wait for the new server to be healthy
|
||||
self.wait_for_server_health(&url).await?;
|
||||
|
||||
// Create Worker for the new decode server
|
||||
let worker = WorkerFactory::create_decode(url.clone());
|
||||
// Create Worker for the new decode server with circuit breaker configuration
|
||||
let worker = WorkerFactory::create_decode_with_config(
|
||||
url.clone(),
|
||||
self.circuit_breaker_config.clone(),
|
||||
);
|
||||
|
||||
// Add to decode workers list
|
||||
let mut workers = self
|
||||
@@ -189,16 +197,31 @@ impl PDRouter {
|
||||
timeout_secs: u64,
|
||||
interval_secs: u64,
|
||||
retry_config: RetryConfig,
|
||||
circuit_breaker_config: ConfigCircuitBreakerConfig,
|
||||
) -> Result<Self, String> {
|
||||
// Convert config CircuitBreakerConfig to core CircuitBreakerConfig
|
||||
let core_cb_config = CircuitBreakerConfig {
|
||||
failure_threshold: circuit_breaker_config.failure_threshold,
|
||||
success_threshold: circuit_breaker_config.success_threshold,
|
||||
timeout_duration: std::time::Duration::from_secs(
|
||||
circuit_breaker_config.timeout_duration_secs,
|
||||
),
|
||||
window_duration: std::time::Duration::from_secs(
|
||||
circuit_breaker_config.window_duration_secs,
|
||||
),
|
||||
};
|
||||
|
||||
// Convert URLs to Worker trait objects
|
||||
let prefill_workers: Vec<Box<dyn Worker>> = prefill_urls
|
||||
.into_iter()
|
||||
.map(|(url, port)| WorkerFactory::create_prefill(url, port))
|
||||
.map(|(url, port)| {
|
||||
WorkerFactory::create_prefill_with_config(url, port, core_cb_config.clone())
|
||||
})
|
||||
.collect();
|
||||
|
||||
let decode_workers: Vec<Box<dyn Worker>> = decode_urls
|
||||
.into_iter()
|
||||
.map(WorkerFactory::create_decode)
|
||||
.map(|url| WorkerFactory::create_decode_with_config(url, core_cb_config.clone()))
|
||||
.collect();
|
||||
|
||||
// Wait for PD workers to be healthy (skip if empty - for service discovery mode)
|
||||
@@ -280,6 +303,7 @@ impl PDRouter {
|
||||
client,
|
||||
prefill_client,
|
||||
retry_config,
|
||||
circuit_breaker_config: core_cb_config,
|
||||
_prefill_health_checker: Some(prefill_health_checker),
|
||||
_decode_health_checker: Some(decode_health_checker),
|
||||
})
|
||||
@@ -1848,6 +1872,7 @@ mod tests {
|
||||
client: Client::new(),
|
||||
prefill_client: Client::new(),
|
||||
retry_config: RetryConfig::default(),
|
||||
circuit_breaker_config: CircuitBreakerConfig::default(),
|
||||
_prefill_health_checker: None,
|
||||
_decode_health_checker: None,
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
use crate::config::types::RetryConfig;
|
||||
use crate::core::{HealthChecker, Worker, WorkerFactory};
|
||||
use crate::config::types::{CircuitBreakerConfig as ConfigCircuitBreakerConfig, RetryConfig};
|
||||
use crate::core::{CircuitBreakerConfig, HealthChecker, Worker, WorkerFactory};
|
||||
use crate::metrics::RouterMetrics;
|
||||
use crate::openai_api_types::{ChatCompletionRequest, CompletionRequest, GenerateRequest};
|
||||
use crate::policies::LoadBalancingPolicy;
|
||||
@@ -42,6 +42,7 @@ pub struct Router {
|
||||
dp_aware: bool,
|
||||
api_key: Option<String>,
|
||||
retry_config: RetryConfig,
|
||||
circuit_breaker_config: CircuitBreakerConfig,
|
||||
_worker_loads: Arc<tokio::sync::watch::Receiver<HashMap<String, isize>>>,
|
||||
_load_monitor_handle: Option<Arc<tokio::task::JoinHandle<()>>>,
|
||||
_health_checker: Option<HealthChecker>,
|
||||
@@ -58,6 +59,7 @@ impl Router {
|
||||
dp_aware: bool,
|
||||
api_key: Option<String>,
|
||||
retry_config: RetryConfig,
|
||||
circuit_breaker_config: ConfigCircuitBreakerConfig,
|
||||
) -> Result<Self, String> {
|
||||
// Update active workers gauge
|
||||
RouterMetrics::set_active_workers(worker_urls.len());
|
||||
@@ -75,10 +77,24 @@ impl Router {
|
||||
worker_urls
|
||||
};
|
||||
|
||||
// Convert config CircuitBreakerConfig to core CircuitBreakerConfig
|
||||
let core_cb_config = CircuitBreakerConfig {
|
||||
failure_threshold: circuit_breaker_config.failure_threshold,
|
||||
success_threshold: circuit_breaker_config.success_threshold,
|
||||
timeout_duration: std::time::Duration::from_secs(
|
||||
circuit_breaker_config.timeout_duration_secs,
|
||||
),
|
||||
window_duration: std::time::Duration::from_secs(
|
||||
circuit_breaker_config.window_duration_secs,
|
||||
),
|
||||
};
|
||||
|
||||
// Create Worker trait objects from URLs
|
||||
let workers: Vec<Box<dyn Worker>> = worker_urls
|
||||
.iter()
|
||||
.map(|url| WorkerFactory::create_regular(url.clone()))
|
||||
.map(|url| {
|
||||
WorkerFactory::create_regular_with_config(url.clone(), core_cb_config.clone())
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Initialize policy with workers if needed (e.g., for cache-aware)
|
||||
@@ -125,6 +141,7 @@ impl Router {
|
||||
dp_aware,
|
||||
api_key,
|
||||
retry_config,
|
||||
circuit_breaker_config: core_cb_config,
|
||||
_worker_loads: worker_loads,
|
||||
_load_monitor_handle: load_monitor_handle,
|
||||
_health_checker: Some(health_checker),
|
||||
@@ -752,7 +769,10 @@ impl Router {
|
||||
continue;
|
||||
}
|
||||
info!("Added worker: {}", dp_url);
|
||||
let new_worker = WorkerFactory::create_regular(dp_url.to_string());
|
||||
let new_worker = WorkerFactory::create_regular_with_config(
|
||||
dp_url.to_string(),
|
||||
self.circuit_breaker_config.clone(),
|
||||
);
|
||||
workers_guard.push(new_worker);
|
||||
worker_added = true;
|
||||
}
|
||||
@@ -764,7 +784,10 @@ impl Router {
|
||||
return Err(format!("Worker {} already exists", worker_url));
|
||||
}
|
||||
info!("Added worker: {}", worker_url);
|
||||
let new_worker = WorkerFactory::create_regular(worker_url.to_string());
|
||||
let new_worker = WorkerFactory::create_regular_with_config(
|
||||
worker_url.to_string(),
|
||||
self.circuit_breaker_config.clone(),
|
||||
);
|
||||
workers_guard.push(new_worker);
|
||||
}
|
||||
|
||||
@@ -1223,6 +1246,7 @@ mod tests {
|
||||
api_key: None,
|
||||
client: Client::new(),
|
||||
retry_config: RetryConfig::default(),
|
||||
circuit_breaker_config: CircuitBreakerConfig::default(),
|
||||
_worker_loads: Arc::new(rx),
|
||||
_load_monitor_handle: None,
|
||||
_health_checker: None,
|
||||
|
||||
Reference in New Issue
Block a user