[router] refactor router and worker management 4/n (#10756)

Co-authored-by: Chang Su <chang.s.su@oracle.com>
This commit is contained in:
Simo Lin
2025-09-22 21:35:10 -04:00
committed by GitHub
parent 113f8f65a2
commit 89971c4c3c
4 changed files with 161 additions and 196 deletions

View File

@@ -35,6 +35,7 @@ pub struct Router {
policy_registry: Arc<PolicyRegistry>,
client: Client,
dp_aware: bool,
enable_igw: bool,
retry_config: RetryConfig,
_worker_loads: Arc<tokio::sync::watch::Receiver<HashMap<String, isize>>>,
_load_monitor_handle: Option<Arc<tokio::task::JoinHandle<()>>>,
@@ -93,6 +94,7 @@ impl Router {
policy_registry: ctx.policy_registry.clone(),
client: ctx.client.clone(),
dp_aware: ctx.router_config.dp_aware,
enable_igw: ctx.router_config.enable_igw,
retry_config: ctx.router_config.effective_retry_config(),
_worker_loads: worker_loads,
_load_monitor_handle: load_monitor_handle,
@@ -162,9 +164,11 @@ impl Router {
model_id: Option<&str>,
text: Option<&str>,
) -> Option<Arc<dyn Worker>> {
let effective_model_id = if !self.enable_igw { None } else { model_id };
// Get workers for the specified model O(1), filtered by connection mode
let workers = self.worker_registry.get_workers_filtered(
model_id,
effective_model_id,
Some(WorkerType::Regular),
Some(ConnectionMode::Http),
false, // get all workers, we'll filter by is_available() next
@@ -1106,6 +1110,7 @@ mod tests {
retry_config: RetryConfig::default(),
_worker_loads: Arc::new(rx),
_load_monitor_handle: None,
enable_igw: false,
}
}