[router] refactor router and worker management 4/n (#10756)

Co-authored-by: Chang Su <chang.s.su@oracle.com>
This commit is contained in:
Simo Lin
2025-09-22 21:35:10 -04:00
committed by GitHub
parent 113f8f65a2
commit 89971c4c3c
4 changed files with 161 additions and 196 deletions

View File

@@ -41,6 +41,7 @@ pub struct PDRouter {
pub prefill_client: Client,
pub retry_config: RetryConfig,
pub api_key: Option<String>,
pub enable_igw: bool,
prefill_drain_tx: mpsc::Sender<reqwest::Response>,
}
@@ -317,6 +318,7 @@ impl PDRouter {
prefill_drain_tx,
retry_config: ctx.router_config.effective_retry_config(),
api_key: ctx.router_config.api_key.clone(),
enable_igw: ctx.router_config.enable_igw,
})
}
@@ -849,7 +851,14 @@ impl PDRouter {
request_text: Option<&str>,
model_id: Option<&str>,
) -> Result<(Arc<dyn Worker>, Arc<dyn Worker>), String> {
let prefill_workers = if let Some(model) = model_id {
let effective_model_id = if !self.enable_igw { None } else { model_id };
debug!(
"Selecting PD pair: enable_igw={}, model_id={:?}, effective_model_id={:?}",
self.enable_igw, model_id, effective_model_id
);
let prefill_workers = if let Some(model) = effective_model_id {
self.worker_registry
.get_by_model_fast(model)
.into_iter()
@@ -859,7 +868,7 @@ impl PDRouter {
self.worker_registry.get_prefill_workers()
};
let decode_workers = if let Some(model) = model_id {
let decode_workers = if let Some(model) = effective_model_id {
self.worker_registry
.get_by_model_fast(model)
.into_iter()
@@ -1797,6 +1806,7 @@ mod tests {
prefill_drain_tx: mpsc::channel(100).0,
retry_config: RetryConfig::default(),
api_key: Some("test_api_key".to_string()),
enable_igw: false,
}
}

View File

@@ -35,6 +35,7 @@ pub struct Router {
policy_registry: Arc<PolicyRegistry>,
client: Client,
dp_aware: bool,
enable_igw: bool,
retry_config: RetryConfig,
_worker_loads: Arc<tokio::sync::watch::Receiver<HashMap<String, isize>>>,
_load_monitor_handle: Option<Arc<tokio::task::JoinHandle<()>>>,
@@ -93,6 +94,7 @@ impl Router {
policy_registry: ctx.policy_registry.clone(),
client: ctx.client.clone(),
dp_aware: ctx.router_config.dp_aware,
enable_igw: ctx.router_config.enable_igw,
retry_config: ctx.router_config.effective_retry_config(),
_worker_loads: worker_loads,
_load_monitor_handle: load_monitor_handle,
@@ -162,9 +164,11 @@ impl Router {
model_id: Option<&str>,
text: Option<&str>,
) -> Option<Arc<dyn Worker>> {
let effective_model_id = if !self.enable_igw { None } else { model_id };
// Get workers for the specified model O(1), filtered by connection mode
let workers = self.worker_registry.get_workers_filtered(
model_id,
effective_model_id,
Some(WorkerType::Regular),
Some(ConnectionMode::Http),
false, // get all workers, we'll filter by is_available() next
@@ -1106,6 +1110,7 @@ mod tests {
retry_config: RetryConfig::default(),
_worker_loads: Arc::new(rx),
_load_monitor_handle: None,
enable_igw: false,
}
}