[router] add move grpc worker management from router to worker manager (#10960)

This commit is contained in:
Simo Lin
2025-09-26 06:57:57 -04:00
committed by GitHub
parent 1b011e68dc
commit 5d4fe1ceee
4 changed files with 232 additions and 288 deletions

View File

@@ -181,29 +181,55 @@ impl WorkerManager {
) -> Result<(), String> {
info!("Starting worker initialization");
// Determine connection mode from config
let connection_mode = &config.connection_mode;
match &config.mode {
RoutingMode::Regular { worker_urls } => {
Self::initialize_regular_workers(worker_urls, config, registry, policy_registry)
RoutingMode::Regular { worker_urls } => match connection_mode {
ConfigConnectionMode::Http => {
Self::initialize_regular_workers(
worker_urls,
config,
registry,
policy_registry,
)
.await?;
}
}
ConfigConnectionMode::Grpc => {
Self::initialize_grpc_workers(worker_urls, config, registry, policy_registry)
.await?;
}
},
RoutingMode::PrefillDecode {
prefill_urls,
decode_urls,
..
} => {
let prefill_entries: Vec<(&String, &Option<u16>)> =
prefill_urls.iter().map(|(url, port)| (url, port)).collect();
} => match connection_mode {
ConfigConnectionMode::Http => {
let prefill_entries: Vec<(&String, &Option<u16>)> =
prefill_urls.iter().map(|(url, port)| (url, port)).collect();
Self::initialize_prefill_workers(
&prefill_entries,
config,
registry,
policy_registry,
)
.await?;
Self::initialize_decode_workers(decode_urls, config, registry, policy_registry)
Self::initialize_prefill_workers(
&prefill_entries,
config,
registry,
policy_registry,
)
.await?;
}
Self::initialize_decode_workers(decode_urls, config, registry, policy_registry)
.await?;
}
ConfigConnectionMode::Grpc => {
Self::initialize_grpc_pd_workers(
prefill_urls,
decode_urls,
config,
registry,
policy_registry,
)
.await?;
}
},
RoutingMode::OpenAI { .. } => {
info!("OpenAI routing mode - no workers to initialize");
}
@@ -397,6 +423,133 @@ impl WorkerManager {
Ok(())
}
/// Initialize gRPC workers for regular mode
async fn initialize_grpc_workers(
urls: &[String],
config: &RouterConfig,
registry: &Arc<WorkerRegistry>,
policy_registry: Option<&Arc<PolicyRegistry>>,
) -> Result<(), String> {
info!("Creating {} gRPC regular workers", urls.len());
let circuit_breaker_config =
Self::convert_circuit_breaker_config(&config.effective_circuit_breaker_config());
let health_config = Self::convert_health_config(&config.health_check);
let connection_mode = ConnectionMode::Grpc { port: None };
let mut registered_workers: HashMap<String, Vec<Arc<dyn Worker>>> = HashMap::new();
for url in urls {
let worker = Self::create_basic_worker(
url.clone(),
WorkerType::Regular,
connection_mode.clone(),
config.api_key.clone(),
None,
circuit_breaker_config.clone(),
health_config.clone(),
);
Self::register_worker(worker, registry, &mut registered_workers, policy_registry);
info!(
"Registered gRPC worker at {} (will connect on first use)",
url
);
}
Self::initialize_cache_policies(&registered_workers, registry, policy_registry);
Ok(())
}
/// Initialize gRPC PD (Prefill-Decode) workers
async fn initialize_grpc_pd_workers(
prefill_urls: &[(String, Option<u16>)],
decode_urls: &[String],
config: &RouterConfig,
registry: &Arc<WorkerRegistry>,
policy_registry: Option<&Arc<PolicyRegistry>>,
) -> Result<(), String> {
info!(
"Creating {} gRPC prefill workers and {} gRPC decode workers",
prefill_urls.len(),
decode_urls.len()
);
let circuit_breaker_config =
Self::convert_circuit_breaker_config(&config.effective_circuit_breaker_config());
let health_config = Self::convert_health_config(&config.health_check);
let mut registered_prefill_workers: HashMap<String, Vec<Arc<dyn Worker>>> = HashMap::new();
let mut registered_decode_workers: HashMap<String, Vec<Arc<dyn Worker>>> = HashMap::new();
for (url, bootstrap_port) in prefill_urls {
let worker_type = WorkerType::Prefill {
bootstrap_port: *bootstrap_port,
};
let connection_mode = ConnectionMode::Grpc {
port: *bootstrap_port,
};
let worker = Self::create_basic_worker(
url.clone(),
worker_type,
connection_mode,
config.api_key.clone(),
None,
circuit_breaker_config.clone(),
health_config.clone(),
);
Self::register_worker(
worker,
registry,
&mut registered_prefill_workers,
policy_registry,
);
info!(
"Registered gRPC prefill worker at {} (will connect on first use)",
url
);
}
// Create decode workers
for url in decode_urls {
let connection_mode = ConnectionMode::Grpc { port: None };
let worker = Self::create_basic_worker(
url.clone(),
WorkerType::Decode,
connection_mode,
config.api_key.clone(),
None,
circuit_breaker_config.clone(),
health_config.clone(),
);
Self::register_worker(
worker,
registry,
&mut registered_decode_workers,
policy_registry,
);
info!(
"Registered gRPC decode worker at {} (will connect on first use)",
url
);
}
if let Some(policy_reg) = policy_registry {
let all_prefill_workers: Vec<Arc<dyn Worker>> = registered_prefill_workers
.values()
.flat_map(|workers| workers.iter().cloned())
.collect();
let all_decode_workers: Vec<Arc<dyn Worker>> = registered_decode_workers
.values()
.flat_map(|workers| workers.iter().cloned())
.collect();
policy_reg.init_pd_cache_aware_policies(&all_prefill_workers, &all_decode_workers);
}
Ok(())
}
/// Add a worker from a configuration request
pub async fn add_worker_from_config(
config: &WorkerConfigRequest,