[router] add move grpc worker management from router to worker manager (#10960)
This commit is contained in:
@@ -181,29 +181,55 @@ impl WorkerManager {
|
||||
) -> Result<(), String> {
|
||||
info!("Starting worker initialization");
|
||||
|
||||
// Determine connection mode from config
|
||||
let connection_mode = &config.connection_mode;
|
||||
|
||||
match &config.mode {
|
||||
RoutingMode::Regular { worker_urls } => {
|
||||
Self::initialize_regular_workers(worker_urls, config, registry, policy_registry)
|
||||
RoutingMode::Regular { worker_urls } => match connection_mode {
|
||||
ConfigConnectionMode::Http => {
|
||||
Self::initialize_regular_workers(
|
||||
worker_urls,
|
||||
config,
|
||||
registry,
|
||||
policy_registry,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
}
|
||||
ConfigConnectionMode::Grpc => {
|
||||
Self::initialize_grpc_workers(worker_urls, config, registry, policy_registry)
|
||||
.await?;
|
||||
}
|
||||
},
|
||||
RoutingMode::PrefillDecode {
|
||||
prefill_urls,
|
||||
decode_urls,
|
||||
..
|
||||
} => {
|
||||
let prefill_entries: Vec<(&String, &Option<u16>)> =
|
||||
prefill_urls.iter().map(|(url, port)| (url, port)).collect();
|
||||
} => match connection_mode {
|
||||
ConfigConnectionMode::Http => {
|
||||
let prefill_entries: Vec<(&String, &Option<u16>)> =
|
||||
prefill_urls.iter().map(|(url, port)| (url, port)).collect();
|
||||
|
||||
Self::initialize_prefill_workers(
|
||||
&prefill_entries,
|
||||
config,
|
||||
registry,
|
||||
policy_registry,
|
||||
)
|
||||
.await?;
|
||||
Self::initialize_decode_workers(decode_urls, config, registry, policy_registry)
|
||||
Self::initialize_prefill_workers(
|
||||
&prefill_entries,
|
||||
config,
|
||||
registry,
|
||||
policy_registry,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
Self::initialize_decode_workers(decode_urls, config, registry, policy_registry)
|
||||
.await?;
|
||||
}
|
||||
ConfigConnectionMode::Grpc => {
|
||||
Self::initialize_grpc_pd_workers(
|
||||
prefill_urls,
|
||||
decode_urls,
|
||||
config,
|
||||
registry,
|
||||
policy_registry,
|
||||
)
|
||||
.await?;
|
||||
}
|
||||
},
|
||||
RoutingMode::OpenAI { .. } => {
|
||||
info!("OpenAI routing mode - no workers to initialize");
|
||||
}
|
||||
@@ -397,6 +423,133 @@ impl WorkerManager {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Initialize gRPC workers for regular mode
|
||||
async fn initialize_grpc_workers(
|
||||
urls: &[String],
|
||||
config: &RouterConfig,
|
||||
registry: &Arc<WorkerRegistry>,
|
||||
policy_registry: Option<&Arc<PolicyRegistry>>,
|
||||
) -> Result<(), String> {
|
||||
info!("Creating {} gRPC regular workers", urls.len());
|
||||
|
||||
let circuit_breaker_config =
|
||||
Self::convert_circuit_breaker_config(&config.effective_circuit_breaker_config());
|
||||
let health_config = Self::convert_health_config(&config.health_check);
|
||||
let connection_mode = ConnectionMode::Grpc { port: None };
|
||||
|
||||
let mut registered_workers: HashMap<String, Vec<Arc<dyn Worker>>> = HashMap::new();
|
||||
|
||||
for url in urls {
|
||||
let worker = Self::create_basic_worker(
|
||||
url.clone(),
|
||||
WorkerType::Regular,
|
||||
connection_mode.clone(),
|
||||
config.api_key.clone(),
|
||||
None,
|
||||
circuit_breaker_config.clone(),
|
||||
health_config.clone(),
|
||||
);
|
||||
Self::register_worker(worker, registry, &mut registered_workers, policy_registry);
|
||||
info!(
|
||||
"Registered gRPC worker at {} (will connect on first use)",
|
||||
url
|
||||
);
|
||||
}
|
||||
|
||||
Self::initialize_cache_policies(®istered_workers, registry, policy_registry);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Initialize gRPC PD (Prefill-Decode) workers
|
||||
async fn initialize_grpc_pd_workers(
|
||||
prefill_urls: &[(String, Option<u16>)],
|
||||
decode_urls: &[String],
|
||||
config: &RouterConfig,
|
||||
registry: &Arc<WorkerRegistry>,
|
||||
policy_registry: Option<&Arc<PolicyRegistry>>,
|
||||
) -> Result<(), String> {
|
||||
info!(
|
||||
"Creating {} gRPC prefill workers and {} gRPC decode workers",
|
||||
prefill_urls.len(),
|
||||
decode_urls.len()
|
||||
);
|
||||
|
||||
let circuit_breaker_config =
|
||||
Self::convert_circuit_breaker_config(&config.effective_circuit_breaker_config());
|
||||
let health_config = Self::convert_health_config(&config.health_check);
|
||||
|
||||
let mut registered_prefill_workers: HashMap<String, Vec<Arc<dyn Worker>>> = HashMap::new();
|
||||
let mut registered_decode_workers: HashMap<String, Vec<Arc<dyn Worker>>> = HashMap::new();
|
||||
|
||||
for (url, bootstrap_port) in prefill_urls {
|
||||
let worker_type = WorkerType::Prefill {
|
||||
bootstrap_port: *bootstrap_port,
|
||||
};
|
||||
let connection_mode = ConnectionMode::Grpc {
|
||||
port: *bootstrap_port,
|
||||
};
|
||||
|
||||
let worker = Self::create_basic_worker(
|
||||
url.clone(),
|
||||
worker_type,
|
||||
connection_mode,
|
||||
config.api_key.clone(),
|
||||
None,
|
||||
circuit_breaker_config.clone(),
|
||||
health_config.clone(),
|
||||
);
|
||||
Self::register_worker(
|
||||
worker,
|
||||
registry,
|
||||
&mut registered_prefill_workers,
|
||||
policy_registry,
|
||||
);
|
||||
info!(
|
||||
"Registered gRPC prefill worker at {} (will connect on first use)",
|
||||
url
|
||||
);
|
||||
}
|
||||
|
||||
// Create decode workers
|
||||
for url in decode_urls {
|
||||
let connection_mode = ConnectionMode::Grpc { port: None };
|
||||
|
||||
let worker = Self::create_basic_worker(
|
||||
url.clone(),
|
||||
WorkerType::Decode,
|
||||
connection_mode,
|
||||
config.api_key.clone(),
|
||||
None,
|
||||
circuit_breaker_config.clone(),
|
||||
health_config.clone(),
|
||||
);
|
||||
Self::register_worker(
|
||||
worker,
|
||||
registry,
|
||||
&mut registered_decode_workers,
|
||||
policy_registry,
|
||||
);
|
||||
info!(
|
||||
"Registered gRPC decode worker at {} (will connect on first use)",
|
||||
url
|
||||
);
|
||||
}
|
||||
|
||||
if let Some(policy_reg) = policy_registry {
|
||||
let all_prefill_workers: Vec<Arc<dyn Worker>> = registered_prefill_workers
|
||||
.values()
|
||||
.flat_map(|workers| workers.iter().cloned())
|
||||
.collect();
|
||||
let all_decode_workers: Vec<Arc<dyn Worker>> = registered_decode_workers
|
||||
.values()
|
||||
.flat_map(|workers| workers.iter().cloned())
|
||||
.collect();
|
||||
policy_reg.init_pd_cache_aware_policies(&all_prefill_workers, &all_decode_workers);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Add a worker from a configuration request
|
||||
pub async fn add_worker_from_config(
|
||||
config: &WorkerConfigRequest,
|
||||
|
||||
Reference in New Issue
Block a user