2025-07-18 14:24:24 -07:00
|
|
|
//! Factory for creating router instances
|
|
|
|
|
|
2025-08-28 12:07:06 -07:00
|
|
|
use super::{
|
|
|
|
|
http::{pd_router::PDRouter, router::Router},
|
|
|
|
|
RouterTrait,
|
|
|
|
|
};
|
2025-09-01 20:06:15 -07:00
|
|
|
use crate::config::{ConnectionMode, PolicyConfig, RoutingMode};
|
2025-07-18 14:24:24 -07:00
|
|
|
use crate::policies::PolicyFactory;
|
2025-08-02 19:16:47 -07:00
|
|
|
use crate::server::AppContext;
|
|
|
|
|
use std::sync::Arc;
|
2025-07-18 14:24:24 -07:00
|
|
|
|
|
|
|
|
/// Stateless entry point for building router instances.
///
/// The type carries no data; all functionality is exposed through associated
/// functions that read their settings from the application context they are given.
pub struct RouterFactory;
|
|
|
|
|
|
|
|
|
|
impl RouterFactory {
|
2025-08-02 19:16:47 -07:00
|
|
|
/// Create a router instance from application context
|
2025-08-11 21:37:36 -07:00
|
|
|
pub async fn create_router(ctx: &Arc<AppContext>) -> Result<Box<dyn RouterTrait>, String> {
|
2025-08-20 17:38:57 -07:00
|
|
|
// Check if IGW mode is enabled
|
|
|
|
|
if ctx.router_config.enable_igw {
|
|
|
|
|
return Self::create_igw_router(ctx).await;
|
|
|
|
|
}
|
|
|
|
|
|
2025-09-01 20:06:15 -07:00
|
|
|
// Check connection mode and route to appropriate implementation
|
|
|
|
|
match ctx.router_config.connection_mode {
|
|
|
|
|
ConnectionMode::Grpc => {
|
|
|
|
|
// Route to gRPC implementation based on routing mode
|
|
|
|
|
match &ctx.router_config.mode {
|
|
|
|
|
RoutingMode::Regular { worker_urls } => {
|
|
|
|
|
Self::create_grpc_router(worker_urls, &ctx.router_config.policy, ctx).await
|
|
|
|
|
}
|
|
|
|
|
RoutingMode::PrefillDecode {
|
|
|
|
|
prefill_urls,
|
|
|
|
|
decode_urls,
|
|
|
|
|
prefill_policy,
|
|
|
|
|
decode_policy,
|
|
|
|
|
} => {
|
|
|
|
|
Self::create_grpc_pd_router(
|
|
|
|
|
prefill_urls,
|
|
|
|
|
decode_urls,
|
|
|
|
|
prefill_policy.as_ref(),
|
|
|
|
|
decode_policy.as_ref(),
|
|
|
|
|
&ctx.router_config.policy,
|
|
|
|
|
ctx,
|
|
|
|
|
)
|
|
|
|
|
.await
|
|
|
|
|
}
|
|
|
|
|
}
|
2025-07-18 14:24:24 -07:00
|
|
|
}
|
2025-09-01 20:06:15 -07:00
|
|
|
ConnectionMode::Http => {
|
|
|
|
|
// Route to HTTP implementation based on routing mode
|
|
|
|
|
match &ctx.router_config.mode {
|
|
|
|
|
RoutingMode::Regular { worker_urls } => {
|
|
|
|
|
Self::create_regular_router(worker_urls, &ctx.router_config.policy, ctx)
|
|
|
|
|
.await
|
|
|
|
|
}
|
|
|
|
|
RoutingMode::PrefillDecode {
|
|
|
|
|
prefill_urls,
|
|
|
|
|
decode_urls,
|
|
|
|
|
prefill_policy,
|
|
|
|
|
decode_policy,
|
|
|
|
|
} => {
|
|
|
|
|
Self::create_pd_router(
|
|
|
|
|
prefill_urls,
|
|
|
|
|
decode_urls,
|
|
|
|
|
prefill_policy.as_ref(),
|
|
|
|
|
decode_policy.as_ref(),
|
|
|
|
|
&ctx.router_config.policy,
|
|
|
|
|
ctx,
|
|
|
|
|
)
|
|
|
|
|
.await
|
|
|
|
|
}
|
|
|
|
|
}
|
2025-08-11 21:37:36 -07:00
|
|
|
}
|
2025-07-18 14:24:24 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Create a regular router with injected policy
|
2025-08-11 21:37:36 -07:00
|
|
|
async fn create_regular_router(
|
2025-07-18 14:24:24 -07:00
|
|
|
worker_urls: &[String],
|
|
|
|
|
policy_config: &PolicyConfig,
|
2025-08-02 19:16:47 -07:00
|
|
|
ctx: &Arc<AppContext>,
|
2025-07-18 14:24:24 -07:00
|
|
|
) -> Result<Box<dyn RouterTrait>, String> {
|
|
|
|
|
// Create policy
|
|
|
|
|
let policy = PolicyFactory::create_from_config(policy_config);
|
|
|
|
|
|
2025-08-02 19:16:47 -07:00
|
|
|
// Create regular router with injected policy and client
|
2025-07-18 14:24:24 -07:00
|
|
|
let router = Router::new(
|
|
|
|
|
worker_urls.to_vec(),
|
|
|
|
|
policy,
|
2025-08-02 19:16:47 -07:00
|
|
|
ctx.client.clone(),
|
|
|
|
|
ctx.router_config.worker_startup_timeout_secs,
|
|
|
|
|
ctx.router_config.worker_startup_check_interval_secs,
|
|
|
|
|
ctx.router_config.dp_aware,
|
|
|
|
|
ctx.router_config.api_key.clone(),
|
2025-08-04 20:42:07 -07:00
|
|
|
ctx.router_config.retry.clone(),
|
2025-08-08 09:20:22 -07:00
|
|
|
ctx.router_config.circuit_breaker.clone(),
|
2025-08-15 08:07:45 -07:00
|
|
|
ctx.router_config.health_check.clone(),
|
2025-08-11 21:37:36 -07:00
|
|
|
)
|
|
|
|
|
.await?;
|
2025-07-18 14:24:24 -07:00
|
|
|
|
|
|
|
|
Ok(Box::new(router))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Create a PD router with injected policy
|
2025-08-11 21:37:36 -07:00
|
|
|
async fn create_pd_router(
|
2025-07-18 14:24:24 -07:00
|
|
|
prefill_urls: &[(String, Option<u16>)],
|
|
|
|
|
decode_urls: &[String],
|
2025-07-27 00:39:20 -07:00
|
|
|
prefill_policy_config: Option<&PolicyConfig>,
|
|
|
|
|
decode_policy_config: Option<&PolicyConfig>,
|
|
|
|
|
main_policy_config: &PolicyConfig,
|
2025-08-02 19:16:47 -07:00
|
|
|
ctx: &Arc<AppContext>,
|
2025-07-18 14:24:24 -07:00
|
|
|
) -> Result<Box<dyn RouterTrait>, String> {
|
2025-07-27 00:39:20 -07:00
|
|
|
// Create policies - use specific policies if provided, otherwise fall back to main policy
|
|
|
|
|
let prefill_policy =
|
|
|
|
|
PolicyFactory::create_from_config(prefill_policy_config.unwrap_or(main_policy_config));
|
|
|
|
|
let decode_policy =
|
|
|
|
|
PolicyFactory::create_from_config(decode_policy_config.unwrap_or(main_policy_config));
|
2025-07-18 14:24:24 -07:00
|
|
|
|
2025-08-02 19:16:47 -07:00
|
|
|
// Create PD router with separate policies and client
|
2025-07-18 14:24:24 -07:00
|
|
|
let router = PDRouter::new(
|
|
|
|
|
prefill_urls.to_vec(),
|
|
|
|
|
decode_urls.to_vec(),
|
2025-07-27 00:39:20 -07:00
|
|
|
prefill_policy,
|
|
|
|
|
decode_policy,
|
2025-08-02 19:16:47 -07:00
|
|
|
ctx.client.clone(),
|
2025-09-02 04:57:04 +02:00
|
|
|
ctx.router_config.request_timeout_secs,
|
2025-08-02 19:16:47 -07:00
|
|
|
ctx.router_config.worker_startup_timeout_secs,
|
|
|
|
|
ctx.router_config.worker_startup_check_interval_secs,
|
2025-08-04 20:42:07 -07:00
|
|
|
ctx.router_config.retry.clone(),
|
2025-08-08 09:20:22 -07:00
|
|
|
ctx.router_config.circuit_breaker.clone(),
|
2025-08-15 08:07:45 -07:00
|
|
|
ctx.router_config.health_check.clone(),
|
2025-08-11 21:37:36 -07:00
|
|
|
)
|
|
|
|
|
.await?;
|
2025-07-18 14:24:24 -07:00
|
|
|
|
|
|
|
|
Ok(Box::new(router))
|
|
|
|
|
}
|
2025-08-20 17:38:57 -07:00
|
|
|
|
2025-08-28 12:07:06 -07:00
|
|
|
/// Create a gRPC router with injected policy
|
|
|
|
|
pub async fn create_grpc_router(
|
2025-09-01 20:06:15 -07:00
|
|
|
worker_urls: &[String],
|
|
|
|
|
policy_config: &PolicyConfig,
|
|
|
|
|
ctx: &Arc<AppContext>,
|
2025-08-28 12:07:06 -07:00
|
|
|
) -> Result<Box<dyn RouterTrait>, String> {
|
2025-09-01 20:06:15 -07:00
|
|
|
use super::grpc::router::GrpcRouter;
|
|
|
|
|
|
|
|
|
|
// Create policy
|
|
|
|
|
let policy = PolicyFactory::create_from_config(policy_config);
|
|
|
|
|
|
|
|
|
|
// Determine which tokenizer path to use
|
|
|
|
|
// Priority: tokenizer_path > model_path
|
|
|
|
|
let tokenizer_path = ctx
|
|
|
|
|
.router_config
|
|
|
|
|
.tokenizer_path
|
|
|
|
|
.clone()
|
|
|
|
|
.or_else(|| ctx.router_config.model_path.clone())
|
|
|
|
|
.ok_or_else(|| {
|
|
|
|
|
"gRPC router requires either --tokenizer-path or --model-path to be specified"
|
|
|
|
|
.to_string()
|
|
|
|
|
})?;
|
|
|
|
|
|
|
|
|
|
// Create gRPC router
|
|
|
|
|
let router = GrpcRouter::new(
|
|
|
|
|
worker_urls.to_vec(),
|
|
|
|
|
policy,
|
|
|
|
|
ctx.router_config.worker_startup_timeout_secs,
|
|
|
|
|
ctx.router_config.worker_startup_check_interval_secs,
|
|
|
|
|
ctx.router_config.dp_aware,
|
|
|
|
|
ctx.router_config.api_key.clone(),
|
|
|
|
|
ctx.router_config.effective_retry_config(),
|
|
|
|
|
ctx.router_config.effective_circuit_breaker_config(),
|
|
|
|
|
ctx.router_config.health_check.clone(),
|
|
|
|
|
tokenizer_path,
|
|
|
|
|
)
|
|
|
|
|
.await?;
|
|
|
|
|
|
|
|
|
|
Ok(Box::new(router))
|
2025-08-28 12:07:06 -07:00
|
|
|
}
|
|
|
|
|
|
2025-09-01 20:06:15 -07:00
|
|
|
/// Create a gRPC PD router with tokenizer and worker configuration
|
2025-08-28 12:07:06 -07:00
|
|
|
pub async fn create_grpc_pd_router(
|
2025-09-01 20:06:15 -07:00
|
|
|
prefill_urls: &[(String, Option<u16>)],
|
|
|
|
|
decode_urls: &[String],
|
|
|
|
|
prefill_policy_config: Option<&PolicyConfig>,
|
|
|
|
|
decode_policy_config: Option<&PolicyConfig>,
|
|
|
|
|
main_policy_config: &PolicyConfig,
|
|
|
|
|
ctx: &Arc<AppContext>,
|
2025-08-28 12:07:06 -07:00
|
|
|
) -> Result<Box<dyn RouterTrait>, String> {
|
2025-09-01 20:06:15 -07:00
|
|
|
use super::grpc::pd_router::GrpcPDRouter;
|
|
|
|
|
|
|
|
|
|
// Create policies - use specific policies if provided, otherwise fall back to main policy
|
|
|
|
|
let prefill_policy =
|
|
|
|
|
PolicyFactory::create_from_config(prefill_policy_config.unwrap_or(main_policy_config));
|
|
|
|
|
let decode_policy =
|
|
|
|
|
PolicyFactory::create_from_config(decode_policy_config.unwrap_or(main_policy_config));
|
|
|
|
|
|
|
|
|
|
// Determine which tokenizer path to use
|
|
|
|
|
// Priority: tokenizer_path > model_path
|
|
|
|
|
let tokenizer_path = ctx
|
|
|
|
|
.router_config
|
|
|
|
|
.tokenizer_path
|
|
|
|
|
.clone()
|
|
|
|
|
.or_else(|| ctx.router_config.model_path.clone())
|
|
|
|
|
.ok_or_else(|| {
|
|
|
|
|
"gRPC PD router requires either --tokenizer-path or --model-path to be specified"
|
|
|
|
|
.to_string()
|
|
|
|
|
})?;
|
|
|
|
|
|
|
|
|
|
// Create gRPC PD router
|
|
|
|
|
let router = GrpcPDRouter::new(
|
|
|
|
|
prefill_urls.to_vec(),
|
|
|
|
|
decode_urls.to_vec(),
|
|
|
|
|
prefill_policy,
|
|
|
|
|
decode_policy,
|
|
|
|
|
ctx.router_config.worker_startup_timeout_secs,
|
|
|
|
|
ctx.router_config.worker_startup_check_interval_secs,
|
|
|
|
|
ctx.router_config.dp_aware,
|
|
|
|
|
ctx.router_config.api_key.clone(),
|
|
|
|
|
ctx.router_config.effective_retry_config(),
|
|
|
|
|
ctx.router_config.effective_circuit_breaker_config(),
|
|
|
|
|
ctx.router_config.health_check.clone(),
|
|
|
|
|
tokenizer_path,
|
|
|
|
|
)
|
|
|
|
|
.await?;
|
|
|
|
|
|
|
|
|
|
Ok(Box::new(router))
|
2025-08-28 12:07:06 -07:00
|
|
|
}
|
|
|
|
|
|
2025-08-20 17:38:57 -07:00
|
|
|
/// Create an IGW router (placeholder for future implementation)
|
|
|
|
|
async fn create_igw_router(_ctx: &Arc<AppContext>) -> Result<Box<dyn RouterTrait>, String> {
|
|
|
|
|
// For now, return an error indicating IGW is not yet implemented
|
|
|
|
|
Err("IGW mode is not yet implemented".to_string())
|
|
|
|
|
}
|
2025-07-18 14:24:24 -07:00
|
|
|
}
|