[router] add grpc pd and regular router init (#9893)
This commit is contained in:
@@ -4,7 +4,7 @@ use super::{
|
||||
http::{pd_router::PDRouter, router::Router},
|
||||
RouterTrait,
|
||||
};
|
||||
use crate::config::{PolicyConfig, RoutingMode};
|
||||
use crate::config::{ConnectionMode, PolicyConfig, RoutingMode};
|
||||
use crate::policies::PolicyFactory;
|
||||
use crate::server::AppContext;
|
||||
use std::sync::Arc;
|
||||
@@ -20,28 +20,56 @@ impl RouterFactory {
|
||||
return Self::create_igw_router(ctx).await;
|
||||
}
|
||||
|
||||
// TODO: Add gRPC mode check here when implementing gRPC support
|
||||
|
||||
// Default to HTTP proxy mode
|
||||
match &ctx.router_config.mode {
|
||||
RoutingMode::Regular { worker_urls } => {
|
||||
Self::create_regular_router(worker_urls, &ctx.router_config.policy, ctx).await
|
||||
// Check connection mode and route to appropriate implementation
|
||||
match ctx.router_config.connection_mode {
|
||||
ConnectionMode::Grpc => {
|
||||
// Route to gRPC implementation based on routing mode
|
||||
match &ctx.router_config.mode {
|
||||
RoutingMode::Regular { worker_urls } => {
|
||||
Self::create_grpc_router(worker_urls, &ctx.router_config.policy, ctx).await
|
||||
}
|
||||
RoutingMode::PrefillDecode {
|
||||
prefill_urls,
|
||||
decode_urls,
|
||||
prefill_policy,
|
||||
decode_policy,
|
||||
} => {
|
||||
Self::create_grpc_pd_router(
|
||||
prefill_urls,
|
||||
decode_urls,
|
||||
prefill_policy.as_ref(),
|
||||
decode_policy.as_ref(),
|
||||
&ctx.router_config.policy,
|
||||
ctx,
|
||||
)
|
||||
.await
|
||||
}
|
||||
}
|
||||
}
|
||||
RoutingMode::PrefillDecode {
|
||||
prefill_urls,
|
||||
decode_urls,
|
||||
prefill_policy,
|
||||
decode_policy,
|
||||
} => {
|
||||
Self::create_pd_router(
|
||||
prefill_urls,
|
||||
decode_urls,
|
||||
prefill_policy.as_ref(),
|
||||
decode_policy.as_ref(),
|
||||
&ctx.router_config.policy,
|
||||
ctx,
|
||||
)
|
||||
.await
|
||||
ConnectionMode::Http => {
|
||||
// Route to HTTP implementation based on routing mode
|
||||
match &ctx.router_config.mode {
|
||||
RoutingMode::Regular { worker_urls } => {
|
||||
Self::create_regular_router(worker_urls, &ctx.router_config.policy, ctx)
|
||||
.await
|
||||
}
|
||||
RoutingMode::PrefillDecode {
|
||||
prefill_urls,
|
||||
decode_urls,
|
||||
prefill_policy,
|
||||
decode_policy,
|
||||
} => {
|
||||
Self::create_pd_router(
|
||||
prefill_urls,
|
||||
decode_urls,
|
||||
prefill_policy.as_ref(),
|
||||
decode_policy.as_ref(),
|
||||
&ctx.router_config.policy,
|
||||
ctx,
|
||||
)
|
||||
.await
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -109,25 +137,92 @@ impl RouterFactory {
|
||||
|
||||
/// Create a gRPC router with injected policy
|
||||
pub async fn create_grpc_router(
|
||||
_worker_urls: &[String],
|
||||
_policy_config: &PolicyConfig,
|
||||
_ctx: &Arc<AppContext>,
|
||||
worker_urls: &[String],
|
||||
policy_config: &PolicyConfig,
|
||||
ctx: &Arc<AppContext>,
|
||||
) -> Result<Box<dyn RouterTrait>, String> {
|
||||
// For now, return an error as gRPC router is not yet implemented
|
||||
Err("gRPC router is not yet implemented".to_string())
|
||||
use super::grpc::router::GrpcRouter;
|
||||
|
||||
// Create policy
|
||||
let policy = PolicyFactory::create_from_config(policy_config);
|
||||
|
||||
// Determine which tokenizer path to use
|
||||
// Priority: tokenizer_path > model_path
|
||||
let tokenizer_path = ctx
|
||||
.router_config
|
||||
.tokenizer_path
|
||||
.clone()
|
||||
.or_else(|| ctx.router_config.model_path.clone())
|
||||
.ok_or_else(|| {
|
||||
"gRPC router requires either --tokenizer-path or --model-path to be specified"
|
||||
.to_string()
|
||||
})?;
|
||||
|
||||
// Create gRPC router
|
||||
let router = GrpcRouter::new(
|
||||
worker_urls.to_vec(),
|
||||
policy,
|
||||
ctx.router_config.worker_startup_timeout_secs,
|
||||
ctx.router_config.worker_startup_check_interval_secs,
|
||||
ctx.router_config.dp_aware,
|
||||
ctx.router_config.api_key.clone(),
|
||||
ctx.router_config.effective_retry_config(),
|
||||
ctx.router_config.effective_circuit_breaker_config(),
|
||||
ctx.router_config.health_check.clone(),
|
||||
tokenizer_path,
|
||||
)
|
||||
.await?;
|
||||
|
||||
Ok(Box::new(router))
|
||||
}
|
||||
|
||||
/// Create a gRPC PD router (placeholder for now)
|
||||
/// Create a gRPC PD router with tokenizer and worker configuration
|
||||
pub async fn create_grpc_pd_router(
|
||||
_prefill_urls: &[(String, Option<u16>)],
|
||||
_decode_urls: &[String],
|
||||
_prefill_policy_config: Option<&PolicyConfig>,
|
||||
_decode_policy_config: Option<&PolicyConfig>,
|
||||
_main_policy_config: &PolicyConfig,
|
||||
_ctx: &Arc<AppContext>,
|
||||
prefill_urls: &[(String, Option<u16>)],
|
||||
decode_urls: &[String],
|
||||
prefill_policy_config: Option<&PolicyConfig>,
|
||||
decode_policy_config: Option<&PolicyConfig>,
|
||||
main_policy_config: &PolicyConfig,
|
||||
ctx: &Arc<AppContext>,
|
||||
) -> Result<Box<dyn RouterTrait>, String> {
|
||||
// For now, return an error as gRPC PD router is not yet implemented
|
||||
Err("gRPC PD router is not yet implemented".to_string())
|
||||
use super::grpc::pd_router::GrpcPDRouter;
|
||||
|
||||
// Create policies - use specific policies if provided, otherwise fall back to main policy
|
||||
let prefill_policy =
|
||||
PolicyFactory::create_from_config(prefill_policy_config.unwrap_or(main_policy_config));
|
||||
let decode_policy =
|
||||
PolicyFactory::create_from_config(decode_policy_config.unwrap_or(main_policy_config));
|
||||
|
||||
// Determine which tokenizer path to use
|
||||
// Priority: tokenizer_path > model_path
|
||||
let tokenizer_path = ctx
|
||||
.router_config
|
||||
.tokenizer_path
|
||||
.clone()
|
||||
.or_else(|| ctx.router_config.model_path.clone())
|
||||
.ok_or_else(|| {
|
||||
"gRPC PD router requires either --tokenizer-path or --model-path to be specified"
|
||||
.to_string()
|
||||
})?;
|
||||
|
||||
// Create gRPC PD router
|
||||
let router = GrpcPDRouter::new(
|
||||
prefill_urls.to_vec(),
|
||||
decode_urls.to_vec(),
|
||||
prefill_policy,
|
||||
decode_policy,
|
||||
ctx.router_config.worker_startup_timeout_secs,
|
||||
ctx.router_config.worker_startup_check_interval_secs,
|
||||
ctx.router_config.dp_aware,
|
||||
ctx.router_config.api_key.clone(),
|
||||
ctx.router_config.effective_retry_config(),
|
||||
ctx.router_config.effective_circuit_breaker_config(),
|
||||
ctx.router_config.health_check.clone(),
|
||||
tokenizer_path,
|
||||
)
|
||||
.await?;
|
||||
|
||||
Ok(Box::new(router))
|
||||
}
|
||||
|
||||
/// Create an IGW router (placeholder for future implementation)
|
||||
|
||||
Reference in New Issue
Block a user