[router] grpc router bootstraps (#9759)

This commit is contained in:
Simo Lin
2025-08-28 12:07:06 -07:00
committed by GitHub
parent fce7ae33f8
commit 5343058875
14 changed files with 366 additions and 20 deletions

View File

@@ -1,6 +1,9 @@
//! Factory for creating router instances
use super::{pd_router::PDRouter, router::Router, RouterTrait};
use super::{
http::{pd_router::PDRouter, router::Router},
RouterTrait,
};
use crate::config::{PolicyConfig, RoutingMode};
use crate::policies::PolicyFactory;
use crate::server::AppContext;
@@ -17,7 +20,9 @@ impl RouterFactory {
return Self::create_igw_router(ctx).await;
}
// Default to proxy mode
// TODO: Add gRPC mode check here when implementing gRPC support
// Default to HTTP proxy mode
match &ctx.router_config.mode {
RoutingMode::Regular { worker_urls } => {
Self::create_regular_router(worker_urls, &ctx.router_config.policy, ctx).await
@@ -101,6 +106,29 @@ impl RouterFactory {
Ok(Box::new(router))
}
/// Placeholder factory entry point for the regular (non-PD) gRPC router.
///
/// The worker URLs, policy configuration and application context are accepted
/// so the signature is already in place, but none of them is read yet.
///
/// # Errors
///
/// Always returns `Err("gRPC router is not yet implemented")`.
pub async fn create_grpc_router(
    _worker_urls: &[String],
    _policy_config: &PolicyConfig,
    _ctx: &Arc<AppContext>,
) -> Result<Box<dyn RouterTrait>, String> {
    // Only the scaffolding exists so far; report that to the caller.
    let reason = String::from("gRPC router is not yet implemented");
    Err(reason)
}
/// Placeholder factory entry point for the gRPC PD (prefill-decode) router.
///
/// Prefill/decode URLs, the per-role and main policy configurations and the
/// application context are accepted but not read yet.
///
/// # Errors
///
/// Always returns `Err("gRPC PD router is not yet implemented")`.
pub async fn create_grpc_pd_router(
    _prefill_urls: &[(String, Option<u16>)],
    _decode_urls: &[String],
    _prefill_policy_config: Option<&PolicyConfig>,
    _decode_policy_config: Option<&PolicyConfig>,
    _main_policy_config: &PolicyConfig,
    _ctx: &Arc<AppContext>,
) -> Result<Box<dyn RouterTrait>, String> {
    // Only the scaffolding exists so far; report that to the caller.
    let reason = String::from("gRPC PD router is not yet implemented");
    Err(reason)
}
/// Create an IGW router (placeholder for future implementation)
async fn create_igw_router(_ctx: &Arc<AppContext>) -> Result<Box<dyn RouterTrait>, String> {
// For now, return an error indicating IGW is not yet implemented

View File

@@ -0,0 +1,4 @@
//! gRPC router implementations
pub mod pd_router;
pub mod router;

View File

@@ -0,0 +1,110 @@
// PD (Prefill-Decode) gRPC Router Implementation
// TODO: Implement gRPC-based PD router for disaggregated prefill-decode systems
use crate::routers::{RouterTrait, WorkerManagement};
use async_trait::async_trait;
use axum::{
body::Body,
extract::Request,
http::{HeaderMap, StatusCode},
response::{IntoResponse, Response},
};
/// Stateless stub reserving the name of the future gRPC PD (prefill-decode) router.
#[derive(Debug)]
pub struct GrpcPDRouter;

impl GrpcPDRouter {
    /// Attempts to construct the router.
    ///
    /// # Errors
    ///
    /// Always returns `Err("gRPC PD router not yet implemented")`; no instance
    /// is ever produced by this constructor.
    pub async fn new() -> Result<Self, String> {
        // TODO: Implement gRPC PD router initialization
        let reason = String::from("gRPC PD router not yet implemented");
        Err(reason)
    }
}
/// Builds the bare `501 Not Implemented` reply shared by the stubbed endpoints below.
fn grpc_pd_not_implemented() -> Response {
    StatusCode::NOT_IMPLEMENTED.into_response()
}

/// Placeholder `RouterTrait` implementation: every request-handling method
/// answers `501 Not Implemented`, and `readiness` answers `503 Service Unavailable`.
#[async_trait]
impl RouterTrait for GrpcPDRouter {
    /// Exposes the router as `&dyn Any`.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }

    /// Stub: replies 501.
    async fn health(&self, _req: Request<Body>) -> Response {
        grpc_pd_not_implemented()
    }

    /// Stub: replies 501.
    async fn health_generate(&self, _req: Request<Body>) -> Response {
        grpc_pd_not_implemented()
    }

    /// Stub: replies 501.
    async fn get_server_info(&self, _req: Request<Body>) -> Response {
        grpc_pd_not_implemented()
    }

    /// Stub: replies 501.
    async fn get_models(&self, _req: Request<Body>) -> Response {
        grpc_pd_not_implemented()
    }

    /// Stub: replies 501.
    async fn get_model_info(&self, _req: Request<Body>) -> Response {
        grpc_pd_not_implemented()
    }

    /// Stub: replies 501 without inspecting the request.
    async fn route_generate(
        &self,
        _headers: Option<&HeaderMap>,
        _body: &crate::protocols::spec::GenerateRequest,
    ) -> Response {
        grpc_pd_not_implemented()
    }

    /// Stub: replies 501 without inspecting the request.
    async fn route_chat(
        &self,
        _headers: Option<&HeaderMap>,
        _body: &crate::protocols::spec::ChatCompletionRequest,
    ) -> Response {
        grpc_pd_not_implemented()
    }

    /// Stub: replies 501 without inspecting the request.
    async fn route_completion(
        &self,
        _headers: Option<&HeaderMap>,
        _body: &crate::protocols::spec::CompletionRequest,
    ) -> Response {
        grpc_pd_not_implemented()
    }

    /// Stub: replies 501 without inspecting the request.
    async fn route_embeddings(&self, _headers: Option<&HeaderMap>, _body: Body) -> Response {
        grpc_pd_not_implemented()
    }

    /// Stub: replies 501 without inspecting the request.
    async fn route_rerank(&self, _headers: Option<&HeaderMap>, _body: Body) -> Response {
        grpc_pd_not_implemented()
    }

    /// Stub: replies 501.
    async fn flush_cache(&self) -> Response {
        grpc_pd_not_implemented()
    }

    /// Stub: replies 501.
    async fn get_worker_loads(&self) -> Response {
        grpc_pd_not_implemented()
    }

    /// Identifier reported for this router kind.
    fn router_type(&self) -> &'static str {
        "grpc_pd"
    }

    /// The placeholder is never ready: replies `503 Service Unavailable`.
    fn readiness(&self) -> Response {
        StatusCode::SERVICE_UNAVAILABLE.into_response()
    }
}
/// Worker management for the placeholder router: nothing is ever tracked.
#[async_trait]
impl WorkerManagement for GrpcPDRouter {
    /// Always fails with `"Not implemented"`; the URL is ignored.
    async fn add_worker(&self, _worker_url: &str) -> Result<String, String> {
        Err(String::from("Not implemented"))
    }

    /// Does nothing; the URL is ignored.
    fn remove_worker(&self, _worker_url: &str) {}

    /// Always returns an empty list.
    fn get_worker_urls(&self) -> Vec<String> {
        Vec::new()
    }
}

View File

@@ -0,0 +1,110 @@
// gRPC Router Implementation
// TODO: Implement gRPC-based router
use crate::routers::{RouterTrait, WorkerManagement};
use async_trait::async_trait;
use axum::{
body::Body,
extract::Request,
http::{HeaderMap, StatusCode},
response::{IntoResponse, Response},
};
/// Stateless stub reserving the name of the future gRPC router.
#[derive(Debug)]
pub struct GrpcRouter;

impl GrpcRouter {
    /// Attempts to construct the router.
    ///
    /// # Errors
    ///
    /// Always returns `Err("gRPC router not yet implemented")`; no instance
    /// is ever produced by this constructor.
    pub async fn new() -> Result<Self, String> {
        // TODO: Implement gRPC router initialization
        let reason = String::from("gRPC router not yet implemented");
        Err(reason)
    }
}
/// Builds the bare `501 Not Implemented` reply shared by the stubbed endpoints below.
fn grpc_not_implemented() -> Response {
    StatusCode::NOT_IMPLEMENTED.into_response()
}

/// Placeholder `RouterTrait` implementation: every request-handling method
/// answers `501 Not Implemented`, and `readiness` answers `503 Service Unavailable`.
#[async_trait]
impl RouterTrait for GrpcRouter {
    /// Exposes the router as `&dyn Any`.
    fn as_any(&self) -> &dyn std::any::Any {
        self
    }

    /// Stub: replies 501.
    async fn health(&self, _req: Request<Body>) -> Response {
        grpc_not_implemented()
    }

    /// Stub: replies 501.
    async fn health_generate(&self, _req: Request<Body>) -> Response {
        grpc_not_implemented()
    }

    /// Stub: replies 501.
    async fn get_server_info(&self, _req: Request<Body>) -> Response {
        grpc_not_implemented()
    }

    /// Stub: replies 501.
    async fn get_models(&self, _req: Request<Body>) -> Response {
        grpc_not_implemented()
    }

    /// Stub: replies 501.
    async fn get_model_info(&self, _req: Request<Body>) -> Response {
        grpc_not_implemented()
    }

    /// Stub: replies 501 without inspecting the request.
    async fn route_generate(
        &self,
        _headers: Option<&HeaderMap>,
        _body: &crate::protocols::spec::GenerateRequest,
    ) -> Response {
        grpc_not_implemented()
    }

    /// Stub: replies 501 without inspecting the request.
    async fn route_chat(
        &self,
        _headers: Option<&HeaderMap>,
        _body: &crate::protocols::spec::ChatCompletionRequest,
    ) -> Response {
        grpc_not_implemented()
    }

    /// Stub: replies 501 without inspecting the request.
    async fn route_completion(
        &self,
        _headers: Option<&HeaderMap>,
        _body: &crate::protocols::spec::CompletionRequest,
    ) -> Response {
        grpc_not_implemented()
    }

    /// Stub: replies 501 without inspecting the request.
    async fn route_embeddings(&self, _headers: Option<&HeaderMap>, _body: Body) -> Response {
        grpc_not_implemented()
    }

    /// Stub: replies 501 without inspecting the request.
    async fn route_rerank(&self, _headers: Option<&HeaderMap>, _body: Body) -> Response {
        grpc_not_implemented()
    }

    /// Stub: replies 501.
    async fn flush_cache(&self) -> Response {
        grpc_not_implemented()
    }

    /// Stub: replies 501.
    async fn get_worker_loads(&self) -> Response {
        grpc_not_implemented()
    }

    /// Identifier reported for this router kind.
    fn router_type(&self) -> &'static str {
        "grpc"
    }

    /// The placeholder is never ready: replies `503 Service Unavailable`.
    fn readiness(&self) -> Response {
        StatusCode::SERVICE_UNAVAILABLE.into_response()
    }
}
/// Worker management for the placeholder router: nothing is ever tracked.
#[async_trait]
impl WorkerManagement for GrpcRouter {
    /// Always fails with `"Not implemented"`; the URL is ignored.
    async fn add_worker(&self, _worker_url: &str) -> Result<String, String> {
        Err(String::from("Not implemented"))
    }

    /// Does nothing; the URL is ignored.
    fn remove_worker(&self, _worker_url: &str) {}

    /// Always returns an empty list.
    fn get_worker_urls(&self) -> Vec<String> {
        Vec::new()
    }
}

View File

@@ -0,0 +1,5 @@
//! HTTP router implementations
pub mod pd_router;
pub mod pd_types;
pub mod router;

View File

@@ -1,6 +1,5 @@
// PD (Prefill-Decode) Router Implementation
// This module handles routing for disaggregated prefill-decode systems
use super::header_utils;
use super::pd_types::{api_path, PDRouterError};
use crate::config::types::{
CircuitBreakerConfig as ConfigCircuitBreakerConfig,
@@ -16,6 +15,7 @@ use crate::protocols::spec::{
ChatCompletionRequest, ChatMessage, CompletionRequest, GenerateRequest, StringOrArray,
UserMessageContent,
};
use crate::routers::header_utils;
use crate::routers::{RouterTrait, WorkerManagement};
use async_trait::async_trait;
use axum::{
@@ -72,7 +72,7 @@ impl PDRouter {
// Private helper method to perform health check on a new server
async fn wait_for_server_health(&self, url: &str) -> Result<(), PDRouterError> {
crate::routers::router::Router::wait_for_healthy_workers(
crate::routers::http::router::Router::wait_for_healthy_workers(
&[url.to_string()],
self.timeout_secs,
self.interval_secs,
@@ -435,7 +435,7 @@ impl PDRouter {
.map(|worker| worker.url().to_string())
.collect();
if !all_urls.is_empty() {
crate::routers::router::Router::wait_for_healthy_workers(
crate::routers::http::router::Router::wait_for_healthy_workers(
&all_urls,
timeout_secs,
interval_secs,
@@ -1935,6 +1935,14 @@ impl RouterTrait for PDRouter {
self.execute_dual_dispatch(headers, body, context).await
}
async fn route_embeddings(&self, _headers: Option<&HeaderMap>, _body: Body) -> Response {
todo!()
}
async fn route_rerank(&self, _headers: Option<&HeaderMap>, _body: Body) -> Response {
todo!()
}
async fn flush_cache(&self) -> Response {
// Process both prefill and decode workers
let (prefill_results, prefill_errors) = self
@@ -2040,7 +2048,7 @@ impl RouterTrait for PDRouter {
let total_decode = self.decode_workers.read().unwrap().len();
if healthy_prefill_count > 0 && healthy_decode_count > 0 {
Json(serde_json::json!({
Json(json!({
"status": "ready",
"prefill": {
"healthy": healthy_prefill_count,

View File

@@ -1,4 +1,3 @@
use super::header_utils;
use crate::config::types::{
CircuitBreakerConfig as ConfigCircuitBreakerConfig,
HealthCheckConfig as ConfigHealthCheckConfig, RetryConfig,
@@ -12,6 +11,7 @@ use crate::policies::LoadBalancingPolicy;
use crate::protocols::spec::{
ChatCompletionRequest, CompletionRequest, GenerateRequest, GenerationRequest,
};
use crate::routers::header_utils;
use crate::routers::{RouterTrait, WorkerManagement};
use axum::{
body::Body,
@@ -393,7 +393,7 @@ impl Router {
// Helper method to proxy GET requests to the first available worker
async fn proxy_get_request(&self, req: Request<Body>, endpoint: &str) -> Response {
let headers = super::header_utils::copy_request_headers(&req);
let headers = header_utils::copy_request_headers(&req);
match self.select_first_worker() {
Ok(worker_url) => {
@@ -667,7 +667,7 @@ impl Router {
if !is_stream {
// For non-streaming requests, preserve headers
let response_headers = super::header_utils::preserve_response_headers(res.headers());
let response_headers = header_utils::preserve_response_headers(res.headers());
let response = match res.bytes().await {
Ok(body) => {
@@ -1198,6 +1198,14 @@ impl RouterTrait for Router {
.await
}
async fn route_embeddings(&self, _headers: Option<&HeaderMap>, _body: Body) -> Response {
todo!()
}
async fn route_rerank(&self, _headers: Option<&HeaderMap>, _body: Body) -> Response {
todo!()
}
async fn flush_cache(&self) -> Response {
// Get all worker URLs
let worker_urls = self.get_worker_urls();

View File

@@ -12,10 +12,9 @@ use std::fmt::Debug;
use crate::protocols::spec::{ChatCompletionRequest, CompletionRequest, GenerateRequest};
pub mod factory;
pub mod grpc;
pub mod header_utils;
pub mod pd_router;
pub mod pd_types;
pub mod router;
pub mod http;
pub use factory::RouterFactory;
@@ -77,6 +76,10 @@ pub trait RouterTrait: Send + Sync + Debug + WorkerManagement {
body: &CompletionRequest,
) -> Response;
/// Route an embeddings request.
///
/// NOTE(review): unlike the typed `route_*` methods above, the payload is taken
/// as an opaque `Body` rather than a parsed request struct — confirm whether a
/// typed request is planned.
async fn route_embeddings(&self, headers: Option<&HeaderMap>, body: Body) -> Response;
/// Route a rerank request.
///
/// NOTE(review): the payload is taken as an opaque `Body` rather than a parsed
/// request struct — confirm whether a typed request is planned.
async fn route_rerank(&self, headers: Option<&HeaderMap>, body: Body) -> Response;
/// Flush cache on all workers
async fn flush_cache(&self) -> Response;