[router] allow one router to support different model families and serving mode (#10244)

This commit is contained in:
Simo Lin
2025-09-12 19:18:27 -04:00
committed by GitHub
parent 321fecab74
commit 2f173ea074
28 changed files with 3528 additions and 837 deletions

View File

@@ -17,6 +17,7 @@ pub mod factory;
pub mod grpc;
pub mod header_utils;
pub mod http;
pub mod router_manager;
pub use factory::RouterFactory;
// Re-export HTTP routers for convenience (keeps routers::openai_router path working)
@@ -63,14 +64,19 @@ pub trait RouterTrait: Send + Sync + Debug + WorkerManagement {
async fn get_model_info(&self, req: Request<Body>) -> Response;
/// Route a generate request
async fn route_generate(&self, headers: Option<&HeaderMap>, body: &GenerateRequest)
-> Response;
async fn route_generate(
&self,
headers: Option<&HeaderMap>,
body: &GenerateRequest,
model_id: Option<&str>,
) -> Response;
/// Route a chat completion request
async fn route_chat(
&self,
headers: Option<&HeaderMap>,
body: &ChatCompletionRequest,
model_id: Option<&str>,
) -> Response;
/// Route a completion request
@@ -78,6 +84,7 @@ pub trait RouterTrait: Send + Sync + Debug + WorkerManagement {
&self,
headers: Option<&HeaderMap>,
body: &CompletionRequest,
model_id: Option<&str>,
) -> Response;
/// Route a responses request
@@ -85,11 +92,17 @@ pub trait RouterTrait: Send + Sync + Debug + WorkerManagement {
&self,
headers: Option<&HeaderMap>,
body: &ResponsesRequest,
model_id: Option<&str>,
) -> Response;
async fn route_embeddings(&self, headers: Option<&HeaderMap>, body: Body) -> Response;
async fn route_rerank(&self, headers: Option<&HeaderMap>, body: &RerankRequest) -> Response;
async fn route_rerank(
&self,
headers: Option<&HeaderMap>,
body: &RerankRequest,
model_id: Option<&str>,
) -> Response;
/// Flush cache on all workers
async fn flush_cache(&self) -> Response;