[router] allow one router to support different model families and serving modes (#10244)
This commit is contained in:
@@ -17,6 +17,7 @@ pub mod factory;
|
||||
pub mod grpc;
|
||||
pub mod header_utils;
|
||||
pub mod http;
|
||||
pub mod router_manager;
|
||||
|
||||
pub use factory::RouterFactory;
|
||||
// Re-export HTTP routers for convenience (keeps routers::openai_router path working)
|
||||
@@ -63,14 +64,19 @@ pub trait RouterTrait: Send + Sync + Debug + WorkerManagement {
|
||||
async fn get_model_info(&self, req: Request<Body>) -> Response;
|
||||
|
||||
/// Route a generate request
|
||||
async fn route_generate(&self, headers: Option<&HeaderMap>, body: &GenerateRequest)
|
||||
-> Response;
|
||||
async fn route_generate(
|
||||
&self,
|
||||
headers: Option<&HeaderMap>,
|
||||
body: &GenerateRequest,
|
||||
model_id: Option<&str>,
|
||||
) -> Response;
|
||||
|
||||
/// Route a chat completion request
|
||||
async fn route_chat(
|
||||
&self,
|
||||
headers: Option<&HeaderMap>,
|
||||
body: &ChatCompletionRequest,
|
||||
model_id: Option<&str>,
|
||||
) -> Response;
|
||||
|
||||
/// Route a completion request
|
||||
@@ -78,6 +84,7 @@ pub trait RouterTrait: Send + Sync + Debug + WorkerManagement {
|
||||
&self,
|
||||
headers: Option<&HeaderMap>,
|
||||
body: &CompletionRequest,
|
||||
model_id: Option<&str>,
|
||||
) -> Response;
|
||||
|
||||
/// Route a responses request
|
||||
@@ -85,11 +92,17 @@ pub trait RouterTrait: Send + Sync + Debug + WorkerManagement {
|
||||
&self,
|
||||
headers: Option<&HeaderMap>,
|
||||
body: &ResponsesRequest,
|
||||
model_id: Option<&str>,
|
||||
) -> Response;
|
||||
|
||||
async fn route_embeddings(&self, headers: Option<&HeaderMap>, body: Body) -> Response;
|
||||
|
||||
async fn route_rerank(&self, headers: Option<&HeaderMap>, body: &RerankRequest) -> Response;
|
||||
async fn route_rerank(
|
||||
&self,
|
||||
headers: Option<&HeaderMap>,
|
||||
body: &RerankRequest,
|
||||
model_id: Option<&str>,
|
||||
) -> Response;
|
||||
|
||||
/// Flush cache on all workers
|
||||
async fn flush_cache(&self) -> Response;
|
||||
|
||||
Reference in New Issue
Block a user