[router]: Add Embedding routing logic (#10129)

Signed-off-by: Jintao Zhang <zhangjintao9020@gmail.com>
Co-authored-by: Waël Boukhobza <wawa_wael@live.fr>
This commit is contained in:
Jintao Zhang
2025-09-15 09:44:35 +08:00
committed by GitHub
parent dcee42c200
commit f9ee6ae17a
17 changed files with 452 additions and 69 deletions

View File

@@ -10,7 +10,8 @@ use axum::{
use std::fmt::Debug;
use crate::protocols::spec::{
ChatCompletionRequest, CompletionRequest, GenerateRequest, RerankRequest, ResponsesRequest,
ChatCompletionRequest, CompletionRequest, EmbeddingRequest, GenerateRequest, RerankRequest,
ResponsesRequest,
};
pub mod factory;
@@ -123,7 +124,13 @@ pub trait RouterTrait: Send + Sync + Debug + WorkerManagement {
.into_response()
}
/// Route an embeddings request.
///
/// Takes the optional request headers and the request body as a `Body` value
/// and yields a `Response`. Declared without a default body, so implementors
/// must provide it.
async fn route_embeddings(&self, headers: Option<&HeaderMap>, body: Body) -> Response;
/// Route embedding requests (OpenAI-compatible /v1/embeddings)
///
/// Declared without a default body, so every implementor of this trait must
/// supply its own routing for embedding requests.
///
/// # Arguments
/// * `headers` - Request headers when available; `None` is accepted.
/// * `body` - The embedding request, borrowed as an `EmbeddingRequest`.
/// * `model_id` - Optional model identifier.
///   NOTE(review): presumably selects which model/worker serves the request;
///   confirm against the implementors.
///
/// # Returns
/// The `Response` produced for the request.
async fn route_embeddings(
&self,
headers: Option<&HeaderMap>,
body: &EmbeddingRequest,
model_id: Option<&str>,
) -> Response;
async fn route_rerank(
&self,