[router] Basic OAI Response api (#10346)
This commit is contained in:
@@ -289,6 +289,14 @@ impl RouterTrait for GrpcPDRouter {
|
||||
(StatusCode::NOT_IMPLEMENTED).into_response()
|
||||
}
|
||||
|
||||
async fn route_responses(
|
||||
&self,
|
||||
_headers: Option<&HeaderMap>,
|
||||
_body: &crate::protocols::spec::ResponsesRequest,
|
||||
) -> Response {
|
||||
(StatusCode::NOT_IMPLEMENTED).into_response()
|
||||
}
|
||||
|
||||
async fn route_embeddings(&self, _headers: Option<&HeaderMap>, _body: Body) -> Response {
|
||||
(StatusCode::NOT_IMPLEMENTED).into_response()
|
||||
}
|
||||
|
||||
@@ -222,6 +222,14 @@ impl RouterTrait for GrpcRouter {
|
||||
(StatusCode::NOT_IMPLEMENTED).into_response()
|
||||
}
|
||||
|
||||
async fn route_responses(
|
||||
&self,
|
||||
_headers: Option<&HeaderMap>,
|
||||
_body: &crate::protocols::spec::ResponsesRequest,
|
||||
) -> Response {
|
||||
(StatusCode::NOT_IMPLEMENTED).into_response()
|
||||
}
|
||||
|
||||
async fn route_embeddings(&self, _headers: Option<&HeaderMap>, _body: Body) -> Response {
|
||||
(StatusCode::NOT_IMPLEMENTED).into_response()
|
||||
}
|
||||
|
||||
@@ -333,6 +333,18 @@ impl super::super::RouterTrait for OpenAIRouter {
|
||||
.into_response()
|
||||
}
|
||||
|
||||
async fn route_responses(
|
||||
&self,
|
||||
_headers: Option<&HeaderMap>,
|
||||
_body: &crate::protocols::spec::ResponsesRequest,
|
||||
) -> Response {
|
||||
(
|
||||
StatusCode::NOT_IMPLEMENTED,
|
||||
"Responses endpoint not implemented for OpenAI router",
|
||||
)
|
||||
.into_response()
|
||||
}
|
||||
|
||||
async fn flush_cache(&self) -> Response {
|
||||
(
|
||||
StatusCode::NOT_IMPLEMENTED,
|
||||
|
||||
@@ -9,8 +9,8 @@ use crate::core::{
|
||||
use crate::metrics::RouterMetrics;
|
||||
use crate::policies::LoadBalancingPolicy;
|
||||
use crate::protocols::spec::{
|
||||
ChatCompletionRequest, ChatMessage, CompletionRequest, GenerateRequest, StringOrArray,
|
||||
UserMessageContent,
|
||||
ChatCompletionRequest, ChatMessage, CompletionRequest, GenerateRequest, ResponsesRequest,
|
||||
StringOrArray, UserMessageContent,
|
||||
};
|
||||
use crate::routers::header_utils;
|
||||
use crate::routers::{RouterTrait, WorkerManagement};
|
||||
@@ -1930,6 +1930,18 @@ impl RouterTrait for PDRouter {
|
||||
self.execute_dual_dispatch(headers, body, context).await
|
||||
}
|
||||
|
||||
async fn route_responses(
|
||||
&self,
|
||||
_headers: Option<&HeaderMap>,
|
||||
_body: &ResponsesRequest,
|
||||
) -> Response {
|
||||
(
|
||||
StatusCode::NOT_IMPLEMENTED,
|
||||
"Responses endpoint not implemented for PD router",
|
||||
)
|
||||
.into_response()
|
||||
}
|
||||
|
||||
async fn route_embeddings(&self, _headers: Option<&HeaderMap>, _body: Body) -> Response {
|
||||
todo!()
|
||||
}
|
||||
|
||||
@@ -6,7 +6,7 @@ use crate::core::{
|
||||
use crate::metrics::RouterMetrics;
|
||||
use crate::policies::LoadBalancingPolicy;
|
||||
use crate::protocols::spec::{
|
||||
ChatCompletionRequest, CompletionRequest, GenerateRequest, GenerationRequest,
|
||||
ChatCompletionRequest, CompletionRequest, GenerateRequest, GenerationRequest, ResponsesRequest,
|
||||
};
|
||||
use crate::routers::header_utils;
|
||||
use crate::routers::{RouterTrait, WorkerManagement};
|
||||
@@ -1210,6 +1210,15 @@ impl RouterTrait for Router {
|
||||
.await
|
||||
}
|
||||
|
||||
async fn route_responses(
|
||||
&self,
|
||||
headers: Option<&HeaderMap>,
|
||||
body: &ResponsesRequest,
|
||||
) -> Response {
|
||||
self.route_typed_request(headers, body, "/v1/responses")
|
||||
.await
|
||||
}
|
||||
|
||||
async fn route_embeddings(&self, _headers: Option<&HeaderMap>, _body: Body) -> Response {
|
||||
todo!()
|
||||
}
|
||||
|
||||
@@ -9,7 +9,9 @@ use axum::{
|
||||
};
|
||||
use std::fmt::Debug;
|
||||
|
||||
use crate::protocols::spec::{ChatCompletionRequest, CompletionRequest, GenerateRequest};
|
||||
use crate::protocols::spec::{
|
||||
ChatCompletionRequest, CompletionRequest, GenerateRequest, ResponsesRequest,
|
||||
};
|
||||
|
||||
pub mod factory;
|
||||
pub mod grpc;
|
||||
@@ -78,6 +80,13 @@ pub trait RouterTrait: Send + Sync + Debug + WorkerManagement {
|
||||
body: &CompletionRequest,
|
||||
) -> Response;
|
||||
|
||||
/// Route a Responses API request.
///
/// `headers` carries the optional incoming HTTP headers and `body` the
/// already-parsed request. Implementations return the HTTP response to
/// send back; a router that does not support the endpoint is expected to
/// answer with `501 Not Implemented` rather than panic.
|
||||
async fn route_responses(
|
||||
&self,
|
||||
headers: Option<&HeaderMap>,
|
||||
body: &ResponsesRequest,
|
||||
) -> Response;
|
||||
|
||||
/// Route an embeddings request, given the optional incoming headers and the raw request body.
async fn route_embeddings(&self, headers: Option<&HeaderMap>, body: Body) -> Response;
|
||||
|
||||
/// Route a rerank request, given the optional incoming headers and the raw request body.
/// NOTE(review): no implementation is visible here — confirm the expected body format.
async fn route_rerank(&self, headers: Option<&HeaderMap>, body: Body) -> Response;
|
||||
|
||||
Reference in New Issue
Block a user