[Router] Refactor protocol definitions: split spec.rs into modular files (#11677)
Co-authored-by: Chang Su <chang.s.su@oracle.com>
This commit is contained in:
@@ -12,9 +12,8 @@ use serde_json::Value;
|
||||
|
||||
use crate::core::Worker;
|
||||
use crate::grpc_client::{proto, SglangSchedulerClient};
|
||||
use crate::protocols::spec::{
|
||||
ChatCompletionRequest, ChatCompletionResponse, GenerateRequest, GenerateResponse,
|
||||
};
|
||||
use crate::protocols::chat::{ChatCompletionRequest, ChatCompletionResponse};
|
||||
use crate::protocols::generate::{GenerateRequest, GenerateResponse};
|
||||
use crate::reasoning_parser::ParserFactory as ReasoningParserFactory;
|
||||
use crate::tokenizer::stop::StopSequenceDecoder;
|
||||
use crate::tokenizer::traits::Tokenizer;
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
//! gRPC router implementations
|
||||
|
||||
use crate::grpc_client::proto;
|
||||
use crate::protocols::spec::StringOrArray;
|
||||
use crate::protocols::common::StringOrArray;
|
||||
|
||||
pub mod context;
|
||||
pub mod pd_router;
|
||||
|
||||
@@ -3,10 +3,12 @@
|
||||
use crate::config::types::RetryConfig;
|
||||
use crate::core::{ConnectionMode, WorkerRegistry, WorkerType};
|
||||
use crate::policies::PolicyRegistry;
|
||||
use crate::protocols::spec::{
|
||||
ChatCompletionRequest, CompletionRequest, EmbeddingRequest, GenerateRequest, RerankRequest,
|
||||
ResponsesGetParams, ResponsesRequest,
|
||||
};
|
||||
use crate::protocols::chat::ChatCompletionRequest;
|
||||
use crate::protocols::completion::CompletionRequest;
|
||||
use crate::protocols::embedding::EmbeddingRequest;
|
||||
use crate::protocols::generate::GenerateRequest;
|
||||
use crate::protocols::rerank::RerankRequest;
|
||||
use crate::protocols::responses::{ResponsesGetParams, ResponsesRequest};
|
||||
use crate::reasoning_parser::ParserFactory as ReasoningParserFactory;
|
||||
use crate::routers::RouterTrait;
|
||||
use crate::server::AppContext;
|
||||
|
||||
@@ -14,7 +14,9 @@ use super::utils;
|
||||
use crate::core::{ConnectionMode, Worker, WorkerRegistry, WorkerType};
|
||||
use crate::grpc_client::proto;
|
||||
use crate::policies::PolicyRegistry;
|
||||
use crate::protocols::spec::{ChatCompletionRequest, GenerateRequest, InputIds};
|
||||
use crate::protocols::chat::ChatCompletionRequest;
|
||||
use crate::protocols::common::InputIds;
|
||||
use crate::protocols::generate::GenerateRequest;
|
||||
use crate::reasoning_parser::ParserFactory as ReasoningParserFactory;
|
||||
use crate::tokenizer::traits::Tokenizer;
|
||||
use crate::tool_parser::ParserFactory as ToolParserFactory;
|
||||
|
||||
@@ -9,11 +9,13 @@ use serde_json::Value;
|
||||
use tracing::error;
|
||||
|
||||
use crate::grpc_client::proto;
|
||||
use crate::protocols::spec::{
|
||||
use crate::protocols::chat::{
|
||||
ChatChoice, ChatCompletionMessage, ChatCompletionRequest, ChatCompletionResponse,
|
||||
FunctionCallResponse, GenerateMetaInfo, GenerateRequest, GenerateResponse, ToolCall,
|
||||
ToolChoice, ToolChoiceValue, Usage,
|
||||
};
|
||||
use crate::protocols::common::{
|
||||
FunctionCallResponse, ToolCall, ToolChoice, ToolChoiceValue, Usage,
|
||||
};
|
||||
use crate::protocols::generate::{GenerateMetaInfo, GenerateRequest, GenerateResponse};
|
||||
use crate::reasoning_parser::ParserFactory as ReasoningParserFactory;
|
||||
use crate::tokenizer::stop::{SequenceDecoderOutput, StopSequenceDecoder};
|
||||
use crate::tokenizer::traits::Tokenizer;
|
||||
|
||||
@@ -14,10 +14,12 @@ use tracing::debug;
|
||||
use crate::config::types::RetryConfig;
|
||||
use crate::core::WorkerRegistry;
|
||||
use crate::policies::PolicyRegistry;
|
||||
use crate::protocols::spec::{
|
||||
ChatCompletionRequest, CompletionRequest, EmbeddingRequest, GenerateRequest, RerankRequest,
|
||||
ResponsesGetParams, ResponsesRequest,
|
||||
};
|
||||
use crate::protocols::chat::ChatCompletionRequest;
|
||||
use crate::protocols::completion::CompletionRequest;
|
||||
use crate::protocols::embedding::EmbeddingRequest;
|
||||
use crate::protocols::generate::GenerateRequest;
|
||||
use crate::protocols::rerank::RerankRequest;
|
||||
use crate::protocols::responses::{ResponsesGetParams, ResponsesRequest};
|
||||
use crate::reasoning_parser::ParserFactory as ReasoningParserFactory;
|
||||
use crate::routers::RouterTrait;
|
||||
use crate::server::AppContext;
|
||||
|
||||
@@ -19,7 +19,14 @@ use tracing::{debug, error, warn};
|
||||
use super::context;
|
||||
use super::utils;
|
||||
use crate::grpc_client::proto;
|
||||
use crate::protocols::spec::*;
|
||||
use crate::protocols::chat::{
|
||||
ChatCompletionRequest, ChatCompletionStreamResponse, ChatMessageDelta, ChatStreamChoice,
|
||||
};
|
||||
use crate::protocols::common::{
|
||||
ChatLogProbs, FunctionCallDelta, StringOrArray, Tool, ToolCallDelta, ToolChoice,
|
||||
ToolChoiceValue, Usage,
|
||||
};
|
||||
use crate::protocols::generate::GenerateRequest;
|
||||
use crate::reasoning_parser::ReasoningParser;
|
||||
use crate::tokenizer::stop::{SequenceDecoderOutput, StopSequenceDecoder};
|
||||
use crate::tokenizer::traits::Tokenizer;
|
||||
|
||||
@@ -4,10 +4,12 @@ use super::ProcessedMessages;
|
||||
use crate::core::Worker;
|
||||
use crate::grpc_client::sglang_scheduler::AbortOnDropStream;
|
||||
use crate::grpc_client::{proto, SglangSchedulerClient};
|
||||
use crate::protocols::spec::{
|
||||
ChatCompletionRequest, ChatLogProbs, ChatLogProbsContent, ChatMessage, FunctionCallResponse,
|
||||
GenerateFinishReason, StringOrArray, Tool, ToolCall, ToolChoice, ToolChoiceValue, TopLogProb,
|
||||
use crate::protocols::chat::{ChatCompletionRequest, ChatMessage};
|
||||
use crate::protocols::common::{
|
||||
ChatLogProbs, ChatLogProbsContent, FunctionCallResponse, StringOrArray, Tool, ToolCall,
|
||||
ToolChoice, ToolChoiceValue, TopLogProb,
|
||||
};
|
||||
use crate::protocols::generate::GenerateFinishReason;
|
||||
use crate::tokenizer::chat_template::{ChatTemplateContentFormat, ChatTemplateParams};
|
||||
use crate::tokenizer::traits::Tokenizer;
|
||||
use crate::tokenizer::HuggingFaceTokenizer;
|
||||
@@ -952,7 +954,8 @@ pub fn parse_finish_reason(reason_str: &str, completion_tokens: i32) -> Generate
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::protocols::spec::{ChatMessage, ContentPart, ImageUrl, UserMessageContent};
|
||||
use crate::protocols::chat::{ChatMessage, UserMessageContent};
|
||||
use crate::protocols::common::{ContentPart, ImageUrl};
|
||||
use crate::tokenizer::chat_template::ChatTemplateContentFormat;
|
||||
use serde_json::json;
|
||||
|
||||
|
||||
@@ -5,10 +5,13 @@ use crate::core::{
|
||||
};
|
||||
use crate::metrics::RouterMetrics;
|
||||
use crate::policies::{LoadBalancingPolicy, PolicyRegistry};
|
||||
use crate::protocols::spec::{
|
||||
ChatCompletionRequest, ChatMessage, CompletionRequest, GenerateRequest, RerankRequest,
|
||||
ResponsesGetParams, ResponsesRequest, StringOrArray, UserMessageContent,
|
||||
};
|
||||
use crate::protocols::chat::{ChatCompletionRequest, ChatMessage, UserMessageContent};
|
||||
use crate::protocols::common::{InputIds, StringOrArray};
|
||||
use crate::protocols::completion::CompletionRequest;
|
||||
use crate::protocols::embedding::EmbeddingRequest;
|
||||
use crate::protocols::generate::GenerateRequest;
|
||||
use crate::protocols::rerank::RerankRequest;
|
||||
use crate::protocols::responses::{ResponsesGetParams, ResponsesRequest};
|
||||
use crate::routers::header_utils;
|
||||
use crate::routers::RouterTrait;
|
||||
use async_trait::async_trait;
|
||||
@@ -150,9 +153,10 @@ impl PDRouter {
|
||||
}
|
||||
|
||||
fn get_generate_batch_size(req: &GenerateRequest) -> Option<usize> {
|
||||
if let Some(text) = &req.text {
|
||||
if text.contains("[") && text.contains("]") {
|
||||
return None;
|
||||
// GenerateRequest doesn't support batch via arrays, only via input_ids
|
||||
if let Some(InputIds::Batch(batches)) = &req.input_ids {
|
||||
if !batches.is_empty() {
|
||||
return Some(batches.len());
|
||||
}
|
||||
}
|
||||
None
|
||||
@@ -1185,7 +1189,7 @@ impl RouterTrait for PDRouter {
|
||||
async fn route_embeddings(
|
||||
&self,
|
||||
_headers: Option<&HeaderMap>,
|
||||
_body: &crate::protocols::spec::EmbeddingRequest,
|
||||
_body: &EmbeddingRequest,
|
||||
_model_id: Option<&str>,
|
||||
) -> Response {
|
||||
(
|
||||
|
||||
@@ -4,10 +4,13 @@ use crate::core::{
|
||||
};
|
||||
use crate::metrics::RouterMetrics;
|
||||
use crate::policies::PolicyRegistry;
|
||||
use crate::protocols::spec::{
|
||||
ChatCompletionRequest, CompletionRequest, EmbeddingRequest, GenerateRequest, GenerationRequest,
|
||||
RerankRequest, RerankResponse, RerankResult, ResponsesGetParams, ResponsesRequest,
|
||||
};
|
||||
use crate::protocols::chat::ChatCompletionRequest;
|
||||
use crate::protocols::common::GenerationRequest;
|
||||
use crate::protocols::completion::CompletionRequest;
|
||||
use crate::protocols::embedding::EmbeddingRequest;
|
||||
use crate::protocols::generate::GenerateRequest;
|
||||
use crate::protocols::rerank::{RerankRequest, RerankResponse, RerankResult};
|
||||
use crate::protocols::responses::{ResponsesGetParams, ResponsesRequest};
|
||||
use crate::routers::header_utils;
|
||||
use crate::routers::RouterTrait;
|
||||
use axum::body::to_bytes;
|
||||
@@ -628,7 +631,7 @@ impl Router {
|
||||
let rerank_results = serde_json::from_slice::<Vec<RerankResult>>(&body_bytes)?;
|
||||
let mut rerank_response =
|
||||
RerankResponse::new(rerank_results, req.model.clone(), req.rid.clone());
|
||||
rerank_response.sort_by_score();
|
||||
// Sorting is handled by Python worker (serving_rerank.py)
|
||||
if let Some(top_k) = req.top_k {
|
||||
rerank_response.apply_top_k(top_k);
|
||||
}
|
||||
@@ -748,9 +751,6 @@ impl RouterTrait for Router {
|
||||
body: &RerankRequest,
|
||||
model_id: Option<&str>,
|
||||
) -> Response {
|
||||
if let Err(e) = body.validate() {
|
||||
return (StatusCode::BAD_REQUEST, e).into_response();
|
||||
}
|
||||
let response = self
|
||||
.route_typed_request(headers, body, "/v1/rerank", model_id)
|
||||
.await;
|
||||
|
||||
@@ -9,10 +9,12 @@ use axum::{
|
||||
};
|
||||
use std::fmt::Debug;
|
||||
|
||||
use crate::protocols::spec::{
|
||||
ChatCompletionRequest, CompletionRequest, EmbeddingRequest, GenerateRequest, RerankRequest,
|
||||
ResponsesGetParams, ResponsesRequest,
|
||||
};
|
||||
use crate::protocols::chat::ChatCompletionRequest;
|
||||
use crate::protocols::completion::CompletionRequest;
|
||||
use crate::protocols::embedding::EmbeddingRequest;
|
||||
use crate::protocols::generate::GenerateRequest;
|
||||
use crate::protocols::rerank::RerankRequest;
|
||||
use crate::protocols::responses::{ResponsesGetParams, ResponsesRequest};
|
||||
use serde_json::Value;
|
||||
|
||||
pub mod factory;
|
||||
|
||||
@@ -6,7 +6,7 @@ use crate::data_connector::{
|
||||
NewConversationItem, ResponseId, ResponseStorage, SharedConversationItemStorage,
|
||||
SharedConversationStorage,
|
||||
};
|
||||
use crate::protocols::spec::{ResponseInput, ResponsesRequest};
|
||||
use crate::protocols::responses::{ResponseInput, ResponseInputOutputItem, ResponsesRequest};
|
||||
use axum::http::StatusCode;
|
||||
use axum::response::{IntoResponse, Response};
|
||||
use axum::Json;
|
||||
@@ -1028,7 +1028,7 @@ async fn persist_items_with_storages(
|
||||
ResponseInput::Items(items_array) => {
|
||||
for input_item in items_array {
|
||||
match input_item {
|
||||
crate::protocols::spec::ResponseInputOutputItem::Message {
|
||||
ResponseInputOutputItem::Message {
|
||||
role,
|
||||
content,
|
||||
status,
|
||||
|
||||
@@ -9,7 +9,9 @@
|
||||
//! - Metadata injection for MCP operations
|
||||
|
||||
use crate::mcp::McpClientManager;
|
||||
use crate::protocols::spec::{ResponseInput, ResponseToolType, ResponsesRequest};
|
||||
use crate::protocols::responses::{
|
||||
ResponseInput, ResponseTool, ResponseToolType, ResponsesRequest,
|
||||
};
|
||||
use crate::routers::header_utils::apply_request_headers;
|
||||
use axum::http::HeaderMap;
|
||||
use bytes::Bytes;
|
||||
@@ -127,7 +129,7 @@ impl FunctionCallInProgress {
|
||||
|
||||
/// Build a request-scoped MCP manager from request tools, if present.
|
||||
pub(super) async fn mcp_manager_from_request_tools(
|
||||
tools: &[crate::protocols::spec::ResponseTool],
|
||||
tools: &[ResponseTool],
|
||||
) -> Option<Arc<McpClientManager>> {
|
||||
let tool = tools
|
||||
.iter()
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
//! Response storage, patching, and extraction utilities
|
||||
|
||||
use crate::data_connector::{ResponseId, StoredResponse};
|
||||
use crate::protocols::spec::{ResponseInput, ResponseToolType, ResponsesRequest};
|
||||
use crate::protocols::responses::{ResponseInput, ResponseToolType, ResponsesRequest};
|
||||
use serde_json::{json, Value};
|
||||
use std::collections::HashMap;
|
||||
use tracing::warn;
|
||||
|
||||
@@ -6,8 +6,12 @@ use crate::data_connector::{
|
||||
conversation_items::ListParams, conversation_items::SortOrder, ConversationId, ResponseId,
|
||||
SharedConversationItemStorage, SharedConversationStorage, SharedResponseStorage,
|
||||
};
|
||||
use crate::protocols::spec::{
|
||||
ChatCompletionRequest, CompletionRequest, EmbeddingRequest, GenerateRequest, RerankRequest,
|
||||
use crate::protocols::chat::ChatCompletionRequest;
|
||||
use crate::protocols::completion::CompletionRequest;
|
||||
use crate::protocols::embedding::EmbeddingRequest;
|
||||
use crate::protocols::generate::GenerateRequest;
|
||||
use crate::protocols::rerank::RerankRequest;
|
||||
use crate::protocols::responses::{
|
||||
ResponseContentPart, ResponseInput, ResponseInputOutputItem, ResponsesGetParams,
|
||||
ResponsesRequest,
|
||||
};
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
use crate::data_connector::{
|
||||
SharedConversationItemStorage, SharedConversationStorage, SharedResponseStorage,
|
||||
};
|
||||
use crate::protocols::spec::{ResponseToolType, ResponsesRequest};
|
||||
use crate::protocols::responses::{ResponseToolType, ResponsesRequest};
|
||||
use crate::routers::header_utils::{apply_request_headers, preserve_response_headers};
|
||||
use axum::{
|
||||
body::Body,
|
||||
|
||||
@@ -6,10 +6,12 @@
|
||||
|
||||
use crate::config::{ConnectionMode, RoutingMode};
|
||||
use crate::core::{WorkerRegistry, WorkerType};
|
||||
use crate::protocols::spec::{
|
||||
ChatCompletionRequest, CompletionRequest, EmbeddingRequest, GenerateRequest, RerankRequest,
|
||||
ResponsesGetParams, ResponsesRequest,
|
||||
};
|
||||
use crate::protocols::chat::ChatCompletionRequest;
|
||||
use crate::protocols::completion::CompletionRequest;
|
||||
use crate::protocols::embedding::EmbeddingRequest;
|
||||
use crate::protocols::generate::GenerateRequest;
|
||||
use crate::protocols::rerank::RerankRequest;
|
||||
use crate::protocols::responses::{ResponsesGetParams, ResponsesRequest};
|
||||
use crate::routers::RouterTrait;
|
||||
use crate::server::{AppContext, ServerConfig};
|
||||
use async_trait::async_trait;
|
||||
|
||||
Reference in New Issue
Block a user