[router] Add rustfmt and set group imports by default (#11732)
This commit is contained in:
@@ -1,16 +1,18 @@
|
||||
//! Factory for creating router instances
|
||||
|
||||
use super::grpc::pd_router::GrpcPDRouter;
|
||||
use super::grpc::router::GrpcRouter;
|
||||
use std::sync::Arc;
|
||||
|
||||
use super::{
|
||||
grpc::{pd_router::GrpcPDRouter, router::GrpcRouter},
|
||||
http::{pd_router::PDRouter, router::Router},
|
||||
openai::OpenAIRouter,
|
||||
RouterTrait,
|
||||
};
|
||||
use crate::config::{ConnectionMode, PolicyConfig, RoutingMode};
|
||||
use crate::policies::PolicyFactory;
|
||||
use crate::server::AppContext;
|
||||
use std::sync::Arc;
|
||||
use crate::{
|
||||
config::{ConnectionMode, PolicyConfig, RoutingMode},
|
||||
policies::PolicyFactory,
|
||||
server::AppContext,
|
||||
};
|
||||
|
||||
/// Factory for creating router instances based on configuration
|
||||
pub struct RouterFactory;
|
||||
|
||||
@@ -4,20 +4,22 @@
|
||||
//! eliminating deep parameter passing chains and providing a single source of truth
|
||||
//! for request state.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use std::{collections::HashMap, sync::Arc};
|
||||
|
||||
use axum::http::HeaderMap;
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::core::Worker;
|
||||
use crate::grpc_client::{proto, SglangSchedulerClient};
|
||||
use crate::protocols::chat::{ChatCompletionRequest, ChatCompletionResponse};
|
||||
use crate::protocols::generate::{GenerateRequest, GenerateResponse};
|
||||
use crate::reasoning_parser::ParserFactory as ReasoningParserFactory;
|
||||
use crate::tokenizer::stop::StopSequenceDecoder;
|
||||
use crate::tokenizer::traits::Tokenizer;
|
||||
use crate::tool_parser::ParserFactory as ToolParserFactory;
|
||||
use crate::{
|
||||
core::Worker,
|
||||
grpc_client::{proto, SglangSchedulerClient},
|
||||
protocols::{
|
||||
chat::{ChatCompletionRequest, ChatCompletionResponse},
|
||||
generate::{GenerateRequest, GenerateResponse},
|
||||
},
|
||||
reasoning_parser::ParserFactory as ReasoningParserFactory,
|
||||
tokenizer::{stop::StopSequenceDecoder, traits::Tokenizer},
|
||||
tool_parser::ParserFactory as ToolParserFactory,
|
||||
};
|
||||
|
||||
// ============================================================================
|
||||
// Core Context Types
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
//! gRPC router implementations
|
||||
|
||||
use crate::grpc_client::proto;
|
||||
use crate::protocols::common::StringOrArray;
|
||||
use crate::{grpc_client::proto, protocols::common::StringOrArray};
|
||||
|
||||
pub mod context;
|
||||
pub mod pd_router;
|
||||
|
||||
@@ -1,19 +1,7 @@
|
||||
// PD (Prefill-Decode) gRPC Router Implementation
|
||||
|
||||
use crate::config::types::RetryConfig;
|
||||
use crate::core::{ConnectionMode, WorkerRegistry, WorkerType};
|
||||
use crate::policies::PolicyRegistry;
|
||||
use crate::protocols::chat::ChatCompletionRequest;
|
||||
use crate::protocols::completion::CompletionRequest;
|
||||
use crate::protocols::embedding::EmbeddingRequest;
|
||||
use crate::protocols::generate::GenerateRequest;
|
||||
use crate::protocols::rerank::RerankRequest;
|
||||
use crate::protocols::responses::{ResponsesGetParams, ResponsesRequest};
|
||||
use crate::reasoning_parser::ParserFactory as ReasoningParserFactory;
|
||||
use crate::routers::RouterTrait;
|
||||
use crate::server::AppContext;
|
||||
use crate::tokenizer::traits::Tokenizer;
|
||||
use crate::tool_parser::ParserFactory as ToolParserFactory;
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use axum::{
|
||||
body::Body,
|
||||
@@ -21,12 +9,27 @@ use axum::{
|
||||
http::{HeaderMap, StatusCode},
|
||||
response::{IntoResponse, Response},
|
||||
};
|
||||
use std::sync::Arc;
|
||||
|
||||
use tracing::debug;
|
||||
|
||||
use super::context::SharedComponents;
|
||||
use super::pipeline::RequestPipeline;
|
||||
use super::{context::SharedComponents, pipeline::RequestPipeline};
|
||||
use crate::{
|
||||
config::types::RetryConfig,
|
||||
core::{ConnectionMode, WorkerRegistry, WorkerType},
|
||||
policies::PolicyRegistry,
|
||||
protocols::{
|
||||
chat::ChatCompletionRequest,
|
||||
completion::CompletionRequest,
|
||||
embedding::EmbeddingRequest,
|
||||
generate::GenerateRequest,
|
||||
rerank::RerankRequest,
|
||||
responses::{ResponsesGetParams, ResponsesRequest},
|
||||
},
|
||||
reasoning_parser::ParserFactory as ReasoningParserFactory,
|
||||
routers::RouterTrait,
|
||||
server::AppContext,
|
||||
tokenizer::traits::Tokenizer,
|
||||
tool_parser::ParserFactory as ToolParserFactory,
|
||||
};
|
||||
|
||||
/// gRPC PD (Prefill-Decode) router implementation for SGLang
|
||||
#[derive(Clone)]
|
||||
|
||||
@@ -3,29 +3,29 @@
|
||||
//! This module defines the core pipeline abstraction and individual processing stages
|
||||
//! that transform a RequestContext through its lifecycle.
|
||||
|
||||
use std::{
|
||||
sync::Arc,
|
||||
time::{Instant, SystemTime, UNIX_EPOCH},
|
||||
};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use axum::response::{IntoResponse, Response};
|
||||
use tracing::{debug, error, warn};
|
||||
|
||||
use super::context::*;
|
||||
use super::processing;
|
||||
use super::streaming;
|
||||
use super::utils;
|
||||
use crate::core::{ConnectionMode, Worker, WorkerRegistry, WorkerType};
|
||||
use crate::grpc_client::proto;
|
||||
use crate::policies::PolicyRegistry;
|
||||
use crate::protocols::chat::ChatCompletionRequest;
|
||||
use crate::protocols::common::InputIds;
|
||||
use crate::protocols::generate::GenerateRequest;
|
||||
use crate::reasoning_parser::ParserFactory as ReasoningParserFactory;
|
||||
use crate::tokenizer::traits::Tokenizer;
|
||||
use crate::tool_parser::ParserFactory as ToolParserFactory;
|
||||
use proto::DisaggregatedParams;
|
||||
use rand::Rng;
|
||||
use std::sync::Arc;
|
||||
use std::time::{Instant, SystemTime, UNIX_EPOCH};
|
||||
use tracing::{debug, error, warn};
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::{context::*, processing, streaming, utils};
|
||||
use crate::{
|
||||
core::{ConnectionMode, Worker, WorkerRegistry, WorkerType},
|
||||
grpc_client::proto,
|
||||
policies::PolicyRegistry,
|
||||
protocols::{chat::ChatCompletionRequest, common::InputIds, generate::GenerateRequest},
|
||||
reasoning_parser::ParserFactory as ReasoningParserFactory,
|
||||
tokenizer::traits::Tokenizer,
|
||||
tool_parser::ParserFactory as ToolParserFactory,
|
||||
};
|
||||
|
||||
// ============================================================================
|
||||
// Pipeline Trait
|
||||
// ============================================================================
|
||||
|
||||
@@ -3,28 +3,30 @@
|
||||
//! This module contains response processing functions that are shared between
|
||||
//! the regular router and PD router, eliminating ~1,200 lines of exact duplicates.
|
||||
|
||||
use std::sync::Arc;
|
||||
use std::{sync::Arc, time::Instant};
|
||||
|
||||
use proto::generate_complete::MatchedStop;
|
||||
use serde_json::Value;
|
||||
use tracing::error;
|
||||
|
||||
use crate::grpc_client::proto;
|
||||
use crate::protocols::chat::{
|
||||
ChatChoice, ChatCompletionMessage, ChatCompletionRequest, ChatCompletionResponse,
|
||||
use super::{
|
||||
context::{DispatchMetadata, ExecutionResult},
|
||||
utils,
|
||||
};
|
||||
use crate::protocols::common::{
|
||||
FunctionCallResponse, ToolCall, ToolChoice, ToolChoiceValue, Usage,
|
||||
use crate::{
|
||||
grpc_client::proto,
|
||||
protocols::{
|
||||
chat::{ChatChoice, ChatCompletionMessage, ChatCompletionRequest, ChatCompletionResponse},
|
||||
common::{FunctionCallResponse, ToolCall, ToolChoice, ToolChoiceValue, Usage},
|
||||
generate::{GenerateMetaInfo, GenerateRequest, GenerateResponse},
|
||||
},
|
||||
reasoning_parser::ParserFactory as ReasoningParserFactory,
|
||||
tokenizer::{
|
||||
stop::{SequenceDecoderOutput, StopSequenceDecoder},
|
||||
traits::Tokenizer,
|
||||
},
|
||||
tool_parser::ParserFactory as ToolParserFactory,
|
||||
};
|
||||
use crate::protocols::generate::{GenerateMetaInfo, GenerateRequest, GenerateResponse};
|
||||
use crate::reasoning_parser::ParserFactory as ReasoningParserFactory;
|
||||
use crate::tokenizer::stop::{SequenceDecoderOutput, StopSequenceDecoder};
|
||||
use crate::tokenizer::traits::Tokenizer;
|
||||
use crate::tool_parser::ParserFactory as ToolParserFactory;
|
||||
use proto::generate_complete::MatchedStop;
|
||||
use std::time::Instant;
|
||||
|
||||
use super::context::{DispatchMetadata, ExecutionResult};
|
||||
use super::utils;
|
||||
|
||||
// ============================================================================
|
||||
// Response Processor - Main Entry Point
|
||||
|
||||
@@ -11,23 +11,25 @@ use axum::{
|
||||
};
|
||||
use tracing::debug;
|
||||
|
||||
use crate::config::types::RetryConfig;
|
||||
use crate::core::WorkerRegistry;
|
||||
use crate::policies::PolicyRegistry;
|
||||
use crate::protocols::chat::ChatCompletionRequest;
|
||||
use crate::protocols::completion::CompletionRequest;
|
||||
use crate::protocols::embedding::EmbeddingRequest;
|
||||
use crate::protocols::generate::GenerateRequest;
|
||||
use crate::protocols::rerank::RerankRequest;
|
||||
use crate::protocols::responses::{ResponsesGetParams, ResponsesRequest};
|
||||
use crate::reasoning_parser::ParserFactory as ReasoningParserFactory;
|
||||
use crate::routers::RouterTrait;
|
||||
use crate::server::AppContext;
|
||||
use crate::tokenizer::traits::Tokenizer;
|
||||
use crate::tool_parser::ParserFactory as ToolParserFactory;
|
||||
|
||||
use super::context::SharedComponents;
|
||||
use super::pipeline::RequestPipeline;
|
||||
use super::{context::SharedComponents, pipeline::RequestPipeline};
|
||||
use crate::{
|
||||
config::types::RetryConfig,
|
||||
core::WorkerRegistry,
|
||||
policies::PolicyRegistry,
|
||||
protocols::{
|
||||
chat::ChatCompletionRequest,
|
||||
completion::CompletionRequest,
|
||||
embedding::EmbeddingRequest,
|
||||
generate::GenerateRequest,
|
||||
rerank::RerankRequest,
|
||||
responses::{ResponsesGetParams, ResponsesRequest},
|
||||
},
|
||||
reasoning_parser::ParserFactory as ReasoningParserFactory,
|
||||
routers::RouterTrait,
|
||||
server::AppContext,
|
||||
tokenizer::traits::Tokenizer,
|
||||
tool_parser::ParserFactory as ToolParserFactory,
|
||||
};
|
||||
|
||||
/// gRPC router implementation for SGLang
|
||||
#[derive(Clone)]
|
||||
|
||||
@@ -3,38 +3,40 @@
|
||||
//! This module contains shared streaming logic for both Regular and PD routers,
|
||||
//! eliminating ~600 lines of duplication.
|
||||
|
||||
use axum::response::Response;
|
||||
use axum::{body::Body, http::StatusCode};
|
||||
use std::{collections::HashMap, io, sync::Arc, time::Instant};
|
||||
|
||||
use axum::{body::Body, http::StatusCode, response::Response};
|
||||
use bytes::Bytes;
|
||||
use http::header::{HeaderValue, CONTENT_TYPE};
|
||||
use proto::{
|
||||
generate_complete::MatchedStop::{MatchedStopStr, MatchedTokenId},
|
||||
generate_response::Response::{Chunk, Complete, Error},
|
||||
};
|
||||
use serde_json::{json, Value};
|
||||
use std::collections::HashMap;
|
||||
use std::io;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::mpsc::UnboundedSender;
|
||||
use tokio_stream::wrappers::UnboundedReceiverStream;
|
||||
use tokio_stream::StreamExt;
|
||||
use tokio::sync::{mpsc, mpsc::UnboundedSender};
|
||||
use tokio_stream::{wrappers::UnboundedReceiverStream, StreamExt};
|
||||
use tracing::{debug, error, warn};
|
||||
|
||||
use super::context;
|
||||
use super::utils;
|
||||
use crate::grpc_client::proto;
|
||||
use crate::protocols::chat::{
|
||||
ChatCompletionRequest, ChatCompletionStreamResponse, ChatMessageDelta, ChatStreamChoice,
|
||||
use super::{context, utils};
|
||||
use crate::{
|
||||
grpc_client::proto,
|
||||
protocols::{
|
||||
chat::{
|
||||
ChatCompletionRequest, ChatCompletionStreamResponse, ChatMessageDelta, ChatStreamChoice,
|
||||
},
|
||||
common::{
|
||||
ChatLogProbs, FunctionCallDelta, StringOrArray, Tool, ToolCallDelta, ToolChoice,
|
||||
ToolChoiceValue, Usage,
|
||||
},
|
||||
generate::GenerateRequest,
|
||||
},
|
||||
reasoning_parser::ReasoningParser,
|
||||
tokenizer::{
|
||||
stop::{SequenceDecoderOutput, StopSequenceDecoder},
|
||||
traits::Tokenizer,
|
||||
},
|
||||
tool_parser::ToolParser,
|
||||
};
|
||||
use crate::protocols::common::{
|
||||
ChatLogProbs, FunctionCallDelta, StringOrArray, Tool, ToolCallDelta, ToolChoice,
|
||||
ToolChoiceValue, Usage,
|
||||
};
|
||||
use crate::protocols::generate::GenerateRequest;
|
||||
use crate::reasoning_parser::ReasoningParser;
|
||||
use crate::tokenizer::stop::{SequenceDecoderOutput, StopSequenceDecoder};
|
||||
use crate::tokenizer::traits::Tokenizer;
|
||||
use crate::tool_parser::ToolParser;
|
||||
use proto::generate_complete::MatchedStop::{MatchedStopStr, MatchedTokenId};
|
||||
use proto::generate_response::Response::{Chunk, Complete, Error};
|
||||
use std::time::Instant;
|
||||
use tokio::sync::mpsc;
|
||||
|
||||
/// Shared streaming processor for both single and dual dispatch modes
|
||||
#[derive(Clone)]
|
||||
|
||||
@@ -1,19 +1,7 @@
|
||||
//! Shared utilities for gRPC routers
|
||||
|
||||
use super::ProcessedMessages;
|
||||
use crate::core::Worker;
|
||||
use crate::grpc_client::sglang_scheduler::AbortOnDropStream;
|
||||
use crate::grpc_client::{proto, SglangSchedulerClient};
|
||||
use crate::protocols::chat::{ChatCompletionRequest, ChatMessage};
|
||||
use crate::protocols::common::{
|
||||
ChatLogProbs, ChatLogProbsContent, FunctionCallResponse, StringOrArray, Tool, ToolCall,
|
||||
ToolChoice, ToolChoiceValue, TopLogProb,
|
||||
};
|
||||
use crate::protocols::generate::GenerateFinishReason;
|
||||
use crate::tokenizer::chat_template::{ChatTemplateContentFormat, ChatTemplateParams};
|
||||
use crate::tokenizer::traits::Tokenizer;
|
||||
use crate::tokenizer::HuggingFaceTokenizer;
|
||||
pub use crate::tokenizer::StopSequenceDecoder;
|
||||
use std::{collections::HashMap, sync::Arc};
|
||||
|
||||
use axum::{
|
||||
http::StatusCode,
|
||||
response::{IntoResponse, Response},
|
||||
@@ -21,11 +9,29 @@ use axum::{
|
||||
};
|
||||
use futures::StreamExt;
|
||||
use serde_json::{json, Map, Value};
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use tracing::{error, warn};
|
||||
use uuid::Uuid;
|
||||
|
||||
use super::ProcessedMessages;
|
||||
pub use crate::tokenizer::StopSequenceDecoder;
|
||||
use crate::{
|
||||
core::Worker,
|
||||
grpc_client::{proto, sglang_scheduler::AbortOnDropStream, SglangSchedulerClient},
|
||||
protocols::{
|
||||
chat::{ChatCompletionRequest, ChatMessage},
|
||||
common::{
|
||||
ChatLogProbs, ChatLogProbsContent, FunctionCallResponse, StringOrArray, Tool, ToolCall,
|
||||
ToolChoice, ToolChoiceValue, TopLogProb,
|
||||
},
|
||||
generate::GenerateFinishReason,
|
||||
},
|
||||
tokenizer::{
|
||||
chat_template::{ChatTemplateContentFormat, ChatTemplateParams},
|
||||
traits::Tokenizer,
|
||||
HuggingFaceTokenizer,
|
||||
},
|
||||
};
|
||||
|
||||
/// Get gRPC client from worker, returning appropriate error response on failure
|
||||
pub async fn get_grpc_client_from_worker(
|
||||
worker: &Arc<dyn Worker>,
|
||||
@@ -953,12 +959,17 @@ pub fn parse_finish_reason(reason_str: &str, completion_tokens: i32) -> Generate
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::protocols::chat::{ChatMessage, UserMessageContent};
|
||||
use crate::protocols::common::{ContentPart, ImageUrl};
|
||||
use crate::tokenizer::chat_template::ChatTemplateContentFormat;
|
||||
use serde_json::json;
|
||||
|
||||
use super::*;
|
||||
use crate::{
|
||||
protocols::{
|
||||
chat::{ChatMessage, UserMessageContent},
|
||||
common::{ContentPart, ImageUrl},
|
||||
},
|
||||
tokenizer::chat_template::ChatTemplateContentFormat,
|
||||
};
|
||||
|
||||
#[test]
|
||||
fn test_transform_messages_string_format() {
|
||||
let messages = vec![ChatMessage::User {
|
||||
|
||||
@@ -1,6 +1,4 @@
|
||||
use axum::body::Body;
|
||||
use axum::extract::Request;
|
||||
use axum::http::HeaderMap;
|
||||
use axum::{body::Body, extract::Request, http::HeaderMap};
|
||||
|
||||
/// Copy request headers to a Vec of name-value string pairs
|
||||
/// Used for forwarding headers to backend workers
|
||||
|
||||
@@ -1,19 +1,5 @@
|
||||
use super::pd_types::api_path;
|
||||
use crate::config::types::RetryConfig;
|
||||
use crate::core::{
|
||||
is_retryable_status, RetryExecutor, Worker, WorkerLoadGuard, WorkerRegistry, WorkerType,
|
||||
};
|
||||
use crate::metrics::RouterMetrics;
|
||||
use crate::policies::{LoadBalancingPolicy, PolicyRegistry};
|
||||
use crate::protocols::chat::{ChatCompletionRequest, ChatMessage, UserMessageContent};
|
||||
use crate::protocols::common::{InputIds, StringOrArray};
|
||||
use crate::protocols::completion::CompletionRequest;
|
||||
use crate::protocols::embedding::EmbeddingRequest;
|
||||
use crate::protocols::generate::GenerateRequest;
|
||||
use crate::protocols::rerank::RerankRequest;
|
||||
use crate::protocols::responses::{ResponsesGetParams, ResponsesRequest};
|
||||
use crate::routers::header_utils;
|
||||
use crate::routers::RouterTrait;
|
||||
use std::{sync::Arc, time::Instant};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use axum::{
|
||||
body::Body,
|
||||
@@ -25,11 +11,29 @@ use futures_util::StreamExt;
|
||||
use reqwest::Client;
|
||||
use serde::Serialize;
|
||||
use serde_json::{json, Value};
|
||||
use std::sync::Arc;
|
||||
use std::time::Instant;
|
||||
use tokio_stream::wrappers::UnboundedReceiverStream;
|
||||
use tracing::{debug, error, warn};
|
||||
|
||||
use super::pd_types::api_path;
|
||||
use crate::{
|
||||
config::types::RetryConfig,
|
||||
core::{
|
||||
is_retryable_status, RetryExecutor, Worker, WorkerLoadGuard, WorkerRegistry, WorkerType,
|
||||
},
|
||||
metrics::RouterMetrics,
|
||||
policies::{LoadBalancingPolicy, PolicyRegistry},
|
||||
protocols::{
|
||||
chat::{ChatCompletionRequest, ChatMessage, UserMessageContent},
|
||||
common::{InputIds, StringOrArray},
|
||||
completion::CompletionRequest,
|
||||
embedding::EmbeddingRequest,
|
||||
generate::GenerateRequest,
|
||||
rerank::RerankRequest,
|
||||
responses::{ResponsesGetParams, ResponsesRequest},
|
||||
},
|
||||
routers::{header_utils, RouterTrait},
|
||||
};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PDRouter {
|
||||
pub worker_registry: Arc<WorkerRegistry>,
|
||||
|
||||
@@ -1,35 +1,39 @@
|
||||
use crate::config::types::RetryConfig;
|
||||
use crate::core::{
|
||||
is_retryable_status, ConnectionMode, RetryExecutor, Worker, WorkerRegistry, WorkerType,
|
||||
};
|
||||
use crate::metrics::RouterMetrics;
|
||||
use crate::policies::PolicyRegistry;
|
||||
use crate::protocols::chat::ChatCompletionRequest;
|
||||
use crate::protocols::common::GenerationRequest;
|
||||
use crate::protocols::completion::CompletionRequest;
|
||||
use crate::protocols::embedding::EmbeddingRequest;
|
||||
use crate::protocols::generate::GenerateRequest;
|
||||
use crate::protocols::rerank::{RerankRequest, RerankResponse, RerankResult};
|
||||
use crate::protocols::responses::{ResponsesGetParams, ResponsesRequest};
|
||||
use crate::routers::header_utils;
|
||||
use crate::routers::RouterTrait;
|
||||
use axum::body::to_bytes;
|
||||
use std::{sync::Arc, time::Instant};
|
||||
|
||||
use axum::{
|
||||
body::Body,
|
||||
body::{to_bytes, Body},
|
||||
extract::Request,
|
||||
http::{
|
||||
header::CONTENT_LENGTH, header::CONTENT_TYPE, HeaderMap, HeaderValue, Method, StatusCode,
|
||||
header::{CONTENT_LENGTH, CONTENT_TYPE},
|
||||
HeaderMap, HeaderValue, Method, StatusCode,
|
||||
},
|
||||
response::{IntoResponse, Response},
|
||||
Json,
|
||||
};
|
||||
use futures_util::StreamExt;
|
||||
use reqwest::Client;
|
||||
use std::sync::Arc;
|
||||
use std::time::Instant;
|
||||
use tokio_stream::wrappers::UnboundedReceiverStream;
|
||||
use tracing::{debug, error};
|
||||
|
||||
use crate::{
|
||||
config::types::RetryConfig,
|
||||
core::{
|
||||
is_retryable_status, ConnectionMode, RetryExecutor, Worker, WorkerRegistry, WorkerType,
|
||||
},
|
||||
metrics::RouterMetrics,
|
||||
policies::PolicyRegistry,
|
||||
protocols::{
|
||||
chat::ChatCompletionRequest,
|
||||
common::GenerationRequest,
|
||||
completion::CompletionRequest,
|
||||
embedding::EmbeddingRequest,
|
||||
generate::GenerateRequest,
|
||||
rerank::{RerankRequest, RerankResponse, RerankResult},
|
||||
responses::{ResponsesGetParams, ResponsesRequest},
|
||||
},
|
||||
routers::{header_utils, RouterTrait},
|
||||
};
|
||||
|
||||
/// Regular router that uses injected load balancing policies
|
||||
#[derive(Debug)]
|
||||
pub struct Router {
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
//! Router implementations
|
||||
|
||||
use std::fmt::Debug;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use axum::{
|
||||
body::Body,
|
||||
@@ -7,16 +9,17 @@ use axum::{
|
||||
http::{HeaderMap, StatusCode},
|
||||
response::{IntoResponse, Response},
|
||||
};
|
||||
use std::fmt::Debug;
|
||||
|
||||
use crate::protocols::chat::ChatCompletionRequest;
|
||||
use crate::protocols::completion::CompletionRequest;
|
||||
use crate::protocols::embedding::EmbeddingRequest;
|
||||
use crate::protocols::generate::GenerateRequest;
|
||||
use crate::protocols::rerank::RerankRequest;
|
||||
use crate::protocols::responses::{ResponsesGetParams, ResponsesRequest};
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::protocols::{
|
||||
chat::ChatCompletionRequest,
|
||||
completion::CompletionRequest,
|
||||
embedding::EmbeddingRequest,
|
||||
generate::GenerateRequest,
|
||||
rerank::RerankRequest,
|
||||
responses::{ResponsesGetParams, ResponsesRequest},
|
||||
};
|
||||
|
||||
pub mod factory;
|
||||
pub mod grpc;
|
||||
pub mod header_utils;
|
||||
@@ -25,7 +28,6 @@ pub mod openai; // New refactored OpenAI router module
|
||||
pub mod router_manager;
|
||||
|
||||
pub use factory::RouterFactory;
|
||||
|
||||
// Re-export HTTP routers for convenience
|
||||
pub use http::{pd_router, pd_types, router};
|
||||
|
||||
|
||||
@@ -1,22 +1,26 @@
|
||||
//! Conversation CRUD operations and persistence
|
||||
|
||||
use crate::data_connector::{
|
||||
conversation_items::ListParams, conversation_items::SortOrder, Conversation, ConversationId,
|
||||
ConversationItemId, ConversationItemStorage, ConversationStorage, NewConversation,
|
||||
NewConversationItem, ResponseId, ResponseStorage, SharedConversationItemStorage,
|
||||
SharedConversationStorage,
|
||||
use std::{collections::HashMap, sync::Arc};
|
||||
|
||||
use axum::{
|
||||
http::StatusCode,
|
||||
response::{IntoResponse, Response},
|
||||
Json,
|
||||
};
|
||||
use crate::protocols::responses::{ResponseInput, ResponseInputOutputItem, ResponsesRequest};
|
||||
use axum::http::StatusCode;
|
||||
use axum::response::{IntoResponse, Response};
|
||||
use axum::Json;
|
||||
use chrono::Utc;
|
||||
use serde_json::{json, Value};
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
use tracing::{debug, info, warn};
|
||||
|
||||
use super::responses::build_stored_response;
|
||||
use crate::{
|
||||
data_connector::{
|
||||
conversation_items::{ListParams, SortOrder},
|
||||
Conversation, ConversationId, ConversationItemId, ConversationItemStorage,
|
||||
ConversationStorage, NewConversation, NewConversationItem, ResponseId, ResponseStorage,
|
||||
SharedConversationItemStorage, SharedConversationStorage,
|
||||
},
|
||||
protocols::responses::{ResponseInput, ResponseInputOutputItem, ResponsesRequest},
|
||||
};
|
||||
|
||||
/// Maximum number of properties allowed in conversation metadata
|
||||
pub(crate) const MAX_METADATA_PROPERTIES: usize = 16;
|
||||
|
||||
@@ -8,19 +8,20 @@
|
||||
//! - Payload transformation for MCP tool interception
|
||||
//! - Metadata injection for MCP operations
|
||||
|
||||
use crate::mcp::McpClientManager;
|
||||
use crate::protocols::responses::{
|
||||
ResponseInput, ResponseTool, ResponseToolType, ResponsesRequest,
|
||||
};
|
||||
use crate::routers::header_utils::apply_request_headers;
|
||||
use std::{io, sync::Arc};
|
||||
|
||||
use axum::http::HeaderMap;
|
||||
use bytes::Bytes;
|
||||
use serde_json::{json, to_value, Value};
|
||||
use std::{io, sync::Arc};
|
||||
use tokio::sync::mpsc;
|
||||
use tracing::{info, warn};
|
||||
|
||||
use super::utils::event_types;
|
||||
use crate::{
|
||||
mcp::McpClientManager,
|
||||
protocols::responses::{ResponseInput, ResponseTool, ResponseToolType, ResponsesRequest},
|
||||
routers::header_utils::apply_request_headers,
|
||||
};
|
||||
|
||||
// ============================================================================
|
||||
// Configuration and State Types
|
||||
|
||||
@@ -1,12 +1,15 @@
|
||||
//! Response storage, patching, and extraction utilities
|
||||
|
||||
use crate::data_connector::{ResponseId, StoredResponse};
|
||||
use crate::protocols::responses::{ResponseInput, ResponseToolType, ResponsesRequest};
|
||||
use serde_json::{json, Value};
|
||||
use std::collections::HashMap;
|
||||
|
||||
use serde_json::{json, Value};
|
||||
use tracing::warn;
|
||||
|
||||
use super::utils::event_types;
|
||||
use crate::{
|
||||
data_connector::{ResponseId, StoredResponse},
|
||||
protocols::responses::{ResponseInput, ResponseToolType, ResponsesRequest},
|
||||
};
|
||||
|
||||
// ============================================================================
|
||||
// Response Storage Operations
|
||||
|
||||
@@ -1,21 +1,10 @@
|
||||
//! OpenAI router - main coordinator that delegates to specialized modules
|
||||
|
||||
use crate::config::CircuitBreakerConfig;
|
||||
use crate::core::{CircuitBreaker, CircuitBreakerConfig as CoreCircuitBreakerConfig};
|
||||
use crate::data_connector::{
|
||||
conversation_items::ListParams, conversation_items::SortOrder, ConversationId, ResponseId,
|
||||
SharedConversationItemStorage, SharedConversationStorage, SharedResponseStorage,
|
||||
use std::{
|
||||
any::Any,
|
||||
sync::{atomic::AtomicBool, Arc},
|
||||
};
|
||||
use crate::protocols::chat::ChatCompletionRequest;
|
||||
use crate::protocols::completion::CompletionRequest;
|
||||
use crate::protocols::embedding::EmbeddingRequest;
|
||||
use crate::protocols::generate::GenerateRequest;
|
||||
use crate::protocols::rerank::RerankRequest;
|
||||
use crate::protocols::responses::{
|
||||
ResponseContentPart, ResponseInput, ResponseInputOutputItem, ResponsesGetParams,
|
||||
ResponsesRequest,
|
||||
};
|
||||
use crate::routers::header_utils::apply_request_headers;
|
||||
|
||||
use axum::{
|
||||
body::Body,
|
||||
extract::Request,
|
||||
@@ -25,10 +14,6 @@ use axum::{
|
||||
};
|
||||
use futures_util::StreamExt;
|
||||
use serde_json::{json, to_value, Value};
|
||||
use std::{
|
||||
any::Any,
|
||||
sync::{atomic::AtomicBool, Arc},
|
||||
};
|
||||
use tokio::sync::mpsc;
|
||||
use tokio_stream::wrappers::UnboundedReceiverStream;
|
||||
use tracing::warn;
|
||||
@@ -39,12 +24,35 @@ use super::conversations::{
|
||||
get_conversation, get_conversation_item, list_conversation_items, persist_conversation_items,
|
||||
update_conversation,
|
||||
};
|
||||
use super::mcp::{
|
||||
execute_tool_loop, mcp_manager_from_request_tools, prepare_mcp_payload_for_streaming,
|
||||
McpLoopConfig,
|
||||
use super::{
|
||||
mcp::{
|
||||
execute_tool_loop, mcp_manager_from_request_tools, prepare_mcp_payload_for_streaming,
|
||||
McpLoopConfig,
|
||||
},
|
||||
responses::{mask_tools_as_mcp, patch_streaming_response_json},
|
||||
streaming::handle_streaming_response,
|
||||
};
|
||||
use crate::{
|
||||
config::CircuitBreakerConfig,
|
||||
core::{CircuitBreaker, CircuitBreakerConfig as CoreCircuitBreakerConfig},
|
||||
data_connector::{
|
||||
conversation_items::{ListParams, SortOrder},
|
||||
ConversationId, ResponseId, SharedConversationItemStorage, SharedConversationStorage,
|
||||
SharedResponseStorage,
|
||||
},
|
||||
protocols::{
|
||||
chat::ChatCompletionRequest,
|
||||
completion::CompletionRequest,
|
||||
embedding::EmbeddingRequest,
|
||||
generate::GenerateRequest,
|
||||
rerank::RerankRequest,
|
||||
responses::{
|
||||
ResponseContentPart, ResponseInput, ResponseInputOutputItem, ResponsesGetParams,
|
||||
ResponsesRequest,
|
||||
},
|
||||
},
|
||||
routers::header_utils::apply_request_headers,
|
||||
};
|
||||
use super::responses::{mask_tools_as_mcp, patch_streaming_response_json};
|
||||
use super::streaming::handle_streaming_response;
|
||||
|
||||
// ============================================================================
|
||||
// OpenAIRouter Struct
|
||||
|
||||
@@ -7,11 +7,8 @@
|
||||
//! - MCP tool execution loops within streaming responses
|
||||
//! - Event transformation and output index remapping
|
||||
|
||||
use crate::data_connector::{
|
||||
SharedConversationItemStorage, SharedConversationStorage, SharedResponseStorage,
|
||||
};
|
||||
use crate::protocols::responses::{ResponseToolType, ResponsesRequest};
|
||||
use crate::routers::header_utils::{apply_request_headers, preserve_response_headers};
|
||||
use std::{borrow::Cow, io, sync::Arc};
|
||||
|
||||
use axum::{
|
||||
body::Body,
|
||||
http::{header::CONTENT_TYPE, HeaderMap, HeaderValue, StatusCode},
|
||||
@@ -20,20 +17,28 @@ use axum::{
|
||||
use bytes::Bytes;
|
||||
use futures_util::StreamExt;
|
||||
use serde_json::{json, Value};
|
||||
use std::{borrow::Cow, io, sync::Arc};
|
||||
use tokio::sync::mpsc;
|
||||
use tokio_stream::wrappers::UnboundedReceiverStream;
|
||||
use tracing::warn;
|
||||
|
||||
// Import from sibling modules
|
||||
use super::conversations::persist_conversation_items;
|
||||
use super::mcp::{
|
||||
build_resume_payload, execute_streaming_tool_calls, inject_mcp_metadata_streaming,
|
||||
mcp_manager_from_request_tools, prepare_mcp_payload_for_streaming, send_mcp_list_tools_events,
|
||||
McpLoopConfig, ToolLoopState,
|
||||
use super::{
|
||||
mcp::{
|
||||
build_resume_payload, execute_streaming_tool_calls, inject_mcp_metadata_streaming,
|
||||
mcp_manager_from_request_tools, prepare_mcp_payload_for_streaming,
|
||||
send_mcp_list_tools_events, McpLoopConfig, ToolLoopState,
|
||||
},
|
||||
responses::{mask_tools_as_mcp, patch_streaming_response_json, rewrite_streaming_block},
|
||||
utils::{event_types, FunctionCallInProgress, OutputIndexMapper, StreamAction},
|
||||
};
|
||||
use crate::{
|
||||
data_connector::{
|
||||
SharedConversationItemStorage, SharedConversationStorage, SharedResponseStorage,
|
||||
},
|
||||
protocols::responses::{ResponseToolType, ResponsesRequest},
|
||||
routers::header_utils::{apply_request_headers, preserve_response_headers},
|
||||
};
|
||||
use super::responses::{mask_tools_as_mcp, patch_streaming_response_json, rewrite_streaming_block};
|
||||
use super::utils::{event_types, FunctionCallInProgress, OutputIndexMapper, StreamAction};
|
||||
|
||||
// ============================================================================
|
||||
// Streaming Response Accumulator
|
||||
|
||||
@@ -4,16 +4,8 @@
|
||||
//! - Single Router Mode (enable_igw=false): Router owns workers directly
|
||||
//! - Multi-Router Mode (enable_igw=true): RouterManager coordinates everything
|
||||
|
||||
use crate::config::{ConnectionMode, RoutingMode};
|
||||
use crate::core::{WorkerRegistry, WorkerType};
|
||||
use crate::protocols::chat::ChatCompletionRequest;
|
||||
use crate::protocols::completion::CompletionRequest;
|
||||
use crate::protocols::embedding::EmbeddingRequest;
|
||||
use crate::protocols::generate::GenerateRequest;
|
||||
use crate::protocols::rerank::RerankRequest;
|
||||
use crate::protocols::responses::{ResponsesGetParams, ResponsesRequest};
|
||||
use crate::routers::RouterTrait;
|
||||
use crate::server::{AppContext, ServerConfig};
|
||||
use std::sync::Arc;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use axum::{
|
||||
body::Body,
|
||||
@@ -23,9 +15,23 @@ use axum::{
|
||||
};
|
||||
use dashmap::DashMap;
|
||||
use serde_json::Value;
|
||||
use std::sync::Arc;
|
||||
use tracing::{debug, info, warn};
|
||||
|
||||
use crate::{
|
||||
config::{ConnectionMode, RoutingMode},
|
||||
core::{WorkerRegistry, WorkerType},
|
||||
protocols::{
|
||||
chat::ChatCompletionRequest,
|
||||
completion::CompletionRequest,
|
||||
embedding::EmbeddingRequest,
|
||||
generate::GenerateRequest,
|
||||
rerank::RerankRequest,
|
||||
responses::{ResponsesGetParams, ResponsesRequest},
|
||||
},
|
||||
routers::RouterTrait,
|
||||
server::{AppContext, ServerConfig},
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, Hash, Eq, PartialEq)]
|
||||
pub struct RouterId(String);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user