[router] Add rustfmt and set group imports by default (#11732)

This commit is contained in:
Chang Su
2025-10-16 17:33:29 -07:00
committed by GitHub
parent 7a7f99beb7
commit dc01313da1
126 changed files with 1127 additions and 813 deletions

View File

@@ -4,20 +4,22 @@
//! eliminating deep parameter passing chains and providing a single source of truth
//! for request state.
use std::collections::HashMap;
use std::sync::Arc;
use std::{collections::HashMap, sync::Arc};
use axum::http::HeaderMap;
use serde_json::Value;
use crate::core::Worker;
use crate::grpc_client::{proto, SglangSchedulerClient};
use crate::protocols::chat::{ChatCompletionRequest, ChatCompletionResponse};
use crate::protocols::generate::{GenerateRequest, GenerateResponse};
use crate::reasoning_parser::ParserFactory as ReasoningParserFactory;
use crate::tokenizer::stop::StopSequenceDecoder;
use crate::tokenizer::traits::Tokenizer;
use crate::tool_parser::ParserFactory as ToolParserFactory;
use crate::{
core::Worker,
grpc_client::{proto, SglangSchedulerClient},
protocols::{
chat::{ChatCompletionRequest, ChatCompletionResponse},
generate::{GenerateRequest, GenerateResponse},
},
reasoning_parser::ParserFactory as ReasoningParserFactory,
tokenizer::{stop::StopSequenceDecoder, traits::Tokenizer},
tool_parser::ParserFactory as ToolParserFactory,
};
// ============================================================================
// Core Context Types

View File

@@ -1,7 +1,6 @@
//! gRPC router implementations
use crate::grpc_client::proto;
use crate::protocols::common::StringOrArray;
use crate::{grpc_client::proto, protocols::common::StringOrArray};
pub mod context;
pub mod pd_router;

View File

@@ -1,19 +1,7 @@
// PD (Prefill-Decode) gRPC Router Implementation
use crate::config::types::RetryConfig;
use crate::core::{ConnectionMode, WorkerRegistry, WorkerType};
use crate::policies::PolicyRegistry;
use crate::protocols::chat::ChatCompletionRequest;
use crate::protocols::completion::CompletionRequest;
use crate::protocols::embedding::EmbeddingRequest;
use crate::protocols::generate::GenerateRequest;
use crate::protocols::rerank::RerankRequest;
use crate::protocols::responses::{ResponsesGetParams, ResponsesRequest};
use crate::reasoning_parser::ParserFactory as ReasoningParserFactory;
use crate::routers::RouterTrait;
use crate::server::AppContext;
use crate::tokenizer::traits::Tokenizer;
use crate::tool_parser::ParserFactory as ToolParserFactory;
use std::sync::Arc;
use async_trait::async_trait;
use axum::{
body::Body,
@@ -21,12 +9,27 @@ use axum::{
http::{HeaderMap, StatusCode},
response::{IntoResponse, Response},
};
use std::sync::Arc;
use tracing::debug;
use super::context::SharedComponents;
use super::pipeline::RequestPipeline;
use super::{context::SharedComponents, pipeline::RequestPipeline};
use crate::{
config::types::RetryConfig,
core::{ConnectionMode, WorkerRegistry, WorkerType},
policies::PolicyRegistry,
protocols::{
chat::ChatCompletionRequest,
completion::CompletionRequest,
embedding::EmbeddingRequest,
generate::GenerateRequest,
rerank::RerankRequest,
responses::{ResponsesGetParams, ResponsesRequest},
},
reasoning_parser::ParserFactory as ReasoningParserFactory,
routers::RouterTrait,
server::AppContext,
tokenizer::traits::Tokenizer,
tool_parser::ParserFactory as ToolParserFactory,
};
/// gRPC PD (Prefill-Decode) router implementation for SGLang
#[derive(Clone)]

View File

@@ -3,29 +3,29 @@
//! This module defines the core pipeline abstraction and individual processing stages
//! that transform a RequestContext through its lifecycle.
use std::{
sync::Arc,
time::{Instant, SystemTime, UNIX_EPOCH},
};
use async_trait::async_trait;
use axum::response::{IntoResponse, Response};
use tracing::{debug, error, warn};
use super::context::*;
use super::processing;
use super::streaming;
use super::utils;
use crate::core::{ConnectionMode, Worker, WorkerRegistry, WorkerType};
use crate::grpc_client::proto;
use crate::policies::PolicyRegistry;
use crate::protocols::chat::ChatCompletionRequest;
use crate::protocols::common::InputIds;
use crate::protocols::generate::GenerateRequest;
use crate::reasoning_parser::ParserFactory as ReasoningParserFactory;
use crate::tokenizer::traits::Tokenizer;
use crate::tool_parser::ParserFactory as ToolParserFactory;
use proto::DisaggregatedParams;
use rand::Rng;
use std::sync::Arc;
use std::time::{Instant, SystemTime, UNIX_EPOCH};
use tracing::{debug, error, warn};
use uuid::Uuid;
use super::{context::*, processing, streaming, utils};
use crate::{
core::{ConnectionMode, Worker, WorkerRegistry, WorkerType},
grpc_client::proto,
policies::PolicyRegistry,
protocols::{chat::ChatCompletionRequest, common::InputIds, generate::GenerateRequest},
reasoning_parser::ParserFactory as ReasoningParserFactory,
tokenizer::traits::Tokenizer,
tool_parser::ParserFactory as ToolParserFactory,
};
// ============================================================================
// Pipeline Trait
// ============================================================================

View File

@@ -3,28 +3,30 @@
//! This module contains response processing functions that are shared between
//! the regular router and PD router, eliminating ~1,200 lines of exact duplicates.
use std::sync::Arc;
use std::{sync::Arc, time::Instant};
use proto::generate_complete::MatchedStop;
use serde_json::Value;
use tracing::error;
use crate::grpc_client::proto;
use crate::protocols::chat::{
ChatChoice, ChatCompletionMessage, ChatCompletionRequest, ChatCompletionResponse,
use super::{
context::{DispatchMetadata, ExecutionResult},
utils,
};
use crate::protocols::common::{
FunctionCallResponse, ToolCall, ToolChoice, ToolChoiceValue, Usage,
use crate::{
grpc_client::proto,
protocols::{
chat::{ChatChoice, ChatCompletionMessage, ChatCompletionRequest, ChatCompletionResponse},
common::{FunctionCallResponse, ToolCall, ToolChoice, ToolChoiceValue, Usage},
generate::{GenerateMetaInfo, GenerateRequest, GenerateResponse},
},
reasoning_parser::ParserFactory as ReasoningParserFactory,
tokenizer::{
stop::{SequenceDecoderOutput, StopSequenceDecoder},
traits::Tokenizer,
},
tool_parser::ParserFactory as ToolParserFactory,
};
use crate::protocols::generate::{GenerateMetaInfo, GenerateRequest, GenerateResponse};
use crate::reasoning_parser::ParserFactory as ReasoningParserFactory;
use crate::tokenizer::stop::{SequenceDecoderOutput, StopSequenceDecoder};
use crate::tokenizer::traits::Tokenizer;
use crate::tool_parser::ParserFactory as ToolParserFactory;
use proto::generate_complete::MatchedStop;
use std::time::Instant;
use super::context::{DispatchMetadata, ExecutionResult};
use super::utils;
// ============================================================================
// Response Processor - Main Entry Point

View File

@@ -11,23 +11,25 @@ use axum::{
};
use tracing::debug;
use crate::config::types::RetryConfig;
use crate::core::WorkerRegistry;
use crate::policies::PolicyRegistry;
use crate::protocols::chat::ChatCompletionRequest;
use crate::protocols::completion::CompletionRequest;
use crate::protocols::embedding::EmbeddingRequest;
use crate::protocols::generate::GenerateRequest;
use crate::protocols::rerank::RerankRequest;
use crate::protocols::responses::{ResponsesGetParams, ResponsesRequest};
use crate::reasoning_parser::ParserFactory as ReasoningParserFactory;
use crate::routers::RouterTrait;
use crate::server::AppContext;
use crate::tokenizer::traits::Tokenizer;
use crate::tool_parser::ParserFactory as ToolParserFactory;
use super::context::SharedComponents;
use super::pipeline::RequestPipeline;
use super::{context::SharedComponents, pipeline::RequestPipeline};
use crate::{
config::types::RetryConfig,
core::WorkerRegistry,
policies::PolicyRegistry,
protocols::{
chat::ChatCompletionRequest,
completion::CompletionRequest,
embedding::EmbeddingRequest,
generate::GenerateRequest,
rerank::RerankRequest,
responses::{ResponsesGetParams, ResponsesRequest},
},
reasoning_parser::ParserFactory as ReasoningParserFactory,
routers::RouterTrait,
server::AppContext,
tokenizer::traits::Tokenizer,
tool_parser::ParserFactory as ToolParserFactory,
};
/// gRPC router implementation for SGLang
#[derive(Clone)]

View File

@@ -3,38 +3,40 @@
//! This module contains shared streaming logic for both Regular and PD routers,
//! eliminating ~600 lines of duplication.
use axum::response::Response;
use axum::{body::Body, http::StatusCode};
use std::{collections::HashMap, io, sync::Arc, time::Instant};
use axum::{body::Body, http::StatusCode, response::Response};
use bytes::Bytes;
use http::header::{HeaderValue, CONTENT_TYPE};
use proto::{
generate_complete::MatchedStop::{MatchedStopStr, MatchedTokenId},
generate_response::Response::{Chunk, Complete, Error},
};
use serde_json::{json, Value};
use std::collections::HashMap;
use std::io;
use std::sync::Arc;
use tokio::sync::mpsc::UnboundedSender;
use tokio_stream::wrappers::UnboundedReceiverStream;
use tokio_stream::StreamExt;
use tokio::sync::{mpsc, mpsc::UnboundedSender};
use tokio_stream::{wrappers::UnboundedReceiverStream, StreamExt};
use tracing::{debug, error, warn};
use super::context;
use super::utils;
use crate::grpc_client::proto;
use crate::protocols::chat::{
ChatCompletionRequest, ChatCompletionStreamResponse, ChatMessageDelta, ChatStreamChoice,
use super::{context, utils};
use crate::{
grpc_client::proto,
protocols::{
chat::{
ChatCompletionRequest, ChatCompletionStreamResponse, ChatMessageDelta, ChatStreamChoice,
},
common::{
ChatLogProbs, FunctionCallDelta, StringOrArray, Tool, ToolCallDelta, ToolChoice,
ToolChoiceValue, Usage,
},
generate::GenerateRequest,
},
reasoning_parser::ReasoningParser,
tokenizer::{
stop::{SequenceDecoderOutput, StopSequenceDecoder},
traits::Tokenizer,
},
tool_parser::ToolParser,
};
use crate::protocols::common::{
ChatLogProbs, FunctionCallDelta, StringOrArray, Tool, ToolCallDelta, ToolChoice,
ToolChoiceValue, Usage,
};
use crate::protocols::generate::GenerateRequest;
use crate::reasoning_parser::ReasoningParser;
use crate::tokenizer::stop::{SequenceDecoderOutput, StopSequenceDecoder};
use crate::tokenizer::traits::Tokenizer;
use crate::tool_parser::ToolParser;
use proto::generate_complete::MatchedStop::{MatchedStopStr, MatchedTokenId};
use proto::generate_response::Response::{Chunk, Complete, Error};
use std::time::Instant;
use tokio::sync::mpsc;
/// Shared streaming processor for both single and dual dispatch modes
#[derive(Clone)]

View File

@@ -1,19 +1,7 @@
//! Shared utilities for gRPC routers
use super::ProcessedMessages;
use crate::core::Worker;
use crate::grpc_client::sglang_scheduler::AbortOnDropStream;
use crate::grpc_client::{proto, SglangSchedulerClient};
use crate::protocols::chat::{ChatCompletionRequest, ChatMessage};
use crate::protocols::common::{
ChatLogProbs, ChatLogProbsContent, FunctionCallResponse, StringOrArray, Tool, ToolCall,
ToolChoice, ToolChoiceValue, TopLogProb,
};
use crate::protocols::generate::GenerateFinishReason;
use crate::tokenizer::chat_template::{ChatTemplateContentFormat, ChatTemplateParams};
use crate::tokenizer::traits::Tokenizer;
use crate::tokenizer::HuggingFaceTokenizer;
pub use crate::tokenizer::StopSequenceDecoder;
use std::{collections::HashMap, sync::Arc};
use axum::{
http::StatusCode,
response::{IntoResponse, Response},
@@ -21,11 +9,29 @@ use axum::{
};
use futures::StreamExt;
use serde_json::{json, Map, Value};
use std::collections::HashMap;
use std::sync::Arc;
use tracing::{error, warn};
use uuid::Uuid;
use super::ProcessedMessages;
pub use crate::tokenizer::StopSequenceDecoder;
use crate::{
core::Worker,
grpc_client::{proto, sglang_scheduler::AbortOnDropStream, SglangSchedulerClient},
protocols::{
chat::{ChatCompletionRequest, ChatMessage},
common::{
ChatLogProbs, ChatLogProbsContent, FunctionCallResponse, StringOrArray, Tool, ToolCall,
ToolChoice, ToolChoiceValue, TopLogProb,
},
generate::GenerateFinishReason,
},
tokenizer::{
chat_template::{ChatTemplateContentFormat, ChatTemplateParams},
traits::Tokenizer,
HuggingFaceTokenizer,
},
};
/// Get gRPC client from worker, returning appropriate error response on failure
pub async fn get_grpc_client_from_worker(
worker: &Arc<dyn Worker>,
@@ -953,12 +959,17 @@ pub fn parse_finish_reason(reason_str: &str, completion_tokens: i32) -> Generate
#[cfg(test)]
mod tests {
use super::*;
use crate::protocols::chat::{ChatMessage, UserMessageContent};
use crate::protocols::common::{ContentPart, ImageUrl};
use crate::tokenizer::chat_template::ChatTemplateContentFormat;
use serde_json::json;
use super::*;
use crate::{
protocols::{
chat::{ChatMessage, UserMessageContent},
common::{ContentPart, ImageUrl},
},
tokenizer::chat_template::ChatTemplateContentFormat,
};
#[test]
fn test_transform_messages_string_format() {
let messages = vec![ChatMessage::User {