From 92777135a0afd0fb78feadbd7f703fb9c9a64cee Mon Sep 17 00:00:00 2001 From: Chang Su Date: Fri, 10 Oct 2025 17:44:29 -0700 Subject: [PATCH] [router][grpc] Consolidate parser checks for chat completions (#11439) --- sgl-router/src/routers/grpc/pipeline.rs | 40 +++++++++++++++ sgl-router/src/routers/grpc/processing.rs | 15 +++--- sgl-router/src/routers/grpc/streaming.rs | 40 ++++++--------- sgl-router/src/routers/grpc/utils.rs | 28 +++++++++++ sgl-router/src/tool_parser/factory.rs | 20 ++++---- sgl-router/src/tool_parser/parsers/mod.rs | 2 + .../tool_parser/parsers/passthrough_parser.rs | 50 +++++++++++++++++++ 7 files changed, 152 insertions(+), 43 deletions(-) create mode 100644 sgl-router/src/tool_parser/parsers/passthrough_parser.rs diff --git a/sgl-router/src/routers/grpc/pipeline.rs b/sgl-router/src/routers/grpc/pipeline.rs index a4d8eb617..3be782ebb 100644 --- a/sgl-router/src/routers/grpc/pipeline.rs +++ b/sgl-router/src/routers/grpc/pipeline.rs @@ -861,6 +861,44 @@ impl ResponseProcessingStage { let chat_request = ctx.chat_request_arc(); let history_tool_calls_count = utils::get_history_tool_calls_count(&chat_request); + // Check parser availability once upfront (not per choice) + let reasoning_parser_available = chat_request.separate_reasoning + && utils::check_reasoning_parser_availability( + &self.processor.reasoning_parser_factory, + self.processor.configured_reasoning_parser.as_ref(), + &chat_request.model, + ); + + let tool_choice_enabled = !matches!( + &chat_request.tool_choice, + Some(crate::protocols::spec::ToolChoice::Value( + crate::protocols::spec::ToolChoiceValue::None + )) + ); + + let tool_parser_available = tool_choice_enabled + && chat_request.tools.is_some() + && utils::check_tool_parser_availability( + &self.processor.tool_parser_factory, + self.processor.configured_tool_parser.as_ref(), + &chat_request.model, + ); + + // Log once per request (not per choice) + if chat_request.separate_reasoning && !reasoning_parser_available { + debug!( + "No reasoning parser found for model '{}', skipping reasoning parsing", + chat_request.model + ); + } + + if chat_request.tools.is_some() && tool_choice_enabled && !tool_parser_available { + debug!( + "No tool parser found for model '{}', skipping tool call parsing", + chat_request.model + ); + } + let stop_decoder = ctx .state .response @@ -878,6 +916,8 @@ impl ResponseProcessingStage { &chat_request, stop_decoder, history_tool_calls_count, + reasoning_parser_available, + tool_parser_available, ) .await { diff --git a/sgl-router/src/routers/grpc/processing.rs b/sgl-router/src/routers/grpc/processing.rs index 523480465..50718ea2c 100644 --- a/sgl-router/src/routers/grpc/processing.rs +++ b/sgl-router/src/routers/grpc/processing.rs @@ -30,8 +30,8 @@ pub struct ResponseProcessor { pub tokenizer: Arc, pub tool_parser_factory: ToolParserFactory, pub reasoning_parser_factory: ReasoningParserFactory, - configured_tool_parser: Option, - configured_reasoning_parser: Option, + pub configured_tool_parser: Option, + pub configured_reasoning_parser: Option, } impl ResponseProcessor { @@ -52,6 +52,7 @@ impl ResponseProcessor { } /// Process a single choice from GenerateComplete response (EXACT COPY from router.rs:1573-1725) + #[allow(clippy::too_many_arguments)] pub async fn process_single_choice( &self, complete: &proto::GenerateComplete, @@ -59,6 +60,8 @@ impl ResponseProcessor { original_request: &ChatCompletionRequest, stop_decoder: &mut StopSequenceDecoder, history_tool_calls_count: usize, + reasoning_parser_available: bool, + tool_parser_available: bool, ) -> Result { stop_decoder.reset(); // Decode tokens @@ -89,8 +92,8 @@ impl ResponseProcessor { let mut reasoning_text: Option = None; let mut processed_text = final_text; - // Check if reasoning parsing is enabled and separate_reasoning is requested - if original_request.separate_reasoning { + // Check if reasoning parsing is enabled and parser is available + if original_request.separate_reasoning && reasoning_parser_available { let pooled_parser = utils::get_reasoning_parser( &self.reasoning_parser_factory, self.configured_reasoning_parser.as_ref(), @@ -113,8 +116,6 @@ impl ResponseProcessor { // Step 2: Handle tool call parsing let mut tool_calls: Option> = None; - - // Check if tool calls should be processed let tool_choice_enabled = !matches!( &original_request.tool_choice, Some(ToolChoice::Value(ToolChoiceValue::None)) @@ -134,7 +135,7 @@ impl ResponseProcessor { &processed_text, &original_request.tool_choice, ); - } else { + } else if tool_parser_available { (tool_calls, processed_text) = self .parse_tool_calls( &processed_text, diff --git a/sgl-router/src/routers/grpc/streaming.rs b/sgl-router/src/routers/grpc/streaming.rs index d932c5818..c53304095 100644 --- a/sgl-router/src/routers/grpc/streaming.rs +++ b/sgl-router/src/routers/grpc/streaming.rs @@ -195,41 +195,29 @@ impl StreamingProcessor { let system_fingerprint = dispatch.weight_version.as_deref(); // Check parser availability once upfront (log warning only once per request) - let reasoning_parser_available = if separate_reasoning { - if let Some(parser_name) = self.configured_reasoning_parser.as_ref() { - self.reasoning_parser_factory - .registry() - .has_parser(parser_name) - } else { - self.reasoning_parser_factory - .registry() - .has_parser_for_model(model) - } - } else { - false - }; + let reasoning_parser_available = separate_reasoning + && utils::check_reasoning_parser_availability( + &self.reasoning_parser_factory, + self.configured_reasoning_parser.as_ref(), + model, + ); - let tool_parser_available = if tools.is_some() { - if let Some(parser_name) = self.configured_tool_parser.as_ref() { - self.tool_parser_factory.registry().has_parser(parser_name) - } else { - self.tool_parser_factory - .registry() - .has_parser_for_model(model) - } - } else { - false - }; + let tool_parser_available = tools.is_some() + && utils::check_tool_parser_availability( + &self.tool_parser_factory, + self.configured_tool_parser.as_ref(), + model, + ); if separate_reasoning && !reasoning_parser_available { - warn!( + debug!( "No reasoning parser found for model '{}', skipping reasoning parsing", model ); } if tools.is_some() && !tool_parser_available { - warn!( + debug!( "No tool parser found for model '{}', skipping tool call parsing", model ); diff --git a/sgl-router/src/routers/grpc/utils.rs b/sgl-router/src/routers/grpc/utils.rs index b217ba815..2b9aa1ac4 100644 --- a/sgl-router/src/routers/grpc/utils.rs +++ b/sgl-router/src/routers/grpc/utils.rs @@ -675,6 +675,34 @@ pub fn generate_tool_call_id( } } +/// Check if a reasoning parser is available for the given model +pub fn check_reasoning_parser_availability( + reasoning_parser_factory: &crate::reasoning_parser::ParserFactory, + configured_parser: Option<&String>, + model: &str, +) -> bool { + if let Some(parser_name) = configured_parser { + reasoning_parser_factory.registry().has_parser(parser_name) + } else { + reasoning_parser_factory + .registry() + .has_parser_for_model(model) + } +} + +/// Check if a tool parser is available for the given model +pub fn check_tool_parser_availability( + tool_parser_factory: &crate::tool_parser::ParserFactory, + configured_parser: Option<&String>, + model: &str, +) -> bool { + if let Some(parser_name) = configured_parser { + tool_parser_factory.registry().has_parser(parser_name) + } else { + tool_parser_factory.registry().has_parser_for_model(model) + } +} + /// Get the appropriate reasoning parser for a model /// /// If a parser name is explicitly configured, use that parser. diff --git a/sgl-router/src/tool_parser/factory.rs b/sgl-router/src/tool_parser/factory.rs index 3d6bede95..43c40b6e8 100644 --- a/sgl-router/src/tool_parser/factory.rs +++ b/sgl-router/src/tool_parser/factory.rs @@ -6,7 +6,7 @@ use tokio::sync::Mutex; use crate::tool_parser::parsers::{ DeepSeekParser, Glm4MoeParser, GptOssHarmonyParser, GptOssParser, JsonParser, KimiK2Parser, - LlamaParser, MistralParser, PythonicParser, QwenParser, Step3Parser, + LlamaParser, MistralParser, PassthroughParser, PythonicParser, QwenParser, Step3Parser, }; use crate::tool_parser::traits::ToolParser; @@ -36,7 +36,7 @@ impl ParserRegistry { creators: Arc::new(RwLock::new(HashMap::new())), pool: Arc::new(RwLock::new(HashMap::new())), model_mapping: Arc::new(RwLock::new(HashMap::new())), - default_parser: Arc::new(RwLock::new("json".to_string())), + default_parser: Arc::new(RwLock::new("passthrough".to_string())), } } @@ -124,10 +124,9 @@ impl ParserRegistry { } } - // Check if default parser exists - let default = self.default_parser.read().unwrap().clone(); - let creators = self.creators.read().unwrap(); - creators.contains_key(&default) + // Return false if no specific parser found for this model + // (get_pooled will still fall back to default parser) + false } /// Create a fresh (non-pooled) parser instance for a specific model. @@ -228,6 +227,7 @@ impl ParserFactory { let registry = ParserRegistry::new(); // Register default parsers + registry.register_parser("passthrough", || Box::new(PassthroughParser::new())); registry.register_parser("json", || Box::new(JsonParser::new())); registry.register_parser("mistral", || Box::new(MistralParser::new())); registry.register_parser("qwen", || Box::new(QwenParser::new())); @@ -311,15 +311,15 @@ impl ParserFactory { /// Get a pooled parser for the given model ID. /// Returns a shared instance that can be used concurrently. - /// Falls back to JSON parser if model is not recognized. + /// Falls back to passthrough parser if model is not recognized. pub fn get_pooled(&self, model_id: &str) -> PooledParser { self.registry .get_pooled_for_model(model_id) .unwrap_or_else(|| { - // Fallback to JSON parser + // Fallback to passthrough parser (no-op, returns text unchanged) self.registry - .get_pooled_parser("json") - .expect("JSON parser should always be registered") + .get_pooled_parser("passthrough") + .expect("Passthrough parser should always be registered") }) } diff --git a/sgl-router/src/tool_parser/parsers/mod.rs b/sgl-router/src/tool_parser/parsers/mod.rs index 564a084fa..541c15baa 100644 --- a/sgl-router/src/tool_parser/parsers/mod.rs +++ b/sgl-router/src/tool_parser/parsers/mod.rs @@ -11,6 +11,7 @@ pub mod json_parser; pub mod kimik2_parser; pub mod llama_parser; pub mod mistral_parser; +pub mod passthrough_parser; pub mod pythonic_parser; pub mod qwen_parser; pub mod step3_parser; @@ -27,6 +28,7 @@ pub use json_parser::JsonParser; pub use kimik2_parser::KimiK2Parser; pub use llama_parser::LlamaParser; pub use mistral_parser::MistralParser; +pub use passthrough_parser::PassthroughParser; pub use pythonic_parser::PythonicParser; pub use qwen_parser::QwenParser; pub use step3_parser::Step3Parser; diff --git a/sgl-router/src/tool_parser/parsers/passthrough_parser.rs b/sgl-router/src/tool_parser/parsers/passthrough_parser.rs new file mode 100644 index 000000000..cb793d597 --- /dev/null +++ b/sgl-router/src/tool_parser/parsers/passthrough_parser.rs @@ -0,0 +1,50 @@ +//! Passthrough parser that returns text unchanged +//! +//! This parser is used as a fallback for unknown models where no specific +//! tool call parsing should be performed. It simply returns the input text +//! with no tool calls detected. + +use crate::protocols::spec::Tool; +use crate::tool_parser::errors::ParserResult; +use crate::tool_parser::traits::ToolParser; +use crate::tool_parser::types::{StreamingParseResult, ToolCall, ToolCallItem}; +use async_trait::async_trait; + +/// Passthrough parser that returns text unchanged with no tool calls +#[derive(Default)] +pub struct PassthroughParser; + +impl PassthroughParser { + pub fn new() -> Self { + Self + } +} + +#[async_trait] +impl ToolParser for PassthroughParser { + async fn parse_complete(&self, output: &str) -> ParserResult<(String, Vec)> { + // Return text unchanged with no tool calls + Ok((output.to_string(), vec![])) + } + + async fn parse_incremental( + &mut self, + chunk: &str, + _tools: &[Tool], + ) -> ParserResult { + // Return chunk unchanged with no tool calls + Ok(StreamingParseResult { + normal_text: chunk.to_string(), + calls: vec![], + }) + } + + fn has_tool_markers(&self, _text: &str) -> bool { + // Passthrough never detects tool calls + false + } + + fn get_unstreamed_tool_args(&self) -> Option> { + None + } +}