[router][grpc] Consolidate parser checks for chat completions (#11439)

2025-10-10 17:44:29 -07:00
parent c495833186
commit 92777135a0
7 changed files with 152 additions and 43 deletions
--- a/sgl-router/src/routers/grpc/pipeline.rs
+++ b/sgl-router/src/routers/grpc/pipeline.rs
@@ -861,6 +861,44 @@ impl ResponseProcessingStage {
        let chat_request = ctx.chat_request_arc();
        let history_tool_calls_count = utils::get_history_tool_calls_count(&chat_request);

+        // Check parser availability once upfront (not per choice)
+        let reasoning_parser_available = chat_request.separate_reasoning
+            && utils::check_reasoning_parser_availability(
+                &self.processor.reasoning_parser_factory,
+                self.processor.configured_reasoning_parser.as_ref(),
+                &chat_request.model,
+            );
+
+        let tool_choice_enabled = !matches!(
+            &chat_request.tool_choice,
+            Some(crate::protocols::spec::ToolChoice::Value(
+                crate::protocols::spec::ToolChoiceValue::None
+            ))
+        );
+
+        let tool_parser_available = tool_choice_enabled
+            && chat_request.tools.is_some()
+            && utils::check_tool_parser_availability(
+                &self.processor.tool_parser_factory,
+                self.processor.configured_tool_parser.as_ref(),
+                &chat_request.model,
+            );
+
+        // Log once per request (not per choice)
+        if chat_request.separate_reasoning && !reasoning_parser_available {
+            debug!(
+                "No reasoning parser found for model '{}', skipping reasoning parsing",
+                chat_request.model
+            );
+        }
+
+        if chat_request.tools.is_some() && tool_choice_enabled && !tool_parser_available {
+            debug!(
+                "No tool parser found for model '{}', skipping tool call parsing",
+                chat_request.model
+            );
+        }
+
        let stop_decoder = ctx
            .state
            .response
@@ -878,6 +916,8 @@ impl ResponseProcessingStage {
                    &chat_request,
                    stop_decoder,
                    history_tool_calls_count,
+                    reasoning_parser_available,
+                    tool_parser_available,
                )
                .await
            {
--- a/sgl-router/src/routers/grpc/processing.rs
+++ b/sgl-router/src/routers/grpc/processing.rs
@@ -30,8 +30,8 @@ pub struct ResponseProcessor {
    pub tokenizer: Arc<dyn Tokenizer>,
    pub tool_parser_factory: ToolParserFactory,
    pub reasoning_parser_factory: ReasoningParserFactory,
-    configured_tool_parser: Option<String>,
-    configured_reasoning_parser: Option<String>,
+    pub configured_tool_parser: Option<String>,
+    pub configured_reasoning_parser: Option<String>,
 }

 impl ResponseProcessor {
@@ -52,6 +52,7 @@ impl ResponseProcessor {
    }

    /// Process a single choice from GenerateComplete response (EXACT COPY from router.rs:1573-1725)
+    #[allow(clippy::too_many_arguments)]
    pub async fn process_single_choice(
        &self,
        complete: &proto::GenerateComplete,
@@ -59,6 +60,8 @@ impl ResponseProcessor {
        original_request: &ChatCompletionRequest,
        stop_decoder: &mut StopSequenceDecoder,
        history_tool_calls_count: usize,
+        reasoning_parser_available: bool,
+        tool_parser_available: bool,
    ) -> Result<ChatChoice, String> {
        stop_decoder.reset();
        // Decode tokens
@@ -89,8 +92,8 @@ impl ResponseProcessor {
        let mut reasoning_text: Option<String> = None;
        let mut processed_text = final_text;

-        // Check if reasoning parsing is enabled and separate_reasoning is requested
-        if original_request.separate_reasoning {
+        // Check if reasoning parsing is enabled and parser is available
+        if original_request.separate_reasoning && reasoning_parser_available {
            let pooled_parser = utils::get_reasoning_parser(
                &self.reasoning_parser_factory,
                self.configured_reasoning_parser.as_ref(),
@@ -113,8 +116,6 @@ impl ResponseProcessor {

        // Step 2: Handle tool call parsing
        let mut tool_calls: Option<Vec<ToolCall>> = None;
-
-        // Check if tool calls should be processed
        let tool_choice_enabled = !matches!(
            &original_request.tool_choice,
            Some(ToolChoice::Value(ToolChoiceValue::None))
@@ -134,7 +135,7 @@ impl ResponseProcessor {
                    &processed_text,
                    &original_request.tool_choice,
                );
-            } else {
+            } else if tool_parser_available {
                (tool_calls, processed_text) = self
                    .parse_tool_calls(
                        &processed_text,
--- a/sgl-router/src/routers/grpc/streaming.rs
+++ b/sgl-router/src/routers/grpc/streaming.rs
@@ -195,41 +195,29 @@ impl StreamingProcessor {
        let system_fingerprint = dispatch.weight_version.as_deref();

        // Check parser availability once upfront (log warning only once per request)
-        let reasoning_parser_available = if separate_reasoning {
-            if let Some(parser_name) = self.configured_reasoning_parser.as_ref() {
-                self.reasoning_parser_factory
-                    .registry()
-                    .has_parser(parser_name)
-            } else {
-                self.reasoning_parser_factory
-                    .registry()
-                    .has_parser_for_model(model)
-            }
-        } else {
-            false
-        };
+        let reasoning_parser_available = separate_reasoning
+            && utils::check_reasoning_parser_availability(
+                &self.reasoning_parser_factory,
+                self.configured_reasoning_parser.as_ref(),
+                model,
+            );

-        let tool_parser_available = if tools.is_some() {
-            if let Some(parser_name) = self.configured_tool_parser.as_ref() {
-                self.tool_parser_factory.registry().has_parser(parser_name)
-            } else {
-                self.tool_parser_factory
-                    .registry()
-                    .has_parser_for_model(model)
-            }
-        } else {
-            false
-        };
+        let tool_parser_available = tools.is_some()
+            && utils::check_tool_parser_availability(
+                &self.tool_parser_factory,
+                self.configured_tool_parser.as_ref(),
+                model,
+            );

        if separate_reasoning && !reasoning_parser_available {
-            warn!(
+            debug!(
                "No reasoning parser found for model '{}', skipping reasoning parsing",
                model
            );
        }

        if tools.is_some() && !tool_parser_available {
-            warn!(
+            debug!(
                "No tool parser found for model '{}', skipping tool call parsing",
                model
            );
--- a/sgl-router/src/routers/grpc/utils.rs
+++ b/sgl-router/src/routers/grpc/utils.rs
@@ -675,6 +675,34 @@ pub fn generate_tool_call_id(
    }
 }

+/// Check if a reasoning parser is available for the given model
+pub fn check_reasoning_parser_availability(
+    reasoning_parser_factory: &crate::reasoning_parser::ParserFactory,
+    configured_parser: Option<&String>,
+    model: &str,
+) -> bool {
+    if let Some(parser_name) = configured_parser {
+        reasoning_parser_factory.registry().has_parser(parser_name)
+    } else {
+        reasoning_parser_factory
+            .registry()
+            .has_parser_for_model(model)
+    }
+}
+
+/// Check if a tool parser is available for the given model
+pub fn check_tool_parser_availability(
+    tool_parser_factory: &crate::tool_parser::ParserFactory,
+    configured_parser: Option<&String>,
+    model: &str,
+) -> bool {
+    if let Some(parser_name) = configured_parser {
+        tool_parser_factory.registry().has_parser(parser_name)
+    } else {
+        tool_parser_factory.registry().has_parser_for_model(model)
+    }
+}
+
 /// Get the appropriate reasoning parser for a model
 ///
 /// If a parser name is explicitly configured, use that parser.
--- a/sgl-router/src/tool_parser/factory.rs
+++ b/sgl-router/src/tool_parser/factory.rs
@@ -6,7 +6,7 @@ use tokio::sync::Mutex;

 use crate::tool_parser::parsers::{
    DeepSeekParser, Glm4MoeParser, GptOssHarmonyParser, GptOssParser, JsonParser, KimiK2Parser,
-    LlamaParser, MistralParser, PythonicParser, QwenParser, Step3Parser,
+    LlamaParser, MistralParser, PassthroughParser, PythonicParser, QwenParser, Step3Parser,
 };
 use crate::tool_parser::traits::ToolParser;

@@ -36,7 +36,7 @@ impl ParserRegistry {
            creators: Arc::new(RwLock::new(HashMap::new())),
            pool: Arc::new(RwLock::new(HashMap::new())),
            model_mapping: Arc::new(RwLock::new(HashMap::new())),
-            default_parser: Arc::new(RwLock::new("json".to_string())),
+            default_parser: Arc::new(RwLock::new("passthrough".to_string())),
        }
    }

@@ -124,10 +124,9 @@ impl ParserRegistry {
            }
        }

-        // Check if default parser exists
-        let default = self.default_parser.read().unwrap().clone();
-        let creators = self.creators.read().unwrap();
-        creators.contains_key(&default)
+        // Return false if no specific parser found for this model
+        // (get_pooled will still fall back to default parser)
+        false
    }

    /// Create a fresh (non-pooled) parser instance for a specific model.
@@ -228,6 +227,7 @@ impl ParserFactory {
        let registry = ParserRegistry::new();

        // Register default parsers
+        registry.register_parser("passthrough", || Box::new(PassthroughParser::new()));
        registry.register_parser("json", || Box::new(JsonParser::new()));
        registry.register_parser("mistral", || Box::new(MistralParser::new()));
        registry.register_parser("qwen", || Box::new(QwenParser::new()));
@@ -311,15 +311,15 @@ impl ParserFactory {

    /// Get a pooled parser for the given model ID.
    /// Returns a shared instance that can be used concurrently.
-    /// Falls back to JSON parser if model is not recognized.
+    /// Falls back to passthrough parser if model is not recognized.
    pub fn get_pooled(&self, model_id: &str) -> PooledParser {
        self.registry
            .get_pooled_for_model(model_id)
            .unwrap_or_else(|| {
-                // Fallback to JSON parser
+                // Fallback to passthrough parser (no-op, returns text unchanged)
                self.registry
-                    .get_pooled_parser("json")
-                    .expect("JSON parser should always be registered")
+                    .get_pooled_parser("passthrough")
+                    .expect("Passthrough parser should always be registered")
            })
    }

--- a/sgl-router/src/tool_parser/parsers/mod.rs
+++ b/sgl-router/src/tool_parser/parsers/mod.rs
@@ -11,6 +11,7 @@ pub mod json_parser;
 pub mod kimik2_parser;
 pub mod llama_parser;
 pub mod mistral_parser;
+pub mod passthrough_parser;
 pub mod pythonic_parser;
 pub mod qwen_parser;
 pub mod step3_parser;
@@ -27,6 +28,7 @@ pub use json_parser::JsonParser;
 pub use kimik2_parser::KimiK2Parser;
 pub use llama_parser::LlamaParser;
 pub use mistral_parser::MistralParser;
+pub use passthrough_parser::PassthroughParser;
 pub use pythonic_parser::PythonicParser;
 pub use qwen_parser::QwenParser;
 pub use step3_parser::Step3Parser;
--- a/sgl-router/src/tool_parser/parsers/passthrough_parser.rs
+++ b/sgl-router/src/tool_parser/parsers/passthrough_parser.rs
@@ -0,0 +1,50 @@
+//! Passthrough parser that returns text unchanged
+//!
+//! This parser is used as a fallback for unknown models where no specific
+//! tool call parsing should be performed. It simply returns the input text
+//! with no tool calls detected.
+
+use crate::protocols::spec::Tool;
+use crate::tool_parser::errors::ParserResult;
+use crate::tool_parser::traits::ToolParser;
+use crate::tool_parser::types::{StreamingParseResult, ToolCall, ToolCallItem};
+use async_trait::async_trait;
+
+/// Passthrough parser that returns text unchanged with no tool calls
+#[derive(Default)]
+pub struct PassthroughParser;
+
+impl PassthroughParser {
+    pub fn new() -> Self {
+        Self
+    }
+}
+
+#[async_trait]
+impl ToolParser for PassthroughParser {
+    async fn parse_complete(&self, output: &str) -> ParserResult<(String, Vec<ToolCall>)> {
+        // Return text unchanged with no tool calls
+        Ok((output.to_string(), vec![]))
+    }
+
+    async fn parse_incremental(
+        &mut self,
+        chunk: &str,
+        _tools: &[Tool],
+    ) -> ParserResult<StreamingParseResult> {
+        // Return chunk unchanged with no tool calls
+        Ok(StreamingParseResult {
+            normal_text: chunk.to_string(),
+            calls: vec![],
+        })
+    }
+
+    fn has_tool_markers(&self, _text: &str) -> bool {
+        // Passthrough never detects tool calls
+        false
+    }
+
+    fn get_unstreamed_tool_args(&self) -> Option<Vec<ToolCallItem>> {
+        None
+    }
+}