[router][grpc] Fix streaming bugs: empty tool names, state pollution, and panics (#11373)

2025-10-09 03:53:23 -07:00
parent a4b424c632
commit ab926dd697
33 changed files with 1145 additions and 534 deletions
--- a/sgl-router/src/tool_parser/parsers/deepseek_parser.rs
+++ b/sgl-router/src/tool_parser/parsers/deepseek_parser.rs
@@ -5,7 +5,7 @@ use serde_json::Value;
 use crate::protocols::spec::Tool;

 use crate::tool_parser::{
-    errors::{ToolParserError, ToolParserResult},
+    errors::{ParserError, ParserResult},
    parsers::helpers,
    traits::ToolParser,
    types::{FunctionCall, StreamingParseResult, ToolCall, ToolCallItem},
@@ -78,15 +78,15 @@ impl DeepSeekParser {
    }

    /// Parse a single tool call block - throws error if parsing fails
-    fn parse_tool_call(&self, block: &str) -> ToolParserResult<ToolCall> {
+    fn parse_tool_call(&self, block: &str) -> ParserResult<ToolCall> {
        let captures = self.func_detail_extractor.captures(block).ok_or_else(|| {
-            ToolParserError::ParsingFailed("Failed to match tool call pattern".to_string())
+            ParserError::ParsingFailed("Failed to match tool call pattern".to_string())
        })?;

        // Get function type (should be "function")
        let func_type = captures.get(1).map_or("", |m| m.as_str());
        if func_type != "function" {
-            return Err(ToolParserError::ParsingFailed(format!(
+            return Err(ParserError::ParsingFailed(format!(
                "Invalid function type: {}",
                func_type
            )));
@@ -95,7 +95,7 @@ impl DeepSeekParser {
        // Get function name
        let func_name = captures.get(2).map_or("", |m| m.as_str()).trim();
        if func_name.is_empty() {
-            return Err(ToolParserError::ParsingFailed(
+            return Err(ParserError::ParsingFailed(
                "Empty function name".to_string(),
            ));
        }
@@ -105,7 +105,7 @@ impl DeepSeekParser {

        // Parse JSON arguments
        let value = serde_json::from_str::<Value>(json_args)
-            .map_err(|e| ToolParserError::ParsingFailed(format!("Invalid JSON: {}", e)))?;
+            .map_err(|e| ParserError::ParsingFailed(format!("Invalid JSON: {}", e)))?;

        // Create arguments object
        let args = if value.is_object() {
@@ -115,8 +115,8 @@ impl DeepSeekParser {
            serde_json::json!({ "value": value })
        };

-        let arguments = serde_json::to_string(&args)
-            .map_err(|e| ToolParserError::ParsingFailed(e.to_string()))?;
+        let arguments =
+            serde_json::to_string(&args).map_err(|e| ParserError::ParsingFailed(e.to_string()))?;

        Ok(ToolCall {
            function: FunctionCall {
@@ -135,7 +135,7 @@ impl Default for DeepSeekParser {

 #[async_trait]
 impl ToolParser for DeepSeekParser {
-    async fn parse_complete(&self, text: &str) -> ToolParserResult<(String, Vec<ToolCall>)> {
+    async fn parse_complete(&self, text: &str) -> ParserResult<(String, Vec<ToolCall>)> {
        if !self.has_tool_markers(text) {
            return Ok((text.to_string(), vec![]));
        }
@@ -168,7 +168,7 @@ impl ToolParser for DeepSeekParser {
        &mut self,
        chunk: &str,
        tools: &[Tool],
-    ) -> ToolParserResult<StreamingParseResult> {
+    ) -> ParserResult<StreamingParseResult> {
        self.buffer.push_str(chunk);
        let current_text = &self.buffer.clone();

@@ -314,4 +314,12 @@ impl ToolParser for DeepSeekParser {
    fn get_unstreamed_tool_args(&self) -> Option<Vec<ToolCallItem>> {
        helpers::get_unstreamed_args(&self.prev_tool_call_arr, &self.streamed_args_for_tool)
    }
+
+    fn reset(&mut self) {
+        self.buffer.clear();
+        self.prev_tool_call_arr.clear();
+        self.current_tool_id = -1;
+        self.current_tool_name_sent = false;
+        self.streamed_args_for_tool.clear();
+    }
 }
--- a/sgl-router/src/tool_parser/parsers/glm4_moe_parser.rs
+++ b/sgl-router/src/tool_parser/parsers/glm4_moe_parser.rs
@@ -5,7 +5,7 @@ use serde_json::Value;
 use crate::protocols::spec::Tool;

 use crate::tool_parser::{
-    errors::{ToolParserError, ToolParserResult},
+    errors::{ParserError, ParserResult},
    parsers::helpers,
    traits::ToolParser,
    types::{FunctionCall, StreamingParseResult, ToolCall, ToolCallItem},
@@ -72,7 +72,7 @@ impl Glm4MoeParser {
    }

    /// Parse arguments from key-value pairs
-    fn parse_arguments(&self, args_text: &str) -> ToolParserResult<serde_json::Map<String, Value>> {
+    fn parse_arguments(&self, args_text: &str) -> ParserResult<serde_json::Map<String, Value>> {
        let mut arguments = serde_json::Map::new();

        for capture in self.arg_extractor.captures_iter(args_text) {
@@ -110,7 +110,7 @@ impl Glm4MoeParser {
    }

    /// Parse a single tool call block
-    fn parse_tool_call(&self, block: &str) -> ToolParserResult<Option<ToolCall>> {
+    fn parse_tool_call(&self, block: &str) -> ParserResult<Option<ToolCall>> {
        if let Some(captures) = self.func_detail_extractor.captures(block) {
            // Get function name
            let func_name = captures.get(1).map_or("", |m| m.as_str()).trim();
@@ -122,7 +122,7 @@ impl Glm4MoeParser {
            let arguments = self.parse_arguments(args_text)?;

            let arguments_str = serde_json::to_string(&arguments)
-                .map_err(|e| ToolParserError::ParsingFailed(e.to_string()))?;
+                .map_err(|e| ParserError::ParsingFailed(e.to_string()))?;

            Ok(Some(ToolCall {
                function: FunctionCall {
@@ -137,7 +137,7 @@ impl Glm4MoeParser {

    /// Parse and return StreamingParseResult (mirrors Python's detect_and_parse)
    /// Parse all tool calls from text (shared logic for complete and incremental parsing)
-    fn parse_tool_calls_from_text(&self, text: &str) -> ToolParserResult<Vec<ToolCall>> {
+    fn parse_tool_calls_from_text(&self, text: &str) -> ParserResult<Vec<ToolCall>> {
        let mut tools = Vec::new();

        for mat in self.tool_call_extractor.find_iter(text) {
@@ -163,7 +163,7 @@ impl Default for Glm4MoeParser {

 #[async_trait]
 impl ToolParser for Glm4MoeParser {
-    async fn parse_complete(&self, text: &str) -> ToolParserResult<(String, Vec<ToolCall>)> {
+    async fn parse_complete(&self, text: &str) -> ParserResult<(String, Vec<ToolCall>)> {
        // Check if text contains GLM-4 MoE format
        if !self.has_tool_markers(text) {
            return Ok((text.to_string(), vec![]));
@@ -188,7 +188,7 @@ impl ToolParser for Glm4MoeParser {
        &mut self,
        chunk: &str,
        tools: &[Tool],
-    ) -> ToolParserResult<StreamingParseResult> {
+    ) -> ParserResult<StreamingParseResult> {
        // Python logic: Wait for complete tool call, then parse it all at once
        self.buffer.push_str(chunk);
        let current_text = &self.buffer.clone();
@@ -315,4 +315,11 @@ impl ToolParser for Glm4MoeParser {
    fn get_unstreamed_tool_args(&self) -> Option<Vec<ToolCallItem>> {
        helpers::get_unstreamed_args(&self.prev_tool_call_arr, &self.streamed_args_for_tool)
    }
+
+    fn reset(&mut self) {
+        self.buffer.clear();
+        self.prev_tool_call_arr.clear();
+        self.current_tool_id = -1;
+        self.streamed_args_for_tool.clear();
+    }
 }
--- a/sgl-router/src/tool_parser/parsers/gpt_oss_harmony_parser.rs
+++ b/sgl-router/src/tool_parser/parsers/gpt_oss_harmony_parser.rs
@@ -3,7 +3,7 @@ use async_trait::async_trait;
 use crate::protocols::spec::Tool;

 use crate::tool_parser::{
-    errors::ToolParserResult,
+    errors::ParserResult,
    traits::{TokenToolParser, ToolParser},
    types::{StreamingParseResult, ToolCall},
 };
@@ -23,7 +23,7 @@ impl GptOssHarmonyParser {

 #[async_trait]
 impl ToolParser for GptOssHarmonyParser {
-    async fn parse_complete(&self, output: &str) -> ToolParserResult<(String, Vec<ToolCall>)> {
+    async fn parse_complete(&self, output: &str) -> ParserResult<(String, Vec<ToolCall>)> {
        // Temporary stub: fall back to returning the raw text with no tool calls.
        // Later phases will decode Harmony tokens into structured tool calls.
        Ok((output.to_string(), Vec::new()))
@@ -33,7 +33,7 @@ impl ToolParser for GptOssHarmonyParser {
        &mut self,
        _chunk: &str,
        _tools: &[Tool],
-    ) -> ToolParserResult<StreamingParseResult> {
+    ) -> ParserResult<StreamingParseResult> {
        // Temporary stub until the Harmony streaming pipeline is implemented.
        Ok(StreamingParseResult::default())
    }
@@ -54,7 +54,7 @@ impl TokenToolParser for GptOssHarmonyParser {
    async fn parse_complete_tokens(
        &self,
        _tokens: &[u32],
-    ) -> ToolParserResult<(String, Vec<ToolCall>)> {
+    ) -> ParserResult<(String, Vec<ToolCall>)> {
        // Placeholder until Harmony integration lands. Returning an empty tool list ensures
        // that enabling the parser without full implementation results in a no-op rather
        // than a runtime panic.
@@ -65,7 +65,7 @@ impl TokenToolParser for GptOssHarmonyParser {
        &mut self,
        _tokens: &[u32],
        _tools: &[Tool],
-    ) -> ToolParserResult<StreamingParseResult> {
+    ) -> ParserResult<StreamingParseResult> {
        Ok(StreamingParseResult::default())
    }
 }
--- a/sgl-router/src/tool_parser/parsers/gpt_oss_parser.rs
+++ b/sgl-router/src/tool_parser/parsers/gpt_oss_parser.rs
@@ -5,7 +5,7 @@ use serde_json::Value;
 use crate::protocols::spec::Tool;

 use crate::tool_parser::{
-    errors::{ToolParserError, ToolParserResult},
+    errors::{ParserError, ParserResult},
    parsers::helpers,
    partial_json::PartialJson,
    traits::ToolParser,
@@ -76,7 +76,7 @@ impl Default for GptOssParser {

 #[async_trait]
 impl ToolParser for GptOssParser {
-    async fn parse_complete(&self, text: &str) -> ToolParserResult<(String, Vec<ToolCall>)> {
+    async fn parse_complete(&self, text: &str) -> ParserResult<(String, Vec<ToolCall>)> {
        // Check if text contains GPT-OSS format
        if !self.has_tool_markers(text) {
            return Ok((text.to_string(), vec![]));
@@ -100,7 +100,7 @@ impl ToolParser for GptOssParser {
                } else {
                    match serde_json::from_str::<Value>(args_content) {
                        Ok(value) => serde_json::to_string(&value)
-                            .map_err(|e| ToolParserError::ParsingFailed(e.to_string()))?,
+                            .map_err(|e| ParserError::ParsingFailed(e.to_string()))?,
                        Err(_) => {
                            // Skip malformed JSON
                            continue;
@@ -126,7 +126,7 @@ impl ToolParser for GptOssParser {
        &mut self,
        chunk: &str,
        tools: &[Tool],
-    ) -> ToolParserResult<StreamingParseResult> {
+    ) -> ParserResult<StreamingParseResult> {
        self.buffer.push_str(chunk);

        // Check for tool markers
@@ -211,7 +211,7 @@ impl ToolParser for GptOssParser {
                            partial_args
                        };

-                        match self.partial_json.parse_value(json_part) {
+                        match self.partial_json.parse_value(json_part, true) {
                            Ok((value, _consumed)) => {
                                let args_str = serde_json::to_string(&value)
                                    .unwrap_or_else(|_| "{}".to_string());
--- a/sgl-router/src/tool_parser/parsers/helpers.rs
+++ b/sgl-router/src/tool_parser/parsers/helpers.rs
@@ -2,7 +2,7 @@ use crate::protocols::spec::Tool;
 use serde_json::Value;
 use std::collections::HashMap;

-use crate::tool_parser::errors::{ToolParserError, ToolParserResult};
+use crate::tool_parser::errors::{ParserError, ParserResult};
 use crate::tool_parser::types::{StreamingParseResult, ToolCallItem};

 /// Get a mapping of tool names to their indices
@@ -14,6 +14,16 @@ pub fn get_tool_indices(tools: &[Tool]) -> HashMap<String, usize> {
        .collect()
 }

+/// Find the common prefix of two strings
+/// Used for incremental argument streaming when partial JSON returns different intermediate states
+pub fn find_common_prefix(s1: &str, s2: &str) -> String {
+    s1.chars()
+        .zip(s2.chars())
+        .take_while(|(c1, c2)| c1 == c2)
+        .map(|(c1, _)| c1)
+        .collect()
+}
+
 /// Get unstreamed tool call arguments
 /// Returns tool call items for arguments that have been parsed but not yet streamed
 /// This ensures tool calls are properly completed even if the model generates final arguments in the last chunk
@@ -96,7 +106,7 @@ pub fn reset_parser_state(
 ) {
    buffer.clear();
    prev_tool_call_arr.clear();
-    *current_tool_id = 0;
+    *current_tool_id = -1;
    *current_tool_name_sent = false;
    streamed_args_for_tool.clear();
 }
@@ -169,7 +179,7 @@ pub fn normalize_arguments_field(mut obj: Value) -> Value {
 ///
 /// # Returns
 /// - `Ok(StreamingParseResult)` with any tool call items to stream
-/// - `Err(ToolParserError)` if JSON parsing or serialization fails
+/// - `Err(ParserError)` if JSON parsing or serialization fails
 #[allow(clippy::too_many_arguments)]
 pub fn handle_json_tool_streaming(
    current_text: &str,
@@ -181,7 +191,7 @@ pub fn handle_json_tool_streaming(
    current_tool_name_sent: &mut bool,
    streamed_args_for_tool: &mut Vec<String>,
    prev_tool_call_arr: &mut Vec<Value>,
-) -> ToolParserResult<StreamingParseResult> {
+) -> ParserResult<StreamingParseResult> {
    // Check if we have content to parse
    if start_idx >= current_text.len() {
        return Ok(StreamingParseResult::default());
@@ -190,8 +200,12 @@ pub fn handle_json_tool_streaming(
    // Extract JSON string from current position
    let json_str = &current_text[start_idx..];

+    // When current_tool_name_sent is false, don't allow partial strings to avoid
+    // parsing incomplete tool names as empty strings
+    let allow_partial_strings = *current_tool_name_sent;
+
    // Parse partial JSON
-    let (obj, end_idx) = match partial_json.parse_value(json_str) {
+    let (obj, end_idx) = match partial_json.parse_value(json_str, allow_partial_strings) {
        Ok(result) => result,
        Err(_) => {
            return Ok(StreamingParseResult::default());
@@ -252,49 +266,68 @@ pub fn handle_json_tool_streaming(
            .map(|s| s.len())
            .unwrap_or(0);
        let cur_args_json = serde_json::to_string(cur_arguments)
-            .map_err(|e| ToolParserError::ParsingFailed(e.to_string()))?;
+            .map_err(|e| ParserError::ParsingFailed(e.to_string()))?;

-        // Compute diff: everything after what we've already sent
-        let diff = cur_args_json[sent..].to_string();
+        // Get prev_arguments (matches Python's structure)
+        let prev_arguments = if tool_id < prev_tool_call_arr.len() {
+            prev_tool_call_arr[tool_id].get("arguments")
+        } else {
+            None
+        };

-        // Send diff if there's new content
-        if !diff.is_empty() {
-            // Only accumulate if not complete
-            if !is_complete && tool_id < streamed_args_for_tool.len() {
-                streamed_args_for_tool[tool_id].push_str(&diff);
-            }
+        // Calculate diff: everything after we've already sent
+        let mut argument_diff = None;

-            result.calls.push(ToolCallItem {
-                tool_index: tool_id,
-                name: None,
-                parameters: diff,
-            });
-        }
-
-        // If JSON is complete, advance to next tool
        if is_complete {
-            // Remove processed portion, keep unprocessed content
-            *buffer = current_text[start_idx + end_idx..].to_string();
+            // Python: argument_diff = cur_args_json[sent:]
+            // Rust needs bounds check (Python returns "" automatically)
+            argument_diff = if sent < cur_args_json.len() {
+                Some(cur_args_json[sent..].to_string())
+            } else {
+                Some(String::new())
+            };
+        } else if let Some(prev_args) = prev_arguments {
+            let prev_args_json = serde_json::to_string(prev_args)
+                .map_err(|e| ParserError::ParsingFailed(e.to_string()))?;

-            // Clear completed tool data
-            if tool_id < prev_tool_call_arr.len() {
-                prev_tool_call_arr[tool_id] = Value::Null;
+            if cur_args_json != prev_args_json {
+                let prefix = find_common_prefix(&prev_args_json, &cur_args_json);
+                argument_diff = if sent < prefix.len() {
+                    Some(prefix[sent..].to_string())
+                } else {
+                    Some(String::new())
+                };
            }
-            *current_tool_name_sent = false;
-            if tool_id < streamed_args_for_tool.len() {
-                streamed_args_for_tool[tool_id].clear();
-            }
-            *current_tool_id += 1;
        }
-    }

-    // Update prev_tool_call_arr with current state
-    if *current_tool_id >= 0 {
-        ensure_capacity(*current_tool_id, prev_tool_call_arr, streamed_args_for_tool);
-        let tool_id = *current_tool_id as usize;
+        // Send diff if present
+        if let Some(diff) = argument_diff {
+            if !diff.is_empty() {
+                if tool_id < streamed_args_for_tool.len() {
+                    streamed_args_for_tool[tool_id].push_str(&diff);
+                }
+                result.calls.push(ToolCallItem {
+                    tool_index: tool_id,
+                    name: None,
+                    parameters: diff,
+                });
+            }
+        }

-        if tool_id < prev_tool_call_arr.len() {
-            prev_tool_call_arr[tool_id] = current_tool_call;
+        // Update prev_tool_call_arr with current state
+        if *current_tool_id >= 0 {
+            ensure_capacity(*current_tool_id, prev_tool_call_arr, streamed_args_for_tool);
+
+            if tool_id < prev_tool_call_arr.len() {
+                prev_tool_call_arr[tool_id] = current_tool_call;
+            }
+        }
+
+        // If complete, advance to next tool
+        if is_complete {
+            *buffer = current_text[start_idx + end_idx..].to_string();
+            *current_tool_name_sent = false;
+            *current_tool_id += 1;
        }
    }

@@ -371,7 +404,7 @@ mod tests {

        assert_eq!(buffer, "");
        assert_eq!(prev_tools.len(), 0);
-        assert_eq!(current_tool_id, 0);
+        assert_eq!(current_tool_id, -1);
        assert!(!current_tool_name_sent);
        assert_eq!(streamed_args.len(), 0);
    }
--- a/sgl-router/src/tool_parser/parsers/json_parser.rs
+++ b/sgl-router/src/tool_parser/parsers/json_parser.rs
@@ -4,7 +4,7 @@ use serde_json::Value;
 use crate::protocols::spec::Tool;

 use crate::tool_parser::{
-    errors::{ToolParserError, ToolParserResult},
+    errors::{ParserError, ParserResult},
    parsers::helpers,
    partial_json::PartialJson,
    traits::ToolParser,
@@ -117,7 +117,7 @@ impl JsonParser {
    }

    /// Parse a single JSON object into a ToolCall
-    fn parse_single_object(&self, obj: &Value) -> ToolParserResult<Option<ToolCall>> {
+    fn parse_single_object(&self, obj: &Value) -> ParserResult<Option<ToolCall>> {
        // Check if this looks like a tool call
        let name = obj
            .get("name")
@@ -134,7 +134,7 @@ impl JsonParser {

            // Convert arguments to JSON string
            let arguments = serde_json::to_string(args)
-                .map_err(|e| ToolParserError::ParsingFailed(e.to_string()))?;
+                .map_err(|e| ParserError::ParsingFailed(e.to_string()))?;

            Ok(Some(ToolCall {
                function: FunctionCall {
@@ -148,7 +148,7 @@ impl JsonParser {
    }

    /// Parse JSON value(s) into tool calls
-    fn parse_json_value(&self, value: &Value) -> ToolParserResult<Vec<ToolCall>> {
+    fn parse_json_value(&self, value: &Value) -> ParserResult<Vec<ToolCall>> {
        let mut tools = Vec::new();

        match value {
@@ -184,11 +184,11 @@ impl Default for JsonParser {

 #[async_trait]
 impl ToolParser for JsonParser {
-    async fn parse_complete(&self, text: &str) -> ToolParserResult<(String, Vec<ToolCall>)> {
+    async fn parse_complete(&self, text: &str) -> ParserResult<(String, Vec<ToolCall>)> {
        // Always use extract_json_from_text to handle both pure JSON and mixed content
        if let Some((extracted_json, normal_text)) = self.extract_json_from_text(text) {
            let parsed = serde_json::from_str::<Value>(&extracted_json)
-                .map_err(|e| ToolParserError::ParsingFailed(e.to_string()))
+                .map_err(|e| ParserError::ParsingFailed(e.to_string()))
                .and_then(|v| self.parse_json_value(&v));

            match parsed {
@@ -205,7 +205,7 @@ impl ToolParser for JsonParser {
        &mut self,
        chunk: &str,
        tools: &[Tool],
-    ) -> ToolParserResult<StreamingParseResult> {
+    ) -> ParserResult<StreamingParseResult> {
        // Append new text to buffer
        self.buffer.push_str(chunk);
        let current_text = &self.buffer.clone();
@@ -264,4 +264,14 @@ impl ToolParser for JsonParser {
    fn get_unstreamed_tool_args(&self) -> Option<Vec<ToolCallItem>> {
        helpers::get_unstreamed_args(&self.prev_tool_call_arr, &self.streamed_args_for_tool)
    }
+
+    fn reset(&mut self) {
+        helpers::reset_parser_state(
+            &mut self.buffer,
+            &mut self.prev_tool_call_arr,
+            &mut self.current_tool_id,
+            &mut self.current_tool_name_sent,
+            &mut self.streamed_args_for_tool,
+        );
+    }
 }
--- a/sgl-router/src/tool_parser/parsers/kimik2_parser.rs
+++ b/sgl-router/src/tool_parser/parsers/kimik2_parser.rs
@@ -5,7 +5,7 @@ use serde_json::Value;
 use crate::protocols::spec::Tool;

 use crate::tool_parser::{
-    errors::ToolParserResult,
+    errors::ParserResult,
    parsers::helpers,
    traits::ToolParser,
    types::{FunctionCall, StreamingParseResult, ToolCall, ToolCallItem},
@@ -102,7 +102,7 @@ impl Default for KimiK2Parser {

 #[async_trait]
 impl ToolParser for KimiK2Parser {
-    async fn parse_complete(&self, text: &str) -> ToolParserResult<(String, Vec<ToolCall>)> {
+    async fn parse_complete(&self, text: &str) -> ParserResult<(String, Vec<ToolCall>)> {
        if !self.has_tool_markers(text) {
            return Ok((text.to_string(), vec![]));
        }
@@ -161,7 +161,7 @@ impl ToolParser for KimiK2Parser {
        &mut self,
        chunk: &str,
        tools: &[Tool],
-    ) -> ToolParserResult<StreamingParseResult> {
+    ) -> ParserResult<StreamingParseResult> {
        self.buffer.push_str(chunk);
        let current_text = &self.buffer.clone();

@@ -333,4 +333,13 @@ impl ToolParser for KimiK2Parser {
    fn get_unstreamed_tool_args(&self) -> Option<Vec<ToolCallItem>> {
        helpers::get_unstreamed_args(&self.prev_tool_call_arr, &self.streamed_args_for_tool)
    }
+
+    fn reset(&mut self) {
+        self.buffer.clear();
+        self.prev_tool_call_arr.clear();
+        self.current_tool_id = -1;
+        self.current_tool_name_sent = false;
+        self.streamed_args_for_tool.clear();
+        self.last_arguments.clear();
+    }
 }
--- a/sgl-router/src/tool_parser/parsers/llama_parser.rs
+++ b/sgl-router/src/tool_parser/parsers/llama_parser.rs
@@ -4,7 +4,7 @@ use serde_json::Value;
 use crate::protocols::spec::Tool;

 use crate::tool_parser::{
-    errors::{ToolParserError, ToolParserResult},
+    errors::{ParserError, ParserResult},
    parsers::helpers,
    partial_json::PartialJson,
    traits::ToolParser,
@@ -70,7 +70,7 @@ impl LlamaParser {
    }

    /// Parse a single JSON object into a ToolCall (Llama format: name + parameters)
-    fn parse_single_object(&self, obj: &Value) -> ToolParserResult<Option<ToolCall>> {
+    fn parse_single_object(&self, obj: &Value) -> ParserResult<Option<ToolCall>> {
        // Llama format only: {"name": "function_name", "parameters": {...}}
        let name = obj.get("name").and_then(|v| v.as_str());

@@ -81,7 +81,7 @@ impl LlamaParser {

            // Convert parameters to JSON string
            let arguments = serde_json::to_string(parameters)
-                .map_err(|e| ToolParserError::ParsingFailed(e.to_string()))?;
+                .map_err(|e| ParserError::ParsingFailed(e.to_string()))?;

            Ok(Some(ToolCall {
                function: FunctionCall {
@@ -95,7 +95,7 @@ impl LlamaParser {
    }

    /// Parse semicolon-separated JSON objects
-    fn parse_semicolon_separated(&self, content: &str) -> ToolParserResult<Vec<ToolCall>> {
+    fn parse_semicolon_separated(&self, content: &str) -> ParserResult<Vec<ToolCall>> {
        let mut all_tools = Vec::new();

        // Split by semicolon and parse each JSON object
@@ -131,7 +131,7 @@ impl Default for LlamaParser {

 #[async_trait]
 impl ToolParser for LlamaParser {
-    async fn parse_complete(&self, text: &str) -> ToolParserResult<(String, Vec<ToolCall>)> {
+    async fn parse_complete(&self, text: &str) -> ParserResult<(String, Vec<ToolCall>)> {
        // Extract normal text and JSON content
        let (normal_text, json_content) =
            if let Some((normal, json)) = self.extract_content_after_python_tag(text) {
@@ -149,7 +149,7 @@ impl ToolParser for LlamaParser {
        } else {
            // Try single JSON object
            let parsed = serde_json::from_str::<Value>(json_content.trim())
-                .map_err(|e| ToolParserError::ParsingFailed(e.to_string()))
+                .map_err(|e| ParserError::ParsingFailed(e.to_string()))
                .and_then(|v| {
                    self.parse_single_object(&v)
                        .map(|opt| opt.map_or_else(Vec::new, |tool| vec![tool]))
@@ -173,7 +173,7 @@ impl ToolParser for LlamaParser {
        &mut self,
        chunk: &str,
        tools: &[Tool],
-    ) -> ToolParserResult<StreamingParseResult> {
+    ) -> ParserResult<StreamingParseResult> {
        // Append new text to buffer
        self.buffer.push_str(chunk);
        let current_text = &self.buffer.clone();
@@ -231,4 +231,14 @@ impl ToolParser for LlamaParser {
    fn get_unstreamed_tool_args(&self) -> Option<Vec<crate::tool_parser::types::ToolCallItem>> {
        helpers::get_unstreamed_args(&self.prev_tool_call_arr, &self.streamed_args_for_tool)
    }
+
+    fn reset(&mut self) {
+        helpers::reset_parser_state(
+            &mut self.buffer,
+            &mut self.prev_tool_call_arr,
+            &mut self.current_tool_id,
+            &mut self.current_tool_name_sent,
+            &mut self.streamed_args_for_tool,
+        );
+    }
 }
--- a/sgl-router/src/tool_parser/parsers/mistral_parser.rs
+++ b/sgl-router/src/tool_parser/parsers/mistral_parser.rs
@@ -4,7 +4,7 @@ use serde_json::Value;
 use crate::protocols::spec::Tool;

 use crate::tool_parser::{
-    errors::{ToolParserError, ToolParserResult},
+    errors::{ParserError, ParserResult},
    parsers::helpers,
    partial_json::PartialJson,
    traits::ToolParser,
@@ -111,9 +111,9 @@ impl MistralParser {
    }

    /// Parse tool calls from a JSON array
-    fn parse_json_array(&self, json_str: &str) -> ToolParserResult<Vec<ToolCall>> {
+    fn parse_json_array(&self, json_str: &str) -> ParserResult<Vec<ToolCall>> {
        let value: Value = serde_json::from_str(json_str)
-            .map_err(|e| ToolParserError::ParsingFailed(e.to_string()))?;
+            .map_err(|e| ParserError::ParsingFailed(e.to_string()))?;

        let mut tools = Vec::new();

@@ -134,7 +134,7 @@ impl MistralParser {
    }

    /// Parse a single JSON object into a ToolCall
-    fn parse_single_object(&self, obj: &Value) -> ToolParserResult<Option<ToolCall>> {
+    fn parse_single_object(&self, obj: &Value) -> ParserResult<Option<ToolCall>> {
        let name = obj.get("name").and_then(|v| v.as_str());

        if let Some(name) = name {
@@ -144,7 +144,7 @@ impl MistralParser {

            // Convert arguments to JSON string
            let arguments = serde_json::to_string(args)
-                .map_err(|e| ToolParserError::ParsingFailed(e.to_string()))?;
+                .map_err(|e| ParserError::ParsingFailed(e.to_string()))?;

            Ok(Some(ToolCall {
                function: FunctionCall {
@@ -166,7 +166,7 @@ impl Default for MistralParser {

 #[async_trait]
 impl ToolParser for MistralParser {
-    async fn parse_complete(&self, text: &str) -> ToolParserResult<(String, Vec<ToolCall>)> {
+    async fn parse_complete(&self, text: &str) -> ParserResult<(String, Vec<ToolCall>)> {
        // Check if text contains Mistral format
        if !self.has_tool_markers(text) {
            return Ok((text.to_string(), vec![]));
@@ -199,7 +199,7 @@ impl ToolParser for MistralParser {
        &mut self,
        chunk: &str,
        tools: &[Tool],
-    ) -> ToolParserResult<StreamingParseResult> {
+    ) -> ParserResult<StreamingParseResult> {
        // Append new text to buffer
        self.buffer.push_str(chunk);
        let current_text = &self.buffer.clone();
@@ -256,4 +256,14 @@ impl ToolParser for MistralParser {
    fn get_unstreamed_tool_args(&self) -> Option<Vec<crate::tool_parser::types::ToolCallItem>> {
        helpers::get_unstreamed_args(&self.prev_tool_call_arr, &self.streamed_args_for_tool)
    }
+
+    fn reset(&mut self) {
+        helpers::reset_parser_state(
+            &mut self.buffer,
+            &mut self.prev_tool_call_arr,
+            &mut self.current_tool_id,
+            &mut self.current_tool_name_sent,
+            &mut self.streamed_args_for_tool,
+        );
+    }
 }
--- a/sgl-router/src/tool_parser/parsers/pythonic_parser.rs
+++ b/sgl-router/src/tool_parser/parsers/pythonic_parser.rs
@@ -18,7 +18,7 @@ use std::sync::OnceLock;
 use crate::protocols::spec::Tool;

 use crate::tool_parser::{
-    errors::{ToolParserError, ToolParserResult},
+    errors::{ParserError, ParserResult},
    parsers::helpers,
    traits::ToolParser,
    types::{FunctionCall, StreamingParseResult, ToolCall, ToolCallItem},
@@ -74,7 +74,7 @@ impl PythonicParser {
            .replace("<|python_end|>", "")
    }

-    fn parse_tool_call_block(&self, block: &str) -> ToolParserResult<Vec<ToolCall>> {
+    fn parse_tool_call_block(&self, block: &str) -> ParserResult<Vec<ToolCall>> {
        let expr = parse_python_expression(block)?;
        match expr {
            Expr::List(list_expr) => list_expr
@@ -83,7 +83,7 @@ impl PythonicParser {
                .enumerate()
                .map(|(idx, call_expr)| build_tool_call(call_expr, idx))
                .collect(),
-            _ => Err(ToolParserError::ParsingFailed(
+            _ => Err(ParserError::ParsingFailed(
                "Expected a list of function calls in pythonic tool call".to_string(),
            )),
        }
@@ -92,7 +92,7 @@ impl PythonicParser {

 #[async_trait]
 impl ToolParser for PythonicParser {
-    async fn parse_complete(&self, text: &str) -> ToolParserResult<(String, Vec<ToolCall>)> {
+    async fn parse_complete(&self, text: &str) -> ParserResult<(String, Vec<ToolCall>)> {
        let cleaned = Self::strip_special_tokens(text);

        if let Some((tool_calls_text, normal_text)) = self.extract_tool_calls(&cleaned) {
@@ -120,7 +120,7 @@ impl ToolParser for PythonicParser {
        &mut self,
        chunk: &str,
        tools: &[Tool],
-    ) -> ToolParserResult<StreamingParseResult> {
+    ) -> ParserResult<StreamingParseResult> {
        self.buffer.push_str(chunk);

        let cleaned = Self::strip_special_tokens(&self.buffer);
@@ -232,23 +232,23 @@ fn find_matching_bracket(buffer: &str, start: usize) -> Option<usize> {
    None // No matching bracket found
 }

-fn parse_python_expression(source: &str) -> ToolParserResult<Expr> {
+fn parse_python_expression(source: &str) -> ParserResult<Expr> {
    let module = parse(source, Mode::Expression, "<pythonic_tool_call>")
-        .map_err(|err| ToolParserError::ParsingFailed(err.to_string()))?;
+        .map_err(|err| ParserError::ParsingFailed(err.to_string()))?;

    match module {
        Mod::Expression(expr_mod) => Ok(*expr_mod.body),
-        _ => Err(ToolParserError::ParsingFailed(
+        _ => Err(ParserError::ParsingFailed(
            "Expected a Python expression".to_string(),
        )),
    }
 }

-fn build_tool_call(expr: Expr, _index: usize) -> ToolParserResult<ToolCall> {
+fn build_tool_call(expr: Expr, _index: usize) -> ParserResult<ToolCall> {
    match expr {
        Expr::Call(call_expr) => {
            if !call_expr.args.is_empty() {
-                return Err(ToolParserError::ParsingFailed(
+                return Err(ParserError::ParsingFailed(
                    "Positional arguments are not supported in pythonic tool calls".to_string(),
                ));
            }
@@ -256,7 +256,7 @@ fn build_tool_call(expr: Expr, _index: usize) -> ToolParserResult<ToolCall> {
            let function_name = match *call_expr.func {
                Expr::Name(name_expr) => name_expr.id.to_string(),
                _ => {
-                    return Err(ToolParserError::ParsingFailed(
+                    return Err(ParserError::ParsingFailed(
                        "Unsupported function reference in pythonic tool call".to_string(),
                    ))
                }
@@ -265,7 +265,7 @@ fn build_tool_call(expr: Expr, _index: usize) -> ToolParserResult<ToolCall> {
            let mut arguments_map = Map::with_capacity(call_expr.keywords.len());
            for keyword in call_expr.keywords {
                let arg_name = keyword.arg.ok_or_else(|| {
-                    ToolParserError::ParsingFailed(
+                    ParserError::ParsingFailed(
                        "pythonic tool calls do not support **kwargs".to_string(),
                    )
                })?;
@@ -283,13 +283,13 @@ fn build_tool_call(expr: Expr, _index: usize) -> ToolParserResult<ToolCall> {
                },
            })
        }
-        _ => Err(ToolParserError::ParsingFailed(
+        _ => Err(ParserError::ParsingFailed(
            "Expected function calls inside pythonic tool call list".to_string(),
        )),
    }
 }

-fn expression_to_json(expr: &Expr) -> ToolParserResult<Value> {
+fn expression_to_json(expr: &Expr) -> ParserResult<Value> {
    match expr {
        Expr::Constant(expr_constant) => constant_to_json(&expr_constant.value),
        Expr::List(list_expr) => collect_sequence(&list_expr.elts).map(Value::Array),
@@ -300,81 +300,75 @@ fn expression_to_json(expr: &Expr) -> ToolParserResult<Value> {
        Expr::UnaryOp(unary_expr) => match unary_expr.op {
            UnaryOp::USub => match unary_expr.operand.as_ref() {
                Expr::Constant(const_expr) => negate_constant(&const_expr.value),
-                _ => Err(ToolParserError::ParsingFailed(
+                _ => Err(ParserError::ParsingFailed(
                    "Unsupported unary operand in pythonic tool call".to_string(),
                )),
            },
            UnaryOp::UAdd => expression_to_json(unary_expr.operand.as_ref()),
-            _ => Err(ToolParserError::ParsingFailed(format!(
+            _ => Err(ParserError::ParsingFailed(format!(
                "Unsupported unary operator in pythonic tool call: {:?}",
                unary_expr.op
            ))),
        },
        Expr::Name(name_expr) => Ok(Value::String(name_expr.id.to_string())),
-        _ => Err(ToolParserError::ParsingFailed(format!(
+        _ => Err(ParserError::ParsingFailed(format!(
            "Unsupported expression in pythonic tool call: {:?}",
            expr
        ))),
    }
 }

-fn constant_to_json(constant: &Constant) -> ToolParserResult<Value> {
+fn constant_to_json(constant: &Constant) -> ParserResult<Value> {
    match constant {
        Constant::None => Ok(Value::Null),
        Constant::Bool(b) => Ok(Value::Bool(*b)),
        Constant::Int(value) => Ok(integer_constant_to_value(value, false)),
        Constant::Float(f) => Number::from_f64(*f).map(Value::Number).ok_or_else(|| {
-            ToolParserError::ParsingFailed(
-                "Invalid float literal in pythonic tool call".to_string(),
-            )
+            ParserError::ParsingFailed("Invalid float literal in pythonic tool call".to_string())
        }),
        Constant::Str(s) => Ok(Value::String(s.clone())),
        Constant::Bytes(bytes) => Ok(Value::String(String::from_utf8_lossy(bytes).into_owned())),
        Constant::Tuple(values) => constant_tuple_to_array(values).map(Value::Array),
-        Constant::Ellipsis | Constant::Complex { .. } => Err(ToolParserError::ParsingFailed(
+        Constant::Ellipsis | Constant::Complex { .. } => Err(ParserError::ParsingFailed(
            "Unsupported literal in pythonic tool call".to_string(),
        )),
    }
 }

-fn negate_constant(constant: &Constant) -> ToolParserResult<Value> {
+fn negate_constant(constant: &Constant) -> ParserResult<Value> {
    match constant {
        Constant::Int(value) => Ok(integer_constant_to_value(value, true)),
        Constant::Float(f) => Number::from_f64(-f).map(Value::Number).ok_or_else(|| {
-            ToolParserError::ParsingFailed(
-                "Invalid float literal in pythonic tool call".to_string(),
-            )
+            ParserError::ParsingFailed("Invalid float literal in pythonic tool call".to_string())
        }),
-        _ => Err(ToolParserError::ParsingFailed(
+        _ => Err(ParserError::ParsingFailed(
            "Unsupported unary operand in pythonic tool call".to_string(),
        )),
    }
 }

-fn value_to_key_string(value: Value) -> ToolParserResult<String> {
+fn value_to_key_string(value: Value) -> ParserResult<String> {
    match value {
        Value::String(s) => Ok(s),
        Value::Number(num) => Ok(num.to_string()),
        Value::Bool(b) => Ok(b.to_string()),
        Value::Null => Ok("null".to_string()),
-        other => Err(ToolParserError::ParsingFailed(format!(
+        other => Err(ParserError::ParsingFailed(format!(
            "Unsupported key type in pythonic tool call: {:?}",
            other
        ))),
    }
 }

-fn collect_sequence(elements: &[Expr]) -> ToolParserResult<Vec<Value>> {
+fn collect_sequence(elements: &[Expr]) -> ParserResult<Vec<Value>> {
    elements.iter().map(expression_to_json).collect()
 }

-fn collect_dict(keys: &[Option<Expr>], values: &[Expr]) -> ToolParserResult<Map<String, Value>> {
+fn collect_dict(keys: &[Option<Expr>], values: &[Expr]) -> ParserResult<Map<String, Value>> {
    let mut map = Map::with_capacity(keys.len());
    for (key_expr, value_expr) in keys.iter().zip(values.iter()) {
        let key_expr = key_expr.as_ref().ok_or_else(|| {
-            ToolParserError::ParsingFailed(
-                "pythonic tool calls do not support **kwargs".to_string(),
-            )
+            ParserError::ParsingFailed("pythonic tool calls do not support **kwargs".to_string())
        })?;
        let key_value = expression_to_json(key_expr)?;
        let key = value_to_key_string(key_value)?;
@@ -384,7 +378,7 @@ fn collect_dict(keys: &[Option<Expr>], values: &[Expr]) -> ToolParserResult<Map<
    Ok(map)
 }

-fn constant_tuple_to_array(values: &[Constant]) -> ToolParserResult<Vec<Value>> {
+fn constant_tuple_to_array(values: &[Constant]) -> ParserResult<Vec<Value>> {
    values.iter().map(constant_to_json).collect()
 }

--- a/sgl-router/src/tool_parser/parsers/qwen_parser.rs
+++ b/sgl-router/src/tool_parser/parsers/qwen_parser.rs
@@ -5,7 +5,7 @@ use serde_json::Value;
 use crate::protocols::spec::Tool;

 use crate::tool_parser::{
-    errors::{ToolParserError, ToolParserResult},
+    errors::{ParserError, ParserResult},
    parsers::helpers,
    partial_json::PartialJson,
    traits::ToolParser,
@@ -76,7 +76,7 @@ impl QwenParser {
    }

    /// Parse a single JSON object into a ToolCall
-    fn parse_single_object(&self, obj: &Value) -> ToolParserResult<Option<ToolCall>> {
+    fn parse_single_object(&self, obj: &Value) -> ParserResult<Option<ToolCall>> {
        let name = obj.get("name").and_then(|v| v.as_str());

        if let Some(name) = name {
@@ -86,7 +86,7 @@ impl QwenParser {

            // Convert arguments to JSON string
            let arguments = serde_json::to_string(args)
-                .map_err(|e| ToolParserError::ParsingFailed(e.to_string()))?;
+                .map_err(|e| ParserError::ParsingFailed(e.to_string()))?;

            Ok(Some(ToolCall {
                function: FunctionCall {
@@ -108,7 +108,7 @@ impl Default for QwenParser {

 #[async_trait]
 impl ToolParser for QwenParser {
-    async fn parse_complete(&self, text: &str) -> ToolParserResult<(String, Vec<ToolCall>)> {
+    async fn parse_complete(&self, text: &str) -> ParserResult<(String, Vec<ToolCall>)> {
        // Check if text contains Qwen format
        if !self.has_tool_markers(text) {
            return Ok((text.to_string(), vec![]));
@@ -123,7 +123,7 @@ impl ToolParser for QwenParser {
        for captures in self.extractor.captures_iter(text) {
            if let Some(json_str) = captures.get(1) {
                let parsed = serde_json::from_str::<Value>(json_str.as_str().trim())
-                    .map_err(|e| ToolParserError::ParsingFailed(e.to_string()))
+                    .map_err(|e| ParserError::ParsingFailed(e.to_string()))
                    .and_then(|v| self.parse_single_object(&v));

                match parsed {
@@ -149,7 +149,7 @@ impl ToolParser for QwenParser {
        &mut self,
        chunk: &str,
        tools: &[Tool],
-    ) -> ToolParserResult<StreamingParseResult> {
+    ) -> ParserResult<StreamingParseResult> {
        // Append new text to buffer
        self.buffer.push_str(chunk);
        let current_text = &self.buffer.clone();
@@ -240,4 +240,14 @@ impl ToolParser for QwenParser {
    fn get_unstreamed_tool_args(&self) -> Option<Vec<crate::tool_parser::types::ToolCallItem>> {
        helpers::get_unstreamed_args(&self.prev_tool_call_arr, &self.streamed_args_for_tool)
    }
+
+    fn reset(&mut self) {
+        helpers::reset_parser_state(
+            &mut self.buffer,
+            &mut self.prev_tool_call_arr,
+            &mut self.current_tool_id,
+            &mut self.current_tool_name_sent,
+            &mut self.streamed_args_for_tool,
+        );
+    }
 }
--- a/sgl-router/src/tool_parser/parsers/step3_parser.rs
+++ b/sgl-router/src/tool_parser/parsers/step3_parser.rs
@@ -6,7 +6,7 @@ use std::collections::HashMap;
 use crate::protocols::spec::Tool;

 use crate::tool_parser::{
-    errors::{ToolParserError, ToolParserResult},
+    errors::{ParserError, ParserResult},
    parsers::helpers,
    traits::ToolParser,
    types::{FunctionCall, StreamingParseResult, ToolCall, ToolCallItem},
@@ -108,7 +108,7 @@ impl Step3Parser {
    fn parse_partial_tool_call(
        &mut self,
        tool_indices: &HashMap<String, usize>,
-    ) -> ToolParserResult<StreamingParseResult> {
+    ) -> ParserResult<StreamingParseResult> {
        let mut calls = Vec::new();

        // Check if we have tool_sep (means we're past the type declaration)
@@ -321,7 +321,7 @@ impl Step3Parser {
    fn parse_steptml_parameters(
        &self,
        params_text: &str,
-    ) -> ToolParserResult<serde_json::Map<String, Value>> {
+    ) -> ParserResult<serde_json::Map<String, Value>> {
        let mut parameters = serde_json::Map::new();

        for capture in self.param_extractor.captures_iter(params_text) {
@@ -359,7 +359,7 @@ impl Step3Parser {
    }

    /// Parse a single tool call block
-    fn parse_tool_call(&self, block: &str) -> ToolParserResult<Option<ToolCall>> {
+    fn parse_tool_call(&self, block: &str) -> ParserResult<Option<ToolCall>> {
        // Check if it contains function marker and tool separator
        if !block.contains("function") || !block.contains("<｜tool_sep｜>") {
            return Ok(None);
@@ -393,7 +393,7 @@ impl Step3Parser {
            let parameters = self.parse_steptml_parameters(params_text)?;

            let arguments_str = serde_json::to_string(&parameters)
-                .map_err(|e| ToolParserError::ParsingFailed(e.to_string()))?;
+                .map_err(|e| ParserError::ParsingFailed(e.to_string()))?;

            Ok(Some(ToolCall {
                function: FunctionCall {
@@ -415,7 +415,7 @@ impl Default for Step3Parser {

 #[async_trait]
 impl ToolParser for Step3Parser {
-    async fn parse_complete(&self, text: &str) -> ToolParserResult<(String, Vec<ToolCall>)> {
+    async fn parse_complete(&self, text: &str) -> ParserResult<(String, Vec<ToolCall>)> {
        if !self.has_tool_markers(text) {
            return Ok((text.to_string(), vec![]));
        }
@@ -449,7 +449,7 @@ impl ToolParser for Step3Parser {
        &mut self,
        chunk: &str,
        tools: &[Tool],
-    ) -> ToolParserResult<StreamingParseResult> {
+    ) -> ParserResult<StreamingParseResult> {
        self.buffer.push_str(chunk);

        // Build tool indices for validation
@@ -555,4 +555,20 @@ impl ToolParser for Step3Parser {
    fn get_unstreamed_tool_args(&self) -> Option<Vec<ToolCallItem>> {
        helpers::get_unstreamed_args(&self.prev_tool_call_arr, &self.streamed_args_for_tool)
    }
+
+    fn reset(&mut self) {
+        // Reset standard state
+        self.buffer.clear();
+        self.prev_tool_call_arr.clear();
+        self.current_tool_id = -1;
+        self.streamed_args_for_tool.clear();
+
+        // Reset Step3-specific fields
+        self.in_tool_block = false;
+        self.tool_block_finished = false;
+        self.current_function_name.clear();
+        self.current_parameters.clear();
+        self.in_tool_call = false;
+        self.function_name_sent = false;
+    }
 }