[router][tool call] Clean up redundant detect_format and has_tool_markers (#11270)

This commit is contained in:
Chang Su
2025-10-06 14:04:02 -07:00
committed by GitHub
parent 155cbb51f0
commit 466992b2d0
25 changed files with 73 additions and 114 deletions

View File

@@ -1859,7 +1859,7 @@ impl GrpcPDRouter {
// Check for tool-call markers first
let can_parse = {
let parser = pooled_parser.lock().await;
parser.detect_format(processed_text)
parser.has_tool_markers(processed_text)
// Lock is dropped here
};

View File

@@ -306,7 +306,7 @@ impl GrpcRouter {
// Check for tool-call markers first
let can_parse = {
let parser = pooled_parser.lock().await;
parser.detect_format(processed_text)
parser.has_tool_markers(processed_text)
// Lock is dropped here
};

View File

@@ -77,11 +77,6 @@ impl DeepSeekParser {
}
}
/// Check if text contains DeepSeek tool markers
fn has_tool_markers(&self, text: &str) -> bool {
text.contains("<tool▁calls▁begin>")
}
/// Parse a single tool call block - throws error if parsing fails
fn parse_tool_call(&self, block: &str) -> ToolParserResult<ToolCall> {
let captures = self.func_detail_extractor.captures(block).ok_or_else(|| {
@@ -312,8 +307,8 @@ impl ToolParser for DeepSeekParser {
})
}
fn detect_format(&self, text: &str) -> bool {
self.has_tool_markers(text)
fn has_tool_markers(&self, text: &str) -> bool {
text.contains("<tool▁calls▁begin>")
}
fn get_unstreamed_tool_args(&self) -> Option<Vec<ToolCallItem>> {

View File

@@ -71,11 +71,6 @@ impl Glm4MoeParser {
}
}
/// Check if text contains GLM-4 MoE tool markers
fn has_tool_markers(&self, text: &str) -> bool {
text.contains(self.bot_token)
}
/// Parse arguments from key-value pairs
fn parse_arguments(&self, args_text: &str) -> ToolParserResult<serde_json::Map<String, Value>> {
let mut arguments = serde_json::Map::new();
@@ -313,8 +308,8 @@ impl ToolParser for Glm4MoeParser {
})
}
fn detect_format(&self, text: &str) -> bool {
self.has_tool_markers(text)
fn has_tool_markers(&self, text: &str) -> bool {
text.contains(self.bot_token)
}
fn get_unstreamed_tool_args(&self) -> Option<Vec<ToolCallItem>> {

View File

@@ -38,7 +38,7 @@ impl ToolParser for GptOssHarmonyParser {
Ok(StreamingParseResult::default())
}
fn detect_format(&self, text: &str) -> bool {
fn has_tool_markers(&self, text: &str) -> bool {
// Reuse the legacy heuristics for now; this will be replaced with Harmony-specific
// start-token detection when the parser is fully implemented.
text.contains("<|channel|>commentary")

View File

@@ -58,11 +58,6 @@ impl GptOssParser {
}
}
/// Check if text contains GPT-OSS tool markers
fn has_tool_markers(&self, text: &str) -> bool {
text.contains("<|channel|>commentary to=")
}
/// Extract function name from full namespace (e.g., "functions.get_weather" -> "get_weather")
fn extract_function_name(&self, full_name: &str) -> String {
if let Some(dot_pos) = full_name.rfind('.') {
@@ -242,7 +237,7 @@ impl ToolParser for GptOssParser {
Ok(StreamingParseResult::default())
}
fn detect_format(&self, text: &str) -> bool {
self.has_tool_markers(text) || text.contains("<|channel|>commentary")
fn has_tool_markers(&self, text: &str) -> bool {
text.contains("<|channel|>commentary")
}
}

View File

@@ -261,7 +261,7 @@ impl ToolParser for JsonParser {
)
}
fn detect_format(&self, text: &str) -> bool {
fn has_tool_markers(&self, text: &str) -> bool {
let trimmed = text.trim();
(trimmed.starts_with('[') || trimmed.starts_with('{')) && trimmed.contains(r#""name""#)
}

View File

@@ -82,11 +82,6 @@ impl KimiK2Parser {
}
}
/// Check if text contains Kimi K2 tool markers
fn has_tool_markers(&self, text: &str) -> bool {
text.contains("<|tool_calls_section_begin|>")
}
/// Parse function ID to extract name and index
fn parse_function_id(&self, id: &str) -> Option<(String, usize)> {
if let Some(captures) = self.tool_call_id_regex.captures(id) {
@@ -331,8 +326,8 @@ impl ToolParser for KimiK2Parser {
})
}
fn detect_format(&self, text: &str) -> bool {
self.has_tool_markers(text) || text.contains("<|tool_call_begin|>")
fn has_tool_markers(&self, text: &str) -> bool {
text.contains("<|tool_calls_section_begin|>")
}
fn get_unstreamed_tool_args(&self) -> Option<Vec<ToolCallItem>> {

View File

@@ -228,7 +228,7 @@ impl ToolParser for LlamaParser {
)
}
fn detect_format(&self, text: &str) -> bool {
fn has_tool_markers(&self, text: &str) -> bool {
// Llama format if contains python_tag or starts with JSON object
text.contains("<|python_tag|>")
|| (text.trim_start().starts_with('{') && text.contains(r#""name""#))

View File

@@ -156,11 +156,6 @@ impl MistralParser {
Ok(None)
}
}
/// Check if text contains Mistral tool markers
fn has_tool_markers(&self, text: &str) -> bool {
text.contains("[TOOL_CALLS]")
}
}
impl Default for MistralParser {
@@ -254,8 +249,8 @@ impl ToolParser for MistralParser {
)
}
fn detect_format(&self, text: &str) -> bool {
self.has_tool_markers(text)
fn has_tool_markers(&self, text: &str) -> bool {
text.contains("[TOOL_CALLS]")
}
fn get_unstreamed_tool_args(&self) -> Option<Vec<crate::tool_parser::types::ToolCallItem>> {

View File

@@ -203,7 +203,7 @@ impl ToolParser for PythonicParser {
})
}
fn detect_format(&self, text: &str) -> bool {
fn has_tool_markers(&self, text: &str) -> bool {
let cleaned = Self::strip_special_tokens(text);
if pythonic_block_regex().is_match(&cleaned) {
return true;

View File

@@ -98,16 +98,6 @@ impl QwenParser {
Ok(None)
}
}
/// Check if text contains Qwen tool markers
fn has_tool_markers(&self, text: &str) -> bool {
text.contains("<tool_call>")
}
/// Check if text has tool call
fn has_tool_call(&self, text: &str) -> bool {
text.contains("<tool_call>")
}
}
impl Default for QwenParser {
@@ -165,7 +155,7 @@ impl ToolParser for QwenParser {
let current_text = &self.buffer.clone();
// Check if current_text has tool_call
let has_tool_start = self.has_tool_call(current_text)
let has_tool_start = self.has_tool_markers(current_text)
|| (self.current_tool_id >= 0 && current_text.starts_with(self.tool_call_separator));
if !has_tool_start {
@@ -243,8 +233,8 @@ impl ToolParser for QwenParser {
Ok(result)
}
fn detect_format(&self, text: &str) -> bool {
self.has_tool_markers(text)
fn has_tool_markers(&self, text: &str) -> bool {
text.contains("<tool_call>")
}
fn get_unstreamed_tool_args(&self) -> Option<Vec<crate::tool_parser::types::ToolCallItem>> {

View File

@@ -96,11 +96,6 @@ impl Step3Parser {
}
}
/// Check if text contains Step3 tool markers
fn has_tool_markers(&self, text: &str) -> bool {
text.contains(self.bot_token)
}
/// Reset streaming state for the next tool call
fn reset_streaming_state(&mut self) {
self.in_tool_call = false;
@@ -553,8 +548,8 @@ impl ToolParser for Step3Parser {
Ok(StreamingParseResult::default())
}
fn detect_format(&self, text: &str) -> bool {
self.has_tool_markers(text)
fn has_tool_markers(&self, text: &str) -> bool {
text.contains(self.bot_token)
}
fn get_unstreamed_tool_args(&self) -> Option<Vec<ToolCallItem>> {

View File

@@ -12,7 +12,7 @@ async fn test_tool_parser_factory() {
// Test that we can get a pooled parser
let pooled_parser = factory.get_pooled("gpt-4");
let parser = pooled_parser.lock().await;
assert!(parser.detect_format(r#"{"name": "test", "arguments": {}}"#));
assert!(parser.has_tool_markers(r#"{"name": "test", "arguments": {}}"#));
}
#[tokio::test]
@@ -25,7 +25,7 @@ async fn test_tool_parser_factory_model_mapping() {
// Get parser for the test model
let pooled_parser = factory.get_pooled("test-model");
let parser = pooled_parser.lock().await;
assert!(parser.detect_format(r#"{"name": "test", "arguments": {}}"#));
assert!(parser.has_tool_markers(r#"{"name": "test", "arguments": {}}"#));
}
#[test]
@@ -234,12 +234,12 @@ fn test_json_parser_format_detection() {
let parser = JsonParser::new();
// Should detect valid tool call formats
assert!(parser.detect_format(r#"{"name": "test", "arguments": {}}"#));
assert!(parser.detect_format(r#"{"name": "test", "parameters": {"x": 1}}"#));
assert!(parser.detect_format(r#"[{"name": "test"}]"#));
assert!(parser.has_tool_markers(r#"{"name": "test", "arguments": {}}"#));
assert!(parser.has_tool_markers(r#"{"name": "test", "parameters": {"x": 1}}"#));
assert!(parser.has_tool_markers(r#"[{"name": "test"}]"#));
// Should not detect non-tool formats
assert!(!parser.detect_format("plain text"));
assert!(!parser.has_tool_markers("plain text"));
}
#[tokio::test]

View File

@@ -25,7 +25,7 @@ pub trait ToolParser: Send + Sync {
) -> ToolParserResult<StreamingParseResult>;
/// Check whether text contains the markers of a tool call in this parser's format
fn detect_format(&self, text: &str) -> bool;
fn has_tool_markers(&self, text: &str) -> bool;
/// Optionally expose a token-aware parser implementation.
/// Default returns `None`, meaning the parser only supports text input.

View File

@@ -108,13 +108,13 @@ fn test_deepseek_format_detection() {
let parser = DeepSeekParser::new();
// Should detect DeepSeek format
assert!(parser.detect_format("<tool▁calls▁begin>"));
assert!(parser.detect_format("text with <tool▁calls▁begin> marker"));
assert!(parser.has_tool_markers("<tool▁calls▁begin>"));
assert!(parser.has_tool_markers("text with <tool▁calls▁begin> marker"));
// Should not detect other formats
assert!(!parser.detect_format("[TOOL_CALLS]"));
assert!(!parser.detect_format("<tool_call>"));
assert!(!parser.detect_format("plain text"));
assert!(!parser.has_tool_markers("[TOOL_CALLS]"));
assert!(!parser.has_tool_markers("<tool_call>"));
assert!(!parser.has_tool_markers("plain text"));
}
#[tokio::test]

View File

@@ -117,13 +117,13 @@ fn test_glm4_format_detection() {
let parser = Glm4MoeParser::new();
// Should detect GLM-4 format
assert!(parser.detect_format("<tool_call>"));
assert!(parser.detect_format("text with <tool_call> marker"));
assert!(parser.has_tool_markers("<tool_call>"));
assert!(parser.has_tool_markers("text with <tool_call> marker"));
// Should not detect other formats
assert!(!parser.detect_format("[TOOL_CALLS]"));
assert!(!parser.detect_format("<tool▁calls▁begin>"));
assert!(!parser.detect_format("plain text"));
assert!(!parser.has_tool_markers("[TOOL_CALLS]"));
assert!(!parser.has_tool_markers("<tool▁calls▁begin>"));
assert!(!parser.has_tool_markers("plain text"));
}
#[tokio::test]

View File

@@ -109,14 +109,14 @@ fn test_gpt_oss_format_detection() {
let parser = GptOssParser::new();
// Should detect GPT-OSS format
assert!(parser.detect_format("<|channel|>commentary to="));
assert!(parser.detect_format("<|channel|>commentary"));
assert!(parser.detect_format("text with <|channel|>commentary to= marker"));
assert!(parser.has_tool_markers("<|channel|>commentary to="));
assert!(parser.has_tool_markers("<|channel|>commentary"));
assert!(parser.has_tool_markers("text with <|channel|>commentary to= marker"));
// Should not detect other formats
assert!(!parser.detect_format("[TOOL_CALLS]"));
assert!(!parser.detect_format("<tool_call>"));
assert!(!parser.detect_format("plain text"));
assert!(!parser.has_tool_markers("[TOOL_CALLS]"));
assert!(!parser.has_tool_markers("<tool_call>"));
assert!(!parser.has_tool_markers("plain text"));
}
#[tokio::test]

View File

@@ -155,7 +155,7 @@ async fn test_json_invalid_format() {
async fn test_json_format_detection() {
let parser = JsonParser::new();
assert!(parser.detect_format(r#"{"name": "test", "arguments": {}}"#));
assert!(parser.detect_format(r#"[{"name": "test"}]"#));
assert!(!parser.detect_format("plain text"));
assert!(parser.has_tool_markers(r#"{"name": "test", "arguments": {}}"#));
assert!(parser.has_tool_markers(r#"[{"name": "test"}]"#));
assert!(!parser.has_tool_markers("plain text"));
}

View File

@@ -98,14 +98,13 @@ fn test_kimik2_format_detection() {
let parser = KimiK2Parser::new();
// Should detect Kimi K2 format
assert!(parser.detect_format("<|tool_calls_section_begin|>"));
assert!(parser.detect_format("<|tool_call_begin|>"));
assert!(parser.detect_format("text with <|tool_calls_section_begin|> marker"));
assert!(parser.has_tool_markers("<|tool_calls_section_begin|>"));
assert!(parser.has_tool_markers("text with <|tool_calls_section_begin|> marker"));
// Should not detect other formats
assert!(!parser.detect_format("[TOOL_CALLS]"));
assert!(!parser.detect_format("<tool_call>"));
assert!(!parser.detect_format("plain text"));
assert!(!parser.has_tool_markers("[TOOL_CALLS]"));
assert!(!parser.has_tool_markers("<tool_call>"));
assert!(!parser.has_tool_markers("plain text"));
}
#[tokio::test]

View File

@@ -116,10 +116,10 @@ async fn test_llama_empty_arguments() {
async fn test_llama_format_detection() {
let parser = LlamaParser::new();
assert!(parser.detect_format(r#"<|python_tag|>{"name": "test"}"#));
assert!(parser.detect_format(r#"{"name": "test", "parameters": {}}"#));
assert!(!parser.detect_format("plain text"));
assert!(!parser.detect_format(r#"{"key": "value"}"#)); // No name field
assert!(parser.has_tool_markers(r#"<|python_tag|>{"name": "test"}"#));
assert!(parser.has_tool_markers(r#"{"name": "test", "parameters": {}}"#));
assert!(!parser.has_tool_markers("plain text"));
assert!(!parser.has_tool_markers(r#"{"key": "value"}"#)); // No name field
}
#[tokio::test]

View File

@@ -96,10 +96,10 @@ async fn test_mistral_with_brackets_in_strings() {
async fn test_mistral_format_detection() {
let parser = MistralParser::new();
assert!(parser.detect_format("[TOOL_CALLS] ["));
assert!(parser.detect_format("Some text [TOOL_CALLS] ["));
assert!(!parser.detect_format("Just plain text"));
assert!(!parser.detect_format("[{\"name\": \"test\"}]")); // JSON array without TOOL_CALLS
assert!(parser.has_tool_markers("[TOOL_CALLS] ["));
assert!(parser.has_tool_markers("Some text [TOOL_CALLS] ["));
assert!(!parser.has_tool_markers("Just plain text"));
assert!(!parser.has_tool_markers("[{\"name\": \"test\"}]")); // JSON array without TOOL_CALLS
}
#[tokio::test]

View File

@@ -125,10 +125,10 @@ async fn test_pythonic_empty_arguments() {
async fn test_pythonic_format_detection() {
let parser = PythonicParser::new();
assert!(!parser.detect_format("[function_name(")); // Incomplete
assert!(parser.detect_format("[get_weather(city=\"NYC\")]"));
assert!(!parser.detect_format("Just plain text"));
assert!(!parser.detect_format("{\"name\": \"test\"}")); // JSON
assert!(!parser.has_tool_markers("[function_name(")); // Incomplete
assert!(parser.has_tool_markers("[get_weather(city=\"NYC\")]"));
assert!(!parser.has_tool_markers("Just plain text"));
assert!(!parser.has_tool_markers("{\"name\": \"test\"}")); // JSON
}
#[tokio::test]

View File

@@ -120,10 +120,10 @@ async fn test_qwen_with_newlines_in_strings() {
async fn test_qwen_format_detection() {
let parser = QwenParser::new();
assert!(parser.detect_format("<tool_call>"));
assert!(parser.detect_format("Some text <tool_call>\n{"));
assert!(!parser.detect_format("Just plain text"));
assert!(!parser.detect_format("{\"name\": \"test\"}")); // Plain JSON
assert!(parser.has_tool_markers("<tool_call>"));
assert!(parser.has_tool_markers("Some text <tool_call>\n{"));
assert!(!parser.has_tool_markers("Just plain text"));
assert!(!parser.has_tool_markers("{\"name\": \"test\"}")); // Plain JSON
}
#[tokio::test]

View File

@@ -111,13 +111,13 @@ fn test_step3_format_detection() {
let parser = Step3Parser::new();
// Should detect Step3 format
assert!(parser.detect_format("<tool_calls_begin>"));
assert!(parser.detect_format("text with <tool_calls_begin> marker"));
assert!(parser.has_tool_markers("<tool_calls_begin>"));
assert!(parser.has_tool_markers("text with <tool_calls_begin> marker"));
// Should not detect other formats
assert!(!parser.detect_format("[TOOL_CALLS]"));
assert!(!parser.detect_format("<tool_call>"));
assert!(!parser.detect_format("plain text"));
assert!(!parser.has_tool_markers("[TOOL_CALLS]"));
assert!(!parser.has_tool_markers("<tool_call>"));
assert!(!parser.has_tool_markers("plain text"));
}
#[tokio::test]