[router][tool parser] Modify tool parser to return both normal text and tool calls (non-stream) (#10995)

This commit is contained in:
Chang Su
2025-09-27 15:10:17 -07:00
committed by GitHub
parent f6bc3f529b
commit c1c8dd1dd0
30 changed files with 1467 additions and 934 deletions

View File

@@ -242,12 +242,12 @@ async fn test_json_parser_complete_single() {
let parser = JsonParser::new();
let input = r#"{"name": "get_weather", "arguments": {"location": "San Francisco", "units": "celsius"}}"#;
let result = parser.parse_complete(input).await.unwrap();
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "get_weather");
assert!(result[0].function.arguments.contains("San Francisco"));
assert!(result[0].function.arguments.contains("celsius"));
assert_eq!(tools.len(), 1);
assert_eq!(tools[0].function.name, "get_weather");
assert!(tools[0].function.arguments.contains("San Francisco"));
assert!(tools[0].function.arguments.contains("celsius"));
}
#[tokio::test]
@@ -259,11 +259,11 @@ async fn test_json_parser_complete_array() {
{"name": "get_news", "arguments": {"query": "technology"}}
]"#;
let result = parser.parse_complete(input).await.unwrap();
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 2);
assert_eq!(result[0].function.name, "get_weather");
assert_eq!(result[1].function.name, "get_news");
assert_eq!(tools.len(), 2);
assert_eq!(tools[0].function.name, "get_weather");
assert_eq!(tools[1].function.name, "get_news");
}
#[tokio::test]
@@ -271,13 +271,13 @@ async fn test_json_parser_with_parameters() {
let parser = JsonParser::new();
let input = r#"{"name": "calculate", "parameters": {"x": 10, "y": 20, "operation": "add"}}"#;
let result = parser.parse_complete(input).await.unwrap();
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "calculate");
assert!(result[0].function.arguments.contains("10"));
assert!(result[0].function.arguments.contains("20"));
assert!(result[0].function.arguments.contains("add"));
assert_eq!(tools.len(), 1);
assert_eq!(tools[0].function.name, "calculate");
assert!(tools[0].function.arguments.contains("10"));
assert!(tools[0].function.arguments.contains("20"));
assert!(tools[0].function.arguments.contains("add"));
}
#[tokio::test]
@@ -289,10 +289,10 @@ async fn test_json_parser_with_tokens() {
});
let input = r#"[TOOL_CALLS] [{"name": "search", "arguments": {"query": "rust programming"}}]"#;
let result = parser.parse_complete(input).await.unwrap();
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "search");
assert_eq!(tools.len(), 1);
assert_eq!(tools[0].function.name, "search");
}
#[tokio::test]
@@ -313,12 +313,12 @@ async fn test_multiline_json_with_tokens() {
}
}</tool>"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "get_weather");
assert!(result[0].function.arguments.contains("San Francisco"));
assert!(result[0].function.arguments.contains("celsius"));
assert!(result[0].function.arguments.contains("true"));
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert_eq!(tools[0].function.name, "get_weather");
assert!(tools[0].function.arguments.contains("San Francisco"));
assert!(tools[0].function.arguments.contains("celsius"));
assert!(tools[0].function.arguments.contains("true"));
}
#[tokio::test]
@@ -342,12 +342,12 @@ async fn test_multiline_json_array() {
}
]"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 2);
assert_eq!(result[0].function.name, "function1");
assert_eq!(result[1].function.name, "function2");
assert!(result[0].function.arguments.contains("value1"));
assert!(result[1].function.arguments.contains("[1,2,3]"));
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 2);
assert_eq!(tools[0].function.name, "function1");
assert_eq!(tools[1].function.name, "function2");
assert!(tools[0].function.arguments.contains("value1"));
assert!(tools[1].function.arguments.contains("[1,2,3]"));
}
#[test]
@@ -397,9 +397,9 @@ async fn test_registry_with_json_parser() {
let parser = registry.get_parser("gpt-4-turbo").unwrap();
let input = r#"{"name": "test", "arguments": {"x": 1}}"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "test");
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert_eq!(tools[0].function.name, "test");
}
#[tokio::test]
@@ -407,9 +407,9 @@ async fn test_json_parser_invalid_input() {
let parser = JsonParser::new();
// Invalid JSON should not be an error; it should simply yield no tool calls
assert_eq!(parser.parse_complete("not json").await.unwrap().len(), 0);
assert_eq!(parser.parse_complete("{invalid}").await.unwrap().len(), 0);
assert_eq!(parser.parse_complete("").await.unwrap().len(), 0);
assert_eq!(parser.parse_complete("not json").await.unwrap().1.len(), 0);
assert_eq!(parser.parse_complete("{invalid}").await.unwrap().1.len(), 0);
assert_eq!(parser.parse_complete("").await.unwrap().1.len(), 0);
}
#[tokio::test]
@@ -418,11 +418,11 @@ async fn test_json_parser_empty_arguments() {
// Tool call with no arguments
let input = r#"{"name": "get_time"}"#;
let result = parser.parse_complete(input).await.unwrap();
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "get_time");
assert_eq!(result[0].function.arguments, "{}");
assert_eq!(tools.len(), 1);
assert_eq!(tools[0].function.name, "get_time");
assert_eq!(tools[0].function.arguments, "{}");
}
#[cfg(test)]
@@ -435,14 +435,14 @@ mod failure_cases {
// Missing name field
let input = r#"{"arguments": {"x": 1}}"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 0, "Should return empty for tool without name");
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 0, "Should return empty for tool without name");
// Empty name
let input = r#"{"name": "", "arguments": {"x": 1}}"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1, "Should accept empty name string");
assert_eq!(result[0].function.name, "");
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1, "Should accept empty name string");
assert_eq!(tools[0].function.name, "");
}
#[tokio::test]
@@ -451,22 +451,22 @@ mod failure_cases {
// Arguments is a string instead of object
let input = r#"{"name": "test", "arguments": "not an object"}"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
// Should serialize the string as JSON
assert!(result[0].function.arguments.contains("not an object"));
assert!(tools[0].function.arguments.contains("not an object"));
// Arguments is a number
let input = r#"{"name": "test", "arguments": 42}"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.arguments, "42");
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert_eq!(tools[0].function.arguments, "42");
// Arguments is null
let input = r#"{"name": "test", "arguments": null}"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.arguments, "null");
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert_eq!(tools[0].function.arguments, "null");
}
#[tokio::test]
@@ -479,26 +479,26 @@ mod failure_cases {
// Missing end token
let input = r#"<tool>{"name": "test", "arguments": {}}"#;
let result = parser.parse_complete(input).await.unwrap();
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(
result.len(),
tools.len(),
0,
"Should fail to parse without complete wrapper"
);
// Missing start token - parser looks for complete wrapper, so this won't parse
let input = r#"{"name": "test", "arguments": {}}</tool>"#;
let result = parser.parse_complete(input).await.unwrap();
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(
result.len(),
tools.len(),
0,
"Should not parse JSON with incomplete wrapper"
);
// Mismatched tokens
let input = r#"<tool>{"name": "test", "arguments": {}}</wrong>"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 0, "Should fail with mismatched tokens");
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 0, "Should fail with mismatched tokens");
}
#[tokio::test]
@@ -507,18 +507,18 @@ mod failure_cases {
// Trailing comma
let input = r#"{"name": "test", "arguments": {"x": 1,}}"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 0, "Should reject JSON with trailing comma");
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 0, "Should reject JSON with trailing comma");
// Missing quotes on keys
let input = r#"{name: "test", arguments: {}}"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 0, "Should reject invalid JSON syntax");
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 0, "Should reject invalid JSON syntax");
// Unclosed object
let input = r#"{"name": "test", "arguments": {"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 0, "Should reject incomplete JSON");
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 0, "Should reject incomplete JSON");
}
}
@@ -532,17 +532,17 @@ mod edge_cases {
// Unicode in function name
let input = r#"{"name": "获取天气", "arguments": {"location": "北京"}}"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "获取天气");
assert!(result[0].function.arguments.contains("北京"));
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert_eq!(tools[0].function.name, "获取天气");
assert!(tools[0].function.arguments.contains("北京"));
// Emoji in arguments
let input = r#"{"name": "send_message", "arguments": {"text": "Hello 👋 World 🌍"}}"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert!(result[0].function.arguments.contains("👋"));
assert!(result[0].function.arguments.contains("🌍"));
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert!(tools[0].function.arguments.contains("👋"));
assert!(tools[0].function.arguments.contains("🌍"));
}
#[tokio::test]
@@ -551,22 +551,22 @@ mod edge_cases {
// Escaped quotes in arguments
let input = r#"{"name": "echo", "arguments": {"text": "He said \"hello\""}}"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert!(result[0].function.arguments.contains(r#"\"hello\""#));
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert!(tools[0].function.arguments.contains(r#"\"hello\""#));
// Escaped backslashes
let input = r#"{"name": "path", "arguments": {"dir": "C:\\Users\\test"}}"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert!(result[0].function.arguments.contains("\\\\"));
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert!(tools[0].function.arguments.contains("\\\\"));
// Newlines and tabs
let input = r#"{"name": "format", "arguments": {"text": "line1\nline2\ttabbed"}}"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert!(result[0].function.arguments.contains("\\n"));
assert!(result[0].function.arguments.contains("\\t"));
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert!(tools[0].function.arguments.contains("\\n"));
assert!(tools[0].function.arguments.contains("\\t"));
}
#[tokio::test]
@@ -580,10 +580,10 @@ mod edge_cases {
}
large_args.push_str(r#""final": "value"}}"#);
let result = parser.parse_complete(&large_args).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "process");
assert!(result[0].function.arguments.contains("field_999"));
let (_normal_text, tools) = parser.parse_complete(&large_args).await.unwrap();
assert_eq!(tools.len(), 1);
assert_eq!(tools[0].function.name, "process");
assert!(tools[0].function.arguments.contains("field_999"));
// Large array of tool calls
let mut large_array = "[".to_string();
@@ -595,9 +595,9 @@ mod edge_cases {
}
large_array.push(']');
let result = parser.parse_complete(&large_array).await.unwrap();
assert_eq!(result.len(), 100);
assert_eq!(result[99].function.name, "func_99");
let (_normal_text, tools) = parser.parse_complete(&large_array).await.unwrap();
assert_eq!(tools.len(), 100);
assert_eq!(tools[99].function.name, "func_99");
}
#[tokio::test]
@@ -612,10 +612,10 @@ mod edge_cases {
{"key": "value", "another": "field"}
]"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 2, "Should only parse valid tool calls");
assert_eq!(result[0].function.name, "tool1");
assert_eq!(result[1].function.name, "tool2");
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 2, "Should only parse valid tool calls");
assert_eq!(tools[0].function.name, "tool1");
assert_eq!(tools[1].function.name, "tool2");
}
#[tokio::test]
@@ -624,14 +624,14 @@ mod edge_cases {
// JSON with duplicate keys (last one wins in most parsers)
let input = r#"{"name": "first", "name": "second", "arguments": {"x": 1, "x": 2}}"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert_eq!(
result[0].function.name, "second",
tools[0].function.name, "second",
"Last duplicate key should win"
);
assert!(
result[0].function.arguments.contains("2"),
tools[0].function.arguments.contains("2"),
"Last duplicate value should win"
);
}
@@ -642,15 +642,15 @@ mod edge_cases {
// Null values in arguments
let input = r#"{"name": "test", "arguments": {"required": "value", "optional": null}}"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert!(result[0].function.arguments.contains("null"));
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert!(tools[0].function.arguments.contains("null"));
// Array with null
let input = r#"{"name": "test", "arguments": {"items": [1, null, "three"]}}"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert!(result[0].function.arguments.contains("null"));
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert!(tools[0].function.arguments.contains("null"));
}
#[tokio::test]
@@ -663,22 +663,22 @@ mod edge_cases {
// First pattern
let input = r#"<<{"name": "test1", "arguments": {}}>>"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "test1");
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert_eq!(tools[0].function.name, "test1");
// Second pattern
let input = r#"<tool>{"name": "test2", "arguments": {}}</tool>"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "test2");
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert_eq!(tools[0].function.name, "test2");
// Nested patterns (should use first match)
let input = r#"<<tool>{"name": "test3", "arguments": {}}</tool>>"#;
let result = parser.parse_complete(input).await.unwrap();
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
// This is tricky - depends on regex behavior
// The parser should handle this gracefully
assert!(result.len() <= 1, "Should not parse multiple times");
assert!(tools.len() <= 1, "Should not parse multiple times");
}
#[tokio::test]
@@ -743,25 +743,25 @@ mod edge_cases {
// Boolean values
let input = r#"{"name": "toggle", "arguments": {"enabled": true, "disabled": false}}"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert!(result[0].function.arguments.contains("true"));
assert!(result[0].function.arguments.contains("false"));
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert!(tools[0].function.arguments.contains("true"));
assert!(tools[0].function.arguments.contains("false"));
// Numbers (including float and negative)
let input = r#"{"name": "calc", "arguments": {"int": 42, "float": 3.14, "negative": -17}}"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert!(result[0].function.arguments.contains("42"));
assert!(result[0].function.arguments.contains("3.14"));
assert!(result[0].function.arguments.contains("-17"));
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert!(tools[0].function.arguments.contains("42"));
assert!(tools[0].function.arguments.contains("3.14"));
assert!(tools[0].function.arguments.contains("-17"));
// Empty arrays and objects
let input = r#"{"name": "test", "arguments": {"empty_arr": [], "empty_obj": {}}}"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert!(result[0].function.arguments.contains("[]"));
assert!(result[0].function.arguments.contains("{}"));
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert!(tools[0].function.arguments.contains("[]"));
assert!(tools[0].function.arguments.contains("{}"));
}
#[tokio::test]
@@ -770,15 +770,15 @@ mod edge_cases {
// Using "function" instead of "name"
let input = r#"{"function": "test_func", "arguments": {"x": 1}}"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "test_func");
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert_eq!(tools[0].function.name, "test_func");
// Both "name" and "function" present (name should take precedence)
let input = r#"{"name": "primary", "function": "secondary", "arguments": {}}"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "primary");
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert_eq!(tools[0].function.name, "primary");
}
#[tokio::test]
@@ -792,15 +792,15 @@ mod edge_cases {
"key" : "value"
}
} "#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "test");
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert_eq!(tools[0].function.name, "test");
// Minified JSON (no whitespace)
let input = r#"{"name":"compact","arguments":{"a":1,"b":2}}"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, "compact");
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert_eq!(tools[0].function.name, "compact");
}
}
@@ -830,9 +830,9 @@ mod stress_tests {
}
}"#;
let result = parser.parse_complete(input).await.unwrap();
assert_eq!(result.len(), 1);
assert!(result[0].function.arguments.contains("deep"));
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
assert_eq!(tools.len(), 1);
assert!(tools[0].function.arguments.contains("deep"));
}
#[tokio::test]
@@ -845,9 +845,9 @@ mod stress_tests {
let parser_clone = parser.clone();
let handle = tokio::spawn(async move {
let input = format!(r#"{{"name": "func_{}", "arguments": {{}}}}"#, i);
let result = parser_clone.parse_complete(&input).await.unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].function.name, format!("func_{}", i));
let (_normal_text, tools) = parser_clone.parse_complete(&input).await.unwrap();
assert_eq!(tools.len(), 1);
assert_eq!(tools[0].function.name, format!("func_{}", i));
});
handles.push(handle);
}