[router] add json tool parser (#9516)
This commit is contained in:
390
sgl-router/src/tool_parser/json_parser.rs
Normal file
390
sgl-router/src/tool_parser/json_parser.rs
Normal file
@@ -0,0 +1,390 @@
|
||||
use async_trait::async_trait;
|
||||
use regex::Regex;
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::tool_parser::{
|
||||
errors::{ToolParserError, ToolParserResult},
|
||||
partial_json::PartialJson,
|
||||
state::ParseState,
|
||||
traits::ToolParser,
|
||||
types::{FunctionCall, StreamResult, ToolCall},
|
||||
};
|
||||
|
||||
/// JSON format parser for tool calls
|
||||
///
|
||||
/// Handles various JSON formats for function calling:
|
||||
/// - Single tool call: {"name": "fn", "arguments": {...}}
|
||||
/// - Multiple tool calls: [{"name": "fn1", "arguments": {...}}, ...]
|
||||
/// - With parameters instead of arguments: {"name": "fn", "parameters": {...}}
|
||||
///
|
||||
/// Supports configurable token markers for different models
|
||||
pub struct JsonParser {
|
||||
/// Token(s) that mark the start of tool calls
|
||||
start_tokens: Vec<String>,
|
||||
/// Token(s) that mark the end of tool calls
|
||||
end_tokens: Vec<String>,
|
||||
/// Separator between multiple tool calls (reserved for future use)
|
||||
_separator: String,
|
||||
/// Parser for handling incomplete JSON during streaming
|
||||
partial_json: PartialJson,
|
||||
/// Regex patterns for extracting content between tokens
|
||||
extractors: Vec<Regex>,
|
||||
}
|
||||
|
||||
impl JsonParser {
|
||||
/// Create a new JSON parser with default configuration
|
||||
pub fn new() -> Self {
|
||||
Self::with_config(
|
||||
vec![], // No wrapper tokens by default
|
||||
vec![],
|
||||
", ".to_string(),
|
||||
)
|
||||
}
|
||||
|
||||
/// Create a parser with custom token configuration
|
||||
pub fn with_config(
|
||||
start_tokens: Vec<String>,
|
||||
end_tokens: Vec<String>,
|
||||
separator: String,
|
||||
) -> Self {
|
||||
// Build extraction patterns for each token pair
|
||||
let extractors = start_tokens
|
||||
.iter()
|
||||
.zip(end_tokens.iter())
|
||||
.filter_map(|(start, end)| {
|
||||
if !start.is_empty() && !end.is_empty() {
|
||||
// Use (?s) flag to enable DOTALL mode so . matches newlines
|
||||
let pattern =
|
||||
format!(r"(?s){}(.*?){}", regex::escape(start), regex::escape(end));
|
||||
Regex::new(&pattern).ok()
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
Self {
|
||||
start_tokens,
|
||||
end_tokens,
|
||||
_separator: separator,
|
||||
partial_json: PartialJson::default(),
|
||||
extractors,
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract JSON content from text, handling wrapper tokens if configured
|
||||
fn extract_json_content<'a>(&self, text: &'a str) -> &'a str {
|
||||
let mut content = text.trim();
|
||||
|
||||
// Try each extractor pattern
|
||||
for extractor in &self.extractors {
|
||||
if let Some(captures) = extractor.captures(content) {
|
||||
if let Some(matched) = captures.get(1) {
|
||||
content = matched.as_str().trim();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Handle special case where there's a start token but no end token
|
||||
for (start, end) in self.start_tokens.iter().zip(self.end_tokens.iter()) {
|
||||
if !start.is_empty() && end.is_empty() {
|
||||
content = content.strip_prefix(start).unwrap_or(content);
|
||||
}
|
||||
}
|
||||
|
||||
content
|
||||
}
|
||||
|
||||
/// Parse a single JSON object into a ToolCall
|
||||
fn parse_single_object(&self, obj: &Value) -> ToolParserResult<Option<ToolCall>> {
|
||||
// Check if this looks like a tool call
|
||||
let name = obj
|
||||
.get("name")
|
||||
.or_else(|| obj.get("function"))
|
||||
.and_then(|v| v.as_str());
|
||||
|
||||
if let Some(name) = name {
|
||||
// Get arguments - support both "arguments" and "parameters" keys
|
||||
let empty_obj = Value::Object(serde_json::Map::new());
|
||||
let args = obj
|
||||
.get("arguments")
|
||||
.or_else(|| obj.get("parameters"))
|
||||
.unwrap_or(&empty_obj);
|
||||
|
||||
// Convert arguments to JSON string
|
||||
let arguments = serde_json::to_string(args)
|
||||
.map_err(|e| ToolParserError::ParsingFailed(e.to_string()))?;
|
||||
|
||||
// Generate a unique ID if not provided
|
||||
let id = obj
|
||||
.get("id")
|
||||
.and_then(|v| v.as_str())
|
||||
.map(String::from)
|
||||
.unwrap_or_else(|| format!("call_{}", uuid::Uuid::new_v4()));
|
||||
|
||||
Ok(Some(ToolCall {
|
||||
id,
|
||||
r#type: "function".to_string(),
|
||||
function: FunctionCall {
|
||||
name: name.to_string(),
|
||||
arguments,
|
||||
},
|
||||
}))
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse JSON value(s) into tool calls
|
||||
fn parse_json_value(&self, value: &Value) -> ToolParserResult<Vec<ToolCall>> {
|
||||
let mut tools = Vec::new();
|
||||
|
||||
match value {
|
||||
Value::Array(arr) => {
|
||||
// Parse each element in the array
|
||||
for item in arr {
|
||||
if let Some(tool) = self.parse_single_object(item)? {
|
||||
tools.push(tool);
|
||||
}
|
||||
}
|
||||
}
|
||||
Value::Object(_) => {
|
||||
// Single tool call
|
||||
if let Some(tool) = self.parse_single_object(value)? {
|
||||
tools.push(tool);
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
// Not a valid tool call format
|
||||
return Ok(vec![]);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(tools)
|
||||
}
|
||||
|
||||
/// Check if text contains potential tool call markers
|
||||
fn has_tool_markers(&self, text: &str) -> bool {
|
||||
// If no start tokens configured, check for JSON structure
|
||||
if self.start_tokens.is_empty() {
|
||||
// For JSON, we just need to see the start of an object or array
|
||||
return text.contains('{') || text.contains('[');
|
||||
}
|
||||
|
||||
// Check for any start token
|
||||
self.start_tokens.iter().any(|token| text.contains(token))
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for JsonParser {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl ToolParser for JsonParser {
|
||||
async fn parse_complete(&self, text: &str) -> ToolParserResult<Vec<ToolCall>> {
|
||||
// Extract JSON content from wrapper tokens if present
|
||||
let json_content = self.extract_json_content(text);
|
||||
|
||||
// Try to parse as JSON
|
||||
match serde_json::from_str::<Value>(json_content) {
|
||||
Ok(value) => self.parse_json_value(&value),
|
||||
Err(_) => {
|
||||
// Not valid JSON, return empty
|
||||
Ok(vec![])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn parse_incremental(
|
||||
&self,
|
||||
chunk: &str,
|
||||
state: &mut ParseState,
|
||||
) -> ToolParserResult<StreamResult> {
|
||||
state.buffer.push_str(chunk);
|
||||
|
||||
// Check if we have potential tool calls
|
||||
if !self.has_tool_markers(&state.buffer) {
|
||||
// No tool markers, return as incomplete
|
||||
return Ok(StreamResult::Incomplete);
|
||||
}
|
||||
|
||||
// Extract JSON content
|
||||
let json_content = self.extract_json_content(&state.buffer);
|
||||
|
||||
// Try to parse with partial JSON parser
|
||||
match self.partial_json.parse_value(json_content) {
|
||||
Ok((value, consumed)) => {
|
||||
// Check if we have a complete JSON structure
|
||||
if consumed == json_content.len() {
|
||||
// Complete JSON, parse tool calls
|
||||
let tools = self.parse_json_value(&value)?;
|
||||
if !tools.is_empty() {
|
||||
// Clear buffer since we consumed everything
|
||||
state.buffer.clear();
|
||||
|
||||
// Return the first tool as complete (simplified for Phase 2)
|
||||
if let Some(tool) = tools.into_iter().next() {
|
||||
return Ok(StreamResult::ToolComplete(tool));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Partial JSON, try to extract tool name
|
||||
if let Some(name) = value.get("name").and_then(|v| v.as_str()) {
|
||||
// Simple implementation for Phase 2
|
||||
// Just return the tool name once we see it
|
||||
if !state.in_string {
|
||||
state.in_string = true; // Use as a flag for "name sent"
|
||||
return Ok(StreamResult::ToolName {
|
||||
index: 0,
|
||||
name: name.to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
// Check for complete arguments
|
||||
if let Some(args) =
|
||||
value.get("arguments").or_else(|| value.get("parameters"))
|
||||
{
|
||||
if let Ok(args_str) = serde_json::to_string(args) {
|
||||
// Return arguments as a single update
|
||||
return Ok(StreamResult::ToolArguments {
|
||||
index: 0,
|
||||
arguments: args_str,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(_) => {
|
||||
// Failed to parse even as partial JSON
|
||||
// Keep buffering
|
||||
}
|
||||
}
|
||||
|
||||
Ok(StreamResult::Incomplete)
|
||||
}
|
||||
|
||||
fn detect_format(&self, text: &str) -> bool {
|
||||
// Check if text contains JSON-like structure
|
||||
if self.has_tool_markers(text) {
|
||||
// Try to extract and parse
|
||||
let json_content = self.extract_json_content(text);
|
||||
|
||||
// Check if it looks like valid JSON for tool calls
|
||||
if let Ok(value) = serde_json::from_str::<Value>(json_content) {
|
||||
match value {
|
||||
Value::Object(ref obj) => {
|
||||
// Check for tool call structure
|
||||
obj.contains_key("name") || obj.contains_key("function")
|
||||
}
|
||||
Value::Array(ref arr) => {
|
||||
// Check if array contains tool-like objects
|
||||
arr.iter().any(|v| {
|
||||
v.as_object().is_some_and(|o| {
|
||||
o.contains_key("name") || o.contains_key("function")
|
||||
})
|
||||
})
|
||||
}
|
||||
_ => false,
|
||||
}
|
||||
} else {
|
||||
false
|
||||
}
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// A lone JSON object is parsed as exactly one tool call.
    #[tokio::test]
    async fn test_parse_single_tool_call() {
        let parser = JsonParser::new();
        let payload = r#"{"name": "get_weather", "arguments": {"location": "San Francisco"}}"#;

        let calls = parser.parse_complete(payload).await.unwrap();

        assert_eq!(calls.len(), 1);
        assert_eq!(calls[0].function.name, "get_weather");
    }

    /// A JSON array yields one tool call per element, in order.
    #[tokio::test]
    async fn test_parse_multiple_tool_calls() {
        let parser = JsonParser::new();
        let payload = r#"[
            {"name": "get_weather", "arguments": {"location": "SF"}},
            {"name": "search", "arguments": {"query": "news"}}
        ]"#;

        let calls = parser.parse_complete(payload).await.unwrap();
        let names: Vec<&str> = calls.iter().map(|c| c.function.name.as_str()).collect();

        assert_eq!(names, ["get_weather", "search"]);
    }

    /// `"parameters"` is accepted as an alias for `"arguments"`.
    #[tokio::test]
    async fn test_parse_with_parameters_key() {
        let parser = JsonParser::new();
        let payload = r#"{"name": "calculate", "parameters": {"x": 10, "y": 20}}"#;

        let calls = parser.parse_complete(payload).await.unwrap();

        assert_eq!(calls.len(), 1);
        let call = &calls[0];
        assert_eq!(call.function.name, "calculate");
        assert!(call.function.arguments.contains("10"));
    }

    /// Configured wrapper tokens are stripped before the JSON is parsed.
    #[tokio::test]
    async fn test_parse_with_wrapper_tokens() {
        let parser = JsonParser::with_config(
            vec![String::from("<tool>")],
            vec![String::from("</tool>")],
            String::from(", "),
        );

        let wrapped = r#"<tool>{"name": "test", "arguments": {}}</tool>"#;
        let calls = parser.parse_complete(wrapped).await.unwrap();

        assert_eq!(calls.len(), 1);
        assert_eq!(calls[0].function.name, "test");
    }

    /// Format detection accepts tool-shaped JSON and rejects everything else.
    #[test]
    fn test_detect_format() {
        let parser = JsonParser::new();

        for tool_like in [r#"{"name": "test", "arguments": {}}"#, r#"[{"name": "test"}]"#] {
            assert!(parser.detect_format(tool_like));
        }
        for other in ["plain text", r#"{"key": "value"}"#] {
            assert!(!parser.detect_format(other));
        }
    }

    /// Phase 2 simplified streaming: a complete payload delivered in a single
    /// chunk must immediately produce a complete tool call.
    #[tokio::test]
    async fn test_streaming_parse() {
        let parser = JsonParser::new();
        let mut state = ParseState::new();
        let full_json = r#"{"name": "get_weather", "arguments": {"location": "SF"}}"#;

        let outcome = parser
            .parse_incremental(full_json, &mut state)
            .await
            .unwrap();

        let StreamResult::ToolComplete(tool) = outcome else {
            panic!("Expected ToolComplete for complete JSON input");
        };
        assert_eq!(tool.function.name, "get_weather");
        assert!(tool.function.arguments.contains("SF"));
    }
}
|
||||
@@ -1,8 +1,8 @@
|
||||
/// Tool parser module for handling function/tool calls in model outputs
|
||||
///
|
||||
/// This module provides infrastructure for parsing tool calls from various model formats.
|
||||
/// Phase 1 focuses on core infrastructure: types, traits, registry, and partial JSON parsing.
|
||||
pub mod errors;
|
||||
pub mod json_parser;
|
||||
pub mod partial_json;
|
||||
pub mod registry;
|
||||
pub mod state;
|
||||
@@ -14,6 +14,7 @@ mod tests;
|
||||
|
||||
// Re-export commonly used types
|
||||
pub use errors::{ToolParserError, ToolParserResult};
|
||||
pub use json_parser::JsonParser;
|
||||
pub use registry::ParserRegistry;
|
||||
pub use state::{ParsePhase, ParseState};
|
||||
pub use traits::{PartialJsonParser, ToolParser};
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
use crate::tool_parser::json_parser::JsonParser;
|
||||
use crate::tool_parser::traits::ToolParser;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
@@ -21,6 +22,9 @@ impl ParserRegistry {
|
||||
default_parser: "json".to_string(),
|
||||
};
|
||||
|
||||
// Register default parsers
|
||||
registry.register_default_parsers();
|
||||
|
||||
// Register default model mappings
|
||||
registry.register_default_mappings();
|
||||
|
||||
@@ -75,6 +79,14 @@ impl ParserRegistry {
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Register default parsers
|
||||
fn register_default_parsers(&mut self) {
|
||||
// JSON parser - most common format
|
||||
self.register_parser("json", Arc::new(JsonParser::new()));
|
||||
|
||||
// Note: Additional parsers (mistral, qwen, llama) will be added in later phases
|
||||
}
|
||||
|
||||
/// Register default model mappings
|
||||
fn register_default_mappings(&mut self) {
|
||||
// OpenAI models
|
||||
@@ -85,16 +97,16 @@ impl ParserRegistry {
|
||||
// Anthropic models
|
||||
self.map_model("claude-*", "json");
|
||||
|
||||
// Mistral models
|
||||
self.map_model("mistral-*", "mistral");
|
||||
self.map_model("mixtral-*", "mistral");
|
||||
// Mistral models (will use json until mistral parser is implemented)
|
||||
self.map_model("mistral-*", "json");
|
||||
self.map_model("mixtral-*", "json");
|
||||
|
||||
// Qwen models
|
||||
self.map_model("qwen*", "qwen");
|
||||
// Qwen models (will use json until qwen parser is implemented)
|
||||
self.map_model("qwen*", "json");
|
||||
|
||||
// Llama models
|
||||
self.map_model("llama-*", "llama");
|
||||
self.map_model("meta-llama-*", "llama");
|
||||
// Llama models (will use json until llama parser is implemented)
|
||||
self.map_model("llama-*", "json");
|
||||
self.map_model("meta-llama-*", "json");
|
||||
|
||||
// Other models default to JSON
|
||||
self.map_model("gemini-*", "json");
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
use super::*;
|
||||
use crate::tool_parser::json_parser::JsonParser;
|
||||
use crate::tool_parser::partial_json::{
|
||||
compute_diff, find_common_prefix, is_complete_json, PartialJson,
|
||||
};
|
||||
use crate::tool_parser::traits::ToolParser;
|
||||
|
||||
#[test]
|
||||
fn test_parse_state_new() {
|
||||
@@ -247,3 +249,638 @@ fn test_partial_tool_call() {
|
||||
assert!(partial.name_sent);
|
||||
assert_eq!(partial.streamed_args, r#"{"key": "#);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_json_parser_complete_single() {
|
||||
let parser = JsonParser::new();
|
||||
|
||||
// Test single tool call with arguments
|
||||
let input = r#"{"name": "get_weather", "arguments": {"location": "San Francisco", "units": "celsius"}}"#;
|
||||
let result = parser.parse_complete(input).await.unwrap();
|
||||
|
||||
assert_eq!(result.len(), 1);
|
||||
assert_eq!(result[0].function.name, "get_weather");
|
||||
assert!(result[0].function.arguments.contains("San Francisco"));
|
||||
assert!(result[0].function.arguments.contains("celsius"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_json_parser_complete_array() {
|
||||
let parser = JsonParser::new();
|
||||
|
||||
// Test array of tool calls
|
||||
let input = r#"[
|
||||
{"name": "get_weather", "arguments": {"location": "SF"}},
|
||||
{"name": "get_news", "arguments": {"query": "technology"}}
|
||||
]"#;
|
||||
|
||||
let result = parser.parse_complete(input).await.unwrap();
|
||||
|
||||
assert_eq!(result.len(), 2);
|
||||
assert_eq!(result[0].function.name, "get_weather");
|
||||
assert_eq!(result[1].function.name, "get_news");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_json_parser_with_parameters() {
|
||||
let parser = JsonParser::new();
|
||||
|
||||
// Test with "parameters" instead of "arguments"
|
||||
let input = r#"{"name": "calculate", "parameters": {"x": 10, "y": 20, "operation": "add"}}"#;
|
||||
let result = parser.parse_complete(input).await.unwrap();
|
||||
|
||||
assert_eq!(result.len(), 1);
|
||||
assert_eq!(result[0].function.name, "calculate");
|
||||
assert!(result[0].function.arguments.contains("10"));
|
||||
assert!(result[0].function.arguments.contains("20"));
|
||||
assert!(result[0].function.arguments.contains("add"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_json_parser_with_tokens() {
|
||||
// Test with custom wrapper tokens
|
||||
let parser = JsonParser::with_config(
|
||||
vec!["[TOOL_CALLS] [".to_string()],
|
||||
vec!["]".to_string()],
|
||||
", ".to_string(),
|
||||
);
|
||||
|
||||
let input = r#"[TOOL_CALLS] [{"name": "search", "arguments": {"query": "rust programming"}}]"#;
|
||||
let result = parser.parse_complete(input).await.unwrap();
|
||||
|
||||
assert_eq!(result.len(), 1);
|
||||
assert_eq!(result[0].function.name, "search");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_multiline_json_with_tokens() {
|
||||
// Test that regex with (?s) flag properly handles multi-line JSON
|
||||
let parser = JsonParser::with_config(
|
||||
vec!["<tool>".to_string()],
|
||||
vec!["</tool>".to_string()],
|
||||
", ".to_string(),
|
||||
);
|
||||
|
||||
// Pretty-printed multi-line JSON
|
||||
let input = r#"<tool>{
|
||||
"name": "get_weather",
|
||||
"arguments": {
|
||||
"location": "San Francisco",
|
||||
"units": "celsius",
|
||||
"include_forecast": true
|
||||
}
|
||||
}</tool>"#;
|
||||
|
||||
let result = parser.parse_complete(input).await.unwrap();
|
||||
assert_eq!(result.len(), 1);
|
||||
assert_eq!(result[0].function.name, "get_weather");
|
||||
assert!(result[0].function.arguments.contains("San Francisco"));
|
||||
assert!(result[0].function.arguments.contains("celsius"));
|
||||
assert!(result[0].function.arguments.contains("true"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_multiline_json_array() {
|
||||
// Test multi-line JSON array without wrapper tokens
|
||||
let parser = JsonParser::new();
|
||||
|
||||
let input = r#"[
|
||||
{
|
||||
"name": "function1",
|
||||
"arguments": {
|
||||
"param1": "value1",
|
||||
"param2": 42
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "function2",
|
||||
"parameters": {
|
||||
"data": [1, 2, 3],
|
||||
"flag": false
|
||||
}
|
||||
}
|
||||
]"#;
|
||||
|
||||
let result = parser.parse_complete(input).await.unwrap();
|
||||
assert_eq!(result.len(), 2);
|
||||
assert_eq!(result[0].function.name, "function1");
|
||||
assert_eq!(result[1].function.name, "function2");
|
||||
assert!(result[0].function.arguments.contains("value1"));
|
||||
assert!(result[1].function.arguments.contains("[1,2,3]"));
|
||||
}
|
||||
|
||||
/// Format detection accepts tool-shaped JSON and rejects other input.
#[test]
fn test_json_parser_format_detection() {
    let parser = JsonParser::new();

    // Should detect valid tool call formats
    let tool_like = [
        r#"{"name": "test", "arguments": {}}"#,
        r#"{"name": "test", "parameters": {"x": 1}}"#,
        r#"[{"name": "test"}]"#,
    ];
    for sample in tool_like {
        assert!(parser.detect_format(sample));
    }

    // Should not detect non-tool formats
    let not_tools = [
        "plain text",
        r#"{"key": "value"}"#,
        r#"{"data": {"nested": true}}"#,
    ];
    for sample in not_tools {
        assert!(!parser.detect_format(sample));
    }
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_json_parser_streaming() {
|
||||
// Phase 2 simplified streaming test
|
||||
let parser = JsonParser::new();
|
||||
let mut state = ParseState::new();
|
||||
|
||||
// Test with complete JSON (simplified for Phase 2)
|
||||
let full_json = r#"{"name": "get_weather", "arguments": {"location": "San Francisco"}}"#;
|
||||
|
||||
let result = parser
|
||||
.parse_incremental(full_json, &mut state)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
match result {
|
||||
StreamResult::ToolComplete(tool) => {
|
||||
assert_eq!(tool.function.name, "get_weather");
|
||||
assert!(tool.function.arguments.contains("San Francisco"));
|
||||
}
|
||||
_ => panic!("Expected ToolComplete for complete JSON"),
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_registry_with_json_parser() {
|
||||
let registry = ParserRegistry::new();
|
||||
|
||||
// JSON parser should be registered by default
|
||||
assert!(registry.has_parser("json"));
|
||||
|
||||
// Should get JSON parser for OpenAI models
|
||||
let parser = registry.get_parser("gpt-4-turbo").unwrap();
|
||||
|
||||
// Test that the parser works
|
||||
let input = r#"{"name": "test", "arguments": {"x": 1}}"#;
|
||||
let result = parser.parse_complete(input).await.unwrap();
|
||||
assert_eq!(result.len(), 1);
|
||||
assert_eq!(result[0].function.name, "test");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_json_parser_invalid_input() {
|
||||
let parser = JsonParser::new();
|
||||
|
||||
// Invalid JSON should return empty results
|
||||
assert_eq!(parser.parse_complete("not json").await.unwrap().len(), 0);
|
||||
assert_eq!(parser.parse_complete("{invalid}").await.unwrap().len(), 0);
|
||||
assert_eq!(parser.parse_complete("").await.unwrap().len(), 0);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_json_parser_empty_arguments() {
|
||||
let parser = JsonParser::new();
|
||||
|
||||
// Tool call with no arguments
|
||||
let input = r#"{"name": "get_time"}"#;
|
||||
let result = parser.parse_complete(input).await.unwrap();
|
||||
|
||||
assert_eq!(result.len(), 1);
|
||||
assert_eq!(result[0].function.name, "get_time");
|
||||
assert_eq!(result[0].function.arguments, "{}");
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod failure_cases {
    use super::*;

    /// Objects without a name are ignored; an empty name string is accepted.
    #[tokio::test]
    async fn test_malformed_tool_missing_name() {
        let parser = JsonParser::new();

        // Missing name field
        let nameless = parser
            .parse_complete(r#"{"arguments": {"x": 1}}"#)
            .await
            .unwrap();
        assert_eq!(nameless.len(), 0, "Should return empty for tool without name");

        // Empty name
        let blank = parser
            .parse_complete(r#"{"name": "", "arguments": {"x": 1}}"#)
            .await
            .unwrap();
        assert_eq!(blank.len(), 1, "Should accept empty name string");
        assert_eq!(blank[0].function.name, "");
    }

    /// Non-object arguments are serialized as-is rather than rejected.
    #[tokio::test]
    async fn test_invalid_arguments_json() {
        let parser = JsonParser::new();

        // Arguments is a string instead of object: serialized as a JSON string.
        let with_string = parser
            .parse_complete(r#"{"name": "test", "arguments": "not an object"}"#)
            .await
            .unwrap();
        assert_eq!(with_string.len(), 1);
        assert!(with_string[0].function.arguments.contains("not an object"));

        // Arguments is a number, then null.
        let scalar_cases = [
            (r#"{"name": "test", "arguments": 42}"#, "42"),
            (r#"{"name": "test", "arguments": null}"#, "null"),
        ];
        for (input, expected) in scalar_cases {
            let calls = parser.parse_complete(input).await.unwrap();
            assert_eq!(calls.len(), 1);
            assert_eq!(calls[0].function.arguments, expected);
        }
    }

    /// An incomplete or mismatched wrapper yields no tool calls.
    #[tokio::test]
    async fn test_broken_wrapper_tokens() {
        let parser = JsonParser::with_config(
            vec![String::from("<tool>")],
            vec![String::from("</tool>")],
            String::from(", "),
        );

        let cases = [
            // Missing end token
            (
                r#"<tool>{"name": "test", "arguments": {}}"#,
                "Should fail to parse without complete wrapper",
            ),
            // Missing start token - parser looks for complete wrapper, so this won't parse
            (
                r#"{"name": "test", "arguments": {}}</tool>"#,
                "Should not parse JSON with incomplete wrapper",
            ),
            // Mismatched tokens
            (
                r#"<tool>{"name": "test", "arguments": {}}</wrong>"#,
                "Should fail with mismatched tokens",
            ),
        ];
        for (input, message) in cases {
            let calls = parser.parse_complete(input).await.unwrap();
            assert_eq!(calls.len(), 0, "{}", message);
        }
    }

    /// Syntactically invalid JSON is rejected without raising an error.
    #[tokio::test]
    async fn test_invalid_json_structures() {
        let parser = JsonParser::new();

        let cases = [
            // Trailing comma
            (
                r#"{"name": "test", "arguments": {"x": 1,}}"#,
                "Should reject JSON with trailing comma",
            ),
            // Missing quotes on keys
            (
                r#"{name: "test", arguments: {}}"#,
                "Should reject invalid JSON syntax",
            ),
            // Unclosed object
            (
                r#"{"name": "test", "arguments": {"#,
                "Should reject incomplete JSON",
            ),
        ];
        for (input, message) in cases {
            let calls = parser.parse_complete(input).await.unwrap();
            assert_eq!(calls.len(), 0, "{}", message);
        }
    }
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod edge_cases {
|
||||
use super::*;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_unicode_in_names_and_arguments() {
|
||||
let parser = JsonParser::new();
|
||||
|
||||
// Unicode in function name
|
||||
let input = r#"{"name": "获取天气", "arguments": {"location": "北京"}}"#;
|
||||
let result = parser.parse_complete(input).await.unwrap();
|
||||
assert_eq!(result.len(), 1);
|
||||
assert_eq!(result[0].function.name, "获取天气");
|
||||
assert!(result[0].function.arguments.contains("北京"));
|
||||
|
||||
// Emoji in arguments
|
||||
let input = r#"{"name": "send_message", "arguments": {"text": "Hello 👋 World 🌍"}}"#;
|
||||
let result = parser.parse_complete(input).await.unwrap();
|
||||
assert_eq!(result.len(), 1);
|
||||
assert!(result[0].function.arguments.contains("👋"));
|
||||
assert!(result[0].function.arguments.contains("🌍"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_escaped_characters() {
|
||||
let parser = JsonParser::new();
|
||||
|
||||
// Escaped quotes in arguments
|
||||
let input = r#"{"name": "echo", "arguments": {"text": "He said \"hello\""}}"#;
|
||||
let result = parser.parse_complete(input).await.unwrap();
|
||||
assert_eq!(result.len(), 1);
|
||||
assert!(result[0].function.arguments.contains(r#"\"hello\""#));
|
||||
|
||||
// Escaped backslashes
|
||||
let input = r#"{"name": "path", "arguments": {"dir": "C:\\Users\\test"}}"#;
|
||||
let result = parser.parse_complete(input).await.unwrap();
|
||||
assert_eq!(result.len(), 1);
|
||||
assert!(result[0].function.arguments.contains("\\\\"));
|
||||
|
||||
// Newlines and tabs
|
||||
let input = r#"{"name": "format", "arguments": {"text": "line1\nline2\ttabbed"}}"#;
|
||||
let result = parser.parse_complete(input).await.unwrap();
|
||||
assert_eq!(result.len(), 1);
|
||||
assert!(result[0].function.arguments.contains("\\n"));
|
||||
assert!(result[0].function.arguments.contains("\\t"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_very_large_payloads() {
|
||||
let parser = JsonParser::new();
|
||||
|
||||
// Large arguments object
|
||||
let mut large_args = r#"{"name": "process", "arguments": {"#.to_string();
|
||||
for i in 0..1000 {
|
||||
large_args.push_str(&format!(r#""field_{}": "value_{}","#, i, i));
|
||||
}
|
||||
large_args.push_str(r#""final": "value"}}"#);
|
||||
|
||||
let result = parser.parse_complete(&large_args).await.unwrap();
|
||||
assert_eq!(result.len(), 1);
|
||||
assert_eq!(result[0].function.name, "process");
|
||||
assert!(result[0].function.arguments.contains("field_999"));
|
||||
|
||||
// Large array of tool calls
|
||||
let mut large_array = "[".to_string();
|
||||
for i in 0..100 {
|
||||
if i > 0 {
|
||||
large_array.push(',');
|
||||
}
|
||||
large_array.push_str(&format!(r#"{{"name": "func_{}", "arguments": {{}}}}"#, i));
|
||||
}
|
||||
large_array.push(']');
|
||||
|
||||
let result = parser.parse_complete(&large_array).await.unwrap();
|
||||
assert_eq!(result.len(), 100);
|
||||
assert_eq!(result[99].function.name, "func_99");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_mixed_array_tools_and_non_tools() {
|
||||
let parser = JsonParser::new();
|
||||
|
||||
// Array with both tool calls and non-tool objects
|
||||
let input = r#"[
|
||||
{"name": "tool1", "arguments": {}},
|
||||
{"not_a_tool": "just_data"},
|
||||
{"name": "tool2", "parameters": {"x": 1}},
|
||||
{"key": "value", "another": "field"}
|
||||
]"#;
|
||||
|
||||
let result = parser.parse_complete(input).await.unwrap();
|
||||
assert_eq!(result.len(), 2, "Should only parse valid tool calls");
|
||||
assert_eq!(result[0].function.name, "tool1");
|
||||
assert_eq!(result[1].function.name, "tool2");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_duplicate_keys_in_json() {
|
||||
let parser = JsonParser::new();
|
||||
|
||||
// JSON with duplicate keys (last one wins in most parsers)
|
||||
let input = r#"{"name": "first", "name": "second", "arguments": {"x": 1, "x": 2}}"#;
|
||||
let result = parser.parse_complete(input).await.unwrap();
|
||||
assert_eq!(result.len(), 1);
|
||||
assert_eq!(
|
||||
result[0].function.name, "second",
|
||||
"Last duplicate key should win"
|
||||
);
|
||||
assert!(
|
||||
result[0].function.arguments.contains("2"),
|
||||
"Last duplicate value should win"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_null_values_in_arguments() {
|
||||
let parser = JsonParser::new();
|
||||
|
||||
// Null values in arguments
|
||||
let input = r#"{"name": "test", "arguments": {"required": "value", "optional": null}}"#;
|
||||
let result = parser.parse_complete(input).await.unwrap();
|
||||
assert_eq!(result.len(), 1);
|
||||
assert!(result[0].function.arguments.contains("null"));
|
||||
|
||||
// Array with null
|
||||
let input = r#"{"name": "test", "arguments": {"items": [1, null, "three"]}}"#;
|
||||
let result = parser.parse_complete(input).await.unwrap();
|
||||
assert_eq!(result.len(), 1);
|
||||
assert!(result[0].function.arguments.contains("null"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_multiple_token_pairs_with_conflicts() {
|
||||
// Test with overlapping token patterns
|
||||
let parser = JsonParser::with_config(
|
||||
vec!["<<".to_string(), "<tool>".to_string()],
|
||||
vec![">>".to_string(), "</tool>".to_string()],
|
||||
", ".to_string(),
|
||||
);
|
||||
|
||||
// First pattern
|
||||
let input = r#"<<{"name": "test1", "arguments": {}}>>"#;
|
||||
let result = parser.parse_complete(input).await.unwrap();
|
||||
assert_eq!(result.len(), 1);
|
||||
assert_eq!(result[0].function.name, "test1");
|
||||
|
||||
// Second pattern
|
||||
let input = r#"<tool>{"name": "test2", "arguments": {}}</tool>"#;
|
||||
let result = parser.parse_complete(input).await.unwrap();
|
||||
assert_eq!(result.len(), 1);
|
||||
assert_eq!(result[0].function.name, "test2");
|
||||
|
||||
// Nested patterns (should use first match)
|
||||
let input = r#"<<tool>{"name": "test3", "arguments": {}}</tool>>"#;
|
||||
let result = parser.parse_complete(input).await.unwrap();
|
||||
// This is tricky - depends on regex behavior
|
||||
// The parser should handle this gracefully
|
||||
assert!(result.len() <= 1, "Should not parse multiple times");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_streaming_with_partial_chunks() {
|
||||
let parser = JsonParser::new();
|
||||
|
||||
// Test 1: Very incomplete JSON (just opening brace) should return Incomplete
|
||||
let mut state1 = ParseState::new();
|
||||
let partial = r#"{"#;
|
||||
let result = parser
|
||||
.parse_incremental(partial, &mut state1)
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(
|
||||
matches!(result, StreamResult::Incomplete),
|
||||
"Should return Incomplete for just opening brace"
|
||||
);
|
||||
|
||||
// Test 2: Complete JSON should return ToolComplete
|
||||
let mut state2 = ParseState::new();
|
||||
let complete = r#"{"name": "get_weather", "arguments": {"location": "SF"}}"#;
|
||||
let result = parser
|
||||
.parse_incremental(complete, &mut state2)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
match result {
|
||||
StreamResult::ToolComplete(tool) => {
|
||||
assert_eq!(tool.function.name, "get_weather");
|
||||
let args: serde_json::Value =
|
||||
serde_json::from_str(&tool.function.arguments).unwrap();
|
||||
assert_eq!(args["location"], "SF");
|
||||
}
|
||||
_ => panic!("Expected ToolComplete for complete JSON"),
|
||||
}
|
||||
|
||||
// Test 3: Partial JSON with name - Phase 2 behavior
|
||||
// The PartialJson parser can complete partial JSON by filling in missing values
|
||||
let mut state3 = ParseState::new();
|
||||
let partial_with_name = r#"{"name": "test", "argum"#;
|
||||
let result = parser
|
||||
.parse_incremental(partial_with_name, &mut state3)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
match result {
|
||||
StreamResult::ToolComplete(tool) => {
|
||||
assert_eq!(tool.function.name, "test");
|
||||
// Arguments will be empty object since "argum" is incomplete
|
||||
assert_eq!(tool.function.arguments, "{}");
|
||||
}
|
||||
StreamResult::ToolName { name, .. } => {
|
||||
assert_eq!(name, "test");
|
||||
}
|
||||
StreamResult::Incomplete => {
|
||||
// Also acceptable if parser decides to wait
|
||||
}
|
||||
_ => panic!("Unexpected result for partial JSON with name"),
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_special_json_values() {
|
||||
let parser = JsonParser::new();
|
||||
|
||||
// Boolean values
|
||||
let input = r#"{"name": "toggle", "arguments": {"enabled": true, "disabled": false}}"#;
|
||||
let result = parser.parse_complete(input).await.unwrap();
|
||||
assert_eq!(result.len(), 1);
|
||||
assert!(result[0].function.arguments.contains("true"));
|
||||
assert!(result[0].function.arguments.contains("false"));
|
||||
|
||||
// Numbers (including float and negative)
|
||||
let input = r#"{"name": "calc", "arguments": {"int": 42, "float": 3.14, "negative": -17}}"#;
|
||||
let result = parser.parse_complete(input).await.unwrap();
|
||||
assert_eq!(result.len(), 1);
|
||||
assert!(result[0].function.arguments.contains("42"));
|
||||
assert!(result[0].function.arguments.contains("3.14"));
|
||||
assert!(result[0].function.arguments.contains("-17"));
|
||||
|
||||
// Empty arrays and objects
|
||||
let input = r#"{"name": "test", "arguments": {"empty_arr": [], "empty_obj": {}}}"#;
|
||||
let result = parser.parse_complete(input).await.unwrap();
|
||||
assert_eq!(result.len(), 1);
|
||||
assert!(result[0].function.arguments.contains("[]"));
|
||||
assert!(result[0].function.arguments.contains("{}"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_function_field_alternative() {
|
||||
let parser = JsonParser::new();
|
||||
|
||||
// Using "function" instead of "name"
|
||||
let input = r#"{"function": "test_func", "arguments": {"x": 1}}"#;
|
||||
let result = parser.parse_complete(input).await.unwrap();
|
||||
assert_eq!(result.len(), 1);
|
||||
assert_eq!(result[0].function.name, "test_func");
|
||||
|
||||
// Both "name" and "function" present (name should take precedence)
|
||||
let input = r#"{"name": "primary", "function": "secondary", "arguments": {}}"#;
|
||||
let result = parser.parse_complete(input).await.unwrap();
|
||||
assert_eq!(result.len(), 1);
|
||||
assert_eq!(result[0].function.name, "primary");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_whitespace_handling() {
|
||||
let parser = JsonParser::new();
|
||||
|
||||
// Extra whitespace everywhere
|
||||
let input = r#" {
|
||||
"name" : "test" ,
|
||||
"arguments" : {
|
||||
"key" : "value"
|
||||
}
|
||||
} "#;
|
||||
let result = parser.parse_complete(input).await.unwrap();
|
||||
assert_eq!(result.len(), 1);
|
||||
assert_eq!(result[0].function.name, "test");
|
||||
|
||||
// Minified JSON (no whitespace)
|
||||
let input = r#"{"name":"compact","arguments":{"a":1,"b":2}}"#;
|
||||
let result = parser.parse_complete(input).await.unwrap();
|
||||
assert_eq!(result.len(), 1);
|
||||
assert_eq!(result[0].function.name, "compact");
|
||||
}
|
||||
}
|
||||
|
||||
/// Heavier scenarios for `JsonParser`: deeply nested argument payloads and
/// one parser instance shared across several concurrently running tasks.
#[cfg(test)]
mod stress_tests {
    use super::*;

    /// Parses a tool call whose arguments nest five objects deep and checks
    /// that the innermost value is still present in the resulting arguments.
    #[tokio::test]
    async fn test_deeply_nested_arguments() {
        let parser = JsonParser::new();

        // Deeply nested structure
        let nested_payload = r#"{
            "name": "nested",
            "arguments": {
                "level1": {
                    "level2": {
                        "level3": {
                            "level4": {
                                "level5": {
                                    "value": "deep"
                                }
                            }
                        }
                    }
                }
            }
        }"#;

        let calls = parser.parse_complete(nested_payload).await.unwrap();
        assert_eq!(calls.len(), 1);
        assert!(calls[0].function.arguments.contains("deep"));
    }

    /// Spawns ten tasks that all parse through the same `Arc`-shared parser
    /// and checks that each task gets back the call it submitted.
    #[tokio::test]
    async fn test_concurrent_parser_usage() {
        // Test that parser can be used concurrently
        let shared = std::sync::Arc::new(JsonParser::new());

        // Every task is spawned before any of them is awaited.
        let tasks: Vec<_> = (0..10)
            .map(|idx| {
                let parser = std::sync::Arc::clone(&shared);
                tokio::spawn(async move {
                    let input = format!(r#"{{"name": "func_{}", "arguments": {{}}}}"#, idx);
                    let calls = parser.parse_complete(&input).await.unwrap();
                    assert_eq!(calls.len(), 1);
                    assert_eq!(calls[0].function.name, format!("func_{}", idx));
                })
            })
            .collect();

        // A panic inside a task (a failed assertion) surfaces here as a join error.
        for task in tasks {
            task.await.unwrap();
        }
    }
}
|
||||
|
||||
Reference in New Issue
Block a user