[router][tool call] Separate JsonParser and LlamaParser (#11073)
This commit is contained in:
@@ -261,12 +261,12 @@ async fn test_almost_valid_tool_calls() {
|
||||
let (normal_text, tools) = parser.parse_complete(input).await.unwrap();
|
||||
// Some JSON parsers might accept trailing commas
|
||||
if tools.is_empty() {
|
||||
assert_eq!(normal_text, r#"{"name": "test", "arguments": ,}"#);
|
||||
assert_eq!(normal_text, r#"{"name": "test", "arguments": {},}"#);
|
||||
}
|
||||
|
||||
// Wrong quote types
|
||||
let input = r#"{'name': 'test', 'arguments': {}}"#;
|
||||
let (normal_text, tools) = parser.parse_complete(input).await.unwrap();
|
||||
assert_eq!(tools.len(), 0); // Standard JSON requires double quotes
|
||||
assert_eq!(normal_text, r#"{'name': 'test', 'arguments': }"#);
|
||||
assert_eq!(normal_text, r#"{'name': 'test', 'arguments': {}}"#);
|
||||
}
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
//! Tests for the JSON parser which handles OpenAI, Claude, and generic JSON formats
|
||||
|
||||
use serde_json::json;
|
||||
use sglang_router_rs::tool_parser::{JsonParser, TokenConfig, ToolParser};
|
||||
use sglang_router_rs::tool_parser::{JsonParser, ToolParser};
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_simple_json_tool_call() {
|
||||
@@ -158,34 +158,4 @@ async fn test_json_format_detection() {
|
||||
assert!(parser.detect_format(r#"{"name": "test", "arguments": {}}"#));
|
||||
assert!(parser.detect_format(r#"[{"name": "test"}]"#));
|
||||
assert!(!parser.detect_format("plain text"));
|
||||
assert!(!parser.detect_format(r#"{"key": "value"}"#)); // No name field
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_parse_with_wrapper_tokens() {
|
||||
let parser = JsonParser::with_config(TokenConfig {
|
||||
start_tokens: vec!["<tool>".to_string()],
|
||||
end_tokens: vec!["</tool>".to_string()],
|
||||
separator: ", ".to_string(),
|
||||
});
|
||||
|
||||
let input = r#"<tool>{"name": "test", "arguments": {}}</tool>"#;
|
||||
let (normal_text, tool_calls) = parser.parse_complete(input).await.unwrap();
|
||||
assert_eq!(tool_calls.len(), 1);
|
||||
assert_eq!(tool_calls[0].function.name, "test");
|
||||
assert_eq!(normal_text, ""); // Wrapper tokens with no extra text
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_parse_with_start_token_invalid_json() {
|
||||
let parser = JsonParser::with_config(TokenConfig {
|
||||
start_tokens: vec!["<|python_tag|>".to_string()],
|
||||
end_tokens: vec!["".to_string()],
|
||||
separator: ";".to_string(),
|
||||
});
|
||||
|
||||
let input = r#"Hello world <|python_tag|>this is not valid json at all"#;
|
||||
let (normal_text, tool_calls) = parser.parse_complete(input).await.unwrap();
|
||||
assert_eq!(tool_calls.len(), 0);
|
||||
assert_eq!(normal_text, input); // Should return entire original text when JSON parsing fails
|
||||
}
|
||||
|
||||
@@ -7,20 +7,44 @@ use sglang_router_rs::tool_parser::{LlamaParser, ToolParser};
|
||||
#[tokio::test]
|
||||
async fn test_llama_python_tag_format() {
|
||||
let parser = LlamaParser::new();
|
||||
let input = r#"<|python_tag|>{"name": "search", "arguments": {"query": "weather"}}"#;
|
||||
let input = r#"Here are some results: <|python_tag|>{"name": "search", "parameters": {"query": "weather"}}"#;
|
||||
|
||||
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
|
||||
let (normal_text, tools) = parser.parse_complete(input).await.unwrap();
|
||||
assert_eq!(tools.len(), 1);
|
||||
assert_eq!(tools[0].function.name, "search");
|
||||
assert_eq!(normal_text, "Here are some results: ");
|
||||
|
||||
let args: serde_json::Value = serde_json::from_str(&tools[0].function.arguments).unwrap();
|
||||
assert_eq!(args["query"], "weather");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_llama_with_semicolon_separation() {
|
||||
let parser = LlamaParser::new();
|
||||
|
||||
let input = r#"<|python_tag|>{"name": "tool1", "parameters": {}};{"name": "tool2", "parameters": {"y": 2}}"#;
|
||||
|
||||
let (normal_text, tools) = parser.parse_complete(input).await.unwrap();
|
||||
assert_eq!(tools.len(), 2);
|
||||
assert_eq!(tools[0].function.name, "tool1");
|
||||
assert_eq!(tools[1].function.name, "tool2");
|
||||
assert_eq!(normal_text, "");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_llama_no_tool_calls() {
|
||||
let parser = LlamaParser::new();
|
||||
|
||||
let input = "This is just plain text with no tool calls";
|
||||
let (normal_text, tools) = parser.parse_complete(input).await.unwrap();
|
||||
assert_eq!(tools.len(), 0);
|
||||
assert_eq!(normal_text, input);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_llama_plain_json_fallback() {
|
||||
let parser = LlamaParser::new();
|
||||
let input = r#"{"name": "calculate", "arguments": {"x": 5, "y": 10}}"#;
|
||||
let input = r#"{"name": "calculate", "parameters": {"x": 5, "y": 10}}"#;
|
||||
|
||||
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
|
||||
assert_eq!(tools.len(), 1);
|
||||
@@ -34,7 +58,7 @@ async fn test_llama_plain_json_fallback() {
|
||||
#[tokio::test]
|
||||
async fn test_llama_with_text_before() {
|
||||
let parser = LlamaParser::new();
|
||||
let input = r#"Let me help you with that. <|python_tag|>{"name": "get_time", "arguments": {"timezone": "UTC"}}"#;
|
||||
let input = r#"Let me help you with that. <|python_tag|>{"name": "get_time", "parameters": {"timezone": "UTC"}}"#;
|
||||
|
||||
let (normal_text, tools) = parser.parse_complete(input).await.unwrap();
|
||||
assert_eq!(tools.len(), 1);
|
||||
@@ -50,7 +74,7 @@ async fn test_llama_with_nested_json() {
|
||||
let parser = LlamaParser::new();
|
||||
let input = r#"<|python_tag|>{
|
||||
"name": "update_settings",
|
||||
"arguments": {
|
||||
"parameters": {
|
||||
"preferences": {
|
||||
"theme": "dark",
|
||||
"language": "en"
|
||||
@@ -73,13 +97,13 @@ async fn test_llama_empty_arguments() {
|
||||
let parser = LlamaParser::new();
|
||||
|
||||
// With python_tag
|
||||
let input = r#"<|python_tag|>{"name": "ping", "arguments": {}}"#;
|
||||
let input = r#"<|python_tag|>{"name": "ping", "parameters": {}}"#;
|
||||
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
|
||||
assert_eq!(tools.len(), 1);
|
||||
assert_eq!(tools[0].function.name, "ping");
|
||||
|
||||
// Plain JSON
|
||||
let input = r#"{"name": "ping", "arguments": {}}"#;
|
||||
let input = r#"{"name": "ping", "parameters": {}}"#;
|
||||
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
|
||||
assert_eq!(tools.len(), 1);
|
||||
assert_eq!(tools[0].function.name, "ping");
|
||||
@@ -90,7 +114,7 @@ async fn test_llama_format_detection() {
|
||||
let parser = LlamaParser::new();
|
||||
|
||||
assert!(parser.detect_format(r#"<|python_tag|>{"name": "test"}"#));
|
||||
assert!(parser.detect_format(r#"{"name": "test", "arguments": {}}"#));
|
||||
assert!(parser.detect_format(r#"{"name": "test", "parameters": {}}"#));
|
||||
assert!(!parser.detect_format("plain text"));
|
||||
assert!(!parser.detect_format(r#"{"key": "value"}"#)); // No name field
|
||||
}
|
||||
@@ -112,7 +136,7 @@ async fn test_llama_real_world_output() {
|
||||
// Actual output from Llama 3.2 model - simplified for testing
|
||||
let input = r#"I'll search for that information for you.
|
||||
|
||||
<|python_tag|>{"name": "web_search", "arguments": {"query": "Llama 3.2 model capabilities", "num_results": 5, "search_type": "recent"}}"#;
|
||||
<|python_tag|>{"name": "web_search", "parameters": {"query": "Llama 3.2 model capabilities", "num_results": 5, "search_type": "recent"}}"#;
|
||||
|
||||
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
|
||||
assert_eq!(tools.len(), 1);
|
||||
@@ -120,7 +144,7 @@ async fn test_llama_real_world_output() {
|
||||
|
||||
let formatted_input = r#"<|python_tag|>{
|
||||
"name": "get_current_time",
|
||||
"arguments": {
|
||||
"parameters": {
|
||||
"timezone": "America/New_York",
|
||||
"format": "ISO8601"
|
||||
}
|
||||
@@ -131,22 +155,10 @@ async fn test_llama_real_world_output() {
|
||||
assert_eq!(tools2[0].function.name, "get_current_time");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_llama_json_array_format() {
|
||||
let parser = LlamaParser::new();
|
||||
|
||||
// Plain JSON array (should work as fallback)
|
||||
let input = r#"[{"name": "func1", "arguments": {}}, {"name": "func2", "arguments": {}}]"#;
|
||||
|
||||
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
|
||||
// Current implementation might handle this through JSON fallback
|
||||
assert!(!tools.is_empty());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_single_json() {
|
||||
let parser = LlamaParser::new();
|
||||
let text = r#"{"name": "get_weather", "arguments": {"city": "Paris"}}"#;
|
||||
let text = r#"{"name": "get_weather", "parameters": {"city": "Paris"}}"#;
|
||||
|
||||
let (_normal_text, tools) = parser.parse_complete(text).await.unwrap();
|
||||
assert_eq!(tools.len(), 1);
|
||||
@@ -159,7 +171,7 @@ async fn test_single_json() {
|
||||
#[tokio::test]
|
||||
async fn test_multiple_json_with_separator() {
|
||||
let parser = LlamaParser::new();
|
||||
let text = r#"<|python_tag|>{"name": "get_weather", "arguments": {"city": "Paris"}};{"name": "get_tourist_attractions", "arguments": {"city": "Paris"}}"#;
|
||||
let text = r#"<|python_tag|>{"name": "get_weather", "parameters": {"city": "Paris"}};{"name": "get_tourist_attractions", "parameters": {"city": "Paris"}}"#;
|
||||
|
||||
let (_normal_text, tools) = parser.parse_complete(text).await.unwrap();
|
||||
// Note: Current implementation may only parse the first one due to semicolon handling
|
||||
@@ -167,31 +179,24 @@ async fn test_multiple_json_with_separator() {
|
||||
assert_eq!(tools[0].function.name, "get_weather");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_multiple_json_with_separator_customized() {
|
||||
let parser = LlamaParser::new();
|
||||
let text = r#"<|python_tag|>{"name": "get_weather", "arguments": {}}<|python_tag|>{"name": "get_tourist_attractions", "arguments": {}}"#;
|
||||
|
||||
let (_normal_text, tools) = parser.parse_complete(text).await.unwrap();
|
||||
// Current implementation may handle this differently
|
||||
assert!(!tools.is_empty());
|
||||
assert_eq!(tools[0].function.name, "get_weather");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_json_with_trailing_text() {
|
||||
let parser = LlamaParser::new();
|
||||
let text = r#"{"name": "get_weather", "arguments": {}} Some follow-up text"#;
|
||||
// Valid JSON with trailing text - LlamaParser doesn't support this mixed format
|
||||
let text = r#"{"name": "get_weather", "parameters": {}} Some follow-up text"#;
|
||||
|
||||
let (_normal_text, tools) = parser.parse_complete(text).await.unwrap();
|
||||
assert_eq!(tools.len(), 1);
|
||||
assert_eq!(tools[0].function.name, "get_weather");
|
||||
let (normal_text, tools) = parser.parse_complete(text).await.unwrap();
|
||||
// LlamaParser expects pure JSON or <|python_tag|> format, not JSON with trailing text
|
||||
// So this returns as normal text
|
||||
assert_eq!(tools.len(), 0);
|
||||
assert_eq!(normal_text, text);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_invalid_then_valid_json() {
|
||||
let parser = LlamaParser::new();
|
||||
let text = r#"{"name": "get_weather", "arguments": {{"name": "get_weather", "arguments": {}}"#;
|
||||
let text =
|
||||
r#"{"name": "get_weather", "parameters": {{"name": "get_weather", "parameters": {}}"#;
|
||||
|
||||
let (_normal_text, tools) = parser.parse_complete(text).await.unwrap();
|
||||
// Should parse at least one valid JSON
|
||||
@@ -212,7 +217,7 @@ async fn test_plain_text_only() {
|
||||
#[tokio::test]
|
||||
async fn test_with_python_tag_prefix() {
|
||||
let parser = LlamaParser::new();
|
||||
let text = r#"Some intro. <|python_tag|>{"name": "get_weather", "arguments": {}}"#;
|
||||
let text = r#"Some intro. <|python_tag|>{"name": "get_weather", "parameters": {}}"#;
|
||||
|
||||
let (_normal_text, tools) = parser.parse_complete(text).await.unwrap();
|
||||
assert_eq!(tools.len(), 1);
|
||||
@@ -227,7 +232,7 @@ async fn test_llama_streaming_simple() {
|
||||
let mut state = sglang_router_rs::tool_parser::ParseState::new();
|
||||
|
||||
// Send complete JSON at once
|
||||
let full_json = r#"<|python_tag|>{"name": "search", "arguments": {"query": "weather"}}"#;
|
||||
let full_json = r#"<|python_tag|>{"name": "search", "parameters": {"query": "weather"}}"#;
|
||||
|
||||
let result = parser
|
||||
.parse_incremental(full_json, &mut state)
|
||||
@@ -252,7 +257,7 @@ async fn test_llama_streaming_partial() {
|
||||
r#"<|python"#,
|
||||
r#"_tag|>{"name": "#,
|
||||
r#""calculate", "#,
|
||||
r#""arguments": {"x": 10}"#,
|
||||
r#""parameters": {"x": 10}"#,
|
||||
r#"}"#,
|
||||
];
|
||||
|
||||
@@ -278,7 +283,7 @@ async fn test_llama_streaming_plain_json() {
|
||||
let chunks = vec![
|
||||
r#"{"name": "#,
|
||||
r#""search", "#,
|
||||
r#""arguments": "#,
|
||||
r#""parameters": "#,
|
||||
r#"{"query": "#,
|
||||
r#""test"}}"#,
|
||||
];
|
||||
@@ -305,7 +310,7 @@ async fn test_llama_streaming_with_text_before() {
|
||||
r#"Let me help you. "#,
|
||||
r#"<|python_tag|>"#,
|
||||
r#"{"name": "get_time","#,
|
||||
r#" "arguments": {"#,
|
||||
r#" "parameters": {"#,
|
||||
r#""timezone": "UTC"}}"#,
|
||||
];
|
||||
|
||||
@@ -328,7 +333,7 @@ async fn test_llama_streaming_multiple_tools() {
|
||||
let mut state = sglang_router_rs::tool_parser::ParseState::new();
|
||||
|
||||
let text =
|
||||
r#"<|python_tag|>{"name": "func1", "arguments": {}};{"name": "func2", "arguments": {}}"#;
|
||||
r#"<|python_tag|>{"name": "func1", "parameters": {}};{"name": "func2", "parameters": {}}"#;
|
||||
|
||||
let result = parser.parse_incremental(text, &mut state).await.unwrap();
|
||||
|
||||
@@ -337,7 +342,7 @@ async fn test_llama_streaming_multiple_tools() {
|
||||
sglang_router_rs::tool_parser::StreamResult::ToolComplete(tool) => {
|
||||
assert_eq!(tool.function.name, "func1");
|
||||
}
|
||||
_ => panic!("Expected first tool to be complete"),
|
||||
_ => panic!("Expected first tool to be complete, got: {:?}", result),
|
||||
}
|
||||
|
||||
// Process remaining buffer to get second tool
|
||||
@@ -356,7 +361,7 @@ async fn test_llama_streaming_multiple_tools_chunked() {
|
||||
let mut state = sglang_router_rs::tool_parser::ParseState::new();
|
||||
|
||||
// First chunk - incomplete first JSON
|
||||
let chunk1 = r#"<|python_tag|>{"name": "get_weather", "arguments""#;
|
||||
let chunk1 = r#"<|python_tag|>{"name": "get_weather", "parameters""#;
|
||||
let result1 = parser.parse_incremental(chunk1, &mut state).await.unwrap();
|
||||
|
||||
// Should be incomplete or have tool name
|
||||
@@ -383,32 +388,15 @@ async fn test_llama_streaming_multiple_tools_chunked() {
|
||||
let args: serde_json::Value = serde_json::from_str(&tool.function.arguments).unwrap();
|
||||
assert_eq!(args["city"], "Paris");
|
||||
}
|
||||
_ => panic!("Expected first tool to be complete after separator"),
|
||||
_ => panic!("Expected first tool complete, got: {:?}", result2),
|
||||
}
|
||||
|
||||
// Third chunk - complete second JSON
|
||||
let chunk3 = r#""get_time", "arguments": {"timezone": "UTC"}}"#;
|
||||
let chunk3 = r#""get_time", "parameters": {"timezone": "UTC"}}"#;
|
||||
let result3 = parser.parse_incremental(chunk3, &mut state).await.unwrap();
|
||||
|
||||
// Should get second tool complete
|
||||
match result3 {
|
||||
sglang_router_rs::tool_parser::StreamResult::ToolComplete(tool) => {
|
||||
assert_eq!(tool.function.name, "get_time");
|
||||
let args: serde_json::Value = serde_json::from_str(&tool.function.arguments).unwrap();
|
||||
assert_eq!(args["timezone"], "UTC");
|
||||
}
|
||||
_ => {
|
||||
// If not complete yet, try one more empty chunk
|
||||
let result4 = parser.parse_incremental("", &mut state).await.unwrap();
|
||||
match result4 {
|
||||
sglang_router_rs::tool_parser::StreamResult::ToolComplete(tool) => {
|
||||
assert_eq!(tool.function.name, "get_time");
|
||||
let args: serde_json::Value =
|
||||
serde_json::from_str(&tool.function.arguments).unwrap();
|
||||
assert_eq!(args["timezone"], "UTC");
|
||||
}
|
||||
_ => panic!("Expected second tool to be complete"),
|
||||
}
|
||||
}
|
||||
_ => panic!("Expected tool to be complete, got: {:?}", result3),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -122,10 +122,9 @@ async fn test_pythonic_empty_arguments() {
|
||||
async fn test_pythonic_format_detection() {
|
||||
let parser = PythonicParser::new();
|
||||
|
||||
assert!(parser.detect_format("[function_name("));
|
||||
assert!(!parser.detect_format("[function_name(")); // Incomplete
|
||||
assert!(parser.detect_format("[get_weather(city=\"NYC\")]"));
|
||||
assert!(!parser.detect_format("Just plain text"));
|
||||
assert!(!parser.detect_format("[1, 2, 3]")); // Plain list
|
||||
assert!(!parser.detect_format("{\"name\": \"test\"}")); // JSON
|
||||
}
|
||||
|
||||
|
||||
@@ -1,247 +0,0 @@
|
||||
//! Wrapper Token Tests
|
||||
//!
|
||||
//! Tests for JSON parser with custom wrapper tokens
|
||||
|
||||
use sglang_router_rs::tool_parser::{JsonParser, TokenConfig, ToolParser};
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_json_with_xml_style_wrapper() {
|
||||
let parser = JsonParser::with_config(TokenConfig {
|
||||
start_tokens: vec!["<tool>".to_string()],
|
||||
end_tokens: vec!["</tool>".to_string()],
|
||||
separator: ", ".to_string(),
|
||||
});
|
||||
|
||||
let input =
|
||||
r#"Some text before <tool>{"name": "test", "arguments": {"x": 1}}</tool> and after"#;
|
||||
|
||||
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
|
||||
assert_eq!(tools.len(), 1);
|
||||
assert_eq!(tools[0].function.name, "test");
|
||||
|
||||
let args: serde_json::Value = serde_json::from_str(&tools[0].function.arguments).unwrap();
|
||||
assert_eq!(args["x"], 1);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_json_with_multiple_wrapper_pairs() {
|
||||
let parser = JsonParser::with_config(TokenConfig {
|
||||
start_tokens: vec!["<tool>".to_string(), "<<TOOL>>".to_string()],
|
||||
end_tokens: vec!["</tool>".to_string(), "<</TOOL>>".to_string()],
|
||||
separator: ", ".to_string(),
|
||||
});
|
||||
|
||||
let input1 = r#"<tool>{"name": "tool1", "arguments": {}}</tool>"#;
|
||||
let (_normal_text, tools1) = parser.parse_complete(input1).await.unwrap();
|
||||
assert_eq!(tools1.len(), 1);
|
||||
assert_eq!(tools1[0].function.name, "tool1");
|
||||
|
||||
let input2 = r#"<<TOOL>>{"name": "tool2", "arguments": {}}<</TOOL>>"#;
|
||||
let (_normal_text, tools2) = parser.parse_complete(input2).await.unwrap();
|
||||
assert_eq!(tools2.len(), 1);
|
||||
assert_eq!(tools2[0].function.name, "tool2");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_json_with_only_start_token() {
|
||||
let parser = JsonParser::with_config(TokenConfig {
|
||||
start_tokens: vec![">>>FUNCTION:".to_string()],
|
||||
end_tokens: vec!["".to_string()], // Empty end token
|
||||
separator: ", ".to_string(),
|
||||
});
|
||||
|
||||
let input = r#"Some preamble >>>FUNCTION:{"name": "execute", "arguments": {"cmd": "ls"}}"#;
|
||||
|
||||
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
|
||||
assert_eq!(tools.len(), 1);
|
||||
assert_eq!(tools[0].function.name, "execute");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_json_with_custom_separator() {
|
||||
let parser = JsonParser::with_config(TokenConfig {
|
||||
start_tokens: vec!["[FUNC]".to_string()],
|
||||
end_tokens: vec!["[/FUNC]".to_string()],
|
||||
separator: " | ".to_string(), // Custom separator
|
||||
});
|
||||
|
||||
// Though we're not testing multiple tools here, the separator is configured
|
||||
let input = r#"[FUNC]{"name": "test", "arguments": {}}[/FUNC]"#;
|
||||
|
||||
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
|
||||
assert_eq!(tools.len(), 1);
|
||||
assert_eq!(tools[0].function.name, "test");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_json_with_nested_wrapper_tokens_in_content() {
|
||||
// Known limitation: When wrapper tokens appear inside JSON strings,
|
||||
// the simple regex-based extraction may fail. This would require
|
||||
// a more sophisticated parser that understands JSON string escaping.
|
||||
|
||||
let parser = JsonParser::with_config(TokenConfig {
|
||||
start_tokens: vec!["<call>".to_string()],
|
||||
end_tokens: vec!["</call>".to_string()],
|
||||
separator: ", ".to_string(),
|
||||
});
|
||||
|
||||
let input =
|
||||
r#"<call>{"name": "echo", "arguments": {"text": "Use <call> and </call> tags"}}</call>"#;
|
||||
|
||||
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
|
||||
|
||||
// This is a known limitation - the parser may fail when end tokens appear in content
|
||||
// For now, we accept this behavior
|
||||
if tools.is_empty() {
|
||||
// Parser failed due to nested tokens - this is expected
|
||||
assert_eq!(
|
||||
tools.len(),
|
||||
0,
|
||||
"Known limitation: nested wrapper tokens in content"
|
||||
);
|
||||
} else {
|
||||
// If it does parse, verify it's correct
|
||||
assert_eq!(tools[0].function.name, "echo");
|
||||
let args: serde_json::Value = serde_json::from_str(&tools[0].function.arguments).unwrap();
|
||||
assert_eq!(args["text"], "Use <call> and </call> tags");
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_json_extraction_without_wrapper_tokens() {
|
||||
// Default parser without wrapper tokens should extract JSON from text
|
||||
let parser = JsonParser::new();
|
||||
|
||||
let input = r#"
|
||||
Here is some text before the JSON.
|
||||
{"name": "search", "arguments": {"query": "test"}}
|
||||
And here is some text after.
|
||||
"#;
|
||||
|
||||
let (normal_text, tools) = parser.parse_complete(input).await.unwrap();
|
||||
assert_eq!(tools.len(), 1);
|
||||
assert_eq!(
|
||||
normal_text,
|
||||
"\n Here is some text before the JSON.\n \n And here is some text after.\n "
|
||||
);
|
||||
assert_eq!(tools[0].function.name, "search");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_json_with_multiline_wrapper_content() {
|
||||
let parser = JsonParser::with_config(TokenConfig {
|
||||
start_tokens: vec!["```json\n".to_string()],
|
||||
end_tokens: vec!["\n```".to_string()],
|
||||
separator: ", ".to_string(),
|
||||
});
|
||||
|
||||
let input = r#"Here's the function call:
|
||||
```json
|
||||
{
|
||||
"name": "format_code",
|
||||
"arguments": {
|
||||
"language": "rust",
|
||||
"code": "fn main() {}"
|
||||
}
|
||||
}
|
||||
```
|
||||
Done!"#;
|
||||
|
||||
let (normal_text, tools) = parser.parse_complete(input).await.unwrap();
|
||||
assert_eq!(tools.len(), 1);
|
||||
assert_eq!(normal_text, "");
|
||||
assert_eq!(tools[0].function.name, "format_code");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_json_with_special_chars_in_tokens() {
|
||||
let parser = JsonParser::with_config(TokenConfig {
|
||||
start_tokens: vec!["{{FUNC[[".to_string()],
|
||||
end_tokens: vec!["]]FUNC}}".to_string()],
|
||||
separator: ", ".to_string(),
|
||||
});
|
||||
|
||||
let input = r#"{{FUNC[[{"name": "test", "arguments": {"special": "[]{}"}}]]FUNC}}"#;
|
||||
|
||||
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
|
||||
assert_eq!(tools.len(), 1);
|
||||
assert_eq!(tools[0].function.name, "test");
|
||||
|
||||
let args: serde_json::Value = serde_json::from_str(&tools[0].function.arguments).unwrap();
|
||||
assert_eq!(args["special"], "[]{}");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_json_multiple_tools_with_wrapper() {
|
||||
let parser = JsonParser::with_config(TokenConfig {
|
||||
start_tokens: vec!["<fn>".to_string()],
|
||||
end_tokens: vec!["</fn>".to_string()],
|
||||
separator: ", ".to_string(),
|
||||
});
|
||||
|
||||
// Multiple wrapped JSON objects
|
||||
let input = r#"
|
||||
<fn>{"name": "tool1", "arguments": {}}</fn>
|
||||
Some text between.
|
||||
<fn>{"name": "tool2", "arguments": {"x": 1}}</fn>
|
||||
"#;
|
||||
|
||||
// Current implementation might handle this as separate calls
|
||||
// Let's test that at least the first one is parsed
|
||||
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
|
||||
assert!(!tools.is_empty(), "Should parse at least one tool");
|
||||
assert_eq!(tools[0].function.name, "tool1");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_json_wrapper_with_array() {
|
||||
let parser = JsonParser::with_config(TokenConfig {
|
||||
start_tokens: vec!["<tools>".to_string()],
|
||||
end_tokens: vec!["</tools>".to_string()],
|
||||
separator: ", ".to_string(),
|
||||
});
|
||||
|
||||
let input = r#"<tools>[
|
||||
{"name": "func1", "arguments": {}},
|
||||
{"name": "func2", "arguments": {"param": "value"}}
|
||||
]</tools>"#;
|
||||
|
||||
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
|
||||
assert_eq!(tools.len(), 2);
|
||||
assert_eq!(tools[0].function.name, "func1");
|
||||
assert_eq!(tools[1].function.name, "func2");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_json_incomplete_wrapper_tokens() {
|
||||
let parser = JsonParser::with_config(TokenConfig {
|
||||
start_tokens: vec!["<tool>".to_string()],
|
||||
end_tokens: vec!["</tool>".to_string()],
|
||||
separator: ", ".to_string(),
|
||||
});
|
||||
|
||||
// Missing end token
|
||||
let input = r#"<tool>{"name": "test", "arguments": {}}"#;
|
||||
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
|
||||
assert_eq!(tools.len(), 0, "Should not parse without closing token");
|
||||
|
||||
// Missing start token
|
||||
let input = r#"{"name": "test", "arguments": {}}</tool>"#;
|
||||
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
|
||||
assert_eq!(tools.len(), 0, "Should not parse without opening token");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_json_empty_wrapper_tokens() {
|
||||
let parser = JsonParser::with_config(TokenConfig {
|
||||
start_tokens: vec![],
|
||||
end_tokens: vec![],
|
||||
separator: ", ".to_string(),
|
||||
});
|
||||
|
||||
let input = r#"{"name": "test", "arguments": {"key": "value"}}"#;
|
||||
|
||||
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
|
||||
assert_eq!(tools.len(), 1);
|
||||
assert_eq!(tools[0].function.name, "test");
|
||||
}
|
||||
Reference in New Issue
Block a user