[router][tool call] Separate JsonParser and LlamaParser (#11073)
This commit is contained in:
@@ -20,7 +20,7 @@ pub use errors::{ToolParserError, ToolParserResult};
|
||||
pub use registry::ParserRegistry;
|
||||
pub use state::{ParsePhase, ParseState};
|
||||
pub use traits::{PartialJsonParser, ToolParser};
|
||||
pub use types::{FunctionCall, PartialToolCall, StreamResult, TokenConfig, ToolCall};
|
||||
pub use types::{FunctionCall, PartialToolCall, StreamResult, ToolCall};
|
||||
|
||||
// Re-export parsers for convenience
|
||||
pub use parsers::{
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
use async_trait::async_trait;
|
||||
use regex::Regex;
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::tool_parser::{
|
||||
@@ -7,87 +6,28 @@ use crate::tool_parser::{
|
||||
partial_json::PartialJson,
|
||||
state::ParseState,
|
||||
traits::ToolParser,
|
||||
types::{FunctionCall, StreamResult, TokenConfig, ToolCall},
|
||||
types::{FunctionCall, StreamResult, ToolCall},
|
||||
};
|
||||
|
||||
/// JSON format parser for tool calls
|
||||
///
|
||||
/// Handles various JSON formats for function calling:
|
||||
/// Handles pure JSON formats for function calling:
|
||||
/// - Single tool call: {"name": "fn", "arguments": {...}}
|
||||
/// - Multiple tool calls: [{"name": "fn1", "arguments": {...}}, ...]
|
||||
/// - With parameters instead of arguments: {"name": "fn", "parameters": {...}}
|
||||
///
|
||||
/// Supports configurable token markers for different models
|
||||
pub struct JsonParser {
|
||||
/// Token configuration for parsing
|
||||
token_config: TokenConfig,
|
||||
/// Parser for handling incomplete JSON during streaming
|
||||
partial_json: PartialJson,
|
||||
/// Regex patterns for extracting content between tokens
|
||||
extractors: Vec<Regex>,
|
||||
}
|
||||
|
||||
impl JsonParser {
|
||||
/// Create a new JSON parser with default configuration
|
||||
/// Create a new JSON parser
|
||||
pub fn new() -> Self {
|
||||
Self::with_config(TokenConfig {
|
||||
start_tokens: vec![],
|
||||
end_tokens: vec![],
|
||||
separator: ", ".to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Create a parser with custom token configuration
|
||||
pub fn with_config(config: TokenConfig) -> Self {
|
||||
// Build extraction patterns for each token pair
|
||||
let extractors: Vec<Regex> = config
|
||||
.iter_pairs()
|
||||
.filter_map(|(start, end)| {
|
||||
if !start.is_empty() && !end.is_empty() {
|
||||
// Use (?s) flag to enable DOTALL mode so . matches newlines
|
||||
let pattern =
|
||||
format!(r"(?s){}(.*?){}", regex::escape(start), regex::escape(end));
|
||||
Regex::new(&pattern).ok()
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
|
||||
Self {
|
||||
token_config: config,
|
||||
partial_json: PartialJson::default(),
|
||||
extractors,
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract JSON content from text, handling wrapper tokens if configured
|
||||
fn extract_json_content<'a>(&self, text: &'a str) -> &'a str {
|
||||
let mut content = text;
|
||||
|
||||
// Try each extractor pattern (for tokens with both start and end)
|
||||
for extractor in &self.extractors {
|
||||
if let Some(captures) = extractor.captures(content) {
|
||||
if let Some(matched) = captures.get(1) {
|
||||
return matched.as_str().trim();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Handle special case where there's a start token but no end token
|
||||
for (start, end) in self.token_config.iter_pairs() {
|
||||
if !start.is_empty() && end.is_empty() {
|
||||
// Find the start token and extract everything after it
|
||||
if let Some(pos) = content.find(start) {
|
||||
content = &content[pos + start.len()..];
|
||||
return content.trim();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
content.trim()
|
||||
}
|
||||
|
||||
/// Try to extract a first valid JSON object or array from text that may contain other content
|
||||
/// Returns (json_string, normal_text) where normal_text is text before and after the JSON
|
||||
fn extract_json_from_text(&self, text: &str) -> Option<(String, String)> {
|
||||
@@ -218,38 +158,20 @@ impl JsonParser {
|
||||
Ok(tools)
|
||||
}
|
||||
|
||||
/// Check if text contains potential tool call markers
|
||||
/// Check if text contains JSON tool call markers (complete markers)
|
||||
fn has_tool_markers(&self, text: &str) -> bool {
|
||||
// If no start tokens configured, check for JSON structure
|
||||
if self.token_config.start_tokens.is_empty() {
|
||||
// For JSON, we just need to see the start of an object or array
|
||||
return text.contains('{') || text.contains('[');
|
||||
}
|
||||
|
||||
// Check for any start token
|
||||
let has_start_token = self
|
||||
.token_config
|
||||
.start_tokens
|
||||
.iter()
|
||||
.any(|token| text.contains(token));
|
||||
|
||||
// Also check if we have what looks like JSON even without start token
|
||||
// This handles cases where we've already processed the start token
|
||||
// and are working on subsequent tools
|
||||
has_start_token || (text.trim_start().starts_with('{') && text.contains(r#""name""#))
|
||||
(text.contains('{') || text.contains('[')) && text.contains("name")
|
||||
}
|
||||
|
||||
/// Check if text might contain a partial start token (for streaming)
|
||||
fn has_partial_start_token(&self, text: &str) -> bool {
|
||||
if self.token_config.start_tokens.is_empty() {
|
||||
return false;
|
||||
}
|
||||
/// Check if buffer could be building toward a tool call pattern
|
||||
fn has_partial_start_token(&self, buffer: &str) -> bool {
|
||||
// Check if buffer ends with a partial match of tool call patterns
|
||||
let patterns = [r#"{"name""#, r#"[{"name""#];
|
||||
|
||||
// Check if the end of the buffer could be the start of any start token
|
||||
for start_token in &self.token_config.start_tokens {
|
||||
for i in 1..start_token.len() {
|
||||
let token_prefix = &start_token[..i];
|
||||
if text.ends_with(token_prefix) {
|
||||
for pattern in &patterns {
|
||||
// Check if buffer ends with any partial of this pattern
|
||||
for i in 1..=buffer.len().min(pattern.len()) {
|
||||
if pattern.starts_with(&buffer[buffer.len() - i..]) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@@ -267,134 +189,20 @@ impl Default for JsonParser {
|
||||
#[async_trait]
|
||||
impl ToolParser for JsonParser {
|
||||
async fn parse_complete(&self, text: &str) -> ToolParserResult<(String, Vec<ToolCall>)> {
|
||||
// Check if we have multiple start tokens (e.g., multiple <|python_tag|> markers)
|
||||
if !self.token_config.start_tokens.is_empty() {
|
||||
let start_token = &self.token_config.start_tokens[0];
|
||||
if !start_token.is_empty() && text.matches(start_token).count() > 1 {
|
||||
// We have multiple occurrences of the start token
|
||||
let mut all_tools = Vec::new();
|
||||
let mut all_normal_text = String::new();
|
||||
let mut remaining = text;
|
||||
// Always use extract_json_from_text to handle both pure JSON and mixed content
|
||||
if let Some((extracted_json, normal_text)) = self.extract_json_from_text(text) {
|
||||
let parsed = serde_json::from_str::<Value>(&extracted_json)
|
||||
.map_err(|e| ToolParserError::ParsingFailed(e.to_string()))
|
||||
.and_then(|v| self.parse_json_value(&v));
|
||||
|
||||
while let Some(start_pos) = remaining.find(start_token.as_str()) {
|
||||
// Add text before this start token to normal text
|
||||
all_normal_text.push_str(&remaining[..start_pos]);
|
||||
|
||||
// Extract content after this start token
|
||||
let after_token = &remaining[start_pos + start_token.len()..];
|
||||
|
||||
// Find where this JSON ends (look for the next start token or end of string)
|
||||
let end_pos = if let Some(next_start) = after_token.find(start_token.as_str()) {
|
||||
next_start
|
||||
} else {
|
||||
after_token.len()
|
||||
};
|
||||
|
||||
let json_content = &after_token[..end_pos];
|
||||
|
||||
// Try to extract and parse JSON from this segment
|
||||
if let Some((extracted, segment_normal_text)) =
|
||||
self.extract_json_from_text(json_content)
|
||||
{
|
||||
if let Ok(value) = serde_json::from_str::<Value>(&extracted) {
|
||||
if let Ok(tools) = self.parse_json_value(&value) {
|
||||
all_tools.extend(tools);
|
||||
}
|
||||
}
|
||||
// Add the normal text from this segment
|
||||
all_normal_text.push_str(&segment_normal_text);
|
||||
} else {
|
||||
// If no JSON found, add the entire content as normal text
|
||||
all_normal_text.push_str(json_content);
|
||||
}
|
||||
|
||||
// Move to the next segment
|
||||
remaining = &remaining[start_pos + start_token.len() + end_pos..];
|
||||
if remaining.is_empty() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Add any remaining text
|
||||
all_normal_text.push_str(remaining);
|
||||
|
||||
return Ok((all_normal_text, all_tools));
|
||||
match parsed {
|
||||
Ok(tools) => return Ok((normal_text, tools)),
|
||||
Err(e) => tracing::warn!("parse_complete failed: {:?}", e),
|
||||
}
|
||||
}
|
||||
|
||||
// Extract JSON content from wrapper tokens if present
|
||||
let json_content = self.extract_json_content(text);
|
||||
|
||||
// Try to parse as JSON first
|
||||
match serde_json::from_str::<Value>(json_content) {
|
||||
Ok(value) => {
|
||||
let tools = self.parse_json_value(&value)?;
|
||||
Ok((String::new(), tools))
|
||||
}
|
||||
Err(_) => {
|
||||
// If parse failed, check if we have multiple JSON objects separated by the configured separator
|
||||
// Only do this if we can reasonably expect multiple complete JSON objects
|
||||
// (i.e., text starts and ends with JSON-like structure)
|
||||
if !self.token_config.separator.is_empty()
|
||||
&& json_content.contains(&self.token_config.separator)
|
||||
&& json_content.trim().starts_with('{')
|
||||
&& json_content.trim().ends_with('}')
|
||||
{
|
||||
let mut all_tools = Vec::new();
|
||||
|
||||
// Split by separator and try to parse each part
|
||||
let parts: Vec<&str> =
|
||||
json_content.split(&self.token_config.separator).collect();
|
||||
let mut normal_parts = Vec::new();
|
||||
|
||||
for part in parts {
|
||||
let trimmed = part.trim();
|
||||
if trimmed.is_empty() {
|
||||
normal_parts.push(trimmed.to_string());
|
||||
continue;
|
||||
}
|
||||
|
||||
// Try to parse this part as JSON
|
||||
if let Ok(value) = serde_json::from_str::<Value>(trimmed) {
|
||||
if let Ok(tools) = self.parse_json_value(&value) {
|
||||
all_tools.extend(tools);
|
||||
}
|
||||
normal_parts.push(trimmed.to_string());
|
||||
} else if let Some((extracted, part_normal_text)) =
|
||||
self.extract_json_from_text(trimmed)
|
||||
{
|
||||
// Try extracting JSON from this part
|
||||
if let Ok(value) = serde_json::from_str::<Value>(&extracted) {
|
||||
if let Ok(tools) = self.parse_json_value(&value) {
|
||||
all_tools.extend(tools);
|
||||
}
|
||||
}
|
||||
normal_parts.push(part_normal_text);
|
||||
} else {
|
||||
normal_parts.push(trimmed.to_string());
|
||||
}
|
||||
}
|
||||
|
||||
// Rejoin with the original separator to preserve it
|
||||
let all_normal_text = normal_parts.join(&self.token_config.separator);
|
||||
|
||||
return Ok((all_normal_text, all_tools));
|
||||
}
|
||||
|
||||
// If no wrapper tokens configured and parse failed, try to extract JSON from mixed text
|
||||
if self.token_config.start_tokens.is_empty() {
|
||||
if let Some((extracted_json, normal_text)) = self.extract_json_from_text(text) {
|
||||
if let Ok(value) = serde_json::from_str::<Value>(&extracted_json) {
|
||||
let tools = self.parse_json_value(&value)?;
|
||||
return Ok((normal_text, tools));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// No valid JSON found, return original text as normal text
|
||||
Ok((text.to_string(), vec![]))
|
||||
}
|
||||
}
|
||||
// No valid JSON found, return original text as normal text
|
||||
Ok((text.to_string(), vec![]))
|
||||
}
|
||||
|
||||
async fn parse_incremental(
|
||||
@@ -403,132 +211,20 @@ impl ToolParser for JsonParser {
|
||||
state: &mut ParseState,
|
||||
) -> ToolParserResult<StreamResult> {
|
||||
state.buffer.push_str(chunk);
|
||||
let trimmed = state.buffer.trim();
|
||||
|
||||
// Check if we have potential tool calls
|
||||
if !self.has_tool_markers(&state.buffer) {
|
||||
if self.has_partial_start_token(&state.buffer) {
|
||||
// We might be in the middle of receiving a start token, wait for more data
|
||||
return Ok(StreamResult::Incomplete);
|
||||
}
|
||||
|
||||
// No tool markers and no partial tokens - return all buffered content as normal text
|
||||
// If no tool markers and not a partial token, return as normal text │ │
|
||||
if !self.has_tool_markers(trimmed) && !self.has_partial_start_token(trimmed) {
|
||||
let normal_text = std::mem::take(&mut state.buffer);
|
||||
return Ok(StreamResult::NormalText(normal_text));
|
||||
}
|
||||
|
||||
// Check for text before tool markers and extract it as normal text
|
||||
if !self.token_config.start_tokens.is_empty() {
|
||||
let start_token = &self.token_config.start_tokens[0];
|
||||
if !start_token.is_empty() {
|
||||
if let Some(marker_pos) = state.buffer.find(start_token) {
|
||||
if marker_pos > 0 {
|
||||
// We have text before the tool marker - extract it as normal text
|
||||
let normal_text: String = state.buffer.drain(..marker_pos).collect();
|
||||
return Ok(StreamResult::NormalText(normal_text));
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// For JSON without start tokens, look for the start of JSON structure
|
||||
// Find whichever comes first: '{' or '['
|
||||
let brace_pos = state.buffer.find('{');
|
||||
let bracket_pos = state.buffer.find('[');
|
||||
let json_pos = brace_pos.iter().chain(bracket_pos.iter()).min().copied();
|
||||
|
||||
if let Some(pos) = json_pos {
|
||||
if pos > 0 {
|
||||
// We have text before JSON structure - extract it as normal text
|
||||
let normal_text: String = state.buffer.drain(..pos).collect();
|
||||
return Ok(StreamResult::NormalText(normal_text));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Extract JSON content first to check for separators
|
||||
let extracted_json = self.extract_json_content(&state.buffer);
|
||||
|
||||
// Handle multiple JSON objects with separators
|
||||
// Check if we have a separator and potentially multiple JSON objects
|
||||
let separator = &self.token_config.separator;
|
||||
if !separator.is_empty() && extracted_json.contains(separator.as_str()) {
|
||||
// Try to find a complete JSON object before the separator
|
||||
if let Some(separator_pos) = extracted_json.find(separator.as_str()) {
|
||||
// Get JSON before separator
|
||||
let before_separator = &extracted_json[..separator_pos];
|
||||
|
||||
// Try to parse the JSON before the separator
|
||||
match serde_json::from_str::<Value>(before_separator) {
|
||||
Ok(value) => {
|
||||
// Parse tool calls from this JSON
|
||||
let tools = self.parse_json_value(&value)?;
|
||||
if !tools.is_empty() {
|
||||
// We need to figure out how much to remove from the original buffer
|
||||
// Find where the separator is in the original buffer and remove up to and including it
|
||||
if let Some(sep_in_original) = state.buffer.find(separator.as_str()) {
|
||||
// Remove processed content up to and including separator
|
||||
state.buffer.drain(..=sep_in_original + separator.len() - 1);
|
||||
}
|
||||
|
||||
// Return the first tool as complete
|
||||
if let Some(tool) = tools.into_iter().next() {
|
||||
return Ok(StreamResult::ToolComplete(tool));
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(_) => {
|
||||
// Failed to parse, continue to try other methods
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Handle multiple start tokens (e.g., multiple <|python_tag|> markers)
|
||||
if !self.token_config.start_tokens.is_empty() {
|
||||
let start_token = &self.token_config.start_tokens[0];
|
||||
if !start_token.is_empty() {
|
||||
// Find all occurrences of start token
|
||||
let occurrences: Vec<_> =
|
||||
state.buffer.match_indices(start_token.as_str()).collect();
|
||||
if occurrences.len() > 1 {
|
||||
// We have multiple start tokens, try to process the first complete one
|
||||
let first_pos = occurrences[0].0;
|
||||
let second_pos = occurrences[1].0;
|
||||
|
||||
// Extract content between first and second start token
|
||||
let first_json_section = &state.buffer[first_pos..second_pos];
|
||||
let json_content = self.extract_json_content(first_json_section);
|
||||
|
||||
// Try to parse this as complete JSON
|
||||
if let Ok(value) = serde_json::from_str::<Value>(json_content) {
|
||||
// Parse tool calls from this JSON
|
||||
let tools = self.parse_json_value(&value)?;
|
||||
if !tools.is_empty() {
|
||||
// Remove the processed section from buffer
|
||||
let remaining = state.buffer[second_pos..].to_string();
|
||||
state.buffer = remaining;
|
||||
|
||||
// Return the first tool as complete
|
||||
if let Some(tool) = tools.into_iter().next() {
|
||||
return Ok(StreamResult::ToolComplete(tool));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Regular single JSON parsing
|
||||
// Extract JSON content
|
||||
let json_content = self.extract_json_content(&state.buffer);
|
||||
|
||||
// Try to parse with partial JSON parser
|
||||
match self.partial_json.parse_value(json_content) {
|
||||
match self.partial_json.parse_value(trimmed) {
|
||||
Ok((value, consumed)) => {
|
||||
// Check if we have a complete JSON structure
|
||||
if consumed == json_content.len() {
|
||||
// Check if this is truly complete or just has null from incomplete parsing
|
||||
// We need to ensure the JSON actually ends properly (not cut off mid-key)
|
||||
let trimmed = json_content.trim();
|
||||
if consumed == trimmed.len() {
|
||||
// Check if this is truly complete
|
||||
let looks_complete = trimmed.ends_with('}') || trimmed.ends_with(']');
|
||||
|
||||
if looks_complete {
|
||||
@@ -583,35 +279,6 @@ impl ToolParser for JsonParser {
|
||||
}
|
||||
|
||||
fn detect_format(&self, text: &str) -> bool {
|
||||
// Check if text contains JSON-like structure
|
||||
if self.has_tool_markers(text) {
|
||||
// Try to extract and parse
|
||||
let json_content = self.extract_json_content(text);
|
||||
|
||||
// Check if it looks like valid JSON for tool calls
|
||||
if let Ok(value) = serde_json::from_str::<Value>(json_content) {
|
||||
match value {
|
||||
Value::Object(ref obj) => {
|
||||
// Check for tool call structure
|
||||
obj.contains_key("name") || obj.contains_key("function")
|
||||
}
|
||||
Value::Array(ref arr) => {
|
||||
// Check if array contains tool-like objects
|
||||
arr.iter().any(|v| {
|
||||
if let Some(obj) = v.as_object() {
|
||||
obj.contains_key("name") || obj.contains_key("function")
|
||||
} else {
|
||||
false
|
||||
}
|
||||
})
|
||||
}
|
||||
_ => false,
|
||||
}
|
||||
} else {
|
||||
false
|
||||
}
|
||||
} else {
|
||||
false
|
||||
}
|
||||
self.has_tool_markers(text)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
use async_trait::async_trait;
|
||||
use serde_json::Value;
|
||||
use uuid;
|
||||
|
||||
use super::json_parser::JsonParser;
|
||||
use crate::tool_parser::{
|
||||
errors::ToolParserResult,
|
||||
errors::{ToolParserError, ToolParserResult},
|
||||
partial_json::PartialJson,
|
||||
state::ParseState,
|
||||
traits::ToolParser,
|
||||
types::{StreamResult, TokenConfig, ToolCall},
|
||||
types::{FunctionCall, StreamResult, ToolCall},
|
||||
};
|
||||
|
||||
/// Llama 3.2 format parser for tool calls
|
||||
@@ -15,22 +17,124 @@ use crate::tool_parser::{
|
||||
///
|
||||
/// Also supports plain JSON without the python_tag prefix
|
||||
pub struct LlamaParser {
|
||||
/// Underlying JSON parser with Llama-specific configuration
|
||||
json_parser: JsonParser,
|
||||
/// Parser for handling incomplete JSON during streaming
|
||||
partial_json: PartialJson,
|
||||
}
|
||||
|
||||
impl LlamaParser {
|
||||
/// Create a new Llama parser
|
||||
pub fn new() -> Self {
|
||||
// Configure JSON parser with Llama's python_tag token
|
||||
// Note: No end token for python_tag format
|
||||
let json_parser = JsonParser::with_config(TokenConfig {
|
||||
start_tokens: vec!["<|python_tag|>".to_string()],
|
||||
end_tokens: vec!["".to_string()], // Empty end token
|
||||
separator: ";".to_string(), // Llama uses semicolon for multiple calls (though not well supported)
|
||||
});
|
||||
Self {
|
||||
partial_json: PartialJson::default(),
|
||||
}
|
||||
}
|
||||
|
||||
Self { json_parser }
|
||||
/// Extract content after python_tag token
|
||||
fn extract_content_after_python_tag(&self, text: &str) -> Option<(String, String)> {
|
||||
const PYTHON_TAG: &str = "<|python_tag|>";
|
||||
|
||||
if let Some(tag_pos) = text.find(PYTHON_TAG) {
|
||||
let normal_text = text[..tag_pos].to_string();
|
||||
let json_content = text[tag_pos + PYTHON_TAG.len()..].to_string();
|
||||
Some((normal_text, json_content))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse a single JSON object into a ToolCall (Llama format: name + parameters)
|
||||
fn parse_single_object(&self, obj: &Value) -> ToolParserResult<Option<ToolCall>> {
|
||||
// Llama format only: {"name": "function_name", "parameters": {...}}
|
||||
let name = obj.get("name").and_then(|v| v.as_str());
|
||||
|
||||
if let Some(name) = name {
|
||||
// Llama uses "parameters" key
|
||||
let empty_obj = Value::Object(serde_json::Map::new());
|
||||
let parameters = obj.get("parameters").unwrap_or(&empty_obj);
|
||||
|
||||
// Convert parameters to JSON string
|
||||
let arguments = serde_json::to_string(parameters)
|
||||
.map_err(|e| ToolParserError::ParsingFailed(e.to_string()))?;
|
||||
|
||||
// Generate a unique ID for Llama calls
|
||||
let id = obj
|
||||
.get("id")
|
||||
.and_then(|v| v.as_str())
|
||||
.map(String::from)
|
||||
.unwrap_or_else(|| format!("llama_call_{}", uuid::Uuid::new_v4()));
|
||||
|
||||
Ok(Some(ToolCall {
|
||||
id,
|
||||
r#type: "function".to_string(),
|
||||
function: FunctionCall {
|
||||
name: name.to_string(),
|
||||
arguments,
|
||||
},
|
||||
}))
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse JSON value(s) into tool calls
|
||||
fn parse_json_value(&self, value: &Value) -> ToolParserResult<Vec<ToolCall>> {
|
||||
let mut tools = Vec::new();
|
||||
|
||||
match value {
|
||||
Value::Array(arr) => {
|
||||
// Parse each element in the array
|
||||
for item in arr {
|
||||
if let Some(tool) = self.parse_single_object(item)? {
|
||||
tools.push(tool);
|
||||
}
|
||||
}
|
||||
}
|
||||
Value::Object(_) => {
|
||||
// Single tool call
|
||||
if let Some(tool) = self.parse_single_object(value)? {
|
||||
tools.push(tool);
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
// Not a valid tool call format
|
||||
return Ok(vec![]);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(tools)
|
||||
}
|
||||
|
||||
/// Check if text contains potential tool call markers
|
||||
fn has_python_tag(&self, text: &str) -> bool {
|
||||
text.contains("<|python_tag|>")
|
||||
}
|
||||
|
||||
/// Parse semicolon-separated JSON objects
|
||||
fn parse_semicolon_separated(&self, content: &str) -> ToolParserResult<Vec<ToolCall>> {
|
||||
let mut all_tools = Vec::new();
|
||||
|
||||
// Split by semicolon and parse each JSON object
|
||||
for part in content.split(';') {
|
||||
let trimmed = part.trim();
|
||||
if trimmed.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Try to parse this part as a single JSON object
|
||||
match serde_json::from_str::<Value>(trimmed) {
|
||||
Ok(value) => {
|
||||
if let Some(tool) = self.parse_single_object(&value)? {
|
||||
all_tools.push(tool);
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
// Skip invalid JSON parts in semicolon-separated list
|
||||
tracing::warn!("Failed to parse tool call: {}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(all_tools)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -43,31 +147,41 @@ impl Default for LlamaParser {
|
||||
#[async_trait]
|
||||
impl ToolParser for LlamaParser {
|
||||
async fn parse_complete(&self, text: &str) -> ToolParserResult<(String, Vec<ToolCall>)> {
|
||||
// First try with the configured python_tag parser
|
||||
let (_json_normal_text, tools) = self.json_parser.parse_complete(text).await?;
|
||||
|
||||
if !tools.is_empty() {
|
||||
// Extract normal text before the python tag
|
||||
// JsonParser doesn't preserve normal text for single start tokens, so we do it manually
|
||||
let normal_text = if let Some(tag_pos) = text.find("<|python_tag|>") {
|
||||
text[..tag_pos].to_string()
|
||||
// Extract normal text and JSON content
|
||||
let (normal_text, json_content) =
|
||||
if let Some((normal, json)) = self.extract_content_after_python_tag(text) {
|
||||
(normal, json)
|
||||
} else if text.trim_start().starts_with('{') {
|
||||
(String::new(), text.to_string())
|
||||
} else {
|
||||
String::new()
|
||||
// No JSON structure found
|
||||
return Ok((text.to_string(), vec![]));
|
||||
};
|
||||
return Ok((normal_text, tools));
|
||||
|
||||
// Parse the JSON content (may contain semicolon-separated objects)
|
||||
let tools = if json_content.contains(';') {
|
||||
self.parse_semicolon_separated(&json_content)?
|
||||
} else {
|
||||
// Try single JSON object
|
||||
let parsed = serde_json::from_str::<Value>(json_content.trim())
|
||||
.map_err(|e| ToolParserError::ParsingFailed(e.to_string()))
|
||||
.and_then(|v| {
|
||||
self.parse_single_object(&v)
|
||||
.map(|opt| opt.map_or_else(Vec::new, |tool| vec![tool]))
|
||||
});
|
||||
|
||||
parsed.unwrap_or_else(|e| {
|
||||
tracing::warn!("Failed to parse tool call: {:?}", e);
|
||||
vec![]
|
||||
})
|
||||
};
|
||||
|
||||
// If we couldn't parse any tools, return the original text
|
||||
if tools.is_empty() {
|
||||
return Ok((text.to_string(), vec![]));
|
||||
}
|
||||
|
||||
// If no results and text starts with '{', try plain JSON
|
||||
if text.trim_start().starts_with('{') {
|
||||
// Create a temporary plain JSON parser
|
||||
let plain_parser = JsonParser::new();
|
||||
let (_json_normal_text, tools) = plain_parser.parse_complete(text).await?;
|
||||
// For plain JSON, don't extract normal text (consistent with JsonParser behavior)
|
||||
return Ok((String::new(), tools));
|
||||
}
|
||||
|
||||
// No tool calls found, return original text as normal text
|
||||
Ok((text.to_string(), vec![]))
|
||||
Ok((normal_text, tools))
|
||||
}
|
||||
|
||||
async fn parse_incremental(
|
||||
@@ -75,29 +189,138 @@ impl ToolParser for LlamaParser {
|
||||
chunk: &str,
|
||||
state: &mut ParseState,
|
||||
) -> ToolParserResult<StreamResult> {
|
||||
// First, try with the configured json_parser (which handles python_tag)
|
||||
let result = self.json_parser.parse_incremental(chunk, state).await?;
|
||||
state.buffer.push_str(chunk);
|
||||
|
||||
// If we get Incomplete and no python_tag in buffer, might be plain JSON
|
||||
if matches!(result, StreamResult::Incomplete) {
|
||||
let trimmed = state.buffer.trim_start();
|
||||
if trimmed.starts_with('{') && !state.buffer.contains("<|python_tag|>") {
|
||||
// Likely plain JSON, try with a plain parser
|
||||
// Note: We need to be careful not to double-add the chunk
|
||||
let plain_parser = JsonParser::new();
|
||||
// The chunk was already added to state.buffer by json_parser above
|
||||
// So we call with empty string to just process what's in the buffer
|
||||
return plain_parser.parse_incremental("", state).await;
|
||||
// In streaming mode, be more lenient - check for potential JSON start
|
||||
let has_potential_json = state.buffer.contains('{');
|
||||
let has_tag = self.has_python_tag(&state.buffer);
|
||||
|
||||
// If we have neither python_tag nor potential JSON structure, return as normal text
|
||||
if !has_tag && !has_potential_json {
|
||||
// No relevant markers detected - return all buffered content as normal text
|
||||
let normal_text = std::mem::take(&mut state.buffer);
|
||||
return Ok(StreamResult::NormalText(normal_text));
|
||||
}
|
||||
|
||||
// If we only have '{' without more content, wait for more data
|
||||
let trimmed = state.buffer.trim();
|
||||
if (trimmed == "{") && !has_tag {
|
||||
return Ok(StreamResult::Incomplete);
|
||||
}
|
||||
|
||||
// Check for text before python_tag and extract it as normal text
|
||||
if let Some(tag_pos) = state.buffer.find("<|python_tag|>") {
|
||||
if tag_pos > 0 {
|
||||
// We have text before the python_tag - extract it as normal text
|
||||
let normal_text: String = state.buffer.drain(..tag_pos).collect();
|
||||
return Ok(StreamResult::NormalText(normal_text));
|
||||
}
|
||||
} else {
|
||||
// For JSON without python_tag, look for the start of JSON structure
|
||||
let brace_pos = state.buffer.find('{');
|
||||
let bracket_pos = state.buffer.find('[');
|
||||
let json_pos = brace_pos.iter().chain(bracket_pos.iter()).min().copied();
|
||||
|
||||
if let Some(pos) = json_pos {
|
||||
if pos > 0 {
|
||||
// We have text before JSON structure - extract it as normal text
|
||||
let normal_text: String = state.buffer.drain(..pos).collect();
|
||||
return Ok(StreamResult::NormalText(normal_text));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
// Extract JSON content based on whether we have python_tag
|
||||
let (json_content, content_start_pos) = if self.has_python_tag(&state.buffer) {
|
||||
// Extract content after python_tag
|
||||
if let Some(tag_pos) = state.buffer.find("<|python_tag|>") {
|
||||
let start = tag_pos + "<|python_tag|>".len();
|
||||
(&state.buffer[start..], start)
|
||||
} else {
|
||||
(&state.buffer[..], 0)
|
||||
}
|
||||
} else {
|
||||
// Find where the actual content starts after trimming
|
||||
let trimmed = state.buffer.trim_start();
|
||||
let trim_offset = state.buffer.len() - trimmed.len();
|
||||
(trimmed.trim_end(), trim_offset)
|
||||
};
|
||||
|
||||
// Check if we have a semicolon separator (multiple tools)
|
||||
if let Some(semicolon_pos) = json_content.find(';') {
|
||||
// We have multiple tools - try to parse the first one
|
||||
let first_json = &json_content[..semicolon_pos];
|
||||
|
||||
if let Ok(value) = serde_json::from_str::<Value>(first_json.trim()) {
|
||||
if let Some(tool) = self.parse_single_object(&value)? {
|
||||
// Remove the parsed JSON and semicolon from the buffer
|
||||
let end_pos = content_start_pos + semicolon_pos + 1; // +1 to include the semicolon
|
||||
state.buffer.drain(content_start_pos..end_pos);
|
||||
|
||||
return Ok(StreamResult::ToolComplete(tool));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Try to parse with partial JSON parser
|
||||
match self.partial_json.parse_value(json_content) {
|
||||
Ok((value, consumed)) => {
|
||||
// Check if we have a complete JSON structure
|
||||
if consumed == json_content.len() {
|
||||
// Check if this is truly complete
|
||||
let looks_complete = json_content.ends_with('}') || json_content.ends_with(']');
|
||||
|
||||
if looks_complete {
|
||||
// Complete JSON, parse tool calls
|
||||
let tools = self.parse_json_value(&value)?;
|
||||
if !tools.is_empty() {
|
||||
// Clear buffer since we consumed everything
|
||||
state.buffer.clear();
|
||||
|
||||
// Return the first tool as complete
|
||||
if let Some(tool) = tools.into_iter().next() {
|
||||
return Ok(StreamResult::ToolComplete(tool));
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Partial JSON, try to extract tool name for streaming
|
||||
if let Some(name) = value.get("name").and_then(|v| v.as_str()) {
|
||||
// Return tool name once we see it
|
||||
if !state.in_string {
|
||||
state.in_string = true; // Use as a flag for "name sent"
|
||||
return Ok(StreamResult::ToolName {
|
||||
index: 0,
|
||||
name: name.to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
// Check for complete arguments
|
||||
if let Some(args) =
|
||||
value.get("arguments").or_else(|| value.get("parameters"))
|
||||
{
|
||||
if let Ok(args_str) = serde_json::to_string(args) {
|
||||
return Ok(StreamResult::ToolArguments {
|
||||
index: 0,
|
||||
arguments: args_str,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(_) => {
|
||||
// Failed to parse even as partial JSON
|
||||
// Continue waiting for more data
|
||||
}
|
||||
}
|
||||
|
||||
Ok(StreamResult::Incomplete)
|
||||
}
|
||||
|
||||
fn detect_format(&self, text: &str) -> bool {
|
||||
// Llama format if contains python_tag or starts with JSON object
|
||||
text.contains("<|python_tag|>")
|
||||
|| (text.trim_start().starts_with('{')
|
||||
&& (text.contains(r#""name""#) || text.contains(r#""function""#)))
|
||||
|| (text.trim_start().starts_with('{') && text.contains(r#""name""#))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -280,33 +280,6 @@ impl ToolParser for MistralParser {
|
||||
}
|
||||
|
||||
fn detect_format(&self, text: &str) -> bool {
|
||||
// Check if text contains Mistral-specific markers
|
||||
if self.has_tool_markers(text) {
|
||||
// Try to extract and validate the array
|
||||
if let Some(json_array) = self.extract_json_array(text) {
|
||||
// Check if it's valid JSON
|
||||
if let Ok(value) = serde_json::from_str::<Value>(json_array) {
|
||||
// Check if it contains tool-like structures
|
||||
match value {
|
||||
Value::Array(ref arr) => arr.iter().any(|v| {
|
||||
v.as_object().is_some_and(|o| {
|
||||
o.contains_key("name") && o.contains_key("arguments")
|
||||
})
|
||||
}),
|
||||
Value::Object(ref obj) => {
|
||||
obj.contains_key("name") && obj.contains_key("arguments")
|
||||
}
|
||||
_ => false,
|
||||
}
|
||||
} else {
|
||||
false
|
||||
}
|
||||
} else {
|
||||
// Has markers but no complete array - might be streaming
|
||||
true
|
||||
}
|
||||
} else {
|
||||
false
|
||||
}
|
||||
self.has_tool_markers(text)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -130,32 +130,7 @@ impl ToolParser for PythonicParser {
|
||||
return true;
|
||||
}
|
||||
|
||||
let trimmed = cleaned.trim();
|
||||
let Some(open_idx) = trimmed.find('[') else {
|
||||
return false;
|
||||
};
|
||||
|
||||
let after_bracket = trimmed[open_idx + 1..].trim_start();
|
||||
let mut chars = after_bracket.char_indices();
|
||||
let Some((_, first_char)) = chars.next() else {
|
||||
return false;
|
||||
};
|
||||
|
||||
if !(first_char.is_ascii_alphabetic() || first_char == '_') {
|
||||
return false;
|
||||
}
|
||||
|
||||
let mut ident_len = first_char.len_utf8();
|
||||
for (idx, ch) in chars {
|
||||
if ch.is_alphanumeric() || ch == '_' {
|
||||
ident_len = idx + ch.len_utf8();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
let remaining = after_bracket[ident_len..].trim_start();
|
||||
remaining.starts_with('(')
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -39,14 +39,6 @@ impl QwenParser {
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract all tool call blocks from text
|
||||
fn extract_tool_calls<'a>(&self, text: &'a str) -> Vec<&'a str> {
|
||||
self.extractor
|
||||
.captures_iter(text)
|
||||
.filter_map(|cap| cap.get(1).map(|m| m.as_str()))
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Parse a single JSON object into a ToolCall
|
||||
fn parse_single_object(&self, obj: &Value, index: usize) -> ToolParserResult<Option<ToolCall>> {
|
||||
let name = obj.get("name").and_then(|v| v.as_str());
|
||||
@@ -142,17 +134,15 @@ impl ToolParser for QwenParser {
|
||||
let mut tools = Vec::new();
|
||||
for (index, captures) in self.extractor.captures_iter(text).enumerate() {
|
||||
if let Some(json_str) = captures.get(1) {
|
||||
match serde_json::from_str::<Value>(json_str.as_str().trim()) {
|
||||
Ok(value) => match self.parse_single_object(&value, index) {
|
||||
Ok(Some(tool)) => tools.push(tool),
|
||||
Ok(None) => continue,
|
||||
Err(e) => {
|
||||
tracing::warn!("Failed to parse tool call: {}", e);
|
||||
continue;
|
||||
}
|
||||
},
|
||||
let parsed = serde_json::from_str::<Value>(json_str.as_str().trim())
|
||||
.map_err(|e| ToolParserError::ParsingFailed(e.to_string()))
|
||||
.and_then(|v| self.parse_single_object(&v, index));
|
||||
|
||||
match parsed {
|
||||
Ok(Some(tool)) => tools.push(tool),
|
||||
Ok(None) => continue,
|
||||
Err(e) => {
|
||||
tracing::warn!("Failed to parse JSON in tool call: {}", e);
|
||||
tracing::warn!("Failed to parse tool call {}: {:?}", index, e);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
@@ -268,26 +258,6 @@ impl ToolParser for QwenParser {
|
||||
}
|
||||
|
||||
fn detect_format(&self, text: &str) -> bool {
|
||||
// Check if text contains Qwen-specific markers. If not, it's not this format.
|
||||
if !self.has_tool_markers(text) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Try to extract tool calls to see if we have a complete, valid one.
|
||||
let tool_blocks = self.extract_tool_calls(text);
|
||||
for json_str in &tool_blocks {
|
||||
if let Ok(value) = serde_json::from_str::<Value>(json_str.trim()) {
|
||||
if let Some(obj) = value.as_object() {
|
||||
if obj.contains_key("name") && obj.contains_key("arguments") {
|
||||
// Found a valid, complete tool call.
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If we have the marker but no valid complete tool call,
|
||||
// it could be a partial stream. We should detect this as the format.
|
||||
true
|
||||
self.has_tool_markers(text)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,7 +4,6 @@ use crate::tool_parser::partial_json::{
|
||||
compute_diff, find_common_prefix, is_complete_json, PartialJson,
|
||||
};
|
||||
use crate::tool_parser::traits::ToolParser;
|
||||
use crate::tool_parser::types::TokenConfig;
|
||||
|
||||
#[test]
|
||||
fn test_parse_state_new() {
|
||||
@@ -42,20 +41,6 @@ fn test_parse_state_process_char() {
|
||||
assert!(state.in_string); // Still in string because quote was escaped
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_token_config() {
|
||||
let config = TokenConfig {
|
||||
start_tokens: vec!["<start>".to_string(), "[".to_string()],
|
||||
end_tokens: vec!["</end>".to_string(), "]".to_string()],
|
||||
separator: ", ".to_string(),
|
||||
};
|
||||
|
||||
let pairs: Vec<_> = config.iter_pairs().collect();
|
||||
assert_eq!(pairs.len(), 2);
|
||||
assert_eq!(pairs[0], ("<start>", "</end>"));
|
||||
assert_eq!(pairs[1], ("[", "]"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parser_registry() {
|
||||
let registry = ParserRegistry::new();
|
||||
@@ -280,46 +265,7 @@ async fn test_json_parser_with_parameters() {
|
||||
assert!(tools[0].function.arguments.contains("add"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_json_parser_with_tokens() {
|
||||
let parser = JsonParser::with_config(TokenConfig {
|
||||
start_tokens: vec!["[TOOL_CALLS] [".to_string()],
|
||||
end_tokens: vec!["]".to_string()],
|
||||
separator: ", ".to_string(),
|
||||
});
|
||||
|
||||
let input = r#"[TOOL_CALLS] [{"name": "search", "arguments": {"query": "rust programming"}}]"#;
|
||||
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
|
||||
|
||||
assert_eq!(tools.len(), 1);
|
||||
assert_eq!(tools[0].function.name, "search");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_multiline_json_with_tokens() {
|
||||
let parser = JsonParser::with_config(TokenConfig {
|
||||
start_tokens: vec!["<tool>".to_string()],
|
||||
end_tokens: vec!["</tool>".to_string()],
|
||||
separator: ", ".to_string(),
|
||||
});
|
||||
|
||||
// Pretty-printed multi-line JSON
|
||||
let input = r#"<tool>{
|
||||
"name": "get_weather",
|
||||
"arguments": {
|
||||
"location": "San Francisco",
|
||||
"units": "celsius",
|
||||
"include_forecast": true
|
||||
}
|
||||
}</tool>"#;
|
||||
|
||||
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
|
||||
assert_eq!(tools.len(), 1);
|
||||
assert_eq!(tools[0].function.name, "get_weather");
|
||||
assert!(tools[0].function.arguments.contains("San Francisco"));
|
||||
assert!(tools[0].function.arguments.contains("celsius"));
|
||||
assert!(tools[0].function.arguments.contains("true"));
|
||||
}
|
||||
// Tests removed - TokenConfig no longer supported in JsonParser
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_multiline_json_array() {
|
||||
@@ -361,29 +307,6 @@ fn test_json_parser_format_detection() {
|
||||
|
||||
// Should not detect non-tool formats
|
||||
assert!(!parser.detect_format("plain text"));
|
||||
assert!(!parser.detect_format(r#"{"key": "value"}"#));
|
||||
assert!(!parser.detect_format(r#"{"data": {"nested": true}}"#));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_json_parser_streaming() {
|
||||
let parser = JsonParser::new();
|
||||
let mut state = ParseState::new();
|
||||
|
||||
let full_json = r#"{"name": "get_weather", "arguments": {"location": "San Francisco"}}"#;
|
||||
|
||||
let result = parser
|
||||
.parse_incremental(full_json, &mut state)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
match result {
|
||||
StreamResult::ToolComplete(tool) => {
|
||||
assert_eq!(tool.function.name, "get_weather");
|
||||
assert!(tool.function.arguments.contains("San Francisco"));
|
||||
}
|
||||
_ => panic!("Expected ToolComplete for complete JSON"),
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
@@ -469,37 +392,7 @@ mod failure_cases {
|
||||
assert_eq!(tools[0].function.arguments, "null");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_broken_wrapper_tokens() {
|
||||
let parser = JsonParser::with_config(TokenConfig {
|
||||
start_tokens: vec!["<tool>".to_string()],
|
||||
end_tokens: vec!["</tool>".to_string()],
|
||||
separator: ", ".to_string(),
|
||||
});
|
||||
|
||||
// Missing end token
|
||||
let input = r#"<tool>{"name": "test", "arguments": {}}"#;
|
||||
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
|
||||
assert_eq!(
|
||||
tools.len(),
|
||||
0,
|
||||
"Should fail to parse without complete wrapper"
|
||||
);
|
||||
|
||||
// Missing start token - parser looks for complete wrapper, so this won't parse
|
||||
let input = r#"{"name": "test", "arguments": {}}</tool>"#;
|
||||
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
|
||||
assert_eq!(
|
||||
tools.len(),
|
||||
0,
|
||||
"Should not parse JSON with incomplete wrapper"
|
||||
);
|
||||
|
||||
// Mismatched tokens
|
||||
let input = r#"<tool>{"name": "test", "arguments": {}}</wrong>"#;
|
||||
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
|
||||
assert_eq!(tools.len(), 0, "Should fail with mismatched tokens");
|
||||
}
|
||||
// Test removed - wrapper token functionality moved to specific parsers
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_invalid_json_structures() {
|
||||
@@ -653,34 +546,6 @@ mod edge_cases {
|
||||
assert!(tools[0].function.arguments.contains("null"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_multiple_token_pairs_with_conflicts() {
|
||||
let parser = JsonParser::with_config(TokenConfig {
|
||||
start_tokens: vec!["<<".to_string(), "<tool>".to_string()],
|
||||
end_tokens: vec![">>".to_string(), "</tool>".to_string()],
|
||||
separator: ", ".to_string(),
|
||||
});
|
||||
|
||||
// First pattern
|
||||
let input = r#"<<{"name": "test1", "arguments": {}}>>"#;
|
||||
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
|
||||
assert_eq!(tools.len(), 1);
|
||||
assert_eq!(tools[0].function.name, "test1");
|
||||
|
||||
// Second pattern
|
||||
let input = r#"<tool>{"name": "test2", "arguments": {}}</tool>"#;
|
||||
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
|
||||
assert_eq!(tools.len(), 1);
|
||||
assert_eq!(tools[0].function.name, "test2");
|
||||
|
||||
// Nested patterns (should use first match)
|
||||
let input = r#"<<tool>{"name": "test3", "arguments": {}}</tool>>"#;
|
||||
let (_normal_text, tools) = parser.parse_complete(input).await.unwrap();
|
||||
// This is tricky - depends on regex behavior
|
||||
// The parser should handle this gracefully
|
||||
assert!(tools.len() <= 1, "Should not parse multiple times");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_streaming_with_partial_chunks() {
|
||||
let parser = JsonParser::new();
|
||||
|
||||
Reference in New Issue
Block a user