[router] add llama3.2 multi json streaming parser (#9735)
This commit is contained in:
@@ -356,6 +356,81 @@ impl ToolParser for JsonParser {
|
|||||||
return Ok(StreamResult::Incomplete);
|
return Ok(StreamResult::Incomplete);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Extract JSON content first to check for separators
|
||||||
|
let extracted_json = self.extract_json_content(&state.buffer);
|
||||||
|
|
||||||
|
// Handle multiple JSON objects with separators
|
||||||
|
// Check if we have a separator and potentially multiple JSON objects
|
||||||
|
let separator = &self.token_config.separator;
|
||||||
|
if !separator.is_empty() && extracted_json.contains(separator.as_str()) {
|
||||||
|
// Try to find a complete JSON object before the separator
|
||||||
|
if let Some(separator_pos) = extracted_json.find(separator.as_str()) {
|
||||||
|
// Get JSON before separator
|
||||||
|
let before_separator = &extracted_json[..separator_pos];
|
||||||
|
|
||||||
|
// Try to parse the JSON before the separator
|
||||||
|
match serde_json::from_str::<Value>(before_separator) {
|
||||||
|
Ok(value) => {
|
||||||
|
// Parse tool calls from this JSON
|
||||||
|
let tools = self.parse_json_value(&value)?;
|
||||||
|
if !tools.is_empty() {
|
||||||
|
// We need to figure out how much to remove from the original buffer
|
||||||
|
// Find where the separator is in the original buffer and remove up to and including it
|
||||||
|
if let Some(sep_in_original) = state.buffer.find(separator.as_str()) {
|
||||||
|
let remaining =
|
||||||
|
state.buffer[sep_in_original + separator.len()..].to_string();
|
||||||
|
state.buffer = remaining;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return the first tool as complete
|
||||||
|
if let Some(tool) = tools.into_iter().next() {
|
||||||
|
return Ok(StreamResult::ToolComplete(tool));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(_) => {
|
||||||
|
// Failed to parse, continue to try other methods
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle multiple start tokens (e.g., multiple <|python_tag|> markers)
|
||||||
|
if !self.token_config.start_tokens.is_empty() {
|
||||||
|
let start_token = &self.token_config.start_tokens[0];
|
||||||
|
if !start_token.is_empty() {
|
||||||
|
// Find all occurrences of start token
|
||||||
|
let occurrences: Vec<_> =
|
||||||
|
state.buffer.match_indices(start_token.as_str()).collect();
|
||||||
|
if occurrences.len() > 1 {
|
||||||
|
// We have multiple start tokens, try to process the first complete one
|
||||||
|
let first_pos = occurrences[0].0;
|
||||||
|
let second_pos = occurrences[1].0;
|
||||||
|
|
||||||
|
// Extract content between first and second start token
|
||||||
|
let first_json_section = &state.buffer[first_pos..second_pos];
|
||||||
|
let json_content = self.extract_json_content(first_json_section);
|
||||||
|
|
||||||
|
// Try to parse this as complete JSON
|
||||||
|
if let Ok(value) = serde_json::from_str::<Value>(json_content) {
|
||||||
|
// Parse tool calls from this JSON
|
||||||
|
let tools = self.parse_json_value(&value)?;
|
||||||
|
if !tools.is_empty() {
|
||||||
|
// Remove the processed section from buffer
|
||||||
|
let remaining = state.buffer[second_pos..].to_string();
|
||||||
|
state.buffer = remaining;
|
||||||
|
|
||||||
|
// Return the first tool as complete
|
||||||
|
if let Some(tool) = tools.into_iter().next() {
|
||||||
|
return Ok(StreamResult::ToolComplete(tool));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Regular single JSON parsing
|
||||||
// Extract JSON content
|
// Extract JSON content
|
||||||
let json_content = self.extract_json_content(&state.buffer);
|
let json_content = self.extract_json_content(&state.buffer);
|
||||||
|
|
||||||
@@ -364,16 +439,23 @@ impl ToolParser for JsonParser {
|
|||||||
Ok((value, consumed)) => {
|
Ok((value, consumed)) => {
|
||||||
// Check if we have a complete JSON structure
|
// Check if we have a complete JSON structure
|
||||||
if consumed == json_content.len() {
|
if consumed == json_content.len() {
|
||||||
// Complete JSON, parse tool calls
|
// Check if this is truly complete or just has null from incomplete parsing
|
||||||
let tools = self.parse_json_value(&value)?;
|
// We need to ensure the JSON actually ends properly (not cut off mid-key)
|
||||||
if !tools.is_empty() {
|
let trimmed = json_content.trim();
|
||||||
// Clear buffer since we consumed everything
|
let looks_complete = trimmed.ends_with('}') || trimmed.ends_with(']');
|
||||||
state.buffer.clear();
|
|
||||||
|
|
||||||
// Return the first tool as complete
|
if looks_complete {
|
||||||
// TODO simplified version, address more complex version
|
// Complete JSON, parse tool calls
|
||||||
if let Some(tool) = tools.into_iter().next() {
|
let tools = self.parse_json_value(&value)?;
|
||||||
return Ok(StreamResult::ToolComplete(tool));
|
if !tools.is_empty() {
|
||||||
|
// Clear buffer since we consumed everything
|
||||||
|
state.buffer.clear();
|
||||||
|
|
||||||
|
// Return the first tool as complete
|
||||||
|
// TODO simplified version, address more complex version
|
||||||
|
if let Some(tool) = tools.into_iter().next() {
|
||||||
|
return Ok(StreamResult::ToolComplete(tool));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
@@ -341,14 +341,84 @@ async fn test_llama_streaming_multiple_tools() {
|
|||||||
|
|
||||||
let result = parser.parse_incremental(text, &mut state).await.unwrap();
|
let result = parser.parse_incremental(text, &mut state).await.unwrap();
|
||||||
|
|
||||||
// Current implementation may handle this differently
|
// Should get first tool complete
|
||||||
match result {
|
match result {
|
||||||
sglang_router_rs::tool_parser::StreamResult::ToolComplete(tool) => {
|
sglang_router_rs::tool_parser::StreamResult::ToolComplete(tool) => {
|
||||||
// At minimum should get first tool
|
|
||||||
assert_eq!(tool.function.name, "func1");
|
assert_eq!(tool.function.name, "func1");
|
||||||
}
|
}
|
||||||
|
_ => panic!("Expected first tool to be complete"),
|
||||||
|
}
|
||||||
|
|
||||||
|
// Process remaining buffer to get second tool
|
||||||
|
let result2 = parser.parse_incremental("", &mut state).await.unwrap();
|
||||||
|
match result2 {
|
||||||
|
sglang_router_rs::tool_parser::StreamResult::ToolComplete(tool) => {
|
||||||
|
assert_eq!(tool.function.name, "func2");
|
||||||
|
}
|
||||||
|
_ => panic!("Expected second tool to be complete"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn test_llama_streaming_multiple_tools_chunked() {
|
||||||
|
// Test streaming multiple tool calls arriving in chunks
|
||||||
|
let parser = LlamaParser::new();
|
||||||
|
let mut state = sglang_router_rs::tool_parser::ParseState::new();
|
||||||
|
|
||||||
|
// First chunk - incomplete first JSON
|
||||||
|
let chunk1 = r#"<|python_tag|>{"name": "get_weather", "arguments""#;
|
||||||
|
let result1 = parser.parse_incremental(chunk1, &mut state).await.unwrap();
|
||||||
|
|
||||||
|
// Should be incomplete or have tool name
|
||||||
|
match result1 {
|
||||||
|
sglang_router_rs::tool_parser::StreamResult::Incomplete
|
||||||
|
| sglang_router_rs::tool_parser::StreamResult::ToolName { .. }
|
||||||
|
| sglang_router_rs::tool_parser::StreamResult::ToolArguments { .. } => {
|
||||||
|
// Expected - could get tool name or be incomplete or even partial args
|
||||||
|
}
|
||||||
|
_ => panic!(
|
||||||
|
"Expected incomplete or tool name for partial JSON, got: {:?}",
|
||||||
|
result1
|
||||||
|
),
|
||||||
|
}
|
||||||
|
|
||||||
|
// Second chunk - complete first JSON and separator
|
||||||
|
let chunk2 = r#": {"city": "Paris"}};{"name": "#;
|
||||||
|
let result2 = parser.parse_incremental(chunk2, &mut state).await.unwrap();
|
||||||
|
|
||||||
|
// Should get first tool complete
|
||||||
|
match result2 {
|
||||||
|
sglang_router_rs::tool_parser::StreamResult::ToolComplete(tool) => {
|
||||||
|
assert_eq!(tool.function.name, "get_weather");
|
||||||
|
let args: serde_json::Value = serde_json::from_str(&tool.function.arguments).unwrap();
|
||||||
|
assert_eq!(args["city"], "Paris");
|
||||||
|
}
|
||||||
|
_ => panic!("Expected first tool to be complete after separator"),
|
||||||
|
}
|
||||||
|
|
||||||
|
// Third chunk - complete second JSON
|
||||||
|
let chunk3 = r#""get_time", "arguments": {"timezone": "UTC"}}"#;
|
||||||
|
let result3 = parser.parse_incremental(chunk3, &mut state).await.unwrap();
|
||||||
|
|
||||||
|
// Should get second tool complete
|
||||||
|
match result3 {
|
||||||
|
sglang_router_rs::tool_parser::StreamResult::ToolComplete(tool) => {
|
||||||
|
assert_eq!(tool.function.name, "get_time");
|
||||||
|
let args: serde_json::Value = serde_json::from_str(&tool.function.arguments).unwrap();
|
||||||
|
assert_eq!(args["timezone"], "UTC");
|
||||||
|
}
|
||||||
_ => {
|
_ => {
|
||||||
// Also acceptable if waiting for more
|
// If not complete yet, try one more empty chunk
|
||||||
|
let result4 = parser.parse_incremental("", &mut state).await.unwrap();
|
||||||
|
match result4 {
|
||||||
|
sglang_router_rs::tool_parser::StreamResult::ToolComplete(tool) => {
|
||||||
|
assert_eq!(tool.function.name, "get_time");
|
||||||
|
let args: serde_json::Value =
|
||||||
|
serde_json::from_str(&tool.function.arguments).unwrap();
|
||||||
|
assert_eq!(args["timezone"], "UTC");
|
||||||
|
}
|
||||||
|
_ => panic!("Expected second tool to be complete"),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user