[router][grpc] Support streaming for v1/chat/completions (#11179)

This commit is contained in:
Chang Su
2025-10-02 14:35:16 -07:00
committed by GitHub
parent 0618ad6dd5
commit 963175d5c0
30 changed files with 912 additions and 228 deletions

View File

@@ -123,12 +123,7 @@ impl DeepSeekParser {
let arguments = serde_json::to_string(&args)
.map_err(|e| ToolParserError::ParsingFailed(e.to_string()))?;
// Generate ID
let id = format!("deepseek_call_{}", uuid::Uuid::new_v4());
Ok(ToolCall {
id,
r#type: "function".to_string(),
function: FunctionCall {
name: func_name.to_string(),
arguments,
@@ -320,4 +315,8 @@ impl ToolParser for DeepSeekParser {
fn detect_format(&self, text: &str) -> bool {
// Detection is delegated entirely to this parser's `has_tool_markers` check on `text`.
self.has_tool_markers(text)
}
/// Returns tool-call argument text that has been parsed but not yet streamed.
///
/// Delegates to `helpers::get_unstreamed_args`, passing this parser's
/// `prev_tool_call_arr` and `streamed_args_for_tool`.
fn get_unstreamed_tool_args(&self) -> Option<Vec<ToolCallItem>> {
helpers::get_unstreamed_args(&self.prev_tool_call_arr, &self.streamed_args_for_tool)
}
}

View File

@@ -129,12 +129,7 @@ impl Glm4MoeParser {
let arguments_str = serde_json::to_string(&arguments)
.map_err(|e| ToolParserError::ParsingFailed(e.to_string()))?;
// Generate ID
let id = format!("glm4_call_{}", uuid::Uuid::new_v4());
Ok(Some(ToolCall {
id,
r#type: "function".to_string(),
function: FunctionCall {
name: func_name.to_string(),
arguments: arguments_str,
@@ -321,4 +316,8 @@ impl ToolParser for Glm4MoeParser {
fn detect_format(&self, text: &str) -> bool {
// Detection is delegated entirely to this parser's `has_tool_markers` check on `text`.
self.has_tool_markers(text)
}
/// Returns tool-call argument text that has been parsed but not yet streamed.
///
/// Delegates to `helpers::get_unstreamed_args`, passing this parser's
/// `prev_tool_call_arr` and `streamed_args_for_tool`.
fn get_unstreamed_tool_args(&self) -> Option<Vec<ToolCallItem>> {
helpers::get_unstreamed_args(&self.prev_tool_call_arr, &self.streamed_args_for_tool)
}
}

View File

@@ -113,12 +113,7 @@ impl ToolParser for GptOssParser {
}
};
// Generate unique ID
let id = format!("gpt_oss_call_{}", uuid::Uuid::new_v4());
tools.push(ToolCall {
id,
r#type: "function".to_string(),
function: FunctionCall {
name: function_name,
arguments,

View File

@@ -14,6 +14,48 @@ pub fn get_tool_indices(tools: &[Tool]) -> HashMap<String, usize> {
.collect()
}
/// Collects the part of the latest tool call's arguments that has not been streamed yet.
///
/// Only the last entry of `prev_tool_call_arr` is examined. Its `"arguments"` value is
/// serialized to JSON and compared with the text stored at the same index in
/// `streamed_args_for_tool`; whatever follows the already-streamed prefix is returned as a
/// single name-less [`ToolCallItem`], so a caller can flush arguments that only became
/// available in the final chunk.
///
/// Returns `None` when either slice is empty, when there is no streamed entry for the last
/// tool call, when that call has no `"arguments"` field or it fails to serialize, when the
/// streamed text is not a prefix of the serialized arguments, or when nothing is left to send.
pub fn get_unstreamed_args(
    prev_tool_call_arr: &[Value],
    streamed_args_for_tool: &[String],
) -> Option<Vec<ToolCallItem>> {
    // Only the most recently tracked call is considered.
    let last_call = prev_tool_call_arr.last()?;
    let tool_index = prev_tool_call_arr.len() - 1;

    // `get` yields `None` both for an empty slice and for a missing entry at this index.
    let already_streamed = streamed_args_for_tool.get(tool_index)?;

    let full_args = serde_json::to_string(last_call.get("arguments")?).ok()?;

    // A tail exists only if the streamed text is a prefix of the full serialized arguments.
    let pending = full_args.strip_prefix(already_streamed.as_str())?;

    // Argument deltas carry no function name.
    (!pending.is_empty()).then(|| {
        vec![ToolCallItem {
            tool_index,
            name: None,
            parameters: pending.to_owned(),
        }]
    })
}
/// Check if a buffer ends with a partial occurrence of a token
/// Returns Some(length) if there's a partial match, None otherwise
pub fn ends_with_partial_token(buffer: &str, token: &str) -> Option<usize> {

View File

@@ -8,7 +8,7 @@ use crate::tool_parser::{
parsers::helpers,
partial_json::PartialJson,
traits::ToolParser,
types::{FunctionCall, StreamingParseResult, ToolCall},
types::{FunctionCall, StreamingParseResult, ToolCall, ToolCallItem},
};
/// JSON format parser for tool calls
@@ -136,16 +136,7 @@ impl JsonParser {
let arguments = serde_json::to_string(args)
.map_err(|e| ToolParserError::ParsingFailed(e.to_string()))?;
// Generate a unique ID if not provided
let id = obj
.get("id")
.and_then(|v| v.as_str())
.map(String::from)
.unwrap_or_else(|| format!("call_{}", uuid::Uuid::new_v4()));
Ok(Some(ToolCall {
id,
r#type: "function".to_string(),
function: FunctionCall {
name: name.to_string(),
arguments,
@@ -274,4 +265,8 @@ impl ToolParser for JsonParser {
let trimmed = text.trim();
(trimmed.starts_with('[') || trimmed.starts_with('{')) && trimmed.contains(r#""name""#)
}
/// Returns tool-call argument text that has been parsed but not yet streamed.
///
/// Delegates to `helpers::get_unstreamed_args`, passing this parser's
/// `prev_tool_call_arr` and `streamed_args_for_tool`.
fn get_unstreamed_tool_args(&self) -> Option<Vec<ToolCallItem>> {
helpers::get_unstreamed_args(&self.prev_tool_call_arr, &self.streamed_args_for_tool)
}
}

View File

@@ -131,12 +131,7 @@ impl ToolParser for KimiK2Parser {
// Try to parse JSON arguments
match serde_json::from_str::<serde_json::Value>(function_args) {
Ok(_) => {
// Generate unique ID
let id = format!("kimi_call_{}", uuid::Uuid::new_v4());
tools.push(ToolCall {
id,
r#type: "function".to_string(),
function: FunctionCall {
name: func_name,
arguments: function_args.to_string(),
@@ -339,4 +334,8 @@ impl ToolParser for KimiK2Parser {
fn detect_format(&self, text: &str) -> bool {
// Matches when `has_tool_markers` reports a hit, or when the literal
// `<|tool_call_begin|>` token appears anywhere in `text`.
self.has_tool_markers(text) || text.contains("<|tool_call_begin|>")
}
/// Returns tool-call argument text that has been parsed but not yet streamed.
///
/// Delegates to `helpers::get_unstreamed_args`, passing this parser's
/// `prev_tool_call_arr` and `streamed_args_for_tool`.
fn get_unstreamed_tool_args(&self) -> Option<Vec<ToolCallItem>> {
helpers::get_unstreamed_args(&self.prev_tool_call_arr, &self.streamed_args_for_tool)
}
}

View File

@@ -1,6 +1,5 @@
use async_trait::async_trait;
use serde_json::Value;
use uuid;
use crate::protocols::spec::Tool;
@@ -84,16 +83,7 @@ impl LlamaParser {
let arguments = serde_json::to_string(parameters)
.map_err(|e| ToolParserError::ParsingFailed(e.to_string()))?;
// Generate a unique ID for Llama calls
let id = obj
.get("id")
.and_then(|v| v.as_str())
.map(String::from)
.unwrap_or_else(|| format!("llama_call_{}", uuid::Uuid::new_v4()));
Ok(Some(ToolCall {
id,
r#type: "function".to_string(),
function: FunctionCall {
name: name.to_string(),
arguments,
@@ -243,4 +233,8 @@ impl ToolParser for LlamaParser {
text.contains("<|python_tag|>")
|| (text.trim_start().starts_with('{') && text.contains(r#""name""#))
}
/// Returns tool-call argument text that has been parsed but not yet streamed.
///
/// Delegates to `helpers::get_unstreamed_args`, passing this parser's
/// `prev_tool_call_arr` and `streamed_args_for_tool`.
fn get_unstreamed_tool_args(&self) -> Option<Vec<crate::tool_parser::types::ToolCallItem>> {
helpers::get_unstreamed_args(&self.prev_tool_call_arr, &self.streamed_args_for_tool)
}
}

View File

@@ -146,16 +146,7 @@ impl MistralParser {
let arguments = serde_json::to_string(args)
.map_err(|e| ToolParserError::ParsingFailed(e.to_string()))?;
// Generate unique ID
let id = obj
.get("id")
.and_then(|v| v.as_str())
.map(String::from)
.unwrap_or_else(|| format!("mistral_call_{}", uuid::Uuid::new_v4()));
Ok(Some(ToolCall {
id,
r#type: "function".to_string(),
function: FunctionCall {
name: name.to_string(),
arguments,
@@ -266,4 +257,8 @@ impl ToolParser for MistralParser {
fn detect_format(&self, text: &str) -> bool {
// Detection is delegated entirely to this parser's `has_tool_markers` check on `text`.
self.has_tool_markers(text)
}
/// Returns tool-call argument text that has been parsed but not yet streamed.
///
/// Delegates to `helpers::get_unstreamed_args`, passing this parser's
/// `prev_tool_call_arr` and `streamed_args_for_tool`.
fn get_unstreamed_tool_args(&self) -> Option<Vec<crate::tool_parser::types::ToolCallItem>> {
helpers::get_unstreamed_args(&self.prev_tool_call_arr, &self.streamed_args_for_tool)
}
}

View File

@@ -244,7 +244,7 @@ fn parse_python_expression(source: &str) -> ToolParserResult<Expr> {
}
}
fn build_tool_call(expr: Expr, index: usize) -> ToolParserResult<ToolCall> {
fn build_tool_call(expr: Expr, _index: usize) -> ToolParserResult<ToolCall> {
match expr {
Expr::Call(call_expr) => {
if !call_expr.args.is_empty() {
@@ -277,8 +277,6 @@ fn build_tool_call(expr: Expr, index: usize) -> ToolParserResult<ToolCall> {
let arguments_string = serde_json::to_string(&arguments_json)?;
Ok(ToolCall {
id: format!("call-{}", index + 1),
r#type: "function".to_string(),
function: FunctionCall {
name: function_name,
arguments: arguments_string,

View File

@@ -88,16 +88,7 @@ impl QwenParser {
let arguments = serde_json::to_string(args)
.map_err(|e| ToolParserError::ParsingFailed(e.to_string()))?;
// Generate unique ID
let id = obj
.get("id")
.and_then(|v| v.as_str())
.map(String::from)
.unwrap_or_else(|| format!("qwen_call_{}", uuid::Uuid::new_v4()));
Ok(Some(ToolCall {
id,
r#type: "function".to_string(),
function: FunctionCall {
name: name.to_string(),
arguments,
@@ -255,4 +246,8 @@ impl ToolParser for QwenParser {
fn detect_format(&self, text: &str) -> bool {
// Detection is delegated entirely to this parser's `has_tool_markers` check on `text`.
self.has_tool_markers(text)
}
/// Returns tool-call argument text that has been parsed but not yet streamed.
///
/// Delegates to `helpers::get_unstreamed_args`, passing this parser's
/// `prev_tool_call_arr` and `streamed_args_for_tool`.
fn get_unstreamed_tool_args(&self) -> Option<Vec<crate::tool_parser::types::ToolCallItem>> {
helpers::get_unstreamed_args(&self.prev_tool_call_arr, &self.streamed_args_for_tool)
}
}

View File

@@ -400,12 +400,7 @@ impl Step3Parser {
let arguments_str = serde_json::to_string(&parameters)
.map_err(|e| ToolParserError::ParsingFailed(e.to_string()))?;
// Generate ID
let id = format!("step3_call_{}", uuid::Uuid::new_v4());
Ok(Some(ToolCall {
id,
r#type: "function".to_string(),
function: FunctionCall {
name: func_name.to_string(),
arguments: arguments_str,
@@ -561,4 +556,8 @@ impl ToolParser for Step3Parser {
fn detect_format(&self, text: &str) -> bool {
// Detection is delegated entirely to this parser's `has_tool_markers` check on `text`.
self.has_tool_markers(text)
}
/// Returns tool-call argument text that has been parsed but not yet streamed.
///
/// Delegates to `helpers::get_unstreamed_args`, passing this parser's
/// `prev_tool_call_arr` and `streamed_args_for_tool`.
fn get_unstreamed_tool_args(&self) -> Option<Vec<ToolCallItem>> {
helpers::get_unstreamed_args(&self.prev_tool_call_arr, &self.streamed_args_for_tool)
}
}

View File

@@ -31,8 +31,6 @@ async fn test_tool_parser_factory_model_mapping() {
#[test]
fn test_tool_call_serialization() {
let tool_call = ToolCall {
id: "call-123".to_string(),
r#type: "function".to_string(),
function: FunctionCall {
name: "search".to_string(),
arguments: r#"{"query": "rust programming"}"#.to_string(),
@@ -40,13 +38,15 @@ fn test_tool_call_serialization() {
};
let json = serde_json::to_string(&tool_call).unwrap();
assert!(json.contains("call-123"));
assert!(json.contains("search"));
assert!(json.contains("rust programming"));
let parsed: ToolCall = serde_json::from_str(&json).unwrap();
assert_eq!(parsed.id, "call-123");
assert_eq!(parsed.function.name, "search");
assert_eq!(
parsed.function.arguments,
r#"{"query": "rust programming"}"#
);
}
#[test]

View File

@@ -32,6 +32,12 @@ pub trait ToolParser: Send + Sync {
fn as_token_parser(&self) -> Option<&dyn TokenToolParser> {
// Default implementation: no `TokenToolParser` view is provided.
None
}
/// Get unstreamed tool call arguments
/// Returns tool call items for arguments that have been parsed but not yet streamed
///
/// The default implementation returns `None`; implementors may override it to report
/// pending argument text.
fn get_unstreamed_tool_args(&self) -> Option<Vec<crate::tool_parser::types::ToolCallItem>> {
None
}
}
/// Trait for partial JSON parsing

View File

@@ -1,13 +1,8 @@
use serde::{Deserialize, Serialize};
/// Parsed tool call from model output (OpenAI format)
/// Parsed tool call from model output
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct ToolCall {
/// Unique identifier for the tool call
pub id: String,
/// Type of tool call (currently always "function")
#[serde(rename = "type")]
pub r#type: String,
/// Function call details
pub function: FunctionCall,
}