[router][grpc] Refine streaming processes (#11277)
This commit is contained in:
@@ -187,6 +187,10 @@ impl ReasoningParser for BaseReasoningParser {
|
||||
fn model_type(&self) -> &str {
|
||||
&self.model_type
|
||||
}
|
||||
|
||||
fn is_in_reasoning(&self) -> bool {
|
||||
self.in_reasoning
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -55,6 +55,10 @@ impl ReasoningParser for DeepSeekR1Parser {
|
||||
fn model_type(&self) -> &str {
|
||||
self.base.model_type()
|
||||
}
|
||||
|
||||
fn is_in_reasoning(&self) -> bool {
|
||||
self.base.is_in_reasoning()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -54,6 +54,10 @@ impl ReasoningParser for Glm45Parser {
|
||||
fn model_type(&self) -> &str {
|
||||
self.base.model_type()
|
||||
}
|
||||
|
||||
fn is_in_reasoning(&self) -> bool {
|
||||
self.base.is_in_reasoning()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -54,6 +54,10 @@ impl ReasoningParser for KimiParser {
|
||||
fn model_type(&self) -> &str {
|
||||
self.base.model_type()
|
||||
}
|
||||
|
||||
fn is_in_reasoning(&self) -> bool {
|
||||
self.base.is_in_reasoning()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -55,6 +55,10 @@ impl ReasoningParser for Qwen3Parser {
|
||||
fn model_type(&self) -> &str {
|
||||
self.base.model_type()
|
||||
}
|
||||
|
||||
fn is_in_reasoning(&self) -> bool {
|
||||
self.base.is_in_reasoning()
|
||||
}
|
||||
}
|
||||
|
||||
/// QwenThinking parser - variant that assumes reasoning from start.
|
||||
@@ -106,6 +110,10 @@ impl ReasoningParser for QwenThinkingParser {
|
||||
fn model_type(&self) -> &str {
|
||||
self.base.model_type()
|
||||
}
|
||||
|
||||
fn is_in_reasoning(&self) -> bool {
|
||||
self.base.is_in_reasoning()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -54,6 +54,10 @@ impl ReasoningParser for Step3Parser {
|
||||
fn model_type(&self) -> &str {
|
||||
self.base.model_type()
|
||||
}
|
||||
|
||||
fn is_in_reasoning(&self) -> bool {
|
||||
self.base.is_in_reasoning()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -69,6 +69,11 @@ pub trait ReasoningParser: Send + Sync {
|
||||
|
||||
/// Get the model type this parser is designed for.
|
||||
fn model_type(&self) -> &str;
|
||||
|
||||
/// Check if the parser is currently in reasoning mode.
|
||||
///
|
||||
/// Returns true if the parser is currently parsing reasoning content.
|
||||
fn is_in_reasoning(&self) -> bool;
|
||||
}
|
||||
|
||||
/// Error types for reasoning parsing operations.
|
||||
|
||||
@@ -922,27 +922,31 @@ impl GrpcPDRouter {
|
||||
stream_buffer.push_str(&delta);
|
||||
|
||||
// Reasoning content handling
|
||||
if separate_reasoning {
|
||||
let (normal_text, reasoning_chunk) = router.process_reasoning_stream(
|
||||
&delta,
|
||||
index,
|
||||
&mut reasoning_parsers,
|
||||
&request_id,
|
||||
&model,
|
||||
created,
|
||||
);
|
||||
let in_reasoning = if separate_reasoning {
|
||||
let (normal_text, reasoning_chunk, in_reasoning) = router
|
||||
.process_reasoning_stream(
|
||||
&delta,
|
||||
index,
|
||||
&mut reasoning_parsers,
|
||||
&request_id,
|
||||
&model,
|
||||
created,
|
||||
);
|
||||
if let Some(chunk) = reasoning_chunk {
|
||||
tx.send(Ok(bytes::Bytes::from(Self::format_sse_chunk(&chunk))))
|
||||
.map_err(|_| "Failed to send reasoning chunk".to_string())?;
|
||||
}
|
||||
delta = normal_text;
|
||||
}
|
||||
in_reasoning
|
||||
} else {
|
||||
false
|
||||
};
|
||||
|
||||
// Tool call handling
|
||||
let tool_choice_enabled =
|
||||
!matches!(tool_choice, Some(ToolChoice::Value(ToolChoiceValue::None)));
|
||||
|
||||
if tool_choice_enabled && tools.is_some() {
|
||||
if !in_reasoning && tool_choice_enabled && tools.is_some() {
|
||||
let (should_skip, tool_chunks) = router
|
||||
.process_tool_calls_stream(
|
||||
&delta,
|
||||
@@ -1173,16 +1177,18 @@ impl GrpcPDRouter {
|
||||
request_id: &str,
|
||||
model: &str,
|
||||
created: u64,
|
||||
) -> (String, Option<ChatCompletionStreamResponse>) {
|
||||
) -> (String, Option<ChatCompletionStreamResponse>, bool) {
|
||||
// Get or create parser for this index
|
||||
reasoning_parsers
|
||||
.entry(index)
|
||||
.or_insert_with(|| self.reasoning_parser_factory.get_pooled(model));
|
||||
|
||||
if let Some(pooled_parser) = reasoning_parsers.get(&index) {
|
||||
let parse_result = {
|
||||
let (parse_result, in_reasoning) = {
|
||||
let mut parser = pooled_parser.lock().unwrap();
|
||||
parser.parse_reasoning_streaming_incremental(delta)
|
||||
let result = parser.parse_reasoning_streaming_incremental(delta);
|
||||
let in_reasoning = parser.is_in_reasoning();
|
||||
(result, in_reasoning)
|
||||
};
|
||||
|
||||
match parse_result {
|
||||
@@ -1214,7 +1220,7 @@ impl GrpcPDRouter {
|
||||
} else {
|
||||
None
|
||||
};
|
||||
return (normal_text, chunk);
|
||||
return (normal_text, chunk, in_reasoning);
|
||||
}
|
||||
Err(e) => {
|
||||
warn!("Reasoning parsing error: {}", e);
|
||||
@@ -1222,7 +1228,7 @@ impl GrpcPDRouter {
|
||||
}
|
||||
}
|
||||
|
||||
(delta.to_string(), None)
|
||||
(delta.to_string(), None, false)
|
||||
}
|
||||
|
||||
/// Helper: Process tool calls in streaming mode
|
||||
|
||||
@@ -494,16 +494,18 @@ impl GrpcRouter {
|
||||
request_id: &str,
|
||||
model: &str,
|
||||
created: u64,
|
||||
) -> (String, Option<ChatCompletionStreamResponse>) {
|
||||
) -> (String, Option<ChatCompletionStreamResponse>, bool) {
|
||||
// Get or create parser for this index
|
||||
reasoning_parsers
|
||||
.entry(index)
|
||||
.or_insert_with(|| self.reasoning_parser_factory.get_pooled(model));
|
||||
|
||||
if let Some(pooled_parser) = reasoning_parsers.get(&index) {
|
||||
let parse_result = {
|
||||
let (parse_result, in_reasoning) = {
|
||||
let mut parser = pooled_parser.lock().unwrap();
|
||||
parser.parse_reasoning_streaming_incremental(delta)
|
||||
let result = parser.parse_reasoning_streaming_incremental(delta);
|
||||
let in_reasoning = parser.is_in_reasoning();
|
||||
(result, in_reasoning)
|
||||
};
|
||||
|
||||
match parse_result {
|
||||
@@ -535,7 +537,7 @@ impl GrpcRouter {
|
||||
} else {
|
||||
None
|
||||
};
|
||||
return (normal_text, chunk);
|
||||
return (normal_text, chunk, in_reasoning);
|
||||
}
|
||||
Err(e) => {
|
||||
warn!("Reasoning parsing error: {}", e);
|
||||
@@ -543,7 +545,7 @@ impl GrpcRouter {
|
||||
}
|
||||
}
|
||||
|
||||
(delta.to_string(), None)
|
||||
(delta.to_string(), None, false)
|
||||
}
|
||||
|
||||
/// Helper: Process tool calls in streaming mode
|
||||
@@ -901,27 +903,31 @@ impl GrpcRouter {
|
||||
stream_buffer.push_str(&delta);
|
||||
|
||||
// Reasoning content handling
|
||||
if separate_reasoning {
|
||||
let (normal_text, reasoning_chunk) = router.process_reasoning_stream(
|
||||
&delta,
|
||||
index,
|
||||
&mut reasoning_parsers,
|
||||
&request_id,
|
||||
&model,
|
||||
created,
|
||||
);
|
||||
let in_reasoning = if separate_reasoning {
|
||||
let (normal_text, reasoning_chunk, in_reasoning) = router
|
||||
.process_reasoning_stream(
|
||||
&delta,
|
||||
index,
|
||||
&mut reasoning_parsers,
|
||||
&request_id,
|
||||
&model,
|
||||
created,
|
||||
);
|
||||
if let Some(chunk) = reasoning_chunk {
|
||||
tx.send(Ok(Bytes::from(Self::format_sse_chunk(&chunk))))
|
||||
.map_err(|_| "Failed to send reasoning chunk".to_string())?;
|
||||
}
|
||||
delta = normal_text;
|
||||
}
|
||||
in_reasoning
|
||||
} else {
|
||||
false
|
||||
};
|
||||
|
||||
// Tool call handling
|
||||
let tool_choice_enabled =
|
||||
!matches!(tool_choice, Some(ToolChoice::Value(ToolChoiceValue::None)));
|
||||
|
||||
if tool_choice_enabled && tools.is_some() {
|
||||
if !in_reasoning && tool_choice_enabled && tools.is_some() {
|
||||
let (should_skip, tool_chunks) = router
|
||||
.process_tool_calls_stream(
|
||||
&delta,
|
||||
|
||||
Reference in New Issue
Block a user