diff --git a/sgl-router/src/reasoning_parser/parsers/base.rs b/sgl-router/src/reasoning_parser/parsers/base.rs index 0fd2818b9..2aa8846a9 100644 --- a/sgl-router/src/reasoning_parser/parsers/base.rs +++ b/sgl-router/src/reasoning_parser/parsers/base.rs @@ -187,6 +187,10 @@ impl ReasoningParser for BaseReasoningParser { fn model_type(&self) -> &str { &self.model_type } + + fn is_in_reasoning(&self) -> bool { + self.in_reasoning + } } #[cfg(test)] diff --git a/sgl-router/src/reasoning_parser/parsers/deepseek_r1.rs b/sgl-router/src/reasoning_parser/parsers/deepseek_r1.rs index 62a7aadec..1bb2f4c48 100644 --- a/sgl-router/src/reasoning_parser/parsers/deepseek_r1.rs +++ b/sgl-router/src/reasoning_parser/parsers/deepseek_r1.rs @@ -55,6 +55,10 @@ impl ReasoningParser for DeepSeekR1Parser { fn model_type(&self) -> &str { self.base.model_type() } + + fn is_in_reasoning(&self) -> bool { + self.base.is_in_reasoning() + } } #[cfg(test)] diff --git a/sgl-router/src/reasoning_parser/parsers/glm45.rs b/sgl-router/src/reasoning_parser/parsers/glm45.rs index e4e56723c..5b2778993 100644 --- a/sgl-router/src/reasoning_parser/parsers/glm45.rs +++ b/sgl-router/src/reasoning_parser/parsers/glm45.rs @@ -54,6 +54,10 @@ impl ReasoningParser for Glm45Parser { fn model_type(&self) -> &str { self.base.model_type() } + + fn is_in_reasoning(&self) -> bool { + self.base.is_in_reasoning() + } } #[cfg(test)] diff --git a/sgl-router/src/reasoning_parser/parsers/kimi.rs b/sgl-router/src/reasoning_parser/parsers/kimi.rs index 3bc77b4f1..0095f94f0 100644 --- a/sgl-router/src/reasoning_parser/parsers/kimi.rs +++ b/sgl-router/src/reasoning_parser/parsers/kimi.rs @@ -54,6 +54,10 @@ impl ReasoningParser for KimiParser { fn model_type(&self) -> &str { self.base.model_type() } + + fn is_in_reasoning(&self) -> bool { + self.base.is_in_reasoning() + } } #[cfg(test)] diff --git a/sgl-router/src/reasoning_parser/parsers/qwen3.rs b/sgl-router/src/reasoning_parser/parsers/qwen3.rs index 8c5ce9e8c..038e7db8d 100644 --- a/sgl-router/src/reasoning_parser/parsers/qwen3.rs +++ b/sgl-router/src/reasoning_parser/parsers/qwen3.rs @@ -55,6 +55,10 @@ impl ReasoningParser for Qwen3Parser { fn model_type(&self) -> &str { self.base.model_type() } + + fn is_in_reasoning(&self) -> bool { + self.base.is_in_reasoning() + } } /// QwenThinking parser - variant that assumes reasoning from start. @@ -106,6 +110,10 @@ impl ReasoningParser for QwenThinkingParser { fn model_type(&self) -> &str { self.base.model_type() } + + fn is_in_reasoning(&self) -> bool { + self.base.is_in_reasoning() + } } #[cfg(test)] diff --git a/sgl-router/src/reasoning_parser/parsers/step3.rs b/sgl-router/src/reasoning_parser/parsers/step3.rs index cec0bcd15..155e340cc 100644 --- a/sgl-router/src/reasoning_parser/parsers/step3.rs +++ b/sgl-router/src/reasoning_parser/parsers/step3.rs @@ -54,6 +54,10 @@ impl ReasoningParser for Step3Parser { fn model_type(&self) -> &str { self.base.model_type() } + + fn is_in_reasoning(&self) -> bool { + self.base.is_in_reasoning() + } } #[cfg(test)] diff --git a/sgl-router/src/reasoning_parser/traits.rs b/sgl-router/src/reasoning_parser/traits.rs index d435e6c23..c21e342f0 100644 --- a/sgl-router/src/reasoning_parser/traits.rs +++ b/sgl-router/src/reasoning_parser/traits.rs @@ -69,6 +69,11 @@ pub trait ReasoningParser: Send + Sync { /// Get the model type this parser is designed for. fn model_type(&self) -> &str; + + /// Check if the parser is currently in reasoning mode. + /// + /// Returns true if the parser is currently parsing reasoning content. + fn is_in_reasoning(&self) -> bool; } /// Error types for reasoning parsing operations. diff --git a/sgl-router/src/routers/grpc/pd_router.rs b/sgl-router/src/routers/grpc/pd_router.rs index da3c2d7f0..362e27fcc 100644 --- a/sgl-router/src/routers/grpc/pd_router.rs +++ b/sgl-router/src/routers/grpc/pd_router.rs @@ -922,27 +922,31 @@ impl GrpcPDRouter { stream_buffer.push_str(&delta); // Reasoning content handling - if separate_reasoning { - let (normal_text, reasoning_chunk) = router.process_reasoning_stream( - &delta, - index, - &mut reasoning_parsers, - &request_id, - &model, - created, - ); + let in_reasoning = if separate_reasoning { + let (normal_text, reasoning_chunk, in_reasoning) = router + .process_reasoning_stream( + &delta, + index, + &mut reasoning_parsers, + &request_id, + &model, + created, + ); if let Some(chunk) = reasoning_chunk { tx.send(Ok(bytes::Bytes::from(Self::format_sse_chunk(&chunk)))) .map_err(|_| "Failed to send reasoning chunk".to_string())?; } delta = normal_text; - } + in_reasoning + } else { + false + }; // Tool call handling let tool_choice_enabled = !matches!(tool_choice, Some(ToolChoice::Value(ToolChoiceValue::None))); - if tool_choice_enabled && tools.is_some() { + if !in_reasoning && tool_choice_enabled && tools.is_some() { let (should_skip, tool_chunks) = router .process_tool_calls_stream( &delta, @@ -1173,16 +1177,18 @@ impl GrpcPDRouter { request_id: &str, model: &str, created: u64, - ) -> (String, Option) { + ) -> (String, Option, bool) { // Get or create parser for this index reasoning_parsers .entry(index) .or_insert_with(|| self.reasoning_parser_factory.get_pooled(model)); if let Some(pooled_parser) = reasoning_parsers.get(&index) { - let parse_result = { + let (parse_result, in_reasoning) = { let mut parser = pooled_parser.lock().unwrap(); - parser.parse_reasoning_streaming_incremental(delta) + let result = parser.parse_reasoning_streaming_incremental(delta); + let in_reasoning = parser.is_in_reasoning(); + (result, in_reasoning) }; match parse_result { @@ -1214,7 +1220,7 @@ impl GrpcPDRouter { } else { None }; - return (normal_text, chunk); + return (normal_text, chunk, in_reasoning); } Err(e) => { warn!("Reasoning parsing error: {}", e); @@ -1222,7 +1228,7 @@ impl GrpcPDRouter { } } - (delta.to_string(), None) + (delta.to_string(), None, false) } /// Helper: Process tool calls in streaming mode diff --git a/sgl-router/src/routers/grpc/router.rs b/sgl-router/src/routers/grpc/router.rs index 06832c764..e90df51fd 100644 --- a/sgl-router/src/routers/grpc/router.rs +++ b/sgl-router/src/routers/grpc/router.rs @@ -494,16 +494,18 @@ impl GrpcRouter { request_id: &str, model: &str, created: u64, - ) -> (String, Option) { + ) -> (String, Option, bool) { // Get or create parser for this index reasoning_parsers .entry(index) .or_insert_with(|| self.reasoning_parser_factory.get_pooled(model)); if let Some(pooled_parser) = reasoning_parsers.get(&index) { - let parse_result = { + let (parse_result, in_reasoning) = { let mut parser = pooled_parser.lock().unwrap(); - parser.parse_reasoning_streaming_incremental(delta) + let result = parser.parse_reasoning_streaming_incremental(delta); + let in_reasoning = parser.is_in_reasoning(); + (result, in_reasoning) }; match parse_result { @@ -535,7 +537,7 @@ impl GrpcRouter { } else { None }; - return (normal_text, chunk); + return (normal_text, chunk, in_reasoning); } Err(e) => { warn!("Reasoning parsing error: {}", e); @@ -543,7 +545,7 @@ impl GrpcRouter { } } - (delta.to_string(), None) + (delta.to_string(), None, false) } /// Helper: Process tool calls in streaming mode @@ -901,27 +903,31 @@ impl GrpcRouter { stream_buffer.push_str(&delta); // Reasoning content handling - if separate_reasoning { - let (normal_text, reasoning_chunk) = router.process_reasoning_stream( - &delta, - index, - &mut reasoning_parsers, - &request_id, - &model, - created, - ); + let in_reasoning = if separate_reasoning { + let (normal_text, reasoning_chunk, in_reasoning) = router + .process_reasoning_stream( + &delta, + index, + &mut reasoning_parsers, + &request_id, + &model, + created, + ); if let Some(chunk) = reasoning_chunk { tx.send(Ok(Bytes::from(Self::format_sse_chunk(&chunk)))) .map_err(|_| "Failed to send reasoning chunk".to_string())?; } delta = normal_text; - } + in_reasoning + } else { + false + }; // Tool call handling let tool_choice_enabled = !matches!(tool_choice, Some(ToolChoice::Value(ToolChoiceValue::None))); - if tool_choice_enabled && tools.is_some() { + if !in_reasoning && tool_choice_enabled && tools.is_some() { let (should_skip, tool_chunks) = router .process_tool_calls_stream( &delta,