From 94959237bfa1b3ae160b6d6045ebd6131a8ae24c Mon Sep 17 00:00:00 2001 From: Simo Lin Date: Tue, 19 Aug 2025 10:15:24 -0700 Subject: [PATCH] [router] add dsr1, kimi, and qwen reasoning parser (#9353) --- sgl-router/src/reasoning_parser/factory.rs | 58 ++---- sgl-router/src/reasoning_parser/mod.rs | 6 +- .../src/reasoning_parser/parsers/base.rs | 14 +- .../reasoning_parser/parsers/deepseek_r1.rs | 112 +++++++++++ .../src/reasoning_parser/parsers/kimi.rs | 137 ++++++++++++++ .../src/reasoning_parser/parsers/mod.rs | 6 + .../src/reasoning_parser/parsers/qwen3.rs | 178 ++++++++++++++++++ sgl-router/src/reasoning_parser/traits.rs | 10 +- 8 files changed, 463 insertions(+), 58 deletions(-) create mode 100644 sgl-router/src/reasoning_parser/parsers/deepseek_r1.rs create mode 100644 sgl-router/src/reasoning_parser/parsers/kimi.rs create mode 100644 sgl-router/src/reasoning_parser/parsers/qwen3.rs diff --git a/sgl-router/src/reasoning_parser/factory.rs b/sgl-router/src/reasoning_parser/factory.rs index 1ac2232b6..042653a1b 100644 --- a/sgl-router/src/reasoning_parser/factory.rs +++ b/sgl-router/src/reasoning_parser/factory.rs @@ -3,7 +3,9 @@ use std::collections::HashMap; use std::sync::{Arc, RwLock}; -use crate::reasoning_parser::parsers::BaseReasoningParser; +use crate::reasoning_parser::parsers::{ + BaseReasoningParser, DeepSeekR1Parser, KimiParser, Qwen3Parser, QwenThinkingParser, +}; use crate::reasoning_parser::traits::{ParseError, ParserConfig, ReasoningParser}; /// Type alias for parser creator functions. @@ -82,53 +84,17 @@ impl ParserFactory { Box::new(BaseReasoningParser::new(ParserConfig::default())) }); - // Register DeepSeek-R1 parser - registry.register_parser("deepseek_r1", || { - let config = ParserConfig { - think_start_token: "".to_string(), - think_end_token: "".to_string(), - force_reasoning: true, - stream_reasoning: true, - max_buffer_size: 65536, - }; - Box::new(BaseReasoningParser::new(config).with_model_type("deepseek_r1".to_string())) - }); + // Register DeepSeek-R1 parser (starts with in_reasoning=true) + registry.register_parser("deepseek_r1", || Box::new(DeepSeekR1Parser::new())); - // Register Qwen3 parser - registry.register_parser("qwen3", || { - let config = ParserConfig { - think_start_token: "".to_string(), - think_end_token: "".to_string(), - force_reasoning: false, - stream_reasoning: true, - max_buffer_size: 65536, - }; - Box::new(BaseReasoningParser::new(config).with_model_type("qwen3".to_string())) - }); + // Register Qwen3 parser (starts with in_reasoning=false) + registry.register_parser("qwen3", || Box::new(Qwen3Parser::new())); - // Register Qwen3-thinking parser (forced reasoning) - registry.register_parser("qwen3_thinking", || { - let config = ParserConfig { - think_start_token: "".to_string(), - think_end_token: "".to_string(), - force_reasoning: true, - stream_reasoning: true, - max_buffer_size: 65536, - }; - Box::new(BaseReasoningParser::new(config).with_model_type("qwen3_thinking".to_string())) - }); + // Register Qwen3-thinking parser (starts with in_reasoning=true) + registry.register_parser("qwen3_thinking", || Box::new(QwenThinkingParser::new())); - // Register Kimi parser with Unicode tokens - registry.register_parser("kimi", || { - let config = ParserConfig { - think_start_token: "◁think▷".to_string(), - think_end_token: "◁/think▷".to_string(), - force_reasoning: false, - stream_reasoning: true, - max_buffer_size: 65536, - }; - Box::new(BaseReasoningParser::new(config).with_model_type("kimi".to_string())) - }); + // Register Kimi parser with Unicode tokens (starts with in_reasoning=false) + registry.register_parser("kimi", || Box::new(KimiParser::new())); // Register model patterns registry.register_pattern("deepseek-r1", "deepseek_r1"); @@ -155,9 +121,9 @@ impl ParserFactory { let config = ParserConfig { think_start_token: "".to_string(), think_end_token: "".to_string(), - force_reasoning: false, stream_reasoning: true, max_buffer_size: 65536, + initial_in_reasoning: false, }; Ok(Box::new( BaseReasoningParser::new(config).with_model_type("passthrough".to_string()), diff --git a/sgl-router/src/reasoning_parser/mod.rs b/sgl-router/src/reasoning_parser/mod.rs index fd975a7bf..f566a5187 100644 --- a/sgl-router/src/reasoning_parser/mod.rs +++ b/sgl-router/src/reasoning_parser/mod.rs @@ -3,5 +3,7 @@ pub mod parsers; pub mod traits; pub use factory::{ParserFactory, ParserRegistry}; -pub use parsers::BaseReasoningParser; -pub use traits::{ParseError, ParserResult, ReasoningParser}; +pub use parsers::{ + BaseReasoningParser, DeepSeekR1Parser, KimiParser, Qwen3Parser, QwenThinkingParser, +}; +pub use traits::{ParseError, ParserConfig, ParserResult, ReasoningParser}; diff --git a/sgl-router/src/reasoning_parser/parsers/base.rs b/sgl-router/src/reasoning_parser/parsers/base.rs index 78743b13d..0fd2818b9 100644 --- a/sgl-router/src/reasoning_parser/parsers/base.rs +++ b/sgl-router/src/reasoning_parser/parsers/base.rs @@ -20,7 +20,7 @@ pub struct BaseReasoningParser { impl BaseReasoningParser { /// Create a new BaseReasoningParser with the given configuration. pub fn new(config: ParserConfig) -> Self { - let in_reasoning = config.force_reasoning; + let in_reasoning = config.initial_in_reasoning; Self { config, in_reasoning, @@ -179,7 +179,7 @@ impl ReasoningParser for BaseReasoningParser { } fn reset(&mut self) { - self.in_reasoning = self.config.force_reasoning; + self.in_reasoning = self.config.initial_in_reasoning; self.buffer.clear(); self.stripped_think_start = false; } @@ -193,13 +193,16 @@ impl ReasoningParser for BaseReasoningParser { mod tests { use super::*; - fn create_test_parser(force_reasoning: bool, stream_reasoning: bool) -> BaseReasoningParser { + fn create_test_parser( + initial_in_reasoning: bool, + stream_reasoning: bool, + ) -> BaseReasoningParser { let config = ParserConfig { think_start_token: "".to_string(), think_end_token: "".to_string(), - force_reasoning, stream_reasoning, max_buffer_size: 65536, + initial_in_reasoning, }; BaseReasoningParser::new(config) } @@ -265,7 +268,8 @@ mod tests { } #[test] - fn test_force_reasoning_mode() { + fn test_initial_in_reasoning_true() { + // Parser starts with in_reasoning=true (like DeepSeek-R1) let mut parser = create_test_parser(true, true); let result = parser .detect_and_parse_reasoning("no think tags here") diff --git a/sgl-router/src/reasoning_parser/parsers/deepseek_r1.rs b/sgl-router/src/reasoning_parser/parsers/deepseek_r1.rs new file mode 100644 index 000000000..62a7aadec --- /dev/null +++ b/sgl-router/src/reasoning_parser/parsers/deepseek_r1.rs @@ -0,0 +1,112 @@ +// DeepSeek-R1 specific reasoning parser. +// This parser starts with in_reasoning=true, assuming all text is reasoning +// until an end token is encountered. + +use crate::reasoning_parser::parsers::BaseReasoningParser; +use crate::reasoning_parser::traits::{ParseError, ParserConfig, ParserResult, ReasoningParser}; + +/// DeepSeek-R1 reasoning parser. +/// +/// This parser assumes reasoning from the start of text (in_reasoning=true) +/// and uses and tokens. +pub struct DeepSeekR1Parser { + base: BaseReasoningParser, +} + +impl DeepSeekR1Parser { + /// Create a new DeepSeek-R1 parser. + pub fn new() -> Self { + let config = ParserConfig { + think_start_token: "".to_string(), + think_end_token: "".to_string(), + stream_reasoning: true, + max_buffer_size: 65536, + initial_in_reasoning: true, // Always starts with reasoning + }; + + Self { + base: BaseReasoningParser::new(config).with_model_type("deepseek_r1".to_string()), + } + } +} + +impl Default for DeepSeekR1Parser { + fn default() -> Self { + Self::new() + } +} + +impl ReasoningParser for DeepSeekR1Parser { + fn detect_and_parse_reasoning(&mut self, text: &str) -> Result { + self.base.detect_and_parse_reasoning(text) + } + + fn parse_reasoning_streaming_incremental( + &mut self, + text: &str, + ) -> Result { + self.base.parse_reasoning_streaming_incremental(text) + } + + fn reset(&mut self) { + self.base.reset() + } + + fn model_type(&self) -> &str { + self.base.model_type() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_deepseek_r1_initial_state() { + let mut parser = DeepSeekR1Parser::new(); + + // Should treat text as reasoning even without start token + let result = parser + .detect_and_parse_reasoning("This is reasoning content") + .unwrap(); + assert_eq!(result.normal_text, ""); + assert_eq!(result.reasoning_text, "This is reasoning content"); + } + + #[test] + fn test_deepseek_r1_with_end_token() { + let mut parser = DeepSeekR1Parser::new(); + + // Should extract reasoning until end token + let result = parser + .detect_and_parse_reasoning("reasoning contentnormal content") + .unwrap(); + assert_eq!(result.normal_text, "normal content"); + assert_eq!(result.reasoning_text, "reasoning content"); + } + + #[test] + fn test_deepseek_r1_streaming() { + let mut parser = DeepSeekR1Parser::new(); + + // First chunk - all reasoning + let result1 = parser + .parse_reasoning_streaming_incremental("thinking about") + .unwrap(); + assert_eq!(result1.reasoning_text, "thinking about"); + assert_eq!(result1.normal_text, ""); + + // Second chunk - ends reasoning + let result2 = parser + .parse_reasoning_streaming_incremental(" the problemanswer") + .unwrap(); + assert_eq!(result2.reasoning_text, "the problem"); // Text is trimmed + assert_eq!(result2.normal_text, "answer"); + } + + #[test] + fn test_model_type() { + let parser = DeepSeekR1Parser::new(); + assert_eq!(parser.model_type(), "deepseek_r1"); + } +} diff --git a/sgl-router/src/reasoning_parser/parsers/kimi.rs b/sgl-router/src/reasoning_parser/parsers/kimi.rs new file mode 100644 index 000000000..3e11a5711 --- /dev/null +++ b/sgl-router/src/reasoning_parser/parsers/kimi.rs @@ -0,0 +1,137 @@ +// Kimi specific reasoning parser. +// This parser uses Unicode tokens and starts with in_reasoning=false. + +use crate::reasoning_parser::parsers::BaseReasoningParser; +use crate::reasoning_parser::traits::{ParseError, ParserConfig, ParserResult, ReasoningParser}; + +/// Kimi reasoning parser. +/// +/// This parser uses Unicode tokens (◁think▷ and ◁/think▷) and requires +/// explicit start tokens to enter reasoning mode. +pub struct KimiParser { + base: BaseReasoningParser, +} + +impl KimiParser { + /// Create a new Kimi parser. + pub fn new() -> Self { + let config = ParserConfig { + think_start_token: "◁think▷".to_string(), + think_end_token: "◁/think▷".to_string(), + stream_reasoning: true, + max_buffer_size: 65536, + initial_in_reasoning: false, // Requires explicit start token + }; + + Self { + base: BaseReasoningParser::new(config).with_model_type("kimi".to_string()), + } + } +} + +impl Default for KimiParser { + fn default() -> Self { + Self::new() + } +} + +impl ReasoningParser for KimiParser { + fn detect_and_parse_reasoning(&mut self, text: &str) -> Result { + self.base.detect_and_parse_reasoning(text) + } + + fn parse_reasoning_streaming_incremental( + &mut self, + text: &str, + ) -> Result { + self.base.parse_reasoning_streaming_incremental(text) + } + + fn reset(&mut self) { + self.base.reset() + } + + fn model_type(&self) -> &str { + self.base.model_type() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_kimi_initial_state() { + let mut parser = KimiParser::new(); + + // Should NOT treat text as reasoning without start token + let result = parser + .detect_and_parse_reasoning("This is normal content") + .unwrap(); + assert_eq!(result.normal_text, "This is normal content"); + assert_eq!(result.reasoning_text, ""); + } + + #[test] + fn test_kimi_with_unicode_tokens() { + let mut parser = KimiParser::new(); + + // Should extract reasoning with Unicode tokens + let result = parser + .detect_and_parse_reasoning("◁think▷reasoning content◁/think▷answer") + .unwrap(); + assert_eq!(result.normal_text, "answer"); + assert_eq!(result.reasoning_text, "reasoning content"); + } + + #[test] + fn test_kimi_partial_unicode() { + let mut parser = KimiParser::new(); + + // Test partial Unicode token buffering + let result1 = parser + .parse_reasoning_streaming_incremental("◁thi") + .unwrap(); + assert_eq!(result1.normal_text, ""); + assert_eq!(result1.reasoning_text, ""); + + // Complete the token + let result2 = parser + .parse_reasoning_streaming_incremental("nk▷reasoning") + .unwrap(); + assert_eq!(result2.normal_text, ""); + assert_eq!(result2.reasoning_text, "reasoning"); + } + + #[test] + fn test_kimi_streaming() { + let mut parser = KimiParser::new(); + + // Normal text first + let result1 = parser + .parse_reasoning_streaming_incremental("normal ") + .unwrap(); + assert_eq!(result1.normal_text, "normal "); + assert_eq!(result1.reasoning_text, ""); + + // Enter reasoning with Unicode token + let result2 = parser + .parse_reasoning_streaming_incremental("◁think▷thinking") + .unwrap(); + assert_eq!(result2.normal_text, ""); + assert_eq!(result2.reasoning_text, "thinking"); + + // Exit reasoning + let result3 = parser + .parse_reasoning_streaming_incremental("◁/think▷answer") + .unwrap(); + assert_eq!(result3.normal_text, "answer"); + assert_eq!(result3.reasoning_text, ""); // Already returned in stream mode + } + + #[test] + fn test_model_type() { + let parser = KimiParser::new(); + assert_eq!(parser.model_type(), "kimi"); + } +} diff --git a/sgl-router/src/reasoning_parser/parsers/mod.rs b/sgl-router/src/reasoning_parser/parsers/mod.rs index 64a00f864..7505a1da3 100644 --- a/sgl-router/src/reasoning_parser/parsers/mod.rs +++ b/sgl-router/src/reasoning_parser/parsers/mod.rs @@ -1,3 +1,9 @@ pub mod base; +pub mod deepseek_r1; +pub mod kimi; +pub mod qwen3; pub use base::BaseReasoningParser; +pub use deepseek_r1::DeepSeekR1Parser; +pub use kimi::KimiParser; +pub use qwen3::{Qwen3Parser, QwenThinkingParser}; diff --git a/sgl-router/src/reasoning_parser/parsers/qwen3.rs b/sgl-router/src/reasoning_parser/parsers/qwen3.rs new file mode 100644 index 000000000..8c5ce9e8c --- /dev/null +++ b/sgl-router/src/reasoning_parser/parsers/qwen3.rs @@ -0,0 +1,178 @@ +// Qwen3 specific reasoning parser. +// This parser starts with in_reasoning=false, requiring an explicit +// start token to enter reasoning mode. + +use crate::reasoning_parser::parsers::BaseReasoningParser; +use crate::reasoning_parser::traits::{ParseError, ParserConfig, ParserResult, ReasoningParser}; + +/// Qwen3 reasoning parser. +/// +/// This parser requires explicit tokens to enter reasoning mode +/// (in_reasoning=false initially). +pub struct Qwen3Parser { + base: BaseReasoningParser, +} + +impl Qwen3Parser { + /// Create a new Qwen3 parser. + pub fn new() -> Self { + let config = ParserConfig { + think_start_token: "".to_string(), + think_end_token: "".to_string(), + stream_reasoning: true, + max_buffer_size: 65536, + initial_in_reasoning: false, // Requires explicit start token + }; + + Self { + base: BaseReasoningParser::new(config).with_model_type("qwen3".to_string()), + } + } +} + +impl Default for Qwen3Parser { + fn default() -> Self { + Self::new() + } +} + +impl ReasoningParser for Qwen3Parser { + fn detect_and_parse_reasoning(&mut self, text: &str) -> Result { + self.base.detect_and_parse_reasoning(text) + } + + fn parse_reasoning_streaming_incremental( + &mut self, + text: &str, + ) -> Result { + self.base.parse_reasoning_streaming_incremental(text) + } + + fn reset(&mut self) { + self.base.reset() + } + + fn model_type(&self) -> &str { + self.base.model_type() + } +} + +/// QwenThinking parser - variant that assumes reasoning from start. +/// +/// This is for qwen*thinking models that behave like DeepSeek-R1. +pub struct QwenThinkingParser { + base: BaseReasoningParser, +} + +impl QwenThinkingParser { + /// Create a new QwenThinking parser. + pub fn new() -> Self { + let config = ParserConfig { + think_start_token: "".to_string(), + think_end_token: "".to_string(), + stream_reasoning: true, + max_buffer_size: 65536, + initial_in_reasoning: true, // Assumes reasoning from start + }; + + Self { + base: BaseReasoningParser::new(config).with_model_type("qwen_thinking".to_string()), + } + } +} + +impl Default for QwenThinkingParser { + fn default() -> Self { + Self::new() + } +} + +impl ReasoningParser for QwenThinkingParser { + fn detect_and_parse_reasoning(&mut self, text: &str) -> Result { + self.base.detect_and_parse_reasoning(text) + } + + fn parse_reasoning_streaming_incremental( + &mut self, + text: &str, + ) -> Result { + self.base.parse_reasoning_streaming_incremental(text) + } + + fn reset(&mut self) { + self.base.reset() + } + + fn model_type(&self) -> &str { + self.base.model_type() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_qwen3_initial_state() { + let mut parser = Qwen3Parser::new(); + + // Should NOT treat text as reasoning without start token + let result = parser + .detect_and_parse_reasoning("This is normal content") + .unwrap(); + assert_eq!(result.normal_text, "This is normal content"); + assert_eq!(result.reasoning_text, ""); + } + + #[test] + fn test_qwen3_with_tokens() { + let mut parser = Qwen3Parser::new(); + + // Should extract reasoning with proper tokens + let result = parser + .detect_and_parse_reasoning("reasoninganswer") + .unwrap(); + assert_eq!(result.normal_text, "answer"); + assert_eq!(result.reasoning_text, "reasoning"); + } + + #[test] + fn test_qwen_thinking_initial_state() { + let mut parser = QwenThinkingParser::new(); + + // Should treat text as reasoning even without start token + let result = parser + .detect_and_parse_reasoning("This is reasoning content") + .unwrap(); + assert_eq!(result.normal_text, ""); + assert_eq!(result.reasoning_text, "This is reasoning content"); + } + + #[test] + fn test_qwen3_streaming() { + let mut parser = Qwen3Parser::new(); + + // First chunk - normal text (no start token yet) + let result1 = parser + .parse_reasoning_streaming_incremental("normal text ") + .unwrap(); + assert_eq!(result1.normal_text, "normal text "); + assert_eq!(result1.reasoning_text, ""); + + // Second chunk - enters reasoning + let result2 = parser + .parse_reasoning_streaming_incremental("reasoning") + .unwrap(); + assert_eq!(result2.normal_text, ""); + assert_eq!(result2.reasoning_text, "reasoning"); + } + + #[test] + fn test_model_types() { + let qwen3 = Qwen3Parser::new(); + assert_eq!(qwen3.model_type(), "qwen3"); + + let qwen_thinking = QwenThinkingParser::new(); + assert_eq!(qwen_thinking.model_type(), "qwen_thinking"); + } +} diff --git a/sgl-router/src/reasoning_parser/traits.rs b/sgl-router/src/reasoning_parser/traits.rs index 672b76813..160fa51d9 100644 --- a/sgl-router/src/reasoning_parser/traits.rs +++ b/sgl-router/src/reasoning_parser/traits.rs @@ -96,14 +96,14 @@ pub struct ParserConfig { /// The token that marks the end of reasoning content. pub think_end_token: String, - /// Whether to force all text to be treated as reasoning. - pub force_reasoning: bool, - /// Whether to stream reasoning content as it arrives. pub stream_reasoning: bool, /// Maximum buffer size in bytes. pub max_buffer_size: usize, + + /// Initial state for in_reasoning flag (fixed per parser type). + pub initial_in_reasoning: bool, } impl Default for ParserConfig { @@ -111,9 +111,9 @@ impl Default for ParserConfig { Self { think_start_token: "".to_string(), think_end_token: "".to_string(), - force_reasoning: false, stream_reasoning: true, - max_buffer_size: 65536, // 64KB default + max_buffer_size: 65536, // 64KB default + initial_in_reasoning: false, // Default to false (explicit reasoning) } } }