diff --git a/sgl-router/src/tool_parser/parsers/gpt_oss_harmony_parser.rs b/sgl-router/src/tool_parser/parsers/gpt_oss_harmony_parser.rs
new file mode 100644
index 000000000..953c02d38
--- /dev/null
+++ b/sgl-router/src/tool_parser/parsers/gpt_oss_harmony_parser.rs
@@ -0,0 +1,73 @@
+use async_trait::async_trait;
+
+use crate::tool_parser::{
+    errors::ToolParserResult,
+    state::ParseState,
+    traits::{TokenToolParser, ToolParser},
+    types::{StreamResult, ToolCall},
+};
+
+/// Placeholder for the Harmony-backed GPT-OSS parser.
+///
+/// Every method is a stub for now, so this does not yet replace the legacy regex
+/// implementation. This struct will be fleshed out in subsequent phases to reuse
+/// Harmony's tokenizer and message reconstruction logic.
+#[derive(Default)]
+pub struct GptOssHarmonyParser;
+
+impl GptOssHarmonyParser {
+    /// Creates the placeholder parser; the unit struct carries no state.
+    pub fn new() -> Self {
+        Self
+    }
+}
+
+#[async_trait]
+impl ToolParser for GptOssHarmonyParser {
+    async fn parse_complete(&self, output: &str) -> ToolParserResult<(String, Vec<ToolCall>)> {
+        // Temporary stub: fall back to returning the raw text with no tool calls.
+        // Later phases will decode Harmony tokens into structured tool calls.
+        Ok((output.to_string(), Vec::new()))
+    }
+
+    async fn parse_incremental(
+        &self,
+        _chunk: &str,
+        _state: &mut ParseState,
+    ) -> ToolParserResult<StreamResult> {
+        // Temporary stub until the Harmony streaming pipeline is implemented.
+        Ok(StreamResult::Incomplete)
+    }
+
+    fn detect_format(&self, text: &str) -> bool {
+        // Reuse the legacy heuristics for now; this will be replaced with Harmony-specific
+        // start-token detection when the parser is fully implemented.
+        text.contains("<|channel|>commentary")
+    }
+
+    fn as_token_parser(&self) -> Option<&dyn TokenToolParser> {
+        Some(self)
+    }
+}
+
+#[async_trait]
+impl TokenToolParser for GptOssHarmonyParser {
+    async fn parse_complete_tokens(
+        &self,
+        _tokens: &[u32],
+    ) -> ToolParserResult<(String, Vec<ToolCall>)> {
+        // Placeholder until Harmony integration lands. Returning an empty tool list ensures
+        // that enabling the parser without full implementation results in a no-op rather
+        // than a runtime panic.
+        Ok((String::new(), Vec::new()))
+    }
+
+    async fn parse_incremental_tokens(
+        &self,
+        _tokens: &[u32],
+        _state: &mut ParseState,
+    ) -> ToolParserResult<StreamResult> {
+        // Stub mirroring `parse_incremental`: always reports `StreamResult::Incomplete`.
+        Ok(StreamResult::Incomplete)
+    }
+}
diff --git a/sgl-router/src/tool_parser/parsers/mod.rs b/sgl-router/src/tool_parser/parsers/mod.rs
index 693aeedf4..9a521b5d8 100644
--- a/sgl-router/src/tool_parser/parsers/mod.rs
+++ b/sgl-router/src/tool_parser/parsers/mod.rs
@@ -5,6 +5,7 @@
 // Individual parser modules
 pub mod deepseek_parser;
 pub mod glm4_moe_parser;
+pub mod gpt_oss_harmony_parser;
 pub mod gpt_oss_parser;
 pub mod json_parser;
 pub mod kimik2_parser;
@@ -17,6 +18,7 @@ pub mod step3_parser;
 // Re-export parser types for convenience
 pub use deepseek_parser::DeepSeekParser;
 pub use glm4_moe_parser::Glm4MoeParser;
+pub use gpt_oss_harmony_parser::GptOssHarmonyParser;
 pub use gpt_oss_parser::GptOssParser;
 pub use json_parser::JsonParser;
 pub use kimik2_parser::KimiK2Parser;
diff --git a/sgl-router/src/tool_parser/registry.rs b/sgl-router/src/tool_parser/registry.rs
index a21640443..6a469889b 100644
--- a/sgl-router/src/tool_parser/registry.rs
+++ b/sgl-router/src/tool_parser/registry.rs
@@ -1,11 +1,10 @@
 use crate::tool_parser::parsers::{
-    DeepSeekParser, Glm4MoeParser, GptOssParser, JsonParser, KimiK2Parser, LlamaParser,
-    MistralParser, PythonicParser, QwenParser, Step3Parser,
+    DeepSeekParser, Glm4MoeParser, GptOssHarmonyParser, GptOssParser, JsonParser, KimiK2Parser,
+    LlamaParser, MistralParser, PythonicParser, QwenParser, Step3Parser,
 };
 use crate::tool_parser::traits::ToolParser;
 use once_cell::sync::Lazy;
-use std::collections::HashMap;
-use std::sync::Arc;
+use std::{collections::HashMap, env, sync::Arc};
 
 /// Global singleton registry instance - created once and reused
 pub static GLOBAL_REGISTRY: Lazy<ParserRegistry> = Lazy::new(ParserRegistry::new_internal);
@@ -139,8 +138,18 @@ impl ParserRegistry {
         // Kimi K2 parser - Token-based with indexed functions
         self.register_parser("kimik2", Arc::new(KimiK2Parser::new()));
 
-        // GPT-OSS parser - Channel format
-        self.register_parser("gpt_oss", Arc::new(GptOssParser::new()));
+        // GPT-OSS parsers - register legacy and Harmony variants
+        let gpt_oss_legacy = Arc::new(GptOssParser::new());
+        let gpt_oss_harmony = Arc::new(GptOssHarmonyParser::new());
+
+        self.register_parser("gpt_oss_legacy", gpt_oss_legacy.clone());
+        self.register_parser("gpt_oss_harmony", gpt_oss_harmony.clone());
+
+        if use_harmony_gpt_oss() {
+            self.register_parser("gpt_oss", gpt_oss_harmony);
+        } else {
+            self.register_parser("gpt_oss", gpt_oss_legacy);
+        }
     }
 
     /// Register default model mappings
@@ -216,6 +225,21 @@ impl ParserRegistry {
     }
 }
 
+/// Reports whether `ROUTER_USE_HARMONY_GPT_OSS` opts in to the Harmony parser.
+///
+/// The value is trimmed and compared case-insensitively against `1`, `true`,
+/// `yes`, and `on`; an unset, non-Unicode, or unrecognised value yields `false`.
+fn use_harmony_gpt_oss() -> bool {
+    env::var("ROUTER_USE_HARMONY_GPT_OSS")
+        .map(|value| {
+            let flag = value.trim();
+            ["1", "true", "yes", "on"]
+                .iter()
+                .any(|truthy| flag.eq_ignore_ascii_case(truthy))
+        })
+        .unwrap_or(false)
+}
+
 impl Default for &'static ParserRegistry {
     fn default() -> Self {
         ParserRegistry::new()
diff --git a/sgl-router/src/tool_parser/state.rs b/sgl-router/src/tool_parser/state.rs
index 096a9352f..1bef8dc4b 100644
--- a/sgl-router/src/tool_parser/state.rs
+++ b/sgl-router/src/tool_parser/state.rs
@@ -34,6 +34,8 @@ pub struct ParseState {
     pub escape_next: bool,
     /// Current tool index (for streaming)
     pub tool_index: usize,
+    /// Optional Harmony-specific streaming state (populated by token-aware parsers)
+    pub harmony_stream: Option<HarmonyStreamState>,
 }
 
 impl ParseState {
@@ -49,6 +51,7 @@ impl ParseState {
             in_string: false,
             escape_next: false,
             tool_index: 0,
+            harmony_stream: None,
         }
     }
 
@@ -59,6 +62,7 @@ impl ParseState {
         self.bracket_depth = 0;
         self.in_string = false;
         self.escape_next = false;
+        self.harmony_stream = None;
     }
 
     /// Process a single character for JSON parsing
@@ -179,3 +183,20 @@ impl Default for ParseState {
         Self::new()
     }
 }
+
+/// Placeholder for Harmony streaming metadata captured during token-aware parsing.
+#[derive(Debug, Clone, Default)]
+pub struct HarmonyStreamState {
+    /// All tokens observed so far for the current assistant response.
+    pub tokens: Vec<u32>,
+    /// Number of tokens that have already been processed by the Harmony parser.
+    pub processed_tokens: usize,
+    /// Number of tool calls emitted downstream.
+    pub emitted_calls: usize,
+    /// Pending analysis-channel content awaiting flush into normal text output.
+    pub analysis_buffer: String,
+    /// Whether the tool name has been surfaced for the current call.
+    pub emitted_name: bool,
+    /// Whether arguments have been surfaced for the current call.
+    pub emitted_args: bool,
+}
diff --git a/sgl-router/src/tool_parser/traits.rs b/sgl-router/src/tool_parser/traits.rs
index 34b097a3f..ccfc99a55 100644
--- a/sgl-router/src/tool_parser/traits.rs
+++ b/sgl-router/src/tool_parser/traits.rs
@@ -21,6 +21,12 @@ pub trait ToolParser: Send + Sync {
 
     /// Check if text contains tool calls in this parser's format
     fn detect_format(&self, text: &str) -> bool;
+
+    /// Optionally expose a token-aware parser implementation.
+    /// Default returns `None`, meaning the parser only supports text input.
+    fn as_token_parser(&self) -> Option<&dyn TokenToolParser> {
+        None
+    }
 }
 
 /// Trait for partial JSON parsing
@@ -34,3 +40,20 @@ pub trait PartialJsonParser: Send + Sync {
     /// Get the maximum parsing depth
     fn max_depth(&self) -> usize;
 }
+
+/// Extension of [`ToolParser`] for parsers that can also consume raw token IDs.
+#[async_trait]
+pub trait TokenToolParser: ToolParser {
+    /// Parse complete tool calls when provided with raw token IDs.
+    async fn parse_complete_tokens(
+        &self,
+        tokens: &[u32],
+    ) -> ToolParserResult<(String, Vec<ToolCall>)>;
+
+    /// Streaming parser entrypoint for token chunks.
+    async fn parse_incremental_tokens(
+        &self,
+        tokens: &[u32],
+        state: &mut ParseState,
+    ) -> ToolParserResult<StreamResult>;
+}