[router] add harmony tool parser base structure and interface (#11036)
This commit is contained in:
70
sgl-router/src/tool_parser/parsers/gpt_oss_harmony_parser.rs
Normal file
70
sgl-router/src/tool_parser/parsers/gpt_oss_harmony_parser.rs
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
use async_trait::async_trait;
|
||||||
|
|
||||||
|
use crate::tool_parser::{
|
||||||
|
errors::ToolParserResult,
|
||||||
|
state::ParseState,
|
||||||
|
traits::{TokenToolParser, ToolParser},
|
||||||
|
types::{StreamResult, ToolCall},
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Placeholder for the Harmony-backed GPT-OSS parser.
///
/// Intended to eventually supersede the legacy regex-based GPT-OSS parser. This
/// struct will be fleshed out in subsequent phases to reuse Harmony's tokenizer
/// and message reconstruction logic; for now it carries no state.
#[derive(Debug, Clone, Default)]
pub struct GptOssHarmonyParser;
|
||||||
|
|
||||||
|
impl GptOssHarmonyParser {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl ToolParser for GptOssHarmonyParser {
|
||||||
|
async fn parse_complete(&self, output: &str) -> ToolParserResult<(String, Vec<ToolCall>)> {
|
||||||
|
// Temporary stub: fall back to returning the raw text with no tool calls.
|
||||||
|
// Later phases will decode Harmony tokens into structured tool calls.
|
||||||
|
Ok((output.to_string(), Vec::new()))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn parse_incremental(
|
||||||
|
&self,
|
||||||
|
_chunk: &str,
|
||||||
|
_state: &mut ParseState,
|
||||||
|
) -> ToolParserResult<StreamResult> {
|
||||||
|
// Temporary stub until the Harmony streaming pipeline is implemented.
|
||||||
|
Ok(StreamResult::Incomplete)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn detect_format(&self, text: &str) -> bool {
|
||||||
|
// Reuse the legacy heuristics for now; this will be replaced with Harmony-specific
|
||||||
|
// start-token detection when the parser is fully implemented.
|
||||||
|
text.contains("<|channel|>commentary")
|
||||||
|
}
|
||||||
|
|
||||||
|
fn as_token_parser(&self) -> Option<&dyn TokenToolParser> {
|
||||||
|
Some(self)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl TokenToolParser for GptOssHarmonyParser {
|
||||||
|
async fn parse_complete_tokens(
|
||||||
|
&self,
|
||||||
|
_tokens: &[u32],
|
||||||
|
) -> ToolParserResult<(String, Vec<ToolCall>)> {
|
||||||
|
// Placeholder until Harmony integration lands. Returning an empty tool list ensures
|
||||||
|
// that enabling the parser without full implementation results in a no-op rather
|
||||||
|
// than a runtime panic.
|
||||||
|
Ok((String::new(), Vec::new()))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn parse_incremental_tokens(
|
||||||
|
&self,
|
||||||
|
_tokens: &[u32],
|
||||||
|
_state: &mut ParseState,
|
||||||
|
) -> ToolParserResult<StreamResult> {
|
||||||
|
Ok(StreamResult::Incomplete)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -5,6 +5,7 @@
|
|||||||
// Individual parser modules
|
// Individual parser modules
|
||||||
pub mod deepseek_parser;
|
pub mod deepseek_parser;
|
||||||
pub mod glm4_moe_parser;
|
pub mod glm4_moe_parser;
|
||||||
|
pub mod gpt_oss_harmony_parser;
|
||||||
pub mod gpt_oss_parser;
|
pub mod gpt_oss_parser;
|
||||||
pub mod json_parser;
|
pub mod json_parser;
|
||||||
pub mod kimik2_parser;
|
pub mod kimik2_parser;
|
||||||
@@ -17,6 +18,7 @@ pub mod step3_parser;
|
|||||||
// Re-export parser types for convenience
|
// Re-export parser types for convenience
|
||||||
pub use deepseek_parser::DeepSeekParser;
|
pub use deepseek_parser::DeepSeekParser;
|
||||||
pub use glm4_moe_parser::Glm4MoeParser;
|
pub use glm4_moe_parser::Glm4MoeParser;
|
||||||
|
pub use gpt_oss_harmony_parser::GptOssHarmonyParser;
|
||||||
pub use gpt_oss_parser::GptOssParser;
|
pub use gpt_oss_parser::GptOssParser;
|
||||||
pub use json_parser::JsonParser;
|
pub use json_parser::JsonParser;
|
||||||
pub use kimik2_parser::KimiK2Parser;
|
pub use kimik2_parser::KimiK2Parser;
|
||||||
|
|||||||
@@ -1,11 +1,10 @@
|
|||||||
use crate::tool_parser::parsers::{
|
use crate::tool_parser::parsers::{
|
||||||
DeepSeekParser, Glm4MoeParser, GptOssParser, JsonParser, KimiK2Parser, LlamaParser,
|
DeepSeekParser, Glm4MoeParser, GptOssHarmonyParser, GptOssParser, JsonParser, KimiK2Parser,
|
||||||
MistralParser, PythonicParser, QwenParser, Step3Parser,
|
LlamaParser, MistralParser, PythonicParser, QwenParser, Step3Parser,
|
||||||
};
|
};
|
||||||
use crate::tool_parser::traits::ToolParser;
|
use crate::tool_parser::traits::ToolParser;
|
||||||
use once_cell::sync::Lazy;
|
use once_cell::sync::Lazy;
|
||||||
use std::collections::HashMap;
|
use std::{collections::HashMap, env, sync::Arc};
|
||||||
use std::sync::Arc;
|
|
||||||
|
|
||||||
/// Global singleton registry instance - created once and reused
|
/// Global singleton registry instance - created once and reused
|
||||||
pub static GLOBAL_REGISTRY: Lazy<ParserRegistry> = Lazy::new(ParserRegistry::new_internal);
|
pub static GLOBAL_REGISTRY: Lazy<ParserRegistry> = Lazy::new(ParserRegistry::new_internal);
|
||||||
@@ -139,8 +138,18 @@ impl ParserRegistry {
|
|||||||
// Kimi K2 parser - Token-based with indexed functions
|
// Kimi K2 parser - Token-based with indexed functions
|
||||||
self.register_parser("kimik2", Arc::new(KimiK2Parser::new()));
|
self.register_parser("kimik2", Arc::new(KimiK2Parser::new()));
|
||||||
|
|
||||||
// GPT-OSS parser - Channel format
|
// GPT-OSS parsers - register legacy and Harmony variants
|
||||||
self.register_parser("gpt_oss", Arc::new(GptOssParser::new()));
|
let gpt_oss_legacy = Arc::new(GptOssParser::new());
|
||||||
|
let gpt_oss_harmony = Arc::new(GptOssHarmonyParser::new());
|
||||||
|
|
||||||
|
self.register_parser("gpt_oss_legacy", gpt_oss_legacy.clone());
|
||||||
|
self.register_parser("gpt_oss_harmony", gpt_oss_harmony.clone());
|
||||||
|
|
||||||
|
if use_harmony_gpt_oss() {
|
||||||
|
self.register_parser("gpt_oss", gpt_oss_harmony);
|
||||||
|
} else {
|
||||||
|
self.register_parser("gpt_oss", gpt_oss_legacy);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Register default model mappings
|
/// Register default model mappings
|
||||||
@@ -216,6 +225,19 @@ impl ParserRegistry {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Reports whether the `ROUTER_USE_HARMONY_GPT_OSS` environment variable opts in
/// to the Harmony-backed GPT-OSS parser.
///
/// Returns `true` only when the variable is set to a truthy value (see
/// [`is_truthy_flag`]). An unset variable, a value that is not valid Unicode, or
/// any other value yields `false`.
fn use_harmony_gpt_oss() -> bool {
    env::var("ROUTER_USE_HARMONY_GPT_OSS")
        .map(|raw| is_truthy_flag(&raw))
        .unwrap_or(false)
}

/// Returns `true` when `raw`, after trimming surrounding whitespace, is one of
/// `1`, `true`, `yes` or `on`.
///
/// The comparison ignores ASCII case so that spellings such as `tRuE` or `oN`
/// are not silently treated as "disabled".
fn is_truthy_flag(raw: &str) -> bool {
    let flag = raw.trim();
    ["1", "true", "yes", "on"]
        .iter()
        .any(|accepted| flag.eq_ignore_ascii_case(accepted))
}
|
||||||
|
|
||||||
impl Default for &'static ParserRegistry {
|
impl Default for &'static ParserRegistry {
|
||||||
fn default() -> Self {
|
fn default() -> Self {
|
||||||
ParserRegistry::new()
|
ParserRegistry::new()
|
||||||
|
|||||||
@@ -34,6 +34,8 @@ pub struct ParseState {
|
|||||||
pub escape_next: bool,
|
pub escape_next: bool,
|
||||||
/// Current tool index (for streaming)
|
/// Current tool index (for streaming)
|
||||||
pub tool_index: usize,
|
pub tool_index: usize,
|
||||||
|
/// Optional Harmony-specific streaming state (populated by token-aware parsers)
|
||||||
|
pub harmony_stream: Option<HarmonyStreamState>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ParseState {
|
impl ParseState {
|
||||||
@@ -49,6 +51,7 @@ impl ParseState {
|
|||||||
in_string: false,
|
in_string: false,
|
||||||
escape_next: false,
|
escape_next: false,
|
||||||
tool_index: 0,
|
tool_index: 0,
|
||||||
|
harmony_stream: None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -59,6 +62,7 @@ impl ParseState {
|
|||||||
self.bracket_depth = 0;
|
self.bracket_depth = 0;
|
||||||
self.in_string = false;
|
self.in_string = false;
|
||||||
self.escape_next = false;
|
self.escape_next = false;
|
||||||
|
self.harmony_stream = None;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Process a single character for JSON parsing
|
/// Process a single character for JSON parsing
|
||||||
@@ -179,3 +183,20 @@ impl Default for ParseState {
|
|||||||
Self::new()
|
Self::new()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Placeholder container for the bookkeeping a token-aware Harmony parser keeps
/// while a response is being streamed.
///
/// Every field starts at its type's default (empty / zero / `false`).
#[derive(Debug, Clone, Default)]
pub struct HarmonyStreamState {
    /// Every token seen so far for the assistant response currently in flight.
    pub tokens: Vec<u32>,
    /// How many of those tokens the Harmony parser has already processed.
    pub processed_tokens: usize,
    /// How many tool calls have been emitted downstream.
    pub emitted_calls: usize,
    /// Analysis-channel content held back until it is flushed into normal text output.
    pub analysis_buffer: String,
    /// `true` once the current call's tool name has been surfaced.
    pub emitted_name: bool,
    /// `true` once the current call's arguments have been surfaced.
    pub emitted_args: bool,
}
|
||||||
|
|||||||
@@ -21,6 +21,12 @@ pub trait ToolParser: Send + Sync {
|
|||||||
|
|
||||||
/// Check if text contains tool calls in this parser's format
|
/// Check if text contains tool calls in this parser's format
|
||||||
fn detect_format(&self, text: &str) -> bool;
|
fn detect_format(&self, text: &str) -> bool;
|
||||||
|
|
||||||
|
/// Optionally expose a token-aware parser implementation.
|
||||||
|
/// Default returns `None`, meaning the parser only supports text input.
|
||||||
|
fn as_token_parser(&self) -> Option<&dyn TokenToolParser> {
|
||||||
|
None
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Trait for partial JSON parsing
|
/// Trait for partial JSON parsing
|
||||||
@@ -34,3 +40,19 @@ pub trait PartialJsonParser: Send + Sync {
|
|||||||
/// Get the maximum parsing depth
|
/// Get the maximum parsing depth
|
||||||
fn max_depth(&self) -> usize;
|
fn max_depth(&self) -> usize;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
pub trait TokenToolParser: ToolParser {
|
||||||
|
/// Parse complete tool calls when provided with raw token IDs.
|
||||||
|
async fn parse_complete_tokens(
|
||||||
|
&self,
|
||||||
|
tokens: &[u32],
|
||||||
|
) -> ToolParserResult<(String, Vec<ToolCall>)>;
|
||||||
|
|
||||||
|
/// Streaming parser entrypoint for token chunks.
|
||||||
|
async fn parse_incremental_tokens(
|
||||||
|
&self,
|
||||||
|
tokens: &[u32],
|
||||||
|
state: &mut ParseState,
|
||||||
|
) -> ToolParserResult<StreamResult>;
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user