[router][grpc] Consolidate parser checks for chat completions (#11439)
This commit is contained in:
@@ -861,6 +861,44 @@ impl ResponseProcessingStage {
|
|||||||
let chat_request = ctx.chat_request_arc();
|
let chat_request = ctx.chat_request_arc();
|
||||||
let history_tool_calls_count = utils::get_history_tool_calls_count(&chat_request);
|
let history_tool_calls_count = utils::get_history_tool_calls_count(&chat_request);
|
||||||
|
|
||||||
|
// Check parser availability once upfront (not per choice)
|
||||||
|
let reasoning_parser_available = chat_request.separate_reasoning
|
||||||
|
&& utils::check_reasoning_parser_availability(
|
||||||
|
&self.processor.reasoning_parser_factory,
|
||||||
|
self.processor.configured_reasoning_parser.as_ref(),
|
||||||
|
&chat_request.model,
|
||||||
|
);
|
||||||
|
|
||||||
|
let tool_choice_enabled = !matches!(
|
||||||
|
&chat_request.tool_choice,
|
||||||
|
Some(crate::protocols::spec::ToolChoice::Value(
|
||||||
|
crate::protocols::spec::ToolChoiceValue::None
|
||||||
|
))
|
||||||
|
);
|
||||||
|
|
||||||
|
let tool_parser_available = tool_choice_enabled
|
||||||
|
&& chat_request.tools.is_some()
|
||||||
|
&& utils::check_tool_parser_availability(
|
||||||
|
&self.processor.tool_parser_factory,
|
||||||
|
self.processor.configured_tool_parser.as_ref(),
|
||||||
|
&chat_request.model,
|
||||||
|
);
|
||||||
|
|
||||||
|
// Log once per request (not per choice)
|
||||||
|
if chat_request.separate_reasoning && !reasoning_parser_available {
|
||||||
|
debug!(
|
||||||
|
"No reasoning parser found for model '{}', skipping reasoning parsing",
|
||||||
|
chat_request.model
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
if chat_request.tools.is_some() && tool_choice_enabled && !tool_parser_available {
|
||||||
|
debug!(
|
||||||
|
"No tool parser found for model '{}', skipping tool call parsing",
|
||||||
|
chat_request.model
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
let stop_decoder = ctx
|
let stop_decoder = ctx
|
||||||
.state
|
.state
|
||||||
.response
|
.response
|
||||||
@@ -878,6 +916,8 @@ impl ResponseProcessingStage {
|
|||||||
&chat_request,
|
&chat_request,
|
||||||
stop_decoder,
|
stop_decoder,
|
||||||
history_tool_calls_count,
|
history_tool_calls_count,
|
||||||
|
reasoning_parser_available,
|
||||||
|
tool_parser_available,
|
||||||
)
|
)
|
||||||
.await
|
.await
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -30,8 +30,8 @@ pub struct ResponseProcessor {
|
|||||||
pub tokenizer: Arc<dyn Tokenizer>,
|
pub tokenizer: Arc<dyn Tokenizer>,
|
||||||
pub tool_parser_factory: ToolParserFactory,
|
pub tool_parser_factory: ToolParserFactory,
|
||||||
pub reasoning_parser_factory: ReasoningParserFactory,
|
pub reasoning_parser_factory: ReasoningParserFactory,
|
||||||
configured_tool_parser: Option<String>,
|
pub configured_tool_parser: Option<String>,
|
||||||
configured_reasoning_parser: Option<String>,
|
pub configured_reasoning_parser: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ResponseProcessor {
|
impl ResponseProcessor {
|
||||||
@@ -52,6 +52,7 @@ impl ResponseProcessor {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Process a single choice from GenerateComplete response (EXACT COPY from router.rs:1573-1725)
|
/// Process a single choice from GenerateComplete response (EXACT COPY from router.rs:1573-1725)
|
||||||
|
#[allow(clippy::too_many_arguments)]
|
||||||
pub async fn process_single_choice(
|
pub async fn process_single_choice(
|
||||||
&self,
|
&self,
|
||||||
complete: &proto::GenerateComplete,
|
complete: &proto::GenerateComplete,
|
||||||
@@ -59,6 +60,8 @@ impl ResponseProcessor {
|
|||||||
original_request: &ChatCompletionRequest,
|
original_request: &ChatCompletionRequest,
|
||||||
stop_decoder: &mut StopSequenceDecoder,
|
stop_decoder: &mut StopSequenceDecoder,
|
||||||
history_tool_calls_count: usize,
|
history_tool_calls_count: usize,
|
||||||
|
reasoning_parser_available: bool,
|
||||||
|
tool_parser_available: bool,
|
||||||
) -> Result<ChatChoice, String> {
|
) -> Result<ChatChoice, String> {
|
||||||
stop_decoder.reset();
|
stop_decoder.reset();
|
||||||
// Decode tokens
|
// Decode tokens
|
||||||
@@ -89,8 +92,8 @@ impl ResponseProcessor {
|
|||||||
let mut reasoning_text: Option<String> = None;
|
let mut reasoning_text: Option<String> = None;
|
||||||
let mut processed_text = final_text;
|
let mut processed_text = final_text;
|
||||||
|
|
||||||
// Check if reasoning parsing is enabled and separate_reasoning is requested
|
// Check if reasoning parsing is enabled and parser is available
|
||||||
if original_request.separate_reasoning {
|
if original_request.separate_reasoning && reasoning_parser_available {
|
||||||
let pooled_parser = utils::get_reasoning_parser(
|
let pooled_parser = utils::get_reasoning_parser(
|
||||||
&self.reasoning_parser_factory,
|
&self.reasoning_parser_factory,
|
||||||
self.configured_reasoning_parser.as_ref(),
|
self.configured_reasoning_parser.as_ref(),
|
||||||
@@ -113,8 +116,6 @@ impl ResponseProcessor {
|
|||||||
|
|
||||||
// Step 2: Handle tool call parsing
|
// Step 2: Handle tool call parsing
|
||||||
let mut tool_calls: Option<Vec<ToolCall>> = None;
|
let mut tool_calls: Option<Vec<ToolCall>> = None;
|
||||||
|
|
||||||
// Check if tool calls should be processed
|
|
||||||
let tool_choice_enabled = !matches!(
|
let tool_choice_enabled = !matches!(
|
||||||
&original_request.tool_choice,
|
&original_request.tool_choice,
|
||||||
Some(ToolChoice::Value(ToolChoiceValue::None))
|
Some(ToolChoice::Value(ToolChoiceValue::None))
|
||||||
@@ -134,7 +135,7 @@ impl ResponseProcessor {
|
|||||||
&processed_text,
|
&processed_text,
|
||||||
&original_request.tool_choice,
|
&original_request.tool_choice,
|
||||||
);
|
);
|
||||||
} else {
|
} else if tool_parser_available {
|
||||||
(tool_calls, processed_text) = self
|
(tool_calls, processed_text) = self
|
||||||
.parse_tool_calls(
|
.parse_tool_calls(
|
||||||
&processed_text,
|
&processed_text,
|
||||||
|
|||||||
@@ -195,41 +195,29 @@ impl StreamingProcessor {
|
|||||||
let system_fingerprint = dispatch.weight_version.as_deref();
|
let system_fingerprint = dispatch.weight_version.as_deref();
|
||||||
|
|
||||||
// Check parser availability once upfront (log warning only once per request)
|
// Check parser availability once upfront (log warning only once per request)
|
||||||
let reasoning_parser_available = if separate_reasoning {
|
let reasoning_parser_available = separate_reasoning
|
||||||
if let Some(parser_name) = self.configured_reasoning_parser.as_ref() {
|
&& utils::check_reasoning_parser_availability(
|
||||||
self.reasoning_parser_factory
|
&self.reasoning_parser_factory,
|
||||||
.registry()
|
self.configured_reasoning_parser.as_ref(),
|
||||||
.has_parser(parser_name)
|
model,
|
||||||
} else {
|
);
|
||||||
self.reasoning_parser_factory
|
|
||||||
.registry()
|
|
||||||
.has_parser_for_model(model)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
false
|
|
||||||
};
|
|
||||||
|
|
||||||
let tool_parser_available = if tools.is_some() {
|
let tool_parser_available = tools.is_some()
|
||||||
if let Some(parser_name) = self.configured_tool_parser.as_ref() {
|
&& utils::check_tool_parser_availability(
|
||||||
self.tool_parser_factory.registry().has_parser(parser_name)
|
&self.tool_parser_factory,
|
||||||
} else {
|
self.configured_tool_parser.as_ref(),
|
||||||
self.tool_parser_factory
|
model,
|
||||||
.registry()
|
);
|
||||||
.has_parser_for_model(model)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
false
|
|
||||||
};
|
|
||||||
|
|
||||||
if separate_reasoning && !reasoning_parser_available {
|
if separate_reasoning && !reasoning_parser_available {
|
||||||
warn!(
|
debug!(
|
||||||
"No reasoning parser found for model '{}', skipping reasoning parsing",
|
"No reasoning parser found for model '{}', skipping reasoning parsing",
|
||||||
model
|
model
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
if tools.is_some() && !tool_parser_available {
|
if tools.is_some() && !tool_parser_available {
|
||||||
warn!(
|
debug!(
|
||||||
"No tool parser found for model '{}', skipping tool call parsing",
|
"No tool parser found for model '{}', skipping tool call parsing",
|
||||||
model
|
model
|
||||||
);
|
);
|
||||||
|
|||||||
@@ -675,6 +675,34 @@ pub fn generate_tool_call_id(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Check if a reasoning parser is available for the given model
|
||||||
|
pub fn check_reasoning_parser_availability(
|
||||||
|
reasoning_parser_factory: &crate::reasoning_parser::ParserFactory,
|
||||||
|
configured_parser: Option<&String>,
|
||||||
|
model: &str,
|
||||||
|
) -> bool {
|
||||||
|
if let Some(parser_name) = configured_parser {
|
||||||
|
reasoning_parser_factory.registry().has_parser(parser_name)
|
||||||
|
} else {
|
||||||
|
reasoning_parser_factory
|
||||||
|
.registry()
|
||||||
|
.has_parser_for_model(model)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check if a tool parser is available for the given model
|
||||||
|
pub fn check_tool_parser_availability(
|
||||||
|
tool_parser_factory: &crate::tool_parser::ParserFactory,
|
||||||
|
configured_parser: Option<&String>,
|
||||||
|
model: &str,
|
||||||
|
) -> bool {
|
||||||
|
if let Some(parser_name) = configured_parser {
|
||||||
|
tool_parser_factory.registry().has_parser(parser_name)
|
||||||
|
} else {
|
||||||
|
tool_parser_factory.registry().has_parser_for_model(model)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Get the appropriate reasoning parser for a model
|
/// Get the appropriate reasoning parser for a model
|
||||||
///
|
///
|
||||||
/// If a parser name is explicitly configured, use that parser.
|
/// If a parser name is explicitly configured, use that parser.
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ use tokio::sync::Mutex;
|
|||||||
|
|
||||||
use crate::tool_parser::parsers::{
|
use crate::tool_parser::parsers::{
|
||||||
DeepSeekParser, Glm4MoeParser, GptOssHarmonyParser, GptOssParser, JsonParser, KimiK2Parser,
|
DeepSeekParser, Glm4MoeParser, GptOssHarmonyParser, GptOssParser, JsonParser, KimiK2Parser,
|
||||||
LlamaParser, MistralParser, PythonicParser, QwenParser, Step3Parser,
|
LlamaParser, MistralParser, PassthroughParser, PythonicParser, QwenParser, Step3Parser,
|
||||||
};
|
};
|
||||||
use crate::tool_parser::traits::ToolParser;
|
use crate::tool_parser::traits::ToolParser;
|
||||||
|
|
||||||
@@ -36,7 +36,7 @@ impl ParserRegistry {
|
|||||||
creators: Arc::new(RwLock::new(HashMap::new())),
|
creators: Arc::new(RwLock::new(HashMap::new())),
|
||||||
pool: Arc::new(RwLock::new(HashMap::new())),
|
pool: Arc::new(RwLock::new(HashMap::new())),
|
||||||
model_mapping: Arc::new(RwLock::new(HashMap::new())),
|
model_mapping: Arc::new(RwLock::new(HashMap::new())),
|
||||||
default_parser: Arc::new(RwLock::new("json".to_string())),
|
default_parser: Arc::new(RwLock::new("passthrough".to_string())),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -124,10 +124,9 @@ impl ParserRegistry {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if default parser exists
|
// Return false if no specific parser found for this model
|
||||||
let default = self.default_parser.read().unwrap().clone();
|
// (get_pooled will still fall back to default parser)
|
||||||
let creators = self.creators.read().unwrap();
|
false
|
||||||
creators.contains_key(&default)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Create a fresh (non-pooled) parser instance for a specific model.
|
/// Create a fresh (non-pooled) parser instance for a specific model.
|
||||||
@@ -228,6 +227,7 @@ impl ParserFactory {
|
|||||||
let registry = ParserRegistry::new();
|
let registry = ParserRegistry::new();
|
||||||
|
|
||||||
// Register default parsers
|
// Register default parsers
|
||||||
|
registry.register_parser("passthrough", || Box::new(PassthroughParser::new()));
|
||||||
registry.register_parser("json", || Box::new(JsonParser::new()));
|
registry.register_parser("json", || Box::new(JsonParser::new()));
|
||||||
registry.register_parser("mistral", || Box::new(MistralParser::new()));
|
registry.register_parser("mistral", || Box::new(MistralParser::new()));
|
||||||
registry.register_parser("qwen", || Box::new(QwenParser::new()));
|
registry.register_parser("qwen", || Box::new(QwenParser::new()));
|
||||||
@@ -311,15 +311,15 @@ impl ParserFactory {
|
|||||||
|
|
||||||
/// Get a pooled parser for the given model ID.
|
/// Get a pooled parser for the given model ID.
|
||||||
/// Returns a shared instance that can be used concurrently.
|
/// Returns a shared instance that can be used concurrently.
|
||||||
/// Falls back to JSON parser if model is not recognized.
|
/// Falls back to passthrough parser if model is not recognized.
|
||||||
pub fn get_pooled(&self, model_id: &str) -> PooledParser {
|
pub fn get_pooled(&self, model_id: &str) -> PooledParser {
|
||||||
self.registry
|
self.registry
|
||||||
.get_pooled_for_model(model_id)
|
.get_pooled_for_model(model_id)
|
||||||
.unwrap_or_else(|| {
|
.unwrap_or_else(|| {
|
||||||
// Fallback to JSON parser
|
// Fallback to passthrough parser (no-op, returns text unchanged)
|
||||||
self.registry
|
self.registry
|
||||||
.get_pooled_parser("json")
|
.get_pooled_parser("passthrough")
|
||||||
.expect("JSON parser should always be registered")
|
.expect("Passthrough parser should always be registered")
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ pub mod json_parser;
|
|||||||
pub mod kimik2_parser;
|
pub mod kimik2_parser;
|
||||||
pub mod llama_parser;
|
pub mod llama_parser;
|
||||||
pub mod mistral_parser;
|
pub mod mistral_parser;
|
||||||
|
pub mod passthrough_parser;
|
||||||
pub mod pythonic_parser;
|
pub mod pythonic_parser;
|
||||||
pub mod qwen_parser;
|
pub mod qwen_parser;
|
||||||
pub mod step3_parser;
|
pub mod step3_parser;
|
||||||
@@ -27,6 +28,7 @@ pub use json_parser::JsonParser;
|
|||||||
pub use kimik2_parser::KimiK2Parser;
|
pub use kimik2_parser::KimiK2Parser;
|
||||||
pub use llama_parser::LlamaParser;
|
pub use llama_parser::LlamaParser;
|
||||||
pub use mistral_parser::MistralParser;
|
pub use mistral_parser::MistralParser;
|
||||||
|
pub use passthrough_parser::PassthroughParser;
|
||||||
pub use pythonic_parser::PythonicParser;
|
pub use pythonic_parser::PythonicParser;
|
||||||
pub use qwen_parser::QwenParser;
|
pub use qwen_parser::QwenParser;
|
||||||
pub use step3_parser::Step3Parser;
|
pub use step3_parser::Step3Parser;
|
||||||
|
|||||||
50
sgl-router/src/tool_parser/parsers/passthrough_parser.rs
Normal file
50
sgl-router/src/tool_parser/parsers/passthrough_parser.rs
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
//! Passthrough parser that returns text unchanged
|
||||||
|
//!
|
||||||
|
//! This parser is used as a fallback for unknown models where no specific
|
||||||
|
//! tool call parsing should be performed. It simply returns the input text
|
||||||
|
//! with no tool calls detected.
|
||||||
|
|
||||||
|
use crate::protocols::spec::Tool;
|
||||||
|
use crate::tool_parser::errors::ParserResult;
|
||||||
|
use crate::tool_parser::traits::ToolParser;
|
||||||
|
use crate::tool_parser::types::{StreamingParseResult, ToolCall, ToolCallItem};
|
||||||
|
use async_trait::async_trait;
|
||||||
|
|
||||||
|
/// Passthrough parser that returns text unchanged with no tool calls.
///
/// The type is a stateless unit struct, so it is free to copy and can be
/// debug-printed like any other public type.
#[derive(Debug, Clone, Copy, Default)]
pub struct PassthroughParser;

impl PassthroughParser {
    /// Create a new passthrough parser.
    ///
    /// Equivalent to [`PassthroughParser::default`]; usable in `const` contexts.
    #[must_use]
    pub const fn new() -> Self {
        Self
    }
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl ToolParser for PassthroughParser {
|
||||||
|
async fn parse_complete(&self, output: &str) -> ParserResult<(String, Vec<ToolCall>)> {
|
||||||
|
// Return text unchanged with no tool calls
|
||||||
|
Ok((output.to_string(), vec![]))
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn parse_incremental(
|
||||||
|
&mut self,
|
||||||
|
chunk: &str,
|
||||||
|
_tools: &[Tool],
|
||||||
|
) -> ParserResult<StreamingParseResult> {
|
||||||
|
// Return chunk unchanged with no tool calls
|
||||||
|
Ok(StreamingParseResult {
|
||||||
|
normal_text: chunk.to_string(),
|
||||||
|
calls: vec![],
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn has_tool_markers(&self, _text: &str) -> bool {
|
||||||
|
// Passthrough never detects tool calls
|
||||||
|
false
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_unstreamed_tool_args(&self) -> Option<Vec<ToolCallItem>> {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user