router-grpc: Support jinja chat template content format detection (#10832)

This commit is contained in:
Chang Su
2025-09-24 11:45:01 -07:00
committed by GitHub
parent adba172fd1
commit 9209b209be
9 changed files with 1276 additions and 353 deletions

View File

@@ -1,5 +1,18 @@
// gRPC Router Implementation
use std::collections::HashMap;
use std::sync::Arc;
use std::time::Duration;
use async_trait::async_trait;
use axum::{
body::Body,
extract::Request,
http::{HeaderMap, StatusCode},
response::{IntoResponse, Response},
};
use tracing::{debug, error, info, warn};
use crate::config::types::RetryConfig;
use crate::core::{
BasicWorkerBuilder, CircuitBreakerConfig, HealthConfig, WorkerRegistry, WorkerType,
@@ -7,27 +20,16 @@ use crate::core::{
use crate::grpc::{proto, SglangSchedulerClient};
use crate::metrics::RouterMetrics;
use crate::policies::{LoadBalancingPolicy, PolicyRegistry};
use crate::protocols::spec::{
ChatCompletionRequest, ChatMessage, ContentPart, ResponseFormat, StringOrArray,
UserMessageContent,
};
use crate::protocols::spec::{ChatCompletionRequest, ResponseFormat, StringOrArray};
use crate::reasoning_parser::ParserFactory;
use crate::routers::RouterTrait;
use crate::tokenizer::{chat_template::ChatMessage as TokenizerChatMessage, traits::Tokenizer};
use crate::tokenizer::traits::Tokenizer;
use crate::tool_parser::ParserRegistry;
use async_trait::async_trait;
use axum::{
body::Body,
extract::Request,
http::{HeaderMap, StatusCode},
response::{IntoResponse, Response},
};
use std::collections::HashMap;
use std::sync::Arc;
use std::time::Duration;
use tracing::{debug, error, info, warn};
use uuid::Uuid;
use crate::tokenizer::chat_template::ChatTemplateContentFormat;
use serde_json::Value;
// Data structures for processing
#[derive(Debug)]
pub struct ProcessedMessages {
@@ -290,16 +292,19 @@ impl GrpcRouter {
&self,
request: &ChatCompletionRequest,
) -> Result<ProcessedMessages, String> {
let tokenizer_messages = self.convert_messages_for_tokenizer(&request.messages)?;
// Use the tokenizer's chat template - we require HuggingFace tokenizer for gRPC
let formatted_text = if let Some(hf_tokenizer) =
self.tokenizer
.as_any()
.downcast_ref::<crate::tokenizer::HuggingFaceTokenizer>()
{
// Get content format and transform messages accordingly
let content_format = hf_tokenizer.chat_template_content_format();
let transformed_messages =
Self::transform_messages_for_content_format(&request.messages, content_format)?;
hf_tokenizer
.apply_chat_template(&tokenizer_messages, true)
.apply_chat_template(&transformed_messages, true)
.map_err(|e| format!("Failed to apply chat template: {}", e))?
} else {
return Err(
@@ -317,46 +322,76 @@ impl GrpcRouter {
})
}
/// Convert spec ChatMessage enum to tokenizer ChatMessage struct
fn convert_messages_for_tokenizer(
&self,
messages: &[ChatMessage],
) -> Result<Vec<TokenizerChatMessage>, String> {
let mut converted = Vec::new();
/// Transform messages based on content format for ANY message type
fn transform_messages_for_content_format(
messages: &[crate::protocols::spec::ChatMessage],
content_format: crate::tokenizer::chat_template::ChatTemplateContentFormat,
) -> Result<Vec<serde_json::Value>, String> {
messages
.iter()
.map(|message| {
let mut message_json = serde_json::to_value(message)
.map_err(|e| format!("Failed to serialize message: {}", e))?;
for message in messages {
let tokenizer_msg = match message {
ChatMessage::System { content, .. } => TokenizerChatMessage::new("system", content),
ChatMessage::User { content, .. } => {
let text_content = match content {
UserMessageContent::Text(text) => text.clone(),
UserMessageContent::Parts(parts) => {
// Simple text extraction for now - multimodal is placeholder
parts
.iter()
.filter_map(|part| match part {
ContentPart::Text { text } => Some(text.as_str()),
ContentPart::ImageUrl { .. } => None, // Skip images for now
})
.collect::<Vec<&str>>()
.join(" ")
}
};
TokenizerChatMessage::new("user", text_content)
if let Some(obj) = message_json.as_object_mut() {
if let Some(content_value) = obj.get_mut("content") {
Self::transform_content_field(content_value, content_format);
}
}
ChatMessage::Assistant { content, .. } => {
// Simple content extraction - no special tool/reasoning formatting
TokenizerChatMessage::new("assistant", content.as_deref().unwrap_or(""))
Ok(message_json)
})
.collect()
}
/// Transform a single content field based on content format
fn transform_content_field(
content_value: &mut Value,
content_format: ChatTemplateContentFormat,
) {
let Some(content_array) = content_value.as_array() else {
return; // Not multimodal, keep as-is
};
match content_format {
ChatTemplateContentFormat::String => {
// Extract and join text parts only
let text_parts: Vec<String> = content_array
.iter()
.filter_map(|part| {
part.as_object()?
.get("type")?
.as_str()
.filter(|&t| t == "text")
.and_then(|_| part.as_object()?.get("text")?.as_str())
.map(String::from)
})
.collect();
if !text_parts.is_empty() {
*content_value = Value::String(text_parts.join(" "));
}
ChatMessage::Tool { content, .. } => TokenizerChatMessage::new("tool", content),
ChatMessage::Function { content, .. } => {
TokenizerChatMessage::new("function", content)
}
};
converted.push(tokenizer_msg);
}
ChatTemplateContentFormat::OpenAI => {
// Replace media URLs with simple type placeholders
let processed_parts: Vec<Value> = content_array
.iter()
.map(|part| {
part.as_object()
.and_then(|obj| obj.get("type")?.as_str())
.and_then(|type_str| match type_str {
"image_url" => Some(serde_json::json!({"type": "image"})),
"video_url" => Some(serde_json::json!({"type": "video"})),
"audio_url" => Some(serde_json::json!({"type": "audio"})),
_ => None,
})
.unwrap_or_else(|| part.clone())
})
.collect();
*content_value = Value::Array(processed_parts);
}
}
Ok(converted)
}
/// Build gRPC SamplingParams from OpenAI request
@@ -636,3 +671,260 @@ impl RouterTrait for GrpcRouter {
(StatusCode::SERVICE_UNAVAILABLE).into_response()
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::protocols::spec::{ChatMessage, ContentPart, ImageUrl, UserMessageContent};
use crate::tokenizer::chat_template::ChatTemplateContentFormat;
use serde_json::json;
#[test]
fn test_transform_messages_string_format() {
let messages = vec![ChatMessage::User {
role: "user".to_string(),
content: UserMessageContent::Parts(vec![
ContentPart::Text {
text: "Hello".to_string(),
},
ContentPart::ImageUrl {
image_url: ImageUrl {
url: "https://example.com/image.jpg".to_string(),
detail: None,
},
},
ContentPart::Text {
text: "World".to_string(),
},
]),
name: None,
}];
let result = GrpcRouter::transform_messages_for_content_format(
&messages,
ChatTemplateContentFormat::String,
)
.unwrap();
assert_eq!(result.len(), 1);
let transformed_message = &result[0];
// Should flatten multimodal content to text only
assert_eq!(
transformed_message["content"].as_str().unwrap(),
"Hello World"
);
assert_eq!(transformed_message["role"].as_str().unwrap(), "user");
}
#[test]
fn test_transform_messages_openai_format() {
let messages = vec![ChatMessage::User {
role: "user".to_string(),
content: UserMessageContent::Parts(vec![
ContentPart::Text {
text: "Describe this image:".to_string(),
},
ContentPart::ImageUrl {
image_url: ImageUrl {
url: "https://example.com/image.jpg".to_string(),
detail: Some("high".to_string()),
},
},
]),
name: None,
}];
let result = GrpcRouter::transform_messages_for_content_format(
&messages,
ChatTemplateContentFormat::OpenAI,
)
.unwrap();
assert_eq!(result.len(), 1);
let transformed_message = &result[0];
// Should replace media URLs with simple type placeholders
let content_array = transformed_message["content"].as_array().unwrap();
assert_eq!(content_array.len(), 2);
// Text part should remain unchanged
assert_eq!(content_array[0]["type"], "text");
assert_eq!(content_array[0]["text"], "Describe this image:");
// Image part should be replaced with simple type placeholder
assert_eq!(content_array[1], json!({"type": "image"}));
}
#[test]
fn test_transform_messages_simple_string_content() {
let messages = vec![ChatMessage::User {
role: "user".to_string(),
content: UserMessageContent::Text("Simple text message".to_string()),
name: None,
}];
let result = GrpcRouter::transform_messages_for_content_format(
&messages,
ChatTemplateContentFormat::String,
)
.unwrap();
assert_eq!(result.len(), 1);
let transformed_message = &result[0];
// Simple string content should remain unchanged
assert_eq!(
transformed_message["content"].as_str().unwrap(),
"Simple text message"
);
}
#[test]
fn test_transform_messages_assistant_message() {
let messages = vec![ChatMessage::Assistant {
role: "assistant".to_string(),
content: Some("Assistant response".to_string()),
name: None,
tool_calls: None,
function_call: None,
reasoning_content: None,
}];
let result = GrpcRouter::transform_messages_for_content_format(
&messages,
ChatTemplateContentFormat::String,
)
.unwrap();
assert_eq!(result.len(), 1);
let transformed_message = &result[0];
assert_eq!(transformed_message["role"].as_str().unwrap(), "assistant");
assert_eq!(
transformed_message["content"].as_str().unwrap(),
"Assistant response"
);
}
#[test]
fn test_transform_messages_multiple_messages() {
let messages = vec![
ChatMessage::System {
role: "system".to_string(),
content: "System prompt".to_string(),
name: None,
},
ChatMessage::User {
role: "user".to_string(),
content: UserMessageContent::Parts(vec![
ContentPart::Text {
text: "User message".to_string(),
},
ContentPart::ImageUrl {
image_url: ImageUrl {
url: "https://example.com/image.jpg".to_string(),
detail: None,
},
},
]),
name: None,
},
];
let result = GrpcRouter::transform_messages_for_content_format(
&messages,
ChatTemplateContentFormat::String,
)
.unwrap();
assert_eq!(result.len(), 2);
// System message should remain unchanged
assert_eq!(result[0]["role"].as_str().unwrap(), "system");
assert_eq!(result[0]["content"].as_str().unwrap(), "System prompt");
// User message should be flattened to text only
assert_eq!(result[1]["role"].as_str().unwrap(), "user");
assert_eq!(result[1]["content"].as_str().unwrap(), "User message");
}
#[test]
fn test_transform_messages_empty_text_parts() {
let messages = vec![ChatMessage::User {
role: "user".to_string(),
content: UserMessageContent::Parts(vec![ContentPart::ImageUrl {
image_url: ImageUrl {
url: "https://example.com/image.jpg".to_string(),
detail: None,
},
}]),
name: None,
}];
let result = GrpcRouter::transform_messages_for_content_format(
&messages,
ChatTemplateContentFormat::String,
)
.unwrap();
assert_eq!(result.len(), 1);
let transformed_message = &result[0];
// Should keep original multimodal content when no text parts exist
assert!(transformed_message["content"].is_array());
}
#[test]
fn test_transform_messages_mixed_content_types() {
// Test with both text and multimodal content
let messages = vec![
ChatMessage::User {
role: "user".to_string(),
content: UserMessageContent::Text("Plain text".to_string()),
name: None,
},
ChatMessage::User {
role: "user".to_string(),
content: UserMessageContent::Parts(vec![
ContentPart::Text {
text: "With image".to_string(),
},
ContentPart::ImageUrl {
image_url: ImageUrl {
url: "https://example.com/image.jpg".to_string(),
detail: Some("low".to_string()),
},
},
]),
name: None,
},
];
// Test String format
let result_string = GrpcRouter::transform_messages_for_content_format(
&messages,
ChatTemplateContentFormat::String,
)
.unwrap();
assert_eq!(result_string.len(), 2);
assert_eq!(result_string[0]["content"].as_str().unwrap(), "Plain text");
assert_eq!(result_string[1]["content"].as_str().unwrap(), "With image");
// Test OpenAI format
let result_openai = GrpcRouter::transform_messages_for_content_format(
&messages,
ChatTemplateContentFormat::OpenAI,
)
.unwrap();
assert_eq!(result_openai.len(), 2);
assert_eq!(result_openai[0]["content"].as_str().unwrap(), "Plain text");
let content_array = result_openai[1]["content"].as_array().unwrap();
assert_eq!(content_array.len(), 2);
assert_eq!(content_array[0]["type"], "text");
assert_eq!(content_array[1], json!({"type": "image"}));
}
}