diff --git a/sgl-router/Cargo.toml b/sgl-router/Cargo.toml index 9d3602dca..9fe8a7e33 100644 --- a/sgl-router/Cargo.toml +++ b/sgl-router/Cargo.toml @@ -64,6 +64,7 @@ anyhow = "1.0" tokenizers = { version = "0.22.0" } tiktoken-rs = { version = "0.7.0" } minijinja = { version = "2.0", features = ["unstable_machinery", "json", "builtins"] } +minijinja-contrib = { version = "2.0", features = ["pycompat"] } rustls = { version = "0.23", default-features = false, features = ["ring", "std"] } hf-hub = { version = "0.4.3", features = ["tokio"] } rmcp = { version = "0.6.3", features = ["client", "server", diff --git a/sgl-router/src/routers/grpc/utils.rs b/sgl-router/src/routers/grpc/utils.rs index c82d764aa..086d580a6 100644 --- a/sgl-router/src/routers/grpc/utils.rs +++ b/sgl-router/src/routers/grpc/utils.rs @@ -382,7 +382,6 @@ pub fn process_chat_messages( let params = ChatTemplateParams { add_generation_prompt: true, - continue_final_message: request.continue_final_message, tools: tools_json.as_deref(), template_kwargs: final_template_kwargs, ..Default::default() diff --git a/sgl-router/src/tokenizer/chat_template.rs b/sgl-router/src/tokenizer/chat_template.rs index a575e8c44..387974f59 100644 --- a/sgl-router/src/tokenizer/chat_template.rs +++ b/sgl-router/src/tokenizer/chat_template.rs @@ -3,12 +3,16 @@ //! This module provides functionality to apply chat templates to messages, //! similar to HuggingFace transformers' apply_chat_template method. -use std::collections::HashMap; +use std::{collections::HashMap, fs}; use anyhow::{anyhow, Result}; use minijinja::{ context, - machinery::ast::{Expr, Stmt}, + machinery::{ + ast::{Expr, Stmt}, + parse, WhitespaceConfig, + }, + syntax::SyntaxConfig, Environment, Value, }; use serde_json; @@ -323,11 +327,6 @@ impl<'a> Detector<'a> { /// AST-based detection using minijinja's unstable machinery /// Single-pass detector with scope tracking fn detect_format_with_ast(template: &str) -> Option { - use minijinja::{ - machinery::{parse, WhitespaceConfig}, - syntax::SyntaxConfig, - }; - let ast = match parse( template, "template", @@ -350,7 +349,6 @@ fn detect_format_with_ast(template: &str) -> Option { #[derive(Default)] pub struct ChatTemplateParams<'a> { pub add_generation_prompt: bool, - pub continue_final_message: bool, pub tools: Option<&'a [serde_json::Value]>, pub documents: Option<&'a [serde_json::Value]>, pub template_kwargs: Option<&'a HashMap>, @@ -377,16 +375,15 @@ impl ChatTemplateProcessor { messages: &[serde_json::Value], params: ChatTemplateParams, ) -> Result { - // Validate incompatible options - if params.continue_final_message && params.add_generation_prompt { - return Err(anyhow!("continue_final_message and add_generation_prompt are not compatible. Use continue_final_message when you want the model to continue the final message, and add_generation_prompt when you want to add a header that will prompt it to start a new assistant message instead.")); - } let mut env = Environment::new(); // Register the template env.add_template("chat", &self.template) .map_err(|e| anyhow!("Failed to add template: {}", e))?; + // Enable Python method compatibility (e.g., str.startswith, str.endswith) + env.set_unknown_method_callback(minijinja_contrib::pycompat::unknown_method_callback); + // Get the template let tmpl = env .get_template("chat") @@ -423,8 +420,6 @@ impl ChatTemplateProcessor { /// Load chat template from tokenizer config JSON pub fn load_chat_template_from_config(config_path: &str) -> Result> { - use std::fs; - let content = fs::read_to_string(config_path)?; let config: serde_json::Value = serde_json::from_str(&content)?;