[router] allow user to specify chat template path (#11549)

2025-10-13 13:47:57 -04:00
parent 7b59b0b8b0
commit 728af88781
13 changed files with 159 additions and 32 deletions
--- a/sgl-router/src/config/types.rs
+++ b/sgl-router/src/config/types.rs
@@ -67,6 +67,8 @@ pub struct RouterConfig {
    pub model_path: Option<String>,
    /// Explicit tokenizer path (overrides model_path tokenizer if provided)
    pub tokenizer_path: Option<String>,
+    /// Path to a chat template (optional; takes precedence over auto-discovery when provided)
+    pub chat_template: Option<String>,
    /// History backend configuration (memory or none, default: memory)
    #[serde(default = "default_history_backend")]
    pub history_backend: HistoryBackend,
@@ -450,6 +452,7 @@ impl Default for RouterConfig {
            connection_mode: ConnectionMode::Http,
            model_path: None,
            tokenizer_path: None,
+            chat_template: None,
            history_backend: default_history_backend(),
            oracle: None,
            reasoning_parser: None,
@@ -994,6 +997,7 @@ mod tests {
            connection_mode: ConnectionMode::Http,
            model_path: None,
            tokenizer_path: None,
+            chat_template: None,
            history_backend: default_history_backend(),
            oracle: None,
            reasoning_parser: None,
@@ -1061,6 +1065,7 @@ mod tests {
            connection_mode: ConnectionMode::Http,
            model_path: None,
            tokenizer_path: None,
+            chat_template: None,
            history_backend: default_history_backend(),
            oracle: None,
            reasoning_parser: None,
@@ -1124,6 +1129,7 @@ mod tests {
            connection_mode: ConnectionMode::Http,
            model_path: None,
            tokenizer_path: None,
+            chat_template: None,
            history_backend: default_history_backend(),
            oracle: None,
            reasoning_parser: None,
--- a/sgl-router/src/lib.rs
+++ b/sgl-router/src/lib.rs
@@ -90,6 +90,7 @@ struct Router {
    connection_mode: config::ConnectionMode,
    model_path: Option<String>,
    tokenizer_path: Option<String>,
+    chat_template: Option<String>,
    reasoning_parser: Option<String>,
    tool_call_parser: Option<String>,
 }
@@ -216,6 +217,7 @@ impl Router {
            enable_igw: self.enable_igw,
            model_path: self.model_path.clone(),
            tokenizer_path: self.tokenizer_path.clone(),
+            chat_template: self.chat_template.clone(),
            history_backend: config::HistoryBackend::Memory,
            oracle: None,
            reasoning_parser: self.reasoning_parser.clone(),
@@ -284,6 +286,7 @@ impl Router {
        rate_limit_tokens_per_second = None,
        model_path = None,
        tokenizer_path = None,
+        chat_template = None,
        reasoning_parser = None,
        tool_call_parser = None,
    ))]
@@ -345,6 +348,7 @@ impl Router {
        rate_limit_tokens_per_second: Option<i32>,
        model_path: Option<String>,
        tokenizer_path: Option<String>,
+        chat_template: Option<String>,
        reasoning_parser: Option<String>,
        tool_call_parser: Option<String>,
    ) -> PyResult<Self> {
@@ -420,6 +424,7 @@ impl Router {
            connection_mode,
            model_path,
            tokenizer_path,
+            chat_template,
            reasoning_parser,
            tool_call_parser,
        })
--- a/sgl-router/src/main.rs
+++ b/sgl-router/src/main.rs
@@ -255,6 +255,9 @@ struct CliArgs {
    #[arg(long)]
    tokenizer_path: Option<String>,

+    #[arg(long)]
+    chat_template: Option<String>,
+
    #[arg(long, default_value = "memory", value_parser = ["memory", "none", "oracle"])]
    history_backend: String,

@@ -561,6 +564,7 @@ impl CliArgs {
            rate_limit_tokens_per_second: None,
            model_path: self.model_path.clone(),
            tokenizer_path: self.tokenizer_path.clone(),
+            chat_template: self.chat_template.clone(),
            history_backend,
            oracle,
            reasoning_parser: self.reasoning_parser.clone(),
--- a/sgl-router/src/server.rs
+++ b/sgl-router/src/server.rs
@@ -82,28 +82,40 @@ impl AppContext {
            }
        };

-        let (tokenizer, reasoning_parser_factory, tool_parser_factory) =
-            if router_config.connection_mode == ConnectionMode::Grpc {
-                let tokenizer_path = router_config
-                    .tokenizer_path
-                    .clone()
-                    .or_else(|| router_config.model_path.clone())
-                    .ok_or_else(|| {
-                        "gRPC mode requires either --tokenizer-path or --model-path to be specified"
-                            .to_string()
-                    })?;
+        let (tokenizer, reasoning_parser_factory, tool_parser_factory) = if router_config
+            .connection_mode
+            == ConnectionMode::Grpc
+        {
+            let tokenizer_path = router_config
+                .tokenizer_path
+                .clone()
+                .or_else(|| router_config.model_path.clone())
+                .ok_or_else(|| {
+                    "gRPC mode requires either --tokenizer-path or --model-path to be specified"
+                        .to_string()
+                })?;

-                let tokenizer = Some(
-                    tokenizer_factory::create_tokenizer(&tokenizer_path)
-                        .map_err(|e| format!("Failed to create tokenizer: {e}"))?,
+            let tokenizer = Some(
+                    tokenizer_factory::create_tokenizer_with_chat_template_blocking(
+                        &tokenizer_path,
+                        router_config.chat_template.as_deref(),
+                    )
+                    .map_err(|e| {
+                        format!(
+                            "Failed to create tokenizer from '{}': {}. \
+                            Ensure the path is valid and points to a tokenizer file (tokenizer.json) \
+                            or a HuggingFace model ID. For directories, ensure they contain tokenizer files.",
+                            tokenizer_path, e
+                        )
+                    })?,
                );
-                let reasoning_parser_factory = Some(crate::reasoning_parser::ParserFactory::new());
-                let tool_parser_factory = Some(crate::tool_parser::ParserFactory::new());
+            let reasoning_parser_factory = Some(crate::reasoning_parser::ParserFactory::new());
+            let tool_parser_factory = Some(crate::tool_parser::ParserFactory::new());

-                (tokenizer, reasoning_parser_factory, tool_parser_factory)
-            } else {
-                (None, None, None)
-            };
+            (tokenizer, reasoning_parser_factory, tool_parser_factory)
+        } else {
+            (None, None, None)
+        };

        let worker_registry = Arc::new(WorkerRegistry::new());
        let policy_registry = Arc::new(PolicyRegistry::new(router_config.policy.clone()));
--- a/sgl-router/src/tokenizer/factory.rs
+++ b/sgl-router/src/tokenizer/factory.rs
@@ -4,6 +4,7 @@ use std::fs::File;
 use std::io::Read;
 use std::path::Path;
 use std::sync::Arc;
+use tracing::{debug, info};

 use super::huggingface::HuggingFaceTokenizer;
 use super::tiktoken::TiktokenTokenizer;
@@ -189,14 +190,57 @@ pub fn discover_chat_template_in_dir(dir: &Path) -> Option<String> {
    None
 }

+/// Selects the chat template to use and logs which source it came from.
+///
+/// A provided path takes priority and is returned as-is; otherwise `discovery_dir` is searched.
+/// `model_name` only appears in the debug log emitted when no template is found.
+fn resolve_and_log_chat_template(
+    provided_path: Option<&str>,
+    discovery_dir: &Path,
+    model_name: &str,
+) -> Option<String> {
+    let final_chat_template = provided_path
+        .map(|s| s.to_string())
+        .or_else(|| discover_chat_template_in_dir(discovery_dir));
+
+    match (&provided_path, &final_chat_template) {
+        (Some(provided), _) => {
+            info!("Using provided chat template: {}", provided);
+        }
+        (None, Some(discovered)) => {
+            info!(
+                "Auto-discovered chat template in '{}': {}",
+                discovery_dir.display(),
+                discovered
+            );
+        }
+        (None, None) => {
+            debug!(
+                "No chat template provided or discovered for model: {}",
+                model_name
+            );
+        }
+    }
+
+    final_chat_template
+}
+
 /// Factory function to create tokenizer from a model name or path (async version)
 pub async fn create_tokenizer_async(
    model_name_or_path: &str,
+) -> Result<Arc<dyn traits::Tokenizer>> {
+    create_tokenizer_async_with_chat_template(model_name_or_path, None).await
+}
+
+/// Factory function to create tokenizer with optional chat template (async version)
+pub async fn create_tokenizer_async_with_chat_template(
+    model_name_or_path: &str,
+    chat_template_path: Option<&str>,
 ) -> Result<Arc<dyn traits::Tokenizer>> {
    // Check if it's a file path
    let path = Path::new(model_name_or_path);
    if path.exists() {
-        return create_tokenizer_from_file(model_name_or_path);
+        return create_tokenizer_with_chat_template(model_name_or_path, chat_template_path);
    }

    // Check if it's a GPT model name that should use Tiktoken
@@ -216,8 +260,13 @@ pub async fn create_tokenizer_async(
            // Look for tokenizer.json in the cache directory
            let tokenizer_path = cache_dir.join("tokenizer.json");
            if tokenizer_path.exists() {
-                // Try to find a chat template file in the cache directory
-                let chat_template_path = discover_chat_template_in_dir(&cache_dir);
+                // Resolve chat template: provided path takes precedence over auto-discovery
+                let final_chat_template = resolve_and_log_chat_template(
+                    chat_template_path,
+                    &cache_dir,
+                    model_name_or_path,
+                );
+
                let tokenizer_path_str = tokenizer_path.to_str().ok_or_else(|| {
                    Error::msg(format!(
                        "Tokenizer path is not valid UTF-8: {:?}",
@@ -226,7 +275,7 @@ pub async fn create_tokenizer_async(
                })?;
                create_tokenizer_with_chat_template(
                    tokenizer_path_str,
-                    chat_template_path.as_deref(),
+                    final_chat_template.as_deref(),
                )
            } else {
                // Try other common tokenizer file names
@@ -234,13 +283,19 @@ pub async fn create_tokenizer_async(
                for file_name in &possible_files {
                    let file_path = cache_dir.join(file_name);
                    if file_path.exists() {
-                        let chat_template_path = discover_chat_template_in_dir(&cache_dir);
+                        // Resolve chat template: provided path takes precedence over auto-discovery
+                        let final_chat_template = resolve_and_log_chat_template(
+                            chat_template_path,
+                            &cache_dir,
+                            model_name_or_path,
+                        );
+
                        let file_path_str = file_path.to_str().ok_or_else(|| {
                            Error::msg(format!("File path is not valid UTF-8: {:?}", file_path))
                        })?;
                        return create_tokenizer_with_chat_template(
                            file_path_str,
-                            chat_template_path.as_deref(),
+                            final_chat_template.as_deref(),
                        );
                    }
                }
@@ -258,11 +313,22 @@ pub async fn create_tokenizer_async(
 }

 /// Factory function to create tokenizer from a model name or path (blocking version)
+///
+/// This delegates to `create_tokenizer_with_chat_template_blocking` with no chat template,
+/// which handles both local files and HuggingFace Hub downloads uniformly.
 pub fn create_tokenizer(model_name_or_path: &str) -> Result<Arc<dyn traits::Tokenizer>> {
+    create_tokenizer_with_chat_template_blocking(model_name_or_path, None)
+}
+
+/// Factory function to create tokenizer with optional chat template (blocking version)
+pub fn create_tokenizer_with_chat_template_blocking(
+    model_name_or_path: &str,
+    chat_template_path: Option<&str>,
+) -> Result<Arc<dyn traits::Tokenizer>> {
    // Check if it's a file path
    let path = Path::new(model_name_or_path);
    if path.exists() {
-        return create_tokenizer_from_file(model_name_or_path);
+        return create_tokenizer_with_chat_template(model_name_or_path, chat_template_path);
    }

    // Check if it's a GPT model name that should use Tiktoken
@@ -280,11 +346,19 @@ pub fn create_tokenizer(model_name_or_path: &str) -> Result<Arc<dyn traits::Toke
    // Check if we're already in a tokio runtime
    if let Ok(handle) = tokio::runtime::Handle::try_current() {
        // We're in a runtime, use block_in_place
-        tokio::task::block_in_place(|| handle.block_on(create_tokenizer_async(model_name_or_path)))
+        tokio::task::block_in_place(|| {
+            handle.block_on(create_tokenizer_async_with_chat_template(
+                model_name_or_path,
+                chat_template_path,
+            ))
+        })
    } else {
        // No runtime, create a temporary one
        let rt = tokio::runtime::Runtime::new()?;
-        rt.block_on(create_tokenizer_async(model_name_or_path))
+        rt.block_on(create_tokenizer_async_with_chat_template(
+            model_name_or_path,
+            chat_template_path,
+        ))
    }
 }

--- a/sgl-router/src/tokenizer/mod.rs
+++ b/sgl-router/src/tokenizer/mod.rs
@@ -23,8 +23,9 @@ mod tests;

 // Re-exports
 pub use factory::{
-    create_tokenizer, create_tokenizer_async, create_tokenizer_from_file,
-    create_tokenizer_with_chat_template, TokenizerType,
+    create_tokenizer, create_tokenizer_async, create_tokenizer_async_with_chat_template,
+    create_tokenizer_from_file, create_tokenizer_with_chat_template,
+    create_tokenizer_with_chat_template_blocking, TokenizerType,
 };
 pub use sequence::Sequence;
 pub use stop::{SequenceDecoderOutput, StopSequenceConfig, StopSequenceDecoder};