[router] allow user to specify chat template path (#11549)

This commit is contained in:
Simo Lin
2025-10-13 13:47:57 -04:00
committed by GitHub
parent 7b59b0b8b0
commit 728af88781
13 changed files with 159 additions and 32 deletions

View File

@@ -4,6 +4,7 @@ use std::fs::File;
use std::io::Read;
use std::path::Path;
use std::sync::Arc;
use tracing::{debug, info};
use super::huggingface::HuggingFaceTokenizer;
use super::tiktoken::TiktokenTokenizer;
@@ -189,14 +190,57 @@ pub fn discover_chat_template_in_dir(dir: &Path) -> Option<String> {
None
}
/// Helper function to resolve and log chat template selection
///
/// Resolves the final chat template to use by prioritizing provided path over auto-discovery,
/// and logs the source for debugging purposes.
/// Resolve which chat template to use and log the outcome.
///
/// An explicitly provided template path always wins; otherwise we fall back to
/// auto-discovery inside `discovery_dir`. Each branch emits a log line so the
/// selection can be traced at runtime: `info!` when a template was chosen,
/// `debug!` when none was found for `model_name`.
fn resolve_and_log_chat_template(
    provided_path: Option<&str>,
    discovery_dir: &Path,
    model_name: &str,
) -> Option<String> {
    // Explicit path takes precedence over anything discovered on disk.
    if let Some(explicit) = provided_path {
        info!("Using provided chat template: {}", explicit);
        return Some(explicit.to_string());
    }

    // No explicit template: attempt auto-discovery in the given directory.
    match discover_chat_template_in_dir(discovery_dir) {
        Some(found) => {
            info!(
                "Auto-discovered chat template in '{}': {}",
                discovery_dir.display(),
                found
            );
            Some(found)
        }
        None => {
            debug!(
                "No chat template provided or discovered for model: {}",
                model_name
            );
            None
        }
    }
}
/// Factory function to create tokenizer from a model name or path (async version)
///
/// Convenience wrapper that delegates to
/// `create_tokenizer_async_with_chat_template` with no explicit chat template,
/// so any chat template is resolved purely by auto-discovery.
pub async fn create_tokenizer_async(
    model_name_or_path: &str,
) -> Result<Arc<dyn traits::Tokenizer>> {
    create_tokenizer_async_with_chat_template(model_name_or_path, None).await
}
/// Factory function to create tokenizer with optional chat template (async version)
pub async fn create_tokenizer_async_with_chat_template(
model_name_or_path: &str,
chat_template_path: Option<&str>,
) -> Result<Arc<dyn traits::Tokenizer>> {
// Check if it's a file path
let path = Path::new(model_name_or_path);
if path.exists() {
return create_tokenizer_from_file(model_name_or_path);
return create_tokenizer_with_chat_template(model_name_or_path, chat_template_path);
}
// Check if it's a GPT model name that should use Tiktoken
@@ -216,8 +260,13 @@ pub async fn create_tokenizer_async(
// Look for tokenizer.json in the cache directory
let tokenizer_path = cache_dir.join("tokenizer.json");
if tokenizer_path.exists() {
// Try to find a chat template file in the cache directory
let chat_template_path = discover_chat_template_in_dir(&cache_dir);
// Resolve chat template: provided path takes precedence over auto-discovery
let final_chat_template = resolve_and_log_chat_template(
chat_template_path,
&cache_dir,
model_name_or_path,
);
let tokenizer_path_str = tokenizer_path.to_str().ok_or_else(|| {
Error::msg(format!(
"Tokenizer path is not valid UTF-8: {:?}",
@@ -226,7 +275,7 @@ pub async fn create_tokenizer_async(
})?;
create_tokenizer_with_chat_template(
tokenizer_path_str,
chat_template_path.as_deref(),
final_chat_template.as_deref(),
)
} else {
// Try other common tokenizer file names
@@ -234,13 +283,19 @@ pub async fn create_tokenizer_async(
for file_name in &possible_files {
let file_path = cache_dir.join(file_name);
if file_path.exists() {
let chat_template_path = discover_chat_template_in_dir(&cache_dir);
// Resolve chat template: provided path takes precedence over auto-discovery
let final_chat_template = resolve_and_log_chat_template(
chat_template_path,
&cache_dir,
model_name_or_path,
);
let file_path_str = file_path.to_str().ok_or_else(|| {
Error::msg(format!("File path is not valid UTF-8: {:?}", file_path))
})?;
return create_tokenizer_with_chat_template(
file_path_str,
chat_template_path.as_deref(),
final_chat_template.as_deref(),
);
}
}
@@ -258,11 +313,22 @@ pub async fn create_tokenizer_async(
}
/// Factory function to create tokenizer from a model name or path (blocking version)
///
/// This delegates to `create_tokenizer_with_chat_template_blocking` with no chat template,
/// which handles both local files and HuggingFace Hub downloads uniformly.
/// With `None` passed for the template, any chat template is resolved by
/// auto-discovery alongside the tokenizer files.
pub fn create_tokenizer(model_name_or_path: &str) -> Result<Arc<dyn traits::Tokenizer>> {
    create_tokenizer_with_chat_template_blocking(model_name_or_path, None)
}
/// Factory function to create tokenizer with optional chat template (blocking version)
pub fn create_tokenizer_with_chat_template_blocking(
model_name_or_path: &str,
chat_template_path: Option<&str>,
) -> Result<Arc<dyn traits::Tokenizer>> {
// Check if it's a file path
let path = Path::new(model_name_or_path);
if path.exists() {
return create_tokenizer_from_file(model_name_or_path);
return create_tokenizer_with_chat_template(model_name_or_path, chat_template_path);
}
// Check if it's a GPT model name that should use Tiktoken
@@ -280,11 +346,19 @@ pub fn create_tokenizer(model_name_or_path: &str) -> Result<Arc<dyn traits::Toke
// Check if we're already in a tokio runtime
if let Ok(handle) = tokio::runtime::Handle::try_current() {
// We're in a runtime, use block_in_place
tokio::task::block_in_place(|| handle.block_on(create_tokenizer_async(model_name_or_path)))
tokio::task::block_in_place(|| {
handle.block_on(create_tokenizer_async_with_chat_template(
model_name_or_path,
chat_template_path,
))
})
} else {
// No runtime, create a temporary one
let rt = tokio::runtime::Runtime::new()?;
rt.block_on(create_tokenizer_async(model_name_or_path))
rt.block_on(create_tokenizer_async_with_chat_template(
model_name_or_path,
chat_template_path,
))
}
}