[router] allow user to specify chat template path (#11549)

This commit is contained in:
Simo Lin
2025-10-13 13:47:57 -04:00
committed by GitHub
parent 7b59b0b8b0
commit 728af88781
13 changed files with 159 additions and 32 deletions

View File

@@ -4,6 +4,7 @@ use std::fs::File;
use std::io::Read;
use std::path::Path;
use std::sync::Arc;
use tracing::{debug, info};
use super::huggingface::HuggingFaceTokenizer;
use super::tiktoken::TiktokenTokenizer;
@@ -189,14 +190,57 @@ pub fn discover_chat_template_in_dir(dir: &Path) -> Option<String> {
None
}
/// Helper function to resolve and log chat template selection
///
/// Resolves the final chat template to use by prioritizing provided path over auto-discovery,
/// and logs the source for debugging purposes.
/// Resolve which chat template to use and log the outcome.
///
/// An explicitly provided template path always wins; otherwise we fall back to
/// auto-discovery inside `discovery_dir`. Each branch emits a log line so the
/// selection can be traced at runtime: `info!` when a template was chosen,
/// `debug!` when none was found for `model_name`.
fn resolve_and_log_chat_template(
    provided_path: Option<&str>,
    discovery_dir: &Path,
    model_name: &str,
) -> Option<String> {
    // Explicit path takes precedence over anything discovered on disk.
    if let Some(explicit) = provided_path {
        info!("Using provided chat template: {}", explicit);
        return Some(explicit.to_string());
    }

    // No explicit template: attempt auto-discovery in the given directory.
    match discover_chat_template_in_dir(discovery_dir) {
        Some(found) => {
            info!(
                "Auto-discovered chat template in '{}': {}",
                discovery_dir.display(),
                found
            );
            Some(found)
        }
        None => {
            debug!(
                "No chat template provided or discovered for model: {}",
                model_name
            );
            None
        }
    }
}
/// Factory function to create tokenizer from a model name or path (async version)
///
/// Convenience wrapper that delegates to
/// `create_tokenizer_async_with_chat_template` with no explicit chat template,
/// so any chat template is resolved purely by auto-discovery.
pub async fn create_tokenizer_async(
    model_name_or_path: &str,
) -> Result<Arc<dyn traits::Tokenizer>> {
    create_tokenizer_async_with_chat_template(model_name_or_path, None).await
}
/// Factory function to create tokenizer with optional chat template (async version)
pub async fn create_tokenizer_async_with_chat_template(
model_name_or_path: &str,
chat_template_path: Option<&str>,
) -> Result<Arc<dyn traits::Tokenizer>> {
// Check if it's a file path
let path = Path::new(model_name_or_path);
if path.exists() {
return create_tokenizer_from_file(model_name_or_path);
return create_tokenizer_with_chat_template(model_name_or_path, chat_template_path);
}
// Check if it's a GPT model name that should use Tiktoken
@@ -216,8 +260,13 @@ pub async fn create_tokenizer_async(
// Look for tokenizer.json in the cache directory
let tokenizer_path = cache_dir.join("tokenizer.json");
if tokenizer_path.exists() {
// Try to find a chat template file in the cache directory
let chat_template_path = discover_chat_template_in_dir(&cache_dir);
// Resolve chat template: provided path takes precedence over auto-discovery
let final_chat_template = resolve_and_log_chat_template(
chat_template_path,
&cache_dir,
model_name_or_path,
);
let tokenizer_path_str = tokenizer_path.to_str().ok_or_else(|| {
Error::msg(format!(
"Tokenizer path is not valid UTF-8: {:?}",
@@ -226,7 +275,7 @@ pub async fn create_tokenizer_async(
})?;
create_tokenizer_with_chat_template(
tokenizer_path_str,
chat_template_path.as_deref(),
final_chat_template.as_deref(),
)
} else {
// Try other common tokenizer file names
@@ -234,13 +283,19 @@ pub async fn create_tokenizer_async(
for file_name in &possible_files {
let file_path = cache_dir.join(file_name);
if file_path.exists() {
let chat_template_path = discover_chat_template_in_dir(&cache_dir);
// Resolve chat template: provided path takes precedence over auto-discovery
let final_chat_template = resolve_and_log_chat_template(
chat_template_path,
&cache_dir,
model_name_or_path,
);
let file_path_str = file_path.to_str().ok_or_else(|| {
Error::msg(format!("File path is not valid UTF-8: {:?}", file_path))
})?;
return create_tokenizer_with_chat_template(
file_path_str,
chat_template_path.as_deref(),
final_chat_template.as_deref(),
);
}
}
@@ -258,11 +313,22 @@ pub async fn create_tokenizer_async(
}
/// Factory function to create tokenizer from a model name or path (blocking version)
///
/// This delegates to `create_tokenizer_with_chat_template_blocking` with no chat template,
/// which handles both local files and HuggingFace Hub downloads uniformly.
/// With `None` passed for the template, any chat template is resolved by
/// auto-discovery alongside the tokenizer files.
pub fn create_tokenizer(model_name_or_path: &str) -> Result<Arc<dyn traits::Tokenizer>> {
    create_tokenizer_with_chat_template_blocking(model_name_or_path, None)
}
/// Factory function to create tokenizer with optional chat template (blocking version)
pub fn create_tokenizer_with_chat_template_blocking(
model_name_or_path: &str,
chat_template_path: Option<&str>,
) -> Result<Arc<dyn traits::Tokenizer>> {
// Check if it's a file path
let path = Path::new(model_name_or_path);
if path.exists() {
return create_tokenizer_from_file(model_name_or_path);
return create_tokenizer_with_chat_template(model_name_or_path, chat_template_path);
}
// Check if it's a GPT model name that should use Tiktoken
@@ -280,11 +346,19 @@ pub fn create_tokenizer(model_name_or_path: &str) -> Result<Arc<dyn traits::Toke
// Check if we're already in a tokio runtime
if let Ok(handle) = tokio::runtime::Handle::try_current() {
// We're in a runtime, use block_in_place
tokio::task::block_in_place(|| handle.block_on(create_tokenizer_async(model_name_or_path)))
tokio::task::block_in_place(|| {
handle.block_on(create_tokenizer_async_with_chat_template(
model_name_or_path,
chat_template_path,
))
})
} else {
// No runtime, create a temporary one
let rt = tokio::runtime::Runtime::new()?;
rt.block_on(create_tokenizer_async(model_name_or_path))
rt.block_on(create_tokenizer_async_with_chat_template(
model_name_or_path,
chat_template_path,
))
}
}