[router] fix chat template loading and tokenizer path (#10999)
This commit is contained in:
@@ -314,3 +314,245 @@ fn test_thread_safety() {
|
||||
handle.join().expect("Thread panicked");
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_chat_template_discovery() {
|
||||
use std::fs;
|
||||
use tempfile::TempDir;
|
||||
|
||||
// Create a temporary directory with test files
|
||||
let temp_dir = TempDir::new().expect("Failed to create temp dir");
|
||||
let dir_path = temp_dir.path();
|
||||
|
||||
// Copy a real tokenizer.json file for testing
|
||||
// We'll use the TinyLlama tokenizer that's already cached
|
||||
let cached_tokenizer = ensure_tokenizer_cached();
|
||||
let tokenizer_path = dir_path.join("tokenizer.json");
|
||||
fs::copy(&cached_tokenizer, &tokenizer_path).expect("Failed to copy tokenizer file");
|
||||
|
||||
// Test 1: With chat_template.jinja file
|
||||
let jinja_path = dir_path.join("chat_template.jinja");
|
||||
fs::write(&jinja_path, "{{ messages }}").expect("Failed to write chat template");
|
||||
|
||||
let tokenizer = HuggingFaceTokenizer::from_file(tokenizer_path.to_str().unwrap());
|
||||
assert!(
|
||||
tokenizer.is_ok(),
|
||||
"Should load tokenizer with chat template"
|
||||
);
|
||||
|
||||
// Clean up for next test
|
||||
fs::remove_file(&jinja_path).ok();
|
||||
|
||||
// Test 2: With tokenizer_config.json containing chat_template
|
||||
let config_path = dir_path.join("tokenizer_config.json");
|
||||
fs::write(&config_path, r#"{"chat_template": "{{ messages }}"}"#)
|
||||
.expect("Failed to write config");
|
||||
|
||||
let tokenizer = HuggingFaceTokenizer::from_file(tokenizer_path.to_str().unwrap());
|
||||
assert!(
|
||||
tokenizer.is_ok(),
|
||||
"Should load tokenizer with embedded template"
|
||||
);
|
||||
|
||||
// Test 3: No chat template
|
||||
fs::remove_file(&config_path).ok();
|
||||
let tokenizer = HuggingFaceTokenizer::from_file(tokenizer_path.to_str().unwrap());
|
||||
assert!(
|
||||
tokenizer.is_ok(),
|
||||
"Should load tokenizer without chat template"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_load_chat_template_from_local_file() {
|
||||
use std::fs;
|
||||
use tempfile::TempDir;
|
||||
|
||||
// Test 1: Load tokenizer with explicit chat template path
|
||||
let temp_dir = TempDir::new().expect("Failed to create temp dir");
|
||||
let dir_path = temp_dir.path();
|
||||
|
||||
// Copy a real tokenizer for testing
|
||||
let cached_tokenizer = ensure_tokenizer_cached();
|
||||
let tokenizer_path = dir_path.join("tokenizer.json");
|
||||
fs::copy(&cached_tokenizer, &tokenizer_path).expect("Failed to copy tokenizer");
|
||||
|
||||
// Create a chat template file
|
||||
let template_path = dir_path.join("my_template.jinja");
|
||||
let template_content = r#"{% for message in messages %}{{ message.role }}: {{ message.content }}
|
||||
{% endfor %}"#;
|
||||
fs::write(&template_path, template_content).expect("Failed to write template");
|
||||
|
||||
// Load tokenizer with explicit template path
|
||||
let tokenizer = HuggingFaceTokenizer::from_file_with_chat_template(
|
||||
tokenizer_path.to_str().unwrap(),
|
||||
Some(template_path.to_str().unwrap()),
|
||||
);
|
||||
assert!(
|
||||
tokenizer.is_ok(),
|
||||
"Should load tokenizer with explicit template path"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_tinyllama_embedded_template() {
|
||||
use sglang_router_rs::tokenizer::hub::download_tokenizer_from_hf;
|
||||
|
||||
// Skip in CI without HF_TOKEN
|
||||
|
||||
// Test 2: TinyLlama has chat template embedded in tokenizer_config.json
|
||||
match download_tokenizer_from_hf("TinyLlama/TinyLlama-1.1B-Chat-v1.0").await {
|
||||
Ok(cache_dir) => {
|
||||
// Verify tokenizer_config.json exists
|
||||
let config_path = cache_dir.join("tokenizer_config.json");
|
||||
assert!(config_path.exists(), "tokenizer_config.json should exist");
|
||||
|
||||
// Load the config and check for chat_template
|
||||
let config_content =
|
||||
std::fs::read_to_string(&config_path).expect("Failed to read config");
|
||||
assert!(
|
||||
config_content.contains("\"chat_template\""),
|
||||
"TinyLlama should have embedded chat_template in config"
|
||||
);
|
||||
|
||||
// Load tokenizer and verify it has chat template
|
||||
let tokenizer_path = cache_dir.join("tokenizer.json");
|
||||
let _tokenizer = HuggingFaceTokenizer::from_file(tokenizer_path.to_str().unwrap())
|
||||
.expect("Failed to load tokenizer");
|
||||
|
||||
println!(
|
||||
"✓ TinyLlama: Loaded tokenizer with embedded template from tokenizer_config.json"
|
||||
);
|
||||
}
|
||||
Err(e) => {
|
||||
println!("Download test skipped due to error: {}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_qwen3_next_embedded_template() {
|
||||
use sglang_router_rs::tokenizer::hub::download_tokenizer_from_hf;
|
||||
|
||||
// Test 3: Qwen3-Next has chat template in tokenizer_config.json
|
||||
match download_tokenizer_from_hf("Qwen/Qwen3-Next-80B-A3B-Instruct").await {
|
||||
Ok(cache_dir) => {
|
||||
let config_path = cache_dir.join("tokenizer_config.json");
|
||||
assert!(config_path.exists(), "tokenizer_config.json should exist");
|
||||
|
||||
// Verify chat_template in config
|
||||
let config_content =
|
||||
std::fs::read_to_string(&config_path).expect("Failed to read config");
|
||||
assert!(
|
||||
config_content.contains("\"chat_template\""),
|
||||
"Qwen3-Next should have chat_template in tokenizer_config.json"
|
||||
);
|
||||
|
||||
// Load tokenizer
|
||||
let tokenizer_path = cache_dir.join("tokenizer.json");
|
||||
if tokenizer_path.exists() {
|
||||
let _tokenizer = HuggingFaceTokenizer::from_file(tokenizer_path.to_str().unwrap())
|
||||
.expect("Failed to load tokenizer");
|
||||
println!("✓ Qwen3-Next: Loaded tokenizer with embedded template");
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
println!("Download test skipped due to error: {}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_qwen3_vl_json_template_priority() {
|
||||
use sglang_router_rs::tokenizer::hub::download_tokenizer_from_hf;
|
||||
|
||||
// Test 4: Qwen3-VL has both tokenizer_config.json template and chat_template.json
|
||||
// Should prioritize chat_template.json
|
||||
match download_tokenizer_from_hf("Qwen/Qwen3-VL-235B-A22B-Instruct").await {
|
||||
Ok(cache_dir) => {
|
||||
// Check for chat_template.json
|
||||
let json_template_path = cache_dir.join("chat_template.json");
|
||||
let has_json_template = json_template_path.exists();
|
||||
|
||||
// Also check tokenizer_config.json
|
||||
let config_path = cache_dir.join("tokenizer_config.json");
|
||||
assert!(config_path.exists(), "tokenizer_config.json should exist");
|
||||
|
||||
if has_json_template {
|
||||
let json_content = std::fs::read_to_string(&json_template_path)
|
||||
.expect("Failed to read chat_template.json");
|
||||
println!("✓ Qwen3-VL: Found chat_template.json (should be prioritized)");
|
||||
|
||||
// Verify it contains jinja template
|
||||
assert!(
|
||||
!json_content.is_empty(),
|
||||
"chat_template.json should contain template"
|
||||
);
|
||||
}
|
||||
|
||||
// Load tokenizer - it should use the appropriate template
|
||||
let tokenizer_path = cache_dir.join("tokenizer.json");
|
||||
if tokenizer_path.exists() {
|
||||
let _tokenizer = HuggingFaceTokenizer::from_file(tokenizer_path.to_str().unwrap())
|
||||
.expect("Failed to load tokenizer");
|
||||
println!("✓ Qwen3-VL: Loaded tokenizer with template priority handling");
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
println!("Download test skipped due to error: {}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_llava_separate_jinja_template() {
|
||||
use sglang_router_rs::tokenizer::hub::download_tokenizer_from_hf;
|
||||
|
||||
// Test 5: llava has chat_template.jinja as a separate file, not in tokenizer_config.json
|
||||
match download_tokenizer_from_hf("llava-hf/llava-1.5-7b-hf").await {
|
||||
Ok(cache_dir) => {
|
||||
// Check for .jinja file
|
||||
let jinja_path = cache_dir.join("chat_template.jinja");
|
||||
let has_jinja = jinja_path.exists()
|
||||
|| std::fs::read_dir(&cache_dir)
|
||||
.map(|entries| {
|
||||
entries.filter_map(|e| e.ok()).any(|e| {
|
||||
e.file_name()
|
||||
.to_str()
|
||||
.is_some_and(|name| name.ends_with(".jinja"))
|
||||
})
|
||||
})
|
||||
.unwrap_or(false);
|
||||
|
||||
if has_jinja {
|
||||
println!("✓ llava: Found separate .jinja chat template file");
|
||||
}
|
||||
|
||||
// Check tokenizer_config.json - should NOT have embedded template
|
||||
let config_path = cache_dir.join("tokenizer_config.json");
|
||||
if config_path.exists() {
|
||||
let config_content =
|
||||
std::fs::read_to_string(&config_path).expect("Failed to read config");
|
||||
|
||||
// llava might not have chat_template in config
|
||||
if !config_content.contains("\"chat_template\"") {
|
||||
println!("✓ llava: No embedded template in config (as expected)");
|
||||
}
|
||||
}
|
||||
|
||||
// Load tokenizer - should auto-discover the .jinja file
|
||||
let tokenizer_path = cache_dir.join("tokenizer.json");
|
||||
if tokenizer_path.exists() {
|
||||
let tokenizer = HuggingFaceTokenizer::from_file(tokenizer_path.to_str().unwrap());
|
||||
if tokenizer.is_ok() {
|
||||
println!("✓ llava: Loaded tokenizer with auto-discovered .jinja template");
|
||||
} else {
|
||||
println!("Note: llava tokenizer loading failed - might need specific handling");
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
println!("Download test skipped due to error: {}", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user