[router] fix chat template loading and tokenizer path (#10999)

2025-09-27 23:54:12 -04:00
parent 72392f2908
commit 5519766a4d
5 changed files with 418 additions and 9 deletions
--- a/sgl-router/tests/tokenizer_integration.rs
+++ b/sgl-router/tests/tokenizer_integration.rs
@@ -314,3 +314,245 @@ fn test_thread_safety() {
        handle.join().expect("Thread panicked");
    }
 }
+
+#[test]
+fn test_chat_template_discovery() {
+    use std::fs;
+    use tempfile::TempDir;
+
+    // Create a temporary directory with test files
+    let temp_dir = TempDir::new().expect("Failed to create temp dir");
+    let dir_path = temp_dir.path();
+
+    // Copy a real tokenizer.json file for testing
+    // We'll use the TinyLlama tokenizer that's already cached
+    let cached_tokenizer = ensure_tokenizer_cached();
+    let tokenizer_path = dir_path.join("tokenizer.json");
+    fs::copy(&cached_tokenizer, &tokenizer_path).expect("Failed to copy tokenizer file");
+
+    // Test 1: With chat_template.jinja file
+    let jinja_path = dir_path.join("chat_template.jinja");
+    fs::write(&jinja_path, "{{ messages }}").expect("Failed to write chat template");
+
+    let tokenizer = HuggingFaceTokenizer::from_file(tokenizer_path.to_str().unwrap());
+    assert!(
+        tokenizer.is_ok(),
+        "Should load tokenizer with chat template"
+    );
+
+    // Clean up for next test
+    fs::remove_file(&jinja_path).ok();
+
+    // Test 2: With tokenizer_config.json containing chat_template
+    let config_path = dir_path.join("tokenizer_config.json");
+    fs::write(&config_path, r#"{"chat_template": "{{ messages }}"}"#)
+        .expect("Failed to write config");
+
+    let tokenizer = HuggingFaceTokenizer::from_file(tokenizer_path.to_str().unwrap());
+    assert!(
+        tokenizer.is_ok(),
+        "Should load tokenizer with embedded template"
+    );
+
+    // Test 3: No chat template
+    fs::remove_file(&config_path).ok();
+    let tokenizer = HuggingFaceTokenizer::from_file(tokenizer_path.to_str().unwrap());
+    assert!(
+        tokenizer.is_ok(),
+        "Should load tokenizer without chat template"
+    );
+}
+
+#[test]
+fn test_load_chat_template_from_local_file() {
+    use std::fs;
+    use tempfile::TempDir;
+
+    // Test 1: Load tokenizer with explicit chat template path
+    let temp_dir = TempDir::new().expect("Failed to create temp dir");
+    let dir_path = temp_dir.path();
+
+    // Copy a real tokenizer for testing
+    let cached_tokenizer = ensure_tokenizer_cached();
+    let tokenizer_path = dir_path.join("tokenizer.json");
+    fs::copy(&cached_tokenizer, &tokenizer_path).expect("Failed to copy tokenizer");
+
+    // Create a chat template file
+    let template_path = dir_path.join("my_template.jinja");
+    let template_content = r#"{% for message in messages %}{{ message.role }}: {{ message.content }}
+{% endfor %}"#;
+    fs::write(&template_path, template_content).expect("Failed to write template");
+
+    // Load tokenizer with explicit template path
+    let tokenizer = HuggingFaceTokenizer::from_file_with_chat_template(
+        tokenizer_path.to_str().unwrap(),
+        Some(template_path.to_str().unwrap()),
+    );
+    assert!(
+        tokenizer.is_ok(),
+        "Should load tokenizer with explicit template path"
+    );
+}
+
+#[tokio::test]
+async fn test_tinyllama_embedded_template() {
+    use sglang_router_rs::tokenizer::hub::download_tokenizer_from_hf;
+
+    // Skip in CI without HF_TOKEN
+
+    // Test 2: TinyLlama has chat template embedded in tokenizer_config.json
+    match download_tokenizer_from_hf("TinyLlama/TinyLlama-1.1B-Chat-v1.0").await {
+        Ok(cache_dir) => {
+            // Verify tokenizer_config.json exists
+            let config_path = cache_dir.join("tokenizer_config.json");
+            assert!(config_path.exists(), "tokenizer_config.json should exist");
+
+            // Load the config and check for chat_template
+            let config_content =
+                std::fs::read_to_string(&config_path).expect("Failed to read config");
+            assert!(
+                config_content.contains("\"chat_template\""),
+                "TinyLlama should have embedded chat_template in config"
+            );
+
+            // Load tokenizer and verify it has chat template
+            let tokenizer_path = cache_dir.join("tokenizer.json");
+            let _tokenizer = HuggingFaceTokenizer::from_file(tokenizer_path.to_str().unwrap())
+                .expect("Failed to load tokenizer");
+
+            println!(
+                "✓ TinyLlama: Loaded tokenizer with embedded template from tokenizer_config.json"
+            );
+        }
+        Err(e) => {
+            println!("Download test skipped due to error: {}", e);
+        }
+    }
+}
+
+#[tokio::test]
+async fn test_qwen3_next_embedded_template() {
+    use sglang_router_rs::tokenizer::hub::download_tokenizer_from_hf;
+
+    // Test 3: Qwen3-Next has chat template in tokenizer_config.json
+    match download_tokenizer_from_hf("Qwen/Qwen3-Next-80B-A3B-Instruct").await {
+        Ok(cache_dir) => {
+            let config_path = cache_dir.join("tokenizer_config.json");
+            assert!(config_path.exists(), "tokenizer_config.json should exist");
+
+            // Verify chat_template in config
+            let config_content =
+                std::fs::read_to_string(&config_path).expect("Failed to read config");
+            assert!(
+                config_content.contains("\"chat_template\""),
+                "Qwen3-Next should have chat_template in tokenizer_config.json"
+            );
+
+            // Load tokenizer
+            let tokenizer_path = cache_dir.join("tokenizer.json");
+            if tokenizer_path.exists() {
+                let _tokenizer = HuggingFaceTokenizer::from_file(tokenizer_path.to_str().unwrap())
+                    .expect("Failed to load tokenizer");
+                println!("✓ Qwen3-Next: Loaded tokenizer with embedded template");
+            }
+        }
+        Err(e) => {
+            println!("Download test skipped due to error: {}", e);
+        }
+    }
+}
+
+#[tokio::test]
+async fn test_qwen3_vl_json_template_priority() {
+    use sglang_router_rs::tokenizer::hub::download_tokenizer_from_hf;
+
+    // Test 4: Qwen3-VL has both tokenizer_config.json template and chat_template.json
+    // Should prioritize chat_template.json
+    match download_tokenizer_from_hf("Qwen/Qwen3-VL-235B-A22B-Instruct").await {
+        Ok(cache_dir) => {
+            // Check for chat_template.json
+            let json_template_path = cache_dir.join("chat_template.json");
+            let has_json_template = json_template_path.exists();
+
+            // Also check tokenizer_config.json
+            let config_path = cache_dir.join("tokenizer_config.json");
+            assert!(config_path.exists(), "tokenizer_config.json should exist");
+
+            if has_json_template {
+                let json_content = std::fs::read_to_string(&json_template_path)
+                    .expect("Failed to read chat_template.json");
+                println!("✓ Qwen3-VL: Found chat_template.json (should be prioritized)");
+
+                // Verify it contains jinja template
+                assert!(
+                    !json_content.is_empty(),
+                    "chat_template.json should contain template"
+                );
+            }
+
+            // Load tokenizer - it should use the appropriate template
+            let tokenizer_path = cache_dir.join("tokenizer.json");
+            if tokenizer_path.exists() {
+                let _tokenizer = HuggingFaceTokenizer::from_file(tokenizer_path.to_str().unwrap())
+                    .expect("Failed to load tokenizer");
+                println!("✓ Qwen3-VL: Loaded tokenizer with template priority handling");
+            }
+        }
+        Err(e) => {
+            println!("Download test skipped due to error: {}", e);
+        }
+    }
+}
+
+#[tokio::test]
+async fn test_llava_separate_jinja_template() {
+    use sglang_router_rs::tokenizer::hub::download_tokenizer_from_hf;
+
+    // Test 5: llava has chat_template.jinja as a separate file, not in tokenizer_config.json
+    match download_tokenizer_from_hf("llava-hf/llava-1.5-7b-hf").await {
+        Ok(cache_dir) => {
+            // Check for .jinja file
+            let jinja_path = cache_dir.join("chat_template.jinja");
+            let has_jinja = jinja_path.exists()
+                || std::fs::read_dir(&cache_dir)
+                    .map(|entries| {
+                        entries.filter_map(|e| e.ok()).any(|e| {
+                            e.file_name()
+                                .to_str()
+                                .is_some_and(|name| name.ends_with(".jinja"))
+                        })
+                    })
+                    .unwrap_or(false);
+
+            if has_jinja {
+                println!("✓ llava: Found separate .jinja chat template file");
+            }
+
+            // Check tokenizer_config.json - should NOT have embedded template
+            let config_path = cache_dir.join("tokenizer_config.json");
+            if config_path.exists() {
+                let config_content =
+                    std::fs::read_to_string(&config_path).expect("Failed to read config");
+
+                // llava might not have chat_template in config
+                if !config_content.contains("\"chat_template\"") {
+                    println!("✓ llava: No embedded template in config (as expected)");
+                }
+            }
+
+            // Load tokenizer - should auto-discover the .jinja file
+            let tokenizer_path = cache_dir.join("tokenizer.json");
+            if tokenizer_path.exists() {
+                let tokenizer = HuggingFaceTokenizer::from_file(tokenizer_path.to_str().unwrap());
+                if tokenizer.is_ok() {
+                    println!("✓ llava: Loaded tokenizer with auto-discovered .jinja template");
+                } else {
+                    println!("Note: llava tokenizer loading failed - might need specific handling");
+                }
+            }
+        }
+        Err(e) => {
+            println!("Download test skipped due to error: {}", e);
+        }
+    }
+}