From 598c0bc19deaff932a382a42eeb815265cdfd233 Mon Sep 17 00:00:00 2001 From: Chang Su Date: Mon, 1 Sep 2025 10:40:37 -0700 Subject: [PATCH] [router] add tokenizer download support from hf hub (#9882) --- sgl-router/Cargo.toml | 11 +- sgl-router/src/tokenizer/README.md | 85 +++++-- sgl-router/src/tokenizer/chat_template.rs | 6 - sgl-router/src/tokenizer/factory.rs | 155 ++++++++---- sgl-router/src/tokenizer/hub.rs | 238 ++++++++++++++++++ sgl-router/src/tokenizer/huggingface.rs | 29 --- sgl-router/src/tokenizer/mod.rs | 12 +- sgl-router/tests/test_chat_template.rs | 6 - .../tests/test_chat_template_loading.rs | 3 - 9 files changed, 407 insertions(+), 138 deletions(-) create mode 100644 sgl-router/src/tokenizer/hub.rs diff --git a/sgl-router/Cargo.toml b/sgl-router/Cargo.toml index b05b62568..fd4862054 100644 --- a/sgl-router/Cargo.toml +++ b/sgl-router/Cargo.toml @@ -4,9 +4,7 @@ version = "0.0.0" edition = "2021" [features] -default = ["huggingface", "grpc-client"] -huggingface = ["tokenizers", "minijinja"] -tiktoken = ["tiktoken-rs"] +default = ["grpc-client"] grpc-client = [] grpc-server = [] @@ -52,10 +50,11 @@ regex = "1.10" url = "2.5.4" tokio-stream = { version = "0.1", features = ["sync"] } anyhow = "1.0" -tokenizers = { version = "0.21.4", optional = true } -tiktoken-rs = { version = "0.7.0", optional = true } -minijinja = { version = "2.0", optional = true } +tokenizers = { version = "0.22.0" } +tiktoken-rs = { version = "0.7.0" } +minijinja = { version = "2.0" } rustls = { version = "0.23", default-features = false, features = ["ring", "std"] } +hf-hub = { version = "0.4.3", features = ["tokio"] } # gRPC and Protobuf dependencies tonic = { version = "0.12", features = ["tls", "gzip", "transport"] } diff --git a/sgl-router/src/tokenizer/README.md b/sgl-router/src/tokenizer/README.md index f13db08f9..67972ccbd 100644 --- a/sgl-router/src/tokenizer/README.md +++ b/sgl-router/src/tokenizer/README.md @@ -8,6 +8,7 @@ The SGL Router tokenizer layer provides a 
unified interface for text tokenizatio **Key Components:** - **Factory Pattern**: Auto-detection and creation of appropriate tokenizer types from files or model names +- **HuggingFace Hub Integration**: Automatic downloading of tokenizer files from HuggingFace Hub for model IDs - **Trait System**: `Encoder`, `Decoder`, and `Tokenizer` traits for implementation flexibility - **Streaming**: Incremental decoding with UTF-8 boundary handling and buffering - **Stop Sequences**: Complex pattern matching for stop tokens and sequences with "jail" buffering @@ -16,7 +17,7 @@ The SGL Router tokenizer layer provides a unified interface for text tokenizatio - **Metrics Integration**: Comprehensive performance and error tracking across all operations **Data Flow:** -1. Request → Factory (type detection) → Concrete Tokenizer Creation +1. Request → Factory (type detection/HF download) → Concrete Tokenizer Creation 2. Encode: Text → Tokenizer → Encoding (token IDs) 3. Stream: Token IDs → DecodeStream → Incremental Text Chunks 4. 
Stop Detection: Tokens → StopSequenceDecoder → Text/Held/Stopped @@ -25,8 +26,9 @@ The SGL Router tokenizer layer provides a unified interface for text tokenizatio ### Architecture Highlights - **Extended Backend Support**: HuggingFace, Tiktoken (GPT models), and Mock for testing +- **HuggingFace Hub Integration**: Automatic tokenizer downloads with caching - **Comprehensive Metrics**: Full TokenizerMetrics integration for observability -- **Feature Gating**: Conditional compilation for tokenizer backends +- **Unified Dependencies**: All tokenizer backends included by default (no feature gates) - **Stop Sequence Detection**: Sophisticated partial matching with jail buffer - **Chat Template Support**: Full Jinja2 rendering with HuggingFace compatibility - **Thread Safety**: Arc-based sharing with Send + Sync guarantees @@ -92,9 +94,14 @@ sequenceDiagram participant SD as StopDecoder participant M as Metrics - C->>F: create_tokenizer(path) + C->>F: create_tokenizer(path_or_model_id) F->>F: detect_type() - F->>T: new HF/Tiktoken/Mock + alt local file + F->>T: new HF/Tiktoken/Mock + else HuggingFace model ID + F->>F: download_tokenizer_from_hf() + F->>T: new from downloaded files + end F->>M: record_factory_load() F-->>C: Arc @@ -287,11 +294,11 @@ impl Tokenizer { - Single field: `Arc` for polymorphic dispatch - Immutable after creation, Clone via Arc -**Re-exports** (mod.rs:25-39): -- Factory functions: `create_tokenizer`, `create_tokenizer_from_file`, `create_tokenizer_with_chat_template` -- Types: `Sequence`, `StopSequenceConfig`, `DecodeStream`, `Encoding` -- Chat template: `ChatMessage` (when huggingface feature enabled) -- Conditional: `HuggingFaceTokenizer`, `TiktokenTokenizer` based on features +**Re-exports** (mod.rs:26-43): +- Factory functions: `create_tokenizer`, `create_tokenizer_async`, `create_tokenizer_from_file`, `create_tokenizer_with_chat_template` +- Types: `Sequence`, `StopSequenceConfig`, `DecodeStream`, `Encoding`, `TokenizerType` +- Chat 
template: `ChatMessage` +- Tokenizer implementations: `HuggingFaceTokenizer`, `TiktokenTokenizer` ### 3.2 traits.rs (Trait Definitions) @@ -350,6 +357,7 @@ pub fn create_tokenizer_with_chat_template( chat_template_path: Option<&str> ) -> Result> pub fn create_tokenizer(model_name_or_path: &str) -> Result> +pub async fn create_tokenizer_async(model_name_or_path: &str) -> Result> pub fn get_tokenizer_info(file_path: &str) -> Result ``` @@ -364,10 +372,16 @@ pub fn get_tokenizer_info(file_path: &str) -> Result - SentencePiece: Check for specific byte patterns - GGUF: Check magic number "GGUF" -**Model Name Routing** (factory.rs:163-203): +**Model Name Routing** (factory.rs:145-193): - GPT models → Tiktoken (gpt-4, gpt-3.5, davinci, curie, etc.) - File paths → file-based creation -- HuggingFace Hub → Not implemented (returns error) +- HuggingFace model IDs → Automatic download from Hub + +**HuggingFace Hub Integration**: +- Downloads tokenizer files (tokenizer.json, tokenizer_config.json, etc.) +- Respects HF_TOKEN environment variable for private models +- Caches downloaded files using hf-hub crate +- Async and blocking versions available **Metrics Integration:** - Records factory load/error events (factory.rs:56-57, 82-83) @@ -613,7 +627,32 @@ pub enum TiktokenModel { - Decode: Join tokens with spaces - Skips special tokens when requested -### 3.10 chat_template.rs (Chat Template Support) +### 3.10 hub.rs (HuggingFace Hub Download) + +**Location**: `src/tokenizer/hub.rs` + +**Purpose:** Download tokenizer files from HuggingFace Hub when given a model ID. 
+ +**Key Functions:** + +```rust +pub async fn download_tokenizer_from_hf(model_id: impl AsRef<Path>) -> Result<PathBuf> +pub async fn from_hf(name: impl AsRef<Path>, ignore_weights: bool) -> Result<PathBuf> +``` + +**Features:** +- Downloads only tokenizer-related files by default +- Filters out model weights, images, and documentation +- Uses HF_TOKEN environment variable for authentication +- Returns cached directory path for subsequent use +- Progress indication during download + +**File Detection:** +- Tokenizer files: tokenizer.json, tokenizer_config.json, special_tokens_map.json +- Vocabulary files: vocab.json, merges.txt, *.tiktoken files +- SentencePiece models: *.model files + +### 3.11 chat_template.rs (Chat Template Support) **Location**: `src/tokenizer/chat_template.rs` @@ -894,11 +933,11 @@ The `Encoding` enum must: ### Configuration **Environment Variables:** -- None currently defined +- `HF_TOKEN`: HuggingFace authentication token for private models -**Feature Flags:** -- `huggingface`: Enable HF tokenizer -- `tiktoken`: Enable Tiktoken support +**Dependencies:** +- All tokenizer backends included by default +- No feature flags required **Model Mapping:** - Hardcoded in factory.rs @@ -961,26 +1000,22 @@ The `Encoding` enum must: - File: `src/tokenizer/traits.rs` - Symbol: `pub type Offsets = (usize, usize)` -3. **TODO:** Implement HuggingFace Hub downloading - - File: `src/tokenizer/factory.rs:191` - - Symbol: `create_tokenizer()` function - -4. **TODO:** Support SentencePiece models +3. **TODO:** Support SentencePiece models - File: `src/tokenizer/factory.rs:69-72` - Symbol: Extension match arm for "model" -5. **TODO:** Support GGUF format +4. **TODO:** Support GGUF format - File: `src/tokenizer/factory.rs:74-78` - Symbol: Extension match arm for "gguf" -6. **TODO:** Add token↔ID mapping for Tiktoken +5. **TODO:** Add token↔ID mapping for Tiktoken - File: `src/tokenizer/tiktoken.rs:151-161` - Symbol: `token_to_id()` and `id_to_token()` methods -7. **TODO:** Fix `token_ids_ref()` for Tiktoken +6. 
**TODO:** Fix `token_ids_ref()` for Tiktoken - File: `src/tokenizer/traits.rs:46-50` - Symbol: `Encoding::Tiktoken` match arm -8. **TODO:** Make model→tokenizer mapping configurable +7. **TODO:** Make model→tokenizer mapping configurable - File: `src/tokenizer/factory.rs:174-184` - Symbol: GPT model detection logic diff --git a/sgl-router/src/tokenizer/chat_template.rs b/sgl-router/src/tokenizer/chat_template.rs index 91ba55f60..8a9a0fe1d 100644 --- a/sgl-router/src/tokenizer/chat_template.rs +++ b/sgl-router/src/tokenizer/chat_template.rs @@ -4,7 +4,6 @@ //! similar to HuggingFace transformers' apply_chat_template method. use anyhow::{anyhow, Result}; -#[cfg(feature = "huggingface")] use minijinja::{context, Environment, Value}; use serde::{Deserialize, Serialize}; use serde_json; @@ -38,14 +37,12 @@ impl ChatMessage { } /// Chat template processor using Jinja2 -#[cfg(feature = "huggingface")] pub struct ChatTemplateProcessor { template: String, bos_token: Option, eos_token: Option, } -#[cfg(feature = "huggingface")] impl ChatTemplateProcessor { /// Create a new chat template processor pub fn new(template: String, bos_token: Option, eos_token: Option) -> Self { @@ -102,7 +99,6 @@ impl ChatTemplateProcessor { } /// Load chat template from tokenizer config JSON -#[cfg(feature = "huggingface")] pub fn load_chat_template_from_config(config_path: &str) -> Result> { use std::fs; @@ -136,7 +132,6 @@ mod tests { assert_eq!(assistant_msg.role, "assistant"); } - #[cfg(feature = "huggingface")] #[test] fn test_simple_chat_template() { // Simple template that formats messages @@ -162,7 +157,6 @@ assistant: assert!(result.contains("assistant:")); } - #[cfg(feature = "huggingface")] #[test] fn test_chat_template_with_tokens() { // Template that uses special tokens diff --git a/sgl-router/src/tokenizer/factory.rs b/sgl-router/src/tokenizer/factory.rs index 6c938b26c..8c80749e2 100644 --- a/sgl-router/src/tokenizer/factory.rs +++ b/sgl-router/src/tokenizer/factory.rs @@ -5,15 
+5,15 @@ use std::io::Read; use std::path::Path; use std::sync::Arc; -#[cfg(feature = "huggingface")] use super::huggingface::HuggingFaceTokenizer; +use super::tiktoken::TiktokenTokenizer; +use crate::tokenizer::hub::download_tokenizer_from_hf; /// Represents the type of tokenizer being used #[derive(Debug, Clone)] pub enum TokenizerType { HuggingFace(String), Mock, - #[cfg(feature = "tiktoken")] Tiktoken(String), // Future: SentencePiece, GGUF } @@ -52,21 +52,10 @@ pub fn create_tokenizer_with_chat_template( let result = match extension.as_deref() { Some("json") => { - #[cfg(feature = "huggingface")] - { - let tokenizer = HuggingFaceTokenizer::from_file_with_chat_template( - file_path, - chat_template_path, - )?; + let tokenizer = + HuggingFaceTokenizer::from_file_with_chat_template(file_path, chat_template_path)?; - Ok(Arc::new(tokenizer) as Arc) - } - #[cfg(not(feature = "huggingface"))] - { - Err(Error::msg( - "HuggingFace support not enabled. Enable the 'huggingface' feature.", - )) - } + Ok(Arc::new(tokenizer) as Arc) } Some("model") => { // SentencePiece model file @@ -94,17 +83,8 @@ fn auto_detect_tokenizer(file_path: &str) -> Result> // Check for JSON (HuggingFace format) if is_likely_json(&buffer) { - #[cfg(feature = "huggingface")] - { - let tokenizer = HuggingFaceTokenizer::from_file(file_path)?; - return Ok(Arc::new(tokenizer)); - } - #[cfg(not(feature = "huggingface"))] - { - return Err(Error::msg( - "File appears to be JSON (HuggingFace) format, but HuggingFace support is not enabled", - )); - } + let tokenizer = HuggingFaceTokenizer::from_file(file_path)?; + return Ok(Arc::new(tokenizer)); } // Check for GGUF magic number @@ -154,7 +134,57 @@ fn is_likely_sentencepiece(buffer: &[u8]) -> bool { || buffer.windows(4).any(|w| w == b"")) } -/// Factory function to create tokenizer from a model name or path +/// Factory function to create tokenizer from a model name or path (async version) +pub async fn create_tokenizer_async( + model_name_or_path: &str, 
+) -> Result> { + // Check if it's a file path + let path = Path::new(model_name_or_path); + if path.exists() { + return create_tokenizer_from_file(model_name_or_path); + } + + // Check if it's a GPT model name that should use Tiktoken + if model_name_or_path.contains("gpt-") + || model_name_or_path.contains("davinci") + || model_name_or_path.contains("curie") + || model_name_or_path.contains("babbage") + || model_name_or_path.contains("ada") + { + let tokenizer = TiktokenTokenizer::from_model_name(model_name_or_path)?; + return Ok(Arc::new(tokenizer)); + } + + // Try to download tokenizer files from HuggingFace + match download_tokenizer_from_hf(model_name_or_path).await { + Ok(cache_dir) => { + // Look for tokenizer.json in the cache directory + let tokenizer_path = cache_dir.join("tokenizer.json"); + if tokenizer_path.exists() { + create_tokenizer_from_file(tokenizer_path.to_str().unwrap()) + } else { + // Try other common tokenizer file names + let possible_files = ["tokenizer_config.json", "vocab.json"]; + for file_name in &possible_files { + let file_path = cache_dir.join(file_name); + if file_path.exists() { + return create_tokenizer_from_file(file_path.to_str().unwrap()); + } + } + Err(Error::msg(format!( + "Downloaded model '{}' but couldn't find a suitable tokenizer file", + model_name_or_path + ))) + } + } + Err(e) => Err(Error::msg(format!( + "Failed to download tokenizer from HuggingFace: {}", + e + ))), + } +} + +/// Factory function to create tokenizer from a model name or path (blocking version) pub fn create_tokenizer(model_name_or_path: &str) -> Result> { // Check if it's a file path let path = Path::new(model_name_or_path); @@ -163,35 +193,25 @@ pub fn create_tokenizer(model_name_or_path: &str) -> Result { + assert!(tokenizer.vocab_size() > 0); + println!("Successfully downloaded and created tokenizer"); + } + Err(e) => { + println!("Download failed (this might be expected): {}", e); + // Don't fail the test - network issues shouldn't break CI + 
} + } + } } diff --git a/sgl-router/src/tokenizer/hub.rs b/sgl-router/src/tokenizer/hub.rs new file mode 100644 index 000000000..c9d2cd1a4 --- /dev/null +++ b/sgl-router/src/tokenizer/hub.rs @@ -0,0 +1,238 @@ +use hf_hub::api::tokio::ApiBuilder; +use std::env; +use std::path::{Path, PathBuf}; + +const IGNORED: [&str; 5] = [ + ".gitattributes", + "LICENSE", + "LICENSE.txt", + "README.md", + "USE_POLICY.md", +]; + +const HF_TOKEN_ENV_VAR: &str = "HF_TOKEN"; + +/// Checks if a file is a model weight file +fn is_weight_file(filename: &str) -> bool { + filename.ends_with(".bin") + || filename.ends_with(".safetensors") + || filename.ends_with(".h5") + || filename.ends_with(".msgpack") + || filename.ends_with(".ckpt.index") +} + +/// Checks if a file is an image file +fn is_image(filename: &str) -> bool { + filename.ends_with(".png") + || filename.ends_with("PNG") + || filename.ends_with(".jpg") + || filename.ends_with("JPG") + || filename.ends_with(".jpeg") + || filename.ends_with("JPEG") +} + +/// Checks if a file is a tokenizer file +fn is_tokenizer_file(filename: &str) -> bool { + filename.ends_with("tokenizer.json") + || filename.ends_with("tokenizer_config.json") + || filename.ends_with("special_tokens_map.json") + || filename.ends_with("vocab.json") + || filename.ends_with("merges.txt") + || filename.ends_with(".model") // SentencePiece models + || filename.ends_with(".tiktoken") +} + +/// Attempt to download tokenizer files from Hugging Face +/// Returns the directory containing the downloaded tokenizer files +pub async fn download_tokenizer_from_hf(model_id: impl AsRef) -> anyhow::Result { + let model_id = model_id.as_ref(); + let token = env::var(HF_TOKEN_ENV_VAR).ok(); + let api = ApiBuilder::new() + .with_progress(true) + .with_token(token) + .build()?; + let model_name = model_id.display().to_string(); + + let repo = api.model(model_name.clone()); + + let info = match repo.info().await { + Ok(info) => info, + Err(e) => { + return Err(anyhow::anyhow!( + 
"Failed to fetch model '{}' from HuggingFace: {}. Is this a valid HuggingFace ID?", + model_name, + e + )); + } + }; + + if info.siblings.is_empty() { + return Err(anyhow::anyhow!( + "Model '{}' exists but contains no downloadable files.", + model_name + )); + } + + let mut cache_dir = None; + let mut tokenizer_files_found = false; + + // First, identify all tokenizer files to download + let tokenizer_files: Vec<_> = info + .siblings + .iter() + .filter(|sib| { + !IGNORED.contains(&sib.rfilename.as_str()) + && !is_image(&sib.rfilename) + && !is_weight_file(&sib.rfilename) + && is_tokenizer_file(&sib.rfilename) + }) + .collect(); + + if tokenizer_files.is_empty() { + return Err(anyhow::anyhow!( + "No tokenizer files found for model '{}'.", + model_name + )); + } + + // Download all tokenizer files + for sib in tokenizer_files { + match repo.get(&sib.rfilename).await { + Ok(path) => { + if cache_dir.is_none() { + cache_dir = path.parent().map(|p| p.to_path_buf()); + } + tokenizer_files_found = true; + } + Err(e) => { + return Err(anyhow::anyhow!( + "Failed to download tokenizer file '{}' from model '{}': {}", + sib.rfilename, + model_name, + e + )); + } + } + } + + if !tokenizer_files_found { + return Err(anyhow::anyhow!( + "No tokenizer files could be downloaded for model '{}'.", + model_name + )); + } + + match cache_dir { + Some(dir) => Ok(dir), + None => Err(anyhow::anyhow!( + "Invalid HF cache path for model '{}'", + model_name + )), + } +} + +/// Attempt to download a model from Hugging Face (including weights) +/// Returns the directory it is in +/// If ignore_weights is true, model weight files will be skipped +pub async fn from_hf(name: impl AsRef, ignore_weights: bool) -> anyhow::Result { + let name = name.as_ref(); + let token = env::var(HF_TOKEN_ENV_VAR).ok(); + let api = ApiBuilder::new() + .with_progress(true) + .with_token(token) + .build()?; + let model_name = name.display().to_string(); + + let repo = api.model(model_name.clone()); + + let info = 
match repo.info().await { + Ok(info) => info, + Err(e) => { + return Err(anyhow::anyhow!( + "Failed to fetch model '{}' from HuggingFace: {}. Is this a valid HuggingFace ID?", + model_name, + e + )); + } + }; + + if info.siblings.is_empty() { + return Err(anyhow::anyhow!( + "Model '{}' exists but contains no downloadable files.", + model_name + )); + } + + let mut p = PathBuf::new(); + let mut files_downloaded = false; + + for sib in info.siblings { + if IGNORED.contains(&sib.rfilename.as_str()) || is_image(&sib.rfilename) { + continue; + } + + // If ignore_weights is true, skip weight files + if ignore_weights && is_weight_file(&sib.rfilename) { + continue; + } + + match repo.get(&sib.rfilename).await { + Ok(path) => { + p = path; + files_downloaded = true; + } + Err(e) => { + return Err(anyhow::anyhow!( + "Failed to download file '{}' from model '{}': {}", + sib.rfilename, + model_name, + e + )); + } + } + } + + if !files_downloaded { + let file_type = if ignore_weights { + "non-weight" + } else { + "valid" + }; + return Err(anyhow::anyhow!( + "No {} files found for model '{}'.", + file_type, + model_name + )); + } + + match p.parent() { + Some(p) => Ok(p.to_path_buf()), + None => Err(anyhow::anyhow!("Invalid HF cache path: {}", p.display())), + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_is_tokenizer_file() { + assert!(is_tokenizer_file("tokenizer.json")); + assert!(is_tokenizer_file("tokenizer_config.json")); + assert!(is_tokenizer_file("special_tokens_map.json")); + assert!(is_tokenizer_file("vocab.json")); + assert!(is_tokenizer_file("merges.txt")); + assert!(is_tokenizer_file("spiece.model")); + assert!(!is_tokenizer_file("model.bin")); + assert!(!is_tokenizer_file("README.md")); + } + + #[test] + fn test_is_weight_file() { + assert!(is_weight_file("model.bin")); + assert!(is_weight_file("model.safetensors")); + assert!(is_weight_file("pytorch_model.bin")); + assert!(!is_weight_file("tokenizer.json")); + 
assert!(!is_weight_file("config.json")); + } +} diff --git a/sgl-router/src/tokenizer/huggingface.rs b/sgl-router/src/tokenizer/huggingface.rs index 063716c3a..02dce5a0a 100644 --- a/sgl-router/src/tokenizer/huggingface.rs +++ b/sgl-router/src/tokenizer/huggingface.rs @@ -5,7 +5,6 @@ use anyhow::{Error, Result}; use std::collections::HashMap; use tokenizers::tokenizer::Tokenizer as HfTokenizer; -#[cfg(feature = "minijinja")] use super::chat_template::{ChatMessage, ChatTemplateProcessor}; /// HuggingFace tokenizer wrapper @@ -14,7 +13,6 @@ pub struct HuggingFaceTokenizer { special_tokens: SpecialTokens, vocab: HashMap, reverse_vocab: HashMap, - #[cfg(feature = "minijinja")] chat_template: Option, } @@ -43,7 +41,6 @@ impl HuggingFaceTokenizer { .collect(); // Load chat template - #[cfg(feature = "minijinja")] let chat_template = if let Some(template_path) = chat_template_path { // Load from specified .jinja file Self::load_chat_template_from_file(template_path)? @@ -57,7 +54,6 @@ impl HuggingFaceTokenizer { special_tokens, vocab, reverse_vocab, - #[cfg(feature = "minijinja")] chat_template, }) } @@ -76,7 +72,6 @@ impl HuggingFaceTokenizer { special_tokens, vocab, reverse_vocab, - #[cfg(feature = "minijinja")] chat_template: None, } } @@ -109,7 +104,6 @@ impl HuggingFaceTokenizer { } /// Try to load chat template from tokenizer_config.json - #[cfg(feature = "minijinja")] fn load_chat_template(tokenizer_path: &str) -> Option { // Try to find tokenizer_config.json in the same directory let path = std::path::Path::new(tokenizer_path); @@ -127,7 +121,6 @@ impl HuggingFaceTokenizer { } /// Load chat template from a .jinja file - #[cfg(feature = "minijinja")] fn load_chat_template_from_file(template_path: &str) -> Result> { use std::fs; @@ -141,13 +134,11 @@ impl HuggingFaceTokenizer { } /// Set or override the chat template - #[cfg(feature = "minijinja")] pub fn set_chat_template(&mut self, template: String) { self.chat_template = Some(template); } /// Apply chat template 
if available - #[cfg(feature = "minijinja")] pub fn apply_chat_template( &self, messages: &[ChatMessage], @@ -172,24 +163,6 @@ impl HuggingFaceTokenizer { Ok(result) } } - - /// Apply chat template if available (without minijinja feature) - #[cfg(not(feature = "minijinja"))] - pub fn apply_chat_template( - &self, - messages: &[ChatMessage], - add_generation_prompt: bool, - ) -> Result { - // Fallback to simple formatting - let mut result = String::new(); - for msg in messages { - result.push_str(&format!("{}: {}\n", msg.role, msg.content)); - } - if add_generation_prompt { - result.push_str("assistant: "); - } - Ok(result) - } } impl Encoder for HuggingFaceTokenizer { @@ -241,10 +214,8 @@ impl TokenizerTrait for HuggingFaceTokenizer { #[cfg(test)] mod tests { - #[cfg(feature = "minijinja")] use super::ChatMessage; - #[cfg(feature = "minijinja")] #[test] fn test_chat_message_creation() { let msg = ChatMessage::system("You are a helpful assistant"); diff --git a/sgl-router/src/tokenizer/mod.rs b/sgl-router/src/tokenizer/mod.rs index 78632062b..98a23f761 100644 --- a/sgl-router/src/tokenizer/mod.rs +++ b/sgl-router/src/tokenizer/mod.rs @@ -3,6 +3,7 @@ use std::ops::Deref; use std::sync::Arc; pub mod factory; +pub mod hub; pub mod mock; pub mod sequence; pub mod stop; @@ -10,13 +11,11 @@ pub mod stream; pub mod traits; // Feature-gated modules -#[cfg(feature = "huggingface")] + pub mod chat_template; -#[cfg(feature = "huggingface")] pub mod huggingface; -#[cfg(feature = "tiktoken")] pub mod tiktoken; #[cfg(test)] @@ -24,21 +23,18 @@ mod tests; // Re-exports pub use factory::{ - create_tokenizer, create_tokenizer_from_file, create_tokenizer_with_chat_template, - TokenizerType, + create_tokenizer, create_tokenizer_async, create_tokenizer_from_file, + create_tokenizer_with_chat_template, TokenizerType, }; pub use sequence::Sequence; pub use stop::{SequenceDecoderOutput, StopSequenceConfig, StopSequenceDecoder}; pub use stream::DecodeStream; pub use traits::{Decoder, 
Encoder, Encoding, SpecialTokens, Tokenizer as TokenizerTrait}; -#[cfg(feature = "huggingface")] pub use huggingface::HuggingFaceTokenizer; -#[cfg(feature = "huggingface")] pub use chat_template::ChatMessage; -#[cfg(feature = "tiktoken")] pub use tiktoken::{TiktokenModel, TiktokenTokenizer}; /// Main tokenizer wrapper that provides a unified interface for different tokenizer implementations diff --git a/sgl-router/tests/test_chat_template.rs b/sgl-router/tests/test_chat_template.rs index c9fea45ed..4a0e73bd0 100644 --- a/sgl-router/tests/test_chat_template.rs +++ b/sgl-router/tests/test_chat_template.rs @@ -3,7 +3,6 @@ mod tests { use sglang_router_rs::tokenizer::chat_template::{ChatMessage, ChatTemplateProcessor}; #[test] - #[cfg(feature = "huggingface")] fn test_chat_message_helpers() { let system_msg = ChatMessage::system("You are a helpful assistant"); assert_eq!(system_msg.role, "system"); @@ -19,7 +18,6 @@ mod tests { } #[test] - #[cfg(feature = "huggingface")] fn test_llama_style_template() { // Test a Llama-style chat template let template = r#" @@ -67,7 +65,6 @@ mod tests { } #[test] - #[cfg(feature = "huggingface")] fn test_chatml_template() { // Test a ChatML-style template let template = r#" @@ -97,7 +94,6 @@ mod tests { } #[test] - #[cfg(feature = "huggingface")] fn test_template_without_generation_prompt() { let template = r#" {%- for message in messages -%} @@ -122,7 +118,6 @@ assistant: } #[test] - #[cfg(feature = "huggingface")] fn test_template_with_special_tokens() { let template = r#"{{ bos_token }}{% for msg in messages %}{{ msg.content }}{{ eos_token }}{% endfor %}"#; @@ -139,7 +134,6 @@ assistant: } #[test] - #[cfg(feature = "huggingface")] fn test_empty_messages() { let template = r#"{% for msg in messages %}{{ msg.role }}: {{ msg.content }}\n{% endfor %}"#; diff --git a/sgl-router/tests/test_chat_template_loading.rs b/sgl-router/tests/test_chat_template_loading.rs index 235c608e8..ad1501233 100644 --- 
a/sgl-router/tests/test_chat_template_loading.rs +++ b/sgl-router/tests/test_chat_template_loading.rs @@ -4,7 +4,6 @@ mod tests { use tempfile::TempDir; #[test] - #[cfg(feature = "huggingface")] fn test_load_chat_template_from_file() { use sglang_router_rs::tokenizer::chat_template::ChatMessage; use sglang_router_rs::tokenizer::huggingface::HuggingFaceTokenizer; @@ -73,7 +72,6 @@ mod tests { } #[test] - #[cfg(feature = "huggingface")] fn test_override_existing_template() { use sglang_router_rs::tokenizer::chat_template::ChatMessage; use sglang_router_rs::tokenizer::huggingface::HuggingFaceTokenizer; @@ -136,7 +134,6 @@ mod tests { } #[test] - #[cfg(feature = "huggingface")] fn test_set_chat_template_after_creation() { use sglang_router_rs::tokenizer::chat_template::ChatMessage; use sglang_router_rs::tokenizer::huggingface::HuggingFaceTokenizer;