[router] add tokenizer download support from hf hub (#9882)

This commit is contained in:
Chang Su
2025-09-01 10:40:37 -07:00
committed by GitHub
parent b361750a4a
commit 598c0bc19d
9 changed files with 407 additions and 138 deletions

View File

@@ -3,6 +3,7 @@ use std::ops::Deref;
use std::sync::Arc;
pub mod factory;
pub mod hub;
pub mod mock;
pub mod sequence;
pub mod stop;
@@ -10,13 +11,11 @@ pub mod stream;
pub mod traits;
// Feature-gated modules
#[cfg(feature = "huggingface")]
pub mod chat_template;
#[cfg(feature = "huggingface")]
pub mod huggingface;
#[cfg(feature = "tiktoken")]
pub mod tiktoken;
#[cfg(test)]
@@ -24,21 +23,18 @@ mod tests;
// Re-exports
// Tokenizer construction entry points from `factory`, re-exported at this module's root.
// Each name is listed exactly once: repeating a name inside a single `use` list is a
// duplicate-definition error.
pub use factory::{
    create_tokenizer, create_tokenizer_async, create_tokenizer_from_file,
    create_tokenizer_with_chat_template, TokenizerType,
};
pub use sequence::Sequence;
pub use stop::{SequenceDecoderOutput, StopSequenceConfig, StopSequenceDecoder};
pub use stream::DecodeStream;
pub use traits::{Decoder, Encoder, Encoding, SpecialTokens, Tokenizer as TokenizerTrait};
#[cfg(feature = "huggingface")]
pub use huggingface::HuggingFaceTokenizer;
#[cfg(feature = "huggingface")]
pub use chat_template::ChatMessage;
#[cfg(feature = "tiktoken")]
pub use tiktoken::{TiktokenModel, TiktokenTokenizer};
/// Main tokenizer wrapper that provides a unified interface for different tokenizer implementations