[router] add tokenizer download support from hf hub (#9882)
This commit is contained in:
@@ -5,7 +5,6 @@ use anyhow::{Error, Result};
|
||||
use std::collections::HashMap;
|
||||
use tokenizers::tokenizer::Tokenizer as HfTokenizer;
|
||||
|
||||
#[cfg(feature = "minijinja")]
|
||||
use super::chat_template::{ChatMessage, ChatTemplateProcessor};
|
||||
|
||||
/// HuggingFace tokenizer wrapper
|
||||
@@ -14,7 +13,6 @@ pub struct HuggingFaceTokenizer {
|
||||
special_tokens: SpecialTokens,
|
||||
vocab: HashMap<String, TokenIdType>,
|
||||
reverse_vocab: HashMap<TokenIdType, String>,
|
||||
#[cfg(feature = "minijinja")]
|
||||
chat_template: Option<String>,
|
||||
}
|
||||
|
||||
@@ -43,7 +41,6 @@ impl HuggingFaceTokenizer {
|
||||
.collect();
|
||||
|
||||
// Load chat template
|
||||
#[cfg(feature = "minijinja")]
|
||||
let chat_template = if let Some(template_path) = chat_template_path {
|
||||
// Load from specified .jinja file
|
||||
Self::load_chat_template_from_file(template_path)?
|
||||
@@ -57,7 +54,6 @@ impl HuggingFaceTokenizer {
|
||||
special_tokens,
|
||||
vocab,
|
||||
reverse_vocab,
|
||||
#[cfg(feature = "minijinja")]
|
||||
chat_template,
|
||||
})
|
||||
}
|
||||
@@ -76,7 +72,6 @@ impl HuggingFaceTokenizer {
|
||||
special_tokens,
|
||||
vocab,
|
||||
reverse_vocab,
|
||||
#[cfg(feature = "minijinja")]
|
||||
chat_template: None,
|
||||
}
|
||||
}
|
||||
@@ -109,7 +104,6 @@ impl HuggingFaceTokenizer {
|
||||
}
|
||||
|
||||
/// Try to load chat template from tokenizer_config.json
|
||||
#[cfg(feature = "minijinja")]
|
||||
fn load_chat_template(tokenizer_path: &str) -> Option<String> {
|
||||
// Try to find tokenizer_config.json in the same directory
|
||||
let path = std::path::Path::new(tokenizer_path);
|
||||
@@ -127,7 +121,6 @@ impl HuggingFaceTokenizer {
|
||||
}
|
||||
|
||||
/// Load chat template from a .jinja file
|
||||
#[cfg(feature = "minijinja")]
|
||||
fn load_chat_template_from_file(template_path: &str) -> Result<Option<String>> {
|
||||
use std::fs;
|
||||
|
||||
@@ -141,13 +134,11 @@ impl HuggingFaceTokenizer {
|
||||
}
|
||||
|
||||
/// Store `template` as this tokenizer's chat template.
///
/// Any template that was previously held is overwritten.
#[cfg(feature = "minijinja")]
pub fn set_chat_template(&mut self, template: String) {
    self.chat_template = Option::from(template);
}
|
||||
|
||||
/// Apply chat template if available
|
||||
#[cfg(feature = "minijinja")]
|
||||
pub fn apply_chat_template(
|
||||
&self,
|
||||
messages: &[ChatMessage],
|
||||
@@ -172,24 +163,6 @@ impl HuggingFaceTokenizer {
|
||||
Ok(result)
|
||||
}
|
||||
}
|
||||
|
||||
/// Apply chat template if available (without minijinja feature)
|
||||
#[cfg(not(feature = "minijinja"))]
|
||||
pub fn apply_chat_template(
|
||||
&self,
|
||||
messages: &[ChatMessage],
|
||||
add_generation_prompt: bool,
|
||||
) -> Result<String> {
|
||||
// Fallback to simple formatting
|
||||
let mut result = String::new();
|
||||
for msg in messages {
|
||||
result.push_str(&format!("{}: {}\n", msg.role, msg.content));
|
||||
}
|
||||
if add_generation_prompt {
|
||||
result.push_str("assistant: ");
|
||||
}
|
||||
Ok(result)
|
||||
}
|
||||
}
|
||||
|
||||
impl Encoder for HuggingFaceTokenizer {
|
||||
@@ -241,10 +214,8 @@ impl TokenizerTrait for HuggingFaceTokenizer {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
#[cfg(feature = "minijinja")]
|
||||
use super::ChatMessage;
|
||||
|
||||
#[cfg(feature = "minijinja")]
|
||||
#[test]
|
||||
fn test_chat_message_creation() {
|
||||
let msg = ChatMessage::system("You are a helpful assistant");
|
||||
|
||||
Reference in New Issue
Block a user