[router] Add rustfmt and set group imports by default (#11732)

This commit is contained in:
Chang Su
2025-10-16 17:33:29 -07:00
committed by GitHub
parent 7a7f99beb7
commit dc01313da1
126 changed files with 1127 additions and 813 deletions

View File

@@ -3,12 +3,16 @@
//! This module provides functionality to apply chat templates to messages,
//! similar to HuggingFace transformers' apply_chat_template method.
use anyhow::{anyhow, Result};
use minijinja::machinery::ast::{Expr, Stmt};
use minijinja::{context, Environment, Value};
use serde_json;
use std::collections::HashMap;
use anyhow::{anyhow, Result};
use minijinja::{
context,
machinery::ast::{Expr, Stmt},
Environment, Value,
};
use serde_json;
/// Chat template content format
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ChatTemplateContentFormat {
@@ -319,8 +323,10 @@ impl<'a> Detector<'a> {
/// AST-based detection using minijinja's unstable machinery
/// Single-pass detector with scope tracking
fn detect_format_with_ast(template: &str) -> Option<ChatTemplateContentFormat> {
use minijinja::machinery::{parse, WhitespaceConfig};
use minijinja::syntax::SyntaxConfig;
use minijinja::{
machinery::{parse, WhitespaceConfig},
syntax::SyntaxConfig,
};
let ast = match parse(
template,

View File

@@ -1,13 +1,9 @@
use super::traits;
use std::{fs::File, io::Read, path::Path, sync::Arc};
use anyhow::{Error, Result};
use std::fs::File;
use std::io::Read;
use std::path::Path;
use std::sync::Arc;
use tracing::{debug, info};
use super::huggingface::HuggingFaceTokenizer;
use super::tiktoken::TiktokenTokenizer;
use super::{huggingface::HuggingFaceTokenizer, tiktoken::TiktokenTokenizer, traits};
use crate::tokenizer::hub::download_tokenizer_from_hf;
/// Represents the type of tokenizer being used
@@ -379,8 +375,7 @@ pub fn get_tokenizer_info(file_path: &str) -> Result<TokenizerType> {
Some("json") => Ok(TokenizerType::HuggingFace(file_path.to_string())),
_ => {
// Try auto-detection
use std::fs::File;
use std::io::Read;
use std::{fs::File, io::Read};
let mut file = File::open(file_path)?;
let mut buffer = vec![0u8; 512];

View File

@@ -1,6 +1,9 @@
use std::{
env,
path::{Path, PathBuf},
};
use hf_hub::api::tokio::ApiBuilder;
use std::env;
use std::path::{Path, PathBuf};
const IGNORED: [&str; 5] = [
".gitattributes",

View File

@@ -3,12 +3,12 @@ use std::collections::HashMap;
use anyhow::{Error, Result};
use tokenizers::tokenizer::Tokenizer as HfTokenizer;
use super::chat_template::{
detect_chat_template_content_format, ChatTemplateContentFormat, ChatTemplateParams,
ChatTemplateProcessor,
};
use super::traits::{
Decoder, Encoder, Encoding, SpecialTokens, TokenIdType, Tokenizer as TokenizerTrait,
use super::{
chat_template::{
detect_chat_template_content_format, ChatTemplateContentFormat, ChatTemplateParams,
ChatTemplateProcessor,
},
traits::{Decoder, Encoder, Encoding, SpecialTokens, TokenIdType, Tokenizer as TokenizerTrait},
};
/// HuggingFace tokenizer wrapper

View File

@@ -1,9 +1,11 @@
//! Mock tokenizer implementation for testing
use super::traits::{Decoder, Encoder, Encoding, SpecialTokens, Tokenizer as TokenizerTrait};
use anyhow::Result;
use std::collections::HashMap;
use anyhow::Result;
use super::traits::{Decoder, Encoder, Encoding, SpecialTokens, Tokenizer as TokenizerTrait};
/// Mock tokenizer for testing purposes
pub struct MockTokenizer {
vocab: HashMap<String, u32>,

View File

@@ -1,6 +1,6 @@
use std::{ops::Deref, sync::Arc};
use anyhow::Result;
use std::ops::Deref;
use std::sync::Arc;
pub mod factory;
pub mod hub;
@@ -27,14 +27,12 @@ pub use factory::{
create_tokenizer_from_file, create_tokenizer_with_chat_template,
create_tokenizer_with_chat_template_blocking, TokenizerType,
};
pub use huggingface::HuggingFaceTokenizer;
pub use sequence::Sequence;
pub use stop::{SequenceDecoderOutput, StopSequenceConfig, StopSequenceDecoder};
pub use stream::DecodeStream;
pub use traits::{Decoder, Encoder, Encoding, SpecialTokens, Tokenizer as TokenizerTrait};
pub use huggingface::HuggingFaceTokenizer;
pub use tiktoken::{TiktokenModel, TiktokenTokenizer};
pub use traits::{Decoder, Encoder, Encoding, SpecialTokens, Tokenizer as TokenizerTrait};
/// Main tokenizer wrapper that provides a unified interface for different tokenizer implementations
#[derive(Clone)]

View File

@@ -1,7 +1,9 @@
use super::traits::{TokenIdType, Tokenizer as TokenizerTrait};
use anyhow::Result;
use std::sync::Arc;
use anyhow::Result;
use super::traits::{TokenIdType, Tokenizer as TokenizerTrait};
/// Maintains state for an ongoing sequence of tokens and their decoded text
/// This provides a cleaner abstraction for managing token sequences
pub struct Sequence {

View File

@@ -1,8 +1,11 @@
use super::sequence::Sequence;
use super::traits::{self, TokenIdType};
use std::{collections::HashSet, sync::Arc};
use anyhow::Result;
use std::collections::HashSet;
use std::sync::Arc;
use super::{
sequence::Sequence,
traits::{self, TokenIdType},
};
/// Output from the sequence decoder
#[derive(Debug, Clone, PartialEq)]

View File

@@ -1,9 +1,11 @@
// src/tokenizer/stream.rs
use super::traits::{self, TokenIdType};
use anyhow::Result;
use std::sync::Arc;
use anyhow::Result;
use super::traits::{self, TokenIdType};
// NOTE(review): judging by the name, this is the starting offset (in tokens) used for
// incremental detokenization; the code that consumes it is not visible here — confirm.
const INITIAL_INCREMENTAL_DETOKENIZATION_OFFSET: usize = 5;
/// DecodeStream will keep the state necessary to produce individual chunks of

View File

@@ -1,8 +1,9 @@
#[cfg(test)]
use super::*;
#[cfg(test)]
use std::sync::Arc;
#[cfg(test)]
use super::*;
#[test]
fn test_mock_tokenizer_encode() {
let tokenizer = mock::MockTokenizer::new();

View File

@@ -1,8 +1,9 @@
use anyhow::{Error, Result};
use tiktoken_rs::{cl100k_base, p50k_base, p50k_edit, r50k_base, CoreBPE};
use super::traits::{
Decoder, Encoder, Encoding, SpecialTokens, TokenIdType, Tokenizer as TokenizerTrait,
};
use anyhow::{Error, Result};
use tiktoken_rs::{cl100k_base, p50k_base, p50k_edit, r50k_base, CoreBPE};
/// Tiktoken tokenizer wrapper for OpenAI GPT models
pub struct TiktokenTokenizer {

View File

@@ -1,6 +1,9 @@
use std::{
collections::hash_map::DefaultHasher,
hash::{Hash, Hasher},
};
use anyhow::Result;
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
/// Type alias for token IDs.
///
/// Token IDs are represented as unsigned 32-bit integers (`u32`).
pub type TokenIdType = u32;