[router] add tokenizer chat template support (#9370)

Co-authored-by: Chang Su <chang.s.su@oracle.com>
This commit is contained in:
Simo Lin
2025-08-19 20:14:02 -07:00
committed by GitHub
parent 7638f5e44e
commit 5fbad308cd
12 changed files with 748 additions and 85 deletions

View File

@@ -1,4 +1,4 @@
use super::traits;
use super::traits::{self, TokenIdType};
use crate::metrics::TokenizerMetrics;
use anyhow::Result;
use std::collections::HashSet;
@@ -22,18 +22,18 @@ pub enum SequenceDecoderOutput {
#[derive(Debug, Clone, Default)]
pub struct StopSequenceConfig {
/// Token IDs that trigger a stop
pub stop_tokens: HashSet<u32>,
pub stop_tokens: HashSet<TokenIdType>,
/// String sequences that trigger a stop
pub stop_sequences: Vec<String>,
/// Token IDs for visible stops (included in output)
pub visible_stop_tokens: HashSet<u32>,
pub visible_stop_tokens: HashSet<TokenIdType>,
/// String sequences for visible stops (included in output)
pub visible_stop_sequences: Vec<String>,
}
impl StopSequenceConfig {
/// Builder pattern - add a stop token
pub fn with_stop_token(mut self, token_id: u32) -> Self {
pub fn with_stop_token(mut self, token_id: TokenIdType) -> Self {
self.stop_tokens.insert(token_id);
self
}
@@ -45,7 +45,7 @@ impl StopSequenceConfig {
}
/// Builder pattern - add a visible stop token
pub fn with_visible_stop_token(mut self, token_id: u32) -> Self {
pub fn with_visible_stop_token(mut self, token_id: TokenIdType) -> Self {
self.visible_stop_tokens.insert(token_id);
self
}
@@ -64,7 +64,7 @@ pub struct StopSequenceDecoder {
/// Buffer for partial matches (the "jail")
jail_buffer: String,
/// Accumulated tokens
token_buffer: Vec<u32>,
token_buffer: Vec<TokenIdType>,
/// Offset where the prefix text starts (for context)
prefix_offset: usize,
/// Offset marking the end of previously decoded text
@@ -94,7 +94,7 @@ impl StopSequenceDecoder {
}
/// Process a single token
pub fn process_token(&mut self, token_id: u32) -> Result<SequenceDecoderOutput> {
pub fn process_token(&mut self, token_id: TokenIdType) -> Result<SequenceDecoderOutput> {
let start = Instant::now();
if self.stopped {
@@ -252,7 +252,10 @@ impl StopSequenceDecoder {
}
/// Process multiple tokens
pub fn process_tokens(&mut self, token_ids: &[u32]) -> Result<Vec<SequenceDecoderOutput>> {
pub fn process_tokens(
&mut self,
token_ids: &[TokenIdType],
) -> Result<Vec<SequenceDecoderOutput>> {
let mut outputs = Vec::new();
for &token_id in token_ids {
outputs.push(self.process_token(token_id)?);
@@ -302,7 +305,7 @@ impl StopSequenceDecoderBuilder {
}
}
pub fn stop_token(mut self, token_id: u32) -> Self {
pub fn stop_token(mut self, token_id: TokenIdType) -> Self {
self.config.stop_tokens.insert(token_id);
self
}
@@ -312,7 +315,7 @@ impl StopSequenceDecoderBuilder {
self
}
pub fn visible_stop_token(mut self, token_id: u32) -> Self {
pub fn visible_stop_token(mut self, token_id: TokenIdType) -> Self {
self.config.visible_stop_tokens.insert(token_id);
self
}