[router] add tokenizer chat template support (#9370)
Co-authored-by: Chang Su <chang.s.su@oracle.com>
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
use super::traits;
|
||||
use super::traits::{self, TokenIdType};
|
||||
use crate::metrics::TokenizerMetrics;
|
||||
use anyhow::Result;
|
||||
use std::collections::HashSet;
|
||||
@@ -22,18 +22,18 @@ pub enum SequenceDecoderOutput {
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct StopSequenceConfig {
|
||||
/// Token IDs that trigger a stop
|
||||
pub stop_tokens: HashSet<u32>,
|
||||
pub stop_tokens: HashSet<TokenIdType>,
|
||||
/// String sequences that trigger a stop
|
||||
pub stop_sequences: Vec<String>,
|
||||
/// Token IDs for visible stops (included in output)
|
||||
pub visible_stop_tokens: HashSet<u32>,
|
||||
pub visible_stop_tokens: HashSet<TokenIdType>,
|
||||
/// String sequences for visible stops (included in output)
|
||||
pub visible_stop_sequences: Vec<String>,
|
||||
}
|
||||
|
||||
impl StopSequenceConfig {
|
||||
/// Builder pattern - add a stop token
|
||||
pub fn with_stop_token(mut self, token_id: u32) -> Self {
|
||||
pub fn with_stop_token(mut self, token_id: TokenIdType) -> Self {
|
||||
self.stop_tokens.insert(token_id);
|
||||
self
|
||||
}
|
||||
@@ -45,7 +45,7 @@ impl StopSequenceConfig {
|
||||
}
|
||||
|
||||
/// Builder pattern - add a visible stop token
|
||||
pub fn with_visible_stop_token(mut self, token_id: u32) -> Self {
|
||||
pub fn with_visible_stop_token(mut self, token_id: TokenIdType) -> Self {
|
||||
self.visible_stop_tokens.insert(token_id);
|
||||
self
|
||||
}
|
||||
@@ -64,7 +64,7 @@ pub struct StopSequenceDecoder {
|
||||
/// Buffer for partial matches (the "jail")
|
||||
jail_buffer: String,
|
||||
/// Accumulated tokens
|
||||
token_buffer: Vec<u32>,
|
||||
token_buffer: Vec<TokenIdType>,
|
||||
/// Offset where the prefix text starts (for context)
|
||||
prefix_offset: usize,
|
||||
/// Offset marking the end of previously decoded text
|
||||
@@ -94,7 +94,7 @@ impl StopSequenceDecoder {
|
||||
}
|
||||
|
||||
/// Process a single token
|
||||
pub fn process_token(&mut self, token_id: u32) -> Result<SequenceDecoderOutput> {
|
||||
pub fn process_token(&mut self, token_id: TokenIdType) -> Result<SequenceDecoderOutput> {
|
||||
let start = Instant::now();
|
||||
|
||||
if self.stopped {
|
||||
@@ -252,7 +252,10 @@ impl StopSequenceDecoder {
|
||||
}
|
||||
|
||||
/// Process multiple tokens
|
||||
pub fn process_tokens(&mut self, token_ids: &[u32]) -> Result<Vec<SequenceDecoderOutput>> {
|
||||
pub fn process_tokens(
|
||||
&mut self,
|
||||
token_ids: &[TokenIdType],
|
||||
) -> Result<Vec<SequenceDecoderOutput>> {
|
||||
let mut outputs = Vec::new();
|
||||
for &token_id in token_ids {
|
||||
outputs.push(self.process_token(token_id)?);
|
||||
@@ -302,7 +305,7 @@ impl StopSequenceDecoderBuilder {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn stop_token(mut self, token_id: u32) -> Self {
|
||||
pub fn stop_token(mut self, token_id: TokenIdType) -> Self {
|
||||
self.config.stop_tokens.insert(token_id);
|
||||
self
|
||||
}
|
||||
@@ -312,7 +315,7 @@ impl StopSequenceDecoderBuilder {
|
||||
self
|
||||
}
|
||||
|
||||
pub fn visible_stop_token(mut self, token_id: u32) -> Self {
|
||||
pub fn visible_stop_token(mut self, token_id: TokenIdType) -> Self {
|
||||
self.config.visible_stop_tokens.insert(token_id);
|
||||
self
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user