[router] add tokenizer metrics (#9307)

Co-authored-by: Chang Su <chang.s.su@oracle.com>
This commit is contained in:
Simo Lin
2025-08-18 09:25:51 -07:00
committed by GitHub
parent 4c0bb411e5
commit 24247b4168
5 changed files with 344 additions and 11 deletions

View File

@@ -1,7 +1,9 @@
use super::traits;
use crate::metrics::TokenizerMetrics;
use anyhow::Result;
use std::collections::HashSet;
use std::sync::Arc;
use std::time::Instant;
/// Output from the sequence decoder
#[derive(Debug, Clone, PartialEq)]
@@ -93,6 +95,8 @@ impl StopSequenceDecoder {
/// Process a single token
pub fn process_token(&mut self, token_id: u32) -> Result<SequenceDecoderOutput> {
let start = Instant::now();
if self.stopped {
return Ok(SequenceDecoderOutput::Stopped);
}
@@ -100,23 +104,30 @@ impl StopSequenceDecoder {
// Check for token-level stops first
if self.config.stop_tokens.contains(&token_id) {
self.stopped = true;
TokenizerMetrics::record_stop_sequence_detected("token");
// Flush any jailed text before stopping
if !self.jail_buffer.is_empty() {
let output = self.jail_buffer.clone();
self.jail_buffer.clear();
TokenizerMetrics::record_stop_detection_duration(start.elapsed());
return Ok(SequenceDecoderOutput::StoppedWithText(output));
}
TokenizerMetrics::record_stop_detection_duration(start.elapsed());
return Ok(SequenceDecoderOutput::Stopped);
}
if self.config.visible_stop_tokens.contains(&token_id) {
self.stopped = true;
TokenizerMetrics::record_stop_sequence_detected("visible_token");
// Include jailed text plus the stop token
let stop_text = self
.tokenizer
.decode(&[token_id], self.skip_special_tokens)?;
let output = format!("{}{}", self.jail_buffer, stop_text);
self.jail_buffer.clear();
TokenizerMetrics::record_stop_detection_duration(start.elapsed());
return Ok(SequenceDecoderOutput::StoppedWithText(output));
}
@@ -161,9 +172,12 @@ impl StopSequenceDecoder {
for stop_seq in &self.config.stop_sequences {
if let Some(pos) = check_text.find(stop_seq) {
self.stopped = true;
TokenizerMetrics::record_stop_sequence_detected("string");
// Output text before the stop sequence
let output = check_text[..pos].to_string();
self.jail_buffer.clear();
TokenizerMetrics::record_stop_detection_duration(start.elapsed());
return Ok(if output.is_empty() {
SequenceDecoderOutput::Stopped
} else {
@@ -176,10 +190,13 @@ impl StopSequenceDecoder {
for stop_seq in &self.config.visible_stop_sequences {
if let Some(pos) = check_text.find(stop_seq) {
self.stopped = true;
TokenizerMetrics::record_stop_sequence_detected("visible_string");
// Include the stop sequence in output
let end_pos = pos + stop_seq.len();
let output = check_text[..end_pos].to_string();
self.jail_buffer.clear();
TokenizerMetrics::record_stop_detection_duration(start.elapsed());
return Ok(SequenceDecoderOutput::StoppedWithText(output));
}
}
@@ -202,6 +219,8 @@ impl StopSequenceDecoder {
}
if partial_match_len > 0 {
TokenizerMetrics::record_partial_match();
// Split: output safe text, jail the potential match
let safe_end = check_text.len() - partial_match_len;
let safe_text = &check_text[..safe_end];
@@ -211,6 +230,8 @@ impl StopSequenceDecoder {
self.prefix_offset = self.read_offset;
self.read_offset = self.token_buffer.len();
TokenizerMetrics::record_stop_detection_duration(start.elapsed());
if safe_text.is_empty() {
Ok(SequenceDecoderOutput::Held)
} else {
@@ -224,6 +245,8 @@ impl StopSequenceDecoder {
self.prefix_offset = self.read_offset;
self.read_offset = self.token_buffer.len();
TokenizerMetrics::record_stop_detection_duration(start.elapsed());
Ok(SequenceDecoderOutput::Text(check_text))
}
}