[router] add tokenizer metrics (#9307)

Co-authored-by: Chang Su <chang.s.su@oracle.com>
This commit is contained in:
Simo Lin
2025-08-18 09:25:51 -07:00
committed by GitHub
parent 4c0bb411e5
commit 24247b4168
5 changed files with 344 additions and 11 deletions

View File

@@ -1,8 +1,10 @@
// src/tokenizer/stream.rs
use super::traits;
use crate::metrics::TokenizerMetrics;
use anyhow::Result;
use std::sync::Arc;
use std::time::Instant;
const INITIAL_INCREMENTAL_DETOKENIZATION_OFFSET: usize = 5;
@@ -43,8 +45,12 @@ impl DecodeStream {
/// Step appends a token_id to the internal state and tries to produce a text chunk.
/// Returning `None` means the given id is not enough to produce a chunk.
pub fn step(&mut self, id: u32) -> Result<Option<String>> {
let start = Instant::now();
self.all_token_ids.push(id);
TokenizerMetrics::record_stream_token();
let prefix_text = self.tokenizer.decode(
&self.all_token_ids[self.prefix_offset..self.read_offset],
self.skip_special_tokens,
@@ -61,8 +67,16 @@ impl DecodeStream {
self.prefix_offset = self.read_offset;
self.read_offset = self.all_token_ids.len();
TokenizerMetrics::record_stream_step_duration(start.elapsed());
Ok(Some(new_text))
} else {
if new_text.ends_with("�") {
TokenizerMetrics::record_incomplete_utf8();
}
TokenizerMetrics::record_stream_step_duration(start.elapsed());
Ok(None)
}
}