[router] remove old/outdated/useless comments (#10967)

This commit is contained in:
Simo Lin
2025-09-26 12:45:15 -04:00
committed by GitHub
parent be059b83d6
commit a7fe6e10a1
5 changed files with 28 additions and 306 deletions

View File

@@ -19,7 +19,6 @@ impl Default for PrometheusConfig {
}
pub fn init_metrics() {
// Request metrics
describe_counter!(
"sgl_router_requests_total",
"Total number of requests by route and method"
@@ -45,7 +44,6 @@ pub fn init_metrics() {
"Total number of requests that exhausted retries by route"
);
// Circuit breaker metrics
describe_gauge!(
"sgl_router_cb_state",
"Circuit breaker state per worker (0=closed, 1=open, 2=half_open)"
@@ -59,7 +57,6 @@ pub fn init_metrics() {
"Total number of circuit breaker outcomes by worker and outcome type (success/failure)"
);
// Worker metrics
describe_gauge!(
"sgl_router_active_workers",
"Number of currently active workers"
@@ -74,7 +71,6 @@ pub fn init_metrics() {
"Total requests processed by each worker"
);
// Policy metrics
describe_counter!(
"sgl_router_policy_decisions_total",
"Total routing policy decisions by policy and worker"
@@ -92,7 +88,6 @@ pub fn init_metrics() {
describe_gauge!("sgl_router_max_load", "Maximum worker load");
describe_gauge!("sgl_router_min_load", "Minimum worker load");
// PD-specific metrics
describe_counter!("sgl_router_pd_requests_total", "Total PD requests by route");
describe_counter!(
"sgl_router_pd_prefill_requests_total",
@@ -123,7 +118,6 @@ pub fn init_metrics() {
"PD request duration by route"
);
// Service discovery metrics
describe_counter!(
"sgl_router_discovery_updates_total",
"Total service discovery update events"
@@ -137,13 +131,11 @@ pub fn init_metrics() {
"Number of workers removed in last discovery update"
);
// Generate request specific metrics
describe_histogram!(
"sgl_router_generate_duration_seconds",
"Generate request duration"
);
// Embedding request specific metrics
describe_counter!("sgl_router_embeddings_total", "Total embedding requests");
describe_histogram!(
"sgl_router_embeddings_duration_seconds",
@@ -155,13 +147,11 @@ pub fn init_metrics() {
);
describe_gauge!("sgl_router_embeddings_queue_size", "Embedding queue size");
// Running requests gauge for cache-aware policy
describe_gauge!(
"sgl_router_running_requests",
"Number of running requests per worker"
);
// Tokenizer metrics
describe_histogram!(
"sgl_tokenizer_encode_duration_seconds",
"Time to encode text to tokens"
@@ -207,7 +197,6 @@ pub fn init_metrics() {
"Vocabulary size of the loaded tokenizer"
);
// Stop sequence detection metrics
describe_counter!(
"sgl_tokenizer_stop_sequences_detected_total",
"Total stop sequences detected by type"
@@ -221,7 +210,6 @@ pub fn init_metrics() {
"Time to check for stop sequences per token"
);
// Streaming decode metrics
describe_counter!(
"sgl_tokenizer_stream_tokens_total",
"Total tokens processed in streaming decode"
@@ -235,7 +223,6 @@ pub fn init_metrics() {
"Time per streaming decode step"
);
// Factory metrics
describe_counter!(
"sgl_tokenizer_factory_loads_total",
"Total tokenizer loads by file type"
@@ -251,7 +238,6 @@ pub fn init_metrics() {
}
pub fn start_prometheus(config: PrometheusConfig) {
// Initialize metric descriptions
init_metrics();
let duration_matcher = Matcher::Suffix(String::from("duration_seconds"));
@@ -280,7 +266,6 @@ pub struct RouterMetrics;
pub struct TokenizerMetrics;
impl RouterMetrics {
// Request metrics
pub fn record_request(route: &str) {
counter!("sgl_router_requests_total",
"route" => route.to_string()
@@ -324,7 +309,6 @@ impl RouterMetrics {
.increment(1);
}
// Worker metrics
/// Publishes `count` on the `sgl_router_active_workers` gauge.
///
/// The gauge stores an `f64`, so `count` is converted with an `as` cast.
pub fn set_active_workers(count: usize) {
    let active = count as f64;
    gauge!("sgl_router_active_workers").set(active);
}
@@ -350,7 +334,6 @@ impl RouterMetrics {
.increment(1);
}
// Policy metrics
pub fn record_policy_decision(policy: &str, worker: &str) {
counter!("sgl_router_policy_decisions_total",
"policy" => policy.to_string(),
@@ -383,7 +366,6 @@ impl RouterMetrics {
gauge!("sgl_router_min_load").set(min_load as f64);
}
// PD-specific metrics
pub fn record_pd_request(route: &str) {
counter!("sgl_router_pd_requests_total",
"route" => route.to_string()
@@ -440,19 +422,16 @@ impl RouterMetrics {
.increment(1);
}
// Service discovery metrics
/// Records a single service discovery update.
///
/// Increments `sgl_router_discovery_updates_total` by one and overwrites the
/// `sgl_router_discovery_workers_added` / `sgl_router_discovery_workers_removed`
/// gauges with `added` and `removed` (converted to `f64`).
pub fn record_discovery_update(added: usize, removed: usize) {
    let (workers_added, workers_removed) = (added as f64, removed as f64);
    counter!("sgl_router_discovery_updates_total").increment(1);
    gauge!("sgl_router_discovery_workers_added").set(workers_added);
    gauge!("sgl_router_discovery_workers_removed").set(workers_removed);
}
// Generate request metrics
/// Records `duration`, expressed as fractional seconds, in the
/// `sgl_router_generate_duration_seconds` histogram.
pub fn record_generate_duration(duration: Duration) {
    let seconds = duration.as_secs_f64();
    histogram!("sgl_router_generate_duration_seconds").record(seconds);
}
// Embeddings metrics
/// Increments the `sgl_router_embeddings_total` counter by one.
pub fn record_embeddings_request() {
    let embeddings_total = counter!("sgl_router_embeddings_total");
    embeddings_total.increment(1);
}
@@ -473,7 +452,6 @@ impl RouterMetrics {
gauge!("sgl_router_embeddings_queue_size").set(size as f64);
}
// Running requests for cache-aware policy
pub fn set_running_requests(worker: &str, count: usize) {
gauge!("sgl_router_running_requests",
"worker" => worker.to_string()
@@ -481,7 +459,6 @@ impl RouterMetrics {
.set(count as f64);
}
// Circuit breaker metrics
pub fn set_cb_state(worker: &str, state_code: u8) {
gauge!("sgl_router_cb_state",
"worker" => worker.to_string()
@@ -508,7 +485,6 @@ impl RouterMetrics {
}
impl TokenizerMetrics {
// Encoding metrics
pub fn record_encode_request(tokenizer_type: &str) {
counter!("sgl_tokenizer_encode_requests_total",
"tokenizer_type" => tokenizer_type.to_string()
@@ -535,7 +511,6 @@ impl TokenizerMetrics {
histogram!("sgl_tokenizer_chars_per_encode").record(char_count as f64);
}
// Decoding metrics
pub fn record_decode_request(tokenizer_type: &str) {
counter!("sgl_tokenizer_decode_requests_total",
"tokenizer_type" => tokenizer_type.to_string()
@@ -558,7 +533,6 @@ impl TokenizerMetrics {
histogram!("sgl_tokenizer_tokens_per_decode").record(token_count as f64);
}
// Batch encoding metrics
pub fn record_encode_batch_duration(duration: Duration, batch_size: usize) {
histogram!("sgl_tokenizer_encode_batch_duration_seconds",
"batch_size" => batch_size.to_string()
@@ -566,7 +540,6 @@ impl TokenizerMetrics {
.record(duration.as_secs_f64());
}
// Stop sequence detection metrics
pub fn record_stop_sequence_detected(stop_type: &str) {
counter!("sgl_tokenizer_stop_sequences_detected_total",
"type" => stop_type.to_string()
@@ -582,7 +555,6 @@ impl TokenizerMetrics {
histogram!("sgl_tokenizer_stop_detection_duration_seconds").record(duration.as_secs_f64());
}
// Streaming decode metrics
/// Increments the `sgl_tokenizer_stream_tokens_total` counter by one.
pub fn record_stream_token() {
    let stream_tokens = counter!("sgl_tokenizer_stream_tokens_total");
    stream_tokens.increment(1);
}
@@ -595,7 +567,6 @@ impl TokenizerMetrics {
histogram!("sgl_tokenizer_stream_step_duration_seconds").record(duration.as_secs_f64());
}
// Factory metrics
pub fn record_factory_load(file_type: &str) {
counter!("sgl_tokenizer_factory_loads_total",
"file_type" => file_type.to_string()
@@ -614,7 +585,6 @@ impl TokenizerMetrics {
histogram!("sgl_tokenizer_factory_load_duration_seconds").record(duration.as_secs_f64());
}
// Vocabulary metrics
pub fn set_vocab_size(tokenizer_type: &str, size: usize) {
gauge!("sgl_tokenizer_vocab_size",
"tokenizer_type" => tokenizer_type.to_string()
@@ -705,7 +675,6 @@ mod tests {
.parse()
.unwrap_or(IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0)));
// Should fall back to 0.0.0.0
assert_eq!(ip_addr, IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0)));
}
}
@@ -780,7 +749,6 @@ mod tests {
fn test_duration_suffix_matcher() {
let matcher = Matcher::Suffix(String::from("duration_seconds"));
// Test matching behavior
let _matching_metrics = [
"request_duration_seconds",
"response_duration_seconds",
@@ -789,8 +757,6 @@ mod tests {
let _non_matching_metrics = ["duration_total", "duration_seconds_total", "other_metric"];
// Note: We can't directly test Matcher matching without the internals,
// but we can verify the matcher is created correctly
match matcher {
Matcher::Suffix(suffix) => assert_eq!(suffix, "duration_seconds"),
_ => panic!("Expected Suffix matcher"),
@@ -801,7 +767,6 @@ mod tests {
#[test]
fn test_prometheus_builder_configuration() {
// This test verifies the builder configuration without actually starting Prometheus
let _config = PrometheusConfig::default();
let duration_matcher = Matcher::Suffix(String::from("duration_seconds"));
@@ -810,10 +775,8 @@ mod tests {
60.0, 90.0, 120.0, 180.0, 240.0,
];
// Verify bucket configuration
assert_eq!(duration_bucket.len(), 20);
// Verify matcher is suffix type
match duration_matcher {
Matcher::Suffix(s) => assert_eq!(s, "duration_seconds"),
_ => panic!("Expected Suffix matcher"),
@@ -832,14 +795,12 @@ mod tests {
#[test]
fn test_custom_buckets_for_different_metrics() {
// Test that we can create different bucket configurations
let request_buckets = [0.001, 0.01, 0.1, 1.0, 10.0];
let generate_buckets = [0.1, 0.5, 1.0, 5.0, 30.0, 60.0];
assert_eq!(request_buckets.len(), 5);
assert_eq!(generate_buckets.len(), 6);
// Verify each set is sorted
for i in 1..request_buckets.len() {
assert!(request_buckets[i] > request_buckets[i - 1]);
}
@@ -853,7 +814,6 @@ mod tests {
#[test]
fn test_metrics_static_methods() {
// Test that all static methods can be called without panic
RouterMetrics::record_request("/generate");
RouterMetrics::record_request_duration("/generate", Duration::from_millis(100));
RouterMetrics::record_request_error("/generate", "timeout");
@@ -887,41 +847,32 @@ mod tests {
#[test]
fn test_tokenizer_metrics_static_methods() {
    // Smoke test: invoke each TokenizerMetrics recorder with a sample value.
    // There are no assertions; the test passes as long as no call panics.
    let tokenizer = "huggingface";

    // Encode path.
    TokenizerMetrics::record_encode_request(tokenizer);
    TokenizerMetrics::record_encode_duration(Duration::from_millis(10));
    TokenizerMetrics::record_encode_error("invalid_input");
    TokenizerMetrics::record_tokens_per_encode(100);
    TokenizerMetrics::record_chars_per_encode(500);

    // Decode path.
    TokenizerMetrics::record_decode_request(tokenizer);
    TokenizerMetrics::record_decode_duration(Duration::from_millis(5));
    TokenizerMetrics::record_decode_error("invalid_tokens");
    TokenizerMetrics::record_tokens_per_decode(50);

    // Batch encode.
    let batch_elapsed = Duration::from_millis(100);
    TokenizerMetrics::record_encode_batch_duration(batch_elapsed, 10);

    // Stop sequences: both detection types, then partial match and timing.
    for stop_type in ["token", "string"] {
        TokenizerMetrics::record_stop_sequence_detected(stop_type);
    }
    TokenizerMetrics::record_partial_match();
    TokenizerMetrics::record_stop_detection_duration(Duration::from_micros(100));

    // Streaming decode.
    TokenizerMetrics::record_stream_token();
    TokenizerMetrics::record_incomplete_utf8();
    TokenizerMetrics::record_stream_step_duration(Duration::from_micros(50));

    // Tokenizer factory.
    TokenizerMetrics::record_factory_load("json");
    TokenizerMetrics::record_factory_error("unsupported_format");
    TokenizerMetrics::record_factory_load_duration(Duration::from_millis(200));

    // Vocabulary size gauge.
    TokenizerMetrics::set_vocab_size(tokenizer, 50_000);
}
@@ -929,17 +880,14 @@ mod tests {
#[test]
fn test_port_already_in_use() {
    // Fixed port for this test. If it cannot be bound, the body of the
    // `if let` is skipped and the test passes without checking anything.
    let port = 29123;
    if let Ok(_listener) = TcpListener::bind(("127.0.0.1", port)) {
        // `_listener` stays alive until the end of this block, so the port
        // remains bound while the config is constructed.
        let config = PrometheusConfig {
            port,
            host: "127.0.0.1".to_string(),
        };
        // Only the stored port value is checked; nothing else is started
        // on the port here.
        assert_eq!(config.port, port);
    }
}
@@ -948,8 +896,6 @@ mod tests {
#[test]
fn test_metrics_endpoint_accessibility() {
// This would be an integration test in practice
// Here we just verify the configuration
let config = PrometheusConfig {
port: 29000,
host: "127.0.0.1".to_string(),
@@ -963,7 +909,6 @@ mod tests {
#[test]
fn test_concurrent_metric_updates() {
// Test that metric updates can be called concurrently
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::thread;
@@ -984,11 +929,9 @@ mod tests {
handles.push(handle);
}
// Let threads run briefly
thread::sleep(Duration::from_millis(10));
done.store(true, Ordering::Relaxed);
// Wait for all threads
for handle in handles {
handle.join().unwrap();
}
@@ -998,7 +941,6 @@ mod tests {
#[test]
fn test_empty_string_metrics() {
// Test that empty strings don't cause issues
RouterMetrics::record_request("");
RouterMetrics::set_worker_health("", true);
RouterMetrics::record_policy_decision("", "");
@@ -1030,7 +972,6 @@ mod tests {
#[test]
fn test_extreme_metric_values() {
// Test extreme values
RouterMetrics::set_active_workers(0);
RouterMetrics::set_active_workers(usize::MAX);
@@ -1038,7 +979,6 @@ mod tests {
RouterMetrics::set_worker_load("worker", usize::MAX);
RouterMetrics::record_request_duration("route", Duration::from_nanos(1));
// 24 hours
RouterMetrics::record_request_duration("route", Duration::from_secs(86400));
}
}