[router] remove old/outdated/useless comments (#10967)
This commit is contained in:
@@ -19,7 +19,6 @@ impl Default for PrometheusConfig {
|
||||
}
|
||||
|
||||
pub fn init_metrics() {
|
||||
// Request metrics
|
||||
describe_counter!(
|
||||
"sgl_router_requests_total",
|
||||
"Total number of requests by route and method"
|
||||
@@ -45,7 +44,6 @@ pub fn init_metrics() {
|
||||
"Total number of requests that exhausted retries by route"
|
||||
);
|
||||
|
||||
// Circuit breaker metrics
|
||||
describe_gauge!(
|
||||
"sgl_router_cb_state",
|
||||
"Circuit breaker state per worker (0=closed, 1=open, 2=half_open)"
|
||||
@@ -59,7 +57,6 @@ pub fn init_metrics() {
|
||||
"Total number of circuit breaker outcomes by worker and outcome type (success/failure)"
|
||||
);
|
||||
|
||||
// Worker metrics
|
||||
describe_gauge!(
|
||||
"sgl_router_active_workers",
|
||||
"Number of currently active workers"
|
||||
@@ -74,7 +71,6 @@ pub fn init_metrics() {
|
||||
"Total requests processed by each worker"
|
||||
);
|
||||
|
||||
// Policy metrics
|
||||
describe_counter!(
|
||||
"sgl_router_policy_decisions_total",
|
||||
"Total routing policy decisions by policy and worker"
|
||||
@@ -92,7 +88,6 @@ pub fn init_metrics() {
|
||||
describe_gauge!("sgl_router_max_load", "Maximum worker load");
|
||||
describe_gauge!("sgl_router_min_load", "Minimum worker load");
|
||||
|
||||
// PD-specific metrics
|
||||
describe_counter!("sgl_router_pd_requests_total", "Total PD requests by route");
|
||||
describe_counter!(
|
||||
"sgl_router_pd_prefill_requests_total",
|
||||
@@ -123,7 +118,6 @@ pub fn init_metrics() {
|
||||
"PD request duration by route"
|
||||
);
|
||||
|
||||
// Service discovery metrics
|
||||
describe_counter!(
|
||||
"sgl_router_discovery_updates_total",
|
||||
"Total service discovery update events"
|
||||
@@ -137,13 +131,11 @@ pub fn init_metrics() {
|
||||
"Number of workers removed in last discovery update"
|
||||
);
|
||||
|
||||
// Generate request specific metrics
|
||||
describe_histogram!(
|
||||
"sgl_router_generate_duration_seconds",
|
||||
"Generate request duration"
|
||||
);
|
||||
|
||||
// Embedding request specific metrics
|
||||
describe_counter!("sgl_router_embeddings_total", "Total embedding requests");
|
||||
describe_histogram!(
|
||||
"sgl_router_embeddings_duration_seconds",
|
||||
@@ -155,13 +147,11 @@ pub fn init_metrics() {
|
||||
);
|
||||
describe_gauge!("sgl_router_embeddings_queue_size", "Embedding queue size");
|
||||
|
||||
// Running requests gauge for cache-aware policy
|
||||
describe_gauge!(
|
||||
"sgl_router_running_requests",
|
||||
"Number of running requests per worker"
|
||||
);
|
||||
|
||||
// Tokenizer metrics
|
||||
describe_histogram!(
|
||||
"sgl_tokenizer_encode_duration_seconds",
|
||||
"Time to encode text to tokens"
|
||||
@@ -207,7 +197,6 @@ pub fn init_metrics() {
|
||||
"Vocabulary size of the loaded tokenizer"
|
||||
);
|
||||
|
||||
// Stop sequence detection metrics
|
||||
describe_counter!(
|
||||
"sgl_tokenizer_stop_sequences_detected_total",
|
||||
"Total stop sequences detected by type"
|
||||
@@ -221,7 +210,6 @@ pub fn init_metrics() {
|
||||
"Time to check for stop sequences per token"
|
||||
);
|
||||
|
||||
// Streaming decode metrics
|
||||
describe_counter!(
|
||||
"sgl_tokenizer_stream_tokens_total",
|
||||
"Total tokens processed in streaming decode"
|
||||
@@ -235,7 +223,6 @@ pub fn init_metrics() {
|
||||
"Time per streaming decode step"
|
||||
);
|
||||
|
||||
// Factory metrics
|
||||
describe_counter!(
|
||||
"sgl_tokenizer_factory_loads_total",
|
||||
"Total tokenizer loads by file type"
|
||||
@@ -251,7 +238,6 @@ pub fn init_metrics() {
|
||||
}
|
||||
|
||||
pub fn start_prometheus(config: PrometheusConfig) {
|
||||
// Initialize metric descriptions
|
||||
init_metrics();
|
||||
|
||||
let duration_matcher = Matcher::Suffix(String::from("duration_seconds"));
|
||||
@@ -280,7 +266,6 @@ pub struct RouterMetrics;
|
||||
pub struct TokenizerMetrics;
|
||||
|
||||
impl RouterMetrics {
|
||||
// Request metrics
|
||||
pub fn record_request(route: &str) {
|
||||
counter!("sgl_router_requests_total",
|
||||
"route" => route.to_string()
|
||||
@@ -324,7 +309,6 @@ impl RouterMetrics {
|
||||
.increment(1);
|
||||
}
|
||||
|
||||
// Worker metrics
|
||||
pub fn set_active_workers(count: usize) {
|
||||
gauge!("sgl_router_active_workers").set(count as f64);
|
||||
}
|
||||
@@ -350,7 +334,6 @@ impl RouterMetrics {
|
||||
.increment(1);
|
||||
}
|
||||
|
||||
// Policy metrics
|
||||
pub fn record_policy_decision(policy: &str, worker: &str) {
|
||||
counter!("sgl_router_policy_decisions_total",
|
||||
"policy" => policy.to_string(),
|
||||
@@ -383,7 +366,6 @@ impl RouterMetrics {
|
||||
gauge!("sgl_router_min_load").set(min_load as f64);
|
||||
}
|
||||
|
||||
// PD-specific metrics
|
||||
pub fn record_pd_request(route: &str) {
|
||||
counter!("sgl_router_pd_requests_total",
|
||||
"route" => route.to_string()
|
||||
@@ -440,19 +422,16 @@ impl RouterMetrics {
|
||||
.increment(1);
|
||||
}
|
||||
|
||||
// Service discovery metrics
|
||||
pub fn record_discovery_update(added: usize, removed: usize) {
|
||||
counter!("sgl_router_discovery_updates_total").increment(1);
|
||||
gauge!("sgl_router_discovery_workers_added").set(added as f64);
|
||||
gauge!("sgl_router_discovery_workers_removed").set(removed as f64);
|
||||
}
|
||||
|
||||
// Generate request metrics
|
||||
pub fn record_generate_duration(duration: Duration) {
|
||||
histogram!("sgl_router_generate_duration_seconds").record(duration.as_secs_f64());
|
||||
}
|
||||
|
||||
// Embeddings metrics
|
||||
pub fn record_embeddings_request() {
|
||||
counter!("sgl_router_embeddings_total").increment(1);
|
||||
}
|
||||
@@ -473,7 +452,6 @@ impl RouterMetrics {
|
||||
gauge!("sgl_router_embeddings_queue_size").set(size as f64);
|
||||
}
|
||||
|
||||
// Running requests for cache-aware policy
|
||||
pub fn set_running_requests(worker: &str, count: usize) {
|
||||
gauge!("sgl_router_running_requests",
|
||||
"worker" => worker.to_string()
|
||||
@@ -481,7 +459,6 @@ impl RouterMetrics {
|
||||
.set(count as f64);
|
||||
}
|
||||
|
||||
// Circuit breaker metrics
|
||||
pub fn set_cb_state(worker: &str, state_code: u8) {
|
||||
gauge!("sgl_router_cb_state",
|
||||
"worker" => worker.to_string()
|
||||
@@ -508,7 +485,6 @@ impl RouterMetrics {
|
||||
}
|
||||
|
||||
impl TokenizerMetrics {
|
||||
// Encoding metrics
|
||||
pub fn record_encode_request(tokenizer_type: &str) {
|
||||
counter!("sgl_tokenizer_encode_requests_total",
|
||||
"tokenizer_type" => tokenizer_type.to_string()
|
||||
@@ -535,7 +511,6 @@ impl TokenizerMetrics {
|
||||
histogram!("sgl_tokenizer_chars_per_encode").record(char_count as f64);
|
||||
}
|
||||
|
||||
// Decoding metrics
|
||||
pub fn record_decode_request(tokenizer_type: &str) {
|
||||
counter!("sgl_tokenizer_decode_requests_total",
|
||||
"tokenizer_type" => tokenizer_type.to_string()
|
||||
@@ -558,7 +533,6 @@ impl TokenizerMetrics {
|
||||
histogram!("sgl_tokenizer_tokens_per_decode").record(token_count as f64);
|
||||
}
|
||||
|
||||
// Batch encoding metrics
|
||||
pub fn record_encode_batch_duration(duration: Duration, batch_size: usize) {
|
||||
histogram!("sgl_tokenizer_encode_batch_duration_seconds",
|
||||
"batch_size" => batch_size.to_string()
|
||||
@@ -566,7 +540,6 @@ impl TokenizerMetrics {
|
||||
.record(duration.as_secs_f64());
|
||||
}
|
||||
|
||||
// Stop sequence detection metrics
|
||||
pub fn record_stop_sequence_detected(stop_type: &str) {
|
||||
counter!("sgl_tokenizer_stop_sequences_detected_total",
|
||||
"type" => stop_type.to_string()
|
||||
@@ -582,7 +555,6 @@ impl TokenizerMetrics {
|
||||
histogram!("sgl_tokenizer_stop_detection_duration_seconds").record(duration.as_secs_f64());
|
||||
}
|
||||
|
||||
// Streaming decode metrics
|
||||
pub fn record_stream_token() {
|
||||
counter!("sgl_tokenizer_stream_tokens_total").increment(1);
|
||||
}
|
||||
@@ -595,7 +567,6 @@ impl TokenizerMetrics {
|
||||
histogram!("sgl_tokenizer_stream_step_duration_seconds").record(duration.as_secs_f64());
|
||||
}
|
||||
|
||||
// Factory metrics
|
||||
pub fn record_factory_load(file_type: &str) {
|
||||
counter!("sgl_tokenizer_factory_loads_total",
|
||||
"file_type" => file_type.to_string()
|
||||
@@ -614,7 +585,6 @@ impl TokenizerMetrics {
|
||||
histogram!("sgl_tokenizer_factory_load_duration_seconds").record(duration.as_secs_f64());
|
||||
}
|
||||
|
||||
// Vocabulary metrics
|
||||
pub fn set_vocab_size(tokenizer_type: &str, size: usize) {
|
||||
gauge!("sgl_tokenizer_vocab_size",
|
||||
"tokenizer_type" => tokenizer_type.to_string()
|
||||
@@ -705,7 +675,6 @@ mod tests {
|
||||
.parse()
|
||||
.unwrap_or(IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0)));
|
||||
|
||||
// Should fall back to 0.0.0.0
|
||||
assert_eq!(ip_addr, IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0)));
|
||||
}
|
||||
}
|
||||
@@ -780,7 +749,6 @@ mod tests {
|
||||
fn test_duration_suffix_matcher() {
|
||||
let matcher = Matcher::Suffix(String::from("duration_seconds"));
|
||||
|
||||
// Test matching behavior
|
||||
let _matching_metrics = [
|
||||
"request_duration_seconds",
|
||||
"response_duration_seconds",
|
||||
@@ -789,8 +757,6 @@ mod tests {
|
||||
|
||||
let _non_matching_metrics = ["duration_total", "duration_seconds_total", "other_metric"];
|
||||
|
||||
// Note: We can't directly test Matcher matching without the internals,
|
||||
// but we can verify the matcher is created correctly
|
||||
match matcher {
|
||||
Matcher::Suffix(suffix) => assert_eq!(suffix, "duration_seconds"),
|
||||
_ => panic!("Expected Suffix matcher"),
|
||||
@@ -801,7 +767,6 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_prometheus_builder_configuration() {
|
||||
// This test verifies the builder configuration without actually starting Prometheus
|
||||
let _config = PrometheusConfig::default();
|
||||
|
||||
let duration_matcher = Matcher::Suffix(String::from("duration_seconds"));
|
||||
@@ -810,10 +775,8 @@ mod tests {
|
||||
60.0, 90.0, 120.0, 180.0, 240.0,
|
||||
];
|
||||
|
||||
// Verify bucket configuration
|
||||
assert_eq!(duration_bucket.len(), 20);
|
||||
|
||||
// Verify matcher is suffix type
|
||||
match duration_matcher {
|
||||
Matcher::Suffix(s) => assert_eq!(s, "duration_seconds"),
|
||||
_ => panic!("Expected Suffix matcher"),
|
||||
@@ -832,14 +795,12 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_custom_buckets_for_different_metrics() {
|
||||
// Test that we can create different bucket configurations
|
||||
let request_buckets = [0.001, 0.01, 0.1, 1.0, 10.0];
|
||||
let generate_buckets = [0.1, 0.5, 1.0, 5.0, 30.0, 60.0];
|
||||
|
||||
assert_eq!(request_buckets.len(), 5);
|
||||
assert_eq!(generate_buckets.len(), 6);
|
||||
|
||||
// Verify each set is sorted
|
||||
for i in 1..request_buckets.len() {
|
||||
assert!(request_buckets[i] > request_buckets[i - 1]);
|
||||
}
|
||||
@@ -853,7 +814,6 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_metrics_static_methods() {
|
||||
// Test that all static methods can be called without panic
|
||||
RouterMetrics::record_request("/generate");
|
||||
RouterMetrics::record_request_duration("/generate", Duration::from_millis(100));
|
||||
RouterMetrics::record_request_error("/generate", "timeout");
|
||||
@@ -887,41 +847,32 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_tokenizer_metrics_static_methods() {
|
||||
// Test that all tokenizer metric methods can be called without panic
|
||||
|
||||
// Encoding metrics
|
||||
TokenizerMetrics::record_encode_request("huggingface");
|
||||
TokenizerMetrics::record_encode_duration(Duration::from_millis(10));
|
||||
TokenizerMetrics::record_encode_error("invalid_input");
|
||||
TokenizerMetrics::record_tokens_per_encode(100);
|
||||
TokenizerMetrics::record_chars_per_encode(500);
|
||||
|
||||
// Decoding metrics
|
||||
TokenizerMetrics::record_decode_request("huggingface");
|
||||
TokenizerMetrics::record_decode_duration(Duration::from_millis(5));
|
||||
TokenizerMetrics::record_decode_error("invalid_tokens");
|
||||
TokenizerMetrics::record_tokens_per_decode(50);
|
||||
|
||||
// Batch encoding
|
||||
TokenizerMetrics::record_encode_batch_duration(Duration::from_millis(100), 10);
|
||||
|
||||
// Stop sequence detection
|
||||
TokenizerMetrics::record_stop_sequence_detected("token");
|
||||
TokenizerMetrics::record_stop_sequence_detected("string");
|
||||
TokenizerMetrics::record_partial_match();
|
||||
TokenizerMetrics::record_stop_detection_duration(Duration::from_micros(100));
|
||||
|
||||
// Streaming decode
|
||||
TokenizerMetrics::record_stream_token();
|
||||
TokenizerMetrics::record_incomplete_utf8();
|
||||
TokenizerMetrics::record_stream_step_duration(Duration::from_micros(50));
|
||||
|
||||
// Factory metrics
|
||||
TokenizerMetrics::record_factory_load("json");
|
||||
TokenizerMetrics::record_factory_error("unsupported_format");
|
||||
TokenizerMetrics::record_factory_load_duration(Duration::from_millis(200));
|
||||
|
||||
// Vocabulary metrics
|
||||
TokenizerMetrics::set_vocab_size("huggingface", 50000);
|
||||
}
|
||||
|
||||
@@ -929,17 +880,14 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_port_already_in_use() {
|
||||
// Skip this test if we can't bind to the port
|
||||
let port = 29123; // Use a different port to avoid conflicts
|
||||
let port = 29123;
|
||||
|
||||
if let Ok(_listener) = TcpListener::bind(("127.0.0.1", port)) {
|
||||
// Port is available, we can test
|
||||
let config = PrometheusConfig {
|
||||
port,
|
||||
host: "127.0.0.1".to_string(),
|
||||
};
|
||||
|
||||
// Just verify config is created correctly
|
||||
assert_eq!(config.port, port);
|
||||
}
|
||||
}
|
||||
@@ -948,8 +896,6 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_metrics_endpoint_accessibility() {
|
||||
// This would be an integration test in practice
|
||||
// Here we just verify the configuration
|
||||
let config = PrometheusConfig {
|
||||
port: 29000,
|
||||
host: "127.0.0.1".to_string(),
|
||||
@@ -963,7 +909,6 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_concurrent_metric_updates() {
|
||||
// Test that metric updates can be called concurrently
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::sync::Arc;
|
||||
use std::thread;
|
||||
@@ -984,11 +929,9 @@ mod tests {
|
||||
handles.push(handle);
|
||||
}
|
||||
|
||||
// Let threads run briefly
|
||||
thread::sleep(Duration::from_millis(10));
|
||||
done.store(true, Ordering::Relaxed);
|
||||
|
||||
// Wait for all threads
|
||||
for handle in handles {
|
||||
handle.join().unwrap();
|
||||
}
|
||||
@@ -998,7 +941,6 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_empty_string_metrics() {
|
||||
// Test that empty strings don't cause issues
|
||||
RouterMetrics::record_request("");
|
||||
RouterMetrics::set_worker_health("", true);
|
||||
RouterMetrics::record_policy_decision("", "");
|
||||
@@ -1030,7 +972,6 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_extreme_metric_values() {
|
||||
// Test extreme values
|
||||
RouterMetrics::set_active_workers(0);
|
||||
RouterMetrics::set_active_workers(usize::MAX);
|
||||
|
||||
@@ -1038,7 +979,6 @@ mod tests {
|
||||
RouterMetrics::set_worker_load("worker", usize::MAX);
|
||||
|
||||
RouterMetrics::record_request_duration("route", Duration::from_nanos(1));
|
||||
// 24 hours
|
||||
RouterMetrics::record_request_duration("route", Duration::from_secs(86400));
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user