[router] remove old/outdated/useless comments (#10967)

2025-09-26 12:45:15 -04:00
parent be059b83d6
commit a7fe6e10a1
5 changed files with 28 additions and 306 deletions
--- a/sgl-router/src/metrics.rs
+++ b/sgl-router/src/metrics.rs
@@ -19,7 +19,6 @@ impl Default for PrometheusConfig {
 }

 pub fn init_metrics() {
-    // Request metrics
    describe_counter!(
        "sgl_router_requests_total",
        "Total number of requests by route and method"
@@ -45,7 +44,6 @@ pub fn init_metrics() {
        "Total number of requests that exhausted retries by route"
    );

-    // Circuit breaker metrics
    describe_gauge!(
        "sgl_router_cb_state",
        "Circuit breaker state per worker (0=closed, 1=open, 2=half_open)"
@@ -59,7 +57,6 @@ pub fn init_metrics() {
        "Total number of circuit breaker outcomes by worker and outcome type (success/failure)"
    );

-    // Worker metrics
    describe_gauge!(
        "sgl_router_active_workers",
        "Number of currently active workers"
@@ -74,7 +71,6 @@ pub fn init_metrics() {
        "Total requests processed by each worker"
    );

-    // Policy metrics
    describe_counter!(
        "sgl_router_policy_decisions_total",
        "Total routing policy decisions by policy and worker"
@@ -92,7 +88,6 @@ pub fn init_metrics() {
    describe_gauge!("sgl_router_max_load", "Maximum worker load");
    describe_gauge!("sgl_router_min_load", "Minimum worker load");

-    // PD-specific metrics
    describe_counter!("sgl_router_pd_requests_total", "Total PD requests by route");
    describe_counter!(
        "sgl_router_pd_prefill_requests_total",
@@ -123,7 +118,6 @@ pub fn init_metrics() {
        "PD request duration by route"
    );

-    // Service discovery metrics
    describe_counter!(
        "sgl_router_discovery_updates_total",
        "Total service discovery update events"
@@ -137,13 +131,11 @@ pub fn init_metrics() {
        "Number of workers removed in last discovery update"
    );

-    // Generate request specific metrics
    describe_histogram!(
        "sgl_router_generate_duration_seconds",
        "Generate request duration"
    );

-    // Embedding request specific metrics
    describe_counter!("sgl_router_embeddings_total", "Total embedding requests");
    describe_histogram!(
        "sgl_router_embeddings_duration_seconds",
@@ -155,13 +147,11 @@ pub fn init_metrics() {
    );
    describe_gauge!("sgl_router_embeddings_queue_size", "Embedding queue size");

-    // Running requests gauge for cache-aware policy
    describe_gauge!(
        "sgl_router_running_requests",
        "Number of running requests per worker"
    );

-    // Tokenizer metrics
    describe_histogram!(
        "sgl_tokenizer_encode_duration_seconds",
        "Time to encode text to tokens"
@@ -207,7 +197,6 @@ pub fn init_metrics() {
        "Vocabulary size of the loaded tokenizer"
    );

-    // Stop sequence detection metrics
    describe_counter!(
        "sgl_tokenizer_stop_sequences_detected_total",
        "Total stop sequences detected by type"
@@ -221,7 +210,6 @@ pub fn init_metrics() {
        "Time to check for stop sequences per token"
    );

-    // Streaming decode metrics
    describe_counter!(
        "sgl_tokenizer_stream_tokens_total",
        "Total tokens processed in streaming decode"
@@ -235,7 +223,6 @@ pub fn init_metrics() {
        "Time per streaming decode step"
    );

-    // Factory metrics
    describe_counter!(
        "sgl_tokenizer_factory_loads_total",
        "Total tokenizer loads by file type"
@@ -251,7 +238,6 @@ pub fn init_metrics() {
 }

 pub fn start_prometheus(config: PrometheusConfig) {
-    // Initialize metric descriptions
    init_metrics();

    let duration_matcher = Matcher::Suffix(String::from("duration_seconds"));
@@ -280,7 +266,6 @@ pub struct RouterMetrics;
 pub struct TokenizerMetrics;

 impl RouterMetrics {
-    // Request metrics
    pub fn record_request(route: &str) {
        counter!("sgl_router_requests_total",
            "route" => route.to_string()
@@ -324,7 +309,6 @@ impl RouterMetrics {
        .increment(1);
    }

-    // Worker metrics
    pub fn set_active_workers(count: usize) {
        gauge!("sgl_router_active_workers").set(count as f64);
    }
@@ -350,7 +334,6 @@ impl RouterMetrics {
        .increment(1);
    }

-    // Policy metrics
    pub fn record_policy_decision(policy: &str, worker: &str) {
        counter!("sgl_router_policy_decisions_total",
            "policy" => policy.to_string(),
@@ -383,7 +366,6 @@ impl RouterMetrics {
        gauge!("sgl_router_min_load").set(min_load as f64);
    }

-    // PD-specific metrics
    pub fn record_pd_request(route: &str) {
        counter!("sgl_router_pd_requests_total",
            "route" => route.to_string()
@@ -440,19 +422,16 @@ impl RouterMetrics {
        .increment(1);
    }

-    // Service discovery metrics
    pub fn record_discovery_update(added: usize, removed: usize) {
        counter!("sgl_router_discovery_updates_total").increment(1);
        gauge!("sgl_router_discovery_workers_added").set(added as f64);
        gauge!("sgl_router_discovery_workers_removed").set(removed as f64);
    }

-    // Generate request metrics
    pub fn record_generate_duration(duration: Duration) {
        histogram!("sgl_router_generate_duration_seconds").record(duration.as_secs_f64());
    }

-    // Embeddings metrics
    pub fn record_embeddings_request() {
        counter!("sgl_router_embeddings_total").increment(1);
    }
@@ -473,7 +452,6 @@ impl RouterMetrics {
        gauge!("sgl_router_embeddings_queue_size").set(size as f64);
    }

-    // Running requests for cache-aware policy
    pub fn set_running_requests(worker: &str, count: usize) {
        gauge!("sgl_router_running_requests",
            "worker" => worker.to_string()
@@ -481,7 +459,6 @@ impl RouterMetrics {
        .set(count as f64);
    }

-    // Circuit breaker metrics
    pub fn set_cb_state(worker: &str, state_code: u8) {
        gauge!("sgl_router_cb_state",
            "worker" => worker.to_string()
@@ -508,7 +485,6 @@ impl RouterMetrics {
 }

 impl TokenizerMetrics {
-    // Encoding metrics
    pub fn record_encode_request(tokenizer_type: &str) {
        counter!("sgl_tokenizer_encode_requests_total",
            "tokenizer_type" => tokenizer_type.to_string()
@@ -535,7 +511,6 @@ impl TokenizerMetrics {
        histogram!("sgl_tokenizer_chars_per_encode").record(char_count as f64);
    }

-    // Decoding metrics
    pub fn record_decode_request(tokenizer_type: &str) {
        counter!("sgl_tokenizer_decode_requests_total",
            "tokenizer_type" => tokenizer_type.to_string()
@@ -558,7 +533,6 @@ impl TokenizerMetrics {
        histogram!("sgl_tokenizer_tokens_per_decode").record(token_count as f64);
    }

-    // Batch encoding metrics
    pub fn record_encode_batch_duration(duration: Duration, batch_size: usize) {
        histogram!("sgl_tokenizer_encode_batch_duration_seconds",
            "batch_size" => batch_size.to_string()
@@ -566,7 +540,6 @@ impl TokenizerMetrics {
        .record(duration.as_secs_f64());
    }

-    // Stop sequence detection metrics
    pub fn record_stop_sequence_detected(stop_type: &str) {
        counter!("sgl_tokenizer_stop_sequences_detected_total",
            "type" => stop_type.to_string()
@@ -582,7 +555,6 @@ impl TokenizerMetrics {
        histogram!("sgl_tokenizer_stop_detection_duration_seconds").record(duration.as_secs_f64());
    }

-    // Streaming decode metrics
    pub fn record_stream_token() {
        counter!("sgl_tokenizer_stream_tokens_total").increment(1);
    }
@@ -595,7 +567,6 @@ impl TokenizerMetrics {
        histogram!("sgl_tokenizer_stream_step_duration_seconds").record(duration.as_secs_f64());
    }

-    // Factory metrics
    pub fn record_factory_load(file_type: &str) {
        counter!("sgl_tokenizer_factory_loads_total",
            "file_type" => file_type.to_string()
@@ -614,7 +585,6 @@ impl TokenizerMetrics {
        histogram!("sgl_tokenizer_factory_load_duration_seconds").record(duration.as_secs_f64());
    }

-    // Vocabulary metrics
    pub fn set_vocab_size(tokenizer_type: &str, size: usize) {
        gauge!("sgl_tokenizer_vocab_size",
            "tokenizer_type" => tokenizer_type.to_string()
@@ -705,7 +675,6 @@ mod tests {
                .parse()
                .unwrap_or(IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0)));

-            // Should fall back to 0.0.0.0
            assert_eq!(ip_addr, IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0)));
        }
    }
@@ -780,7 +749,6 @@ mod tests {
    fn test_duration_suffix_matcher() {
        let matcher = Matcher::Suffix(String::from("duration_seconds"));

-        // Test matching behavior
        let _matching_metrics = [
            "request_duration_seconds",
            "response_duration_seconds",
@@ -789,8 +757,6 @@ mod tests {

        let _non_matching_metrics = ["duration_total", "duration_seconds_total", "other_metric"];

-        // Note: We can't directly test Matcher matching without the internals,
-        // but we can verify the matcher is created correctly
        match matcher {
            Matcher::Suffix(suffix) => assert_eq!(suffix, "duration_seconds"),
            _ => panic!("Expected Suffix matcher"),
@@ -801,7 +767,6 @@ mod tests {

    #[test]
    fn test_prometheus_builder_configuration() {
-        // This test verifies the builder configuration without actually starting Prometheus
        let _config = PrometheusConfig::default();

        let duration_matcher = Matcher::Suffix(String::from("duration_seconds"));
@@ -810,10 +775,8 @@ mod tests {
            60.0, 90.0, 120.0, 180.0, 240.0,
        ];

-        // Verify bucket configuration
        assert_eq!(duration_bucket.len(), 20);

-        // Verify matcher is suffix type
        match duration_matcher {
            Matcher::Suffix(s) => assert_eq!(s, "duration_seconds"),
            _ => panic!("Expected Suffix matcher"),
@@ -832,14 +795,12 @@ mod tests {

    #[test]
    fn test_custom_buckets_for_different_metrics() {
-        // Test that we can create different bucket configurations
        let request_buckets = [0.001, 0.01, 0.1, 1.0, 10.0];
        let generate_buckets = [0.1, 0.5, 1.0, 5.0, 30.0, 60.0];

        assert_eq!(request_buckets.len(), 5);
        assert_eq!(generate_buckets.len(), 6);

-        // Verify each set is sorted
        for i in 1..request_buckets.len() {
            assert!(request_buckets[i] > request_buckets[i - 1]);
        }
@@ -853,7 +814,6 @@ mod tests {

    #[test]
    fn test_metrics_static_methods() {
-        // Test that all static methods can be called without panic
        RouterMetrics::record_request("/generate");
        RouterMetrics::record_request_duration("/generate", Duration::from_millis(100));
        RouterMetrics::record_request_error("/generate", "timeout");
@@ -887,41 +847,32 @@ mod tests {

    #[test]
    fn test_tokenizer_metrics_static_methods() {
-        // Test that all tokenizer metric methods can be called without panic
-
-        // Encoding metrics
        TokenizerMetrics::record_encode_request("huggingface");
        TokenizerMetrics::record_encode_duration(Duration::from_millis(10));
        TokenizerMetrics::record_encode_error("invalid_input");
        TokenizerMetrics::record_tokens_per_encode(100);
        TokenizerMetrics::record_chars_per_encode(500);

-        // Decoding metrics
        TokenizerMetrics::record_decode_request("huggingface");
        TokenizerMetrics::record_decode_duration(Duration::from_millis(5));
        TokenizerMetrics::record_decode_error("invalid_tokens");
        TokenizerMetrics::record_tokens_per_decode(50);

-        // Batch encoding
        TokenizerMetrics::record_encode_batch_duration(Duration::from_millis(100), 10);

-        // Stop sequence detection
        TokenizerMetrics::record_stop_sequence_detected("token");
        TokenizerMetrics::record_stop_sequence_detected("string");
        TokenizerMetrics::record_partial_match();
        TokenizerMetrics::record_stop_detection_duration(Duration::from_micros(100));

-        // Streaming decode
        TokenizerMetrics::record_stream_token();
        TokenizerMetrics::record_incomplete_utf8();
        TokenizerMetrics::record_stream_step_duration(Duration::from_micros(50));

-        // Factory metrics
        TokenizerMetrics::record_factory_load("json");
        TokenizerMetrics::record_factory_error("unsupported_format");
        TokenizerMetrics::record_factory_load_duration(Duration::from_millis(200));

-        // Vocabulary metrics
        TokenizerMetrics::set_vocab_size("huggingface", 50000);
    }

@@ -929,17 +880,14 @@ mod tests {

    #[test]
    fn test_port_already_in_use() {
-        // Skip this test if we can't bind to the port
-        let port = 29123; // Use a different port to avoid conflicts
+        let port = 29123;

        if let Ok(_listener) = TcpListener::bind(("127.0.0.1", port)) {
-            // Port is available, we can test
            let config = PrometheusConfig {
                port,
                host: "127.0.0.1".to_string(),
            };

-            // Just verify config is created correctly
            assert_eq!(config.port, port);
        }
    }
@@ -948,8 +896,6 @@ mod tests {

    #[test]
    fn test_metrics_endpoint_accessibility() {
-        // This would be an integration test in practice
-        // Here we just verify the configuration
        let config = PrometheusConfig {
            port: 29000,
            host: "127.0.0.1".to_string(),
@@ -963,7 +909,6 @@ mod tests {

    #[test]
    fn test_concurrent_metric_updates() {
-        // Test that metric updates can be called concurrently
        use std::sync::atomic::{AtomicBool, Ordering};
        use std::sync::Arc;
        use std::thread;
@@ -984,11 +929,9 @@ mod tests {
            handles.push(handle);
        }

-        // Let threads run briefly
        thread::sleep(Duration::from_millis(10));
        done.store(true, Ordering::Relaxed);

-        // Wait for all threads
        for handle in handles {
            handle.join().unwrap();
        }
@@ -998,7 +941,6 @@ mod tests {

    #[test]
    fn test_empty_string_metrics() {
-        // Test that empty strings don't cause issues
        RouterMetrics::record_request("");
        RouterMetrics::set_worker_health("", true);
        RouterMetrics::record_policy_decision("", "");
@@ -1030,7 +972,6 @@ mod tests {

    #[test]
    fn test_extreme_metric_values() {
-        // Test extreme values
        RouterMetrics::set_active_workers(0);
        RouterMetrics::set_active_workers(usize::MAX);

@@ -1038,7 +979,6 @@ mod tests {
        RouterMetrics::set_worker_load("worker", usize::MAX);

        RouterMetrics::record_request_duration("route", Duration::from_nanos(1));
-        // 24 hours
        RouterMetrics::record_request_duration("route", Duration::from_secs(86400));
    }
 }