diff --git a/sgl-router/src/core/job_queue.rs b/sgl-router/src/core/job_queue.rs index 749eb9b87..05b39e4c2 100644 --- a/sgl-router/src/core/job_queue.rs +++ b/sgl-router/src/core/job_queue.rs @@ -139,7 +139,7 @@ impl JobQueue { pub fn new(config: JobQueueConfig, context: Weak) -> Arc { let (tx, rx) = mpsc::channel(config.queue_capacity); - info!( + debug!( "Initializing worker job queue: capacity={}, workers={}", config.queue_capacity, config.worker_count ); @@ -194,7 +194,7 @@ impl JobQueue { Ok(_) => { let queue_depth = self.tx.max_capacity() - self.tx.capacity(); RouterMetrics::set_job_queue_depth(queue_depth); - info!( + debug!( "Job submitted: type={}, worker={}, queue_depth={}", job_type, worker_url, queue_depth ); @@ -225,7 +225,7 @@ impl JobQueue { context: Weak, status_map: Arc>, ) { - info!("Worker job queue worker {} started", worker_id); + debug!("Worker job queue worker {} started", worker_id); loop { // Lock the receiver and try to receive a job @@ -246,7 +246,7 @@ impl JobQueue { JobStatus::processing(&job_type, &worker_url), ); - info!( + debug!( "Worker {} processing job: type={}, worker={}", worker_id, job_type, worker_url ); @@ -289,7 +289,7 @@ impl JobQueue { } } - warn!("Worker job queue worker {} stopped", worker_id); + debug!("Worker job queue worker {} stopped", worker_id); } /// Execute a specific job @@ -303,7 +303,7 @@ impl JobQueue { let instance_id = Self::start_worker_workflow(engine, config, context).await?; - info!( + debug!( "Started worker registration workflow for {} (instance: {})", config.url, instance_id ); @@ -357,7 +357,7 @@ impl JobQueue { } }; - info!( + debug!( "Creating AddWorker jobs for {} workers from config", workers.len() ); @@ -501,7 +501,7 @@ impl JobQueue { Ok(message) => { RouterMetrics::record_job_success(job_type); status_map.remove(worker_url); - info!( + debug!( "Worker {} completed job: type={}, worker={}, duration={:.3}s, result={}", worker_id, job_type, diff --git a/sgl-router/src/core/workflow/steps/worker_registration.rs b/sgl-router/src/core/workflow/steps/worker_registration.rs index 06916a866..894326c19 100644 --- a/sgl-router/src/core/workflow/steps/worker_registration.rs +++ b/sgl-router/src/core/workflow/steps/worker_registration.rs @@ -17,7 +17,7 @@ use async_trait::async_trait; use once_cell::sync::Lazy; use reqwest::Client; use serde_json::Value; -use tracing::{info, warn}; +use tracing::{debug, info, warn}; use crate::{ core::{ @@ -202,7 +202,7 @@ impl StepExecutor for DetectConnectionModeStep { .get("worker_config") .ok_or_else(|| WorkflowError::ContextValueNotFound("worker_config".to_string()))?; - info!( + debug!( "Detecting connection mode for {} (timeout: {}s, max_attempts: {})", config.url, config.health_check_timeout_secs, config.max_connection_attempts ); @@ -217,11 +217,11 @@ impl StepExecutor for DetectConnectionModeStep { let connection_mode = match (http_result, grpc_result) { (Ok(_), _) => { - info!("{} detected as HTTP", config.url); + debug!("{} detected as HTTP", config.url); ConnectionMode::Http } (_, Ok(_)) => { - info!("{} detected as gRPC", config.url); + debug!("{} detected as gRPC", config.url); ConnectionMode::Grpc { port: None } } (Err(http_err), Err(grpc_err)) => { @@ -259,7 +259,7 @@ impl StepExecutor for DiscoverMetadataStep { .get("connection_mode") .ok_or_else(|| WorkflowError::ContextValueNotFound("connection_mode".to_string()))?; - info!( + debug!( "Discovering metadata for {} ({:?})", config.url, *connection_mode ); @@ -275,7 +275,7 @@ impl StepExecutor for DiscoverMetadataStep { HashMap::new() }); - info!( + debug!( "Discovered {} metadata labels for {}", discovered_labels.len(), config.url @@ -304,14 +304,14 @@ impl StepExecutor for DiscoverDPInfoStep { // Skip DP discovery if not DP-aware if !config.dp_aware { - info!( + debug!( "Worker {} is not DP-aware, skipping DP discovery", config.url ); return Ok(StepResult::Success); } - info!("Discovering DP info for {} (DP-aware)", config.url); + debug!("Discovering DP info for {} (DP-aware)", config.url); // Get DP info from worker let dp_info = WorkerManager::get_dp_info(&config.url, config.api_key.as_deref()) @@ -321,7 +321,7 @@ impl StepExecutor for DiscoverDPInfoStep { message: format!("Failed to get DP info: {}", e), })?; - info!( + debug!( "Discovered DP size {} for {} (model: {})", dp_info.dp_size, config.url, dp_info.model_id ); @@ -406,12 +406,12 @@ impl StepExecutor for CreateWorkerStep { .cloned(); if let Some(model_id) = derived_model_id { - info!("Derived model_id from metadata: {}", model_id); + debug!("Derived model_id from metadata: {}", model_id); final_labels.insert("model_id".to_string(), model_id); } } - info!( + debug!( "Creating worker {} with {} discovered + {} config = {} final labels", config.url, discovered_labels.len(), @@ -471,7 +471,7 @@ impl StepExecutor for CreateWorkerStep { }; if normalized_url != config.url { - info!( + debug!( "Normalized worker URL: {} -> {} ({:?})", config.url, normalized_url, @@ -486,7 +486,7 @@ impl StepExecutor for CreateWorkerStep { .get("dp_info") .ok_or_else(|| WorkflowError::ContextValueNotFound("dp_info".to_string()))?; - info!( + debug!( "Creating {} DP-aware workers for {} (dp_size: {})", dp_info.dp_size, config.url, dp_info.dp_size ); @@ -512,7 +512,7 @@ impl StepExecutor for CreateWorkerStep { worker.set_healthy(false); workers.push(worker); - info!( + debug!( "Created DP-aware worker {}@{}/{} ({:?})", config.url, rank, @@ -545,7 +545,7 @@ impl StepExecutor for CreateWorkerStep { let worker = Arc::new(builder.build()) as Arc; worker.set_healthy(false); - info!( + debug!( "Created worker object for {} ({:?}) with {} labels", config.url, connection_mode.as_ref(), @@ -589,7 +589,7 @@ impl StepExecutor for RegisterWorkerStep { for worker in workers.iter() { let worker_id = app_context.worker_registry.register(Arc::clone(worker)); worker_ids.push(worker_id.clone()); - info!( + debug!( "Registered DP-aware worker {} with ID {:?}", config.url, worker_id ); @@ -607,7 +607,7 @@ impl StepExecutor for RegisterWorkerStep { .worker_registry .register(Arc::clone(worker.as_ref())); - info!("Registered worker {} with ID {:?}", config.url, worker_id); + debug!("Registered worker {} with ID {:?}", config.url, worker_id); context.set("worker_id", worker_id); Ok(StepResult::Success) @@ -664,7 +664,7 @@ impl StepExecutor for UpdatePoliciesStep { } } - info!( + debug!( "Updated policies for {} DP-aware workers {} (model: {})", workers.len(), config.url, @@ -693,7 +693,7 @@ impl StepExecutor for UpdatePoliciesStep { } } - info!( + debug!( "Updated policies for worker {} (model: {})", config.url, model_id ); @@ -728,7 +728,7 @@ impl StepExecutor for ActivateWorkerStep { worker.set_healthy(true); } - info!( + debug!( "Activated {} DP-aware workers {} (marked as healthy)", workers.len(), config.url