diff --git a/python/sglang/srt/model_executor/cuda_graph_runner.py b/python/sglang/srt/model_executor/cuda_graph_runner.py index abf95d4d0..2effec9c0 100644 --- a/python/sglang/srt/model_executor/cuda_graph_runner.py +++ b/python/sglang/srt/model_executor/cuda_graph_runner.py @@ -54,7 +54,7 @@ from sglang.srt.utils import ( empty_context, get_available_gpu_memory, get_device_memory_capacity, - rank0_log, + log_info_on_rank0, require_attn_tp_gather, require_gathered_buffer, require_mlp_sync, @@ -267,7 +267,7 @@ class CudaGraphRunner: # Batch sizes to capture self.capture_bs, self.compile_bs = get_batch_sizes_to_capture(model_runner) - rank0_log(f"Capture cuda graph bs {self.capture_bs}") + log_info_on_rank0(logger, f"Capture cuda graph bs {self.capture_bs}") self.capture_forward_mode = ForwardMode.DECODE self.capture_hidden_mode = CaptureHiddenMode.NULL self.num_tokens_per_bs = 1 diff --git a/python/sglang/srt/utils.py b/python/sglang/srt/utils.py index cb5e4cd1e..d23c57cc9 100644 --- a/python/sglang/srt/utils.py +++ b/python/sglang/srt/utils.py @@ -2002,13 +2002,6 @@ def configure_ipv6(dist_init_addr): return port, host -def rank0_log(msg: str): - from sglang.srt.distributed import get_tensor_model_parallel_rank - - if get_tensor_model_parallel_rank() == 0: - logger.info(msg) - - def launch_dummy_health_check_server(host, port, enable_metrics): import asyncio