diff --git a/python/sglang/srt/model_executor/model_runner.py b/python/sglang/srt/model_executor/model_runner.py index 12db1d055..923b4d02b 100644 --- a/python/sglang/srt/model_executor/model_runner.py +++ b/python/sglang/srt/model_executor/model_runner.py @@ -561,7 +561,7 @@ class ModelRunner: # Check memory for tensor parallelism local_gpu_memory = get_available_gpu_memory(self.device, self.gpu_id) - if self.tp_size > 1: + if self.tp_size > 1 and not self.is_draft_worker: if min_per_gpu_memory < local_gpu_memory * 0.9: if get_bool_env_var("SGL_DISABLE_TP_MEMORY_INBALANCE_CHECK"): logger.warning(