diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py index 75afecbed..de1d4ee68 100644 --- a/python/sglang/srt/server_args.py +++ b/python/sglang/srt/server_args.py @@ -191,10 +191,12 @@ class ServerArgs: self.dp_size = self.tp_size self.chunked_prefill_size = self.chunked_prefill_size // 2 self.cuda_graph_max_bs = min(self.cuda_graph_max_bs, 96) + self.schedule_conservativeness = self.schedule_conservativeness * 0.3 self.enable_overlap_schedule = False logger.warning( f"DP attention is enabled. The chunked prefill size is adjusted to {self.chunked_prefill_size} to avoid MoE kernel issues. " f"The CUDA graph max batch size is adjusted to {self.cuda_graph_max_bs}. " + f"The schedule conservativeness is adjusted to {self.schedule_conservativeness}. " "Data parallel size is adjusted to be the same as tensor parallel size." )