From 6317c5c61f39ab293204e7c88f86bc0f683d24d1 Mon Sep 17 00:00:00 2001 From: HAI Date: Mon, 19 May 2025 21:16:20 -0700 Subject: [PATCH] Address performance regression: disable multiple streams on ROCm (#6412) --- python/sglang/srt/models/deepseek_v2.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/sglang/srt/models/deepseek_v2.py b/python/sglang/srt/models/deepseek_v2.py index 9b44186be..0b6642a23 100644 --- a/python/sglang/srt/models/deepseek_v2.py +++ b/python/sglang/srt/models/deepseek_v2.py @@ -1315,7 +1315,8 @@ class DeepseekV2Model(nn.Module): config.hidden_size, enable_tp=not global_server_args_dict["enable_dp_attention"], ) - self.alt_stream = torch.cuda.Stream() + # TODO(haishaw): multi-stream performance on ROCm + self.alt_stream = None if _is_hip else torch.cuda.Stream() self.layers = nn.ModuleList( [ DeepseekV2DecoderLayer(