From 477fddf28d6617b318ea3c9541587b14b7b6f980 Mon Sep 17 00:00:00 2001 From: maxiao Date: Thu, 30 Oct 2025 18:03:07 +0800 Subject: [PATCH] =?UTF-8?q?=E9=80=82=E9=85=8Dqwen3-next?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- python/sglang/srt/models/qwen3_next.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/sglang/srt/models/qwen3_next.py b/python/sglang/srt/models/qwen3_next.py index 62cf15af7..03a31b186 100644 --- a/python/sglang/srt/models/qwen3_next.py +++ b/python/sglang/srt/models/qwen3_next.py @@ -396,7 +396,7 @@ class Qwen3GatedDeltaNet(nn.Module): def _forward_input_proj(self, hidden_states: torch.Tensor): DUAL_STREAM_TOKEN_THRESHOLD = 1024 if not _is_npu else 0 seq_len, _ = hidden_states.shape - if seq_len < DUAL_STREAM_TOKEN_THRESHOLD: + if seq_len < DUAL_STREAM_TOKEN_THRESHOLD and self.alt_stream is not None: current_stream = torch.cuda.current_stream() self.alt_stream.wait_stream(current_stream) projected_states_qkvz, _ = self.in_proj_qkvz(hidden_states)