From 477fddf28d6617b318ea3c9541587b14b7b6f980 Mon Sep 17 00:00:00 2001
From: maxiao <maxiao1@sugon.com>
Date: Thu, 30 Oct 2025 18:03:07 +0800
Subject: [PATCH] =?UTF-8?q?=E9=80=82=E9=85=8Dqwen3-next?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 python/sglang/srt/models/qwen3_next.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/sglang/srt/models/qwen3_next.py b/python/sglang/srt/models/qwen3_next.py
index 62cf15af7..03a31b186 100644
--- a/python/sglang/srt/models/qwen3_next.py
+++ b/python/sglang/srt/models/qwen3_next.py
@@ -396,7 +396,7 @@ class Qwen3GatedDeltaNet(nn.Module):
     def _forward_input_proj(self, hidden_states: torch.Tensor):
         DUAL_STREAM_TOKEN_THRESHOLD = 1024 if not _is_npu else 0
         seq_len, _ = hidden_states.shape
-        if seq_len < DUAL_STREAM_TOKEN_THRESHOLD:
+        if seq_len < DUAL_STREAM_TOKEN_THRESHOLD and self.alt_stream is not None:
             current_stream = torch.cuda.current_stream()
             self.alt_stream.wait_stream(current_stream)
             projected_states_qkvz, _ = self.in_proj_qkvz(hidden_states)