适配qwen3-next

2025-10-30 18:03:07 +08:00
parent 8fc552638f
commit 477fddf28d
1 changed files with 1 additions and 1 deletions
--- a/python/sglang/srt/models/qwen3_next.py
+++ b/python/sglang/srt/models/qwen3_next.py
@@ -396,7 +396,7 @@ class Qwen3GatedDeltaNet(nn.Module):
    def _forward_input_proj(self, hidden_states: torch.Tensor):
        DUAL_STREAM_TOKEN_THRESHOLD = 1024 if not _is_npu else 0
        seq_len, _ = hidden_states.shape
-        if seq_len < DUAL_STREAM_TOKEN_THRESHOLD:
+        if seq_len < DUAL_STREAM_TOKEN_THRESHOLD and self.alt_stream is not None:
            current_stream = torch.cuda.current_stream()
            self.alt_stream.wait_stream(current_stream)
            projected_states_qkvz, _ = self.in_proj_qkvz(hidden_states)