From dfce9269216a170a617437190b7d3049ee388b94 Mon Sep 17 00:00:00 2001
From: Yineng Zhang <me@zhyncs.com>
Date: Sat, 15 Feb 2025 23:11:28 +0800
Subject: [PATCH] Fix high-QPS crash when MTP is enabled (#3592)

Co-authored-by: ispobock <ispobaoke@hotmail.com>
---
 python/sglang/srt/model_executor/forward_batch_info.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/python/sglang/srt/model_executor/forward_batch_info.py b/python/sglang/srt/model_executor/forward_batch_info.py
index b36dedc9f..cdd03bec4 100644
--- a/python/sglang/srt/model_executor/forward_batch_info.py
+++ b/python/sglang/srt/model_executor/forward_batch_info.py
@@ -263,7 +263,10 @@ class ForwardBatch:
             ret.extend_prefix_lens = torch.tensor(
                 batch.extend_prefix_lens, dtype=torch.int32
             ).to(device, non_blocking=True)
-            if model_runner.server_args.attention_backend != "torch_native":
+            if (
+                model_runner.server_args.attention_backend != "torch_native"
+                and model_runner.server_args.speculative_algorithm != "NEXTN"
+            ):
                 ret.extend_num_tokens = batch.extend_num_tokens
                 positions, ret.extend_start_loc = compute_position_triton(
                     ret.extend_prefix_lens, ret.extend_seq_lens, ret.extend_num_tokens