From dfce9269216a170a617437190b7d3049ee388b94 Mon Sep 17 00:00:00 2001 From: Yineng Zhang Date: Sat, 15 Feb 2025 23:11:28 +0800 Subject: [PATCH] fix high-QPS crash when MTP is enabled (#3592) Co-authored-by: ispobock --- python/sglang/srt/model_executor/forward_batch_info.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/python/sglang/srt/model_executor/forward_batch_info.py b/python/sglang/srt/model_executor/forward_batch_info.py index b36dedc9f..cdd03bec4 100644 --- a/python/sglang/srt/model_executor/forward_batch_info.py +++ b/python/sglang/srt/model_executor/forward_batch_info.py @@ -263,7 +263,10 @@ class ForwardBatch: ret.extend_prefix_lens = torch.tensor( batch.extend_prefix_lens, dtype=torch.int32 ).to(device, non_blocking=True) - if model_runner.server_args.attention_backend != "torch_native": + if ( + model_runner.server_args.attention_backend != "torch_native" + and model_runner.server_args.speculative_algorithm != "NEXTN" + ): ret.extend_num_tokens = batch.extend_num_tokens positions, ret.extend_start_loc = compute_position_triton( ret.extend_prefix_lens, ret.extend_seq_lens, ret.extend_num_tokens