From 3d7cdb2ebd89413ecfec4dad27456f994f1ee7ee Mon Sep 17 00:00:00 2001
From: fzyzcjy <5236035+fzyzcjy@users.noreply.github.com>
Date: Fri, 27 Jun 2025 06:40:54 +0800
Subject: [PATCH] Fix MTP error when enabling two-batch overlap  (#7569)

---
 python/sglang/srt/model_executor/cuda_graph_runner.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/sglang/srt/model_executor/cuda_graph_runner.py b/python/sglang/srt/model_executor/cuda_graph_runner.py
index 3e1f4e836..48f62d28b 100644
--- a/python/sglang/srt/model_executor/cuda_graph_runner.py
+++ b/python/sglang/srt/model_executor/cuda_graph_runner.py
@@ -168,7 +168,7 @@ def get_batch_sizes_to_capture(model_runner: ModelRunner):
         capture_bs += [model_runner.req_to_token_pool.size]
 
     if server_args.enable_two_batch_overlap:
-        capture_bs = [bs for bs in capture_bs if bs >= 2]
+        capture_bs = [bs for bs in capture_bs if bs % 2 == 0]
 
     if server_args.cuda_graph_max_bs:
         capture_bs = [bs for bs in capture_bs if bs <= server_args.cuda_graph_max_bs]