From 5d1344016253992a576c0521d5964acc63067373 Mon Sep 17 00:00:00 2001 From: Zhaoyang Hao <77828610+Muuuchen@users.noreply.github.com> Date: Tue, 15 Apr 2025 16:42:27 +0800 Subject: [PATCH] [FIX] Fix concatenation error in capture_bs when --disable-cuda-graph-padding is set and MTP is not used (#5412) --- python/sglang/srt/model_executor/cuda_graph_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/sglang/srt/model_executor/cuda_graph_runner.py b/python/sglang/srt/model_executor/cuda_graph_runner.py index 75f085ae4..bc2c4abb0 100644 --- a/python/sglang/srt/model_executor/cuda_graph_runner.py +++ b/python/sglang/srt/model_executor/cuda_graph_runner.py @@ -116,7 +116,7 @@ def get_batch_sizes_to_capture(model_runner: ModelRunner): if capture_bs is None: if server_args.speculative_algorithm is None: if server_args.disable_cuda_graph_padding: - capture_bs = list(range(1, 33)) + range(40, 161, 16) + capture_bs = list(range(1, 33)) + list(range(40, 161, 16)) else: capture_bs = [1, 2, 4, 8] + list(range(16, 161, 8)) else: