From 86a2c473b775f9051f460b4107a34c5e662fd1a3 Mon Sep 17 00:00:00 2001
From: Lianmin Zheng
Date: Thu, 24 Oct 2024 21:26:05 -0700
Subject: [PATCH] [Fix] Fix seq_lens_sum for cuda graph runner in padded cases (#1789)

---
 python/sglang/srt/model_executor/cuda_graph_runner.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/sglang/srt/model_executor/cuda_graph_runner.py b/python/sglang/srt/model_executor/cuda_graph_runner.py
index 22ed6cc2b..d9a9861cc 100644
--- a/python/sglang/srt/model_executor/cuda_graph_runner.py
+++ b/python/sglang/srt/model_executor/cuda_graph_runner.py
@@ -307,7 +307,7 @@ class CudaGraphRunner:
             bs,
             self.req_pool_indices,
             self.seq_lens,
-            forward_batch.seq_lens_sum,
+            forward_batch.seq_lens_sum + (bs - raw_bs),
             self.encoder_lens,
         )