From 0089c4bc96013806162aa47c929f597d5327d662 Mon Sep 17 00:00:00 2001
From: Lianmin Zheng
Date: Thu, 24 Oct 2024 04:16:59 -0700
Subject: [PATCH] [Fix] Fix NaN issues by fixing the cuda graph padding values for flashinfer (#1779)

---
 python/sglang/srt/model_executor/cuda_graph_runner.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/sglang/srt/model_executor/cuda_graph_runner.py b/python/sglang/srt/model_executor/cuda_graph_runner.py
index 3ef9b35ed..23090688d 100644
--- a/python/sglang/srt/model_executor/cuda_graph_runner.py
+++ b/python/sglang/srt/model_executor/cuda_graph_runner.py
@@ -290,7 +290,7 @@ class CudaGraphRunner:
         index = bisect.bisect_left(self.capture_bs, raw_bs)
         bs = self.capture_bs[index]
         if bs != raw_bs:
-            self.seq_lens.fill_(self.seq_len_fill_value)
+            self.seq_lens.fill_(1)
             self.out_cache_loc.zero_()
 
         # Common inputs