From 8e5a6d3441d53e9ce68f9d70d0d5c9732a2250b6 Mon Sep 17 00:00:00 2001
From: pengcuo
Date: Tue, 29 Apr 2025 16:03:13 +0800
Subject: [PATCH] [Fix] Fix a bug for flashmla to run R1 model (#5875)

Co-authored-by: pengcuo
---
 python/sglang/srt/layers/attention/flashmla_backend.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/python/sglang/srt/layers/attention/flashmla_backend.py b/python/sglang/srt/layers/attention/flashmla_backend.py
index 1513c1c71..0823239a7 100644
--- a/python/sglang/srt/layers/attention/flashmla_backend.py
+++ b/python/sglang/srt/layers/attention/flashmla_backend.py
@@ -241,6 +241,9 @@ class FlashMLABackend(FlashInferMLAAttnBackend):
                 seq_lens_cpu,
             )
 
+    def get_cuda_graph_seq_len_fill_value(self):
+        return 1024
+
     def forward_decode(
         self,
         q: torch.Tensor,