[Fix] Fix a bug for flashmla to run R1 model (#5875)

Co-authored-by: pengcuo <dgpengcuo@gmail.com>
2025-04-29 16:03:13 +08:00
parent 8465f035d1
commit 8e5a6d3441
1 changed files with 3 additions and 0 deletions
--- a/python/sglang/srt/layers/attention/flashmla_backend.py
+++ b/python/sglang/srt/layers/attention/flashmla_backend.py
@@ -241,6 +241,9 @@ class FlashMLABackend(FlashInferMLAAttnBackend):
                seq_lens_cpu,
            )

+    def get_cuda_graph_seq_len_fill_value(self):  # NOTE(review): presumably overrides the FlashInferMLAAttnBackend version - confirm
+        return 1024  # fixed constant; per the method name, presumably the seq-len fill value for CUDA graphs - TODO confirm why 1024
+
    def forward_decode(
        self,
        q: torch.Tensor,