Revert "Add fast decode plan for flashinfer mla" (#4008)
@@ -269,10 +269,9 @@ class FlashInferAttnBackend(AttentionBackend):
         num_tokens: int,
         req_pool_indices: torch.Tensor,
         seq_lens: torch.Tensor,
-        forward_mode: ForwardMode,
         encoder_lens: Optional[torch.Tensor],
+        forward_mode: ForwardMode,
         spec_info: Optional[SpecInfo],
-        **kwargs,
     ):
         if forward_mode.is_decode_or_idle():
             decode_wrappers = []
@@ -340,10 +339,9 @@ class FlashInferAttnBackend(AttentionBackend):
         req_pool_indices: torch.Tensor,
         seq_lens: torch.Tensor,
         seq_lens_sum: int,
-        forward_mode: ForwardMode,
         encoder_lens: Optional[torch.Tensor],
+        forward_mode: ForwardMode,
         spec_info: Optional[SpecInfo],
-        **kwargs,
     ):
         if forward_mode.is_decode_or_idle():
             self.indices_updater_decode.update(
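For context, a minimal sketch of the two CUDA-graph hooks as the revert leaves them. The hunks above show only the parameter tails, so the method names, the leading self/bs parameters, and the stubbed ForwardMode/SpecInfo types below are assumptions, not part of the diff:

    # Sketch of the restored signatures, assuming the standard sglang
    # AttentionBackend hook names; ForwardMode and SpecInfo are stubbed
    # so the snippet runs standalone.
    from typing import Any, Optional

    import torch

    ForwardMode = Any  # stand-in for sglang's ForwardMode enum
    SpecInfo = Any     # stand-in for sglang's SpecInfo


    class FlashInferAttnBackend:
        def init_forward_metadata_capture_cuda_graph(  # assumed method name
            self,
            bs: int,
            num_tokens: int,
            req_pool_indices: torch.Tensor,
            seq_lens: torch.Tensor,
            encoder_lens: Optional[torch.Tensor],
            forward_mode: ForwardMode,  # moved back after encoder_lens
            spec_info: Optional[SpecInfo],
        ):  # trailing **kwargs dropped by the revert
            ...

        def init_forward_metadata_replay_cuda_graph(  # assumed method name
            self,
            bs: int,
            req_pool_indices: torch.Tensor,
            seq_lens: torch.Tensor,
            seq_lens_sum: int,
            encoder_lens: Optional[torch.Tensor],
            forward_mode: ForwardMode,
            spec_info: Optional[SpecInfo],
        ):
            ...

Both hooks end up with encoder_lens immediately before forward_mode and no catch-all **kwargs, which is the order the + lines in both hunks restore.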