Revert "Replace enable_flashinfer_mla argument with attention_backend" (#5048)

2025-04-03 13:30:56 -07:00
parent b8b6008f47
commit 74885a848b
8 changed files with 20 additions and 21 deletions
--- a/python/sglang/srt/models/deepseek_v2.py
+++ b/python/sglang/srt/models/deepseek_v2.py
@@ -684,6 +684,7 @@ class DeepseekV2AttentionMLA(nn.Module):
        self.w_vc = None
        self.w_scale = None

+        self.enable_flashinfer_mla = global_server_args_dict["enable_flashinfer_mla"]
        self.flashinfer_mla_disable_ragged = global_server_args_dict[
            "flashinfer_mla_disable_ragged"
        ]
@@ -691,7 +692,7 @@ class DeepseekV2AttentionMLA(nn.Module):
        self.rocm_fused_decode_mla = os.getenv("SGLANG_ROCM_FUSED_DECODE_MLA") == "1"

    def no_absorb(self, forward_batch: ForwardBatch) -> bool:
-        if self.attention_backend == "flashinfer_mla":
+        if self.enable_flashinfer_mla:
            # Flashinfer MLA: Do not absorb when enabling ragged prefill
            return (
                not self.flashinfer_mla_disable_ragged