Revert "Replace enable_flashinfer_mla argument with attention_backend" (#5048)

2025-04-03 13:30:56 -07:00
parent b8b6008f47
commit 74885a848b
8 changed files with 20 additions and 21 deletions
--- a/python/sglang/srt/layers/attention/flashinfer_mla_backend.py
+++ b/python/sglang/srt/layers/attention/flashinfer_mla_backend.py
@@ -71,6 +71,8 @@ class FlashInferMLAAttnBackend(AttentionBackend):
        self.device = model_runner.device
        self.skip_prefill = skip_prefill

+        global_config.enable_flashinfer_mla = True
+
        # Allocate buffers
        global global_workspace_buffer
        if global_workspace_buffer is None: