Replace enable_flashinfer_mla argument with attention_backend (#5005)

commit e8999b13b7
parent 772d2a191d
Author: Baizhou Zhang
Committed by: GitHub
Date: 2025-04-03 02:53:58 -07:00
8 changed files with 21 additions and 20 deletions
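In practice, this change means the FlashInfer MLA path is now selected through the existing attention_backend server argument instead of a dedicated boolean flag. A minimal sketch of the new call site, assuming the ServerArgs dataclass and assuming the value "flashinfer" routes MLA models to FlashInferMLAAttnBackend (the exact backend value is an assumption, not taken from this hunk):

from sglang.srt.server_args import ServerArgs

# Before this commit (removed): a dedicated flag.
#   server_args = ServerArgs(model_path=..., enable_flashinfer_mla=True)

# After this commit: the backend is chosen via attention_backend.
server_args = ServerArgs(
    model_path="deepseek-ai/DeepSeek-V3",  # hypothetical model path
    attention_backend="flashinfer",  # assumed value that maps MLA models to FlashInferMLAAttnBackend
)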


@@ -71,8 +71,6 @@ class FlashInferMLAAttnBackend(AttentionBackend):
         self.device = model_runner.device
         self.skip_prefill = skip_prefill
 
-        global_config.enable_flashinfer_mla = True
-
         # Allocate buffers
         global global_workspace_buffer
         if global_workspace_buffer is None:
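The surviving context lines show a lazily allocated, process-wide FlashInfer workspace buffer: the backend removes only the enable_flashinfer_mla side effect while the buffer logic is unchanged. A hedged sketch of that lazy-init pattern in isolation, with a placeholder buffer size (the real size is not shown in this hunk):

import torch

global_workspace_buffer = None  # shared across all backend instances in the process

def _get_workspace_buffer(device: torch.device) -> torch.Tensor:
    # Allocate the workspace once on first use, then reuse it.
    # 128 MiB is a placeholder size, not the value sglang actually uses.
    global global_workspace_buffer
    if global_workspace_buffer is None:
        global_workspace_buffer = torch.empty(
            128 * 1024 * 1024, dtype=torch.uint8, device=device
        )
    return global_workspace_buffer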