[Revision] Replace enable_flashinfer_mla argument with attention_backend (#5052)

2025-04-05 01:23:02 -07:00
parent ca8d02abd5
commit efbae697b3
9 changed files with 92 additions and 82 deletions
--- a/python/sglang/srt/layers/attention/flashinfer_mla_backend.py
+++ b/python/sglang/srt/layers/attention/flashinfer_mla_backend.py
@@ -71,8 +71,6 @@ class FlashInferMLAAttnBackend(AttentionBackend):
        self.device = model_runner.device
        self.skip_prefill = skip_prefill

-        global_config.enable_flashinfer_mla = True
-
        # Allocate buffers
        global global_workspace_buffer
        if global_workspace_buffer is None: