Replace enable_flashinfer_mla argument with attention_backend (#5005)

This commit is contained in:
Baizhou Zhang
2025-04-03 02:53:58 -07:00
committed by GitHub
parent 772d2a191d
commit e8999b13b7
8 changed files with 21 additions and 20 deletions

View File

@@ -179,7 +179,7 @@ class ServerArgs:
tool_call_parser: Optional[str] = None
enable_hierarchical_cache: bool = False
hicache_ratio: float = 2.0
enable_flashinfer_mla: bool = False
enable_flashinfer_mla: bool = False # TODO: remove this argument
enable_flashmla: bool = False
flashinfer_mla_disable_ragged: bool = False
warmups: Optional[str] = None
@@ -836,7 +836,7 @@ class ServerArgs:
parser.add_argument(
"--enable-flashinfer-mla",
action="store_true",
help="Enable FlashInfer MLA optimization",
help="Enable FlashInfer MLA optimization. This argument will be deprecated soon! Please use '--attention-backend flashinfer' instead for switching on flashinfer mla!",
)
parser.add_argument(
"--enable-flashmla",