feat: support flashinfer mla attention for deepseek v3 (#3550)

2025-02-14 08:50:14 +08:00
parent 368de3661e
commit 70f894b810
12 changed files with 299 additions and 135 deletions
--- a/python/sglang/srt/server_args.py
+++ b/python/sglang/srt/server_args.py
@@ -168,6 +168,8 @@ class ServerArgs:
    tool_call_parser: str = None
    enable_hierarchical_cache: bool = False

+    enable_flashinfer_mla: bool = False
+
    def __post_init__(self):
        # Set missing default values
        if self.tokenizer_path is None:
@@ -693,6 +695,11 @@ class ServerArgs:
            default=ServerArgs.grammar_backend,
            help="Choose the backend for grammar-guided decoding.",
        )
+        parser.add_argument(
+            "--enable-flashinfer-mla",
+            action="store_true",
+            help="Enable FlashInfer MLA optimization",
+        )

        # Speculative decoding
        parser.add_argument(