feat: support flashinfer mla attention for deepseek v3 (#3550)

This commit is contained in:
Yineng Zhang
2025-02-14 08:50:14 +08:00
committed by GitHub
parent 368de3661e
commit 70f894b810
12 changed files with 299 additions and 135 deletions

View File

@@ -168,6 +168,8 @@ class ServerArgs:
# Defaults to None.
# NOTE(review): annotated `str` but the default is None, so the field is
# effectively optional — confirm and tighten the annotation if so.
tool_call_parser: str = None
# Boolean toggle; off unless explicitly enabled.
enable_hierarchical_cache: bool = False
# Boolean toggle for FlashInfer MLA; off unless explicitly enabled.
# NOTE(review): presumably populated from the `--enable-flashinfer-mla`
# CLI flag — confirm against the argument-parsing code.
enable_flashinfer_mla: bool = False
def __post_init__(self):
# Set missing default values
if self.tokenizer_path is None:
@@ -693,6 +695,11 @@ class ServerArgs:
default=ServerArgs.grammar_backend,
help="Choose the backend for grammar-guided decoding.",
)
# Opt-in boolean flag: with `store_true` the parsed value is False unless
# `--enable-flashinfer-mla` is given on the command line. argparse derives
# the destination attribute name `enable_flashinfer_mla` from the flag.
parser.add_argument(
"--enable-flashinfer-mla",
action="store_true",
help="Enable FlashInfer MLA optimization",
)
# Speculative decoding
parser.add_argument(