[Feature] Support ragged prefill in FlashInfer MLA backend (#3967)

Co-authored-by: Yineng Zhang <me@zhyncs.com>
Co-authored-by: pankajroark <pankajroark@users.noreply.github.com>
This commit is contained in:
Baizhou Zhang
2025-02-28 18:13:56 -08:00
committed by GitHub
parent f3b99f73b3
commit 90a4b7d98a
9 changed files with 308 additions and 407 deletions

View File

@@ -167,6 +167,7 @@ class ServerArgs:
tool_call_parser: str = None
enable_hierarchical_cache: bool = False
enable_flashinfer_mla: bool = False
flashinfer_mla_disable_ragged: bool = False
def __post_init__(self):
# Set missing default values
@@ -713,6 +714,11 @@ class ServerArgs:
action="store_true",
help="Enable FlashInfer MLA optimization",
)
parser.add_argument(
"--flashinfer-mla-disable-ragged",
action="store_true",
help="Not using ragged prefill wrapper when running flashinfer mla",
)
# Speculative decoding
parser.add_argument(