[Feature] Support ragged prefill in flashinfer mla backend (#3967)
Co-authored-by: Yineng Zhang <me@zhyncs.com>
Co-authored-by: pankajroark <pankajroark@users.noreply.github.com>
This commit is contained in:
@@ -167,6 +167,7 @@ class ServerArgs:
|
||||
tool_call_parser: str = None
|
||||
enable_hierarchical_cache: bool = False
|
||||
enable_flashinfer_mla: bool = False
|
||||
flashinfer_mla_disable_ragged: bool = False
|
||||
|
||||
def __post_init__(self):
|
||||
# Set missing default values
|
||||
@@ -713,6 +714,11 @@ class ServerArgs:
|
||||
action="store_true",
|
||||
help="Enable FlashInfer MLA optimization",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--flashinfer-mla-disable-ragged",
|
||||
action="store_true",
|
||||
help="Not using ragged prefill wrapper when running flashinfer mla",
|
||||
)
|
||||
|
||||
# Speculative decoding
|
||||
parser.add_argument(
|
||||
|
||||
Reference in New Issue
Block a user