Support FlashMLA backend (#4472)

Co-authored-by: yinfan98 <1106310035@qq.com>
This commit is contained in:
lukec
2025-03-17 00:07:06 +08:00
committed by GitHub
parent 1b859295f4
commit a53fe428f9
6 changed files with 209 additions and 1 deletions

View File

@@ -182,6 +182,12 @@ def main(args, server_args):
"--enable-flashinfer-mla",
]
)
if server_args.enable_flashmla:
other_args.extend(
[
"--enable-flashmla",
]
)
if server_args.quantization:
other_args.extend(