Support FlashMLA backend (#4472)

Co-authored-by: yinfan98 <1106310035@qq.com>
2025-03-17 00:07:06 +08:00
parent 1b859295f4
commit a53fe428f9
6 changed files with 209 additions and 1 deletion
--- a/scripts/playground/bench_speculative.py
+++ b/scripts/playground/bench_speculative.py
@@ -182,6 +182,12 @@ def main(args, server_args):
                    "--enable-flashinfer-mla",
                ]
            )
+        if server_args.enable_flashmla:
+            other_args.extend(
+                [
+                    "--enable-flashmla",
+                ]
+            )

        if server_args.quantization:
            other_args.extend(