Revert "Replace enable_flashinfer_mla argument with attention_backend" (#5048)

commit 74885a848b (parent b8b6008f47)
Author: Lianmin Zheng
Date:   2025-04-03 13:30:56 -07:00
Committed by: GitHub

8 changed files with 20 additions and 21 deletions

@@ -26,8 +26,7 @@ class TestFlashinferMLA(CustomTestCase):
                     "--enable-torch-compile",
                     "--cuda-graph-max-bs",
                     "2",
-                    "--attention-backend",
-                    "flashinfer",
+                    "--enable-flashinfer-mla",
                 ]
             )
         cls.process = popen_launch_server(
@@ -70,8 +69,8 @@ class TestFlashinferMLANoRagged(CustomTestCase):
                     "--disable-cuda-graph",
                     "--cuda-graph-max-bs",
                     "4",
-                    "--attention-backend",
-                    "flashinfer",
+                    "--enable-flashinfer-mla",
+                    "--flashinfer-mla-disable-ragged",
                 ]
             )
         cls.process = popen_launch_server(
@@ -126,8 +125,7 @@ class TestFlashinferMLAMTP(CustomTestCase):
                     "1",
                     "--speculative-num-draft-tokens",
                     "4",
-                    "--attention-backend",
-                    "flashinfer",
+                    "--enable-flashinfer-mla",
                 ]
             )
         cls.process = popen_launch_server(
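
For reference, a minimal sketch of the launch pattern these tests share after the revert. Only popen_launch_server and the flag names appear in the diff itself; the import path, helper signature, model name, base URL, and timeout below are assumptions for illustration.

    # Sketch only: the flags come from the hunks above; everything else
    # (model, URL, timeout, exact helper signature) is assumed.
    from sglang.test.test_utils import popen_launch_server

    other_args = [
        "--enable-flashinfer-mla",           # restored by this revert
        "--flashinfer-mla-disable-ragged",   # re-added in the NoRagged test case
    ]

    process = popen_launch_server(
        "deepseek-ai/DeepSeek-V2-Lite",  # hypothetical MLA-capable model
        "http://127.0.0.1:30000",        # hypothetical base URL
        timeout=300,
        other_args=other_args,
    )
    try:
        ...  # issue requests against the server under test
    finally:
        process.terminate()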