Clean up server_args.py to have a dedicated function for model specific adjustments (#8983)

2025-08-08 19:56:50 -07:00
parent 23f2afb2ce
commit 706bd69cc5
24 changed files with 201 additions and 340 deletions
--- a/test/srt/test_mla_flashinfer.py
+++ b/test/srt/test_mla_flashinfer.py
@@ -25,7 +25,7 @@ class TestFlashinferMLA(CustomTestCase):
                [
                    "--enable-torch-compile",
                    "--cuda-graph-max-bs",
-                    "2",
+                    "4",
                    "--attention-backend",
                    "flashinfer",
                ]
@@ -68,7 +68,6 @@ class TestFlashinferMLAMTP(CustomTestCase):
                [
                    "--cuda-graph-max-bs",
                    "4",
-                    "--disable-radix",
                    "--enable-torch-compile",
                    "--torch-compile-max-bs",
                    "1",