Clean up server_args.py to have a dedicated function for model specific adjustments (#8983)

This commit is contained in:
Lianmin Zheng
2025-08-08 19:56:50 -07:00
committed by GitHub
parent 23f2afb2ce
commit 706bd69cc5
24 changed files with 201 additions and 340 deletions

View File

@@ -25,7 +25,7 @@ class TestFlashinferMLA(CustomTestCase):
[
"--enable-torch-compile",
"--cuda-graph-max-bs",
"2",
"4",
"--attention-backend",
"flashinfer",
]
@@ -68,7 +68,6 @@ class TestFlashinferMLAMTP(CustomTestCase):
[
"--cuda-graph-max-bs",
"4",
"--disable-radix",
"--enable-torch-compile",
"--torch-compile-max-bs",
"1",