Clean up server_args.py to have a dedicated function for model specific adjustments (#8983)
This commit is contained in:
@@ -25,7 +25,7 @@ class TestFlashinferMLA(CustomTestCase):
|
||||
[
|
||||
"--enable-torch-compile",
|
||||
"--cuda-graph-max-bs",
|
||||
"2",
|
||||
"4",
|
||||
"--attention-backend",
|
||||
"flashinfer",
|
||||
]
|
||||
@@ -68,7 +68,6 @@ class TestFlashinferMLAMTP(CustomTestCase):
|
||||
[
|
||||
"--cuda-graph-max-bs",
|
||||
"4",
|
||||
"--disable-radix",
|
||||
"--enable-torch-compile",
|
||||
"--torch-compile-max-bs",
|
||||
"1",
|
||||
|
||||
Reference in New Issue
Block a user