Use only one GPU for MLA CI tests (#2858)

This commit is contained in:
Lianmin Zheng
2025-01-13 03:55:33 -08:00
committed by GitHub
parent 4536d72446
commit 67008f4b32
4 changed files with 39 additions and 8 deletions

View File

@@ -21,8 +21,6 @@ class TestMLA(unittest.TestCase):
cls.base_url,
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
other_args=[
"--tp",
"2",
"--trust-remote-code",
"--kv-cache-dtype",
"fp8_e5m2",