Use only one GPU for MLA CI tests (#2858)

This commit is contained in:
Lianmin Zheng
2025-01-13 03:55:33 -08:00
committed by GitHub
parent 4536d72446
commit 67008f4b32
4 changed files with 39 additions and 8 deletions

View File

@@ -22,6 +22,8 @@ suites = {
"test_json_constrained.py",
"test_large_max_new_tokens.py",
"test_metrics.py",
"test_mla.py",
"test_mla_fp8.py",
"test_no_chunked_prefill.py",
"test_no_overlap_scheduler.py",
"test_openai_server.py",