Improve streaming, log_level, memory report, weight loading, and benchmark script (#7632)

Co-authored-by: Kan Wu <wukanustc@gmail.com>
This commit is contained in:
Lianmin Zheng
2025-06-29 23:16:19 -07:00
committed by GitHub
parent c5131f7a2f
commit 22352d47a9
24 changed files with 626 additions and 160 deletions

View File

@@ -173,10 +173,11 @@ suites = {
# TestFile("test_deepep_intranode.py", 50),
# TestFile("test_deepep_low_latency.py", 50),
# TestFile("test_moe_deepep_eval_accuracy_large.py", 250),
# Disabled because it hangs on the CI.
# TestFile("test_moe_ep.py", 181),
TestFile("test_disaggregation.py", 270),
TestFile("test_disaggregation_different_tp.py", 155),
TestFile("test_full_deepseek_v3.py", 463),
TestFile("test_moe_ep.py", 181),
],
"per-commit-8-gpu-amd": [
TestFile("test_full_deepseek_v3.py", 250),

View File

@@ -178,7 +178,7 @@ class TestVisionChunkedPrefill(CustomTestCase):
print(output_chunked)
print("output without chunked prefill:")
print(output_no_chunked)
assert output_chunked == output_no_chunked
self.assertEqual(output_chunked, output_no_chunked)
def test_chunked_prefill(self):
self._test_chunked_prefill(batches=[False, True], num_frames=[1, [2, 6, 8, 10]])