Improve streaming, log_level, memory report, weight loading, and benchmark script (#7632)
Co-authored-by: Kan Wu <wukanustc@gmail.com>
This commit is contained in:
@@ -173,10 +173,11 @@ suites = {
|
||||
# TestFile("test_deepep_intranode.py", 50),
|
||||
# TestFile("test_deepep_low_latency.py", 50),
|
||||
# TestFile("test_moe_deepep_eval_accuracy_large.py", 250),
|
||||
# Disabled because it hangs on the CI.
|
||||
# TestFile("test_moe_ep.py", 181),
|
||||
TestFile("test_disaggregation.py", 270),
|
||||
TestFile("test_disaggregation_different_tp.py", 155),
|
||||
TestFile("test_full_deepseek_v3.py", 463),
|
||||
TestFile("test_moe_ep.py", 181),
|
||||
],
|
||||
"per-commit-8-gpu-amd": [
|
||||
TestFile("test_full_deepseek_v3.py", 250),
|
||||
|
||||
@@ -178,7 +178,7 @@ class TestVisionChunkedPrefill(CustomTestCase):
|
||||
print(output_chunked)
|
||||
print("output without chunked prefill:")
|
||||
print(output_no_chunked)
|
||||
assert output_chunked == output_no_chunked
|
||||
self.assertEqual(output_chunked, output_no_chunked)
|
||||
|
||||
def test_chunked_prefill(self):
|
||||
self._test_chunked_prefill(batches=[False, True], num_frames=[1, [2, 6, 8, 10]])
|
||||
|
||||
Reference in New Issue
Block a user