diff --git a/test/srt/run_suite.py b/test/srt/run_suite.py index 00adfa318..4746d97bc 100644 --- a/test/srt/run_suite.py +++ b/test/srt/run_suite.py @@ -146,7 +146,6 @@ suites = { TestFile("test_data_parallelism.py", 73), TestFile("test_dp_attention.py", 137), TestFile("test_mla_tp.py", 170), - TestFile("test_moe_ep.py", 181), TestFile("test_patch_torch.py", 19), TestFile("test_update_weights_from_distributed.py", 103), TestFile("test_release_memory_occupation.py", 44), @@ -175,6 +174,7 @@ suites = { TestFile("test_disaggregation.py", 270), TestFile("test_disaggregation_different_tp.py", 155), TestFile("test_full_deepseek_v3.py", 463), + TestFile("test_moe_ep.py", 181), ], "per-commit-8-gpu-amd": [ TestFile("test_full_deepseek_v3.py", 250), diff --git a/test/srt/test_bench_serving.py b/test/srt/test_bench_serving.py index 3626ed81e..19936c574 100644 --- a/test/srt/test_bench_serving.py +++ b/test/srt/test_bench_serving.py @@ -194,7 +194,7 @@ class TestBenchServing(CustomTestCase): self.assertLess(res["median_ttft_ms"], 150) # TODO: not set yet, need AMD machine else: - self.assertLess(res["median_ttft_ms"], 98) + self.assertLess(res["median_ttft_ms"], 100) self.assertLess(res["median_itl_ms"], 8) def test_online_latency_eagle(self):