Fix request abortion (#6184)

This commit is contained in:
Lianmin Zheng
2025-05-10 21:54:46 -07:00
committed by GitHub
parent 4319978c73
commit de167cf5fa
10 changed files with 148 additions and 84 deletions

View File

@@ -190,7 +190,7 @@ class TestBenchServing(CustomTestCase):
f"### test_vlm_online_latency\n"
f'median_e2e_latency_ms: {res["median_e2e_latency_ms"]:.2f} ms\n'
)
-self.assertLess(res["median_e2e_latency_ms"], 16000)
+self.assertLess(res["median_e2e_latency_ms"], 16500)
if os.getenv("SGLANG_AMD_CI") == "1":
self.assertLess(res["median_ttft_ms"], 150)
# TODO: not set yet, need AMD machine