Support overlapped lora updates (#8213)

This commit is contained in:
Lifu Huang
2025-07-27 13:00:44 -07:00
committed by GitHub
parent 95217a9b4d
commit df90645525
4 changed files with 204 additions and 35 deletions

View File

@@ -231,8 +231,7 @@ class TestBenchServing(CustomTestCase):
f"median_ttft_ms: {res['median_ttft_ms']:.2f} ms\n"
)
self.assertLess(res["median_e2e_latency_ms"], 4000)
# TODO (lifuhuang): This will be fixed by the overlapped LoRA update in a separate PR.
self.assertLess(res["median_ttft_ms"], 1600)
self.assertLess(res["median_ttft_ms"], 80)
def _run_lora_latency_test(self, enable_background_task: bool):
"""