Support overlapped lora updates (#8213)
This commit is contained in:
@@ -231,8 +231,7 @@ class TestBenchServing(CustomTestCase):
|
||||
f"median_ttft_ms: {res['median_ttft_ms']:.2f} ms\n"
|
||||
)
|
||||
self.assertLess(res["median_e2e_latency_ms"], 4000)
|
||||
- # TODO (lifuhuang): This will be fixed by the overlapped LoRA update in a separate PR.
|
||||
- self.assertLess(res["median_ttft_ms"], 1600)
|
||||
+ self.assertLess(res["median_ttft_ms"], 80)
|
||||
|
||||
def _run_lora_latency_test(self, enable_background_task: bool):
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user