Disable radix cache in test_lora_update.py for better stability (#9852)
This commit is contained in:
@@ -824,6 +824,7 @@ class LoRAUpdateEngineTestSession(LoRAUpdateTestSessionBase):
|
||||
disable_cuda_graph=self.disable_cuda_graph,
|
||||
cuda_graph_max_bs=self.cuda_graph_max_bs,
|
||||
enable_lora=self.enable_lora,
|
||||
disable_radix_cache=True,
|
||||
)
|
||||
self.handle.__enter__()
|
||||
return self
|
||||
@@ -958,6 +959,7 @@ class LoRAUpdateServerTestSession(LoRAUpdateTestSessionBase):
|
||||
"1",
|
||||
"--mem-fraction-static",
|
||||
str(MEM_FRACTION_STATIC),
|
||||
"--disable-radix-cache",
|
||||
]
|
||||
if self.enable_lora:
|
||||
other_args.append("--enable-lora")
|
||||
|
||||
Reference in New Issue
Block a user