Disable radix cache in test_lora_update.py for better stability (#9852)
This commit is contained in:
@@ -824,6 +824,7 @@ class LoRAUpdateEngineTestSession(LoRAUpdateTestSessionBase):
         disable_cuda_graph=self.disable_cuda_graph,
         cuda_graph_max_bs=self.cuda_graph_max_bs,
         enable_lora=self.enable_lora,
+        disable_radix_cache=True,
     )
     self.handle.__enter__()
     return self
@@ -958,6 +959,7 @@ class LoRAUpdateServerTestSession(LoRAUpdateTestSessionBase):
         "1",
         "--mem-fraction-static",
         str(MEM_FRACTION_STATIC),
+        "--disable-radix-cache",
     ]
     if self.enable_lora:
         other_args.append("--enable-lora")
|
|||||||
Reference in New Issue
Block a user