Support radix cache for LoRA feature (#7216)

This commit is contained in:
Baizhou Zhang
2025-08-11 10:14:11 -07:00
committed by GitHub
parent 6f81a710f7
commit 75e6a7cde1
12 changed files with 546 additions and 27 deletions

View File

@@ -787,7 +787,6 @@ class LoRAUpdateEngineTestSession(LoRAUpdateTestSessionBase):
max_loaded_loras=self.max_loaded_loras,
disable_cuda_graph=self.disable_cuda_graph,
cuda_graph_max_bs=self.cuda_graph_max_bs,
disable_radix_cache=True,
enable_lora=self.enable_lora,
)
self.handle.__enter__()
@@ -917,7 +916,6 @@ class LoRAUpdateServerTestSession(LoRAUpdateTestSessionBase):
str(self.max_loras_per_batch),
"--lora-backend",
self.lora_backend,
"--disable-radix-cache",
"--random-seed",
"42",
"--max-running-request",