Support radix cache for LoRA feature (#7216)

This commit is contained in:
Baizhou Zhang
2025-08-11 10:14:11 -07:00
committed by GitHub
parent 6f81a710f7
commit 75e6a7cde1
12 changed files with 546 additions and 27 deletions

View File

@@ -104,7 +104,6 @@ class TestLoRA(CustomTestCase):
lora_paths=[lora_adapter_paths[0], lora_adapter_paths[1]],
max_loras_per_batch=len(lora_adapter_paths) + 1,
lora_backend=backend,
disable_radix_cache=True,
sleep_on_idle=True, # Eliminate non-determinism by forcing all requests to be processed in one batch.
attention_backend="torch_native",
)