Support radix cache for LoRA feature (#7216)

2025-08-11 10:14:11 -07:00
parent 6f81a710f7
commit 75e6a7cde1
12 changed files with 546 additions and 27 deletions
--- a/test/srt/lora/test_lora.py
+++ b/test/srt/lora/test_lora.py
@@ -104,7 +104,6 @@ class TestLoRA(CustomTestCase):
                    lora_paths=[lora_adapter_paths[0], lora_adapter_paths[1]],
                    max_loras_per_batch=len(lora_adapter_paths) + 1,
                    lora_backend=backend,
-                    disable_radix_cache=True,
                    sleep_on_idle=True,  # Eliminate non-determinism by forcing all requests to be processed in one batch.
                    attention_backend="torch_native",
                )