Support radix cache for LoRA feature (#7216)

2025-08-11 10:14:11 -07:00
parent 6f81a710f7
commit 75e6a7cde1
12 changed files with 546 additions and 27 deletions
--- a/test/srt/lora/utils.py
+++ b/test/srt/lora/utils.py
@@ -136,7 +136,7 @@ def run_lora_test_one_by_one(
    max_new_tokens: int,
    backend: str,
    disable_cuda_graph: bool = False,
-    disable_radix_cache: bool = True,
+    disable_radix_cache: bool = False,
    mem_fraction_static: float = 0.88,
    test_tag: str = "",
 ):
@@ -156,7 +156,7 @@ def run_lora_test_one_by_one(
        max_new_tokens (int): The maximum number of new tokens to generate.
        backend (str): The lora backend to use.
        disable_cuda_graph (bool, optional): Whether to disable CUDA graph. Defaults to False.
-        disable_radix_cache (bool, optional): Whether to disable radix cache. Defaults to True.
+        disable_radix_cache (bool, optional): Whether to disable radix cache. Defaults to False.
        mem_fraction_static (float, optional): The fraction of memory to use. Defaults to 0.88.
        test_tag (str, optional): The tag to use for the test. Defaults to "".
    """
@@ -284,7 +284,7 @@ def run_lora_test_by_batch(
    max_new_tokens: int,
    backend: str,
    disable_cuda_graph: bool = False,
-    disable_radix_cache: bool = True,
+    disable_radix_cache: bool = False,
    mem_fraction_static: float = 0.88,
    test_tag: str = "",
 ):
@@ -303,7 +303,7 @@ def run_lora_test_by_batch(
        max_new_tokens (int): The maximum number of new tokens to generate.
        backend (str): The lora backend to use.
        disable_cuda_graph (bool, optional): Whether to disable CUDA graph. Defaults to False.
-        disable_radix_cache (bool, optional): Whether to disable radix cache. Defaults to True.
+        disable_radix_cache (bool, optional): Whether to disable radix cache. Defaults to False.
        mem_fraction_static (float, optional): The fraction of memory to use. Defaults to 0.88.
        test_tag (str, optional): The tag to use for the test. Defaults to "".
    """