[Feature] Define backends and add Triton backend for Lora (#3161)

Co-authored-by: Ying Sheng <sqy1415@gmail.com>
This commit is contained in:
Baizhou Zhang
2025-02-03 22:09:13 -08:00
committed by GitHub
parent 7b5a374114
commit 70817a7eae
18 changed files with 1129 additions and 135 deletions

View File

@@ -272,6 +272,7 @@ class SRTRunner:
port: int = DEFAULT_PORT_FOR_SRT_TEST_RUNNER,
lora_paths: List[str] = None,
max_loras_per_batch: int = 4,
lora_backend: str = "triton",
disable_cuda_graph: bool = False,
disable_radix_cache: bool = False,
):
@@ -287,6 +288,7 @@ class SRTRunner:
is_embedding=not self.is_generation,
lora_paths=lora_paths,
max_loras_per_batch=max_loras_per_batch,
lora_backend=lora_backend,
disable_cuda_graph=disable_cuda_graph,
disable_radix_cache=disable_radix_cache,
)