[Feature] Define backends and add Triton backend for Lora (#3161)
Co-authored-by: Ying Sheng <sqy1415@gmail.com>
This commit is contained in:
@@ -272,6 +272,7 @@ class SRTRunner:
|
||||
port: int = DEFAULT_PORT_FOR_SRT_TEST_RUNNER,
|
||||
lora_paths: List[str] = None,
|
||||
max_loras_per_batch: int = 4,
|
||||
lora_backend: str = "triton",
|
||||
disable_cuda_graph: bool = False,
|
||||
disable_radix_cache: bool = False,
|
||||
):
|
||||
@@ -287,6 +288,7 @@ class SRTRunner:
|
||||
is_embedding=not self.is_generation,
|
||||
lora_paths=lora_paths,
|
||||
max_loras_per_batch=max_loras_per_batch,
|
||||
lora_backend=lora_backend,
|
||||
disable_cuda_graph=disable_cuda_graph,
|
||||
disable_radix_cache=disable_radix_cache,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user