[Feature] Define backends and add Triton backend for Lora (#3161)

Co-authored-by: Ying Sheng <sqy1415@gmail.com>
2025-02-03 22:09:13 -08:00
parent 7b5a374114
commit 70817a7eae
18 changed files with 1129 additions and 135 deletions
--- a/python/sglang/test/runners.py
+++ b/python/sglang/test/runners.py
@@ -272,6 +272,7 @@ class SRTRunner:
        port: int = DEFAULT_PORT_FOR_SRT_TEST_RUNNER,
        lora_paths: List[str] = None,
        max_loras_per_batch: int = 4,
+        lora_backend: str = "triton",
        disable_cuda_graph: bool = False,
        disable_radix_cache: bool = False,
    ):
@@ -287,6 +288,7 @@ class SRTRunner:
            is_embedding=not self.is_generation,
            lora_paths=lora_paths,
            max_loras_per_batch=max_loras_per_batch,
+            lora_backend=lora_backend,
            disable_cuda_graph=disable_cuda_graph,
            disable_radix_cache=disable_radix_cache,
        )