[Feature] Define backends and add Triton backend for Lora (#3161)
Co-authored-by: Ying Sheng <sqy1415@gmail.com>
This commit is contained in:
@@ -113,6 +113,7 @@ class ServerArgs:
|
||||
# LoRA
|
||||
lora_paths: Optional[List[str]] = None
|
||||
max_loras_per_batch: int = 8
|
||||
lora_backend: str = "triton"
|
||||
|
||||
# Kernel backend
|
||||
attention_backend: Optional[str] = None
|
||||
@@ -653,13 +654,19 @@ class ServerArgs:
|
||||
nargs="*",
|
||||
default=None,
|
||||
action=LoRAPathAction,
|
||||
-help="The list of LoRA adapters. You can provide a list of either path in str or renamed path in the format {name}={path}",
|
||||
+help="The list of LoRA adapters. You can provide a list of either path in str or renamed path in the format {name}={path}.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--max-loras-per-batch",
|
||||
type=int,
|
||||
default=8,
|
||||
-help="Maximum number of adapters for a running batch, include base-only request",
|
||||
+help="Maximum number of adapters for a running batch, include base-only request.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--lora-backend",
|
||||
type=str,
|
||||
default="triton",
|
||||
help="Choose the kernel backend for multi-LoRA serving.",
|
||||
)
|
||||
|
||||
# Kernel backend
|
||||
|
||||
Reference in New Issue
Block a user