@@ -309,8 +309,8 @@ class ServerArgs:
|
||||
] = None
|
||||
max_loaded_loras: Optional[int] = None
|
||||
max_loras_per_batch: int = 8
|
||||
lora_backend: str = "csgmv"
|
||||
lora_eviction_policy: str = DEFAULT_LORA_EVICTION_POLICY
|
||||
lora_backend: str = "triton"
|
||||
max_lora_chunk_size: Optional[int] = 16
|
||||
|
||||
# Kernel backend
|
||||
|
||||
@@ -496,7 +496,7 @@ class SRTRunner:
|
||||
attention_backend: Optional[str] = None,
|
||||
prefill_attention_backend: Optional[str] = None,
|
||||
decode_attention_backend: Optional[str] = None,
|
||||
lora_backend: str = "csgmv",
|
||||
lora_backend: str = "triton",
|
||||
disable_cuda_graph: bool = False,
|
||||
disable_radix_cache: bool = False,
|
||||
chunked_prefill_size: Optional[int] = None,
|
||||
|
||||
Reference in New Issue
Block a user