Set csgmv as default lora backend. (#11488)

This commit is contained in:
Lifu Huang
2025-10-15 21:53:24 -07:00
committed by GitHub
parent cbac499750
commit b0d20cdec7
11 changed files with 11 additions and 23 deletions

View File

@@ -309,8 +309,8 @@ class ServerArgs:
] = None
max_loaded_loras: Optional[int] = None
max_loras_per_batch: int = 8
+lora_backend: str = "csgmv"
lora_eviction_policy: str = DEFAULT_LORA_EVICTION_POLICY
-lora_backend: str = "triton"
max_lora_chunk_size: Optional[int] = 16
# Kernel backend

View File

@@ -496,7 +496,7 @@ class SRTRunner:
attention_backend: Optional[str] = None,
prefill_attention_backend: Optional[str] = None,
decode_attention_backend: Optional[str] = None,
-lora_backend: str = "triton",
+lora_backend: str = "csgmv",
disable_cuda_graph: bool = False,
disable_radix_cache: bool = False,
chunked_prefill_size: Optional[int] = None,