Support starting up the LoRA server without initial adapters (#8019)

This commit is contained in:
Lifu Huang
2025-07-19 15:38:09 -07:00
committed by GitHub
parent 60468da4e2
commit 4e3defe5a7
12 changed files with 290 additions and 195 deletions

View File

@@ -26,6 +26,8 @@ from typing import List, Literal, Optional, Union
from sglang.srt.hf_transformers_utils import check_gguf_file, get_config
from sglang.srt.reasoning_parser import ReasoningParser
from sglang.srt.utils import (
LORA_TARGET_ALL_MODULES,
SUPPORTED_LORA_TARGET_MODULES,
configure_ipv6,
get_device,
get_device_memory_capacity,
@@ -140,8 +142,9 @@ class ServerArgs:
preferred_sampling_params: Optional[str] = None
# LoRA
enable_lora: Optional[bool] = None
max_lora_rank: Optional[int] = None
lora_target_modules: Optional[List[str]] = None
lora_target_modules: Optional[Union[set[str], List[str]]] = None
lora_paths: Optional[Union[dict[str, str], List[str]]] = None
max_loras_per_batch: int = 8
lora_backend: str = "triton"
@@ -1148,6 +1151,12 @@ class ServerArgs:
)
# LoRA
parser.add_argument(
"--enable-lora",
default=ServerArgs.enable_lora,
action="store_true",
help="Enable LoRA support for the model. This argument is automatically set to True if `--lora-paths` is provided for backward compatibility.",
)
parser.add_argument(
"--max-lora-rank",
default=ServerArgs.max_lora_rank,
@@ -1157,18 +1166,12 @@ class ServerArgs:
parser.add_argument(
"--lora-target-modules",
type=str,
choices=[
"q_proj",
"k_proj",
"v_proj",
"o_proj",
"gate_proj",
"up_proj",
"down_proj",
],
choices=SUPPORTED_LORA_TARGET_MODULES + [LORA_TARGET_ALL_MODULES],
nargs="*",
default=None,
help="The union set of all target modules where LoRA should be applied. If not specified, it will be automatically inferred from the adapters provided in --lora-paths.",
help="The union set of all target modules where LoRA should be applied. If not specified, "
"it will be automatically inferred from the adapters provided in --lora-paths. If 'all' is specified, "
"all supported modules will be targeted.",
)
parser.add_argument(
"--lora-paths",
@@ -1816,15 +1819,46 @@ class ServerArgs:
None,
}, "moe_dense_tp_size only support 1 and None currently"
if isinstance(self.lora_paths, list):
lora_paths = self.lora_paths
self.lora_paths = {}
for lora_path in lora_paths:
if "=" in lora_path:
name, path = lora_path.split("=", 1)
self.lora_paths[name] = path
else:
self.lora_paths[lora_path] = lora_path
self.check_lora_server_args()
def check_lora_server_args(self):
    """Validate and normalize all LoRA-related server arguments.

    Side effects on ``self``:
      * ``enable_lora`` is flipped to True when adapters were passed via
        --lora-paths (backward compatibility with the pre---enable-lora CLI).
      * ``lora_paths`` given as a list of "name=path" / bare-path strings is
        canonicalized into a ``{name: path}`` dict.
      * ``lora_target_modules`` is converted to a set, with the sentinel
        "all" expanded to every supported module.

    Raises AssertionError when LoRA is enabled without enough information
    (no initial adapters and no explicit rank/target-module configuration).
    """
    # Backward compatibility: providing --lora-paths implies LoRA support
    # unless the user explicitly disabled it.
    if self.lora_paths:
        if self.enable_lora is None:
            self.enable_lora = True
            logger.info(
                "--enable-lora is set to True because --lora-paths is provided."
            )
        elif self.enable_lora is False:
            logger.warning(
                "--enable-lora is set to False, any provided lora_paths will be ignored."
            )

    if not self.enable_lora:
        return

    # Canonicalize list-form lora_paths: "name=path" maps name to path,
    # a bare entry maps to itself.
    if isinstance(self.lora_paths, list):
        normalized = {}
        for entry in self.lora_paths:
            name, sep, path = entry.partition("=")
            normalized[name] = path if sep else entry
        self.lora_paths = normalized

    # Expand the "all" sentinel into the full set of supported modules.
    if self.lora_target_modules:
        module_set = set(self.lora_target_modules)
        if "all" in module_set:
            assert (
                len(module_set) == 1
            ), "If 'all' is specified in --lora-target-modules, it should be the only module specified."
            module_set = set(SUPPORTED_LORA_TARGET_MODULES)
        self.lora_target_modules = module_set

    # Without initial adapters, rank and target modules must be given
    # explicitly so the LoRA memory pool can still be sized at startup.
    assert self.lora_paths or (
        self.max_lora_rank and self.lora_target_modules
    ), "When no initial --lora-paths is provided, you need to specify both --max-lora-rank and --lora-target-modules for LoRA initialization."
def validate_disagg_tp_size(self, prefill_tp: int, decode_tp: int):
larger_tp = max(decode_tp, prefill_tp)