Refactor dynamic LoRA update to fix incorrect handling of variant weight shapes (#7844)

Author: Lifu Huang
Date: 2025-07-13 18:36:01 -07:00 (committed by GitHub)
Parent: b5dd5e8741
Commit: e2ed9d049a
10 changed files with 840 additions and 227 deletions
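The fix targets adapters whose LoRA weights come in different shapes, for example different ranks across adapters. A minimal sketch of the underlying idea, assuming a weight buffer sized once for a unified `max_lora_rank`; the helper below is illustrative only, not the code from this commit:

```python
import torch

def pad_lora_weights(A: torch.Tensor, B: torch.Tensor, max_rank: int):
    # A: (r, in_features), B: (out_features, r) for an adapter of rank r.
    # Zero-pad both to max_rank so adapters of different ranks fit the
    # same fixed-shape buffer; B_pad @ A_pad == B @ A because the padded
    # rows and columns are all zeros.
    r = A.shape[0]
    assert r <= max_rank, "adapter rank exceeds max_lora_rank"
    A_pad = torch.zeros(max_rank, A.shape[1], dtype=A.dtype, device=A.device)
    B_pad = torch.zeros(B.shape[0], max_rank, dtype=B.dtype, device=B.device)
    A_pad[:r] = A
    B_pad[:, :r] = B
    return A_pad, B_pad
```

Sizing the buffer once for the largest rank is what makes the new `--max-lora-rank` argument below useful when adapters are loaded and unloaded dynamically.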

@@ -134,6 +134,8 @@ class ServerArgs:
     preferred_sampling_params: Optional[str] = None
     # LoRA
+    max_lora_rank: Optional[int] = None
+    lora_target_modules: Optional[List[str]] = None
     lora_paths: Optional[Union[dict[str, str], List[str]]] = None
     max_loras_per_batch: int = 8
     lora_backend: str = "triton"
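Both new fields default to None and, per the help text in the second hunk, are inferred from the adapters given in --lora-paths when unset. A hedged sketch of what that inference could look like, assuming each adapter directory carries a PEFT-style adapter_config.json with "r" and "target_modules" keys; infer_lora_settings is a hypothetical helper, not this commit's code:

```python
import json
import os
from typing import List, Tuple

def infer_lora_settings(lora_paths: List[str]) -> Tuple[int, List[str]]:
    # Hypothetical helper: take the maximum rank and the union of
    # target modules across all provided adapters.
    max_rank = 0
    target_modules: set = set()
    for path in lora_paths:
        with open(os.path.join(path, "adapter_config.json")) as f:
            cfg = json.load(f)
        max_rank = max(max_rank, cfg["r"])
        target_modules.update(cfg["target_modules"])
    return max_rank, sorted(target_modules)
```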
@@ -1129,6 +1131,28 @@ class ServerArgs:
         )
         # LoRA
+        parser.add_argument(
+            "--max-lora-rank",
+            default=ServerArgs.max_lora_rank,
+            type=int,
+            help="The maximum rank of LoRA adapters. If not specified, it will be automatically inferred from the adapters provided in --lora-paths.",
+        )
+        parser.add_argument(
+            "--lora-target-modules",
+            type=str,
+            choices=[
+                "q_proj",
+                "k_proj",
+                "v_proj",
+                "o_proj",
+                "gate_proj",
+                "up_proj",
+                "down_proj",
+            ],
+            nargs="*",
+            default=None,
+            help="The union set of all target modules where LoRA should be applied. If not specified, it will be automatically inferred from the adapters provided in --lora-paths.",
+        )
         parser.add_argument(
             "--lora-paths",
             type=str,
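For reference, a standalone argparse snippet (separate from SGLang's real parser) showing how the two new flags parse; with nargs="*", the choices check is applied to each element individually:

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--max-lora-rank", type=int, default=None)
parser.add_argument(
    "--lora-target-modules",
    type=str,
    choices=["q_proj", "k_proj", "v_proj", "o_proj",
             "gate_proj", "up_proj", "down_proj"],
    nargs="*",
    default=None,
)

args = parser.parse_args(
    ["--max-lora-rank", "64", "--lora-target-modules", "q_proj", "v_proj"]
)
print(args.max_lora_rank)        # 64
print(args.lora_target_modules)  # ['q_proj', 'v_proj']
```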