Refactor dynamic LoRA update to fix incorrect handling of variant weight shapes (#7844)

Author: Lifu Huang
Date: 2025-07-13 18:36:01 -07:00 (committed by GitHub)
Parent: b5dd5e8741
Commit: e2ed9d049a
10 changed files with 840 additions and 227 deletions
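The fix targets adapters whose LoRA weights come in different shapes, for example different ranks across adapters. A minimal sketch of the underlying idea, assuming a weight buffer sized once for a unified `max_lora_rank`; the helper below is illustrative only, not the code from this commit:

```python
import torch

def pad_lora_weights(A: torch.Tensor, B: torch.Tensor, max_rank: int):
    # A: (r, in_features), B: (out_features, r) for an adapter of rank r.
    # Zero-pad both to max_rank so adapters of different ranks fit the
    # same fixed-shape buffer; B_pad @ A_pad == B @ A because the padded
    # rows and columns are all zeros.
    r = A.shape[0]
    assert r <= max_rank, "adapter rank exceeds max_lora_rank"
    A_pad = torch.zeros(max_rank, A.shape[1], dtype=A.dtype, device=A.device)
    B_pad = torch.zeros(B.shape[0], max_rank, dtype=B.dtype, device=B.device)
    A_pad[:r] = A
    B_pad[:, :r] = B
    return A_pad, B_pad
```

Sizing the buffer once for the largest rank is what makes the new `--max-lora-rank` argument below useful when adapters are loaded and unloaded dynamically.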

@@ -134,6 +134,8 @@ class ServerArgs:
     preferred_sampling_params: Optional[str] = None
     # LoRA
+    max_lora_rank: Optional[int] = None
+    lora_target_modules: Optional[List[str]] = None
     lora_paths: Optional[Union[dict[str, str], List[str]]] = None
     max_loras_per_batch: int = 8
     lora_backend: str = "triton"
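Both new fields default to None and, per the help text in the second hunk, are inferred from the adapters given in --lora-paths when unset. A hedged sketch of what that inference could look like, assuming each adapter directory carries a PEFT-style adapter_config.json with "r" and "target_modules" keys; infer_lora_settings is a hypothetical helper, not this commit's code:

```python
import json
import os
from typing import List, Tuple

def infer_lora_settings(lora_paths: List[str]) -> Tuple[int, List[str]]:
    # Hypothetical helper: take the maximum rank and the union of
    # target modules across all provided adapters.
    max_rank = 0
    target_modules: set = set()
    for path in lora_paths:
        with open(os.path.join(path, "adapter_config.json")) as f:
            cfg = json.load(f)
        max_rank = max(max_rank, cfg["r"])
        target_modules.update(cfg["target_modules"])
    return max_rank, sorted(target_modules)
```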
@@ -1129,6 +1131,28 @@ class ServerArgs:
         )
         # LoRA
+        parser.add_argument(
+            "--max-lora-rank",
+            default=ServerArgs.max_lora_rank,
+            type=int,
+            help="The maximum rank of LoRA adapters. If not specified, it will be automatically inferred from the adapters provided in --lora-paths.",
+        )
+        parser.add_argument(
+            "--lora-target-modules",
+            type=str,
+            choices=[
+                "q_proj",
+                "k_proj",
+                "v_proj",
+                "o_proj",
+                "gate_proj",
+                "up_proj",
+                "down_proj",
+            ],
+            nargs="*",
+            default=None,
+            help="The union set of all target modules where LoRA should be applied. If not specified, it will be automatically inferred from the adapters provided in --lora-paths.",
+        )
         parser.add_argument(
             "--lora-paths",
             type=str,
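For reference, a standalone argparse snippet (separate from SGLang's real parser) showing how the two new flags parse; with nargs="*", the choices check is applied to each element individually:

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--max-lora-rank", type=int, default=None)
parser.add_argument(
    "--lora-target-modules",
    type=str,
    choices=["q_proj", "k_proj", "v_proj", "o_proj",
             "gate_proj", "up_proj", "down_proj"],
    nargs="*",
    default=None,
)

args = parser.parse_args(
    ["--max-lora-rank", "64", "--lora-target-modules", "q_proj", "v_proj"]
)
print(args.max_lora_rank)        # 64
print(args.lora_target_modules)  # ['q_proj', 'v_proj']
```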