[Feature] Initial support for multi-LoRA serving (#1307)

2024-09-12 16:46:14 -07:00
parent c33d82a211
commit 712216928f
21 changed files with 1435 additions and 22 deletions
--- a/python/sglang/srt/utils.py
+++ b/python/sglang/srt/utils.py
@@ -35,6 +35,7 @@ import torch
 import torch.distributed as dist
 from fastapi.responses import JSONResponse
 from packaging import version as pkg_version
+from torch import nn
 from torch.nn.parameter import Parameter
 from triton.runtime.cache import (
    FileCacheManager,
@@ -714,3 +715,14 @@ def configure_logger(server_args, prefix: str = ""):
        datefmt="%H:%M:%S",
        force=True,
    )
+
+
+# source: https://github.com/vllm-project/vllm/blob/93b38bea5dd03e1b140ca997dfaadef86f8f1855/vllm/lora/utils.py#L9
+def replace_submodule(
+    model: nn.Module, module_name: str, new_module: nn.Module
+) -> nn.Module:
+    """Replace a submodule in a model with a new module."""
+    parent = model.get_submodule(".".join(module_name.split(".")[:-1]))
+    target_name = module_name.split(".")[-1]
+    setattr(parent, target_name, new_module)
+    return new_module