[Feature] Initial support for multi-LoRA serving (#1307)
This commit is contained in:
@@ -35,6 +35,7 @@ import torch
|
||||
import torch.distributed as dist
|
||||
from fastapi.responses import JSONResponse
|
||||
from packaging import version as pkg_version
|
||||
from torch import nn
|
||||
from torch.nn.parameter import Parameter
|
||||
from triton.runtime.cache import (
|
||||
FileCacheManager,
|
||||
@@ -714,3 +715,14 @@ def configure_logger(server_args, prefix: str = ""):
|
||||
datefmt="%H:%M:%S",
|
||||
force=True,
|
||||
)
|
||||
|
||||
|
||||
# source: https://github.com/vllm-project/vllm/blob/93b38bea5dd03e1b140ca997dfaadef86f8f1855/vllm/lora/utils.py#L9
|
||||
def replace_submodule(
    model: nn.Module, module_name: str, new_module: nn.Module
) -> nn.Module:
    """Swap the submodule at ``module_name`` inside ``model`` for ``new_module``.

    Args:
        model: Root module that contains the submodule to replace.
        module_name: Dot-separated path of the submodule, relative to
            ``model`` (e.g. ``"layers.0.attn"``).
        new_module: Module to install at that path.

    Returns:
        ``new_module``, the same object that was passed in.
    """
    # Everything before the last dot addresses the parent; the remainder is
    # the attribute name on that parent. With no dot the parent path is "",
    # which ``get_submodule`` resolves to ``model`` itself.
    parent_path, _, attr_name = module_name.rpartition(".")
    owner = model.get_submodule(parent_path)
    setattr(owner, attr_name, new_module)
    return new_module
|
||||
|
||||
Reference in New Issue
Block a user