first commit
vllm/plugins/__init__.py (new file, 72 lines)
@@ -0,0 +1,72 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import logging
from typing import Any, Callable

import vllm.envs as envs

logger = logging.getLogger(__name__)

DEFAULT_PLUGINS_GROUP = 'vllm.general_plugins'

# make sure one process only loads plugins once
plugins_loaded = False


def load_plugins_by_group(group: str) -> dict[str, Callable[[], Any]]:
    import sys
    if sys.version_info < (3, 10):
        from importlib_metadata import entry_points
    else:
        from importlib.metadata import entry_points

    allowed_plugins = envs.VLLM_PLUGINS

    discovered_plugins = entry_points(group=group)
    if len(discovered_plugins) == 0:
        logger.debug("No plugins for group %s found.", group)
        return {}

    # Check whether this is the default plugins group
    is_default_group = (group == DEFAULT_PLUGINS_GROUP)
    # Use DEBUG for the default group and INFO for non-default groups
    log_level = logger.debug if is_default_group else logger.info

    log_level("Available plugins for group %s:", group)
    for plugin in discovered_plugins:
        log_level("- %s -> %s", plugin.name, plugin.value)

    if allowed_plugins is None:
        log_level("All plugins in this group will be loaded. "
                  "Set `VLLM_PLUGINS` to control which plugins to load.")

    plugins = dict[str, Callable[[], Any]]()
    for plugin in discovered_plugins:
        if allowed_plugins is None or plugin.name in allowed_plugins:
            if allowed_plugins is not None:
                log_level("Loading plugin %s", plugin.name)

            try:
                func = plugin.load()
                plugins[plugin.name] = func
            except Exception:
                logger.exception("Failed to load plugin %s", plugin.name)

    return plugins


def load_general_plugins():
    """WARNING: plugins can be loaded multiple times in different
    processes. They should be designed so that they can be loaded
    multiple times without causing issues.
    """
    global plugins_loaded
    if plugins_loaded:
        return
    plugins_loaded = True

    plugins = load_plugins_by_group(group=DEFAULT_PLUGINS_GROUP)
    # for general plugins, we only need to execute the loaded functions
    for func in plugins.values():
        func()
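load_general_plugins() discovers entry points in the 'vllm.general_plugins' group and calls each loaded function once per process. An out-of-tree package can therefore hook into vLLM by exposing an entry point in that group. A minimal sketch, assuming a hypothetical package my_vllm_plugin (all names below are illustrative, not part of this commit):

# setup.py of a hypothetical out-of-tree plugin package
from setuptools import setup

setup(
    name="my-vllm-plugin",
    version="0.1",
    packages=["my_vllm_plugin"],
    entry_points={
        # the group scanned by load_plugins_by_group(DEFAULT_PLUGINS_GROUP)
        "vllm.general_plugins": [
            "my_plugin = my_vllm_plugin:register",
        ],
    },
)

# my_vllm_plugin/__init__.py (hypothetical)
def register():
    # Per the warning in load_general_plugins(), this may run once in each
    # of several processes, so it must be idempotent.
    pass

With this installed, setting VLLM_PLUGINS=my_plugin would restrict loading to that single plugin, per the allowed_plugins check above.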
vllm/plugins/__pycache__/__init__.cpython-310.pyc (new binary file, not shown)
vllm/plugins/io_processors/__init__.py (new file, 68 lines)
@@ -0,0 +1,68 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

from __future__ import annotations

import logging
from typing import Optional

from vllm.config import VllmConfig
from vllm.plugins import load_plugins_by_group
from vllm.plugins.io_processors.interface import IOProcessor
from vllm.utils import resolve_obj_by_qualname

logger = logging.getLogger(__name__)


def get_io_processor(
        vllm_config: VllmConfig,
        plugin_from_init: Optional[str] = None) -> IOProcessor | None:
    # Input/Output processors are loaded as plugins under the
    # 'vllm.io_processor_plugins' group. Similar to platform
    # plugins, these plugins register a function that returns the class
    # name of the processor to install.

    if plugin_from_init:
        model_plugin = plugin_from_init
    else:
        # A plugin can also be specified via the model config:
        # retrieve the model-specific plugin, if available, from a
        # custom field in the model's hf_config.
        hf_config = vllm_config.model_config.hf_config.to_dict()
        model_plugin = hf_config.get("io_processor_plugin")

    if model_plugin is None:
        logger.debug("No IOProcessor plugins requested by the model")
        return None

    logger.debug("IOProcessor plugin to be loaded: %s", model_plugin)

    # Load all installed plugins in the group
    multimodal_data_processor_plugins = \
        load_plugins_by_group('vllm.io_processor_plugins')

    loadable_plugins = {}
    for name, func in multimodal_data_processor_plugins.items():
        try:
            assert callable(func)
            processor_cls_qualname = func()
            if processor_cls_qualname is not None:
                loadable_plugins[name] = processor_cls_qualname
        except Exception:
            logger.warning("Failed to load plugin %s.", name, exc_info=True)

    if len(loadable_plugins) == 0:
        raise ValueError("No IOProcessor plugins installed"
                         f" but one is required ({model_plugin}).")

    if model_plugin not in loadable_plugins:
        raise ValueError(
            f"The model requires the '{model_plugin}' IO Processor plugin "
            "but it is not installed. "
            f"Available plugins: {list(loadable_plugins.keys())}")

    activated_plugin_cls = loadable_plugins[model_plugin]

    return resolve_obj_by_qualname(activated_plugin_cls)(vllm_config)
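Concretely, an IO Processor plugin's entry point in the 'vllm.io_processor_plugins' group resolves to a zero-argument function that returns the processor's qualified class name. A sketch under assumed names (the dotted "module.ClassName" form is what resolve_obj_by_qualname is assumed to expect):

# my_io_plugin/__init__.py (hypothetical)
def get_processor() -> str:
    # Resolved via resolve_obj_by_qualname and instantiated with the
    # VllmConfig; the class is expected to subclass IOProcessor.
    return "my_io_plugin.processor.MyIOProcessor"

Alternatively, a model can request a processor by carrying an io_processor_plugin field in its hf_config, which the else-branch above reads.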
vllm/plugins/io_processors/__pycache__/__init__.cpython-310.pyc (new binary file, not shown)
vllm/plugins/io_processors/__pycache__/interface.cpython-310.pyc (new binary file, not shown)
vllm/plugins/io_processors/interface.py (new file, 67 lines)
@@ -0,0 +1,67 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

from abc import ABC, abstractmethod
from collections.abc import AsyncGenerator, Sequence
from typing import Any, Generic, Optional, TypeVar, Union

from vllm.config import VllmConfig
from vllm.entrypoints.openai.protocol import IOProcessorResponse
from vllm.inputs.data import PromptType
from vllm.outputs import PoolingRequestOutput

IOProcessorInput = TypeVar('IOProcessorInput')
IOProcessorOutput = TypeVar('IOProcessorOutput')


class IOProcessor(ABC, Generic[IOProcessorInput, IOProcessorOutput]):

    def __init__(self, vllm_config: VllmConfig):
        self.vllm_config = vllm_config

    @abstractmethod
    def pre_process(
        self,
        prompt: IOProcessorInput,
        request_id: Optional[str] = None,
        **kwargs,
    ) -> Union[PromptType, Sequence[PromptType]]:
        raise NotImplementedError

    async def pre_process_async(
        self,
        prompt: IOProcessorInput,
        request_id: Optional[str] = None,
        **kwargs,
    ) -> Union[PromptType, Sequence[PromptType]]:
        return self.pre_process(prompt, request_id, **kwargs)

    @abstractmethod
    def post_process(self,
                     model_output: Sequence[PoolingRequestOutput],
                     request_id: Optional[str] = None,
                     **kwargs) -> IOProcessorOutput:
        raise NotImplementedError

    async def post_process_async(
        self,
        model_output: AsyncGenerator[tuple[int, PoolingRequestOutput]],
        request_id: Optional[str] = None,
        **kwargs,
    ) -> IOProcessorOutput:
        # We cannot guarantee outputs are returned in the same order they
        # were fed to vLLM, so sort them by id before post-processing.
        sorted_output = sorted([(i, item) async for i, item in model_output],
                               key=lambda output: output[0])
        collected_output = [output[1] for output in sorted_output]
        return self.post_process(collected_output, request_id, **kwargs)

    @abstractmethod
    def parse_request(self, request: Any) -> IOProcessorInput:
        raise NotImplementedError

    @abstractmethod
    def output_to_response(
            self, plugin_output: IOProcessorOutput) -> IOProcessorResponse:
        raise NotImplementedError
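To make the contract concrete, here is a minimal subclass sketch. The pooled-output access pattern and the IOProcessorResponse field names are assumptions for illustration, not part of this interface:

from collections.abc import Sequence
from typing import Any, Optional

from vllm.entrypoints.openai.protocol import IOProcessorResponse
from vllm.inputs.data import PromptType
from vllm.outputs import PoolingRequestOutput
from vllm.plugins.io_processors.interface import IOProcessor


class TextEmbeddingIOProcessor(IOProcessor[str, list[float]]):
    """Hypothetical processor: plain text in, flat embedding list out."""

    def parse_request(self, request: Any) -> str:
        # Assume the request body is the prompt text itself.
        return str(request)

    def pre_process(self,
                    prompt: str,
                    request_id: Optional[str] = None,
                    **kwargs) -> PromptType:
        return prompt  # a plain string is a valid PromptType

    def post_process(self,
                     model_output: Sequence[PoolingRequestOutput],
                     request_id: Optional[str] = None,
                     **kwargs) -> list[float]:
        # Assumed: each output carries a pooled tensor under .outputs.data.
        return [x for out in model_output
                for x in out.outputs.data.tolist()]

    def output_to_response(
            self, plugin_output: list[float]) -> IOProcessorResponse:
        # Assumed field names on IOProcessorResponse.
        return IOProcessorResponse(request_id=None, data=plugin_output)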
vllm/plugins/lora_resolvers/README.md (new file, 16 lines)
@@ -0,0 +1,16 @@
# LoRA Resolver Plugins

This directory contains vLLM general plugins for dynamically discovering and loading LoRA adapters
via the LoRAResolver plugin framework.

Note that `VLLM_ALLOW_RUNTIME_LORA_UPDATING` must be set to true for LoRA resolver plugins
to work, and `VLLM_PLUGINS` must be set to include the desired resolver plugins.

## lora_filesystem_resolver

This LoRA Resolver is installed with vLLM by default.
To use it, set `VLLM_LORA_RESOLVER_CACHE_DIR` to a local directory. When vLLM receives a request
for a LoRA adapter `foobar` that it doesn't currently recognize, it will look in that directory
for a subdirectory `foobar` containing a LoRA adapter. If such an adapter exists, it will
load the adapter and then service the request as normal. The adapter then remains available
for future requests. An example layout is shown below.
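For illustration, with `VLLM_LORA_RESOLVER_CACHE_DIR=/path/to/lora/cache`, a request for adapter `foobar` would be served from a layout like the following (the file names are the usual PEFT outputs; exact contents vary):

    /path/to/lora/cache/
        foobar/
            adapter_config.json
            adapter_model.safetensors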
vllm/plugins/lora_resolvers/__init__.py (new empty file)
vllm/plugins/lora_resolvers/__pycache__/__init__.cpython-310.pyc (new binary file, not shown)
vllm/plugins/lora_resolvers/filesystem_resolver.py (new file, 50 lines)
@@ -0,0 +1,50 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import json
import os
from typing import Optional

import vllm.envs as envs
from vllm.lora.request import LoRARequest
from vllm.lora.resolver import LoRAResolver, LoRAResolverRegistry


class FilesystemResolver(LoRAResolver):

    def __init__(self, lora_cache_dir: str):
        self.lora_cache_dir = lora_cache_dir

    async def resolve_lora(self, base_model_name: str,
                           lora_name: str) -> Optional[LoRARequest]:
        lora_path = os.path.join(self.lora_cache_dir, lora_name)
        if os.path.exists(lora_path):
            adapter_config_path = os.path.join(self.lora_cache_dir, lora_name,
                                               "adapter_config.json")
            if os.path.exists(adapter_config_path):
                with open(adapter_config_path) as file:
                    adapter_config = json.load(file)
                if adapter_config["peft_type"] == "LORA" and adapter_config[
                        "base_model_name_or_path"] == base_model_name:
                    lora_request = LoRARequest(lora_name=lora_name,
                                               lora_int_id=abs(
                                                   hash(lora_name)),
                                               lora_path=lora_path)
                    return lora_request
        return None


def register_filesystem_resolver():
    """Register the filesystem LoRA Resolver with vLLM."""

    lora_cache_dir = envs.VLLM_LORA_RESOLVER_CACHE_DIR
    if lora_cache_dir:
        if not os.path.exists(lora_cache_dir) or not os.path.isdir(
                lora_cache_dir):
            raise ValueError(
                "VLLM_LORA_RESOLVER_CACHE_DIR must be set to a valid "
                "directory for the Filesystem Resolver plugin to function")
        fs_resolver = FilesystemResolver(lora_cache_dir)
        LoRAResolverRegistry.register_resolver("Filesystem Resolver",
                                               fs_resolver)
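As a usage sketch, the resolver above accepts an adapter directory whose adapter_config.json carries the two fields it checks; everything here (paths, adapter name, model name) is illustrative:

import json
import os

cache_dir = "/path/to/lora/cache"  # value of VLLM_LORA_RESOLVER_CACHE_DIR
adapter_dir = os.path.join(cache_dir, "foobar")  # hypothetical adapter name
os.makedirs(adapter_dir, exist_ok=True)

# Minimal config satisfying the checks in resolve_lora(); real PEFT
# configs carry more fields (rank, target modules, ...).
config = {
    "peft_type": "LORA",
    # must equal the base model name the resolver is queried with
    "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct",
}
with open(os.path.join(adapter_dir, "adapter_config.json"), "w") as f:
    json.dump(config, f)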