v1.0
This commit is contained in:
78
plugins/__init__.py
Normal file
78
plugins/__init__.py
Normal file
@@ -0,0 +1,78 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
import logging
|
||||
from collections.abc import Callable
|
||||
from typing import Any
|
||||
|
||||
import vllm.envs as envs
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Default plugins group will be loaded in all processes (process0, engine core
|
||||
# process and worker processes)
|
||||
DEFAULT_PLUGINS_GROUP = "vllm.general_plugins"
|
||||
# IO processor plugins group will be loaded in process0 only
|
||||
IO_PROCESSOR_PLUGINS_GROUP = "vllm.io_processor_plugins"
|
||||
# Platform plugins group will be loaded in all processes when
|
||||
# `vllm.platforms.current_platform` is called and the value is not initialized.
|
||||
PLATFORM_PLUGINS_GROUP = "vllm.platform_plugins"
|
||||
|
||||
# make sure one process only loads plugins once
|
||||
plugins_loaded = False
|
||||
|
||||
|
||||
def load_plugins_by_group(group: str) -> dict[str, Callable[[], Any]]:
    """Discover and load the entry-point plugins registered under ``group``.

    Args:
        group: Name of the entry-point group to inspect.

    Returns:
        A mapping from plugin name to the object returned by loading its
        entry point. Plugins filtered out by ``VLLM_PLUGINS`` and plugins
        whose loading raised are not included. The mapping is empty when the
        group has no entry points.
    """
    from importlib.metadata import entry_points

    allowed_plugins = envs.VLLM_PLUGINS

    discovered_plugins = entry_points(group=group)
    if not discovered_plugins:
        logger.debug("No plugins for group %s found.", group)
        return {}

    # The default group is reported at DEBUG level; every other group is
    # reported at INFO level.
    emit = logger.info if group != DEFAULT_PLUGINS_GROUP else logger.debug

    emit("Available plugins for group %s:", group)
    for entry in discovered_plugins:
        emit("- %s -> %s", entry.name, entry.value)

    load_everything = allowed_plugins is None
    if load_everything:
        emit(
            "All plugins in this group will be loaded. "
            "Set `VLLM_PLUGINS` to control which plugins to load."
        )

    loaded: dict[str, Callable[[], Any]] = {}
    for entry in discovered_plugins:
        if not load_everything:
            # An allow-list is configured: skip anything it does not name.
            if entry.name not in allowed_plugins:
                continue
            emit("Loading plugin %s", entry.name)

        # A plugin that fails to load is logged and skipped so that the
        # remaining plugins are still processed.
        try:
            loaded[entry.name] = entry.load()
        except Exception:
            logger.exception("Failed to load plugin %s", entry.name)

    return loaded
|
||||
|
||||
|
||||
def load_general_plugins():
    """Load and execute the general plugins, at most once per process.

    WARNING: plugins can be loaded multiple times in different processes.
    They should be designed so that being loaded multiple times does not
    cause issues.
    """
    global plugins_loaded
    if not plugins_loaded:
        # Set the flag before loading so later calls in this process return
        # immediately.
        plugins_loaded = True

        # For general plugins it is enough to call each loaded function.
        for run_plugin in load_plugins_by_group(group=DEFAULT_PLUGINS_GROUP).values():
            run_plugin()
|
||||
BIN
plugins/__pycache__/__init__.cpython-312.pyc
Normal file
BIN
plugins/__pycache__/__init__.cpython-312.pyc
Normal file
Binary file not shown.
68
plugins/io_processors/__init__.py
Normal file
68
plugins/io_processors/__init__.py
Normal file
@@ -0,0 +1,68 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
import logging
|
||||
|
||||
from vllm.config import VllmConfig
|
||||
from vllm.plugins import IO_PROCESSOR_PLUGINS_GROUP, load_plugins_by_group
|
||||
from vllm.plugins.io_processors.interface import IOProcessor
|
||||
from vllm.utils.import_utils import resolve_obj_by_qualname
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def get_io_processor(
    vllm_config: VllmConfig, plugin_from_init: str | None = None
) -> IOProcessor | None:
    """Instantiate the IO processor plugin requested for the model, if any.

    Input/output processors are loaded as plugins under the
    'vllm.io_processor_plugins' group. Each plugin registers a function that
    returns the qualified class name of the processor to install.

    Args:
        vllm_config: Configuration passed to the processor's constructor and
            used to look up the plugin name in the model's HF config.
        plugin_from_init: Plugin name supplied by the caller. When truthy it
            takes precedence over the ``io_processor_plugin`` field of the
            HF config.

    Returns:
        The instantiated processor, or ``None`` when no plugin is requested.

    Raises:
        ValueError: If a plugin is requested but no plugin could be loaded,
            or the requested plugin is not among the loadable ones.
    """
    # Fall back to the custom `io_processor_plugin` field of the model's HF
    # config when the caller did not name a plugin.
    model_plugin = plugin_from_init or (
        vllm_config.model_config.hf_config.to_dict().get("io_processor_plugin")
    )

    if model_plugin is None:
        logger.debug("No IOProcessor plugins requested by the model")
        return None

    logger.debug("IOProcessor plugin to be loaded %s", model_plugin)

    # Load every installed plugin of the group, then ask each one for the
    # qualified name of its processor class.
    installed = load_plugins_by_group(IO_PROCESSOR_PLUGINS_GROUP)

    loadable_plugins = {}
    for name, factory in installed.items():
        try:
            assert callable(factory)
            qualname = factory()
        except Exception:
            logger.warning("Failed to load plugin %s.", name, exc_info=True)
        else:
            # A plugin returning None is treated as not available.
            if qualname is not None:
                loadable_plugins[name] = qualname

    if not loadable_plugins:
        raise ValueError(
            f"No IOProcessor plugins installed but one is required ({model_plugin})."
        )

    if model_plugin not in loadable_plugins:
        raise ValueError(
            f"The model requires the '{model_plugin}' IO Processor plugin "
            "but it is not installed. "
            f"Available plugins: {list(loadable_plugins.keys())}"
        )

    processor_cls = resolve_obj_by_qualname(loadable_plugins[model_plugin])
    return processor_cls(vllm_config)
|
||||
BIN
plugins/io_processors/__pycache__/__init__.cpython-312.pyc
Normal file
BIN
plugins/io_processors/__pycache__/__init__.cpython-312.pyc
Normal file
Binary file not shown.
BIN
plugins/io_processors/__pycache__/interface.cpython-312.pyc
Normal file
BIN
plugins/io_processors/__pycache__/interface.cpython-312.pyc
Normal file
Binary file not shown.
77
plugins/io_processors/interface.py
Normal file
77
plugins/io_processors/interface.py
Normal file
@@ -0,0 +1,77 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from collections.abc import AsyncGenerator, Sequence
|
||||
from typing import Any, Generic, TypeVar
|
||||
|
||||
from vllm.config import VllmConfig
|
||||
from vllm.entrypoints.openai.protocol import IOProcessorResponse
|
||||
from vllm.inputs.data import PromptType
|
||||
from vllm.outputs import PoolingRequestOutput
|
||||
from vllm.pooling_params import PoolingParams
|
||||
from vllm.sampling_params import SamplingParams
|
||||
|
||||
IOProcessorInput = TypeVar("IOProcessorInput")
|
||||
IOProcessorOutput = TypeVar("IOProcessorOutput")
|
||||
|
||||
|
||||
class IOProcessor(ABC, Generic[IOProcessorInput, IOProcessorOutput]):
    """Abstract base class for IO processor plugins.

    Subclasses implement request parsing, prompt pre-processing, output
    post-processing and response construction. The ``*_async`` variants
    defined here delegate to their synchronous counterparts.
    """

    def __init__(self, vllm_config: VllmConfig):
        """Store the configuration the processor was created with."""
        self.vllm_config = vllm_config

    @abstractmethod
    def pre_process(
        self,
        prompt: IOProcessorInput,
        request_id: str | None = None,
        **kwargs,
    ) -> PromptType | Sequence[PromptType]:
        """Turn the plugin input into a prompt or a sequence of prompts."""
        raise NotImplementedError

    async def pre_process_async(
        self,
        prompt: IOProcessorInput,
        request_id: str | None = None,
        **kwargs,
    ) -> PromptType | Sequence[PromptType]:
        """Async variant of ``pre_process``; calls the synchronous method."""
        prompts = self.pre_process(prompt, request_id, **kwargs)
        return prompts

    @abstractmethod
    def post_process(
        self,
        model_output: Sequence[PoolingRequestOutput],
        request_id: str | None = None,
        **kwargs,
    ) -> IOProcessorOutput:
        """Turn the model outputs into the plugin's output object."""
        raise NotImplementedError

    async def post_process_async(
        self,
        model_output: AsyncGenerator[tuple[int, PoolingRequestOutput]],
        request_id: str | None = None,
        **kwargs,
    ) -> IOProcessorOutput:
        """Collect the async outputs, order them by index and post-process.

        Outputs are not guaranteed to arrive in the order the inputs were fed
        to vLLM, so they are sorted by the index paired with each item before
        being handed to ``post_process``.
        """
        indexed_outputs = [(i, item) async for i, item in model_output]
        indexed_outputs.sort(key=lambda pair: pair[0])
        ordered_outputs = [item for _, item in indexed_outputs]
        return self.post_process(ordered_outputs, request_id, **kwargs)

    @abstractmethod
    def parse_request(self, request: Any) -> IOProcessorInput:
        """Extract the plugin input from an incoming request object."""
        raise NotImplementedError

    def validate_or_generate_params(
        self, params: SamplingParams | PoolingParams | None = None
    ) -> SamplingParams | PoolingParams:
        """Return ``params`` when truthy, otherwise a fresh ``PoolingParams``."""
        if params:
            return params
        return PoolingParams()

    @abstractmethod
    def output_to_response(
        self, plugin_output: IOProcessorOutput
    ) -> IOProcessorResponse:
        """Wrap the plugin output into an ``IOProcessorResponse``."""
        raise NotImplementedError
|
||||
0
plugins/lora_resolvers/__init__.py
Normal file
0
plugins/lora_resolvers/__init__.py
Normal file
BIN
plugins/lora_resolvers/__pycache__/__init__.cpython-312.pyc
Normal file
BIN
plugins/lora_resolvers/__pycache__/__init__.cpython-312.pyc
Normal file
Binary file not shown.
Binary file not shown.
52
plugins/lora_resolvers/filesystem_resolver.py
Normal file
52
plugins/lora_resolvers/filesystem_resolver.py
Normal file
@@ -0,0 +1,52 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
import json
|
||||
import os
|
||||
|
||||
import vllm.envs as envs
|
||||
from vllm.lora.request import LoRARequest
|
||||
from vllm.lora.resolver import LoRAResolver, LoRAResolverRegistry
|
||||
|
||||
|
||||
class FilesystemResolver(LoRAResolver):
    """Resolve LoRA adapters stored as sub-directories of a local directory."""

    def __init__(self, lora_cache_dir: str):
        """Remember the directory that is searched for adapters."""
        self.lora_cache_dir = lora_cache_dir

    async def resolve_lora(
        self, base_model_name: str, lora_name: str
    ) -> LoRARequest | None:
        """Build a ``LoRARequest`` for ``lora_name`` if a matching adapter exists.

        An adapter matches when
        ``<lora_cache_dir>/<lora_name>/adapter_config.json`` is a file whose
        JSON object has ``peft_type == "LORA"`` and whose
        ``base_model_name_or_path`` equals ``base_model_name``.

        Args:
            base_model_name: Base model the adapter must have been built for.
            lora_name: Adapter name; also the sub-directory that is looked up.

        Returns:
            The request for the adapter, or ``None`` when the adapter is
            missing, its config lacks the expected keys, or it does not match.
        """
        lora_path = os.path.join(self.lora_cache_dir, lora_name)
        adapter_config_path = os.path.join(lora_path, "adapter_config.json")

        # A missing adapter directory implies a missing config file, so one
        # check covers both cases.
        if not os.path.isfile(adapter_config_path):
            return None

        # JSON text is UTF-8; do not depend on the locale's default encoding.
        with open(adapter_config_path, encoding="utf-8") as file:
            adapter_config = json.load(file)

        # A config that is not an object, or lacks the expected keys, is
        # treated as "no match" rather than raising out of the resolver.
        is_match = (
            isinstance(adapter_config, dict)
            and adapter_config.get("peft_type") == "LORA"
            and adapter_config.get("base_model_name_or_path") == base_model_name
        )
        if not is_match:
            return None

        return LoRARequest(
            lora_name=lora_name,
            # NOTE(review): str hashes are salted per interpreter process
            # unless PYTHONHASHSEED is fixed, so this id presumably differs
            # between processes - confirm that consumers do not rely on it
            # being stable.
            lora_int_id=abs(hash(lora_name)),
            lora_path=lora_path,
        )
|
||||
|
||||
|
||||
def register_filesystem_resolver():
    """Register the filesystem LoRA Resolver with vLLM.

    Nothing is registered when ``VLLM_LORA_RESOLVER_CACHE_DIR`` is unset or
    empty.

    Raises:
        ValueError: If ``VLLM_LORA_RESOLVER_CACHE_DIR`` is set but does not
            point to an existing directory.
    """
    lora_cache_dir = envs.VLLM_LORA_RESOLVER_CACHE_DIR
    if not lora_cache_dir:
        return

    # os.path.isdir() is already False for paths that do not exist, so no
    # separate existence check is needed.
    if not os.path.isdir(lora_cache_dir):
        # Implicit concatenation keeps the message on one line; a backslash
        # continuation inside the literal would embed the next source line's
        # leading whitespace into the message.
        raise ValueError(
            "VLLM_LORA_RESOLVER_CACHE_DIR must be set to a valid directory "
            "for Filesystem Resolver plugin to function"
        )

    LoRAResolverRegistry.register_resolver(
        "Filesystem Resolver", FilesystemResolver(lora_cache_dir)
    )
|
||||
Reference in New Issue
Block a user