first commit

This commit is contained in:
2026-03-10 13:31:25 +08:00
parent ba974cecfa
commit b62b889355
2604 changed files with 438977 additions and 0 deletions

72
vllm/plugins/__init__.py Normal file
View File

@@ -0,0 +1,72 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import logging
from typing import Any, Callable
import vllm.envs as envs
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Entry-point group whose plugins are loaded and executed by
# `load_general_plugins`. `load_plugins_by_group` logs discovery for this
# group at DEBUG level and for every other group at INFO level.
DEFAULT_PLUGINS_GROUP = 'vllm.general_plugins'
# make sure one process only loads plugins once
plugins_loaded = False
def load_plugins_by_group(group: str) -> dict[str, Callable[[], Any]]:
    """Discover and load the entry points registered under ``group``.

    Every entry point found for ``group`` is logged. When
    ``envs.VLLM_PLUGINS`` is ``None`` all of them are loaded; otherwise only
    those whose name appears in it are loaded. An entry point that fails to
    load is logged with its traceback and skipped.

    Args:
        group: Name of the entry-point group to scan.

    Returns:
        Mapping from plugin name to the object returned by loading its
        entry point. Empty when the group has no entry points.
    """
    import sys
    if sys.version_info >= (3, 10):
        from importlib.metadata import entry_points
    else:
        from importlib_metadata import entry_points

    allow_list = envs.VLLM_PLUGINS

    found = entry_points(group=group)
    if not found:
        logger.debug("No plugins for group %s found.", group)
        return {}

    # The default group is reported at DEBUG, any other group at INFO.
    emit = logger.debug if group == DEFAULT_PLUGINS_GROUP else logger.info

    emit("Available plugins for group %s:", group)
    for entry in found:
        emit("- %s -> %s", entry.name, entry.value)

    load_everything = allow_list is None
    if load_everything:
        emit("All plugins in this group will be loaded. "
             "Set `VLLM_PLUGINS` to control which plugins to load.")

    loaded: dict[str, Callable[[], Any]] = {}
    for entry in found:
        if not load_everything:
            if entry.name not in allow_list:
                continue
            emit("Loading plugin %s", entry.name)
        try:
            loaded[entry.name] = entry.load()
        except Exception:
            # One broken plugin must not prevent the others from loading.
            logger.exception("Failed to load plugin %s", entry.name)

    return loaded
def load_general_plugins():
    """Load and run the general plugins, at most once per process.

    WARNING: plugins can be loaded for multiple times in different
    processes. They should be designed in a way that they can be loaded
    multiple times without causing issues.
    """
    global plugins_loaded
    if not plugins_loaded:
        # Flip the flag before loading so later calls return immediately.
        plugins_loaded = True
        general_plugins = load_plugins_by_group(group=DEFAULT_PLUGINS_GROUP)
        # General plugins only need their loaded callables to be executed.
        for run_plugin in general_plugins.values():
            run_plugin()

Binary file not shown.

View File

@@ -0,0 +1,68 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from __future__ import annotations
import logging
from typing import Optional
from vllm.config import VllmConfig
from vllm.plugins import load_plugins_by_group
from vllm.plugins.io_processors.interface import IOProcessor
from vllm.utils import resolve_obj_by_qualname
logger = logging.getLogger(__name__)
def get_io_processor(
        vllm_config: VllmConfig,
        plugin_from_init: Optional[str] = None) -> IOProcessor | None:
    """Instantiate the IOProcessor plugin requested for this model, if any.

    IO processors are loaded as plugins under the
    'vllm.io_processor_plugins' group. Each plugin registers a function that
    returns the qualified class name of the processor to install.

    Args:
        vllm_config: Configuration passed to the processor constructor; its
            model ``hf_config`` is consulted when ``plugin_from_init`` is
            falsy.
        plugin_from_init: Name of the plugin to use. When falsy, the name is
            read from the ``io_processor_plugin`` field of the hf_config.

    Returns:
        The instantiated processor, or ``None`` when no plugin is requested.

    Raises:
        ValueError: If a plugin is requested but no plugin could be loaded,
            or the requested plugin is not among the loadable ones.
    """
    if plugin_from_init:
        model_plugin = plugin_from_init
    else:
        # Fall back to the custom field in the model's hf_config.
        hf_settings = vllm_config.model_config.hf_config.to_dict()
        model_plugin = hf_settings.get("io_processor_plugin")

    if model_plugin is None:
        logger.debug("No IOProcessor plugins requested by the model")
        return None

    logger.debug("IOProcessor plugin to be loaded %s", model_plugin)

    # Ask every installed plugin in the group for its processor class name.
    installed = load_plugins_by_group('vllm.io_processor_plugins')
    loadable_plugins = {}
    for name, factory in installed.items():
        try:
            assert callable(factory)
            qualname = factory()
        except Exception:
            logger.warning("Failed to load plugin %s.", name, exc_info=True)
            continue
        if qualname is not None:
            loadable_plugins[name] = qualname

    if not loadable_plugins:
        raise ValueError("No IOProcessor plugins installed"
                         f" but one is required ({model_plugin}).")

    if model_plugin not in loadable_plugins:
        raise ValueError(
            f"The model requires the '{model_plugin}' IO Processor plugin "
            "but it is not installed. "
            f"Available plugins: {list(loadable_plugins.keys())}")

    processor_cls = resolve_obj_by_qualname(loadable_plugins[model_plugin])
    return processor_cls(vllm_config)

View File

@@ -0,0 +1,67 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from abc import ABC, abstractmethod
from collections.abc import AsyncGenerator, Sequence
from typing import Any, Generic, Optional, TypeVar, Union
from vllm.config import VllmConfig
from vllm.entrypoints.openai.protocol import IOProcessorResponse
from vllm.inputs.data import PromptType
from vllm.outputs import PoolingRequestOutput
# Plugin-specific input type: returned by `IOProcessor.parse_request` and
# accepted by `IOProcessor.pre_process`.
IOProcessorInput = TypeVar('IOProcessorInput')
# Plugin-specific output type: returned by `IOProcessor.post_process` and
# accepted by `IOProcessor.output_to_response`.
IOProcessorOutput = TypeVar('IOProcessorOutput')
class IOProcessor(ABC, Generic[IOProcessorInput, IOProcessorOutput]):
    """Abstract interface implemented by IO processor plugins.

    Subclasses provide the synchronous hooks; the ``*_async`` variants have
    default implementations that delegate to them.
    """

    def __init__(self, vllm_config: VllmConfig):
        self.vllm_config = vllm_config

    @abstractmethod
    def pre_process(
        self,
        prompt: IOProcessorInput,
        request_id: Optional[str] = None,
        **kwargs,
    ) -> Union[PromptType, Sequence[PromptType]]:
        """Turn the plugin input into one prompt or a sequence of prompts."""
        raise NotImplementedError

    async def pre_process_async(
        self,
        prompt: IOProcessorInput,
        request_id: Optional[str] = None,
        **kwargs,
    ) -> Union[PromptType, Sequence[PromptType]]:
        """Async variant of `pre_process`; delegates to it by default."""
        return self.pre_process(prompt, request_id, **kwargs)

    @abstractmethod
    def post_process(self,
                     model_output: Sequence[PoolingRequestOutput],
                     request_id: Optional[str] = None,
                     **kwargs) -> IOProcessorOutput:
        """Turn the model outputs into the plugin output."""
        raise NotImplementedError

    async def post_process_async(
        self,
        model_output: AsyncGenerator[tuple[int, PoolingRequestOutput]],
        request_id: Optional[str] = None,
        **kwargs,
    ) -> IOProcessorOutput:
        """Collect ``(index, output)`` pairs, order them, and post-process.

        Outputs are not guaranteed to arrive in the order they were fed to
        vLLM, so they are sorted by their index before `post_process` runs.
        """
        indexed = [(idx, out) async for idx, out in model_output]
        indexed.sort(key=lambda pair: pair[0])
        ordered = [out for _, out in indexed]
        return self.post_process(ordered, request_id, **kwargs)

    @abstractmethod
    def parse_request(self, request: Any) -> IOProcessorInput:
        """Convert an incoming request object into the plugin input."""
        raise NotImplementedError

    @abstractmethod
    def output_to_response(
            self, plugin_output: IOProcessorOutput) -> IOProcessorResponse:
        """Convert the plugin output into an `IOProcessorResponse`."""
        raise NotImplementedError

View File

@@ -0,0 +1,16 @@
# LoRA Resolver Plugins
This directory contains vLLM general plugins for dynamically discovering and loading LoRA adapters
via the LoRAResolver plugin framework.
Note that `VLLM_ALLOW_RUNTIME_LORA_UPDATING` must be set to true to allow LoRA resolver plugins
to work, and `VLLM_PLUGINS` must be set to include the desired resolver plugins.
## lora_filesystem_resolver
This LoRA Resolver is installed with vLLM by default.
To use, set `VLLM_LORA_RESOLVER_CACHE_DIR` to a local directory. When vLLM receives a request
for a LoRA adapter `foobar` that it doesn't currently recognize, it will look in that local directory
for a subdirectory `foobar` containing a LoRA adapter. If such an adapter exists, it will
load that adapter and then service the request as normal. That adapter will then be available
for future requests.

View File

View File

@@ -0,0 +1,50 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import json
import os
from typing import Optional
import vllm.envs as envs
from vllm.lora.request import LoRARequest
from vllm.lora.resolver import LoRAResolver, LoRAResolverRegistry
class FilesystemResolver(LoRAResolver):
    """Resolve LoRA adapters from subdirectories of a local cache directory.

    An adapter named ``foo`` is looked up at ``<lora_cache_dir>/foo`` and is
    accepted only if its ``adapter_config.json`` declares a LORA adapter for
    the requested base model.
    """

    def __init__(self, lora_cache_dir: str):
        self.lora_cache_dir = lora_cache_dir

    def _is_within_cache_dir(self, candidate: str) -> bool:
        """Return True if ``candidate`` stays inside the cache directory.

        Paths are normalised lexically (symlinks are not resolved), so
        ``..`` segments and absolute adapter names cannot escape the cache
        directory.
        """
        root = os.path.normcase(os.path.abspath(self.lora_cache_dir))
        target = os.path.normcase(os.path.abspath(candidate))
        try:
            return os.path.commonpath([root, target]) == root
        except ValueError:
            # Raised e.g. for paths on different drives on Windows.
            return False

    async def resolve_lora(self, base_model_name: str,
                           lora_name: str) -> Optional[LoRARequest]:
        """Look up ``lora_name`` in the cache directory.

        Args:
            base_model_name: Base model the adapter must have been built for.
            lora_name: Name of the adapter, used as the subdirectory name.

        Returns:
            A `LoRARequest` for the adapter, or ``None`` when the adapter is
            missing, lies outside the cache directory, has an unusable
            ``adapter_config.json`` or targets a different base model.
        """
        lora_path = os.path.join(self.lora_cache_dir, lora_name)
        # The name comes from a request: never follow it outside the cache.
        if not self._is_within_cache_dir(lora_path):
            return None

        adapter_config_path = os.path.join(lora_path, "adapter_config.json")
        if not os.path.isfile(adapter_config_path):
            return None

        with open(adapter_config_path, encoding="utf-8") as file:
            adapter_config = json.load(file)

        # Treat a config without the expected fields as "not a match"
        # instead of failing with KeyError/TypeError.
        if not isinstance(adapter_config, dict):
            return None
        if adapter_config.get("peft_type") != "LORA":
            return None
        if adapter_config.get("base_model_name_or_path") != base_model_name:
            return None

        # NOTE(review): str hashes are salted per process unless
        # PYTHONHASHSEED is fixed, so this id is not stable across
        # processes -- confirm that is acceptable for lora_int_id.
        return LoRARequest(lora_name=lora_name,
                           lora_int_id=abs(hash(lora_name)),
                           lora_path=lora_path)
def register_filesystem_resolver():
    """Register the filesystem LoRA Resolver with vLLM.

    Does nothing when ``VLLM_LORA_RESOLVER_CACHE_DIR`` is unset or empty.

    Raises:
        ValueError: If ``VLLM_LORA_RESOLVER_CACHE_DIR`` is set but does not
            point to an existing directory.
    """
    lora_cache_dir = envs.VLLM_LORA_RESOLVER_CACHE_DIR
    if not lora_cache_dir:
        return

    # isdir() is already False for paths that do not exist.
    if not os.path.isdir(lora_cache_dir):
        # Adjacent literals instead of a backslash continuation inside the
        # string, which would embed source indentation in the message.
        raise ValueError(
            "VLLM_LORA_RESOLVER_CACHE_DIR must be set to a valid directory "
            "for Filesystem Resolver plugin to function")

    fs_resolver = FilesystemResolver(lora_cache_dir)
    LoRAResolverRegistry.register_resolver("Filesystem Resolver",
                                           fs_resolver)