first commit
vllm/plugins/__init__.py (new file, 72 lines)
@@ -0,0 +1,72 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import logging
from typing import Any, Callable

import vllm.envs as envs

logger = logging.getLogger(__name__)

DEFAULT_PLUGINS_GROUP = 'vllm.general_plugins'

# make sure one process only loads plugins once
plugins_loaded = False


def load_plugins_by_group(group: str) -> dict[str, Callable[[], Any]]:
    import sys
    if sys.version_info < (3, 10):
        from importlib_metadata import entry_points
    else:
        from importlib.metadata import entry_points

    allowed_plugins = envs.VLLM_PLUGINS

    discovered_plugins = entry_points(group=group)
    if len(discovered_plugins) == 0:
        logger.debug("No plugins for group %s found.", group)
        return {}

    # Check whether this is the default plugins group
    is_default_group = (group == DEFAULT_PLUGINS_GROUP)
    # Use DEBUG for the default group and INFO for non-default groups
    log_level = logger.debug if is_default_group else logger.info

    log_level("Available plugins for group %s:", group)
    for plugin in discovered_plugins:
        log_level("- %s -> %s", plugin.name, plugin.value)

    if allowed_plugins is None:
        log_level("All plugins in this group will be loaded. "
                  "Set `VLLM_PLUGINS` to control which plugins to load.")

    plugins = dict[str, Callable[[], Any]]()
    for plugin in discovered_plugins:
        if allowed_plugins is None or plugin.name in allowed_plugins:
            if allowed_plugins is not None:
                log_level("Loading plugin %s", plugin.name)

            try:
                func = plugin.load()
                plugins[plugin.name] = func
            except Exception:
                logger.exception("Failed to load plugin %s", plugin.name)

    return plugins


def load_general_plugins():
    """WARNING: plugins can be loaded multiple times in different
    processes. They should be designed so that they can be loaded
    multiple times without causing issues.
    """
    global plugins_loaded
    if plugins_loaded:
        return
    plugins_loaded = True

    plugins = load_plugins_by_group(group=DEFAULT_PLUGINS_GROUP)
    # for general plugins, we only need to execute the loaded functions
    for func in plugins.values():
        func()
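load_general_plugins() discovers entry points in the 'vllm.general_plugins' group and calls each loaded function once per process. An out-of-tree package can therefore hook into vLLM by exposing an entry point in that group. A minimal sketch, assuming a hypothetical package my_vllm_plugin (all names below are illustrative, not part of this commit):

# setup.py of a hypothetical out-of-tree plugin package
from setuptools import setup

setup(
    name="my-vllm-plugin",
    version="0.1",
    packages=["my_vllm_plugin"],
    entry_points={
        # the group scanned by load_plugins_by_group(DEFAULT_PLUGINS_GROUP)
        "vllm.general_plugins": [
            "my_plugin = my_vllm_plugin:register",
        ],
    },
)

# my_vllm_plugin/__init__.py (hypothetical)
def register():
    # Per the warning in load_general_plugins(), this may run once in each
    # of several processes, so it must be idempotent.
    pass

With this installed, setting VLLM_PLUGINS=my_plugin would restrict loading to that single plugin, per the allowed_plugins check above.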
vllm/plugins/__pycache__/__init__.cpython-310.pyc (new binary file, not shown)
vllm/plugins/io_processors/__init__.py (new file, 68 lines)
@@ -0,0 +1,68 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

from __future__ import annotations

import logging
from typing import Optional

from vllm.config import VllmConfig
from vllm.plugins import load_plugins_by_group
from vllm.plugins.io_processors.interface import IOProcessor
from vllm.utils import resolve_obj_by_qualname

logger = logging.getLogger(__name__)


def get_io_processor(
        vllm_config: VllmConfig,
        plugin_from_init: Optional[str] = None) -> IOProcessor | None:
    # Input/Output processors are loaded as plugins under the
    # 'vllm.io_processor_plugins' group. Similar to platform
    # plugins, these plugins register a function that returns the class
    # name of the processor to install.

    if plugin_from_init:
        model_plugin = plugin_from_init
    else:
        # A plugin can also be specified via the model config:
        # retrieve the model-specific plugin, if available, from a
        # custom field in the model's hf_config.
        hf_config = vllm_config.model_config.hf_config.to_dict()
        model_plugin = hf_config.get("io_processor_plugin")

    if model_plugin is None:
        logger.debug("No IOProcessor plugins requested by the model")
        return None

    logger.debug("IOProcessor plugin to be loaded: %s", model_plugin)

    # Load all installed plugins in the group
    multimodal_data_processor_plugins = \
        load_plugins_by_group('vllm.io_processor_plugins')

    loadable_plugins = {}
    for name, func in multimodal_data_processor_plugins.items():
        try:
            assert callable(func)
            processor_cls_qualname = func()
            if processor_cls_qualname is not None:
                loadable_plugins[name] = processor_cls_qualname
        except Exception:
            logger.warning("Failed to load plugin %s.", name, exc_info=True)

    if len(loadable_plugins) == 0:
        raise ValueError("No IOProcessor plugins installed"
                         f" but one is required ({model_plugin}).")

    if model_plugin not in loadable_plugins:
        raise ValueError(
            f"The model requires the '{model_plugin}' IO Processor plugin "
            "but it is not installed. "
            f"Available plugins: {list(loadable_plugins.keys())}")

    activated_plugin_cls = loadable_plugins[model_plugin]

    return resolve_obj_by_qualname(activated_plugin_cls)(vllm_config)
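Concretely, an IO Processor plugin's entry point in the 'vllm.io_processor_plugins' group resolves to a zero-argument function that returns the processor's qualified class name. A sketch under assumed names (the dotted "module.ClassName" form is what resolve_obj_by_qualname is assumed to expect):

# my_io_plugin/__init__.py (hypothetical)
def get_processor() -> str:
    # Resolved via resolve_obj_by_qualname and instantiated with the
    # VllmConfig; the class is expected to subclass IOProcessor.
    return "my_io_plugin.processor.MyIOProcessor"

Alternatively, a model can request a processor by carrying an io_processor_plugin field in its hf_config, which the else-branch above reads.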
vllm/plugins/io_processors/__pycache__/__init__.cpython-310.pyc (new binary file, not shown)
vllm/plugins/io_processors/__pycache__/interface.cpython-310.pyc (new binary file, not shown)
vllm/plugins/io_processors/interface.py (new file, 67 lines)
@@ -0,0 +1,67 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

from abc import ABC, abstractmethod
from collections.abc import AsyncGenerator, Sequence
from typing import Any, Generic, Optional, TypeVar, Union

from vllm.config import VllmConfig
from vllm.entrypoints.openai.protocol import IOProcessorResponse
from vllm.inputs.data import PromptType
from vllm.outputs import PoolingRequestOutput

IOProcessorInput = TypeVar('IOProcessorInput')
IOProcessorOutput = TypeVar('IOProcessorOutput')


class IOProcessor(ABC, Generic[IOProcessorInput, IOProcessorOutput]):

    def __init__(self, vllm_config: VllmConfig):
        self.vllm_config = vllm_config

    @abstractmethod
    def pre_process(
        self,
        prompt: IOProcessorInput,
        request_id: Optional[str] = None,
        **kwargs,
    ) -> Union[PromptType, Sequence[PromptType]]:
        raise NotImplementedError

    async def pre_process_async(
        self,
        prompt: IOProcessorInput,
        request_id: Optional[str] = None,
        **kwargs,
    ) -> Union[PromptType, Sequence[PromptType]]:
        return self.pre_process(prompt, request_id, **kwargs)

    @abstractmethod
    def post_process(self,
                     model_output: Sequence[PoolingRequestOutput],
                     request_id: Optional[str] = None,
                     **kwargs) -> IOProcessorOutput:
        raise NotImplementedError

    async def post_process_async(
        self,
        model_output: AsyncGenerator[tuple[int, PoolingRequestOutput]],
        request_id: Optional[str] = None,
        **kwargs,
    ) -> IOProcessorOutput:
        # We cannot guarantee outputs are returned in the same order they
        # were fed to vLLM, so sort them by id before post-processing.
        sorted_output = sorted([(i, item) async for i, item in model_output],
                               key=lambda output: output[0])
        collected_output = [output[1] for output in sorted_output]
        return self.post_process(collected_output, request_id, **kwargs)

    @abstractmethod
    def parse_request(self, request: Any) -> IOProcessorInput:
        raise NotImplementedError

    @abstractmethod
    def output_to_response(
            self, plugin_output: IOProcessorOutput) -> IOProcessorResponse:
        raise NotImplementedError
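To make the contract concrete, here is a minimal subclass sketch. The pooled-output access pattern and the IOProcessorResponse field names are assumptions for illustration, not part of this interface:

from collections.abc import Sequence
from typing import Any, Optional

from vllm.entrypoints.openai.protocol import IOProcessorResponse
from vllm.inputs.data import PromptType
from vllm.outputs import PoolingRequestOutput
from vllm.plugins.io_processors.interface import IOProcessor


class TextEmbeddingIOProcessor(IOProcessor[str, list[float]]):
    """Hypothetical processor: plain text in, flat embedding list out."""

    def parse_request(self, request: Any) -> str:
        # Assume the request body is the prompt text itself.
        return str(request)

    def pre_process(self,
                    prompt: str,
                    request_id: Optional[str] = None,
                    **kwargs) -> PromptType:
        return prompt  # a plain string is a valid PromptType

    def post_process(self,
                     model_output: Sequence[PoolingRequestOutput],
                     request_id: Optional[str] = None,
                     **kwargs) -> list[float]:
        # Assumed: each output carries a pooled tensor under .outputs.data.
        return [x for out in model_output
                for x in out.outputs.data.tolist()]

    def output_to_response(
            self, plugin_output: list[float]) -> IOProcessorResponse:
        # Assumed field names on IOProcessorResponse.
        return IOProcessorResponse(request_id=None, data=plugin_output)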
vllm/plugins/lora_resolvers/README.md (new file, 16 lines)
@@ -0,0 +1,16 @@
# LoRA Resolver Plugins

This directory contains vLLM general plugins for dynamically discovering and loading LoRA adapters
via the LoRAResolver plugin framework.

Note that `VLLM_ALLOW_RUNTIME_LORA_UPDATING` must be set to true for LoRA resolver plugins
to work, and `VLLM_PLUGINS` must be set to include the desired resolver plugins.

## lora_filesystem_resolver

This LoRA Resolver is installed with vLLM by default.
To use it, set `VLLM_LORA_RESOLVER_CACHE_DIR` to a local directory. When vLLM receives a request
for a LoRA adapter `foobar` that it doesn't currently recognize, it will look in that directory
for a subdirectory `foobar` containing a LoRA adapter. If such an adapter exists, it will
load the adapter and then service the request as normal. The adapter then remains available
for future requests. An example layout is shown below.
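For illustration, with `VLLM_LORA_RESOLVER_CACHE_DIR=/path/to/lora/cache`, a request for adapter `foobar` would be served from a layout like the following (the file names are the usual PEFT outputs; exact contents vary):

    /path/to/lora/cache/
        foobar/
            adapter_config.json
            adapter_model.safetensors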
vllm/plugins/lora_resolvers/__init__.py (new empty file)
vllm/plugins/lora_resolvers/__pycache__/__init__.cpython-310.pyc (new binary file, not shown)
vllm/plugins/lora_resolvers/filesystem_resolver.py (new file, 50 lines)
@@ -0,0 +1,50 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import json
import os
from typing import Optional

import vllm.envs as envs
from vllm.lora.request import LoRARequest
from vllm.lora.resolver import LoRAResolver, LoRAResolverRegistry


class FilesystemResolver(LoRAResolver):

    def __init__(self, lora_cache_dir: str):
        self.lora_cache_dir = lora_cache_dir

    async def resolve_lora(self, base_model_name: str,
                           lora_name: str) -> Optional[LoRARequest]:
        lora_path = os.path.join(self.lora_cache_dir, lora_name)
        if os.path.exists(lora_path):
            adapter_config_path = os.path.join(self.lora_cache_dir, lora_name,
                                               "adapter_config.json")
            if os.path.exists(adapter_config_path):
                with open(adapter_config_path) as file:
                    adapter_config = json.load(file)
                if adapter_config["peft_type"] == "LORA" and adapter_config[
                        "base_model_name_or_path"] == base_model_name:
                    lora_request = LoRARequest(lora_name=lora_name,
                                               lora_int_id=abs(
                                                   hash(lora_name)),
                                               lora_path=lora_path)
                    return lora_request
        return None


def register_filesystem_resolver():
    """Register the filesystem LoRA Resolver with vLLM."""

    lora_cache_dir = envs.VLLM_LORA_RESOLVER_CACHE_DIR
    if lora_cache_dir:
        if not os.path.exists(lora_cache_dir) or not os.path.isdir(
                lora_cache_dir):
            raise ValueError(
                "VLLM_LORA_RESOLVER_CACHE_DIR must be set to a valid "
                "directory for the Filesystem Resolver plugin to function")
        fs_resolver = FilesystemResolver(lora_cache_dir)
        LoRAResolverRegistry.register_resolver("Filesystem Resolver",
                                               fs_resolver)
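As a usage sketch, the resolver above accepts an adapter directory whose adapter_config.json carries the two fields it checks; everything here (paths, adapter name, model name) is illustrative:

import json
import os

cache_dir = "/path/to/lora/cache"  # value of VLLM_LORA_RESOLVER_CACHE_DIR
adapter_dir = os.path.join(cache_dir, "foobar")  # hypothetical adapter name
os.makedirs(adapter_dir, exist_ok=True)

# Minimal config satisfying the checks in resolve_lora(); real PEFT
# configs carry more fields (rank, target modules, ...).
config = {
    "peft_type": "LORA",
    # must equal the base model name the resolver is queried with
    "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct",
}
with open(os.path.join(adapter_dir, "adapter_config.json"), "w") as f:
    json.dump(config, f)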