From 92f0016e6f1519b1cd0713459d8a93f0d0273d46 Mon Sep 17 00:00:00 2001 From: Chranos <826995883@qq.com> Date: Thu, 5 Feb 2026 15:53:43 +0800 Subject: [PATCH] add dynamic register --- vllm-v0.6.2/vllm/config.py | 24 ++- .../vllm/model_executor/model_loader/utils.py | 8 +- .../vllm/model_executor/models/registry.py | 150 ++++++++++++++++-- .../vllm/transformers_utils/dynamic_module.py | 76 +++++++++ 4 files changed, 244 insertions(+), 14 deletions(-) create mode 100644 vllm-v0.6.2/vllm/transformers_utils/dynamic_module.py diff --git a/vllm-v0.6.2/vllm/config.py b/vllm-v0.6.2/vllm/config.py index b11c78d..33cb7fb 100644 --- a/vllm-v0.6.2/vllm/config.py +++ b/vllm-v0.6.2/vllm/config.py @@ -274,7 +274,13 @@ class ModelConfig: self, limit_mm_per_prompt: Optional[Mapping[str, int]] ) -> Optional["MultiModalConfig"]: architectures = getattr(self.hf_config, "architectures", []) - if ModelRegistry.is_multimodal_model(architectures): + if ModelRegistry.is_multimodal_model( + architectures, + model_path=self.model, + revision=self.revision, + trust_remote_code=self.trust_remote_code, + hf_config=self.hf_config, + ): return MultiModalConfig(limit_per_prompt=limit_mm_per_prompt or {}) if limit_mm_per_prompt: @@ -308,11 +314,23 @@ class ModelConfig: def _init_attention_free(self) -> bool: architectures = getattr(self.hf_config, "architectures", []) - return ModelRegistry.is_attention_free_model(architectures) + return ModelRegistry.is_attention_free_model( + architectures, + model_path=self.model, + revision=self.revision, + trust_remote_code=self.trust_remote_code, + hf_config=self.hf_config, + ) def _init_has_inner_state(self) -> bool: architectures = getattr(self.hf_config, "architectures", []) - return ModelRegistry.model_has_inner_state(architectures) + return ModelRegistry.model_has_inner_state( + architectures, + model_path=self.model, + revision=self.revision, + trust_remote_code=self.trust_remote_code, + hf_config=self.hf_config, + ) def _verify_tokenizer_mode(self) -> None: tokenizer_mode = self.tokenizer_mode.lower() diff --git a/vllm-v0.6.2/vllm/model_executor/model_loader/utils.py b/vllm-v0.6.2/vllm/model_executor/model_loader/utils.py index b95c0b7..506bd29 100644 --- a/vllm-v0.6.2/vllm/model_executor/model_loader/utils.py +++ b/vllm-v0.6.2/vllm/model_executor/model_loader/utils.py @@ -32,7 +32,13 @@ def get_model_architecture( and "MixtralForCausalLM" in architectures): architectures = ["QuantMixtralForCausalLM"] - return ModelRegistry.resolve_model_cls(architectures) + return ModelRegistry.resolve_model_cls( + architectures, + model_path=model_config.model, + revision=model_config.revision, + trust_remote_code=model_config.trust_remote_code, + hf_config=model_config.hf_config, + ) def get_architecture_class_name(model_config: ModelConfig) -> str: diff --git a/vllm-v0.6.2/vllm/model_executor/models/registry.py b/vllm-v0.6.2/vllm/model_executor/models/registry.py index 5a730ea..b61cd0c 100644 --- a/vllm-v0.6.2/vllm/model_executor/models/registry.py +++ b/vllm-v0.6.2/vllm/model_executor/models/registry.py @@ -16,9 +16,11 @@ from typing import (AbstractSet, Callable, Dict, List, Optional, Tuple, Type, import cloudpickle import torch.nn as nn +import transformers from vllm.logger import init_logger from vllm.platforms import current_platform +from vllm.transformers_utils.dynamic_module import try_get_class_from_dynamic_module from .interfaces import (has_inner_state, is_attention_free, supports_multimodal, supports_pp) @@ -157,6 +159,11 @@ _SPECULATIVE_DECODING_MODELS = { "MedusaModel": ("medusa", "Medusa"), "MLPSpeculatorPreTrainedModel": ("mlp_speculator", "MLPSpeculator"), } + +# Transformers backend models - for custom models with auto_map +_TRANSFORMERS_BACKEND_MODELS = { + "TransformersForCausalLM": ("transformers_backend", "TransformersForCausalLM"), +} # yapf: enable _VLLM_MODELS = { @@ -369,6 +376,62 @@ class _ModelRegistry: return _try_inspect_model_cls(model_arch, self.models[model_arch]) + def _try_resolve_transformers( + self, + architecture: str, + model_path: str, + revision: Optional[str], + trust_remote_code: bool, + hf_config: Optional[object] = None, + ) -> Optional[Type[nn.Module]]: + """ + Try to resolve a model architecture using the Transformers backend. + This allows loading custom models that define their own implementation + via the `auto_map` field in config.json. + + Returns the loaded model class if successful, None otherwise. + """ + # Check if architecture is in transformers + model_module = getattr(transformers, architecture, None) + + # Get auto_map from hf_config + auto_map: Dict[str, str] = {} + if hf_config is not None: + auto_map = getattr(hf_config, "auto_map", None) or {} + + if model_module is None and auto_map: + # Try to load from auto_map + # First, ensure config class is loaded + for prefix in ("AutoConfig", "AutoModel"): + for name, module in auto_map.items(): + if name.startswith(prefix): + try_get_class_from_dynamic_module( + module, + model_path, + trust_remote_code=trust_remote_code, + revision=revision, + warn_on_fail=False, + ) + + # Now try to load the model class + for name, module in auto_map.items(): + if name.startswith("AutoModel"): + model_module = try_get_class_from_dynamic_module( + module, + model_path, + trust_remote_code=trust_remote_code, + revision=revision, + warn_on_fail=True, + ) + if model_module is not None: + logger.info( + "Loaded custom model class %s from auto_map", + model_module.__name__ + ) + return model_module + + return model_module + def _normalize_archs( self, architectures: Union[str, List[str]], @@ -383,6 +446,10 @@ class _ModelRegistry: def inspect_model_cls( self, architectures: Union[str, List[str]], + model_path: Optional[str] = None, + revision: Optional[str] = None, + trust_remote_code: bool = False, + hf_config: Optional[object] = None, ) -> _ModelInfo: architectures = self._normalize_archs(architectures) @@ -391,11 +458,25 @@ class _ModelRegistry: if model_info is not None: return model_info + # Fallback: try to resolve using transformers backend (auto_map) + if model_path and trust_remote_code and hf_config: + for arch in architectures: + model_cls = self._try_resolve_transformers( + arch, model_path, revision, trust_remote_code, hf_config + ) + if model_cls is not None: + # Create ModelInfo from the dynamically loaded class + return _ModelInfo.from_model_cls(model_cls) + return self._raise_for_unsupported(architectures) def resolve_model_cls( self, architectures: Union[str, List[str]], + model_path: Optional[str] = None, + revision: Optional[str] = None, + trust_remote_code: bool = False, + hf_config: Optional[object] = None, ) -> Tuple[Type[nn.Module], str]: architectures = self._normalize_archs(architectures) @@ -404,39 +485,88 @@ class _ModelRegistry: if model_cls is not None: return (model_cls, arch) + # Fallback: try to resolve using transformers backend (auto_map) + if model_path and trust_remote_code and hf_config: + for arch in architectures: + model_cls = self._try_resolve_transformers( + arch, model_path, revision, trust_remote_code, hf_config + ) + if model_cls is not None: + return (model_cls, arch) + return self._raise_for_unsupported(architectures) def is_text_generation_model( self, architectures: Union[str, List[str]], + model_path: Optional[str] = None, + revision: Optional[str] = None, + trust_remote_code: bool = False, + hf_config: Optional[object] = None, ) -> bool: - return self.inspect_model_cls(architectures).is_text_generation_model + return self.inspect_model_cls( + architectures, model_path, revision, trust_remote_code, hf_config + ).is_text_generation_model def is_embedding_model( self, architectures: Union[str, List[str]], + model_path: Optional[str] = None, + revision: Optional[str] = None, + trust_remote_code: bool = False, + hf_config: Optional[object] = None, ) -> bool: - return self.inspect_model_cls(architectures).is_embedding_model + return self.inspect_model_cls( + architectures, model_path, revision, trust_remote_code, hf_config + ).is_embedding_model def is_multimodal_model( self, architectures: Union[str, List[str]], + model_path: Optional[str] = None, + revision: Optional[str] = None, + trust_remote_code: bool = False, + hf_config: Optional[object] = None, ) -> bool: - return self.inspect_model_cls(architectures).supports_multimodal + return self.inspect_model_cls( + architectures, model_path, revision, trust_remote_code, hf_config + ).supports_multimodal def is_pp_supported_model( self, architectures: Union[str, List[str]], + model_path: Optional[str] = None, + revision: Optional[str] = None, + trust_remote_code: bool = False, + hf_config: Optional[object] = None, ) -> bool: - return self.inspect_model_cls(architectures).supports_pp + return self.inspect_model_cls( + architectures, model_path, revision, trust_remote_code, hf_config + ).supports_pp - def model_has_inner_state(self, architectures: Union[str, - List[str]]) -> bool: - return self.inspect_model_cls(architectures).has_inner_state + def model_has_inner_state( + self, + architectures: Union[str, List[str]], + model_path: Optional[str] = None, + revision: Optional[str] = None, + trust_remote_code: bool = False, + hf_config: Optional[object] = None, + ) -> bool: + return self.inspect_model_cls( + architectures, model_path, revision, trust_remote_code, hf_config + ).has_inner_state - def is_attention_free_model(self, architectures: Union[str, - List[str]]) -> bool: - return self.inspect_model_cls(architectures).is_attention_free + def is_attention_free_model( + self, + architectures: Union[str, List[str]], + model_path: Optional[str] = None, + revision: Optional[str] = None, + trust_remote_code: bool = False, + hf_config: Optional[object] = None, + ) -> bool: + return self.inspect_model_cls( + architectures, model_path, revision, trust_remote_code, hf_config + ).is_attention_free ModelRegistry = _ModelRegistry({ diff --git a/vllm-v0.6.2/vllm/transformers_utils/dynamic_module.py b/vllm-v0.6.2/vllm/transformers_utils/dynamic_module.py new file mode 100644 index 0000000..a53e0ee --- /dev/null +++ b/vllm-v0.6.2/vllm/transformers_utils/dynamic_module.py @@ -0,0 +1,76 @@ +""" +Dynamic module loading utilities for custom HuggingFace models. +Ported from latest vLLM to support auto_map in model config. +""" +import os +from typing import Dict, Optional, Type, Union + +from transformers.dynamic_module_utils import ( + get_class_from_dynamic_module, + resolve_trust_remote_code, +) + +import vllm.envs as envs +from vllm.logger import init_logger + +logger = init_logger(__name__) + + +def try_get_class_from_dynamic_module( + class_reference: str, + pretrained_model_name_or_path: str, + trust_remote_code: bool, + cache_dir: Optional[Union[str, os.PathLike]] = None, + force_download: bool = False, + resume_download: Optional[bool] = None, + proxies: Optional[Dict[str, str]] = None, + token: Optional[Union[bool, str]] = None, + revision: Optional[str] = None, + local_files_only: bool = False, + repo_type: Optional[str] = None, + code_revision: Optional[str] = None, + warn_on_fail: bool = True, + **kwargs, +) -> Optional[Type]: + """ + As `transformers.dynamic_module_utils.get_class_from_dynamic_module`, + but ignoring any errors. + + This allows vLLM to load custom models that define their own + model classes via the `auto_map` field in config.json. + """ + try: + resolve_trust_remote_code( + trust_remote_code, + pretrained_model_name_or_path, + has_local_code=False, + has_remote_code=True, + ) + + return get_class_from_dynamic_module( + class_reference, + pretrained_model_name_or_path, + cache_dir=cache_dir, + force_download=force_download, + resume_download=resume_download, + proxies=proxies, + token=token, + revision=revision, + local_files_only=local_files_only, + repo_type=repo_type, + code_revision=code_revision, + **kwargs, + ) + except Exception: + location = "ModelScope" if envs.VLLM_USE_MODELSCOPE else "HF Hub" + + if warn_on_fail: + logger.warning( + "Unable to load %s from %s on %s.", + class_reference, + pretrained_model_name_or_path, + location, + exc_info=True, + ) + + return None