add dynamic register
This commit is contained in:
@@ -274,7 +274,13 @@ class ModelConfig:
|
||||
self, limit_mm_per_prompt: Optional[Mapping[str, int]]
|
||||
) -> Optional["MultiModalConfig"]:
|
||||
architectures = getattr(self.hf_config, "architectures", [])
|
||||
if ModelRegistry.is_multimodal_model(architectures):
|
||||
if ModelRegistry.is_multimodal_model(
|
||||
architectures,
|
||||
model_path=self.model,
|
||||
revision=self.revision,
|
||||
trust_remote_code=self.trust_remote_code,
|
||||
hf_config=self.hf_config,
|
||||
):
|
||||
return MultiModalConfig(limit_per_prompt=limit_mm_per_prompt or {})
|
||||
|
||||
if limit_mm_per_prompt:
|
||||
@@ -308,11 +314,23 @@ class ModelConfig:
|
||||
|
||||
def _init_attention_free(self) -> bool:
    """Report whether the configured model is attention-free.

    Reads ``architectures`` from ``self.hf_config`` (an empty list when the
    attribute is missing) and asks ``ModelRegistry``. The model path,
    revision, ``trust_remote_code`` flag and the HF config are forwarded
    as keyword arguments along with the architecture list.

    Returns:
        The registry's answer for the configured architectures.
    """
    architectures = getattr(self.hf_config, "architectures", [])
    # A single call that forwards the full model context; a bare
    # `is_attention_free_model(architectures)` would drop it.
    return ModelRegistry.is_attention_free_model(
        architectures,
        model_path=self.model,
        revision=self.revision,
        trust_remote_code=self.trust_remote_code,
        hf_config=self.hf_config,
    )
|
||||
|
||||
def _init_has_inner_state(self) -> bool:
    """Report whether the configured model keeps inner state.

    Reads ``architectures`` from ``self.hf_config`` (an empty list when the
    attribute is missing) and asks ``ModelRegistry``. The model path,
    revision, ``trust_remote_code`` flag and the HF config are forwarded
    as keyword arguments along with the architecture list.

    Returns:
        The registry's answer for the configured architectures.
    """
    architectures = getattr(self.hf_config, "architectures", [])
    # A single call that forwards the full model context; a bare
    # `model_has_inner_state(architectures)` would drop it.
    return ModelRegistry.model_has_inner_state(
        architectures,
        model_path=self.model,
        revision=self.revision,
        trust_remote_code=self.trust_remote_code,
        hf_config=self.hf_config,
    )
|
||||
|
||||
def _verify_tokenizer_mode(self) -> None:
|
||||
tokenizer_mode = self.tokenizer_mode.lower()
|
||||
|
||||
@@ -32,7 +32,13 @@ def get_model_architecture(
|
||||
and "MixtralForCausalLM" in architectures):
|
||||
architectures = ["QuantMixtralForCausalLM"]
|
||||
|
||||
return ModelRegistry.resolve_model_cls(architectures)
|
||||
return ModelRegistry.resolve_model_cls(
|
||||
architectures,
|
||||
model_path=model_config.model,
|
||||
revision=model_config.revision,
|
||||
trust_remote_code=model_config.trust_remote_code,
|
||||
hf_config=model_config.hf_config,
|
||||
)
|
||||
|
||||
|
||||
def get_architecture_class_name(model_config: ModelConfig) -> str:
|
||||
|
||||
@@ -16,9 +16,11 @@ from typing import (AbstractSet, Callable, Dict, List, Optional, Tuple, Type,
|
||||
|
||||
import cloudpickle
|
||||
import torch.nn as nn
|
||||
import transformers
|
||||
|
||||
from vllm.logger import init_logger
|
||||
from vllm.platforms import current_platform
|
||||
from vllm.transformers_utils.dynamic_module import try_get_class_from_dynamic_module
|
||||
|
||||
from .interfaces import (has_inner_state, is_attention_free,
|
||||
supports_multimodal, supports_pp)
|
||||
@@ -157,6 +159,11 @@ _SPECULATIVE_DECODING_MODELS = {
|
||||
"MedusaModel": ("medusa", "Medusa"),
|
||||
"MLPSpeculatorPreTrainedModel": ("mlp_speculator", "MLPSpeculator"),
|
||||
}
|
||||
|
||||
# Transformers backend models - for custom models with auto_map.
# Presumably each value is a (module name, class name) pair, mirroring the
# entries of the neighbouring model tables - TODO confirm.
# NOTE(review): no code in the visible hunks reads this table; verify that it
# is merged into the registry's model mapping, otherwise it has no effect.
_TRANSFORMERS_BACKEND_MODELS = {
    "TransformersForCausalLM": ("transformers_backend", "TransformersForCausalLM"),
}
|
||||
# yapf: enable
|
||||
|
||||
_VLLM_MODELS = {
|
||||
@@ -369,6 +376,62 @@ class _ModelRegistry:
|
||||
|
||||
return _try_inspect_model_cls(model_arch, self.models[model_arch])
|
||||
|
||||
def _try_resolve_transformers(
    self,
    architecture: str,
    model_path: str,
    revision: Optional[str],
    trust_remote_code: bool,
    hf_config: Optional[object] = None,
) -> Optional[Type[nn.Module]]:
    """
    Try to resolve a model architecture using the Transformers backend.

    This allows loading custom models that define their own implementation
    via the `auto_map` field in config.json.

    Resolution order:

    1. An attribute named `architecture` on the `transformers` package.
    2. The `AutoModel*` entries of `hf_config.auto_map`, trying entries
       whose class name equals `architecture` first and the remaining
       entries in their original order afterwards.

    Args:
        architecture: Architecture name to resolve.
        model_path: Model name or path handed to the dynamic-module loader.
        revision: Revision handed to the dynamic-module loader.
        trust_remote_code: Forwarded to the dynamic-module loader.
        hf_config: Object whose optional `auto_map` attribute maps
            `Auto*` names to class references; may be None.

    Returns the loaded model class if successful, None otherwise.
    """
    # NOTE(review): a class found directly on `transformers` is returned
    # as-is; confirm callers can consume a plain Hugging Face class.
    model_module = getattr(transformers, architecture, None)
    if model_module is not None:
        return model_module

    # `getattr` on None falls back to the default, so a missing config and
    # a missing/empty `auto_map` are handled the same way.
    auto_map: Dict[str, str] = getattr(hf_config, "auto_map", None) or {}
    if not auto_map:
        return None

    # First, ensure the config class is loaded, then warm up the model
    # classes. Failures are silent here; the selection loop below reports
    # them.
    for prefix in ("AutoConfig", "AutoModel"):
        for name, module in auto_map.items():
            if name.startswith(prefix):
                try_get_class_from_dynamic_module(
                    module,
                    model_path,
                    trust_remote_code=trust_remote_code,
                    revision=revision,
                    warn_on_fail=False,
                )

    # Class references look like "module.ClassName" (optionally prefixed
    # with "repo--"); the part after the last dot is the class name.
    # Taking the first `AutoModel*` entry blindly can return the bare
    # `AutoModel` base class instead of the requested architecture, so
    # matching entries are moved to the front. `list.sort` is stable, so
    # the relative order of the other entries is unchanged.
    candidates = [
        module for name, module in auto_map.items()
        if name.startswith("AutoModel")
    ]
    candidates.sort(
        key=lambda ref: str(ref).rpartition(".")[2] != architecture)

    for module in candidates:
        model_module = try_get_class_from_dynamic_module(
            module,
            model_path,
            trust_remote_code=trust_remote_code,
            revision=revision,
            warn_on_fail=True,
        )
        if model_module is not None:
            logger.info(
                "Loaded custom model class %s from auto_map",
                model_module.__name__
            )
            return model_module

    return None
|
||||
|
||||
def _normalize_archs(
|
||||
self,
|
||||
architectures: Union[str, List[str]],
|
||||
@@ -383,6 +446,10 @@ class _ModelRegistry:
|
||||
def inspect_model_cls(
|
||||
self,
|
||||
architectures: Union[str, List[str]],
|
||||
model_path: Optional[str] = None,
|
||||
revision: Optional[str] = None,
|
||||
trust_remote_code: bool = False,
|
||||
hf_config: Optional[object] = None,
|
||||
) -> _ModelInfo:
|
||||
architectures = self._normalize_archs(architectures)
|
||||
|
||||
@@ -391,11 +458,25 @@ class _ModelRegistry:
|
||||
if model_info is not None:
|
||||
return model_info
|
||||
|
||||
# Fallback: try to resolve using transformers backend (auto_map)
|
||||
if model_path and trust_remote_code and hf_config:
|
||||
for arch in architectures:
|
||||
model_cls = self._try_resolve_transformers(
|
||||
arch, model_path, revision, trust_remote_code, hf_config
|
||||
)
|
||||
if model_cls is not None:
|
||||
# Create ModelInfo from the dynamically loaded class
|
||||
return _ModelInfo.from_model_cls(model_cls)
|
||||
|
||||
return self._raise_for_unsupported(architectures)
|
||||
|
||||
def resolve_model_cls(
|
||||
self,
|
||||
architectures: Union[str, List[str]],
|
||||
model_path: Optional[str] = None,
|
||||
revision: Optional[str] = None,
|
||||
trust_remote_code: bool = False,
|
||||
hf_config: Optional[object] = None,
|
||||
) -> Tuple[Type[nn.Module], str]:
|
||||
architectures = self._normalize_archs(architectures)
|
||||
|
||||
@@ -404,39 +485,88 @@ class _ModelRegistry:
|
||||
if model_cls is not None:
|
||||
return (model_cls, arch)
|
||||
|
||||
# Fallback: try to resolve using transformers backend (auto_map)
|
||||
if model_path and trust_remote_code and hf_config:
|
||||
for arch in architectures:
|
||||
model_cls = self._try_resolve_transformers(
|
||||
arch, model_path, revision, trust_remote_code, hf_config
|
||||
)
|
||||
if model_cls is not None:
|
||||
return (model_cls, arch)
|
||||
|
||||
return self._raise_for_unsupported(architectures)
|
||||
|
||||
def is_text_generation_model(
    self,
    architectures: Union[str, List[str]],
    model_path: Optional[str] = None,
    revision: Optional[str] = None,
    trust_remote_code: bool = False,
    hf_config: Optional[object] = None,
) -> bool:
    """Return the `is_text_generation_model` flag of the inspected model.

    Every argument is passed positionally, in order, to
    `self.inspect_model_cls`, and the flag is read from the object it
    returns.
    """
    # Single return: the optional arguments must reach `inspect_model_cls`
    # instead of being dropped by an architectures-only call.
    return self.inspect_model_cls(
        architectures, model_path, revision, trust_remote_code, hf_config
    ).is_text_generation_model
|
||||
|
||||
def is_embedding_model(
    self,
    architectures: Union[str, List[str]],
    model_path: Optional[str] = None,
    revision: Optional[str] = None,
    trust_remote_code: bool = False,
    hf_config: Optional[object] = None,
) -> bool:
    """Return the `is_embedding_model` flag of the inspected model.

    Every argument is passed positionally, in order, to
    `self.inspect_model_cls`, and the flag is read from the object it
    returns.
    """
    # Single return: the optional arguments must reach `inspect_model_cls`
    # instead of being dropped by an architectures-only call.
    return self.inspect_model_cls(
        architectures, model_path, revision, trust_remote_code, hf_config
    ).is_embedding_model
|
||||
|
||||
def is_multimodal_model(
    self,
    architectures: Union[str, List[str]],
    model_path: Optional[str] = None,
    revision: Optional[str] = None,
    trust_remote_code: bool = False,
    hf_config: Optional[object] = None,
) -> bool:
    """Return the `supports_multimodal` flag of the inspected model.

    Every argument is passed positionally, in order, to
    `self.inspect_model_cls`, and the flag is read from the object it
    returns.
    """
    # Single return: the optional arguments must reach `inspect_model_cls`
    # instead of being dropped by an architectures-only call.
    return self.inspect_model_cls(
        architectures, model_path, revision, trust_remote_code, hf_config
    ).supports_multimodal
|
||||
|
||||
def is_pp_supported_model(
    self,
    architectures: Union[str, List[str]],
    model_path: Optional[str] = None,
    revision: Optional[str] = None,
    trust_remote_code: bool = False,
    hf_config: Optional[object] = None,
) -> bool:
    """Return the `supports_pp` flag of the inspected model.

    Every argument is passed positionally, in order, to
    `self.inspect_model_cls`, and the flag is read from the object it
    returns.
    """
    # Single return: the optional arguments must reach `inspect_model_cls`
    # instead of being dropped by an architectures-only call.
    return self.inspect_model_cls(
        architectures, model_path, revision, trust_remote_code, hf_config
    ).supports_pp
|
||||
|
||||
def model_has_inner_state(
    self,
    architectures: Union[str, List[str]],
    model_path: Optional[str] = None,
    revision: Optional[str] = None,
    trust_remote_code: bool = False,
    hf_config: Optional[object] = None,
) -> bool:
    """Return the `has_inner_state` flag of the inspected model.

    Every argument is passed positionally, in order, to
    `self.inspect_model_cls`, and the flag is read from the object it
    returns.
    """
    # Only this extended definition is kept; an architectures-only variant
    # defined alongside it would be dead code.
    return self.inspect_model_cls(
        architectures, model_path, revision, trust_remote_code, hf_config
    ).has_inner_state
|
||||
|
||||
def is_attention_free_model(
    self,
    architectures: Union[str, List[str]],
    model_path: Optional[str] = None,
    revision: Optional[str] = None,
    trust_remote_code: bool = False,
    hf_config: Optional[object] = None,
) -> bool:
    """Return the `is_attention_free` flag of the inspected model.

    Every argument is passed positionally, in order, to
    `self.inspect_model_cls`, and the flag is read from the object it
    returns.
    """
    # Only this extended definition is kept; an architectures-only variant
    # defined alongside it would be dead code.
    return self.inspect_model_cls(
        architectures, model_path, revision, trust_remote_code, hf_config
    ).is_attention_free
|
||||
|
||||
|
||||
ModelRegistry = _ModelRegistry({
|
||||
|
||||
76
vllm-v0.6.2/vllm/transformers_utils/dynamic_module.py
Normal file
76
vllm-v0.6.2/vllm/transformers_utils/dynamic_module.py
Normal file
@@ -0,0 +1,76 @@
|
||||
"""
|
||||
Dynamic module loading utilities for custom HuggingFace models.
|
||||
Ported from latest vLLM to support auto_map in model config.
|
||||
"""
|
||||
import os
|
||||
from typing import Dict, Optional, Type, Union
|
||||
|
||||
from transformers.dynamic_module_utils import (
|
||||
get_class_from_dynamic_module,
|
||||
resolve_trust_remote_code,
|
||||
)
|
||||
|
||||
import vllm.envs as envs
|
||||
from vllm.logger import init_logger
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
|
||||
def try_get_class_from_dynamic_module(
    class_reference: str,
    pretrained_model_name_or_path: str,
    trust_remote_code: bool,
    cache_dir: Optional[Union[str, os.PathLike]] = None,
    force_download: bool = False,
    resume_download: Optional[bool] = None,
    proxies: Optional[Dict[str, str]] = None,
    token: Optional[Union[bool, str]] = None,
    revision: Optional[str] = None,
    local_files_only: bool = False,
    repo_type: Optional[str] = None,
    code_revision: Optional[str] = None,
    warn_on_fail: bool = True,
    **kwargs,
) -> Optional[Type]:
    """
    Best-effort variant of
    `transformers.dynamic_module_utils.get_class_from_dynamic_module`.

    `resolve_trust_remote_code` is called first with `trust_remote_code`
    and the model name/path; the class lookup follows with all download
    options forwarded unchanged. Any exception raised by either step is
    swallowed: the function then returns None and, when `warn_on_fail` is
    true, logs a warning that includes the traceback.

    This lets vLLM load custom models that declare their own model
    classes through the `auto_map` field in config.json.
    """
    # Options handed straight through to the transformers loader.
    loader_options = {
        "cache_dir": cache_dir,
        "force_download": force_download,
        "resume_download": resume_download,
        "proxies": proxies,
        "token": token,
        "revision": revision,
        "local_files_only": local_files_only,
        "repo_type": repo_type,
        "code_revision": code_revision,
    }

    try:
        resolve_trust_remote_code(
            trust_remote_code,
            pretrained_model_name_or_path,
            has_local_code=False,
            has_remote_code=True,
        )
        loaded_cls = get_class_from_dynamic_module(
            class_reference,
            pretrained_model_name_or_path,
            **loader_options,
            **kwargs,
        )
    except Exception:
        # Name the hub in the message according to the ModelScope switch.
        hub_name = "HF Hub"
        if envs.VLLM_USE_MODELSCOPE:
            hub_name = "ModelScope"

        if warn_on_fail:
            logger.warning(
                "Unable to load %s from %s on %s.",
                class_reference,
                pretrained_model_name_or_path,
                hub_name,
                exc_info=True,
            )

        return None
    else:
        return loaded_cls
|
||||
Reference in New Issue
Block a user