Add support for Qwen3 MoE+GPTQ

2025-11-15 20:14:45 +08:00
parent b296c44ae0
commit 8152e24cb2
35 changed files with 6468 additions and 574 deletions
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
--- a/vllm/transformers_utils/config_parser_base.py
+++ b/vllm/transformers_utils/config_parser_base.py
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from abc import ABC, abstractmethod
+from pathlib import Path
+
+from transformers import PretrainedConfig
+
+
+class ConfigParserBase(ABC):
+    @abstractmethod
+    def parse(
+        self,
+        model: str | Path,
+        trust_remote_code: bool,
+        revision: str | None = None,
+        code_revision: str | None = None,
+        **kwargs,
+    ) -> tuple[dict, PretrainedConfig]:
+        raise NotImplementedError
--- a/vllm/transformers_utils/dynamic_module.py
+++ b/vllm/transformers_utils/dynamic_module.py
@@ -0,0 +1,59 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import os
+
+from transformers.dynamic_module_utils import get_class_from_dynamic_module
+
+import vllm.envs as envs
+from vllm.logger import init_logger
+
+logger = init_logger(__name__)
+
+
+def try_get_class_from_dynamic_module(
+    class_reference: str,
+    pretrained_model_name_or_path: str,
+    cache_dir: str | os.PathLike | None = None,
+    force_download: bool = False,
+    resume_download: bool | None = None,
+    proxies: dict[str, str] | None = None,
+    token: bool | str | None = None,
+    revision: str | None = None,
+    local_files_only: bool = False,
+    repo_type: str | None = None,
+    code_revision: str | None = None,
+    warn_on_fail: bool = True,
+    **kwargs,
+) -> type | None:
+    """
+    As `transformers.dynamic_module_utils.get_class_from_dynamic_module`,
+    but ignoring any errors.
+    """
+    try:
+        return get_class_from_dynamic_module(
+            class_reference,
+            pretrained_model_name_or_path,
+            cache_dir=cache_dir,
+            force_download=force_download,
+            resume_download=resume_download,
+            proxies=proxies,
+            token=token,
+            revision=revision,
+            local_files_only=local_files_only,
+            repo_type=repo_type,
+            code_revision=code_revision,
+            **kwargs,
+        )
+    except Exception:
+        location = "ModelScope" if envs.VLLM_USE_MODELSCOPE else "HF Hub"
+
+        if warn_on_fail:
+            logger.warning(
+                "Unable to load %s from %s on %s.",
+                class_reference,
+                pretrained_model_name_or_path,
+                location,
+                exc_info=True,
+            )
+
+        return None
--- a/vllm/transformers_utils/utils.py
+++ b/vllm/transformers_utils/utils.py
@@ -2,22 +2,32 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project

 import json
+import os
+import struct
 from functools import cache
 from os import PathLike
 from pathlib import Path
-from typing import Optional, Union
+from typing import Any

-from vllm.envs import VLLM_MODEL_REDIRECT_PATH
+import vllm.envs as envs
 from vllm.logger import init_logger

 logger = init_logger(__name__)


 def is_s3(model_or_path: str) -> bool:
-    return model_or_path.lower().startswith('s3://')
+    return model_or_path.lower().startswith("s3://")


-def check_gguf_file(model: Union[str, PathLike]) -> bool:
+def is_gcs(model_or_path: str) -> bool:
+    return model_or_path.lower().startswith("gs://")
+
+
+def is_cloud_storage(model_or_path: str) -> bool:
+    return is_s3(model_or_path) or is_gcs(model_or_path)
+
+
+def check_gguf_file(model: str | PathLike) -> bool:
    """Check if the file is a GGUF model."""
    model = Path(model)
    if not model.is_file():
@@ -37,23 +47,26 @@ def check_gguf_file(model: Union[str, PathLike]) -> bool:

 def modelscope_list_repo_files(
    repo_id: str,
-    revision: Optional[str] = None,
-    token: Union[str, bool, None] = None,
+    revision: str | None = None,
+    token: str | bool | None = None,
 ) -> list[str]:
    """List files in a modelscope repo."""
    from modelscope.hub.api import HubApi
+
    api = HubApi()
    api.login(token)
    # same as huggingface_hub.list_repo_files
    files = [
-        file['Path'] for file in api.get_model_files(
-            model_id=repo_id, revision=revision, recursive=True)
-        if file['Type'] == 'blob'
+        file["Path"]
+        for file in api.get_model_files(
+            model_id=repo_id, revision=revision, recursive=True
+        )
+        if file["Type"] == "blob"
    ]
    return files


-def _maybe_json_dict(path: Union[str, PathLike]) -> dict[str, str]:
+def _maybe_json_dict(path: str | PathLike) -> dict[str, str]:
    with open(path) as f:
        try:
            return json.loads(f.read())
@@ -61,7 +74,7 @@ def _maybe_json_dict(path: Union[str, PathLike]) -> dict[str, str]:
            return dict[str, str]()


-def _maybe_space_split_dict(path: Union[str, PathLike]) -> dict[str, str]:
+def _maybe_space_split_dict(path: str | PathLike) -> dict[str, str]:
    parsed_dict = dict[str, str]()
    with open(path) as f:
        for line in f.readlines():
@@ -82,7 +95,7 @@ def maybe_model_redirect(model: str) -> str:
    :return: maybe redirect to a local folder
    """

-    model_redirect_path = VLLM_MODEL_REDIRECT_PATH
+    model_redirect_path = envs.VLLM_MODEL_REDIRECT_PATH

    if not model_redirect_path:
        return model
@@ -90,10 +103,28 @@ def maybe_model_redirect(model: str) -> str:
    if not Path(model_redirect_path).exists():
        return model

-    redirect_dict = (_maybe_json_dict(model_redirect_path)
-                     or _maybe_space_split_dict(model_redirect_path))
-    if (redirect_model := redirect_dict.get(model)):
+    redirect_dict = _maybe_json_dict(model_redirect_path) or _maybe_space_split_dict(
+        model_redirect_path
+    )
+    if redirect_model := redirect_dict.get(model):
        logger.info("model redirect: [ %s ] -> [ %s ]", model, redirect_model)
        return redirect_model

    return model
+
+
+def parse_safetensors_file_metadata(path: str | PathLike) -> dict[str, Any]:
+    with open(path, "rb") as f:
+        length_of_metadata = struct.unpack("<Q", f.read(8))[0]
+        metadata = json.loads(f.read(length_of_metadata).decode("utf-8"))
+        return metadata
+
+
+def convert_model_repo_to_path(model_repo: str) -> str:
+    """When VLLM_USE_MODELSCOPE is True convert a model
+    repository string to a Path str."""
+    if not envs.VLLM_USE_MODELSCOPE or Path(model_repo).exists():
+        return model_repo
+    from modelscope.utils.file_utils import get_model_cache_root
+
+    return os.path.join(get_model_cache_root(), model_repo)