Add support for Qwen3 MoE+GPTQ

2025-11-15 20:14:45 +08:00
parent b296c44ae0
commit 8152e24cb2
35 changed files with 6468 additions and 574 deletions
--- a/vllm/transformers_utils/utils.py
+++ b/vllm/transformers_utils/utils.py
@@ -2,22 +2,32 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project

 import json
+import os
+import struct
 from functools import cache
 from os import PathLike
 from pathlib import Path
-from typing import Optional, Union
+from typing import Any

-from vllm.envs import VLLM_MODEL_REDIRECT_PATH
+import vllm.envs as envs
 from vllm.logger import init_logger

 logger = init_logger(__name__)


 def is_s3(model_or_path: str) -> bool:
-    return model_or_path.lower().startswith('s3://')
+    return model_or_path.lower().startswith("s3://")


-def check_gguf_file(model: Union[str, PathLike]) -> bool:
+def is_gcs(model_or_path: str) -> bool:
+    return model_or_path.lower().startswith("gs://")
+
+
+def is_cloud_storage(model_or_path: str) -> bool:
+    return is_s3(model_or_path) or is_gcs(model_or_path)
+
+
+def check_gguf_file(model: str | PathLike) -> bool:
    """Check if the file is a GGUF model."""
    model = Path(model)
    if not model.is_file():
@@ -37,23 +47,26 @@ def check_gguf_file(model: Union[str, PathLike]) -> bool:

 def modelscope_list_repo_files(
    repo_id: str,
-    revision: Optional[str] = None,
-    token: Union[str, bool, None] = None,
+    revision: str | None = None,
+    token: str | bool | None = None,
 ) -> list[str]:
    """List files in a modelscope repo."""
    from modelscope.hub.api import HubApi
+
    api = HubApi()
    api.login(token)
    # same as huggingface_hub.list_repo_files
    files = [
-        file['Path'] for file in api.get_model_files(
-            model_id=repo_id, revision=revision, recursive=True)
-        if file['Type'] == 'blob'
+        file["Path"]
+        for file in api.get_model_files(
+            model_id=repo_id, revision=revision, recursive=True
+        )
+        if file["Type"] == "blob"
    ]
    return files


-def _maybe_json_dict(path: Union[str, PathLike]) -> dict[str, str]:
+def _maybe_json_dict(path: str | PathLike) -> dict[str, str]:
    with open(path) as f:
        try:
            return json.loads(f.read())
@@ -61,7 +74,7 @@ def _maybe_json_dict(path: Union[str, PathLike]) -> dict[str, str]:
            return dict[str, str]()


-def _maybe_space_split_dict(path: Union[str, PathLike]) -> dict[str, str]:
+def _maybe_space_split_dict(path: str | PathLike) -> dict[str, str]:
    parsed_dict = dict[str, str]()
    with open(path) as f:
        for line in f.readlines():
@@ -82,7 +95,7 @@ def maybe_model_redirect(model: str) -> str:
    :return: maybe redirect to a local folder
    """

-    model_redirect_path = VLLM_MODEL_REDIRECT_PATH
+    model_redirect_path = envs.VLLM_MODEL_REDIRECT_PATH

    if not model_redirect_path:
        return model
@@ -90,10 +103,28 @@ def maybe_model_redirect(model: str) -> str:
    if not Path(model_redirect_path).exists():
        return model

-    redirect_dict = (_maybe_json_dict(model_redirect_path)
-                     or _maybe_space_split_dict(model_redirect_path))
-    if (redirect_model := redirect_dict.get(model)):
+    redirect_dict = _maybe_json_dict(model_redirect_path) or _maybe_space_split_dict(
+        model_redirect_path
+    )
+    if redirect_model := redirect_dict.get(model):
        logger.info("model redirect: [ %s ] -> [ %s ]", model, redirect_model)
        return redirect_model

    return model
+
+
+def parse_safetensors_file_metadata(path: str | PathLike) -> dict[str, Any]:
+    with open(path, "rb") as f:
+        length_of_metadata = struct.unpack("<Q", f.read(8))[0]
+        metadata = json.loads(f.read(length_of_metadata).decode("utf-8"))
+        return metadata
+
+
+def convert_model_repo_to_path(model_repo: str) -> str:
+    """When VLLM_USE_MODELSCOPE is True convert a model
+    repository string to a Path str."""
+    if not envs.VLLM_USE_MODELSCOPE or Path(model_repo).exists():
+        return model_repo
+    from modelscope.utils.file_utils import get_model_cache_root
+
+    return os.path.join(get_model_cache_root(), model_repo)