Add support for Qwen3 MoE+GPTQ

This commit is contained in:
2025-11-15 20:14:45 +08:00
parent b296c44ae0
commit 8152e24cb2
35 changed files with 6468 additions and 574 deletions

View File

@@ -2,22 +2,32 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import json
import os
import struct
from functools import cache
from os import PathLike
from pathlib import Path
from typing import Optional, Union
from typing import Any
from vllm.envs import VLLM_MODEL_REDIRECT_PATH
import vllm.envs as envs
from vllm.logger import init_logger
logger = init_logger(__name__)
def is_s3(model_or_path: str) -> bool:
return model_or_path.lower().startswith('s3://')
return model_or_path.lower().startswith("s3://")
def check_gguf_file(model: Union[str, PathLike]) -> bool:
def is_gcs(model_or_path: str) -> bool:
return model_or_path.lower().startswith("gs://")
def is_cloud_storage(model_or_path: str) -> bool:
return is_s3(model_or_path) or is_gcs(model_or_path)
def check_gguf_file(model: str | PathLike) -> bool:
"""Check if the file is a GGUF model."""
model = Path(model)
if not model.is_file():
@@ -37,23 +47,26 @@ def check_gguf_file(model: Union[str, PathLike]) -> bool:
def modelscope_list_repo_files(
repo_id: str,
revision: Optional[str] = None,
token: Union[str, bool, None] = None,
revision: str | None = None,
token: str | bool | None = None,
) -> list[str]:
"""List files in a modelscope repo."""
from modelscope.hub.api import HubApi
api = HubApi()
api.login(token)
# same as huggingface_hub.list_repo_files
files = [
file['Path'] for file in api.get_model_files(
model_id=repo_id, revision=revision, recursive=True)
if file['Type'] == 'blob'
file["Path"]
for file in api.get_model_files(
model_id=repo_id, revision=revision, recursive=True
)
if file["Type"] == "blob"
]
return files
def _maybe_json_dict(path: Union[str, PathLike]) -> dict[str, str]:
def _maybe_json_dict(path: str | PathLike) -> dict[str, str]:
with open(path) as f:
try:
return json.loads(f.read())
@@ -61,7 +74,7 @@ def _maybe_json_dict(path: Union[str, PathLike]) -> dict[str, str]:
return dict[str, str]()
def _maybe_space_split_dict(path: Union[str, PathLike]) -> dict[str, str]:
def _maybe_space_split_dict(path: str | PathLike) -> dict[str, str]:
parsed_dict = dict[str, str]()
with open(path) as f:
for line in f.readlines():
@@ -82,7 +95,7 @@ def maybe_model_redirect(model: str) -> str:
:return: maybe redirect to a local folder
"""
model_redirect_path = VLLM_MODEL_REDIRECT_PATH
model_redirect_path = envs.VLLM_MODEL_REDIRECT_PATH
if not model_redirect_path:
return model
@@ -90,10 +103,28 @@ def maybe_model_redirect(model: str) -> str:
if not Path(model_redirect_path).exists():
return model
redirect_dict = (_maybe_json_dict(model_redirect_path)
or _maybe_space_split_dict(model_redirect_path))
if (redirect_model := redirect_dict.get(model)):
redirect_dict = _maybe_json_dict(model_redirect_path) or _maybe_space_split_dict(
model_redirect_path
)
if redirect_model := redirect_dict.get(model):
logger.info("model redirect: [ %s ] -> [ %s ]", model, redirect_model)
return redirect_model
return model
def parse_safetensors_file_metadata(path: str | PathLike) -> dict[str, Any]:
with open(path, "rb") as f:
length_of_metadata = struct.unpack("<Q", f.read(8))[0]
metadata = json.loads(f.read(length_of_metadata).decode("utf-8"))
return metadata
def convert_model_repo_to_path(model_repo: str) -> str:
"""When VLLM_USE_MODELSCOPE is True convert a model
repository string to a Path str."""
if not envs.VLLM_USE_MODELSCOPE or Path(model_repo).exists():
return model_repo
from modelscope.utils.file_utils import get_model_cache_root
return os.path.join(get_model_cache_root(), model_repo)