Add support for Qwen3 MoE+GPTQ
This commit is contained in:
File diff suppressed because it is too large
Load Diff
20
vllm/transformers_utils/config_parser_base.py
Normal file
20
vllm/transformers_utils/config_parser_base.py
Normal file
@@ -0,0 +1,20 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from pathlib import Path
|
||||
|
||||
from transformers import PretrainedConfig
|
||||
|
||||
|
||||
class ConfigParserBase(ABC):
    """Abstract interface for objects that parse a model's configuration.

    Subclasses must implement :meth:`parse`; the base class cannot be
    instantiated directly because `parse` is declared abstract.
    """

    @abstractmethod
    def parse(
        self,
        model: str | Path,
        trust_remote_code: bool,
        revision: str | None = None,
        code_revision: str | None = None,
        **kwargs,
    ) -> tuple[dict, PretrainedConfig]:
        """Parse the configuration of ``model``.

        Args:
            model: Model identifier or filesystem path.
            trust_remote_code: Flag handed to the implementation.
                NOTE(review): presumably controls whether repository-provided
                code may be executed - confirm against implementations.
            revision: Optional revision of the model to use.
            code_revision: Optional revision of the model code to use.
            **kwargs: Extra, implementation-specific options.

        Returns:
            A ``(dict, PretrainedConfig)`` pair.
            NOTE(review): presumably the raw config dictionary and the
            constructed config object - confirm against implementations.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError
|
||||
59
vllm/transformers_utils/dynamic_module.py
Normal file
59
vllm/transformers_utils/dynamic_module.py
Normal file
@@ -0,0 +1,59 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
import os
|
||||
|
||||
from transformers.dynamic_module_utils import get_class_from_dynamic_module
|
||||
|
||||
import vllm.envs as envs
|
||||
from vllm.logger import init_logger
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
|
||||
def try_get_class_from_dynamic_module(
    class_reference: str,
    pretrained_model_name_or_path: str,
    cache_dir: str | os.PathLike | None = None,
    force_download: bool = False,
    resume_download: bool | None = None,
    proxies: dict[str, str] | None = None,
    token: bool | str | None = None,
    revision: str | None = None,
    local_files_only: bool = False,
    repo_type: str | None = None,
    code_revision: str | None = None,
    warn_on_fail: bool = True,
    **kwargs,
) -> type | None:
    """
    As `transformers.dynamic_module_utils.get_class_from_dynamic_module`,
    but ignoring any errors.

    Every argument except ``warn_on_fail`` is forwarded unchanged to
    `get_class_from_dynamic_module`.

    Args:
        warn_on_fail: When true, a failure is logged as a warning that
            includes the traceback; when false, the failure is silent.

    Returns:
        The class returned by `get_class_from_dynamic_module`, or ``None``
        if that call raised any ``Exception``.
    """
    try:
        return get_class_from_dynamic_module(
            class_reference,
            pretrained_model_name_or_path,
            cache_dir=cache_dir,
            force_download=force_download,
            resume_download=resume_download,
            proxies=proxies,
            token=token,
            revision=revision,
            local_files_only=local_files_only,
            repo_type=repo_type,
            code_revision=code_revision,
            **kwargs,
        )
    except Exception:
        # Deliberate best-effort: the caller receives None instead of an
        # exception. The hub name only affects the wording of the log line.
        location = "ModelScope" if envs.VLLM_USE_MODELSCOPE else "HF Hub"

        if warn_on_fail:
            logger.warning(
                "Unable to load %s from %s on %s.",
                class_reference,
                pretrained_model_name_or_path,
                location,
                exc_info=True,
            )

        return None
|
||||
@@ -2,22 +2,32 @@
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
import json
|
||||
import os
|
||||
import struct
|
||||
from functools import cache
|
||||
from os import PathLike
|
||||
from pathlib import Path
|
||||
from typing import Optional, Union
|
||||
from typing import Any
|
||||
|
||||
from vllm.envs import VLLM_MODEL_REDIRECT_PATH
|
||||
import vllm.envs as envs
|
||||
from vllm.logger import init_logger
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
|
||||
def is_s3(model_or_path: str) -> bool:
    """Return True if ``model_or_path`` starts with ``s3://`` (case-insensitive)."""
    return model_or_path.lower().startswith("s3://")
|
||||
|
||||
|
||||
def check_gguf_file(model: Union[str, PathLike]) -> bool:
|
||||
def is_gcs(model_or_path: str) -> bool:
    """Return True if ``model_or_path`` starts with ``gs://`` (case-insensitive)."""
    return model_or_path.lower().startswith("gs://")
|
||||
|
||||
|
||||
def is_cloud_storage(model_or_path: str) -> bool:
    """Return True if ``model_or_path`` is recognised by `is_s3` or `is_gcs`."""
    return is_s3(model_or_path) or is_gcs(model_or_path)
|
||||
|
||||
|
||||
def check_gguf_file(model: str | PathLike) -> bool:
|
||||
"""Check if the file is a GGUF model."""
|
||||
model = Path(model)
|
||||
if not model.is_file():
|
||||
@@ -37,23 +47,26 @@ def check_gguf_file(model: Union[str, PathLike]) -> bool:
|
||||
|
||||
def modelscope_list_repo_files(
    repo_id: str,
    revision: str | None = None,
    token: str | bool | None = None,
) -> list[str]:
    """List files in a modelscope repo.

    Args:
        repo_id: Passed to ``HubApi.get_model_files`` as ``model_id``.
        revision: Optional revision passed to ``HubApi.get_model_files``.
        token: Value passed to ``HubApi.login``.

    Returns:
        The ``"Path"`` value of every listed entry whose ``"Type"`` is
        ``"blob"``; the listing is requested recursively.
    """
    # Imported lazily so that modelscope is only required when this is called.
    from modelscope.hub.api import HubApi

    api = HubApi()
    api.login(token)
    # same as huggingface_hub.list_repo_files
    files = [
        file["Path"]
        for file in api.get_model_files(
            model_id=repo_id, revision=revision, recursive=True
        )
        if file["Type"] == "blob"
    ]
    return files
|
||||
|
||||
|
||||
def _maybe_json_dict(path: Union[str, PathLike]) -> dict[str, str]:
|
||||
def _maybe_json_dict(path: str | PathLike) -> dict[str, str]:
|
||||
with open(path) as f:
|
||||
try:
|
||||
return json.loads(f.read())
|
||||
@@ -61,7 +74,7 @@ def _maybe_json_dict(path: Union[str, PathLike]) -> dict[str, str]:
|
||||
return dict[str, str]()
|
||||
|
||||
|
||||
def _maybe_space_split_dict(path: Union[str, PathLike]) -> dict[str, str]:
|
||||
def _maybe_space_split_dict(path: str | PathLike) -> dict[str, str]:
|
||||
parsed_dict = dict[str, str]()
|
||||
with open(path) as f:
|
||||
for line in f.readlines():
|
||||
@@ -82,7 +95,7 @@ def maybe_model_redirect(model: str) -> str:
|
||||
:return: maybe redirect to a local folder
|
||||
"""
|
||||
|
||||
model_redirect_path = VLLM_MODEL_REDIRECT_PATH
|
||||
model_redirect_path = envs.VLLM_MODEL_REDIRECT_PATH
|
||||
|
||||
if not model_redirect_path:
|
||||
return model
|
||||
@@ -90,10 +103,28 @@ def maybe_model_redirect(model: str) -> str:
|
||||
if not Path(model_redirect_path).exists():
|
||||
return model
|
||||
|
||||
redirect_dict = (_maybe_json_dict(model_redirect_path)
|
||||
or _maybe_space_split_dict(model_redirect_path))
|
||||
if (redirect_model := redirect_dict.get(model)):
|
||||
redirect_dict = _maybe_json_dict(model_redirect_path) or _maybe_space_split_dict(
|
||||
model_redirect_path
|
||||
)
|
||||
if redirect_model := redirect_dict.get(model):
|
||||
logger.info("model redirect: [ %s ] -> [ %s ]", model, redirect_model)
|
||||
return redirect_model
|
||||
|
||||
return model
|
||||
|
||||
|
||||
def parse_safetensors_file_metadata(path: str | PathLike) -> dict[str, Any]:
|
||||
with open(path, "rb") as f:
|
||||
length_of_metadata = struct.unpack("<Q", f.read(8))[0]
|
||||
metadata = json.loads(f.read(length_of_metadata).decode("utf-8"))
|
||||
return metadata
|
||||
|
||||
|
||||
def convert_model_repo_to_path(model_repo: str) -> str:
    """When VLLM_USE_MODELSCOPE is True convert a model
    repository string to a Path str.

    Args:
        model_repo: Repository string, or an existing filesystem path.

    Returns:
        ``model_repo`` unchanged when ``VLLM_USE_MODELSCOPE`` is falsy or
        when ``model_repo`` already exists on disk; otherwise ``model_repo``
        joined onto the directory returned by modelscope's
        ``get_model_cache_root()``.
    """
    if not envs.VLLM_USE_MODELSCOPE or Path(model_repo).exists():
        return model_repo

    # Imported lazily so that modelscope is only required on this branch.
    from modelscope.utils.file_utils import get_model_cache_root

    return os.path.join(get_model_cache_root(), model_repo)
|
||||
|
||||
Reference in New Issue
Block a user