enginex-mlu370-vllm/vllm-v0.6.2/vllm/model_executor/model_loader/utils.py

"""Utilities for selecting and loading models."""
import contextlib
from typing import Tuple, Type

import torch
from torch import nn

from vllm.config import ModelConfig
from vllm.model_executor.models import ModelRegistry


@contextlib.contextmanager
def set_default_torch_dtype(dtype: torch.dtype):
    """Temporarily set the default torch dtype to the given dtype.

    The default dtype that was active on entry is restored when the
    ``with`` block exits, including when the block raises.

    Args:
        dtype: The dtype to install via ``torch.set_default_dtype`` for the
            duration of the ``with`` block.
    """
    old_dtype = torch.get_default_dtype()
    torch.set_default_dtype(dtype)
    try:
        yield
    finally:
        # Restore unconditionally: the default dtype is process-global
        # state, so an exception in the body must not leave it changed.
        torch.set_default_dtype(old_dtype)


def get_model_architecture(
        model_config: ModelConfig) -> Tuple[Type[nn.Module], str]:
    """Resolve the model class for the architectures named in the HF config.

    The candidate architecture names are read from
    ``model_config.hf_config.architectures`` (an absent or empty value is
    treated as an empty list) and handed to
    ``ModelRegistry.resolve_model_cls`` together with the model path,
    revision, ``trust_remote_code`` flag and the HF config itself.

    Args:
        model_config: Configuration supplying ``hf_config``,
            ``quantization``, ``model``, ``revision`` and
            ``trust_remote_code``.

    Returns:
        The result of ``ModelRegistry.resolve_model_cls``; per this
        function's annotation, a ``(model class, architecture name)`` pair.
    """
    arch_names = getattr(model_config.hf_config, "architectures", None) or []

    # Special handling for quantized Mixtral.
    # FIXME(woosuk): This is a temporary hack.
    # Quantization methods listed here keep the regular Mixtral class; any
    # other configured method is redirected to the quantized variant.
    keeps_regular_mixtral = (
        "fp8",
        "compressed-tensors",
        "gptq_marlin",
        "awq_marlin",
    )
    quant_method = model_config.quantization
    use_quant_mixtral = (quant_method is not None
                         and quant_method not in keeps_regular_mixtral
                         and "MixtralForCausalLM" in arch_names)
    if use_quant_mixtral:
        arch_names = ["QuantMixtralForCausalLM"]

    resolved = ModelRegistry.resolve_model_cls(
        arch_names,
        model_path=model_config.model,
        revision=model_config.revision,
        trust_remote_code=model_config.trust_remote_code,
        hf_config=model_config.hf_config,
    )
    return resolved


def get_architecture_class_name(model_config: ModelConfig) -> str:
    """Return the second element of ``get_model_architecture``'s result.

    Per that function's return annotation, this is the architecture name
    string paired with the resolved model class.
    """
    resolved = get_model_architecture(model_config)
    return resolved[1]
add qwen3 2026-02-04 17:22:39 +08:00			`"""Utilities for selecting and loading models."""`
			`import contextlib`
			`from typing import Tuple, Type`

			`import torch`
			`from torch import nn`

			`from vllm.config import ModelConfig`
			`from vllm.model_executor.models import ModelRegistry`


			`@contextlib.contextmanager`
			`def set_default_torch_dtype(dtype: torch.dtype):`
			`"""Sets the default torch dtype to the given dtype."""`
			`old_dtype = torch.get_default_dtype()`
			`torch.set_default_dtype(dtype)`
			`yield`
			`torch.set_default_dtype(old_dtype)`


			`def get_model_architecture(`
			`model_config: ModelConfig) -> Tuple[Type[nn.Module], str]:`
add deepseekv3 and llama4 2026-02-11 14:39:48 +08:00			`architectures = getattr(model_config.hf_config, "architectures", None) or []`
add qwen3 2026-02-04 17:22:39 +08:00			`# Special handling for quantized Mixtral.`
			`# FIXME(woosuk): This is a temporary hack.`
			`mixtral_supported = [`
			`"fp8", "compressed-tensors", "gptq_marlin", "awq_marlin"`
			`]`

			`if (model_config.quantization is not None`
			`and model_config.quantization not in mixtral_supported`
			`and "MixtralForCausalLM" in architectures):`
			`architectures = ["QuantMixtralForCausalLM"]`

add dynamic register 2026-02-05 15:53:43 +08:00			`return ModelRegistry.resolve_model_cls(`
			`architectures,`
			`model_path=model_config.model,`
			`revision=model_config.revision,`
			`trust_remote_code=model_config.trust_remote_code,`
			`hf_config=model_config.hf_config,`
			`)`
add qwen3 2026-02-04 17:22:39 +08:00

			`def get_architecture_class_name(model_config: ModelConfig) -> str:`
			`return get_model_architecture(model_config)[1]`