Add minimal vLLM 0.16.1 build repo for BI-V150

This commit is contained in:
2026-04-18 10:56:22 +08:00
commit d69657327e
1895 changed files with 615301 additions and 0 deletions

57
vllm/config/model_arch.py Normal file
View File

@@ -0,0 +1,57 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from typing import Any
from pydantic import ConfigDict
from pydantic.dataclasses import dataclass
from vllm.logger import init_logger
# Per-module logger created through vLLM's logging helper.
logger = init_logger(__name__)
@dataclass(config=ConfigDict(arbitrary_types_allowed=True))
class ModelArchitectureConfig:
    """
    Configuration for the model architecture that is required by the vLLM
    runtime.

    Fields are plain architecture properties (layer/head counts, sizes,
    quantization metadata) extracted from a Hugging Face config so the rest
    of vLLM does not need to touch the raw HF object.
    """
    architectures: list[str] | None
    """List of model architecture class names (e.g., ['LlamaForCausalLM']).
    It can be None upon calling `vllm_config.with_hf_config(config.text_config)`."""
    model_type: str
    """Model type identifier (e.g., 'llama', 'gpt_oss')."""
    text_model_type: str | None
    """Text model type identifier (e.g., 'llama4_text'); None when the model
    has no separate text sub-config."""
    hidden_size: int
    """Hidden size of the model."""
    total_num_hidden_layers: int
    """Total number of hidden layers in the model."""
    total_num_attention_heads: int
    """Total number of attention heads in the model."""
    head_size: int
    """Head dimension of the model."""
    vocab_size: int
    """Vocabulary size of the model."""
    total_num_kv_heads: int
    """Total number of key-value heads in the model."""
    num_experts: int
    """Number of experts in the model."""
    quantization_config: dict[str, Any] | None
    """Quantization configuration dictionary containing quantization
    parameters; None when the model is not quantized."""
    is_deepseek_mla: bool
    """Whether the model is a DeepSeek MLA (multi-head latent attention) model."""
    derived_max_model_len_and_key: tuple[float, str | None]
    """Maximum model length derived from the HF config, paired with the name
    of the config key it was derived from (None if no key applied)."""