Add DeepSeek-V3 and Llama 4

2026-02-11 15:13:14 +08:00
parent eaeb5169e0
commit 86fd3b5a92
3 changed files with 3 additions and 9 deletions
--- a/vllm-v0.6.2/vllm/model_executor/model_loader/utils.py
+++ b/vllm-v0.6.2/vllm/model_executor/model_loader/utils.py
@@ -21,10 +21,6 @@ def set_default_torch_dtype(dtype: torch.dtype):
 def get_model_architecture(
        model_config: ModelConfig) -> Tuple[Type[nn.Module], str]:
    architectures = getattr(model_config.hf_config, "architectures", None) or []
    print(f"[DEBUG-ARCH] get_model_architecture: "
          f"type(hf_config)={type(model_config.hf_config).__name__}, "
          f"architectures={getattr(model_config.hf_config, 'architectures', 'MISSING')}, "
          f"id(hf_config)={id(model_config.hf_config)}")
    # Special handling for quantized Mixtral.
    # FIXME(woosuk): This is a temporary hack.
    mixtral_supported = [
--- a/vllm-v0.6.2/vllm/model_executor/models/llama4.py
+++ b/vllm-v0.6.2/vllm/model_executor/models/llama4.py
@@ -450,7 +450,10 @@ class Llama4ForCausalLM(nn.Module, SupportsPP):
        # which has text_config sub-config. Extract it for text model.
        text_config = getattr(config, "text_config", None)
        if text_config is not None:
            orig_archs = getattr(config, "architectures", None)
            vllm_config.model_config.hf_config = text_config
            if orig_archs and not getattr(text_config, "architectures", None):
                text_config.architectures = orig_archs
            config = text_config
        quant_config = vllm_config.quant_config
        lora_config = vllm_config.lora_config
--- a/vllm-v0.6.2/vllm/transformers_utils/config.py
+++ b/vllm-v0.6.2/vllm/transformers_utils/config.py
@@ -230,11 +230,6 @@ def get_config(
        config.update({"architectures": [model_type]})
    # Architecture mapping for models without explicit architectures field
    logger.warning("[DEBUG-ARCH] after load: type(config)=%s, "
                   "config.architectures=%s, config.model_type=%s",
                   type(config).__name__,
                   getattr(config, "architectures", "MISSING"),
                   getattr(config, "model_type", "MISSING"))
    if not getattr(config, "architectures", None):
        if config.model_type not in MODEL_MAPPING_NAMES:
            logger.warning(