From 96ed925486d769b024bd11c3ebd9edb7c86ab009 Mon Sep 17 00:00:00 2001
From: Chranos <826995883@qq.com>
Date: Wed, 11 Feb 2026 14:30:01 +0800
Subject: [PATCH] add deepseekv3 and llama4

---
 .../vllm/model_executor/models/llama4.py      | 27 +++++++++++++++----
 .../vllm_mlu/model_executor/models/llama4.py  |  9 +++++++
 2 files changed, 31 insertions(+), 5 deletions(-)

diff --git a/vllm-v0.6.2/vllm/model_executor/models/llama4.py b/vllm-v0.6.2/vllm/model_executor/models/llama4.py
index 4ec2076..ef10536 100644
--- a/vllm-v0.6.2/vllm/model_executor/models/llama4.py
+++ b/vllm-v0.6.2/vllm/model_executor/models/llama4.py
@@ -446,6 +446,12 @@ class Llama4ForCausalLM(nn.Module, SupportsPP):
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
         super().__init__()
         config = vllm_config.model_config.hf_config
+        # Llama4ForConditionalGeneration uses top-level Llama4Config
+        # which has text_config sub-config. Extract it for text model.
+        text_config = getattr(config, "text_config", None)
+        if text_config is not None:
+            vllm_config.model_config.hf_config = text_config
+            config = text_config
         quant_config = vllm_config.quant_config
         lora_config = vllm_config.lora_config
         self.config = config
@@ -553,8 +559,19 @@ class Llama4ForCausalLM(nn.Module, SupportsPP):
             if getattr(self.config, "tie_word_embeddings", False) else None),
         )
-        weights = [
-            self.permute_qk_weight_for_rotary(name, loaded_weight)
-            for name, loaded_weight in weights
-        ]
-        loader.load_weights(weights)
+
+        def _process_weights(weights):
+            for name, loaded_weight in weights:
+                # Strip language_model. prefix for Llama4ForConditionalGeneration
+                if name.startswith("language_model."):
+                    name = name[len("language_model."):]
+                # Skip vision encoder weights
+                elif name.startswith("multi_modal_projector.") or \
+                        name.startswith("vision_encoder.") or \
+                        name.startswith("vision_model."):
+                    continue
+                name, loaded_weight = self.permute_qk_weight_for_rotary(
+                    name, loaded_weight)
+                yield name, loaded_weight
+
+        loader.load_weights(_process_weights(weights))
 
diff --git a/vllm-v0.6.2/vllm_mlu/vllm_mlu/model_executor/models/llama4.py b/vllm-v0.6.2/vllm_mlu/vllm_mlu/model_executor/models/llama4.py
index eb6bd16..604086f 100644
--- a/vllm-v0.6.2/vllm_mlu/vllm_mlu/model_executor/models/llama4.py
+++ b/vllm-v0.6.2/vllm_mlu/vllm_mlu/model_executor/models/llama4.py
@@ -389,6 +389,15 @@ def vllm__llama4__Llama4ForCausalLM__load_weights(
             if "rotary_emb.inv_freq" in name:
                 continue
 
+            # Strip language_model. prefix for Llama4ForConditionalGeneration
+            if name.startswith("language_model."):
+                name = name[len("language_model."):]
+            # Skip vision encoder weights
+            elif (name.startswith("multi_modal_projector.")
+                  or name.startswith("vision_encoder.")
+                  or name.startswith("vision_model.")):
+                continue
+
             # Permute Q/K weights for rotary embedding
             name, loaded_weight = self.permute_qk_weight_for_rotary(
                 name, loaded_weight)