diff --git a/vllm-v0.6.2/vllm_mlu/vllm_mlu/model_executor/models/llama4.py b/vllm-v0.6.2/vllm_mlu/vllm_mlu/model_executor/models/llama4.py index 604086f..495b2a4 100644 --- a/vllm-v0.6.2/vllm_mlu/vllm_mlu/model_executor/models/llama4.py +++ b/vllm-v0.6.2/vllm_mlu/vllm_mlu/model_executor/models/llama4.py @@ -9,7 +9,8 @@ from vllm.model_executor.layers.layernorm import RMSNorm from vllm_mlu.model_executor.layers.feed_forward import FeedForward from vllm_mlu.mlu_hijack_utils import MluHijackObject from vllm.distributed import tensor_model_parallel_all_reduce -from vllm.model_executor.model_loader.weight_utils import default_weight_loader +from vllm.model_executor.model_loader.weight_utils import ( + default_weight_loader, maybe_remap_kv_scale_name) from vllm.model_executor.models.llama4 import ( Llama4Attention, Llama4DecoderLayer, Llama4ForCausalLM, Llama4Model, Llama4MoE)