From 5b8e47cb688660ff097ce9f9e20b3714eb0cb420 Mon Sep 17 00:00:00 2001
From: meihanc
Date: Tue, 10 Feb 2026 09:04:32 +0800
Subject: [PATCH] [bugfix] Fix no attribute 'data' when MLAPO is enabled
 (#6601)

### What this PR does / why we need it?

This PR fixes an `AttributeError: 'Parameter' object has no attribute 'data'`
that occurs when MLAPO is enabled with vLLM v0.15.0. The error is caused by a
monkey-patch on `MLAAttention.process_weights_after_loading` that is
incompatible with changes in vLLM v0.15.0, likely related to PyTorch's
deprecation of direct `.data` access on `torch.nn.Parameter` objects.

This change makes the monkey-patch conditional on the vLLM version, so it is
skipped when running vLLM v0.15.0, resolving the crash.

- vLLM version: v0.15.0
- vLLM main: https://github.com/vllm-project/vllm/commit/d7e17aaacd5ed1b4b4be6bcfef3a1b7cbc84fc9a

Signed-off-by: Meihan-chen
---
 vllm_ascend/ops/mla.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/vllm_ascend/ops/mla.py b/vllm_ascend/ops/mla.py
index 64d5d36a..6f02cecd 100644
--- a/vllm_ascend/ops/mla.py
+++ b/vllm_ascend/ops/mla.py
@@ -126,16 +126,17 @@ class AscendMultiHeadLatentAttention(MultiHeadLatentAttentionWrapper):
             o_proj=mla_modules.o_proj,
         )
 
-        original_process_weights = self.mla_attn.process_weights_after_loading
+        if not vllm_version_is("v0.15.0"):
+            original_process_weights = self.mla_attn.process_weights_after_loading
 
-        def wrapped_process_weights(act_dtype: torch.dtype):
-            from vllm_ascend.attention.sfa_v1 import AscendSFAImpl
+            def wrapped_process_weights(act_dtype: torch.dtype):
+                from vllm_ascend.attention.sfa_v1 import AscendSFAImpl
 
-            if not isinstance(self.mla_attn.impl, AscendSFAImpl):
-                original_process_weights(act_dtype)
-            self.mla_attn.impl.process_weights_after_loading(act_dtype)
+                if not isinstance(self.mla_attn.impl, AscendSFAImpl):
+                    original_process_weights(act_dtype)
+                self.mla_attn.impl.process_weights_after_loading(act_dtype)
 
-        self.mla_attn.process_weights_after_loading = wrapped_process_weights
+            self.mla_attn.process_weights_after_loading = wrapped_process_weights
 
         compilation_config = get_current_vllm_config().compilation_config
         if prefix in compilation_config.static_forward_context:
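
For context, below is a minimal, self-contained sketch of the version-gated
monkey-patch pattern this diff applies. All names here (`installed_vllm_version`,
`Attn`) are hypothetical stand-ins for illustration; the real gate uses the
`vllm_version_is` helper from `vllm_ascend.utils`, which compares against the
installed vLLM version.

```python
# Sketch of the version-gated method-wrapping pattern used in this patch.
# installed_vllm_version and Attn are hypothetical stand-ins, not real APIs.
import torch

installed_vllm_version = "v0.15.0"  # pretend this was read from vllm.__version__


def vllm_version_is(version: str) -> bool:
    # Stand-in for the vllm_ascend.utils helper: exact version match.
    return installed_vllm_version == version


class Attn:
    """Hypothetical stub standing in for vLLM's MLAAttention."""

    def process_weights_after_loading(self, act_dtype: torch.dtype) -> None:
        print(f"upstream weight processing, dtype={act_dtype}")


attn = Attn()

# Wrap the method only on versions that still need it; on vLLM v0.15.0 the
# upstream implementation is left untouched, avoiding the AttributeError.
if not vllm_version_is("v0.15.0"):
    original = attn.process_weights_after_loading

    def wrapped(act_dtype: torch.dtype) -> None:
        original(act_dtype)  # preserve upstream behavior
        print("extra Ascend-specific weight processing")

    attn.process_weights_after_loading = wrapped

attn.process_weights_after_loading(torch.bfloat16)
```

Because `vllm_version_is` checks for an exact version match, the wrapper in
this sketch would still be installed on versions other than v0.15.0; only the
v0.15.0 case is skipped, mirroring the condition in the diff above.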