[bugfix] Fix "'Parameter' object has no attribute 'data'" error when MLAPO is enabled (#6601)
### What this PR does / why we need it?
This PR fixes an `AttributeError: 'Parameter' object has no attribute
'data'` that occurs when MLAPO is enabled with vLLM v0.15.0.
The error is caused by a monkey-patch on
`MLAAttention.process_weights_after_loading` which is incompatible with
changes in vLLM v0.15.0. This is likely related to PyTorch's deprecation
of the `.data` attribute on `torch.nn.Parameter` objects.
This change makes the monkey-patch conditional, so it is not applied for
vLLM v0.15.0 and newer versions, resolving the crash.
- vLLM version: v0.15.0
- vLLM main:
d7e17aaacd
Signed-off-by: Meihan-chen <jcccx.cmh@gmail.com>
This commit is contained in:
@@ -126,16 +126,17 @@ class AscendMultiHeadLatentAttention(MultiHeadLatentAttentionWrapper):
             o_proj=mla_modules.o_proj,
         )

-        original_process_weights = self.mla_attn.process_weights_after_loading
+        if not vllm_version_is("v0.15.0"):
+            original_process_weights = self.mla_attn.process_weights_after_loading

-        def wrapped_process_weights(act_dtype: torch.dtype):
-            from vllm_ascend.attention.sfa_v1 import AscendSFAImpl
+            def wrapped_process_weights(act_dtype: torch.dtype):
+                from vllm_ascend.attention.sfa_v1 import AscendSFAImpl

-            if not isinstance(self.mla_attn.impl, AscendSFAImpl):
-                original_process_weights(act_dtype)
-            self.mla_attn.impl.process_weights_after_loading(act_dtype)
+                if not isinstance(self.mla_attn.impl, AscendSFAImpl):
+                    original_process_weights(act_dtype)
+                self.mla_attn.impl.process_weights_after_loading(act_dtype)

-        self.mla_attn.process_weights_after_loading = wrapped_process_weights
+            self.mla_attn.process_weights_after_loading = wrapped_process_weights

         compilation_config = get_current_vllm_config().compilation_config
         if prefix in compilation_config.static_forward_context: