[bugfix] Fix "'Parameter' object has no attribute 'data'" error when MLAPO is enabled (#6601)
### What this PR does / why we need it?
This PR fixes an `AttributeError: 'Parameter' object has no attribute
'data'` that occurs when MLAPO is enabled with vLLM v0.15.0.
The error is caused by a monkey-patch on
`MLAAttention.process_weights_after_loading` which is incompatible with
changes in vLLM v0.15.0. This is likely related to PyTorch's deprecation
of the `.data` attribute on `torch.nn.Parameter` objects.
This change makes the monkey-patch conditional, so it is not applied for
vLLM v0.15.0 and newer versions, resolving the crash.
- vLLM version: v0.15.0
- vLLM main:
d7e17aaacd
Signed-off-by: Meihan-chen <jcccx.cmh@gmail.com>
This commit is contained in:
@@ -126,16 +126,17 @@ class AscendMultiHeadLatentAttention(MultiHeadLatentAttentionWrapper):
             o_proj=mla_modules.o_proj,
         )

-        original_process_weights = self.mla_attn.process_weights_after_loading
+        if not vllm_version_is("v0.15.0"):
+            original_process_weights = self.mla_attn.process_weights_after_loading

-        def wrapped_process_weights(act_dtype: torch.dtype):
-            from vllm_ascend.attention.sfa_v1 import AscendSFAImpl
+            def wrapped_process_weights(act_dtype: torch.dtype):
+                from vllm_ascend.attention.sfa_v1 import AscendSFAImpl

-            if not isinstance(self.mla_attn.impl, AscendSFAImpl):
-                original_process_weights(act_dtype)
-            self.mla_attn.impl.process_weights_after_loading(act_dtype)
+                if not isinstance(self.mla_attn.impl, AscendSFAImpl):
+                    original_process_weights(act_dtype)
+                self.mla_attn.impl.process_weights_after_loading(act_dtype)

-        self.mla_attn.process_weights_after_loading = wrapped_process_weights
+            self.mla_attn.process_weights_after_loading = wrapped_process_weights

         compilation_config = get_current_vllm_config().compilation_config
         if prefix in compilation_config.static_forward_context: