diff --git a/vllm_ascend/attention/mla_v1.py b/vllm_ascend/attention/mla_v1.py
index 4044126..c0e175c 100644
--- a/vllm_ascend/attention/mla_v1.py
+++ b/vllm_ascend/attention/mla_v1.py
@@ -1166,6 +1166,8 @@ class AscendMLAImpl(MLAAttentionImpl):
                 dim=-1,
             )
             q_c = self.q_a_layernorm(q_c)
+            # allgather needs contiguous data
+            kv_no_split = kv_no_split.contiguous()
         else:
             q_c = hidden_states
             kv_no_split = self.kv_a_proj_with_mqa(hidden_states)[0]