[Bugfix] Add func swap_states to fix MLA attention (#1580)

### What this PR does / why we need it? MLA attention still uses the `swap_states` attribute of gpu_input_batch's `InputBatch`, which leads to the error `AttributeError: 'InputBatch' object has no attribute 'swap_states'`. This PR fixes the MLA input batch error. ### How was this patch tested? It will be tested by #1136. --------- Signed-off-by: wangli <wangli858794774@gmail.com>
2025-07-02 17:42:53 +08:00
parent 59237ea788
commit 30bf7014d0
3 changed files with 76 additions and 1 deletions
--- a/vllm_ascend/attention/mla_v1.py
+++ b/vllm_ascend/attention/mla_v1.py
@@ -22,10 +22,10 @@ from vllm_ascend.multistream.context import get_multistream_comm_context
 from vllm_ascend.multistream.ms_split import model_input_split_v1_mla_attn
 from vllm_ascend.ops.attention import vanilla_chunked_prefill_mla
 from vllm_ascend.utils import npu_stream_switch, npu_wait_tensor
+from vllm_ascend.worker.npu_input_batch import InputBatch

 if TYPE_CHECKING:
    from vllm.v1.core.sched.output import SchedulerOutput
-    from vllm.v1.worker.gpu_input_batch import InputBatch


@dataclass