From 79fbb20b4db5538f33ae1d1fc6f531847a42de8b Mon Sep 17 00:00:00 2001
From: Mengqing Cao <cmq0113@163.com>
Date: Tue, 25 Feb 2025 17:51:09 +0800
Subject: [PATCH] [ModelRunner] remove unused args (follow vllm changes) (#159)

### What this PR does / why we need it?
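The argument list of `Attention.forward()` was changed upstream by
https://github.com/vllm-project/vllm/pull/13555.
This PR follows that change by removing the now-unused `kv_caches` and
`attn_metadata` arguments from the `model_executable(...)` call in
`NPUModelRunner`.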

### Does this PR introduce _any_ user-facing change?
N/A

### How was this patch tested?
CI passed with the existing tests.

Signed-off-by: MengqingCao <cmq0113@163.com>
---
 vllm_ascend/model_runner.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/vllm_ascend/model_runner.py b/vllm_ascend/model_runner.py
index d0aa06d..ea77026 100644
--- a/vllm_ascend/model_runner.py
+++ b/vllm_ascend/model_runner.py
@@ -1142,8 +1142,6 @@ class NPUModelRunner(NPUModelRunnerBase[ModelInputForNPUWithSamplingMetadata]):
                 hidden_or_intermediate_states = model_executable(
                     input_ids=model_input.input_tokens,
                     positions=model_input.input_positions,
-                    kv_caches=kv_caches,
-                    attn_metadata=model_input.attn_metadata,
                     intermediate_tensors=intermediate_tensors,
                     **MultiModalKwargs.as_kwargs(multi_modal_kwargs,
                                                  device=self.device),