From 79fbb20b4db5538f33ae1d1fc6f531847a42de8b Mon Sep 17 00:00:00 2001 From: Mengqing Cao Date: Tue, 25 Feb 2025 17:51:09 +0800 Subject: [PATCH] [ModelRunner] remove unused args (follow vllm changes) (#159) ### What this PR does / why we need it? The argument list of `Attention.forward()` was changed by https://github.com/vllm-project/vllm/pull/13555. The now-unused args `kv_caches` and `attn_metadata` are therefore removed from the `model_executable(...)` call in `NPUModelRunner`. ### Does this PR introduce _any_ user-facing change? N/A ### How was this patch tested? CI passed with the existing tests. Signed-off-by: MengqingCao --- vllm_ascend/model_runner.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/vllm_ascend/model_runner.py b/vllm_ascend/model_runner.py index d0aa06d..ea77026 100644 --- a/vllm_ascend/model_runner.py +++ b/vllm_ascend/model_runner.py @@ -1142,8 +1142,6 @@ class NPUModelRunner(NPUModelRunnerBase[ModelInputForNPUWithSamplingMetadata]): hidden_or_intermediate_states = model_executable( input_ids=model_input.input_tokens, positions=model_input.input_positions, - kv_caches=kv_caches, - attn_metadata=model_input.attn_metadata, intermediate_tensors=intermediate_tensors, **MultiModalKwargs.as_kwargs(multi_modal_kwargs, device=self.device),