diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py index 5677550b..741f0d2e 100644 --- a/vllm_ascend/worker/model_runner_v1.py +++ b/vllm_ascend/worker/model_runner_v1.py @@ -2810,8 +2810,7 @@ class NPUModelRunner(LoRAModelRunnerMixin): else: # FIXME: Try using `auto_dispatch_capture=True` update_mla_attn_params(self.update_stream, forward_context, - positions.shape[0], - self.speculative_config) + num_tokens, self.speculative_config) else: if self.pcp_size * self.dcp_size > 1: update_attn_dcp_pcp_params(self.update_stream, @@ -2819,7 +2818,7 @@ class NPUModelRunner(LoRAModelRunnerMixin): positions.shape[0]) else: update_attn_params(self.update_stream, forward_context, - positions.shape[0]) + num_tokens) if self.drafter and self.drafter.name == SpecDcodeType.EAGLE3: hidden_states, _ = hidden_states