diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py index 69daf02..731b93f 100644 --- a/vllm_ascend/worker/model_runner_v1.py +++ b/vllm_ascend/worker/model_runner_v1.py @@ -1462,6 +1462,7 @@ class NPUModelRunner(LoRAModelRunnerMixin): slot_mapping[:total_num_scheduled_tokens], non_blocking=True, ) + self.slot_mapping[total_num_scheduled_tokens:].fill_(0) # Make AscendCommonAttentionMetadata common_attn_metadata = AscendCommonAttentionMetadata(