diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py index 5739c0f3..ffc2ee04 100644 --- a/vllm_ascend/worker/model_runner_v1.py +++ b/vllm_ascend/worker/model_runner_v1.py @@ -3079,7 +3079,7 @@ class NPUModelRunner(GPUModelRunner): (2 * self.pcp_size)).astype(np.int32) * (2 * self.pcp_size) num_padded_scheduled_tokens[:num_decode_reqs] = ( tokens[:num_decode_reqs] * self.pcp_size) - self.num_pcp_pads = num_padded_scheduled_tokens - tokens + self.num_pcp_pads = torch.tensor(num_padded_scheduled_tokens - tokens) cu_padded_tokens, pcp_padded_arange = \ self._get_cumsum_and_arange(num_padded_scheduled_tokens) unpad_mask = torch.from_numpy(