diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py index 51b345a7..47c7c5a6 100644 --- a/vllm_ascend/worker/model_runner_v1.py +++ b/vllm_ascend/worker/model_runner_v1.py @@ -723,8 +723,8 @@ class NPUModelRunner(GPUModelRunner): if self.uses_mrope: # Only relevant for models using M-RoPE (e.g, Qwen2-VL) self._calc_mrope_positions(scheduler_output) - self.mrope_positions.gpu[:, :total_num_scheduled_tokens].copy_( - self.mrope_positions.cpu[:, :total_num_scheduled_tokens], + self.mrope_positions.gpu.copy_( + self.mrope_positions.cpu, non_blocking=True, ) elif self.uses_xdrope_dim > 0: