diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py
index 0083a407..f0bf0ce4 100644
--- a/vllm_ascend/worker/model_runner_v1.py
+++ b/vllm_ascend/worker/model_runner_v1.py
@@ -397,7 +397,7 @@ class NPUModelRunner(GPUModelRunner):
     def _use_aclgraph(self) -> bool:
         return self.compilation_config.cudagraph_mode != CUDAGraphMode.NONE and self.compilation_config.mode == CompilationMode.VLLM_COMPILE and not self.model_config.enforce_eager
 
-    def _skip_all_reduce_acorss_dp_group(self) -> bool:
+    def _skip_all_reduce_across_dp_group(self) -> bool:
         """
         Decide whether to skip the all-reduce across the data-parallel
         (DP) group.
@@ -446,7 +446,7 @@ class NPUModelRunner(GPUModelRunner):
         if self.dp_size == 1:
             return num_tokens, None, with_prefill
 
-        if self._skip_all_reduce_acorss_dp_group():
+        if self._skip_all_reduce_across_dp_group():
             num_tokens_after_padding = torch.tensor([num_tokens] * self.dp_size,
                                                     device="cpu",
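
For reviewers unfamiliar with this code path, here is a minimal standalone sketch of the behavior the renamed helper gates, based only on the fragment visible in this diff. The function name `pad_tokens_across_dp` and its parameters are hypothetical, not part of `NPUModelRunner`; the point is that when the skip condition holds, the per-rank token-count tensor is built locally with no DP-group collective:

```python
import torch


def pad_tokens_across_dp(num_tokens: int, dp_size: int,
                         skip_all_reduce: bool) -> torch.Tensor:
    """Return the per-rank padded token counts for the DP group.

    Hypothetical sketch: when the all-reduce across the DP group can be
    skipped, every rank is assumed to run the same number of tokens, so
    the tensor is built locally without communication; otherwise a
    collective would be needed to learn the real per-rank counts
    (that path is not shown in this diff and is omitted here).
    """
    if skip_all_reduce:
        # Communication-free path: replicate this rank's count for all
        # dp_size ranks, mirroring the torch.tensor(...) construction
        # on the CPU in the second hunk above.
        return torch.tensor([num_tokens] * dp_size, device="cpu",
                            dtype=torch.int32)
    raise NotImplementedError("collective path not shown in this diff")


# Example: a 4-way DP group where the skip condition holds and this
# rank has 128 tokens -> tensor([128, 128, 128, 128], dtype=torch.int32)
print(pad_tokens_across_dp(128, 4, skip_all_reduce=True))
```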