From c735bb09419beb0fa9a186ce06cc9ddf2c3cc50b Mon Sep 17 00:00:00 2001 From: yiz-liu <136800916+yiz-liu@users.noreply.github.com> Date: Mon, 8 Sep 2025 09:55:16 +0800 Subject: [PATCH] [Fix] Ensure metadata sync across DP ranks in eager mode (#2766) ### What this PR does / why we need it? Removes the condition that skips metadata synchronization when `enforce_eager` is enabled. This change is necessary to correctly sync the `with_prefill` and `enable_dbo` flags across all data parallel ranks; the base vLLM implementation does not need to sync these two flags. Forcing the sync operation prevents potential inconsistencies, albeit with a minor performance impact. ### Does this PR introduce _any_ user-facing change? None. ### How was this patch tested? Add an E2E online test case? - vLLM version: v0.10.1.1 - vLLM main: https://github.com/vllm-project/vllm/commit/e599e2c65ee32abcc986733ab0a55becea158bb4 Signed-off-by: Yizhou Liu --- vllm_ascend/worker/model_runner_v1.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py index 9e8b58e..3afed6c 100644 --- a/vllm_ascend/worker/model_runner_v1.py +++ b/vllm_ascend/worker/model_runner_v1.py @@ -581,7 +581,13 @@ class NPUModelRunner(LoRAModelRunnerMixin): def _sync_metadata_across_dp( self, num_tokens: int, with_prefill: bool, enable_dbo: bool ) -> tuple[int, Optional[torch.Tensor], bool, bool]: - if self.dp_size == 1 or self.vllm_config.model_config.enforce_eager: + # TODO: In vLLM, the only thing that needs to be synced is num_tokens, but in + # our case, we still need to sync the other two flags as well. So we need to + # include them in the all_reduce operation, and more over, we CANNOT skip it + # even if we are running in eager mode, which harms performance. + # FIXME: Restore the `or self.vllm_config.model_config.enforce_eager` here + # immediately once the other two flags are no longer needed. 
+ if self.dp_size == 1: return num_tokens, None, with_prefill, enable_dbo # Sync num_tokens, with_prefill, enable_dbo across dp ranks