From 7feb74590b63206fa078d28dfdaead74ce412645 Mon Sep 17 00:00:00 2001
From: Angazenn <92204292+Angazenn@users.noreply.github.com>
Date: Fri, 16 Jan 2026 23:29:35 +0800
Subject: [PATCH] Revert "[bugfix]limit graph replay sync (#5761)" (#5965)

### What this PR does / why we need it?
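Reverts #5761 to fix accuracy issues when using piecewise graph mode.
With the revert, `torch.npu.synchronize()` is again called unconditionally
before the ACL graph replay, instead of only when the runtime mode is
`CUDAGraphMode.FULL`.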

### Does this PR introduce _any_ user-facing change?

### How was this patch tested?

- vLLM version: v0.13.0
- vLLM main:
https://github.com/vllm-project/vllm/commit/2c24bc6996cb165fce92f780b388a5e39b3f4060

Signed-off-by: Angazenn <supperccell@163.com>
---
 vllm_ascend/compilation/acl_graph.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/vllm_ascend/compilation/acl_graph.py b/vllm_ascend/compilation/acl_graph.py
index c81d2d2f..28eafed5 100644
--- a/vllm_ascend/compilation/acl_graph.py
+++ b/vllm_ascend/compilation/acl_graph.py
@@ -186,13 +186,12 @@ class ACLGraphWrapper:
             )
 
         logger.info_once("Replaying aclgraph")
-        # In async scheduling or multi-threaded (MT) scenarios when graph mode is FULL, it is possible that
+        # In async scheduling or multi-threaded (MT) scenarios, it is possible that
         # the CPU's record event (from update_attn_params) for the iteration i completes
         # before the grph replay of iteration i-1.
         # To ensure proper ordering, we must call synchronize here before replaying,
         # so that update_attn_params only executes after the previous graph replay has fully completed.
-        if self.runtime_mode == CUDAGraphMode.FULL:
-            torch.npu.synchronize()
+        torch.npu.synchronize()
         entry.aclgraph.replay()
         return entry.output