diff --git a/vllm_ascend/compilation/acl_graph.py b/vllm_ascend/compilation/acl_graph.py index 060ecf98..579dfda3 100644 --- a/vllm_ascend/compilation/acl_graph.py +++ b/vllm_ascend/compilation/acl_graph.py @@ -202,10 +202,9 @@ class ACLGraphWrapper: # If we do not in main model and in full-graph mode when using merge-eagle-graph, # we do not need to synchronize. # When enable_enpu is on, model_runner orders update vs replay; skip here. - # When FULL + EAGLE draft (merge path), replay does not need this barrier. + # When EAGLE draft (merge path), replay does not need this barrier. is_draft_eagle = _EXTRA_CTX.is_draft_model and self.use_eagle - need_sync = self.runtime_mode == CUDAGraphMode.FULL and not is_draft_eagle - if not self.enable_enpu and need_sync: + if not self.enable_enpu and not is_draft_eagle: torch.npu.current_stream().synchronize() entry.aclgraph.replay() return entry.output