From b72ade9acda04f7b5d7310e4cd07dfc448fc2539 Mon Sep 17 00:00:00 2001 From: Zetong Li <48438720+slippersss@users.noreply.github.com> Date: Fri, 17 Apr 2026 22:46:16 +0800 Subject: [PATCH] [0.18.0][BugFix] Update capture sizes after rounding operations (#8380) ### What this PR does / why we need it? This PR is partially cherry-picked from #8172. This PR aims to fix mismatched capture sizes after rounding operations when using sp or speculative. The reason is that the original `self.cudagraph_capture_sizes` is no longer updated and remains as the initial sizes. Now we use `self.cudagraph_dispatcher.get_capture_descs` to get the up-to-date sizes. ### Does this PR introduce _any_ user-facing change? N/A ### How was this patch tested? by ci Signed-off-by: Zetong Li --- vllm_ascend/worker/model_runner_v1.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py index 4a6bf06f..206f40df 100644 --- a/vllm_ascend/worker/model_runner_v1.py +++ b/vllm_ascend/worker/model_runner_v1.py @@ -3298,12 +3298,19 @@ class NPUModelRunner(GPUModelRunner): with update_pass_config(self): super()._check_and_update_cudagraph_mode(attention_backends, kv_cache_groups) + capture_descs = self.cudagraph_dispatcher.get_capture_descs() + capture_sizes = sorted({ + desc.num_tokens + for _, descs in capture_descs + for desc in descs + }) + # NOTE: Since aclgraph_batch_sizes cannot be determined until here, # we set the graph params right before initializing the keys. if self.use_aclgraph: - set_graph_params(self.cudagraph_batch_sizes) + set_graph_params(capture_sizes) if self.speculative_config: - set_draft_graph_params(self.cudagraph_batch_sizes) + set_draft_graph_params(capture_sizes) def capture_model(self) -> None: gpu_model_runner_cls = next((cls for cls in self.__class__.__mro__ if cls.__name__ == "GPUModelRunner"), None)