[0.18.0][BugFix] Update capture sizes after rounding operations (#8380)

### What this PR does / why we need it? This PR is partially cherry-picked from #8172. It fixes mismatched capture sizes after rounding operations when using sp or speculative decoding. The reason is that the original `self.cudagraph_capture_sizes` is no longer updated and remains at its initial sizes. Now we use `self.cudagraph_dispatcher.get_capture_descs` to get the up-to-date sizes. ### Does this PR introduce _any_ user-facing change? N/A ### How was this patch tested? By CI. Signed-off-by: Zetong Li <slippersss@126.com>
2026-04-17 22:46:16 +08:00
parent 76cc2204bd
commit b72ade9acd
1 changed files with 9 additions and 2 deletions
--- a/vllm_ascend/worker/model_runner_v1.py
+++ b/vllm_ascend/worker/model_runner_v1.py
@@ -3298,12 +3298,19 @@ class NPUModelRunner(GPUModelRunner):
        with update_pass_config(self):
            super()._check_and_update_cudagraph_mode(attention_backends, kv_cache_groups)

+        capture_descs = self.cudagraph_dispatcher.get_capture_descs()
+        capture_sizes = sorted({
+            desc.num_tokens
+            for _, descs in capture_descs
+            for desc in descs
+        })
+
        # NOTE: Since aclgraph_batch_sizes cannot be determined until here,
        # we set the graph params right before initializing the keys.
        if self.use_aclgraph:
-            set_graph_params(self.cudagraph_batch_sizes)
+            set_graph_params(capture_sizes)
            if self.speculative_config:
-                set_draft_graph_params(self.cudagraph_batch_sizes)
+                set_draft_graph_params(capture_sizes)

    def capture_model(self) -> None:
        gpu_model_runner_cls = next((cls for cls in self.__class__.__mro__ if cls.__name__ == "GPUModelRunner"), None)