[Fix] Sorts aclgraph batch sizes in ascending order (#4230)

### What this PR does / why we need it?

Sorts aclgraph batch sizes in ascending order, corresponding to vLLM [#26016](https://github.com/vllm-project/vllm/pull/26016).

This ensures that the batch sizes for aclgraph are sorted in ascending order when aclgraph mode is enabled, improving consistency and compatibility with later logic that may depend on the order.

### Does this PR introduce _any_ user-facing change?

None.

### How was this patch tested?

Waiting for #3886.

- vLLM version: v0.11.0
- vLLM main: 2918c1b49c

Signed-off-by: Yizhou Liu <liu_yizhou@outlook.com>
2025-11-19 09:36:37 +08:00
parent e98543267a
commit 63561d6763
1 changed files with 8 additions and 3 deletions
--- a/vllm_ascend/worker/model_runner_v1.py
+++ b/vllm_ascend/worker/model_runner_v1.py
@@ -521,8 +521,12 @@ class NPUModelRunner(LoRAModelRunnerMixin):
            if self.speculative_config else 0)

        self.use_aclgraph = self._use_aclgraph()
-        self.aclgraph_batch_sizes = list(
-            reversed(self.compilation_config.cudagraph_capture_sizes))
+
+        # Keep self.aclgraph_batch_sizes sorted in ascending order.
+        if (self.compilation_config.cudagraph_capture_sizes and
+                self.compilation_config.cudagraph_mode != CUDAGraphMode.NONE):
+            self.aclgraph_batch_sizes = sorted(
+                self.compilation_config.cudagraph_capture_sizes)

        self.uniform_decode_query_len = 1 if not self.speculative_config else \
            1 + self.speculative_config.num_speculative_tokens
@@ -4101,7 +4105,8 @@ class NPUModelRunner(LoRAModelRunnerMixin):
            if aclgraph_mode.mixed_mode() != CUDAGraphMode.NONE:
                aclgraph_runtime_mode = aclgraph_mode.mixed_mode()

-                compilation_cases = sorted(self.aclgraph_batch_sizes)
+                # make sure we capture the largest batch size first
+                compilation_cases = list(reversed(self.aclgraph_batch_sizes))

                try:
                    self._capture_aclgraphs(