From 63561d67636bdd1ba287f01ddd8eb5ec00e5ffeb Mon Sep 17 00:00:00 2001 From: Yizhou <136800916+yiz-liu@users.noreply.github.com> Date: Wed, 19 Nov 2025 09:36:37 +0800 Subject: [PATCH] [Fix] Sorts aclgraph batch sizes in ascending order (#4230) ### What this PR does / why we need it? Sorts aclgraph batch sizes in ascending order, corresponding to vLLM [#26016](https://github.com/vllm-project/vllm/pull/26016) Ensures batch sizes for aclgraph are sorted ascending when aclgraph mode is enabled, improving consistency and compatibility with later logic that may depend on order. ### Does this PR introduce _any_ user-facing change? None. ### How was this patch tested? Waiting for #3886 - vLLM version: v0.11.0 - vLLM main: https://github.com/vllm-project/vllm/commit/2918c1b49c88c29783c86f78d2c4221cb9622379 Signed-off-by: Yizhou Liu --- vllm_ascend/worker/model_runner_v1.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py index 3c9fc126..20891e50 100644 --- a/vllm_ascend/worker/model_runner_v1.py +++ b/vllm_ascend/worker/model_runner_v1.py @@ -521,8 +521,12 @@ class NPUModelRunner(LoRAModelRunnerMixin): if self.speculative_config else 0) self.use_aclgraph = self._use_aclgraph() - self.aclgraph_batch_sizes = list( - reversed(self.compilation_config.cudagraph_capture_sizes)) + + # self.aclgraph_batch_sizes is sorted in ascending order. 
+ if (self.compilation_config.cudagraph_capture_sizes and + self.compilation_config.cudagraph_mode != CUDAGraphMode.NONE): + self.aclgraph_batch_sizes = sorted( + self.compilation_config.cudagraph_capture_sizes) self.uniform_decode_query_len = 1 if not self.speculative_config else \ 1 + self.speculative_config.num_speculative_tokens @@ -4101,7 +4105,8 @@ class NPUModelRunner(LoRAModelRunnerMixin): if aclgraph_mode.mixed_mode() != CUDAGraphMode.NONE: aclgraph_runtime_mode = aclgraph_mode.mixed_mode() - compilation_cases = sorted(self.aclgraph_batch_sizes) + # make sure we capture the largest batch size first + compilation_cases = list(reversed(self.aclgraph_batch_sizes)) try: self._capture_aclgraphs(