From f2d8493221744c0d8e8b6df2aa7fc7f031d0aaac Mon Sep 17 00:00:00 2001 From: fan2956 Date: Sun, 28 Sep 2025 18:22:08 +0800 Subject: [PATCH] [BugFix] Fix ascend scheduler assert error (#3191) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What this PR does / why we need it? Running a multimodal model with the Ascend scheduler may trigger an assertion failure: `assert (request.num_tokens - request.num_computed_tokens) == 1`. This patch makes the scheduler break out of the scheduling loop not only when `num_new_tokens == 0` but also when `_try_schedule_encoder_inputs` returns an empty `encoder_inputs_to_schedule`. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - vLLM version: v0.10.2 - vLLM main: https://github.com/vllm-project/vllm/commit/17b4c6685ce62d5652654784d6771a3d38e4273e --------- Signed-off-by: fan2956 --- vllm_ascend/core/scheduler.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/vllm_ascend/core/scheduler.py b/vllm_ascend/core/scheduler.py index d77465d..f4c8cc7 100644 --- a/vllm_ascend/core/scheduler.py +++ b/vllm_ascend/core/scheduler.py @@ -208,15 +208,16 @@ class AscendScheduler(Scheduler): assert num_new_tokens > 0 blocks = new_computed_blocks.blocks[0] - # Schedule encoder inputs. - if request.has_encoder_inputs: - (encoder_inputs_to_schedule, num_new_tokens, - new_encoder_budget) = self._try_schedule_encoder_inputs( - request, num_computed_tokens, num_new_tokens, - encoder_budget) - if num_new_tokens == 0: - # The request cannot be scheduled. - break + # Schedule encoder inputs. + if request.has_encoder_inputs: + (encoder_inputs_to_schedule, num_new_tokens, + new_encoder_budget) = self._try_schedule_encoder_inputs( + request, num_computed_tokens, num_new_tokens, + encoder_budget) + if num_new_tokens == 0 or len( + encoder_inputs_to_schedule) == 0: + # The request cannot be scheduled. + break watermark = getattr(self.scheduler_config, "watermark", 0.01) if not self._check_watermark_for_prefill(request, num_new_tokens,