From 918ded915577ab3c658e6a8cb7e21ae89e3470f7 Mon Sep 17 00:00:00 2001 From: Mengqing Cao Date: Mon, 20 Oct 2025 15:29:48 +0800 Subject: [PATCH] [BugFix][HybridKV] Update the check logic of reinitializing inputbatch (#3540) ### What this PR does / why we need it? Update the check logic for reinitializing `InputBatch`. This is a follow-up PR to #3477. `kernel_block_sizes` is a `list[list[int]]`, so the original comparison against `[self.cache_config.block_size]` always re-initialized `InputBatch` when using hybrid blocks. This PR fixes that by comparing against `[[self.cache_config.block_size]]` instead. ### How was this patch tested? Tested locally with qwen3-next. - vLLM version: v0.11.0rc3 - vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0 Signed-off-by: MengqingCao --- vllm_ascend/worker/model_runner_v1.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py index ae9c695..a400f90 100644 --- a/vllm_ascend/worker/model_runner_v1.py +++ b/vllm_ascend/worker/model_runner_v1.py @@ -3147,7 +3147,7 @@ class NPUModelRunner(LoRAModelRunnerMixin): if block_sizes != [ self.cache_config.block_size - ] or kernel_block_sizes != [self.cache_config.block_size]: + ] or kernel_block_sizes != [[self.cache_config.block_size]]: assert self.cache_config.cpu_offload_gb == 0, ( "Cannot re-initialize the input batch when CPU weight " "offloading is enabled. See https://github.com/vllm-project/vllm/pull/18298 " # noqa: E501