From 918ded915577ab3c658e6a8cb7e21ae89e3470f7 Mon Sep 17 00:00:00 2001
From: Mengqing Cao <cmq0113@163.com>
Date: Mon, 20 Oct 2025 15:29:48 +0800
Subject: [PATCH] [BugFix][HybridKV] Update the check logic of reinitializing
 inputbatch (#3540)

### What this PR does / why we need it?
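Update the check logic for reinitializing `InputBatch`. This is a follow-up
PR to #3477. `kernel_block_sizes` is a `list[list[int]]`, so comparing it
against the flat list `[block_size]` never matched and the original logic
would always reinitialize `InputBatch` when using hybrid blocks. This PR
fixes that by comparing against the nested form `[[block_size]]`.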

### How was this patch tested?
Tested locally with qwen3-next.
- vLLM version: v0.11.0rc3
- vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0

Signed-off-by: MengqingCao <cmq0113@163.com>
---
 vllm_ascend/worker/model_runner_v1.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py
index ae9c695..a400f90 100644
--- a/vllm_ascend/worker/model_runner_v1.py
+++ b/vllm_ascend/worker/model_runner_v1.py
@@ -3147,7 +3147,7 @@ class NPUModelRunner(LoRAModelRunnerMixin):
 
         if block_sizes != [
                 self.cache_config.block_size
-        ] or kernel_block_sizes != [self.cache_config.block_size]:
+        ] or kernel_block_sizes != [[self.cache_config.block_size]]:
             assert self.cache_config.cpu_offload_gb == 0, (
                 "Cannot re-initialize the input batch when CPU weight "
                 "offloading is enabled. See https://github.com/vllm-project/vllm/pull/18298 "  # noqa: E501