[Bugfix] Fix the compatibility issue of may_reinitialize_input_batch (#6290)

### What this PR does / why we need it?
Added a check in the `may_reinitialize_input_batch` method to verify
whether the backend implements the `get_supported_block_size` method.

### Does this PR introduce _any_ user-facing change?
no user-facing change

### How was this patch tested?
Only a few lines of code within the methods were modified, and the
format-check test has passed.
- vLLM version: v0.14.1
- vLLM main:
dc917cceb8

---------

Signed-off-by: Debuuuuger <huangzr@cmbchina.com>
Signed-off-by: debuger <102402761+huangazazaz@users.noreply.github.com>
Signed-off-by: Debuuuuger <12110718@mail.sustech.edu.cn>
Co-authored-by: Debuuuuger <huangzr@cmbchina.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
debuger
2026-02-02 19:16:26 +08:00
committed by GitHub
parent 7932255c06
commit c1618a0427
5 changed files with 15 additions and 7 deletions

View File

@@ -127,7 +127,7 @@ class AscendAttentionBackend(AttentionBackend):
value_caches[dst_indices] = value_caches[src_indices]
@staticmethod
def get_supported_block_size() -> list[int]:
def get_supported_kernel_block_sizes() -> list[int]:
return [128]
@@ -227,7 +227,7 @@ class AscendAttentionMetadataBuilder(AttentionMetadataBuilder[AscendMetadata]):
self.compilation_config = vllm_config.compilation_config
self.device = device
self.max_num_blocks_per_req = cdiv(
self.model_config.max_model_len, AscendAttentionBackend.get_supported_block_size()[0]
self.model_config.max_model_len, AscendAttentionBackend.get_supported_kernel_block_sizes()[0]
)
self.speculative_config = vllm_config.speculative_config

View File

@@ -89,6 +89,10 @@ class AscendMLABackend(AttentionBackend):
return AscendMlaCPImpl
return AscendMLAImpl
@staticmethod
def get_supported_kernel_block_sizes() -> list[int]:
return [128]
@dataclass
class ChunkedContextMetadata:

View File

@@ -78,6 +78,10 @@ class AscendSFABackend(AttentionBackend):
def get_impl_cls() -> type["AscendSFAImpl"]:
return AscendSFAImpl
@staticmethod
def get_supported_kernel_block_sizes() -> list[int]:
return [128]
@dataclass
class DSACPContext: