[Misc] Add attention mask (#1673)

Move attention mask from V0 to a common place.
- vLLM version: v0.9.2
- vLLM main:
b942c094e3

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
wangxiyuan
2025-07-09 09:12:03 +08:00
committed by GitHub
parent cc1588be50
commit 392fd7239b
5 changed files with 216 additions and 107 deletions

View File

@@ -74,8 +74,8 @@ class EagleProposer:
mask_len = os.getenv("PAGED_ATTENTION_MASK_LEN", 10000)
self.attn_mask_len = min(self.model_config.max_model_len,
int(mask_len))
self.attn_mask_builder = AttentionMaskBuilder.initialize_from_len(
self.attn_mask_len, self.dtype)
self.attn_mask_builder = AttentionMaskBuilder(self.attn_mask_len,
self.dtype)
def _make_attention_mask(
self,

View File

@@ -325,8 +325,8 @@ class NPUModelRunner(LoRAModelRunnerMixin):
# the size of the pre-constructed mask matrix based on requirements.
mask_len = os.getenv("PAGED_ATTENTION_MASK_LEN", 10000)
attn_mask_len = min(self.model_config.max_model_len, int(mask_len))
self.attn_mask_builder = AttentionMaskBuilder.initialize_from_len(
attn_mask_len, self.dtype)
self.attn_mask_builder = AttentionMaskBuilder(attn_mask_len,
self.dtype)
self.new_kv_cache_bytes = -1
self.torchair_compiled_model = None # type: ignore