[Misc] Add attention mask (#1673)

Move attention mask from V0 to a common place.
- vLLM version: v0.9.2
- vLLM main:
b942c094e3

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
wangxiyuan
2025-07-09 09:12:03 +08:00
committed by GitHub
parent cc1588be50
commit 392fd7239b
5 changed files with 216 additions and 107 deletions

View File

@@ -74,8 +74,8 @@ class EagleProposer:
mask_len = os.getenv("PAGED_ATTENTION_MASK_LEN", 10000)
self.attn_mask_len = min(self.model_config.max_model_len,
int(mask_len))
self.attn_mask_builder = AttentionMaskBuilder.initialize_from_len(
self.attn_mask_len, self.dtype)
self.attn_mask_builder = AttentionMaskBuilder(self.attn_mask_len,
self.dtype)
def _make_attention_mask(
self,

View File

@@ -325,8 +325,8 @@ class NPUModelRunner(LoRAModelRunnerMixin):
# the size of the pre-constructed mask matrix based on requirements.
mask_len = os.getenv("PAGED_ATTENTION_MASK_LEN", 10000)
attn_mask_len = min(self.model_config.max_model_len, int(mask_len))
self.attn_mask_builder = AttentionMaskBuilder.initialize_from_len(
attn_mask_len, self.dtype)
self.attn_mask_builder = AttentionMaskBuilder(attn_mask_len,
self.dtype)
self.new_kv_cache_bytes = -1
self.torchair_compiled_model = None # type: ignore