[Misc] Add attention mask (#1673)
Move the attention mask from V0 to a common place.
- vLLM version: v0.9.2
- vLLM main:
b942c094e3
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
@@ -74,8 +74,8 @@ class EagleProposer:
|
||||
mask_len = os.getenv("PAGED_ATTENTION_MASK_LEN", 10000)
|
||||
self.attn_mask_len = min(self.model_config.max_model_len,
|
||||
int(mask_len))
|
||||
self.attn_mask_builder = AttentionMaskBuilder.initialize_from_len(
|
||||
self.attn_mask_len, self.dtype)
|
||||
self.attn_mask_builder = AttentionMaskBuilder(self.attn_mask_len,
|
||||
self.dtype)
|
||||
|
||||
def _make_attention_mask(
|
||||
self,
|
||||
|
||||
@@ -325,8 +325,8 @@ class NPUModelRunner(LoRAModelRunnerMixin):
|
||||
# the size of the pre-constructed mask matrix based on requirements.
|
||||
mask_len = os.getenv("PAGED_ATTENTION_MASK_LEN", 10000)
|
||||
attn_mask_len = min(self.model_config.max_model_len, int(mask_len))
|
||||
self.attn_mask_builder = AttentionMaskBuilder.initialize_from_len(
|
||||
attn_mask_len, self.dtype)
|
||||
self.attn_mask_builder = AttentionMaskBuilder(attn_mask_len,
|
||||
self.dtype)
|
||||
|
||||
self.new_kv_cache_bytes = -1
|
||||
self.torchair_compiled_model = None # type: ignore
|
||||
|
||||
Reference in New Issue
Block a user