[Cleanup] Remove dead code make_attention_mask function (#5818)
### What this PR does / why we need it?
This PR removes the unused `make_attention_mask` function from
`vllm_ascend/worker/v2/attn_utils.py`.
**Why it's dead code:**
- After PR #4870 (attention mask unification refactor), attention mask
generation has been centralized in the `AttentionMaskBuilder` singleton
class
- The mask is now generated directly by metadata builders when needed
(e.g., `AscendAttentionMetadataBuilder`, `AscendMLAMetadataBuilder`)
- The `make_attention_mask` function is no longer called anywhere in the
codebase
- The function's parameters (including `attn_mask` and `spec_attn_mask`)
were also removed from `build_attn_metadata` in the same refactor
**Changes:**
- Remove `make_attention_mask` function (24 lines) from
`vllm_ascend/worker/v2/attn_utils.py`
### Does this PR introduce _any_ user-facing change?
No. This is a code cleanup that removes dead code. No user-facing
behavior changes.
### How was this patch tested?
- Verified that `make_attention_mask` is not called anywhere in the
codebase (via `grep`)
- CI tests pass to ensure no regressions
- The function has been unused since PR #4870 was merged
- vLLM version: v0.13.0
- vLLM main:
2f4e6548ef
Signed-off-by: lico67373 <918688502@qq.com>
Co-authored-by: weijinqian0 <1184188277@qq.com>
This commit is contained in:
@@ -23,7 +23,6 @@ from typing import Any, Tuple
|
||||
import numpy as np
|
||||
import torch
|
||||
from vllm.config import VllmConfig
|
||||
from vllm.config.model import ModelDType
|
||||
from vllm.v1.attention.backends.utils import AttentionMetadataBuilder
|
||||
from vllm.v1.kv_cache_interface import EncoderOnlyAttentionSpec, KVCacheConfig
|
||||
|
||||
@@ -145,26 +144,3 @@ def build_attn_state(
|
||||
else:
|
||||
attn_state = AscendAttentionState.PrefillCacheHit
|
||||
return attn_state
|
||||
|
||||
|
||||
def make_attention_mask(
|
||||
vllm_config: VllmConfig,
|
||||
attn_state: AscendAttentionState,
|
||||
dtype: ModelDType | torch.dtype,
|
||||
device: torch.device,
|
||||
) -> torch.Tensor:
|
||||
"""make attention mask for npu's attention backend."""
|
||||
attn_mask_builder = get_attn_mask_builder(device)
|
||||
# pcp situation.
|
||||
if attn_mask_builder is None:
|
||||
raise ValueError("Attn mask builder is None")
|
||||
# Pooling situation.
|
||||
if vllm_config.model_config.runner_type == "pooling":
|
||||
return attn_mask_builder.get_attn_mask(2048, torch.bool)
|
||||
|
||||
    # TODO(Ronald1995) considering pcp.
|
||||
if vllm_config.model_config.use_mla:
|
||||
# mla prefill
|
||||
if attn_state != AscendAttentionState.DecodeOnly:
|
||||
return attn_mask_builder.get_mla_mask(dtype)
|
||||
return attn_mask_builder.get_splitfuse_attn_mask()
|
||||
|
||||
Reference in New Issue
Block a user