diff --git a/tests/ut/attention/test_attention_v1.py b/tests/ut/attention/test_attention_v1.py
index e86c1332..2f923d41 100644
--- a/tests/ut/attention/test_attention_v1.py
+++ b/tests/ut/attention/test_attention_v1.py
@@ -7,8 +7,7 @@ from tests.ut.base import TestBase
 from vllm_ascend.attention.attention_v1 import (AscendAttentionBackend,
                                                 AscendAttentionBackendImpl,
                                                 AscendAttentionMetadataBuilder,
-                                                AscendAttentionState,
-                                                AscendMetadata)
+                                                AscendAttentionState)
 from vllm_ascend.attention.utils import AscendCommonAttentionMetadata
 
 
@@ -21,10 +20,6 @@ class TestAscendAttentionBackend(TestBase):
         self.assertEqual(AscendAttentionBackend.get_impl_cls(),
                          AscendAttentionBackendImpl)
 
-    def test_get_metadata_cls(self):
-        self.assertEqual(AscendAttentionBackend.get_metadata_cls(),
-                         AscendMetadata)
-
     def test_get_builder_cls(self):
         self.assertEqual(AscendAttentionBackend.get_builder_cls(),
                          AscendAttentionMetadataBuilder)
diff --git a/tests/ut/attention/test_mla_v1.py b/tests/ut/attention/test_mla_v1.py
index 102b6feb..f3872a15 100644
--- a/tests/ut/attention/test_mla_v1.py
+++ b/tests/ut/attention/test_mla_v1.py
@@ -18,10 +18,6 @@ class TestAscendMLABackend(TestBase):
     def test_get_name(self):
         self.assertEqual(AscendMLABackend.get_name(), "ASCEND_MLA")
 
-    def test_get_metadata_cls(self):
-        self.assertEqual(AscendMLABackend.get_metadata_cls(),
-                         AscendMLAMetadata)
-
     def test_get_builder_cls(self):
         self.assertEqual(AscendMLABackend.get_builder_cls(),
                          AscendMLAMetadataBuilder)
diff --git a/tests/ut/attention/test_sfa_v1.py b/tests/ut/attention/test_sfa_v1.py
index 88f39701..3db637c1 100644
--- a/tests/ut/attention/test_sfa_v1.py
+++ b/tests/ut/attention/test_sfa_v1.py
@@ -15,10 +15,6 @@ class TestAscendSFABackend(TestBase):
     def test_get_name(self):
         self.assertEqual(AscendSFABackend.get_name(), "ASCEND_SFA")
 
-    def test_get_metadata_cls(self):
-        self.assertEqual(AscendSFABackend.get_metadata_cls(),
-                         AscendSFAMetadata)
-
     def test_get_builder_cls(self):
         self.assertEqual(AscendSFABackend.get_builder_cls(),
                          AscendSFAMetadataBuilder)
diff --git a/tests/ut/torchair/test_torchair_mla.py b/tests/ut/torchair/test_torchair_mla.py
index 1f108b3e..b0904a3c 100644
--- a/tests/ut/torchair/test_torchair_mla.py
+++ b/tests/ut/torchair/test_torchair_mla.py
@@ -21,10 +21,6 @@ class TestAscendMLATorchairBackend(TestBase):
         self.assertEqual(AscendMLATorchairBackend.get_name(),
                          "ASCEND_MLA_TORCHAIR")
 
-    def test_get_metadata_cls(self):
-        self.assertEqual(AscendMLATorchairBackend.get_metadata_cls(),
-                         AscendMLATorchairMetadata)
-
     def test_get_builder_cls(self):
         self.assertEqual(AscendMLATorchairBackend.get_builder_cls(),
                          AscendMLATorchairMetadataBuilder)
diff --git a/vllm_ascend/attention/attention_v1.py b/vllm_ascend/attention/attention_v1.py
index 32c2dc03..4f316097 100644
--- a/vllm_ascend/attention/attention_v1.py
+++ b/vllm_ascend/attention/attention_v1.py
@@ -75,10 +75,6 @@ class AscendAttentionBackend(AttentionBackend):
     def get_impl_cls() -> Type["AscendAttentionBackendImpl"]:
         return AscendAttentionBackendImpl
 
-    @staticmethod
-    def get_metadata_cls() -> Type["AscendMetadata"]:
-        return AscendMetadata
-
     @staticmethod
     def get_builder_cls() -> type["AscendAttentionMetadataBuilder"]:
         return AscendAttentionMetadataBuilder
diff --git a/vllm_ascend/attention/mla_v1.py b/vllm_ascend/attention/mla_v1.py
index 47bcc494..5038fc95 100644
--- a/vllm_ascend/attention/mla_v1.py
+++ b/vllm_ascend/attention/mla_v1.py
@@ -7,9 +7,7 @@ import torch
 import torch.distributed as dist
 import torch_npu
 from torch import nn
-from vllm.attention.backends.abstract import (AttentionBackend,
-                                              AttentionMetadata,
-                                              MLAAttentionImpl)
+from vllm.attention.backends.abstract import AttentionBackend, MLAAttentionImpl
 from vllm.config import VllmConfig, get_current_vllm_config
 from vllm.distributed import (get_dcp_group,
                               get_decode_context_model_parallel_rank,
@@ -69,10 +67,6 @@ class AscendMLABackend(AttentionBackend):
     def get_name() -> str:
         return "ASCEND_MLA"
 
-    @staticmethod
-    def get_metadata_cls() -> type["AttentionMetadata"]:
-        return AscendMLAMetadata
-
     @staticmethod
     def get_builder_cls():
         return AscendMLAMetadataBuilder
diff --git a/vllm_ascend/attention/sfa_v1.py b/vllm_ascend/attention/sfa_v1.py
index 9747c2d1..874ee392 100644
--- a/vllm_ascend/attention/sfa_v1.py
+++ b/vllm_ascend/attention/sfa_v1.py
@@ -4,9 +4,7 @@ from typing import TYPE_CHECKING, ClassVar, Optional, Tuple, Type, TypeVar
 import torch
 import torch_npu
 from torch import nn
-from vllm.attention.backends.abstract import (AttentionBackend,
-                                              AttentionMetadata,
-                                              MLAAttentionImpl)
+from vllm.attention.backends.abstract import AttentionBackend, MLAAttentionImpl
 from vllm.config import VllmConfig
 from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.linear import (LinearBase,
@@ -35,10 +33,6 @@ class AscendSFABackend(AttentionBackend):
     def get_name() -> str:
         return "ASCEND_SFA"
 
-    @staticmethod
-    def get_metadata_cls() -> type["AttentionMetadata"]:
-        return AscendSFAMetadata
-
     @staticmethod
     def get_builder_cls():
         return AscendSFAMetadataBuilder
diff --git a/vllm_ascend/torchair/torchair_attention.py b/vllm_ascend/torchair/torchair_attention.py
index a524a3bb..c3836200 100644
--- a/vllm_ascend/torchair/torchair_attention.py
+++ b/vllm_ascend/torchair/torchair_attention.py
@@ -55,10 +55,6 @@ class AscendAttentionTorchairBackend(AscendAttentionBackend):
     def get_impl_cls() -> Type["AscendAttentionTorchairBackendImpl"]:
         return AscendAttentionTorchairBackendImpl
 
-    @staticmethod
-    def get_metadata_cls() -> Type["AscendTorchairMetadata"]:
-        return AscendTorchairMetadata
-
     @staticmethod
     def get_builder_cls() -> type["AscendAttentionTorchairMetadataBuilder"]:
         return AscendAttentionTorchairMetadataBuilder
diff --git a/vllm_ascend/torchair/torchair_mla.py b/vllm_ascend/torchair/torchair_mla.py
index 116b124e..8cbd6603 100644
--- a/vllm_ascend/torchair/torchair_mla.py
+++ b/vllm_ascend/torchair/torchair_mla.py
@@ -6,7 +6,6 @@ import torch
 import torch.nn as nn
 import torch_npu
 from vllm.attention.backends.abstract import (AttentionBackend, AttentionLayer,
-                                              AttentionMetadata,
                                               MLAAttentionImpl)
 from vllm.attention.backends.utils import PAD_SLOT_ID
 from vllm.config import VllmConfig, get_current_vllm_config
@@ -43,10 +42,6 @@ class AscendMLATorchairBackend(AttentionBackend):
     def get_name() -> str:
         return "ASCEND_MLA_TORCHAIR"
 
-    @staticmethod
-    def get_metadata_cls() -> type["AttentionMetadata"]:
-        return AscendMLATorchairMetadata
-
     @staticmethod
     def get_builder_cls():
         return AscendMLATorchairMetadataBuilder
diff --git a/vllm_ascend/torchair/torchair_sfa.py b/vllm_ascend/torchair/torchair_sfa.py
index 12b8d07a..cfa71209 100644
--- a/vllm_ascend/torchair/torchair_sfa.py
+++ b/vllm_ascend/torchair/torchair_sfa.py
@@ -6,9 +6,7 @@ import torch
 import torch.nn as nn
 import torch.nn.functional as F
 import torch_npu
-from vllm.attention.backends.abstract import (AttentionBackend,
-                                              AttentionMetadata,
-                                              MLAAttentionImpl)
+from vllm.attention.backends.abstract import AttentionBackend, MLAAttentionImpl
 from vllm.attention.backends.utils import PAD_SLOT_ID
 from vllm.config import VllmConfig, get_current_vllm_config
 from vllm.distributed import get_tensor_model_parallel_world_size, get_tp_group
@@ -43,10 +41,6 @@ class AscendSFATorchairBackend(AttentionBackend):
     def get_name() -> str:
         return "ASCEND_SFA_TORCHAIR"
 
-    @staticmethod
-    def get_metadata_cls() -> type["AttentionMetadata"]:
-        return AscendSFATorchairMetadata
-
     @staticmethod
     def get_builder_cls():
         return AscendSFATorchairMetadataBuilder
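Note on the resulting API surface: after this patch, every Ascend attention backend exposes its metadata machinery only through get_builder_cls(); the standalone get_metadata_cls() accessor and the AttentionMetadata/AscendMetadata imports are gone. A minimal usage sketch, assuming nothing beyond what the hunks above show (the remaining accessors are get_name(), get_impl_cls(), and get_builder_cls()):

    from vllm_ascend.attention.attention_v1 import (
        AscendAttentionBackend, AscendAttentionMetadataBuilder)

    # The builder class is now the single metadata-related entry point on
    # the backend; there is no separate metadata-class accessor to test.
    builder_cls = AscendAttentionBackend.get_builder_cls()
    assert builder_cls is AscendAttentionMetadataBuilder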