[Bugfix] Fix the input constraint checks for the mlapo and bmm_transpose operators (#5764)
### What this PR does / why we need it?
This PR fixes the input constraint checks for the mlapo and bmm_transpose
operators.
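The checks themselves live in the Ascend operator wrappers and are not reproduced here. As a rough, hypothetical illustration of the kind of guard involved, a shape/dtype check for a batched matmul with a transposed second operand might look like the sketch below; the function name, shapes, and error messages are illustrative, not the code touched by this PR.

```python
# Hypothetical input constraint check for a bmm-with-transposed-B kernel.
# Names and shapes are illustrative, not the actual vllm-ascend code.
import torch


def check_bmm_transpose_inputs(a: torch.Tensor, b: torch.Tensor) -> None:
    """Validate inputs before dispatch: (B, M, K) @ (B, N, K)^T -> (B, M, N)."""
    if a.dim() != 3 or b.dim() != 3:
        raise ValueError(
            f"expected 3-D batched inputs, got {a.dim()}-D and {b.dim()}-D")
    if a.size(0) != b.size(0):
        raise ValueError(f"batch sizes differ: {a.size(0)} vs {b.size(0)}")
    # With B transposed on its last two dims, the contraction is over the
    # last dim of both inputs.
    if a.size(-1) != b.size(-1):
        raise ValueError(
            f"contraction dims differ: {a.size(-1)} vs {b.size(-1)}")
    if a.dtype != b.dtype:
        raise ValueError(f"dtype mismatch: {a.dtype} vs {b.dtype}")


# Usage: passes for compatible shapes, raises otherwise.
check_bmm_transpose_inputs(torch.randn(8, 16, 64), torch.randn(8, 32, 64))
```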
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
CI passed with the newly added and existing tests.
- vLLM version: v0.13.0
- vLLM main: 2f4e6548ef
### Perf
64K/3K, 1P1D, bs=32:

|               | TPOT  | TTFT | TPS          |
|---------------|-------|------|--------------|
| before this PR | 29 ms | 47 s | 606 tokens/s |
| after this PR  | 29 ms | 48 s | 636 tokens/s |
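The TPS change works out to roughly a 5% throughput improvement ((636 − 606) / 606 ≈ 4.95%) at unchanged TPOT.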
Signed-off-by: rjg-lyh <1318825571@qq.com>
@@ -36,7 +36,6 @@ class TestAscendSFABackend(TestBase):
|
||||
class TestAscendSFAMetadata(TestBase):
|
||||
|
||||
def test_ascend_sfa_metadata_default(self):
|
||||
has_prefill = True
|
||||
num_actual_tokens = 100
|
||||
slot_mapping = torch.randn(100, 4, 1024)
|
||||
seq_lens = torch.tensor([30, 50])
|
||||
@@ -54,7 +53,6 @@ class TestAscendSFAMetadata(TestBase):
|
||||
attn_state = AscendAttentionState.ChunkedPrefill
|
||||
|
||||
metadata = AscendSFAMetadata(
|
||||
has_prefill=has_prefill,
|
||||
num_actual_tokens=num_actual_tokens,
|
||||
slot_mapping=slot_mapping,
|
||||
seq_lens=seq_lens,
|
||||
@@ -68,7 +66,6 @@ class TestAscendSFAMetadata(TestBase):
|
||||
attn_state=attn_state,
|
||||
)
|
||||
|
||||
self.assertEqual(metadata.has_prefill, has_prefill)
|
||||
self.assertEqual(metadata.num_actual_tokens, num_actual_tokens)
|
||||
self.assertIs(metadata.slot_mapping, slot_mapping)
|
||||
self.assertTrue(torch.equal(metadata.seq_lens, seq_lens))
|
||||
|
||||
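For reference, below is a self-contained sketch of the test shown in the hunks above, runnable without the vllm-ascend tree. AscendSFAMetadata and AscendAttentionState are local stand-ins reconstructed from the diff context, so the real classes' fields and import paths may differ.

```python
# Self-contained sketch of the metadata test above. AscendSFAMetadata and
# AscendAttentionState are local stand-ins reconstructed from the diff
# context; the real vllm-ascend classes may have additional fields.
import unittest
from dataclasses import dataclass
from enum import Enum, auto

import torch


class AscendAttentionState(Enum):
    # Stand-in enum; only the value used below is defined.
    ChunkedPrefill = auto()


@dataclass
class AscendSFAMetadata:
    has_prefill: bool
    num_actual_tokens: int
    slot_mapping: torch.Tensor
    seq_lens: torch.Tensor
    attn_state: AscendAttentionState


class TestAscendSFAMetadata(unittest.TestCase):

    def test_ascend_sfa_metadata_default(self):
        has_prefill = True
        num_actual_tokens = 100
        slot_mapping = torch.randn(100, 4, 1024)
        seq_lens = torch.tensor([30, 50])
        attn_state = AscendAttentionState.ChunkedPrefill

        metadata = AscendSFAMetadata(
            has_prefill=has_prefill,
            num_actual_tokens=num_actual_tokens,
            slot_mapping=slot_mapping,
            seq_lens=seq_lens,
            attn_state=attn_state,
        )

        self.assertEqual(metadata.has_prefill, has_prefill)
        self.assertEqual(metadata.num_actual_tokens, num_actual_tokens)
        self.assertIs(metadata.slot_mapping, slot_mapping)
        self.assertTrue(torch.equal(metadata.seq_lens, seq_lens))


if __name__ == "__main__":
    unittest.main()
```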