[Bugfix] Fix the input constraints checks for the mlapo and bmm_transpose operators (#5764)

### What this PR does / why we need it?
This PR fixes the input constraint checks for the mlapo and bmm_transpose
operators.

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
CI passed with new added/existing test.

- vLLM version: v0.13.0
- vLLM main:
2f4e6548ef

### Perf
64K/3K, 1P1D, bs=32

before this pr:
TPOT 29 ms, TTFT 47 s, TPS 606 token/s

after this pr:
TPOT 29 ms, TTFT 48 s, TPS 636 token/s

Signed-off-by: rjg-lyh <1318825571@qq.com>
This commit is contained in:
rjg-lyh
2026-01-16 17:52:48 +08:00
committed by GitHub
parent 4f446aec4c
commit 3af91e5ac4
3 changed files with 28 additions and 37 deletions

View File

@@ -36,7 +36,6 @@ class TestAscendSFABackend(TestBase):
class TestAscendSFAMetadata(TestBase):
def test_ascend_sfa_metadata_default(self):
has_prefill = True
num_actual_tokens = 100
slot_mapping = torch.randn(100, 4, 1024)
seq_lens = torch.tensor([30, 50])
@@ -54,7 +53,6 @@ class TestAscendSFAMetadata(TestBase):
attn_state = AscendAttentionState.ChunkedPrefill
metadata = AscendSFAMetadata(
has_prefill=has_prefill,
num_actual_tokens=num_actual_tokens,
slot_mapping=slot_mapping,
seq_lens=seq_lens,
@@ -68,7 +66,6 @@ class TestAscendSFAMetadata(TestBase):
attn_state=attn_state,
)
self.assertEqual(metadata.has_prefill, has_prefill)
self.assertEqual(metadata.num_actual_tokens, num_actual_tokens)
self.assertIs(metadata.slot_mapping, slot_mapping)
self.assertTrue(torch.equal(metadata.seq_lens, seq_lens))