[Bugfix] Fix the input constraint checks for the mlapo and bmm_transpose operators (#5764)
### What this PR does / why we need it?
This PR fixes the input constraint checks for the mlapo and bmm_transpose
operators.
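The checks themselves live in the Ascend operator wrappers and are not reproduced here. As a rough, hypothetical illustration of the kind of guard involved, a shape/dtype check for a batched matmul with a transposed second operand might look like the sketch below; the function name, shapes, and error messages are illustrative, not the code touched by this PR.

```python
# Hypothetical input constraint check for a bmm-with-transposed-B kernel.
# Names and shapes are illustrative, not the actual vllm-ascend code.
import torch


def check_bmm_transpose_inputs(a: torch.Tensor, b: torch.Tensor) -> None:
    """Validate inputs before dispatch: (B, M, K) @ (B, N, K)^T -> (B, M, N)."""
    if a.dim() != 3 or b.dim() != 3:
        raise ValueError(
            f"expected 3-D batched inputs, got {a.dim()}-D and {b.dim()}-D")
    if a.size(0) != b.size(0):
        raise ValueError(f"batch sizes differ: {a.size(0)} vs {b.size(0)}")
    # With B transposed on its last two dims, the contraction is over the
    # last dim of both inputs.
    if a.size(-1) != b.size(-1):
        raise ValueError(
            f"contraction dims differ: {a.size(-1)} vs {b.size(-1)}")
    if a.dtype != b.dtype:
        raise ValueError(f"dtype mismatch: {a.dtype} vs {b.dtype}")


# Usage: passes for compatible shapes, raises otherwise.
check_bmm_transpose_inputs(torch.randn(8, 16, 64), torch.randn(8, 32, 64))
```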
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
CI passed with the newly added and existing tests.
- vLLM version: v0.13.0
- vLLM main: 2f4e6548ef
### Perf
64K/3K, 1P1D, bs=32:

|               | TPOT  | TTFT | TPS          |
|---------------|-------|------|--------------|
| before this PR | 29 ms | 47 s | 606 tokens/s |
| after this PR  | 29 ms | 48 s | 636 tokens/s |
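The TPS change works out to roughly a 5% throughput improvement ((636 − 606) / 606 ≈ 4.95%) at unchanged TPOT.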
Signed-off-by: rjg-lyh <1318825571@qq.com>
@@ -36,7 +36,6 @@ class TestAscendSFABackend(TestBase):
|
||||
class TestAscendSFAMetadata(TestBase):
|
||||
|
||||
def test_ascend_sfa_metadata_default(self):
|
||||
has_prefill = True
|
||||
num_actual_tokens = 100
|
||||
slot_mapping = torch.randn(100, 4, 1024)
|
||||
seq_lens = torch.tensor([30, 50])
|
||||
@@ -54,7 +53,6 @@ class TestAscendSFAMetadata(TestBase):
|
||||
attn_state = AscendAttentionState.ChunkedPrefill
|
||||
|
||||
metadata = AscendSFAMetadata(
|
||||
has_prefill=has_prefill,
|
||||
num_actual_tokens=num_actual_tokens,
|
||||
slot_mapping=slot_mapping,
|
||||
seq_lens=seq_lens,
|
||||
@@ -68,7 +66,6 @@ class TestAscendSFAMetadata(TestBase):
|
||||
attn_state=attn_state,
|
||||
)
|
||||
|
||||
self.assertEqual(metadata.has_prefill, has_prefill)
|
||||
self.assertEqual(metadata.num_actual_tokens, num_actual_tokens)
|
||||
self.assertIs(metadata.slot_mapping, slot_mapping)
|
||||
self.assertTrue(torch.equal(metadata.seq_lens, seq_lens))
|
||||
|
||||
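For reference, below is a self-contained sketch of the test shown in the hunks above, runnable without the vllm-ascend tree. AscendSFAMetadata and AscendAttentionState are local stand-ins reconstructed from the diff context, so the real classes' fields and import paths may differ.

```python
# Self-contained sketch of the metadata test above. AscendSFAMetadata and
# AscendAttentionState are local stand-ins reconstructed from the diff
# context; the real vllm-ascend classes may have additional fields.
import unittest
from dataclasses import dataclass
from enum import Enum, auto

import torch


class AscendAttentionState(Enum):
    # Stand-in enum; only the value used below is defined.
    ChunkedPrefill = auto()


@dataclass
class AscendSFAMetadata:
    has_prefill: bool
    num_actual_tokens: int
    slot_mapping: torch.Tensor
    seq_lens: torch.Tensor
    attn_state: AscendAttentionState


class TestAscendSFAMetadata(unittest.TestCase):

    def test_ascend_sfa_metadata_default(self):
        has_prefill = True
        num_actual_tokens = 100
        slot_mapping = torch.randn(100, 4, 1024)
        seq_lens = torch.tensor([30, 50])
        attn_state = AscendAttentionState.ChunkedPrefill

        metadata = AscendSFAMetadata(
            has_prefill=has_prefill,
            num_actual_tokens=num_actual_tokens,
            slot_mapping=slot_mapping,
            seq_lens=seq_lens,
            attn_state=attn_state,
        )

        self.assertEqual(metadata.has_prefill, has_prefill)
        self.assertEqual(metadata.num_actual_tokens, num_actual_tokens)
        self.assertIs(metadata.slot_mapping, slot_mapping)
        self.assertTrue(torch.equal(metadata.seq_lens, seq_lens))


if __name__ == "__main__":
    unittest.main()
```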