[Feat] Flash comm allgather EP (#3334)
Support flash comm v1 (Sequence Parallelism) for Allgather EP. - vLLM version: v0.11.0rc3 - vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0 --------- Signed-off-by: realliujiaxu <realliujiaxu@163.com> Co-authored-by: zhaozx-cn <zhaozx2116@163.com>
This commit is contained in:
@@ -500,9 +500,12 @@ class TestAscendMLAImpl(TestBase):
|
||||
mock_up_proj.assert_called_once()
|
||||
mock_npu_fused_infer_attention_score.assert_called_once()
|
||||
|
||||
@patch("torch.ops.vllm.maybe_all_gather_and_maybe_unpad")
|
||||
@patch("vllm_ascend.attention.mla_v1.maybe_npu_prefetch")
|
||||
def test_mla_preprocess(self, magic_npu_fetch):
|
||||
def test_mla_preprocess(self, magic_npu_fetch,
|
||||
mock_maybe_all_gather_and_maybe_unpad):
|
||||
magic_npu_fetch.return_value = MagicMock()
|
||||
mock_maybe_all_gather_and_maybe_unpad.side_effect = lambda x, label: x
|
||||
batch_size = 4
|
||||
seq_len = 8
|
||||
hidden_size = 1024
|
||||
|
||||
Reference in New Issue
Block a user