[Feat] Shared expert dp for deepseek and deepseek_mtp (#3495)

### What this PR does / why we need it?
Adds shared expert DP for deepseek and deepseek_mtp. It can be combined with SP to improve performance.

### How was this patch tested?
- vLLM version: v0.11.0rc3
- vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0

---------

Signed-off-by: zhaozx-cn <zhaozx2116@163.com>
Co-authored-by: realliujiaxu <realliujiaxu@163.com>
2025-10-17 15:06:37 +08:00
parent d9ee491f70
commit bf87606932
9 changed files with 57 additions and 10 deletions
--- a/tests/ut/models/test_deepseek_mtp.py
+++ b/tests/ut/models/test_deepseek_mtp.py
@@ -57,6 +57,8 @@ class TestCustomDeepSeekMultiTokenPredictorLayer(PytestBase):
                            'eh_proj',
                            return_value=torch.randn(2, 3, 768))
        mocker.patch("torch.cat", return_value=torch.randn(2, 3, 768))
+        mocker.patch("torch.ops.vllm.maybe_all_gather_and_maybe_unpad",
+                     lambda x, label: x)
        mtp_layer.mtp_block.return_value = (torch.randn(2, 3, 768),
                                            torch.randn(2, 3, 768))

@@ -182,6 +184,8 @@ class TestCustomDeepSeekMTP(PytestBase):
        assert isinstance(mtp, CustomDeepSeekMTP)

    def test_forward(self, mocker: MockerFixture, setup_mtp):
+        mocker.patch("torch.ops.vllm.maybe_all_gather_and_maybe_unpad",
+                     lambda x, label: x)
        input_ids = torch.tensor([[1, 2, 3]])
        positions = torch.tensor([[0, 1, 2]])
        kv_caches = [torch.tensor([[0.1, 0.2, 0.3]])]