[Feat] Shared expert dp for deepseek and deepseek_mtp (#3495)

### What this PR does / why we need it?

This PR adds shared expert DP for deepseek and deepseek_mtp. It can be combined with SP to improve performance.

### How was this patch tested?

- vLLM version: v0.11.0rc3
- vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0

---------

Signed-off-by: zhaozx-cn <zhaozx2116@163.com>
Co-authored-by: realliujiaxu <realliujiaxu@163.com>
2025-10-17 15:06:37 +08:00
parent d9ee491f70
commit bf87606932
9 changed files with 57 additions and 10 deletions
--- a/vllm_ascend/models/deepseek_mtp.py
+++ b/vllm_ascend/models/deepseek_mtp.py
@@ -88,6 +88,8 @@ class CustomDeepSeekMultiTokenPredictorLayer(DeepSeekMultiTokenPredictorLayer):
        spec_step_index: int = 0,
    ) -> torch.Tensor:
        assert inputs_embeds is not None
+        inputs_embeds = torch.ops.vllm.maybe_all_gather_and_maybe_unpad(
+            inputs_embeds, True)
        # masking inputs at position 0, as not needed by MTP
        inputs_embeds = torch.where((positions == 0).unsqueeze(-1),
                                    torch.zeros_like(inputs_embeds),
@@ -200,4 +202,6 @@ class CustomDeepSeekMTP(DeepSeekMTP):
        hidden_states = self.model(input_ids, positions, kv_caches,
                                   attn_metadata, previous_hidden_states,
                                   inputs_embeds, spec_step_idx)
+        hidden_states = torch.ops.vllm.maybe_all_gather_and_maybe_unpad(
+            hidden_states, True)
        return hidden_states