[main] remove dbo code (#3712)

### What this PR does / why we need it? Remove the dbo code, since vLLM itself now supports dbo via PR: https://github.com/vllm-project/vllm/pull/23693. ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.11.0rc3 - vLLM main: 17c540a993 Signed-off-by: zzzzwwjj <1183291235@qq.com>
2025-10-25 15:53:01 +08:00
parent d9cdc65854
commit e5676fc36e
26 changed files with 69 additions and 1588 deletions
--- a/vllm_ascend/torchair/torchair_model_runner.py
+++ b/vllm_ascend/torchair/torchair_model_runner.py
@@ -110,30 +110,28 @@ class NPUTorchairModelRunner(NPUModelRunner):
        self.mc2_tokens_capacity = num_tokens_per_tp_rank * tp_size

    def _sync_metadata_across_dp(
-            self, num_tokens: int, with_prefill: bool, enable_dbo: bool
-    ) -> tuple[int, Optional[torch.Tensor], bool, bool]:
+            self, num_tokens: int,
+            with_prefill: bool) -> tuple[int, Optional[torch.Tensor], bool]:
        """Override from NPUModelRunner to pad num_tokens"""
        if self.enable_shared_expert_dp:
            # Padding is not required for shared_expert_dp cases in eager mode.
-            return num_tokens, None, with_prefill, enable_dbo
+            return num_tokens, None, with_prefill
        if self.dp_size == 1:
            if not with_prefill:
                maybe_padded_num_tokens = self.select_torchair_padded_batch_size(
                    num_tokens)
-                return maybe_padded_num_tokens, None, with_prefill, enable_dbo
-            return num_tokens, None, with_prefill, enable_dbo
+                return maybe_padded_num_tokens, None, with_prefill
+            return num_tokens, None, with_prefill

-        num_tokens_across_dp = torch.zeros(self.dp_size + 2,
+        num_tokens_across_dp = torch.zeros(self.dp_size + 1,
                                           dtype=torch.int32,
                                           device="npu")
        num_tokens_across_dp[self.dp_rank] = num_tokens
-        num_tokens_across_dp[-2] = int(with_prefill)
-        num_tokens_across_dp[-1] = int(not enable_dbo)
+        num_tokens_across_dp[-1] = int(with_prefill)
        dist.all_reduce(num_tokens_across_dp,
                        group=get_dp_group().device_group)
-        with_prefill = bool(num_tokens_across_dp[-2])
-        enable_dbo = not bool(num_tokens_across_dp[-1])
-        num_tokens_across_dp = num_tokens_across_dp[:-2]
+        with_prefill = bool(num_tokens_across_dp[-1])
+        num_tokens_across_dp = num_tokens_across_dp[:-1]

        if not with_prefill:
            max_num_token = num_tokens_across_dp.max().item()
@@ -146,7 +144,7 @@ class NPUTorchairModelRunner(NPUModelRunner):
        else:
            maybe_padded_num_tokens = num_tokens

-        return maybe_padded_num_tokens, num_tokens_across_dp, with_prefill, enable_dbo
+        return maybe_padded_num_tokens, num_tokens_across_dp, with_prefill

    def _build_dummy_attn_metadata(
        self,