[main] remove dbo code (#3712)

### What this PR does / why we need it?
Remove the dbo code.
vLLM now supports dbo upstream via PR:
https://github.com/vllm-project/vllm/pull/23693.

### Does this PR introduce _any_ user-facing change?

### How was this patch tested?

- vLLM version: v0.11.0rc3
- vLLM main:
17c540a993

Signed-off-by: zzzzwwjj <1183291235@qq.com>
This commit is contained in:
zzzzwwjj
2025-10-25 15:53:01 +08:00
committed by GitHub
parent d9cdc65854
commit e5676fc36e
26 changed files with 69 additions and 1588 deletions

View File

@@ -110,30 +110,28 @@ class NPUTorchairModelRunner(NPUModelRunner):
self.mc2_tokens_capacity = num_tokens_per_tp_rank * tp_size
def _sync_metadata_across_dp(
self, num_tokens: int, with_prefill: bool, enable_dbo: bool
) -> tuple[int, Optional[torch.Tensor], bool, bool]:
self, num_tokens: int,
with_prefill: bool) -> tuple[int, Optional[torch.Tensor], bool]:
"""Override from NPUModelRunner to pad num_tokens"""
if self.enable_shared_expert_dp:
# Padding is not required for shared_expert_dp cases in eager mode.
return num_tokens, None, with_prefill, enable_dbo
return num_tokens, None, with_prefill
if self.dp_size == 1:
if not with_prefill:
maybe_padded_num_tokens = self.select_torchair_padded_batch_size(
num_tokens)
return maybe_padded_num_tokens, None, with_prefill, enable_dbo
return num_tokens, None, with_prefill, enable_dbo
return maybe_padded_num_tokens, None, with_prefill
return num_tokens, None, with_prefill
num_tokens_across_dp = torch.zeros(self.dp_size + 2,
num_tokens_across_dp = torch.zeros(self.dp_size + 1,
dtype=torch.int32,
device="npu")
num_tokens_across_dp[self.dp_rank] = num_tokens
num_tokens_across_dp[-2] = int(with_prefill)
num_tokens_across_dp[-1] = int(not enable_dbo)
num_tokens_across_dp[-1] = int(with_prefill)
dist.all_reduce(num_tokens_across_dp,
group=get_dp_group().device_group)
with_prefill = bool(num_tokens_across_dp[-2])
enable_dbo = not bool(num_tokens_across_dp[-1])
num_tokens_across_dp = num_tokens_across_dp[:-2]
with_prefill = bool(num_tokens_across_dp[-1])
num_tokens_across_dp = num_tokens_across_dp[:-1]
if not with_prefill:
max_num_token = num_tokens_across_dp.max().item()
@@ -146,7 +144,7 @@ class NPUTorchairModelRunner(NPUModelRunner):
else:
maybe_padded_num_tokens = num_tokens
return maybe_padded_num_tokens, num_tokens_across_dp, with_prefill, enable_dbo
return maybe_padded_num_tokens, num_tokens_across_dp, with_prefill
def _build_dummy_attn_metadata(
self,