Remove useless PD check in deepseek (#3161)

### What this PR does / why we need it?
Remove useless PD check in deepseek

### How was this patch tested?


- vLLM version: v0.10.2
- vLLM main:
f225ea7dd9

Signed-off-by: wangxiaoteng <wangxiaoteng@huawei.com>
Co-authored-by: wangxiaoteng <wangxiaoteng@huawei.com>
This commit is contained in:
liziyu
2025-09-24 23:25:47 +08:00
committed by GitHub
parent 4ee58e213b
commit 464270e4ca

View File

@@ -32,8 +32,7 @@ import torch_npu
 from torch import nn
 from transformers import PretrainedConfig
 from vllm.attention import Attention, AttentionMetadata
-from vllm.config import (CacheConfig, ModelConfig, VllmConfig,
-                         get_current_vllm_config)
+from vllm.config import CacheConfig, ModelConfig, VllmConfig
 from vllm.distributed import (get_pp_group, get_tensor_model_parallel_rank,
                               get_tensor_model_parallel_world_size,
                               get_tp_group, split_tensor_along_last_dim,
@@ -376,10 +375,6 @@ class TorchairDeepseekV2MoE(nn.Module):
         self.tp_group = get_tp_group().device_group
         self.tp_rank = get_tp_group().rank_in_group
         self.ep_group = get_ep_group()
-        self.kv_consumer = None
-        transfer_config = get_current_vllm_config().kv_transfer_config
-        if transfer_config is not None:
-            self.kv_consumer = transfer_config.kv_role == "kv_consumer"
         self.params_dtype = torch.get_default_dtype()
         self.rm_router_logits = self.experts.rm_router_logits
@@ -397,12 +392,6 @@ class TorchairDeepseekV2MoE(nn.Module):
         is_prefill = forward_context.with_prefill
-        # If this node is kv_consumer, we force the moe always runs in decode path to make sure
-        # the behaviour aligned between dummy_run and normal model_execute.
-        if self.kv_consumer:
-            is_prefill = False
-            enable_force_load_balance = False
         # router_logits: (num_tokens, n_experts)
         router_logits = None
         if not self.rm_router_logits and not self.multistream_overlap_shared_expert: