[Misc] Remove useless PD check in deepseek (#2739)

### What this PR does / why we need it?
Remove useless PD check in deepseek


- vLLM version: v0.10.1.1
- vLLM main:
6c7af8110a

---------

Signed-off-by: liziyu <liziyu16@huawei.com>
This commit is contained in:
liziyu
2025-09-04 22:22:19 +08:00
committed by GitHub
parent 3a2a7d88db
commit 4c90fa79ca

View File

@@ -32,8 +32,7 @@ import torch_npu
from torch import nn
from transformers import PretrainedConfig
from vllm.attention import Attention, AttentionMetadata
-from vllm.config import (CacheConfig, ModelConfig, VllmConfig,
-                         get_current_vllm_config)
+from vllm.config import CacheConfig, ModelConfig, VllmConfig
from vllm.distributed import (get_pp_group, get_tensor_model_parallel_rank,
                              get_tensor_model_parallel_world_size,
                              get_tp_group, split_tensor_along_last_dim,
@@ -374,10 +373,6 @@ class CustomDeepseekV2MoE(nn.Module):
        self.tp_group = get_tp_group().device_group
        self.tp_rank = get_tp_group().rank_in_group
        self.ep_group = get_ep_group()
-        self.kv_consumer = None
-        transfer_config = get_current_vllm_config().kv_transfer_config
-        if transfer_config is not None:
-            self.kv_consumer = transfer_config.kv_role == "kv_consumer"
        self.params_dtype = torch.get_default_dtype()
        self.rm_router_logits = self.experts.rm_router_logits
@@ -395,12 +390,6 @@ class CustomDeepseekV2MoE(nn.Module):
        is_prefill = forward_context.with_prefill
-        # If this node is kv_consumer, we force the moe always runs in decode path to make sure
-        # the behaviour aligned between dummy_run and normal model_execute.
-        if self.kv_consumer:
-            is_prefill = False
-            enable_force_load_balance = False
        # router_logits: (num_tokens, n_experts)
        router_logits = None
        if not self.rm_router_logits and not self.enable_multistream_moe: