Add notes for DeepSeek-related docs and remove unnecessary comments (#590)

### What this PR does / why we need it?
Add notes for DeepSeek's patch and remove some unnecessary
comments.

---------

Signed-off-by: ganyi <pleaplusone.gy@gmail.com>
This commit is contained in:
Pleaplusone
2025-04-22 09:59:09 +08:00
committed by GitHub
parent c5850d302d
commit d12a057df8
6 changed files with 78 additions and 199 deletions

View File

@@ -364,23 +364,6 @@ def select_experts(
Raises:
ValueError: If an unsupported scoring function is provided.
"""
# assert hidden_states.shape[0] == router_logits.shape[0], (
# "Number of tokens mismatch")
# if os.environ.get("VLLM_ENABLE_GRAPH_MODE") == "1" and not is_prefill:
# topk_weight, topk_idx, _ = torch.ops.npu_inference.npu_moe_gating_top_k(
# router_logits,
# k=top_k, # topk当前写8
# bias=e_score_correction_bias,
# k_group=topk_group, # fix: 4
# group_count=num_expert_group, # fix 8
# group_select_mode=1, # 0: group中的最大; 1: topk2.sum(fix)
# renorm=0, # 0: softmax->topk(fix); 1: topk->softmax
# norm_type=1, # 0: softmax; 1: sigmoid(fix)
# # out_flag=False, # todo new api; 第三个输出是否输出
# # y2_flag=False, # old api; 第三个输出是否输出
# routed_scaling_factor=1,
# eps=float(1e-20))
# return topk_weight, topk_idx
if custom_routing_function is not None:
raise NotImplementedError(
@@ -483,8 +466,6 @@ class AscendUnquantizedFusedMoEMethod(UnquantizedFusedMoEMethod):
is_prefill=False,
**kwargs,
):
# assert router_logits.shape[
# 1] == global_num_experts, "Number of global experts mismatch"
# set prefill as false always, should fix this
topk_weights, topk_ids = select_experts(
hidden_states=x,
@@ -670,7 +651,6 @@ class AscendFusedMoE(FusedMoE):
scatter_dim=0,
group=get_dp_group().device_group)
# if self.reduce_results and self.tp_size > 1:
if self.reduce_results and (self.tp_size > 1 or self.ep_size > 1):
final_hidden_states = tensor_model_parallel_all_reduce(
final_hidden_states)