Add note for deepseek related docs and remove unnecessary comments (#590)
### What this PR does / why we need it?

Add notes for DeepSeek's patch and remove some unnecessary comments.

---------

Signed-off-by: ganyi <pleaplusone.gy@gmail.com>
This commit is contained in:
@@ -364,23 +364,6 @@ def select_experts(
|
||||
Raises:
|
||||
ValueError: If an unsupported scoring function is provided.
|
||||
"""
|
||||
# assert hidden_states.shape[0] == router_logits.shape[0], (
|
||||
# "Number of tokens mismatch")
|
||||
# if os.environ.get("VLLM_ENABLE_GRAPH_MODE") == "1" and not is_prefill:
|
||||
# topk_weight, topk_idx, _ = torch.ops.npu_inference.npu_moe_gating_top_k(
|
||||
# router_logits,
|
||||
# k=top_k, # topk当前写8
|
||||
# bias=e_score_correction_bias,
|
||||
# k_group=topk_group, # fix: 4
|
||||
# group_count=num_expert_group, # fix 8
|
||||
# group_select_mode=1, # 0: group中的最大; 1: topk2.sum(fix)
|
||||
# renorm=0, # 0: softmax->topk(fix); 1: topk->softmax
|
||||
# norm_type=1, # 0: softmax; 1: sigmoid(fix)
|
||||
# # out_flag=False, # todo new api; 第三个输出是否输出
|
||||
# # y2_flag=False, # old api; 第三个输出是否输出
|
||||
# routed_scaling_factor=1,
|
||||
# eps=float(1e-20))
|
||||
# return topk_weight, topk_idx
|
||||
|
||||
if custom_routing_function is not None:
|
||||
raise NotImplementedError(
|
||||
@@ -483,8 +466,6 @@ class AscendUnquantizedFusedMoEMethod(UnquantizedFusedMoEMethod):
|
||||
is_prefill=False,
|
||||
**kwargs,
|
||||
):
|
||||
# assert router_logits.shape[
|
||||
# 1] == global_num_experts, "Number of global experts mismatch"
|
||||
# set prefill as false always, should fix this
|
||||
topk_weights, topk_ids = select_experts(
|
||||
hidden_states=x,
|
||||
@@ -670,7 +651,6 @@ class AscendFusedMoE(FusedMoE):
|
||||
scatter_dim=0,
|
||||
group=get_dp_group().device_group)
|
||||
|
||||
# if self.reduce_results and self.tp_size > 1:
|
||||
if self.reduce_results and (self.tp_size > 1 or self.ep_size > 1):
|
||||
final_hidden_states = tensor_model_parallel_all_reduce(
|
||||
final_hidden_states)
|
||||
|
||||
@@ -229,7 +229,6 @@ def _set_cos_sin_cache(self, seq_len, device, dtype):
|
||||
|
||||
# TODO: Patch when aclnn ops are available
|
||||
RotaryEmbedding.forward_oot = rope_forward_oot
|
||||
# DeepseekScalingRotaryEmbedding.forward = rope_deepseek_forward_oot
|
||||
DeepseekScalingRotaryEmbedding.forward = native_rope_deepseek_forward
|
||||
DeepseekScalingRotaryEmbedding._set_cos_sin_cache = _set_cos_sin_cache
|
||||
DeepseekScalingRotaryEmbedding.max_seq_len_cached = None
|
||||
|
||||
Reference in New Issue
Block a user