Add note for deepseek related docs and remove unnecessary comments (#590)

### What this PR does / why we need it? Add notes for the DeepSeek patch and remove unnecessary commented-out code. --------- Signed-off-by: ganyi <pleaplusone.gy@gmail.com>
2025-04-22 09:59:09 +08:00
parent c5850d302d
commit d12a057df8
6 changed files with 78 additions and 199 deletions
--- a/vllm_ascend/ops/fused_moe.py
+++ b/vllm_ascend/ops/fused_moe.py
@@ -364,23 +364,6 @@ def select_experts(
    Raises:
        ValueError: If an unsupported scoring function is provided.
    """
-    # assert hidden_states.shape[0] == router_logits.shape[0], (
-    #     "Number of tokens mismatch")
-    # if os.environ.get("VLLM_ENABLE_GRAPH_MODE") == "1" and not is_prefill:
-    #     topk_weight, topk_idx, _ = torch.ops.npu_inference.npu_moe_gating_top_k(
-    #         router_logits,
-    #         k=top_k, # topk当前写8
-    #         bias=e_score_correction_bias,
-    #         k_group=topk_group, # fix: 4
-    #         group_count=num_expert_group, # fix 8
-    #         group_select_mode=1, # 0: group中的最大; 1: topk2.sum(fix)
-    #         renorm=0, # 0: softmax->topk(fix); 1: topk->softmax
-    #         norm_type=1, # 0: softmax; 1: sigmoid(fix)
-    #         # out_flag=False, # todo new api; 第三个输出是否输出
-    #         # y2_flag=False, # old api; 第三个输出是否输出
-    #         routed_scaling_factor=1,
-    #         eps=float(1e-20))
-    #     return topk_weight, topk_idx

    if custom_routing_function is not None:
        raise NotImplementedError(
@@ -483,8 +466,6 @@ class AscendUnquantizedFusedMoEMethod(UnquantizedFusedMoEMethod):
        is_prefill=False,
        **kwargs,
    ):
-        # assert router_logits.shape[
-        #     1] == global_num_experts, "Number of global experts mismatch"
        # set prefill as false always, should fix this
        topk_weights, topk_ids = select_experts(
            hidden_states=x,
@@ -670,7 +651,6 @@ class AscendFusedMoE(FusedMoE):
                    scatter_dim=0,
                    group=get_dp_group().device_group)

-        # if self.reduce_results and self.tp_size > 1:
        if self.reduce_results and (self.tp_size > 1 or self.ep_size > 1):
            final_hidden_states = tensor_model_parallel_all_reduce(
                final_hidden_states)
--- a/vllm_ascend/ops/rotary_embedding.py
+++ b/vllm_ascend/ops/rotary_embedding.py
@@ -229,7 +229,6 @@ def _set_cos_sin_cache(self, seq_len, device, dtype):

 # TODO: Patch when aclnn ops avaiable
 RotaryEmbedding.forward_oot = rope_forward_oot
-# DeepseekScalingRotaryEmbedding.forward = rope_deepseek_forward_oot
 DeepseekScalingRotaryEmbedding.forward = native_rope_deepseek_forward
 DeepseekScalingRotaryEmbedding._set_cos_sin_cache = _set_cos_sin_cache
 DeepseekScalingRotaryEmbedding.max_seq_len_cached = None