[Model] Add LongCat-Flash (#3833)

### What this PR does / why we need it?
Add support for the LongCat-Flash model. The expert selector now forwards `routed_scaling_factor` to the fused gating op instead of hardcoding 1, and handles LongCat-Flash's zero (identity) experts.
### Does this PR introduce _any_ user-facing change?
N/A
### How was this patch tested?
CI passed

- vLLM version: v0.13.0
- vLLM main: ad32e3e19c

---------

Signed-off-by: chuyuelin <923822139@qq.com>
Co-authored-by: chuyuelin <chuyuelin1@huawei.com>
Commit d07d8a4535 (parent 03679cf1d3) by Chu Yuelin, 2025-12-31 17:06:55 +08:00, committed by GitHub
8 changed files with 79 additions and 14 deletions


@@ -225,7 +225,7 @@ def _select_experts_with_fusion_ops(
             norm_type=norm_type,  # 0: softmax; 1: sigmoid
             # out_flag=False, # todo new api; should the third output be output
             # y2_flag=False, # old api; should the third output be output
-            routed_scaling_factor=1,
+            routed_scaling_factor=routed_scaling_factor,
             eps=float(1e-20))
     if scoring_func == "softmax":
         topk_weights = _renormalize_topk_weights(topk_weights, renormalize)
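
This hunk stops hardcoding the routed scaling factor: the fused gating op now receives the caller's `routed_scaling_factor` instead of a fixed 1, so models like LongCat-Flash that scale their routed expert weights are handled correctly. Below is a minimal sketch of the intended effect in plain PyTorch, assuming the fused op applies the factor multiplicatively to the selected top-k weights (shapes and values are illustrative, not taken from the diff):

import torch

def apply_routed_scaling(topk_weights: torch.Tensor,
                         routed_scaling_factor: float) -> torch.Tensor:
    # Scale each token's routed expert weights by the model's factor;
    # with the previous hardcoded value of 1 this step was a no-op.
    return topk_weights * routed_scaling_factor

# Illustrative: 4 tokens, top-2 weights drawn from a softmax over 8 experts.
topk_weights = torch.softmax(torch.randn(4, 8), dim=-1).topk(2, dim=-1).values
scaled = apply_routed_scaling(topk_weights, routed_scaling_factor=2.5)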
@@ -304,3 +304,28 @@ def _native_select_experts(
         topk_weights = _renormalize_topk_weights(topk_weights, renormalize)
 
     return topk_weights, topk_ids
+
+
+def zero_experts_compute(
+        expert_indices: torch.Tensor,
+        expert_scales: torch.Tensor,
+        num_experts: int,
+        zero_expert_type: str,
+        hidden_states: torch.Tensor,
+) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+    if zero_expert_type == "identity":
+        zero_expert_mask = expert_indices < num_experts
+        zero_expert_scales = expert_scales.clone()
+        zero_expert_scales = torch.where(zero_expert_mask, 0.0,
+                                         zero_expert_scales)
+
+    hidden_states = hidden_states.unsqueeze(1)
+    zero_expert_scales = zero_expert_scales.unsqueeze(2)
+    result = hidden_states * zero_expert_scales
+    result = result.sum(dim=1)
+
+    normal_expert_mask = expert_indices >= num_experts
+    expert_indices = torch.where(normal_expert_mask, 0, expert_indices)
+    expert_scales = torch.where(normal_expert_mask, 0.0, expert_scales)
+
+    return expert_indices, expert_scales, result
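
The added `zero_experts_compute` handles LongCat-Flash's zero-computation experts: router slots whose index is at or above `num_experts` are treated as identity experts that pass the token's hidden state through unchanged. The function extracts the identity contribution (each hidden state scaled by the total weight routed to zero experts) and neutralizes those slots (index 0, weight 0.0) so regular expert dispatch ignores them; only the "identity" type appears in this diff. Note that, despite its name, `normal_expert_mask` flags the zero-expert slots. A hedged usage sketch with made-up shapes, assuming 4 real experts, zero-expert ids starting at 4, and that `zero_experts_compute` is importable from the module in this diff:

import torch

num_experts = 4
hidden_states = torch.randn(2, 8)          # [num_tokens, hidden_size]
expert_indices = torch.tensor([[1, 5],     # token 0: real expert 1, zero expert 5
                               [4, 2]])    # token 1: zero expert 4, real expert 2
expert_scales = torch.tensor([[0.7, 0.3],
                              [0.6, 0.4]])

expert_indices, expert_scales, identity_part = zero_experts_compute(
    expert_indices, expert_scales, num_experts, "identity", hidden_states)

# identity_part[i] == hidden_states[i] * (total weight routed to zero experts),
# e.g. 0.3 * hidden_states[0] for token 0; the caller adds it to the output of
# the real experts. The zero-expert slots now read index 0 with weight 0.0.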