[qwen3-omni] Add Qwen3-Omni MoE thinker

This commit is contained in:
2025-10-09 17:51:14 +08:00
parent bc57e2ef60
commit 24fab12b2f
8 changed files with 1543 additions and 37 deletions

View File

@@ -2,7 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from abc import abstractmethod
from collections.abc import Iterable
from collections.abc import Iterable, Sequence
from enum import Enum
from typing import Callable, Literal, Optional, overload
@@ -1667,37 +1667,57 @@ class FusedMoE(CustomOp):
return final_hidden_states
@classmethod
def make_expert_params_mapping(
        cls,
        ckpt_gate_proj_name: str,
        ckpt_down_proj_name: str,
        ckpt_up_proj_name: str,
        num_experts: int,
        num_redundant_experts: int = 0) -> list[tuple[str, str, int, str]]:
    """Build the weight-loading mapping for the fused expert parameters.

    Args:
        ckpt_gate_proj_name: Checkpoint weight name of the gate projection.
        ckpt_down_proj_name: Checkpoint weight name of the down projection.
        ckpt_up_proj_name: Checkpoint weight name of the up projection.
        num_experts: Number of logical (routed) experts.
        num_redundant_experts: Number of redundant physical experts
            appended after the routed ones; defaults to 0.

    Returns:
        One ``(param_name, weight_name, expert_id, shard_id)`` tuple per
        (physical expert, shard) pair, where:
        - ``param_name`` is ``"experts.w13_"`` for gate/up shards and
          ``"experts.w2_"`` for down shards,
        - ``weight_name`` names the *logical* expert's checkpoint weight,
        - ``expert_id`` is the *physical* expert id,
        - ``shard_id`` is ``"w1"`` (gate), ``"w2"`` (down) or ``"w3"`` (up).
    """

    def build_initial_global_physical_to_logical_map(
        num_routed_experts: int,
        num_redundant_experts: int,
    ) -> Sequence[int]:
        """
        Build an initial expert arrangement using the following structure:
        [original routed experts, redundant experts]

        Redundant physical expert ``i`` duplicates logical expert
        ``i % num_routed_experts``.

        Returns:
            physical_to_logical_map (Sequence[int]): A list of integers,
            where each integer is the index of the logical expert
            that the corresponding physical expert maps to.
        """
        global_physical_to_logical_map = list(range(num_routed_experts))
        global_physical_to_logical_map += [
            i % num_routed_experts for i in range(num_redundant_experts)
        ]
        return global_physical_to_logical_map

    num_physical_experts = num_experts + num_redundant_experts
    # In the returned mapping:
    # - `expert_id` is the physical expert id
    # - `weight_name` contains the weight name of the logical expert
    # So that we should map the expert id to logical in `weight_name`
    physical_to_logical_map = \
        build_initial_global_physical_to_logical_map(
            num_experts, num_redundant_experts)
    return [
        # (param_name, weight_name, expert_id, shard_id)
        ("experts.w13_" if weight_name
         in [ckpt_gate_proj_name, ckpt_up_proj_name] else "experts.w2_",
         f"experts.{physical_to_logical_map[expert_id]}.{weight_name}.",
         expert_id, shard_id) for expert_id in range(num_physical_experts)
        for shard_id, weight_name in [
            ("w1", ckpt_gate_proj_name),
            ("w2", ckpt_down_proj_name),
            ("w3", ckpt_up_proj_name),
        ]
    ]
def extra_repr(self) -> str: