[qwen3-omni] Add Qwen3-Omni MoE thinker

This commit is contained in:
2025-10-09 17:51:14 +08:00
parent bc57e2ef60
commit 24fab12b2f
8 changed files with 1543 additions and 37 deletions

View File

@@ -2,7 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from abc import abstractmethod
from collections.abc import Iterable
from collections.abc import Iterable, Sequence
from enum import Enum
from typing import Callable, Literal, Optional, overload
@@ -1667,37 +1667,57 @@ class FusedMoE(CustomOp):
return final_hidden_states
@classmethod
def make_expert_params_mapping(
        cls,
        ckpt_gate_proj_name: str,
        ckpt_down_proj_name: str,
        ckpt_up_proj_name: str,
        num_experts: int,
        num_redundant_experts: int = 0) -> list[tuple[str, str, int, str]]:
    """Build the weight-loading mapping for the fused expert parameters.

    Args:
        ckpt_gate_proj_name: Checkpoint weight name of the gate projection.
        ckpt_down_proj_name: Checkpoint weight name of the down projection.
        ckpt_up_proj_name: Checkpoint weight name of the up projection.
        num_experts: Number of logical (routed) experts.
        num_redundant_experts: Number of redundant physical experts
            appended after the routed ones; defaults to 0.

    Returns:
        One ``(param_name, weight_name, expert_id, shard_id)`` tuple per
        (physical expert, shard) pair, where:
        - ``param_name`` is ``"experts.w13_"`` for gate/up shards and
          ``"experts.w2_"`` for down shards,
        - ``weight_name`` names the *logical* expert's checkpoint weight,
        - ``expert_id`` is the *physical* expert id,
        - ``shard_id`` is ``"w1"`` (gate), ``"w2"`` (down) or ``"w3"`` (up).
    """

    def build_initial_global_physical_to_logical_map(
        num_routed_experts: int,
        num_redundant_experts: int,
    ) -> Sequence[int]:
        """
        Build an initial expert arrangement using the following structure:
        [original routed experts, redundant experts]

        Redundant physical expert ``i`` duplicates logical expert
        ``i % num_routed_experts``.

        Returns:
            physical_to_logical_map (Sequence[int]): A list of integers,
            where each integer is the index of the logical expert
            that the corresponding physical expert maps to.
        """
        global_physical_to_logical_map = list(range(num_routed_experts))
        global_physical_to_logical_map += [
            i % num_routed_experts for i in range(num_redundant_experts)
        ]
        return global_physical_to_logical_map

    num_physical_experts = num_experts + num_redundant_experts
    # In the returned mapping:
    # - `expert_id` is the physical expert id
    # - `weight_name` contains the weight name of the logical expert
    # So that we should map the expert id to logical in `weight_name`
    physical_to_logical_map = \
        build_initial_global_physical_to_logical_map(
            num_experts, num_redundant_experts)
    return [
        # (param_name, weight_name, expert_id, shard_id)
        ("experts.w13_" if weight_name
         in [ckpt_gate_proj_name, ckpt_up_proj_name] else "experts.w2_",
         f"experts.{physical_to_logical_map[expert_id]}.{weight_name}.",
         expert_id, shard_id) for expert_id in range(num_physical_experts)
        for shard_id, weight_name in [
            ("w1", ckpt_gate_proj_name),
            ("w2", ckpt_down_proj_name),
            ("w3", ckpt_up_proj_name),
        ]
    ]
def extra_repr(self) -> str: