[qwen3-omni] Add Qwen3-Omni moe thinker
This commit is contained in:
@@ -2,7 +2,7 @@
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
from abc import abstractmethod
|
||||
from collections.abc import Iterable
|
||||
from collections.abc import Iterable, Sequence
|
||||
from enum import Enum
|
||||
from typing import Callable, Literal, Optional, overload
|
||||
|
||||
@@ -1667,37 +1667,57 @@ class FusedMoE(CustomOp):
|
||||
|
||||
return final_hidden_states
|
||||
|
||||
@classmethod
def make_expert_params_mapping(
        cls,
        ckpt_gate_proj_name: str,
        ckpt_down_proj_name: str,
        ckpt_up_proj_name: str,
        num_experts: int,
        num_redundant_experts: int = 0) -> list[tuple[str, str, int, str]]:
    """Build the (param_name, weight_name, expert_id, shard_id) mapping
    used to load per-expert checkpoint weights into the fused parameters.

    Args:
        ckpt_gate_proj_name: checkpoint weight name of the gate projection
            (mapped to shard "w1" of the fused `experts.w13_` parameter).
        ckpt_down_proj_name: checkpoint weight name of the down projection
            (mapped to shard "w2" of the `experts.w2_` parameter).
        ckpt_up_proj_name: checkpoint weight name of the up projection
            (mapped to shard "w3" of the fused `experts.w13_` parameter).
        num_experts: number of logical (routed) experts in the checkpoint.
        num_redundant_experts: extra physical expert replicas (EPLB);
            defaults to 0, i.e. a one-to-one physical/logical layout.

    Returns:
        One entry per (physical expert, shard) pair. `expert_id` is the
        physical expert id, while `weight_name` names the logical expert's
        checkpoint weight that the physical expert loads from.
    """

    def build_initial_global_physical_to_logical_map(
        num_routed_experts: int,
        num_redundant_experts: int,
    ) -> Sequence[int]:
        """Build the initial expert arrangement
        [original routed experts, redundant experts], where each entry is
        the logical expert index that the physical slot maps to.

        Redundant slots replicate logical experts round-robin
        (slot i -> logical i % num_routed_experts).
        """
        global_physical_to_logical_map = list(range(num_routed_experts))
        global_physical_to_logical_map += [
            i % num_routed_experts for i in range(num_redundant_experts)
        ]
        return global_physical_to_logical_map

    num_physical_experts = num_experts + num_redundant_experts

    # In the returned mapping:
    # - `expert_id` is the physical expert id
    # - `weight_name` contains the weight name of the logical expert
    # So we must map the expert id to its logical id in `weight_name`.
    physical_to_logical_map = \
        build_initial_global_physical_to_logical_map(
            num_experts, num_redundant_experts)

    return [
        # (param_name, weight_name, expert_id, shard_id)
        ("experts.w13_" if weight_name
         in [ckpt_gate_proj_name, ckpt_up_proj_name] else "experts.w2_",
         f"experts.{physical_to_logical_map[expert_id]}.{weight_name}.",
         expert_id, shard_id) for expert_id in range(num_physical_experts)
        for shard_id, weight_name in [
            ("w1", ckpt_gate_proj_name),
            ("w2", ckpt_down_proj_name),
            ("w3", ckpt_up_proj_name),
        ]
    ]
|
||||
|
||||
def extra_repr(self) -> str:
|
||||
|
||||
|
||||
Reference in New Issue
Block a user