add fused moe config for qwen3moe fp8/bf16 (#5849)

Author: Yi Zhang
Date: 2025-04-29 02:55:52 +08:00
Committed by: GitHub
Parent: 663037a7a0
Commit: a0251a3fd6
6 changed files with 731 additions and 6 deletions


@@ -393,12 +393,7 @@ def main(args: argparse.Namespace):
         topk = config.num_experts_per_tok
         intermediate_size = config.intermediate_size
         shard_intermediate_size = 2 * intermediate_size // args.tp_size
-    elif config.architectures[0] == "Qwen2MoeForCausalLM":
-        E = config.num_experts
-        topk = config.num_experts_per_tok
-        intermediate_size = config.moe_intermediate_size
-        shard_intermediate_size = 2 * intermediate_size // args.tp_size
-    elif config.architectures[0] == "Qwen3MoeForCausalLM":
+    elif config.architectures[0] in ["Qwen2MoeForCausalLM", "Qwen3MoeForCausalLM"]:
         E = config.num_experts
         topk = config.num_experts_per_tok
         intermediate_size = config.moe_intermediate_size
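The merged branch works because Qwen2-MoE and Qwen3-MoE expose the same config fields; it derives the shapes the tuner keys its fused-MoE kernel configs on. Below is a minimal, self-contained sketch of that logic; DummyConfig and its hyperparameter values are illustrative assumptions, not values taken from this commit.

# Sketch of the consolidated branch above, outside the benchmark script.
# The hyperparameters here are assumed for illustration only.

class DummyConfig:
    """Stand-in for a transformers PretrainedConfig of a Qwen MoE model."""
    architectures = ["Qwen3MoeForCausalLM"]
    num_experts = 128            # total routed experts (E)
    num_experts_per_tok = 8      # experts activated per token (topk)
    moe_intermediate_size = 768  # per-expert FFN width


def moe_shapes(config, tp_size):
    """Derive the shapes used to select a fused-MoE kernel config."""
    if config.architectures[0] in ["Qwen2MoeForCausalLM", "Qwen3MoeForCausalLM"]:
        E = config.num_experts
        topk = config.num_experts_per_tok
        intermediate_size = config.moe_intermediate_size
        # Factor of 2: the gate and up projections are fused into one GEMM;
        # the fused width is then sharded across tensor-parallel ranks.
        shard_intermediate_size = 2 * intermediate_size // tp_size
        return E, topk, shard_intermediate_size
    raise ValueError(f"unsupported architecture: {config.architectures[0]}")


print(moe_shapes(DummyConfig(), tp_size=4))  # -> (128, 8, 384)

With these assumed values, the tuned config files added by this commit would be looked up by E and the sharded intermediate size, which is why a separate Qwen3 branch was unnecessary.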