From 1466c1b89679976b729cd8762f0a5334d9078dae Mon Sep 17 00:00:00 2001
From: Yineng Zhang
Date: Mon, 28 Jul 2025 14:32:58 -0700
Subject: [PATCH] feat: support glm4 tuning (#8473)

---
 .../kernels/fused_moe_triton/tuning_fused_moe_triton.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/benchmark/kernels/fused_moe_triton/tuning_fused_moe_triton.py b/benchmark/kernels/fused_moe_triton/tuning_fused_moe_triton.py
index 69b0563e9..2af320d56 100644
--- a/benchmark/kernels/fused_moe_triton/tuning_fused_moe_triton.py
+++ b/benchmark/kernels/fused_moe_triton/tuning_fused_moe_triton.py
@@ -427,6 +427,11 @@ def main(args: argparse.Namespace):
         topk = config.num_experts_per_tok
         intermediate_size = config.moe_intermediate_size
         shard_intermediate_size = 2 * intermediate_size // args.tp_size
+    elif config.architectures[0] in ["Glm4MoeForCausalLM"]:
+        E = config.n_routed_experts
+        topk = config.num_experts_per_tok
+        intermediate_size = config.moe_intermediate_size
+        shard_intermediate_size = 2 * intermediate_size // args.tp_size
     else:
         # Default: Mixtral
         E = config.num_local_experts
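
Not part of the patch, but a minimal sketch of what the new Glm4MoeForCausalLM branch does: it derives the tuning dimensions from the model's Hugging Face config. The checkpoint path and tp_size value below are placeholder assumptions; the field names (n_routed_experts, num_experts_per_tok, moe_intermediate_size) are taken directly from the diff above.

from transformers import AutoConfig

# Placeholder path; substitute a real GLM4 MoE checkpoint directory or hub ID.
config = AutoConfig.from_pretrained("path/to/glm4-moe-checkpoint", trust_remote_code=True)
tp_size = 8  # example tensor-parallel degree, playing the role of args.tp_size in the script

if config.architectures[0] in ["Glm4MoeForCausalLM"]:
    E = config.n_routed_experts                       # number of routed experts
    topk = config.num_experts_per_tok                 # experts activated per token
    intermediate_size = config.moe_intermediate_size  # per-expert FFN width
    # gate and up projections are fused into one weight (factor of 2), then sharded across TP ranks
    shard_intermediate_size = 2 * intermediate_size // tp_size
    print(E, topk, shard_intermediate_size)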