[1/N] MoE Refactor: refactor select_experts (#7966)

This commit is contained in:
Cheng Wan
2025-07-19 00:51:15 -07:00
committed by GitHub
parent cfab0ff6e2
commit 15ad6c9086
39 changed files with 556 additions and 871 deletions

View File

@@ -45,6 +45,7 @@ from sglang.srt.layers.logits_processor import LogitsProcessor
from sglang.srt.layers.moe.ep_moe.layer import EPMoE
from sglang.srt.layers.moe.fused_moe_triton import FusedMoE
from sglang.srt.layers.moe.router import fused_moe_router_shim
from sglang.srt.layers.moe.topk import TopK
from sglang.srt.layers.quantization.base_config import QuantizationConfig
from sglang.srt.layers.radix_attention import RadixAttention
from sglang.srt.layers.rotary_embedding import get_rope
@@ -108,6 +109,12 @@ class Grok1MoE(nn.Module):
fused_moe_router_shim, self.router_logit_softcapping
)
self.topk = TopK(
top_k=top_k,
renormalize=False,
custom_routing_function=custom_routing_function,
)
kwargs = {}
if global_server_args_dict["enable_ep_moe"]:
MoEImpl = EPMoE
@@ -124,17 +131,16 @@ class Grok1MoE(nn.Module):
hidden_size=hidden_size,
intermediate_size=intermediate_size,
params_dtype=params_dtype,
renormalize=False,
quant_config=quant_config,
tp_size=tp_size,
custom_routing_function=custom_routing_function,
activation="gelu",
**kwargs,
)
def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
# need to assert self.gate.quant_method is unquantized
return self.experts(hidden_states, self.gate.weight)
topk_output = self.topk(hidden_states, self.gate.weight)
return self.experts(hidden_states, topk_output)
class Grok1Attention(nn.Module):