[1/N] MoE Refactor: refactor select_experts (#7966)

This commit is contained in:
Cheng Wan
2025-07-19 00:51:15 -07:00
committed by GitHub
parent cfab0ff6e2
commit 15ad6c9086
39 changed files with 556 additions and 871 deletions

View File

@@ -5,6 +5,7 @@ import torch
from sglang.srt.layers.activation import SiluAndMul
from sglang.srt.layers.moe.fused_moe_triton.fused_moe import fused_moe
+from sglang.srt.layers.moe.topk import select_experts
from sglang.test.test_utils import CustomTestCase
@@ -171,14 +172,18 @@ class TestW8A8BlockINT8FusedMoE(CustomTestCase):
score = torch.randn((M, E), dtype=dtype)
+topk_output = select_experts(
+hidden_states=a,
+router_logits=score,
+top_k=topk,
+)
with torch.inference_mode():
out = fused_moe(
a,
w1,
w2,
-score,
-topk,
-renormalize=False,
+topk_output,
use_int8_w8a8=True,
w1_scale=w1_s,
w2_scale=w2_s,