Update batch size limitation of dsv3_router_gemm kernel to 16 (#8051)

This commit is contained in:
Baizhou Zhang
2025-07-31 20:53:31 -07:00
committed by GitHub
parent dd7ca00601
commit e7e5a3050a

View File

@@ -252,8 +252,7 @@ class MoEGate(nn.Module):
# NOTE: For some unknown reason, router_gemm seems degrade accept length.
if (
_is_cuda
- and not self.is_nextn
- and hidden_states.shape[0] < 4
+ and hidden_states.shape[0] <= 16
and hidden_states.shape[1] == 7168
and self.weight.shape[0] == 256
and _device_sm >= 90