Update batch size limitation of dsv3_router_gemm kernel to 16 (#8051)
This commit is contained in:
@@ -252,8 +252,7 @@ class MoEGate(nn.Module):
|
||||
# NOTE: For some unknown reason, router_gemm seems to degrade accept length.
|
||||
if (
|
||||
_is_cuda
|
||||
and not self.is_nextn
|
||||
and hidden_states.shape[0] < 4
|
||||
and hidden_states.shape[0] <= 16
|
||||
and hidden_states.shape[1] == 7168
|
||||
and self.weight.shape[0] == 256
|
||||
and _device_sm >= 90
|
||||
|
||||
Reference in New Issue
Block a user