Update batch size limitation of dsv3_router_gemm kernel to 16 (#8051)
This commit is contained in:
@@ -252,8 +252,7 @@ class MoEGate(nn.Module):
|
|||||||
# NOTE: For some unknown reason, router_gemm seems to degrade accept length.
|
# NOTE: For some unknown reason, router_gemm seems to degrade accept length.
|
||||||
if (
|
if (
|
||||||
_is_cuda
|
_is_cuda
|
||||||
and not self.is_nextn
|
and hidden_states.shape[0] <= 16
|
||||||
and hidden_states.shape[0] < 4
|
|
||||||
and hidden_states.shape[1] == 7168
|
and hidden_states.shape[1] == 7168
|
||||||
and self.weight.shape[0] == 256
|
and self.weight.shape[0] == 256
|
||||||
and _device_sm >= 90
|
and _device_sm >= 90
|
||||||
|
|||||||
Reference in New Issue
Block a user