From e7e5a3050a64cbdc7624ac24d289cff16f12e09c Mon Sep 17 00:00:00 2001 From: Baizhou Zhang Date: Thu, 31 Jul 2025 20:53:31 -0700 Subject: [PATCH] Update batch size limitation of dsv3_router_gemm kernel to 16 (#8051) --- python/sglang/srt/models/deepseek_v2.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/sglang/srt/models/deepseek_v2.py b/python/sglang/srt/models/deepseek_v2.py index 5ed19ed86..fcd9c4367 100644 --- a/python/sglang/srt/models/deepseek_v2.py +++ b/python/sglang/srt/models/deepseek_v2.py @@ -252,8 +252,7 @@ class MoEGate(nn.Module): # NOTE: For some unknown reason, router_gemm seems degrade accept length. if ( _is_cuda - and not self.is_nextn - and hidden_states.shape[0] < 4 + and hidden_states.shape[0] <= 16 and hidden_states.shape[1] == 7168 and self.weight.shape[0] == 256 and _device_sm >= 90