From 5a7e10fe4c1e80d730ad677240939fc9d1b39fce Mon Sep 17 00:00:00 2001 From: Cheng Wan <54331508+ch-wan@users.noreply.github.com> Date: Sun, 7 Sep 2025 19:43:59 -0700 Subject: [PATCH] [MoE] fix: incorrect weight initialization for cutlass_fused_experts_fp8 (#10144) --- python/sglang/srt/layers/quantization/fp8.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/sglang/srt/layers/quantization/fp8.py b/python/sglang/srt/layers/quantization/fp8.py index 31a2c2eb2..ecdca3381 100644 --- a/python/sglang/srt/layers/quantization/fp8.py +++ b/python/sglang/srt/layers/quantization/fp8.py @@ -656,7 +656,7 @@ class Fp8MoEMethod(FusedMoEMethodBase): ) self.c_strides2 = torch.full( (num_experts,), - intermediate_size_per_partition, + hidden_size, device=w2_weight.device, dtype=torch.int64, )