From 5a7e10fe4c1e80d730ad677240939fc9d1b39fce Mon Sep 17 00:00:00 2001
From: Cheng Wan <54331508+ch-wan@users.noreply.github.com>
Date: Sun, 7 Sep 2025 19:43:59 -0700
Subject: [PATCH] [MoE] fix: incorrect weight initialization for 
 cutlass_fused_experts_fp8 (#10144)

---
 python/sglang/srt/layers/quantization/fp8.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/sglang/srt/layers/quantization/fp8.py b/python/sglang/srt/layers/quantization/fp8.py
index 31a2c2eb2..ecdca3381 100644
--- a/python/sglang/srt/layers/quantization/fp8.py
+++ b/python/sglang/srt/layers/quantization/fp8.py
@@ -656,7 +656,7 @@ class Fp8MoEMethod(FusedMoEMethodBase):
                 )
                 self.c_strides2 = torch.full(
                     (num_experts,),
-                    intermediate_size_per_partition,
+                    hidden_size,
                     device=w2_weight.device,
                     dtype=torch.int64,
                 )