[MoE] fix: incorrect weight initialization for cutlass_fused_experts_fp8 (#10144)

2025-09-07 19:43:59 -07:00
parent 33467c05a4
commit 5a7e10fe4c
1 changed file with 1 addition and 1 deletion
--- a/python/sglang/srt/layers/quantization/fp8.py
+++ b/python/sglang/srt/layers/quantization/fp8.py
@@ -656,7 +656,7 @@ class Fp8MoEMethod(FusedMoEMethodBase):
                )
                self.c_strides2 = torch.full(
                    (num_experts,),
-                    intermediate_size_per_partition,
+                    hidden_size,
                    device=w2_weight.device,
                    dtype=torch.int64,
                )