diff --git a/python/sglang/srt/layers/quantization/fp8.py b/python/sglang/srt/layers/quantization/fp8.py index 31a2c2eb2..ecdca3381 100644 --- a/python/sglang/srt/layers/quantization/fp8.py +++ b/python/sglang/srt/layers/quantization/fp8.py @@ -656,7 +656,7 @@ class Fp8MoEMethod(FusedMoEMethodBase): ) self.c_strides2 = torch.full( (num_experts,), - intermediate_size_per_partition, + hidden_size, device=w2_weight.device, dtype=torch.int64, )