Revert "[NVIDIA]Fix local_num_experts for EP (#8779)" (#8797)

2025-08-04 23:30:43 -07:00
parent 873f384a51
commit 5e91fed1c5
2 changed files with 2 additions and 4 deletions
--- a/python/sglang/srt/layers/quantization/modelopt_quant.py
+++ b/python/sglang/srt/layers/quantization/modelopt_quant.py
@@ -752,7 +752,6 @@ class ModelOptNvFp4FusedMoEMethod(FusedMoEMethodBase):
        self,
        layer: torch.nn.Module,
        num_experts: int,
-        num_local_experts: int,
        hidden_size: int,
        intermediate_size_per_partition: int,
        params_dtype: torch.dtype,
@@ -766,7 +765,7 @@ class ModelOptNvFp4FusedMoEMethod(FusedMoEMethodBase):

        # TODO(ch-wan): check if this is needed
        layer.num_experts = num_experts
-        layer.num_local_experts = num_local_experts
+        layer.num_local_experts = num_experts
        layer.intermediate_size_per_partition = intermediate_size_per_partition
        layer.params_dtype = params_dtype
        layer.quant_config = self.quant_config