[NVIDIA] Remove unused get_fused_moe_impl_class function (#9764)
This commit is contained in:
@@ -1074,16 +1074,3 @@ class FlashInferFP4MoE(FusedMoE):
|
|||||||
)[0]
|
)[0]
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
def get_fused_moe_impl_class():
    """Select the FusedMoE implementation class to use.

    Returns:
        ``FlashInferFP4MoE`` when both ``should_use_flashinfer_trtllm_moe()``
        and ``_is_fp4_quantization_enabled()`` are truthy,
        ``FlashInferFusedMoE`` when only the former is, and ``FusedMoE``
        otherwise.
    """
    # FP4 variant: FlashInfer TRT-LLM MoE combined with FP4 quantization.
    if should_use_flashinfer_trtllm_moe() and _is_fp4_quantization_enabled():
        return FlashInferFP4MoE

    # Regular FlashInfer variant for the remaining (non-FP4) FlashInfer cases.
    if should_use_flashinfer_trtllm_moe():
        return FlashInferFusedMoE

    # Default implementation.
    return FusedMoE
|
|
||||||
|
|||||||
@@ -635,11 +635,7 @@ class Fp8MoEMethod(FusedMoEMethodBase):
|
|||||||
layer.register_parameter("w13_weight_scale_inv", w13_weight_scale)
|
layer.register_parameter("w13_weight_scale_inv", w13_weight_scale)
|
||||||
layer.register_parameter("w2_weight_scale_inv", w2_weight_scale)
|
layer.register_parameter("w2_weight_scale_inv", w2_weight_scale)
|
||||||
assert self.quant_config.activation_scheme == "dynamic"
|
assert self.quant_config.activation_scheme == "dynamic"
|
||||||
if (
|
if self.use_cutlass_fused_experts_fp8:
|
||||||
get_bool_env_var("SGLANG_CUTLASS_MOE")
|
|
||||||
and self.cutlass_fp8_supported
|
|
||||||
and (is_sm100_supported() or is_sm90_supported())
|
|
||||||
):
|
|
||||||
self.ab_strides1 = torch.full(
|
self.ab_strides1 = torch.full(
|
||||||
(num_experts,),
|
(num_experts,),
|
||||||
hidden_size,
|
hidden_size,
|
||||||
|
|||||||
Reference in New Issue
Block a user