Minor code cleanup refactor for DeepSeek models (#6324)

This commit is contained in:
fzyzcjy
2025-05-17 10:06:03 +08:00
committed by GitHub
parent 8dc191f237
commit 2df9d40aa6
2 changed files with 26 additions and 35 deletions

View File

@@ -5,6 +5,7 @@ import torch
from torch.nn import Module
from sglang.srt.layers.quantization.deep_gemm import _ENABLE_JIT_DEEPGEMM
from sglang.srt.managers.schedule_batch import global_server_args_dict
try:
from deep_gemm import (
@@ -40,7 +41,7 @@ from sglang.srt.layers.moe.ep_moe.kernels import (
tma_align_input_scale,
)
from sglang.srt.layers.moe.fused_moe_triton import FusedMoeWeightScaleSupported
from sglang.srt.layers.moe.fused_moe_triton.layer import FusedMoEMethodBase
from sglang.srt.layers.moe.fused_moe_triton.layer import FusedMoE, FusedMoEMethodBase
from sglang.srt.layers.moe.topk import select_experts
from sglang.srt.layers.quantization.base_config import (
QuantizationConfig,
@@ -1173,3 +1174,11 @@ class DeepEPMoE(EPMoE):
)
return down_output
def get_moe_impl_class():
    """Return the MoE layer class selected by the server arguments.

    Preference order: DeepEPMoE when DeepEP is enabled, then EPMoE when
    plain expert parallelism is enabled, otherwise the default FusedMoE.
    """
    # Check the flags in priority order; first enabled one wins.
    for flag, impl_cls in (
        ("enable_deepep_moe", DeepEPMoE),
        ("enable_ep_moe", EPMoE),
    ):
        if global_server_args_dict[flag]:
            return impl_cls
    return FusedMoE