diff --git a/python/sglang/srt/models/deepseek_v2.py b/python/sglang/srt/models/deepseek_v2.py index 132210e27..d5f332126 100644 --- a/python/sglang/srt/models/deepseek_v2.py +++ b/python/sglang/srt/models/deepseek_v2.py @@ -50,7 +50,6 @@ from sglang.srt.layers.linear import ( ) from sglang.srt.layers.logits_processor import LogitsProcessor from sglang.srt.layers.moe.ep_moe.layer import DeepEPMoE, EPMoE -from sglang.srt.layers.moe.ep_moe.token_dispatcher import DeepEPDispatcher from sglang.srt.layers.moe.fused_moe_triton import FusedMoE from sglang.srt.layers.moe.topk import select_experts from sglang.srt.layers.quantization.base_config import QuantizationConfig @@ -79,6 +78,8 @@ _is_cuda = is_cuda() if _is_cuda: from sgl_kernel import awq_dequantize, bmm_fp8 + + from sglang.srt.layers.moe.ep_moe.token_dispatcher import DeepEPDispatcher else: from vllm import _custom_ops as ops