update and simplify CustomOp (#3249)

2025-02-01 18:56:44 +08:00
parent 17dbf976c5
commit 4eb4b401cc
8 changed files with 46 additions and 45 deletions
--- a/python/sglang/srt/layers/moe/ep_moe/layer.py
+++ b/python/sglang/srt/layers/moe/ep_moe/layer.py
@@ -4,13 +4,12 @@ from typing import Callable, List, Optional, Tuple
 import torch
 from torch.nn import Module
 from vllm import _custom_ops as ops
-from vllm.model_executor.custom_op import CustomOp

+from sglang.srt.custom_op import CustomOp
 from sglang.srt.distributed import (
    get_tensor_model_parallel_rank,
    get_tensor_model_parallel_world_size,
 )
-from sglang.srt.layers.custom_op_util import register_custom_op
 from sglang.srt.layers.moe.ep_moe.kernels import (
    grouped_gemm_triton,
    post_reorder_triton_kernel,
@@ -407,7 +406,6 @@ class EPMoE(torch.nn.Module):
                param_data[expert_id] = loaded_weight


-@register_custom_op("sglang_unquantized_ep_moe")
 class UnquantizedEPMoEMethod(FusedMoEMethodBase, CustomOp):
    def create_weights(
        self,