update and simplify CustomOp (#3249)

This commit is contained in:
Yineng Zhang
2025-02-01 18:56:44 +08:00
committed by GitHub
parent 17dbf976c5
commit 4eb4b401cc
8 changed files with 46 additions and 45 deletions

View File

@@ -4,13 +4,12 @@ from typing import Callable, List, Optional, Tuple
import torch
from torch.nn import Module
from vllm import _custom_ops as ops
-from vllm.model_executor.custom_op import CustomOp
+from sglang.srt.custom_op import CustomOp
from sglang.srt.distributed import (
get_tensor_model_parallel_rank,
get_tensor_model_parallel_world_size,
)
-from sglang.srt.layers.custom_op_util import register_custom_op
from sglang.srt.layers.moe.ep_moe.kernels import (
grouped_gemm_triton,
post_reorder_triton_kernel,
@@ -407,7 +406,6 @@ class EPMoE(torch.nn.Module):
param_data[expert_id] = loaded_weight
-@register_custom_op("sglang_unquantized_ep_moe")
class UnquantizedEPMoEMethod(FusedMoEMethodBase, CustomOp):
def create_weights(
self,