update and simplify CustomOp (#3249)

Author: Yineng Zhang
Date: 2025-02-01 18:56:44 +08:00
Committed by: GitHub
Parent: 17dbf976c5
Commit: 4eb4b401cc
8 changed files with 46 additions and 45 deletions
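
The diff below drops the dependency on vllm.model_executor.custom_op and the register_custom_op helper in favor of a CustomOp base class that lives in sglang.srt.custom_op. As a rough sketch only (not the actual file added by this commit), such a base class can be little more than an nn.Module that picks a backend-specific forward once at construction time; the forward_native/forward_cuda/forward_hip names follow the convention the call sites already use, and the torch.cuda / torch.version.hip checks are assumptions:

# Hypothetical sketch of a self-contained CustomOp base class; the real
# sglang.srt.custom_op module may differ in details.
import torch
from torch import nn


class CustomOp(nn.Module):
    """Dispatches forward() to a backend-specific implementation chosen at init."""

    def __init__(self):
        super().__init__()
        self._forward_method = self.dispatch_forward()

    def forward(self, *args, **kwargs):
        return self._forward_method(*args, **kwargs)

    def forward_native(self, *args, **kwargs):
        # Pure-PyTorch fallback; subclasses must provide it.
        raise NotImplementedError

    def forward_cuda(self, *args, **kwargs):
        raise NotImplementedError

    def forward_hip(self, *args, **kwargs):
        # By default, ROCm reuses the CUDA path.
        return self.forward_cuda(*args, **kwargs)

    def dispatch_forward(self):
        if torch.version.hip is not None:
            return self.forward_hip
        if torch.cuda.is_available():
            return self.forward_cuda
        return self.forward_native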


@@ -4,13 +4,12 @@ from typing import Callable, List, Optional, Tuple
 import torch
 from torch.nn import Module
 from vllm import _custom_ops as ops
-from vllm.model_executor.custom_op import CustomOp
+from sglang.srt.custom_op import CustomOp
 from sglang.srt.distributed import (
     get_tensor_model_parallel_rank,
     get_tensor_model_parallel_world_size,
 )
-from sglang.srt.layers.custom_op_util import register_custom_op
 from sglang.srt.layers.moe.ep_moe.kernels import (
     grouped_gemm_triton,
     post_reorder_triton_kernel,
@@ -407,7 +406,6 @@ class EPMoE(torch.nn.Module):
         param_data[expert_id] = loaded_weight
-@register_custom_op("sglang_unquantized_ep_moe")
 class UnquantizedEPMoEMethod(FusedMoEMethodBase, CustomOp):
     def create_weights(
         self,


@@ -5,14 +5,13 @@ from enum import Enum
 from typing import Callable, List, Optional, Tuple
 import torch
-from vllm.model_executor.custom_op import CustomOp
+from sglang.srt.custom_op import CustomOp
 from sglang.srt.distributed import (
     get_tensor_model_parallel_rank,
     get_tensor_model_parallel_world_size,
     tensor_model_parallel_all_reduce,
 )
-from sglang.srt.layers.custom_op_util import register_custom_op
 from sglang.srt.layers.moe.fused_moe_native import moe_forward_native
 from sglang.srt.layers.moe.topk import select_experts
 from sglang.srt.layers.quantization.base_config import (
@@ -67,7 +66,6 @@ class FusedMoEMethodBase(QuantizeMethodBase):
         raise NotImplementedError
-@register_custom_op("sglang_unquantized_fused_moe")
 class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
     """MoE method without quantization."""