update and simplify CustomOp (#3249)
This commit is contained in:
@@ -4,13 +4,12 @@ from typing import Callable, List, Optional, Tuple
|
||||
import torch
|
||||
from torch.nn import Module
|
||||
from vllm import _custom_ops as ops
|
||||
from vllm.model_executor.custom_op import CustomOp
|
||||
|
||||
from sglang.srt.custom_op import CustomOp
|
||||
from sglang.srt.distributed import (
|
||||
get_tensor_model_parallel_rank,
|
||||
get_tensor_model_parallel_world_size,
|
||||
)
|
||||
from sglang.srt.layers.custom_op_util import register_custom_op
|
||||
from sglang.srt.layers.moe.ep_moe.kernels import (
|
||||
grouped_gemm_triton,
|
||||
post_reorder_triton_kernel,
|
||||
@@ -407,7 +406,6 @@ class EPMoE(torch.nn.Module):
|
||||
param_data[expert_id] = loaded_weight
|
||||
|
||||
|
||||
@register_custom_op("sglang_unquantized_ep_moe")
|
||||
class UnquantizedEPMoEMethod(FusedMoEMethodBase, CustomOp):
|
||||
def create_weights(
|
||||
self,
|
||||
|
||||
@@ -5,14 +5,13 @@ from enum import Enum
|
||||
from typing import Callable, List, Optional, Tuple
|
||||
|
||||
import torch
|
||||
from vllm.model_executor.custom_op import CustomOp
|
||||
|
||||
from sglang.srt.custom_op import CustomOp
|
||||
from sglang.srt.distributed import (
|
||||
get_tensor_model_parallel_rank,
|
||||
get_tensor_model_parallel_world_size,
|
||||
tensor_model_parallel_all_reduce,
|
||||
)
|
||||
from sglang.srt.layers.custom_op_util import register_custom_op
|
||||
from sglang.srt.layers.moe.fused_moe_native import moe_forward_native
|
||||
from sglang.srt.layers.moe.topk import select_experts
|
||||
from sglang.srt.layers.quantization.base_config import (
|
||||
@@ -67,7 +66,6 @@ class FusedMoEMethodBase(QuantizeMethodBase):
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
@register_custom_op("sglang_unquantized_fused_moe")
|
||||
class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
|
||||
"""MoE method without quantization."""
|
||||
|
||||
|
||||
Reference in New Issue
Block a user