[Refactor] move deep_gemm_wrapper out of quantization (#11784)

This commit is contained in:
Cheng Wan
2025-10-17 18:57:54 -07:00
committed by GitHub
parent 13219e1e48
commit 5b214b50b6
19 changed files with 18 additions and 24 deletions

View File

@@ -28,7 +28,6 @@ import torch.nn.functional as F
from torch import nn
from transformers import PretrainedConfig
from sglang.srt import single_batch_overlap
from sglang.srt.configs.model_config import (
get_nsa_index_head_dim,
get_nsa_index_n_heads,
@@ -48,6 +47,7 @@ from sglang.srt.distributed.device_communicators.pynccl_allocator import (
from sglang.srt.eplb.expert_distribution import get_global_expert_distribution_recorder
from sglang.srt.eplb.expert_location import ModelConfigForExpertLocation
from sglang.srt.eplb.expert_location_dispatch import ExpertLocationDispatchInfo
+from sglang.srt.layers import deep_gemm_wrapper
from sglang.srt.layers.activation import SiluAndMul
from sglang.srt.layers.amx_utils import PackWeightMethod
from sglang.srt.layers.attention.npu_ops.mla_preprocess import (
@@ -82,7 +82,6 @@ from sglang.srt.layers.moe import (
from sglang.srt.layers.moe.ep_moe.layer import DeepEPMoE, get_moe_impl_class
from sglang.srt.layers.moe.fused_moe_triton.layer import FusedMoE
from sglang.srt.layers.moe.topk import TopK, TopKOutputFormat
-from sglang.srt.layers.quantization import deep_gemm_wrapper
from sglang.srt.layers.quantization.base_config import QuantizationConfig
from sglang.srt.layers.quantization.fp8_kernel import (
is_fp8_fnuz,