[Refactor] move deep_gemm_wrapper out of quantization (#11784)

This commit is contained in:
Cheng Wan
2025-10-17 18:57:54 -07:00
committed by GitHub
parent 13219e1e48
commit 5b214b50b6
19 changed files with 18 additions and 24 deletions

View File

@@ -64,6 +64,7 @@ from sglang.srt.eplb.expert_location import (
set_global_expert_location_metadata,
)
from sglang.srt.eplb.expert_location_updater import ExpertLocationUpdater
from sglang.srt.layers import deep_gemm_wrapper
from sglang.srt.layers.attention.attention_registry import (
ATTENTION_BACKENDS,
attn_backend_wrapper,
@@ -75,10 +76,7 @@ from sglang.srt.layers.dp_attention import (
initialize_dp_attention,
)
from sglang.srt.layers.logits_processor import LogitsProcessorOutput
from sglang.srt.layers.quantization import (
deep_gemm_wrapper,
monkey_patch_isinstance_for_vllm_base_layer,
)
from sglang.srt.layers.quantization import monkey_patch_isinstance_for_vllm_base_layer
from sglang.srt.layers.sampler import Sampler
from sglang.srt.layers.torchao_utils import apply_torchao_config_to_model
from sglang.srt.lora.lora_manager import LoRAManager