[Refactor] move deep_gemm_wrapper out of quantization (#11784)
This commit is contained in:
@@ -64,6 +64,7 @@ from sglang.srt.eplb.expert_location import (
     set_global_expert_location_metadata,
 )
 from sglang.srt.eplb.expert_location_updater import ExpertLocationUpdater
+from sglang.srt.layers import deep_gemm_wrapper
 from sglang.srt.layers.attention.attention_registry import (
     ATTENTION_BACKENDS,
     attn_backend_wrapper,
@@ -75,10 +76,7 @@ from sglang.srt.layers.dp_attention import (
     initialize_dp_attention,
 )
 from sglang.srt.layers.logits_processor import LogitsProcessorOutput
-from sglang.srt.layers.quantization import (
-    deep_gemm_wrapper,
-    monkey_patch_isinstance_for_vllm_base_layer,
-)
+from sglang.srt.layers.quantization import monkey_patch_isinstance_for_vllm_base_layer
 from sglang.srt.layers.sampler import Sampler
 from sglang.srt.layers.torchao_utils import apply_torchao_config_to_model
 from sglang.srt.lora.lora_manager import LoRAManager
Reference in New Issue
Block a user