[3/n] chore: decouple AWQ implementation from vLLM dependency (#8113)

Co-authored-by: AniZpZ <zhuangsen.zp@antgroup.com>
This commit is contained in:
Hongbo Xu
2025-07-19 02:45:22 +08:00
committed by GitHub
parent 6737671c82
commit 1f76fc8747
8 changed files with 1143 additions and 20 deletions

View File

@@ -24,7 +24,7 @@ def check_quant_method(model_path: str, use_marlin_kernel: bool):
set_custom_all_reduce,
)
from sglang.srt.distributed.parallel_state import monkey_patch_vllm_parallel_state
-from sglang.srt.layers.quantization import get_dynamic_override
+from sglang.srt.layers.quantization.utils import get_dynamic_override
from sglang.srt.model_loader import get_model
from sglang.srt.server_args import PortArgs, ServerArgs