[FEAT] Support GGUF format (#2215)

Co-authored-by: Yang Zheng(SW)(Alex) <you@example.com>
This commit is contained in:
Yang Zheng
2024-11-30 16:44:48 +08:00
committed by GitHub
parent 0d6a49bd7d
commit 883c955489
39 changed files with 180 additions and 89 deletions

View File

@@ -59,6 +59,7 @@ from sglang.srt.utils import (
enable_show_time_cost,
get_available_gpu_memory,
is_hip,
monkey_patch_vllm_gguf_config,
monkey_patch_vllm_model_config,
monkey_patch_vllm_p2p_access_check,
set_cpu_offload_max_bytes,
@@ -297,6 +298,8 @@ class ModelRunner:
download_dir=self.server_args.download_dir,
)
monkey_patch_vllm_model_config()
if self.server_args.load_format == "gguf":
monkey_patch_vllm_gguf_config()
self.vllm_model_config = VllmModelConfig(**self.get_model_config_params())
if self.model_config.model_override_args is not None:
self.vllm_model_config.hf_config.update(