Update vllm to 0.6.3 (#1711) (#1720)

Co-authored-by: Ke Bao <ISPObaoke@163.com>
This commit is contained in:
Yineng Zhang
2024-10-19 20:45:41 -07:00
committed by GitHub
parent 12cad0feae
commit 8bee20f80b
9 changed files with 133 additions and 76 deletions

View File

@@ -59,8 +59,11 @@ from sglang.srt.server_args import ServerArgs
from sglang.srt.utils import (
enable_show_time_cost,
get_available_gpu_memory,
is_attention_free_model,
is_embedding_model,
is_generation_model,
is_multimodal_model,
model_has_inner_state,
monkey_patch_vllm_dummy_weight_loader,
monkey_patch_vllm_p2p_access_check,
)
@@ -316,11 +319,13 @@ class ModelRunner:
# Helper shown under `class ModelRunner` in the hunk header: asks the loader's
# private `_get_weights_iterator` for a weights iterator built from `config`
# and returns it unchanged.
# NOTE(review): this is a rendered diff hunk whose +/- markers and indentation
# were stripped, so two argument lists appear back to back inside one call.
# The hunk header counts (11 old lines, 13 new lines) suggest the first list is
# the removed form and the second the added form -- confirm against the raw diff.
# NOTE: the local name `iter` shadows the builtin of the same name in this helper.
def get_weight_iter(config):
iter = loader._get_weights_iterator(
# Presumably the pre-change call form: model, revision and fall_back_to_pt are
# handed to the iterator as separate arguments.
config.model,
config.revision,
fall_back_to_pt=getattr(
self.model, "fall_back_to_pt_during_load", True
),
# Presumably the post-change call form: the same three values are bundled into
# a single DefaultModelLoader.Source object.
DefaultModelLoader.Source(
config.model,
revision=config.revision,
# getattr defaults to True when self.model has no
# `fall_back_to_pt_during_load` attribute.
fall_back_to_pt=getattr(
self.model, "fall_back_to_pt_during_load", True
),
)
)
return iter
@@ -662,3 +667,7 @@ def load_model_cls_srt(model_arch: str) -> Optional[Type[nn.Module]]:
# Monkey patch model loader
# Each setattr overwrites an attribute on ModelRegistry at import time:
# `_try_load_model_cls` is pointed at load_model_cls_srt (named in the hunk
# header above), and the remaining attributes are pointed at the same-named
# helpers imported from sglang.srt.utils.
# NOTE(review): the hunk header (3 old lines, 7 new lines) suggests the last
# four assignments are the ones this commit adds -- confirm against the raw diff.
setattr(ModelRegistry, "_try_load_model_cls", load_model_cls_srt)
setattr(ModelRegistry, "is_multimodal_model", is_multimodal_model)
setattr(ModelRegistry, "is_attention_free_model", is_attention_free_model)
setattr(ModelRegistry, "model_has_inner_state", model_has_inner_state)
setattr(ModelRegistry, "is_embedding_model", is_embedding_model)