feat(remote_model): support variable remote backend for model loader (#3964)

Signed-off-by: wangyu <wangyu.steph@bytedance.com>
This commit is contained in:
wangyu
2025-03-14 15:40:44 +08:00
committed by GitHub
parent 977d7cd26a
commit 1ce4878d31
22 changed files with 1055 additions and 9 deletions

View File

@@ -37,6 +37,8 @@ from sglang.srt.configs import (
MultiModalityConfig,
Qwen2_5_VLConfig,
)
from sglang.srt.connector import create_remote_connector
from sglang.srt.utils import is_remote_url
_CONFIG_REGISTRY: Dict[str, Type[PretrainedConfig]] = {
ChatGLMConfig.model_type: ChatGLMConfig,
@@ -155,6 +157,14 @@ def get_tokenizer(
kwargs["gguf_file"] = tokenizer_name
tokenizer_name = Path(tokenizer_name).parent
if is_remote_url(tokenizer_name):
# BaseConnector implements __del__() to clean up the local dir.
# The config files must remain available afterwards, so we deliberately
# do NOT use a `with` statement here, to avoid closing the client.
client = create_remote_connector(tokenizer_name)
client.pull_files(ignore_pattern=["*.pt", "*.safetensors", "*.bin"])
tokenizer_name = client.get_local_dir()
try:
tokenizer = AutoTokenizer.from_pretrained(
tokenizer_name,