feat(remote_model): support variable remote backend for model loader (#3964)

Signed-off-by: wangyu <wangyu.steph@bytedance.com>
2025-03-14 15:40:44 +08:00
parent 977d7cd26a
commit 1ce4878d31
22 changed files with 1055 additions and 9 deletions
--- a/python/sglang/srt/hf_transformers_utils.py
+++ b/python/sglang/srt/hf_transformers_utils.py
@@ -37,6 +37,8 @@ from sglang.srt.configs import (
    MultiModalityConfig,
    Qwen2_5_VLConfig,
 )
+from sglang.srt.connector import create_remote_connector
+from sglang.srt.utils import is_remote_url

 _CONFIG_REGISTRY: Dict[str, Type[PretrainedConfig]] = {
    ChatGLMConfig.model_type: ChatGLMConfig,
@@ -155,6 +157,14 @@ def get_tokenizer(
        kwargs["gguf_file"] = tokenizer_name
        tokenizer_name = Path(tokenizer_name).parent

+    if is_remote_url(tokenizer_name):
+        # BaseConnector implements __del__() to clean up the local dir.
+        # The config files must outlive this call, so we deliberately do NOT
+        # use a `with` statement, which would close the client.
+        client = create_remote_connector(tokenizer_name)
+        client.pull_files(ignore_pattern=["*.pt", "*.safetensors", "*.bin"])
+        tokenizer_name = client.get_local_dir()
+
    try:
        tokenizer = AutoTokenizer.from_pretrained(
            tokenizer_name,