support models from www.modelscope.cn (#994)
Co-authored-by: mulin.lyh <mulin.lyh@taobao.com>
This commit is contained in:
@@ -74,6 +74,8 @@ from sglang.srt.utils import (
|
||||
enable_show_time_cost,
|
||||
kill_child_process,
|
||||
maybe_set_triton_cache_manager,
|
||||
prepare_model,
|
||||
prepare_tokenizer,
|
||||
set_ulimit,
|
||||
)
|
||||
from sglang.utils import get_exception_traceback
|
||||
@@ -250,6 +252,10 @@ def launch_server(
|
||||
)
|
||||
logger.info(f"{server_args=}")
|
||||
|
||||
# When ModelScope (www.modelscope.cn) is enabled, download the model and tokenizer first.
|
||||
server_args.model_path = prepare_model(server_args.model_path)
|
||||
server_args.tokenizer_path = prepare_tokenizer(server_args.tokenizer_path)
|
||||
|
||||
# Launch processes for multi-node tensor parallelism
|
||||
if server_args.nnodes > 1:
|
||||
if server_args.node_rank != 0:
|
||||
|
||||
@@ -701,3 +701,23 @@ def add_api_key_middleware(app, api_key):
|
||||
if request.headers.get("Authorization") != "Bearer " + api_key:
|
||||
return JSONResponse(content={"error": "Unauthorized"}, status_code=401)
|
||||
return await call_next(request)
|
||||
|
||||
|
||||
def prepare_model(model_path):
    """Resolve ``model_path``, downloading it from ModelScope when enabled.

    The ModelScope hub (www.modelscope.cn) is consulted only when the
    ``SGLANG_USE_MODELSCOPE`` environment variable is set to something other
    than an explicit "off" value (empty, ``0``, ``false``, ``no``, ``off``;
    case-insensitive) AND ``model_path`` does not exist on the local
    filesystem.

    Args:
        model_path: A local path, or a model id to fetch from ModelScope.

    Returns:
        Whatever ``modelscope.snapshot_download`` returns when a download
        happens; otherwise ``model_path`` unchanged.

    Raises:
        ImportError: If a download is required but ``modelscope`` is not
            installed.
    """
    # A bare membership test would treat SGLANG_USE_MODELSCOPE=0 / false as
    # "enabled"; parse the value so users can switch the feature off.
    flag = os.environ.get("SGLANG_USE_MODELSCOPE", "").strip().lower()
    if flag in ("", "0", "false", "no", "off"):
        return model_path

    # An existing local path always wins over the hub.
    if os.path.exists(model_path):
        return model_path

    # Imported lazily so modelscope stays an optional dependency.
    from modelscope import snapshot_download

    return snapshot_download(model_path)
|
||||
|
||||
|
||||
def prepare_tokenizer(tokenizer_path):
    """Resolve ``tokenizer_path``, downloading it from ModelScope when enabled.

    Mirrors ``prepare_model`` but passes
    ``ignore_patterns=["*.bin", "*.safetensors"]`` to the download call, so
    files matching those patterns are excluded (presumably the model weights,
    which a tokenizer does not need).

    The ModelScope hub (www.modelscope.cn) is consulted only when the
    ``SGLANG_USE_MODELSCOPE`` environment variable is set to something other
    than an explicit "off" value (empty, ``0``, ``false``, ``no``, ``off``;
    case-insensitive) AND ``tokenizer_path`` does not exist locally.

    Args:
        tokenizer_path: A local path, or a model id to fetch from ModelScope.
            ``None`` is passed through unchanged.

    Returns:
        Whatever ``modelscope.snapshot_download`` returns when a download
        happens; otherwise ``tokenizer_path`` unchanged.

    Raises:
        ImportError: If a download is required but ``modelscope`` is not
            installed.
    """
    # os.path.exists(None) raises TypeError; nothing to resolve in that case.
    if tokenizer_path is None:
        return tokenizer_path

    # A bare membership test would treat SGLANG_USE_MODELSCOPE=0 / false as
    # "enabled"; parse the value so users can switch the feature off.
    flag = os.environ.get("SGLANG_USE_MODELSCOPE", "").strip().lower()
    if flag in ("", "0", "false", "no", "off"):
        return tokenizer_path

    # An existing local path always wins over the hub.
    if os.path.exists(tokenizer_path):
        return tokenizer_path

    # Imported lazily so modelscope stays an optional dependency.
    from modelscope import snapshot_download

    return snapshot_download(
        tokenizer_path, ignore_patterns=["*.bin", "*.safetensors"]
    )
|
||||
|
||||
Reference in New Issue
Block a user