Fix dockerfile and triton cache manager (#720)

This commit is contained in:
Liangsheng Yin
2024-07-25 03:04:21 -07:00
committed by GitHub
parent d63f13c13b
commit 04ec6ba2ac
3 changed files with 50 additions and 9 deletions

View File

@@ -52,6 +52,7 @@ from sglang.srt.utils import (
allocate_init_ports,
assert_pkg_version,
enable_show_time_cost,
maybe_set_triton_cache_manager,
set_ulimit,
)
from sglang.utils import get_exception_traceback
@@ -201,6 +202,11 @@ def launch_server(
"reinstall the latest version by following the instructions "
"at https://docs.flashinfer.ai/installation.html.",
)
if server_args.tp_size // server_args.dp_size > 1:
# FIXME: remove this workaround once the Triton version we depend on includes https://github.com/triton-lang/triton/pull/4295.
maybe_set_triton_cache_manager()
if server_args.chat_template:
# TODO: replace this with huggingface transformers template
load_chat_template_for_openai_api(server_args.chat_template)