Support Jinja files as chat templates (#1104)

This commit is contained in:
Ying Sheng
2024-08-14 17:43:14 -07:00
committed by GitHub
parent 73cf6834f2
commit 6767e2229f
2 changed files with 34 additions and 26 deletions

View File

@@ -288,6 +288,8 @@ def launch_server(
# Launch processes
tokenizer_manager = TokenizerManager(server_args, port_args, model_overide_args)
if server_args.chat_template:
load_chat_template_for_openai_api(tokenizer_manager, server_args.chat_template)
pipe_controller_reader, pipe_controller_writer = mp.Pipe(duplex=False)
pipe_detoken_reader, pipe_detoken_writer = mp.Pipe(duplex=False)
@@ -375,11 +377,6 @@ def _set_envs_and_config(server_args: ServerArgs):
# FIXME: remove this after https://github.com/triton-lang/triton/pull/4295 is used as a dependency.
maybe_set_triton_cache_manager()
# Set global chat template
if server_args.chat_template:
# TODO: replace this with huggingface transformers template
load_chat_template_for_openai_api(server_args.chat_template)
# Check flashinfer version
if not server_args.disable_flashinfer:
assert_pkg_version(