Support Jinja files as chat templates (#1104)
This commit is contained in:
@@ -288,6 +288,8 @@ def launch_server(
|
||||
|
||||
# Launch processes
|
||||
tokenizer_manager = TokenizerManager(server_args, port_args, model_overide_args)
|
||||
if server_args.chat_template:
|
||||
load_chat_template_for_openai_api(tokenizer_manager, server_args.chat_template)
|
||||
pipe_controller_reader, pipe_controller_writer = mp.Pipe(duplex=False)
|
||||
pipe_detoken_reader, pipe_detoken_writer = mp.Pipe(duplex=False)
|
||||
|
||||
@@ -375,11 +377,6 @@ def _set_envs_and_config(server_args: ServerArgs):
|
||||
# FIXME: remove this after https://github.com/triton-lang/triton/pull/4295 is used as a dependency.
|
||||
maybe_set_triton_cache_manager()
|
||||
|
||||
# Set global chat template
|
||||
if server_args.chat_template:
|
||||
# TODO: replace this with huggingface transformers template
|
||||
load_chat_template_for_openai_api(server_args.chat_template)
|
||||
|
||||
# Check flashinfer version
|
||||
if not server_args.disable_flashinfer:
|
||||
assert_pkg_version(
|
||||
|
||||
Reference in New Issue
Block a user