Fix bugs (fp8 checkpoints, triton cache manager) (#729)

This commit is contained in:
Ying Sheng
2024-07-25 07:42:00 -07:00
committed by GitHub
parent ae0f6130cb
commit 8fbba3de3d
4 changed files with 61 additions and 10 deletions

View File

@@ -202,15 +202,12 @@ def launch_server(
"reinstall the latest version by following the instructions "
"at https://docs.flashinfer.ai/installation.html.",
)
-if server_args.tp_size // server_args.dp_size > 1:
+if server_args.tp_size * server_args.dp_size > 1:
# FIXME: remove this after https://github.com/triton-lang/triton/pull/4295 is used as a dependency.
maybe_set_triton_cache_manager()
if server_args.chat_template:
# TODO: replace this with huggingface transformers template
load_chat_template_for_openai_api(server_args.chat_template)
if server_args.enable_torch_compile:
_set_torch_compile_config()