Support gpt-bigcode model class (#681)

This commit is contained in:
Liangsheng Yin
2024-07-20 18:34:37 -07:00
committed by GitHub
parent 69d19188fc
commit caaad53b52
6 changed files with 341 additions and 12 deletions

View File

@@ -157,6 +157,19 @@ def _set_global_server_args(server_args: ServerArgs):
}
def _set_torch_compile_config():
# The following configurations are for torch compile optimizations
import torch._dynamo.config
import torch._inductor.config
torch._inductor.config.coordinate_descent_tuning = True
torch._inductor.config.triton.unique_kernel_names = True
torch._inductor.config.fx_graph_cache = True # Experimental feature to reduce compilation times, will be on by default in future
# FIXME: tmp workaround
torch._dynamo.config.accumulated_cache_size_limit = 128
def launch_server(
server_args: ServerArgs,
model_overide_args: Optional[dict] = None,
@@ -190,6 +203,10 @@ def launch_server(
if server_args.chat_template:
# TODO: replace this with huggingface transformers template
load_chat_template_for_openai_api(server_args.chat_template)
if server_args.enable_torch_compile:
_set_torch_compile_config()
_set_global_server_args(server_args)
# Allocate ports