Support gpt-bigcode model class (#681)
This commit is contained in:
@@ -157,6 +157,19 @@ def _set_global_server_args(server_args: ServerArgs):
|
||||
}
|
||||
|
||||
|
||||
def _set_torch_compile_config():
|
||||
# The following configurations are for torch compile optimizations
|
||||
import torch._dynamo.config
|
||||
import torch._inductor.config
|
||||
|
||||
torch._inductor.config.coordinate_descent_tuning = True
|
||||
torch._inductor.config.triton.unique_kernel_names = True
|
||||
torch._inductor.config.fx_graph_cache = True # Experimental feature to reduce compilation times, will be on by default in future
|
||||
|
||||
# FIXME: tmp workaround
|
||||
torch._dynamo.config.accumulated_cache_size_limit = 128
|
||||
|
||||
|
||||
def launch_server(
|
||||
server_args: ServerArgs,
|
||||
model_overide_args: Optional[dict] = None,
|
||||
@@ -190,6 +203,10 @@ def launch_server(
|
||||
if server_args.chat_template:
|
||||
# TODO: replace this with the Hugging Face transformers template
|
||||
load_chat_template_for_openai_api(server_args.chat_template)
|
||||
|
||||
if server_args.enable_torch_compile:
|
||||
_set_torch_compile_config()
|
||||
|
||||
_set_global_server_args(server_args)
|
||||
|
||||
# Allocate ports
|
||||
|
||||
Reference in New Issue
Block a user