Support gpt-bigcode model class (#681)

2024-07-20 18:34:37 -07:00
parent 69d19188fc
commit caaad53b52
6 changed files with 341 additions and 12 deletions
--- a/python/sglang/srt/server.py
+++ b/python/sglang/srt/server.py
@@ -157,6 +157,19 @@ def _set_global_server_args(server_args: ServerArgs):
    }


+def _set_torch_compile_config():
+    # Set torch._inductor / torch._dynamo config options for torch compile optimizations (imported locally, inside this function).
+    import torch._dynamo.config
+    import torch._inductor.config
+
+    torch._inductor.config.coordinate_descent_tuning = True
+    torch._inductor.config.triton.unique_kernel_names = True
+    torch._inductor.config.fx_graph_cache = True  # Experimental feature to reduce compilation times; will be on by default in the future
+
+    # FIXME: temporary workaround -- sets dynamo's accumulated cache size limit to 128 (reason not recorded here; revisit)
+    torch._dynamo.config.accumulated_cache_size_limit = 128
+
+
 def launch_server(
    server_args: ServerArgs,
    model_overide_args: Optional[dict] = None,
@@ -190,6 +203,10 @@ def launch_server(
    if server_args.chat_template:
        # TODO: replace this with huggingface transformers template
        load_chat_template_for_openai_api(server_args.chat_template)
+
+    if server_args.enable_torch_compile:
+        _set_torch_compile_config()
+
    _set_global_server_args(server_args)

    # Allocate ports