Move torch.compile configs into cuda_graph_runner.py (#993)

2024-08-08 13:20:30 -07:00
parent ab7875941b
commit 9f662501a3
4 changed files with 15 additions and 19 deletions
--- a/python/sglang/srt/server.py
+++ b/python/sglang/srt/server.py
@@ -74,7 +74,6 @@ from sglang.srt.utils import (
    enable_show_time_cost,
    kill_child_process,
    maybe_set_triton_cache_manager,
-    set_torch_compile_config,
    set_ulimit,
 )
 from sglang.utils import get_exception_traceback
@@ -347,10 +346,6 @@ def _set_envs_and_config(server_args: ServerArgs):
        # FIXME: remove this after https://github.com/triton-lang/triton/pull/4295 is used as a dependency.
        maybe_set_triton_cache_manager()

-    # Set torch compile config
-    if server_args.enable_torch_compile:
-        set_torch_compile_config()
-
    # Set global chat template
    if server_args.chat_template:
        # TODO: replace this with huggingface transformers template