diff --git a/python/sglang/compile_deep_gemm.py b/python/sglang/compile_deep_gemm.py index e59036f7b..5504bc448 100644 --- a/python/sglang/compile_deep_gemm.py +++ b/python/sglang/compile_deep_gemm.py @@ -141,6 +141,9 @@ def refine_server_args(server_args: ServerArgs, compile_args: CompileArgs): server_args.enable_torch_compile = False print(f"Disable CUDA Graph and Torch Compile to save time...") + server_args.load_format = "dummy" + print(f"Set load format to dummy to save time...") + # Set watchdog timeout to compile_args.timeout because compilation will take a long time server_args.watchdog_timeout = compile_args.timeout server_args.warmups = "compile-deep-gemm"