From 97cd38e58d4c33a6cac002cd832e7e91c5b86e88 Mon Sep 17 00:00:00 2001
From: Cheng Wan <54331508+ch-wan@users.noreply.github.com>
Date: Tue, 7 Oct 2025 21:52:46 -0700
Subject: [PATCH] Skip weight loading in deepgemm compilation (#11312)

---
 python/sglang/compile_deep_gemm.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/python/sglang/compile_deep_gemm.py b/python/sglang/compile_deep_gemm.py
index e59036f7b..5504bc448 100644
--- a/python/sglang/compile_deep_gemm.py
+++ b/python/sglang/compile_deep_gemm.py
@@ -141,6 +141,9 @@ def refine_server_args(server_args: ServerArgs, compile_args: CompileArgs):
     server_args.enable_torch_compile = False
     print(f"Disable CUDA Graph and Torch Compile to save time...")
 
+    server_args.load_format = "dummy"
+    print(f"Set load format to dummy to save time...")
+
     # Set watchdog timeout to compile_args.timeout because compilation will take a long time
     server_args.watchdog_timeout = compile_args.timeout
     server_args.warmups = "compile-deep-gemm"