From 97cd38e58d4c33a6cac002cd832e7e91c5b86e88 Mon Sep 17 00:00:00 2001
From: Cheng Wan <54331508+ch-wan@users.noreply.github.com>
Date: Tue, 7 Oct 2025 21:52:46 -0700
Subject: [PATCH] Skip weight loading in deepgemm compilation (#11312)

---
 python/sglang/compile_deep_gemm.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/python/sglang/compile_deep_gemm.py b/python/sglang/compile_deep_gemm.py
index e59036f7b..5504bc448 100644
--- a/python/sglang/compile_deep_gemm.py
+++ b/python/sglang/compile_deep_gemm.py
@@ -141,6 +141,9 @@ def refine_server_args(server_args: ServerArgs, compile_args: CompileArgs):
     server_args.enable_torch_compile = False
     print(f"Disable CUDA Graph and Torch Compile to save time...")
 
+    server_args.load_format = "dummy"
+    print(f"Set load format to dummy to save time...")
+
     # Set watchdog timeout to compile_args.timeout because compilation will take a long time
     server_args.watchdog_timeout = compile_args.timeout
     server_args.warmups = "compile-deep-gemm"