Skip weight loading in deepgemm compilation (#11312)

2025-10-07 21:52:46 -07:00
parent 3c06b673af
commit 97cd38e58d
1 changed file with 3 additions and 0 deletions
--- a/python/sglang/compile_deep_gemm.py
+++ b/python/sglang/compile_deep_gemm.py
@@ -141,6 +141,9 @@ def refine_server_args(server_args: ServerArgs, compile_args: CompileArgs):
    server_args.enable_torch_compile = False
    print(f"Disable CUDA Graph and Torch Compile to save time...")
    server_args.load_format = "dummy"
    print(f"Set load format to dummy to save time...")
    # Set watchdog timeout to compile_args.timeout because compilation will take a long time
    server_args.watchdog_timeout = compile_args.timeout
    server_args.warmups = "compile-deep-gemm"