Skip weight loading in deepgemm compilation (#11312)

This commit is contained in:
Cheng Wan
2025-10-07 21:52:46 -07:00
committed by GitHub
parent 3c06b673af
commit 97cd38e58d

View File

@@ -141,6 +141,9 @@ def refine_server_args(server_args: ServerArgs, compile_args: CompileArgs):
server_args.enable_torch_compile = False
print(f"Disable CUDA Graph and Torch Compile to save time...")
server_args.load_format = "dummy"
print(f"Set load format to dummy to save time...")
# Set watchdog timeout to compile_args.timeout because compilation will take a long time
server_args.watchdog_timeout = compile_args.timeout
server_args.warmups = "compile-deep-gemm"