Skip weight loading in deepgemm compilation (#11312)
This commit is contained in:
@@ -141,6 +141,9 @@ def refine_server_args(server_args: ServerArgs, compile_args: CompileArgs):
 server_args.enable_torch_compile = False
 print(f"Disable CUDA Graph and Torch Compile to save time...")
 
+server_args.load_format = "dummy"
+print(f"Set load format to dummy to save time...")
+
 # Set watchdog timeout to compile_args.timeout because compilation will take a long time
 server_args.watchdog_timeout = compile_args.timeout
 server_args.warmups = "compile-deep-gemm"
Reference in New Issue
Block a user